Changeset 205 for trunk/twindoi.sh
- Timestamp: 05/21/12 11:37:29 (12 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
trunk/twindoi.sh
r116 r205 8 8 # ========== 9 9 # 10 # --------------------------11 # detection of duplicate DOI12 # --------------------------13 #14 10 # SYNOPSIS 15 11 # ======== … … 17 13 # :: 18 14 # 19 # $twindoi.sh -i filein -t type15 # twindoi.sh -i filein -t type 20 16 # 21 17 # … … 32 28 # ======== 33 29 # 34 # :: 35 # 36 # $ ./twindoi.sh -i data/biball.txt -t raw 37 # 38 # 39 # :: 40 # 41 # $ ./twindoi.sh -i data/biball.xml -t xml 30 # To detect duplicate DOI in a raw file:: 31 # 32 # twindoi.sh -i data/biball.txt -t raw 33 # 34 # 35 # To detect duplicate DOI in a XML/DocBook file:: 36 # 37 # twindoi.sh -i data/biball.xml -t xml 38 # 39 # To detect duplicate DOI in a bibtex file:: 40 # 41 # twindoi.sh -i data/biball.xml -t bibtex 42 42 # 43 43 # TODO … … 50 50 # :: 51 51 # 52 # $ ./twindoi.sh -i data/biball.xml -t raw52 # twindoi.sh -i data/biball.xml -t raw 53 53 # 54 54 # EVOLUTIONS … … 57 57 # $Id$ 58 58 # 59 # - fplod 20120521T080342Z cratos (Linux) 60 # 61 # * rename type variable to ftype to avoid usage of a reserved word 62 # * revision of indentation 63 # * typo 64 # * add bibtex as file type 65 # 59 66 # - fplod 20100318T083708Z aedon.locean-ipsl.upmc.fr (Darwin) 60 67 # … … 77 84 system=$(uname) 78 85 case "${system}" in 79 AIX|IRIX64)80 echo "www : no specific posix checking"81 ;;82 *)83 set -o posix84 ;;86 AIX|IRIX64) 87 echo "${command} : www : no specific posix checking" 88 ;; 89 *) 90 set -o posix 91 ;; 85 92 esac 86 93 unset system … … 88 95 set -u 89 96 # 97 action=$(basename ${0} .sh) 90 98 command=$(basename ${0}) 91 99 log_date=$(date -u +"%Y%m%dT%H%M%SZ") 92 log= /tmp/$(basename ${command} .sh).log.${log_date}100 log=${PROJECT_LOG}/$(basename ${command} .sh).log.${log_date} 93 101 # 94 102 usage=" Usage : ${command} -i filein -t type" 103 # 104 minargcount=4 105 #echo " narg ${#}" 106 if [ ${#} -lt ${minargcount} ] 107 then 108 echo "${command} : eee : not enought arguments" 109 echo "${usage}" 110 exit 1 111 fi 112 unset minargcount 95 113 # 96 114 
while [ ${#} -gt 0 ] 97 115 do 98 case ${1} in99 -i)100 filein=${2}101 shift102 ;;103 -t)104 type=${2}105 shift106 ;;107 *)108 # other choice109 echo "eee : unknown option ${1}"110 echo "${usage}"111 exit 1112 ;;113 esac114 # next flag115 shift116 case ${1} in 117 -i) 118 filein=${2} 119 shift 120 ;; 121 -t) 122 ftype=${2} 123 shift 124 ;; 125 *) 126 # other choice 127 echo "${command} : eee : unknown option ${1}" 128 echo "${usage}" 129 exit 1 130 ;; 131 esac 132 # next flag 133 shift 116 134 done 117 135 unset usage … … 120 138 if [ ! -f ${filein} ] 121 139 then 122 echo "eee : ${filein} not found"123 exit 1140 echo "${command} : eee : ${filein} not found" 141 exit 1 124 142 fi 125 143 # 126 case ${type} in 127 raw) # file like data/biball.txt 128 fileraw=${filein} 129 ;; 130 xml) # file like data/biball.xml 131 filexml=${filein} 132 ;; 133 *) 134 echo "eee : type should be raw or xml" 135 exit 1 136 ;; 144 case ${ftype} in 145 raw) # file like data/biball.txt 146 fileraw=${filein} 147 ;; 148 xml) # file like data/biball.xml 149 filexml=${filein} 150 ;; 151 bibtex) # file like data/biball.bib 152 filebibtex=${filein} 153 ;; 154 *) 155 echo "${command} : eee : type should be raw, xml or bibtex" 156 exit 1 157 ;; 137 158 esac 138 # 139 case ${type} in 140 raw) 141 grep -i "doi:" ${fileraw} | \ 142 sed -e "s/^.*doi: *//" | \ 143 sed -e "s/^\(.*\)\.$/ \1/" | \ 144 grep -v "???" | \ 145 sort -d > /tmp/doilist.txt 146 ;; 147 xml) 148 xml sel -N dbk="http://docbook.org/ns/docbook" \ 149 -t -m "//dbk:biblioid[@class='doi']" -v . -n ${filexml} | \ 150 grep -v "???" | \ 151 sort -d > /tmp/doilist.txt 152 ;; 153 *) 154 echo "eee : error unknown file type" 155 exit 1 156 ;; 159 unset filein 160 # 161 case ${ftype} in 162 raw) 163 grep -i "doi:" ${fileraw} | \ 164 sed -e "s/^.*doi: *//" | \ 165 sed -e "s/^\(.*\)\.$/ \1/" | \ 166 grep -v "???" 
| \ 167 sort -d > ${PROJECT_LOG}/${action}${$}.txt 168 ;; 169 bibtex) 170 grep -i "doi *= *" ${filebibtex} | \ 171 sed -e "s/^.*doi *= *//" | \ 172 sed -e "s/^\(.*\)\.$/ \1/" | \ 173 grep -v "???" | \ 174 sort -d > ${PROJECT_LOG}/${action}${$}.txt 175 ;; 176 xml) 177 xml sel -N dbk="http://docbook.org/ns/docbook" \ 178 -t -m "//dbk:biblioid[@class='doi']" -v . -n ${filexml} | \ 179 grep -v "???" | \ 180 sort -d > ${PROJECT_LOG}/${action}${$}.txt 181 ;; 182 *) 183 echo "${command} : eee : error unknown file type ${ftype}" 184 exit 1 185 ;; 157 186 esac 158 # 159 nl=$( cat /tmp/doilist.txt | wc -l ) 187 unset ftype 188 # 189 nl=$( cat ${PROJECT_LOG}/${action}${$}.txt | wc -l ) 160 190 if [ ${nl} -eq 0 ] 161 191 then 162 echo "www : no DOI found in ${filein}"163 rm /tmp/doilist.txt 2> /dev/null164 exit 1192 echo "${command} : www : no DOI found in ${filein}" 193 rm ${PROJECT_LOG}/${action}${$}.txt 2> /dev/null 194 exit 1 165 195 fi 166 196 n=1 167 197 while [ ${n} -lt ${nl} ] 168 198 do 169 l1=$( head -${n} /tmp/doilist.txt | tail -1 )170 l2=$( head -$(( ${n} + 1 )) /tmp/doilist.txt | tail -1 )171 [ "${l1}" == "${l2}" ] && echo "eee : line ${n} : ${l1}"172 unset l1173 unet l2174 n=$(( ${n} + 1 ))199 l1=$( head -${n} ${PROJECT_LOG}/${action}${$}.txt | tail -1 ) 200 l2=$( head -$(( ${n} + 1 )) ${PROJECT_LOG}/${action}${$}.txt | tail -1 ) 201 [ "${l1}" == "${l2}" ] && echo "${command} : eee : line ${n} : ${l1}" 202 unset l1 203 unset l2 204 n=$(( ${n} + 1 )) 175 205 done 176 206 unset n 177 # 178 rm /tmp/doilist.txt 2> /dev/null 207 unset nl 208 # 209 rm ${PROJECT_LOG}/${action}${$}.txt 2> /dev/null 210 unset command 211 unset log 212 unset log_date 213 # 214 #++set 179 215 exit 0
Note: See TracChangeset for help on using the changeset viewer.