1 | #! /bin/sh |
---|
2 | #+ |
---|
3 | # |
---|
4 | # .. program:: twindoi.sh |
---|
5 | # |
---|
6 | # ========== |
---|
7 | # twindoi.sh |
---|
8 | # ========== |
---|
9 | # |
---|
10 | # SYNOPSIS |
---|
11 | # ======== |
---|
12 | # |
---|
13 | # .. code-block:: bash |
---|
14 | # |
---|
15 | # twindoi.sh -i filein -t type |
---|
16 | # |
---|
17 | # DESCRIPTION |
---|
18 | # =========== |
---|
19 | # |
---|
20 | # .. option:: -i <filein> |
---|
21 | # .. option:: -t <type> |
---|
22 | # |
---|
23 | # detection of duplicate DOI |
---|
24 | # |
---|
25 | # EXAMPLES |
---|
26 | # ======== |
---|
27 | # |
---|
28 | # To detect duplicate DOI in a raw file: |
---|
29 | # |
---|
30 | # .. code-block:: bash |
---|
31 | # |
---|
32 | # twindoi.sh -i data/biball.txt -t raw |
---|
33 | # |
---|
34 | # To detect duplicate DOI in a XML/DocBook file: |
---|
35 | # |
---|
36 | # .. code-block:: bash |
---|
37 | # |
---|
38 | # twindoi.sh -i data/biball.xml -t xml |
---|
39 | # |
---|
40 | # To detect duplicate DOI in a bibtex file: |
---|
41 | # |
---|
42 | # .. code-block:: bash |
---|
43 | # |
---|
44 | # twindoi.sh -i data/biball.bib -t bibtex |
---|
45 | # |
---|
46 | # TODO |
---|
47 | # ==== |
---|
48 | # |
---|
49 | # option debug |
---|
50 | # |
---|
51 | # the following command, which is inconsistent (XML file processed as raw |
---|
52 | # text), does not raise any alert and also checks inside XML comments |
---|
53 | # |
---|
54 | # .. code-block:: bash |
---|
55 | # |
---|
56 | # twindoi.sh -i data/biball.xml -t raw |
---|
57 | # |
---|
58 | # EVOLUTIONS |
---|
59 | # ========== |
---|
60 | # |
---|
61 | # $Id$ |
---|
62 | # |
---|
63 | # - fplod 20131010T113730Z callisto.locean-ipsl.upmc.fr (Linux) |
---|
64 | # |
---|
65 | # * dynamic xmlstarlet vs xml |
---|
66 | # |
---|
67 | # - fplod 20120521T080342Z cratos (Linux) |
---|
68 | # |
---|
69 | # * rename type variable to ftype to avoid usage of a reserved word |
---|
70 | # * revision of indentation |
---|
71 | # * typo |
---|
72 | # * add bibtex as file type |
---|
73 | # |
---|
74 | # - fplod 20100318T083708Z aedon.locean-ipsl.upmc.fr (Darwin) |
---|
75 | # |
---|
76 | # * unset |
---|
77 | # |
---|
78 | # - fplod 2008-05-05T14:26:31Z aedon.locean-ipsl.upmc.fr (Darwin) |
---|
79 | # |
---|
80 | # * usage of xml(starlet) for doi extraction in xml file |
---|
81 | # |
---|
82 | # - fplod 2007-06-20T16:12:22Z aedon.locean-ipsl.upmc.fr (Darwin) |
---|
83 | # |
---|
84 | # * consolidation and homogenisation |
---|
85 | # |
---|
86 | # - smasson 2007-06-20T16:11:47Z |
---|
87 | # |
---|
88 | # * creation |
---|
89 | # |
---|
90 | #- |
---|
91 | # |
---|
# Shell options: POSIX mode where the running shell offers it, then "set -u"
# so that any later use of an unset variable is an error.
system=$(uname)
case "${system}" in
  AIX|IRIX64)
    # ${command} is only assigned further down in the script, so the script
    # name is taken from ${0} for this message
    echo "${0##*/} : www : no specific posix checking"
  ;;
  *)
    # "set" is a special built-in: asking for an option the shell does not
    # know (e.g. dash installed as /bin/sh) would abort the script, so the
    # option is probed in a subshell before being enabled for real
    if (set -o posix) 2> /dev/null
    then
      set -o posix
    fi
  ;;
esac
unset system
#
set -u
# Names derived from the path of the script:
#   action   : script name without the .sh suffix (prefix of work files)
#   command  : script name (prefix of every message)
#   log_date : UTC timestamp, format YYYYmmddTHHMMSSZ
#   log      : log file path under ${PROJECT_LOG}; PROJECT_LOG is not
#              assigned in this script and must come from the environment
# ${0} is quoted so that a script path containing blanks is not word-split.
action=$(basename "${0}" .sh)
command=$(basename "${0}")
log_date=$(date -u +"%Y%m%dT%H%M%SZ")
log="${PROJECT_LOG}/${action}.log.${log_date}"
108 | # |
---|
# Check that the XML command-line tool named by ${xmlcmd} can be found.
# ${xmlcmd} is not assigned in this script.
# NOTE(review): presumably exported by the caller with the local name of
# xmlstarlet ("xml" or "xmlstarlet") - to be confirmed.
tool=${xmlcmd}
# ${tool} is quoted: with an empty value the unquoted form runs "type"
# without operand, which succeeds and hides the missing tool
if ! type "${tool}" 1> /dev/null 2>&1
then
  echo "${command} : eee : tool ${tool} not found"
  exit 1
fi
unset tool
119 | # |
---|
# Command line decoding: -i filein -t type
# On success ${filein} and ${ftype} are both set and not empty; any other
# case prints a message followed by the usage and exits with status 1.
usage=" Usage : ${command} -i filein -t type"
#
minargcount=4
#echo " narg ${#}"
if [ ${#} -lt ${minargcount} ]
then
  echo "${command} : eee : not enough arguments"
  echo "${usage}"
  exit 1
fi
unset minargcount
#
# defined up front so that a missing option is reported below instead of
# stopping the script on an unset variable (set -u)
filein=""
ftype=""
while [ ${#} -gt 0 ]
do
  case "${1}" in
    -i|-t)
      # both options take a value: without this test a trailing -i or -t
      # would read an unset ${2}
      if [ ${#} -lt 2 ]
      then
        echo "${command} : eee : option ${1} requires a value"
        echo "${usage}"
        exit 1
      fi
      if [ "${1}" = "-i" ]
      then
        filein=${2}
      else
        ftype=${2}
      fi
      # skip the value
      shift
    ;;
    *)
      # other choice
      echo "${command} : eee : unknown option ${1}"
      echo "${usage}"
      exit 1
    ;;
  esac
  # next flag
  shift
done
# four arguments may still leave one option out (e.g. -i a -i b)
if [ -z "${filein}" ] || [ -z "${ftype}" ]
then
  echo "${command} : eee : both -i and -t are required"
  echo "${usage}"
  exit 1
fi
unset usage
154 | # |
---|
# check for filein (quoted: the file name may contain blanks)
if [ ! -f "${filein}" ]
then
  echo "${command} : eee : ${filein} not found"
  exit 1
fi
#
# check the file type and keep the input file under the type-specific name
# read by the extraction step
case "${ftype}" in
  raw) # file like data/biball.txt
    fileraw=${filein}
  ;;
  xml) # file like data/biball.xml
    filexml=${filein}
  ;;
  bibtex) # file like data/biball.bib
    filebibtex=${filein}
  ;;
  *)
    echo "${command} : eee : type should be raw, xml or bibtex"
    exit 1
  ;;
esac
# filein is deliberately not unset here: the "no DOI found" message issued
# later in the script still reads it, which stops the script when the
# variable is unset (set -u)
178 | # |
---|
# Extraction of the DOIs of the input file, one per line, sorted, into the
# temporary list ${PROJECT_LOG}/${action}${$}.txt
# Lines containing "???" are discarded.
# File names are quoted so that paths with blanks are handled.
# The doi keyword is selected whatever its case (grep -i); the sed which
# strips it accepts any case as well, otherwise an upper case "DOI:" line
# kept its prefix and could never be equal to its twin.
# NOTE(review): the second sed of the raw and bibtex branches removes a
# trailing dot but also prepends a blank, so the same DOI written with and
# without trailing dot gives two different lines - left as it was, to be
# confirmed.
doi_list="${PROJECT_LOG}/${action}${$}.txt"
case "${ftype}" in
  raw)
    grep -i "doi:" "${fileraw}" | \
    sed -e "s/^.*[Dd][Oo][Ii]: *//" | \
    sed -e "s/^\(.*\)\.$/ \1/" | \
    grep -v "???" | \
    sort -d > "${doi_list}"
  ;;
  bibtex)
    grep -i "doi *= *" "${filebibtex}" | \
    sed -e "s/^.*[Dd][Oo][Ii] *= *//" | \
    sed -e "s/^\(.*\)\.$/ \1/" | \
    grep -v "???" | \
    sort -d > "${doi_list}"
  ;;
  xml)
    # value of each DocBook biblioid element of class doi, one per line
    "${xmlcmd}" sel -N dbk="http://docbook.org/ns/docbook" \
    -t -m "//dbk:biblioid[@class='doi']" -v . -n "${filexml}" | \
    grep -v "???" | \
    sort -d > "${doi_list}"
  ;;
  *)
    echo "${command} : eee : error unknown file type ${ftype}"
    exit 1
  ;;
esac
unset doi_list
unset ftype
206 | # |
---|
# Number of DOIs found; without any, warn, remove the temporary list and
# stop with status 1.
doi_list="${PROJECT_LOG}/${action}${$}.txt"
# the arithmetic expansion strips the blanks some wc implementations put
# before the count
nl=$(( $(wc -l < "${doi_list}") ))
if [ "${nl}" -eq 0 ]
then
  # ${filein} may already be unset at this point of the script; the
  # type-specific copies are used as fallback so that the message does not
  # stop on an unset variable (set -u) before the temporary list is removed
  echo "${command} : www : no DOI found in ${filein:-${fileraw:-${filexml:-${filebibtex:-}}}}"
  rm "${doi_list}" 2> /dev/null
  exit 1
fi
unset doi_list
# report_twins file
#
# Reads a sorted list (one DOI per line) and writes, for each line which is
# identical to the following one, the message
#   ${command} : eee : line <number of the first line of the pair> : <line>
# A DOI present three times is therefore reported twice.
# The list is read once; comparison uses "=", the only string equality
# operator of the POSIX test utility ("==" is not understood by every
# /bin/sh).
report_twins() {
  twin_n=0
  twin_prev=""
  # the second test keeps a last line without trailing newline
  while IFS= read -r twin_cur || [ -n "${twin_cur}" ]
  do
    if [ "${twin_n}" -gt 0 ] && [ "${twin_cur}" = "${twin_prev}" ]
    then
      echo "${command} : eee : line ${twin_n} : ${twin_prev}"
    fi
    twin_prev=${twin_cur}
    twin_n=$(( twin_n + 1 ))
  done < "${1}"
  unset twin_n
  unset twin_prev
  unset twin_cur
}
#
report_twins "${PROJECT_LOG}/${action}${$}.txt"
unset nl
226 | # |
---|
# End of script: remove the temporary DOI list (path quoted so that a
# directory name with blanks is handled), forget the script variables and
# exit with status 0, whether twins were reported or not.
rm -- "${PROJECT_LOG}/${action}${$}.txt" 2> /dev/null
unset command
unset log
unset log_date
#
#++set
exit 0