#! /bin/sh
# module :
# detection of duplicate DOI
#
# original location :
# /usr/home/fplod/src/superbib_ws/twindoi.sh on aedon.locean-ipsl.upmc.fr
#
# example :
# $ ./twindoi.sh -i data/biball.txt -t raw
# $ ./twindoi.sh -i data/biball.xml -t xml
#
# update :
# ++ option debug
# ++ the following command, which is not appropriate
# (xml file given with type raw), did not give any alert and
# also checked inside xml comments
# $ ./twindoi.sh -i data/biball.xml -t raw
# $Id$
# smasson 2007-06-20T16:11:47Z
# creation
# fplod 2007-06-20T16:12:22Z aedon.locean-ipsl.upmc.fr (Darwin)
# consolidation and homogenization
#
#
# Global setup : shell mode, script name, log file name and usage text.
#
# "posix" is not a standard name for "set -o" : some /bin/sh implementations
# (e.g. dash) reject it, and an error in the special built-in "set" aborts a
# non-interactive shell. The option is therefore probed in a subshell first
# and only enabled when the running shell accepts it.
( set -o posix ) 2> /dev/null && set -o posix
# name of this script without its directory part; parameter expansion is
# used instead of an unquoted "basename ${0}" so that a path containing
# blanks or starting with "-" is handled
command=${0##*/}
# UTC date of the run
log_date=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
# log file name (defined here, not written to by the code of this script)
log=/tmp/${command}.${log_date}
#
# usage text printed when the command line is not understood
usage=" Usage : ${command} -i filein -t type"
#
# parse_args : decode the command line
#
# arguments : the command line words of the script ("$@")
# sets      : filein (value following -i)
#             type   (value following -t)
# exits     : with status 1, after printing ${usage}, on an unknown word or
#             when -i or -t is the last word (no value after it)
#
# The loop is driven by the number of remaining words ($#) and not by the
# emptiness of ${1} : an empty word no longer stops the decoding silently,
# and ${1} is never expanded when it is unset.
parse_args() {
  while [ $# -gt 0 ]
  do
    case "${1}" in
      -i) # filein
        if [ $# -lt 2 ]
        then
          echo "${usage}"
          exit 1
        fi
        filein=${2}
        shift
        ;;
      -t) # type
        if [ $# -lt 2 ]
        then
          echo "${usage}"
          exit 1
        fi
        type=${2}
        shift
        ;;
      *) # other choice
        echo "${usage}"
        exit 1
        ;;
    esac
    shift # next flag
  done
}
parse_args "$@"
# from here on, expanding an unset variable is an error
set -u
#
# check of the decoded command line
#
# both -i and -t are mandatory : print the usage text instead of failing
# later on the expansion of an unset variable
if [ -z "${filein:-}" ] || [ -z "${type:-}" ]
then
  echo "${usage}"
  exit 1
fi
#
# check for filein
# (quoted, so that a name containing blanks is tested as one single word)
if [ ! -f "${filein}" ]
then
  echo "eee : ${filein} not found"
  exit 1
fi
#
# the input file is stored under a name depending on its declared type
case "${type}" in
  raw) # file like data/biball.txt
    fileraw=${filein}
    ;;
  xml) # file like data/biball.xml
    filexml=${filein}
    ;;
  *)
    echo "eee : type should be raw or xml"
    exit 1
    ;;
esac
#
# Extraction of the DOI and detection of the twins.
#
# reads  : type, filein, fileraw (type raw) or filexml (type xml)
# prints : "eee : line N : DOI" for each DOI equal to the next one in the
#          sorted list (N is the rank of the first of the two),
#          "www : no DOI found in ..." when the list is empty
# exits  : 0 when at least one DOI was found (with or without twins),
#          1 otherwise
#
# The list is streamed through a pipeline : no file with a fixed name is
# written in /tmp any more (two simultaneous runs used to share
# /tmp/doilist.txt) and each line is read only once.
#
case "${type}" in
  raw|xml)
    ;;
  *)
    echo "eee : error unknown file type"
    exit 1
    ;;
esac
#
# extract_doi : write on stdout one DOI per line, "???" placeholders removed
extract_doi() {
  case "${type}" in
    raw)
      # the prefix is removed whatever its case, like the grep -i before it
      # NOTE(review): the second expression removes a final "." but also
      # puts a blank in front of the DOI; kept as it was - confirm it is
      # intended
      grep -i "doi:" "${fileraw}" | \
        sed -e 's/^.*[dD][oO][iI]: *//' -e 's/^\(.*\)\.$/ \1/' | \
        grep -v "???"
      ;;
    xml)
      grep -i '<biblioid class="doi">' "${filexml}" | \
        sed -e 's+^.*<biblioid class="doi">\(.*\)</biblioid>.*$+\1+' | \
        grep -v "???"
      ;;
  esac
}
#
# Twins are adjacent once the list is sorted. Both sides of the comparison
# are concatenated with "" to force a string comparison (awk would compare
# two numeric looking lines as numbers), and LC_ALL=C makes it bytewise.
# awk exits with status 3 when it did not read any line.
extract_doi | sort -d | LC_ALL=C awk '
  NR > 1 && ($0 "") == (prev "") { printf "eee : line %d : %s\n", NR - 1, prev }
  { prev = $0 }
  END { if (NR == 0) exit 3 }
'
status=$?
#
case ${status} in
  0)
    exit 0
    ;;
  3)
    echo "www : no DOI found in ${filein}"
    exit 1
    ;;
  *)
    echo "eee : comparison of DOI failed (status ${status})"
    exit 1
    ;;
esac