[20] | 1 | #! /bin/sh |
---|
[75] | 2 | #+ |
---|
[80] | 3 | # |
---|
[109] | 4 | # .. program:: twindoi.sh |
---|
[103] | 5 | # |
---|
[95] | 6 | # ========== |
---|
| 7 | # twindoi.sh |
---|
| 8 | # ========== |
---|
[75] | 9 | # |
---|
[95] | 10 | # -------------------------- |
---|
| 11 | # detection of duplicate DOI |
---|
| 12 | # -------------------------- |
---|
[75] | 13 | # |
---|
[93] | 14 | # SYNOPSIS |
---|
[75] | 15 | # ======== |
---|
| 16 | # |
---|
| 17 | # :: |
---|
| 18 | # |
---|
| 19 | # $ twindoi.sh -i filein -t type |
---|
| 20 | # |
---|
| 21 | # |
---|
| 22 | # DESCRIPTION |
---|
| 23 | # =========== |
---|
| 24 | # |
---|
| 25 | # |
---|
[108] | 26 | # .. option:: -i <filein> |
---|
| 27 | # .. option:: -t <type> |
---|
| 28 | # |
---|
[20] | 29 | # detection of duplicate DOI |
---|
| 30 | # |
---|
[75] | 31 | # EXAMPLES |
---|
| 32 | # ======== |
---|
| 33 | # |
---|
| 34 | # :: |
---|
| 35 | # |
---|
| 36 | # $ ./twindoi.sh -i data/biball.txt -t raw |
---|
| 37 | # |
---|
| 38 | # |
---|
| 39 | # :: |
---|
| 40 | # |
---|
| 41 | # $ ./twindoi.sh -i data/biball.xml -t xml |
---|
| 42 | # |
---|
[95] | 43 | # TODO |
---|
| 44 | # ==== |
---|
[75] | 45 | # |
---|
[20] | 46 | # ++ option debug |
---|
[75] | 47 | # |
---|
[20] | 48 | # ++ the following command, which is not appropriate |
---|
[75] | 49 | # (xml vs txt), does not give any alert and checks inside xml comments |
---|
| 50 | # :: |
---|
| 51 | # |
---|
| 52 | # $ ./twindoi.sh -i data/biball.xml -t raw |
---|
| 53 | # |
---|
[95] | 54 | # EVOLUTIONS |
---|
| 55 | # ========== |
---|
[75] | 56 | # |
---|
[20] | 57 | # $Id$ |
---|
| 58 | # |
---|
[100] | 59 | # - fplod 20100318T083708Z aedon.locean-ipsl.upmc.fr (Darwin) |
---|
| 60 | # |
---|
| 61 | # * unset |
---|
| 62 | # |
---|
[75] | 63 | # - fplod 2008-05-05T14:26:31Z aedon.locean-ipsl.upmc.fr (Darwin) |
---|
[20] | 64 | # |
---|
[80] | 65 | # * usage of xml(starlet) for doi extraction in xml file |
---|
[75] | 66 | # |
---|
| 67 | # - fplod 2007-06-20T16:12:22Z aedon.locean-ipsl.upmc.fr (Darwin) |
---|
| 68 | # |
---|
[80] | 69 | # * consolidation and homogenization |
---|
[75] | 70 | # |
---|
| 71 | # - smasson 2007-06-20T16:11:47Z |
---|
| 72 | # |
---|
[80] | 73 | # * creation |
---|
| 74 | # |
---|
[75] | 75 | #- |
---|
| 76 | # |
---|
# Switch the shell to POSIX mode where this is available.
# On AIX and IRIX64 nothing is changed and a warning is printed instead.
system=$(uname)
case "${system}" in
  AIX|IRIX64)
    echo " www : no specific posix checking"
    ;;
  *)
    # Try the option in a subshell first: "set" is a special builtin, so a
    # shell that does not know the "posix" option would otherwise abort the
    # whole script here instead of simply carrying on.
    if (set -o posix) 2> /dev/null
    then
      set -o posix
    fi
    ;;
esac
unset system
---|
| 87 | # |
---|
# Name of this script with its directory part removed. ${0} is quoted so
# that a path containing blanks reaches basename as a single operand.
command=$(basename "${0}")
# UTC timestamp, format YYYYMMDDThhmmssZ
log_date=$(date -u +"%Y%m%dT%H%M%SZ")
# Log file name: /tmp/<script name without .sh>.log.<timestamp>
# NOTE(review): log is not referenced anywhere else in the visible script.
log=/tmp/$(basename "${command}" .sh).log.${log_date}
#
# One-line reminder printed when the command line is rejected.
usage=" Usage : ${command} -i filein -t type"
---|
| 93 | # |
---|
# Parse the command line: "-i filein" and "-t type", each flag followed by
# its value. Any other argument is rejected with the usage reminder.
#
# Unset-variable checking is suspended while ${1} and ${2} are probed,
# because they are legitimately unset once the arguments are exhausted.
set +u
while [ -n "${1}" ]
do
  case "${1}" in
    -i)
      filein=${2}
      # drop the value only when one was supplied, so that the common
      # shift below never runs on an empty argument list
      if [ ${#} -gt 1 ]
      then
        shift
      fi
      ;;
    -t)
      type=${2}
      if [ ${#} -gt 1 ]
      then
        shift
      fi
      ;;
    *)
      # other choice
      echo "eee : unknown option ${1}"
      echo "${usage}"
      exit 1
      ;;
  esac
  # next flag
  shift
done
#
# Both options are mandatory. Checking here, while unset variables are still
# tolerated, gives a readable message instead of an "unbound variable" abort
# further down once set -u is active.
if [ -z "${filein}" ] || [ -z "${type}" ]
then
  echo "eee : options -i and -t are both required"
  echo "${usage}"
  exit 1
fi
unset usage
#
set -u
---|
| 119 | # |
---|
# check for filein
# The operand is quoted: left unquoted, a name containing blanks is split
# into several words, the test itself fails with an error and the "if"
# then skips this check instead of reporting the missing file.
if [ ! -f "${filein}" ]
then
  echo "eee : ${filein} not found"
  exit 1
fi
---|
| 126 | # |
---|
# Hand the input file name to the variable that matches the declared type:
#   raw -> fileraw (file like data/biball.txt)
#   xml -> filexml (file like data/biball.xml)
# Any other type stops the script with an error message.
if [ "${type}" = "raw" ]
then
  fileraw=${filein}
elif [ "${type}" = "xml" ]
then
  filexml=${filein}
else
  echo "eee : type should be raw or xml"
  exit 1
fi
---|
| 139 | # |
---|
# Extract every DOI of the input file into a sorted scratch list, then
# report each DOI that is immediately followed by an identical one.
#
# Reads  : type, fileraw or filexml (according to type), filein (messages)
# Output : one "eee : line <n> : <doi>" line per pair of identical
#          neighbours, <n> being the rank of the first one in the sorted list
# Exit   : 0 once the scan is done, 1 if the type is unknown, if the scratch
#          file cannot be created or if no DOI is found
#
# The scratch list gets a private, unpredictable name so that concurrent
# runs do not overwrite each other; the trap removes it on every exit path.
doilist=$(mktemp /tmp/doilist.XXXXXX) || {
  echo "eee : unable to create a temporary file"
  exit 1
}
trap 'rm -f "${doilist}"' EXIT
#
case "${type}" in
  raw)
    # 1. keep the lines holding "doi:" (any letter case)
    # 2. keep only what follows "doi:"; same letter-case tolerance as the
    #    grep above, otherwise "DOI:" lines would go through unstripped
    # 3. a trailing full stop is removed and a blank is put in front
    #    NOTE(review): because of that leading blank, the same DOI written
    #    once with and once without a trailing full stop does not compare
    #    equal below -- confirm this is intended
    # 4. drop entries containing "???"
    grep -i "doi:" "${fileraw}" | \
      sed -e "s/^.*[dD][oO][iI]: *//" | \
      sed -e "s/^\(.*\)\.$/ \1/" | \
      grep -v "???" | \
      sort -d > "${doilist}"
    ;;
  xml)
    # value of each DocBook biblioid element whose class is "doi", one per line
    xml sel -N dbk="http://docbook.org/ns/docbook" \
      -t -m "//dbk:biblioid[@class='doi']" -v . -n "${filexml}" | \
      grep -v "???" | \
      sort -d > "${doilist}"
    ;;
  *)
    echo "eee : error unknown file type"
    exit 1
    ;;
esac
#
# sort writes whole lines only, so an empty list means that no DOI was found
if [ ! -s "${doilist}" ]
then
  echo "www : no DOI found in ${filein}"
  exit 1
fi
#
# Single pass over the sorted list: identical entries are neighbours.
# The empty-string concatenation forces a string comparison, so that two
# entries looking like numbers are never compared by numeric value.
awk '
  NR > 1 && ($0 "") == (previous "") {
    print "eee : line " (NR - 1) " : " previous
  }
  { previous = $0 }
' "${doilist}"
#
exit 0
---|