#!/bin/sh
#+
#
# NAME
# ====
#
# twindoi.sh - detection of duplicate DOI
#
# SYNOPSIS
# ========
#
# ::
#
#  $ twindoi.sh -i filein -t type
#
#
# DESCRIPTION
# ===========
#
# detection of duplicate DOI
#
# The DOI are extracted from filein, the entries containing "???" are
# discarded, the remaining list is sorted and every pair of identical
# consecutive entries is reported on standard output as::
#
#  eee : line n : doi
#
# where n is the rank of the first entry of the pair in the sorted list.
#
# -i filein
#    file to be checked
#
# -t type
#    raw : text file like data/biball.txt (DOI introduced by "doi:")
#
#    xml : DocBook file like data/biball.xml (DOI read from the biblioid
#    elements with class="doi")
#
# EXIT STATUS
# ===========
#
# 0 : the list of DOI has been scanned (with or without duplicates found)
#
# 1 : usage error, filein not found, unknown type, missing tool or
#     no DOI found in filein
#
# EXAMPLES
# ========
#
# ::
#
#  $ ./twindoi.sh -i data/biball.txt -t raw
#
#
# ::
#
#  $ ./twindoi.sh -i data/biball.xml -t xml
#
# FILES
# =====
#
# original location
# ~~~~~~~~~~~~~~~~~
#
# /usr/home/fplod/src/superbib_ws/twindoi.sh on aedon.locean-ipsl.upmc.fr
#
#
# EVOLUTIONS
# ==========
#
# ++ option debug
#
# ++ the following command, which is not consistent
#    (xml file vs raw type), does not give any alert and also checks
#    inside xml comments
# ::
#
#  $ ./twindoi.sh -i data/biball.xml -t raw
#
#
# $Id$
#
# - fplod 2008-05-05T14:26:31Z aedon.locean-ipsl.upmc.fr (Darwin)
#
#   * usage of xml(starlet) for doi extraction in xml file
#
# - fplod 2007-06-20T16:12:22Z aedon.locean-ipsl.upmc.fr (Darwin)
#
#   * consolidation and homogenization
#
# - smasson 2007-06-20T16:11:47Z
#
#   * creation
#
#-
#
# Enabled before option parsing so that every later reference to an unset
# variable is caught; optional values are initialised explicitly below.
set -u
#
system=$(uname)
case "${system}" in
    AIX|IRIX64)
        echo " www : no specific posix checking"
        ;;
    *)
        # "set -o posix" is not known by every sh (dash for example) and a
        # failing special builtin aborts the script: probe in a subshell
        # first and only enable the option where it is supported.
        if (set -o posix) 2> /dev/null
        then
            set -o posix
        fi
        ;;
esac
#
prog=${0##*/}
usage=" Usage : ${prog} -i filein -t type"
#
# Print the usage message and stop.
usage_exit() {
    printf '%s\n' "${usage}"
    exit 1
}
#
# Decode the command line. Both options need a value: check that it is
# present before consuming it so that "shift" never runs out of arguments.
filein=
type=
while [ $# -gt 0 ]
do
    case "${1}" in
        -i) # filein
            [ $# -ge 2 ] || usage_exit
            filein=${2}
            shift
            ;;
        -t) # type
            [ $# -ge 2 ] || usage_exit
            type=${2}
            shift
            ;;
        *) # other choice
            usage_exit
            ;;
    esac
    shift # next flag
done
#
# both options are mandatory
if [ -z "${filein}" ] || [ -z "${type}" ]
then
    usage_exit
fi
#
# check for filein
if [ ! -f "${filein}" ]
then
    echo "eee : ${filein} not found"
    exit 1
fi
#
# check for type
case "${type}" in
    raw|xml)
        ;;
    *)
        echo "eee : type should be raw or xml"
        exit 1
        ;;
esac
#
# Private temporary file holding the sorted list of DOI. It is removed on
# every exit path, including interruption by a signal.
doilist=$(mktemp "${TMPDIR:-/tmp}/doilist.XXXXXX") || {
    echo "eee : unable to create temporary file"
    exit 1
}
trap 'rm -f -- "${doilist}"' EXIT
trap 'exit 1' HUP INT TERM
#
# Build the sorted list of DOI (entries containing "???" are ignored).
case "${type}" in
    raw) # file like data/biball.txt
        # The "doi:" prefix is removed whatever its case, in agreement with
        # the case-insensitive grep which selects the lines.
        # NOTE(review): the second expression removes a final "." but also
        # inserts a leading blank, so "X." and "X" are not seen as twins -
        # kept as in the original, to be confirmed.
        grep -i -e "doi:" -- "${filein}" | \
        sed -e 's/^.*[dD][oO][iI]: *//' \
            -e 's/^\(.*\)\.$/ \1/' | \
        grep -F -v -e '???' | \
        sort -d > "${doilist}"
        ;;
    xml) # file like data/biball.xml
        # xmlstarlet is installed as "xml" or as "xmlstarlet" depending on
        # the platform.
        if command -v xml > /dev/null 2>&1
        then
            xmltool=xml
        elif command -v xmlstarlet > /dev/null 2>&1
        then
            xmltool=xmlstarlet
        else
            echo "eee : xml (xmlstarlet) command not found"
            exit 1
        fi
        "${xmltool}" sel -N dbk="http://docbook.org/ns/docbook" \
            -t -m "//dbk:biblioid[@class='doi']" -v . -n "${filein}" | \
        grep -F -v -e '???' | \
        sort -d > "${doilist}"
        ;;
esac
#
if [ ! -s "${doilist}" ]
then
    echo "www : no DOI found in ${filein}"
    exit 1
fi
#
# Report each entry identical to the next one in the sorted list (line
# numbers refer to the sorted list). Appending "" forces a string
# comparison, otherwise awk would compare numeric-looking entries as numbers.
awk '
    NR > 1 && ($0 "") == (prev "") {
        printf "eee : line %d : %s\n", NR - 1, prev
    }
    { prev = $0 }
' "${doilist}"
#
exit 0