#! /bin/sh
#+
#
# .. program:: twindoi.sh
#
# ==========
# twindoi.sh
# ==========
#
# --------------------------
# detection of duplicate DOI
# --------------------------
#
# SYNOPSIS
# ========
#
# ::
#
#  $ twindoi.sh -i filein -t type
#
#
# DESCRIPTION
# ===========
#
# detection of duplicate DOI
#
# .. option:: -i <filein>
#
#    file to scan (must be an existing regular file)
#
# .. option:: -t <type>
#
#    ``raw`` : every line of filein containing ``doi:`` (any case) is used;
#    the DOI is the text following the last ``doi:`` on the line
#
#    ``xml`` : DOI are the values of the DocBook elements
#    ``biblioid`` with ``class='doi'``, extracted with xml(starlet)
#
# DOI containing ``???`` are ignored. The remaining DOI are sorted
# (``sort -d``) and each DOI identical to its successor in the sorted list
# is reported on standard output as::
#
#  eee : line <rank in the sorted list> : <DOI>
#
# Exit status is 0 when at least one DOI has been found (duplicates or not),
# 1 on usage error, missing file, missing tool or when no DOI has been found.
#
# EXAMPLES
# ========
#
# ::
#
#  $ ./twindoi.sh -i data/biball.txt -t raw
#
#
# ::
#
#  $ ./twindoi.sh -i data/biball.xml -t xml
#
# TODO
# ====
#
# ++ option debug
#
# ++ the following command, which is not convenient
# (xml vs txt), did not give any alert and checks inside xml comments
# ::
#
#  $ ./twindoi.sh -i data/biball.xml -t raw
#
# EVOLUTIONS
# ==========
#
# $Id$
#
# - fplod 20100318T083708Z aedon.locean-ipsl.upmc.fr (Darwin)
#
#   * unset
#
# - fplod 2008-05-05T14:26:31Z aedon.locean-ipsl.upmc.fr (Darwin)
#
#   * usage of xml(starlet) for doi extraction in xml file
#
# - fplod 2007-06-20T16:12:22Z aedon.locean-ipsl.upmc.fr (Darwin)
#
#   * consolidation and homogenization
#
# - smasson 2007-06-20T16:11:47Z
#
#   * creation
#
#-
#
system=$(uname)
case "${system}" in
   AIX|IRIX64)
      echo " www : no specific posix checking"
   ;;
   *)
      # "posix" is not a POSIX-mandated option name: some /bin/sh abort on
      # an unknown "set -o" option, so probe it in a subshell first.
      if ( set -o posix ) 2> /dev/null
      then
         set -o posix
      fi
   ;;
esac
unset system
#
set -u
#
myname=$(basename "${0}")
usage=" Usage : ${myname} -i filein -t type"
#
# command line parsing
filein=
type=
xml_cmd=
while [ ${#} -gt 0 ]
do
   case "${1}" in
      -i|-t)
         # without this check the second shift below would fail
         if [ ${#} -lt 2 ]
         then
            echo "eee : option ${1} requires a value"
            echo "${usage}"
            exit 1
         fi
         if [ "${1}" = "-i" ]
         then
            filein=${2}
         else
            type=${2}
         fi
         shift
      ;;
      *)
         # other choice
         echo "eee : unknown option ${1}"
         echo "${usage}"
         exit 1
      ;;
   esac
   # next flag
   shift
done
#
# check for filein
if [ -z "${filein}" ]
then
   echo "eee : missing input file (-i)"
   echo "${usage}"
   exit 1
fi
if [ ! -f "${filein}" ]
then
   echo "eee : ${filein} not found"
   exit 1
fi
#
# check for type (and for the tool needed by this type)
case "${type}" in
   raw)
      # file like data/biball.txt
      :
   ;;
   xml)
      # file like data/biball.xml
      # the xmlstarlet executable is named "xml" or "xmlstarlet"
      # depending on the installation
      if command -v xml > /dev/null 2>&1
      then
         xml_cmd=xml
      elif command -v xmlstarlet > /dev/null 2>&1
      then
         xml_cmd=xmlstarlet
      else
         echo "eee : xml (xmlstarlet) command not found"
         exit 1
      fi
   ;;
   *)
      echo "eee : type should be raw or xml"
      exit 1
   ;;
esac
#
# list_doi : write the DOI found in ${filein} on stdout, one per line,
# according to ${type} (already validated above)
list_doi() {
   case "${type}" in
      raw)
         # first sed expression : keep what follows the last "doi:"; the
         # match is case-insensitive like the grep selecting the lines.
         # second sed expression : a trailing period is removed and a
         # leading space is added.
         # NOTE(review): the leading space means "X." and "X" are not seen
         # as duplicates of each other - kept as is, confirm the intent.
         grep -i "doi:" < "${filein}" | \
         sed -e 's/^.*[Dd][Oo][Ii]: *//' -e 's/^\(.*\)\.$/ \1/'
      ;;
      xml)
         "${xml_cmd}" sel -N dbk="http://docbook.org/ns/docbook" \
            -t -m "//dbk:biblioid[@class='doi']" -v . -n "${filein}"
      ;;
   esac
}
#
# Single pass over the sorted list : a line equal to the previous one is a
# duplicate, reported with the rank of the first line of the pair.
# Appending "" forces a string comparison (awk would otherwise compare
# numeric-looking lines as numbers).
# awk exit status 3 means that the list was empty.
list_doi | \
grep -v "???" | \
sort -d | \
awk '
   NR > 1 && ($0 "") == (previous "") {
      printf "eee : line %d : %s\n", NR - 1, previous
   }
   { previous = $0 }
   END { if (NR == 0) exit 3 }
'
status=${?}
#
case ${status} in
   0)
      exit 0
   ;;
   3)
      echo "www : no DOI found in ${filein}"
      exit 1
   ;;
   *)
      echo "eee : duplicate detection failed (status ${status})"
      exit 1
   ;;
esac