#! /bin/sh
#+
#
# .. program:: twindoi.sh
#
# ==========
# twindoi.sh
# ==========
#
# SYNOPSIS
# ========
#
# ::
#
#  twindoi.sh -i filein -t type
#
#
# DESCRIPTION
# ===========
#
# Detection of duplicate DOI.
#
# The DOI found in filein are sorted and every pair of identical
# consecutive entries is reported on standard output as::
#
#  twindoi.sh : eee : line <n> : <doi>
#
# where <n> is the rank of the first entry of the pair in the sorted list.
#
# .. option:: -i <filein>
#
#    file to be scanned (must be a regular file)
#
# .. option:: -t <type>
#
#    type of filein : raw, xml or bibtex
#
# ENVIRONMENT
# ===========
#
# PROJECT_LOG
#    directory where the temporary sorted DOI list is written
#    (removed when the script exits)
#
# xmlcmd
#    name of the command used to query XML files; it is invoked as
#    ``${xmlcmd} sel ...`` and must be available whatever the file type
#
# EXIT STATUS
# ===========
#
# 0
#    the scan has been completed (with or without duplicates reported)
#
# 1
#    bad usage, missing tool, missing file or no DOI found
#
# EXAMPLES
# ========
#
# To detect duplicate DOI in a raw file::
#
#  twindoi.sh -i data/biball.txt -t raw
#
#
# To detect duplicate DOI in a XML/DocBook file::
#
#  twindoi.sh -i data/biball.xml -t xml
#
# To detect duplicate DOI in a bibtex file::
#
#  twindoi.sh -i data/biball.bib -t bibtex
#
# TODO
# ====
#
# ++ option debug
#
# ++ the following command which is not convenient
# (xml vs txt) did not give any alert and check inside xml comments
# ::
#
#  twindoi.sh -i data/biball.xml -t raw
#
# EVOLUTIONS
# ==========
#
# $Id$
#
# - fplod 20131010T113730Z callisto.locean-ipsl.upmc.fr (Linux)
#
#   * dynamic xmlstarlet vs xml
#
# - fplod 20120521T080342Z cratos (Linux)
#
#   * rename type variable to ftype to avoid usage of a reserved word
#   * revision of indentation
#   * typo
#   * add bibtex as file type
#
# - fplod 20100318T083708Z aedon.locean-ipsl.upmc.fr (Darwin)
#
#   * unset
#
# - fplod 2008-05-05T14:26:31Z aedon.locean-ipsl.upmc.fr (Darwin)
#
#   * usage of xml(starlet) for doi extraction in xml file
#
# - fplod 2007-06-20T16:12:22Z aedon.locean-ipsl.upmc.fr (Darwin)
#
#   * consolidation and homogeneisation
#
# - smasson 2007-06-20T16:11:47Z
#
#   * creation
#
#-
#
# names used in messages and in the temporary file name; they are set
# first so that every message below can be prefixed with the script name
command=$(basename "${0}")
action=$(basename "${0}" .sh)
#
system=$(uname)
case "${system}" in
    AIX|IRIX64)
        echo "${command} : www : no specific posix checking"
    ;;
    *)
        set -o posix
    ;;
esac
unset system
#
set -u
#
# check for the XML query tool; an unset or empty xmlcmd is reported as
# a missing tool instead of being silently accepted
tool=${xmlcmd:-}
if ! command -v "${tool}" 1> /dev/null 2>&1
then
    echo "${command} : eee : tool ${tool} not found"
    exit 1
fi
unset tool
#
usage=" Usage : ${command} -i filein -t type"
#
minargcount=4
if [ ${#} -lt ${minargcount} ]
then
    echo "${command} : eee : not enought arguments"
    echo "${usage}"
    exit 1
fi
unset minargcount
#
# both variables are initialised so that a missing -i or -t ends in one
# of the explicit error messages below rather than in a "set -u" abort
filein=
ftype=
while [ ${#} -gt 0 ]
do
    case "${1}" in
        -i|-t)
            if [ ${#} -lt 2 ]
            then
                echo "${command} : eee : option ${1} requires a value"
                echo "${usage}"
                exit 1
            fi
            if [ "${1}" = "-i" ]
            then
                filein=${2}
            else
                ftype=${2}
            fi
            shift
        ;;
        *)
            # other choice
            echo "${command} : eee : unknown option ${1}"
            echo "${usage}"
            exit 1
        ;;
    esac
    # next flag
    shift
done
unset usage
#
# check for filein
if [ ! -f "${filein}" ]
then
    echo "${command} : eee : ${filein} not found"
    exit 1
fi
#
# check for ftype
case "${ftype}" in
    raw|xml|bibtex)
        # raw    : file like data/biball.txt
        # xml    : file like data/biball.xml
        # bibtex : file like data/biball.bib
    ;;
    *)
        echo "${command} : eee : type should be raw, xml or bibtex"
        exit 1
    ;;
esac
#
# temporary file holding the sorted list of DOI; the trap removes it
# on every exit path
tmpfile=${PROJECT_LOG}/${action}${$}.txt
trap 'rm -f "${tmpfile}"' EXIT
trap 'exit 1' HUP INT TERM
#
# extract_doi ftype file
# write on standard output one DOI per line found in file, according to
# its type (raw, bibtex or xml)
# NOTE(review): grep is case-insensitive but the first sed of the raw and
# bibtex cases is not, so an upper-case "DOI" prefix is kept - to be confirmed
extract_doi() {
    case "${1}" in
        raw)
            grep -i "doi:" "${2}" | \
            sed -e 's/^.*doi: *//' | \
            sed -e 's/^\(.*\)\.$/ \1/'
        ;;
        bibtex)
            grep -i "doi *= *" "${2}" | \
            sed -e 's/^.*doi *= *//' | \
            sed -e 's/^\(.*\)\.$/ \1/'
        ;;
        xml)
            "${xmlcmd}" sel -N dbk="http://docbook.org/ns/docbook" \
            -t -m "//dbk:biblioid[@class='doi']" -v . -n "${2}"
        ;;
    esac
}
#
# entries containing "???" are discarded before sorting
extract_doi "${ftype}" "${filein}" | \
grep -v "???" | \
sort -d > "${tmpfile}"
#
# sort writes complete lines only, so an empty (or missing) file means
# that no DOI has been found
if [ ! -s "${tmpfile}" ]
then
    echo "${command} : www : no DOI found in ${filein}"
    exit 1
fi
#
# single pass on the sorted list : each line is compared with the
# previous one; n is the rank of the previous line
n=0
previous=
while IFS= read -r current
do
    if [ ${n} -gt 0 ] && [ "${current}" = "${previous}" ]
    then
        printf '%s\n' "${command} : eee : line ${n} : ${previous}"
    fi
    previous=${current}
    n=$(( n + 1 ))
done < "${tmpfile}"
#
#++set
exit 0