source: trunk/twindoi.sh @ 110

Last change on this file since 110 was 109, checked in by pinsard, 14 years ago

usage of program directive

  • Property svn:executable set to *
  • Property svn:keywords set to Id
File size: 3.0 KB
RevLine 
[20]1#! /bin/sh
[75]2#+
[80]3#
[109]4# .. program:: twindoi.sh
[103]5#
[95]6# ==========
7# twindoi.sh
8# ==========
[75]9#
[95]10# --------------------------
11# detection of duplicate DOI
12# --------------------------
[75]13#
[93]14# SYNOPSIS
[75]15# ========
16#
17# ::
18#
19#  $ twindoi.sh -i filein -t type
20#
21#
22# DESCRIPTION
23# ===========
24#
25#
[108]26# .. option:: -i <filein>
27# .. option:: -t <type>
28#
[20]29# detection of duplicate DOI
30#
[75]31# EXAMPLES
32# ========
33#
34# ::
35#
36#  $ ./twindoi.sh -i data/biball.txt -t raw
37#
38#
39# ::
40#
41#  $ ./twindoi.sh -i data/biball.xml -t xml
42#
[95]43# TODO
44# ====
[75]45#
[20]46# ++ option debug
[75]47#
[20]48# ++ the following command, which is inappropriate
[75]49# (xml vs txt), does not raise any alert and checks inside xml comments
50# ::
51#
52#  $ ./twindoi.sh -i data/biball.xml -t raw
53#
[95]54# EVOLUTIONS
55# ==========
[75]56#
[20]57# $Id$
58#
[100]59# - fplod 20100318T083708Z aedon.locean-ipsl.upmc.fr (Darwin)
60#
61#   * unset
62#
[75]63# - fplod 2008-05-05T14:26:31Z aedon.locean-ipsl.upmc.fr (Darwin)
[20]64#
[80]65#   * usage of xml(starlet) for doi extraction in xml file
[75]66#
67# - fplod 2007-06-20T16:12:22Z aedon.locean-ipsl.upmc.fr (Darwin)
68#
[80]69#   * consolidation and homogenization
[75]70#
71# - smasson 2007-06-20T16:11:47Z
72#
[80]73#   * creation
74#
[75]75#-
76#
# Switch the shell to its POSIX mode when this is possible.
#
# AIX and IRIX64 are skipped with a warning. Elsewhere the option is first
# tried in a subshell: "set" is a special built-in, so asking directly for
# an option the running shell does not know (for instance dash installed as
# /bin/sh) would abort the whole script.
system=$(uname)
case "${system}" in
   AIX|IRIX64)
      echo " www : no specific posix checking"
   ;;
   *)
      if (set -o posix) 2> /dev/null
      then
         set -o posix
      else
         echo " www : no specific posix checking"
      fi
   ;;
esac
unset system
87#
# Name of this script without its directory, used in the messages.
# ${0} is quoted so that a path containing blanks is kept in one piece.
command=$(basename "${0}")
# UTC time stamp and name of a log file under /tmp built from it.
# NOTE(review): ${log} is only defined here, nothing visible writes to it.
log_date=$(date -u +"%Y%m%dT%H%M%SZ")
log=/tmp/$(basename "${command}" .sh).log.${log_date}
#
# One-line reminder printed when the command line is wrong.
usage=" Usage : ${command} -i filein -t type"
93#
# Decode the command line.
#
#   -i <filein>  file to scan, stored in ${filein}
#   -t <type>    kind of file, stored in ${type}
#
# Anything else, or an option given without its value, prints an error
# followed by the usage reminder and ends the script with status 1.
# The loop is driven by the number of remaining arguments, so an empty
# argument is reported as unknown instead of silently ending the parsing.
set +u
while [ ${#} -gt 0 ]
do
   case "${1}" in
      -i|-t)
         # the value must be present before it is consumed
         if [ ${#} -lt 2 ]
         then
            echo "eee : option ${1} requires a value"
            echo "${usage}"
            exit 1
         fi
         case "${1}" in
            -i)
               filein=${2}
            ;;
            -t)
               type=${2}
            ;;
         esac
         # skip the value
         shift
      ;;
      *)
        # other choice
        echo "eee : unknown option ${1}"
        echo "${usage}"
        exit 1
      ;;
   esac
   # next flag
   shift
done
unset usage
#
# from here on, reading an unset variable is an error
set -u
119#
# check for filein
# ${filein:-} keeps the test meaningful when option -i was not given
# (an unset variable would otherwise stop the script when "set -u" is on),
# and the quotes prevent an empty or blank-containing name from changing
# the number of operands seen by "[".
if [ -z "${filein:-}" ]
then
   echo "eee : missing input file (option -i)"
   exit 1
fi
if [ ! -f "${filein}" ]
then
   echo "eee : ${filein} not found"
   exit 1
fi
126#
# Map the input file onto the variable read by the extraction step.
# ${type:-} lets a missing -t option reach the error message below
# instead of stopping on an unset variable when "set -u" is on.
case "${type:-}" in
   raw)  # file like data/biball.txt
      fileraw=${filein}
   ;;
   xml)   # file like data/biball.xml
      filexml=${filein}
   ;;
   *)
      echo "eee : type should be raw or xml"
      exit 1
   ;;
esac
139#
# Extract the DOI of the input file into a sorted list, then report every
# DOI found on two consecutive lines of that list.
#
# Exit status: 1 when no DOI was found (or the type is unknown),
# 0 otherwise, whether or not duplicates were reported.
#
# The list goes into a private temporary file so that simultaneous runs do
# not overwrite each other; a name built from the process id is the
# fallback where mktemp is not available.
doilist=$(mktemp "${TMPDIR:-/tmp}/doilist.XXXXXX" 2> /dev/null) || \
   doilist=/tmp/doilist.$$.txt
# remove the list on every way out, interruptions included
trap 'rm -f "${doilist}"' EXIT
trap 'exit 1' HUP INT TERM
#
case "${type}" in
   raw)
      # keep what follows "doi:", drop the entries containing "???"
      # NOTE(review): the second sed turns "xxx." into " xxx" (leading
      # blank added), so a DOI written with and without a final dot is
      # not seen as a duplicate - confirm this is intended.
      grep -i "doi:" "${fileraw}" | \
         sed -e "s/^.*doi: *//" | \
         sed -e "s/^\(.*\)\.$/ \1/" | \
         grep -v "???" | \
         sort -d > "${doilist}"
   ;;
   xml)
      # value of each DocBook biblioid element whose class is doi
      xml sel -N dbk="http://docbook.org/ns/docbook" \
         -t -m "//dbk:biblioid[@class='doi']" -v . -n "${filexml}" | \
         grep  -v "???" | \
         sort -d > "${doilist}"
   ;;
   *)
      echo "eee : error unknown file type"
      exit 1
   ;;
esac
#
# an empty list means that nothing was extracted
if [ ! -s "${doilist}" ]
then
   echo "www : no DOI found in ${filein}"
   exit 1
fi
#
# Single pass over the sorted list: a line equal to the previous one is a
# duplicate, reported with the number of the first line of the pair.
n=0
previous=
while IFS= read -r current
do
   if [ ${n} -gt 0 ] && [ "${current}" = "${previous}" ]
   then
      echo "eee : line ${n} : ${previous}"
   fi
   previous=${current}
   n=$(( n + 1 ))
done < "${doilist}"
unset n previous current
#
exit 0
Note: See TracBrowser for help on using the repository browser.