source: trunk/twindoi.sh @ 110

Last change on this file since 110 was 109, checked in by pinsard, 14 years ago

usage of program directive

  • Property svn:executable set to *
  • Property svn:keywords set to Id
File size: 3.0 KB
Line 
1#! /bin/sh
2#+
3#
4# .. program:: twindoi.sh
5#
6# ==========
7# twindoi.sh
8# ==========
9#
10# --------------------------
11# detection of duplicate DOI
12# --------------------------
13#
14# SYNOPSIS
15# ========
16#
17# ::
18#
19#  $ twindoi.sh -i filein -t type
20#
21#
22# DESCRIPTION
23# ===========
24#
25#
26# .. option:: -i <filein>
27# .. option:: -t <type>
28#
29# detection of duplicate DOI
30#
31# EXAMPLES
32# ========
33#
34# ::
35#
36#  $ ./twindoi.sh -i data/biball.txt -t raw
37#
38#
39# ::
40#
41#  $ ./twindoi.sh -i data/biball.xml -t xml
42#
43# TODO
44# ====
45#
46# ++ option debug
47#
48# ++ the following command, which is not appropriate
49# (xml vs txt), does not give any alert and checks inside xml comments
50# ::
51#
52#  $ ./twindoi.sh -i data/biball.xml -t raw
53#
54# EVOLUTIONS
55# ==========
56#
57# $Id$
58#
59# - fplod 20100318T083708Z aedon.locean-ipsl.upmc.fr (Darwin)
60#
61#   * unset
62#
63# - fplod 2008-05-05T14:26:31Z aedon.locean-ipsl.upmc.fr (Darwin)
64#
65#   * usage of xml(starlet) for doi extraction in xml file
66#
67# - fplod 2007-06-20T16:12:22Z aedon.locean-ipsl.upmc.fr (Darwin)
68#
69#   * consolidation and homogenization
70#
71# - smasson 2007-06-20T16:11:47Z
72#
73#   * creation
74#
75#-
76#
# Switch the shell to POSIX mode on every platform except AIX and IRIX64,
# where only a warning is printed.
system=$(uname)
if [ "${system}" = "AIX" ] || [ "${system}" = "IRIX64" ]
then
   echo " www : no specific posix checking"
else
   set -o posix
fi
# the platform name is not used beyond this point
unset system
87#
# Name of this script without its directory. The expansion is quoted so that
# a path containing blanks or glob characters reaches basename as one word.
command=$(basename -- "${0}")
# UTC time stamp, e.g. 20100318T083708Z
log_date=$(date -u +"%Y%m%dT%H%M%SZ")
# Log file path: /tmp/<script name without .sh>.log.<time stamp>
# NOTE(review): nothing visible in this script writes to ${log} -- confirm
# whether it is still needed.
log=/tmp/$(basename -- "${command}" .sh).log.${log_date}
#
# One-line reminder printed when the command line is wrong.
usage=" Usage : ${command} -i filein -t type"
93#
# Command line parsing: -i <filein> and -t <type>, both mandatory.
# nounset is switched off while parsing because ${1}, ${2}, ${filein} and
# ${type} may legitimately be unset here; it is switched back on afterwards.
set +u
# As before, parsing stops at the end of the arguments or at the first
# empty one.
while [ -n "${1}" ]
do
   case "${1}" in
      -i|-t)
         # An option given as the very last word (or followed by an empty
         # word) has no value to consume: report it instead of silently
         # storing an empty string.
         if [ ${#} -lt 2 ] || [ -z "${2}" ]
         then
            echo "eee : option ${1} requires a value"
            echo "${usage}"
            exit 1
         fi
         if [ "${1}" = "-i" ]
         then
            filein=${2}
         else
            type=${2}
         fi
         # skip the value just consumed
         shift
      ;;
      *)
         # other choice
         echo "eee : unknown option ${1}"
         echo "${usage}"
         exit 1
      ;;
   esac
   # next flag
   shift
done
# Both options are required. Without this check a missing one only showed
# up later as an "unbound variable" abort once nounset is active.
if [ -z "${filein}" ] || [ -z "${type}" ]
then
   echo "eee : options -i and -t are both required"
   echo "${usage}"
   exit 1
fi
unset usage
#
set -u
119#
# check for filein
# The name is quoted on purpose: unquoted, an empty name reduced the test to
# "[ ! -f ]" (false) and a name containing blanks made "[" fail with a syntax
# error, so in both cases the check was silently skipped.
if [ ! -f "${filein}" ]
then
   echo "eee : ${filein} not found"
   exit 1
fi
126#
# Store the input file name in the variable matching the declared type:
#   raw -> fileraw (file like data/biball.txt)
#   xml -> filexml (file like data/biball.xml)
# Any other type is rejected with exit status 1.
if [ "${type}" = "raw" ]
then
   fileraw=${filein}
elif [ "${type}" = "xml" ]
then
   filexml=${filein}
else
   echo "eee : type should be raw or xml"
   exit 1
fi
139#
# Write the DOI found in the input file to /tmp/doilist.txt, one per line and
# sorted (sort -d), so that identical DOI end up on consecutive lines.
# Lines containing "???" are discarded in both branches.
# File names are quoted so that paths containing blanks stay a single word.
# NOTE(review): /tmp/doilist.txt is a fixed, predictable path that the rest of
# the script reads and removes under the same name; moving to mktemp would
# have to be done in all those places at once.
case "${type}" in
   raw)
      # 1. keep the lines mentioning "doi:" in any letter case
      # 2. drop everything up to and including that marker; the marker is
      #    matched in any letter case too, in line with the grep -i above
      #    (previously only a lower-case "doi:" was stripped)
      # 3. replace a trailing "." by nothing and put one blank in front
      # NOTE(review): because of that leading blank, "x." and "x" do not end
      # up as the same string -- confirm this is intended.
      grep -i "doi:" "${fileraw}" | \
         sed -e 's/^.*[dD][oO][iI]: *//' \
             -e 's/^\(.*\)\.$/ \1/' | \
         grep -v "???" | \
         sort -d > /tmp/doilist.txt
   ;;
   xml)
      # xml(starlet): print the text of every DocBook biblioid element whose
      # class attribute is "doi", one per line
      xml sel -N dbk="http://docbook.org/ns/docbook" \
         -t -m "//dbk:biblioid[@class='doi']" -v . -n "${filexml}" | \
         grep -v "???" | \
         sort -d > /tmp/doilist.txt
   ;;
   *)
      echo "eee : error unknown file type"
      exit 1
   ;;
esac
159#
# Number of lines in the extracted list. With none there is nothing to
# compare: warn, remove the list and stop with status 1.
# ${nl} is left unquoted on purpose: wc may pad the count with blanks.
nl=$( cat /tmp/doilist.txt | wc -l )
[ ${nl} -eq 0 ] && {
   echo "www : no DOI found in ${filein}"
   rm /tmp/doilist.txt 2> /dev/null
   exit 1
}
# report_twins FILE
#
# Read the sorted list FILE once and, for every line identical to the one
# just before it, print
#    eee : line <number of the first line of the pair> : <content>
# The body runs in a subshell, so its working variables never reach the
# caller and need no unset.
# Changes compared with the former loop:
#  - "unet l2" (typo for unset) produced a "command not found" message on
#    every iteration
#  - "==" inside [ ] is not POSIX; "=" is used instead
#  - the list was re-read with head/tail for each line; it is now read once
report_twins() (
   n=0
   previous=
   while IFS= read -r current
   do
      # n is the number of the previous line (0 while reading the first one)
      if [ "${n}" -gt 0 ] && [ "${current}" = "${previous}" ]
      then
         printf 'eee : line %d : %s\n' "${n}" "${previous}"
      fi
      previous=${current}
      n=$(( n + 1 ))
   done < "${1}"
)
report_twins /tmp/doilist.txt
178#
# Normal end of the script: remove the DOI list (any message from rm is
# discarded) and return status 0; the status does not tell whether
# duplicates were reported.
rm /tmp/doilist.txt 2> /dev/null
exit 0
Note: See TracBrowser for help on using the repository browser.