source: trunk/twindoi.sh @ 140

Last change on this file since 140 was 116, checked in by pinsard, 13 years ago

Consolidation of shell scripts

  • Property svn:executable set to *
  • Property svn:keywords set to Id
File size: 2.9 KB
Line 
1#! /bin/sh
2#+
3#
4# .. program:: twindoi.sh
5#
6# ==========
7# twindoi.sh
8# ==========
9#
10# --------------------------
11# detection of duplicate DOI
12# --------------------------
13#
14# SYNOPSIS
15# ========
16#
17# ::
18#
19#  $ twindoi.sh -i filein -t type
20#
21#
22# DESCRIPTION
23# ===========
24#
25#
26# .. option:: -i <filein>
27# .. option:: -t <type>
28#
29# detection of duplicate DOI
30#
31# EXAMPLES
32# ========
33#
34# ::
35#
36#  $ ./twindoi.sh -i data/biball.txt -t raw
37#
38#
39# ::
40#
41#  $ ./twindoi.sh -i data/biball.xml -t xml
42#
43# TODO
44# ====
45#
46# ++ option debug
47#
# ++ the following command, which is not appropriate
# (xml vs txt), does not give any alert and checks inside xml comments
50# ::
51#
52#  $ ./twindoi.sh -i data/biball.xml -t raw
53#
54# EVOLUTIONS
55# ==========
56#
57# $Id$
58#
59# - fplod 20100318T083708Z aedon.locean-ipsl.upmc.fr (Darwin)
60#
61#   * unset
62#
63# - fplod 2008-05-05T14:26:31Z aedon.locean-ipsl.upmc.fr (Darwin)
64#
65#   * usage of xml(starlet) for doi extraction in xml file
66#
67# - fplod 2007-06-20T16:12:22Z aedon.locean-ipsl.upmc.fr (Darwin)
68#
#   * consolidation and homogenization
70#
71# - smasson 2007-06-20T16:11:47Z
72#
73#   * creation
74#
75#-
76#
# Switch the shell to POSIX mode where that is possible.
# AIX and IRIX64 are left untouched (a warning is printed instead).
# On any other system the option is probed in a subshell first : "set" is a
# special built-in, so an unsupported option (e.g. when /bin/sh is dash)
# would otherwise abort the whole script.
system=$(uname)
case "${system}" in
   AIX|IRIX64)
      echo " www : no specific posix checking"
   ;;
   *)
      if ( set -o posix ) 2> /dev/null
      then
         set -o posix
      fi
   ;;
esac
unset system
87#
# Any expansion of an unset variable is an error from here on.
set -u
#
# Name of this script without its directory. The expansion is quoted because
# the path used to launch the script may contain blanks.
command=$(basename "${0}")
# UTC timestamp (ISO 8601 basic format) used as suffix of the log file name.
log_date=$(date -u +"%Y%m%dT%H%M%SZ")
# Log file name; the code visible in this script never writes to it.
log=/tmp/$(basename "${command}" .sh).log.${log_date}
#
# One-line help printed when the command line is wrong.
usage=" Usage : ${command} -i filein -t type"
95#
# Command line parsing.
#   -i <filein> : file to scan, stored in ${filein}
#   -t <type>   : kind of file, stored in ${type}
# Both options are mandatory and both take a value. Any other argument, an
# option given without its value, or a missing option prints the usage text
# and ends the script with status 1.
while [ ${#} -gt 0 ]
do
   case "${1}" in
      -i|-t)
         # without this guard "set -u" would abort on ${2} with a cryptic
         # "unbound variable" message
         if [ ${#} -lt 2 ]
         then
            echo "eee : missing value for option ${1}"
            echo "${usage}"
            exit 1
         fi
         if [ "${1}" = "-i" ]
         then
            filein=${2}
         else
            type=${2}
         fi
         # skip the value of the option
         shift
      ;;
      *)
         # other choice
         echo "eee : unknown option ${1}"
         echo "${usage}"
         exit 1
      ;;
   esac
   # next flag
   shift
done
#
# both options are required by the rest of the script
if [ -z "${filein:-}" ] || [ -z "${type:-}" ]
then
   echo "eee : options -i and -t are both required"
   echo "${usage}"
   exit 1
fi
unset usage
118#
# check for filein
# (quoted : an empty or blank-containing name must not change the meaning
# of the test; unquoted and empty it collapses to "[ ! -f ]", which is false)
if [ ! -f "${filein}" ]
then
   echo "eee : ${filein} not found"
   exit 1
fi
#
# check for type and keep the input file under the name matching its kind
case "${type}" in
   raw)  # file like data/biball.txt
      fileraw=${filein}
   ;;
   xml)   # file like data/biball.xml
      filexml=${filein}
   ;;
   *)
      echo "eee : type should be raw or xml"
      exit 1
   ;;
esac
138#
# Detection of duplicate DOI.
#
# 1. every DOI of the input file is extracted, lines containing "???" are
#    dropped and the remaining ones are sorted so that identical DOI become
#    neighbours;
# 2. each line equal to the following one is reported as
#    "eee : line <n> : <doi>", <n> being its rank in the sorted list.
#
# Exit status : 1 when no DOI is found, when the file type is unknown or
# when the temporary file cannot be created, 0 otherwise (even when
# duplicates have been reported).
#
# The sorted list goes to a private temporary file (no predictable name
# shared in /tmp) which is removed on every exit path by the trap.
doilist=$(mktemp "${TMPDIR:-/tmp}/doilist.XXXXXX") || {
   echo "eee : unable to create a temporary file"
   exit 1
}
trap 'rm -f "${doilist}"' EXIT
#
case "${type}" in
   raw)
      # keep the text following "doi:" and remove a final dot
      # NOTE(review): the second sed also inserts a leading blank, but only
      # on lines which ended with a dot - confirm this is intended
      grep -i "doi:" "${fileraw}" | \
         sed -e "s/^.*doi: *//" | \
         sed -e "s/^\(.*\)\.$/ \1/" | \
         grep -v "???" | \
         sort -d > "${doilist}"
   ;;
   xml)
      # value of each DocBook <biblioid class="doi"> element, one per line
      xml sel -N dbk="http://docbook.org/ns/docbook" \
         -t -m "//dbk:biblioid[@class='doi']" -v . -n "${filexml}" | \
         grep -v "???" | \
         sort -d > "${doilist}"
   ;;
   *)
      echo "eee : error unknown file type"
      exit 1
   ;;
esac
#
# sort ends each line it writes with a newline, so an empty file means that
# not a single DOI has been found
if [ ! -s "${doilist}" ]
then
   echo "www : no DOI found in ${filein}"
   exit 1
fi
#
# Single pass over the sorted list : a line identical to the previous one is
# a duplicate, reported with the rank of the first line of the pair.
# Appending "" forces a string comparison (awk would otherwise compare
# numerically two lines which both look like numbers).
awk '
   NR > 1 && ($0 "") == (previous "") {
      printf "eee : line %d : %s\n", NR - 1, previous
   }
   { previous = $0 }
' "${doilist}"
#
exit 0
Note: See TracBrowser for help on using the repository browser.