source: trunk/twindoi.sh

Last change on this file was 353, checked in by pinsard, 10 years ago

fix thanks to coding rules; typo

  • Property svn:executable set to *
  • Property svn:keywords set to Id
File size: 4.4 KB
Line 
1#! /bin/sh
2#+
3#
4# .. program:: twindoi.sh
5#
6# ==========
7# twindoi.sh
8# ==========
9#
10# SYNOPSIS
11# ========
12#
13# .. code-block:: bash
14#
15#    twindoi.sh -i filein -t type
16#
17# DESCRIPTION
18# ===========
19#
20# .. option:: -i <filein>
21# .. option:: -t <type>
22#
23# detection of duplicate DOI
24#
25# EXAMPLES
26# ========
27#
28# To detect duplicate DOI in a raw file:
29#
30# .. code-block:: bash
31#
32#    twindoi.sh -i data/biball.txt -t raw
33#
34# To detect duplicate DOI in a XML/DocBook file:
35#
36# .. code-block:: bash
37#
38#    twindoi.sh -i data/biball.xml -t xml
39#
40# To detect duplicate DOI in a bibtex file:
41#
42# .. code-block:: bash
43#
44#    twindoi.sh -i data/biball.bib -t bibtex
45#
46# TODO
47# ====
48#
49# option debug
50#
51# the following command, which is inconsistent (XML file given with the
52# raw type), does not raise any alert and also checks inside XML comments
53#
54# .. code-block:: bash
55#
56#    twindoi.sh -i data/biball.xml -t raw
57#
58# EVOLUTIONS
59# ==========
60#
61# $Id$
62#
63# - fplod 20131010T113730Z callisto.locean-ipsl.upmc.fr (Linux)
64#
65#   * dynamic xmlstarlet vs xml
66#
67# - fplod 20120521T080342Z cratos (Linux)
68#
69#   * rename type variable to ftype to avoid usage of a reserved word
70#   * revision of indentation
71#   * typo
72#   * add bibtex as file type
73#
74# - fplod 20100318T083708Z aedon.locean-ipsl.upmc.fr (Darwin)
75#
76#   * unset
77#
78# - fplod 2008-05-05T14:26:31Z aedon.locean-ipsl.upmc.fr (Darwin)
79#
80#   * usage of xml(starlet) for doi extraction in xml file
81#
82# - fplod 2007-06-20T16:12:22Z aedon.locean-ipsl.upmc.fr (Darwin)
83#
84#   * consolidation and homogenisation
85#
86# - smasson 2007-06-20T16:11:47Z
87#
88#   * creation
89#
90#-
91#
# Switch the shell to POSIX mode, except on platforms where the script
# historically skips this step (AIX, IRIX64).
system=$(uname)
case "${system}" in
    AIX|IRIX64)
        # the script name is computed here because the "command" variable
        # is only assigned further down
        echo "$(basename "${0}") : www : no specific posix checking"
    ;;
    *)
        # "set" is a special built-in: an unsupported option is a fatal
        # error for a non-interactive shell. Probe in a subshell first so
        # that a sh without the posix option keeps running.
        if ( set -o posix ) 2> /dev/null
        then
            set -o posix
        fi
    ;;
esac
unset system
#
# From here on, any reference to an unset variable is an error.
set -u
# action  : script name without its .sh suffix (prefix of the work file)
# command : script name, used as prefix of every message
action=$(basename "${0}" .sh)
command=$(basename "${0}")
# PROJECT_LOG comes from the environment; the work file is created in it.
# Check it explicitly so that the failure is reported with a clear message
# instead of the shell's "unbound variable" abort.
if [ -z "${PROJECT_LOG:-}" ] || [ ! -d "${PROJECT_LOG}" ]
then
    echo "${command} : eee : PROJECT_LOG should be set to an existing directory"
    exit 1
fi
log_date=$(date -u +"%Y%m%dT%H%M%SZ")
log=${PROJECT_LOG}/${action}.log.${log_date}
#
# The XML extraction relies on the program named by the xmlcmd environment
# variable (xmlstarlet or xml, depending on the host).
# An unset or empty xmlcmd is reported explicitly: under "set -u" an unset
# value would abort with a bare shell error, and an empty one would make
# the lookup run without any argument.
tool=${xmlcmd:-}
if [ -z "${tool}" ]
then
    echo "${command} : eee : xmlcmd is not defined"
    exit 1
fi
if ! command -v "${tool}" 1> /dev/null 2>&1
then
    echo "${command} : eee : tool ${tool} not found"
    exit 1
fi
unset tool
#
# Synopsis printed along with every command-line error.
usage=" Usage : ${command} -i filein -t type"
#
# "-i filein -t type" is made of four words: a shorter command line is
# rejected before the options are even looked at.
required=4
[ ${#} -ge ${required} ] || {
    echo "${command} : eee : not enough arguments"
    echo "${usage}"
    exit 1
}
unset required
#
# Parse the command line.
#   -i <filein> : file to be scanned
#   -t <type>   : format of this file
# Both values start empty so that a missing option is detected below
# rather than by an "unbound variable" abort under "set -u".
filein=
ftype=
while [ ${#} -gt 0 ]
do
    case "${1}" in
        -i|-t)
            # the option must be followed by its value
            if [ ${#} -lt 2 ]
            then
                echo "${command} : eee : option ${1} requires a value"
                echo "${usage}"
                exit 1
            fi
            if [ "${1}" = "-i" ]
            then
                filein=${2}
            else
                ftype=${2}
            fi
            # skip the value
            shift
        ;;
        *)
            # other choice
            echo "${command} : eee : unknown option ${1}"
            echo "${usage}"
            exit 1
        ;;
    esac
    # next flag
    shift
done
# a repeated option (-i a -i b) passes the argument count check but leaves
# the other value undefined
if [ -z "${filein}" ] || [ -z "${ftype}" ]
then
    echo "${command} : eee : both -i and -t are required"
    echo "${usage}"
    exit 1
fi
unset usage
#
# check for filein
# (quoted so that a path containing blanks is tested as a single word)
if [ ! -f "${filein}" ]
then
    echo "${command} : eee : ${filein} not found"
    exit 1
fi
#
# Copy the input path into the variable dedicated to its format and
# reject any format other than raw, xml or bibtex.
if [ "${ftype}" = "raw" ]
then
    # file like data/biball.txt
    fileraw=${filein}
elif [ "${ftype}" = "xml" ]
then
    # file like data/biball.xml
    filexml=${filein}
elif [ "${ftype}" = "bibtex" ]
then
    # file like data/biball.bib
    filebibtex=${filein}
else
    echo "${command} : eee : type should be raw, xml or bibtex"
    exit 1
fi
unset filein
#
# Extract the DOIs of the input file, one per line, and store them sorted
# in ${PROJECT_LOG}/${action}${$}.txt. Entries containing "???" are
# dropped.
# For raw and bibtex files the lines are selected case-insensitively
# (grep -i), so the prefix is also removed whatever its case; otherwise
# "DOI: x" and "doi: x" would never compare equal.
case "${ftype}" in
    raw)
        # keep what follows "doi:", then replace a final period by a
        # leading blank
        grep -i "doi:" "${fileraw}" | \
        sed -e "s/^.*[Dd][Oo][Ii]: *//" \
            -e "s/^\(.*\)\.$/ \1/" | \
        grep -v "???" | \
        sort -d > "${PROJECT_LOG}/${action}${$}.txt"
    ;;
    bibtex)
        # same processing with the "doi = " field syntax
        grep -i "doi *= *" "${filebibtex}" | \
        sed -e "s/^.*[Dd][Oo][Ii] *= *//" \
            -e "s/^\(.*\)\.$/ \1/" | \
        grep -v "???" | \
        sort -d > "${PROJECT_LOG}/${action}${$}.txt"
    ;;
    xml)
        # value of every DocBook biblioid element of class doi
        ${xmlcmd} sel -N dbk="http://docbook.org/ns/docbook" \
        -t -m "//dbk:biblioid[@class='doi']" -v . -n "${filexml}" | \
        grep -v "???" | \
        sort -d > "${PROJECT_LOG}/${action}${$}.txt"
    ;;
    *)
        echo "${command} : eee : error unknown file type ${ftype}"
        exit 1
    ;;
esac
unset ftype
#
# Report the duplicated DOIs found in the sorted work file.
doilist=${PROJECT_LOG}/${action}${$}.txt
# nothing extracted (work file empty or missing)
if [ ! -s "${doilist}" ]
then
    # filein may already have been unset at this point; fall back on the
    # per-format variable so that "set -u" does not abort before the
    # message is printed
    echo "${command} : www : no DOI found in ${filein:-${fileraw:-${filexml:-${filebibtex:-}}}}"
    rm -f "${doilist}" 2> /dev/null
    exit 1
fi
# The file is sorted, so identical DOIs are adjacent: one pass comparing
# each line with the previous one finds them all. The line number printed
# is the one of the first line of each identical pair.
n=0
previous=
while IFS= read -r current
do
    if [ ${n} -gt 0 ] && [ "${current}" = "${previous}" ]
    then
        echo "${command} : eee : line ${n} : ${previous}"
    fi
    previous=${current}
    n=$(( n + 1 ))
done < "${doilist}"
unset n
unset previous
unset current
unset doilist
#
# Remove the work file. The path is quoted: unquoted, a PROJECT_LOG
# containing blanks would leave the file behind without any message since
# errors are discarded here.
rm -f "${PROJECT_LOG}/${action}${$}.txt" 2> /dev/null
unset command
unset log
unset log_date
#
#++set
exit 0
Note: See TracBrowser for help on using the repository browser.