source: trunk/twindoi.sh @ 87

Last change on this file since 87 was 80, checked in by pinsard, 16 years ago

add journals and bug fixes in flat to docbook conversion tool

  • Property svn:executable set to *
  • Property svn:keywords set to Id
File size: 2.6 KB
Line 
1#! /bin/sh
2#+
3#
4# NAME
5# ====
6#
7# twindoi.sh - detection of duplicate DOI
8#
9# SYNOPSIS
10# ========
11#
12# ::
13#
14#  $ twindoi.sh -i filein -t type
15#
16#
17# DESCRIPTION
18# ===========
19#
20#
21# detection of duplicate DOI
22#
23# EXAMPLES
24# ========
25#
26# ::
27#
28#  $ ./twindoi.sh -i data/biball.txt -t raw
29#
30#
31# ::
32#
33#  $ ./twindoi.sh -i data/biball.xml -t xml
34#
35# FILES
36# =====
37#
38# original location
39# ~~~~~~~~~~~~~~~~~
40#
41# /usr/home/fplod/src/superbib_ws/twindoi.sh on aedon.locean-ipsl.upmc.fr
42#
43#
44# EVOLUTIONS
45# ==========
46#
47# ++ option debug
48#
49# ++ the following command, which is inappropriate (xml file given as raw),
50# does not raise any alert and also checks inside xml comments
51# ::
52#
53#  $ ./twindoi.sh -i data/biball.xml -t raw
54#
55#
56# $Id$
57#
58# - fplod 2008-05-05T14:26:31Z aedon.locean-ipsl.upmc.fr (Darwin)
59#
60#   * usage of xml(starlet) for doi extraction in xml file
61#
62# - fplod 2007-06-20T16:12:22Z aedon.locean-ipsl.upmc.fr (Darwin)
63#
64#   * consolidation and homogenization
65#
66# - smasson 2007-06-20T16:11:47Z
67#
68#   * creation
69#
70#-
71#
#
# Runtime setup: switch the shell to strict POSIX mode where this is
# possible, then build the names used in the messages.
#
# Variables set here :
#  system   - operating system name as given by uname
#  command  - name of this script without its directory
#  log_date - start time, UTC, ISO 8601
#  log      - path of a log file under /tmp (only the name is built here)
#  usage    - usage message
#
system=$(uname)
case "${system}" in
 AIX|IRIX64)
  echo " www : no specific posix checking"
 ;;
 *)
  # "set" is a special builtin : when the shell does not know the posix
  # option, a direct "set -o posix" is an error which aborts a
  # non-interactive shell. The option is probed in a subshell first, and
  # only enabled when the probe succeeds.
  if ( set -o posix ) 2> /dev/null
  then
   set -o posix
  fi
 ;;
esac
# parameter expansion instead of basename : no fork, no word splitting
command=${0##*/}
log_date=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
log=/tmp/${command%.sh}.${log_date}
#
usage=" Usage : ${command} -i filein -t type"
86#
#
# Command line decoding.
#
# Variables set here :
#  filein - value of the -i option (empty when the option is absent)
#  type   - value of the -t option (empty when the option is absent)
#
# Both variables are initialised so that a missing option is seen as an
# empty value by the checks which follow, instead of tripping "set -u".
filein=""
type=""
#
# decode_options arg... : decode the -i and -t options.
# Prints the usage message and exits with status 1 on an unknown argument
# or on a flag given without its value.
decode_options ()
{
 # loop on the number of arguments left, so that an empty argument is
 # diagnosed instead of silently ending the decoding
 while [ ${#} -gt 0 ]
 do
  case "${1}" in
  -i|-t)
   # a flag without value would make the shift below fail
   if [ ${#} -lt 2 ]
   then
    echo "${usage}"
    exit 1
   fi
   if [ "${1}" = "-i" ]
   then
    filein=${2}
   else
    type=${2}
   fi
   shift 2 # flag and value
  ;;
  *) # other choice
   echo "${usage}"
   exit 1
  ;;
  esac
 done
}
decode_options "$@"
# from now on, any use of an unset variable is an error
set -u
106#
# check for filein
#
# ${filein:-} and ${type:-} are used so that an option which was never
# given is handled here with a message, rather than by an "unbound
# variable" abort under "set -u".
if [ -z "${filein:-}" ]
then
  echo "${usage}"
  exit 1
fi
# the quotes matter : with an empty unquoted value the test would be
# reduced to "[ ! -f ]", which is false, and the check would be skipped
if [ ! -f "${filein}" ]
then
  echo "eee : ${filein} not found"
  exit 1
fi
#
# check for type, and name the input file according to it
case "${type:-}" in
raw) # file like data/biball.txt
 fileraw=${filein}
;;
xml)  # file like data/biball.xml
 filexml=${filein}
;;
*)
   echo "eee : type should be raw or xml"
   exit 1
;;
esac
126#
#
# DOI extraction and detection of twins.
#
# The DOI of the input file are sorted, so that twins end up on consecutive
# lines, and each line equal to the following one is reported as
# "eee : line <rank in the sorted list> : <DOI>".
#
# Exit status : 0 when at least one DOI was found (with or without twins),
#               1 when the type is unknown or when no DOI was found.
#
# Everything is done in one pipeline : no temporary file with a fixed name
# in /tmp is needed, and the list is read once instead of once per line.
case "${type}" in
     raw|xml)
        ;;
     *)
        echo "eee : error unknown file type"
        exit 1
        ;;
esac
#
# list_doi : write the DOI of the input file on stdout, one per line.
# Entries containing "???" are left out.
list_doi ()
{
 case "${type}" in
 raw)
  # the lines are selected without regard to case, so "doi:" is removed
  # without regard to case too.
  # A final dot is removed and a space is put in front of the DOI.
  # NOTE(review): because of this space a DOI written with a final dot is
  # never equal to the same DOI written without it - to be confirmed.
  grep -i "doi:" "${fileraw}" | \
  sed -e 's/^.*[Dd][Oo][Ii]: *//' -e 's/^\(.*\)\.$/ \1/'
 ;;
 xml)
  xml sel -N dbk="http://docbook.org/ns/docbook" \
  -t -m "//dbk:biblioid[@class='doi']" -v . -n "${filexml}"
 ;;
 esac | grep -v "???"
}
#
# awk keeps the previous line and compares it with the current one.
# An empty string is appended to both sides to force a comparison of
# strings : two different DOI which look like equal numbers must not match.
# awk exits with status 1 when it has read no line at all.
if list_doi | sort -d | awk '
 NR > 1 && ($0 "") == (previous "") {
  printf "eee : line %d : %s\n", NR - 1, previous
 }
 { previous = $0 }
 END { if (NR == 0) exit 1 }'
then
   exit 0
fi
echo "www : no DOI found in ${filein}"
exit 1
Note: See TracBrowser for help on using the repository browser.