source: trunk/bibopa.sh @ 21

Last change on this file since 21 was 21, checked in by pinsard, 17 years ago

change encoding of DOI in bibliography XML file. cf. ticket#14

File size: 12.5 KB
Line 
1#!/bin/bash
2#
3#    Behera, S. K., J. Luo, S. Masson, S. Rao, S. Gualdi, P. Delecluse, A.
4#    Navarra and T. Yamagata, 2004 : Paramount Impact of the Indian Ocean
5#    Dipole on the East African Short Rains: A CGCM Study, J. Climate, In
6#    press.
7#   
8#    donnerait
9#   
10#    <biblioentry id="behera2004">
11#    <authorgroup>
12#    <author><surname>Behera</surname> <firstname>S. K.</firstname> </author>
13#    <author><firstname>J.</firstname> <surname>Luo</surname></author>
14#    <author><firstname>S.</firstname> <surname>Masson</surname></author>
15#    <author><firstname>S.</firstname> <surname>Rao</surname></author>
16#    <author><firstname>S.</firstname> <surname>Gualdi</surname></author>
17#    <author><firstname>P.</firstname> <surname>Delecluse</surname></author>
18#    <author><firstname>A.</firstname> <surname>Navarra</surname></author>
19#    <author><firstname>T.</firstname> <surname>Yamagata</surname></author>
20#    </authorgroup>
21#    <date>2004</date>
22#    <title>Paramount Impact of the Indian Ocean Dipole on the East African
23#    Short Rains: A CGCM Study</title>
24#    <publishername>J. Climate</publishername>
25#    <biblioid class="doi">doi</biblioid>
26#    <bibliomisc role="pseudoref">In press.</bibliomisc>
27#    <bibliomisc role="internalref">from
28#    http://www.lodyc.jussieu.fr/~opatlod/NEMO_v1/6_Menu/2_page/index.html
29#    2007-03-29T16:24:31Z fplod by hand</bibliomisc>
30#    </biblioentry>
31#   
32#
33# example :
34# $ ./bibopa.sh -i /Users/smasson/Bibopa/biball.txt -t raw
35# $ ./bibopa.sh -i data/mail2007-04-25T08:58:16Z.txt -t mailbody
36#
37# see also mailtousernemo.sh
38#
39# original location :
40# /usr/home/fplod/incas/bibnemo/src/bibnemomaf/bibopa.sh sur cerbere.locean-ipsl.upmc.fr
41#
42# update
43# ++ gestion des comments
44# ++ gestion des id existants (cf à la fin)
45# ++ option debug
46# fplod 2007-06-20T17:18:02Z aedon.locean-ipsl.upmc.fr (Darwin)
47# <bibliomisc role="id"> replace by <biblioid class="doi">
48# smasson 2007-06-07T16:43:42Z arete.locean-ipsl.upmc.fr (Darwin)
49# Add journals
50# fplod 2007-05-10T09:17:09Z aedon.locean-ipsl.upmc.fr (Darwin)
51# dernières touches
52# fplod 2007-04-25T10:59:49Z cerbere.locean-ipsl.upmc.fr (Linux)
53# add a filein parameter and an option for mailbody
54# comments  (line beginning with #) are now possible
55# Sebastien Masson avril 2007 creation
56#
57
#######################################
# Remove leading and trailing space characters from a string.
# Arguments: $1 - string to trim
# Outputs:   the trimmed string on stdout, followed by a newline
#######################################
rmbl () {
    # printf instead of echo: a value such as "-n" or "-e" has to be
    # printed verbatim, not consumed as an echo option.
    printf '%s\n' "$1" | sed -e "s/^ *//" -e "s/ *$//"
}
#######################################
# Clean a bibliographic field (title, volume, pages, ...).
# Removes surrounding spaces and at most one leading and one trailing
# separator; separators are tried in the order "," then ";" then ".".
# Arguments: $1 - field to clean
# Outputs:   the cleaned field on stdout, followed by a newline
#######################################
cleanname () {
    # printf instead of echo so that a field such as "-n" is kept verbatim.
    # The second "s/^ *//" and the last "s/ *$//" drop the blank that sat
    # next to a removed separator (", In press." must give "In press",
    # not " In press").
    printf '%s\n' "$1" | sed -e "s/^ *//" \
        -e "s/^ *,//" \
        -e "s/^ *;//" \
        -e "s/^ *\.//" \
        -e "s/^ *//" \
        -e "s/ *$//" \
        -e "s/, *$//" \
        -e "s/; *$//" \
        -e "s/\. *$//" \
        -e "s/ *$//"
}
71#
# Run bash in POSIX mode for the rest of the script.
set -o posix
# Name of this script without directory and ".sh" suffix (used in the usage
# message and in the log file name).
command=$(basename "${0}" .sh)
# UTC timestamp of this run; in mailbody mode it is written at the top of
# the intermediate raw file.
log_date=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
# Log file name. NOTE(review): not referenced anywhere else in this script.
log=/tmp/${command}.${log_date}
#
# test if the required external tools are available :
#  xsltproc builds the HTML version of the bibliography
#  recode   removes accents from the first author's surname when the
#           biblioentry id is built; without it ids would silently lose
#           the surname part
for tool in xsltproc recode
do
 if ! type "${tool}" 1> /dev/null 2>&1
 then
  echo " eee : ${tool} not found" >&2
  exit 1
 fi
done
85#
# Command line parsing.
#  -i filein : input file (raw list of references or saved mail body)
#  -t type   : kind of input file, checked further down in the script
# Both options are mandatory; anything else prints the usage and exits 1.
usage=" Usage : ${command} -i filein -t type"
#
# Start from empty values so that a missing option is reported through the
# usage message instead of an "unbound variable" abort once "set -u" is on.
filein=""
type=""
#
while [ ${#} -gt 0 ]
do
 case "${1}" in
 -i) # filein
  if [ ${#} -lt 2 ]
  then
   # option given without its value
   echo "${usage}" >&2
   exit 1
  fi
  filein=${2}
  shift
 ;;
 -t) # type
  if [ ${#} -lt 2 ]
  then
   # option given without its value
   echo "${usage}" >&2
   exit 1
  fi
  type=${2}
  shift
 ;;
 *) # other choice
  echo "${usage}" >&2
  exit 1
 ;;
 esac
 shift # next flag
done
#
if [ -z "${filein}" ] || [ -z "${type}" ]
then
 echo "${usage}" >&2
 exit 1
fi
#
# from here on, any use of an unset variable is an error
set -u
#
# check for filein
# (quoted : an unquoted empty value would turn the test into "[ ! -f ]",
# which is false, and the check would be skipped)
if [ ! -f "${filein}" ]
then
  echo "eee : ${filein} not found" >&2
  exit 1
fi
115#
# Select or build ${fileraw}, the file holding one reference per line
# (lines beginning with # are comments and are removed later).
#  raw      : the input file is used as is
#  mailbody : the references are extracted from a saved mail body :
#             - values of the non-empty "correction_...=" fields
#             - the lines from "newreferences=" up to (excluding) "comments="
case "${type}" in
raw) # file like data/biball.txt
 fileraw=${filein}
;;
mailbody) # file like data/mail2007-04-25T08:58:16Z.txt
 fileraw=/tmp/$(basename "${filein}").raw
 # one redirection for the whole group : the file is created once and
 # receives the header comments and both extractions in order
 {
  echo "# from ${filein}"
  echo "# ${log_date}"
  echo "# corrections"
  grep "correction_.*=" "${filein}" | grep -v "correction_.*=$" | \
  sed -e "s/correction_.*=//"
  echo "# new references"
  awk "/newreferences=/,/comments=/" "${filein}" | \
  sed -e "s/newreferences=//" -e "/comments=/d"
 } > "${fileraw}"
 #more ${fileraw} # ++ if debug
 #read a #++ if debug
;;
*)
   echo "eee : type should be raw or mailbody" >&2
   exit 1
;;
esac
137#
# check for output
# The XML file is created in the current directory and named after the
# input file, with its ".txt" suffix replaced by ".xml".
# An existing file is only reported : the exit below is disabled (debug),
# so the file is overwritten further down.
fileou=$( basename "${filein}" .txt).xml
if [ -f "${fileou}" ]
then
   echo "eee : ${fileou} already exist" >&2
   #exit 1 # ++ if not debug
fi
145#
# Known journal / series names, numbered from 1 without gaps.
# The conversion loop tries them in increasing index order and keeps the
# first one found in the reference (case-insensitive, followed by optional
# blanks and a comma). Consequently, when one name is the beginning of
# another one up to a comma, the longer name must come first, otherwise it
# can never be selected (see 44 and 45).
jlist[1]="J. Climate"
jlist[2]="Journal of Climate"
jlist[3]="Ocean Modelling"
jlist[4]="Geophys. Res. Lett."
jlist[5]="J. Geophys. Res."
jlist[6]="Tellus A"
jlist[7]="Tellus B"
jlist[8]="J. Phys. Oceanogr."
jlist[9]="Clim. Dyn."
jlist[10]="Climate Dynamics"
jlist[11]="Dyn. Atmos. Oceans"
jlist[12]="Mon. Wea. Rev."
jlist[13]="Global Biogeochem. Cycles"
jlist[14]="Nonlinear Processes in Geophysics"
jlist[15]="Ocean Science"
jlist[16]="J. Mar. Systems"
jlist[17]="J. Atmos. Sc."
jlist[18]="Proc. Royal Soc."
jlist[19]="Bull. Amer. Meteorol. Soc."
jlist[20]="Ocean Dyn."
jlist[21]="Geophysical Monograph Series"
jlist[22]="Paleoceanography"
jlist[23]="Annales Geophysicae"
jlist[24]="Annals of Geophys."
jlist[25]="Deep Sea Research Part II: Topical Studies in Oceanography"
jlist[26]="Deep Sea Res. II"
jlist[27]="Atmospheric Chemistry and Physics"
jlist[28]="Atmospheric Chemistry and Physics Discussions"
jlist[29]="Earth Plan. Sc. Lett."
jlist[30]="J. Mar. Res."
jlist[31]="PCMDI Report Series"
jlist[32]="J. Mar. Syst."
jlist[33]="Note du Pole de Modelisation"
jlist[34]="Calculateurs Paralleles"
jlist[35]="Note Scientifique du Pole de Modelisation"
jlist[36]="Lecture Notes in Computer Science"
jlist[37]="Prog. Oceanogr."
jlist[38]="Deep Sea Research Part I: Oceanographic Research Papers"
jlist[39]="Deep Sea Res. I"
jlist[40]="IEEE Trans. Geosci. Remote Sensing"
jlist[41]="ECMWF Tech. Memorandum"
jlist[42]="J. Environ. Radioactivity"
jlist[43]="Int. WOCE Newsletter"
# longer name first : "C. R. Acad. Sci. Paris," also matches the name below
jlist[44]="C. R. Acad. Sci. Paris, Earth and Planetary Sciences"
jlist[45]="C. R. Acad. Sci. Paris"
jlist[46]="In The mathematics of models for climatology and environment"
jlist[47]="Oceanol. Acta"
jlist[48]="The global atmosphere and ocean system"
jlist[49]="NATO Advanced Study Institute"
jlist[50]="WCRP"
jlist[51]="The Courier"
jlist[52]="Elsevier Oceanographic Series"
jlist[53]="In Conference Proceedings of the 1988 International Conference on Supercomputing"
jlist[54]="In Science and engineering on Cray Supercomputers"
jlist[55]="In Modeling the Earth's Climate and its Variability"
jlist[56]="Fish. Oceanogr."
jlist[57]="Q. J. R. Meteor. Soc."
jlist[58]="In High performance computing in the geosciences"
jlist[59]="Int. J. Numer. Meth. in Fluids"
jlist[60]="Lecture notes in Physics"
jlist[61]="J. Meterol. Soc. Japan"
jlist[62]="Journal of Marine Systems"
jlist[63]="Canadian Journal of Fisheries and Aquatic Sciences"
jlist[64]="J. Atmos. Ocean. Tech."
jlist[65]="EOS"
jlist[66]="Nature"
jlist[67]="Physics and Chemistry of the Earth, Part B: Hydrology, Oceans and Atmosphere"
jlist[68]="Journal of Atmospheric and Oceanic Technology"
jlist[69]="Lecture Notes in Computational Science Engineering"
jlist[70]="In Greenhouse Gas Control Technologies"
jlist[71]="Chemical Geology"
jlist[72]="Marine Geology"
jlist[73]="Developments in Paleoenvironmental Research"
jlist[74]="Science"
jlist[75]="In Mediterranean Climate Variability"
jlist[76]="Global and Planetary Change"
jlist[77]="Physical Review Letters"
# template for a new entry (use the next free index)
#jlist[]=""
224#
225#
# Create (or overwrite) the output file with the XML declaration, the
# DocBook 4.5 DOCTYPE and the opening of the <bibliography> element,
# stamped with the current UTC date. The entries are appended afterwards.
cat <<EOF > "${fileou}"
<?xml version='1.0' encoding='ISO-8859-1'?>
<!DOCTYPE bibliography PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN"
 "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd">
<bibliography id="bibrefnemo" lang="en">
<bibliographyinfo>
<date>$( date -u +"%Y-%m-%dT%H:%M:%SZ" )</date>
</bibliographyinfo>
EOF
235#
# Convert every reference of ${fileraw} into a <biblioentry> appended to
# ${fileou}, then close the <bibliography> element.
#
# Expected shape of one reference (one per line) :
#   Surname, F., F. Surname, ... and F. Surname, YEAR : Title, Journal, rest
# where "rest" is "volume, pages", "volume, issue, pages" or free text,
# optionally followed by "doi: ...".
#
# strip comments
fileraw_strict=/tmp/$(basename "${fileraw}")_strict
grep -v "^#" "${fileraw}" > "${fileraw_strict}"
totlines=$( wc -l "${fileraw_strict}" | awk '{print $1}' )
# Several expansions below are left unquoted so that word splitting squeezes
# runs of blanks. Pathname expansion is switched off meanwhile, otherwise a
# "?" or "*" in a reference could be replaced by matching file names.
set -f
l=1
while [ ${l} -le ${totlines}  ]
do
# extract one line
  line=$( sed -n "${l}p" "${fileraw_strict}" )
# a blank line is not a reference : skip it instead of aborting on it
  case "${line}" in
    *[![:space:]]*)
    ;;
    *)
      l=$(( l + 1 ))
      continue
    ;;
  esac
# copy of the line kept in an XML comment ("--" is not allowed there)
  orgline=$( echo ${line} | sed -e "s/--/- -/g" )
# protect < and > for XML
# NOTE(review): "&" is not escaped; confirm whether input lines may already
# contain XML entities before adding that.
  line=$( echo ${line} | sed -e "s/</\&lt;/g" -e "s/>/\&gt;/g" )
# before the first :
  tmp=${line%%:*}
# before the last ,
  auths=${tmp%,*},
# suppress and
  auths=$( echo "${auths}" | sed -e "s/ and //g" )
# after the last ,
  year=${tmp##*,}
  year=$( rmbl "${year}" )
## first author before the first .,
  first=${auths%%.,*}.
# its firstname after the last ,
  firstfn=${first##*,}
  firstfn=$( rmbl "${firstfn}" )
# its surname ; before the first ,
  firstsn=${first%%,*}
  firstsn=$( rmbl "${firstsn}" )
## ref id : surname of the first author in lower case, blanks and quotes
## replaced by _, accents removed by recode, followed by the year and by a
## two digit sequence number counted among the entries already written
  refid=$( echo ${firstsn} | tr "[:upper:]" "[:lower:]" | tr -s " " "_"  | tr -s "'" "_" | recode -d -f ISO-8859-1..flat )${year}
# ${fileou} is read here while being built : this is why each piece of
# output is appended at once instead of redirecting the whole loop
  num=$( grep -c "<biblioentry id=\"${refid}_[0-9][0-9]\">" "${fileou}" )
  num=$(( ${num} + 1 ))
  [ ${num} -le 9 ] && num=0${num}
  refid=${refid}_${num}

  cat <<EOF >> "${fileou}"
<biblioentry id="${refid}">
  <!-- date
  $( date -u +"%Y-%m-%dT%H:%M:%SZ" )
  -->
  <!-- original text
  $orgline
  -->
  <authorgroup>
    <author> <personname> <surname>${firstsn}</surname> <firstname>${firstfn}</firstname> </personname> </author>
EOF

## other authors (written "F. Surname", unlike the first one)
  previous=${first},
# next authors : what follows the author just processed
  next=${auths##*${previous}}
# while the next author is not empty
  while [  "${next}" != "" ]
    do
# get the first next author; before the first ,
    next=${next%%,*}
# its surname ; after the last .
    nextsn=${next##*.}
    nextsn=$( rmbl "${nextsn}" )
# its firstname ; before the last .
    nextfn=${next%.*}.
    nextfn=$( rmbl "${nextfn}" )
#
    echo "    <author> <personname> <surname>${nextsn}</surname> <firstname>${nextfn}</firstname> </personname> </author>" >> "${fileou}"
    previous=${next},
    next=${auths##*${previous}}

  done
  echo "  </authorgroup>"  >> "${fileou}"

# end of the line ; after the first :
  endline=${line#*:}

## find the journal : first name of jlist found in the end of the line,
## case-insensitive, followed by optional blanks and a comma
  j=1
  jfound=""
  jlistsize=${#jlist[@]}
  while [[ $j -le $jlistsize && "${jfound}" == "" ]]
    do
    ok=$( echo ${endline} | grep -ci "${jlist[j]} *," )
    [ $ok -eq 1 ] && jfound="${jlist[j]}"
    j=$(( $j + 1 ))
  done
  if [ -z "${jfound}" ]
      then
# failure : report on stderr and leave with a non-zero status
# (${fileou} is left incomplete)
      echo "ERROR Journal not found" >&2
      echo "${endline}" >&2
      exit 1
  fi
## title
# before the journal name
  title=${endline%%${jfound}*}
  title=$( cleanname "${title}" )
  echo "  <title>${title}</title>" >> "${fileou}"
## end
## end of the line ; after the first ${jfound}
  endline=${endline#*${jfound}}
  endline=$( cleanname "${endline}" )
## doi : normalise the spelling of the "doi :" marker, then cut at it
  endline=$( echo ${endline} | sed -e "s/[dD][oO][iI] *\t* *: *\t* */doi:/" )
  ok=$( echo ${endline} | grep -ic "doi:" )
  if [ $ok -eq 1 ]
      then
      doi=${endline##*doi:}
      echo "  <biblioid class=\"doi\">${doi}</biblioid>" >> "${fileou}"
      endline=${endline%doi:*}
      endline=$( cleanname "${endline}" )
  else
      echo "non doi: ${line}"
  fi
# number of commas left : 1 means "volume, pages", 2 means "volume, issue,
# pages". Counted with a parameter expansion : "wc -c" pads its result
# with blanks on some systems and the case patterns would then never match.
  commas=${endline//[!,]/}
  num=${#commas}
# NOTE(review): the first arm writes relation="journal" while the other two
# write role="journal"; left as found, confirm which one is intended.
  case ${num} in
      1)
### echo ${num}: ${endline}
          vol=${endline%,*}
          vol=$( cleanname "${vol}" )
          pag=${endline##*,}
          pag=$( cleanname "${pag}" )
          cat <<EOF >> "${fileou}"
  <biblioset relation="journal">
    <title>${jfound}</title>
    <volumenum>${vol}</volumenum><pagenums>${pag}</pagenums>
    <pubdate>${year}</pubdate>
  </biblioset>
EOF
      ;;
      2)
          vol=${endline%,*}
          vol=$( cleanname "${vol}" )
          iss=${vol##*,}
          iss=$( cleanname "${iss}" )
          vol=${vol%,*}
          vol=$( cleanname "${vol}" )
          pag=${endline##*,}
          pag=$( cleanname "${pag}" )
          cat <<EOF >> "${fileou}"
  <biblioset role="journal">
    <title>${jfound}</title>
    <volumenum>${vol}</volumenum><issuenum>${iss}</issuenum><pagenums>${pag}</pagenums>
    <pubdate>${year}</pubdate>
  </biblioset>
EOF
      ;;
      *)
# anything else is kept as free text and shown to the user
          echo ${num}: ${endline}
          cat <<EOF >> "${fileou}"
  <biblioset role="journal">
    <title>${jfound}</title>
    <pubdate>${year}</pubdate>
    <bibliomisc>${endline}</bibliomisc>
  </biblioset>
EOF
      ;;
  esac

  cat <<EOF >> "${fileou}"
</biblioentry>
 
EOF

  l=$(( $l + 1 ))

done
# back to normal pathname expansion
set +f
echo "</bibliography>" >> "${fileou}"
405
# Build the HTML version next to the XML file, with the DocBook XHTML
# stylesheet referenced by URL.
xsltproc \
--output "$( basename "${filein}" .txt).html" \
http://docbook.sourceforge.net/release/xsl/current/xhtml/docbook.xsl \
"${fileou}"

# PDF conversion, run only as a consistency check of the XML file :
# its stderr is kept in xml.err
xmlto pdf "${fileou}" 2> xml.err
#
# clean (every removal is confirmed interactively)
echo "iii : xml.err contains stderr from the following command "
echo "iii : which was done just to check consistence of ${fileou}"
echo "iii : xmlto pdf ${fileou}"
rm -i xml.err
case "${type}" in
raw)
 echo "iii : ${fileraw_strict} contains a copy of input file without comments"
 rm -i "${fileraw_strict}"
;;
mailbody)
 # in this mode the raw file is an intermediate file as well
 echo "iii : ${fileraw} contains a copy of input file"
 rm -i "${fileraw}"
 echo "iii : ${fileraw_strict} contains a copy of input file without comments"
 rm -i "${fileraw_strict}"
;;
esac
#
# what to do next with the result
echo " iii : you have a some new or modified bibliographic references in ${fileou} (biblioentry)"
echo " iii : you can add them in bibrefnemo.xml" #++ filename path
echo " iii : modify date in bibrefnemo.xml" #++ filename path
echo " iii : but please take care of id last part (after underscore) that may"
echo " iii : be in conflict with existing ones" #++ to avoid this, the file to be completed should be given as an additional input.
#
exit 0
Note: See TracBrowser for help on using the repository browser.