source: trunk/bibopa.sh @ 15

Last change on this file since 15 was 15, checked in by smasson, 15 years ago

Add journals

File size: 12.3 KB
Line 
1#!/bin/bash
2#
3#    Behera, S. K., J. Luo, S. Masson, S. Rao, S. Gualdi, P. Delecluse, A.
4#    Navarra and T. Yamagata, 2004 : Paramount Impact of the Indian Ocean
5#    Dipole on the East African Short Rains: A CGCM Study, J. Climate, In
6#    press.
7#   
8#    donnerait
9#   
10#    <biblioentry id="behera2004">
11#    <authorgroup>
12#    <author><surname>Behera</surname> <firstname>S. K.</firstname> </author>
13#    <author><firstname>J.</firstname> <surname>Luo</surname></author>
14#    <author><firstname>S.</firstname> <surname>Masson</surname></author>
15#    <author><firstname>S.</firstname> <surname>Rao</surname></author>
16#    <author><firstname>S.</firstname> <surname>Gualdi</surname></author>
17#    <author><firstname>P.</firstname> <surname>Delecluse</surname></author>
18#    <author><firstname>A.</firstname> <surname>Navarra</surname></author>
19#    <author><firstname>T.</firstname> <surname>Yamagata</surname></author>
20#    </authorgroup>
21#    <date>2004</date>
22#    <title>Paramount Impact of the Indian Ocean Dipole on the East African
23#    Short Rains: A CGCM Study</title>
24#    <publishername>J. Climate</publishername>
25#    <bibliomisc role="doi">doi</bibliomisc>
26#    <bibliomisc role="pseudoref">In press.</bibliomisc>
27#    <bibliomisc role="internalref">from
28#    http://www.lodyc.jussieu.fr/~opatlod/NEMO_v1/6_Menu/2_page/index.html
29#    2007-03-29T16:24:31Z fplod by hand</bibliomisc>
30#    </biblioentry>
31#   
32#
33# example :
34# $ ./bibopa.sh -i /Users/smasson/Bibopa/biball.txt -t raw
35# $ ./bibopa.sh -i data/mail2007-04-25T08:58:16Z.txt -t mailbody
36#
37# see also mailtousernemo.sh
38#
39# original location :
40# /usr/home/fplod/incas/bibnemo/src/bibnemomaf/bibopa.sh sur cerbere.locean-ipsl.upmc.fr
41#
42# update
43# ++ gestion des comments
44# ++ gestion des id existants (cf à la fin)
45# ++ option debug
46# smasson 2007-06-07T16:43:42Z arete.locean-ipsl.upmc.fr (Darwin)
47# Add journals
48# fplod 2007-05-10T09:17:09Z aedon.locean-ipsl.upmc.fr (Darwin)
49# dernières touches
50# fplod 2007-04-25T10:59:49Z cerbere.locean-ipsl.upmc.fr (Linux)
51# add a filein parameter and an option for mailbody
52# comments (lines beginning with #) are now possible
53# Sebastien Masson avril 2007 creation
54#
55
# rmbl STRING
# Print STRING on stdout with its leading and trailing spaces removed.
# Only the space character is stripped; inner spaces are left untouched.
rmbl () {
    # printf instead of echo: echo would treat a value such as "-n" or "-e"
    # as an option and print nothing.
    printf '%s\n' "$1" | sed -e 's/^ *//' -e 's/ *$//'
}
# cleanname STRING
# Print STRING on stdout after removing surrounding spaces and at most one
# leading and one trailing occurrence of each of "," ";" and "." (tried in
# that order at each end).
cleanname () {
    # printf instead of echo so that values such as "-n" are not swallowed.
    # The last two expressions trim the spaces that become the outermost
    # characters once a punctuation mark has been removed
    # (", 17" used to come out as " 17", "foo ," as "foo ").
    printf '%s\n' "$1" | sed -e 's/^ *//' \
        -e 's/^ *,//' \
        -e 's/^ *;//' \
        -e 's/^ *\.//' \
        -e 's/ *$//' \
        -e 's/, *$//' \
        -e 's/; *$//' \
        -e 's/\. *$//' \
        -e 's/^ *//' \
        -e 's/ *$//'
}
69#
set -o posix
# Script name without directory and without the .sh suffix (used in usage).
command=$(basename "${0}" .sh)
# UTC timestamp of this run, ISO 8601 format.
log_date=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
# Log file name; not referenced anywhere else in this file.
log=/tmp/${command}.${log_date}
#
# Test that the external tools this script cannot work without are available:
# xsltproc renders the result, recode builds the id of every entry (without
# it the ids would silently lose the author name).
for tool in xsltproc recode
do
 if ! command -v "${tool}" 1> /dev/null 2>&1
 then
  echo " eee : ${tool} not found"
  exit 1
 fi
done
83#
usage=" Usage : ${command} -i filein -t type"
#
# Parse the command line. Both options take a value:
#   -i filein   input file
#   -t type     raw or mailbody
# Anything else prints the usage and stops.
while [ ${#} -gt 0 ]
do
 case "${1}" in
 -i) # filein
  # a flag given without its value is a usage error
  if [ ${#} -lt 2 ]
  then
   echo "${usage}"
   exit 1
  fi
  filein=${2}
  shift
 ;;
 -t) # type
  if [ ${#} -lt 2 ]
  then
   echo "${usage}"
   exit 1
  fi
  type=${2}
  shift
 ;;
 *) # other choice
  echo "${usage}"
  exit 1
 ;;
 esac
 shift # next flag
done
#
# Both options are mandatory. Checked before "set -u" so that a missing
# option gives the usage message instead of an "unbound variable" abort.
if [ -z "${filein:-}" ] || [ -z "${type:-}" ]
then
 echo "${usage}"
 exit 1
fi
#
set -u
#
# check for filein
if [ ! -f "${filein}" ]
then
  echo "eee : ${filein} not found"
  exit 1
fi
113#
# Set fileraw, the file holding one reference per line (lines starting with
# "#" are comments), according to the input type.
case "${type}" in
raw) # file like data/biball.txt
 # the input already has the expected layout
 fileraw=${filein}
;;
mailbody) # file like data/mail2007-04-25T08:58:16Z.txt
 # Build the raw file from a mail body:
 #  - the value of every non-empty "correction_...=" line,
 #  - the lines from "newreferences=" up to (not including) "comments=".
 fileraw=/tmp/$(basename "${filein}").raw
 {
  echo "# from ${filein}"
  echo "# ${log_date}"
  echo "# corrections"
  # the greedy .* drops everything up to the LAST "=" of the line
  grep 'correction_.*=' "${filein}" | grep -v 'correction_.*=$' | \
  sed -e 's/correction_.*=//'
  echo "# new references"
  awk '/newreferences=/,/comments=/' "${filein}" | \
  sed -e 's/newreferences=//' -e '/comments=/d'
 } > "${fileraw}"
 #more ${fileraw} # ++ if debug
 #read a #++ if debug
;;
*)
   echo "eee : type should be raw or mailbody"
   exit 1
;;
esac
135#
# check for output
# The XML result goes to the current directory, named after the input file
# with its .txt suffix replaced by .xml.
fileou=$(basename "${filein}" .txt).xml
if [ -f "${fileou}" ]
then
   # only reported: the exit below is disabled, the run continues
   echo "eee : ${fileou} already exist"
   #exit 1 # ++ if not debug
fi
143#
# Known journal / series names, indexed from 1.
# The list is scanned in index order and the first name found in a reference
# (followed by a comma) wins, so a name that starts with another name of the
# list must come BEFORE the shorter one.
# To add a journal, append a new line before the closing parenthesis.
jlist=(
  [1]="J. Climate"
  "Journal of Climate"
  "Ocean Modelling"
  "Geophys. Res. Lett."
  "J. Geophys. Res."
  "Tellus A"
  "Tellus B"
  "J. Phys. Oceanogr."
  "Clim. Dyn."
  "Climate Dynamics"
  "Dyn. Atmos. Oceans"
  "Mon. Wea. Rev."
  "Global Biogeochem. Cycles"
  "Nonlinear Processes in Geophysics"
  "Ocean Science"
  "J. Mar. Systems"
  "J. Atmos. Sc."
  "Proc. Royal Soc."
  "Bull. Amer. Meteorol. Soc."
  "Ocean Dyn."
  "Geophysical Monograph Series"
  "Paleoceanography"
  "Annales Geophysicae"
  "Annals of Geophys."
  "Deep Sea Research Part II: Topical Studies in Oceanography"
  "Deep Sea Res. II"
  "Atmospheric Chemistry and Physics"
  "Atmospheric Chemistry and Physics Discussions"
  "Earth Plan. Sc. Lett."
  "J. Mar. Res."
  "PCMDI Report Series"
  "J. Mar. Syst."
  "Note du Pole de Modelisation"
  "Calculateurs Paralleles"
  "Note Scientifique du Pole de Modelisation"
  "Lecture Notes in Computer Science"
  "Prog. Oceanogr."
  "Deep Sea Research Part I: Oceanographic Research Papers"
  "Deep Sea Res. I"
  "IEEE Trans. Geosci. Remote Sensing"
  "ECMWF Tech. Memorandum"
  "J. Environ. Radioactivity"
  "Int. WOCE Newsletter"
  # longer name first, otherwise "C. R. Acad. Sci. Paris" would always win
  "C. R. Acad. Sci. Paris, Earth and Planetary Sciences"
  "C. R. Acad. Sci. Paris"
  "In The mathematics of models for climatology and environment"
  "Oceanol. Acta"
  "The global atmosphere and ocean system"
  "NATO Advanced Study Institute"
  "WCRP"
  "The Courier"
  "Elsevier Oceanographic Series"
  "In Conference Proceedings of the 1988 International Conference on Supercomputing"
  "In Science and engineering on Cray Supercomputers"
  "In Modeling the Earth's Climate and its Variability"
  "Fish. Oceanogr."
  "Q. J. R. Meteor. Soc."
  "In High performance computing in the geosciences"
  "Int. J. Numer. Meth. in Fluids"
  "Lecture notes in Physics"
  "J. Meterol. Soc. Japan"
  "Journal of Marine Systems"
  "Canadian Journal of Fisheries and Aquatic Sciences"
  "J. Atmos. Ocean. Tech."
  "EOS"
  "Nature"
  "Physics and Chemistry of the Earth, Part B: Hydrology, Oceans and Atmosphere"
  "Journal of Atmospheric and Oceanic Technology"
  "Lecture Notes in Computational Science Engineering"
  "In Greenhouse Gas Control Technologies"
  "Chemical Geology"
  "Marine Geology"
  "Developments in Paleoenvironmental Research"
  "Science"
  "In Mediterranean Climate Variability"
  "Global and Planetary Change"
  "Physical Review Letters"
)
222#
223#
# Start the output file (overwriting any previous one) with the XML prologue,
# the DocBook 4.5 doctype and the opening of the bibliography, stamped with
# the current UTC date.
cat <<EOF > "${fileou}"
<?xml version='1.0' encoding='ISO-8859-1'?>
<!DOCTYPE bibliography PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN"
 "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd">
<bibliography id="bibrefnemo" lang="en">
<bibliographyinfo>
<date>$( date -u +"%Y-%m-%dT%H:%M:%SZ" )</date>
</bibliographyinfo>
EOF
233#
# strip comments
fileraw_strict=/tmp/$(basename "${fileraw}")_strict
grep -v "^#" "${fileraw}" > "${fileraw_strict}"
#
# Convert every remaining line, expected to look like
#   Surname, F., F. Surname, ... and F. Surname, YEAR : Title, Journal, vol, pages, doi:...
# into one <biblioentry> appended to ${fileou}.
# The file is read through descriptor 3 so that no command run inside the
# loop can consume the remaining lines from stdin.
while IFS= read -r rawline <&3
do
  # Collapse runs of blanks to one space and trim both ends. The text is
  # never expanded unquoted, so characters such as * or ? in a reference
  # cannot be replaced by file names.
  line=$( printf '%s\n' "${rawline}" | \
    sed -e 's/[[:space:]][[:space:]]*/ /g' -e 's/^ //' -e 's/ $//' )
  # skip empty lines (a mail body with an empty "newreferences=" gives one)
  [ -n "${line}" ] || continue
  # Copy of the line for the XML comment: "--" is not allowed inside a
  # comment; the sed loop also handles runs of three or more hyphens.
  orgline=$( printf '%s\n' "${line}" | sed -e ':a' -e 's/--/- -/' -e 'ta' )
  # Escape the XML special characters (& first, so that the entities
  # produced for < and > are not escaped a second time).
  line=$( printf '%s\n' "${line}" | \
    sed -e 's/&/\&amp;/g' -e 's/</\&lt;/g' -e 's/>/\&gt;/g' )
# before the first :
  tmp=${line%%:*}
# before the last ,
  auths=${tmp%,*},
# "and" becomes a plain author separator, with or without a comma before it
  auths=$( printf '%s\n' "${auths}" | sed -e 's/, *and /, /g' -e 's/ and /, /g' )
# after the last ,
  year=${tmp##*,}
  year=$( rmbl "${year}" )
## first author before the first .,
  first=${auths%%.,*}.
# its firstname after the last ,
  firstfn=${first##*,}
  firstfn=$( rmbl "${firstfn}" )
# its surname ; before the first ,
  firstsn=${first%%,*}
  firstsn=$( rmbl "${firstsn}" )
## ref id : lower-case first surname (blanks and quotes replaced by _,
## passed through recode) followed by the year and a two-digit rank among
## the entries of ${fileou} that already use the same surname+year.
  refid=$( printf '%s\n' "${firstsn}" | tr "[:upper:]" "[:lower:]" | \
    tr -s " " "_" | tr -s "'" "_" | recode -d -f ISO-8859-1..flat )${year}
  count=$( grep -c "<biblioentry id=\"${refid}_[0-9][0-9]\">" "${fileou}" )
  refid=${refid}_$( printf '%02d' $(( count + 1 )) )

  cat <<EOF >> "${fileou}"
<biblioentry id="${refid}">
  <!-- date
  $( date -u +"%Y-%m-%dT%H:%M:%SZ" )
  -->
  <!-- original text
  $orgline
  -->
  <authorgroup>
    <author> <personname> <surname>${firstsn}</surname> <firstname>${firstfn}</firstname> </personname> </author>
EOF

## other authors, written "F. Surname"
  previous=${first},
# what follows the author just handled
  next=${auths##*"${previous}"}
# while the next author is not empty
  while [ -n "${next}" ]
  do
# get the first next author; before the first ,
    next=${next%%,*}
# its surname ; after the last .
    nextsn=${next##*.}
    nextsn=$( rmbl "${nextsn}" )
# its firstname ; before the last .
    nextfn=${next%.*}.
    nextfn=$( rmbl "${nextfn}" )
#
    echo "    <author> <personname> <surname>${nextsn}</surname> <firstname>${nextfn}</firstname> </personname> </author>" >> "${fileou}"
    previous=${next},
    next=${auths##*"${previous}"}
  done
  echo "  </authorgroup>" >> "${fileou}"

# end of the line ; after the first :
  endline=${line#*:}
  endline=${endline# }

## find the journal : first name of jlist found (case-insensitive, used as
## a regular expression) immediately followed by optional spaces and a comma
  jfound=""
  for journal in "${jlist[@]}"
  do
    if printf '%s\n' "${endline}" | grep -qi -- "${journal} *,"
    then
      jfound=${journal}
      break
    fi
  done
  if [ -z "${jfound}" ]
  then
      echo ERROR Journal not found
      printf '%s\n' "${endline}"
      # report the failure to the caller (a bare "exit" returned 0 here)
      exit 1
  fi
## title : before the first occurrence of the journal name
  title=${endline%%"${jfound}"*}
  title=$( cleanname "${title}" )
  echo "  <title>${title}</title>" >> "${fileou}"
## end of the line ; after the first ${jfound}
  endline=${endline#*"${jfound}"}
  endline=$( cleanname "${endline}" )
## doi : normalise the first "DOI :" spelling to "doi:"
  endline=$( printf '%s\n' "${endline}" | \
    sed -e 's/^ *//' -e 's/ *$//' -e 's/[dD][oO][iI] *: */doi:/' )
  if [[ "${endline}" == *doi:* ]]
  then
      doi=${endline##*doi:}
      echo "  <bibliomisc role=\"doi\">${doi}</bibliomisc>" >> "${fileou}"
      endline=${endline%doi:*}
      endline=$( cleanname "${endline}" )
  else
      printf 'non doi: %s\n' "${line}"
  fi
## what is left is split on its commas :
##   1 comma  -> volume, pages
##   2 commas -> volume, issue, pages
##   other    -> kept as it is in a bibliomisc (and reported on stdout)
# NOTE(review): the 1-comma form writes relation="journal" while the two
# other forms write role="journal"; kept as found - confirm which attribute
# the stylesheets expect.
  commas=${endline//[!,]/}
  case ${#commas} in
      1)
          vol=${endline%,*}
          vol=$( cleanname "${vol}" )
          pag=${endline##*,}
          pag=$( cleanname "${pag}" )
          cat <<EOF >> "${fileou}"
  <biblioset relation="journal">
    <title>${jfound}</title>
    <volumenum>${vol}</volumenum><pagenums>${pag}</pagenums>
    <pubdate>${year}</pubdate>
  </biblioset>
EOF
      ;;
      2)
          vol=${endline%,*}
          vol=$( cleanname "${vol}" )
          iss=${vol##*,}
          iss=$( cleanname "${iss}" )
          vol=${vol%,*}
          vol=$( cleanname "${vol}" )
          pag=${endline##*,}
          pag=$( cleanname "${pag}" )
          cat <<EOF >> "${fileou}"
  <biblioset role="journal">
    <title>${jfound}</title>
    <volumenum>${vol}</volumenum><issuenum>${iss}</issuenum><pagenums>${pag}</pagenums>
    <pubdate>${year}</pubdate>
  </biblioset>
EOF
      ;;
      *)
          printf '%s: %s\n' "${#commas}" "${endline}"
          cat <<EOF >> "${fileou}"
  <biblioset role="journal">
    <title>${jfound}</title>
    <pubdate>${year}</pubdate>
    <bibliomisc>${endline}</bibliomisc>
  </biblioset>
EOF
      ;;
  esac

  cat <<EOF >> "${fileou}"
</biblioentry>

EOF

done 3< "${fileraw_strict}"
# close the bibliography
echo "</bibliography>" >> "${fileou}"

# XHTML rendering, written next to the XML file (same name, .html suffix)
xsltproc \
--output "$( basename "${filein}" .txt).html" \
http://docbook.sourceforge.net/release/xsl/current/xhtml/docbook.xsl \
"${fileou}"

# PDF conversion, run only as a consistency check; its stderr is kept in
# xml.err
xmlto pdf "${fileou}" 2> xml.err
#
# clean (rm -i asks for confirmation before each removal)
echo "iii : xml.err contains stderr from the following command "
echo "iii : which was done just to check consistence of ${fileou}" 
echo "iii : xmlto pdf ${fileou}"
rm -i -- xml.err
# the intermediate raw file only exists for the mailbody type
if [ "${type}" = "mailbody" ]
then
 echo "iii : ${fileraw} contains a copy of input file"
 rm -i -- "${fileraw}"
fi
echo "iii : ${fileraw_strict} contains a copy of input file without comments"
rm -i -- "${fileraw_strict}"
#
echo " iii : you have a some new or modified bibliographic references in ${fileou} (biblioentry)"
echo " iii : you can add them in bibrefnemo.xml" #++ filename path
echo " iii : modify date in bibrefnemo.xml" #++ filename path
echo " iii : but please take care of id last part (after underscore) that may"
echo " iii : be in conflict with existing ones" #++ to avoid this, the file to be completed should be given as an additional input.
#
exit 0
Note: See TracBrowser for help on using the repository browser.