source: trunk/bibopa.sh @ 39

Last change on this file since 39 was 33, checked in by pinsard, 17 years ago

add Id in many headers; replace bibrefnemo.xml by biblio.xml

  • Property svn:keywords set to Id
File size: 12.8 KB
Line 
1#!/bin/bash
2#
3#    Behera, S. K., J. Luo, S. Masson, S. Rao, S. Gualdi, P. Delecluse, A.
4#    Navarra and T. Yamagata, 2004 : Paramount Impact of the Indian Ocean
5#    Dipole on the East African Short Rains: A CGCM Study, J. Climate, In
6#    press.
7#
8#    would give
9#
10#    <biblioentry id="behara2004">
11#    <authorgroup>
12#    <author><surname>Behera</surname> <firstname>S. K.</firstname> </author>
13#    <author><firstname>J.</firstname> <surname>Luo</surname></author>
14#    <author><firstname>S.</firstname> <surname>Masson</surname></author>
15#    <author><firstname>S.</firstname> <surname>Rao</surname></author>
16#    <author><firstname>S.</firstname> <surname>Gualdi</surname></author>
17#    <author><firstname>P.</firstname> <surname>Delecluse</surname></author>
18#    <author><firstname>A.</firstname> <surname>Navara</surname></author>
19#    <author><firstname>T.</firstname> <surname>Yamagata</surname></author>
20#    </authorgroup>
21#    <date>2004</date>
22#    <title>Paramount Impact of the Indian Ocean Dipole on the East African
23#    Short Rains: A CGCM Study</title>
24#    <publishername>J. Climate</publishername>
25#    <biblioid class="doi">doi</biblioid>
26#    <bibliomisc role="pseudoref">In press.</bibliomisc>
27#    <bibliomisc role="internalref">from
28#    http://www.lodyc.jussieu.fr/~opatlod/NEMO_v1/6_Menu/2_page/index.html
29#    2007-03-29T16:24:31Z fplod by hand</bibliomisc>
30#    </biblioentry>
31#
32#
33# example :
34# $ ./bibopa.sh -i /Users/smasson/Bibopa/biball.txt -t raw
35# $ ./bibopa.sh -i data/mail2007-04-25T08:58:16Z.txt -t mailbody
36#
37# see also mailtouser.sh
38#
39# original location :
40# /usr/home/fplod/incas/bibnemo/src/bibnemomaf/bibopa.sh on cerbere.locean-ipsl.upmc.fr
41#
42# update
43# ++ handle comments
44# ++ handle already existing ids (see the end of this script)
45# ++ debug option
46# $Id$
47# fplod 2007-06-20T17:18:02Z aedon.locean-ipsl.upmc.fr (Darwin)
48# <bibliomisc role="id"> replace by <biblioid class="doi">
49# smasson 2007-06-07T16:43:42Z arete.locean-ipsl.upmc.fr (Darwin)
50# Add journals
51# fplod 2007-05-10T09:17:09Z aedon.locean-ipsl.upmc.fr (Darwin)
52# final touches
53# fplod 2007-04-25T10:59:49Z cerbere.locean-ipsl.upmc.fr (Linux)
54# add a filein parameter and an option for mailbody
55# comments (lines beginning with #) are now possible
56# Sebastien Masson avril 2007 creation
57#
58
# rmbl TEXT
# Print TEXT on stdout with its leading and trailing space characters removed.
# printf is used instead of echo so that a TEXT such as "-n" or "-e" is
# printed literally rather than being taken as an echo option.
rmbl () {
    printf '%s\n' "${1}" | sed -e 's/^ *//' -e 's/ *$//'
}
# cleanname TEXT
# Print TEXT on stdout stripped of the separators that surround a field:
# leading spaces, then at most one leading "," ";" and "." (in that order),
# and likewise at the end of the text. Spaces left behind once a separator
# has been removed are trimmed too, so ", 17" gives "17" and not " 17".
# printf is used instead of echo so that a TEXT such as "-n" is kept.
cleanname () {
    printf '%s\n' "${1}" | sed \
        -e 's/^ *//' \
        -e 's/^ *,//' \
        -e 's/^ *;//' \
        -e 's/^ *\.//' \
        -e 's/^ *//' \
        -e 's/ *$//' \
        -e 's/, *$//' \
        -e 's/; *$//' \
        -e 's/\. *$//' \
        -e 's/ *$//'
}
72#
# run bash in POSIX mode
set -o posix
# script name without directory and without the .sh suffix (used in usage)
command=$(basename "${0}" .sh)
# UTC timestamp of this run
log_date=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
log=/tmp/${command}.${log_date}
#
# test if the external tools this script cannot work without are available:
# xsltproc renders the result, recode is part of the reference id pipeline
for tool in xsltproc recode
do
 if ! type "${tool}" 1> /dev/null 2>&1
 then
  echo " eee : ${tool} not found"
  exit 1
 fi
done
86#
usage=" Usage : ${command} -i filein -t type"
#
# Command line parsing.
#   -i filein : input file
#   -t type   : kind of input file
#   -h        : print the usage and exit with status 0
# The options are parsed first and the presence of -i and -t is checked
# afterwards, so that "-h" alone really prints the usage instead of being
# rejected for lack of arguments.
filein=""
type=""
echo " narg ${#}"
while [ ${#} -gt 0 ]
do
 case "${1}" in
 -i) # filein
  # the option needs a value
  if [ ${#} -lt 2 ]
  then
   echo "${usage}"
   exit 1
  fi
  filein=${2}
  shift
 ;;
 -t) # type
  # the option needs a value
  if [ ${#} -lt 2 ]
  then
   echo "${usage}"
   exit 1
  fi
  type=${2}
  shift
 ;;
 -h)
  echo "${usage}"
  exit 0
 ;;
 *) # other choice
  echo "${usage}"
  exit 1
 ;;
 esac
 shift # next flag
done
#
# both -i and -t are mandatory
if [ -z "${filein}" ] || [ -z "${type}" ]
then
  echo "eee : not enought arguments"
  echo "${usage}"
  exit 1
fi
120#
# from here on, expanding an unset variable is an error
set -u
#
# check for filein
# (quoted: unquoted, an empty value made the test succeed silently and a
# path containing blanks made it fail with a syntax error)
if [ ! -f "${filein}" ]
then
  echo "eee : ${filein} not found"
  exit 1
fi
129#
# Build ${fileraw}, the file holding one reference per line.
case "${type}" in
raw) # file like data/biball.txt
 # the input file is used as it is
 fileraw=${filein}
;;
mailbody) # file like data/mail2007-04-25T08:58:16Z.txt
 # the references are extracted from the mail body into a temporary file;
 # mktemp gives it a name nobody else can have prepared in /tmp
 fileraw=$(mktemp "/tmp/$(basename "${filein}").raw.XXXXXX")
 status=${?}
 if [ ${status} -ne 0 ]
 then
  echo "eee : cannot create a temporary file in /tmp"
  exit 1
 fi
 {
  echo "# from ${filein}"
  echo "# ${log_date}"
  echo "# corrections"
  # keep the non-empty "correction_xxx=value" fields and drop the key;
  # the key stops at the first "=" so that a value containing "=" is
  # kept whole
  grep 'correction_[^=]*=' "${filein}" | grep -v 'correction_[^=]*=$' | \
  sed -e 's/correction_[^=]*=//'
  echo "# new references"
  # lines from the one holding "newreferences=" up to, but not including,
  # the one holding "comments="
  awk '/newreferences=/,/comments=/' "${filein}" | sed -e "s/newreferences=//" -e "/comments=/d"
 } > "${fileraw}"
 #more ${fileraw} # ++ if debug
 #read a #++ if debug
;;
*)
   echo "eee : type should be raw or mailbody"
   exit 1
;;
esac
151#
# check for output
# The output file is created in the current directory; its name is the one
# of the input file with .txt replaced by .xml.
fileou=$(basename "${filein}" .txt).xml
if [ -f "${fileou}" ]
then
   # only reported: the exit is disabled, so the script carries on
   echo "eee : ${fileou} already exist"
   #exit 1 # ++ if not debug
fi
159#
# Journal (or series) names looked for in each reference.
# The list is numbered from 1 and searched in this order; the first name
# found wins. To add a name, append a new line before the closing bracket.
jlist=(
  [1]="J. Climate"
  "Journal of Climate"
  "Ocean Modelling"
  "Geophys. Res. Lett."
  "J. Geophys. Res."
  "Tellus A"
  "Tellus B"
  "J. Phys. Oceanogr."
  "Clim. Dyn."
  "Climate Dynamics"
  "Dyn. Atmos. Oceans"
  "Mon. Wea. Rev."
  "Global Biogeochem. Cycles"
  "Nonlinear Processes in Geophysics"
  "Ocean Science"
  "J. Mar. Systems"
  "J. Atmos. Sc."
  "Proc. Royal Soc."
  "Bull. Amer. Meteorol. Soc."
  "Ocean Dyn."
  "Geophysical Monograph Series"
  "Paleoceanography"
  "Annales Geophysicae"
  "Annals of Geophys."
  "Deep Sea Research Part II: Topical Studies in Oceanography"
  "Deep Sea Res. II"
  "Atmospheric Chemistry and Physics"
  "Atmospheric Chemistry and Physics Discussions"
  "Earth Plan. Sc. Lett."
  "J. Mar. Res."
  "PCMDI Report Series"
  "J. Mar. Syst."
  "Note du Pole de Modelisation"
  "Calculateurs Paralleles"
  "Note Scientifique du Pole de Modelisation"
  "Lecture Notes in Computer Science"
  "Prog. Oceanogr."
  "Deep Sea Research Part I: Oceanographic Research Papers"
  "Deep Sea Res. I"
  "IEEE Trans. Geosci. Remote Sensing"
  "ECMWF Tech. Memorandum"
  "J. Environ. Radioactivity"
  "Int. WOCE Newsletter"
  "C. R. Acad. Sci. Paris"
  "C. R. Acad. Sci. Paris, Earth and Planetary Sciences"
  "In The mathematics of models for climatology and environment"
  "Oceanol. Acta"
  "The global atmosphere and ocean system"
  "NATO Advanced Study Institute"
  "WCRP"
  "The Courier"
  "Elsevier Oceanographic Series"
  "In Conference Proceedings of the 1988 International Conference on Supercomputing"
  "In Science and engineering on Cray Supercomputers"
  "In Modeling the Earth's Climate and its Variability"
  "Fish. Oceanogr."
  "Q. J. R. Meteor. Soc."
  "In High performance computing in the geosciences"
  "Int. J. Numer. Meth. in Fluids"
  "Lecture notes in Physics"
  "J. Meterol. Soc. Japan"
  "Journal of Marine Systems"
  "Canadian Journal of Fisheries and Aquatic Sciences"
  "J. Atmos. Ocean. Tech."
  "EOS"
  "Nature"
  "Physics and Chemistry of the Earth, Part B: Hydrology, Oceans and Atmosphere"
  "Journal of Atmospheric and Oceanic Technology"
  "Lecture Notes in Computational Science Engineering"
  "In Greenhouse Gas Control Technologies"
  "Chemical Geology"
  "Marine Geology"
  "Developments in Paleoenvironmental Research"
  "Science"
  "In Mediterranean Climate Variability"
  "Global and Planetary Change"
  "Physical Review Letters"
)
#
# Start the output file (any previous content is replaced) with the XML
# declaration, the DocBook 4.5 doctype and the opening of the bibliography,
# stamped with the current UTC date.
cat <<EOF > "${fileou}"
<?xml version='1.0' encoding='ISO-8859-1'?>
<!DOCTYPE bibliography PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN"
 "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd">
<bibliography id="bibrefnemo" lang="en">
<bibliographyinfo>
<date>$( date -u +"%Y-%m-%dT%H:%M:%SZ" )</date>
</bibliographyinfo>
EOF
249#
# strip comments
# Work on a copy of the raw file without the lines starting with "#".
fileraw_strict=/tmp/$(basename "${fileraw}")_strict
grep -v "^#" "${fileraw}" > "${fileraw_strict}"
#
# Convert each reference (one per line, written as
#   authors, year : title, journal, volume[, issue], pages[, doi: xxx]
# ) into one <biblioentry> appended to ${fileou}.
#
# Several commands below expand a variable unquoted on purpose, to squeeze
# runs of blanks; pathname expansion is switched off meanwhile so that a "*"
# or a "?" in a reference is never replaced by file names.
set -f
# The file is read once, on descriptor 3, so that no command of the loop
# body can consume the references through its standard input.
while IFS= read -r line <&3
do
# a blank line is not a reference
  [[ -z "${line//[[:space:]]/}" ]] && continue
# text kept in an XML comment, where "--" is not allowed
  orgline=$( echo ${line} | sed -e "s/--/- -/g" )
# escape < and > for XML
# NOTE(review): "&" is not escaped; confirm whether the input may already
# contain XML entities before adding it here
  line=$( echo ${line} | sed -e "s/</\&lt;/g" -e "s/>/\&gt;/g" )
# before the first :
  tmp=${line%%:*}
# before the last ,
  auths=${tmp%,*},
# suppress and
  auths=$( echo "${auths}" | sed -e "s/ and //g" )
# after the last ,
  year=${tmp##*,}
  year=$( rmbl "${year}" )
## first author before the first .,
  first=${auths%%.,*}.
# its firstname after the last ,
  firstfn=${first##*,}
  firstfn=$( rmbl "${firstfn}" )
# its surname ; before the first ,
  firstsn=${first%%,*}
  firstsn=$( rmbl "${firstsn}" )
## ref id : surname of the first author (lower case, blanks and quotes
## replaced by _, passed through recode) + year + _NN, NN being one more
## than the number of entries of ${fileou} already using surname + year
  refid=$( echo ${firstsn} | tr "[:upper:]" "[:lower:]" | tr -s " " "_"  | tr -s "'" "_" | recode -d -f ISO-8859-1..flat )${year}
  num=$( grep -c "<biblioentry id=\"${refid}_[0-9][0-9]\">" "${fileou}" )
  num=$(( ${num} + 1 ))
  [ ${num} -le 9 ] && num=0${num}
  refid=${refid}_${num}

  cat <<EOF >> "${fileou}"
<biblioentry id="${refid}">
  <!-- date
  $( date -u +"%Y-%m-%dT%H:%M:%SZ" )
  -->
  <!-- original text
  ${orgline}
  -->
  <authorgroup>
    <author> <personname> <surname>${firstsn}</surname> <firstname>${firstfn}</firstname> </personname> </author>
EOF

## other authors..
# (the author text is quoted inside the patterns so that it is always
# matched literally)
  previous=${first},
# next authors...
  next=${auths##*"${previous}"}
# while the next author is not empty
  while [ "${next}" != "" ]
  do
# get the first next author; before the first ,
    next=${next%%,*}
# its surname ; after the last .
    nextsn=${next##*.}
    nextsn=$( rmbl "${nextsn}" )
# its firstname ; before the last .
    nextfn=${next%.*}.
    nextfn=$( rmbl "${nextfn}" )
#
    echo "    <author> <personname> <surname>${nextsn}</surname> <firstname>${nextfn}</firstname> </personname> </author>" >> "${fileou}"
    echo "    <author> <personname> <surname>${nextsn}</surname> <firstname>${nextfn}</firstname> </personname> </author>"  #++debug
    previous=${next},
    next=${auths##*"${previous}"}

  done
  echo "  </authorgroup>"  >> "${fileou}"

# end of the line ; after the first :
  endline=${line#*:}

## find the journal : first name of jlist followed by a comma
  j=1
  jfound=""
  jlistsize=${#jlist[@]}
  while [[ ${j} -le ${jlistsize} && "${jfound}" == "" ]]
  do
    ok=$( echo ${endline} | grep -ci "${jlist[j]} *," )
    [ "${ok}" -eq 1 ] && jfound="${jlist[j]}"
    j=$(( ${j} + 1 ))
  done
  if [ "${jfound}" == "" ]
  then
      echo "eee: Journal not found "
      echo "${endline}"
# this is a failure: do not leave with the (successful) status of echo
      exit 1
  fi
## title
# before the first occurrence of the journal name
# NOTE(review): the search above ignores case, this cut does not; a journal
# written with another case is found but leaves the title uncut - confirm
  title=${endline%%"${jfound}"*}
  title=$( cleanname "${title}" )
  echo "  <title>${title}</title>" >> "${fileou}"
## end
## end of the line ; after the first ${jfound}
  endline=${endline#*"${jfound}"}
  endline=$( cleanname "${endline}" )
## doi
  endline=$( echo ${endline} | sed -e "s/[dD][oO][iI] *\t* *: *\t* */doi:/" )
  ok=$( echo ${endline} | grep -ic "doi:" )
  if [ "${ok}" -eq 1 ]
  then
      doi=${endline##*doi:}
      echo "  <biblioid class=\"doi\">${doi}</biblioid>" >> "${fileou}"
      endline=${endline%doi:*}
      endline=$( cleanname "${endline}" )
  else
      echo "non doi: ${line}"
  fi
## what is left : "volume, pages" or "volume, issue, pages"
# number of commas, counted by the shell: the output of "wc -c" is padded
# with blanks on some systems and then never matches the patterns below
  commas=${endline//[!,]/}
  num=${#commas}
# NOTE(review): the first branch writes relation="journal", the other two
# role="journal" - confirm which attribute is wanted
  case "${num}" in
      1)
### echo ${num}: ${endline}
          vol=${endline%,*}
          vol=$( cleanname "${vol}" )
          pag=${endline##*,}
          pag=$( cleanname "${pag}" )
          cat <<EOF >> "${fileou}"
  <biblioset relation="journal">
    <title>${jfound}</title>
    <volumenum>${vol}</volumenum><pagenums>${pag}</pagenums>
    <pubdate>${year}</pubdate>
  </biblioset>
EOF
      ;;
      2)
          vol=${endline%,*}
          vol=$( cleanname "${vol}" )
          iss=${vol##*,}
          iss=$( cleanname "${iss}" )
          vol=${vol%,*}
          vol=$( cleanname "${vol}" )
          pag=${endline##*,}
          pag=$( cleanname "${pag}" )
          cat <<EOF >> "${fileou}"
  <biblioset role="journal">
    <title>${jfound}</title>
    <volumenum>${vol}</volumenum><issuenum>${iss}</issuenum><pagenums>${pag}</pagenums>
    <pubdate>${year}</pubdate>
  </biblioset>
EOF
      ;;
      *)
# anything else is kept as it is
          echo ${num}: ${endline}
          cat <<EOF >> "${fileou}"
  <biblioset role="journal">
    <title>${jfound}</title>
    <pubdate>${year}</pubdate>
    <bibliomisc>${endline}</bibliomisc>
  </biblioset>
EOF
      ;;
  esac

  cat <<EOF >> "${fileou}"
</biblioentry>

EOF

done 3< "${fileraw_strict}"
# pathname expansion is available again
set +f
echo "</bibliography>" >> "${fileou}"
419
# XHTML version of the bibliography, written in the current directory
# (file operands are quoted so that names containing blanks are handled)
xsltproc \
--output "$( basename "${filein}" .txt).html" \
http://docbook.sourceforge.net/release/xsl/current/xhtml/docbook.xsl \
"${fileou}"

# PDF conversion, done only to check the consistency of the XML file;
# its diagnostics are collected in xml.err
xmlto pdf "${fileou}" 2> xml.err
#
# clean
echo "iii : xml.err contains stderr from the following command "
echo "iii : which was done just to check consistence of ${fileou}"
echo "iii : xmlto pdf ${fileou}"
rm -i xml.err
# each intermediate file is removed only after confirmation (rm -i)
case "${type}" in
raw)
 echo "iii : ${fileraw_strict} contains a copy of input file without comments"
 rm -i -- "${fileraw_strict}"
;;
mailbody)
 echo "iii : ${fileraw} contains a copy of input file"
 rm -i -- "${fileraw}"
 echo "iii : ${fileraw_strict} contains a copy of input file without comments"
 rm -i -- "${fileraw_strict}"
;;
esac
#
echo " iii : you have a some new or modified bibliographic references in ${fileou} (biblioentry)"
echo " iii : you can add them in bibrefnemo.xml" #++ filename path
echo " iii : modify date in bibrefnemo.xml" #++ filename path
echo " iii : but please take care of id last part (after underscore) that may"
echo " iii : be in conflict with existing ones" #++ to get rid of this concern, the file to be completed should be given as an additional input.
#
exit 0
Note: See TracBrowser for help on using the repository browser.