source: trunk/bibopa.sh @ 44

Last change on this file since 44 was 44, checked in by pinsard, 16 years ago

add bibtex intput (just beginning)

  • Property svn:keywords set to Id
File size: 13.1 KB
Line 
1#!/bin/bash
2#
3#    Behera, S. K., J. Luo, S. Masson, S. Rao, S. Gualdi, P. Delecluse, A.
4#    Navarra and T. Yamagata, 2004 : Paramount Impact of the Indian Ocean
5#    Dipole on the East African Short Rains: A CGCM Study, J. Climate, In
6#    press.
7#
8#    donnerait
9#
10#    <biblioentry id="behara2004">
11#    <authorgroup>
12#    <author><surname>Behera</surname> <firstname>S. K.</firstname> </author>
13#    <author><firstname>J.</firstname> <surname>Luo</surname></author>
14#    <author><firstname>S.</firstname> <surname>Masson</surname></author>
15#    <author><firstname>S.</firstname> <surname>Rao</surname></author>
16#    <author><firstname>S.</firstname> <surname>Gualdi</surname></author>
17#    <author><firstname>P.</firstname> <surname>Delecluse</surname></author>
18#    <author><firstname>A.</firstname> <surname>Navara</surname></author>
19#    <author><firstname>T.</firstname> <surname>Yamagata</surname></author>
20#    </authorgroup>
21#    <date>2004</date>
22#    <title>Paramount Impact of the Indian Ocean Dipole on the East African
23#    Short Rains: A CGCM Study</title>
24#    <publishername>J. Climate</publishername>
25#    <biblioid class="doi">doi</biblioid>
26#    <bibliomisc role="pseudoref">In press.</bibliomisc>
27#    <bibliomisc role="internalref">from
28#    http://www.lodyc.jussieu.fr/~opatlod/NEMO_v1/6_Menu/2_page/index.html
29#    2007-03-29T16:24:31Z fplod by hand</bibliomisc>
30#    </biblioentry>
31#
32#
33# example :
34# $ ./bibopa.sh -i /Users/smasson/Bibopa/biball.txt -t raw
35# $ ./bibopa.sh -i data/mail2007-04-25T08:58:16Z.txt -t mailbody
36# $ ./bibopa.sh -i data/petitpoly.bib -t bibtex
37#
38# see also mailtouser.sh
39#
40# original location :
41# /usr/home/fplod/incas/bibnemo/src/bibnemomaf/bibopa.sh sur cerbere.locean-ipsl.upmc.fr
42#
43# update
44# ++ gestion des comments
45# ++ gestion des id existants (cf à la fin)
46# ++ option debug
47# $Id$
48# fplod 2008-03-11T11:25:27Z aedon.locean-ipsl.upmc.fr (Darwin)
49# add bibtex entry (not finished)++
50# fplod 2007-06-20T17:18:02Z aedon.locean-ipsl.upmc.fr (Darwin)
51# <bibliomisc role="id"> replace by <biblioid class="doi">
52# smasson 2007-06-07T16:43:42Z arete.locean-ipsl.upmc.fr (Darwin)
53# Add journals
54# fplod 2007-05-10T09:17:09Z aedon.locean-ipsl.upmc.fr (Darwin)
55# dernières touches
56# fplod 2007-04-25T10:59:49Z cerbere.locean-ipsl.upmc.fr (Linux)
57# add a filein parameter and an option for mailbody
58# comments (lines beginning with #) are now possible
59# Sebastien Masson avril 2007 creation
60#
61
# rmbl : remove leading and trailing spaces (spaces only, not tabs)
#   $1     : text to trim
#   stdout : trimmed text
rmbl () {
    # printf, not echo : echo would swallow a value such as "-n" or "-e"
    printf '%s\n' "${1}" | sed -e "s/^ *//" -e "s/ *$//"
}
# cleanname : strip surrounding spaces and one leading / one trailing
#             separator (comma, semicolon or dot) from a field
#   $1     : text to clean
#   stdout : cleaned text
cleanname () {
    # printf, not echo : echo would swallow a value such as "-n" or "-e"
    # the last two expressions remove the spaces that become the new
    # start/end of the text once a separator has been stripped
    # (", 12" used to give " 12" and "foo , " used to give "foo ")
    printf '%s\n' "${1}" | sed -e "s/^ *//" \
        -e "s/^ *,//" \
        -e "s/^ *;//" \
        -e "s/^ *\.//" \
        -e "s/ *$//" \
        -e "s/, *$//" \
        -e "s/; *$//" \
        -e "s/\. *$//" \
        -e "s/^ *//" \
        -e "s/ *$//"
}
75#
set -o posix
# name of this script without directory and .sh suffix (used in usage)
command=$(basename "${0}" .sh)
# UTC timestamp of this run (written in the header of generated raw files)
log_date=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
log=/tmp/${command}.${log_date}
#
# test if the external tools needed to build the bibliography are available
# (xsltproc renders the html, recode is needed to build the entry ids)
for tool in xsltproc recode
do
 if ! type "${tool}" 1> /dev/null 2>&1
 then
  echo " eee : ${tool} not found" >&2
  exit 1
 fi
done
89#
usage=" Usage : ${command} -i filein -t type"
#
echo " narg ${#}"
#
# command line decoding
#   -i filein : file to convert
#   -t type   : raw, mailbody or bibtex
#   -h        : print usage and exit
# the options are decoded first and checked afterwards, so that
# "-h" alone prints the usage with a 0 status
filein=""
type=""
while [ -n "${1:-}" ]
do
 case "${1}" in
 -i) # filein
  filein=${2:-}
  # skip the value only if there is one (no shift past the end)
  [ ${#} -gt 1 ] && shift
 ;;
 -t) # type
  type=${2:-}
  [ ${#} -gt 1 ] && shift
 ;;
 -h)
  echo "${usage}"
  exit 0
 ;;
 *) # other choice
  echo "${usage}" >&2
  exit 1
 ;;
 esac
 shift # next flag
done
#
# both -i and -t are mandatory
if [ -z "${filein}" ] || [ -z "${type}" ]
then
  echo "eee : not enought arguments" >&2
  echo "${usage}" >&2
  exit 1
fi
123#
# from now on, using an unset variable is an error
set -u
#
# check for filein
# (quoted : an empty or blank-containing name must fail the test instead of
# being silently accepted by "[ ! -f ]")
if [ ! -f "${filein:-}" ]
then
  echo "eee : ${filein:-} not found" >&2
  exit 1
fi
132#
# build fileraw (one reference per line, # for comments) according to
# the type of the input file
case "${type:-}" in
raw) # file like data/biball.txt
 # already in the expected format
 fileraw=${filein}
;;
mailbody) # file like data/mail2007-04-25T08:58:16Z.txt
 fileraw=/tmp/$(basename "${filein}").raw
 {
  echo "# from ${filein}"
  echo "# ${log_date}"
  echo "# corrections"
  # keep the non empty correction_xxx=text fields, without their key.
  # the key stops at the first "=" so that a "=" inside the text
  # (url, doi ...) does not truncate the reference
  grep "correction_[^=]*=" "${filein}" | grep -v "correction_[^=]*=$" | \
  sed -e "s/correction_[^=]*=//"
  echo "# new references"
  # lines from newreferences= up to (but not including) comments=
  awk "/newreferences=/,/comments=/" "${filein}" | sed -e "s/newreferences=//" -e "/comments=/d"
 } > "${fileraw}"
 #more ${fileraw} # ++ if debug
 #read a #++ if debug
;;
bibtex) # file like data/petitpoly.bib
 fileraw=/tmp/$(basename "${filein}").raw
 echo "not yet implented. sorry." >&2
 exit 1
;;
*)
   echo "eee : type should be raw, mailbody or bibtex" >&2
   exit 1
;;
esac
159#
# check for output
# the xml file is written in the current directory and named after the
# input file (a .txt suffix is dropped)
fileou=$( basename "${filein}" .txt).xml
if [ -f "${fileou}" ]
then
   # only a warning for now : the file will be overwritten
   echo "eee : ${fileou} already exist"
   #exit 1 # ++ if not debug
fi
167#
# known journal / series titles.
# the table starts at index 1; the following entries take the next
# indices in order. the first title found in a reference wins, so the
# order matters. add new titles at the end.
jlist=(
  [1]="J. Climate"
  "Journal of Climate"
  "Ocean Modelling"
  "Geophys. Res. Lett."
  "J. Geophys. Res."
  "Tellus A"
  "Tellus B"
  "J. Phys. Oceanogr."
  "Clim. Dyn."
  "Climate Dynamics"
  "Dyn. Atmos. Oceans"
  "Mon. Wea. Rev."
  "Global Biogeochem. Cycles"
  "Nonlinear Processes in Geophysics"
  "Ocean Science"
  "J. Mar. Systems"
  "J. Atmos. Sc."
  "Proc. Royal Soc."
  "Bull. Amer. Meteorol. Soc."
  "Ocean Dyn."
  "Geophysical Monograph Series"
  "Paleoceanography"
  "Annales Geophysicae"
  "Annals of Geophys."
  "Deep Sea Research Part II: Topical Studies in Oceanography"
  "Deep Sea Res. II"
  "Atmospheric Chemistry and Physics"
  "Atmospheric Chemistry and Physics Discussions"
  "Earth Plan. Sc. Lett."
  "J. Mar. Res."
  "PCMDI Report Series"
  "J. Mar. Syst."
  "Note du Pole de Modelisation"
  "Calculateurs Paralleles"
  "Note Scientifique du Pole de Modelisation"
  "Lecture Notes in Computer Science"
  "Prog. Oceanogr."
  "Deep Sea Research Part I: Oceanographic Research Papers"
  "Deep Sea Res. I"
  "IEEE Trans. Geosci. Remote Sensing"
  "ECMWF Tech. Memorandum"
  "J. Environ. Radioactivity"
  "Int. WOCE Newsletter"
  "C. R. Acad. Sci. Paris"
  "C. R. Acad. Sci. Paris, Earth and Planetary Sciences"
  "In The mathematics of models for climatology and environment"
  "Oceanol. Acta"
  "The global atmosphere and ocean system"
  "NATO Advanced Study Institute"
  "WCRP"
  "The Courier"
  "Elsevier Oceanographic Series"
  "In Conference Proceedings of the 1988 International Conference on Supercomputing"
  "In Science and engineering on Cray Supercomputers"
  "In Modeling the Earth's Climate and its Variability"
  "Fish. Oceanogr."
  "Q. J. R. Meteor. Soc."
  "In High performance computing in the geosciences"
  "Int. J. Numer. Meth. in Fluids"
  "Lecture notes in Physics"
  "J. Meterol. Soc. Japan"
  "Journal of Marine Systems"
  "Canadian Journal of Fisheries and Aquatic Sciences"
  "J. Atmos. Ocean. Tech."
  "EOS"
  "Nature"
  "Physics and Chemistry of the Earth, Part B: Hydrology, Oceans and Atmosphere"
  "Journal of Atmospheric and Oceanic Technology"
  "Lecture Notes in Computational Science Engineering"
  "In Greenhouse Gas Control Technologies"
  "Chemical Geology"
  "Marine Geology"
  "Developments in Paleoenvironmental Research"
  "Science"
  "In Mediterranean Climate Variability"
  "Global and Planetary Change"
  "Physical Review Letters"
)
246#
247#
# start (and truncate) the output file : xml prologue, DocBook doctype,
# opening of the bibliography and its creation date (UTC)
cat <<EOF > "${fileou}"
<?xml version='1.0' encoding='ISO-8859-1'?>
<!DOCTYPE bibliography PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN"
 "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd">
<bibliography id="bibrefnemo" lang="en">
<bibliographyinfo>
<date>$( date -u +"%Y-%m-%dT%H:%M:%SZ" )</date>
</bibliographyinfo>
EOF
257#
#
# conversion of each reference of fileraw into a <biblioentry> appended
# to fileou. expected layout of one reference (one per line) :
#   Surname, F., F. Surname, ... and F. Surname, year : title, journal, vol, pages, doi: xxx
#
# strip comments
fileraw_strict=/tmp/$(basename "${fileraw}")_strict
grep -v "^#" "${fileraw}" > "${fileraw_strict}"
#
# squeeze : tabs become spaces, runs of spaces become one space and
# both ends are trimmed. this is what an unquoted echo does, but without
# the expansion of * ? [ into file names
squeeze () {
    printf '%s\n' "${1}" | tr '\t' ' ' | tr -s ' ' | sed -e "s/^ //" -e "s/ $//"
}
#
# the file is read once, on descriptor 3, one reference per loop.
# the second test keeps a last line which has no final newline
while IFS= read -r rawline <&3 || [ -n "${rawline}" ]
do
  line=$( squeeze "${rawline}" )
# nothing to convert on a blank line
  [ -z "${line}" ] && continue
# original text, kept in a xml comment : "--" is forbidden there.
# the substitution is repeated until no "--" is left ("---" case)
  orgline=$( printf '%s\n' "${line}" | sed -e ':a' -e 's/--/- -/' -e 'ta' )
# xml special characters (& first, to keep the other entities intact)
  line=$( printf '%s\n' "${line}" | sed -e "s/&/\&amp;/g" -e "s/</\&lt;/g" -e "s/>/\&gt;/g" )
# before the first :
  tmp=${line%%:*}
# before the last ,
  auths=${tmp%,*},
# "and" is one more separator between authors
  auths=$( printf '%s\n' "${auths}" | sed -e "s/, and /, /g" -e "s/ and /, /g" )
# after the last ,
  year=${tmp##*,}
  year=$( rmbl "${year}" )
## first author before the first .,
  first=${auths%%.,*}.
# its firstname after the last ,
  firstfn=${first##*,}
  firstfn=$( rmbl "${firstfn}" )
# its surname ; before the first ,
  firstsn=${first%%,*}
  firstsn=$( rmbl "${firstsn}" )
## ref id : surname (lowercase, "_" for spaces and quotes, passed through
## recode ISO-8859-1..flat), year and a 2 digits rank among the entries
## of fileou which already have the same surname and year
  refid=$( printf '%s\n' "${firstsn}" | tr "[:upper:]" "[:lower:]" | tr -s " " "_"  | tr -s "'" "_" | recode -d -f ISO-8859-1..flat )${year}
  num=$( grep -c "<biblioentry id=\"${refid}_[0-9][0-9]\">" "${fileou}" )
  num=$(( num + 1 ))
  [ "${num}" -le 9 ] && num=0${num}
  refid=${refid}_${num}

  cat <<EOF >> "${fileou}"
<biblioentry id="${refid}">
  <!-- date
  $( date -u +"%Y-%m-%dT%H:%M:%SZ" )
  -->
  <!-- original text
  ${orgline}
  -->
  <authorgroup>
    <author> <personname> <surname>${firstsn}</surname> <firstname>${firstfn}</firstname> </personname> </author>
EOF

## other authors..
  previous=${first},
# next authors... (what follows the author just written ; the variable is
# quoted inside the pattern to be taken literally)
  next=${auths##*"${previous}"}
# while the next author is not empty
  while [ "${next}" != "" ]
  do
# get the first next author; before the first ,
    next=${next%%,*}
# its surname ; after the last .
    nextsn=${next##*.}
    nextsn=$( rmbl "${nextsn}" )
# its firstname ; before the last .
    nextfn=${next%.*}.
    nextfn=$( rmbl "${nextfn}" )
#
    echo "    <author> <personname> <surname>${nextsn}</surname> <firstname>${nextfn}</firstname> </personname> </author>" >> "${fileou}"
    echo "    <author> <personname> <surname>${nextsn}</surname> <firstname>${nextfn}</firstname> </personname> </author>"  #++debug
    previous=${next},
    next=${auths##*"${previous}"}

  done
  echo "  </authorgroup>"  >> "${fileou}"

# end of the line ; after the first :
  endline=${line#*:}

## find the journal : first title of jlist followed by a comma
## NOTE(review): the search ignores case and reads the title as a regular
## expression ("." matches any character), while the cuts below are literal
## and case sensitive
  j=1
  jfound=""
  jlistsize=${#jlist[@]}
  while [[ ${j} -le ${jlistsize} && "${jfound}" == "" ]]
  do
    if printf '%s\n' "${endline}" | grep -qi -- "${jlist[j]} *,"
    then
      jfound="${jlist[j]}"
    fi
    j=$(( j + 1 ))
  done
  if [ "${jfound}" == "" ]
  then
      echo "eee: Journal not found " >&2
      echo "${endline}" >&2
# this is an error : do not leave with the 0 status of the last echo
      exit 1
  fi
## title
# before the first occurrence of the journal
  title=${endline%%"${jfound}"*}
  title=$( cleanname "${title}" )
  echo "  <title>${title}</title>" >> "${fileou}"
## end
## end of the line ; after the first ${jfound}
  endline=${endline#*"${jfound}"}
  endline=$( cleanname "${endline}" )
## doi
# "DOI : xxx" and the like are normalised into "doi:xxx" (blanks are
# already single spaces here, no need of the non portable \t)
  endline=$( squeeze "${endline}" | sed -e "s/[dD][oO][iI] *: */doi:/" )
  case "${endline}" in
      *doi:*)
          doi=${endline##*doi:}
          echo "  <biblioid class=\"doi\">${doi}</biblioid>" >> "${fileou}"
          endline=${endline%doi:*}
          endline=$( cleanname "${endline}" )
      ;;
      *)
          echo "non doi: ${line}"
      ;;
  esac
# number of commas left : 1 means "volume, pages", 2 means "volume, issue,
# pages". counted by the shell itself : the output of wc may be padded
# with blanks and then never match 1 or 2 below
  commas=${endline//[!,]/}
  num=${#commas}
  case "${num}" in
      1)
### echo ${num}: ${endline}
          vol=${endline%,*}
          vol=$( cleanname "${vol}" )
          pag=${endline##*,}
          pag=$( cleanname "${pag}" )
# NOTE(review): relation= here, role= in the two other cases - confirm
# which one the stylesheets expect
          cat <<EOF >> "${fileou}"
  <biblioset relation="journal">
    <title>${jfound}</title>
    <volumenum>${vol}</volumenum><pagenums>${pag}</pagenums>
    <pubdate>${year}</pubdate>
  </biblioset>
EOF
      ;;
      2)
          vol=${endline%,*}
          vol=$( cleanname "${vol}" )
          iss=${vol##*,}
          iss=$( cleanname "${iss}" )
          vol=${vol%,*}
          vol=$( cleanname "${vol}" )
          pag=${endline##*,}
          pag=$( cleanname "${pag}" )
          cat <<EOF >> "${fileou}"
  <biblioset role="journal">
    <title>${jfound}</title>
    <volumenum>${vol}</volumenum><issuenum>${iss}</issuenum><pagenums>${pag}</pagenums>
    <pubdate>${year}</pubdate>
  </biblioset>
EOF
      ;;
      *)
# unknown layout : the remaining text is kept as it is
          echo "${num}: ${endline}"
          cat <<EOF >> "${fileou}"
  <biblioset role="journal">
    <title>${jfound}</title>
    <pubdate>${year}</pubdate>
    <bibliomisc>${endline}</bibliomisc>
  </biblioset>
EOF
      ;;
  esac

  cat <<EOF >> "${fileou}"
</biblioentry>

EOF

done 3< "${fileraw_strict}"
echo "</bibliography>" >> "${fileou}"
427
# html version of the bibliography, next to the xml file
# (the DocBook stylesheet is fetched from the network)
xsltproc \
--output "$( basename "${filein}" .txt).html" \
http://docbook.sourceforge.net/release/xsl/current/xhtml/docbook.xsl \
"${fileou}"

# pdf conversion, only done to check the consistency of the xml file :
# its stderr is kept in xml.err
xmlto pdf "${fileou}" 2> xml.err
#
# clean (each removal is confirmed by the user)
echo "iii : xml.err contains stderr from the following command "
echo "iii : which was done just to check consistence of ${fileou}"
echo "iii : xmlto pdf ${fileou}"
rm -i xml.err
case "${type}" in
raw)
 echo "iii : ${fileraw_strict} contains a copy of input file without comments"
 rm -i "${fileraw_strict}"
;;
mailbody)
 echo "iii : ${fileraw} contains a copy of input file"
 rm -i "${fileraw}"
 echo "iii : ${fileraw_strict} contains a copy of input file without comments"
 rm -i "${fileraw_strict}"
;;
esac
#
echo " iii : you have a some new or modified bibliographic references in ${fileou} (biblioentry)"
echo " iii : you can add them in bibrefnemo.xml" #++ filename path
echo " iii : modify date in bibrefnemo.xml" #++ filename path
echo " iii : but please take care of id last part (after underscore) that may"
echo " iii : be in conflict with existing ones" #++ to get rid of this issue, the file to be completed should be given as an extra input.
#
exit 0
Note: See TracBrowser for help on using the repository browser.