source: trunk/bibopa.sh @ 47

Last change on this file since 47 was 47, checked in by pinsard, 16 years ago

add -p and -l parameter to bibopa.sh; add bibtex input (continuation)

  • Property svn:keywords set to Id
File size: 18.0 KB
Line 
1#!/bin/bash
2#
3#    Behera, S. K., J. Luo, S. Masson, S. Rao, S. Gualdi, P. Delecluse, A.
4#    Navarra and T. Yamagata, 2004 : Paramount Impact of the Indian Ocean
5#    Dipole on the East African Short Rains: A CGCM Study, J. Climate, In
6#    press.
7#
8#    donnerait
9#
10#    <biblioentry id="behera2004">
11#    <authorgroup>
12#    <author><surname>Behera</surname> <firstname>S. K.</firstname> </author>
13#    <author><firstname>J.</firstname> <surname>Luo</surname></author>
14#    <author><firstname>S.</firstname> <surname>Masson</surname></author>
15#    <author><firstname>S.</firstname> <surname>Rao</surname></author>
16#    <author><firstname>S.</firstname> <surname>Gualdi</surname></author>
17#    <author><firstname>P.</firstname> <surname>Delecluse</surname></author>
18#    <author><firstname>A.</firstname> <surname>Navarra</surname></author>
19#    <author><firstname>T.</firstname> <surname>Yamagata</surname></author>
20#    </authorgroup>
21#    <date>2004</date>
22#    <title>Paramount Impact of the Indian Ocean Dipole on the East African
23#    Short Rains: A CGCM Study</title>
24#    <publishername>J. Climate</publishername>
25#    <biblioid class="doi">doi</biblioid>
26#    <bibliomisc role="pseudoref">In press.</bibliomisc>
27#    <bibliomisc role="internalref">from
28#    http://www.lodyc.jussieu.fr/~opatlod/NEMO_v1/6_Menu/2_page/index.html
29#    2007-03-29T16:24:31Z fplod by hand</bibliomisc>
30#    </biblioentry>
31#
32#   @book{MetReiCoh2004 ,
33#      author    = {Michael Metcalf and
34#              John Reid and Malcolm Cohen},
35#      title     = {Fortran 95/2003 explained},
36#      year      = {2004},
37#      publisher = {Oxford University Press},
38#      edition   = {Third},
39#      ISBN      = {0-19-852693-8},
40#      pages = {434}
41#   }
42# donnerait
43#    <biblioentry id="MetReiCoh2004">
44#    <authorgroup>
45#    <author><firstname>Michael</firstname> <surname>Metcalf</surname></author>
46#    <author><firstname>John</firstname> <surname>Reid</surname></author>
47#    <author><firstname>Malcolm</firstname> <surname>Cohen</surname></author>
48#    </authorgroup>
49#    <date>2004</date>
50#    <title>Fortran 95/2003 explained</title>
51#    <publisher><publishername>Oxford University Press</publishername></publisher>
52#    <biblioid class="isbn">0-19-852693-8</biblioid>
53#    <pagenums>434</pagenums>
54#    <edition>Third</edition>
55#
56# example :
57# $ ./bibopa.sh -p bibrefnemo -i data/biball.txt -t raw
58# $ ./bibopa.sh -p bibrefnemo -i data/mail2007-04-25T08:58:16Z.txt -t mailbody
59# $ ./bibopa.sh -p polyfortran -i data/petitpolyfp.bib -t bibtex -l fr
60#
61# see also mailtouser.sh
62#
63# original location :
64# /usr/home/fplod/incas/bibnemo/src/bibnemomaf/bibopa.sh sur cerbere.locean-ipsl.upmc.fr
65#
66# update
67# ++ gestion des comments
68# ++ gestion des id existants (cf à la fin)
69# ++ option debug
70# $Id$
71# fplod 2008-03-12T16:11:07Z aedon.locean-ipsl.upmc.fr (Darwin)
72# add -p option (to override bibrefnemo in file name and id)
73# add -l option by default en
74# fplod 2008-03-11T11:25:27Z aedon.locean-ipsl.upmc.fr (Darwin)
75# add bibtex entry (not finished)++
76# fplod 2007-06-20T17:18:02Z aedon.locean-ipsl.upmc.fr (Darwin)
77# <bibliomisc role="id"> replace by <biblioid class="doi">
78# smasson 2007-06-07T16:43:42Z arete.locean-ipsl.upmc.fr (Darwin)
79# Add journals
80# fplod 2007-05-10T09:17:09Z aedon.locean-ipsl.upmc.fr (Darwin)
81# dernières touches
82# fplod 2007-04-25T10:59:49Z cerbere.locean-ipsl.upmc.fr (Linux)
83# add a filein parameter and an option for mailbody
84# comments (lines beginning with #) are now possible
85# Sebastien Masson avril 2007 creation
86#
87
# rmbl STRING
# "Remove blanks": print STRING on stdout without its leading and trailing
# spaces. Only the space character is trimmed; spacing inside the string is
# left untouched.
rmbl () {
    # printf rather than echo: echo would take a value such as "-n" or "-e"
    # for one of its own options and print nothing.
    printf '%s\n' "${1-}" | sed -e 's/^ *//' -e 's/ *$//'
}
# cleanname STRING
# Print STRING on stdout after stripping separator debris from both ends.
# The rules are applied once each, in this order:
#   leading  : spaces, then one "," , then one ";" , then one "."
#              (each optionally preceded by spaces)
#   trailing : spaces, then one "," , then one ";" , then one "."
#              (each optionally followed by spaces)
# Because every rule runs only once, a space that followed a removed leading
# separator is kept (e.g. "; 12." gives " 12").
cleanname () {
    # printf rather than echo: echo would take a value such as "-n" or "-e"
    # for one of its own options and print nothing.
    printf '%s\n' "${1-}" | sed \
        -e 's/^ *//' \
        -e 's/^ *,//' \
        -e 's/^ *;//' \
        -e 's/^ *\.//' \
        -e 's/ *$//' \
        -e 's/, *$//' \
        -e 's/; *$//' \
        -e 's/\. *$//'
}
101#
# ---------------------------------------------------------------------------
# Start-up: shell mode, names used in messages, prerequisite check, defaults
# and a first sanity check on the number of command line arguments.
# ---------------------------------------------------------------------------
set -o posix
# script name without directory and without the .sh suffix
command=$(basename "${0}" .sh)
# UTC time stamp of this run (ISO 8601)
log_date=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
log=/tmp/${command}.${log_date}
#
# test if xsltproc is available (it is called once the XML file is written)
type xsltproc 1> /dev/null 2>&1
status=${?}
if [ "${status}" -ne 0 ]
then
 echo " eee : xsltproc not found"
 exit 1
fi
#
usage=" Usage : ${command} -i filein -t type -p project -l lang"
#
# default
lang=en
#
# -h has to be honoured before the argument count is checked, otherwise
# "command -h" alone is rejected as "not enough arguments".
for arg in "${@}"
do
 if [ "${arg}" = "-h" ]
 then
  echo "${usage}"
  exit 0
 fi
done
#
# -i, -t and -p each come with a value: at least 6 words are expected
minargcount=6
echo " narg ${#}"
if [ ${#} -lt ${minargcount} ]
then
   echo "eee : not enought arguments"
   echo "${usage}"
   exit 1
fi
129#
# ---------------------------------------------------------------------------
# Command line parsing.
#   -i filein   : file to convert
#   -t type     : kind of input file (checked further down)
#   -p project  : project name, used in the id of the bibliography
#   -l lang     : language of the bibliography (default set before this loop)
#   -h          : print the usage and stop
# Any other word prints the usage and stops with status 1.
# ---------------------------------------------------------------------------
while [ ${#} -gt 0 ]
do
 case "${1}" in
 -i|-t|-p|-l)
  # these options need a value; without this test the second shift below
  # would fail and -l would silently replace its default by an empty string
  if [ ${#} -lt 2 ]
  then
   echo "eee : option ${1} needs a value"
   echo "${usage}"
   exit 1
  fi
  case "${1}" in
  -i) # filein
   filein=${2}
  ;;
  -t) # type
   type=${2}
  ;;
  -p) # project
   project=${2}
  ;;
  -l) # lang
   lang=${2}
  ;;
  esac
  shift # drop the flag, its value is dropped by the common shift below
 ;;
 -h)
  echo "${usage}"
  exit 0
 ;;
 *) # other choice
  echo "${usage}"
  exit 1
 ;;
 esac
 shift # next flag
done
160#
set -u
#
# ---------------------------------------------------------------------------
# Check the options, then build ${fileraw}: a file with one reference per
# line, in which lines starting with # are comments.
# ---------------------------------------------------------------------------
# -i, -t and -p are all used below; testing them here gives a readable
# message instead of the "unbound variable" abort triggered by set -u.
if [ -z "${filein:-}" ] || [ -z "${type:-}" ] || [ -z "${project:-}" ]
then
   echo "eee : -i, -t and -p are mandatory"
   echo "${usage}"
   exit 1
fi
#
# check for filein
if [ ! -f "${filein}" ]
then
   echo "eee : ${filein} not found"
   exit 1
fi
#
case "${type}" in
raw) # file like data/biball.txt
 # already in the expected layout: used as it is
 fileraw=${filein}
;;
mailbody) # file like data/mail2007-04-25T08:58:16Z.txt
 fileraw=/tmp/$(basename "${filein}").raw
 {
  echo "# from ${filein}"
  echo "# ${log_date}"
  echo "# corrections"
  # keep the text of the non empty "correction_...=" fields
  grep 'correction_.*=' "${filein}" | grep -v 'correction_.*=$' | \
  sed -e 's/correction_.*=//'
  echo "# new references"
  # keep what lies between "newreferences=" and the "comments=" line
  awk '/newreferences=/,/comments=/' "${filein}" | \
  sed -e 's/newreferences=//' -e '/comments=/d'
 } > "${fileraw}"
 #more ${fileraw} # ++ if debug
 #read a #++ if debug
;;
bibtex) # file like data/petitpoly.bib
 fileraw=/tmp/$(basename "${filein}").raw
 # ">" and not ">>": a file left in /tmp by a previous run must not be
 # extended, otherwise every entry would be converted twice.
 # join_endcomma.awk is looked up in the current directory.
 awk -f join_endcomma.awk "${filein}" > "${fileraw}"
 #more ${fileraw} # ++ if debug
 #read a #++ if debug
;;
*)
   echo "eee : type should be raw, mailbody or bibtex"
   exit 1
;;
esac
#
# check for output
# name of the XML file written in the current directory
fileou=$( basename "${filein}" .txt).xml
if [ -f "${fileou}" ]
then
   # only a warning for now: the existing file is overwritten further down
   echo "eee : ${fileou} already exist"
   #exit 1 # ++ if not debug
fi
205#
# List of the known journals / series, indexed from 1.
# The main loop takes the FIRST entry which is found in a reference,
# followed by optional spaces and a comma. An entry which is the beginning
# of a longer one up to a comma must therefore come AFTER the longer one:
# "C. R. Acad. Sci. Paris, Earth and Planetary Sciences" is placed before
# "C. R. Acad. Sci. Paris", otherwise the long name could never be selected.
# To add a journal, append a new line before the closing parenthesis.
jlist=(
  [1]="J. Climate"
  "Journal of Climate"
  "Ocean Modelling"
  "Geophys. Res. Lett."
  "J. Geophys. Res."
  "Tellus A"
  "Tellus B"
  "J. Phys. Oceanogr."
  "Clim. Dyn."
  "Climate Dynamics"
  "Dyn. Atmos. Oceans"
  "Mon. Wea. Rev."
  "Global Biogeochem. Cycles"
  "Nonlinear Processes in Geophysics"
  "Ocean Science"
  "J. Mar. Systems"
  "J. Atmos. Sc."
  "Proc. Royal Soc."
  "Bull. Amer. Meteorol. Soc."
  "Ocean Dyn."
  "Geophysical Monograph Series"
  "Paleoceanography"
  "Annales Geophysicae"
  "Annals of Geophys."
  "Deep Sea Research Part II: Topical Studies in Oceanography"
  "Deep Sea Res. II"
  "Atmospheric Chemistry and Physics"
  "Atmospheric Chemistry and Physics Discussions"
  "Earth Plan. Sc. Lett."
  "J. Mar. Res."
  "PCMDI Report Series"
  "J. Mar. Syst."
  "Note du Pole de Modelisation"
  "Calculateurs Paralleles"
  "Note Scientifique du Pole de Modelisation"
  "Lecture Notes in Computer Science"
  "Prog. Oceanogr."
  "Deep Sea Research Part I: Oceanographic Research Papers"
  "Deep Sea Res. I"
  "IEEE Trans. Geosci. Remote Sensing"
  "ECMWF Tech. Memorandum"
  "J. Environ. Radioactivity"
  "Int. WOCE Newsletter"
  "C. R. Acad. Sci. Paris, Earth and Planetary Sciences"
  "C. R. Acad. Sci. Paris"
  "In The mathematics of models for climatology and environment"
  "Oceanol. Acta"
  "The global atmosphere and ocean system"
  "NATO Advanced Study Institute"
  "WCRP"
  "The Courier"
  "Elsevier Oceanographic Series"
  "In Conference Proceedings of the 1988 International Conference on Supercomputing"
  "In Science and engineering on Cray Supercomputers"
  "In Modeling the Earth's Climate and its Variability"
  "Fish. Oceanogr."
  "Q. J. R. Meteor. Soc."
  "In High performance computing in the geosciences"
  "Int. J. Numer. Meth. in Fluids"
  "Lecture notes in Physics"
  "J. Meterol. Soc. Japan"
  "Journal of Marine Systems"
  "Canadian Journal of Fisheries and Aquatic Sciences"
  "J. Atmos. Ocean. Tech."
  "EOS"
  "Nature"
  "Physics and Chemistry of the Earth, Part B: Hydrology, Oceans and Atmosphere"
  "Journal of Atmospheric and Oceanic Technology"
  "Lecture Notes in Computational Science Engineering"
  "In Greenhouse Gas Control Technologies"
  "Chemical Geology"
  "Marine Geology"
  "Developments in Paleoenvironmental Research"
  "Science"
  "In Mediterranean Climate Variability"
  "Global and Planetary Change"
  "Physical Review Letters"
)
284#
285#
# ---------------------------------------------------------------------------
# Start the DocBook file (an existing ${fileou} is overwritten), then
# prepare the list of references to convert.
# File names are quoted so that a name containing a space is still handled
# as one file.
# ---------------------------------------------------------------------------
    cat <<EOF > "${fileou}"
<?xml version='1.0' encoding='ISO-8859-1'?>
<!DOCTYPE bibliography PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN"
 "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd">
<bibliography id="bib${project}ref" lang="${lang}">
<bibliographyinfo>
<date>$( date -u +"%Y-%m-%dT%H:%M:%SZ" )</date>
</bibliographyinfo>
EOF
#
# strip comments (lines starting with #); the result goes to /tmp
fileraw_strict=/tmp/$(basename "${fileraw}")_strict
grep -v "^#" "${fileraw}" > "${fileraw_strict}"
# number of lines to convert; awk drops the padding some wc put before it
totlines=$( wc -l < "${fileraw_strict}" | awk '{print $1}' )
# number of the next line to convert
l=1
# ---------------------------------------------------------------------------
# Main loop: each line of ${fileraw_strict} is one reference, converted to
# one <biblioentry> appended to ${fileou}.
#   - a line starting with @ is parsed as a (joined) bibtex entry
#   - any other line is parsed as
#       "Surname, F., F. Surname, ..., F. Surname, year : title, journal, ..."
# hasauthor is 1 when an author list was found, 0 otherwise, on both paths.
# The script stops with status 1 on the first reference whose journal is not
# in jlist. Lines written to stdout without "eee" are debug traces.
# Uses rmbl, cleanname, jlist, fileou, fileraw_strict, l and totlines which
# are set earlier in the script.
# ---------------------------------------------------------------------------

# squeeze_blanks STRING
# Print STRING with each run of blanks reduced to one space and without
# blank at either end. This is what an unquoted "echo ${var}" does to its
# text, without its side effect of expanding * ? [ as file name patterns.
squeeze_blanks () {
  printf '%s\n' "${1-}" | \
  sed -e 's/[[:space:]][[:space:]]*/ /g' -e 's/^ //' -e 's/ $//'
}

# bibtex_field NAMEREGEX TEXT
# Take what precedes the first comma of TEXT and, if it looks like
# "NAME = {value}", print value only; otherwise the text is printed as is.
# NOTE(review): a value which itself contains a comma is cut at that comma.
bibtex_field () {
  squeeze_blanks "${2-}" | awk -F "," '{print $1}' | \
  sed -e "s/\(${1} * = *{\)\(.*\)\(}\)/\2/"
}

while [ "${l}" -le "${totlines}" ]
do
# extract one line; the counter is moved on at once so that "continue" below
# cannot loop on the same line
  line=$( sed -n "${l}p" "${fileraw_strict}" )
  l=$(( l + 1 ))
  line=$( squeeze_blanks "${line}" )
  # a blank line holds no reference
  if [ -z "${line}" ]
  then
    continue
  fi
  # copy kept for the XML comment of the entry ("--" is not allowed there)
  orgline=$( printf '%s\n' "${line}" | sed -e "s/--/- -/g" )
  # NOTE(review): "&" is not escaped; confirm whether the input files may
  # already contain XML entities before adding it.
  line=$( printf '%s\n' "${line}" | sed -e "s/</\&lt;/g" -e "s/>/\&gt;/g" )
  # detect if bibtex or not bibtex (starting with @)
  if [ "${line:0:1}" != "@" ]
  then
     # parsing non bibtex line
     bibtex=0
     # this layout always starts with the author list
     hasauthor=1
     # before the first :
     tmp=${line%%:*}
     # before the last ,
     auths=${tmp%,*},
     # " and " (or ", and ") before the last author becomes a plain ", " so
     # that the last two authors are separated like the other ones
     auths=$( printf '%s\n' "${auths}" | sed -e 's/,\{0,1\} and /, /g' )
     # after the last ,
     year=${tmp##*,}
     year=$( rmbl "${year}" )
     ## first author before the first .,
     first=${auths%%.,*}.
     # its firstname after the last ,
     firstfn=${first##*,}
     firstfn=$( rmbl "${firstfn}" )
     # its surname ; before the first ,
     firstsn=${first%%,*}
     firstsn=$( rmbl "${firstsn}" )
     # the other authors : what follows the first one and its comma
     case "${auths}" in
       *"${first},"*) others=${auths#*"${first},"} ;;
       *) others="" ;;
     esac
     ## ref id : surname in lower case, year, rank among entries already
     ## written with the same surname and year (2 digits)
     refid=$( printf '%s\n' "${firstsn}" | tr "[:upper:]" "[:lower:]" | tr -s " " "_"  | tr -s "'" "_" | recode -d -f ISO-8859-1..flat )${year}
     num=$( grep -c "<biblioentry id=\"${refid}_[0-9][0-9]\">" "${fileou}" )
     num=$(( num + 1 ))
     if [ "${num}" -le 9 ]
     then
       num=0${num}
     fi
     refid=${refid}_${num}
  else
     # parsing bibtex line
     # The fields are expected in this order : author (optional), title,
     # year, publisher, edition, isbn, pages.
     bibtex=1
     # before the first ,
     tmp=${line%%,*}
     endline=${line#*,}
     # refid is after { and before the first comma
     refid=${tmp#*"{"}
     refid=$( rmbl "${refid}" )
     # ++ test if refid already exist
     # check if author field is there
     if printf '%s\n' "${endline}" | grep -q "author"
     then
        hasauthor=1
        auths=$( bibtex_field "author" "${endline}" )
        echo "auths $auths" # like  Michael Metcalf and John Reid and Malcolm Cohen
        # the author field is consumed only when it is there, otherwise the
        # title would be skipped
        endline=${endline#*,}
     else
        hasauthor=0
        echo "pas d'auteurs" # ++
     fi
     title=$( bibtex_field "title" "${endline}" )
     endline=${endline#*,}
     # debug trace : the field about to be read as the year
     squeeze_blanks "${endline}" | awk -F "," '{print $1}'
     year=$( bibtex_field "year" "${endline}" )
     endline=${endline#*,}
     publisher=$( bibtex_field "publisher" "${endline}" )
     endline=${endline#*,}
     edition=$( bibtex_field "edition" "${endline}" )
     endline=${endline#*,}
     # field names as in the example of the header : ISBN and pages
     isbn=$( bibtex_field '[iI][sS][bB][nN]' "${endline}" )
     endline=${endline#*,}
     pag=$( bibtex_field 'pages\{0,1\}' "${endline}" )

     if [ "${hasauthor}" -eq 1 ]
     then
        ## first author before the first " and "
        first=${auths%% and *}
        # its surname : second word
        # ++ wrong for names made of more than two words
        firstsn=$( printf '%s\n' "${first}" | awk '{print $2}' )
        echo "firstsn $firstsn"
        # its firstname before the first " "
        firstfn=${first%% *}
        # the other authors : what follows the first " and "
        case "${auths}" in
          *" and "*) others=${auths#* and } ;;
          *) others="" ;;
        esac
     fi
  fi

  cat <<EOF >> "${fileou}"
<biblioentry id="${refid}">
  <!-- date
  $( date -u +"%Y-%m-%dT%H:%M:%SZ" )
  -->
  <!-- original text
  ${orgline}
  -->
EOF
  if [ "${hasauthor}" -eq 1 ]
  then
    cat <<EOF >> "${fileou}"
  <authorgroup>
    <author> <personname> <surname>${firstsn}</surname> <firstname>${firstfn}</firstname> </personname> </author>
EOF

    ## other authors..
    echo "auths ${auths}"
    echo "next ${others}"
    # ${others} is consumed from the left, one author per turn, so the loop
    # ends whatever the text is
    while [ -n "${others}" ]
    do
      if [ "${bibtex}" -eq 0 ]
      then
         # get the first next author; before the first ,
         next=${others%%,*}
         case "${others}" in
           *,*) others=${others#*,} ;;
           *) others="" ;;
         esac
      else
         # get the first next author; before the first " and "
         next=${others%% and *}
         case "${others}" in
           *" and "*) others=${others#* and } ;;
           *) others="" ;;
         esac
      fi
      next=$( rmbl "${next}" )
      # nothing between two separators : no author to write
      if [ -z "${next}" ]
      then
        continue
      fi
      if [ "${bibtex}" -eq 0 ]
      then
         # its surname ; after the last .
         nextsn=${next##*.}
         nextsn=$( rmbl "${nextsn}" )
         # its firstname ; before the last .
         nextfn=${next%.*}.
         nextfn=$( rmbl "${nextfn}" )
      else
         # its surname : second word
         # ++ wrong for names made of more than two words
         nextsn=$( printf '%s\n' "${next}" | awk '{print $2}' )
         echo "nextsn $nextsn"
         # its firstname before the first " "
         nextfn=${next%% *}
      fi
#
      echo "    <author> <personname> <surname>${nextsn}</surname> <firstname>${nextfn}</firstname> </personname> </author>" >> "${fileou}"
      echo "    <author> <personname> <surname>${nextsn}</surname> <firstname>${nextfn}</firstname> </personname> </author>"  #++debug
    done
    echo "  </authorgroup>"  >> "${fileou}"
  fi

# end of the line ; after the first :
  endline=${line#*:}

## find the journal : first entry of jlist found before a comma
## (case is ignored, and a . of the journal name stands for any character)
  j=1
  jfound=""
  jlistsize=${#jlist[@]}
  while [[ ${j} -le ${jlistsize} && "${jfound}" == "" ]]
  do
    if printf '%s\n' "${endline}" | grep -qi "${jlist[j]} *,"
    then
      jfound="${jlist[j]}"
    fi
    j=$(( j + 1 ))
  done
  if [ "${jfound}" == "" ]
  then
      echo "eee: Journal not found "
      echo "${endline}"
      exit 1
#+++ following lines are not yet validated; they are never executed as
#+++ long as the exit above is there
      # it might be a book, a manual, a conference, etc. ++
      echo "  <title>${title}</title>" >> "${fileou}"
      echo "  <biblioid class=\"isbn\">${isbn}</biblioid>" >> "${fileou}"
      cat <<EOF >> "${fileou}"
  <biblioset relation="nojournal">
    <title>${title}</title>
    <pagenums>${pag}</pagenums>
    <pubdate>${year}</pubdate>
  </biblioset>
EOF
      cat <<EOF >> "${fileou}"
</biblioentry>

EOF
  fi # end of if jfound empty (ie not an article)
  if [ "${jfound}" != "" ]
  then
## title
# what stands before the journal name
# NOTE(review): the search above ignores case but this cut does not; a
# journal written with another case leaves the whole text in the title.
    title=${endline%%"${jfound}"*}
    title=$( cleanname "${title}" )
    echo "  <title>${title}</title>" >> "${fileou}"
## end
## end of the line ; after the first ${jfound}
    endline=${endline#*"${jfound}"}
    endline=$( cleanname "${endline}" )
## doi : the first "doi :" marker, whatever its case and spacing, is
## normalized to "doi:"
    endline=$( squeeze_blanks "${endline}" | sed -e "s/[dD][oO][iI] *\t* *: *\t* */doi:/" )
    if printf '%s\n' "${endline}" | grep -qi "doi:"
    then
      doi=${endline##*doi:}
      echo "  <biblioid class=\"doi\">${doi}</biblioid>" >> "${fileou}"
      endline=${endline%doi:*}
      endline=$( cleanname "${endline}" )
    else
      echo "non doi: ${line}"
    fi
    # number of commas left : 1 is "volume, pages", 2 is "volume, issue,
    # pages". Counted by the shell itself : the output of wc -c is padded
    # with spaces on some systems and would then match neither 1 nor 2.
    commas=${endline//[!,]/}
    num=${#commas}
    # NOTE(review): the first case writes relation="journal", the two other
    # ones role="journal"; kept as found, to be confirmed.
    case "${num}" in
      1)
### echo ${num}: ${endline}
          vol=${endline%,*}
          vol=$( cleanname "${vol}" )
          pag=${endline##*,}
          pag=$( cleanname "${pag}" )
          cat <<EOF >> "${fileou}"
  <biblioset relation="journal">
    <title>${jfound}</title>
    <volumenum>${vol}</volumenum><pagenums>${pag}</pagenums>
    <pubdate>${year}</pubdate>
  </biblioset>
EOF
      ;;
      2)
          vol=${endline%,*}
          vol=$( cleanname "${vol}" )
          iss=${vol##*,}
          iss=$( cleanname "${iss}" )
          vol=${vol%,*}
          vol=$( cleanname "${vol}" )
          pag=${endline##*,}
          pag=$( cleanname "${pag}" )
          cat <<EOF >> "${fileou}"
  <biblioset role="journal">
    <title>${jfound}</title>
    <volumenum>${vol}</volumenum><issuenum>${iss}</issuenum><pagenums>${pag}</pagenums>
    <pubdate>${year}</pubdate>
  </biblioset>
EOF
      ;;
      *)
          # unknown layout : what is left is kept as it is
          echo "${num}: ${endline}"
          cat <<EOF >> "${fileou}"
  <biblioset role="journal">
    <title>${jfound}</title>
    <pubdate>${year}</pubdate>
    <bibliomisc>${endline}</bibliomisc>
  </biblioset>
EOF
      ;;
    esac

    cat <<EOF >> "${fileou}"
</biblioentry>

EOF

  fi # end of if jfound not empty

done
# ---------------------------------------------------------------------------
# Close the DocBook file, build the HTML version, check the XML with xmlto
# and offer to remove the working files (rm -i asks before each removal).
# ---------------------------------------------------------------------------
echo "</bibliography>" >> "${fileou}"

# HTML file written in the current directory, named after the input file
xsltproc \
--output "$( basename "${filein}" .txt).html" \
http://docbook.sourceforge.net/release/xsl/current/xhtml/docbook.xsl \
"${fileou}"

# run only for the messages it writes on stderr
xmlto pdf "${fileou}" 2> xml.err
#
# clean
echo "iii : xml.err contains stderr from the following command "
echo "iii : which was done just to check consistence of ${fileou}"
echo "iii : xmlto pdf ${fileou}"
rm -i xml.err
case "${type}" in
raw)
 # ${fileraw} is the input file itself : only the stripped copy is removed
 echo "iii : ${fileraw_strict} contains a copy of input file without comments"
 rm -i "${fileraw_strict}"
;;
mailbody|bibtex)
 # both types build ${fileraw} in /tmp. bibtex is handled here too,
 # otherwise its working files would stay in /tmp after every run.
 echo "iii : ${fileraw} contains a copy of input file"
 rm -i "${fileraw}"
 echo "iii : ${fileraw_strict} contains a copy of input file without comments"
 rm -i "${fileraw_strict}"
;;
esac
#
echo " iii : you have a some new or modified bibliographic references in ${fileou} (biblioentry)"
echo " iii : you can add them in ${fileou}" #++ filename path
echo " iii : modify date in ${fileou}" #++ filename path
echo " iii : but please take care of id last part (after underscore) that may"
echo " iii : be in conflict with existing ones" #++ to get rid of this, the file to be completed should be given as an additional input
#
exit 0
Note: See TracBrowser for help on using the repository browser.