source: trunk/bibopa.sh @ 102

Last change on this file since 102 was 102, checked in by pinsard, 14 years ago

improve shell scripts robustness

  • Property svn:keywords set to Id
File size: 33.5 KB
Line 
1#!/bin/bash
2#+
3#
4# =========
5# bibopa.sh
6# =========
7#
8# -------------------------------------------------
9# transform a bibliography file in DocBook 5 format
10# -------------------------------------------------
11#
12# SYNOPSIS
13# ========
14#
15# ::
16#
17#  $ bibopa.sh -i filein -t type -o order -p project -l lang
18#
19#
20# DESCRIPTION
21# ===========
22#
23#
# Each entry in the same input file must follow the same order for authors
25# except the first one.
26#
27# With ``fs`` in option order you will correctly process raw entry with
28# firstname and surname in this order for authors except the first one.
29#
30# If raw entry uses surname and firstname order, you should use ``sf``.
31#
32# ::
33#
34#    Behera, S. K., J. Luo, S. Masson, S. Rao, S. Gualdi, P. Delecluse, A.
35#    Navarra and T. Yamagata, 2004 : Paramount Impact of the Indian Ocean
36#    Dipole on the East African Short Rains: A CGCM Study, J. Climate, In
37#    press.
38#
39#
# would give
41#
42# ::
43#
44#    <biblioentry xml:id="behara2004">
45#    <authorgroup>
46#    <author><surname>Behera</surname> <firstname>S. K.</firstname> </author>
47#    <author><firstname>J.</firstname> <surname>Luo</surname></author>
48#    <author><firstname>S.</firstname> <surname>Masson</surname></author>
49#    <author><firstname>S.</firstname> <surname>Rao</surname></author>
50#    <author><firstname>S.</firstname> <surname>Gualdi</surname></author>
51#    <author><firstname>P.</firstname> <surname>Delecluse</surname></author>
#    <author><firstname>A.</firstname> <surname>Navarra</surname></author>
53#    <author><firstname>T.</firstname> <surname>Yamagata</surname></author>
54#    </authorgroup>
55#    <date>2004</date>
56#    <title>Paramount Impact of the Indian Ocean Dipole on the East African
57#    Short Rains: A CGCM Study</title>
58#    <publishername>J. Climate</publishername>
59#    <biblioid class="doi">doi</biblioid>
60#    <bibliomisc role="pseudoref">In press.</bibliomisc>
61#    <bibliomisc role="internalref">from
62#    http://www.lodyc.jussieu.fr/~opatlod/NEMO_v1/6_Menu/2_page/index.html
63#    2007-03-29T16:24:31Z fplod by hand</bibliomisc>
64#    </biblioentry>
65#
66#
67# ::
68#
69#   @book{MetReiCoh2004 ,
70#      author    = {Michael Metcalf and
71#              John Reid and Malcolm Cohen},
72#      title     = {Fortran 95/2003 explained},
73#      year      = {2004},
74#      publisher = {Oxford University Press},
75#      edition   = {Third},
76#      ISBN      = {0-19-852693-8},
77#      pages = {434}
78#   }
79#
80#
# would give
82#
83# ::
84#
85#    <biblioentry xml:id="MetReiCoh2004">
86#    <authorgroup>
87#    <author><firstname>Michael</firstname> <surname>Metcalf</surname></author>
88#    <author><firstname>John</firstname> <surname>Reid</surname></author>
89#    <author><firstname>Malcolm</firstname> <surname>Cohen</surname></author>
90#    </authorgroup>
#    <date>2004</date>
92#    <title>Fortran 95/2003 explained</title>
93#    <publisher><publishername>Oxford University Press</publishername></publisher>
94#    <biblioid class="isbn">0-19-852693-8</biblioid>
95#    <pagenums>434</pagenums>
96#    <edition>Third</edition>
97#
98#
99# EXAMPLES
100# ========
101#
102# To transform the NEMO bibliography file in raw format::
103#
#  $ ./bibopa.sh -p bibrefnemo -i data/biball.txt -t raw
105#
106#
107# To transform a bibliography file in mailbody format::
108#
109#  $ ./bibopa.sh -p bibrefnemo -i data/mail2007-05-10T09:01:56Z -t mailbody
110#
111#
112# To transform a bibliography file in bibtex format in french::
113#
114#  $ ./bibopa.sh -p polyfortran -i data/petitpolyfp.bib -t bibtex -l fr
115#
116#
117# To transform the SUPERBIB demo bibliography file in raw format::
118#
119#  $ ./bibopa.sh -p demo1 -i data/bibdemo1.txt -t raw -l fr
120#
121#
122# SEE ALSO
123# ========
124#
125# mailtouser.sh_
126#
127# .. _mailtouser.sh : mailtouser.sh.html
128#
129# TODO
130# ====
131#
132# gestion des comments
133#
134# gestion des id existants (cf à la fin)
135#
136# option debug
137#
# should use iconv instead of recode for portability,
# but no equivalent of the recode "flat" functionality has been found
# in iconv yet
141#
142# write something in the logfile !
143#
144# EVOLUTIONS
145# ==========
146#
147# $Id$
148#
149# - fplod 2009-03-13T09:35:42Z aedon.locean-ipsl.upmc.fr (Darwin)
150#
151#   * add "High Performance Computing in Science and Engineering"
152#   * add "Annals of Glaciology"
153#   * add "Journal of Physical Oceanography"
154#
155# - fplod 2009-01-15T10:36:18Z aedon.locean-ipsl.upmc.fr (Darwin)
156#
157#   * add description of ``-o`` option
158#   * add journal "PNAS"
159#   * add journal "Lettre PIGB-PMRC"
160#
161# - fplod 2009-01-14T15:27:48Z aedon.locean-ipsl.upmc.fr (Darwin)
162#
163#   * add journal "Water Resources Research"
164#   * add journal "Climate Research"
165#   * add conf "Colloque du GIS « Climat-Environnement-Sociétés »"
166#   * add journal "Agricultural and Forest Meteorology"
167#   * add conf "VIIth IAHS Scientific Assembly"
168#   * add conf "AGU fall Meeting"
169#   * add journal "Phil. Trans. Roy. Soc. B"
170#   * add journal "Phil. Trans. Roy. Soc. B"
171#   * add conf "4th Alexander Von Humboldt International Conference"
172#   * add journal/conf "??" (Should not be used too much !)
173#   * add journal "Nature Geosciences"
174#   * add journal "International Journal of Climatology"
175#   * add journal "Journal of Hydrology"
176#   * add journal "Revue de l'Énergie"
177#   * add journal "Clim. Res."
178#   * add journal "Hydrol. Earth Sys. Sci."
179#   * add journal "Ambio"
180#   * add journal "Theoretical and applied climatology"
181#   * add journal "Remote Sensing of Environnement"
182#
183# - fplod 2008-10-28T11:40:19Z aedon.locean-ipsl.upmc.fr (Darwin)
184#
185#   * add journal "Progress In Oceanography"
186#   * add journal "Climatic Change"
187#   * add journal "Ocean Dynamics"
188#   * add journal "Phil. Trans. Roy. Soc. A"
189#   * add journal "Izvestiya Atmospheric and Oceanic Physics"
190#   * add journal "Journal of Hydrometeorology"
191#   * add journal "Remote Sensing of Environment"
192#   * add journal "Oceanology"
193#   * add journal "Water Resour. Res."
194#   * add journal "Advances in Atmospheric Sciences"
195#   * add journal "Quaternary Science Reviews"
196#   * replace "Elsevier Oceanography Series" by "Elsevier Oceanographic Series"
197#   * bug fix : unclosed date tag
198#   * trouble with flat conversion for id building ...
#   * remove interactivity at the end (allowing redirection of stderr and
#     stdout)
201#   * strip blank lines
202#   * bug fix : counting nb of occurences of "," technique ... to be improved
203#
204# - fplod 2008-09-16T15:19:59Z aedon.locean-ipsl.upmc.fr (Darwin)
205#
206#   * comments in ReStructured Text
207#
208# - fplod 2008-05-16T10:33:16Z aedon.locean-ipsl.upmc.fr (Darwin)
209#
210#   * add parameter -o for firstname/surname order in bibtex file
211#
212# - fplod 2008-05-15T15:15:04Z aedon.locean-ipsl.upmc.fr (Darwin)
213#
214#   * new way of processing bibtex file : external/bibtex2xml.py
215#
216# - fplod 2008-04-30T07:11:58Z aedon.locean-ipsl.upmc.fr (Darwin)
217#
218#   * chgt for dbk5 out
#   * xmlto 0.0.18 doesn't like this docbook release. remove
220#   * usage of xml (xmlstarlet) for validation
221#   * bug fix
222#   * add journal names from demo1
223#   * fplod 2008-03-12T16:11:07Z aedon.locean-ipsl.upmc.fr (Darwin)
#   * add -p option (to override bibrefnemo in file name and id)
225#   * add -l option by default en
226#
227# - fplod 2008-03-11T11:25:27Z aedon.locean-ipsl.upmc.fr (Darwin)
228#
229#   * add bibtex entry (not finished)
230#
231# - fplod 2007-06-20T17:18:02Z aedon.locean-ipsl.upmc.fr (Darwin)
232#
233#   * <bibliomisc role="id"> replace by <biblioid class="doi">
234#
235# - smasson 2007-06-07T16:43:42Z arete.locean-ipsl.upmc.fr (Darwin)
236#
237#   * Add journals
238#
239# - fplod 2007-05-10T09:17:09Z aedon.locean-ipsl.upmc.fr (Darwin)
240#
241#   * dernières touches
242#
243# - fplod 2007-04-25T10:59:49Z cerbere.locean-ipsl.upmc.fr (Linux)
244#
245#   * add a filein parameter and an option for mailbody
#   * comments (lines beginning with #) are now possible
247#
248# - Sebastien Masson avril 2007
249#
250#   * creation
251#
252#-
253
# rmbl - remove leading and trailing blanks (space characters) from a string.
# Arguments: ${1} - the string to trim
# Outputs:   the trimmed string, followed by a newline, on stdout
rmbl () {
   # printf instead of echo : echo would treat a value such as "-n" or "-e"
   # as one of its own options and print nothing
   printf '%s\n' "${1}" | sed -e 's/^ *//' -e 's/ *$//'
}
# cleanname - strip the separators surrounding a field.
# Removes, in this order: leading blanks, one leading ",", one leading ";",
# one leading ".", then trailing blanks, one trailing ",", one trailing ";"
# and one trailing "." (each possibly followed by blanks).
# Arguments: ${1} - the raw field
# Outputs:   the cleaned field, followed by a newline, on stdout
cleanname () {
   # printf instead of echo : echo would swallow a value such as "-n"
   printf '%s\n' "${1}" | sed -e 's/^ *//' \
      -e 's/^ *,//' \
      -e 's/^ *;//' \
      -e 's/^ *\.//' \
      -e 's/ *$//' \
      -e 's/, *$//' \
      -e 's/; *$//' \
      -e 's/\. *$//' \
      -e 's/^ *//' \
      -e 's/ *$//'
   # the last two expressions trim the blanks uncovered once the punctuation
   # is gone (", foo ." used to come out as " foo ")
}
267#
# switch bash to POSIX mode, except on AIX and IRIX64 where only a notice
# is printed
system=$(uname)
if [ "${system}" = "AIX" ] || [ "${system}" = "IRIX64" ]
then
   echo " www : no specific posix checking"
else
   set -o posix
fi
unset system
278#
# name of this script without its directory (used in the usage message)
# quoted so that a path containing blanks is handed to basename as one word
command=$(basename "${0}")
# UTC timestamp of this run, like 20090313T093542Z
log_date=$(date -u +"%Y%m%dT%H%M%SZ")
# log file name : /tmp/<script name without .sh>.log.<timestamp>
log=/tmp/$(basename "${command}" .sh).log.${log_date}
282#
# test if the external programs used by this script (xsltproc, xml, python
# and recode) are available ; stop at the first one which is missing
for tool in xsltproc xml python recode
do
   if ! type ${tool} 1> /dev/null 2>&1
   then
      echo " eee : ${tool} not found"
      exit 1
   fi
done
unset status
unset tool
330#
usage=" Usage : ${command} -i filein -t type -o order -p project -l lang"
#
# defaults for the optional parameters
lang=en
# fs means "firstname surname"
order="fs"
#
# filein, type and project are all expanded further down under "set -u",
# so -i, -t and -p (each with its value) are needed : at least 6 words
minargcount=6
if [ ${#} -lt ${minargcount} ]
then
   echo "eee : not enought arguments"
   echo "${usage}"
   exit 1
fi
unset minargcount
#
# positional parameters and option variables may be unset while parsing
set +u
while [ ! -z "${1}" ]
do
   case "${1}" in
      -i|-t|-o|-p|-l)
         # each of these options must be followed by its value ; without
         # this test an option given last was silently set to ""
         if [ ${#} -lt 2 ]
         then
            echo "eee : option ${1} needs a value"
            echo "${usage}"
            exit 1
         fi
         case "${1}" in
            -i)
               filein=${2}
            ;;
            -t)
               type=${2}
            ;;
            -o)
               # order of firstname surname in bibtex file
               order=${2}
            ;;
            -p)
               project=${2}
            ;;
            -l)
               lang=${2}
            ;;
         esac
         # skip the value
         shift
      ;;
      -h)
         echo "${usage}"
         exit 0
      ;;
      *)
         # other choice
         echo "eee : unknown option ${1}"
         echo "${usage}"
         exit 1
      ;;
   esac
   # next flag
   shift
done
#
# report missing mandatory options here, with the usage message, instead of
# aborting later on an "unbound variable" error
if [ -z "${filein}" ] || [ -z "${type}" ] || [ -z "${project}" ]
then
   echo "eee : options -i, -t and -p are mandatory"
   echo "${usage}"
   exit 1
fi
unset usage
#
set -u
390#
# check for filein
# (quoted : an empty name must fail the test instead of turning it into
# "[ ! -f ]", and a name containing blanks must stay a single word)
if [ ! -f "${filein}" ]
then
   echo "eee : ${filein} not found"
   exit 1
fi
397#
# build the raw file (fileraw) and the name of the output file (fileou)
# according to the type of the input file ; every file name is quoted so
# that names containing blanks are handled as one word
# NOTE(review): the intermediate files get predictable names under /tmp ;
# mktemp would be safer but would change the names other parts of the
# script derive from fileraw - to be confirmed before changing
case "${type}" in
   raw)
      # file like data/biball.txt : used as it is
      fileraw=${filein}
      fileou=$( basename "${filein}" .txt).xml
   ;;
   mailbody)
      # file like data/mail2007-04-25T08:58:16Z.txt
      fileraw=/tmp/$(basename "${filein}").raw
      fileou=$( basename "${filein}" .txt).xml
      {
         echo "# from ${filein}"
         echo "# ${log_date}"
         echo "# corrections"
         # non empty "correction_...=" fields, without their keyword
         grep "correction_.*=" "${filein}" | grep -v "correction_.*=$" | \
            sed -e "s/correction_.*=//"
         echo "# new references"
         # lines from "newreferences=" up to (but excluding) "comments="
         awk "/newreferences=/,/comments=/" "${filein}" | \
            sed -e "s/newreferences=//" -e "/comments=/d"
      } > "${fileraw}"
      #more ${fileraw} # ++ if debug
      #read a #++ if debug
   ;;
   bibtex)
      # file like data/petitpoly.bib
      fileraw=/tmp/$(basename "${filein}").raw
      awk -f join_endcomma.awk "${filein}" > "${fileraw}"
      fileou=$( basename "${filein}" .bib).xml
   ;;
   *)
      echo "eee : type should be raw, mailbody or bibtex"
      exit 1
   ;;
esac
429#
# check for output : never overwrite an existing file
# (quoted : unquoted, an empty name gave "[ -f ]" which is true, and a name
# containing blanks made the test itself fail)
if [ -f "${fileou}" ]
then
   echo "eee : ${fileou} already exist"
   exit 1
fi
436#
# names of the known journals, proceedings and series ; the first index is 1
# and the order of the entries is kept as it is
jlist=(
   [1]="J. Climate"
   "Journal of Climate"
   "Ocean Modelling"
   "Geophys. Res. Lett."
   "J. Geophys. Res."
   "Tellus A"
   "Tellus B"
   "J. Phys. Oceanogr."
   "Clim. Dyn."
   "Clim. Res."
   "Climate Dynamics"
   "Dyn. Atmos. Oceans"
   "Mon. Wea. Rev."
   "Global Biogeochem. Cycles"
   "Nonlinear Processes in Geophysics"
   "Ocean Science"
   "J. Mar. Systems"
   "J. Atmos. Sc."
   "Proc. Royal Soc."
   "Bull. Amer. Meteorol. Soc."
   "Ocean Dyn."
   "Geophysical Monograph Series, Earth's Climate: The Ocean-Atmosphere Interaction"
   "Paleoceanography"
   "Annales Geophysicae"
   "Annals of Geophys."
   "Deep Sea Research Part II: Topical Studies in Oceanography"
   "Deep Sea Res. II"
   "Atmospheric Chemistry and Physics"
   "Atmospheric Chemistry and Physics Discussions"
   "Earth Plan. Sc. Lett."
   "J. Mar. Res."
   "PCMDI Report Series"
   "J. Mar. Syst."
   "Note du Pole de Modelisation"
   "Calculateurs Paralleles"
   "Note Scientifique du Pole de Modelisation"
   "Lecture Notes in Computer Science"
   "Prog. Oceanogr."
   "Deep Sea Research Part I: Oceanographic Research Papers"
   "Deep Sea Res. I"
   "IEEE Trans. Geosci. Remote Sensing"
   "ECMWF Tech. Memorandum"
   "J. Environ. Radioactivity"
   "Int. WOCE Newsletter"
   "C. R. Acad. Sci. Paris"
   "C. R. Acad. Sci. Paris, Earth and Planetary Sciences"
   "In The mathematics of models for climatology and environment"
   "Oceanol. Acta"
   "The global atmosphere and ocean system"
   "NATO Advanced Study Institute"
   "WCRP"
   "The Courier"
   "Elsevier Oceanography Series"
   "In Conference Proceedings of the 1988 International Conference on Supercomputing"
   "In Science and engineering on Cray Supercomputers"
   "In Modeling the Earth's Climate and its Variability"
   "Fish. Oceanogr."
   "Q. J. R. Meteor. Soc."
   "In High performance computing in the geosciences"
   "Int. J. Numer. Meth. in Fluids"
   "Lecture notes in Physics"
   "J. Meterol. Soc. Japan"
   "Journal of Marine Systems"
   "Canadian Journal of Fisheries and Aquatic Sciences"
   "J. Atmos. Ocean. Tech."
   "EOS"
   "Nature"
   "Nature Geosciences"
   "Physics and Chemistry of the Earth, Part B: Hydrology, Oceans and Atmosphere"
   "Journal of Atmospheric and Oceanic Technology"
   "Lecture Notes in Computational Science Engineering"
   "In Greenhouse Gas Control Technologies"
   "Chemical Geology"
   "Marine Geology"
   "Developments in Paleoenvironmental Research"
   "Science"
   "In Mediterranean Climate Variability"
   "Global and Planetary Change"
   "Physical Review Letters"
   "Progress In Oceanography"
   "Climatic Change"
   "Ocean Dynamics"
   "Phil. Trans. Roy. Soc. A"
   "Phil. Trans. Roy. Soc. B"
   "Izvestiya Atmospheric and Oceanic Physics"
   "Journal of Hydrometeorology"
   "Journal of Hydrology"
   "Remote Sensing of Environment"
   "Oceanology"
   "Water Resour. Res."
   "Water Resources Research"
   "Advances in Atmospheric Sciences"
   "Quaternary Science Reviews"
   "AGU monograph Ocean Circulation Mechanisms and Impacts"
   "Climate Research"
   "Colloque du GIS « Climat-Environnement-Sociétés »"
   "Agricultural and Forest Meteorology"
   "VIIth IAHS Scientific Assembly"
   "AGU fall Meeting"
   "4th Alexander Von Humboldt International Conference"
   "International Journal of Climatology"
   "Revue de l'Énergie"
   "Hydrol. Earth Sys. Sci."
   "Ambio"
   "Theoretical and applied climatology"
   "Remote Sensing of Environnement"
   "PNAS"
   "Lettre PIGB-PMRC"
   "High Performance Computing in Science and Engineering"
   "Annals of Glaciology"
   "Journal of Physical Oceanography"
   "??"
   #
   # following Journal names are fake ones for demo1 : references extracted
   # from Cantratrix Sopronica L. written by Georges Perec
   # found in http://www-rocq.inria.fr/who/Marc.Thiriet/PsFil/Biblio/cantatrix.ps
   "Res. Proc. neurophysiol. Fanatic Soc."
   "New Records Ass. J."
   "Am. J. Allegrol."
   "J. Neurochem. Neurocytol. Enzymol."
   "Hammersmith J."
   "Nasa Rept."
   "C.r. Assoc. Conc. Lam."
   "Gaz. med. franco-rus."
   "Amer. J. music. Deciency"
)
# ij is left on the index of the last entry
ij=${#jlist[@]}
683#
684# strip comments and blank lines
# drop the comment lines (starting with # or %) and the empty lines of the
# raw file ; file names are quoted so that blanks in them are harmless
fileraw_strict=/tmp/$(basename "${fileraw}")_strict
grep -v "^#" "${fileraw}" | grep -v "^%" | sed "/^$/d" > "${fileraw_strict}"
687# following line is not compatible with XML header ++
688# comments for memory (why did I write this line !?)
689# iconv -f ISO-8859-1 -t UTF-8 ${fileraw_strict} > ${fileraw_strict}2
690# mv ${fileraw_strict}2 ${fileraw_strict}
691case ${type} in
692   bibtex)
693      fileou_bibtexml=/tmp/$(basename ${fileraw})_bibtexml
694      python ./external/bibtex2xml.py ${fileraw_strict} > ${fileou_bibtexml}
695      #more  ${fileou_bibtexml}
696      #read a
697      xsltproc --output ${fileou} \
698         --param lang "'${lang}'" \
699         --param project "'${project}'" \
700         --param makedate "'${log_date}'" \
701         --param order "'${order}'" \
702         bibtexml2dbk.xsl ${fileou_bibtexml}
703      rm ${fileou_bibtexml}
704   ;;
705   *)
706      cat <<EOF > ${fileou}
707<?xml version='1.0' encoding='ISO-8859-1'?>
708<bibliography
709version="5.0"
710xmlns="http://docbook.org/ns/docbook"
711xml:id="bib${project}ref"
712xml:lang="${lang}">
713
714<info>
715<date>${log_date}</date>
716</info>
717EOF
718      totlines=$( wc -l ${fileraw_strict} | awk '{print $1}' )
719      l=1
720      while [ ${l} -le ${totlines}  ]
721      do
722         # extract one line
723         line=$( sed -n ${l}p ${fileraw_strict} )
724         orgline=$( echo ${line} | sed -e "s/--/- -/g" )
725         line=$( echo ${line} | sed -e "s/</\&lt;/g" -e "s/>/\&gt;/g" )
726         # parsing non bibtex line
727         bibtex=0
728         # before the first :
729         tmp=${line%%:*}
730         # before the last ,
731         auths=${tmp%,*},
732         # replace " and " by ","
733         auths=$( echo "${auths}" | sed -e "s/ and /,/g" )
734         # replace " et " by ","
735         auths=$( echo "${auths}" | sed -e "s/ et /,/g" )
736         # after the last ,
737         year=${tmp##*,}
738         year=$( rmbl "${year}" )
739         ## first author before the first .,
740         first=${auths%%.,*}.
741         # its firstname after the last ,
742         firstfn=${first##*,}
743         firstfn=$( rmbl "${firstfn}" )
744         # its surname ; before the first ,
745         firstsn=${first%%,*}
746         firstsn=$( rmbl "${firstsn}" )
747         ## ref id
748         firstsn_flat=$( echo "${firstsn}" | recode -d -f ISO-8859-15..flat )
749         # echo "firstsn : ${firstsn}" # ++ debug
750         # echo "firstsn_flat : ${firstsn_flat}" #++ debug
751         # test if no encoding problem after ISO-8859-15 to flat conversion
752         firstsn_test1=$(echo "${firstsn_flat}" | tr -d "[:alpha:]" | tr -d " " | tr -d "'" | tr -d "-")
753         #echo "firstsn_test1 : ${firstsn_test1}" # ++ debug
754         if [ ! -z "${firstsn_test1}" ]
755         then
756            echo "www : pb on ISO-8859-15 to flat conversion"
757            echo "www : on ${firstsn}"
758            echo "www : trying UTF-8 to flat conversion ..."
759            firstsn_flat=$( echo "${firstsn}" | recode -d -f UTF-8..flat )
760            # test if no encoding problem after UTF-8 to flat conversion
761            firstsn_test2=$(echo "${firstsn_flat}" | tr -d "[:alpha:]" | tr -d " " | tr -d "'")
762            #echo "firstsn_test2 : A${firstsn_test2}A" # ++ debug
763            if [ ! -z "${firstsn_test2}" ]
764            then
765               echo "eee : pb on flat conversion of ${firstsn}"
766               exit 1
767            fi
768         fi
769         firstsn_lower=$( echo "${firstsn_flat}" | tr "[:upper:]" "[:lower:]")
770         firstsn_nopunct=$( echo "${firstsn_lower}" | tr -s " " "_"  | tr -s "'" "_")
771         refid="${firstsn_nopunct}${year}"
772         unset firstsn_lower
773         unset firstsn_flat
774         unset firstsn_test1
775         unset firstsn_test2
776         num=$( grep -c "<biblioentry xml:id=\"${refid}_[0-9][0-9]\">" ${fileou} )
777         num=$(( ${num} + 1 ))
778         [ ${num} -le 9 ] && num=0${num}
779         refid=${refid}_${num}
780         hasauthor=1
781
782         cat <<EOF >> ${fileou}
783<biblioentry xml:id="${refid}">
784  <!-- date
785  $( date -u +"%Y%m%dT%H%M%SZ" )
786  -->
787  <!-- original text
788  ${orgline}
789  -->
790EOF
791         if [ ${hasauthor} -eq 1 ]
792         then
793            cat <<EOF >> ${fileou}
794  <authorgroup>
795    <author> <personname> <surname>${firstsn}</surname> <firstname>${firstfn}</firstname> </personname> </author>
796EOF
797
798            ## other authors..
799            if [ ${bibtex} -eq 0 ]
800            then
801               previous=${first},
802               # next authors...
803               next=${auths##*${previous}}
804            fi
805            if [ ${bibtex} -eq 1 ]
806            then
807               previous=${first}"and "
808               # next authors...
809               #echo "auths ${auths}"
810               next=${auths##*${previous}}
811               if [ "${next}and " = "${previous}" ]
812               then
813                  next=""
814               fi
815            fi
816            #echo "previous ${previous}"
817            #echo "next ${next}"
818            # while the next author is not empty
819            while [  "${next}" != "" ]
820            do
821               if [ ${bibtex} -eq 0 ]
822               then
823                  # get the first next author; before the first ,
824                  next=${next%%,*}
825                  # its surname ; after the last .
826                  nextsn=${next##*.}
827                  nextsn=$( rmbl "${nextsn}" )
828                  # its firstname ; before the last .
829                  nextfn=${next%.*}.
830                  nextfn=$( rmbl "${nextfn}" )
831               fi
832               if [ ${bibtex} -eq 1 ]
833               then
834                  # get the first next author; before the first "and"
835                  next=${next%%and *}
836                  # its surname after the last " "  ++ pas vrai double nom et pb blanc dans la syntaxe
837                  # nextsn=${next##* } . ok en interactif . pas ok en script
838                  nextsn=$( echo ${next}  | awk '{print $2}')
839                  #echo "nextsn $nextsn"
840                  # its nextname before the first " "
841                  nextfn=${next%% *}
842               fi
843    #
844               echo "    <author> <personname> <surname>${nextsn}</surname> <firstname>${nextfn}</firstname> </personname> </author>" >> ${fileou}
845               #echo "    <author> <personname> <surname>${nextsn}</surname> <firstname>${nextfn}</firstname> </personname> </author>"  #++debug
846               if [ ${bibtex} -eq 0 ]
847               then
848                  previous=${next},
849                  next=${auths##*${previous}}
850               fi
851               if [ ${bibtex} -eq 1 ]
852               then
853                  previous=${next}"and "
854                  next=${auths##*${previous}}
855               fi
856            done
857            echo "  </authorgroup>"  >> ${fileou}
858         fi
859         # end of the line ; after the first :
860         endline=${line#*:}
861         ## find the journal
862         j=1
863         jfound=""
864         jlistsize=${#jlist[@]}
865         while [[ ${j} -le ${jlistsize} && "${jfound}" == "" ]]
866         do
867            ok=$( echo ${endline} | grep -ci "${jlist[j]} *," )
868            [ ${ok} -eq 1 ] && jfound="${jlist[j]}"
869            j=$(( ${j} + 1 ))
870         done
871         unset ok
872         if [ "${jfound}" == "" ]
873         then
874            echo "eee: Journal not found "
875            echo "${endline}"
876            # uncomment next line for debug
877            # set # ++ debug
878            exit 1
879            #+++ following lines are not yet validate
880            # it might be a book, a manual, a conference, etc. ++
881            echo "  <title>${title}</title>" >> ${fileou}
882            cat <<EOF >> ${fileou}
883  <biblioset relation="nojournal">
884    <title>${title}</title>
885    <pagenums>${pag}</pagenums>
886    <pubdate>${year}</pubdate>
887  </biblioset>
888EOF
889            cat <<EOF >> ${fileou}
890</biblioentry>
891
892EOF
893            # end of if jfound empty (ie not an article)
894         fi
895         if [ "${jfound}" != "" ]
896         then
897            ## title
898            # before the first :
899            title=${endline%%${jfound}*}
900            title=$( cleanname "${title}" )
901            echo "  <title>${title}</title>" >> ${fileou}
902            ## end
903            ## end of the line ; after the first ${jfound}
904            endline=${endline#*${jfound}}
905            endline=$( cleanname "${endline}" )
906            ## doi
907            endline=$( echo ${endline} | sed -e "s/[dD][oO][iI] *\t* *: *\t* */doi:/" )
908            #echo "endline for doi ${endline}"
909            ok=$( echo ${endline} | grep -ic "doi:" )
910            if [ ${ok} -eq 1 ]
911            then
912               doi=${endline##*doi:}
913               #echo "doi : $doi"
914               echo "  <biblioid class=\"doi\">${doi}</biblioid>" >> ${fileou}
915               unset doi
916               endline=${endline%doi:*}
917               endline=$( cleanname "${endline}" )
918            else
919               echo "non doi: ${line}"
920               echo ""
921            fi
922            num=$( echo "${endline}" |  tr -dc "," | wc -c | tr -d " ")
923            case "${num}" in
924               0)
925                  ## echo ${num}: ${endline}
926                  cat <<EOF >> ${fileou}
927  <biblioset relation="journal">
928    <title>${jfound}</title>
929    <pubdate>${year}</pubdate>
930    <bibliomisc>${endline}</bibliomisc>
931  </biblioset>
932EOF
933                  unset jfound
934                  unset year
935               ;;
936               1)
937                  ### echo ${num}: ${endline}
938                  vol=${endline%,*}
939                  vol=$( cleanname "${vol}" )
940                  pag=${endline##*,}
941                  pag=$( cleanname "${pag}" )
942                  cat <<EOF >> ${fileou}
943  <biblioset relation="journal">
944    <title>${jfound}</title>
945    <volumenum>${vol}</volumenum><pagenums>${pag}</pagenums>
946    <pubdate>${year}</pubdate>
947  </biblioset>
948EOF
949                  unset vol
950                  unset pag
951                  unset jfound
952                  unset year
953               ;;
954               2)
955                  vol=${endline%,*}
956                  vol=$( cleanname "${vol}" )
957                  iss=${vol##*,}
958                  iss=$( cleanname "${iss}" )
959                  vol=${vol%,*}
960                  vol=$( cleanname "${vol}" )
961                  pag=${endline##*,}
962                  pag=$( cleanname "${pag}" )
963                  cat <<EOF >> ${fileou}
964  <biblioset relation="journal">
965    <title>${jfound}</title>
966    <volumenum>${vol}</volumenum><issuenum>${iss}</issuenum><pagenums>${pag}</pagenums>
967    <pubdate>${year}</pubdate>
968  </biblioset>
969                  EOF
970                  unset vol
971                  unset pag
972                  unset jfound
973                  unset year
974                  unset iss
975               ;;
976               6)
977                  conftitle=${endline%%,*}
978                  endline=${endline#*,}
979                  # day(s) month
980                  confdates=${endline%%,*}
981                  endline=${endline#*,}
982                  # day(s) month and year
983                  confdates="${confdates}, ${endline%%,*}"
984                  confdates=$( cleanname "${confdates}" )
985                  endline=${endline#*,}
986                  # Town
987                  confaddress=${endline%%,*}
988                  endline=${endline#*,}
989                  # Town and country
990                  confaddress="${confaddress}, ${endline%%,*}"
991                  confaddress=$( cleanname "${confaddress}" )
992                  endline=${endline#*,}
993                  publishername=${endline%%,*}
994                  publishername=$( cleanname "${publishername}" )
995                  endline=${endline#*,}
996                  pag=${endline##*,}
997                  pag=$( cleanname "${pag}" )
998                  #set
999                  #read a
1000                  cat <<EOF >> ${fileou}
1001<biblioset relation="conference">
1002 <title>${jfound}</title>
1003 <pubdate>${year}</pubdate>
1004 <publisher><publishername>${publishername}</publishername></publisher>
1005 <pagenums>${pag}</pagenums>
1006 <confgroup>
1007  <conftitle>${conftitle}</conftitle>
1008  <confdates>${confdates}</confdates>
1009  <address>${confaddress}</address>
1010 </confgroup>
1011</biblioset>
1012EOF
1013                  unset confaddress
1014                  unset conftitle
1015                  unset confdates
1016                  unset pag
1017                  unset publishername
1018                  unset year
1019                  unset jfound
1020               ;;
1021               10)
1022                  publishername=${endline%%Editors,*}
1023                  publishername="${publishername} Editors"
1024                  publishername=$( cleanname "${publishername}" )
1025                  endline=${endline#*Editors,}
1026                  vol=${endline%%,*}
1027                  vol=$( cleanname "${vol}" )
1028                  endline=${endline#*,}
1029                  pag=${endline%%,*}
1030                  pag=$( cleanname "${pag}" )
1031                  endline=${endline#*,}
1032                  isbn=${endline%%,*}
1033                  isbn=$(echo ${isbn} | sed -e "s/ISBN ://")
1034                  isbn=$( cleanname "${isbn}" )
1035                  endline=${endline#*,}
1036                  agu=${endline%%,*}
1037                  agu=$(echo ${agu} | sed -e "s/AGU ://")
1038                  agu=$( cleanname "${agu}" )
1039                  endline=${endline#*${agu}}
1040                  endline=$( cleanname "${endline}" )
1041                  endline=$( cleanname "${endline}" )
1042                  cat <<EOF >> ${fileou}
1043  <biblioid class="isbn">${isbn}</biblioid>
1044  <biblioid class="other" otherclass="AGU">${agu}</biblioid>
1045  <biblioset relation="journal">
1046    <title>${jfound}</title>
1047    <pubdate>${year}</pubdate>
1048    <publisher><publishername>${publishername}</publishername></publisher>
1049    <volumenum>${vol}</volumenum>
1050    <pagenums>${pag}</pagenums>
1051    <bibliomisc>${endline}</bibliomisc>
1052  </biblioset>
1053                  EOF
1054                  unset isbn
1055                  unset agu
1056                  unset pag
1057                  unset publishername
1058                  unset year
1059                  unset jfound
1060                  unset vol
1061               ;;
1062               *)
1063                  #echo "${num}: ${endline}" # ++debug
1064                  #set # ++ debug
1065                  #exit 1 #++ debug
1066                  cat <<EOF >> ${fileou}
1067  <biblioset relation="journal">
1068    <title>${jfound}</title>
1069    <pubdate>${year}</pubdate>
1070    <bibliomisc>${endline}</bibliomisc>
1071  </biblioset>
1072                  EOF
1073               ;;
1074            esac
1075
1076            cat <<EOF >> ${fileou}
1077</biblioentry>
1078
1079EOF
1080            # end of if jfound not empty
1081         fi
1082         unset title
1083         unset vol
1084         unset year
1085         unset pag
1086         l=$(( ${l} + 1 ))
1087      done
1088      unset totlines
1089      unset l
1090      echo "</bibliography>" >> ${fileou}
1091   ;;
1092# end of case bibtex vs other for processing
1093esac
1094
# Check the generated file against the DocBook 5.0 XML schema with the "xml"
# command (presumably xmlstarlet, given the log file name - to be confirmed).
# stdout and stderr of the check are captured in ${xmllog}; the outcome is only
# logged and displayed, it is not tested here.
xmllog=xmlstarlet.log
xml val --err \
   --xsd http://www.docbook.org/xml/5.0/xsd/docbook.xsd \
   "${fileou}" 1> "${xmllog}" 2>&1
#
# report
echo "iii : ${xmllog} contains stdout and stderr from xml command on ${fileou}"
echo "iii : which was done just to check consistence of ${fileou}"
more "${xmllog}"
#
# remind the user of the intermediate files, depending on the input type
case "${type}" in
   raw)
      echo "iii : ${fileraw_strict} contains a copy of input file without comments"
   ;;
   mailbody)
      echo "iii : ${fileraw} contains a copy of input file"
      echo "iii : ${fileraw_strict} contains a copy of input file without comments"
   ;;
esac
#
echo " iii : you have a some new or modified bibliographic references in ${fileou} (biblioentry)"
echo " iii : you can add them in ${fileou}" #++ filename path
echo " iii : modify date in ${fileou}" #++ filename path
echo " iii : but please take care of id last part (after underscore) that may"
echo " iii : be in conflict with existing ones" #++ to avoid this issue, the file to be completed should be given as an additional input.
#
exit 0
Note: See TracBrowser for help on using the repository browser.