source: trunk/bibopa.sh @ 100

Last change on this file since 100 was 100, checked in by pinsard, 14 years ago

indent shell scripts

  • Property svn:keywords set to Id
File size: 33.4 KB
Line 
1#!/bin/bash
2#+
3#
4# =========
5# bibopa.sh
6# =========
7#
8# -------------------------------------------------
9# transform a bibliography file in DocBook 5 format
10# -------------------------------------------------
11#
12# SYNOPSIS
13# ========
14#
15# ::
16#
17#  $ bibopa.sh -i filein -t type -o order -p project -l lang
18#
19#
20# DESCRIPTION
21# ===========
22#
23#
24# Each entry in the same input file must follow the same order for authors
25# except the first one.
26#
27# With ``fs`` in option order you will correctly process raw entry with
28# firstname and surname in this order for authors except the first one.
29#
30# If raw entry uses surname and firstname order, you should use ``sf``.
31#
32# ::
33#
34#    Behera, S. K., J. Luo, S. Masson, S. Rao, S. Gualdi, P. Delecluse, A.
35#    Navarra and T. Yamagata, 2004 : Paramount Impact of the Indian Ocean
36#    Dipole on the East African Short Rains: A CGCM Study, J. Climate, In
37#    press.
38#
39#
40# would give
41#
42# ::
43#
44#    <biblioentry xml:id="behera2004">
45#    <authorgroup>
46#    <author><surname>Behera</surname> <firstname>S. K.</firstname> </author>
47#    <author><firstname>J.</firstname> <surname>Luo</surname></author>
48#    <author><firstname>S.</firstname> <surname>Masson</surname></author>
49#    <author><firstname>S.</firstname> <surname>Rao</surname></author>
50#    <author><firstname>S.</firstname> <surname>Gualdi</surname></author>
51#    <author><firstname>P.</firstname> <surname>Delecluse</surname></author>
52#    <author><firstname>A.</firstname> <surname>Navarra</surname></author>
53#    <author><firstname>T.</firstname> <surname>Yamagata</surname></author>
54#    </authorgroup>
55#    <date>2004</date>
56#    <title>Paramount Impact of the Indian Ocean Dipole on the East African
57#    Short Rains: A CGCM Study</title>
58#    <publishername>J. Climate</publishername>
59#    <biblioid class="doi">doi</biblioid>
60#    <bibliomisc role="pseudoref">In press.</bibliomisc>
61#    <bibliomisc role="internalref">from
62#    http://www.lodyc.jussieu.fr/~opatlod/NEMO_v1/6_Menu/2_page/index.html
63#    2007-03-29T16:24:31Z fplod by hand</bibliomisc>
64#    </biblioentry>
65#
66#
67# ::
68#
69#   @book{MetReiCoh2004 ,
70#      author    = {Michael Metcalf and
71#              John Reid and Malcolm Cohen},
72#      title     = {Fortran 95/2003 explained},
73#      year      = {2004},
74#      publisher = {Oxford University Press},
75#      edition   = {Third},
76#      ISBN      = {0-19-852693-8},
77#      pages = {434}
78#   }
79#
80#
81# would give
82#
83# ::
84#
85#    <biblioentry xml:id="MetReiCoh2004">
86#    <authorgroup>
87#    <author><firstname>Michael</firstname> <surname>Metcalf</surname></author>
88#    <author><firstname>John</firstname> <surname>Reid</surname></author>
89#    <author><firstname>Malcolm</firstname> <surname>Cohen</surname></author>
90#    </authorgroup>
91#    <date>2004</date>
92#    <title>Fortran 95/2003 explained</title>
93#    <publisher><publishername>Oxford University Press</publishername></publisher>
94#    <biblioid class="isbn">0-19-852693-8</biblioid>
95#    <pagenums>434</pagenums>
96#    <edition>Third</edition>
97#
98#
99# EXAMPLES
100# ========
101#
102# To transform the NEMO bibliography file in raw format::
103#
104#  $ ./bibopa.sh -p bibrefnemo -i data/biball.txt -t raw
105#
106#
107# To transform a bibliography file in mailbody format::
108#
109#  $ ./bibopa.sh -p bibrefnemo -i data/mail2007-05-10T09:01:56Z -t mailbody
110#
111#
112# To transform a bibliography file in bibtex format in french::
113#
114#  $ ./bibopa.sh -p polyfortran -i data/petitpolyfp.bib -t bibtex -l fr
115#
116#
117# To transform the SUPERBIB demo bibliography file in raw format::
118#
119#  $ ./bibopa.sh -p demo1 -i data/bibdemo1.txt -t raw -l fr
120#
121#
122# SEE ALSO
123# ========
124#
125# mailtouser.sh_
126#
127# .. _mailtouser.sh : mailtouser.sh.html
128#
129# TODO
130# ====
131#
132# handling of comments
133#
134# handling of already existing ids (see at the end)
135#
136# option debug
137#
138# should use iconv instead of recode
139# for portability, but no equivalent of the recode "flat" functionality
140# has been found in iconv yet
141#
142# write something in the logfile !
143#
144# EVOLUTIONS
145# ==========
146#
147# $Id$
148#
149# - fplod 2009-03-13T09:35:42Z aedon.locean-ipsl.upmc.fr (Darwin)
150#
151#   * add "High Performance Computing in Science and Engineering"
152#   * add "Annals of Glaciology"
153#   * add "Journal of Physical Oceanography"
154#
155# - fplod 2009-01-15T10:36:18Z aedon.locean-ipsl.upmc.fr (Darwin)
156#
157#   * add description of ``-o`` option
158#   * add journal "PNAS"
159#   * add journal "Lettre PIGB-PMRC"
160#
161# - fplod 2009-01-14T15:27:48Z aedon.locean-ipsl.upmc.fr (Darwin)
162#
163#   * add journal "Water Resources Research"
164#   * add journal "Climate Research"
165#   * add conf "Colloque du GIS « Climat-Environnement-Sociétés »"
166#   * add journal "Agricultural and Forest Meteorology"
167#   * add conf "VIIth IAHS Scientific Assembly"
168#   * add conf "AGU fall Meeting"
169#   * add journal "Phil. Trans. Roy. Soc. B"
170#   * add journal "Phil. Trans. Roy. Soc. B"
171#   * add conf "4th Alexander Von Humboldt International Conference"
172#   * add journal/conf "??" (Should not be used too much !)
173#   * add journal "Nature Geosciences"
174#   * add journal "International Journal of Climatology"
175#   * add journal "Journal of Hydrology"
176#   * add journal "Revue de l'Énergie"
177#   * add journal "Clim. Res."
178#   * add journal "Hydrol. Earth Sys. Sci."
179#   * add journal "Ambio"
180#   * add journal "Theoretical and applied climatology"
181#   * add journal "Remote Sensing of Environnement"
182#
183# - fplod 2008-10-28T11:40:19Z aedon.locean-ipsl.upmc.fr (Darwin)
184#
185#   * add journal "Progress In Oceanography"
186#   * add journal "Climatic Change"
187#   * add journal "Ocean Dynamics"
188#   * add journal "Phil. Trans. Roy. Soc. A"
189#   * add journal "Izvestiya Atmospheric and Oceanic Physics"
190#   * add journal "Journal of Hydrometeorology"
191#   * add journal "Remote Sensing of Environment"
192#   * add journal "Oceanology"
193#   * add journal "Water Resour. Res."
194#   * add journal "Advances in Atmospheric Sciences"
195#   * add journal "Quaternary Science Reviews"
196#   * replace "Elsevier Oceanography Series" by "Elsevier Oceanographic Series"
197#   * bug fix : unclosed date tag
198#   * trouble with flat conversion for id building ...
199#   * remove interactivity at the end (allowing redirection of stderr and
200#     stdout)
201#   * strip blank lines
202#   * bug fix : counting nb of occurrences of "," technique ... to be improved
203#
204# - fplod 2008-09-16T15:19:59Z aedon.locean-ipsl.upmc.fr (Darwin)
205#
206#   * comments in ReStructured Text
207#
208# - fplod 2008-05-16T10:33:16Z aedon.locean-ipsl.upmc.fr (Darwin)
209#
210#   * add parameter -o for firstname/surname order in bibtex file
211#
212# - fplod 2008-05-15T15:15:04Z aedon.locean-ipsl.upmc.fr (Darwin)
213#
214#   * new way of processing bibtex file : external/bibtex2xml.py
215#
216# - fplod 2008-04-30T07:11:58Z aedon.locean-ipsl.upmc.fr (Darwin)
217#
218#   * chgt for dbk5 out
219#   * xmlto 0.0.18 doesn't like this docbook release. remove
220#   * usage of xml (xmlstarlet) for validation
221#   * bug fix
222#   * add journal names from demo1
223#   * fplod 2008-03-12T16:11:07Z aedon.locean-ipsl.upmc.fr (Darwin)
224#   * add -p option (to override bibrefnemo in file name and id)
225#   * add -l option by default en
226#
227# - fplod 2008-03-11T11:25:27Z aedon.locean-ipsl.upmc.fr (Darwin)
228#
229#   * add bibtex entry (not finished)
230#
231# - fplod 2007-06-20T17:18:02Z aedon.locean-ipsl.upmc.fr (Darwin)
232#
233#   * <bibliomisc role="id"> replace by <biblioid class="doi">
234#
235# - smasson 2007-06-07T16:43:42Z arete.locean-ipsl.upmc.fr (Darwin)
236#
237#   * Add journals
238#
239# - fplod 2007-05-10T09:17:09Z aedon.locean-ipsl.upmc.fr (Darwin)
240#
241#   * final touches
242#
243# - fplod 2007-04-25T10:59:49Z cerbere.locean-ipsl.upmc.fr (Linux)
244#
245#   * add a filein parameter and an option for mailbody
246#   * comments (line beginning with #) are now possible
247#
248# - Sebastien Masson avril 2007
249#
250#   * creation
251#
252#-
253
# rmbl - remove the leading and trailing blanks (spaces) of a string
#
# Arguments : $1 - string to trim
# Outputs   : the trimmed string on stdout
rmbl () {
   # printf rather than echo : a value such as "-n" or "-e" would be taken
   # for an echo option and silently lost
   printf '%s\n' "${1}" | sed -e 's/^ *//' -e 's/ *$//'
}
257cleanname () {
258   echo "${1}" | sed -e "s/^ *//" \
259      -e "s/^ *,//" \
260      -e "s/^ *;//" \
261      -e "s/^ *\.//" \
262      -e "s/ *$//" \
263      -e "s/, *$//" \
264      -e "s/; *$//" \
265      -e "s/\. *$//"
266}
267#
# Switch bash to POSIX mode everywhere except on AIX and IRIX64, where
# only a message is printed.
system=$(uname)
if [ "${system}" = "AIX" ] || [ "${system}" = "IRIX64" ]
then
   echo " www : no specific posix checking"
else
   set -o posix
fi
unset system
278#
# name of this script (without directory), UTC timestamp of this run and
# name of the log file built from both.
# ${0} and ${command} are quoted so that a script path containing blanks
# is not split into several basename arguments.
command=$(basename "${0}")
log_date=$(date -u +"%Y%m%dT%H%M%SZ")
log=/tmp/$(basename "${command}" .sh).log.${log_date}
282#
# test if the external tools are available :
# xsltproc, xml, python and recode.
# Stop with an error on the first one which is not found.
for tool in xsltproc xml python recode
do
   if ! type "${tool}" 1> /dev/null 2>&1
   then
      echo " eee : ${tool} not found"
      exit 1
   fi
done
unset tool
330#
usage=" Usage : ${command} -i filein -t type -o order -p project -l lang"
#
# default values of the optional parameters
lang=en
order="fs" # firstname surname
#
# -h alone must print the usage : look for it before counting the arguments
if [ "${1:-}" = "-h" ]
then
   echo "${usage}"
   exit 0
fi
#
# at least three "option value" pairs are expected
minargcount=6
#echo " narg ${#}"
if [ ${#} -lt ${minargcount} ]
then
   echo "eee : not enought arguments"
   echo "${usage}"
   exit 1
fi
unset minargcount
#
# loop on the number of remaining arguments : an empty argument is reported
# as an unknown option instead of silently ending the parsing
while [ ${#} -gt 0 ]
do
   case "${1}" in
      -i|-t|-o|-p|-l)
         # each of these options needs a value
         if [ ${#} -lt 2 ]
         then
            echo "eee : option ${1} requires a value"
            echo "${usage}"
            exit 1
         fi
         case "${1}" in
            -i) # filein
               filein=${2}
            ;;
            -t) # type
               type=${2}
            ;;
            -o) # order of firstname surname in bibtex file
               order=${2}
            ;;
            -p) # project
               project=${2}
            ;;
            -l) # lang
               lang=${2}
            ;;
         esac
         shift # the value
      ;;
      -h)
         echo "${usage}"
         exit 0
      ;;
      *) # other choice
         echo "eee : unknown option ${1}"
         echo "${usage}"
         exit 1
      ;;
   esac
   shift # next flag
done
#
# filein, type and project have no default value : stop here with a clear
# message rather than going on with an unset variable
if [ -z "${filein+x}" ] || [ -z "${type+x}" ] || [ -z "${project+x}" ]
then
   echo "eee : options -i, -t and -p are mandatory"
   echo "${usage}"
   exit 1
fi
unset usage
383#
# from now on, using an unset variable is an error
set -u
#
# check for filein
# (quoted : a file name with blanks must stay a single argument of [ )
if [ ! -f "${filein}" ]
then
   echo "eee : ${filein} not found"
   exit 1
fi
392#
# according to the type of the input file, define
#  - fileraw : file holding the entries to be processed
#  - fileou  : output file, in the current directory
# File names are quoted so that a path with blanks is not split.
case "${type}" in
   raw) # file like data/biball.txt
      # the input file is used as it is
      fileraw=${filein}
      fileou=$(basename "${filein}" .txt).xml
   ;;
   mailbody) # file like data/mail2007-04-25T08:58:16Z.txt
      fileraw=/tmp/$(basename "${filein}").raw
      fileou=$(basename "${filein}" .txt).xml
      {
         echo "# from ${filein}"
         echo "# ${log_date}"
         echo "# corrections"
         # non empty "correction_...=" fields, without the field name
         grep "correction_.*=" "${filein}" | grep -v "correction_.*=$" | \
            sed -e "s/correction_.*=//"
         echo "# new references"
         # lines from "newreferences=" up to (excluding) "comments="
         awk "/newreferences=/,/comments=/" "${filein}" | \
            sed -e "s/newreferences=//" -e "/comments=/d"
      } > "${fileraw}"
      #more ${fileraw} # ++ if debug
      #read a #++ if debug
   ;;
   bibtex) # file like data/petitpoly.bib
      fileraw=/tmp/$(basename "${filein}").raw
      awk -f join_endcomma.awk "${filein}" > "${fileraw}"
      fileou=$(basename "${filein}" .bib).xml
   ;;
   *)
      echo "eee : type should be raw, mailbody or bibtex"
      exit 1
   ;;
esac
421#
422# check for output
# never overwrite an existing output file
# (quoted : a file name with blanks must stay a single argument of [ )
if [ -f "${fileou}" ]
then
   echo "eee : ${fileou} already exist"
   exit 1
fi
428#
# jlist : names of the known journals, conferences and series.
# The first name is stored at index 1, the following ones at the next
# indices, in the order written below.
jlist=(
   [1]="J. Climate"
   "Journal of Climate"
   "Ocean Modelling"
   "Geophys. Res. Lett."
   "J. Geophys. Res."
   "Tellus A"
   "Tellus B"
   "J. Phys. Oceanogr."
   "Clim. Dyn."
   "Clim. Res."
   "Climate Dynamics"
   "Dyn. Atmos. Oceans"
   "Mon. Wea. Rev."
   "Global Biogeochem. Cycles"
   "Nonlinear Processes in Geophysics"
   "Ocean Science"
   "J. Mar. Systems"
   "J. Atmos. Sc."
   "Proc. Royal Soc."
   "Bull. Amer. Meteorol. Soc."
   "Ocean Dyn."
   "Geophysical Monograph Series, Earth's Climate: The Ocean-Atmosphere Interaction"
   "Paleoceanography"
   "Annales Geophysicae"
   "Annals of Geophys."
   "Deep Sea Research Part II: Topical Studies in Oceanography"
   "Deep Sea Res. II"
   "Atmospheric Chemistry and Physics"
   "Atmospheric Chemistry and Physics Discussions"
   "Earth Plan. Sc. Lett."
   "J. Mar. Res."
   "PCMDI Report Series"
   "J. Mar. Syst."
   "Note du Pole de Modelisation"
   "Calculateurs Paralleles"
   "Note Scientifique du Pole de Modelisation"
   "Lecture Notes in Computer Science"
   "Prog. Oceanogr."
   "Deep Sea Research Part I: Oceanographic Research Papers"
   "Deep Sea Res. I"
   "IEEE Trans. Geosci. Remote Sensing"
   "ECMWF Tech. Memorandum"
   "J. Environ. Radioactivity"
   "Int. WOCE Newsletter"
   "C. R. Acad. Sci. Paris"
   "C. R. Acad. Sci. Paris, Earth and Planetary Sciences"
   "In The mathematics of models for climatology and environment"
   "Oceanol. Acta"
   "The global atmosphere and ocean system"
   "NATO Advanced Study Institute"
   "WCRP"
   "The Courier"
   "Elsevier Oceanography Series"
   "In Conference Proceedings of the 1988 International Conference on Supercomputing"
   "In Science and engineering on Cray Supercomputers"
   "In Modeling the Earth's Climate and its Variability"
   "Fish. Oceanogr."
   "Q. J. R. Meteor. Soc."
   "In High performance computing in the geosciences"
   "Int. J. Numer. Meth. in Fluids"
   "Lecture notes in Physics"
   "J. Meterol. Soc. Japan"
   "Journal of Marine Systems"
   "Canadian Journal of Fisheries and Aquatic Sciences"
   "J. Atmos. Ocean. Tech."
   "EOS"
   "Nature"
   "Nature Geosciences"
   "Physics and Chemistry of the Earth, Part B: Hydrology, Oceans and Atmosphere"
   "Journal of Atmospheric and Oceanic Technology"
   "Lecture Notes in Computational Science Engineering"
   "In Greenhouse Gas Control Technologies"
   "Chemical Geology"
   "Marine Geology"
   "Developments in Paleoenvironmental Research"
   "Science"
   "In Mediterranean Climate Variability"
   "Global and Planetary Change"
   "Physical Review Letters"
   "Progress In Oceanography"
   "Climatic Change"
   "Ocean Dynamics"
   "Phil. Trans. Roy. Soc. A"
   "Phil. Trans. Roy. Soc. B"
   "Izvestiya Atmospheric and Oceanic Physics"
   "Journal of Hydrometeorology"
   "Journal of Hydrology"
   "Remote Sensing of Environment"
   "Oceanology"
   "Water Resour. Res."
   "Water Resources Research"
   "Advances in Atmospheric Sciences"
   "Quaternary Science Reviews"
   "AGU monograph Ocean Circulation Mechanisms and Impacts"
   "Climate Research"
   "Colloque du GIS « Climat-Environnement-Sociétés »"
   "Agricultural and Forest Meteorology"
   "VIIth IAHS Scientific Assembly"
   "AGU fall Meeting"
   "4th Alexander Von Humboldt International Conference"
   "International Journal of Climatology"
   "Revue de l'Énergie"
   "Hydrol. Earth Sys. Sci."
   "Ambio"
   "Theoretical and applied climatology"
   "Remote Sensing of Environnement"
   "PNAS"
   "Lettre PIGB-PMRC"
   "High Performance Computing in Science and Engineering"
   "Annals of Glaciology"
   "Journal of Physical Oceanography"
   "??"
   #
   # following Journal names are fake ones for demo1 : references extracted
   # from Cantratrix Sopronica L. written by Georges Perec
   # found in http://www-rocq.inria.fr/who/Marc.Thiriet/PsFil/Biblio/cantatrix.ps
   "Res. Proc. neurophysiol. Fanatic Soc."
   "New Records Ass. J."
   "Am. J. Allegrol."
   "J. Neurochem. Neurocytol. Enzymol."
   "Hammersmith J."
   "Nasa Rept."
   "C.r. Assoc. Conc. Lam."
   "Gaz. med. franco-rus."
   "Amer. J. music. Deciency"
)
# ij : index of the last name of the list (indices start at 1 and are
# contiguous, so this is also the number of names)
ij=${#jlist[@]}
675#
# strip comments (lines beginning with # or %) and blank (empty) lines
# File names are quoted so that a path with blanks is not split.
fileraw_strict=/tmp/$(basename "${fileraw}")_strict
grep -v "^#" "${fileraw}" | grep -v "^%" | sed "/^$/d" > "${fileraw_strict}"
679# following line is not compatible with XML header ++
680# comments for memory (why did I write this line !?)
681# iconv -f ISO-8859-1 -t UTF-8 ${fileraw_strict} > ${fileraw_strict}2
682# mv ${fileraw_strict}2 ${fileraw_strict}
683case ${type} in
684   bibtex)
685      fileou_bibtexml=/tmp/$(basename ${fileraw})_bibtexml
686      python ./external/bibtex2xml.py ${fileraw_strict} > ${fileou_bibtexml}
687      #more  ${fileou_bibtexml}
688      #read a
689      xsltproc --output ${fileou} \
690         --param lang "'${lang}'" \
691         --param project "'${project}'" \
692         --param makedate "'${log_date}'" \
693         --param order "'${order}'" \
694         bibtexml2dbk.xsl ${fileou_bibtexml}
695      rm ${fileou_bibtexml}
696   ;;
697   *)
698      cat <<EOF > ${fileou}
699<?xml version='1.0' encoding='ISO-8859-1'?>
700<bibliography
701version="5.0"
702xmlns="http://docbook.org/ns/docbook"
703xml:id="bib${project}ref"
704xml:lang="${lang}">
705
706<info>
707<date>${log_date}</date>
708</info>
709EOF
710      totlines=$( wc -l ${fileraw_strict} | awk '{print $1}' )
711      l=1
712      while [ ${l} -le ${totlines}  ]
713      do
714         # extract one line
715         line=$( sed -n ${l}p ${fileraw_strict} )
716         orgline=$( echo ${line} | sed -e "s/--/- -/g" )
717         line=$( echo ${line} | sed -e "s/</\&lt;/g" -e "s/>/\&gt;/g" )
718         # parsing non bibtex line
719         bibtex=0
720         # before the first :
721         tmp=${line%%:*}
722         # before the last ,
723         auths=${tmp%,*},
724         # replace " and " by ","
725         auths=$( echo "${auths}" | sed -e "s/ and /,/g" )
726         # replace " et " by ","
727         auths=$( echo "${auths}" | sed -e "s/ et /,/g" )
728         # after the last ,
729         year=${tmp##*,}
730         year=$( rmbl "${year}" )
731         ## first author before the first .,
732         first=${auths%%.,*}.
733         # its firstname after the last ,
734         firstfn=${first##*,}
735         firstfn=$( rmbl "${firstfn}" )
736         # its surname ; before the first ,
737         firstsn=${first%%,*}
738         firstsn=$( rmbl "${firstsn}" )
739         ## ref id
740         firstsn_flat=$( echo "${firstsn}" | recode -d -f ISO-8859-15..flat )
741         # echo "firstsn : ${firstsn}" # ++ debug
742         # echo "firstsn_flat : ${firstsn_flat}" #++ debug
743         # test if no encoding problem after ISO-8859-15 to flat conversion
744         firstsn_test1=$(echo "${firstsn_flat}" | tr -d "[:alpha:]" | tr -d " " | tr -d "'" | tr -d "-")
745         #echo "firstsn_test1 : ${firstsn_test1}" # ++ debug
746         if [ ! -z "${firstsn_test1}" ]
747         then
748            echo "www : pb on ISO-8859-15 to flat conversion"
749            echo "www : on ${firstsn}"
750            echo "www : trying UTF-8 to flat conversion ..."
751            firstsn_flat=$( echo "${firstsn}" | recode -d -f UTF-8..flat )
752            # test if no encoding problem after UTF-8 to flat conversion
753            firstsn_test2=$(echo "${firstsn_flat}" | tr -d "[:alpha:]" | tr -d " " | tr -d "'")
754            #echo "firstsn_test2 : A${firstsn_test2}A" # ++ debug
755            if [ ! -z "${firstsn_test2}" ]
756            then
757               echo "eee : pb on flat conversion of ${firstsn}"
758               exit 1
759            fi
760         fi
761         firstsn_lower=$( echo "${firstsn_flat}" | tr "[:upper:]" "[:lower:]")
762         firstsn_nopunct=$( echo "${firstsn_lower}" | tr -s " " "_"  | tr -s "'" "_")
763         refid="${firstsn_nopunct}${year}"
764         unset firstsn_lower
765         unset firstsn_flat
766         unset firstsn_test1
767         unset firstsn_test2
768         num=$( grep -c "<biblioentry xml:id=\"${refid}_[0-9][0-9]\">" ${fileou} )
769         num=$(( ${num} + 1 ))
770         [ ${num} -le 9 ] && num=0${num}
771         refid=${refid}_${num}
772         hasauthor=1
773         
774         cat <<EOF >> ${fileou}
775<biblioentry xml:id="${refid}">
776  <!-- date
777  $( date -u +"%Y%m%dT%H%M%SZ" )
778  -->
779  <!-- original text
780  ${orgline}
781  -->
782EOF
783         if [ ${hasauthor} -eq 1 ]
784         then
785            cat <<EOF >> ${fileou}
786  <authorgroup>
787    <author> <personname> <surname>${firstsn}</surname> <firstname>${firstfn}</firstname> </personname> </author>
788EOF
789
790            ## other authors..
791            if [ ${bibtex} -eq 0 ]
792            then
793               previous=${first},
794               # next authors...
795               next=${auths##*${previous}}
796            fi
797            if [ ${bibtex} -eq 1 ]
798            then
799               previous=${first}"and "
800               # next authors...
801               #echo "auths ${auths}"
802               next=${auths##*${previous}}
803               if [ "${next}and " = "${previous}" ]
804               then
805                  next=""
806               fi
807            fi
808            #echo "previous ${previous}"
809            #echo "next ${next}"
810            # while the next author is not empty
811            while [  "${next}" != "" ]
812            do
813               if [ ${bibtex} -eq 0 ]
814               then
815                  # get the first next author; before the first ,
816                  next=${next%%,*}
817                  # its surname ; after the last .
818                  nextsn=${next##*.}
819                  nextsn=$( rmbl "${nextsn}" )
820                  # its firstname ; before the last .
821                  nextfn=${next%.*}.
822                  nextfn=$( rmbl "${nextfn}" )
823               fi
824               if [ ${bibtex} -eq 1 ]
825               then
826                  # get the first next author; before the first "and"
827                  next=${next%%and *}
828                  # its surname after the last " "  ++ pas vrai double nom et pb blanc dans la syntaxe
829                  # nextsn=${next##* } . ok en interactif . pas ok en script
830                  nextsn=$( echo ${next}  | awk '{print $2}')
831                  #echo "nextsn $nextsn"
832                  # its nextname before the first " "
833                  nextfn=${next%% *}
834               fi
835    #
836               echo "    <author> <personname> <surname>${nextsn}</surname> <firstname>${nextfn}</firstname> </personname> </author>" >> ${fileou}
837               #echo "    <author> <personname> <surname>${nextsn}</surname> <firstname>${nextfn}</firstname> </personname> </author>"  #++debug
838               if [ ${bibtex} -eq 0 ]
839               then
840                  previous=${next},
841                  next=${auths##*${previous}}
842               fi
843               if [ ${bibtex} -eq 1 ]
844               then
845                  previous=${next}"and "
846                  next=${auths##*${previous}}
847               fi
848            done
849            echo "  </authorgroup>"  >> ${fileou}
850         fi
851         # end of the line ; after the first :
852         endline=${line#*:}
853         ## find the journal
854         j=1
855         jfound=""
856         jlistsize=${#jlist[@]}
857         while [[ ${j} -le ${jlistsize} && "${jfound}" == "" ]]
858         do
859            ok=$( echo ${endline} | grep -ci "${jlist[j]} *," )
860            [ ${ok} -eq 1 ] && jfound="${jlist[j]}"
861            j=$(( ${j} + 1 ))
862         done
863         unset ok
864         if [ "${jfound}" == "" ]
865         then
866            echo "eee: Journal not found "
867            echo "${endline}"
868            # uncomment next line for debug
869            # set # ++ debug
870            exit 1
871            #+++ following lines are not yet validate
872            # it might be a book, a manual, a conference, etc. ++
873            echo "  <title>${title}</title>" >> ${fileou}
874            cat <<EOF >> ${fileou}
875  <biblioset relation="nojournal">
876    <title>${title}</title>
877    <pagenums>${pag}</pagenums>
878    <pubdate>${year}</pubdate>
879  </biblioset>
880EOF
881            cat <<EOF >> ${fileou}
882</biblioentry>
883
884EOF
885         fi # end of if jfound empty (ie not an article)
886         if [ "${jfound}" != "" ]
887         then
888            ## title
889            # before the first :
890            title=${endline%%${jfound}*}
891            title=$( cleanname "${title}" )
892            echo "  <title>${title}</title>" >> ${fileou}
893            ## end
894            ## end of the line ; after the first ${jfound}
895            endline=${endline#*${jfound}}
896            endline=$( cleanname "${endline}" )
897            ## doi
898            endline=$( echo ${endline} | sed -e "s/[dD][oO][iI] *\t* *: *\t* */doi:/" )
899            #echo "endline for doi ${endline}"
900            ok=$( echo ${endline} | grep -ic "doi:" )
901            if [ ${ok} -eq 1 ]
902            then
903               doi=${endline##*doi:}
904               #echo "doi : $doi"
905               echo "  <biblioid class=\"doi\">${doi}</biblioid>" >> ${fileou}
906               unset doi
907               endline=${endline%doi:*}
908               endline=$( cleanname "${endline}" )
909            else
910               echo "non doi: ${line}"
911               echo ""
912            fi
913            num=$( echo "${endline}" |  tr -dc "," | wc -c | tr -d " ")
914            case "${num}" in
915               0)
916                  ## echo ${num}: ${endline}
917                  cat <<EOF >> ${fileou}
918  <biblioset relation="journal">
919    <title>${jfound}</title>
920    <pubdate>${year}</pubdate>
921    <bibliomisc>${endline}</bibliomisc>
922  </biblioset>
923EOF
924                  unset jfound
925                  unset year
926               ;;
927               1)
928                  ### echo ${num}: ${endline}
929                  vol=${endline%,*}
930                  vol=$( cleanname "${vol}" )
931                  pag=${endline##*,}
932                  pag=$( cleanname "${pag}" )
933                  cat <<EOF >> ${fileou}
934  <biblioset relation="journal">
935    <title>${jfound}</title>
936    <volumenum>${vol}</volumenum><pagenums>${pag}</pagenums>
937    <pubdate>${year}</pubdate>
938  </biblioset>
939EOF
940                  unset vol
941                  unset pag
942                  unset jfound
943                  unset year
944               ;;
945               2)
946                  vol=${endline%,*}
947                  vol=$( cleanname "${vol}" )
948                  iss=${vol##*,}
949                  iss=$( cleanname "${iss}" )
950                  vol=${vol%,*}
951                  vol=$( cleanname "${vol}" )
952                  pag=${endline##*,}
953                  pag=$( cleanname "${pag}" )
954                  cat <<EOF >> ${fileou}
955  <biblioset relation="journal">
956    <title>${jfound}</title>
957    <volumenum>${vol}</volumenum><issuenum>${iss}</issuenum><pagenums>${pag}</pagenums>
958    <pubdate>${year}</pubdate>
959  </biblioset>
960                  EOF
961                  unset vol
962                  unset pag
963                  unset jfound
964                  unset year
965                  unset iss
966               ;;
967               6)
968                  conftitle=${endline%%,*}
969                  endline=${endline#*,}
970                  confdates=${endline%%,*} # day(s) month
971                  endline=${endline#*,}
972                  confdates="${confdates}, ${endline%%,*}" # day(s) month and year
973                  confdates=$( cleanname "${confdates}" )
974                  endline=${endline#*,}
975                  confaddress=${endline%%,*} # Town
976                  endline=${endline#*,}
977                  confaddress="${confaddress}, ${endline%%,*}" # Town and country
978                  confaddress=$( cleanname "${confaddress}" )
979                  endline=${endline#*,}
980                  publishername=${endline%%,*}
981                  publishername=$( cleanname "${publishername}" )
982                  endline=${endline#*,}
983                  pag=${endline##*,}
984                  pag=$( cleanname "${pag}" )
985                  #set
986                  #read a
987                  cat <<EOF >> ${fileou}
988<biblioset relation="conference">
989 <title>${jfound}</title>
990 <pubdate>${year}</pubdate>
991 <publisher><publishername>${publishername}</publishername></publisher>
992 <pagenums>${pag}</pagenums>
993 <confgroup>
994  <conftitle>${conftitle}</conftitle>
995  <confdates>${confdates}</confdates>
996  <address>${confaddress}</address>
997 </confgroup>
998</biblioset>
999EOF
1000                  unset confaddress
1001                  unset conftitle
1002                  unset confdates
1003                  unset pag
1004                  unset publishername
1005                  unset year
1006                  unset jfound
1007               ;;
1008               10)
1009                  publishername=${endline%%Editors,*}
1010                  publishername="${publishername} Editors"
1011                  publishername=$( cleanname "${publishername}" )
1012                  endline=${endline#*Editors,}
1013                  vol=${endline%%,*}
1014                  vol=$( cleanname "${vol}" )
1015                  endline=${endline#*,}
1016                  pag=${endline%%,*}
1017                  pag=$( cleanname "${pag}" )
1018                  endline=${endline#*,}
1019                  isbn=${endline%%,*}
1020                  isbn=$(echo ${isbn} | sed -e "s/ISBN ://")
1021                  isbn=$( cleanname "${isbn}" )
1022                  endline=${endline#*,}
1023                  agu=${endline%%,*}
1024                  agu=$(echo ${agu} | sed -e "s/AGU ://")
1025                  agu=$( cleanname "${agu}" )
1026                  endline=${endline#*${agu}}
1027                  endline=$( cleanname "${endline}" )
1028                  endline=$( cleanname "${endline}" )
1029                  cat <<EOF >> ${fileou}
1030  <biblioid class="isbn">${isbn}</biblioid>
1031  <biblioid class="other" otherclass="AGU">${agu}</biblioid>
1032  <biblioset relation="journal">
1033    <title>${jfound}</title>
1034    <pubdate>${year}</pubdate>
1035    <publisher><publishername>${publishername}</publishername></publisher>
1036    <volumenum>${vol}</volumenum>
1037    <pagenums>${pag}</pagenums>
1038    <bibliomisc>${endline}</bibliomisc>
1039  </biblioset>
1040                  EOF
1041                  unset isbn
1042                  unset agu
1043                  unset pag
1044                  unset publishername
1045                  unset year
1046                  unset jfound
1047                  unset vol
1048               ;;
1049               *)
1050                  #echo "${num}: ${endline}" # ++debug
1051                  #set # ++ debug
1052                  #exit 1 #++ debug
1053                  cat <<EOF >> ${fileou}
1054  <biblioset relation="journal">
1055    <title>${jfound}</title>
1056    <pubdate>${year}</pubdate>
1057    <bibliomisc>${endline}</bibliomisc>
1058  </biblioset>
1059                  EOF
1060               ;;
1061            esac
1062
1063            cat <<EOF >> ${fileou}
1064</biblioentry>
1065
1066EOF
1067         fi # end of if jfound not empty
1068         unset title
1069         unset vol
1070         unset year
1071         unset pag
1072         l=$(( ${l} + 1 ))
1073      done
1074      unset totlines
1075      unset l
1076      echo "</bibliography>" >> ${fileou}
1077   ;;
1078esac # end of case bibtex vs other for processing
1079
# Validate the generated file against the DocBook 5.0 XML schema with the
# "xml" command (presumably the xmlstarlet command line tool - confirm).
# Both stdout and stderr of the validation go to xmlstarlet.log in the
# current directory. The exit status of the validation is not checked:
# the log is only displayed and the script always ends with status 0.
xml val --err \
   --xsd http://www.docbook.org/xml/5.0/xsd/docbook.xsd \
   "${fileou}" > xmlstarlet.log 2>&1
#
# clean
echo "iii : xmlstarlet.log contains stdout and stderr from xml command on ${fileou}"
echo "iii : which was done just to check consistence of ${fileou}"
more xmlstarlet.log
#
# Remind the user which intermediate copies of the input were produced,
# depending on the input type.
case "${type}" in
   raw)
      echo "iii : ${fileraw_strict} contains a copy of input file without comments"
   ;;
   mailbody)
      echo "iii : ${fileraw} contains a copy of input file"
      echo "iii : ${fileraw_strict} contains a copy of input file without comments"
   ;;
esac
#
echo " iii : you have a some new or modified bibliographic references in ${fileou} (biblioentry)"
echo " iii : you can add them in ${fileou}" #++ filename path
echo " iii : modify date in ${fileou}" #++ filename path
echo " iii : but please take care of id last part (after underscore) that may"
echo " iii : be in conflict with existing ones" #++ to get rid of this concern, the file to be completed should be given as an additional input.
#
exit 0
Note: See TracBrowser for help on using the repository browser.