Changeset 67


Ignore:
Timestamp:
05/16/08 12:24:13 (16 years ago)
Author:
pinsard
Message:

Introducing BibTeX ingestion. Still some trouble because of the firstname+surname decomposition of personname.

Location:
trunk
Files:
2 added
3 edited

Legend:

Unmodified
Added
Removed
  • trunk/biblioentry_xml.xsl

    r66 r67  
    1313update : 
    1414$Id$ 
     15fplod 2008-05-15T09:18:54Z aedon.locean-ipsl.upmc.fr (Darwin) 
     16correction on separators between authors for bibtex 
    1517fplod 2008-05-14T08:51:10Z aedon.locean-ipsl.upmc.fr (Darwin) 
    1618introducting bibtex (latex) output 
     
    2426add category choices 
    2527fplod 2008-04-29T15:41:56Z aedon.locean-ipsl.upmc.fr (Darwin) 
    26 chgt for dbk5 in  
     28chgt for dbk5 in 
    2729fplod 2008-04-29T09:10:31Z aedon.locean-ipsl.upmc.fr (Darwin) 
    2830chgt for dbk5 out 
     
    3739factorisation de biblioentry 
    3840fplod 2007-05-14T15:48:04Z aedon.locean-ipsl.upmc.fr (Darwin) 
    39 correction to add biblioentry/date (replace biblioentry/biblioset/pubdate)  
     41correction to add biblioentry/date (replace biblioentry/biblioset/pubdate) 
    4042when "in press" 
    4143fplod 2007-05-04T07:59:34Z aedon.locean-ipsl.upmc.fr (Darwin) 
     
    258260 
    259261<xsl:template match="d:authorgroup" mode="raw"> 
    260  <xsl:apply-templates select="d:author"/> 
    261  <xsl:apply-templates select="d:corpauthor"/> 
     262 <xsl:apply-templates select="d:author" mode="raw"/> 
     263 <xsl:apply-templates select="d:corpauthor" mode="raw"/> 
    262264</xsl:template> 
    263265 
     
    265267 <xsl:text>, </xsl:text> 
    266268 <xsl:text>author = {</xsl:text> 
    267  <xsl:apply-templates select="d:author"/> 
    268  <xsl:apply-templates select="d:corpauthor"/> 
    269  <xsl:text>} </xsl:text> 
    270 </xsl:template> 
    271  
    272 <xsl:template match="d:author"> 
     269 <xsl:apply-templates select="d:author" mode="bibtex"/> 
     270 <xsl:apply-templates select="d:corpauthor" mode="bibtex"/> 
     271 <xsl:text>}</xsl:text> 
     272</xsl:template> 
     273 
     274<xsl:template match="d:author|d:corpauthor" mode="raw"> 
    273275<!-- 
    2742761er auteur Nom, prénom les autres Prenom Nom 
     
    327329</xsl:template> 
    328330 
    329 <xsl:template match="d:corpauthor"> 
    330 <!-- 
    331 une virgule entre les auteurs sauf entre les deux derniers 
     331<xsl:template match="d:author|d:corpauthor" mode="bibtex"> 
     332<!-- 
     3331er auteur Nom prénom les autres Prenom Nom 
     334and entre les auteurs 
     335--> 
     336 <xsl:value-of select="d:personname/d:firstname"/><xsl:text> </xsl:text> 
     337 <xsl:value-of select="d:personname/d:surname"/> 
     338 
     339<!-- 
     340and entre les auteurs 
    332341--> 
    333342  <xsl:choose> 
    334    <xsl:when test="count(following-sibling::*)  &gt; 1"> <!-- plusieurs suivants --> 
    335     <xsl:text>, </xsl:text> 
    336    </xsl:when> 
    337    <xsl:when test="count(following-sibling::*)  = 1"> 
    338     <xsl:text> and </xsl:text> <!-- un suivant --> 
     343   <xsl:when test="count(following-sibling::*)  &gt; 0"> <!-- un suivant--> 
     344    <xsl:text> and </xsl:text> 
    339345   </xsl:when> 
    340346  </xsl:choose> 
    341   <xsl:value-of select="normalize-space(.)"/> 
    342347</xsl:template> 
    343348 
     
    361366 <xsl:text>, journal = {</xsl:text> 
    362367 <xsl:value-of select="normalize-space(.)"/> 
    363  <xsl:text>} </xsl:text> 
     368 <xsl:text>}</xsl:text> 
    364369</xsl:template> 
    365370 
     
    370375 
    371376<xsl:template match="d:biblioset[@relation='conference']/d:title" mode="bibtex"> 
    372  <xsl:text>, inproceedings = {</xsl:text> 
    373  <xsl:value-of select="normalize-space(.)"/> 
    374  <xsl:text>} </xsl:text> 
     377 <xsl:text>, booktitle = {</xsl:text> 
     378 <xsl:value-of select="normalize-space(.)"/> 
     379 <xsl:text>}</xsl:text> 
    375380</xsl:template> 
    376381 
     
    383388 <xsl:text>, publisher = {</xsl:text> 
    384389 <xsl:value-of select="normalize-space(.)"/> 
    385  <xsl:text>} </xsl:text> 
     390 <xsl:text>}</xsl:text> 
    386391</xsl:template> 
    387392 
     
    393398<xsl:template match="d:pubdate" mode="bibtex"> 
    394399 <xsl:text>, year = {</xsl:text> 
    395  <xsl:value-of select="normalize-space(.)"/>  
    396  <xsl:text>} </xsl:text> 
     400 <xsl:value-of select="normalize-space(.)"/> 
     401 <xsl:text>}</xsl:text> 
    397402</xsl:template> 
    398403 
    399404<xsl:template match="d:date"> 
    400405 <xsl:message terminate="no"> 
    401    eee : date  
     406   eee : date 
    402407   <xsl:value-of select="normalize-space(.)"/> exists in node 
    403408   <xsl:value-of select="ancestor::d:biblioentry/@xml:id"/> 
     
    413418<xsl:template match="d:volumenum" mode="bibtex"> 
    414419 <xsl:text>, volume = {</xsl:text> 
    415  <xsl:value-of select="normalize-space(.)"/>  
    416  <xsl:text>} </xsl:text> 
     420 <xsl:value-of select="normalize-space(.)"/> 
     421 <xsl:text>}</xsl:text> 
    417422</xsl:template> 
    418423 
     
    424429<xsl:template match="d:issuenum" mode="bibtex"> 
    425430 <xsl:text>, number = {</xsl:text> 
    426  <xsl:value-of select="normalize-space(.)"/>  
    427  <xsl:text>} </xsl:text> 
     431 <xsl:value-of select="normalize-space(.)"/> 
     432 <xsl:text>}</xsl:text> 
    428433</xsl:template> 
    429434 
     
    436441 <xsl:text>, pages = {</xsl:text> 
    437442 <xsl:value-of select="normalize-space(.)"/> 
    438  <xsl:text>} </xsl:text> 
     443 <xsl:text>}</xsl:text> 
    439444</xsl:template> 
    440445 
     
    447452 <xsl:text>, isbn = {</xsl:text> 
    448453 <xsl:value-of select="normalize-space(.)"/> 
    449  <xsl:text>} </xsl:text> 
     454 <xsl:text>}</xsl:text> 
    450455</xsl:template> 
    451456 
     
    463468 <xsl:text> : </xsl:text> 
    464469 <xsl:value-of select="normalize-space(.)"/> 
    465  <xsl:text>} </xsl:text> 
     470 <xsl:text>}</xsl:text> 
    466471</xsl:template> 
    467472 
    468473<xsl:template match="d:biblioid[@class='doi']" mode="bibtex"> 
    469  <xsl:message terminate="no"> 
    470 BBBB 
    471  </xsl:message> 
    472474   <xsl:choose> 
    473475    <xsl:when test=". = '???'"> 
    474      <xsl:message> iii : no doi found for  
     476     <xsl:message> iii : no doi found for 
    475477      <xsl:value-of select="ancestor::d:biblioentry/@xml:id"/> 
    476478     </xsl:message> 
     
    483485    </xsl:otherwise> 
    484486   </xsl:choose> 
    485  <xsl:text>} </xsl:text> 
     487 <xsl:text>}</xsl:text> 
    486488</xsl:template> 
    487489 
     
    532534 <xsl:text>, address = {</xsl:text> 
    533535 <xsl:value-of select="normalize-space(.)"/> 
    534  <xsl:text>} </xsl:text> 
     536 <xsl:text>}</xsl:text> 
    535537</xsl:template> 
    536538 
     
    539541<xsl:when test="@class='doi'"> 
    540542 <xsl:apply-templates select="." mode="bibtex"/> 
    541  <xsl:message terminate="no"> 
    542 AAAA <xsl:value-of select="normalize-space(.)"/> 
    543  </xsl:message> 
    544543</xsl:when> 
    545544<xsl:when test="@class='isbn'"> 
     
    548547<xsl:when test="@class='other'"> 
    549548 <xsl:apply-templates select="." mode="bibtex"/> 
    550  <xsl:message terminate="no"> 
    551 CCCC <xsl:value-of select="normalize-space(.)"/> 
    552  </xsl:message> 
    553549</xsl:when> 
    554550<xsl:otherwise> 
     
    584580   <xsl:text>, note = {</xsl:text> 
    585581 <xsl:value-of select="normalize-space(.)"/> 
    586    <xsl:text>} </xsl:text> 
     582   <xsl:text>}</xsl:text> 
    587583</xsl:when> 
    588584<xsl:otherwise> 
     
    593589 <xsl:text>, note = {</xsl:text> 
    594590 <xsl:value-of select="normalize-space(.)"/> 
    595  <xsl:text>} </xsl:text> 
     591 <xsl:text>}</xsl:text> 
    596592</xsl:otherwise> 
    597593</xsl:choose> 
  • trunk/bibopa.sh

    r66 r67  
    7070# ++ option debug 
    7171# $Id$ 
     72# fplod 2008-05-15T15:15:04Z aedon.locean-ipsl.upmc.fr (Darwin) 
     73# new way of processing bibtex file : external/bibtex2xml.py 
    7274# fplod 2008-04-30T07:11:58Z aedon.locean-ipsl.upmc.fr (Darwin) 
    7375# chgt for dbk5 out 
     
    191193raw) # file like data/biball.txt 
    192194 fileraw=${filein} 
     195 fileou=$( basename ${filein} .txt).xml 
    193196;; 
    194197mailbody) # file like data/mail2007-04-25T08:58:16Z.txt 
    195198 fileraw=/tmp/$(basename ${filein}).raw 
     199 fileou=$( basename ${filein} .txt).xml 
    196200 echo "# from ${filein}" > ${fileraw} 
    197201 echo "# ${log_date}" >> ${fileraw} 
     
    206210bibtex) # file like data/petitpoly.bib 
    207211 fileraw=/tmp/$(basename ${filein}).raw 
    208  awk -f join_endcomma.awk ${filein} >> ${fileraw} 
    209  #more ${fileraw} # ++ if debug 
    210  #read a #++ if debug 
     212 awk -f join_endcomma.awk ${filein} > ${fileraw} 
     213 fileou=$( basename ${filein} .bib).xml 
    211214;; 
    212215*) 
     
    217220# 
    218221# check for output 
    219 fileou=$( basename ${filein} .txt).xml 
    220222if [ -f ${fileou} ] 
    221223then 
     
    401403jlist[$ij]="Amer. J. music. Deciency" 
    402404# 
     405# 
     406# strip comments 
     407fileraw_strict=/tmp/$(basename ${fileraw})_strict 
     408grep -v "^#" ${fileraw} | grep -v "^%" > ${fileraw_strict} 
     409case ${type} in 
     410bibtex)  
     411 fileou_bibtexml=/tmp/$(basename ${fileraw})_bibtexml 
     412 python ./external/bibtex2xml.py ${fileraw_strict} > ${fileou_bibtexml} 
     413 xsltproc --output ${fileou} \ 
     414 --param lang "'${lang}'" \ 
     415 --param project "'${project}'" \ 
     416 --param makedate "'${log_date}'" \ 
     417 bibtexml2dbk.xsl ${fileou_bibtexml} 
     418#++ firstname surname 
     419 rm ${fileou_bibtexml}  
     420;; 
     421*) 
    403422    cat <<EOF > ${fileou} 
    404423<?xml version='1.0' encoding='ISO-8859-1'?> 
     
    410429 
    411430<info> 
    412 <date>$( date -u +"%Y-%m-%dT%H:%M:%SZ" )</date> 
     431<date>${log_date}<date> 
    413432</info> 
    414433EOF 
    415 unset lang 
    416 # 
    417 # strip comments 
    418 fileraw_strict=/tmp/$(basename ${fileraw})_strict 
    419 grep -v "^#" ${fileraw} | grep -v "^%" > ${fileraw_strict} 
    420434totlines=$( wc -l ${fileraw_strict} | awk '{print $1}' ) 
    421435l=1 
     
    426440  orgline=$( echo ${line} | sed -e "s/--/- -/g" ) 
    427441  line=$( echo ${line} | sed -e "s/</\&lt;/g" -e "s/>/\&gt;/g" ) 
    428   # detect if bibtex or not bibtex (starting with @) 
    429   if [ ${line:0:1} != "@" ] 
    430   then 
    431442     # parsing non bibtex line 
    432443     bibtex=0 
     
    457468       refid=${refid}_${num} 
    458469       hasauthor=1 
    459    else 
    460       # parsing bibtex line 
    461       bibtex=1 
    462       # before the first , 
    463       tmp=${line%%,*} 
    464       endline=${line#*,} 
    465       # refid is after { and before the first comma 
    466       refid=${tmp#*{} 
    467       # ++ test if refid already exist 
    468       # check if author field is there 
    469       echo ${endline} | grep -q "author" 
    470       hasauthor=${?} 
    471       if [ ${hasauthor} -eq 0 ] 
    472       then 
    473          auths=$( echo ${endline} | awk -F "," '{print $1}' | sed -e "s/\(author * = *{\)\(.*\)\(}\)/\2/") 
    474          #echo "auths $auths" # genre  Michael Metcalf and John Reid and Malcolm Cohen 
    475       else 
    476         echo "pas d'auteurs" # ++ 
    477       fi 
    478       endline=${endline#*,} 
    479       title=$( echo ${endline} | awk -F "," '{print $1}' | sed -e "s/\(title * = *{\)\(.*\)\(}\)/\2/") 
    480       #echo "title : ${title}" 
    481       endline=${endline#*,} 
    482 #echo ${endline} | awk -F "," '{print $1}' #++debug 
    483       year=$( echo ${endline} | awk -F "," '{print $1}' | sed -e "s/\(year * = *{\)\(.*\)\(}\)/\2/") 
    484       endline=${endline#*,} 
    485       publisher=$( echo ${endline} | awk -F "," '{print $1}' | sed -e "s/\(publisher * = *{\)\(.*\)\(}\)/\2/") 
    486       endline=${endline#*,} 
    487       edition=$( echo ${endline} | awk -F "," '{print $1}' | sed -e "s/\(edition * = *{\)\(.*\)\(}\)/\2/") 
    488       endline=${endline#*,} 
    489       isbn=$( echo ${endline} | awk -F "," '{print $1}' | sed -e "s/\(isbn * = *{\)\(.*\)\(}\)/\2/") 
    490       endline=${endline#*,} 
    491       pag=$( echo ${endline} | awk -F "," '{print $1}' | sed -e "s/\(page * = *{\)\(.*\)\(}\)/\2/") 
    492  
    493       if [ ${hasauthor} -eq 1 ] 
    494       then 
    495          ## first author before the first and 
    496          first=${auths%%and*} 
    497          # its surname after the last " "  ++ pas vrai double nom et pb blanc dans la syntaxe 
    498          # firstsn=${first##* } . ok en interactif . pas ok en script 
    499          firstsn=$( echo ${first}  | awk '{print $2}') 
    500          #echo "firstsn $firstsn" 
    501          # its firstname before the first " " 
    502          firstfn=${first%% *} 
    503       fi 
    504    fi 
    505470 
    506471          cat <<EOF >> ${fileou} 
     
    599564      echo "eee: Journal not found " 
    600565      echo "${endline}" 
    601       #++set 
    602       #++exit 1 
     566      set 
     567      exit 1 
    603568#+++ following lines are not yet validate 
    604569      # it might be a book, a manual, a conference, etc. ++ 
     
    810775unset l 
    811776echo "</bibliography>" >> ${fileou} 
     777;; 
     778esac # end of case bibtex vs other for processing 
    812779 
    813780xml val --err \ 
  • trunk/makefile

    r66 r67  
    178178        -@rm -fr $(DIRWWW)/ 
    179179        -@rm -f $(DIRTMP)/bibdemo1_dbk5_1.txt 
    180         -@rm -f $(DIRTMP)/bibdemo1_dbk5_2.txt 
     180        -@rm -f $(DIRTMP)/bibdemo1_dbk5_2.bib 
     181        -@rm -f $(DIRTMP)/bibdemo1_dbk5_2.tex 
    181182        -@rm -f $(DIRTMP)/bibdemo1_dbk5_1.xml 
    182183        -@rm -f $(DIRTMP)/bibdemo1_dbk5_2.xml 
    183         -@rm -f $(DIRTMP)/bibdemo1_dbk5.bib 
     184        -@rm -f $(DIRTMP)/bibdemo1_dbk5_2.bbl 
     185        -@rm -f $(DIRTMP)/bibdemo1_dbk5_2.blg 
     186        -@rm -f $(DIRTMP)/bibdemo1_dbk5_2.out 
     187        -@rm -f $(DIRTMP)/bibdemo1_dbk5_2.aux 
     188        -@rm -f $(DIRTMP)/bibdemo1_dbk5_2.dvi 
     189        -@rm -f $(DIRTMP)/bibdemo1_dbk5_2.log 
    184190        -@rm -f $(DIRTMP)/bibnemo1.xml 
    185191        -@rm -f $(DIRTMP)/bibnemo1.txt 
     
    553559test_raw_demo1_1 \ 
    554560test_raw_demo1_2 \ 
     561test_raw_demo1_2_tex \ 
    555562test_raw_nemo1 \ 
    556563test_raw_nemoall 
     
    560567$(DIRTMP)/bibdemo1_dbk5_1.xml \ 
    561568$(DIRTMP)/bibdemo1_dbk5_1.txt 
     569        @echo "test from xml to xml via raw form" 
    562570        diff -b $(DIRTMP)/bibdemo1_dbk5_1.xml $(DIRSRC)/data/bibdemo1_dbk5.xml | more 
    563571        rm -i $(DIRTMP)/bibdemo1_dbk5_1.xml $(DIRTMP)/bibdemo1_dbk5_1.txt 
     
    577585        $(DIRSRC)/bibopa.sh -p demo1 -i $(DIRTMP)/bibdemo1_dbk5_1.txt -t raw -l fr 
    578586 
     587test_raw_demo1_2_tex : \ 
     588$(DIRTMP)/bibdemo1_dbk5_2.dvi 
     589        @echo "test from xml to latex via bibtex form" 
     590 
     591$(DIRTMP)/bibdemo1_dbk5_2.dvi : \ 
     592$(DIRTMP)/bibdemo1_dbk5_2.tex \ 
     593$(DIRTMP)/bibdemo1_dbk5_2.bib 
     594        cd $(DIRTMP)/ 
     595        latex bibdemo1_dbk5_2 
     596        bibtex bibdemo1_dbk5_2 
     597        latex bibdemo1_dbk5_2 
     598 
     599$(DIRTMP)/bibdemo1_dbk5_2.tex : \ 
     600$(DIRSRC)/data/superbib01.tex 
     601        @sed -e "s/bibliography{superbib01}/bibliography{bibdemo1_dbk5_2}/" \ 
     602        $(DIRSRC)/data/superbib01.tex > $(DIRTMP)/bibdemo1_dbk5_2.tex 
     603         
    579604test_raw_demo1_2 : \ 
    580605$(DIRSRC)/data/bibdemo1_dbk5.xml \ 
    581606$(DIRTMP)/bibdemo1_dbk5_2.xml \ 
    582 $(DIRTMP)/bibdemo1_dbk5_2.txt 
     607$(DIRTMP)/bibdemo1_dbk5_2.bib 
     608        @echo "test from xml to xml via bibtex form" 
    583609        diff -b $(DIRTMP)/bibdemo1_dbk5_2.xml $(DIRSRC)/data/bibdemo1_dbk5.xml | more 
    584         rm -i $(DIRTMP)/bibdemo1_dbk5_2.xml $(DIRTMP)/bibdemo1_dbk5_2.txt 
    585  
    586 $(DIRTMP)/bibdemo1_dbk5_2.txt : \ 
     610        rm -i $(DIRTMP)/bibdemo1_dbk5_2.xml $(DIRTMP)/bibdemo1_dbk5_2.bib 
     611 
     612$(DIRTMP)/bibdemo1_dbk5_2.bib : \ 
    587613$(DIRSRC)/data/bibdemo1_dbk5.xml \ 
    588614$(DIRSRC)/superbib02_txt.xsl \ 
     
    594620 
    595621$(DIRTMP)/bibdemo1_dbk5_2.xml : \ 
    596 $(DIRTMP)/bibdemo1_dbk5_2.txt \ 
     622$(DIRTMP)/bibdemo1_dbk5_2.bib \ 
    597623$(DIRSRC)/bibopa.sh 
    598         $(DIRSRC)/bibopa.sh -p demo1 -i $(DIRTMP)/bibdemo1_dbk5_2.txt -t bibtex -l fr 
     624        $(DIRSRC)/bibopa.sh -p demo1 -i $(DIRTMP)/bibdemo1_dbk5_2.bib -t bibtex -l fr 
    599625 
    600626test_raw_nemo1 : \ 
Note: See TracChangeset for help on using the changeset viewer.