Changeset 107 for trunk/bibopa.sh
- Timestamp:
- 10/06/10 16:40:28 (14 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/bibopa.sh
r103 r107 8 8 # ========= 9 9 # 10 # ------------------------------------------------- 11 # transform a bibliography file in DocBook 5 format 12 # ------------------------------------------------- 10 # ----------------------------------------------------- 11 # transform a bibliography file in XML/DocBook 5 format 12 # ----------------------------------------------------- 13 13 # 14 14 # SYNOPSIS … … 17 17 # :: 18 18 # 19 # $ bibopa.sh -i filein -t type -o order -p project -l lang 19 # $ bibopa.sh -i filein -ti typein -o order -p project -l lang -to typeout 20 20 # 21 21 # … … 102 102 # ======== 103 103 # 104 # To transform the NEMO bibliography file in raw format:: 105 # 106 # $ ./bibopa.sh -p birefnemo -i data/biball.txt -t raw 107 # 108 # 109 # To transform a bibliography file in mailbody format:: 110 # 111 # $ ./bibopa.sh -p bibrefnemo -i data/mail2007-05-10T09:01:56Z -t mailbody 112 # 113 # 114 # To transform a bibliography file in bibtex format in french:: 115 # 116 # $ ./bibopa.sh -p polyfortran -i data/petitpolyfp.bib -t bibtex -l fr 117 # 118 # 119 # To transform the SUPERBIB demo bibliography file in raw format:: 120 # 121 # $ ./bibopa.sh -p demo1 -i data/bibdemo1.txt -t raw -l fr 104 # To transform the NEMO bibliography file written in raw format to XML/DocBook:: 105 # 106 # $ ./bibopa.sh -p birefnemo -i data/biball.txt -ti raw 107 # 108 # 109 # To transform a bibliography file written mailbody format to XML/DocBook:: 110 # 111 # $ ./bibopa.sh -p bibrefnemo -i data/mail2007-05-10T09:01:56Z -ti mailbody 112 # 113 # 114 # To transform a bibliography file written in bibtex format in french to XML/DocBook:: 115 # 116 # $ ./bibopa.sh -p polyfortran -i data/petitpolyfp.bib -ti bibtex -l fr 117 # 118 # 119 # To transform the SUPERBIB demo bibliography file written in raw format to XML/DocBook:: 120 # 121 # $ ./bibopa.sh -p demo1 -i data/bibdemo1.txt -ti raw -l fr 122 122 # 123 123 # … … 139 139 # for portability issue but not found yet the "flat" fonctionnality in 140
# iconv 141 # tips 20101006 : iconv: //TRANSLIT feature not documented cf. 142 # http://www.gnu.org/software/libiconv/documentation/libiconv/iconv_open.3.html 143 # not already used because non-standard usage 141 144 # 142 145 # write something in the logfile ! … … 146 149 # 147 150 # $Id$ 151 # 152 # - fplod 20101006T113902Z aedon.locean-ipsl.upmc.fr (Darwin) 153 # 154 # * add "Remote Sensing Env." 155 # * add "Ocean Sciences" 156 # * add "Dynamics of Atmospheres and Oceans" 157 # * add "Comptes Rendus Geoscience" 158 # * add "Marine Pollution Bulletin" 159 # * add "Continental Shelf Research" 160 # * add ""Ocean Sci." 161 # * add "Environmental Fluid Mechanics" 162 # * add "Geochem. Geophys. Geosyst." 163 # * add "Journal of Applied Meteorology and Climatology" 164 # * add "Monthly Weather Review" 165 # * add "High Performance Computing on Vector Systems 2007" 166 # * add "High Performance Computing on Vector Systems 2006" 167 # * add "High Performance Computing in Science and Engineering '08" 168 # * add "Developments in Earth and Environmental Sciences" 169 # * add "Oceanography" 170 # 171 # - fplod 20101006T100532Z aedon.locean-ipsl.upmc.fr (Darwin) 172 # 173 # * bug fix (missplaced EOF) 174 # * replace -t option by -ti 175 # * add -to (typeout) option (by default dbk) 176 # * add bibtex production 148 177 # 149 178 # - fplod 2009-03-13T09:35:42Z aedon.locean-ipsl.upmc.fr (Darwin) … … 329 358 unset tool 330 359 # 331 usage=" Usage : ${command} -i filein -t type -o order -p project -l lang"360 usage=" Usage : ${command} -i filein -ti typein -o order -p project -l lang -to typeout" 332 361 # 333 362 # default … … 335 364 # fs mean "firstname surname" 336 365 order="fs" 366 typeout="dbk" 337 367 # 338 368 minargcount=6 … … 354 384 shift 355 385 ;; 356 -t) 357 type=${2} 386 -ti) 387 typein=${2} 388 shift 389 ;; 390 -to) 391 typeout=${2} 358 392 shift 359 393 ;; … … 396 430 fi 397 431 # 398 case ${type} in 432 case ${typeout} in 433 dbk) 434 fileou=$( basename 
${filein} .txt).xml 435 ;; 436 bibtex) 437 fileou=$( basename ${filein} .txt).bib 438 ;; 439 *) 440 echo "eee : typeout should be dbk or bibtex" 441 exit 1 442 ;; 443 esac 444 # 445 # check for output 446 case ${typein} in 399 447 raw) 400 448 # file like data/biball.txt 401 449 fileraw=${filein} 402 fileou=$( basename ${filein} .txt).xml 403 450 ;; 404 451 mailbody) 405 452 # file like data/mail2007-04-25T08:58:16Z.txt 406 453 fileraw=/tmp/$(basename ${filein}).raw 407 fileou=$( basename ${filein} .txt).xml 408 454 echo "# from ${filein}" > ${fileraw} 409 455 echo "# ${log_date}" >> ${fileraw} … … 420 466 fileraw=/tmp/$(basename ${filein}).raw 421 467 awk -f join_endcomma.awk ${filein} > ${fileraw} 422 fileou=$( basename ${filein} .bib).xml 423 468 ;; 424 469 *) 425 echo "eee : type should be raw, mailbody or bibtex" 470 echo "eee : typein should be raw, mailbody or bibtex" 471 exit 1 472 ;; … … 458 503 jlist[${ij}]="Climate Dynamics" 459 504 ij=$(( ${ij} + 1)) 505 jlist[${ij}]="Dynamics of Atmospheres and Oceans" 506 ij=$(( ${ij} + 1)) 460 507 jlist[${ij}]="Dyn. Atmos. Oceans" 461 508 ij=$(( ${ij} + 1)) 462 509 jlist[${ij}]="Mon. Wea. Rev." 463 510 ij=$(( ${ij} + 1)) 511 jlist[$ij]="Monthly Weather Review" 512 ij=$(( ${ij} + 1)) 464 513 jlist[${ij}]="Global Biogeochem. Cycles" 465 514 ij=$(( ${ij} + 1)) 466 515 jlist[${ij}]="Nonlinear Processes in Geophysics" 467 516 ij=$(( ${ij} + 1)) 517 jlist[${ij}]="Ocean Sci." 518 ij=$(( ${ij} + 1)) 468 519 jlist[${ij}]="Ocean Science" 469 520 ij=$(( ${ij} + 1)) 521 jlist[${ij}]="Ocean Sciences" 522 ij=$(( ${ij} + 1)) 470 523 jlist[${ij}]="J. Mar. 
Systems" 471 524 ij=$(( ${ij} + 1)) … … 586 639 jlist[${ij}]="Developments in Paleoenvironmental Research" 587 640 ij=$(( ${ij} + 1)) 641 jlist[${ij}]=" Developments in Earth and Environmental Sciences" 642 ij=$(( ${ij} + 1)) 588 643 jlist[${ij}]="Science" 589 644 ij=$(( ${ij} + 1)) … … 612 667 jlist[${ij}]="Remote Sensing of Environment" 613 668 ij=$(( ${ij} + 1)) 669 jlist[${ij}]="Remote Sensing Env." 670 ij=$(( ${ij} + 1)) 614 671 jlist[${ij}]="Oceanology" 615 672 ij=$(( ${ij} + 1)) … … 654 711 jlist[$ij]="High Performance Computing in Science and Engineering" 655 712 ij=$(( ${ij} + 1)) 713 jlist[$ij]="High Performance Computing in Science and Engineering '08" 714 ij=$(( ${ij} + 1)) 715 jlist[$ij]="High Performance Computing on Vector Systems 2006" 716 ij=$(( ${ij} + 1)) 717 jlist[$ij]="High Performance Computing on Vector Systems 2007" 718 ij=$(( ${ij} + 1)) 656 719 jlist[$ij]="Annals of Glaciology" 657 720 ij=$(( ${ij} + 1)) 721 jlist[$ij]="Oceanography" 722 ij=$(( ${ij} + 1)) 658 723 jlist[$ij]="Journal of Physical Oceanography" 724 ij=$(( ${ij} + 1)) 725 jlist[$ij]="In Deep convection and deep water formation in the oceans" 726 ij=$(( ${ij} + 1)) 727 jlist[$ij]="Journal of Applied Meteorology and Climatology" 728 ij=$(( ${ij} + 1)) 729 jlist[$ij]="Geochem. Geophys. Geosyst." 730 ij=$(( ${ij} + 1)) 731 jlist[$ij]="Environmental Fluid Mechanics" 732 ij=$(( ${ij} + 1)) 733 jlist[$ij]="Continental Shelf Research" 734 ij=$(( ${ij} + 1)) 735 jlist[$ij]="Marine Pollution Bulletin" 736 ij=$(( ${ij} + 1)) 737 jlist[$ij]="Comptes Rendus Geosciences" 659 738 ij=$(( ${ij} + 1)) 660 739 jlist[$ij]="??" 
… … 689 768 # iconv -f ISO-8859-1 -t UTF-8 ${fileraw_strict} > ${fileraw_strict}2 690 769 # mv ${fileraw_strict}2 ${fileraw_strict} 691 case ${type } in770 case ${typein} in 692 771 bibtex) 693 772 fileou_bibtexml=/tmp/$(basename ${fileraw})_bibtexml … … 704 783 ;; 705 784 *) 706 cat <<EOF > ${fileou} 785 case ${typeout} in 786 dbk) 787 cat <<EOF > ${fileou} 707 788 <?xml version='1.0' encoding='ISO-8859-1'?> 708 789 <bibliography … … 716 797 </info> 717 798 EOF 799 ;; 800 bibtex) 801 cat <<EOF > ${fileou} 802 803 @PREAMBLE{"bibliography of ${project}"}" 804 805 % ${log_date} 806 807 EOF 808 ;; 809 esac 718 810 totlines=$( wc -l ${fileraw_strict} | awk '{print $1}' ) 719 811 l=1 720 while [ ${l} -le ${totlines} 812 while [ ${l} -le ${totlines} ] 721 813 do 814 #echo "line: $l" 722 815 # extract one line 723 816 line=$( sed -n ${l}p ${fileraw_strict} ) … … 750 843 # echo "firstsn_flat : ${firstsn_flat}" #++ debug 751 844 # test if no encoding problem after ISO-8859-15 to flat conversion 752 firstsn_test1=$(echo "${firstsn_flat}" | tr -d "[:alpha:]" | tr -d " " | tr -d "'" | tr -d "-" )753 # echo "firstsn_test1 : ${firstsn_test1}" # ++ debug845 firstsn_test1=$(echo "${firstsn_flat}" | tr -d "[:alpha:]" | tr -d " " | tr -d "'" | tr -d "-" | tr -d ".") 846 # echo "firstsn_test1 : ${firstsn_test1}" # ++ debug 754 847 if [ ! 
-z "${firstsn_test1}" ] 755 848 then … … 774 867 unset firstsn_test1 775 868 unset firstsn_test2 776 num=$( grep -c "<biblioentry xml:id=\"${refid}_[0-9][0-9]\">" ${fileou} ) 869 # look for id of biblio entry to produce a unique one 870 case ${typeout} in 871 dbk) 872 num=$( grep -c "<biblioentry xml:id=\"${refid}_[0-9][0-9]\">" ${fileou} ) 873 ;; 874 bibtex) 875 num=$( grep -c "@.*{${refid}_[0-9][0-9]," ${fileou} ) 876 ;; 877 esac 777 878 num=$(( ${num} + 1 )) 778 879 [ ${num} -le 9 ] && num=0${num} 779 880 refid=${refid}_${num} 780 881 hasauthor=1 781 782 cat <<EOF >> ${fileou} 882 case ${typeout} in 883 dbk) 884 cat <<EOF >> ${fileou} 783 885 <biblioentry xml:id="${refid}"> 784 <!-- date 785 $( date -u +"%Y%m%dT%H%M%SZ" ) 786 --> 886 <!-- date $( date -u +"%Y%m%dT%H%M%SZ" ) --> 787 887 <!-- original text 788 888 ${orgline} 789 889 --> 790 890 EOF 891 ;; 892 bibtex) 893 cat <<EOF >> ${fileou} 894 % 895 % date $( date -u +"%Y%m%dT%H%M%SZ" ) 896 % original text : 897 % ${orgline} 898 % 899 @article{${refid}, 900 EOF 901 ;; 902 esac 791 903 if [ ${hasauthor} -eq 1 ] 792 904 then 793 cat <<EOF >> ${fileou} 905 case ${typeout} in 906 dbk) 907 cat <<EOF >> ${fileou} 794 908 <authorgroup> 795 909 <author> <personname> <surname>${firstsn}</surname> <firstname>${firstfn}</firstname> </personname> </author> 796 910 EOF 911 ;; 912 bibtex) 913 cat <<EOF >> ${fileou} 914 author = {${firstsn}, ${firstfn}}, 915 EOF 916 ;; 917 esac 797 918 798 919 ## other authors.. 
… … 842 963 fi 843 964 # 844 echo " <author> <personname> <surname>${nextsn}</surname> <firstname>${nextfn}</firstname> </personname> </author>" >> ${fileou} 845 #echo " <author> <personname> <surname>${nextsn}</surname> <firstname>${nextfn}</firstname> </personname> </author>" #++debug 965 case ${typeout} in 966 dbk) 967 echo " <author> <personname> <surname>${nextsn}</surname> <firstname>${nextfn}</firstname> </personname> </author>" >> ${fileou} 968 ;; 969 bibtex) 970 echo "author = {${nextsn}, ${nextfn}}," >> ${fileou} 971 ;; 972 esac 846 973 if [ ${bibtex} -eq 0 ] 847 974 then … … 855 982 fi 856 983 done 857 echo " </authorgroup>" >> ${fileou} 984 case ${typeout} in 985 dbk) 986 echo " </authorgroup>" >> ${fileou} 987 ;; 988 esac 858 989 fi 859 990 # end of the line ; after the first : … … 879 1010 #+++ following lines are not yet validate 880 1011 # it might be a book, a manual, a conference, etc. ++ 881 echo " <title>${title}</title>" >> ${fileou} 882 cat <<EOF >> ${fileou} 1012 case ${typeout} in 1013 dbk) 1014 echo " <title>${title}</title>" >> ${fileou} 1015 cat <<EOF >> ${fileou} 883 1016 <biblioset relation="nojournal"> 884 1017 <title>${title}</title> … … 886 1019 <pubdate>${year}</pubdate> 887 1020 </biblioset> 1021 </biblioentry> 888 1022 EOF 889 cat <<EOF >> ${fileou} 890 </biblioentry> 891 892 EOF 1023 ;; 1024 bibtex) 1025 echo "title = {${title}}," >> ${fileou} 1026 echo "pages = {${pag}}," >> ${fileou} 1027 echo "year = {${year}}" >> ${fileou} 1028 echo "}" >> ${fileou} 1029 ;; 1030 esac 893 1031 # end of if jfound empty (ie not an article) 894 1032 fi … … 899 1037 title=${endline%%${jfound}*} 900 1038 title=$( cleanname "${title}" ) 901 echo " <title>${title}</title>" >> ${fileou} 1039 case ${typeout} in 1040 dbk) 1041 echo " <title>${title}</title>" >> ${fileou} 1042 ;; 1043 bibtex) 1044 echo "title = {${title}}," >> ${fileou} 1045 esac 902 1046 ## end 903 1047 ## end of the line ; after the first ${jfound} … … 912 1056 doi=${endline##*doi:} 
913 1057 #echo "doi : $doi" 914 echo " <biblioid class=\"doi\">${doi}</biblioid>" >> ${fileou} 1058 case ${typeout} in 1059 dbk) 1060 echo " <biblioid class=\"doi\">${doi}</biblioid>" >> ${fileou} 1061 ;; 1062 bibtex) 1063 echo "doi={${doi}}," >> ${fileou} 1064 ;; 1065 esac 915 1066 unset doi 916 1067 endline=${endline%doi:*} … … 924 1075 0) 925 1076 ## echo ${num}: ${endline} 926 cat <<EOF >> ${fileou} 1077 case ${typeout} in 1078 dbk) 1079 cat <<EOF >> ${fileou} 927 1080 <biblioset relation="journal"> 928 1081 <title>${jfound}</title> … … 931 1084 </biblioset> 932 1085 EOF 1086 ;; 1087 bibtex) 1088 cat <<EOF >> ${fileou} 1089 journal = {${jfound}}, 1090 year = {${year}}, 1091 notes = {${endline}}, 1092 EOF 1093 ;; 1094 esac 1095 933 1096 unset jfound 934 1097 unset year … … 940 1103 pag=${endline##*,} 941 1104 pag=$( cleanname "${pag}" ) 942 cat <<EOF >> ${fileou} 1105 case ${typeout} in 1106 dbk) 1107 cat <<EOF >> ${fileou} 943 1108 <biblioset relation="journal"> 944 1109 <title>${jfound}</title> … … 947 1112 </biblioset> 948 1113 EOF 1114 ;; 1115 bibtex) 1116 echo "journal = {${jfound}}," >> ${fileou} 1117 echo "volume = {${vol}}," >> ${fileou} 1118 echo "pages = {${pag}}," >> ${fileou} 1119 echo "year = {${year}}," >> ${fileou} 1120 ;; 1121 esac 1122 ;; 1123 bibtex) 949 1124 unset vol 950 1125 unset pag … … 961 1136 pag=${endline##*,} 962 1137 pag=$( cleanname "${pag}" ) 963 cat <<EOF >> ${fileou} 1138 case ${typeout} in 1139 dbk) 1140 cat <<EOF >> ${fileou} 964 1141 <biblioset relation="journal"> 965 1142 <title>${jfound}</title> … … 967 1144 <pubdate>${year}</pubdate> 968 1145 </biblioset> 969 EOF 1146 EOF 1147 ;; 1148 bibtex) 1149 echo "journal = {${jfound}}," >> ${fileou} 1150 echo "volume = {${vol}}," >> ${fileou} 1151 echo "number = {${iss}}," >> ${fileou} 1152 echo "pages = {${pag}}," >> ${fileou} 1153 echo "year = {${year}}," >> ${fileou} 1154 ;; 1155 esac 970 1156 unset vol 971 1157 unset pag … … 998 1184 #set 999 1185 #read a 1000 cat <<EOF >> 
${fileou} 1186 case ${typeout} in 1187 dbk) 1188 cat <<EOF >> ${fileou} 1001 1189 <biblioset relation="conference"> 1002 1190 <title>${jfound}</title> … … 1011 1199 </biblioset> 1012 1200 EOF 1201 ;; 1202 bibtex) 1203 echo "journal = {${jfound}}," >> ${fileou} 1204 echo "year = {${year}}," >> ${fileou} 1205 echo "publisher = {${publishername}}," >> ${fileou} 1206 echo "booktitle = {${conftitle}}," >> ${fileou} 1207 echo "date = {${confdates}}," >> ${fileou} 1208 echo "address = {${confaddress}}," >> ${fileou} 1209 ;; 1210 esac 1013 1211 unset confaddress 1014 1212 unset conftitle … … 1040 1238 endline=$( cleanname "${endline}" ) 1041 1239 endline=$( cleanname "${endline}" ) 1042 cat <<EOF >> ${fileou} 1240 case ${typeout} in 1241 dbk) 1242 cat <<EOF >> ${fileou} 1043 1243 <biblioid class="isbn">${isbn}</biblioid> 1044 1244 <biblioid class="other" otherclass="AGU">${agu}</biblioid> … … 1051 1251 <bibliomisc>${endline}</bibliomisc> 1052 1252 </biblioset> 1053 EOF 1253 EOF 1254 ;; 1255 bibtex) 1256 echo "isbn= {${isbn}}," >> ${fileou} 1257 echo "agu= {${agu}}," >> ${fileou} 1258 echo "year = {${year}}," >> ${fileou} 1259 echo "journal = {${jfound}}," >> ${fileou} 1260 echo "publisher = {${publishername}}," >> ${fileou} 1261 echo "volume = {${vol}}," >> ${fileou} 1262 echo "pages = {${pag}}," >> ${fileou} 1263 echo "notes = {${endline}}," >> ${fileou} 1264 ;; 1265 esac 1054 1266 unset isbn 1055 1267 unset agu … … 1064 1276 #set # ++ debug 1065 1277 #exit 1 #++ debug 1066 cat <<EOF >> ${fileou} 1278 case ${typeout} in 1279 dbk) 1280 cat <<EOF >> ${fileou} 1067 1281 <biblioset relation="journal"> 1068 1282 <title>${jfound}</title> … … 1070 1284 <bibliomisc>${endline}</bibliomisc> 1071 1285 </biblioset> 1072 EOF 1286 EOF 1287 ;; 1288 bibtex) 1289 echo "journal = {${jfound}}," >> ${fileou} 1290 echo "year = {${year}}," >> ${fileou} 1291 echo "notes = {${endline}}," >> ${fileou} 1292 ;; 1293 esac 1073 1294 ;; 1074 1295 esac 1075 1076 cat <<EOF >> ${fileou} 1077 
</biblioentry> 1078 1079 EOF 1296 case ${typeout} in 1297 dbk) 1298 echo "</biblioentry>" >> ${fileou} 1299 ;; 1300 bibtex) 1301 echo "}" >> ${fileou} 1302 ;; 1303 esac 1080 1304 # end of if jfound not empty 1081 1305 fi … … 1088 1312 unset totlines 1089 1313 unset l 1090 echo "</bibliography>" >> ${fileou} 1314 case ${typeout} in 1315 dbk) 1316 echo "</bibliography>" >> ${fileou} 1317 ;; 1318 esac 1091 1319 ;; 1092 1320 # end of case bibtex vs other for processing 1093 1321 esac 1094 1322 1095 xml val --err \ 1096 --xsd http://www.docbook.org/xml/5.0/xsd/docbook.xsd \ 1097 ${fileou} 1> xmlstarlet.log 2>&1 1098 # 1099 # clean 1100 echo "iii : xslstarlet.log contains sdtout and stderr from xml command on ${fileou}" 1101 echo "iii : which was done just to check consistence of ${fileou}" 1102 more xmlstarlet.log 1103 # 1104 case ${type} in 1323 # check conformity of fileou 1324 # if possible 1325 case ${typeout} in 1326 dbk) 1327 xml val --err \ 1328 --xsd http://www.docbook.org/xml/5.0/xsd/docbook.xsd \ 1329 ${fileou} 1> xmlstarlet.log 2>&1 1330 # 1331 # clean 1332 echo "iii : xmlstarlet.log contains sdtout and stderr from xml command on ${fileou}" 1333 echo "iii : which was done just to check consistence of ${fileou}" 1334 more xmlstarlet.log 1335 # 1336 ;; 1337 bibtex) 1338 echo "iii : no check conformity of bibtex file" 1339 ;; 1340 esac 1341 # 1342 case ${typein} in 1105 1343 raw) 1106 1344 echo "iii : ${fileraw_strict} contains a copy of input file without comments"
Note: See TracChangeset
for help on using the changeset viewer.