Changeset 183 for trunk/aeres/scripts
- Timestamp: 04/10/12 14:20:30 (12 years ago)
- Location: trunk/aeres/scripts
- Files: 2 edited
Legend:
- Unmodified
- Added
- Removed
trunk/aeres/scripts/aeresrh.py
r182 r183 76 76 77 77 $URL$ 78 79 - fplod 20120410 80 81 * real titles 82 * start dictionnary 78 83 79 84 - fplod 20120409 … … 178 183 # detect title row 179 184 for rownum in range(sh.nrows): 180 if sh.row_values(rownum)[0] == 't':185 if sh.row_values(rownum)[0] == "Type d'emploi (EC, Ch, AP)": 181 186 titlerow = rownum 182 187 if is_verbose == True: … … 197 202 # detect surname column 198 203 for colnum in range(sh.ncols): 199 if sh.col_values(colnum)[titlerow] == ' nom':204 if sh.col_values(colnum)[titlerow] == 'Nom': 200 205 colsurname = colnum 201 206 if is_verbose == True: … … 204 209 # detect firstname column 205 210 for colnum in range(sh.ncols): 206 if sh.col_values(colnum)[titlerow] == 'prenom':211 if sh.col_values(colnum)[titlerow] == u'Prénom': 207 212 colfirstname = colnum 208 213 if is_verbose == True: … … 211 216 # detect arrival column 212 217 for colnum in range(sh.ncols): 213 if sh.col_values(colnum)[titlerow] == u'arrivée':218 if sh.col_values(colnum)[titlerow] == u"Date d'arriv\xe9e dans l'unit\xe9\n(9)": 214 219 colarrival = colnum 215 220 if is_verbose == True: … … 218 223 # detect publi column 219 224 for colnum in range(sh.ncols): 220 if sh.col_values(colnum)[titlerow] == u'publi':225 if sh.col_values(colnum)[titlerow] == u'N\xb0 des 5 productions les plus significatives dans la p\xe9riode \xe9valu\xe9e\n(11)': 221 226 colpubli = colnum 222 227 if is_verbose == True: … … 282 287 firstname_id = build_firstname_id(firstname) 283 288 # 289 author_id = [] 290 for index in range(len(surname_id)): 291 author_id.append('{surname_id}_{firstname_id}'.format(surname_id=surname_id[index], firstname_id=firstname_id[index])) 292 293 print('author id 0 %s ' % author_id[0]) 294 284 295 # populate dictionary 285 #++people = {} 286 #++for index, onesurname in enumarate(surname): 287 #++ print (' surname %s' % (surname)) 288 #++ people[n] = surname[index] 289 290 find_in_dict(people, 'surname', 'Pinsard') 296 people = {} 297 keys = ('surname_id', 
'firstname_id') 298 people = dict.fromkeys(author_id,keys) 299 people['COPPIN_elisabeth'] 300 for index, oneauthor_id in enumerate(author_id): 301 people[oneauthor_id] 302 #people[oneauthor_id]['surname_id'] = surname_id[index] 303 #people[oneauthor_id]['firstname_id'] = firsname_id[index] 304 # # people = {'pk1':{'firstname':'Brian', 'age':42}, 'pk2':{'firstname':'Alex', 'age':50}} 305 306 people['PINSARD_Francoise'] 307 find_in_dict(people, 'surname', 'PINSARD') 291 308 292 309 # Run main, if called from the command line -
trunk/aeres/scripts/build_firstname_id.py
r182 r183 18 18 19 19 :ref:`firstname_id.xsl` 20 21 :mod:`unidecode` 20 22 21 23 EXAMPLES … … 46 48 import string 47 49 import sys 50 import re 51 52 from unidecode import unidecode 48 53 49 54 def build_firstname_id(firstname): … … 53 58 >>> firstname = [] 54 59 [] 55 >>> firstname.append(' Ginette') 56 [' Ginette'] 57 >>> firstname.append('Gin ette ') 58 [' Ginette', 'Gin ette'] 59 >>> firstname.append("G\'in ette ") 60 [' Ginette', 'Gin ette', "G\'in ette"] 60 >>> firstname.append(u' Ginette') 61 >>> firstname 62 [u' Ginette'] 63 >>> firstname.append(u'Gin ette ') 64 >>> firstname 65 [u' Ginette', u'Gin ette '] 66 >>> firstname.append(u"G\'in ette ") 67 >>> firstname 68 [u' Ginette', u'Gin ette ', u"G\'in ette "] 69 >>> firstname.append(u"Gïnette") 70 >>> firstname 71 [u' Ginette', u'Gin ette ', u"G'in ette ", u'G\xefnette'] 61 72 >>> firstname_id = build_firstname_id(firstname) 62 ['ginette','ginette','ginette'] 73 >>> #firstname_id 74 ['ginette', 'ginette', 'ginette', 'ginette'] 63 75 """ 64 76 … … 67 79 sys.exit(-1) 68 80 69 # convert to str 70 #++firstname_str = [str(item) for item in firstname] 71 #++firstname_str = [item.encode('iso-8859-1','replace') for item in firstname] 72 firstname_str = [item.encode('iso-8859-1','xmlcharrefreplace') for item in firstname] 81 firstname_nondiacritics = [] 73 82 for item in firstname: 74 print ('iii : item type %s : %s ' % (item, type(item))) 83 #print ('iii : item row %s ' % (item)) 84 item_nondiacritics = unidecode(item) 85 #print ('iii : item non diacritics %s ' % (item_nondiacritics)) 86 firstname_nondiacritics.append(item_nondiacritics) 75 87 76 # remove white space before and after 77 firstname_id = map(str.strip,firstname_str) 78 #print ('1 sans blan debut fin %s' % firstname_id) 79 # 88 #for item in firstname_nondiacritics: 89 # print ('iii : item type %s : %s ' % (item, type(item))) 90 91 sl = firstname_nondiacritics 92 93 # remove white space 94 firstname_noblanks =[] 95 for item in sl: 96 #print ('iii : item 
avant sup blancs %s ' % (item)) 97 item_noblanks = re.sub(u' ',u'',item) 98 #print ('iii : item apres sup blancs %s ' % (item_noblanks)) 99 firstname_noblanks.append(item_noblanks) 100 101 sl = firstname_noblanks 102 103 80 104 # lower 81 firstname_id = map(str.lower,firstname_id) 82 #print ('2 upper %s' % firstname_id) 83 # 84 # remove white space inside 85 firstname_id_no_spaces = [x.replace(' ', '') for x in firstname_id] 86 #print ('3 sans blanc milieu %s' % firstname_id_no_spaces) 87 firstname_id = firstname_id_no_spaces 105 firstname_lower = [] 106 for item in sl: 107 #print ('iii : item avant lower %s ' % (item)) 108 item_lower = item.lower() 109 #print ('iii : item apres lower %s ' % (item_lower)) 110 firstname_lower.append(item_lower) 111 112 sl = firstname_lower 113 firstname_id = sl 88 114 # 89 115 # remove punctuation
Note: See TracChangeset for help on using the changeset viewer.