Changeset 183 for trunk/aeres/scripts/build_firstname_id.py
- Timestamp: 04/10/12 14:20:30 (12 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
trunk/aeres/scripts/build_firstname_id.py
r182 r183 18 18 19 19 :ref:`firstname_id.xsl` 20 21 :mod:`unidecode` 20 22 21 23 EXAMPLES … … 46 48 import string 47 49 import sys 50 import re 51 52 from unidecode import unidecode 48 53 49 54 def build_firstname_id(firstname): … … 53 58 >>> firstname = [] 54 59 [] 55 >>> firstname.append(' Ginette') 56 [' Ginette'] 57 >>> firstname.append('Gin ette ') 58 [' Ginette', 'Gin ette'] 59 >>> firstname.append("G\'in ette ") 60 [' Ginette', 'Gin ette', "G\'in ette"] 60 >>> firstname.append(u' Ginette') 61 >>> firstname 62 [u' Ginette'] 63 >>> firstname.append(u'Gin ette ') 64 >>> firstname 65 [u' Ginette', u'Gin ette '] 66 >>> firstname.append(u"G\'in ette ") 67 >>> firstname 68 [u' Ginette', u'Gin ette ', u"G\'in ette "] 69 >>> firstname.append(u"Gïnette") 70 >>> firstname 71 [u' Ginette', u'Gin ette ', u"G'in ette ", u'G\xefnette'] 61 72 >>> firstname_id = build_firstname_id(firstname) 62 ['ginette','ginette','ginette'] 73 >>> #firstname_id 74 ['ginette', 'ginette', 'ginette', 'ginette'] 63 75 """ 64 76 … … 67 79 sys.exit(-1) 68 80 69 # convert to str 70 #++firstname_str = [str(item) for item in firstname] 71 #++firstname_str = [item.encode('iso-8859-1','replace') for item in firstname] 72 firstname_str = [item.encode('iso-8859-1','xmlcharrefreplace') for item in firstname] 81 firstname_nondiacritics = [] 73 82 for item in firstname: 74 print ('iii : item type %s : %s ' % (item, type(item))) 83 #print ('iii : item row %s ' % (item)) 84 item_nondiacritics = unidecode(item) 85 #print ('iii : item non diacritics %s ' % (item_nondiacritics)) 86 firstname_nondiacritics.append(item_nondiacritics) 75 87 76 # remove white space before and after 77 firstname_id = map(str.strip,firstname_str) 78 #print ('1 sans blan debut fin %s' % firstname_id) 79 # 88 #for item in firstname_nondiacritics: 89 # print ('iii : item type %s : %s ' % (item, type(item))) 90 91 sl = firstname_nondiacritics 92 93 # remove white space 94 firstname_noblanks =[] 95 for item in sl: 96 #print ('iii : item 
avant sup blancs %s ' % (item)) 97 item_noblanks = re.sub(u' ',u'',item) 98 #print ('iii : item apres sup blancs %s ' % (item_noblanks)) 99 firstname_noblanks.append(item_noblanks) 100 101 sl = firstname_noblanks 102 103 80 104 # lower 81 firstname_id = map(str.lower,firstname_id) 82 #print ('2 upper %s' % firstname_id) 83 # 84 # remove white space inside 85 firstname_id_no_spaces = [x.replace(' ', '') for x in firstname_id] 86 #print ('3 sans blanc milieu %s' % firstname_id_no_spaces) 87 firstname_id = firstname_id_no_spaces 105 firstname_lower = [] 106 for item in sl: 107 #print ('iii : item avant lower %s ' % (item)) 108 item_lower = item.lower() 109 #print ('iii : item apres lower %s ' % (item_lower)) 110 firstname_lower.append(item_lower) 111 112 sl = firstname_lower 113 firstname_id = sl 88 114 # 89 115 # remove punctuation
Note: See TracChangeset for help on using the changeset viewer.