1 | #!/usr/bin/env python |
---|
2 | # -*- coding: iso-8859-1 -*- |
---|
3 | |
---|
4 | """ |
---|
5 | |
---|
6 | ===================== |
---|
7 | build_firstname_id.py |
---|
8 | ===================== |
---|
9 | |
---|
10 | DESCRIPTION |
---|
11 | =========== |
---|
12 | |
---|
13 | build the "first name" part of the id: |
---|
14 | - suppress blanks |
---|
15 | - suppress punctuation |
---|
16 | - replace diacritics with their non-diacritic equivalents (i.e. ï becomes i) |
---|
17 | |
---|
18 | SEE ALSO |
---|
19 | ======== |
---|
20 | |
---|
21 | :ref:`firstname_id.xsl` |
---|
22 | |
---|
23 | :mod:`unidecode` |
---|
24 | |
---|
25 | EXAMPLES |
---|
26 | ======== |
---|
27 | |
---|
28 | cf. doctest |
---|
29 | |
---|
30 | :: |
---|
31 | |
---|
32 | python build_firstname_id.py -v |
---|
33 | |
---|
34 | TODO |
---|
35 | ==== |
---|
36 | |
---|
37 | ne passe pas les doctests |
---|
38 | |
---|
39 | EVOLUTIONS |
---|
40 | ========== |
---|
41 | |
---|
42 | $Id$ |
---|
43 | |
---|
44 | $URL$ |
---|
45 | |
---|
46 | - fplod 20120409 |
---|
47 | |
---|
48 | * creation |
---|
49 | |
---|
50 | """ |
---|
51 | |
---|
52 | import string |
---|
53 | import sys |
---|
54 | import re |
---|
55 | |
---|
56 | try: |
---|
57 | from unidecode import unidecode |
---|
58 | except ImportError: |
---|
59 | print("Failed to import unidecode from any known place") |
---|
60 | sys.exit(1) |
---|
61 | |
---|
def build_firstname_id(firstname):
    r"""
    Build the normalized "first name" part of an id for each given name.

    Every item of ``firstname`` goes through the following steps, in order:

    - it is passed through ``unidecode`` (ie ï becomes i)
    - blanks (space characters) are suppressed
    - it is converted to lower case
    - every character of ``string.punctuation`` is suppressed

    :param firstname: non-empty sequence of first names (text strings)
    :returns: list of normalized first names, in the same order as the input

    If ``firstname`` is empty, the message ``eee : firstname empty`` is
    printed and the process exits with status -1 (``SystemExit``).

    >>> firstname = [u' Ginette', u'Gin ette ', u"G'in ette ", u'G\xefnette']
    >>> firstname_id = build_firstname_id(firstname)
    >>> firstname_id == ['ginette', 'ginette', 'ginette', 'ginette']
    True
    """

    if len(firstname) == 0:
        print('eee : firstname empty')
        sys.exit(-1)

    # Set membership makes the per-character punctuation test O(1).
    punctuation = frozenset(string.punctuation)

    firstname_id = []
    for item in firstname:
        # Same order as the historical implementation: transliterate,
        # drop spaces, lower-case, and only then strip punctuation.
        cleaned = unidecode(item).replace(' ', '').lower()
        firstname_id.append(
            ''.join(char for char in cleaned if char not in punctuation))

    return firstname_id
---|
130 | |
---|
if __name__ == "__main__":
    # Script entry point: run this module's doctests and report any failure
    # to the caller through a non-zero exit status.
    import doctest
    import sys

    failed_count, _attempted_count = doctest.testmod()
    if failed_count:
        sys.exit(1)
---|
136 | |
---|