source: trunk/aeres/scripts/build_firstname_id.py @ 325

Last change on this file since 325 was 325, checked in by pinsard, 11 years ago

improvments of doc + some doctest features

File size: 2.8 KB
Line 
1#!/usr/bin/env python
2# -*- coding: iso-8859-1 -*-
3
4"""
5
6=====================
7build_firstname_id.py
8=====================
9
10DESCRIPTION
11===========
12
13build the "first name" part of the id:
14 - suppress blanks
15 - suppress punctuation
16 - replace diacritics with their non-diacritic equivalents (e.g. ï becomes i)
17
18SEE ALSO
19========
20
21:ref:`firstname_id.xsl`
22
23:mod:`unidecode`
24
25EXAMPLES
26========
27
28cf. doctest
29
30::
31
32    python build_firstname_id.py -v
33
34TODO
35====
36
37the doctests do not pass yet
38
39EVOLUTIONS
40==========
41
42$Id$
43
44$URL$
45
46- fplod 20120409
47
48  * creation
49
50"""
51
52import string
53import sys
54import re
55
56try:
57    from unidecode import unidecode
58except ImportError:
59    print("Failed to import unidecode from any known place")
60    sys.exit(1)
61
def build_firstname_id(firstname):
    r"""
    Build the "first name" part of an id for every first name given.

    Each item of ``firstname`` goes through the following steps:

    - diacritics are replaced by their ASCII equivalent with
      :func:`unidecode` (i.e. ``ï`` becomes ``i``);
    - blanks (the space character) are removed;
    - the result is lower-cased;
    - every character of :data:`string.punctuation` is removed.

    :param firstname: non-empty list of first names (unicode strings)
    :returns: list of identifiers, one per input item, in the same order

    If ``firstname`` is empty, a message is printed and the interpreter
    exits through ``sys.exit(-1)``.

    >>> firstname = [u' Ginette', u'Gin ette ', u"G'in ette ", u'G\xefnette']
    >>> firstname_id = build_firstname_id(firstname)
    >>> firstname_id == ['ginette', 'ginette', 'ginette', 'ginette']
    True
    """

    if len(firstname) == 0:
        print('eee : firstname empty')
        sys.exit(-1)

    # A single character class holding the blank and every ASCII punctuation
    # character; re.escape keeps ']', '\\', '^' and '-' literal inside it.
    unwanted = re.compile('[ %s]' % re.escape(string.punctuation))

    # Transliterate first: unidecode may itself produce blanks or punctuation
    # (which must then be stripped). Lower-casing does not touch the removed
    # characters, so doing it last gives the same result as doing it between
    # the two removals.
    return [unwanted.sub('', unidecode(item)).lower() for item in firstname]
130
if __name__ == "__main__":
    # Script mode: run the doctests embedded in this module and report any
    # failure to the caller through a non-zero exit status.
    import doctest

    failed_count = doctest.testmod().failed
    if failed_count:
        raise SystemExit(1)
136   
Note: See TracBrowser for help on using the repository browser.