source: trunk/aeres/scripts/build_surname_id.py @ 195

Last change on this file since 195 was 195, checked in by pinsard, 12 years ago

cleanings

File size: 2.0 KB
Line 
1#!/usr/bin/env python
2# -*- coding: iso-8859-1 -*-
3
4"""
5
6===================
7build_surname_id.py
8===================
9
10DESCRIPTION
11===========
12
13Build the "surname" part of the id.
14
15
16SEE ALSO
17========
18
19:ref:`surname_id.xsl`
20
21EXAMPLES
22========
23
24cf. doctest
25
26::
27
28    python build_surname_id.py -v
29
30TODO
31====
32
33EVOLUTIONS
34==========
35
36$Id$
37
38$URL$
39
40- fplod 20120409
41
42  * work with fake data
43
44- fplod 20120407
45
46  * creation
47
48"""
49
50import string
51import sys
52
53try:
54    from unidecode import unidecode
55except ImportError:
56    print("Failed to import unidecode from any known place")
57    sys.exit(1)
58
def build_surname_id(surname):
    """Normalize a collection of surnames into the "surname" part of an id.

    Each item is converted with ``str()``, stripped of leading and trailing
    whitespace, upper-cased, and finally cleared of every space character
    and of every character listed in ``string.punctuation``.

    :param surname: sized collection of surnames (each item goes through
        ``str()``, so non-string items are accepted)
    :returns: list of normalized strings, in the same order as the input
    :raises SystemExit: with status -1, after printing a message, when
        *surname* is empty

    >>> surname = []
    >>> surname.append(' Tartempillion ')
    >>> surname.append('Tar tempillion ')
    >>> surname.append("T'ar tempillion ")
    >>> surname
    [' Tartempillion ', 'Tar tempillion ', "T'ar tempillion "]
    >>> build_surname_id(surname)
    ['TARTEMPILLION', 'TARTEMPILLION', 'TARTEMPILLION']
    """

    if len(surname) == 0:
        print('eee : surname empty')
        sys.exit(-1)

    # Characters dropped from inside each name: plain spaces and ASCII
    # punctuation.  Other inner whitespace (tabs, newlines) is kept; only
    # the strip() below removes it, and only at the ends.
    dropped = frozenset(' ' + string.punctuation)

    surname_id = []
    for item in surname:
        # convert to str, trim both ends, then upper-case
        name = str(item).strip().upper()
        # one filtering pass instead of one replace() per punctuation mark
        surname_id.append(''.join(ch for ch in name if ch not in dropped))

    return surname_id
104
# When executed as a script (not imported), run this module's doctests.
if __name__ == "__main__":
    from doctest import testmod

    testmod()
Note: See TracBrowser for help on using the repository browser.