1 | #!/usr/bin/env python |
---|
2 | # -*- coding: iso-8859-1 -*- |
---|
3 | |
---|
4 | __docformat__ = 'reStructuredText' |
---|
5 | __revision__ = "$Id$" |
---|
6 | __autodoc__ = ['aeresrh'] |
---|
7 | |
---|
8 | |
---|
9 | """ |
---|
10 | |
---|
11 | ========== |
---|
12 | aeresrh.py |
---|
13 | ========== |
---|
14 | |
---|
15 | DESCRIPTION |
---|
16 | =========== |
---|
17 | |
---|
18 | extract information from aeresrh.xls |
---|
19 | |
---|
20 | .. option:: --ifile <file> |
---|
21 | |
---|
22 | file to read |
---|
23 | |
---|
24 | .. only:: man |
---|
25 | |
---|
26 | Figure is visible on PDF and HTML documents only. |
---|
27 | |
---|
28 | .. only:: html or latex |
---|
29 | |
---|
30 | .. graphviz:: |
---|
31 | |
---|
32 | digraph aeresrh { |
---|
33 | rh [shape=diamond, |
---|
34 | fontname=Courier, |
---|
35 | label='{ifile}' |
---|
36 | |
---|
37 | aeresrh [shape=box, |
---|
38 | fontname=Courier, |
---|
39 | color=blue, |
---|
40 | URL="http://forge.ipsl.jussieu.fr/pulsation/browser/trunk/aeres/scripts/aeresrh.py", |
---|
41 | label="${PROJECT}/aeres/scripts/aeresrh.py"]; |
---|
42 | } |
---|
43 | |
---|
44 | SEE ALSO |
---|
45 | ======== |
---|
46 | |
---|
47 | .. index:: ! Excel |
---|
48 | |
---|
49 | https://secure.simplistix.co.uk/svn/xlrd/trunk/xlrd/doc/xlrd.html |
---|
50 | http://www.simplistix.co.uk/presentations/python-excel.pdf |
---|
51 | |
---|
52 | |
---|
53 | EXAMPLES |
---|
54 | ======== |
---|
55 | |
---|
56 | :: |
---|
57 | |
---|
58 | aeresrh.py --ifile ${PROJECT}/data/aeresrh.xls |
---|
59 | |
---|
60 | aeresrh.py --ifile ${HOME}/Downloads/personnel_franc\314\247oise.xls |
---|
61 | |
---|
62 | TODO |
---|
63 | ==== |
---|
64 | |
---|
65 | pb de type de fichier ~/Downloads/personnel\ franc\314\247oise.xlsx |
---|
66 | |
---|
67 | ne marche pas avec des chemins relatifs |
---|
68 | |
---|
69 | append sur ifiles car liste permanent et liste thesard |
---|
70 | |
---|
71 | comment j'ai la liste des gens partis au 30 juin 2012 avec |
---|
72 | publication (retraite, decès, mutation) |
---|
73 | |
---|
74 | test with real file (encoding, francais, etc) |
---|
75 | |
---|
76 | missing values |
---|
77 | |
---|
78 | coding rules (pylint) |
---|
79 | |
---|
80 | EVOLUTIONS |
---|
81 | ========== |
---|
82 | |
---|
83 | $Id$ |
---|
84 | |
---|
85 | $URL$ |
---|
86 | |
---|
87 | - fplod 20120426 |
---|
88 | |
---|
89 | * big process in dictionnary using itertools |
---|
90 | |
---|
91 | - fplod 20120410 |
---|
92 | |
---|
93 | * real titles |
---|
94 | * start dictionnary |
---|
95 | |
---|
96 | - fplod 20120409 |
---|
97 | |
---|
98 | * no more hard coded column number |
---|
99 | |
---|
100 | - fplod 20120406 |
---|
101 | |
---|
102 | * tst avec copie de vrai fichier |
---|
103 | (fplod@halios:Downloads/personnel\ franc\314\247oise.xlsx) dans toto.xsl |
---|
104 | - fplod 20120405 |
---|
105 | |
---|
106 | * gestion des temps (arrivée, départ) (to be cont.) |
---|
107 | * creation draft |
---|
108 | thanks to http://www.portailsig.org/content/python-lire-et-ecrire-des-fichiers-microsoft-excel-application-quantum-gis |
---|
109 | and http://scienceoss.com/read-excel-files-from-python/ |
---|
110 | |
---|
111 | """ |
---|
112 | |
---|
113 | import sys |
---|
114 | import os |
---|
115 | from os.path import dirname, basename |
---|
116 | import glob |
---|
117 | from optparse import OptionParser |
---|
118 | |
---|
119 | import datetime |
---|
120 | |
---|
121 | import itertools |
---|
122 | |
---|
123 | try: |
---|
124 | import xlrd |
---|
125 | except ImportError: |
---|
126 | print("Failed to import xlrd from any known place") |
---|
127 | sys.exit(1) |
---|
128 | |
---|
129 | from build_surname_id import build_surname_id |
---|
130 | from build_firstname_id import build_firstname_id |
---|
131 | |
---|
132 | def get_option_parser (): |
---|
133 | """parse CLI arguments |
---|
134 | |
---|
135 | :returns: parser |
---|
136 | :rtype: :class:`optparse.OptionParser` |
---|
137 | """ |
---|
138 | |
---|
139 | parser = OptionParser('%prog [--verbose ] [--ifile file') |
---|
140 | parser.add_option ('-V', '--verbose', |
---|
141 | help='produce verbose output', |
---|
142 | action='store_true', |
---|
143 | default=False, |
---|
144 | dest='is_verbose') |
---|
145 | parser.add_option ('-i', '--ifile', |
---|
146 | help='file to be read', |
---|
147 | type='string', |
---|
148 | dest='ifile', |
---|
149 | default=None, |
---|
150 | action='store') |
---|
151 | |
---|
152 | return parser |
---|
153 | |
---|
154 | def find_in_dict(d, criterion, lookup): |
---|
155 | """ |
---|
156 | >>> people = {'pk1':{'firstname':'Brian', 'age':42}, 'pk2':{'firstname':'Alex', 'age':50}} |
---|
157 | >>> result = find_in_dict(people, 'age', 42) |
---|
158 | |
---|
159 | http://stackoverflow.com/questions/8987631/searching-2-dimensional-dictionary |
---|
160 | """ |
---|
161 | |
---|
162 | result = [] |
---|
163 | for key, val in d.iteritems(): |
---|
164 | if criterion in val and val[criterion] == lookup: |
---|
165 | result.append((key,val)) |
---|
166 | return result |
---|
167 | |
---|
168 | def readrh(ifile, is_verbose): |
---|
169 | |
---|
170 | """ |
---|
171 | read ifile |
---|
172 | |
---|
173 | return surname, firstname, arrival, publi |
---|
174 | """ |
---|
175 | |
---|
176 | if is_verbose == True: |
---|
177 | print('iii : read %s' % ifile) |
---|
178 | |
---|
179 | # ouverture du fichier Excel |
---|
180 | wb = xlrd.open_workbook(ifile, formatting_info=True) |
---|
181 | |
---|
182 | # feuilles dans le classeur |
---|
183 | if is_verbose == True: |
---|
184 | print('sheets name %s' % ( wb.sheet_names())) |
---|
185 | |
---|
186 | # lecture des données dans la première feuille |
---|
187 | sh = wb.sheet_by_index(0) |
---|
188 | |
---|
189 | # |
---|
190 | if is_verbose == True: |
---|
191 | for rownum in range(sh.nrows): |
---|
192 | print('values of in row %d : %s' % (rownum, sh.row_values(rownum))) |
---|
193 | |
---|
194 | if is_verbose == True: |
---|
195 | for colnum in range(sh.ncols): |
---|
196 | print('values of in col %d : %s' % (colnum, sh.col_values(colnum))) |
---|
197 | |
---|
198 | # detect title row |
---|
199 | for rownum in range(sh.nrows): |
---|
200 | if sh.row_values(rownum)[0] == "Type d'emploi (EC, Ch, AP)": |
---|
201 | titlerow = rownum |
---|
202 | if is_verbose == True: |
---|
203 | print('titlerow : %d ' % (titlerow)) |
---|
204 | |
---|
205 | # deduce first usefull row |
---|
206 | firstrow = titlerow + 1 |
---|
207 | # |
---|
208 | # detect last usefull row |
---|
209 | uselessrow = [] |
---|
210 | for rownum in range(titlerow, sh.nrows): |
---|
211 | if sh.row_values(rownum)[0] == '': |
---|
212 | uselessrow.append(rownum) |
---|
213 | lastrow = uselessrow[0] |
---|
214 | if is_verbose == True: |
---|
215 | print('lastrow : %d ' % (lastrow)) |
---|
216 | |
---|
217 | # detect surname column |
---|
218 | for colnum in range(sh.ncols): |
---|
219 | if sh.col_values(colnum)[titlerow] == 'Nom': |
---|
220 | colsurname = colnum |
---|
221 | if is_verbose == True: |
---|
222 | print('colsurname: %d ' % (colsurname)) |
---|
223 | |
---|
224 | # detect firstname column |
---|
225 | for colnum in range(sh.ncols): |
---|
226 | if sh.col_values(colnum)[titlerow] == u'Prénom': |
---|
227 | colfirstname = colnum |
---|
228 | if is_verbose == True: |
---|
229 | print('colfirstname: %d ' % (colfirstname)) |
---|
230 | |
---|
231 | # detect arrival column |
---|
232 | for colnum in range(sh.ncols): |
---|
233 | if sh.col_values(colnum)[titlerow] == u"Date d'arriv\xe9e dans l'unit\xe9\n(9)": |
---|
234 | colarrival = colnum |
---|
235 | if is_verbose == True: |
---|
236 | print('colarrival: %d ' % (colarrival)) |
---|
237 | |
---|
238 | # detect publi column |
---|
239 | for colnum in range(sh.ncols): |
---|
240 | if sh.col_values(colnum)[titlerow] == u'N\xb0 des 5 productions les plus significatives dans la p\xe9riode \xe9valu\xe9e\n(11)': |
---|
241 | colpubli = colnum |
---|
242 | if is_verbose == True: |
---|
243 | print('colpubli: %d ' % (colpubli)) |
---|
244 | |
---|
245 | datemode = 0 |
---|
246 | |
---|
247 | surname = sh.col_values(colsurname)[firstrow:lastrow] |
---|
248 | |
---|
249 | if is_verbose == True: |
---|
250 | print(' nb surname %d' % len(surname)) |
---|
251 | print(' surname %s' % surname) |
---|
252 | |
---|
253 | firstname = sh.col_values(colfirstname)[firstrow:lastrow] |
---|
254 | |
---|
255 | if is_verbose == True: |
---|
256 | print(' nb firstname %d' % len(firstname)) |
---|
257 | print(' firstname %s' % firstname) |
---|
258 | |
---|
259 | temp = sh.col_values(colarrival)[firstrow:lastrow] |
---|
260 | arrival = [] |
---|
261 | for rownum in range(len(temp)): |
---|
262 | if is_verbose == True: |
---|
263 | print(' temp arrival AAA%sAAA' % temp[rownum]) |
---|
264 | if temp[rownum] == '': |
---|
265 | print('eee : pas de date arrivee colonne %d ' % rownum) |
---|
266 | else: |
---|
267 | arrival.append(datetime.datetime(*xlrd.xldate_as_tuple(temp[rownum], datemode))) |
---|
268 | |
---|
269 | if is_verbose == True: |
---|
270 | print(' nb arrival %d' % len(arrival)) |
---|
271 | print(' arrival %s' % arrival) |
---|
272 | |
---|
273 | publi = sh.col_values(colpubli)[firstrow:lastrow] |
---|
274 | |
---|
275 | if is_verbose == True: |
---|
276 | print(' nb publi %d' % len(publi)) |
---|
277 | print(' publi %s' % publi) |
---|
278 | |
---|
279 | return surname, firstname, arrival, publi |
---|
280 | |
---|
281 | def aeresrh(): |
---|
282 | """main |
---|
283 | """ |
---|
284 | try: |
---|
285 | parser = get_option_parser () |
---|
286 | (fromcli, args) = parser.parse_args() |
---|
287 | except IOError, msg: |
---|
288 | parser.error(str(msg)) |
---|
289 | |
---|
290 | is_verbose = fromcli.is_verbose |
---|
291 | ifile = fromcli.ifile |
---|
292 | if fromcli.is_verbose == True: |
---|
293 | print ('ifile : %s' % (ifile)) |
---|
294 | |
---|
295 | surname, firstname, arrival, publi = readrh(ifile, is_verbose) |
---|
296 | # |
---|
297 | # build surname id |
---|
298 | surname_id = build_surname_id(surname) |
---|
299 | firstname_id = build_firstname_id(firstname) |
---|
300 | # |
---|
301 | # populate dictionary |
---|
302 | keys_for_all = [] |
---|
303 | values_for_all = [] |
---|
304 | keys_for_one = ['surname', 'firstname', 'arrival', 'publi'] |
---|
305 | for index in range(len(surname_id)): |
---|
306 | values_for_one = [] |
---|
307 | values_for_one.append(surname[index]) |
---|
308 | values_for_one.append(firstname[index]) |
---|
309 | values_for_one.append(arrival[index]) |
---|
310 | values_for_one.append(publi[index]) |
---|
311 | values_for_all.append(dict(itertools.izip(keys_for_one, values_for_one))) |
---|
312 | keys_for_all.append('{surname_id}_{firstname_id}'.format(surname_id=surname_id[index], firstname_id=firstname_id[index])) |
---|
313 | |
---|
314 | |
---|
315 | if is_verbose == True: |
---|
316 | print ('len keys_for_all : %d ' % len(keys_for_all)) |
---|
317 | print ('person_id 0 : %s ' % keys_for_all[0]) |
---|
318 | for index, key_for_all in enumerate(keys_for_all): |
---|
319 | print ('person_id (key) : %s ' % key_for_all) |
---|
320 | print ('associated info : %s ' % values_for_all[index]) |
---|
321 | |
---|
322 | people = dict(itertools.izip(keys_for_all, values_for_all)) |
---|
323 | |
---|
324 | if is_verbose == True: |
---|
325 | print (' result looking for key PINSARD_francoise : %s ' % people['PINSARD_francoise']) |
---|
326 | |
---|
327 | if is_verbose == True: |
---|
328 | result = find_in_dict(people, 'surname', 'PINSARD') |
---|
329 | print (' result looking for PINSARD %s ' % result) |
---|
330 | |
---|
331 | # Run main, if called from the command line |
---|
332 | if __name__ == '__main__': |
---|
333 | aeresrh() |
---|
334 | |
---|