source: trunk/src/read_lai_gis.m @ 321

Last change on this file since 321 was 321, checked in by lelod, 13 years ago

corrrection path

File size: 11.6 KB
Line 
1function [lon_value, lat_value, datejul_value, datestr_value, lai_value, gridcode_value, classes_value,msd_value, msdnom_value]=read_lai_gis(yyyy,nbligne)
2
3%READ_LAI_GIS fill LAI 2D array and associated arrays date, lat, lon, vegetation classe, pedologiy classe, etc. with data in :file:`${PROJECT_ID}/LAI/LAI{yyyy}F_veg2010_sol.csv`
4
5%
6%+
7%
8% ==============
9% read_lai_gis.m
10% ==============
11%
12% .. function:: read_lai_gis(yyyy)
13%
14% DESCRIPTION
15% ===========
16%
17%    :param yyyy: year
18%    :type yyyy: int16
19%je ne comprend pas: avec un int16, num2str ne fonctionne pas sous
20%octave
21%    :type yyyy: int8 (ordinaire)
22%    :raise yyyy: required ex 2008
23%    : type nbligne : nb ligne a lire ex 10000
24%
25% read a file :file:`${PROJECT_ID}/LAI_CSV/LAI{yyyy}F_veg2010_sol.csv`
26%
27% return associated arrays lat, lon, lai, vegetation class , pedology class
28%
29% Les dates au format `yyyymmddhhmn` sont stockées dans le tableau
30% **datestr_value** et les dates en jour julien sont stockées dans
31% **datejul_value**.
32% test realise sur des vraies donnees
33% corrige pb de colonne mapusub souvent vide de donnees avec
34% textedit (remplace ;; par ;" ";)
35%lecture reussie sur un nb limite de lignes, mais pb memoire pour
36%traitement du fichier complet (sur mon Mac)
37%corrige quelques pbs dans le prgm (inversion colonnes, ismember
38%remplace par isnan,...)
39%
40% pb de memoire: impossible d'avoir l'allocation memoire pour les 50000 lignes du fichier
41% fonctionne jusqu'a 45000 lignes (sur cratos).
42%pour developpements d'analyse LAI par Soukeye, ajout du parametre
43%nbligne dans la fonction
44% lelod 20110728
45%
46% EXAMPLES
47% ========
48%
49%
50% TIPS
51% ====
52%
53% To see the first line (header) of this kind of file::
54%
55%   $  sed -ne "1,1p" lai2008_Clip_Intersect_Inter.csv
56%
57%
58% To see the second line (first line of data) of this kind of file::
59%
60%   $  sed -ne "2,2p" lai2008_Clip_Intersect_Inter.csv
61%
62%
63% SEE ALSO
64% ========
65%
66% :ref:`data_lai`
67% :ref:`zones`
68%
69% :func:`simul_lai_gis` ++
70% :func:`write_lai_gis`
71% :func:`plot_lai_gis` ++
72%
73% :func:`read_lai_2d.m`
74%
75% TODO
76% ====
77%
78% add zi_code parameter when filename convention will be changed
79%
80%
81% il ne devrait plus y avoir des lignes avec tous le LAI à NaN ici
82%
83%
84% hard coded value of nb_line_data_max;
85% handling eof vs nb_line_data_max
86%
87% coding rules
88%
89% EVOLUTIONS
90% ==========
91%
92% $Id$
93%
94% $URL$
95%
96% - fplod 20110708T085037Z aedon.locean-ipsl.upmc.fr (Darwin)
97%
98%   * reading data line using technique of read_zi.m
99%     because of string values dmlread connaot be used
100%   * file written in correct format (. for decimal, ";" as
101%     separator, float values for numbers (no more string for
102%     numeric values)   - lelod 20110720
103%   * add datejul_value output and all GIS info
104%
105% - fplod 20110706T154713Z aedon.locean-ipsl.upmc.fr (Darwin)
106%
107%   * creation draft from la2008i_Clip_Intersect_Inter.csv produce by
108%     openoffice saving la2008i_Clip_Intersect_Inter.dbf in CSV format
109%     and read_lai_2d.m
110%
111%     cautions : some modification on the original test file
112%     because too many user no friendly features.
113%
114%-
115%
116global PROJECT_ID
117%
118%usage='[lon_value, lat_value, datejul_value, datestr_value,lai_value, gridcode_value, classes_value,msd_value, msdnom_value]=read_lai_gis(yyyy, nbligne)';
119%
120%if nargin~=1
121%    disp(['Incorrect number of arguments']);
122%    error(usage);
123%end
124%
125%arg_info=whos('yyyy');
126%if ~strcmp(arg_info.class,'int16')
127%    disp(['Incorrect type of arg yyyy']);
128%    whos yyyy
129%    error(usage);
130%end
131%clear arg_info
132%
133% build filename to be read++
134%fullfilename=['/Users/lelod/progX11/varamma_id/senegal/LAI/LAI_CSV/LAI' num2str(yyyy) 'F_veg2010_sol.csv'];
135%
136%fullfilename=['/usr/temp/lelod/varamma_d/LAI_CSV/LAI' num2str(yyyy) 'F_veg2010_sol.csv'];
137%
138fullfilename=[ PROJECT_ID 'LAI_CSV/LAI' num2str(yyyy) 'F_veg2010_sol.csv'];
139disp(['iii : opening for reading ', fullfilename]);
140% opening file
141[fid,message]=fopen(fullfilename,'r');
142% test if fullfilename exists
143if (fid == -1)
144    disp(message)
145    error([ fullfilename ' not exist'])
146end
147clear message
148%
149% first line contains "lat","lon" and
150% an unknown quantity of days of years
151% so we read this line and loop to find the number of columns
152%
153% get first line as a string
154header_line = fgetl(fid);
155%
156% loop to find the number of columns
157% and initialize the "day of year" array
158index_column = 0;
159% the maximum of doy is the number of days in year yyyy
160% divided by 8 (according to time resolution of this dataset)
161yyyy_d=double(yyyy);
162nb_doy_max=ceil((datenum(yyyy_d,12,eomday(yyyy_d,12)) - datenum(yyyy_d,1,1))/7);
163% attention les semaines sont de 7 jours )--> 52 semaines (avec 8j,
164% on tombe a 46)
165doy=zeros(nb_doy_max,1);
166doy(:)=NaN;
167while ( ~isempty(header_line) )
168  % parse next column label
169  [next,header_line] = strtok(header_line,';');
170  switch next
171     case {'"Y"'}
172     case {'"X"'}
173    case {'"FID_occups"'}
174     case {'"ID"'}
175     case {'"GRIDCOD"'}
176     case {'"FID_bvferl"'}
177    case {'"ID_1"'}
178     case {'"GRIDCODE_1"'}
179     case {'"classe"'}
180     case {'"AREA"'}
181     case {'"PERIMETER"'}
182     case {'"SGSOLRU_"'}
183    case {'"SGSOLRU_ID"'}
184     case {'"MAPU"'}
185     case {'"MAPUSUB"'}
186     case {'"MSD"'}
187     case {'"MSDNOM"'}
188     otherwise
189        % here we expect somesthing like "doy"
190        % for example day 9 will be coded "9"
191        % ++ todo regexp
192        [sdoy, extra_sdoy] = strtok(next,';');
193        clear extra_sdoy
194        doy_string = sdoy;
195        %(1,3:end); plus utile, puisque header nettoye a la fabrication
196        clear sdoy
197        doy(index_column - 1) = str2double(doy_string);
198        clear doy_string
199  end
200  clear next
201  % increase index of column
202  index_column = index_column + 1;
203end
204%
205% we know now how many columns are written on header
206nb_column = index_column;
207nb_doy = nb_column - 2 - 15
208clear index_column
209%
210% remove extra doy NaN values due to preallocation
211doy=doy(~isnan(doy));
212%
213% initialisation d'un tableau string de nb_doy elements à 9999999
214datestr_value=repmat('99999999',nb_doy,1);
215for index_doy=1:nb_doy
216    datejul_value(index_doy)=datenum(yyyy_d,1,1) + doy(index_doy) - 1;
217end
218datestr_value=datestr(datejul_value,'yyyymmdd');
219%
220% preallocation of output arrays to missing value: attention:
221% entre en parametre
222nb_line_data_max = nbligne;
223%45000 valeur max;
224%warning!!! 20 lignes pour le test - a ajuster sur le plus gros
225%fichier (50247 lignes?)
226lat_value=zeros(1,nb_line_data_max);
227lat_value(:)=NaN;
228lon_value=zeros(1,nb_line_data_max);
229lon_value(:)=NaN;
230lai_value=zeros(nb_doy,nb_line_data_max);
231lai_value(:,:)=NaN;
232gridcode_value=zeros(nb_line_data_max);
233gridcode_value(:)=NaN;
234%gridcode_1_value=zeros(nb_line_data_max);
235%gridcode_1_value(:)=NaN;
236classes_value=repmat({'NaN'},nb_line_data_max,1);
237%area_value=zeros(nb_line_data_max);
238%area_value(:)=NaN;
239%perimeter_value=zeros(nb_line_data_max);
240%perimeter_value(:)=NaN;
241%sgsolru_value=zeros(nb_line_data_max);
242%sgsolru_value(:)=NaN;
243%mapu_value=repmat({'NaN'},nb_line_data_max,1);
244%mapusub_value=repmat({'NaN'},nb_line_data_max,1);
245msd_value=zeros(nb_line_data_max);
246msd_value(:)=NaN;
247msdnom_value=repmat({'NaN'},nb_line_data_max,1);
248%
249% reading data from file (from second line till end of file)
250%lat_value=dlmread(fid,';',0,1)
251
252index_line_data=1;
253%
254%Attention: fichier trop gros. On ne lit quie les premieres nbligne lignes
255%while (feof(fid)~= 1)
256while (index_line_data <nbligne+1)
257%
258%
259  data_line = fgetl(fid);
260  % loop to handle eahc item on the data line (";" separated value)
261  % attention, il arrive qu'une cellule soit vide (mapusub) : corrige en principe
262  index_item=1;
263  while ( ~isempty(data_line))
264     % parse next column label
265     [item,data_line] = strtok(data_line,';');
266     if (index_item == 1 )
267       lat_value(index_line_data)=str2double(item);
268     elseif (index_item == 2 )
269       lon_value(index_line_data)=str2double(item);
270     elseif (index_item > 2 && index_item <= 2 + nb_doy)
271       lai_value(index_item - 2, index_line_data)=str2double(item);
272     elseif (index_item ==  2 + nb_doy + 1)
273   %    fid_occups_value(index_line_data)=str2double(item);
274    elseif (index_item ==  2 + nb_doy + 2)
275    %   id_value(index_line_data)=str2double(item);
276     elseif (index_item ==  2 + nb_doy + 3)
277       gridcode_value(index_line_data)=str2double(item);
278     elseif (index_item ==  2 + nb_doy + 4)
279    %   fid_bvferl_value(index_line_data)=str2double(item);
280    elseif (index_item ==  2 + nb_doy + 5)
281   %   id_1_value(index_line_data)=str2double(item);
282     elseif (index_item ==  2 + nb_doy + 6)
283    %   gridcode_1_value(index_line_data)=str2double(item);
284     elseif (index_item ==  2 + nb_doy + 7)
285       classes_value(index_line_data)=cellstr(item);
286     elseif (index_item ==  2 + nb_doy + 8)
287    %   area_value(index_line_data)=str2double(item);
288     elseif (index_item ==  2 + nb_doy + 9)
289    %   perimeter_value(index_line_data)=str2double(item);
290     elseif (index_item ==  2 + nb_doy + 10)
291    %   sgsolru_value(index_line_data)=str2double(item);
292     elseif (index_item ==  2 + nb_doy + 11)
293    %   sgsolru_id_value(index_line_data)=str2double(item);
294     elseif (index_item ==  2 + nb_doy + 12)
295    %   mapu_value(index_line_data)=cellstr(item);
296     elseif (index_item ==  2 + nb_doy + 13)
297    %   mapusub_value(index_line_data)=cellstr(item);
298    elseif (index_item ==  2 + nb_doy + 14)
299       msd_value(index_line_data)=str2double(item);
300    elseif (index_item ==  2 + nb_doy + 15)
301       msdnom_value(index_line_data)=cellstr(item);
302    else
303       disp([' eee : data_line ', data_line ]);
304       disp([' eee : item ', item]);
305       disp([' eee : index_line_data ', num2str(index_line_data) ]);
306       disp([' eee : index_item ', num2str(index_item) ]);
307       error([ 'invalid data line (too many items) ']);
308    end
309    clear item
310    % increase index of item
311    index_item = index_item + 1;
312  end
313  % check for number of item in current data line
314  if (index_item < nb_doy + 2 + 15)
315     disp([' eee : data_line ', data_line ]);
316     disp([' eee : index_line_data ', num2str(index_line_data) ]);
317     disp([' eee : index_item ', num2str(index_item) ]);
318     error([ 'invalid data line (missing items) :'])
319  end
320  index_line_data=index_line_data+1;
321  clear data_line
322  clear index_item
323end
324if (index_line_data > nb_line_data_max+1)
325  error([' need to increase nb_line_data_max up to ' num2str(index_line_data) ]);
326end
327%
328clear index_line_data
329%
330fclose(fid);
331clear fid
332%
333% remove extra data NaN values due to preallocation
334% criteria = when lon_value is not NaN
335missing_data_indices=find(isnan(lon_value));
336if (~isempty(missing_data_indices))
337    nb_data=min(missing_data_indices)-1
338    lat_value=lat_value(1:nb_data);
339    lon_value=lon_value(1:nb_data);
340    lai_value=lai_value(:,1:nb_data);
341    %fid_occups_value=fid_occups_value(1:nb_data);
342    %id_value=id_value(1:nb_data);
343    gridcode_value=gridcode_value(1:nb_data);
344    %fid_bvferl_value=fid_bvferl_value(1:nb_data);
345    %id_1_value=id_1_value(1:nb_data);
346    %gridcode_1_value=gridcode_1_value(1:nb_data);
347    classes_value=classes_value(1:nb_data);
348    %area_value=area_value(1:nb_data);
349   % perimeter_value=perimeter_value(1:nb_data);
350    %sgsolru_value=sgsolru_value(1:nb_data);
351    %sgsolru_id_value=sgsolru_id_value(1:nb_data);
352    %mapu_value=mapu_value(1:nb_data);
353   % mapusub_value=mapusub_value(1:nb_data);
354    msd_value=msd_value(1:nb_data);
355    msdnom_value=msdnom_value(1:nb_data);
356end
357%
358clear nb_line_data_max
359%
360%whos
361%
362result=0;
363end
364
365%!demo
366%! % To read :file:`${PROJECT_ID}/LAI/la2008i_Clip_Intersect_Inter.csv`:
367%! clear all
368%! close all
369%! varamma_startup
370%! more off
371%! yyyy=int16(2008);
372%! [lon_value, lat_value, datejul_value, datestr_value, lai_value, fid_occups_value, id_value, gridcode_value, fid_bvferl_value, id_1_value, gridcode_1_value, classes_value, area_value, perimeter_value, sgsolru_value, sgsolru_id_value, mapu_value, mapusub, msd_value, msdnom_value]=read_lai_gis(yyyy);
Note: See TracBrowser for help on using the repository browser.