source: trunk/src/read_lai_gis.m @ 327

Last change on this file since 327 was 327, checked in by pinsard, 13 years ago

change svn properties

  • Property svn:keywords set to URL
File size: 11.7 KB
Line 
1function [lon_value, lat_value, datejul_value, datestr_value, lai_value, gridcode_value, classes_value,msd_value, msdnom_value]=read_lai_gis(yyyy,nbligne)
2
3%READ_LAI_GIS fill LAI 2D array and associated arrays date, lat, lon, vegetation classe, pedologiy classe, etc. with data in :file:`${PROJECT_ID}/LAI/LAI{yyyy}F_veg2010_sol.csv`
4
5%
6%+
7%
8% ==============
9% read_lai_gis.m
10% ==============
11%
12% .. function:: read_lai_gis(yyyy)
13%
14% DESCRIPTION
15% ===========
16%
17%    :param yyyy: year
18%    :type yyyy: int16
19%                ++todo++ je ne comprend pas: avec un int16, num2str ne fonctionne pas sous octave
20%    :type yyyy: int8 (ordinaire)
21%    :raise yyyy: required ex 2008
22%
23%    :param nbligne : nb ligne a lire ex 10000
24%    :type nbligne : int
25%    :raise nbligne: required
26%
27% read a file :file:`${PROJECT_ID}/LAI_CSV/LAI{yyyy}F_veg2010_sol.csv`
28%
29% return associated arrays lat, lon, lai, vegetation class , pedology class
30%
31% Les dates au format `yyyymmddhhmn` sont stockées dans le tableau
32% **datestr_value** et les dates en jour julien sont stockées dans
33% **datejul_value**.
34% test realise sur des vraies donnees
35% corrige pb de colonne mapusub souvent vide de donnees avec
36% textedit (remplace ;; par ;" ";)
37%lecture reussie sur un nb limite de lignes, mais pb memoire pour
38%traitement du fichier complet (sur mon Mac)
39%corrige quelques pbs dans le prgm (inversion colonnes, ismember
40%remplace par isnan,...)
41%
42% pb de memoire: impossible d'avoir l'allocation memoire pour les 50000 lignes du fichier
43% fonctionne jusqu'a 45000 lignes (sur cratos).
44%pour developpements d'analyse LAI par Soukeye, ajout du parametre
45%nbligne dans la fonction
46% lelod 20110728
47%
48% EXAMPLES
49% ========
50%
51%
52% TIPS
53% ====
54%
55% To see the first line (header) of this kind of file::
56%
57%   $  sed -ne "1,1p" lai2008_Clip_Intersect_Inter.csv
58%
59%
60% To see the second line (first line of data) of this kind of file::
61%
62%   $  sed -ne "2,2p" lai2008_Clip_Intersect_Inter.csv
63%
64%
65% SEE ALSO
66% ========
67%
68% :ref:`data_lai`
69% :ref:`zones`
70%
71% :func:`simul_lai_gis` ++
72% :func:`write_lai_gis`
73% :func:`plot_lai_gis` ++
74%
75% :func:`read_lai_2d.m`
76%
77% TODO
78% ====
79%
80% add zi_code parameter when filename convention will be changed
81%
82%
83% il ne devrait plus y avoir des lignes avec tous le LAI à NaN ici
84%
85%
86% hard coded value of nb_line_data_max;
87% handling eof vs nb_line_data_max
88%
89% coding rules
90%
91% EVOLUTIONS
92% ==========
93%
94% $Id$
95%
96% $URL$
97%
98% - fplod 20110708T085037Z aedon.locean-ipsl.upmc.fr (Darwin)
99%
100%   * reading data line using technique of read_zi.m
101%     because of string values dmlread connaot be used
102%   * file written in correct format (. for decimal, ";" as
103%     separator, float values for numbers (no more string for
104%     numeric values)   - lelod 20110720
105%   * add datejul_value output and all GIS info
106%
107% - fplod 20110706T154713Z aedon.locean-ipsl.upmc.fr (Darwin)
108%
109%   * creation draft from la2008i_Clip_Intersect_Inter.csv produce by
110%     openoffice saving la2008i_Clip_Intersect_Inter.dbf in CSV format
111%     and read_lai_2d.m
112%
113%     cautions : some modification on the original test file
114%     because too many user no friendly features.
115%
116%-
117%
118global PROJECT_ID
119%
120%usage='[lon_value, lat_value, datejul_value, datestr_value,lai_value, gridcode_value, classes_value,msd_value, msdnom_value]=read_lai_gis(yyyy, nbligne)';
121%
122%if nargin~=1
123%    disp(['Incorrect number of arguments']);
124%    error(usage);
125%end
126%
127%arg_info=whos('yyyy');
128%if ~strcmp(arg_info.class,'int16')
129%    disp(['Incorrect type of arg yyyy']);
130%    whos yyyy
131%    error(usage);
132%end
133%clear arg_info
134%
135% build filename to be read++
136%fullfilename=['/Users/lelod/progX11/varamma_id/senegal/LAI/LAI_CSV/LAI' num2str(yyyy) 'F_veg2010_sol.csv'];
137%
138%fullfilename=['/usr/temp/lelod/varamma_d/LAI_CSV/LAI' num2str(yyyy) 'F_veg2010_sol.csv'];
139%
140fullfilename=[ PROJECT_ID 'LAI_CSV/LAI' num2str(yyyy) 'F_veg2010_sol.csv'];
141disp(['iii : opening for reading ', fullfilename]);
142% opening file
143[fid,message]=fopen(fullfilename,'r');
144% test if fullfilename exists
145if (fid == -1)
146    disp(message)
147    error([ fullfilename ' not exist'])
148end
149clear message
150%
151% first line contains "lat","lon" and
152% an unknown quantity of days of years
153% so we read this line and loop to find the number of columns
154%
155% get first line as a string
156header_line = fgetl(fid);
157%
158% loop to find the number of columns
159% and initialize the "day of year" array
160index_column = 0;
161% the maximum of doy is the number of days in year yyyy
162% divided by 8 (according to time resolution of this dataset)
163yyyy_d=double(yyyy);
164nb_doy_max=ceil((datenum(yyyy_d,12,eomday(yyyy_d,12)) - datenum(yyyy_d,1,1))/8);
165% attention les semaines sont de 7 jours )--> 52 semaines (avec 8j,
166% on tombe a 46)
167doy=zeros(nb_doy_max,1);
168doy(:)=NaN;
169while ( ~isempty(header_line) )
170  % parse next column label
171  [next,header_line] = strtok(header_line,';');
172  switch next
173     case {'"Y"'}
174     case {'"X"'}
175    case {'"FID_occups"'}
176     case {'"ID"'}
177     case {'"GRIDCOD"'}
178     case {'"FID_bvferl"'}
179    case {'"ID_1"'}
180     case {'"GRIDCODE_1"'}
181     case {'"classe"'}
182     case {'"AREA"'}
183     case {'"PERIMETER"'}
184     case {'"SGSOLRU_"'}
185    case {'"SGSOLRU_ID"'}
186     case {'"MAPU"'}
187     case {'"MAPUSUB"'}
188     case {'"MSD"'}
189     case {'"MSDNOM"'}
190     otherwise
191        % here we expect somesthing like "doy"
192        % for example day 9 will be coded "9"
193        % ++ todo regexp
194        [sdoy, extra_sdoy] = strtok(next,';');
195        clear extra_sdoy
196        doy_string = sdoy;
197        %(1,3:end); plus utile, puisque header nettoye a la fabrication
198        clear sdoy
199        doy(index_column - 1) = str2double(doy_string);
200        clear doy_string
201  end
202  clear next
203  % increase index of column
204  index_column = index_column + 1;
205end
206%
207% we know now how many columns are written on header
208nb_column = index_column;
209nb_doy = nb_column - 2 - 15
210clear index_column
211%
212% remove extra doy NaN values due to preallocation
213doy=doy(~isnan(doy));
214%
215% initialisation d'un tableau string de nb_doy elements à 9999999
216datestr_value=repmat('99999999',nb_doy,1);
217for index_doy=1:nb_doy
218    datejul_value(index_doy)=datenum(yyyy_d,1,1) + doy(index_doy) - 1;
219end
220datestr_value=datestr(datejul_value,'yyyymmdd');
221%
222% preallocation of output arrays to missing value: attention:
223% entre en parametre
224nb_line_data_max = nbligne;
225%45000 valeur max;
226%warning!!! 20 lignes pour le test - a ajuster sur le plus gros
227%fichier (50247 lignes?)
228lat_value=zeros(1,nb_line_data_max);
229lat_value(:)=NaN;
230lon_value=zeros(1,nb_line_data_max);
231lon_value(:)=NaN;
232lai_value=zeros(nb_doy,nb_line_data_max);
233lai_value(:,:)=NaN;
234gridcode_value=zeros(1,nb_line_data_max);
235gridcode_value(:)=NaN;
236%gridcode_1_value=zeros(nb_line_data_max);
237%gridcode_1_value(:)=NaN;
238classes_value=repmat({'NaN'},nb_line_data_max,1);
239%area_value=zeros(nb_line_data_max);
240%area_value(:)=NaN;
241%perimeter_value=zeros(nb_line_data_max);
242%perimeter_value(:)=NaN;
243%sgsolru_value=zeros(nb_line_data_max);
244%sgsolru_value(:)=NaN;
245%mapu_value=repmat({'NaN'},nb_line_data_max,1);
246%mapusub_value=repmat({'NaN'},nb_line_data_max,1);
247msd_value=zeros(1,nb_line_data_max);
248msd_value(:)=NaN;
249msdnom_value=repmat({'NaN'},nb_line_data_max,1);
250%
251% reading data from file (from second line till end of file)
252%lat_value=dlmread(fid,';',0,1)
253
254index_line_data=1;
255%
256%Attention: fichier trop gros. On ne lit que les premieres nbligne lignes
257%while (feof(fid)~= 1)
258while (index_line_data <nbligne+1)
259%
260%
261  data_line = fgetl(fid);
262  % loop to handle eahc item on the data line (";" separated value)
263  % attention, il arrive qu'une cellule soit vide (mapusub) : corrige en principe
264  index_item=1;
265  while ( ~isempty(data_line))
266     % parse next column label
267     [item,data_line] = strtok(data_line,';');
268     if (index_item == 1 )
269       lat_value(index_line_data)=str2double(item);
270     elseif (index_item == 2 )
271       lon_value(index_line_data)=str2double(item);
272     elseif (index_item > 2 && index_item <= 2 + nb_doy)
273       lai_value(index_item - 2, index_line_data)=str2double(item);
274     elseif (index_item ==  2 + nb_doy + 1)
275   %    fid_occups_value(index_line_data)=str2double(item);
276    elseif (index_item ==  2 + nb_doy + 2)
277    %   id_value(index_line_data)=str2double(item);
278     elseif (index_item ==  2 + nb_doy + 3)
279       gridcode_value(index_line_data)=str2double(item);
280     elseif (index_item ==  2 + nb_doy + 4)
281    %   fid_bvferl_value(index_line_data)=str2double(item);
282    elseif (index_item ==  2 + nb_doy + 5)
283   %   id_1_value(index_line_data)=str2double(item);
284     elseif (index_item ==  2 + nb_doy + 6)
285    %   gridcode_1_value(index_line_data)=str2double(item);
286     elseif (index_item ==  2 + nb_doy + 7)
287       classes_value(index_line_data)=cellstr(item);
288     elseif (index_item ==  2 + nb_doy + 8)
289    %   area_value(index_line_data)=str2double(item);
290     elseif (index_item ==  2 + nb_doy + 9)
291    %   perimeter_value(index_line_data)=str2double(item);
292     elseif (index_item ==  2 + nb_doy + 10)
293    %   sgsolru_value(index_line_data)=str2double(item);
294     elseif (index_item ==  2 + nb_doy + 11)
295    %   sgsolru_id_value(index_line_data)=str2double(item);
296     elseif (index_item ==  2 + nb_doy + 12)
297    %   mapu_value(index_line_data)=cellstr(item);
298     elseif (index_item ==  2 + nb_doy + 13)
299    %   mapusub_value(index_line_data)=cellstr(item);
300    elseif (index_item ==  2 + nb_doy + 14)
301       msd_value(index_line_data)=str2double(item);
302    elseif (index_item ==  2 + nb_doy + 15)
303       msdnom_value(index_line_data)=cellstr(item);
304    else
305       disp([' eee : data_line ', data_line ]);
306       disp([' eee : item ', item]);
307       disp([' eee : index_line_data ', num2str(index_line_data) ]);
308       disp([' eee : index_item ', num2str(index_item) ]);
309       error([ 'invalid data line (too many items) ']);
310    end
311    clear item
312    % increase index of item
313    index_item = index_item + 1;
314  end
315  % check for number of item in current data line
316  if (index_item < nb_doy + 2 + 15)
317     disp([' eee : data_line ', data_line ]);
318     disp([' eee : index_line_data ', num2str(index_line_data) ]);
319     disp([' eee : index_item ', num2str(index_item) ]);
320     error([ 'invalid data line (missing items) :'])
321  end
322  index_line_data=index_line_data+1;
323  clear data_line
324  clear index_item
325end
326if (index_line_data > nb_line_data_max+1)
327  error([' need to increase nb_line_data_max up to ' num2str(index_line_data) ]);
328end
329%
330clear index_line_data
331%
332fclose(fid);
333clear fid
334%
335% remove extra data NaN values due to preallocation
336% criteria = when lon_value is not NaN
337missing_data_indices=find(isnan(lon_value));
338if (~isempty(missing_data_indices))
339    nb_data=min(missing_data_indices)-1
340    lat_value=lat_value(1:nb_data);
341    lon_value=lon_value(1:nb_data);
342    lai_value=lai_value(:,1:nb_data);
343    %fid_occups_value=fid_occups_value(1:nb_data);
344    %id_value=id_value(1:nb_data);
345    gridcode_value=gridcode_value(1:nb_data);
346    %fid_bvferl_value=fid_bvferl_value(1:nb_data);
347    %id_1_value=id_1_value(1:nb_data);
348    %gridcode_1_value=gridcode_1_value(1:nb_data);
349    classes_value=classes_value(1:nb_data);
350    %area_value=area_value(1:nb_data);
351   % perimeter_value=perimeter_value(1:nb_data);
352    %sgsolru_value=sgsolru_value(1:nb_data);
353    %sgsolru_id_value=sgsolru_id_value(1:nb_data);
354    %mapu_value=mapu_value(1:nb_data);
355   % mapusub_value=mapusub_value(1:nb_data);
356    msd_value=msd_value(1:nb_data);
357    msdnom_value=msdnom_value(1:nb_data);
358end
359%
360clear nb_line_data_max
361%
362%whos
363%
364result=0;
365end
366
367%!demo
368%! % To read :file:`${PROJECT_ID}/LAI/la2008i_Clip_Intersect_Inter.csv`:
369%! clear all
370%! close all
371%! varamma_startup
372%! more off
373%! yyyy=int16(2008);
374%! [lon_value, lat_value, datejul_value, datestr_value, lai_value, fid_occups_value, id_value, gridcode_value, fid_bvferl_value, id_1_value, gridcode_1_value, classes_value, area_value, perimeter_value, sgsolru_value, sgsolru_id_value, mapu_value, mapusub, msd_value, msdnom_value]=read_lai_gis(yyyy);
Note: See TracBrowser for help on using the repository browser.