source: branches/2012/dev_v3_4_STABLE_2012/NEMOGCM/EXTERNAL/XMLF90/src/xpath/m_path.f90 @ 5171

Last change on this file since 5171 was 5171, checked in by pabouttier, 6 years ago

Remove star character to avoid C-style comments inside fortran comments; See Ticket #1501

  • Property svn:keywords set to Id
File size: 19.2 KB
Line 
1module m_path
2!
3! XPATH-like API for XML Parsing
4! Copyright Alberto Garcia <wdpgaara@lg.ehu.es>, August 2003
5!
6use flib_sax
7
8private
9!
10public :: get_node, mark_node, enable_debug, disable_debug
11
12private :: match, process_node, get_path
13private :: begin_element, end_element, pcdata_handler, empty_element
14private :: pause_parsing
15
16!
17integer, private, save :: global_status
18integer, public, parameter :: END_OF_FILE = -1
19integer, public, parameter :: END_OF_ANCESTOR_ELEMENT = -2
20integer, public, parameter :: PCDATA_OVERFLOW = 7
21
22logical, private, save :: debug_xpath = .false.
23logical, private, save :: debug_sax = .false.
24character(len=500), private, save :: path_required
25character(len=100), private, save :: target_path ! *** Hard limit
26
27
28logical, private, save :: in_target_element = .false.
29logical, private, save :: in_pcdata_level = .false.
30
31logical, private, save :: stop_parsing = .false.
32!
33! This global variable determines whether we stop after
34! getting the initial element tag, or after digesting the full node.
35!
36logical, private, save :: full_node = .true.
37
38logical, private, save :: relative_path = .false.
39logical, private, save :: looking_for_current_element
40
41logical, private, save :: attributes_requested
42type(dictionary_t), private, save, pointer :: attributes_recovered
43
44integer, parameter, private :: MAX_PCDATA_SIZE = 65536
45logical, private, save :: pcdata_requested
46character(len=MAX_PCDATA_SIZE), private, &
47                                save :: pcdata_recovered !*** Hard
48integer, private, save :: len_pcdata
49integer, private, save :: max_len_pcdata
50
51type(xml_t), pointer, save, private :: xp
52
53CONTAINS !===========================================================
54
55!----------------------------------------------------
56! Debugging control
57!
58subroutine enable_debug(sax)
59logical, intent(in), optional :: sax
60  debug_xpath = .true.
61  debug_sax = .false.
62  if (present(sax)) then
63     debug_sax = sax
64  endif
65end subroutine enable_debug
66
67subroutine disable_debug()
68  debug_xpath = .false.
69end subroutine disable_debug
70
71!----------------------------------------------------
72! Main routines
73!---------------------------------------------------------------------
74subroutine mark_node(fxml,path,att_name,att_value,attributes,status)
75!
76! Performs a search of a given element (by path, and/or presence of
77! a given attribute and/or value of that attribute), returning optionally
78! the element's attribute dictionary, and leaving the file handle fxml'
79! ready to process the rest of the element's contents (child elements'
80! and/or pcdata).
81!
82! Side effects: it sets "ancestor_path" to the element's path'
83!
84! If the argument "path" is present and evaluates to a relative path (a
85! string not beginning with "/"), the search is interrupted after the end
86! of the "ancestor_element" set by a previous call to "mark_node".
87! If not earlier, the search ends at the end of the file.
88!
89! The status argument, if present, will hold a return value,
90! which will be:
91!
92! 0 on success,
93! negative in case of end-of-file or end-of-ancestor-element, or
94! positive in case of a malfunction
95!
96type(xml_t), intent(inout), target :: fxml
97character(len=*), intent(in), optional :: path
98character(len=*), intent(in), optional :: att_name
99character(len=*), intent(in), optional :: att_value
100type(dictionary_t), intent(out), optional :: attributes
101integer, intent(out), optional :: status
102
103
104character(len=200) :: ancestor_path ! local variable
105
106     full_node = .false.
107     call process_node(fxml, &
108                       path,att_name,att_value, &
109                       attributes, &
110                       status=status)
111     if (status == 0) then
112        call xml_mark_path(fxml,ancestor_path)
113        if (debug_xpath) print *, "Setting ancestor_path to: ", trim(ancestor_path)
114     endif
115
116end subroutine mark_node
117
118!--------------------------------------------------------------------
119subroutine get_node(fxml,path,att_name,att_value,attributes,pcdata,status)
120!
121! Performs a search of a given element (by path, and/or presence of
122! a given attribute and/or value of that attribute), returning optionally
123! the element's attribute dictionary and any PCDATA characters contained'
124! in the element's scope (but not child elements). It leaves the file handle'
125! physically and logically positioned:
126!
127! after the end of the element's start tag if 'pcdata' is not present'
128! after the end of the element's end tag if 'pcdata' is present'
129!
130! If the argument "path" is present and evaluates to a relative path (a
131! string not beginning with "/"), the search is interrupted after the end
132! of the "ancestor_element" set by a previous call to "mark_node".
133! If not earlier, the search ends at the end of the file.
134!
135! The status argument, if present, will hold a return value,
136! which will be:
137!
138! 0 on success,
139! negative in case of end-of-file or end-of-ancestor-element, or
140! positive in case of a malfunction (such as the overflow of the
141! user's pcdata buffer).'
142!
143type(xml_t), intent(inout), target :: fxml
144character(len=*), intent(in), optional :: path
145character(len=*), intent(in), optional :: att_name
146character(len=*), intent(in), optional :: att_value
147type(dictionary_t), intent(out), optional :: attributes
148character(len=*), intent(out), optional, target :: pcdata
149integer, intent(out), optional :: status
150
151     full_node = present(pcdata)
152     call process_node(fxml, &
153                       path,att_name,att_value, &
154                       attributes,pcdata, &
155                       status=status)
156
157end subroutine get_node
158!
159!--------------------------------------------------------------------
160! Workhorse routines follow
161!--------------------------------------------------------------------
162subroutine process_node(fxml,path,att_name,att_value, &
163                        attributes,pcdata,&
164                        status)
165type(xml_t), intent(inout), target :: fxml
166character(len=*), intent(in), optional :: path
167character(len=*), intent(in), optional :: att_name
168character(len=*), intent(in), optional :: att_value
169type(dictionary_t), intent(out), optional :: attributes
170character(len=*), intent(out), optional, target :: pcdata
171integer, intent(out), optional :: status
172
173logical :: path_present, att_name_present, att_value_present
174logical :: attributes_present
175
176character(len=3) :: any_path = "//*"
177character(len=200) :: local_path, ancestor_path ! *** Hard limit
178character(len=500) :: value ! *** Hard limit
179integer :: local_status
180
181type(dictionary_t) :: local_attributes
182
183global_status = 0 ! reset
184
185path_present = present(path)
186attributes_present = present(attributes)
187att_name_present = present(att_name)
188att_value_present = present(att_value)
189
190relative_path = .false.
191
192if (path_present) then
193   if (debug_xpath) print *, "SEARCHING for: ", trim(path)
194   if (path(1:1) /= "/") then
195      !
196      ! Relative path search
197      !
198      call xml_path(fxml,local_path)
199      call xml_get_path_mark(fxml,ancestor_path)
200      if (ancestor_path == "") then
201        if (debug_xpath) print *, "Relative search with null ancestor..."
202      endif
203      relative_path = .true.
204      if (debug_xpath) print *, "Relative search. ANCESTOR ELEMENT: ", &
205                             trim(ancestor_path)
206      !
207      ! Convert to absolute path
208      local_path = trim(local_path) // "/" // trim(path)
209      if (debug_xpath) print *, "Converting ", trim(path), &
210               " to absolute path: ", trim(local_path)
211   else
212      local_path = path
213   endif
214else
215   local_path = any_path
216endif
217
218looking_for_current_element = (path == ".")
219!
220! Use local_attributes, since it is in principle possible that
221! the user does not need to get back the attribute list.
222!
223do ! Loop until we satisfy the constraints
224
225   if (debug_xpath) print *, "--> Calling get_path ..."
226   call get_path(fxml,local_path,local_attributes,pcdata,local_status)
227   if (debug_xpath) print *, "-->Status after get_path: ", local_status
228   if (local_status /= 0) EXIT
229
230   if (debug_xpath) print *, "FOUND path matching: ", trim(local_path)
231
232   if (att_name_present) then
233      if (debug_xpath) print *, "Checking ", trim(att_name), " among ", &
234                          number_of_entries(local_attributes), " entries:"
235      if (debug_xpath) call print_dict(local_attributes)
236
237      if (has_key(local_attributes,att_name)) then
238
239         if (att_value_present) then
240            call get_value(local_attributes,att_name,value,local_status)
241            if (local_status /= 0) then
242               if (debug_xpath) print *, "Failed to get value of att: ", &
243                                    trim(att_name)
244               EXIT
245            endif
246
247            if (att_value == value) then
248               local_status = 0
249               if (debug_xpath) print *, "Got correct att name and value "
250               EXIT
251            else
252               if (debug_xpath) print *, "att value: ", trim(value), &
253                                   " does not match"
254               cycle ! We keep searching
255            endif
256         else ! Found att_name, and no value required
257            local_status = 0
258            if (debug_xpath) print *, "Got correct att name"
259            EXIT
260         endif
261      else ! Did not find that attribute name
262         if (debug_xpath) print *, "Att name not present"
263         cycle ! keep searching
264      endif
265   else ! Found path, and no att info required
266      local_status = 0
267      if (debug_xpath) print *, "Found correct path. No other reqs."
268      EXIT
269   endif
270
271enddo
272
273if (present(status)) then
274   status = local_status
275   if (debug_xpath) print *, "--Returning status: ", status
276endif
277
278if (attributes_present) then
279   attributes = local_attributes
280endif
281
282end subroutine process_node
283
284!--------------------------------------------------------------------
285subroutine get_path(fxml,path,attributes,pcdata,status)
286type(xml_t), intent(inout), target :: fxml
287character(len=*), intent(in) :: path
288type(dictionary_t), intent(out), optional, target :: attributes
289character(len=*), intent(out), optional, target :: pcdata
290integer, intent(out), optional :: status
291
292logical :: status_present
293
294xp => fxml
295
296path_required = path
297status_present = present(status)
298pcdata_requested = (present(pcdata))
299
300attributes_requested = (present(attributes))
301if (attributes_requested) then
302      call reset_dict(attributes)
303      attributes_recovered => attributes
304endif
305
306if (pcdata_requested) then
307!
308! Make sure we do not overstep the bounds of the supplied argument
309!
310   max_len_pcdata = min(len(pcdata),MAX_PCDATA_SIZE)
311   len_pcdata = 0
312   pcdata_recovered(1:max_len_pcdata) = ""
313   if (debug_xpath) print *, "Max length of pcdata store: ", max_len_pcdata
314endif
315
316if (looking_for_current_element) then
317   if (debug_xpath) print *, "Returning info about current element"
318
319   ! We are now in the desired element, and we have the name and
320   ! attribute list saved in xp.
321   !
322   if (attributes_requested) call xml_attributes(xp,attributes_recovered)
323   !
324   if (pcdata_requested) then
325   !
326   ! Set things up so that we can get the pcdata
327   !
328      call xml_path(xp,target_path)
329      in_target_element = .true.
330      in_pcdata_level = .true.
331   else
332      if (status_present) status = 0
333      RETURN ! We are done
334   endif
335else
336   target_path = ""
337   in_target_element = .false.
338   in_pcdata_level = .false.
339endif
340
341stop_parsing = .false.
342
343call xml_parse(fxml, &
344               begin_element_handler = begin_element , &
345               end_element_handler = end_element, &
346               pcdata_chunk_handler = pcdata_handler, &
347               verbose = debug_sax, signal_handler=pause_parsing, &
348               empty_element_handler = empty_element)
349
350if (eof_xmlfile(fxml)) then
351   global_status = END_OF_FILE
352   if (debug_xpath) print *, "Found end of file"
353   if (pcdata_requested) pcdata = ""
354else if (global_status == END_OF_ANCESTOR_ELEMENT) then
355   if (debug_xpath) print *, "Found end of ancestor element"
356   if (pcdata_requested) pcdata = ""
357else
358   if (debug_xpath) print *, "Parser found candidate element"
359   if (pcdata_requested) then
360      pcdata = pcdata_recovered(1:len_pcdata)
361      if (debug_xpath) print *, "PCDATA recovered: ", pcdata_recovered(1:len_pcdata)
362   endif
363endif
364if (global_status > 0) then
365   if (debug_xpath) print *, "Something went slightly wrong. Status > 0"
366endif
367!
368if (present(status)) status = global_status
369
370
371end subroutine get_path
372
373!==================================================================
374subroutine begin_element(name,attributes)
375character(len=*), intent(in) :: name
376type(dictionary_t), intent(in) :: attributes
377
378character(len=1000) :: path ! *** Hard limit
379
380call xml_path(xp,path)
381if (debug_xpath) print *, " begin_element ::: PATH: " , trim(path)
382if (debug_xpath) print *, "path: ", trim(path), " req: ", trim(path_required)
383if (match(path,path_required)) then
384      if (debug_xpath) print *, " Match path: " , trim(path)
385      in_target_element = .true.
386      target_path = path
387      in_pcdata_level = .true.
388      if (debug_xpath) print *, "In element name: " , name
389      if (attributes_requested) attributes_recovered = attributes
390      ! stop parsing
391      if (debug_xpath) print *, "full_node: ", full_node
392      if (.not. full_node) then
393         if (debug_xpath) print *, "Stopping parsing after initial tag"
394         stop_parsing = .true.
395      endif
396else
397   !
398   ! If we are at the pcdata level and we enter another element,
399   ! we must not read pcdata
400   !
401   if (in_pcdata_level) in_pcdata_level = .false.
402endif
403
404end subroutine begin_element
405!------------------------------------------------------------
406subroutine end_element(name)
407character(len=*), intent(in) :: name
408
409character(len=300) :: path ! *** Hard limit
410character(len=300) :: left_path ! *** Hard limit
411character(len=300) :: ancestor_path ! *** Hard limit
412!
413
414call xml_path(xp,path) ! path *after* leaving element
415left_path = trim(path) // "/" // trim(name)
416
417if (in_target_element) then
418   if (path == target_path) then
419      !
420      ! We are back to pcdata level after visiting child elements
421      !
422      in_pcdata_level = .true.
423
424   else if (left_path == target_path) then
425
426      ! We stop the parsing at the end of the element
427      !
428      if (debug_xpath) print *, "Exiting target element: ", trim(target_path)
429      in_target_element = .false.
430      in_pcdata_level = .false.
431      if (debug_xpath) print *, "Stopping parsing after end of target element"
432      stop_parsing = .true.
433   endif
434
435else if (relative_path) then
436   !
437   ! Check in case we go out of ancestor element
438   !
439   call xml_get_path_mark(xp,ancestor_path)
440   if (match(left_path,ancestor_path)) then
441      !
442      ! We are leaving the ancestor element
443      !
444      if (debug_xpath) print *, "Relative search. End of element: ", name
445      if (debug_xpath) print *, "Leaving Path: ", trim(left_path)
446      if (debug_xpath) print *, "Ancestor Path: ", trim(ancestor_path)
447      if (debug_xpath) print *, "Stopping parsing after end of ancestor element"
448
449      stop_parsing = .true.
450      global_status = END_OF_ANCESTOR_ELEMENT
451   endif
452endif
453
454end subroutine end_element
455
456!------------------------------------------------------------
457subroutine empty_element(name,attributes)
458character(len=*), intent(in) :: name
459type(dictionary_t), intent(in) :: attributes
460
461character(len=300) :: path ! *** Hard limit
462
463call xml_path(xp,path)
464if (debug_xpath) print *, " empty_element ::: PATH: " , trim(path)
465if (debug_xpath) print *, "path: ", trim(path), " req: ", trim(path_required)
466if (match(path,path_required)) then
467      if (debug_xpath) print *, " Match path: " , trim(path)
468      if (debug_xpath) print *, "In (empty) element name: " , name
469      if (attributes_requested) attributes_recovered = attributes
470      ! stop parsing
471      stop_parsing = .true.
472      if (debug_xpath) print *, "Stopping parsing after empty tag"
473      if (debug_xpath) print *, "full_node: ", full_node
474      if (full_node) then
475         if (debug_xpath) print *, "*Warning: full_node requested, empty tag found"
476      endif
477endif
478!
479! There is no logic for ancestor element handling, as by definition
480! an emtpy element cannot have children.
481!
482end subroutine empty_element
483
484!-----------------------------------------------------------
485subroutine pcdata_handler(chunk)
486character(len=*), intent(in) :: chunk
487
488integer :: len_chunk
489
490if (in_pcdata_level) then
491   !
492   ! Build pcdata_recovered chunk by chunk, until it overflows
493   !
494   if (pcdata_requested) then
495      if (debug_xpath) print *, "Found chunk of pcdata: ", chunk
496      len_chunk = len(chunk)
497      if ((len_pcdata + len_chunk) > max_len_pcdata) then
498         !
499         if (debug_xpath) print *, "***Pcdata Overflow "
500         global_status = PCDATA_OVERFLOW
501         stop_parsing = .true.
502         return
503      endif
504      pcdata_recovered(len_pcdata+1:len_pcdata+len_chunk) = chunk
505      len_pcdata = len_pcdata + len_chunk
506   endif
507endif
508
509end subroutine pcdata_handler
510!--------------------------------------------------------------------
511
512subroutine pause_parsing(res)
513logical, intent(out) :: res
514
515res = stop_parsing
516
517end subroutine pause_parsing
518!--------------------------------------------------------------------
519
520recursive function match(p,ptarget) result(res_match)
521character(len=*), intent(in) :: p
522character(len=*), intent(in) :: ptarget
523logical :: res_match
524
525!
526! Checks whether a given XML path matches the target path ptarget
527! Only absolute paths are considered.
528!
529! Examples of target paths:
530!
531! /pseudo/vps/radfunc [1]
532! //radfunc/data
533! //data
534! ///vps/data
535! //job//data
536! //
537!
538integer :: len_target, len_path, pos_target, pos_path
539character(len=100) :: anchor_leaf ! *** Hard limit
540
541res_match = .false.
542
543if (trim(p) == trim(ptarget)) then
544   res_match = .true.
545   return
546
547else if (ptarget == "/") then
548   ! We process // in the middle below
549
550   res_match = .true.
551   return
552
553else ! We get the extreme elements
554
555   len_target = len_trim(ptarget)
556   len_path = len_trim(p)
557   pos_target = index(ptarget,"/",back=.true.)
558   pos_path = index(p,"/",back=.true.)
559
560   if (pos_target == len_target) then ! // in the middle...
561      ! Get leaf further up
562      search_anchor : do
563         pos_target = index(ptarget(1:len_target-1),"/",back=.true.)
564         if (pos_target == 1) then ! Target begins by /.//
565            res_match = .true.
566            return
567         endif
568         anchor_leaf = ptarget(pos_target:len_target-1)
569         if (anchor_leaf == "/.") then ! keep searching
570            len_target = pos_target
571            cycle search_anchor
572         else
573            exit search_anchor
574         endif
575      enddo search_anchor
576
577      ! Note that the anchor includes the leading /
578      ! Now we search for that anchor in the candidate path
579      !
580      pos_path = index(p(1:len_path),trim(anchor_leaf),back=.true.)
581      if (pos_path /= 0) then
582
583         ! Found anchor. Continue further up.
584         !
585         res_match = match(p(1:pos_path-1),ptarget(1:pos_target-1))
586      endif
587
588   else if (ptarget(pos_target+1:len_target) == ".") then
589
590      ! A dot is a dummy. Continue further up in target path.
591      !
592      res_match = match(p(1:len_path),ptarget(1:pos_target-1))
593
594   else if (ptarget(pos_target+1:len_target) == "*") then
595
596      if (len_path == pos_path) then
597         RETURN ! empty path element
598      endif
599
600      ! A star matches any non-empty leaf. Continue further up.
601      !
602      res_match = match(p(1:pos_path-1),ptarget(1:pos_target-1))
603
604   else if (p(pos_path+1:len_path) == &
605             ptarget(pos_target+1:len_target)) then
606
607      ! Leafs are equal. Continue further up.
608      !
609      res_match = match(p(1:pos_path-1),ptarget(1:pos_target-1))
610
611   endif
612
613endif
614
615end function match
616
617end module m_path
Note: See TracBrowser for help on using the repository browser.