source: XIOS/dev/branch_yushan_merged/src/event_scheduler.hpp @ 1134

Last change on this file since 1134 was 1134, checked in by yushan, 7 years ago

branch merged with trunk r1130

  • Property copyright set to
    Software name : XIOS (Xml I/O Server)
    http://forge.ipsl.jussieu.fr/ioserver
    Creation date : January 2009
    Licence : CeCILL version 2
    see license file in root directory : Licence_CeCILL_V2-en.txt
    or http://www.cecill.info/licences/Licence_CeCILL_V2-en.html
    Holder : CEA/LSCE (Laboratoire des Sciences du Climat et de l'Environnement)
    CNRS/IPSL (Institut Pierre Simon Laplace)
    Project Manager : Yann Meurdesoif
    yann.meurdesoif@cea.fr
File size: 7.7 KB
Line 
1#ifndef __EVENT_SCHEDULER_HPP__
2#define __EVENT_SCHEDULER_HPP__
3
#include <cstddef>
#include <list>
#include <map>
#include <queue>
#include <utility>
#include <vector>

#include "xios_spl.hpp"
#include "mpi.hpp"
#ifdef _usingEP
#include "ep_declaration.hpp"
#endif
9
10
11namespace xios
12{
13
14    //!  Event scheduling class. An instance of this class is used to order the event providing from different context to avoid dead lock.
15    /*!
16     *   Event are ordered in a same context using the timeLine id, so each server will process the same event. But between different
17     *   context, events are not scheduled and servers may choose to process different events and deadlock or MPI crash may occurs if
18     *   collective MPI communication are involved by the events.
19     *   This class solve the problem by scheduling the event and choose which event must be process by each server to insure correct
20     *   synchronisation. Information is send by asynchronous MPI communication to the root process that order the different events
21     *   (First In First Out) and brodcast the information to the other servers. To avoid to much incoming communication for the root
22     *   process, and hierachical tree is used for communicating from a limited number of child processes to the parent. 
23     */
24   
25    class CEventScheduler
26    {
27       public:
28       //!  Constructor
29       /*! A new communicator is created by duplicate comm. The communicating tree hierarchy is created.
30        *  @param[in] comm : MPI communicator du duplicate for internal use
31        */
32       CEventScheduler(const MPI_Comm& comm) ;
33
34
35       //! Destructor
36       ~CEventScheduler() ;
37
38
39
40       //! public interface for registring an event from the server
41       /*!
42        *  @param[in] timeLine : Time line id of the event
43        *  @param[in] contextHashId : Hashed id of the context
44        */
45       void registerEvent(const size_t timeLine, const size_t contextHashId) ;
46
47
48
49       //! public interface for query if the event defined by timeLine and hashId is sheduled next
50       /*!
51        *  @param[in] timeLine : Time line id of the event
52        *  @param[in] contextHasId : Hashed id of the context
53        *  @return  : boolean value, true is the event is scheduled next
54        *
55        *  If the event is scheduled next, it is remove from the `eventStack` queue list 
56        */   
57       bool queryEvent(const size_t timeLine, const size_t contextHashId) ;
58
59
60       //! Public interface to give the hand to the instance to check pending or incoming message.
61       /*!
62        * Must be called periodicaly. Call `checkParentRequest` and `checkChildRequest` private method.
63        */
64       void checkEvent(void) ;
65
66       private:
67
68
69       //! Send an event to the parent of level `lev+1`
70       /*!
71        *  @param[in] timeLine : Time line id of the event
72        *  @param[in] contextHasId : Hashed id of the context
73        *  @param[in] lev : actual level of the child in the hierarchy
74        *  The event is sent by an asynchrounous MPI_ISend
75        */
76       void registerEvent(const size_t timeLine, const size_t contextHashId, const size_t lev) ;
77
78
79
80       //! Children side. Check potential incoming message and if pending request are completed
81       /*!
82        *  - Check by `MPI_Test` if pending request sent to parents are complete.
83        *  - Probe incoming message from parent by using `MPI_Probe`. If yes, post an asynchronous reception by `MPI_IRecv`
84        *  - Check by `MPI_Test` if pending received requests are complete. if yes :
85        *    + Broadcast the event to the childrens if is also a parent
86        *    + Otherwise : push the incomming event in the `eventStack` queue.
87        */
88       void checkParentRequest(void) ;
89
90
91
92       //! Parent side. Check potential incoming message and if pending request are completed
93       /*!
94        *  - Probe incoming message from chidren by using `MPI_Probe`. If yes, post an asynchronous reception by `MPI_IRecv`.
95        *  - Check pending received event request from children using `MPI_Probe`. If and event is received, it is incerted in the
96        *    map `recvEvent` which is increased by 1. If the number of request received from children for this event is equal to the number
97        *    of children then :
98        *    + if the event level is 0, bcast the event to the children.
99        *    + else send the event to the parent.
100        *  - Check pending sent event request to children using `MPI_TEST` and if complete release the corresponding buffer
101        */
102       void checkChildRequest(void) ;
103
104
105
106       //! Parent side. Broadcast a received event from the parent to the children.
107       /*!
108        *  @param[in] timeLine : Time line id of the event
109        *  @param[in] contextHasId : Hashed id of the context
110        *  @param[in] lev : actual level of the child in the hierarchy
111        * Asynchronus MPI_ISend is used.
112        */
113       void bcastEvent(const size_t timeLine, const size_t contextHashId, const size_t lev) ;
114       
115
116
117
118       //! Structure defining an event, composed of the timeLine, the context hashId and the hierachical level of the communication.
119       struct SEvent
120       {
121         size_t timeLine ; /*!< Time line id of the event in the context */
122         size_t hashId ; /*!< hassh id of the context */
123         size_t level ;  /*!<hierarchical level of the communication*/
124
125         //! Definition of the == operator : needed to order the object in a map container
126         /*!
127            @param[in] e : object to compare with
128            @return : boolean result of the comparison
129         */
130         bool operator==(const SEvent& e) const
131         { 
132           if (timeLine == e.timeLine && hashId == e.hashId && level==e.level) return true ;
133           else return false ;
134         } ;
135       
136
137         //! Definition of the < operator : needed to order the object in a map container
138         /*!
139            @param[in] e : object to compare with
140            @return : boolean result of the comparison
141         */
142
143         bool operator<(const SEvent& e) const
144         { 
145           if (timeLine < e.timeLine) return true ;
146           else if (timeLine == e.timeLine && hashId < e.hashId) return true ;
147           else if (timeLine == e.timeLine && hashId == e.hashId && level<e.level) return true ;
148           else return false ;
149         } ;
150       } ;       
151       
152
153       //! Pending request struture. It keep send or receive buffer from asynchronous communication while the request is not complete.
154       struct SPendingRequest
155       {
156         size_t buffer[3] ;      /*!< communication buffer : timeLine, hashId, level */
157         MPI_Request request ;   /*!< pending MPI request */ 
158       } ;
159       
160       MPI_Comm communicator ;  /*!< Internal MPI communicator */ 
161       int mpiRank ;            /*!< Rank in the communicator */
162       int mpiSize ;            /*!< Size of the communicator */
163 
164       queue< pair<size_t, size_t> > eventStack ;         
165       queue<SPendingRequest* > pendingSentParentRequest ;   /*!< Pending request sent to parent   */
166       queue<SPendingRequest*>  pendingRecvParentRequest ;   /*!< Pending request recv from parent */   
167       list<SPendingRequest* >  pendingRecvChildRequest ;    /*!< Pending request recv from child  */
168       list<SPendingRequest*>   pendingSentChildRequest ;    /*!< Pending request sent to child    */
169       map< SEvent, int > recvEvent ;                        /*!< list of event received from children. Contains the currnet number children that have already post the same event */
170       
171       
172       int level ;                   /*!< Number of hierachical level for communication */
173       vector<int> parent ;          /*!< Parent rank for each level */ 
174       vector<vector<int> >  child ; /*!< List of child rank for each level */
175       vector<int> nbChild ;         /*!< Number of child for each level */   
176
177    } ;
178}
179
180#endif
Note: See TracBrowser for help on using the repository browser.