source: XIOS3/trunk/src/event_scheduler.hpp @ 2628

Last change on this file since 2628 was 2570, checked in by jderouillat, 9 months ago

Memory cleaning of the EventScheduler hierarchy, triggered by the PoolRessource cleaning through freeRessourceEventScheduler_. Cleaning of the MPI_Window objects associated with services is handled through the MpiGarbageCollector.

  • Property copyright set to
    Software name : XIOS (Xml I/O Server)
    http://forge.ipsl.jussieu.fr/ioserver
    Creation date : January 2009
    Licence : CeCILL version 2
    see license file in root directory : Licence_CeCILL_V2-en.txt
    or http://www.cecill.info/licences/Licence_CeCILL_V2-en.html
    Holder : CEA/LSCE (Laboratoire des Sciences du CLimat et de l'Environnement)
    CNRS/IPSL (Institut Pierre Simon Laplace)
    Project Manager : Yann Meurdesoif
    yann.meurdesoif@cea.fr
File size: 9.3 KB
Line 
1#ifndef __EVENT_SCHEDULER_HPP__
2#define __EVENT_SCHEDULER_HPP__
3
4#include "xios_spl.hpp"
5#include "mpi.hpp"
6
7namespace xios
8{
9
10    //!  Event scheduling class. An instance of this class is used to order the event providing from different context to avoid dead lock.
11    /*!
12     *   Event are ordered in a same context using the timeLine id, so each server will process the same event. But between different
13     *   context, events are not scheduled and servers may choose to process different events and deadlock or MPI crash may occurs if
14     *   collective MPI communication are involved by the events.
15     *   This class solve the problem by scheduling the event and choose which event must be process by each server to insure correct
16     *   synchronisation. Information is send by asynchronous MPI communication to the root process that order the different events
17     *   (First In First Out) and brodcast the information to the other servers. To avoid to much incoming communication for the root
18     *   process, and hierachical tree is used for communicating from a limited number of child processes to the parent. 
19     */
20   
21    class CEventScheduler
22    {
23       public:
24       //!  Constructor
25       /*! A new communicator is created by duplicate comm. The communicating tree hierarchy is created.
26        *  @param[in] comm : MPI communicator du duplicate for internal use
27        */
28       CEventScheduler(const MPI_Comm& comm) ;
29       CEventScheduler(const MPI_Comm& comm, size_t schedulerLevel) ;
30
31       //! Destructor
32       ~CEventScheduler() ;
33
34
35
36       //! public interface for registring an event from the server
37       /*!
38        *  @param[in] timeLine : Time line id of the event
39        *  @param[in] contextHashId : Hashed id of the context
40        */
41       void registerEvent(const size_t timeLine, const size_t contextHashId) ;
42       
43       private:
44       CEventScheduler* getBaseScheduler(void) { if (childScheduler_== nullptr) return this; else return childScheduler_->getBaseScheduler();}
45
46       public:
47       //! public interface for query if the event defined by timeLine and hashId is sheduled next
48       /*!
49        *  @param[in] timeLine : Time line id of the event
50        *  @param[in] contextHasId : Hashed id of the context
51        *  @return  : boolean value, true is the event is scheduled next
52        *
53        *  If the event is scheduled next, it is remove from the `eventStack` queue list 
54        */   
55       bool queryEvent(const size_t timeLine, const size_t contextHashId) { return getBaseScheduler()->queryEvent_(timeLine, contextHashId); }
56       bool queryEvent_(const size_t timeLine, const size_t contextHashId) ;
57       void popEvent() { getBaseScheduler()->popEvent_() ; }
58       void popEvent_() { eventStack_.pop() ; }
59       bool isRoot(void) { return parent_[0]==mpiRank_ ;}
60       void setParentScheduler(shared_ptr<CEventScheduler> parentScheduler) { parentScheduler_ = parentScheduler ;}
61       void setChildScheduler(shared_ptr<CEventScheduler> childScheduler) { childScheduler_ = childScheduler ;}
62       void splitScheduler(const MPI_Comm& splittedComm, shared_ptr<CEventScheduler>& parent, shared_ptr<CEventScheduler>& child) ;
63       void cleanSplitSchedulers();
64
65       //! Public interface to give the hand to the instance to check pending or incoming message.
66       /*!
67        * Must be called periodicaly. Call `checkParentRequest` and `checkChildRequest` private method.
68        */
69       void checkEvent(void) { getBaseScheduler()->checkEvent_(); } 
70       void checkEvent_(void) ;
71
72       private:
73         void initialize(const MPI_Comm& comm) ;
74       
75       //! Send an event to the parent of level `lev+1`
76       /*!
77        *  @param[in] timeLine : Time line id of the event
78        *  @param[in] contextHasId : Hashed id of the context
79        *  @param[in] lev : actual level of the child in the hierarchy
80        *  The event is sent by an asynchrounous MPI_ISend
81        */
82     
83       void registerEvent(const size_t timeLine, const size_t contextHashId, const size_t schedulerLevel) ;
84       void registerEvent(const size_t timeLine, const size_t contextHashId, const size_t schedulerLevel, const size_t lev) ;
85
86
87
88       //! Children side. Check potential incoming message and if pending request are completed
89       /*!
90        *  - Check by `MPI_Test` if pending request sent to parents are complete.
91        *  - Probe incoming message from parent by using `MPI_Probe`. If yes, post an asynchronous reception by `MPI_IRecv`
92        *  - Check by `MPI_Test` if pending received requests are complete. if yes :
93        *    + Broadcast the event to the childrens if is also a parent
94        *    + Otherwise : push the incomming event in the `eventStack` queue.
95        */
96       void checkParentRequest(void) ;
97
98
99
100       //! Parent side. Check potential incoming message and if pending request are completed
101       /*!
102        *  - Probe incoming message from chidren by using `MPI_Probe`. If yes, post an asynchronous reception by `MPI_IRecv`.
103        *  - Check pending received event request from children using `MPI_Probe`. If and event is received, it is incerted in the
104        *    map `recvEvent` which is increased by 1. If the number of request received from children for this event is equal to the number
105        *    of children then :
106        *    + if the event level is 0, bcast the event to the children.
107        *    + else send the event to the parent.
108        *  - Check pending sent event request to children using `MPI_TEST` and if complete release the corresponding buffer
109        */
110       void checkChildRequest(void) ;
111
112
113
114       //! Parent side. Broadcast a received event from the parent to the children.
115       /*!
116        *  @param[in] timeLine : Time line id of the event
117        *  @param[in] contextHasId : Hashed id of the context
118        *  @param[in] lev : actual level of the child in the hierarchy
119        * Asynchronus MPI_ISend is used.
120        */
121       void bcastEvent(const size_t timeLine, const size_t contextHashId, const size_t schedulerLevel ,const size_t lev) ;
122       
123
124
125
126       //! Structure defining an event, composed of the timeLine, the context hashId and the hierachical level of the communication.
127       struct SEvent
128       {
129         size_t timeLine ; /*!< Time line id of the event in the context */
130         size_t hashId ; /*!< hassh id of the context */
131         size_t schedulerLevel ; /*!< hierarchical level of scherduler */
132         size_t level ;  /*!<hierarchical level of the communication*/
133
134         //! Definition of the == operator : needed to order the object in a map container
135         /*!
136            @param[in] e : object to compare with
137            @return : boolean result of the comparison
138         */
139         bool operator==(const SEvent& e) const
140         { 
141           if (timeLine == e.timeLine && hashId == e.hashId && level==e.level && schedulerLevel==e.schedulerLevel) return true ;
142           else return false ;
143         } ;
144       
145
146         //! Definition of the < operator : needed to order the object in a map container
147         /*!
148            @param[in] e : object to compare with
149            @return : boolean result of the comparison
150         */
151
152         bool operator<(const SEvent& e) const
153         { 
154           if (timeLine < e.timeLine) return true ;
155           else if (timeLine == e.timeLine && hashId < e.hashId) return true ;
156           else if (timeLine == e.timeLine && hashId == e.hashId && schedulerLevel<e.schedulerLevel) return true ;
157           else if (timeLine == e.timeLine && hashId == e.hashId && schedulerLevel==e.schedulerLevel && level<e.level) return true ;
158           else return false ;
159         } ;
160       } ;       
161       
162       //! Pending request struture. It keep send or receive buffer from asynchronous communication while the request is not complete.
163       struct SPendingRequest
164       {
165         size_t buffer[4] ;      /*!< communication buffer : timeLine, hashId, level */
166         MPI_Request request ;   /*!< pending MPI request */ 
167       } ;
168       
169       MPI_Comm communicator_ ;  /*!< Internal MPI communicator */ 
170       int mpiRank_ ;            /*!< Rank in the communicator */
171       int mpiSize_ ;            /*!< Size of the communicator */
172 
173       queue< pair<size_t, size_t> > eventStack_ ;         
174       queue<SPendingRequest* > pendingSentParentRequest_ ;   /*!< Pending request sent to parent   */
175       queue<SPendingRequest*>  pendingRecvParentRequest_ ;   /*!< Pending request recv from parent */   
176       list<SPendingRequest* >  pendingRecvChildRequest_ ;    /*!< Pending request recv from child  */
177       list<SPendingRequest*>   pendingSentChildRequest_ ;    /*!< Pending request sent to child    */
178       map< SEvent, int > recvEvent_ ;                        /*!< list of event received from children. Contains the currnet number children that have already post the same event */
179       
180       
181       int level_ ;                   /*!< Number of hierachical level for communication */
182       vector<int> parent_ ;          /*!< Parent rank for each level */ 
183       vector<vector<int> >  child_ ; /*!< List of child rank for each level */
184       vector<int> nbChild_ ;         /*!< Number of child for each level */   
185       
186       shared_ptr<CEventScheduler> parentScheduler_ ;
187       shared_ptr<CEventScheduler> childScheduler_ ;
188       bool hasParentScheduler_=false ;
189       size_t schedulerLevel_ ;
190
191    } ;
192}
193
194#endif
Note: See TracBrowser for help on using the repository browser.