source: XIOS/dev/dev_ym/XIOS_COUPLING/src/event_scheduler.hpp @ 2230

Last change on this file since 2230 was 2230, checked in by ymipsl, 3 years ago

Fix some Dead-lock issue...
YM

  • Property copyright set to
    Software name : XIOS (Xml I/O Server)
    http://forge.ipsl.jussieu.fr/ioserver
    Creation date : January 2009
    Licence : CeCILL version 2
    see license file in root directory : Licence_CeCILL_V2-en.txt
    or http://www.cecill.info/licences/Licence_CeCILL_V2-en.html
    Holder : CEA/LSCE (Laboratoire des Sciences du Climat et de l'Environnement)
    CNRS/IPSL (Institut Pierre Simon Laplace)
    Project Manager : Yann Meurdesoif
    yann.meurdesoif@cea.fr
File size: 7.7 KB
Line 
1#ifndef __EVENT_SCHEDULER_HPP__
2#define __EVENT_SCHEDULER_HPP__
3
4#include "xios_spl.hpp"
5#include "mpi.hpp"
6
7namespace xios
8{
9
10    //!  Event scheduling class. An instance of this class is used to order the event providing from different context to avoid dead lock.
11    /*!
12     *   Event are ordered in a same context using the timeLine id, so each server will process the same event. But between different
13     *   context, events are not scheduled and servers may choose to process different events and deadlock or MPI crash may occurs if
14     *   collective MPI communication are involved by the events.
15     *   This class solve the problem by scheduling the event and choose which event must be process by each server to insure correct
16     *   synchronisation. Information is send by asynchronous MPI communication to the root process that order the different events
17     *   (First In First Out) and brodcast the information to the other servers. To avoid to much incoming communication for the root
18     *   process, and hierachical tree is used for communicating from a limited number of child processes to the parent. 
19     */
20   
21    class CEventScheduler
22    {
23       public:
24       //!  Constructor
25       /*! A new communicator is created by duplicate comm. The communicating tree hierarchy is created.
26        *  @param[in] comm : MPI communicator du duplicate for internal use
27        */
28       CEventScheduler(const MPI_Comm& comm) ;
29
30
31       //! Destructor
32       ~CEventScheduler() ;
33
34
35
36       //! public interface for registring an event from the server
37       /*!
38        *  @param[in] timeLine : Time line id of the event
39        *  @param[in] contextHashId : Hashed id of the context
40        */
41       void registerEvent(const size_t timeLine, const size_t contextHashId) ;
42
43
44
45       //! public interface for query if the event defined by timeLine and hashId is sheduled next
46       /*!
47        *  @param[in] timeLine : Time line id of the event
48        *  @param[in] contextHasId : Hashed id of the context
49        *  @return  : boolean value, true is the event is scheduled next
50        *
51        *  If the event is scheduled next, it is remove from the `eventStack` queue list 
52        */   
53       bool queryEvent(const size_t timeLine, const size_t contextHashId) ;
54       void popEvent() { eventStack.pop() ; }
55
56
57       //! Public interface to give the hand to the instance to check pending or incoming message.
58       /*!
59        * Must be called periodicaly. Call `checkParentRequest` and `checkChildRequest` private method.
60        */
61       void checkEvent(void) ;
62
63       private:
64
65
66       //! Send an event to the parent of level `lev+1`
67       /*!
68        *  @param[in] timeLine : Time line id of the event
69        *  @param[in] contextHasId : Hashed id of the context
70        *  @param[in] lev : actual level of the child in the hierarchy
71        *  The event is sent by an asynchrounous MPI_ISend
72        */
73       void registerEvent(const size_t timeLine, const size_t contextHashId, const size_t lev) ;
74
75
76
77       //! Children side. Check potential incoming message and if pending request are completed
78       /*!
79        *  - Check by `MPI_Test` if pending request sent to parents are complete.
80        *  - Probe incoming message from parent by using `MPI_Probe`. If yes, post an asynchronous reception by `MPI_IRecv`
81        *  - Check by `MPI_Test` if pending received requests are complete. if yes :
82        *    + Broadcast the event to the childrens if is also a parent
83        *    + Otherwise : push the incomming event in the `eventStack` queue.
84        */
85       void checkParentRequest(void) ;
86
87
88
89       //! Parent side. Check potential incoming message and if pending request are completed
90       /*!
91        *  - Probe incoming message from chidren by using `MPI_Probe`. If yes, post an asynchronous reception by `MPI_IRecv`.
92        *  - Check pending received event request from children using `MPI_Probe`. If and event is received, it is incerted in the
93        *    map `recvEvent` which is increased by 1. If the number of request received from children for this event is equal to the number
94        *    of children then :
95        *    + if the event level is 0, bcast the event to the children.
96        *    + else send the event to the parent.
97        *  - Check pending sent event request to children using `MPI_TEST` and if complete release the corresponding buffer
98        */
99       void checkChildRequest(void) ;
100
101
102
103       //! Parent side. Broadcast a received event from the parent to the children.
104       /*!
105        *  @param[in] timeLine : Time line id of the event
106        *  @param[in] contextHasId : Hashed id of the context
107        *  @param[in] lev : actual level of the child in the hierarchy
108        * Asynchronus MPI_ISend is used.
109        */
110       void bcastEvent(const size_t timeLine, const size_t contextHashId, const size_t lev) ;
111       
112
113
114
115       //! Structure defining an event, composed of the timeLine, the context hashId and the hierachical level of the communication.
116       struct SEvent
117       {
118         size_t timeLine ; /*!< Time line id of the event in the context */
119         size_t hashId ; /*!< hassh id of the context */
120         size_t level ;  /*!<hierarchical level of the communication*/
121
122         //! Definition of the == operator : needed to order the object in a map container
123         /*!
124            @param[in] e : object to compare with
125            @return : boolean result of the comparison
126         */
127         bool operator==(const SEvent& e) const
128         { 
129           if (timeLine == e.timeLine && hashId == e.hashId && level==e.level) return true ;
130           else return false ;
131         } ;
132       
133
134         //! Definition of the < operator : needed to order the object in a map container
135         /*!
136            @param[in] e : object to compare with
137            @return : boolean result of the comparison
138         */
139
140         bool operator<(const SEvent& e) const
141         { 
142           if (timeLine < e.timeLine) return true ;
143           else if (timeLine == e.timeLine && hashId < e.hashId) return true ;
144           else if (timeLine == e.timeLine && hashId == e.hashId && level<e.level) return true ;
145           else return false ;
146         } ;
147       } ;       
148       
149
150       //! Pending request struture. It keep send or receive buffer from asynchronous communication while the request is not complete.
151       struct SPendingRequest
152       {
153         size_t buffer[3] ;      /*!< communication buffer : timeLine, hashId, level */
154         MPI_Request request ;   /*!< pending MPI request */ 
155       } ;
156       
157       MPI_Comm communicator ;  /*!< Internal MPI communicator */ 
158       int mpiRank ;            /*!< Rank in the communicator */
159       int mpiSize ;            /*!< Size of the communicator */
160 
161       queue< pair<size_t, size_t> > eventStack ;         
162       queue<SPendingRequest* > pendingSentParentRequest ;   /*!< Pending request sent to parent   */
163       queue<SPendingRequest*>  pendingRecvParentRequest ;   /*!< Pending request recv from parent */   
164       list<SPendingRequest* >  pendingRecvChildRequest ;    /*!< Pending request recv from child  */
165       list<SPendingRequest*>   pendingSentChildRequest ;    /*!< Pending request sent to child    */
166       map< SEvent, int > recvEvent ;                        /*!< list of event received from children. Contains the currnet number children that have already post the same event */
167       
168       
169       int level ;                   /*!< Number of hierachical level for communication */
170       vector<int> parent ;          /*!< Parent rank for each level */ 
171       vector<vector<int> >  child ; /*!< List of child rank for each level */
172       vector<int> nbChild ;         /*!< Number of child for each level */   
173
174    } ;
175}
176
177#endif
Note: See TracBrowser for help on using the repository browser.