1 | #!/usr/bin/env python |
---|
2 | # -*- coding: ISO-8859-1 -*- |
---|
3 | |
---|
4 | ################################## |
---|
5 | # @program smon |
---|
6 | # @description simulation monitor |
---|
7 | # @copyright Copyright "(c)2009 Centre National de la Recherche Scientifique CNRS. |
---|
8 | # All Rights Reserved" |
---|
9 | # @svn_file $Id: analyzer 2545 2013-02-01 09:58:10Z jripsl $ |
---|
10 | # @version $Rev: 2545 $ |
---|
11 | # @lastrevision $Date: 2013-02-01 10:58:10 +0100 (Fri, 01 Feb 2013) $ |
---|
12 | # @license CeCILL (http://dods.ipsl.jussieu.fr/jripsl/smon/LICENSE) |
---|
13 | ################################## |
---|
14 | |
---|
15 | import sys |
---|
16 | import signal |
---|
17 | import traceback |
---|
18 | import smtplib |
---|
19 | from email.mime.text import MIMEText |
---|
20 | import time; |
---|
21 | from datetime import datetime |
---|
22 | |
---|
23 | from smon import repo_io |
---|
24 | |
---|
25 | |
---|
26 | |
---|
27 | """ |
---|
28 | Code list reminder |
---|
29 | |
---|
30 | 0000 (la simulation démarre) |
---|
31 | 1000 (le job d'une simulation démarre) |
---|
32 | 2000 (PushStack) |
---|
33 | 3000 (PopStack OK) |
---|
34 | 9000 (PopStack NOK) |
---|
35 | 9999 (FATAL) |
---|
36 | """ |
---|
37 | |
---|
class CheckList():
    """Namespace of health checks applied to the monitored simulations.

    Every check is a class method, so the class is used directly and does
    not need to be instantiated.
    """

    # Longest silence tolerated between two messages of a running simulation
    # before msg_timeout() reports a timeout (unit: seconds).
    max_time_between_msg = 10

    @classmethod
    def datetime_to_epoch(cls, datetime):
        """Convert a timestamp string into seconds since the epoch.

        :param datetime: timestamp formatted as "%d.%m.%Y %H:%M:%S",
            e.g. "29.08.2011 11:05:02". It is interpreted as local time
            because the conversion goes through ``time.mktime``.
        :returns: the corresponding epoch value (float).
        :raises ValueError: if the string does not match the format.
        """
        # time.mktime() already yields the epoch as a float; the previous
        # trailing ".time()" call raised AttributeError on every invocation.
        return time.mktime(time.strptime(datetime, "%d.%m.%Y %H:%M:%S"))

    @classmethod
    def msg_timeout(cls, message):
        """Tell whether a message is older than the allowed silence period.

        :param message: object exposing a ``timestamp`` attribute in the
            format accepted by :meth:`datetime_to_epoch`.
        :returns: True when more than ``max_time_between_msg`` seconds have
            elapsed between the message timestamp and now, False otherwise.
        """
        elapsed = time.time() - cls.datetime_to_epoch(message.timestamp)
        return elapsed > cls.max_time_between_msg

    @classmethod
    def C0001(cls):
        """Heartbeat check.

        The heartbeat is currently implemented using the simulation progress
        messages. If progress messages suddenly stop, it is likely that the
        simulation was killed or that a segfault occurred. In that case the
        other components (failover, prodiguer GUI, ...) are informed through
        a change of the simulation status.
        """
        for simulation in repo_io.get_running_simulations():
            # NOTE(review): presumably the most recent message of the
            # simulation; it must expose a ``timestamp`` attribute.
            message = repo_io.retrieve_last_messages(simulation)

            # msg_timeout is a class method: it has to be reached through
            # cls, the bare name raised NameError.
            if cls.msg_timeout(message):
                repo_io.update_simulation_status(simulation)  # from "running" to "killed"
---|
77 | |
---|
class Analyzer():
    """Entry point of the simulation analyzer.

    The class only holds class methods and is used without instantiation.
    """

    @classmethod
    def start(cls):
        """Start hook; currently a no-op."""
        pass

    @classmethod
    def stop(cls):
        """Stop hook; currently a no-op."""
        pass

    @classmethod
    def main(cls):
        """Run the check list once, then wait ten seconds before returning.

        Prints a start-up banner, executes ``CheckList.C0001()`` and sleeps.
        """
        # Disabled start-up sequence, kept for reference:
        #
        #   # parse args
        #   parser = argparse.ArgumentParser(prog='analyzer')
        #   parser.add_argument('-v', dest='verbose',required=False,action='store_true')
        #   args = parser.parse_args()
        #
        #   # check
        #   if not os.path.exists(SMON.smon_home):
        #       sys.exit(1)
        #
        #   SMON.init_singleton()

        # Single-argument parenthesised print behaves the same on Python 2
        # and is also valid on Python 3.
        print(' [*] Analyzer running. To exit press CTRL+C')

        CheckList.C0001()

        # Only the "time" module is imported by this file, so the bare name
        # "sleep" raised NameError; go through the module instead.
        time.sleep(10)

        # Disabled tear-down, kept for reference:
        #
        #   SMON.free_singleton()
---|
114 | |
---|
def signal_handler(signal, frame):
    """Handle SIGINT: tell the user and terminate with exit status 0.

    :param signal: signal number passed by the interpreter (unused).
    :param frame: current stack frame passed by the interpreter (unused).
    :raises SystemExit: always, with code 0.
    """
    # Single-argument parenthesised print behaves the same on Python 2 and
    # is also valid on Python 3.
    print('You pressed Ctrl+C!')
    sys.exit(0)
---|
118 | |
---|
if __name__ == '__main__':

    # Route SIGINT (CTRL+C) to signal_handler.
    signal.signal(signal.SIGINT, signal_handler)

    try:
        Analyzer.main()
    except Exception:
        # "except Exception, e" is Python 2-only syntax and the bound name
        # was never used; report the failure and exit with a non-zero status.
        traceback.print_exc()
        sys.exit(1)

    # Reached only when Analyzer.main() returned without raising.
    sys.exit(0)
---|