#!/usr/bin/env python
# -*- coding: ISO-8859-1 -*-

##################################
#  @program        smon
#  @description    simulation monitor
#  @copyright      Copyright “(c)2009 Centre National de la Recherche Scientifique CNRS.
#                  All Rights Reserved”
#  @svn_file       $Id: analyzer 2545 2013-02-01 09:58:10Z jripsl $
#  @version        $Rev: 2545 $
#  @lastrevision   $Date: 2013-02-01 10:58:10 +0100 (Fri, 01 Feb 2013) $
#  @license        CeCILL (http://dods.ipsl.jussieu.fr/jripsl/smon/LICENSE)
##################################

"""Simulation monitor analyzer.

Periodically checks the heartbeat of every running simulation and sets the
status of a simulation to 'error' when its last message is too old.
"""

import sys
import signal
import traceback
import smtplib
from email.mime.text import MIMEText
import time
import datetime

# line below is to include "smon" package in the search path
sys.path.append("/home/jripsl/snapshot/Monitoring")

import smon.repo_io as repo_io
import smon.types as types


class CheckList():
    """Collection of checks run against the simulations repository."""

    # maximum age of the last message before a simulation is considered
    # dead (unit => seconds)
    max_time_between_msg = 20

    @classmethod
    def msg_timeout(cls, message):
        """Tell whether `message` is older than `max_time_between_msg`.

        `message.crea_date` is converted with str() and parsed as
        "%Y-%m-%d %H:%M:%S" with an optional ".%f" fractional part.
        NOTE(review): presumably crea_date is a datetime (or an equivalent
        string) expressed in local time, since time.mktime() interprets
        the parsed value as local time - confirm against repo_io.

        Returns True when the message age exceeds the limit, else False.
        Raises ValueError when crea_date matches neither format.
        """
        creation_stamp = str(message.crea_date)

        try:
            msg_time = time.strptime(creation_stamp, "%Y-%m-%d %H:%M:%S.%f")
        except ValueError:
            # str() of a datetime whose microsecond field is 0 has no
            # fractional part, so retry without "%f" instead of crashing
            msg_time = time.strptime(creation_stamp, "%Y-%m-%d %H:%M:%S")

        # age of the message: current epoch minus message epoch
        age = time.time() - time.mktime(msg_time)

        return age > cls.max_time_between_msg

    @classmethod
    def C0001(cls):
        """Check the heartbeat of every running simulation.

        Currently, the heartbeat is implemented using simulation progress
        messages. If progress messages suddenly stop, it is likely that
        the simulation was killed or that a segfault occurred. In such a
        case, we inform the other components (failover, prodiguer GUI..)
        by changing the simulation status to 'error'.
        """
        for simulation in repo_io.get_running_simulations():
            print("\nchecking heartbeat ('%s')" % simulation.name)

            try:
                message = repo_io.retrieve_last_message(simulation)
            except types.MessageNotFoundException:
                # when we are here, it means we are in the interval where
                # a new simulation has just been inserted but the
                # corresponding message has not been inserted yet
                print("no message found for simulation ('%s')" % simulation.name)
                continue

            if cls.msg_timeout(message):
                simulation.status = "error"
                repo_io.update_simulation_status(simulation)
                print("heartbeat NOK - simulation status set to 'error'\n")
            else:
                print("heartbeat OK\n")


class Analyzer():
    """Entry points of the analyzer: start, stop and the polling loop."""

    @classmethod
    def start(cls):
        """Open the repository connection, then run the polling loop."""
        repo_io.init()  # open DB connection
        cls.main()

    @classmethod
    def stop(cls):
        """Close the repository connection."""
        repo_io.free()  # close DB connection

    @classmethod
    def main(cls):
        """Run the heartbeat check every 3 seconds, forever.

        This method never returns normally; the loop is left only through
        an exception (e.g. the SystemExit raised by the SIGINT handler).
        """
        # NOTE: disabled start-up code, kept for reference
        #
        # # parse args
        # parser = argparse.ArgumentParser(prog='analyzer')
        # parser.add_argument('-v', dest='verbose',required=False,action='store_true')
        # args = parser.parse_args()
        #
        # # check
        # if not os.path.exists(SMON.smon_home):
        #     sys.exit(1)
        #
        # SMON.init_singleton()

        print(' [*] Analyzer running. To exit press CTRL+C')

        while True:
            CheckList.C0001()
            time.sleep(3)

        # NOTE: disabled (and unreachable) tear-down code, kept for reference
        #
        # SMON.free_singleton()


def signal_handler(signal, frame):
    """SIGINT handler: close the repository connection and exit with 0.

    The parameter names are kept as-is for interface stability; note that
    `signal` shadows the `signal` module inside this function.
    """
    print('You pressed Ctrl+C!')
    Analyzer.stop()
    sys.exit(0)


if __name__ == '__main__':

    signal.signal(signal.SIGINT, signal_handler)

    try:
        Analyzer.start()

        sys.exit(0)

    except Exception:

        traceback.print_exc()

        sys.exit(1)