Changeset 1853 for TOOLS/PACK_IPSL/launch_and_measureTime.sh
- Timestamp:
- 09/03/12 11:29:13 (12 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
TOOLS/PACK_IPSL/launch_and_measureTime.sh
#!/bin/bash
#
# launch_and_measureTime.sh (revision r1853, reconstructed from the changeset view)
#
# Self-resubmitting batch job: builds a list of "./process_list.sh <file>"
# commands, runs it with glost_launch, works out which commands still have to
# be (re)run, and submits itself again as long as progress is being made.
#
# Names used here but not defined in the visible part of the file
# (USER_OUTPUT_PROGRESS, IGCM_DEM, EXE_DIR, timeHandlingFile,
# timeLimitBeforeEnd, getDateMilliSeconds, getTimeDiffSeconds) are presumably
# provided by DEM_utilities.sh or by the elided lines -- TODO confirm.

#######################################
# Write the list of commands that must be (re)run after a pack execution.
# Globals:   none
# Arguments: $1 - file receiving the new command list (truncated first)
#            $2 - log file produced by the previous execution
#            $3 - command list that was given to the previous execution
# Outputs:   writes one command per line into $1
#######################################
function update_tasks_list
{
    local tasksListFile=$1
    local logFileOfPrevPack=$2
    local tasksListFileOfPrevPack=$3
    local cmd list

    : > "$tasksListFile"

    # 1) Commands that were executed but did not end with status 0.
    #    Only lines starting with '#executed by process' are reports; field 9
    #    is compared to "0" as a string and the last field is re-queued.
    awk '/^#executed by process/ && $9 != "0" { print "./process_list.sh " $NF }' \
        "$logFileOfPrevPack" >> "$tasksListFile"

    # 2) Commands that were never processed at all (e.g. the machine was
    #    interrupted): every command of the previous list whose last field
    #    does not appear anywhere in the log is put back in the new list.
    while IFS= read -r cmd || [ -n "$cmd" ]; do
        list=$( printf '%s\n' "$cmd" | awk '{print $NF}' )
        [ -n "$list" ] || continue     # blank line: nothing to re-queue
        # -F: the path is a literal string, not a regular expression
        if ! grep -qF -- "$list" "$logFileOfPrevPack"; then
            printf '%s\n' "$cmd" >> "$tasksListFile"
        fi
    done < "$tasksListFileOfPrevPack"
}

#######################################
# Print the number of the current try.
# Globals:   USER_OUTPUT_PROGRESS (read), badFailureFile (appended on error)
# Arguments: none
# Outputs:   first field of the first line of numero_current_try.txt
# Returns:   exits with status 1 if the file is missing. The function is
#            meant to be called in $( ... ), so the caller must check the
#            status of the substitution.
#######################################
function getNumeroOfCurrentTry
{
    local tryNumFile=${USER_OUTPUT_PROGRESS}/numero_current_try.txt

    if [ ! -e "$tryNumFile" ]; then
        {
            echo "Le fichier :"
            echo "$tryNumFile"
            echo "doit etre present dans le repertoire :"
            echo "${USER_OUTPUT_PROGRESS}"
            echo "et il doit contenir un numero d'essai"
        } >> "$badFailureFile"
        exit 1
    fi
    awk 'NR == 1 { print $1; exit }' "$tryNumFile"
}

#######################################
# Print the highest instance number found for a given try (0 if none).
# Instance numbers are read from files named
#   inputCmd__try__<try>__instance__<n>.list
# in ${USER_OUTPUT_PROGRESS}/TRY__<try>.
# Globals:   USER_OUTPUT_PROGRESS (read), badFailureFile (appended on error)
# Arguments: $1 - try number
# Outputs:   the highest instance number
# Returns:   exits with status 1 if the directory is missing (see the note
#            on getNumeroOfCurrentTry about $( ... )).
#######################################
function getNumeroOfLastInstance
{
    local num_instance="0"
    local numTry=$1
    local progressDirectory="${USER_OUTPUT_PROGRESS}/TRY__${numTry}"
    local listFile num

    if [ ! -e "$progressDirectory" ]; then
        {
            echo "fonction getNumeroOfLastInstance :"
            echo "Le repertoire :"
            echo "$progressDirectory"
            echo "devrait exister. Il n'existe pas."
        } >> "$badFailureFile"
        exit 1
    fi

    # Glob instead of "cd + ls | grep": no directory change, and no implicit
    # limit on the number of digits of the instance number.
    for listFile in "$progressDirectory"/inputCmd__try__"${numTry}"__instance__*.list; do
        num=${listFile##*__instance__}
        num=${num%.list}
        case $num in
            '' | *[!0-9]*) continue ;;   # unmatched glob or non-numeric suffix
        esac
        if [ "$num" -gt "$num_instance" ]; then
            num_instance=$num
        fi
    done
    echo "$num_instance"
}

#######################################
# Tell whether two command lists differ.
# Globals:   badFailureFile (appended on error)
# Arguments: $1, $2 - the two files to compare
# Outputs:   1 if the files differ (different line counts, or a line of $1
#            that is not found in $2), 0 otherwise
# Returns:   exits with status 1 on an empty or missing file name (see the
#            note on getNumeroOfCurrentTry about $( ... )).
#######################################
function check_progress
{
    local file1=$1
    local file2=$2
    local nbLineFile1 nbLineFile2 line

    if [ -z "$file1" ] || [ -z "$file2" ]; then
        echo "check_progress : Le nom d'au moins 1 des 2 fichiers d'entree est vide" >> "$badFailureFile"
        exit 1
    fi

    if [ ! -e "$file1" ] || [ ! -e "$file2" ]; then
        # Everything goes to the failure file: stdout is the return channel.
        {
            echo "check_progress : au moins un des 2 fichiers suivants n'existe pas :"
            echo "$file1"
            echo "$file2"
        } >> "$badFailureFile"
        exit 1
    fi

    nbLineFile1=$( wc -l < "$file1" )
    nbLineFile2=$( wc -l < "$file2" )
    if [ "$nbLineFile1" -ne "$nbLineFile2" ]; then
        echo 1
        return
    fi

    while IFS= read -r line || [ -n "$line" ]; do
        [ -n "$line" ] || continue
        if ! grep -qF -- "$line" "$file2"; then
            echo 1
            return
        fi
    done < "$file1"
    echo 0
}

#######################################
# Rewrite the report file from the command list and the execution log.
# Globals:   reportFile (overwritten), inputCmd (read), output (read)
# Arguments: none
#######################################
function update_report
{
    {
        echo "Execution of tasks :"
        echo "------------------"
        cat "$inputCmd"
        echo
        echo "Results of tasks :"
        echo "----------------"
        cat "$output"
        echo
        echo
    } > "$reportFile"
}

# NOTE(review): in r1853 the batch directives sit after the function
# definitions; confirm that ccc_msub still honours them at this position.
########## batch directives : begin ##########
#MSUB -r pack_ipsl # Nom du job
### mutable directives ###
#MSUB -o /ccc/dmfbuf/import_data.2/ccrt/dmnfs12/cont003/bacasable/GUILLAUME/PSEUDO_DMNFS_PROGRESS/zIGCM_OUT/LMDZ4OR_v2/detailed_pack_output/pack_ipsl_%I.o
#MSUB -e /ccc/dmfbuf/import_data.2/ccrt/dmnfs12/cont003/bacasable/GUILLAUME/PSEUDO_DMNFS_PROGRESS/zIGCM_OUT/LMDZ4OR_v2/detailed_pack_output/pack_ipsl_%I.e
#MSUB -n 4
#MSUB -T 900
#MSUB -A tgcc0013
#MSUB -q standard
#MSUB -Qos test
########## batch directives : end ##########

export JOB_DIR=${LS_SUBCWD:-${PWD}}
# [line 174 of r1853 is elided by the changeset view and is not reproduced here]
source "${EXE_DIR}/DEM_utilities.sh"

export badFailureFile=${USER_OUTPUT_PROGRESS}/badFailure.txt

# Assignment and export are separate so that a failure inside the helper
# (which runs in a subshell) actually stops the script.
numCurrentTry=$( getNumeroOfCurrentTry ) || exit 1
export numCurrentTry
export progressDir="${USER_OUTPUT_PROGRESS}/TRY__${numCurrentTry}"
if [ ! -e "$progressDir" ]; then
    {
        echo "Le repertoire de suivi :"
        echo "$progressDir"
        echo "n'existe pas. STOP."
    } >> "$badFailureFile"
    exit 1
fi
numPrevInstance=$( getNumeroOfLastInstance "$numCurrentTry" ) || exit 1
export numPrevInstance
export numNewInstance=$(( numPrevInstance + 1 ))

instanceTag="try__${numCurrentTry}__instance__${numNewInstance}"
export inputCmd="${progressDir}/inputCmd__${instanceTag}.list"
export nextInputCmd="${progressDir}/nextInputCmd__${instanceTag}.list"
export output="${progressDir}/packOutput__${instanceTag}.log"
export reportFile="${progressDir}/report__${instanceTag}.log"
export noInterruptFile="${progressDir}/noInterrupt__${instanceTag}.txt"

if [ "${numCurrentTry}" -le 1 ] && [ "${numNewInstance}" -le 1 ]; then
    # Very first run: build the task list by collecting every *.list file
    # under the directories named in config_card.liste.
    : > "${inputCmd}"
    while IFS= read -r CONFIG; do
        PATH_SIMU=$( dirname "$CONFIG" )
        find "$PATH_SIMU" -type f -name "*.list" \
            | sed 's|^|./process_list.sh |' >> "${inputCmd}"
    done < <( awk 'NF { print $1 }' "${IGCM_DEM}/config_card.liste" )
else
    # instance > 1            : reuse the list prepared by the previous
    #                           instance of the current try.
    # try > 1 and instance 1  : reuse the result of the last instance of the
    #                           previous try, rebuilding it if that instance
    #                           was interrupted.
    if [ "${numNewInstance}" -ge 2 ]; then
        nextInputCmd_of_PrevInst="${progressDir}/nextInputCmd__try__${numCurrentTry}__instance__${numPrevInstance}.list"
        if [ ! -e "$nextInputCmd_of_PrevInst" ]; then
            {
                echo "Le fichier suivant :"
                echo "$nextInputCmd_of_PrevInst"
                echo "n'existe pas. Il devrait exister. STOP."
            } >> "$badFailureFile"
            exit 1
        fi
        cat "$nextInputCmd_of_PrevInst" > "${inputCmd}"
    else # numNewInstance == 1
        numPrevTry=$(( numCurrentTry - 1 ))
        numLastInstInstanceInPrevTry=$( getNumeroOfLastInstance "$numPrevTry" ) || exit 1
        # Files of the previous try live in the previous try's directory.
        prevProgressDir="${USER_OUTPUT_PROGRESS}/TRY__${numPrevTry}"
        lastInstTag="try__${numPrevTry}__instance__${numLastInstInstanceInPrevTry}"
        # Dedicated variable: the exported noInterruptFile of the current
        # instance must stay untouched, it is written at the end of the job.
        noInterruptFile_of_LastInst="${prevProgressDir}/noInterrupt__${lastInstTag}.txt"
        nextInputCmd_of_LastInst="${prevProgressDir}/nextInputCmd__${lastInstTag}.list"

        if [ ! -e "$noInterruptFile_of_LastInst" ]; then
            # The last instance of the previous try was interrupted
            # unexpectedly: rebuild the command list from its log and input.
            output_of_LastInst="${prevProgressDir}/packOutput__${lastInstTag}.log"
            inputCmd_of_LastInst="${prevProgressDir}/inputCmd__${lastInstTag}.list"
            if [ ! -e "$output_of_LastInst" ] || [ ! -e "$inputCmd_of_LastInst" ]; then
                {
                    echo "Les fichiers suivants :"
                    echo "$output_of_LastInst"
                    echo "$inputCmd_of_LastInst"
                    echo "n'existent pas. Il devrait exister. STOP."
                } >> "$badFailureFile"
                exit 1
            fi
            update_tasks_list "${nextInputCmd_of_LastInst}" "${output_of_LastInst}" "${inputCmd_of_LastInst}"
        elif [ ! -e "$nextInputCmd_of_LastInst" ]; then
            {
                echo "Le fichier suivant :"
                echo "$nextInputCmd_of_LastInst"
                echo "n'existe pas. Il devrait exister. STOP."
            } >> "$badFailureFile"
            exit 1
        fi
        cat "$nextInputCmd_of_LastInst" > "${inputCmd}"
    fi
fi

# Default report: until update_report runs, assume the run was interrupted.
echo "No report. Le computation must have interrupted." > "$reportFile"

startTime=$( getDateMilliSeconds )
echo "start time:$startTime" >> "$timeHandlingFile"

ccc_mprun ./glost_launch -R "$timeLimitBeforeEnd" "${inputCmd}" 2> "${output}"

meantime=$( getTimeDiffSeconds "$startTime" )
# [lines 320-321 of r1853 are elided by the changeset view and are not
#  reproduced here; endExecutionTime is presumably set there -- TODO confirm]
echo "end time:$endExecutionTime" >> "$timeHandlingFile"

echo "meantime ncrcat = $meantime"

# Restart handling
# ----------------

update_report

update_tasks_list "$nextInputCmd" "$output" "$inputCmd"

echo "no interruption has occured" > "${noInterruptFile}"

# Nothing left to do?
# -------------------
everythingOK=$( wc -l < "$nextInputCmd" )
if [ "${everythingOK:-1}" -eq 0 ]; then
    echo "Tout s'est fini correctement" >> "$badFailureFile"
    # NOTE(review): status 1 on normal completion is kept from the original;
    # confirm whether the surrounding tooling relies on it.
    exit 1
fi

# Is there any progress?
# ----------------------
# resDiff == 1 : the lists differ
# resDiff == 0 : the lists are identical, i.e. nothing was achieved
resDiff=$( check_progress "$inputCmd" "$nextInputCmd" ) || exit 1
if [ "x${resDiff}" == "x0" ]; then
    echo "Il n'y pas plus de progression" >> "$badFailureFile"
    exit 1
fi

# Chain with the same script
ccc_msub launch_and_measureTime.sh
Note: See TracChangeset
for help on using the changeset viewer.