Changeset 1009


Ignore:
Timestamp:
07/03/14 08:50:57 (10 years ago)
Author:
sdipsl
Message:

Fixing #189, which was caused by ccc_mprun misbehaviour: the variables available for execution are now prepared, and the MPI/OMP execution script is now built, only once per job.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/libIGCM/libIGCM_config/libIGCM_config.ksh

    r997 r1009  
    786786  #================================================================# 
    787787  #       Prepare variables available for binary execution         # 
     788  #       Call function for creation of run script                 # 
     789  #       Only done once per job                                   # 
    788790  #================================================================# 
    789791 
    790   typeset ExeNameIn ExeNameFirst CompNameFirst comp i j 
    791   typeset tempvar tempvarMPI tempvarNOD NbElts NbExec 
    792  
    793   PROCESSUS_NUMBER=0 
    794   NbExec=0 
    795   i=0 
    796  
    797   OK_PARA_MPI=false 
    798   OK_PARA_OMP=false 
    799   OK_PARA_NOD=false 
    800   OK_PARA_MPMD=false 
    801  
    802   for comp in ${config_ListOfComponents[*]} ; do 
    803  
    804     IGCM_debug_Print 1 ${comp} 
    805  
    806     eval ExeNameIn=\${config_Executable_${comp}[0]} 
    807  
    808     # NO order in config.card for parallelized values ! 
    809     # just use suffix : MPI , OMP and NOD (for number of NODes.) 
    810  
    811     # NOD is the number of NODes allocated 
    812     eval ${comp}_PROC_NOD=0 
    813  
    814     # MPI is the number of MPI processus per nodes 
    815     eval ${comp}_PROC_MPI=0 
    816  
    817     # OMP is the number of OpenMP threads per MPI processus 
    818     eval ${comp}_PROC_OMP=0 
    819  
    820     # Only if we really have an executable for the component : 
    821     if ( [ "X${ExeNameIn}" != X\"\" ] && [ "X${ExeNameIn}" != "Xinca.dat" ] ) ; then 
    822  
    823       # Keep the first executable found and the first CompName 
    824       ExeNameFirst=${ExeNameIn} 
    825       CompNameFirst=${comp} 
    826  
    827       # Are we a second executable? 
    828       (( NbExec = NbExec + 1 )) 
    829  
    830       # set 1 MPI task, 1 OpenMP thread and 1 node as default 
    831       eval ${comp}_PROC_MPI=1 
    832       eval ${comp}_PROC_OMP=1 
    833       eval ${comp}_PROC_NOD=1 
    834  
    835       eval NbElts=\${#config_Executable_${comp}[@]} 
    836  
    837       if [ ${NbElts} -ge 2 ] ; then 
    838         (( j = 2 )) 
    839         while [ $j -lt ${NbElts} ] ; do 
    840           eval tempvar=\${config_Executable_${comp}[${j}]} 
    841           IGCM_debug_Print 2 ${tempvar} 
    842  
    843           if [ X${tempvar} = X ] ; then 
    844             IGCM_debug_Print 2 "Error reading MPI/OMP parameters !!!" 
    845             IGCM_debug_Exit "Check your config.card. Exit now" 
    846             IGCM_debug_Verif_Exit 
     792  if [ ${Period} -eq 1 ]; then 
     793    typeset ExeNameIn ExeNameFirst CompNameFirst comp i j 
     794    typeset tempvar tempvarMPI tempvarNOD NbElts NbExec 
     795 
     796    PROCESSUS_NUMBER=0 
     797    NbExec=0 
     798    i=0 
     799 
     800    OK_PARA_MPI=false 
     801    OK_PARA_OMP=false 
     802    OK_PARA_NOD=false 
     803    OK_PARA_MPMD=false 
     804 
     805    for comp in ${config_ListOfComponents[*]} ; do 
     806 
     807      IGCM_debug_Print 1 ${comp} 
     808 
     809      eval ExeNameIn=\${config_Executable_${comp}[0]} 
     810 
     811      # NO order in config.card for parallelized values ! 
     812      # just use suffix : MPI , OMP and NOD (for number of NODes.) 
     813 
     814      # NOD is the number of NODes allocated 
     815      eval ${comp}_PROC_NOD=0 
     816 
     817      # MPI is the number of MPI processus per nodes 
     818      eval ${comp}_PROC_MPI=0 
     819 
     820      # OMP is the number of OpenMP threads per MPI processus 
     821      eval ${comp}_PROC_OMP=0 
     822 
     823      # Only if we really have an executable for the component : 
     824      if ( [ "X${ExeNameIn}" != X\"\" ] && [ "X${ExeNameIn}" != "Xinca.dat" ] ) ; then 
     825 
     826        # Keep the first executable found and the first CompName 
     827        ExeNameFirst=${ExeNameIn} 
     828        CompNameFirst=${comp} 
     829 
     830        # Are we a second executable? 
     831        (( NbExec = NbExec + 1 )) 
     832 
     833        # set 1 MPI task, 1 OpenMP thread and 1 node as default 
     834        eval ${comp}_PROC_MPI=1 
     835        eval ${comp}_PROC_OMP=1 
     836        eval ${comp}_PROC_NOD=1 
     837 
     838        eval NbElts=\${#config_Executable_${comp}[@]} 
     839 
     840        if [ ${NbElts} -ge 2 ] ; then 
     841          (( j = 2 )) 
     842          while [ $j -lt ${NbElts} ] ; do 
     843            eval tempvar=\${config_Executable_${comp}[${j}]} 
     844            IGCM_debug_Print 2 ${tempvar} 
     845 
     846            if [ X${tempvar} = X ] ; then 
     847              IGCM_debug_Print 2 "Error reading MPI/OMP parameters !!!" 
     848              IGCM_debug_Exit "Check your config.card. Exit now" 
     849              IGCM_debug_Verif_Exit 
     850            fi 
     851 
     852            case ${tempvar} in 
     853            *[mM][pP][iI]*) 
     854              # Read MPI parameter for composante 
     855              eval ${comp}_PROC_MPI=$( echo ${tempvar} | tr '[a-z]' '[A-Z]' | sed -e "s/MPI//" ) 
     856              OK_PARA_MPI=true;; 
     857            *[oO][mM][pP]*) 
     858              # Read OMP parameter for composante 
     859              eval ${comp}_PROC_OMP=$( echo ${tempvar} | tr '[a-z]' '[A-Z]' | sed -e "s/OMP//" ) 
     860              OK_PARA_OMP=true;; 
     861            *[nN][oO][dD]*) 
     862              # Read NOD (NumBer of Nodes) parameter for composante 
     863              eval ${comp}_PROC_NOD=$( echo ${tempvar} | tr '[a-z]' '[A-Z]' | sed -e "s/NOD//" ) 
     864              OK_PARA_NOD=true 
     865              OK_PARA_MPI=true;; 
     866            esac 
     867            (( j = j + 1 )) 
     868          done 
     869        fi 
     870        eval tempvarMPI=\${${comp}_PROC_MPI} 
     871        eval tempvarNOD=\${${comp}_PROC_NOD} 
     872        eval tempvarOMP=\${${comp}_PROC_OMP} 
     873 
     874        (( PROCESSUS_NUMBER = PROCESSUS_NUMBER + tempvarMPI * tempvarNOD * tempvarOMP )) 
     875      fi 
     876      (( i=i+1 )) 
     877    done 
     878 
     879    # set MPMD mode if more than 2 executable names. 
     880    [ ${NbExec} -ge 2 ] && OK_PARA_MPMD=true 
     881 
     882    # Verification of BATCH_NUM_PROC_TOT total number of processors set in job header. 
     883    if [ X${BATCH_NUM_PROC_TOT} != X ] ; then 
     884      # BATCH_NUM_PROC_TOT is set 
     885      if ( ${OK_PARA_MPI} ) ; then 
     886        IGCM_debug_Print 1 "MPI/OMP/NOD found into config.card and BATCH_NUM_PROC_TOT = ${BATCH_NUM_PROC_TOT} " 
     887      else 
     888        # with previous method. 
     889        if [ ${BATCH_NUM_PROC_TOT} -gt 1 ] ; then 
     890          # with more than 1 proc 
     891          if ( ${OK_PARA_MPMD} ) ; then 
     892            # with MPMD ie CPL/oasis method 
     893            IGCM_debug_Print 2 "Use default number of MPI tasks for this machine : " 
     894            IGCM_debug_Print 2 "${DEFAULT_NUM_PROC_OCE} for OCE" 
     895            IGCM_debug_Print 2 "${DEFAULT_NUM_PROC_CPL} for CPL" 
     896            IGCM_debug_Print 2 "${DEFAULT_NUM_PROC_ATM} for ATM" 
     897            OK_PARA_MPI=true 
     898            CPL_PROC_MPI=${DEFAULT_NUM_PROC_CPL} 
     899            OCE_PROC_MPI=${DEFAULT_NUM_PROC_OCE} 
     900            ATM_PROC_MPI=${DEFAULT_NUM_PROC_ATM} 
     901            PROCESSUS_NUMBER=${DEFAULT_NUM_PROC_TOTAL} 
     902          else 
     903            # with have only one executable 
     904            IGCM_debug_Print 2 "Use ${BATCH_NUM_PROC_TOT} MPI tasks for ${CompNameFirst} : ${ExeNameFirst} " 
     905            OK_PARA_MPI=true 
     906            eval ${CompNameFirst}_PROC_MPI=${BATCH_NUM_PROC_TOT} 
     907            PROCESSUS_NUMBER=${BATCH_NUM_PROC_TOT} 
    847908          fi 
    848  
    849           case ${tempvar} in 
    850           *[mM][pP][iI]*) 
    851             # Read MPI parameter for composante 
    852             eval ${comp}_PROC_MPI=$( echo ${tempvar} | tr '[a-z]' '[A-Z]' | sed -e "s/MPI//" ) 
    853             OK_PARA_MPI=true;; 
    854           *[oO][mM][pP]*) 
    855             # Read OMP parameter for composante 
    856             eval ${comp}_PROC_OMP=$( echo ${tempvar} | tr '[a-z]' '[A-Z]' | sed -e "s/OMP//" ) 
    857             OK_PARA_OMP=true;; 
    858           *[nN][oO][dD]*) 
    859             # Read NOD (NumBer of Nodes) parameter for composante 
    860             eval ${comp}_PROC_NOD=$( echo ${tempvar} | tr '[a-z]' '[A-Z]' | sed -e "s/NOD//" ) 
    861             OK_PARA_NOD=true 
    862             OK_PARA_MPI=true;; 
    863           esac 
    864           (( j = j + 1 )) 
    865         done 
     909        else 
     910          PROCESSUS_NUMBER=1 
     911        fi 
    866912      fi 
    867       eval tempvarMPI=\${${comp}_PROC_MPI} 
    868       eval tempvarNOD=\${${comp}_PROC_NOD} 
    869       eval tempvarOMP=\${${comp}_PROC_OMP} 
    870  
    871       (( PROCESSUS_NUMBER = PROCESSUS_NUMBER + tempvarMPI * tempvarNOD * tempvarOMP )) 
    872     fi 
    873     (( i=i+1 )) 
    874   done 
    875  
    876   # set MPMD mode if more than 2 executable names. 
    877   [ ${NbExec} -ge 2 ] && OK_PARA_MPMD=true 
    878  
    879   # Verification of BATCH_NUM_PROC_TOT total number of processors set in job header. 
    880   if [ X${BATCH_NUM_PROC_TOT} != X ] ; then 
    881     # BATCH_NUM_PROC_TOT is set 
    882     if ( ${OK_PARA_MPI} ) ; then 
    883       IGCM_debug_Print 1 "MPI/OMP/NOD found into config.card and BATCH_NUM_PROC_TOT = ${BATCH_NUM_PROC_TOT} " 
    884     else 
    885       # with previous method. 
    886       if [ ${BATCH_NUM_PROC_TOT} -gt 1 ] ; then 
    887         # with more than 1 proc 
    888         if ( ${OK_PARA_MPMD} ) ; then 
    889           # with MPMD ie CPL/oasis method 
    890           IGCM_debug_Print 2 "Use default number of MPI tasks for this machine : " 
    891           IGCM_debug_Print 2 "${DEFAULT_NUM_PROC_OCE} for OCE" 
    892           IGCM_debug_Print 2 "${DEFAULT_NUM_PROC_CPL} for CPL" 
    893           IGCM_debug_Print 2 "${DEFAULT_NUM_PROC_ATM} for ATM" 
    894           OK_PARA_MPI=true 
    895           CPL_PROC_MPI=${DEFAULT_NUM_PROC_CPL} 
    896           OCE_PROC_MPI=${DEFAULT_NUM_PROC_OCE} 
    897           ATM_PROC_MPI=${DEFAULT_NUM_PROC_ATM} 
    898           PROCESSUS_NUMBER=${DEFAULT_NUM_PROC_TOTAL} 
    899         else 
    900           # with have only one executable 
    901           IGCM_debug_Print 2 "Use ${BATCH_NUM_PROC_TOT} MPI tasks for ${CompNameFirst} : ${ExeNameFirst} " 
    902           OK_PARA_MPI=true 
    903           eval ${CompNameFirst}_PROC_MPI=${BATCH_NUM_PROC_TOT} 
    904           PROCESSUS_NUMBER=${BATCH_NUM_PROC_TOT} 
     913      # Verification with PBS parameter 
     914      if [ ${BATCH_NUM_PROC_TOT} -ne ${PROCESSUS_NUMBER} ] ; then 
     915        IGCM_debug_Exit "ERROR with parallelization parameters !" 
     916        IGCM_debug_Print 2 "Job header variable BATCH_NUM_PROC_TOT = ${BATCH_NUM_PROC_TOT} " 
     917        IGCM_debug_Print 2 "is the total number of _processors_ reserved." 
     918        IGCM_debug_Print 2 "It is not equal to the sum of _processus_  = ${PROCESSUS_NUMBER}." 
     919        IGCM_debug_Verif_Exit 
     920      fi 
     921      NUM_PROC_CPL=${CPL_PROC_MPI} # for backward compatibility 
     922      NUM_PROC_OCE=${OCE_PROC_MPI} # for backward compatibility 
     923      NUM_PROC_ATM=${ATM_PROC_MPI} # for backward compatibility 
     924    else # BATCH_NUM_PROC_TOT="" 
     925      if ( ${OK_PARA_MPI} ) ; then 
     926        IGCM_debug_Exit "ERROR : missing value for ${BATCH_NUM_PROC_TOT} processors," 
     927        IGCM_debug_Print 2 "You have parallel parameters in config->Executable->list." 
     928        IGCM_debug_Print 2 "Please add BATCH_NUM_PROC_TOT variable in job header as well." 
     929        IGCM_debug_Exit "Exit now." 
     930        IGCM_debug_Verif_Exit 
     931      else 
     932        # sequential case ! 
     933        if [ ${PROCESSUS_NUMBER} -eq 0 ] ; then 
     934          (( PROCESSUS_NUMBER = 1 )) 
     935          IGCM_debug_Print 2 "PROCESSUS_NUMBER is all 0 (sequential use of old definition in config->Executable->list)." 
     936          IGCM_debug_Print 2 "We set it to 1." 
    905937        fi 
    906       else 
    907         PROCESSUS_NUMBER=1 
    908938      fi 
    909939    fi 
    910     # Verification with PBS parameter 
    911     if [ ${BATCH_NUM_PROC_TOT} -ne ${PROCESSUS_NUMBER} ] ; then 
    912       IGCM_debug_Exit "ERROR with parallelization parameters !" 
    913       IGCM_debug_Print 2 "Job header variable BATCH_NUM_PROC_TOT = ${BATCH_NUM_PROC_TOT} " 
    914       IGCM_debug_Print 2 "is the total number of _processors_ reserved." 
    915       IGCM_debug_Print 2 "It is not equal to the sum of _processus_  = ${PROCESSUS_NUMBER}." 
    916       IGCM_debug_Verif_Exit 
    917     fi 
    918     NUM_PROC_CPL=${CPL_PROC_MPI} # for backward compatibility 
    919     NUM_PROC_OCE=${OCE_PROC_MPI} # for backward compatibility 
    920     NUM_PROC_ATM=${ATM_PROC_MPI} # for backward compatibility 
    921   else # BATCH_NUM_PROC_TOT="" 
    922     if ( ${OK_PARA_MPI} ) ; then 
    923       IGCM_debug_Exit "ERROR : missing value for ${BATCH_NUM_PROC_TOT} processors," 
    924       IGCM_debug_Print 2 "You have parallel parameters in config->Executable->list." 
    925       IGCM_debug_Print 2 "Please add BATCH_NUM_PROC_TOT variable in job header as well." 
    926       IGCM_debug_Exit "Exit now." 
    927       IGCM_debug_Verif_Exit 
    928     else 
    929       # sequential case ! 
    930       if [ ${PROCESSUS_NUMBER} -eq 0 ] ; then 
    931         (( PROCESSUS_NUMBER = 1 )) 
    932         IGCM_debug_Print 2 "PROCESSUS_NUMBER is all 0 (sequential use of old definition in config->Executable->list)." 
    933         IGCM_debug_Print 2 "We set it to 1." 
    934       fi 
    935     fi 
    936   fi 
    937  
    938   IGCM_debug_Print 1 "MPI/OMP treatment PROCESSUS_NUMBER = ${PROCESSUS_NUMBER}" 
    939  
    940   IGCM_sys_build_execution_scripts 
     940 
     941    IGCM_debug_Print 1 "MPI/OMP treatment PROCESSUS_NUMBER = ${PROCESSUS_NUMBER}" 
     942 
     943    IGCM_sys_build_execution_scripts 
     944  fi 
    941945 
    942946  ExecutionFail=false 
Note: See TracChangeset for help on using the changeset viewer.