Ignore:
Timestamp:
09/03/15 14:30:39 (9 years ago)
Author:
sdipsl
Message:
  • MPI/OMP handling refactoring
  • IGCM_config_ConfigureExexution will define the MPMD/SPMD/MPI/OMP context
  • 6 execution types are introduced and will be documented later
  • prerequisite to have ins_job editing the headers
  • IGCM_sys_build_execution_scripts will be adapted later
File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/libIGCM/libIGCM_config/libIGCM_config.ksh

    r1220 r1230  
    571571 
    572572#=================================== 
     573function IGCM_config_ConfigureExexution 
     574{ 
     575  IGCM_debug_PushStack " IGCM_config_ConfigureExexution" 
     576 
     577  echo 
     578  IGCM_debug_Print 1 " IGCM_config_ConfigureExexution" 
     579  echo 
     580 
     581  typeset ExeNameIn ExeNameFirst CompNameFirst comp i 
     582  typeset tempvar tempvarMPI tempvarNOD NbElts NbExec 
     583  typeset executionType 
     584 
     585  PROCESSUS_NUMBER=0 
     586  NbExec=0 
     587 
     588  OK_PARA_MPI=false 
     589  OK_PARA_OMP=false 
     590  OK_PARA_NOD=false 
     591  OK_PARA_MPMD=false 
     592 
     593  for comp in ${config_ListOfComponents[*]} ; do 
     594 
     595    IGCM_debug_Print 1 ${comp} 
     596 
     597    eval ExeNameIn=\${config_Executable_${comp}[0]} 
     598 
     599    # NO order in config.card for parallelized values ! 
     600    # just use suffix : MPI , OMP and NOD (for number of NODes.) 
     601 
     602    # NOD is the number of NODes allocated 
     603    eval ${comp}_PROC_NOD=0 
     604 
     605    # MPI is the number of MPI processus per nodes 
     606    eval ${comp}_PROC_MPI=0 
     607 
     608    # OMP is the number of OpenMP threads per MPI processus 
     609    eval ${comp}_PROC_OMP=0 
     610 
     611    # Only if we really have an executable for the component : 
     612    if ( [ "X${ExeNameIn}" != X\"\" ] && [ "X${ExeNameIn}" != "Xinca.dat" ] ) ; then 
     613 
     614      # Keep the first executable found and the first CompName 
     615      ExeNameFirst=${ExeNameIn} 
     616      CompNameFirst=${comp} 
     617 
     618      # Are we a second executable? 
     619      (( NbExec = NbExec + 1 )) 
     620 
     621      # set 1 MPI task, 1 OpenMP thread and 1 node as default 
     622      eval ${comp}_PROC_MPI=1 
     623      eval ${comp}_PROC_OMP=1 
     624      eval ${comp}_PROC_NOD=1 
     625 
     626      eval NbElts=\${#config_Executable_${comp}[@]} 
     627 
     628      if [ ${NbElts} -ge 2 ] ; then 
     629        # 
     630        # CURRENT METHOD TO SPECIFY MPI AND OMP RESSOURCES 
     631        # 
     632        i=2 
     633        while [ ${i} -lt ${NbElts} ] ; do 
     634          eval tempvar=\${config_Executable_${comp}[${i}]} 
     635          IGCM_debug_Print 2 ${tempvar} 
     636 
     637          if [ X${tempvar} = X ] ; then 
     638            IGCM_debug_Print 2 "Error reading MPI/OMP parameters !!!" 
     639            IGCM_debug_Exit "Check your config.card. Exit now" 
     640            IGCM_debug_Verif_Exit 
     641          fi 
     642 
     643          case ${tempvar} in 
     644          *[mM][pP][iI]*) 
     645            # Read MPI parameter for composante 
     646            eval ${comp}_PROC_MPI=$( echo ${tempvar} | tr '[a-z]' '[A-Z]' | sed -e "s/MPI//" ) 
     647            OK_PARA_MPI=true;; 
     648          *[oO][mM][pP]*) 
     649            # Read OMP parameter for composante 
     650            eval ${comp}_PROC_OMP=$( echo ${tempvar} | tr '[a-z]' '[A-Z]' | sed -e "s/OMP//" ) 
     651            ;; 
     652          *[nN][oO][dD]*) 
     653            # Read NOD (NumBer of Nodes) parameter for composante 
     654            eval ${comp}_PROC_NOD=$( echo ${tempvar} | tr '[a-z]' '[A-Z]' | sed -e "s/NOD//" ) 
     655            OK_PARA_NOD=true 
     656            OK_PARA_MPI=true 
     657            ;; 
     658          esac 
     659          (( i = i + 1 )) 
     660        done 
     661      else 
     662        # 
     663        # BACKWARD COMPATIBILITY 
     664        # 
     665        IGCM_debug_Print 2 "Use default number of MPI tasks for this machine : " 
     666        IGCM_debug_Print 2 "${DEFAULT_NUM_PROC_OCE} for OCE" 
     667        IGCM_debug_Print 2 "${DEFAULT_NUM_PROC_CPL} for CPL" 
     668        IGCM_debug_Print 2 "${DEFAULT_NUM_PROC_ATM} for ATM" 
     669        OK_PARA_MPI=true 
     670        CPL_PROC_MPI=${DEFAULT_NUM_PROC_CPL} 
     671        OCE_PROC_MPI=${DEFAULT_NUM_PROC_OCE} 
     672        ATM_PROC_MPI=${DEFAULT_NUM_PROC_ATM} 
     673      fi 
     674      eval tempvarMPI=\${${comp}_PROC_MPI} 
     675      eval tempvarNOD=\${${comp}_PROC_NOD} 
     676      eval tempvarOMP=\${${comp}_PROC_OMP} 
     677 
     678      # set OMP mode if more than 1 OMP thread. 
     679      [ ${tempvarOMP} -ge 2 ] && OK_PARA_OMP=true 
     680 
     681      # SUM UP TOTAL NUMBER OF CORES 
     682      (( PROCESSUS_NUMBER = PROCESSUS_NUMBER + tempvarMPI * tempvarNOD * tempvarOMP )) 
     683    fi 
     684  done 
     685 
     686  # set MPMD mode if more than 2 executable names. 
     687  [ ${NbExec} -ge 2 ] && OK_PARA_MPMD=true   
     688 
     689  # Define the execution type we are running in 
     690  if [ ${OK_PARA_MPMD} ] ; then 
     691    # MPMD always implies MPI 
     692    if [ ${OK_PARA_MPI} ] ; then 
     693      executionType=1 
     694    fi 
     695    # MPMD + MPI/OMP 
     696    if [ ${OK_PARA_OMP} ] ; then 
     697      executionType=2 
     698    fi 
     699  else 
     700    # SPMD + MPI/OMP 
     701    if ( [ ${OK_PARA_MPI} ] && [ ${OK_PARA_OMP} ] ) ; then 
     702      executionType=3 
     703      # SPMD + MPI only 
     704    elif ( [ ${OK_PARA_MPI} ] && [ ! ${OK_PARA_OMP} ] ) ; then 
     705      executionType=4 
     706      # SPMD + OMP only 
     707    elif ( [ ! ${OK_PARA_MPI} ] && [ ${OK_PARA_OMP} ] ) ; then 
     708      executionType=5 
     709      # SEQUENTIAL THEN 
     710    elif ( [ ! ${OK_PARA_MPI} ] && [ ! ${OK_PARA_OMP} ] ) ; then 
     711      executionType=6 
     712      PROCESSUS_NUMBER=1 
     713    fi 
     714  fi 
     715 
     716  IGCM_debug_Print 1 "MPI/OMP treatment PROCESSUS_NUMBER = ${PROCESSUS_NUMBER}" 
     717 
     718  IGCM_debug_PopStack "IGCM_config_ConfigureExexution" 
     719} 
     720 
     721#=================================== 
    573722function IGCM_config_PeriodStart 
    574723{ 
     
    801950 
    802951  if [ ${Period} -eq 1 ]; then 
    803     typeset ExeNameIn ExeNameFirst CompNameFirst comp i j 
    804     typeset tempvar tempvarMPI tempvarNOD NbElts NbExec 
    805  
    806     PROCESSUS_NUMBER=0 
    807     NbExec=0 
    808     i=0 
    809  
    810     OK_PARA_MPI=false 
    811     OK_PARA_OMP=false 
    812     OK_PARA_NOD=false 
    813     OK_PARA_MPMD=false 
    814  
    815     for comp in ${config_ListOfComponents[*]} ; do 
    816  
    817       IGCM_debug_Print 1 ${comp} 
    818  
    819       eval ExeNameIn=\${config_Executable_${comp}[0]} 
    820  
    821       # NO order in config.card for parallelized values ! 
    822       # just use suffix : MPI , OMP and NOD (for number of NODes.) 
    823  
    824       # NOD is the number of NODes allocated 
    825       eval ${comp}_PROC_NOD=0 
    826  
    827       # MPI is the number of MPI processus per nodes 
    828       eval ${comp}_PROC_MPI=0 
    829  
    830       # OMP is the number of OpenMP threads per MPI processus 
    831       eval ${comp}_PROC_OMP=0 
    832  
    833       # Only if we really have an executable for the component : 
    834       if ( [ "X${ExeNameIn}" != X\"\" ] && [ "X${ExeNameIn}" != "Xinca.dat" ] ) ; then 
    835  
    836         # Keep the first executable found and the first CompName 
    837         ExeNameFirst=${ExeNameIn} 
    838         CompNameFirst=${comp} 
    839  
    840         # Are we a second executable? 
    841         (( NbExec = NbExec + 1 )) 
    842  
    843         # set 1 MPI task, 1 OpenMP thread and 1 node as default 
    844         eval ${comp}_PROC_MPI=1 
    845         eval ${comp}_PROC_OMP=1 
    846         eval ${comp}_PROC_NOD=1 
    847  
    848         eval NbElts=\${#config_Executable_${comp}[@]} 
    849  
    850         if [ ${NbElts} -ge 2 ] ; then 
    851           (( j = 2 )) 
    852           while [ $j -lt ${NbElts} ] ; do 
    853             eval tempvar=\${config_Executable_${comp}[${j}]} 
    854             IGCM_debug_Print 2 ${tempvar} 
    855  
    856             if [ X${tempvar} = X ] ; then 
    857               IGCM_debug_Print 2 "Error reading MPI/OMP parameters !!!" 
    858               IGCM_debug_Exit "Check your config.card. Exit now" 
    859               IGCM_debug_Verif_Exit 
    860             fi 
    861  
    862             case ${tempvar} in 
    863             *[mM][pP][iI]*) 
    864               # Read MPI parameter for composante 
    865               eval ${comp}_PROC_MPI=$( echo ${tempvar} | tr '[a-z]' '[A-Z]' | sed -e "s/MPI//" ) 
    866               OK_PARA_MPI=true;; 
    867             *[oO][mM][pP]*) 
    868               # Read OMP parameter for composante 
    869               eval ${comp}_PROC_OMP=$( echo ${tempvar} | tr '[a-z]' '[A-Z]' | sed -e "s/OMP//" ) 
    870               ;; 
    871             *[nN][oO][dD]*) 
    872               # Read NOD (NumBer of Nodes) parameter for composante 
    873               eval ${comp}_PROC_NOD=$( echo ${tempvar} | tr '[a-z]' '[A-Z]' | sed -e "s/NOD//" ) 
    874               OK_PARA_NOD=true 
    875               OK_PARA_MPI=true;; 
    876             esac 
    877             (( j = j + 1 )) 
    878           done 
    879         fi 
    880         eval tempvarMPI=\${${comp}_PROC_MPI} 
    881         eval tempvarNOD=\${${comp}_PROC_NOD} 
    882         eval tempvarOMP=\${${comp}_PROC_OMP} 
    883  
    884         # set OMP mode if more than 1 OMP thread. 
    885         [ ${tempvarOMP} -ge 2 ] && OK_PARA_OMP=true 
    886  
    887         (( PROCESSUS_NUMBER = PROCESSUS_NUMBER + tempvarMPI * tempvarNOD * tempvarOMP )) 
    888       fi 
    889       (( i=i+1 )) 
    890     done 
    891  
    892     # set MPMD mode if more than 2 executable names. 
    893     [ ${NbExec} -ge 2 ] && OK_PARA_MPMD=true 
    894  
    895     # Verification of BATCH_NUM_PROC_TOT total number of processors set in job header. 
    896     if [ X${BATCH_NUM_PROC_TOT} != X ] ; then 
    897       # BATCH_NUM_PROC_TOT is set 
    898       if ( ${OK_PARA_MPI} ) ; then 
    899         IGCM_debug_Print 1 "MPI/OMP/NOD found into config.card and BATCH_NUM_PROC_TOT = ${BATCH_NUM_PROC_TOT} " 
    900       else 
    901         # with previous method. 
    902         if [ ${BATCH_NUM_PROC_TOT} -gt 1 ] ; then 
    903           # with more than 1 proc 
    904           if ( ${OK_PARA_MPMD} ) ; then 
    905             # with MPMD ie CPL/oasis method 
    906             IGCM_debug_Print 2 "Use default number of MPI tasks for this machine : " 
    907             IGCM_debug_Print 2 "${DEFAULT_NUM_PROC_OCE} for OCE" 
    908             IGCM_debug_Print 2 "${DEFAULT_NUM_PROC_CPL} for CPL" 
    909             IGCM_debug_Print 2 "${DEFAULT_NUM_PROC_ATM} for ATM" 
    910             OK_PARA_MPI=true 
    911             CPL_PROC_MPI=${DEFAULT_NUM_PROC_CPL} 
    912             OCE_PROC_MPI=${DEFAULT_NUM_PROC_OCE} 
    913             ATM_PROC_MPI=${DEFAULT_NUM_PROC_ATM} 
    914             PROCESSUS_NUMBER=${DEFAULT_NUM_PROC_TOTAL} 
    915           else 
    916             # with have only one executable 
    917             IGCM_debug_Print 2 "Use ${BATCH_NUM_PROC_TOT} MPI tasks for ${CompNameFirst} : ${ExeNameFirst} " 
    918             OK_PARA_MPI=true 
    919             eval ${CompNameFirst}_PROC_MPI=${BATCH_NUM_PROC_TOT} 
    920             PROCESSUS_NUMBER=${BATCH_NUM_PROC_TOT} 
    921           fi 
    922         else 
    923           PROCESSUS_NUMBER=1 
    924         fi 
    925       fi 
    926       # Verification with PBS parameter 
    927       if [ ${BATCH_NUM_PROC_TOT} -ne ${PROCESSUS_NUMBER} ] ; then 
    928         IGCM_debug_Exit "ERROR with parallelization parameters !" 
    929         IGCM_debug_Print 2 "Job header variable BATCH_NUM_PROC_TOT = ${BATCH_NUM_PROC_TOT} " 
    930         IGCM_debug_Print 2 "is the total number of _processors_ reserved." 
    931         IGCM_debug_Print 2 "It is not equal to the sum of _processus_  = ${PROCESSUS_NUMBER}." 
    932         IGCM_debug_Verif_Exit 
    933       fi 
    934       NUM_PROC_CPL=${CPL_PROC_MPI} # for backward compatibility 
    935       NUM_PROC_OCE=${OCE_PROC_MPI} # for backward compatibility 
    936       NUM_PROC_ATM=${ATM_PROC_MPI} # for backward compatibility 
    937     else # BATCH_NUM_PROC_TOT="" 
    938       if ( ${OK_PARA_MPI} ) ; then 
    939         IGCM_debug_Exit "ERROR : missing value for ${BATCH_NUM_PROC_TOT} processors," 
    940         IGCM_debug_Print 2 "You have parallel parameters in config->Executable->list." 
    941         IGCM_debug_Print 2 "Please add BATCH_NUM_PROC_TOT variable in job header as well." 
    942         IGCM_debug_Exit "Exit now." 
    943         IGCM_debug_Verif_Exit 
    944       else 
    945         # sequential case ! 
    946         if [ ${PROCESSUS_NUMBER} -eq 0 ] ; then 
    947           (( PROCESSUS_NUMBER = 1 )) 
    948           IGCM_debug_Print 2 "PROCESSUS_NUMBER is all 0 (sequential use of old definition in config->Executable->list)." 
    949           IGCM_debug_Print 2 "We set it to 1." 
    950         fi 
    951       fi 
    952     fi 
    953  
    954     IGCM_debug_Print 1 "MPI/OMP treatment PROCESSUS_NUMBER = ${PROCESSUS_NUMBER}" 
    955  
     952    # Define the execution context (MPMD, SPMD, MPI/OMP ...) 
     953    IGCM_config_ConfigureExexution 
     954    # Create the execution script for the current context 
    956955    IGCM_sys_build_execution_scripts 
    957956  fi 
Note: See TracChangeset for help on using the changeset viewer.