Changeset 1300 for trunk/libIGCM/libIGCM_sys/libIGCM_sys_default.ksh
- Timestamp: 03/11/16 13:02:10 (8 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/libIGCM/libIGCM_sys/libIGCM_sys_default.ksh
r1286 r1300 62 62 63 63 #===================================================== 64 # Host and user names64 # Host user names project maxCpuTime 65 65 # $hostname ou hostname 66 66 typeset HOST=${HOST:=$( hostname )} … … 136 136 137 137 #==================================================== 138 #- set PackDefault to false on obelix138 #- set PackDefault to false on the default machine 139 139 PackDefault=false 140 141 #====================================================142 #- Number of core per node (max number of OpenMP task)143 NUM_COREPERNODE=1144 140 145 141 #==================================================== … … 147 143 #- required for backward compatibility 148 144 #- 149 DEFAULT_NUM_PROC_OCE= 1145 DEFAULT_NUM_PROC_OCE=5 150 146 DEFAULT_NUM_PROC_CPL=1 151 147 (( DEFAULT_NUM_PROC_ATM = BATCH_NUM_PROC_TOT - DEFAULT_NUM_PROC_OCE - DEFAULT_NUM_PROC_CPL )) … … 334 330 fi 335 331 336 \ tree -f $@332 \ls -lR ${@} 337 333 338 334 IGCM_debug_PopStack "IGCM_sys_Tree" … … 393 389 echo "IGCM_sys_RmRunDir :" $@ 394 390 IGCM_debug_Print 1 "Dummy call, let the user do that." 
395 # IGCM_sys_Rm -rf ${RUN_DIR}396 391 fi 397 392 IGCM_debug_PopStack "IGCM_sys_RmRunDir" … … 688 683 #D-#================================================== 689 684 #D-function IGCM_sys_sync 690 #D-* Purpose: flush buffer on disk (dummy function on Ada)685 #D-* Purpose: flush buffer on disk (dummy function on Default machine) 691 686 #D-* Examples: 692 687 #D- … … 721 716 #D- Other specifications 722 717 # -------------------------------------------------------------------- 718 719 ulimit -s unlimited 723 720 724 721 IGCM_debug_PopStack "IGCM_sys_activ_variables" … … 817 814 #D- 818 815 function IGCM_sys_build_run_file { 819 820 816 IGCM_debug_Print 3 " dummy function : IGCM_sys_build_run_file " 821 822 817 } 823 818 … … 837 832 fi 838 833 839 typeset nodes listnodes init_node start_num init_exec comp ExeNameIn ExeNameOut 840 typeset node_num_current node_current comp_proc_mpi_loc comp_proc_omp_loc 841 typeset num_corempi nombre_restant_node nombre_restant_comp 842 834 EXECUTION=${HOST_MPIRUN_COMMAND} 835 836 # MPMD mode 843 837 if ( ${OK_PARA_MPMD} ) ; then 844 838 845 if [ -f run_file ] ; then 846 IGCM_sys_Rm -f run_file 847 fi 848 touch run_file 849 850 if ( ${OK_PARA_OMP} ) ; then 839 # Only MPI (MPMD) 840 if ( ! 
${OK_PARA_OMP} ) ; then 841 842 if [ -f run_file ] ; then 843 IGCM_sys_Rm -f run_file 844 fi 845 touch run_file 846 847 # Build run_file 848 849 # First loop on the components for the coupler ie oasis (only if oasis3) 850 # the coupler ie oasis3 must be the first one 851 for comp in ${config_ListOfComponents[*]} ; do 852 853 eval ExeNameIn=\${config_Executable_${comp}[0]} 854 eval ExeNameOut=\${config_Executable_${comp}[1]} 855 856 # for CPL component only 857 if [ "X${comp}" = "XCPL" ] && [ "X${ExeNameOut}" != X\"\" ] ; then 858 eval comp_proc_mpi_loc=\${${comp}_PROC_MPI} 859 eval comp_proc_omp_loc=\${${comp}_PROC_OMP} 860 echo "-np ${comp_proc_mpi_loc} ./${ExeNameOut} " >> run_file 861 fi 862 done 863 864 # Then second loop on the components 865 for comp in ${config_ListOfComponents[*]} ; do 866 867 eval ExeNameIn=\${config_Executable_${comp}[0]} 868 eval ExeNameOut=\${config_Executable_${comp}[1]} 869 870 # Only if we really have an executable for the component and not the coupler ie oasis: 871 if ( [ "X${ExeNameOut}" != X\"\" ] && [ "X${comp}" != "XCPL" ] ) ; then 872 873 eval comp_proc_mpi_loc=\${${comp}_PROC_MPI} 874 eval comp_proc_omp_loc=\${${comp}_PROC_OMP} 875 echo "-np ${comp_proc_mpi_loc} ./${ExeNameOut}" >> run_file 876 fi 877 done 878 879 EXECUTION="${HOST_MPIRUN_COMMAND} --app ./run_file" 880 881 IGCM_sys_Chmod u+x run_file 882 if ( $DEBUG_sys ) ; then 883 echo "run_file contains : " 884 cat run_file 885 fi 886 887 # MPI-OpenMP (MPMD) 888 else 851 889 852 890 # Hosts treatment 853 854 891 ${HOST_MPIRUN_COMMAND} hostname | sort | uniq > hosts.tmp 855 892 856 893 i=0 857 894 rm -f hosts 858 IGCM_debug_Print 1 "sys $( hostname ), Hosts avai ble :"895 IGCM_debug_Print 1 "sys $( hostname ), Hosts available :" 859 896 for nodes in `cat hosts.tmp` ; do 860 897 host[$i]=$nodes … … 870 907 871 908 # Initialisation 872 873 init_node=y 874 node_num_current=0 875 start_num=0 909 rank=0 910 current_core=0 911 core_per_node=4 876 912 init_exec=n 877 913 878 # 
Build run_file 879 880 # First loop on the components for the coupler ie oasis (only if oasis3) 881 # the coupler ie oasis3 must be the first one 914 # Loop on the components 882 915 for comp in ${config_ListOfComponents[*]} ; do 883 916 884 if [ "X${comp}" = "XCPL" ] ; then 885 886 eval ExeNameIn=\${config_Executable_${comp}[0]} 887 eval ExeNameOut=\${config_Executable_${comp}[1]} 917 eval ExeNameIn=\${config_Executable_${comp}[0]} 918 eval ExeNameOut=\${config_Executable_${comp}[1]} 919 920 # Not possible if oasis has an executable (i.e old version of oasis3) 921 if ( [ "X${ExeNameOut}" != X\"\" ] && [ "X${comp}" = "XCPL" ] ) ; then 922 IGCM_debug_Exit "ERROR MPMD with hybrid MPI-OpenMP is not available with oasis3 version" 923 IGCM_debug_Print 2 "Only available with oasis3-MCT version coupler" 924 IGCM_debug_Verif_Exit 925 fi 926 927 # Only if we really have an executable for the component : 928 if [ "X${ExeNameOut}" != X\"\" ] ; then 929 930 eval comp_proc_mpi_loc=\${${comp}_PROC_MPI} 931 eval comp_proc_omp_loc=\${${comp}_PROC_OMP} 888 932 889 933 echo "#!/bin/ksh" > script_${ExeNameOut}.ksh 890 934 echo "" >> script_${ExeNameOut}.ksh 891 #echo "export KMP_STACKSIZE=3g" >> script_${ExeNameOut}.ksh 892 #echo "export KMP_LIBRARY=turnaround" >> script_${ExeNameOut}.ksh 893 #echo "export MKL_SERIAL=YES" >> script_${ExeNameOut}.ksh 894 echo "./${ExeNameOut} > out_${ExeNameOut}.out 2>out_${ExeNameOut}.err" >> script_${ExeNameOut}.ksh 935 if [ ${comp_proc_omp_loc} -gt 1 ] ; then 936 937 # Check if the number of threads is correct 938 case ${comp_proc_omp_loc} in 939 2|4) 940 IGCM_debug_Print 1 "You run ${ExeNameOut} on ${comp_proc_omp_loc} OMP threads" 941 ;; 942 *) 943 IGCM_debug_Exit "ERROR with OMP parameters !" 
944 IGCM_debug_Print 2 "${comp_proc_omp_loc} is not possible as number of OMP threads" 945 IGCM_debug_Print 2 "Only 2,4 as number of OMP threads are possible " 946 IGCM_debug_Verif_Exit 947 ;; 948 esac 949 #echo "export KMP_STACKSIZE=3g" >> script_${ExeNameOut}.ksh 950 #echo "export KMP_LIBRARY=turnaround" >> script_${ExeNameOut}.ksh 951 #echo "export MKL_SERIAL=YES" >> script_${ExeNameOut}.ksh 952 echo "export OMP_STACKSIZE=200M" >> script_${ExeNameOut}.ksh 953 echo "export OMP_NUM_THREADS=${comp_proc_omp_loc}" >> script_${ExeNameOut}.ksh 954 fi 955 #echo "(( MYMPIRANK = OMPI_COMM_WORLD_RANK - ${start_num})) " >> script_${ExeNameOut}.ksh 956 #echo "./${ExeNameOut} > out_${ExeNameOut}.out.\${MYMPIRANK} 2>out_${ExeNameOut}.err.\${MYMPIRANK}" >> script_${ExeNameOut}.ksh 957 echo "./${ExeNameOut}" >> script_${ExeNameOut}.ksh 895 958 IGCM_sys_Chmod u+x script_${ExeNameOut}.ksh 896 959 897 init_node=n 898 899 (( nombre_restant_node = NUM_COREPERNODE - 1 )) 900 node_num_current=0 901 node_current=${host[${node_num_current}]} 902 903 EXECUTION="${EXECUTION} -H ${node_current} -np 1 ./script_${ExeNameOut}.ksh" 904 905 init_exec=y 906 start_num=1 907 908 fi 909 910 done 911 912 # Then loop on the components (except for oasis) 913 914 for comp in ${config_ListOfComponents[*]} ; do 915 916 eval ExeNameIn=\${config_Executable_${comp}[0]} 917 eval ExeNameOut=\${config_Executable_${comp}[1]} 918 919 # Only if we really have an executable for the component : 920 if ( [ "X${ExeNameOut}" != X\"\" ] && [ "X${comp}" != "XCPL" ] ) ; then 921 922 eval comp_proc_mpi_loc=\${${comp}_PROC_MPI} 923 eval comp_proc_omp_loc=\${${comp}_PROC_OMP} 924 925 echo "#!/bin/ksh" > script_${ExeNameOut}.ksh 926 # echo "set -vx" >> script_${ExeNameOut}.ksh 927 echo "" >> script_${ExeNameOut}.ksh 928 #echo "export KMP_STACKSIZE=3g" >> script_${ExeNameOut}.ksh 929 #echo "export KMP_LIBRARY=turnaround" >> script_${ExeNameOut}.ksh 930 #echo "export MKL_SERIAL=YES" >> script_${ExeNameOut}.ksh 931 echo 
"OMP_NUM_THREADS=${comp_proc_omp_loc}" >> script_${ExeNameOut}.ksh 932 echo "(( MYMPIRANK = OMPI_COMM_WORLD_RANK - ${start_num})) " >> script_${ExeNameOut}.ksh 933 echo "./${ExeNameOut} > out_${ExeNameOut}.out.\${MYMPIRANK} 2>out_${ExeNameOut}.err.\${MYMPIRANK}" >> script_${ExeNameOut}.ksh 934 IGCM_sys_Chmod u+x script_${ExeNameOut}.ksh 935 936 node_num=0 937 938 # We define the number of MPI process to be assigned for the component 939 940 nombre_restant_comp=${comp_proc_mpi_loc} 941 942 # Loop on the allocated nodes 943 944 for node in ${listnodes} ; do 945 946 # We go to the current node 947 if [ ${node_num} = ${node_num_current} ] ; then 948 949 node_current=${host[${node_num_current}]} 950 951 # If first time on the node : initialisation 952 953 if [ ${init_node} = y ] ; then 954 nombre_restant_node=${NUM_COREPERNODE} 955 fi 956 957 # Test on the number of OMP threads 958 959 if [ ${comp_proc_omp_loc} -gt ${nombre_restant_node} ] ; then 960 (( node_num = node_num + 1 )) 961 node_num_current=${node_num} 962 init_node=y 963 continue 964 fi 965 966 # Number of MPI process to assign 967 968 (( num_corempi = nombre_restant_node / comp_proc_omp_loc )) 969 970 if [ ${num_corempi} -gt ${nombre_restant_comp} ] ; then 971 num_corempi=${nombre_restant_comp} 972 fi 973 974 (( nombre_restant_node = nombre_restant_node - num_corempi * comp_proc_omp_loc )) 975 (( nombre_restant_comp = nombre_restant_comp - num_corempi )) 976 977 if [ ${init_exec} = y ] ; then 978 EXECUTION="${EXECUTION} : -H ${node_current} -np ${num_corempi} ./script_${ExeNameOut}.ksh" 979 else 980 EXECUTION="${EXECUTION} -H ${node_current} -np ${num_corempi} ./script_${ExeNameOut}.ksh" 981 init_exec=y 982 fi 983 984 (( start_num = num_corempi + start_num )) 985 986 else 987 988 (( node_num = node_num + 1 )) 989 continue 990 fi 991 992 # Test on the number of core/process remaining on the node/component 993 994 if [ ${nombre_restant_node} = 0 ] ; then 995 (( node_num = node_num + 1 )) 996 
node_num_current=${node_num} 997 init_node=y 998 999 if [ ${nombre_restant_comp} = 0 ] ; then 1000 break 1 1001 fi 1002 else 1003 1004 node_num_current=${node_num} 1005 init_node=n 1006 1007 if [ ${nombre_restant_comp} = 0 ] ; then 1008 break 1 1009 fi 1010 fi 960 if [ ${init_exec} = y ] ; then 961 EXECUTION="${EXECUTION} : -np ${comp_proc_mpi_loc} ./script_${ExeNameOut}.ksh" 962 else 963 EXECUTION="${EXECUTION} -np ${comp_proc_mpi_loc} ./script_${ExeNameOut}.ksh" 964 init_exec=y 965 fi 966 967 # Build rankfile : method used to assign cores and nodes for the MPI process 968 # Ex : 969 #rank 0=curie5296 slot=0,1,2,3 970 #rank 1=curie5296 slot=4,5,6,7 971 # Example of final command : 972 # mpirun -hostfile hosts -rankfile rankfile -np 27 ./script_lmdz.x.ksh : -np 5 ./script_opa.xx.ksh 973 # with script_lmdz.x.ksh : 974 # #!/bin/ksh 975 #export OMP_STACKSIZE=200M 976 #export OMP_NUM_THREADS=4 977 #./lmdz.x 978 979 for nb_proc_mpi in `seq 0 $(($comp_proc_mpi_loc-1))`; do 980 (( index_host = current_core / core_per_node )) 981 host_value=${host[${index_host}]} 982 (( slot = current_core % core_per_node )) 983 virg="," 984 string_final="" 985 for index in `seq $slot $(($slot+$comp_proc_omp_loc-1))`; do 986 string=$index$virg 987 string_final=$string_final$string 988 done 989 string_final=$( echo $string_final | sed "s/.$//" ) 990 echo "rank $rank=$host_value slot=$string_final" >> rankfile 991 (( rank = rank + 1 )) 992 (( current_core = current_core + comp_proc_omp_loc )) 1011 993 done 1012 994 fi 1013 995 done 1014 1015 else 1016 1017 # Then first loop on the components for the coupler ie oasis 1018 1019 ## the coupler ie oasis must be the first one 1020 for comp in ${config_ListOfComponents[*]} ; do 1021 1022 eval ExeNameOut=\${config_Executable_${comp}[1]} 1023 1024 # for CPL component only 1025 if [ "X${comp}" = "XCPL" ] ; then 1026 eval comp_proc_mpi_loc=\${${comp}_PROC_MPI} 1027 echo "-np ${comp_proc_mpi_loc} ./${ExeNameOut} " >> run_file 1028 fi 1029 done 1030 
1031 # Then second loop on the components 1032 1033 for comp in ${config_ListOfComponents[*]} ; do 1034 1035 eval ExeNameOut=\${config_Executable_${comp}[1]} 1036 1037 # Only if we really have an executable for the component and not the coupler ie oasis: 1038 if ( [ "X${ExeNameOut}" != X\"\" ] && [ "X${comp}" != "XCPL" ] ) ; then 1039 eval comp_proc_mpi_loc=\${${comp}_PROC_MPI} 1040 echo "-np ${comp_proc_mpi_loc} ./${ExeNameOut}" >> run_file 1041 fi 1042 done 1043 IGCM_sys_Chmod u+x run_file 1044 1045 EXECUTION="${HOST_MPIRUN_COMMAND} --app ./run_file" 1046 1047 fi 1048 1049 else # Only one executable. launch it. 996 fi 997 998 # Only one executable (SPMD mode). 999 else 1050 1000 1051 1001 for comp in ${config_ListOfComponents[*]} ; do … … 1059 1009 if ( ${OK_PARA_OMP} ) ; then 1060 1010 eval comp_proc_omp_loc=\${${comp}_PROC_OMP} 1061 echo "OMP_NUM_THREADS=${comp_proc_omp_loc}" >> script_${ExeNameOut}.ksh 1011 echo "export OMP_STACKSIZE=200M" >> script_${ExeNameOut}.ksh 1012 echo "export OMP_NUM_THREADS=${comp_proc_omp_loc}" >> script_${ExeNameOut}.ksh 1062 1013 fi 1063 1014 if ( ${OK_PARA_MPI} ) ; then … … 1122 1073 echo "IGCM_sys_check_quota" 1123 1074 fi 1075 1076 # TO BE IMPLEMENTED 1077 1124 1078 IGCM_debug_PopStack "IGCM_sys_check_quota" 1125 1079 } … … 1152 1106 fi 1153 1107 1108 # N/A content 1154 1109 eval ${1}="N/A" 1155 1110 1156 1111 IGCM_debug_PopStack "IGCM_sys_getJobSchedulerID" 1112 } 1113 1114 #D-#================================================== 1115 #D-function IGCM_sys_GetJobID 1116 #D-* Purpose: Get the job ID from the JobName 1117 #D-* Examples: IGCM_sys_GetJobID ${JobName} ${TargetUsr} JobID 1118 #D- 1119 function IGCM_sys_GetJobID { 1120 IGCM_debug_PushStack "IGCM_sys_GetJobID" 1121 if ( $DEBUG_sys ) ; then 1122 echo "IGCM_sys_GetJobID" 1123 fi 1124 1125 # N/A content 1126 eval ${3}="N/A" 1127 IGCM_debug_PopStack "IGCM_sys_GetJobID" 1157 1128 } 1158 1129
Note: See TracChangeset for help on using the changeset viewer.