Changeset 1300 for trunk/libIGCM/libIGCM_sys/libIGCM_sys_obelix.ksh
- Timestamp: 03/11/16 13:02:10 (8 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/libIGCM/libIGCM_sys/libIGCM_sys_obelix.ksh
r1286 r1300 62 62 63 63 #===================================================== 64 # Host and user names64 # Host user names project maxCpuTime 65 65 # $hostname ou hostname 66 66 typeset HOST=${HOST:=$( hostname )} … … 148 148 149 149 #==================================================== 150 #- Number of core per node (max number of OpenMP task)151 NUM_COREPERNODE=4152 153 #====================================================154 150 #- Default number of MPI task for IPSL coupled model 155 151 #- required for backward compatibility 156 152 #- 157 DEFAULT_NUM_PROC_OCE= 1153 DEFAULT_NUM_PROC_OCE=5 158 154 DEFAULT_NUM_PROC_CPL=1 159 155 (( DEFAULT_NUM_PROC_ATM = BATCH_NUM_PROC_TOT - DEFAULT_NUM_PROC_OCE - DEFAULT_NUM_PROC_CPL )) … … 342 338 fi 343 339 344 \ tree -f $@340 \ls -lR ${@} 345 341 346 342 IGCM_debug_PopStack "IGCM_sys_Tree" … … 707 703 #D-#================================================== 708 704 #D-function IGCM_sys_sync 709 #D-* Purpose: flush buffer on disk (dummy function on Ada)705 #D-* Purpose: flush buffer on disk (dummy function on Obelix) 710 706 #D-* Examples: 711 707 #D- … … 740 736 #D- Other specifications 741 737 # -------------------------------------------------------------------- 738 739 ulimit -s unlimited 742 740 743 741 IGCM_debug_PopStack "IGCM_sys_activ_variables" … … 836 834 #D- 837 835 function IGCM_sys_build_run_file { 838 839 836 IGCM_debug_Print 3 " dummy function : IGCM_sys_build_run_file " 840 841 837 } 842 838 … … 856 852 fi 857 853 858 typeset nodes listnodes init_node start_num init_exec comp ExeNameIn ExeNameOut 859 typeset node_num_current node_current comp_proc_mpi_loc comp_proc_omp_loc 860 typeset num_corempi nombre_restant_node nombre_restant_comp 861 854 EXECUTION=${HOST_MPIRUN_COMMAND} 855 856 # MPMD mode 862 857 if ( ${OK_PARA_MPMD} ) ; then 863 858 864 if [ -f run_file ] ; then 865 IGCM_sys_Rm -f run_file 866 fi 867 touch run_file 868 869 if ( ${OK_PARA_OMP} ) ; then 859 # Only MPI (MPMD) 860 if ( ! 
${OK_PARA_OMP} ) ; then 861 862 if [ -f run_file ] ; then 863 IGCM_sys_Rm -f run_file 864 fi 865 touch run_file 866 867 # Build run_file 868 869 # First loop on the components for the coupler ie oasis (only if oasis3) 870 # the coupler ie oasis3 must be the first one 871 for comp in ${config_ListOfComponents[*]} ; do 872 873 eval ExeNameIn=\${config_Executable_${comp}[0]} 874 eval ExeNameOut=\${config_Executable_${comp}[1]} 875 876 # for CPL component only 877 if [ "X${comp}" = "XCPL" ] && [ "X${ExeNameOut}" != X\"\" ] ; then 878 eval comp_proc_mpi_loc=\${${comp}_PROC_MPI} 879 eval comp_proc_omp_loc=\${${comp}_PROC_OMP} 880 echo "-np ${comp_proc_mpi_loc} ./${ExeNameOut} " >> run_file 881 fi 882 done 883 884 # Then second loop on the components 885 for comp in ${config_ListOfComponents[*]} ; do 886 887 eval ExeNameIn=\${config_Executable_${comp}[0]} 888 eval ExeNameOut=\${config_Executable_${comp}[1]} 889 890 # Only if we really have an executable for the component and not the coupler ie oasis: 891 if ( [ "X${ExeNameOut}" != X\"\" ] && [ "X${comp}" != "XCPL" ] ) ; then 892 893 eval comp_proc_mpi_loc=\${${comp}_PROC_MPI} 894 eval comp_proc_omp_loc=\${${comp}_PROC_OMP} 895 echo "-np ${comp_proc_mpi_loc} ./${ExeNameOut}" >> run_file 896 fi 897 done 898 899 EXECUTION="${HOST_MPIRUN_COMMAND} --app ./run_file" 900 901 IGCM_sys_Chmod u+x run_file 902 if ( $DEBUG_sys ) ; then 903 echo "run_file contains : " 904 cat run_file 905 fi 906 907 # MPI-OpenMP (MPMD) 908 else 870 909 871 910 # Hosts treatment 872 873 911 ${HOST_MPIRUN_COMMAND} hostname | sort | uniq > hosts.tmp 874 912 875 913 i=0 876 914 rm -f hosts 877 IGCM_debug_Print 1 "sys Obelix, Hosts avai ble :"915 IGCM_debug_Print 1 "sys Obelix, Hosts available :" 878 916 for nodes in `cat hosts.tmp` ; do 879 917 host[$i]=$nodes … … 889 927 890 928 # Initialisation 891 892 init_node=y 893 node_num_current=0 894 start_num=0 929 rank=0 930 current_core=0 931 core_per_node=6 895 932 init_exec=n 896 933 897 # Build run_file 898 
899 # First loop on the components for the coupler ie oasis (only if oasis3) 900 # the coupler ie oasis3 must be the first one 934 # Loop on the components 901 935 for comp in ${config_ListOfComponents[*]} ; do 902 936 903 if [ "X${comp}" = "XCPL" ] ; then 904 905 eval ExeNameIn=\${config_Executable_${comp}[0]} 906 eval ExeNameOut=\${config_Executable_${comp}[1]} 937 eval ExeNameIn=\${config_Executable_${comp}[0]} 938 eval ExeNameOut=\${config_Executable_${comp}[1]} 939 940 # Not possible if oasis has an executable (i.e old version of oasis3) 941 if ( [ "X${ExeNameOut}" != X\"\" ] && [ "X${comp}" = "XCPL" ] ) ; then 942 IGCM_debug_Exit "ERROR MPMD with hybrid MPI-OpenMP is not available with oasis3 version" 943 IGCM_debug_Print 2 "Only available with oasis3-MCT version coupler" 944 IGCM_debug_Verif_Exit 945 fi 946 947 # Only if we really have an executable for the component : 948 if [ "X${ExeNameOut}" != X\"\" ] ; then 949 950 eval comp_proc_mpi_loc=\${${comp}_PROC_MPI} 951 eval comp_proc_omp_loc=\${${comp}_PROC_OMP} 907 952 908 953 echo "#!/bin/ksh" > script_${ExeNameOut}.ksh 909 954 echo "" >> script_${ExeNameOut}.ksh 910 #echo "export KMP_STACKSIZE=3g" >> script_${ExeNameOut}.ksh 911 #echo "export KMP_LIBRARY=turnaround" >> script_${ExeNameOut}.ksh 912 #echo "export MKL_SERIAL=YES" >> script_${ExeNameOut}.ksh 913 echo "./${ExeNameOut} > out_${ExeNameOut}.out 2>out_${ExeNameOut}.err" >> script_${ExeNameOut}.ksh 955 if [ ${comp_proc_omp_loc} -gt 1 ] ; then 956 957 # Check if the number of threads is correct 958 case ${comp_proc_omp_loc} in 959 2|4|6) 960 IGCM_debug_Print 1 "You run ${ExeNameOut} on ${comp_proc_omp_loc} OMP threads" 961 ;; 962 *) 963 IGCM_debug_Exit "ERROR with OMP parameters !" 
964 IGCM_debug_Print 2 "${comp_proc_omp_loc} is not possible as number of OMP threads" 965 IGCM_debug_Print 2 "Only 2,4,6 as number of OMP threads are possible " 966 IGCM_debug_Verif_Exit 967 ;; 968 esac 969 #echo "export KMP_STACKSIZE=3g" >> script_${ExeNameOut}.ksh 970 #echo "export KMP_LIBRARY=turnaround" >> script_${ExeNameOut}.ksh 971 #echo "export MKL_SERIAL=YES" >> script_${ExeNameOut}.ksh 972 echo "export OMP_STACKSIZE=200M" >> script_${ExeNameOut}.ksh 973 echo "export OMP_NUM_THREADS=${comp_proc_omp_loc}" >> script_${ExeNameOut}.ksh 974 fi 975 #echo "(( MYMPIRANK = OMPI_COMM_WORLD_RANK - ${start_num})) " >> script_${ExeNameOut}.ksh 976 #echo "./${ExeNameOut} > out_${ExeNameOut}.out.\${MYMPIRANK} 2>out_${ExeNameOut}.err.\${MYMPIRANK}" >> script_${ExeNameOut}.ksh 977 echo "./${ExeNameOut}" >> script_${ExeNameOut}.ksh 914 978 IGCM_sys_Chmod u+x script_${ExeNameOut}.ksh 915 979 916 init_node=n 917 918 (( nombre_restant_node = NUM_COREPERNODE - 1 )) 919 node_num_current=0 920 node_current=${host[${node_num_current}]} 921 922 EXECUTION="${EXECUTION} -H ${node_current} -np 1 ./script_${ExeNameOut}.ksh" 923 924 init_exec=y 925 start_num=1 926 927 fi 928 929 done 930 931 # Then loop on the components (except for oasis) 932 933 for comp in ${config_ListOfComponents[*]} ; do 934 935 eval ExeNameIn=\${config_Executable_${comp}[0]} 936 eval ExeNameOut=\${config_Executable_${comp}[1]} 937 938 # Only if we really have an executable for the component : 939 if ( [ "X${ExeNameOut}" != X\"\" ] && [ "X${comp}" != "XCPL" ] ) ; then 940 941 eval comp_proc_mpi_loc=\${${comp}_PROC_MPI} 942 eval comp_proc_omp_loc=\${${comp}_PROC_OMP} 943 944 echo "#!/bin/ksh" > script_${ExeNameOut}.ksh 945 # echo "set -vx" >> script_${ExeNameOut}.ksh 946 echo "" >> script_${ExeNameOut}.ksh 947 #echo "export KMP_STACKSIZE=3g" >> script_${ExeNameOut}.ksh 948 #echo "export KMP_LIBRARY=turnaround" >> script_${ExeNameOut}.ksh 949 #echo "export MKL_SERIAL=YES" >> script_${ExeNameOut}.ksh 950 echo 
"OMP_NUM_THREADS=${comp_proc_omp_loc}" >> script_${ExeNameOut}.ksh 951 echo "(( MYMPIRANK = OMPI_COMM_WORLD_RANK - ${start_num})) " >> script_${ExeNameOut}.ksh 952 echo "./${ExeNameOut} > out_${ExeNameOut}.out.\${MYMPIRANK} 2>out_${ExeNameOut}.err.\${MYMPIRANK}" >> script_${ExeNameOut}.ksh 953 IGCM_sys_Chmod u+x script_${ExeNameOut}.ksh 954 955 node_num=0 956 957 # We define the number of MPI process to be assigned for the component 958 959 nombre_restant_comp=${comp_proc_mpi_loc} 960 961 # Loop on the allocated nodes 962 963 for node in ${listnodes} ; do 964 965 # We go to the current node 966 if [ ${node_num} = ${node_num_current} ] ; then 967 968 node_current=${host[${node_num_current}]} 969 970 # If first time on the node : initialisation 971 972 if [ ${init_node} = y ] ; then 973 nombre_restant_node=${NUM_COREPERNODE} 974 fi 975 976 # Test on the number of OMP threads 977 978 if [ ${comp_proc_omp_loc} -gt ${nombre_restant_node} ] ; then 979 (( node_num = node_num + 1 )) 980 node_num_current=${node_num} 981 init_node=y 982 continue 983 fi 984 985 # Number of MPI process to assign 986 987 (( num_corempi = nombre_restant_node / comp_proc_omp_loc )) 988 989 if [ ${num_corempi} -gt ${nombre_restant_comp} ] ; then 990 num_corempi=${nombre_restant_comp} 991 fi 992 993 (( nombre_restant_node = nombre_restant_node - num_corempi * comp_proc_omp_loc )) 994 (( nombre_restant_comp = nombre_restant_comp - num_corempi )) 995 996 if [ ${init_exec} = y ] ; then 997 EXECUTION="${EXECUTION} : -H ${node_current} -np ${num_corempi} ./script_${ExeNameOut}.ksh" 998 else 999 EXECUTION="${EXECUTION} -H ${node_current} -np ${num_corempi} ./script_${ExeNameOut}.ksh" 1000 init_exec=y 1001 fi 1002 1003 (( start_num = num_corempi + start_num )) 1004 1005 else 1006 1007 (( node_num = node_num + 1 )) 1008 continue 1009 fi 1010 1011 # Test on the number of core/process remaining on the node/component 1012 1013 if [ ${nombre_restant_node} = 0 ] ; then 1014 (( node_num = node_num + 1 )) 1015 
node_num_current=${node_num} 1016 init_node=y 1017 1018 if [ ${nombre_restant_comp} = 0 ] ; then 1019 break 1 1020 fi 1021 else 1022 1023 node_num_current=${node_num} 1024 init_node=n 1025 1026 if [ ${nombre_restant_comp} = 0 ] ; then 1027 break 1 1028 fi 1029 fi 980 if [ ${init_exec} = y ] ; then 981 EXECUTION="${EXECUTION} : -np ${comp_proc_mpi_loc} ./script_${ExeNameOut}.ksh" 982 else 983 EXECUTION="${EXECUTION} -np ${comp_proc_mpi_loc} ./script_${ExeNameOut}.ksh" 984 init_exec=y 985 fi 986 987 # Build rankfile : method used to assign cores and nodes for the MPI process 988 # Ex : 989 #rank 0=curie5296 slot=0,1,2,3 990 #rank 1=curie5296 slot=4,5,6,7 991 # Example of final command : 992 # mpirun -hostfile hosts -rankfile rankfile -np 27 ./script_lmdz.x.ksh : -np 5 ./script_opa.xx.ksh 993 # with script_lmdz.x.ksh : 994 # #!/bin/ksh 995 #export OMP_STACKSIZE=200M 996 #export OMP_NUM_THREADS=4 997 #./lmdz.x 998 999 for nb_proc_mpi in `seq 0 $(($comp_proc_mpi_loc-1))`; do 1000 (( index_host = current_core / core_per_node )) 1001 host_value=${host[${index_host}]} 1002 (( slot = current_core % core_per_node )) 1003 virg="," 1004 string_final="" 1005 for index in `seq $slot $(($slot+$comp_proc_omp_loc-1))`; do 1006 string=$index$virg 1007 string_final=$string_final$string 1008 done 1009 string_final=$( echo $string_final | sed "s/.$//" ) 1010 echo "rank $rank=$host_value slot=$string_final" >> rankfile 1011 (( rank = rank + 1 )) 1012 (( current_core = current_core + comp_proc_omp_loc )) 1030 1013 done 1031 1014 fi 1032 1015 done 1033 1034 else 1035 1036 # Then first loop on the components for the coupler ie oasis 1037 1038 ## the coupler ie oasis must be the first one 1039 for comp in ${config_ListOfComponents[*]} ; do 1040 1041 eval ExeNameOut=\${config_Executable_${comp}[1]} 1042 1043 # for CPL component only 1044 if [ "X${comp}" = "XCPL" ] ; then 1045 eval comp_proc_mpi_loc=\${${comp}_PROC_MPI} 1046 echo "-np ${comp_proc_mpi_loc} ./${ExeNameOut} " >> run_file 1047 fi 
1048 done 1049 1050 # Then second loop on the components 1051 1052 for comp in ${config_ListOfComponents[*]} ; do 1053 1054 eval ExeNameOut=\${config_Executable_${comp}[1]} 1055 1056 # Only if we really have an executable for the component and not the coupler ie oasis: 1057 if ( [ "X${ExeNameOut}" != X\"\" ] && [ "X${comp}" != "XCPL" ] ) ; then 1058 eval comp_proc_mpi_loc=\${${comp}_PROC_MPI} 1059 echo "-np ${comp_proc_mpi_loc} ./${ExeNameOut}" >> run_file 1060 fi 1061 done 1062 IGCM_sys_Chmod u+x run_file 1063 1064 EXECUTION="${HOST_MPIRUN_COMMAND} --app ./run_file" 1065 1066 fi 1067 1068 else # Only one executable. launch it. 1016 fi 1017 1018 # Only one executable (SPMD mode). 1019 else 1069 1020 1070 1021 for comp in ${config_ListOfComponents[*]} ; do … … 1078 1029 if ( ${OK_PARA_OMP} ) ; then 1079 1030 eval comp_proc_omp_loc=\${${comp}_PROC_OMP} 1080 echo "OMP_NUM_THREADS=${comp_proc_omp_loc}" >> script_${ExeNameOut}.ksh 1031 echo "export OMP_STACKSIZE=200M" >> script_${ExeNameOut}.ksh 1032 echo "export OMP_NUM_THREADS=${comp_proc_omp_loc}" >> script_${ExeNameOut}.ksh 1081 1033 fi 1082 1034 if ( ${OK_PARA_MPI} ) ; then … … 1141 1093 echo "IGCM_sys_check_quota" 1142 1094 fi 1095 1096 # TO BE IMPLEMENTED 1097 1143 1098 IGCM_debug_PopStack "IGCM_sys_check_quota" 1144 1099 } … … 1188 1143 1189 1144 # With -f option, the full job name is given in the last column 1190 1145 ID="$( qstat -u $2 | grep -w $1 | gawk '-F ' '{print $10}' )" 1191 1146 1192 1147 eval ${3}=${ID}
Note: See TracChangeset for help on using the changeset viewer.