101 repro_sum_rel_diff_max_in, &
102 repro_sum_recompute_in, &
107 logical,
intent(in),
optional :: repro_sum_use_ddpdd_in
109 real(r8),
intent(in),
optional :: repro_sum_rel_diff_max_in
112 logical,
intent(in),
optional :: repro_sum_recompute_in
115 logical,
intent(in),
optional :: repro_sum_master
118 integer,
intent(in),
optional :: repro_sum_logunit
123 logical,
save :: firstcall = .true.
124 character(len=*),
parameter :: subname =
'(oasis_reprosum_setopts)' 127 if (
present(repro_sum_master) )
then 128 master = repro_sum_master
133 if (
present(repro_sum_logunit) )
then 134 llogunit = repro_sum_logunit
139 if (.not. firstcall)
then 140 write(llogunit,*) subname,
estr,
' can only be called once' 145 if (
present(repro_sum_use_ddpdd_in) )
then 148 if (
present(repro_sum_rel_diff_max_in) )
then 151 if (
present(repro_sum_recompute_in) )
then 156 write(llogunit,*) subname, &
157 'Using double-double-based (scalable) usually reproducible ', &
158 'distributed sum algorithm' 160 write(llogunit,*) subname, &
161 'Using fixed-point-based (scalable) reproducible ', &
162 'distributed sum algorithm' 166 write(llogunit,*) subname, &
167 ' with a maximum relative error tolerance of ', &
170 write(llogunit,*) subname, &
171 'If tolerance exceeded, sum is recomputed using ', &
172 'a serial algorithm.' 174 write(llogunit,*) subname, &
175 'If tolerance exceeded, fixed-precision is sum used ', &
176 'but a warning is output.' 179 write(llogunit,*) subname, &
180 'and not comparing with floating point algorithms.' 262 arr_gbl_max, arr_gbl_max_out, &
263 arr_max_levels, arr_max_levels_out, &
264 gbl_max_nsummands, gbl_max_nsummands_out,&
265 gbl_count, repro_sum_validate, &
266 repro_sum_stats, rel_diff, commid )
271 integer,
intent(in) :: nsummands
272 integer,
intent(in) :: dsummands
273 integer,
intent(in) :: nflds
274 real(r8),
intent(in) :: arr(dsummands,nflds)
277 real(r8),
intent(out):: arr_gsum(nflds)
280 logical,
intent(in),
optional :: ddpdd_sum
284 real(r8),
intent(in),
optional :: arr_gbl_max(nflds)
287 real(r8),
intent(out),
optional :: arr_gbl_max_out(nflds)
291 integer,
intent(in),
optional :: arr_max_levels(nflds)
295 integer,
intent(out),
optional :: arr_max_levels_out(nflds)
299 integer,
intent(in),
optional :: gbl_max_nsummands
303 integer,
intent(out),
optional :: gbl_max_nsummands_out
307 integer,
intent(in),
optional :: gbl_count
312 logical,
intent(in),
optional :: repro_sum_validate
316 integer,
intent(inout),
optional :: repro_sum_stats(5)
324 real(r8),
intent(out),
optional :: rel_diff(2,nflds)
329 integer,
intent(in),
optional :: commid
334 logical :: use_ddpdd_sum
342 integer :: omp_nthreads
346 integer :: ifld, isum, ithread
347 integer :: max_nsummands
350 integer,
allocatable :: isum_beg(:), isum_end(:)
353 integer,
allocatable :: arr_tlmin_exp(:,:)
355 integer,
allocatable :: arr_tlmax_exp(:,:)
357 integer :: arr_exp, arr_exp_tlmin, arr_exp_tlmax
359 integer :: arr_lmin_exp(nflds)
360 integer :: arr_lmax_exp(nflds)
361 integer :: arr_lextremes(0:nflds,2)
362 integer :: arr_gextremes(0:nflds,2)
364 integer :: arr_gmax_exp(nflds)
365 integer :: arr_gmin_exp(nflds)
366 integer :: arr_max_shift
369 integer :: max_levels(nflds)
372 integer :: gbl_max_red
373 integer :: repro_sum_fast
374 integer :: repro_sum_slow
375 integer :: repro_sum_both
376 integer :: nonrepro_sum
378 real(r8) :: xmax_nsummands
379 real(r8) :: arr_lsum(nflds)
380 real(r8) :: arr_gsum_fast(nflds)
387 integer omp_get_max_threads
388 external omp_get_max_threads
390 character(len=*),
parameter :: subname =
'(oasis_reprosum_calc)' 396 if (
present(ddpdd_sum) )
then 397 use_ddpdd_sum = ddpdd_sum
403 use_ddpdd_sum = use_ddpdd_sum .or. (radix(0._r8) /= radix(0_i8))
413 if (
present(commid) )
then 416 mpi_comm = mpi_comm_world
425 if ( use_ddpdd_sum )
then 440 call mpi_comm_size(mpi_comm, tasks, ierr)
444 omp_nthreads = omp_get_max_threads()
452 if (
present(arr_gbl_max) .and.
present(arr_max_levels) )
then 457 max_level = (64/nflds) + 1
459 if ((arr_gbl_max(ifld) .ge. 0.0_r8) .and. &
460 (arr_max_levels(ifld) > 0))
then 462 arr_gmax_exp(ifld) = exponent(arr_gbl_max(ifld))
463 if (max_level < arr_max_levels(ifld)) &
464 max_level = arr_max_levels(ifld)
471 if (.not. recompute)
then 476 if (
present(gbl_max_nsummands) )
then 477 if (gbl_max_nsummands < 1)
then 478 call mpi_allreduce (nsummands, max_nsummands, 1, &
479 mpi_integer, mpi_max, mpi_comm, ierr)
482 max_nsummands = gbl_max_nsummands
485 call mpi_allreduce (nsummands, max_nsummands, 1, &
486 mpi_integer, mpi_max, mpi_comm, ierr)
495 if (
present( gbl_max_nsummands_out) )
then 496 gbl_max_nsummands_out = max_nsummands
500 max_nsummands = (max_nsummands/omp_nthreads) + 1
502 max_nsummands = max(max_nsummands, tasks*omp_nthreads)
504 xmax_nsummands = dble(max_nsummands)
505 arr_max_shift = digits(0_i8) - (exponent(xmax_nsummands) + 1)
506 if (arr_max_shift < 2)
then 507 write(
nulprt,*) subname,
estr,
' number of summands too large for fixed precision algorithm' 512 if (
present(repro_sum_validate))
then 513 validate = repro_sum_validate
518 nflds, arr_max_shift, arr_gmax_exp, &
519 arr_max_levels, max_level, validate, &
520 recompute, omp_nthreads, mpi_comm)
528 if (
present(arr_max_levels_out) )
then 530 arr_max_levels_out(ifld) = arr_max_levels(ifld)
533 if (
present(arr_gbl_max_out) )
then 535 arr_gbl_max_out(ifld) = arr_gbl_max(ifld)
553 allocate(arr_tlmax_exp(nflds,omp_nthreads))
554 allocate(arr_tlmin_exp(nflds,omp_nthreads))
555 allocate(isum_beg(omp_nthreads))
556 allocate(isum_end(omp_nthreads))
559 call split_indices(nsummands, omp_nthreads, isum_beg, isum_end)
564 do ithread=1,omp_nthreads
567 arr_exp_tlmin = maxexponent(1._r8)
568 arr_exp_tlmax = minexponent(1._r8)
569 do isum=isum_beg(ithread),isum_end(ithread)
570 if (arr(isum,ifld) .ne. 0.0_r8)
then 571 arr_exp = exponent(arr(isum,ifld))
572 arr_exp_tlmin = min(arr_exp,arr_exp_tlmin)
573 arr_exp_tlmax = max(arr_exp,arr_exp_tlmax)
576 arr_tlmin_exp(ifld,ithread) = arr_exp_tlmin
577 arr_tlmax_exp(ifld,ithread) = arr_exp_tlmax
583 arr_lmax_exp(ifld) = maxval(arr_tlmax_exp(ifld,:))
584 arr_lmin_exp(ifld) = minval(arr_tlmin_exp(ifld,:))
586 deallocate(arr_tlmin_exp,arr_tlmax_exp,isum_beg,isum_end)
588 arr_lextremes(0,:) = -nsummands
589 arr_lextremes(1:nflds,1) = -arr_lmax_exp(:)
590 arr_lextremes(1:nflds,2) = arr_lmin_exp(:)
592 call mpi_allreduce (arr_lextremes, arr_gextremes, 2*(nflds+1), &
593 mpi_integer, mpi_min, mpi_comm, ierr)
595 max_nsummands = -arr_gextremes(0,1)
596 arr_gmax_exp(:) = -arr_gextremes(1:nflds,1)
597 arr_gmin_exp(:) = arr_gextremes(1:nflds,2)
603 arr_gmin_exp(ifld) = min(arr_gmax_exp(ifld),arr_gmin_exp(ifld))
607 if (
present(arr_gbl_max_out) )
then 609 arr_gbl_max_out(ifld) = scale(1.0_r8,arr_gmax_exp(ifld))
614 if (
present( gbl_max_nsummands_out) )
then 615 gbl_max_nsummands_out = max_nsummands
623 max_nsummands = (max_nsummands/omp_nthreads) + 1
625 max_nsummands = max(max_nsummands, tasks*omp_nthreads)
627 xmax_nsummands = dble(max_nsummands)
628 arr_max_shift = digits(0_i8) - (exponent(xmax_nsummands) + 1)
629 if (arr_max_shift < 2)
then 630 write(
nulprt,*) subname,
estr,
' number of summands too large for fixed precision algorithm' 640 max_level = (64/nflds) + 1
642 max_levels(ifld) = 2 + &
643 ((digits(0_i8) + (arr_gmax_exp(ifld)-arr_gmin_exp(ifld))) &
645 if (
present(arr_max_levels) .and. (.not. validate) )
then 648 if ( arr_max_levels(ifld) > 0 )
then 650 min(arr_max_levels(ifld),max_levels(ifld))
653 if (max_level < max_levels(ifld)) &
654 max_level = max_levels(ifld)
658 if (
present(arr_max_levels_out) )
then 660 arr_max_levels_out(ifld) = max_levels(ifld)
667 arr_max_shift, arr_gmax_exp, max_levels, &
668 max_level, validate, recompute, &
669 omp_nthreads, mpi_comm)
678 if (
present(rel_diff) )
then 697 arr_lsum(ifld) = arr(isum,ifld) + arr_lsum(ifld)
701 call mpi_allreduce (arr_lsum, arr_gsum_fast, nflds, &
702 mpi_real8, mpi_sum, mpi_comm, ierr)
711 abs_diff = abs(arr_gsum_fast(ifld)-arr_gsum(ifld))
712 if (abs(arr_gsum(ifld)) > abs_diff)
then 713 rel_diff(1,ifld) = abs_diff/abs(arr_gsum(ifld))
715 rel_diff(1,ifld) = abs_diff
717 rel_diff(2,ifld) = abs_diff
720 rel_diff(:,:) = 0.0_r8
725 if (
present(repro_sum_stats) )
then 726 repro_sum_stats(1) = repro_sum_stats(1) + repro_sum_fast
727 repro_sum_stats(2) = repro_sum_stats(2) + repro_sum_slow
728 repro_sum_stats(3) = repro_sum_stats(3) + repro_sum_both
729 repro_sum_stats(4) = repro_sum_stats(4) + nonrepro_sum
730 repro_sum_stats(5) = repro_sum_stats(5) + gbl_max_red
748 arr_max_shift, arr_gmax_exp, max_levels, &
749 max_level, validate, recompute, &
750 omp_nthreads, mpi_comm )
756 integer,
intent(in) :: nsummands
757 integer,
intent(in) :: dsummands
758 integer,
intent(in) :: nflds
759 integer,
intent(in) :: arr_max_shift
762 integer,
intent(in) :: arr_gmax_exp(nflds)
764 integer,
intent(in) :: max_levels(nflds)
767 integer,
intent(in) :: max_level
769 integer,
intent(in) :: omp_nthreads
770 integer,
intent(in) :: mpi_comm
772 real(r8),
intent(in) :: arr(dsummands,nflds)
775 logical,
intent(in):: validate
779 logical,
intent(out):: recompute
784 real(r8),
intent(out):: arr_gsum(nflds)
788 integer,
parameter :: max_jlevel = &
789 1 + (digits(0_i8)/digits(0.0_r8))
791 integer(i8) :: i8_arr_tlsum_level(0:max_level,nflds,omp_nthreads)
794 integer(i8) :: i8_arr_lsum_level((max_level+3)*nflds)
797 integer(i8) :: i8_arr_level
799 integer(i8) :: i8_arr_gsum_level((max_level+3)*nflds)
805 integer(i8) :: i8_sign
806 integer(i8) :: i8_radix
808 integer :: max_error(nflds,omp_nthreads)
810 integer :: not_exact(nflds,omp_nthreads)
813 integer :: isum_beg(omp_nthreads), isum_end(omp_nthreads)
816 integer :: ifld, isum, ithread
822 integer :: offset(nflds)
831 integer :: LX(max_jlevel)
833 integer :: sum_digits
841 real(r8) :: arr_remainder
843 real(r8) :: X_8(max_jlevel)
855 character(len=*),
parameter :: subname =
'(oasis_reprosum_int)' 859 i8_radix = radix(ix_8)
872 offset(ifld) = offset(ifld-1) &
873 + (max_levels(ifld-1) + voffset)
875 veclth = offset(nflds) + max_levels(nflds)
878 call split_indices(nsummands, omp_nthreads, isum_beg, isum_end)
883 i8_arr_lsum_level(:) = 0_i8
889 do ithread=1,omp_nthreads
892 ioffset = offset(ifld)
894 max_error(ifld,ithread) = 0
895 not_exact(ifld,ithread) = 0
897 i8_arr_tlsum_level(:,ifld,ithread) = 0_i8
898 do isum=isum_beg(ithread),isum_end(ithread)
899 arr_remainder = 0.0_r8
901 if (arr(isum,ifld) .ne. 0.0_r8)
then 902 arr_exp = exponent(arr(isum,ifld))
903 arr_frac = fraction(arr(isum,ifld))
906 if (arr_exp > arr_gmax_exp(ifld))
then 907 max_error(ifld,ithread) = 1
912 arr_shift = arr_max_shift - (arr_gmax_exp(ifld)-arr_exp)
917 if (arr_shift < 1)
then 918 ilevel = (1 + (arr_gmax_exp(ifld)-arr_exp))/arr_max_shift
919 arr_shift = ilevel*arr_max_shift - (arr_gmax_exp(ifld)-arr_exp)
921 do while (arr_shift < 1)
922 arr_shift = arr_shift + arr_max_shift
929 if (ilevel .le. max_levels(ifld))
then 932 arr_remainder = scale(arr_frac,arr_shift)
933 i8_arr_level = int(arr_remainder,i8)
934 i8_arr_tlsum_level(ilevel,ifld,ithread) = &
935 i8_arr_tlsum_level(ilevel,ifld,ithread) + i8_arr_level
936 arr_remainder = arr_remainder - i8_arr_level
940 do while ((arr_remainder .ne. 0.0_r8) &
941 .and. (ilevel < max_levels(ifld)))
943 arr_remainder = scale(arr_remainder,arr_max_shift)
944 i8_arr_level = int(arr_remainder,i8)
945 i8_arr_tlsum_level(ilevel,ifld,ithread) = &
946 i8_arr_tlsum_level(ilevel,ifld,ithread) + i8_arr_level
947 arr_remainder = arr_remainder - i8_arr_level
953 if (arr_remainder .ne. 0.0_r8)
then 954 not_exact(ifld,ithread) = 1
965 do ilevel=max_levels(ifld),1,-1
966 rx_8 = i8_arr_tlsum_level(ilevel,ifld,ithread)
967 ix_8 = int(scale(rx_8,-arr_max_shift),i8)
968 if (ix_8 .ne. 0_i8)
then 969 i8_arr_tlsum_level(ilevel-1,ifld,ithread) = &
970 i8_arr_tlsum_level(ilevel-1,ifld,ithread) + ix_8
971 ix_8 = ix_8*(i8_radix**arr_max_shift)
972 i8_arr_tlsum_level(ilevel,ifld,ithread) = &
973 i8_arr_tlsum_level(ilevel,ifld,ithread) - ix_8
982 ioffset = offset(ifld)
983 do ithread = 1,omp_nthreads
984 do ilevel = 0,max_levels(ifld)
985 i8_arr_lsum_level(ioffset+ilevel) = &
986 i8_arr_lsum_level(ioffset+ilevel) &
987 + i8_arr_tlsum_level(ilevel,ifld,ithread)
996 ioffset = offset(ifld)
997 i8_arr_lsum_level(ioffset-voffset+1) = maxval(max_error(ifld,:))
998 i8_arr_lsum_level(ioffset-voffset+2) = maxval(not_exact(ifld,:))
1003 #if ( defined noI8 ) 1006 call mpi_allreduce (i8_arr_lsum_level, i8_arr_gsum_level, &
1007 veclth, mpi_integer, mpi_sum, mpi_comm, ierr)
1011 call mpi_allreduce (i8_arr_lsum_level, i8_arr_gsum_level, &
1012 veclth, mpi_integer8, mpi_sum, mpi_comm, ierr)
1035 arr_gsum(ifld) = 0.0_r8
1036 ioffset = offset(ifld)
1041 if (i8_arr_gsum_level(ioffset-voffset+1) .ne. 0_i8)
then 1046 if (.not. recompute)
then 1052 do ilevel=max_levels(ifld),1,-1
1053 rx_8 = i8_arr_gsum_level(ioffset+ilevel)
1054 ix_8 = int(scale(rx_8,-arr_max_shift),i8)
1055 if (ix_8 .ne. 0_i8)
then 1056 i8_arr_gsum_level(ioffset+ilevel-1) = i8_arr_gsum_level(ioffset+ilevel-1) &
1058 ix_8 = ix_8*(i8_radix**arr_max_shift)
1059 i8_arr_gsum_level(ioffset+ilevel) = i8_arr_gsum_level(ioffset+ilevel) &
1068 do while ((i8_arr_gsum_level(ioffset+ilevel) .eq. 0_i8) &
1069 .and. (ilevel < max_levels(ifld)))
1073 if (ilevel < max_levels(ifld))
then 1074 if (i8_arr_gsum_level(ioffset+ilevel) > 0_i8)
then 1079 do jlevel=ilevel,max_levels(ifld)-1
1080 if (sign(1_i8,i8_arr_gsum_level(ioffset+jlevel)) &
1081 .ne. sign(1_i8,i8_arr_gsum_level(ioffset+jlevel+1)))
then 1082 i8_arr_gsum_level(ioffset+jlevel) = i8_arr_gsum_level(ioffset+jlevel) &
1084 i8_arr_gsum_level(ioffset+jlevel+1) = i8_arr_gsum_level(ioffset+jlevel+1) &
1085 + i8_sign*(i8_radix**arr_max_shift)
1091 arr_shift = arr_gmax_exp(ifld) &
1092 - max_levels(ifld)*arr_max_shift
1095 do ilevel=max_levels(ifld),0,-1
1097 if (i8_arr_gsum_level(ioffset+ilevel) .ne. 0_i8)
then 1101 x_8(jlevel) = i8_arr_gsum_level(ioffset+ilevel)
1102 lx(jlevel) = exponent(x_8(jlevel))
1105 ix_8 = int(x_8(jlevel),i8)
1106 rx_8 = (i8_arr_gsum_level(ioffset+ilevel) - ix_8)
1109 do while (rx_8 .ne. 0.0_r8)
1112 lx(jlevel) = exponent(rx_8)
1113 ix_8 = ix_8 + int(rx_8,i8)
1114 rx_8 = (i8_arr_gsum_level(ioffset+ilevel) - ix_8)
1120 do while (jlevel > 0)
1122 curr_exp = lx(jlevel) + arr_shift
1123 arr_gsum(ifld) = fraction(x_8(jlevel))
1126 corr_exp = curr_exp - (lx(jlevel) + arr_shift)
1127 arr_gsum(ifld) = fraction(x_8(jlevel)) &
1128 + scale(arr_gsum(ifld),corr_exp)
1129 curr_exp = lx(jlevel) + arr_shift
1136 arr_shift = arr_shift + arr_max_shift
1141 corr_exp = curr_exp + exponent(arr_gsum(ifld))
1142 if (corr_exp .ge. minexponent(1._r8))
then 1143 arr_gsum(ifld) = set_exponent(arr_gsum(ifld),corr_exp)
1145 rx_8 = set_exponent(arr_gsum(ifld), &
1146 corr_exp-minexponent(1._r8))
1147 arr_gsum(ifld) = scale(rx_8,minexponent(1._r8))
1155 if (i8_arr_gsum_level(ioffset-voffset+2) .ne. 0_i8)
then 1160 do ilevel=0,max_levels(ifld)
1161 if (sum_digits .eq. 0)
then 1162 if (i8_arr_gsum_level(ioffset+ilevel) .ne. 0_i8)
then 1163 x_8(1) = i8_arr_gsum_level(ioffset+ilevel)
1164 lx(1) = exponent(x_8(1))
1168 sum_digits = sum_digits + arr_max_shift
1172 if (sum_digits < digits(1.0_r8))
then 1199 character(len=*),
intent(in) :: name
1200 integer,
intent(in) :: nflds
1201 logical,
intent(in) :: master
1203 integer,
optional,
intent(in) :: logunit
1205 real(r8),
intent(in) :: rel_diff(2,nflds)
1214 integer :: exceeds_limit
1216 real(r8) :: max_rel_diff
1217 integer :: max_rel_diff_idx
1218 real(r8) :: max_abs_diff
1219 integer :: max_abs_diff_idx
1220 character(len=*),
parameter :: subname =
'(oasis_reprosum_tolExceeded)' 1227 if (
present(logunit) )
then 1235 max_rel_diff = 0.0_r8
1236 max_abs_diff = 0.0_r8
1239 exceeds_limit = exceeds_limit + 1
1240 if (rel_diff(1,ifld) > max_rel_diff)
then 1241 max_rel_diff = rel_diff(1,ifld)
1242 max_rel_diff_idx = ifld
1244 if (rel_diff(2,ifld) > max_abs_diff)
then 1245 max_abs_diff = rel_diff(2,ifld)
1246 max_abs_diff_idx = ifld
1251 if (exceeds_limit > 0)
then 1253 write(llogunit,*) subname,trim(name), &
1254 ': difference in fixed and floating point sums ', &
1255 ' exceeds tolerance in ', exceeds_limit, &
1257 write(llogunit,*) subname,
' Maximum relative diff: (rel)', &
1258 rel_diff(1,max_rel_diff_idx),
' (abs) ', &
1259 rel_diff(2,max_rel_diff_idx)
1260 write(llogunit,*) subname,
' Maximum absolute diff: (rel)', &
1261 rel_diff(1,max_abs_diff_idx),
' (abs) ', &
1262 rel_diff(2,max_abs_diff_idx)
1285 integer,
intent(in) :: nsummands
1286 integer,
intent(in) :: dsummands
1287 integer,
intent(in) :: nflds
1288 real(r8),
intent(in) :: arr(dsummands,nflds)
1290 integer,
intent(in) :: mpi_comm
1292 real(r8),
intent(out):: arr_gsum(nflds)
1300 integer :: ifld, isum
1303 real(r8) :: e, t1, t2
1304 complex(r8) :: arr_lsum_dd(nflds)
1306 complex(r8) :: arr_gsum_dd(nflds)
1309 integer,
save :: mpi_sumdd
1310 logical,
save :: first_time = .true.
1311 character(len=*),
parameter :: subname =
'(oasis_reprosum_ddpdd)' 1315 call shr_reprosumx86_fix_start (old_cw)
1317 if (first_time)
then 1318 call mpi_op_create(
ddpdd, .true., mpi_sumdd, ierr)
1319 first_time = .false.
1323 arr_lsum_dd(ifld) = (0.0_r8,0.0_r8)
1329 t1 = arr(isum,ifld) +
real(arr_lsum_dd(ifld))
1330 e = t1 - arr(isum,ifld)
1331 t2 = ((
real(arr_lsum_dd(ifld)) - e) &
1332 + (arr(isum,ifld) - (t1 - e))) &
1333 + aimag(arr_lsum_dd(ifld))
1336 arr_lsum_dd(ifld) = cmplx( t1 + t2, t2 - ((t1 + t2) - t1), r8 )
1341 call mpi_allreduce (arr_lsum_dd, arr_gsum_dd, nflds, &
1342 mpi_complex16, mpi_sumdd, mpi_comm, ierr)
1344 arr_gsum(ifld) =
real(arr_gsum_dd(ifld))
1347 call shr_reprosumx86_fix_end (old_cw)
1353 subroutine ddpdd (dda, ddb, len, itype)
1364 integer,
intent(in) :: len
1365 complex(r8),
intent(in) :: dda(len)
1366 complex(r8),
intent(inout) :: ddb(len)
1367 integer,
intent(in) :: itype
1373 character(len=*),
parameter :: subname =
'(oasis_reprosum_mod:DDPDD)' 1379 t1 =
real(dda(i)) +
real(ddb(i))
1380 e = t1 -
real(dda(i))
1381 t2 = ((
real(ddb(i)) - e) + (
real(dda(i)) - (t1 - e))) &
1382 + aimag(dda(i)) + aimag(ddb(i))
1385 ddb(i) = cmplx( t1 + t2, t2 - ((t1 + t2) - t1), r8 )
1389 end subroutine ddpdd 1403 integer,
intent(in) :: total
1404 integer,
intent(in) :: num_pieces
1405 integer,
intent(out) :: ibeg(num_pieces), iend(num_pieces)
1409 integer :: itmp1, itmp2, ioffset, i
1410 character(len=*),
parameter :: subname =
'(oasis_reprosum_mod:split_indices)' 1414 itmp1 = total/num_pieces
1415 itmp2 = mod(total,num_pieces)
1418 ibeg(i) = ioffset + 1
1419 iend(i) = ioffset + (itmp1+1)
1422 do i=itmp2+1,num_pieces
1423 ibeg(i) = ioffset + 1
1424 if (ibeg(i) > total)
then 1425 iend(i) = ibeg(i) - 1
1427 iend(i) = ioffset + itmp1
subroutine oasis_reprosum_int(arr, arr_gsum, nsummands, dsummands, nflds, arr_max_shift, arr_gmax_exp, max_levels, max_level, validate, recompute, omp_nthreads, mpi_comm)
Compute the global sum of each field in "arr" using the indicated communicator with a reproducible ye...
Provides a common location for several OASIS variables.
subroutine, public oasis_reprosum_calc(arr, arr_gsum, nsummands, dsummands, nflds, ddpdd_sum, arr_gbl_max, arr_gbl_max_out, arr_max_levels, arr_max_levels_out, gbl_max_nsummands, gbl_max_nsummands_out, gbl_count, repro_sum_validate, repro_sum_stats, rel_diff, commid)
Compute the global sum of each field in "arr" using the indicated communicator with a reproducible ye...
integer(kind=ip_intwp_p) nulprt
subroutine, public oasis_reprosum_setopts(repro_sum_use_ddpdd_in, repro_sum_rel_diff_max_in, repro_sum_recompute_in, repro_sum_master, repro_sum_logunit)
Set runtime options.
subroutine, public oasis_abort(id_compid, cd_routine, cd_message, file, line, rcode)
OASIS abort method, publically available to users.
integer, parameter ip_i4_p
integer, parameter ip_r8_p
subroutine, private ddpdd(dda, ddb, len, itype)
real(r8), public oasis_reprosum_reldiffmax
Provides a generic and simpler interface into MPI calls for OASIS.
logical function, public oasis_reprosum_tolexceeded(name, nflds, master, logunit, rel_diff)
Test whether distributed sum exceeds tolerance and print out a warning message.
subroutine, public oasis_timer_start(timer_label, barrier)
Start a timer.
subroutine, public oasis_timer_stop(timer_label)
Stop a timer.
character(len= *), parameter, public estr
subroutine, public oasis_mpi_barrier(comm, string)
Call MPI_BARRIER for a particular communicator.
logical repro_sum_use_ddpdd
subroutine, private split_indices(total, num_pieces, ibeg, iend)
subroutine oasis_reprosum_ddpdd(arr, arr_gsum, nsummands, dsummands, nflds, mpi_comm)
Compute the global sum of each field in "arr" using the indicated communicator with a reproducible ye...
integer, parameter ip_i8_p
Performance timer methods.
OASIS reproducible sum method from P. Worley.
logical, public oasis_reprosum_recompute