30 | | {{{#!box width=35em help |
31 | | Describe flow chart of the changes in the code. \\ |
32 | | List the Fortran modules and subroutines to be created/edited/deleted. \\ |
33 | | Detailed list of new variables to be defined (including namelists), \\ |
34 | | give for each the chosen name and description wrt coding rules. |
35 | | }}} |
36 | | |
| 30 | The current code is structured thus: |
| 31 | |
| 32 | {{{ |
| 33 | CASE( np_RGB , np_RGBc ) !== R-G-B fluxes ==! |
| 34 | ! |
| 35 | ALLOCATE( zekb(jpi,jpj) , zekg(jpi,jpj) , zekr (jpi,jpj) , & |
| 36 | & ze0 (jpi,jpj,jpk) , ze1 (jpi,jpj,jpk) , ze2 (jpi,jpj,jpk) , & |
| 37 | & ze3 (jpi,jpj,jpk) , zea (jpi,jpj,jpk) , zchl3d(jpi,jpj,jpk) ) |
| 38 | ! |
| 39 | ! code to set zchl3d(:,:,1:nksr+1) |
| 40 | ! |
| 41 | ! |
| 42 | zcoef = ( 1. - rn_abs ) / 3._wp !* surface equi-partition in R-G-B |
| 43 | DO_2D_00_00 |
| 44 | ze0(ji,jj,1) = rn_abs * qsr(ji,jj) |
| 45 | ze1(ji,jj,1) = zcoef * qsr(ji,jj) |
| 46 | ze2(ji,jj,1) = zcoef * qsr(ji,jj) |
| 47 | ze3(ji,jj,1) = zcoef * qsr(ji,jj) |
| 48 | zea(ji,jj,1) = qsr(ji,jj) |
| 49 | END_2D |
| 50 | ! |
| 51 | DO jk = 2, nksr+1 !* interior equi-partition in R-G-B depending of vertical profile of Chl |
| 52 | DO_2D_00_00 |
| 53 | zchl = MIN( 10. , MAX( 0.03, zchl3d(ji,jj,jk) ) ) |
| 54 | irgb = NINT( 41 + 20.*LOG10(zchl) + 1.e-15 ) |
| 55 | zekb(ji,jj) = rkrgb(1,irgb) |
| 56 | zekg(ji,jj) = rkrgb(2,irgb) |
| 57 | zekr(ji,jj) = rkrgb(3,irgb) |
| 58 | END_2D |
| 59 | |
| 60 | DO_2D_00_00 |
| 61 | zc0 = ze0(ji,jj,jk-1) * EXP( - e3t(ji,jj,jk-1,Kmm) * xsi0r ) |
| 62 | zc1 = ze1(ji,jj,jk-1) * EXP( - e3t(ji,jj,jk-1,Kmm) * zekb(ji,jj) ) |
| 63 | zc2 = ze2(ji,jj,jk-1) * EXP( - e3t(ji,jj,jk-1,Kmm) * zekg(ji,jj) ) |
| 64 | zc3 = ze3(ji,jj,jk-1) * EXP( - e3t(ji,jj,jk-1,Kmm) * zekr(ji,jj) ) |
| 65 | ze0(ji,jj,jk) = zc0 |
| 66 | ze1(ji,jj,jk) = zc1 |
| 67 | ze2(ji,jj,jk) = zc2 |
| 68 | ze3(ji,jj,jk) = zc3 |
| 69 | zea(ji,jj,jk) = ( zc0 + zc1 + zc2 + zc3 ) * wmask(ji,jj,jk) |
| 70 | END_2D |
| 71 | END DO |
| 72 | ! |
| 73 | DO_3D_00_00( 1, nksr ) |
| 74 | qsr_hc(ji,jj,jk) = r1_rho0_rcp * ( zea(ji,jj,jk) - zea(ji,jj,jk+1) ) |
| 75 | END_3D |
| 76 | ! |
| 77 | DEALLOCATE( zekb , zekg , zekr , ze0 , ze1 , ze2 , ze3 , zea , zchl3d ) |
| 78 | ! |
| 79 | }}} |
| 80 | Most of the temporary, full-depth arrays are unnecessary because only two vertical levels (jk-1 and jk) are required at any one time. In fact, even the zea array is unnecessary, since each level of the zchl3d array can be repurposed to hold the penetrating radiation once its chlorophyll value has been used. |
| 81 | |
| 82 | === Option 1: Minimum memory usage |
| 83 | By rearranging the loop order so that the vertical loop is innermost, the code can be greatly simplified to an equivalent form that uses minimal temporary storage: |
| 84 | |
| 85 | {{{ |
| 86 | CASE( np_RGB , np_RGBc ) !== R-G-B fluxes ==! |
| 87 | ! |
| 88 | ALLOCATE( zchl3d(jpi,jpj,jpk) ) |
| 89 | ! |
| 90 | ! code to set zchl3d(:,:,1:nksr+1) |
| 91 | ! |
| 92 | ! |
| 93 | ! |
| 94 | zcoef = ( 1. - rn_abs ) / 3._wp !* surface equi-partition in R-G-B |
| 95 | ! store the surface SW radiation; |
| 96 | ! re-use the surface zchl3d array since the surface chl value is not used |
| 97 | zchl3d(:,:,1) = qsr(:,:) |
| 98 | ! |
| 99 | !* interior equi-partition in R-G-B depending of vertical profile of Chl |
| 100 | DO_2D_00_00 |
| 101 | zc0 = rn_abs * qsr(ji,jj) |
| 102 | zc1 = zcoef * qsr(ji,jj) |
| 103 | zc2 = zc1 |
| 104 | zc3 = zc1 |
| 105 | zc4 = e3t(ji,jj,1,Kmm) |
| 106 | DO jk = 2, nksr+1 |
| 107 | zchl = MIN( 10. , MAX( 0.03, zchl3d(ji,jj,jk) ) ) |
| 108 | irgb = NINT( 41 + 20.*LOG10(zchl) + 1.e-15 ) |
| 109 | zc0 = zc0 * EXP( - zc4 * xsi0r ) |
| 110 | zc1 = zc1 * EXP( - zc4 * rkrgb(1,irgb) ) |
| 111 | zc2 = zc2 * EXP( - zc4 * rkrgb(2,irgb) ) |
| 112 | zc3 = zc3 * EXP( - zc4 * rkrgb(3,irgb) ) |
| 113 | zc4 = e3t(ji,jj,jk,Kmm) |
| 114 | ! store the SW radiation penetrating to this location |
| 115 | ! re-use the zchl3d array since the chl value at this point will not be needed again |
| 116 | zchl3d(ji,jj,jk) = ( zc0 + zc1 + zc2 + zc3 ) * wmask(ji,jj,jk) |
| 117 | END DO |
| 118 | END_2D |
| 119 | ! |
| 120 | DO_3D_00_00( 1, nksr ) |
| 121 | qsr_hc(ji,jj,jk) = r1_rho0_rcp * ( zchl3d(ji,jj,jk) - zchl3d(ji,jj,jk+1) ) |
| 122 | END_3D |
| 123 | ! |
| 124 | DEALLOCATE( zchl3d ) |
| 125 | }}} |
| 126 | |
| 127 | This version is efficient in both code size and memory use, but it will perform poorly because placing the vertical loop innermost leads to non-contiguous access to the array elements (see the performance section below). |
| 128 | |
| 129 | === Option 2: Reduce full-depth arrays to single-level arrays where possible. |
| 130 | A compromise solution, which reduces memory use and maintains performance, is to remove all unnecessary full-depth arrays but keep the original loop order. |
| 131 | {{{ |
| 132 | CASE( np_RGB , np_RGBc ) !== R-G-B fluxes ==! |
| 133 | ! |
| 134 | ALLOCATE( zeka(jpi,jpj) , zekb(jpi,jpj) , & |
| 135 | & zekg(jpi,jpj) , zekr(jpi,jpj) , & |
| 136 | & ze0 (jpi,jpj) , ze1 (jpi,jpj) , & |
| 137 | & ze2 (jpi,jpj) , ze3 (jpi,jpj) , & |
| 138 | & zchl3d(jpi,jpj,jpk) ) |
| 139 | ! |
| 140 | ! code to set zchl3d(:,:,1:nksr+1) |
| 141 | ! |
| 142 | ! |
| 143 | zcoef = ( 1. - rn_abs ) / 3._wp !* surface equi-partition in R-G-B |
| 144 | DO_2D_00_00 |
| 145 | ze0(ji,jj) = rn_abs * qsr(ji,jj) |
| 146 | ze1(ji,jj) = zcoef * qsr(ji,jj) |
| 147 | ze2(ji,jj) = zcoef * qsr(ji,jj) |
| 148 | ze3(ji,jj) = zcoef * qsr(ji,jj) |
| 149 | ! store the surface SW radiation |
| 150 | ! re-use the surface zchl3d array since the surface chl is not used |
| 151 | zchl3d(ji,jj,1) = qsr(ji,jj) |
| 152 | END_2D |
| 153 | ! |
| 154 | DO jk = 2, nksr+1 !* interior equi-partition in R-G-B depending of vertical profile of Chl |
| 155 | DO_2D_00_00 |
| 156 | zchl = MIN( 10. , MAX( 0.03, zchl3d(ji,jj,jk) ) ) |
| 157 | irgb = NINT( 41 + 20.*LOG10(zchl) + 1.e-15 ) |
| 158 | ze3t = e3t(ji,jj,jk-1,Kmm) |
| 159 | zeka(ji,jj) = EXP( - ze3t * xsi0r ) |
| 160 | zekb(ji,jj) = EXP( - ze3t * rkrgb(1,irgb) ) |
| 161 | zekg(ji,jj) = EXP( - ze3t * rkrgb(2,irgb) ) |
| 162 | zekr(ji,jj) = EXP( - ze3t * rkrgb(3,irgb) ) |
| 163 | END_2D |
| 164 | |
| 165 | DO_2D_00_00 |
| 166 | ze0(ji,jj) = ze0(ji,jj) * zeka(ji,jj) |
| 167 | ze1(ji,jj) = ze1(ji,jj) * zekb(ji,jj) |
| 168 | ze2(ji,jj) = ze2(ji,jj) * zekg(ji,jj) |
| 169 | ze3(ji,jj) = ze3(ji,jj) * zekr(ji,jj) |
| 170 | ! store the SW radiation penetrating to this location |
| 171 | ! re-use the zchl3d array since the chl value at this point will |
| 172 | ! not be needed again |
| 173 | zchl3d(ji,jj,jk) = ( ze0(ji,jj) + ze1(ji,jj) + ze2(ji,jj) + ze3(ji,jj) ) * wmask(ji,jj,jk) |
| 174 | END_2D |
| 175 | END DO |
| 176 | ! |
| 177 | DO_3D_00_00( 1, nksr ) |
| 178 | qsr_hc(ji,jj,jk) = r1_rho0_rcp * ( zchl3d(ji,jj,jk) - zchl3d(ji,jj,jk+1) ) |
| 179 | END_3D |
| 180 | ! |
| 181 | DEALLOCATE( zeka, zekb , zekg , zekr , ze0 , ze1 , ze2 , ze3 , zchl3d ) |
| 182 | }}} |