88 | | ALLOCATE( zchl3d(jpi,jpj,jpk) ) |
89 | | ! |
90 | | ! code to set zchl3d(:,:,1:nksr+1) |
91 | | ! |
92 | | ! |
| 91 | ALLOCATE( ztmp3d(jpi,jpj,nksr + 1) ) |
| 92 | ! |
| 93 | ! code to set ztmp3d(:,:,1:nksr+1) |
| 94 | |
| 95 | ! including the following changes after |
| 96 | IF( nqsr == np_RGBc ) THEN !* Variable Chlorophyll |
| 97 | . |
| 98 | DO_3D_00_00( 1, nksr + 1 ) |
| 99 | . |
| 100 | . no change until after |
| 101 | zCze = 1.12 * (zchl)**0.803 |
| 102 | ! |
| 103 | ! NB. make sure zchl value is such that: zchl = MIN( 10. , MAX( 0.03, zchl ) ) |
| 104 | zchl = MIN( 10. , MAX( 0.03, zCze * ( zCb + zCmax * EXP( -( (zpsi - zpsimax) / zdelpsi )**2 ) ) ) ) |
| 105 | ! Convert chlorophyll value to attenuation coefficient look-up table index |
| 106 | ztmp3d(ji,jj,jk) = 41 + 20.*LOG10(zchl) + 1.e-15 |
| 107 | END_3D |
| 108 | ELSE !* constant chlorophyll |
| 109 | zchl = 0.05 |
| 110 | ! NB. make sure constant value is such that: |
| 111 | zchl = MIN( 10. , MAX( 0.03, zchl ) ) |
| 112 | ! Convert chlorophyll value to attenuation coefficient look-up table index |
| 113 | zlui = 41 + 20.*LOG10(zchl) + 1.e-15 |
| 114 | DO jk = 1, nksr + 1 |
| 115 | ztmp3d(:,:,jk) = zlui |
| 116 | END DO |
| 117 | ENDIF |
| 118 | ! |
121 | | qsr_hc(ji,jj,jk) = r1_rho0_rcp * ( zchl3d(ji,jj,jk) - zchl3d(ji,jj,jk+1) ) |
122 | | END_3D |
123 | | ! |
124 | | DEALLOCATE( zchl3d ) |
125 | | }}} |
126 | | |
127 | | This is code and memory efficient but will perform poorly due to non-contiguous access to the array elements (see performance section below). |
128 | | |
129 | | === Option 2: Reduce full-depth arrays to single level arrays where possible. |
130 | | A compromise solution, which reduces memory use and maintains performance is to remove all unnecessary full-depth arrays but maintain loop order. |
| 147 | qsr_hc(ji,jj,jk) = r1_rho0_rcp * ( ztmp3d(ji,jj,jk) - ztmp3d(ji,jj,jk+1) ) |
| 148 | END_3D |
| 149 | ! |
| 150 | DEALLOCATE( ztmp3d ) |
| 151 | }}} |
| 152 | |
| 153 | This is code and memory efficient but may perform poorly due to non-contiguous access to the array elements (see performance section below). |
| 154 | |
| 155 | === Option 2: Low memory use (retain loop order). |
| 156 | A compromise solution, which reduces memory use and should perform better, is to remove all unnecessary full-depth arrays but maintain loop order by keeping a few 2D arrays. |
134 | | ALLOCATE( zeka(jpi,jpj) , zekb(jpi,jpj) , & |
135 | | & zekg(jpi,jpj) , zekr(jpi,jpj) , & |
136 | | & ze0 (jpi,jpj) , ze1 (jpi,jpj) , & |
137 | | & ze2 (jpi,jpj) , ze3 (jpi,jpj) , & |
138 | | & zchl3d(jpi,jpj,jpk) ) |
139 | | ! |
140 | | ! code to set zchl3d(:,:,1:nksr+1) |
141 | | ! |
| 160 | ALLOCATE( ze0 (jpi,jpj) , ze1 (jpi,jpj) , & |
| 161 | & ze2 (jpi,jpj) , ze3 (jpi,jpj) , & |
| 162 | & ztmp3d(jpi,jpj,nksr + 1) ) |
| 163 | ! |
| 164 | ! code to set ztmp3d(:,:,1:nksr+1) |
| 165 | ! including the following changes after |
| 166 | IF( nqsr == np_RGBc ) THEN !* Variable Chlorophyll |
| 167 | . |
| 168 | DO_3D_00_00( 1, nksr + 1 ) |
| 169 | . |
| 170 | . no change until after |
| 171 | zCze = 1.12 * (zchl)**0.803 |
| 172 | ! |
| 173 | ! NB. make sure zchl value is such that: zchl = MIN( 10. , MAX( 0.03, zchl ) ) |
| 174 | zchl = MIN( 10. , MAX( 0.03, zCze * ( zCb + zCmax * EXP( -( (zpsi - zpsimax) / zdelpsi )**2 ) ) ) ) |
| 175 | ! Convert chlorophyll value to attenuation coefficient look-up table index |
| 176 | ztmp3d(ji,jj,jk) = 41 + 20.*LOG10(zchl) + 1.e-15 |
| 177 | END_3D |
| 178 | ELSE !* constant chlorophyll |
| 179 | zchl = 0.05 |
| 180 | ! NB. make sure constant value is such that: |
| 181 | zchl = MIN( 10. , MAX( 0.03, zchl ) ) |
| 182 | ! Convert chlorophyll value to attenuation coefficient look-up table index |
| 183 | zlui = 41 + 20.*LOG10(zchl) + 1.e-15 |
| 184 | DO jk = 1, nksr + 1 |
| 185 | ztmp3d(:,:,jk) = zlui |
| 186 | END DO |
| 187 | ENDIF |
154 | | DO jk = 2, nksr+1 !* interior equi-partition in R-G-B depending of vertical profile of Chl |
155 | | DO_2D_00_00 |
156 | | zchl = MIN( 10. , MAX( 0.03, zchl3d(ji,jj,jk) ) ) |
157 | | irgb = NINT( 41 + 20.*LOG10(zchl) + 1.e-15 ) |
158 | | ze3t = e3t(ji,jj,jk-1,Kmm) |
159 | | zeka(ji,jj) = EXP( - ze3t * xsi0r ) |
160 | | zekb(ji,jj) = EXP( - ze3t * rkrgb(1,irgb) ) |
161 | | zekg(ji,jj) = EXP( - ze3t * rkrgb(2,irgb) ) |
162 | | zekr(ji,jj) = EXP( - ze3t * rkrgb(3,irgb) ) |
163 | | END_2D |
164 | | |
165 | | DO_2D_00_00 |
166 | | ze0(ji,jj) = ze0(ji,jj) * zeka(ji,jj) |
167 | | ze1(ji,jj) = ze1(ji,jj) * zekb(ji,jj) |
168 | | ze2(ji,jj) = ze2(ji,jj) * zekg(ji,jj) |
169 | | ze3(ji,jj) = ze3(ji,jj) * zekr(ji,jj) |
170 | | ! store the SW radiation penetrating to this location |
171 | | ! re-use the zchl3d array since the chl value at this point will |
172 | | ! not be needed again |
173 | | zchl3d(ji,jj,jk) = ( ze0(ji,jj) + ze1(ji,jj) + ze2(ji,jj) + ze3(ji,jj) ) * wmask(ji,jj,jk) |
174 | | END_2D |
175 | | END DO |
| 200 | !* interior equi-partition in R-G-B depending on the vertical profile of Chl |
| 201 | DO_3D_00_00( 2, nksr+1 ) |
| 202 | irgb = NINT( ztmp3d(ji,jj,jk) ) |
| 203 | ze3t = e3t(ji,jj,jk-1,Kmm) |
| 204 | ze0(ji,jj) = ze0(ji,jj) * EXP( - ze3t * xsi0r ) |
| 205 | ze1(ji,jj) = ze1(ji,jj) * EXP( - ze3t * rkrgb(1,irgb) ) |
| 206 | ze2(ji,jj) = ze2(ji,jj) * EXP( - ze3t * rkrgb(2,irgb) ) |
| 207 | ze3(ji,jj) = ze3(ji,jj) * EXP( - ze3t * rkrgb(3,irgb) ) |
| 208 | ! store the SW radiation penetrating to this location |
| 209 | ! re-use the ztmp3d array since the attenuation coefficient |
| 210 | ! at this point will not be needed again |
| 211 | ztmp3d(ji,jj,jk) = ( ze0(ji,jj) + ze1(ji,jj) + ze2(ji,jj) + ze3(ji,jj) ) * wmask(ji,jj,jk) |
| 212 | END_3D |
178 | | qsr_hc(ji,jj,jk) = r1_rho0_rcp * ( zchl3d(ji,jj,jk) - zchl3d(ji,jj,jk+1) ) |
179 | | END_3D |
180 | | ! |
181 | | DEALLOCATE( zeka, zekb , zekg , zekr , ze0 , ze1 , ze2 , ze3 , zchl3d ) |
182 | | }}} |
| 215 | qsr_hc(ji,jj,jk) = r1_rho0_rcp * ( ztmp3d(ji,jj,jk) - ztmp3d(ji,jj,jk+1) ) |
| 216 | END_3D |
| 217 | ! |
| 218 | DEALLOCATE( ze0 , ze1 , ze2 , ze3 , ztmp3d ) |
| 219 | }}} |
| 220 | |
| 221 | === Performance and evaluation |
| 222 | Both these options produce identical results to the original code (based on an ORCA2_ICE_PISCES test using SETTE (which includes variable surface chlorophyll inputs. ln_timing was activated and the |