1 | subroutine psolve_xy_periodic |
---|
2 | #define DEBUG_LEVEL 1 |
---|
3 | !Solve the elliptic equation for pressure subject to appropriate |
---|
4 | !boundary conditions. Interior and boundary differential operators |
---|
5 | !are defined in subroutine matvec. |
---|
6 | |
---|
7 | use mpi_parameters |
---|
8 | use grid_info |
---|
9 | use dependent_variables, only: pressure ! soln vector |
---|
10 | use intermediate_variables, only: b ! rhs vector |
---|
11 | use counters_flags_etc |
---|
12 | use boundary_information |
---|
13 | use user_parameters, only: LAPACK_FLAG |
---|
14 | use columnslab |
---|
15 | implicit none |
---|
16 | include '../input/problem_size.h' |
---|
17 | include 'mpif.h' |
---|
18 | |
---|
19 | real :: pi,xx,mean_value,kx_max,ky_max,kmax,kstar |
---|
20 | real :: XF,YF,bsum,lambda2 |
---|
21 | real, allocatable :: kx(:),ky(:) |
---|
22 | real, allocatable :: soln(:),tmp(:),C3(:) |
---|
23 | real, allocatable :: L_star(:,:) |
---|
24 | |
---|
25 | real, allocatable :: b_col(:,:,:),p_col(:,:,:) |
---|
26 | real, allocatable :: work(:,:,:) |
---|
27 | integer :: N,nrhs,ldb,ii,row |
---|
28 | integer, allocatable :: ipiv(:) |
---|
29 | integer :: sizeofreal |
---|
30 | |
---|
31 | integer,parameter :: locnx=(nx-1)/numprocs |
---|
32 | |
---|
33 | #if DEBUG_LEVEL >=1 |
---|
34 | if(myid==0) then |
---|
35 | write(0,*) 'hello world from solve_for_pressure: xy periodic case' |
---|
36 | endif |
---|
37 | #endif |
---|
38 | |
---|
39 | ! Create a derived data type for ease of swaps |
---|
40 | call MPI_TYPE_EXTENT(MPI_REAL,sizeofreal,ierr) |
---|
41 | ! data type for 1d section of basic data slice: m1/nprocs |
---|
42 | call MPI_TYPE_VECTOR(locnx,1,1,MPI_REAL,oneslice,ierr) |
---|
43 | ! data type for 2d section of basic data slice: m2=ny 1d sections |
---|
44 | call MPI_TYPE_HVECTOR(ny,1,nx*sizeofreal,oneslice,twoslice,ierr) ! |
---|
45 | ! data type for 3d section of basic data slice:locnz=nz/numprocs 2d sections |
---|
46 | call MPI_TYPE_HVECTOR(locnz,1,nx*ny*sizeofreal,twoslice,subslice,ierr) |
---|
47 | call MPI_TYPE_COMMIT(subslice,ierr) |
---|
48 | call MPI_BARRIER(comm,ierr) |
---|
49 | |
---|
50 | current_grid_level=0 |
---|
51 | |
---|
52 | !(1) exchange data so that each processor has columns of b~ --> b_col(1:locnx,1:ny,1:nz) |
---|
53 | allocate( b_col(locnx,ny,nz),p_col(locnx,ny,nz) ) ! don't keep extra x endpt in col storage |
---|
54 | b_col(:,:,:) = 0. |
---|
55 | p_col(:,:,:) = 0. |
---|
56 | call slabs_to_columns(b,b_col) |
---|
57 | p_col(:,:,:)=0. |
---|
58 | |
---|
59 | !(2) each processor compute global arrays kx&ky |
---|
60 | N=nx-1 ! nx includes both boundary points, N only 1 |
---|
61 | pi=4.*atan(1.) |
---|
62 | allocate(kx(N+1)) |
---|
63 | kx(:) = 0. |
---|
64 | do i=1,N/2+1 |
---|
65 | kx(i)=2.*pi*(i-1.) ! FFTW real to half-complex storage |
---|
66 | enddo |
---|
67 | do i=2,N/2 |
---|
68 | kx(N-i+2)=kx(i) ! FFTW real to half-complex storage |
---|
69 | enddo |
---|
70 | kx(N+1)=0. ! extra location, convenient for alignment of arrays |
---|
71 | kx_max=2.*pi*(N/2.) |
---|
72 | |
---|
73 | |
---|
74 | N=ny-1 ! ny includes both boundary points, N only 1 |
---|
75 | allocate(ky(N+1)) |
---|
76 | ky(:) = 0. |
---|
77 | do j=1,N/2+1 |
---|
78 | ky(j)=2.*pi*(j-1.) ! FFTW real to half-complex storage |
---|
79 | enddo |
---|
80 | do j=2,N/2 |
---|
81 | ky(N-j+2)=ky(j) ! FFTW real to half-complex storage |
---|
82 | enddo |
---|
83 | ky(N+1)=0. ! extra location, convenient for alignment of arrays |
---|
84 | ky_max=2.*pi*(N/2.) |
---|
85 | |
---|
86 | allocate( C3(nz) ) ! global integration weights for zeta direction |
---|
87 | C3(:) = 0. |
---|
88 | if( nz < 9 ) then |
---|
89 | write(0,*) 'nz too small for integration scheme in solve_for_xypressure' |
---|
90 | stop |
---|
91 | endif |
---|
92 | C3(5:nz-4)=1.0 |
---|
93 | C3(1)=17./48. |
---|
94 | C3(2)=59./48. |
---|
95 | C3(3)=43./48. |
---|
96 | C3(4)=49./48. |
---|
97 | C3(nz)=17./48. |
---|
98 | C3(nz-1)=59./48. |
---|
99 | C3(nz-2)=43./48. |
---|
100 | C3(nz-3)=49./48. |
---|
101 | |
---|
102 | !(3) each processor allocate storage for L_star, a FD approximation to L |
---|
103 | allocate( ipiv(nz+1), L_star(nz+1,nz+1) ) |
---|
104 | ipiv(:) = 0 |
---|
105 | L_star(:,:) = 0. |
---|
106 | allocate( soln(nz),tmp(nz+1) ) |
---|
107 | soln(:) = 0. |
---|
108 | tmp(:) = 0. |
---|
109 | nrhs=1 |
---|
110 | ldb=nz+1 ! leading dimension of B, i.e. # of rows in augmented rhs vector |
---|
111 | |
---|
112 | !(4) loop over kx,ky space and iteratively solve the perturbed problems |
---|
113 | do i=1,locnx |
---|
114 | ii=i+myid*locnx ! global kx indexing variable |
---|
115 | do j=1,ny-1 |
---|
116 | |
---|
117 | bsum = sum( b_col(i,j,1:nz)*C3(1:nz) ) ! integral of b dzeta (/dzeta) |
---|
118 | lambda2= (kx(ii))**2 * (1./Grid(0)%x_xi(1,1)**2) & |
---|
119 | +(ky(j))**2 * (1./Grid(0)%y_eta(1)**2) |
---|
120 | |
---|
121 | ! rhs of matrix eqn |
---|
122 | tmp(1:nz)=b_col(i,j,1:nz) |
---|
123 | |
---|
124 | ! Augment the rhs for compatability condition |
---|
125 | if( ii==1 .and. j==1 ) then |
---|
126 | tmp(nz+1)=0. |
---|
127 | else |
---|
128 | tmp(nz+1)=-bsum/lambda2 ! augmented rhs, compatability condition |
---|
129 | endif |
---|
130 | |
---|
131 | L_star(:,:)=0. |
---|
132 | |
---|
133 | ! set diagonal elements |
---|
134 | |
---|
135 | do row=3,nz-2 |
---|
136 | xx = (global_Czz(row)/global_det_J(row) )*(1./(60*Grid(0)%dzeta**2)) |
---|
137 | L_star(row,row-2)=-5.*xx |
---|
138 | L_star(row,row-1)=80.*xx |
---|
139 | L_star(row,row)=-150.*xx |
---|
140 | L_star(row,row+1)=80.*xx |
---|
141 | L_star(row,row+2)=-5.*xx |
---|
142 | enddo |
---|
143 | ! 1st row |
---|
144 | row=1 |
---|
145 | xx = (global_Czz(row)/global_det_J(row) )*(1./(60*Grid(0)%dzeta**2)) |
---|
146 | L_star(row,1)=-(725./3.)*xx |
---|
147 | L_star(row,2)=280.*xx |
---|
148 | L_star(row,3)=-30.*xx |
---|
149 | L_star(row,4)=-(40./3.)*xx |
---|
150 | L_star(row,5)=5.*xx |
---|
151 | ! 2nd row |
---|
152 | row=2 |
---|
153 | xx = (global_Czz(row)/global_det_J(row) )*(1./(60*Grid(0)%dzeta**2)) |
---|
154 | L_star(row,1)=(290./3.)*xx |
---|
155 | L_star(row,2)=-180.*xx |
---|
156 | L_star(row,3)=90.*xx |
---|
157 | L_star(row,4)=-(20./3.)*xx |
---|
158 | ! last row |
---|
159 | row=nz |
---|
160 | xx = (global_Czz(row)/global_det_J(row) )*(1./(60*Grid(0)%dzeta**2)) |
---|
161 | L_star(nz,nz)=-(725./3.)*xx |
---|
162 | L_star(nz,nz-1)=280.*xx |
---|
163 | L_star(nz,nz-2)=-30.*xx |
---|
164 | L_star(nz,nz-3)=-(40./3.)*xx |
---|
165 | L_star(nz,nz-4)=5.*xx |
---|
166 | ! 2nd last row |
---|
167 | row=nz-1 |
---|
168 | xx = (global_Czz(row)/global_det_J(row) )*(1./(60*Grid(0)%dzeta**2)) |
---|
169 | L_star(nz-1,nz)=(290./3.)*xx |
---|
170 | L_star(nz-1,nz-1)=-180.*xx |
---|
171 | L_star(nz-1,nz-2)=90.*xx |
---|
172 | L_star(nz-1,nz-3)=-(20./3.)*xx |
---|
173 | |
---|
174 | !Add in the term for horizontal derivatives |
---|
175 | do row=1,nz |
---|
176 | L_star(row,row)=L_star(row,row) - lambda2 |
---|
177 | enddo |
---|
178 | |
---|
179 | ! add in the terms for a*b_zeta*p_zeta |
---|
180 | ! (1st & nz rows don't get anything: p_zeta=0 by BCs, or use approx form, no difference) |
---|
181 | do row=3,nz-2 |
---|
182 | xx = (1./(global_det_J(row)))*global_Cz(row)/(24.*Grid(0)%dzeta) |
---|
183 | L_star(row,row-2)=L_star(row,row-2)+(2.)*xx |
---|
184 | L_star(row,row-1)=L_star(row,row-1)-(16.)*xx |
---|
185 | L_star(row,row+1)=L_star(row,row+1)+(16.)*xx |
---|
186 | L_star(row,row+2)=L_star(row,row+2)-(2.)*xx |
---|
187 | enddo |
---|
188 | ! 1st row |
---|
189 | row=1 |
---|
190 | xx = (1./(global_det_J(row)))*global_Cz(row)/(24.*Grid(0)%dzeta) |
---|
191 | L_star(row,1)=L_star(row,1)-(50)*xx |
---|
192 | L_star(row,2)=L_star(row,2)+(96.)*xx |
---|
193 | L_star(row,3)=L_star(row,3)-(72.)*xx |
---|
194 | L_star(row,4)=L_star(row,4)+(32.)*xx |
---|
195 | L_star(row,5)=L_star(row,5)-(6.)*xx |
---|
196 | ! 2nd row |
---|
197 | row=2 |
---|
198 | xx = (1./(global_det_J(row)))*global_Cz(row)/(24.*Grid(0)%dzeta) |
---|
199 | L_star(row,1)=L_star(row,1)-(68./3.)*xx |
---|
200 | L_star(row,2)=L_star(row,2)+(12.)*xx |
---|
201 | L_star(row,3)=L_star(row,3)+(12.)*xx |
---|
202 | L_star(row,4)=L_star(row,4)-(4./3.)*xx |
---|
203 | ! nz-1 row |
---|
204 | row=nz-1 |
---|
205 | xx = (1./(global_det_J(row)))*global_Cz(row)/(24.*Grid(0)%dzeta) |
---|
206 | L_star(row,nz)=L_star(row,nz)+(28./3.)*xx |
---|
207 | L_star(row,nz-1)=L_star(row,nz-1)+(12.)*xx |
---|
208 | L_star(row,nz-2)=L_star(row,nz-2)-(20.)*xx |
---|
209 | L_star(row,nz-3)=L_star(row,nz-3)+(8./3.)*xx |
---|
210 | ! nz row |
---|
211 | row=nz |
---|
212 | xx = (1./(global_det_J(row)))*global_Cz(row)/(24.*Grid(0)%dzeta) |
---|
213 | L_star(row,nz)=L_star(row,nz)+(50)*xx |
---|
214 | L_star(row,nz-1)=L_star(row,nz-1)-(96.)*xx |
---|
215 | L_star(row,nz-2)=L_star(row,nz-2)+(72.)*xx |
---|
216 | L_star(row,nz-3)=L_star(row,nz-3)-(32.)*xx |
---|
217 | L_star(row,nz-4)=L_star(row,nz-4)+(6.)*xx |
---|
218 | |
---|
219 | ! Augmented system for unique soln with Neumann BCs: |
---|
220 | L_star(nz+1,1:nz)=C3 ! extra row: compatability condition |
---|
221 | L_star(1:nz,nz+1)=1. ! xtra col : perturb rhs such that compatability can be satisfied |
---|
222 | |
---|
223 | ! solve the augmented system |
---|
224 | if( LAPACK_FLAG == 'double' ) then |
---|
225 | !!! call DGESV(nz+1,nrhs,L_star,nz+1,ipiv,tmp,ldb,ierr) |
---|
226 | write(0,*) 'LAPACK_FLAG = double instead of single' |
---|
227 | stop |
---|
228 | elseif( LAPACK_FLAG == 'single' ) then |
---|
229 | call SGESV(nz+1,nrhs,L_star,nz+1,ipiv,tmp,ldb,ierr) |
---|
230 | endif |
---|
231 | |
---|
232 | soln(1:nz)=tmp(1:nz) ! xGESV overwrites the rhs vector w/ the soln |
---|
233 | |
---|
234 | p_col(i,j,1:nz)=soln(1:nz) ! save soln vectors in p_col |
---|
235 | |
---|
236 | enddo |
---|
237 | enddo |
---|
238 | |
---|
239 | deallocate( soln,tmp,C3,kx,ky ) |
---|
240 | |
---|
241 | |
---|
242 | !(5) do the data exchanges so that each processor gets slabs of p --> work(1:nx,1:ny,1:locnz) |
---|
243 | allocate( work(nx,ny,locnz) ) |
---|
244 | work(:,:,:) = 0. |
---|
245 | !!! xlv |
---|
246 | !!! call columns_to_slabs(p_col,work) |
---|
247 | call columns_to_slabs(p_col,work) |
---|
248 | work(nx,:,:)=0. |
---|
249 | work(:,ny,:)=0. |
---|
250 | !!! xlv |
---|
251 | |
---|
252 | ! Now we need to inverse transform the pressure solution |
---|
253 | ! to recover P(xi,eta,zeta). |
---|
254 | call xy_fft(work,pressure,nx,ny,locnz,-1) ! -1 for inverse direction |
---|
255 | |
---|
256 | !Store updated total pressure in pressure(:,:,:,2). |
---|
257 | if( psolve_scheme .eq. 'total_pressure' ) then |
---|
258 | pressure(:,:,:,2)=pressure(:,:,:,1) |
---|
259 | elseif( psolve_scheme .eq. 'change_in_pressure' ) then |
---|
260 | pressure(:,:,:,2)=pressure(:,:,:,2)+pressure(:,:,:,1) |
---|
261 | endif |
---|
262 | |
---|
263 | !release the MPI derived data types (created before) |
---|
264 | call MPI_BARRIER(comm,ierr) |
---|
265 | call MPI_TYPE_FREE(oneslice,ierr) |
---|
266 | call MPI_TYPE_FREE(twoslice,ierr) |
---|
267 | call MPI_TYPE_FREE(subslice,ierr) |
---|
268 | |
---|
269 | deallocate(b_col,p_col,ipiv,L_star,work) |
---|
270 | |
---|
271 | end subroutine psolve_xy_periodic |
---|
272 | |
---|