Line data Source code
1 : MODULE m_stepf
2 : USE m_juDFT
3 : USE m_cdn_io
4 : CONTAINS
5 160 : SUBROUTINE stepf(sym, stars, atoms, input, cell, vacuum, fmpi)
6 : !
7 : !*********************************************************************
8 : ! calculates the fourier components of the interstitial step
9 : ! function for the reciprocal vectors of the star list.
10 : ! m. weinert 1986
11 : !*********************************************************************
12 : !
13 : ! also set up FFT of U(G) on a (-2G:+2G) grid for convolutions
14 : !
15 : !*********************************************************************
16 :
17 : USE m_cfft
18 : USE m_constants
19 :
20 : USE m_types
21 : USE m_mpi_bc_tool
22 : USE m_mpi_reduce_tool
23 : IMPLICIT NONE
24 : ! ..
25 : TYPE(t_sym), INTENT(IN) :: sym
26 : TYPE(t_stars), INTENT(INOUT) :: stars
27 : TYPE(t_atoms), INTENT(IN) :: atoms
28 :
29 : TYPE(t_input), INTENT(IN) :: input
30 : TYPE(t_cell), INTENT(IN) :: cell
31 : TYPE(t_vacuum), INTENT(IN) :: vacuum
32 : TYPE(t_mpi), INTENT(IN) :: fmpi
33 :
34 : ! ..
35 : ! .. Local Scalars ..
36 : COMPLEX c_c, c_phs, sf
37 : REAL factorA, dd, gs, th, inv_omtil, r_phs
38 : REAL g_rmt, g_sqr, help, g_abs, fp_omtil, r_c, gr, gx, gy
39 : INTEGER i, iType, na, naInit, nn, i1, i2, i3, ic, ifft2d, ifftd, kk
40 : INTEGER iStar, iStarStart, iStarEnd
41 : INTEGER ic1, ic2, ic3, icc, im1, im2, im3, loopstart
42 : LOGICAL l_error
43 : ! ..
44 : ! .. Local Arrays ..
45 : REAL g(3), gm(3), fJ
46 160 : REAL, ALLOCATABLE :: bfft(:)
47 160 : COMPLEX, ALLOCATABLE :: ustepLocal(:)
48 160 : INTEGER, ALLOCATABLE :: icm(:, :, :)
49 : INTEGER :: i3_start, i3_end, chunk_size, leftover_size
50 : INTEGER ierr
51 160 : INTEGER, ALLOCATABLE :: icm_local(:, :, :)
52 160 : REAL, ALLOCATABLE :: ufft_local(:), bfft_local(:)
53 :
54 160 : ifftd = 27*stars%mx1*stars%mx2*stars%mx3
55 : ! ..
56 : ! ..
57 : !---> if step function stored on disc, then just read it in
58 : !
59 160 : l_error = .FALSE.
60 160 : IF (fmpi%irank == 0) THEN
61 80 : CALL readStepfunction(stars, atoms, cell, vacuum, l_error)
62 : END IF
63 :
64 160 : CALL mpi_bc(l_error, 0, fmpi%mpi_comm)
65 :
66 160 : IF (.NOT. l_error) THEN
67 0 : RETURN
68 : END IF
69 :
70 480 : ALLOCATE (ustepLocal(stars%ng3))
71 552616 : stars%ustep(:) = CMPLX(0.0,0.0)
72 552616 : ustepLocal = CMPLX(0.0,0.0)
73 :
74 160 : IF (fmpi%irank == 0) THEN
75 80 : CALL timestart("ustep")
76 80 : IF (input%film) THEN
77 11 : dd = vacuum%dvac*cell%area/cell%omtil
78 : ELSE
79 : dd = 1.0
80 : END IF
81 : !---> G=0 star
82 80 : factorA = 0.0
83 217 : DO iType = 1, atoms%ntype
84 217 : factorA = factorA + atoms%neq(iType)*atoms%volmts(iType)/cell%omtil
85 : ENDDO
86 :
87 : ! Treat 1st star separately:
88 :
89 80 : ustepLocal(1) = CMPLX(dd - factorA, 0.0)
90 :
91 : ! Film contributions:
92 : !---> G(parallel)=0 (for film)
93 80 : IF (input%film ) THEN
94 29355 : DO iStar = 2, stars%ng3
95 29355 : IF (stars%ig2(iStar) .EQ. 1) THEN
96 278 : th = cell%bmat(3, 3)*stars%kv3(3, iStar)*cell%z1
97 278 : ustepLocal(iStar) = CMPLX(cell%vol*SIN(th)/th/cell%omtil, 0.0)
98 : END IF
99 : ENDDO
100 : END IF
101 : END IF
102 :
103 : ! Now the other stars:
104 :
105 160 : CALL calcIndexBounds(fmpi,2, stars%ng3, iStarStart, iStarEnd)
106 :
107 160 : CALL mpi_bc(stars%sk3, 0, fmpi%mpi_comm)
108 160 : CALL mpi_bc(stars%kv3, 0, fmpi%mpi_comm)
109 :
110 : !$OMP PARALLEL DO DEFAULT(none) &
111 : !$OMP SHARED(atoms,stars,cell,ustepLocal,iStarStart,iStarEnd) &
112 160 : !$OMP PRIVATE(iStar,iType,naInit,factorA,th,sf,nn,na,gs)
113 : DO iStar = iStarStart, iStarEnd
114 : DO iType = 1, atoms%ntype
115 : !--> structure factors: loop over equivalent atoms
116 : naInit = atoms%firstAtom(iType) - 1
117 : factorA = 3.0 * atoms%volmts(iType) / cell%omtil
118 : sf = CMPLX(0.0,0.0)
119 :
120 : DO nn = 1, atoms%neq(iType)
121 : na = naInit + nn
122 : th = -tpi_const * DOT_PRODUCT(stars%kv3(:,iStar), atoms%taual(:, na))
123 : sf = sf + CMPLX(COS(th), SIN(th))
124 : END DO
125 :
126 : gs = stars%sk3(iStar)*atoms%rmt(iType)
127 : ustepLocal(iStar) = ustepLocal(iStar) - (factorA*(SIN(gs)/gs - COS(gs))/(gs*gs))*sf
128 : ENDDO
129 : ENDDO
130 : !$OMP END PARALLEL DO
131 :
132 160 : CALL mpi_sum_reduce(ustepLocal, stars%ustep, fmpi%mpi_comm)
133 :
134 160 : DEALLOCATE(ustepLocal)
135 :
136 160 : IF (fmpi%irank == 0) THEN
137 80 : CALL timestop("ustep")
138 80 : CALL timestart("Oldstepf")
139 : END IF ! (fmpi%irank == 0)
140 :
141 :
142 :
143 :
144 :
145 : !
146 : ! --> set up stepfunction on fft-grid:
147 : !
148 640 : ALLOCATE (bfft_local(0:ifftd - 1), ufft_local(0:ifftd - 1))
149 9753208 : bfft_local = 0.0
150 9753208 : ufft_local = 0.0
151 :
152 320 : ALLOCATE (bfft(0:ifftd - 1))
153 160 : im1 = CEILING(1.5*stars%mx1)
154 160 : im2 = CEILING(1.5*stars%mx2)
155 160 : im3 = CEILING(1.5*stars%mx3)
156 800 : ALLOCATE (icm(-im1:im1, -im2:im2, -im3:im3))
157 11210608 : icm = 0
158 640 : ALLOCATE (icm_local(-im1:im1, -im2:im2, -im3:im3))
159 11210608 : icm_local = 0
160 :
161 160 : inv_omtil = 1.0/cell%omtil
162 160 : fp_omtil = -fpi_const*inv_omtil
163 : !DO first vector before loop
164 9753208 : stars%ufft = 0.0
165 9753208 : bfft = 0.0
166 :
167 160 : IF (fmpi%irank == 0) THEN
168 217 : DO iType = 1, atoms%ntype
169 217 : ufft_local(0) = ufft_local(0) + atoms%neq(iType)*atoms%volmts(iType)
170 : ENDDO
171 80 : ufft_local(0) = 1.0 - ufft_local(0)*inv_omtil
172 : ENDIF
173 :
174 160 : CALL calcIndexBounds(fmpi,0, im3, i3_start, i3_end)
175 :
176 : !$OMP PARALLEL DO DEFAULT(none) &
177 : !$OMP SHARED(atoms,stars,cell,sym,bfft_local,ufft_local,icm_local,i3_start,i3_end,im1,im2,im3,fp_omtil,inv_omtil) &
178 160 : !$OMP PRIVATE(i3,gm,i2,loopstart,i1,ic,na,gs,ic1,ic2,ic3,g,g_sqr,g_abs,help,r_c,iType,r_phs,nn,th,g_rmt,c_c,c_phs)
179 : DO i3 = i3_start, i3_end
180 : gm(3) = REAL(i3)
181 : DO i2 = 0, 3*stars%mx2 - 1
182 : gm(2) = REAL(i2)
183 : IF (2*i2 > 3*stars%mx2) gm(2) = gm(2) - 3.0*stars%mx2
184 : IF (i3 == 0 .AND. i2 == 0) THEN
185 : loopstart = 1
186 : ELSE
187 : loopstart = 0
188 : ENDIF
189 : DO i1 = loopstart, 3*stars%mx1 - 1
190 : ic = i1 + 3*stars%mx1*i2 + 9*stars%mx1*stars%mx2*i3
191 : gm(1) = REAL(i1)
192 : IF (2*i1 > 3*stars%mx1) gm(1) = gm(1) - 3.0*stars%mx1
193 :
194 : ic1 = NINT(gm(1))
195 : ic2 = NINT(gm(2))
196 : ic3 = NINT(gm(3))
197 : icm_local(ic1, ic2, ic3) = ic
198 : IF (ic1 == im1) icm_local(-ic1, ic2, ic3) = ic
199 : IF (ic2 == im2) icm_local(ic1, -ic2, ic3) = ic
200 : IF ((ic1 == im1) .AND. (ic2 == im2)) icm_local(-ic1, -ic2, ic3) = ic
201 : g = MATMUL(TRANSPOSE(cell%bmat), gm)
202 : g_sqr = DOT_PRODUCT(g, g)
203 : g_abs = SQRT(g_sqr)
204 : help = fp_omtil/g_sqr
205 : IF (sym%invs) THEN
206 : r_c = 0.0
207 : DO iType = 1, atoms%ntype
208 : r_phs = 0.0
209 : na = atoms%firstAtom(iType) - 1
210 : DO nn = 1, atoms%neq(iType)
211 : th = -tpi_const*DOT_PRODUCT(gm, atoms%taual(:, na + nn))
212 : r_phs = r_phs + COS(th)
213 : ENDDO
214 : g_rmt = g_abs*atoms%rmt(iType)
215 : r_c = r_c + atoms%rmt(iType)*(SIN(g_rmt)/g_rmt - COS(g_rmt))*r_phs
216 : ENDDO
217 : ufft_local(ic) = help*r_c
218 : ELSE
219 : c_c = CMPLX(0.0, 0.0)
220 : DO iType = 1, atoms%ntype
221 : c_phs = CMPLX(0.0, 0.0)
222 : na = atoms%firstAtom(iType) - 1
223 : DO nn = 1, atoms%neq(iType)
224 : th = -tpi_const*DOT_PRODUCT(gm, atoms%taual(:, na + nn))
225 : c_phs = c_phs + EXP(CMPLX(0, th))
226 : ENDDO
227 : g_rmt = g_abs*atoms%rmt(iType)
228 : c_c = c_c + atoms%rmt(iType)*(SIN(g_rmt)/g_rmt - COS(g_rmt))*c_phs
229 : ENDDO
230 : ufft_local(ic) = help*REAL(c_c)
231 : bfft_local(ic) = help*AIMAG(c_c)
232 : ENDIF
233 :
234 : IF (((2*i3 .EQ. 3*stars%mx3) .OR. (2*i2 .EQ. 3*stars%mx2)) .OR. (2*i1 .EQ. 3*stars%mx1)) THEN
235 : ufft_local(ic) = 0.0
236 : bfft_local(ic) = 0.0
237 : ENDIF
238 : ENDDO
239 : ENDDO
240 : ENDDO
241 : !$OMP END PARALLEL DO
242 :
243 160 : CALL mpi_sum_reduce(ufft_local,stars%ufft,fmpi%mpi_comm)
244 160 : CALL mpi_sum_reduce(bfft_local,bfft,fmpi%mpi_comm)
245 160 : CALL mpi_sum_reduce(icm_local,icm,fmpi%mpi_comm)
246 :
247 160 : IF (fmpi%irank == 0) THEN
248 80 : ic = 9*stars%mx1*stars%mx2*(im3 + 1)
249 1688 : DO i3 = im3 + 1, 3*stars%mx3 - 1
250 1608 : gm(3) = REAL(i3)
251 1608 : gm(3) = gm(3) - 3.0*stars%mx3
252 61382 : DO i2 = 0, 3*stars%mx2 - 1
253 59694 : gm(2) = REAL(i2)
254 59694 : IF (2*i2 > 3*stars%mx2) gm(2) = gm(2) - 3.0*stars%mx2
255 2369064 : DO i1 = 0, 3*stars%mx1 - 1
256 2307762 : gm(1) = REAL(i1)
257 2307762 : IF (2*i1 > 3*stars%mx1) gm(1) = gm(1) - 3.0*stars%mx1
258 : !
259 : !-> use inversion <-> c.c.
260 : !
261 2307762 : ic1 = NINT(gm(1)); ic2 = NINT(gm(2)); ic3 = NINT(gm(3))
262 2307762 : icc = icm(-ic1, -ic2, -ic3)
263 2307762 : stars%ufft(ic) = stars%ufft(icc)
264 2307762 : IF (.NOT. sym%invs) bfft(ic) = -bfft(icc)
265 2367456 : ic = ic + 1
266 : ENDDO
267 : ENDDO
268 : ENDDO
269 : !
270 : ! --> add film-contributions
271 : !
272 80 : IF (input%film ) THEN
273 :
274 11 : ifft2d = 9*stars%mx1*stars%mx2
275 11 : stars%ufft(0) = stars%ufft(0) + cell%vol*inv_omtil - 1.0
276 :
277 765 : DO i3 = 1, 3*stars%mx3 - 1
278 754 : gm(3) = REAL(i3)
279 754 : IF (gm(3) > 1.5*stars%mx3) gm(3) = gm(3) - 3.0*stars%mx3
280 754 : th = cell%bmat(3, 3)*gm(3)*cell%z1
281 765 : stars%ufft(i3*ifft2d) = stars%ufft(i3*ifft2d) + cell%vol*inv_omtil*SIN(th)/th
282 : ENDDO
283 :
284 : ENDIF
285 :
286 80 : CALL timestop("Oldstepf")
287 80 : CALL timestart("FFT - step function")
288 :
289 : !
290 : ! --> make fft
291 : !
292 2605310 : IF (sym%invs) bfft = 0.0
293 80 : CALL cfft(stars%ufft, bfft, ifftd, 3*stars%mx1, 3*stars%mx1, +1)
294 80 : CALL cfft(stars%ufft, bfft, ifftd, 3*stars%mx2, 9*stars%mx1*stars%mx2, +1)
295 80 : CALL cfft(stars%ufft, bfft, ifftd, 3*stars%mx3, ifftd, +1)
296 :
297 80 : DEALLOCATE (bfft, icm)
298 80 : DEALLOCATE (bfft_local, ufft_local, icm_local)
299 :
300 80 : CALL timestop("FFT - step function")
301 :
302 80 : CALL writeStepfunction(stars)
303 : ENDIF ! (fmpi%irank == 0)
304 :
305 160 : END SUBROUTINE stepf
306 : END MODULE m_stepf
|