Line data Source code
1 : module m_wavefproducts_noinv
2 : USE m_types_hybdat
3 :
4 : CONTAINS
5 22 : SUBROUTINE wavefproducts_noinv(fi, ik, z_k, iq, jsp, bandoi, bandof, lapw, hybdat, mpdata, nococonv, stars, ikqpt, cmt_nk, cprod)
6 : USE m_types
7 : use m_juDFT
8 : use m_wavefproducts_aux
9 : use m_constants, only: cmplx_0
10 : IMPLICIT NONE
11 :
12 : type(t_fleurinput), intent(in) :: fi
13 : type(t_nococonv), intent(in) :: nococonv
14 : TYPE(t_lapw), INTENT(IN) :: lapw
15 : TYPE(t_mpdata), intent(in) :: mpdata
16 : TYPE(t_hybdat), INTENT(INOUT) :: hybdat
17 : type(t_mat), intent(in) :: z_k ! z_k is also z_k_p since ik < nkpt
18 : type(t_stars), intent(in) :: stars
19 : type(t_mat), intent(inout) :: cprod
20 :
21 : ! - scalars -
22 : INTEGER, INTENT(IN) :: ik, iq, jsp, bandoi, bandof
23 : INTEGER, INTENT(INOUT) :: ikqpt
24 :
25 : complex, intent(in) :: cmt_nk(:,:,:)
26 :
27 : INTEGER :: g_t(3)
28 : REAL :: kqpt(3), kqpthlp(3)
29 22 : complex, allocatable :: c_phase_kqpt(:)
30 22 : type(t_mat) :: z_kqpt_p
31 :
32 22 : call timestart("wavefproducts_noinv")
33 : ! calculate ikqpt
34 88 : kqpthlp = fi%kpts%bkf(:, ik) + fi%kpts%bkf(:, iq)
35 22 : kqpt = fi%kpts%to_first_bz(kqpthlp)
36 :
37 : ! if k+q outside of first BZ put we need this shift
38 88 : g_t = nint(kqpt - kqpthlp)
39 : ! determine number of kqpt
40 22 : ikqpt = fi%kpts%get_nk(kqpt)
41 22 : call timestart("alloc c_phase_kqpt")
42 1144 : allocate (c_phase_kqpt(hybdat%nbands(ikqpt,jsp)), source=cmplx_0)
43 22 : call timestop("alloc c_phase_kqpt")
44 :
45 22 : IF (.not. fi%kpts%is_kpt(kqpt)) then
46 0 : call juDFT_error('wavefproducts: k-point not found')
47 : endif
48 :
49 : !$acc data copyin(cprod) create(cprod%data_r) copyout(cprod%data_c)
50 : !$acc kernels
51 7792982 : cprod%data_c(:,:) = 0.0
52 : !$acc end kernels
53 : call wavefproducts_IS_FFT(fi, ik, iq, g_t, jsp, bandoi, bandof, mpdata, hybdat, lapw, stars, nococonv, &
54 22 : ikqpt, z_k, z_kqpt_p, c_phase_kqpt, cprod)
55 :
56 : call wavefproducts_noinv_MT(fi, ik, iq, bandoi, bandof, nococonv, mpdata, hybdat, &
57 22 : jsp, ikqpt, z_kqpt_p, c_phase_kqpt, cmt_nk, cprod%data_c)
58 : !$acc end data ! cprod
59 :
60 22 : call timestop("wavefproducts_noinv")
61 :
62 22 : END SUBROUTINE wavefproducts_noinv
63 :
64 88 : subroutine wavefproducts_noinv_MT(fi, ik, iq, bandoi, bandof, nococonv, mpdata, hybdat, jsp, ikqpt, &
65 88 : z_kqpt_p, c_phase_kqpt, cmt_nk, cprod)
66 : use m_types
67 : USE m_constants
68 : use m_io_hybrid
69 : use m_judft
70 : use m_wavefproducts_aux
71 : use m_calc_cmt
72 : IMPLICIT NONE
73 : type(t_fleurinput), intent(in) :: fi
74 : type(t_nococonv), intent(in) :: nococonv
75 : TYPE(t_mpdata), INTENT(IN) :: mpdata
76 : TYPE(t_hybdat), INTENT(IN) :: hybdat
77 : type(t_mat), intent(in) :: z_kqpt_p
78 : complex, intent(inout) :: cprod(:,:)
79 :
80 : ! - scalars -
81 : INTEGER, INTENT(IN) :: ik, iq, jsp, bandoi, bandof
82 : INTEGER, INTENT(IN) :: ikqpt
83 :
84 : ! - arrays -
85 : complex, intent(in) :: c_phase_kqpt(hybdat%nbands(ikqpt,jsp))
86 :
87 : complex, intent(in) :: cmt_nk(:,:,:)
88 :
89 : ! - local scalars -
90 : INTEGER :: iatm, iatm2, l, n, l1, l2, n1, n2, lm_0, lm1_0, lm2_0, lm1_cprod, lm2_cprod
91 : INTEGER :: lm, lm1, lm2, m1, m2, i, ll, j, k, ok
92 : INTEGER :: itype, ieq, m, psize, ishift, ioffset
93 :
94 : REAL :: sr2, atom_phase1, atom_phase2
95 :
96 : COMPLEX :: atom_phase, cscal, cscal2, add1, add2
97 :
98 : LOGICAL :: offdiag
99 :
100 : ! - local arrays -
101 88 : INTEGER :: lmstart(0:fi%atoms%lmaxd, fi%atoms%ntype)
102 :
103 88 : COMPLEX, allocatable :: cmt_ikqpt(:,:,:)
104 :
105 88 : call timestart("wavefproducts_noinv5 MT")
106 274940 : allocate(cmt_ikqpt(bandoi:bandof, hybdat%maxlmindx, fi%atoms%nat), stat=ok, source=cmplx_0)
107 88 : if(ok /= 0) call juDFT_error("alloc cmt_ikqpt")
108 :
109 88 : psize = bandof-bandoi+1
110 : ! lmstart = lm start index for each l-quantum number and atom type (for cmt-coefficients)
111 88 : call timestart("set lmstart")
112 220 : DO itype = 1, fi%atoms%ntype
113 1496 : DO l = 0, fi%atoms%lmax(itype)
114 12584 : lmstart(l, itype) = sum([(mpdata%num_radfun_per_l(ll, itype)*(2*ll + 1), ll=0, l - 1)])
115 : END DO
116 : END DO
117 88 : call timestop("set lmstart")
118 :
119 88 : sr2 = SQRT(2.0)
120 :
121 : ! read in cmt coefficients from direct access file cmt
122 88 : call calc_cmt(fi%atoms, fi%cell, fi%input, fi%noco, nococonv, fi%hybinp, hybdat, mpdata, fi%kpts, fi%sym, z_kqpt_p, jsp, ikqpt, c_phase_kqpt, cmt_ikqpt)
123 :
124 88 : call timestart("loop over l, l1, l2, n, n1, n2")
125 : !$acc data copyin(mpdata, mpdata%num_radbasfn, mpdata%num_radfun_per_l, mpdata%l1, mpdata%l2, mpdata%n1, mpdata%n2,&
126 : !$acc hybdat, hybdat%prodm, hybdat%nbands, hybdat%nindxp1, hybdat%gauntarr, &
127 : !$acc lmstart, cmt_nk, cmt_ikqpt)
128 :
129 88 : call timestart("gpu cpy in")
130 : !$acc wait
131 88 : call timestop("gpu cpy in")
132 :
133 88 : lm_0 = 0
134 220 : do iatm = 1,fi%atoms%nat
135 132 : itype = fi%atoms%itype(iatm)
136 528 : atom_phase = exp(-ImagUnit*tpi_const*dot_product(fi%kpts%bkf(:, iq), fi%atoms%taual(:, iatm)))
137 528 : atom_phase1 = sin(tpi_const*dot_product(fi%kpts%bkf(:, iq), fi%atoms%taual(:, iatm))) / sr2 ! This is not just the phase!!
138 528 : atom_phase2 = cos(tpi_const*dot_product(fi%kpts%bkf(:, iq), fi%atoms%taual(:, iatm))) / sr2 ! This is not just the phase!!
139 :
140 132 : iatm2 = fi%sym%invsatnr(iatm)
141 132 : IF(iatm2.EQ.0) iatm2 = iatm
142 :
143 1452 : ioffset = sum((/((2*l + 1)*mpdata%num_radbasfn(l, itype), l=0, fi%hybinp%lcutm1(itype))/))
144 :
145 :
146 220 : IF(iatm2.LT.iatm) THEN ! iatm is the second of two atoms that are mapped onto each other by inversion symmetry
147 :
148 0 : DO l = 0, fi%hybinp%lcutm1(itype)
149 0 : lm_0 = lm_0 + mpdata%num_radbasfn(l, itype)*(2*l + 1) ! go to the lm start index of the next l-quantum number
150 : END DO
151 : CYCLE
152 :
153 132 : ELSE IF(iatm2.GT.iatm) THEN ! iatm is the first of two atoms that are mapped onto each other by inversion symmetry
154 :
155 : ! The default(shared) in the OMP part of the following loop is needed to avoid compilation issues on gfortran 7.5.
156 0 : DO l = 0, fi%hybinp%lcutm1(itype)
157 : #ifdef _OPENACC
158 : !$acc data copyin(l, iatm, iatm2, itype, lm_0, bandoi, bandof, psize, atom_phase, atom_phase1, atom_phase2, ioffset, ik, jsp)
159 :
160 : !$acc parallel loop default(none) collapse(2)&
161 : !$acc present(lmstart, cmt_ikqpt, cmt_nk, cprod,&
162 : !$acc l, iatm, iatm2, itype, lm_0, bandoi, bandof, psize, atom_phase, atom_phase1, atom_phase2, ioffset, ik, jsp, &
163 : !$acc mpdata, mpdata%num_radbasfn, mpdata%num_radfun_per_l, mpdata%l1, mpdata%l2, mpdata%n1, mpdata%n2,&
164 : !$acc hybdat, hybdat%prodm, hybdat%nbands, hybdat%nindxp1, hybdat%gauntarr)&
165 : !$acc private(k,j,n,i,l1, l2, n1, n2, offdiag, lm, m, cscal, cscal2, add1, add2, ishift, lm1, m1, m2, lm2, lm1_cprod, lm2_cprod)
166 : #else
167 : !$OMP PARALLEL DO default(shared) collapse(2) schedule(dynamic) &
168 : !$OMP private(k,j,n, n1, l1, n2, l2, offdiag, lm1_0, lm2_0, lm, m, cscal, cscal2, add1, add2, ishift, lm1, m1, m2, lm2, i, lm1_cprod, lm2_cprod)&
169 : !$OMP shared(hybdat, bandoi, bandof, lmstart, lm_0, mpdata, cmt_ikqpt, cmt_nk, cprod, itype, l) &
170 0 : !$OMP shared(iatm, iatm2, psize, atom_phase, atom_phase1, atom_phase2, ioffset, ik)
171 : #endif
172 : do k = 1, hybdat%nbands(ik,jsp) !This loop covers all bands that are to be used for the hybrid functionals calculation
173 : do j = bandoi, bandof ! This loop only covers occupied bands
174 : !$acc loop seq
175 : DO n = 1, hybdat%nindxp1(l, itype) ! loop over basis-function products
176 : ! don't call object funcktions in acc
177 : l1 = mpdata%l1(n, l, itype) !
178 : l2 = mpdata%l2(n, l, itype) ! current basis-function mpdatauct
179 : n1 = mpdata%n1(n, l, itype) ! = bas(:,n1,l1,itype)*bas(:,n2,l2,itype) = b1*b2
180 : n2 = mpdata%n2(n, l, itype) !
181 :
182 : IF (mod(l1 + l2 + l, 2) == 0) THEN
183 : offdiag = (l1 /= l2) .or. (n1 /= n2) ! offdiag=true means that b1*b2 and b2*b1 are different combinations
184 : !(leading to the same basis-function product)
185 :
186 : lm1_0 = lmstart(l1, itype) ! start at correct lm index of cmt-coefficients
187 : lm2_0 = lmstart(l2, itype) ! (corresponding to l1 and l2)
188 :
189 : lm = lm_0
190 :
191 : !$acc loop seq
192 : DO m = -l, l
193 :
194 : ishift = -2 * m * mpdata%num_radbasfn(l, itype)
195 : lm1_cprod = lm + (iatm - fi%atoms%firstAtom(itype))*ioffset
196 : lm2_cprod = lm + (iatm2 - fi%atoms%firstAtom(itype))*ioffset + ishift
197 :
198 : cscal = 0.0
199 : cscal2 = 0.0
200 :
201 : lm1 = lm1_0 + n1 ! go to lm index for m1=-l1
202 : !$acc loop seq
203 : DO m1 = -l1, l1
204 : m2 = m1 + m ! Gaunt condition -m1+m2-m=0
205 :
206 : IF (abs(m2) <= l2) THEN
207 : lm2 = lm2_0 + n2 + (m2 + l2)*mpdata%num_radfun_per_l(l2, itype)
208 : IF (abs(hybdat%gauntarr(1, l1, l2, l, m1, m)) > 1e-12) THEN
209 : cscal = cscal + hybdat%gauntarr(1, l1, l2, l, m1, m) * REAL(cmt_ikqpt(j, lm2, iatm)) * REAL(conjg(cmt_nk(k, lm1, iatm))) + &
210 : hybdat%gauntarr(1, l1, l2, l, m1, m) * REAL(cmt_ikqpt(j, lm2, iatm2)) * REAL(conjg(cmt_nk(k, lm1, iatm2)))
211 : cscal2 = cscal2 + hybdat%gauntarr(1, l1, l2, l, m1, m) * REAL(cmt_ikqpt(j, lm2, iatm2)) * REAL(conjg(cmt_nk(k, lm1, iatm))) - &
212 : hybdat%gauntarr(1, l1, l2, l, m1, m) * REAL(cmt_ikqpt(j, lm2, iatm)) * REAL(conjg(cmt_nk(k, lm1, iatm2)))
213 : END IF
214 : END IF
215 :
216 : m2 = m1 - m ! switch role of b1 and b2
217 : IF (abs(m2) <= l2 .and. offdiag) THEN
218 : lm2 = lm2_0 + n2 + (m2 + l2)*mpdata%num_radfun_per_l(l2, itype)
219 : IF (abs(hybdat%gauntarr(2, l1, l2, l, m1, m)) > 1e-12) THEN
220 : cscal = cscal + hybdat%gauntarr(2, l1, l2, l, m1, m) * REAL(cmt_ikqpt(j, lm1, iatm)) * REAL(conjg(cmt_nk(k, lm2, iatm))) + &
221 : hybdat%gauntarr(2, l1, l2, l, m1, m) * REAL(cmt_ikqpt(j, lm1, iatm2)) * REAL(conjg(cmt_nk(k, lm2, iatm2)))
222 : cscal2 = cscal2 + hybdat%gauntarr(2, l1, l2, l, m1, m) * REAL(cmt_ikqpt(j, lm1, iatm2)) * REAL(conjg(cmt_nk(k, lm2, iatm))) - &
223 : hybdat%gauntarr(2, l1, l2, l, m1, m) * REAL(cmt_ikqpt(j, lm1, iatm)) * REAL(conjg(cmt_nk(k, lm2, iatm2)))
224 : END IF
225 : END IF
226 :
227 : lm1 = lm1 + mpdata%num_radfun_per_l(l1, itype) ! go to lm start index for next m1-quantum number
228 :
229 : END DO !m1
230 :
231 : add1 = cscal * atom_phase2 + cscal2 * atom_phase1
232 : add2 = cscal2 * atom_phase2 - cscal * atom_phase1
233 :
234 : !$acc loop seq
235 : DO i = 1, mpdata%num_radbasfn(l, itype)
236 : cprod(i + lm1_cprod, (j-bandoi+1) + (k-1)*psize) = cprod(i + lm1_cprod, (j-bandoi+1) + (k-1)*psize) + hybdat%prodm(i, n, l, itype) * add1
237 : cprod(i + lm2_cprod, (j-bandoi+1) + (k-1)*psize) = cprod(i + lm2_cprod, (j-bandoi+1) + (k-1)*psize) + hybdat%prodm(i, n, l, itype) * add2
238 : ENDDO
239 : lm = lm + mpdata%num_radbasfn(l, itype)
240 : END DO ! m
241 : ENDIF
242 : END DO !n
243 : enddo !j
244 : enddo !k
245 : #ifdef _OPENACC
246 : !$acc end parallel loop
247 :
248 : !$acc end data
249 : #else
250 : !$OMP END PARALLEL DO
251 : #endif
252 0 : lm_0 = lm_0 + mpdata%num_radbasfn(l, itype)*(2*l + 1) ! go to the lm start index of the next l-quantum number
253 : END DO ! l loop
254 :
255 : ELSE ! (no inversion-symmetry mapping of atoms)
256 :
257 : ! The default(shared) in the OMP part of the following loop is needed to avoid compilation issues on gfortran 7.5.
258 792 : DO l = 0, fi%hybinp%lcutm1(itype)
259 : #ifdef _OPENACC
260 : !$acc data copyin(l, iatm, itype, lm_0, bandoi, bandof, psize, atom_phase, ik, jsp)
261 :
262 : !$acc parallel loop default(none) collapse(2)&
263 : !$acc present(lmstart, cmt_ikqpt, cmt_nk, cprod,&
264 : !$acc l, iatm, itype, lm_0, bandoi, bandof, psize, atom_phase, ik, jsp, &
265 : !$acc mpdata, mpdata%num_radbasfn, mpdata%num_radfun_per_l, mpdata%l1, mpdata%l2, mpdata%n1, mpdata%n2,&
266 : !$acc hybdat, hybdat%prodm, hybdat%nbands, hybdat%nindxp1, hybdat%gauntarr)&
267 : !$acc private(k,j,n,i,l1, l2, n1, n2, offdiag, lm, m, cscal, lm1, m1, m2, lm2)
268 : #else
269 : !$OMP PARALLEL DO default(shared) collapse(2) schedule(dynamic) &
270 : !$OMP private(k,j,n, n1, l1, n2, l2, offdiag, lm1_0, lm2_0, lm, m, cscal, lm1, m1, m2, lm2, i)&
271 : !$OMP shared(hybdat, bandoi, bandof, lmstart, lm_0, mpdata, cmt_ikqpt, cmt_nk, cprod, itype, l) &
272 660 : !$OMP shared(iatm, psize, atom_phase, ik)
273 : #endif
274 : do k = 1, hybdat%nbands(ik,jsp)
275 : do j = bandoi, bandof
276 : !$acc loop seq
277 : DO n = 1, hybdat%nindxp1(l, itype) ! loop over basis-function products
278 : ! don't call object funcktions in acc
279 : l1 = mpdata%l1(n, l, itype) !
280 : l2 = mpdata%l2(n, l, itype) ! current basis-function mpdatauct
281 : n1 = mpdata%n1(n, l, itype) ! = bas(:,n1,l1,itype)*bas(:,n2,l2,itype) = b1*b2
282 : n2 = mpdata%n2(n, l, itype) !
283 :
284 : IF (mod(l1 + l2 + l, 2) == 0) THEN
285 : offdiag = (l1 /= l2) .or. (n1 /= n2) ! offdiag=true means that b1*b2 and b2*b1 are different combinations
286 : !(leading to the same basis-function product)
287 :
288 : lm1_0 = lmstart(l1, itype) ! start at correct lm index of cmt-coefficients
289 : lm2_0 = lmstart(l2, itype) ! (corresponding to l1 and l2)
290 :
291 : lm = lm_0
292 : !$acc loop seq
293 : DO m = -l, l
294 : cscal = 0.0
295 :
296 : lm1 = lm1_0 + n1 ! go to lm index for m1=-l1
297 : !$acc loop seq
298 : DO m1 = -l1, l1
299 : m2 = m1 + m ! Gaunt condition -m1+m2-m=0
300 :
301 : IF (abs(m2) <= l2) THEN
302 : lm2 = lm2_0 + n2 + (m2 + l2)*mpdata%num_radfun_per_l(l2, itype)
303 : IF (abs(hybdat%gauntarr(1, l1, l2, l, m1, m)) > 1e-12) THEN
304 : cscal = cscal + hybdat%gauntarr(1, l1, l2, l, m1, m) * cmt_ikqpt(j, lm2, iatm) * conjg(cmt_nk(k, lm1, iatm))
305 : END IF
306 : END IF
307 :
308 : m2 = m1 - m ! switch role of b1 and b2
309 : IF (abs(m2) <= l2 .and. offdiag) THEN
310 : lm2 = lm2_0 + n2 + (m2 + l2)*mpdata%num_radfun_per_l(l2, itype)
311 : IF (abs(hybdat%gauntarr(2, l1, l2, l, m1, m)) > 1e-12) THEN
312 : cscal = cscal + hybdat%gauntarr(2, l1, l2, l, m1, m) * cmt_ikqpt(j, lm1, iatm) * conjg(cmt_nk(k, lm2, iatm))
313 : END IF
314 : END IF
315 :
316 : lm1 = lm1 + mpdata%num_radfun_per_l(l1, itype) ! go to lm start index for next m1-quantum number
317 :
318 : END DO !m1
319 :
320 : lm = lm_0 + (m + l)*mpdata%num_radbasfn(l, itype)
321 : !$acc loop seq
322 : DO i = 1, mpdata%num_radbasfn(l, itype)
323 : cprod(i + lm, (j-bandoi+1) + (k-1)*psize) = cprod(i + lm, (j-bandoi+1) + (k-1)*psize) + hybdat%prodm(i, n, l, itype)*cscal*atom_phase
324 : ENDDO
325 : END DO
326 : ENDIF
327 : END DO !n
328 : enddo !j
329 : enddo !k
330 : #ifdef _OPENACC
331 : !$acc end parallel loop
332 :
333 : !$acc end data
334 : #else
335 : !$OMP END PARALLEL DO
336 : #endif
337 792 : lm_0 = lm_0 + mpdata%num_radbasfn(l, itype)*(2*l + 1) ! go to the lm start index of the next l-quantum number
338 : END DO ! l loop
339 : END IF
340 : END DO
341 : !$acc end data
342 88 : deallocate(cmt_ikqpt)
343 :
344 88 : call timestop("loop over l, l1, l2, n, n1, n2")
345 88 : call timestop("wavefproducts_noinv5 MT")
346 88 : end subroutine wavefproducts_noinv_MT
347 : end module m_wavefproducts_noinv
|