Line data Source code
1 : !--------------------------------------------------------------------------------
2 : ! Copyright (c) 2016 Peter Grünberg Institut, Forschungszentrum Jülich, Germany
3 : ! This file is part of FLEUR and available as free software under the conditions
4 : ! of the MIT license as expressed in the LICENSE file in more detail.
5 : !--------------------------------------------------------------------------------
6 :
7 : ! Calculates the HF exchange term
8 : !
9 : ! s s* s s*
10 : ! phi (r) phi (r) phi (r') phi (r')
11 : ! occ. n_1k n'k+q n'k+q n_2k
12 : ! exchange(n,q) = - SUM INT INT ------------------------------------------- dr dr'
13 : ! k,n' | r - r' |
14 : !
15 : ! occ s s ~ ~ s s
16 : ! = - SUM SUM v < phi | phi M > < M phi | phi >
17 : ! k,n' I,J k,IJ n'k+q n_1k q,I q,J n_2k n'k+q
18 : !
19 : ! for the different combinations of n_1 and n_2 and where n' runs only over the valence states.
20 : ! ( n_1,n_2: valence-valence, core-core,core-valence )
21 : !
22 : !
23 : ! At the Gamma point (k=0) v diverges. After diagonalization of v at k=0 the divergence is
24 : ! restricted to the head element I=1. Furthermore, we expand <...> with kp perturbation theory.
25 : ! As a result, the total I=1 element is given by a sum of a divergent 1/k**2-term and an
26 : ! angular dependent term. The former is separated from the numerical k-summation and treated
27 : ! analytically while the latter is spherically averaged and added to the k=0 contribution of
28 : ! the numerical k-summation. (A better knowledge of the integrand's behavior at the BZ edges
29 : ! might further improve the integration.)
30 : !
31 : ! The divergence at the Gamma point is integrated with one of the following algorithms:
32 : ! (1) Switching-Off Function
33 : ! In a sphere of radius k0=radshmin/2 a switching-off function g(k)=1-(k/k0)**n*(n+1-n*k/k0)
34 : ! (n=npot) is defined. The 1/k**2 divergence is subtracted from the BZ integral in the form
35 : ! g(k)/k**2 and integrated analytically. The non-divergent rest is integrated numerically.
36 : ! (2) Periodic Function (similar to the one used by Massidda PRB 48, 5058)
37 : ! The function F(k) = SUM(G) exp(-expo*|k+G|**3) / |k+G|**2 is subtracted from the BZ integral
38 : ! and integrated analytically. The non-divergent rest is integrated numerically.
39 : ! The parameter expo is chosen such that exp(-expo*q**3)=1/2
40 : ! with q = radius of sphere with same volume as BZ.
41 : ! (3) Periodic Function (same as Massidda's) with expo->0
42 : ! The function F(k) = lim(expo->0) SUM(G) exp(-expo*|k+G|**2) / |k+G|**2 is subtracted from
43 : ! the BZ integral and integrated analytically. The contribution to the BZ integral including
44 : ! the "tail" is
45 : ! vol/(8*pi**3) INT F(k) d^3k - P SUM(k) F(k) ( P = principal value ) .
46 : ! For expo->0 the two terms diverge. Therefore a cutoff radius q0 is introduced and related to
47 : ! expo by exp(-expo*q0**2)=delta ( delta = small value, e.g., delta = 1*10.0**-10 ) .
48 : ! The resulting formula
49 : ! vol/(4*pi**1.5*sqrt(expo)) * erf(sqrt(a)*q0) - sum(q,0<q<q0) exp(-expo*q**2)/q**2
50 : ! converges well with q0. (Should be the default.)
51 :
52 : MODULE m_exchange_valence_hf
53 :
54 : USE m_constants
55 : USE m_types
56 : USE m_util
57 : use m_matmul_dgemm
58 : LOGICAL, PARAMETER:: zero_order = .false., ibs_corr = .false.
59 :
60 : CONTAINS
61 72 : SUBROUTINE exchange_valence_hf(k_pack, fi, fmpi, z_k, mpdata, jsp, hybdat, lapw, eig_irr, results, &
62 24 : n_q, wl_iks, xcpot, nococonv, stars, nsest, indx_sest, cmt_nk, mat_ex)
63 :
64 : USE m_wrapper
65 : USE m_trafo
66 : USE m_wavefproducts
67 : USE m_olap
68 : USE m_hsefunctional
69 : USE m_io_hybrid
70 : USE m_kp_perturbation
71 : use m_spmm_inv
72 : use m_spmm_noinv
73 : use m_work_package
74 : use m_judft
75 : #ifdef CPP_MPI
76 : use mpi
77 : #endif
78 : #ifdef _OPENACC
79 : USE cublas
80 : #define CPP_zgemm cublaszgemm
81 : #define CPP_dgemm cublasdgemm
82 : #else
83 : #define CPP_zgemm zgemm
84 : #define CPP_dgemm dgemm
85 : #endif
86 : IMPLICIT NONE
87 :
88 : type(t_k_package), intent(in) :: k_pack
89 : type(t_fleurinput), intent(in) :: fi
90 : TYPE(t_mpi), INTENT(IN) :: fmpi
91 : type(t_mat), intent(in) :: z_k
92 : TYPE(t_results), INTENT(IN) :: results
93 : TYPE(t_xcpot_inbuild), INTENT(IN) :: xcpot
94 : TYPE(t_mpdata), intent(inout) :: mpdata
95 : TYPE(t_nococonv), INTENT(IN) :: nococonv
96 : TYPE(t_lapw), INTENT(IN) :: lapw
97 : type(t_stars), intent(in) :: stars
98 : TYPE(t_mat), INTENT(INOUT) :: mat_ex
99 : TYPE(t_hybdat), INTENT(INOUT) :: hybdat
100 :
101 : ! scalars
102 : INTEGER, INTENT(IN) :: jsp
103 :
104 : ! arrays
105 : INTEGER, INTENT(IN) :: n_q(:)
106 : INTEGER, INTENT(IN) :: nsest(:)
107 : INTEGER, INTENT(IN) :: indx_sest(:, :)
108 :
109 : complex, intent(in) :: cmt_nk(:,:,:)
110 :
111 : REAL, INTENT(IN) :: eig_irr(:, :)
112 : REAL, INTENT(IN) :: wl_iks(:, :)
113 :
114 : ! local scalars
115 : INTEGER :: iband, iband1, jq, iq, nq_idx
116 : INTEGER :: i, ierr, ik
117 : INTEGER :: j, iq_p, start, stride
118 : INTEGER :: n1, n2, nn2, me, max_band_pack
119 : INTEGER :: ikqpt, iob, m, n, k, lda, ldb, ldc
120 : INTEGER :: ok, psize, n_parts, ipart, ibando
121 :
122 : REAL, SAVE :: divergence
123 :
124 : COMPLEX :: cdum2
125 : COMPLEX :: exch0
126 :
127 : LOGICAL, SAVE :: initialize = .true.
128 :
129 : ! local arrays
130 24 : COMPLEX :: exchcorrect(fi%kpts%nkptf)
131 24 : COMPLEX, allocatable :: dcprod(:,:,:) ! (hybdat%nbands(k_pack%nk,jsp), hybdat%nbands(k_pack%nk,jsp), 3)
132 24 : COMPLEX, allocatable :: exch_vv(:,:) !(hybdat%nbands(k_pack%nk,jsp), hybdat%nbands(k_pack%nk,jsp))
133 : COMPLEX :: hessian(3, 3)
134 24 : COMPLEX :: proj_ibsc(3, MAXVAL(hybdat%nobd(:, jsp)), hybdat%nbands(k_pack%nk,jsp))
135 24 : COMPLEX :: olap_ibsc(3, 3, MAXVAL(hybdat%nobd(:, jsp)), MAXVAL(hybdat%nobd(:, jsp)))
136 24 : COMPLEX, ALLOCATABLE :: phase_vv(:, :), c_coul_wavf(:,:), dot_result_c(:,:)
137 24 : REAL, ALLOCATABLE :: r_coul_wavf(:,:), dot_result_r(:,:)
138 24 : LOGICAL :: occup(fi%input%neig), conjg_mtir
139 :
140 : #define CPP_cprod_r cprod_vv%data_r
141 : #define CPP_cprod_c cprod_vv%data_c
142 :
143 24 : type(t_mat) :: cprod_vv, carr3_vv
144 24 : CALL timestart("valence exchange calculation")
145 24 : ik = k_pack%nk
146 :
147 24 : IF (initialize) THEN !it .eq. 1 .and. ik .eq. 1) THEN
148 6 : call calc_divergence(fi%cell, fi%kpts, divergence)
149 6 : if(fmpi%irank == 0) write (*,*) "Divergence:", divergence
150 6 : initialize = .false.
151 : END IF
152 :
153 : ! calculate valence-valence-valence-valence, core-valence-valence-valence
154 : ! and core-valence-valence-core exchange at current k-point
155 : ! the sum over the inner occupied valence states is restricted to the EIBZ(k)
156 : ! the contribution of the Gamma-point is treated separately (see below)
157 :
158 24 : call timestart("alloc phase_vv & dot_res")
159 24 : if(mat_ex%l_real) then
160 50976 : allocate(dot_result_r(hybdat%nbands(ik,jsp), hybdat%nbands(ik,jsp)), stat=ierr, source=0.0)
161 9322 : allocate (phase_vv(MAXVAL(hybdat%nobd(:, jsp)), hybdat%nbands(ik,jsp)), stat=ok, source=cmplx_0)
162 : else
163 14724 : allocate(dot_result_c(hybdat%nbands(ik,jsp), hybdat%nbands(ik,jsp)), stat=ierr, source=cmplx_0)
164 6 : allocate (phase_vv(0,0), stat=ok, source=cmplx_0)
165 : endif
166 24 : IF (ok /= 0) call judft_error('exchange_val_hf: error allocation phase')
167 24 : if(ierr /= 0) call judft_error("can't alloc dot_res")
168 :
169 65700 : allocate(exch_vv(hybdat%nbands(ik,jsp), hybdat%nbands(ik,jsp)), stat=ierr, source=cmplx_0)
170 24 : if(ierr /= 0) call judft_error("Can't alloc exch_vv")
171 :
172 :
173 : !$acc data copyout(exch_vv) copyin(hybdat, hybdat%nbands, hybdat%nbasm, nsest, indx_sest)
174 : !$acc kernels present(exch_vv) default(none)
175 65628 : exch_vv = 0
176 : !$acc end kernels
177 :
178 24 : call timestop("alloc phase_vv & dot_res")
179 :
180 24 : call timestart("q_loop")
181 :
182 112 : DO jq = 1, size(k_pack%q_packs)
183 88 : call timestart("initial setup")
184 88 : iq = k_pack%q_packs(jq)%ptr
185 88 : iq_p = fi%kpts%bkp(iq)
186 :
187 440 : ikqpt = fi%kpts%get_nk(fi%kpts%to_first_bz(fi%kpts%bkf(:, ik) + fi%kpts%bkf(:, iq)))
188 :
189 88 : n_parts = size(k_pack%q_packs(jq)%band_packs)
190 88 : start = k_pack%q_packs(jq)%submpi%rank + 1
191 88 : stride = k_pack%q_packs(jq)%submpi%size
192 88 : call timestop("initial setup")
193 200 : do ipart = start, n_parts, stride
194 : !if (n_parts > 1) write (*, *) "Part ("//int2str(ipart)//"/"//int2str(n_parts)//") ik= "//int2str(ik)//" jq= "//int2str(jq)
195 88 : psize = k_pack%q_packs(jq)%band_packs(ipart)%psize
196 88 : ibando = k_pack%q_packs(jq)%band_packs(ipart)%start_idx
197 88 : call timestart("alloc & copy")
198 88 : call cprod_vv%alloc(mat_ex%l_real, hybdat%nbasm(iq), psize*hybdat%nbands(ik,jsp), mat_name="cprod_vv")
199 : call alloc_dev_cpy(cprod_vv, CPP_cprod_r, CPP_cprod_c)
200 88 : call timestop("alloc & copy")
201 88 : IF (mat_ex%l_real) THEN
202 : CALL wavefproducts_inv(fi, ik, z_k, iq, jsp, ibando, ibando + psize - 1, lapw, &
203 66 : hybdat, mpdata, nococonv, stars, ikqpt, cmt_nk, cprod_vv)
204 : ELSE
205 : CALL wavefproducts_noinv(fi, ik, z_k, iq, jsp, ibando, ibando + psize - 1, lapw,&
206 22 : hybdat, mpdata, nococonv, stars, ikqpt, cmt_nk, cprod_vv)
207 : END IF
208 : ! The sparse matrix technique is not feasible for the HSE
209 : ! functional. Thus, a dynamic adjustment is implemented
210 : ! The mixed basis functions and the potential difference
211 : ! are Fourier transformed, so that the exchange can be calculated
212 : ! in Fourier space
213 : !! REIMPLEMENTING (notes in lab book)
214 88 : IF (xcpot%is_name("hse") .OR. xcpot%is_name("vhse")) THEN
215 0 : CALL timestart("hse: dynamic hse adjustment")
216 0 : iband1 = hybdat%nobd(ikqpt, jsp)
217 : exch_vv = exch_vv + &
218 : dynamic_hse_adjustment(fi%atoms, fi%kpts%bkf(:, iq), iq, &
219 : fi%kpts%nkptf, fi%cell%bmat, fi%cell%omtil, &
220 : fi%hybinp%lcutm1, maxval(fi%hybinp%lcutm1), mpdata%num_radbasfn, maxval(mpdata%num_radbasfn), mpdata%g, &
221 : mpdata%n_g(iq), mpdata%gptm_ptr(:, iq), mpdata%num_gpts(), mpdata%radbasfn_mt, &
222 : hybdat%nbasm(iq), iband1, hybdat%nbands(ik,jsp), nsest, ibando, psize, indx_sest, &
223 : fi%sym, fmpi%irank, cprod_vv%data_r(:, :), &
224 0 : cprod_vv%data_c(:, :), mat_ex%l_real, wl_iks(:iband1, ikqpt), n_q(jq))
225 0 : CALL timestop("hse: dynamic hse adjustment")
226 : END IF
227 :
228 : ! the Coulomb matrix is only evaluated at the irrecuible k-points
229 : ! bra_trafo transforms cprod instead of rotating the Coulomb matrix
230 : ! from IBZ to current k-point
231 :
232 88 : call timestart("bra_trafo stuff")
233 88 : IF (fi%kpts%bkp(iq) /= iq) THEN
234 16 : call carr3_vv%init(cprod_vv, mat_name="carr_3")
235 16 : call bra_trafo(fi, mpdata, hybdat, hybdat%nbands(ik,jsp), iq, psize, phase_vv, cprod_vv, carr3_vv)
236 16 : call cprod_vv%copy(carr3_vv, 1, 1)
237 16 : call carr3_vv%free()
238 : ELSE
239 27390 : phase_vv(:, :) = cmplx_1
240 : END IF
241 88 : call timestop("bra_trafo stuff")
242 :
243 88 : call timestart("alloc coul_wavf")
244 88 : if(cprod_vv%l_real) then
245 264 : allocate(r_coul_wavf(cprod_vv%matsize1, cprod_vv%matsize2), stat=ierr)
246 66 : allocate(c_coul_wavf(0,0))
247 : else
248 88 : allocate(c_coul_wavf(cprod_vv%matsize1, cprod_vv%matsize2), stat=ierr)
249 22 : allocate(r_coul_wavf(0,0))
250 : endif
251 88 : if(ierr /= 0) call judft_error("can't alloc coul_wavf")
252 8548222 : r_coul_wavf = 0.0
253 88 : call timestop("alloc coul_wavf")
254 :
255 88 : call timestart("exchange matrix")
256 88 : call timestart("sparse matrix products")
257 88 : IF (mat_ex%l_real) THEN
258 66 : call spmm_invs(fi, mpdata, hybdat, iq_p, cprod_vv%data_r, r_coul_wavf)
259 : ELSE
260 22 : conjg_mtir = (fi%kpts%bksym(iq) > fi%sym%nop)
261 22 : call spmm_noinvs(fi, mpdata, hybdat, iq_p, conjg_mtir, cprod_vv%data_c, c_coul_wavf)
262 : END IF
263 88 : call timestop("sparse matrix products")
264 :
265 : !$acc enter data copyin(phase_vv, r_coul_wavf, c_coul_wavf)
266 88 : nq_idx = k_pack%q_packs(jq)%rank
267 :
268 :
269 88 : call timestart("apply prefactors carr1_v")
270 : !$acc data copyin(psize, wl_iks, n_q, nq_idx, ibando, ikqpt)
271 88 : if (mat_ex%l_real) then
272 : #ifdef _OPENACC
273 : call timestart("cpy cprod")
274 : call dlacpy("N", size(cprod_vv%data_r, 1), size(cprod_vv%data_r, 2), cprod_vv%data_r, size(cprod_vv%data_r, 1), CPP_cprod_r, size(CPP_cprod_r,1))
275 : call timestop("cpy cprod")
276 : #endif
277 : !$acc enter data copyin(CPP_cprod_r)
278 :
279 : !$acc parallel loop default(none) collapse(3) private(iband, iob, i)&
280 : !$acc present(r_coul_wavf, hybdat, hybdat%nbands, hybdat%nbasm, psize, wl_iks, ikqpt, ibando)&
281 : !$acc present(n_q, nq_idx)
282 3496 : DO iband = 1, hybdat%nbands(ik,jsp)
283 31286 : DO iob = 1, psize
284 8551564 : do i = 1, hybdat%nbasm(iq)
285 8548134 : r_coul_wavf(i, iob + psize*(iband - 1)) = r_coul_wavf(i, iob + psize*(iband - 1)) * wl_iks(ibando + iob - 1, ikqpt) / n_q(nq_idx)
286 : enddo
287 : enddo
288 : enddo
289 : !$acc end parallel loop
290 :
291 : else
292 : #ifdef _OPENACC
293 : call timestart("cpy cprod")
294 : call zlacpy("N", size(cprod_vv%data_c, 1), size(cprod_vv%data_c, 2), cprod_vv%data_c, size(cprod_vv%data_c, 1), CPP_cprod_c, size(CPP_cprod_c,1))
295 : call timestop("cpy cprod")
296 : #endif
297 : !$acc enter data copyin(CPP_cprod_c)
298 :
299 : !$acc parallel loop default(none) collapse(3) private(iband, iob, i)&
300 : !$acc present(c_coul_wavf, hybdat, hybdat%nbands, hybdat%nbasm, psize, wl_iks, ikqpt, ibando)&
301 : !$acc present(n_q, nq_idx)
302 1100 : DO iband = 1, hybdat%nbands(ik,jsp)
303 16192 : DO iob = 1, psize
304 7794038 : do i = 1, hybdat%nbasm(iq)
305 7792960 : c_coul_wavf(i, iob + psize*(iband - 1)) = c_coul_wavf(i, iob + psize*(iband - 1)) * wl_iks(ibando + iob - 1, ikqpt)/n_q(nq_idx)
306 : enddo
307 : enddo
308 : enddo
309 : !$acc end parallel loop
310 : endif
311 : !$acc end data ! (psize, wl_iks, n_q, nq_idx, ibando, ikqpt)
312 88 : call timestop("apply prefactors carr1_v")
313 :
314 88 : call timestart("exch_vv dot prod")
315 88 : m = hybdat%nbands(ik,jsp)
316 88 : n = hybdat%nbands(ik,jsp)
317 88 : k = hybdat%nbasm(iq)
318 88 : lda = hybdat%nbasm(iq)*psize
319 88 : ldb = hybdat%nbasm(iq)*psize
320 88 : ldc = hybdat%nbands(ik,jsp)
321 :
322 88 : IF (mat_ex%l_real) THEN
323 : !calculate all dotproducts for the current iob -> need to skip intermediate iob
324 : !$acc enter data create(dot_result_r)
325 600 : DO iob = 1, psize
326 534 : call timestart("CPP_dgemm")
327 : !call blas_matmul(m,n,k,r_coul_wavf(:,iob:),CPP_cprod_r(:, iob:),dot_result_r,op_a="T")
328 : ASSOCIATE(prod_data=>CPP_cprod_r) !persuade NVHPC that it knows CPP_CPROD_r
329 : !$acc host_data use_device(r_coul_wavf, prod_data, dot_result_r)
330 534 : call CPP_dgemm("T", "N", m, n, k, 1.0, r_coul_wavf(1, iob), lda, prod_data(1, iob), ldb, 0.0, dot_result_r , ldc)
331 : !$acc end host_data
332 : end ASSOCIATE
333 : !$acc wait
334 534 : call timestop("CPP_dgemm")
335 :
336 : !$acc kernels present(exch_vv, dot_result_r, phase_vv, hybdat, hybdat%nbands, nsest, indx_sest) default(none)
337 28390 : DO iband = 1, hybdat%nbands(ik,jsp)
338 173450 : DO n2 = 1, nsest(iband)
339 145126 : nn2 = indx_sest(n2, iband)
340 172916 : exch_vv(nn2, iband) = exch_vv(nn2, iband) + phase_vv(iob, nn2)*conjg(phase_vv(iob, iband))*dot_result_r(iband, nn2)
341 : enddo
342 : END DO
343 : !$acc end kernels
344 : END DO
345 : !$acc exit data delete(CPP_cprod_r, dot_result_r)
346 : ELSE
347 : !calculate all dotproducts for the current iob -> need to skip intermediate iob
348 : !$acc enter data create(dot_result_c)
349 330 : DO iob = 1, psize
350 308 : call timestart("CPP_zgemm")
351 : ASSOCIATE(prod_data=>CPP_cprod_c) !persuade NVHPC that it knows CPP_CPROD_C
352 : !$acc host_data use_device(c_coul_wavf, prod_data, dot_result_c)
353 308 : call CPP_zgemm("C", "N", m, n, k, cmplx_1, c_coul_wavf(1, iob), lda, prod_data(1, iob), ldb, cmplx_0, dot_result_c, ldc)
354 : !$acc end host_data
355 : end ASSOCIATE
356 : !$acc wait
357 308 : call timestop("CPP_zgemm")
358 :
359 : !$acc kernels present(exch_vv, dot_result_c, hybdat, hybdat%nbands, nsest, indx_sest) default(none)
360 15422 : DO iband = 1, hybdat%nbands(ik,jsp)
361 171444 : DO n2 = 1, nsest(iband)
362 156044 : nn2 = indx_sest(n2, iband)
363 171136 : exch_vv(nn2, iband) = exch_vv(nn2, iband) + dot_result_c(iband, nn2)
364 : enddo
365 : END DO
366 : !$acc end kernels
367 : enddo
368 : !$acc exit data delete(CPP_cprod_c, dot_result_c)
369 : END IF
370 : !$acc exit data delete(phase_vv, r_coul_wavf, c_coul_wavf)
371 88 : call timestop("exch_vv dot prod")
372 88 : call timestop("exchange matrix")
373 :
374 88 : call cprod_vv%free()
375 88 : if(allocated(r_coul_wavf)) deallocate(r_coul_wavf)
376 176 : if(allocated(c_coul_wavf)) deallocate(c_coul_wavf)
377 : enddo
378 : END DO !jq
379 : !$acc end data ! exch_vv hybdat, hybdat%nbands, hybdat%nbasm, nsest, indx_sest
380 24 : call timestop("q_loop")
381 :
382 24 : if(allocated(dot_result_r)) deallocate(dot_result_r)
383 24 : if(allocated(dot_result_c)) deallocate(dot_result_c)
384 :
385 : ! add contribution of the gamma point to the different cases (exch_vv,exch_cv,exch_cc)
386 :
387 : ! valence-valence-valence-valence exchange
388 24 : call timestart("gamma point treatment")
389 : IF ((.not. xcpot%is_name("hse")) .AND. &
390 24 : (.not. xcpot%is_name("vhse")) .AND. &
391 : k_pack%submpi%root()) THEN ! no gamma point correction needed for HSE functional
392 :
393 : IF (zero_order .and. .not. ibs_corr) THEN
394 : WRITE (oUnit, '(A)') ' Take zero order terms into account.'
395 : ELSE IF (zero_order .and. ibs_corr) THEN
396 : WRITE (oUnit, '(A)') ' Take zero order terms and ibs-correction into account.'
397 : END IF
398 :
399 : IF (zero_order) THEN
400 : CALL dwavefproducts(dcprod, ik, 1, hybdat%nbands(ik,jsp), 1, hybdat%nbands(ik,jsp), .false., fi%input, fi%atoms, mpdata, fi%hybinp, &
401 : fi%cell, hybdat, fi%kpts, fi%sym, fi%noco, nococonv, lapw, jsp, eig_irr)
402 :
403 : ! make dcprod hermitian
404 : DO n1 = 1, hybdat%nbands(ik,jsp)
405 : DO n2 = 1, n1
406 : dcprod(n1, n2, :) = (dcprod(n1, n2, :) - conjg(dcprod(n2, n1, :)))/2
407 : dcprod(n2, n1, :) = -conjg(dcprod(n1, n2, :))
408 : END DO
409 : END DO
410 :
411 : IF (ibs_corr) THEN
412 : CALL ibs_correction(ik, fi%atoms, fi%input, jsp, hybdat, mpdata, fi%hybinp, &
413 : lapw, fi%kpts, fi%cell, MAXVAL(hybdat%nobd(:, jsp)), &
414 : fi%sym, fi%noco, nococonv, proj_ibsc, olap_ibsc)
415 : END IF
416 : END IF
417 :
418 : !This should be done with w_iks I guess!TODO
419 1644 : occup = .false.
420 1644 : DO i = 1, results%neig(ik, jsp)
421 1644 : IF (results%ef >= eig_irr(i, ik)) THEN
422 230 : occup(i) = .true.
423 1390 : ELSE IF ((eig_irr(i, ik) - results%ef) <= 1E-06) THEN
424 0 : occup(i) = .true.
425 : END IF
426 : END DO
427 :
428 1252 : DO n1 = 1, hybdat%nbands(ik,jsp)
429 9260 : DO n2 = 1, nsest(n1)!n1
430 8008 : nn2 = indx_sest(n2, n1)
431 72072 : exchcorrect = 0
432 8008 : exch0 = 0
433 :
434 : ! if zero_order = .true. add averaged k-dependent term to the numerical integration at Gamma-point contribution
435 :
436 : ! if we start with a system with a small DFT band gap (like GaAs), the contribution
437 : ! of the highest occupied and lowest unoccupied state in Hessian is typically
438 : ! large; a correct numerical integration requires a dense k-point mesh, so
439 : ! we don't add the contribution exchcorrect for such materials
440 :
441 : IF (zero_order) THEN
442 : hessian = 0
443 : IF (occup(n1) .and. occup(nn2)) THEN
444 : DO i = 1, 3
445 : j = i
446 : DO iband = 1, hybdat%nbands(ik,jsp)
447 : IF (occup(iband)) THEN
448 : hessian(i, j) = hessian(i, j) + conjg(dcprod(iband, n1, i))*dcprod(iband, nn2, j)
449 : END IF
450 : hessian(i, j) = hessian(i, j) - dcprod(iband, nn2, i)*conjg(dcprod(iband, n1, j))
451 : END DO
452 :
453 : ! ibs correction
454 : IF (ibs_corr) THEN
455 : hessian(i, j) = hessian(i, j) - olap_ibsc(i, j, n1, nn2)/fi%cell%omtil
456 : DO iband = 1, hybdat%nbands(ik,jsp)
457 : hessian(i, j) = hessian(i, j) + conjg(proj_ibsc(i, nn2, iband))*proj_ibsc(j, n1, iband)/fi%cell%omtil
458 : END DO
459 : END IF
460 : END DO
461 : ELSE
462 : DO i = 1, 3
463 : j = i
464 : DO iband = 1, hybdat%nbands(ik,jsp)
465 : IF (occup(iband)) THEN
466 : hessian(i, j) = hessian(i, j) + conjg(dcprod(iband, n1, i))*dcprod(iband, nn2, j)
467 : END IF
468 : END DO
469 : END DO
470 : END IF
471 :
472 : exchcorrect(1) = fpi_const/3*(hessian(1, 1) + hessian(2, 2) + hessian(3, 3))
473 : exch0 = exchcorrect(1)/fi%kpts%nkptf
474 : END IF
475 :
476 : ! tail correction/contribution from all other k-points (it goes into exchcorrect )
477 :
478 : ! Analytic contribution
479 :
480 8008 : cdum2 = 0
481 : !multiply divergent contribution with occupation number;
482 : !this only affects metals
483 8008 : IF (n1 == nn2) THEN
484 1228 : cdum2 = fpi_const/fi%cell%omtil*divergence*wl_iks(n1, ik)*fi%kpts%nkptf
485 : END IF
486 :
487 : ! due to the symmetrization afterwards the factor 1/n_q(1) must be added
488 :
489 8008 : IF (n1 == nn2) hybdat%div_vv(n1, ik, jsp) = REAL(cdum2)
490 9236 : exch_vv(nn2, n1) = exch_vv(nn2, n1) + (exch0 + cdum2)/n_q(1)
491 :
492 : END DO !n2
493 : END DO !n1
494 : END IF ! xcpot%icorr .ne. icorr_hse
495 24 : call timestop("gamma point treatment")
496 :
497 24 : IF (mat_ex%l_real) THEN
498 50922 : IF (any(abs(aimag(exch_vv)) > 1E-08)) then
499 : CALL judft_warn('unusually large imaginary part of exch_vv. Max:' // float2str(maxval(abs(aimag(exch_vv)))), &
500 0 : calledby='exchange_val_hf.F90')
501 : endif
502 : END IF
503 :
504 : ! write exch_vv in mat_ex
505 24 : call timestart("alloc mat_ex")
506 24 : if (.not. mat_ex%allocated()) then
507 24 : if (k_pack%submpi%root()) then
508 24 : CALL mat_ex%alloc(matsize1=hybdat%nbands(ik,jsp))
509 : else
510 0 : CALL mat_ex%alloc(matsize1=1)
511 : endif
512 : endif
513 24 : call timestop("alloc mat_ex")
514 :
515 : #ifdef CPP_MPI
516 24 : call timestart("pre exchmat reduce barrier")
517 24 : call MPI_Barrier(k_pack%submpi%comm, ierr)
518 24 : call timestop("pre exchmat reduce barrier")
519 : #endif
520 :
521 24 : call timestart("reduce exch_vv>mat_ex")
522 24 : IF (mat_ex%l_real) THEN
523 : #ifdef CPP_MPI
524 50922 : call MPI_Reduce(real(exch_vv), mat_ex%data_r, hybdat%nbands(ik,jsp)**2, MPI_DOUBLE_PRECISION, MPI_SUM, 0, k_pack%submpi%comm, ierr)
525 : #else
526 : mat_ex%data_r = exch_vv
527 : #endif
528 : ELSE
529 : #ifdef CPP_MPI
530 6 : call MPI_Reduce(exch_vv, mat_ex%data_c, hybdat%nbands(ik,jsp)**2, MPI_DOUBLE_COMPLEX, MPI_SUM, 0, k_pack%submpi%comm, ierr)
531 : #else
532 : mat_ex%data_c = exch_vv
533 : #endif
534 : END IF
535 24 : call timestop("reduce exch_vv>mat_ex")
536 24 : CALL timestop("valence exchange calculation")
537 48 : END SUBROUTINE exchange_valence_hf
538 :
539 6 : SUBROUTINE calc_divergence(cell, kpts, divergence)
540 : IMPLICIT NONE
541 :
542 : TYPE(t_cell), INTENT(IN) :: cell
543 : TYPE(t_kpts), INTENT(IN) :: kpts
544 : REAL, INTENT(INOUT) :: divergence
545 :
546 : INTEGER :: ix, iy, iz, sign, n
547 : logical :: found
548 : REAL :: expo, rrad, k(3), kv1(3), kv2(3), kv3(3), knorm2, nkpt3(3)
549 : COMPLEX :: cdum
550 :
551 6 : call timestart("calc_divergence")
552 6 : expo = 5e-3
553 6 : rrad = sqrt(-log(5e-3)/expo)
554 6 : cdum = sqrt(expo)*rrad
555 6 : divergence = real(cell%omtil/(tpi_const**2)*sqrt(pi_const/expo)*cerf(cdum))
556 6 : rrad = rrad**2
557 24 : nkpt3 = kpts%calcNkpt3()
558 24 : kv1 = cell%bmat(1, :)/nkpt3(1)
559 24 : kv2 = cell%bmat(2, :)/nkpt3(2)
560 24 : kv3 = cell%bmat(3, :)/nkpt3(3)
561 : n = 1
562 : found = .true.
563 :
564 1000 : DO WHILE (found)
565 994 : found = .false.
566 184464 : DO ix = -n, n
567 23945618 : DO iy = -(n - abs(ix)), n - abs(ix)
568 23761154 : iz = n - abs(ix) - abs(iy)
569 70737028 : DO sign = -1, 1, 2
570 47157356 : iz = sign*iz
571 47157356 : k(1) = ix*kv1(1) + iy*kv2(1) + iz*kv3(1)
572 47157356 : k(2) = ix*kv1(2) + iy*kv2(2) + iz*kv3(2)
573 47157356 : k(3) = ix*kv1(3) + iy*kv2(3) + iz*kv3(3)
574 47157356 : knorm2 = k(1)**2 + k(2)**2 + k(3)**2
575 47157356 : IF (knorm2 < rrad) THEN
576 7420164 : found = .true.
577 7420164 : divergence = divergence - exp(-expo*knorm2)/knorm2/kpts%nkptf
578 : END IF
579 70553558 : IF (iz == 0) exit
580 : END DO
581 : END DO
582 : END DO
583 994 : n = n + 1
584 : END DO
585 6 : call timestop("calc_divergence")
586 6 : END SUBROUTINE calc_divergence
587 :
588 0 : function calc_divergence2(cell, kpts) result(divergence)
589 : USE m_types
590 : USE m_constants
591 : USE m_util, ONLY: cerf
592 : implicit none
593 : TYPE(t_cell), INTENT(IN) :: cell
594 : TYPE(t_kpts), INTENT(IN) :: kpts
595 : REAL :: divergence
596 :
597 : INTEGER :: ikpt
598 : REAL, PARAMETER :: expo = 5e-3
599 : REAL :: rrad, k(3), knorm2
600 : COMPLEX :: cdum
601 :
602 0 : rrad = sqrt(-log(5e-3)/expo)
603 0 : cdum = sqrt(expo)*rrad
604 0 : divergence = real(cell%omtil/(tpi_const**2)*sqrt(pi_const/expo)*cerf(cdum))
605 0 : rrad = rrad**2
606 :
607 0 : do ikpt = 1, kpts%nkptf
608 0 : k = kpts%bkf(:, ikpt)
609 0 : knorm2 = norm2(k)
610 0 : IF (knorm2 < rrad) THEN
611 0 : divergence = divergence - exp(-expo*knorm2)/knorm2/kpts%nkptf
612 : END IF
613 : enddo
614 0 : end function calc_divergence2
615 :
616 0 : subroutine recombine_parts(in_part, ipart, psizes, out_total)
617 : use m_types
618 : type(t_mat), intent(in) :: in_part
619 : integer, intent(in) :: ipart, psizes(:)
620 : type(t_mat), intent(inout) :: out_total
621 :
622 : integer :: nbands, iband, iob, offset, i, tsize
623 : logical :: l_real
624 :
625 0 : l_real = in_part%l_real
626 0 : tsize = sum(psizes)
627 :
628 0 : nbands = in_part%matsize2/psizes(ipart)
629 0 : if (out_total%matsize2/tsize /= nbands) call judft_error("nbands seems different")
630 0 : offset = 0
631 0 : do i = 1, ipart - 1
632 0 : offset = offset + psizes(i)
633 : enddo
634 :
635 0 : do iband = 1, nbands
636 0 : do iob = 1, psizes(ipart)
637 0 : if (l_real) then
638 0 : out_total%data_r(:, iob + (iband - 1)*tsize + offset) = in_part%data_r(:, iob + (iband - 1)*psizes(ipart))
639 : else
640 0 : out_total%data_c(:, iob + (iband - 1)*tsize + offset) = in_part%data_c(:, iob + (iband - 1)*psizes(ipart))
641 : endif
642 : enddo
643 : enddo
644 0 : end subroutine recombine_parts
645 :
646 0 : subroutine alloc_dev_cpy(mat, arr_r, arr_c)
647 : implicit none
648 : type(t_mat), intent(in) :: mat
649 :
650 : real, allocatable, intent(inout) :: arr_r(:,:)
651 : complex, allocatable, intent(inout) :: arr_c(:,:)
652 : integer :: ierr
653 :
654 : #ifdef _OPENACC
655 : if(allocated(arr_r)) deallocate(arr_r)
656 : if(allocated(arr_c)) deallocate(arr_c)
657 :
658 : if(mat%l_real) then
659 : allocate(arr_r(mat%matsize1, mat%matsize2), stat=ierr)
660 : else
661 : allocate(arr_c(mat%matsize1, mat%matsize2), stat=ierr)
662 : endif
663 :
664 : if(ierr /= 0) call judft_error("can't alloc. prob. no mem.")
665 : #endif
666 0 : end subroutine alloc_dev_cpy
667 :
668 : END MODULE m_exchange_valence_hf
|