LCOV - code coverage report
Current view: top level - hybrid - exchange_val_hf.F90 (source / functions) Hit Total Coverage
Test: FLEUR test coverage Lines: 181 215 84.2 %
Date: 2024-04-26 04:44:34 Functions: 2 5 40.0 %

          Line data    Source code
       1             : !--------------------------------------------------------------------------------
       2             : ! Copyright (c) 2016 Peter Grünberg Institut, Forschungszentrum Jülich, Germany
       3             : ! This file is part of FLEUR and available as free software under the conditions
       4             : ! of the MIT license as expressed in the LICENSE file in more detail.
       5             : !--------------------------------------------------------------------------------
       6             : 
       7             : !     Calculates the HF exchange term
       8             : !
       9             : !                                          s          s*          s            s*
      10             : !                                       phi    (r) phi     (r) phi     (r') phi    (r')
      11             : !                         occ.             n_1k       n'k+q       n'k+q        n_2k
      12             : !     exchange(n,q)  =  - SUM  INT INT  ------------------------------------------- dr dr'
      13             : !                         k,n'                           | r - r' |
      14             : !
      15             : !                         occ                  s          s    ~        ~       s         s
      16             : !                    =  - SUM  SUM  v     < phi      | phi     M    > < M    phi     | phi      >
      17             : !                         k,n' I,J   k,IJ      n'k+q      n_1k  q,I      q,J    n_2k      n'k+q
      18             : !
      19             : !     for the different combinations of n_1 and n_2 and where n' runs only over the valence states.
      20             : !     ( n_1,n_2:  valence-valence, core-core,core-valence )
      21             : !
      22             : !
      23             : !     At the Gamma point (k=0) v diverges. After diagonalization of v at k=0 the divergence is
      24             : !     restricted to the head element I=1. Furthermore, we expand <...> with kp perturbation theory.
      25             : !     As a result, the total I=1 element is given by a sum of a divergent 1/k**2-term and an
      26             : !     angular dependent term. The former is separated from the numerical k-summation and treated
      27             : !     analytically while the latter is spherically averaged and added to the k=0 contribution of
      28             : !     the numerical k-summation. (A better knowledge of the integrand's behavior at the BZ edges
      29             : !     might further improve the integration.)
      30             : !
      31             : !     The divergence at the Gamma point is integrated with one of the following algorithms:
      32             : ! (1) Switching-Off Function
      33             : !     In a sphere of radius k0=radshmin/2 a switching-off function g(k)=1-(k/k0)**n*(n+1-n*k/k0)
      34             : !     (n=npot) is defined. The 1/k**2 divergence is subtracted from the BZ integral in the form
      35             : !     g(k)/k**2 and integrated analytically. The non-divergent rest is integrated numerically.
      36             : ! (2) Periodic Function (similar to the one used by Massidda PRB 48, 5058)
      37             : !     The function  F(k) = SUM(G) exp(-expo*|k+G|**3) / |k+G|**2  is subtracted from the BZ integral
      38             : !     and integrated analytically. The non-divergent rest is integrated numerically.
      39             : !     The parameter expo is chosen such that exp(-expo*q**3)=1/2
      40             : !     with q = radius of sphere with same volume as BZ.
      41             : ! (3) Periodic Function (same as Massidda's) with expo->0
      42             : !     The function  F(k) = lim(expo->0) SUM(G) exp(-expo*|k+G|**2) / |k+G|**2  is subtracted from
      43             : !     the BZ integral and integrated analytically. The contribution to the BZ integral including
      44             : !     the "tail" is
      45             : !     vol/(8*pi**3) INT F(k) d^3k - P SUM(k) F(k)  ( P = principal value ) .
      46             : !     For expo->0 the two terms diverge. Therefore a cutoff radius q0 is introduced and related to
      47             : !     expo by exp(-expo*q0**2)=delta  ( delta = small value, e.g., delta = 1*10.0**-10 ) .
      48             : !     The resulting formula
      49             : !     vol/(4*pi**1.5*sqrt(expo)) * erf(sqrt(a)*q0) - sum(q,0<q<q0) exp(-expo*q**2)/q**2
      50             : !     converges well with q0. (Should be the default.)
      51             : 
      52             : MODULE m_exchange_valence_hf
      53             : 
      54             :    USE m_constants
      55             :    USE m_types
      56             :    USE m_util
      57             :    use m_matmul_dgemm
      58             :    LOGICAL, PARAMETER:: zero_order = .false., ibs_corr = .false.
      59             : 
      60             : CONTAINS
      61          72 :    SUBROUTINE exchange_valence_hf(k_pack, fi, fmpi, z_k, mpdata, jsp, hybdat, lapw, eig_irr, results, &
      62          24 :                                   n_q, wl_iks, xcpot, nococonv, stars, nsest, indx_sest, cmt_nk, mat_ex)
      63             :       
      64             :       USE m_wrapper
      65             :       USE m_trafo
      66             :       USE m_wavefproducts
      67             :       USE m_olap
      68             :       USE m_hsefunctional
      69             :       USE m_io_hybrid
      70             :       USE m_kp_perturbation
      71             :       use m_spmm_inv
      72             :       use m_spmm_noinv
      73             :       use m_work_package
      74             :       use m_judft 
      75             : #ifdef CPP_MPI
      76             :       use mpi
      77             : #endif
      78             : #ifdef _OPENACC
      79             :       USE cublas
      80             : #define CPP_zgemm cublaszgemm
      81             : #define CPP_dgemm cublasdgemm
      82             : #else
      83             : #define CPP_zgemm zgemm
      84             : #define CPP_dgemm dgemm
      85             : #endif
      86             :       IMPLICIT NONE
      87             : 
      88             :       type(t_k_package), intent(in)     :: k_pack
      89             :       type(t_fleurinput), intent(in)    :: fi
      90             :       TYPE(t_mpi), INTENT(IN)           :: fmpi
      91             :       type(t_mat), intent(in)           :: z_k
      92             :       TYPE(t_results), INTENT(IN)       :: results
      93             :       TYPE(t_xcpot_inbuild), INTENT(IN) :: xcpot
      94             :       TYPE(t_mpdata), intent(inout)     :: mpdata
      95             :       TYPE(t_nococonv), INTENT(IN)      :: nococonv
      96             :       TYPE(t_lapw), INTENT(IN)          :: lapw
      97             :       type(t_stars), intent(in)         :: stars
      98             :       TYPE(t_mat), INTENT(INOUT)        :: mat_ex
      99             :       TYPE(t_hybdat), INTENT(INOUT)     :: hybdat
     100             : 
     101             :       ! scalars
     102             :       INTEGER, INTENT(IN)    :: jsp
     103             : 
     104             :       ! arrays
     105             :       INTEGER, INTENT(IN)    ::  n_q(:)
     106             :       INTEGER, INTENT(IN)    ::  nsest(:)
     107             :       INTEGER, INTENT(IN)    ::  indx_sest(:, :)
     108             : 
     109             :       complex, intent(in)    :: cmt_nk(:,:,:)
     110             : 
     111             :       REAL, INTENT(IN)    ::  eig_irr(:, :)
     112             :       REAL, INTENT(IN)    ::  wl_iks(:, :)
     113             : 
     114             :       ! local scalars
     115             :       INTEGER                 ::  iband, iband1, jq, iq, nq_idx
     116             :       INTEGER                 ::  i, ierr, ik
     117             :       INTEGER                 ::  j, iq_p, start, stride
     118             :       INTEGER                 ::  n1, n2, nn2, me, max_band_pack
     119             :       INTEGER                 ::  ikqpt, iob, m, n, k, lda, ldb, ldc
     120             :       INTEGER                 ::  ok, psize, n_parts, ipart, ibando
     121             : 
     122             :       REAL, SAVE             ::  divergence
     123             : 
     124             :       COMPLEX                 ::  cdum2
     125             :       COMPLEX                 ::  exch0
     126             : 
     127             :       LOGICAL, SAVE           ::  initialize = .true.
     128             : 
     129             :       ! local arrays
     130          24 :       COMPLEX                          :: exchcorrect(fi%kpts%nkptf)
     131          24 :       COMPLEX, allocatable             :: dcprod(:,:,:) ! (hybdat%nbands(k_pack%nk,jsp), hybdat%nbands(k_pack%nk,jsp), 3)
     132          24 :       COMPLEX, allocatable             :: exch_vv(:,:) !(hybdat%nbands(k_pack%nk,jsp), hybdat%nbands(k_pack%nk,jsp))
     133             :       COMPLEX                          :: hessian(3, 3)
     134          24 :       COMPLEX                          :: proj_ibsc(3, MAXVAL(hybdat%nobd(:, jsp)), hybdat%nbands(k_pack%nk,jsp))
     135          24 :       COMPLEX                          :: olap_ibsc(3, 3, MAXVAL(hybdat%nobd(:, jsp)), MAXVAL(hybdat%nobd(:, jsp)))
     136          24 :       COMPLEX, ALLOCATABLE  :: phase_vv(:, :), c_coul_wavf(:,:), dot_result_c(:,:)
     137          24 :       REAL, ALLOCATABLE     :: r_coul_wavf(:,:), dot_result_r(:,:)
     138          24 :       LOGICAL                          :: occup(fi%input%neig), conjg_mtir
     139             : 
     140             : #define CPP_cprod_r cprod_vv%data_r
     141             : #define CPP_cprod_c cprod_vv%data_c
     142             : 
     143          24 :       type(t_mat)          :: cprod_vv, carr3_vv
     144          24 :       CALL timestart("valence exchange calculation")
     145          24 :       ik = k_pack%nk
     146             : 
     147          24 :       IF (initialize) THEN !it .eq. 1 .and. ik .eq. 1) THEN
     148           6 :          call calc_divergence(fi%cell, fi%kpts, divergence)
     149           6 :          if(fmpi%irank == 0) write (*,*) "Divergence:", divergence
     150           6 :          initialize = .false.
     151             :       END IF
     152             : 
     153             :       ! calculate valence-valence-valence-valence, core-valence-valence-valence
     154             :       ! and core-valence-valence-core exchange at current k-point
     155             :       ! the sum over the inner occupied valence states is restricted to the EIBZ(k)
     156             :       ! the contribution of the Gamma-point is treated separately (see below)
     157             : 
     158          24 :       call timestart("alloc phase_vv & dot_res")
     159          24 :       if(mat_ex%l_real) then
     160       50976 :          allocate(dot_result_r(hybdat%nbands(ik,jsp), hybdat%nbands(ik,jsp)), stat=ierr, source=0.0)
     161        9322 :          allocate (phase_vv(MAXVAL(hybdat%nobd(:, jsp)), hybdat%nbands(ik,jsp)), stat=ok, source=cmplx_0)
     162             :       else 
     163       14724 :          allocate(dot_result_c(hybdat%nbands(ik,jsp), hybdat%nbands(ik,jsp)), stat=ierr, source=cmplx_0)
     164           6 :          allocate (phase_vv(0,0), stat=ok, source=cmplx_0)
     165             :       endif
     166          24 :       IF (ok /= 0) call judft_error('exchange_val_hf: error allocation phase')
     167          24 :       if(ierr /= 0) call judft_error("can't alloc dot_res")
     168             : 
     169       65700 :       allocate(exch_vv(hybdat%nbands(ik,jsp), hybdat%nbands(ik,jsp)), stat=ierr, source=cmplx_0)
     170          24 :       if(ierr /= 0) call judft_error("Can't alloc exch_vv")
     171             : 
     172             : 
     173             :       !$acc data copyout(exch_vv) copyin(hybdat, hybdat%nbands, hybdat%nbasm, nsest, indx_sest) 
     174             :          !$acc kernels present(exch_vv) default(none)
     175       65628 :          exch_vv = 0
     176             :          !$acc end kernels
     177             : 
     178          24 :          call timestop("alloc phase_vv & dot_res")
     179             :          
     180          24 :          call timestart("q_loop")
     181             : 
     182         112 :          DO jq = 1, size(k_pack%q_packs)
     183          88 :             call timestart("initial setup")
     184          88 :             iq = k_pack%q_packs(jq)%ptr
     185          88 :             iq_p = fi%kpts%bkp(iq)
     186             : 
     187         440 :             ikqpt = fi%kpts%get_nk(fi%kpts%to_first_bz(fi%kpts%bkf(:, ik) + fi%kpts%bkf(:, iq)))
     188             : 
     189          88 :             n_parts = size(k_pack%q_packs(jq)%band_packs)
     190          88 :             start  = k_pack%q_packs(jq)%submpi%rank + 1
     191          88 :             stride = k_pack%q_packs(jq)%submpi%size
     192          88 :             call timestop("initial setup")
     193         200 :             do ipart = start, n_parts, stride
     194             :                !if (n_parts > 1) write (*, *) "Part ("//int2str(ipart)//"/"//int2str(n_parts)//") ik= "//int2str(ik)//" jq= "//int2str(jq)
     195          88 :                psize = k_pack%q_packs(jq)%band_packs(ipart)%psize
     196          88 :                ibando = k_pack%q_packs(jq)%band_packs(ipart)%start_idx
     197          88 :                call timestart("alloc & copy")
     198          88 :                call cprod_vv%alloc(mat_ex%l_real, hybdat%nbasm(iq), psize*hybdat%nbands(ik,jsp), mat_name="cprod_vv")
     199             :                call alloc_dev_cpy(cprod_vv, CPP_cprod_r, CPP_cprod_c)
     200          88 :                call timestop("alloc & copy")
     201          88 :                IF (mat_ex%l_real) THEN
     202             :                   CALL wavefproducts_inv(fi, ik, z_k, iq, jsp, ibando, ibando + psize - 1, lapw, &
     203          66 :                                        hybdat, mpdata, nococonv, stars, ikqpt, cmt_nk, cprod_vv)
     204             :                ELSE
     205             :                   CALL wavefproducts_noinv(fi, ik, z_k, iq, jsp, ibando, ibando + psize - 1, lapw,&
     206          22 :                                           hybdat, mpdata, nococonv, stars, ikqpt, cmt_nk, cprod_vv)
     207             :                END IF
     208             :                ! The sparse matrix technique is not feasible for the HSE
     209             :                ! functional. Thus, a dynamic adjustment is implemented
     210             :                ! The mixed basis functions and the potential difference
     211             :                ! are Fourier transformed, so that the exchange can be calculated
     212             :                ! in Fourier space
     213             :                !! REIMPLEMENTING (notes in lab book)
     214          88 :                IF (xcpot%is_name("hse") .OR. xcpot%is_name("vhse")) THEN
     215           0 :                   CALL timestart("hse: dynamic hse adjustment")
     216           0 :                   iband1 = hybdat%nobd(ikqpt, jsp)
     217             :                   exch_vv = exch_vv + &
     218             :                             dynamic_hse_adjustment(fi%atoms, fi%kpts%bkf(:, iq), iq, &
     219             :                                                    fi%kpts%nkptf, fi%cell%bmat, fi%cell%omtil, &
     220             :                                                    fi%hybinp%lcutm1, maxval(fi%hybinp%lcutm1), mpdata%num_radbasfn, maxval(mpdata%num_radbasfn), mpdata%g, &
     221             :                                                    mpdata%n_g(iq), mpdata%gptm_ptr(:, iq), mpdata%num_gpts(), mpdata%radbasfn_mt, &
     222             :                                                    hybdat%nbasm(iq), iband1, hybdat%nbands(ik,jsp), nsest, ibando, psize, indx_sest, &
     223             :                                                    fi%sym, fmpi%irank, cprod_vv%data_r(:, :), &
     224           0 :                                                    cprod_vv%data_c(:, :), mat_ex%l_real, wl_iks(:iband1, ikqpt), n_q(jq))
     225           0 :                   CALL timestop("hse: dynamic hse adjustment")
     226             :                END IF
     227             : 
     228             :                ! the Coulomb matrix is only evaluated at the irrecuible k-points
     229             :                ! bra_trafo transforms cprod instead of rotating the Coulomb matrix
     230             :                ! from IBZ to current k-point
     231             : 
     232          88 :                call timestart("bra_trafo stuff")
     233          88 :                IF (fi%kpts%bkp(iq) /= iq) THEN
     234          16 :                   call carr3_vv%init(cprod_vv, mat_name="carr_3")
     235          16 :                   call bra_trafo(fi, mpdata, hybdat, hybdat%nbands(ik,jsp), iq, psize, phase_vv, cprod_vv, carr3_vv)
     236          16 :                   call cprod_vv%copy(carr3_vv, 1, 1)
     237          16 :                   call carr3_vv%free()
     238             :                ELSE
     239       27390 :                   phase_vv(:, :) = cmplx_1
     240             :                END IF
     241          88 :                call timestop("bra_trafo stuff")
     242             :                
     243          88 :                call timestart("alloc coul_wavf")
     244          88 :                if(cprod_vv%l_real) then 
     245         264 :                   allocate(r_coul_wavf(cprod_vv%matsize1, cprod_vv%matsize2), stat=ierr)
     246          66 :                   allocate(c_coul_wavf(0,0))
     247             :                else
     248          88 :                   allocate(c_coul_wavf(cprod_vv%matsize1, cprod_vv%matsize2), stat=ierr)
     249          22 :                   allocate(r_coul_wavf(0,0))
     250             :                endif
     251          88 :                if(ierr /= 0) call judft_error("can't alloc coul_wavf")
     252     8548222 :                r_coul_wavf = 0.0
     253          88 :                call timestop("alloc coul_wavf")
     254             :                
     255          88 :                call timestart("exchange matrix")
     256          88 :                call timestart("sparse matrix products")
     257          88 :                IF (mat_ex%l_real) THEN
     258          66 :                   call spmm_invs(fi, mpdata, hybdat, iq_p, cprod_vv%data_r, r_coul_wavf)
     259             :                ELSE
     260          22 :                   conjg_mtir = (fi%kpts%bksym(iq) > fi%sym%nop)
     261          22 :                   call spmm_noinvs(fi, mpdata, hybdat, iq_p, conjg_mtir, cprod_vv%data_c, c_coul_wavf)
     262             :                END IF
     263          88 :                call timestop("sparse matrix products")
     264             : 
     265             :                !$acc enter data copyin(phase_vv, r_coul_wavf, c_coul_wavf)
     266          88 :                nq_idx = k_pack%q_packs(jq)%rank
     267             : 
     268             : 
     269          88 :                call timestart("apply prefactors carr1_v")
     270             :                !$acc data copyin(psize, wl_iks, n_q, nq_idx, ibando, ikqpt)
     271          88 :                   if (mat_ex%l_real) then
     272             : #ifdef _OPENACC
     273             :                      call timestart("cpy cprod")
     274             :                      call dlacpy("N", size(cprod_vv%data_r, 1), size(cprod_vv%data_r, 2), cprod_vv%data_r, size(cprod_vv%data_r, 1), CPP_cprod_r, size(CPP_cprod_r,1))
     275             :                      call timestop("cpy cprod")
     276             : #endif
     277             :                      !$acc enter data copyin(CPP_cprod_r)
     278             : 
     279             :                      !$acc parallel loop default(none) collapse(3) private(iband, iob, i)&
     280             :                      !$acc present(r_coul_wavf, hybdat, hybdat%nbands, hybdat%nbasm, psize, wl_iks, ikqpt, ibando)&
     281             :                      !$acc present(n_q, nq_idx)
     282        3496 :                      DO iband = 1, hybdat%nbands(ik,jsp)
     283       31286 :                         DO iob = 1, psize
     284     8551564 :                            do i = 1, hybdat%nbasm(iq)
     285     8548134 :                               r_coul_wavf(i, iob + psize*(iband - 1)) = r_coul_wavf(i, iob + psize*(iband - 1)) * wl_iks(ibando + iob - 1, ikqpt) / n_q(nq_idx)                        
     286             :                            enddo
     287             :                         enddo
     288             :                      enddo
     289             :                      !$acc end parallel loop
     290             : 
     291             :                   else
     292             : #ifdef _OPENACC
     293             :                      call timestart("cpy cprod")
     294             :                      call zlacpy("N", size(cprod_vv%data_c, 1), size(cprod_vv%data_c, 2), cprod_vv%data_c, size(cprod_vv%data_c, 1), CPP_cprod_c, size(CPP_cprod_c,1))
     295             :                      call timestop("cpy cprod")
     296             : #endif
     297             :                      !$acc enter data copyin(CPP_cprod_c)
     298             : 
     299             :                      !$acc parallel loop default(none) collapse(3) private(iband, iob, i)&
     300             :                      !$acc present(c_coul_wavf, hybdat, hybdat%nbands, hybdat%nbasm, psize, wl_iks, ikqpt, ibando)&
     301             :                      !$acc present(n_q, nq_idx)
     302        1100 :                      DO iband = 1, hybdat%nbands(ik,jsp)
     303       16192 :                         DO iob = 1, psize
     304     7794038 :                            do i = 1, hybdat%nbasm(iq)
     305     7792960 :                               c_coul_wavf(i, iob + psize*(iband - 1))  = c_coul_wavf(i, iob + psize*(iband - 1)) * wl_iks(ibando + iob - 1, ikqpt)/n_q(nq_idx)
     306             :                            enddo
     307             :                         enddo
     308             :                      enddo
     309             :                      !$acc end parallel loop
     310             :                   endif
     311             :                !$acc end data ! (psize, wl_iks, n_q, nq_idx, ibando, ikqpt)
     312          88 :                call timestop("apply prefactors carr1_v")
     313             : 
     314          88 :                call timestart("exch_vv dot prod")
     315          88 :                m = hybdat%nbands(ik,jsp)
     316          88 :                n = hybdat%nbands(ik,jsp)
     317          88 :                k = hybdat%nbasm(iq)
     318          88 :                lda = hybdat%nbasm(iq)*psize
     319          88 :                ldb = hybdat%nbasm(iq)*psize
     320          88 :                ldc = hybdat%nbands(ik,jsp)
     321             : 
     322          88 :                IF (mat_ex%l_real) THEN
     323             :                   !calculate all dotproducts for the current iob -> need to skip intermediate iob
     324             :                   !$acc enter data create(dot_result_r) 
     325         600 :                   DO iob = 1, psize
     326         534 :                      call timestart("CPP_dgemm")
     327             :                      !call blas_matmul(m,n,k,r_coul_wavf(:,iob:),CPP_cprod_r(:, iob:),dot_result_r,op_a="T")
     328             :                      ASSOCIATE(prod_data=>CPP_cprod_r) !persuade NVHPC that it knows CPP_CPROD_r
     329             :                         !$acc host_data use_device(r_coul_wavf, prod_data, dot_result_r)
     330         534 :                         call CPP_dgemm("T", "N", m, n, k, 1.0, r_coul_wavf(1, iob), lda, prod_data(1, iob), ldb, 0.0, dot_result_r , ldc)
     331             :                         !$acc end host_data
     332             :                      end ASSOCIATE   
     333             :                      !$acc wait
     334         534 :                      call timestop("CPP_dgemm")
     335             : 
     336             :                      !$acc kernels present(exch_vv, dot_result_r, phase_vv, hybdat, hybdat%nbands, nsest, indx_sest) default(none)
     337       28390 :                      DO iband = 1, hybdat%nbands(ik,jsp)
     338      173450 :                         DO n2 = 1, nsest(iband)
     339      145126 :                            nn2 = indx_sest(n2, iband)
     340      172916 :                            exch_vv(nn2, iband) = exch_vv(nn2, iband) + phase_vv(iob, nn2)*conjg(phase_vv(iob, iband))*dot_result_r(iband, nn2)
     341             :                         enddo
     342             :                      END DO
     343             :                      !$acc end kernels
     344             :                   END DO
     345             :                   !$acc exit data delete(CPP_cprod_r, dot_result_r)
     346             :                ELSE
     347             :                   !calculate all dotproducts for the current iob -> need to skip intermediate iob
     348             :                   !$acc enter data create(dot_result_c) 
     349         330 :                   DO iob = 1, psize
     350         308 :                      call timestart("CPP_zgemm")
     351             :                      ASSOCIATE(prod_data=>CPP_cprod_c) !persuade NVHPC that it knows CPP_CPROD_C
     352             :                         !$acc host_data use_device(c_coul_wavf, prod_data, dot_result_c)
     353         308 :                         call CPP_zgemm("C", "N", m, n, k, cmplx_1, c_coul_wavf(1, iob), lda, prod_data(1, iob), ldb, cmplx_0, dot_result_c, ldc)
     354             :                         !$acc end host_data
     355             :                      end ASSOCIATE   
     356             :                      !$acc wait
     357         308 :                      call timestop("CPP_zgemm")
     358             : 
     359             :                      !$acc kernels present(exch_vv, dot_result_c, hybdat, hybdat%nbands, nsest, indx_sest)  default(none)
     360       15422 :                      DO iband = 1, hybdat%nbands(ik,jsp)
     361      171444 :                         DO n2 = 1, nsest(iband)
     362      156044 :                            nn2 = indx_sest(n2, iband)
     363      171136 :                            exch_vv(nn2, iband) = exch_vv(nn2, iband) + dot_result_c(iband, nn2)
     364             :                         enddo
     365             :                      END DO
     366             :                      !$acc end kernels
     367             :                   enddo
     368             :                   !$acc exit data delete(CPP_cprod_c, dot_result_c)
     369             :                END IF
     370             :                !$acc exit data delete(phase_vv, r_coul_wavf, c_coul_wavf)
     371          88 :                call timestop("exch_vv dot prod")
     372          88 :                call timestop("exchange matrix")
     373             :                
     374          88 :                call cprod_vv%free()
     375          88 :                if(allocated(r_coul_wavf)) deallocate(r_coul_wavf)
     376         176 :                if(allocated(c_coul_wavf)) deallocate(c_coul_wavf)
     377             :             enddo
     378             :          END DO  !jq
     379             :       !$acc end data ! exch_vv hybdat, hybdat%nbands, hybdat%nbasm, nsest, indx_sest
     380          24 :       call timestop("q_loop")
     381             : 
     382          24 :       if(allocated(dot_result_r)) deallocate(dot_result_r)
     383          24 :       if(allocated(dot_result_c)) deallocate(dot_result_c)
     384             : 
     385             :       ! add contribution of the gamma point to the different cases (exch_vv,exch_cv,exch_cc)
     386             : 
     387             :       ! valence-valence-valence-valence exchange
     388          24 :       call timestart("gamma point treatment")
     389             :       IF ((.not. xcpot%is_name("hse")) .AND. &
     390          24 :           (.not. xcpot%is_name("vhse")) .AND. &
     391             :           k_pack%submpi%root()) THEN ! no gamma point correction needed for HSE functional
     392             : 
     393             :          IF (zero_order .and. .not. ibs_corr) THEN
     394             :             WRITE (oUnit, '(A)') ' Take zero order terms into account.'
     395             :          ELSE IF (zero_order .and. ibs_corr) THEN
     396             :             WRITE (oUnit, '(A)') ' Take zero order terms and ibs-correction into account.'
     397             :          END IF
     398             : 
     399             :          IF (zero_order) THEN
     400             :             CALL dwavefproducts(dcprod, ik, 1, hybdat%nbands(ik,jsp), 1, hybdat%nbands(ik,jsp), .false., fi%input, fi%atoms, mpdata, fi%hybinp, &
     401             :                                 fi%cell, hybdat, fi%kpts, fi%sym, fi%noco, nococonv, lapw,  jsp, eig_irr)
     402             : 
     403             :             ! make dcprod hermitian
     404             :             DO n1 = 1, hybdat%nbands(ik,jsp)
     405             :                DO n2 = 1, n1
     406             :                   dcprod(n1, n2, :) = (dcprod(n1, n2, :) - conjg(dcprod(n2, n1, :)))/2
     407             :                   dcprod(n2, n1, :) = -conjg(dcprod(n1, n2, :))
     408             :                END DO
     409             :             END DO
     410             : 
     411             :             IF (ibs_corr) THEN
     412             :                CALL ibs_correction(ik, fi%atoms, fi%input, jsp, hybdat, mpdata, fi%hybinp, &
     413             :                                    lapw, fi%kpts, fi%cell, MAXVAL(hybdat%nobd(:, jsp)), &
     414             :                                    fi%sym, fi%noco, nococonv, proj_ibsc, olap_ibsc)
     415             :             END IF
     416             :          END IF
     417             : 
     418             :          !This should be done with w_iks I guess!TODO
     419        1644 :          occup = .false.
     420        1644 :          DO i = 1, results%neig(ik, jsp)
     421        1644 :             IF (results%ef >= eig_irr(i, ik)) THEN
     422         230 :                occup(i) = .true.
     423        1390 :             ELSE IF ((eig_irr(i, ik) - results%ef) <= 1E-06) THEN
     424           0 :                occup(i) = .true.
     425             :             END IF
     426             :          END DO
     427             : 
     428        1252 :          DO n1 = 1, hybdat%nbands(ik,jsp)
     429        9260 :             DO n2 = 1, nsest(n1)!n1
     430        8008 :                nn2 = indx_sest(n2, n1)
     431       72072 :                exchcorrect = 0
     432        8008 :                exch0 = 0
     433             : 
     434             :                ! if zero_order = .true. add averaged k-dependent term to the numerical integration at Gamma-point contribution
     435             : 
     436             :                ! if we start with a system with a small DFT band gap (like GaAs), the contribution
     437             :                ! of the highest occupied and lowest unoccupied state in Hessian is typically
     438             :                ! large; a correct numerical integration requires a dense k-point mesh, so
     439             :                ! we don't add the contribution exchcorrect for such materials
     440             : 
     441             :                IF (zero_order) THEN
     442             :                   hessian = 0
     443             :                   IF (occup(n1) .and. occup(nn2)) THEN
     444             :                      DO i = 1, 3
     445             :                         j = i
     446             :                         DO iband = 1, hybdat%nbands(ik,jsp)
     447             :                            IF (occup(iband)) THEN
     448             :                               hessian(i, j) = hessian(i, j) + conjg(dcprod(iband, n1, i))*dcprod(iband, nn2, j)
     449             :                            END IF
     450             :                            hessian(i, j) = hessian(i, j) - dcprod(iband, nn2, i)*conjg(dcprod(iband, n1, j))
     451             :                         END DO
     452             : 
     453             :                         ! ibs correction
     454             :                         IF (ibs_corr) THEN
     455             :                            hessian(i, j) = hessian(i, j) - olap_ibsc(i, j, n1, nn2)/fi%cell%omtil
     456             :                            DO iband = 1, hybdat%nbands(ik,jsp)
     457             :                               hessian(i, j) = hessian(i, j) + conjg(proj_ibsc(i, nn2, iband))*proj_ibsc(j, n1, iband)/fi%cell%omtil
     458             :                            END DO
     459             :                         END IF
     460             :                      END DO
     461             :                   ELSE
     462             :                      DO i = 1, 3
     463             :                         j = i
     464             :                         DO iband = 1, hybdat%nbands(ik,jsp)
     465             :                            IF (occup(iband)) THEN
     466             :                               hessian(i, j) = hessian(i, j) + conjg(dcprod(iband, n1, i))*dcprod(iband, nn2, j)
     467             :                            END IF
     468             :                         END DO
     469             :                      END DO
     470             :                   END IF
     471             : 
     472             :                   exchcorrect(1) = fpi_const/3*(hessian(1, 1) + hessian(2, 2) + hessian(3, 3))
     473             :                   exch0 = exchcorrect(1)/fi%kpts%nkptf
     474             :                END IF
     475             : 
     476             :                ! tail correction/contribution from all other k-points (it  goes into exchcorrect )
     477             : 
     478             :                ! Analytic contribution
     479             : 
     480        8008 :                cdum2 = 0
     481             :                !multiply divergent contribution with occupation number;
     482             :                !this only affects metals
     483        8008 :                IF (n1 == nn2) THEN
     484        1228 :                   cdum2 = fpi_const/fi%cell%omtil*divergence*wl_iks(n1, ik)*fi%kpts%nkptf
     485             :                END IF
     486             : 
     487             :                ! due to the symmetrization afterwards the factor 1/n_q(1) must be added
     488             : 
     489        8008 :                IF (n1 == nn2) hybdat%div_vv(n1, ik, jsp) = REAL(cdum2)
     490        9236 :                exch_vv(nn2, n1) = exch_vv(nn2, n1) + (exch0 + cdum2)/n_q(1)
     491             : 
     492             :             END DO !n2
     493             :          END DO !n1
     494             :       END IF ! xcpot%icorr .ne. icorr_hse
     495          24 :       call timestop("gamma point treatment")
     496             : 
     497          24 :       IF (mat_ex%l_real) THEN
     498       50922 :          IF (any(abs(aimag(exch_vv)) > 1E-08)) then 
     499             :             CALL judft_warn('unusually large imaginary part of exch_vv. Max:' // float2str(maxval(abs(aimag(exch_vv)))), &
     500           0 :                                                                calledby='exchange_val_hf.F90')
     501             :          endif
     502             :       END IF
     503             : 
     504             :       ! write exch_vv in mat_ex
     505          24 :       call timestart("alloc mat_ex")
     506          24 :       if (.not. mat_ex%allocated()) then
     507          24 :          if (k_pack%submpi%root()) then
     508          24 :             CALL mat_ex%alloc(matsize1=hybdat%nbands(ik,jsp))
     509             :          else
     510           0 :             CALL mat_ex%alloc(matsize1=1)
     511             :          endif
     512             :       endif
     513          24 :       call timestop("alloc mat_ex")
     514             : 
     515             : #ifdef CPP_MPI 
     516          24 :       call timestart("pre exchmat reduce barrier")
     517          24 :       call MPI_Barrier(k_pack%submpi%comm, ierr)
     518          24 :       call timestop("pre exchmat reduce barrier")
     519             : #endif
     520             : 
     521          24 :       call timestart("reduce exch_vv>mat_ex")
     522          24 :       IF (mat_ex%l_real) THEN
     523             : #ifdef CPP_MPI
     524       50922 :          call MPI_Reduce(real(exch_vv), mat_ex%data_r, hybdat%nbands(ik,jsp)**2, MPI_DOUBLE_PRECISION, MPI_SUM, 0, k_pack%submpi%comm, ierr)
     525             : #else
     526             :          mat_ex%data_r = exch_vv
     527             : #endif
     528             :       ELSE
     529             : #ifdef CPP_MPI
     530           6 :          call MPI_Reduce(exch_vv, mat_ex%data_c, hybdat%nbands(ik,jsp)**2, MPI_DOUBLE_COMPLEX, MPI_SUM, 0, k_pack%submpi%comm, ierr)
     531             : #else
     532             :          mat_ex%data_c = exch_vv
     533             : #endif
     534             :       END IF
     535          24 :       call timestop("reduce exch_vv>mat_ex")
     536          24 :       CALL timestop("valence exchange calculation")
     537          48 :    END SUBROUTINE exchange_valence_hf
     538             : 
     539           6 :    SUBROUTINE calc_divergence(cell, kpts, divergence)
     540             :       IMPLICIT NONE
     541             : 
     542             :       TYPE(t_cell), INTENT(IN)  :: cell
     543             :       TYPE(t_kpts), INTENT(IN)  :: kpts
     544             :       REAL, INTENT(INOUT) :: divergence
     545             : 
     546             :       INTEGER :: ix, iy, iz, sign, n
     547             :       logical :: found
     548             :       REAL    :: expo, rrad, k(3), kv1(3), kv2(3), kv3(3), knorm2, nkpt3(3)
     549             :       COMPLEX :: cdum
     550             : 
     551           6 :       call timestart("calc_divergence")
     552           6 :       expo = 5e-3
     553           6 :       rrad = sqrt(-log(5e-3)/expo)
     554           6 :       cdum = sqrt(expo)*rrad
     555           6 :       divergence = real(cell%omtil/(tpi_const**2)*sqrt(pi_const/expo)*cerf(cdum))
     556           6 :       rrad = rrad**2
     557          24 :       nkpt3 = kpts%calcNkpt3()
     558          24 :       kv1 = cell%bmat(1, :)/nkpt3(1)
     559          24 :       kv2 = cell%bmat(2, :)/nkpt3(2)
     560          24 :       kv3 = cell%bmat(3, :)/nkpt3(3)
     561             :       n = 1
     562             :       found = .true.
     563             : 
     564        1000 :       DO WHILE (found)
     565         994 :          found = .false.
     566      184464 :          DO ix = -n, n
     567    23945618 :             DO iy = -(n - abs(ix)), n - abs(ix)
     568    23761154 :                iz = n - abs(ix) - abs(iy)
     569    70737028 :                DO sign = -1, 1, 2
     570    47157356 :                   iz = sign*iz
     571    47157356 :                   k(1) = ix*kv1(1) + iy*kv2(1) + iz*kv3(1)
     572    47157356 :                   k(2) = ix*kv1(2) + iy*kv2(2) + iz*kv3(2)
     573    47157356 :                   k(3) = ix*kv1(3) + iy*kv2(3) + iz*kv3(3)
     574    47157356 :                   knorm2 = k(1)**2 + k(2)**2 + k(3)**2
     575    47157356 :                   IF (knorm2 < rrad) THEN
     576     7420164 :                      found = .true.
     577     7420164 :                      divergence = divergence - exp(-expo*knorm2)/knorm2/kpts%nkptf
     578             :                   END IF
     579    70553558 :                   IF (iz == 0) exit
     580             :                END DO
     581             :             END DO
     582             :          END DO
     583         994 :          n = n + 1
     584             :       END DO
     585           6 :       call timestop("calc_divergence")
     586           6 :    END SUBROUTINE calc_divergence
     587             : 
     588           0 :    function calc_divergence2(cell, kpts) result(divergence)
     589             :       USE m_types
     590             :       USE m_constants
     591             :       USE m_util, ONLY: cerf
     592             :       implicit none
     593             :       TYPE(t_cell), INTENT(IN)  :: cell
     594             :       TYPE(t_kpts), INTENT(IN)  :: kpts
     595             :       REAL                      :: divergence
     596             : 
     597             :       INTEGER :: ikpt
     598             :       REAL, PARAMETER :: expo = 5e-3
     599             :       REAL    :: rrad, k(3), knorm2
     600             :       COMPLEX :: cdum
     601             : 
     602           0 :       rrad = sqrt(-log(5e-3)/expo)
     603           0 :       cdum = sqrt(expo)*rrad
     604           0 :       divergence = real(cell%omtil/(tpi_const**2)*sqrt(pi_const/expo)*cerf(cdum))
     605           0 :       rrad = rrad**2
     606             : 
     607           0 :       do ikpt = 1, kpts%nkptf
     608           0 :          k = kpts%bkf(:, ikpt)
     609           0 :          knorm2 = norm2(k)
     610           0 :          IF (knorm2 < rrad) THEN
     611           0 :             divergence = divergence - exp(-expo*knorm2)/knorm2/kpts%nkptf
     612             :          END IF
     613             :       enddo
     614           0 :    end function calc_divergence2
     615             : 
     616           0 :    subroutine recombine_parts(in_part, ipart, psizes, out_total)
     617             :       use m_types
     618             :       type(t_mat), intent(in)    :: in_part
     619             :       integer, intent(in)        :: ipart, psizes(:)
     620             :       type(t_mat), intent(inout) :: out_total
     621             : 
     622             :       integer :: nbands, iband, iob, offset, i, tsize
     623             :       logical :: l_real
     624             : 
     625           0 :       l_real = in_part%l_real
     626           0 :       tsize = sum(psizes)
     627             : 
     628           0 :       nbands = in_part%matsize2/psizes(ipart)
     629           0 :       if (out_total%matsize2/tsize /= nbands) call judft_error("nbands seems different")
     630           0 :       offset = 0
     631           0 :       do i = 1, ipart - 1
     632           0 :          offset = offset + psizes(i)
     633             :       enddo
     634             : 
     635           0 :       do iband = 1, nbands
     636           0 :          do iob = 1, psizes(ipart)
     637           0 :             if (l_real) then
     638           0 :                out_total%data_r(:, iob + (iband - 1)*tsize + offset) = in_part%data_r(:, iob + (iband - 1)*psizes(ipart))
     639             :             else
     640           0 :                out_total%data_c(:, iob + (iband - 1)*tsize + offset) = in_part%data_c(:, iob + (iband - 1)*psizes(ipart))
     641             :             endif
     642             :          enddo
     643             :       enddo
     644           0 :    end subroutine recombine_parts
     645             : 
     646           0 :    subroutine alloc_dev_cpy(mat, arr_r, arr_c)
     647             :       implicit none 
     648             :       type(t_mat), intent(in)             :: mat
     649             : 
     650             :       real, allocatable, intent(inout)    :: arr_r(:,:)
     651             :       complex, allocatable, intent(inout) :: arr_c(:,:) 
     652             :       integer :: ierr
     653             : 
     654             : #ifdef _OPENACC
     655             :       if(allocated(arr_r)) deallocate(arr_r)
     656             :       if(allocated(arr_c)) deallocate(arr_c)
     657             : 
     658             :       if(mat%l_real) then 
     659             :          allocate(arr_r(mat%matsize1, mat%matsize2), stat=ierr)
     660             :       else
     661             :          allocate(arr_c(mat%matsize1, mat%matsize2), stat=ierr)
     662             :       endif 
     663             : 
     664             :       if(ierr /= 0) call judft_error("can't alloc. prob. no mem.")
     665             : #endif
     666           0 :    end subroutine alloc_dev_cpy
     667             : 
     668             : END MODULE m_exchange_valence_hf

Generated by: LCOV version 1.14