LCOV - code coverage report
Current view: top level - eigen - hlomat.F90 (source / functions) Hit Total Coverage
Test: FLEUR test coverage Lines: 70 87 80.5 %
Date: 2024-04-19 04:21:58 Functions: 1 1 100.0 %

          Line data    Source code
       1             : !--------------------------------------------------------------------------------
       2             : ! Copyright (c) 2023 Peter Grünberg Institut, Forschungszentrum Jülich, Germany
       3             : ! This file is part of FLEUR and available as free software under the conditions
       4             : ! of the MIT license as expressed in the LICENSE file in more detail.
       5             : !--------------------------------------------------------------------------------
       6             : #ifdef _OPENACC
       7             : #define CPP_OMP not_used
       8             : #else
       9             : #define CPP_OMP $OMP
      10             : #endif
      11             : MODULE m_hlomat
      12             :    use m_matmul_dgemm
      13             :   IMPLICIT NONE
      14             :   !***********************************************************************
      15             :   ! updates the hamiltonian  matrix with the contributions from the local
      16             :   ! orbitals.
      17             :   ! p.kurz sept. 1996
      18             :   !***********************************************************************
      19             : CONTAINS
      20       11722 :   SUBROUTINE hlomat(input,atoms,fmpi,lapw,ud,tlmplm,sym,cell,noco,nococonv,ilSpinPr,ilSpin,&
      21       11722 :        ntyp,na,fjgj,alo1,blo1,clo1, igSpinPr,igSpin,chi,hmat,l_fullj,l_ham,lapwq,fjgjq)
      22             : 
      23             :     USE m_hsmt_ab
      24             :     USE m_types
      25             : !    USE m_types_mpimat
      26             :     USE m_hsmt_fjgj
      27             :     IMPLICIT NONE
      28             :     TYPE(t_input),INTENT(IN)  :: input
      29             :     TYPE(t_atoms),INTENT(IN)  :: atoms
      30             :     TYPE(t_lapw),INTENT(IN),TARGET   :: lapw
      31             :     TYPE(t_mpi),INTENT(IN)    :: fmpi
      32             :     TYPE(t_usdus),INTENT(IN)  :: ud
      33             :     TYPE(t_tlmplm),INTENT(IN) :: tlmplm
      34             :     TYPE(t_sym),INTENT(IN)    :: sym
      35             :     TYPE(t_cell),INTENT(IN)   :: cell
      36             :     TYPE(t_noco),INTENT(IN)   :: noco
      37             :     TYPE(t_nococonv),INTENT(IN),TARGET   :: nococonv
      38             :     TYPE(t_fjgj),INTENT(IN),TARGET   :: fjgj
      39             : 
      40             : 
      41             :     !     ..
      42             :     !     .. Scalar Arguments ..
      43             :     INTEGER, INTENT (IN) :: na,ntyp
      44             :     INTEGER, INTENT (IN) :: ilSpinPr,ilSpin !spin for usdus and tlmplm
      45             :     INTEGER, INTENT (IN) :: igSpin,igSpinPr
      46             :     COMPLEX, INTENT (IN) :: chi
      47             :     !     ..
      48             :     !     .. Array Arguments ..
      49             :     REAL, INTENT (IN) :: alo1(:,:),blo1(:,:),clo1(:,:)
      50             : 
      51             :     CLASS(t_mat),INTENT (INOUT) :: hmat
      52             :     LOGICAL, INTENT(IN) :: l_fullj, l_ham
      53             : 
      54             :     TYPE(t_lapw), OPTIONAL, INTENT(IN),TARGET :: lapwq
      55             :     TYPE(t_fjgj), OPTIONAL, INTENT(IN),TARGET :: fjgjq
      56             :     !     ..
      57             :     ! Local Scalars
      58             :       COMPLEX :: prod,axx,bxx,cxx
      59             :       INTEGER :: invsfct,l,lm,lmp,lo,lolo,lolop,lop,lp,i,lo_lmax
      60             :       INTEGER :: mp,nkvec,nkvecp,lmplm,loplo,kp,m,mlo,mlolo,mlolo_new,lolop_new
      61             :       INTEGER :: locol,lorow,n,k,ab_size,ab_size_Pr,s
      62             :       LOGICAL :: l_samelapw
      63             : 
      64             :       ! Local Arrays
      65             :       COMPLEX, ALLOCATABLE :: abCoeffs(:,:), ax(:,:), bx(:,:), cx(:,:)
      66       11722 :       COMPLEX, ALLOCATABLE :: abclo(:,:,:,:)
      67             :       COMPLEX, ALLOCATABLE :: abCoeffsPr(:,:), axPr(:,:), bxPr(:,:), cxPr(:,:)
      68             :       COMPLEX, ALLOCATABLE :: abcloPr(:,:,:,:)
      69             : 
      70             :       TYPE(t_lapw) ,POINTER:: lapwPr
      71             :       TYPE(t_fjgj) ,POINTER:: fjgjPr
      72             : 
      73       11722 :       l_samelapw = .FALSE.
      74       11722 :       IF (.NOT.PRESENT(lapwq)) l_samelapw = .TRUE.
      75             :       IF (.NOT.l_samelapw) THEN
      76             :          lapwPr => lapwq
      77             :          fjgjPr => fjgjq
      78             :       ELSE
      79             :          lapwPr => lapw
      80             :          fjgjPr => fjgj
      81             :       END IF
      82             : 
      83             :       ! Synthesize a and b
      84             :       
      85       80546 :       lo_lmax=maxval(atoms%llo)
      86             : 
      87       58610 :       ALLOCATE(abclo(3,-atoms%llod:atoms%llod,2*(2*atoms%llod+1),atoms%nlod))
      88      187552 :       ALLOCATE(ax(2*lo_lmax+1,MAXVAL(lapw%nv)),bx(2*lo_lmax+1,MAXVAL(lapw%nv)),cx(2*lo_lmax+1,MAXVAL(lapw%nv)))      
      89       70332 :       ALLOCATE(abCoeffsPr(0:2*atoms%lnonsph(ntyp)*(atoms%lnonsph(ntyp)+2)+1,MAXVAL(lapwPr%nv)))
      90      187552 :       ALLOCATE(axPr(MAXVAL(lapwPr%nv),2*lo_lmax+1),bxPr(MAXVAL(lapwPr%nv),2*lo_lmax+1),cxPr(MAXVAL(lapwPr%nv),2*lo_lmax+1))
      91       46888 :       ALLOCATE(abcloPr(3,-atoms%llod:atoms%llod,2*(2*atoms%llod+1),atoms%nlod))
      92       70332 :       ALLOCATE(abCoeffs(0:2*atoms%lnonsph(ntyp)*(atoms%lnonsph(ntyp)+2)+1,MAXVAL(lapw%nv)))       
      93             :       !$acc data create(abcoeffs,abclo,abcoeffsPr,abcloPr)
      94             :       !$acc data copyin(alo1,blo1,clo1,fjgjPr,fjgjpr%fj,fjgjpr%gj,tlmplm,tlmplm%h_loc_LO,tlmplm%h_lo)
      95       11722 :       CALL hsmt_ab(sym,atoms,noco,nococonv,ilSpinPr,igSpinPr,ntyp,na,cell,lapwPr,fjgjPr,abCoeffsPr(:,:),ab_size_Pr,.TRUE.,abcloPr,alo1(:,ilSpinPr),blo1(:,ilSpinPr),clo1(:,ilSpinPr))
      96             : 
      97             :       !we need the "unprimed" abcoeffs
      98       11722 :       IF (ilSpin==ilSpinPr.AND.igSpinPr==igSpin.AND.l_samelapw) THEN
      99             :          !$acc kernels present(abcoeffs,abcoeffsPr,abclo,abcloPr)
     100       11286 :          if (l_fullj) abcoeffs=abcoeffsPr 
     101     1482746 :          abclo=abcloPr
     102             :          !$acc end kernels
     103             :       ELSE     
     104         436 :          CALL hsmt_ab(sym,atoms,noco,nococonv,ilSpin,igSpin,ntyp,na,cell,lapw,fjgj,abCoeffs(:,:),ab_size,.TRUE.,abclo,alo1(:,ilSpin),blo1(:,ilSpin),clo1(:,ilSpin))
     105             :       END IF
     106             :    
     107             : 
     108       11722 :       mlo=0;mlolo=0;mlolo_new=0
     109       18188 :       DO m=1,ntyp-1
     110        6466 :          mlo=mlo+atoms%nlo(m)
     111        6466 :          mlolo=mlolo+atoms%nlo(m)*(atoms%nlo(m)+1)/2
     112       18188 :          mlolo_new=mlolo_new+atoms%nlo(m)**2
     113             :       END DO
     114             : 
     115       11722 :       IF ((sym%invsat(na) == 0) .OR. (sym%invsat(na) == 1)) THEN
     116             :          ! If this atom is the first of two atoms related by inversion, the
     117             :          ! contributions to the overlap matrix of both atoms are added simultaneously.
     118             :          ! Where it is made use of the fact, that the sum of these contributions
     119             :          ! is twice the real part of the contribution of each atom. Note, that
     120             :          ! in this case there are twice as many (2*(2*l+1)) k-vectors.
     121             :          ! (compare abccoflo and comments there).
     122             :          IF (sym%invsat(na) == 0) invsfct = 1
     123       11722 :          IF (sym%invsat(na) == 1) invsfct = 2
     124       11722 :          CALL timestart("LAPW-LO")
     125             :          ! Calculate the hamiltonian matrix elements with the regular
     126             :          ! LAPW basis-functions   
     127       30032 :          DO lo = 1,atoms%nlo(ntyp)
     128       18310 :             l = atoms%llo(lo,ntyp)
     129       18310 :             s = tlmplm%h_loc2_nonsph(ntyp) 
     130             :             !TODO here we copy the data to the CPU
     131             :             !$acc update self(abcoeffspr)
     132       54930 :             call blas_matmul(maxval(lapwPr%nv),2*l+1,2*s,abCoeffsPr,tlmplm%h_loc_LO(0:2*s-1,l*l:,ntyp,ilSpinPr,ilSpin),axPr,cmplx(1.0,0.0),cmplx(0.0,0.0),'C')
     133       54930 :             call blas_matmul(maxval(lapwPr%nv),2*l+1,2*s,abCoeffsPr,tlmplm%h_loc_LO(0:2*s-1,s+l*l:,ntyp,ilSpinPr,ilSpin),bxPr,cmplx(1.0,0.0),cmplx(0.0,0.0),'C')
     134       54930 :             call blas_matmul(maxval(lapwPr%nv),2*l+1,2*s,abCoeffsPr,tlmplm%h_LO(0:2*s-1,-l:,lo+mlo,ilSpinPr,ilSpin),cxPr,cmplx(1.0,0.0),cmplx(0.0,0.0),'C')
     135             :             !$acc data copyin(axpr,bxpr,cxpr)
     136             :             
     137             :             !LAPW LO contributions
     138             :             !$acc kernels present(hmat,hmat%data_c,hmat%data_r,abclo,axpr,bxpr,cxpr)&
     139             :             !$acc & copyin(lapw,lapw%nv,lapw%index_lo,fmpi,fmpi%n_size,fmpi%n_rank,lo,na,igSpin)
     140       73196 :             DO nkvec = 1,invsfct*(2*l+1)
     141       43164 :                locol= lapw%nv(igSpin)+lapw%index_lo(lo,na)+nkvec ! This is the column of the matrix
     142       61474 :                IF (MOD(locol-1,fmpi%n_size) == fmpi%n_rank) THEN ! Only this MPI rank calculates this column
     143       27666 :                   locol=(locol-1)/fmpi%n_size+1 ! This is the column in local storage
     144       27666 :                   IF (hmat%l_real) THEN
     145       35160 :                     DO m=-l,l
     146     3234310 :                         DO kp = 1,lapwPr%nv(igSpinPr)
     147             :                            hmat%data_r(kp,locol) = hmat%data_r(kp,locol) &
     148             :                                                & + REAL(chi) * invsfct * (&
     149             :                                                & abclo(1,m,nkvec,lo) *  axPr(kp,l+1+m) + &
     150             :                                                & abclo(2,m,nkvec,lo) *  bxPr(kp,l+1+m) + &
     151     3225347 :                                                & abclo(3,m,nkvec,lo) *  cxPr(kp,l+1+m) )
     152             :                         ENDDO   
     153             :                     END DO
     154             :                   ELSE
     155       68786 :                     DO m=-l,l
     156     5967259 :                         DO kp = 1,lapwPr%nv(igSpinPr)
     157             :                            hmat%data_c(kp,locol) = hmat%data_c(kp,locol) &
     158             :                                                & + chi * invsfct * ( &
     159             :                                                & abclo(1,m,nkvec,lo) *  axPr(kp,(l+1+m)) + &
     160             :                                                & abclo(2,m,nkvec,lo) *  bxPr(kp,(l+1+m)) + &
     161     5948556 :                                                & abclo(3,m,nkvec,lo) *  cxPr(kp,(l+1+m)) )
     162             :                         ENDDO   
     163             :                      END DO
     164             :                   END IF
     165             :                      ! Jump to the last matrix element of the current row
     166             :                END IF
     167             :             END DO
     168             :             !$acc end kernels
     169             :             !$acc end data
     170             :          ENDDO
     171       11722 :          CALL timestop("LAPW-LO")
     172       11722 :          IF (l_fullj) THEN
     173           0 :             CALL timestart("LO-LAPW")
     174           0 :             DO lo = 1,atoms%nlo(ntyp)
     175           0 :                l = atoms%llo(lo,ntyp)
     176           0 :                s = tlmplm%h_loc2_nonsph(ntyp) 
     177             :                !$acc data create(axpr,bxpr,cxpr)
     178           0 :                call blas_matmul(2*l+1,maxval(lapw%nv),2*s,tlmplm%h_loc_LO(0:2*s-1,l*l:,ntyp,ilSpinPr,ilSpin),abCoeffs,ax,cmplx(1.0,0.0),cmplx(0.0,0.0),'T','N')
     179           0 :                call blas_matmul(2*l+1,maxval(lapw%nv),2*s,tlmplm%h_loc_LO(0:2*s-1,s+l*l:,ntyp,ilSpinPr,ilSpin),abCoeffs,bx,cmplx(1.0,0.0),cmplx(0.0,0.0),'T','N')
     180           0 :                call blas_matmul(2*l+1,maxval(lapw%nv),2*s,tlmplm%h_LO2(0:2*s-1,-l:,lo+mlo,ilSpinPr,ilSpin),abCoeffs,cx,cmplx(1.0,0.0),cmplx(0.0,0.0),'T','N')
     181             :             
     182             :                !$acc kernels present(hmat,hmat%data_c,hmat%data_r,abcloPr,ax,bx,cx)&
     183             :                !$acc & copyin(lapwPr,lapwPr%nv,lapwPr%index_lo,fmpi,fmpi%n_size,fmpi%n_rank,invsfct,igSpinPr,lo,na,l)
     184           0 :                DO nkvec = 1,invsfct*(2*l+1)
     185           0 :                   lorow = lapwPr%nv(igSpinPr)+lapwPr%index_lo(lo,na)+nkvec
     186           0 :                   IF (hmat%l_real) THEN
     187           0 :                      DO m=-l,l
     188           0 :                         DO kp = 1,lapw%nv(igSpin)
     189             :                            hmat%data_r(lorow,kp) = hmat%data_r(lorow,kp) &
     190             :                                                & + REAL(chi) * invsfct * (&
     191             :                                                & CONJG(abcloPr(1,m,nkvec,lo)) *  ax(l+1+m,kp) + &
     192             :                                                & CONJG(abcloPr(2,m,nkvec,lo)) *  bx(l+1+m,kp) + &
     193           0 :                                                & CONJG(abcloPr(3,m,nkvec,lo)) *  cx(l+1+m,kp) )
     194             :                         END DO   
     195             :                      END DO
     196             :                   ELSE
     197           0 :                      DO m=-l,l
     198           0 :                         DO kp = 1,lapw%nv(igSpin)
     199             :                            hmat%data_c(lorow,kp) = hmat%data_c(lorow,kp) &
     200             :                                                & + chi * invsfct * ( &
     201             :                                                & CONJG(abcloPr(1,m,nkvec,lo)) *  ax((l+1+m),kp) + &
     202             :                                                & CONJG(abcloPr(2,m,nkvec,lo)) *  bx((l+1+m),kp) + &
     203           0 :                                                & CONJG(abcloPr(3,m,nkvec,lo)) *  cx((l+1+m),kp) )
     204             :                         END DO   
     205             :                      END DO
     206             :                   END IF
     207             :                END DO
     208             :                !$acc end kernels
     209             :                !$acc end data
     210             :                
     211             :             END DO
     212           0 :             CALL timestop("LO-LAPW")
     213             :          END IF
     214       11722 :          CALL timestart("LO-LO")
     215             :          !$acc kernels present(hmat,hmat%data_c,hmat%data_r,abcoeffs,abclo,abcoeffsPr,abcloPr) &
     216             :          !$acc & copyin(atoms,lapw,lapwPr,tlmplm,lapw%nv(:),lapwPr%nv(:))&
     217             :          !$acc & copyin(tlmplm%tuloulo_newer(:,:,:,:,ntyp,ilSpinPr,ilSpin))&
     218             :          !$acc & copyin(tlmplm%h_loc_LO(:,:,ntyp,ilSpinPr,ilSpin),tlmplm%h_LO(:,:,:,ilSpinPr,ilSpin),tlmplm%h_LO2(:,:,:,ilSpinPr,ilSpin),tlmplm%h_loc2_nonsph)&
     219             :          !$acc & copyin(lapw%index_lo(:,na),lapwPr%index_lo(:,na),tlmplm%h_loc2,atoms%llo(:,ntyp),atoms%nlo(ntyp))&
     220             :          !$acc & copyin(fmpi, fmpi%n_size, fmpi%n_rank)&
     221             :          !$acc & default(none)
     222       30032 :          DO lo = 1,atoms%nlo(ntyp)
     223       18310 :             l = atoms%llo(lo,ntyp)
     224             :             ! Calculate the hamiltonian matrix elements with other local
     225             :             ! orbitals at the same atom and with itself
     226       73196 :             DO nkvec = 1,invsfct* (2*l+1)
     227       43164 :                locol = lapw%nv(igSpin)+lapw%index_lo(lo,na)+nkvec ! This is the column of the matrix
     228       61474 :                IF (MOD(locol-1,fmpi%n_size) == fmpi%n_rank) THEN ! Only this MPI rank calculates this column
     229       27666 :                   locol=(locol-1)/fmpi%n_size+1 ! This is the column in local storage
     230             :                   ! Calculate the Hamiltonian matrix elements with different
     231             :                   ! local orbitals at the same atom
     232       70090 :                   DO lop = 1, MERGE(lo,atoms%nlo(ntyp),igSpinPr==igSpin.AND..NOT.l_fullj)
     233             :                      !IF (lop==lo) CYCLE No sepcial treatment needed for same LO
     234       42424 :                      lp = atoms%llo(lop,ntyp)
     235      163050 :                      DO nkvecp = 1,invsfct* (2*lp+1)
     236       92960 :                         lorow = lapwPr%nv(igSpinPr)+lapwPr%index_lo(lop,na)+nkvecp
     237       92960 :                         if (lorow>lapw%nv(igSpin)+lapw%index_lo(lo,na)+nkvec.AND..NOT.l_fullj) cycle
     238      315970 :                         DO m = -l,l
     239      205360 :                            lm = l*(l+1) + m
     240      856774 :                            DO mp = -lp,lp
     241      558454 :                               lmp = lp* (lp+1) + mp
     242      558454 :                               s = tlmplm%h_loc2_nonsph(ntyp)
     243             :                               ! This is the product abclo^* tlmplm abclo in the 1,2,3 index (a,b,c coeff)
     244             :                               axx = tlmplm%h_loc_LO(lmp,lm,ntyp,ilSpinPr,ilSpin)*abclo(1,m,nkvec,lo)+ &
     245             :                                     tlmplm%h_loc_LO(lmp,lm+s,ntyp,ilSpinPr,ilSpin)*abclo(2,m,nkvec,lo) + &
     246      558454 :                                     tlmplm%h_LO(lmp,m,lo+mlo,ilSpinPr,ilSpin)*abclo(3,m,nkvec,lo)
     247             :                               bxx = tlmplm%h_loc_LO(lmp+s,lm,ntyp,ilSpinPr,ilSpin)*abclo(1,m,nkvec,lo) + &
     248             :                                     tlmplm%h_loc_LO(lmp+s,lm+s,ntyp,ilSpinPr,ilSpin)*abclo(2,m,nkvec,lo)  + &
     249      558454 :                                     tlmplm%h_LO(lmp+s,m,lo+mlo,ilSpinPr,ilSpin) * abclo(3,m,nkvec,lo)
     250             :                               !cxx = tlmplm%tulou(lm,mp,lop+mlo,ilSpinPr,ilSpin) * abclo(1,m,nkvec,lo)+ &
     251             :                               !      tlmplm%tulod(lm,mp,lop+mlo,ilSpinPr,ilSpin)* abclo(2,m,nkvec,lo) + &
     252             :                               !      tlmplm%tuloulo_newer(mp,m,lop,lo,ntyp,ilSpinPr,ilSpin) * abclo(3,m,nkvec,lo)
     253             :                               cxx = tlmplm%h_LO2(lm,mp,lop+mlo,ilSpinPr,ilSpin) * abclo(1,m,nkvec,lo)+ &
     254             :                                     tlmplm%h_LO2(lm+s,mp,lop+mlo,ilSpinPr,ilSpin)* abclo(2,m,nkvec,lo) + &
     255      558454 :                                     tlmplm%tuloulo_newer(mp,m,lop,lo,ntyp,ilSpinPr,ilSpin) * abclo(3,m,nkvec,lo)
     256             :                               prod= CONJG(abcloPr(1,mp,nkvecp,lop)) * axx + &
     257             :                                     CONJG(abcloPr(2,mp,nkvecp,lop)) * bxx + &
     258      558454 :                                     CONJG(abcloPr(3,mp,nkvecp,lop)) * cxx
     259             :                             
     260      763814 :                               IF (hmat%l_real) THEN
     261      189309 :                                  hmat%data_r(lorow,locol) = hmat%data_r(lorow,locol) + REAL(chi) * invsfct * REAL(prod)
     262             :                               ELSE
     263      369145 :                                  hmat%data_c(lorow,locol) = hmat%data_c(lorow,locol) + chi * prod
     264             :                               END IF
     265             :                            END DO
     266             :                         END DO
     267             :                      END DO
     268             :                   END DO                  
     269             :                END IF !If this lo to be calculated by fmpi rank
     270             :             END DO
     271             :          END DO ! end of lo = 1,atoms%nlo loop
     272             :          !$acc end kernels
     273       11722 :          CALL timestop("LO-LO")
     274             :       END IF
     275             : 
     276             :       !$acc end data
     277             :       !$acc end data
     278       11722 :    END SUBROUTINE hlomat
     279             : END MODULE m_hlomat

Generated by: LCOV version 1.14