Line data Source code
1 : !--------------------------------------------------------------------------------
2 : ! Copyright (c) 2023 Peter Grünberg Institut, Forschungszentrum Jülich, Germany
3 : ! This file is part of FLEUR and available as free software under the conditions
4 : ! of the MIT license as expressed in the LICENSE file in more detail.
5 : !--------------------------------------------------------------------------------
6 : #ifdef _OPENACC
7 : #define CPP_OMP not_used
8 : #else
9 : #define CPP_OMP $OMP
10 : #endif
11 : MODULE m_hlomat
12 : use m_matmul_dgemm
13 : IMPLICIT NONE
14 : !***********************************************************************
15 : ! updates the hamiltonian matrix with the contributions from the local
16 : ! orbitals.
17 : ! p.kurz sept. 1996
18 : !***********************************************************************
19 : CONTAINS
20 11722 : SUBROUTINE hlomat(input,atoms,fmpi,lapw,ud,tlmplm,sym,cell,noco,nococonv,ilSpinPr,ilSpin,&
21 11722 : ntyp,na,fjgj,alo1,blo1,clo1, igSpinPr,igSpin,chi,hmat,l_fullj,l_ham,lapwq,fjgjq)
22 :
23 : USE m_hsmt_ab
24 : USE m_types
25 : ! USE m_types_mpimat
26 : USE m_hsmt_fjgj
27 : IMPLICIT NONE
28 : TYPE(t_input),INTENT(IN) :: input
29 : TYPE(t_atoms),INTENT(IN) :: atoms
30 : TYPE(t_lapw),INTENT(IN),TARGET :: lapw
31 : TYPE(t_mpi),INTENT(IN) :: fmpi
32 : TYPE(t_usdus),INTENT(IN) :: ud
33 : TYPE(t_tlmplm),INTENT(IN) :: tlmplm
34 : TYPE(t_sym),INTENT(IN) :: sym
35 : TYPE(t_cell),INTENT(IN) :: cell
36 : TYPE(t_noco),INTENT(IN) :: noco
37 : TYPE(t_nococonv),INTENT(IN),TARGET :: nococonv
38 : TYPE(t_fjgj),INTENT(IN),TARGET :: fjgj
39 :
40 :
41 : ! ..
42 : ! .. Scalar Arguments ..
43 : INTEGER, INTENT (IN) :: na,ntyp
44 : INTEGER, INTENT (IN) :: ilSpinPr,ilSpin !spin for usdus and tlmplm
45 : INTEGER, INTENT (IN) :: igSpin,igSpinPr
46 : COMPLEX, INTENT (IN) :: chi
47 : ! ..
48 : ! .. Array Arguments ..
49 : REAL, INTENT (IN) :: alo1(:,:),blo1(:,:),clo1(:,:)
50 :
51 : CLASS(t_mat),INTENT (INOUT) :: hmat
52 : LOGICAL, INTENT(IN) :: l_fullj, l_ham
53 :
54 : TYPE(t_lapw), OPTIONAL, INTENT(IN),TARGET :: lapwq
55 : TYPE(t_fjgj), OPTIONAL, INTENT(IN),TARGET :: fjgjq
56 : ! ..
57 : ! Local Scalars
58 : COMPLEX :: prod,axx,bxx,cxx
59 : INTEGER :: invsfct,l,lm,lmp,lo,lolo,lolop,lop,lp,i,lo_lmax
60 : INTEGER :: mp,nkvec,nkvecp,lmplm,loplo,kp,m,mlo,mlolo,mlolo_new,lolop_new
61 : INTEGER :: locol,lorow,n,k,ab_size,ab_size_Pr,s
62 : LOGICAL :: l_samelapw
63 :
64 : ! Local Arrays
65 : COMPLEX, ALLOCATABLE :: abCoeffs(:,:), ax(:,:), bx(:,:), cx(:,:)
66 11722 : COMPLEX, ALLOCATABLE :: abclo(:,:,:,:)
67 : COMPLEX, ALLOCATABLE :: abCoeffsPr(:,:), axPr(:,:), bxPr(:,:), cxPr(:,:)
68 : COMPLEX, ALLOCATABLE :: abcloPr(:,:,:,:)
69 :
70 : TYPE(t_lapw) ,POINTER:: lapwPr
71 : TYPE(t_fjgj) ,POINTER:: fjgjPr
72 :
73 11722 : l_samelapw = .FALSE.
74 11722 : IF (.NOT.PRESENT(lapwq)) l_samelapw = .TRUE.
75 : IF (.NOT.l_samelapw) THEN
76 : lapwPr => lapwq
77 : fjgjPr => fjgjq
78 : ELSE
79 : lapwPr => lapw
80 : fjgjPr => fjgj
81 : END IF
82 :
83 : ! Synthesize a and b
84 :
85 80546 : lo_lmax=maxval(atoms%llo)
86 :
87 58610 : ALLOCATE(abclo(3,-atoms%llod:atoms%llod,2*(2*atoms%llod+1),atoms%nlod))
88 187552 : ALLOCATE(ax(2*lo_lmax+1,MAXVAL(lapw%nv)),bx(2*lo_lmax+1,MAXVAL(lapw%nv)),cx(2*lo_lmax+1,MAXVAL(lapw%nv)))
89 70332 : ALLOCATE(abCoeffsPr(0:2*atoms%lnonsph(ntyp)*(atoms%lnonsph(ntyp)+2)+1,MAXVAL(lapwPr%nv)))
90 187552 : ALLOCATE(axPr(MAXVAL(lapwPr%nv),2*lo_lmax+1),bxPr(MAXVAL(lapwPr%nv),2*lo_lmax+1),cxPr(MAXVAL(lapwPr%nv),2*lo_lmax+1))
91 46888 : ALLOCATE(abcloPr(3,-atoms%llod:atoms%llod,2*(2*atoms%llod+1),atoms%nlod))
92 70332 : ALLOCATE(abCoeffs(0:2*atoms%lnonsph(ntyp)*(atoms%lnonsph(ntyp)+2)+1,MAXVAL(lapw%nv)))
93 : !$acc data create(abcoeffs,abclo,abcoeffsPr,abcloPr)
94 : !$acc data copyin(alo1,blo1,clo1,fjgjPr,fjgjpr%fj,fjgjpr%gj,tlmplm,tlmplm%h_loc_LO,tlmplm%h_lo)
95 11722 : CALL hsmt_ab(sym,atoms,noco,nococonv,ilSpinPr,igSpinPr,ntyp,na,cell,lapwPr,fjgjPr,abCoeffsPr(:,:),ab_size_Pr,.TRUE.,abcloPr,alo1(:,ilSpinPr),blo1(:,ilSpinPr),clo1(:,ilSpinPr))
96 :
97 : !we need the "unprimed" abcoeffs
98 11722 : IF (ilSpin==ilSpinPr.AND.igSpinPr==igSpin.AND.l_samelapw) THEN
99 : !$acc kernels present(abcoeffs,abcoeffsPr,abclo,abcloPr)
100 11286 : if (l_fullj) abcoeffs=abcoeffsPr
101 1482746 : abclo=abcloPr
102 : !$acc end kernels
103 : ELSE
104 436 : CALL hsmt_ab(sym,atoms,noco,nococonv,ilSpin,igSpin,ntyp,na,cell,lapw,fjgj,abCoeffs(:,:),ab_size,.TRUE.,abclo,alo1(:,ilSpin),blo1(:,ilSpin),clo1(:,ilSpin))
105 : END IF
106 :
107 :
108 11722 : mlo=0;mlolo=0;mlolo_new=0
109 18188 : DO m=1,ntyp-1
110 6466 : mlo=mlo+atoms%nlo(m)
111 6466 : mlolo=mlolo+atoms%nlo(m)*(atoms%nlo(m)+1)/2
112 18188 : mlolo_new=mlolo_new+atoms%nlo(m)**2
113 : END DO
114 :
115 11722 : IF ((sym%invsat(na) == 0) .OR. (sym%invsat(na) == 1)) THEN
116 : ! If this atom is the first of two atoms related by inversion, the
117 : ! contributions to the overlap matrix of both atoms are added simultaneously.
118 : ! Where it is made use of the fact, that the sum of these contributions
119 : ! is twice the real part of the contribution of each atom. Note, that
120 : ! in this case there are twice as many (2*(2*l+1)) k-vectors.
121 : ! (compare abccoflo and comments there).
122 : IF (sym%invsat(na) == 0) invsfct = 1
123 11722 : IF (sym%invsat(na) == 1) invsfct = 2
124 11722 : CALL timestart("LAPW-LO")
125 : ! Calculate the hamiltonian matrix elements with the regular
126 : ! LAPW basis-functions
127 30032 : DO lo = 1,atoms%nlo(ntyp)
128 18310 : l = atoms%llo(lo,ntyp)
129 18310 : s = tlmplm%h_loc2_nonsph(ntyp)
130 : !TODO here we copy the data to the CPU
131 : !$acc update self(abcoeffspr)
132 54930 : call blas_matmul(maxval(lapwPr%nv),2*l+1,2*s,abCoeffsPr,tlmplm%h_loc_LO(0:2*s-1,l*l:,ntyp,ilSpinPr,ilSpin),axPr,cmplx(1.0,0.0),cmplx(0.0,0.0),'C')
133 54930 : call blas_matmul(maxval(lapwPr%nv),2*l+1,2*s,abCoeffsPr,tlmplm%h_loc_LO(0:2*s-1,s+l*l:,ntyp,ilSpinPr,ilSpin),bxPr,cmplx(1.0,0.0),cmplx(0.0,0.0),'C')
134 54930 : call blas_matmul(maxval(lapwPr%nv),2*l+1,2*s,abCoeffsPr,tlmplm%h_LO(0:2*s-1,-l:,lo+mlo,ilSpinPr,ilSpin),cxPr,cmplx(1.0,0.0),cmplx(0.0,0.0),'C')
135 : !$acc data copyin(axpr,bxpr,cxpr)
136 :
137 : !LAPW LO contributions
138 : !$acc kernels present(hmat,hmat%data_c,hmat%data_r,abclo,axpr,bxpr,cxpr)&
139 : !$acc & copyin(lapw,lapw%nv,lapw%index_lo,fmpi,fmpi%n_size,fmpi%n_rank,lo,na,igSpin)
140 73196 : DO nkvec = 1,invsfct*(2*l+1)
141 43164 : locol= lapw%nv(igSpin)+lapw%index_lo(lo,na)+nkvec ! This is the column of the matrix
142 61474 : IF (MOD(locol-1,fmpi%n_size) == fmpi%n_rank) THEN ! Only this MPI rank calculates this column
143 27666 : locol=(locol-1)/fmpi%n_size+1 ! This is the column in local storage
144 27666 : IF (hmat%l_real) THEN
145 35160 : DO m=-l,l
146 3234310 : DO kp = 1,lapwPr%nv(igSpinPr)
147 : hmat%data_r(kp,locol) = hmat%data_r(kp,locol) &
148 : & + REAL(chi) * invsfct * (&
149 : & abclo(1,m,nkvec,lo) * axPr(kp,l+1+m) + &
150 : & abclo(2,m,nkvec,lo) * bxPr(kp,l+1+m) + &
151 3225347 : & abclo(3,m,nkvec,lo) * cxPr(kp,l+1+m) )
152 : ENDDO
153 : END DO
154 : ELSE
155 68786 : DO m=-l,l
156 5967259 : DO kp = 1,lapwPr%nv(igSpinPr)
157 : hmat%data_c(kp,locol) = hmat%data_c(kp,locol) &
158 : & + chi * invsfct * ( &
159 : & abclo(1,m,nkvec,lo) * axPr(kp,(l+1+m)) + &
160 : & abclo(2,m,nkvec,lo) * bxPr(kp,(l+1+m)) + &
161 5948556 : & abclo(3,m,nkvec,lo) * cxPr(kp,(l+1+m)) )
162 : ENDDO
163 : END DO
164 : END IF
165 : ! Jump to the last matrix element of the current row
166 : END IF
167 : END DO
168 : !$acc end kernels
169 : !$acc end data
170 : ENDDO
171 11722 : CALL timestop("LAPW-LO")
172 11722 : IF (l_fullj) THEN
173 0 : CALL timestart("LO-LAPW")
174 0 : DO lo = 1,atoms%nlo(ntyp)
175 0 : l = atoms%llo(lo,ntyp)
176 0 : s = tlmplm%h_loc2_nonsph(ntyp)
177 : !$acc data create(axpr,bxpr,cxpr)
178 0 : call blas_matmul(2*l+1,maxval(lapw%nv),2*s,tlmplm%h_loc_LO(0:2*s-1,l*l:,ntyp,ilSpinPr,ilSpin),abCoeffs,ax,cmplx(1.0,0.0),cmplx(0.0,0.0),'T','N')
179 0 : call blas_matmul(2*l+1,maxval(lapw%nv),2*s,tlmplm%h_loc_LO(0:2*s-1,s+l*l:,ntyp,ilSpinPr,ilSpin),abCoeffs,bx,cmplx(1.0,0.0),cmplx(0.0,0.0),'T','N')
180 0 : call blas_matmul(2*l+1,maxval(lapw%nv),2*s,tlmplm%h_LO2(0:2*s-1,-l:,lo+mlo,ilSpinPr,ilSpin),abCoeffs,cx,cmplx(1.0,0.0),cmplx(0.0,0.0),'T','N')
181 :
182 : !$acc kernels present(hmat,hmat%data_c,hmat%data_r,abcloPr,ax,bx,cx)&
183 : !$acc & copyin(lapwPr,lapwPr%nv,lapwPr%index_lo,fmpi,fmpi%n_size,fmpi%n_rank,invsfct,igSpinPr,lo,na,l)
184 0 : DO nkvec = 1,invsfct*(2*l+1)
185 0 : lorow = lapwPr%nv(igSpinPr)+lapwPr%index_lo(lo,na)+nkvec
186 0 : IF (hmat%l_real) THEN
187 0 : DO m=-l,l
188 0 : DO kp = 1,lapw%nv(igSpin)
189 : hmat%data_r(lorow,kp) = hmat%data_r(lorow,kp) &
190 : & + REAL(chi) * invsfct * (&
191 : & CONJG(abcloPr(1,m,nkvec,lo)) * ax(l+1+m,kp) + &
192 : & CONJG(abcloPr(2,m,nkvec,lo)) * bx(l+1+m,kp) + &
193 0 : & CONJG(abcloPr(3,m,nkvec,lo)) * cx(l+1+m,kp) )
194 : END DO
195 : END DO
196 : ELSE
197 0 : DO m=-l,l
198 0 : DO kp = 1,lapw%nv(igSpin)
199 : hmat%data_c(lorow,kp) = hmat%data_c(lorow,kp) &
200 : & + chi * invsfct * ( &
201 : & CONJG(abcloPr(1,m,nkvec,lo)) * ax((l+1+m),kp) + &
202 : & CONJG(abcloPr(2,m,nkvec,lo)) * bx((l+1+m),kp) + &
203 0 : & CONJG(abcloPr(3,m,nkvec,lo)) * cx((l+1+m),kp) )
204 : END DO
205 : END DO
206 : END IF
207 : END DO
208 : !$acc end kernels
209 : !$acc end data
210 :
211 : END DO
212 0 : CALL timestop("LO-LAPW")
213 : END IF
214 11722 : CALL timestart("LO-LO")
215 : !$acc kernels present(hmat,hmat%data_c,hmat%data_r,abcoeffs,abclo,abcoeffsPr,abcloPr) &
216 : !$acc & copyin(atoms,lapw,lapwPr,tlmplm,lapw%nv(:),lapwPr%nv(:))&
217 : !$acc & copyin(tlmplm%tuloulo_newer(:,:,:,:,ntyp,ilSpinPr,ilSpin))&
218 : !$acc & copyin(tlmplm%h_loc_LO(:,:,ntyp,ilSpinPr,ilSpin),tlmplm%h_LO(:,:,:,ilSpinPr,ilSpin),tlmplm%h_LO2(:,:,:,ilSpinPr,ilSpin),tlmplm%h_loc2_nonsph)&
219 : !$acc & copyin(lapw%index_lo(:,na),lapwPr%index_lo(:,na),tlmplm%h_loc2,atoms%llo(:,ntyp),atoms%nlo(ntyp))&
220 : !$acc & copyin(fmpi, fmpi%n_size, fmpi%n_rank)&
221 : !$acc & default(none)
222 30032 : DO lo = 1,atoms%nlo(ntyp)
223 18310 : l = atoms%llo(lo,ntyp)
224 : ! Calculate the hamiltonian matrix elements with other local
225 : ! orbitals at the same atom and with itself
226 73196 : DO nkvec = 1,invsfct* (2*l+1)
227 43164 : locol = lapw%nv(igSpin)+lapw%index_lo(lo,na)+nkvec ! This is the column of the matrix
228 61474 : IF (MOD(locol-1,fmpi%n_size) == fmpi%n_rank) THEN ! Only this MPI rank calculates this column
229 27666 : locol=(locol-1)/fmpi%n_size+1 ! This is the column in local storage
230 : ! Calculate the Hamiltonian matrix elements with different
231 : ! local orbitals at the same atom
232 70090 : DO lop = 1, MERGE(lo,atoms%nlo(ntyp),igSpinPr==igSpin.AND..NOT.l_fullj)
233 : !IF (lop==lo) CYCLE No sepcial treatment needed for same LO
234 42424 : lp = atoms%llo(lop,ntyp)
235 163050 : DO nkvecp = 1,invsfct* (2*lp+1)
236 92960 : lorow = lapwPr%nv(igSpinPr)+lapwPr%index_lo(lop,na)+nkvecp
237 92960 : if (lorow>lapw%nv(igSpin)+lapw%index_lo(lo,na)+nkvec.AND..NOT.l_fullj) cycle
238 315970 : DO m = -l,l
239 205360 : lm = l*(l+1) + m
240 856774 : DO mp = -lp,lp
241 558454 : lmp = lp* (lp+1) + mp
242 558454 : s = tlmplm%h_loc2_nonsph(ntyp)
243 : ! This is the product abclo^* tlmplm abclo in the 1,2,3 index (a,b,c coeff)
244 : axx = tlmplm%h_loc_LO(lmp,lm,ntyp,ilSpinPr,ilSpin)*abclo(1,m,nkvec,lo)+ &
245 : tlmplm%h_loc_LO(lmp,lm+s,ntyp,ilSpinPr,ilSpin)*abclo(2,m,nkvec,lo) + &
246 558454 : tlmplm%h_LO(lmp,m,lo+mlo,ilSpinPr,ilSpin)*abclo(3,m,nkvec,lo)
247 : bxx = tlmplm%h_loc_LO(lmp+s,lm,ntyp,ilSpinPr,ilSpin)*abclo(1,m,nkvec,lo) + &
248 : tlmplm%h_loc_LO(lmp+s,lm+s,ntyp,ilSpinPr,ilSpin)*abclo(2,m,nkvec,lo) + &
249 558454 : tlmplm%h_LO(lmp+s,m,lo+mlo,ilSpinPr,ilSpin) * abclo(3,m,nkvec,lo)
250 : !cxx = tlmplm%tulou(lm,mp,lop+mlo,ilSpinPr,ilSpin) * abclo(1,m,nkvec,lo)+ &
251 : ! tlmplm%tulod(lm,mp,lop+mlo,ilSpinPr,ilSpin)* abclo(2,m,nkvec,lo) + &
252 : ! tlmplm%tuloulo_newer(mp,m,lop,lo,ntyp,ilSpinPr,ilSpin) * abclo(3,m,nkvec,lo)
253 : cxx = tlmplm%h_LO2(lm,mp,lop+mlo,ilSpinPr,ilSpin) * abclo(1,m,nkvec,lo)+ &
254 : tlmplm%h_LO2(lm+s,mp,lop+mlo,ilSpinPr,ilSpin)* abclo(2,m,nkvec,lo) + &
255 558454 : tlmplm%tuloulo_newer(mp,m,lop,lo,ntyp,ilSpinPr,ilSpin) * abclo(3,m,nkvec,lo)
256 : prod= CONJG(abcloPr(1,mp,nkvecp,lop)) * axx + &
257 : CONJG(abcloPr(2,mp,nkvecp,lop)) * bxx + &
258 558454 : CONJG(abcloPr(3,mp,nkvecp,lop)) * cxx
259 :
260 763814 : IF (hmat%l_real) THEN
261 189309 : hmat%data_r(lorow,locol) = hmat%data_r(lorow,locol) + REAL(chi) * invsfct * REAL(prod)
262 : ELSE
263 369145 : hmat%data_c(lorow,locol) = hmat%data_c(lorow,locol) + chi * prod
264 : END IF
265 : END DO
266 : END DO
267 : END DO
268 : END DO
269 : END IF !If this lo to be calculated by fmpi rank
270 : END DO
271 : END DO ! end of lo = 1,atoms%nlo loop
272 : !$acc end kernels
273 11722 : CALL timestop("LO-LO")
274 : END IF
275 :
276 : !$acc end data
277 : !$acc end data
278 11722 : END SUBROUTINE hlomat
279 : END MODULE m_hlomat
|