Line data Source code
1 : !--------------------------------------------------------------------------------
2 : ! Copyright (c) 2016 Peter Grünberg Institut, Forschungszentrum Jülich, Germany
3 : ! This file is part of FLEUR and available as free software under the conditions
4 : ! of the MIT license as expressed in the LICENSE file in more detail.
5 : !--------------------------------------------------------------------------------
6 : #ifdef _OPENACC
7 : #define CPP_OMP not_used
8 : #else
9 : #define CPP_OMP $OMP
10 : #endif
11 : MODULE m_hsmt_soc_offdiag
12 : USE m_juDFT
13 : IMPLICIT NONE
14 : CONTAINS
15 372 : SUBROUTINE hsmt_soc_offdiag(n,atoms,cell,fmpi,nococonv,lapw,sym,usdus,td,fjgj,hmat)
16 : USE m_constants, ONLY : fpi_const,tpi_const
17 : USE m_types
18 : USE m_hsmt_spinor
19 : USE m_hsmt_fjgj
20 : IMPLICIT NONE
21 : TYPE(t_mpi),INTENT(IN) :: fmpi
22 : TYPE(t_nococonv),INTENT(IN) :: nococonv
23 : TYPE(t_atoms),INTENT(IN) :: atoms
24 : TYPE(t_cell),INTENT(IN) :: cell
25 : TYPE(t_lapw),INTENT(IN) :: lapw
26 : TYPE(t_sym ),INTENT(IN) :: sym
27 : TYPE(t_usdus),INTENT(IN) :: usdus
28 : TYPE(t_tlmplm),INTENT(IN) :: td
29 : TYPE(t_fjgj),INTENT(IN) :: fjgj
30 : CLASS(t_mat),INTENT(INOUT) :: hmat(:,:)!(2,2)
31 : ! ..
32 : ! .. Scalar Arguments ..
33 : INTEGER, INTENT (IN) :: n
34 : ! ..
35 : ! ..
36 : ! .. Local Scalars ..
37 : REAL tnn(3),ski(3), fjkiln,gjkiln
38 : INTEGER kii,ki,kj,l,nn,j1,j2,ll,l3,kj_off,kj_vec,jv
39 : INTEGER NVEC_rem !remainder
40 : INTEGER, PARAMETER :: NVEC = 128
41 : ! ..
42 : ! .. Local Arrays ..
43 372 : REAL fleg1(0:atoms%lmaxd),fleg2(0:atoms%lmaxd),fl2p1(0:atoms%lmaxd)
44 : COMPLEX:: chi(2,2,2,2)
45 372 : REAL, ALLOCATABLE :: plegend(:,:),dplegend(:,:)
46 372 : REAL, ALLOCATABLE :: xlegend(:), dot(:)
47 372 : COMPLEX, ALLOCATABLE :: cph(:),fct(:),angso(:,:,:)
48 :
49 372 : CALL timestart("offdiagonal soc-setup")
50 :
51 : !$acc update self(hmat(1,1)%data_c,hmat(2,1)%data_c,hmat(1,2)%data_c,hmat(2,2)%data_c)
52 :
53 3760 : DO l = 0,atoms%lmaxd
54 3388 : fleg1(l) = REAL(l+l+1)/REAL(l+1)
55 3388 : fleg2(l) = REAL(l)/REAL(l+1)
56 3760 : fl2p1(l) = REAL(l+l+1)/fpi_const
57 : END DO
58 : !!$acc data copyin(td,td%rsoc%rsopp,td%rsoc%rsopdp,td%rsoc%rsoppd,td%rsoc%rsopdpd)
59 : !CPP_OMP PARALLEL DEFAULT(NONE)&
60 : !CPP_OMP SHARED(n,lapw,atoms,td,fjgj,nococonv,fl2p1,fleg1,fleg2,hmat,fmpi)&
61 : !CPP_OMP PRIVATE(kii,ki,ski,kj,plegend,dplegend,l,j1,j2,angso,chi)&
62 : !CPP_OMP PRIVATE(cph,dot,nn,tnn,fct,xlegend,l3,fjkiln,gjkiln,NVEC_rem)&
63 372 : !CPP_OMP PRIVATE(kj_off,kj_vec,jv)
64 : ALLOCATE(cph(NVEC))
65 : ALLOCATE(xlegend(NVEC))
66 : ALLOCATE(plegend(NVEC,0:2))
67 : ALLOCATE(dplegend(NVEC,0:2))
68 : ALLOCATE(fct(NVEC))
69 : ALLOCATE(dot(NVEC))
70 : ALLOCATE(angso(NVEC,2,2))
71 : !CPP_OMP DO SCHEDULE(DYNAMIC,1)
72 : DO ki = fmpi%n_rank+1, lapw%nv(1), fmpi%n_size
73 : kii=(ki-1)/fmpi%n_size+1
74 :
75 : DO kj_off = 1, ki, NVEC
76 : NVEC_rem = NVEC
77 : kj_vec = kj_off - 1 + NVEC
78 : IF (kj_vec > ki) THEN
79 : kj_vec = ki
80 : NVEC_rem = ki - kj_off + 1
81 : ENDIF
82 : if (NVEC_rem<0 ) exit
83 :
84 : !Set up spinors...
85 : CALL hsmt_spinor_soc(n,ki,nococonv,lapw,chi,angso,kj_off,kj_vec)
86 :
87 : !---> set up phase factors
88 : cph = 0.0
89 : ski = lapw%gvec(:,ki,1)
90 : DO nn = atoms%firstAtom(n), atoms%firstAtom(n) + atoms%neq(n) - 1
91 : tnn = tpi_const*atoms%taual(:,nn)
92 : DO jv = 1,NVEC_rem
93 : kj = kj_off - 1 + jv
94 : dot(jv) = DOT_PRODUCT(ski(1:3)-lapw%gvec(1:3,kj,1),tnn(1:3))
95 : END DO
96 : cph(:NVEC_rem) = cph(:NVEC_rem) + CMPLX(COS(dot(:NVEC_rem)),SIN(dot(:NVEC_rem)))
97 : END DO
98 :
99 : !---> x for legendre polynomials
100 : DO jv = 1,NVEC_rem
101 : kj = kj_off - 1 + jv
102 : xlegend(jv) = DOT_PRODUCT(lapw%gk(1:3,kj,1),lapw%gk(1:3,ki,1))
103 : END DO
104 : plegend(:NVEC_rem,0) = 1.0
105 : dplegend(:NVEC_rem,0) = 0.0
106 :
107 : !---> update overlap and l-diagonal hamiltonian matrix
108 : !!$acc kernels &
109 : !!$acc copyin(atoms,atoms%lmax,xlegend,cph,angso)&
110 : !!$acc create(plegend,dplegend,fct)&
111 : !!$acc present(fjgj,fjgj%fj,fjgj%gj)&
112 : !!$acc present(hmat(1,1)%data_c,hmat(2,1)%data_c,hmat(1,2)%data_c,hmat(2,2)%data_c)
113 : DO l = 1,atoms%lmax(n)
114 : !---> legendre polynomials
115 : l3 = MODULO(l, 3)
116 : IF (l == 1) THEN
117 : plegend(:NVEC_rem,1) = xlegend(:NVEC_rem)
118 : dplegend(:NVEC_rem,1) = 1.0
119 : ELSE
120 : plegend(:NVEC_rem,l3) = fleg1(l-1)*xlegend(:NVEC_rem)*plegend(:NVEC_rem,MODULO(l-1,3)) - fleg2(l-1)*plegend(:NVEC_rem,MODULO(l-2,3))
121 : dplegend(:NVEC_rem,l3)=REAL(l)*plegend(:NVEC_rem,MODULO(l-1,3))+xlegend(:NVEC_rem)*dplegend(:NVEC_rem,MODULO(l-1,3))
122 : END IF ! l
123 : DO j1=1,2
124 : DO j2=1,2
125 : fct(:NVEC_rem) =cph(:NVEC_rem) * dplegend(:NVEC_rem,l3)*fl2p1(l)*(&
126 : fjgj%fj(ki,l,j1,1)*fjgj%fj(kj_off:kj_vec,l,j2,1) *td%rsoc%rsopp(n,l,j1,j2) + &
127 : fjgj%fj(ki,l,j1,1)*fjgj%gj(kj_off:kj_vec,l,j2,1) *td%rsoc%rsopdp(n,l,j1,j2) + &
128 : fjgj%gj(ki,l,j1,1)*fjgj%fj(kj_off:kj_vec,l,j2,1) *td%rsoc%rsoppd(n,l,j1,j2) + &
129 : fjgj%gj(ki,l,j1,1)*fjgj%gj(kj_off:kj_vec,l,j2,1) *td%rsoc%rsopdpd(n,l,j1,j2)) &
130 : * angso(:NVEC_rem,j1,j2)
131 :
132 : hmat(1,1)%data_c(kj_off:kj_vec,kii)=hmat(1,1)%data_c(kj_off:kj_vec,kii) + chi(1,1,j1,j2)*fct(:NVEC_rem)
133 : hmat(1,2)%data_c(kj_off:kj_vec,kii)=hmat(1,2)%data_c(kj_off:kj_vec,kii) + chi(1,2,j1,j2)*fct(:NVEC_rem)
134 : hmat(2,1)%data_c(kj_off:kj_vec,kii)=hmat(2,1)%data_c(kj_off:kj_vec,kii) + chi(2,1,j1,j2)*fct(:NVEC_rem)
135 : hmat(2,2)%data_c(kj_off:kj_vec,kii)=hmat(2,2)%data_c(kj_off:kj_vec,kii) + chi(2,2,j1,j2)*fct(:NVEC_rem)
136 : ENDDO
137 : ENDDO
138 : !---> end loop over l
139 : ENDDO
140 : !!$acc end kernels
141 : ENDDO
142 : !---> end loop over ki
143 : ENDDO
144 : !CPP_OMP END DO
145 : !---> end loop over atom types (ntype)
146 : DEALLOCATE(xlegend,plegend,dplegend)
147 : DEALLOCATE(cph)
148 : !CPP_OMP END PARALLEL
149 : !!$acc end data
150 372 : CALL timestop("offdiagonal soc-setup")
151 :
152 372 : if (atoms%nlo(n)>0) call hsmt_soc_offdiag_LO(n,atoms,cell,fmpi,nococonv,lapw,sym,td,usdus,fjgj,hmat)
153 : !$acc update device(hmat(1,1)%data_c,hmat(2,1)%data_c,hmat(1,2)%data_c,hmat(2,2)%data_c)
154 372 : RETURN
155 372 : END SUBROUTINE hsmt_soc_offdiag
156 :
157 132 : SUBROUTINE hsmt_soc_offdiag_LO(n,atoms,cell,fmpi,nococonv,lapw,sym,td,ud,fjgj,hmat)
158 : USE m_constants, ONLY : fpi_const,tpi_const
159 : USE m_types
160 : USE m_hsmt_spinor
161 : USE m_setabc1lo
162 : USE m_hsmt_fjgj
163 : IMPLICIT NONE
164 : TYPE(t_mpi),INTENT(IN) :: fmpi
165 : TYPE(t_nococonv),INTENT(IN) :: nococonv
166 : TYPE(t_atoms),INTENT(IN) :: atoms
167 : TYPE(t_cell),INTENT(IN) :: cell
168 : TYPE(t_lapw),INTENT(IN) :: lapw
169 : TYPE(t_sym),INTENT(IN) :: sym
170 : TYPE(t_tlmplm),INTENT(IN) :: td
171 : TYPE(t_usdus),INTENT(IN) :: ud
172 : TYPE(t_fjgj),INTENT(IN) :: fjgj
173 : CLASS(t_mat),INTENT(INOUT) :: hmat(:,:)!(2,2)
174 : ! ..
175 : ! .. Scalar Arguments ..
176 : INTEGER, INTENT (IN) :: n
177 : ! ..
178 : ! ..
179 : ! .. Local Scalars ..
180 : REAL tnn(3),ski(3)
181 : INTEGER ki,kj,l,nn,j1,j2,lo,ilo,locol_loc,locol_mat,lorow,ll,nkvec,nkvecp,na,invsfct
182 : COMPLEX :: fct
183 : ! ..
184 : ! .. Local Arrays ..
185 132 : REAL fleg1(0:atoms%lmaxd),fleg2(0:atoms%lmaxd),fl2p1(0:atoms%lmaxd)
186 132 : COMPLEX:: chi(2,2,2,2),angso(lapw%nv(1),2,2)
187 132 : REAL, ALLOCATABLE :: plegend(:,:),dplegend(:,:)
188 132 : COMPLEX, ALLOCATABLE :: cph(:)
189 132 : REAL :: alo1(atoms%nlod,2),blo1(atoms%nlod,2),clo1(atoms%nlod,2)
190 132 : CALL timestart("offdiagonal soc-setup LO")
191 :
192 1360 : DO l = 0,atoms%lmaxd
193 1228 : fleg1(l) = REAL(l+l+1)/REAL(l+1)
194 1228 : fleg2(l) = REAL(l)/REAL(l+1)
195 1360 : fl2p1(l) = REAL(l+l+1)/fpi_const
196 : END DO
197 660 : ALLOCATE(cph(MAXVAL(lapw%nv)))
198 792 : ALLOCATE(plegend(MAXVAL(lapw%nv),0:atoms%lmaxd))
199 792 : ALLOCATE(dplegend(MAXVAL(lapw%nv),0:atoms%lmaxd))
200 472421 : plegend=0.0
201 51715 : plegend(:,0)=1.0
202 51715 : dplegend(:,0)=0.e0
203 51715 : dplegend(:,1)=1.e0
204 :
205 396 : DO j1=1,2
206 396 : call setabc1lo(atoms,n,ud,j1, alo1,blo1,clo1)
207 : ENDDO
208 : !Normalization taken from hsmt_ab
209 920 : alo1=alo1*fpi_const/SQRT(cell%omtil)* ((atoms%rmt(n)**2)/2)
210 920 : blo1=blo1*fpi_const/SQRT(cell%omtil)* ((atoms%rmt(n)**2)/2)
211 920 : clo1=clo1*fpi_const/SQRT(cell%omtil)* ((atoms%rmt(n)**2)/2)
212 :
213 264 : DO na = atoms%firstAtom(n), atoms%firstAtom(n) + atoms%neq(n) - 1
214 264 : IF ((sym%invsat(na) == 0) .OR. (sym%invsat(na) == 1)) THEN
215 : !---> if this atom is the first of two atoms related by inversion,
216 : !---> the contributions to the overlap matrix of both atoms are added
217 : !---> at once. where it is made use of the fact, that the sum of
218 : !---> these contributions is twice the real part of the contribution
219 : !---> of each atom. note, that in this case there are twice as many
220 : !---> (2*(2*l+1)) k-vectors (compare abccoflo and comments there).
221 132 : IF (sym%invsat(na) == 0) invsfct = 1
222 132 : IF (sym%invsat(na) == 1) invsfct = 2
223 : !
224 394 : DO lo = 1,atoms%nlo(n)
225 262 : l = atoms%llo(lo,n)
226 262 : if (l==0) cycle !no SOC for s-states
227 660 : DO nkvec = 1,invsfct* (2*l+1)
228 396 : locol_mat= lapw%nv(1)+lapw%index_lo(lo,na)+nkvec !this is the column of the matrix
229 658 : IF (MOD(locol_mat-1,fmpi%n_size) == fmpi%n_rank) THEN !only this MPI rank calculates this column
230 219 : locol_loc=(locol_mat-1)/fmpi%n_size+1 !this is the column in local storage
231 219 : ki=lapw%kvec(nkvec,lo,na) !this LO is attached to this k+G
232 :
233 : !---> legendre polynomials
234 78345 : DO kj = 1,lapw%nv(1)
235 312723 : plegend(kj,1) = DOT_PRODUCT(lapw%gk(1:3,kj,1),lapw%gk(1:3,ki,1))
236 : END DO
237 219 : DO ll = 1,l - 1
238 0 : plegend(:,ll+1) = fleg1(l)*plegend(:,1)*plegend(:,ll) - fleg2(l)*plegend(:,ll-1)
239 219 : dplegend(:,ll+1)=REAL(l+1)*plegend(:,ll)+plegend(:,1)*dplegend(:,ll)
240 : END DO
241 : !---> set up phase factors
242 78345 : cph = 0.0
243 876 : ski = lapw%gvec(:,ki,1)
244 876 : tnn = tpi_const*atoms%taual(:,na)
245 78345 : DO kj = 1,lapw%nv(1)
246 : cph(kj) = cph(kj) +&
247 : CMPLX(COS(DOT_PRODUCT(ski-lapw%gvec(:,kj,1),tnn)),&
248 625227 : SIN(DOT_PRODUCT(ski-lapw%gvec(:,kj,1),tnn)))
249 : END DO
250 : !Set up spinors...
251 219 : CALL hsmt_spinor_soc(n,ki,nococonv,lapw,chi,angso,1,size(angso,1))
252 :
253 657 : DO j1=1,2
254 1533 : DO j2=1,2
255 : !DO j2=j1,j1
256 : !---> update l-diagonal hamiltonian matrix with LAPW,LO contribution
257 313380 : DO kj = 1,lapw%nv(j2)
258 : fct =cph(kj) * dplegend(kj,l)*fl2p1(l)*(&
259 : alo1(lo,j1)*fjgj%fj(kj,l,j2,1) *td%rsoc%rsopp(n,l,j1,j2) + &
260 : alo1(lo,j1)*fjgj%gj(kj,l,j2,1) *td%rsoc%rsopdp(n,l,j1,j2) + &
261 : blo1(lo,j1)*fjgj%fj(kj,l,j2,1) *td%rsoc%rsoppd(n,l,j1,j2) + &
262 : blo1(lo,j1)*fjgj%gj(kj,l,j2,1) *td%rsoc%rsopdpd(n,l,j1,j2)+ &
263 : clo1(lo,j1)*fjgj%fj(kj,l,j2,1) *td%rsoc%rsopplo(n,lo,j1,j2) + &
264 : clo1(lo,j1)*fjgj%gj(kj,l,j2,1) *td%rsoc%rsopdplo(n,lo,j1,j2)) &
265 312504 : * angso(kj,j1,j2)
266 312504 : hmat(1,1)%data_c(kj,locol_loc)=hmat(1,1)%data_c(kj,locol_loc) + chi(1,1,j1,j2)*fct
267 312504 : hmat(1,2)%data_c(kj,locol_loc)=hmat(1,2)%data_c(kj,locol_loc) + chi(1,2,j1,j2)*fct
268 312504 : hmat(2,1)%data_c(kj,locol_loc)=hmat(2,1)%data_c(kj,locol_loc) + chi(2,1,j1,j2)*fct
269 313380 : hmat(2,2)%data_c(kj,locol_loc)=hmat(2,2)%data_c(kj,locol_loc) + chi(2,2,j1,j2)*fct
270 : ENDDO
271 : !Update LO-LO part
272 3054 : DO ilo=1,atoms%nlo(n)
273 2616 : if (l == atoms%llo(ilo,n)) THEN !LO with same L found....
274 3504 : DO nkvecp = 1,invsfct* (2*l+1)
275 2628 : kj=lapw%kvec(nkvecp,ilo,na) !this LO is attached to this k+G
276 2628 : lorow= lapw%nv(1)+lapw%index_lo(ilo,na)+nkvecp !local row
277 2628 : if (lorow>locol_mat) cycle
278 : fct =cph(kj) * dplegend(kj,l)*fl2p1(l)*(&
279 : alo1(lo,j1)*alo1(ilo,j2) *td%rsoc%rsopp(n,l,j1,j2) + &
280 : alo1(lo,j1)*blo1(ilo,j2) *td%rsoc%rsopdp(n,l,j1,j2) + &
281 : alo1(lo,j1)*clo1(ilo,j2) *td%rsoc%rsoplop(n,ilo,j1,j2) + &
282 : blo1(lo,j1)*alo1(ilo,j2) *td%rsoc%rsoppd(n,l,j1,j2) + &
283 : blo1(lo,j1)*blo1(ilo,j2) *td%rsoc%rsopdpd(n,l,j1,j2)+ &
284 : blo1(lo,j1)*clo1(ilo,j2) *td%rsoc%rsoplopd(n,ilo,j1,j2)+ &
285 : clo1(lo,j1)*alo1(ilo,j2) *td%rsoc%rsopplo(n,lo,j1,j2) + &
286 : clo1(lo,j1)*blo1(ilo,j2) *td%rsoc%rsopdplo(n,lo,j1,j2)+ &
287 : clo1(lo,j1)*clo1(ilo,j2) *td%rsoc%rsoploplop(n,lo,ilo,j1,j2)) &
288 1752 : * angso(kj,j1,j2)
289 1752 : hmat(1,1)%data_c(lorow,locol_loc)=hmat(1,1)%data_c(lorow,locol_loc) + chi(1,1,j1,j2)*fct
290 1752 : hmat(1,2)%data_c(lorow,locol_loc)=hmat(1,2)%data_c(lorow,locol_loc) + chi(1,2,j1,j2)*fct
291 1752 : hmat(2,1)%data_c(lorow,locol_loc)=hmat(2,1)%data_c(lorow,locol_loc) + chi(2,1,j1,j2)*fct
292 3504 : hmat(2,2)%data_c(lorow,locol_loc)=hmat(2,2)%data_c(lorow,locol_loc) + chi(2,2,j1,j2)*fct
293 : ENDDO
294 : ENDIF
295 : ENDDO
296 : ENDDO
297 : ENDDO
298 : ENDIF !This PE works on LO
299 : ENDDO!LO
300 : !---> end loop over ki
301 : ENDDO
302 : ENDIF
303 : ENDDO
304 132 : CALL timestop("offdiagonal soc-setup LO")
305 :
306 132 : RETURN
307 132 : END SUBROUTINE hsmt_soc_offdiag_LO
308 : END MODULE m_hsmt_soc_offdiag
309 :
310 : #if false
311 : !Code snipplet useful for debugging only
312 : fct(:NVEC_rem) =cph(:NVEC_rem) * dplegend(:NVEC_rem,l3)*fl2p1(l)*(&
313 : fjkiln*fjgj%fj(kj_off:kj_vec,l,j2,1) *td%rsoc%rsopp(n,l,j1,j2) ) &
314 : * angso(:NVEC_rem,j1,j2)
315 :
316 : BLOCK
317 : use m_anglso
318 : USE m_ylm
319 : INTEGER :: m1,m2,is1,is2,lm1,lm2
320 : COMPLEX :: soangl(0:atoms%lmaxd,-atoms%lmaxd:atoms%lmaxd,2,-atoms%lmaxd:atoms%lmaxd,2),angso2
321 : COMPLEX :: ylm1( (atoms%lmaxd+1)**2 ), ylm2( (atoms%lmaxd+1)**2 )
322 : INTEGER :: ispjsp(2)
323 : if (kj_off/=kj_vec) call judft_error("DEBUG Problem")
324 : DATA ispjsp/1,-1/
325 : CALL ylm4(l,lapw%gk(:,ki,1),ylm1)
326 : CALL ylm4(l,lapw%gk(:,kj,1),ylm2)
327 : angso2=0.0
328 : is1=ispjsp(j1)
329 : is2=ispjsp(j2)
330 : DO m1=-l,l
331 : lm1=l*(l+1)+m1+1
332 : DO m2=-l,l
333 : lm2=l*(l+1)+m2+1
334 : angso2=angso2+ylm1(lm1)*conjg(ylm2(lm2))* &
335 : anglso(nococonv%beta(n),nococonv%alph(n),l,m1,is1,l,m2,is2)
336 : ENDDO
337 : ENDDO
338 : fct(1)=angso2*fjgj%fj(ki,l,j1,1)*fjgj%fj(kj_off,l,j2,1) *td%rsoc%rsopp(n,l,j1,j2)
339 : END BLOCK
340 : #endif
|