Line data Source code
1 : !--------------------------------------------------------------------------------
2 : ! Copyright (c) 2016 Peter Grünberg Institut, Forschungszentrum Jülich, Germany
3 : ! This file is part of FLEUR and available as free software under the conditions
4 : ! of the MIT license as expressed in the LICENSE file in more detail.
5 : !--------------------------------------------------------------------------------
6 :
7 : MODULE m_cdnovlp
8 : USE m_juDFT
9 : #ifdef CPP_MPI
10 : USE mpi
11 : #endif
12 : USE m_force_a4_add
13 : USE m_sphbes
14 : USE m_phasy1
15 :
16 : IMPLICIT NONE
17 : PRIVATE
18 : PUBLIC :: cdnovlp
19 : CONTAINS
20 636 : SUBROUTINE cdnovlp(fmpi, sphhar, stars, atoms, sym, vacuum,cell, input, &
21 636 : l_st, jspin, rh, qpw, rho, rhvac, vpw, vr)
22 : !--------------------------------------------------------------------------
23 : ! calculates the overlapping core tail density and adds
24 : ! its contribution to the corresponging components of
25 : ! valence density.
26 : !
27 : ! OLD VERSION:
28 : ! The previous version to calculate the overlap of the
29 : ! core tails was done in real space:
30 : ! A three dimensional real space grid was set up. On each
31 : ! of these grid points the charge density of all atoms inside
32 : ! the unit cell and neigboring unit cells was calculated.
33 : ! This calculation required a lagrange fit since the core
34 : ! charge is on a radial mesh. The same was done in the vacuum
35 : ! region, except on each z-plane we worked on a two dimension
36 : ! grid. After the charge density was generated on a equidistant
37 : ! grid the charge density was FFTed into G-space. The set up
38 : ! of the charge density on the real space grid is rather time
39 : ! consuming. 3-loops are required for the 3D grid
40 : ! 1-loop over all atoms in the unit cells
41 : ! Larange interpolation
42 : ! 3D and 2D FFTs
43 : ! In order to save time the non-spherical contributions inside
44 : ! the sphere had been ignored. It turns out that the later
45 : ! approximation is pure in the context of force calculations.
46 : !
47 : ! PRESENT VERSION:
48 : ! The present version is written from scratch. It is based on the
49 : ! idea that the FFT of an overlap of spherically symmetric
50 : ! charges can be expressed by the product of
51 : !
52 : ! sum_natype{ F(G,ntype) * sum_atom(atype) {S(\vec{G},atom)}}
53 : !
54 : ! of form factor F and structure factor S. The Form factor
55 : ! depends only G while the structure factor depends on \vec{G}
56 : ! and can build up in G-space. F of a gaussian chargedensity can
57 : ! be calculated analytically.
58 : !
59 : ! The core-tails to the vacuum region are described by an
60 : ! exponentially decaying function into the vacuum:
61 : !
62 : ! rho(r_||,z,ivac)= sum_n{ rho(n,ivac) * exp(-kappa*(z-z_v))
63 : ! * exp(iG(n)r_||) }
64 : !
65 : ! And the plane waves are expanded into lattice harmonics
66 : ! up to a l_cutoff. Tests of the accuracy inside the sphere
67 : ! have shown that a reduction of the l_cutoff inside the
68 : ! in order to save time leads to sizable errors and should
69 : ! be omitted.
70 : !
71 : ! rho_L(r) = 4 \pi i^l \sum_{g =|= 0} \rho_int(g) r_i^{2} \times
72 : ! j_{l} (gr_i) \exp{ig\xi_i} Y^*_{lm} (g)
73 : !
74 : ! Tests have shown that the present version is about 10 times
75 : ! faster than the previous one also all nonspherical terms are
76 : ! included up to l=8 and the previous one included only l=0.
77 : ! For l=16 it is still faster by factor 2.
78 : !
79 : ! coded Stefan Bl"ugel, IFF Nov. 1997
80 : ! tested RObert Abt , IFF Dez. 1997
81 : !
82 : ! Added calculation of force contributions from coretails
83 : ! outside of their native muffin-tin spheres, i.e. in the
84 : ! interstitial region and other muffin-tins; only for bulk.
85 : ! Refer to Klüppelberg et al., PRB 91 035105 (2015)
86 : ! Aaron Klueppelberg, Oct. 2015
87 : !
88 : !--------------------------------------------------------------------------
89 :
90 : USE m_constants
91 : USE m_qpwtonmt
92 : USE m_cylbes
93 : USE m_dcylbs
94 :
95 : USE m_diflgr
96 : USE m_types
97 : USE m_intgr, ONLY : intgr3
98 : #ifdef CPP_MPI
99 : USE m_mpi_bc_st
100 : #endif
101 : !
102 : ! .. Parameters ..
103 : TYPE(t_mpi),INTENT(IN) :: fmpi
104 : TYPE(t_sphhar),INTENT(IN) :: sphhar
105 : TYPE(t_atoms),INTENT(IN) :: atoms
106 : TYPE(t_stars),INTENT(IN) :: stars
107 : TYPE(t_cell),INTENT(IN) :: cell
108 : TYPE(t_sym),INTENT(IN) :: sym
109 :
110 :
111 : TYPE(t_vacuum),INTENT(in):: vacuum
112 : TYPE(t_input),INTENT(in)::input
113 :
114 : INTEGER method1, method2
115 : PARAMETER (method1 = 2, method2 = 1)
116 : ! ..
117 : ! .. Scalar Arguments ..
118 :
119 : INTEGER,INTENT (IN) :: jspin
120 : LOGICAL,INTENT (IN) :: l_st
121 : ! ..
122 : ! .. Array Arguments ..
123 : COMPLEX,INTENT(IN),OPTIONAL :: vpw(:,:)
124 : REAL,INTENT(IN),OPTIONAL :: vr(:,0:,:,:)
125 : COMPLEX,INTENT (INOUT) :: qpw(stars%ng3,input%jspins)
126 : COMPLEX,INTENT (INOUT) :: rhvac(vacuum%nmzd,stars%ng2,2,input%jspins)
127 : REAL, INTENT (INOUT) :: rho(atoms%jmtd,0:sphhar%nlhd,atoms%ntype,input%jspins)
128 : REAL, INTENT (INOUT) :: rh(atoms%msh,atoms%ntype)
129 : ! ..
130 : ! .. Local Scalars ..
131 : COMPLEX czero,carg,VALUE,slope,c_ph,sm
132 : REAL dif,dxx,g,gz,dtildh,&
133 : & rkappa,sign,signz,tol_14,z,zero,zvac,&
134 : & g2,phi,gamma,qq,s13,s23,factor,gr
135 : INTEGER ig3,imz,ivac,j,j1,k,kz,k1,k2,l_cutoff,m0,&
136 : & n,nz,nrz,nzvac,&
137 : & irec2,irec3,irec1,m,gzi,dir,jm,lh,nat,left,minpar,kp,l,lm,maxl,nd,symint
138 : ! ..
139 : ! .. Local Arrays ..
140 636 : COMPLEX, ALLOCATABLE :: qpwc(:),ffonat_pT(:,:),pylm2(:,:,:)
141 636 : REAL, ALLOCATABLE :: vr2(:,:,:),integrand(:,:),integrandr(:),vrrgrid(:),vrigrid(:),bsl(:,:),force_a4_mt_loc(:,:)
142 636 : REAL acoff(atoms%ntype),alpha(atoms%ntype),rho_out(2)
143 636 : REAL rat(atoms%msh,atoms%ntype)
144 636 : COMPLEX gv(3),ycomp1(3,-1:1),ffonat(3,stars%ng3*sym%nop)
145 636 : INTEGER mshc(atoms%ntype),ioffset_pT(0:fmpi%isize-1),nkpt_pT(0:fmpi%isize-1)
146 : LOGICAL l_f2
147 : INTEGER, ALLOCATABLE :: n1(:),n2(:)
148 : ! ..
149 : DATA czero /(0.0,0.0)/, zero /0.0/, tol_14 /1.0e-10/!-14
150 : #ifdef CPP_MPI
151 : !EXTERNAL MPI_BCAST
152 : INTEGER ierr
153 : #endif
154 :
155 : !
156 : !----> Abbreviation
157 : !
158 : ! l_cutoff : cuts off the l-expansion of the nonspherical charge
159 : ! density due to the core-tails of neigboring atoms
160 : ! mshc : maximal radial meshpoint for which the radial coretail
161 : ! density is larger than tol_14
162 : ! method1 : two different ways to calculate the derivative of the
163 : ! charge density at the sphere boundary.
164 : ! (1) use subroutine diflgr based on lagrange interpol.
165 : ! (2) use two point formular in real space,
166 : ! see notes of SB.
167 : ! Tests have shown that (2) is more accurate.
168 : ! method2 : two different integration routines to calculate form
169 : ! factor of coretails outside the sphere.
170 : ! (1) use subroutine intgrz to integrate the tails from
171 : ! outside to inside.
172 : ! (2) use subroutine intgr3 to integrate the tails from
173 : ! muffin-tin radius to outside and include correction
174 : ! for start up.
175 : ! Tests have shown that (1) is more accurate.
176 : !
177 : !
178 :
179 1908 : ALLOCATE (qpwc(stars%ng3))
180 :
181 636 : l_f2 = input%l_f.AND.(input%f_level.GE.1).AND.(.NOT.l_st) ! f_level >= 1: coretails completely contained in force calculation
182 : ! Klueppelberg (force level 1)
183 636 : IF (l_f2) THEN
184 : ! Allocate the force arrays in the routine force_a4_add.f90
185 2 : IF (.NOT.ALLOCATED(force_a4_mt)) THEN
186 2 : CALL alloc_fa4_arrays(atoms,input)
187 : END IF
188 8 : ALLOCATE(force_a4_mt_loc,mold=force_a4_mt(:,:,jspin))
189 26 : force_a4_mt(:,:,jspin) = zero
190 26 : force_a4_mt_loc(:,:) = zero
191 26 : force_a4_is(:,:,jspin) = czero
192 :
193 : ! Calculate the distribution of Tasks onto processors
194 : ! Agenda here: every processor in general gets the same number of tasks,
195 : ! but if there are some left, distribute them on the last processors
196 : ! reason for that is that within the first few stars, the length of the
197 : ! stars will change rapidly, while for the last few stars, many will have
198 : ! the same length. Hence, the first few processors, which will calculate
199 : ! the spherical Bessel functions quite often, don't get as many stars to
200 : ! calculate as the last few processors
201 : ! the first star is \vec{0} and will not contribute to the core forces
202 : ! thereforce, stars are only considered starting from the second star.
203 : ! the number of stars is then nq3-1
204 :
205 : ! TODO: Proper parallelization of Klueppelberg force levels.
206 :
207 : !ioffset_pT = 0
208 : !minpar = (stars%ng3-1)/fmpi%isize ! MINimal number of elements calculated in each PARallel rank
209 : !nkpt_pT = minpar
210 : !left = (stars%ng3-1) - minpar * fmpi%isize
211 : !do j=1,left
212 : ! nkpt_pT(fmpi%isize-j) = nkpt_pT(fmpi%isize-j)+1
213 : !end do !j
214 :
215 : !do j=1,fmpi%isize-1
216 : ! ioffset_pT(j) = sum(nkpt_pT(0:j-1))
217 : !end do !j
218 :
219 : !ioffset_pT = ioffset_pT+1
220 6 : ALLOCATE ( ffonat_pT(3,(stars%ng3-1)*sym%nop) )
221 113698 : ffonat_pT = czero
222 :
223 : ! lattice/spherical harmonics related variables
224 2 : s13 = sqrt(1.0/3.0)
225 2 : s23 = sqrt(2.0/3.0)
226 2 : ycomp1(1,0) = czero
227 2 : ycomp1(2,0) = czero
228 2 : ycomp1(3,0) = cmplx(2.0*s13,0.0)
229 2 : ycomp1(1,-1) = cmplx(s23,0.0)
230 2 : ycomp1(2,-1) = cmplx(0.0,-s23)
231 2 : ycomp1(3,-1) = czero
232 2 : ycomp1(1,1) = cmplx(-s23,0.0)
233 2 : ycomp1(2,1) = cmplx(0.0,-s23)
234 2 : ycomp1(3,1) = czero
235 :
236 10 : ALLOCATE ( vr2(atoms%jmtd,0:sphhar%nlhd,atoms%ntype) )
237 334376 : vr2=0.0
238 : ! the l = 0 component of the potential is multiplied by r/sqrt(4 pi),
239 : ! for simple use, this is corrected here
240 8 : DO n = 1,atoms%ntype
241 2936 : vr2(:atoms%jri(n),0,n) = sfp_const*vr(:atoms%jri(n),0,n,jspin)/atoms%rmsh(:atoms%jri(n),n)
242 330248 : vr2(:,1:,n) = vr(:,1:,n,jspin)
243 : END DO ! n
244 :
245 6 : ALLOCATE ( integrandr(atoms%jmtd) )
246 8 : ALLOCATE ( pylm2( (atoms%lmaxd+1)**2,3,sym%nop ) )
247 6 : ALLOCATE ( vrrgrid(atoms%jmtd),vrigrid(atoms%jmtd) )
248 :
249 : ! (f)orce(f)actor(on)(at)oms calculation, parallelization in k
250 113730 : ffonat = czero
251 8 : DO n = 1, atoms%ntype
252 6 : nat = atoms%firstAtom(n)
253 6 : nd = sym%ntypsy(nat)
254 : ! find maximal l of the potential for atom (type) n
255 : ! directly reading max(llh(:,nd)) is only possible if llh is initialized to zero
256 : ! otherwise, there can be random numbers in it for high lh that are not used by each atom
257 6 : maxl = 0
258 492 : DO lh = 0,sphhar%nlh(nd)
259 492 : maxl = max(maxl,sphhar%llh(lh,nd))
260 : END DO ! lh
261 41256 : ALLOCATE ( bsl(0:maxl,atoms%jmtd),integrand(atoms%jmtd,0:maxl) ) ; bsl=0.0
262 6 : g = -0.1 ! g is the norm of a star and can't be negative, this is to initialize a check if the norm between stars has changed
263 :
264 : ! on each processor, calculate a certain consecutive set of k
265 6 : kp = 0
266 21324 : DO k = 2,stars%ng3 ! for k = 1 (G = 0), grad rho_core^alpha is zero
267 21318 : IF (abs(g-stars%sk3(k)).gt.tol_14) THEN ! only calculate new spherical Bessel functions if the length of the star vector has changed
268 21318 : g = stars%sk3(k)
269 :
270 : ! generate spherical Bessel functions up to maxl for the radial grid
271 10431608 : DO j = 1,atoms%jri(n)
272 10410290 : gr = g * atoms%rmsh(j,n)
273 10410290 : CALL sphbes(maxl,gr,bsl(:,j))
274 104124218 : bsl(:,j) = bsl(:,j) * atoms%rmsh(j,n)**2
275 : END DO ! j
276 : END IF
277 :
278 : ! as phasy1, but with i\vec{G} in it, i.e. gradient of plane wave, only for atom n and star k
279 21318 : CALL phasy2(atoms, stars, sym, cell, k, n, nat, pylm2)
280 :
281 : ! construct and evaluate radial integral int_0^R_{beta} r^2 j_{l}(Gr) V_{eff,l}^{beta}(r) dr
282 : !then, multiply by pylm2 times clnu
283 1748076 : DO lh = 0,sphhar%nlh(nd)
284 1726758 : l = sphhar%llh(lh,nd)
285 1188009504 : integrandr(:) = bsl(l,:) * vr2(:,lh,n)
286 1726758 : CALL intgr3(integrandr,atoms%rmsh(1,n),atoms%dx(n),atoms%jri(n),factor)
287 8655108 : DO j = 1,sym%nop
288 6907032 : symint = kp*sym%nop + j
289 29354886 : DO dir = 1,3
290 20721096 : sm = czero
291 59860944 : DO jm = 1,sphhar%nmem(lh,nd)
292 39139848 : lm = l*(l+1) + sphhar%mlh(jm,lh,nd) + 1
293 59860944 : sm = sm + conjg(sphhar%clnu(jm,lh,nd)) * pylm2(lm,dir,j)
294 : END DO ! jm
295 27628128 : ffonat_pT(dir,symint) = ffonat_pT(dir,symint) + factor * sm
296 : END DO ! dir
297 : END DO ! symint
298 : END DO ! lh
299 :
300 21324 : kp = kp+1
301 : END DO ! k stars
302 8 : DEALLOCATE ( bsl,integrand )
303 : END DO ! n atom type
304 :
305 : ! collect the entries of ffonat calculated by the different processors
306 : ! could be all collapsed to irank 0, but if the later part also gets
307 : ! parallelized over k at some point, now all iranks have ffonat available
308 : !#ifdef CPP_MPI
309 : ! ALLOCATE( n1(0:fmpi%isize-1),n2(0:fmpi%isize-1) )
310 : ! n1(:) = 3*nkpt_pT(:)*sym%nop
311 : ! n2(:) = 3*ioffset_pT(:)*sym%nop
312 : ! CALL MPI_ALLGATHERV(ffonat_pT(1,1),n1(fmpi%irank),MPI_COMPLEX,ffonat(1,1),n1,n2,MPI_COMPLEX,MPI_COMM_WORLD,ierr)
313 : ! DEALLOCATE(ffonat_pT,n1,n2)
314 : !#else
315 113698 : ffonat(:,(sym%nop+1):) = ffonat_pT
316 2 : IF (fmpi%irank==0) THEN
317 1 : DEALLOCATE(ffonat_pT)
318 : END IF
319 : !#endif
320 :
321 : END IF ! l_f2
322 :
323 : !
324 : !----> prepare local array to store pw-expansion of pseudo core charge
325 : !
326 1632594 : DO k = 1 , stars%ng3
327 1632594 : qpwc(k) = czero
328 : ENDDO
329 : !
330 : !----> (1) set up radial mesh beyond muffin-tin radius
331 : ! (2) cut_off core tails from noise
332 : !
333 : #ifdef CPP_MPI
334 636 : CALL MPI_BCAST(rh,atoms%msh*atoms%ntype,MPI_DOUBLE_PRECISION,0,fmpi%mpi_comm,ierr)
335 : #endif
336 1858 : mshc(:) = 0 ! This initialization is important because there may be atoms without core states.
337 1858 : nloop: DO n = 1 , atoms%ntype
338 1858 : IF ((atoms%econf(n)%num_core_states.GT.0).OR.l_st) THEN
339 829016 : DO j = 1 , atoms%jri(n)
340 829016 : rat(j,n) = atoms%rmsh(j,n)
341 : ENDDO
342 1170 : dxx = EXP(atoms%dx(n))
343 227004 : DO j = atoms%jri(n) + 1 , atoms%msh
344 227004 : rat(j,n) = rat(j-1,n)*dxx
345 : ENDDO
346 229344 : DO j = atoms%jri(n) - 1 , atoms%msh
347 229344 : rh(j,n) = rh(j,n)/ (fpi_const*rat(j,n)*rat(j,n))
348 : ENDDO
349 170606 : DO j = atoms%msh , atoms%jri(n) , -1
350 170606 : IF ( rh(j,n) .GT. tol_14 ) THEN
351 866 : mshc(n) = j
352 866 : CYCLE nloop
353 : END IF
354 : ENDDO
355 304 : mshc(n) = atoms%jri(n)
356 : ENDIF
357 : ENDDO nloop
358 : !
359 : !-----> the core density inside the spheres is replaced by a
360 : ! gaussian-like pseudo density : n(r) = acoff*exp(-alpha*r*r)
361 : ! acoff and alpha determined to obtain a continous and
362 : ! differentiable density at the sphere boundary.
363 : ! IF mshc = jri either core tail too small or no core (i.e. H)
364 : !
365 1858 : DO n = 1,atoms%ntype
366 1222 : nat = atoms%firstAtom(n)
367 1222 : nd = sym%ntypsy(nat)
368 1858 : IF ((mshc(n).GT.atoms%jri(n)).AND.((atoms%econf(n)%num_core_states.GT.0).OR.l_st)) THEN
369 :
370 866 : j1 = atoms%jri(n) - 1
371 : IF ( method1 .EQ. 1) THEN
372 : dif = diflgr(rat(j1,n),rh(j1,n))
373 : WRITE (oUnit,FMT=8000) n,rh(atoms%jri(n),n),dif
374 : alpha(n) = -0.5 * dif / ( rh(atoms%jri(n),n)*atoms%rmt(n) )
375 : ELSEIF ( method1 .EQ. 2) THEN
376 866 : alpha(n) = LOG( rh(j1,n) / rh(atoms%jri(n),n) )
377 : alpha(n) = alpha(n)&
378 866 : & / ( atoms%rmt(n)*atoms%rmt(n)*( 1.0-EXP( -2.0*atoms%dx(n) ) ) )
379 : ELSE
380 : WRITE (oUnit,'('' error in choice of method1 in cdnovlp '')')
381 : CALL juDFT_error("error in choice of method1 in cdnovlp"&
382 : & ,calledby ="cdnovlp")
383 : ENDIF
384 866 : acoff(n) = rh(atoms%jri(n),n) * EXP( alpha(n)*atoms%rmt(n)*atoms%rmt(n) )
385 : !WRITE (oUnit,FMT=8010) alpha(n),acoff(n)
386 597814 : DO j = 1,atoms%jri(n) - 1
387 597814 : rh(j,n) = acoff(n) * EXP( -alpha(n)*rat(j,n)**2 )
388 : ENDDO
389 :
390 : ! Subtract pseudo density contribution from own mt sphere from mt forces
391 : ! Klueppelberg (force level 1)
392 866 : IF (l_f2) THEN
393 18 : ALLOCATE ( integrand(atoms%jmtd,3) )
394 4128 : integrandr = zero
395 12390 : integrand = zero
396 2936 : DO j = 1,atoms%jri(n)
397 : integrandr(j) = -alpha(n) * acoff(n) * atoms%rmsh(j,n)**3 &
398 2936 : *sfp_const * exp(-alpha(n) * atoms%rmsh(j,n)**2) !*2
399 : ! factor of two missing? grad e^{-alpha*r^2} = -2alpha\vec{r}e^{-alpha*r^2}
400 : END DO ! j radial mesh
401 :
402 492 : DO lh = 0,sphhar%nlh(nd)
403 486 : IF (sphhar%llh(lh,nd).ne.1) CYCLE
404 :
405 72 : gv = czero
406 48 : DO jm = 1,sphhar%nmem(lh,nd)
407 30 : m = sphhar%mlh(jm,lh,nd)
408 :
409 138 : DO dir = 1,3
410 120 : gv(dir) = gv(dir) + ycomp1(dir,m)* sphhar%clnu(jm,lh,nd) ! why not conjg?
411 : END DO ! direction
412 :
413 : END DO ! jm
414 :
415 78 : DO dir = 1,3
416 26910 : DO j = 1,atoms%jri(n)
417 26424 : integrand(j,dir) = integrand(j,dir) - integrandr(j)* vr2(j,lh,n) * real(gv(dir))
418 : END DO ! j radial mesh
419 : END DO ! dir ection
420 :
421 : END DO ! lh lattice harmonics
422 :
423 24 : DO dir = 1,3
424 24 : CALL intgr3(integrand(:,dir),atoms%rmsh(1,n),atoms%dx(n),atoms%jri(n),force_a4_mt_loc(dir,n))
425 : END DO ! direction
426 6 : DEALLOCATE ( integrand )
427 : END IF !l_f2
428 : ELSE
429 356 : alpha(n) = 0.0
430 : ENDIF
431 : ENDDO
432 : !
433 : IF (fmpi%irank ==0) THEN
434 : 8000 FORMAT (/,10x,'core density and its first derivative ',&
435 : & 'at sph. bound. for atom type',&
436 : & i2,' is',3x,2e15.7)
437 : 8010 FORMAT (/,10x,'alpha=',f10.5,5x,'acoff=',f10.5)
438 : END IF
439 : !
440 : !=====> calculate the fourier transform of the core-pseudocharge
441 636 : IF (l_f2) THEN
442 : CALL ft_of_CorePseudocharge(fmpi,input,atoms,mshc,alpha,tol_14,rh, &
443 2 : acoff,stars,method2,rat,cell ,sym,qpwc,jspin,l_f2,vpw,ffonat,force_a4_mt_loc)
444 : ELSE
445 : CALL ft_of_CorePseudocharge(fmpi,input,atoms,mshc,alpha,tol_14,rh, &
446 634 : acoff,stars,method2,rat,cell ,sym,qpwc,jspin,l_f2)
447 : END IF
448 :
449 1632594 : DO k = 1 , stars%ng3
450 1632594 : qpw(k,jspin) = qpw(k,jspin) + qpwc(k)
451 : ENDDO
452 :
453 636 : IF (fmpi%irank ==0) THEN
454 : !
455 : !=====> calculate core-tails to the vacuum region
456 : ! Coretails expanded in exponentially decaying functions.
457 : ! Describe vacuum by:
458 : ! rho(r_||,z,ivac)= sum_n{ rho(n,ivac) * exp(-kappa*(z-z_v))
459 : ! * exp(iG(n)r_||) }
460 318 : IF (input%film ) THEN
461 : !+gu
462 73 : dtildh = 0.5 * tpi_const / cell%bmat(3,3)
463 73 : IF (vacuum%nvac.EQ.1) THEN
464 49 : rho_out(1) = qpwc(1)*cell%z1
465 164511 : DO k = 2,stars%ng3
466 164511 : IF ((stars%kv3(1,k).EQ.0).AND.(stars%kv3(2,k).EQ.0)) THEN
467 716 : nz = stars%nstr(k) !1
468 716 : g = stars%kv3(3,k) * cell%bmat(3,3)
469 716 : rho_out(1) = rho_out(1) + nz*qpwc(k)*SIN(g*cell%z1)/g
470 : ENDIF
471 : ENDDO
472 49 : rho_out(1) = qpwc(1) * dtildh - rho_out(1)
473 : ELSE
474 72 : DO ivac = 1, vacuum%nvac
475 48 : carg = CMPLX(0.0,0.0)
476 186624 : DO k = 2,stars%ng3
477 186624 : IF ((stars%kv3(1,k).EQ.0).AND.(stars%kv3(2,k).EQ.0)) THEN
478 1632 : g = stars%kv3(3,k) * cell%bmat(3,3) * (3. - 2.*ivac)
479 1632 : carg = carg -qpwc(k)*(EXP(ImagUnit*g*dtildh)-EXP(ImagUnit*g*cell%z1))/g
480 : ENDIF
481 : ENDDO
482 72 : rho_out(ivac) = qpwc(1) * ( dtildh-cell%z1 ) - AIMAG(carg)
483 : ENDDO
484 : ENDIF
485 : !-gu
486 : ! nzvac = min(50,nmz)
487 :
488 : !
489 : !---> loop over 2D stars
490 : !
491 22884 : DO 280 k = 1,stars%ng2
492 22811 : k1 = stars%kv2(1,k)
493 22811 : k2 = stars%kv2(2,k)
494 49606 : DO 270 ivac = 1,vacuum%nvac
495 26795 : VALUE = czero
496 26795 : slope = czero
497 26795 : sign = 3. - 2.*ivac
498 : !
499 : ! ---> sum over gz-stars
500 819080 : DO 250 kz = -stars%mx3,stars%mx3
501 792285 : ig3 = stars%ig(k1,k2,kz)
502 792285 : c_ph = stars%rgphs(k1,k2,kz) ! phase factor for invs=T & zrfs=F
503 : ! ----> use only stars within the g_max sphere (oct.97 shz)
504 792285 : IF (ig3.NE.0) THEN
505 499723 : gz = kz*cell%bmat(3,3)
506 499723 : carg = ImagUnit*sign*gz
507 499723 : VALUE = VALUE + c_ph*qpwc(ig3)* EXP(carg*cell%z1)
508 499723 : slope = slope + c_ph*carg*qpwc(ig3)* EXP(carg*cell%z1)
509 : END IF
510 26795 : 250 ENDDO
511 : ! roa work-around
512 26795 : IF ( ABS(REAL(VALUE)).GT.zero ) THEN
513 : ! roa work-around
514 : ! gb works also around
515 22032 : rkappa = - REAL( slope/VALUE )
516 22032 : IF (k.EQ.1) rkappa = VALUE/rho_out(ivac)
517 : ! rkappa = - sign * real( slope/value )
518 22032 : IF (rkappa.LE.zero) rkappa=MIN(rkappa,-tol_14)
519 22032 : IF (rkappa.GT.zero) rkappa=MAX(rkappa,tol_14)
520 : ! gb works also around
521 22032 : zvac = - LOG( tol_14/cabs(VALUE) ) / rkappa
522 22032 : zvac = MIN (2.*vacuum%nmz,abs(zvac)) ! avoid int-overflow in next line
523 22032 : nzvac = INT( zvac/vacuum%delz ) + 1
524 : ! IF ( rkappa.GT.zero .AND. real(value).GT.zero ) THEN
525 22032 : IF ( rkappa.GT.zero ) THEN
526 12493 : z = 0.
527 12493 : IF ( k.EQ.1 ) THEN
528 3378 : DO imz = 1 , MIN( nzvac,vacuum%nmz )
529 3311 : rhvac(imz,1,ivac,jspin) = rhvac(imz,1,ivac,jspin) + VALUE*EXP(-rkappa*z)
530 3311 : z = z + vacuum%delz
531 67 : 220 ENDDO
532 : ELSE
533 302194 : DO imz = 1 , MIN( nzvac,vacuum%nmzxy )
534 289768 : rhvac(imz,k,ivac,jspin) = rhvac(imz,k,ivac,jspin) + VALUE*EXP(-rkappa*z)
535 289768 : z = z + vacuum%delz
536 12426 : 230 ENDDO
537 : END IF
538 : END IF
539 : ! roa work-around
540 : END IF
541 : ! roa work-around
542 22811 : 270 ENDDO
543 73 : 280 ENDDO
544 : END IF
545 : !
546 : !=====> update density inside the spheres
547 : !
548 : ! ----> (1) subtract on-site contribution, because
549 : ! they are contained in the plane wave part
550 : !
551 929 : DO n = 1,atoms%ntype
552 929 : IF ((mshc(n).GT.atoms%jri(n)).AND.((atoms%econf(n)%num_core_states.GT.0).OR.l_st)) THEN
553 299340 : DO j = 1,atoms%jri(n)
554 : rho(j,0,n,jspin) = rho(j,0,n,jspin)&
555 299518 : & - sfp_const*rat(j,n)*rat(j,n)*rh(j,n)
556 : ENDDO
557 : ENDIF
558 : ENDDO
559 : !
560 : ! ----> (2) add the plane wave contribution of (core tails + on-site
561 : ! contribution) to the m.t. density, include full nonspherical
562 : ! components
563 : !
564 : ENDIF ! fmpi%irank ==0
565 636 : l_cutoff=input%coretail_lmax
566 : #ifdef CPP_MPI
567 636 : IF ( fmpi%isize > 1 ) CALL mpi_bc_st(fmpi,stars,qpwc)
568 : #endif
569 :
570 : CALL qpw_to_nmt(&
571 : & sphhar,atoms,stars,&
572 : & sym,cell ,fmpi,&
573 : & jspin,l_cutoff,qpwc,&
574 636 : & rho)
575 :
576 : #ifdef CPP_MPI
577 636 : IF ( fmpi%isize > 1) CALL mpi_col_st(fmpi,atoms,sphhar,rho(1,0,1,jspin))
578 : #endif
579 :
580 636 : DEALLOCATE (qpwc)
581 636 : IF (l_f2) THEN ! Klueppelberg (force level 1)
582 : ! Deallocate arrays used specifically during force calculation
583 2 : DEALLOCATE ( vr2,integrandr,pylm2 )
584 2 : DEALLOCATE ( vrrgrid,vrigrid )
585 : END IF
586 :
587 636 : END SUBROUTINE cdnovlp
588 :
589 : !***********************************************************************
590 : ! INTERNAL SUBROUTINES
591 : !***********************************************************************
592 :
593 636 : subroutine ft_of_CorePseudocharge(fmpi,input,atoms,mshc,alpha,&
594 636 : tol_14,rh,acoff,stars,method2,rat,cell ,sym,qpwc,jspin,l_f2,vpw,ffonat,force_a4_mt_loc)
595 :
596 : !=====> calculate the fourier transform of the core-pseudocharge
597 : !
598 : ! qpw(\vec{G}) = Sum_{n} [ F(G,n) * Sum_{atm{n}} S(\vec{G},atm) ]
599 : ! n = atom_type
600 : ! F = Formfactor = F_in_sphere + F_outsphere
601 : ! S = Structure factor
602 :
603 : USE m_types
604 :
605 : type(t_mpi) ,intent(in) :: fmpi
606 : TYPE(t_input), INTENT(in) ::input
607 : type(t_atoms) ,intent(in) :: atoms
608 : integer ,intent(in) :: mshc(atoms%ntype),jspin
609 : real ,intent(in) :: alpha(atoms%ntype), tol_14
610 : real ,intent(in) :: rh(atoms%msh,atoms%ntype)
611 : real ,intent(in) :: acoff(atoms%ntype)
612 : type(t_stars) ,intent(in) :: stars
613 : integer ,intent(in) :: method2
614 : real ,intent(in) :: rat(atoms%msh,atoms%ntype)
615 : type(t_cell) ,intent(in) :: cell
616 :
617 : type(t_sym) ,intent(in) :: sym
618 : LOGICAL, INTENT(IN) :: l_f2
619 : COMPLEX,OPTIONAL,INTENT(IN) :: vpw(:,:),ffonat(:,:)
620 : REAL,OPTIONAL,INTENT(IN) :: force_a4_mt_loc(:,:)
621 : complex ,intent(out) :: qpwc(stars%ng3)
622 :
623 : ! ..Local variables
624 : integer nat1, n, n_out_p, k
625 : INTEGER :: reducedStarsCutoff ! This is introduced to avoid numerical instabilities.
626 : complex czero
627 :
628 : ! ..Local arrays
629 636 : real :: qf(stars%ng3)
630 636 : complex qpwc_at(stars%ng3)
631 : #ifdef CPP_MPI
632 636 : complex :: qpwc_loc(stars%ng3)
633 : integer :: ierr
634 : #endif
635 636 : czero = (0.0,0.0)
636 : #ifdef CPP_MPI
637 1632594 : DO k = 1, stars%ng3
638 1632594 : qpwc_loc(k) = czero
639 : ENDDO
640 : #endif
641 1632594 : DO k = 1, stars%ng3
642 1631958 : IF (stars%sk3(k).LE.3.0*input%rkmax) reducedStarsCutoff = k ! The factor 3.0 is arbitrary. One could try going down to 2.0.
643 1632594 : qpwc(k) = czero
644 : ENDDO
645 :
646 : !
647 : !*****> start loop over the atom type
648 : !
649 1247 : DO n = 1 + fmpi%irank, atoms%ntype, fmpi%isize
650 611 : IF ( ( mshc(n) .GT. atoms%jri(n) ).AND.&
651 636 : & ( alpha(n) .GT. tol_14 ) ) THEN
652 :
653 433 : n_out_p = mshc(n)-atoms%jri(n)+1
654 :
655 : ! (1) Form factor for each atom type
656 :
657 : CALL FormFactor_forAtomType(atoms%msh,method2,n_out_p,reducedStarsCutoff,&
658 : atoms%rmt(n),atoms%jri(n),atoms%dx(n),mshc(n),rat(:,n), &
659 433 : rh(:,n),alpha(n),stars,cell,acoff(n),qf)
660 :
661 : ! (2) structure constant for each atom of atom type
662 :
663 433 : nat1 = atoms%firstAtom(n)
664 :
665 433 : IF (l_f2) THEN
666 : CALL StructureConst_forAtom(nat1,stars ,sym,reducedStarsCutoff,&
667 : atoms%neq(n),atoms%nat,atoms%taual,&
668 3 : cell,qf,qpwc_at,jspin,l_f2,n,vpw,ffonat)
669 : ELSE
670 : CALL StructureConst_forAtom(nat1,stars ,sym,reducedStarsCutoff,&
671 : atoms%neq(n),atoms%nat,atoms%taual,&
672 430 : cell,qf,qpwc_at,jspin,l_f2,n)
673 : END IF
674 : #ifdef CPP_MPI
675 993893 : DO k = 1, stars%ng3
676 993893 : qpwc_loc(k) = qpwc_loc(k) + qpwc_at(k)
677 : END DO
678 : #else
679 : DO k = 1 , stars%ng3
680 : qpwc(k) = qpwc(k) + qpwc_at(k)
681 : END DO
682 : #endif
683 :
684 : END IF
685 : ENDDO
686 : #ifdef CPP_MPI
687 : CALL mpi_allreduce(qpwc_loc,qpwc,stars%ng3,MPI_DOUBLE_COMPLEX,mpi_sum, &
688 636 : fmpi%mpi_comm,ierr)
689 636 : IF (l_f2) THEN
690 8 : CALL MPI_ALLREDUCE(MPI_IN_PLACE,force_a4_mt,SIZE(force_a4_mt),MPI_DOUBLE,MPI_SUM,fmpi%mpi_comm,ierr)
691 8 : CALL MPI_ALLREDUCE(MPI_IN_PLACE,force_a4_is,SIZE(force_a4_is),MPI_DOUBLE_COMPLEX,MPI_SUM,fmpi%mpi_comm,ierr)
692 : END IF
693 : #endif
694 636 : IF (l_f2) THEN
695 26 : force_a4_mt(:,:,jspin)=force_a4_mt(:,:,jspin)+force_a4_mt_loc(:,:)
696 : END IF
697 636 : end subroutine ft_of_CorePseudocharge
698 :
699 433 : SUBROUTINE StructureConst_forAtom(nat1,stars ,sym,reducedStarsCutoff,&
700 433 : neq,natd,taual,cell,qf,qpwc_at,jspin,l_f2,n,vpw,ffonat)
701 : ! Calculates the structure constant for each atom of atom type
702 :
703 : USE m_types
704 : USE m_spgrot
705 : USE m_constants
706 :
707 :
708 : integer, intent(in) :: nat1
709 : type(t_stars), intent(in) :: stars
710 :
711 : type(t_sym), intent(in) :: sym
712 : INTEGER, INTENT(IN) :: reducedStarsCutoff
713 : integer, intent(in) :: neq,natd, jspin, n
714 : real, intent(in) :: taual(3,natd)
715 : type(t_cell), intent(in) :: cell
716 : real, intent(in) :: qf(stars%ng3)
717 : LOGICAL, INTENT(IN) :: l_f2
718 : COMPLEX,OPTIONAL,INTENT(IN):: vpw(:,:),ffonat(:,:)
719 : complex, intent(out) :: qpwc_at(stars%ng3)
720 :
721 : ! ..Local variables
722 : integer k, nat2, nat, j
723 : real x
724 : complex sf, czero
725 :
726 : ! ..Local arrays
727 433 : integer kr(3,sym%nop)
728 : real force_mt_loc(3)
729 433 : complex phas(sym%nop), phase, force_is_loc(3)
730 : complex kcmplx(3)
731 :
732 433 : czero = (0.0,0.0)
733 993893 : qpwc_at(:) = czero
734 :
735 : ! first G=0
736 433 : k=1
737 433 : qpwc_at(k) = qpwc_at(k) + neq * qf(k)
738 :
739 : ! then G>0
740 :
741 433 : force_mt_loc=0.0
742 433 : force_is_loc=cmplx(0.0,0.0)
743 : !$OMP PARALLEL DO DEFAULT(none) &
744 : !$OMP SHARED(stars ,sym,reducedStarsCutoff,neq,natd,nat1,taual,cell,qf,qpwc_at,l_f2,ffonat,n,jspin,vpw) &
745 : !$OMP FIRSTPRIVATE(czero) &
746 : !$OMP PRIVATE(k,kr,phas,nat2,nat,sf,j,x,kcmplx,phase) &
747 433 : !$OMP REDUCTION(+:force_mt_loc,force_is_loc)
748 : DO k = 2,reducedStarsCutoff
749 :
750 : CALL spgrot(sym%nop, sym%symor, sym%mrot, sym%tau, sym%invtab, &
751 : stars%kv3(:,k), kr, phas)
752 :
753 : ! ----> start loop over equivalent atoms
754 :
755 : IF (l_f2) THEN ! Klueppelberg (force level 1)
756 : ! generate phase factors for each G, not only for each star, to incorporate the atomic phase factors
757 : kcmplx = cmplx(0.0,0.0)
758 : DO j = 1,sym%nop
759 : x = -tpi_const * ( kr(1,j) * taual(1,nat1) &
760 : + kr(2,j) * taual(2,nat1) &
761 : + kr(3,j) * taual(3,nat1) )
762 : phase = cmplx(cos(x),sin(x))
763 : ! generate muffin-tin part of core force component
764 : force_mt_loc(:) = force_mt_loc(:) + qf(k) * &
765 : phase * stars%nstr(k) * ffonat(:,(k-1)*sym%nop+j)
766 : kcmplx(:) = kcmplx(:) + kr(:,j) * phase * phas(j) ! should be conjg(phas(j)), but in FLEUR, only real phas(j) are accepted
767 : END DO !j
768 : kcmplx = matmul(kcmplx,cell%bmat) * stars%nstr(k) / sym%nop
769 : ! generate interstitial part of core force component
770 : force_is_loc(:) = force_is_loc(:) + qf(k) * &
771 : conjg(vpw(k,jspin))*cell%omtil*ImagUnit*kcmplx(:)
772 : END IF
773 :
774 : nat2 = nat1 + neq - 1
775 : DO nat = nat1, nat2
776 : sf = czero
777 : DO j = 1,sym%nop
778 : x = -tpi_const * ( kr(1,j) * taual(1,nat) &
779 : + kr(2,j) * taual(2,nat) &
780 : + kr(3,j) * taual(3,nat) )
781 : !gb sf = sf + CMPLX(COS(x),SIN(x))*phas(j)
782 : sf = sf + CMPLX(COS(x),SIN(x))*conjg(phas(j))
783 : END DO
784 : sf = sf / REAL( sym%nop )
785 : qpwc_at(k) = qpwc_at(k) + sf * qf(k)
786 : END DO
787 :
788 : ENDDO
789 : !$OMP END PARALLEL DO
790 :
791 433 : IF (l_f2) THEN ! Klueppelberg (force level 1)
792 12 : force_a4_mt(:,n,jspin) = force_a4_mt(:,n,jspin) + force_mt_loc
793 12 : force_a4_is(:,n,jspin) = force_a4_is(:,n,jspin) + force_is_loc
794 : END IF
795 433 : END SUBROUTINE StructureConst_forAtom
796 :
797 433 : SUBROUTINE FormFactor_forAtomType(msh, method2, n_out_p, reducedStarsCutoff, rmt, jri, dx, &
798 433 : mshc, rat, rh, alpha, stars, cell, acoff, &
799 433 : qf)
800 :
801 : USE m_types
802 : USE m_constants
803 : USE m_rcerf
804 : USE m_intgr, ONLY : intgr3, intgz0
805 :
806 :
807 : integer ,intent(in) :: msh,method2, n_out_p
808 : INTEGER, INTENT(IN) :: reducedStarsCutoff
809 : real ,intent(in) :: rmt
810 : integer ,intent(in) :: jri
811 : real ,intent(in) :: dx
812 : integer ,intent(in) :: mshc
813 : real ,intent(in) :: rat(msh)
814 : real ,intent(in) :: rh(msh)
815 : real ,intent(in) :: alpha
816 : type(t_stars) ,intent(in) :: stars
817 : type(t_cell) ,intent(in) :: cell
818 : real ,intent(in) :: acoff
819 : real ,intent(out) :: qf(stars%ng3)
820 :
821 : ! ..Local variables
822 : real f11, f12, ar, g, ai, qfin, qfout, gr, a4, alpha3, zero
823 : integer k, ir, j
824 : logical tail
825 :
826 : ! ..Local arrays
827 433 : real rhohelp(msh)
828 :
829 433 : zero = 0.0
830 993893 : qf(:) = 0.0
831 :
832 433 : tail = .FALSE.
833 433 : f11 = tpi_const * rmt * rh(jri) / alpha
834 433 : f12 = acoff * ( pi_const/alpha ) *SQRT(pi_const/alpha)
835 433 : ar = SQRT( alpha ) * rmt
836 :
837 : !$OMP PARALLEL DO DEFAULT(none) &
838 : !$OMP SHARED(stars,f11,f12,ar,method2,n_out_p,reducedStarsCutoff,jri,rat,rh,dx,tail) &
839 : !$OMP SHARED(alpha,cell,mshc,rmt,qf) &
840 : !$OMP FIRSTPRIVATE(zero) &
841 433 : !$OMP PRIVATE(k,g,ai,qfin,ir,j,rhohelp,qfout,gr,a4,alpha3)
842 : DO k = 1, reducedStarsCutoff
843 : g = stars%sk3(k)
844 : ! first G=0
845 : IF ( k.EQ.1 ) THEN
846 : ai = zero
847 :
848 : ! ----> calculate form factor inside the mt-sphere
849 : ! (use analytic integration of gaussian)
850 :
851 : qfin = - f11 + f12 * rcerf(ar,ai)
852 :
853 : ! ----> calculate form factor outside the mt-sphere
854 : ! (do numerical integration of tails)
855 :
856 : IF ( method2 .EQ. 1) THEN
857 : DO ir = -6 , n_out_p
858 : j = jri+ir-1
859 : rhohelp(mshc+1-j) = rat(j) * rat(j) * rat(j) * rh(j)
860 : END DO
861 : CALL intgz0(rhohelp, dx, n_out_p, qfout, tail)
862 : ELSE
863 : DO ir = 1 , n_out_p
864 : j = jri+ir-1
865 : rhohelp(ir) = rat(j) * rat(j) * rh(j)
866 : END DO
867 : CALL intgr3(rhohelp, rat(jri), dx, n_out_p, qfout)
868 : ! ----> have to remove the small r-correction from intgr3
869 : qfout = qfout - rmt*rhohelp(1)
870 : END IF
871 :
872 : qfout = fpi_const * qfout
873 : ELSE
874 : ! then G>0
875 : ai = 0.5*g/SQRT(alpha)
876 : gr = g*rmt
877 : a4 = 0.25/alpha
878 :
879 : ! ----> calculate form factor inside the mt-sphere
880 : ! (use analytic integration of gaussian)
881 :
882 : qfin = - f11 * SIN(gr)/gr + f12 * rcerf(ar,ai) * EXP(-a4*g*g)
883 :
884 : ! ----> calculate form factor outside the mt-sphere
885 : ! (do numerical integration of tails)
886 :
887 : IF ( method2 .EQ. 1) THEN
888 : DO ir = -6 , n_out_p
889 : j = jri+ir-1
890 : rhohelp(mshc-jri+2-ir) = rat(j)*rat(j) * rh(j) * SIN( g*rat(j) )
891 : END DO
892 :
893 : ! ----> note we use here the integration routine for vacuum. Because
894 : ! the vacuum integration is made for an inwards integration
895 : ! from outside to inside. Outside the starting value will be
896 : ! nearly zero since the core density is small. if the tail
897 : ! correction (tail=.true.) is included for the integrals, the
898 : ! integrand is from infinity inward. This might not be
899 : ! necessary. Further the integration routine is made for
900 : ! equidistant meshpoints, therefore the term r(i) of
901 : ! dr/di = dx*r(i) is included in rhohelp
902 :
903 : CALL intgz0(rhohelp,dx,n_out_p,qfout,tail)
904 : ELSE
905 : DO ir = 1 , n_out_p
906 : j = jri+ir-1
907 : rhohelp(ir) = rat(j) * rh(j) * SIN(g*rat(j))
908 : END DO
909 : CALL intgr3(rhohelp, rat(jri), dx, n_out_p, qfout)
910 : ! ----> have to remove the small r-correction from intgr3
911 : !roa...correction.from.intgr3.......................
912 : IF (rhohelp(1)*rhohelp(2).GT.zero) THEN
913 : alpha3 = 1.0 + LOG(rhohelp(2)/rhohelp(1))/dx
914 : IF (alpha3.GT.zero) qfout = qfout - rat(jri)*rhohelp(1)/alpha3
915 : END IF
916 : !roa...end.correction...............................
917 : END IF
918 :
919 : qfout = fpi_const * qfout / g
920 : END IF
921 : qf(k) = (qfin + qfout)/cell%omtil
922 : END DO
923 : !$OMP END PARALLEL DO
924 433 : END SUBROUTINE FormFactor_forAtomType
925 : END MODULE m_cdnovlp
|