Line data Source code
1 : !--------------------------------------------------------------------------------
2 : ! Copyright (c) 2016 Peter Grünberg Institut, Forschungszentrum Jülich, Germany
3 : ! This file is part of FLEUR and available as free software under the conditions
4 : ! of the MIT license as expressed in the LICENSE file in more detail.
5 : !--------------------------------------------------------------------------------
6 : MODULE m_hsmt_ab
7 : !! Module to produce matching coefficients.
8 :
9 : USE m_juDFT
10 :
11 : IMPLICIT NONE
12 :
13 : CONTAINS
14 :
15 49609 : SUBROUTINE hsmt_ab(sym,atoms,noco,nococonv,ilSpin,igSpin,n,na,cell,lapw,fjgj,abCoeffs,ab_size,l_nonsph,abclo,alo1,blo1,clo1)
16 : !! Construct the matching coefficients of the form:
17 : !! \begin{aligned}
18 : !! a_{l,m,p}^{\mu,\boldsymbol{G}}(\boldsymbol{k}) = e^{i \boldsymbol{K}\cdot\boldsymbol{\tau}_{\mu}}
19 : !! Y_{l}^{m*}(R^{\mu}\boldsymbol{K})
20 : !! f_{l,p}^{\alpha}(K)
21 : !! \end{aligned}
22 : !! with \(\boldsymbol{K} = \boldsymbol{k + G \pm q/2}\).
23 : !!
24 : !! For example, for p = 0 == acof this translates to:
25 : !! \begin{aligned}
26 : !! f_{l,0}^{\alpha}(K) = \frac{4\pi}{\sqrt{\Omega}W}
27 : !! (\overset{.}{u}_{l}(R_{\alpha}) * K * j_{l}^{'}(K R_{\alpha})
28 : !! -\overset{.}{u}_{l}^{'}(R_{\alpha}) * j_{l}(K R_{\alpha}))
29 : !! \end{aligned}
30 : !! And in the actual code into:
31 : !!
32 : !! abCoeffs(lm, k) = c_ph(k,igSpin) * CONJG(ylm(lm, k)) * fjgj%fj(k,l,ilSpin,igSpin)
33 : !!
34 : !! A factor of \(i^l\) is omitted here and instead calculated where
35 : !! the coefficients are used.
36 : !! Also, \(f_{l,p}^{\alpha}(K)\) carries information about the global and
37 : !! local spins \(\sigma_{g}\) and \(\sigma_{\alpha}\) through the K prefactor
38 : !! [\(\pm q\) in K] and the \(u/\overset{.}{u}\)
39 : !! respectively. The former also appears in the complex phase factor.
40 :
41 : USE m_constants, ONLY : fpi_const,tpi_const
42 : USE m_types
43 : USE m_ylm
44 : USE m_hsmt_fjgj
45 : USE m_matmul_dgemm
46 :
47 : TYPE(t_sym), INTENT(IN) :: sym
48 : TYPE(t_cell), INTENT(IN) :: cell
49 : TYPE(t_atoms), INTENT(IN) :: atoms
50 : TYPE(t_lapw), INTENT(IN) :: lapw
51 : TYPE(t_noco), INTENT(IN) :: noco
52 : TYPE(t_nococonv), INTENT(IN) :: nococonv
53 : TYPE(t_fjgj), INTENT(IN) :: fjgj
54 : ! ..
55 : ! .. Scalar Arguments ..
56 : INTEGER, INTENT(IN) :: ilSpin, n, na, igSpin
57 : LOGICAL, INTENT(IN) :: l_nonsph
58 : INTEGER, INTENT(OUT) :: ab_size
59 : ! ..
60 : ! .. Array Arguments ..
61 : COMPLEX, INTENT(INOUT) :: abCoeffs(:,:)
62 : !Optional arguments if abc coef for LOs are needed
63 : COMPLEX, INTENT(INOUT), OPTIONAL :: abclo(:, :, :, :)
64 : REAL, INTENT(IN), OPTIONAL :: alo1(:), blo1(:), clo1(:)
65 :
66 : INTEGER :: np, k, l, ll1, m, lmax, nkvec, lo, lm, invsfct, lmMin, lmMax, ierr
67 : REAL :: term, bmrot(3, 3)
68 : COMPLEX :: c_ph(MAXVAL(lapw%nv), MERGE(2, 1, noco%l_ss.OR.ANY(noco%l_unrestrictMT) &
69 410345 : & .OR.ANY(noco%l_spinoffd_ldau)))
70 : LOGICAL :: l_apw, l_abclo
71 :
72 49609 : REAL, ALLOCATABLE :: gkrot(:, :)
73 49609 : COMPLEX, ALLOCATABLE :: ylm(:, :)
74 :
75 49609 : l_abclo = PRESENT(abclo)
76 49609 : lmax = MERGE(atoms%lnonsph(n), atoms%lmax(n), l_nonsph)
77 49609 : ab_size = lmax*(lmax+2) + 1
78 : ! TODO: replace APW+lo check (may actually be a broken trick) by something simpler
79 : ! l_apw=ALL(fjgj%gj==0.0)
80 49609 : l_apw = .FALSE.
81 :
82 : ! We skip the initialization for speed
83 : ! abCoeffs=0.0
84 :
85 49609 : np = sym%invtab(sym%ngopr(na))
86 49609 : CALL lapw%phase_factors(igSpin, atoms%taual(:, na), nococonv%qss, c_ph(:, igSpin))
87 4018329 : bmrot = TRANSPOSE(MATMUL(1.0 * sym%mrot(:, :, np), cell%bmat))
88 :
89 198436 : ALLOCATE(ylm((lmax+1)**2, lapw%nv(igSpin)), stat=ierr)
90 49609 : IF (ierr /= 0) CALL juDFT_error("Couldn't allocate ylm")
91 148827 : ALLOCATE(gkrot(3,lapw%nv(igSpin)), stat=ierr)
92 49609 : IF (ierr /= 0) CALL juDFT_error("Couldn't allocate gkrot")
93 :
94 : ! Generate spherical harmonics
95 : ! gkrot = matmul(bmrot, lapw%vk(:,:,igSpin))
96 : ! These two lines should eventually move to the GPU:
97 : !CALL dgemm("N","N", 3, lapw%nv(igSpin), 3, 1.0, bmrot, 3, lapw%vk(:,:,igSpin), 3, 0.0, gkrot, 3)
98 49609 : call blas_matmul(3,lapw%nv(igSpin),3,bmrot,lapw%vk(:,:,igspin),gkrot)
99 49609 : CALL ylm4_batched(lmax,gkrot,ylm)
100 :
101 : #ifndef _OPENACC
102 : !$OMP PARALLEL DO DEFAULT(none) &
103 : !$OMP& SHARED(lapw,lmax,c_ph,igSpin,abCoeffs,fjgj,abclo,cell,atoms,sym) &
104 : !$OMP& SHARED(l_abclo,alo1,blo1,clo1,ab_size,na,n,ilSpin,bmrot, ylm) &
105 : !$OMP& PRIVATE(k,l,ll1,m,lm,term,invsfct,lo,nkvec) &
106 49609 : !$OMP& PRIVATE(lmMin,lmMax)
107 : #else
108 : !$acc kernels present(abCoeffs) default(none)
109 : abCoeffs(:,:)=0.0
110 : !$acc end kernels
111 : #endif
112 :
113 : !$acc data copyin(atoms,atoms%llo,atoms%llod,atoms%nlo,cell,cell%omtil,atoms%rmt) if (l_abclo)
114 : !$acc parallel loop present(fjgj,fjgj%fj,fjgj%gj,abCoeffs) vector_length(32)&
115 : !$acc copyin(lmax,lapw,lapw%nv,lapw%vk,lapw%kvec,bmrot,c_ph, sym, sym%invsat,l_abclo, ylm) &
116 : !$acc present(abclo,alo1,blo1,clo1)&
117 : !$acc private(k,l,lm,invsfct,lo,term,lmMin,lmMax) default(none)
118 : DO k = 1,lapw%nv(igSpin)
119 : !$acc loop vector private(l,lmMin,lmMax)
120 : DO l = 0,lmax
121 : lmMin = l*(l+1) + 1 - l
122 : lmMax = l*(l+1) + 1 + l
123 : abCoeffs(lmMin:lmMax, k) = fjgj%fj(k,l,ilSpin,igSpin)*c_ph(k,igSpin) * CONJG(ylm(lmMin:lmMax, k))
124 : abCoeffs(ab_size+lmMin:ab_size+lmMax,k) = fjgj%gj(k,l,ilSpin,igSpin)*c_ph(k,igSpin) * CONJG(ylm(lmMin:lmMax, k))
125 : END DO
126 : !$acc end loop
127 :
128 : IF (l_abclo) THEN
129 : ! Determine also the abc coeffs for LOs
130 : invsfct=MERGE(1,2,sym%invsat(na).EQ.0)
131 : term = fpi_const/SQRT(cell%omtil)* ((atoms%rmt(n)**2)/2)
132 : ! !$acc loop vector private(lo,l,nkvec,ll1,m,lm)
133 : DO lo = 1,atoms%nlo(n)
134 : l = atoms%llo(lo,n)
135 : DO nkvec=1,invsfct*(2*l+1)
136 : IF (lapw%kvec(nkvec,lo,na)==k) THEN !This k-vector is used in LO
137 : ll1 = l*(l+1) + 1
138 : DO m = -l,l
139 : lm = ll1 + m
140 : abclo(1,m+atoms%llod+1,nkvec,lo) = term*c_ph(k,igSpin)*CONJG(ylm(lm,k))*alo1(lo)
141 : abclo(2,m+atoms%llod+1,nkvec,lo) = term*c_ph(k,igSpin)*CONJG(ylm(lm,k))*blo1(lo)
142 : abclo(3,m+atoms%llod+1,nkvec,lo) = term*c_ph(k,igSpin)*CONJG(ylm(lm,k))*clo1(lo)
143 : END DO
144 : END IF
145 : END DO
146 : END DO
147 : ! !$acc end loop
148 : END IF
149 : END DO !k-loop
150 : !$acc end parallel loop
151 : !$acc end data
152 :
153 : #ifndef _OPENACC
154 : !$OMP END PARALLEL DO
155 : #endif
156 :
157 49609 : IF (.NOT.l_apw) ab_size=ab_size*2
158 49609 : END SUBROUTINE hsmt_ab
159 49609 : END MODULE m_hsmt_ab
|