Line data Source code
1 : !--------------------------------------------------------------------------------
2 : ! Copyright (c) 2016 Peter Grünberg Institut, Forschungszentrum Jülich, Germany
3 : ! This file is part of FLEUR and available as free software under the conditions
4 : ! of the MIT license as expressed in the LICENSE file in more detail.
5 : !--------------------------------------------------------------------------------
6 :
7 : MODULE m_eigen_hssetup
8 : CONTAINS
9 : #ifndef _OPENACC
10 : !> The setup of the Hamiltonian and Overlap matrices are performed here
11 : !!
12 : !! The following steps are executed:
13 : !! 1. The matrices are a allocated (in the fi%noco-case these are 2x2-arrays of matrices)
14 : !! 2. The Interstitial contribution is calculated (in hs_int())
15 : !! 3. The MT-part is calculated (in hsmt() )
16 : !! 4. The vacuum part is added (in hsvac())
17 : !! 5. The matrices are copied to the final matrix, in the fi%noco-case the full matrix is constructed from the 4-parts.
18 7342 : SUBROUTINE eigen_hssetup(isp, fmpi, fi, mpdata, results, den, vx, xcpot, enpara, nococonv, stars, sphhar, hybdat, &
19 : ud, td, v, lapw, nk, smat_final, hmat_final)
20 : USE m_types
21 : USE m_types_mpimat
22 : USE m_hs_int
23 : USE m_hsvac
24 : USE m_hsmt
25 : USE m_vham
26 : USE m_eigen_redist_matrix
27 : USE m_add_vnonlocal
28 : USE m_hsmt_fjgj
29 : USE m_eig66_io, ONLY: open_eig, write_eig, read_eig
30 : IMPLICIT NONE
31 : INTEGER, INTENT(IN) :: isp
32 : TYPE(t_mpi), INTENT(IN) :: fmpi
33 : type(t_fleurinput), intent(in) :: fi
34 : type(t_mpdata), intent(inout):: mpdata
35 : type(t_results), intent(inout):: results
36 : class(t_xcpot), intent(in) :: xcpot
37 : TYPE(t_stars), INTENT(IN) :: stars
38 : TYPE(t_enpara), INTENT(IN) :: enpara
39 : TYPE(t_nococonv), INTENT(IN) :: nococonv
40 : TYPE(t_sphhar), INTENT(IN) :: sphhar
41 : type(t_hybdat), intent(inout):: hybdat
42 : TYPE(t_usdus), INTENT(INout) :: ud
43 : TYPE(t_tlmplm), INTENT(IN) :: td
44 : TYPE(t_lapw), INTENT(IN) :: lapw
45 : TYPE(t_potden), INTENT(IN) :: den, v, vx
46 : integer, intent(in) :: nk
47 : CLASS(t_mat), ALLOCATABLE, INTENT(INOUT) :: smat_final, hmat_final
48 :
49 36710 : CLASS(t_mat), ALLOCATABLE :: smat(:, :), hmat(:, :)
50 : INTEGER :: i, j, nspins
51 7342 : complex, allocatable :: vpw_wTemp(:,:)
52 : INTEGER :: tempI,tempJ
53 :
54 7342 : TYPE(t_fjgj) :: fjgj
55 :
56 :
57 7342 : IF(fi%atoms%n_v.GT.0) THEN
58 0 : CALL fjgj%alloc(MAXVAL(lapw%nv),fi%atoms%lmaxd,isp,fi%noco)
59 : END IF
60 :
61 :
62 : !Matrices for Hamiltonian and Overlapp
63 : !In fi%noco case we need 4-matrices for each spin channel
64 7342 : nspins = MERGE(2, 1, fi%noco%l_noco)
65 7342 : IF (fmpi%n_size == 1) THEN
66 25404 : ALLOCATE (t_mat::smat(nspins, nspins), hmat(nspins, nspins))
67 : ELSE
68 53584 : ALLOCATE (t_mpimat::smat(nspins, nspins), hmat(nspins, nspins))
69 : END IF
70 15380 : DO i = 1, nspins
71 24810 : DO j = 1, nspins
72 9430 : CALL smat(i, j)%init(fi%input%l_real, lapw%nv(i) + fi%atoms%nlotot, lapw%nv(j) + fi%atoms%nlotot, fmpi%sub_comm, .false.)
73 17468 : CALL hmat(i, j)%init(smat(i, j))
74 : END DO
75 : END DO
76 :
77 7342 : CALL timestart("Interstitial part")
78 : !Generate interstitial part of Hamiltonian
79 29368 : ALLOCATE(vpw_wTemp(SIZE(v%pw_w,1),SIZE(v%pw_w,2)))
80 21799360 : vpw_wTemp = merge(v%pw_w - xcpot%get_exchange_weight() * vx%pw_w, v%pw_w, hybdat%l_subvxc)
81 7342 : CALL hs_int(fi%input, fi%noco, nococonv, stars, lapw, fmpi, fi%cell%bbmat, isp, vpw_wTemp, smat, hmat)
82 7342 : DEALLOCATE(vpw_wTemp)
83 :
84 7342 : CALL timestop("Interstitial part")
85 7342 : CALL timestart("MT part")
86 : !MT-part of Hamiltonian. In case of fi%noco, we need an loop over the local spin of the fi%atoms
87 7342 : DO i = 1, nspins; DO j = 1, nspins
88 : !$acc enter data copyin(hmat(i,j),smat(i,j))
89 : !$acc enter data copyin(hmat(i,j)%data_r,smat(i,j)%data_r,hmat(i,j)%data_c,smat(i,j)%data_c)
90 : END DO; END DO
91 7342 : CALL hsmt(fi%atoms, fi%sym, enpara, isp, fi%input, fmpi, fi%noco, nococonv, fi%cell, lapw, ud, td, smat, hmat)
92 7342 : DO i = 1, nspins; DO j = 1, nspins; if (hmat(1, 1)%l_real) THEN
93 : !$acc exit data copyout(hmat(i,j)%data_r,smat(i,j)%data_r) delete(hmat(i,j)%data_c,smat(i,j)%data_c)
94 : !$acc exit data delete(hmat(i,j),smat(i,j))
95 : ELSE
96 : !$acc exit data copyout(hmat(i,j)%data_c,smat(i,j)%data_c) delete(hmat(i,j)%data_r,smat(i,j)%data_r)
97 : !$acc exit data delete(hmat(i,j),smat(i,j))
98 : END IF; END DO; END DO
99 7342 : CALL timestop("MT part")
100 :
101 7342 : IF (fi%atoms%n_v.GT.0) THEN
102 0 : DO i = 1, nspins
103 0 : CALL v_ham(fi%input,ud,fi%atoms,fi%kpts,fi%cell,lapw,fi%sym,fi%noco,fmpi,nococonv,fjgj,den,isp,nk,hmat(i,i))
104 : END DO
105 : END IF
106 :
107 : !Vacuum contributions
108 7342 : IF (fi%input%film) THEN
109 144 : CALL timestart("Vacuum part")
110 : CALL hsvac(fi%vacuum, stars, fmpi, isp, fi%input, v, enpara%evac, fi%cell, &
111 144 : lapw, fi%noco, nococonv, hmat, smat)
112 144 : CALL timestop("Vacuum part")
113 : END IF
114 :
115 : !Deal with hybrid code
116 7342 : IF (fi%hybinp%l_hybrid .OR. fi%input%l_rdmft) THEN
117 2160 : if (any(shape(smat) /= 1)) then
118 0 : call judft_error("Hybrid doesn't do noco.")
119 : end if
120 3730076 : smat(1,1)%data_c = CONJG(smat(1,1)%data_c)
121 720 : CALL write_eig(hybdat%eig_id, nk, isp, smat=smat(1, 1), n_start=fmpi%n_size, n_end=fmpi%n_rank)
122 3730076 : smat(1,1)%data_c = CONJG(smat(1,1)%data_c)
123 : END IF
124 :
125 7342 : IF (fi%hybinp%l_hybrid) THEN
126 720 : IF (hybdat%l_addhf) THEN
127 372 : CALL add_Vnonlocal(nk, lapw, fi, hybdat, isp, xcpot, fmpi, nococonv, hmat(1, 1))
128 : END IF
129 : END IF ! fi%hybinp%l_hybrid
130 :
131 : !Now copy the data into final matrix
132 : ! Collect the four fi%noco parts into a single matrix
133 : ! In collinear case only a copy is done
134 : ! In the parallel case also a redistribution happens
135 7342 : ALLOCATE (smat_final, mold=smat(1, 1))
136 7342 : ALLOCATE (hmat_final, mold=smat(1, 1))
137 7342 : CALL timestart("Matrix redistribution")
138 7342 : CALL eigen_redist_matrix(fmpi, lapw, fi%atoms, smat, smat_final)
139 7342 : CALL eigen_redist_matrix(fmpi, lapw, fi%atoms, hmat, hmat_final, smat_final)
140 7342 : CALL timestop("Matrix redistribution")
141 :
142 7342 : END SUBROUTINE eigen_hssetup
143 : #else
144 : SUBROUTINE eigen_hssetup(isp, fmpi, fi, mpdata, results, den, vx, xcpot, enpara, nococonv, stars, sphhar, hybdat, &
145 : ud, td, v, lapw, nk, smat_final, hmat_final)
146 : USE m_types
147 : USE m_types_mpimat
148 : USE m_hs_int
149 : USE m_hsvac
150 : USE m_hsmt
151 : USE m_vham
152 : USE m_eigen_redist_matrix
153 : USE m_add_vnonlocal
154 : USE m_hsmt_fjgj
155 : USE m_eig66_io, ONLY: open_eig, write_eig, read_eig
156 : IMPLICIT NONE
157 : INTEGER, INTENT(IN) :: isp
158 : TYPE(t_mpi), INTENT(IN) :: fmpi
159 : type(t_fleurinput), intent(in) :: fi
160 : type(t_mpdata), intent(inout):: mpdata
161 : type(t_results), intent(inout):: results
162 : class(t_xcpot), intent(in) :: xcpot
163 : TYPE(t_stars), INTENT(IN) :: stars
164 : TYPE(t_enpara), INTENT(IN) :: enpara
165 : TYPE(t_nococonv), INTENT(IN) :: nococonv
166 : TYPE(t_sphhar), INTENT(IN) :: sphhar
167 : type(t_hybdat), intent(inout):: hybdat
168 : TYPE(t_usdus), INTENT(INout) :: ud
169 : TYPE(t_tlmplm), INTENT(IN) :: td
170 : TYPE(t_lapw), INTENT(IN) :: lapw
171 : TYPE(t_potden), INTENT(IN) :: den, v, vx
172 : integer, intent(in) :: nk
173 : CLASS(t_mat), ALLOCATABLE, INTENT(INOUT) :: smat_final, hmat_final
174 :
175 : TYPE(t_mat), ALLOCATABLE :: smat(:, :), hmat(:, :)
176 : TYPE(t_mpimat), ALLOCATABLE :: smat_mpi(:, :), hmat_mpi(:, :)
177 : INTEGER :: i, j, nspins
178 : complex, allocatable :: vpw_wTemp(:,:)
179 : INTEGER :: tempI,tempJ
180 :
181 : TYPE(t_fjgj) :: fjgj
182 :
183 :
184 : IF(fi%atoms%n_v.GT.0) THEN
185 : CALL fjgj%alloc(MAXVAL(lapw%nv),fi%atoms%lmaxd,isp,fi%noco)
186 : END IF
187 :
188 : !Matrices for Hamiltonian and Overlapp
189 : !In fi%noco case we need 4-matrices for each spin channel
190 : nspins = MERGE(2, 1, fi%noco%l_noco)
191 : IF (fmpi%n_size == 1) THEN
192 : ALLOCATE (smat(nspins, nspins), hmat(nspins, nspins))
193 : DO i = 1, nspins
194 : DO j = 1, nspins
195 : CALL smat(i, j)%init(fi%input%l_real, lapw%nv(i) + fi%atoms%nlotot, lapw%nv(j) + fi%atoms%nlotot, fmpi%sub_comm, .false.)
196 : CALL hmat(i, j)%init(smat(i, j))
197 : END DO
198 : END DO
199 :
200 : CALL timestart("Interstitial part")
201 : !Generate interstitial part of Hamiltonian
202 : ALLOCATE(vpw_wTemp(SIZE(v%pw_w,1),SIZE(v%pw_w,2)))
203 : vpw_wTemp = merge(v%pw_w - xcpot%get_exchange_weight() * vx%pw_w, v%pw_w, hybdat%l_subvxc)
204 : CALL hs_int(fi%input, fi%noco, nococonv, stars, lapw, fmpi, fi%cell%bbmat, isp, vpw_wTemp, smat, hmat)
205 : DEALLOCATE(vpw_wTemp)
206 :
207 : CALL timestop("Interstitial part")
208 : CALL timestart("MT part")
209 : !MT-part of Hamiltonian. In case of fi%noco, we need an loop over the local spin of the fi%atoms
210 : DO i = 1, nspins; DO j = 1, nspins
211 : !$acc enter data copyin(hmat(i,j),smat(i,j))
212 : !$acc enter data copyin(hmat(i,j)%data_r,smat(i,j)%data_r,hmat(i,j)%data_c,smat(i,j)%data_c)
213 : END DO; END DO
214 : CALL hsmt(fi%atoms, fi%sym, enpara, isp, fi%input, fmpi, fi%noco, nococonv, fi%cell, lapw, ud, td, smat, hmat)
215 : DO i = 1, nspins; DO j = 1, nspins; if (hmat(1, 1)%l_real) THEN
216 : !$acc exit data copyout(hmat(i,j)%data_r,smat(i,j)%data_r) delete(hmat(i,j)%data_c,smat(i,j)%data_c)
217 : !$acc exit data delete(hmat(i,j),smat(i,j))
218 : ELSE
219 : !$acc exit data copyout(hmat(i,j)%data_c,smat(i,j)%data_c) delete(hmat(i,j)%data_r,smat(i,j)%data_r)
220 : !$acc exit data delete(hmat(i,j),smat(i,j))
221 : END IF; END DO; END DO
222 : CALL timestop("MT part")
223 :
224 : IF (fi%atoms%n_v.GT.0) THEN
225 : DO i = 1, nspins
226 : CALL v_ham(fi%input,ud,fi%atoms,fi%kpts,fi%cell,lapw,fi%sym,fi%noco,fmpi,nococonv,fjgj,den,isp,nk,hmat(i,i))
227 : END DO
228 : END IF
229 :
230 : !Vacuum contributions
231 : IF (fi%input%film) THEN
232 : CALL timestart("Vacuum part")
233 : CALL hsvac(fi%vacuum, stars, fmpi, isp, fi%input, v, enpara%evac, fi%cell, &
234 : lapw, fi%noco, nococonv, hmat, smat)
235 : CALL timestop("Vacuum part")
236 : END IF
237 :
238 : !Deal with hybrid code
239 : IF (fi%hybinp%l_hybrid .OR. fi%input%l_rdmft) THEN
240 : if (any(shape(smat) /= 1)) then
241 : call judft_error("Hybrid doesn't do noco.")
242 : end if
243 : smat(1,1)%data_c = CONJG(smat(1,1)%data_c)
244 : CALL write_eig(hybdat%eig_id, nk, isp, smat=smat(1, 1), n_start=fmpi%n_size, n_end=fmpi%n_rank)
245 : smat(1,1)%data_c = CONJG(smat(1,1)%data_c)
246 : END IF
247 :
248 : IF (fi%hybinp%l_hybrid) THEN
249 : IF (hybdat%l_addhf) THEN
250 : CALL add_Vnonlocal(nk, lapw, fi, hybdat, isp, xcpot, fmpi, nococonv, hmat(1, 1))
251 : END IF
252 : END IF ! fi%hybinp%l_hybrid
253 :
254 : !Now copy the data into final matrix
255 : ! Collect the four fi%noco parts into a single matrix
256 : ! In collinear case only a copy is done
257 : ! In the parallel case also a redistribution happens
258 : ALLOCATE (smat_final, mold=smat(1, 1))
259 : ALLOCATE (hmat_final, mold=smat(1, 1))
260 : CALL timestart("Matrix redistribution")
261 : CALL eigen_redist_matrix(fmpi, lapw, fi%atoms, smat, smat_final)
262 : CALL eigen_redist_matrix(fmpi, lapw, fi%atoms, hmat, hmat_final, smat_final)
263 : CALL timestop("Matrix redistribution")
264 : ELSE
265 : ALLOCATE (smat_mpi(nspins, nspins), hmat_mpi(nspins, nspins))
266 : DO i = 1, nspins
267 : DO j = 1, nspins
268 : CALL smat_mpi(i, j)%init(fi%input%l_real, lapw%nv(i) + fi%atoms%nlotot, lapw%nv(j) + fi%atoms%nlotot, fmpi%sub_comm, .false.)
269 : CALL hmat_mpi(i, j)%init(smat_mpi(i, j))
270 : END DO
271 : END DO
272 :
273 : CALL timestart("Interstitial part")
274 : !Generate interstitial part of Hamiltonian
275 : ALLOCATE(vpw_wTemp(SIZE(v%pw_w,1),SIZE(v%pw_w,2)))
276 : vpw_wTemp = merge(v%pw_w - xcpot%get_exchange_weight() * vx%pw_w, v%pw_w, hybdat%l_subvxc)
277 : CALL hs_int(fi%input, fi%noco, nococonv, stars, lapw, fmpi, fi%cell%bbmat, isp, vpw_wTemp, smat_mpi, hmat_mpi)
278 : DEALLOCATE(vpw_wTemp)
279 :
280 : CALL timestop("Interstitial part")
281 : CALL timestart("MT part")
282 : !MT-part of Hamiltonian. In case of fi%noco, we need an loop over the local spin of the fi%atoms
283 : DO i = 1, nspins; DO j = 1, nspins
284 : !$acc enter data copyin(hmat_mpi(i,j),smat_mpi(i,j))
285 : !$acc enter data copyin(hmat_mpi(i,j)%data_r,smat_mpi(i,j)%data_r,hmat_mpi(i,j)%data_c,smat_mpi(i,j)%data_c)
286 : END DO; END DO
287 : CALL hsmt(fi%atoms, fi%sym, enpara, isp, fi%input, fmpi, fi%noco, nococonv, fi%cell, lapw, ud, td, smat_mpi, hmat_mpi)
288 : DO i = 1, nspins; DO j = 1, nspins; if (hmat_mpi(1, 1)%l_real) THEN
289 : !$acc exit data copyout(hmat_mpi(i,j)%data_r,smat_mpi(i,j)%data_r) delete(hmat_mpi(i,j)%data_c,smat_mpi(i,j)%data_c)
290 : !$acc exit data delete(hmat_mpi(i,j),smat_mpi(i,j))
291 : ELSE
292 : !$acc exit data copyout(hmat_mpi(i,j)%data_c,smat_mpi(i,j)%data_c) delete(hmat_mpi(i,j)%data_r,smat_mpi(i,j)%data_r)
293 : !$acc exit data delete(hmat_mpi(i,j),smat_mpi(i,j))
294 : END IF; END DO; END DO
295 : CALL timestop("MT part")
296 :
297 : IF (fi%atoms%n_v.GT.0) THEN
298 : call judft_error("LDA+V not yet implemented for GPU + EV-parallelization.")
299 : END IF
300 :
301 : !Vacuum contributions
302 : IF (fi%input%film) THEN
303 : CALL timestart("Vacuum part")
304 : CALL hsvac(fi%vacuum, stars, fmpi, isp, fi%input, v, enpara%evac, fi%cell, &
305 : lapw, fi%noco, nococonv, hmat_mpi, smat_mpi)
306 : CALL timestop("Vacuum part")
307 : END IF
308 :
309 : !Deal with hybrid code
310 : IF (fi%hybinp%l_hybrid .OR. fi%input%l_rdmft) THEN
311 : if (any(shape(smat_mpi) /= 1)) then
312 : call judft_error("Hybrid doesn't do noco.")
313 : end if
314 : smat_mpi(1,1)%data_c = CONJG(smat_mpi(1,1)%data_c)
315 : call judft_bug("GPU version of hybrid functionals currently not working correctly")
316 : !CALL write_eig(hybdat%eig_id, nk, isp, smat_mpi=smat_mpi(1, 1), n_start=fmpi%n_size, n_end=fmpi%n_rank)
317 : smat_mpi(1,1)%data_c = CONJG(smat_mpi(1,1)%data_c)
318 : END IF
319 :
320 : IF (fi%hybinp%l_hybrid) THEN
321 : IF (hybdat%l_addhf) THEN
322 : !CALL add_Vnonlocal(nk, lapw, fi, hybdat, isp, xcpot, fmpi, nococonv, hmat_mpi(1, 1))
323 : END IF
324 : END IF ! fi%hybinp%l_hybrid
325 :
326 : !Now copy the data into final matrix
327 : ! Collect the four fi%noco parts into a single matrix
328 : ! In collinear case only a copy is done
329 : ! In the parallel case also a redistribution happens
330 : ALLOCATE (t_mpimat::smat_final)
331 : ALLOCATE (t_mpimat::hmat_final)
332 : CALL timestart("Matrix redistribution")
333 : CALL eigen_redist_matrix(fmpi, lapw, fi%atoms, smat_mpi, smat_final)
334 : CALL eigen_redist_matrix(fmpi, lapw, fi%atoms, hmat_mpi, hmat_final, smat_final)
335 : CALL timestop("Matrix redistribution")
336 : ENDIF
337 : END SUBROUTINE eigen_hssetup
338 : #endif
339 : END MODULE m_eigen_hssetup
|