Line data Source code
1 : ! Copyright (c) 2024 Peter Grünberg Institut, Forschungszentrum Jülich, Germany
2 : ! This file is part of FLEUR and available as free software under the conditions
3 : ! of the MIT license as expressed in the LICENSE file in more detail.
4 : !
5 : !--------------------------------------------------------------------------------
6 : module m_elpa
7 : use m_judft
8 : use m_constants
9 : use m_types_solver
10 : use m_types_mpimat
11 : use m_types_mat
12 : #ifdef CPP_ELPA
13 : use elpa
14 : #endif
15 : #ifdef CPP_MPI
16 : use mpi
17 : #endif
18 : implicit none
19 : private
   !> Solver descriptor for the ELPA eigensolver library.
   !> Extends the generic t_solver interface; the bindings below dispatch
   !> the generic solver calls to the ELPA-specific implementations in
   !> this module.
   type, extends(t_solver):: t_solver_elpa
   contains
      procedure :: solve_gev => elpa_gev !solver for generalized eigenvalue problem
      procedure :: solve_std_dp => elpa_diag     ! standard problem, double precision
      procedure :: solve_std_sp => elpa_diag_sp  ! standard problem, single precision (needs CPP_ELPA_SP)
      procedure :: to_std => elpa_to_std         ! generalized -> standard transform (needs CPP_ELPA_PATCH)
      procedure :: backtrans => elpa_recover !transform the Eigenvalue back to the generalized problem
   end type
28 : #ifdef CPP_ELPA
29 : class(elpa_t), pointer :: elpa_obj
30 : #endif
31 : logical,save:: firstcall=.true.
32 : public get_solver_elpa
33 :
34 : contains
35 :
36 97 : function get_solver_elpa() result(solver)
37 : type(t_solver_elpa), pointer::solver
38 97 : allocate (solver)
39 97 : solver%name = "elpa"
40 : #ifdef CPP_ELPA
41 : solver%available = .true.
42 : #else
43 97 : solver%available = .false.
44 : #endif
45 97 : solver%parallel = .true.
46 97 : solver%serial = .false.
47 97 : solver%generalized = .true.
48 97 : solver%standard = .true.
49 : #ifdef CPP_ELPA_PATCH
50 : solver%transform = .true.
51 : #else
52 97 : solver%transform = .false.
53 : #endif
54 97 : solver%GPU = .true.
55 : #ifdef CPP_ELPA_SP
56 : solver%single_precision = .true.
57 : #else
58 97 : solver%single_precision = .false.
59 : #endif
60 : solver%use_sp = .false.
61 97 : end function
62 :
63 :
64 :
   !> Lazily create and configure the module-level ELPA handle (elpa_obj)
   !> for the matrix layout of hmat.
   !>
   !> On the very first call the ELPA library itself is initialized; on
   !> every call where elpa_obj is already associated the routine returns
   !> immediately, so the handle is shared until it is deallocated by the
   !> solver routines. Matrix-layout settings (sizes, block factor, BLACS
   !> context, MPI communicator) are taken from hmat.
   subroutine create_elpa_obj(hmat)
      !$ use omp_lib
      implicit none
      class(t_mat), intent(IN) :: hmat

#ifdef CPP_ELPA
      integer :: np, myid
      integer :: err
      TYPE(t_mpimat) :: tmp

      ! One-time library initialization; 20180525 is the ELPA API version
      ! requested from elpa_init.
      if (firstcall) then
         err = elpa_init(20180525)
         firstcall = .false.
         elpa_obj=>null()
      end if
      ! Reuse an existing handle if one is still associated.
      if (associated(elpa_obj)) return
      elpa_obj => elpa_allocate()

      !Some settings are set for all matrices
      call elpa_obj%set("local_nrows", hmat%matsize1, err)
      call elpa_obj%set("local_ncols", hmat%matsize2, err)

      ! Conditional line: only compiled when OpenMP is enabled.
      !$ call elpa_obj%set("omp_threads", omp_get_num_threads(),err)
      call elpa_obj%set("timings", 1, err)
      !Some other settings depend on matrix type
      select type (hmat)
      type is (t_mpimat)
         call MPI_BARRIER(hmat%blacsdata%mpi_com, err)
         call MPI_COMM_SIZE(hmat%blacsdata%mpi_com, np, err)
         call MPI_COMM_RANK(hmat%blacsdata%mpi_com, myid, err)
         ! Blocking factor
         ! ELPA requires a square blocking: descriptor entries 5/6 are the
         ! row/column block sizes of the BLACS distribution.
         if (hmat%blacsdata%blacs_desc(5) .ne. hmat%blacsdata%blacs_desc(6)) &
            call judft_error("Different block sizes for rows/columns not supported")
         call elpa_obj%set("na", hmat%global_size1, err)
         call elpa_obj%set("nblk", hmat%blacsdata%blacs_desc(5), err)
         call elpa_obj%set("mpi_comm_parent", hmat%blacsdata%mpi_com, err)
         call elpa_obj%set("process_row", hmat%blacsdata%myrow, err)
         call elpa_obj%set("process_col", hmat%blacsdata%mycol, err)
         call elpa_obj%set("blacs_context", hmat%blacsdata%blacs_desc(2), err)
      type is (t_mat)
         ! NOTE(review): judft_bug aborts here, so the settings below are
         ! dead code unless judft_bug returns in some configuration — confirm.
         call judft_bug("Elpa solver not available for non-distributed matrices")
         call elpa_obj%set("na", hmat%matsize1, err)
         call elpa_obj%set("nblk", hmat%matsize1, err)
         call elpa_obj%set("mpi_comm_parent", MPI_COMM_SELF, err)
         call elpa_obj%set("process_row", 1, err)
         call elpa_obj%set("process_col", 1, err)
         !Generate a blacs context for this PE only
         call tmp%init(.true.,1,1,MPI_COMM_SELF,MPIMAT_2D_BLOCK_CYCLIC)
         call elpa_obj%set("blacs_context", tmp%blacsdata%blacs_desc(2), err)
      end select
      err = elpa_obj%setup()

#if defined(CPP_GPU)||defined(_OPENACC)
      ! GPU builds: request NVIDIA GPU kernels and GPU setup.
      call elpa_obj%set("gpu_hermitian_multiply", 1, err)
      !call elpa_obj%set("cannon_for_generalized",0,err)
      call elpa_obj%set("nvidia-gpu", 1, err)
      !call elpa_obj%set("verbose",1,err)
      err=elpa_obj%setup_gpu()
      !print *,"ELPA-GPU-err:",err
#else
      ! CPU builds use the two-stage solver.
      call elpa_obj%set("solver", ELPA_SOLVER_2STAGE)
#endif
#endif
   end subroutine
129 :
   !> Solve the generalized eigenvalue problem H z = eig S z with ELPA.
   !>
   !> @param hmat  Hamiltonian matrix; overwritten by ELPA (intent INOUT)
   !> @param smat  overlap matrix; overwritten by ELPA (intent INOUT)
   !> @param ne    in: number of eigenpairs requested;
   !>              out: for distributed matrices, the number of eigenvectors
   !>              kept on THIS process after redistribution
   !> @param eig   first ne eigenvalues (all processes get all eigenvalues)
   !> @param zmat  allocated here; holds the (redistributed) eigenvectors
   !> @param ikpt  k-point index (unused in this routine)
   subroutine elpa_gev(self, hmat, smat, ne, eig, zmat, ikpt)
      use m_types_mat
      use m_judft

      implicit none
      class(t_solver_elpa) :: self
      class(t_mat), intent(INOUT) :: hmat, smat
      integer, intent(INOUT) :: ne
      class(t_mat), allocatable, intent(OUT) :: zmat
      real, intent(OUT) :: eig(:)
      integer, intent(IN) :: ikpt
#ifdef CPP_ELPA
      integer :: num, np, myid,kernel
      integer :: err
      integer :: i
      real, allocatable :: eig2(:)
      class(t_mat),allocatable :: ev_dist
      !Update elpa object
      call create_elpa_obj(hmat)
      call elpa_obj%set("nev", ne, err)
      ! ev_dist gets the same dynamic type (t_mat/t_mpimat) as hmat
      allocate(ev_dist,mold=hmat)

      call timestart("ELPA GEV")
      select type(hmat)
      type is (t_mpimat) !we need some data from t_mpimat
         allocate (eig2(hmat%global_size1), stat=err) ! The eigenvalue array
      type is (t_mat)
         allocate (eig2(hmat%matsize1), stat=err) ! The eigenvalue array
      end select
      if (err .ne. 0) call juDFT_error('Failed to allocated "eig2"', calledby='elpa')

      call ev_dist%init(hmat)! Eigenvectors
      ! NOTE(review): err is not set by init; this check still sees the
      ! stat from the eig2 allocation above — confirm intent.
      if (err .ne. 0) call juDFT_error('Failed to allocated "ev_dist"', calledby='elpa')

      ! Fill the lower triangle from the upper one before calling ELPA.
      call hmat%u2l()
      call smat%u2l()
      call elpa_obj%timer_start("ELPA")
      if (hmat%l_real) then
         call elpa_obj%generalized_eigenvectors(hmat%data_r, smat%data_r, eig2, ev_dist%data_r, .false., err)
      else
         call elpa_obj%generalized_eigenvectors(hmat%data_c, smat%data_c, eig2, ev_dist%data_c, .false., err)
      end if
      call elpa_obj%timer_stop("ELPA")
      !Useful for debugging
      !call mpi_comm_rank(MPI_COMM_WORLD,myid,err)
      !if (myid == 0) then
      !   call elpa_obj%get("complex_kernel", kernel)
      !   print *, "elpa uses "//elpa_int_value_to_string("complex_kernel", kernel)//" kernel"
      !   call elpa_obj%print_times("ELPA")
      !endif


      ! Every process has the full eigenvalue spectrum in eig2.
      eig(:ne) = eig2(:ne)
      deallocate (eig2)

      select type(hmat)
      type is (t_mpimat)
         !
         ! Redistribute eigenvectors from ScaLAPACK distribution to each process, i.e. for
         ! process i these are eigenvectors i+1, np+i+1, 2*np+i+1...
         ! Only num=num2/np eigenvectors per process
         !
         call MPI_COMM_RANK(hmat%blacsdata%mpi_com, myid, err)
         call MPI_COMM_SIZE(hmat%blacsdata%mpi_com, np, err)

         ! Count how many of the num requested eigenvectors land on this rank.
         num = ne
         ne = 0
         do i = myid + 1, num, np
            ne = ne + 1
         end do
         !
         !
         allocate (t_mpimat::zmat)
         ! NOTE(review): zmat is initialized with global_size1 columns, not
         ! ne — presumably the row-cyclic copy only transfers the first ne
         ! eigenvectors; confirm against t_mpimat%copy.
         call zmat%init(hmat%l_real, hmat%global_size1,hmat%global_size1 , hmat%blacsdata%mpi_com, MPIMAT_ROWCYCLIC)
         call zmat%copy(ev_dist, 1, 1)
      type is (t_mat)
         allocate(t_mat::zmat)
         call zmat%init(hmat%l_real,hmat%matsize1,ne)
         if (zmat%l_real) THEN
            zmat%data_r(:,:)=ev_dist%data_r(:,:ne)
         else
            zmat%data_c(:,:)=ev_dist%data_c(:,:ne)
         endif
      end select
      call timestop("ELPA GEV")
      ! The handle is released after each solve; create_elpa_obj will build
      ! a new one on the next call.
      call elpa_deallocate(elpa_obj)
      if (associated(elpa_obj)) elpa_obj=>null()

#endif

   end subroutine elpa_gev
221 :
222 :
223 0 : subroutine elpa_diag(self, hmat, ne, eig, zmat)
224 : !Simple driver to solve Generalized Eigenvalue Problem using LAPACK routine
225 : implicit none
226 : class(t_solver_elpa) :: self
227 : class(t_mat), intent(INOUT) :: hmat
228 : integer, intent(INOUT) :: ne
229 : class(t_mat), allocatable, intent(OUT) :: zmat
230 : real, intent(OUT) :: eig(:)
231 : #ifdef CPP_ELPA
232 : real,allocatable:: eig2(:)
233 : integer :: err,myid,num,np,i
234 :
235 : !Update elpa object
236 : call create_elpa_obj(hmat)
237 : call elpa_obj%set("nev", ne, err)
238 :
239 : call timestart("ELPA STD")
240 : select type(hmat)
241 : type is (t_mpimat) !we need some data from t_mpimat
242 : allocate (eig2(hmat%global_size1), stat=err) ! The eigenvalue array
243 : type is (t_mat)
244 : allocate (eig2(hmat%matsize1), stat=err) ! The eigenvalue array
245 : end select
246 : if (err .ne. 0) call juDFT_error('Failed to allocated "eig2"', calledby='elpa')
247 :
248 : call zmat%init(hmat)! Eigenvectors
249 : if (err .ne. 0) call juDFT_error('Failed to allocated "zmat"', calledby='elpa')
250 :
251 : call hmat%u2l()
252 : call elpa_obj%timer_start("ELPA")
253 : if (hmat%l_real) then
254 : call elpa_obj%eigenvectors(hmat%data_r, eig2, zmat%data_r, err)
255 : else
256 : call elpa_obj%eigenvectors(hmat%data_c, eig2, zmat%data_c, err)
257 : end if
258 : call elpa_obj%timer_stop("ELPA")
259 : ! END of ELPA stuff
260 : !
261 : ! Each process has all eigenvalues in output
262 : eig(:ne) = eig2(:ne)
263 : deallocate (eig2)
264 : select type (hmat)
265 : type is (t_mpimat)
266 : !
267 : !
268 : ! Redistribute eigenvectors from ScaLAPACK distribution to each process, i.e. for
269 : ! process i these are eigenvectors i+1, np+i+1, 2*np+i+1...
270 : ! Only num=num2/np eigenvectors per process
271 : !
272 : call MPI_COMM_RANK(hmat%blacsdata%mpi_com, myid, err)
273 : call MPI_COMM_SIZE(hmat%blacsdata%mpi_com, np, err)
274 :
275 : num = ne
276 : ne = 0
277 : do i = myid + 1, num, np
278 : ne = ne + 1
279 : end do
280 : !
281 : end select
282 : call timestop("ELPA STD")
283 : #endif
284 0 : end subroutine
285 0 : subroutine elpa_diag_sp(self, hmat, ne, eig, zmat)
286 : !Simple driver to solve Generalized Eigenvalue Problem using LAPACK routine
287 : implicit none
288 : class(t_solver_elpa) :: self
289 : class(t_mat), intent(INOUT) :: hmat
290 : integer, intent(INOUT) :: ne
291 : class(t_mat), allocatable, intent(OUT) :: zmat
292 : real, intent(OUT) :: eig(:)
293 :
294 :
295 : integer, parameter:: sp = selected_real_kind(6)
296 : real(kind=sp),allocatable:: eig2(:)
297 : integer :: err,myid,num,np,i
298 :
299 : #ifdef CPP_ELPA_SP
300 : !Update elpa object
301 : call create_elpa_obj(hmat)
302 : call elpa_obj%set("nev", ne, err)
303 :
304 :
305 : call timestart("ELPA STD-SP")
306 : select type(hmat)
307 : type is (t_mpimat) !we need some data from t_mpimat
308 : allocate (eig2(hmat%global_size1), stat=err) ! The eigenvalue array
309 : type is (t_mat)
310 : allocate (eig2(hmat%matsize1), stat=err) ! The eigenvalue array
311 : end select
312 : if (err .ne. 0) call juDFT_error('Failed to allocated "eig2"', calledby='elpa')
313 :
314 : call zmat%init(hmat)! Eigenvectors
315 : if (err .ne. 0) call juDFT_error('Failed to allocated "zmat"', calledby='elpa')
316 :
317 : call hmat%u2l()
318 : call elpa_obj%timer_start("ELPA")
319 : if (hmat%l_real) then
320 : block
321 : real(kind=sp),allocatable:: mat(:,:),z(:,:)
322 : mat=hmat%data_r
323 : allocate(z(size(zmat%data_r,1),size(zmat%data_r,2)))
324 : call elpa_obj%eigenvectors(mat, eig2, z, err)
325 : zmat%data_r=z
326 : end block
327 : else
328 : block
329 : complex(kind=sp),allocatable:: mat(:,:),z(:,:)
330 : mat=hmat%data_c
331 : allocate(z(size(zmat%data_c,1),size(zmat%data_c,2)))
332 : call elpa_obj%eigenvectors(mat, eig2, z, err)
333 : zmat%data_c=z
334 : end block
335 : end if
336 : call elpa_obj%timer_stop("ELPA")
337 : ! Each process has all eigenvalues in output
338 : eig(:ne) = eig2(:ne)
339 : deallocate (eig2)
340 :
341 : select type (hmat)
342 : type is (t_mpimat)
343 : !
344 : ! Redistribute eigenvectors from ScaLAPACK distribution to each process, i.e. for
345 : ! process i these are eigenvectors i+1, np+i+1, 2*np+i+1...
346 : ! Only num=num2/np eigenvectors per process
347 : !
348 : call MPI_COMM_RANK(hmat%blacsdata%mpi_com, myid, err)
349 : num = ne
350 : ne = 0
351 : do i = myid + 1, num, np
352 : ne = ne + 1
353 : end do
354 : !
355 : end select
356 :
357 : call timestop("ELPA STD-SP")
358 : #endif
359 0 : end subroutine
360 :
361 0 : subroutine elpa_to_std(self, hmat, smat)
362 : !Simple driver to transform Generalized Eigenvalue Problem to Standard problem using LAPACK routine
363 :
364 : class(t_solver_elpa) :: self
365 : class(t_mat), intent(INOUT) :: hmat, smat
366 : integer :: err,n
367 : logical :: decomposed
368 :
369 : call create_elpa_obj(hmat)
370 :
371 0 : decomposed=.false.
372 : #ifdef CPP_ELPA_PATCH
373 : IF (hmat%l_real) THEN
374 : call elpa_obj%elpa_transform_generalized_d(hmat%data_r,smat%data_r,decomposed,err)
375 : else
376 : call elpa_obj%elpa_transform_generalized_dc(hmat%data_c,smat%data_c,decomposed,err)
377 : endif
378 : #endif
379 :
380 0 : end subroutine
381 :
   !> Back-transform eigenvectors of the standard problem to eigenvectors
   !> of the original generalized problem, then redistribute them.
   !>
   !> The back-transform itself requires the patched ELPA (CPP_ELPA_PATCH);
   !> the redistribution of zmat at the end runs unconditionally.
   !>
   !> @param smat  overlap matrix (holds the decomposition from elpa_to_std)
   !> @param zmat  eigenvectors; transformed and redistributed in place
   subroutine elpa_recover(self, smat, zmat)

      class(t_solver_elpa) :: self
      class(t_mat), intent(INOUT) :: zmat, smat
      integer :: error

      type(t_mat):: tmp_mat
      type(t_mpimat):: tmp_mpimat
#ifdef CPP_ELPA_PATCH
      if (smat%l_real) THEN
         call elpa_obj%elpa_transform_back_generalized_d(smat%data_r, zmat%data_r, error)
      else
         call elpa_obj%elpa_transform_back_generalized_dc(smat%data_c, zmat%data_c, error)
      endif
      ! Release the shared handle; the next solve rebuilds it.
      call elpa_deallocate(elpa_obj)
      if (associated(elpa_obj)) elpa_obj=>null()
#endif

      ! Redistribute: copy zmat into a row-cyclic layout (distributed case)
      ! or a plain copy (serial case), then assign back.
      ! NOTE(review): the assignments zmat=tmp_mpimat / zmat=tmp_mat assign
      ! to a non-allocatable polymorphic dummy — presumably t_mat/t_mpimat
      ! provide a defined assignment; confirm in m_types_mat/m_types_mpimat.
      select type(zmat)
      type is (t_mpimat)
         call tmp_mpimat%init(zmat%l_real,zmat%global_size1,zmat%global_size2,zmat%blacsdata%mpi_com,MPIMAT_ROWCYCLIC)
         call tmp_mpimat%copy(zmat,1,1)
         zmat=tmp_mpimat
      type is (t_mat)
         call tmp_mat%init(zmat)
         call tmp_mat%copy(zmat,1,1)
         zmat=tmp_mat
      end select

   end subroutine
412 :
413 :
414 0 : end module m_elpa
|