Line data Source code
1 : !--------------------------------------------------------------------------------
2 : ! Copyright (c) 2016 Peter Grünberg Institut, Forschungszentrum Jülich, Germany
3 : ! This file is part of FLEUR and available as free software under the conditions
4 : ! of the MIT license as expressed in the LICENSE file in more detail.
5 : !--------------------------------------------------------------------------------
6 : #ifdef FLEUR_USE_SCOREP
7 : #include 'scorep/SCOREP_User.inc'
8 : #endif
9 : module m_scalapack
10 : use m_juDFT
11 : use m_constants
12 : use m_types_mpimat
13 : use m_types_mat
14 : use m_types_solver
15 : #ifdef CPP_MPI
16 : use mpi
17 : #endif
18 : implicit none
19 :
20 : type, extends(t_solver)::t_solver_scalapack
21 : contains
22 : procedure :: solve_gev => scalapack_gev !solver for generalized eigenvalue problem
23 : procedure :: to_std => scalapack_reduction !transform the H of the generalized problem to a std problem
24 : procedure :: backtrans => scalapack_recover !transform the Eigenvalue back to the generalized problem
25 : end type
26 : public :: get_solver_scalapack
27 :
28 : contains
29 :
30 4761 : function get_solver_scalapack() result(solver)
31 : type(t_solver_scalapack), pointer::solver
32 4761 : allocate (solver)
33 4761 : solver%name = "scalapack"
34 : #ifdef CPP_SCALAPACK
35 4761 : solver%available = .true.
36 : #else
37 : solver%available = .false.
38 : #endif
39 4761 : solver%parallel = .true.
40 4761 : solver%serial = .false.
41 4761 : solver%generalized = .true.
42 4761 : solver%standard = .false.
43 4761 : solver%single_precision = .false.
44 4761 : solver%transform = .true.
45 4761 : solver%GPU = .false.
46 4761 : end function
47 :
48 4624 : subroutine scalapack_gev(self, hmat, smat, ne, eig, zmat, ikpt)
49 : !
50 : !----------------------------------------------------
51 : !- Parallel eigensystem solver - driver routine; gb99
52 : ! Uses the SCALAPACK for the actual diagonalization
53 : !
54 : ! hmat ..... Hamiltonian matrix
55 : ! smat ..... overlap matrix
56 : ! ne ....... number of ev's searched (and found) on this node
57 : ! On input, overall number of ev's searched,
58 : ! On output, local number of ev's found
59 : ! eig ...... all eigenvalues, output
60 : ! ev ....... local eigenvectors, output
61 : !
62 : !----------------------------------------------------
63 : !
64 :
65 : implicit none
66 : class(t_solver_scalapack) :: self
67 : class(t_mat), intent(INOUT) :: hmat, smat
68 : class(t_mat), allocatable, intent(OUT)::zmat
69 : real, intent(out) :: eig(:)
70 : integer, intent(INOUT) :: ne
71 : integer, intent(IN) :: ikpt
72 :
73 : #ifdef CPP_SCALAPACK
74 : !... Local variables
75 : !
76 : integer i, ierr, err
77 4624 : integer, allocatable :: iwork(:)
78 4624 : real, allocatable :: rwork(:)
79 : integer :: lrwork
80 :
81 : !
82 : ! ScaLAPACK things
83 : character(len=1) :: uplo
84 : integer :: num, num1, num2, liwork, lwork2, np0, mq0, np, myid
85 : integer :: iceil, numroc, nn, nb
86 4624 : integer, allocatable :: ifail(:), iclustr(:)
87 : real :: abstol, orfac = 1.e-4, dlamch
88 4624 : real, allocatable :: eig2(:), gap(:)
89 4624 : real, allocatable :: work2_r(:)
90 4624 : complex, allocatable :: work2_c(:)
91 :
92 4624 : type(t_mpimat):: ev_dist
93 :
94 : external iceil, numroc
95 : external dlamch
96 :
97 : #ifdef FLEUR_USE_SCOREP
98 :
99 : SCOREP_RECORDING_OFF()
100 : #endif
101 : select type (hmat)
102 : type IS (t_mpimat)
103 4624 : select type (smat)
104 : type IS (t_mpimat)
105 :
106 4624 : allocate (eig2(hmat%global_size1))
107 :
108 4624 : call MPI_COMM_RANK(hmat%blacsdata%mpi_com, myid, ierr)
109 4624 : call MPI_COMM_SIZE(hmat%blacsdata%mpi_com, np, ierr)
110 :
111 4624 : num = ne !no of states solved for
112 :
113 4624 : abstol = 2.0*dlamch('S') ! PDLAMCH gave an error on ZAMpano
114 :
115 4624 : call ev_dist%init(hmat)
116 :
117 : !smat%blacs_desc(2) = hmat%blacs_desc(2)
118 : !ev_dist%blacs_desc(2) = hmat%blacs_desc(2)
119 : !smat%blacs_desc=hmat%blacs_desc
120 : !ev_dist%blacs_desc=hmat%blacs_desc
121 :
122 4624 : nb = hmat%blacsdata%blacs_desc(5)! Blocking factor
123 4624 : if (nb .ne. hmat%blacsdata%blacs_desc(6)) call judft_error("Different block sizes for rows/columns not supported")
124 :
125 : !
126 4624 : nn = max(max(hmat%global_size1, nb), 2)
127 4624 : np0 = numroc(nn, nb, 0, 0, hmat%blacsdata%nprow)
128 4624 : mq0 = numroc(max(max(ne, nb), 2), nb, 0, 0, hmat%blacsdata%npcol)
129 4624 : if (hmat%l_real) then
130 : lwork2 = 5*hmat%global_size1 + max(5*nn, np0*mq0 + 2*nb*nb) + &
131 2710 : iceil(ne, hmat%blacsdata%nprow*hmat%blacsdata%npcol)*nn
132 2710 : allocate (work2_r(lwork2 + 10*hmat%global_size1), stat=err) ! Allocate more in case of clusters
133 : else
134 1914 : lwork2 = hmat%global_size1 + max(nb*(np0 + 1), 3)
135 1914 : allocate (work2_c(lwork2), stat=err)
136 : end if
137 4624 : if (err .ne. 0) then
138 0 : WRITE(*,*) 'Error for k-point ', ikpt
139 0 : write (*, *) 'work2 :', err, lwork2
140 0 : call juDFT_error('Failed to allocated "work2"', calledby='chani')
141 : end if
142 :
143 4624 : liwork = 6*max(max(hmat%global_size1, hmat%blacsdata%nprow*hmat%blacsdata%npcol + 1), 4)
144 4624 : allocate (iwork(liwork), stat=err)
145 4624 : if (err .ne. 0) then
146 0 : WRITE(*,*) 'Error for k-point ', ikpt
147 0 : write (*, *) 'iwork :', err, liwork
148 0 : call juDFT_error('Failed to allocated "iwork"', calledby='chani')
149 : end if
150 4624 : allocate (ifail(hmat%global_size1), stat=err)
151 4624 : if (err .ne. 0) then
152 0 : WRITE(*,*) 'Error for k-point ', ikpt
153 0 : write (*, *) 'ifail :', err, hmat%global_size1
154 0 : call juDFT_error('Failed to allocated "ifail"', calledby='chani')
155 : end if
156 4624 : allocate (iclustr(2*hmat%blacsdata%nprow*hmat%blacsdata%npcol), stat=err)
157 4624 : if (err .ne. 0) then
158 0 : WRITE(*,*) 'Error for k-point ', ikpt
159 0 : write (*, *) 'iclustr:', err, 2*hmat%blacsdata%nprow*hmat%blacsdata%npcol
160 0 : call juDFT_error('Failed to allocated "iclustr"', calledby='chani')
161 : end if
162 4624 : allocate (gap(hmat%blacsdata%nprow*hmat%blacsdata%npcol), stat=err)
163 4624 : if (err .ne. 0) then
164 0 : WRITE(*,*) 'Error for k-point ', ikpt
165 0 : write (*, *) 'gap :', err, hmat%blacsdata%nprow*hmat%blacsdata%npcol
166 0 : call juDFT_error('Failed to allocated "gap"', calledby='chani')
167 : end if
168 : !
169 : ! Compute size of workspace
170 : !
171 4624 : if (hmat%l_real) then
172 2710 : uplo = 'U'
173 : call pdsygvx(1, 'V', 'I', 'U', hmat%global_size1, hmat%data_r, 1, 1, &
174 : hmat%blacsdata%blacs_desc, smat%data_r, 1, 1, smat%blacsdata%blacs_desc, &
175 : 0.0, 1.0, 1, num, abstol, num1, num2, eig2, orfac, ev_dist%data_r, 1, 1, &
176 2710 : ev_dist%blacsdata%blacs_desc, work2_r, -1, iwork, -1, ifail, iclustr, gap, ierr)
177 2710 : if (work2_r(1) .gt. lwork2) then
178 2710 : lwork2 = work2_r(1)
179 2710 : deallocate (work2_r)
180 2710 : allocate (work2_r(lwork2 + 20*hmat%global_size1), stat=err) ! Allocate even more in case of clusters
181 2710 : if (err .ne. 0) then
182 0 : WRITE(*,*) 'Error for k-point ', ikpt
183 0 : write (*, *) 'work2 :', err, lwork2
184 0 : call juDFT_error('Failed to allocated "work2"', calledby='chani')
185 : end if
186 : end if
187 : else
188 : lrwork = 4*hmat%global_size1 + max(5*nn, np0*mq0) + &
189 1914 : iceil(ne, hmat%blacsdata%nprow*hmat%blacsdata%npcol)*nn
190 : ! Allocate more in case of clusters
191 1914 : allocate (rwork(lrwork + 10*hmat%global_size1), stat=ierr)
192 1914 : if (err /= 0) then
193 0 : WRITE(*,*) 'Error for k-point ', ikpt
194 0 : write (*, *) 'ERROR: chani.F: Allocating rwork failed'
195 0 : call juDFT_error('Failed to allocated "rwork"', calledby='chani')
196 : end if
197 :
198 : call pzhegvx(1, 'V', 'I', 'U', hmat%global_size1, hmat%data_c, 1, 1, &
199 : hmat%blacsdata%blacs_desc, smat%data_c, 1, 1, smat%blacsdata%blacs_desc, &
200 : 0.0, 1.0, 1, num, abstol, num1, num2, eig2, orfac, ev_dist%data_c, 1, 1, &
201 : ev_dist%blacsdata%blacs_desc, work2_c, -1, rwork, -1, iwork, -1, ifail, iclustr, &
202 1914 : gap, ierr)
203 1914 : if (abs(work2_c(1)) .gt. lwork2) then
204 1914 : lwork2 = work2_c(1)
205 1914 : deallocate (work2_c)
206 1914 : allocate (work2_c(lwork2), stat=err)
207 1914 : if (err /= 0) then
208 0 : WRITE(*,*) 'Error for k-point ', ikpt
209 0 : write (*, *) 'ERROR: chani.F: Allocating rwork failed:', lwork2
210 0 : call juDFT_error('Failed to allocated "work2"', calledby='chani')
211 : end if
212 : end if
213 1914 : if (rwork(1) .gt. lrwork) then
214 0 : lrwork = rwork(1)
215 0 : deallocate (rwork)
216 : ! Allocate even more in case of clusters
217 0 : allocate (rwork(lrwork + 20*hmat%global_size1), stat=err)
218 0 : if (err /= 0) then
219 0 : WRITE(*,*) 'Error for k-point ', ikpt
220 0 : write (*, *) 'ERROR: chani.F: Allocating rwork failed: ', lrwork + 20*hmat%global_size1
221 0 : call juDFT_error('Failed to allocated "rwork"', calledby='chani')
222 : end if
223 : end if
224 : end if
225 4624 : if (iwork(1) .gt. liwork) then
226 0 : liwork = iwork(1)
227 0 : deallocate (iwork)
228 0 : allocate (iwork(liwork), stat=err)
229 0 : if (err /= 0) then
230 0 : WRITE(*,*) 'Error for k-point ', ikpt
231 0 : write (*, *) 'ERROR: chani.F: Allocating iwork failed: ', liwork
232 0 : call juDFT_error('Failed to allocated "iwork"', calledby='chani')
233 : end if
234 : end if
235 : !
236 : ! Now solve generalized eigenvalue problem
237 : !
238 4624 : call timestart("SCALAPACK call")
239 4624 : if (hmat%l_real) then
240 : call pdsygvx(1, 'V', 'I', 'U', hmat%global_size1, hmat%data_r, 1, 1, &
241 : hmat%blacsdata%blacs_desc, smat%data_r, 1, 1, smat%blacsdata%blacs_desc, &
242 : 1.0, 1.0, 1, num, abstol, num1, num2, eig2, orfac, ev_dist%data_r, 1, 1, &
243 : ev_dist%blacsdata%blacs_desc, work2_r, lwork2, iwork, liwork, ifail, iclustr, &
244 2710 : gap, ierr)
245 : else
246 : call pzhegvx(1, 'V', 'I', 'U', hmat%global_size1, hmat%data_c, 1, 1, &
247 : hmat%blacsdata%blacs_desc, smat%data_c, 1, 1, smat%blacsdata%blacs_desc, &
248 : 1.0, 1.0, 1, num, abstol, num1, num2, eig2, orfac, ev_dist%data_c, 1, 1, &
249 : ev_dist%blacsdata%blacs_desc, work2_c, lwork2, rwork, lrwork, iwork, liwork, &
250 1914 : ifail, iclustr, gap, ierr)
251 1914 : deallocate (rwork)
252 : end if
253 4624 : call timestop("SCALAPACK call")
254 4624 : if (ierr .ne. 0) then
255 : !IF (ierr /= 2) WRITE (oUnit,*) myid,' error in pzhegvx/pdsygvx, ierr=',ierr
256 : !IF (ierr <= 0) WRITE (oUnit,*) myid,' illegal input argument'
257 : if (mod(ierr, 2) /= 0) then
258 : !WRITE (oUnit,*) myid,'some eigenvectors failed to converge'
259 : eigs: do i = 1, ne
260 : if (ifail(i) /= 0) then
261 : !WRITE (oUnit,*) myid,' eigenvector',ifail(i), 'failed to converge'
262 : else
263 : exit eigs
264 : end if
265 : end do eigs
266 : !CALL CPP_flush(oUnit)
267 : end if
268 : if (mod(ierr/4, 2) .ne. 0) then
269 : !WRITE(oUnit,*) myid,' only',num2,' eigenvectors converged'
270 : !CALL CPP_flush(oUnit)
271 : end if
272 0 : if (mod(ierr/8, 2) .ne. 0) then
273 : !WRITE(oUnit,*) myid,' PDSTEBZ failed to compute eigenvalues'
274 0 : WRITE(*,*) 'Warning for k-point ', ikpt
275 0 : call judft_warn("SCALAPACK failed to solve eigenvalue problem", calledby="scalapack.f90")
276 : end if
277 0 : if (mod(ierr/16, 2) .ne. 0) then
278 0 : WRITE(*,*) 'Warning for k-point ', ikpt
279 : !WRITE(oUnit,*) myid,' B was not positive definite, Cholesky failed at',ifail(1)
280 : call judft_warn("SCALAPACK failed: B was not positive definite. "//new_line("a")// &
281 : "Order of the smallest minor which is not positive definite:"//int2str(ifail(1)) &
282 0 : , calledby="scalapack.f90")
283 : end if
284 : end if
285 4624 : if (num2 < num1) then
286 : !IF (myid ==0) THEN
287 0 : write (oUnit, *) 'Not all eigenvalues wanted are found'
288 0 : write (oUnit, *) 'number of eigenvalues/vectors wanted', num1
289 0 : write (oUnit, *) 'number of eigenvalues/vectors found', num2
290 : !CALL CPP_flush(oUnit)
291 : !ENDIF
292 : end if
293 : !
294 : ! Each process has all eigenvalues in output
295 195930 : eig(:num2) = eig2(:num2)
296 4624 : deallocate (eig2)
297 : !
298 : !
299 : ! Redistribute eigenvectors from ScaLAPACK distribution to each process, i.e. for
300 : ! process i these are eigenvectors i+1, np+i+1, 2*np+i+1...
301 : ! Only num=num2/np eigenvectors per process
302 : !
303 4624 : num = floor(real(num2)/np)
304 4624 : if (myid .lt. num2 - (num2/np)*np) num = num + 1
305 4624 : ne = 0
306 4624 : do i = myid + 1, num2, np
307 95653 : ne = ne + 1
308 : !eig(ne)=eig2(i)
309 : end do
310 4624 : allocate (t_mpimat::zmat)
311 : call zmat%init(ev_dist%l_real, ev_dist%global_size1, ev_dist%global_size1, &
312 4624 : ev_dist%blacsdata%mpi_com, MPIMAT_ROWCYCLIC)
313 13872 : call zmat%copy(ev_dist, 1, 1)
314 : class DEFAULT
315 0 : WRITE(*,*) 'Error for k-point ', ikpt
316 0 : call judft_error("Wrong type (1) in scalapack")
317 : end select
318 : class DEFAULT
319 0 : WRITE(*,*) 'Error for k-point ', ikpt
320 0 : call judft_error("Wrong type (2) in scalapack")
321 : end select
322 4624 : call ev_dist%free()
323 : #ifdef FLEUR_USE_SCOREP
324 : SCOREP_RECORDING_ON()
325 : #endif
326 : #endif
327 4624 : end subroutine scalapack_gev
328 :
329 0 : subroutine scalapack_reduction(self, hmat, smat)
330 : !Simple driver to transform Generalized Eigenvalue Problem to Standard problem using LAPACK routine
331 :
332 : class(t_solver_scalapack) :: self
333 : class(t_mat), intent(INOUT) :: hmat, smat
334 : integer :: info, n
335 :
336 : #ifdef CPP_SCALAPACK
337 : real :: scale
338 : select type (hmat)
339 : type is (t_mpimat)
340 0 : select type (smat)
341 : type is (t_mpimat)
342 : !Transform to standard problem using SCALAPACK
343 0 : if (hmat%l_real) then
344 0 : call pdpotrf('U', smat%global_size1, smat%data_r, 1, 1, smat%blacsdata%blacs_desc, info)
345 0 : if (info .ne. 0) then
346 0 : write (*, *) 'Error in pdpotrf: info =', info
347 0 : call juDFT_error("1 Reduction failed", calledby="scalapack_reduction")
348 : end if
349 : call pdsygst(1, 'U', smat%global_size1, hmat%data_r, 1, 1, hmat%blacsdata%blacs_desc, &
350 0 : smat%data_r, 1, 1, smat%blacsdata%blacs_desc, scale, info)
351 0 : if (abs(scale - 1) > 1e-10) call judft_error("Scale parameter not implemented in scalapack_reduction")
352 0 : if (info .ne. 0) then
353 0 : write (oUnit, *) 'Error in pdsygst: info =', info
354 0 : call juDFT_error("2 Reduction failed", calledby="scalapack_reduction")
355 : end if
356 : else
357 0 : call pzpotrf('U', smat%global_size1, smat%data_c, 1, 1, smat%blacsdata%blacs_desc, info)
358 0 : if (info .ne. 0) then
359 0 : write (*, *) 'Error in pzpotrf: info =', info
360 0 : call juDFT_error("3 Reduction failed", calledby="scalapack_reduction")
361 : end if
362 : call pzhegst(1, 'U', smat%global_size1, hmat%data_c, 1, 1, smat%blacsdata%blacs_desc, &
363 0 : smat%data_c, 1, 1, smat%blacsdata%blacs_desc, scale, info)
364 0 : if (abs(scale - 1) > 1e-10) call judft_error("Scale parameter not implemented in scalapack_reduction")
365 0 : if (info .ne. 0) then
366 0 : write (oUnit, *) 'Error in pzhegst: info =', info
367 0 : call juDFT_error("4 Reduction failed", calledby="scalapack_reduction")
368 : end if
369 : end if
370 : class default
371 0 : call judft_bug("Wrong matrix type in scalapack")
372 : end select
373 : class default
374 0 : call judft_bug("Wrong matrix type in scalapack")
375 : end select
376 : #endif
377 0 : end subroutine
378 :
379 0 : subroutine scalapack_recover(self, smat, zmat)
380 :
381 : class(t_solver_scalapack) :: self
382 : class(t_mat), intent(INOUT) :: zmat, smat
383 : integer :: m, n, info
384 : #ifdef CPP_SCALAPACK
385 :
386 : select type (smat)
387 : type is (t_mpimat)
388 0 : select type (zmat)
389 : type is (t_mpimat)
390 0 : n = smat%global_size1
391 0 : m = zmat%global_size2
392 : ! --> recover the generalized eigenvectors z by solving z' = l^t * z
393 0 : if (smat%l_real) then
394 : call pdtrtrs('U', 'N', 'N', n, m, smat%data_r, 1, 1, smat%blacsdata%blacs_desc, &
395 0 : zmat%data_r, 1, 1, zmat%blacsdata%blacs_desc, info)
396 : else
397 : call pztrtrs('U', 'N', 'N', n, m, smat%data_c, 1, 1, smat%blacsdata%blacs_desc, &
398 0 : zmat%data_c, 1, 1, zmat%blacsdata%blacs_desc, info)
399 : end if
400 0 : if (info .ne. 0) then
401 0 : write (oUnit, *) 'Error in p?trtrs: info =', info
402 0 : call juDFT_error("Recovery failed", calledby="scalapack_recover")
403 : end if
404 : class default
405 0 : call judft_bug("Wrong matrix type in scalapack")
406 : end select
407 : class default
408 0 : call judft_bug("Wrong matrix type in scalapack")
409 : end select
410 : #endif
411 0 : end subroutine
412 :
413 9248 : end module m_scalapack
|