Line data Source code
1 : !-------------------------------------------------------------------------------
2 : ! Copyright (c) 2016 Peter Grünberg Institut, Forschungszentrum Jülich, Germany
3 : ! This file is part of FLEUR and available as free software under the conditions
4 : ! of the MIT license as expressed in the LICENSE file in more detail.
5 : !--------------------------------------------------------------------------------
6 :
7 : MODULE m_elpa
8 : CONTAINS
9 0 : SUBROUTINE elpa_diag(hmat, smat, ne, eig, ev)
10 : !
11 : !----------------------------------------------------
12 : !- Parallel eigensystem solver - driver routine based on chani; dw'12
13 : ! Uses the ELPA for the actual diagonalization
14 : !
15 : !
16 : ! hmat ..... Hamiltonian matrix
17 : ! smat ..... overlap matrix
18 : ! ne ....... number of ev's searched (and found) on this node
19 : ! On input, overall number of ev's searched,
20 : ! On output, local number of ev's found
21 : ! eig ...... all eigenvalues, output
22 : ! ev ....... local eigenvectors, output
23 : !
24 : !----------------------------------------------------
25 :
26 : USE m_juDFT
27 : USE m_types_mpimat
28 : USE m_types_mat
29 : #ifdef CPP_ELPA_201705003
30 : USE elpa
31 : #else
32 : #ifdef CPP_ELPA
33 : USE elpa1
34 : USE mpi
35 : #ifdef CPP_ELPA2
36 : USE elpa2
37 : #endif
38 : #endif
39 : #endif
40 : IMPLICIT NONE
41 :
42 : CLASS(t_mat), INTENT(INOUT) :: hmat, smat
43 : CLASS(t_mat), ALLOCATABLE, INTENT(OUT)::ev
44 : REAL, INTENT(out) :: eig(:)
45 : INTEGER, INTENT(INOUT) :: ne
46 :
47 : #ifdef CPP_ELPA
48 : !... Local variables
49 : !
50 : INTEGER :: num, num2
51 : INTEGER :: nb, myid, np
52 : INTEGER :: n_col, n_row
53 : LOGICAL :: ok
54 : INTEGER :: err
55 : INTEGER :: mpi_comm_rows, mpi_comm_cols
56 : INTEGER :: i, k
57 :
58 : ! large distributed Matrices
59 : REAL, ALLOCATABLE :: eig2(:)
60 : TYPE(t_mpimat) :: ev_dist
61 : REAL, ALLOCATABLE :: tmp2_r(:, :)
62 : COMPLEX, ALLOCATABLE :: tmp2_c(:, :)
63 : INTEGER, EXTERNAL :: numroc, indxl2g !SCALAPACK functions
64 : #ifdef CPP_ELPA_201705003
65 : INTEGER :: kernel
66 : CLASS(elpa_t), pointer :: elpa_obj
67 :
68 : err = elpa_init(20170403)
69 : elpa_obj => elpa_allocate()
70 : #endif
71 : call timestart("ELPA")
72 : SELECT TYPE (hmat)
73 : TYPE IS (t_mpimat)
74 : SELECT TYPE (smat)
75 : TYPE IS (t_mpimat)
76 :
77 : CALL MPI_BARRIER(hmat%blacsdata%mpi_com, err)
78 : CALL MPI_COMM_RANK(hmat%blacsdata%mpi_com, myid, err)
79 : CALL MPI_COMM_SIZE(hmat%blacsdata%mpi_com, np, err)
80 :
81 : !Create communicators for ELPA
82 : #if defined (CPP_ELPA_201705003)
83 : mpi_comm_rows = -1
84 : mpi_comm_cols = -1
85 : #elif defined (CPP_ELPA_201605004) || defined (CPP_ELPA_201605003)||defined(CPP_ELPA_NEW)
86 : err = get_elpa_row_col_comms(hmat%blacsdata%mpi_com, hmat%blacsdata%myrow, hmat%blacsdata%mycol, mpi_comm_rows, mpi_comm_cols)
87 : #else
88 : CALL get_elpa_row_col_comms(hmat%blacsdata%mpi_com, hmat%blacsdata%myrow, hmat%blacsdata%mycol, mpi_comm_rows, mpi_comm_cols)
89 : #endif
90 :
91 : num2 = ne !no of states solved for
92 :
93 : ALLOCATE (eig2(hmat%global_size1), stat=err) ! The eigenvalue array for ScaLAPACK
94 : IF (err .NE. 0) CALL juDFT_error('Failed to allocated "eig2"', calledby='elpa')
95 :
96 : CALL ev_dist%init(hmat)! Eigenvectors for ScaLAPACK
97 : IF (err .NE. 0) CALL juDFT_error('Failed to allocated "ev_dist"', calledby='elpa')
98 :
99 : IF (hmat%l_real) THEN
100 : ALLOCATE (tmp2_r(hmat%matsize1, hmat%matsize2), stat=err) ! tmp_array
101 : ELSE
102 : ALLOCATE (tmp2_c(hmat%matsize1, hmat%matsize2), stat=err) ! tmp_array
103 : ENDIF
104 : IF (err .NE. 0) CALL juDFT_error('Failed to allocated "tmp2"', calledby='elpa')
105 :
106 : nb = hmat%blacsdata%blacs_desc(5)! Blocking factor
107 : IF (nb .NE. hmat%blacsdata%blacs_desc(6)) CALL judft_error("Different block sizes for rows/columns not supported")
108 :
109 : #ifdef CPP_ELPA_201705003
110 : CALL elpa_obj%set("na", hmat%global_size1, err)
111 : CALL elpa_obj%set("nev", num2, err)
112 : CALL elpa_obj%set("local_nrows", hmat%matsize1, err)
113 : CALL elpa_obj%set("local_ncols", hmat%matsize2, err)
114 : CALL elpa_obj%set("nblk", nb, err)
115 : CALL elpa_obj%set("mpi_comm_parent", hmat%blacsdata%mpi_com, err)
116 : CALL elpa_obj%set("process_row", hmat%blacsdata%myrow, err)
117 : CALL elpa_obj%set("process_col", hmat%blacsdata%mycol, err)
118 : #ifdef _OPENACC
119 : CALL elpa_obj%set("gpu", 1, err)
120 : #else
121 : #ifdef CPP_ELPA2
122 : CALL elpa_obj%set("solver", ELPA_SOLVER_2STAGE)
123 : #else
124 : CALL elpa_obj%set("solver", ELPA_SOLVER_1STAGE)
125 : #endif
126 : #endif
127 : err = elpa_obj%setup()
128 : if (myid == 0) then
129 : call elpa_obj%get("complex_kernel", kernel)
130 : print *, "elpa uses "//elpa_int_value_to_string("complex_kernel", kernel)//" kernel"
131 : endif
132 : #endif
133 :
134 : ! Solve generalized problem
135 : !
136 : ! 1. Calculate Cholesky factorization of Matrix S = U**T * U
137 : ! and invert triangular matrix U.
138 : ! Cholesky factorization:
139 : ! Only upper triangle needs to be set. On return, the upper triangle contains
140 : ! the Cholesky factor and the lower triangle is set to 0.
141 : ! invert_triangular:
142 : ! Inverts an upper triangular real or complex matrix.
143 : !
144 : ! Please note: cholesky_complex/invert_trm_complex are not trimmed for speed.
145 : ! The only reason having them is that the Scalapack counterpart
146 : ! PDPOTRF very often fails on higher processor numbers for unknown reasons!
147 :
148 : #if defined(CPP_ELPA_201705003)
149 : IF (hmat%l_real) THEN
150 : CALL elpa_obj%cholesky(smat%data_r, err)
151 : CALL elpa_obj%invert_triangular(smat%data_r, err)
152 : ELSE
153 : CALL elpa_obj%cholesky(smat%data_c, err)
154 : CALL elpa_obj%invert_triangular(smat%data_c, err)
155 : ENDIF
156 : #elif defined(CPP_ELPA_201605003) || defined(CPP_ELPA_201605004)
157 : IF (hmat%l_real) THEN
158 : ok = cholesky_real(smat%global_size1, smat%data_r, smat%matsize1, nb, smat%matsize2, mpi_comm_rows, mpi_comm_cols, .false.)
159 : ok = invert_trm_real(smat%global_size1, smat%data_r, smat%matsize1, nb, smat%matsize2, mpi_comm_rows, mpi_comm_cols, .false.)
160 : ELSE
161 : ok = cholesky_complex(smat%global_size1, smat%data_c, smat%matsize1, nb, smat%matsize2, mpi_comm_rows, mpi_comm_cols, .false.)
162 : ok = invert_trm_complex(smat%global_size1, smat%data_c, smat%matsize1, nb, smat%matsize2, mpi_comm_rows, mpi_comm_cols, .false.)
163 : ENDIF
164 : #elif defined CPP_ELPA_NEW
165 : IF (hmat%l_real) THEN
166 : CALL cholesky_real(smat%global_size1, smat%data_r, smat%matsize1, nb, smat%matsize2, mpi_comm_rows, mpi_comm_cols, .false., ok)
167 : CALL invert_trm_real(smat%global_size1, smat%data_r, smat%matsize1, nb, smat%matsize2, mpi_comm_rows, mpi_comm_cols, .false., ok)
168 : ELSE
169 : CALL cholesky_complex(smat%global_size1, smat%data_c, smat%matsize1, nb, smat%matsize2, mpi_comm_rows, mpi_comm_cols, .false., ok)
170 : CALL invert_trm_complex(smat%global_size1, smat%data_c, smat%matsize1, nb, smat%matsize2, mpi_comm_rows, mpi_comm_cols, .false., ok)
171 : ENDIF
172 : #else
173 : IF (hmat%l_real) THEN
174 : CALL cholesky_real(smat%global_size1, smat%data_r, smat%matsize1, nb, mpi_comm_rows, mpi_comm_cols, .false.)
175 : CALL invert_trm_real(smat%global_size1, smat%data_r, smat%matsize1, nb, mpi_comm_rows, mpi_comm_cols, .false.)
176 : ELSE
177 : CALL cholesky_complex(smat%global_size1, smat%data_c, smat%matsize1, nb, smat%matsize2, mpi_comm_rows, mpi_comm_cols, .false.)
178 : CALL invert_trm_complex(smat%global_size1, smat%data_c, smat%matsize1, nb, smat%matsize2, mpi_comm_rows, mpi_comm_cols, .false.)
179 : ENDIF
180 : #endif
181 :
182 : ! 2. Calculate U**-T * H * U**-1
183 :
184 : ! 2a. ev_dist = U**-T * H
185 :
186 : ! H is only set in the upper half, solve_evp_real needs a full matrix
187 : ! Set lower half from upper half
188 :
189 : ! Set the lower half of the H matrix to zeros.
190 : DO i = 1, hmat%matsize2
191 : ! Get global column corresponding to i and number of local rows up to
192 : ! and including the diagonal, these are unchanged in H
193 : n_col = indxl2g(i, nb, hmat%blacsdata%mycol, 0, hmat%blacsdata%npcol)
194 : n_row = numroc(n_col, nb, hmat%blacsdata%myrow, 0, hmat%blacsdata%nprow)
195 : IF (hmat%l_real) THEN
196 : hmat%data_r(n_row + 1:hmat%matsize1, i) = 0.0
197 : ELSE
198 : hmat%data_c(n_row + 1:hmat%matsize1, i) = 0.0
199 : ENDIF
200 : ENDDO
201 :
202 : ! Use the ev_dist array to store the calculated values for the lower part.
203 : IF (hmat%l_real) THEN
204 : CALL pdtran(hmat%global_size1, hmat%global_size1, 1.0, hmat%data_r, 1, 1, &
205 : hmat%blacsdata%blacs_desc, 0.0, ev_dist%data_r, 1, 1, ev_dist%blacsdata%blacs_desc)
206 : ELSE
207 : CALL pztranc(hmat%global_size1, hmat%global_size2, cmplx(1.0, 0.0), hmat%data_c, 1, 1, &
208 : hmat%blacsdata%blacs_desc, cmplx(0.0, 0.0), ev_dist%data_c, 1, 1, ev_dist%blacsdata%blacs_desc)
209 : ENDIF
210 :
211 : ! Copy the calculated values to the lower part of the H matrix
212 : DO i = 1, hmat%matsize2
213 : ! Get global column corresponding to i and number of local rows up to
214 : ! and including the diagonal, these are unchanged in H
215 : n_col = indxl2g(i, nb, hmat%blacsdata%mycol, 0, hmat%blacsdata%npcol)
216 : n_row = numroc(n_col, nb, hmat%blacsdata%myrow, 0, hmat%blacsdata%nprow)
217 : IF (hmat%l_real) THEN
218 : hmat%data_r(n_row + 1:hmat%matsize1, i) = ev_dist%data_r(n_row + 1:ev_dist%matsize1, i)
219 : ELSE
220 : hmat%data_c(n_row + 1:hmat%matsize1, i) = ev_dist%data_c(n_row + 1:ev_dist%matsize1, i)
221 : ENDIF
222 : ENDDO
223 :
224 : #if defined (CPP_ELPA_201705003)
225 : IF (hmat%l_real) THEN
226 : CALL elpa_obj%hermitian_multiply('U', 'L', hmat%global_size1, smat%data_r, hmat%data_r, &
227 : smat%matsize1, smat%matsize2, ev_dist%data_r, hmat%matsize1, hmat%matsize2, err)
228 : ELSE
229 : CALL elpa_obj%hermitian_multiply('U', 'L', hmat%global_size1, smat%data_c, hmat%data_c, &
230 : smat%matsize1, smat%matsize2, ev_dist%data_c, hmat%matsize1, hmat%matsize2, err)
231 : ENDIF
232 : #elif defined (CPP_ELPA_201605004)
233 : IF (hmat%l_real) THEN
234 : ok = elpa_mult_at_b_real('U', 'L', hmat%global_size1, hmat%global_size1, smat%data_r, smat%matsize1, smat%matsize2, &
235 : hmat%data_r, hmat%matsize1, hmat%matsize2, nb, mpi_comm_rows, mpi_comm_cols, &
236 : ev_dist%data_r, ev_dist%matsize1, ev_dist%matsize2)
237 : ELSE
238 : ok = mult_ah_b_complex('U', 'L', hmat%global_size1, hmat%global_size1, smat%data_c, smat%matsize1, smat%matsize2, &
239 : hmat%data_c, hmat%matsize1, hmat%matsize2, nb, mpi_comm_rows, mpi_comm_cols, &
240 : ev_dist%data_c, ev_dist%matsize1, ev_dist%matsize2)
241 : ENDIF
242 : #elif defined (CPP_ELPA_201605003)
243 : IF (hmat%l_real) THEN
244 : ok = mult_at_b_real('U', 'L', hmat%global_size1, hmat%global_size1, smat%data_r, smat%matsize1, &
245 : hmat%data_r, hmat%matsize1, nb, mpi_comm_rows, mpi_comm_cols, ev_dist%data_r, ev_dist%matsize1)
246 : ELSE
247 : ok = mult_ah_b_complex('U', 'L', hmat%global_size1, hmat%global_size1, smat%data_c, smat%matsize1, &
248 : hmat%data_c, hmat%matsize1, nb, mpi_comm_rows, mpi_comm_cols, ev_dist%data_c, ev_dist%matsize1)
249 : ENDIF
250 : #else
251 : IF (hmat%l_real) THEN
252 : CALL mult_at_b_real('U', 'L', hmat%global_size1, hmat%global_size1, smat%data_r, smat%matsize1, &
253 : hmat%data_r, hmat%matsize1, nb, mpi_comm_rows, mpi_comm_cols, ev_dist%data_r, ev_dist%matsize1)
254 : ELSE
255 : CALL mult_ah_b_complex('U', 'L', hmat%global_size1, hmat%global_size1, smat%data_c, smat%matsize1, &
256 : hmat%data_c, hmat%matsize1, nb, mpi_comm_rows, mpi_comm_cols, ev_dist%data_c, ev_dist%matsize1)
257 : ENDIF
258 : #endif
259 :
260 : ! 2b. tmp2 = ev_dist**T
261 : IF (hmat%l_real) THEN
262 : CALL pdtran(ev_dist%global_size1, ev_dist%global_size1, 1.0, ev_dist%data_r, 1, 1, &
263 : ev_dist%blacsdata%blacs_desc, 0.0, tmp2_r, 1, 1, ev_dist%blacsdata%blacs_desc)
264 : ELSE
265 : CALL pztranc(ev_dist%global_size1, ev_dist%global_size1, cmplx(1.0, 0.0), ev_dist%data_c, 1, 1, &
266 : ev_dist%blacsdata%blacs_desc, cmplx(0.0, 0.0), tmp2_c, 1, 1, ev_dist%blacsdata%blacs_desc)
267 : ENDIF
268 :
269 : ! 2c. A = U**-T * tmp2 ( = U**-T * Aorig * U**-1 )
270 : #if defined (CPP_ELPA_201705003)
271 : IF (hmat%l_real) THEN
272 : CALL elpa_obj%hermitian_multiply('U', 'U', smat%global_size1, smat%data_r, tmp2_r, &
273 : smat%matsize1, smat%matsize2, hmat%data_r, hmat%matsize1, hmat%matsize2, err)
274 : ELSE
275 : CALL elpa_obj%hermitian_multiply('U', 'U', smat%global_size1, smat%data_c, tmp2_c, &
276 : smat%matsize1, smat%matsize2, hmat%data_c, hmat%matsize1, hmat%matsize2, err)
277 : ENDIF
278 : #elif defined (CPP_ELPA_201605004)
279 : IF (hmat%l_real) THEN
280 : ok = elpa_mult_at_b_real('U', 'U', smat%global_size1, smat%global_size1, smat%data_r, smat%matsize1, smat%matsize2, &
281 : tmp2_r, SIZE(tmp2_r, 1), SIZE(tmp2_r, 2), nb, mpi_comm_rows, mpi_comm_cols, &
282 : hmat%data_r, hmat%matsize1, hmat%matsize2)
283 : ELSE
284 : ok = mult_ah_b_complex('U', 'U', smat%global_size1, smat%global_size1, smat%data_c, smat%matsize1, smat%matsize2, &
285 : tmp2_c, SIZE(tmp2_c, 1), SIZE(tmp2_c, 2), nb, mpi_comm_rows, mpi_comm_cols, &
286 : hmat%data_c, hmat%matsize1, hmat%matsize2)
287 : ENDIF
288 : #elif defined (CPP_ELPA_201605003)
289 : IF (hmat%l_real) THEN
290 : ok = mult_at_b_real('U', 'U', smat%global_size1, smat%global_size1, smat%data_r, smat%matsize1, &
291 : tmp2_r, SIZE(tmp2_r, 1), nb, mpi_comm_rows, mpi_comm_cols, hmat%data_r, hmat%matsize1)
292 : ELSE
293 : ok = mult_ah_b_complex('U', 'U', smat%global_size1, smat%global_size1, smat%data_c, smat%matsize1, &
294 : tmp2_c, SIZE(tmp2_c, 1), nb, mpi_comm_rows, mpi_comm_cols, hmat%data_c, hmat%matsize1)
295 : ENDIF
296 : #else
297 : IF (hmat%l_real) THEN
298 : CALL mult_at_b_real('U', 'U', smat%global_size1, smat%global_size1, smat%data_r, smat%matsize1, &
299 : tmp2_r, SIZE(tmp2_r, 1), nb, mpi_comm_rows, mpi_comm_cols, hmat%data_r, hmat%matsize1)
300 : ELSE
301 : CALL mult_ah_b_complex('U', 'U', smat%global_size1, smat%global_size1, smat%data_c, smat%matsize1, &
302 : tmp2_c, SIZE(tmp2_c, 1), nb, mpi_comm_rows, mpi_comm_cols, hmat%data_c, hmat%matsize1)
303 : ENDIF
304 : #endif
305 :
306 : ! A is only set in the upper half, solve_evp_real needs a full matrix
307 : ! Set lower half from upper half
308 :
309 : IF (hmat%l_real) THEN
310 : CALL pdtran(hmat%global_size1, hmat%global_size1, 1.0, hmat%data_r, 1, 1, &
311 : hmat%blacsdata%blacs_desc, 0.0, ev_dist%data_r, 1, 1, ev_dist%blacsdata%blacs_desc)
312 : ELSE
313 : CALL pztranc(hmat%global_size1, hmat%global_size1, cmplx(1.0, 0.0), hmat%data_c, 1, 1, &
314 : hmat%blacsdata%blacs_desc, cmplx(0.0, 0.0), ev_dist%data_c, 1, 1, ev_dist%blacsdata%blacs_desc)
315 : ENDIF
316 :
317 : DO i = 1, hmat%matsize2
318 : ! Get global column corresponding to i and number of local rows up to
319 : ! and including the diagonal, these are unchanged in A
320 : n_col = indxl2g(i, nb, hmat%blacsdata%mycol, 0, hmat%blacsdata%npcol)
321 : n_row = numroc(n_col, nb, hmat%blacsdata%myrow, 0, hmat%blacsdata%nprow)
322 : IF (hmat%l_real) THEN
323 : hmat%data_r(n_row + 1:hmat%matsize1, i) = ev_dist%data_r(n_row + 1:ev_dist%matsize1, i)
324 : ELSE
325 : hmat%data_c(n_row + 1:hmat%matsize1, i) = ev_dist%data_c(n_row + 1:ev_dist%matsize1, i)
326 : ENDIF
327 : ENDDO
328 :
329 : ! 3. Calculate eigenvalues/eigenvectors of U**-T * A * U**-1
330 : ! Eigenvectors go to ev_dist
331 : #if defined (CPP_ELPA_201705003)
332 : IF (hmat%l_real) THEN
333 : CALL elpa_obj%eigenvectors(hmat%data_r, eig2, ev_dist%data_r, err)
334 : ELSE
335 : CALL elpa_obj%eigenvectors(hmat%data_c, eig2, ev_dist%data_c, err)
336 : ENDIF
337 : #elif defined(CPP_ELPA_201605003) || defined(CPP_ELPA_201605004)
338 : #ifdef CPP_ELPA2
339 : IF (hmat%l_real) THEN
340 : ok = solve_evp_real_2stage(hmat%global_size1, num2, hmat%data_r, hmat%matsize1, &
341 : eig2, ev_dist%data_r, ev_dist%matsize1, nb, ev_dist%matsize2, mpi_comm_rows, mpi_comm_cols, hmat%blacsdata%mpi_com)
342 : ELSE
343 : ok = solve_evp_complex_2stage(hmat%global_size1, num2, hmat%data_c, hmat%matsize1, &
344 : eig2, ev_dist%data_c, ev_dist%matsize1, nb, ev_dist%matsize2, mpi_comm_rows, mpi_comm_cols, hmat%blacsdata%mpi_com)
345 : ENDIF
346 : #else
347 : IF (hmat%l_real) THEN
348 : ok = solve_evp_real_1stage(hmat%global_size1, num2, hmat%data_r, hmat%matsize1, &
349 : eig2, ev_dist%data_r, ev_dist%matsize1, nb, ev_dist%matsize2, mpi_comm_rows, mpi_comm_cols)
350 : ELSE
351 : ok = solve_evp_complex_1stage(hmat%global_size1, num2, hmat%data_c, hmat%matsize1, &
352 : eig2, ev_dist%data_c, ev_dist%matsize1, nb, ev_dist%matsize2, mpi_comm_rows, mpi_comm_cols)
353 : ENDIF
354 : #endif
355 : #elif defined CPP_ELPA_NEW
356 : #ifdef CPP_ELPA2
357 : IF (hmat%l_real) THEN
358 : err = solve_evp_real_2stage(hmat%global_size1, num2, hmat%data_r, hmat%matsize1, &
359 : eig2, ev_dist%data_r, ev_dist%matsize1, nb, ev_dist%matsize2, mpi_comm_rows, mpi_comm_cols, hmat%blacsdata%mpi_com)
360 : ELSE
361 : err = solve_evp_complex_2stage(hmat%global_size1, num2, hmat%data_c, hmat%matsize1, &
362 : eig2, ev_dist%data_c, ev_dist%matsize1, nb, ev_dist%matsize2, mpi_comm_rows, mpi_comm_cols, hmat%blacsdata%mpi_com)
363 : ENDIF
364 : #else
365 : IF (hmat%l_real) THEN
366 : err = solve_evp_real_1stage(hmat%global_size1, num2, hmat%data_r, hmat%matsize1, &
367 : eig2, ev_dist%data_r, ev_dist%matsize1, nb, ev_dist%matsize2, mpi_comm_rows, mpi_comm_cols)
368 : ELSE
369 : err = solve_evp_complex_1stage(hmat%global_size1, num2, hmat%data_c, hmat%matsize1, &
370 : eig2, ev_dist%data_c, ev_dist%matsize1, nb, ev_dist%matsize2, mpi_comm_rows, mpi_comm_cols)
371 : ENDIF
372 : #endif
373 : #else
374 : #ifdef CPP_ELPA2
375 : IF (hmat%l_real) THEN
376 : CALL solve_evp_real_2stage(hmat%global_size1, num2, hmat%data_r, hmat%matsize1, &
377 : eig2, ev_dist%data_r, ev_dist%matsize1, nb, mpi_comm_rows, mpi_comm_cols, hmat%blacsdata%mpi_com)
378 : ELSE
379 : CALL solve_evp_complex_2stage(hmat%global_size1, num2, hmat%data_c, hmat%matsize1, &
380 : eig2, ev_dist%data_c, ev_dist%matsize1, nb, mpi_comm_rows, mpi_comm_cols, hmat%blacsdata%mpi_com)
381 : ENDIF
382 : #else
383 : IF (hmat%l_real) THEN
384 : CALL solve_evp_real(hmat%global_size1, num2, hmat%data_r, hmat%matsize1, &
385 : eig2, ev_dist%data_r, ev_dist%matsize1, nb, mpi_comm_rows, mpi_comm_cols)
386 : ELSE
387 : CALL solve_evp_complex(hmat%global_size1, num2, hmat%data_c, hmat%matsize1, &
388 : eig2, ev_dist%data_c, ev_dist%matsize1, nb, mpi_comm_rows, mpi_comm_cols)
389 : ENDIF
390 : #endif
391 : #endif
392 :
393 : ! 4. Backtransform eigenvectors: Z = U**-1 * ev_dist
394 :
395 : ! mult_ah_b_complex needs the transpose of U**-1, thus tmp2 = (U**-1)**T
396 : IF (hmat%l_real) THEN
397 : CALL pdtran(smat%global_size1, smat%global_size1, 1.0, smat%data_r, 1, 1, &
398 : smat%blacsdata%blacs_desc, 0.0, tmp2_r, 1, 1, smat%blacsdata%blacs_desc)
399 : ELSE
400 : CALL pztranc(smat%global_size1, smat%global_size1, cmplx(1.0, 0.0), smat%data_c, 1, 1, &
401 : smat%blacsdata%blacs_desc, cmplx(0.0, 0.0), tmp2_c, 1, 1, smat%blacsdata%blacs_desc)
402 : ENDIF
403 :
404 : #if defined (CPP_ELPA_201705003)
405 : IF (hmat%l_real) THEN
406 : CALL elpa_obj%hermitian_multiply('L', 'N', num2, tmp2_r, ev_dist%data_r, &
407 : ev_dist%matsize1, ev_dist%matsize2, hmat%data_r, hmat%matsize1, hmat%matsize2, err)
408 : ELSE
409 : CALL elpa_obj%hermitian_multiply('L', 'N', num2, tmp2_c, ev_dist%data_c, &
410 : ev_dist%matsize1, ev_dist%matsize2, hmat%data_c, hmat%matsize1, hmat%matsize2, err)
411 : ENDIF
412 : #elif defined (CPP_ELPA_201605004)
413 : IF (hmat%l_real) THEN
414 : ok = elpa_mult_at_b_real('L', 'N', hmat%global_size1, num2, tmp2_r, hmat%matsize1, hmat%matsize2, &
415 : ev_dist%data_r, ev_dist%matsize1, ev_dist%matsize2, nb, mpi_comm_rows, mpi_comm_cols, &
416 : hmat%data_r, hmat%matsize1, hmat%matsize2)
417 : ELSE
418 : ok = mult_ah_b_complex('L', 'N', hmat%global_size1, num2, tmp2_c, hmat%matsize1, hmat%matsize2, &
419 : ev_dist%data_c, ev_dist%matsize1, ev_dist%matsize2, nb, mpi_comm_rows, mpi_comm_cols, &
420 : hmat%data_c, hmat%matsize1, hmat%matsize2)
421 : ENDIF
422 : #elif defined (CPP_ELPA_201605003)
423 : IF (hmat%l_real) THEN
424 : ok = elpa_mult_at_b_real('L', 'N', hmat%global_size1, num2, tmp2_r, hmat%matsize1, &
425 : ev_dist%data_r, ev_dist%matsize1, nb, mpi_comm_rows, mpi_comm_cols, &
426 : hmat%data_r, hmat%matsize1)
427 : ELSE
428 : ok = mult_ah_b_complex('L', 'N', hmat%global_size1, num2, tmp2_c, hmat%matsize1, &
429 : ev_dist%data_c, ev_dist%matsize1, nb, mpi_comm_rows, mpi_comm_cols, &
430 : hmat%data_c, hmat%matsize1)
431 : ENDIF
432 : #else
433 : IF (hmat%l_real) THEN
434 : CALL mult_at_b_real('L', 'N', hmat%global_size1, num2, tmp2_r, hmat%matsize1, &
435 : ev_dist%data_r, ev_dist%matsize1, nb, mpi_comm_rows, mpi_comm_cols, &
436 : hmat%data_r, hmat%matsize1)
437 : ELSE
438 : CALL mult_ah_b_complex('L', 'N', hmat%global_size1, num2, tmp2_c, hmat%matsize1, &
439 : ev_dist%data_c, ev_dist%matsize1, nb, mpi_comm_rows, mpi_comm_cols, &
440 : hmat%data_c, hmat%matsize1)
441 : ENDIF
442 : #endif
443 :
444 : #if defined (CPP_ELPA_201705003)
445 : CALL elpa_deallocate(elpa_obj)
446 : CALL elpa_uninit()
447 : #endif
448 : ! END of ELPA stuff
449 : #if ( !defined (CPP_ELPA_201705003))
450 : CALL MPI_COMM_FREE(mpi_comm_rows, err)
451 : CALL MPI_COMM_FREE(mpi_comm_cols, err)
452 : #endif
453 : !
454 : ! Each process has all eigenvalues in output
455 : eig(:num2) = eig2(:num2)
456 : DEALLOCATE (eig2)
457 : !
458 : !
459 : ! Redistribute eigenvectors from ScaLAPACK distribution to each process, i.e. for
460 : ! process i these are eigenvectors i+1, np+i+1, 2*np+i+1...
461 : ! Only num=num2/np eigenvectors per process
462 : !
463 : num = FLOOR(REAL(num2)/np)
464 : IF (myid .LT. num2 - (num2/np)*np) num = num + 1
465 : ne = 0
466 : DO i = myid + 1, num2, np
467 : ne = ne + 1
468 : !eig(ne)=eig2(i)
469 : ENDDO
470 : !
471 : !
472 : ALLOCATE (t_mpimat::ev)
473 : CALL ev%init(hmat%l_real, hmat%global_size1, hmat%global_size1, hmat%blacsdata%mpi_com, .FALSE.)
474 : CALL ev%copy(hmat, 1, 1)
475 : call ev_dist%free()
476 : CLASS DEFAULT
477 : call judft_error("Wrong type (1) in scalapack")
478 : END SELECT
479 : CLASS DEFAULT
480 : call judft_error("Wrong type (2) in scalapack")
481 : END SELECT
482 :
483 : IF (hmat%l_real) THEN
484 : DEALLOCATE (tmp2_r)
485 : ELSE
486 : DEALLOCATE (tmp2_c)
487 : ENDIF
488 :
489 : #endif
490 0 : call timestop("ELPA")
491 0 : END SUBROUTINE elpa_diag
492 : END MODULE m_elpa
|