Line data Source code
1 : !--------------------------------------------------------------------------------
2 : ! Copyright (c) 2016 Peter Grünberg Institut, Forschungszentrum Jülich, Germany
3 : ! This file is part of FLEUR and available as free software under the conditions
4 : ! of the MIT license as expressed in the LICENSE file in more detail.
5 : !--------------------------------------------------------------------------------
6 : #ifdef FLEUR_USE_SCOREP
7 : #include 'scorep/SCOREP_User.inc'
8 : #endif
9 : MODULE m_scalapack
10 : CONTAINS
11 4872 : SUBROUTINE scalapack(hmat,smat,ne,eig,ev)
12 : !
13 : !----------------------------------------------------
14 : !- Parallel eigensystem solver - driver routine; gb99
15 : ! Uses the SCALAPACK for the actual diagonalization
16 : !
17 : ! hmat ..... Hamiltonian matrix
18 : ! smat ..... overlap matrix
19 : ! ne ....... number of ev's searched (and found) on this node
20 : ! On input, overall number of ev's searched,
21 : ! On output, local number of ev's found
22 : ! eig ...... all eigenvalues, output
23 : ! ev ....... local eigenvectors, output
24 : !
25 : !----------------------------------------------------
26 : !
27 : USE m_juDFT
28 : USE m_constants
29 : USE m_types_mpimat
30 : USE m_types_mat
31 : #ifdef CPP_MPI
32 : USE mpi
33 : #endif
34 : IMPLICIT NONE
35 : CLASS(t_mat),INTENT(INOUT) :: hmat,smat
36 : CLASS(t_mat),ALLOCATABLE,INTENT(OUT)::ev
37 : REAL,INTENT(out) :: eig(:)
38 : INTEGER,INTENT(INOUT) :: ne
39 :
40 :
41 : #ifdef CPP_SCALAPACK
42 : !... Local variables
43 : !
44 : INTEGER i , ierr, err
45 4872 : INTEGER, ALLOCATABLE :: iwork(:)
46 4872 : REAL, ALLOCATABLE :: rwork(:)
47 : INTEGER :: lrwork
48 :
49 : !
50 : ! ScaLAPACK things
51 : CHARACTER (len=1) :: uplo
52 : INTEGER :: num,num1,num2,liwork,lwork2,np0,mq0,np,myid
53 : INTEGER :: iceil, numroc, nn, nb
54 4872 : INTEGER, ALLOCATABLE :: ifail(:), iclustr(:)
55 : REAL :: abstol,orfac=1.E-4,dlamch
56 4872 : REAL,ALLOCATABLE :: eig2(:), gap(:)
57 4872 : REAL, ALLOCATABLE :: work2_r(:)
58 4872 : COMPLEX, ALLOCATABLE :: work2_c(:)
59 :
60 4872 : TYPE(t_mpimat):: ev_dist
61 :
62 : EXTERNAL iceil, numroc
63 : EXTERNAL dlamch
64 :
65 :
66 : #ifdef FLEUR_USE_SCOREP
67 :
68 : SCOREP_RECORDING_OFF()
69 : #endif
70 : SELECT TYPE(hmat)
71 : TYPE IS (t_mpimat)
72 4872 : SELECT TYPE(smat)
73 : TYPE IS (t_mpimat)
74 :
75 14616 : ALLOCATE(eig2(hmat%global_size1))
76 :
77 :
78 4872 : CALL MPI_COMM_RANK(hmat%blacsdata%mpi_com,myid,ierr)
79 4872 : CALL MPI_COMM_SIZE(hmat%blacsdata%mpi_com,np,ierr)
80 :
81 4872 : num=ne !no of states solved for
82 :
83 4872 : abstol=2.0*dlamch('S') ! PDLAMCH gave an error on ZAMpano
84 :
85 4872 : CALL ev_dist%init(hmat)
86 :
87 : !smat%blacs_desc(2) = hmat%blacs_desc(2)
88 : !ev_dist%blacs_desc(2) = hmat%blacs_desc(2)
89 : !smat%blacs_desc=hmat%blacs_desc
90 : !ev_dist%blacs_desc=hmat%blacs_desc
91 :
92 :
93 4872 : nb=hmat%blacsdata%blacs_desc(5)! Blocking factor
94 4872 : IF (nb.NE.hmat%blacsdata%blacs_desc(6)) CALL judft_error("Different block sizes for rows/columns not supported")
95 :
96 : !
97 4872 : nn=MAX(MAX(hmat%global_size1,nb),2)
98 4872 : np0=numroc(nn,nb,0,0,hmat%blacsdata%nprow)
99 4872 : mq0=numroc(MAX(MAX(ne,nb),2),nb,0,0,hmat%blacsdata%npcol)
100 4872 : IF (hmat%l_real) THEN
101 2704 : lwork2=5*hmat%global_size1+MAX(5*nn,np0*mq0+2*nb*nb)+ iceil(ne,hmat%blacsdata%nprow*hmat%blacsdata%npcol)*nn
102 8112 : ALLOCATE ( work2_r(lwork2+10*hmat%global_size1), stat=err ) ! Allocate more in case of clusters
103 : ELSE
104 2168 : lwork2=hmat%global_size1+MAX(nb*(np0+1),3)
105 6504 : ALLOCATE ( work2_c(lwork2), stat=err )
106 : ENDIF
107 4872 : IF (err.NE.0) THEN
108 0 : WRITE (*,*) 'work2 :',err,lwork2
109 0 : CALL juDFT_error('Failed to allocated "work2"', calledby ='chani')
110 : ENDIF
111 :
112 4872 : liwork=6*MAX(MAX(hmat%global_size1,hmat%blacsdata%nprow*hmat%blacsdata%npcol+1),4)
113 14616 : ALLOCATE ( iwork(liwork), stat=err )
114 4872 : IF (err.NE.0) THEN
115 0 : WRITE (*,*) 'iwork :',err,liwork
116 0 : CALL juDFT_error('Failed to allocated "iwork"', calledby ='chani')
117 : ENDIF
118 14616 : ALLOCATE ( ifail(hmat%global_size1), stat=err )
119 4872 : IF (err.NE.0) THEN
120 0 : WRITE (*,*) 'ifail :',err,hmat%global_size1
121 0 : CALL juDFT_error('Failed to allocated "ifail"', calledby ='chani')
122 : ENDIF
123 14616 : ALLOCATE ( iclustr(2*hmat%blacsdata%nprow*hmat%blacsdata%npcol), stat=err )
124 4872 : IF (err.NE.0) THEN
125 0 : WRITE (*,*) 'iclustr:',err,2*hmat%blacsdata%nprow*hmat%blacsdata%npcol
126 0 : CALL juDFT_error('Failed to allocated "iclustr"', calledby ='chani')
127 : ENDIF
128 14616 : ALLOCATE ( gap(hmat%blacsdata%nprow*hmat%blacsdata%npcol), stat=err )
129 4872 : IF (err.NE.0) THEN
130 0 : WRITE (*,*) 'gap :',err,hmat%blacsdata%nprow*hmat%blacsdata%npcol
131 0 : CALL juDFT_error('Failed to allocated "gap"', calledby ='chani')
132 : ENDIF
133 : !
134 : ! Compute size of workspace
135 : !
136 4872 : IF (hmat%l_real) THEN
137 2704 : uplo='U'
138 : CALL pdsygvx(1,'V','I','U',hmat%global_size1,hmat%data_r,1,1,&
139 : hmat%blacsdata%blacs_desc,smat%data_r,1,1,smat%blacsdata%blacs_desc,&
140 : 0.0,1.0,1,num,abstol,num1,num2,eig2,orfac,ev_dist%data_r,1,1,&
141 2704 : ev_dist%blacsdata%blacs_desc,work2_r,-1,iwork,-1,ifail,iclustr, gap,ierr)
142 2704 : IF ( work2_r(1).GT.lwork2) THEN
143 2704 : lwork2 = work2_r(1)
144 2704 : DEALLOCATE (work2_r)
145 8112 : ALLOCATE ( work2_r(lwork2+20*hmat%global_size1), stat=err ) ! Allocate even more in case of clusters
146 2704 : IF (err.NE.0) THEN
147 0 : WRITE (*,*) 'work2 :',err,lwork2
148 0 : CALL juDFT_error('Failed to allocated "work2"', calledby ='chani')
149 : ENDIF
150 : ENDIF
151 : ELSE
152 2168 : lrwork=4*hmat%global_size1+MAX(5*nn,np0*mq0)+ iceil(ne,hmat%blacsdata%nprow*hmat%blacsdata%npcol)*nn
153 : ! Allocate more in case of clusters
154 6504 : ALLOCATE(rwork(lrwork+10*hmat%global_size1), stat=ierr)
155 2168 : IF (err /= 0) THEN
156 0 : WRITE (*,*) 'ERROR: chani.F: Allocating rwork failed'
157 0 : CALL juDFT_error('Failed to allocated "rwork"', calledby ='chani')
158 : ENDIF
159 :
160 : CALL pzhegvx(1,'V','I','U',hmat%global_size1,hmat%data_c,1,1,&
161 : hmat%blacsdata%blacs_desc,smat%data_c,1,1, smat%blacsdata%blacs_desc,&
162 : 0.0,1.0,1,num,abstol,num1,num2,eig2,orfac,ev_dist%data_c,1,1,&
163 : ev_dist%blacsdata%blacs_desc,work2_c,-1,rwork,-1,iwork,-1,ifail,iclustr,&
164 2168 : gap,ierr)
165 2168 : IF (ABS(work2_c(1)).GT.lwork2) THEN
166 2168 : lwork2=work2_c(1)
167 2168 : DEALLOCATE (work2_c)
168 6504 : ALLOCATE (work2_c(lwork2), stat=err)
169 2168 : IF (err /= 0) THEN
170 0 : WRITE (*,*) 'ERROR: chani.F: Allocating rwork failed:',lwork2
171 0 : CALL juDFT_error('Failed to allocated "work2"', calledby ='chani')
172 : ENDIF
173 : ENDIF
174 2168 : IF (rwork(1).GT.lrwork) THEN
175 0 : lrwork=rwork(1)
176 0 : DEALLOCATE(rwork)
177 : ! Allocate even more in case of clusters
178 0 : ALLOCATE (rwork(lrwork+20*hmat%global_size1), stat=err)
179 0 : IF (err /= 0) THEN
180 0 : WRITE (*,*) 'ERROR: chani.F: Allocating rwork failed: ', lrwork+20*hmat%global_size1
181 0 : CALL juDFT_error('Failed to allocated "rwork"', calledby ='chani')
182 : ENDIF
183 : ENDIF
184 : endif
185 4872 : IF (iwork(1) .GT. liwork) THEN
186 0 : liwork = iwork(1)
187 0 : DEALLOCATE (iwork)
188 0 : ALLOCATE (iwork(liwork), stat=err)
189 0 : IF (err /= 0) THEN
190 0 : WRITE (*,*) 'ERROR: chani.F: Allocating iwork failed: ',liwork
191 0 : CALL juDFT_error('Failed to allocated "iwork"', calledby ='chani')
192 : ENDIF
193 : ENDIF
194 : !
195 : ! Now solve generalized eigenvalue problem
196 : !
197 4872 : CALL timestart("SCALAPACK call")
198 4872 : if (hmat%l_real) THEN
199 : CALL pdsygvx(1,'V','I','U',hmat%global_size1,hmat%data_r,1,1,hmat%blacsdata%blacs_desc,smat%data_r,1,1, smat%blacsdata%blacs_desc,&
200 : 1.0,1.0,1,num,abstol,num1,num2,eig2,orfac,ev_dist%data_r,1,1,&
201 : ev_dist%blacsdata%blacs_desc,work2_r,lwork2,iwork,liwork,ifail,iclustr,&
202 2704 : gap,ierr)
203 : else
204 : CALL pzhegvx(1,'V','I','U',hmat%global_size1,hmat%data_c,1,1,hmat%blacsdata%blacs_desc,smat%data_c,1,1, smat%blacsdata%blacs_desc,&
205 : 1.0,1.0,1,num,abstol,num1,num2,eig2,orfac,ev_dist%data_c,1,1,&
206 : ev_dist%blacsdata%blacs_desc,work2_c,lwork2,rwork,lrwork,iwork,liwork,&
207 2168 : ifail,iclustr,gap,ierr)
208 2168 : DEALLOCATE(rwork)
209 : endif
210 4872 : CALL timestop("SCALAPACK call")
211 4872 : IF (ierr .NE. 0) THEN
212 : !IF (ierr /= 2) WRITE (oUnit,*) myid,' error in pzhegvx/pdsygvx, ierr=',ierr
213 : !IF (ierr <= 0) WRITE (oUnit,*) myid,' illegal input argument'
214 : IF (MOD(ierr,2) /= 0) THEN
215 : !WRITE (oUnit,*) myid,'some eigenvectors failed to converge'
216 : eigs: DO i = 1, ne
217 : IF (ifail(i) /= 0) THEN
218 : !WRITE (oUnit,*) myid,' eigenvector',ifail(i), 'failed to converge'
219 : ELSE
220 : EXIT eigs
221 : ENDIF
222 : ENDDO eigs
223 : !CALL CPP_flush(oUnit)
224 : ENDIF
225 : IF (MOD(ierr/4,2).NE.0) THEN
226 : !WRITE(oUnit,*) myid,' only',num2,' eigenvectors converged'
227 : !CALL CPP_flush(oUnit)
228 : ENDIF
229 82 : IF (MOD(ierr/8,2).NE.0) THEN
230 : !WRITE(oUnit,*) myid,' PDSTEBZ failed to compute eigenvalues'
231 0 : CALL judft_warn("SCALAPACK failed to solve eigenvalue problem",calledby="scalapack.f90")
232 : ENDIF
233 82 : IF (MOD(ierr/16,2).NE.0) THEN
234 : !WRITE(oUnit,*) myid,' B was not positive definite, Cholesky failed at',ifail(1)
235 : CALL judft_warn("SCALAPACK failed: B was not positive definite. " // new_line("a") // &
236 : "Order of the smallest minor which is not positive definite:" // int2str(ifail(1))&
237 0 : ,calledby="scalapack.f90")
238 : ENDIF
239 : ENDIF
240 4872 : IF (num2 < num1) THEN
241 : !IF (myid ==0) THEN
242 0 : WRITE(oUnit,*) 'Not all eigenvalues wanted are found'
243 0 : WRITE(oUnit,*) 'number of eigenvalues/vectors wanted',num1
244 0 : WRITE(oUnit,*) 'number of eigenvalues/vectors found',num2
245 : !CALL CPP_flush(oUnit)
246 : !ENDIF
247 : ENDIF
248 : !
249 : ! Each process has all eigenvalues in output
250 217432 : eig(:num2) = eig2(:num2)
251 4872 : DEALLOCATE(eig2)
252 : !
253 : !
254 : ! Redistribute eigenvectors from ScaLAPACK distribution to each process, i.e. for
255 : ! process i these are eigenvectors i+1, np+i+1, 2*np+i+1...
256 : ! Only num=num2/np eigenvectors per process
257 : !
258 4872 : num=FLOOR(REAL(num2)/np)
259 4872 : IF (myid.LT.num2-(num2/np)*np) num=num+1
260 4872 : ne=0
261 4872 : DO i=myid+1,num2,np
262 106280 : ne=ne+1
263 : !eig(ne)=eig2(i)
264 : ENDDO
265 4872 : ALLOCATE(t_mpimat::ev)
266 4872 : CALL ev%init(ev_dist%l_real,ev_dist%global_size1,ev_dist%global_size1,ev_dist%blacsdata%mpi_com,.FALSE.)
267 14616 : CALL ev%copy(ev_dist,1,1)
268 : CLASS DEFAULT
269 0 : call judft_error("Wrong type (1) in scalapack")
270 : END SELECT
271 : CLASS DEFAULT
272 0 : call judft_error("Wrong type (2) in scalapack")
273 : END SELECT
274 4872 : call ev_dist%free()
275 : #ifdef FLEUR_USE_SCOREP
276 : SCOREP_RECORDING_ON()
277 : #endif
278 : #endif
279 4872 : END SUBROUTINE scalapack
280 4872 : END MODULE m_scalapack
|