Line data Source code
1 : MODULE m_types_mat
2 : #ifdef _OPENACC
3 : use openacc
4 : use cusolverDn
5 : use cublas
6 : #endif
7 : USE m_judft
8 : use m_constants
9 : IMPLICIT NONE
10 : PRIVATE
      !<This is the basic type to store and manipulate real/complex rank-2 matrices
      !!
      !! In its simple implementation here, it contains fields for the matrix size and
      !! a real and a complex array for the data.
      !! This data type will be overridden for distributed matrices by t_mpimat as defined in types_mpimat.F90
16 :
   !> Dense rank-2 matrix container: holds either a real or a complex matrix,
   !! selected by l_real; only the matching data array carries meaningful data.
   TYPE :: t_mat
      LOGICAL :: l_real !>Store either real or complex data (.TRUE. -> data_r, .FALSE. -> data_c)
      INTEGER :: matsize1 = -1 !> matsize1=size(data_?,1),i.e. no of rows (-1 means "not initialized")
      INTEGER :: matsize2 = -1 !> matsize2=size(data_?,2),i.e. no of columns (-1 means "not initialized")
      REAL, ALLOCATABLE :: data_r(:, :)    !> real data; a small dummy array in the complex case
      COMPLEX, ALLOCATABLE :: data_c(:, :) !> complex data; a small dummy array in the real case
   CONTAINS
      PROCEDURE :: alloc => t_mat_alloc !> allocate the data-arrays
      PROCEDURE :: multiply => t_mat_multiply !> do a matrix-matrix multiply
      PROCEDURE :: transpose => t_mat_transpose !> transpose the matrix (conjugate-transpose for complex data)
      PROCEDURE :: from_packed_real => t_mat_from_packed_real
      PROCEDURE :: from_packed_cmplx => t_mat_from_packed_cmplx !> initialized from a packed-storage matrix
      generic :: from_packed => from_packed_real, from_packed_cmplx
      PROCEDURE :: inverse => t_mat_inverse !> invert the matrix
      PROCEDURE :: linear_problem => t_mat_lproblem !> Solve linear equation
      PROCEDURE :: to_packed => t_mat_to_packed !> convert to packed-storage matrix
      PROCEDURE :: clear => t_mat_clear !> set data arrays to zero
      PROCEDURE :: copy => t_mat_copy !> copy into another t_mat (overloaded for t_mpimat)
      PROCEDURE :: move => t_mat_move !> move data into another t_mat (overloaded for t_mpimat)
      PROCEDURE :: save_npy => t_mat_save_npy !> dump matrix to a numpy .npy file
      procedure :: allocated => t_mat_allocated !> .TRUE. if any data array is allocated
      PROCEDURE :: init_details => t_mat_init
      PROCEDURE :: init_template => t_mat_init_template !> initalize the matrix(overloaded for t_mpimat)
      GENERIC :: init => init_details, init_template
      PROCEDURE :: free => t_mat_free !> dealloc the data (overloaded for t_mpimat)
      PROCEDURE :: add_transpose => t_mat_add_transpose!> add the tranpose/Hermitian conjg. without the diagonal (overloaded for t_mpimat)
      PROCEDURE :: unsymmetry => t_mat_unsymmetry !> measure deviation from symmetry
      procedure :: norm2 => t_mat_norm2 !> Frobenius norm via BLAS nrm2
      procedure :: subtract => t_mat_subtract !> elementwise difference of two matrices
      procedure :: u2l => t_mat_u2l !> mirror upper triangle into lower
      procedure :: l2u => t_mat_l2u !> mirror lower triangle into upper
      procedure :: size_mb => t_mat_size_mb !> allocated storage in MB
      procedure :: print_type => t_mat_print_type
      procedure :: conjugate => t_mat_conjg !> in-place complex conjugation
      procedure :: reset => t_mat_reset !> set all elements to a constant
      procedure :: bcast => t_mat_bcast !> MPI broadcast of the whole matrix
      procedure :: pos_eigvec_sum => t_mat_pos_eigvec_sum !> fix the sign convention of eigenvector columns
      procedure :: leastsq => t_mat_leastsq !> least-squares solve via LAPACK ?gels
      procedure :: add !> mat = mat + alpha*mat2
   END type t_mat
57 : PUBLIC t_mat
58 : CONTAINS
59 18160 : subroutine add(mat,mat2,alpha_c,alpha_r)
60 : #ifdef _OPENACC
61 : use openacc
62 : #endif
63 : IMPLICIT NONE
64 : CLASS(t_mat), INTENT(INOUT) :: mat
65 : class(t_mat), INTENT(IN) :: mat2
66 : complex,intent(in),optional :: alpha_c
67 : complex,intent(in),optional :: alpha_r
68 :
69 : real:: a_r
70 : complex:: a_c
71 : INTEGER:: i,j
72 :
73 : INTEGER,PARAMETER :: gpu_mode=1,omp_mode=2,caxpy_mode=3
74 : INTEGER :: mode=caxpy_mode
75 :
76 18160 : a_c=1.0
77 18160 : if (present(alpha_c)) a_c=alpha_c
78 :
79 18160 : a_r=1.0
80 18160 : if(present(alpha_r)) a_r=alpha_r
81 :
82 : #ifdef _OPENACC
83 : if (mat%l_real) THEN
84 : mode=merge(gpu_mode,mode,acc_is_present(mat%data_r).and.acc_is_present(mat2%data_r))
85 : else
86 : mode=merge(gpu_mode,mode,acc_is_present(mat%data_c).and.acc_is_present(mat2%data_c))
87 : endif
88 : #endif
89 :
90 18160 : if (mat%l_real) THEN
91 0 : select case(mode)
92 : CASE(GPU_MODE)
93 : !Data is on Device, hence we can operate on GPU
94 : !$acc kernels present(mat%data_r,mat2%data_r)
95 0 : mat%data_r=mat%data_r+a_r*mat2%data_r
96 : !$acc end kernels
97 : CASE(OMP_MODE)
98 0 : !$OMP parallel do collapse(2) shared (mat,mat2,a_r) default(none)
99 : DO j=1,mat%matsize2
100 : DO i=1,mat%matsize1
101 : mat%data_r(i,j)=mat%data_r(i,j)+a_r*mat2%data_r(i,j)
102 : ENDDO
103 : ENDDO
104 : !$OMP end parallel do
105 : CASE default
106 0 : call daxpy(size(mat%data_r),a_r,mat2%data_r(1,1),1,mat%data_r(1,1),1)
107 : end select
108 : ELSE
109 0 : select case(mode)
110 : case(GPU_MODE)
111 : !Data is on Device, hence we can operate on GPU
112 : !$acc kernels present(mat%data_c,mat2%data_c)
113 0 : mat%data_c=mat%data_c+a_c*mat2%data_c
114 : !$acc end kernels
115 : case(OMP_MODE)
116 0 : !$OMP parallel do collapse(2) shared (mat,mat2,a_c) default(none)
117 : DO j=1,mat%matsize2
118 : DO i=1,mat%matsize1
119 : mat%data_c(i,j)=mat%data_c(i,j)+a_c*mat2%data_c(i,j)
120 : ENDDO
121 : ENDDO
122 : !$OMP end parallel do
123 : CASE DEFAULT
124 72640 : call zaxpy(size(mat%data_c),a_c,mat2%data_c(1,1),1,mat%data_c(1,1),1)
125 : END SELECT
126 : ENDIF
127 18160 : END SUBROUTINE
128 :
129 0 : subroutine t_mat_leastsq(A, b)
130 : use m_constants
131 : implicit none
132 : class(t_mat), intent(inout) :: A
133 : type(t_mat), intent(inout) :: b
134 :
135 0 : type(t_mat) :: tmp
136 : integer :: m, n, nrhs, lda, ldb, info, lwork
137 :
138 : real :: rwork_req(1)
139 : complex :: cwork_req(1)
140 :
141 0 : real, allocatable :: rwork(:)
142 0 : complex, allocatable :: cwork(:)
143 :
144 0 : if(A%matsize2 /= b%matsize2) call judft_error("least-squares dimension problem")
145 0 : if(A%l_real .neqv. b%l_real) call judft_error("least-squares kind problem")
146 :
147 0 : m = A%matsize1
148 0 : n = A%matsize2
149 0 : nrhs = b%matsize2
150 0 : if(A%l_real) then
151 0 : lda = size(A%data_r,1)
152 0 : ldb = size(b%data_r,1)
153 :
154 0 : call dgels("N", m, n, nrhs, A%data_r, lda, b%data_r, ldb, rwork_req, -1, info)
155 0 : lwork = int(rwork_req(1))
156 0 : allocate(rwork(lwork), source=0.0)
157 :
158 0 : call dgels("N", m, n, nrhs, A%data_r, lda, b%data_r, ldb, rwork, lwork, info)
159 : else
160 0 : lda = size(A%data_c,1)
161 0 : ldb = size(b%data_c,1)
162 :
163 0 : call zgels("N", m, n, nrhs, A%data_c, lda, b%data_c, ldb, cwork_req, -1, info)
164 0 : lwork = int(cwork_req(1))
165 0 : allocate(cwork(lwork), source=cmplx_0)
166 :
167 0 : call zgels("N", m, n, nrhs, A%data_c, lda, b%data_c, ldb, cwork, lwork, info)
168 : endif
169 :
170 0 : if(info /= 0) call judft_error("least squares failed.")
171 :
172 0 : call tmp%init(A%l_real, n, nrhs)
173 :
174 0 : if(tmp%l_real) then
175 0 : tmp%data_r = b%data_r(:n,:)
176 : else
177 0 : tmp%data_c = b%data_c(:n,:)
178 : endif
179 :
180 0 : call b%free()
181 0 : call b%init(tmp)
182 0 : call b%copy(tmp, 1,1)
183 0 : call tmp%free()
184 0 : end subroutine t_mat_leastsq
185 :
   !> Fix the sign of eigenvector columns: each column of data_r is divided
   !! by the sign of its component sum, so the sum comes out positive -
   !! a reproducible sign convention for degenerate solvers.
   !! NOTE(review): only data_r is touched; presumably intended for real
   !! eigenvectors only - confirm this is never called on complex matrices.
   subroutine t_mat_pos_eigvec_sum(mat)
      implicit none
      CLASS(t_mat), INTENT(INOUT) :: mat
      integer :: ne, i
      real :: sum_sign_r

      do ne = 1,size(mat%data_r,2)
         ! shrink the summation range from the bottom until the partial sum
         ! is numerically nonzero, so the sign is well defined
         i = -1
         sum_sign_r = 0.0
         do while (abs(sum_sign_r) < 1e-8)
            i = i + 1
            if(mat%matsize1-i < 1) exit
            sum_sign_r = sum(mat%data_r(:mat%matsize1-i,ne))
         enddo
         ! leave columns alone whose every partial sum is ~0 (undefined sign)
         if(mat%matsize1-i >= 1) then
            mat%data_r(:,ne) = mat%data_r(:,ne) / sign(1.0, sum_sign_r)
         endif
      enddo
   end subroutine t_mat_pos_eigvec_sum
205 :
206 36 : subroutine t_mat_bcast(mat, root, comm)
207 : use m_divide_most_evenly
208 : #ifdef CPP_MPI
209 : use mpi
210 : #endif
211 : implicit none
212 : CLASS(t_mat), INTENT(INOUT) :: mat
213 : integer, intent(in) :: root, comm
214 :
215 : integer :: ierr, full_shape(2), me, n_parts, i
216 36 : integer, allocatable :: start_idx(:), psize(:)
217 : integer(8) :: sz_in_byte
218 :
219 : #ifdef CPP_MPI
220 36 : call MPI_Comm_rank(comm, me, ierr)
221 36 : call MPI_Bcast(mat%l_real, 1, MPI_LOGICAL, root, comm, ierr)
222 : !alloc mat same as root
223 36 : if(me == root) then
224 54 : full_shape = merge(shape(mat%data_r), shape(mat%data_c), mat%l_real)
225 18 : call MPI_Bcast(full_shape, 2, MPI_INTEGER, root, comm, ierr)
226 : else
227 18 : call MPI_Bcast(full_shape, 2, MPI_INTEGER, root, comm, ierr)
228 18 : call mat%alloc(mat%l_real, full_shape(1), full_shape(2))
229 : endif
230 :
231 : ! overwrite matsize as needed
232 36 : call MPI_Bcast(mat%matsize1, 1, MPI_INTEGER, root, comm, ierr)
233 36 : call MPI_Bcast(mat%matsize2, 1, MPI_INTEGER, root, comm, ierr)
234 :
235 36 : sz_in_byte = full_shape(1)
236 36 : sz_in_byte = sz_in_byte * full_shape(2)
237 36 : sz_in_byte = sz_in_byte * merge(8, 16, mat%l_real)
238 : !make sure everything is smaller than 4 GB
239 36 : n_parts = ceiling(sz_in_byte / 4e9)
240 36 : call divide_most_evenly(mat%matsize2, n_parts, start_idx, psize)
241 :
242 72 : do i = 1,n_parts
243 72 : if(mat%l_real) then
244 24 : call MPI_bcast(mat%data_r(:,start_idx(i)), full_shape(1)*psize(i), MPI_DOUBLE_PRECISION, root, comm, ierr)
245 : else
246 12 : call MPI_bcast(mat%data_c(:,start_idx(i)), full_shape(1)*psize(i), MPI_DOUBLE_COMPLEX, root, comm, ierr)
247 : endif
248 : enddo
249 : #endif
250 36 : end subroutine t_mat_bcast
251 :
252 0 : subroutine t_mat_reset(mat, val)
253 : implicit none
254 : CLASS(t_mat), INTENT(INOUT) :: mat
255 : complex, intent(in) :: val
256 :
257 0 : if(mat%l_real) then
258 0 : mat%data_r = real(val)
259 : else
260 0 : mat%data_c = val
261 : endif
262 0 : end subroutine t_mat_reset
263 :
264 24 : subroutine t_mat_conjg(mat)
265 : implicit none
266 : CLASS(t_mat), INTENT(INOUT) :: mat
267 : integer :: i,j
268 :
269 24 : if(.not. mat%l_real) then
270 6 : if(mat%matsize1 == size(mat%data_c,1) .and. mat%matsize2 == size(mat%data_c,2)) then
271 6 : call zlacgv(mat%matsize1 * mat%matsize2, mat%data_c, 1)
272 : else
273 0 : !$OMP parallel do default(none) private(i) shared(mat)
274 : do i =1,mat%matsize2
275 : call zlacgv(mat%matsize1, mat%data_c(1,i), 1)
276 : enddo
277 : !$OMP end parallel do
278 : endif
279 : endif
280 24 : end subroutine t_mat_conjg
281 :
282 :
   !> Print the dynamic type of the matrix (overridden in t_mpimat so that
   !! polymorphic code can report which implementation it holds).
   subroutine t_mat_print_type(mat)
      implicit none
      CLASS(t_mat), INTENT(IN) :: mat

      write (*,*) "type -> t_mat"
   end subroutine t_mat_print_type
289 :
290 18 : function t_mat_size_mb(mat) result(mb_size)
291 : implicit none
292 : class(t_mat), intent(inout) :: mat
293 : real :: mb_size
294 :
295 18 : if(mat%l_real) then
296 0 : mb_size = 8e-6 * size(mat%data_r)
297 : else
298 54 : mb_size = 16e-6 * size(mat%data_c)
299 : endif
300 18 : end function t_mat_size_mb
301 : ! copy lower triangle to upper triangle
302 0 : subroutine t_mat_l2u(mat)
303 : implicit none
304 : class(t_mat), intent(inout) :: mat
305 : integer :: i,j
306 :
307 0 : call timestart("copy lower to upper matrix")
308 0 : if(mat%matsize1 /= mat%matsize2) call judft_error("l2u only works for square matricies")
309 :
310 0 : if(mat%l_real) then
311 0 : do i = 1,mat%matsize1
312 0 : do j = 1,i-1
313 0 : mat%data_r(j,i) = mat%data_r(i,j)
314 : enddo
315 : enddo
316 : else
317 0 : do i = 1,mat%matsize1
318 0 : do j = 1,i-1
319 0 : mat%data_c(j,i) = conjg(mat%data_c(i,j))
320 : enddo
321 : enddo
322 : endif
323 0 : call timestop("copy lower to upper matrix")
324 0 : end subroutine t_mat_l2u
325 :
326 : ! copy upper triangle to lower triangle
327 616 : subroutine t_mat_u2l(mat)
328 : use m_judft
329 : implicit none
330 : class(t_mat), intent(inout) :: mat
331 : integer :: i,j
332 :
333 616 : call timestart("copy upper to lower matrix")
334 616 : if(mat%matsize1 /= mat%matsize2) call judft_error("l2u only works for square matricies")
335 616 : if(mat%l_real) then
336 14904 : do i = 1,mat%matsize1
337 996950 : do j = 1,i-1
338 996364 : mat%data_r(i,j) = mat%data_r(j,i)
339 : enddo
340 : enddo
341 : else
342 4988 : do i = 1,mat%matsize1
343 465668 : do j = 1,i-1
344 465638 : mat%data_c(i,j) = conjg(mat%data_c(j,i))
345 : enddo
346 : enddo
347 : endif
348 616 : call timestop("copy upper to lower matrix")
349 616 : end subroutine t_mat_u2l
350 :
351 0 : subroutine t_mat_subtract(res_mat, mat1, mat2)
352 : implicit none
353 : class(t_mat), intent(inout) :: res_mat
354 : type(t_mat), intent(in) :: mat1, mat2
355 : logical :: real_res
356 : integer :: s1, s2
357 :
358 : ! check dimensions
359 0 : if(mat1%matsize1 /= mat2%matsize1) call judft_error("matsize 1 doesn't agree")
360 0 : s1 = mat1%matsize1
361 0 : if(mat1%matsize2 /= mat2%matsize2) call judft_error("matsize 2 doesn't agree")
362 0 : s2 = mat1%matsize2
363 :
364 : ! check real/cmplx
365 0 : real_res = mat1%l_real .and. mat2%l_real
366 0 : if(res_mat%l_real .neqv. real_res) then
367 0 : call res_mat%free()
368 : endif
369 0 : if(.not. res_mat%allocated()) call res_mat%alloc(real_res, s1, s2)
370 :
371 0 : if(res_mat%l_real) then
372 0 : res_mat%data_r = mat1%data_r(:s1,:s2) - mat2%data_r(:s1,:s2)
373 0 : elseif(mat1%l_real .and. (.not. mat2%l_real)) then
374 0 : res_mat%data_c = mat1%data_r(:s1,:s2) - mat2%data_c(:s1,:s2)
375 0 : elseif((.not. mat1%l_real) .and. mat2%l_real) then
376 0 : res_mat%data_c = mat1%data_c(:s1,:s2) - mat2%data_r(:s1,:s2)
377 : else
378 0 : res_mat%data_c(:s1,:s2) = mat1%data_c(:s1,:s2) - mat2%data_c(:s1,:s2)
379 : endif
380 0 : end subroutine t_mat_subtract
381 :
382 0 : function t_mat_norm2(mat) result(norm)
383 : implicit none
384 : class(t_mat), intent(in) :: mat
385 : real :: norm
386 : real, external :: dnrm2, dznrm2
387 :
388 0 : if (mat%l_real) then
389 0 : norm = dnrm2(size(mat%data_r), mat%data_r, 1)
390 : else
391 0 : norm = dznrm2(size(mat%data_c), mat%data_c, 1)
392 : endif
393 0 : end function t_mat_norm2
394 :
395 2039 : function t_mat_allocated(mat) result(var_alloc)
396 : implicit none
397 : class(t_mat), intent(in) :: mat
398 : logical :: var_alloc
399 :
400 2039 : var_alloc = allocated(mat%data_r) .or. allocated(mat%data_c)
401 2039 : end function t_mat_allocated
402 :
403 36 : SUBROUTINE t_mat_lproblem(mat, vec)
404 : IMPLICIT NONE
405 : CLASS(t_mat), INTENT(IN) :: mat
406 : class(t_mat), INTENT(INOUT) :: vec
407 :
408 : INTEGER:: lwork, info
409 36 : REAL, ALLOCATABLE:: work(:)
410 36 : INTEGER, allocatable::ipiv(:)
411 : logical :: both_on_gpu
412 : #ifdef _OPENACC
413 : integer :: ierr, sz
414 : real(8), dimension(100,100) :: A
415 : type(cusolverDnHandle) :: handle
416 : #endif
417 :
418 : select type (vec)
419 : class is (t_mat)
420 : class default
421 0 : call judft_error("lproblem can only be solved if vec and mat are the same class")
422 : end select
423 :
424 36 : IF ((mat%l_real .NEQV. vec%l_real) .OR. (mat%matsize1 .NE. mat%matsize2) .OR. (mat%matsize1 .NE. vec%matsize1)) then
425 0 : CALL judft_error("Invalid matices in t_mat_lproblem")
426 : endif
427 :
428 : #ifdef _OPENACC
429 : if(mat%l_real) then
430 : both_on_gpu = acc_is_present(mat%data_r) .and. acc_is_present(vec%data_r)
431 : else
432 : both_on_gpu = acc_is_present(mat%data_c) .and. acc_is_present(vec%data_c)
433 : endif
434 : #else
435 36 : both_on_gpu = .False.
436 : #endif
437 :
438 : if(both_on_gpu) then
439 : #ifdef _OPENACC
440 : allocate(ipiv(mat%matsize1))
441 : sz = mat%matsize1
442 : ierr = cusolverDnCreate(handle)
443 : if(ierr /= 0) call juDFT_error("can't create handle")
444 :
445 : !$acc data create(ipiv)
446 : call perform_LU_cusolver(handle, mat%l_real, mat%data_r, mat%data_c, ipiv)
447 : call calc_rhs(handle, mat%l_real, mat%data_r, mat%data_c, vec%data_r, vec%data_c, ipiv)
448 : !$acc end data
449 :
450 : ierr = cusolverDnDestroy(handle)
451 : deallocate(ipiv)
452 : if(ierr /= 0) call juDFT_error("can't destroy handle")
453 : #endif
454 : else
455 36 : IF (mat%l_real) THEN
456 0 : call timestart("solve real linear problem")
457 0 : IF (mat%unsymmetry() < 1E-8) THEN
458 : !Matrix is symmetric
459 : CALL DPOSV('Upper', mat%matsize1, vec%matsize2, mat%data_r, mat%matsize1, &
460 0 : vec%data_r, vec%matsize1, INFO)
461 0 : IF (INFO > 0) THEN
462 : !Matrix was not positive definite
463 0 : lwork = -1; ALLOCATE (work(1))
464 : CALL DSYSV('Upper', mat%matsize1, vec%matsize2, mat%data_r, mat%matsize1, IPIV, &
465 0 : vec%data_r, vec%matsize1, WORK, LWORK, INFO)
466 0 : lwork = INT(work(1))
467 0 : DEALLOCATE (work); ALLOCATE (ipiv(mat%matsize1), work(lwork))
468 : CALL DSYSV('Upper', mat%matsize1, vec%matsize2, mat%data_r, mat%matsize1, IPIV, &
469 0 : vec%data_r, vec%matsize1, WORK, LWORK, INFO)
470 0 : IF (info .NE. 0) CALL judft_error("Could not solve linear equation, matrix singular")
471 : END IF
472 : ELSE
473 0 : allocate (ipiv(mat%matsize1))
474 0 : call dgesv(mat%matsize1, vec%matsize2, mat%data_r, mat%matsize1, ipiv, vec%data_r, vec%matsize1, info)
475 0 : if (info /= 0) call judft_error("Error in dgesv for lproblem")
476 : END IF
477 0 : call timestop("solve real linear problem")
478 : ELSE
479 36 : call timestart("solve cmplx linear problem")
480 : ! I don't to do the whole testing for symmetry:
481 108 : allocate (ipiv(mat%matsize1))
482 36 : call zgesv(mat%matsize1, vec%matsize2, mat%data_c, mat%matsize1, ipiv, vec%data_c, vec%matsize1, info)
483 36 : if (info /= 0) call judft_error("Error in zgesv for lproblem")
484 36 : call timestop("solve cmplx linear problem")
485 : ENDIF
486 : endif
487 36 : END SUBROUTINE t_mat_lproblem
488 :
489 : #ifdef _OPENACC
   !> LU-factorize the device-resident square matrix in place with cuSOLVER
   !! getrf. Exactly one of data_r/data_c is used, chosen by l_real; ipiv
   !! receives the pivot indices (device copy managed by the caller's
   !! acc data region).
   subroutine perform_LU_cusolver(handle, l_real, data_r, data_c, ipiv)
      implicit none
      type(cusolverDnHandle), intent(in) :: handle
      logical, intent(in) :: l_real
      real, intent(inout) :: data_r(:,:)
      complex, intent(inout) :: data_c(:,:)
      integer, intent(inout) :: ipiv(:)

      real, allocatable :: r_work(:)
      complex, allocatable :: c_work(:)
      integer :: lwork, devinfo, sz, ierr

      ! query the workspace size required by getrf first
      lwork = get_lwork_cusolver(handle, l_real, data_r, data_c)
      if(l_real) then
         sz = size(data_r,1)   ! assumes a square sz x sz matrix
         allocate(r_work(lwork), stat=ierr)
         if(ierr /= 0) call juDFT_error("cant' alloc r_work")

         ! host_data passes the device addresses of the mapped arrays
         !$acc data create(r_work) copyout(devinfo)
         !$acc host_data use_device(data_r, r_work, ipiv, devinfo)
         ierr = cusolverDnDgetrf(handle, sz, sz, data_r, sz, r_work, ipiv, devinfo)
         !$acc end host_data
         !$acc end data
         if(ierr /= 0) call juDFT_error("cusolver failed R")
         deallocate(r_work)
      else
         sz = size(data_c,1)   ! assumes a square sz x sz matrix
         allocate(c_work(lwork), stat=ierr)
         if(ierr /= 0) call juDFT_error("cant' alloc c_work")

         !$acc data create(c_work) copyout(devinfo)
         !$acc host_data use_device(data_c, c_work, ipiv, devinfo)
         ierr = cusolverDnZgetrf(handle, sz, sz, data_c, sz, c_work, ipiv, devinfo)
         !$acc end host_data
         !$acc end data
         if(ierr /= 0) call juDFT_error("cusolver failed C")
         deallocate(c_work)
      endif
   end subroutine perform_LU_cusolver
529 :
   !> Back-substitution on the device with cuSOLVER getrs, using the LU
   !! factors and pivots produced by perform_LU_cusolver. The right-hand
   !! sides vec_r/vec_c are overwritten with the solution.
   subroutine calc_rhs(handle, l_real, mat_r, mat_c, vec_r, vec_c, ipiv)
      implicit none
      type(cusolverDnHandle), intent(in) :: handle
      logical, intent(in) :: l_real
      real, intent(inout) :: mat_r(:,:), vec_r(:,:)
      complex, intent(inout) :: mat_c(:,:), vec_c(:,:)
      integer, intent(in) :: ipiv(:)

      integer :: ierr, n, nrhs, devinfo

      !$acc data copyout(devinfo)
      if(l_real) then
         n = size(mat_r, 1)      ! system size (square matrix assumed)
         nrhs = size(vec_r, 2)   ! number of right-hand sides
         !$acc host_data use_device(mat_r, vec_r, ipiv, devinfo)
         ierr = cusolverDnDgetrs(handle, CUBLAS_OP_N, n, nrhs, mat_r, n, ipiv, vec_r, n, devinfo)
         !$acc end host_data
      else
         n = size(mat_c, 1)
         nrhs = size(vec_c, 2)
         !$acc host_data use_device(mat_c, vec_c, ipiv, devinfo)
         ierr = cusolverDnZgetrs(handle, CUBLAS_OP_N, n, nrhs, mat_c, n, ipiv, vec_c, n, devinfo)
         !$acc end host_data
      endif
      !$acc end data
      if(ierr /= 0) call judft_error("rhs failed")
   end subroutine calc_rhs
557 :
   !> Query the device workspace size (in elements) needed by cuSOLVER
   !! getrf for the given square matrix.
   function get_lwork_cusolver(handle, l_real, data_r, data_c) result(lwork)
      implicit none
      type(cusolverDnHandle), intent(in) :: handle
      logical, intent(in) :: l_real
      real, intent(in) :: data_r(:,:)
      complex, intent(in) :: data_c(:,:)
      integer :: lwork, sz, ierr

      if(l_real) then
         sz = size(data_r,1)   ! assumes a square sz x sz matrix
         !$acc host_data use_device(data_r)
         ierr = cusolverDnDgetrf_buffersize(handle, sz, sz, data_r, sz, lwork)
         !$acc end host_data
      else
         sz = size(data_c,1)
         !$acc host_data use_device(data_c)
         ierr = cusolverDnZgetrf_buffersize(handle, sz, sz, data_c, sz, lwork)
         !$acc end host_data
      endif

      if(ierr /= 0) call juDFT_error("can't get lwork")
   end function get_lwork_cusolver
580 : #endif
581 :
582 13217 : SUBROUTINE t_mat_free(mat)
583 : use m_judft
584 : CLASS(t_mat), INTENT(INOUT)::mat
585 13217 : call timestart("t_mat_free")
586 13217 : IF (ALLOCATED(mat%data_c)) DEALLOCATE (mat%data_c)
587 13217 : IF (ALLOCATED(mat%data_r)) DEALLOCATE (mat%data_r)
588 13217 : mat%matsize1 = -1
589 13217 : mat%matsize2 = -1
590 13217 : call timestop("t_mat_free")
591 13217 : END SUBROUTINE t_mat_free
592 :
593 176 : SUBROUTINE t_mat_add_transpose(mat, mat1)
594 : CLASS(t_mat), INTENT(INOUT)::mat, mat1
595 : INTEGER::i, j
596 176 : IF ((mat%matsize1 .NE. mat1%matsize2) .OR. &
597 : (mat%matsize2 .NE. mat1%matsize1)) &
598 0 : CALL judft_error("Matrix sizes missmatch in add_transpose")
599 176 : IF (mat%l_real .AND. mat1%l_real) THEN
600 0 : DO i = 1, mat%matsize2
601 0 : DO j = i + 1, mat%matsize1
602 0 : mat%data_r(j, i) = mat1%data_r(i, j)
603 : ENDDO
604 : ENDDO
605 176 : ELSEIF ((.NOT. mat%l_real) .AND. (.NOT. mat1%l_real)) THEN
606 8594 : DO i = 1, mat%matsize2
607 207260 : DO j = i + 1, mat%matsize1
608 207084 : mat%data_c(j, i) = CONJG(mat1%data_c(i, j))
609 : ENDDO
610 : ENDDO
611 : ELSE
612 0 : call judft_error("Inconsistency between data types in m_mat")
613 : END IF
614 176 : END SUBROUTINE t_mat_add_transpose
615 :
   !> Initialize the matrix from explicit type/size information; simply
   !! forwards to alloc. The extra arguments are accepted (and ignored)
   !! only so t_mpimat can override this binding with the same signature.
   SUBROUTINE t_mat_init(mat, l_real, matsize1, matsize2, mpi_subcom, l_2d, nb_x, nb_y, mat_name)
      CLASS(t_mat) :: mat
      LOGICAL, INTENT(IN), OPTIONAL :: l_real
      INTEGER, INTENT(IN), OPTIONAL :: matsize1, matsize2
      INTEGER, INTENT(IN), OPTIONAL :: mpi_subcom, nb_x, nb_y !not needed here, only for allowing overloading this in t_mpimat
      LOGICAL, INTENT(IN), OPTIONAL :: l_2d !not needed here either
      character(len=*),intent(in),optional :: mat_name !> used in allocation-failure error messages

      CALL mat%alloc(l_real, matsize1, matsize2, mat_name=mat_name)
   END SUBROUTINE t_mat_init
   !> Initialize mat with the type and size of templ; the data is
   !! zero-initialized, NOT copied. global_size1/2, if given, must equal
   !! templ's sizes (they exist for interface compatibility with t_mpimat).
   SUBROUTINE t_mat_init_template(mat, templ, global_size1, global_size2, mat_name)
      IMPLICIT NONE
      CLASS(t_mat), INTENT(INOUT) :: mat
      CLASS(t_mat), INTENT(IN) :: templ
      INTEGER, INTENT(IN), OPTIONAL:: global_size1, global_size2
      character(len=*),intent(in),optional :: mat_name !> used in the allocation-failure error message

      integer :: ierr

      IF (PRESENT(global_size1) .AND. PRESENT(global_size2)) THEN
         IF ((global_size1 .NE. templ%matsize1) .OR. (global_size2 .NE. templ%matsize2)) CALL judft_error("BUG:Invalid change of size in init by template")
      END IF
      mat%l_real = templ%l_real
      mat%matsize1 = templ%matsize1
      mat%matsize2 = templ%matsize2
      ! allocate the used array zero-filled; the unused one gets a minimal
      ! dummy allocation.
      ! NOTE(review): the dummy is (1,1) here but (0,0) in t_mat_alloc -
      ! confirm nothing relies on the dummy's size before unifying.
      IF (mat%l_real) THEN
         ALLOCATE (mat%data_r(mat%matsize1, mat%matsize2), source=0.0, stat=ierr)
         ALLOCATE (mat%data_c(1, 1))
      ELSE
         ALLOCATE (mat%data_c(mat%matsize1, mat%matsize2), source=(0.0,0.0), stat=ierr)
         ALLOCATE (mat%data_r(1, 1))
      END IF
      if(ierr /= 0) then
         if(present(mat_name)) then
            call judft_error("can't alloc matrix of size: [" // &
                             int2str(mat%matsize1) // ", " // int2str(mat%matsize2) // "]. Name: " // trim(mat_name))
         else
            call judft_error("can't alloc matrix of size: [" // &
                             int2str(mat%matsize1) // ", " // int2str(mat%matsize2) // "].")
         endif
      endif
   END SUBROUTINE t_mat_init_template
658 :
   !> (Re)allocate the data arrays. Omitted arguments keep the matrix's
   !! current l_real/matsize values; a missing matsize2 defaults to matsize1
   !! (square matrix). Existing data is discarded; the new array is filled
   !! with init (default 0). The unused array is allocated with size (0,0).
   SUBROUTINE t_mat_alloc(mat, l_real, matsize1, matsize2, init, mat_name)
      use m_judft
      CLASS(t_mat) :: mat
      LOGICAL, INTENT(IN), OPTIONAL:: l_real
      INTEGER, INTENT(IN), OPTIONAL:: matsize1, matsize2
      REAL, INTENT(IN), OPTIONAL :: init      !> fill value for the data (default 0)
      character(len=*), intent(in), optional :: mat_name !> used in error messages
      character(len=300) :: errmsg

      INTEGER:: err

      call timestart("t_mat_alloc")
      IF (present(l_real)) mat%l_real = l_real
      IF (present(matsize1)) mat%matsize1 = matsize1
      IF (present(matsize2)) mat%matsize2 = matsize2

      IF (mat%matsize1 < 0) CALL judft_error("Cannot allocate memory for mat datatype that is not initialized", hint="This is a BUG in FLEUR, please report")
      IF (mat%matsize2 < 0) mat%matsize2 = mat%matsize1 !Square by default

      ! drop any previous data before reallocating
      IF (allocated(mat%data_r)) deallocate (mat%data_r)
      IF (allocated(mat%data_c)) deallocate (mat%data_c)

      IF (mat%l_real) THEN
         ALLOCATE (mat%data_r(mat%matsize1, mat%matsize2), STAT=err, errmsg=errmsg)
         ALLOCATE (mat%data_c(0, 0))
         IF (err /= 0) then
            write (*,*) "Failed to allocate mem of shape: [" &
                        // int2str(mat%matsize1) // ", " // int2str(mat%matsize2) // "]"
            if(present(mat_name)) then
               CALL judft_error("Allocation of memmory failed for mat datatype. Name:" // trim(mat_name), &
                                hint="Errormessage: " // trim(errmsg))
            else
               CALL judft_error("Allocation of memmory failed for mat datatype", &
                                hint="Errormessage: " // trim(errmsg))
            endif
         endif
         mat%data_r = 0.0
         if (present(init)) mat%data_r = init
      ELSE
         ALLOCATE (mat%data_r(0, 0))
         ALLOCATE (mat%data_c(mat%matsize1, mat%matsize2), STAT=err, errmsg=errmsg)
         IF (err /= 0) CALL judft_error("Allocation of memmory failed for mat datatype", &
                                        hint="Errormessage: " // trim(errmsg))
         ! init is real; a real value assigned to complex data sets the real part
         mat%data_c = 0.0
         IF (PRESENT(init)) mat%data_c = init
      ENDIF
      call timestop("t_mat_alloc")
   END SUBROUTINE t_mat_alloc
707 :
   !> Matrix-matrix multiply: res = op(mat1) * op(mat2), where op is set by
   !! the BLAS-style flags transA/transB (default "N"). Without res the
   !! product overwrites mat1 (only valid for square matrices). Runs via
   !! cuBLAS when all operands are device-resident (OpenACC build),
   !! otherwise via host BLAS ?gemm.
   SUBROUTINE t_mat_multiply(mat1, mat2, res, transA, transB)
      use m_judft
      CLASS(t_mat), INTENT(INOUT)        :: mat1
      CLASS(t_mat), INTENT(IN)           :: mat2
      CLASS(t_mat), INTENT(INOUT), OPTIONAL :: res !> result; must be pre-allocated with matching type/size
      character(len=1), intent(in), optional :: transA, transB !> BLAS transpose flags ("N"/"T"/"C")

      integer :: m,n,k, lda, ldb, ldc
      character(len=1) :: transA_i, transB_i
      type(t_mat) :: tmp
      logical :: run_on_gpu

      call timestart("t_mat_multiply")

      if(mat1%matsize1 == -1 .or. mat1%matsize2 == -1) call judft_error("mat1 not initialized")
      if(mat2%matsize1 == -1 .or. mat2%matsize2 == -1) call judft_error("mat2 not initialized")

      transA_i = "N"
      if(present(transA)) transA_i = transA
      transB_i = "N"
      if(present(transB)) transB_i = transB

      ! result is m x n, the contracted dimension is k
      if(transA_i == "N") then
         m = mat1%matsize1
         k = mat1%matsize2
      else
         m = mat1%matsize2
         k = mat1%matsize1
      endif

      if(mat1%l_real .neqv. mat2%l_real) call judft_error("can only multiply matricies of the same type")

      ! GPU path only if every involved array is device-resident
      if(mat1%l_real) then
#ifdef _OPENACC
         run_on_gpu = acc_is_present(mat1%data_r) .and. acc_is_present(mat2%data_r)
         if(present(res)) then
            run_on_gpu = run_on_gpu .and. acc_is_present(res%data_r)
         endif
#else
         run_on_gpu = .False.
#endif
      else
#ifdef _OPENACC
         run_on_gpu = acc_is_present(mat1%data_c) .and. acc_is_present(mat2%data_c)
         if(present(res)) then
            run_on_gpu = run_on_gpu .and. acc_is_present(res%data_c)
         endif
#else
         run_on_gpu = .False.
#endif
      endif

      if(transB_i == "N" ) then
         if(k /= mat2%matsize1) call judft_error("dimensions don't agree for matmul")
         n = mat2%matsize2
      else
         if(k /= mat2%matsize2) call judft_error("dimensions don't agree for matmul")
         n = mat2%matsize1
      endif

      ! leading dimensions come from the allocated storage, which may be
      ! larger than the logical matsize
      lda = merge(size(mat1%data_r, dim=1), size(mat1%data_c, dim=1), mat1%l_real)
      if(transA_i == "N") then
         if(lda < max(1,m)) call judft_error("problem with lda")
      else
         if(lda < max(1,k)) call judft_error("problem with lda")
      endif

      ldb = merge(size(mat2%data_r, dim=1), size(mat2%data_c, dim=1), mat2%l_real)
      if(transB_i == "N") then
         if(ldb < max(1,k)) call judft_error("problem with ldb")
      else
         if(ldb < max(1,n)) call judft_error("problem with ldb")
      endif

      IF (present(res)) THEN
         ! prepare res matrix: must already be allocated with the correct
         ! type and exact m x n storage
         if(res%allocated()) then
            if(res%l_real .neqv. mat1%l_real) then
               call juDFT_error("res must be of the correct type")
            else
               if(res%l_real) then
                  if(any(shape(res%data_r) /= [m,n])) then
                     call juDFT_error("res must be of the correct size!")
                  endif
               else
                  if(any(shape(res%data_c) /= [m,n])) then
                     call juDFT_error("res must be of the correct size!")
                  endif
               endif
            endif
         else
            call juDFT_error("res must be allocated")
         endif

         ! mat2%l_real equals res%l_real here (checked above)
         ldc = merge(size(res%data_r, dim=1), size(res%data_c, dim=1), mat2%l_real)
         if(ldc < max(1,m)) call judft_error("problem with ldc")

         if(run_on_gpu) then
            call perform_cublas_gemm(mat1%l_real, transA_i,transB_i,m,n,k, lda, ldb, ldc,&
                                     mat1%data_r, mat1%data_c, mat2%data_r, mat2%data_c, res%data_r, res%data_c)
         else
            IF (mat1%l_real) THEN
               call dgemm(transA_i,transB_i,m,n,k, 1.0, mat1%data_r, lda, mat2%data_r, ldb, 0.0, res%data_r, ldc)
            ELSE
               call zgemm(transA_i,transB_i,m,n,k,cmplx_1, mat1%data_c, lda, mat2%data_c, ldb, cmplx_0,res%data_c, ldc)
            ENDIF
         endif
      else
         ! in-place product: only defined for square operands; go through a
         ! temporary and copy the result back into mat1
         if (mat1%matsize1 /= mat1%matsize2 .or. mat2%matsize2 /= mat2%matsize1)&
            CALL judft_error("Cannot multiply matrices inplace because of non-matching dimensions", hint="This is a BUG in FLEUR, please report")

         call tmp%alloc(mat1%l_real, n,n)
         ldc = merge(size(tmp%data_r, dim=1), size(tmp%data_c, dim=1), tmp%l_real)
         if(ldc < max(1,m)) call judft_error("problem with ldc")

         if(run_on_gpu) then
            !$acc data create(tmp, tmp%data_r, tmp%data_c)
            call perform_cublas_gemm(mat1%l_real, transA_i, transB_i, m,n,k, lda, ldb, ldc,&
                                     mat1%data_r, mat1%data_c, mat2%data_r, mat2%data_c, tmp%data_r, tmp%data_c)
            call mat1%copy(tmp,1,1)
            !$acc end data
         else
            if (mat1%l_real) THEN
               call dgemm(transA_i,transB_i,n,n,n, 1.0, mat1%data_r, lda, mat2%data_r, ldb, 0.0, tmp%data_r, ldc)
            ELSE
               call zgemm(transA_i,transB_i,n,n,n,cmplx_1, mat1%data_c, lda, mat2%data_c, ldb, cmplx_0, tmp%data_c, ldc)
            ENDIF
            call mat1%copy(tmp,1,1)
         endif
         call tmp%free()
      end IF
      call timestop("t_mat_multiply")
   end SUBROUTINE t_mat_multiply
841 :
   !> Run a GEMM on the GPU via cuBLAS. All arrays must already be mapped
   !! to the device; the host_data regions hand their device addresses to
   !! cuBLAS. Compiled without OpenACC this is a no-op.
   subroutine perform_cublas_gemm(l_real, transA, transB, m,n,k, lda, ldb, ldc,&
                                  mat1_data_r, mat1_data_c, mat2_data_r, mat2_data_c, res_data_r, res_data_c)
      implicit none
      logical, intent(in) :: l_real                  !> selects the real or complex GEMM
      character(len=*), intent(in) :: transA, transB !> BLAS transpose flags
      integer, intent(in) :: m, n, k, lda, ldb, ldc
      real, intent(in) :: mat1_data_r(:,:), mat2_data_r(:,:)
      complex, intent(in) :: mat1_data_c(:,:), mat2_data_c(:,:)
      real, intent(inout) :: res_data_r(:,:)
      complex, intent(inout) :: res_data_c(:,:)

#ifdef _OPENACC
      if(l_real) then
         !$acc host_data use_device(mat1_data_r, mat2_data_r, res_data_r)
         call cublasDgemm(transA, transB, m, n, k, 1.0, mat1_data_r, lda, mat2_data_r, ldb, 0.0, res_data_r, ldc)
         !$acc end host_data
      else
         ! cmplx_1/cmplx_0 come from m_constants (module use)
         !$acc host_data use_device(mat1_data_c, mat2_data_c, res_data_c)
         call cublasZgemm(transA, transB, m, n, k, cmplx_1, mat1_data_c, lda, mat2_data_c, ldb, cmplx_0, res_data_c, ldc)
         !$acc end host_data
      endif
#endif
   end subroutine perform_cublas_gemm
865 :
866 :
867 0 : SUBROUTINE t_mat_transpose(mat1, res)
868 : CLASS(t_mat), INTENT(INOUT) ::mat1
869 : CLASS(t_mat), INTENT(OUT), OPTIONAL ::res
870 :
871 0 : IF (present(res)) THEN
872 0 : call res%alloc(mat1%l_real, mat1%matsize2, mat1%matsize1)
873 0 : IF (mat1%l_real) THEN
874 0 : res%data_r = transpose(mat1%data_r(:mat1%matsize1, :mat1%matsize2))
875 : ELSE
876 0 : res%data_c = conjg(transpose(mat1%data_c(:mat1%matsize1, :mat1%matsize2)))
877 : ENDIF
878 : else
879 0 : if (mat1%matsize1 .ne. mat1%matsize2) CALL judft_error("Cannot transpose matrices inplace because of non-matching dimensions", hint="This is a BUG in FLEUR, please report")
880 0 : IF (mat1%l_real) THEN
881 0 : mat1%data_r(:mat1%matsize1, :mat1%matsize2) = transpose(mat1%data_r(:mat1%matsize1, :mat1%matsize2))
882 : ELSE
883 0 : mat1%data_c(:mat1%matsize1, :mat1%matsize2) = conjg(transpose(mat1%data_c(:mat1%matsize1, :mat1%matsize2)))
884 : ENDIF
885 : end IF
886 0 : end SUBROUTINE t_mat_transpose
887 :
888 0 : SUBROUTINE t_mat_from_packed_real(mat1, matsize, packed_r)
889 : use m_judft
890 : CLASS(t_mat), INTENT(INOUT) :: mat1
891 : INTEGER, INTENT(IN) :: matsize
892 : REAL, INTENT(IN) :: packed_r(:)
893 :
894 : INTEGER:: n, nn, i
895 0 : call timestart("t_mat_from_packed_real")
896 0 : call mat1%alloc(.true., matsize, matsize)
897 :
898 : !$OMP PARALLEL DO default(none) &
899 : !$OMP shared(matsize, mat1, packed_r) private(n, nn, i) &
900 0 : !$OMP schedule(dynamic, 10)
901 : DO n = 1, matsize
902 : DO nn = 1, n
903 : i = ((n - 1)*n)/2 + nn
904 : mat1%data_r(n, nn) = packed_r(i)
905 : mat1%data_r(nn, n) = packed_r(i)
906 : end DO
907 : end DO
908 : !$OMP END PARALLEL DO
909 0 : call timestop("t_mat_from_packed_real")
910 0 : end SUBROUTINE t_mat_from_packed_real
911 :
912 0 : SUBROUTINE t_mat_from_packed_cmplx(mat1, matsize, packed_c)
913 : use m_judft
914 : CLASS(t_mat), INTENT(INOUT) :: mat1
915 : INTEGER, INTENT(IN) :: matsize
916 : COMPLEX, INTENT(IN) :: packed_c(:)
917 :
918 : INTEGER:: n, nn, i
919 0 : call timestart("t_mat_from_packed_cmplx")
920 0 : call mat1%alloc(.false., matsize, matsize)
921 :
922 : !$OMP PARALLEL DO default(none) &
923 : !$OMP shared(matsize, mat1, packed_c) private(n, nn, i) &
924 0 : !$OMP schedule(dynamic, 10)
925 : DO n = 1, matsize
926 : DO nn = 1, n
927 : i = ((n - 1)*n)/2 + nn
928 : mat1%data_c(n, nn) = conjg(packed_c(i))
929 : mat1%data_c(nn, n) = packed_c(i)
930 : i = i + 1
931 : end DO
932 : end DO
933 : !$OMP END PARALLEL DO
934 0 : call timestop("t_mat_from_packed_cmplx")
935 0 : end SUBROUTINE t_mat_from_packed_cmplx
936 :
937 0 : function t_mat_to_packed(mat) result(packed)
938 : CLASS(t_mat), INTENT(IN) :: mat
939 : COMPLEX :: packed(mat%matsize1*(mat%matsize1 + 1)/2)
940 : integer :: n, nn, i
941 : real, parameter :: tol = 1e-5
942 0 : if (mat%matsize1 .ne. mat%matsize2) call judft_error("Could not pack no-square matrix", hint='This is a BUG, please report')
943 :
944 0 : if (mat%l_real) THEN
945 : !$OMP PARALLEL DO default(none) &
946 : !$OMP shared(mat, packed) private(n, nn, i) &
947 0 : !$OMP schedule(dynamic, 10)
948 : DO n = 1, mat%matsize1
949 : DO nn = 1, n
950 : i = ((n - 1)*n)/2 + nn
951 : packed(i) = (mat%data_r(n, nn) + mat%data_r(nn, n))/2.
952 : end DO
953 : end DO
954 : !$OMP END PARALLEL DO
955 : else
956 : !$OMP PARALLEL DO default(none) &
957 : !$OMP shared(mat, packed) private(n, nn, i) &
958 0 : !$OMP schedule(dynamic, 10)
959 : DO n = 1, mat%matsize1
960 : DO nn = 1, n
961 : i = ((n - 1)*n)/2 + nn
962 : packed(i) = (conjg(mat%data_c(n, nn)) + mat%data_c(nn, n))/2.
963 : end DO
964 : end DO
965 : !$OMP END PARALLEL DO
966 : endif
967 0 : end function t_mat_to_packed
968 :
969 0 : subroutine t_mat_inverse(mat)
970 : implicit none
971 : CLASS(t_mat), INTENT(INOUT) :: mat
972 : integer :: info
973 0 : real, allocatable :: work_r(:)
974 0 : integer, allocatable :: ipiv(:)
975 0 : complex, allocatable :: work_c(:)
976 :
977 0 : call timestart("invert matrix")
978 0 : if (mat%matsize1 .ne. mat%matsize2) then
979 : call judft_error("Can only invert square matrices", &
980 0 : hint="This is a BUG in FLEUR, please report")
981 : endif
982 0 : ALLOCATE (ipiv(mat%matsize1))
983 :
984 0 : if (mat%l_real) THEN
985 0 : ALLOCATE (work_r(mat%matsize1))
986 0 : call dgetrf(mat%matsize1, mat%matsize1, mat%data_r, size(mat%data_r, 1), ipiv, info)
987 0 : if (info .ne. 0) call judft_error("Failed to invert matrix: dpotrf failed.")
988 0 : call dgetri(mat%matsize1, mat%data_r, size(mat%data_r, 1), ipiv, work_r, size(work_r), info)
989 0 : if (info .ne. 0) call judft_error("Failed to invert matrix: dpotrf failed.")
990 : else
991 0 : ALLOCATE (work_c(mat%matsize1))
992 0 : call zgetrf(mat%matsize1, mat%matsize1, mat%data_c, size(mat%data_c, 1), ipiv, info)
993 0 : if (info .ne. 0) call judft_error("Failed to invert matrix: dpotrf failed.")
994 0 : call zgetri(mat%matsize1, mat%data_c, size(mat%data_c, 1), ipiv, work_c, size(work_c), info)
995 0 : if (info .ne. 0) call judft_error("Failed to invert matrix: dpotrf failed.")
996 : end if
997 0 : call timestop("invert matrix")
998 0 : end subroutine t_mat_inverse
999 :
1000 4764 : SUBROUTINE t_mat_move(mat, mat1)
1001 : IMPLICIT NONE
1002 : CLASS(t_mat), INTENT(INOUT):: mat
1003 : CLASS(t_mat), INTENT(INOUT):: mat1
1004 : !Special case, the full matrix is copied. Then use move alloc
1005 4764 : IF (mat%l_real) THEN
1006 624 : CALL move_ALLOC(mat1%data_r, mat%data_r)
1007 : ELSE
1008 4140 : CALL move_ALLOC(mat1%data_c, mat%data_c)
1009 : END IF
1010 4764 : END SUBROUTINE t_mat_move
1011 :
   !> Copy the contents of mat1 into mat, placing mat1's (1,1) element at
   !! position (n1,n2) of mat. The copied region is clipped to whatever fits
   !! into mat. If both data arrays are present on the GPU the copy runs on the
   !! device via ACC kernels, otherwise on the host via ?lacpy.
   SUBROUTINE t_mat_copy(mat, mat1, n1, n2)
      IMPLICIT NONE
      CLASS(t_mat), INTENT(INOUT):: mat
      class(t_mat), INTENT(IN)   :: mat1
      INTEGER, INTENT(IN)        :: n1, n2  ! 1-based row/column offset in mat

      INTEGER:: i1, i2, j1, j2
      logical:: both_on_gpu, tmp

      call timestart("t_mat_copy")

      ! if the destination is not yet allocated, allocate it like the source —
      ! but only on the host; copy-with-alloc is not supported on the GPU
      if(.not. mat%allocated()) then
#ifdef _OPENACC
         ! NOTE(review): only data_c is tested here even when mat1 is real —
         ! presumably acc_is_present on an unallocated array is benign, but
         ! checking data_r for l_real matrices would be more symmetric; verify
         tmp = acc_is_present(mat1%data_c)
#else
         tmp = .False.
#endif
         if(tmp) then
            call judft_error("can't use copy alloc on GPU")
         else
            call mat%init(mat1)
         endif
      endif

      ! device copy is only possible when BOTH arrays are mapped on the GPU
#ifdef _OPENACC
      if(mat1%l_real) then
         both_on_gpu = acc_is_present(mat%data_r) .and. acc_is_present(mat1%data_r)
      else
         both_on_gpu = acc_is_present(mat%data_c) .and. acc_is_present(mat1%data_c)
      endif
#else
      both_on_GPU = .False.
#endif

      ! runtime type guard: this plain-t_mat implementation must not silently
      ! accept a distributed t_mpimat source (copy is overloaded for those)
      select type (mat1)
      type is(t_mat)

      class default
         call judft_error("you can only copy a t_mat to a t_mat")
      end select

      i1 = mat%matsize1 - n1 + 1 !space available for first dimension
      i2 = mat%matsize2 - n2 + 1
      ! clip to the source size: copy min(available, source) in each dimension
      i1 = MIN(i1, mat1%matsize1)
      i2 = MIN(i2, mat1%matsize2)

      if(both_on_GPU )then
         if(mat%l_real) then
            !$acc kernels present(mat, mat%data_r, mat1, mat1%data_r)
            do j1 = 1,i1
               do j2 = 1,i2
                  mat%data_r(n1+j1-1, n2+j2-1) = mat1%data_r(j1,j2)
               enddo
            enddo
            !$acc end kernels
         else
            !$acc kernels present(mat, mat%data_c, mat1, mat1%data_c)
            do j1 = 1,i1
               do j2 = 1,i2
                  mat%data_c(n1+j1-1, n2+j2-1) = mat1%data_c(j1,j2)
               enddo
            enddo
            !$acc end kernels
         endif
      else
         ! host path: LAPACK ?lacpy copies the i1 x i2 block in one call
         IF (mat%l_real) THEN
            call dlacpy("N", i1, i2, mat1%data_r, size(mat1%data_r, 1), mat%data_r(n1,n2), size(mat%data_r,1) )
         ELSE
            call zlacpy("N", i1, i2, mat1%data_c, size(mat1%data_c, 1), mat%data_c(n1,n2), size(mat%data_c,1) )
         END IF
      endif

      call timestop("t_mat_copy")
   END SUBROUTINE t_mat_copy
1086 :
1087 0 : SUBROUTINE t_mat_clear(mat)
1088 : IMPLICIT NONE
1089 : CLASS(t_mat), INTENT(INOUT):: mat
1090 : INTEGER :: i
1091 :
1092 0 : IF (mat%l_real) THEN
1093 0 : call dlaset("A",mat%matsize1,mat%matsize2,0.0,0.0,mat%data_r,mat%matsize1)
1094 : ELSE
1095 0 : call zlaset("A",mat%matsize1,mat%matsize2,cmplx(0.0,0.0),cmplx(0.0,0.0),mat%data_c,mat%matsize1)
1096 : ENDIF
1097 : #ifdef _OPENACC
1098 : IF (mat%l_real) THEN
1099 : if (acc_is_present(mat%data_r)) Then
1100 : !$acc kernels present(mat%data_r)
1101 : mat%data_r(:,:)=0.0
1102 : !$acc end kernels
1103 : endif
1104 : ELSE
1105 : if (acc_is_present(mat%data_c)) Then
1106 : !$acc kernels present(mat%data_c)
1107 : mat%data_c(:,:)=0.0
1108 : !$acc end kernels
1109 : endif
1110 : ENDIF
1111 : #endif
1112 0 : END SUBROUTINE t_mat_clear
1113 :
1114 0 : subroutine t_mat_save_npy(mat, filename)
1115 : use m_judft
1116 : implicit NONE
1117 : class(t_mat), intent(in) :: mat
1118 : character(len=*) :: filename
1119 :
1120 : if (mat%l_real) then
1121 : !call save_npy(filename, mat%data_r)
1122 : else
1123 : !call save_npy(filename, mat%data_c)
1124 : endif
1125 0 : end subroutine t_mat_save_npy
1126 :
1127 0 : function t_mat_unsymmetry(mat) result(unsymmetry)
1128 : implicit none
1129 : class(t_mat), intent(in) :: mat
1130 : real :: unsymmetry
1131 : integer :: n
1132 :
1133 0 : unsymmetry = 0.0
1134 :
1135 0 : if (mat%matsize1 /= mat%matsize2) then
1136 0 : call judft_error("Rectangular matricies can't be symmetric")
1137 : else
1138 0 : n = mat%matsize1
1139 0 : if (mat%l_real) THEN
1140 0 : unsymmetry = maxval(mat%data_r(:n, :n) - transpose(mat%data_r(:n, :n)))
1141 : else
1142 0 : unsymmetry = maxval(abs(mat%data_c(:n, :n) - conjg(transpose(mat%data_c(:n, :n)))))
1143 : endif
1144 : endif
1145 0 : end function t_mat_unsymmetry
1146 :
1147 169298 : END MODULE m_types_mat
|