LCOV - code coverage report
Current view: top level - diagonalization - scalapack.F90 (source / functions) Coverage Total Hit
Test: FLEUR test coverage Lines: 49.7 % 175 87
Test Date: 2026-03-18 04:40:43 Functions: 60.0 % 5 3

            Line data    Source code
       1              : !--------------------------------------------------------------------------------
       2              : ! Copyright (c) 2016 Peter Grünberg Institut, Forschungszentrum Jülich, Germany
       3              : ! This file is part of FLEUR and available as free software under the conditions
       4              : ! of the MIT license as expressed in the LICENSE file in more detail.
       5              : !--------------------------------------------------------------------------------
       6              : #ifdef FLEUR_USE_SCOREP
       7              : #include 'scorep/SCOREP_User.inc'
       8              : #endif
       9              : module m_scalapack
      10              :    use m_juDFT
      11              :    use m_constants
      12              :    use m_types_mpimat
      13              :    use m_types_mat
      14              :    use m_types_solver
      15              : #ifdef CPP_MPI
      16              :    use mpi
      17              : #endif
      18              :    implicit none
      19              : 
      20              :    type, extends(t_solver)::t_solver_scalapack
      21              :    contains
      22              :       procedure        :: solve_gev => scalapack_gev  !solver for generalized eigenvalue problem
      23              :       procedure        :: to_std => scalapack_reduction     !transform the H of the generalized problem to a std problem
      24              :       procedure        :: backtrans => scalapack_recover  !transform the Eigenvalue back to the generalized problem
      25              :    end type
      26              :    public :: get_solver_scalapack
      27              : 
      28              : contains
      29              : 
      30         4761 :    function get_solver_scalapack() result(solver)
      31              :       type(t_solver_scalapack), pointer::solver
      32         4761 :       allocate (solver)
      33         4761 :       solver%name = "scalapack"
      34              : #ifdef CPP_SCALAPACK
      35         4761 :       solver%available = .true.
      36              : #else
      37              :       solver%available = .false.
      38              : #endif
      39         4761 :       solver%parallel = .true.
      40         4761 :       solver%serial = .false.
      41         4761 :       solver%generalized = .true.
      42         4761 :       solver%standard = .false.
      43         4761 :       solver%single_precision = .false.
      44         4761 :       solver%transform = .true.
      45         4761 :       solver%GPU = .false.
      46         4761 :    end function
      47              : 
      48         4624 :    subroutine scalapack_gev(self, hmat, smat, ne, eig, zmat, ikpt)
      49              :       !
      50              :       !----------------------------------------------------
      51              :       !- Parallel eigensystem solver - driver routine; gb99
      52              :       !  Uses the SCALAPACK for the actual diagonalization
      53              :       !
      54              :       ! hmat ..... Hamiltonian matrix
      55              :       ! smat ..... overlap matrix
      56              :       ! ne ....... number of ev's searched (and found) on this node
      57              :       !            On input, overall number of ev's searched,
      58              :       !            On output, local number of ev's found
      59              :       ! eig ...... all eigenvalues, output
      60              :       ! ev ....... local eigenvectors, output
      61              :       !
      62              :       !----------------------------------------------------
      63              :       !
      64              : 
      65              :       implicit none
      66              :       class(t_solver_scalapack) :: self
      67              :       class(t_mat), intent(INOUT)    :: hmat, smat
      68              :       class(t_mat), allocatable, intent(OUT)::zmat
      69              :       real, intent(out)              :: eig(:)
      70              :       integer, intent(INOUT)         :: ne
      71              :       integer, intent(IN)            :: ikpt
      72              : 
      73              : #ifdef CPP_SCALAPACK
      74              :       !...  Local variables
      75              :       !
      76              :       integer i, ierr, err
      77         4624 :       integer, allocatable :: iwork(:)
      78         4624 :       real, allocatable :: rwork(:)
      79              :       integer              :: lrwork
      80              : 
      81              :       !
      82              :       !  ScaLAPACK things
      83              :       character(len=1)    :: uplo
      84              :       integer              :: num, num1, num2, liwork, lwork2, np0, mq0, np, myid
      85              :       integer              :: iceil, numroc, nn, nb
      86         4624 :       integer, allocatable :: ifail(:), iclustr(:)
      87              :       real                 :: abstol, orfac = 1.e-4, dlamch
      88         4624 :       real, allocatable     :: eig2(:), gap(:)
      89         4624 :       real, allocatable :: work2_r(:)
      90         4624 :       complex, allocatable :: work2_c(:)
      91              : 
      92         4624 :       type(t_mpimat):: ev_dist
      93              : 
      94              :       external iceil, numroc
      95              :       external dlamch
      96              : 
      97              : #ifdef FLEUR_USE_SCOREP
      98              : 
      99              :       SCOREP_RECORDING_OFF()
     100              : #endif
     101              :       select type (hmat)
     102              :       type IS (t_mpimat)
     103         4624 :          select type (smat)
     104              :          type IS (t_mpimat)
     105              : 
     106         4624 :             allocate (eig2(hmat%global_size1))
     107              : 
     108         4624 :             call MPI_COMM_RANK(hmat%blacsdata%mpi_com, myid, ierr)
     109         4624 :             call MPI_COMM_SIZE(hmat%blacsdata%mpi_com, np, ierr)
     110              : 
     111         4624 :             num = ne !no of states solved for
     112              : 
     113         4624 :             abstol = 2.0*dlamch('S') ! PDLAMCH gave an error on ZAMpano
     114              : 
     115         4624 :             call ev_dist%init(hmat)
     116              : 
     117              :             !smat%blacs_desc(2)    = hmat%blacs_desc(2)
     118              :             !ev_dist%blacs_desc(2) = hmat%blacs_desc(2)
     119              :             !smat%blacs_desc=hmat%blacs_desc
     120              :             !ev_dist%blacs_desc=hmat%blacs_desc
     121              : 
     122         4624 :             nb = hmat%blacsdata%blacs_desc(5)! Blocking factor
     123         4624 :             if (nb .ne. hmat%blacsdata%blacs_desc(6)) call judft_error("Different block sizes for rows/columns not supported")
     124              : 
     125              :             !
     126         4624 :             nn = max(max(hmat%global_size1, nb), 2)
     127         4624 :             np0 = numroc(nn, nb, 0, 0, hmat%blacsdata%nprow)
     128         4624 :             mq0 = numroc(max(max(ne, nb), 2), nb, 0, 0, hmat%blacsdata%npcol)
     129         4624 :             if (hmat%l_real) then
     130              :                lwork2 = 5*hmat%global_size1 + max(5*nn, np0*mq0 + 2*nb*nb) + &
     131         2710 :                         iceil(ne, hmat%blacsdata%nprow*hmat%blacsdata%npcol)*nn
     132         2710 :                allocate (work2_r(lwork2 + 10*hmat%global_size1), stat=err) ! Allocate more in case of clusters
     133              :             else
     134         1914 :                lwork2 = hmat%global_size1 + max(nb*(np0 + 1), 3)
     135         1914 :                allocate (work2_c(lwork2), stat=err)
     136              :             end if
     137         4624 :             if (err .ne. 0) then
     138            0 :                WRITE(*,*) 'Error for k-point ', ikpt
     139            0 :                write (*, *) 'work2  :', err, lwork2
     140            0 :                call juDFT_error('Failed to allocated "work2"', calledby='chani')
     141              :             end if
     142              : 
     143         4624 :             liwork = 6*max(max(hmat%global_size1, hmat%blacsdata%nprow*hmat%blacsdata%npcol + 1), 4)
     144         4624 :             allocate (iwork(liwork), stat=err)
     145         4624 :             if (err .ne. 0) then
     146            0 :                WRITE(*,*) 'Error for k-point ', ikpt
     147            0 :                write (*, *) 'iwork  :', err, liwork
     148            0 :                call juDFT_error('Failed to allocated "iwork"', calledby='chani')
     149              :             end if
     150         4624 :             allocate (ifail(hmat%global_size1), stat=err)
     151         4624 :             if (err .ne. 0) then
     152            0 :                WRITE(*,*) 'Error for k-point ', ikpt
     153            0 :                write (*, *) 'ifail  :', err, hmat%global_size1
     154            0 :                call juDFT_error('Failed to allocated "ifail"', calledby='chani')
     155              :             end if
     156         4624 :             allocate (iclustr(2*hmat%blacsdata%nprow*hmat%blacsdata%npcol), stat=err)
     157         4624 :             if (err .ne. 0) then
     158            0 :                WRITE(*,*) 'Error for k-point ', ikpt
     159            0 :                write (*, *) 'iclustr:', err, 2*hmat%blacsdata%nprow*hmat%blacsdata%npcol
     160            0 :                call juDFT_error('Failed to allocated "iclustr"', calledby='chani')
     161              :             end if
     162         4624 :             allocate (gap(hmat%blacsdata%nprow*hmat%blacsdata%npcol), stat=err)
     163         4624 :             if (err .ne. 0) then
     164            0 :                WRITE(*,*) 'Error for k-point ', ikpt
     165            0 :                write (*, *) 'gap    :', err, hmat%blacsdata%nprow*hmat%blacsdata%npcol
     166            0 :                call juDFT_error('Failed to allocated "gap"', calledby='chani')
     167              :             end if
     168              :             !
     169              :             !     Compute size of workspace
     170              :             !
     171         4624 :             if (hmat%l_real) then
     172         2710 :                uplo = 'U'
     173              :                call pdsygvx(1, 'V', 'I', 'U', hmat%global_size1, hmat%data_r, 1, 1, &
     174              :                             hmat%blacsdata%blacs_desc, smat%data_r, 1, 1, smat%blacsdata%blacs_desc, &
     175              :                             0.0, 1.0, 1, num, abstol, num1, num2, eig2, orfac, ev_dist%data_r, 1, 1, &
     176         2710 :                             ev_dist%blacsdata%blacs_desc, work2_r, -1, iwork, -1, ifail, iclustr, gap, ierr)
     177         2710 :                if (work2_r(1) .gt. lwork2) then
     178         2710 :                   lwork2 = work2_r(1)
     179         2710 :                   deallocate (work2_r)
     180         2710 :                   allocate (work2_r(lwork2 + 20*hmat%global_size1), stat=err) ! Allocate even more in case of clusters
     181         2710 :                   if (err .ne. 0) then
     182            0 :                      WRITE(*,*) 'Error for k-point ', ikpt
     183            0 :                      write (*, *) 'work2  :', err, lwork2
     184            0 :                      call juDFT_error('Failed to allocated "work2"', calledby='chani')
     185              :                   end if
     186              :                end if
     187              :             else
     188              :                lrwork = 4*hmat%global_size1 + max(5*nn, np0*mq0) + &
     189         1914 :                         iceil(ne, hmat%blacsdata%nprow*hmat%blacsdata%npcol)*nn
     190              :                ! Allocate more in case of clusters
     191         1914 :                allocate (rwork(lrwork + 10*hmat%global_size1), stat=ierr)
     192         1914 :                if (err /= 0) then
     193            0 :                   WRITE(*,*) 'Error for k-point ', ikpt
     194            0 :                   write (*, *) 'ERROR: chani.F: Allocating rwork failed'
     195            0 :                   call juDFT_error('Failed to allocated "rwork"', calledby='chani')
     196              :                end if
     197              : 
     198              :                call pzhegvx(1, 'V', 'I', 'U', hmat%global_size1, hmat%data_c, 1, 1, &
     199              :                             hmat%blacsdata%blacs_desc, smat%data_c, 1, 1, smat%blacsdata%blacs_desc, &
     200              :                             0.0, 1.0, 1, num, abstol, num1, num2, eig2, orfac, ev_dist%data_c, 1, 1, &
     201              :                             ev_dist%blacsdata%blacs_desc, work2_c, -1, rwork, -1, iwork, -1, ifail, iclustr, &
     202         1914 :                             gap, ierr)
     203         1914 :                if (abs(work2_c(1)) .gt. lwork2) then
     204         1914 :                   lwork2 = work2_c(1)
     205         1914 :                   deallocate (work2_c)
     206         1914 :                   allocate (work2_c(lwork2), stat=err)
     207         1914 :                   if (err /= 0) then
     208            0 :                      WRITE(*,*) 'Error for k-point ', ikpt
     209            0 :                      write (*, *) 'ERROR: chani.F: Allocating rwork failed:', lwork2
     210            0 :                      call juDFT_error('Failed to allocated "work2"', calledby='chani')
     211              :                   end if
     212              :                end if
     213         1914 :                if (rwork(1) .gt. lrwork) then
     214            0 :                   lrwork = rwork(1)
     215            0 :                   deallocate (rwork)
     216              :                   ! Allocate even more in case of clusters
     217            0 :                   allocate (rwork(lrwork + 20*hmat%global_size1), stat=err)
     218            0 :                   if (err /= 0) then
     219            0 :                      WRITE(*,*) 'Error for k-point ', ikpt
     220            0 :                      write (*, *) 'ERROR: chani.F: Allocating rwork failed: ', lrwork + 20*hmat%global_size1
     221            0 :                      call juDFT_error('Failed to allocated "rwork"', calledby='chani')
     222              :                   end if
     223              :                end if
     224              :             end if
     225         4624 :             if (iwork(1) .gt. liwork) then
     226            0 :                liwork = iwork(1)
     227            0 :                deallocate (iwork)
     228            0 :                allocate (iwork(liwork), stat=err)
     229            0 :                if (err /= 0) then
     230            0 :                   WRITE(*,*) 'Error for k-point ', ikpt
     231            0 :                   write (*, *) 'ERROR: chani.F: Allocating iwork failed: ', liwork
     232            0 :                   call juDFT_error('Failed to allocated "iwork"', calledby='chani')
     233              :                end if
     234              :             end if
     235              :             !
     236              :             !     Now solve generalized eigenvalue problem
     237              :             !
     238         4624 :             call timestart("SCALAPACK call")
     239         4624 :             if (hmat%l_real) then
     240              :                call pdsygvx(1, 'V', 'I', 'U', hmat%global_size1, hmat%data_r, 1, 1, &
     241              :                             hmat%blacsdata%blacs_desc, smat%data_r, 1, 1, smat%blacsdata%blacs_desc, &
     242              :                             1.0, 1.0, 1, num, abstol, num1, num2, eig2, orfac, ev_dist%data_r, 1, 1, &
     243              :                             ev_dist%blacsdata%blacs_desc, work2_r, lwork2, iwork, liwork, ifail, iclustr, &
     244         2710 :                             gap, ierr)
     245              :             else
     246              :                call pzhegvx(1, 'V', 'I', 'U', hmat%global_size1, hmat%data_c, 1, 1, &
     247              :                             hmat%blacsdata%blacs_desc, smat%data_c, 1, 1, smat%blacsdata%blacs_desc, &
     248              :                             1.0, 1.0, 1, num, abstol, num1, num2, eig2, orfac, ev_dist%data_c, 1, 1, &
     249              :                             ev_dist%blacsdata%blacs_desc, work2_c, lwork2, rwork, lrwork, iwork, liwork, &
     250         1914 :                             ifail, iclustr, gap, ierr)
     251         1914 :                deallocate (rwork)
     252              :             end if
     253         4624 :             call timestop("SCALAPACK call")
     254         4624 :             if (ierr .ne. 0) then
     255              :                !IF (ierr /= 2) WRITE (oUnit,*) myid,' error in pzhegvx/pdsygvx, ierr=',ierr
     256              :                !IF (ierr <= 0) WRITE (oUnit,*) myid,' illegal input argument'
     257              :                if (mod(ierr, 2) /= 0) then
     258              :                   !WRITE (oUnit,*) myid,'some eigenvectors failed to converge'
     259              :                   eigs: do i = 1, ne
     260              :                      if (ifail(i) /= 0) then
     261              :                         !WRITE (oUnit,*) myid,' eigenvector',ifail(i), 'failed to converge'
     262              :                      else
     263              :                         exit eigs
     264              :                      end if
     265              :                   end do eigs
     266              :                   !CALL CPP_flush(oUnit)
     267              :                end if
     268              :                if (mod(ierr/4, 2) .ne. 0) then
     269              :                   !WRITE(oUnit,*) myid,' only',num2,' eigenvectors converged'
     270              :                   !CALL CPP_flush(oUnit)
     271              :                end if
     272            0 :                if (mod(ierr/8, 2) .ne. 0) then
     273              :                   !WRITE(oUnit,*) myid,' PDSTEBZ failed to compute eigenvalues'
     274            0 :                   WRITE(*,*) 'Warning for k-point ', ikpt
     275            0 :                   call judft_warn("SCALAPACK failed to solve eigenvalue problem", calledby="scalapack.f90")
     276              :                end if
     277            0 :                if (mod(ierr/16, 2) .ne. 0) then
     278            0 :                   WRITE(*,*) 'Warning for k-point ', ikpt
     279              :                   !WRITE(oUnit,*) myid,' B was not positive definite, Cholesky failed at',ifail(1)
     280              :                   call judft_warn("SCALAPACK failed: B was not positive definite. "//new_line("a")// &
     281              :                                   "Order of the smallest minor which is not positive definite:"//int2str(ifail(1)) &
     282            0 :                                   , calledby="scalapack.f90")
     283              :                end if
     284              :             end if
     285         4624 :             if (num2 < num1) then
     286              :                !IF (myid ==0) THEN
     287            0 :                write (oUnit, *) 'Not all eigenvalues wanted are found'
     288            0 :                write (oUnit, *) 'number of eigenvalues/vectors wanted', num1
     289            0 :                write (oUnit, *) 'number of eigenvalues/vectors found', num2
     290              :                !CALL CPP_flush(oUnit)
     291              :                !ENDIF
     292              :             end if
     293              :             !
     294              :             !     Each process has all eigenvalues in output
     295       195930 :             eig(:num2) = eig2(:num2)
     296         4624 :             deallocate (eig2)
     297              :             !
     298              :             !
     299              :             !     Redistribute eigenvectors  from ScaLAPACK distribution to each process, i.e. for
     300              :             !     process i these are eigenvectors i+1, np+i+1, 2*np+i+1...
     301              :             !     Only num=num2/np eigenvectors per process
     302              :             !
     303         4624 :             num = floor(real(num2)/np)
     304         4624 :             if (myid .lt. num2 - (num2/np)*np) num = num + 1
     305         4624 :             ne = 0
     306         4624 :             do i = myid + 1, num2, np
     307        95653 :                ne = ne + 1
     308              :                !eig(ne)=eig2(i)
     309              :             end do
     310         4624 :             allocate (t_mpimat::zmat)
     311              :             call zmat%init(ev_dist%l_real, ev_dist%global_size1, ev_dist%global_size1, &
     312         4624 :                            ev_dist%blacsdata%mpi_com, MPIMAT_ROWCYCLIC)
     313        13872 :             call zmat%copy(ev_dist, 1, 1)
     314              :          class DEFAULT
     315            0 :             WRITE(*,*) 'Error for k-point ', ikpt
     316            0 :             call judft_error("Wrong type (1) in scalapack")
     317              :          end select
     318              :       class DEFAULT
     319            0 :          WRITE(*,*) 'Error for k-point ', ikpt
     320            0 :          call judft_error("Wrong type (2) in scalapack")
     321              :       end select
     322         4624 :       call ev_dist%free()
     323              : #ifdef FLEUR_USE_SCOREP
     324              :       SCOREP_RECORDING_ON()
     325              : #endif
     326              : #endif
     327         4624 :    end subroutine scalapack_gev
     328              : 
     329            0 :    subroutine scalapack_reduction(self, hmat, smat)
     330              :       !Simple driver to transform Generalized Eigenvalue Problem to Standard problem using LAPACK routine
     331              : 
     332              :       class(t_solver_scalapack) :: self
     333              :       class(t_mat), intent(INOUT)  :: hmat, smat
     334              :       integer            :: info, n
     335              : 
     336              : #ifdef CPP_SCALAPACK
     337              :       real :: scale
     338              :       select type (hmat)
     339              :       type is (t_mpimat)
     340            0 :          select type (smat)
     341              :          type is (t_mpimat)
     342              :             !Transform to standard problem using SCALAPACK
     343            0 :             if (hmat%l_real) then
     344            0 :                call pdpotrf('U', smat%global_size1, smat%data_r, 1, 1, smat%blacsdata%blacs_desc, info)
     345            0 :                if (info .ne. 0) then
     346            0 :                   write (*, *) 'Error in pdpotrf: info =', info
     347            0 :                   call juDFT_error("1 Reduction failed", calledby="scalapack_reduction")
     348              :                end if
     349              :                call pdsygst(1, 'U', smat%global_size1, hmat%data_r, 1, 1, hmat%blacsdata%blacs_desc, &
     350            0 :                             smat%data_r, 1, 1, smat%blacsdata%blacs_desc, scale, info)
     351            0 :                if (abs(scale - 1) > 1e-10) call judft_error("Scale parameter not implemented in scalapack_reduction")
     352            0 :                if (info .ne. 0) then
     353            0 :                   write (oUnit, *) 'Error in pdsygst: info =', info
     354            0 :                   call juDFT_error("2 Reduction failed", calledby="scalapack_reduction")
     355              :                end if
     356              :             else
     357            0 :                call pzpotrf('U', smat%global_size1, smat%data_c, 1, 1, smat%blacsdata%blacs_desc, info)
     358            0 :                if (info .ne. 0) then
     359            0 :                   write (*, *) 'Error in pzpotrf: info =', info
     360            0 :                   call juDFT_error("3 Reduction failed", calledby="scalapack_reduction")
     361              :                end if
     362              :                call pzhegst(1, 'U', smat%global_size1, hmat%data_c, 1, 1, smat%blacsdata%blacs_desc, &
     363            0 :                             smat%data_c, 1, 1, smat%blacsdata%blacs_desc, scale, info)
     364            0 :                if (abs(scale - 1) > 1e-10) call judft_error("Scale parameter not implemented in scalapack_reduction")
     365            0 :                if (info .ne. 0) then
     366            0 :                   write (oUnit, *) 'Error in pzhegst: info =', info
     367            0 :                   call juDFT_error("4 Reduction failed", calledby="scalapack_reduction")
     368              :                end if
     369              :             end if
     370              :          class default
     371            0 :             call judft_bug("Wrong matrix type in scalapack")
     372              :          end select
     373              :       class default
     374            0 :          call judft_bug("Wrong matrix type in scalapack")
     375              :       end select
     376              : #endif
     377            0 :    end subroutine
     378              : 
     379            0 :    subroutine scalapack_recover(self, smat, zmat)
     380              : 
     381              :       class(t_solver_scalapack) :: self
     382              :       class(t_mat), intent(INOUT)  :: zmat, smat
     383              :       integer :: m, n, info
     384              : #ifdef CPP_SCALAPACK
     385              : 
     386              :       select type (smat)
     387              :       type is (t_mpimat)
     388            0 :          select type (zmat)
     389              :          type is (t_mpimat)
     390            0 :             n = smat%global_size1
     391            0 :             m = zmat%global_size2
     392              :             ! --> recover the generalized eigenvectors z by solving z' = l^t * z
     393            0 :             if (smat%l_real) then
     394              :                call pdtrtrs('U', 'N', 'N', n, m, smat%data_r, 1, 1, smat%blacsdata%blacs_desc, &
     395            0 :                             zmat%data_r, 1, 1, zmat%blacsdata%blacs_desc, info)
     396              :             else
     397              :                call pztrtrs('U', 'N', 'N', n, m, smat%data_c, 1, 1, smat%blacsdata%blacs_desc, &
     398            0 :                             zmat%data_c, 1, 1, zmat%blacsdata%blacs_desc, info)
     399              :             end if
     400            0 :             if (info .ne. 0) then
     401            0 :                write (oUnit, *) 'Error in p?trtrs: info =', info
     402            0 :                call juDFT_error("Recovery failed", calledby="scalapack_recover")
     403              :             end if
     404              :          class default
     405            0 :             call judft_bug("Wrong matrix type in scalapack")
     406              :          end select
     407              :       class default
     408            0 :          call judft_bug("Wrong matrix type in scalapack")
     409              :       end select
     410              : #endif
     411            0 :    end subroutine
     412              : 
     413         9248 : end module m_scalapack
        

Generated by: LCOV version 2.0-1