LCOV - code coverage report
Current view: top level - diagonalization - scalapack.F90 (source / functions) Hit Total Coverage
Test: FLEUR test coverage Lines: 77 112 68.8 %
Date: 2024-04-26 04:44:34 Functions: 1 1 100.0 %

          Line data    Source code
       1             : !--------------------------------------------------------------------------------
       2             : ! Copyright (c) 2016 Peter Grünberg Institut, Forschungszentrum Jülich, Germany
       3             : ! This file is part of FLEUR and available as free software under the conditions
       4             : ! of the MIT license as expressed in the LICENSE file in more detail.
       5             : !--------------------------------------------------------------------------------
       6             : #ifdef FLEUR_USE_SCOREP
       7             : #include 'scorep/SCOREP_User.inc'
       8             : #endif
       9             : MODULE m_scalapack
      10             : CONTAINS
      11        4872 :   SUBROUTINE scalapack(hmat,smat,ne,eig,ev)
      12             :     !
      13             :     !----------------------------------------------------
      14             :     !- Parallel eigensystem solver - driver routine; gb99
      15             :     !  Uses the SCALAPACK for the actual diagonalization
      16             :     !
      17             :     ! hmat ..... Hamiltonian matrix
      18             :     ! smat ..... overlap matrix
      19             :     ! ne ....... number of ev's searched (and found) on this node
      20             :     !            On input, overall number of ev's searched,
      21             :     !            On output, local number of ev's found
      22             :     ! eig ...... all eigenvalues, output
      23             :     ! ev ....... local eigenvectors, output
      24             :     !
      25             :     !----------------------------------------------------
      26             :     !
      27             :     USE m_juDFT
      28             :     USE m_constants
      29             :     USE m_types_mpimat
      30             :     USE m_types_mat
      31             : #ifdef CPP_MPI
      32             :     USE mpi
      33             : #endif
      34             :     IMPLICIT NONE
      35             :     CLASS(t_mat),INTENT(INOUT)    :: hmat,smat
      36             :     CLASS(t_mat),ALLOCATABLE,INTENT(OUT)::ev
      37             :     REAL,INTENT(out)              :: eig(:)
      38             :     INTEGER,INTENT(INOUT)         :: ne
      39             :     
      40             :     
      41             : #ifdef CPP_SCALAPACK
      42             :     !...  Local variables
      43             :     !
      44             :     INTEGER i , ierr, err
      45        4872 :     INTEGER, ALLOCATABLE :: iwork(:)
      46        4872 :     REAL,    ALLOCATABLE :: rwork(:)
      47             :     INTEGER              :: lrwork
      48             : 
      49             :     !
      50             :     !  ScaLAPACK things
      51             :     CHARACTER (len=1)    :: uplo
      52             :     INTEGER              :: num,num1,num2,liwork,lwork2,np0,mq0,np,myid
      53             :     INTEGER              :: iceil, numroc, nn, nb
      54        4872 :     INTEGER, ALLOCATABLE :: ifail(:), iclustr(:)
      55             :     REAL                 :: abstol,orfac=1.E-4,dlamch
      56        4872 :     REAL,ALLOCATABLE     :: eig2(:), gap(:)
      57        4872 :     REAL,    ALLOCATABLE :: work2_r(:)
      58        4872 :     COMPLEX, ALLOCATABLE :: work2_c(:)
      59             : 
      60        4872 :     TYPE(t_mpimat):: ev_dist
      61             :     
      62             :     EXTERNAL iceil, numroc
      63             :     EXTERNAL dlamch
      64             : 
      65             : 
      66             : #ifdef FLEUR_USE_SCOREP
      67             : 
      68             :     SCOREP_RECORDING_OFF()    
      69             : #endif    
      70             :     SELECT TYPE(hmat)
      71             :     TYPE IS (t_mpimat)
      72        4872 :     SELECT TYPE(smat)
      73             :     TYPE IS (t_mpimat)
      74             : 
      75       14616 :     ALLOCATE(eig2(hmat%global_size1))
      76             : 
      77             : 
      78        4872 :     CALL MPI_COMM_RANK(hmat%blacsdata%mpi_com,myid,ierr)
      79        4872 :     CALL MPI_COMM_SIZE(hmat%blacsdata%mpi_com,np,ierr)
      80             : 
      81        4872 :     num=ne !no of states solved for
      82             :     
      83        4872 :     abstol=2.0*dlamch('S') ! PDLAMCH gave an error on ZAMpano
      84             : 
      85        4872 :     CALL ev_dist%init(hmat)
      86             : 
      87             :     !smat%blacs_desc(2)    = hmat%blacs_desc(2)
      88             :     !ev_dist%blacs_desc(2) = hmat%blacs_desc(2)
      89             :     !smat%blacs_desc=hmat%blacs_desc
      90             :     !ev_dist%blacs_desc=hmat%blacs_desc
      91             :     
      92             :     
      93        4872 :     nb=hmat%blacsdata%blacs_desc(5)! Blocking factor
      94        4872 :     IF (nb.NE.hmat%blacsdata%blacs_desc(6)) CALL judft_error("Different block sizes for rows/columns not supported")
      95             :   
      96             :     !
      97        4872 :     nn=MAX(MAX(hmat%global_size1,nb),2)
      98        4872 :     np0=numroc(nn,nb,0,0,hmat%blacsdata%nprow)
      99        4872 :     mq0=numroc(MAX(MAX(ne,nb),2),nb,0,0,hmat%blacsdata%npcol)
     100        4872 :     IF (hmat%l_real) THEN
     101        2704 :        lwork2=5*hmat%global_size1+MAX(5*nn,np0*mq0+2*nb*nb)+ iceil(ne,hmat%blacsdata%nprow*hmat%blacsdata%npcol)*nn
     102        8112 :        ALLOCATE ( work2_r(lwork2+10*hmat%global_size1), stat=err ) ! Allocate more in case of clusters
     103             :     ELSE
     104        2168 :        lwork2=hmat%global_size1+MAX(nb*(np0+1),3)
     105        6504 :        ALLOCATE ( work2_c(lwork2), stat=err )
     106             :     ENDIF
     107        4872 :     IF (err.NE.0) THEN 
     108           0 :        WRITE (*,*) 'work2  :',err,lwork2
     109           0 :        CALL juDFT_error('Failed to allocated "work2"', calledby ='chani')
     110             :     ENDIF
     111             :     
     112        4872 :     liwork=6*MAX(MAX(hmat%global_size1,hmat%blacsdata%nprow*hmat%blacsdata%npcol+1),4)
     113       14616 :     ALLOCATE ( iwork(liwork), stat=err )
     114        4872 :     IF (err.NE.0) THEN
     115           0 :        WRITE (*,*) 'iwork  :',err,liwork
     116           0 :        CALL juDFT_error('Failed to allocated "iwork"', calledby ='chani')
     117             :     ENDIF
     118       14616 :     ALLOCATE ( ifail(hmat%global_size1), stat=err )
     119        4872 :     IF (err.NE.0) THEN
     120           0 :        WRITE (*,*) 'ifail  :',err,hmat%global_size1
     121           0 :        CALL juDFT_error('Failed to allocated "ifail"', calledby ='chani')
     122             :     ENDIF
     123       14616 :     ALLOCATE ( iclustr(2*hmat%blacsdata%nprow*hmat%blacsdata%npcol), stat=err )
     124        4872 :     IF (err.NE.0) THEN
     125           0 :        WRITE (*,*) 'iclustr:',err,2*hmat%blacsdata%nprow*hmat%blacsdata%npcol
     126           0 :        CALL juDFT_error('Failed to allocated "iclustr"', calledby ='chani')
     127             :     ENDIF
     128       14616 :     ALLOCATE ( gap(hmat%blacsdata%nprow*hmat%blacsdata%npcol), stat=err )
     129        4872 :     IF (err.NE.0) THEN
     130           0 :        WRITE (*,*) 'gap    :',err,hmat%blacsdata%nprow*hmat%blacsdata%npcol
     131           0 :        CALL juDFT_error('Failed to allocated "gap"', calledby ='chani')
     132             :     ENDIF
     133             :     !
     134             :     !     Compute size of workspace
     135             :     !
     136        4872 :     IF (hmat%l_real) THEN
     137        2704 :        uplo='U'
     138             :        CALL pdsygvx(1,'V','I','U',hmat%global_size1,hmat%data_r,1,1,&
     139             :             hmat%blacsdata%blacs_desc,smat%data_r,1,1,smat%blacsdata%blacs_desc,&
     140             :             0.0,1.0,1,num,abstol,num1,num2,eig2,orfac,ev_dist%data_r,1,1,&
     141        2704 :             ev_dist%blacsdata%blacs_desc,work2_r,-1,iwork,-1,ifail,iclustr, gap,ierr)
     142        2704 :        IF ( work2_r(1).GT.lwork2) THEN
     143        2704 :           lwork2 = work2_r(1)
     144        2704 :           DEALLOCATE (work2_r)
     145        8112 :           ALLOCATE ( work2_r(lwork2+20*hmat%global_size1), stat=err ) ! Allocate even more in case of clusters
     146        2704 :           IF (err.NE.0) THEN
     147           0 :              WRITE (*,*) 'work2  :',err,lwork2
     148           0 :              CALL juDFT_error('Failed to allocated "work2"', calledby ='chani')
     149             :           ENDIF
     150             :        ENDIF
     151             :     ELSE
     152        2168 :        lrwork=4*hmat%global_size1+MAX(5*nn,np0*mq0)+ iceil(ne,hmat%blacsdata%nprow*hmat%blacsdata%npcol)*nn
     153             :        ! Allocate more in case of clusters
     154        6504 :        ALLOCATE(rwork(lrwork+10*hmat%global_size1), stat=ierr)
     155        2168 :        IF (err /= 0) THEN
     156           0 :           WRITE (*,*) 'ERROR: chani.F: Allocating rwork failed'
     157           0 :           CALL juDFT_error('Failed to allocated "rwork"', calledby ='chani')
     158             :        ENDIF
     159             :        
     160             :        CALL pzhegvx(1,'V','I','U',hmat%global_size1,hmat%data_c,1,1,&
     161             :             hmat%blacsdata%blacs_desc,smat%data_c,1,1, smat%blacsdata%blacs_desc,&
     162             :             0.0,1.0,1,num,abstol,num1,num2,eig2,orfac,ev_dist%data_c,1,1,&
     163             :             ev_dist%blacsdata%blacs_desc,work2_c,-1,rwork,-1,iwork,-1,ifail,iclustr,&
     164        2168 :             gap,ierr)
     165        2168 :        IF (ABS(work2_c(1)).GT.lwork2) THEN
     166        2168 :           lwork2=work2_c(1)
     167        2168 :           DEALLOCATE (work2_c)
     168        6504 :           ALLOCATE (work2_c(lwork2), stat=err)
     169        2168 :           IF (err /= 0) THEN
     170           0 :              WRITE (*,*) 'ERROR: chani.F: Allocating rwork failed:',lwork2
     171           0 :              CALL juDFT_error('Failed to allocated "work2"', calledby ='chani')
     172             :           ENDIF
     173             :        ENDIF
     174        2168 :        IF (rwork(1).GT.lrwork) THEN
     175           0 :           lrwork=rwork(1)
     176           0 :           DEALLOCATE(rwork)
     177             :           ! Allocate even more in case of clusters
     178           0 :           ALLOCATE (rwork(lrwork+20*hmat%global_size1), stat=err)
     179           0 :           IF (err /= 0) THEN
     180           0 :              WRITE (*,*) 'ERROR: chani.F: Allocating rwork failed: ', lrwork+20*hmat%global_size1
     181           0 :              CALL juDFT_error('Failed to allocated "rwork"', calledby ='chani')
     182             :           ENDIF
     183             :        ENDIF
     184             :     endif
     185        4872 :     IF (iwork(1) .GT. liwork) THEN
     186           0 :        liwork = iwork(1)
     187           0 :        DEALLOCATE (iwork)
     188           0 :        ALLOCATE (iwork(liwork), stat=err)
     189           0 :        IF (err /= 0) THEN
     190           0 :           WRITE (*,*) 'ERROR: chani.F: Allocating iwork failed: ',liwork
     191           0 :           CALL juDFT_error('Failed to allocated "iwork"', calledby ='chani')
     192             :        ENDIF
     193             :     ENDIF
     194             :     !
     195             :     !     Now solve generalized eigenvalue problem
     196             :     !
     197        4872 :     CALL timestart("SCALAPACK call")
     198        4872 :     if (hmat%l_real) THEN
     199             :        CALL pdsygvx(1,'V','I','U',hmat%global_size1,hmat%data_r,1,1,hmat%blacsdata%blacs_desc,smat%data_r,1,1, smat%blacsdata%blacs_desc,&
     200             :             1.0,1.0,1,num,abstol,num1,num2,eig2,orfac,ev_dist%data_r,1,1,&
     201             :             ev_dist%blacsdata%blacs_desc,work2_r,lwork2,iwork,liwork,ifail,iclustr,&
     202        2704 :             gap,ierr)
     203             :     else
     204             :        CALL pzhegvx(1,'V','I','U',hmat%global_size1,hmat%data_c,1,1,hmat%blacsdata%blacs_desc,smat%data_c,1,1, smat%blacsdata%blacs_desc,&
     205             :             1.0,1.0,1,num,abstol,num1,num2,eig2,orfac,ev_dist%data_c,1,1,&
     206             :             ev_dist%blacsdata%blacs_desc,work2_c,lwork2,rwork,lrwork,iwork,liwork,&
     207        2168 :             ifail,iclustr,gap,ierr)
     208        2168 :        DEALLOCATE(rwork)
     209             :     endif
     210        4872 :     CALL timestop("SCALAPACK call")
     211        4872 :     IF (ierr .NE. 0) THEN
     212             :        !IF (ierr /= 2) WRITE (oUnit,*) myid,' error in pzhegvx/pdsygvx, ierr=',ierr
     213             :        !IF (ierr <= 0) WRITE (oUnit,*) myid,' illegal input argument'
     214             :        IF (MOD(ierr,2) /= 0) THEN
     215             :           !WRITE (oUnit,*) myid,'some eigenvectors failed to converge'
     216             :           eigs: DO i = 1, ne
     217             :              IF (ifail(i) /= 0) THEN
     218             :                 !WRITE (oUnit,*) myid,' eigenvector',ifail(i), 'failed to converge'
     219             :              ELSE
     220             :                 EXIT eigs
     221             :              ENDIF
     222             :           ENDDO eigs
     223             :           !CALL CPP_flush(oUnit)
     224             :        ENDIF
     225             :        IF (MOD(ierr/4,2).NE.0) THEN
     226             :           !WRITE(oUnit,*) myid,' only',num2,' eigenvectors converged'
     227             :           !CALL CPP_flush(oUnit)
     228             :        ENDIF
     229          82 :        IF (MOD(ierr/8,2).NE.0) THEN
     230             :           !WRITE(oUnit,*) myid,' PDSTEBZ failed to compute eigenvalues'
     231           0 :           CALL judft_warn("SCALAPACK failed to solve eigenvalue problem",calledby="scalapack.f90")
     232             :        ENDIF
     233          82 :        IF (MOD(ierr/16,2).NE.0) THEN
     234             :           !WRITE(oUnit,*) myid,' B was not positive definite, Cholesky failed at',ifail(1)
     235             :           CALL judft_warn("SCALAPACK failed: B was not positive definite. " // new_line("a") // &
     236             :                           "Order of the smallest minor which is not positive definite:" // int2str(ifail(1))&
     237           0 :                          ,calledby="scalapack.f90")
     238             :        ENDIF
     239             :     ENDIF
     240        4872 :     IF (num2 < num1) THEN
     241             :        !IF (myid ==0) THEN
     242           0 :           WRITE(oUnit,*) 'Not all eigenvalues wanted are found'
     243           0 :           WRITE(oUnit,*) 'number of eigenvalues/vectors wanted',num1
     244           0 :           WRITE(oUnit,*) 'number of eigenvalues/vectors found',num2
     245             :           !CALL CPP_flush(oUnit)
     246             :        !ENDIF
     247             :     ENDIF
     248             :     !
     249             :     !     Each process has all eigenvalues in output
     250      217432 :     eig(:num2) = eig2(:num2)    
     251        4872 :     DEALLOCATE(eig2)
     252             :     !
     253             :     !
     254             :     !     Redistribute eigenvectors  from ScaLAPACK distribution to each process, i.e. for
     255             :     !     process i these are eigenvectors i+1, np+i+1, 2*np+i+1...
     256             :     !     Only num=num2/np eigenvectors per process
     257             :     !
     258        4872 :     num=FLOOR(REAL(num2)/np)
     259        4872 :     IF (myid.LT.num2-(num2/np)*np) num=num+1
     260        4872 :     ne=0
     261        4872 :     DO i=myid+1,num2,np
     262      106280 :        ne=ne+1
     263             :        !eig(ne)=eig2(i)
     264             :     ENDDO
     265        4872 :     ALLOCATE(t_mpimat::ev)
     266        4872 :     CALL ev%init(ev_dist%l_real,ev_dist%global_size1,ev_dist%global_size1,ev_dist%blacsdata%mpi_com,.FALSE.)
     267       14616 :     CALL ev%copy(ev_dist,1,1)
     268             :     CLASS DEFAULT
     269           0 :       call judft_error("Wrong type (1) in scalapack")
     270             :     END SELECT
     271             :     CLASS DEFAULT
     272           0 :       call judft_error("Wrong type (2) in scalapack")
     273             :     END SELECT
     274        4872 :     call ev_dist%free()
     275             : #ifdef FLEUR_USE_SCOREP
     276             :     SCOREP_RECORDING_ON()
     277             : #endif    
     278             : #endif
     279        4872 :   END SUBROUTINE scalapack
     280        4872 : END MODULE m_scalapack

Generated by: LCOV version 1.14