LCOV - code coverage report
Current view: top level - diagonalization - scalapack.F90 (source / functions) Hit Total Coverage
Test: combined.info Lines: 0 111 0.0 %
Date: 2019-09-08 04:53:50 Functions: 0 1 0.0 %

          Line data    Source code
       1             : !--------------------------------------------------------------------------------
       2             : ! Copyright (c) 2016 Peter Grünberg Institut, Forschungszentrum Jülich, Germany
       3             : ! This file is part of FLEUR and available as free software under the conditions
       4             : ! of the MIT license as expressed in the LICENSE file in more detail.
       5             : !--------------------------------------------------------------------------------
       6             : 
       7             : MODULE m_scalapack
       8             : CONTAINS
       9           0 :   SUBROUTINE scalapack(hmat,smat,ne,eig,ev)
      10             :     !
      11             :     !----------------------------------------------------
      12             :     !- Parallel eigensystem solver - driver routine; gb99
      13             :     !  Uses the SCALAPACK for the actual diagonalization
      14             :     !
      15             :     ! hmat ..... Hamiltonian matrix
      16             :     ! smat ..... overlap matrix
      17             :     ! ne ....... number of ev's searched (and found) on this node
      18             :     !            On input, overall number of ev's searched,
      19             :     !            On output, local number of ev's found
      20             :     ! eig ...... all eigenvalues, output
      21             :     ! ev ....... local eigenvectors, output
      22             :     !
      23             :     !----------------------------------------------------
      24             :     !
      25             : #include"cpp_double.h"
      26             :     USE m_juDFT
      27             :     USE m_types_mpimat
      28             :     USE m_types_mat
      29             :     IMPLICIT NONE
      30             :     CLASS(t_mat),INTENT(INOUT)    :: hmat,smat
      31             :     CLASS(t_mat),ALLOCATABLE,INTENT(OUT)::ev
      32             :     REAL,INTENT(out)              :: eig(:)
      33             :     INTEGER,INTENT(INOUT)         :: ne
      34             :     
      35             :     
      36             : #ifdef CPP_SCALAPACK
      37             : #ifdef CPP_MPI    
      38             :     INCLUDE 'mpif.h'
      39             : #endif
      40             :     !...  Local variables
      41             :     !
      42             :     INTEGER i , ierr, err
      43           0 :     INTEGER, ALLOCATABLE :: iwork(:)
      44           0 :     REAL,    ALLOCATABLE :: rwork(:)
      45             :     INTEGER              :: lrwork
      46             : 
      47             :     !
      48             :     !  ScaLAPACK things
      49             :     CHARACTER (len=1)    :: uplo
      50             :     INTEGER              :: num,num1,num2,liwork,lwork2,np0,mq0,np,myid
      51             :     INTEGER              :: iceil, numroc, nn, nb
      52           0 :     INTEGER, ALLOCATABLE :: ifail(:), iclustr(:)
      53             :     REAL                 :: abstol,orfac=1.E-4,CPP_LAPACK_slamch
      54           0 :     REAL,ALLOCATABLE     :: eig2(:), gap(:)
      55           0 :     REAL,    ALLOCATABLE :: work2_r(:)
      56           0 :     COMPLEX, ALLOCATABLE :: work2_c(:)
      57             : 
      58           0 :     TYPE(t_mpimat):: ev_dist
      59             :     
      60             :     EXTERNAL iceil, numroc
      61             :     EXTERNAL CPP_LAPACK_slamch
      62             :     
      63             :     
      64             :     SELECT TYPE(hmat)
      65             :     TYPE IS (t_mpimat)
      66           0 :     SELECT TYPE(smat)
      67             :     TYPE IS (t_mpimat)
      68             : 
      69           0 :     ALLOCATE(eig2(hmat%global_size1))
      70             : 
      71             : 
      72           0 :     CALL MPI_COMM_RANK(hmat%blacsdata%mpi_com,myid,ierr)
      73           0 :     CALL MPI_COMM_SIZE(hmat%blacsdata%mpi_com,np,ierr)
      74             : 
      75           0 :     num=ne !no of states solved for
      76             :     
      77           0 :     abstol=2.0*CPP_LAPACK_slamch('S') ! PDLAMCH gave an error on ZAMpano
      78             : 
      79           0 :     CALL ev_dist%init(hmat)
      80             : 
      81             :     !smat%blacs_desc(2)    = hmat%blacs_desc(2)
      82             :     !ev_dist%blacs_desc(2) = hmat%blacs_desc(2)
      83             :     !smat%blacs_desc=hmat%blacs_desc
      84             :     !ev_dist%blacs_desc=hmat%blacs_desc
      85             :     
      86             :     
      87           0 :     nb=hmat%blacsdata%blacs_desc(5)! Blocking factor
      88           0 :     IF (nb.NE.hmat%blacsdata%blacs_desc(6)) CALL judft_error("Different block sizes for rows/columns not supported")
      89             :   
      90             :     !
      91           0 :     nn=MAX(MAX(hmat%global_size1,nb),2)
      92           0 :     np0=numroc(nn,nb,0,0,hmat%blacsdata%nprow)
      93           0 :     mq0=numroc(MAX(MAX(ne,nb),2),nb,0,0,hmat%blacsdata%npcol)
      94           0 :     IF (hmat%l_real) THEN
      95           0 :        lwork2=5*hmat%global_size1+MAX(5*nn,np0*mq0+2*nb*nb)+ iceil(ne,hmat%blacsdata%nprow*hmat%blacsdata%npcol)*nn
      96           0 :        ALLOCATE ( work2_r(lwork2+10*hmat%global_size1), stat=err ) ! Allocate more in case of clusters
      97             :     ELSE
      98           0 :        lwork2=hmat%global_size1+MAX(nb*(np0+1),3)
      99           0 :        ALLOCATE ( work2_c(lwork2), stat=err )
     100             :     ENDIF
     101           0 :     IF (err.NE.0) THEN 
     102           0 :        WRITE (*,*) 'work2  :',err,lwork2
     103           0 :        CALL juDFT_error('Failed to allocated "work2"', calledby ='chani')
     104             :     ENDIF
     105             :     
     106           0 :     liwork=6*MAX(MAX(hmat%global_size1,hmat%blacsdata%nprow*hmat%blacsdata%npcol+1),4)
     107           0 :     ALLOCATE ( iwork(liwork), stat=err )
     108           0 :     IF (err.NE.0) THEN
     109           0 :        WRITE (*,*) 'iwork  :',err,liwork
     110           0 :        CALL juDFT_error('Failed to allocated "iwork"', calledby ='chani')
     111             :     ENDIF
     112           0 :     ALLOCATE ( ifail(hmat%global_size1), stat=err )
     113           0 :     IF (err.NE.0) THEN
     114           0 :        WRITE (*,*) 'ifail  :',err,hmat%global_size1
     115           0 :        CALL juDFT_error('Failed to allocated "ifail"', calledby ='chani')
     116             :     ENDIF
     117           0 :     ALLOCATE ( iclustr(2*hmat%blacsdata%nprow*hmat%blacsdata%npcol), stat=err )
     118           0 :     IF (err.NE.0) THEN
     119           0 :        WRITE (*,*) 'iclustr:',err,2*hmat%blacsdata%nprow*hmat%blacsdata%npcol
     120           0 :        CALL juDFT_error('Failed to allocated "iclustr"', calledby ='chani')
     121             :     ENDIF
     122           0 :     ALLOCATE ( gap(hmat%blacsdata%nprow*hmat%blacsdata%npcol), stat=err )
     123           0 :     IF (err.NE.0) THEN
     124           0 :        WRITE (*,*) 'gap    :',err,hmat%blacsdata%nprow*hmat%blacsdata%npcol
     125           0 :        CALL juDFT_error('Failed to allocated "gap"', calledby ='chani')
     126             :     ENDIF
     127             :     !
     128             :     !     Compute size of workspace
     129             :     !
     130           0 :     IF (hmat%l_real) THEN
     131           0 :        uplo='U'
     132             :        CALL CPP_LAPACK_pdsygvx(1,'V','I','U',hmat%global_size1,hmat%data_r,1,1,&
     133             :             hmat%blacsdata%blacs_desc,smat%data_r,1,1,smat%blacsdata%blacs_desc,&
     134             :             0.0,1.0,1,num,abstol,num1,num2,eig2,orfac,ev_dist%data_r,1,1,&
     135           0 :             ev_dist%blacsdata%blacs_desc,work2_r,-1,iwork,-1,ifail,iclustr, gap,ierr)
     136           0 :        IF ( work2_r(1).GT.lwork2) THEN
     137           0 :           lwork2 = work2_r(1)
     138           0 :           DEALLOCATE (work2_r)
     139           0 :           ALLOCATE ( work2_r(lwork2+20*hmat%global_size1), stat=err ) ! Allocate even more in case of clusters
     140           0 :           IF (err.NE.0) THEN
     141           0 :              WRITE (*,*) 'work2  :',err,lwork2
     142           0 :              CALL juDFT_error('Failed to allocated "work2"', calledby ='chani')
     143             :           ENDIF
     144             :        ENDIF
     145             :     ELSE
     146           0 :        lrwork=4*hmat%global_size1+MAX(5*nn,np0*mq0)+ iceil(ne,hmat%blacsdata%nprow*hmat%blacsdata%npcol)*nn
     147             :        ! Allocate more in case of clusters
     148           0 :        ALLOCATE(rwork(lrwork+10*hmat%global_size1), stat=ierr)
     149           0 :        IF (err /= 0) THEN
     150           0 :           WRITE (*,*) 'ERROR: chani.F: Allocating rwork failed'
     151           0 :           CALL juDFT_error('Failed to allocated "rwork"', calledby ='chani')
     152             :        ENDIF
     153             :        
     154             :        CALL CPP_LAPACK_pzhegvx(1,'V','I','U',hmat%global_size1,hmat%data_c,1,1,&
     155             :             hmat%blacsdata%blacs_desc,smat%data_c,1,1, smat%blacsdata%blacs_desc,&
     156             :             0.0,1.0,1,num,abstol,num1,num2,eig2,orfac,ev_dist%data_c,1,1,&
     157             :             ev_dist%blacsdata%blacs_desc,work2_c,-1,rwork,-1,iwork,-1,ifail,iclustr,&
     158           0 :             gap,ierr)
     159           0 :        IF (ABS(work2_c(1)).GT.lwork2) THEN
     160           0 :           lwork2=work2_c(1)
     161           0 :           DEALLOCATE (work2_c)
     162           0 :           ALLOCATE (work2_c(lwork2), stat=err)
     163           0 :           IF (err /= 0) THEN
     164           0 :              WRITE (*,*) 'ERROR: chani.F: Allocating rwork failed:',lwork2
     165           0 :              CALL juDFT_error('Failed to allocated "work2"', calledby ='chani')
     166             :           ENDIF
     167             :        ENDIF
     168           0 :        IF (rwork(1).GT.lrwork) THEN
     169           0 :           lrwork=rwork(1)
     170           0 :           DEALLOCATE(rwork)
     171             :           ! Allocate even more in case of clusters
     172           0 :           ALLOCATE (rwork(lrwork+20*hmat%global_size1), stat=err)
     173           0 :           IF (err /= 0) THEN
     174           0 :              WRITE (*,*) 'ERROR: chani.F: Allocating rwork failed: ', lrwork+20*hmat%global_size1
     175           0 :              CALL juDFT_error('Failed to allocated "rwork"', calledby ='chani')
     176             :           ENDIF
     177             :        ENDIF
     178             :     endif
     179           0 :     IF (iwork(1) .GT. liwork) THEN
     180           0 :        liwork = iwork(1)
     181           0 :        DEALLOCATE (iwork)
     182           0 :        ALLOCATE (iwork(liwork), stat=err)
     183           0 :        IF (err /= 0) THEN
     184           0 :           WRITE (*,*) 'ERROR: chani.F: Allocating iwork failed: ',liwork
     185           0 :           CALL juDFT_error('Failed to allocated "iwork"', calledby ='chani')
     186             :        ENDIF
     187             :     ENDIF
     188             :     !
     189             :     !     Now solve generalized eigenvalue problem
     190             :     !
     191           0 :     CALL timestart("SCALAPACK call")
     192           0 :     if (hmat%l_real) THEN
     193             :        CALL CPP_LAPACK_pdsygvx(1,'V','I','U',hmat%global_size1,hmat%data_r,1,1,hmat%blacsdata%blacs_desc,smat%data_r,1,1, smat%blacsdata%blacs_desc,&
     194             :             1.0,1.0,1,num,abstol,num1,num2,eig2,orfac,ev_dist%data_r,1,1,&
     195             :             ev_dist%blacsdata%blacs_desc,work2_r,lwork2,iwork,liwork,ifail,iclustr,&
     196           0 :             gap,ierr)
     197             :     else
     198             :        CALL CPP_LAPACK_pzhegvx(1,'V','I','U',hmat%global_size1,hmat%data_c,1,1,hmat%blacsdata%blacs_desc,smat%data_c,1,1, smat%blacsdata%blacs_desc,&
     199             :             1.0,1.0,1,num,abstol,num1,num2,eig2,orfac,ev_dist%data_c,1,1,&
     200             :             ev_dist%blacsdata%blacs_desc,work2_c,lwork2,rwork,lrwork,iwork,liwork,&
     201           0 :             ifail,iclustr,gap,ierr)
     202           0 :        DEALLOCATE(rwork)
     203             :     endif
     204           0 :     CALL timestop("SCALAPACK call")
     205           0 :     IF (ierr .NE. 0) THEN
     206             :        !IF (ierr /= 2) WRITE (6,*) myid,' error in pzhegvx/pdsygvx, ierr=',ierr
     207             :        !IF (ierr <= 0) WRITE (6,*) myid,' illegal input argument'
     208             :        IF (MOD(ierr,2) /= 0) THEN
     209             :           !WRITE (6,*) myid,'some eigenvectors failed to converge'
     210             :           eigs: DO i = 1, ne
     211             :              IF (ifail(i) /= 0) THEN
     212             :                 !WRITE (6,*) myid,' eigenvector',ifail(i), 'failed to converge'
     213             :              ELSE
     214             :                 EXIT eigs
     215             :              ENDIF
     216             :           ENDDO eigs
     217             :           !CALL CPP_flush(6)
     218             :        ENDIF
     219             :        IF (MOD(ierr/4,2).NE.0) THEN
     220             :           !WRITE(6,*) myid,' only',num2,' eigenvectors converged'
     221             :           !CALL CPP_flush(6)
     222             :        ENDIF
     223           0 :        IF (MOD(ierr/8,2).NE.0) THEN
     224             :           !WRITE(6,*) myid,' PDSTEBZ failed to compute eigenvalues'
     225           0 :           CALL judft_warn("SCALAPACK failed to solve eigenvalue problem",calledby="scalapack.f90")
     226             :        ENDIF
     227           0 :        IF (MOD(ierr/16,2).NE.0) THEN
     228             :           !WRITE(6,*) myid,' B was not positive definite, Cholesky failed at',ifail(1)
     229           0 :           CALL judft_warn("SCALAPACK failed: B was not positive definite",calledby="scalapack.f90")
     230             :        ENDIF
     231             :     ENDIF
     232           0 :     IF (num2 < num1) THEN
     233             :        !IF (myid ==0) THEN
     234           0 :           WRITE(6,*) 'Not all eigenvalues wanted are found'
     235           0 :           WRITE(6,*) 'number of eigenvalues/vectors wanted',num1
     236           0 :           WRITE(6,*) 'number of eigenvalues/vectors found',num2
     237             :           !CALL CPP_flush(6)
     238             :        !ENDIF
     239             :     ENDIF
     240             :     !
     241             :     !     Each process has all eigenvalues in output
     242           0 :     eig(:num2) = eig2(:num2)    
     243           0 :     DEALLOCATE(eig2)
     244             :     !
     245             :     !
     246             :     !     Redistribute eigenvectors  from ScaLAPACK distribution to each process, i.e. for
     247             :     !     process i these are eigenvectors i+1, np+i+1, 2*np+i+1...
     248             :     !     Only num=num2/np eigenvectors per process
     249             :     !
     250           0 :     num=FLOOR(REAL(num2)/np)
     251           0 :     IF (myid.LT.num2-(num2/np)*np) num=num+1
     252           0 :     ne=0
     253           0 :     DO i=myid+1,num2,np
     254           0 :        ne=ne+1
     255             :        !eig(ne)=eig2(i)
     256             :     ENDDO
     257           0 :     ALLOCATE(t_mpimat::ev)
     258           0 :     CALL ev%init(ev_dist%l_real,ev_dist%global_size1,ev_dist%global_size1,ev_dist%blacsdata%mpi_com,.FALSE.)
     259           0 :     CALL ev%copy(ev_dist,1,1)
     260             :     CLASS DEFAULT
     261           0 :       call judft_error("Wrong type (1) in scalapack")
     262             :     END SELECT
     263             :     CLASS DEFAULT
     264           0 :       call judft_error("Wrong type (2) in scalapack")
     265             :     END SELECT
     266             : #endif
     267           0 :   END SUBROUTINE scalapack
     268           0 : END MODULE m_scalapack

Generated by: LCOV version 1.13