LCOV - code coverage report
Current view: top level - eigen - hsmt_nonsph.F90 (source / functions) Hit Total Coverage
Test: combined.info Lines: 51 55 92.7 %
Date: 2019-09-08 04:53:50 Functions: 3 3 100.0 %

          Line data    Source code
       1             : !--------------------------------------------------------------------------------
       2             : ! Copyright (c) 2016 Peter Grünberg Institut, Forschungszentrum Jülich, Germany
       3             : ! This file is part of FLEUR and available as free software under the conditions
       4             : ! of the MIT license as expressed in the LICENSE file in more detail.
       5             : !--------------------------------------------------------------------------------
       6             : MODULE m_hsmt_nonsph
       7             :   USE m_juDFT
       8             :   IMPLICIT NONE
       9             :   PRIVATE
      10             :   PUBLIC hsmt_nonsph
      11             :   INTERFACE priv_noMPI
      12             :     module procedure priv_noMPI_cpu
      13             : #ifdef CPP_GPU
      14             :     module procedure priv_noMPI_gpu
      15             : #endif
      16             :   END INTERFACE
      17             : CONTAINS
      18        4396 :   SUBROUTINE hsmt_nonsph(n,mpi,sym,atoms,isp,iintsp,jintsp,chi,noco,cell,lapw,td,fj,gj,hmat)
      19             :     USE m_types
      20             :     IMPLICIT NONE
      21             :     TYPE(t_mpi),INTENT(IN)        :: mpi
      22             :     TYPE(t_sym),INTENT(IN)        :: sym
      23             :     TYPE(t_noco),INTENT(IN)       :: noco
      24             :     TYPE(t_cell),INTENT(IN)       :: cell
      25             :     TYPE(t_atoms),INTENT(IN)      :: atoms
      26             :     TYPE(t_lapw),INTENT(IN)       :: lapw
      27             :     TYPE(t_tlmplm),INTENT(IN)     :: td
      28             :     !     .. Scalar Arguments ..
      29             :     INTEGER, INTENT (IN)          :: n,isp,iintsp,jintsp
      30             :     COMPLEX,INTENT(IN)            :: chi
      31             :     !     .. Array Arguments ..
      32             : #if defined CPP_GPU
      33             :     REAL,MANAGED,INTENT(IN)    :: fj(:,:,:),gj(:,:,:)
      34             : #else
      35             :     REAL,INTENT(IN)            :: fj(:,0:,:),gj(:,0:,:)
      36             : #endif
      37             :     CLASS(t_mat),INTENT(INOUT)     ::hmat
      38             : #if defined CPP_GPU
      39             :     COMPLEX,ALLOCATABLE,DEVICE :: h_loc_dev(:,:)
      40             : #endif
      41        4396 :     CALL timestart("non-spherical setup")
      42        4396 :     IF (mpi%n_size==1) THEN
      43             : #if defined CPP_GPU
      44             :     ALLOCATE(h_loc_dev(size(td%h_loc,1),size(td%h_loc,2)))
      45             :     h_loc_dev(1:,1:) = CONJG(td%h_loc(0:,0:,n,isp)) 
      46             : 
      47             :        CALL priv_noMPI(n,mpi,sym,atoms,isp,iintsp,jintsp,chi,noco,cell,lapw,h_loc_dev,fj,gj,hmat)
      48             : #else
      49         580 :        CALL priv_noMPI(n,mpi,sym,atoms,isp,iintsp,jintsp,chi,noco,cell,lapw,td,fj,gj,hmat)
      50             : #endif
      51             :     ELSE
      52        3816 :        CALL priv_MPI(n,mpi,sym,atoms,isp,iintsp,jintsp,chi,noco,cell,lapw,td,fj,gj,hmat)
      53             :     ENDIF
      54        4396 :     CALL timestop("non-spherical setup")
      55        4396 :   END SUBROUTINE hsmt_nonsph
      56             : 
      57             : #if defined CPP_GPU
      58             :   SUBROUTINE priv_noMPI_gpu(n,mpi,sym,atoms,isp,iintsp,jintsp,chi,noco,cell,lapw,h_loc_dev,fj_dev,gj_dev,hmat)
      59             : !Calculate overlap matrix, GPU version
      60             : !note that basically all matrices in the GPU version are conjugates of their cpu counterparts
      61             :     USE m_hsmt_ab
      62             :     USE m_constants, ONLY : fpi_const,tpi_const
      63             :     USE m_types
      64             :     USE m_ylm
      65             :   !   cublas: required to use generic BLAS interface
      66             :   !   cudafor: required to use CUDA runtime API routines 
      67             :   !   nvtx: profiling
      68             :     USE cublas   
      69             :     USE cudafor
      70             :     USE nvtx
      71             : 
      72             :     IMPLICIT NONE
      73             :     TYPE(t_mpi),INTENT(IN)      :: mpi
      74             :     TYPE(t_sym),INTENT(IN)      :: sym
      75             :     TYPE(t_noco),INTENT(IN)     :: noco
      76             :     TYPE(t_cell),INTENT(IN)     :: cell
      77             :     TYPE(t_atoms),INTENT(IN)    :: atoms
      78             :     TYPE(t_lapw),INTENT(IN)     :: lapw
      79             :     COMPLEX, INTENT(IN),DEVICE  :: h_loc_dev(:,:)
      80             :     !     ..
      81             :     !     .. Scalar Arguments ..
      82             :     INTEGER, INTENT (IN) :: n,isp,iintsp,jintsp
      83             :     COMPLEX,INTENT(in)   :: chi
      84             :     !     ..
      85             :     !     .. Array Arguments ..
      86             :     REAL,   INTENT(IN),   DEVICE :: fj_dev(:,:,:), gj_dev(:,:,:)
      87             :     CLASS(t_mat),INTENT(INOUT)     ::hmat
      88             :     
      89             :     INTEGER:: nn,na,ab_size,l,ll,m
      90             :     real :: rchi
      91             :     COMPLEX,ALLOCATABLE,DEVICE :: ab1_dev(:,:), ab_dev(:,:), ab2_dev(:,:)
      92             :     integer :: i, j, istat
      93             :     call nvtxStartRange("hsmt_nonsph",1)    
      94             : 
      95             :     ALLOCATE(ab1_dev(lapw%nv(jintsp),2*atoms%lmaxd*(atoms%lmaxd+2)+2))
      96             :     ALLOCATE(ab_dev(MAXVAL(lapw%nv),2*atoms%lmaxd*(atoms%lmaxd+2)+2))
      97             :     IF (iintsp.NE.jintsp) ALLOCATE(ab2_dev(lapw%nv(iintsp),2*atoms%lmaxd*(atoms%lmaxd+2)+2))
      98             : 
      99             :     IF (hmat%l_real) THEN
     100             :        IF (ANY(SHAPE(hmat%data_c)/=SHAPE(hmat%data_r))) THEN
     101             :           DEALLOCATE(hmat%data_c)
     102             :           ALLOCATE(hmat%data_c(SIZE(hmat%data_r,1),SIZE(hmat%data_r,2)))
     103             :        ENDIF
     104             :        hmat%data_c=0.0
     105             :     ENDIF
     106             : 
     107             :     DO nn = 1,atoms%neq(n)
     108             :        na = SUM(atoms%neq(:n-1))+nn
     109             :        IF ((atoms%invsat(na)==0) .OR. (atoms%invsat(na)==1)) THEN
     110             :           rchi=MERGE(REAL(chi),REAL(chi)*2,(atoms%invsat(na)==0))
     111             : 
     112             :           CALL hsmt_ab(sym,atoms,noco,isp,jintsp,n,na,cell,lapw,fj_dev,gj_dev,ab_dev,ab_size,.TRUE.)
     113             : 
     114             :           !Calculate Hamiltonian
     115             :           CALL zgemm("N","N",lapw%nv(jintsp),ab_size,ab_size,CMPLX(1.0,0.0),ab_dev,SIZE(ab_dev,1),&
     116             :                      h_loc_dev,SIZE(h_loc_dev,1),CMPLX(0.,0.),ab1_dev,SIZE(ab1_dev,1))
     117             :           IF (iintsp==jintsp) THEN
     118             :              call nvtxStartRange("zherk",3)
     119             :              CALL ZHERK("U","N",lapw%nv(iintsp),ab_size,Rchi,ab1_dev,SIZE(ab1_dev,1),1.0,hmat%data_c,SIZE(hmat%data_c,1))
     120             :              istat = cudaDeviceSynchronize() 
     121             :              call nvtxEndRange()    
     122             :           ELSE  !here the l_ss off-diagonal part starts
     123             :              !Second set of ab is needed
     124             :              CALL hsmt_ab(sym,atoms,noco,isp,iintsp,n,na,cell,lapw,fj_dev,gj_dev,ab_dev,ab_size,.TRUE.)
     125             :              CALL zgemm("N","N",lapw%nv(iintsp),ab_size,ab_size,CMPLX(1.0,0.0),ab_dev,SIZE(ab_dev,1),&
     126             :                         h_loc_dev,SIZE(h_loc_dev,1),CMPLX(0.,0.),ab2_dev,SIZE(ab2_dev,1))
     127             :              !Multiply for Hamiltonian
     128             : 
     129             :              !$cuf kernel do<<<*,256>>>
     130             :              do i = 1,size(ab1_dev,2)
     131             :                do j = 1,size(ab1_dev,1)
     132             :                   ab1_dev(j,i) = conjg(ab1_dev(j,i))
     133             :                enddo
     134             :              enddo
     135             :              CALL zgemm("N","T",lapw%nv(iintsp),lapw%nv(jintsp),ab_size,chi,ab2_dev,SIZE(ab2_dev,1),&
     136             :                         ab1_dev,SIZE(ab1_dev,1),CMPLX(1.0,0.0),hmat%data_c,SIZE(hmat%data_c,1))
     137             :           ENDIF
     138             :        ENDIF
     139             :     END DO
     140             : 
     141             :     IF (hmat%l_real) THEN
     142             :        hmat%data_r=hmat%data_r+REAL(hmat%data_c)
     143             :     ENDIF
     144             :     call nvtxEndRange
     145             :  END SUBROUTINE priv_noMPI_gpu
     146             : #endif
     147             : 
     148         580 :   SUBROUTINE priv_noMPI_cpu(n,mpi,sym,atoms,isp,iintsp,jintsp,chi,noco,cell,lapw,td,fj,gj,hmat)
     149             : !Calculate overlap matrix
     150             :     USE m_hsmt_ab
     151             :     USE m_constants, ONLY : fpi_const,tpi_const
     152             :     USE m_types
     153             :     USE m_ylm
     154             : 
     155             :     IMPLICIT NONE
     156             :     TYPE(t_mpi),INTENT(IN)      :: mpi
     157             :     TYPE(t_sym),INTENT(IN)      :: sym
     158             :     TYPE(t_noco),INTENT(IN)     :: noco
     159             :     TYPE(t_cell),INTENT(IN)     :: cell
     160             :     TYPE(t_atoms),INTENT(IN)    :: atoms
     161             :     TYPE(t_lapw),INTENT(IN)     :: lapw
     162             :     TYPE(t_tlmplm),INTENT(IN)   :: td
     163             :     !     ..
     164             :     !     .. Scalar Arguments ..
     165             :     INTEGER, INTENT (IN) :: n,isp,iintsp,jintsp
     166             :     COMPLEX,INTENT(in)   :: chi
     167             :     !     ..
     168             :     !     .. Array Arguments ..
     169             :     REAL,INTENT(IN) :: fj(:,0:,:),gj(:,0:,:)
     170             :     CLASS(t_mat),INTENT(INOUT)::hmat
     171             : 
     172             :     
     173             :     INTEGER:: nn,na,ab_size,l,ll,m
     174         580 :     COMPLEX,ALLOCATABLE:: ab(:,:),ab1(:,:),ab2(:,:)
     175             :     real :: rchi
     176             : 
     177         580 :     ALLOCATE(ab(MAXVAL(lapw%nv),2*atoms%lmaxd*(atoms%lmaxd+2)+2),ab1(lapw%nv(jintsp),2*atoms%lmaxd*(atoms%lmaxd+2)+2))
     178             : 
     179         580 :     IF (iintsp.NE.jintsp) ALLOCATE(ab2(lapw%nv(iintsp),2*atoms%lmaxd*(atoms%lmaxd+2)+2))
     180             : 
     181         580 :     IF (hmat%l_real) THEN
     182        1056 :        IF (ANY(SHAPE(hmat%data_c)/=SHAPE(hmat%data_r))) THEN
     183         152 :           DEALLOCATE(hmat%data_c)
     184         696 :           ALLOCATE(hmat%data_c(SIZE(hmat%data_r,1),SIZE(hmat%data_r,2)))
     185             :        ENDIF
     186         272 :        hmat%data_c=0.0
     187             :     ENDIF
     188             :     
     189        1174 :     DO nn = 1,atoms%neq(n)
     190         594 :        na = SUM(atoms%neq(:n-1))+nn
     191        1174 :        IF ((atoms%invsat(na)==0) .OR. (atoms%invsat(na)==1)) THEN
     192         582 :           rchi=MERGE(REAL(chi),REAL(chi)*2,(atoms%invsat(na)==0))
     193             : 
     194         582 :           CALL hsmt_ab(sym,atoms,noco,isp,jintsp,n,na,cell,lapw,fj,gj,ab,ab_size,.TRUE.)
     195             :           !Calculate Hamiltonian
     196             :           CALL zgemm("N","N",lapw%nv(jintsp),ab_size,ab_size,CMPLX(1.0,0.0),ab,SIZE(ab,1),&
     197         582 :                      td%h_loc(0:,0:,n,isp),SIZE(td%h_loc,1),CMPLX(0.,0.),ab1,SIZE(ab1,1))
     198             :           !ab1=MATMUL(ab(:lapw%nv(iintsp),:ab_size),td%h_loc(:ab_size,:ab_size,n,isp))
     199         582 :           IF (iintsp==jintsp) THEN
     200         582 :              IF (isp<3) THEN
     201         582 :                 CALL ZHERK("U","N",lapw%nv(iintsp),ab_size,Rchi,CONJG(ab1),SIZE(ab1,1),1.0,hmat%data_c,SIZE(hmat%data_c,1))
     202             :              ELSE !This is the case of a local off-diagonal contribution.
     203             :                   !It is not Hermitian, so we need to USE zgemm CALL
     204             :                 CALL zgemm("N","T",lapw%nv(iintsp),lapw%nv(jintsp),ab_size,chi,CONJG(ab),SIZE(ab,1),&
     205           0 :                      ab1,SIZE(ab1,1),CMPLX(1.0,0.0),hmat%data_c,SIZE(hmat%data_c,1))
     206             :              ENDIF
     207             :           ELSE  !here the l_ss off-diagonal part starts
     208             :              !Second set of ab is needed
     209           0 :              CALL hsmt_ab(sym,atoms,noco,isp,iintsp,n,na,cell,lapw,fj,gj,ab,ab_size,.TRUE.)
     210             :              CALL zgemm("N","N",lapw%nv(iintsp),ab_size,ab_size,CMPLX(1.0,0.0),ab,SIZE(ab,1),&
     211           0 :                         td%h_loc(0:,0:,n,isp),SIZE(td%h_loc,1),CMPLX(0.,0.),ab2,SIZE(ab2,1))
     212             :              !Multiply for Hamiltonian
     213             :              CALL zgemm("N","T",lapw%nv(iintsp),lapw%nv(jintsp),ab_size,chi,conjg(ab2),SIZE(ab2,1),&
     214           0 :                         ab1,SIZE(ab1,1),CMPLX(1.0,0.0),hmat%data_c,SIZE(hmat%data_c,1))
     215             :           ENDIF
     216             :        ENDIF
     217             :     END DO
     218             :     
     219         580 :     IF (hmat%l_real) THEN
     220         272 :        hmat%data_r=hmat%data_r+REAL(hmat%data_c)
     221             :     ENDIF
     222             : 
     223         580 :  END SUBROUTINE priv_noMPI_cpu
     224             : 
     225             : 
     226        3816 :   SUBROUTINE priv_MPI(n,mpi,sym,atoms,isp,iintsp,jintsp,chi,noco,cell,lapw,td,fj,gj,hmat)
     227             : !Calculate overlap matrix
     228             :     USE m_hsmt_ab
     229             :     USE m_constants, ONLY : fpi_const,tpi_const
     230             :     USE m_types
     231             :     USE m_ylm
     232             :     IMPLICIT NONE
     233             :     TYPE(t_mpi),INTENT(IN)      :: mpi
     234             :     TYPE(t_sym),INTENT(IN)      :: sym
     235             :     TYPE(t_noco),INTENT(IN)     :: noco
     236             :     TYPE(t_cell),INTENT(IN)     :: cell
     237             :     TYPE(t_atoms),INTENT(IN)    :: atoms
     238             :     TYPE(t_lapw),INTENT(IN)     :: lapw
     239             :     TYPE(t_tlmplm),INTENT(IN)   :: td
     240             :     !     ..
     241             :     !     .. Scalar Arguments ..
     242             :     INTEGER, INTENT (IN) :: n,isp,iintsp,jintsp
     243             :     COMPLEX,INTENT(in)   :: chi
     244             :     !     ..
     245             :     !     .. Array Arguments ..
     246             :     REAL,INTENT(IN) :: fj(:,0:,:),gj(:,0:,:)
     247             :     CLASS(t_mat),INTENT(INOUT)::hmat
     248             : 
     249             :     
     250             :     INTEGER:: nn,na,ab_size,l,ll,m,i,ii
     251        3816 :     COMPLEX,ALLOCATABLE:: ab(:,:),ab1(:,:),ab_select(:,:)
     252             :     real :: rchi
     253             : 
     254        3816 :     ALLOCATE(ab(MAXVAL(lapw%nv),2*atoms%lnonsph(n)*(atoms%lnonsph(n)+2)+2),ab1(lapw%nv(jintsp),2*atoms%lnonsph(n)*(atoms%lnonsph(n)+2)+2),ab_select(lapw%num_local_cols(jintsp),2*atoms%lnonsph(n)*(atoms%lnonsph(n)+2)+2))
     255             : 
     256             :     !IF (iintsp.NE.jintsp) ALLOCATE(ab_select1(lapw%num_local_cols(jintsp),2*atoms%lnonsph(n)*(atoms%lnonsph(n)+2)+2))
     257             : 
     258        3816 :     IF (hmat%l_real) THEN
     259        4440 :        IF (ANY(SHAPE(hmat%data_c)/=SHAPE(hmat%data_r))) THEN
     260         210 :           DEALLOCATE(hmat%data_c)
     261        2154 :           ALLOCATE(hmat%data_c(SIZE(hmat%data_r,1),SIZE(hmat%data_r,2)))
     262             :        ENDIF
     263         972 :        hmat%data_c=0.0
     264             :     ENDIF
     265             :     
     266        8394 :     DO nn = 1,atoms%neq(n)
     267        4578 :        na = SUM(atoms%neq(:n-1))+nn
     268        8394 :        IF ((atoms%invsat(na)==0) .OR. (atoms%invsat(na)==1)) THEN
     269        3822 :           rchi=MERGE(REAL(chi),REAL(chi)*2,(atoms%invsat(na)==0))
     270             :           
     271        3822 :           CALL hsmt_ab(sym,atoms,noco,isp,jintsp,n,na,cell,lapw,fj,gj,ab,ab_size,.TRUE.)
     272             :           !Calculate Hamiltonian
     273             :         
     274        3822 :           CALL zgemm("N","N",lapw%nv(jintsp),ab_size,ab_size,CMPLX(1.0,0.0),ab,SIZE(ab,1),td%h_loc(0:,0:,n,isp),SIZE(td%h_loc,1),CMPLX(0.,0.),ab1,SIZE(ab1,1))
     275             :           !Cut out of ab1 only the needed elements here
     276        3822 :           ab_select=ab1(mpi%n_rank+1:lapw%nv(jintsp):mpi%n_size,:)
     277        3822 :           IF (iintsp==jintsp) THEN
     278        3662 :              CALL zgemm("N","T",lapw%nv(iintsp),lapw%num_local_cols(iintsp),ab_size,CMPLX(rchi,0.0),CONJG(ab1),SIZE(ab1,1),ab_select,lapw%num_local_cols(iintsp),CMPLX(1.,0.0),hmat%data_c,SIZE(hmat%data_c,1))
     279             :           ELSE
     280             :              !Second set of ab is needed
     281         160 :              CALL hsmt_ab(sym,atoms,noco,isp,iintsp,n,na,cell,lapw,fj,gj,ab,ab_size,.TRUE.)
     282         160 :              CALL zgemm("N","N",lapw%nv(iintsp),ab_size,ab_size,CMPLX(1.0,0.0),ab,SIZE(ab,1),td%h_loc(:,:,n,isp),SIZE(td%h_loc,1),CMPLX(0.,0.),ab1,SIZE(ab1,1))
     283             :              !Multiply for Hamiltonian
     284         160 :              CALL zgemm("N","t",lapw%nv(iintsp),lapw%num_local_cols(jintsp),ab_size,chi,conjg(ab1),SIZE(ab1,1),ab_select,lapw%num_local_cols(jintsp),CMPLX(1.,0.0),hmat%data_c,SIZE(hmat%data_c,1))   
     285             :           ENDIF
     286             :        ENDIF
     287             :     END DO
     288             :     !delete lower part of matrix
     289             :     !i=0
     290             :     !DO ii=mpi%n_rank+1,lapw%nv(iintsp),mpi%n_size
     291             :     !   i=i+1
     292             :     !   hmat%data_c(ii+1:,i)=0.0
     293             :     !ENDDO
     294        3816 :     IF (hmat%l_real) THEN
     295         972 :        hmat%data_r=hmat%data_r+hmat%data_c
     296             :     ENDIF
     297             :     
     298        3816 :   END SUBROUTINE priv_MPI
     299             : 
     300             :   
     301             : END MODULE m_hsmt_nonsph

Generated by: LCOV version 1.13