LCOV - code coverage report
Current view: top level - eigen - hsmt_ab.F90 (source / functions) Hit Total Coverage
Test: combined.info Lines: 42 42 100.0 %
Date: 2019-09-08 04:53:50 Functions: 2 2 100.0 %

          Line data    Source code
       1             : !--------------------------------------------------------------------------------
       2             : ! Copyright (c) 2016 Peter Grünberg Institut, Forschungszentrum Jülich, Germany
       3             : ! This file is part of FLEUR and available as free software under the conditions
       4             : ! of the MIT license as expressed in the LICENSE file in more detail.
       5             : !--------------------------------------------------------------------------------
       6             : MODULE m_hsmt_ab
       7             :   use m_juDFT
       8             :   implicit none
       9             : 
      10             :   INTERFACE hsmt_ab
      11             :     module procedure hsmt_ab_cpu
      12             : #ifdef CPP_GPU
      13             :     module procedure hsmt_ab_gpu
      14             : #endif
      15             :   END INTERFACE
      16             : 
      17             : 
      18             : CONTAINS
      19             : 
      20             : #ifdef CPP_GPU
      21             : 
      22             :   ATTRIBUTES(global) SUBROUTINE synth_ab(loop_size,n,lmax,ab_size,gkrot_dev,fj,gj,c_ph,ab)
      23             :     USE m_ylm
      24             :     INTEGER, VALUE, INTENT(IN) :: loop_size, n, lmax, ab_size
      25             :     REAL,   DEVICE, INTENT(IN) :: gkrot_dev(:,:),fj(:,:),gj(:,:)
      26             :     COMPLEX,DEVICE, INTENT(IN) :: c_ph(:)
      27             :     COMPLEX,DEVICE, INTENT (OUT) :: ab(:,:)
      28             :     COMPLEX,ALLOCATABLE :: ylm(:)
      29             :     INTEGER :: k,l,ll1,m,i
      30             :     INTEGER :: loop_start, loop_end
      31             : 
      32             :     ALLOCATE(ylm((lmax+1)**2))
      33             : 
      34             :     k = (blockidx%x-1)*blockdim%x + threadidx%x
      35             :     loop_start = (k-1) * loop_size + 1
      36             :     loop_end = loop_start + loop_size - 1
      37             :     if (loop_end > n ) loop_end = n
      38             : 
      39             :     DO i = loop_start,loop_end
      40             :        !-->    generate spherical harmonics
      41             :        CALL ylm4_dev(lmax,gkrot_dev(:,i),ylm(:))
      42             :        DO l = 0,lmax
      43             :           ll1 = l* (l+1)
      44             :           DO m = -l,l               
      45             :              ab(i,ll1+m+1)         = CONJG(fj(i,l+1)*c_ph(i)*ylm(ll1+m+1)) 
      46             :              ab(i,ll1+m+1+ab_size) = CONJG(gj(i,l+1)*c_ph(i)*ylm(ll1+m+1)) 
      47             :           END DO
      48             :        END DO
      49             :     ENDDO 
      50             : 
      51             :     DEALLOCATE(ylm)
      52             :   END SUBROUTINE synth_ab
      53             : 
      54             : 
      55             :   SUBROUTINE hsmt_ab_gpu(sym,atoms,noco,ispin,iintsp,n,na,cell,lapw,fj,gj,ab,ab_size,l_nonsph,abclo,alo1,blo1,clo1)
      56             : !Calculate overlap matrix, GPU version
      57             :     USE m_constants, ONLY : fpi_const,tpi_const
      58             :     USE m_types
      59             :     USE m_ylm
      60             :     USE cudafor
      61             :     USE nvtx
      62             :     IMPLICIT NONE
      63             :     TYPE(t_sym),INTENT(IN)      :: sym
      64             :     TYPE(t_cell),INTENT(IN)     :: cell
      65             :     TYPE(t_atoms),INTENT(IN)    :: atoms
      66             :     TYPE(t_lapw),INTENT(IN)     :: lapw
      67             :     TYPE(t_noco),INTENT(IN)     :: noco
      68             :     !     ..
      69             :     !     .. Scalar Arguments ..
      70             :     INTEGER, INTENT (IN) :: ispin,n,na,iintsp
      71             :     LOGICAL,INTENT(IN)   :: l_nonsph
      72             :     INTEGER,INTENT(OUT)  :: ab_size
      73             :     !     ..
      74             :     !     .. Array Arguments ..
      75             :     REAL, DEVICE, INTENT(IN)       :: fj(:,:,:),gj(:,:,:)
      76             :     COMPLEX,DEVICE, INTENT (OUT) :: ab(:,:)
      77             :     !Optional arguments if abc coef for LOs are needed
      78             :     COMPLEX, INTENT(INOUT),OPTIONAL:: abclo(:,-atoms%llod:,:,:)
      79             :     REAL,INTENT(IN),OPTIONAL:: alo1(:),blo1(:),clo1(:)
      80             :     
      81             :     INTEGER:: np,k,l,ll1,m,lmax,nkvec,lo,lm,invsfct
      82             :     REAL   :: th,v(3),bmrot(3,3),vmult(3)
      83             :     COMPLEX,ALLOCATABLE :: ylm(:,:)
      84             :     COMPLEX,ALLOCATABLE :: c_ph(:,:)
      85             :     REAL,   ALLOCATABLE :: gkrot(:,:)
      86             :     COMPLEX:: term
      87             : 
      88             :     COMPLEX,ALLOCATABLE,DEVICE :: c_ph_dev(:,:)
      89             :     REAL,   ALLOCATABLE,DEVICE :: gkrot_dev(:,:)
      90             :     INTEGER :: grid, block, loop_size
      91             :     INTEGER :: istat
      92             :  
      93             :     call nvtxStartRange("hsmt_ab",3)    
      94             :     lmax=MERGE(atoms%lnonsph(n),atoms%lmax(n),l_nonsph)
      95             : 
      96             :     ALLOCATE(c_ph_dev(lapw%nv(1),MERGE(2,1,noco%l_ss)))
      97             :     ALLOCATE(gkrot_dev(3,lapw%nv(1)))
      98             : 
      99             :     ALLOCATE(ylm((lmax+1)**2,lapw%nv(1)))
     100             :     ALLOCATE(c_ph(lapw%nv(1),MERGE(2,1,noco%l_ss)))
     101             :     ALLOCATE(gkrot(3,lapw%nv(1)))
     102             : 
     103             :     
     104             :     ab_size=lmax*(lmax+2)+1
     105             :     ab=0.0
     106             :     
     107             :     np = sym%invtab(atoms%ngopr(na))
     108             :     !--->          set up phase factors
     109             :     CALL lapw%phase_factors(iintsp,atoms%taual(:,na),noco%qss,c_ph(:,iintsp))
     110             :     c_ph_dev=c_ph   
     111             :  
     112             :     IF (np==1) THEN
     113             :        gkrot(:, 1:lapw%nv(iintsp)) = lapw%gk(:, 1:lapw%nv(iintsp),iintsp)
     114             :     ELSE
     115             :        bmrot=MATMUL(1.*sym%mrot(:,:,np),cell%bmat)
     116             :        DO k = 1,lapw%nv(iintsp)
     117             :           !-->  apply the rotation that brings this atom into the
     118             :           !-->  representative (this is the definition of ngopr(na)
     119             :           !-->  and transform to cartesian coordinates
     120             :           v(:) = lapw%vk(:,k,iintsp)
     121             :           gkrot(:,k) = MATMUL(TRANSPOSE(bmrot),v)
     122             :        END DO
     123             :     END IF
     124             : 
     125             :     gkrot_dev = gkrot 
     126             : 
     127             : 
     128             :     !-->  synthesize the complex conjugates of a and b
     129             :     grid = 30    ! number of blocks in the grid
     130             :     block = 32   ! number of threads in a block
     131             :     loop_size = max(lapw%nv(1)/(grid*block),1)   !number of iterations performed by each thread
     132             :     if (loop_size * grid*block < lapw%nv(1)) loop_size = loop_size + 1
     133             :     CALL synth_ab<<<grid,block>>>(loop_size,lapw%nv(1),lmax,ab_size,gkrot_dev,&
     134             :                                   fj(:,:,iintsp),gj(:,:,iintsp),c_ph_dev(:,iintsp),ab)
     135             : 
     136             :     IF (PRESENT(abclo)) THEN
     137             :        print*, "Ooooops, TODO in hsmt_ab"
     138             :        !DO k = 1,lapw%nv(1)
     139             :        !   !determine also the abc coeffs for LOs
     140             :        !   invsfct=MERGE(1,2,atoms%invsat(na).EQ.0)
     141             :        !   term = fpi_const/SQRT(cell%omtil)* ((atoms%rmt(n)**2)/2)*c_ph(k,iintsp)
     142             :        !   DO lo = 1,atoms%nlo(n)
     143             :        !      l = atoms%llo(lo,n)
     144             :        !      DO nkvec=1,invsfct*(2*l+1)
     145             :        !         IF (lapw%kvec(nkvec,lo,na)==k) THEN !This k-vector is used in LO
     146             :        !            ll1 = l*(l+1) + 1
     147             :        !            DO m = -l,l
     148             :        !               lm = ll1 + m
     149             :        !               abclo(1,m,nkvec,lo) = term*ylm(k,lm)*alo1(lo)
     150             :        !               abclo(2,m,nkvec,lo) = term*ylm(k,lm)*blo1(lo)
     151             :        !               abclo(3,m,nkvec,lo) = term*ylm(k,lm)*clo1(lo)
     152             :        !            END DO
     153             :        !         END IF
     154             :        !      ENDDO
     155             :        !   ENDDO
     156             :        !ENDDO
     157             :     ENDIF
     158             :        
     159             :     ab_size=ab_size*2
     160             :     
     161             :     DEALLOCATE(c_ph_dev)
     162             :     DEALLOCATE(gkrot_dev)
     163             : 
     164             :     istat = cudaDeviceSynchronize() 
     165             :     call nvtxEndRange
     166             :   END SUBROUTINE hsmt_ab_gpu
     167             : #endif
     168             : 
     169        5592 :   SUBROUTINE hsmt_ab_cpu(sym,atoms,noco,ispin,iintsp,n,na,cell,lapw,fj,gj,ab,ab_size,l_nonsph,abclo,alo1,blo1,clo1)
     170             : !Calculate overlap matrix, CPU vesion
     171             :     USE m_constants, ONLY : fpi_const,tpi_const
     172             :     USE m_types
     173             :     USE m_ylm
     174             :     IMPLICIT NONE
     175             :     TYPE(t_sym),INTENT(IN)      :: sym
     176             :     TYPE(t_cell),INTENT(IN)     :: cell
     177             :     TYPE(t_atoms),INTENT(IN)    :: atoms
     178             :     TYPE(t_lapw),INTENT(IN)     :: lapw
     179             :     TYPE(t_noco),INTENT(IN)     :: noco
     180             :     !     ..
     181             :     !     .. Scalar Arguments ..
     182             :     INTEGER, INTENT (IN) :: ispin,n,na,iintsp
     183             :     LOGICAL,INTENT(IN)   :: l_nonsph
     184             :     INTEGER,INTENT(OUT)  :: ab_size
     185             :     !     ..
     186             :     !     .. Array Arguments ..
     187             :     REAL,INTENT(IN)       :: fj(:,0:,:),gj(:,0:,:)
     188             :     COMPLEX, INTENT (OUT) :: ab(:,:)
     189             :     !Optional arguments if abc coef for LOs are needed
     190             :     COMPLEX, INTENT(INOUT),OPTIONAL:: abclo(:,-atoms%llod:,:,:)
     191             :     REAL,INTENT(IN),OPTIONAL:: alo1(:),blo1(:),clo1(:)
     192             :     
     193             :     INTEGER:: np,k,l,ll1,m,lmax,nkvec,lo,lm,invsfct
     194             :     COMPLEX:: term
     195             :     REAL   :: th,v(3),bmrot(3,3),vmult(3)
     196        5592 :     COMPLEX :: ylm((atoms%lmaxd+1)**2)
     197        5592 :     COMPLEX,ALLOCATABLE:: c_ph(:,:)
     198        5592 :     REAL,ALLOCATABLE   :: gkrot(:,:)
     199             :     LOGICAL :: l_apw
     200             :    
     201        5592 :     ALLOCATE(c_ph(maxval(lapw%nv),MERGE(2,1,noco%l_ss)))
     202        5592 :     ALLOCATE(gkrot(3,maxval(lapw%nv)))
     203             : 
     204        5592 :     lmax=MERGE(atoms%lnonsph(n),atoms%lmax(n),l_nonsph)
     205             :     
     206        5592 :     ab_size=lmax*(lmax+2)+1
     207        5592 :     l_apw=ALL(gj==0.0)
     208      706888 :     ab=0.0
     209             :     
     210        5592 :     np = sym%invtab(atoms%ngopr(na))
     211             :     !--->          set up phase factors
     212        5592 :     CALL lapw%phase_factors(iintsp,atoms%taual(:,na),noco%qss,c_ph(:,iintsp))
     213             :     
     214        5592 :     IF (np==1) THEN
     215        5584 :        gkrot(:, 1:lapw%nv(iintsp)) = lapw%gk(:, 1:lapw%nv(iintsp),iintsp)
     216             :     ELSE
     217           8 :        bmrot=MATMUL(1.*sym%mrot(:,:,np),cell%bmat)
     218        5528 :        DO k = 1,lapw%nv(iintsp)
     219             :           !-->  apply the rotation that brings this atom into the
     220             :           !-->  representative (this is the definition of ngopr(na)
     221             :           !-->  and transform to cartesian coordinates
     222        5520 :           v(:) = lapw%vk(:,k,iintsp)
     223        5528 :           gkrot(:,k) = MATMUL(TRANSPOSE(bmrot),v)
     224             :        END DO
     225             :     END IF
     226             :     !$OMP PARALLEL DO DEFAULT(none) &
     227             :     !$OMP& SHARED(lapw,gkrot,lmax,c_ph,iintsp,ab,fj,gj,abclo,cell,atoms) &
     228             :     !$OMP& SHARED(alo1,blo1,clo1,ab_size,na,n) &
     229        5592 :     !$OMP& PRIVATE(k,vmult,ylm,l,ll1,m,lm,term,invsfct,lo,nkvec)
     230             :     DO k = 1,lapw%nv(iintsp)
     231             :        !-->    generate spherical harmonics
     232     5992984 :        vmult(:) =  gkrot(:,k)
     233     1498246 :        CALL ylm4(lmax,vmult,ylm)
     234             :        !-->  synthesize the complex conjugates of a and b
     235     1498246 :        DO l = 0,lmax
     236    10545674 :           ll1 = l* (l+1)
     237    86517686 :           DO m = -l,l               
     238    74473766 :              term = c_ph(k,iintsp)*ylm(ll1+m+1)
     239    74473766 :              ab(k,ll1+m+1)         = fj(k,l,iintsp)*term
     240    85019440 :              ab(k,ll1+m+1+ab_size) = gj(k,l,iintsp)*term
     241             :           END DO
     242             :        END DO
     243     2053448 :        IF (PRESENT(abclo)) THEN
     244             :           !determine also the abc coeffs for LOs
     245     1088036 :           invsfct=MERGE(1,2,atoms%invsat(na).EQ.0)
     246      544018 :           term = fpi_const/SQRT(cell%omtil)* ((atoms%rmt(n)**2)/2)*c_ph(k,iintsp)
     247     1607546 :           DO lo = 1,atoms%nlo(n)
     248     1063528 :              l = atoms%llo(lo,n)
     249     5434430 :              DO nkvec=1,invsfct*(2*l+1)
     250     4890412 :                 IF (lapw%kvec(nkvec,lo,na)==k) THEN !This k-vector is used in LO
     251        6940 :                    ll1 = l*(l+1) + 1
     252       24552 :                    DO m = -l,l
     253       17612 :                       lm = ll1 + m
     254       17612 :                       abclo(1,m,nkvec,lo) = term*ylm(lm)*alo1(lo)
     255       17612 :                       abclo(2,m,nkvec,lo) = term*ylm(lm)*blo1(lo)
     256       24552 :                       abclo(3,m,nkvec,lo) = term*ylm(lm)*clo1(lo)
     257             :                    END DO
     258             :                 END IF
     259             :              ENDDO
     260             :           ENDDO
     261             :        ENDIF
     262             :        
     263             :     ENDDO !k-loop
     264             :     !$OMP END PARALLEL DO
     265        5592 :     IF (.NOT.l_apw) ab_size=ab_size*2
     266             :     
     267        5592 :   END SUBROUTINE hsmt_ab_cpu
     268             : END MODULE m_hsmt_ab

Generated by: LCOV version 1.13