LCOV - FLEUR test coverage - hybrid/copy

LCOV - code coverage report

Current view:	top level - hybrid - copy_coul.F90 (source / functions)		Hit	Total	Coverage
Test:	FLEUR test coverage	Lines:	252	255	98.8 %
Date:	2024-05-02 04:21:52	Functions:	13	13	100.0 %

          Line data    Source code

       1             : module m_copy_coul
       2             :    use m_types
       3             :    use m_constants
       4             :    use m_glob_tofrom_loc
       5             :    USE m_types_mpimat
       6             : #ifdef CPP_MPI 
       7             :    use mpi 
       8             : #endif
       9             :    private 
      10             :    public :: copy_from_dense_to_sparse
      11             : contains
      12          36 :    subroutine copy_from_dense_to_sparse(fi, fmpi, mpdata, coulomb, ikpt, hybdat)
      13             :       implicit none
      14             :       type(t_fleurinput), intent(in)    :: fi
      15             :       type(t_mpdata), intent(in)        :: mpdata
      16             :       TYPE(t_mpi), INTENT(IN)           :: fmpi
      17             :       class(t_mat), intent(in)          :: coulomb(:)
      18             :       integer, intent(in)               :: ikpt
      19             :       TYPE(t_hybdat), INTENT(INOUT)     :: hybdat
      20             : 
      21          36 :       call timestart("copy_from_dense_to_sparse")
      22             : 
      23          36 :       call copy_mt1_from_striped_to_sparse(fi, fmpi, mpdata, coulomb, ikpt, hybdat)
      24          36 :       call copy_mt2_from_striped_to_sparse(fi, fmpi, mpdata, coulomb, ikpt, hybdat)
      25          36 :       call copy_mt3_from_striped_to_sparse(fi, fmpi, mpdata, coulomb, ikpt, hybdat)
      26          36 :       call test_mt2_mt3(fi, fmpi, mpdata, ikpt, hybdat)
      27          36 :       call copy_residual_mt_contrib_atm(fi, fmpi, mpdata, coulomb, ikpt, hybdat)
      28          36 :       call copy_residual_mt_contrib_gpt(fi, fmpi, mpdata, coulomb, ikpt, hybdat)
      29          36 :       call copy_ir(fi, fmpi, mpdata, coulomb(ikpt), ikpt, hybdat)
      30             :       
      31          36 :       call timestop("copy_from_dense_to_sparse")
      32          36 :    end subroutine copy_from_dense_to_sparse
      33             : 
      34          36 :    subroutine copy_mt1_from_striped_to_sparse(fi, fmpi, mpdata, coulomb, ikpt, hybdat)
      35             :       implicit none
      36             :       type(t_fleurinput), intent(in)    :: fi
      37             :       type(t_mpdata), intent(in)        :: mpdata
      38             :       TYPE(t_mpi), INTENT(IN)           :: fmpi
      39             :       class(t_mat), intent(in)          :: coulomb(:)
      40             :       integer, intent(in)               :: ikpt
      41             :       TYPE(t_hybdat), INTENT(INOUT)     :: hybdat
      42             : 
      43          36 :       real, allocatable    :: tmp_4r(:, :, :, :)
      44          36 :       complex, allocatable :: tmp_4c(:, :, :, :)
      45             :       integer :: indx1, sz, itype, ineq, l, i, ierr, ix_loc, n, pe_ix
      46             : 
      47          36 :       call timestart("copy_mt1")
      48             : 
      49             :       ! only one processor per k-point calculates MT convolution
      50             :       !
      51             :       ! store m-independent part of Coulomb matrix in MT spheres
      52             :       ! in coulomb_mt1(:mpdata%num_radbasfn(l,itype)-1,:mpdata%num_radbasfn(l,itype)-1,l,itype)
      53             :       !
      54         396 :       sz = maxval(mpdata%num_radbasfn) - 1
      55          36 :       if (fi%sym%invs) THEN
      56       14436 :          allocate (tmp_4r(sz, sz, 0:maxval(fi%hybinp%lcutm1), fi%atoms%ntype), source=0.0)
      57             :       else
      58        8880 :          allocate (tmp_4c(sz, sz, 0:maxval(fi%hybinp%lcutm1), fi%atoms%ntype), source=cmplx_0)
      59             :       end if
      60          36 :       indx1 = 0
      61          96 :       DO itype = 1, fi%atoms%ntype
      62         156 :          DO ineq = 1, fi%atoms%neq(itype)
      63         420 :             DO l = 0, fi%hybinp%lcutm1(itype)
      64         300 :                IF (ineq == 1) THEN
      65        2280 :                   DO n = 1, mpdata%num_radbasfn(l, itype) - 1
      66       15852 :                      do i = 1, mpdata%num_radbasfn(l, itype) - 1
      67       13572 :                         call glob_to_loc(fmpi, indx1 + i, pe_ix, ix_loc)
      68       15552 :                         if (fmpi%n_rank == pe_ix) then
      69        6786 :                            if (fi%sym%invs) THEN
      70        3930 :                               tmp_4r(n, i, l, itype) = real(coulomb(ikpt)%data_c(indx1 + n, ix_loc))
      71             :                            else
      72        2856 :                               tmp_4c(n, i, l, itype) = real(coulomb(ikpt)%data_c(indx1 + n, ix_loc))
      73             :                            end if
      74             :                         end if
      75             :                      end do
      76             :                   END DO
      77             :                END IF
      78             : 
      79         360 :                indx1 = indx1 + (2*l + 1)*mpdata%num_radbasfn(l, itype)
      80             :             END DO
      81             :          END DO
      82             :       END do
      83             : 
      84          36 :       if (fi%sym%invs) THEN
      85             : #ifdef CPP_MPI
      86             :          call MPI_Reduce(tmp_4r, hybdat%coul(ikpt)%mt1_r, size(tmp_4r), MPI_DOUBLE_PRECISION, &
      87         120 :                          MPI_SUM, 0, fmpi%sub_comm, ierr)
      88             : #else
      89             :          hybdat%coul(ikpt)%mt1_r = tmp_4r
      90             : #endif
      91          24 :          deallocate (tmp_4r)
      92             :       else
      93             : #ifdef CPP_MPI
      94             :          call MPI_Reduce(tmp_4c, hybdat%coul(ikpt)%mt1_c, size(tmp_4c), MPI_DOUBLE_COMPLEX, &
      95          60 :                          MPI_SUM, 0, fmpi%sub_comm, ierr)
      96             : #else
      97             :          hybdat%coul(ikpt)%mt1_c = tmp_4c
      98             : #endif
      99          12 :          deallocate (tmp_4c)
     100             :       end if
     101          36 :       call timestop("copy_mt1")
     102          36 :    end subroutine copy_mt1_from_striped_to_sparse
     103             : 
     104             : 
     105          36 :    subroutine copy_mt2_from_striped_to_sparse(fi, fmpi, mpdata, coulomb, ikpt, hybdat)
     106             :       implicit none
     107             :       type(t_fleurinput), intent(in)    :: fi
     108             :       type(t_mpdata), intent(in)        :: mpdata
     109             :       TYPE(t_mpi), INTENT(IN)           :: fmpi
     110             :       class(t_mat), intent(in)          :: coulomb(:)
     111             :       integer, intent(in)               :: ikpt
     112             :       TYPE(t_hybdat), INTENT(INOUT)     :: hybdat
     113             : 
     114             :       integer :: indx1, itype, l, m, iatom, ic, ierr, info, ix, ix_loc, pe_ix, n
     115          36 :       real, allocatable    :: tmp_r(:,:,:,:)
     116          36 :       complex, allocatable :: tmp_c(:,:,:,:)
     117             : 
     118          36 :       call timestart("copy_mt2")
     119          36 :       if(fi%sym%invs) then
     120             :          allocate (tmp_r(maxval(mpdata%num_radbasfn) - 1, &
     121             :                         -maxval(fi%hybinp%lcutm1):maxval(fi%hybinp%lcutm1), &
     122       18828 :                         0:maxval(fi%hybinp%lcutm1) + 1, fi%atoms%nat), stat=info, source=0.0)
     123             :       else
     124             :          allocate (tmp_c(maxval(mpdata%num_radbasfn) - 1, &
     125             :                         -maxval(fi%hybinp%lcutm1):maxval(fi%hybinp%lcutm1), &
     126       12096 :                         0:maxval(fi%hybinp%lcutm1) + 1, fi%atoms%nat), stat=info, source=cmplx_0)
     127             :       endif
     128          36 :       if(info /= 0) call judft_error("can't alloc mt2_tmp")
     129             : 
     130          36 :       indx1 = 0
     131          96 :       do iatom = 1, fi%atoms%nat
     132          60 :          itype = fi%atoms%itype(iatom)
     133         396 :          DO l = 0, fi%hybinp%lcutm1(itype)
     134        1860 :             DO M = -l, l
     135        1500 :                ix = indx1 + mpdata%num_radbasfn(l, itype)
     136        1500 :                call glob_to_loc(fmpi, ix, pe_ix, ix_loc)
     137        1500 :                if(pe_ix == fmpi%n_rank) then
     138         750 :                   if (fi%sym%invs) THEN
     139             :                      tmp_r(:mpdata%num_radbasfn(l, itype) - 1, M, l, iatom) &
     140        3072 :                         = real(coulomb(ikpt)%data_c(indx1 + 1:indx1 + mpdata%num_radbasfn(l, itype) - 1, ix_loc))
     141             :                   else
     142             :                      tmp_c(:mpdata%num_radbasfn(l, itype) - 1, M, l, iatom) &
     143        2148 :                         = coulomb(ikpt)%data_c(indx1 + 1:indx1 + mpdata%num_radbasfn(l, itype) - 1, ix_loc)
     144             :                   endif
     145             :                endif
     146             : 
     147        1800 :                indx1 = indx1 + mpdata%num_radbasfn(l, itype)
     148             :             END DO
     149             :          END DO
     150             :       END DO
     151             : 
     152          36 :       ix = hybdat%nbasp + 1
     153          36 :       call glob_to_loc(fmpi, ix, pe_ix, ix_loc)
     154          36 :       IF (ikpt == 1 .and. pe_ix == fmpi%n_rank) THEN
     155             :          !
     156             :          ! store the contribution of the G=0 plane wave with the MT l=0 functions in
     157             :          ! coulomb_mt2(:mpdata%num_radbasfn(l=0,itype),0,maxval(fi%hybinp%lcutm1)+1,iatom)
     158             :          !
     159             :          ic = 0
     160          16 :          do iatom = 1,fi%atoms%nat 
     161          10 :             itype = fi%atoms%itype(iatom)
     162          90 :             DO n = 1, mpdata%num_radbasfn(0, itype) - 1
     163          90 :                if (fi%sym%invs) THEN
     164         126 :                   tmp_r(n, 0, maxval(fi%hybinp%lcutm1) + 1, iatom) =  real(coulomb(ikpt)%data_c(ic + n, ix_loc))
     165             :                else
     166          96 :                   tmp_c(n, 0, maxval(fi%hybinp%lcutm1) + 1, iatom) = coulomb(ikpt)%data_c(ic + n, ix_loc)
     167             :                endif
     168             :             END DO
     169         126 :             ic = ic + SUM([((2*l + 1)*mpdata%num_radbasfn(l, itype), l=0, fi%hybinp%lcutm1(itype))])
     170             :          END DO
     171             :       endif 
     172             :       
     173          36 :       if (fi%sym%invs) THEN
     174             : #ifdef CPP_MPI
     175         120 :          call MPI_Reduce(tmp_r, hybdat%coul(ikpt)%mt2_r, size(tmp_r), MPI_DOUBLE_PRECISION, MPI_SUM, 0, fmpi%sub_comm, ierr)
     176             : #else
     177             :          hybdat%coul(ikpt)%mt2_r = tmp_r
     178             : #endif
     179          24 :          deallocate (tmp_r)
     180             :       else
     181             : #ifdef CPP_MPI
     182          60 :          call MPI_Reduce(tmp_c, hybdat%coul(ikpt)%mt2_c, size(tmp_c), MPI_DOUBLE_COMPLEX, MPI_SUM, 0, fmpi%sub_comm, ierr)
     183             : #else
     184             :          hybdat%coul(ikpt)%mt2_c = tmp_c
     185             : #endif
     186          12 :          deallocate (tmp_c)
     187             :       end if
     188          36 :       call timestop("copy_mt2")
     189          36 :    end subroutine copy_mt2_from_striped_to_sparse
     190             : 
     191          36 :    subroutine copy_mt3_from_striped_to_sparse(fi, fmpi, mpdata, coulomb, ikpt, hybdat)
     192             :       !
     193             :       ! store the contributions between the MT s-like functions at atom1 and
     194             :       ! and the constant function at a different atom2
     195             :       !
     196             :       implicit none
     197             :       type(t_fleurinput), intent(in)    :: fi
     198             :       type(t_mpdata), intent(in)        :: mpdata
     199             :       TYPE(t_mpi), INTENT(IN)           :: fmpi
     200             :       class(t_mat), intent(in)          :: coulomb(:)
     201             :       integer, intent(in)               :: ikpt
     202             :       TYPE(t_hybdat), INTENT(INOUT)     :: hybdat
     203             : 
     204             :       integer :: ic, iatom, itype, ishift, iatom1, ic1, ic2, itype1, ishift1, pe, loc, i, ierr, l, l1
     205          36 :       real, allocatable    :: tmp_r(:,:,:)
     206          36 :       complex, allocatable :: tmp_c(:,:,:)
     207             : 
     208          36 :       IF (ikpt == 1) THEN
     209          12 :          call timestart("copy_mt3")
     210          12 :          if(fi%sym%invs) then
     211         308 :             allocate(tmp_r(maxval(mpdata%num_radbasfn) - 1, fi%atoms%nat, fi%atoms%nat), source=0.0)
     212             :          else
     213         220 :             allocate(tmp_c(maxval(mpdata%num_radbasfn) - 1, fi%atoms%nat, fi%atoms%nat), source=cmplx_0)
     214             :          endif
     215             : 
     216          12 :          ic = 0
     217          32 :          do iatom = 1, fi%atoms%nat 
     218          20 :             itype = fi%atoms%itype(iatom)
     219         240 :             ishift = SUM([((2*l + 1)*mpdata%num_radbasfn(l, itype), l=0, fi%hybinp%lcutm1(itype))])
     220          20 :             ic1 = ic + mpdata%num_radbasfn(0, itype)
     221             : 
     222          20 :             ic2 = 0
     223          56 :             do iatom1 = 1,fi%atoms%nat
     224          36 :                itype1 = fi%atoms%itype(iatom1)
     225         432 :                ishift1 = SUM([((2*l1 + 1)*mpdata%num_radbasfn(l1, itype1), l1=0, fi%hybinp%lcutm1(itype1))])
     226             : 
     227         320 :                do i = 1,mpdata%num_radbasfn(0, itype1) - 1
     228         284 :                   call glob_to_loc(fmpi, ic2+i, pe, loc)
     229         320 :                   if(fmpi%n_rank == pe) then
     230         142 :                      IF (fi%sym%invs) THEN
     231          78 :                         tmp_r(i, iatom, iatom1) = real(coulomb(ikpt)%data_c(ic1, loc))
     232             :                      ELSE
     233          64 :                         tmp_c(i, iatom, iatom1) = CONJG(coulomb(ikpt)%data_c(ic1, loc))
     234             :                      ENDIF
     235             :                   endif
     236             :                enddo
     237          56 :                ic2 = ic2 + ishift1
     238             :             END DO
     239          32 :             ic = ic + ishift
     240             :          END DO
     241             : 
     242          12 :          if (fi%sym%invs) THEN
     243             : #ifdef CPP_MPI
     244          32 :             call MPI_Reduce(tmp_r, hybdat%coul(ikpt)%mt3_r, size(tmp_r), MPI_DOUBLE_PRECISION, MPI_SUM, 0, fmpi%sub_comm, ierr)
     245             : #else
     246             :             hybdat%coul(ikpt)%mt3_r = tmp_r
     247             : #endif
     248           8 :             deallocate (tmp_r)
     249             :          else
     250             : #ifdef CPP_MPI
     251          16 :             call MPI_Reduce(tmp_c, hybdat%coul(ikpt)%mt3_c, size(tmp_c), MPI_DOUBLE_COMPLEX, MPI_SUM, 0, fmpi%sub_comm, ierr)
     252             : #else
     253             :             hybdat%coul(ikpt)%mt3_c = tmp_c
     254             : #endif
     255           4 :             deallocate (tmp_c)
     256             :          end if
     257          12 :          call timestop("copy_mt3")
     258             :       endif ! ikpt == 1
     259          36 :    end subroutine copy_mt3_from_striped_to_sparse
     260             : 
     261          36 :    subroutine test_mt2_mt3(fi, fmpi, mpdata, ikpt, hybdat)
     262             :       implicit none
     263             :       type(t_fleurinput), intent(in)    :: fi
     264             :       type(t_mpdata), intent(in)        :: mpdata
     265             :       TYPE(t_mpi), INTENT(IN)           :: fmpi
     266             :       integer, intent(in)               :: ikpt
     267             :       TYPE(t_hybdat), INTENT(INOUT)     :: hybdat
     268             : 
     269             :       integer :: iatom, itype
     270          36 :       call timestart("test_mt2_mt3")
     271          36 :       if (fmpi%n_rank == 0 .and. ikpt == 1) then
     272             :          !test
     273          16 :          do iatom =1,fi%atoms%nat 
     274          10 :             itype = fi%atoms%itype(iatom)
     275          16 :             if (fi%sym%invs) THEN
     276          54 :                IF (MAXVAL(ABS(hybdat%coul(ikpt)%mt2_r(:mpdata%num_radbasfn(0, itype) - 1, 0, 0, iatom) &
     277             :                               - hybdat%coul(ikpt)%mt3_r(:mpdata%num_radbasfn(0, itype) - 1, iatom, iatom))) > 1E-08) &
     278           0 :                   call judft_error('coulombmatrix: coulomb_mt2 and coulomb_mt3 are inconsistent')
     279             : 
     280             :             else
     281          36 :                IF (MAXVAL(ABS(hybdat%coul(ikpt)%mt2_c(:mpdata%num_radbasfn(0, itype) - 1, 0, 0, iatom) &
     282             :                               - hybdat%coul(ikpt)%mt3_c(:mpdata%num_radbasfn(0, itype) - 1, iatom, iatom))) > 1E-08) &
     283           0 :                   call judft_error('coulombmatrix: coulomb_mt2 and coulomb_mt3 are inconsistent')
     284             :             end if
     285             :          END DO
     286             :       END IF
     287          36 :       call timestop("test_mt2_mt3")
     288          36 :    end subroutine test_mt2_mt3 
     289             : 
     290          36 :    subroutine copy_residual_mt_contrib_atm(fi, fmpi, mpdata, coulomb, ikpt, hybdat)
     291             :       !
     292             :       ! add the residual MT contributions, i.e. those functions with an moment,
     293             :       ! to the matrix coulomb_mtir, which is fully occupied
     294             :       !
     295             :       implicit none
     296             :       type(t_fleurinput), intent(in)    :: fi
     297             :       type(t_mpdata), intent(in)        :: mpdata
     298             :       TYPE(t_mpi), INTENT(IN)           :: fmpi
     299             :       class(t_mat), intent(in)          :: coulomb(:)
     300             :       integer, intent(in)               :: ikpt
     301             :       TYPE(t_hybdat), INTENT(INOUT)     :: hybdat
     302             : 
     303             :       integer :: igpt, indx1, indx2, indx3, indx4, itype, itype1, l, m, l1, m1
     304             :       integer :: iatom, iatom1, ierr, loc_4, pe_4, pe_ix, ix, ix_loc, ic, loc_from, i, tmp_idx
     305             :       complex :: tmp
     306          36 :       integer, allocatable :: loc_sizes(:), displs(:), loc_idx(:)
     307          36 :       complex, allocatable :: sendbuf(:), tmp_arr(:)
     308             :       
     309          36 :       call timestart("dbl iatom loop")
     310          36 :       ic = calc_ic(fi)
     311             : 
     312         180 :       allocate(loc_sizes(0:fmpi%n_size-1), displs(0:fmpi%n_size-1), loc_idx(0:fmpi%n_size-1))
     313         144 :       allocate(sendbuf(ic), tmp_arr(ic))
     314          36 :       indx1 = 0; indx2 = 0; indx3 = 0; indx4 = 0
     315             : 
     316             : 
     317          96 :       do iatom = 1, fi%atoms%nat 
     318          60 :          itype = fi%atoms%itype(iatom)
     319         396 :          DO l = 0, fi%hybinp%lcutm1(itype)
     320        1860 :             DO M = -l, l
     321        1500 :                indx1 = indx1 + 1
     322        1500 :                indx3 = indx3 + mpdata%num_radbasfn(l, itype)
     323             : 
     324             : 
     325        1500 :                loc_sizes = calc_loc_size_atom(fmpi, fi, mpdata, indx3)
     326        1500 :                displs = calc_disp(loc_sizes)
     327        1500 :                call assemble_sendbuf_atm(fi, fmpi, mpdata, coulomb, ikpt, indx3, sendbuf)
     328             : #ifdef CPP_MPI
     329             :                call MPI_Gatherv(sendbuf, loc_sizes(fmpi%n_rank), MPI_DOUBLE_COMPLEX, &
     330        1500 :                                tmp_arr, loc_sizes, displs, MPI_DOUBLE_COMPLEX, 0, fmpi%sub_comm, ierr)
     331             : #else
     332             :                tmp_arr = sendbuf 
     333             : #endif
     334             : 
     335        1800 :                if(fmpi%n_rank == 0) then
     336         750 :                   indx2 = 0
     337         750 :                   indx4 = 0
     338        2250 :                   loc_idx = 0
     339             : 
     340        2100 :                   do iatom1 = 1,fi%atoms%nat 
     341        1350 :                      itype1 = fi%atoms%itype(iatom1)
     342        8850 :                      DO l1 = 0, fi%hybinp%lcutm1(itype1)
     343       41850 :                         DO m1 = -l1, l1
     344       33750 :                            indx2 = indx2 + 1
     345       33750 :                            indx4 = indx4 + mpdata%num_radbasfn(l1, itype1)
     346       40500 :                            IF (indx4 >= indx3) then
     347       17250 :                               call glob_to_loc(fmpi, indx4, pe_4, loc_4)
     348       17250 :                               loc_idx(pe_4) = loc_idx(pe_4) + 1
     349       17250 :                               IF (fi%sym%invs) THEN
     350        9600 :                                  hybdat%coul(ikpt)%mtir%data_r(indx1, indx2) = real(tmp_arr(displs(pe_4) + loc_idx(pe_4)))
     351             :                               ELSE
     352        7650 :                                  hybdat%coul(ikpt)%mtir%data_c(indx1, indx2) = tmp_arr(displs(pe_4) + loc_idx(pe_4))
     353             :                               ENDIF
     354             :                            endif
     355             :                         END DO
     356             :                      END DO
     357             :                   END DO
     358             :                endif !rank == 0
     359             :             enddo
     360             :          enddo
     361             :       enddo
     362          36 :       call timestop("dbl iatom loop")
     363             : 
     364          36 :    end subroutine copy_residual_mt_contrib_atm
     365             : 
     366        1500 :    subroutine assemble_sendbuf_atm(fi, fmpi, mpdata, coulomb, ikpt, indx3, sendbuf)
     367             :       implicit none 
     368             :       type(t_fleurinput), intent(in)    :: fi
     369             :       type(t_mpdata), intent(in)        :: mpdata
     370             :       TYPE(t_mpi), INTENT(IN)           :: fmpi
     371             :       class(t_mat), intent(in)          :: coulomb(:)
     372             :       integer, intent(in)               :: ikpt, indx3
     373             :       complex, intent(inout)            :: sendbuf(:)
     374             : 
     375             :       integer :: loc_idx, iatom1, itype1, l1, m1, indx4, pe_4, loc_4
     376             : 
     377        1500 :       loc_idx = 0
     378        1500 :       indx4 = 0
     379             : 
     380        4200 :       do iatom1 = 1,fi%atoms%nat 
     381        2700 :          itype1 = fi%atoms%itype(iatom1)
     382       17700 :          DO l1 = 0, fi%hybinp%lcutm1(itype1)
     383       83700 :             DO m1 = -l1, l1
     384       67500 :                indx4 = indx4 + mpdata%num_radbasfn(l1, itype1)
     385       81000 :                if (indx4 >= indx3) then
     386       34500 :                   call glob_to_loc(fmpi, indx4, pe_4, loc_4)
     387       34500 :                   if(pe_4 == fmpi%n_rank) then
     388       17250 :                      loc_idx = loc_idx + 1
     389       17250 :                      sendbuf(loc_idx) = coulomb(ikpt)%data_c(indx3, loc_4) 
     390             :                   endif
     391             :                endif
     392             :             enddo 
     393             :          enddo 
     394             :       enddo
     395        1500 :    end subroutine assemble_sendbuf_atm
     396             : 
     397          36 :    subroutine copy_residual_mt_contrib_gpt(fi, fmpi, mpdata, coulomb, ikpt, hybdat)
     398             :       implicit none
     399             :       type(t_fleurinput), intent(in)    :: fi
     400             :       type(t_mpdata), intent(in)        :: mpdata
     401             :       TYPE(t_mpi), INTENT(IN)           :: fmpi
     402             :       class(t_mat), intent(in)          :: coulomb(:)
     403             :       integer, intent(in)               :: ikpt
     404             :       TYPE(t_hybdat), INTENT(INOUT)     :: hybdat
     405             : 
     406             :       integer :: igpt, indx1, indx2, indx3, indx4, itype, itype1, l, m, l1, m1
     407             :       integer :: iatom, iatom1, ierr, loc_4, pe_4, pe_ix, ix, ix_loc, ic, loc_from, i, tmp_idx
     408             :       complex :: tmp
     409          36 :       complex, allocatable :: tmp_arr(:), sendbuf(:)
     410             :       integer, allocatable :: loc_sizes(:), displs(:), loc_froms(:)
     411             : 
     412          36 :       ic = calc_ic(fi)
     413             : 
     414         180 :       allocate(loc_sizes(0:fmpi%n_size-1), displs(0:fmpi%n_size-1), loc_froms(0:fmpi%n_size-1))
     415          36 :       loc_sizes = calc_loc_size_gpt(fmpi, hybdat, mpdata, ikpt)
     416          36 :       displs = calc_disp(loc_sizes)
     417          36 :       loc_froms = collect_loc_froms_gpt(fmpi, hybdat)
     418         108 :       allocate(sendbuf(loc_sizes(fmpi%n_rank)))
     419             : 
     420         108 :       allocate(tmp_arr(mpdata%n_g(ikpt)))
     421          36 :       call timestart("iatom igpt loop")
     422          36 :       indx1 = 0; indx3 = 0
     423          96 :       do iatom = 1, fi%atoms%nat 
     424          60 :          itype = fi%atoms%itype(iatom)
     425         396 :          DO l = 0, fi%hybinp%lcutm1(itype)
     426        1860 :             DO M = -l, l
     427        1500 :                indx1 = indx1 + 1
     428        1500 :                indx3 = indx3 + mpdata%num_radbasfn(l, itype)
     429             : 
     430             : #ifdef CPP_MPI
     431      118850 :                sendbuf = coulomb(ikpt)%data_c(indx3, loc_froms(fmpi%n_rank):)
     432             :                call MPI_Gatherv(sendbuf, loc_sizes(fmpi%n_rank), MPI_DOUBLE_COMPLEX, &
     433        1500 :                                tmp_arr, loc_sizes, displs, MPI_DOUBLE_COMPLEX, 0, fmpi%sub_comm, ierr)
     434             : #else
     435             :                tmp_arr = coulomb(ikpt)%data_c(indx3, loc_froms(fmpi%n_rank):)
     436             : #endif
     437             : 
     438        1800 :                if(fmpi%n_rank == 0) then
     439      116600 :                   DO igpt = 1, mpdata%n_g(ikpt)
     440      115850 :                      ix =  hybdat%nbasp + igpt
     441      115850 :                      call glob_to_loc(fmpi, ix, pe_ix, ix_loc)
     442      115850 :                      tmp_idx = ix_loc - loc_froms(pe_ix) + 1 + displs(pe_ix)
     443      116600 :                      IF (fi%sym%invs) THEN
     444       68350 :                         hybdat%coul(ikpt)%mtir%data_r(indx1, ic + igpt) = real(tmp_arr(tmp_idx))
     445             :                      ELSE
     446       47500 :                         hybdat%coul(ikpt)%mtir%data_c(indx1, ic + igpt) = tmp_arr(tmp_idx)
     447             :                      ENDIF
     448             :                   END DO
     449             :                endif
     450             : 
     451             :             END DO
     452             :          END DO
     453             :       END do
     454          36 :       call timestop("iatom igpt loop")
     455             : 
     456          36 :       call hybdat%coul(ikpt)%mtir%u2l()
     457          36 :       IF (indx1 /= ic) call judft_error('coulombmatrix: error index counting')
     458          36 :    end subroutine copy_residual_mt_contrib_gpt
     459             : 
     460          36 :    function calc_loc_size_gpt(fmpi, hybdat, mpdata, ikpt) result(loc_sizes)
     461             :       implicit none 
     462             :       type(t_mpdata), intent(in)        :: mpdata
     463             :       TYPE(t_mpi), INTENT(IN)           :: fmpi
     464             :       TYPE(t_hybdat), INTENT(IN)        :: hybdat
     465             :       integer, intent(in)               :: ikpt
     466             :       integer :: loc_sizes(fmpi%n_size)
     467             :       integer :: loc_from, loc_to, my_size, ierr
     468             : 
     469          36 :       call range_from_glob_to_loc(fmpi, hybdat%nbasp + 1, loc_from)
     470          36 :       call range_to_glob_to_loc(fmpi, hybdat%nbasp + mpdata%n_g(ikpt), loc_to)
     471             : 
     472          36 :       my_size = loc_to - loc_from + 1
     473             : #ifdef CPP_MPI
     474          36 :       call MPI_Allgather(my_size, 1, MPI_INTEGER, loc_sizes, 1, MPI_INTEGER, fmpi%sub_comm, ierr)
     475             : #else 
     476             :       loc_sizes(1) = my_size 
     477             : #endif   
     478          36 :    end function calc_loc_size_gpt 
     479             : 
     480        1500 :    function calc_loc_size_atom(fmpi, fi, mpdata, indx3) result(loc_size)
     481             :       implicit none 
     482             :       type(t_fleurinput), intent(in)    :: fi
     483             :       type(t_mpi), intent(in)           :: fmpi
     484             :       type(t_mpdata), intent(in)        :: mpdata
     485             :       integer, intent(in)               :: indx3
     486             : 
     487             :       integer :: loc_size(0:fmpi%n_size-1)
     488             :       integer :: indx4, iatom1, itype1, l1, m1, loc_4, pe_4
     489             : 
     490        4500 :       loc_size = 0
     491        1500 :       indx4 = 0
     492        4200 :       do iatom1 = 1,fi%atoms%nat 
     493        2700 :          itype1 = fi%atoms%itype(iatom1)
     494       17700 :          DO l1 = 0, fi%hybinp%lcutm1(itype1)
     495       83700 :             DO m1 = -l1, l1
     496       67500 :                indx4 = indx4 + mpdata%num_radbasfn(l1, itype1)
     497       81000 :                IF (indx4 >= indx3) then
     498       34500 :                   call glob_to_loc(fmpi, indx4, pe_4, loc_4)
     499       34500 :                   loc_size(pe_4) = loc_size(pe_4) + 1
     500             :                endif
     501             :             enddo 
     502             :          enddo 
     503             :       enddo
     504        1500 :    end function calc_loc_size_atom 
     505             : 
     506        1536 :    function calc_disp(loc_sizes) result(displs)
     507             :       implicit NONE
     508             :       integer :: loc_sizes(:)
     509             :       integer :: displs(size(loc_sizes))
     510             :       integer :: i 
     511             : 
     512        4608 :       displs = 0 
     513        3072 :       do i = 2,size(loc_sizes)
     514        3072 :          displs(i) = displs(i-1) + loc_sizes(i-1)
     515             :       end do
     516        1536 :    end function calc_disp
     517             : 
     518          36 :    function collect_loc_froms_gpt(fmpi, hybdat) result(loc_froms)
     519             :       implicit none 
     520             :       TYPE(t_mpi), INTENT(IN)           :: fmpi
     521             :       TYPE(t_hybdat), INTENT(IN)        :: hybdat
     522             :       integer :: loc_froms(fmpi%n_size), ierr, loc_from
     523             : 
     524          36 :       call range_from_glob_to_loc(fmpi, hybdat%nbasp + 1, loc_from)
     525             : 
     526             : #ifdef CPP_MPI
     527          36 :       call MPI_Allgather(loc_from, 1, MPI_INTEGER, loc_froms, 1, MPI_INTEGER, fmpi%sub_comm, ierr)
     528             : #else 
     529             :       loc_froms(1) = loc_from 
     530             : #endif
     531          36 :    end function collect_loc_froms_gpt
     532             : 
     533         108 :    function calc_ic(fi) result(ic)
     534             :       implicit none 
     535             :       type(t_fleurinput), intent(in)    :: fi
     536             :       integer :: ic, iatom, itype, l
     537             : 
     538         108 :       ic = 0
     539         288 :       do iatom = 1,fi%atoms%nat 
     540         180 :          itype = fi%atoms%itype(iatom)
     541         288 :          ic = ic + (fi%hybinp%lcutm1(itype) + 1)**2
     542             :       END DO
     543             :    end function calc_ic
     544             : 
     545          36 :    subroutine copy_ir(fi, fmpi, mpdata, coulomb, ikpt, hybdat)
     546             :       implicit none
     547             :       type(t_fleurinput), intent(in)    :: fi
     548             :       type(t_mpdata), intent(in)        :: mpdata
     549             :       TYPE(t_mpi), INTENT(IN)           :: fmpi
     550             :       class(t_mat), intent(in)          :: coulomb
     551             :       integer, intent(in)               :: ikpt
     552             :       TYPE(t_hybdat), INTENT(INOUT)     :: hybdat
     553             : 
     554             :       integer :: ic, iatom, l, ix, iy, ix_loc, pe_ix, i, itype, ierr
     555             :       INTEGER:: blacs_desc(9), umap(1, 1), np
     556             :       real, allocatable    :: tmp(:)
     557          36 :       type(t_mat)          :: loc_cpy
     558             :       !
     559             :       ! add ir part to the matrix coulomb_mtir
     560             :       !
     561             : 
     562          36 :       ic = calc_ic(fi)
     563          36 :       call timestart("copy_ir")
     564             :       select type(coulomb)
     565             :       class is(t_mpimat)
     566             : #ifdef CPP_SCALAPACK
     567          36 :          if(fi%sym%invs) then
     568          24 :             call loc_cpy%alloc(.false., mpdata%n_g(ikpt), mpdata%n_g(ikpt))
     569         240 :             blacs_desc = [1, -1, loc_cpy%matsize1, loc_cpy%matsize2, loc_cpy%matsize1, loc_cpy%matsize2, 0, 0, loc_cpy%matsize1]
     570          24 :             umap(1, 1) = 0
     571          24 :             CALL BLACS_GET(coulomb%blacsdata%blacs_desc(2), 10, blacs_desc(2))
     572          24 :             CALL BLACS_GRIDMAP(blacs_desc(2), umap, 1, 1, 1)
     573             :             
     574             :             call pzgemr2d(mpdata%n_g(ikpt), mpdata%n_g(ikpt), &
     575             :                         coulomb%data_c, hybdat%nbasp + 1, hybdat%nbasp + 1, coulomb%blacsdata%blacs_desc, &
     576          24 :                         loc_cpy%data_c,1,  1,  blacs_desc, coulomb%blacsdata%blacs_desc(2))
     577             : 
     578             : 
     579          24 :             if(fmpi%n_rank == 0) then
     580          12 :                !$OMP parallel do default(shared) shared(mpdata, hybdat, loc_cpy, ic, ikpt) private(ix, iy) collapse(2)
     581             :                do ix = 1, mpdata%n_g(ikpt)
     582             :                   do iy = 1, mpdata%n_g(ikpt) 
     583             :                         hybdat%coul(ikpt)%mtir%data_r(ic + iy, ic + ix) = real(loc_cpy%data_c(iy, ix))
     584             :                   enddo 
     585             :                enddo
     586             :                !$OMP end parallel do
     587             :             endif
     588             :          else 
     589             :             blacs_desc = [1, -1, hybdat%coul(ikpt)%mtir%matsize1, hybdat%coul(ikpt)%mtir%matsize2, &
     590         120 :                         hybdat%coul(ikpt)%mtir%matsize1, hybdat%coul(ikpt)%mtir%matsize2, 0, 0, hybdat%coul(ikpt)%mtir%matsize1]
     591          12 :             umap(1, 1) = 0
     592          12 :             CALL BLACS_GET(coulomb%blacsdata%blacs_desc(2), 10, blacs_desc(2))
     593          12 :             CALL BLACS_GRIDMAP(blacs_desc(2), umap, 1, 1, 1)
     594             : 
     595             :             call pzgemr2d(mpdata%n_g(ikpt), mpdata%n_g(ikpt), &
     596             :                         coulomb%data_c, hybdat%nbasp + 1, hybdat%nbasp + 1, coulomb%blacsdata%blacs_desc, &
     597          12 :                         hybdat%coul(ikpt)%mtir%data_c, ic+1, ic+1,  blacs_desc, coulomb%blacsdata%blacs_desc(2))
     598             : 
     599             :          endif
     600             : #endif
     601             :       class is (t_mat)
     602           0 :          !$OMP parallel do default(shared) shared(mpdata, hybdat, loc_cpy, ic, fi, ikpt) private(ix, iy) collapse(2)
     603             :          do ix = 1, mpdata%n_g(ikpt)
     604             :             do iy = 1, mpdata%n_g(ikpt) 
     605             :                if(fi%sym%invs) then
     606             :                   hybdat%coul(ikpt)%mtir%data_r(ic + iy, ic + ix) = real(coulomb%data_c(hybdat%nbasp + iy, hybdat%nbasp + ix))
     607             :                else
     608             :                   hybdat%coul(ikpt)%mtir%data_c(ic + iy, ic + ix) = coulomb%data_c(hybdat%nbasp + iy, hybdat%nbasp + ix)
     609             :                endif
     610             :             enddo 
     611             :          enddo
     612             :          !$OMP end parallel do
     613             :       end select
     614             : 
     615          36 :       call timestop("copy_ir")
     616          36 :    end subroutine copy_ir
     617          36 : end module m_copy_coul

Generated by: LCOV version 1.14