LCOV - code coverage report
Current view: top level - mpi - mpi_make_groups.F90 (source / functions) Hit Total Coverage
Test: FLEUR test coverage Lines: 0 110 0.0 %
Date: 2024-03-28 04:22:06 Functions: 0 2 0.0 %

          Line data    Source code
       1             : !--------------------------------------------------------------------------------
       2             : ! Copyright (c) 2016 Peter Grünberg Institut, Forschungszentrum Jülich, Germany
       3             : ! This file is part of FLEUR and available as free software under the conditions
       4             : ! of the MIT license as expressed in the LICENSE file in more detail.
       5             : !--------------------------------------------------------------------------------
       6             : 
       7             : MODULE m_mpimakegroups
       8             :   use m_juDFT
       9             :   use mpi
      10             : CONTAINS
      11           0 :   SUBROUTINE mpi_make_groups(&
      12             :        fmpi,kpts, input,atoms,noco,&
      13             :        mlotot,mlolotot,&
      14             :        n_start,n_groups,n,matsize,ne, n_rank,n_size,SUB_COMM)
      15             : !------------------------------------------------------------------------
      16             : !
      17             : ! Distribute the k-point / eigenvector  parallelisation so, that
      18             : ! all pe's have aproximately equal load. Maximize for k-point
      19             : ! parallelisation. The naming conventions are as follows:
      20             : !
      21             : ! groups             1               2          (n_groups = 2)
      22             : !                 /     \         /     \
      23             : ! k-points:      1       2       3       4      (nkpts = 4)
      24             : !               /|\     /|\     /|\     /|\
      25             : ! irank        01 2   34 5   01 2   34 5    (isize = 6)
      26             : !
      27             : ! n_rank       01 2   01 2   01 2   01 2    (n_size = 3)
      28             : !
      29             : ! nrec         12 3   45 6   78 9  1011 12  ...rec. no. on eig-file
      30             : !              * *     * *     * *     *  *
      31             : !
      32             : ! In the above example, 6 pe's should work on 4 k-points and distribute
      33             : ! their load in a way, that 3 pe's work on each k-points, so 2 k-points
      34             : ! are done in parellel (n_members=2) and there are 2 groups of k-points.
      35             : ! n_rank and n_size are the equivalents of irank and isize. The former
      36             : ! belong to the communicator SUB_COMM, the latter to MPI_COMM.
      37             : !
      38             : ! The results (containing a subset of ew & ev's) are written as separate
      39             : ! records on the file 'eig' with the vacuum energy parameter of the
      40             : ! marked (*) records set to 999.9 to indicate that this is only one part
      41             : ! of a k-point's results. This marker is needed in routines fermie and
      42             : ! cdnval.
      43             : !          G.B. `99
      44             : !
      45             : !------------------------------------------------------------------------
      46             :     USE m_types
      47             :     IMPLICIT NONE
      48             : 
      49             :     TYPE(t_mpi),INTENT(IN)       :: fmpi
      50             : 
      51             :     TYPE(t_input),INTENT(IN)     :: input
      52             :     TYPE(t_noco),INTENT(IN)      :: noco
      53             :     TYPE(t_kpts),INTENT(IN)      :: kpts
      54             :     TYPE(t_atoms),INTENT(IN)     :: atoms
      55             :     INTEGER, INTENT (IN)  :: mlotot
      56             :     INTEGER, INTENT (IN)  :: mlolotot
      57             :     INTEGER, INTENT (OUT) :: n_start,n_groups,n,SUB_COMM
      58             :     INTEGER, INTENT (OUT) :: matsize,n_rank,n_size,ne
      59             : 
      60             :     INTEGER i,n_members
      61           0 :     INTEGER, ALLOCATABLE :: i_mygroup(:)
      62             : 
      63             :     INTEGER WORLD_GROUP,SUB_GROUP
      64             :     INTEGER ierr(3)
      65             :     LOGICAL l_cm
      66             : 
      67             : !
      68             : ! first determine the number of groups of k-points to process
      69             : !
      70           0 :       n_groups = 0
      71           0 :       IF (kpts%nkpt.GT.fmpi%isize) THEN           ! if there are more k-points than PEs
      72             : 
      73           0 :         IF (mod(kpts%nkpt,fmpi%isize).EQ.0) THEN  ! maybe kpts%nkpt is a multiple of fmpi%isize
      74           0 :           n_groups = kpts%nkpt/fmpi%isize
      75           0 :           n_size = 1
      76             :         ELSE                            ! or an integer fraction of fmpi%isize fits
      77           0 :           DO i=2,fmpi%isize
      78           0 :             IF (mod(fmpi%isize,i).EQ.0) THEN
      79           0 :               n_size = i
      80           0 :               n_groups = kpts%nkpt * i/fmpi%isize
      81           0 :               IF (mod(kpts%nkpt,fmpi%isize/i).EQ.0) GOTO 990
      82             :             ENDIF
      83             :           ENDDO
      84           0 :           n_groups = kpts%nkpt               ! or use all PE's per k-point
      85           0 :           n_size = fmpi%isize
      86             :         ENDIF
      87             : 
      88           0 :       ELSEIF (kpts%nkpt.LT.fmpi%isize) THEN       ! if there are more PEs than k-points
      89             : 
      90           0 :         IF (mod(fmpi%isize,kpts%nkpt).EQ.0) THEN  ! maybe fmpi%isize is a multiple of kpts%nkpt
      91           0 :            n_groups = 1
      92           0 :            n_size = fmpi%isize/kpts%nkpt
      93             :         ELSE                            ! or an integer fraction of kpts%nkpt fits
      94           0 :           DO i=kpts%nkpt-1,2,-1
      95           0 :             IF (mod(kpts%nkpt,i).EQ.0) THEN
      96           0 :                n_groups = kpts%nkpt/i
      97           0 :                n_size = fmpi%isize/i
      98           0 :                IF (mod(fmpi%isize,i).EQ.0) GOTO 990
      99             :             ENDIF
     100             :           ENDDO
     101           0 :           n_groups = kpts%nkpt               ! or use all PE's per k-point
     102           0 :           n_size = fmpi%isize
     103             :         ENDIF
     104             : 
     105             :       ELSE
     106             : !
     107             : ! if there are as many pe's as k-points (isize = nkpt), use one PE per
     108             : ! kpoint (n_size = 1) and finish in a single k-loop (n_groups = 1)
     109             : !
     110           0 :         n_groups = 1
     111           0 :         n_size = 1
     112             :       ENDIF
     113             : 
     114             : 
     115             : 
     116           0 :  990  IF (n_groups==0)  CALL juDFT_error("mpi_make_groups:1",calledby ="mpi_make_groups")
     117             :       n_members = kpts%nkpt/n_groups
     118             : !
     119             : ! check different algorithm
     120             : !
     121           0 :       CALL check_memory(input,atoms, mlotot,mlolotot,noco,kpts,fmpi, n_size)
     122             : 
     123           0 :       write(*,*) n_size
     124           0 :       n_members = MIN(kpts%nkpt,fmpi%isize)
     125           0 :       n_members = MIN(n_members , CEILING(REAL(fmpi%isize)/n_size) ) + 1
     126             : 
     127           0 :       l_cm = .false.
     128             :       DO WHILE (.not.l_cm)
     129           0 :         n_members = n_members - 1
     130           0 :         IF ((mod(fmpi%isize,n_members) == 0).AND.&
     131             :      &      (mod(kpts%nkpt,n_members) == 0) ) THEN
     132             :            l_cm = .true.
     133             :         ENDIF
     134             :       ENDDO
     135           0 :       n_groups = kpts%nkpt/n_members
     136           0 :       n_size = fmpi%isize/n_members
     137           0 :       IF (fmpi%irank == 0) THEN
     138           0 :         write(*,*) 'k-points in parallel: ',n_members
     139           0 :         write(*,*) "pe's per k-point:     ",n_size
     140           0 :         write(*,*) '# of k-point loops:   ',n_groups
     141             :       ENDIF
     142             : !
     143             : ! now, we make the groups
     144             : !
     145           0 :       n_start = mod(fmpi%irank,n_members) + 1
     146             : !!      n_start = INT(irank/n_size) * n_size
     147           0 :       ALLOCATE ( i_mygroup(n_size) )
     148           0 :       n = 0
     149           0 :       DO i = n_start,fmpi%isize,n_members
     150             : !!      DO i = n_start+1,n_start+n_size
     151           0 :         n = n+1
     152           0 :         i_mygroup(n) = i-1
     153             :       ENDDO
     154             : 
     155             : !      write (*,*) irank,n_groups,n_start,i_mygroup
     156             : 
     157           0 :       CALL MPI_COMM_GROUP (fmpi%MPI_COMM,WORLD_GROUP,ierr(1))
     158           0 :       CALL MPI_GROUP_INCL (WORLD_GROUP,n_size,i_mygroup, SUB_GROUP,ierr(1))
     159           0 :       CALL MPI_COMM_CREATE (fmpi%MPI_COMM,SUB_GROUP,SUB_COMM,ierr(1))
     160             : 
     161           0 :       CALL MPI_COMM_RANK (SUB_COMM,n_rank,ierr(1))
     162             : !
     163             : ! determine number of columns per group
     164             : !
     165           0 :       n=0
     166           0 :       DO  i=1+n_rank, lapw_dim_nbasfcn, n_size
     167           0 :         n=n+1
     168             :       ENDDO
     169           0 :       IF (n_size.GT.1) THEN
     170           0 :         matsize = lapw_dim_nbasfcn * n
     171             :       ELSE
     172           0 :         matsize = (lapw_dim_nbasfcn+1)*lapw_dim_nbasfcn/2
     173             :       ENDIF
     174             : !
     175           0 :         ne = input%neig
     176           0 :       ne = max(ne,5)
     177             : 
     178           0 :       DEALLOCATE (i_mygroup)
     179             : 
     180           0 :       END SUBROUTINE mpi_make_groups
     181             : 
     182             : !----------------------------------------------------------------------
     183           0 :       SUBROUTINE check_memory(input,atoms, mlotot,mlolotot, noco,kpts,fmpi, n_size)
     184             : 
     185             : !
     186             : ! check the free and the (approximate) required memory ;
     187             : ! determine minimal n_size to fit into the memory (hopefully).
     188             : !
     189             :         USE m_types
     190             :       IMPLICIT NONE
     191             :       type(t_mpi),INTENT(IN)         :: fmpi
     192             : 
     193             :       TYPE(t_kpts),INTENT(IN)        :: kpts
     194             :       TYPE(t_atoms),INTENT(IN)       :: atoms
     195             :       TYPE(t_noco),INTENT(IN)        :: noco
     196             :       TYPE(t_input),INTENT(IN)       :: input
     197             : 
     198             :       INTEGER, INTENT (IN)  :: mlotot,mlolotot
     199             :       INTEGER, INTENT (OUT) :: n_size
     200             : 
     201             :       INTEGER  err, mb
     202             :       INTEGER*8 mem, matsz, m_h
     203           0 :       REAL, ALLOCATABLE :: test(:)
     204             : 
     205           0 :       n_size = CEILING( real(fmpi%isize)/min(kpts%nkpt,fmpi%isize) )
     206             : 
     207             :  10   CONTINUE
     208             : !
     209             : ! some basic arrays allocated in eigen()
     210             : !
     211             : 
     212           0 :       mem = ((atoms%lmaxd*(atoms%lmaxd+2)* (atoms%lmaxd*(atoms%lmaxd+2)+3))/2+1)*atoms%ntype*4                       ! tlmplm%tuu,tlmplm%tdd etc.
     213           0 :       mem = mem + (atoms%lmaxd*(atoms%lmaxd+2)+1)*(2*atoms%llod+1)*max(mlotot,1)*2 ! tlmplm%tuulo ...
     214           0 :       mem = mem + (2*atoms%llod+1)**2 * max(mlolotot,1)    ! tlmplm%tuloulo
     215           0 :       IF (noco%l_noco) mem = mem * 2                      ! both spins
     216           0 :       mem = mem + 49*(atoms%n_u+atoms%n_hia)*input%jspins*2                      ! lda+U, *2 for complex
     217           0 :       mem = mem+INT((lapw_dim_nbasfcn*2+(atoms%lmaxd*(atoms%lmaxd+2)+1)*atoms%ntype)*0.5)+1 ! tlmplm%ind, *0.5 for integer
     218             : 
     219           0 :       matsz = lapw_dim_nbasfcn * CEILING(REAL(lapw_dim_nbasfcn)/n_size) ! size of a, b
     220             : #ifdef CPP_INVERSION
     221             :       mem = mem + 2 * matsz                          ! real case
     222             : #else
     223           0 :       mem = mem + 2 * matsz * 2                      ! complec case
     224             : #endif
     225             : !
     226             : ! now the arrays in hssphn()
     227             : !
     228           0 :       m_h = lapw_dim_nvd*(atoms%lmaxd*(atoms%lmaxd+2)+1)*4  + lapw_dim_nvd*8 + atoms%nlod            ! ar, ai ..., cph, rph, vk, gk
     229           0 :       m_h = m_h + 2 * (2*atoms%llod+1)**2 * atoms%nlod * 3 * 2   ! alo,blo,clo
     230           0 :       IF (noco%l_ss) m_h = m_h * 2
     231           0 :       m_h = m_h + lapw_dim_nvd*(5+atoms%lmaxd)                      ! axr, ... plegend
     232           0 :       IF (noco%l_ss.OR.any(noco%l_constrained).OR.(noco%l_noco.AND.noco%l_soc)) THEN
     233           0 :         m_h = m_h + lapw_dim_nvd*(atoms%lmaxd+1)*atoms%ntype*2*2          ! fj,gj
     234             :       ELSE
     235           0 :         m_h = m_h + lapw_dim_nvd*(atoms%lmaxd+1)*atoms%ntype*2            ! fj,gj
     236             :       ENDIF
     237           0 :       IF (noco%l_noco.AND.noco%l_soc) THEN
     238           0 :         m_h = m_h + lapw_dim_nvd*(atoms%lmaxd+4)
     239             :       ENDIF
     240           0 :       IF (any(noco%l_constrained)) THEN
     241           0 :         m_h = m_h + (atoms%lmaxd+1)*atoms%ntype
     242             :       ENDIF
     243           0 :       IF (noco%l_noco.AND.(.NOT.noco%l_ss)) THEN
     244           0 :         matsz = (lapw_dim_nvd+mlotot) * CEILING(REAL(lapw_dim_nvd+mlotot)/n_size)
     245           0 :         m_h = m_h + matsz * 2 * 2                    ! aahlp,bbhlp
     246             :       ENDIF
     247             : !
     248             : ! see, whether it fits
     249             : !
     250           0 :       mb = (mem+m_h)*8/(1024)**2
     251           0 :       ALLOCATE ( test(mem+m_h) , stat = err)
     252           0 :       WRITE(*,*) mb,'Mbytes needed  in hssphn!',err,mem
     253           0 :       IF ( err /= 0 ) THEN
     254           0 :         n_size = n_size * 2
     255           0 :         IF (n_size > fmpi%isize) THEN
     256             :           mb = (mem+m_h)*8/(1024)**2
     257           0 :           WRITE(*,*) mb,'Mbytes needed  in hssphn!'
     258           0 :           CALL juDFT_error("mpi_make_groups: memory too small!",calledby ="mpi_make_groups")
     259             :         ENDIF
     260             :         GOTO 10
     261             :       ENDIF
     262           0 :       DEALLOCATE (test)
     263             : !
     264             : ! now, allocate z and jump into chani
     265             : !
     266           0 :       matsz = lapw_dim_nbasfcn * CEILING(REAL(lapw_dim_nbasfcn)/n_size)   ! size of z
     267             : #ifdef CPP_INVERSION
     268             :       mem = mem + matsz                             ! real case
     269             : #else
     270           0 :       mem = mem + matsz * 2                         ! complex case
     271             : #endif
     272           0 :       mem = mem + matsz * 2 * 3                     ! asca,bsca,eigvec
     273           0 :       mem = mem + lapw_dim_nbasfcn
     274             : #ifdef CPP_INVERSION
     275             :       mem = mem + matsz                             ! work
     276             : #else
     277           0 :       mem = mem + matsz * 2                         ! work, rwork neglected
     278             : #endif
     279             : !
     280             : ! see, whether it fits
     281             : !
     282           0 :       mb = (mem)*8/(1024)**2
     283           0 :       ALLOCATE ( test(mem) , stat = err)
     284           0 :       WRITE(*,*) mb,'Mbytes needed  in chani !',err,mem
     285           0 :       IF ( err /= 0 ) THEN
     286           0 :         n_size = n_size * 2
     287           0 :         IF (n_size > fmpi%isize) THEN
     288             :           mb = (mem)*8/(1024)**2
     289           0 :           WRITE(*,*) mb,'Mbytes needed  in chani !'
     290             :           CALL juDFT_error("mpi_make_groups: memory too small!",calledby&
     291           0 :      &         ="mpi_make_groups")
     292             :         ENDIF
     293             :         GOTO 10
     294             :       ENDIF
     295           0 :       DEALLOCATE (test)
     296             : 
     297           0 :       END SUBROUTINE check_memory
     298             : !----------------------------------------------------------------------
     299             : 
     300             :       END MODULE m_mpimakegroups

Generated by: LCOV version 1.14