LCOV - code coverage report
Current view: top level - mpi - mpi_make_groups.F90 (source / functions) Hit Total Coverage
Test: combined.info Lines: 0 114 0.0 %
Date: 2019-09-08 04:53:50 Functions: 0 2 0.0 %

          Line data    Source code
       1             : !--------------------------------------------------------------------------------
       2             : ! Copyright (c) 2016 Peter Grünberg Institut, Forschungszentrum Jülich, Germany
       3             : ! This file is part of FLEUR and available as free software under the conditions
       4             : ! of the MIT license as expressed in the LICENSE file in more detail.
       5             : !--------------------------------------------------------------------------------
       6             : 
       7             : MODULE m_mpimakegroups
       8             :   use m_juDFT
       9             : CONTAINS
      10           0 :   SUBROUTINE mpi_make_groups(&
      11             :        mpi,dimension,kpts, input,atoms,noco,&
      12             :        mlotot,mlolotot,&
      13             :        n_start,n_groups,n,matsize,ne, n_rank,n_size,SUB_COMM)
      14             : !------------------------------------------------------------------------
      15             : !
      16             : ! Distribute the k-point / eigenvector  parallelisation so, that
      17             : ! all pe's have aproximately equal load. Maximize for k-point 
      18             : ! parallelisation. The naming conventions are as follows:
      19             : !
      20             : ! groups             1               2          (n_groups = 2) 
      21             : !                 /     \         /     \
      22             : ! k-points:      1       2       3       4      (nkpts = 4)
      23             : !               /|\     /|\     /|\     /|\    
      24             : ! irank        01 2   34 5   01 2   34 5    (isize = 6)
      25             : !
      26             : ! n_rank       01 2   01 2   01 2   01 2    (n_size = 3)
      27             : !
      28             : ! nrec         12 3   45 6   78 9  1011 12  ...rec. no. on eig-file
      29             : !              * *     * *     * *     *  *
      30             : !
      31             : ! In the above example, 6 pe's should work on 4 k-points and distribute
      32             : ! their load in a way, that 3 pe's work on each k-points, so 2 k-points
      33             : ! are done in parellel (n_members=2) and there are 2 groups of k-points.
      34             : ! n_rank and n_size are the equivalents of irank and isize. The former
      35             : ! belong to the communicator SUB_COMM, the latter to MPI_COMM.
      36             : !
      37             : ! The results (containing a subset of ew & ev's) are written as separate
      38             : ! records on the file 'eig' with the vacuum energy parameter of the
      39             : ! marked (*) records set to 999.9 to indicate that this is only one part
      40             : ! of a k-point's results. This marker is needed in routines fermie and 
      41             : ! cdnval.
      42             : !          G.B. `99
      43             : !
      44             : !------------------------------------------------------------------------
      45             :     USE m_types
      46             :     IMPLICIT NONE
      47             :     
      48             :     TYPE(t_mpi),INTENT(IN)       :: mpi
      49             :     TYPE(t_dimension),INTENT(IN) :: dimension
      50             :     TYPE(t_input),INTENT(IN)     :: input
      51             :     TYPE(t_noco),INTENT(IN)      :: noco
      52             :     TYPE(t_kpts),INTENT(IN)      :: kpts
      53             :     TYPE(t_atoms),INTENT(IN)     :: atoms
      54             :     INTEGER, INTENT (IN)  :: mlotot
      55             :     INTEGER, INTENT (IN)  :: mlolotot    
      56             :     INTEGER, INTENT (OUT) :: n_start,n_groups,n,SUB_COMM
      57             :     INTEGER, INTENT (OUT) :: matsize,n_rank,n_size,ne
      58             : 
      59             :       INTEGER i,n_members
      60           0 :       INTEGER, ALLOCATABLE :: i_mygroup(:)
      61             : 
      62             :       INCLUDE 'mpif.h'
      63             :       INTEGER WORLD_GROUP,SUB_GROUP
      64             :       INTEGER ierr(3)
      65             :       LOGICAL l_cm
      66             : !
      67             : ! first determine the number of groups of k-points to process
      68             : !
      69           0 :       n_groups = 0
      70           0 :       IF (kpts%nkpt.GT.mpi%isize) THEN           ! if there are more k-points than PEs
      71             : 
      72           0 :         IF (mod(kpts%nkpt,mpi%isize).EQ.0) THEN  ! maybe kpts%nkpt is a multiple of mpi%isize
      73           0 :           n_groups = kpts%nkpt/mpi%isize
      74           0 :           n_size = 1
      75             :         ELSE                            ! or an integer fraction of mpi%isize fits
      76           0 :           DO i=2,mpi%isize
      77           0 :             IF (mod(mpi%isize,i).EQ.0) THEN
      78           0 :               n_size = i
      79           0 :               n_groups = kpts%nkpt * i/mpi%isize
      80           0 :               IF (mod(kpts%nkpt,mpi%isize/i).EQ.0) GOTO 990
      81             :             ENDIF
      82             :           ENDDO
      83           0 :           n_groups = kpts%nkpt               ! or use all PE's per k-point
      84           0 :           n_size = mpi%isize
      85             :         ENDIF
      86             : 
      87           0 :       ELSEIF (kpts%nkpt.LT.mpi%isize) THEN       ! if there are more PEs than k-points
      88             : 
      89           0 :         IF (mod(mpi%isize,kpts%nkpt).EQ.0) THEN  ! maybe mpi%isize is a multiple of kpts%nkpt 
      90           0 :            n_groups = 1
      91           0 :            n_size = mpi%isize/kpts%nkpt
      92             :         ELSE                            ! or an integer fraction of kpts%nkpt fits
      93           0 :           DO i=kpts%nkpt-1,2,-1
      94           0 :             IF (mod(kpts%nkpt,i).EQ.0) THEN
      95           0 :                n_groups = kpts%nkpt/i
      96           0 :                n_size = mpi%isize/i
      97           0 :                IF (mod(mpi%isize,i).EQ.0) GOTO 990
      98             :             ENDIF
      99             :           ENDDO
     100           0 :           n_groups = kpts%nkpt               ! or use all PE's per k-point
     101           0 :           n_size = mpi%isize
     102             :         ENDIF
     103             : 
     104             :       ELSE
     105             : !
     106             : ! if there are as many pe's as k-points (isize = nkpt), use one PE per
     107             : ! kpoint (n_size = 1) and finish in a single k-loop (n_groups = 1)
     108             : !
     109           0 :         n_groups = 1
     110           0 :         n_size = 1
     111             :       ENDIF
     112             : 
     113             : 
     114             : 
     115           0 :  990  IF (n_groups==0)  CALL juDFT_error("mpi_make_groups:1",calledby ="mpi_make_groups")
     116           0 :       n_members = kpts%nkpt/n_groups 
     117             : !
     118             : ! check different algorithm
     119             : !
     120           0 :       CALL check_memory(DIMENSION,input,atoms, mlotot,mlolotot,noco,kpts,mpi, n_size)
     121             : 
     122           0 :       write(*,*) n_size
     123           0 :       n_members = MIN(kpts%nkpt,mpi%isize)
     124           0 :       n_members = MIN(n_members , CEILING(REAL(mpi%isize)/n_size) ) + 1
     125             :       
     126           0 :       l_cm = .false.
     127             :       DO WHILE (.not.l_cm) 
     128           0 :         n_members = n_members - 1
     129           0 :         IF ((mod(mpi%isize,n_members) == 0).AND.&
     130             :      &      (mod(kpts%nkpt,n_members) == 0) ) THEN
     131           0 :            l_cm = .true.
     132             :         ENDIF
     133             :       ENDDO
     134           0 :       n_groups = kpts%nkpt/n_members
     135           0 :       n_size = mpi%isize/n_members
     136           0 :       IF (mpi%irank == 0) THEN
     137           0 :         write(*,*) 'k-points in parallel: ',n_members
     138           0 :         write(*,*) "pe's per k-point:     ",n_size
     139           0 :         write(*,*) '# of k-point loops:   ',n_groups
     140             :       ENDIF
     141             : !
     142             : ! now, we make the groups
     143             : !
     144           0 :       n_start = mod(mpi%irank,n_members) + 1
     145             : !!      n_start = INT(irank/n_size) * n_size
     146           0 :       ALLOCATE ( i_mygroup(n_size) )
     147           0 :       n = 0
     148           0 :       DO i = n_start,mpi%isize,n_members
     149             : !!      DO i = n_start+1,n_start+n_size
     150           0 :         n = n+1
     151           0 :         i_mygroup(n) = i-1
     152             :       ENDDO
     153             : 
     154             : !      write (*,*) irank,n_groups,n_start,i_mygroup
     155             : 
     156           0 :       CALL MPI_COMM_GROUP (mpi%MPI_COMM,WORLD_GROUP,ierr)
     157           0 :       CALL MPI_GROUP_INCL (WORLD_GROUP,n_size,i_mygroup, SUB_GROUP,ierr)
     158           0 :       CALL MPI_COMM_CREATE (mpi%MPI_COMM,SUB_GROUP,SUB_COMM,ierr)
     159             : 
     160           0 :       CALL MPI_COMM_RANK (SUB_COMM,n_rank,ierr)
     161             : !
     162             : ! determine number of columns per group
     163             : !
     164           0 :       n=0
     165           0 :       DO  i=1+n_rank, dimension%nbasfcn, n_size
     166           0 :         n=n+1
     167             :       ENDDO
     168           0 :       IF (n_size.GT.1) THEN
     169           0 :         matsize = dimension%nbasfcn * n
     170             :       ELSE
     171           0 :         matsize = (dimension%nbasfcn+1)*dimension%nbasfcn/2
     172             :       ENDIF
     173             : !
     174           0 :         ne = dimension%neigd
     175           0 :       ne = max(ne,5)
     176             : 
     177           0 :       DEALLOCATE (i_mygroup)
     178             : 
     179           0 :       END SUBROUTINE mpi_make_groups
     180             : 
     181             : !----------------------------------------------------------------------
     182           0 :       SUBROUTINE check_memory(DIMENSION,input,atoms, mlotot,mlolotot, noco,kpts,mpi, n_size)
     183             : 
     184             : !
     185             : ! check the free and the (approximate) required memory ;
     186             : ! determine minimal n_size to fit into the memory (hopefully).
     187             : !
     188             :         USE m_types
     189             :       IMPLICIT NONE
     190             :       type(t_mpi),INTENT(IN)         :: mpi
     191             :       TYPE(t_dimension),INTENT(IN)   :: dimension
     192             :       TYPE(t_kpts),INTENT(IN)        :: kpts
     193             :       TYPE(t_atoms),INTENT(IN)       :: atoms
     194             :       TYPE(t_noco),INTENT(IN)        :: noco
     195             :       TYPE(t_input),INTENT(IN)       :: input
     196             : 
     197             :       INTEGER, INTENT (IN)  :: mlotot,mlolotot    
     198             :       INTEGER, INTENT (OUT) :: n_size
     199             : 
     200             :       INTEGER  err, mb
     201             :       INTEGER*8 mem, matsz, m_h
     202           0 :       REAL, ALLOCATABLE :: test(:)
     203             : 
     204           0 :       n_size = CEILING( real(mpi%isize)/min(kpts%nkpt,mpi%isize) )
     205             :       
     206             :  10   CONTINUE
     207             : !
     208             : ! some basic arrays allocated in eigen()
     209             : !
     210           0 :       mem = (dimension%lmplmd+1)*atoms%ntype*4                       ! tlmplm%tuu,tlmplm%tdd etc.
     211           0 :       mem = mem + (dimension%lmd+1)*(2*atoms%llod+1)*max(mlotot,1)*2 ! tlmplm%tuulo ...
     212           0 :       mem = mem + (2*atoms%llod+1)**2 * max(mlolotot,1)    ! tlmplm%tuloulo
     213           0 :       IF (noco%l_noco) mem = mem * 2                      ! both spins
     214           0 :       mem = mem + 49*atoms%n_u*input%jspins*2                      ! lda+U, *2 for complex
     215           0 :       mem = mem+INT((dimension%nbasfcn*2+(dimension%lmd+1)*atoms%ntype)*0.5)+1 ! tlmplm%ind, *0.5 for integer
     216             : 
     217           0 :       matsz = dimension%nbasfcn * CEILING(REAL(dimension%nbasfcn)/n_size) ! size of a, b
     218             : #ifdef CPP_INVERSION
     219             :       mem = mem + 2 * matsz                          ! real case
     220             : #else
     221           0 :       mem = mem + 2 * matsz * 2                      ! complec case
     222             : #endif
     223             : !
     224             : ! now the arrays in hssphn()
     225             : !
     226           0 :       m_h = dimension%nvd*(dimension%lmd+1)*4  + dimension%nvd*8 + atoms%nlod            ! ar, ai ..., cph, rph, vk, gk
     227           0 :       m_h = m_h + 2 * (2*atoms%llod+1)**2 * atoms%nlod * 3 * 2   ! alo,blo,clo
     228           0 :       IF (noco%l_ss) m_h = m_h * 2
     229           0 :       m_h = m_h + dimension%nvd*(5+atoms%lmaxd)                      ! axr, ... plegend
     230           0 :       IF (noco%l_ss.OR.noco%l_constr.OR.(noco%l_noco.AND.noco%l_soc)) THEN
     231           0 :         m_h = m_h + dimension%nvd*(atoms%lmaxd+1)*atoms%ntype*2*2          ! fj,gj
     232             :       ELSE 
     233           0 :         m_h = m_h + dimension%nvd*(atoms%lmaxd+1)*atoms%ntype*2            ! fj,gj
     234             :       ENDIF
     235           0 :       IF (noco%l_noco.AND.noco%l_soc) THEN
     236           0 :         m_h = m_h + dimension%nvd*(atoms%lmaxd+4)
     237             :       ENDIF
     238           0 :       IF (noco%l_constr) THEN
     239           0 :         m_h = m_h + (atoms%lmaxd+1)*atoms%ntype
     240             :       ENDIF
     241           0 :       IF (noco%l_noco.AND.(.NOT.noco%l_ss)) THEN
     242           0 :         matsz = (dimension%nvd+mlotot) * CEILING(REAL(dimension%nvd+mlotot)/n_size)
     243           0 :         m_h = m_h + matsz * 2 * 2                    ! aahlp,bbhlp
     244             :       ENDIF
     245             : !
     246             : ! see, whether it fits
     247             : !
     248           0 :       mb = (mem+m_h)*8/(1024)**2
     249           0 :       ALLOCATE ( test(mem+m_h) , stat = err)
     250           0 :       WRITE(*,*) mb,'Mbytes needed  in hssphn!',err,mem
     251           0 :       IF ( err /= 0 ) THEN
     252           0 :         n_size = n_size * 2
     253           0 :         IF (n_size > mpi%isize) THEN
     254           0 :           mb = (mem+m_h)*8/(1024)**2
     255           0 :           WRITE(*,*) mb,'Mbytes needed  in hssphn!'
     256           0 :           CALL juDFT_error("mpi_make_groups: memory too small!",calledby ="mpi_make_groups")
     257             :         ENDIF 
     258             :         GOTO 10
     259             :       ENDIF
     260           0 :       DEALLOCATE (test)
     261             : !
     262             : ! now, allocate z and jump into chani
     263             : !      
     264           0 :       matsz = dimension%nbasfcn * CEILING(REAL(dimension%nbasfcn)/n_size)   ! size of z
     265             : #ifdef CPP_INVERSION
     266             :       mem = mem + matsz                             ! real case
     267             : #else
     268           0 :       mem = mem + matsz * 2                         ! complex case
     269             : #endif
     270           0 :       mem = mem + matsz * 2 * 3                     ! asca,bsca,eigvec
     271           0 :       mem = mem + dimension%nbasfcn 
     272             : #ifdef CPP_INVERSION
     273             :       mem = mem + matsz                             ! work
     274             : #else
     275           0 :       mem = mem + matsz * 2                         ! work, rwork neglected
     276             : #endif
     277             : !
     278             : ! see, whether it fits
     279             : !
     280           0 :       mb = (mem)*8/(1024)**2
     281           0 :       ALLOCATE ( test(mem) , stat = err)
     282           0 :       WRITE(*,*) mb,'Mbytes needed  in chani !',err,mem
     283           0 :       IF ( err /= 0 ) THEN
     284           0 :         n_size = n_size * 2
     285           0 :         IF (n_size > mpi%isize) THEN
     286           0 :           mb = (mem)*8/(1024)**2
     287           0 :           WRITE(*,*) mb,'Mbytes needed  in chani !'
     288             :           CALL juDFT_error("mpi_make_groups: memory too small!",calledby&
     289           0 :      &         ="mpi_make_groups")
     290             :         ENDIF
     291             :         GOTO 10
     292             :       ENDIF
     293           0 :       DEALLOCATE (test)
     294             :       
     295           0 :       END SUBROUTINE check_memory
     296             : !----------------------------------------------------------------------
     297             : 
     298             :       END MODULE m_mpimakegroups

Generated by: LCOV version 1.13