LCOV - code coverage report
Current view: top level - mpi - setupMPI.F90 (source / functions)
Test: combined.info
Date: 2019-09-08 04:53:50

                 Hit   Total   Coverage
Lines:            43      72     59.7 %
Functions:         3       4     75.0 %

          Line data    Source code
       1             : !--------------------------------------------------------------------------------
       2             : ! Copyright (c) 2016 Peter Grünberg Institut, Forschungszentrum Jülich, Germany
       3             : ! This file is part of FLEUR and available as free software under the conditions
       4             : ! of the MIT license as expressed in the LICENSE file in more detail.
       5             : !--------------------------------------------------------------------------------
       6             : 
       7             : MODULE m_setupMPI
       8             :   use m_juDFT
       9             :   IMPLICIT NONE
      10             : 
      11             : CONTAINS
      12          76 :   SUBROUTINE setupMPI(nkpt,neigd,mpi)
      13             : !$  use omp_lib
      14             :     USE m_types  
      15             :     USE m_available_solvers,ONLY:parallel_solver_available
      16             :     INTEGER,INTENT(in)           :: nkpt,neigd
      17             :     TYPE(t_mpi),INTENT(inout)    :: mpi
      18             : 
      19             :     INTEGER :: omp=-1,i
      20             : 
      21          76 :     !$ omp=omp_get_max_threads()
      22          76 :     if (mpi%irank==0) THEN
      23             :        !print INFO on parallelization
      24          38 :        WRITE(*,*) "--------------------------------------------------------"
      25             : #ifdef CPP_MPI
      26          38 :        write(*,*) "Number of MPI-tasks:  ",mpi%isize
      27          38 :        CALL add_usage_data("MPI-PE",mpi%isize)     
      28             : #else
      29             :        CALL add_usage_data("MPI-PE",1)     
      30             : #endif
      31          38 :        IF (omp==-1) THEN
      32           0 :           write(*,*) "No OpenMP version of FLEUR."
      33           0 :           CALL add_usage_data("OMP",0)
      34             :        ELSE
      35          38 :           WRITE(*,*) "Number of OMP-threads:",omp
      36          38 :           CALL add_usage_data("OMP",omp)
      37             :        ENDIF
      38             :     endif
      39          76 :     IF (mpi%isize==1) THEN
      40             :        !give some info on available parallelisation
      41           0 :        CALL priv_dist_info(nkpt)
      42           0 :        mpi%n_rank=0
      43           0 :        mpi%n_size=1
      44           0 :        mpi%sub_comm=mpi%mpi_comm
      45           0 :        IF (ALLOCATED(mpi%k_list)) DEALLOCATE(mpi%k_List)
      46           0 :        IF (ALLOCATED(mpi%ev_list)) DEALLOCATE(mpi%ev_list)
      47           0 :        ALLOCATE(mpi%k_list(nkpt))
      48           0 :        ALLOCATE(mpi%ev_list(neigd))
      49           0 :        mpi%k_list=[(i,i=1,nkpt)]
      50           0 :        mpi%ev_list=[(i,i=1,neigd)]
      51           0 :        WRITE(*,*) "--------------------------------------------------------"
      52             :        RETURN
      53             :     END IF
      54             : #ifdef CPP_MPI
      55             :     !Distribute the work
      56          76 :     CALL priv_distribute_k(nkpt,mpi)
      57             : 
      58             :     !Now check if parallelization is possible
      59          76 :     IF (mpi%n_size>1.AND..NOT.parallel_solver_available()) &
      60           0 :          CALL juDFT_error("MPI parallelization failed",hint="Either compile FLEUR with a parallel diagonalization library (ELPA, ScaLAPACK, ...) or choose the number of MPI tasks such that the k-points can be distributed over the PEs")
      61             : #endif
      62             :     !generate the MPI communicators
      63          76 :     CALL priv_create_comm(nkpt,neigd,mpi)
      64             : 
      65          76 :     ALLOCATE(mpi%k_list(SIZE([(i, i=INT(mpi%irank/mpi%n_size)+1,nkpt,mpi%isize/mpi%n_size )])))
      66          76 :     mpi%k_list=[(i, i=INT(mpi%irank/mpi%n_size)+1,nkpt,mpi%isize/mpi%n_size )]
      67             : 
      68          76 :     if (mpi%irank==0) WRITE(*,*) "--------------------------------------------------------"
      69             : 
      70             :   END SUBROUTINE setupMPI
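
The implied-do expression above gives each MPI rank its k-point list: rank irank belongs to k-point group INT(irank/n_size), starts at that group's first k-point, and then strides by the number of groups, isize/n_size. The following standalone sketch (not part of FLEUR; program and variable names are chosen here purely for illustration) evaluates the same expression for the example documented in priv_distribute_k below, nkpt = 8, isize = 6, n_size = 3:

    ! Standalone sketch (not part of FLEUR): evaluates the implied-do used for
    ! mpi%k_list above, for the documented example nkpt = 8, isize = 6, n_size = 3.
    PROGRAM sketch_k_list
      IMPLICIT NONE
      INTEGER :: nkpt, isize, n_size, irank, i
      INTEGER, ALLOCATABLE :: k_list(:)
      nkpt = 8; isize = 6; n_size = 3
      DO irank = 0, isize-1
         ! Each rank starts at its k-point group (irank/n_size) and strides by
         ! the number of groups (isize/n_size), as in setupMPI.
         k_list = [(i, i = irank/n_size + 1, nkpt, isize/n_size)]
         WRITE(*,'(A,I0,A,8I3)') 'irank ', irank, ' -> k-points:', k_list
      END DO
    END PROGRAM sketch_k_list

With these inputs, ranks 0-2 obtain k-points 1, 3, 5, 7 and ranks 3-5 obtain 2, 4, 6, 8, matching the example in the comments of priv_distribute_k.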
      71             : 
      72             : 
      73          76 :   SUBROUTINE priv_distribute_k(nkpt,mpi)
      74             :     use m_types
      75             :     implicit none
      76             :     INTEGER,INTENT(in)      :: nkpt
      77             :     TYPE(t_mpi),INTENT(inout)    :: mpi
      78             : 
      79             :     !-------------------------------------------------------------------------------------------
      80             :     !
      81             :     ! Distribute the k-point / eigenvector parallelisation so that
      82             :     ! all PEs have approximately equal load. Maximize the k-point
      83             :     ! parallelisation. The naming conventions are as follows:
      84             :     !
      85             :     ! groups             1               2               3             4      (n_groups = 4) 
      86             :     !                 /     \         /     \          /   \         /   \
      87             :     ! k-points:      1       2       3       4       5       6      7     8     (nkpts = 8)
      88             :     !               /|\     /|\     /|\     /|\     /|\     /|\    /|\   /|\
      89             :     ! irank        0 1 2   3 4 5   0 1 2   3 4 5   0 1 2   3 4 5  0 1 2  3 4 5  (mpi%isize = 6)
      90             :     !
      91             :     ! n_rank       0 1 2   0 1 2   0 1 2   0 1 2   0 1 2   0 1 2  0 1 2  0 1 2  (mpi%n_size = 3)
      92             :     !
      93             :     !
      94             :     ! In the above example, 6 PEs work on 8 k-points and distribute
      95             :     ! their load such that 3 PEs work on each k-point, so 2 k-points
      96             :     ! are done in parallel (n_members = 2) and each processor runs a loop
      97             :     ! over 4 k-points (mpi%n_groups = 4).
      98             :     ! n_rank and n_size are the equivalents of irank and isize. The former
      99             :     ! belong to the communicator SUB_COMM, the latter to MPI_COMM.
     100             :     !
     101             :     !          G.B. `99
     102             :     !
     103             :     !-------------------------------------------------------------------------------------------
     104             :     INTEGER:: n_members,n_size_min,nk
     105             :     CHARACTER(len=1000)::txt
     106             : 
     107          76 :     n_members = MIN(nkpt,mpi%isize)
     108          76 :     IF (judft_was_argument("-n_min_size")) THEN
     109           0 :        txt=judft_string_for_argument("-n_min_size")
     110           0 :        READ(txt,*) n_size_min
     111           0 :        WRITE(*,*) "Trying to use ",n_size_min," PEs per k-point"
     112           0 :        n_members = MIN(n_members , CEILING(REAL(mpi%isize)/n_size_min) ) 
     113             :     ENDIF
     114          32 :     DO  
     115         108 :        IF ((MOD(mpi%isize,n_members) == 0).AND.(MOD(nkpt,n_members) == 0) ) EXIT
     116          32 :        n_members = n_members - 1
     117             :     ENDDO
     118             :  
     119             :     !mpi%n_groups = nkpt/n_members
     120          76 :     mpi%n_size   = mpi%isize/n_members
     121             :     !mpi%n_stride = n_members
     122          76 :     IF (mpi%irank == 0) THEN
     123          38 :        WRITE(*,*) 'k-points in parallel: ',n_members
     124          38 :        WRITE(*,*) "PEs per k-point:      ",mpi%n_size
     125          38 :        WRITE(*,*) '# of k-point loops:   ',nkpt/n_members
     126             :     ENDIF
     127          76 :   END SUBROUTINE priv_distribute_k
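
The DO loop in priv_distribute_k searches, starting from MIN(nkpt, mpi%isize), for the largest n_members that divides both the number of MPI tasks and the number of k-points; it terminates at the latest for n_members = 1. A minimal standalone sketch (not part of FLEUR; names chosen for illustration) reproducing this search for the documented example of 8 k-points on 6 MPI tasks:

    ! Standalone sketch (not part of FLEUR): reproduces the n_members search of
    ! priv_distribute_k for nkpt = 8 k-points and isize = 6 MPI tasks.
    PROGRAM sketch_distribute_k
      IMPLICIT NONE
      INTEGER :: nkpt, isize, n_members
      nkpt = 8
      isize = 6
      n_members = MIN(nkpt, isize)
      ! Decrease n_members until it divides both the number of MPI tasks and
      ! the number of k-points.
      DO
         IF (MOD(isize, n_members) == 0 .AND. MOD(nkpt, n_members) == 0) EXIT
         n_members = n_members - 1
      END DO
      WRITE(*,*) 'k-points in parallel: ', n_members        ! prints 2
      WRITE(*,*) 'PEs per k-point:      ', isize/n_members  ! prints 3
      WRITE(*,*) '# of k-point loops:   ', nkpt/n_members   ! prints 4
    END PROGRAM sketch_distribute_k

For nkpt = 8 and isize = 6 this yields n_members = 2, i.e. 2 k-points are treated in parallel by 3 PEs each, in 4 k-point loops.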
     128             : 
     129          76 :   SUBROUTINE priv_create_comm(nkpt,neigd,mpi)
     130             :     use m_types
     131             :     implicit none
     132             :     INTEGER,INTENT(in)      :: nkpt,neigd
     133             :     TYPE(t_mpi),INTENT(inout)    :: mpi
     134             : #ifdef CPP_MPI
     135             :     INTEGER :: n_members,n,i,ierr,sub_group,world_group,n_start
     136         152 :     INTEGER :: i_mygroup(mpi%n_size)
     137             :     LOGICAL :: compact ! Decide how to distribute k-points
     138             : 
     139          76 :     compact = .true.
     140          76 :     n_members = mpi%isize/mpi%n_size
     141             :     
     142             :     ! now, we make the groups
     143             :     
     144             :     
     145             :     IF (compact) THEN
     146             : 
     147             :         ! This will distribute sub ranks in a compact manner.
     148             :         ! For example, if nkpt = 8 and mpi%isize = 6:
     149             :         
     150             :         !  -----------------------------------
     151             :         ! |  0  |  1  |  2  |  3  |  4  |  5  |    mpi%irank
     152             :         !  -----------------------------------
     153             :         ! |  0  |  1  |  2  |  0  |  1  |  2  |    mpi%n_rank
     154             :         !  -----------------------------------
     155             :         ! |        1        |        2        |    k - points
     156             :         ! |        3        |        4        |
     157             :         ! |        5        |        6        |
     158             :         ! |        7        |        8        |
     159             :         !  -----------------------------------
     160             :     
     161          76 :         n_start = INT(mpi%irank/mpi%n_size) + 1
     162          76 :         i_mygroup(1) = (n_start-1) * mpi%n_size
     163         118 :         do i = 2, mpi%n_size
     164         118 :            i_mygroup(i) = i_mygroup(i-1) + 1
     165             :         enddo
     166             : 
     167             :     ELSE
     168             : 
     169             :         ! This will distribute sub ranks in a spread manner.
     170             :         ! For example, if nkpt = 8 and mpi%isize = 6:
     171             :     
     172             :         !  -----------------------------------
     173             :         ! |  0  |  1  |  2  |  3  |  4  |  5  |    mpi%irank
     174             :         !  -----------------------------------
     175             :         ! |  0  |  0  |  1  |  1  |  2  |  2  |    mpi%n_rank
     176             :         !  -----------------------------------
     177             :         ! |  1  |  2  |  1  |  2  |  1  |  2  |    k - points
     178             :         ! |  3  |  4  |  3  |  4  |  3  |  4  |
     179             :         ! |  5  |  6  |  5  |  6  |  5  |  6  |  
     180             :         ! |  7  |  8  |  7  |  8  |  7  |  8  |
     181             :         !  -----------------------------------
     182             : 
     183             :         n_start = MOD(mpi%irank,n_members) + 1
     184             :         !!      n_start = INT(irank/n_size) * n_size
     185             :         n = 0
     186             :         DO i = n_start,mpi%isize,n_members
     187             :         !!      DO i = n_start+1,n_start+n_size
     188             :            n = n+1
     189             :            i_mygroup(n) = i-1
     190             :         ENDDO
     191             : 
     192             :     ENDIF ! compact
     193             : 
     194          76 :     CALL MPI_COMM_GROUP (mpi%MPI_COMM,WORLD_GROUP,ierr)
     195          76 :     CALL MPI_GROUP_INCL (WORLD_GROUP,mpi%n_size,i_mygroup,SUB_GROUP,ierr)
     196          76 :     CALL MPI_COMM_CREATE (mpi%MPI_COMM,SUB_GROUP,mpi%SUB_COMM,ierr)
     197             :     !write (*,"(a,i0,100i4)") "MPI:",mpi%sub_comm,mpi%irank,mpi%n_groups,mpi%n_size,n,i_mygroup
     198             : 
     199          76 :     CALL MPI_COMM_RANK (mpi%SUB_COMM,mpi%n_rank,ierr)
     200          76 :     ALLOCATE(mpi%ev_list(neigd/mpi%n_size+1))
     201          76 :     mpi%ev_list=[(i,i=mpi%n_rank+1,neigd,mpi%n_size)]
     202             :     
     203             : #endif
     204          76 :   END SUBROUTINE priv_create_comm
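
The two branches of priv_create_comm only differ in how the global ranks that form a sub-communicator are picked: the compact layout takes n_size consecutive ranks, while the spread layout interleaves them with a stride of n_members. The following standalone sketch (not part of FLEUR; it omits the MPI_GROUP_INCL / MPI_COMM_CREATE calls and only prints the rank lists) reproduces both index calculations for the documented example isize = 6, n_size = 3:

    ! Standalone sketch (not part of FLEUR): computes the rank lists of both
    ! group layouts of priv_create_comm for isize = 6, n_size = 3.
    PROGRAM sketch_groups
      IMPLICIT NONE
      INTEGER :: isize, n_size, n_members, irank, n_start, n, i
      INTEGER, ALLOCATABLE :: i_mygroup(:)
      isize = 6; n_size = 3; n_members = isize/n_size
      ALLOCATE(i_mygroup(n_size))
      DO irank = 0, isize-1
         ! Compact layout: n_size consecutive global ranks form one group.
         n_start = irank/n_size + 1
         i_mygroup(1) = (n_start-1)*n_size
         DO i = 2, n_size
            i_mygroup(i) = i_mygroup(i-1) + 1
         END DO
         WRITE(*,'(A,I0,A,3I3)') 'irank ', irank, '  compact group:', i_mygroup
         ! Spread layout: group members are interleaved with stride n_members.
         n_start = MOD(irank, n_members) + 1
         n = 0
         DO i = n_start, isize, n_members
            n = n + 1
            i_mygroup(n) = i - 1
         END DO
         WRITE(*,'(A,I0,A,3I3)') 'irank ', irank, '  spread  group:', i_mygroup
      END DO
    END PROGRAM sketch_groups

With these inputs the compact groups are (0,1,2) and (3,4,5), whereas the spread groups are (0,2,4) and (1,3,5).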
     205             : 
     206           0 :   SUBROUTINE priv_dist_info(nkpt)
     207             :     USE m_available_solvers,ONLY:parallel_solver_available
     208             :     IMPLICIT NONE
     209             :     INTEGER,INTENT(in)           :: nkpt
     210             : 
     211           0 :     INTEGER:: n,k_only,pe_k_only(nkpt)
     212             :     
     213             : #ifdef CPP_MPI
     214             :     !Create a list of PE counts that will lead to k-point parallelization only
     215           0 :     k_only=0
     216           0 :     DO n=1,nkpt
     217           0 :        IF (MOD(nkpt,n)==0) THEN
     218           0 :           k_only=k_only+1
     219           0 :           pe_k_only(k_only)=n
     220             :        ENDIF
     221             :     END DO
     222           0 :     WRITE(*,*) "Most efficient MPI parallelization for:"
     223           0 :     WRITE(*,*) pe_k_only(:k_only)
     224             :     !check if eigenvalue parallelization is possible
     225           0 :     IF (parallel_solver_available()) WRITE(*,*) "Additional eigenvalue parallelization possible"
     226             : #endif
     227           0 :   END SUBROUTINE priv_dist_info
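
priv_dist_info simply enumerates the divisors of nkpt, since a pure k-point parallelization requires the number of PEs to divide the number of k-points. A standalone sketch (not part of FLEUR; names chosen for illustration) of the same enumeration for nkpt = 8:

    ! Standalone sketch (not part of FLEUR): lists the MPI task counts that
    ! allow a pure k-point parallelization, i.e. the divisors of nkpt.
    PROGRAM sketch_dist_info
      IMPLICIT NONE
      INTEGER :: nkpt, n, k_only
      INTEGER, ALLOCATABLE :: pe_k_only(:)
      nkpt = 8
      ALLOCATE(pe_k_only(nkpt))
      k_only = 0
      DO n = 1, nkpt
         IF (MOD(nkpt, n) == 0) THEN
            k_only = k_only + 1
            pe_k_only(k_only) = n
         END IF
      END DO
      WRITE(*,*) 'Most efficient MPI parallelization for:'
      WRITE(*,*) pe_k_only(:k_only)   ! prints 1 2 4 8 for nkpt = 8
    END PROGRAM sketch_dist_info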
     228             : 
     229             : 
     230             : 
     231             : END MODULE m_setupMPI
     232             : 
     233             : 

Generated by: LCOV version 1.13