LCOV - code coverage report
Current view: top level - mpi - mingeselle.F90 (source / functions) Hit Total Coverage
Test: combined.info Lines: 0 99 0.0 %
Date: 2019-09-08 04:53:50 Functions: 0 3 0.0 %

          Line data    Source code
       1             : MODULE m_mingeselle
       2             :   USE m_juDFT
       3             : CONTAINS
       4           0 :   SUBROUTINE mingeselle(SUB_COMM,n_size,n_rank,nv, ahelp,l_real,aa_r,aa_c)
       5             :     !------------------------------------------------------------------+
       6             :     !                                                                  |
       7             :     ! Transfers the spin-down/spin-up part , upper triangle of the     |
       8             :     ! MT-hamiltonian from the help-array ``ahelp'' to the H-matrix.    |
       9             :     ! For eigenvector-parallelization this needs some communication    |
      10             :     ! between the nodes, since this part is created 'column-wise'      |
      11             :     ! but needed row-wise.                                             |
      12             :     !                                                                  |
      13             :     !     n_s(i): number of elements to send to pe #i                  |
      14             :     !     n_r(i): number of elements to receive from pe #i             |
      15             :     !     ns_tot,nr_tot : total number of elements to send/receive     |
      16             :     !     n2_start: pe that has first column of 2nd spin part          |
      17             :     !     cs_el,cr_el: send and receive elements                       |
      18             :     !     in_pos(n,i): where to put in the n'th element sent by pe #i  |
      19             :     !                                                                  |
      20             :     !------------------------------------------------------------------+
      21             : #include"./cpp_double.h"
      22             : 
      23             :     IMPLICIT NONE
      24             :     ! ..
      25             :     ! .. Scalar Arguments
      26             :     INTEGER, INTENT (IN) :: n_size,n_rank,SUB_COMM
      27             :     ! ..
      28             :     ! .. Array Arguments
      29             :     INTEGER, INTENT (IN)    :: nv(2)
      30             :     COMPLEX, INTENT (INOUT) :: ahelp(:)!(m_ahelp)
      31             :     LOGICAL, INTENT (IN)    :: l_real
      32             :     REAL,    INTENT (INOUT) :: aa_r(:)!(matsize)
      33             :     COMPLEX, INTENT (INOUT) :: aa_c(:)
      34             :     ! ..
      35             :     ! .. Local Scalars
      36             :     INTEGER ki,kj,ns_tot,nr_tot,n_p,n2_start,n_help
      37             :     INTEGER ns_max,nr_max,n_pos,np_s,np_r,nv_s,ii,i
      38             :     INTEGER inext,ifront,req_s,req_r
      39             :     ! ..
      40             :     ! .. Local Arrays
      41           0 :     INTEGER n_s(0:n_size-1),n_send(0:n_size-1)
      42           0 :     INTEGER n_r(0:n_size-1),n_recv(0:n_size-1),ierr(3)
      43           0 :     INTEGER, ALLOCATABLE :: in_pos(:,:)
      44           0 :     COMPLEX, ALLOCATABLE :: cs_el(:,:),cr_el(:),b_b(:),c_help(:,:)
      45             : 
      46             :     INCLUDE 'mpif.h'
      47             :     INTEGER stt(MPI_STATUS_SIZE)
      48             :     ! ..
      49             :     !
      50             :     ! kick out the diagonal elements of ahelp
      51             :     !
      52           0 :     i  = 0
      53           0 :     ii = 0
      54           0 :     DO ki =  n_rank+1, nv(1), n_size
      55           0 :        DO kj = 1,ki - 1
      56           0 :           i  =  i + 1
      57           0 :           ii = ii + 1
      58           0 :           ahelp(i) = ahelp(ii)
      59             :        END DO
      60           0 :        ii = ii + 1
      61             :     ENDDO
      62             :     !
      63             :     ! initialize
      64             :     !
      65             :     ns_tot = 0
      66             :     nr_tot = 0
      67           0 :     DO n_p = 0,n_size-1
      68           0 :        n_s(n_p) = 0
      69           0 :        n_r(n_p) = 0
      70             :     ENDDO
      71             :     !
      72             :     ! determine number of elements to send to other pe's
      73             :     !
      74           0 :     n2_start = MOD(nv(1),n_size) - 1
      75           0 :     DO ki = 1, nv(1)
      76           0 :        IF ( MOD(ki-1,n_size).EQ.n_rank ) THEN
      77           0 :           DO kj = 1, ki-1
      78           0 :              ns_tot = ns_tot + 1
      79           0 :              n_p = MOD((kj+n2_start),n_size)
      80           0 :              n_s(n_p) = n_s(n_p) + 1
      81             :           ENDDO
      82             :        ENDIF
      83             :     ENDDO
      84             :     !
      85             :     ! determine number of elements to receive from other pe's
      86             :     !
      87           0 :     DO ki = 1, nv(2)
      88           0 :        IF ( MOD(ki+nv(1)-1,n_size).EQ.n_rank ) THEN
      89           0 :           DO kj = ki+1, nv(2)
      90           0 :              nr_tot = nr_tot + 1
      91           0 :              n_p = MOD(kj-1,n_size)
      92           0 :              n_r(n_p) = n_r(n_p) + 1
      93             :           ENDDO
      94             :        ENDIF
      95             :     ENDDO
      96             :     !
      97             :     !      WRITE (*,*) ns_tot,(n_s(n_p),n_p=0,n_size-1)
      98             :     !      WRITE (*,*) nr_tot,(n_r(n_p),n_p=0,n_size-1)
      99             :     !
     100             :     ! determine the maximal number of s/r-counts and allocate s/r-arrays
     101             :     !
     102             :     ns_max = 0
     103             :     nr_max = 0
     104           0 :     DO n_p = 0,n_size-1
     105           0 :        ns_max = MAX(ns_max,n_s(n_p))
     106           0 :        nr_max = MAX(nr_max,n_r(n_p))
     107             :     ENDDO
     108             :     !      WRITE (*,*) ns_max ,nr_max  , n_size, n_rank
     109           0 :     ALLOCATE ( cs_el(ns_max,0:n_size-1),cr_el(nr_max), in_pos(nr_max,0:n_size-1) )
     110             :     !
     111             :     ! sort the elements of aahelp-array into the send-arrays
     112             :     !
     113             :     n_help = 0
     114           0 :     DO n_p = 0,n_size-1
     115           0 :        n_send(n_p) = 0
     116             :     ENDDO
     117           0 :     DO ki = 1, nv(1)
     118           0 :        IF ( MOD(ki-1,n_size).EQ.n_rank ) THEN
     119           0 :           DO kj = 1, ki-1
     120           0 :              n_help = n_help + 1
     121           0 :              n_p = MOD((kj+n2_start),n_size)
     122           0 :              n_send(n_p) = n_send(n_p) + 1
     123           0 :              cs_el(n_send(n_p),n_p) = ahelp(n_help)
     124             :           ENDDO
     125             :        ENDIF
     126             :     ENDDO
     127           0 :     IF (n_help/=ns_tot)  CALL juDFT_error("n_help.NE.ns_to         t",calledby ="mingeselle")
     128           0 :     DO n_p = 0,n_size-1
     129           0 :        IF (n_send(n_p)/=n_s(n_p))  CALL juDFT_error("n_send.NE.n_s" ,calledby ="mingeselle")
     130             :     ENDDO
     131             :     !
     132             :     ! resort send array: rows <-> columns
     133             :     !
     134           0 :     DO n_p = 0,n_size-1
     135           0 :        nv_s = NINT(SQRT(2.0*n_send(n_p))-0.5)
     136           0 :        ALLOCATE ( c_help(nv_s,nv_s) )
     137             : 
     138             :        n_help = 0
     139           0 :        DO ki = 1,nv_s
     140           0 :           DO kj = 1,ki
     141           0 :              n_help = n_help + 1
     142           0 :              c_help(ki,kj) = cs_el(n_help,n_p)
     143             :           ENDDO
     144             :        ENDDO
     145             : 
     146             :        n_help = 0
     147           0 :        DO kj = 1,nv_s
     148           0 :           DO ki = kj ,nv_s
     149           0 :              n_help = n_help + 1
     150           0 :              cs_el(n_help,n_p) = c_help(ki,kj)
     151             :           ENDDO
     152             :        ENDDO
     153             : 
     154           0 :        DEALLOCATE ( c_help )
     155             :     ENDDO
     156             :     !
     157             :     ! now we look where to put in the received elements
     158             :     !
     159             :     n_pos = 0
     160           0 :     DO n_p = 0,n_size-1
     161           0 :        n_recv(n_p) = 0
     162             :     ENDDO
     163           0 :     DO ki = 1, nv(1)+nv(2)
     164           0 :        IF ( MOD(ki-1,n_size).EQ.n_rank ) THEN
     165           0 :           DO kj = 1, ki
     166           0 :              n_pos = n_pos + 1 
     167           0 :              IF ( ki.GT.nv(1) ) THEN
     168           0 :                 IF ((kj.GT.ki-nv(1)).AND.(kj.LE.nv(1))) THEN
     169           0 :                    n_p = MOD(kj-1,n_size)
     170           0 :                    n_recv(n_p) = n_recv(n_p) + 1
     171           0 :                    in_pos(n_recv(n_p),n_p) = n_pos
     172             :                 ENDIF
     173             :              ENDIF
     174             :           ENDDO
     175             :        ENDIF
     176             :     ENDDO
     177           0 :     DO n_p = 0,n_size-1
     178           0 :        IF (n_recv(n_p)/=n_r(n_p))  CALL juDFT_error("n_recv.NE.n_s" ,calledby ="mingeselle")
     179             :     ENDDO
     180             :     !
     181             :     ! Mandaliet, mandaliet, min geselle kumme niet
     182             :     !
     183           0 :     ifront = ibefore(n_size,n_rank)
     184           0 :     inext  = iafter (n_size,n_rank)
     185           0 :     DO n_p = 0,n_size-1
     186             :        !
     187             :        ! determine pe's to send to and to receive from
     188             :        !
     189           0 :        np_s = MOD(inext +n_p,n_size)
     190           0 :        np_r = MOD(ifront-n_p,n_size)
     191           0 :        IF (np_r.LT.0) np_r = np_r + n_size
     192             :        !
     193             :        ! send section: local rows i with mod(i-1,np) = np_s will be sent to proc np_s
     194             :        !
     195             : 
     196           0 :        IF (np_s.NE.n_rank) THEN
     197             :           CALL MPI_ISEND(cs_el(1,np_s),n_send(np_s), CPP_MPI_COMPLEX,&
     198           0 :                np_s,n_rank,SUB_COMM,req_s,ierr)
     199             :           !          write (*,*) n_rank,'sends',n_send(np_s),'to',np_s
     200             :           !          write (*,'(i2,10f10.7)') n_rank,(real(cs_el(ki,np_s)),ki=1,10)
     201             :        ENDIF
     202             : 
     203             :        !
     204             :        ! receive section : local rows i  with mod(i-1,np) = np_r will be received from np_r
     205             :        ! ... skipped, if update matrix from local data:
     206             :        !
     207           0 :        IF (np_r.NE.n_rank) THEN
     208           0 :           CALL MPI_IRECV(cr_el,n_recv(np_r),CPP_MPI_COMPLEX, MPI_ANY_SOURCE,np_r,SUB_COMM,req_r,ierr)
     209           0 :           CALL MPI_WAIT(req_s,stt,ierr)
     210           0 :           CALL MPI_WAIT(req_r,stt,ierr)
     211             :           !          write (*,*) n_rank,'recvs',ierr,n_p,np_r
     212             :           !          write(*,*) n_rank,'receives',n_recv(np_r),'from',np_r
     213             :           !          write (*,'(i2,10f10.7)') n_rank,(real(cr_el(ki)),ki=1,10)
     214             :           !
     215             :           ! now update the matrix aa()
     216             :           !
     217           0 :           IF (l_real) THEN
     218           0 :              aa_r(in_pos(:n_recv(np_r),np_r)) = aa_r(in_pos(:n_recv(np_r),np_r)) + cr_el(:n_recv(np_r))
     219             :           ELSE
     220           0 :              aa_c(in_pos(:n_recv(np_r),np_r)) = aa_c(in_pos(:n_recv(np_r),np_r)) + cr_el(:n_recv(np_r))
     221             :           ENDIF
     222             :        ELSE
     223           0 :           IF (l_real) THEN
     224           0 :              aa_r(in_pos(:n_recv(np_r),np_r)) = aa_r(in_pos(:n_recv(np_r),np_r)) + cs_el(:n_recv(np_r),np_s)
     225             :           ELSE
     226           0 :              aa_c(in_pos(:n_recv(np_r),np_r)) = aa_c(in_pos(:n_recv(np_r),np_r)) + cs_el(:n_recv(np_r),np_s)
     227             :           ENDIF
     228             :        ENDIF
     229             :        !         CALL MPI_BARRIER(SUB_COMM,ierr)
     230             :     ENDDO
     231             : 
     232           0 :     DEALLOCATE (cs_el,cr_el,in_pos)
     233             : 
     234           0 :   END SUBROUTINE mingeselle
     235             :   !
     236             :   !-------------------------------------------------------------
     237             :   !
     238           0 :   INTEGER FUNCTION ibefore(np, p)
     239             :     !
     240             :     ! Determine (in a ring structure) which is the front process
     241             :     !
     242             :     IMPLICIT NONE
     243             :     INTEGER, INTENT (IN) :: np  !  number of processes
     244             :     INTEGER, INTENT (IN) :: p   !  current processes
     245             : 
     246           0 :     IF ( p > 0 ) THEN
     247           0 :        ibefore = p-1
     248             :     ELSE
     249           0 :        ibefore = np-1
     250             :     ENDIF
     251             : 
     252           0 :   END FUNCTION ibefore
     253             :   !
     254             :   !-------------------------------------------------------------
     255             :   !
     256           0 :   INTEGER FUNCTION iafter(np, p)
     257             :     !
     258             :     ! Determine (in a ring structure) which is the next process
     259             :     !
     260             :     IMPLICIT NONE
     261             :     INTEGER, INTENT (IN) :: np  !  number of processes
     262             :     INTEGER, INTENT (IN) :: p   !  current processes
     263             : 
     264           0 :     IF ( p < np-1 ) THEN
     265           0 :        iafter = p+1
     266             :     ELSE
     267             :        iafter = 0
     268             :     ENDIF
     269             : 
     270           0 :   END FUNCTION iafter
     271             :   !
     272             :   !-------------------------------------------------------------
     273             :   !
     274             : END MODULE m_mingeselle

Generated by: LCOV version 1.13