LCOV - code coverage report
Current view: top level - types - types_mpimat.F90 (source / functions)
Test: FLEUR test coverage
Date: 2024-04-16 04:21:52

                 Hit    Total    Coverage
  Lines:         366    526      69.6 %
  Functions:     25     34       73.5 %

          Line data    Source code
       1             : !--------------------------------------------------------------------------------
       2             : ! Copyright (c) 2016 Peter Grünberg Institut, Forschungszentrum Jülich, Germany
       3             : ! This file is part of FLEUR and available as free software under the conditions
       4             : ! of the MIT license as expressed in the LICENSE file in more detail.
       5             : !--------------------------------------------------------------------------------
       6             : 
       7             : MODULE m_types_mpimat
       8             :    USE m_judft
       9             :    USE m_types_mat
      10             :    USE m_constants
      11             : #ifdef CPP_MPI
      12             :    USE mpi
      13             : #endif
      14             :    IMPLICIT NONE
      15             :    !PRIVATE
      16             :    INTEGER, PARAMETER    :: DEFAULT_BLOCKSIZE = 64
      17             :    INTEGER, PARAMETER   :: dlen_ = 9
      18             : 
      19             : #ifdef __INTEL_COMPILER
      20             :    LOGICAL:: use_pdgemr2d=.true.
      21             : #else
      22             :    LOGICAL:: use_pdgemr2d=.false.
      23             : #endif         
      24             : 
      25             :    !<This data type extends the basic t_mat for distributed matrices.
      26             :    !<
      27             :    !<It stores the additional MPI communicator and sets up a BLACS grid for the matrix.
      28             :    !<This can be used to perform ScaLAPACK calls on the matrix with little additional input.
      29             :    !<The copy procedure is overridden from t_mat to also enable redistribution of the matrix.
      30             :    TYPE t_blacsdata
      31             :       INTEGER:: no_use                           !> reference count: number of t_mpimat objects sharing this blacsdata
      32             :       INTEGER:: mpi_com                          !> MPI communicator over which the matrix is distributed
      33             :       INTEGER:: blacs_desc(dlen_)                !> blacs descriptor
      34             :       !> 1: descriptor type (=1 for a dense matrix)
      35             :       !> 2: context
      36             :       !> 3,4: global matrix size
      37             :       !> 5,6: block sizes
      38             :       !> 7,8: row/column of the grid holding the first row/column of the matrix
      39             :       !> 9: leading dimension of local matrix
      40             :       INTEGER:: npcol, nprow                     !> number of columns/rows in the processor grid
      41             :       INTEGER:: mycol, myrow                     !> column/row of this process in the processor grid
      42             :    END TYPE t_blacsdata
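                     :    ! Illustrative example (the values are assumptions, not taken from a FLEUR run):
                     :    ! for a 1000x1000 matrix distributed in 64x64 blocks, starting at grid
                     :    ! position (0,0), with a local leading dimension of 512, the descriptor
                     :    ! documented above would read
                     :    !    blacs_desc = [1, ictxt, 1000, 1000, 64, 64, 0, 0, 512]
                     :    ! where ictxt is the BLACS context handle of the grid.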
      43             : 
      44             :    TYPE, EXTENDS(t_mat):: t_mpimat
      45             :       INTEGER                   :: global_size1, global_size2        !> size of the full (global) matrix
      46             :       TYPE(t_blacsdata), POINTER :: blacsdata => null()
      47             :    CONTAINS
      48             :       PROCEDURE   :: copy => mpimat_copy     !<overridden from t_mat, also performs redistribution
      49             :       PROCEDURE   :: move => mpimat_move     !<overridden from t_mat, also performs redistribution
      50             :       PROCEDURE   :: free => mpimat_free     !<overridden from t_mat, takes care of BLACS grids
      51             :       procedure   :: save_npy => mpimat_save_npy
      52             :       PROCEDURE   :: multiply => mpimat_multiply  !<overridden from t_mat, takes care of BLACS grids
      53             :       PROCEDURE   :: init_details => mpimat_init
      54             :       PROCEDURE   :: init_template => mpimat_init_template     !<overridden from t_mat, also calls alloc in t_mat
      55             :       PROCEDURE   :: add_transpose => mpimat_add_transpose !<overridden from t_mat
      56             :       PROCEDURE   :: u2l => t_mpimat_u2l   ! construct the full matrix if only the upper triangle of a Hermitian matrix is given
      57             :       PROCEDURE   :: l2u => t_mpimat_l2u
      58             :       PROCEDURE   :: print_matrix
      59             :       PROCEDURE   :: from_non_dist
      60             :       procedure   :: to_non_dist
      61             :       PROCEDURE   :: transpose => mpimat_transpose
      62             :       procedure   :: print_type => mpimat_print_type
      63             :       PROCEDURE   :: linear_problem => t_mpimat_lproblem
      64             :       PROCEDURE   :: is_column_cyclic
      65             : #ifndef __INTEL_COMPILER      
      66             :       FINAL :: finalize,finalize_1d,finalize_2d,finalize_3d
      67             : #endif      
      68             :    END TYPE t_mpimat
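                     :    ! A minimal usage sketch of t_mpimat (hedged: the variable names, the size n and
                     :    ! the communicator comm are illustrative assumptions, not FLEUR call sites):
                     :    !
                     :    !    TYPE(t_mpimat) :: a, b, c
                     :    !    CALL a%init_details(l_real=.FALSE., matsize1=n, matsize2=n, mpi_subcom=comm, l_2d=.TRUE.)
                     :    !    CALL b%init_template(a)            ! same BLACS grid and local sizes as a
                     :    !    CALL c%init_template(a)
                     :    !    CALL a%multiply(b, res=c)          ! ScaLAPACK p?gemm; res is mandatory in the MPI case
                     :    !    CALL a%free(); CALL b%free(); CALL c%free()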
      69             : 
      70             :    PUBLIC t_mpimat, mingeselle
      71             : 
      72             : CONTAINS
      73          36 :    SUBROUTINE t_mpimat_lproblem(mat, vec)
      74             :       IMPLICIT NONE
      75             :       CLASS(t_mpimat), INTENT(IN)   :: mat
      76             :       class(t_mat), INTENT(INOUT)   :: vec
      77             : 
      78          36 :       integer :: ipiv(mat%global_size1), info
      79             : #ifdef CPP_SCALAPACK
      80          36 :       call timestart("mpimat_lproblem")
      81          36 :       if (mat%l_real .neqv. vec%l_real) call judft_error("mat and vec need to be same kind")
      82             : 
      83             :       select type (vec)
      84             :       class is (t_mat)
      85           0 :          call judft_error("lproblem can only be solved if vec and mat are the same class")
      86             :       class is (t_mpimat)
      87          36 :          if (mat%l_real) then
      88             :             !call pdgesv(n,               nrhs,             a,         ia,ja,desca,                    ipiv
      89             :             call pdgesv(mat%global_size1, vec%global_size2, mat%data_r, 1, 1, mat%blacsdata%blacs_desc, ipiv, &
      90             :                         !  b,ib,jb,descb,info)
      91           0 :                         vec%data_r, 1, 1, vec%blacsdata%blacs_desc, info)
      92           0 :             if (info /= 0) call judft_error("Error in pdgesv for lproblem")
      93             :          else
      94             :             !call pzgesv(n,               nrhs,             a,          ia,ja,desca,                    ipiv,
      95             :             call pzgesv(mat%global_size1, vec%global_size2, mat%data_c, 1, 1, mat%blacsdata%blacs_desc, ipiv, &
      96             :                         !           b,         ib,jb, descb,info)
      97          36 :                         vec%data_c, 1, 1, vec%blacsdata%blacs_desc, info)
      98             : 
      99          36 :             if (info /= 0) call judft_error("Error in pzgesv for lproblem: "//int2str(info))
     100             :          end if
     101             :       end select
     102          36 :       call timestop("mpimat_lproblem")
     103             : #else
     104             :       call judft_error("no scala")
     105             : #endif
     106          36 :    end subroutine t_mpimat_lproblem
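                     :    ! Hedged usage note: the right-hand side is overwritten with the solution (as with
                     :    ! the underlying pdgesv/pzgesv calls, which also overwrite the matrix with its LU
                     :    ! factors). Illustrative call (the variable names are assumptions only):
                     :    !    CALL a%linear_problem(rhs)   ! a and rhs must both be t_mpimat of matching kind (l_real)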
     107             : 
     108           0 :    subroutine mpimat_print_type(mat)
     109             :       implicit none
     110             :       CLASS(t_mpimat), INTENT(IN)     :: mat
     111             : 
      112           0 :          write (*, *) "type -> t_mpimat"
     113           0 :    end subroutine mpimat_print_type
     114             : 
     115         372 :    SUBROUTINE mpimat_multiply(mat1, mat2, res, transA, transB)
     116             :       use m_judft
     117             :       CLASS(t_mpimat), INTENT(INOUT)     :: mat1
     118             :       CLASS(t_mat), INTENT(IN)           :: mat2
     119             :       CLASS(t_mat), INTENT(INOUT), OPTIONAL :: res
     120             :       character(len=1), intent(in), optional :: transA, transB
     121             : 
     122             : #ifdef CPP_SCALAPACK
     123         372 :       TYPE(t_mpimat)::m, r
     124             :       character(len=1)  :: transA_i, transB_i
     125             : 
     126         372 :       transA_i = "N"
     127         372 :       if (present(transA)) transA_i = transA
     128         372 :       transB_i = "N"
     129         372 :       if (present(transB)) transB_i = transB
     130             : 
     131         372 :       IF (.NOT. PRESENT(res)) CALL judft_error("BUG: in mpicase the multiply requires the optional result argument")
     132             :       SELECT TYPE (mat2)
     133             :       TYPE IS (t_mpimat)
     134         372 :          SELECT TYPE (res)
     135             :          TYPE is (t_mpimat)
     136         372 :             CALL m%init(mat1, mat2%global_size1, mat2%global_size2)
     137         372 :             CALL m%copy(mat2, 1, 1)
     138         372 :             CALL r%init(mat1, res%global_size1, res%global_size2)
     139         372 :             IF (mat1%l_real) THEN
     140             :                CALL pdgemm(transA_i, transB_i, mat1%global_size1, m%global_size2, mat1%global_size2, 1.0, &
     141             :                            mat1%data_r, 1, 1, mat1%blacsdata%blacs_desc, &
     142             :                            m%data_r, 1, 1, m%blacsdata%blacs_desc, 0.0, &
     143         240 :                            r%data_r, 1, 1, r%blacsdata%blacs_desc)
     144             :             ELSE
     145             :                CALL pzgemm(transA_i, transB_i, mat1%global_size1, m%global_size2, mat1%global_size2, cmplx_1, &
     146             :                            mat1%data_c, 1, 1, mat1%blacsdata%blacs_desc, &
     147             :                            m%data_c, 1, 1, m%blacsdata%blacs_desc, cmplx_0, &
     148         132 :                            r%data_c, 1, 1, r%blacsdata%blacs_desc)
     149             :             END IF
     150         372 :             CALL res%copy(r, 1, 1)
     151         372 :             CALL r%free()
     152         744 :             CALL m%free()
     153             :          CLASS default
     154           0 :             CALL judft_error("BUG in mpimat%multiply: res needs to be t_mpimat")
     155             :          END SELECT
     156             :       CLASS default
     157           0 :          CALL judft_error("BUG in mpimat%multiply: mat2 needs to be t_mpimat")
     158             :       END SELECT
     159             : #endif
     160         372 :    END SUBROUTINE mpimat_multiply
     161             : 
     162           0 :    subroutine mpimat_transpose(mat1, res)
     163             :       CLASS(t_mpimat), INTENT(INOUT) ::mat1
     164             :       CLASS(t_mat), INTENT(OUT), OPTIONAL ::res
     165          72 :       real, allocatable :: rd(:, :)
     166          72 :       complex, allocatable :: cd(:, :)
     167             : #ifdef CPP_SCALAPACK
     168           0 :       if (present(res)) Then
     169             :          select type (res)
     170             :          type is (t_mpimat)
     171           0 :             res%blacsdata = mat1%blacsdata
     172           0 :             res%matsize1 = mat1%matsize1
     173           0 :             res%matsize2 = mat1%matsize2
     174           0 :             res%global_size1 = mat1%global_size1
     175           0 :             res%global_size2 = mat1%global_size2
     176           0 :             res%l_real = mat1%l_real
     177             :          class default
     178           0 :             call judft_error("BUG in t_mpimat%transpose, wrong matrix type")
     179             :          end select
     180             :       END IF
     181          72 :       IF (mat1%l_real) THEN
     182           0 :          allocate (rd(size(mat1%data_r, 1), size(mat1%data_r, 2)))
     183           0 :          call pdtran(mat1%global_size1, mat1%global_size2, 1.0, mat1%data_r, 1, 1, mat1%blacsdata%blacs_desc, 0.0, rd, 1, 1, mat1%blacsdata%blacs_desc)
     184           0 :          if (present(res)) Then
     185           0 :             call move_alloc(rd, res%data_r)
     186             :          else
     187           0 :             call move_alloc(rd, mat1%data_r)
     188             :          end if
     189             :       ELSE
     190         288 :          allocate (cd(size(mat1%data_c, 1), size(mat1%data_c, 2)))
     191          72 :          call pztranc(mat1%global_size1, mat1%global_size2, cmplx(1.0, 0.0), mat1%data_c, 1, 1, mat1%blacsdata%blacs_desc, cmplx(0.0, 0.0), cd, 1, 1, mat1%blacsdata%blacs_desc)
     192          72 :          if (present(res)) Then
     193           0 :             call move_alloc(cd, res%data_c)
     194             :          else
     195          72 :             call move_alloc(cd, mat1%data_c)
     196             :          end if
     197             :       END IF
     198             : #else
     199             :       call judft_error("Not compiled with MPI")
     200             : #endif
     201          72 :    END subroutine
     202             : 
     203           0 :    SUBROUTINE print_matrix(mat, fileno)
     204             : #ifdef CPP_SCALAPACK
     205             :       USE mpi
     206             : #endif
     207             :       CLASS(t_mpimat), INTENT(INOUT) ::mat
     208             :       INTEGER:: fileno
     209             : 
     210             : #ifdef CPP_SCALAPACK
     211             :       INTEGER, EXTERNAL:: indxl2g
     212             :       CHARACTER(len=10)::filename
     213             :       INTEGER :: irank, isize, i, j, npr, npc, r, c, tmp, err, status(MPI_STATUS_SIZE)
     214             : 
     215           0 :       CALL MPI_COMM_RANK(mat%blacsdata%mpi_com, irank, err)
     216           0 :       CALL MPI_COMM_SIZE(mat%blacsdata%mpi_com, isize, err)
     217             : 
     218           0 :       tmp = 0
     219             : 
     220           0 :       IF (irank > 0) CALL MPI_RECV(tmp, 1, MPI_INTEGER, irank - 1, 0, mat%blacsdata%mpi_com, status, err) !lock
     221           0 :       WRITE (filename, "(a,i0)") "out.", fileno
     222           0 :       OPEN (fileno, file=filename, access='append')
     223             : 
     224           0 :       CALL blacs_gridinfo(mat%blacsdata%blacs_desc(2), npr, npc, r, c)
     225           0 :       DO i = 1, mat%matsize1
     226           0 :          DO j = 1, mat%matsize2
     227           0 :             IF (mat%l_real) THEN
     228           0 :                WRITE (fileno, "(5(i0,1x),2(f10.5,1x))") irank, i, j, indxl2g(i, mat%blacsdata%blacs_desc(5), r, 0, npr), &
     229           0 :                   indxl2g(j, mat%blacsdata%blacs_desc(6), c, 0, npc), mat%data_r(i, j)
     230             :             ELSE
     231           0 :                WRITE (fileno, "(5(i0,1x),2(f10.5,1x))") irank, i, j, indxl2g(i, mat%blacsdata%blacs_desc(5), r, 0, npr), &
     232           0 :                   indxl2g(j, mat%blacsdata%blacs_desc(6), c, 0, npc), mat%data_c(i, j)
     233             :             END IF
     234             :          END DO
     235             :       END DO
     236           0 :       CLOSE (fileno)
     237           0 :       IF (irank + 1 < isize) CALL MPI_SEND(tmp, 1, MPI_INTEGER, irank + 1, 0, mat%blacsdata%mpi_com, err)
     238             : 
     239             : #endif
     240           0 :    END SUBROUTINE print_matrix
     241             : 
     242          36 :    subroutine t_mpimat_l2u(mat)
     243             : #ifdef CPP_SCALAPACK
     244             :       USE mpi
     245             : #endif
     246             :       implicit none
     247             :       CLASS(t_mpimat), INTENT(INOUT) ::mat
     248             : 
     249             :       INTEGER :: i, j, i_glob, j_glob, myid, err, np
     250          36 :       COMPLEX, ALLOCATABLE:: tmp_c(:, :)
     251          36 :       REAL, ALLOCATABLE   :: tmp_r(:, :)
     252             : #ifdef CPP_SCALAPACK
     253             :       INTEGER, EXTERNAL    :: numroc, indxl2g  !SCALAPACK functions
     254             : 
     255          36 :       call timestart("t_mpimat_l2u")
     256             : 
     257          36 :       CALL MPI_COMM_RANK(mat%blacsdata%mpi_com, myid, err)
     258          36 :       CALL MPI_COMM_SIZE(mat%blacsdata%mpi_com, np, err)
      259             :       !Zero the upper triangle and halve the diagonal; the conjugate transpose is added back below
     260             : 
     261       15392 :       DO i = 1, mat%matsize1
     262     3634274 :          DO j = 1, mat%matsize2
      263             :             ! Get the global row/column indices corresponding to the local
      264             :             ! indices (i, j) of this process in the BLACS grid
     265     3618882 :             i_glob = indxl2g(i, mat%blacsdata%blacs_desc(5), mat%blacsdata%myrow, 0, mat%blacsdata%nprow)
     266     3618882 :             j_glob = indxl2g(j, mat%blacsdata%blacs_desc(6), mat%blacsdata%mycol, 0, mat%blacsdata%npcol)
     267             : 
     268     3634238 :             IF (i_glob < j_glob) THEN
     269     1805602 :                IF (mat%l_real) THEN
     270           0 :                   mat%data_r(i, j) = 0.0
     271             :                ELSE
     272     1805602 :                   mat%data_c(i, j) = 0.0
     273             :                END IF
     274     1813280 :             elseif (i_glob == j_glob) THEN
     275        7678 :                IF (mat%l_real) THEN
     276           0 :                   mat%data_r(i, j) = mat%data_r(i, j)*0.5
     277             :                ELSE
     278        7678 :                   mat%data_c(i, j) = mat%data_c(i, j)*0.5
     279             :                END IF
     280             :             END IF
     281             :          END DO
     282             :       END DO
     283             : 
     284          36 :       IF (mat%l_real) THEN
     285           0 :          ALLOCATE (tmp_r(mat%matsize1, mat%matsize2))
     286           0 :          tmp_r = mat%data_r
     287             :       ELSE
     288         144 :          ALLOCATE (tmp_c(mat%matsize1, mat%matsize2))
     289     3626632 :          tmp_c = mat%data_c
     290             :       END IF
     291          36 :       CALL MPI_BARRIER(mat%blacsdata%mpi_com, i)
     292          36 :       IF (mat%l_real) THEN
     293           0 :          CALL pdgeadd('t', mat%global_size1, mat%global_size2, 1.0, tmp_r, 1, 1, mat%blacsdata%blacs_desc, 1.0, mat%data_r, 1, 1, mat%blacsdata%blacs_desc)
     294             :       ELSE
     295          36 :          CALL pzgeadd('c', mat%global_size1, mat%global_size2, CMPLX(1.0, 0.0), tmp_c, 1, 1, mat%blacsdata%blacs_desc, CMPLX(1.0, 0.0), mat%data_c, 1, 1, mat%blacsdata%blacs_desc)
     296             :       END IF
     297             : #endif
     298          36 :       call timestop("t_mpimat_l2u")
     299          36 :    end subroutine t_mpimat_l2u
     300             : 
     301          48 :    SUBROUTINE t_mpimat_u2l(mat)
     302             : #ifdef CPP_SCALAPACK
     303             :       USE mpi
     304             : #endif
     305             :       implicit none
     306             :       CLASS(t_mpimat), INTENT(INOUT) ::mat
     307             : 
     308             :       INTEGER :: i, j, i_glob, j_glob, myid, err, np
     309          48 :       COMPLEX, ALLOCATABLE:: tmp_c(:, :)
     310          48 :       REAL, ALLOCATABLE   :: tmp_r(:, :)
     311             : #ifdef CPP_SCALAPACK
     312             :       INTEGER, EXTERNAL    :: numroc, indxl2g  !SCALAPACK functions
     313             : 
     314          48 :       call timestart("t_mpimat_u2l")
     315         144 :       if (all(mat%blacsdata%blacs_desc(5:6) == [mat%global_size1, 1])) then
     316             :          !copy upper triangle to lower triangle
     317          48 :          call mingeselle(mat, mat)
     318             :       else
     319           0 :          CALL MPI_COMM_RANK(mat%blacsdata%mpi_com, myid, err)
     320           0 :          CALL MPI_COMM_SIZE(mat%blacsdata%mpi_com, np, err)
      321             :          !Zero the lower triangle and halve the diagonal; the conjugate transpose is added back below
     322             : 
     323           0 :          DO i = 1, mat%matsize1
     324           0 :             DO j = 1, mat%matsize2
      325             :                ! Get the global row/column indices corresponding to the local
      326             :                ! indices (i, j) of this process in the BLACS grid
     327           0 :                i_glob = indxl2g(i, mat%blacsdata%blacs_desc(5), mat%blacsdata%myrow, 0, mat%blacsdata%nprow)
     328           0 :                j_glob = indxl2g(j, mat%blacsdata%blacs_desc(6), mat%blacsdata%mycol, 0, mat%blacsdata%npcol)
     329             : 
     330           0 :                IF (i_glob > j_glob) THEN
     331           0 :                   IF (mat%l_real) THEN
     332           0 :                      mat%data_r(i, j) = 0.0
     333             :                   ELSE
     334           0 :                      mat%data_c(i, j) = 0.0
     335             :                   END IF
     336             :                END IF
     337           0 :                IF (i_glob == j_glob) THEN
     338           0 :                   IF (mat%l_real) THEN
     339           0 :                      mat%data_r(i, j) = mat%data_r(i, j)/2.0
     340             :                   ELSE
     341           0 :                      mat%data_c(i, j) = mat%data_c(i, j)/2.0
     342             :                   END IF
     343             :                END IF
     344             :             END DO
     345             :          END DO
     346             : 
     347           0 :          IF (mat%l_real) THEN
     348           0 :             ALLOCATE (tmp_r(mat%matsize1, mat%matsize2))
     349           0 :             tmp_r = mat%data_r
     350             :          ELSE
     351           0 :             ALLOCATE (tmp_c(mat%matsize1, mat%matsize2))
     352           0 :             tmp_c = mat%data_c
     353             :          END IF
     354           0 :          CALL MPI_BARRIER(mat%blacsdata%mpi_com, i)
     355           0 :          IF (mat%l_real) THEN
     356           0 :             CALL pdgeadd('t', mat%global_size1, mat%global_size2, 1.0, tmp_r, 1, 1, mat%blacsdata%blacs_desc, 1.0, mat%data_r, 1, 1, mat%blacsdata%blacs_desc)
     357             :          ELSE
     358           0 :             CALL pzgeadd('c', mat%global_size1, mat%global_size2, CMPLX(1.0, 0.0), tmp_c, 1, 1, mat%blacsdata%blacs_desc, CMPLX(1.0, 0.0), mat%data_c, 1, 1, mat%blacsdata%blacs_desc)
     359             :          END IF
     360             :       end if !mingeselle
     361             : #endif
     362          48 :       call timestop("t_mpimat_u2l")
     363          48 :    END SUBROUTINE t_mpimat_u2l
     364             : 
     365           0 :    SUBROUTINE mpimat_add_transpose(mat, mat1)
     366             :       CLASS(t_mpimat), INTENT(INOUT) ::mat
     367             :       CLASS(t_mat), INTENT(INOUT) ::mat1
     368             : 
     369             :       INTEGER:: i, ii, n_size, n_rank
     370             : 
     371           0 :       call timestart("mpimat_add_transpose")
     372             :       SELECT TYPE (mat1)
     373             :       TYPE IS (t_mpimat)
     374             : #ifdef CPP_MPI
     375           0 :          CALL MPI_COMM_RANK(mat%blacsdata%mpi_com, n_rank, i)
     376           0 :          CALL MPI_COMM_SIZE(mat%blacsdata%mpi_com, n_size, i)
     377             : #endif
      378             :          !Set the lower triangle of both matrices (and the diagonal of mat1) to zero...
     379           0 :          ii = 0
     380           0 :          DO i = n_rank + 1, MIN(mat%global_size1, mat%global_size2), n_size
     381           0 :             ii = ii + 1
     382           0 :             IF (mat%l_real) THEN
     383           0 :                mat%data_r(i + 1:, ii) = 0.0
     384           0 :                mat1%data_r(i + 1:, ii) = 0.0
     385           0 :                mat1%data_r(i, ii) = 0.0
     386             :             ELSE
     387           0 :                mat%data_c(i + 1:, ii) = 0.0
     388           0 :                mat1%data_c(i + 1:, ii) = 0.0
     389           0 :                mat1%data_c(i, ii) = 0.0
     390             :             END IF
     391             :          END DO
     392           0 :          IF (mat%l_real) THEN
     393             : #ifdef CPP_SCALAPACK
     394             : 
     395           0 :             CALL pdgeadd('t', mat1%global_size1, mat1%global_size2, 1.0, mat1%data_r, 1, 1, mat1%blacsdata%blacs_desc, 1.0, mat%data_r, 1, 1, mat%blacsdata%blacs_desc)
     396             :          ELSE
     397           0 :             CALL pzgeadd('c', mat1%global_size1, mat1%global_size2, CMPLX(1.0, 0.0), mat1%data_c, 1, 1, mat1%blacsdata%blacs_desc, CMPLX(1.0, 0.0), mat%data_c, 1, 1, mat1%blacsdata%blacs_desc)
     398             : #endif
     399             :          END IF
     400             :          !Now multiply the diagonal of the matrix by 1/2
     401             : 
     402             :          !ii=0
     403             :          !DO i=n_rank+1,MIN(mat%global_size1,mat%global_size2),n_size
     404             :          !   ii=ii+1
     405             :          !   IF (mat%l_real) THEN
     406             :          !      mat%data_r(i,ii)=mat%data_r(i,ii)/2
     407             :          !   ELSE
     408             :          !      mat%data_c(i,ii)=mat%data_c(i,ii)/2
     409             :          !   END IF
     410             :          !ENDDO
     411             :       CLASS default
     412           0 :          CALL judft_error("Inconsistent types in t_mpimat_add_transpose")
     413             :       END SELECT
     414             : 
     415           0 :       call timestop("mpimat_add_transpose")
     416           0 :    END SUBROUTINE mpimat_add_transpose
     417             : 
     418       17792 :    SUBROUTINE mpimat_copy(mat, mat1, n1, n2)
     419             :       IMPLICIT NONE
     420             :       CLASS(t_mpimat), INTENT(INOUT)::mat
     421             :       CLASS(t_mat), INTENT(IN)      ::mat1
     422             :       INTEGER, INTENT(IN) ::n1, n2
     423             :       INTEGER :: irank, err
     424             : 
     425       17792 :       call timestart("mpimat_copy")
     426             : 
     427             :       select type (mat1)
     428             :       type is (t_mpimat)
     429             : 
     430             :       class default
     431           0 :          call judft_error("you can only copy a t_mpimat to a t_mpimat")
     432             :       end select
     433             : 
     434             : #ifdef CPP_SCALAPACK
     435             :       SELECT TYPE (mat1)
     436             :       TYPE IS (t_mpimat)
     437       12920 :          if (mat1%is_column_cyclic().and..not.mat%is_column_cyclic().and..not.use_pdgemr2d) THEN
     438       12176 :             call cyclic_column_to_2Dblock_cyclic(mat1,mat,n1,n2)
     439             :          else
     440        5616 :             IF (mat%l_real) THEN
     441        3184 :                CALL pdgemr2d(Mat1%global_size1, mat1%global_size2, mat1%data_r, 1, 1, mat1%blacsdata%blacs_desc, mat%data_r, n1, n2, mat%blacsdata%blacs_desc, mat1%blacsdata%blacs_desc(2))
     442             :             ELSE
     443        2432 :                CALL pzgemr2d(mat1%global_size1, mat1%global_size2, mat1%data_c, 1, 1, mat1%blacsdata%blacs_desc, mat%data_c, n1, n2, mat%blacsdata%blacs_desc, mat1%blacsdata%blacs_desc(2))
     444             :             END IF
     445             :          endif   
     446             :       CLASS DEFAULT
     447           0 :          CALL judft_error("Wrong datatype in copy")
     448             :       END SELECT
     449             : #else
     450             :        call judft_error("Distributed matrix without SCALAPACK",calledby="mpimat_copy")      
     451             : #endif
     452       17792 :       call timestop("mpimat_copy")
     453       17792 :    END SUBROUTINE mpimat_copy
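                     :    ! Hedged usage sketch (the variable names are illustrative assumptions): since copy
                     :    ! also redistributes, a matrix set up in the 1D column-cyclic layout can be
                     :    ! brought onto a 2D block-cyclic grid for ScaLAPACK with
                     :    !    CALL mat_2d%copy(mat_1d, 1, 1)
                     :    ! where (1, 1) is the global position in mat_2d at which mat_1d is placed.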
     454             : 
     455          72 :    SUBROUTINE from_non_dist(mat, mat1)
     456             :       IMPLICIT NONE
     457             :       CLASS(t_mpimat), INTENT(INOUT)::mat
     458             :       TYPE(t_mat), INTENT(IN)       ::mat1
     459             : 
     460             :       INTEGER:: blacs_desc(9), irank, ierr, umap(1, 1), np
     461             : #ifdef CPP_SCALAPACK
     462         360 :       blacs_desc = (/1, -1, mat1%matsize1, mat1%matsize2, mat1%matsize1, mat1%matsize2, 0, 0, mat1%matsize1/)
     463             : 
     464          36 :       CALL MPI_COMM_RANK(mat%blacsdata%mpi_com, irank, ierr)
     465          36 :       umap(1, 1) = 0
     466          36 :       CALL BLACS_GET(mat%blacsdata%blacs_desc(2), 10, blacs_desc(2))
     467          36 :       CALL BLACS_GRIDMAP(blacs_desc(2), umap, 1, 1, 1)
     468          36 :       IF (mat%l_real) THEN
     469           0 :          CALL pdgemr2d(Mat1%matsize1, mat1%matsize2, mat1%data_r, 1, 1, blacs_desc, mat%data_r, 1, 1, mat%blacsdata%blacs_desc, mat%blacsdata%blacs_desc(2))
     470             :       ELSE
     471          36 :          CALL pzgemr2d(mat1%matsize1, mat1%matsize2, mat1%data_c, 1, 1, blacs_desc, mat%data_c, 1, 1, mat%blacsdata%blacs_desc, mat%blacsdata%blacs_desc(2))
     472             :       END IF
     473             : #endif
     474          36 :    END SUBROUTINE from_non_dist
     475             : 
     476           0 :    subroutine to_non_dist(mat_in, mat_out)
     477             :       implicit none
     478             :       CLASS(t_mpimat), INTENT(IN)::mat_in
     479             :       TYPE(t_mat), INTENT(INOUT)       ::mat_out
     480             : 
     481             :       INTEGER:: blacs_desc(9), irank, ierr, umap(1, 1), np
     482             : 
     483             : #ifdef CPP_SCALAPACK
     484           0 :       blacs_desc = [1, -1, mat_out%matsize1, mat_out%matsize2, mat_out%matsize1, mat_out%matsize2, 0, 0, mat_out%matsize1]
     485             : 
     486           0 :       CALL MPI_COMM_RANK(mat_in%blacsdata%mpi_com, irank, ierr)
     487           0 :       umap(1, 1) = 0
     488           0 :       CALL BLACS_GET(mat_in%blacsdata%blacs_desc(2), 10, blacs_desc(2))
     489           0 :       CALL BLACS_GRIDMAP(blacs_desc(2), umap, 1, 1, 1)
     490           0 :       IF (mat_in%l_real) THEN
     491             :          !call pdgemr2d(m,                  n,                   a,           ia,ja,desca
     492             :          call pdgemr2d(mat_in%global_size1, mat_in%global_size2, mat_in%data_r, 1, 1, mat_in%blacsdata%blacs_desc, &
     493             :                        !             b,            ib,jb,descb,      ictxt)
     494           0 :                        mat_out%data_r, 1, 1, blacs_desc, mat_in%blacsdata%blacs_desc(2))
     495             :       ELSE
     496             :          !call pzgemr2d(m,                  n,                   a,           ia,ja,desca
     497             :          call pzgemr2d(mat_in%global_size1, mat_in%global_size2, mat_in%data_c, 1, 1, mat_in%blacsdata%blacs_desc, &
     498             :                        !             b,            ib,jb,descb,      ictxt)
     499           0 :                        mat_out%data_c, 1, 1, blacs_desc, mat_in%blacsdata%blacs_desc(2))
     500             :       END IF
     501             : #endif
     502           0 :    end subroutine to_non_dist
     503             : 
     504           0 :    subroutine mpimat_save_npy(mat, filename)
     505             :       use m_judft
     506             :       implicit NONE
     507             :       CLASS(t_mpimat), INTENT(IN)::mat
     508             :       character(len=*)         :: filename
     509           0 :       type(t_mat) :: tmp
     510             :       integer :: ierr, irank
     511             : #ifdef CPP_MPI
     512           0 :       CALL MPI_COMM_RANK(mat%blacsdata%mpi_com, irank, ierr)
     513             : 
     514           0 :       if (irank == 0) then
     515           0 :          call tmp%alloc(mat%l_real, mat%global_size1, mat%global_size2)
     516             :       else
     517           0 :          call tmp%alloc(mat%l_real,1,1)
     518             :       endif 
     519             : 
     520           0 :       call mat%to_non_dist(tmp)
     521             : 
     522           0 :       if (irank == 0) then
     523           0 :          call tmp%save_npy(filename)
     524             :       end if
     525           0 :       call tmp%free()
     526             : #endif
     527           0 :    end subroutine mpimat_save_npy
     528             : 
     529        8528 :    SUBROUTINE mpimat_move(mat, mat1)
     530             :       IMPLICIT NONE
     531             :       CLASS(t_mpimat), INTENT(INOUT)::mat
     532             :       CLASS(t_mat), INTENT(INOUT)   ::mat1
     533        8528 :       CALL mat%copy(mat1, 1, 1)
     534        8528 :    END SUBROUTINE mpimat_move
     535             : 
     536       22196 :    SUBROUTINE finalize(mat)
     537             :       IMPLICIT NONE
     538             :       TYPE(t_mpimat), INTENT(INOUT) :: mat
     539             :       INTEGER :: ierr
     540       22196 :       IF (ASSOCIATED(mat%blacsdata)) THEN
     541        1928 :          IF (mat%blacsdata%no_use > 1) THEN
     542        1148 :             mat%blacsdata%no_use = mat%blacsdata%no_use - 1
     543        1148 :             mat%blacsdata => null()
     544             :          ELSE
     545             : #ifdef CPP_SCALAPACK
     546         780 :             if (mat%blacsdata%blacs_desc(2) /= -1) THEN
     547         780 :                CALL BLACS_GRIDEXIT(mat%blacsdata%blacs_desc(2), ierr)
     548         780 :                DEALLOCATE (mat%blacsdata)
     549             :             endif   
     550             : #endif
     551             :          END IF
     552             :       END IF
     553             :       
     554       22196 :    END SUBROUTINE finalize
     555             : 
     556          12 :    SUBROUTINE finalize_1d(mat)
     557             :       IMPLICIT NONE
     558             : 
     559             :       TYPE(t_mpimat), INTENT(INOUT) :: mat(:)
     560             :       INTEGER                      :: i,ierr
     561          48 :       DO i = 1, size(mat)
     562          48 :          IF (ASSOCIATED(mat(i)%blacsdata)) THEN
     563          36 :             IF (mat(i)%blacsdata%no_use > 1) THEN
     564           0 :                mat(i)%blacsdata%no_use = mat(i)%blacsdata%no_use - 1
     565           0 :                mat(i)%blacsdata => null()
     566             :             ELSE
     567             : #ifdef CPP_SCALAPACK
     568          36 :                if (mat(i)%blacsdata%blacs_desc(2) /= -1) THEN
     569          36 :                   CALL BLACS_GRIDEXIT(mat(i)%blacsdata%blacs_desc(2), ierr)
     570          36 :                   DEALLOCATE (mat(i)%blacsdata)
     571             :                endif   
     572             : #endif
     573             :             END IF
     574             :          END IF
     575             :       END DO
     576          12 :    END SUBROUTINE finalize_1d
     577             : 
     578        9744 :    SUBROUTINE finalize_2d(mat)
     579             :       IMPLICIT NONE
     580             : 
     581             :       TYPE(t_mpimat), INTENT(INOUT) :: mat(:, :)
     582             :       INTEGER                      :: i, j,ierr
     583             : 
     584       20704 :       DO i = 1, size(mat, dim=1)
     585       34096 :          DO j = 1, size(mat, dim=2)
     586       24352 :             IF (ASSOCIATED(mat(i,j)%blacsdata)) THEN
     587           0 :                IF (mat(i,j)%blacsdata%no_use > 1) THEN
     588           0 :                   mat(i,j)%blacsdata%no_use = mat(i,j)%blacsdata%no_use - 1
     589           0 :                   mat(i,j)%blacsdata => null()
     590             :                ELSE
     591             : #ifdef CPP_SCALAPACK
     592           0 :                   if (mat(i,j)%blacsdata%blacs_desc(2) /= -1) THEN
     593           0 :                      CALL BLACS_GRIDEXIT(mat(i,j)%blacsdata%blacs_desc(2), ierr)
     594           0 :                      DEALLOCATE (mat(i,j)%blacsdata)
     595             :                   endif   
     596             : #endif
     597             :                END IF
     598             :             END IF
     599             :          END DO
     600             :       END DO
     601        9744 :    END SUBROUTINE finalize_2d
     602             : 
     603           0 :    SUBROUTINE finalize_3d(mat)
     604             :       IMPLICIT NONE
     605             :       TYPE(t_mpimat), INTENT(INOUT) :: mat(:, :, :)
     606             :       INTEGER                      :: i, j, k, ierr
     607             : 
     608           0 :       DO i = 1, size(mat, dim=1)
     609           0 :          DO j = 1, size(mat, dim=2)
     610           0 :             DO k = 1, size(mat, dim=3)
     611           0 :                IF (ASSOCIATED(mat(i,j,k)%blacsdata)) THEN
     612           0 :                   IF (mat(i,j,k)%blacsdata%no_use > 1) THEN
     613           0 :                      mat(i,j,k)%blacsdata%no_use = mat(i,j,k)%blacsdata%no_use - 1
     614           0 :                      mat(i,j,k)%blacsdata => null()
     615             :                   ELSE
     616             : #ifdef CPP_SCALAPACK
     617           0 :                      if (mat(i,j,k)%blacsdata%blacs_desc(2) /= -1) THEN
     618           0 :                         CALL BLACS_GRIDEXIT(mat(i,j,k)%blacsdata%blacs_desc(2), ierr)
     619           0 :                         DEALLOCATE (mat(i,j,k)%blacsdata)
     620             :                      endif   
     621             : #endif
     622             :                   END IF
     623             :                END IF
     624             :             END DO
     625             :          END DO
     626             :       END DO
     627           0 :    END SUBROUTINE finalize_3d
     628             : 
     629       33660 :    SUBROUTINE mpimat_free(mat)
     630             :       IMPLICIT NONE
     631             :       CLASS(t_mpimat), INTENT(INOUT) :: mat
     632             :       INTEGER :: ierr
     633             :      
     634       33660 :       IF (ALLOCATED(mat%data_r)) DEALLOCATE (mat%data_r)
     635       33660 :       IF (ALLOCATED(mat%data_c)) DEALLOCATE (mat%data_c)
     636       33660 :       IF (ASSOCIATED(mat%blacsdata)) THEN
     637       33660 :          IF (mat%blacsdata%no_use > 1) THEN
     638       17220 :             mat%blacsdata%no_use = mat%blacsdata%no_use - 1
     639       17220 :             mat%blacsdata => null()
     640             :          ELSE
     641             : #ifdef CPP_SCALAPACK
     642       16440 :             if (mat%blacsdata%blacs_desc(2) /= -1) THEN
     643       16440 :                CALL BLACS_GRIDEXIT(mat%blacsdata%blacs_desc(2), ierr)
     644       16440 :                DEALLOCATE (mat%blacsdata)
     645             :             endif   
     646             : #endif
     647             :          END IF
     648             :       END IF
     649       33660 :    END SUBROUTINE mpimat_free
     650             : 
     651             :    !>Initialization of the distributed matrix.
     652             :   !!
     653             :   !! The argument l_2d controls the kind of distribution used:
      654             :   !!  - TRUE: the matrix is distributed in a two-dimensional ScaLAPACK block-cyclic distribution
      655             :   !!  - FALSE: the matrix is distributed in a one-dimensional column-cyclic distribution with blocksize 1,
      656             :   !!    as used in the parallel matrix setup of FLEUR
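                     :   !!
                     :   !! Illustrative calls (hedged: the communicator sub_comm and the size n are
                     :   !! assumptions, not taken from actual FLEUR call sites):
                     :   !!    CALL mat%init_details(l_real=.TRUE., matsize1=n, matsize2=n, &
                     :   !!                          mpi_subcom=sub_comm, l_2d=.TRUE.)   ! 2D block-cyclic (64x64 blocks by default)
                     :   !!    CALL mat%init_details(l_real=.TRUE., matsize1=n, matsize2=n, &
                     :   !!                          mpi_subcom=sub_comm, l_2d=.FALSE.)  ! 1D column-cyclic, blocksize 1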
     657       17304 :    SUBROUTINE mpimat_init(mat, l_real, matsize1, matsize2, mpi_subcom, l_2d, nb_x, nb_y, mat_name)
     658             : #ifdef CPP_MPI
     659             :       use mpi
     660             : #endif
     661             :       IMPLICIT NONE
     662             :       CLASS(t_mpimat)                      :: mat
     663             :       INTEGER, INTENT(IN), OPTIONAL        :: matsize1, matsize2, mpi_subcom
     664             :       LOGICAL, INTENT(IN), OPTIONAL        :: l_real, l_2d
     665             :       INTEGER, INTENT(IN), OPTIONAL        :: nb_y, nb_x
     666             :       character(len=*), intent(in), optional :: mat_name
     667             : 
     668             : #ifdef CPP_SCALAPACK
     669             :       INTEGER::nbx, nby, irank, ierr
     670       17304 :       CALL mpi_comm_rank(MPI_COMM_WORLD, irank, ierr)
     671             : 
     672       17304 :       call timestart("mpimat_init")
     673       17304 :       ALLOCATE (mat%blacsdata, stat=ierr)
     674       17304 :       if (mpi_subcom == MPI_COMM_NULL) Then
     675           0 :          mat%blacsdata%blacs_desc(2) = -1
     676           0 :          mat%global_size1 = matsize1
     677           0 :          mat%global_size2 = matsize2
     678           0 :          mat%matsize1 = 0
     679           0 :          mat%matsize2 = 0
     680             :       else
     681       17304 :          nbx = priv_get_blocksize()
     682       17304 :          nby = priv_get_blocksize()
     683       17304 :          IF (PRESENT(nb_x)) nbx = nb_x
     684       17304 :          IF (PRESENT(nb_y)) nby = nb_y
     685       17304 :          IF (.NOT. (PRESENT(matsize1) .AND. PRESENT(matsize2) .AND. PRESENT(mpi_subcom) .AND. PRESENT(l_real) .AND. PRESENT(l_2d))) &
     686           0 :             CALL judft_error("Optional arguments must be present in mpimat_init")
     687       17304 :          mat%global_size1 = matsize1
     688       17304 :          mat%global_size2 = matsize2
     689       17304 :          mat%blacsdata%no_use = 1
     690             :       end if
     691             :       CALL priv_create_blacsgrid(mpi_subcom, l_2d, matsize1, matsize2, nbx, nby, &
     692       17304 :                                  mat%blacsdata, mat%matsize1, mat%matsize2)
     693             : 
     694       17304 :       mat%blacsdata%mpi_com = mpi_subcom
      695       17304 :       CALL mat%alloc(l_real) !Attention, sizes determined in call to priv_create_blacsgrid
     696             :       !check if this matrix is actually distributed over MPI_COMM_SELF
     697             :       !IF (mpi_subcom == MPI_COMM_SELF) THEN
     698             :       !   CALL MPI_COMM_RANK(mpi_subcom, irank, ierr)
     699             :       !   IF (irank > 0) mat%blacsdata%blacs_desc(2) = -1
     700             :       !END IF
     701             : 
     702       17304 :       call timestop("mpimat_init")
     703             : #else
     704             :     CALL juDFT_ERROR("No parallel matrix setup without SCALAPACK")
     705             : #endif
     706       17304 :    END SUBROUTINE mpimat_init
     707             : 
     708       18368 :    SUBROUTINE mpimat_init_template(mat, templ, global_size1, global_size2, mat_name)
     709             :       IMPLICIT NONE
     710             :       CLASS(t_mpimat), INTENT(INOUT)  :: mat
     711             :       CLASS(t_mat), INTENT(IN)        :: templ
     712             :       INTEGER, INTENT(IN), OPTIONAL    :: global_size1, global_size2
     713             :       character(len=*), intent(in), optional :: mat_name
     714             : 
     715             :       INTEGER::numroc
     716             :       EXTERNAL::numroc
     717             : 
     718             :       SELECT TYPE (templ)
     719             :       TYPE IS (t_mpimat)
     720       18368 :          mat%l_real = templ%l_real
     721       18368 :          IF (PRESENT(global_size1) .AND. PRESENT(global_size2)) THEN
     722         780 :             ALLOCATE (mat%blacsdata)
     723         780 :             mat%blacsdata = templ%blacsdata
     724         780 :             mat%blacsdata%no_use = mat%blacsdata%no_use + 1
     725         780 :             mat%blacsdata%blacs_desc(3) = global_size1
     726         780 :             mat%blacsdata%blacs_desc(4) = global_size2
     727         780 :             mat%global_size1 = global_size1
     728         780 :             mat%global_size2 = global_size2
     729             : #ifdef CPP_SCALAPACK
     730         780 :             mat%matsize1 = NUMROC(global_size1, mat%blacsdata%blacs_desc(5), mat%blacsdata%myrow, mat%blacsdata%blacs_desc(7), mat%blacsdata%nprow)
     731         780 :             mat%matsize2 = NUMROC(global_size2, mat%blacsdata%blacs_desc(6), mat%blacsdata%mycol, mat%blacsdata%blacs_desc(8), mat%blacsdata%npcol)
     732             : #endif
     733             :          ELSE
     734       17588 :             mat%matsize1 = templ%matsize1
     735       17588 :             mat%matsize2 = templ%matsize2
     736       17588 :             mat%global_size1 = templ%global_size1
     737       17588 :             mat%global_size2 = templ%global_size2
     738       17588 :             mat%blacsdata => templ%blacsdata
     739       17588 :             mat%blacsdata%no_use = mat%blacsdata%no_use + 1
     740             :          END IF
     741       36736 :          CALL mat%alloc()
     742             : 
     743             :       CLASS default
     744           0 :          CALL judft_error("Mixed initialization in t_mpimat not possible(BUG)")
     745             :       END SELECT
     746       18368 :    END SUBROUTINE mpimat_init_template
     747             : 
     748       17304 :    SUBROUTINE priv_create_blacsgrid(mpi_subcom, l_2d, m1, m2, nbc, nbr, blacsdata, local_size1, local_size2)
     749             : #ifdef CPP_SCALAPACK
     750             :       USE mpi
     751             : #endif
     752             :       IMPLICIT NONE
     753             :       INTEGER, INTENT(IN) :: mpi_subcom
     754             :       INTEGER, INTENT(IN) :: m1, m2
     755             :       INTEGER, INTENT(INOUT)::nbc, nbr
     756             :       LOGICAL, INTENT(IN) :: l_2d
     757             :       type(t_blacsdata), INTENT(OUT)::blacsdata
     758             :       INTEGER, INTENT(OUT):: local_size1, local_size2
     759             : 
     760             : #ifdef CPP_SCALAPACK
     761             :       INTEGER     :: myrowssca, mycolssca
     762             :       INTEGER     :: iamblacs, npblacs, np, myid, mycol, myrow
     763             :       INTEGER     :: nprow2, npcol2
     764             :       INTEGER     :: k, i, j, ictextblacs
     765             :       INTEGER     :: ierr
     766             : 
     767       17304 :       INTEGER, ALLOCATABLE :: iblacsnums(:), ihelp(:), iusermap(:, :)
     768             : 
     769             :       EXTERNAL descinit, blacs_get
     770             :       EXTERNAL blacs_pinfo, blacs_gridinit
     771             : 
     772             :       !Determine rank and no of processors
     773       17304 :       if (mpi_subcom /= MPI_COMM_NULL) THEN
     774       17304 :          CALL MPI_COMM_RANK(mpi_subcom, myid, ierr)
     775       17304 :          CALL MPI_COMM_SIZE(mpi_subcom, np, ierr)
     776             : 
     777             :          ! compute processor grid, as square as possible
     778             :          ! If not square with more rows than columns
     779       17304 :          IF (l_2d) THEN
     780        4908 :             distloop: DO j = INT(SQRT(REAL(np))), 1, -1
     781        4908 :                IF ((np/j)*j == np) THEN
     782        4908 :                   blacsdata%npcol = np/j
     783        4908 :                   blacsdata%nprow = j
     784        4908 :                   EXIT distloop
     785             :                END IF
     786             :             END DO distloop
     787             :          ELSE
     788       12396 :             nbc = 1
     789       12396 :             nbr = MAX(m1, m2)
     790       12396 :             blacsdata%npcol = np
     791       12396 :             blacsdata%nprow = 1
     792             :          END IF
     793             : 
     794      121128 :          ALLOCATE (iblacsnums(np), ihelp(np), iusermap(blacsdata%nprow, blacsdata%npcol))
     795             : 
      796             :          !   A blacsdata%nprow x blacsdata%npcol processor grid will be created.
      797             :          !   myrow and mycol are the row and column indices of this process in
      798             :          !   that grid, according to the ScaLAPACK distribution of A and B.
      799             :          !   The local process will hold myrowssca rows and mycolssca columns
      800             :          !   of A and B.
     801             :          !
     802             : 
     803       17304 :          myrow = myid/blacsdata%npcol  ! my row number in the BLACS blacsdata%nprow*blacsdata%npcol grid
     804       17304 :          mycol = myid - (myid/blacsdata%npcol)*blacsdata%npcol  ! my column number in the BLACS blacsdata%nprow*blacsdata%npcol grid
     805             :          !
     806             :          !  Now allocate Asca to put the elements of Achi or receivebuffer to
     807             :          !
     808       17304 :          myrowssca = (m1 - 1)/(nbr*blacsdata%nprow)*nbr + MIN(MAX(m1 - (m1 - 1)/(nbr*blacsdata%nprow)*nbr*blacsdata%nprow - nbr*myrow, 0), nbr)
     809             :          !     Number of rows the local process gets in ScaLAPACK distribution
     810       17304 :          mycolssca = (m2 - 1)/(nbc*blacsdata%npcol)*nbc + MIN(MAX(m2 - (m2 - 1)/(nbc*blacsdata%npcol)*nbc*blacsdata%npcol - nbc*mycol, 0), nbc)
     811             : 
     812             :          !Get BLACS ranks for all MPI ranks
     813       17304 :          CALL BLACS_PINFO(iamblacs, npblacs)  ! iamblacs = local process rank (e.g. myid)
     814             :          ! npblacs  = number of available processes
     815       51912 :          iblacsnums = -2
     816       51912 :          ihelp = -2
     817       17304 :          ihelp(myid + 1) = iamblacs ! Get the Blacs id corresponding to the MPI id
     818             :          if (mpi_subcom /= MPI_COMM_NULL) &
     819       17304 :             CALL MPI_ALLREDUCE(ihelp, iblacsnums, np, MPI_INTEGER, MPI_MAX, mpi_subcom, ierr)
     820             : 
     821       17304 :          IF (ierr /= 0) call judft_error('Error in allreduce for BLACS nums')
     822             : 
     823             :          !     iblacsnums(i) is the BLACS-process number of MPI-process i-1
     824       17304 :          k = 1
     825       51912 :          DO i = 1, blacsdata%nprow
     826       69216 :             DO j = 1, blacsdata%npcol
     827       34608 :                iusermap(i, j) = iblacsnums(k)
     828       51912 :                k = k + 1
     829             :             END DO
     830             :          END DO
     831             :       else
     832           0 :          CALL BLACS_PINFO(iamblacs, npblacs)
     833           0 :          iusermap = reshape([iamblacs], [1, 1])
     834           0 :          blacsdata%nprow = 1; blacsdata%npcol = 1; blacsdata%blacs_desc(2) = -1
     835             :       end if
     836             : 
     837             : !#ifdef CPP_BLACSDEFAULT
     838             :       !Get the Blacs default context
     839       17304 :       CALL BLACS_GET(0, 0, ictextblacs)
     840             : !#else
     841             : !    ictextblacs=mpi_subcom
     842             : !#endif
     843             :       ! Create the Grid
     844       17304 :       CALL BLACS_GRIDMAP(ictextblacs, iusermap, size(iusermap, 1), blacsdata%nprow, blacsdata%npcol)
      845             :       !     Now check whether the BLACS grid is the one we wanted
     846       17304 :       CALL BLACS_GRIDINFO(ictextblacs, nprow2, npcol2, blacsdata%myrow, blacsdata%mycol)
     847       17304 :       IF (nprow2 /= blacsdata%nprow) THEN
     848           0 :          WRITE (oUnit, *) 'Wrong number of rows in BLACS grid'
     849           0 :          WRITE (oUnit, *) 'nprow=', blacsdata%nprow, ' nprow2=', nprow2
     850           0 :          call judft_error('Wrong number of rows in BLACS grid')
     851             :       END IF
     852       17304 :       IF (npcol2 /= blacsdata%npcol) THEN
     853           0 :          WRITE (oUnit, *) 'Wrong number of columns in BLACS grid'
     854           0 :          WRITE (oUnit, *) 'npcol=', blacsdata%npcol, ' npcol2=', npcol2
     855           0 :          call judft_error('Wrong number of columns in BLACS grid')
     856             : 
     857             :       END IF
     858             :       !Create the descriptors
     859       17304 :       if (mpi_subcom == MPI_COMM_NULL) then
     860           0 :          blacsdata%blacs_desc(2) = -1
     861           0 :          local_size1 = 0
     862           0 :          local_size2 = 0
     863             :       else
     864       17304 :          IF (myrowssca==0.or.mycolssca==0) THEN
     865          12 :             CALL juDFT_warn("With your chosen eigenvalue parallelization scheme some MPI processes have no share of the Hamiltonian. Please check your parallelization.",hint="Either reduce the number of MPI processes or increase the k-point parallelization.")
     866             :          END IF
     867       17304 :          CALL descinit(blacsdata%blacs_desc, m1, m2, nbr, nbc, 0, 0, ictextblacs, myrowssca, ierr)
     868       17304 :          IF (ierr /= 0) call judft_error('Creation of BLACS descriptor failed')
     869       17304 :          local_size1 = myrowssca
     870       17304 :          local_size2 = mycolssca
     871             :       end if
     872             : #endif
     873       17304 :    END SUBROUTINE priv_create_blacsgrid
     874             : 
     875       34608 :    integer function priv_get_blocksize()
     876             :       integer, save:: block_size=-1
     877             :       character(len=10):: str
     878       34608 :       if (block_size<0)  THEN
     879          86 :          block_size=DEFAULT_BLOCKSIZE
     880          86 :          if (judft_was_argument("-blocksize")) THEN
     881           0 :             str=judft_string_for_argument("-blocksize")
     882           0 :             read(str,*) block_size
     883             :          endif
     884             :       endif
     885       34608 :       priv_get_blocksize=block_size
     886             :       
     887       34608 :    end function   
     888             : 
     889        1264 :    SUBROUTINE mingeselle(mat_in, mat_out)
     890             :       !---------------------------------------------------------------------+
     891             :       !                                                                     |
      892             :       ! Transfers the spin-down/spin-up part, upper triangle of the         |
     893             :       ! mat_in to the lower triangle of the mat_out.                        |
     894             :       !                                                                     |
     895             :       !      --------------------                                           |
     896             :       !      |      |  mat_out  |                                           |
     897             :       !      |      |           | nv1                                       |
     898             :       !      --------------------                                           |
     899             :       !      |      |           |                                           |
     900             :       !      |mat_in|           |                                           |
     901             :       !      |      |           |                                           |
     902             :       !      |      |           | nv2                                       |
     903             :       !      --------------------                                           |
     904             :       !                                                                     |
     905             :       ! For eigenvector-parallelization this needs some communication       |
     906             :       ! between the nodes, since this part is created 'column-wise'         |
     907             :       ! but needed row-wise.                                                |
     908             :       !                                                                     |
     909             :       !     n_send(i): number of elements to send to pe #i                  |
     910             :       !     n_recv(i): number of elements to receive from pe #i             |
     911             :       !     ns_tot,nr_tot : total number of elements to send/receive        |
     912             :       !     cs_el,cr_el: send and receive elements                          |
      913             :       !     in_pos(xy,n,i): where to put the n'th element sent by pe #i     |
     914             :       !                                                                     |
      915             :       !  ToDo: chop the send messages into smaller chunks                   |
      916             :       !  (A small worked example of this distribution is given below.)      |
     917             :       !  Based on the old mingeselle.                                       |
      918             :       !                                    Oct. 2020                        |
     919             :       !                                    U.Alekseeva                      |
     920             :       !---------------------------------------------------------------------+
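                      :       ! Added illustration (not part of the original header): with n_size = 2 MPI
                      :       ! processes and the column-cyclic distribution used here, rank 0 holds the
                      :       ! global columns 1,3,5,... and rank 1 the columns 2,4,6,... .  The upper-triangle
                      :       ! element mat_in(2,3) is stored on rank 0 (owner of column 3); its destination,
                      :       ! row 3 / column 2 of mat_out, lies in global column 2, which is owned by
                      :       ! rank mod(2-1,2) = 1, so rank 0 sends conjg(mat_in(2,3)) to rank 1, which
                      :       ! stores it as mat_out(3,2).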
     921             : 
     922             :       CLASS(t_mat), INTENT(INOUT) ::mat_in
     923             :       CLASS(t_mat), INTENT(INOUT) ::mat_out
     924             :       ! ..
      925             :       ! .. Local Scalars
     926             :       INTEGER n_rank, n_size
     927             :       INTEGER ki, kj, kjj, ns_tot, nr_tot, n_p, n_help, i, ii
     928             :       INTEGER ns_max, nr_max, np_s, np_r
     929             :       INTEGER inext, ifront, req_s, req_r
     930             :       INTEGER SUB_COMM
     931             :       ! ..
     932             :       ! .. Local Arrays
     933             :       INTEGER ierr
     934        1264 :       INTEGER, ALLOCATABLE :: c_help_size(:, :)
     935        1264 :       INTEGER, ALLOCATABLE :: n_send(:), nsr(:)
     936        1264 :       INTEGER, ALLOCATABLE :: n_recv(:), n_r(:)
     937        1264 :       INTEGER, ALLOCATABLE :: in_pos(:, :, :)
     938        1264 :       COMPLEX, ALLOCATABLE :: cs_el(:, :), cr_el(:), b_b(:), c_help(:, :)
     939        1264 :       LOGICAL, ALLOCATABLE :: nsl(:)
     940             : 
     941             : #ifdef CPP_MPI
     942             :       INTEGER stt(MPI_STATUS_SIZE)
     943             : 
     944        1264 :       call timestart("mingeselle")
     945        1264 :       IF (mat_out%l_real .OR. mat_in%l_real) CALL juDFT_error("Matrices should be complex", calledby="mingeselle")
     946             : 
     947             :       SELECT TYPE (mat_in)
     948             :       TYPE IS (t_mpimat)
     949        1264 :          SELECT TYPE (mat_out)
     950             :          TYPE IS (t_mpimat)
     951        1264 :             IF ((mat_in%global_size1 .NE. mat_out%global_size2) .OR. (mat_in%global_size2 .NE. mat_out%global_size1)) THEN
     952           0 :                CALL juDFT_error("The matrices do not match", calledby="mingeselle")
     953             :             END IF
     954        1264 :             CALL MPI_COMM_RANK(mat_out%blacsdata%mpi_com, n_rank, ierr)
     955        1264 :             CALL MPI_COMM_SIZE(mat_out%blacsdata%mpi_com, n_size, ierr)
     956        1264 :             SUB_COMM = mat_out%blacsdata%mpi_com
     957             : 
     958        7584 :             ALLOCATE (n_send(0:n_size - 1), n_recv(0:n_size - 1), n_r(0:n_size - 1), nsr(0:n_size - 1))
     959        5056 :             ALLOCATE (c_help_size(2, 0:n_size - 1), nsl(0:n_size - 1))
     960             :             ns_tot = 0
     961             :             nr_tot = 0
     962        3792 :             n_send = 0
     963        3792 :             n_r = 0
     964        8848 :             c_help_size = 0
     965             : 
     966             :             ! determine number of elements to send to other pe's
      967             :             ! and calculate the dimensions of c_help
     968             :             ! rows of c_help correspond to columns of mat_in and vice versa
     969             : 
     970      167852 :             DO ki = 1, mat_in%matsize2
     971      166588 :                kjj = n_rank + 1 + (ki - 1)*n_size      ! global column index
     972      499764 :                nsr = 0
     973      499764 :                nsl = .FALSE.
     974    31203432 :                DO kj = 1, min(kjj - 1, mat_in%matsize1)
     975             :                   ns_tot = ns_tot + 1
     976    31036844 :                   n_p = MOD(kj - 1, n_size)
     977    31036844 :                   n_send(n_p) = n_send(n_p) + 1
     978    31036844 :                   nsr(n_p) = nsr(n_p) + 1
     979    31203432 :                   nsl(n_p) = .TRUE.
     980             :                END DO
     981      501028 :                DO n_p = 0, n_size - 1
     982      333176 :                   IF (c_help_size(2, n_p) < nsr(n_p)) c_help_size(2, n_p) = nsr(n_p)
     983      499764 :                   IF (nsl(n_p)) c_help_size(1, n_p) = c_help_size(1, n_p) + 1
     984             :                END DO
     985             :             END DO
     986             :        
     987             :             ! determine number of elements to receive from other pe's
     988             : 
     989      167852 :             DO ki = 1, mat_out%matsize2
     990      166588 :                kjj = n_rank + 1 + (ki - 1)*n_size      ! global column index
     991    31204696 :                DO kj = kjj + 1, mat_out%matsize1
     992             :                   nr_tot = nr_tot + 1
     993    31036844 :                   n_p = MOD(kj - 1, n_size)
     994    31203432 :                   n_r(n_p) = n_r(n_p) + 1
     995             :                END DO
     996             :             END DO
     997             :        
     998             :             ! determine the maximal number of s/r-counts and allocate s/r-arrays
     999             : 
    1000             :             ns_max = 0
    1001             :             nr_max = 0
    1002        3792 :             DO n_p = 0, n_size - 1
    1003        2528 :                ns_max = MAX(ns_max, n_send(n_p))
    1004        3792 :                nr_max = MAX(nr_max, n_r(n_p))
    1005             :             END DO
    1006             :             !      WRITE (*,*) ns_max ,nr_max  , n_size, n_rank
    1007        7584 :             ALLOCATE (cs_el(ns_max, 0:n_size - 1), cr_el(nr_max))
    1008        5056 :             ALLOCATE (in_pos(2, nr_max, 0:n_size - 1))
    1009             : 
    1010             :             ! for every send destination:
    1011             :             ! put the elements of the mat_in into the c_help,
    1012             :             ! resorting them on the way: rows <-> columns
    1013             :             ! then put them in the send buffers
    1014             : 
    1015        5056 :             ALLOCATE (c_help(mat_in%matsize2, ceiling(real(mat_in%matsize1)/n_size)))
    1016    31357970 :             c_help = cmplx(0.0, 0.0)
    1017        3792 :             DO n_p = 0, n_size - 1
    1018             : 
    1019        2528 :                IF (c_help_size(2, n_p) > size(c_help, 2)) CALL juDFT_error("allocated c_help is too small", calledby="mingeselle")
    1020        2528 :                IF (c_help_size(1, n_p) > size(c_help, 1)) CALL juDFT_error("allocated c_help is too small", calledby="mingeselle")
    1021             :                !print*, "c_help_size",n_rank, n_p,c_help_size(:,n_p)
    1022             : 
    1023      333808 :                DO ki = 1, c_help_size(1, n_p)
    1024    31370652 :                   DO kj = 1, min(ki, c_help_size(2, n_p))
    1025    31036844 :                      kjj = (kj - 1)*n_size + n_p + 1     ! #row of the element in mat_in
    1026    31368124 :                      IF (n_rank - 1 < n_p) THEN
    1027    23250004 :                         c_help(ki, kj) = mat_in%data_c(kjj, ki + 1)
    1028             :                      ELSE
    1029     7786840 :                         c_help(ki, kj) = mat_in%data_c(kjj, ki)
    1030             :                      END IF
    1031             :                   END DO
    1032             :                END DO
    1033             : 
    1034             :                n_help = 0
    1035      333808 :                DO kj = 1, c_help_size(2, n_p)
    1036    31370652 :                   DO ki = kj, c_help_size(1, n_p)
    1037    31036844 :                      n_help = n_help + 1
    1038    31368124 :                      cs_el(n_help, n_p) = CONJG(c_help(ki, kj))
    1039             :                   END DO
    1040             :                END DO
    1041        3792 :                IF (n_help .NE. n_send(n_p)) CALL juDFT_error("Number of elements to send is wrong", calledby="mingeselle")
    1042             : 
    1043             :             END DO
    1044        1264 :             DEALLOCATE (c_help)
    1045             : 
     1046             :             ! now we determine where to put the received elements
    1047             : 
    1048        3792 :             n_recv = 0
    1049      167852 :             DO ki = 1, mat_out%matsize2
    1050      166588 :                kjj = n_rank + 1 + (ki - 1)*n_size      ! global column index
    1051    31204696 :                DO kj = kjj + 1, mat_out%matsize1
    1052    31036844 :                   n_p = MOD(kj - 1, n_size)
    1053    31036844 :                   n_recv(n_p) = n_recv(n_p) + 1
    1054    31036844 :                   in_pos(1, n_recv(n_p), n_p) = kj
    1055    31203432 :                   in_pos(2, n_recv(n_p), n_p) = ki
    1056             :                END DO
    1057             :             END DO
    1058        3792 :             DO n_p = 0, n_size - 1
     1059        3792 :                IF (n_recv(n_p) /= n_r(n_p)) CALL juDFT_error("n_recv.NE.n_r", calledby="mingeselle")
    1060             :             END DO
    1061             : 
     1062             :             ! Mandaliet, mandaliet, min geselle kumme niet  ("my companion does not come")
    1063             : 
    1064        1264 :             ifront = ibefore(n_size, n_rank)
    1065        1264 :             inext = iafter(n_size, n_rank)
    1066        3792 :             DO n_p = 0, n_size - 1
    1067             : 
    1068             :                ! determine pe's to send to and to receive from
    1069             : 
    1070        2528 :                np_s = MOD(inext + n_p, n_size)
    1071        2528 :                np_r = MOD(ifront - n_p, n_size)
    1072        2528 :                IF (np_r .LT. 0) np_r = np_r + n_size
    1073             : 
    1074             :                ! send section: local rows i with mod(i-1,np) = np_s will be sent to proc np_s
    1075             : 
    1076        2528 :                IF (np_s .NE. n_rank) THEN
    1077             :                   CALL MPI_ISEND(cs_el(1, np_s), n_send(np_s), MPI_DOUBLE_COMPLEX, &
    1078        1264 :                                  np_s, n_rank, SUB_COMM, req_s, ierr)
    1079             :                END IF
    1080             : 
     1081             :                ! receive section: local rows i with mod(i-1,np) = np_r will be received from np_r
     1082             :                ! ... skipped if the matrix is updated from local data:
    1083             : 
    1084        3792 :                IF (np_r .NE. n_rank) THEN
    1085        1264 :                   CALL MPI_IRECV(cr_el, n_recv(np_r), MPI_DOUBLE_COMPLEX, MPI_ANY_SOURCE, np_r, SUB_COMM, req_r, ierr)
    1086        1264 :                   CALL MPI_WAIT(req_s, stt, ierr)
    1087        1264 :                   CALL MPI_WAIT(req_r, stt, ierr)
    1088    15561234 :                   DO ki = 1, n_recv(np_r)
    1089    15561234 :                      mat_out%data_c(in_pos(1, ki, np_r), in_pos(2, ki, np_r)) = cr_el(ki)
    1090             :                   END DO
    1091             :                ELSE
    1092    15478138 :                   DO ki = 1, n_recv(np_r)
    1093    15478138 :                      mat_out%data_c(in_pos(1, ki, np_r), in_pos(2, ki, np_r)) = cs_el(ki, np_s)
    1094             :                   END DO
    1095             :                END IF
    1096             :             END DO
    1097             : 
    1098        3792 :             DEALLOCATE (cs_el, cr_el, in_pos)
    1099             : 
    1100             :          CLASS DEFAULT
    1101           0 :             call judft_error("Wrong type (1) in mingeselle")
    1102             :          END SELECT
    1103             :       CLASS DEFAULT
    1104           0 :          call judft_error("Wrong type (2) in mingeselle")
    1105             :       END SELECT
    1106             : 
    1107        1264 :       call timestop("mingeselle")
    1108             : #endif
    1109        1264 :    END SUBROUTINE mingeselle
    1110             :    !
    1111             :    !-------------------------------------------------------------
    1112             :    !
    1113        1264 :    INTEGER FUNCTION ibefore(np, p)
    1114             :       !
    1115             :       ! Determine (in a ring structure) which is the front process
    1116             :       !
    1117             :       IMPLICIT NONE
    1118             :       INTEGER, INTENT(IN) :: np  !  number of processes
     1119             :       INTEGER, INTENT(IN) :: p   !  current process
    1120             : 
    1121        1264 :       IF (p > 0) THEN
    1122         632 :          ibefore = p - 1
    1123             :       ELSE
    1124           0 :          ibefore = np - 1
    1125             :       END IF
    1126             : 
    1127           0 :    END FUNCTION ibefore
    1128             :    !
    1129             :    !-------------------------------------------------------------
    1130             :    !
    1131        1264 :    INTEGER FUNCTION iafter(np, p)
    1132             :       !
    1133             :       ! Determine (in a ring structure) which is the next process
    1134             :       !
    1135             :       IMPLICIT NONE
    1136             :       INTEGER, INTENT(IN) :: np  !  number of processes
     1137             :       INTEGER, INTENT(IN) :: p   !  current process
    1138             : 
    1139        1264 :       IF (p < np - 1) THEN
    1140         632 :          iafter = p + 1
    1141             :       ELSE
    1142             :          iafter = 0
    1143             :       END IF
    1144             : 
    1145           0 :    END FUNCTION iafter
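                      :    ! Added note: ibefore/iafter close the process list into a ring.  For example,
                      :    ! with np = 3 one gets ibefore = 2,0,1 and iafter = 1,2,0 for p = 0,1,2, i.e.
                      :    ! the ring 0 -> 1 -> 2 -> 0 used by the send/receive loop in mingeselle.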
    1146             : 
    1147             : #if 1==2
    1148             :    subroutine cyclic_column_to_2Dblock_cyclic(mat,mat2d,s1,s2)
    1149             :       implicit none 
    1150             :       class(t_mpimat),intent(in)   ::mat
    1151             :       class(t_mpimat),intent(inout)::mat2d
    1152             :       integer,intent(in),optional  ::s1,s2
    1153             : 
    1154             :       real,allocatable::vecr(:)
    1155             :       complex,allocatable::vecc(:)
    1156             :       integer:: my_proc,num_proc,ierr
    1157             :       integer:: nprow,npcol,myrow,mycol
    1158             :       integer:: shift1,shift2
    1159             :       integer:: n1,n2,blockindex,n_col,n_row
    1160             : 
    1161             :       shift1=0;shift2=0
    1162             :       if (present(s1)) shift1=s1-1
    1163             :       if (present(s2)) shift2=s2-1
    1164             :       
    1165             :       if (mat%l_real) THEN
    1166             :          allocate(vecr(mat%global_size1))
    1167             :       else 
    1168             :          allocate(vecc(mat%global_size1)) 
    1169             :       endif
    1170             : #ifdef CPP_SCALAPACK  
    1171             :       !process ranks for cyclic column dist
    1172             :       call MPI_COMM_RANK(mat%blacsdata%mpi_com,my_proc,ierr)
    1173             :       call MPI_COMM_SIZE(mat%blacsdata%mpi_com,num_proc,ierr)
    1174             :       !info for 2dblock cyclic dist
    1175             :       call blacs_gridinfo(mat2d%blacsdata%blacs_desc(2),nprow,npcol,myrow,mycol)
    1176             : 
    1177             : #ifdef __NEW_CODE      
    1178             :       !create processor map blacs_row,blacs_col->mpi_rank
    1179             :       allocate(pmap(nprow,npcol))
    1180             :       pmap=-1
     1181             :       pmap(myrow,mycol)=my_proc
    1182             :       CALL MPI_ALLREDUCE(MPI_IN_PLACE,pmap,size(pmap),MPI_INTEGER,MPI_MAX,mat%blacsdata%mpi_com,ierr)
    1183             :       if (any(pmap<0)) call judft_bug("Bug1:types_mpimat")
    1184             : 
    1185             :       DO n2=1,mat%global_size2,num_proc
    1186             :          sglobal_col=n2+my_proc
    1187             :          slocal_col=(n2-1)/num_proc+1
    1188             :          !Which 2d column contains the data?
    1189             :          blockindex=(sglobal_col+shift1-1)/mat2d%blacsdata%blacs_desc(6)
    1190             :          rec_p_col=mod(blockindex,npcol)
    1191             :          !now loop over column
    1192             :          DO n1=1,mat%global_size1
    1193             :             blockindex=(n1+shift1-1)/mat2d%blacsdata%blacs_desc(5)
    1194             :             rec_p_row=mod(blockindex,nprow)
    1195             :             rec_r_index(rec_p_row)=rec_r_index(rec_p_row)+1
    1196             :             vecr(rec_r_index(rec_p_row),rec_p_row)=mat%data_r(n1,slocal_col)
    1197             :          ENDDO
    1198             :          !Send data to all columns of the processor grid involved
    1199             :          DO rec_p_col=0,npcol-1
    1200             :             call MPI_ISEND(vecr(1,rec_p_row),rec_r_index(rec_p_row),MPI_DOUBLE,pmap(rec_p_col,rec_p_col),sGlobal_col,mat%blacsdata%mpi_com,request(rec_p_col),ierr)   
    1201             :          ENDDO
    1202             :          !now each processor might have data from more columns in 2D dist
    1203             :          DO n2_2d=n2,n2+num_proc
    1204             :             rglobal_col=n2_2d+shift1
    1205             :             !Which 2d column contains the data?
    1206             :             blockindex=(rglobal_col-1)/mat2d%blacsdata%blacs_desc(6)
    1207             :             rec_p_col=mod(blockindex,npcol)
    1208             :             if (mycol.ne.rec_p_col) cycle !this process does not contain data
     1209             :             !Where is the column coming from?
    1210             :             s_col=mod(n2_2d-1,num_proc)
    1211             :             !Which is the first element we store the data in?
    1212             :             blockindex=(shift1-1)/mat2d%blacsdata%blacs_desc(5)
    1213             :             if (myrow==0) n_row=shift1-blockindex*mat2d%blacsdata%blacs_desc(5) !first block might be incomplete
    1214             :             n_row=n_row+blockindex/nprow*mat2d%blacsdata%blacs_desc(5)
    1215             :             !Get the data
     1216             :             CALL MPI_RECV(mat2d%data_r(n_row:,rec_col),mat2d%matsize2-n_row,MPI_DOUBLE,s_col,rglobal_col,mat%blacsdata%mpi_com,MPI_STATUS_IGNORE,ierr)
    1217             :          ENDDO
    1218             :       ENDDO      
    1219             : #endif      
    1220             :       !Now we loop over columns and BC them
    1221             :       DO n2=1,mat%global_size2   
    1222             :          if (mat%l_real) THEN
    1223             :             if (my_proc==mod(n2-1,num_proc)) vecr=mat%data_r(:,(n2-1)/num_proc+1) !This process owns the column
    1224             :             call MPI_BCAST(vecr,size(vecr),MPI_DOUBLE,mod(n2-1,num_proc),mat%blacsdata%mpi_com,ierr)   
    1225             :          else
    1226             :             if (my_proc==mod(n2-1,num_proc)) vecc=mat%data_c(:,(n2-1)/num_proc+1)
    1227             :             call MPI_BCAST(vecc,size(vecc),MPI_DOUBLE_COMPLEX,mod(n2-1,num_proc),mat%blacsdata%mpi_com,ierr)   
    1228             :          endif    
    1229             :          !Which 2d column contains the data?
    1230             :          blockindex=(n2+shift2-1)/mat2d%blacsdata%blacs_desc(6)
    1231             :          if (mycol.ne.mod(blockindex,npcol)) cycle !This process contains no data  
    1232             :          n_col=(n2+shift2)-blockindex*mat2d%blacsdata%blacs_desc(6)+ &
    1233             :                blockindex/npcol*mat2d%blacsdata%blacs_desc(6)
    1234             :          DO n1=1,mat%global_size1
    1235             :             blockindex=(n1+shift1-1)/mat2d%blacsdata%blacs_desc(5)
    1236             :             if (myrow.ne.mod(blockindex,nprow)) cycle !No data here
    1237             :             n_row=(n1+shift1)-blockindex*mat2d%blacsdata%blacs_desc(5)+ &
    1238             :                     blockindex/nprow*mat2d%blacsdata%blacs_desc(5)
    1239             :             if (mat%l_real) then 
    1240             :                mat2d%data_r(n_row,n_col)=vecr(n1)
    1241             :             else   
    1242             :                mat2d%data_c(n_row,n_col)=vecc(n1)  
    1243             :             end if 
    1244             :          enddo
    1245             :       ENDDO   
    1246             : #endif      
    1247             :    end subroutine
    1248             : #endif
    1249       30712 :    logical function is_column_cyclic(mat)
    1250             :       implicit none 
    1251             :       class(t_mpimat),intent(in)   ::mat
    1252             : 
    1253       31456 :       is_column_cyclic=(mat%blacsdata%blacs_desc(5)==mat%global_size1.and.mat%blacsdata%blacs_desc(6)==1)
    1254       12920 :    end function
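                      :    ! Added note: "column cyclic" here means a BLACS layout whose row block size
                      :    ! equals the full number of rows and whose column block size is 1, so every
                      :    ! process stores complete columns.  On 4 processes, for instance, process p
                      :    ! then keeps the global columns p+1, p+5, p+9, ... as its consecutive local
                      :    ! columns.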
    1255             : 
    1256             : #ifdef CPP_SCALAPACK
    1257       12176 :    subroutine cyclic_column_to_2Dblock_cyclic(mat,mat2d,offset1,offset2)
    1258             :       use iso_c_binding
    1259             :       implicit none 
    1260             :       class(t_mpimat),intent(in)   ::mat
    1261             :       class(t_mpimat),intent(inout)::mat2d
    1262             :       integer,intent(in),optional  ::offset1,offset2
    1263             : 
    1264             :       INTEGER:: irank,isize,np_col,np_row,my_col,my_row,col,row !info on processors and processor grid
    1265       12176 :       INTEGER,ALLOCATABLE:: map(:,:)
    1266             :       INTEGER,ALLOCATABLE:: row_map(:)
    1267       12176 :       INTEGER,ALLOCATABLE:: col_map(:)
    1268             :       INTEGER            :: win_handle
    1269             :       INTEGER:: ierr,blocksize,global_col,global_col2d,n_col2d,n,ir
    1270             : 
    1271       12176 :       blocksize=mat2d%blacsdata%blacs_desc(5) !blocksize is assumed to be the same for both dimensions
    1272       12176 :       call MPI_COMM_SIZE(mat%blacsdata%mpi_com,isize,ierr)
    1273       12176 :       call MPI_COMM_rank(mat%blacsdata%mpi_com,irank,ierr)
    1274       12176 :       call blacs_gridinfo(mat2d%blacsdata%blacs_desc(2),np_row,np_col,my_row,my_col)
    1275             : 
      !map from the processor-grid coordinates (row,col) to the MPI ranks
    1277       12176 :       call generate_map_to_irank(np_row,np_col,my_row,my_col,irank,mat2d%blacsdata%blacs_desc(2),mat%blacsdata%mpi_com,map)
      !which processor-grid row/column owns each global row/column index
    1279     2091884 :       row_map=get_vector_map(mat%global_size1,offset1,blocksize,np_row)
    1280     2091884 :       col_map=get_vector_map(mat%global_size2,offset2,blocksize,np_col)
    1281       12176 :       call create_RMA_win(mat2d,offset1,np_row,my_row,blocksize,mat2d%blacsdata%mpi_com,win_handle) 
    1282       12176 :       global_col=irank+1
    1283     1052030 :       DO n=1,mat%matsize2 !loop over all local columns
     1284             :          !one-sided transfer: put this local column into the 2D-distributed target
    1285     1039854 :          call send_column(mat,n,global_col,offset2,blocksize,np_col,col_map(global_col),row_map,map(:,col_map(global_col)),win_handle,irank)
    1286     1052030 :          global_col=global_col+isize !the next local column corresponds to this global column
    1287             :       ENDDO
    1288       12176 :       call mpi_win_free(win_handle,ierr)
    1289       12176 :    end subroutine
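                      :    ! Added overview (sketch of the steps above): every rank owns the complete
                      :    ! global columns irank+1, irank+1+isize, ... of the 1D-distributed matrix.
                      :    ! For each of them it looks up the processor-grid column owning the target
                      :    ! column (col_map), splits the column into the pieces owned by the individual
                      :    ! grid rows (row_map), and writes each piece with MPI_PUT into the RMA window
                      :    ! exposed over the local storage of the 2D-distributed matrix.  Lock/unlock
                      :    ! (passive-target) synchronization completes each put, and the window is
                      :    ! freed collectively at the end.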
    1290             : 
    1291       12176 :    subroutine generate_map_to_irank(nprow,npcol,myrow,mycol,irank,ictxt,mpi_com,map)
    1292             :       implicit none
    1293             :       INTEGER,INTENT(IN):: nprow,npcol,myrow,mycol,irank,ictxt,mpi_com
    1294             :       INTEGER,INTENT(OUT),ALLOCATABLE:: map(:,:)
    1295             : 
    1296             :       integer:: ierr
    1297             : 
    1298       48704 :       ALLOCATE(map(0:nprow-1,0:npcol-1))
    1299       60880 :       map=-1
    1300       12176 :       map(myrow,mycol)=irank
    1301       36528 :       CALL MPI_ALLREDUCE(MPI_IN_PLACE,map,size(map),MPI_INTEGER,MPI_MAX,mpi_com,ierr)
    1302       60880 :       if (any(map<0)) call judft_bug("Problem with distribution ")
    1303       12176 :    end SUBROUTINE
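                      :    ! Added note: every rank contributes only its own grid position (all other
                      :    ! entries stay -1), so the MPI_MAX reduction assembles the complete
                      :    ! grid-to-rank table.  If, for instance, the ranks of a 2x2 grid happen to be
                      :    ! laid out row-major, the result is map(0,0)=0, map(0,1)=1, map(1,0)=2,
                      :    ! map(1,1)=3; since the actual ordering depends on how the BLACS context was
                      :    ! created, the table is built at run time rather than from a formula.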
    1304             : 
    1305             : 
    1306       24352 :    function get_vector_map(nsize,offset,blocksize,np)result(map)
    1307             :       implicit none
    1308             :       integer,intent(in)    :: nsize,offset,blocksize,np
    1309             :       integer,allocatable   :: map(:)
    1310             : 
    1311             :       integer:: n,blockindex
    1312       73056 :       allocate(map(nsize))
    1313             : 
    1314     4183768 :       DO n=1,size(map)
    1315             :          !in which block are we
    1316     4159416 :          blockindex=(n+offset-2)/blocksize !offset is counted from 1
    1317             :          !on which processor
    1318     4183768 :          map(n)=mod(blockindex,np)
    1319             :       ENDDO
    1320       24352 :    END function   
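                      :    ! Added worked example: for nsize=10, offset=1, blocksize=4, np=2 the block
                      :    ! index (n+offset-2)/blocksize evaluates to 0,0,0,0,1,1,1,1,2,2, giving
                      :    ! map = (/0,0,0,0,1,1,1,1,0,0/): indices 1-4 and 9-10 belong to grid
                      :    ! position 0, indices 5-8 to grid position 1.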
    1321             : 
    1322     1039854 :    subroutine send_column(mat,n_col1D,n_col,offset2,blocksize,np_col,my_col,row_map,gridmap,win_handle,irank)
    1323             :       implicit none
    1324             :       type(t_mpimat),intent(in)::mat
    1325             :       integer,intent(in):: n_col1d,n_col   ! local column to send, global on sending side
    1326             :       INTEGER,intent(in):: offset2,blocksize,np_col,my_col,irank
    1327             :       integer,intent(in):: row_map(:)        ! mapping of the (local) index of the row to the processor row in the gridmap
    1328             :       integer,intent(in):: gridmap(0:)       ! mapping of the processor to the MPI irank
    1329             :       integer,intent(in):: win_handle
    1330             : 
    1331             :       integer:: myrow,send_size,ierr,n,nn
    1332             :       integer(MPI_ADDRESS_KIND)::disp
    1333             :       integer:: req(size(gridmap))
    1334             : 
    1335     1039854 :       real,allocatable:: buffer_r(:)
    1336     1039854 :       complex,allocatable:: buffer_c(:)
    1337     1039854 :       if (mat%l_real) THEN
    1338     1049082 :          allocate(buffer_r(mat%matsize1))
    1339             :       else
    1340     2070480 :          allocate(buffer_c(mat%matsize1))
    1341             :       endif   
    1342             : 
     1343             :       !First find the local column on the receiving processors
    1344     1039854 :       call infog1l(n_col+offset2-1,blocksize,np_col,my_col,0,n,nn)
    1345     1039854 :       disp=n-1
    1346             : 
    1347     2079708 :       DO myrow=0,size(gridmap)-1
    1348   272752780 :          send_size=count(row_map==myrow)
    1349     2079708 :          if (mat%l_real) THEN
    1350    57828570 :             buffer_r(1:send_size)=pack(mat%data_r(:,n_col1d),row_map==myrow)
    1351      349694 :             CALL MPI_WIN_LOCK(MPI_LOCK_SHARED, gridmap(myrow), 0, win_handle, ierr)
    1352      349694 :             call mpi_put(buffer_r,send_size,MPI_DOUBLE_PRECISION,gridmap(myrow),disp,send_size,MPI_DOUBLE_PRECISION,win_handle,ierr)
    1353      349694 :             call MPI_WIN_UNLOCK(gridmap(myrow), win_handle, ierr)
    1354             :          else
    1355   215964064 :             buffer_c(1:send_size)=pack(mat%data_c(:,n_col1d),row_map==myrow)
    1356      690160 :             CALL MPI_WIN_LOCK(MPI_LOCK_SHARED, gridmap(myrow), 0, win_handle, ierr)
    1357      690160 :             call mpi_put(buffer_c,send_size,MPI_DOUBLE_COMPLEX,gridmap(myrow),disp,send_size,MPI_DOUBLE_complex,win_handle,ierr)
    1358      690160 :             call MPI_WIN_UNLOCK(gridmap(myrow), win_handle, ierr)
    1359             :          endif   
    1360             :       ENDDO
    1361             :       
    1362     1039854 :    END subroutine
    1363             :    
    1364       12176 :    subroutine create_RMA_win(mat,offset1,np_row,my_row,blocksize,mpi_comm,win_handle)
    1365             :       use iso_c_binding
    1366             :       implicit none
     1367             :       type(t_mpimat),intent(in),target::mat !The target matrix whose local storage is exposed in the RMA window
     1368             :       INTEGER,INTENT(IN)  :: offset1 ! Global row offset of the transferred block in the target matrix
    1369             :       INTEGER,INTENT(IN)  :: my_row,np_row !Info on the blacs processor grid of the target matrix
    1370             :       integer,INTENT(IN)  :: blocksize,mpi_comm 
    1371             :       INTEGER,INTENT(OUT)     :: win_handle
    1372             : 
    1373             :       !locals
    1374             :       INTEGER(KIND=MPI_ADDRESS_KIND) :: buffersize
    1375             :       INTEGER                        :: data_in_byte,row_size,ierr,start,nn
    1376             :       
    1377             : 
    1378             :       !determine local storage
    1379       12176 :       data_in_byte=merge(8,16,mat%l_real) 
    1380       12176 :       call infog1l(offset1,blocksize,np_row,my_row,0,start,nn)
    1381       12176 :       buffersize=mat%matsize1
    1382       12176 :       buffersize=(buffersize*mat%matsize2-start+1)*data_in_byte 
    1383       12176 :       row_size=mat%matsize1*data_in_byte
    1384             : 
    1385       12176 :       if (mat%l_real) THEN
    1386        5408 :          call mpi_win_create(mat%data_r(start,1),buffersize,row_size,MPI_INFO_NULL,mpi_comm,win_handle,ierr)
    1387             :       else
    1388        6768 :          call mpi_win_create(mat%data_c(start,1),buffersize,row_size,MPI_INFO_NULL,mpi_comm,win_handle,ierr)
    1389             :       endif   
    1390             : 
    1391             :       
    1392       12176 :    END subroutine
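                      :    ! Added note on the displacement unit: the window is created with
                      :    ! disp_unit = row_size, i.e. the byte length of one local column (matsize1
                      :    ! elements), and its base is the element (start,1).  The displacement
                      :    ! disp = n-1 used with MPI_PUT in send_column therefore addresses local
                      :    ! column n of the target matrix, starting at the local row that corresponds
                      :    ! to the global row offset1.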
    1393             : 
    1394             :    
    1395             : 
    1396             :   
    1397             : #endif
    1398             : 
    1399             : 
    1400             : 
    1401             : 
    1402      109536 : END MODULE m_types_mpimat

Generated by: LCOV version 1.14