LCOV - code coverage report
Current view: top level - init - test_performance.F90 (source / functions) Hit Total Coverage
Test: FLEUR test coverage Lines: 0 70 0.0 %
Date: 2024-04-27 04:44:07 Functions: 0 4 0.0 %

          Line data    Source code
       1             : !--------------------------------------------------------------------------------
       2             : ! Copyright (c) 2023 Peter Grünberg Institut, Forschungszentrum Jülich, Germany
       3             : ! This file is part of FLEUR and available as free software under the conditions
       4             : ! of the MIT license as expressed in the LICENSE file in more detail.
       5             : !--------------------------------------------------------------------------------
       6             : 
       7             : module m_test_performance
       8             :     !$ use omp_lib
       9             :     implicit none
      10             :     private
      11             :     public test_performance
      12             : contains
      13           0 :     subroutine check_multithreading(dim,time)
      14             :         INTEGER,INTENT(IN):: dim
      15             :         integer :: n,n_threads 
      16             :         real,intent(out):: time(:)
      17           0 :         real:: vector(dim*dim),sum
      18             : 
      19             : #ifdef _OPENMP
      20           0 :         call random_number(vector)
      21             :         !Do a small OMP loop to make sure OMP-lib is initialized
      22           0 :         sum=0
      23           0 :         !$OMP PARALLEL DO private(n)shared(vector)default(none)reduction(+:sum)
      24             :         DO n=1,size(vector)
      25             :             sum=sum+vector(n)
      26             :         ENDDO
      27             :         !$OMP END PARALLEL DO
      28             : 
      29           0 :         DO n_threads=1,size(time)
      30           0 :             time(n_threads)=omp_get_wtime()
      31           0 :             sum=0.0
      32           0 :             !$OMP PARALLEL DO num_threads(n_threads) private(n)shared(vector)default(none)reduction(+:sum)
      33             :             DO n=1,size(vector)
      34             :                 sum=sum+sin(vector(n))
      35             :             ENDDO
      36             :             !$OMP end parallel do
      37           0 :             time(n_threads)=omp_get_wtime()-time(n_threads)
      38             :         ENDDO        
      39             : #endif
      40           0 :     end SUBROUTINE      
      41             :     
      42           0 :     subroutine check_blas_multithreading(N,time)
      43             :         integer,intent(in):: N
      44           0 :         real :: a(N,N),c(N,N)
      45             :         real,intent(out):: time(:)
      46             :         integer :: n_threads
      47             : 
      48             : #ifdef _OPENMP        
      49           0 :         call random_number(a)
      50             :         !Call a dgemm to initialize the BLAS
      51           0 :         call dgemm("N","N",N,N,N,1.0,a,N,a,N,0.0,c,N)
      52             : 
      53           0 :         DO n_threads=1,size(time)
      54           0 :             call omp_set_num_threads(n_threads)
      55           0 :             time(n_threads)=omp_get_wtime()
      56           0 :             call dgemm("N","N",N,N,N,1.0,a,N,a,N,0.0,c,N)
      57           0 :             time(n_threads)=omp_get_wtime()-time(n_threads)
      58             :         ENDDO        
      59             : #endif
      60           0 :     end SUBROUTINE      
      61             : 
      62           0 :     subroutine check_diag_multithreading(N,time)
      63             :         use m_eigen_diag
      64             :         use m_types_mat
      65             :         use m_types_mpimat
      66             : #ifdef CPP_MPI
      67             :         use mpi 
      68             : #endif        
      69             :         REAL,INTENT(OUT) :: time(:)
      70             :         INTEGER,INTENT(IN)  :: N
      71             :       
      72             :         INTEGER          :: ne,n_threads,solver=0,i,j
      73           0 :         CLASS(t_mat), ALLOCATABLE :: ev         ! eigenvectors
      74           0 :         REAL :: eig(N) 
      75           0 :         class(t_mat),allocatable :: hmat,smat,h,s
      76             : #ifdef _OPENMP        
      77             : #ifdef CPP_MPI     
      78             :         integer :: ierr,isize
      79             : 
      80           0 :         call MPI_COMM_SIZE(MPI_COMM_WORLD,isize,ierr)
      81             : 
      82           0 :         if (isize>1) THEN
      83           0 :             allocate(t_mpimat:: hmat,smat,h,s)
      84           0 :             CALL hmat%init(.true., N,N, MPI_COMM_WORLD, .true.)
      85             :         ELSE 
      86           0 :             allocate(t_mat:: hmat,smat,h,s)
      87           0 :             CALL hmat%init(.true., N,N)
      88             :         ENDIF           
      89             : #else
      90             :         allocate(t_mat:: hmat,smat,h,s)
      91             :         CALL hmat%init(.true., N,N)
      92             : #endif        
      93           0 :         CALL smat%init(hmat)
      94           0 :         CALL h%init(hmat)
      95           0 :         CALL s%init(hmat)
      96             : 
      97           0 :         call random_number(hmat%data_r(:,:))
      98             :         !make smat positive (semi)definite
      99           0 :         call hmat%multiply(hmat,smat,"T","N")
     100             :        
     101             : 
     102           0 :         ne=0.1*N
     103             :      
     104             : 
     105             :         ! 1 thread
     106           0 :         DO n_threads=1,size(time)
     107           0 :             h%data_r=hmat%data_r
     108           0 :             s%data_r=smat%data_r
     109           0 :             call omp_set_num_threads(n_threads)
     110           0 :             time(n_threads)=omp_get_wtime()
     111           0 :             call eigen_diag(solver,h,s,ne,eig,ev)
     112           0 :             time(n_threads)=omp_get_wtime()-time(n_threads)
     113             :         ENDDO    
     114             :         
     115             : #endif
     116           0 :     end subroutine
     117             : 
     118           0 :     subroutine test_performance()
     119             :         use m_types_lapw
     120             :         INTEGER :: dim
     121           0 :         real,allocatable::time(:,:)
     122             :         INTEGER :: n
     123             : 
     124           0 :         dim=lapw_dim_nvd
     125             : #ifdef _OPENMP
     126           0 :         if (omp_get_max_threads()==1) THEN
     127           0 :             write(*,*) "Only a single OMP thread available, not OMP scaling tested"
     128             :             RETURN
     129             :         ENDIF    
     130           0 :         write(*,*) "Testing OMP scaling"
     131           0 :         allocate(time(omp_get_max_threads(),3))
     132           0 :         write(*,*) "Testing BLAS scaling"
     133           0 :         call check_multithreading(dim,time(:,1))
     134           0 :         write(*,*) "Testing Eigenvalue solver scaling"
     135           0 :         call check_blas_multithreading(dim,time(:,2))
     136           0 :         call check_diag_multithreading(dim,time(:,3))
     137           0 :         write(*,*) "THREADS|      OpenMP         |        BLAS         |      Eigenvalue"
     138           0 :         write(*,*) "       |   time      scaling |   time      scaling |   time      scaling "
     139           0 :         time=time*1000
     140           0 :         DO n=1,size(time,1)
     141           0 :             write(*,"(i3,5x,3('|',f8.2,3x,f8.2,2x))") n,time(n,1),time(1,1)/time(n,1),time(n,2),time(1,2)/time(n,2),time(n,3),time(1,3)/time(n,3)
     142             :         ENDDO
     143           0 :         if (any(time(2,:)/time(1,:)<1.5)) then
     144           0 :             write(*,*) "WARNING"
     145           0 :             write(*,*) "No proper OMP scaling detected"
     146           0 :             write(*,*) "You should check if you:"
     147           0 :             write(*,*) "    - use a multithreaded solver/BLAS"
     148           0 :             write(*,*) "    - use a proper distribution of threads to your hardware"
     149             :         else
     150           0 :             write(*,*) "OpenMP scaling looks OK"
     151             :         endif  
     152             : #else
     153             :         write(*,*) "NO OMP-Version"
     154             : #endif        
     155           0 :     end subroutine    
     156             : end module

Generated by: LCOV version 1.14