Line data Source code
1 : !--------------------------------------------------------------------------------
2 : ! Copyright (c) 2023 Peter Grünberg Institut, Forschungszentrum Jülich, Germany
3 : ! This file is part of FLEUR and available as free software under the conditions
4 : ! of the MIT license as expressed in the LICENSE file in more detail.
5 : !--------------------------------------------------------------------------------
6 :
7 : module m_test_performance
8 : !$ use omp_lib
9 : implicit none
10 : private
11 : public test_performance
12 : contains
13 0 : subroutine check_multithreading(dim,time)
14 : INTEGER,INTENT(IN):: dim
15 : integer :: n,n_threads
16 : real,intent(out):: time(:)
17 0 : real:: vector(dim*dim),sum
18 :
19 : #ifdef _OPENMP
20 0 : call random_number(vector)
21 : !Do a small OMP loop to make sure OMP-lib is initialized
22 0 : sum=0
23 0 : !$OMP PARALLEL DO private(n)shared(vector)default(none)reduction(+:sum)
24 : DO n=1,size(vector)
25 : sum=sum+vector(n)
26 : ENDDO
27 : !$OMP END PARALLEL DO
28 :
29 0 : DO n_threads=1,size(time)
30 0 : time(n_threads)=omp_get_wtime()
31 0 : sum=0.0
32 0 : !$OMP PARALLEL DO num_threads(n_threads) private(n)shared(vector)default(none)reduction(+:sum)
33 : DO n=1,size(vector)
34 : sum=sum+sin(vector(n))
35 : ENDDO
36 : !$OMP end parallel do
37 0 : time(n_threads)=omp_get_wtime()-time(n_threads)
38 : ENDDO
39 : #endif
40 0 : end SUBROUTINE
41 :
42 0 : subroutine check_blas_multithreading(N,time)
43 : integer,intent(in):: N
44 0 : real :: a(N,N),c(N,N)
45 : real,intent(out):: time(:)
46 : integer :: n_threads
47 :
48 : #ifdef _OPENMP
49 0 : call random_number(a)
50 : !Call a dgemm to initialize the BLAS
51 0 : call dgemm("N","N",N,N,N,1.0,a,N,a,N,0.0,c,N)
52 :
53 0 : DO n_threads=1,size(time)
54 0 : call omp_set_num_threads(n_threads)
55 0 : time(n_threads)=omp_get_wtime()
56 0 : call dgemm("N","N",N,N,N,1.0,a,N,a,N,0.0,c,N)
57 0 : time(n_threads)=omp_get_wtime()-time(n_threads)
58 : ENDDO
59 : #endif
60 0 : end SUBROUTINE
61 :
62 0 : subroutine check_diag_multithreading(N,time)
63 : use m_eigen_diag
64 : use m_types_mat
65 : use m_types_mpimat
66 : #ifdef CPP_MPI
67 : use mpi
68 : #endif
69 : REAL,INTENT(OUT) :: time(:)
70 : INTEGER,INTENT(IN) :: N
71 :
72 : INTEGER :: ne,n_threads,solver=0,i,j
73 0 : CLASS(t_mat), ALLOCATABLE :: ev ! eigenvectors
74 0 : REAL :: eig(N)
75 0 : class(t_mat),allocatable :: hmat,smat,h,s
76 : #ifdef _OPENMP
77 : #ifdef CPP_MPI
78 : integer :: ierr,isize
79 :
80 0 : call MPI_COMM_SIZE(MPI_COMM_WORLD,isize,ierr)
81 :
82 0 : if (isize>1) THEN
83 0 : allocate(t_mpimat:: hmat,smat,h,s)
84 0 : CALL hmat%init(.true., N,N, MPI_COMM_WORLD, .true.)
85 : ELSE
86 0 : allocate(t_mat:: hmat,smat,h,s)
87 0 : CALL hmat%init(.true., N,N)
88 : ENDIF
89 : #else
90 : allocate(t_mat:: hmat,smat,h,s)
91 : CALL hmat%init(.true., N,N)
92 : #endif
93 0 : CALL smat%init(hmat)
94 0 : CALL h%init(hmat)
95 0 : CALL s%init(hmat)
96 :
97 0 : call random_number(hmat%data_r(:,:))
98 : !make smat positive (semi)definite
99 0 : call hmat%multiply(hmat,smat,"T","N")
100 :
101 :
102 0 : ne=0.1*N
103 :
104 :
105 : ! 1 thread
106 0 : DO n_threads=1,size(time)
107 0 : h%data_r=hmat%data_r
108 0 : s%data_r=smat%data_r
109 0 : call omp_set_num_threads(n_threads)
110 0 : time(n_threads)=omp_get_wtime()
111 0 : call eigen_diag(solver,h,s,ne,eig,ev)
112 0 : time(n_threads)=omp_get_wtime()-time(n_threads)
113 : ENDDO
114 :
115 : #endif
116 0 : end subroutine
117 :
118 0 : subroutine test_performance()
119 : use m_types_lapw
120 : INTEGER :: dim
121 0 : real,allocatable::time(:,:)
122 : INTEGER :: n
123 :
124 0 : dim=lapw_dim_nvd
125 : #ifdef _OPENMP
126 0 : if (omp_get_max_threads()==1) THEN
127 0 : write(*,*) "Only a single OMP thread available, not OMP scaling tested"
128 : RETURN
129 : ENDIF
130 0 : write(*,*) "Testing OMP scaling"
131 0 : allocate(time(omp_get_max_threads(),3))
132 0 : write(*,*) "Testing BLAS scaling"
133 0 : call check_multithreading(dim,time(:,1))
134 0 : write(*,*) "Testing Eigenvalue solver scaling"
135 0 : call check_blas_multithreading(dim,time(:,2))
136 0 : call check_diag_multithreading(dim,time(:,3))
137 0 : write(*,*) "THREADS| OpenMP | BLAS | Eigenvalue"
138 0 : write(*,*) " | time scaling | time scaling | time scaling "
139 0 : time=time*1000
140 0 : DO n=1,size(time,1)
141 0 : write(*,"(i3,5x,3('|',f8.2,3x,f8.2,2x))") n,time(n,1),time(1,1)/time(n,1),time(n,2),time(1,2)/time(n,2),time(n,3),time(1,3)/time(n,3)
142 : ENDDO
143 0 : if (any(time(2,:)/time(1,:)<1.5)) then
144 0 : write(*,*) "WARNING"
145 0 : write(*,*) "No proper OMP scaling detected"
146 0 : write(*,*) "You should check if you:"
147 0 : write(*,*) " - use a multithreaded solver/BLAS"
148 0 : write(*,*) " - use a proper distribution of threads to your hardware"
149 : else
150 0 : write(*,*) "OpenMP scaling looks OK"
151 : endif
152 : #else
153 : write(*,*) "NO OMP-Version"
154 : #endif
155 0 : end subroutine
156 : end module
|