Line data Source code
1 : ! This file is part of ELPA.
2 : !
3 : ! The ELPA library was originally created by the ELPA consortium,
4 : ! consisting of the following organizations:
5 : !
6 : ! - Max Planck Computing and Data Facility (MPCDF), formerly known as
7 : ! Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG),
8 : ! - Bergische Universität Wuppertal, Lehrstuhl für angewandte
9 : ! Informatik,
10 : ! - Technische Universität München, Lehrstuhl für Informatik mit
11 : ! Schwerpunkt Wissenschaftliches Rechnen ,
12 : ! - Fritz-Haber-Institut, Berlin, Abt. Theorie,
13 : ! - Max-Planck-Institut für Mathematik in den Naturwissenschaften,
14 : ! Leipzig, Abt. Komplexe Strukturen in Biologie und Kognition,
15 : ! and
16 : ! - IBM Deutschland GmbH
17 : !
18 : !
19 : ! More information can be found here:
20 : ! http://elpa.mpcdf.mpg.de/
21 : !
22 : ! ELPA is free software: you can redistribute it and/or modify
23 : ! it under the terms of the version 3 of the license of the
24 : ! GNU Lesser General Public License as published by the Free
25 : ! Software Foundation.
26 : !
27 : ! ELPA is distributed in the hope that it will be useful,
28 : ! but WITHOUT ANY WARRANTY; without even the implied warranty of
29 : ! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
30 : ! GNU Lesser General Public License for more details.
31 : !
32 : ! You should have received a copy of the GNU Lesser General Public License
33 : ! along with ELPA. If not, see <http://www.gnu.org/licenses/>
34 : !
35 : ! ELPA reflects a substantial effort on the part of the original
36 : ! ELPA consortium, and we ask you to respect the spirit of the
37 : ! license that we chose: i.e., please contribute any changes you
38 : ! may have back to the original ELPA library distribution, and keep
39 : ! any derivatives of ELPA under the same license that we chose for
40 : ! the original distribution, the GNU Lesser General Public License.
41 : !
42 : !
43 : #include "config-f90.h"
44 : !>
45 : !> Fortran test program to demonstrate the use of
46 : !> ELPA 2 complex case library.
47 : !> If "HAVE_REDIRECT" was defined at build time
48 : !> the stdout and stderr output of each MPI task
49 : !> can be redirected to files if the environment
50 : !> variable "REDIRECT_ELPA_TEST_OUTPUT" is set
51 : !> to "true".
52 : !>
53 : !> By calling executable [arg1] [arg2] [arg3] [arg4]
54 : !> one can define the size (arg1), the number of
55 : !> Eigenvectors to compute (arg2), and the blocking (arg3).
56 : !> If these values are not set default values (500, 150, 16)
57 : !> are chosen.
58 : !> If these values are set the 4th argument can be
59 : !> "output", which specifies that the EV's are written to
60 : !> an ascii file.
61 : !>
62 : !> The complex ELPA 2 kernel is set in this program via
63 : !> the API call. However, this can be overridden by setting
64 : !> the environment variable "COMPLEX_ELPA_KERNEL" to an
65 : !> appropriate value.
66 : !>
67 144 : program test_complex2_choose_kernel_with_api_single_precision
68 :
69 : !-------------------------------------------------------------------------------
70 : ! Standard eigenvalue problem - COMPLEX version
71 : !
72 : ! This program demonstrates the use of the ELPA module
73 : ! together with standard scalapack routines
74 : !
75 : ! Copyright of the original code rests with the authors inside the ELPA
76 : ! consortium. The copyright of any additional modifications shall rest
77 : ! with their original authors, but shall adhere to the licensing terms
78 : ! distributed along with the original code in the file "COPYING".
79 : !-------------------------------------------------------------------------------
80 :
81 144 : use elpa1
82 : use elpa2
83 :
84 : use elpa_utilities, only : error_unit
85 : use elpa2_utilities
86 : use test_read_input_parameters
87 : use test_check_correctness
88 : use test_setup_mpi
89 : use test_blacs_infrastructure
90 : use test_prepare_matrix
91 : use test_util
92 :
93 : #ifdef HAVE_REDIRECT
94 : use test_redirect
95 : #endif
96 :
97 : use test_output_type
98 :
99 : implicit none
100 :
101 : !-------------------------------------------------------------------------------
102 : ! Please set system size parameters below!
103 : ! na: System size
104 : ! nev: Number of eigenvectors to be calculated
105 : ! nblk: Blocking factor in block cyclic distribution
106 : !-------------------------------------------------------------------------------
107 :
108 : integer(kind=ik) :: nblk
109 : integer(kind=ik) :: na, nev
110 : integer(kind=ik) :: np_rows, np_cols, na_rows, na_cols
111 :
112 : integer(kind=ik) :: myid, nprocs, my_prow, my_pcol, mpi_comm_rows, mpi_comm_cols
113 : integer(kind=ik) :: i, mpierr, my_blacs_ctxt, sc_desc(9), info, nprow, npcol
114 :
115 : integer(kind=ik), external :: numroc
116 :
117 144 : real(kind=rk4), allocatable :: ev(:)
118 :
119 288 : complex(kind=ck4), allocatable :: a(:,:), z(:,:), as(:,:)
120 :
121 : complex(kind=ck4), parameter :: CZERO = (0._rk4,0._rk4), CONE = (1._rk4,0._rk4)
122 :
123 : integer(kind=ik) :: STATUS
124 : #ifdef WITH_OPENMP
125 : integer(kind=ik) :: omp_get_max_threads, required_mpi_thread_level, provided_mpi_thread_level
126 : #endif
127 : logical :: successELPA, success
128 : logical :: gpuAvailable
129 : type(output_t) :: write_to_file
130 : character(len=8) :: task_suffix
131 : integer(kind=ik) :: j
 : ! NOTE(review): nprow, npcol, success, CZERO and CONE are declared but never
 : ! referenced in the visible body of this program — confirm before removing.
132 :
 : ! Single-precision test: ensure the double-precision complex code path stays
 : ! disabled in any preprocessed include below.
133 : #undef DOUBLE_PRECISION_COMPLEX
134 :
135 144 : successELPA = .true.
136 144 : gpuAvailable = .false.
137 :
138 144 : call read_input_parameters(na, nev, nblk, write_to_file)
139 :
140 : !-------------------------------------------------------------------------------
141 : ! MPI Initialization
142 144 : call setup_mpi(myid, nprocs)
143 :
144 144 : STATUS = 0
145 :
146 : #define COMPLEXCASE
147 : #include "../../elpa_print_headers.F90"
148 :
149 : !-------------------------------------------------------------------------------
150 : ! Selection of number of processor rows/columns
151 : ! We try to set up the grid square-like, i.e. start the search for possible
152 : ! divisors of nprocs with a number next to the square root of nprocs
153 : ! and decrement it until a divisor is found.
154 :
155 144 : do np_cols = NINT(SQRT(REAL(nprocs))),2,-1
156 0 : if(mod(nprocs,np_cols) == 0 ) exit
157 : enddo
158 : ! at the end of the above loop, nprocs is always divisible by np_cols
159 :
160 144 : np_rows = nprocs/np_cols
161 :
 : ! Rank 0 prints the run configuration and which kernel this build will use.
162 144 : if(myid==0) then
163 96 : print *
164 96 : print '(a)','Standard eigenvalue problem - COMPLEX version'
165 : #ifdef WITH_GPU_VERSION
166 : print *," with GPU version"
167 : #endif
168 96 : print *
169 96 : print '(3(a,i0))','Matrix size=',na,', Number of eigenvectors=',nev,', Block size=',nblk
170 96 : print '(3(a,i0))','Number of processor rows=',np_rows,', cols=',np_cols,', total=',nprocs
171 96 : print *
172 96 : print *, "This is an example how to determine the ELPA2 kernel with"
173 96 : print *, "an api call. Note, however, that setting the kernel via"
174 96 : print *, "an environment variable will always take precedence over"
175 96 : print *, "everything else! "
176 96 : print *
177 : #ifndef HAVE_ENVIRONMENT_CHECKING
178 : print *, " Notice that it is not possible with this build to set the "
179 : print *, " kernel via an environment variable! To change this re-install"
180 : print *, " the library and have a look at the log files"
181 : #endif
182 : #ifndef WITH_FIXED_COMPLEX_KERNEL
183 96 : print *, " The settings are: COMPLEX_ELPA_KERNEL_GENERIC_SIMPLE"
184 : #else /* WITH_FIXED_COMPLEX_KERNEL */
185 :
186 : #ifdef WITH_COMPLEX_GENERIC_KERNEL
187 : print *, " The settings are: COMPLEX_ELPA_KERNEL_GENERIC"
188 : #endif
189 :
190 : #ifdef WITH_COMPLEX_GENERIC_SIMPLE_KERNEL
191 : print *, " The settings are: COMPLEX_ELPA_KERNEL_GENERIC_SIMPLE"
192 : #endif
193 :
194 : #ifdef WITH_COMPLEX_SSE_ASSEMBLY_KERNEL
195 : print *, " The settings are: COMPLEX_ELPA_KERNEL_SSE"
196 : #endif
197 : #ifdef WITH_COMPLEX_SSE_BLOCK1_KERNEL
198 : print *, " The settings are: COMPLEX_ELPA_KERNEL_SSE_BLOCK1"
199 : #endif
200 :
201 : #ifdef WITH_COMPLEX_SSE_BLOCK2_KERNEL
202 : print *, " The settings are: COMPLEX_ELPA_KERNEL_SSE_BLOCK2"
203 : #endif
204 :
205 : #ifdef WITH_COMPLEX_AVX_BLOCK1_KERNEL
206 : print *, " The settings are: COMPLEX_ELPA_KERNEL_AVX_BLOCK1"
207 : #endif
208 :
209 : #ifdef WITH_COMPLEX_AVX_BLOCK2_KERNEL
210 : print *, " The settings are: COMPLEX_ELPA_KERNEL_AVX_BLOCK2"
211 : #endif
212 :
213 : #ifdef WITH_COMPLEX_AVX2_BLOCK1_KERNEL
214 : print *, " The settings are: COMPLEX_ELPA_KERNEL_AVX2_BLOCK1"
215 : #endif
216 :
217 : #ifdef WITH_COMPLEX_AVX2_BLOCK2_KERNEL
218 : print *, " The settings are: COMPLEX_ELPA_KERNEL_AVX2_BLOCK2"
219 : #endif
220 :
221 : #ifdef WITH_COMPLEX_AVX512_BLOCK1_KERNEL
222 : print *, " The settings are: COMPLEX_ELPA_KERNEL_AVX512_BLOCK1"
223 : #endif
224 :
225 : #ifdef WITH_COMPLEX_AVX512_BLOCK2_KERNEL
226 : print *, " The settings are: COMPLEX_ELPA_KERNEL_AVX512_BLOCK2"
227 : #endif
228 :
229 : #ifdef WITH_GPU_VERSION
230 : print *, " The settings are: COMPLEX_ELPA_KERNEL_GPU"
231 : #endif
232 :
233 : #endif /* WITH_FIXED_COMPLEX_KERNEL */
234 :
235 :
236 :
237 96 : print *
238 : endif
239 :
240 : !-------------------------------------------------------------------------------
241 : ! Set up BLACS context and MPI communicators
242 : !
243 : ! The BLACS context is only necessary for using Scalapack.
244 : !
245 : ! For ELPA, the MPI communicators along rows/cols are sufficient,
246 : ! and the grid setup may be done in an arbitrary way as long as it is
247 : ! consistent (i.e. 0<=my_prow<np_rows, 0<=my_pcol<np_cols and every
248 : ! process has a unique (my_prow,my_pcol) pair).
249 :
250 : call set_up_blacsgrid(mpi_comm_world, np_rows, np_cols, 'C', &
251 144 : my_blacs_ctxt, my_prow, my_pcol)
252 :
253 144 : if (myid==0) then
254 96 : print '(a)','| Past BLACS_Gridinfo.'
255 : end if
256 :
257 : ! All ELPA routines need MPI communicators for communicating within
258 : ! rows or columns of processes, these are set in elpa_get_communicators
259 :
260 : mpierr = elpa_get_communicators(mpi_comm_world, my_prow, my_pcol, &
261 144 : mpi_comm_rows, mpi_comm_cols)
262 :
263 144 : if (myid==0) then
264 96 : print '(a)','| Past split communicator setup for rows and columns.'
265 : end if
266 :
267 : ! Determine the necessary size of the distributed matrices,
268 : ! we use the Scalapack tools routine NUMROC for that.
269 :
270 : call set_up_blacs_descriptor(na ,nblk, my_prow, my_pcol, np_rows, np_cols, &
271 144 : na_rows, na_cols, sc_desc, my_blacs_ctxt, info)
272 :
273 :
274 144 : if (myid==0) then
275 96 : print '(a)','| Past scalapack descriptor setup.'
276 : end if
277 : !-------------------------------------------------------------------------------
278 : ! Allocate matrices and set up a test matrix for the eigenvalue problem
 : ! a: input matrix (destroyed by the solver), z: eigenvectors,
 : ! as: untouched copy of a kept for the residual check at the end.
279 144 : allocate(a (na_rows,na_cols))
280 144 : allocate(z (na_rows,na_cols))
281 144 : allocate(as(na_rows,na_cols))
282 :
283 144 : allocate(ev(na))
284 :
285 144 : call prepare_matrix_random(na, myid, sc_desc, a, z, as)
286 :
287 : ! set print flag in elpa1
288 144 : elpa_print_times = .true.
289 :
290 : !-------------------------------------------------------------------------------
291 : ! Calculate eigenvalues/eigenvectors
292 :
293 144 : if (myid==0) then
294 96 : print '(a)','| Entering two-stage ELPA solver ... '
295 96 : print *
296 : end if
297 :
298 :
299 : ! Call the two-stage ELPA solver, specifying the kernel explicitly via the API.
 : ! The final actual argument of the call below is selected at compile time:
 : ! without WITH_FIXED_COMPLEX_KERNEL the generic-simple kernel is requested,
 : ! otherwise exactly one of the #ifdef branches supplies the kernel constant.
300 : #ifdef WITH_MPI
301 96 : call mpi_barrier(mpi_comm_world, mpierr) ! for correct timings only
302 : #endif
303 : successELPA = elpa_solve_evp_complex_2stage_single(na, nev, a, na_rows, ev, z, na_rows, nblk, &
304 : na_cols, mpi_comm_rows, mpi_comm_cols, mpi_comm_world, &
305 : #ifndef WITH_FIXED_COMPLEX_KERNEL
306 144 : COMPLEX_ELPA_KERNEL_GENERIC_SIMPLE)
307 : #else /* WITH_FIXED_COMPLEX_KERNEL */
308 :
309 : #ifdef WITH_COMPLEX_GENERIC_KERNEL
310 : COMPLEX_ELPA_KERNEL_GENERIC)
311 : #endif
312 :
313 : #ifdef WITH_COMPLEX_GENERIC_SIMPLE_KERNEL
314 : COMPLEX_ELPA_KERNEL_GENERIC_SIMPLE)
315 : #endif
316 :
317 : #ifdef WITH_COMPLEX_SSE_ASSEMBLY_KERNEL
318 : COMPLEX_ELPA_KERNEL_SSE)
319 : #endif
320 :
321 : #ifdef WITH_COMPLEX_SSE_BLOCK2_KERNEL
322 : COMPLEX_ELPA_KERNEL_SSE_BLOCK2)
323 : #else
324 : #ifdef WITH_COMPLEX_SSE_BLOCK1_KERNEL
325 : COMPLEX_ELPA_KERNEL_SSE_BLOCK1)
326 : #endif
327 : #endif
328 :
329 : #ifdef WITH_COMPLEX_AVX_BLOCK2_KERNEL
330 : COMPLEX_ELPA_KERNEL_AVX_BLOCK2)
331 : #else
332 : #ifdef WITH_COMPLEX_AVX_BLOCK1_KERNEL
333 : COMPLEX_ELPA_KERNEL_AVX_BLOCK1)
334 : #endif
335 : #endif
336 :
337 : #ifdef WITH_COMPLEX_AVX2_BLOCK2_KERNEL
338 : COMPLEX_ELPA_KERNEL_AVX2_BLOCK2)
339 : #else
340 : #ifdef WITH_COMPLEX_AVX2_BLOCK1_KERNEL
341 : COMPLEX_ELPA_KERNEL_AVX2_BLOCK1)
342 : #endif
343 : #endif
344 :
345 : #ifdef WITH_COMPLEX_AVX512_BLOCK2_KERNEL
346 : COMPLEX_ELPA_KERNEL_AVX512_BLOCK2)
347 : #else
348 : #ifdef WITH_COMPLEX_AVX512_BLOCK1_KERNEL
349 : COMPLEX_ELPA_KERNEL_AVX512_BLOCK1)
350 : #endif
351 : #endif
352 :
353 : #ifdef WITH_GPU_VERSION
354 : COMPLEX_ELPA_KERNEL_GPU)
355 : #endif
356 :
357 : #endif /* WITH_FIXED_COMPLEX_KERNEL */
358 :
359 :
 : ! Abort the whole run if the solver reported failure.
360 144 : if (.not.(successELPA)) then
361 0 : write(error_unit,*) "solve_evp_complex_2stage produced an error! Aborting..."
362 : #ifdef WITH_MPI
363 0 : call MPI_ABORT(mpi_comm_world, 1, mpierr)
364 : #else
365 0 : call exit(1)
366 : #endif
367 : endif
368 :
369 144 : if(myid == 0) print *,'Time transform to tridi :',time_evp_fwd
370 144 : if(myid == 0) print *,'Time solve tridi :',time_evp_solve
371 144 : if(myid == 0) print *,'Time transform back EVs :',time_evp_back
372 144 : if(myid == 0) print *,'Total time (sum above) :',time_evp_back+time_evp_solve+time_evp_fwd
373 :
 : ! Optionally dump each task's local slice of the eigenvector matrix to a
 : ! per-rank ASCII file (controlled by the 4th command-line argument).
374 144 : if(write_to_file%eigenvectors) then
375 0 : write(unit = task_suffix, fmt = '(i8.8)') myid
376 0 : open(17,file="EVs_complex2_out_task_"//task_suffix(1:8)//".txt",form='formatted',status='new')
377 0 : write(17,*) "Part of eigenvectors: na_rows=",na_rows,"of na=",na," na_cols=",na_cols," of na=",na
378 :
379 0 : do i=1,na_rows
380 0 : do j=1,na_cols
381 0 : write(17,*) "row=",i," col=",j," element of eigenvector=",z(i,j)
382 : enddo
383 : enddo
384 0 : close(17)
385 : endif
386 :
 : ! Optionally write all eigenvalues (rank 0 only; ev is replicated there).
387 144 : if(write_to_file%eigenvalues) then
388 0 : if (myid == 0) then
389 0 : open(17,file="Eigenvalues_complex2_out.txt",form='formatted',status='new')
390 0 : do i=1,na
391 0 : write(17,*) i,ev(i)
392 : enddo
393 0 : close(17)
394 : endif
395 : endif
396 :
397 :
398 : !-------------------------------------------------------------------------------
399 : ! Test correctness of result (using plain scalapack routines)
400 144 : status = check_correctness_evp_numeric_residuals(na, nev, as, z, ev, sc_desc, nblk, myid, np_rows, np_cols, my_prow, my_pcol)
401 :
402 144 : deallocate(a)
403 144 : deallocate(as)
404 :
405 144 : deallocate(z)
406 144 : deallocate(ev)
407 :
408 : #ifdef WITH_MPI
409 96 : call blacs_gridexit(my_blacs_ctxt)
410 96 : call mpi_finalize(mpierr)
411 : #endif
 : ! Process exit code is the result of the correctness check above
 : ! (0 on success), so the test harness can detect failures.
412 144 : call EXIT(STATUS)
413 : end
414 :
414 :
415 : !-------------------------------------------------------------------------------
|