Line data Source code
1 : ! This file is part of ELPA.
2 : !
3 : ! The ELPA library was originally created by the ELPA consortium,
4 : ! consisting of the following organizations:
5 : !
6 : ! - Max Planck Computing and Data Facility (MPCDF), formerly known as
7 : ! Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG),
8 : ! - Bergische Universität Wuppertal, Lehrstuhl für angewandte
9 : ! Informatik,
10 : ! - Technische Universität München, Lehrstuhl für Informatik mit
11 : ! Schwerpunkt Wissenschaftliches Rechnen ,
12 : ! - Fritz-Haber-Institut, Berlin, Abt. Theorie,
13 : ! - Max-Plack-Institut für Mathematik in den Naturwissenschaften,
14 : ! Leipzig, Abt. Komplexe Strukutren in Biologie und Kognition,
15 : ! and
16 : ! - IBM Deutschland GmbH
17 : !
18 : !
19 : ! More information can be found here:
20 : ! http://elpa.mpcdf.mpg.de/
21 : !
22 : ! ELPA is free software: you can redistribute it and/or modify
23 : ! it under the terms of the version 3 of the license of the
24 : ! GNU Lesser General Public License as published by the Free
25 : ! Software Foundation.
26 : !
27 : ! ELPA is distributed in the hope that it will be useful,
28 : ! but WITHOUT ANY WARRANTY; without even the implied warranty of
29 : ! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
30 : ! GNU Lesser General Public License for more details.
31 : !
32 : ! You should have received a copy of the GNU Lesser General Public License
33 : ! along with ELPA. If not, see <http://www.gnu.org/licenses/>
34 : !
35 : ! ELPA reflects a substantial effort on the part of the original
36 : ! ELPA consortium, and we ask you to respect the spirit of the
37 : ! license that we chose: i.e., please contribute any changes you
38 : ! may have back to the original ELPA library distribution, and keep
39 : ! any derivatives of ELPA under the same license that we chose for
40 : ! the original distribution, the GNU Lesser General Public License.
41 : !
42 : !
43 : #include "config-f90.h"
44 : !>
45 : !> Fortran test programm to demonstrates the use of
46 : !> ELPA 2 real case library.
47 : !> If "HAVE_REDIRECT" was defined at build time
48 : !> the stdout and stderr output of each MPI task
49 : !> can be redirected to files if the environment
50 : !> variable "REDIRECT_ELPA_TEST_OUTPUT" is set
51 : !> to "true".
52 : !>
53 : !> By calling executable [arg1] [arg2] [arg3] [arg4]
54 : !> one can define the size (arg1), the number of
55 : !> Eigenvectors to compute (arg2), and the blocking (arg3).
56 : !> If these values are not set default values (500, 150, 16)
57 : !> are choosen.
58 : !> If these values are set the 4th argument can be
59 : !> "output", which specifies that the EV's are written to
60 : !> an ascii file.
61 : !>
62 : !> The real ELPA 2 kernel is set as the default kernel.
63 : !> In this test case the qr_decomposition is used.
64 : !> However, this can be overriden by setting
65 : !> the environment variable "REAL_ELPA_KERNEL" to an
66 : !> appropiate value.
67 : !>
68 144 : program test_real2_default_kernel_qr_decomposition_single_precision
69 :
70 : !-------------------------------------------------------------------------------
71 : ! Standard eigenvalue problem - REAL version
72 : !
73 : ! This program demonstrates the use of the ELPA module
74 : ! together with standard scalapack routines
75 : !
76 : ! Copyright of the original code rests with the authors inside the ELPA
77 : ! consortium. The copyright of any additional modifications shall rest
78 : ! with their original authors, but shall adhere to the licensing terms
79 : ! distributed along with the original code in the file "COPYING".
80 : !
81 : !-------------------------------------------------------------------------------
82 144 : use elpa1
83 : use elpa2
84 :
85 : use elpa_utilities, only : error_unit
86 : use elpa2_utilities
87 : use test_read_input_parameters
88 : use test_check_correctness
89 : use test_setup_mpi
90 : use test_blacs_infrastructure
91 : use test_prepare_matrix
92 : use test_util
93 :
94 : #ifdef HAVE_REDIRECT
95 : use test_redirect
96 : #endif
97 :
98 : use test_output_type
99 : implicit none
100 :
101 : !-------------------------------------------------------------------------------
102 : ! Please set system size parameters below!
103 : ! na: System size
104 : ! nev: Number of eigenvectors to be calculated
105 : ! nblk: Blocking factor in block cyclic distribution
106 : !-------------------------------------------------------------------------------
107 :
108 : integer(kind=ik) :: nblk
109 : integer(kind=ik) :: na, nev
110 :
111 : integer(kind=ik) :: np_rows, np_cols, na_rows, na_cols
112 :
113 : integer(kind=ik) :: myid, nprocs, my_prow, my_pcol, mpi_comm_rows, mpi_comm_cols
114 : integer(kind=ik) :: i, mpierr, my_blacs_ctxt, sc_desc(9), info, nprow, npcol
115 :
116 : integer, external :: numroc
117 :
118 144 : real(kind=rk4), allocatable :: a(:,:), z(:,:), as(:,:), ev(:)
119 :
120 : integer(kind=ik) :: ret
121 : #ifdef WITH_OPENMP
122 : integer(kind=ik) :: omp_get_max_threads, required_mpi_thread_level, provided_mpi_thread_level
123 : #endif
124 : logical :: successELPA, success
125 : logical :: gpuAvailable
126 : type(output_t) :: write_to_file
127 : character(len=8) :: task_suffix
128 : integer(kind=ik) :: j
129 :
130 : #undef DOUBLE_PRECISION_REAL
131 :
132 144 : successELPA = .true.
133 144 : gpuAvailable = .false.
134 :
135 :
136 : !write_to_file = .false.
137 144 : call read_input_parameters(na, nev, nblk, write_to_file)
138 :
139 : !if (COMMAND_ARGUMENT_COUNT() /= 0) then
140 : ! write(error_unit,*) "This program does not support any command-line arguments"
141 : ! stop 1
142 : !endif
143 :
144 : ! ! override nblk
145 : ! nblk = 2
146 : ! na = 500
147 : ! nev = 150
148 : !! nblk = 32
149 : ! ! na = 500
150 : ! ! nev = 150
151 : !
152 : ! ! make sure na, nbl is even
153 : ! if (mod(nblk,2 ) .ne. 0) then
154 : ! nblk = nblk - 1
155 : ! endif
156 : !
157 : ! ! make sure na is even
158 : ! if (mod(na,2) .ne. 0) then
159 : ! na = na - 1
160 : ! endif
161 : ! ! make sure na is at least 34
162 : ! if (na .lt. 34) then
163 : ! na = 34
164 : ! endif
165 :
166 : !-------------------------------------------------------------------------------
167 : ! MPI Initialization
168 144 : call setup_mpi(myid, nprocs)
169 :
170 :
171 144 : ret = 0
172 :
173 144 : if (nblk .lt. 64) then
174 144 : ret = 1
175 144 : if (myid .eq. 0) then
176 96 : print *,"At the moment QR decomposition need blocksize of at least 64"
177 : endif
178 144 : if ((na .lt. 64) .and. (myid .eq. 0)) then
179 0 : print *,"This is why the matrix size must also be at least 64 or only 1 MPI task can be used"
180 : endif
181 : #ifdef WITH_MPI
182 96 : call mpi_finalize(mpierr)
183 : #endif
184 144 : stop 77
185 : endif
186 :
187 : #define REALCASE
188 : #include "../../elpa_print_headers.F90"
189 :
190 : !-------------------------------------------------------------------------------
191 : ! Selection of number of processor rows/columns
192 : ! We try to set up the grid square-like, i.e. start the search for possible
193 : ! divisors of nprocs with a number next to the square root of nprocs
194 : ! and decrement it until a divisor is found.
195 :
196 0 : do np_cols = NINT(SQRT(REAL(nprocs))),2,-1
197 0 : if(mod(nprocs,np_cols) == 0 ) exit
198 : enddo
199 : ! at the end of the above loop, nprocs is always divisible by np_cols
200 :
201 0 : np_rows = nprocs/np_cols
202 :
203 0 : if(myid==0) then
204 0 : print *
205 0 : print '(a)','Standard eigenvalue problem - REAL version'
206 : #ifdef WITH_GPU_VERSION
207 : print *,"with GPU version"
208 : #endif
209 0 : print *
210 0 : print '(3(a,i0))','Matrix size=',na,', Number of eigenvectors=',nev,', Block size=',nblk
211 0 : print '(3(a,i0))','Number of processor rows=',np_rows,', cols=',np_cols,', total=',nprocs
212 0 : print *
213 0 : print *, "This is an example how ELPA2 chooses a default kernel,"
214 : #ifdef HAVE_ENVIRONMENT_CHECKING
215 0 : print *, "or takes the kernel defined in the environment variable,"
216 : #endif
217 0 : print *, "since the ELPA API call does not contain any kernel specification"
218 0 : print *
219 0 : print *," "
220 : #ifndef HAVE_ENVIRONMENT_CHECKING
221 : print *, " Notice that it is not possible with this build to set the "
222 : print *, " kernel via an environment variable! To change this re-install"
223 : print *, " the library and have a look at the log files"
224 : #endif
225 0 : print *, " The qr-decomposition is used via the api call"
226 : endif
227 :
228 : !-------------------------------------------------------------------------------
229 : ! Set up BLACS context and MPI communicators
230 : !
231 : ! The BLACS context is only necessary for using Scalapack.
232 : !
233 : ! For ELPA, the MPI communicators along rows/cols are sufficient,
234 : ! and the grid setup may be done in an arbitrary way as long as it is
235 : ! consistent (i.e. 0<=my_prow<np_rows, 0<=my_pcol<np_cols and every
236 : ! process has a unique (my_prow,my_pcol) pair).
237 :
238 : call set_up_blacsgrid(mpi_comm_world, np_rows, np_cols, 'C', &
239 0 : my_blacs_ctxt, my_prow, my_pcol)
240 :
241 0 : if (myid==0) then
242 0 : print '(a)','| Past BLACS_Gridinfo.'
243 : end if
244 :
245 : ! All ELPA routines need MPI communicators for communicating within
246 : ! rows or columns of processes, these are set in elpa_get_communicators.
247 :
248 : mpierr = elpa_get_communicators(mpi_comm_world, my_prow, my_pcol, &
249 0 : mpi_comm_rows, mpi_comm_cols)
250 :
251 0 : if (myid==0) then
252 0 : print '(a)','| Past split communicator setup for rows and columns.'
253 : end if
254 :
255 : call set_up_blacs_descriptor(na ,nblk, my_prow, my_pcol, np_rows, np_cols, &
256 0 : na_rows, na_cols, sc_desc, my_blacs_ctxt, info)
257 :
258 0 : if (myid==0) then
259 0 : print '(a)','| Past scalapack descriptor setup.'
260 : end if
261 :
262 : !-------------------------------------------------------------------------------
263 : ! Allocate matrices and set up a test matrix for the eigenvalue problem
264 0 : allocate(a (na_rows,na_cols))
265 0 : allocate(z (na_rows,na_cols))
266 0 : allocate(as(na_rows,na_cols))
267 :
268 0 : allocate(ev(na))
269 :
270 0 : call prepare_matrix_random(na, myid, sc_desc, a, z, as)
271 :
272 : ! set print flag in elpa1
273 0 : elpa_print_times = .true.
274 :
275 : !-------------------------------------------------------------------------------
276 : ! Calculate eigenvalues/eigenvectors
277 :
278 0 : if (myid==0) then
279 0 : print '(a)','| Entering two-stage ELPA solver ... '
280 0 : print *
281 : end if
282 :
283 :
284 : ! ELPA is called without any kernel specification in the API,
285 : ! furthermore, if the environment variable is not set, the
286 : ! default kernel is called. Otherwise, the kernel defined in the
287 : ! environment variable
288 : #ifdef WITH_MPI
289 0 : call mpi_barrier(mpi_comm_world, mpierr) ! for correct timings only
290 : #endif
291 :
292 : successELPA = elpa_solve_evp_real_2stage_single(na, nev, a, na_rows, ev, z, na_rows, nblk, &
293 : na_cols, mpi_comm_rows, mpi_comm_cols, mpi_comm_world, &
294 0 : useQR=.true.)
295 :
296 0 : if (.not.(successELPA)) then
297 0 : write(error_unit,*) "solve_evp_real_2stage produced an error! Aborting..."
298 : #ifdef WITH_MPI
299 0 : call MPI_ABORT(mpi_comm_world, 1, mpierr)
300 : #else
301 0 : call exit(1)
302 : #endif
303 : endif
304 :
305 0 : if (myid==0) then
306 0 : print '(a)','| Two-step ELPA solver complete.'
307 0 : print *
308 : end if
309 :
310 0 : if(myid == 0) print *,'Time transform to tridi :',time_evp_fwd
311 0 : if(myid == 0) print *,'Time solve tridi :',time_evp_solve
312 0 : if(myid == 0) print *,'Time transform back EVs :',time_evp_back
313 0 : if(myid == 0) print *,'Total time (sum above) :',time_evp_back+time_evp_solve+time_evp_fwd
314 :
315 0 : if(write_to_file%eigenvectors) then
316 0 : write(unit = task_suffix, fmt = '(i8.8)') myid
317 0 : open(17,file="EVs_real2_out_task_"//task_suffix(1:8)//".txt",form='formatted',status='new')
318 0 : write(17,*) "Part of eigenvectors: na_rows=",na_rows,"of na=",na," na_cols=",na_cols," of na=",na
319 :
320 0 : do i=1,na_rows
321 0 : do j=1,na_cols
322 0 : write(17,*) "row=",i," col=",j," element of eigenvector=",z(i,j)
323 : enddo
324 : enddo
325 0 : close(17)
326 : endif
327 :
328 0 : if(write_to_file%eigenvalues) then
329 0 : if (myid == 0) then
330 0 : open(17,file="EVs_real2_out.txt",form='formatted',status='new')
331 0 : do i=1,na
332 0 : write(17,*) i,ev(i)
333 : enddo
334 0 : close(17)
335 : endif
336 : endif
337 :
338 :
339 : !-------------------------------------------------------------------------------
340 : ! Test correctness of result (using plain scalapack routines)
341 0 : ret = check_correctness_evp_numeric_residuals(na, nev, as, z, ev, sc_desc, nblk, myid, np_rows, np_cols, my_prow, my_pcol)
342 :
343 0 : deallocate(a)
344 0 : deallocate(as)
345 0 : deallocate(z)
346 0 : deallocate(ev)
347 :
348 : #ifdef WITH_MPI
349 0 : call blacs_gridexit(my_blacs_ctxt)
350 0 : call mpi_finalize(mpierr)
351 : #endif
352 0 : call exit(ret)
353 : end
354 :
355 : !-------------------------------------------------------------------------------
|