! This file is part of ELPA.
!
! The ELPA library was originally created by the ELPA consortium,
! consisting of the following organizations:
!
! - Max Planck Computing and Data Facility (MPCDF), formerly known as
!   Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG),
! - Bergische Universität Wuppertal, Lehrstuhl für angewandte
!   Informatik,
! - Technische Universität München, Lehrstuhl für Informatik mit
!   Schwerpunkt Wissenschaftliches Rechnen,
! - Fritz-Haber-Institut, Berlin, Abt. Theorie,
! - Max-Planck-Institut für Mathematik in den Naturwissenschaften,
!   Leipzig, Abt. Komplexe Strukturen in Biologie und Kognition,
!   and
! - IBM Deutschland GmbH
!
!
! More information can be found here:
! http://elpa.mpcdf.mpg.de/
!
! ELPA is free software: you can redistribute it and/or modify
! it under the terms of version 3 of the GNU Lesser General Public
! License as published by the Free Software Foundation.
!
! ELPA is distributed in the hope that it will be useful,
! but WITHOUT ANY WARRANTY; without even the implied warranty of
! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
! GNU Lesser General Public License for more details.
!
! You should have received a copy of the GNU Lesser General Public License
! along with ELPA. If not, see <http://www.gnu.org/licenses/>
!
! ELPA reflects a substantial effort on the part of the original
! ELPA consortium, and we ask you to respect the spirit of the
! license that we chose: i.e., please contribute any changes you
! may have back to the original ELPA library distribution, and keep
! any derivatives of ELPA under the same license that we chose for
! the original distribution, the GNU Lesser General Public License.
!
!
#include "config-f90.h"
!>

program test_invert_trm

   use elpa1
   use elpa_utilities, only : error_unit
   use test_util

   use test_read_input_parameters
   use test_check_correctness
   use test_setup_mpi
   use test_blacs_infrastructure
   use test_prepare_matrix

#ifdef HAVE_REDIRECT
   use test_redirect
#endif
   use test_output_type

   implicit none

   !-------------------------------------------------------------------------------
   ! System size parameters, read from the command line by read_input_parameters:
   ! na:   System size
   ! nev:  Number of eigenvectors to be calculated (not used by this test)
   ! nblk: Blocking factor in block cyclic distribution
   !-------------------------------------------------------------------------------
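   ! A hypothetical invocation (argument order as consumed by
   ! read_input_parameters; the binary name is illustrative only):
   !
   !    mpiexec -n 4 ./validate_single_real_invert_trm 500 150 16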
   integer(kind=ik) :: nblk
   integer(kind=ik) :: na, nev

   integer(kind=ik) :: np_rows, np_cols, na_rows, na_cols

   integer(kind=ik) :: myid, nprocs, my_prow, my_pcol, mpi_comm_rows, mpi_comm_cols
   integer(kind=ik) :: i, mpierr, my_blacs_ctxt, sc_desc(9), info, nprow, npcol

   integer, external :: numroc

   real(kind=rk4), allocatable :: a(:,:), b(:,:), c(:,:), z(:,:), tmp1(:,:), tmp2(:,:), as(:,:), ev(:)
   real(kind=rk4), allocatable :: d(:), e(:), bs(:,:)
   real(kind=rk4) :: diagonalElement, subdiagonalElement
   integer(kind=ik) :: loctmp, rowLocal, colLocal

   real(kind=rk4) :: norm, normmax
#ifdef WITH_MPI
   real(kind=rk4) :: pslange
#else
   real(kind=rk4) :: slange
#endif
   real(kind=rk4), parameter :: pi = 3.141592653589793238462643383279_rk4
   integer(kind=ik) :: STATUS
#ifdef WITH_OPENMP
   integer(kind=ik) :: omp_get_max_threads, required_mpi_thread_level, &
                       provided_mpi_thread_level
#endif
   type(output_t) :: write_to_file
   logical :: success
   character(len=8) :: task_suffix
   integer(kind=ik) :: j
   !-------------------------------------------------------------------------------

   success = .true.

   call read_input_parameters(na, nev, nblk, write_to_file)

   !-------------------------------------------------------------------------------
   ! MPI Initialization
   call setup_mpi(myid, nprocs)

   STATUS = 0

   do np_cols = NINT(SQRT(REAL(nprocs))),2,-1
      if (mod(nprocs,np_cols) == 0) exit
   enddo

   ! at the end of the above loop, nprocs is always divisible by np_cols
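   ! e.g. nprocs = 8 gives np_cols = 2 (and np_rows = 4 below); for a prime
   ! nprocs the loop finds no divisor and np_cols ends up as 1, i.e. a
   ! single process column.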

   np_rows = nprocs/np_cols

   if (myid==0) then
      print '(2(a,i0))','Matrix size=',na,', Block size=',nblk
      print '(3(a,i0))','Number of processor rows=',np_rows,', cols=',np_cols,', total=',nprocs
      print *
   endif

   !-------------------------------------------------------------------------------
   ! Set up BLACS context and MPI communicators
   !
   ! The BLACS context is only necessary for using ScaLAPACK.
   !
   ! For ELPA, the MPI communicators along rows/cols are sufficient,
   ! and the grid setup may be done in an arbitrary way as long as it is
   ! consistent (i.e. 0<=my_prow<np_rows, 0<=my_pcol<np_cols and every
   ! process has a unique (my_prow,my_pcol) pair).
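   !
   ! With the column-major ('C') grid layout used below, BLACS assigns
   ! consecutive MPI ranks down the columns of the grid; assuming the
   ! default rank ordering this amounts to my_prow = mod(rank,np_rows)
   ! and my_pcol = rank/np_rows.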

   call set_up_blacsgrid(mpi_comm_world, np_rows, np_cols, 'C', &
                         my_blacs_ctxt, my_prow, my_pcol)

   if (myid==0) then
      print '(a)','| Past BLACS_Gridinfo.'
   end if

   ! All ELPA routines need MPI communicators for communicating within
   ! rows or columns of processes; these are set up in elpa_get_communicators.

   mpierr = elpa_get_communicators(mpi_comm_world, my_prow, my_pcol, &
                                   mpi_comm_rows, mpi_comm_cols)

   if (myid==0) then
      print '(a)','| Past split communicator setup for rows and columns.'
   end if

   call set_up_blacs_descriptor(na, nblk, my_prow, my_pcol, np_rows, np_cols, &
                                na_rows, na_cols, sc_desc, my_blacs_ctxt, info)
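   ! The helper essentially evaluates the ScaLAPACK formula
   ! na_rows = numroc(na, nblk, my_prow, 0, np_rows) (and analogously
   ! na_cols) and fills the nine-element descriptor sc_desc.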

   if (myid==0) then
      print '(a)','| Past ScaLAPACK descriptor setup.'
   end if

   !-------------------------------------------------------------------------------
   ! Allocate matrices and set up a test matrix
   allocate(a (na_rows,na_cols))
   allocate(b (na_rows,na_cols))
   allocate(bs(na_rows,na_cols))
   allocate(c (na_rows,na_cols))

   allocate(z (na_rows,na_cols))
   allocate(as(na_rows,na_cols))

   allocate(ev(na))

   allocate(d (na))
   allocate(e (na))

   call prepare_matrix_random(na, myid, sc_desc, b, z, bs)
   bs(:,:) = b(:,:)

   a(:,:) = 0.0_rk4

   diagonalElement = 2.546_rk4

   do i = 1, na
      if (map_global_array_index_to_local_index(i, i, rowLocal, colLocal, nblk, np_rows, np_cols, my_prow, my_pcol)) then
         a(rowLocal,colLocal) = diagonalElement * abs(cos( pi*real(i,kind=rk4)/ real(na+1,kind=rk4) ))
      endif
   enddo
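
   ! a is diagonal with entries diagonalElement * |cos(pi*i/(na+1))|, hence
   ! symmetric and (for the matrix sizes used here) positive definite, so
   ! the Cholesky factorization below is well defined.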

   as(:,:) = a(:,:)

   !-------------------------------------------------------------------------------
   ! Cholesky factorization and inversion of the triangular factor

   if (myid==0) then
      print '(a)','| Set up an upper triangular matrix ... '
      print *
   end if
#ifdef WITH_MPI
   call mpi_barrier(mpi_comm_world, mpierr) ! for correct timings only
#endif

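   ! elpa_cholesky_real_single factorizes the distributed matrix in place:
   ! on success the upper triangle of a holds the Cholesky factor U with
   ! A = U**T * U; the final argument is the debug flag.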
   success = elpa_cholesky_real_single(na, a, na_rows, nblk, na_cols, mpi_comm_rows, mpi_comm_cols, .true.)

   if (.not.(success)) then
      write(error_unit,*) "elpa_cholesky_real_single produced an error! Aborting..."
#ifdef WITH_MPI
      call MPI_ABORT(mpi_comm_world, 1, mpierr)
#else
      call exit(1)
#endif
   endif

   if (myid==0) then
      print '(a)','| Upper triangular matrix created.'
      print *
   end if

   ! Save the triangular factor U before it is overwritten by its inverse
   as(:,:) = a(:,:)

   if (myid==0) then
      print '(a)','| Invert the upper triangular matrix ... '
      print *
   end if
#ifdef WITH_MPI
   call mpi_barrier(mpi_comm_world, mpierr) ! for correct timings only
#endif

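   ! elpa_invert_trm_real_single replaces the upper triangular matrix in a
   ! by its inverse, again in place on the same block-cyclic distribution.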
   success = elpa_invert_trm_real_single(na, a, na_rows, nblk, na_cols, mpi_comm_rows, mpi_comm_cols, .true.)

   if (.not.(success)) then
      write(error_unit,*) "elpa_invert_trm_real_single produced an error! Aborting..."
#ifdef WITH_MPI
      call MPI_ABORT(mpi_comm_world, 1, mpierr)
#else
      call exit(1)
#endif
   endif

   if (myid==0) then
      print '(a)','| Upper triangular matrix inverted.'
      print *
   end if

   !-------------------------------------------------------------------------------
   ! Test correctness of result (using plain ScaLAPACK routines)
   allocate(tmp1(na_rows,na_cols))
   allocate(tmp2(na_rows,na_cols))

   tmp1(:,:) = 0.0_rk4

   ! tmp1 = as * a = U * U^-1 (a holds the inverse), which should give the identity matrix

#ifdef WITH_MPI
   call psgemm("N","N", na, na, na, 1.0_rk4, as, 1, 1, sc_desc, a, 1, 1, &
               sc_desc, 0.0_rk4, tmp1, 1, 1, sc_desc)
#else
   call sgemm("N","N", na, na, na, 1.0_rk4, as, na, a, na, 0.0_rk4, tmp1, na)
#endif
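
   ! tmp1 now holds U * U^{-1}, which should equal the identity matrix up to
   ! single-precision rounding.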

   ! Check the quality of the computed identity: rather than comparing tmp1
   ! with a distributed identity matrix, multiply it by the random matrix b;
   ! if tmp1 were exactly the identity, the product would reproduce b.

   ! tmp2 = b * tmp1
#ifdef WITH_MPI
   call psgemm("N","N", na, na, na, 1.0_rk4, b, 1, 1, sc_desc, tmp1, 1, 1, &
               sc_desc, 0.0_rk4, tmp2, 1, 1, sc_desc)
#else
   call sgemm("N","N", na, na, na, 1.0_rk4, b, na, tmp1, na, 0.0_rk4, tmp2, na)
#endif

   ! compare tmp2 with the original matrix b (saved in bs)
   tmp2(:,:) = tmp2(:,:) - bs(:,:)

   ! max-abs-entry norm of the difference; the last argument is the work
   ! array, which is not referenced for the "M" norm
#ifdef WITH_MPI
   norm = pslange("M", na, na, tmp2, 1, 1, sc_desc, bs)
#else
   norm = slange("M", na, na, tmp2, na_rows, bs)
#endif

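   ! Reduce the per-process maximum to the global maximum over all tasks;
   ! MPI_REAL4 matches the rk4 kind used throughout.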
#ifdef WITH_MPI
   call mpi_allreduce(norm, normmax, 1, MPI_REAL4, MPI_MAX, MPI_COMM_WORLD, mpierr)
#else
   normmax = norm
#endif
   if (myid .eq. 0) then
      print *," Maximum error of result: ", normmax
   endif

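   ! The 5e-4 bound is a generous single-precision tolerance (machine
   ! epsilon is about 1.2e-7), leaving room for rounding-error growth
   ! with the matrix size na.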
   if (normmax .gt. 5e-4_rk4) then
      status = 1
   endif

   deallocate(a)
   deallocate(b)
   deallocate(bs)
   deallocate(c)

   deallocate(as)

   deallocate(z)
   deallocate(tmp1)
   deallocate(tmp2)
   deallocate(ev)

   deallocate(d)
   deallocate(e)

#ifdef WITH_MPI
   call blacs_gridexit(my_blacs_ctxt)
   call mpi_finalize(mpierr)
#endif

   call EXIT(STATUS)

end program test_invert_trm

!-------------------------------------------------------------------------------