! This file is part of ELPA.
!
! The ELPA library was originally created by the ELPA consortium,
! consisting of the following organizations:
!
! - Max Planck Computing and Data Facility (MPCDF), formerly known as
!   Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG),
! - Bergische Universität Wuppertal, Lehrstuhl für angewandte
!   Informatik,
! - Technische Universität München, Lehrstuhl für Informatik mit
!   Schwerpunkt Wissenschaftliches Rechnen,
! - Fritz-Haber-Institut, Berlin, Abt. Theorie,
! - Max-Planck-Institut für Mathematik in den Naturwissenschaften,
!   Leipzig, Abt. Komplexe Strukturen in Biologie und Kognition,
!   and
! - IBM Deutschland GmbH
!
!
! More information can be found here:
! http://elpa.mpcdf.mpg.de/
!
! ELPA is free software: you can redistribute it and/or modify
! it under the terms of version 3 of the GNU Lesser General Public
! License as published by the Free Software Foundation.
!
! ELPA is distributed in the hope that it will be useful,
! but WITHOUT ANY WARRANTY; without even the implied warranty of
! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
! GNU Lesser General Public License for more details.
!
! You should have received a copy of the GNU Lesser General Public License
! along with ELPA. If not, see <http://www.gnu.org/licenses/>
!
! ELPA reflects a substantial effort on the part of the original
! ELPA consortium, and we ask you to respect the spirit of the
! license that we chose: i.e., please contribute any changes you
! may have back to the original ELPA library distribution, and keep
! any derivatives of ELPA under the same license that we chose for
! the original distribution, the GNU Lesser General Public License.
!
!
#include "config-f90.h"
!>
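!> Test program for the legacy-API routine elpa_mult_at_b_real_double:
!> it computes c = a**T * b for a random block-cyclically distributed
!> matrix a (with b = 2*a) and checks the result against an independent
!> ScaLAPACK/BLAS computation.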

program test_transpose_multiply

   use elpa1
   use elpa_utilities, only : error_unit
   use test_util

   use test_read_input_parameters
   use test_check_correctness
   use test_setup_mpi
   use test_blacs_infrastructure
   use test_prepare_matrix

#ifdef HAVE_REDIRECT
   use test_redirect
#endif
   use test_output_type

   implicit none

   !-------------------------------------------------------------------------------
   ! System size parameters, read in below:
   ! na:   System size
   ! nev:  Number of eigenvectors (not used in this test)
   ! nblk: Blocking factor in block cyclic distribution
   !-------------------------------------------------------------------------------
   integer(kind=ik) :: nblk
   integer(kind=ik) :: na, nev

   integer(kind=ik) :: np_rows, np_cols, na_rows, na_cols

   integer(kind=ik) :: myid, nprocs, my_prow, my_pcol, mpi_comm_rows, mpi_comm_cols
   integer(kind=ik) :: i, mpierr, my_blacs_ctxt, sc_desc(9), info, nprow, npcol

   integer, external :: numroc

   real(kind=rk8), allocatable :: a(:,:), b(:,:), c(:,:), z(:,:), tmp1(:,:), tmp2(:,:), as(:,:), ev(:)

   real(kind=rk8) :: norm, normmax
#ifdef WITH_MPI
   real(kind=rk8) :: pdlange
#else
   real(kind=rk8) :: dlange
#endif
   integer(kind=ik) :: STATUS
#ifdef WITH_OPENMP
   integer(kind=ik) :: omp_get_max_threads, required_mpi_thread_level, &
                       provided_mpi_thread_level
#endif
   type(output_t) :: write_to_file
   logical :: success
   character(len=8) :: task_suffix
   integer(kind=ik) :: j
   !-------------------------------------------------------------------------------

   success = .true.

   call read_input_parameters_traditional(na, nev, nblk, write_to_file)

   !-------------------------------------------------------------------------------
   ! MPI Initialization
   call setup_mpi(myid, nprocs)

   STATUS = 0
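
   ! Choose the largest np_cols <= sqrt(nprocs) that divides nprocs evenly,
   ! e.g. nprocs = 4 gives np_cols = 2 and np_rows = 2; for a prime nprocs
   ! the loop below falls through with np_cols = 1 (a single process column).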
109 :
110 192 : do np_cols = NINT(SQRT(REAL(nprocs))),2,-1
111 0 : if(mod(nprocs,np_cols) == 0 ) exit
112 : enddo
113 :
114 : ! at the end of the above loop, nprocs is always divisible by np_cols
115 :
116 192 : np_rows = nprocs/np_cols
117 :
118 192 : if(myid==0) then
119 128 : print '(3(a,i0))','Matrix size=',na,', Block size=',nblk
120 128 : print '(3(a,i0))','Number of processor rows=',np_rows,', cols=',np_cols,', total=',nprocs
121 128 : print *
122 : endif
123 :

   !-------------------------------------------------------------------------------
   ! Set up BLACS context and MPI communicators
   !
   ! The BLACS context is only necessary for using ScaLAPACK.
   !
   ! For ELPA, the MPI communicators along rows/cols are sufficient,
   ! and the grid setup may be done in an arbitrary way as long as it is
   ! consistent (i.e. 0<=my_prow<np_rows, 0<=my_pcol<np_cols and every
   ! process has a unique (my_prow,my_pcol) pair).

   call set_up_blacsgrid(mpi_comm_world, np_rows, np_cols, 'C', &
                         my_blacs_ctxt, my_prow, my_pcol)

   if (myid==0) then
      print '(a)','| Past BLACS_Gridinfo.'
   end if

   ! All ELPA routines need MPI communicators for communicating within
   ! rows or columns of processes; these are set up in elpa_get_communicators.

   mpierr = elpa_get_communicators(mpi_comm_world, my_prow, my_pcol, &
                                   mpi_comm_rows, mpi_comm_cols)

   if (myid==0) then
      print '(a)','| Past split communicator setup for rows and columns.'
   end if

   call set_up_blacs_descriptor(na, nblk, my_prow, my_pcol, np_rows, np_cols, &
                                na_rows, na_cols, sc_desc, my_blacs_ctxt, info)

   if (myid==0) then
      print '(a)','| Past ScaLAPACK descriptor setup.'
   end if

   !-------------------------------------------------------------------------------
   ! Allocate matrices and set up random test input for the multiplication
   allocate(a (na_rows,na_cols))
   allocate(b (na_rows,na_cols))
   allocate(c (na_rows,na_cols))

   allocate(z (na_rows,na_cols))
   allocate(as(na_rows,na_cols))

   allocate(ev(na))

   call prepare_matrix_random(na, myid, sc_desc, a, z, as)

   b(:,:) = 2.0_rk8 * a(:,:)
   c(:,:) = 0.0_rk8
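   ! b is set to a known multiple of a, and c is cleared before it receives
   ! the product computed by ELPA.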


   !-------------------------------------------------------------------------------
   ! Compute c = a**T * b with ELPA

   if (myid==0) then
      print '(a)','| Compute c = a**T * b ... '
      print *
   end if
#ifdef WITH_MPI
   call mpi_barrier(mpi_comm_world, mpierr) ! for correct timings only
#endif
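   ! The first two arguments select how a and c are stored: per the legacy
   ! ELPA interface, "F" means the full matrix is referenced/computed, while
   ! "U"/"L" would restrict the operation to the upper/lower triangle.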
   success = elpa_mult_at_b_real_double("F","F", na, na, a, na_rows, na_cols, b, na_rows, &
                                        na_cols, nblk, mpi_comm_rows, mpi_comm_cols, c,   &
                                        na_rows, na_cols)

   if (.not.(success)) then
      write(error_unit,*) "elpa_mult_at_b_real produced an error! Aborting..."
#ifdef WITH_MPI
      call MPI_ABORT(mpi_comm_world, 1, mpierr)
#else
      call exit(1)
#endif
   endif


   if (myid==0) then
      print '(a)','| Computation of c = a**T * b complete.'
      print *
   end if


   !-------------------------------------------------------------------------------
   ! Test correctness of result (using plain ScaLAPACK/BLAS routines)
   allocate(tmp1(na_rows,na_cols))
   allocate(tmp2(na_rows,na_cols))

   tmp1(:,:) = 0.0_rk8

   ! tmp1 = a**T
#ifdef WITH_MPI
   call pdtran(na, na, 1.0_rk8, a, 1, 1, sc_desc, 0.0_rk8, tmp1, 1, 1, sc_desc)
#else
   tmp1 = transpose(a)
#endif
   ! tmp2 = tmp1 * b
#ifdef WITH_MPI
   call pdgemm("N","N", na, na, na, 1.0_rk8, tmp1, 1, 1, sc_desc, b, 1, 1, &
               sc_desc, 0.0_rk8, tmp2, 1, 1, sc_desc)
#else
   call dgemm("N","N", na, na, na, 1.0_rk8, tmp1, na, b, na, 0.0_rk8, tmp2, na)
#endif

   ! compare tmp2 with c
   tmp2(:,:) = tmp2(:,:) - c(:,:)

#ifdef WITH_MPI
   norm = pdlange("M", na, na, tmp2, 1, 1, sc_desc, tmp1)
#else
   norm = dlange("M", na, na, tmp2, na_rows, tmp1)
#endif
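   ! "M" selects the largest absolute element of the residual; for this norm
   ! the work array ((p)dlange's last argument, tmp1 here) is not referenced.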

#ifdef WITH_MPI
   call mpi_allreduce(norm, normmax, 1, MPI_REAL8, MPI_MAX, MPI_COMM_WORLD, mpierr)
#else
   normmax = norm
#endif
   if (myid .eq. 0) then
      print *," Maximum error of result: ", normmax
   endif

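   ! Fail the test if the maximum elementwise error exceeds the hard-coded
   ! double precision tolerance of 5e-11.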
   if (normmax .gt. 5e-11_rk8) then
      status = 1
   endif

   deallocate(a)
   deallocate(b)
   deallocate(c)

   deallocate(as)

   deallocate(z)
   deallocate(tmp1)
   deallocate(tmp2)
   deallocate(ev)

#ifdef WITH_MPI
   call blacs_gridexit(my_blacs_ctxt)
   call mpi_finalize(mpierr)
#endif

   call EXIT(STATUS)


end program test_transpose_multiply

!-------------------------------------------------------------------------------