Line data Source code
1 : ! This file is part of ELPA.
2 : !
3 : ! The ELPA library was originally created by the ELPA consortium,
4 : ! consisting of the following organizations:
5 : !
6 : ! - Max Planck Computing and Data Facility (MPCDF), formerly known as
7 : ! Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG),
8 : ! - Bergische Universität Wuppertal, Lehrstuhl für angewandte
9 : ! Informatik,
10 : ! - Technische Universität München, Lehrstuhl für Informatik mit
11 : ! Schwerpunkt Wissenschaftliches Rechnen,
12 : ! - Fritz-Haber-Institut, Berlin, Abt. Theorie,
13 : ! - Max-Planck-Institut für Mathematik in den Naturwissenschaften,
14 : ! Leipzig, Abt. Komplexe Strukturen in Biologie und Kognition,
15 : ! and
16 : ! - IBM Deutschland GmbH
17 : !
18 : !
19 : ! More information can be found here:
20 : ! http://elpa.mpcdf.mpg.de/
21 : !
22 : ! ELPA is free software: you can redistribute it and/or modify
23 : ! it under the terms of the version 3 of the license of the
24 : ! GNU Lesser General Public License as published by the Free
25 : ! Software Foundation.
26 : !
27 : ! ELPA is distributed in the hope that it will be useful,
28 : ! but WITHOUT ANY WARRANTY; without even the implied warranty of
29 : ! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
30 : ! GNU Lesser General Public License for more details.
31 : !
32 : ! You should have received a copy of the GNU Lesser General Public License
33 : ! along with ELPA. If not, see <http://www.gnu.org/licenses/>
34 : !
35 : ! ELPA reflects a substantial effort on the part of the original
36 : ! ELPA consortium, and we ask you to respect the spirit of the
37 : ! license that we chose: i.e., please contribute any changes you
38 : ! may have back to the original ELPA library distribution, and keep
39 : ! any derivatives of ELPA under the same license that we chose for
40 : ! the original distribution, the GNU Lesser General Public License.
41 : !
42 : !
43 : #include "config-f90.h"
44 : !>
45 :
46 192 : program test_transpose_multiply
47 :
48 192 : use elpa1
49 : use elpa_utilities, only : error_unit
50 : use test_util
51 :
52 : use test_read_input_parameters
53 : use test_check_correctness
54 : use test_setup_mpi
55 : use test_blacs_infrastructure
56 : use test_prepare_matrix
57 :
58 : #ifdef HAVE_REDIRECT
59 : use test_redirect
60 : #endif
61 : use test_output_type
62 :
63 : implicit none
64 :
65 : !-------------------------------------------------------------------------------
66 : ! System size parameters (read below via read_input_parameters_traditional):
67 : ! na: System size
68 : ! nev: Number of eigenvectors (read for consistency with the other tests, not used here)
69 : ! nblk: Blocking factor in block cyclic distribution
70 : !-------------------------------------------------------------------------------
71 : integer(kind=ik) :: nblk
72 : integer(kind=ik) :: na, nev
73 :
74 : integer(kind=ik) :: np_rows, np_cols, na_rows, na_cols
75 :
76 : integer(kind=ik) :: myid, nprocs, my_prow, my_pcol, mpi_comm_rows, mpi_comm_cols
77 : integer(kind=ik) :: i, mpierr, my_blacs_ctxt, sc_desc(9), info, nprow, npcol
78 :
79 : integer, external :: numroc
80 :
81 192 : real(kind=rk8), allocatable :: ev(:)
82 384 : complex(kind=ck8), allocatable :: a(:,:), b(:,:), c(:,:), z(:,:), tmp1(:,:), tmp2(:,:), as(:,:)
83 : complex(kind=ck8), parameter :: CZERO = (0.0_rk8,0.0_rk8), CONE = (1.0_rk8,0.0_rk8)
84 : real(kind=rk8) :: norm, normmax
85 : #ifdef WITH_MPI
86 : real(kind=rk8) :: pzlange
87 : #else
88 : real(kind=rk8) :: zlange
89 : #endif
90 : integer(kind=ik) :: STATUS
91 : #ifdef WITH_OPENMP
92 : integer(kind=ik) :: omp_get_max_threads, required_mpi_thread_level, &
93 : provided_mpi_thread_level
94 : #endif
95 : type(output_t) :: write_to_file
96 : logical :: success
97 : character(len=8) :: task_suffix
98 : integer(kind=ik) :: j
99 : !-------------------------------------------------------------------------------
100 :
101 192 : success = .true.
102 :
103 192 : call read_input_parameters_traditional(na, nev, nblk, write_to_file)
104 :
105 : !-------------------------------------------------------------------------------
106 : ! MPI Initialization
107 192 : call setup_mpi(myid, nprocs)
108 :
109 192 : STATUS = 0
110 :
111 192 : do np_cols = NINT(SQRT(REAL(nprocs))),2,-1
112 0 : if(mod(nprocs,np_cols) == 0 ) exit
113 : enddo
114 :
115 : ! at the end of the above loop, nprocs is always divisible by np_cols
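! e.g. nprocs = 4 gives np_cols = 2 and np_rows = 2, nprocs = 6 gives np_cols = 2 and
! np_rows = 3; for a prime nprocs the loop falls through to np_cols = 1, np_rows = nprocs.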
116 :
117 192 : np_rows = nprocs/np_cols
118 :
119 192 : if(myid==0) then
120 128 : print '(3(a,i0))','Matrix size=',na,', Block size=',nblk
121 128 : print '(3(a,i0))','Number of processor rows=',np_rows,', cols=',np_cols,', total=',nprocs
122 128 : print *
123 : endif
124 :
125 : !-------------------------------------------------------------------------------
126 : ! Set up BLACS context and MPI communicators
127 : !
128 : ! The BLACS context is only necessary for using Scalapack.
129 : !
130 : ! For ELPA, the MPI communicators along rows/cols are sufficient,
131 : ! and the grid setup may be done in an arbitrary way as long as it is
132 : ! consistent (i.e. 0<=my_prow<np_rows, 0<=my_pcol<np_cols and every
133 : ! process has a unique (my_prow,my_pcol) pair).
134 :
135 : call set_up_blacsgrid(mpi_comm_world, np_rows, np_cols, 'C', &
136 192 : my_blacs_ctxt, my_prow, my_pcol)
137 :
138 192 : if (myid==0) then
139 128 : print '(a)','| Past BLACS_Gridinfo.'
140 : end if
141 :
142 : ! All ELPA routines need MPI communicators for communicating within
143 : ! rows or columns of processes; these are set up by elpa_get_communicators.
144 :
145 : mpierr = elpa_get_communicators(mpi_comm_world, my_prow, my_pcol, &
146 192 : mpi_comm_rows, mpi_comm_cols)
147 :
148 192 : if (myid==0) then
149 128 : print '(a)','| Past split communicator setup for rows and columns.'
150 : end if
151 :
152 : call set_up_blacs_descriptor(na ,nblk, my_prow, my_pcol, np_rows, np_cols, &
153 192 : na_rows, na_cols, sc_desc, my_blacs_ctxt, info)
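! na_rows and na_cols are the local (per-process) dimensions of the
! block-cyclically distributed na x na matrices described by sc_desc.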
154 :
155 192 : if (myid==0) then
156 128 : print '(a)','| Past scalapack descriptor setup.'
157 : end if
158 :
159 : !-------------------------------------------------------------------------------
160 : ! Allocate matrices and set up the test matrices for the multiplication
161 192 : allocate(a (na_rows,na_cols))
162 192 : allocate(b (na_rows,na_cols))
163 192 : allocate(c (na_rows,na_cols))
164 :
165 192 : allocate(z (na_rows,na_cols))
166 192 : allocate(as(na_rows,na_cols))
167 :
168 192 : allocate(ev(na))
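! prepare_matrix_random fills a with a random test matrix and keeps copies in z and as;
! b is a scaled copy of a, and c is cleared to receive the result of the ELPA multiplication.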
169 192 : call prepare_matrix_random(na, myid, sc_desc, a, z, as)
170 192 : b(:,:) = 2.0_ck8 * a(:,:)
171 192 : c(:,:) = 0.0_ck8
172 :
173 : !-------------------------------------------------------------------------------
174 : ! Compute the product c = a**H * b with ELPA
175 :
176 192 : if (myid==0) then
177 128 : print '(a)','| Compute c = a**H * b ... '
178 128 : print *
179 : end if
180 : #ifdef WITH_MPI
181 128 : call mpi_barrier(mpi_comm_world, mpierr) ! for correct timings only
182 : #endif
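! elpa_mult_ah_b_complex_double computes the distributed product c = a**H * b;
! the leading "F","F" arguments indicate that neither a nor c is restricted to
! an upper or lower triangle, i.e. the full matrices are referenced and computed.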
183 :
184 : success = elpa_mult_ah_b_complex_double("F","F", na, na, a, na_rows, na_cols, b, na_rows, na_cols, &
185 192 : nblk, mpi_comm_rows, mpi_comm_cols, c, na_rows, na_cols)
186 :
187 192 : if (.not.(success)) then
188 0 : write(error_unit,*) " elpa_mult_ah_b_complex_double produced an error! Aborting..."
189 : #ifdef WITH_MPI
190 0 : call MPI_ABORT(mpi_comm_world, 1, mpierr)
191 : #else
192 0 : call exit(1)
193 : #endif
194 : endif
195 :
196 :
197 192 : if (myid==0) then
198 128 : print '(a)','| Multiplication c = a**H * b complete.'
199 128 : print *
200 : end if
201 :
202 :
203 : !-------------------------------------------------------------------------------
204 : ! Test correctness of result (using plain scalapack routines)
205 192 : allocate(tmp1(na_rows,na_cols))
206 192 : allocate(tmp2(na_rows,na_cols))
207 :
208 192 : tmp1(:,:) = 0.0_ck8
209 :
210 : ! tmp1 = a**H (conjugate transpose of a)
211 : #ifdef WITH_MPI
212 128 : call pztranc(na, na, CONE, a, 1, 1, sc_desc, CZERO, tmp1, 1, 1, sc_desc)
213 : #else
214 64 : tmp1 = transpose(conjg(a))
215 : #endif
216 : ! tmp2 = tmp1 * b
217 : #ifdef WITH_MPI
218 : call pzgemm("N","N", na, na, na, CONE, tmp1, 1, 1, sc_desc, b, 1, 1, &
219 128 : sc_desc, CZERO, tmp2, 1, 1, sc_desc)
220 : #else
221 64 : call zgemm("N","N", na, na, na, CONE, tmp1, na, b, na, CZERO, tmp2, na)
222 : #endif
223 :
224 : ! compare tmp2 with c
225 192 : tmp2(:,:) = tmp2(:,:) - c(:,:)
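! normmax below is the largest absolute entry of the difference between the
! ScaLAPACK/LAPACK reference product and the ELPA result; in the MPI case it is
! reduced with MPI_MAX so that every rank sees the global maximum error.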
226 :
227 : #ifdef WITH_MPI
228 128 : norm = pzlange("M",na, na, tmp2, 1, 1, sc_desc, tmp1)
229 : #else
230 64 : norm = zlange("M",na, na, tmp2, na_rows, tmp1)
231 : #endif
232 : #ifdef WITH_MPI
233 128 : call mpi_allreduce(norm,normmax,1,MPI_REAL8,MPI_MAX,MPI_COMM_WORLD,mpierr)
234 : #else
235 64 : normmax = norm
236 : #endif
237 192 : if (myid .eq. 0) then
238 128 : print *," Maximum error of result: ", normmax
239 : endif
240 :
241 192 : if (normmax .gt. 5e-11_rk8) then
242 0 : status = 1
243 : endif
244 :
245 192 : deallocate(a)
246 192 : deallocate(b)
247 192 : deallocate(c)
248 :
249 192 : deallocate(as)
250 :
251 192 : deallocate(z)
252 192 : deallocate(tmp1)
253 192 : deallocate(tmp2)
254 192 : deallocate(ev)
255 :
256 : #ifdef WITH_MPI
257 128 : call blacs_gridexit(my_blacs_ctxt)
258 128 : call mpi_finalize(mpierr)
259 : #endif
260 :
261 192 : call EXIT(STATUS)
262 :
263 :
264 : end program test_transpose_multiply
265 :
266 : !-------------------------------------------------------------------------------