Line data Source code
1 : #if 0
2 : ! This file is part of ELPA.
3 : !
4 : ! The ELPA library was originally created by the ELPA consortium,
5 : ! consisting of the following organizations:
6 : !
7 : ! - Max Planck Computing and Data Facility (MPCDF), formerly known as
8 : ! Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG),
9 : ! - Bergische Universität Wuppertal, Lehrstuhl für angewandte
10 : ! Informatik,
11 : ! - Technische Universität München, Lehrstuhl für Informatik mit
12 : ! Schwerpunkt Wissenschaftliches Rechnen ,
13 : ! - Fritz-Haber-Institut, Berlin, Abt. Theorie,
14 : ! - Max-Plack-Institut für Mathematik in den Naturwissenschaften,
15 : ! Leipzig, Abt. Komplexe Strukutren in Biologie und Kognition,
16 : ! and
17 : ! - IBM Deutschland GmbH
18 : !
19 : ! This particular source code file contains additions, changes and
20 : ! enhancements authored by Intel Corporation which is not part of
21 : ! the ELPA consortium.
22 : !
23 : ! More information can be found here:
24 : ! http://elpa.mpcdf.mpg.de/
25 : !
26 : ! ELPA is free software: you can redistribute it and/or modify
27 : ! it under the terms of the version 3 of the license of the
28 : ! GNU Lesser General Public License as published by the Free
29 : ! Software Foundation.
30 : !
31 : ! ELPA is distributed in the hope that it will be useful,
32 : ! but WITHOUT ANY WARRANTY; without even the implied warranty of
33 : ! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
34 : ! GNU Lesser General Public License for more details.
35 : !
36 : ! You should have received a copy of the GNU Lesser General Public License
37 : ! along with ELPA. If not, see <http://www.gnu.org/licenses/>
38 : !
39 : ! ELPA reflects a substantial effort on the part of the original
40 : ! ELPA consortium, and we ask you to respect the spirit of the
41 : ! license that we chose: i.e., please contribute any changes you
42 : ! may have back to the original ELPA library distribution, and keep
43 : ! any derivatives of ELPA under the same license that we chose for
44 : ! the original distribution, the GNU Lesser General Public License.
45 : !
46 : !
47 : ! ELPA1 -- Faster replacements for ScaLAPACK symmetric eigenvalue routines
48 : !
49 : ! Copyright of the original code rests with the authors inside the ELPA
50 : ! consortium. The copyright of any additional modifications shall rest
51 : ! with their original authors, but shall adhere to the licensing terms
52 : ! distributed along with the original code in the file "COPYING".
53 : #endif
54 :
55 : #include "../general/sanity.F90"
56 :
57 9216 : function elpa_solve_evp_&
58 : &MATH_DATATYPE&
59 : &_1stage_&
60 : &PRECISION&
61 8064 : &_impl (obj, a, ev, q) result(success)
62 : use precision
63 : use cuda_functions
64 : use mod_check_for_gpu
65 : use iso_c_binding
66 : use elpa_abstract_impl
67 : use elpa_mpi
68 : use elpa1_compute
69 : implicit none
70 : #include "../general/precision_kinds.F90"
71 : class(elpa_abstract_impl_t), intent(inout) :: obj
72 : real(kind=REAL_DATATYPE), intent(out) :: ev(obj%na)
73 :
74 : #ifdef USE_ASSUMED_SIZE
75 : MATH_DATATYPE(kind=rck), intent(inout) :: a(obj%local_nrows,*)
76 : MATH_DATATYPE(kind=rck), optional,target,intent(out) :: q(obj%local_nrows,*)
77 : #else
78 : MATH_DATATYPE(kind=rck), intent(inout) :: a(obj%local_nrows,obj%local_ncols)
79 : MATH_DATATYPE(kind=rck), optional, target, intent(out) :: q(obj%local_nrows,obj%local_ncols)
80 : #endif
81 :
82 : #if REALCASE == 1
83 4752 : real(kind=C_DATATYPE_KIND), allocatable :: tau(:)
84 4752 : real(kind=C_DATATYPE_KIND), allocatable, target :: q_dummy(:,:)
85 : real(kind=C_DATATYPE_KIND), pointer :: q_actual(:,:)
86 : #endif /* REALCASE */
87 :
88 : #if COMPLEXCASE == 1
89 4464 : real(kind=REAL_DATATYPE), allocatable :: q_real(:,:)
90 4464 : complex(kind=C_DATATYPE_KIND), allocatable :: tau(:)
91 4464 : complex(kind=C_DATATYPE_KIND), allocatable,target :: q_dummy(:,:)
92 : complex(kind=C_DATATYPE_KIND), pointer :: q_actual(:,:)
93 : integer(kind=c_int) :: l_cols, l_rows, l_cols_nev, np_rows, np_cols
94 : #endif /* COMPLEXCASE */
95 :
96 : logical :: useGPU
97 : logical :: success
98 :
99 : logical :: do_useGPU
100 : integer(kind=ik) :: numberOfGPUDevices
101 :
102 : integer(kind=c_int) :: my_pe, n_pes, my_prow, my_pcol, mpierr
103 9216 : real(kind=C_DATATYPE_KIND), allocatable :: e(:)
104 : logical :: wantDebug
105 : integer(kind=c_int) :: istat, debug, gpu
106 : character(200) :: errorMessage
107 : integer(kind=ik) :: na, nev, lda, ldq, nblk, matrixCols, &
108 : mpi_comm_rows, mpi_comm_cols, &
109 : mpi_comm_all, check_pd, i, error
110 :
111 : logical :: do_bandred, do_solve, do_trans_ev
112 :
113 : call obj%timer%start("elpa_solve_evp_&
114 : &MATH_DATATYPE&
115 : &_1stage_&
116 : &PRECISION&
117 9216 : &")
118 :
119 9216 : success = .true.
120 :
121 9216 : if (present(q)) then
122 8064 : obj%eigenvalues_only = .false.
123 : else
124 1152 : obj%eigenvalues_only = .true.
125 : endif
126 :
127 9216 : na = obj%na
128 9216 : nev = obj%nev
129 9216 : lda = obj%local_nrows
130 9216 : ldq = obj%local_nrows
131 9216 : nblk = obj%nblk
132 9216 : matrixCols = obj%local_ncols
133 :
134 : ! special case na = 1
135 9216 : if (na .eq. 1) then
136 : #if REALCASE == 1
137 0 : ev(1) = a(1,1)
138 : #endif
139 : #if COMPLEXCASE == 1
140 0 : ev(1) = real(a(1,1))
141 : #endif
142 0 : if (.not.(obj%eigenvalues_only)) then
143 0 : q(1,1) = ONE
144 : endif
145 : call obj%timer%stop("elpa_solve_evp_&
146 : &MATH_DATATYPE&
147 : &_1stage_&
148 : &PRECISION&
149 0 : &")
150 0 : return
151 : endif
152 :
153 9216 : if (nev == 0) then
154 0 : nev = 1
155 0 : obj%eigenvalues_only = .true.
156 : endif
157 :
158 :
159 9216 : call obj%get("mpi_comm_rows",mpi_comm_rows,error)
160 9216 : if (error .ne. ELPA_OK) then
161 0 : print *,"Problem setting option. Aborting..."
162 0 : stop
163 : endif
164 9216 : call obj%get("mpi_comm_cols",mpi_comm_cols,error)
165 9216 : if (error .ne. ELPA_OK) then
166 0 : print *,"Problem setting option. Aborting..."
167 0 : stop
168 : endif
169 9216 : call obj%get("mpi_comm_parent", mpi_comm_all,error)
170 9216 : if (error .ne. ELPA_OK) then
171 0 : print *,"Problem setting option. Aborting..."
172 0 : stop
173 : endif
174 :
175 9216 : call obj%get("gpu",gpu,error)
176 9216 : if (error .ne. ELPA_OK) then
177 0 : print *,"Problem setting option. Aborting..."
178 0 : stop
179 : endif
180 9216 : if (gpu .eq. 1) then
181 0 : useGPU =.true.
182 : else
183 9216 : useGPU = .false.
184 : endif
185 :
186 9216 : call obj%timer%start("mpi_communication")
187 :
188 9216 : call mpi_comm_rank(mpi_comm_all,my_pe,mpierr)
189 9216 : call mpi_comm_size(mpi_comm_all,n_pes,mpierr)
190 :
191 9216 : call mpi_comm_rank(mpi_comm_rows,my_prow,mpierr)
192 9216 : call mpi_comm_rank(mpi_comm_cols,my_pcol,mpierr)
193 :
194 : #if COMPLEXCASE == 1
195 4464 : call mpi_comm_size(mpi_comm_rows,np_rows,mpierr)
196 4464 : call mpi_comm_size(mpi_comm_cols,np_cols,mpierr)
197 : #endif
198 :
199 9216 : call obj%timer%stop("mpi_communication")
200 :
201 9216 : call obj%get("debug", debug,error)
202 9216 : if (error .ne. ELPA_OK) then
203 0 : print *,"Problem setting option. Aborting..."
204 0 : stop
205 : endif
206 9216 : wantDebug = debug == 1
207 9216 : do_useGPU = .false.
208 :
209 :
210 9216 : if (useGPU) then
211 0 : if (check_for_gpu(my_pe,numberOfGPUDevices, wantDebug=wantDebug)) then
212 :
213 0 : do_useGPU = .true.
214 : ! set the neccessary parameters
215 0 : cudaMemcpyHostToDevice = cuda_memcpyHostToDevice()
216 0 : cudaMemcpyDeviceToHost = cuda_memcpyDeviceToHost()
217 0 : cudaMemcpyDeviceToDevice = cuda_memcpyDeviceToDevice()
218 0 : cudaHostRegisterPortable = cuda_hostRegisterPortable()
219 0 : cudaHostRegisterMapped = cuda_hostRegisterMapped()
220 : else
221 0 : print *,"GPUs are requested but not detected! Aborting..."
222 0 : success = .false.
223 0 : return
224 : endif
225 : else
226 : ! check whether set by environment variable
227 9216 : call obj%get("gpu", gpu,error)
228 9216 : if (error .ne. ELPA_OK) then
229 0 : print *,"Problem setting option. Aborting..."
230 0 : stop
231 : endif
232 9216 : do_useGPU = gpu == 1
233 :
234 9216 : if (do_useGPU) then
235 0 : if (check_for_gpu(my_pe,numberOfGPUDevices, wantDebug=wantDebug)) then
236 :
237 : ! set the neccessary parameters
238 0 : cudaMemcpyHostToDevice = cuda_memcpyHostToDevice()
239 0 : cudaMemcpyDeviceToHost = cuda_memcpyDeviceToHost()
240 0 : cudaMemcpyDeviceToDevice = cuda_memcpyDeviceToDevice()
241 0 : cudaHostRegisterPortable = cuda_hostRegisterPortable()
242 0 : cudaHostRegisterMapped = cuda_hostRegisterMapped()
243 : else
244 0 : print *,"GPUs are requested but not detected! Aborting..."
245 0 : success = .false.
246 0 : return
247 : endif
248 : endif
249 : endif
250 :
251 : ! allocate a dummy q_intern, if eigenvectors should not be commputed and thus q is NOT present
252 9216 : if (.not.(obj%eigenvalues_only)) then
253 8064 : q_actual => q(1:obj%local_nrows,1:obj%local_ncols)
254 : else
255 1152 : allocate(q_dummy(obj%local_nrows,obj%local_ncols))
256 1152 : q_actual => q_dummy
257 : endif
258 :
259 : #if COMPLEXCASE == 1
260 4464 : l_rows = local_index(na, my_prow, np_rows, nblk, -1) ! Local rows of a and q
261 4464 : l_cols = local_index(na, my_pcol, np_cols, nblk, -1) ! Local columns of q
262 :
263 4464 : l_cols_nev = local_index(nev, my_pcol, np_cols, nblk, -1) ! Local columns corresponding to nev
264 :
265 4464 : allocate(q_real(l_rows,l_cols), stat=istat, errmsg=errorMessage)
266 4464 : if (istat .ne. 0) then
267 : print *,"solve_evp_&
268 : &MATH_DATATYPE&
269 : &_1stage_&
270 : &PRECISION&
271 0 : &" // ": error when allocating q_real "//errorMessage
272 0 : stop 1
273 : endif
274 : #endif
275 9216 : allocate(e(na), tau(na), stat=istat, errmsg=errorMessage)
276 9216 : if (istat .ne. 0) then
277 : print *,"solve_evp_&
278 : &MATH_DATATYPE&
279 : &_1stage_&
280 : &PRECISION&
281 0 : &" // ": error when allocating e, tau "//errorMessage
282 0 : stop 1
283 : endif
284 :
285 :
286 : ! start the computations
287 : ! as default do all three steps (this might change at some point)
288 9216 : do_bandred = .true.
289 9216 : do_solve = .true.
290 9216 : do_trans_ev = .true.
291 :
292 9216 : if (obj%eigenvalues_only) then
293 1152 : do_trans_ev = .true.
294 : endif
295 :
296 9216 : if (do_bandred) then
297 9216 : call obj%timer%start("forward")
298 : call tridiag_&
299 : &MATH_DATATYPE&
300 : &_&
301 : &PRECISION&
302 9216 : & (obj, na, a, lda, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols, ev, e, tau, do_useGPU, wantDebug)
303 9216 : call obj%timer%stop("forward")
304 : endif !do_bandred
305 :
306 9216 : if (do_solve) then
307 9216 : call obj%timer%start("solve")
308 : call solve_tridi_&
309 : &PRECISION&
310 : & (obj, na, nev, ev, e, &
311 : #if REALCASE == 1
312 : q_actual, ldq, &
313 : #endif
314 : #if COMPLEXCASE == 1
315 : q_real, l_rows, &
316 : #endif
317 9216 : nblk, matrixCols, mpi_comm_rows, mpi_comm_cols, do_useGPU, wantDebug, success)
318 9216 : call obj%timer%stop("solve")
319 9216 : if (.not.(success)) return
320 : endif !do_solve
321 :
322 9216 : if (obj%eigenvalues_only) then
323 1152 : do_trans_ev = .false.
324 : else
325 8064 : call obj%get("check_pd",check_pd,error)
326 8064 : if (error .ne. ELPA_OK) then
327 0 : print *,"Problem setting option. Aborting..."
328 0 : stop
329 : endif
330 8064 : if (check_pd .eq. 1) then
331 0 : check_pd = 0
332 0 : do i = 1, na
333 0 : if (ev(i) .gt. THRESHOLD) then
334 0 : check_pd = check_pd + 1
335 : endif
336 : enddo
337 0 : if (check_pd .lt. na) then
338 : ! not positiv definite => eigenvectors needed
339 0 : do_trans_ev = .true.
340 : else
341 0 : do_trans_ev = .false.
342 : endif
343 : endif ! check_pd
344 : endif ! eigenvalues_only
345 :
346 9216 : if (do_trans_ev) then
347 : ! q must be given thats why from here on we can use q and not q_actual
348 : #if COMPLEXCASE == 1
349 4032 : q(1:l_rows,1:l_cols_nev) = q_real(1:l_rows,1:l_cols_nev)
350 : #endif
351 :
352 8064 : call obj%timer%start("back")
353 : call trans_ev_&
354 : &MATH_DATATYPE&
355 : &_&
356 : &PRECISION&
357 8064 : & (obj, na, nev, a, lda, tau, q, ldq, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols, do_useGPU)
358 8064 : call obj%timer%stop("back")
359 : endif ! do_trans_ev
360 :
361 : #if COMPLEXCASE == 1
362 4464 : deallocate(q_real, stat=istat, errmsg=errorMessage)
363 4464 : if (istat .ne. 0) then
364 : print *,"solve_evp_&
365 : &MATH_DATATYPE&
366 : &_1stage_&
367 : &PRECISION&
368 0 : &" // ": error when deallocating q_real "//errorMessage
369 0 : stop 1
370 : endif
371 : #endif
372 :
373 9216 : deallocate(e, tau, stat=istat, errmsg=errorMessage)
374 9216 : if (istat .ne. 0) then
375 : print *,"solve_evp_&
376 : &MATH_DATATYPE&
377 : &_1stage_&
378 : &PRECISION&
379 0 : &" // ": error when deallocating e, tau "//errorMessage
380 0 : stop 1
381 : endif
382 :
383 9216 : if (obj%eigenvalues_only) then
384 1152 : deallocate(q_dummy, stat=istat, errmsg=errorMessage)
385 1152 : if (istat .ne. 0) then
386 : print *,"solve_evp_&
387 : &MATH_DATATYPE&
388 : &_1stage_&
389 : &PRECISION&
390 0 : &" // ": error when deallocating q_dummy "//errorMessage
391 0 : stop 1
392 : endif
393 : endif
394 :
395 : call obj%timer%stop("elpa_solve_evp_&
396 : &MATH_DATATYPE&
397 : &_1stage_&
398 : &PRECISION&
399 9216 : &")
400 18432 : end function
401 :
402 :
|