Line data Source code
1 : #if 0
2 : ! This file is part of ELPA.
3 : !
4 : ! The ELPA library was originally created by the ELPA consortium,
5 : ! consisting of the following organizations:
6 : !
7 : ! - Max Planck Computing and Data Facility (MPCDF), formerly known as
8 : ! Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG),
9 : ! - Bergische Universität Wuppertal, Lehrstuhl für angewandte
10 : ! Informatik,
11 : ! - Technische Universität München, Lehrstuhl für Informatik mit
12 : ! Schwerpunkt Wissenschaftliches Rechnen ,
13 : ! - Fritz-Haber-Institut, Berlin, Abt. Theorie,
14 : ! - Max-Plack-Institut für Mathematik in den Naturwissenschaften,
15 : ! Leipzig, Abt. Komplexe Strukutren in Biologie und Kognition,
16 : ! and
17 : ! - IBM Deutschland GmbH
18 : !
19 : ! This particular source code file contains additions, changes and
20 : ! enhancements authored by Intel Corporation which is not part of
21 : ! the ELPA consortium.
22 : !
23 : ! More information can be found here:
24 : ! http://elpa.mpcdf.mpg.de/
25 : !
26 : ! ELPA is free software: you can redistribute it and/or modify
27 : ! it under the terms of the version 3 of the license of the
28 : ! GNU Lesser General Public License as published by the Free
29 : ! Software Foundation.
30 : !
31 : ! ELPA is distributed in the hope that it will be useful,
32 : ! but WITHOUT ANY WARRANTY; without even the implied warranty of
33 : ! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
34 : ! GNU Lesser General Public License for more details.
35 : !
36 : ! You should have received a copy of the GNU Lesser General Public License
37 : ! along with ELPA. If not, see <http://www.gnu.org/licenses/>
38 : !
39 : ! ELPA reflects a substantial effort on the part of the original
40 : ! ELPA consortium, and we ask you to respect the spirit of the
41 : ! license that we chose: i.e., please contribute any changes you
42 : ! may have back to the original ELPA library distribution, and keep
43 : ! any derivatives of ELPA under the same license that we chose for
44 : ! the original distribution, the GNU Lesser General Public License.
45 : !
46 : !
47 : ! ELPA1 -- Faster replacements for ScaLAPACK symmetric eigenvalue routines
48 : !
49 : ! Copyright of the original code rests with the authors inside the ELPA
50 : ! consortium. The copyright of any additional modifications shall rest
51 : ! with their original authors, but shall adhere to the licensing terms
52 : ! distributed along with the original code in the file "COPYING".
53 : #endif
54 :
55 : #include "../general/sanity.F90"
56 :
57 : #if REALCASE == 1
58 :
59 : subroutine v_add_s_&
60 17665512 : &PRECISION&
61 17665512 : &(obj, v,n,s)
62 : use precision
63 : use elpa_abstract_impl
64 : implicit none
65 : class(elpa_abstract_impl_t), intent(inout) :: obj
66 : integer(kind=ik) :: n
67 : real(kind=REAL_DATATYPE) :: v(n),s
68 :
69 17665512 : v(:) = v(:) + s
70 : end subroutine v_add_s_&
71 17665512 : &PRECISION
72 :
73 : subroutine distribute_global_column_&
74 7860000 : &PRECISION&
75 7860000 : &(obj, g_col, l_col, noff, nlen, my_prow, np_rows, nblk)
76 : use precision
77 : use elpa_abstract_impl
78 : implicit none
79 :
80 : class(elpa_abstract_impl_t), intent(inout) :: obj
81 : real(kind=REAL_DATATYPE) :: g_col(nlen), l_col(*) ! chnage this to proper 2d 1d matching ! remove assumed size
82 : integer(kind=ik) :: noff, nlen, my_prow, np_rows, nblk
83 :
84 : integer(kind=ik) :: nbs, nbe, jb, g_off, l_off, js, je
85 :
86 7860000 : nbs = noff/(nblk*np_rows)
87 7860000 : nbe = (noff+nlen-1)/(nblk*np_rows)
88 :
89 65136000 : do jb = nbs, nbe
90 :
91 57276000 : g_off = jb*nblk*np_rows + nblk*my_prow
92 57276000 : l_off = jb*nblk
93 :
94 57276000 : js = MAX(noff+1-g_off,1)
95 57276000 : je = MIN(noff+nlen-g_off,nblk)
96 :
97 57276000 : if (je<js) cycle
98 :
99 54669120 : l_col(l_off+js:l_off+je) = g_col(g_off+js-noff:g_off+je-noff)
100 :
101 : enddo
102 : end subroutine distribute_global_column_&
103 7860000 : &PRECISION
104 :
105 : subroutine solve_secular_equation_&
106 0 : &PRECISION&
107 0 : &(obj, n, i, d, z, delta, rho, dlam)
108 : !-------------------------------------------------------------------------------
109 : ! This routine solves the secular equation of a symmetric rank 1 modified
110 : ! diagonal matrix:
111 : !
112 : ! 1. + rho*SUM(z(:)**2/(d(:)-x)) = 0
113 : !
114 : ! It does the same as the LAPACK routine DLAED4 but it uses a bisection technique
115 : ! which is more robust (it always yields a solution) but also slower
116 : ! than the algorithm used in DLAED4.
117 : !
118 : ! The same restictions than in DLAED4 hold, namely:
119 : !
120 : ! rho > 0 and d(i+1) > d(i)
121 : !
122 : ! but this routine will not terminate with error if these are not satisfied
123 : ! (it will normally converge to a pole in this case).
124 : !
125 : ! The output in DELTA(j) is always (D(j) - lambda_I), even for the cases
126 : ! N=1 and N=2 which is not compatible with DLAED4.
127 : ! Thus this routine shouldn't be used for these cases as a simple replacement
128 : ! of DLAED4.
129 : !
130 : ! The arguments are the same as in DLAED4 (with the exception of the INFO argument):
131 : !
132 : !
133 : ! N (input) INTEGER
134 : ! The length of all arrays.
135 : !
136 : ! I (input) INTEGER
137 : ! The index of the eigenvalue to be computed. 1 <= I <= N.
138 : !
139 : ! D (input) DOUBLE PRECISION array, dimension (N)
140 : ! The original eigenvalues. It is assumed that they are in
141 : ! order, D(I) < D(J) for I < J.
142 : !
143 : ! Z (input) DOUBLE PRECISION array, dimension (N)
144 : ! The components of the updating Vector.
145 : !
146 : ! DELTA (output) DOUBLE PRECISION array, dimension (N)
147 : ! DELTA contains (D(j) - lambda_I) in its j-th component.
148 : ! See remark above about DLAED4 compatibility!
149 : !
150 : ! RHO (input) DOUBLE PRECISION
151 : ! The scalar in the symmetric updating formula.
152 : !
153 : ! DLAM (output) DOUBLE PRECISION
154 : ! The computed lambda_I, the I-th updated eigenvalue.
155 : !-------------------------------------------------------------------------------
156 :
157 : use precision
158 : use elpa_abstract_impl
159 : implicit none
160 : #include "../../src/general/precision_kinds.F90"
161 : class(elpa_abstract_impl_t), intent(inout) :: obj
162 : integer(kind=ik) :: n, i
163 : real(kind=REAL_DATATYPE) :: d(n), z(n), delta(n), rho, dlam
164 :
165 : integer(kind=ik) :: iter
166 : real(kind=REAL_DATATYPE) :: a, b, x, y, dshift
167 :
168 : ! In order to obtain sufficient numerical accuracy we have to shift the problem
169 : ! either by d(i) or d(i+1), whichever is closer to the solution
170 :
171 : ! Upper and lower bound of the shifted solution interval are a and b
172 :
173 0 : call obj%timer%start("solve_secular_equation" // PRECISION_SUFFIX)
174 0 : if (i==n) then
175 :
176 : ! Special case: Last eigenvalue
177 : ! We shift always by d(n), lower bound is d(n),
178 : ! upper bound is determined by a guess:
179 :
180 0 : dshift = d(n)
181 0 : delta(:) = d(:) - dshift
182 :
183 0 : a = 0.0_rk ! delta(n)
184 0 : b = rho*SUM(z(:)**2) + 1.0_rk ! rho*SUM(z(:)**2) is the lower bound for the guess
185 : else
186 :
187 : ! Other eigenvalues: lower bound is d(i), upper bound is d(i+1)
188 : ! We check the sign of the function in the midpoint of the interval
189 : ! in order to determine if eigenvalue is more close to d(i) or d(i+1)
190 0 : x = 0.5_rk*(d(i)+d(i+1))
191 0 : y = 1.0_rk + rho*SUM(z(:)**2/(d(:)-x))
192 0 : if (y>0) then
193 : ! solution is next to d(i)
194 0 : dshift = d(i)
195 : else
196 : ! solution is next to d(i+1)
197 0 : dshift = d(i+1)
198 : endif
199 :
200 0 : delta(:) = d(:) - dshift
201 0 : a = delta(i)
202 0 : b = delta(i+1)
203 :
204 : endif
205 :
206 : ! Bisection:
207 :
208 0 : do iter=1,200
209 :
210 : ! Interval subdivision
211 0 : x = 0.5_rk*(a+b)
212 0 : if (x==a .or. x==b) exit ! No further interval subdivisions possible
213 : #ifdef DOUBLE_PRECISION_REAL
214 0 : if (abs(x) < 1.e-200_rk8) exit ! x next to pole
215 : #else
216 0 : if (abs(x) < 1.e-20_rk4) exit ! x next to pole
217 : #endif
218 : ! evaluate value at x
219 :
220 0 : y = 1. + rho*SUM(z(:)**2/(delta(:)-x))
221 :
222 0 : if (y==0) then
223 : ! found exact solution
224 0 : exit
225 0 : elseif (y>0) then
226 0 : b = x
227 : else
228 0 : a = x
229 : endif
230 :
231 : enddo
232 :
233 : ! Solution:
234 :
235 0 : dlam = x + dshift
236 0 : delta(:) = delta(:) - x
237 0 : call obj%timer%stop("solve_secular_equation" // PRECISION_SUFFIX)
238 :
239 : end subroutine solve_secular_equation_&
240 0 : &PRECISION
241 : !-------------------------------------------------------------------------------
242 : #endif
243 :
244 : #if REALCASE == 1
245 : subroutine hh_transform_real_&
246 : #endif
247 : #if COMPLEXCASE == 1
248 : subroutine hh_transform_complex_&
249 : #endif
250 37625880 : &PRECISION &
251 : (obj, alpha, xnorm_sq, xf, tau, wantDebug)
252 : #if REALCASE == 1
253 : ! Similar to LAPACK routine DLARFP, but uses ||x||**2 instead of x(:)
254 : #endif
255 : #if COMPLEXCASE == 1
256 : ! Similar to LAPACK routine ZLARFP, but uses ||x||**2 instead of x(:)
257 : #endif
258 : ! and returns the factor xf by which x has to be scaled.
259 : ! It also hasn't the special handling for numbers < 1.d-300 or > 1.d150
260 : ! since this would be expensive for the parallel implementation.
261 : use precision
262 : use elpa_abstract_impl
263 : implicit none
264 : class(elpa_abstract_impl_t), intent(inout) :: obj
265 : logical, intent(in) :: wantDebug
266 : #if REALCASE == 1
267 : real(kind=REAL_DATATYPE), intent(inout) :: alpha
268 : #endif
269 : #if COMPLEXCASE == 1
270 : complex(kind=COMPLEX_DATATYPE), intent(inout) :: alpha
271 : #endif
272 : real(kind=REAL_DATATYPE), intent(in) :: xnorm_sq
273 : #if REALCASE == 1
274 : real(kind=REAL_DATATYPE), intent(out) :: xf, tau
275 : #endif
276 : #if COMPLEXCASE == 1
277 : complex(kind=COMPLEX_DATATYPE), intent(out) :: xf, tau
278 : real(kind=REAL_DATATYPE) :: ALPHR, ALPHI
279 : #endif
280 :
281 : real(kind=REAL_DATATYPE) :: BETA
282 :
283 37625880 : if (wantDebug) call obj%timer%start("hh_transform_&
284 : &MATH_DATATYPE&
285 : &" // &
286 0 : &PRECISION_SUFFIX )
287 :
288 : #if COMPLEXCASE == 1
289 21818400 : ALPHR = real( ALPHA, kind=REAL_DATATYPE )
290 21818400 : ALPHI = PRECISION_IMAG( ALPHA )
291 : #endif
292 :
293 : #if REALCASE == 1
294 15807480 : if ( XNORM_SQ==0. ) then
295 : #endif
296 : #if COMPLEXCASE == 1
297 21818400 : if ( XNORM_SQ==0. .AND. ALPHI==0. ) then
298 : #endif
299 :
300 : #if REALCASE == 1
301 2239344 : if ( ALPHA>=0. ) then
302 : #endif
303 : #if COMPLEXCASE == 1
304 2189952 : if ( ALPHR>=0. ) then
305 : #endif
306 4419960 : TAU = 0.
307 : else
308 9336 : TAU = 2.
309 9336 : ALPHA = -ALPHA
310 : endif
311 4429296 : XF = 0.
312 :
313 : else
314 :
315 : #if REALCASE == 1
316 13568136 : BETA = SIGN( SQRT( ALPHA**2 + XNORM_SQ ), ALPHA )
317 : #endif
318 : #if COMPLEXCASE == 1
319 19628448 : BETA = SIGN( SQRT( ALPHR**2 + ALPHI**2 + XNORM_SQ ), ALPHR )
320 : #endif
321 33196584 : ALPHA = ALPHA + BETA
322 33196584 : IF ( BETA<0 ) THEN
323 16111152 : BETA = -BETA
324 16111152 : TAU = -ALPHA / BETA
325 : ELSE
326 : #if REALCASE == 1
327 7002360 : ALPHA = XNORM_SQ / ALPHA
328 : #endif
329 : #if COMPLEXCASE == 1
330 10083072 : ALPHR = ALPHI * (ALPHI/real( ALPHA , kind=KIND_PRECISION))
331 10083072 : ALPHR = ALPHR + XNORM_SQ/real( ALPHA, kind=KIND_PRECISION )
332 : #endif
333 :
334 : #if REALCASE == 1
335 7002360 : TAU = ALPHA / BETA
336 7002360 : ALPHA = -ALPHA
337 : #endif
338 : #if COMPLEXCASE == 1
339 10083072 : TAU = PRECISION_CMPLX( ALPHR/BETA, -ALPHI/BETA )
340 10083072 : ALPHA = PRECISION_CMPLX( -ALPHR, ALPHI )
341 : #endif
342 : END IF
343 33196584 : XF = 1.0/ALPHA
344 33196584 : ALPHA = BETA
345 : endif
346 :
347 37625880 : if (wantDebug) call obj%timer%stop("hh_transform_&
348 : &MATH_DATATYPE&
349 : &" // &
350 0 : &PRECISION_SUFFIX )
351 :
352 : #if REALCASE == 1
353 : end subroutine hh_transform_real_&
354 : #endif
355 : #if COMPLEXCASE == 1
356 : end subroutine hh_transform_complex_&
357 : #endif
358 37625880 : &PRECISION
|