LCOV - code coverage report
Current view: top level - src/elpa1 - elpa1_tools_template.F90 (source / functions) Hit Total Coverage
Test: coverage_50ab7a7628bba174fc62cee3ab72b26e81f87fe5.info Lines: 45 78 57.7 %
Date: 2018-01-10 09:29:53 Functions: 8 10 80.0 %

          Line data    Source code
       1             : #if 0
       2             : !    This file is part of ELPA.
       3             : !
       4             : !    The ELPA library was originally created by the ELPA consortium,
       5             : !    consisting of the following organizations:
       6             : !
       7             : !    - Max Planck Computing and Data Facility (MPCDF), formerly known as
       8             : !      Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG),
       9             : !    - Bergische Universität Wuppertal, Lehrstuhl für angewandte
      10             : !      Informatik,
      11             : !    - Technische Universität München, Lehrstuhl für Informatik mit
      12             : !      Schwerpunkt Wissenschaftliches Rechnen ,
      13             : !    - Fritz-Haber-Institut, Berlin, Abt. Theorie,
      14             : !    - Max-Plack-Institut für Mathematik in den Naturwissenschaften,
      15             : !      Leipzig, Abt. Komplexe Strukutren in Biologie und Kognition,
      16             : !      and
      17             : !    - IBM Deutschland GmbH
      18             : !
      19             : !    This particular source code file contains additions, changes and
      20             : !    enhancements authored by Intel Corporation which is not part of
      21             : !    the ELPA consortium.
      22             : !
      23             : !    More information can be found here:
      24             : !    http://elpa.mpcdf.mpg.de/
      25             : !
      26             : !    ELPA is free software: you can redistribute it and/or modify
      27             : !    it under the terms of the version 3 of the license of the
      28             : !    GNU Lesser General Public License as published by the Free
      29             : !    Software Foundation.
      30             : !
      31             : !    ELPA is distributed in the hope that it will be useful,
      32             : !    but WITHOUT ANY WARRANTY; without even the implied warranty of
      33             : !    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      34             : !    GNU Lesser General Public License for more details.
      35             : !
      36             : !    You should have received a copy of the GNU Lesser General Public License
      37             : !    along with ELPA.  If not, see <http://www.gnu.org/licenses/>
      38             : !
      39             : !    ELPA reflects a substantial effort on the part of the original
      40             : !    ELPA consortium, and we ask you to respect the spirit of the
      41             : !    license that we chose: i.e., please contribute any changes you
      42             : !    may have back to the original ELPA library distribution, and keep
      43             : !    any derivatives of ELPA under the same license that we chose for
      44             : !    the original distribution, the GNU Lesser General Public License.
      45             : !
      46             : !
      47             : ! ELPA1 -- Faster replacements for ScaLAPACK symmetric eigenvalue routines
      48             : !
      49             : ! Copyright of the original code rests with the authors inside the ELPA
      50             : ! consortium. The copyright of any additional modifications shall rest
      51             : ! with their original authors, but shall adhere to the licensing terms
      52             : ! distributed along with the original code in the file "COPYING".
      53             : #endif
      54             : 
      55             : #include "../general/sanity.F90"
      56             : 
      57             : #if REALCASE == 1
      58             : 
      59             :     subroutine v_add_s_&
      60    17665512 :     &PRECISION&
      61    17665512 :     &(obj, v,n,s)
      62             :       use precision
      63             :       use elpa_abstract_impl
      64             :       implicit none
      65             :       class(elpa_abstract_impl_t), intent(inout) :: obj
      66             :       integer(kind=ik)            :: n
      67             :       real(kind=REAL_DATATYPE)    :: v(n),s
      68             : 
      69    17665512 :       v(:) = v(:) + s
      70             :     end subroutine v_add_s_&
      71    17665512 :     &PRECISION
      72             : 
      73             :     subroutine distribute_global_column_&
      74     7860000 :     &PRECISION&
      75     7860000 :     &(obj, g_col, l_col, noff, nlen, my_prow, np_rows, nblk)
      76             :       use precision
      77             :       use elpa_abstract_impl
      78             :       implicit none
      79             : 
      80             :       class(elpa_abstract_impl_t), intent(inout) :: obj
      81             :       real(kind=REAL_DATATYPE)     :: g_col(nlen), l_col(*) ! chnage this to proper 2d 1d matching ! remove assumed size
      82             :       integer(kind=ik)             :: noff, nlen, my_prow, np_rows, nblk
      83             : 
      84             :       integer(kind=ik)  :: nbs, nbe, jb, g_off, l_off, js, je
      85             : 
      86     7860000 :       nbs = noff/(nblk*np_rows)
      87     7860000 :       nbe = (noff+nlen-1)/(nblk*np_rows)
      88             : 
      89    65136000 :       do jb = nbs, nbe
      90             : 
      91    57276000 :         g_off = jb*nblk*np_rows + nblk*my_prow
      92    57276000 :         l_off = jb*nblk
      93             : 
      94    57276000 :         js = MAX(noff+1-g_off,1)
      95    57276000 :         je = MIN(noff+nlen-g_off,nblk)
      96             : 
      97    57276000 :         if (je<js) cycle
      98             : 
      99    54669120 :         l_col(l_off+js:l_off+je) = g_col(g_off+js-noff:g_off+je-noff)
     100             : 
     101             :       enddo
     102             :     end subroutine distribute_global_column_&
     103     7860000 :     &PRECISION
     104             : 
     105             :     subroutine solve_secular_equation_&
     106           0 :     &PRECISION&
     107           0 :     &(obj, n, i, d, z, delta, rho, dlam)
     108             :     !-------------------------------------------------------------------------------
     109             :     ! This routine solves the secular equation of a symmetric rank 1 modified
     110             :     ! diagonal matrix:
     111             :     !
     112             :     !    1. + rho*SUM(z(:)**2/(d(:)-x)) = 0
     113             :     !
     114             :     ! It does the same as the LAPACK routine DLAED4 but it uses a bisection technique
     115             :     ! which is more robust (it always yields a solution) but also slower
     116             :     ! than the algorithm used in DLAED4.
     117             :     !
     118             :     ! The same restictions than in DLAED4 hold, namely:
     119             :     !
     120             :     !   rho > 0   and   d(i+1) > d(i)
     121             :     !
     122             :     ! but this routine will not terminate with error if these are not satisfied
     123             :     ! (it will normally converge to a pole in this case).
     124             :     !
     125             :     ! The output in DELTA(j) is always (D(j) - lambda_I), even for the cases
     126             :     ! N=1 and N=2 which is not compatible with DLAED4.
     127             :     ! Thus this routine shouldn't be used for these cases as a simple replacement
     128             :     ! of DLAED4.
     129             :     !
     130             :     ! The arguments are the same as in DLAED4 (with the exception of the INFO argument):
     131             :     !
     132             :     !
     133             :     !  N      (input) INTEGER
     134             :     !         The length of all arrays.
     135             :     !
     136             :     !  I      (input) INTEGER
     137             :     !         The index of the eigenvalue to be computed.  1 <= I <= N.
     138             :     !
     139             :     !  D      (input) DOUBLE PRECISION array, dimension (N)
     140             :     !         The original eigenvalues.  It is assumed that they are in
     141             :     !         order, D(I) < D(J)  for I < J.
     142             :     !
     143             :     !  Z      (input) DOUBLE PRECISION array, dimension (N)
     144             :     !         The components of the updating Vector.
     145             :     !
     146             :     !  DELTA  (output) DOUBLE PRECISION array, dimension (N)
     147             :     !         DELTA contains (D(j) - lambda_I) in its  j-th component.
     148             :     !         See remark above about DLAED4 compatibility!
     149             :     !
     150             :     !  RHO    (input) DOUBLE PRECISION
     151             :     !         The scalar in the symmetric updating formula.
     152             :     !
     153             :     !  DLAM   (output) DOUBLE PRECISION
     154             :     !         The computed lambda_I, the I-th updated eigenvalue.
     155             :     !-------------------------------------------------------------------------------
     156             : 
     157             :       use precision
     158             :       use elpa_abstract_impl
     159             :       implicit none
     160             : #include "../../src/general/precision_kinds.F90"
     161             :       class(elpa_abstract_impl_t), intent(inout) :: obj
     162             :       integer(kind=ik)           :: n, i
     163             :       real(kind=REAL_DATATYPE)   :: d(n), z(n), delta(n), rho, dlam
     164             : 
     165             :       integer(kind=ik)           :: iter
     166             :       real(kind=REAL_DATATYPE)   :: a, b, x, y, dshift
     167             : 
     168             :       ! In order to obtain sufficient numerical accuracy we have to shift the problem
     169             :       ! either by d(i) or d(i+1), whichever is closer to the solution
     170             : 
     171             :       ! Upper and lower bound of the shifted solution interval are a and b
     172             : 
     173           0 :       call obj%timer%start("solve_secular_equation" // PRECISION_SUFFIX)
     174           0 :       if (i==n) then
     175             : 
     176             :        ! Special case: Last eigenvalue
     177             :        ! We shift always by d(n), lower bound is d(n),
     178             :        ! upper bound is determined by a guess:
     179             : 
     180           0 :        dshift = d(n)
     181           0 :        delta(:) = d(:) - dshift
     182             : 
     183           0 :        a = 0.0_rk ! delta(n)
     184           0 :        b = rho*SUM(z(:)**2) + 1.0_rk ! rho*SUM(z(:)**2) is the lower bound for the guess
     185             :       else
     186             : 
     187             :         ! Other eigenvalues: lower bound is d(i), upper bound is d(i+1)
     188             :         ! We check the sign of the function in the midpoint of the interval
     189             :         ! in order to determine if eigenvalue is more close to d(i) or d(i+1)
     190           0 :         x = 0.5_rk*(d(i)+d(i+1))
     191           0 :         y = 1.0_rk + rho*SUM(z(:)**2/(d(:)-x))
     192           0 :         if (y>0) then
     193             :           ! solution is next to d(i)
     194           0 :           dshift = d(i)
     195             :         else
     196             :           ! solution is next to d(i+1)
     197           0 :           dshift = d(i+1)
     198             :         endif
     199             : 
     200           0 :         delta(:) = d(:) - dshift
     201           0 :         a = delta(i)
     202           0 :         b = delta(i+1)
     203             : 
     204             :       endif
     205             : 
     206             :       ! Bisection:
     207             : 
     208           0 :       do iter=1,200
     209             : 
     210             :         ! Interval subdivision
     211           0 :         x = 0.5_rk*(a+b)
     212           0 :         if (x==a .or. x==b) exit   ! No further interval subdivisions possible
     213             : #ifdef DOUBLE_PRECISION_REAL
     214           0 :         if (abs(x) < 1.e-200_rk8) exit ! x next to pole
     215             : #else
     216           0 :         if (abs(x) < 1.e-20_rk4) exit ! x next to pole
     217             : #endif
     218             :         ! evaluate value at x
     219             : 
     220           0 :         y = 1. + rho*SUM(z(:)**2/(delta(:)-x))
     221             : 
     222           0 :         if (y==0) then
     223             :           ! found exact solution
     224           0 :           exit
     225           0 :         elseif (y>0) then
     226           0 :           b = x
     227             :         else
     228           0 :           a = x
     229             :         endif
     230             : 
     231             :       enddo
     232             : 
     233             :       ! Solution:
     234             : 
     235           0 :       dlam = x + dshift
     236           0 :       delta(:) = delta(:) - x
     237           0 :       call  obj%timer%stop("solve_secular_equation" // PRECISION_SUFFIX)
     238             : 
     239             :     end subroutine solve_secular_equation_&
     240           0 :     &PRECISION
     241             :     !-------------------------------------------------------------------------------
     242             : #endif
     243             : 
     244             : #if REALCASE == 1
     245             :     subroutine hh_transform_real_&
     246             : #endif
     247             : #if COMPLEXCASE == 1
     248             :     subroutine hh_transform_complex_&
     249             : #endif
     250    37625880 :     &PRECISION &
     251             :                    (obj, alpha, xnorm_sq, xf, tau, wantDebug)
     252             : #if REALCASE  == 1
     253             :       ! Similar to LAPACK routine DLARFP, but uses ||x||**2 instead of x(:)
     254             : #endif
     255             : #if COMPLEXCASE == 1
     256             :       ! Similar to LAPACK routine ZLARFP, but uses ||x||**2 instead of x(:)
     257             : #endif
     258             :       ! and returns the factor xf by which x has to be scaled.
     259             :       ! It also hasn't the special handling for numbers < 1.d-300 or > 1.d150
     260             :       ! since this would be expensive for the parallel implementation.
     261             :       use precision
     262             :       use elpa_abstract_impl
     263             :       implicit none
     264             :       class(elpa_abstract_impl_t), intent(inout)    :: obj
     265             :       logical, intent(in)                           :: wantDebug
     266             : #if REALCASE == 1
     267             :       real(kind=REAL_DATATYPE), intent(inout)       :: alpha
     268             : #endif
     269             : #if COMPLEXCASE == 1
     270             :       complex(kind=COMPLEX_DATATYPE), intent(inout) :: alpha
     271             : #endif
     272             :       real(kind=REAL_DATATYPE), intent(in)          :: xnorm_sq
     273             : #if REALCASE == 1
     274             :       real(kind=REAL_DATATYPE), intent(out)         :: xf, tau
     275             : #endif
     276             : #if COMPLEXCASE == 1
     277             :       complex(kind=COMPLEX_DATATYPE), intent(out)   :: xf, tau
     278             :       real(kind=REAL_DATATYPE)                      :: ALPHR, ALPHI
     279             : #endif
     280             : 
     281             :       real(kind=REAL_DATATYPE)                      :: BETA
     282             : 
     283    37625880 :      if (wantDebug) call obj%timer%start("hh_transform_&
     284             :                       &MATH_DATATYPE&
     285             :                       &" // &
     286           0 :                       &PRECISION_SUFFIX )
     287             : 
     288             : #if COMPLEXCASE == 1
     289    21818400 :       ALPHR = real( ALPHA, kind=REAL_DATATYPE )
     290    21818400 :       ALPHI = PRECISION_IMAG( ALPHA )
     291             : #endif
     292             : 
     293             : #if REALCASE == 1
     294    15807480 :       if ( XNORM_SQ==0. ) then
     295             : #endif
     296             : #if COMPLEXCASE == 1
     297    21818400 :       if ( XNORM_SQ==0. .AND. ALPHI==0. ) then
     298             : #endif
     299             : 
     300             : #if REALCASE == 1
     301     2239344 :         if ( ALPHA>=0. ) then
     302             : #endif
     303             : #if COMPLEXCASE == 1
     304     2189952 :         if ( ALPHR>=0. ) then
     305             : #endif
     306     4419960 :           TAU = 0.
     307             :         else
     308        9336 :           TAU = 2.
     309        9336 :           ALPHA = -ALPHA
     310             :         endif
     311     4429296 :         XF = 0.
     312             : 
     313             :       else
     314             : 
     315             : #if REALCASE == 1
     316    13568136 :         BETA = SIGN( SQRT( ALPHA**2 + XNORM_SQ ), ALPHA )
     317             : #endif
     318             : #if COMPLEXCASE == 1
     319    19628448 :         BETA = SIGN( SQRT( ALPHR**2 + ALPHI**2 + XNORM_SQ ), ALPHR )
     320             : #endif
     321    33196584 :         ALPHA = ALPHA + BETA
     322    33196584 :         IF ( BETA<0 ) THEN
     323    16111152 :           BETA = -BETA
     324    16111152 :           TAU  = -ALPHA / BETA
     325             :         ELSE
     326             : #if REALCASE == 1
     327     7002360 :           ALPHA = XNORM_SQ / ALPHA
     328             : #endif
     329             : #if COMPLEXCASE == 1
     330    10083072 :           ALPHR = ALPHI * (ALPHI/real( ALPHA , kind=KIND_PRECISION))
     331    10083072 :           ALPHR = ALPHR + XNORM_SQ/real( ALPHA, kind=KIND_PRECISION )
     332             : #endif
     333             : 
     334             : #if REALCASE == 1
     335     7002360 :           TAU = ALPHA / BETA
     336     7002360 :           ALPHA = -ALPHA
     337             : #endif
     338             : #if COMPLEXCASE == 1
     339    10083072 :           TAU = PRECISION_CMPLX( ALPHR/BETA, -ALPHI/BETA )
     340    10083072 :           ALPHA = PRECISION_CMPLX( -ALPHR, ALPHI )
     341             : #endif
     342             :        END IF
     343    33196584 :        XF = 1.0/ALPHA
     344    33196584 :        ALPHA = BETA
     345             :      endif
     346             : 
     347    37625880 :       if (wantDebug) call obj%timer%stop("hh_transform_&
     348             :       &MATH_DATATYPE&
     349             :       &" // &
     350           0 :       &PRECISION_SUFFIX )
     351             : 
     352             : #if REALCASE == 1
     353             :     end subroutine hh_transform_real_&
     354             : #endif
     355             : #if COMPLEXCASE == 1
     356             :     end subroutine hh_transform_complex_&
     357             : #endif
     358    37625880 :     &PRECISION

Generated by: LCOV version 1.12