LCOV - code coverage report
Current view: top level - src/elpa1 - elpa1_template.F90 (source / functions) Hit Total Coverage
Test: coverage_50ab7a7628bba174fc62cee3ab72b26e81f87fe5.info Lines: 92 151 60.9 %
Date: 2018-01-10 09:29:53 Functions: 4 4 100.0 %

          Line data    Source code
       1             : #if 0
       2             : !    This file is part of ELPA.
       3             : !
       4             : !    The ELPA library was originally created by the ELPA consortium,
       5             : !    consisting of the following organizations:
       6             : !
       7             : !    - Max Planck Computing and Data Facility (MPCDF), formerly known as
       8             : !      Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG),
       9             : !    - Bergische Universität Wuppertal, Lehrstuhl für angewandte
      10             : !      Informatik,
      11             : !    - Technische Universität München, Lehrstuhl für Informatik mit
      12             : !      Schwerpunkt Wissenschaftliches Rechnen ,
      13             : !    - Fritz-Haber-Institut, Berlin, Abt. Theorie,
      14             : !    - Max-Plack-Institut für Mathematik in den Naturwissenschaften,
      15             : !      Leipzig, Abt. Komplexe Strukutren in Biologie und Kognition,
      16             : !      and
      17             : !    - IBM Deutschland GmbH
      18             : !
      19             : !    This particular source code file contains additions, changes and
      20             : !    enhancements authored by Intel Corporation which is not part of
      21             : !    the ELPA consortium.
      22             : !
      23             : !    More information can be found here:
      24             : !    http://elpa.mpcdf.mpg.de/
      25             : !
      26             : !    ELPA is free software: you can redistribute it and/or modify
      27             : !    it under the terms of the version 3 of the license of the
      28             : !    GNU Lesser General Public License as published by the Free
      29             : !    Software Foundation.
      30             : !
      31             : !    ELPA is distributed in the hope that it will be useful,
      32             : !    but WITHOUT ANY WARRANTY; without even the implied warranty of
      33             : !    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      34             : !    GNU Lesser General Public License for more details.
      35             : !
      36             : !    You should have received a copy of the GNU Lesser General Public License
      37             : !    along with ELPA.  If not, see <http://www.gnu.org/licenses/>
      38             : !
      39             : !    ELPA reflects a substantial effort on the part of the original
      40             : !    ELPA consortium, and we ask you to respect the spirit of the
      41             : !    license that we chose: i.e., please contribute any changes you
      42             : !    may have back to the original ELPA library distribution, and keep
      43             : !    any derivatives of ELPA under the same license that we chose for
      44             : !    the original distribution, the GNU Lesser General Public License.
      45             : !
      46             : !
      47             : ! ELPA1 -- Faster replacements for ScaLAPACK symmetric eigenvalue routines
      48             : !
      49             : ! Copyright of the original code rests with the authors inside the ELPA
      50             : ! consortium. The copyright of any additional modifications shall rest
      51             : ! with their original authors, but shall adhere to the licensing terms
      52             : ! distributed along with the original code in the file "COPYING".
      53             : #endif
      54             : 
      55             : #include "../general/sanity.F90"
      56             : 
      57        9216 : function elpa_solve_evp_&
      58             :          &MATH_DATATYPE&
      59             :    &_1stage_&
      60             :    &PRECISION&
      61        8064 :    &_impl (obj, a, ev, q) result(success)
      62             :    use precision
      63             :    use cuda_functions
      64             :    use mod_check_for_gpu
      65             :    use iso_c_binding
      66             :    use elpa_abstract_impl
      67             :    use elpa_mpi
      68             :    use elpa1_compute
      69             :    implicit none
      70             : #include "../general/precision_kinds.F90"
      71             :    class(elpa_abstract_impl_t), intent(inout) :: obj
      72             :    real(kind=REAL_DATATYPE), intent(out)           :: ev(obj%na)
      73             : 
      74             : #ifdef USE_ASSUMED_SIZE
      75             :    MATH_DATATYPE(kind=rck), intent(inout)       :: a(obj%local_nrows,*)
      76             :    MATH_DATATYPE(kind=rck), optional,target,intent(out)  :: q(obj%local_nrows,*)
      77             : #else
      78             :    MATH_DATATYPE(kind=rck), intent(inout)       :: a(obj%local_nrows,obj%local_ncols)
      79             :    MATH_DATATYPE(kind=rck), optional, target, intent(out)  :: q(obj%local_nrows,obj%local_ncols)
      80             : #endif
      81             : 
      82             : #if REALCASE == 1
      83        4752 :    real(kind=C_DATATYPE_KIND), allocatable         :: tau(:)
      84        4752 :    real(kind=C_DATATYPE_KIND), allocatable, target         :: q_dummy(:,:)
      85             :    real(kind=C_DATATYPE_KIND), pointer             :: q_actual(:,:)
      86             : #endif /* REALCASE */
      87             : 
      88             : #if COMPLEXCASE == 1
      89        4464 :    real(kind=REAL_DATATYPE), allocatable           :: q_real(:,:)
      90        4464 :    complex(kind=C_DATATYPE_KIND), allocatable      :: tau(:)
      91        4464 :    complex(kind=C_DATATYPE_KIND), allocatable,target :: q_dummy(:,:)
      92             :    complex(kind=C_DATATYPE_KIND), pointer          :: q_actual(:,:)
      93             :    integer(kind=c_int)                             :: l_cols, l_rows, l_cols_nev, np_rows, np_cols
      94             : #endif /* COMPLEXCASE */
      95             : 
      96             :    logical                                         :: useGPU
      97             :    logical                                         :: success
      98             : 
      99             :    logical                                         :: do_useGPU
     100             :    integer(kind=ik)                                :: numberOfGPUDevices
     101             : 
     102             :    integer(kind=c_int)                             :: my_pe, n_pes, my_prow, my_pcol, mpierr
     103        9216 :    real(kind=C_DATATYPE_KIND), allocatable         :: e(:)
     104             :    logical                                         :: wantDebug
     105             :    integer(kind=c_int)                             :: istat, debug, gpu
     106             :    character(200)                                  :: errorMessage
     107             :    integer(kind=ik)                                :: na, nev, lda, ldq, nblk, matrixCols, &
     108             :                                                       mpi_comm_rows, mpi_comm_cols,        &
     109             :                                                       mpi_comm_all, check_pd, i, error
     110             : 
     111             :    logical                                         :: do_bandred, do_solve, do_trans_ev
     112             : 
     113             :    call obj%timer%start("elpa_solve_evp_&
     114             :    &MATH_DATATYPE&
     115             :    &_1stage_&
     116             :    &PRECISION&
     117        9216 :    &")
     118             : 
     119        9216 :    success = .true.
     120             : 
     121        9216 :    if (present(q)) then
     122        8064 :      obj%eigenvalues_only = .false.
     123             :    else
     124        1152 :      obj%eigenvalues_only = .true.
     125             :    endif
     126             : 
     127        9216 :    na         = obj%na
     128        9216 :    nev        = obj%nev
     129        9216 :    lda        = obj%local_nrows
     130        9216 :    ldq        = obj%local_nrows
     131        9216 :    nblk       = obj%nblk
     132        9216 :    matrixCols = obj%local_ncols
     133             : 
     134             :    ! special case na = 1
     135        9216 :    if (na .eq. 1) then
     136             : #if REALCASE == 1
     137           0 :      ev(1) = a(1,1)
     138             : #endif
     139             : #if COMPLEXCASE == 1
     140           0 :      ev(1) = real(a(1,1))
     141             : #endif
     142           0 :      if (.not.(obj%eigenvalues_only)) then
     143           0 :        q(1,1) = ONE
     144             :      endif
     145             :      call obj%timer%stop("elpa_solve_evp_&
     146             :      &MATH_DATATYPE&
     147             :      &_1stage_&
     148             :      &PRECISION&
     149           0 :      &")
     150           0 :      return
     151             :    endif
     152             : 
     153        9216 :    if (nev == 0) then
     154           0 :      nev = 1
     155           0 :      obj%eigenvalues_only = .true.
     156             :    endif
     157             : 
     158             : 
     159        9216 :    call obj%get("mpi_comm_rows",mpi_comm_rows,error)
     160        9216 :    if (error .ne. ELPA_OK) then
     161           0 :      print *,"Problem setting option. Aborting..."
     162           0 :      stop
     163             :    endif
     164        9216 :    call obj%get("mpi_comm_cols",mpi_comm_cols,error)
     165        9216 :    if (error .ne. ELPA_OK) then
     166           0 :      print *,"Problem setting option. Aborting..."
     167           0 :      stop
     168             :    endif
     169        9216 :    call obj%get("mpi_comm_parent", mpi_comm_all,error)
     170        9216 :    if (error .ne. ELPA_OK) then
     171           0 :      print *,"Problem setting option. Aborting..."
     172           0 :      stop
     173             :    endif
     174             : 
     175        9216 :    call obj%get("gpu",gpu,error)
     176        9216 :    if (error .ne. ELPA_OK) then
     177           0 :      print *,"Problem setting option. Aborting..."
     178           0 :      stop
     179             :    endif
     180        9216 :    if (gpu .eq. 1) then
     181           0 :      useGPU =.true.
     182             :    else
     183        9216 :      useGPU = .false.
     184             :    endif
     185             : 
     186        9216 :    call obj%timer%start("mpi_communication")
     187             : 
     188        9216 :    call mpi_comm_rank(mpi_comm_all,my_pe,mpierr)
     189        9216 :    call mpi_comm_size(mpi_comm_all,n_pes,mpierr)
     190             : 
     191        9216 :    call mpi_comm_rank(mpi_comm_rows,my_prow,mpierr)
     192        9216 :    call mpi_comm_rank(mpi_comm_cols,my_pcol,mpierr)
     193             : 
     194             : #if COMPLEXCASE == 1
     195        4464 :    call mpi_comm_size(mpi_comm_rows,np_rows,mpierr)
     196        4464 :    call mpi_comm_size(mpi_comm_cols,np_cols,mpierr)
     197             : #endif
     198             : 
     199        9216 :    call obj%timer%stop("mpi_communication")
     200             : 
     201        9216 :    call obj%get("debug", debug,error)
     202        9216 :    if (error .ne. ELPA_OK) then
     203           0 :      print *,"Problem setting option. Aborting..."
     204           0 :      stop
     205             :    endif
     206        9216 :    wantDebug = debug == 1
     207        9216 :    do_useGPU = .false.
     208             : 
     209             : 
     210        9216 :    if (useGPU) then
     211           0 :      if (check_for_gpu(my_pe,numberOfGPUDevices, wantDebug=wantDebug)) then
     212             : 
     213           0 :        do_useGPU = .true.
     214             :        ! set the neccessary parameters
     215           0 :        cudaMemcpyHostToDevice   = cuda_memcpyHostToDevice()
     216           0 :        cudaMemcpyDeviceToHost   = cuda_memcpyDeviceToHost()
     217           0 :        cudaMemcpyDeviceToDevice = cuda_memcpyDeviceToDevice()
     218           0 :        cudaHostRegisterPortable = cuda_hostRegisterPortable()
     219           0 :        cudaHostRegisterMapped   = cuda_hostRegisterMapped()
     220             :      else
     221           0 :        print *,"GPUs are requested but not detected! Aborting..."
     222           0 :        success = .false.
     223           0 :        return
     224             :      endif
     225             :    else
     226             :      ! check whether set by environment variable
     227        9216 :      call obj%get("gpu", gpu,error)
     228        9216 :      if (error .ne. ELPA_OK) then
     229           0 :        print *,"Problem setting option. Aborting..."
     230           0 :        stop
     231             :      endif
     232        9216 :      do_useGPU = gpu == 1
     233             : 
     234        9216 :      if (do_useGPU) then
     235           0 :        if (check_for_gpu(my_pe,numberOfGPUDevices, wantDebug=wantDebug)) then
     236             : 
     237             :          ! set the neccessary parameters
     238           0 :          cudaMemcpyHostToDevice   = cuda_memcpyHostToDevice()
     239           0 :          cudaMemcpyDeviceToHost   = cuda_memcpyDeviceToHost()
     240           0 :          cudaMemcpyDeviceToDevice = cuda_memcpyDeviceToDevice()
     241           0 :          cudaHostRegisterPortable = cuda_hostRegisterPortable()
     242           0 :          cudaHostRegisterMapped   = cuda_hostRegisterMapped()
     243             :        else
     244           0 :          print *,"GPUs are requested but not detected! Aborting..."
     245           0 :          success = .false.
     246           0 :          return
     247             :        endif
     248             :      endif
     249             :    endif
     250             : 
     251             :    ! allocate a dummy q_intern, if eigenvectors should not be commputed and thus q is NOT present
     252        9216 :    if (.not.(obj%eigenvalues_only)) then
     253        8064 :      q_actual => q(1:obj%local_nrows,1:obj%local_ncols)
     254             :    else
     255        1152 :      allocate(q_dummy(obj%local_nrows,obj%local_ncols))
     256        1152 :      q_actual => q_dummy
     257             :    endif
     258             : 
     259             : #if COMPLEXCASE == 1
     260        4464 :    l_rows = local_index(na, my_prow, np_rows, nblk, -1) ! Local rows of a and q
     261        4464 :    l_cols = local_index(na, my_pcol, np_cols, nblk, -1) ! Local columns of q
     262             : 
     263        4464 :    l_cols_nev = local_index(nev, my_pcol, np_cols, nblk, -1) ! Local columns corresponding to nev
     264             : 
     265        4464 :    allocate(q_real(l_rows,l_cols), stat=istat, errmsg=errorMessage)
     266        4464 :    if (istat .ne. 0) then
     267             :      print *,"solve_evp_&
     268             :      &MATH_DATATYPE&
     269             :      &_1stage_&
     270             :      &PRECISION&
     271           0 :      &" // ": error when allocating q_real "//errorMessage
     272           0 :      stop 1
     273             :    endif
     274             : #endif
     275        9216 :    allocate(e(na), tau(na), stat=istat, errmsg=errorMessage)
     276        9216 :    if (istat .ne. 0) then
     277             :      print *,"solve_evp_&
     278             :      &MATH_DATATYPE&
     279             :      &_1stage_&
     280             :      &PRECISION&
     281           0 :      &" // ": error when allocating e, tau "//errorMessage
     282           0 :      stop 1
     283             :    endif
     284             : 
     285             : 
     286             :    ! start the computations
     287             :    ! as default do all three steps (this might change at some point)
     288        9216 :    do_bandred  = .true.
     289        9216 :    do_solve    = .true.
     290        9216 :    do_trans_ev = .true.
     291             : 
     292        9216 :    if (obj%eigenvalues_only) then
     293        1152 :      do_trans_ev = .true.
     294             :    endif
     295             : 
     296        9216 :    if (do_bandred) then
     297        9216 :      call obj%timer%start("forward")
     298             :      call tridiag_&
     299             :      &MATH_DATATYPE&
     300             :      &_&
     301             :      &PRECISION&
     302        9216 :      & (obj, na, a, lda, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols, ev, e, tau, do_useGPU, wantDebug)
     303        9216 :      call obj%timer%stop("forward")
     304             :     endif  !do_bandred
     305             : 
     306        9216 :     if (do_solve) then
     307        9216 :      call obj%timer%start("solve")
     308             :      call solve_tridi_&
     309             :      &PRECISION&
     310             :      & (obj, na, nev, ev, e,  &
     311             : #if REALCASE == 1
     312             :         q_actual, ldq,          &
     313             : #endif
     314             : #if COMPLEXCASE == 1
     315             :         q_real, l_rows,  &
     316             : #endif
     317        9216 :         nblk, matrixCols, mpi_comm_rows, mpi_comm_cols, do_useGPU, wantDebug, success)
     318        9216 :      call obj%timer%stop("solve")
     319        9216 :      if (.not.(success)) return
     320             :    endif !do_solve
     321             : 
     322        9216 :    if (obj%eigenvalues_only) then
     323        1152 :      do_trans_ev = .false.
     324             :    else
     325        8064 :      call obj%get("check_pd",check_pd,error)
     326        8064 :      if (error .ne. ELPA_OK) then
     327           0 :        print *,"Problem setting option. Aborting..."
     328           0 :        stop
     329             :      endif
     330        8064 :      if (check_pd .eq. 1) then
     331           0 :        check_pd = 0
     332           0 :        do i = 1, na
     333           0 :          if (ev(i) .gt. THRESHOLD) then
     334           0 :            check_pd = check_pd + 1
     335             :          endif
     336             :        enddo
     337           0 :        if (check_pd .lt. na) then
     338             :          ! not positiv definite => eigenvectors needed
     339           0 :          do_trans_ev = .true.
     340             :        else
     341           0 :          do_trans_ev = .false.
     342             :        endif
     343             :      endif ! check_pd
     344             :    endif ! eigenvalues_only
     345             : 
     346        9216 :    if (do_trans_ev) then
     347             :     ! q must be given thats why from here on we can use q and not q_actual
     348             : #if COMPLEXCASE == 1
     349        4032 :      q(1:l_rows,1:l_cols_nev) = q_real(1:l_rows,1:l_cols_nev)
     350             : #endif
     351             : 
     352        8064 :      call obj%timer%start("back")
     353             :      call trans_ev_&
     354             :      &MATH_DATATYPE&
     355             :      &_&
     356             :      &PRECISION&
     357        8064 :      & (obj, na, nev, a, lda, tau, q, ldq, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols, do_useGPU)
     358        8064 :      call obj%timer%stop("back")
     359             :    endif ! do_trans_ev
     360             : 
     361             : #if COMPLEXCASE == 1
     362        4464 :     deallocate(q_real, stat=istat, errmsg=errorMessage)
     363        4464 :     if (istat .ne. 0) then
     364             :       print *,"solve_evp_&
     365             :       &MATH_DATATYPE&
     366             :       &_1stage_&
     367             :       &PRECISION&
     368           0 :       &" // ": error when deallocating q_real "//errorMessage
     369           0 :       stop 1
     370             :     endif
     371             : #endif
     372             : 
     373        9216 :    deallocate(e, tau, stat=istat, errmsg=errorMessage)
     374        9216 :    if (istat .ne. 0) then
     375             :      print *,"solve_evp_&
     376             :      &MATH_DATATYPE&
     377             :      &_1stage_&
     378             :      &PRECISION&
     379           0 :      &" // ": error when deallocating e, tau "//errorMessage
     380           0 :      stop 1
     381             :    endif
     382             : 
     383        9216 :    if (obj%eigenvalues_only) then
     384        1152 :      deallocate(q_dummy, stat=istat, errmsg=errorMessage)
     385        1152 :      if (istat .ne. 0) then
     386             :        print *,"solve_evp_&
     387             :        &MATH_DATATYPE&
     388             :        &_1stage_&
     389             :        &PRECISION&
     390           0 :        &" // ": error when deallocating q_dummy "//errorMessage
     391           0 :        stop 1
     392             :      endif
     393             :    endif
     394             : 
     395             :    call obj%timer%stop("elpa_solve_evp_&
     396             :    &MATH_DATATYPE&
     397             :    &_1stage_&
     398             :    &PRECISION&
     399        9216 :    &")
     400       18432 : end function
     401             : 
     402             : 

Generated by: LCOV version 1.12