LCOV - code coverage report
Current view: top level - src/elpa1 - elpa_cholesky_template.F90 (source / functions) Hit Total Coverage
Test: coverage_50ab7a7628bba174fc62cee3ab72b26e81f87fe5.info Lines: 98 124 79.0 %
Date: 2018-01-10 09:29:53 Functions: 0 0 -

          Line data    Source code
       1             : !    This file is part of ELPA.
       2             : !
       3             : !    The ELPA library was originally created by the ELPA consortium,
       4             : !    consisting of the following organizations:
       5             : !
       6             : !    - Max Planck Computing and Data Facility (MPCDF), formerly known as
       7             : !      Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG),
       8             : !    - Bergische Universität Wuppertal, Lehrstuhl für angewandte
       9             : !      Informatik,
      10             : !    - Technische Universität München, Lehrstuhl für Informatik mit
      11             : !      Schwerpunkt Wissenschaftliches Rechnen ,
      12             : !    - Fritz-Haber-Institut, Berlin, Abt. Theorie,
      13             : !    - Max-Plack-Institut für Mathematik in den Naturwissenschaften,
      14             : !      Leipzig, Abt. Komplexe Strukutren in Biologie und Kognition,
      15             : !      and
      16             : !    - IBM Deutschland GmbH
      17             : !
      18             : !    This particular source code file contains additions, changes and
      19             : !    enhancements authored by Intel Corporation which is not part of
      20             : !    the ELPA consortium.
      21             : !
      22             : !    More information can be found here:
      23             : !    http://elpa.mpcdf.mpg.de/
      24             : !
      25             : !    ELPA is free software: you can redistribute it and/or modify
      26             : !    it under the terms of the version 3 of the license of the
      27             : !    GNU Lesser General Public License as published by the Free
      28             : !    Software Foundation.
      29             : !
      30             : !    ELPA is distributed in the hope that it will be useful,
      31             : !    but WITHOUT ANY WARRANTY; without even the implied warranty of
      32             : !    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      33             : !    GNU Lesser General Public License for more details.
      34             : !
      35             : !    You should have received a copy of the GNU Lesser General Public License
      36             : !    along with ELPA.  If not, see <http://www.gnu.org/licenses/>
      37             : !
      38             : !    ELPA reflects a substantial effort on the part of the original
      39             : !    ELPA consortium, and we ask you to respect the spirit of the
      40             : !    license that we chose: i.e., please contribute any changes you
      41             : !    may have back to the original ELPA library distribution, and keep
      42             : !    any derivatives of ELPA under the same license that we chose for
      43             : !    the original distribution, the GNU Lesser General Public License.
      44             : 
      45             : #include "../general/sanity.F90"
      46             :      use elpa1_compute
      47             :      use elpa_utilities
      48             :      use elpa_mpi
      49             :      use precision
      50             :      use elpa_abstract_impl
      51             :      implicit none
      52             : #include "../general/precision_kinds.F90"
      53             :       class(elpa_abstract_impl_t), intent(inout) :: obj
      54             :       integer(kind=ik)              :: na, lda, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols
      55             : #ifdef USE_ASSUMED_SIZE
      56             :       MATH_DATATYPE(kind=rck)      :: a(obj%local_nrows,*)
      57             : #else
      58             :       MATH_DATATYPE(kind=rck)      :: a(obj%local_nrows,obj%local_ncols)
      59             : #endif
      60             :       integer(kind=ik)              :: my_prow, my_pcol, np_rows, np_cols, mpierr
      61             :       integer(kind=ik)              :: l_cols, l_rows, l_col1, l_row1, l_colx, l_rowx
      62             :       integer(kind=ik)              :: n, nc, i, info
      63             :       integer(kind=ik)              :: lcs, lce, lrs, lre
      64             :       integer(kind=ik)              :: tile_size, l_rows_tile, l_cols_tile
      65             : 
      66        2592 :       MATH_DATATYPE(kind=rck), allocatable    :: tmp1(:), tmp2(:,:), tmatr(:,:), tmatc(:,:)
      67             :       logical                       :: wantDebug
      68             :       logical                       :: success
      69             :       integer(kind=ik)              :: istat, debug, error
      70             :       character(200)                :: errorMessage
      71             : 
      72             :       call obj%timer%start("elpa_cholesky_&
      73             :       &MATH_DATATYPE&
      74             :       &_&
      75             :       &PRECISION&
      76        2592 :       &")
      77             : 
      78        2592 :       na         = obj%na
      79        2592 :       lda        = obj%local_nrows
      80        2592 :       nblk       = obj%nblk
      81        2592 :       matrixCols = obj%local_ncols
      82             : 
      83        2592 :       call obj%get("mpi_comm_rows",mpi_comm_rows,error )
      84        2592 :       if (error .ne. ELPA_OK) then
      85           0 :         print *,"Problem getting option. Aborting..."
      86           0 :         stop
      87             :       endif
      88        2592 :       call obj%get("mpi_comm_cols",mpi_comm_cols,error)
      89        2592 :       if (error .ne. ELPA_OK) then
      90           0 :         print *,"Problem getting option. Aborting..."
      91           0 :         stop
      92             :       endif
      93             : 
      94        2592 :       call obj%get("debug",debug,error)
      95        2592 :       if (error .ne. ELPA_OK) then
      96           0 :         print *,"Problem getting option. Aborting..."
      97           0 :         stop
      98             :       endif
      99        2592 :       if (debug == 1) then
     100        1728 :         wantDebug = .true.
     101             :       else
     102         864 :         wantDebug = .false.
     103             :       endif
     104             : 
     105        2592 :       call obj%timer%start("mpi_communication")
     106        2592 :       call mpi_comm_rank(mpi_comm_rows,my_prow,mpierr)
     107        2592 :       call mpi_comm_size(mpi_comm_rows,np_rows,mpierr)
     108        2592 :       call mpi_comm_rank(mpi_comm_cols,my_pcol,mpierr)
     109        2592 :       call mpi_comm_size(mpi_comm_cols,np_cols,mpierr)
     110        2592 :       call obj%timer%stop("mpi_communication")
     111        2592 :       success = .true.
     112             : 
     113             :       ! Matrix is split into tiles; work is done only for tiles on the diagonal or above
     114             : 
     115        2592 :       tile_size = nblk*least_common_multiple(np_rows,np_cols) ! minimum global tile size
     116        2592 :       tile_size = ((128*max(np_rows,np_cols)-1)/tile_size+1)*tile_size ! make local tiles at least 128 wide
     117             : 
     118        2592 :       l_rows_tile = tile_size/np_rows ! local rows of a tile
     119        2592 :       l_cols_tile = tile_size/np_cols ! local cols of a tile
     120             : 
     121        2592 :       l_rows = local_index(na, my_prow, np_rows, nblk, -1) ! Local rows of a
     122        2592 :       l_cols = local_index(na, my_pcol, np_cols, nblk, -1) ! Local cols of a
     123             : 
     124        2592 :       allocate(tmp1(nblk*nblk), stat=istat, errmsg=errorMessage)
     125        2592 :       if (istat .ne. 0) then
     126             :         print *,"elpa_cholesky_&
     127           0 :   &MATH_DATATYPE&: error when allocating tmp1 "//errorMessage
     128           0 :         stop 1
     129             :       endif
     130             : 
     131        2592 :       allocate(tmp2(nblk,nblk), stat=istat, errmsg=errorMessage)
     132        2592 :       if (istat .ne. 0) then
     133             :         print *,"elpa_cholesky_&
     134             :   &MATH_DATATYPE&
     135           0 :   &: error when allocating tmp2 "//errorMessage
     136           0 :         stop 1
     137             :       endif
     138             : 
     139        2592 :       tmp1 = 0
     140        2592 :       tmp2 = 0
     141             : 
     142        2592 :       allocate(tmatr(l_rows,nblk), stat=istat, errmsg=errorMessage)
     143        2592 :       if (istat .ne. 0) then
     144             :         print *,"elpa_cholesky_&
     145             :   &MATH_DATATYPE&
     146           0 :   &: error when allocating tmatr "//errorMessage
     147           0 :         stop 1
     148             :       endif
     149             : 
     150        2592 :       allocate(tmatc(l_cols,nblk), stat=istat, errmsg=errorMessage)
     151        2592 :       if (istat .ne. 0) then
     152             :         print *,"elpa_cholesky_&
     153             :   &MATH_DATATYPE&
     154           0 :   &: error when allocating tmatc "//errorMessage
     155           0 :         stop 1
     156             :       endif
     157             : 
     158        2592 :       tmatr = 0
     159        2592 :       tmatc = 0
     160             : 
     161       25920 :       do n = 1, na, nblk
     162             :         ! Calculate first local row and column of the still remaining matrix
     163             :         ! on the local processor
     164             : 
     165       25920 :         l_row1 = local_index(n, my_prow, np_rows, nblk, +1)
     166       25920 :         l_col1 = local_index(n, my_pcol, np_cols, nblk, +1)
     167             : 
     168       25920 :         l_rowx = local_index(n+nblk, my_prow, np_rows, nblk, +1)
     169       25920 :         l_colx = local_index(n+nblk, my_pcol, np_cols, nblk, +1)
     170             : 
     171       25920 :         if (n+nblk > na) then
     172             : 
     173             :           ! This is the last step, just do a Cholesky-Factorization
     174             :           ! of the remaining block
     175             : 
     176        2592 :           if (my_prow==prow(n, nblk, np_rows) .and. my_pcol==pcol(n, nblk, np_cols)) then
     177        1728 :             call obj%timer%start("blas")
     178             : 
     179        1728 :             call PRECISION_POTRF('U', na-n+1, a(l_row1,l_col1), lda, info)
     180        1728 :             call obj%timer%stop("blas")
     181             : 
     182        1728 :             if (info/=0) then
     183           0 :               if (wantDebug) write(error_unit,*) "elpa_cholesky_&
     184             :         &MATH_DATATYPE&
     185             : 
     186             : #if REALCASE == 1
     187           0 :         &: Error in dpotrf: ",info
     188             : #endif
     189             : #if COMPLEXCASE == 1
     190           0 :               &: Error in zpotrf: ",info
     191             : #endif
     192           0 :               success = .false.
     193           0 :               return
     194             :             endif
     195             : 
     196             :           endif
     197             : 
     198        2592 :           exit ! Loop
     199             : 
     200             :         endif
     201             : 
     202       23328 :         if (my_prow==prow(n, nblk, np_rows)) then
     203             : 
     204       15552 :           if (my_pcol==pcol(n, nblk, np_cols)) then
     205             : 
     206             :             ! The process owning the upper left remaining block does the
     207             :             ! Cholesky-Factorization of this block
     208       15552 :             call obj%timer%start("blas")
     209             : 
     210       15552 :             call PRECISION_POTRF('U', nblk, a(l_row1,l_col1), lda, info)
     211       15552 :             call obj%timer%stop("blas")
     212             : 
     213       15552 :             if (info/=0) then
     214           0 :               if (wantDebug) write(error_unit,*) "elpa_cholesky_&
     215             :         &MATH_DATATYPE&
     216             : 
     217             : #if REALCASE == 1
     218           0 :         &: Error in dpotrf 2: ",info
     219             : #endif
     220             : #if COMPLEXCASE == 1
     221           0 :         &: Error in zpotrf 2: ",info
     222             : 
     223             : #endif
     224           0 :               success = .false.
     225           0 :               return
     226             :             endif
     227             : 
     228       15552 :             nc = 0
     229      264384 :             do i=1,nblk
     230      248832 :               tmp1(nc+1:nc+i) = a(l_row1:l_row1+i-1,l_col1+i-1)
     231      248832 :               nc = nc+i
     232             :             enddo
     233             :           endif
     234             : #ifdef WITH_MPI
     235        7776 :           call obj%timer%start("mpi_communication")
     236             : 
     237             :           call MPI_Bcast(tmp1, nblk*(nblk+1)/2,      &
     238             : #if REALCASE == 1
     239             :                    MPI_REAL_PRECISION,         &
     240             : #endif
     241             : #if COMPLEXCASE == 1
     242             :                          MPI_COMPLEX_PRECISION,      &
     243             : #endif
     244        7776 :        pcol(n, nblk, np_cols), mpi_comm_cols, mpierr)
     245             : 
     246        7776 :           call obj%timer%stop("mpi_communication")
     247             : 
     248             : #endif /* WITH_MPI */
     249       15552 :           nc = 0
     250      264384 :           do i=1,nblk
     251      248832 :             tmp2(1:i,i) = tmp1(nc+1:nc+i)
     252      248832 :             nc = nc+i
     253             :           enddo
     254             : 
     255       15552 :           call obj%timer%start("blas")
     256       15552 :           if (l_cols-l_colx+1>0) &
     257             :               call PRECISION_TRSM('L', 'U', BLAS_TRANS_OR_CONJ, 'N', nblk, l_cols-l_colx+1, ONE, tmp2, &
     258       15552 :                             ubound(tmp2,dim=1), a(l_row1,l_colx), lda)
     259       15552 :           call obj%timer%stop("blas")
     260             :         endif
     261             : 
     262      396576 :         do i=1,nblk
     263             : 
     264             : #if REALCASE == 1
     265      186624 :           if (my_prow==prow(n, nblk, np_rows)) tmatc(l_colx:l_cols,i) = a(l_row1+i-1,l_colx:l_cols)
     266             : #endif
     267             : #if COMPLEXCASE == 1
     268      186624 :           if (my_prow==prow(n, nblk, np_rows)) tmatc(l_colx:l_cols,i) = conjg(a(l_row1+i-1,l_colx:l_cols))
     269             : #endif
     270             : 
     271             : #ifdef WITH_MPI
     272             : 
     273      248832 :           call obj%timer%start("mpi_communication")
     274      248832 :           if (l_cols-l_colx+1>0) &
     275             :             call MPI_Bcast(tmatc(l_colx,i), l_cols-l_colx+1, MPI_MATH_DATATYPE_PRECISION, &
     276      248832 :                            prow(n, nblk, np_rows), mpi_comm_rows, mpierr)
     277             : 
     278      248832 :           call obj%timer%stop("mpi_communication")
     279             : #endif /* WITH_MPI */
     280             :         enddo
     281             :         ! this has to be checked since it was changed substantially when doing type safe
     282             :         call elpa_transpose_vectors_&
     283             :   &MATH_DATATYPE&
     284             :   &_&
     285             :   &PRECISION &
     286             :                  (obj, tmatc, ubound(tmatc,dim=1), mpi_comm_cols, &
     287             :                                       tmatr, ubound(tmatr,dim=1), mpi_comm_rows, &
     288       23328 :                                       n, na, nblk, nblk)
     289             : 
     290       54432 :         do i=0,(na-1)/tile_size
     291       31104 :           lcs = max(l_colx,i*l_cols_tile+1)
     292       31104 :           lce = min(l_cols,(i+1)*l_cols_tile)
     293       31104 :           lrs = l_rowx
     294       31104 :           lre = min(l_rows,(i+1)*l_rows_tile)
     295       31104 :           if (lce<lcs .or. lre<lrs) cycle
     296       28512 :           call obj%timer%start("blas")
     297             :           call PRECISION_GEMM('N', BLAS_TRANS_OR_CONJ, lre-lrs+1, lce-lcs+1, nblk, -ONE,  &
     298             :                               tmatr(lrs,1), ubound(tmatr,dim=1), tmatc(lcs,1), ubound(tmatc,dim=1), &
     299       28512 :                               ONE, a(lrs,lcs), lda)
     300       28512 :           call obj%timer%stop("blas")
     301             : 
     302             :         enddo
     303             : 
     304             :       enddo
     305             : 
     306        2592 :       deallocate(tmp1, tmp2, tmatr, tmatc, stat=istat, errmsg=errorMessage)
     307        2592 :       if (istat .ne. 0) then
     308             :         print *,"elpa_cholesky_&
     309             :   &MATH_DATATYPE&
     310           0 :   &: error when deallocating tmp1 "//errorMessage
     311           0 :         stop 1
     312             :       endif
     313             : 
     314             :       ! Set the lower triangle to 0, it contains garbage (form the above matrix multiplications)
     315             : 
     316      391392 :       do i=1,na
     317      388800 :         if (my_pcol==pcol(i, nblk, np_cols)) then
     318             :           ! column i is on local processor
     319      388800 :           l_col1 = local_index(i  , my_pcol, np_cols, nblk, +1) ! local column number
     320      388800 :           l_row1 = local_index(i+1, my_prow, np_rows, nblk, +1) ! first row below diagonal
     321      388800 :           a(l_row1:l_rows,l_col1) = 0
     322             :         endif
     323             :       enddo
     324             :       call obj%timer%stop("elpa_cholesky_&
     325             :       &MATH_DATATYPE&
     326             :       &_&
     327             :       &PRECISION&
     328        2592 :       &")

Generated by: LCOV version 1.12