LCOV - code coverage report
Current view: top level - src/elpa1 - elpa_invert_trm.F90 (source / functions) Hit Total Coverage
Test: coverage_50ab7a7628bba174fc62cee3ab72b26e81f87fe5.info Lines: 85 108 78.7 %
Date: 2018-01-10 09:29:53 Functions: 0 0 -

          Line data    Source code
       1             : !    This file is part of ELPA.
       2             : !
       3             : !    The ELPA library was originally created by the ELPA consortium,
       4             : !    consisting of the following organizations:
       5             : !
       6             : !    - Max Planck Computing and Data Facility (MPCDF), formerly known as
       7             : !      Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG),
       8             : !    - Bergische Universität Wuppertal, Lehrstuhl für angewandte
       9             : !      Informatik,
      10             : !    - Technische Universität München, Lehrstuhl für Informatik mit
      11             : !      Schwerpunkt Wissenschaftliches Rechnen ,
      12             : !    - Fritz-Haber-Institut, Berlin, Abt. Theorie,
      13             : !    - Max-Plack-Institut für Mathematik in den Naturwissenschaften,
      14             : !      Leipzig, Abt. Komplexe Strukutren in Biologie und Kognition,
      15             : !      and
      16             : !    - IBM Deutschland GmbH
      17             : !
      18             : !    This particular source code file contains additions, changes and
      19             : !    enhancements authored by Intel Corporation which is not part of
      20             : !    the ELPA consortium.
      21             : !
      22             : !    More information can be found here:
      23             : !    http://elpa.mpcdf.mpg.de/
      24             : !
      25             : !    ELPA is free software: you can redistribute it and/or modify
      26             : !    it under the terms of the version 3 of the license of the
      27             : !    GNU Lesser General Public License as published by the Free
      28             : !    Software Foundation.
      29             : !
      30             : !    ELPA is distributed in the hope that it will be useful,
      31             : !    but WITHOUT ANY WARRANTY; without even the implied warranty of
      32             : !    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      33             : !    GNU Lesser General Public License for more details.
      34             : !
      35             : !    You should have received a copy of the GNU Lesser General Public License
      36             : !    along with ELPA.  If not, see <http://www.gnu.org/licenses/>
      37             : !
      38             : !    ELPA reflects a substantial effort on the part of the original
      39             : !    ELPA consortium, and we ask you to respect the spirit of the
      40             : !    license that we chose: i.e., please contribute any changes you
      41             : !    may have back to the original ELPA library distribution, and keep
      42             : !    any derivatives of ELPA under the same license that we chose for
      43             : !    the original distribution, the GNU Lesser General Public License.
      44             : !
      45             : !
      46             : ! ELPA1 -- Faster replacements for ScaLAPACK symmetric eigenvalue routines
      47             : !
      48             : ! Copyright of the original code rests with the authors inside the ELPA
      49             : ! consortium. The copyright of any additional modifications shall rest
      50             : ! with their original authors, but shall adhere to the licensing terms
      51             : ! distributed along with the original code in the file "COPYING".
      52             : 
      53             : #include "../general/sanity.F90"
      54             : 
      55             :        use precision
      56             :        use elpa1_compute
      57             :        use elpa_utilities
      58             :        use elpa_mpi
      59             :        use elpa_abstract_impl
      60             :        implicit none
      61             : #include "../general/precision_kinds.F90"
      62             :        class(elpa_abstract_impl_t), intent(inout) :: obj
      63             :        integer(kind=ik)             :: na, lda, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols
      64             : #ifdef USE_ASSUMED_SIZE
      65             :        MATH_DATATYPE(kind=rck)     :: a(obj%local_nrows,*)
      66             : #else
      67             :        MATH_DATATYPE(kind=rck)     :: a(obj%local_nrows,obj%local_ncols)
      68             : #endif
      69             : 
      70             :        integer(kind=ik)             :: my_prow, my_pcol, np_rows, np_cols, mpierr
      71             :        integer(kind=ik)             :: l_cols, l_rows, l_col1, l_row1, l_colx, l_rowx
      72             :        integer(kind=ik)             :: n, nc, i, info, ns, nb
      73         864 :        MATH_DATATYPE(kind=rck), allocatable   :: tmp1(:), tmp2(:,:), tmat1(:,:), tmat2(:,:)
      74             :        logical                      :: wantDebug
      75             :        logical                      :: success
      76             :        integer(kind=ik)             :: istat, debug, error
      77             :        character(200)               :: errorMessage
      78             : 
      79             :       call obj%timer%start("elpa_invert_trm_&
      80             :       &MATH_DATATYPE&
      81             :       &_&
      82             :       &PRECISION&
      83         864 :       &")
      84             : 
      85         864 :        na         = obj%na
      86         864 :        lda        = obj%local_nrows
      87         864 :        nblk       = obj%nblk
      88         864 :        matrixCols = obj%local_ncols
      89             : 
      90         864 :        call obj%get("mpi_comm_rows",mpi_comm_rows,error)
      91         864 :        if (error .ne. ELPA_OK) then
      92           0 :          print *,"Error getting option. Aborting..."
      93           0 :          stop
      94             :        endif
      95         864 :        call obj%get("mpi_comm_cols",mpi_comm_cols,error)
      96         864 :        if (error .ne. ELPA_OK) then
      97           0 :          print *,"Error getting option. Aborting..."
      98           0 :          stop
      99             :        endif
     100             : 
     101         864 :        call obj%get("debug", debug,error)
     102         864 :        if (error .ne. ELPA_OK) then
     103           0 :          print *,"Error getting option. Aborting..."
     104           0 :          stop
     105             :        endif
     106         864 :        if (debug == 1) then
     107         864 :          wantDebug = .true.
     108             :        else
     109           0 :          wantDebug = .true.
     110             :        endif
     111         864 :        call obj%timer%start("mpi_communication")
     112         864 :        call mpi_comm_rank(mpi_comm_rows,my_prow,mpierr)
     113         864 :        call mpi_comm_size(mpi_comm_rows,np_rows,mpierr)
     114         864 :        call mpi_comm_rank(mpi_comm_cols,my_pcol,mpierr)
     115         864 :        call mpi_comm_size(mpi_comm_cols,np_cols,mpierr)
     116         864 :        call obj%timer%stop("mpi_communication")
     117         864 :        success = .true.
     118             : 
     119         864 :        l_rows = local_index(na, my_prow, np_rows, nblk, -1) ! Local rows of a
     120         864 :        l_cols = local_index(na, my_pcol, np_cols, nblk, -1) ! Local cols of a
     121             : 
     122         864 :        allocate(tmp1(nblk*nblk), stat=istat, errmsg=errorMessage)
     123         864 :        if (istat .ne. 0) then
     124             :          print *,"elpa_invert_trm_&
     125             :    &MATH_DATATYPE&
     126           0 :    &: error when allocating tmp1 "//errorMessage
     127           0 :          stop 1
     128             :        endif
     129             : 
     130         864 :        allocate(tmp2(nblk,nblk), stat=istat, errmsg=errorMessage)
     131         864 :        if (istat .ne. 0) then
     132             :          print *,"elpa_invert_trm_&
     133             :    &MATH_DATATYPE&
     134           0 :    &: error when allocating tmp2 "//errorMessage
     135           0 :          stop 1
     136             :        endif
     137             : 
     138         864 :        tmp1 = 0
     139         864 :        tmp2 = 0
     140             : 
     141         864 :        allocate(tmat1(l_rows,nblk), stat=istat, errmsg=errorMessage)
     142         864 :        if (istat .ne. 0) then
     143             :          print *,"elpa_invert_trm_&
     144             :    &MATH_DATATYPE&
     145           0 :    &: error when allocating tmat1 "//errorMessage
     146           0 :          stop 1
     147             :        endif
     148             : 
     149         864 :        allocate(tmat2(nblk,l_cols), stat=istat, errmsg=errorMessage)
     150         864 :        if (istat .ne. 0) then
     151             :          print *,"elpa_invert_trm_&
     152             :    &MATH_DATATYPE&
     153           0 :    &: error when allocating tmat2 "//errorMessage
     154           0 :          stop 1
     155             :        endif
     156             : 
     157         864 :        tmat1 = 0
     158         864 :        tmat2 = 0
     159             : 
     160             : 
     161         864 :        ns = ((na-1)/nblk)*nblk + 1
     162             : 
     163        9504 :        do n = ns,1,-nblk
     164             : 
     165        8640 :          l_row1 = local_index(n, my_prow, np_rows, nblk, +1)
     166        8640 :          l_col1 = local_index(n, my_pcol, np_cols, nblk, +1)
     167             : 
     168        8640 :          nb = nblk
     169        8640 :          if (na-n+1 < nblk) nb = na-n+1
     170             : 
     171        8640 :          l_rowx = local_index(n+nb, my_prow, np_rows, nblk, +1)
     172        8640 :          l_colx = local_index(n+nb, my_pcol, np_cols, nblk, +1)
     173             : 
     174        8640 :          if (my_prow==prow(n, nblk, np_rows)) then
     175             : 
     176        5760 :            if (my_pcol==pcol(n, nblk, np_cols)) then
     177        5760 :              call obj%timer%start("blas")
     178             : 
     179        5760 :              call PRECISION_TRTRI('U', 'N', nb, a(l_row1,l_col1), lda, info)
     180             : 
     181        5760 :              call obj%timer%stop("blas")
     182             : 
     183        5760 :              if (info/=0) then
     184           0 :                if (wantDebug) write(error_unit,*) "elpa_invert_trm_&
     185             :          &MATH_DATATYPE&
     186             : 
     187             : #if REALCASE == 1
     188           0 :          &: Error in DTRTRI"
     189             : #endif
     190             : #if COMPLEXCASE == 1
     191           0 :          &: Error in ZTRTRI"
     192             : #endif
     193             : 
     194           0 :                success = .false.
     195             :                call obj%timer%stop("elpa_invert_trm_&
     196             :                &MATH_DATATYPE&
     197             :                &_&
     198             :                &PRECISION&
     199           0 :                &")
     200           0 :                return
     201             :              endif
     202             : 
     203        5760 :              nc = 0
     204       92160 :              do i=1,nb
     205       86400 :                tmp1(nc+1:nc+i) = a(l_row1:l_row1+i-1,l_col1+i-1)
     206       86400 :                nc = nc+i
     207             :              enddo
     208             :            endif
     209             : #ifdef WITH_MPI
     210        2880 :            call obj%timer%start("mpi_communication")
     211             :            call MPI_Bcast(tmp1, nb*(nb+1)/2, MPI_MATH_DATATYPE_PRECISION,       &
     212        2880 :                           pcol(n, nblk, np_cols), mpi_comm_cols, mpierr)
     213        2880 :            call obj%timer%stop("mpi_communication")
     214             : #endif /* WITH_MPI */
     215        5760 :            nc = 0
     216       92160 :            do i=1,nb
     217       86400 :              tmp2(1:i,i) = tmp1(nc+1:nc+i)
     218       86400 :              nc = nc+i
     219             :            enddo
     220             : 
     221        5760 :            call obj%timer%start("blas")
     222        5760 :            if (l_cols-l_colx+1>0) &
     223             :         call PRECISION_TRMM('L', 'U', 'N', 'N', nb, l_cols-l_colx+1, ONE, &
     224        5184 :                                    tmp2, ubound(tmp2,dim=1), a(l_row1,l_colx), lda)
     225        5760 :            call obj%timer%stop("blas")
     226        5760 :            if (l_colx<=l_cols)   tmat2(1:nb,l_colx:l_cols) = a(l_row1:l_row1+nb-1,l_colx:l_cols)
     227        5760 :            if (my_pcol==pcol(n, nblk, np_cols)) tmat2(1:nb,l_col1:l_col1+nb-1) = tmp2(1:nb,1:nb) ! tmp2 has the lower left triangle 0
     228             : 
     229             :          endif
     230             : 
     231        8640 :          if (l_row1>1) then
     232        7488 :            if (my_pcol==pcol(n, nblk, np_cols)) then
     233        7488 :              tmat1(1:l_row1-1,1:nb) = a(1:l_row1-1,l_col1:l_col1+nb-1)
     234        7488 :              a(1:l_row1-1,l_col1:l_col1+nb-1) = 0
     235             :            endif
     236             : 
     237       80064 :            do i=1,nb
     238             : #ifdef WITH_MPI
     239       72576 :              call obj%timer%start("mpi_communication")
     240             :              call MPI_Bcast(tmat1(1,i), l_row1-1, MPI_MATH_DATATYPE_PRECISION, &
     241       72576 :                              pcol(n, nblk, np_cols), mpi_comm_cols, mpierr)
     242             : 
     243       72576 :              call obj%timer%stop("mpi_communication")
     244             : #endif /* WITH_MPI */
     245             :            enddo
     246             :          endif
     247             : #ifdef WITH_MPI
     248        5760 :          call obj%timer%start("mpi_communication")
     249        5760 :          if (l_cols-l_col1+1>0) &
     250             :       call MPI_Bcast(tmat2(1,l_col1), (l_cols-l_col1+1)*nblk, MPI_MATH_DATATYPE_PRECISION, &
     251        5760 :                              prow(n, nblk, np_rows), mpi_comm_rows, mpierr)
     252             : 
     253        5760 :           call obj%timer%stop("mpi_communication")
     254             : #endif /* WITH_MPI */
     255             : 
     256        8640 :          call obj%timer%start("blas")
     257        8640 :          if (l_row1>1 .and. l_cols-l_col1+1>0) &
     258             :            call PRECISION_GEMM('N', 'N', l_row1-1, l_cols-l_col1+1, nb, -ONE, &
     259             :                                 tmat1, ubound(tmat1,dim=1), tmat2(1,l_col1), ubound(tmat2,dim=1), ONE, &
     260        7488 :                                  a(1,l_col1), lda)
     261             : 
     262        8640 :          call obj%timer%stop("blas")
     263             : 
     264             :        enddo
     265             : 
     266         864 :        deallocate(tmp1, tmp2, tmat1, tmat2, stat=istat, errmsg=errorMessage)
     267         864 :        if (istat .ne. 0) then
     268             :          print *,"elpa_invert_trm_&
     269             :    &MATH_DATATYPE&
     270           0 :    &: error when deallocating tmp1 "//errorMessage
     271           0 :          stop 1
     272             :        endif
     273             : 
     274             :        call obj%timer%stop("elpa_invert_trm_&
     275             :        &MATH_DATATYPE&
     276             :        &_&
     277             :        &PRECISION&
     278         864 :        &")

Generated by: LCOV version 1.12