LCOV - code coverage report
Current view: top level - src/elpa2/qr - qr_utils_template.F90 (source / functions) Hit Total Coverage
Test: coverage_50ab7a7628bba174fc62cee3ab72b26e81f87fe5.info Lines: 0 133 0.0 %
Date: 2018-01-10 09:29:53 Functions: 0 9 0.0 %

          Line data    Source code
       1             : !    This file is part of ELPA.
       2             : !
       3             : !    The ELPA library was originally created by the ELPA consortium,
       4             : !    consisting of the following organizations:
       5             : !
       6             : !    - Max Planck Computing and Data Facility (MPCDF), formerly known as
       7             : !      Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG),
       8             : !    - Bergische Universität Wuppertal, Lehrstuhl für angewandte
       9             : !      Informatik,
      10             : !    - Technische Universität München, Lehrstuhl für Informatik mit
      11             : !      Schwerpunkt Wissenschaftliches Rechnen ,
      12             : !    - Fritz-Haber-Institut, Berlin, Abt. Theorie,
      13             : !    - Max-Plack-Institut für Mathematik in den Naturwissenschaften,
      14             : !      Leipzig, Abt. Komplexe Strukutren in Biologie und Kognition,
      15             : !      and
      16             : !    - IBM Deutschland GmbH
      17             : !
      18             : !
      19             : !    More information can be found here:
      20             : !    http://elpa.mpcdf.mpg.de/
      21             : !
      22             : !    ELPA is free software: you can redistribute it and/or modify
      23             : !    it under the terms of the version 3 of the license of the
      24             : !    GNU Lesser General Public License as published by the Free
      25             : !    Software Foundation.
      26             : !
      27             : !    ELPA is distributed in the hope that it will be useful,
      28             : !    but WITHOUT ANY WARRANTY; without even the implied warranty of
      29             : !    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      30             : !    GNU Lesser General Public License for more details.
      31             : !
      32             : !    You should have received a copy of the GNU Lesser General Public License
      33             : !    along with ELPA.  If not, see <http://www.gnu.org/licenses/>
      34             : !
      35             : !    ELPA reflects a substantial effort on the part of the original
      36             : !    ELPA consortium, and we ask you to respect the spirit of the
      37             : !    license that we chose: i.e., please contribute any changes you
      38             : !    may have back to the original ELPA library distribution, and keep
      39             : !    any derivatives of ELPA under the same license that we chose for
      40             : !    the original distribution, the GNU Lesser General Public License.
      41             : !
      42             : !
      43             : #include "config-f90.h"
      44             : #ifndef ALREADY_DEFINED
      45           0 :   subroutine local_size_offset_1d(n,nb,baseidx,idx,rev,rank,nprocs, &
      46             :                                 lsize,baseoffset,offset)
      47             : 
      48             :     use precision
      49             :     use ELPA1_compute
      50             : 
      51             :     implicit none
      52             : 
      53             :     ! input
      54             :     integer(kind=ik) :: n,nb,baseidx,idx,rev,rank,nprocs
      55             : 
      56             :     ! output
      57             :     integer(kind=ik) :: lsize,baseoffset,offset
      58             : 
      59             :     ! local scalars
      60             :     integer(kind=ik) :: rank_idx
      61             : 
      62           0 :     rank_idx = MOD((idx-1)/nb,nprocs)
      63             : 
      64             :     ! calculate local size and offsets
      65           0 :     if (rev .eq. 1) then
      66           0 :         if (idx > 0) then
      67           0 :             lsize = local_index(idx,rank,nprocs,nb,-1)
      68             :         else
      69           0 :             lsize = 0
      70             :         end if
      71             : 
      72           0 :         baseoffset = 1
      73           0 :         offset = 1
      74             :     else
      75           0 :         offset = local_index(idx,rank,nprocs,nb,1)
      76           0 :         baseoffset = local_index(baseidx,rank,nprocs,nb,1)
      77             : 
      78           0 :         lsize = local_index(n,rank,nprocs,nb,-1)
      79             :         !print *,'baseidx,idx',baseidx,idx,lsize,n
      80             : 
      81           0 :         lsize = lsize - offset + 1
      82             : 
      83           0 :         baseoffset = offset - baseoffset + 1
      84             :     end if
      85             : 
      86           0 : end subroutine local_size_offset_1d
      87             : #endif
      88             : 
      89             : subroutine reverse_vector_local_&
      90           0 :            &PRECISION &
      91             :      (n,x,incx,work,lwork)
      92             :     use precision
      93             :     implicit none
      94             : #include "../../general/precision_kinds.F90"
      95             : 
      96             :     ! input
      97             :     integer(kind=ik)              :: incx, n, lwork
      98             :     real(kind=C_DATATYPE_KIND)    :: x(*), work(*)
      99             : 
     100             :     ! local scalars
     101             :     real(kind=C_DATATYPE_KIND)    :: temp
     102             :     integer(kind=ik)              :: srcoffset, destoffset, ientry
     103             : 
     104           0 :     if (lwork .eq. -1) then
     105           0 :         work(1) = 0.0_rk
     106           0 :         return
     107             :     end if
     108             : 
     109           0 :     do ientry=1,n/2
     110           0 :         srcoffset=1+(ientry-1)*incx
     111           0 :         destoffset=1+(n-ientry)*incx
     112             : 
     113           0 :         temp = x(srcoffset)
     114           0 :         x(srcoffset) = x(destoffset)
     115           0 :         x(destoffset) = temp
     116             :     end do
     117             : 
     118             : end subroutine
     119             : 
     120             : subroutine reverse_matrix_local_&
     121           0 :            &PRECISION &
     122           0 :      (trans, m, n, a, lda, work, lwork)
     123             :     use precision
     124             :     implicit none
     125             : 
     126             :     ! input
     127             :     integer(kind=ik)              :: lda,m,n,lwork,trans
     128             :     real(kind=C_DATATYPE_KIND)    :: a(lda,*),work(*)
     129             : 
     130             :     ! local scalars
     131             :     real(kind=C_DATATYPE_KIND)    :: dworksize(1)
     132             :     integer(kind=ik)              :: incx
     133             :     integer(kind=ik)              :: dimsize
     134             :     integer(kind=ik)              :: i
     135             : 
     136           0 :     if (trans .eq. 1) then
     137           0 :         incx = lda
     138           0 :         dimsize = n
     139             :     else
     140           0 :         incx = 1
     141           0 :         dimsize = m
     142             :     end if
     143             : 
     144           0 :     if (lwork .eq. -1) then
     145             :         call reverse_vector_local_&
     146             :        &PRECISION &
     147           0 :        (dimsize, a, incx, dworksize, -1)
     148           0 :         work(1) = dworksize(1)
     149           0 :         return
     150             :     end if
     151             : 
     152           0 :     if (trans .eq. 1) then
     153           0 :         do i=1,m
     154             :             call reverse_vector_local_&
     155             :       &PRECISION &
     156           0 :       (dimsize, a(i,1), incx, work, lwork)
     157             :         end do
     158             :     else
     159           0 :         do i=1,n
     160             :             call reverse_vector_local_&
     161             :       &PRECISION &
     162           0 :       (dimsize, a(1,i), incx, work, lwork)
     163             :         end do
     164             :     end if
     165             : 
     166             : end subroutine
     167             : 
     168             : subroutine reverse_matrix_2dcomm_ref_&
     169           0 :            &PRECISION &
     170           0 :      (m, n, mb, nb, a, lda, work, lwork, mpicomm_cols, mpicomm_rows)
     171             :     use precision
     172             :     implicit none
     173             : 
     174             :     ! input
     175             :     integer(kind=ik)              :: m, n, lda, lwork, mpicomm_cols, mpicomm_rows, mb, nb
     176             :     real(kind=C_DATATYPE_KIND)    :: a(lda,*),work(*)
     177             : 
     178             :     ! local scalars
     179             :     real(kind=C_DATATYPE_KIND)    :: reverse_column_size(1)
     180             :     real(kind=C_DATATYPE_KIND)    :: reverse_row_size(1)
     181             : 
     182             :     integer(kind=ik)              :: mpirank_cols, mpirank_rows
     183             :     integer(kind=ik)              :: mpiprocs_cols, mpiprocs_rows
     184             :     integer(kind=ik)              :: mpierr
     185             :     integer(kind=ik)              :: lrows, lcols, offset, baseoffset
     186             : 
     187           0 :     call MPI_Comm_rank(mpicomm_cols,mpirank_cols,mpierr)
     188           0 :     call MPI_Comm_rank(mpicomm_rows,mpirank_rows,mpierr)
     189           0 :     call MPI_Comm_size(mpicomm_cols,mpiprocs_cols,mpierr)
     190           0 :     call MPI_Comm_size(mpicomm_rows,mpiprocs_rows,mpierr)
     191             :     call local_size_offset_1d(m,mb,1,1,0,mpirank_cols,mpiprocs_cols, &
     192           0 :                                   lrows,baseoffset,offset)
     193             : 
     194             :     call local_size_offset_1d(n,nb,1,1,0,mpirank_rows,mpiprocs_rows, &
     195           0 :                                   lcols,baseoffset,offset)
     196             : 
     197           0 :     if (lwork .eq. -1) then
     198             :         call reverse_matrix_1dcomm_&
     199             :   &PRECISION &
     200           0 :   (0,m,lcols,mb,a,lda,reverse_column_size,-1,mpicomm_cols)
     201             :         call reverse_matrix_1dcomm_&
     202             :   &PRECISION &
     203           0 :   (1,lrows,n,nb,a,lda,reverse_row_size,-1,mpicomm_rows)
     204           0 :         work(1) = max(reverse_column_size(1),reverse_row_size(1))
     205           0 :         return
     206             :     end if
     207             : 
     208             :     call reverse_matrix_1dcomm_&
     209             :     &PRECISION &
     210           0 :     (0,m,lcols,mb,a,lda,work,lwork,mpicomm_cols)
     211             :     call reverse_matrix_1dcomm_&
     212             :     &PRECISION &
     213           0 :     (1,lrows,n,nb,a,lda,work,lwork,mpicomm_rows)
     214             : end subroutine
     215             : 
     216             : ! b: if trans = 'N': b is size of block distribution between rows
     217             : ! b: if trans = 'T': b is size of block distribution between columns
     218             : subroutine reverse_matrix_1dcomm_&
     219           0 :            &PRECISION &
     220           0 :      (trans, m, n, b, a, lda, work, lwork, mpicomm)
     221             :     use precision
     222             :     use elpa_mpi
     223             : 
     224             :     implicit none
     225             : 
     226             :     ! input
     227             :     integer(kind=ik)              :: trans
     228             :     integer(kind=ik)              :: m, n, b, lda, lwork, mpicomm
     229             :     real(kind=C_DATATYPE_KIND)    :: a(lda,*), work(*)
     230             : 
     231             :     ! local scalars
     232             :     integer(kind=ik)              :: mpirank,mpiprocs,mpierr
     233             : #ifdef WITH_MPI
     234             :     integer(kind=ik)              :: my_mpistatus(MPI_STATUS_SIZE)
     235             : #endif
     236             :     integer(kind=ik)              :: nr_blocks,dest_process,src_process,step
     237             :     integer(kind=ik)              :: lsize,baseoffset,offset
     238             :     integer(kind=ik)              :: current_index,destblk,srcblk,icol,next_index
     239             :     integer(kind=ik)              :: sendcount,recvcount
     240             :     integer(kind=ik)              :: sendoffset,recvoffset
     241             :     integer(kind=ik)              :: newmatrix_offset,work_offset
     242             :     integer(kind=ik)              :: lcols,lrows,lroffset,lcoffset,dimsize,fixedsize
     243             :     real(kind=C_DATATYPE_KIND)    :: dworksize(1)
     244             : 
     245           0 :     call MPI_Comm_rank(mpicomm, mpirank, mpierr)
     246           0 :     call MPI_Comm_size(mpicomm, mpiprocs, mpierr)
     247           0 :     if (trans .eq. 1) then
     248             :         call local_size_offset_1d(n,b,1,1,0,mpirank,mpiprocs, &
     249           0 :                                   lcols,baseoffset,lcoffset)
     250           0 :         lrows = m
     251             :     else
     252             :         call local_size_offset_1d(m,b,1,1,0,mpirank,mpiprocs, &
     253           0 :                                   lrows,baseoffset,lroffset)
     254           0 :         lcols = n
     255             :     end if
     256             : 
     257           0 :     if (lwork .eq. -1) then
     258             :         call reverse_matrix_local_&
     259             :   &PRECISION &
     260           0 :   (trans,lrows,lcols,a,max(lrows,lcols),dworksize,-1)
     261           0 :         work(1) = real(3*lrows*lcols,kind=REAL_DATATYPE) + dworksize(1)
     262           0 :         return
     263             :     end if
     264             : 
     265           0 :     sendoffset = 1
     266           0 :     recvoffset = sendoffset + lrows*lcols
     267           0 :     newmatrix_offset = recvoffset + lrows*lcols
     268           0 :     work_offset = newmatrix_offset + lrows*lcols
     269             : 
     270           0 :     if (trans .eq. 1) then
     271           0 :         dimsize = n
     272           0 :         fixedsize = m
     273             :     else
     274           0 :         dimsize = m
     275           0 :         fixedsize = n
     276             :     end if
     277             : 
     278           0 :     if (dimsize .le. 1) then
     279           0 :         return ! nothing to do
     280             :     end if
     281             : 
     282             :     ! 1. adjust step size to remainder size
     283           0 :     nr_blocks = dimsize / b
     284           0 :     nr_blocks = nr_blocks * b
     285           0 :     step = dimsize - nr_blocks
     286           0 :     if (step .eq. 0) step = b
     287             : 
     288             :     ! 2. iterate over destination blocks starting with process 0
     289           0 :     current_index = 1
     290           0 :     do while (current_index .le. dimsize)
     291           0 :         destblk = (current_index-1) / b
     292           0 :         dest_process = mod(destblk,mpiprocs)
     293           0 :         srcblk = (dimsize-current_index) / b
     294           0 :         src_process = mod(srcblk,mpiprocs)
     295             : 
     296           0 :         next_index = current_index+step
     297             : 
     298             :         ! block for dest_process is located on mpirank if lsize > 0
     299             :         call local_size_offset_1d(dimsize-current_index+1,b,dimsize-next_index+2,dimsize-next_index+2,0, &
     300           0 :                                   src_process,mpiprocs,lsize,baseoffset,offset)
     301             : 
     302           0 :         sendcount = lsize*fixedsize
     303           0 :         recvcount = sendcount
     304             : 
     305             :         ! TODO: this send/recv stuff seems to blow up on BlueGene/P
     306             :         ! TODO: is there actually room for the requested matrix part? the target
     307             :         ! process might not have any parts at all (thus no room)
     308           0 :         if ((src_process .eq. mpirank) .and. (dest_process .eq. src_process)) then
     309             :                 ! 5. pack data
     310           0 :                 if (trans .eq. 1) then
     311           0 :                     do icol=offset,offset+lsize-1
     312             :                         work(sendoffset+(icol-offset)*lrows:sendoffset+(icol-offset+1)*lrows-1) = &
     313           0 :                             a(1:lrows,icol)
     314             :                     end do
     315             :                 else
     316           0 :                     do icol=1,lcols
     317             :                         work(sendoffset+(icol-1)*lsize:sendoffset+icol*lsize-1) = &
     318           0 :                             a(offset:offset+lsize-1,icol)
     319             :                     end do
     320             :                 end if
     321             : 
     322             :                 ! 7. reverse data
     323           0 :                 if (trans .eq. 1) then
     324             :                     call reverse_matrix_local_&
     325             :         &PRECISION &
     326           0 :         (1,lrows,lsize,work(sendoffset),lrows,work(work_offset),lwork)
     327             :                 else
     328             :                     call reverse_matrix_local_&
     329             :         &PRECISION &
     330           0 :         (0,lsize,lcols,work(sendoffset),lsize,work(work_offset),lwork)
     331             :                 end if
     332             : 
     333             :                 ! 8. store in temp matrix
     334           0 :                 if (trans .eq. 1) then
     335           0 :                     do icol=1,lsize
     336             :                         work(newmatrix_offset+(icol-1)*lrows:newmatrix_offset+icol*lrows-1) = &
     337           0 :                             work(sendoffset+(icol-1)*lrows:sendoffset+icol*lrows-1)
     338             :                     end do
     339             : 
     340           0 :                     newmatrix_offset = newmatrix_offset + lsize*lrows
     341             :                 else
     342           0 :                     do icol=1,lcols
     343             :                         work(newmatrix_offset+(icol-1)*lrows:newmatrix_offset+(icol-1)*lrows+lsize-1) = &
     344           0 :                             work(sendoffset+(icol-1)*lsize:sendoffset+icol*lsize-1)
     345             :                     end do
     346             : 
     347           0 :                     newmatrix_offset = newmatrix_offset + lsize
     348             :                 end if
     349             :         else
     350             : 
     351           0 :             if (dest_process .eq. mpirank) then
     352             :                 ! 6b. call MPI_Recv
     353             : 
     354             : #ifdef WITH_MPI
     355             : 
     356             : 
     357             :                 call MPI_Recv(work(recvoffset), recvcount, MPI_REAL_PRECISION, &
     358           0 :                               src_process, current_index, mpicomm, my_mpistatus, mpierr)
     359             : 
     360             : #else /* WITH_MPI */
     361           0 :                 work(recvoffset:recvoffset+recvcount-1) = work(sendoffset:sendoffset+sendcount-1)
     362             : #endif /* WITH_MPI */
     363             : 
     364             :                 ! 7. reverse data
     365           0 :                 if (trans .eq. 1) then
     366             :                     call reverse_matrix_local_&
     367             :         &PRECISION &
     368           0 :         (1,lrows,lsize,work(recvoffset),lrows,work(work_offset),lwork)
     369             :                 else
     370             :                     call reverse_matrix_local_&
     371             :         &PRECISION &
     372           0 :         (0,lsize,lcols,work(recvoffset),lsize,work(work_offset),lwork)
     373             :                 end if
     374             : 
     375             :                 ! 8. store in temp matrix
     376           0 :                 if (trans .eq. 1) then
     377           0 :                     do icol=1,lsize
     378             :                         work(newmatrix_offset+(icol-1)*lrows:newmatrix_offset+icol*lrows-1) = &
     379           0 :                             work(recvoffset+(icol-1)*lrows:recvoffset+icol*lrows-1)
     380             :                     end do
     381             : 
     382           0 :                     newmatrix_offset = newmatrix_offset + lsize*lrows
     383             :                 else
     384           0 :                     do icol=1,lcols
     385             :                         work(newmatrix_offset+(icol-1)*lrows:newmatrix_offset+(icol-1)*lrows+lsize-1) = &
     386           0 :                             work(recvoffset+(icol-1)*lsize:recvoffset+icol*lsize-1)
     387             :                     end do
     388             : 
     389           0 :                     newmatrix_offset = newmatrix_offset + lsize
     390             :                 end if
     391             :             end if
     392             : 
     393           0 :             if (src_process .eq. mpirank) then
     394             :                 ! 5. pack data
     395           0 :                 if (trans .eq. 1) then
     396           0 :                     do icol=offset,offset+lsize-1
     397             :                         work(sendoffset+(icol-offset)*lrows:sendoffset+(icol-offset+1)*lrows-1) = &
     398           0 :                             a(1:lrows,icol)
     399             :                     end do
     400             :                 else
     401           0 :                     do icol=1,lcols
     402             :                         work(sendoffset+(icol-1)*lsize:sendoffset+icol*lsize-1) = &
     403           0 :                             a(offset:offset+lsize-1,icol)
     404             :                     end do
     405             :                 end if
     406             : 
     407             :                 ! 6a. call MPI_Send
     408             : #ifdef WITH_MPI
     409             : 
     410             :                 call MPI_Send(work(sendoffset), sendcount, MPI_REAL_PRECISION, &
     411           0 :                                   dest_process, current_index, mpicomm, mpierr)
     412             : #endif /* WITH_MPI */
     413             :             end if
     414             :         end if
     415             : 
     416           0 :         current_index = next_index
     417             :     end do
     418             : 
     419             :    ! 9. copy temp matrix to real matrix
     420           0 :    newmatrix_offset = recvoffset + lrows*lcols
     421           0 :    do icol=1,lcols
     422             :         a(1:lrows,icol) = &
     423           0 :             work(newmatrix_offset+(icol-1)*lrows:newmatrix_offset+icol*lrows-1)
     424             :    end do
     425             : end subroutine

Generated by: LCOV version 1.12