LCOV - code coverage report
Current view: top level - src/elpa1 - elpa_reduce_add_vectors.F90 (source / functions) Hit Total Coverage
Test: coverage_50ab7a7628bba174fc62cee3ab72b26e81f87fe5.info Lines: 50 50 100.0 %
Date: 2018-01-10 09:29:53 Functions: 10 10 100.0 %

          Line data    Source code
       1             : #if 0
       2             : !    This file is part of ELPA.
       3             : !
       4             : !    The ELPA library was originally created by the ELPA consortium,
       5             : !    consisting of the following organizations:
       6             : !
       7             : !    - Max Planck Computing and Data Facility (MPCDF), formerly known as
       8             : !      Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG),
       9             : !    - Bergische Universität Wuppertal, Lehrstuhl für angewandte
      10             : !      Informatik,
      11             : !    - Technische Universität München, Lehrstuhl für Informatik mit
      12             : !      Schwerpunkt Wissenschaftliches Rechnen ,
      13             : !    - Fritz-Haber-Institut, Berlin, Abt. Theorie,
      14             : !    - Max-Plack-Institut für Mathematik in den Naturwissenschaften,
      15             : !      Leipzig, Abt. Komplexe Strukutren in Biologie und Kognition,
      16             : !      and
      17             : !    - IBM Deutschland GmbH
      18             : !
      19             : !
      20             : !    More information can be found here:
      21             : !    http://elpa.mpcdf.mpg.de/
      22             : !
      23             : !    ELPA is free software: you can redistribute it and/or modify
      24             : !    it under the terms of the version 3 of the license of the
      25             : !    GNU Lesser General Public License as published by the Free
      26             : !    Software Foundation.
      27             : !
      28             : !    ELPA is distributed in the hope that it will be useful,
      29             : !    but WITHOUT ANY WARRANTY; without even the implied warranty of
      30             : !    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      31             : !    GNU Lesser General Public License for more details.
      32             : !
      33             : !    You should have received a copy of the GNU Lesser General Public License
      34             : !    along with ELPA.  If not, see <http://www.gnu.org/licenses/>
      35             : !
      36             : !    ELPA reflects a substantial effort on the part of the original
      37             : !    ELPA consortium, and we ask you to respect the spirit of the
      38             : !    license that we chose: i.e., please contribute any changes you
      39             : !    may have back to the original ELPA library distribution, and keep
      40             : !    any derivatives of ELPA under the same license that we chose for
      41             : !    the original distribution, the GNU Lesser General Public License.
      42             : !
      43             : ! Author: Andreas Marek, MPCDF
      44             : #endif
      45             : 
      46             : #include "config-f90.h"
      47             : #include "../general/sanity.F90"
      48             : 
      49             : subroutine elpa_reduce_add_vectors_&
      50             : &MATH_DATATYPE&
      51             : &_&
      52     1227264 : &PRECISION &
      53     1227264 :             (obj, vmat_s, ld_s, comm_s, vmat_t, ld_t, comm_t, nvr, nvc, nblk)
      54             : 
      55             : !-------------------------------------------------------------------------------
      56             : ! This routine does a reduce of all vectors in vmat_s over the communicator comm_t.
      57             : ! The result of the reduce is gathered on the processors owning the diagonal
      58             : ! and added to the array of vectors vmat_t (which is distributed over comm_t).
      59             : !
      60             : ! Opposed to elpa_transpose_vectors, there is NO identical copy of vmat_s
      61             : ! in the different members within vmat_t (else a reduce wouldn't be necessary).
      62             : ! After this routine, an allreduce of vmat_t has to be done.
      63             : !
      64             : ! vmat_s    array of vectors to be reduced and added
      65             : ! ld_s      leading dimension of vmat_s
      66             : ! comm_s    communicator over which vmat_s is distributed
      67             : ! vmat_t    array of vectors to which vmat_s is added
      68             : ! ld_t      leading dimension of vmat_t
      69             : ! comm_t    communicator over which vmat_t is distributed
      70             : ! nvr       global length of vmat_s/vmat_t
      71             : ! nvc       number of columns in vmat_s/vmat_t
      72             : ! nblk      block size of block cyclic distribution
      73             : !
      74             : !-------------------------------------------------------------------------------
      75             : 
      76             :    use precision
      77             : #ifdef WITH_OPENMP
      78             :    use omp_lib
      79             : #endif
      80             :    use elpa_mpi
      81             :    use elpa_abstract_impl
      82             :    implicit none
      83             : 
      84             :    class(elpa_abstract_impl_t), intent(inout) :: obj
      85             :    integer(kind=ik), intent(in)                       :: ld_s, comm_s, ld_t, comm_t, nvr, nvc, nblk
      86             :    MATH_DATATYPE(kind=C_DATATYPE_KIND), intent(in)    :: vmat_s(ld_s,nvc)
      87             :    MATH_DATATYPE(kind=C_DATATYPE_KIND), intent(inout) :: vmat_t(ld_t,nvc)
      88             : 
      89     1227264 :    MATH_DATATYPE(kind=C_DATATYPE_KIND), allocatable   :: aux1(:), aux2(:)
      90             :    integer(kind=ik)                                   :: myps, mypt, nps, npt
      91             :    integer(kind=ik)                                   :: n, lc, k, i, ips, ipt, ns, nl, mpierr
      92             :    integer(kind=ik)                                   :: lcm_s_t, nblks_tot
      93             :    integer(kind=ik)                                   :: auxstride
      94             : 
      95             :    call obj%timer%start("elpa_reduce_add_vectors_&
      96             :    &MATH_DATATYPE&
      97             :    &" // &
      98             :    &PRECISION_SUFFIX &
      99     1227264 :    )
     100             : 
     101     1227264 :    call obj%timer%start("mpi_communication")
     102     1227264 :    call mpi_comm_rank(comm_s,myps,mpierr)
     103     1227264 :    call mpi_comm_size(comm_s,nps ,mpierr)
     104     1227264 :    call mpi_comm_rank(comm_t,mypt,mpierr)
     105     1227264 :    call mpi_comm_size(comm_t,npt ,mpierr)
     106     1227264 :    call obj%timer%stop("mpi_communication")
     107             : 
     108             :    ! Look to elpa_transpose_vectors for the basic idea!
     109             : 
     110             :    ! The communictation pattern repeats in the global matrix after
     111             :    ! the least common multiple of (nps,npt) blocks
     112             : 
     113     1227264 :    lcm_s_t   = least_common_multiple(nps,npt) ! least common multiple of nps, npt
     114             : 
     115     1227264 :    nblks_tot = (nvr+nblk-1)/nblk ! number of blocks corresponding to nvr
     116             : 
     117     1227264 :    allocate(aux1( ((nblks_tot+lcm_s_t-1)/lcm_s_t) * nblk * nvc ))
     118     1227264 :    allocate(aux2( ((nblks_tot+lcm_s_t-1)/lcm_s_t) * nblk * nvc ))
     119     1227264 :    aux1(:) = 0
     120     1227264 :    aux2(:) = 0
     121             : #ifdef WITH_OPENMP
     122     1227264 :    !$omp parallel private(ips, ipt, auxstride, lc, i, k, ns, nl)
     123             : #endif
     124     3193920 :    do n = 0, lcm_s_t-1
     125             : 
     126     1966656 :       ips = mod(n,nps)
     127     1966656 :       ipt = mod(n,npt)
     128             : 
     129     1966656 :       auxstride = nblk * ((nblks_tot - n + lcm_s_t - 1)/lcm_s_t)
     130             : 
     131     1966656 :       if(myps == ips) then
     132             : 
     133             : !         k = 0
     134             : #ifdef WITH_OPENMP
     135      613632 :          !$omp do
     136             : #endif
     137     2057472 :          do lc=1,nvc
     138    77250048 :             do i = n, nblks_tot-1, lcm_s_t
     139    72918528 :                k = (i - n)/lcm_s_t * nblk + (lc - 1) * auxstride
     140    72918528 :                ns = (i/nps)*nblk ! local start of block i
     141   108379536 :                nl = min(nvr-i*nblk,nblk) ! length
     142   653106672 :                aux1(k+1:k+nl) = vmat_s(ns+1:ns+nl,lc)
     143             : !               k = k+nblk
     144             :             enddo
     145             :          enddo
     146             : 
     147     1227264 :          k = nvc * auxstride
     148             : #ifdef WITH_OPENMP
     149      613632 :          !$omp barrier
     150      613632 :          !$omp master
     151             : #endif
     152             : 
     153             : #ifdef WITH_MPI
     154      739392 :          call obj%timer%start("mpi_communication")
     155             : 
     156      739392 :          if (k>0) call mpi_reduce(aux1, aux2, k, &
     157             : #if REALCASE == 1
     158             :                                   MPI_REAL_PRECISION,  &
     159             : #endif
     160             : #if COMPLEXCASE == 1
     161             :                                   MPI_COMPLEX_PRECISION, &
     162             : #endif
     163      739392 :                                   MPI_SUM, ipt, comm_t, mpierr)
     164             : 
     165      739392 :           call obj%timer%stop("mpi_communication")
     166             : 
     167             : #else /* WITH_MPI */
     168      487872 :          if(k>0) aux2 = aux1
     169             : #endif /* WITH_MPI */
     170             : 
     171             : #ifdef WITH_OPENMP
     172             :          !$omp end master
     173      613632 :          !$omp barrier
     174             : #endif
     175     1227264 :          if (mypt == ipt) then
     176             : !            k = 0
     177             : #ifdef WITH_OPENMP
     178      613632 :          !$omp do
     179             : #endif
     180     2057472 :             do lc=1,nvc
     181    77250048 :                do i = n, nblks_tot-1, lcm_s_t
     182    72918528 :                   k = (i - n)/lcm_s_t * nblk + (lc - 1) * auxstride
     183    72918528 :                   ns = (i/npt)*nblk ! local start of block i
     184   108379536 :                   nl = min(nvr-i*nblk,nblk) ! length
     185   653106672 :                   vmat_t(ns+1:ns+nl,lc) = vmat_t(ns+1:ns+nl,lc) + aux2(k+1:k+nl)
     186             : !                  k = k+nblk
     187             :                enddo
     188             :             enddo
     189             :          endif
     190             : 
     191             :       endif
     192             : 
     193             :    enddo
     194             : #ifdef WITH_OPENMP
     195             :    !$omp end parallel
     196             : #endif
     197             : 
     198     1227264 :    deallocate(aux1)
     199     1227264 :    deallocate(aux2)
     200             : 
     201             :    call obj%timer%stop("elpa_reduce_add_vectors_&
     202             :    &MATH_DATATYPE&
     203             :    &" // &
     204             :    &PRECISION_SUFFIX &
     205     1227264 :    )
     206     1227264 : end subroutine
     207             : 
     208             : 

Generated by: LCOV version 1.12