LCOV - code coverage report
Current view: top level - src/elpa1 - elpa_transpose_vectors.F90 (source / functions) Hit Total Coverage
Test: coverage_50ab7a7628bba174fc62cee3ab72b26e81f87fe5.info Lines: 46 46 100.0 %
Date: 2018-01-10 09:29:53 Functions: 10 10 100.0 %

          Line data    Source code
       1             : #if 0
       2             : !    This file is part of ELPA.
       3             : !
       4             : !    The ELPA library was originally created by the ELPA consortium,
       5             : !    consisting of the following organizations:
       6             : !
       7             : !    - Max Planck Computing and Data Facility (MPCDF), formerly known as
       8             : !      Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG),
       9             : !    - Bergische Universität Wuppertal, Lehrstuhl für angewandte
      10             : !      Informatik,
      11             : !    - Technische Universität München, Lehrstuhl für Informatik mit
      12             : !      Schwerpunkt Wissenschaftliches Rechnen ,
      13             : !    - Fritz-Haber-Institut, Berlin, Abt. Theorie,
      14             : !    - Max-Plack-Institut für Mathematik in den Naturwissenschaften,
      15             : !      Leipzig, Abt. Komplexe Strukutren in Biologie und Kognition,
      16             : !      and
      17             : !    - IBM Deutschland GmbH
      18             : !
      19             : !
      20             : !    More information can be found here:
      21             : !    http://elpa.mpcdf.mpg.de/
      22             : !
      23             : !    ELPA is free software: you can redistribute it and/or modify
      24             : !    it under the terms of the version 3 of the license of the
      25             : !    GNU Lesser General Public License as published by the Free
      26             : !    Software Foundation.
      27             : !
      28             : !    ELPA is distributed in the hope that it will be useful,
      29             : !    but WITHOUT ANY WARRANTY; without even the implied warranty of
      30             : !    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      31             : !    GNU Lesser General Public License for more details.
      32             : !
      33             : !    You should have received a copy of the GNU Lesser General Public License
      34             : !    along with ELPA.  If not, see <http://www.gnu.org/licenses/>
      35             : !
      36             : !    ELPA reflects a substantial effort on the part of the original
      37             : !    ELPA consortium, and we ask you to respect the spirit of the
      38             : !    license that we chose: i.e., please contribute any changes you
      39             : !    may have back to the original ELPA library distribution, and keep
      40             : !    any derivatives of ELPA under the same license that we chose for
      41             : !    the original distribution, the GNU Lesser General Public License.
      42             : !
      43             : ! Copyright of the original code rests with the authors inside the ELPA
      44             : ! consortium. The copyright of any additional modifications shall rest
      45             : ! with their original authors, but shall adhere to the licensing terms
      46             : ! distributed along with the original code in the file "COPYING".
      47             : ! Author: Andreas Marek, MPCDF
      48             : #endif
      49             : 
      50             : #include "config-f90.h"
      51             : #include "../general/sanity.F90"
      52             : 
      53             : subroutine elpa_transpose_vectors_&
      54             : &MATH_DATATYPE&
      55             : &_&
      56     5551488 : &PRECISION &
      57     5551488 :              (obj, vmat_s, ld_s, comm_s, vmat_t, ld_t, comm_t, nvs, nvr, nvc, nblk)
      58             : 
      59             : !-------------------------------------------------------------------------------
      60             : ! This routine transposes an array of vectors which are distributed in
      61             : ! communicator comm_s into its transposed form distributed in communicator comm_t.
      62             : ! There must be an identical copy of vmat_s in every communicator comm_s.
      63             : ! After this routine, there is an identical copy of vmat_t in every communicator comm_t.
      64             : !
      65             : ! vmat_s    original array of vectors
      66             : ! ld_s      leading dimension of vmat_s
      67             : ! comm_s    communicator over which vmat_s is distributed
      68             : ! vmat_t    array of vectors in transposed form
      69             : ! ld_t      leading dimension of vmat_t
      70             : ! comm_t    communicator over which vmat_t is distributed
      71             : ! nvs       global index where to start in vmat_s/vmat_t
      72             : !           Please note: this is kind of a hint, some values before nvs will be
      73             : !           accessed in vmat_s/put into vmat_t
      74             : ! nvr       global length of vmat_s/vmat_t
      75             : ! nvc       number of columns in vmat_s/vmat_t
      76             : ! nblk      block size of block cyclic distribution
      77             : !
      78             : !-------------------------------------------------------------------------------
      79             :     use precision
      80             :     use elpa_abstract_impl
      81             : #ifdef WITH_OPENMP
      82             :    use omp_lib
      83             : #endif
      84             :    use elpa_mpi
      85             : 
      86             :    implicit none
      87             :    class(elpa_abstract_impl_t), intent(inout) :: obj
      88             :    integer(kind=ik), intent(in)                      :: ld_s, comm_s, ld_t, comm_t, nvs, nvr, nvc, nblk
      89             :    MATH_DATATYPE(kind=C_DATATYPE_KIND), intent(in)   :: vmat_s(ld_s,nvc)
      90             :    MATH_DATATYPE(kind=C_DATATYPE_KIND), intent(inout):: vmat_t(ld_t,nvc)
      91             : 
      92     5551488 :    MATH_DATATYPE(kind=C_DATATYPE_KIND), allocatable  :: aux(:)
      93             :    integer(kind=ik)                                  :: myps, mypt, nps, npt
      94             :    integer(kind=ik)                                  :: n, lc, k, i, ips, ipt, ns, nl, mpierr
      95             :    integer(kind=ik)                                  :: lcm_s_t, nblks_tot, nblks_comm, nblks_skip
      96             :    integer(kind=ik)                                  :: auxstride
      97             : 
      98             :    call obj%timer%start("elpa_transpose_vectors_&
      99             :    &MATH_DATATYPE&
     100             :    &" // &
     101             :    &PRECISION_SUFFIX &
     102     5551488 :    )
     103             : 
     104     5551488 :    call obj%timer%start("mpi_communication")
     105     5551488 :    call mpi_comm_rank(comm_s,myps,mpierr)
     106     5551488 :    call mpi_comm_size(comm_s,nps ,mpierr)
     107     5551488 :    call mpi_comm_rank(comm_t,mypt,mpierr)
     108     5551488 :    call mpi_comm_size(comm_t,npt ,mpierr)
     109             : 
     110     5551488 :    call obj%timer%stop("mpi_communication")
     111             :    ! The basic idea of this routine is that for every block (in the block cyclic
     112             :    ! distribution), the processor within comm_t which owns the diagonal
     113             :    ! broadcasts its values of vmat_s to all processors within comm_t.
     114             :    ! Of course this has not to be done for every block separately, since
     115             :    ! the communictation pattern repeats in the global matrix after
     116             :    ! the least common multiple of (nps,npt) blocks
     117             : 
     118     5551488 :    lcm_s_t   = least_common_multiple(nps,npt) ! least common multiple of nps, npt
     119             : 
     120     5551488 :    nblks_tot = (nvr+nblk-1)/nblk ! number of blocks corresponding to nvr
     121             : 
     122             :    ! Get the number of blocks to be skipped at the begin.
     123             :    ! This must be a multiple of lcm_s_t (else it is getting complicated),
     124             :    ! thus some elements before nvs will be accessed/set.
     125             : 
     126     5551488 :    nblks_skip = ((nvs-1)/(nblk*lcm_s_t))*lcm_s_t
     127             : 
     128     5551488 :    allocate(aux( ((nblks_tot-nblks_skip+lcm_s_t-1)/lcm_s_t) * nblk * nvc ))
     129             : #ifdef WITH_OPENMP
     130     5551488 :    !$omp parallel private(lc, i, k, ns, nl, nblks_comm, auxstride, ips, ipt, n)
     131             : #endif
     132    14803968 :    do n = 0, lcm_s_t-1
     133             : 
     134     9252480 :       ips = mod(n,nps)
     135     9252480 :       ipt = mod(n,npt)
     136             : 
     137     9252480 :       if(mypt == ipt) then
     138             : 
     139     7394208 :          nblks_comm = (nblks_tot-nblks_skip-n+lcm_s_t-1)/lcm_s_t
     140     7394208 :          auxstride = nblk * nblks_comm
     141             : !         if(nblks_comm==0) cycle
     142     7394208 :          if (nblks_comm .ne. 0) then
     143     7255968 :          if(myps == ips) then
     144             : !            k = 0
     145             : #ifdef WITH_OPENMP
     146     2729664 :             !$omp do
     147             : #endif
     148    12012624 :             do lc=1,nvc
     149   218112816 :                do i = nblks_skip+n, nblks_tot-1, lcm_s_t
     150   190263936 :                   k = (i - nblks_skip - n)/lcm_s_t * nblk + (lc - 1) * auxstride
     151   190263936 :                   ns = (i/nps)*nblk ! local start of block i
     152   279176544 :                   nl = min(nvr-i*nblk,nblk) ! length
     153  1698064704 :                   aux(k+1:k+nl) = vmat_s(ns+1:ns+nl,lc)
     154             : !                  k = k+nblk
     155             :                enddo
     156             :             enddo
     157             :          endif
     158             : 
     159             : #ifdef WITH_OPENMP
     160     3627984 :          !$omp barrier
     161     2702736 :          !$omp master
     162             : #endif
     163             : 
     164             : #ifdef WITH_MPI
     165     5405472 :         call obj%timer%start("mpi_communication")
     166             : 
     167             :         call MPI_Bcast(aux, nblks_comm*nblk*nvc,    &
     168             : #if REALCASE == 1
     169             :                        MPI_REAL_PRECISION,    &
     170             : #endif
     171             : #if COMPLEXCASE == 1
     172             :                        MPI_COMPLEX_PRECISION, &
     173             : #endif
     174     5405472 :                        ips, comm_s, mpierr)
     175             : 
     176             : 
     177     5405472 :          call obj%timer%stop("mpi_communication")
     178             : #endif /* WITH_MPI */
     179             : 
     180             : #ifdef WITH_OPENMP
     181             :          !$omp end master
     182     3627984 :          !$omp barrier
     183             : 
     184     3627984 :          !$omp do
     185             : #endif
     186             : !         k = 0
     187    15935376 :          do lc=1,nvc
     188   274365024 :             do i = nblks_skip+n, nblks_tot-1, lcm_s_t
     189   237442848 :                k = (i - nblks_skip - n)/lcm_s_t * nblk + (lc - 1) * auxstride
     190   237442848 :                ns = (i/npt)*nblk ! local start of block i
     191   348421176 :                nl = min(nvr-i*nblk,nblk) ! length
     192  2119408272 :                vmat_t(ns+1:ns+nl,lc) = aux(k+1:k+nl)
     193             : !               k = k+nblk
     194             :             enddo
     195             :          enddo
     196             :          endif
     197             :       endif
     198             : 
     199             :    enddo
     200             : #ifdef WITH_OPENMP
     201             :    !$omp end parallel
     202             : #endif
     203     5551488 :    deallocate(aux)
     204             : 
     205             :    call obj%timer%stop("elpa_transpose_vectors_&
     206             :    &MATH_DATATYPE&
     207             :    &" // &
     208             :    &PRECISION_SUFFIX &
     209     5551488 :    )
     210             : 
     211     5551488 : end subroutine
     212             : 

Generated by: LCOV version 1.12