mat_mat_coarray_rel Function

private impure function mat_mat_coarray_rel(a, b, transA, transB, option, coarray) result(c)

Arguments

Type IntentOptional Attributes Name
real(kind=rk), intent(in), contiguous :: a(:,:)
real(kind=rk), intent(in), contiguous :: b(:,:)
logical, intent(in), optional :: transA
logical, intent(in), optional :: transB
character(len=*), intent(in), optional :: option
logical, intent(in) :: coarray

Return Value real(kind=rk), allocatable, (:,:)


Calls

proc~~mat_mat_coarray_rel~~CallsGraph proc~mat_mat_coarray_rel formatmul::mat_mat_coarray_rel proc~compute_block_ranges formatmul::compute_block_ranges proc~mat_mat_coarray_rel->proc~compute_block_ranges

Called by

proc~~mat_mat_coarray_rel~~CalledByGraph proc~mat_mat_coarray_rel formatmul::mat_mat_coarray_rel interface~matmul formatmul::matmul interface~matmul->proc~mat_mat_coarray_rel

Source Code

   impure function mat_mat_coarray_rel(a, b, transA, transB, option, coarray) result(c)
      real(rk), intent(in), contiguous :: a(:,:), b(:,:)
      character(*), intent(in), optional :: option
      logical, intent(in), optional :: transA, transB
      real(rk), allocatable :: c(:,:)
      logical, intent(in)   :: coarray
#if defined (USE_COARRAY)
      integer               :: i, im, nimg, n, m
      integer               :: block_size(num_images()), start_elem(num_images()), end_elem(num_images())
      real(rk), allocatable :: C_block(:,:)[:], B_block(:,:)[:], A_block(:,:)[:]

      if (present(transA) .and. present(transB)) then
         if (.not.transA .and. .not.transB) then
            ! AB
            allocate(C(size(A,1), size(B,2)), source=0.0_rk)
            im   = this_image()
            nimg = num_images()
            m    = size(A,1)
            n    = size(A,2)
            call compute_block_ranges(size(B,2), nimg, block_size, start_elem, end_elem)
            allocate(B_block(n, block_size(im))[*], C_block(m, block_size(im))[*])
            B_block(:,:)[im] = B(:, start_elem(im):end_elem(im))
            C_block(:,:)[im] = matmul(A, B_block(:,:)[im], transA=.false., transB=.false., option=option)
            sync all
            if (im == 1) then
               do i = 1, nimg
                  C(:,start_elem(i):end_elem(i)) = C_block(:,:)[i]
               end do
            end if
         else if (transA .and. transB) then
            ! ATBT
            allocate(C(size(A,2), size(B,1)), source=0.0_rk)
            im   = this_image()
            nimg = num_images()
            m    = size(A,1)
            n    = size(A,2)
            call compute_block_ranges(size(A,2), nimg, block_size, start_elem, end_elem)
            allocate(A_block(m, block_size(im))[*], C_block(block_size(im), size(B,1))[*])
            A_block(:,:)[im] = A(:, start_elem(im):end_elem(im))
            C_block(:,:)[im] = matmul(A_block(:,:)[im], B, transA=.true., transB=.true., option=option)
            sync all
            if (im == 1) then
               do i = 1, nimg
                  C(start_elem(i):end_elem(i), :) = C_block(:,:)[i]
               end do
            end if
         else if (transA .and. .not.transB) then
            ! ATB
            allocate(C(size(A,2), size(B,2)), source=0.0_rk)
            im   = this_image()
            nimg = num_images()
            m    = size(A,1)
            n    = size(A,2)
            call compute_block_ranges(size(A,2), nimg, block_size, start_elem, end_elem)
            allocate(A_block(m, block_size(im))[*], C_block(block_size(im), size(B,2))[*])
            A_block(:,:)[im] = A(:, start_elem(im):end_elem(im))
            C_block(:,:)[im] = matmul(A_block(:, :)[im], B, transA=.true., transB=.false., option=option)
            sync all
            if (im == 1) then
               do i = 1, nimg
                  C(start_elem(i):end_elem(i), :) = C_block(:,:)[i]
               end do
            end if
         else if (.not.transA .and. transB) then
            ! ABT
            allocate(C(size(A,1), size(B,1)), source=0.0_rk)
            im   = this_image()
            nimg = num_images()
            m    = size(A,1)
            n    = size(A,2)
            call compute_block_ranges(size(A,2), nimg, block_size, start_elem, end_elem)
            allocate(A_block(m, block_size(im))[*], B_block(size(B,1), block_size(im))[*])
            allocate(C_block(m, size(B,1))[*])
            A_block(:,:)[im] = A(:, start_elem(im):end_elem(im))
            B_block(:,:)[im] = B(:, start_elem(im):end_elem(im))
            C_block(:,:)[im] = matmul(A_block(:,:)[im], B_block(:,:)[im], transA=.false., transB=.true., option=option)
            sync all
            if (im == 1) then
               do i = 1, nimg
                  C(:, :) = C(:, :) + C_block(:,:)[i]
               end do
            end if
         end if
      else if (present(transA) .or. present(transB)) then
         if (present(transA)) then
            if (transA) then
               ! ATB
               allocate(C(size(A,2), size(B,2)), source=0.0_rk)
               im   = this_image()
               nimg = num_images()
               m    = size(A,1)
               n    = size(A,2)
               call compute_block_ranges(size(A,2), nimg, block_size, start_elem, end_elem)
               allocate(A_block(m, block_size(im))[*], C_block(block_size(im), size(B,2))[*])
               A_block(:,:)[im] = A(:, start_elem(im):end_elem(im))
               C_block(:,:)[im] = matmul(A_block(:, :)[im], B, transA=.true., transB=.false., option=option)
               sync all
               if (im == 1) then
                  do i = 1, nimg
                     C(start_elem(i):end_elem(i), :) = C_block(:,:)[i]
                  end do
               end if
            else if (.not.transA) then
               ! ABT
               allocate(C(size(A,1), size(B,1)), source=0.0_rk)
               im   = this_image()
               nimg = num_images()
               m    = size(A,1)
               n    = size(A,2)
               call compute_block_ranges(size(A,2), nimg, block_size, start_elem, end_elem)
               allocate(A_block(m, block_size(im))[*], B_block(size(B,1), block_size(im))[*])
               allocate(C_block(m, size(B,1))[*])
               A_block(:,:)[im] = A(:, start_elem(im):end_elem(im))
               B_block(:,:)[im] = B(:, start_elem(im):end_elem(im))
               C_block(:,:)[im] = matmul(A_block(:,:)[im], B_block(:,:)[im], transA=.false., transB=.true., option=option)
               sync all
               if (im == 1) then
                  do i = 1, nimg
                     C(:, :) = C(:, :) + C_block(:,:)[i]
                  end do
               end if
            end if
         else if (present(transB)) then
            if (transB) then
               ! ABT
               allocate(C(size(A,1), size(B,1)), source=0.0_rk)
               im   = this_image()
               nimg = num_images()
               m    = size(A,1)
               n    = size(A,2)
               call compute_block_ranges(size(A,2), nimg, block_size, start_elem, end_elem)
               allocate(A_block(m, block_size(im))[*], B_block(size(B,1), block_size(im))[*])
               allocate(C_block(m, size(B,1))[*])
               A_block(:,:)[im] = A(:, start_elem(im):end_elem(im))
               B_block(:,:)[im] = B(:, start_elem(im):end_elem(im))
               C_block(:,:)[im] = matmul(A_block(:,:)[im], B_block(:,:)[im], transA=.false., transB=.true., option=option)
               sync all
               if (im == 1) then
                  do i = 1, nimg
                     C(:, :) = C(:, :) + C_block(:,:)[i]
                  end do
               end if
            else if (.not.transB) then
               ! ATB
               allocate(C(size(A,2), size(B,2)), source=0.0_rk)
               im   = this_image()
               nimg = num_images()
               m    = size(A,1)
               n    = size(A,2)
               call compute_block_ranges(size(A,2), nimg, block_size, start_elem, end_elem)
               allocate(A_block(m, block_size(im))[*], C_block(block_size(im), size(B,2))[*])
               A_block(:,:)[im] = A(:, start_elem(im):end_elem(im))
               C_block(:,:)[im] = matmul(A_block(:, :)[im], B, transA=.true., transB=.false., option=option)
               sync all
               if (im == 1) then
                  do i = 1, nimg
                     C(start_elem(i):end_elem(i), :) = C_block(:,:)[i]
                  end do
               end if
            end if
         end if
      else if (.not.present(transA) .and. .not.present(transB)) then
         ! AB
         allocate(C(size(A,1), size(B,2)), source=0.0_rk)
         im   = this_image()
         nimg = num_images()
         m    = size(A,1)
         n    = size(A,2)
         call compute_block_ranges(size(B,2), nimg, block_size, start_elem, end_elem)
         allocate(B_block(n, block_size(im))[*], C_block(m, block_size(im))[*])
         B_block(:,:)[im] = B(:, start_elem(im):end_elem(im))
         C_block(:,:)[im] = matmul(A, B_block(:,:)[im], transA=.false., transB=.false., option=option)
         sync all
         if (im == 1) then
            do i = 1, nimg
               C(:,start_elem(i):end_elem(i)) = C_block(:,:)[i]
            end do
         end if
      end if

#else
      C = matmul(A, B, transA=transA, transB=transB, option=option)
#endif

   end function mat_mat_coarray_rel