pure function mat_vec_block_rel(A, v, transA, option, nblock) result(w)
real(rk), intent(in), contiguous :: A(:,:), v(:)
logical, intent(in), optional :: transA
character(*), intent(in), optional :: option
integer, intent(in) :: nblock
real(rk), allocatable :: w(:)
integer :: ib, se, ee
integer :: block_size(nblock), start_elem(nblock), end_elem(nblock)
if (present(transA)) then
if (transA) then
! ATv
allocate(w(size(A,2)), source=0.0_rk)
call compute_block_ranges(size(A,2), nblock, block_size, start_elem, end_elem)
#if defined(USE_DO_CONCURRENT)
do concurrent (ib = 1: nblock)
se = start_elem(ib)
ee = end_elem(ib)
w(se:ee) = &
w(se:ee) + matmul(A(:,se:ee), v, transA=.true., option=option)
end do
#else
do ib = 1, nblock
se = start_elem(ib)
ee = end_elem(ib)
w(se:ee) = &
w(se:ee) + matmul(A(:,se:ee), v, transA=.true., option=option)
end do
#endif
else if (.not. transA) then
! Av
allocate(w(size(A,1)), source=0.0_rk)
call compute_block_ranges(size(A,2), nblock, block_size, start_elem, end_elem)
#if defined(USE_DO_CONCURRENT)
do concurrent (ib = 1: nblock)
se = start_elem(ib)
ee = end_elem(ib)
w(:) = &
w(:) + matmul(A(:,se:ee), v(se:ee), transA=.false., option=option)
end do
#else
do ib = 1, nblock
se = start_elem(ib)
ee = end_elem(ib)
w(:) = &
w(:) + matmul(A(:,se:ee), v(se:ee), transA=.false., option=option)
end do
#endif
end if
else if (.not. present(transA)) then
! Av
allocate(w(size(A,1)), source=0.0_rk)
call compute_block_ranges(size(A,2), nblock, block_size, start_elem, end_elem)
#if defined(USE_DO_CONCURRENT)
do concurrent (ib = 1: nblock)
se = start_elem(ib)
ee = end_elem(ib)
w(:) = &
w(:) + matmul(A(:,se:ee), v(se:ee), transA=.false., option=option)
end do
#else
do ib = 1, nblock
se = start_elem(ib)
ee = end_elem(ib)
w(:) = &
w(:) + matmul(A(:,se:ee), v(se:ee), transA=.false., option=option)
end do
#endif
end if
end function mat_vec_block_rel