27 logical,
save :: async_matvec_flg = .false.
42 real(kind=
kreal),
intent(in) :: x(:)
43 real(kind=
kreal),
intent(out) :: y(:)
44 real(kind=
kreal),
intent(inout) :: time_ax
45 real(kind=
kreal),
intent(inout),
optional :: commtime
47 real(kind=
kreal) :: tcomm
48 real(kind=
kreal),
allocatable :: wk(:)
53 allocate(wk(hecmat%NP * hecmat%NDOF))
57 call hecmw_matvec_nn_inner(hecmesh, hecmat, x, y, time_ax, tcomm)
60 if (
present(commtime)) commtime = commtime + tcomm
122 subroutine hecmw_matvec_nn_inner (hecMESH, hecMAT, X, Y, time_Ax, COMMtime)
133 real(kind=
kreal),
intent(in) :: x(:)
134 real(kind=
kreal),
intent(out) :: y(:)
135 real(kind=
kreal),
intent(inout) :: time_ax
136 real(kind=
kreal),
intent(inout),
optional :: commtime
138 real(kind=
kreal) :: start_time, end_time, tcomm
139 integer(kind=kint) :: i, j, k, l, js, je, in
140 real(kind=
kreal) :: yv(hecmat%NDOF), xv(hecmat%NDOF)
142 integer(kind=kint) :: n, np, ndof, ndof2
143 integer(kind=kint),
pointer :: indexl(:), iteml(:), indexu(:), itemu(:), indexa(:), itema(:)
144 real(kind=
kreal),
pointer :: al(:), au(:), d(:), a(:)
147 integer,
parameter :: numofblockperthread = 100
148 logical,
save :: isfirst = .true.
149 integer,
save :: numofthread = 1
150 integer,
save,
allocatable :: startpos(:), endpos(:)
151 integer(kind=kint),
save :: sectorcachesize0, sectorcachesize1
152 integer(kind=kint) :: threadnum, blocknum, numofblock
153 integer(kind=kint) :: numofelement, elementcount, blockindex
154 real(kind=
kreal) :: numofelementperblock
162 time_ax = time_ax + end_time - start_time - tcomm
163 if (
present(commtime)) commtime = commtime + tcomm
168 indexl => hecmat%indexL
169 indexu => hecmat%indexU
170 indexa => hecmat%indexA
171 iteml => hecmat%itemL
172 itemu => hecmat%itemU
173 itema => hecmat%itemA
183 if (.not. isfirst)
then
184 numofblock = numofthread * numofblockperthread
185 if (endpos(numofblock-1) .ne. n-1)
then
186 deallocate(startpos, endpos)
192 numofblock = numofthread * numofblockperthread
193 allocate (startpos(0 : numofblock - 1), endpos(0 : numofblock - 1))
194 numofelement = n + indexl(n) + indexu(n)
195 numofelementperblock = dble(numofelement) / numofblock
198 startpos(blocknum) = 1
200 elementcount = elementcount + 1
201 elementcount = elementcount + (indexl(i) - indexl(i-1))
202 elementcount = elementcount + (indexu(i) - indexu(i-1))
203 if (elementcount > (blocknum + 1) * numofelementperblock)
then
207 blocknum = blocknum + 1
208 startpos(blocknum) = i + 1
209 if (blocknum == (numofblock - 1))
exit
216 do i= blocknum+1, numofblock-1
224 sectorcachesize0, sectorcachesize1)
238 if (
present(commtime)) commtime = commtime + end_time - start_time
253 xv(k) = x(ndof*(i-1)+k)
261 xv(k) = x(ndof*(in-1)+k)
265 yv(k)=yv(k)+a(ndof2*(j-1)+(k-1)*ndof+l)*xv(l)
270 y(ndof*(i-1)+k) = yv(k)
280 do blocknum = 0 , numofblockperthread - 1
281 blockindex = blocknum * numofthread + threadnum
282 do i = startpos(blockindex), endpos(blockindex)
284 xv(k) = x(ndof*(i-1)+k)
289 yv(k)=yv(k)+d(ndof2*(i-1)+(k-1)*ndof+l)*xv(l)
297 xv(k) = x(ndof*(in-1)+k)
301 yv(k)=yv(k)+al(ndof2*(j-1)+(k-1)*ndof+l)*xv(l)
311 xv(k) = x(ndof*(in-1)+k)
315 yv(k)=yv(k)+au(ndof2*(j-1)+(k-1)*ndof+l)*xv(l)
320 y(ndof*(i-1)+k) = yv(k)
334 time_ax = time_ax + end_time - start_time
337 end subroutine hecmw_matvec_nn_inner
351 real(kind=
kreal),
intent(in) :: x(:), b(:)
352 real(kind=
kreal),
intent(out) :: r(:)
353 real(kind=
kreal),
intent(inout) :: time_ax
354 real(kind=
kreal),
intent(inout),
optional :: commtime
356 integer(kind=kint) :: i
357 real(kind=
kreal) :: tcomm
361 if (
present(commtime)) commtime = commtime + tcomm
369 do i = 1, hecmat%N * hecmat%NDOF
392 real(kind=
kreal),
intent(inout) :: time_ax
393 real(kind=
kreal),
intent(inout),
optional :: commtime
395 real(kind=
kreal),
allocatable :: r(:)
396 real(kind=
kreal) :: bnorm2, rnorm2
397 real(kind=
kreal) :: tcomm
399 allocate(r(hecmat%NDOF*hecmat%NP))
403 hecmat%B, hecmat%B, bnorm2, tcomm)
404 if (bnorm2 == 0.d0)
then
411 if (
present(commtime)) commtime = commtime + tcomm
426 integer(kind=kint),
intent(in) :: ndof
427 real(kind=
kreal),
intent(in) :: x(:)
428 real(kind=
kreal),
intent(out) :: y(:)
429 real(kind=
kreal),
intent(inout) :: commtime
431 real(kind=
kreal) :: start_time, end_time
432 integer(kind=kint) :: i, j, jj, k, kk
437 commtime = commtime + end_time - start_time
446 do i= 1, hecmesh%nn_internal * ndof
458 outer:
do i= 1, hecmesh%mpc%n_mpc
459 do j= hecmesh%mpc%mpc_index(i-1) + 1, hecmesh%mpc%mpc_index(i)
460 if (hecmesh%mpc%mpc_dof(j) > ndof) cycle outer
462 k = hecmesh%mpc%mpc_index(i-1) + 1
463 kk = ndof * (hecmesh%mpc%mpc_item(k) - 1) + hecmesh%mpc%mpc_dof(k)
465 do j= hecmesh%mpc%mpc_index(i-1) + 2, hecmesh%mpc%mpc_index(i)
466 jj = ndof * (hecmesh%mpc%mpc_item(j) - 1) + hecmesh%mpc%mpc_dof(j)
467 y(kk) = y(kk) - hecmesh%mpc%mpc_val(j) * x(jj)
489 integer(kind=kint),
intent(in) :: ndof
490 real(kind=
kreal),
intent(in) :: x(:)
491 real(kind=
kreal),
intent(out) :: y(:)
492 real(kind=
kreal),
intent(inout) :: commtime
494 real(kind=
kreal) :: start_time, end_time
495 integer(kind=kint) :: i, j, jj, k, kk
500 commtime = commtime + end_time - start_time
509 do i= 1, hecmesh%nn_internal * ndof
521 outer:
do i= 1, hecmesh%mpc%n_mpc
522 do j= hecmesh%mpc%mpc_index(i-1) + 1, hecmesh%mpc%mpc_index(i)
523 if (hecmesh%mpc%mpc_dof(j) > ndof) cycle outer
525 k = hecmesh%mpc%mpc_index(i-1) + 1
526 kk = ndof * (hecmesh%mpc%mpc_item(k) - 1) + hecmesh%mpc%mpc_dof(k)
528 do j= hecmesh%mpc%mpc_index(i-1) + 2, hecmesh%mpc%mpc_index(i)
529 jj = ndof * (hecmesh%mpc%mpc_item(j) - 1) + hecmesh%mpc%mpc_dof(j)
535 y(jj) = y(jj) - hecmesh%mpc%mpc_val(j) * x(kk)
558 real(kind=
kreal),
intent(in) :: x(:)
559 real(kind=
kreal),
intent(out) :: y(:), w(:)
560 real(kind=
kreal),
intent(inout) :: time_ax
561 real(kind=
kreal),
intent(inout) :: commtime
564 call hecmw_matvec_nn_inner(hecmesh, hecmat, y, w, time_ax, commtime)
580 real(kind=
kreal),
intent(inout),
optional :: commtime
581 real(kind=
kreal),
allocatable :: w(:,:)
582 real(kind=
kreal),
pointer :: d(:)
583 integer(kind=kint) :: ip, ndof, i, j
584 real(kind=
kreal) :: start_time, end_time
586 allocate(w(ndof*hecmat%NP,ndof))
591 w(ndof*(ip-1)+i,j) = d(ndof*ndof*(ip-1)+(i-1)*ndof+j)
600 if (
present(commtime)) commtime = commtime + end_time - start_time
601 do ip= hecmat%N+1, hecmat%NP
604 d(ndof*ndof*(ip-1)+(i-1)*ndof+j) = w(ndof*(ip-1)+i,j)
615 integer(kind=kint) :: i
617 do i = 1, hecmat1%NP*hecmat1%NDOF*hecmat1%NDOF
618 hecmat3%D(i) = hecmat1%D(i) + hecmat2%D(i)
621 do i = 1, hecmat1%NPU*hecmat1%NDOF*hecmat1%NDOF
622 hecmat3%AU(i) = hecmat1%AU(i) + hecmat2%AU(i)
625 do i = 1, hecmat1%NPL*hecmat1%NDOF*hecmat1%NDOF
626 hecmat3%AL(i) = hecmat1%AL(i) + hecmat2%AL(i)
634 real(kind=
kreal),
intent(in) :: alpha
635 integer(kind=kint) :: i
637 do i = 1, hecmat%NP*hecmat%NDOF*hecmat%NDOF
638 hecmat%D(i) = alpha*hecmat%D(i)
641 do i = 1, hecmat%NPU*hecmat%NDOF*hecmat%NDOF
642 hecmat%AU(i) = alpha*hecmat%AU(i)
645 do i = 1, hecmat%NPL*hecmat%NDOF*hecmat%NDOF
646 hecmat%AL(i) = alpha*hecmat%AL(i)
Jagged Diagonal Matrix storage for vector processors. Original code was provided by JAMSTEC.
subroutine, public hecmw_jad_matvec(hecMESH, hecMAT, X, Y, COMMtime)
integer(kind=kint) function, public hecmw_jad_is_initialized()
integer(kind=kint) function, public hecmw_mat_get_flag_mpcmatvec(hecMAT)
subroutine, public hecmw_mat_diag_sr_nn(hecMESH, hecMAT, COMMtime)
subroutine, public hecmw_ttmattvec_nn(hecMESH, hecMAT, X, Y, W, time_Ax, COMMtime)
subroutine, public hecmw_mat_multiple_nn(hecMAT, alpha)
subroutine, public hecmw_ttvec_nn(hecMESH, ndof, X, Y, COMMtime)
real(kind=kreal) function, public hecmw_rel_resid_l2_nn(hecMESH, hecMAT, time_Ax, COMMtime)
subroutine, public hecmw_matvec_nn_unset_async
subroutine, public hecmw_tvec_nn(hecMESH, ndof, X, Y, COMMtime)
subroutine, public hecmw_matvec_nn_set_async(hecMAT)
subroutine, public hecmw_matvec_nn(hecMESH, hecMAT, X, Y, time_Ax, COMMtime)
subroutine, public hecmw_matresid_nn(hecMESH, hecMAT, X, B, R, time_Ax, COMMtime)
subroutine, public hecmw_mat_add_nn(hecMAT1, hecMAT2, hecMAT3)
subroutine hecmw_innerproduct_r(hecMESH, ndof, X, Y, sum, COMMtime)
subroutine, public hecmw_tuning_fx_calc_sector_cache(N, NDOF, sectorCacheSize0, sectorCacheSize1)
integer(kind=4), parameter kreal
real(kind=kreal) function hecmw_wtime()
subroutine hecmw_update_r(hecMESH, val, n, m)