25 logical,
save :: async_matvec_flg = .false.
40 real(kind=
kreal),
intent(in) :: x(:)
41 real(kind=
kreal),
intent(out) :: y(:)
42 real(kind=
kreal),
intent(inout) :: time_ax
43 real(kind=
kreal),
intent(inout),
optional :: commtime
45 real(kind=
kreal) :: tcomm
46 real(kind=
kreal),
allocatable :: wk(:)
51 allocate(wk(hecmat%NP * hecmat%NDOF))
55 call hecmw_matvec_33_inner(hecmesh, hecmat, x, y, time_ax, tcomm)
58 if (
present(commtime)) commtime = commtime + tcomm
120 subroutine hecmw_matvec_33_inner (hecMESH, hecMAT, X, Y, time_Ax, COMMtime)
131 real(kind=
kreal),
intent(in) :: x(:)
132 real(kind=
kreal),
intent(out) :: y(:)
133 real(kind=
kreal),
intent(inout) :: time_ax
134 real(kind=
kreal),
intent(inout),
optional :: commtime
136 real(kind=
kreal) :: start_time, end_time, tcomm
137 integer(kind=kint) :: i, j, js, je, in
138 real(kind=
kreal) :: yv1, yv2, yv3, x1, x2, x3
140 integer(kind=kint) :: n, np
141 integer(kind=kint),
pointer :: indexl(:), iteml(:), indexu(:), itemu(:), indexa(:), itema(:)
142 real(kind=
kreal),
pointer :: al(:), au(:), d(:), a(:)
145 integer,
parameter :: numofblockperthread = 100
146 logical,
save :: isfirst = .true.
147 integer,
save :: numofthread = 1
148 integer,
save,
allocatable :: startpos(:), endpos(:)
149 integer(kind=kint),
save :: sectorcachesize0, sectorcachesize1
150 integer(kind=kint) :: threadnum, blocknum, numofblock
151 integer(kind=kint) :: numofelement, elementcount, blockindex
152 real(kind=
kreal) :: numofelementperblock
160 time_ax = time_ax + end_time - start_time - tcomm
161 if (
present(commtime)) commtime = commtime + tcomm
166 indexl => hecmat%indexL
167 indexu => hecmat%indexU
168 indexa => hecmat%indexA
169 iteml => hecmat%itemL
170 itemu => hecmat%itemU
171 itema => hecmat%itemA
179 if (.not. isfirst)
then
180 numofblock = numofthread * numofblockperthread
181 if (endpos(numofblock-1) .ne. n-1)
then
182 deallocate(startpos, endpos)
188 numofblock = numofthread * numofblockperthread
189 allocate (startpos(0 : numofblock - 1), endpos(0 : numofblock - 1))
190 numofelement = n + indexl(n) + indexu(n)
191 numofelementperblock = dble(numofelement) / numofblock
194 startpos(blocknum) = 1
196 elementcount = elementcount + 1
197 elementcount = elementcount + (indexl(i) - indexl(i-1))
198 elementcount = elementcount + (indexu(i) - indexu(i-1))
199 if (elementcount > (blocknum + 1) * numofelementperblock)
then
203 blocknum = blocknum + 1
204 startpos(blocknum) = i + 1
205 if (blocknum == (numofblock - 1))
exit
212 do i= blocknum+1, numofblock-1
220 sectorcachesize0, sectorcachesize1)
234 if (
present(commtime)) commtime = commtime + end_time - start_time
261 yv1= yv1 + a(9*j-8)*x1 + a(9*j-7)*x2 + a(9*j-6)*x3
262 yv2= yv2 + a(9*j-5)*x1 + a(9*j-4)*x2 + a(9*j-3)*x3
263 yv3= yv3 + a(9*j-2)*x1 + a(9*j-1)*x2 + a(9*j )*x3
276 do blocknum = 0 , numofblockperthread - 1
277 blockindex = blocknum * numofthread + threadnum
278 do i = startpos(blockindex), endpos(blockindex)
282 yv1= d(9*i-8)*x1 + d(9*i-7)*x2 + d(9*i-6)*x3
283 yv2= d(9*i-5)*x1 + d(9*i-4)*x2 + d(9*i-3)*x3
284 yv3= d(9*i-2)*x1 + d(9*i-1)*x2 + d(9*i )*x3
293 yv1= yv1 + al(9*j-8)*x1 + al(9*j-7)*x2 + al(9*j-6)*x3
294 yv2= yv2 + al(9*j-5)*x1 + al(9*j-4)*x2 + al(9*j-3)*x3
295 yv3= yv3 + al(9*j-2)*x1 + al(9*j-1)*x2 + al(9*j )*x3
305 yv1= yv1 + au(9*j-8)*x1 + au(9*j-7)*x2 + au(9*j-6)*x3
306 yv2= yv2 + au(9*j-5)*x1 + au(9*j-4)*x2 + au(9*j-3)*x3
307 yv3= yv3 + au(9*j-2)*x1 + au(9*j-1)*x2 + au(9*j )*x3
324 time_ax = time_ax + end_time - start_time
360 end subroutine hecmw_matvec_33_inner
372 real(kind=
kreal),
intent(in) :: x(:), b(:)
373 real(kind=
kreal),
intent(out) :: r(:)
374 real(kind=
kreal),
intent(inout) :: time_ax
375 real(kind=
kreal),
intent(inout),
optional :: commtime
377 integer(kind=kint) :: i
378 real(kind=
kreal) :: tcomm
382 if (
present(commtime)) commtime = commtime + tcomm
390 do i = 1, hecmat%N * 3
413 real(kind=
kreal),
intent(inout) :: time_ax
414 real(kind=
kreal),
intent(inout),
optional :: commtime
416 real(kind=
kreal),
allocatable :: r(:)
417 real(kind=
kreal) :: bnorm2, rnorm2
418 real(kind=
kreal) :: tcomm
420 allocate(r(hecmat%NDOF*hecmat%NP))
424 hecmat%B, hecmat%B, bnorm2, tcomm)
425 if (bnorm2 == 0.d0)
then
432 if (
present(commtime)) commtime = commtime + tcomm
447 real(kind=
kreal),
intent(in) :: x(:)
448 real(kind=
kreal),
intent(out) :: y(:)
449 real(kind=
kreal),
intent(inout) :: commtime
451 real(kind=
kreal) :: start_time, end_time
452 integer(kind=kint) :: i, j, jj, k, kk
457 commtime = commtime + end_time - start_time
466 do i= 1, hecmesh%nn_internal * hecmesh%n_dof
478 outer:
do i= 1, hecmesh%mpc%n_mpc
479 do j= hecmesh%mpc%mpc_index(i-1) + 1, hecmesh%mpc%mpc_index(i)
480 if (hecmesh%mpc%mpc_dof(j) > 3) cycle outer
482 k = hecmesh%mpc%mpc_index(i-1) + 1
483 kk = 3 * (hecmesh%mpc%mpc_item(k) - 1) + hecmesh%mpc%mpc_dof(k)
485 do j= hecmesh%mpc%mpc_index(i-1) + 2, hecmesh%mpc%mpc_index(i)
486 jj = 3 * (hecmesh%mpc%mpc_item(j) - 1) + hecmesh%mpc%mpc_dof(j)
487 y(kk) = y(kk) - hecmesh%mpc%mpc_val(j) * x(jj)
509 real(kind=
kreal),
intent(in) :: x(:)
510 real(kind=
kreal),
intent(out) :: y(:)
511 real(kind=
kreal),
intent(inout) :: commtime
513 real(kind=
kreal) :: start_time, end_time
514 integer(kind=kint) :: i, j, jj, k, kk
519 commtime = commtime + end_time - start_time
528 do i= 1, hecmesh%nn_internal * hecmesh%n_dof
540 outer:
do i= 1, hecmesh%mpc%n_mpc
541 do j= hecmesh%mpc%mpc_index(i-1) + 1, hecmesh%mpc%mpc_index(i)
542 if (hecmesh%mpc%mpc_dof(j) > 3) cycle outer
544 k = hecmesh%mpc%mpc_index(i-1) + 1
545 kk = 3 * (hecmesh%mpc%mpc_item(k) - 1) + hecmesh%mpc%mpc_dof(k)
547 do j= hecmesh%mpc%mpc_index(i-1) + 2, hecmesh%mpc%mpc_index(i)
548 jj = 3 * (hecmesh%mpc%mpc_item(j) - 1) + hecmesh%mpc%mpc_dof(j)
554 y(jj) = y(jj) - hecmesh%mpc%mpc_val(j) * x(kk)
577 real(kind=
kreal),
intent(in) :: x(:)
578 real(kind=
kreal),
intent(out) :: y(:), w(:)
579 real(kind=
kreal),
intent(inout) :: time_ax
580 real(kind=
kreal),
intent(inout) :: commtime
583 call hecmw_matvec_33_inner(hecmesh, hecmat, y, w, time_ax, commtime)
600 real(kind=
kreal),
intent(inout),
optional :: commtime
601 real(kind=
kreal),
allocatable :: w(:,:)
602 real(kind=
kreal),
pointer :: d(:)
603 integer(kind=kint) :: ip
604 real(kind=
kreal) :: start_time, end_time
605 allocate(w(3*hecmat%NP,3))
608 w(3*ip-2,1)= d(9*ip-8); w(3*ip-2,2)= d(9*ip-7); w(3*ip-2,3)= d(9*ip-6)
609 w(3*ip-1,1)= d(9*ip-5); w(3*ip-1,2)= d(9*ip-4); w(3*ip-1,3)= d(9*ip-3)
610 w(3*ip ,1)= d(9*ip-2); w(3*ip ,2)= d(9*ip-1); w(3*ip ,3)= d(9*ip )
617 if (
present(commtime)) commtime = commtime + end_time - start_time
618 do ip= hecmat%N+1, hecmat%NP
619 d(9*ip-8)= w(3*ip-2,1); d(9*ip-7)= w(3*ip-2,2); d(9*ip-6)= w(3*ip-2,3)
620 d(9*ip-5)= w(3*ip-1,1); d(9*ip-4)= w(3*ip-1,2); d(9*ip-3)= w(3*ip-1,3)
621 d(9*ip-2)= w(3*ip ,1); d(9*ip-1)= w(3*ip ,2); d(9*ip )= w(3*ip ,3)
Jagged Diagonal Matrix storage for vector processors. Original code was provided by JAMSTEC.
subroutine, public hecmw_jad_matvec(hecMESH, hecMAT, X, Y, COMMtime)
integer(kind=kint) function, public hecmw_jad_is_initialized()
integer(kind=kint) function, public hecmw_mat_get_flag_mpcmatvec(hecMAT)
subroutine, public hecmw_matresid_33(hecMESH, hecMAT, X, B, R, time_Ax, COMMtime)
subroutine, public hecmw_matvec_33(hecMESH, hecMAT, X, Y, time_Ax, COMMtime)
subroutine, public hecmw_mat_diag_sr_33(hecMESH, hecMAT, COMMtime)
subroutine, public hecmw_matvec_33_unset_async
subroutine, public hecmw_ttmattvec_33(hecMESH, hecMAT, X, Y, W, time_Ax, COMMtime)
subroutine, public hecmw_ttvec_33(hecMESH, X, Y, COMMtime)
real(kind=kreal) function, public hecmw_rel_resid_l2_33(hecMESH, hecMAT, time_Ax, COMMtime)
subroutine, public hecmw_tvec_33(hecMESH, X, Y, COMMtime)
subroutine, public hecmw_matvec_33_set_async(hecMAT)
subroutine hecmw_innerproduct_r(hecMESH, ndof, X, Y, sum, COMMtime)
subroutine, public hecmw_tuning_fx_calc_sector_cache(N, NDOF, sectorCacheSize0, sectorCacheSize1)
integer(kind=4), parameter kreal
real(kind=kreal) function hecmw_wtime()
subroutine hecmw_update_r(hecMESH, val, n, m)