25 logical,
save :: async_matvec_flg = .false.
40 real(kind=
kreal),
intent(in) :: x(:)
41 real(kind=
kreal),
intent(out) :: y(:)
42 real(kind=
kreal),
intent(inout) :: time_ax
43 real(kind=
kreal),
intent(inout),
optional :: commtime
45 real(kind=
kreal) :: tcomm
46 real(kind=
kreal),
allocatable :: wk(:)
51 allocate(wk(hecmat%NP * hecmat%NDOF))
55 call hecmw_matvec_44_inner(hecmesh, hecmat, x, y, time_ax, tcomm)
58 if (
present(commtime)) commtime = commtime + tcomm
88 subroutine hecmw_matvec_44_inner (hecMESH, hecMAT, X, Y, time_Ax, COMMtime)
99 real(kind=
kreal),
intent(in) :: x(:)
100 real(kind=
kreal),
intent(out) :: y(:)
101 real(kind=
kreal),
intent(inout) :: time_ax
102 real(kind=
kreal),
intent(inout),
optional :: commtime
104 real(kind=
kreal) :: start_time, end_time, tcomm
105 integer(kind=kint) :: i, j, js, je, in
106 real(kind=
kreal) :: yv1, yv2, yv3, yv4, x1, x2, x3, x4
108 integer(kind=kint) :: n, np
109 integer(kind=kint),
pointer :: indexl(:), iteml(:), indexu(:), itemu(:), indexa(:), itema(:)
110 real(kind=
kreal),
pointer :: al(:), au(:), d(:), a(:)
113 integer,
parameter :: numofblockperthread = 100
114 logical,
save :: isfirst = .true.
115 integer,
save :: numofthread = 1
116 integer,
save,
allocatable :: startpos(:), endpos(:)
117 integer(kind=kint),
save :: sectorcachesize0, sectorcachesize1
118 integer(kind=kint) :: threadnum, blocknum, numofblock
119 integer(kind=kint) :: numofelement, elementcount, blockindex
120 real(kind=
kreal) :: numofelementperblock
128 time_ax = time_ax + end_time - start_time - tcomm
129 if (
present(commtime)) commtime = commtime + tcomm
134 indexl => hecmat%indexL
135 indexu => hecmat%indexU
136 indexa => hecmat%indexA
137 iteml => hecmat%itemL
138 itemu => hecmat%itemU
139 itema => hecmat%itemA
147 if (.not. isfirst)
then
148 numofblock = numofthread * numofblockperthread
149 if (endpos(numofblock-1) .ne. n-1)
then
150 deallocate(startpos, endpos)
156 numofblock = numofthread * numofblockperthread
157 allocate (startpos(0 : numofblock - 1), endpos(0 : numofblock - 1))
158 numofelement = n + indexl(n) + indexu(n)
159 numofelementperblock = dble(numofelement) / numofblock
162 startpos(blocknum) = 1
164 elementcount = elementcount + 1
165 elementcount = elementcount + (indexl(i) - indexl(i-1))
166 elementcount = elementcount + (indexu(i) - indexu(i-1))
167 if (elementcount > (blocknum + 1) * numofelementperblock)
then
171 blocknum = blocknum + 1
172 startpos(blocknum) = i + 1
173 if (blocknum == (numofblock - 1))
exit
180 do i= blocknum+1, numofblock-1
188 sectorcachesize0, sectorcachesize1)
201 if (
present(commtime)) commtime = commtime + end_time - start_time
231 yv1= yv1 + a(16*j-15)*x1 + a(16*j-14)*x2 + a(16*j-13)*x3 + a(16*j-12)*x4
232 yv2= yv2 + a(16*j-11)*x1 + a(16*j-10)*x2 + a(16*j- 9)*x3 + a(16*j- 8)*x4
233 yv3= yv3 + a(16*j- 7)*x1 + a(16*j- 6)*x2 + a(16*j- 5)*x3 + a(16*j- 4)*x4
234 yv4= yv4 + a(16*j- 3)*x1 + a(16*j- 2)*x2 + a(16*j- 1)*x3 + a(16*j )*x4
248 do blocknum = 0 , numofblockperthread - 1
249 blockindex = blocknum * numofthread + threadnum
250 do i = startpos(blockindex), endpos(blockindex)
255 yv1= d(16*i-15)*x1 + d(16*i-14)*x2 + d(16*i-13)*x3 + d(16*i-12)*x4
256 yv2= d(16*i-11)*x1 + d(16*i-10)*x2 + d(16*i- 9)*x3 + d(16*i- 8)*x4
257 yv3= d(16*i- 7)*x1 + d(16*i- 6)*x2 + d(16*i- 5)*x3 + d(16*i- 4)*x4
258 yv4= d(16*i- 3)*x1 + d(16*i- 2)*x2 + d(16*i- 1)*x3 + d(16*i )*x4
268 yv1= yv1 + al(16*j-15)*x1 + al(16*j-14)*x2 + al(16*j-13)*x3 + al(16*j-12)*x4
269 yv2= yv2 + al(16*j-11)*x1 + al(16*j-10)*x2 + al(16*j- 9)*x3 + al(16*j- 8)*x4
270 yv3= yv3 + al(16*j- 7)*x1 + al(16*j- 6)*x2 + al(16*j- 5)*x3 + al(16*j- 4)*x4
271 yv4= yv4 + al(16*j- 3)*x1 + al(16*j- 2)*x2 + al(16*j- 1)*x3 + al(16*j )*x4
282 yv1= yv1 + au(16*j-15)*x1 + au(16*j-14)*x2 + au(16*j-13)*x3 + au(16*j-12)*x4
283 yv2= yv2 + au(16*j-11)*x1 + au(16*j-10)*x2 + au(16*j- 9)*x3 + au(16*j- 8)*x4
284 yv3= yv3 + au(16*j- 7)*x1 + au(16*j- 6)*x2 + au(16*j- 5)*x3 + au(16*j- 4)*x4
285 yv4= yv4 + au(16*j- 3)*x1 + au(16*j- 2)*x2 + au(16*j- 1)*x3 + au(16*j )*x4
303 time_ax = time_ax + end_time - start_time
306 end subroutine hecmw_matvec_44_inner
318 real(kind=
kreal),
intent(in) :: x(:), b(:)
319 real(kind=
kreal),
intent(out) :: r(:)
320 real(kind=
kreal),
intent(inout) :: time_ax
321 real(kind=
kreal),
intent(inout),
optional :: commtime
323 integer(kind=kint) :: i
324 real(kind=
kreal) :: tcomm
328 if (
present(commtime)) commtime = commtime + tcomm
336 do i = 1, hecmat%N * 4
359 real(kind=
kreal),
intent(inout) :: time_ax
360 real(kind=
kreal),
intent(inout),
optional :: commtime
362 real(kind=
kreal),
allocatable :: r(:)
363 real(kind=
kreal) :: bnorm2, rnorm2
364 real(kind=
kreal) :: tcomm
366 allocate(r(hecmat%NDOF*hecmat%NP))
370 hecmat%B, hecmat%B, bnorm2, tcomm)
371 if (bnorm2 == 0.d0)
then
378 if (
present(commtime)) commtime = commtime + tcomm
393 real(kind=
kreal),
intent(in) :: x(:)
394 real(kind=
kreal),
intent(out) :: y(:)
395 real(kind=
kreal),
intent(inout) :: commtime
397 real(kind=
kreal) :: start_time, end_time
398 integer(kind=kint) :: i, j, jj, k, kk
403 commtime = commtime + end_time - start_time
412 do i= 1, hecmesh%nn_internal * hecmesh%n_dof
424 outer:
do i= 1, hecmesh%mpc%n_mpc
425 do j= hecmesh%mpc%mpc_index(i-1) + 1, hecmesh%mpc%mpc_index(i)
426 if (hecmesh%mpc%mpc_dof(j) > 4) cycle outer
428 k = hecmesh%mpc%mpc_index(i-1) + 1
429 kk = 4 * (hecmesh%mpc%mpc_item(k) - 1) + hecmesh%mpc%mpc_dof(k)
431 do j= hecmesh%mpc%mpc_index(i-1) + 2, hecmesh%mpc%mpc_index(i)
432 jj = 4 * (hecmesh%mpc%mpc_item(j) - 1) + hecmesh%mpc%mpc_dof(j)
433 y(kk) = y(kk) - hecmesh%mpc%mpc_val(j) * x(jj)
455 real(kind=
kreal),
intent(in) :: x(:)
456 real(kind=
kreal),
intent(out) :: y(:)
457 real(kind=
kreal),
intent(inout) :: commtime
459 real(kind=
kreal) :: start_time, end_time
460 integer(kind=kint) :: i, j, jj, k, kk
465 commtime = commtime + end_time - start_time
474 do i= 1, hecmesh%nn_internal * hecmesh%n_dof
486 outer:
do i= 1, hecmesh%mpc%n_mpc
487 do j= hecmesh%mpc%mpc_index(i-1) + 1, hecmesh%mpc%mpc_index(i)
488 if (hecmesh%mpc%mpc_dof(j) > 4) cycle outer
490 k = hecmesh%mpc%mpc_index(i-1) + 1
491 kk = 4 * (hecmesh%mpc%mpc_item(k) - 1) + hecmesh%mpc%mpc_dof(k)
493 do j= hecmesh%mpc%mpc_index(i-1) + 2, hecmesh%mpc%mpc_index(i)
494 jj = 4 * (hecmesh%mpc%mpc_item(j) - 1) + hecmesh%mpc%mpc_dof(j)
500 y(jj) = y(jj) - hecmesh%mpc%mpc_val(j) * x(kk)
522 real(kind=
kreal),
intent(in) :: x(:)
523 real(kind=
kreal),
intent(out) :: y(:), w(:)
524 real(kind=
kreal),
intent(inout) :: time_ax
525 real(kind=
kreal),
intent(inout) :: commtime
528 call hecmw_matvec_44_inner(hecmesh, hecmat, y, w, time_ax, commtime)
545 real(kind=
kreal),
intent(inout),
optional :: commtime
546 real(kind=
kreal),
allocatable :: w(:,:)
547 real(kind=
kreal),
pointer :: d(:)
548 integer(kind=kint) :: ip
549 real(kind=
kreal) :: start_time, end_time
550 allocate(w(4*hecmat%NP,4))
553 w(4*ip-3,1)= d(16*ip-15); w(4*ip-3,2)= d(16*ip-14); w(4*ip-3,3)= d(16*ip-13); w(4*ip-3,4)= d(16*ip-12)
554 w(4*ip-2,1)= d(16*ip-11); w(4*ip-2,2)= d(16*ip-10); w(4*ip-2,3)= d(16*ip- 9); w(4*ip-2,4)= d(16*ip- 8)
555 w(4*ip-1,1)= d(16*ip- 7); w(4*ip-1,2)= d(16*ip- 6); w(4*ip-1,3)= d(16*ip- 5); w(4*ip-1,4)= d(16*ip- 4)
556 w(4*ip ,1)= d(16*ip- 3); w(4*ip ,2)= d(16*ip- 2); w(4*ip ,3)= d(16*ip- 1); w(4*ip ,4)= d(16*ip )
564 if (
present(commtime)) commtime = commtime + end_time - start_time
565 do ip= hecmat%N+1, hecmat%NP
566 d(16*ip-15)= w(4*ip-3,1); d(16*ip-14)= w(4*ip-3,2); d(16*ip-13)= w(4*ip-3,3); d(16*ip-12)= w(4*ip-3,4)
567 d(16*ip-11)= w(4*ip-2,1); d(16*ip-10)= w(4*ip-2,2); d(16*ip- 9)= w(4*ip-2,3); d(16*ip- 8)= w(4*ip-2,4)
568 d(16*ip- 7)= w(4*ip-1,1); d(16*ip- 6)= w(4*ip-1,2); d(16*ip- 5)= w(4*ip-1,3); d(16*ip- 4)= w(4*ip-1,4)
569 d(16*ip- 3)= w(4*ip ,1); d(16*ip- 2)= w(4*ip ,2); d(16*ip- 1)= w(4*ip ,3); d(16*ip )= w(4*ip ,4)
Jagged Diagonal Matrix storage for vector processors. Original code was provided by JAMSTEC.
subroutine, public hecmw_jad_matvec(hecMESH, hecMAT, X, Y, COMMtime)
integer(kind=kint) function, public hecmw_jad_is_initialized()
integer(kind=kint) function, public hecmw_mat_get_flag_mpcmatvec(hecMAT)
subroutine, public hecmw_matresid_44(hecMESH, hecMAT, X, B, R, time_Ax, COMMtime)
subroutine, public hecmw_mat_diag_sr_44(hecMESH, hecMAT, COMMtime)
subroutine, public hecmw_matvec_44(hecMESH, hecMAT, X, Y, time_Ax, COMMtime)
subroutine, public hecmw_ttvec_44(hecMESH, X, Y, COMMtime)
subroutine, public hecmw_matvec_44_unset_async
subroutine, public hecmw_matvec_44_set_async(hecMAT)
subroutine, public hecmw_tvec_44(hecMESH, X, Y, COMMtime)
real(kind=kreal) function, public hecmw_rel_resid_l2_44(hecMESH, hecMAT, time_Ax, COMMtime)
subroutine, public hecmw_ttmattvec_44(hecMESH, hecMAT, X, Y, W, time_Ax, COMMtime)
subroutine hecmw_innerproduct_r(hecMESH, ndof, X, Y, sum, COMMtime)
subroutine, public hecmw_tuning_fx_calc_sector_cache(N, NDOF, sectorCacheSize0, sectorCacheSize1)
integer(kind=4), parameter kreal
real(kind=kreal) function hecmw_wtime()
subroutine hecmw_update_r(hecMESH, val, n, m)