27 integer(kind=kint) :: N
28 real(kind=
kreal),
pointer :: d(:) => null()
29 real(kind=
kreal),
pointer :: al(:) => null()
30 real(kind=
kreal),
pointer :: au(:) => null()
31 integer(kind=kint),
pointer :: indexL(:) => null()
32 integer(kind=kint),
pointer :: indexU(:) => null()
33 integer(kind=kint),
pointer :: itemL(:) => null()
34 integer(kind=kint),
pointer :: itemU(:) => null()
35 real(kind=
kreal),
pointer :: alu(:) => null()
37 integer(kind=kint) :: NColor
38 integer(kind=kint),
pointer :: COLORindex(:) => null()
39 integer(kind=kint),
pointer :: perm(:) => null()
40 integer(kind=kint),
pointer :: iperm(:) => null()
42 logical,
save :: isFirst = .true.
44 logical,
save :: INITIALIZED = .false.
47 integer(kind=kint),
parameter :: numOfBlockPerThread = 100
48 integer(kind=kint),
save :: numOfThread = 1, numofblock
49 integer(kind=kint),
save,
allocatable :: icToBlockIndex(:)
50 integer(kind=kint),
save,
allocatable :: blockIndexToColorIndex(:)
51 integer(kind=kint),
save :: sectorCacheSize0, sectorCacheSize1
53 integer(kind=kint),
parameter :: DEBUG = 0
60 integer(kind=kint ) :: npl, npu
61 integer(kind=kint ) :: ncolor_in
62 real (kind=
kreal) :: sigma_diag
63 real (kind=
kreal) :: alutmp(3,3), pw(3)
64 integer(kind=kint ) :: ii, i, j, k
65 integer(kind=kint ) :: nthreads = 1
66 integer(kind=kint ),
allocatable :: perm_tmp(:)
67 real (kind=
kreal) :: t0
71 write(*,*)
'DEBUG: SSOR setup start',
hecmw_wtime()-t0
75 if (hecmat%Iarray(98) == 1)
then
77 else if (hecmat%Iarray(97) == 1)
then
94 allocate(colorindex(0:n), perm_tmp(n), perm(n), iperm(n))
96 hecmat%indexU, hecmat%itemU, perm_tmp, iperm)
97 if (debug >= 1)
write(*,*)
'DEBUG: RCM ordering done',
hecmw_wtime()-t0
99 hecmat%indexU, hecmat%itemU, perm_tmp, &
100 ncolor_in, ncolor, colorindex, perm, iperm)
101 if (debug >= 1)
write(*,*)
'DEBUG: MC ordering done',
hecmw_wtime()-t0
106 if (nthreads == 1)
then
108 allocate(colorindex(0:1), perm(n), iperm(n))
116 allocate(colorindex(0:n), perm_tmp(n), perm(n), iperm(n))
118 hecmat%indexU, hecmat%itemU, perm_tmp, iperm)
119 if (debug >= 1)
write(*,*)
'DEBUG: RCM ordering done',
hecmw_wtime()-t0
121 hecmat%indexU, hecmat%itemU, perm_tmp, &
122 ncolor_in, ncolor, colorindex, perm, iperm)
123 if (debug >= 1)
write(*,*)
'DEBUG: MC ordering done',
hecmw_wtime()-t0
132 do j=hecmat%indexU(i-1)+1,hecmat%indexU(i)
133 if( hecmat%itemU(j) > n )
exit
137 npl = max(hecmat%indexL(n),npl)
138 npu = hecmat%indexU(n)
139 allocate(indexl(0:n), indexu(0:n), iteml(npl), itemu(npu))
141 hecmat%indexL, hecmat%indexU, hecmat%itemL, hecmat%itemU, &
142 indexl, indexu, iteml, itemu)
143 if (debug >= 1)
write(*,*)
'DEBUG: reordering profile done',
hecmw_wtime()-t0
147 allocate(d(9*n), al(9*npl), au(9*npu))
149 hecmat%indexL, hecmat%indexU, hecmat%itemL, hecmat%itemU, &
150 hecmat%AL, hecmat%AU, hecmat%D, &
151 indexl, indexu, iteml, itemu, al, au, d)
152 if (debug >= 1)
write(*,*)
'DEBUG: reordering values done',
hecmw_wtime()-t0
172 alutmp(1,1)= alu(9*ii-8) * sigma_diag
173 alutmp(1,2)= alu(9*ii-7)
174 alutmp(1,3)= alu(9*ii-6)
175 alutmp(2,1)= alu(9*ii-5)
176 alutmp(2,2)= alu(9*ii-4) * sigma_diag
177 alutmp(2,3)= alu(9*ii-3)
178 alutmp(3,1)= alu(9*ii-2)
179 alutmp(3,2)= alu(9*ii-1)
180 alutmp(3,3)= alu(9*ii ) * sigma_diag
182 alutmp(k,k)= 1.d0/alutmp(k,k)
184 alutmp(i,k)= alutmp(i,k) * alutmp(k,k)
186 pw(j)= alutmp(i,j) - alutmp(i,k)*alutmp(k,j)
193 alu(9*ii-8)= alutmp(1,1)
194 alu(9*ii-7)= alutmp(1,2)
195 alu(9*ii-6)= alutmp(1,3)
196 alu(9*ii-5)= alutmp(2,1)
197 alu(9*ii-4)= alutmp(2,2)
198 alu(9*ii-3)= alutmp(2,3)
199 alu(9*ii-2)= alutmp(3,1)
200 alu(9*ii-1)= alutmp(3,2)
201 alu(9*ii )= alutmp(3,3)
213 hecmat%Iarray(98) = 0
214 hecmat%Iarray(97) = 0
216 if (debug >= 1)
write(*,*)
'DEBUG: SSOR setup done',
hecmw_wtime()-t0
220 subroutine setup_tuning_parameters
223 integer(kind=kint) :: blockindex, elementcount, numofelement, ii
224 real(kind=
kreal) :: numofelementperblock
225 integer(kind=kint) :: my_rank
226 integer(kind=kint) :: ic, i
227 if (debug >= 1)
write(*,*)
'DEBUG: setting up tuning parameters for SSOR'
232 numofblock = numofthread * numofblockperthread
233 if (
allocated(ictoblockindex))
deallocate(ictoblockindex)
234 if (
allocated(blockindextocolorindex))
deallocate(blockindextocolorindex)
235 allocate (ictoblockindex(0:ncolor), &
236 blockindextocolorindex(0:numofblock + ncolor))
237 numofelement = n + indexl(n) + indexu(n)
238 numofelementperblock = dble(numofelement) / numofblock
241 ictoblockindex(0) = 0
242 blockindextocolorindex = -1
243 blockindextocolorindex(0) = 0
252 do i = colorindex(ic-1)+1, colorindex(ic)
253 elementcount = elementcount + 1
254 elementcount = elementcount + (indexl(i) - indexl(i-1))
255 elementcount = elementcount + (indexu(i) - indexu(i-1))
256 if (elementcount > ii * numofelementperblock &
257 .or. i == colorindex(ic))
then
259 blockindex = blockindex + 1
260 blockindextocolorindex(blockindex) = i
265 ictoblockindex(ic) = blockindex
267 numofblock = blockindex
270 sectorcachesize0, sectorcachesize1 )
271 end subroutine setup_tuning_parameters
275 real(kind=
kreal),
intent(inout) :: zp(:)
276 integer(kind=kint) :: ic, i, iold, j, isl, iel, isu, ieu, k
277 real(kind=
kreal) :: sw1, sw2, sw3, x1, x2, x3
280 integer(kind=kint) :: blockindex
284 call setup_tuning_parameters
307 do i = colorindex(ic-1)+1, colorindex(ic)
310 do blockindex = ictoblockindex(ic-1)+1, ictoblockindex(ic)
311 do i = blockindextocolorindex(blockindex-1)+1, &
312 blockindextocolorindex(blockindex)
327 sw1= sw1 - al(9*j-8)*x1 - al(9*j-7)*x2 - al(9*j-6)*x3
328 sw2= sw2 - al(9*j-5)*x1 - al(9*j-4)*x2 - al(9*j-3)*x3
329 sw3= sw3 - al(9*j-2)*x1 - al(9*j-1)*x2 - al(9*j )*x3
335 x2= x2 - alu(9*i-5)*x1
336 x3= x3 - alu(9*i-2)*x1 - alu(9*i-1)*x2
338 x2= alu(9*i-4)*( x2 - alu(9*i-3)*x3 )
339 x1= alu(9*i-8)*( x1 - alu(9*i-6)*x3 - alu(9*i-7)*x2)
358 do i = colorindex(ic-1)+1, colorindex(ic)
361 do blockindex = ictoblockindex(ic), ictoblockindex(ic-1)+1, -1
362 do i = blockindextocolorindex(blockindex), &
363 blockindextocolorindex(blockindex-1)+1, -1
380 sw1= sw1 + au(9*j-8)*x1 + au(9*j-7)*x2 + au(9*j-6)*x3
381 sw2= sw2 + au(9*j-5)*x1 + au(9*j-4)*x2 + au(9*j-3)*x3
382 sw3= sw3 + au(9*j-2)*x1 + au(9*j-1)*x2 + au(9*j )*x3
388 x2= x2 - alu(9*i-5)*x1
389 x3= x3 - alu(9*i-2)*x1 - alu(9*i-1)*x2
391 x2= alu(9*i-4)*( x2 - alu(9*i-3)*x3 )
392 x1= alu(9*i-8)*( x1 - alu(9*i-6)*x3 - alu(9*i-7)*x2)
394 zp(3*iold-2)= zp(3*iold-2) - x1
395 zp(3*iold-1)= zp(3*iold-1) - x2
396 zp(3*iold )= zp(3*iold ) - x3
420 integer(kind=kint ) :: nthreads = 1
424 if (
associated(colorindex))
deallocate(colorindex)
425 if (
associated(perm))
deallocate(perm)
426 if (
associated(iperm))
deallocate(iperm)
427 if (
associated(alu))
deallocate(alu)
428 if (nthreads >= 1)
then
429 if (
associated(d))
deallocate(d)
430 if (
associated(al))
deallocate(al)
431 if (
associated(au))
deallocate(au)
432 if (
associated(indexl))
deallocate(indexl)
433 if (
associated(indexu))
deallocate(indexu)
434 if (
associated(iteml))
deallocate(iteml)
435 if (
associated(itemu))
deallocate(itemu)
448 initialized = .false.
451 subroutine write_debug_info
453 integer(kind=kint) :: my_rank, ic, in
456 if (my_rank.eq.0)
then
457 write(*,*)
'DEBUG: Output fort.19000+myrank and fort.29000+myrank for coloring information'
459 write(19000+my_rank,
'(a)')
'#NCOLORTot'
460 write(19000+my_rank,*) ncolor
461 write(19000+my_rank,
'(a)')
'#ic COLORindex(ic-1)+1 COLORindex(ic)'
463 write(19000+my_rank,*) ic, colorindex(ic-1)+1,colorindex(ic)
465 write(29000+my_rank,
'(a)')
'#n_node'
466 write(29000+my_rank,*) n
467 write(29000+my_rank,
'(a)')
'#in OLDtoNEW(in) NEWtoOLD(in)'
469 write(29000+my_rank,*) in, iperm(in), perm(in)
470 if (perm(iperm(in)) .ne. in)
then
471 write(29000+my_rank,*)
'** WARNING **: NEWtoOLD and OLDtoNEW: ',in
474 end subroutine write_debug_info
476 subroutine check_ordering
478 integer(kind=kint) :: ic, i, j, k
479 integer(kind=kint),
allocatable :: iicolor(:)
481 if (ncolor.gt.1)
then
484 do i= colorindex(ic-1)+1, colorindex(ic)
490 do i= colorindex(ic-1)+1, colorindex(ic)
491 do j= indexl(i-1)+1, indexl(i)
493 if (iicolor(i).eq.iicolor(k))
then
494 write(*,*) .eq.
'** ERROR **: L-part: iicolor(i)iicolor(k)',i,k,iicolor(i)
501 do i= colorindex(ic), colorindex(ic-1)+1, -1
502 do j= indexu(i-1)+1, indexu(i)
504 if (iicolor(i).eq.iicolor(k))
then
505 write(*,*) .eq.
'** ERROR **: U-part: iicolor(i)iicolor(k)',i,k,iicolor(i)
513 end subroutine check_ordering
real(kind=kreal) function, public hecmw_mat_get_sigma_diag(hecMAT)
integer(kind=kint) function, public hecmw_mat_get_ncolor_in(hecMAT)
subroutine, public hecmw_matrix_reorder_values(N, NDOF, perm, iperm, indexL, indexU, itemL, itemU, AL, AU, D, indexLp, indexUp, itemLp, itemUp, ALp, AUp, Dp)
subroutine, public hecmw_matrix_reorder_renum_item(N, perm, indexXp, itemXp)
subroutine, public hecmw_matrix_reorder_profile(N, perm, iperm, indexL, indexU, itemL, itemU, indexLp, indexUp, itemLp, itemUp)
subroutine, public hecmw_precond_ssor_33_apply(ZP)
subroutine, public hecmw_precond_ssor_33_setup(hecMAT)
subroutine, public hecmw_precond_ssor_33_clear(hecMAT)
subroutine, public hecmw_tuning_fx_calc_sector_cache(N, NDOF, sectorCacheSize0, sectorCacheSize1)
integer(kind=4), parameter kreal
integer(kind=kint) function hecmw_comm_get_rank()
real(kind=kreal) function hecmw_wtime()
subroutine, public hecmw_matrix_ordering_rcm(N, indexL, itemL, indexU, itemU, perm, iperm)
subroutine, public hecmw_matrix_ordering_mc(N, indexL, itemL, indexU, itemU, perm_cur, ncolor_in, ncolor_out, COLORindex, perm, iperm)