27 integer(kind=kint) :: N
28 real(kind=
kreal),
pointer :: d(:) => null()
29 real(kind=
kreal),
pointer :: al(:) => null()
30 real(kind=
kreal),
pointer :: au(:) => null()
31 integer(kind=kint),
pointer :: indexL(:) => null()
32 integer(kind=kint),
pointer :: indexU(:) => null()
33 integer(kind=kint),
pointer :: itemL(:) => null()
34 integer(kind=kint),
pointer :: itemU(:) => null()
35 real(kind=
kreal),
pointer :: alu(:) => null()
37 integer(kind=kint) :: NColor
38 integer(kind=kint),
pointer :: COLORindex(:) => null()
39 integer(kind=kint),
pointer :: perm(:) => null()
40 integer(kind=kint),
pointer :: iperm(:) => null()
42 logical,
save :: isFirst = .true.
44 logical,
save :: INITIALIZED = .false.
51 integer(kind=kint ) :: npl, npu
52 integer(kind=kint ) :: ncolor_in
53 real (kind=
kreal) :: sigma_diag
54 real (kind=
kreal) :: alutmp(hecmat%NDOF,hecmat%NDOF), pw(hecmat%NDOF)
55 integer(kind=kint ) :: ii, i, j, k, ndof, ndof2
56 integer(kind=kint ) :: nthreads = 1
57 integer(kind=kint ),
allocatable :: perm_tmp(:)
64 if (hecmat%Iarray(98) == 1)
then
66 else if (hecmat%Iarray(97) == 1)
then
85 allocate(colorindex(0:n), perm_tmp(n), perm(n), iperm(n))
87 hecmat%indexU, hecmat%itemU, perm_tmp, iperm)
90 hecmat%indexU, hecmat%itemU, perm_tmp, &
91 ncolor_in, ncolor, colorindex, perm, iperm)
97 if (nthreads == 1)
then
99 allocate(colorindex(0:1), perm(n), iperm(n))
107 allocate(colorindex(0:n), perm_tmp(n), perm(n), iperm(n))
109 hecmat%indexU, hecmat%itemU, perm_tmp, iperm)
112 hecmat%indexU, hecmat%itemU, perm_tmp, &
113 ncolor_in, ncolor, colorindex, perm, iperm)
121 npl = hecmat%indexL(n)
122 npu = hecmat%indexU(n)
123 allocate(indexl(0:n), indexu(0:n), iteml(npl), itemu(npu))
125 hecmat%indexL, hecmat%indexU, hecmat%itemL, hecmat%itemU, &
126 indexl, indexu, iteml, itemu)
131 allocate(d(ndof2*n), al(ndof2*npl), au(ndof2*npu))
133 hecmat%indexL, hecmat%indexU, hecmat%itemL, hecmat%itemU, &
134 hecmat%AL, hecmat%AU, hecmat%D, &
135 indexl, indexu, iteml, itemu, al, au, d)
141 allocate(alu(ndof2*n))
158 alutmp(i,j) = alu(ndof2*(ii-1)+(i-1)*ndof+j)
159 if (i==j) alutmp(i,j)=alutmp(i,j)*sigma_diag
163 alutmp(k,k)= 1.d0/alutmp(k,k)
165 alutmp(i,k)= alutmp(i,k) * alutmp(k,k)
167 pw(j)= alutmp(i,j) - alutmp(i,k)*alutmp(k,j)
176 alu(ndof2*(ii-1)+(i-1)*ndof+j)= alutmp(i,j)
190 hecmat%Iarray(98) = 0
191 hecmat%Iarray(97) = 0
200 real(kind=
kreal),
intent(inout) :: zp(:)
201 integer(kind=kint) :: ic, i, iold, j, isl, iel, isu, ieu, k, ndof, ndof2, idof,jdof
202 real(kind=
kreal) :: sw(ndof), x(ndof)
205 integer(kind=kint),
parameter :: numofblockperthread = 100
206 integer(kind=kint),
save :: numofthread = 1, numofblock
207 integer(kind=kint),
save,
allocatable :: ictoblockindex(:)
208 integer(kind=kint),
save,
allocatable :: blockindextocolorindex(:)
209 integer(kind=kint),
save :: sectorcachesize0, sectorcachesize1
210 integer(kind=kint) :: blockindex, elementcount, numofelement, ii
211 real(kind=
kreal) :: numofelementperblock
212 integer(kind=kint) :: my_rank
218 numofblock = numofthread * numofblockperthread
219 if (
allocated(ictoblockindex))
deallocate(ictoblockindex)
220 if (
allocated(blockindextocolorindex))
deallocate(blockindextocolorindex)
221 allocate (ictoblockindex(0:ncolor), &
222 blockindextocolorindex(0:numofblock + ncolor))
223 numofelement = n + indexl(n) + indexu(n)
224 numofelementperblock = dble(numofelement) / numofblock
227 ictoblockindex(0) = 0
228 blockindextocolorindex = -1
229 blockindextocolorindex(0) = 0
238 do i = colorindex(ic-1)+1, colorindex(ic)
239 elementcount = elementcount + 1
240 elementcount = elementcount + (indexl(i) - indexl(i-1))
241 elementcount = elementcount + (indexu(i) - indexu(i-1))
242 if (elementcount > ii * numofelementperblock &
243 .or. i == colorindex(ic))
then
245 blockindex = blockindex + 1
246 blockindextocolorindex(blockindex) = i
251 ictoblockindex(ic) = blockindex
253 numofblock = blockindex
256 sectorcachesize0, sectorcachesize1 )
280 do i = colorindex(ic-1)+1, colorindex(ic)
283 do blockindex = ictoblockindex(ic-1)+1, ictoblockindex(ic)
284 do i = blockindextocolorindex(blockindex-1)+1, &
285 blockindextocolorindex(blockindex)
289 sw(idof) = zp(ndof*(iold-1)+idof)
296 x(idof) = zp(ndof*(k-1)+idof)
300 sw(idof) = sw(idof) - al(ndof2*(j-1)+ndof*(idof-1)+jdof)*x(jdof)
308 x(idof) = x(idof) - alu(ndof2*(i-1)+ndof*(idof-1)+jdof )*x(jdof)
311 do idof = ndof, 1, -1
312 do jdof = ndof, idof+1, -1
313 x(idof) = x(idof) - alu(ndof2*(i-1)+ndof*(idof-1)+jdof)*x(jdof)
315 x(idof) = alu(ndof2*(i-1)+(ndof+1)*(idof-1)+1)*x(idof)
317 zp(ndof*(iold-1)+1:ndof*(iold-1)+ndof) = x(1:ndof)
334 do i = colorindex(ic-1)+1, colorindex(ic)
337 do blockindex = ictoblockindex(ic), ictoblockindex(ic-1)+1, -1
338 do i = blockindextocolorindex(blockindex), &
339 blockindextocolorindex(blockindex-1)+1, -1
351 x(idof) = zp(ndof*(k-1)+idof)
355 sw(idof) = sw(idof) + au(ndof2*(j-1)+ndof*(idof-1)+jdof)*x(jdof)
363 x(idof) = x(idof) - alu(ndof2*(i-1)+ndof*(idof-1)+k)*x(k)
366 do idof = ndof, 1, -1
367 do k = ndof, idof+1, -1
368 x(idof) = x(idof) - alu(ndof2*(i-1)+ndof*(idof-1)+k)*x(k)
370 x(idof) = alu(ndof2*(i-1)+(ndof+1)*(idof-1)+1)*x(idof)
374 zp(ndof*(iold-1)+idof) = zp(ndof*(iold-1)+idof) - x(idof)
399 integer(kind=kint ) :: nthreads = 1
403 if (
associated(colorindex))
deallocate(colorindex)
404 if (
associated(perm))
deallocate(perm)
405 if (
associated(iperm))
deallocate(iperm)
406 if (
associated(alu))
deallocate(alu)
407 if (nthreads >= 1)
then
408 if (
associated(d))
deallocate(d)
409 if (
associated(al))
deallocate(al)
410 if (
associated(au))
deallocate(au)
411 if (
associated(indexl))
deallocate(indexl)
412 if (
associated(indexu))
deallocate(indexu)
413 if (
associated(iteml))
deallocate(iteml)
414 if (
associated(itemu))
deallocate(itemu)
427 initialized = .false.
430 subroutine write_debug_info
432 integer(kind=kint) :: my_rank, ic, in
435 if (my_rank.eq.0)
then
436 write(*,*)
'DEBUG: Output fort.19000+myrank and fort.29000+myrank for coloring information'
438 write(19000+my_rank,
'(a)')
'#NCOLORTot'
439 write(19000+my_rank,*) ncolor
440 write(19000+my_rank,
'(a)')
'#ic COLORindex(ic-1)+1 COLORindex(ic)'
442 write(19000+my_rank,*) ic, colorindex(ic-1)+1,colorindex(ic)
444 write(29000+my_rank,
'(a)')
'#n_node'
445 write(29000+my_rank,*) n
446 write(29000+my_rank,
'(a)')
'#in OLDtoNEW(in) NEWtoOLD(in)'
448 write(29000+my_rank,*) in, iperm(in), perm(in)
449 if (perm(iperm(in)) .ne. in)
then
450 write(29000+my_rank,*)
'** WARNING **: NEWtoOLD and OLDtoNEW: ',in
453 end subroutine write_debug_info
455 subroutine check_ordering
457 integer(kind=kint) :: ic, i, j, k
458 integer(kind=kint),
allocatable :: iicolor(:)
460 if (ncolor.gt.1)
then
463 do i= colorindex(ic-1)+1, colorindex(ic)
469 do i= colorindex(ic-1)+1, colorindex(ic)
470 do j= indexl(i-1)+1, indexl(i)
472 if (iicolor(i).eq.iicolor(k))
then
473 write(*,*) .eq.
'** ERROR **: L-part: iicolor(i)iicolor(k)',i,k,iicolor(i)
480 do i= colorindex(ic), colorindex(ic-1)+1, -1
481 do j= indexu(i-1)+1, indexu(i)
483 if (iicolor(i).eq.iicolor(k))
then
484 write(*,*) .eq.
'** ERROR **: U-part: iicolor(i)iicolor(k)',i,k,iicolor(i)
492 end subroutine check_ordering
real(kind=kreal) function, public hecmw_mat_get_sigma_diag(hecMAT)
integer(kind=kint) function, public hecmw_mat_get_ncolor_in(hecMAT)
subroutine, public hecmw_matrix_reorder_values(N, NDOF, perm, iperm, indexL, indexU, itemL, itemU, AL, AU, D, indexLp, indexUp, itemLp, itemUp, ALp, AUp, Dp)
subroutine, public hecmw_matrix_reorder_renum_item(N, perm, indexXp, itemXp)
subroutine, public hecmw_matrix_reorder_profile(N, perm, iperm, indexL, indexU, itemL, itemU, indexLp, indexUp, itemLp, itemUp)
subroutine, public hecmw_precond_ssor_nn_setup(hecMAT)
subroutine, public hecmw_precond_ssor_nn_clear(hecMAT)
subroutine, public hecmw_precond_ssor_nn_apply(ZP, NDOF)
subroutine, public hecmw_tuning_fx_calc_sector_cache(N, NDOF, sectorCacheSize0, sectorCacheSize1)
integer(kind=4), parameter kreal
integer(kind=kint) function hecmw_comm_get_rank()
subroutine, public hecmw_matrix_ordering_rcm(N, indexL, itemL, indexU, itemU, perm, iperm)
subroutine, public hecmw_matrix_ordering_mc(N, indexL, itemL, indexU, itemU, perm_cur, ncolor_in, ncolor_out, COLORindex, perm, iperm)