25 integer(kind=kint) :: N
26 real(kind=
kreal),
pointer :: d(:) => null()
27 real(kind=
kreal),
pointer :: al(:) => null()
28 real(kind=
kreal),
pointer :: au(:) => null()
29 integer(kind=kint),
pointer :: indexL(:) => null()
30 integer(kind=kint),
pointer :: indexU(:) => null()
31 integer(kind=kint),
pointer :: itemL(:) => null()
32 integer(kind=kint),
pointer :: itemU(:) => null()
33 real(kind=
kreal),
pointer :: alu(:) => null()
35 integer(kind=kint) :: NColor
36 integer(kind=kint),
pointer :: COLORindex(:) => null()
37 integer(kind=kint),
pointer :: perm(:) => null()
38 integer(kind=kint),
pointer :: iperm(:) => null()
40 logical,
save :: isFirst = .true.
42 logical,
save :: INITIALIZED = .false.
49 integer(kind=kint ) :: npl, npu, npcl, npcu
50 real (kind=
kreal),
allocatable :: cd(:)
51 integer(kind=kint ) :: ncolor_in
52 real (kind=
kreal) :: sigma_diag
53 real (kind=
kreal) :: alutmp(2,2), pw(2)
54 integer(kind=kint ) :: ii, i, j, k
55 integer(kind=kint ) :: nthreads = 1
56 integer(kind=kint ),
allocatable :: perm_tmp(:)
63 if (hecmat%Iarray(98) == 1)
then
65 else if (hecmat%Iarray(97) == 1)
then
79 if (nthreads == 1)
then
81 allocate(colorindex(0:1), perm(n), iperm(n))
89 allocate(colorindex(0:n), perm_tmp(n), perm(n), iperm(n))
91 hecmat%indexU, hecmat%itemU, perm_tmp, iperm)
94 hecmat%indexU, hecmat%itemU, perm_tmp, &
95 ncolor_in, ncolor, colorindex, perm, iperm)
102 npl = hecmat%indexL(n)
103 npu = hecmat%indexU(n)
104 allocate(indexl(0:n), indexu(0:n), iteml(npl), itemu(npu))
106 hecmat%indexL, hecmat%indexU, hecmat%itemL, hecmat%itemU, &
107 indexl, indexu, iteml, itemu)
112 allocate(d(4*n), al(4*npl), au(4*npu))
114 hecmat%indexL, hecmat%indexU, hecmat%itemL, hecmat%itemU, &
115 hecmat%AL, hecmat%AU, hecmat%D, &
116 indexl, indexu, iteml, itemu, al, au, d)
132 alutmp(1,1)= alu(4*ii-3) * sigma_diag
133 alutmp(1,2)= alu(4*ii-2)
134 alutmp(2,1)= alu(4*ii-1)
135 alutmp(2,2)= alu(4*ii-0) * sigma_diag
137 alutmp(k,k)= 1.d0/alutmp(k,k)
139 alutmp(i,k)= alutmp(i,k) * alutmp(k,k)
141 pw(j)= alutmp(i,j) - alutmp(i,k)*alutmp(k,j)
148 alu(4*ii-3)= alutmp(1,1)
149 alu(4*ii-2)= alutmp(1,2)
150 alu(4*ii-1)= alutmp(2,1)
151 alu(4*ii-0)= alutmp(2,2)
159 hecmat%Iarray(98) = 0
160 hecmat%Iarray(97) = 0
169 real(kind=
kreal),
intent(inout) :: zp(:)
170 integer(kind=kint) :: ic, i, iold, j, isl, iel, isu, ieu, k
171 real(kind=
kreal) :: sw(2), x(2)
174 integer(kind=kint),
parameter :: numofblockperthread = 100
175 integer(kind=kint),
save :: numofthread = 1, numofblock
176 integer(kind=kint),
save,
allocatable :: ictoblockindex(:)
177 integer(kind=kint),
save,
allocatable :: blockindextocolorindex(:)
178 integer(kind=kint),
save :: sectorcachesize0, sectorcachesize1
179 integer(kind=kint) :: blockindex, elementcount, numofelement, ii
180 real(kind=
kreal) :: numofelementperblock
181 integer(kind=kint) :: my_rank
185 numofblock = numofthread * numofblockperthread
186 if (
allocated(ictoblockindex))
deallocate(ictoblockindex)
187 if (
allocated(blockindextocolorindex))
deallocate(blockindextocolorindex)
188 allocate (ictoblockindex(0:ncolor), &
189 blockindextocolorindex(0:numofblock + ncolor))
190 numofelement = n + indexl(n) + indexu(n)
191 numofelementperblock = dble(numofelement) / numofblock
194 ictoblockindex(0) = 0
195 blockindextocolorindex = -1
196 blockindextocolorindex(0) = 0
205 do i = colorindex(ic-1)+1, colorindex(ic)
206 elementcount = elementcount + 1
207 elementcount = elementcount + (indexl(i) - indexl(i-1))
208 elementcount = elementcount + (indexu(i) - indexu(i-1))
209 if (elementcount > ii * numofelementperblock &
210 .or. i == colorindex(ic))
then
212 blockindex = blockindex + 1
213 blockindextocolorindex(blockindex) = i
218 ictoblockindex(ic) = blockindex
220 numofblock = blockindex
223 sectorcachesize0, sectorcachesize1 )
242 do blockindex = ictoblockindex(ic-1)+1, ictoblockindex(ic)
243 do i = blockindextocolorindex(blockindex-1)+1, &
244 blockindextocolorindex(blockindex)
254 sw(1)= sw(1) - al(4*j-3)*x(1) - al(4*j-2)*x(2)
255 sw(2)= sw(2) - al(4*j-1)*x(1) - al(4*j-0)*x(2)
259 x(2)= x(2) - alu(4*i-1)*x(1)
260 x(2)= alu(4*i )* x(2)
261 x(1)= alu(4*i-3)*( x(1) - alu(4*i-2)*x(2))
272 do blockindex = ictoblockindex(ic), ictoblockindex(ic-1)+1, -1
273 do i = blockindextocolorindex(blockindex), &
274 blockindextocolorindex(blockindex-1)+1, -1
286 sw(1)= sw(1) + au(4*j-3)*x(1) + au(4*j-2)*x(2)
287 sw(2)= sw(2) + au(4*j-1)*x(1) + au(4*j-0)*x(2)
291 x(2)= x(2) - alu(4*i-1)*x(1)
294 x(2)= alu(4*i )* x(2)
295 x(1)= alu(4*i-3)*( x(1) - alu(4*i-2)*x(2) )
298 zp(2*iold-1)= zp(2*iold-1) - x(1)
299 zp(2*iold )= zp(2*iold ) - x(2)
316 integer(kind=kint ) :: nthreads = 1
318 if (
associated(colorindex))
deallocate(colorindex)
319 if (
associated(perm))
deallocate(perm)
320 if (
associated(iperm))
deallocate(iperm)
321 if (
associated(alu))
deallocate(alu)
322 if (nthreads >= 1)
then
323 if (
associated(d))
deallocate(d)
324 if (
associated(al))
deallocate(al)
325 if (
associated(au))
deallocate(au)
326 if (
associated(indexl))
deallocate(indexl)
327 if (
associated(indexu))
deallocate(indexu)
328 if (
associated(iteml))
deallocate(iteml)
329 if (
associated(itemu))
deallocate(itemu)
342 initialized = .false.
345 subroutine write_debug_info
347 integer(kind=kint) :: my_rank, ic, in
350 if (my_rank.eq.0)
then
351 write(*,*)
'DEBUG: Output fort.19000+myrank and fort.29000+myrank for coloring information'
353 write(19000+my_rank,
'(a)')
'#NCOLORTot'
354 write(19000+my_rank,*) ncolor
355 write(19000+my_rank,
'(a)')
'#ic COLORindex(ic-1)+1 COLORindex(ic)'
357 write(19000+my_rank,*) ic, colorindex(ic-1)+1,colorindex(ic)
359 write(29000+my_rank,
'(a)')
'#n_node'
360 write(29000+my_rank,*) n
361 write(29000+my_rank,
'(a)')
'#in OLDtoNEW(in) NEWtoOLD(in)'
363 write(29000+my_rank,*) in, iperm(in), perm(in)
364 if (perm(iperm(in)) .ne. in)
then
365 write(29000+my_rank,*)
'** WARNING **: NEWtoOLD and OLDtoNEW: ',in
368 end subroutine write_debug_info
370 subroutine check_ordering
372 integer(kind=kint) :: ic, i, j, k
373 integer(kind=kint),
allocatable :: iicolor(:)
375 if (ncolor.gt.1)
then
378 do i= colorindex(ic-1)+1, colorindex(ic)
384 do i= colorindex(ic-1)+1, colorindex(ic)
385 do j= indexl(i-1)+1, indexl(i)
387 if (iicolor(i).eq.iicolor(k))
then
388 write(*,*) .eq.
'** ERROR **: L-part: iicolor(i)iicolor(k)',i,k,iicolor(i)
395 do i= colorindex(ic), colorindex(ic-1)+1, -1
396 do j= indexu(i-1)+1, indexu(i)
398 if (iicolor(i).eq.iicolor(k))
then
399 write(*,*) .eq.
'** ERROR **: U-part: iicolor(i)iicolor(k)',i,k,iicolor(i)
407 end subroutine check_ordering