25 integer(kind=kint) :: N
26 real(kind=
kreal),
pointer :: d(:) => null()
27 real(kind=
kreal),
pointer :: al(:) => null()
28 real(kind=
kreal),
pointer :: au(:) => null()
29 integer(kind=kint),
pointer :: indexL(:) => null()
30 integer(kind=kint),
pointer :: indexU(:) => null()
31 integer(kind=kint),
pointer :: itemL(:) => null()
32 integer(kind=kint),
pointer :: itemU(:) => null()
33 real(kind=
kreal),
pointer :: alu(:) => null()
35 integer(kind=kint) :: NColor
36 integer(kind=kint),
pointer :: COLORindex(:) => null()
37 integer(kind=kint),
pointer :: perm(:) => null()
38 integer(kind=kint),
pointer :: iperm(:) => null()
40 logical,
save :: isFirst = .true.
42 logical,
save :: INITIALIZED = .false.
49 integer(kind=kint ) :: npl, npu, npcl, npcu
50 real (kind=
kreal),
allocatable :: cd(:)
51 integer(kind=kint ) :: ncolor_in
52 real (kind=
kreal) :: sigma_diag
53 real (kind=
kreal) :: alutmp(4,4), pw(4)
54 integer(kind=kint ) :: ii, i, j, k
55 integer(kind=kint ) :: nthreads = 1
56 integer(kind=kint ),
allocatable :: perm_tmp(:)
63 if (hecmat%Iarray(98) == 1)
then
65 else if (hecmat%Iarray(97) == 1)
then
79 if (nthreads == 1)
then
81 allocate(colorindex(0:1), perm(n), iperm(n))
89 allocate(colorindex(0:n), perm_tmp(n), perm(n), iperm(n))
91 hecmat%indexU, hecmat%itemU, perm_tmp, iperm)
94 hecmat%indexU, hecmat%itemU, perm_tmp, &
95 ncolor_in, ncolor, colorindex, perm, iperm)
102 npl = hecmat%indexL(n)
103 npu = hecmat%indexU(n)
104 allocate(indexl(0:n), indexu(0:n), iteml(npl), itemu(npu))
106 hecmat%indexL, hecmat%indexU, hecmat%itemL, hecmat%itemU, &
107 indexl, indexu, iteml, itemu)
112 allocate(d(16*n), al(16*npl), au(16*npu))
114 hecmat%indexL, hecmat%indexU, hecmat%itemL, hecmat%itemU, &
115 hecmat%AL, hecmat%AU, hecmat%D, &
116 indexl, indexu, iteml, itemu, al, au, d)
130 alutmp(1,1)= alu(16*ii-15) * sigma_diag
131 alutmp(1,2)= alu(16*ii-14)
132 alutmp(1,3)= alu(16*ii-13)
133 alutmp(1,4)= alu(16*ii-12)
134 alutmp(2,1)= alu(16*ii-11)
135 alutmp(2,2)= alu(16*ii-10) * sigma_diag
136 alutmp(2,3)= alu(16*ii- 9)
137 alutmp(2,4)= alu(16*ii- 8)
138 alutmp(3,1)= alu(16*ii- 7)
139 alutmp(3,2)= alu(16*ii- 6)
140 alutmp(3,3)= alu(16*ii- 5) * sigma_diag
141 alutmp(3,4)= alu(16*ii- 4)
142 alutmp(4,1)= alu(16*ii- 3)
143 alutmp(4,2)= alu(16*ii- 2)
144 alutmp(4,3)= alu(16*ii- 1)
145 alutmp(4,4)= alu(16*ii ) * sigma_diag
148 alutmp(k,k)= 1.d0/alutmp(k,k)
150 alutmp(i,k)= alutmp(i,k) * alutmp(k,k)
152 pw(j)= alutmp(i,j) - alutmp(i,k)*alutmp(k,j)
159 alu(16*ii-15)= alutmp(1,1)
160 alu(16*ii-14)= alutmp(1,2)
161 alu(16*ii-13)= alutmp(1,3)
162 alu(16*ii-12)= alutmp(1,4)
163 alu(16*ii-11)= alutmp(2,1)
164 alu(16*ii-10)= alutmp(2,2)
165 alu(16*ii- 9)= alutmp(2,3)
166 alu(16*ii- 8)= alutmp(2,4)
167 alu(16*ii- 7)= alutmp(3,1)
168 alu(16*ii- 6)= alutmp(3,2)
169 alu(16*ii- 5)= alutmp(3,3)
170 alu(16*ii- 4)= alutmp(3,4)
171 alu(16*ii- 3)= alutmp(4,1)
172 alu(16*ii- 2)= alutmp(4,2)
173 alu(16*ii- 1)= alutmp(4,3)
174 alu(16*ii )= alutmp(4,4)
181 hecmat%Iarray(98) = 0
182 hecmat%Iarray(97) = 0
191 real(kind=
kreal),
intent(inout) :: zp(:)
192 integer(kind=kint) :: ic, i, iold, j, isl, iel, isu, ieu, k
193 real(kind=
kreal) :: sw1, sw2, sw3, sw4, x1, x2, x3, x4
196 integer(kind=kint),
parameter :: numofblockperthread = 100
197 integer(kind=kint),
save :: numofthread = 1, numofblock
198 integer(kind=kint),
save,
allocatable :: ictoblockindex(:)
199 integer(kind=kint),
save,
allocatable :: blockindextocolorindex(:)
200 integer(kind=kint),
save :: sectorcachesize0, sectorcachesize1
201 integer(kind=kint) :: blockindex, elementcount, numofelement, ii
202 real(kind=
kreal) :: numofelementperblock
203 integer(kind=kint) :: my_rank
207 numofblock = numofthread * numofblockperthread
208 if (
allocated(ictoblockindex))
deallocate(ictoblockindex)
209 if (
allocated(blockindextocolorindex))
deallocate(blockindextocolorindex)
210 allocate (ictoblockindex(0:ncolor), &
211 blockindextocolorindex(0:numofblock + ncolor))
212 numofelement = n + indexl(n) + indexu(n)
213 numofelementperblock = dble(numofelement) / numofblock
216 ictoblockindex(0) = 0
217 blockindextocolorindex = -1
218 blockindextocolorindex(0) = 0
227 do i = colorindex(ic-1)+1, colorindex(ic)
228 elementcount = elementcount + 1
229 elementcount = elementcount + (indexl(i) - indexl(i-1))
230 elementcount = elementcount + (indexu(i) - indexu(i-1))
231 if (elementcount > ii * numofelementperblock &
232 .or. i == colorindex(ic))
then
234 blockindex = blockindex + 1
235 blockindextocolorindex(blockindex) = i
240 ictoblockindex(ic) = blockindex
242 numofblock = blockindex
245 sectorcachesize0, sectorcachesize1 )
264 do blockindex = ictoblockindex(ic-1)+1, ictoblockindex(ic)
265 do i = blockindextocolorindex(blockindex-1)+1, &
266 blockindextocolorindex(blockindex)
282 sw1= sw1 - al(16*j-15)*x1 - al(16*j-14)*x2 - al(16*j-13)*x3 - al(16*j-12)*x4
283 sw2= sw2 - al(16*j-11)*x1 - al(16*j-10)*x2 - al(16*j- 9)*x3 - al(16*j- 8)*x4
284 sw3= sw3 - al(16*j- 7)*x1 - al(16*j- 6)*x2 - al(16*j- 5)*x3 - al(16*j- 4)*x4
285 sw4= sw4 - al(16*j- 3)*x1 - al(16*j- 2)*x2 - al(16*j- 1)*x3 - al(16*j- 0)*x4
292 x2= x2 - alu(16*i-11)*x1
293 x3= x3 - alu(16*i- 7)*x1 - alu(16*i- 6)*x2
294 x4= x4 - alu(16*i- 3)*x1 - alu(16*i- 2)*x2 - alu(16*i- 1)*x3
297 x3= alu(16*i- 5)*( x3 - alu(16*i- 4)*x4 )
298 x2= alu(16*i-10)*( x2 - alu(16*i- 8)*x4 - alu(16*i- 9)*x3 )
299 x1= alu(16*i-15)*( x1 - alu(16*i-12)*x4 - alu(16*i-13)*x3 - alu(16*i-14)*x2 )
313 do blockindex = ictoblockindex(ic), ictoblockindex(ic-1)+1, -1
314 do i = blockindextocolorindex(blockindex), &
315 blockindextocolorindex(blockindex-1)+1, -1
333 sw1= sw1 + au(16*j-15)*x1 + au(16*j-14)*x2 + au(16*j-13)*x3 + au(16*j-12)*x4
334 sw2= sw2 + au(16*j-11)*x1 + au(16*j-10)*x2 + au(16*j- 9)*x3 + au(16*j- 8)*x4
335 sw3= sw3 + au(16*j- 7)*x1 + au(16*j- 6)*x2 + au(16*j- 5)*x3 + au(16*j- 4)*x4
336 sw4= sw4 + au(16*j- 3)*x1 + au(16*j- 2)*x2 + au(16*j- 1)*x3 + au(16*j- 0)*x4
344 x2= x2 - alu(16*i-11)*x1
345 x3= x3 - alu(16*i- 7)*x1 - alu(16*i- 6)*x2
346 x4= x4 - alu(16*i- 3)*x1 - alu(16*i- 2)*x2 - alu(16*i- 1)*x3
348 x3= alu(16*i- 5)*( x3 - alu(16*i- 4)*x4 )
349 x2= alu(16*i-10)*( x2 - alu(16*i- 8)*x4 - alu(16*i- 9)*x3 )
350 x1= alu(16*i-15)*( x1 - alu(16*i-12)*x4 - alu(16*i-13)*x3 - alu(16*i-14)*x2 )
352 zp(4*iold-3)= zp(4*iold-3) - x1
353 zp(4*iold-2)= zp(4*iold-2) - x2
354 zp(4*iold-1)= zp(4*iold-1) - x3
355 zp(4*iold )= zp(4*iold ) - x4
372 integer(kind=kint ) :: nthreads = 1
374 if (
associated(colorindex))
deallocate(colorindex)
375 if (
associated(perm))
deallocate(perm)
376 if (
associated(iperm))
deallocate(iperm)
377 if (
associated(alu))
deallocate(alu)
378 if (nthreads >= 1)
then
379 if (
associated(d))
deallocate(d)
380 if (
associated(al))
deallocate(al)
381 if (
associated(au))
deallocate(au)
382 if (
associated(indexl))
deallocate(indexl)
383 if (
associated(indexu))
deallocate(indexu)
384 if (
associated(iteml))
deallocate(iteml)
385 if (
associated(itemu))
deallocate(itemu)
400 subroutine write_debug_info
402 integer(kind=kint) :: my_rank, ic, in
405 if (my_rank.eq.0)
then
406 write(*,*)
'DEBUG: Output fort.19000+myrank and fort.29000+myrank for coloring information'
408 write(19000+my_rank,
'(a)')
'#NCOLORTot'
409 write(19000+my_rank,*) ncolor
410 write(19000+my_rank,
'(a)')
'#ic COLORindex(ic-1)+1 COLORindex(ic)'
412 write(19000+my_rank,*) ic, colorindex(ic-1)+1,colorindex(ic)
414 write(29000+my_rank,
'(a)')
'#n_node'
415 write(29000+my_rank,*) n
416 write(29000+my_rank,
'(a)')
'#in OLDtoNEW(in) NEWtoOLD(in)'
418 write(29000+my_rank,*) in, iperm(in), perm(in)
419 if (perm(iperm(in)) .ne. in)
then
420 write(29000+my_rank,*)
'** WARNING **: NEWtoOLD and OLDtoNEW: ',in
423 end subroutine write_debug_info
425 subroutine check_ordering
427 integer(kind=kint) :: ic, i, j, k
428 integer(kind=kint),
allocatable :: iicolor(:)
430 if (ncolor.gt.1)
then
433 do i= colorindex(ic-1)+1, colorindex(ic)
439 do i= colorindex(ic-1)+1, colorindex(ic)
440 do j= indexl(i-1)+1, indexl(i)
442 if (iicolor(i).eq.iicolor(k))
then
443 write(*,*) .eq.
'** ERROR **: L-part: iicolor(i)iicolor(k)',i,k,iicolor(i)
450 do i= colorindex(ic), colorindex(ic-1)+1, -1
451 do j= indexu(i-1)+1, indexu(i)
453 if (iicolor(i).eq.iicolor(k))
then
454 write(*,*) .eq.
'** ERROR **: U-part: iicolor(i)iicolor(k)',i,k,iicolor(i)
462 end subroutine check_ordering