25 integer(kind=kint) :: N
26 real(kind=
kreal),
pointer :: d(:) => null()
27 real(kind=
kreal),
pointer :: al(:) => null()
28 real(kind=
kreal),
pointer :: au(:) => null()
29 integer(kind=kint),
pointer :: indexL(:) => null()
30 integer(kind=kint),
pointer :: indexU(:) => null()
31 integer(kind=kint),
pointer :: itemL(:) => null()
32 integer(kind=kint),
pointer :: itemU(:) => null()
33 real(kind=
kreal),
pointer :: alu(:) => null()
35 integer(kind=kint) :: NColor
36 integer(kind=kint),
pointer :: COLORindex(:) => null()
37 integer(kind=kint),
pointer :: perm(:) => null()
38 integer(kind=kint),
pointer :: iperm(:) => null()
40 logical,
save :: isFirst = .true.
47 integer(kind=kint ) :: npl, npu, npcl, npcu
48 real (kind=
kreal),
allocatable :: cd(:)
49 integer(kind=kint ) :: ncolor_in
50 real (kind=
kreal) :: sigma_diag
51 real (kind=
kreal) :: alutmp(6,6), pw(6)
52 integer(kind=kint ) :: ii, i, j, k
53 integer(kind=kint ) :: nthreads = 1
54 integer(kind=kint ),
allocatable :: perm_tmp(:)
67 if (nthreads == 1)
then
69 allocate(colorindex(0:1), perm(n), iperm(n))
80 indexl => hecmat%indexL
81 indexu => hecmat%indexU
85 allocate(colorindex(0:n), perm_tmp(n), perm(n), iperm(n))
87 hecmat%indexU, hecmat%itemU, perm_tmp, iperm)
90 hecmat%indexU, hecmat%itemU, perm_tmp, &
91 ncolor_in, ncolor, colorindex, perm, iperm)
97 npl = hecmat%indexL(n)
98 npu = hecmat%indexU(n)
99 allocate(indexl(0:n), indexu(0:n), iteml(npl), itemu(npu))
101 hecmat%indexL, hecmat%indexU, hecmat%itemL, hecmat%itemU, &
102 indexl, indexu, iteml, itemu)
107 allocate(d(36*n), al(36*npl), au(36*npu))
109 hecmat%indexL, hecmat%indexU, hecmat%itemL, hecmat%itemU, &
110 hecmat%AL, hecmat%AU, hecmat%D, &
111 indexl, indexu, iteml, itemu, al, au, d)
126 alutmp(1,1)= alu(36*ii-35) * sigma_diag
127 alutmp(1,2)= alu(36*ii-34)
128 alutmp(1,3)= alu(36*ii-33)
129 alutmp(1,4)= alu(36*ii-32)
130 alutmp(1,5)= alu(36*ii-31)
131 alutmp(1,6)= alu(36*ii-30)
133 alutmp(2,1)= alu(36*ii-29)
134 alutmp(2,2)= alu(36*ii-28) * sigma_diag
135 alutmp(2,3)= alu(36*ii-27)
136 alutmp(2,4)= alu(36*ii-26)
137 alutmp(2,5)= alu(36*ii-25)
138 alutmp(2,6)= alu(36*ii-24)
140 alutmp(3,1)= alu(36*ii-23)
141 alutmp(3,2)= alu(36*ii-22)
142 alutmp(3,3)= alu(36*ii-21) * sigma_diag
143 alutmp(3,4)= alu(36*ii-20)
144 alutmp(3,5)= alu(36*ii-19)
145 alutmp(3,6)= alu(36*ii-18)
147 alutmp(4,1)= alu(36*ii-17)
148 alutmp(4,2)= alu(36*ii-16)
149 alutmp(4,3)= alu(36*ii-15)
150 alutmp(4,4)= alu(36*ii-14) * sigma_diag
151 alutmp(4,5)= alu(36*ii-13)
152 alutmp(4,6)= alu(36*ii-12)
154 alutmp(5,1)= alu(36*ii-11)
155 alutmp(5,2)= alu(36*ii-10)
156 alutmp(5,3)= alu(36*ii-9 )
157 alutmp(5,4)= alu(36*ii-8 )
158 alutmp(5,5)= alu(36*ii-7 ) * sigma_diag
159 alutmp(5,6)= alu(36*ii-6 )
161 alutmp(6,1)= alu(36*ii-5 )
162 alutmp(6,2)= alu(36*ii-4 )
163 alutmp(6,3)= alu(36*ii-3 )
164 alutmp(6,4)= alu(36*ii-2 )
165 alutmp(6,5)= alu(36*ii-1 )
166 alutmp(6,6)= alu(36*ii ) * sigma_diag
169 alutmp(k,k)= 1.d0/alutmp(k,k)
171 alutmp(i,k)= alutmp(i,k) * alutmp(k,k)
173 pw(j)= alutmp(i,j) - alutmp(i,k)*alutmp(k,j)
181 alu(36*ii-35)= alutmp(1,1)
182 alu(36*ii-34)= alutmp(1,2)
183 alu(36*ii-33)= alutmp(1,3)
184 alu(36*ii-32)= alutmp(1,4)
185 alu(36*ii-31)= alutmp(1,5)
186 alu(36*ii-30)= alutmp(1,6)
187 alu(36*ii-29)= alutmp(2,1)
188 alu(36*ii-28)= alutmp(2,2)
189 alu(36*ii-27)= alutmp(2,3)
190 alu(36*ii-26)= alutmp(2,4)
191 alu(36*ii-25)= alutmp(2,5)
192 alu(36*ii-24)= alutmp(2,6)
193 alu(36*ii-23)= alutmp(3,1)
194 alu(36*ii-22)= alutmp(3,2)
195 alu(36*ii-21)= alutmp(3,3)
196 alu(36*ii-20)= alutmp(3,4)
197 alu(36*ii-19)= alutmp(3,5)
198 alu(36*ii-18)= alutmp(3,6)
199 alu(36*ii-17)= alutmp(4,1)
200 alu(36*ii-16)= alutmp(4,2)
201 alu(36*ii-15)= alutmp(4,3)
202 alu(36*ii-14)= alutmp(4,4)
203 alu(36*ii-13)= alutmp(4,5)
204 alu(36*ii-12)= alutmp(4,6)
205 alu(36*ii-11)= alutmp(5,1)
206 alu(36*ii-10)= alutmp(5,2)
207 alu(36*ii-9 )= alutmp(5,3)
208 alu(36*ii-8 )= alutmp(5,4)
209 alu(36*ii-7 )= alutmp(5,5)
210 alu(36*ii-6 )= alutmp(5,6)
211 alu(36*ii-5 )= alutmp(6,1)
212 alu(36*ii-4 )= alutmp(6,2)
213 alu(36*ii-3 )= alutmp(6,3)
214 alu(36*ii-2 )= alutmp(6,4)
215 alu(36*ii-1 )= alutmp(6,5)
216 alu(36*ii )= alutmp(6,6)
228 real(kind=
kreal),
intent(inout) :: zp(:)
229 integer(kind=kint) :: ic, i, iold, j, isl, iel, isu, ieu, k
230 real(kind=
kreal) :: x1, x2, x3, x4, x5, x6
231 real(kind=
kreal) :: sw1, sw2, sw3, sw4, sw5, sw6
234 integer(kind=kint),
parameter :: numofblockperthread = 100
235 integer(kind=kint),
save :: numofthread = 1, numofblock
236 integer(kind=kint),
save,
allocatable :: ictoblockindex(:)
237 integer(kind=kint),
save,
allocatable :: blockindextocolorindex(:)
238 integer(kind=kint),
save :: sectorcachesize0, sectorcachesize1
239 integer(kind=kint) :: blockindex, elementcount, numofelement, ii
240 real(kind=
kreal) :: numofelementperblock
241 integer(kind=kint) :: my_rank
245 numofblock = numofthread * numofblockperthread
246 if (
allocated(ictoblockindex))
deallocate(ictoblockindex)
247 if (
allocated(blockindextocolorindex))
deallocate(blockindextocolorindex)
248 allocate (ictoblockindex(0:ncolor), &
249 blockindextocolorindex(0:numofblock + ncolor))
250 numofelement = n + indexl(n) + indexu(n)
251 numofelementperblock = dble(numofelement) / numofblock
254 ictoblockindex(0) = 0
255 blockindextocolorindex = -1
256 blockindextocolorindex(0) = 0
265 do i = colorindex(ic-1)+1, colorindex(ic)
266 elementcount = elementcount + 1
267 elementcount = elementcount + (indexl(i) - indexl(i-1))
268 elementcount = elementcount + (indexu(i) - indexu(i-1))
269 if (elementcount > ii * numofelementperblock &
270 .or. i == colorindex(ic))
then
272 blockindex = blockindex + 1
273 blockindextocolorindex(blockindex) = i
278 ictoblockindex(ic) = blockindex
280 numofblock = blockindex
283 sectorcachesize0, sectorcachesize1 )
302 do blockindex = ictoblockindex(ic-1)+1, ictoblockindex(ic)
303 do i = blockindextocolorindex(blockindex-1)+1, &
304 blockindextocolorindex(blockindex)
324 sw1= sw1 -al(36*j-35)*x1 -al(36*j-34)*x2 -al(36*j-33)*x3 -al(36*j-32)*x4 -al(36*j-31)*x5 -al(36*j-30)*x6
325 sw2= sw2 -al(36*j-29)*x1 -al(36*j-28)*x2 -al(36*j-27)*x3 -al(36*j-26)*x4 -al(36*j-25)*x5 -al(36*j-24)*x6
326 sw3= sw3 -al(36*j-23)*x1 -al(36*j-22)*x2 -al(36*j-21)*x3 -al(36*j-20)*x4 -al(36*j-19)*x5 -al(36*j-18)*x6
327 sw4= sw4 -al(36*j-17)*x1 -al(36*j-16)*x2 -al(36*j-15)*x3 -al(36*j-14)*x4 -al(36*j-13)*x5 -al(36*j-12)*x6
328 sw5= sw5 -al(36*j-11)*x1 -al(36*j-10)*x2 -al(36*j-9 )*x3 -al(36*j-8 )*x4 -al(36*j-7 )*x5 -al(36*j-6 )*x6
329 sw6= sw6 -al(36*j-5 )*x1 -al(36*j-4 )*x2 -al(36*j-3 )*x3 -al(36*j-2 )*x4 -al(36*j-1 )*x5 -al(36*j )*x6
338 x2= x2 -alu(36*i-29)*x1
339 x3= x3 -alu(36*i-23)*x1 -alu(36*i-22)*x2
340 x4= x4 -alu(36*i-17)*x1 -alu(36*i-16)*x2 -alu(36*i-5)*x3
341 x5= x5 -alu(36*i-11)*x1 -alu(36*i-10)*x2 -alu(36*i-9)*x3 -alu(36*i-8)*x4
342 x6= x6 -alu(36*i-5 )*x1 -alu(36*i-4 )*x2 -alu(36*i-3)*x3 -alu(36*i-2)*x4 -alu(36*i-1)*x5
344 x5= alu(36*i-7 )*( x5 -alu(36*i-6)*x6 )
345 x4= alu(36*i-14)*( x4 -alu(36*i-12)*x6 -alu(36*i-13)*x5)
346 x3= alu(36*i-21)*( x3 -alu(36*i-18)*x6 -alu(36*i-19)*x5 -alu(36*i-20)*x4)
347 x2= alu(36*i-28)*( x2 -alu(36*i-24)*x6 -alu(36*i-25)*x5 -alu(36*i-26)*x4 -alu(36*i-27)*x3)
348 x1= alu(36*i-35)*( x1 -alu(36*i-30)*x6 -alu(36*i-31)*x5 -alu(36*i-32)*x4 -alu(36*i-33)*x3 -alu(36*i-34)*x2)
363 do blockindex = ictoblockindex(ic), ictoblockindex(ic-1)+1, -1
364 do i = blockindextocolorindex(blockindex), &
365 blockindextocolorindex(blockindex-1)+1, -1
387 sw1= sw1 +au(36*j-35)*x1 +au(36*j-34)*x2 +au(36*j-33)*x3 +au(36*j-32)*x4 +au(36*j-31)*x5 +au(36*j-30)*x6
388 sw2= sw2 +au(36*j-29)*x1 +au(36*j-28)*x2 +au(36*j-27)*x3 +au(36*j-26)*x4 +au(36*j-25)*x5 +au(36*j-24)*x6
389 sw3= sw3 +au(36*j-23)*x1 +au(36*j-22)*x2 +au(36*j-21)*x3 +au(36*j-20)*x4 +au(36*j-19)*x5 +au(36*j-18)*x6
390 sw4= sw4 +au(36*j-17)*x1 +au(36*j-16)*x2 +au(36*j-15)*x3 +au(36*j-14)*x4 +au(36*j-13)*x5 +au(36*j-12)*x6
391 sw5= sw5 +au(36*j-11)*x1 +au(36*j-10)*x2 +au(36*j-9 )*x3 +au(36*j-8 )*x4 +au(36*j-7 )*x5 +au(36*j-6 )*x6
392 sw6= sw6 +au(36*j-5 )*x1 +au(36*j-4 )*x2 +au(36*j-3 )*x3 +au(36*j-2 )*x4 +au(36*j-1 )*x5 +au(36*j )*x6
401 x2= x2 -alu(36*i-29)*x1
402 x3= x3 -alu(36*i-23)*x1 -alu(36*i-22)*x2
403 x4= x4 -alu(36*i-17)*x1 -alu(36*i-16)*x2 -alu(36*i-5)*x3
404 x5= x5 -alu(36*i-11)*x1 -alu(36*i-10)*x2 -alu(36*i-9)*x3 -alu(36*i-8)*x4
405 x6= x6 -alu(36*i-5 )*x1 -alu(36*i-4 )*x2 -alu(36*i-3)*x3 -alu(36*i-2)*x4 -alu(36*i-1)*x5
407 x5= alu(36*i-7 )*( x5 -alu(36*i-6)*x6 )
408 x4= alu(36*i-14)*( x4 -alu(36*i-12)*x6 -alu(36*i-13)*x5)
409 x3= alu(36*i-21)*( x3 -alu(36*i-18)*x6 -alu(36*i-19)*x5 -alu(36*i-20)*x4)
410 x2= alu(36*i-28)*( x2 -alu(36*i-24)*x6 -alu(36*i-25)*x5 -alu(36*i-26)*x4 -alu(36*i-27)*x3)
411 x1= alu(36*i-35)*( x1 -alu(36*i-30)*x6 -alu(36*i-31)*x5 -alu(36*i-32)*x4 -alu(36*i-33)*x3 -alu(36*i-34)*x2)
413 zp(6*iold-5)= zp(6*iold-5) -x1
414 zp(6*iold-4)= zp(6*iold-4) -x2
415 zp(6*iold-3)= zp(6*iold-3) -x3
416 zp(6*iold-2)= zp(6*iold-2) -x4
417 zp(6*iold-1)= zp(6*iold-1) -x5
418 zp(6*iold )= zp(6*iold ) -x6
435 integer(kind=kint ) :: nthreads = 1
437 if (
associated(colorindex))
deallocate(colorindex)
438 if (
associated(perm))
deallocate(perm)
439 if (
associated(iperm))
deallocate(iperm)
440 if (
associated(alu))
deallocate(alu)
441 if (nthreads >= 1)
then
442 if (
associated(d))
deallocate(d)
443 if (
associated(al))
deallocate(al)
444 if (
associated(au))
deallocate(au)
445 if (
associated(indexl))
deallocate(indexl)
446 if (
associated(indexu))
deallocate(indexu)
447 if (
associated(iteml))
deallocate(iteml)
448 if (
associated(itemu))
deallocate(itemu)
463 subroutine write_debug_info
465 integer(kind=kint) :: my_rank, ic, in
468 if (my_rank.eq.0)
then
469 write(*,*)
'DEBUG: Output fort.19000+myrank and fort.29000+myrank for coloring information'
471 write(19000+my_rank,
'(a)')
'#NCOLORTot'
472 write(19000+my_rank,*) ncolor
473 write(19000+my_rank,
'(a)')
'#ic COLORindex(ic-1)+1 COLORindex(ic)'
475 write(19000+my_rank,*) ic, colorindex(ic-1)+1,colorindex(ic)
477 write(29000+my_rank,
'(a)')
'#n_node'
478 write(29000+my_rank,*) n
479 write(29000+my_rank,
'(a)')
'#in OLDtoNEW(in) NEWtoOLD(in)'
481 write(29000+my_rank,*) in, iperm(in), perm(in)
482 if (perm(iperm(in)) .ne. in)
then
483 write(29000+my_rank,*)
'** WARNING **: NEWtoOLD and OLDtoNEW: ',in
486 end subroutine write_debug_info
488 subroutine check_ordering
490 integer(kind=kint) :: ic, i, j, k
491 integer(kind=kint),
allocatable :: iicolor(:)
493 if (ncolor.gt.1)
then
496 do i= colorindex(ic-1)+1, colorindex(ic)
502 do i= colorindex(ic-1)+1, colorindex(ic)
503 do j= indexl(i-1)+1, indexl(i)
505 if (iicolor(i).eq.iicolor(k))
then
506 write(*,*) .eq.
'** ERROR **: L-part: iicolor(i)iicolor(k)',i,k,iicolor(i)
513 do i= colorindex(ic), colorindex(ic-1)+1, -1
514 do j= indexu(i-1)+1, indexu(i)
516 if (iicolor(i).eq.iicolor(k))
then
517 write(*,*) .eq.
'** ERROR **: U-part: iicolor(i)iicolor(k)',i,k,iicolor(i)
525 end subroutine check_ordering