FrontISTR  5.7.0
Large-scale structural analysis program with finit element method
hecmw_precond_SSOR_66.f90
Go to the documentation of this file.
1 !-------------------------------------------------------------------------------
2 ! Copyright (c) 2019 FrontISTR Commons
3 ! This software is released under the MIT License, see LICENSE.txt
4 !-------------------------------------------------------------------------------
5 
6 !C
7 !C***
8 !C*** module hecmw_precond_SSOR_66
9 !C***
10 !C
12  use hecmw_util
17  !$ use omp_lib
18 
19  private
20 
24 
25  integer(kind=kint) :: N
26  real(kind=kreal), pointer :: d(:) => null()
27  real(kind=kreal), pointer :: al(:) => null()
28  real(kind=kreal), pointer :: au(:) => null()
29  integer(kind=kint), pointer :: indexL(:) => null()
30  integer(kind=kint), pointer :: indexU(:) => null()
31  integer(kind=kint), pointer :: itemL(:) => null()
32  integer(kind=kint), pointer :: itemU(:) => null()
33  real(kind=kreal), pointer :: alu(:) => null()
34 
35  integer(kind=kint) :: NColor
36  integer(kind=kint), pointer :: COLORindex(:) => null()
37  integer(kind=kint), pointer :: perm(:) => null()
38  integer(kind=kint), pointer :: iperm(:) => null()
39 
40  logical, save :: isFirst = .true.
41 
42 contains
43 
44  subroutine hecmw_precond_ssor_66_setup(hecMAT)
45  implicit none
46  type(hecmwst_matrix), intent(in) :: hecmat
47  integer(kind=kint ) :: npl, npu, npcl, npcu
48  real (kind=kreal), allocatable :: cd(:)
49  integer(kind=kint ) :: ncolor_in
50  real (kind=kreal) :: sigma_diag
51  real (kind=kreal) :: alutmp(6,6), pw(6)
52  integer(kind=kint ) :: ii, i, j, k
53  integer(kind=kint ) :: nthreads = 1
54  integer(kind=kint ), allocatable :: perm_tmp(:)
55  !real (kind=kreal) :: t0
56 
57  !t0 = hecmw_Wtime()
58  !write(*,*) 'DEBUG: SSOR setup start', hecmw_Wtime()-t0
59 
60  !$ nthreads = omp_get_max_threads()
61 
62  n = hecmat%N
63  ! N = hecMAT%NP
64  ncolor_in = hecmw_mat_get_ncolor_in(hecmat)
65  sigma_diag = hecmw_mat_get_sigma_diag(hecmat)
66 
67  if (nthreads == 1) then
68  ncolor = 1
69  allocate(colorindex(0:1), perm(n), iperm(n))
70  colorindex(0) = 0
71  colorindex(1) = n
72  do i=1,n
73  perm(i) = i
74  iperm(i) = i
75  end do
76 
77  d => hecmat%D
78  al => hecmat%AL
79  au => hecmat%AU
80  indexl => hecmat%indexL
81  indexu => hecmat%indexU
82  iteml => hecmat%itemL
83  itemu => hecmat%itemU
84  else
85  allocate(colorindex(0:n), perm_tmp(n), perm(n), iperm(n))
86  call hecmw_matrix_ordering_rcm(n, hecmat%indexL, hecmat%itemL, &
87  hecmat%indexU, hecmat%itemU, perm_tmp, iperm)
88  !write(*,*) 'DEBUG: RCM ordering done', hecmw_Wtime()-t0
89  call hecmw_matrix_ordering_mc(n, hecmat%indexL, hecmat%itemL, &
90  hecmat%indexU, hecmat%itemU, perm_tmp, &
91  ncolor_in, ncolor, colorindex, perm, iperm)
92  !write(*,*) 'DEBUG: MC ordering done', hecmw_Wtime()-t0
93  deallocate(perm_tmp)
94 
95  !call write_debug_info
96 
97  npl = hecmat%indexL(n)
98  npu = hecmat%indexU(n)
99  allocate(indexl(0:n), indexu(0:n), iteml(npl), itemu(npu))
100  call hecmw_matrix_reorder_profile(n, perm, iperm, &
101  hecmat%indexL, hecmat%indexU, hecmat%itemL, hecmat%itemU, &
102  indexl, indexu, iteml, itemu)
103  !write(*,*) 'DEBUG: reordering profile done', hecmw_Wtime()-t0
104 
105  call check_ordering
106 
107  allocate(d(36*n), al(36*npl), au(36*npu))
108  call hecmw_matrix_reorder_values(n, 6, perm, iperm, &
109  hecmat%indexL, hecmat%indexU, hecmat%itemL, hecmat%itemU, &
110  hecmat%AL, hecmat%AU, hecmat%D, &
111  indexl, indexu, iteml, itemu, al, au, d)
112  !write(*,*) 'DEBUG: reordering values done', hecmw_Wtime()-t0
113 
114  call hecmw_matrix_reorder_renum_item(n, perm, indexl, iteml)
115  call hecmw_matrix_reorder_renum_item(n, perm, indexu, itemu)
116  end if
117 
118  allocate(alu(36*n))
119  alu = 0.d0
120 
121  do ii= 1, 36*n
122  alu(ii) = d(ii)
123  enddo
124 
125  do ii= 1, n
126  alutmp(1,1)= alu(36*ii-35) * sigma_diag
127  alutmp(1,2)= alu(36*ii-34)
128  alutmp(1,3)= alu(36*ii-33)
129  alutmp(1,4)= alu(36*ii-32)
130  alutmp(1,5)= alu(36*ii-31)
131  alutmp(1,6)= alu(36*ii-30)
132 
133  alutmp(2,1)= alu(36*ii-29)
134  alutmp(2,2)= alu(36*ii-28) * sigma_diag
135  alutmp(2,3)= alu(36*ii-27)
136  alutmp(2,4)= alu(36*ii-26)
137  alutmp(2,5)= alu(36*ii-25)
138  alutmp(2,6)= alu(36*ii-24)
139 
140  alutmp(3,1)= alu(36*ii-23)
141  alutmp(3,2)= alu(36*ii-22)
142  alutmp(3,3)= alu(36*ii-21) * sigma_diag
143  alutmp(3,4)= alu(36*ii-20)
144  alutmp(3,5)= alu(36*ii-19)
145  alutmp(3,6)= alu(36*ii-18)
146 
147  alutmp(4,1)= alu(36*ii-17)
148  alutmp(4,2)= alu(36*ii-16)
149  alutmp(4,3)= alu(36*ii-15)
150  alutmp(4,4)= alu(36*ii-14) * sigma_diag
151  alutmp(4,5)= alu(36*ii-13)
152  alutmp(4,6)= alu(36*ii-12)
153 
154  alutmp(5,1)= alu(36*ii-11)
155  alutmp(5,2)= alu(36*ii-10)
156  alutmp(5,3)= alu(36*ii-9 )
157  alutmp(5,4)= alu(36*ii-8 )
158  alutmp(5,5)= alu(36*ii-7 ) * sigma_diag
159  alutmp(5,6)= alu(36*ii-6 )
160 
161  alutmp(6,1)= alu(36*ii-5 )
162  alutmp(6,2)= alu(36*ii-4 )
163  alutmp(6,3)= alu(36*ii-3 )
164  alutmp(6,4)= alu(36*ii-2 )
165  alutmp(6,5)= alu(36*ii-1 )
166  alutmp(6,6)= alu(36*ii ) * sigma_diag
167 
168  do k= 1, 6
169  alutmp(k,k)= 1.d0/alutmp(k,k)
170  do i= k+1, 6
171  alutmp(i,k)= alutmp(i,k) * alutmp(k,k)
172  do j= k+1, 6
173  pw(j)= alutmp(i,j) - alutmp(i,k)*alutmp(k,j)
174  enddo
175  do j= k+1, 6
176  alutmp(i,j)= pw(j)
177  enddo
178  enddo
179  enddo
180 
181  alu(36*ii-35)= alutmp(1,1)
182  alu(36*ii-34)= alutmp(1,2)
183  alu(36*ii-33)= alutmp(1,3)
184  alu(36*ii-32)= alutmp(1,4)
185  alu(36*ii-31)= alutmp(1,5)
186  alu(36*ii-30)= alutmp(1,6)
187  alu(36*ii-29)= alutmp(2,1)
188  alu(36*ii-28)= alutmp(2,2)
189  alu(36*ii-27)= alutmp(2,3)
190  alu(36*ii-26)= alutmp(2,4)
191  alu(36*ii-25)= alutmp(2,5)
192  alu(36*ii-24)= alutmp(2,6)
193  alu(36*ii-23)= alutmp(3,1)
194  alu(36*ii-22)= alutmp(3,2)
195  alu(36*ii-21)= alutmp(3,3)
196  alu(36*ii-20)= alutmp(3,4)
197  alu(36*ii-19)= alutmp(3,5)
198  alu(36*ii-18)= alutmp(3,6)
199  alu(36*ii-17)= alutmp(4,1)
200  alu(36*ii-16)= alutmp(4,2)
201  alu(36*ii-15)= alutmp(4,3)
202  alu(36*ii-14)= alutmp(4,4)
203  alu(36*ii-13)= alutmp(4,5)
204  alu(36*ii-12)= alutmp(4,6)
205  alu(36*ii-11)= alutmp(5,1)
206  alu(36*ii-10)= alutmp(5,2)
207  alu(36*ii-9 )= alutmp(5,3)
208  alu(36*ii-8 )= alutmp(5,4)
209  alu(36*ii-7 )= alutmp(5,5)
210  alu(36*ii-6 )= alutmp(5,6)
211  alu(36*ii-5 )= alutmp(6,1)
212  alu(36*ii-4 )= alutmp(6,2)
213  alu(36*ii-3 )= alutmp(6,3)
214  alu(36*ii-2 )= alutmp(6,4)
215  alu(36*ii-1 )= alutmp(6,5)
216  alu(36*ii )= alutmp(6,6)
217  enddo
218 
219  isfirst = .true.
220 
221  !write(*,*) 'DEBUG: SSOR setup done', hecmw_Wtime()-t0
222 
223  end subroutine hecmw_precond_ssor_66_setup
224 
225  subroutine hecmw_precond_ssor_66_apply(ZP)
227  implicit none
228  real(kind=kreal), intent(inout) :: zp(:)
229  integer(kind=kint) :: ic, i, iold, j, isl, iel, isu, ieu, k
230  real(kind=kreal) :: x1, x2, x3, x4, x5, x6
231  real(kind=kreal) :: sw1, sw2, sw3, sw4, sw5, sw6
232 
233  ! added for turning >>>
234  integer(kind=kint), parameter :: numofblockperthread = 100
235  integer(kind=kint), save :: numofthread = 1, numofblock
236  integer(kind=kint), save, allocatable :: ictoblockindex(:)
237  integer(kind=kint), save, allocatable :: blockindextocolorindex(:)
238  integer(kind=kint), save :: sectorcachesize0, sectorcachesize1
239  integer(kind=kint) :: blockindex, elementcount, numofelement, ii
240  real(kind=kreal) :: numofelementperblock
241  integer(kind=kint) :: my_rank
242 
243  if (isfirst) then
244  !$ numOfThread = omp_get_max_threads()
245  numofblock = numofthread * numofblockperthread
246  if (allocated(ictoblockindex)) deallocate(ictoblockindex)
247  if (allocated(blockindextocolorindex)) deallocate(blockindextocolorindex)
248  allocate (ictoblockindex(0:ncolor), &
249  blockindextocolorindex(0:numofblock + ncolor))
250  numofelement = n + indexl(n) + indexu(n)
251  numofelementperblock = dble(numofelement) / numofblock
252  blockindex = 0
253  ictoblockindex = -1
254  ictoblockindex(0) = 0
255  blockindextocolorindex = -1
256  blockindextocolorindex(0) = 0
257  my_rank = hecmw_comm_get_rank()
258  ! write(9000+my_rank,*) &
259  ! '# numOfElementPerBlock =', numOfElementPerBlock
260  ! write(9000+my_rank,*) &
261  ! '# ic, blockIndex, colorIndex, elementCount'
262  do ic = 1, ncolor
263  elementcount = 0
264  ii = 1
265  do i = colorindex(ic-1)+1, colorindex(ic)
266  elementcount = elementcount + 1
267  elementcount = elementcount + (indexl(i) - indexl(i-1))
268  elementcount = elementcount + (indexu(i) - indexu(i-1))
269  if (elementcount > ii * numofelementperblock &
270  .or. i == colorindex(ic)) then
271  ii = ii + 1
272  blockindex = blockindex + 1
273  blockindextocolorindex(blockindex) = i
274  ! write(9000+my_rank,*) ic, blockIndex, &
275  ! blockIndexToColorIndex(blockIndex), elementCount
276  endif
277  enddo
278  ictoblockindex(ic) = blockindex
279  enddo
280  numofblock = blockindex
281 
283  sectorcachesize0, sectorcachesize1 )
284 
285  isfirst = .false.
286  endif
287  ! <<< added for turning
288 
289  !call start_collection("loopInPrecond66")
290 
291  !OCL CACHE_SECTOR_SIZE(sectorCacheSize0,sectorCacheSize1)
292  !OCL CACHE_SUBSECTOR_ASSIGN(ZP)
293 
294  !$omp parallel default(none) &
295  !$omp&shared(NColor,indexL,itemL,indexU,itemU,AL,AU,D,ALU,perm,&
296  !$omp& ZP,icToBlockIndex,blockIndexToColorIndex) &
297  !$omp&private(SW1,SW2,SW3,SW4,SW5,SW6,X1,X2,X3,X4,X5,X6,ic,i,iold,isL,ieL,isU,ieU,j,k,blockIndex)
298 
299  !C-- FORWARD
300  do ic=1,ncolor
301  !$omp do schedule (static, 1)
302  do blockindex = ictoblockindex(ic-1)+1, ictoblockindex(ic)
303  do i = blockindextocolorindex(blockindex-1)+1, &
304  blockindextocolorindex(blockindex)
305  ! do i = startPos(threadNum, ic), endPos(threadNum, ic)
306  iold = perm(i)
307  sw1= zp(6*iold-5)
308  sw2= zp(6*iold-4)
309  sw3= zp(6*iold-3)
310  sw4= zp(6*iold-2)
311  sw5= zp(6*iold-1)
312  sw6= zp(6*iold )
313  isl= indexl(i-1)+1
314  iel= indexl(i)
315  do j= isl, iel
316  !k= perm(itemL(j))
317  k= iteml(j)
318  x1= zp(6*k-5)
319  x2= zp(6*k-4)
320  x3= zp(6*k-3)
321  x4= zp(6*k-2)
322  x5= zp(6*k-1)
323  x6= zp(6*k )
324  sw1= sw1 -al(36*j-35)*x1 -al(36*j-34)*x2 -al(36*j-33)*x3 -al(36*j-32)*x4 -al(36*j-31)*x5 -al(36*j-30)*x6
325  sw2= sw2 -al(36*j-29)*x1 -al(36*j-28)*x2 -al(36*j-27)*x3 -al(36*j-26)*x4 -al(36*j-25)*x5 -al(36*j-24)*x6
326  sw3= sw3 -al(36*j-23)*x1 -al(36*j-22)*x2 -al(36*j-21)*x3 -al(36*j-20)*x4 -al(36*j-19)*x5 -al(36*j-18)*x6
327  sw4= sw4 -al(36*j-17)*x1 -al(36*j-16)*x2 -al(36*j-15)*x3 -al(36*j-14)*x4 -al(36*j-13)*x5 -al(36*j-12)*x6
328  sw5= sw5 -al(36*j-11)*x1 -al(36*j-10)*x2 -al(36*j-9 )*x3 -al(36*j-8 )*x4 -al(36*j-7 )*x5 -al(36*j-6 )*x6
329  sw6= sw6 -al(36*j-5 )*x1 -al(36*j-4 )*x2 -al(36*j-3 )*x3 -al(36*j-2 )*x4 -al(36*j-1 )*x5 -al(36*j )*x6
330  enddo ! j
331 
332  x1= sw1
333  x2= sw2
334  x3= sw3
335  x4= sw4
336  x5= sw5
337  x6= sw6
338  x2= x2 -alu(36*i-29)*x1
339  x3= x3 -alu(36*i-23)*x1 -alu(36*i-22)*x2
340  x4= x4 -alu(36*i-17)*x1 -alu(36*i-16)*x2 -alu(36*i-5)*x3
341  x5= x5 -alu(36*i-11)*x1 -alu(36*i-10)*x2 -alu(36*i-9)*x3 -alu(36*i-8)*x4
342  x6= x6 -alu(36*i-5 )*x1 -alu(36*i-4 )*x2 -alu(36*i-3)*x3 -alu(36*i-2)*x4 -alu(36*i-1)*x5
343  x6= alu(36*i )* x6
344  x5= alu(36*i-7 )*( x5 -alu(36*i-6)*x6 )
345  x4= alu(36*i-14)*( x4 -alu(36*i-12)*x6 -alu(36*i-13)*x5)
346  x3= alu(36*i-21)*( x3 -alu(36*i-18)*x6 -alu(36*i-19)*x5 -alu(36*i-20)*x4)
347  x2= alu(36*i-28)*( x2 -alu(36*i-24)*x6 -alu(36*i-25)*x5 -alu(36*i-26)*x4 -alu(36*i-27)*x3)
348  x1= alu(36*i-35)*( x1 -alu(36*i-30)*x6 -alu(36*i-31)*x5 -alu(36*i-32)*x4 -alu(36*i-33)*x3 -alu(36*i-34)*x2)
349  zp(6*iold-5)= x1
350  zp(6*iold-4)= x2
351  zp(6*iold-3)= x3
352  zp(6*iold-2)= x4
353  zp(6*iold-1)= x5
354  zp(6*iold )= x6
355  enddo ! i
356  enddo ! blockIndex
357  !$omp end do
358  enddo ! ic
359 
360  !C-- BACKWARD
361  do ic=ncolor, 1, -1
362  !$omp do schedule (static, 1)
363  do blockindex = ictoblockindex(ic), ictoblockindex(ic-1)+1, -1
364  do i = blockindextocolorindex(blockindex), &
365  blockindextocolorindex(blockindex-1)+1, -1
366  ! do blockIndex = icToBlockIndex(ic-1)+1, icToBlockIndex(ic)
367  ! do i = blockIndexToColorIndex(blockIndex-1)+1, &
368  ! blockIndexToColorIndex(blockIndex)
369  ! do i = endPos(threadNum, ic), startPos(threadNum, ic), -1
370  sw1= 0.d0
371  sw2= 0.d0
372  sw3= 0.d0
373  sw4= 0.d0
374  sw5= 0.d0
375  sw6= 0.d0
376  isu= indexu(i-1) + 1
377  ieu= indexu(i)
378  do j= ieu, isu, -1
379  !k= perm(itemU(j))
380  k= itemu(j)
381  x1= zp(6*k-5)
382  x2= zp(6*k-4)
383  x3= zp(6*k-3)
384  x4= zp(6*k-2)
385  x5= zp(6*k-1)
386  x6= zp(6*k )
387  sw1= sw1 +au(36*j-35)*x1 +au(36*j-34)*x2 +au(36*j-33)*x3 +au(36*j-32)*x4 +au(36*j-31)*x5 +au(36*j-30)*x6
388  sw2= sw2 +au(36*j-29)*x1 +au(36*j-28)*x2 +au(36*j-27)*x3 +au(36*j-26)*x4 +au(36*j-25)*x5 +au(36*j-24)*x6
389  sw3= sw3 +au(36*j-23)*x1 +au(36*j-22)*x2 +au(36*j-21)*x3 +au(36*j-20)*x4 +au(36*j-19)*x5 +au(36*j-18)*x6
390  sw4= sw4 +au(36*j-17)*x1 +au(36*j-16)*x2 +au(36*j-15)*x3 +au(36*j-14)*x4 +au(36*j-13)*x5 +au(36*j-12)*x6
391  sw5= sw5 +au(36*j-11)*x1 +au(36*j-10)*x2 +au(36*j-9 )*x3 +au(36*j-8 )*x4 +au(36*j-7 )*x5 +au(36*j-6 )*x6
392  sw6= sw6 +au(36*j-5 )*x1 +au(36*j-4 )*x2 +au(36*j-3 )*x3 +au(36*j-2 )*x4 +au(36*j-1 )*x5 +au(36*j )*x6
393  enddo ! j
394 
395  x1= sw1
396  x2= sw2
397  x3= sw3
398  x4= sw4
399  x5= sw5
400  x6= sw6
401  x2= x2 -alu(36*i-29)*x1
402  x3= x3 -alu(36*i-23)*x1 -alu(36*i-22)*x2
403  x4= x4 -alu(36*i-17)*x1 -alu(36*i-16)*x2 -alu(36*i-5)*x3
404  x5= x5 -alu(36*i-11)*x1 -alu(36*i-10)*x2 -alu(36*i-9)*x3 -alu(36*i-8)*x4
405  x6= x6 -alu(36*i-5 )*x1 -alu(36*i-4 )*x2 -alu(36*i-3)*x3 -alu(36*i-2)*x4 -alu(36*i-1)*x5
406  x6= alu(36*i )* x6
407  x5= alu(36*i-7 )*( x5 -alu(36*i-6)*x6 )
408  x4= alu(36*i-14)*( x4 -alu(36*i-12)*x6 -alu(36*i-13)*x5)
409  x3= alu(36*i-21)*( x3 -alu(36*i-18)*x6 -alu(36*i-19)*x5 -alu(36*i-20)*x4)
410  x2= alu(36*i-28)*( x2 -alu(36*i-24)*x6 -alu(36*i-25)*x5 -alu(36*i-26)*x4 -alu(36*i-27)*x3)
411  x1= alu(36*i-35)*( x1 -alu(36*i-30)*x6 -alu(36*i-31)*x5 -alu(36*i-32)*x4 -alu(36*i-33)*x3 -alu(36*i-34)*x2)
412  iold = perm(i)
413  zp(6*iold-5)= zp(6*iold-5) -x1
414  zp(6*iold-4)= zp(6*iold-4) -x2
415  zp(6*iold-3)= zp(6*iold-3) -x3
416  zp(6*iold-2)= zp(6*iold-2) -x4
417  zp(6*iold-1)= zp(6*iold-1) -x5
418  zp(6*iold )= zp(6*iold ) -x6
419  enddo ! i
420  enddo ! blockIndex
421  !$omp end do
422  enddo ! ic
423  !$omp end parallel
424 
425  !OCL END_CACHE_SUBSECTOR
426  !OCL END_CACHE_SECTOR_SIZE
427 
428  !call stop_collection("loopInPrecond66")
429 
430  end subroutine hecmw_precond_ssor_66_apply
431 
432  subroutine hecmw_precond_ssor_66_clear(hecMAT)
433  implicit none
434  type(hecmwst_matrix), intent(inout) :: hecmat
435  integer(kind=kint ) :: nthreads = 1
436  !$ nthreads = omp_get_max_threads()
437  if (associated(colorindex)) deallocate(colorindex)
438  if (associated(perm)) deallocate(perm)
439  if (associated(iperm)) deallocate(iperm)
440  if (associated(alu)) deallocate(alu)
441  if (nthreads >= 1) then
442  if (associated(d)) deallocate(d)
443  if (associated(al)) deallocate(al)
444  if (associated(au)) deallocate(au)
445  if (associated(indexl)) deallocate(indexl)
446  if (associated(indexu)) deallocate(indexu)
447  if (associated(iteml)) deallocate(iteml)
448  if (associated(itemu)) deallocate(itemu)
449  end if
450  nullify(colorindex)
451  nullify(perm)
452  nullify(iperm)
453  nullify(alu)
454  nullify(d)
455  nullify(al)
456  nullify(au)
457  nullify(indexl)
458  nullify(indexu)
459  nullify(iteml)
460  nullify(itemu)
461  end subroutine hecmw_precond_ssor_66_clear
462 
463  subroutine write_debug_info
464  implicit none
465  integer(kind=kint) :: my_rank, ic, in
466  my_rank = hecmw_comm_get_rank()
467  !--------------------> debug: shizawa
468  if (my_rank.eq.0) then
469  write(*,*) 'DEBUG: Output fort.19000+myrank and fort.29000+myrank for coloring information'
470  endif
471  write(19000+my_rank,'(a)') '#NCOLORTot'
472  write(19000+my_rank,*) ncolor
473  write(19000+my_rank,'(a)') '#ic COLORindex(ic-1)+1 COLORindex(ic)'
474  do ic=1,ncolor
475  write(19000+my_rank,*) ic, colorindex(ic-1)+1,colorindex(ic)
476  enddo ! ic
477  write(29000+my_rank,'(a)') '#n_node'
478  write(29000+my_rank,*) n
479  write(29000+my_rank,'(a)') '#in OLDtoNEW(in) NEWtoOLD(in)'
480  do in=1,n
481  write(29000+my_rank,*) in, iperm(in), perm(in)
482  if (perm(iperm(in)) .ne. in) then
483  write(29000+my_rank,*) '** WARNING **: NEWtoOLD and OLDtoNEW: ',in
484  endif
485  enddo
486  end subroutine write_debug_info
487 
488  subroutine check_ordering
489  implicit none
490  integer(kind=kint) :: ic, i, j, k
491  integer(kind=kint), allocatable :: iicolor(:)
492  ! check color dependence of neighbouring nodes
493  if (ncolor.gt.1) then
494  allocate(iicolor(n))
495  do ic=1,ncolor
496  do i= colorindex(ic-1)+1, colorindex(ic)
497  iicolor(i) = ic
498  enddo ! i
499  enddo ! ic
500  ! FORWARD: L-part
501  do ic=1,ncolor
502  do i= colorindex(ic-1)+1, colorindex(ic)
503  do j= indexl(i-1)+1, indexl(i)
504  k= iteml(j)
505  if (iicolor(i).eq.iicolor(k)) then
506  write(*,*) .eq.'** ERROR **: L-part: iicolor(i)iicolor(k)',i,k,iicolor(i)
507  endif
508  enddo ! j
509  enddo ! i
510  enddo ! ic
511  ! BACKWARD: U-part
512  do ic=ncolor, 1, -1
513  do i= colorindex(ic), colorindex(ic-1)+1, -1
514  do j= indexu(i-1)+1, indexu(i)
515  k= itemu(j)
516  if (iicolor(i).eq.iicolor(k)) then
517  write(*,*) .eq.'** ERROR **: U-part: iicolor(i)iicolor(k)',i,k,iicolor(i)
518  endif
519  enddo ! j
520  enddo ! i
521  enddo ! ic
522  deallocate(iicolor)
523  endif ! if (NColor.gt.1)
524  !--------------------< debug: shizawa
525  end subroutine check_ordering
526 
527 end module hecmw_precond_ssor_66
hecmw_precond_ssor_66::hecmw_precond_ssor_66_clear
subroutine, public hecmw_precond_ssor_66_clear(hecMAT)
Definition: hecmw_precond_SSOR_66.f90:433
m_hecmw_matrix_ordering_cm
Definition: hecmw_matrix_ordering_CM.f90:6
hecmw_matrix_reorder::hecmw_matrix_reorder_values
subroutine, public hecmw_matrix_reorder_values(N, NDOF, perm, iperm, indexL, indexU, itemL, itemU, AL, AU, D, indexLp, indexUp, itemLp, itemUp, ALp, AUp, Dp)
Definition: hecmw_matrix_reorder.f90:71
hecmw_matrix_reorder::hecmw_matrix_reorder_profile
subroutine, public hecmw_matrix_reorder_profile(N, perm, iperm, indexL, indexU, itemL, itemU, indexLp, indexUp, itemLp, itemUp)
Definition: hecmw_matrix_reorder.f90:22
hecmw_matrix_reorder::hecmw_matrix_reorder_renum_item
subroutine, public hecmw_matrix_reorder_renum_item(N, perm, indexXp, itemXp)
Definition: hecmw_matrix_reorder.f90:144
hecmw_matrix_misc::hecmw_mat_get_ncolor_in
integer(kind=kint) function, public hecmw_mat_get_ncolor_in(hecMAT)
Definition: hecmw_matrix_misc.f90:531
hecmw_tuning_fx
Definition: hecmw_tuning_fx.f90:6
hecmw_tuning_fx::hecmw_tuning_fx_calc_sector_cache
subroutine, public hecmw_tuning_fx_calc_sector_cache(N, NDOF, sectorCacheSize0, sectorCacheSize1)
Definition: hecmw_tuning_fx.f90:25
hecmw_util
I/O and Utility.
Definition: hecmw_util_f.F90:7
hecmw_precond_ssor_66::hecmw_precond_ssor_66_apply
subroutine, public hecmw_precond_ssor_66_apply(ZP)
Definition: hecmw_precond_SSOR_66.f90:226
m_hecmw_matrix_ordering_mc::hecmw_matrix_ordering_mc
subroutine, public hecmw_matrix_ordering_mc(N, indexL, itemL, indexU, itemU, perm_cur, ncolor_in, ncolor_out, COLORindex, perm, iperm)
Definition: hecmw_matrix_ordering_MC.f90:17
hecmw_util::kreal
integer(kind=4), parameter kreal
Definition: hecmw_util_f.F90:16
hecmw_matrix_misc::hecmw_mat_get_sigma_diag
real(kind=kreal) function, public hecmw_mat_get_sigma_diag(hecMAT)
Definition: hecmw_matrix_misc.f90:694
hecmw_precond_ssor_66::hecmw_precond_ssor_66_setup
subroutine, public hecmw_precond_ssor_66_setup(hecMAT)
Definition: hecmw_precond_SSOR_66.f90:45
m_hecmw_matrix_ordering_cm::hecmw_matrix_ordering_rcm
subroutine, public hecmw_matrix_ordering_rcm(N, indexL, itemL, indexU, itemU, perm, iperm)
Definition: hecmw_matrix_ordering_CM.f90:67
m_hecmw_matrix_ordering_mc
Definition: hecmw_matrix_ordering_MC.f90:6
hecmw_matrix_misc
Definition: hecmw_matrix_misc.f90:6
hecmw_matrix_reorder
Definition: hecmw_matrix_reorder.f90:6
hecmw_util::hecmw_comm_get_rank
integer(kind=kint) function hecmw_comm_get_rank()
Definition: hecmw_util_f.F90:582
hecmw_precond_ssor_66
Definition: hecmw_precond_SSOR_66.f90:11
hecmw_util::hecmwst_matrix
Definition: hecmw_util_f.F90:444