FrontISTR  5.7.0
Large-scale structural analysis program with finit element method
hecmw_precond_SSOR_22.f90
Go to the documentation of this file.
1 !-------------------------------------------------------------------------------
2 ! Copyright (c) 2019 FrontISTR Commons
3 ! This software is released under the MIT License, see LICENSE.txt
4 !-------------------------------------------------------------------------------
5 
6 !C
7 !C***
8 !C*** module hecmw_precond_SSOR_22
9 !C***
10 !C
12  use hecmw_util
17  !$ use omp_lib
18 
19  private
20 
24 
25  integer(kind=kint) :: N
26  real(kind=kreal), pointer :: d(:) => null()
27  real(kind=kreal), pointer :: al(:) => null()
28  real(kind=kreal), pointer :: au(:) => null()
29  integer(kind=kint), pointer :: indexL(:) => null()
30  integer(kind=kint), pointer :: indexU(:) => null()
31  integer(kind=kint), pointer :: itemL(:) => null()
32  integer(kind=kint), pointer :: itemU(:) => null()
33  real(kind=kreal), pointer :: alu(:) => null()
34 
35  integer(kind=kint) :: NColor
36  integer(kind=kint), pointer :: COLORindex(:) => null()
37  integer(kind=kint), pointer :: perm(:) => null()
38  integer(kind=kint), pointer :: iperm(:) => null()
39 
40  logical, save :: isFirst = .true.
41 
42  logical, save :: INITIALIZED = .false.
43 
44 contains
45 
46  subroutine hecmw_precond_ssor_22_setup(hecMAT)
47  implicit none
48  type(hecmwst_matrix), intent(inout) :: hecmat
49  integer(kind=kint ) :: npl, npu, npcl, npcu
50  real (kind=kreal), allocatable :: cd(:)
51  integer(kind=kint ) :: ncolor_in
52  real (kind=kreal) :: sigma_diag
53  real (kind=kreal) :: alutmp(2,2), pw(2)
54  integer(kind=kint ) :: ii, i, j, k
55  integer(kind=kint ) :: nthreads = 1
56  integer(kind=kint ), allocatable :: perm_tmp(:)
57  !real (kind=kreal) :: t0
58 
59  !t0 = hecmw_Wtime()
60  !write(*,*) 'DEBUG: SSOR setup start', hecmw_Wtime()-t0
61 
62  if (initialized) then
63  if (hecmat%Iarray(98) == 1) then ! need symbolic and numerical setup
64  call hecmw_precond_ssor_22_clear(hecmat)
65  else if (hecmat%Iarray(97) == 1) then ! need numerical setup only
66  call hecmw_precond_ssor_22_clear(hecmat) ! TEMPORARY
67  else
68  return
69  endif
70  endif
71 
72  !$ nthreads = omp_get_max_threads()
73 
74  n = hecmat%N
75  ! N = hecMAT%NP
76  ncolor_in = hecmw_mat_get_ncolor_in(hecmat)
77  sigma_diag = hecmw_mat_get_sigma_diag(hecmat)
78 
79  if (nthreads == 1) then
80  ncolor = 1
81  allocate(colorindex(0:1), perm(n), iperm(n))
82  colorindex(0) = 0
83  colorindex(1) = n
84  do i=1,n
85  perm(i) = i
86  iperm(i) = i
87  end do
88  else
89  allocate(colorindex(0:n), perm_tmp(n), perm(n), iperm(n))
90  call hecmw_matrix_ordering_rcm(n, hecmat%indexL, hecmat%itemL, &
91  hecmat%indexU, hecmat%itemU, perm_tmp, iperm)
92  !write(*,*) 'DEBUG: RCM ordering done', hecmw_Wtime()-t0
93  call hecmw_matrix_ordering_mc(n, hecmat%indexL, hecmat%itemL, &
94  hecmat%indexU, hecmat%itemU, perm_tmp, &
95  ncolor_in, ncolor, colorindex, perm, iperm)
96  !write(*,*) 'DEBUG: MC ordering done', hecmw_Wtime()-t0
97  deallocate(perm_tmp)
98 
99  !call write_debug_info
100  endif
101 
102  npl = hecmat%indexL(n)
103  npu = hecmat%indexU(n)
104  allocate(indexl(0:n), indexu(0:n), iteml(npl), itemu(npu))
105  call hecmw_matrix_reorder_profile(n, perm, iperm, &
106  hecmat%indexL, hecmat%indexU, hecmat%itemL, hecmat%itemU, &
107  indexl, indexu, iteml, itemu)
108  !write(*,*) 'DEBUG: reordering profile done', hecmw_Wtime()-t0
109 
110  !call check_ordering
111 
112  allocate(d(4*n), al(4*npl), au(4*npu))
113  call hecmw_matrix_reorder_values(n, 2, perm, iperm, &
114  hecmat%indexL, hecmat%indexU, hecmat%itemL, hecmat%itemU, &
115  hecmat%AL, hecmat%AU, hecmat%D, &
116  indexl, indexu, iteml, itemu, al, au, d)
117  !write(*,*) 'DEBUG: reordering values done', hecmw_Wtime()-t0
118 
119  call hecmw_matrix_reorder_renum_item(n, perm, indexl, iteml)
120  call hecmw_matrix_reorder_renum_item(n, perm, indexu, itemu)
121 
122  allocate(alu(4*n))
123  alu = 0.d0
124 
125  do ii= 1, 4*n
126  alu(ii) = d(ii)
127  enddo
128 
129  !$omp parallel default(none),private(ii,ALUtmp,k,i,j,PW),shared(N,ALU,SIGMA_DIAG)
130  !$omp do
131  do ii= 1, n
132  alutmp(1,1)= alu(4*ii-3) * sigma_diag
133  alutmp(1,2)= alu(4*ii-2)
134  alutmp(2,1)= alu(4*ii-1)
135  alutmp(2,2)= alu(4*ii-0) * sigma_diag
136  do k= 1, 2
137  alutmp(k,k)= 1.d0/alutmp(k,k)
138  do i= k+1, 2
139  alutmp(i,k)= alutmp(i,k) * alutmp(k,k)
140  do j= k+1, 2
141  pw(j)= alutmp(i,j) - alutmp(i,k)*alutmp(k,j)
142  enddo
143  do j= k+1, 2
144  alutmp(i,j)= pw(j)
145  enddo
146  enddo
147  enddo
148  alu(4*ii-3)= alutmp(1,1)
149  alu(4*ii-2)= alutmp(1,2)
150  alu(4*ii-1)= alutmp(2,1)
151  alu(4*ii-0)= alutmp(2,2)
152  enddo
153  !$omp end do
154  !$omp end parallel
155 
156  isfirst = .true.
157 
158  initialized = .true.
159  hecmat%Iarray(98) = 0 ! symbolic setup done
160  hecmat%Iarray(97) = 0 ! numerical setup done
161 
162  !write(*,*) 'DEBUG: SSOR setup done', hecmw_Wtime()-t0
163 
164  end subroutine hecmw_precond_ssor_22_setup
165 
166  subroutine hecmw_precond_ssor_22_apply(ZP)
168  implicit none
169  real(kind=kreal), intent(inout) :: zp(:)
170  integer(kind=kint) :: ic, i, iold, j, isl, iel, isu, ieu, k
171  real(kind=kreal) :: sw(2), x(2)
172 
173  ! added for turning >>>
174  integer(kind=kint), parameter :: numofblockperthread = 100
175  integer(kind=kint), save :: numofthread = 1, numofblock
176  integer(kind=kint), save, allocatable :: ictoblockindex(:)
177  integer(kind=kint), save, allocatable :: blockindextocolorindex(:)
178  integer(kind=kint), save :: sectorcachesize0, sectorcachesize1
179  integer(kind=kint) :: blockindex, elementcount, numofelement, ii
180  real(kind=kreal) :: numofelementperblock
181  integer(kind=kint) :: my_rank
182 
183  if (isfirst) then
184  !$ numOfThread = omp_get_max_threads()
185  numofblock = numofthread * numofblockperthread
186  if (allocated(ictoblockindex)) deallocate(ictoblockindex)
187  if (allocated(blockindextocolorindex)) deallocate(blockindextocolorindex)
188  allocate (ictoblockindex(0:ncolor), &
189  blockindextocolorindex(0:numofblock + ncolor))
190  numofelement = n + indexl(n) + indexu(n)
191  numofelementperblock = dble(numofelement) / numofblock
192  blockindex = 0
193  ictoblockindex = -1
194  ictoblockindex(0) = 0
195  blockindextocolorindex = -1
196  blockindextocolorindex(0) = 0
197  my_rank = hecmw_comm_get_rank()
198  ! write(9000+my_rank,*) &
199  ! '# numOfElementPerBlock =', numOfElementPerBlock
200  ! write(9000+my_rank,*) &
201  ! '# ic, blockIndex, colorIndex, elementCount'
202  do ic = 1, ncolor
203  elementcount = 0
204  ii = 1
205  do i = colorindex(ic-1)+1, colorindex(ic)
206  elementcount = elementcount + 1
207  elementcount = elementcount + (indexl(i) - indexl(i-1))
208  elementcount = elementcount + (indexu(i) - indexu(i-1))
209  if (elementcount > ii * numofelementperblock &
210  .or. i == colorindex(ic)) then
211  ii = ii + 1
212  blockindex = blockindex + 1
213  blockindextocolorindex(blockindex) = i
214  ! write(9000+my_rank,*) ic, blockIndex, &
215  ! blockIndexToColorIndex(blockIndex), elementCount
216  endif
217  enddo
218  ictoblockindex(ic) = blockindex
219  enddo
220  numofblock = blockindex
221 
223  sectorcachesize0, sectorcachesize1 )
224 
225  isfirst = .false.
226  endif
227  ! <<< added for turning
228 
229  !call start_collection("loopInPrecond33")
230 
231  !OCL CACHE_SECTOR_SIZE(sectorCacheSize0,sectorCacheSize1)
232  !OCL CACHE_SUBSECTOR_ASSIGN(ZP)
233 
234  !$omp parallel default(none) &
235  !$omp&shared(NColor,indexL,itemL,indexU,itemU,AL,AU,D,ALU,perm,&
236  !$omp& ZP,icToBlockIndex,blockIndexToColorIndex) &
237  !$omp&private(SW,X,ic,i,iold,isL,ieL,isU,ieU,j,k,blockIndex)
238 
239  !C-- FORWARD
240  do ic=1,ncolor
241  !$omp do schedule (static, 1)
242  do blockindex = ictoblockindex(ic-1)+1, ictoblockindex(ic)
243  do i = blockindextocolorindex(blockindex-1)+1, &
244  blockindextocolorindex(blockindex)
245  iold = perm(i)
246  sw(1)= zp(2*iold-1)
247  sw(2)= zp(2*iold-0)
248  isl= indexl(i-1)+1
249  iel= indexl(i)
250  do j= isl, iel
251  k= iteml(j)
252  x(1)= zp(2*k-1)
253  x(2)= zp(2*k-0)
254  sw(1)= sw(1) - al(4*j-3)*x(1) - al(4*j-2)*x(2)
255  sw(2)= sw(2) - al(4*j-1)*x(1) - al(4*j-0)*x(2)
256  enddo ! j
257 
258  x = sw
259  x(2)= x(2) - alu(4*i-1)*x(1)
260  x(2)= alu(4*i )* x(2)
261  x(1)= alu(4*i-3)*( x(1) - alu(4*i-2)*x(2))
262  zp(2*iold-1)= x(1)
263  zp(2*iold-0)= x(2)
264  enddo ! i
265  enddo ! blockIndex
266  !$omp end do
267  enddo ! ic
268 
269  !C-- BACKWARD
270  do ic=ncolor, 1, -1
271  !$omp do schedule (static, 1)
272  do blockindex = ictoblockindex(ic), ictoblockindex(ic-1)+1, -1
273  do i = blockindextocolorindex(blockindex), &
274  blockindextocolorindex(blockindex-1)+1, -1
275  ! do blockIndex = icToBlockIndex(ic-1)+1, icToBlockIndex(ic)
276  ! do i = blockIndexToColorIndex(blockIndex-1)+1, &
277  ! blockIndexToColorIndex(blockIndex)
278  ! do i = endPos(threadNum, ic), startPos(threadNum, ic), -1
279  sw= 0.d0
280  isu= indexu(i-1) + 1
281  ieu= indexu(i)
282  do j= ieu, isu, -1
283  k= itemu(j)
284  x(1)= zp(2*k-1)
285  x(2)= zp(2*k-0)
286  sw(1)= sw(1) + au(4*j-3)*x(1) + au(4*j-2)*x(2)
287  sw(2)= sw(2) + au(4*j-1)*x(1) + au(4*j-0)*x(2)
288  enddo ! j
289 
290  x = sw
291  x(2)= x(2) - alu(4*i-1)*x(1)
292 
293 
294  x(2)= alu(4*i )* x(2)
295  x(1)= alu(4*i-3)*( x(1) - alu(4*i-2)*x(2) )
296 
297  iold = perm(i)
298  zp(2*iold-1)= zp(2*iold-1) - x(1)
299  zp(2*iold )= zp(2*iold ) - x(2)
300  enddo ! i
301  enddo ! blockIndex
302  !$omp end do
303  enddo ! ic
304  !$omp end parallel
305 
306  !OCL END_CACHE_SUBSECTOR
307  !OCL END_CACHE_SECTOR_SIZE
308 
309  !call stop_collection("loopInPrecond33")
310 
311  end subroutine hecmw_precond_ssor_22_apply
312 
313  subroutine hecmw_precond_ssor_22_clear(hecMAT)
314  implicit none
315  type(hecmwst_matrix), intent(inout) :: hecmat
316  integer(kind=kint ) :: nthreads = 1
317  !$ nthreads = omp_get_max_threads()
318  if (associated(colorindex)) deallocate(colorindex)
319  if (associated(perm)) deallocate(perm)
320  if (associated(iperm)) deallocate(iperm)
321  if (associated(alu)) deallocate(alu)
322  if (nthreads >= 1) then
323  if (associated(d)) deallocate(d)
324  if (associated(al)) deallocate(al)
325  if (associated(au)) deallocate(au)
326  if (associated(indexl)) deallocate(indexl)
327  if (associated(indexu)) deallocate(indexu)
328  if (associated(iteml)) deallocate(iteml)
329  if (associated(itemu)) deallocate(itemu)
330  end if
331  nullify(colorindex)
332  nullify(perm)
333  nullify(iperm)
334  nullify(alu)
335  nullify(d)
336  nullify(al)
337  nullify(au)
338  nullify(indexl)
339  nullify(indexu)
340  nullify(iteml)
341  nullify(itemu)
342  initialized = .false.
343  end subroutine hecmw_precond_ssor_22_clear
344 
345  subroutine write_debug_info
346  implicit none
347  integer(kind=kint) :: my_rank, ic, in
348  my_rank = hecmw_comm_get_rank()
349  !--------------------> debug: shizawa
350  if (my_rank.eq.0) then
351  write(*,*) 'DEBUG: Output fort.19000+myrank and fort.29000+myrank for coloring information'
352  endif
353  write(19000+my_rank,'(a)') '#NCOLORTot'
354  write(19000+my_rank,*) ncolor
355  write(19000+my_rank,'(a)') '#ic COLORindex(ic-1)+1 COLORindex(ic)'
356  do ic=1,ncolor
357  write(19000+my_rank,*) ic, colorindex(ic-1)+1,colorindex(ic)
358  enddo ! ic
359  write(29000+my_rank,'(a)') '#n_node'
360  write(29000+my_rank,*) n
361  write(29000+my_rank,'(a)') '#in OLDtoNEW(in) NEWtoOLD(in)'
362  do in=1,n
363  write(29000+my_rank,*) in, iperm(in), perm(in)
364  if (perm(iperm(in)) .ne. in) then
365  write(29000+my_rank,*) '** WARNING **: NEWtoOLD and OLDtoNEW: ',in
366  endif
367  enddo
368  end subroutine write_debug_info
369 
370  subroutine check_ordering
371  implicit none
372  integer(kind=kint) :: ic, i, j, k
373  integer(kind=kint), allocatable :: iicolor(:)
374  ! check color dependence of neighbouring nodes
375  if (ncolor.gt.1) then
376  allocate(iicolor(n))
377  do ic=1,ncolor
378  do i= colorindex(ic-1)+1, colorindex(ic)
379  iicolor(i) = ic
380  enddo ! i
381  enddo ! ic
382  ! FORWARD: L-part
383  do ic=1,ncolor
384  do i= colorindex(ic-1)+1, colorindex(ic)
385  do j= indexl(i-1)+1, indexl(i)
386  k= iteml(j)
387  if (iicolor(i).eq.iicolor(k)) then
388  write(*,*) .eq.'** ERROR **: L-part: iicolor(i)iicolor(k)',i,k,iicolor(i)
389  endif
390  enddo ! j
391  enddo ! i
392  enddo ! ic
393  ! BACKWARD: U-part
394  do ic=ncolor, 1, -1
395  do i= colorindex(ic), colorindex(ic-1)+1, -1
396  do j= indexu(i-1)+1, indexu(i)
397  k= itemu(j)
398  if (iicolor(i).eq.iicolor(k)) then
399  write(*,*) .eq.'** ERROR **: U-part: iicolor(i)iicolor(k)',i,k,iicolor(i)
400  endif
401  enddo ! j
402  enddo ! i
403  enddo ! ic
404  deallocate(iicolor)
405  endif ! if (NColor.gt.1)
406  !--------------------< debug: shizawa
407  end subroutine check_ordering
408 
409 end module hecmw_precond_ssor_22
m_hecmw_matrix_ordering_cm
Definition: hecmw_matrix_ordering_CM.f90:6
hecmw_matrix_reorder::hecmw_matrix_reorder_values
subroutine, public hecmw_matrix_reorder_values(N, NDOF, perm, iperm, indexL, indexU, itemL, itemU, AL, AU, D, indexLp, indexUp, itemLp, itemUp, ALp, AUp, Dp)
Definition: hecmw_matrix_reorder.f90:71
hecmw_precond_ssor_22::hecmw_precond_ssor_22_clear
subroutine, public hecmw_precond_ssor_22_clear(hecMAT)
Definition: hecmw_precond_SSOR_22.f90:314
hecmw_matrix_reorder::hecmw_matrix_reorder_profile
subroutine, public hecmw_matrix_reorder_profile(N, perm, iperm, indexL, indexU, itemL, itemU, indexLp, indexUp, itemLp, itemUp)
Definition: hecmw_matrix_reorder.f90:22
hecmw_matrix_reorder::hecmw_matrix_reorder_renum_item
subroutine, public hecmw_matrix_reorder_renum_item(N, perm, indexXp, itemXp)
Definition: hecmw_matrix_reorder.f90:144
hecmw_matrix_misc::hecmw_mat_get_ncolor_in
integer(kind=kint) function, public hecmw_mat_get_ncolor_in(hecMAT)
Definition: hecmw_matrix_misc.f90:531
hecmw_tuning_fx
Definition: hecmw_tuning_fx.f90:6
hecmw_tuning_fx::hecmw_tuning_fx_calc_sector_cache
subroutine, public hecmw_tuning_fx_calc_sector_cache(N, NDOF, sectorCacheSize0, sectorCacheSize1)
Definition: hecmw_tuning_fx.f90:25
hecmw_util
I/O and Utility.
Definition: hecmw_util_f.F90:7
m_hecmw_matrix_ordering_mc::hecmw_matrix_ordering_mc
subroutine, public hecmw_matrix_ordering_mc(N, indexL, itemL, indexU, itemU, perm_cur, ncolor_in, ncolor_out, COLORindex, perm, iperm)
Definition: hecmw_matrix_ordering_MC.f90:17
hecmw_util::kreal
integer(kind=4), parameter kreal
Definition: hecmw_util_f.F90:16
hecmw_matrix_misc::hecmw_mat_get_sigma_diag
real(kind=kreal) function, public hecmw_mat_get_sigma_diag(hecMAT)
Definition: hecmw_matrix_misc.f90:694
hecmw_precond_ssor_22::hecmw_precond_ssor_22_apply
subroutine, public hecmw_precond_ssor_22_apply(ZP)
Definition: hecmw_precond_SSOR_22.f90:167
hecmw_precond_ssor_22
Definition: hecmw_precond_SSOR_22.f90:11
m_hecmw_matrix_ordering_cm::hecmw_matrix_ordering_rcm
subroutine, public hecmw_matrix_ordering_rcm(N, indexL, itemL, indexU, itemU, perm, iperm)
Definition: hecmw_matrix_ordering_CM.f90:67
hecmw_precond_ssor_22::hecmw_precond_ssor_22_setup
subroutine, public hecmw_precond_ssor_22_setup(hecMAT)
Definition: hecmw_precond_SSOR_22.f90:47
m_hecmw_matrix_ordering_mc
Definition: hecmw_matrix_ordering_MC.f90:6
hecmw_matrix_misc
Definition: hecmw_matrix_misc.f90:6
hecmw_matrix_reorder
Definition: hecmw_matrix_reorder.f90:6
hecmw_util::hecmw_comm_get_rank
integer(kind=kint) function hecmw_comm_get_rank()
Definition: hecmw_util_f.F90:582
hecmw_util::hecmwst_matrix
Definition: hecmw_util_f.F90:444