FrontISTR  5.7.0
Large-scale structural analysis program with finit element method
hecmw_precond_SSOR_11.f90
Go to the documentation of this file.
1 !-------------------------------------------------------------------------------
2 ! Copyright (c) 2019 FrontISTR Commons
3 ! This software is released under the MIT License, see LICENSE.txt
4 !-------------------------------------------------------------------------------
5 
6 !C
7 !C***
8 !C*** module hecmw_precond_SSOR_11
9 !C***
10 !C
12  use hecmw_util
17  !$ use omp_lib
18 
19  private
20 
24 
25  integer(kind=kint) :: N
26  real(kind=kreal), pointer :: d(:) => null()
27  real(kind=kreal), pointer :: al(:) => null()
28  real(kind=kreal), pointer :: au(:) => null()
29  integer(kind=kint), pointer :: indexL(:) => null()
30  integer(kind=kint), pointer :: indexU(:) => null()
31  integer(kind=kint), pointer :: itemL(:) => null()
32  integer(kind=kint), pointer :: itemU(:) => null()
33  real(kind=kreal), pointer :: alu(:) => null()
34 
35  integer(kind=kint) :: NColor
36  integer(kind=kint), pointer :: COLORindex(:) => null()
37  integer(kind=kint), pointer :: perm(:) => null()
38  integer(kind=kint), pointer :: iperm(:) => null()
39 
40  logical, save :: isFirst = .true.
41 
42  logical, save :: INITIALIZED = .false.
43 
44 contains
45 
46  subroutine hecmw_precond_ssor_11_setup(hecMAT)
47  implicit none
48  type(hecmwst_matrix), intent(inout) :: hecmat
49  integer(kind=kint ) :: npl, npu, npcl, npcu
50  real (kind=kreal), allocatable :: cd(:)
51  integer(kind=kint ) :: ncolor_in
52  real (kind=kreal) :: sigma_diag
53  real (kind=kreal) :: alutmp(1,1), pw(1)
54  integer(kind=kint ) :: ii, i, j, k
55  integer(kind=kint ) :: nthreads = 1
56  integer(kind=kint ), allocatable :: perm_tmp(:)
57  !real (kind=kreal) :: t0
58 
59  !t0 = hecmw_Wtime()
60  !write(*,*) 'DEBUG: SSOR setup start', hecmw_Wtime()-t0
61 
62  if (initialized) then
63  if (hecmat%Iarray(98) == 1) then ! need symbolic and numerical setup
64  call hecmw_precond_ssor_11_clear(hecmat)
65  else if (hecmat%Iarray(97) == 1) then ! need numerical setup only
66  call hecmw_precond_ssor_11_clear(hecmat) ! TEMPORARY
67  else
68  return
69  endif
70  endif
71 
72  !$ nthreads = omp_get_max_threads()
73 
74  n = hecmat%N
75  ! N = hecMAT%NP
76  ncolor_in = hecmw_mat_get_ncolor_in(hecmat)
77  sigma_diag = hecmw_mat_get_sigma_diag(hecmat)
78 
79  if (nthreads == 1) then
80  ncolor = 1
81  allocate(colorindex(0:1), perm(n), iperm(n))
82  colorindex(0) = 0
83  colorindex(1) = n
84  do i=1,n
85  perm(i) = i
86  iperm(i) = i
87  end do
88  else
89  allocate(colorindex(0:n), perm_tmp(n), perm(n), iperm(n))
90  call hecmw_matrix_ordering_rcm(n, hecmat%indexL, hecmat%itemL, &
91  hecmat%indexU, hecmat%itemU, perm_tmp, iperm)
92  !write(*,*) 'DEBUG: RCM ordering done', hecmw_Wtime()-t0
93  call hecmw_matrix_ordering_mc(n, hecmat%indexL, hecmat%itemL, &
94  hecmat%indexU, hecmat%itemU, perm_tmp, &
95  ncolor_in, ncolor, colorindex, perm, iperm)
96  !write(*,*) 'DEBUG: MC ordering done', hecmw_Wtime()-t0
97  deallocate(perm_tmp)
98 
99  !call write_debug_info
100  endif
101 
102  npl = hecmat%indexL(n)
103  npu = hecmat%indexU(n)
104  allocate(indexl(0:n), indexu(0:n), iteml(npl), itemu(npu))
105  call hecmw_matrix_reorder_profile(n, perm, iperm, &
106  hecmat%indexL, hecmat%indexU, hecmat%itemL, hecmat%itemU, &
107  indexl, indexu, iteml, itemu)
108  !write(*,*) 'DEBUG: reordering profile done', hecmw_Wtime()-t0
109 
110  !call check_ordering
111 
112  allocate(d(n), al(npl), au(npu))
113  call hecmw_matrix_reorder_values(n, 1, perm, iperm, &
114  hecmat%indexL, hecmat%indexU, hecmat%itemL, hecmat%itemU, &
115  hecmat%AL, hecmat%AU, hecmat%D, &
116  indexl, indexu, iteml, itemu, al, au, d)
117  !write(*,*) 'DEBUG: reordering values done', hecmw_Wtime()-t0
118 
119  call hecmw_matrix_reorder_renum_item(n, perm, indexl, iteml)
120  call hecmw_matrix_reorder_renum_item(n, perm, indexu, itemu)
121 
122  allocate(alu(n))
123  alu = 0.d0
124 
125  do ii= 1, n
126  alu(ii) = d(ii)
127  enddo
128 
129  !$omp parallel default(none),private(ii,ALUtmp,k,i,j,PW),shared(N,ALU,SIGMA_DIAG)
130  !$omp do
131  do ii= 1, n
132  alutmp(1,1)= alu(ii) * sigma_diag
133  alutmp(1,1)= 1.d0/alutmp(1,1)
134  alu(ii)= alutmp(1,1)
135  enddo
136  !$omp end do
137  !$omp end parallel
138 
139  isfirst = .true.
140 
141  initialized = .true.
142  hecmat%Iarray(98) = 0 ! symbolic setup done
143  hecmat%Iarray(97) = 0 ! numerical setup done
144 
145  !write(*,*) 'DEBUG: SSOR setup done', hecmw_Wtime()-t0
146 
147  end subroutine hecmw_precond_ssor_11_setup
148 
149  subroutine hecmw_precond_ssor_11_apply(ZP)
151  implicit none
152  real(kind=kreal), intent(inout) :: zp(:)
153  integer(kind=kint) :: ic, i, iold, j, isl, iel, isu, ieu, k
154  real(kind=kreal) :: sw(1), x(1)
155 
156  ! added for turning >>>
157  integer(kind=kint), parameter :: numofblockperthread = 100
158  integer(kind=kint), save :: numofthread = 1, numofblock
159  integer(kind=kint), save, allocatable :: ictoblockindex(:)
160  integer(kind=kint), save, allocatable :: blockindextocolorindex(:)
161  integer(kind=kint), save :: sectorcachesize0, sectorcachesize1
162  integer(kind=kint) :: blockindex, elementcount, numofelement, ii
163  real(kind=kreal) :: numofelementperblock
164  integer(kind=kint) :: my_rank
165 
166  if (isfirst) then
167  !$ numOfThread = omp_get_max_threads()
168  numofblock = numofthread * numofblockperthread
169  if (allocated(ictoblockindex)) deallocate(ictoblockindex)
170  if (allocated(blockindextocolorindex)) deallocate(blockindextocolorindex)
171  allocate (ictoblockindex(0:ncolor), &
172  blockindextocolorindex(0:numofblock + ncolor))
173  numofelement = n + indexl(n) + indexu(n)
174  numofelementperblock = dble(numofelement) / numofblock
175  blockindex = 0
176  ictoblockindex = -1
177  ictoblockindex(0) = 0
178  blockindextocolorindex = -1
179  blockindextocolorindex(0) = 0
180  my_rank = hecmw_comm_get_rank()
181  ! write(9000+my_rank,*) &
182  ! '# numOfElementPerBlock =', numOfElementPerBlock
183  ! write(9000+my_rank,*) &
184  ! '# ic, blockIndex, colorIndex, elementCount'
185  do ic = 1, ncolor
186  elementcount = 0
187  ii = 1
188  do i = colorindex(ic-1)+1, colorindex(ic)
189  elementcount = elementcount + 1
190  elementcount = elementcount + (indexl(i) - indexl(i-1))
191  elementcount = elementcount + (indexu(i) - indexu(i-1))
192  if (elementcount > ii * numofelementperblock &
193  .or. i == colorindex(ic)) then
194  ii = ii + 1
195  blockindex = blockindex + 1
196  blockindextocolorindex(blockindex) = i
197  ! write(9000+my_rank,*) ic, blockIndex, &
198  ! blockIndexToColorIndex(blockIndex), elementCount
199  endif
200  enddo
201  ictoblockindex(ic) = blockindex
202  enddo
203  numofblock = blockindex
204 
206  sectorcachesize0, sectorcachesize1 )
207 
208  isfirst = .false.
209  endif
210  ! <<< added for turning
211 
212  !call start_collection("loopInPrecond33")
213 
214  !OCL CACHE_SECTOR_SIZE(sectorCacheSize0,sectorCacheSize1)
215  !OCL CACHE_SUBSECTOR_ASSIGN(ZP)
216 
217  !$omp parallel default(none) &
218  !$omp&shared(NColor,indexL,itemL,indexU,itemU,AL,AU,D,ALU,perm,&
219  !$omp& ZP,icToBlockIndex,blockIndexToColorIndex) &
220  !$omp&private(SW,X,ic,i,iold,isL,ieL,isU,ieU,j,k,blockIndex)
221 
222  !C-- FORWARD
223  do ic=1,ncolor
224  !$omp do schedule (static, 1)
225  do blockindex = ictoblockindex(ic-1)+1, ictoblockindex(ic)
226  do i = blockindextocolorindex(blockindex-1)+1, &
227  blockindextocolorindex(blockindex)
228  iold = perm(i)
229  sw(1)= zp(iold)
230  isl= indexl(i-1)+1
231  iel= indexl(i)
232  do j= isl, iel
233  k= iteml(j)
234  x(1)= zp(k)
235  sw(1)= sw(1) - al(j)*x(1)
236  enddo ! j
237 
238  x = sw
239  x(1)= alu(i )* x(1)
240  zp(iold)= x(1)
241  enddo ! i
242  enddo ! blockIndex
243  !$omp end do
244  enddo ! ic
245 
246  !C-- BACKWARD
247  do ic=ncolor, 1, -1
248  !$omp do schedule (static, 1)
249  do blockindex = ictoblockindex(ic), ictoblockindex(ic-1)+1, -1
250  do i = blockindextocolorindex(blockindex), &
251  blockindextocolorindex(blockindex-1)+1, -1
252  ! do blockIndex = icToBlockIndex(ic-1)+1, icToBlockIndex(ic)
253  ! do i = blockIndexToColorIndex(blockIndex-1)+1, &
254  ! blockIndexToColorIndex(blockIndex)
255  ! do i = endPos(threadNum, ic), startPos(threadNum, ic), -1
256  sw= 0.d0
257  isu= indexu(i-1) + 1
258  ieu= indexu(i)
259  do j= ieu, isu, -1
260  k= itemu(j)
261  x(1)= zp(k)
262  sw(1)= sw(1) + au(j)*x(1)
263  enddo ! j
264 
265  x = sw
266  x(1)= alu(i)* x(1)
267 
268  iold = perm(i)
269  zp(iold)= zp(iold) - x(1)
270  enddo ! i
271  enddo ! blockIndex
272  !$omp end do
273  enddo ! ic
274  !$omp end parallel
275 
276  !OCL END_CACHE_SUBSECTOR
277  !OCL END_CACHE_SECTOR_SIZE
278 
279  !call stop_collection("loopInPrecond33")
280 
281  end subroutine hecmw_precond_ssor_11_apply
282 
283  subroutine hecmw_precond_ssor_11_clear(hecMAT)
284  implicit none
285  type(hecmwst_matrix), intent(inout) :: hecmat
286  integer(kind=kint ) :: nthreads = 1
287  !$ nthreads = omp_get_max_threads()
288  if (associated(colorindex)) deallocate(colorindex)
289  if (associated(perm)) deallocate(perm)
290  if (associated(iperm)) deallocate(iperm)
291  if (associated(alu)) deallocate(alu)
292  if (nthreads >= 1) then
293  if (associated(d)) deallocate(d)
294  if (associated(al)) deallocate(al)
295  if (associated(au)) deallocate(au)
296  if (associated(indexl)) deallocate(indexl)
297  if (associated(indexu)) deallocate(indexu)
298  if (associated(iteml)) deallocate(iteml)
299  if (associated(itemu)) deallocate(itemu)
300  end if
301  nullify(colorindex)
302  nullify(perm)
303  nullify(iperm)
304  nullify(alu)
305  nullify(d)
306  nullify(al)
307  nullify(au)
308  nullify(indexl)
309  nullify(indexu)
310  nullify(iteml)
311  nullify(itemu)
312  initialized = .false.
313  end subroutine hecmw_precond_ssor_11_clear
314 
315  subroutine write_debug_info
316  implicit none
317  integer(kind=kint) :: my_rank, ic, in
318  my_rank = hecmw_comm_get_rank()
319  !--------------------> debug: shizawa
320  if (my_rank.eq.0) then
321  write(*,*) 'DEBUG: Output fort.19000+myrank and fort.29000+myrank for coloring information'
322  endif
323  write(19000+my_rank,'(a)') '#NCOLORTot'
324  write(19000+my_rank,*) ncolor
325  write(19000+my_rank,'(a)') '#ic COLORindex(ic-1)+1 COLORindex(ic)'
326  do ic=1,ncolor
327  write(19000+my_rank,*) ic, colorindex(ic-1)+1,colorindex(ic)
328  enddo ! ic
329  write(29000+my_rank,'(a)') '#n_node'
330  write(29000+my_rank,*) n
331  write(29000+my_rank,'(a)') '#in OLDtoNEW(in) NEWtoOLD(in)'
332  do in=1,n
333  write(29000+my_rank,*) in, iperm(in), perm(in)
334  if (perm(iperm(in)) .ne. in) then
335  write(29000+my_rank,*) '** WARNING **: NEWtoOLD and OLDtoNEW: ',in
336  endif
337  enddo
338  end subroutine write_debug_info
339 
340  subroutine check_ordering
341  implicit none
342  integer(kind=kint) :: ic, i, j, k
343  integer(kind=kint), allocatable :: iicolor(:)
344  ! check color dependence of neighbouring nodes
345  if (ncolor.gt.1) then
346  allocate(iicolor(n))
347  do ic=1,ncolor
348  do i= colorindex(ic-1)+1, colorindex(ic)
349  iicolor(i) = ic
350  enddo ! i
351  enddo ! ic
352  ! FORWARD: L-part
353  do ic=1,ncolor
354  do i= colorindex(ic-1)+1, colorindex(ic)
355  do j= indexl(i-1)+1, indexl(i)
356  k= iteml(j)
357  if (iicolor(i).eq.iicolor(k)) then
358  write(*,*) .eq.'** ERROR **: L-part: iicolor(i)iicolor(k)',i,k,iicolor(i)
359  endif
360  enddo ! j
361  enddo ! i
362  enddo ! ic
363  ! BACKWARD: U-part
364  do ic=ncolor, 1, -1
365  do i= colorindex(ic), colorindex(ic-1)+1, -1
366  do j= indexu(i-1)+1, indexu(i)
367  k= itemu(j)
368  if (iicolor(i).eq.iicolor(k)) then
369  write(*,*) .eq.'** ERROR **: U-part: iicolor(i)iicolor(k)',i,k,iicolor(i)
370  endif
371  enddo ! j
372  enddo ! i
373  enddo ! ic
374  deallocate(iicolor)
375  endif ! if (NColor.gt.1)
376  !--------------------< debug: shizawa
377  end subroutine check_ordering
378 
379 end module hecmw_precond_ssor_11
hecmw_precond_ssor_11::hecmw_precond_ssor_11_setup
subroutine, public hecmw_precond_ssor_11_setup(hecMAT)
Definition: hecmw_precond_SSOR_11.f90:47
m_hecmw_matrix_ordering_cm
Definition: hecmw_matrix_ordering_CM.f90:6
hecmw_matrix_reorder::hecmw_matrix_reorder_values
subroutine, public hecmw_matrix_reorder_values(N, NDOF, perm, iperm, indexL, indexU, itemL, itemU, AL, AU, D, indexLp, indexUp, itemLp, itemUp, ALp, AUp, Dp)
Definition: hecmw_matrix_reorder.f90:71
hecmw_matrix_reorder::hecmw_matrix_reorder_profile
subroutine, public hecmw_matrix_reorder_profile(N, perm, iperm, indexL, indexU, itemL, itemU, indexLp, indexUp, itemLp, itemUp)
Definition: hecmw_matrix_reorder.f90:22
hecmw_matrix_reorder::hecmw_matrix_reorder_renum_item
subroutine, public hecmw_matrix_reorder_renum_item(N, perm, indexXp, itemXp)
Definition: hecmw_matrix_reorder.f90:144
hecmw_precond_ssor_11::hecmw_precond_ssor_11_apply
subroutine, public hecmw_precond_ssor_11_apply(ZP)
Definition: hecmw_precond_SSOR_11.f90:150
hecmw_matrix_misc::hecmw_mat_get_ncolor_in
integer(kind=kint) function, public hecmw_mat_get_ncolor_in(hecMAT)
Definition: hecmw_matrix_misc.f90:531
hecmw_tuning_fx
Definition: hecmw_tuning_fx.f90:6
hecmw_tuning_fx::hecmw_tuning_fx_calc_sector_cache
subroutine, public hecmw_tuning_fx_calc_sector_cache(N, NDOF, sectorCacheSize0, sectorCacheSize1)
Definition: hecmw_tuning_fx.f90:25
hecmw_util
I/O and Utility.
Definition: hecmw_util_f.F90:7
m_hecmw_matrix_ordering_mc::hecmw_matrix_ordering_mc
subroutine, public hecmw_matrix_ordering_mc(N, indexL, itemL, indexU, itemU, perm_cur, ncolor_in, ncolor_out, COLORindex, perm, iperm)
Definition: hecmw_matrix_ordering_MC.f90:17
hecmw_precond_ssor_11::hecmw_precond_ssor_11_clear
subroutine, public hecmw_precond_ssor_11_clear(hecMAT)
Definition: hecmw_precond_SSOR_11.f90:284
hecmw_util::kreal
integer(kind=4), parameter kreal
Definition: hecmw_util_f.F90:16
hecmw_matrix_misc::hecmw_mat_get_sigma_diag
real(kind=kreal) function, public hecmw_mat_get_sigma_diag(hecMAT)
Definition: hecmw_matrix_misc.f90:694
m_hecmw_matrix_ordering_cm::hecmw_matrix_ordering_rcm
subroutine, public hecmw_matrix_ordering_rcm(N, indexL, itemL, indexU, itemU, perm, iperm)
Definition: hecmw_matrix_ordering_CM.f90:67
m_hecmw_matrix_ordering_mc
Definition: hecmw_matrix_ordering_MC.f90:6
hecmw_matrix_misc
Definition: hecmw_matrix_misc.f90:6
hecmw_matrix_reorder
Definition: hecmw_matrix_reorder.f90:6
hecmw_precond_ssor_11
Definition: hecmw_precond_SSOR_11.f90:11
hecmw_util::hecmw_comm_get_rank
integer(kind=kint) function hecmw_comm_get_rank()
Definition: hecmw_util_f.F90:582
hecmw_util::hecmwst_matrix
Definition: hecmw_util_f.F90:444