FrontISTR  5.7.0
Large-scale structural analysis program with finit element method
hecmw_precond_SSOR_nn.f90
Go to the documentation of this file.
1 !-------------------------------------------------------------------------------
2 ! Copyright (c) 2019 FrontISTR Commons
3 ! This software is released under the MIT License, see LICENSE.txt
4 !-------------------------------------------------------------------------------
5 
6 !C
7 !C***
8 !C*** module hecmw_precond_SSOR_nn
9 !C***
10 !C
12  use hecmw_util
17  !$ use omp_lib
18 
19  private
20 
24 
25  integer(kind=kint) :: N
26  real(kind=kreal), pointer :: d(:) => null()
27  real(kind=kreal), pointer :: al(:) => null()
28  real(kind=kreal), pointer :: au(:) => null()
29  integer(kind=kint), pointer :: indexL(:) => null()
30  integer(kind=kint), pointer :: indexU(:) => null()
31  integer(kind=kint), pointer :: itemL(:) => null()
32  integer(kind=kint), pointer :: itemU(:) => null()
33  real(kind=kreal), pointer :: alu(:) => null()
34 
35  integer(kind=kint) :: NColor
36  integer(kind=kint), pointer :: COLORindex(:) => null()
37  integer(kind=kint), pointer :: perm(:) => null()
38  integer(kind=kint), pointer :: iperm(:) => null()
39 
40  logical, save :: isFirst = .true.
41 
42  logical, save :: INITIALIZED = .false.
43 
44 contains
45 
46  subroutine hecmw_precond_ssor_nn_setup(hecMAT)
47  implicit none
48  type(hecmwst_matrix), intent(inout) :: hecmat
49  integer(kind=kint ) :: npl, npu, npcl, npcu
50  real (kind=kreal), allocatable :: cd(:)
51  integer(kind=kint ) :: ncolor_in
52  real (kind=kreal) :: sigma_diag
53  real (kind=kreal) :: alutmp(hecmat%NDOF,hecmat%NDOF), pw(hecmat%NDOF)
54  integer(kind=kint ) :: ii, i, j, k, ndof, ndof2
55  integer(kind=kint ) :: nthreads = 1
56  integer(kind=kint ), allocatable :: perm_tmp(:)
57  !real (kind=kreal) :: t0
58 
59  !t0 = hecmw_Wtime()
60  !write(*,*) 'DEBUG: SSOR setup start', hecmw_Wtime()-t0
61 
62  if (initialized) then
63  if (hecmat%Iarray(98) == 1) then ! need symbolic and numerical setup
64  call hecmw_precond_ssor_nn_clear(hecmat)
65  else if (hecmat%Iarray(97) == 1) then ! need numerical setup only
66  call hecmw_precond_ssor_nn_clear(hecmat) ! TEMPORARY
67  else
68  return
69  endif
70  endif
71 
72  !$ nthreads = omp_get_max_threads()
73 
74  n = hecmat%N
75  ndof=hecmat%NDOF
76  ndof2=ndof*ndof
77 
78  ncolor_in = hecmw_mat_get_ncolor_in(hecmat)
79  sigma_diag = hecmw_mat_get_sigma_diag(hecmat)
80 
81  if (nthreads == 1) then
82  ncolor = 1
83  allocate(colorindex(0:1), perm(n), iperm(n))
84  colorindex(0) = 0
85  colorindex(1) = n
86  do i=1,n
87  perm(i) = i
88  iperm(i) = i
89  end do
90  else
91  allocate(colorindex(0:n), perm_tmp(n), perm(n), iperm(n))
92  call hecmw_matrix_ordering_rcm(n, hecmat%indexL, hecmat%itemL, &
93  hecmat%indexU, hecmat%itemU, perm_tmp, iperm)
94  !write(*,*) 'DEBUG: RCM ordering done', hecmw_Wtime()-t0
95  call hecmw_matrix_ordering_mc(n, hecmat%indexL, hecmat%itemL, &
96  hecmat%indexU, hecmat%itemU, perm_tmp, &
97  ncolor_in, ncolor, colorindex, perm, iperm)
98  !write(*,*) 'DEBUG: MC ordering done', hecmw_Wtime()-t0
99  deallocate(perm_tmp)
100 
101  !call write_debug_info
102  endif
103 
104  npl = hecmat%indexL(n)
105  npu = hecmat%indexU(n)
106  allocate(indexl(0:n), indexu(0:n), iteml(npl), itemu(npu))
107  call hecmw_matrix_reorder_profile(n, perm, iperm, &
108  hecmat%indexL, hecmat%indexU, hecmat%itemL, hecmat%itemU, &
109  indexl, indexu, iteml, itemu)
110  !write(*,*) 'DEBUG: reordering profile done', hecmw_Wtime()-t0
111 
112  !call check_ordering
113 
114  allocate(d(ndof2*n), al(ndof2*npl), au(ndof2*npu))
115  call hecmw_matrix_reorder_values(n, ndof, perm, iperm, &
116  hecmat%indexL, hecmat%indexU, hecmat%itemL, hecmat%itemU, &
117  hecmat%AL, hecmat%AU, hecmat%D, &
118  indexl, indexu, iteml, itemu, al, au, d)
119  !write(*,*) 'DEBUG: reordering values done', hecmw_Wtime()-t0
120 
121  call hecmw_matrix_reorder_renum_item(n, perm, indexl, iteml)
122  call hecmw_matrix_reorder_renum_item(n, perm, indexu, itemu)
123 
124  allocate(alu(ndof2*n))
125  alu = 0.d0
126 
127  do ii= 1, ndof2*n
128  alu(ii) = d(ii)
129  enddo
130 
131  !$omp parallel default(none),private(ii,ALUtmp,k,i,j,PW),shared(N,NDOF,NDOF2,ALU,SIGMA_DIAG)
132  !$omp do
133  do ii= 1, n
134  do i = 1, ndof
135  do j = 1, ndof
136  alutmp(i,j) = alu(ndof2*(ii-1)+(i-1)*ndof+j)
137  if (i==j) alutmp(i,j)=alutmp(i,j)*sigma_diag
138  end do
139  end do
140  do k= 1, ndof
141  alutmp(k,k)= 1.d0/alutmp(k,k)
142  do i= k+1, ndof
143  alutmp(i,k)= alutmp(i,k) * alutmp(k,k)
144  do j= k+1, ndof
145  pw(j)= alutmp(i,j) - alutmp(i,k)*alutmp(k,j)
146  enddo
147  do j= k+1, ndof
148  alutmp(i,j)= pw(j)
149  enddo
150  enddo
151  enddo
152  do i = 1, ndof
153  do j = 1, ndof
154  alu(ndof2*(ii-1)+(i-1)*ndof+j)= alutmp(i,j)
155  end do
156  end do
157  enddo
158  !$omp end do
159  !$omp end parallel
160 
161  isfirst = .true.
162 
163  initialized = .true.
164  hecmat%Iarray(98) = 0 ! symbolic setup done
165  hecmat%Iarray(97) = 0 ! numerical setup done
166 
167  !write(*,*) 'DEBUG: SSOR setup done', hecmw_Wtime()-t0
168 
169  end subroutine hecmw_precond_ssor_nn_setup
170 
171  subroutine hecmw_precond_ssor_nn_apply(ZP, NDOF)
173  implicit none
174  real(kind=kreal), intent(inout) :: zp(:)
175  integer(kind=kint) :: ic, i, iold, j, isl, iel, isu, ieu, k, ndof, ndof2, idof,jdof
176  real(kind=kreal) :: sw(ndof), x(ndof)
177 
178  ! added for turning >>>
179  integer(kind=kint), parameter :: numofblockperthread = 100
180  integer(kind=kint), save :: numofthread = 1, numofblock
181  integer(kind=kint), save, allocatable :: ictoblockindex(:)
182  integer(kind=kint), save, allocatable :: blockindextocolorindex(:)
183  integer(kind=kint), save :: sectorcachesize0, sectorcachesize1
184  integer(kind=kint) :: blockindex, elementcount, numofelement, ii
185  real(kind=kreal) :: numofelementperblock
186  integer(kind=kint) :: my_rank
187 
188  ndof2=ndof*ndof
189  if (isfirst) then
190  !$ numOfThread = omp_get_max_threads()
191  numofblock = numofthread * numofblockperthread
192  if (allocated(ictoblockindex)) deallocate(ictoblockindex)
193  if (allocated(blockindextocolorindex)) deallocate(blockindextocolorindex)
194  allocate (ictoblockindex(0:ncolor), &
195  blockindextocolorindex(0:numofblock + ncolor))
196  numofelement = n + indexl(n) + indexu(n)
197  numofelementperblock = dble(numofelement) / numofblock
198  blockindex = 0
199  ictoblockindex = -1
200  ictoblockindex(0) = 0
201  blockindextocolorindex = -1
202  blockindextocolorindex(0) = 0
203  my_rank = hecmw_comm_get_rank()
204  ! write(9000+my_rank,*) &
205  ! '# numOfElementPerBlock =', numOfElementPerBlock
206  ! write(9000+my_rank,*) &
207  ! '# ic, blockIndex, colorIndex, elementCount'
208  do ic = 1, ncolor
209  elementcount = 0
210  ii = 1
211  do i = colorindex(ic-1)+1, colorindex(ic)
212  elementcount = elementcount + 1
213  elementcount = elementcount + (indexl(i) - indexl(i-1))
214  elementcount = elementcount + (indexu(i) - indexu(i-1))
215  if (elementcount > ii * numofelementperblock &
216  .or. i == colorindex(ic)) then
217  ii = ii + 1
218  blockindex = blockindex + 1
219  blockindextocolorindex(blockindex) = i
220  ! write(9000+my_rank,*) ic, blockIndex, &
221  ! blockIndexToColorIndex(blockIndex), elementCount
222  endif
223  enddo
224  ictoblockindex(ic) = blockindex
225  enddo
226  numofblock = blockindex
227 
228  call hecmw_tuning_fx_calc_sector_cache( n, ndof, &
229  sectorcachesize0, sectorcachesize1 )
230 
231  isfirst = .false.
232  endif
233  ! <<< added for turning
234 
235  !call start_collection("loopInPrecond33")
236 
237  !OCL CACHE_SECTOR_SIZE(sectorCacheSize0,sectorCacheSize1)
238  !OCL CACHE_SUBSECTOR_ASSIGN(ZP)
239 
240  !$omp parallel default(none) &
241  !$omp&shared(NColor,indexL,itemL,indexU,itemU,AL,AU,D,ALU,perm,&
242  !$omp& ZP,icToBlockIndex,blockIndexToColorIndex,NDOF,NDOF2) &
243  !$omp&private(SW,X,ic,i,iold,isL,ieL,isU,ieU,j,k,blockIndex,idof,jdof)
244 
245  !C-- FORWARD
246  do ic=1,ncolor
247  !$omp do schedule (static, 1)
248  do blockindex = ictoblockindex(ic-1)+1, ictoblockindex(ic)
249  do i = blockindextocolorindex(blockindex-1)+1, &
250  blockindextocolorindex(blockindex)
251  iold = perm(i)
252  do idof = 1, ndof
253  sw(idof) = zp(ndof*(iold-1)+idof)
254  end do
255  isl= indexl(i-1)+1
256  iel= indexl(i)
257  do j= isl, iel
258  k= iteml(j)
259  do idof = 1, ndof
260  x(idof) = zp(ndof*(k-1)+idof)
261  end do
262  do idof = 1, ndof
263  do jdof = 1, ndof
264  sw(idof) = sw(idof) - al(ndof2*(j-1)+ndof*(idof-1)+jdof)*x(jdof)
265  end do
266  end do
267  enddo ! j
268 
269  x = sw
270  do idof = 2,ndof
271  do jdof = 1, idof-1
272  x(idof) = x(idof) - alu(ndof2*(i-1)+ndof*(idof-1)+jdof )*x(jdof)
273  end do
274  end do
275  do idof = ndof, 1, -1
276  do jdof = ndof, idof+1, -1
277  x(idof) = x(idof) - alu(ndof2*(i-1)+ndof*(idof-1)+jdof)*x(jdof)
278  end do
279  x(idof) = alu(ndof2*(i-1)+(ndof+1)*(idof-1)+1)*x(idof)
280  end do
281  zp(ndof*(iold-1)+1:ndof*(iold-1)+ndof) = x(1:ndof)
282 
283  enddo ! i
284  enddo ! blockIndex
285  !$omp end do
286  enddo ! ic
287 
288  !C-- BACKWARD
289  do ic=ncolor, 1, -1
290  !$omp do schedule (static, 1)
291  do blockindex = ictoblockindex(ic), ictoblockindex(ic-1)+1, -1
292  do i = blockindextocolorindex(blockindex), &
293  blockindextocolorindex(blockindex-1)+1, -1
294  ! do blockIndex = icToBlockIndex(ic-1)+1, icToBlockIndex(ic)
295  ! do i = blockIndexToColorIndex(blockIndex-1)+1, &
296  ! blockIndexToColorIndex(blockIndex)
297  ! do i = endPos(threadNum, ic), startPos(threadNum, ic), -1
298  sw= 0.d0
299  isu= indexu(i-1) + 1
300  ieu= indexu(i)
301  do j= ieu, isu, -1
302  k= itemu(j)
303  do idof = 1, ndof
304  x(idof) = zp(ndof*(k-1)+idof)
305  end do
306  do idof = 1, ndof
307  do jdof = 1, ndof
308  sw(idof) = sw(idof) + au(ndof2*(j-1)+ndof*(idof-1)+jdof)*x(jdof)
309  end do
310  end do
311  enddo ! j
312 
313  x = sw
314  do idof = 2, ndof
315  do k = 1,idof-1
316  x(idof) = x(idof) - alu(ndof2*(i-1)+ndof*(idof-1)+k)*x(k)
317  end do
318  end do
319  do idof = ndof, 1, -1
320  do k = ndof, idof+1, -1
321  x(idof) = x(idof) - alu(ndof2*(i-1)+ndof*(idof-1)+k)*x(k)
322  end do
323  x(idof) = alu(ndof2*(i-1)+(ndof+1)*(idof-1)+1)*x(idof)
324  end do
325  iold = perm(i)
326  do idof = 1, ndof
327  zp(ndof*(iold-1)+idof) = zp(ndof*(iold-1)+idof) - x(idof)
328  end do
329  enddo ! i
330  enddo ! blockIndex
331  !$omp end do
332  enddo ! ic
333  !$omp end parallel
334 
335  !OCL END_CACHE_SUBSECTOR
336  !OCL END_CACHE_SECTOR_SIZE
337 
338  !call stop_collection("loopInPrecond33")
339 
340  end subroutine hecmw_precond_ssor_nn_apply
341 
342  subroutine hecmw_precond_ssor_nn_clear(hecMAT)
343  implicit none
344  type(hecmwst_matrix), intent(inout) :: hecmat
345  integer(kind=kint ) :: nthreads = 1
346  !$ nthreads = omp_get_max_threads()
347  if (associated(colorindex)) deallocate(colorindex)
348  if (associated(perm)) deallocate(perm)
349  if (associated(iperm)) deallocate(iperm)
350  if (associated(alu)) deallocate(alu)
351  if (nthreads >= 1) then
352  if (associated(d)) deallocate(d)
353  if (associated(al)) deallocate(al)
354  if (associated(au)) deallocate(au)
355  if (associated(indexl)) deallocate(indexl)
356  if (associated(indexu)) deallocate(indexu)
357  if (associated(iteml)) deallocate(iteml)
358  if (associated(itemu)) deallocate(itemu)
359  end if
360  nullify(colorindex)
361  nullify(perm)
362  nullify(iperm)
363  nullify(alu)
364  nullify(d)
365  nullify(al)
366  nullify(au)
367  nullify(indexl)
368  nullify(indexu)
369  nullify(iteml)
370  nullify(itemu)
371  initialized = .false.
372  end subroutine hecmw_precond_ssor_nn_clear
373 
374  subroutine write_debug_info
375  implicit none
376  integer(kind=kint) :: my_rank, ic, in
377  my_rank = hecmw_comm_get_rank()
378  !--------------------> debug: shizawa
379  if (my_rank.eq.0) then
380  write(*,*) 'DEBUG: Output fort.19000+myrank and fort.29000+myrank for coloring information'
381  endif
382  write(19000+my_rank,'(a)') '#NCOLORTot'
383  write(19000+my_rank,*) ncolor
384  write(19000+my_rank,'(a)') '#ic COLORindex(ic-1)+1 COLORindex(ic)'
385  do ic=1,ncolor
386  write(19000+my_rank,*) ic, colorindex(ic-1)+1,colorindex(ic)
387  enddo ! ic
388  write(29000+my_rank,'(a)') '#n_node'
389  write(29000+my_rank,*) n
390  write(29000+my_rank,'(a)') '#in OLDtoNEW(in) NEWtoOLD(in)'
391  do in=1,n
392  write(29000+my_rank,*) in, iperm(in), perm(in)
393  if (perm(iperm(in)) .ne. in) then
394  write(29000+my_rank,*) '** WARNING **: NEWtoOLD and OLDtoNEW: ',in
395  endif
396  enddo
397  end subroutine write_debug_info
398 
399  subroutine check_ordering
400  implicit none
401  integer(kind=kint) :: ic, i, j, k
402  integer(kind=kint), allocatable :: iicolor(:)
403  ! check color dependence of neighbouring nodes
404  if (ncolor.gt.1) then
405  allocate(iicolor(n))
406  do ic=1,ncolor
407  do i= colorindex(ic-1)+1, colorindex(ic)
408  iicolor(i) = ic
409  enddo ! i
410  enddo ! ic
411  ! FORWARD: L-part
412  do ic=1,ncolor
413  do i= colorindex(ic-1)+1, colorindex(ic)
414  do j= indexl(i-1)+1, indexl(i)
415  k= iteml(j)
416  if (iicolor(i).eq.iicolor(k)) then
417  write(*,*) .eq.'** ERROR **: L-part: iicolor(i)iicolor(k)',i,k,iicolor(i)
418  endif
419  enddo ! j
420  enddo ! i
421  enddo ! ic
422  ! BACKWARD: U-part
423  do ic=ncolor, 1, -1
424  do i= colorindex(ic), colorindex(ic-1)+1, -1
425  do j= indexu(i-1)+1, indexu(i)
426  k= itemu(j)
427  if (iicolor(i).eq.iicolor(k)) then
428  write(*,*) .eq.'** ERROR **: U-part: iicolor(i)iicolor(k)',i,k,iicolor(i)
429  endif
430  enddo ! j
431  enddo ! i
432  enddo ! ic
433  deallocate(iicolor)
434  endif ! if (NColor.gt.1)
435  !--------------------< debug: shizawa
436  end subroutine check_ordering
437 
438 end module hecmw_precond_ssor_nn
hecmw_precond_ssor_nn::hecmw_precond_ssor_nn_setup
subroutine, public hecmw_precond_ssor_nn_setup(hecMAT)
Definition: hecmw_precond_SSOR_nn.f90:47
m_hecmw_matrix_ordering_cm
Definition: hecmw_matrix_ordering_CM.f90:6
hecmw_matrix_reorder::hecmw_matrix_reorder_values
subroutine, public hecmw_matrix_reorder_values(N, NDOF, perm, iperm, indexL, indexU, itemL, itemU, AL, AU, D, indexLp, indexUp, itemLp, itemUp, ALp, AUp, Dp)
Definition: hecmw_matrix_reorder.f90:71
hecmw_matrix_reorder::hecmw_matrix_reorder_profile
subroutine, public hecmw_matrix_reorder_profile(N, perm, iperm, indexL, indexU, itemL, itemU, indexLp, indexUp, itemLp, itemUp)
Definition: hecmw_matrix_reorder.f90:22
hecmw_matrix_reorder::hecmw_matrix_reorder_renum_item
subroutine, public hecmw_matrix_reorder_renum_item(N, perm, indexXp, itemXp)
Definition: hecmw_matrix_reorder.f90:144
hecmw_matrix_misc::hecmw_mat_get_ncolor_in
integer(kind=kint) function, public hecmw_mat_get_ncolor_in(hecMAT)
Definition: hecmw_matrix_misc.f90:531
hecmw_tuning_fx
Definition: hecmw_tuning_fx.f90:6
hecmw_tuning_fx::hecmw_tuning_fx_calc_sector_cache
subroutine, public hecmw_tuning_fx_calc_sector_cache(N, NDOF, sectorCacheSize0, sectorCacheSize1)
Definition: hecmw_tuning_fx.f90:25
hecmw_util
I/O and Utility.
Definition: hecmw_util_f.F90:7
hecmw_precond_ssor_nn
Definition: hecmw_precond_SSOR_nn.f90:11
m_hecmw_matrix_ordering_mc::hecmw_matrix_ordering_mc
subroutine, public hecmw_matrix_ordering_mc(N, indexL, itemL, indexU, itemU, perm_cur, ncolor_in, ncolor_out, COLORindex, perm, iperm)
Definition: hecmw_matrix_ordering_MC.f90:17
hecmw_util::kreal
integer(kind=4), parameter kreal
Definition: hecmw_util_f.F90:16
hecmw_matrix_misc::hecmw_mat_get_sigma_diag
real(kind=kreal) function, public hecmw_mat_get_sigma_diag(hecMAT)
Definition: hecmw_matrix_misc.f90:694
m_hecmw_matrix_ordering_cm::hecmw_matrix_ordering_rcm
subroutine, public hecmw_matrix_ordering_rcm(N, indexL, itemL, indexU, itemU, perm, iperm)
Definition: hecmw_matrix_ordering_CM.f90:67
m_hecmw_matrix_ordering_mc
Definition: hecmw_matrix_ordering_MC.f90:6
hecmw_matrix_misc
Definition: hecmw_matrix_misc.f90:6
hecmw_matrix_reorder
Definition: hecmw_matrix_reorder.f90:6
hecmw_precond_ssor_nn::hecmw_precond_ssor_nn_apply
subroutine, public hecmw_precond_ssor_nn_apply(ZP, NDOF)
Definition: hecmw_precond_SSOR_nn.f90:172
hecmw_precond_ssor_nn::hecmw_precond_ssor_nn_clear
subroutine, public hecmw_precond_ssor_nn_clear(hecMAT)
Definition: hecmw_precond_SSOR_nn.f90:343
hecmw_util::hecmw_comm_get_rank
integer(kind=kint) function hecmw_comm_get_rank()
Definition: hecmw_util_f.F90:582
hecmw_util::hecmwst_matrix
Definition: hecmw_util_f.F90:444