FrontISTR  5.7.0
Large-scale structural analysis program with finit element method
hecmw_solver_las_22.f90
Go to the documentation of this file.
1 !-------------------------------------------------------------------------------
2 ! Copyright (c) 2019 FrontISTR Commons
3 ! This software is released under the MIT License, see LICENSE.txt
4 !-------------------------------------------------------------------------------
5 
7 
8  private
9 
10  public :: hecmw_matvec_22
13  public :: hecmw_matresid_22
14  public :: hecmw_rel_resid_l2_22
15  public :: hecmw_tvec_22
16  public :: hecmw_ttvec_22
17  public :: hecmw_ttmattvec_22
18  public :: hecmw_mat_diag_sr_22
19 
20  ! ! for communication hiding in matvec
21  ! integer(kind=kint), save, allocatable :: index_o(:), item_o(:)
22  ! real(kind=kreal), save, allocatable :: A_o(:)
23  logical, save :: async_matvec_flg = .false.
24 contains
25 
26  !C
27  !C***
28  !C*** hecmw_matvec_22
29  !C***
30  !C
31  subroutine hecmw_matvec_22 (hecMESH, hecMAT, X, Y, time_Ax, COMMtime)
34  implicit none
35  type (hecmwst_local_mesh), intent(in) :: hecmesh
36  type (hecmwst_matrix), intent(in), target :: hecmat
37  real(kind=kreal), intent(in) :: x(:)
38  real(kind=kreal), intent(out) :: y(:)
39  real(kind=kreal), intent(inout) :: time_ax
40  real(kind=kreal), intent(inout), optional :: commtime
41 
42  real(kind=kreal) :: tcomm
43  real(kind=kreal), allocatable :: wk(:)
44 
45  tcomm = 0.d0
46 
47  if (hecmw_mat_get_flag_mpcmatvec(hecmat) /= 0) then
48  allocate(wk(hecmat%NP * hecmat%NDOF))
49  call hecmw_ttmattvec_22(hecmesh, hecmat, x, y, wk, time_ax, tcomm)
50  deallocate(wk)
51  else
52  call hecmw_matvec_22_inner(hecmesh, hecmat, x, y, time_ax, tcomm)
53  endif
54 
55  if (present(commtime)) commtime = commtime + tcomm
56  end subroutine hecmw_matvec_22
57 
58  subroutine hecmw_matvec_22_set_async (hecMAT)
60  implicit none
61  type (hecmwst_matrix), intent(in) :: hecmat
62  end subroutine hecmw_matvec_22_set_async
64  implicit none
65  end subroutine hecmw_matvec_22_unset_async
66 
67  !C
68  !C***
69  !C*** hecmw_matvec_22_inner ( private subroutine )
70  !C***
71  !C
72  subroutine hecmw_matvec_22_inner (hecMESH, hecMAT, X, Y, time_Ax, COMMtime)
73  use hecmw_util
74  use m_hecmw_comm_f
76  use hecmw_jad_type
77  use hecmw_tuning_fx
78  !$ use omp_lib
79 
80  implicit none
81  type (hecmwst_local_mesh), intent(in) :: hecmesh
82  type (hecmwst_matrix), intent(in), target :: hecmat
83  real(kind=kreal), intent(in) :: x(:)
84  real(kind=kreal), intent(out) :: y(:)
85  real(kind=kreal), intent(inout) :: time_ax
86  real(kind=kreal), intent(inout), optional :: commtime
87 
88  real(kind=kreal) :: start_time, end_time, tcomm
89  integer(kind=kint) :: i, j, js, je, in
90  real(kind=kreal) :: yv1, yv2, x1, x2
91 
92  integer(kind=kint) :: n, np
93  integer(kind=kint), pointer :: indexl(:), iteml(:), indexu(:), itemu(:)
94  real(kind=kreal), pointer :: al(:), au(:), d(:)
95 
96  ! added for turning >>>
97  integer, parameter :: numofblockperthread = 100
98  logical, save :: isfirst = .true.
99  integer, save :: numofthread = 1
100  integer, save, allocatable :: startpos(:), endpos(:)
101  integer(kind=kint), save :: sectorcachesize0, sectorcachesize1
102  integer(kind=kint) :: threadnum, blocknum, numofblock
103  integer(kind=kint) :: numofelement, elementcount, blockindex
104  real(kind=kreal) :: numofelementperblock
105  ! <<< added for turning
106 
107  if (hecmw_jad_is_initialized().ne.0) then
108  tcomm = 0.d0
109  start_time = hecmw_wtime()
110  call hecmw_jad_matvec(hecmesh, hecmat, x, y, tcomm)
111  end_time = hecmw_wtime()
112  time_ax = time_ax + end_time - start_time - tcomm
113  if (present(commtime)) commtime = commtime + tcomm
114  else
115 
116  n = hecmat%N
117  np = hecmat%NP
118  indexl => hecmat%indexL
119  indexu => hecmat%indexU
120  iteml => hecmat%itemL
121  itemu => hecmat%itemU
122  al => hecmat%AL
123  au => hecmat%AU
124  d => hecmat%D
125 
126  ! added for turning >>>
127  if (.not. isfirst) then
128  numofblock = numofthread * numofblockperthread
129  if (endpos(numofblock-1) .ne. n-1) then
130  deallocate(startpos, endpos)
131  isfirst = .true.
132  endif
133  endif
134  if (isfirst) then
135  !$ numOfThread = omp_get_max_threads()
136  numofblock = numofthread * numofblockperthread
137  allocate (startpos(0 : numofblock - 1), endpos(0 : numofblock - 1))
138  numofelement = n + indexl(n) + indexu(n)
139  numofelementperblock = dble(numofelement) / numofblock
140  blocknum = 0
141  elementcount = 0
142  startpos(blocknum) = 1
143  do i= 1, n
144  elementcount = elementcount + 1
145  elementcount = elementcount + (indexl(i) - indexl(i-1))
146  elementcount = elementcount + (indexu(i) - indexu(i-1))
147  if (elementcount > (blocknum + 1) * numofelementperblock) then
148  endpos(blocknum) = i
149  blocknum = blocknum + 1
150  startpos(blocknum) = i + 1
151  if (blocknum == (numofblock - 1)) exit
152  endif
153  enddo
154  endpos(blocknum) = n
155  ! for irregular data
156  do i= blocknum+1, numofblock-1
157  startpos(i) = n
158  endpos(i) = n-1
159  end do
160 
162  sectorcachesize0, sectorcachesize1)
163 
164  isfirst = .false.
165  endif
166  ! <<< added for turning
167 
168  start_time= hecmw_wtime()
169  call hecmw_update_r (hecmesh, x, np, 2)
170  end_time= hecmw_wtime()
171  if (present(commtime)) commtime = commtime + end_time - start_time
172 
173  start_time = hecmw_wtime()
174 
175  !call fapp_start("loopInMatvec33", 1, 0)
176  !call start_collection("loopInMatvec33")
177 
178  !OCL CACHE_SECTOR_SIZE(sectorCacheSize0,sectorCacheSize1)
179  !OCL CACHE_SUBSECTOR_ASSIGN(X)
180 
181  !$OMP PARALLEL DEFAULT(NONE) &
182  !$OMP&PRIVATE(i,X1,X2,YV1,YV2,jS,jE,j,in,threadNum,blockNum,blockIndex) &
183  !$OMP&SHARED(D,AL,AU,indexL,itemL,indexU,itemU,X,Y,startPos,endPos,numOfThread,N,async_matvec_flg)
184  threadnum = 0
185  !$ threadNum = omp_get_thread_num()
186  do blocknum = 0 , numofblockperthread - 1
187  blockindex = blocknum * numofthread + threadnum
188  do i = startpos(blockindex), endpos(blockindex)
189  x1= x(2*i-1)
190  x2= x(2*i )
191  yv1= d(4*i-3)*x1 + d(4*i-2)*x2
192  yv2= d(4*i-1)*x1 + d(4*i )*x2
193 
194  js= indexl(i-1) + 1
195  je= indexl(i )
196  do j= js, je
197  in = iteml(j)
198  x1 = x(2*in-1)
199  x2 = x(2*in )
200  yv1= yv1 + al(4*j-3)*x1 + al(4*j-2)*x2
201  yv2= yv2 + al(4*j-1)*x1 + al(4*j )*x2
202  enddo
203  js= indexu(i-1) + 1
204  je= indexu(i )
205  do j= js, je
206  in = itemu(j)
207  x1 = x(2*in-1)
208  x2 = x(2*in )
209  yv1= yv1 + au(4*j-3)*x1 + au(4*j-2)*x2
210  yv2= yv2 + au(4*j-1)*x1 + au(4*j )*x2
211  enddo
212  y(2*i-1)= yv1
213  y(2*i )= yv2
214  enddo
215  enddo
216 
217  !$OMP END PARALLEL
218 
219  !OCL END_CACHE_SUBSECTOR
220  !OCL END_CACHE_SECTOR_SIZE
221 
222 
223  end_time = hecmw_wtime()
224  time_ax = time_ax + end_time - start_time
225  endif
226 
227  end subroutine hecmw_matvec_22_inner
228 
229 
230 
231 
232 
233 
234  !C
235  !C***
236  !C*** hecmw_matresid_22
237  !C***
238  !C
239  subroutine hecmw_matresid_22 (hecMESH, hecMAT, X, B, R, time_Ax, COMMtime)
241 
242  implicit none
243  real(kind=kreal) :: x(:), b(:), r(:)
244  type (hecmwst_matrix) :: hecmat
245  type (hecmwst_local_mesh) :: hecmesh
246  real(kind=kreal) :: time_ax
247  real(kind=kreal), optional :: commtime
248 
249  integer(kind=kint) :: i
250  real(kind=kreal) :: tcomm
251 
252  tcomm = 0.d0
253  call hecmw_matvec_22 (hecmesh, hecmat, x, r, time_ax, tcomm)
254  if (present(commtime)) commtime = commtime + tcomm
255  !$omp parallel default(none),private(i),shared(hecMAT,R,B)
256  !$omp do
257  do i = 1, hecmat%N * 2
258  r(i) = b(i) - r(i)
259  enddo
260  !$omp end do
261  !$omp end parallel
262  end subroutine hecmw_matresid_22
263 
264  !C
265  !C***
266  !C*** hecmw_rel_resid_L2_22
267  !C***
268  !C
269  function hecmw_rel_resid_l2_22 (hecMESH, hecMAT, time_Ax, COMMtime)
272 
273  implicit none
274  real(kind=kreal) :: hecmw_rel_resid_l2_22
275  type ( hecmwst_local_mesh ), intent(in) :: hecmesh
276  type ( hecmwst_matrix ), intent(in) :: hecmat
277  real(kind=kreal) :: time_ax
278  real(kind=kreal), optional :: commtime
279 
280  real(kind=kreal) :: r(hecmat%NDOF*hecmat%NP)
281  real(kind=kreal) :: bnorm2, rnorm2
282  real(kind=kreal) :: tcomm
283 
284  tcomm = 0.d0
285  call hecmw_innerproduct_r(hecmesh, hecmat%NDOF, hecmat%B, hecmat%B, bnorm2, tcomm)
286  if (bnorm2 == 0.d0) then
287  bnorm2 = 1.d0
288  endif
289  call hecmw_matresid_22(hecmesh, hecmat, hecmat%X, hecmat%B, r, time_ax, tcomm)
290  call hecmw_innerproduct_r(hecmesh, hecmat%NDOF, r, r, rnorm2, tcomm)
291  if (present(commtime)) commtime = commtime + tcomm
292 
293  hecmw_rel_resid_l2_22 = sqrt(rnorm2 / bnorm2)
294 
295  end function hecmw_rel_resid_l2_22
296  !C
297  !C***
298  !C*** hecmw_Tvec_22
299  !C***
300  !C
301  subroutine hecmw_tvec_22 (hecMESH, X, Y, COMMtime)
303  use m_hecmw_comm_f
304  implicit none
305  type (hecmwst_local_mesh), intent(in) :: hecmesh
306  real(kind=kreal), intent(in) :: x(:)
307  real(kind=kreal), intent(out) :: y(:)
308  real(kind=kreal), intent(inout) :: commtime
309 
310  real(kind=kreal) :: start_time, end_time
311  integer(kind=kint) :: i, j, jj, k, kk
312 
313  start_time= hecmw_wtime()
314  call hecmw_update_r (hecmesh, x, hecmesh%n_node, 2)
315  end_time= hecmw_wtime()
316  commtime = commtime + end_time - start_time
317 
318  !$omp parallel default(none),private(i,k,kk,j,jj),shared(hecMESH,X,Y)
319  !$omp do
320  do i= 1, hecmesh%nn_internal * hecmesh%n_dof
321  y(i)= x(i)
322  enddo
323  !$omp end do
324 
325  !$omp do
326  outer: do i= 1, hecmesh%mpc%n_mpc
327  do j= hecmesh%mpc%mpc_index(i-1) + 1, hecmesh%mpc%mpc_index(i)
328  if (hecmesh%mpc%mpc_dof(j) > 2) cycle outer
329  enddo
330  k = hecmesh%mpc%mpc_index(i-1) + 1
331  kk = 2 * (hecmesh%mpc%mpc_item(k) - 1) + hecmesh%mpc%mpc_dof(k)
332  y(kk) = 0.d0
333  do j= hecmesh%mpc%mpc_index(i-1) + 2, hecmesh%mpc%mpc_index(i)
334  jj = 2 * (hecmesh%mpc%mpc_item(j) - 1) + hecmesh%mpc%mpc_dof(j)
335  y(kk) = y(kk) - hecmesh%mpc%mpc_val(j) * x(jj)
336  enddo
337  enddo outer
338  !$omp end do
339  !$omp end parallel
340 
341  end subroutine hecmw_tvec_22
342 
343  !C
344  !C***
345  !C*** hecmw_Ttvec_22
346  !C***
347  !C
348  subroutine hecmw_ttvec_22 (hecMESH, X, Y, COMMtime)
350  use m_hecmw_comm_f
351  implicit none
352  type (hecmwst_local_mesh), intent(in) :: hecmesh
353  real(kind=kreal), intent(in) :: x(:)
354  real(kind=kreal), intent(out) :: y(:)
355  real(kind=kreal), intent(inout) :: commtime
356 
357  real(kind=kreal) :: start_time, end_time
358  integer(kind=kint) :: i, j, jj, k, kk
359 
360  start_time= hecmw_wtime()
361  call hecmw_update_r (hecmesh, x, hecmesh%n_node, 2)
362  end_time= hecmw_wtime()
363  commtime = commtime + end_time - start_time
364 
365  !$omp parallel default(none),private(i,k,kk,j,jj),shared(hecMESH,X,Y)
366  !$omp do
367  do i= 1, hecmesh%nn_internal * hecmesh%n_dof
368  y(i)= x(i)
369  enddo
370  !$omp end do
371 
372  !$omp do
373  outer: do i= 1, hecmesh%mpc%n_mpc
374  do j= hecmesh%mpc%mpc_index(i-1) + 1, hecmesh%mpc%mpc_index(i)
375  if (hecmesh%mpc%mpc_dof(j) > 2) cycle outer
376  enddo
377  k = hecmesh%mpc%mpc_index(i-1) + 1
378  kk = 2 * (hecmesh%mpc%mpc_item(k) - 1) + hecmesh%mpc%mpc_dof(k)
379  y(kk) = 0.d0
380  do j= hecmesh%mpc%mpc_index(i-1) + 2, hecmesh%mpc%mpc_index(i)
381  jj = 2 * (hecmesh%mpc%mpc_item(j) - 1) + hecmesh%mpc%mpc_dof(j)
382  !omp atomic
383  y(jj) = y(jj) - hecmesh%mpc%mpc_val(j) * x(kk)
384  enddo
385  enddo outer
386  !$omp end do
387  !$omp end parallel
388 
389  end subroutine hecmw_ttvec_22
390 
391  !C
392  !C***
393  !C*** hecmw_TtmatTvec_22
394  !C***
395  !C
396  subroutine hecmw_ttmattvec_22 (hecMESH, hecMAT, X, Y, W, time_Ax, COMMtime)
398  implicit none
399  type (hecmwst_local_mesh), intent(in) :: hecmesh
400  type (hecmwst_matrix), intent(in) :: hecmat
401  real(kind=kreal), intent(in) :: x(:)
402  real(kind=kreal), intent(out) :: y(:), w(:)
403  real(kind=kreal), intent(inout) :: time_ax
404  real(kind=kreal), intent(inout) :: commtime
405 
406  call hecmw_tvec_22(hecmesh, x, y, commtime)
407  call hecmw_matvec_22_inner(hecmesh, hecmat, y, w, time_ax, commtime)
408  call hecmw_ttvec_22(hecmesh, w, y, commtime)
409 
410  end subroutine hecmw_ttmattvec_22
411 
412 
413  !C
414  !C***
415  !C*** hecmw_mat_diag_sr_22
416  !C***
417  !C
418  subroutine hecmw_mat_diag_sr_22(hecMESH, hecMAT, COMMtime)
420  use m_hecmw_comm_f
421  implicit none
422  type (hecmwst_local_mesh), intent(in) :: hecmesh
423  type (hecmwst_matrix), intent(inout), target :: hecmat
424  real(kind=kreal), intent(inout), optional :: commtime
425  real(kind=kreal), allocatable :: w(:,:)
426  real(kind=kreal), pointer :: d(:)
427  integer(kind=kint) :: ip
428  real(kind=kreal) :: start_time, end_time
429  allocate(w(2*hecmat%NP,2))
430  d => hecmat%D
431  do ip= 1, hecmat%N
432  w(2*ip-1,1)= d(4*ip-3); w(2*ip-1,2)= d(4*ip-2);
433  w(2*ip-0,1)= d(4*ip-1); w(2*ip-0,2)= d(4*ip-0);
434  enddo
435  start_time= hecmw_wtime()
436  call hecmw_update_r (hecmesh, w(:,1), hecmat%NP, 2)
437  call hecmw_update_r (hecmesh, w(:,2), hecmat%NP, 2)
438  end_time= hecmw_wtime()
439  if (present(commtime)) commtime = commtime + end_time - start_time
440  do ip= hecmat%N+1, hecmat%NP
441  d(4*ip-3)= w(2*ip-1,1); d(4*ip-2)= w(2*ip-1,2);
442  d(4*ip-1)= w(2*ip-0,1); d(4*ip-0)= w(2*ip-0,2);
443  enddo
444  deallocate(w)
445  end subroutine hecmw_mat_diag_sr_22
446 end module hecmw_solver_las_22
hecmw_solver_las_22::hecmw_ttvec_22
subroutine, public hecmw_ttvec_22(hecMESH, X, Y, COMMtime)
Definition: hecmw_solver_las_22.f90:349
hecmw_solver_las_22::hecmw_mat_diag_sr_22
subroutine, public hecmw_mat_diag_sr_22(hecMESH, hecMAT, COMMtime)
Definition: hecmw_solver_las_22.f90:419
hecmw_solver_las_22::hecmw_ttmattvec_22
subroutine, public hecmw_ttmattvec_22(hecMESH, hecMAT, X, Y, W, time_Ax, COMMtime)
Definition: hecmw_solver_las_22.f90:397
hecmw_solver_las_22
Definition: hecmw_solver_las_22.f90:6
hecmw_matrix_misc::hecmw_mat_get_flag_mpcmatvec
integer(kind=kint) function, public hecmw_mat_get_flag_mpcmatvec(hecMAT)
Definition: hecmw_matrix_misc.f90:643
hecmw_util::hecmw_wtime
real(kind=kreal) function hecmw_wtime()
Definition: hecmw_util_f.F90:549
hecmw_jad_type::hecmw_jad_is_initialized
integer(kind=kint) function, public hecmw_jad_is_initialized()
Definition: hecmw_jadm.f90:56
hecmw_jad_type
Jagged Diagonal Matrix storage for vector processors. Original code was provided by JAMSTEC.
Definition: hecmw_jadm.f90:8
hecmw_tuning_fx
Definition: hecmw_tuning_fx.f90:6
hecmw_tuning_fx::hecmw_tuning_fx_calc_sector_cache
subroutine, public hecmw_tuning_fx_calc_sector_cache(N, NDOF, sectorCacheSize0, sectorCacheSize1)
Definition: hecmw_tuning_fx.f90:25
hecmw_solver_misc::hecmw_innerproduct_r
subroutine hecmw_innerproduct_r(hecMESH, ndof, X, Y, sum, COMMtime)
Definition: hecmw_solver_misc.f90:47
hecmw_util
I/O and Utility.
Definition: hecmw_util_f.F90:7
hecmw_util::hecmwst_local_mesh
Definition: hecmw_util_f.F90:234
hecmw_solver_las_22::hecmw_matvec_22_set_async
subroutine, public hecmw_matvec_22_set_async(hecMAT)
Definition: hecmw_solver_las_22.f90:59
hecmw_solver_las_22::hecmw_matvec_22
subroutine, public hecmw_matvec_22(hecMESH, hecMAT, X, Y, time_Ax, COMMtime)
Definition: hecmw_solver_las_22.f90:32
m_hecmw_comm_f
Definition: hecmw_comm_f.F90:6
hecmw_solver_las_22::hecmw_matvec_22_unset_async
subroutine, public hecmw_matvec_22_unset_async
Definition: hecmw_solver_las_22.f90:64
hecmw_solver_las_22::hecmw_tvec_22
subroutine, public hecmw_tvec_22(hecMESH, X, Y, COMMtime)
Definition: hecmw_solver_las_22.f90:302
hecmw_solver_las_22::hecmw_matresid_22
subroutine, public hecmw_matresid_22(hecMESH, hecMAT, X, B, R, time_Ax, COMMtime)
Definition: hecmw_solver_las_22.f90:240
hecmw_util::kreal
integer(kind=4), parameter kreal
Definition: hecmw_util_f.F90:16
hecmw_matrix_misc
Definition: hecmw_matrix_misc.f90:6
hecmw_solver_las_22::hecmw_rel_resid_l2_22
real(kind=kreal) function, public hecmw_rel_resid_l2_22(hecMESH, hecMAT, time_Ax, COMMtime)
Definition: hecmw_solver_las_22.f90:270
hecmw_solver_misc
Definition: hecmw_solver_misc.f90:6
m_hecmw_comm_f::hecmw_update_r
subroutine hecmw_update_r(hecMESH, val, n, m)
Definition: hecmw_comm_f.F90:683
hecmw_jad_type::hecmw_jad_matvec
subroutine, public hecmw_jad_matvec(hecMESH, hecMAT, X, Y, COMMtime)
Definition: hecmw_jadm.f90:61
hecmw_util::hecmwst_matrix
Definition: hecmw_util_f.F90:444