FrontISTR  5.9.0
Large-scale structural analysis program with finit element method
hecmw_solver_las_22.F90
Go to the documentation of this file.
1 !-------------------------------------------------------------------------------
2 ! Copyright (c) 2019 FrontISTR Commons
3 ! This software is released under the MIT License, see LICENSE.txt
4 !-------------------------------------------------------------------------------
5 
7 
8  private
9 
10  public :: hecmw_matvec_22
13  public :: hecmw_matresid_22
14  public :: hecmw_rel_resid_l2_22
15  public :: hecmw_tvec_22
16  public :: hecmw_ttvec_22
17  public :: hecmw_ttmattvec_22
18  public :: hecmw_mat_diag_sr_22
19 
20  ! ! for communication hiding in matvec
21  ! integer(kind=kint), save, allocatable :: index_o(:), item_o(:)
22  ! real(kind=kreal), save, allocatable :: A_o(:)
23  logical, save :: async_matvec_flg = .false.
24 contains
25 
26  !C
27  !C***
28  !C*** hecmw_matvec_22
29  !C***
30  !C
31  subroutine hecmw_matvec_22 (hecMESH, hecMAT, X, Y, time_Ax, COMMtime)
32  use hecmw_util
34  implicit none
35  type (hecmwst_local_mesh), intent(in) :: hecmesh
36  type (hecmwst_matrix), intent(in), target :: hecmat
37  real(kind=kreal), intent(in) :: x(:)
38  real(kind=kreal), intent(out) :: y(:)
39  real(kind=kreal), intent(inout) :: time_ax
40  real(kind=kreal), intent(inout), optional :: commtime
41 
42  real(kind=kreal) :: tcomm
43  real(kind=kreal), allocatable :: wk(:)
44 
45  tcomm = 0.d0
46 
47  if (hecmw_mat_get_flag_mpcmatvec(hecmat) /= 0) then
48  allocate(wk(hecmat%NP * hecmat%NDOF))
49  call hecmw_ttmattvec_22(hecmesh, hecmat, x, y, wk, time_ax, tcomm)
50  deallocate(wk)
51  else
52  call hecmw_matvec_22_inner(hecmesh, hecmat, x, y, time_ax, tcomm)
53  endif
54 
55  if (present(commtime)) commtime = commtime + tcomm
56  end subroutine hecmw_matvec_22
57 
58  subroutine hecmw_matvec_22_set_async (hecMAT)
59  use hecmw_util
60  implicit none
61  type (hecmwst_matrix), intent(in) :: hecmat
62  end subroutine hecmw_matvec_22_set_async
64  implicit none
65  end subroutine hecmw_matvec_22_unset_async
66 
67  !C
68  !C***
69  !C*** hecmw_matvec_22_inner ( private subroutine )
70  !C***
71  !C
72  subroutine hecmw_matvec_22_inner (hecMESH, hecMAT, X, Y, time_Ax, COMMtime)
73  use hecmw_util
74  use m_hecmw_comm_f
76  use hecmw_jad_type
77  use hecmw_tuning_fx
78  !$ use omp_lib
79 
80  implicit none
81  type (hecmwst_local_mesh), intent(in) :: hecmesh
82  type (hecmwst_matrix), intent(in), target :: hecmat
83  real(kind=kreal), intent(in) :: x(:)
84  real(kind=kreal), intent(out) :: y(:)
85  real(kind=kreal), intent(inout) :: time_ax
86  real(kind=kreal), intent(inout), optional :: commtime
87 
88  real(kind=kreal) :: start_time, end_time, tcomm
89  integer(kind=kint) :: i, j, js, je, in
90  real(kind=kreal) :: yv1, yv2, x1, x2
91 
92  integer(kind=kint) :: n, np
93  integer(kind=kint), pointer :: indexl(:), iteml(:), indexu(:), itemu(:), indexa(:), itema(:)
94  real(kind=kreal), pointer :: al(:), au(:), d(:), a(:)
95 
96  ! added for turning >>>
97  integer, parameter :: numofblockperthread = 100
98  logical, save :: isfirst = .true.
99  integer, save :: numofthread = 1
100  integer, save, allocatable :: startpos(:), endpos(:)
101  integer(kind=kint), save :: sectorcachesize0, sectorcachesize1
102  integer(kind=kint) :: threadnum, blocknum, numofblock
103  integer(kind=kint) :: numofelement, elementcount, blockindex
104  real(kind=kreal) :: numofelementperblock
105  ! <<< added for turning
106 
107  if (hecmw_jad_is_initialized().ne.0) then
108  tcomm = 0.d0
109  start_time = hecmw_wtime()
110  call hecmw_jad_matvec(hecmesh, hecmat, x, y, tcomm)
111  end_time = hecmw_wtime()
112  time_ax = time_ax + end_time - start_time - tcomm
113  if (present(commtime)) commtime = commtime + tcomm
114  else
115 
116  n = hecmat%N
117  np = hecmat%NP
118  indexl => hecmat%indexL
119  indexu => hecmat%indexU
120  indexa => hecmat%indexA
121  iteml => hecmat%itemL
122  itemu => hecmat%itemU
123  itema => hecmat%itemA
124  al => hecmat%AL
125  au => hecmat%AU
126  d => hecmat%D
127  a => hecmat%A
128 
129  ! added for turning >>>
130 #ifndef _OPENACC
131  if (.not. isfirst) then
132  numofblock = numofthread * numofblockperthread
133  if (endpos(numofblock-1) .ne. n-1) then
134  deallocate(startpos, endpos)
135  isfirst = .true.
136  endif
137  endif
138  if (isfirst) then
139  !$ numOfThread = omp_get_max_threads()
140  numofblock = numofthread * numofblockperthread
141  allocate (startpos(0 : numofblock - 1), endpos(0 : numofblock - 1))
142  numofelement = n + indexl(n) + indexu(n)
143  numofelementperblock = dble(numofelement) / numofblock
144  blocknum = 0
145  elementcount = 0
146  startpos(blocknum) = 1
147  do i= 1, n
148  elementcount = elementcount + 1
149  elementcount = elementcount + (indexl(i) - indexl(i-1))
150  elementcount = elementcount + (indexu(i) - indexu(i-1))
151  if (elementcount > (blocknum + 1) * numofelementperblock) then
152  endpos(blocknum) = i
153  blocknum = blocknum + 1
154  startpos(blocknum) = i + 1
155  if (blocknum == (numofblock - 1)) exit
156  endif
157  enddo
158  endpos(blocknum) = n
159  ! for irregular data
160  do i= blocknum+1, numofblock-1
161  startpos(i) = n
162  endpos(i) = n-1
163  end do
164 
166  sectorcachesize0, sectorcachesize1)
167 
168  isfirst = .false.
169  endif
170 #endif
171  ! <<< added for turning
172 
173  start_time= hecmw_wtime()
174  call hecmw_update_r (hecmesh, x, np, 2)
175  end_time= hecmw_wtime()
176  if (present(commtime)) commtime = commtime + end_time - start_time
177 
178  start_time = hecmw_wtime()
179 
180  !call fapp_start("loopInMatvec33", 1, 0)
181  !call start_collection("loopInMatvec33")
182 
183  !OCL CACHE_SECTOR_SIZE(sectorCacheSize0,sectorCacheSize1)
184  !OCL CACHE_SUBSECTOR_ASSIGN(X)
185 
186 #ifdef _OPENACC
187  !$acc kernels
188  !$acc loop independent
189  do i = 1,n
190  x1= x(2*i-1)
191  x2= x(2*i )
192  yv1= 0
193  yv2= 0
194  js= indexa(i) + 1
195  je= indexa(i+1)
196  do j= js, je
197  in = itema(j)
198  x1 = x(2*in-1)
199  x2 = x(2*in )
200  yv1= yv1 + a(4*j-3)*x1 + a(4*j-2)*x2
201  yv2= yv2 + a(4*j-1)*x1 + a(4*j )*x2
202  enddo
203  y(2*i-1)= yv1
204  y(2*i )= yv2
205  enddo
206  !$acc end kernels
207 #else
208  !$OMP PARALLEL DEFAULT(NONE) &
209  !$OMP&PRIVATE(i,X1,X2,YV1,YV2,jS,jE,j,in,threadNum,blockNum,blockIndex) &
210  !$OMP&SHARED(D,AL,AU,indexL,itemL,indexU,itemU,X,Y,startPos,endPos,numOfThread,N,async_matvec_flg)
211  threadnum = 0
212  !$ threadNum = omp_get_thread_num()
213  do blocknum = 0 , numofblockperthread - 1
214  blockindex = blocknum * numofthread + threadnum
215  do i = startpos(blockindex), endpos(blockindex)
216  x1= x(2*i-1)
217  x2= x(2*i )
218  yv1= d(4*i-3)*x1 + d(4*i-2)*x2
219  yv2= d(4*i-1)*x1 + d(4*i )*x2
220 
221  js= indexl(i-1) + 1
222  je= indexl(i )
223  do j= js, je
224  in = iteml(j)
225  x1 = x(2*in-1)
226  x2 = x(2*in )
227  yv1= yv1 + al(4*j-3)*x1 + al(4*j-2)*x2
228  yv2= yv2 + al(4*j-1)*x1 + al(4*j )*x2
229  enddo
230  js= indexu(i-1) + 1
231  je= indexu(i )
232  do j= js, je
233  in = itemu(j)
234  x1 = x(2*in-1)
235  x2 = x(2*in )
236  yv1= yv1 + au(4*j-3)*x1 + au(4*j-2)*x2
237  yv2= yv2 + au(4*j-1)*x1 + au(4*j )*x2
238  enddo
239  y(2*i-1)= yv1
240  y(2*i )= yv2
241  enddo
242  enddo
243 
244  !$OMP END PARALLEL
245 #endif
246 
247  !OCL END_CACHE_SUBSECTOR
248  !OCL END_CACHE_SECTOR_SIZE
249 
250 
251  end_time = hecmw_wtime()
252  time_ax = time_ax + end_time - start_time
253  endif
254 
255  end subroutine hecmw_matvec_22_inner
256 
257 
258 
259 
260 
261 
262  !C
263  !C***
264  !C*** hecmw_matresid_22
265  !C***
266  !C
267  subroutine hecmw_matresid_22 (hecMESH, hecMAT, X, B, R, time_Ax, COMMtime)
268  use hecmw_util
269 
270  implicit none
271  real(kind=kreal) :: x(:), b(:), r(:)
272  type (hecmwst_matrix) :: hecmat
273  type (hecmwst_local_mesh) :: hecmesh
274  real(kind=kreal) :: time_ax
275  real(kind=kreal), optional :: commtime
276 
277  integer(kind=kint) :: i
278  real(kind=kreal) :: tcomm
279 
280  tcomm = 0.d0
281  call hecmw_matvec_22 (hecmesh, hecmat, x, r, time_ax, tcomm)
282  if (present(commtime)) commtime = commtime + tcomm
283 #ifdef _OPENACC
284  !$acc kernels
285  !$acc loop independent
286 #else
287  !$omp parallel default(none),private(i),shared(hecMAT,R,B)
288  !$omp do
289 #endif
290  do i = 1, hecmat%N * 2
291  r(i) = b(i) - r(i)
292  enddo
293 #ifdef _OPENACC
294  !$acc end kernels
295 #else
296  !$omp end do
297  !$omp end parallel
298 #endif
299  end subroutine hecmw_matresid_22
300 
301  !C
302  !C***
303  !C*** hecmw_rel_resid_L2_22
304  !C***
305  !C
306  function hecmw_rel_resid_l2_22 (hecMESH, hecMAT, time_Ax, COMMtime)
307  use hecmw_util
309 
310  implicit none
311  real(kind=kreal) :: hecmw_rel_resid_l2_22
312  type ( hecmwst_local_mesh ), intent(in) :: hecmesh
313  type ( hecmwst_matrix ), intent(in) :: hecmat
314  real(kind=kreal) :: time_ax
315  real(kind=kreal), optional :: commtime
316 
317  real(kind=kreal) :: r(hecmat%NDOF*hecmat%NP)
318  real(kind=kreal) :: bnorm2, rnorm2
319  real(kind=kreal) :: tcomm
320 
321  tcomm = 0.d0
322  call hecmw_innerproduct_r(hecmesh, hecmat%NDOF, hecmat%B, hecmat%B, bnorm2, tcomm)
323  if (bnorm2 == 0.d0) then
324  bnorm2 = 1.d0
325  endif
326  call hecmw_matresid_22(hecmesh, hecmat, hecmat%X, hecmat%B, r, time_ax, tcomm)
327  call hecmw_innerproduct_r(hecmesh, hecmat%NDOF, r, r, rnorm2, tcomm)
328  if (present(commtime)) commtime = commtime + tcomm
329 
330  hecmw_rel_resid_l2_22 = sqrt(rnorm2 / bnorm2)
331 
332  end function hecmw_rel_resid_l2_22
333  !C
334  !C***
335  !C*** hecmw_Tvec_22
336  !C***
337  !C
338  subroutine hecmw_tvec_22 (hecMESH, X, Y, COMMtime)
339  use hecmw_util
340  use m_hecmw_comm_f
341  implicit none
342  type (hecmwst_local_mesh), intent(in) :: hecmesh
343  real(kind=kreal), intent(in) :: x(:)
344  real(kind=kreal), intent(out) :: y(:)
345  real(kind=kreal), intent(inout) :: commtime
346 
347  real(kind=kreal) :: start_time, end_time
348  integer(kind=kint) :: i, j, jj, k, kk
349 
350  start_time= hecmw_wtime()
351  call hecmw_update_r (hecmesh, x, hecmesh%n_node, 2)
352  end_time= hecmw_wtime()
353  commtime = commtime + end_time - start_time
354 
355 #ifdef _OPENACC
356  !$acc kernels
357  !$acc loop independent
358 #else
359  !$omp parallel default(none),private(i,k,kk,j,jj),shared(hecMESH,X,Y)
360  !$omp do
361 #endif
362  do i= 1, hecmesh%nn_internal * hecmesh%n_dof
363  y(i)= x(i)
364  enddo
365 #ifndef _OPENACC
366  !$omp end do
367 #endif
368 
369 #ifdef _OPENACC
370  !$acc loop independent
371 #else
372  !$omp do
373 #endif
374  outer: do i= 1, hecmesh%mpc%n_mpc
375  do j= hecmesh%mpc%mpc_index(i-1) + 1, hecmesh%mpc%mpc_index(i)
376  if (hecmesh%mpc%mpc_dof(j) > 2) cycle outer
377  enddo
378  k = hecmesh%mpc%mpc_index(i-1) + 1
379  kk = 2 * (hecmesh%mpc%mpc_item(k) - 1) + hecmesh%mpc%mpc_dof(k)
380  y(kk) = 0.d0
381  do j= hecmesh%mpc%mpc_index(i-1) + 2, hecmesh%mpc%mpc_index(i)
382  jj = 2 * (hecmesh%mpc%mpc_item(j) - 1) + hecmesh%mpc%mpc_dof(j)
383  y(kk) = y(kk) - hecmesh%mpc%mpc_val(j) * x(jj)
384  enddo
385  enddo outer
386 #ifdef _OPENACC
387  !$acc end kernels
388 #else
389  !$omp end do
390  !$omp end parallel
391 #endif
392 
393  end subroutine hecmw_tvec_22
394 
395  !C
396  !C***
397  !C*** hecmw_Ttvec_22
398  !C***
399  !C
400  subroutine hecmw_ttvec_22 (hecMESH, X, Y, COMMtime)
401  use hecmw_util
402  use m_hecmw_comm_f
403  implicit none
404  type (hecmwst_local_mesh), intent(in) :: hecmesh
405  real(kind=kreal), intent(in) :: x(:)
406  real(kind=kreal), intent(out) :: y(:)
407  real(kind=kreal), intent(inout) :: commtime
408 
409  real(kind=kreal) :: start_time, end_time
410  integer(kind=kint) :: i, j, jj, k, kk
411 
412  start_time= hecmw_wtime()
413  call hecmw_update_r (hecmesh, x, hecmesh%n_node, 2)
414  end_time= hecmw_wtime()
415  commtime = commtime + end_time - start_time
416 
417 #ifdef _OPENACC
418  !$acc kernels
419  !$acc loop independent
420 #else
421  !$omp parallel default(none),private(i,k,kk,j,jj),shared(hecMESH,X,Y)
422  !$omp do
423 #endif
424  do i= 1, hecmesh%nn_internal * hecmesh%n_dof
425  y(i)= x(i)
426  enddo
427 #ifndef _OPENACC
428  !$omp end do
429 #endif
430 
431 #ifdef _OPENACC
432  !$acc loop independent
433 #else
434  !$omp do
435 #endif
436  outer: do i= 1, hecmesh%mpc%n_mpc
437  do j= hecmesh%mpc%mpc_index(i-1) + 1, hecmesh%mpc%mpc_index(i)
438  if (hecmesh%mpc%mpc_dof(j) > 2) cycle outer
439  enddo
440  k = hecmesh%mpc%mpc_index(i-1) + 1
441  kk = 2 * (hecmesh%mpc%mpc_item(k) - 1) + hecmesh%mpc%mpc_dof(k)
442  y(kk) = 0.d0
443  do j= hecmesh%mpc%mpc_index(i-1) + 2, hecmesh%mpc%mpc_index(i)
444  jj = 2 * (hecmesh%mpc%mpc_item(j) - 1) + hecmesh%mpc%mpc_dof(j)
445 #ifdef _OPENACC
446  !$acc atomic update
447 #else
448  !omp atomic
449 #endif
450  y(jj) = y(jj) - hecmesh%mpc%mpc_val(j) * x(kk)
451  enddo
452  enddo outer
453 #ifdef _OPENACC
454  !$acc end kernels
455 #else
456  !$omp end do
457  !$omp end parallel
458 #endif
459 
460  end subroutine hecmw_ttvec_22
461 
462  !C
463  !C***
464  !C*** hecmw_TtmatTvec_22
465  !C***
466  !C
467  subroutine hecmw_ttmattvec_22 (hecMESH, hecMAT, X, Y, W, time_Ax, COMMtime)
468  use hecmw_util
469  implicit none
470  type (hecmwst_local_mesh), intent(in) :: hecmesh
471  type (hecmwst_matrix), intent(in) :: hecmat
472  real(kind=kreal), intent(in) :: x(:)
473  real(kind=kreal), intent(out) :: y(:), w(:)
474  real(kind=kreal), intent(inout) :: time_ax
475  real(kind=kreal), intent(inout) :: commtime
476 
477  call hecmw_tvec_22(hecmesh, x, y, commtime)
478  call hecmw_matvec_22_inner(hecmesh, hecmat, y, w, time_ax, commtime)
479  call hecmw_ttvec_22(hecmesh, w, y, commtime)
480 
481  end subroutine hecmw_ttmattvec_22
482 
483 
484  !C
485  !C***
486  !C*** hecmw_mat_diag_sr_22
487  !C***
488  !C
489  subroutine hecmw_mat_diag_sr_22(hecMESH, hecMAT, COMMtime)
490  use hecmw_util
491  use m_hecmw_comm_f
492  implicit none
493  type (hecmwst_local_mesh), intent(in) :: hecmesh
494  type (hecmwst_matrix), intent(inout), target :: hecmat
495  real(kind=kreal), intent(inout), optional :: commtime
496  real(kind=kreal), allocatable :: w(:,:)
497  real(kind=kreal), pointer :: d(:)
498  integer(kind=kint) :: ip
499  real(kind=kreal) :: start_time, end_time
500  allocate(w(2*hecmat%NP,2))
501  d => hecmat%D
502  do ip= 1, hecmat%N
503  w(2*ip-1,1)= d(4*ip-3); w(2*ip-1,2)= d(4*ip-2);
504  w(2*ip-0,1)= d(4*ip-1); w(2*ip-0,2)= d(4*ip-0);
505  enddo
506  start_time= hecmw_wtime()
507  call hecmw_update_r (hecmesh, w(:,1), hecmat%NP, 2)
508  call hecmw_update_r (hecmesh, w(:,2), hecmat%NP, 2)
509  end_time= hecmw_wtime()
510  if (present(commtime)) commtime = commtime + end_time - start_time
511  do ip= hecmat%N+1, hecmat%NP
512  d(4*ip-3)= w(2*ip-1,1); d(4*ip-2)= w(2*ip-1,2);
513  d(4*ip-1)= w(2*ip-0,1); d(4*ip-0)= w(2*ip-0,2);
514  enddo
515  deallocate(w)
516  end subroutine hecmw_mat_diag_sr_22
517 end module hecmw_solver_las_22
Jagged Diagonal Matrix storage for vector processors. Original code was provided by JAMSTEC.
Definition: hecmw_jadm.f90:8
subroutine, public hecmw_jad_matvec(hecMESH, hecMAT, X, Y, COMMtime)
Definition: hecmw_jadm.f90:61
integer(kind=kint) function, public hecmw_jad_is_initialized()
Definition: hecmw_jadm.f90:56
integer(kind=kint) function, public hecmw_mat_get_flag_mpcmatvec(hecMAT)
subroutine, public hecmw_matresid_22(hecMESH, hecMAT, X, B, R, time_Ax, COMMtime)
subroutine, public hecmw_tvec_22(hecMESH, X, Y, COMMtime)
subroutine, public hecmw_mat_diag_sr_22(hecMESH, hecMAT, COMMtime)
subroutine, public hecmw_ttmattvec_22(hecMESH, hecMAT, X, Y, W, time_Ax, COMMtime)
real(kind=kreal) function, public hecmw_rel_resid_l2_22(hecMESH, hecMAT, time_Ax, COMMtime)
subroutine, public hecmw_matvec_22(hecMESH, hecMAT, X, Y, time_Ax, COMMtime)
subroutine, public hecmw_ttvec_22(hecMESH, X, Y, COMMtime)
subroutine, public hecmw_matvec_22_set_async(hecMAT)
subroutine, public hecmw_matvec_22_unset_async
subroutine hecmw_innerproduct_r(hecMESH, ndof, X, Y, sum, COMMtime)
subroutine, public hecmw_tuning_fx_calc_sector_cache(N, NDOF, sectorCacheSize0, sectorCacheSize1)
I/O and Utility.
Definition: hecmw_util_f.F90:7
integer(kind=4), parameter kreal
real(kind=kreal) function hecmw_wtime()
subroutine hecmw_update_r(hecMESH, val, n, m)