FrontISTR  5.9.0
Large-scale structural analysis program with finit element method
hecmw_solver_las_66.F90
Go to the documentation of this file.
1 !-------------------------------------------------------------------------------
2 ! Copyright (c) 2019 FrontISTR Commons
3 ! This software is released under the MIT License, see LICENSE.txt
4 !-------------------------------------------------------------------------------
5 
7  use hecmw_util
8  implicit none
9 
10  private
11 
12  public :: hecmw_matvec_66
13  public :: hecmw_matresid_66
14  public :: hecmw_rel_resid_l2_66
15  public :: hecmw_tvec_66
16  public :: hecmw_ttvec_66
17  public :: hecmw_ttmattvec_66
18 
19 contains
20 
21  !C
22  !C***
23  !C*** hecmw_matvec_66
24  !C***
25  !C
26  subroutine hecmw_matvec_66 (hecMESH, hecMAT, X, Y, time_Ax, COMMtime)
27  use hecmw_util
28  use m_hecmw_comm_f
30  use hecmw_jad_type
31  use hecmw_tuning_fx
32  !$ use omp_lib
33 
34  implicit none
35  type (hecmwst_local_mesh), intent(in) :: hecmesh
36  type (hecmwst_matrix), intent(in), target :: hecmat
37  real(kind=kreal), intent(in) :: x(:)
38  real(kind=kreal), intent(out) :: y(:)
39  real(kind=kreal), intent(inout) :: time_ax
40  real(kind=kreal), intent(inout), optional :: commtime
41 
42  real(kind=kreal) :: start_time, end_time, tcomm
43  integer(kind=kint) :: i, j, js, je, in
44  real(kind=kreal) :: yv1, yv2, yv3, x1, x2, x3
45  real(kind=kreal) :: yv4, yv5, yv6, x4, x5, x6
46 
47  integer(kind=kint) :: n, np
48  integer(kind=kint), pointer :: indexl(:), iteml(:), indexu(:), itemu(:), indexa(:), itema(:)
49  real(kind=kreal), pointer :: al(:), au(:), d(:), a(:)
50 
51  ! added for turning >>>
52  integer, parameter :: numofblockperthread = 100
53  logical, save :: isfirst = .true.
54  integer, save :: numofthread = 1
55  integer, save, allocatable :: startpos(:), endpos(:)
56  integer(kind=kint), save :: sectorcachesize0, sectorcachesize1
57  integer(kind=kint) :: threadnum, blocknum, numofblock
58  integer(kind=kint) :: numofelement, elementcount, blockindex
59  real(kind=kreal) :: numofelementperblock
60  ! <<< added for turning
61 
62  if (hecmw_mat_get_usejad(hecmat).ne.0) then
63  tcomm = 0.d0
64  start_time = hecmw_wtime()
65  call hecmw_jad_matvec(hecmesh, hecmat, x, y, tcomm)
66  end_time = hecmw_wtime()
67  time_ax = time_ax + end_time - start_time - tcomm
68  if (present(commtime)) commtime = commtime + tcomm
69  else
70 
71  n = hecmat%N
72  np = hecmat%NP
73  indexl => hecmat%indexL
74  indexu => hecmat%indexU
75  indexa => hecmat%indexA
76  iteml => hecmat%itemL
77  itemu => hecmat%itemU
78  itema => hecmat%itemA
79  al => hecmat%AL
80  au => hecmat%AU
81  d => hecmat%D
82  a => hecmat%A
83 
84  ! added for turning >>>
85 #ifndef _OPENACC
86  if (isfirst .eqv. .true.) then
87  !$ numOfThread = omp_get_max_threads()
88  numofblock = numofthread * numofblockperthread
89  allocate (startpos(0 : numofblock - 1), endpos(0 : numofblock - 1))
90  numofelement = n + indexl(n) + indexu(n)
91  numofelementperblock = dble(numofelement) / numofblock
92  blocknum = 0
93  elementcount = 0
94  startpos(blocknum) = 1
95  do i= 1, n
96  elementcount = elementcount + 1
97  elementcount = elementcount + (indexl(i) - indexl(i-1))
98  elementcount = elementcount + (indexu(i) - indexu(i-1))
99  if (elementcount > (blocknum + 1) * numofelementperblock) then
100  endpos(blocknum) = i
101  ! write(9000+hecMESH%my_rank,*) mod(blockNum, numOfThread), &
102  ! startPos(blockNum), endPos(blockNum)
103  blocknum = blocknum + 1
104  startpos(blocknum) = i + 1
105  if (blocknum == (numofblock - 1)) exit
106  endif
107  enddo
108  endpos(blocknum) = n
109  ! write(9000+hecMESH%my_rank,*) mod(blockNum, numOfThread), &
110  ! startPos(blockNum), endPos(blockNum)
111  ! for irregular data
112  do i= blocknum+1, numofblock-1
113  startpos(i) = n
114  endpos(i) = n-1
115  ! write(9000+hecMESH%my_rank,*) mod(i, numOfThread), &
116  ! startPos(i), endPos(i)
117  end do
118 
120  sectorcachesize0, sectorcachesize1)
121 
122  isfirst = .false.
123  endif
124 #endif
125  ! <<< added for turning
126 
127  start_time= hecmw_wtime()
128  call hecmw_update_r (hecmesh, x, np, 6)
129  end_time= hecmw_wtime()
130  if (present(commtime)) commtime = commtime + end_time - start_time
131 
132  start_time = hecmw_wtime()
133 
134  !call fapp_start("loopInMatvec66", 1, 0)
135  !call start_collection("loopInMatvec66")
136 
137  !OCL CACHE_SECTOR_SIZE(sectorCacheSize0,sectorCacheSize1)
138  !OCL CACHE_SUBSECTOR_ASSIGN(X)
139 
140 #ifdef _OPENACC
141  !$acc kernels
142  !$acc loop independent
143  do i = 1, n
144  x1= x(6*i-5)
145  x2= x(6*i-4)
146  x3= x(6*i-3)
147  x4= x(6*i-2)
148  x5= x(6*i-1)
149  x6= x(6*i )
150  yv1= 0
151  yv2= 0
152  yv3= 0
153  yv4= 0
154  yv5= 0
155  yv6= 0
156  js= indexa(i) + 1
157  je= indexa(i+1)
158  do j= js, je
159  in = itema(j)
160  x1= x(6*in-5)
161  x2= x(6*in-4)
162  x3= x(6*in-3)
163  x4= x(6*in-2)
164  x5= x(6*in-1)
165  x6= x(6*in )
166  yv1= yv1 + a(36*j-35)*x1 + a(36*j-34)*x2 + a(36*j-33)*x3 + a(36*j-32)*x4 + a(36*j-31)*x5 + a(36*j-30)*x6
167  yv2= yv2 + a(36*j-29)*x1 + a(36*j-28)*x2 + a(36*j-27)*x3 + a(36*j-26)*x4 + a(36*j-25)*x5 + a(36*j-24)*x6
168  yv3= yv3 + a(36*j-23)*x1 + a(36*j-22)*x2 + a(36*j-21)*x3 + a(36*j-20)*x4 + a(36*j-19)*x5 + a(36*j-18)*x6
169  yv4= yv4 + a(36*j-17)*x1 + a(36*j-16)*x2 + a(36*j-15)*x3 + a(36*j-14)*x4 + a(36*j-13)*x5 + a(36*j-12)*x6
170  yv5= yv5 + a(36*j-11)*x1 + a(36*j-10)*x2 + a(36*j-9 )*x3 + a(36*j-8 )*x4 + a(36*j-7 )*x5 + a(36*j-6 )*x6
171  yv6= yv6 + a(36*j-5 )*x1 + a(36*j-4 )*x2 + a(36*j-3 )*x3 + a(36*j-2 )*x4 + a(36*j-1 )*x5 + a(36*j )*x6
172  enddo
173  y(6*i-5)= yv1
174  y(6*i-4)= yv2
175  y(6*i-3)= yv3
176  y(6*i-2)= yv4
177  y(6*i-1)= yv5
178  y(6*i )= yv6
179  enddo
180  !$acc end kernels
181 #else
182  !$OMP PARALLEL DEFAULT(NONE) &
183  !$OMP&PRIVATE(i,X1,X2,X3,X4,X5,X6,YV1,YV2,YV3,YV4,YV5,YV6,jS,jE,j,in,threadNum,blockNum,blockIndex) &
184  !$OMP&SHARED(D,AL,AU,indexL,itemL,indexU,itemU,X,Y,startPos,endPos,numOfThread)
185  threadnum = 0
186  !$ threadNum = omp_get_thread_num()
187  do blocknum = 0 , numofblockperthread - 1
188  blockindex = blocknum * numofthread + threadnum
189  do i = startpos(blockindex), endpos(blockindex)
190  x1= x(6*i-5)
191  x2= x(6*i-4)
192  x3= x(6*i-3)
193  x4= x(6*i-2)
194  x5= x(6*i-1)
195  x6= x(6*i )
196  yv1= d(36*i-35)*x1 + d(36*i-34)*x2 + d(36*i-33)*x3 + d(36*i-32)*x4 + d(36*i-31)*x5 + d(36*i-30)*x6
197  yv2= d(36*i-29)*x1 + d(36*i-28)*x2 + d(36*i-27)*x3 + d(36*i-26)*x4 + d(36*i-25)*x5 + d(36*i-24)*x6
198  yv3= d(36*i-23)*x1 + d(36*i-22)*x2 + d(36*i-21)*x3 + d(36*i-20)*x4 + d(36*i-19)*x5 + d(36*i-18)*x6
199  yv4= d(36*i-17)*x1 + d(36*i-16)*x2 + d(36*i-15)*x3 + d(36*i-14)*x4 + d(36*i-13)*x5 + d(36*i-12)*x6
200  yv5= d(36*i-11)*x1 + d(36*i-10)*x2 + d(36*i-9 )*x3 + d(36*i-8 )*x4 + d(36*i-7 )*x5 + d(36*i-6 )*x6
201  yv6= d(36*i-5 )*x1 + d(36*i-4 )*x2 + d(36*i-3 )*x3 + d(36*i-2 )*x4 + d(36*i-1 )*x5 + d(36*i )*x6
202 
203  js= indexl(i-1) + 1
204  je= indexl(i )
205  do j= js, je
206  in = iteml(j)
207  x1= x(6*in-5)
208  x2= x(6*in-4)
209  x3= x(6*in-3)
210  x4= x(6*in-2)
211  x5= x(6*in-1)
212  x6= x(6*in )
213  yv1= yv1 + al(36*j-35)*x1 + al(36*j-34)*x2 + al(36*j-33)*x3 + al(36*j-32)*x4 + al(36*j-31)*x5 + al(36*j-30)*x6
214  yv2= yv2 + al(36*j-29)*x1 + al(36*j-28)*x2 + al(36*j-27)*x3 + al(36*j-26)*x4 + al(36*j-25)*x5 + al(36*j-24)*x6
215  yv3= yv3 + al(36*j-23)*x1 + al(36*j-22)*x2 + al(36*j-21)*x3 + al(36*j-20)*x4 + al(36*j-19)*x5 + al(36*j-18)*x6
216  yv4= yv4 + al(36*j-17)*x1 + al(36*j-16)*x2 + al(36*j-15)*x3 + al(36*j-14)*x4 + al(36*j-13)*x5 + al(36*j-12)*x6
217  yv5= yv5 + al(36*j-11)*x1 + al(36*j-10)*x2 + al(36*j-9 )*x3 + al(36*j-8 )*x4 + al(36*j-7 )*x5 + al(36*j-6 )*x6
218  yv6= yv6 + al(36*j-5 )*x1 + al(36*j-4 )*x2 + al(36*j-3 )*x3 + al(36*j-2 )*x4 + al(36*j-1 )*x5 + al(36*j )*x6
219  enddo
220  js= indexu(i-1) + 1
221  je= indexu(i )
222  do j= js, je
223  in = itemu(j)
224  x1= x(6*in-5)
225  x2= x(6*in-4)
226  x3= x(6*in-3)
227  x4= x(6*in-2)
228  x5= x(6*in-1)
229  x6= x(6*in )
230  yv1= yv1 + au(36*j-35)*x1 + au(36*j-34)*x2 + au(36*j-33)*x3 + au(36*j-32)*x4 + au(36*j-31)*x5 + au(36*j-30)*x6
231  yv2= yv2 + au(36*j-29)*x1 + au(36*j-28)*x2 + au(36*j-27)*x3 + au(36*j-26)*x4 + au(36*j-25)*x5 + au(36*j-24)*x6
232  yv3= yv3 + au(36*j-23)*x1 + au(36*j-22)*x2 + au(36*j-21)*x3 + au(36*j-20)*x4 + au(36*j-19)*x5 + au(36*j-18)*x6
233  yv4= yv4 + au(36*j-17)*x1 + au(36*j-16)*x2 + au(36*j-15)*x3 + au(36*j-14)*x4 + au(36*j-13)*x5 + au(36*j-12)*x6
234  yv5= yv5 + au(36*j-11)*x1 + au(36*j-10)*x2 + au(36*j-9 )*x3 + au(36*j-8 )*x4 + au(36*j-7 )*x5 + au(36*j-6 )*x6
235  yv6= yv6 + au(36*j-5 )*x1 + au(36*j-4 )*x2 + au(36*j-3 )*x3 + au(36*j-2 )*x4 + au(36*j-1 )*x5 + au(36*j )*x6
236  enddo
237  y(6*i-5)= yv1
238  y(6*i-4)= yv2
239  y(6*i-3)= yv3
240  y(6*i-2)= yv4
241  y(6*i-1)= yv5
242  y(6*i )= yv6
243  enddo
244  enddo
245  !$OMP END PARALLEL
246 #endif
247 
248  !OCL END_CACHE_SUBSECTOR
249  !OCL END_CACHE_SECTOR_SIZE
250 
251  !call stop_collection("loopInMatvec66")
252  !call fapp_stop("loopInMatvec66", 1, 0)
253 
254  end_time = hecmw_wtime()
255  time_ax = time_ax + end_time - start_time
256 
257  endif
258  end subroutine hecmw_matvec_66
259 
260  !C
261  !C***
262  !C*** hecmw_matresid_66
263  !C***
264  !C
265  subroutine hecmw_matresid_66 (hecMESH, hecMAT, X, B, R, time_Ax, COMMtime)
266  use hecmw_util
267  implicit none
268  type (hecmwst_local_mesh), intent(in) :: hecmesh
269  type (hecmwst_matrix), intent(in) :: hecmat
270  real(kind=kreal), intent(in) :: x(:), b(:)
271  real(kind=kreal), intent(out) :: r(:)
272  real(kind=kreal), intent(inout) :: time_ax
273  real(kind=kreal), intent(inout), optional :: commtime
274 
275  integer(kind=kint) :: i
276  real(kind=kreal) :: tcomm
277 
278  tcomm = 0.d0
279  call hecmw_matvec_66 (hecmesh, hecmat, x, r, time_ax, tcomm)
280  if (present(commtime)) commtime = commtime + tcomm
281 #ifdef _OPENACC
282  !$acc kernels
283  !$acc loop independent
284 #endif
285  do i = 1, hecmat%N * 6
286  r(i) = b(i) - r(i)
287  enddo
288 #ifdef _OPENACC
289  !$acc end kernels
290 #endif
291 
292  end subroutine hecmw_matresid_66
293 
294  !C
295  !C***
296  !C*** hecmw_rel_resid_L2_66
297  !C***
298  !C
299  function hecmw_rel_resid_l2_66 (hecMESH, hecMAT, time_Ax, COMMtime)
300  use hecmw_util
302  implicit none
303  real(kind=kreal) :: hecmw_rel_resid_l2_66
304  type ( hecmwst_local_mesh ), intent(in) :: hecmesh
305  type ( hecmwst_matrix ), intent(in) :: hecmat
306  real(kind=kreal), intent(inout) :: time_ax
307  real(kind=kreal), intent(inout), optional :: commtime
308 
309  real(kind=kreal), allocatable :: r(:)
310  real(kind=kreal) :: bnorm2, rnorm2
311  real(kind=kreal) :: tcomm
312 
313  allocate(r(hecmat%NDOF*hecmat%NP))
314 
315  tcomm = 0.d0
316  call hecmw_innerproduct_r(hecmesh, hecmat%NDOF, &
317  hecmat%B, hecmat%B, bnorm2, tcomm)
318  if (bnorm2 == 0.d0) then
319  bnorm2 = 1.d0
320  endif
321  call hecmw_matresid_66(hecmesh, hecmat, hecmat%X, hecmat%B, r, time_ax, tcomm)
322  call hecmw_innerproduct_r(hecmesh, hecmat%NDOF, r, r, rnorm2, tcomm)
323  hecmw_rel_resid_l2_66 = sqrt(rnorm2 / bnorm2)
324 
325  if (present(commtime)) commtime = commtime + tcomm
326 
327  deallocate(r)
328  end function hecmw_rel_resid_l2_66
329 
330  !C
331  !C***
332  !C*** hecmw_Tvec_66
333  !C***
334  !C
335  subroutine hecmw_tvec_66 (hecMESH, X, Y, COMMtime)
336  use hecmw_util
337  use m_hecmw_comm_f
338  implicit none
339  type (hecmwst_local_mesh), intent(in) :: hecmesh
340  real(kind=kreal), intent(in) :: x(:)
341  real(kind=kreal), intent(out) :: y(:)
342  real(kind=kreal), intent(inout) :: commtime
343 
344  real(kind=kreal) :: start_time, end_time
345  integer(kind=kint) :: i
346 
347  start_time= hecmw_wtime()
348  call hecmw_update_r (hecmesh, x, hecmesh%n_node, 6)
349  end_time= hecmw_wtime()
350  commtime = commtime + end_time - start_time
351 
352  do i= 1, hecmesh%nn_internal * hecmesh%n_dof
353  y(i)= x(i)
354  enddo
355 
356  ! do i= 1, hecMESH%mpc%n_mpc
357  ! k = hecMESH%mpc%mpc_index(i-1) + 1
358  ! kk = 3 * (hecMESH%mpc%mpc_item(k) - 1) + hecMESH%mpc%mpc_dof(k)
359  ! Y(kk) = 0.d0
360  ! do j= hecMESH%mpc%mpc_index(i-1) + 2, hecMESH%mpc%mpc_index(i)
361  ! jj = 3 * (hecMESH%mpc%mpc_item(j) - 1) + hecMESH%mpc%mpc_dof(j)
362  ! Y(kk) = Y(kk) - hecMESH%mpc%mpc_val(j) * X(jj)
363  ! enddo
364  ! enddo
365 
366  end subroutine hecmw_tvec_66
367 
368  !C
369  !C***
370  !C*** hecmw_Ttvec_66
371  !C***
372  !C
373  subroutine hecmw_ttvec_66 (hecMESH, X, Y, COMMtime)
374  use hecmw_util
375  use m_hecmw_comm_f
376  implicit none
377  type (hecmwst_local_mesh), intent(in) :: hecmesh
378  real(kind=kreal), intent(in) :: x(:)
379  real(kind=kreal), intent(out) :: y(:)
380  real(kind=kreal), intent(inout) :: commtime
381 
382  real(kind=kreal) :: start_time, end_time
383  integer(kind=kint) :: i
384 
385  start_time= hecmw_wtime()
386  call hecmw_update_r (hecmesh, x, hecmesh%n_node, 6)
387  end_time= hecmw_wtime()
388  commtime = commtime + end_time - start_time
389 
390  do i= 1, hecmesh%nn_internal * hecmesh%n_dof
391  y(i)= x(i)
392  enddo
393 
394  ! do i= 1, hecMESH%mpc%n_mpc
395  ! k = hecMESH%mpc%mpc_index(i-1) + 1
396  ! kk = 3 * (hecMESH%mpc%mpc_item(k) - 1) + hecMESH%mpc%mpc_dof(k)
397  ! Y(kk) = 0.d0
398  ! do j= hecMESH%mpc%mpc_index(i-1) + 2, hecMESH%mpc%mpc_index(i)
399  ! jj = 3 * (hecMESH%mpc%mpc_item(j) - 1) + hecMESH%mpc%mpc_dof(j)
400  ! Y(jj) = Y(jj) - hecMESH%mpc%mpc_val(j) * X(kk)
401  ! enddo
402  ! enddo
403 
404  end subroutine hecmw_ttvec_66
405 
406  !C
407  !C***
408  !C*** hecmw_TtmatTvec_66
409  !C***
410  !C
411  subroutine hecmw_ttmattvec_66 (hecMESH, hecMAT, X, Y, W, time_Ax, COMMtime)
412  use hecmw_util
413  implicit none
414  type (hecmwst_local_mesh), intent(in) :: hecmesh
415  type (hecmwst_matrix), intent(in) :: hecmat
416  real(kind=kreal), intent(in) :: x(:)
417  real(kind=kreal), intent(out) :: y(:), w(:)
418  real(kind=kreal), intent(inout) :: time_ax
419  real(kind=kreal), intent(inout) :: commtime
420 
421  call hecmw_tvec_66(hecmesh, x, y, commtime)
422  call hecmw_matvec_66(hecmesh, hecmat, y, w, time_ax, commtime)
423  call hecmw_ttvec_66(hecmesh, w, y, commtime)
424 
425  end subroutine hecmw_ttmattvec_66
426 
427 
428 end module hecmw_solver_las_66
Jagged Diagonal Matrix storage for vector processors. Original code was provided by JAMSTEC.
Definition: hecmw_jadm.f90:8
subroutine, public hecmw_jad_matvec(hecMESH, hecMAT, X, Y, COMMtime)
Definition: hecmw_jadm.f90:61
integer(kind=kint) function, public hecmw_mat_get_usejad(hecMAT)
subroutine, public hecmw_matresid_66(hecMESH, hecMAT, X, B, R, time_Ax, COMMtime)
subroutine, public hecmw_tvec_66(hecMESH, X, Y, COMMtime)
real(kind=kreal) function, public hecmw_rel_resid_l2_66(hecMESH, hecMAT, time_Ax, COMMtime)
subroutine, public hecmw_ttvec_66(hecMESH, X, Y, COMMtime)
subroutine, public hecmw_matvec_66(hecMESH, hecMAT, X, Y, time_Ax, COMMtime)
subroutine, public hecmw_ttmattvec_66(hecMESH, hecMAT, X, Y, W, time_Ax, COMMtime)
subroutine hecmw_innerproduct_r(hecMESH, ndof, X, Y, sum, COMMtime)
subroutine, public hecmw_tuning_fx_calc_sector_cache(N, NDOF, sectorCacheSize0, sectorCacheSize1)
I/O and Utility.
Definition: hecmw_util_f.F90:7
integer(kind=4), parameter kreal
real(kind=kreal) function hecmw_wtime()
subroutine hecmw_update_r(hecMESH, val, n, m)