FrontISTR  5.7.0
Large-scale structural analysis program with finit element method
hecmw_solver_las_66.f90
Go to the documentation of this file.
1 !-------------------------------------------------------------------------------
2 ! Copyright (c) 2019 FrontISTR Commons
3 ! This software is released under the MIT License, see LICENSE.txt
4 !-------------------------------------------------------------------------------
5 
7  use hecmw_util
8  implicit none
9 
10  private
11 
12  public :: hecmw_matvec_66
13  public :: hecmw_matresid_66
14  public :: hecmw_rel_resid_l2_66
15  public :: hecmw_tvec_66
16  public :: hecmw_ttvec_66
17  public :: hecmw_ttmattvec_66
18 
19 contains
20 
21  !C
22  !C***
23  !C*** hecmw_matvec_66
24  !C***
25  !C
26  subroutine hecmw_matvec_66 (hecMESH, hecMAT, X, Y, time_Ax, COMMtime)
28  use m_hecmw_comm_f
30  use hecmw_jad_type
31  use hecmw_tuning_fx
32  !$ use omp_lib
33 
34  implicit none
35  type (hecmwst_local_mesh), intent(in) :: hecmesh
36  type (hecmwst_matrix), intent(in), target :: hecmat
37  real(kind=kreal), intent(in) :: x(:)
38  real(kind=kreal), intent(out) :: y(:)
39  real(kind=kreal), intent(inout) :: time_ax
40  real(kind=kreal), intent(inout), optional :: commtime
41 
42  real(kind=kreal) :: start_time, end_time, tcomm
43  integer(kind=kint) :: i, j, js, je, in
44  real(kind=kreal) :: yv1, yv2, yv3, x1, x2, x3
45  real(kind=kreal) :: yv4, yv5, yv6, x4, x5, x6
46 
47  integer(kind=kint) :: n, np
48  integer(kind=kint), pointer :: indexl(:), iteml(:), indexu(:), itemu(:)
49  real(kind=kreal), pointer :: al(:), au(:), d(:)
50 
51  ! added for turning >>>
52  integer, parameter :: numofblockperthread = 100
53  logical, save :: isfirst = .true.
54  integer, save :: numofthread = 1
55  integer, save, allocatable :: startpos(:), endpos(:)
56  integer(kind=kint), save :: sectorcachesize0, sectorcachesize1
57  integer(kind=kint) :: threadnum, blocknum, numofblock
58  integer(kind=kint) :: numofelement, elementcount, blockindex
59  real(kind=kreal) :: numofelementperblock
60  ! <<< added for turning
61 
62  if (hecmw_mat_get_usejad(hecmat).ne.0) then
63  tcomm = 0.d0
64  start_time = hecmw_wtime()
65  call hecmw_jad_matvec(hecmesh, hecmat, x, y, tcomm)
66  end_time = hecmw_wtime()
67  time_ax = time_ax + end_time - start_time - tcomm
68  if (present(commtime)) commtime = commtime + tcomm
69  else
70 
71  n = hecmat%N
72  np = hecmat%NP
73  indexl => hecmat%indexL
74  indexu => hecmat%indexU
75  iteml => hecmat%itemL
76  itemu => hecmat%itemU
77  al => hecmat%AL
78  au => hecmat%AU
79  d => hecmat%D
80 
81  ! added for turning >>>
82  if (isfirst .eqv. .true.) then
83  !$ numOfThread = omp_get_max_threads()
84  numofblock = numofthread * numofblockperthread
85  allocate (startpos(0 : numofblock - 1), endpos(0 : numofblock - 1))
86  numofelement = n + indexl(n) + indexu(n)
87  numofelementperblock = dble(numofelement) / numofblock
88  blocknum = 0
89  elementcount = 0
90  startpos(blocknum) = 1
91  do i= 1, n
92  elementcount = elementcount + 1
93  elementcount = elementcount + (indexl(i) - indexl(i-1))
94  elementcount = elementcount + (indexu(i) - indexu(i-1))
95  if (elementcount > (blocknum + 1) * numofelementperblock) then
96  endpos(blocknum) = i
97  ! write(9000+hecMESH%my_rank,*) mod(blockNum, numOfThread), &
98  ! startPos(blockNum), endPos(blockNum)
99  blocknum = blocknum + 1
100  startpos(blocknum) = i + 1
101  if (blocknum == (numofblock - 1)) exit
102  endif
103  enddo
104  endpos(blocknum) = n
105  ! write(9000+hecMESH%my_rank,*) mod(blockNum, numOfThread), &
106  ! startPos(blockNum), endPos(blockNum)
107  ! for irregular data
108  do i= blocknum+1, numofblock-1
109  startpos(i) = n
110  endpos(i) = n-1
111  ! write(9000+hecMESH%my_rank,*) mod(i, numOfThread), &
112  ! startPos(i), endPos(i)
113  end do
114 
116  sectorcachesize0, sectorcachesize1)
117 
118  isfirst = .false.
119  endif
120  ! <<< added for turning
121 
122  start_time= hecmw_wtime()
123  call hecmw_update_r (hecmesh, x, np, 6)
124  end_time= hecmw_wtime()
125  if (present(commtime)) commtime = commtime + end_time - start_time
126 
127  start_time = hecmw_wtime()
128 
129  !call fapp_start("loopInMatvec66", 1, 0)
130  !call start_collection("loopInMatvec66")
131 
132  !OCL CACHE_SECTOR_SIZE(sectorCacheSize0,sectorCacheSize1)
133  !OCL CACHE_SUBSECTOR_ASSIGN(X)
134 
135  !$OMP PARALLEL DEFAULT(NONE) &
136  !$OMP&PRIVATE(i,X1,X2,X3,X4,X5,X6,YV1,YV2,YV3,YV4,YV5,YV6,jS,jE,j,in,threadNum,blockNum,blockIndex) &
137  !$OMP&SHARED(D,AL,AU,indexL,itemL,indexU,itemU,X,Y,startPos,endPos,numOfThread)
138  threadnum = 0
139  !$ threadNum = omp_get_thread_num()
140  do blocknum = 0 , numofblockperthread - 1
141  blockindex = blocknum * numofthread + threadnum
142  do i = startpos(blockindex), endpos(blockindex)
143  x1= x(6*i-5)
144  x2= x(6*i-4)
145  x3= x(6*i-3)
146  x4= x(6*i-2)
147  x5= x(6*i-1)
148  x6= x(6*i )
149  yv1= d(36*i-35)*x1 + d(36*i-34)*x2 + d(36*i-33)*x3 + d(36*i-32)*x4 + d(36*i-31)*x5 + d(36*i-30)*x6
150  yv2= d(36*i-29)*x1 + d(36*i-28)*x2 + d(36*i-27)*x3 + d(36*i-26)*x4 + d(36*i-25)*x5 + d(36*i-24)*x6
151  yv3= d(36*i-23)*x1 + d(36*i-22)*x2 + d(36*i-21)*x3 + d(36*i-20)*x4 + d(36*i-19)*x5 + d(36*i-18)*x6
152  yv4= d(36*i-17)*x1 + d(36*i-16)*x2 + d(36*i-15)*x3 + d(36*i-14)*x4 + d(36*i-13)*x5 + d(36*i-12)*x6
153  yv5= d(36*i-11)*x1 + d(36*i-10)*x2 + d(36*i-9 )*x3 + d(36*i-8 )*x4 + d(36*i-7 )*x5 + d(36*i-6 )*x6
154  yv6= d(36*i-5 )*x1 + d(36*i-4 )*x2 + d(36*i-3 )*x3 + d(36*i-2 )*x4 + d(36*i-1 )*x5 + d(36*i )*x6
155 
156  js= indexl(i-1) + 1
157  je= indexl(i )
158  do j= js, je
159  in = iteml(j)
160  x1= x(6*in-5)
161  x2= x(6*in-4)
162  x3= x(6*in-3)
163  x4= x(6*in-2)
164  x5= x(6*in-1)
165  x6= x(6*in )
166  yv1= yv1 + al(36*j-35)*x1 + al(36*j-34)*x2 + al(36*j-33)*x3 + al(36*j-32)*x4 + al(36*j-31)*x5 + al(36*j-30)*x6
167  yv2= yv2 + al(36*j-29)*x1 + al(36*j-28)*x2 + al(36*j-27)*x3 + al(36*j-26)*x4 + al(36*j-25)*x5 + al(36*j-24)*x6
168  yv3= yv3 + al(36*j-23)*x1 + al(36*j-22)*x2 + al(36*j-21)*x3 + al(36*j-20)*x4 + al(36*j-19)*x5 + al(36*j-18)*x6
169  yv4= yv4 + al(36*j-17)*x1 + al(36*j-16)*x2 + al(36*j-15)*x3 + al(36*j-14)*x4 + al(36*j-13)*x5 + al(36*j-12)*x6
170  yv5= yv5 + al(36*j-11)*x1 + al(36*j-10)*x2 + al(36*j-9 )*x3 + al(36*j-8 )*x4 + al(36*j-7 )*x5 + al(36*j-6 )*x6
171  yv6= yv6 + al(36*j-5 )*x1 + al(36*j-4 )*x2 + al(36*j-3 )*x3 + al(36*j-2 )*x4 + al(36*j-1 )*x5 + al(36*j )*x6
172  enddo
173  js= indexu(i-1) + 1
174  je= indexu(i )
175  do j= js, je
176  in = itemu(j)
177  x1= x(6*in-5)
178  x2= x(6*in-4)
179  x3= x(6*in-3)
180  x4= x(6*in-2)
181  x5= x(6*in-1)
182  x6= x(6*in )
183  yv1= yv1 + au(36*j-35)*x1 + au(36*j-34)*x2 + au(36*j-33)*x3 + au(36*j-32)*x4 + au(36*j-31)*x5 + au(36*j-30)*x6
184  yv2= yv2 + au(36*j-29)*x1 + au(36*j-28)*x2 + au(36*j-27)*x3 + au(36*j-26)*x4 + au(36*j-25)*x5 + au(36*j-24)*x6
185  yv3= yv3 + au(36*j-23)*x1 + au(36*j-22)*x2 + au(36*j-21)*x3 + au(36*j-20)*x4 + au(36*j-19)*x5 + au(36*j-18)*x6
186  yv4= yv4 + au(36*j-17)*x1 + au(36*j-16)*x2 + au(36*j-15)*x3 + au(36*j-14)*x4 + au(36*j-13)*x5 + au(36*j-12)*x6
187  yv5= yv5 + au(36*j-11)*x1 + au(36*j-10)*x2 + au(36*j-9 )*x3 + au(36*j-8 )*x4 + au(36*j-7 )*x5 + au(36*j-6 )*x6
188  yv6= yv6 + au(36*j-5 )*x1 + au(36*j-4 )*x2 + au(36*j-3 )*x3 + au(36*j-2 )*x4 + au(36*j-1 )*x5 + au(36*j )*x6
189  enddo
190  y(6*i-5)= yv1
191  y(6*i-4)= yv2
192  y(6*i-3)= yv3
193  y(6*i-2)= yv4
194  y(6*i-1)= yv5
195  y(6*i )= yv6
196  enddo
197  enddo
198  !$OMP END PARALLEL
199 
200  !OCL END_CACHE_SUBSECTOR
201  !OCL END_CACHE_SECTOR_SIZE
202 
203  !call stop_collection("loopInMatvec66")
204  !call fapp_stop("loopInMatvec66", 1, 0)
205 
206  end_time = hecmw_wtime()
207  time_ax = time_ax + end_time - start_time
208 
209  endif
210  end subroutine hecmw_matvec_66
211 
212  !C
213  !C***
214  !C*** hecmw_matresid_66
215  !C***
216  !C
217  subroutine hecmw_matresid_66 (hecMESH, hecMAT, X, B, R, time_Ax, COMMtime)
219  implicit none
220  type (hecmwst_local_mesh), intent(in) :: hecmesh
221  type (hecmwst_matrix), intent(in) :: hecmat
222  real(kind=kreal), intent(in) :: x(:), b(:)
223  real(kind=kreal), intent(out) :: r(:)
224  real(kind=kreal), intent(inout) :: time_ax
225  real(kind=kreal), intent(inout), optional :: commtime
226 
227  integer(kind=kint) :: i
228  real(kind=kreal) :: tcomm
229 
230  tcomm = 0.d0
231  call hecmw_matvec_66 (hecmesh, hecmat, x, r, time_ax, tcomm)
232  if (present(commtime)) commtime = commtime + tcomm
233  do i = 1, hecmat%N * 6
234  r(i) = b(i) - r(i)
235  enddo
236 
237  end subroutine hecmw_matresid_66
238 
239  !C
240  !C***
241  !C*** hecmw_rel_resid_L2_66
242  !C***
243  !C
244  function hecmw_rel_resid_l2_66 (hecMESH, hecMAT, time_Ax, COMMtime)
247  implicit none
248  real(kind=kreal) :: hecmw_rel_resid_l2_66
249  type ( hecmwst_local_mesh ), intent(in) :: hecmesh
250  type ( hecmwst_matrix ), intent(in) :: hecmat
251  real(kind=kreal), intent(inout) :: time_ax
252  real(kind=kreal), intent(inout), optional :: commtime
253 
254  real(kind=kreal), allocatable :: r(:)
255  real(kind=kreal) :: bnorm2, rnorm2
256  real(kind=kreal) :: tcomm
257 
258  allocate(r(hecmat%NDOF*hecmat%NP))
259 
260  tcomm = 0.d0
261  call hecmw_innerproduct_r(hecmesh, hecmat%NDOF, &
262  hecmat%B, hecmat%B, bnorm2, tcomm)
263  if (bnorm2 == 0.d0) then
264  bnorm2 = 1.d0
265  endif
266  call hecmw_matresid_66(hecmesh, hecmat, hecmat%X, hecmat%B, r, time_ax, tcomm)
267  call hecmw_innerproduct_r(hecmesh, hecmat%NDOF, r, r, rnorm2, tcomm)
268  hecmw_rel_resid_l2_66 = sqrt(rnorm2 / bnorm2)
269 
270  if (present(commtime)) commtime = commtime + tcomm
271 
272  deallocate(r)
273  end function hecmw_rel_resid_l2_66
274 
275  !C
276  !C***
277  !C*** hecmw_Tvec_66
278  !C***
279  !C
280  subroutine hecmw_tvec_66 (hecMESH, X, Y, COMMtime)
282  use m_hecmw_comm_f
283  implicit none
284  type (hecmwst_local_mesh), intent(in) :: hecmesh
285  real(kind=kreal), intent(in) :: x(:)
286  real(kind=kreal), intent(out) :: y(:)
287  real(kind=kreal), intent(inout) :: commtime
288 
289  real(kind=kreal) :: start_time, end_time
290  integer(kind=kint) :: i
291 
292  start_time= hecmw_wtime()
293  call hecmw_update_r (hecmesh, x, hecmesh%n_node, 6)
294  end_time= hecmw_wtime()
295  commtime = commtime + end_time - start_time
296 
297  do i= 1, hecmesh%nn_internal * hecmesh%n_dof
298  y(i)= x(i)
299  enddo
300 
301  ! do i= 1, hecMESH%mpc%n_mpc
302  ! k = hecMESH%mpc%mpc_index(i-1) + 1
303  ! kk = 3 * (hecMESH%mpc%mpc_item(k) - 1) + hecMESH%mpc%mpc_dof(k)
304  ! Y(kk) = 0.d0
305  ! do j= hecMESH%mpc%mpc_index(i-1) + 2, hecMESH%mpc%mpc_index(i)
306  ! jj = 3 * (hecMESH%mpc%mpc_item(j) - 1) + hecMESH%mpc%mpc_dof(j)
307  ! Y(kk) = Y(kk) - hecMESH%mpc%mpc_val(j) * X(jj)
308  ! enddo
309  ! enddo
310 
311  end subroutine hecmw_tvec_66
312 
313  !C
314  !C***
315  !C*** hecmw_Ttvec_66
316  !C***
317  !C
318  subroutine hecmw_ttvec_66 (hecMESH, X, Y, COMMtime)
320  use m_hecmw_comm_f
321  implicit none
322  type (hecmwst_local_mesh), intent(in) :: hecmesh
323  real(kind=kreal), intent(in) :: x(:)
324  real(kind=kreal), intent(out) :: y(:)
325  real(kind=kreal), intent(inout) :: commtime
326 
327  real(kind=kreal) :: start_time, end_time
328  integer(kind=kint) :: i
329 
330  start_time= hecmw_wtime()
331  call hecmw_update_r (hecmesh, x, hecmesh%n_node, 6)
332  end_time= hecmw_wtime()
333  commtime = commtime + end_time - start_time
334 
335  do i= 1, hecmesh%nn_internal * hecmesh%n_dof
336  y(i)= x(i)
337  enddo
338 
339  ! do i= 1, hecMESH%mpc%n_mpc
340  ! k = hecMESH%mpc%mpc_index(i-1) + 1
341  ! kk = 3 * (hecMESH%mpc%mpc_item(k) - 1) + hecMESH%mpc%mpc_dof(k)
342  ! Y(kk) = 0.d0
343  ! do j= hecMESH%mpc%mpc_index(i-1) + 2, hecMESH%mpc%mpc_index(i)
344  ! jj = 3 * (hecMESH%mpc%mpc_item(j) - 1) + hecMESH%mpc%mpc_dof(j)
345  ! Y(jj) = Y(jj) - hecMESH%mpc%mpc_val(j) * X(kk)
346  ! enddo
347  ! enddo
348 
349  end subroutine hecmw_ttvec_66
350 
351  !C
352  !C***
353  !C*** hecmw_TtmatTvec_66
354  !C***
355  !C
356  subroutine hecmw_ttmattvec_66 (hecMESH, hecMAT, X, Y, W, time_Ax, COMMtime)
358  implicit none
359  type (hecmwst_local_mesh), intent(in) :: hecmesh
360  type (hecmwst_matrix), intent(in) :: hecmat
361  real(kind=kreal), intent(in) :: x(:)
362  real(kind=kreal), intent(out) :: y(:), w(:)
363  real(kind=kreal), intent(inout) :: time_ax
364  real(kind=kreal), intent(inout) :: commtime
365 
366  call hecmw_tvec_66(hecmesh, x, y, commtime)
367  call hecmw_matvec_66(hecmesh, hecmat, y, w, time_ax, commtime)
368  call hecmw_ttvec_66(hecmesh, w, y, commtime)
369 
370  end subroutine hecmw_ttmattvec_66
371 
372 
373 end module hecmw_solver_las_66
hecmw_matrix_misc::hecmw_mat_get_usejad
integer(kind=kint) function, public hecmw_mat_get_usejad(hecMAT)
Definition: hecmw_matrix_misc.f90:519
hecmw_util::hecmw_wtime
real(kind=kreal) function hecmw_wtime()
Definition: hecmw_util_f.F90:549
hecmw_solver_las_66::hecmw_rel_resid_l2_66
real(kind=kreal) function, public hecmw_rel_resid_l2_66(hecMESH, hecMAT, time_Ax, COMMtime)
Definition: hecmw_solver_las_66.f90:245
hecmw_solver_las_66
Definition: hecmw_solver_las_66.f90:6
hecmw_jad_type
Jagged Diagonal Matrix storage for vector processors. Original code was provided by JAMSTEC.
Definition: hecmw_jadm.f90:8
hecmw_tuning_fx
Definition: hecmw_tuning_fx.f90:6
hecmw_tuning_fx::hecmw_tuning_fx_calc_sector_cache
subroutine, public hecmw_tuning_fx_calc_sector_cache(N, NDOF, sectorCacheSize0, sectorCacheSize1)
Definition: hecmw_tuning_fx.f90:25
hecmw_solver_misc::hecmw_innerproduct_r
subroutine hecmw_innerproduct_r(hecMESH, ndof, X, Y, sum, COMMtime)
Definition: hecmw_solver_misc.f90:47
hecmw_util
I/O and Utility.
Definition: hecmw_util_f.F90:7
hecmw_util::hecmwst_local_mesh
Definition: hecmw_util_f.F90:234
m_hecmw_comm_f
Definition: hecmw_comm_f.F90:6
hecmw_solver_las_66::hecmw_ttvec_66
subroutine, public hecmw_ttvec_66(hecMESH, X, Y, COMMtime)
Definition: hecmw_solver_las_66.f90:319
hecmw_solver_las_66::hecmw_matvec_66
subroutine, public hecmw_matvec_66(hecMESH, hecMAT, X, Y, time_Ax, COMMtime)
Definition: hecmw_solver_las_66.f90:27
hecmw_util::kreal
integer(kind=4), parameter kreal
Definition: hecmw_util_f.F90:16
hecmw_solver_las_66::hecmw_matresid_66
subroutine, public hecmw_matresid_66(hecMESH, hecMAT, X, B, R, time_Ax, COMMtime)
Definition: hecmw_solver_las_66.f90:218
hecmw_solver_las_66::hecmw_ttmattvec_66
subroutine, public hecmw_ttmattvec_66(hecMESH, hecMAT, X, Y, W, time_Ax, COMMtime)
Definition: hecmw_solver_las_66.f90:357
hecmw_matrix_misc
Definition: hecmw_matrix_misc.f90:6
hecmw_solver_misc
Definition: hecmw_solver_misc.f90:6
hecmw_solver_las_66::hecmw_tvec_66
subroutine, public hecmw_tvec_66(hecMESH, X, Y, COMMtime)
Definition: hecmw_solver_las_66.f90:281
m_hecmw_comm_f::hecmw_update_r
subroutine hecmw_update_r(hecMESH, val, n, m)
Definition: hecmw_comm_f.F90:683
hecmw_jad_type::hecmw_jad_matvec
subroutine, public hecmw_jad_matvec(hecMESH, hecMAT, X, Y, COMMtime)
Definition: hecmw_jadm.f90:61
hecmw_util::hecmwst_matrix
Definition: hecmw_util_f.F90:444