FrontISTR  5.8.0
Large-scale structural analysis program with finit element method
hecmw_solver_GPBiCG.f90
Go to the documentation of this file.
1 !-------------------------------------------------------------------------------
2 ! Copyright (c) 2019 FrontISTR Commons
3 ! This software is released under the MIT License, see LICENSE.txt
4 !-------------------------------------------------------------------------------
5 
6 !C***
7 !C*** module hecmw_solver_GPBiCG
8 !C***
9 !
11 
12  private
13  public :: hecmw_solve_gpbicg
14 
15 contains
16  !C
17  !C*** hecmw_solve_GPBiCG
18  !C
19  subroutine hecmw_solve_gpbicg( hecMESH, hecMAT, ITER, RESID, error, &
20  & Tset, Tsol, Tcomm )
21  use hecmw_util
23  use m_hecmw_comm_f
28  use hecmw_precond
29 
30  implicit none
31 
32  type(hecmwst_local_mesh) :: hecmesh
33  type(hecmwst_matrix) :: hecmat
34  integer(kind=kint ), intent(inout):: iter, error
35  real (kind=kreal), intent(inout):: resid, tset, tsol, tcomm
36 
37  integer(kind=kint ) :: n, np, ndof, nndof
38  integer(kind=kint ) :: my_rank
39 
40  real (kind=kreal), pointer :: b(:)
41  real (kind=kreal), pointer :: x(:)
42 
43  integer(kind=kint ) :: iterlog, timelog
44 
45  real(kind=kreal), dimension(:,:), allocatable :: ww
46 
47  real(kind=kreal), dimension(2) :: rr
48 
49  integer(kind=kint ) :: maxit
50  real (kind=kreal) :: tol
51  integer(kind=kint ) :: i,j
52  real (kind=kreal) :: s_time,s1_time,e_time,e1_time
53  real (kind=kreal) :: bnrm2
54  real (kind=kreal) :: rho,rho1,beta,alpha,dnrm2
55  real (kind=kreal) :: qsi,eta,coef1
56  real (kind=kreal) :: t_max,t_min,t_avg,t_sd
57 
58  integer(kind=kint), parameter :: r= 1
59  integer(kind=kint), parameter ::rt= 2
60  integer(kind=kint), parameter :: t= 3
61  integer(kind=kint), parameter ::tt= 4
62  integer(kind=kint), parameter ::t0= 5
63  integer(kind=kint), parameter :: p= 6
64  integer(kind=kint), parameter ::pt= 7
65  integer(kind=kint), parameter :: u= 8
66  integer(kind=kint), parameter ::w1= 9
67  integer(kind=kint), parameter :: y=10
68  integer(kind=kint), parameter :: z=11
69  integer(kind=kint), parameter ::wk=12
70  integer(kind=kint), parameter ::w2=13
71  integer(kind=kint), parameter ::zq=14
72 
73  integer(kind=kint), parameter :: n_iter_recompute_r= 20
74 
75  call hecmw_barrier(hecmesh)
76  s_time= hecmw_wtime()
77  !C
78  !C-- INIT.
79  n = hecmat%N
80  np = hecmat%NP
81  ndof = hecmat%NDOF
82  nndof = n * ndof
83  my_rank = hecmesh%my_rank
84  x => hecmat%X
85  b => hecmat%B
86 
87  iterlog = hecmw_mat_get_iterlog( hecmat )
88  timelog = hecmw_mat_get_timelog( hecmat )
89  maxit = hecmw_mat_get_iter( hecmat )
90  tol = hecmw_mat_get_resid( hecmat )
91 
92  error= 0
93  beta = 0.0d0
94 
95  allocate (ww(ndof*np,14))
96  ww= 0.d0
97 
98  !C
99  !C-- SCALING
100  call hecmw_solver_scaling_fw(hecmesh, hecmat, tcomm)
101 
102  !C===
103  !C +----------------------+
104  !C | SETUP PRECONDITIONER |
105  !C +----------------------+
106  !C===
107  call hecmw_precond_setup(hecmat, hecmesh, 0)
108 
109  !C
110  !C +----------------------+
111  !C | {r}= {b} - [A]{xini} |
112  !C +----------------------+
113  !C===
114  call hecmw_matresid(hecmesh, hecmat, x, b, ww(:,r), tcomm)
115 
116  call hecmw_copy_r(nndof, ww(:,r), ww(:,rt))
117  !C==
118  call hecmw_innerproduct_r(hecmesh, ndof, b, b, bnrm2, tcomm)
119  if (bnrm2.eq.0.d0) then
120  iter = 0
121  maxit = 0
122  resid = 0.d0
123  x = 0.d0
124  endif
125 
126  call hecmw_innerproduct_r(hecmesh, ndof, ww(:,rt), ww(:,r), rho, tcomm)
127 
128  e_time= hecmw_wtime()
129  if (timelog.eq.2) then
130  call hecmw_time_statistics(hecmesh, e_time - s_time, &
131  t_max, t_min, t_avg, t_sd)
132  if (hecmesh%my_rank.eq.0) then
133  write(*,*) 'Time solver setup'
134  write(*,*) ' Max :',t_max
135  write(*,*) ' Min :',t_min
136  write(*,*) ' Avg :',t_avg
137  write(*,*) ' Std Dev :',t_sd
138  endif
139  tset = t_max
140  else
141  tset = e_time - s_time
142  endif
143  !C===
144 
145  !C
146  !C*************************************************************** ITERATIVE PROC.
147  !C
148  call hecmw_barrier(hecmesh)
149  s1_time= hecmw_wtime()
150  do iter= 1, maxit
151  !C
152  !C +----------------+
153  !C | {r}= [Minv]{r} |
154  !C +----------------+
155  !C===
156  call hecmw_copy_r(nndof, ww(:,r), ww(:,wk))
157 
158  call hecmw_precond_apply(hecmesh, hecmat, ww(:,wk), ww(:,r), ww(:,zq), tcomm)
159  !C===
160 
161  !C
162  !C +----------------------------------+
163  !C | {p} = {r} + BETA * ( {p} - {u} ) |
164  !C +----------------------------------+
165  !C===
166  if (iter.gt.1) then
167  call hecmw_axpy_r(nndof, -1.0d0, ww(:,u), ww(:,p))
168  call hecmw_xpay_r(nndof, beta, ww(:,r), ww(:,p))
169  else
170  call hecmw_copy_r(nndof, ww(:,r), ww(:,p))
171  endif
172  !C===
173 
174  !C
175  !C +--------------------------------+
176  !C | ALPHA= {r_tld}{r}/{r_tld} A{p} |
177  !C +--------------------------------+
178  !C===
179 
180  !C
181  !C-- calc. {p_tld}= A{p}
182  call hecmw_matvec(hecmesh, hecmat, ww(:,p), ww(:,pt), tcomm)
183 
184  !C
185  !C-- calc. ALPHA
186  call hecmw_innerproduct_r(hecmesh, ndof, ww(:,rt), ww(:,pt), rho1, tcomm)
187 
188  alpha= rho / rho1
189  !C===
190 
191  !C
192  !C +------------------------------------------+
193  !C | {y}= {t} - {r} - ALPHA{w} + ALPHA{p_tld} |
194  !C | {t}= {r} - ALPHA{p_tld} |
195  !C +------------------------------------------+
196  !C===
197  call hecmw_axpyz_r(nndof, -1.0d0, ww(:,w1), ww(:,pt), ww(:,y))
198  call hecmw_xpay_r (nndof, alpha, ww(:, t), ww(:, y))
199  call hecmw_axpy_r (nndof, -1.0d0, ww(:,wk), ww(:, y))
200  call hecmw_axpyz_r(nndof, -alpha, ww(:,pt), ww(:,wk), ww(:,t))
201  !C===
202 
203  !C
204  !C +-----------------------+
205  !C | {t_tld}= [A][Minv]{t} |
206  !C +-----------------------+
207  !C===
208 
209  !C
210  !C-- calc. {t_tld} and {t0} by [M] inversion
211  !C {W2} = [Minv]{p_tld}
212  !C
213  call hecmw_precond_apply(hecmesh, hecmat, ww(:,t), ww(:,tt), ww(:,zq), tcomm)
214  call hecmw_precond_apply(hecmesh, hecmat, ww(:,t0), ww(:,w2), ww(:,zq), tcomm)
215  call hecmw_copy_r(nndof, ww(:,w2), ww(:,t0))
216  call hecmw_precond_apply(hecmesh, hecmat, ww(:,pt), ww(:,w2), ww(:,zq), tcomm)
217 
218  !C===
219 
220  !C
221  !C-- calc. [A]{t_tld}
222  call hecmw_matvec(hecmesh, hecmat, ww(:,tt), ww(:,wk), tcomm)
223 
224  call hecmw_copy_r(nndof, ww(:,wk), ww(:,tt))
225  !C===
226 
227  !C
228  !C +-------------------+
229  !C | calc. QSI and ETA |
230  !C +-------------------+
231  !C===
232  !call pol_coef(iter, WW, T, TT, Y, QSI, ETA)
233  !call pol_coef_vanilla(iter, WW, T, TT, Y, QSI, ETA)
234  call pol_coef_vanilla2(iter, ww, t, tt, y, qsi, eta)
235  !C===
236 
237  !C
238  !C +----------------------------------------------------------+
239  !C | {u} = QSI [Minv]{pt} + ETA([Minv]{t0}-[Minv]{r}+BETA*{u} |
240  !C | {z} = QSI [Minv]{r} + ETA*{z} - ALPHA*{u} |
241  !C +----------------------------------------------------------+
242  !C===
243 
244  !C
245  !C-- compute. {u},{z}
246 
247  if (iter.gt.1) then
248  call hecmw_xpay_r (nndof, beta, ww(:,t0), ww(:,u))
249  call hecmw_axpy_r (nndof, -1.0d0, ww(:, r), ww(:,u))
250  call hecmw_axpby_r(nndof, qsi, eta, ww(:,w2), ww(:,u))
251  call hecmw_axpby_r(nndof, -alpha, eta, ww(:, u), ww(:,z))
252  call hecmw_axpy_r (nndof, qsi, ww(:,r), ww(:,z))
253  else
254  call hecmw_axpyz_r(nndof, -1.0d0, ww(:,r), ww(:,t0), ww(:,u))
255  call hecmw_axpby_r(nndof, qsi, eta, ww(:,w2), ww(:,u))
256  call hecmw_axpby_r(nndof, -alpha, eta, ww(:, u), ww(:,z))
257  call hecmw_axpy_r (nndof, qsi, ww(:,r), ww(:,z))
258  endif
259  !C===
260 
261  !C
262  !C +--------------------+
263  !C | update {x},{r},{w} |
264  !C +--------------------+
265  !C===
266  call hecmw_axpy_r(nndof, alpha, ww(:,p), x)
267  call hecmw_axpy_r(nndof, 1.0d0, ww(:,z), x)
268  call hecmw_copy_r(nndof, ww(:,t), ww(:,t0))
269  !C
270  !C--- recompute R sometimes
271  if ( mod(iter,n_iter_recompute_r)==0 ) then
272  call hecmw_matresid(hecmesh, hecmat, x, b, ww(:,r), tcomm)
273  else
274  call hecmw_axpyz_r(nndof, -eta, ww(:,y), ww(:,t), ww(:,r))
275  call hecmw_axpy_r (nndof, -qsi, ww(:,tt), ww(:,r))
276  endif
277 
278  call hecmw_innerproduct_r_nocomm(hecmesh, ndof, ww(:,r), ww(:,r), rr(1))
279  call hecmw_innerproduct_r_nocomm(hecmesh, ndof, ww(:,r), ww(:,rt), rr(2))
280  s_time= hecmw_wtime()
281  call hecmw_allreduce_r(hecmesh, rr, 2, hecmw_sum)
282  e_time= hecmw_wtime()
283  tcomm = tcomm + e_time - s_time
284  dnrm2 = rr(1)
285  coef1 = rr(2)
286 
287  beta = alpha*coef1 / (qsi*rho)
288  call hecmw_axpyz_r(nndof, beta, ww(:,pt), ww(:,tt), ww(:,w1))
289 
290  resid= dsqrt(dnrm2/bnrm2)
291  rho = coef1
292 
293  !C##### ITERATION HISTORY
294  if (my_rank.eq.0 .and. iterlog.eq.1) &
295  & write (*, 1000) iter, resid
296  1000 format (i5, 1pe16.6)
297  !C#####
298 
299  if (resid.le.tol ) then
300  if ( mod(iter,n_iter_recompute_r)==0 ) exit
301  !C----- recompute R to make sure it is really converged
302  call hecmw_matresid(hecmesh, hecmat, x, b, ww(:,r), tcomm)
303  call hecmw_innerproduct_r(hecmesh, ndof, ww(:,r), ww(:,r), dnrm2, tcomm)
304  resid= dsqrt(dnrm2/bnrm2)
305  if (resid.le.tol) exit
306  endif
307  if ( iter.eq.maxit ) error= hecmw_solver_error_noconv_maxit
308  !C===
309  enddo
310 
311  call hecmw_solver_scaling_bk(hecmat)
312  !C
313  !C-- INTERFACE data EXCHANGE
314 
315  s_time = hecmw_wtime()
316  call hecmw_update_r (hecmesh, x, hecmat%NP, hecmat%NDOF)
317  e_time = hecmw_wtime()
318  tcomm = tcomm + e_time - s_time
319 
320  deallocate (ww)
321  !call hecmw_precond_clear(hecMAT)
322 
323  e1_time= hecmw_wtime()
324  if (timelog.eq.2) then
325  call hecmw_time_statistics(hecmesh, e1_time - s1_time, &
326  t_max, t_min, t_avg, t_sd)
327  if (hecmesh%my_rank.eq.0) then
328  write(*,*) 'Time solver iterations'
329  write(*,*) ' Max :',t_max
330  write(*,*) ' Min :',t_min
331  write(*,*) ' Avg :',t_avg
332  write(*,*) ' Std Dev :',t_sd
333  endif
334  tsol = t_max
335  else
336  tsol = e1_time - s1_time
337  endif
338 
339  contains
340 
341  !C
342  !C*** pol_coef : computes QSI and ETA in original GPBiCG way
343  !C
344  subroutine pol_coef(iter, WW, T, TT, Y, QSI, ETA)
345  implicit none
346  integer(kind=kint), intent(in) :: iter
347  real(kind=kreal), intent(in) :: ww(:,:)
348  integer(kind=kint), intent(in) :: T, TT, Y
349  real(kind=kreal), intent(out) :: qsi, eta
350 
351  real(kind=kreal), dimension(5) :: cg
352  real(kind=kreal), dimension(2) :: eq
353  real(kind=kreal) :: delta
354 
355  call hecmw_innerproduct_r_nocomm(hecmesh, ndof, ww(:,y ), ww(:,y ), cg(1)) ! myu1
356  call hecmw_innerproduct_r_nocomm(hecmesh, ndof, ww(:,tt), ww(:,t ), cg(2)) ! omega2
357  call hecmw_innerproduct_r_nocomm(hecmesh, ndof, ww(:,y ), ww(:,t ), cg(3)) ! omega1
358  call hecmw_innerproduct_r_nocomm(hecmesh, ndof, ww(:,tt), ww(:,y ), cg(4)) ! nyu
359  call hecmw_innerproduct_r_nocomm(hecmesh, ndof, ww(:,tt), ww(:,tt), cg(5)) ! myu2
360  s_time= hecmw_wtime()
361  call hecmw_allreduce_r(hecmesh, cg, 5, hecmw_sum)
362  e_time= hecmw_wtime()
363  tcomm = tcomm + e_time - s_time
364 
365  if (iter.eq.1) then
366  eq(1)= cg(2)/cg(5) ! omega2 / myu2
367  eq(2)= 0.d0
368  else
369  delta= (cg(5)*cg(1)-cg(4)*cg(4)) ! myu1*myu2 - nyu^2
370  eq(1)= (cg(1)*cg(2)-cg(3)*cg(4)) / delta ! (myu1*omega2-nyu*omega1)/delta
371  eq(2)= (cg(5)*cg(3)-cg(4)*cg(2)) / delta ! (myu2*omega1-nyu*omega2)/delta
372  endif
373 
374  qsi= eq(1)
375  eta= eq(2)
376  end subroutine pol_coef
377 
378  !C
379  !C*** pol_coef_vanilla : computes QSI and ETA with vanilla strategy
380  !C see Fujino, Abe, Sugihara and Nakashima(2013) ISBN978-4-621-08741-1
381  !C
382  subroutine pol_coef_vanilla(iter, WW, T, TT, Y, QSI, ETA)
383  implicit none
384  integer(kind=kint), intent(in) :: iter
385  real(kind=kreal), intent(inout) :: ww(:,:)
386  integer(kind=kint), intent(in) :: T, TT, Y
387  real(kind=kreal), intent(out) :: qsi, eta
388 
389  real(kind=kreal), dimension(3) :: cg
390  real(kind=kreal) :: gamma1, gamma2
391  real(kind=kreal) :: c, c_abs
392 
393  real(kind=kreal), parameter :: omega = 0.707106781d0
394 
395  if (iter.eq.1) then
396  gamma1 = 0.d0
397  gamma2 = 0.d0
398  else
399  call hecmw_innerproduct_r_nocomm(hecmesh, ndof, ww(:,y ), ww(:,y ), cg(1)) ! myu
400  call hecmw_innerproduct_r_nocomm(hecmesh, ndof, ww(:,y ), ww(:,tt), cg(2)) ! nyu
401  call hecmw_innerproduct_r_nocomm(hecmesh, ndof, ww(:,y ), ww(:,t ), cg(3)) ! omega
402  s_time= hecmw_wtime()
403  call hecmw_allreduce_r(hecmesh, cg, 3, hecmw_sum)
404  e_time= hecmw_wtime()
405  tcomm = tcomm + e_time - s_time
406  gamma1 = cg(3)/cg(1) ! omega / myu
407  gamma2 = cg(2)/cg(1) ! nyu / myu
408  !!! COMMENTED OUT because no convergence obtained with following updates.
409  ! do j= 1, NNDOF
410  ! WW(j,T )= WW(j,T ) - gamma1*WW(j,Y)
411  ! WW(j,TT)= WW(j,TT) - gamma2*WW(j,Y)
412  ! enddo
413  endif
414 
415  call hecmw_innerproduct_r_nocomm(hecmesh, ndof, ww(:,t ), ww(:,t ), cg(1)) ! |r|^2
416  call hecmw_innerproduct_r_nocomm(hecmesh, ndof, ww(:,tt), ww(:,tt), cg(2)) ! |s|^2
417  call hecmw_innerproduct_r_nocomm(hecmesh, ndof, ww(:,t ), ww(:,tt), cg(3)) ! r.s
418  s_time= hecmw_wtime()
419  call hecmw_allreduce_r(hecmesh, cg, 3, hecmw_sum)
420  e_time= hecmw_wtime()
421  tcomm = tcomm + e_time - s_time
422 
423  c = cg(3) / dsqrt(cg(1)*cg(2))
424  c_abs = dabs(c)
425  if (c_abs > omega) then
426  qsi = c * dsqrt(cg(1)/cg(2))
427  else
428  ! QSI = (c / c_abs) * OMEGA * dsqrt(CG(1)/CG(2))
429  if (c >= 0.d0) then
430  qsi = omega * dsqrt(cg(1)/cg(2))
431  else
432  qsi = -omega * dsqrt(cg(1)/cg(2))
433  endif
434  endif
435  eta = gamma1 - qsi*gamma2
436  end subroutine pol_coef_vanilla
437 
438  !C
439  !C*** pol_coef_vanilla2 : optimized version of pol_coef_vanilla
440  !C
441  subroutine pol_coef_vanilla2(iter, WW, T, TT, Y, QSI, ETA)
442  implicit none
443  integer(kind=kint), intent(in) :: iter
444  real(kind=kreal), intent(inout) :: ww(:,:)
445  integer(kind=kint), intent(in) :: T, TT, Y
446  real(kind=kreal), intent(out) :: qsi, eta
447 
448  real(kind=kreal), dimension(6) :: cg
449  real(kind=kreal) :: gamma1, gamma2
450  real(kind=kreal) :: c, c_abs
451 
452  real(kind=kreal), parameter :: omega = 0.707106781d0
453 
454  call hecmw_innerproduct_r_nocomm(hecmesh, ndof, ww(:,t ), ww(:,t ), cg(1)) ! |r|^2
455  call hecmw_innerproduct_r_nocomm(hecmesh, ndof, ww(:,tt), ww(:,tt), cg(2)) ! |s|^2
456  call hecmw_innerproduct_r_nocomm(hecmesh, ndof, ww(:,t ), ww(:,tt), cg(3)) ! r.s
457 
458  if (iter.gt.1) then
459  call hecmw_innerproduct_r_nocomm(hecmesh, ndof, ww(:,y ), ww(:,y ), cg(4)) ! myu
460  call hecmw_innerproduct_r_nocomm(hecmesh, ndof, ww(:,y ), ww(:,tt), cg(5)) ! nyu
461  call hecmw_innerproduct_r_nocomm(hecmesh, ndof, ww(:,y ), ww(:,t ), cg(6)) ! omega
462  s_time= hecmw_wtime()
463  call hecmw_allreduce_r(hecmesh, cg, 6, hecmw_sum)
464  e_time= hecmw_wtime()
465  tcomm = tcomm + e_time - s_time
466  gamma1 = cg(6)/cg(4) ! omega / myu
467  gamma2 = cg(5)/cg(4) ! nyu / myu
468  else
469  s_time= hecmw_wtime()
470  call hecmw_allreduce_r(hecmesh, cg, 3, hecmw_sum)
471  e_time= hecmw_wtime()
472  tcomm = tcomm + e_time - s_time
473  gamma1 = 0.d0
474  gamma2 = 0.d0
475  endif
476 
477  c = cg(3) / dsqrt(cg(1)*cg(2))
478  c_abs = dabs(c)
479  if (c_abs > omega) then
480  qsi = c * dsqrt(cg(1)/cg(2))
481  else
482  if (c >= 0.d0) then
483  qsi = omega * dsqrt(cg(1)/cg(2))
484  else
485  qsi = -omega * dsqrt(cg(1)/cg(2))
486  endif
487  endif
488  eta = gamma1 - qsi*gamma2
489  end subroutine pol_coef_vanilla2
490 
491  end subroutine hecmw_solve_gpbicg
492 
493 end module hecmw_solver_gpbicg
subroutine pol_coef(iter, WW, T, TT, Y, QSI, ETA)
real(kind=kreal) function, public hecmw_mat_get_resid(hecMAT)
integer(kind=kint) function, public hecmw_mat_get_iterlog(hecMAT)
integer(kind=kint) function, public hecmw_mat_get_timelog(hecMAT)
integer(kind=kint) function, public hecmw_mat_get_iter(hecMAT)
subroutine, public hecmw_precond_setup(hecMAT, hecMESH, sym)
subroutine, public hecmw_precond_apply(hecMESH, hecMAT, R, Z, ZP, COMMtime)
subroutine, public hecmw_solve_gpbicg(hecMESH, hecMAT, ITER, RESID, error, Tset, Tsol, Tcomm)
subroutine, public hecmw_matresid(hecMESH, hecMAT, X, B, R, COMMtime)
subroutine, public hecmw_matvec(hecMESH, hecMAT, X, Y, COMMtime)
subroutine hecmw_xpay_r(n, alpha, X, Y)
subroutine hecmw_axpyz_r(n, alpha, X, Y, Z)
subroutine hecmw_innerproduct_r_nocomm(hecMESH, ndof, X, Y, sum)
subroutine hecmw_innerproduct_r(hecMESH, ndof, X, Y, sum, COMMtime)
subroutine hecmw_axpby_r(n, alpha, beta, X, Y)
subroutine hecmw_axpy_r(n, alpha, X, Y)
subroutine hecmw_copy_r(n, X, Y)
subroutine hecmw_time_statistics(hecMESH, time, t_max, t_min, t_avg, t_sd)
subroutine, public hecmw_solver_scaling_fw(hecMESH, hecMAT, COMMtime)
subroutine, public hecmw_solver_scaling_bk(hecMAT)
I/O and Utility.
Definition: hecmw_util_f.F90:7
integer(kind=kint), parameter hecmw_sum
integer(kind=4), parameter kreal
real(kind=kreal) function hecmw_wtime()
subroutine hecmw_update_r(hecMESH, val, n, m)
subroutine hecmw_allreduce_r(hecMESH, val, n, ntag)
subroutine hecmw_barrier(hecMESH)
integer(kind=kint), parameter hecmw_solver_error_noconv_maxit