FrontISTR  5.7.0
Large-scale structural analysis program with finit element method
hecmw_local_matrix.f90
Go to the documentation of this file.
1 !-------------------------------------------------------------------------------
2 ! Copyright (c) 2019 FrontISTR Commons
3 ! This software is released under the MIT License, see LICENSE.txt
4 !-------------------------------------------------------------------------------
5 
7  use hecmw_util
9 
10  private
11  public :: hecmwst_local_matrix
12  public :: hecmw_localmat_write
13  public :: hecmw_localmat_blocking
14  public :: hecmw_localmat_free
15  public :: hecmw_localmat_mulvec
16  public :: hecmw_trimatmul_ttkt
18  public :: hecmw_trimatmul_ttkt_mpc
19  public :: hecmw_localmat_transpose
20  public :: hecmw_localmat_assemble
21  public :: hecmw_localmat_add
24  public :: hecmw_localmat_multmat
27 
28  type hecmwst_local_matrix
29  integer :: nr, nc, nnz, ndof
30  integer(kind=kint), pointer :: index(:)
31  integer(kind=kint), pointer :: item(:)
32  real(kind=kreal), pointer :: a(:)
33  end type hecmwst_local_matrix
34 
35  integer(kind=kint), parameter :: cNCOL_ITEM = 3
36  integer(kind=kint), parameter :: cLID = 1
37  integer(kind=kint), parameter :: cRANK = 2
38  integer(kind=kint), parameter :: cGID = 3
39 
40  integer(kind=kint), parameter :: DEBUG = 0
41  integer(kind=kint), parameter :: DEBUG_MATRIX = 0
42  integer(kind=kint), parameter :: TIMER = 0
43 
44 contains
45 
46  subroutine hecmw_localmat_write(Tmat,iunit)
47  implicit none
48  type (hecmwst_local_matrix), intent(in) :: tmat
49  integer(kind=kint), intent(in) :: iunit
50  integer(kind=kint) :: nr, nc, nnz, ndof, ndof2, i, js, je, j, jj
51  character(len=64) :: fmt
52  nr=tmat%nr
53  nc=tmat%nc
54  nnz=tmat%nnz
55  ndof=tmat%ndof
56  ndof2=ndof*ndof
57  write(iunit,'(a,4i10)') 'nr, nc, nnz, ndof', nr, nc, nnz, ndof
58  write(iunit,'(a)') 'i, j, A'
59  write(fmt,'(a,i0,a)') '(',ndof,'f12.3)'
60  do i=1,nr
61  js=tmat%index(i-1)+1
62  je=tmat%index(i)
63  do j=js,je
64  jj=tmat%item(j)
65  if (ndof==1) then
66  write(iunit,'(2i10,f12.3)') i, jj, tmat%A(j)
67  else
68  write(iunit,'(2i10)') i, jj
69  write(iunit,fmt) tmat%A((j-1)*ndof2+1:j*ndof2)
70  endif
71  enddo
72  enddo
73  end subroutine hecmw_localmat_write
74 
75  subroutine hecmw_localmat_write_size(Tmat,iunit)
76  implicit none
77  type (hecmwst_local_matrix), intent(in) :: tmat
78  integer(kind=kint), intent(in) :: iunit
79  integer(kind=kint) :: nr, nc, nnz, ndof
80  nr=tmat%nr
81  nc=tmat%nc
82  nnz=tmat%nnz
83  ndof=tmat%ndof
84  write(iunit,'(a,4i10)') 'nr, nc, nnz, ndof', nr, nc, nnz, ndof
85  end subroutine hecmw_localmat_write_size
86 
87  subroutine hecmw_localmat_write_ij(Tmat,iunit)
88  implicit none
89  type (hecmwst_local_matrix), intent(in) :: tmat
90  integer(kind=kint), intent(in) :: iunit
91  integer(kind=kint) :: nr, nc, nnz, ndof, i, js, je, j, jj
92  nr=tmat%nr
93  nc=tmat%nc
94  nnz=tmat%nnz
95  ndof=tmat%ndof
96  write(iunit,'(a,4i10)') 'nr, nc, nnz, ndof', nr, nc, nnz, ndof
97  write(iunit,'(a)') 'i, j'
98  do i=1,nr
99  js=tmat%index(i-1)+1
100  je=tmat%index(i)
101  do j=js,je
102  jj=tmat%item(j)
103  write(iunit,'(2i10)') i, jj
104  enddo
105  enddo
106  end subroutine hecmw_localmat_write_ij
107 
108  subroutine hecmw_localmat_blocking(Tmat, ndof, BTmat)
109  implicit none
110  type (hecmwst_local_matrix), intent(in) :: tmat
111  integer, intent(in) :: ndof
112  type (hecmwst_local_matrix), intent(out) :: btmat
113  integer, allocatable :: iw(:)
114  integer :: ndof2, i, icnt, idof, idx, ls, le, l, j, jb, k, lb0, jdof, ks, ke
115  ndof2=ndof*ndof
116 
117  if (mod(tmat%nr, ndof) /= 0 .or. mod(tmat%nc, ndof) /= 0) then
118  write(0,*) tmat%nr, tmat%nc, ndof
119  stop 'ERROR: blocking_Tmat failed'
120  endif
121  btmat%nr=tmat%nr/ndof
122  btmat%nc=tmat%nc/ndof
123  btmat%ndof=ndof
124 
125  allocate(iw(btmat%nc))
126  allocate(btmat%index(0:btmat%nr))
127 
128  btmat%index(0)=0
129  do i=1,btmat%nr
130  icnt=0
131  do idof=1,ndof
132  idx=(i-1)*ndof+idof
133  ls=tmat%index(idx-1)+1
134  le=tmat%index(idx)
135  lcol: do l=ls,le
136  j=tmat%item(l)
137  jb=(j-1)/ndof+1
138  do k=1,icnt
139  if (iw(k)==jb) cycle lcol
140  enddo
141  icnt=icnt+1
142  iw(icnt)=jb
143  enddo lcol
144  enddo
145  btmat%index(i)=btmat%index(i-1)+icnt
146  enddo
147 
148  btmat%nnz=btmat%index(btmat%nr)
149  allocate(btmat%item(btmat%nnz))
150  allocate(btmat%A(btmat%nnz*ndof2))
151  btmat%A=0.d0
152 
153  do i=1,btmat%nr
154  icnt=0
155  do idof=1,ndof
156  idx=(i-1)*ndof+idof
157  ls=tmat%index(idx-1)+1
158  le=tmat%index(idx)
159  lcol2: do l=ls,le
160  j=tmat%item(l)
161  jb=(j-1)/ndof+1
162  do k=1,icnt
163  if (iw(k)==jb) cycle lcol2
164  enddo
165  icnt=icnt+1
166  iw(icnt)=jb
167  enddo lcol2
168  enddo
169  ! if (icnt /= BTmat%index(i)-BTmat%index(i-1)) stop 'ERROR: blocking Tmat'
170  ! ! call qsort(iw, 1, icnt)
171  lb0=btmat%index(i-1)
172  do k=1,icnt
173  btmat%item(lb0+k)=iw(k)
174  enddo
175  do idof=1,ndof
176  idx=(i-1)*ndof+idof
177  ls=tmat%index(idx-1)+1
178  le=tmat%index(idx)
179  lcol3: do l=ls,le
180  j=tmat%item(l)
181  jb=(j-1)/ndof+1
182  jdof=mod((j-1), ndof)+1
183  ks=btmat%index(i-1)+1
184  ke=btmat%index(i)
185  do k=ks,ke
186  if (btmat%item(k)==jb) then
187  btmat%A((k-1)*ndof2+(idof-1)*ndof+jdof)=tmat%A(l)
188  cycle lcol3
189  endif
190  enddo
191  stop 'ERROR: something wrong in blocking Tmat'
192  enddo lcol3
193  enddo
194  enddo
195  end subroutine hecmw_localmat_blocking
196 
197  subroutine hecmw_localmat_free(Tmat)
198  implicit none
199  type (hecmwst_local_matrix), intent(inout) :: tmat
200  deallocate(tmat%index)
201  if (associated(tmat%item)) deallocate(tmat%item)
202  if (associated(tmat%A)) deallocate(tmat%A)
203  tmat%nr=0
204  tmat%nc=0
205  tmat%nnz=0
206  tmat%ndof=0
207  end subroutine hecmw_localmat_free
208 
209  subroutine hecmw_trimatmul_ttkt(hecMESH, BTtmat, hecMAT, BTmat, &
210  iwS, num_lagrange, hecTKT)
212  implicit none
213  type (hecmwst_local_mesh), intent(inout) :: hecmesh
214  type (hecmwst_local_matrix), intent(inout) :: bttmat, btmat
215  type (hecmwst_matrix), intent(in) :: hecmat
216  integer(kind=kint), intent(in) :: iws(:)
217  integer(kind=kint), intent(in) :: num_lagrange
218  type (hecmwst_matrix), intent(inout) :: hectkt
219  if (hecmesh%n_neighbor_pe == 0) then
220  call hecmw_trimatmul_ttkt_serial(hecmesh, bttmat, hecmat, btmat, &
221  iws, num_lagrange, hectkt)
222  else
223  call hecmw_trimatmul_ttkt_parallel(hecmesh, bttmat, hecmat, btmat, &
224  iws, num_lagrange, hectkt)
225  endif
226  end subroutine hecmw_trimatmul_ttkt
227 
228  subroutine hecmw_trimatmul_ttkt_serial(hecMESH, BTtmat, hecMAT, BTmat, &
229  iwS, num_lagrange, hecTKT)
231  implicit none
232  type (hecmwst_local_mesh), intent(in) :: hecmesh
233  type (hecmwst_local_matrix), intent(in) :: bttmat, btmat
234  type (hecmwst_matrix), intent(in) :: hecmat
235  integer(kind=kint), intent(in) :: iws(:)
236  integer(kind=kint), intent(in) :: num_lagrange
237  type (hecmwst_matrix), intent(inout) :: hectkt
238  type (hecmwst_local_matrix) :: bttkt
239  real(kind=kreal) :: num
240 
241  ! perform three matrices multiplication for elimination
242  call trimatmul_ttkt(bttmat, hecmat, btmat, bttkt)
243  call debug_write_matrix(bttkt, 'BTtKT(MPC)', debug_matrix)
244 
245  ! place small numbers where the DOF is eliminated
246  !num = hecmw_mat_diag_max(hecMAT, hecMESH) * 1.0d-10
247  num = 1.d0
248  call place_num_on_diag(bttkt, iws, num_lagrange, num)
249  call debug_write_matrix(bttkt, 'BTtKT(MPC) (place 1.0 on slave diag)', debug_matrix)
250 
251  ! make_new HECMW matrix
252  call make_new_hecmat(hecmat, bttkt, hectkt)
253  call hecmw_localmat_free(bttkt)
254  end subroutine hecmw_trimatmul_ttkt_serial
255 
256  subroutine hecmw_trimatmul_ttkt_parallel(hecMESH, BTtmat, hecMAT, BTmat, &
257  iwS, num_lagrange, hecTKT)
259  implicit none
260  type (hecmwST_local_mesh), intent(inout) :: hecMESH
261  type (hecmwST_local_matrix), intent(inout) :: BTtmat, BTmat
262  type (hecmwST_matrix), intent(in) :: hecMAT
263  integer(kind=kint), intent(in) :: iwS(:)
264  integer(kind=kint), intent(in) :: num_lagrange
265  type (hecmwST_matrix), intent(inout) :: hecTKT
266  type (hecmwST_local_matrix) :: BKmat, BTtKmat, BTtKTmat
267  real(kind=kreal) :: num
268  real(kind=kreal) :: t0, t1
269 
270  ! perform three matrices multiplication for elimination
271  t0 = hecmw_wtime()
272  call hecmw_localmat_init_with_hecmat(bkmat, hecmat)
273  call debug_write_matrix(bkmat, 'BKmat (hecMAT)', debug_matrix)
274  t1 = hecmw_wtime()
275  if (timer >= 1) write(0, '(A,f10.4)') "#### hecmw_trimatmul_TtKT_parallel (1) : ",t1-t0
276 
277  t0 = hecmw_wtime()
278  call hecmw_localmat_multmat(bttmat, bkmat, hecmesh, bttkmat)
279  if (debug >= 2) write(0,*) ' DEBUG2: multiply Tt and K done'
280  call debug_write_matrix(bttkmat, 'BTtKmat', debug_matrix)
281  call hecmw_localmat_free(bkmat)
282  t1 = hecmw_wtime()
283  if (timer >= 1) write(0, '(A,f10.4)') "#### hecmw_trimatmul_TtKT_parallel (2) : ",t1-t0
284 
285  t0 = hecmw_wtime()
286  call hecmw_localmat_multmat(bttkmat, btmat, hecmesh, bttktmat)
287  if (debug >= 2) write(0,*) ' DEBUG2: multiply TtK and T done'
288  call debug_write_matrix(bttktmat, 'BTtKTmat', debug_matrix)
289  call hecmw_localmat_free(bttkmat)
290  t1 = hecmw_wtime()
291  if (timer >= 1) write(0, '(A,f10.4)') "#### hecmw_trimatmul_TtKT_parallel (3) : ",t1-t0
292 
293  t0 = hecmw_wtime()
294  ! place small numbers where the DOF is eliminated
295  !num = hecmw_mat_diag_max(hecMAT, hecMESH) * 1.0d-10
296  num = 1.d0
297  call place_num_on_diag(bttktmat, iws, num_lagrange, num)
298  if (debug >= 2) then
299  write(700+hecmw_comm_get_rank(),*) 'num_lagrange =', num_lagrange
300  if (debug >= 3) then
301  write(700+hecmw_comm_get_rank(),*) 'iwS(1:num_lagrange)'
302  write(700+hecmw_comm_get_rank(),*) iws(1:num_lagrange)
303  endif
304  endif
305  call debug_write_matrix(bttktmat, 'BTtKTmat (place 1.0 on slave diag)', debug_matrix)
306  t1 = hecmw_wtime()
307  if (timer >= 1) write(0, '(A,f10.4)') "#### hecmw_trimatmul_TtKT_parallel (4) : ",t1-t0
308 
309  t0 = hecmw_wtime()
310  ! make_new HECMW matrix
311  call make_new_hecmat(hecmat, bttktmat, hectkt)
312  call hecmw_localmat_free(bttktmat)
313  t1 = hecmw_wtime()
314  if (timer >= 1) write(0, '(A,f10.4)') "#### hecmw_trimatmul_TtKT_parallel (5) : ",t1-t0
315  end subroutine hecmw_trimatmul_ttkt_parallel
316 
317  subroutine trimatmul_ttkt(BTtmat, hecMAT, BTmat, BTtKT)
318  implicit none
319  type (hecmwST_local_matrix), intent(in) :: BTtmat, BTmat
320  type (hecmwST_matrix), intent(in) :: hecMAT
321  type (hecmwST_local_matrix), intent(out) :: BTtKT
322  integer :: nr, nc, ndof, ndof2, i, icnt, js, je, j, jj, ks, ke, k, kk
323  integer :: ls, le, l, ll, m, ms, me, mm
324  integer, allocatable :: iw(:)
325  real(kind=kreal), pointer :: ttp(:), kp(:), tp(:), ttktp(:)
326  ! real(kind=kreal) :: tsym_s, tsym_e, tnum_s, tnum_e
327 
328  nr=bttmat%nr
329  nc=btmat%nc
330  ndof=bttmat%ndof
331  ndof2=ndof*ndof
332 
333  bttkt%nr=nr
334  bttkt%nc=nc
335  bttkt%ndof=ndof
336  allocate(bttkt%index(0:nr))
337 
338  ! tsym_s = hecmw_wtime()
339 
340  !$omp parallel default(none), &
341  !$omp& private(iw,i,icnt,js,je,j,jj,ks,ke,k,kk,ls,le,l,ll,m), &
342  !$omp& shared(nr,nc,BTtmat,hecMAT,BTmat,BTtKT)
343  allocate(iw(nc))
344  !$omp do
345  do i=1,nr
346  icnt=0
347  js=bttmat%index(i-1)+1
348  je=bttmat%index(i)
349  do j=js,je
350  jj=bttmat%item(j)
351  ! lower
352  ks=hecmat%indexL(jj-1)+1
353  ke=hecmat%indexL(jj)
354  do k=ks,ke
355  kk=hecmat%itemL(k)
356  ls=btmat%index(kk-1)+1
357  le=btmat%index(kk)
358  ll1: do l=ls,le
359  ll=btmat%item(l)
360  do m=1,icnt
361  if (iw(m)==ll) cycle ll1
362  enddo
363  icnt=icnt+1
364  iw(icnt)=ll
365  !if (i==1) write(0,*) 'l', icnt, jj, kk, ll
366  enddo ll1
367  enddo
368  ! diagonal
369  ls=btmat%index(jj-1)+1
370  le=btmat%index(jj)
371  ll2: do l=ls,le
372  ll=btmat%item(l)
373  do m=1,icnt
374  if (iw(m)==ll) cycle ll2
375  enddo
376  icnt=icnt+1
377  iw(icnt)=ll
378  !if (i==1) write(0,*) 'd', icnt, jj, kk, ll
379  enddo ll2
380  ! upper
381  ks=hecmat%indexU(jj-1)+1
382  ke=hecmat%indexU(jj)
383  do k=ks,ke
384  kk=hecmat%itemU(k)
385  ls=btmat%index(kk-1)+1
386  le=btmat%index(kk)
387  ll3: do l=ls,le
388  ll=btmat%item(l)
389  do m=1,icnt
390  if (iw(m)==ll) cycle ll3
391  enddo
392  icnt=icnt+1
393  iw(icnt)=ll
394  !if (i==1) write(0,*) 'u', icnt, jj, kk, ll
395  enddo ll3
396  enddo
397  enddo
398  if (icnt == 0) icnt=1
399  !if (i==1) write(0,*) iw(1:icnt)
400  bttkt%index(i)=icnt
401  enddo
402  !$omp end do
403  deallocate(iw)
404  !$omp end parallel
405 
406  ! tsym_e = hecmw_wtime()
407  ! write(0,*) 'tsym:',tsym_e-tsym_s
408 
409  bttkt%index(0)=0
410  do i=1,nr
411  bttkt%index(i)=bttkt%index(i-1)+bttkt%index(i)
412  enddo
413  !write(0,*) BTtKT%index(1:n)-BTtKT%index(0:n-1)
414 
415  bttkt%nnz=bttkt%index(nr)
416  allocate(bttkt%item(bttkt%nnz))
417  allocate(bttkt%A(bttkt%nnz*ndof2))
418  bttkt%item=0
419  bttkt%A=0.d0
420 
421  ! tnum_s = hecmw_wtime()
422 
423  !$omp parallel default(none), &
424  !$omp& private(i,icnt,js,je,j,jj,ks,ke,k,kk,ls,le,l,ll,m, &
425  !$omp& ms,me,mm,Ttp,Kp,Tp,TtKTp), &
426  !$omp& shared(nr,nc,BTtmat,hecMAT,BTmat,BTtKT,ndof,ndof2)
427  !$omp do
428  do i=1,nr
429  icnt=0
430  ms=bttkt%index(i-1)+1
431  !me=BTtKT%index(i)
432  js=bttmat%index(i-1)+1
433  je=bttmat%index(i)
434  do j=js,je
435  jj=bttmat%item(j)
436  ttp=>bttmat%A((j-1)*ndof2+1:j*ndof2)
437  ! lower
438  ks=hecmat%indexL(jj-1)+1
439  ke=hecmat%indexL(jj)
440  do k=ks,ke
441  kk=hecmat%itemL(k)
442  kp=>hecmat%AL((k-1)*ndof2+1:k*ndof2)
443  ls=btmat%index(kk-1)+1
444  le=btmat%index(kk)
445  do l=ls,le
446  ll=btmat%item(l)
447  tp=>btmat%A((l-1)*ndof2+1:l*ndof2)
448  me=ms-1+icnt
449  mm=-1
450  do m=ms,me
451  if (bttkt%item(m)==ll) mm=m
452  enddo
453  if (mm<0) then
454  icnt=icnt+1
455  mm=me+1
456  bttkt%item(mm)=ll
457  !if (i==1) write(0,*) 'l', mm, jj, kk, ll
458  endif
459  ttktp=>bttkt%A((mm-1)*ndof2+1:mm*ndof2)
460  call blk_trimatmul_add(ndof, ttp, kp, tp, ttktp)
461  enddo
462  enddo
463  ! diagonal
464  kp=>hecmat%D((jj-1)*ndof2+1:jj*ndof2)
465  ls=btmat%index(jj-1)+1
466  le=btmat%index(jj)
467  do l=ls,le
468  ll=btmat%item(l)
469  tp=>btmat%A((l-1)*ndof2+1:l*ndof2)
470  me=ms-1+icnt
471  mm=-1
472  do m=ms,me
473  if (bttkt%item(m)==ll) mm=m
474  enddo
475  if (mm<0) then
476  icnt=icnt+1
477  mm=me+1
478  bttkt%item(mm)=ll
479  !if (i==1) write(0,*) 'd', mm, jj, kk, ll
480  endif
481  ttktp=>bttkt%A((mm-1)*ndof2+1:mm*ndof2)
482  call blk_trimatmul_add(ndof, ttp, kp, tp, ttktp)
483  enddo
484  ! upper
485  ks=hecmat%indexU(jj-1)+1
486  ke=hecmat%indexU(jj)
487  do k=ks,ke
488  kk=hecmat%itemU(k)
489  kp=>hecmat%AU((k-1)*ndof2+1:k*ndof2)
490  ls=btmat%index(kk-1)+1
491  le=btmat%index(kk)
492  do l=ls,le
493  ll=btmat%item(l)
494  tp=>btmat%A((l-1)*ndof2+1:l*ndof2)
495  me=ms-1+icnt
496  mm=-1
497  do m=ms,me
498  if (bttkt%item(m)==ll) mm=m
499  enddo
500  if (mm<0) then
501  icnt=icnt+1
502  mm=me+1
503  bttkt%item(mm)=ll
504  !if (i==1) write(0,*) 'u', mm, jj, kk, ll
505  endif
506  ttktp=>bttkt%A((mm-1)*ndof2+1:mm*ndof2)
507  call blk_trimatmul_add(ndof, ttp, kp, tp, ttktp)
508  enddo
509  enddo
510  enddo
511  if (icnt == 0) then
512  icnt=1
513  bttkt%item(ms)=i
514  endif
515  ! error check!
516  !write(0,*) BTtKT%item(ms:ms-1+icnt)
517  !write(0,*) BTtKT%index(i)-BTtKT%index(i-1), icnt
518  if (ms-1+icnt /= bttkt%index(i)) stop 'ERROR: trimatmul'
519  enddo
520  !$omp end do
521  !$omp end parallel
522 
523  ! tnum_e = hecmw_wtime()
524  ! write(0,*) 'tnum:',tnum_e-tnum_s
525  end subroutine trimatmul_ttkt
526 
527  subroutine blk_trimatmul_add(ndof, A, B, C, ABC)
528  implicit none
529  integer, intent(in) :: ndof
530  real(kind=kreal), intent(in) :: a(:), b(:), c(:)
531  real(kind=kreal), intent(inout) :: abc(:)
532  real(kind=kreal), allocatable :: ab(:)
533  integer :: ndof2, i, j, k, i0, j0, ij, ik, jk
534 
535  ndof2=ndof*ndof
536  allocate(ab(ndof2))
537  ab=0.d0
538 
539  do i=1,ndof
540  i0=(i-1)*ndof
541  do j=1,ndof
542  ij=i0+j
543  j0=(j-1)*ndof
544  do k=1,ndof
545  ik=i0+k
546  jk=j0+k
547  ab(ik)=ab(ik)+a(ij)*b(jk)
548  enddo
549  enddo
550  enddo
551 
552  do i=1,ndof
553  i0=(i-1)*ndof
554  do j=1,ndof
555  ij=i0+j
556  j0=(j-1)*ndof
557  do k=1,ndof
558  ik=i0+k
559  jk=j0+k
560  abc(ik)=abc(ik)+ab(ij)*c(jk)
561  enddo
562  enddo
563  enddo
564 
565  deallocate(ab)
566  end subroutine blk_trimatmul_add
567 
568  subroutine place_num_on_diag(BTtKT, iwS, num_lagrange, num)
569  implicit none
570  type (hecmwST_local_matrix), intent(inout) :: BTtKT
571  integer(kind=kint), intent(in) :: iwS(:)
572  integer(kind=kint), intent(in) :: num_lagrange
573  real(kind=kreal), intent(in) :: num
574  integer(kind=kint) :: ndof, ndof2, ilag, i, idof, js, je, j, jj
575  integer(kind=kint) :: nmissing, k, ks, ke
576  integer(kind=kint), allocatable :: missing(:), cnt(:)
577  integer(kind=kint), pointer :: index(:), item(:)
578  real(kind=kreal), pointer :: a(:)
579 
580  ndof=bttkt%ndof
581  ndof2=ndof*ndof
582 
583  ! check if there are places
584  allocate(missing(num_lagrange))
585  nmissing = 0
586  outer1: do ilag=1,num_lagrange
587  i=(iws(ilag)-1)/ndof+1
588  idof=mod(iws(ilag)-1, ndof)+1
589  js=bttkt%index(i-1)+1
590  je=bttkt%index(i)
591  do j=js,je
592  jj=bttkt%item(j)
593  if (jj==i) cycle outer1 ! found place
594  enddo
595  ! not found
596  do k=1,nmissing
597  if (missing(k) == i) cycle outer1 ! already marked as missing
598  enddo
599  nmissing = nmissing + 1
600  missing(nmissing) = i
601  enddo outer1
602 
603  ! if not, reallocate
604  if (nmissing > 0) then
605  allocate(cnt(bttkt%nr))
606  allocate(index(0:bttkt%nr))
607  do i=1,bttkt%nr
608  cnt(i) = bttkt%index(i) - bttkt%index(i-1)
609  enddo
610  do i=1,nmissing
611  cnt(missing(i)) = cnt(missing(i)) + 1
612  enddo
613  call make_index(bttkt%nr, cnt, index)
614  allocate(item(bttkt%nnz + nmissing))
615  allocate(a(ndof2 * (bttkt%nnz + nmissing)))
616  do i=1,bttkt%nr
617  ks=index(i-1)+1
618  js=bttkt%index(i-1)+1
619  je=bttkt%index(i)
620  item(ks:ks+(je-js))=bttkt%item(js:je)
621  a(ndof2*(ks-1)+1:ndof2*(ks+(je-js)))=bttkt%A(ndof2*(js-1)+1:ndof2*je)
622  enddo
623  do i=1,nmissing
624  ke=index(missing(i))
625  item(ke)=missing(i)
626  a(ndof2*(ke-1)+1:ndof2*ke)=0.d0
627  enddo
628  deallocate(bttkt%index)
629  deallocate(bttkt%item)
630  deallocate(bttkt%A)
631  bttkt%index => index
632  bttkt%item => item
633  bttkt%A => a
634  bttkt%nnz = index(bttkt%nr)
635  deallocate(cnt)
636  endif
637  deallocate(missing)
638 
639  ! place num
640  outer: do ilag=1,num_lagrange
641  i=(iws(ilag)-1)/ndof+1
642  idof=mod(iws(ilag)-1, ndof)+1
643  js=bttkt%index(i-1)+1
644  je=bttkt%index(i)
645  do j=js,je
646  jj=bttkt%item(j)
647  if (jj==i) then
648  !write(0,*) ilag, i, idof
649  bttkt%A((j-1)*ndof2+(idof-1)*ndof+idof)=num
650  cycle outer
651  endif
652  enddo
653  enddo outer
654  end subroutine place_num_on_diag
655 
656  subroutine replace_hecmat(hecMAT, BTtKT)
657  implicit none
658  type (hecmwST_matrix), intent(inout) :: hecMAT
659  type (hecmwST_local_matrix), intent(in) :: BTtKT
660  integer :: nr, nc, ndof, ndof2, i, nl, nu, js, je, j, jj
661  integer :: ksl, ksu, k
662 
663  nr=bttkt%nr
664  nc=bttkt%nc
665  ndof=hecmat%NDOF
666  ndof2=ndof*ndof
667 
668  ! free old hecMAT
669  if (associated(hecmat%AL)) deallocate(hecmat%AL)
670  if (associated(hecmat%AU)) deallocate(hecmat%AU)
671  if (associated(hecmat%itemL)) deallocate(hecmat%itemL)
672  if (associated(hecmat%itemU)) deallocate(hecmat%itemU)
673  hecmat%indexL=0
674  hecmat%indexU=0
675 
676  ! count NPL, NPU
677  !$omp parallel default(none),private(i,nl,nu,js,je,j,jj), &
678  !$omp& shared(nr,BTtKT,hecMAT)
679  !$omp do
680  do i=1,nr
681  nl=0
682  nu=0
683  js=bttkt%index(i-1)+1
684  je=bttkt%index(i)
685  do j=js,je
686  jj=bttkt%item(j)
687  if (jj < i) then
688  nl=nl+1
689  elseif (i < jj) then
690  nu=nu+1
691  else
692  ! diagonal
693  endif
694  enddo
695  hecmat%indexL(i)=nl
696  hecmat%indexU(i)=nu
697  enddo
698  !$omp end do
699  !$omp end parallel
700 
701  hecmat%indexL(0)=0
702  hecmat%indexU(0)=0
703  do i=1,nc
704  hecmat%indexL(i)=hecmat%indexL(i-1)+hecmat%indexL(i)
705  hecmat%indexU(i)=hecmat%indexU(i-1)+hecmat%indexU(i)
706  enddo
707  hecmat%NPL=hecmat%indexL(nc)
708  hecmat%NPU=hecmat%indexU(nc)
709 
710  ! allocate new hecMAT
711  allocate(hecmat%itemL(hecmat%NPL), hecmat%itemU(hecmat%NPU))
712  allocate(hecmat%AL(hecmat%NPL*ndof2), hecmat%AU(hecmat%NPU*ndof2))
713  hecmat%itemL=0
714  hecmat%itemU=0
715  hecmat%D=0.d0
716  hecmat%AL=0.d0
717  hecmat%AU=0.d0
718 
719  ! copy from BTtKT to hecMAT
720  !$omp parallel default(none),private(i,nl,nu,js,je,ksl,ksu,j,jj,k), &
721  !$omp& shared(nr,BTtKT,hecMAT,ndof2)
722  !$omp do
723  do i=1,nr
724  nl=0
725  nu=0
726  js=bttkt%index(i-1)+1
727  je=bttkt%index(i)
728  ksl=hecmat%indexL(i-1)+1
729  ksu=hecmat%indexU(i-1)+1
730  do j=js,je
731  jj=bttkt%item(j)
732  if (jj < i) then
733  k=ksl+nl
734  hecmat%itemL(k)=jj
735  hecmat%AL((k-1)*ndof2+1:k*ndof2)=bttkt%A((j-1)*ndof2+1:j*ndof2)
736  nl=nl+1
737  elseif (i < jj) then
738  k=ksu+nu
739  hecmat%itemU(k)=jj
740  hecmat%AU((k-1)*ndof2+1:k*ndof2)=bttkt%A((j-1)*ndof2+1:j*ndof2)
741  nu=nu+1
742  else
743  hecmat%D((i-1)*ndof2+1:i*ndof2)=bttkt%A((j-1)*ndof2+1:j*ndof2)
744  endif
745  enddo
746  ! if (ksl+nl /= hecMAT%indexL(i)+1) stop 'ERROR: indexL'
747  ! if (ksu+nu /= hecMAT%indexU(i)+1) stop 'ERROR: indexU'
748  enddo
749  !$omp end do
750  !$omp end parallel
751 
752  ! do i=1,hecMAT%NPL
753  ! if (hecMAT%itemL(i) <= 0) stop 'ERROR: negative itemL'
754  ! if (hecMAT%itemL(i) > nc) stop 'ERROR: too big itemL'
755  ! enddo
756  ! do i=1,hecMAT%NPU
757  ! if (hecMAT%itemU(i) <= 0) stop 'ERROR: negative itemU'
758  ! if (hecMAT%itemU(i) > nc) stop 'ERROR: too big itemU'
759  ! enddo
760  end subroutine replace_hecmat
761 
762  subroutine make_new_hecmat(hecMAT, BTtKT, hecTKT)
763  implicit none
764  type(hecmwst_matrix), intent(in) :: hecMAT
765  type(hecmwst_local_matrix), intent(in) :: BTtKT
766  type(hecmwst_matrix), intent(inout) :: hecTKT
767  integer(kind=kint) :: nr, nc, ndof, ndof2
768 
769  nr=bttkt%nr
770  nc=bttkt%nc
771  ndof=bttkt%ndof
772  ndof2=ndof*ndof
773 
774  !write(0,*) 'DEBUG: nr, nc =',nr,nc
775 
776  ! if (nr /= nc) then
777  ! stop 'ERROR: nr /= nc'
778  ! endif
779  hectkt%N =hecmat%N
780  hectkt%NP=nc
781  hectkt%NDOF=ndof
782 
783  if (associated(hectkt%D)) deallocate(hectkt%D)
784  allocate(hectkt%D(nc*ndof2))
785 
786  if (associated(hectkt%indexL)) deallocate(hectkt%indexL)
787  if (associated(hectkt%indexU)) deallocate(hectkt%indexU)
788  allocate(hectkt%indexL(0:nc))
789  allocate(hectkt%indexU(0:nc))
790 
791  hectkt%Iarray=hecmat%Iarray
792  hectkt%Rarray=hecmat%Rarray
793 
794  call replace_hecmat(hectkt, bttkt)
795  end subroutine make_new_hecmat
796 
797  subroutine hecmw_localmat_mulvec(BTmat, V, TV)
798  implicit none
799  type (hecmwst_local_matrix), intent(in) :: btmat
800  real(kind=kreal), intent(in), target :: v(:)
801  real(kind=kreal), intent(out), target :: tv(:)
802  real(kind=kreal), pointer :: tvp(:), tp(:), vp(:)
803  integer :: nr, ndof, ndof2, i, js, je, j, jj, k, kl0, l
804  !!$ real(kind=kreal) :: vnorm
805 
806  nr=btmat%nr
807  ndof=btmat%ndof
808  ndof2=ndof*ndof
809 
810  tv=0.d0
811 
812  !!$ vnorm=0.d0
813  !!$ do i=1,nr*ndof
814  !!$ vnorm=vnorm+V(i)**2
815  !!$ enddo
816  !!$ write(0,*) 'vnorm:', sqrt(vnorm)
817 
818  !$omp parallel default(none),private(i,TVp,js,je,j,jj,Tp,Vp,k,kl0,l), &
819  !$omp& shared(nr,TV,ndof,BTmat,ndof2,V)
820  !$omp do
821  do i=1,nr
822  tvp=>tv((i-1)*ndof+1:i*ndof)
823  js=btmat%index(i-1)+1
824  je=btmat%index(i)
825  do j=js,je
826  jj=btmat%item(j)
827  tp=>btmat%A((j-1)*ndof2+1:j*ndof2)
828  vp=>v((jj-1)*ndof+1:jj*ndof)
829  do k=1,ndof
830  kl0=(k-1)*ndof
831  do l=1,ndof
832  tvp(k)=tvp(k)+tp(kl0+l)*vp(l)
833  enddo
834  enddo
835  enddo
836  enddo
837  !$omp end do
838  !$omp end parallel
839  end subroutine hecmw_localmat_mulvec
840 
841  subroutine hecmw_trimatmul_ttkt_mpc(hecMESH, hecMAT, hecTKT)
842  implicit none
843  type (hecmwst_local_mesh), intent(inout) :: hecmesh
844  type (hecmwst_matrix), intent(in) :: hecmat
845  type (hecmwst_matrix), intent(inout) :: hectkt
846  type (hecmwst_local_matrix) :: btmat, bttmat
847  integer(kind=kint), allocatable :: iws(:)
848  integer(kind=kint) :: ndof, n_mpc, i_mpc
849  integer(kind=kint) :: i, j, k, kk, ilag
850  integer(kind=kint) :: num_lagrange
851  real(kind=kreal) :: t0, t1
852  t0 = hecmw_wtime()
853  ndof=hecmat%NDOF
854  n_mpc=0
855  outer: do i=1,hecmesh%mpc%n_mpc
856  do j= hecmesh%mpc%mpc_index(i-1) + 1, hecmesh%mpc%mpc_index(i)
857  if (hecmesh%mpc%mpc_dof(j) > ndof) cycle outer
858  enddo
859  n_mpc=n_mpc+1
860  enddo outer
861  allocate(iws(n_mpc))
862  i_mpc=0
863  outer2: do i=1,hecmesh%mpc%n_mpc
864  do j= hecmesh%mpc%mpc_index(i-1) + 1, hecmesh%mpc%mpc_index(i)
865  if (hecmesh%mpc%mpc_dof(j) > ndof) cycle outer2
866  enddo
867  i_mpc=i_mpc+1
868  k=hecmesh%mpc%mpc_index(i-1)+1
869  kk=ndof*(hecmesh%mpc%mpc_item(k)-1)+hecmesh%mpc%mpc_dof(k)
870  iws(i_mpc)=kk
871  enddo outer2
872  if (debug >= 2) then
873  write(700+hecmw_comm_get_rank(),*) 'DEBUG: n_mpc, slaves',n_mpc,iws(1:n_mpc)
874  endif
875  t1 = hecmw_wtime()
876  if (timer >= 1) write(0, '(A,f10.4)') "### hecmw_trimatmul_TtKT_mpc (1) : ",t1-t0
877  t0 = hecmw_wtime()
878  call make_btmat_mpc(hecmesh, ndof, btmat)
879  call debug_write_matrix(btmat, 'BTmat(MPC)', debug_matrix)
880  t1 = hecmw_wtime()
881  if (timer >= 1) write(0, '(A,f10.4)') "### hecmw_trimatmul_TtKT_mpc (2) : ",t1-t0
882  t0 = hecmw_wtime()
883  ! call make_BTtmat_mpc(hecMESH, ndof, BTtmat)
884  call hecmw_localmat_transpose(btmat, bttmat)
885  ! if (hecmw_localmat_equal(BTtmat, BTtmat2) == 0) then
886  ! write(0,*) 'ERROR: BTtmat2 is incorrect!!!'
887  ! else
888  ! write(0,*) 'DEBUG: BTtmat2 is correct'
889  ! endif
890  call debug_write_matrix(bttmat, 'BTtmat(MPC)', debug_matrix)
891 
892  if (debug >= 3) then
893  write(700+hecmw_comm_get_rank(),*) 'hecMESH%node_ID before trimatmul_TtKT'
894  do i=hecmesh%nn_internal+1, hecmesh%n_node
895  write(700+hecmw_comm_get_rank(),*) i,hecmesh%node_ID(2*i-1),hecmesh%node_ID(2*i),hecmesh%global_node_ID(i)
896  enddo
897  endif
898  t1 = hecmw_wtime()
899  if (timer >= 1) write(0, '(A,f10.4)') "### hecmw_trimatmul_TtKT_mpc (3) : ",t1-t0
900  t0 = hecmw_wtime()
901  call hecmw_trimatmul_ttkt(hecmesh, bttmat, hecmat, btmat, iws, n_mpc, hectkt)
902  t1 = hecmw_wtime()
903  if (timer >= 1) write(0, '(A,f10.4)') "### hecmw_trimatmul_TtKT_mpc (4) : ",t1-t0
904  t0 = hecmw_wtime()
905  if (debug >= 3) then
906  write(700+hecmw_comm_get_rank(),*) 'hecMESH%node_ID after trimatmul_TtKT'
907  do i=hecmesh%nn_internal+1, hecmesh%n_node
908  write(700+hecmw_comm_get_rank(),*) i,hecmesh%node_ID(2*i-1),hecmesh%node_ID(2*i),hecmesh%global_node_ID(i)
909  enddo
910  endif
911 
912  if (associated(hectkt%B)) deallocate(hectkt%B)
913  if (associated(hectkt%X)) deallocate(hectkt%X)
914  num_lagrange = size(hecmat%B) - hecmat%NP*ndof
915  allocate(hectkt%B(ndof*hectkt%NP + num_lagrange))
916  allocate(hectkt%X(ndof*hectkt%NP + num_lagrange))
917  hectkt%B(:) = 0.d0
918  hectkt%X(:) = 0.d0
919  do i=1, ndof*hecmat%NP
920  hectkt%B(i) = hecmat%B(i)
921  hectkt%X(i) = hecmat%X(i)
922  enddo
923  do i=1, num_lagrange
924  hectkt%B(ndof*hectkt%NP+i) = hecmat%B(ndof*hecmat%NP+i)
925  hectkt%X(ndof*hectkt%NP+i) = hecmat%X(ndof*hecmat%NP+i)
926  enddo
927  do ilag=1,n_mpc
928  hectkt%X(iws(ilag)) = 0.d0
929  enddo
930 
931  call hecmw_localmat_free(btmat)
932  call hecmw_localmat_free(bttmat)
933  ! call hecmw_localmat_free(BTtmat2)
934  deallocate(iws)
935  t1 = hecmw_wtime()
936  if (timer >= 1) write(0, '(A,f10.4)') "### hecmw_trimatmul_TtKT_mpc (5) : ",t1-t0
937  end subroutine hecmw_trimatmul_ttkt_mpc
938 
939  subroutine make_btmat_mpc(hecMESH, ndof, BTmat)
940  implicit none
941  type (hecmwst_local_mesh), intent(in) :: hecmesh
942  integer(kind=kint), intent(in) :: ndof
943  type (hecmwst_local_matrix), intent(out) :: btmat
944  type (hecmwst_local_matrix) :: tmat
945  integer(kind=kint) :: n_mpc
946  integer(kind=kint) :: i,j,k,js,jj,kk
947  n_mpc=0
948  outer: do i=1,hecmesh%mpc%n_mpc
949  do j= hecmesh%mpc%mpc_index(i-1) + 1, hecmesh%mpc%mpc_index(i)
950  if (hecmesh%mpc%mpc_dof(j) > ndof) cycle outer
951  enddo
952  n_mpc=n_mpc+1
953  enddo outer
954  tmat%nr=hecmesh%n_node*ndof
955  tmat%nc=tmat%nr
956  tmat%ndof=1
957  allocate(tmat%index(0:tmat%nr))
958  ! count nonzero in each row
959  tmat%index(1:tmat%nr)=1
960  outer2: do i=1,hecmesh%mpc%n_mpc
961  do j= hecmesh%mpc%mpc_index(i-1) + 1, hecmesh%mpc%mpc_index(i)
962  if (hecmesh%mpc%mpc_dof(j) > ndof) cycle outer2
963  enddo
964  k=hecmesh%mpc%mpc_index(i-1)+1
965  kk=ndof*(hecmesh%mpc%mpc_item(k)-1)+hecmesh%mpc%mpc_dof(k)
966  tmat%index(kk)=hecmesh%mpc%mpc_index(i)-hecmesh%mpc%mpc_index(i-1)-1
967  enddo outer2
968  ! index
969  tmat%index(0)=0
970  do i=1,tmat%nr
971  tmat%index(i)=tmat%index(i-1)+tmat%index(i)
972  enddo
973  tmat%nnz=tmat%index(tmat%nr)
974  allocate(tmat%item(tmat%nnz), tmat%A(tmat%nnz))
975  ! diag
976  do i=1,tmat%nr
977  js=tmat%index(i-1)+1
978  tmat%item(js)=i
979  tmat%A(js)=1.d0
980  enddo
981  ! others
982  outer3: do i=1,hecmesh%mpc%n_mpc
983  do j= hecmesh%mpc%mpc_index(i-1) + 1, hecmesh%mpc%mpc_index(i)
984  if (hecmesh%mpc%mpc_dof(j) > ndof) cycle outer3
985  enddo
986  k=hecmesh%mpc%mpc_index(i-1)+1
987  kk=ndof*(hecmesh%mpc%mpc_item(k)-1)+hecmesh%mpc%mpc_dof(k)
988  js=tmat%index(kk-1)+1
989  do j= hecmesh%mpc%mpc_index(i-1) + 2, hecmesh%mpc%mpc_index(i)
990  jj = ndof * (hecmesh%mpc%mpc_item(j) - 1) + hecmesh%mpc%mpc_dof(j)
991  tmat%item(js)=jj
992  tmat%A(js)=-hecmesh%mpc%mpc_val(j)
993  js=js+1
994  enddo
995  enddo outer3
996  !call debug_write_matrix(Tmat, 'Tmat(MPC)', DEBUG_MATRIX)
997  call hecmw_localmat_blocking(tmat, ndof, btmat)
998  call hecmw_localmat_free(tmat)
999  end subroutine make_btmat_mpc
1000 
1001  subroutine make_bttmat_mpc(hecMESH, ndof, BTtmat)
1002  implicit none
1003  type (hecmwst_local_mesh), intent(in) :: hecmesh
1004  integer(kind=kint), intent(in) :: ndof
1005  type (hecmwst_local_matrix), intent(out) :: bttmat
1006  type (hecmwst_local_matrix) :: ttmat
1007  integer(kind=kint) :: n_mpc
1008  integer(kind=kint) :: i,j,k,js,je,jj,kk
1009  integer(kind=kint), allocatable :: iw(:)
1010  n_mpc=0
1011  outer: do i=1,hecmesh%mpc%n_mpc
1012  do j= hecmesh%mpc%mpc_index(i-1) + 1, hecmesh%mpc%mpc_index(i)
1013  if (hecmesh%mpc%mpc_dof(j) > ndof) cycle outer
1014  enddo
1015  n_mpc=n_mpc+1
1016  enddo outer
1017  ttmat%nr=hecmesh%n_node*ndof
1018  ttmat%nc=ttmat%nr
1019  ttmat%ndof=1
1020  allocate(ttmat%index(0:ttmat%nr))
1021  ! count nonzero in each row
1022  ttmat%index(1:ttmat%nr)=1
1023  outer2: do i=1,hecmesh%mpc%n_mpc
1024  do j= hecmesh%mpc%mpc_index(i-1) + 1, hecmesh%mpc%mpc_index(i)
1025  if (hecmesh%mpc%mpc_dof(j) > ndof) cycle outer2
1026  enddo
1027  k=hecmesh%mpc%mpc_index(i-1)+1
1028  kk=ndof*(hecmesh%mpc%mpc_item(k)-1)+hecmesh%mpc%mpc_dof(k)
1029  ttmat%index(kk)=0
1030  do j= hecmesh%mpc%mpc_index(i-1) + 2, hecmesh%mpc%mpc_index(i)
1031  jj = ndof * (hecmesh%mpc%mpc_item(j) - 1) + hecmesh%mpc%mpc_dof(j)
1032  ttmat%index(jj)=ttmat%index(jj)+1
1033  enddo
1034  enddo outer2
1035  ! index
1036  ttmat%index(0)=0
1037  do i=1,ttmat%nr
1038  ttmat%index(i)=ttmat%index(i-1)+ttmat%index(i)
1039  enddo
1040  ttmat%nnz=ttmat%index(ttmat%nr)
1041  allocate(ttmat%item(ttmat%nnz), ttmat%A(ttmat%nnz))
1042  ! diag
1043  do i=1,ttmat%nr
1044  js=ttmat%index(i-1)+1
1045  je=ttmat%index(i)
1046  if (js <= je) then
1047  ttmat%item(js)=i
1048  ttmat%A(js)=1.d0
1049  endif
1050  enddo
1051  ! others
1052  allocate(iw(ttmat%nr))
1053  iw(:)=1
1054  outer3: do i=1,hecmesh%mpc%n_mpc
1055  do j= hecmesh%mpc%mpc_index(i-1) + 1, hecmesh%mpc%mpc_index(i)
1056  if (hecmesh%mpc%mpc_dof(j) > ndof) cycle outer3
1057  enddo
1058  k=hecmesh%mpc%mpc_index(i-1)+1
1059  kk=ndof*(hecmesh%mpc%mpc_item(k)-1)+hecmesh%mpc%mpc_dof(k)
1060  do j= hecmesh%mpc%mpc_index(i-1) + 2, hecmesh%mpc%mpc_index(i)
1061  jj = ndof * (hecmesh%mpc%mpc_item(j) - 1) + hecmesh%mpc%mpc_dof(j)
1062  js=ttmat%index(jj-1)+1+iw(jj)
1063  ttmat%item(js)=kk
1064  ttmat%A(js)=-hecmesh%mpc%mpc_val(j)
1065  iw(jj)=iw(jj)+1
1066  enddo
1067  enddo outer3
1068  deallocate(iw)
1069  !call debug_write_matrix(Ttmat, 'Ttmat(MPC)', DEBUG_MATRIX)
1070  call hecmw_localmat_blocking(ttmat, ndof, bttmat)
1071  call hecmw_localmat_free(ttmat)
1072  end subroutine make_bttmat_mpc
1073 
1074  subroutine hecmw_localmat_transpose(Tmat, Ttmat)
1075  implicit none
1076  type (hecmwst_local_matrix), intent(in) :: tmat
1077  type (hecmwst_local_matrix), intent(out) :: ttmat
1078  integer(kind=kint), allocatable :: iw(:)
1079  integer(kind=kint) :: i, j, jj, ndof, ndof2, k, idof, jdof
1080  allocate(iw(tmat%nc))
1081  iw = 0
1082  do i = 1, tmat%nr
1083  do j = tmat%index(i-1)+1, tmat%index(i)
1084  jj = tmat%item(j)
1085  iw(jj) = iw(jj) + 1
1086  enddo
1087  enddo
1088  ttmat%nr = tmat%nc
1089  ttmat%nc = tmat%nr
1090  ttmat%nnz = tmat%nnz
1091  ttmat%ndof = tmat%ndof
1092  ndof = tmat%ndof
1093  ndof2 = ndof * ndof
1094  allocate(ttmat%index(0:ttmat%nr))
1095  allocate(ttmat%item(ttmat%nnz))
1096  allocate(ttmat%A(ttmat%nnz*ndof2))
1097  ttmat%index(0) = 0
1098  do i = 1, ttmat%nr
1099  ttmat%index(i) = ttmat%index(i-1) + iw(i)
1100  iw(i) = ttmat%index(i-1) + 1
1101  enddo
1102  do i = 1, tmat%nr
1103  do j = tmat%index(i-1)+1, tmat%index(i)
1104  jj = tmat%item(j)
1105  k = iw(jj)
1106  ttmat%item( k ) = i
1107  do idof = 1, ndof
1108  do jdof = 1, ndof
1109  ttmat%A((k-1)*ndof2+(idof-1)*ndof+jdof) = &
1110  tmat%A((j-1)*ndof2+(jdof-1)*ndof+idof)
1111  enddo
1112  enddo
1113  iw(jj) = k + 1
1114  enddo
1115  enddo
1116  end subroutine hecmw_localmat_transpose
1117 
1118  function hecmw_localmat_equal(Tmat1, Tmat2)
1119  implicit none
1120  type (hecmwst_local_matrix), intent(in) :: tmat1, tmat2
1121  integer(kind=kint) :: hecmw_localmat_equal
1122  integer(kind=kint) :: i, j, k0, k, ndof, ndof2
1123  hecmw_localmat_equal = 0
1124  if (tmat1%nr /= tmat2%nr) return
1125  if (tmat1%nc /= tmat2%nc) return
1126  if (tmat1%nnz /= tmat2%nnz) return
1127  if (tmat1%ndof /= tmat2%ndof) return
1128  ndof = tmat1%ndof
1129  ndof2 = ndof * ndof
1130  do i = 1, tmat1%nr
1131  if (tmat1%index(i) /= tmat2%index(i)) return
1132  do j = tmat1%index(i-1)+1, tmat1%index(i)
1133  if (tmat1%item(j) /= tmat2%item(j)) return
1134  k0 = (j-1)*ndof2
1135  do k = 1, ndof2
1136  if (tmat1%A(k0+k) /= tmat2%A(k0+k)) return
1137  enddo
1138  enddo
1139  enddo
1140  hecmw_localmat_equal = 1
1141  end function hecmw_localmat_equal
1142 
1143 !!!
1144 !!! Subroutines for parallel contact analysis with iterative linear solver
1145 !!!
1146 
1147  subroutine hecmw_localmat_assemble(BTmat, hecMESH, hecMESHnew)
1148  implicit none
1149  type (hecmwst_local_matrix), intent(inout) :: btmat
1150  type (hecmwst_local_mesh), intent(in) :: hecmesh
1151  type (hecmwst_local_mesh), intent(inout) :: hecmeshnew
1152  integer(kind=kint) :: nn_int, np, ndof, ndof2, nr_ext, nnz_ext
1153  integer(kind=kint), allocatable :: exp_rows_index(:), exp_cols_index(:)
1154  integer(kind=kint), allocatable :: exp_rows_item(:,:), exp_cols_item(:,:)
1155  type (hecmwst_local_matrix), allocatable :: bt_ext(:)
1156  type (hecmwst_local_matrix) :: bt_int
1157  type (hecmwst_local_matrix) :: btnew
1158  ! some checks
1159  if (debug >= 1) write(0,*) 'DEBUG: nr,nc,nnz,ndof',btmat%nr,btmat%nc,btmat%nnz,btmat%ndof
1160  if (btmat%nr /= hecmesh%n_node) stop 'ERROR: invalid size in hecmw_localmat_assemble'
1161  !
1162  nn_int = hecmesh%nn_internal
1163  np = hecmesh%n_node
1164  ndof = btmat%ndof
1165  ndof2 = ndof*ndof
1166  !
1167  nr_ext = np - nn_int
1168  nnz_ext = btmat%index(np) - btmat%index(nn_int)
1169  !
1170  call prepare_bt_ext(btmat, hecmesh, exp_rows_index, exp_rows_item, bt_ext)
1171  if (debug >= 1) write(0,*) 'DEBUG: prepare_BT_ext done'
1172  !
1173  call prepare_column_info(hecmesh, bt_ext, exp_cols_index, exp_cols_item)
1174  if (debug >= 1) write(0,*) 'DEBUG: prepare_column info done'
1175  !
1176  call send_bt_ext_and_recv_bt_int(hecmesh, exp_rows_index, exp_rows_item, bt_ext, &
1177  exp_cols_index, exp_cols_item, bt_int, hecmeshnew)
1178  if (debug >= 1) write(0,*) 'DEBUG: send BT_ext and recv BT_int done'
1179  !
1180  !write(0,*) 'BTmat%ndof,BT_int%ndof',BTmat%ndof,BT_int%ndof
1181  call hecmw_localmat_add(btmat, bt_int, btnew)
1182  if (debug >= 1) write(0,*) 'DEBUG: localmat_add done'
1183  !
1184  call hecmw_localmat_free(btmat)
1185  call hecmw_localmat_free(bt_int)
1186  !
1187  btmat%nr = btnew%nr
1188  btmat%nc = btnew%nc
1189  btmat%nnz = btnew%nnz
1190  btmat%ndof = btnew%ndof
1191  btmat%index => btnew%index
1192  btmat%item => btnew%item
1193  btmat%A => btnew%A
1194  !
1195  ! hecMESH%n_node = hecMESHnew%n_node
1196  ! hecMESH%n_neighbor_pe = hecMESHnew%n_neighbor_pe
1197  ! deallocate(hecMESH%neighbor_pe)
1198  ! deallocate(hecMESH%import_index)
1199  ! deallocate(hecMESH%export_index)
1200  ! deallocate(hecMESH%import_item)
1201  ! deallocate(hecMESH%export_item)
1202  ! deallocate(hecMESH%node_ID)
1203  ! deallocate(hecMESH%global_node_ID)
1204  ! hecMESH%neighbor_pe => hecMESHnew%neighbor_pe
1205  ! hecMESH%import_index => hecMESHnew%import_index
1206  ! hecMESH%export_index => hecMESHnew%export_index
1207  ! hecMESH%import_item => hecMESHnew%import_item
1208  ! hecMESH%export_item => hecMESHnew%export_item
1209  ! hecMESH%node_ID => hecMESHnew%node_ID
1210  ! hecMESH%global_node_ID => hecMESHnew%global_node_ID
1211  !
1212  if (debug >= 1) write(0,*) 'DEBUG: update BTmat and hecMESH done'
1213  end subroutine hecmw_localmat_assemble
1214 
1215  subroutine prepare_bt_ext(BTmat, hecMESH, exp_rows_index, exp_rows_item, BT_ext)
1216  implicit none
1217  type (hecmwst_local_matrix), intent(in) :: btmat
1218  type (hecmwst_local_mesh), intent(in) :: hecmesh
1219  integer(kind=kint), allocatable, intent(out) :: exp_rows_index(:)
1220  integer(kind=kint), allocatable, intent(out) :: exp_rows_item(:,:)
1221  type (hecmwst_local_matrix), allocatable, intent(out) :: bt_ext(:)
1222  integer(kind=kint), allocatable :: incl_nz(:), exp_cols_per_row(:), exp_rows_per_rank(:)
1223  integer(kind=kint) :: nn_int
1224  logical, parameter :: flg_check_nonzero_numerically = .true.
1225  nn_int = hecmesh%nn_internal
1226  !
1227  if (flg_check_nonzero_numerically) then
1228  ! efficient for assembling conMAT which has same non-zero profile as hecMAT
1229  ! but only dofs related to cntact are actually non-zero
1230  call check_external_nz_blocks(btmat, nn_int, incl_nz)
1231  else
1232  ! probably good enough to assemble T or T^t
1233  call incl_all_external_nz_blocks(btmat, nn_int, incl_nz)
1234  endif
1235  !
1236  call count_ext_rows_with_nz(btmat, nn_int, incl_nz, exp_cols_per_row)
1237  !
1238  call count_exp_rows_per_rank(hecmesh, exp_cols_per_row, exp_rows_per_rank)
1239  !
1240  allocate(exp_rows_index(0:hecmesh%n_neighbor_pe))
1241  call make_index(hecmesh%n_neighbor_pe, exp_rows_per_rank, exp_rows_index)
1242  !write(0,*) 'exp_rows_index',exp_rows_index(:)
1243  !
1244  deallocate(exp_rows_per_rank)
1245  !
1246  call make_exp_rows_item(hecmesh, exp_cols_per_row, exp_rows_index, exp_rows_item)
1247  !
1248  deallocate(exp_cols_per_row)
1249  !
1250  allocate(bt_ext(hecmesh%n_neighbor_pe))
1251  call extract_bt_ext(hecmesh, btmat, incl_nz, exp_rows_index, exp_rows_item, bt_ext)
1252  !
1253  deallocate(incl_nz)
1254  end subroutine prepare_bt_ext
1255 
1256  subroutine check_external_nz_blocks(BTmat, nn_internal, incl_nz)
1257  implicit none
1258  type (hecmwst_local_matrix), intent(in) :: btmat
1259  integer(kind=kint), intent(in) :: nn_internal
1260  integer(kind=kint), allocatable, intent(out) :: incl_nz(:)
1261  integer(kind=kint) :: ndof2, i0, nnz_ext, i, k, nnz_blk
1262  if (nn_internal > btmat%nr) stop 'ERROR: invalid nn_internal'
1263  ndof2 = btmat%ndof ** 2
1264  i0 = btmat%index(nn_internal)
1265  nnz_ext = btmat%index(btmat%nr) - i0
1266  allocate(incl_nz(nnz_ext))
1267  nnz_blk = 0
1268  do i = 1, nnz_ext
1269  incl_nz(i) = 0
1270  do k = 1, ndof2
1271  if (btmat%A(ndof2*(i0+i-1)+k) /= 0.0d0) then
1272  incl_nz(i) = 1
1273  nnz_blk = nnz_blk + 1
1274  exit
1275  endif
1276  enddo
1277  enddo
1278  if (debug >= 1) write(0,*) 'DEBUG: nnz_blk',nnz_blk
1279  end subroutine check_external_nz_blocks
1280 
1281  subroutine incl_all_external_nz_blocks(BTmat, nn_internal, incl_nz)
1282  implicit none
1283  type (hecmwst_local_matrix), intent(in) :: btmat
1284  integer(kind=kint), intent(in) :: nn_internal
1285  integer(kind=kint), allocatable, intent(out) :: incl_nz(:)
1286  integer(kind=kint) :: i0, nnz_ext
1287  if (nn_internal > btmat%nr) stop 'ERROR: invalid nn_internal'
1288  i0 = btmat%index(nn_internal)
1289  nnz_ext = btmat%index(btmat%nr) - i0
1290  allocate(incl_nz(nnz_ext))
1291  incl_nz(1:nnz_ext) = 1
1292  end subroutine incl_all_external_nz_blocks
1293 
1294  subroutine count_ext_rows_with_nz(BTmat, nn_internal, incl_nz, exp_cols_per_row)
1295  implicit none
1296  type (hecmwst_local_matrix), intent(in) :: btmat
1297  integer(kind=kint), intent(in) :: nn_internal
1298  integer(kind=kint), intent(in) :: incl_nz(:)
1299  integer(kind=kint), allocatable, intent(out) :: exp_cols_per_row(:)
1300  integer(kind=kint) :: nr_ext, nnz_int, i, irow, js, je, j, jcol
1301  nr_ext = btmat%nr - nn_internal
1302  nnz_int = btmat%index(nn_internal)
1303  allocate(exp_cols_per_row(nr_ext))
1304  exp_cols_per_row(:) = 0
1305  do i = 1, nr_ext
1306  irow = nn_internal+i
1307  js = btmat%index(irow-1)+1
1308  je = btmat%index(irow)
1309  do j = js, je
1310  jcol = btmat%item(j)
1311  if (incl_nz(j-nnz_int) == 1) exp_cols_per_row(i) = exp_cols_per_row(i) + 1
1312  enddo
1313  enddo
1314  !write(0,*) 'exp_cols_per_row',exp_cols_per_row(:)
1315  end subroutine count_ext_rows_with_nz
1316 
1317  subroutine count_exp_rows_per_rank(hecMESH, exp_cols_per_row, exp_rows_per_rank)
1318  implicit none
1319  type (hecmwst_local_mesh), intent(in) :: hecmesh
1320  integer(kind=kint), intent(in) :: exp_cols_per_row(:)
1321  integer(kind=kint), allocatable, intent(out) :: exp_rows_per_rank(:)
1322  integer(kind=kint) :: nn_int, np, nr_ext, i, irow, exp_rank, idom
1323  allocate(exp_rows_per_rank(hecmesh%n_neighbor_pe))
1324  exp_rows_per_rank(1:hecmesh%n_neighbor_pe) = 0
1325  nn_int = hecmesh%nn_internal
1326  np = hecmesh%n_node
1327  nr_ext = np - nn_int
1328  do i = 1, nr_ext
1329  if (exp_cols_per_row(i) > 0) then
1330  irow = nn_int + i
1331  exp_rank = hecmesh%node_ID(2*irow)
1332  call rank_to_idom(hecmesh, exp_rank, idom)
1333  exp_rows_per_rank(idom) = exp_rows_per_rank(idom) + 1
1334  endif
1335  enddo
1336  !write(0,*) 'exp_rows_per_rank',exp_rows_per_rank(:)
1337  end subroutine count_exp_rows_per_rank
1338 
1339  subroutine rank_to_idom(hecMESH, rank, idom)
1340  implicit none
1341  type (hecmwst_local_mesh), intent(in) :: hecmesh
1342  integer(kind=kint), intent(in) :: rank
1343  integer(kind=kint), intent(out) :: idom
1344  integer(kind=kint) :: i
1345  do i = 1, hecmesh%n_neighbor_pe
1346  if (hecmesh%neighbor_pe(i) == rank) then
1347  idom = i
1348  return
1349  endif
1350  enddo
1351  stop 'ERROR: exp_rank not found in neighbor_pe'
1352  end subroutine rank_to_idom
1353 
1354  subroutine make_index(len, cnt, index)
1355  implicit none
1356  integer(kind=kint), intent(in) :: len
1357  integer(kind=kint), intent(in) :: cnt(len)
1358  integer(kind=kint), intent(out) :: index(0:)
1359  integer(kind=kint) :: i
1360  ! write(0,*) 'make_index: len',len
1361  index(0) = 0
1362  do i = 1,len
1363  index(i) = index(i-1) + cnt(i)
1364  enddo
1365  end subroutine make_index
1366 
1367  subroutine make_exp_rows_item(hecMESH, exp_cols_per_row, exp_rows_index, exp_rows_item)
1368  implicit none
1369  type (hecmwst_local_mesh), intent(in) :: hecmesh
1370  integer(kind=kint), intent(in) :: exp_cols_per_row(:)
1371  integer(kind=kint), allocatable, intent(in) :: exp_rows_index(:)
1372  integer(kind=kint), allocatable, intent(out) :: exp_rows_item(:,:)
1373  integer(kind=kint), allocatable :: cnt(:)
1374  integer(kind=kint) :: nn_int, np, nr_ext, i, irow, exp_rank, idom, idx
1375  allocate(exp_rows_item(2,exp_rows_index(hecmesh%n_neighbor_pe)))
1376  allocate(cnt(hecmesh%n_neighbor_pe))
1377  cnt(:) = 0
1378  nn_int = hecmesh%nn_internal
1379  np = hecmesh%n_node
1380  nr_ext = np - nn_int
1381  do i = 1, nr_ext
1382  if (exp_cols_per_row(i) > 0) then
1383  irow = nn_int + i
1384  exp_rank = hecmesh%node_ID(2*irow)
1385  call rank_to_idom(hecmesh, exp_rank, idom)
1386  cnt(idom) = cnt(idom) + 1
1387  idx = exp_rows_index(idom-1) + cnt(idom)
1388  exp_rows_item(1,idx) = irow
1389  exp_rows_item(2,idx) = exp_cols_per_row(i)
1390  endif
1391  enddo
1392  !write(0,*) 'cnt',cnt(:)
1393  do idom = 1, hecmesh%n_neighbor_pe
1394  if (cnt(idom) /= exp_rows_index(idom)-exp_rows_index(idom-1)) stop 'ERROR: make exp_rows_item'
1395  enddo
1396  !write(0,*) 'exp_rows_item(1,:)',exp_rows_item(1,:)
1397  !write(0,*) 'exp_rows_item(2,:)',exp_rows_item(2,:)
1398  end subroutine make_exp_rows_item
1399 
1400  subroutine extract_bt_ext(hecMESH, BTmat, incl_nz, exp_rows_index, exp_rows_item, BT_ext)
1401  implicit none
1402  type (hecmwst_local_mesh), intent(in) :: hecmesh
1403  type (hecmwst_local_matrix), intent(in) :: btmat
1404  integer(kind=kint), intent(in) :: incl_nz(:)
1405  integer(kind=kint), allocatable, intent(in) :: exp_rows_index(:)
1406  integer(kind=kint), intent(in) :: exp_rows_item(:,:)
1407  type (hecmwst_local_matrix), allocatable, intent(out) :: bt_ext(:)
1408  integer(kind=kint) :: ndof, ndof2, nn_int, nnz_int, idom, j, idx, ncol, cnt, jrow, ks, ke, k, kcol
1409  allocate(bt_ext(hecmesh%n_neighbor_pe))
1410  ndof = btmat%ndof
1411  ndof2 = ndof * ndof
1412  nn_int = hecmesh%nn_internal
1413  nnz_int = btmat%index(nn_int)
1414  do idom = 1, hecmesh%n_neighbor_pe
1415  bt_ext(idom)%nr = exp_rows_index(idom) - exp_rows_index(idom-1)
1416  bt_ext(idom)%nc = btmat%nc
1417  bt_ext(idom)%nnz = 0
1418  bt_ext(idom)%ndof = ndof
1419  allocate(bt_ext(idom)%index(0:bt_ext(idom)%nr))
1420  bt_ext(idom)%index(0) = 0
1421  do j = 1, bt_ext(idom)%nr
1422  idx = exp_rows_index(idom-1) + j
1423  ncol = exp_rows_item(2,idx)
1424  bt_ext(idom)%index(j) = bt_ext(idom)%index(j-1) + ncol
1425  enddo
1426  bt_ext(idom)%nnz = bt_ext(idom)%index(bt_ext(idom)%nr)
1427  if (debug >= 1) write(0,*) 'DEBUG: idom,nr,nc,nnz,ndof', &
1428  idom,bt_ext(idom)%nr,bt_ext(idom)%nc,bt_ext(idom)%nnz,bt_ext(idom)%ndof
1429  allocate(bt_ext(idom)%item(bt_ext(idom)%nnz))
1430  allocate(bt_ext(idom)%A(bt_ext(idom)%nnz * ndof2))
1431  cnt = 0
1432  do j = 1, bt_ext(idom)%nr
1433  idx = exp_rows_index(idom-1) + j
1434  jrow = exp_rows_item(1,idx)
1435  if (jrow < 1 .or. btmat%nr < jrow) stop 'ERROR: extract BT_ext: jrow'
1436  ks = btmat%index(jrow-1)+1
1437  ke = btmat%index(jrow)
1438  do k = ks, ke
1439  kcol = btmat%item(k)
1440  if (incl_nz(k-nnz_int) == 0) cycle
1441  cnt = cnt + 1
1442  bt_ext(idom)%item(cnt) = kcol
1443  bt_ext(idom)%A(ndof2*(cnt-1)+1:ndof2*cnt) = btmat%A(ndof2*(k-1)+1:ndof2*k)
1444  enddo
1445  if (cnt /= bt_ext(idom)%index(j)) stop 'ERROR: extract BT_ext'
1446  enddo
1447  ! write(label,'(a,i0,a)') 'BT_ext(',idom,')'
1448  ! call debug_write_matrix(Bt_ext(idom), label, DEBUG_MATRIX)
1449  enddo
1450  end subroutine extract_bt_ext
1451 
1452  subroutine prepare_column_info(hecMESH, BT_ext, exp_cols_index, exp_cols_item)
1453  implicit none
1454  type (hecmwst_local_mesh), intent(in) :: hecmesh
1455  type (hecmwst_local_matrix), intent(in) :: bt_ext(:)
1456  integer(kind=kint), allocatable, intent(out) :: exp_cols_index(:)
1457  integer(kind=kint), allocatable, intent(out) :: exp_cols_item(:,:)
1458  !
1459  call make_exp_cols_index(hecmesh%n_neighbor_pe, bt_ext, exp_cols_index)
1460  if (debug >= 2) write(0,*) ' DEBUG2: make exp_cols_index done'
1461  if (debug >= 3) write(0,*) ' DEBUG3: exp_cols_index', exp_cols_index(0:hecmesh%n_neighbor_pe)
1462  !
1463  ! (col ID, rank, global ID)
1464  !
1465  call make_exp_cols_item(hecmesh, bt_ext, exp_cols_index, exp_cols_item)
1466  if (debug >= 2) write(0,*) ' DEBUG2: make exp_cols_item done'
1467  ! if (DEBUG >= 3) write(0,*) ' DEBUG3: exp_cols_item', exp_cols_item(1:cNCOL_ITEM,1:exp_cols_index(hecMESH%n_neighbor_pe))
1468  end subroutine prepare_column_info
1469 
1470  subroutine make_exp_cols_index(nnb, BT_ext, exp_cols_index)
1471  implicit none
1472  integer(kind=kint), intent(in) :: nnb
1473  type (hecmwst_local_matrix), intent(in) :: bt_ext(:)
1474  integer(kind=kint), allocatable, intent(out) :: exp_cols_index(:)
1475  integer(kind=kint) :: idom
1476  allocate(exp_cols_index(0:nnb))
1477  exp_cols_index(0) = 0
1478  do idom = 1, nnb
1479  exp_cols_index(idom) = exp_cols_index(idom-1) + bt_ext(idom)%nnz
1480  enddo
1481  end subroutine make_exp_cols_index
1482 
1483  subroutine make_exp_cols_item(hecMESH, BT_ext, exp_cols_index, exp_cols_item)
1484  implicit none
1485  type (hecmwst_local_mesh), intent(in) :: hecmesh
1486  type (hecmwst_local_matrix), intent(in) :: bt_ext(:)
1487  integer(kind=kint), allocatable, intent(in) :: exp_cols_index(:)
1488  integer(kind=kint), allocatable, intent(out) :: exp_cols_item(:,:)
1489  integer(kind=kint) :: cnt, idom, j, jcol
1490  allocate(exp_cols_item(cncol_item,exp_cols_index(hecmesh%n_neighbor_pe)))
1491  cnt = 0
1492  do idom = 1, hecmesh%n_neighbor_pe
1493  do j = 1, bt_ext(idom)%nnz
1494  cnt = cnt + 1
1495  jcol = bt_ext(idom)%item(j)
1496  ! if (DEBUG >= 3) write(0,*) ' DEBUG3: idom,j,cnt,jcol,nn_internal,n_node',&
1497  ! idom,j,cnt,jcol,hecMESH%nn_internal,hecMESH%n_node
1498  ! if (DEBUG >= 3) write(0,*) ' DEBUG3: size of exp_cols_item',size(exp_cols_item)
1499  ! if (DEBUG >= 3) write(0,*) ' DEBUG3: size of node_ID',size(hecMESH%node_ID)
1500  ! if (DEBUG >= 3) write(0,*) ' DEBUG3: size of global_node_ID',size(hecMESH%global_node_ID)
1501  exp_cols_item(clid,cnt) = hecmesh%node_ID(2*jcol-1)
1502  exp_cols_item(crank,cnt) = hecmesh%node_ID(2*jcol)
1503  if (cncol_item >= 3) exp_cols_item(cgid,cnt) = hecmesh%global_node_ID(jcol)
1504  ! if (DEBUG >= 3) write(0,*) ' DEBUG3: lid,rank(,gid)',exp_cols_item(1:cNCOL_ITEM,cnt)
1505  enddo
1506  if (cnt /= exp_cols_index(idom)) stop 'ERROR: make exp_cols_item'
1507  enddo
1508  end subroutine make_exp_cols_item
1509 
1510  subroutine send_bt_ext_and_recv_bt_int(hecMESH, exp_rows_index, exp_rows_item, BT_ext, &
1511  exp_cols_index, exp_cols_item, BT_int, hecMESHnew)
1512  implicit none
1513  type (hecmwst_local_mesh), intent(in) :: hecmesh
1514  integer(kind=kint), allocatable, intent(inout) :: exp_rows_index(:), exp_cols_index(:)
1515  integer(kind=kint), allocatable, intent(inout) :: exp_rows_item(:,:), exp_cols_item(:,:)
1516  type (hecmwst_local_matrix), allocatable, intent(inout) :: bt_ext(:)
1517  type (hecmwst_local_matrix), intent(out) :: bt_int
1518  type (hecmwst_local_mesh), intent(inout) :: hecmeshnew
1519  integer(kind=kint), allocatable :: imp_rows_index(:), imp_cols_index(:)
1520  integer(kind=kint), allocatable :: imp_rows_item(:,:), imp_cols_item(:,:)
1521  real(kind=kreal), allocatable :: imp_vals_item(:)
1522  integer(kind=kint), allocatable :: map(:), add_nodes(:,:)
1523  integer(kind=kint) :: ndof, ndof2, idom, n_add_node, i0
1524  if (hecmesh%n_neighbor_pe == 0) return
1525  ndof = bt_ext(1)%ndof
1526  ndof2 = ndof*ndof
1527  !
1528  call convert_rowid_to_remote_localid(hecmesh, exp_rows_index(hecmesh%n_neighbor_pe), exp_rows_item)
1529  if (debug >= 2) write(0,*) ' DEBUG2: convert rowID to remote localID done'
1530  !
1531  call send_recv_bt_ext_nr_nnz(hecmesh, bt_ext, imp_rows_index, imp_cols_index)
1532  if (debug >= 2) write(0,*) ' DEBUG2: send recv BT_ext nr and nnz done'
1533  !
1534  call send_recv_bt_ext_contents(hecmesh, bt_ext, &
1535  exp_rows_index, exp_cols_index, exp_rows_item, exp_cols_item, &
1536  imp_rows_index, imp_cols_index, &
1537  imp_rows_item, imp_cols_item, imp_vals_item)
1538  if (debug >= 2) write(0,*) ' DEBUG2: send recv BT_ext contents done'
1539  !
1540  do idom = 1, hecmesh%n_neighbor_pe
1541  call hecmw_localmat_free(bt_ext(idom))
1542  enddo
1543  deallocate(bt_ext)
1544  !
1545  call allocate_bt_int(hecmesh, ndof, imp_rows_index, imp_rows_item, bt_int)
1546  if (debug >= 2) write(0,*) ' DEBUG2: allocate BT_int done'
1547  !
1548  ! call copy_mesh(hecMESH, hecMESHnew)
1549  ! if (DEBUG >= 2) write(0,*) ' DEBUG2: copy mesh done'
1550  !
1551  call map_imported_cols(hecmeshnew, imp_cols_index(hecmesh%n_neighbor_pe), &
1552  imp_cols_item, n_add_node, add_nodes, map, i0)
1553  if (debug >= 2) write(0,*) ' DEBUG2: map imported cols done'
1554  !
1555  call update_comm_table(hecmeshnew, n_add_node, add_nodes, i0)
1556  if (debug >= 2) write(0,*) ' DEBUG2: update comm_table done'
1557  !
1558  bt_int%nc = hecmeshnew%n_node
1559  !
1560  call copy_vals_to_bt_int(hecmesh%n_neighbor_pe, imp_rows_index, imp_cols_index, &
1561  imp_rows_item, map, ndof2, imp_vals_item, bt_int)
1562  if (debug >= 2) write(0,*) ' DEBUG2: copy vals to BT_int done'
1563  !
1564  deallocate(imp_rows_index)
1565  deallocate(imp_cols_index)
1566  deallocate(imp_rows_item)
1567  deallocate(imp_cols_item)
1568  deallocate(imp_vals_item)
1569  deallocate(map)
1570  !
1571  call sort_and_uniq_rows(bt_int)
1572  if (debug >= 2) write(0,*) ' DEBUG2: sort and uniq rows of BT_int done'
1573  end subroutine send_bt_ext_and_recv_bt_int
1574 
1575  subroutine convert_rowid_to_remote_localid(hecMESH, len, exp_rows_item)
1576  implicit none
1577  type (hecmwst_local_mesh), intent(in) :: hecmesh
1578  integer(kind=kint), intent(in) :: len
1579  integer(kind=kint), intent(out) :: exp_rows_item(:,:)
1580  integer(kind=kint) :: i
1581  do i = 1, len
1582  exp_rows_item(1,i) = hecmesh%node_ID(2 * exp_rows_item(1,i) - 1)
1583  enddo
1584  end subroutine convert_rowid_to_remote_localid
1585 
1586  subroutine send_recv_bt_ext_nr_nnz(hecMESH, BT_ext, imp_rows_index, imp_cols_index)
1587  use m_hecmw_comm_f
1588  implicit none
1589  type (hecmwst_local_mesh), intent(in) :: hecmesh
1590  type (hecmwst_local_matrix), intent(in) :: bt_ext(:)
1591  integer(kind=kint), allocatable, intent(out) :: imp_rows_index(:), imp_cols_index(:)
1592  integer(kind=kint) :: nnb, idom, irank, tag, recvbuf(2)
1593  integer(kind=kint), allocatable :: sendbuf(:,:)
1594  integer(kind=kint), allocatable :: requests(:)
1595  integer(kind=kint), allocatable :: statuses(:,:)
1596  nnb = hecmesh%n_neighbor_pe
1597  allocate(imp_rows_index(0:nnb))
1598  allocate(imp_cols_index(0:nnb))
1599  allocate(requests(nnb))
1600  allocate(statuses(hecmw_status_size, nnb))
1601  allocate(sendbuf(2,nnb))
1602  do idom = 1, nnb
1603  irank = hecmesh%neighbor_pe(idom)
1604  ! nr = exp_rows_per_rank(idom)
1605  sendbuf(1,idom) = bt_ext(idom)%nr
1606  sendbuf(2,idom) = bt_ext(idom)%nnz
1607  tag=2001
1608  call hecmw_isend_int(sendbuf(1,idom), 2, irank, tag, hecmesh%MPI_COMM, &
1609  requests(idom))
1610  enddo
1611  imp_rows_index(0) = 0
1612  imp_cols_index(0) = 0
1613  do idom = 1, nnb
1614  irank = hecmesh%neighbor_pe(idom)
1615  tag = 2001
1616  call hecmw_recv_int(recvbuf, 2, irank, tag, &
1617  hecmesh%MPI_COMM, statuses(:,1))
1618  imp_rows_index(idom) = imp_rows_index(idom-1) + recvbuf(1)
1619  imp_cols_index(idom) = imp_cols_index(idom-1) + recvbuf(2)
1620  enddo
1621  call hecmw_waitall(nnb, requests, statuses)
1622  deallocate(requests)
1623  deallocate(statuses)
1624  deallocate(sendbuf)
1625  end subroutine send_recv_bt_ext_nr_nnz
1626 
1627  subroutine send_recv_bt_ext_contents(hecMESH, BT_ext, &
1628  exp_rows_index, exp_cols_index, exp_rows_item, exp_cols_item, &
1629  imp_rows_index, imp_cols_index, &
1630  imp_rows_item, imp_cols_item, imp_vals_item)
1632  implicit none
1633  type (hecmwST_local_mesh), intent(in) :: hecMESH
1634  type (hecmwST_local_matrix), intent(in) :: BT_ext(:)
1635  integer(kind=kint), allocatable, intent(inout) :: exp_rows_index(:), exp_cols_index(:)
1636  integer(kind=kint), allocatable, intent(inout) :: exp_rows_item(:,:), exp_cols_item(:,:)
1637  integer(kind=kint), allocatable, intent(in) :: imp_rows_index(:), imp_cols_index(:)
1638  integer(kind=kint), allocatable, intent(out) :: imp_rows_item(:,:), imp_cols_item(:,:)
1639  real(kind=kreal), allocatable, intent(out) :: imp_vals_item(:)
1640  integer(kind=kint) :: nnb, ndof2, n_send, idom, irank, tag, nr, nnz
1641  integer(kind=kint), allocatable :: requests(:)
1642  integer(kind=kint), allocatable :: statuses(:,:)
1643  nnb = hecmesh%n_neighbor_pe
1644  if (nnb < 1) return
1645  ndof2 = bt_ext(1)%ndof ** 2
1646  allocate(imp_rows_item(2,imp_rows_index(nnb)))
1647  allocate(imp_cols_item(cncol_item,imp_cols_index(nnb)))
1648  allocate(imp_vals_item(ndof2*imp_cols_index(nnb)))
1649  allocate(requests(3*nnb))
1650  allocate(statuses(hecmw_status_size, 3*nnb))
1651  n_send = 0
1652  do idom = 1, nnb
1653  irank = hecmesh%neighbor_pe(idom)
1654  if (bt_ext(idom)%nr > 0) then
1655  n_send = n_send + 1
1656  tag = 2002
1657  call hecmw_isend_int(exp_rows_item(1,exp_rows_index(idom-1)+1), &
1658  2*bt_ext(idom)%nr, irank, tag, hecmesh%MPI_COMM, &
1659  requests(n_send))
1660  n_send = n_send + 1
1661  tag = 2003
1662  call hecmw_isend_int(exp_cols_item(1,exp_cols_index(idom-1)+1), &
1663  cncol_item*bt_ext(idom)%nnz, irank, tag, hecmesh%MPI_COMM, &
1664  requests(n_send))
1665  n_send = n_send + 1
1666  tag = 2004
1667  call hecmw_isend_r(bt_ext(idom)%A, ndof2*bt_ext(idom)%nnz, irank, &
1668  tag, hecmesh%MPI_COMM, requests(n_send))
1669  endif
1670  enddo
1671  do idom = 1, nnb
1672  irank = hecmesh%neighbor_pe(idom)
1673  nr = imp_rows_index(idom) - imp_rows_index(idom-1)
1674  nnz = imp_cols_index(idom) - imp_cols_index(idom-1)
1675  if (nr > 0) then
1676  tag = 2002
1677  call hecmw_recv_int(imp_rows_item(1,imp_rows_index(idom-1)+1), &
1678  2*nr, irank, tag, hecmesh%MPI_COMM, statuses(:,1))
1679  tag = 2003
1680  call hecmw_recv_int(imp_cols_item(1,imp_cols_index(idom-1)+1), &
1681  cncol_item*nnz, irank, tag, hecmesh%MPI_COMM, statuses(:,1))
1682  tag = 2004
1683  call hecmw_recv_r(imp_vals_item(ndof2*imp_cols_index(idom-1)+1), &
1684  ndof2*nnz, irank, tag, hecmesh%MPI_COMM, statuses(:,1))
1685  endif
1686  enddo
1687  call hecmw_waitall(n_send, requests, statuses)
1688  deallocate(exp_rows_index)
1689  deallocate(exp_rows_item)
1690  deallocate(exp_cols_index)
1691  deallocate(exp_cols_item)
1692  end subroutine send_recv_bt_ext_contents
1693 
1694  subroutine allocate_bt_int(hecMESH, ndof, imp_rows_index, imp_rows_item, BT_int)
1695  implicit none
1696  type (hecmwST_local_mesh), intent(in) :: hecMESH
1697  integer(kind=kint), intent(in) :: ndof
1698  integer(kind=kint), allocatable, intent(in) :: imp_rows_index(:), imp_rows_item(:,:)
1699  type (hecmwST_local_matrix), intent(out) :: BT_int
1700  integer(kind=kint), allocatable :: cnt(:)
1701  integer(kind=kint) :: idom, is, ie, i, irow, ncol, ndof2
1702  ndof2 = ndof*ndof
1703  bt_int%nr = hecmesh%nn_internal
1704  bt_int%nc = hecmesh%n_node
1705  bt_int%nnz = 0
1706  bt_int%ndof = ndof
1707  allocate(cnt(bt_int%nr))
1708  cnt(:) = 0
1709  do idom = 1, hecmesh%n_neighbor_pe
1710  is = imp_rows_index(idom-1)+1
1711  ie = imp_rows_index(idom)
1712  do i = is, ie
1713  irow = imp_rows_item(1,i)
1714  ncol = imp_rows_item(2,i)
1715  if (irow < 1 .or. bt_int%nr < irow) stop 'ERROR: allocate BT_int'
1716  cnt(irow) = cnt(irow) + ncol !!! might include duplicate cols
1717  enddo
1718  enddo
1719  !
1720  allocate(bt_int%index(0:bt_int%nr))
1721  call make_index(bt_int%nr, cnt, bt_int%index)
1722  !
1723  bt_int%nnz = bt_int%index(bt_int%nr)
1724  allocate(bt_int%item(bt_int%nnz))
1725  allocate(bt_int%A(bt_int%nnz * ndof2))
1726  bt_int%A(:) = 0.d0
1727  end subroutine allocate_bt_int
1728 
1729  subroutine copy_mesh(src, dst)
1730  implicit none
1731  type (hecmwST_local_mesh), intent(in) :: src
1732  type (hecmwST_local_mesh), intent(out) :: dst
1733  dst%zero = src%zero
1734  dst%MPI_COMM = src%MPI_COMM
1735  dst%PETOT = src%PETOT
1736  dst%PEsmpTOT = src%PEsmpTOT
1737  dst%my_rank = src%my_rank
1738  dst%n_subdomain = src%n_subdomain
1739  dst%n_node = src%n_node
1740  dst%nn_internal = src%nn_internal
1741  dst%n_dof = src%n_dof
1742  dst%n_neighbor_pe = src%n_neighbor_pe
1743  allocate(dst%neighbor_pe(dst%n_neighbor_pe))
1744  dst%neighbor_pe(:) = src%neighbor_pe(:)
1745  allocate(dst%import_index(0:dst%n_neighbor_pe))
1746  allocate(dst%export_index(0:dst%n_neighbor_pe))
1747  dst%import_index(:)= src%import_index(:)
1748  dst%export_index(:)= src%export_index(:)
1749  allocate(dst%import_item(dst%import_index(dst%n_neighbor_pe)))
1750  dst%import_item(:) = src%import_item(:)
1751  allocate(dst%export_item(dst%export_index(dst%n_neighbor_pe)))
1752  dst%export_item(:) = src%export_item(:)
1753  allocate(dst%node_ID(2*dst%n_node))
1754  dst%node_ID(1:2*dst%n_node) = src%node_ID(1:2*src%n_node)
1755  allocate(dst%global_node_ID(dst%n_node))
1756  dst%global_node_ID(1:dst%n_node) = src%global_node_ID(1:src%n_node)
1757  dst%mpc%n_mpc = 0
1758  dst%node => src%node
1759  end subroutine copy_mesh
1760 
1761  subroutine map_imported_cols(hecMESHnew, ncols, cols, n_add_node, add_nodes, map, i0)
1762  implicit none
1763  type (hecmwST_local_mesh), intent(inout) :: hecMESHnew
1764  integer(kind=kint), intent(in) :: ncols
1765  integer(kind=kint), intent(in) :: cols(cNCOL_ITEM,ncols)
1766  integer(kind=kint), allocatable, intent(out) :: map(:)
1767  integer(kind=kint), intent(out) :: n_add_node
1768  integer(kind=kint), allocatable, intent(out) :: add_nodes(:,:)
1769  integer(kind=kint), intent(out) :: i0
1770  allocate(map(ncols))
1771  !
1772  call map_present_nodes(hecmeshnew, ncols, cols, map, n_add_node)
1773  !
1774  ! add nodes == unmapped nodes
1775  !
1776  call extract_add_nodes(ncols, cols, map, n_add_node, add_nodes)
1777  !
1778  call append_nodes(hecmeshnew, n_add_node, add_nodes, i0)
1779  !
1780  call map_additional_nodes(ncols, cols, n_add_node, add_nodes, i0, map)
1781  end subroutine map_imported_cols
1782 
1783  subroutine map_present_nodes(hecMESH, ncols, cols, map, n_add_node)
1784  implicit none
1785  type (hecmwST_local_mesh), intent(in) :: hecMESH
1786  integer(kind=kint), intent(in) :: ncols
1787  integer(kind=kint), intent(in) :: cols(cNCOL_ITEM,ncols)
1788  integer(kind=kint), intent(out) :: map(ncols)
1789  integer(kind=kint), intent(out) :: n_add_node
1790  integer(kind=kint) :: i, j, lid, rank, llid, n_ext_node, idx
1791  integer(kind=kint), allocatable :: ext_node(:)
1792  type (hecmwST_pair_array) :: parray
1793  !
1794  call hecmw_pair_array_init(parray, hecmesh%n_node - hecmesh%nn_internal)
1795  do i = hecmesh%nn_internal + 1, hecmesh%n_node
1796  call hecmw_pair_array_append(parray, i, hecmesh%node_ID(2*i-1), hecmesh%node_ID(2*i))
1797  enddo
1798  call hecmw_pair_array_sort(parray)
1799  !
1800  n_add_node = 0
1801  n_ext_node = 0
1802  allocate(ext_node(ncols))
1803  !$omp parallel default(none), &
1804  !$omp& private(i,lid,rank,llid,idx,j), &
1805  !$omp& shared(ncols,hecMESH,cols,map,n_ext_node,ext_node,parray), &
1806  !$omp& reduction(+:n_add_node)
1807  !$omp do
1808  do i = 1, ncols
1809  lid = cols(clid,i)
1810  rank = cols(crank,i)
1811  ! check rank
1812  if (rank == hecmesh%my_rank) then ! internal: set mapping
1813  map(i) = lid
1814  else ! external
1815  !$omp atomic capture
1816  n_ext_node = n_ext_node + 1
1817  idx = n_ext_node
1818  !$omp end atomic
1819  ext_node(idx) = i
1820  endif
1821  enddo
1822  !$omp end do
1823  !$omp do
1824  do j = 1, n_ext_node
1825  i = ext_node(j)
1826  lid = cols(clid,i)
1827  rank = cols(crank,i)
1828  ! search node_ID in external nodes
1829  llid = hecmw_pair_array_find_id(parray, lid, rank)
1830  if (llid > 0) then ! found: set mapping
1831  map(i) = llid
1832  else ! not found
1833  map(i) = -1
1834  n_add_node = n_add_node + 1
1835  endif
1836  enddo
1837  !$omp end do
1838  !$omp end parallel
1839  deallocate(ext_node)
1840  !
1841  call hecmw_pair_array_finalize(parray)
1842  end subroutine map_present_nodes
1843 
1844  subroutine extract_add_nodes(ncols, cols, map, n_add_node, add_nodes)
1845  implicit none
1846  integer(kind=kint), intent(in) :: ncols
1847  integer(kind=kint), intent(in) :: cols(cNCOL_ITEM,ncols), map(ncols)
1848  integer(kind=kint), intent(inout) :: n_add_node
1849  integer(kind=kint), allocatable, intent(out) :: add_nodes(:,:)
1850  integer(kind=kint) :: cnt, i
1851  allocate(add_nodes(cncol_item,n_add_node))
1852  cnt = 0
1853  do i = 1, ncols
1854  if (map(i) == -1) then
1855  cnt = cnt + 1
1856  add_nodes(1:cncol_item,cnt) = cols(1:cncol_item,i)
1857  endif
1858  enddo
1859  if (cnt /= n_add_node) stop 'ERROR: extract add_nodes'
1860  call sort_and_uniq_add_nodes(n_add_node, add_nodes)
1861  end subroutine extract_add_nodes
1862 
1863  subroutine sort_and_uniq_add_nodes(n_add_node, add_nodes)
1864  implicit none
1865  integer(kind=kint), intent(inout) :: n_add_node
1866  integer(kind=kint), intent(inout) :: add_nodes(cNCOL_ITEM,n_add_node)
1867  integer(kind=kint) :: ndup
1868  call sort_add_nodes(add_nodes, 1, n_add_node)
1869  call uniq_add_nodes(add_nodes, n_add_node, ndup)
1870  n_add_node = n_add_node - ndup
1871  end subroutine sort_and_uniq_add_nodes
1872 
1873  recursive subroutine sort_add_nodes(add_nodes, id1, id2)
1874  implicit none
1875  integer(kind=kint), intent(inout) :: add_nodes(:,:)
1876  integer(kind=kint), intent(in) :: id1, id2
1877  integer(kind=kint) :: center, left, right
1878  integer(kind=kint) :: pivot(cNCOL_ITEM), tmp(cNCOL_ITEM)
1879  if (id1 >= id2) return
1880  center = (id1 + id2) / 2
1881  pivot(1:cncol_item) = add_nodes(1:cncol_item,center)
1882  left = id1
1883  right = id2
1884  do
1885  do while ((add_nodes(crank,left) < pivot(crank)) .or. &
1886  (add_nodes(crank,left) == pivot(crank) .and. add_nodes(clid,left) < pivot(clid)))
1887  left = left + 1
1888  enddo
1889  do while ((pivot(crank) < add_nodes(crank,right)) .or. &
1890  (pivot(crank) == add_nodes(crank,right) .and. pivot(clid) < add_nodes(clid,right)))
1891  right = right - 1
1892  enddo
1893  if (left >= right) exit
1894  tmp(1:cncol_item) = add_nodes(1:cncol_item,left)
1895  add_nodes(1:cncol_item,left) = add_nodes(1:cncol_item,right)
1896  add_nodes(1:cncol_item,right) = tmp(1:cncol_item)
1897  left = left + 1
1898  right = right - 1
1899  enddo
1900  if (id1 < left-1) call sort_add_nodes(add_nodes, id1, left-1)
1901  if (right+1 < id2) call sort_add_nodes(add_nodes, right+1, id2)
1902  return
1903  end subroutine sort_add_nodes
1904 
1905  subroutine uniq_add_nodes(add_nodes, len, ndup)
1906  implicit none
1907  integer(kind=kint), intent(inout) :: add_nodes(:,:)
1908  integer(kind=kint), intent(in) :: len
1909  integer(kind=kint), intent(out) :: ndup
1910  integer(kind=kint) :: i
1911  ndup = 0
1912  do i = 2,len
1913  if (add_nodes(clid,i) == add_nodes(clid,i-1-ndup) .and. &
1914  add_nodes(crank,i) == add_nodes(crank,i-1-ndup)) then
1915  ndup = ndup + 1
1916  else if (ndup > 0) then
1917  add_nodes(1:cncol_item,i-ndup) = add_nodes(1:cncol_item,i)
1918  endif
1919  enddo
1920  end subroutine uniq_add_nodes
1921 
1922  subroutine search_add_nodes(n_add_node, add_nodes, rank, lid, idx)
1923  implicit none
1924  integer(kind=kint), intent(in) :: n_add_node
1925  integer(kind=kint), intent(in) :: add_nodes(cNCOL_ITEM,n_add_node)
1926  integer(kind=kint), intent(in) :: rank
1927  integer(kind=kint), intent(in) :: lid
1928  integer(kind=kint), intent(out) :: idx
1929  integer(kind=kint) :: left, right, center
1930  left = 1
1931  right = n_add_node
1932  do while (left <= right)
1933  center = (left + right) / 2
1934  if ((rank == add_nodes(crank,center)) .and. (lid == add_nodes(clid,center))) then
1935  idx = center
1936  return
1937  else if ((rank < add_nodes(crank,center)) .or. &
1938  (rank == add_nodes(crank,center) .and. lid < add_nodes(clid,center))) then
1939  right = center - 1
1940  else if ((add_nodes(crank,center) < rank) .or. &
1941  (add_nodes(crank,center) == rank .and. add_nodes(clid,center) < lid)) then
1942  left = center + 1
1943  endif
1944  end do
1945  idx = -1
1946  end subroutine search_add_nodes
1947 
1948  subroutine append_nodes(hecMESHnew, n_add_node, add_nodes, i0)
1949  implicit none
1950  type (hecmwST_local_mesh), intent(inout) :: hecMESHnew
1951  integer(kind=kint), intent(in) :: n_add_node
1952  integer(kind=kint), intent(in) :: add_nodes(:,:)
1953  integer(kind=kint), intent(out) :: i0
1954  integer(kind=kint) :: n_node, i, ii
1955  integer(kind=kint), pointer :: node_ID(:), global_node_ID(:)
1956  i0 = hecmeshnew%n_node
1957  n_node = hecmeshnew%n_node + n_add_node
1958  allocate(node_id(2*n_node))
1959  allocate(global_node_id(n_node))
1960  do i = 1, hecmeshnew%n_node
1961  node_id(2*i-1) = hecmeshnew%node_ID(2*i-1)
1962  node_id(2*i ) = hecmeshnew%node_ID(2*i )
1963  global_node_id(i) = hecmeshnew%global_node_ID(i)
1964  enddo
1965  do i = 1, n_add_node
1966  ii = hecmeshnew%n_node + i
1967  node_id(2*ii-1) = add_nodes(clid,i)
1968  node_id(2*ii ) = add_nodes(crank,i)
1969  if (cncol_item >= 3) then
1970  global_node_id(ii) = add_nodes(cgid,i)
1971  else
1972  global_node_id(ii) = -1
1973  endif
1974  enddo
1975  deallocate(hecmeshnew%node_ID)
1976  deallocate(hecmeshnew%global_node_ID)
1977  hecmeshnew%n_node = n_node
1978  hecmeshnew%node_ID => node_id
1979  hecmeshnew%global_node_ID => global_node_id
1980  end subroutine append_nodes
1981 
1982  subroutine map_additional_nodes(ncols, cols, n_add_node, add_nodes, i0, map)
1983  implicit none
1984  integer(kind=kint), intent(in) :: ncols
1985  integer(kind=kint), intent(in) :: cols(cNCOL_ITEM,ncols)
1986  integer(kind=kint), intent(in) :: n_add_node
1987  integer(kind=kint), intent(in) :: add_nodes(cNCOL_ITEM,n_add_node)
1988  integer(kind=kint), intent(in) :: i0
1989  integer(kind=kint), intent(inout) :: map(ncols)
1990  integer(kind=kint) :: i, j
1991  do i = 1, ncols
1992  if (map(i) > 0) cycle
1993  call search_add_nodes(n_add_node, add_nodes, cols(crank,i), cols(clid,i), j)
1994  if (j == -1) stop 'ERROR: map_additional_nodes'
1995  map(i) = i0 + j
1996  enddo
1997  end subroutine map_additional_nodes
1998 
1999  subroutine update_comm_table(hecMESHnew, n_add_node, add_nodes, i0)
2000  use m_hecmw_comm_f
2001  implicit none
2002  type (hecmwST_local_mesh), intent(inout) :: hecMESHnew
2003  integer(kind=kint), intent(in) :: n_add_node
2004  integer(kind=kint), allocatable, intent(inout) :: add_nodes(:,:)
2005  integer(kind=kint), intent(in) :: i0
2006  integer(kind=kint), allocatable :: n_add_imp(:), add_imp_index(:)
2007  integer(kind=kint), allocatable :: add_imp_item_remote(:), add_imp_item_local(:)
2008  integer(kind=kint), allocatable :: n_add_exp(:), add_exp_index(:), add_exp_item(:)
2009  integer(kind=kint), allocatable :: n_new_imp(:), n_new_exp(:)
2010  integer(kind=kint) :: npe, nnb, comm, new_nnb
2011  integer(kind=kint), pointer :: nbpe(:), new_nbpe(:)
2012  integer(kind=kint), pointer :: import_index(:), export_index(:), import_item(:), export_item(:)
2013  integer(kind=kint), pointer :: new_import_index(:), new_export_index(:)
2014  integer(kind=kint), pointer :: new_import_item(:), new_export_item(:)
2015  npe = hecmeshnew%PETOT
2016  nnb = hecmeshnew%n_neighbor_pe
2017  comm = hecmeshnew%MPI_COMM
2018  nbpe => hecmeshnew%neighbor_pe
2019  import_index => hecmeshnew%import_index
2020  export_index => hecmeshnew%export_index
2021  import_item => hecmeshnew%import_item
2022  export_item => hecmeshnew%export_item
2023  !
2024  call count_add_imp_per_rank(n_add_node, add_nodes, npe, n_add_imp)
2025  if (debug >= 3) write(0,*) ' DEBUG3: count add_imp per rank done'
2026  !
2027  allocate(add_imp_index(0:npe))
2028  call make_index(npe, n_add_imp, add_imp_index)
2029  if (debug >= 3) write(0,*) ' DEBUG3: make add_imp_index done'
2030  !
2031  call make_add_imp_item(n_add_node, add_nodes, npe, i0, add_imp_index, &
2032  add_imp_item_remote, add_imp_item_local)
2033  if (debug >= 3) write(0,*) ' DEBUG3: make add_imp_item done'
2034  !
2035  deallocate(add_nodes)
2036  !
2037  ! all_to_all n_add_imp -> n_add_exp
2038  !
2039  allocate(n_add_exp(npe))
2040  call hecmw_alltoall_int(n_add_imp, 1, n_add_exp, 1, comm)
2041  if (debug >= 3) write(0,*) ' DEBUG3: alltoall n_add_imp to n_add_exp done'
2042  !
2043  allocate(add_exp_index(0:npe))
2044  call make_index(npe, n_add_exp, add_exp_index)
2045  if (debug >= 3) write(0,*) ' DEBUG3: make add_exp_index done'
2046  !
2047  call send_recv_add_imp_exp_item(npe, add_imp_index, add_imp_item_remote, &
2048  add_exp_index, add_exp_item, comm)
2049  if (debug >= 3) write(0,*) ' DEBUG3: send recv add_imp/exp_item done'
2050  !
2051  ! count new import
2052  !
2053  call count_new_comm_nodes(npe, nnb, nbpe, import_index, n_add_imp, n_new_imp)
2054  if (debug >= 3) write(0,*) ' DEBUG3: count new comm_nodes (import) done'
2055  !
2056  ! count new export
2057  !
2058  call count_new_comm_nodes(npe, nnb, nbpe, export_index, n_add_exp, n_new_exp)
2059  if (debug >= 3) write(0,*) ' DEBUG3: count new comm_nodes (export) done'
2060  !
2061  call update_neighbor_pe(npe, n_new_imp, n_new_exp, new_nnb, new_nbpe)
2062  if (debug >= 3) write(0,*) ' DEBUG3: update neighbor_pe done'
2063  !
2064  ! merge import table: import
2065  !
2066  call merge_comm_table(npe, nnb, nbpe, import_index, import_item, &
2067  new_nnb, new_nbpe, add_imp_index, add_imp_item_local, n_add_imp, n_new_imp, &
2068  new_import_index, new_import_item)
2069  if (debug >= 3) write(0,*) ' DEBUG3: merge comm_table (import) done'
2070  !
2071  deallocate(n_add_imp)
2072  deallocate(add_imp_index)
2073  deallocate(add_imp_item_remote, add_imp_item_local)
2074  deallocate(n_new_imp)
2075  !
2076  ! merge export table: export
2077  !
2078  call merge_comm_table(npe, nnb, nbpe, export_index, export_item, &
2079  new_nnb, new_nbpe, add_exp_index, add_exp_item, n_add_exp, n_new_exp, &
2080  new_export_index, new_export_item)
2081  if (debug >= 3) write(0,*) ' DEBUG3: merge comm_table (export) done'
2082  !
2083  deallocate(n_add_exp)
2084  deallocate(add_exp_index)
2085  deallocate(add_exp_item)
2086  deallocate(n_new_exp)
2087  !
2088  deallocate(nbpe)
2089  deallocate(import_index,import_item)
2090  deallocate(export_index,export_item)
2091  hecmeshnew%n_neighbor_pe = new_nnb
2092  hecmeshnew%neighbor_pe => new_nbpe
2093  hecmeshnew%import_index => new_import_index
2094  hecmeshnew%export_index => new_export_index
2095  hecmeshnew%import_item => new_import_item
2096  hecmeshnew%export_item => new_export_item
2097  end subroutine update_comm_table
2098 
2099  subroutine count_add_imp_per_rank(n_add_node, add_nodes, npe, n_add_imp)
2100  implicit none
2101  integer(kind=kint), intent(in) :: n_add_node
2102  integer(kind=kint), intent(in) :: add_nodes(cNCOL_ITEM,n_add_node)
2103  integer(kind=kint), intent(in) :: npe
2104  integer(kind=kint), allocatable, intent(out) :: n_add_imp(:)
2105  integer(kind=kint) :: i, rank
2106  allocate(n_add_imp(npe))
2107  n_add_imp(:) = 0
2108  do i = 1, n_add_node
2109  rank = add_nodes(crank,i)
2110  n_add_imp(rank+1) = n_add_imp(rank+1) + 1
2111  enddo
2112  end subroutine count_add_imp_per_rank
2113 
2114  subroutine make_add_imp_item(n_add_node, add_nodes, npe, i0, add_imp_index, &
2115  add_imp_item_remote, add_imp_item_local)
2116  implicit none
2117  integer(kind=kint), intent(in) :: n_add_node
2118  integer(kind=kint), intent(in) :: add_nodes(cNCOL_ITEM,n_add_node)
2119  integer(kind=kint), intent(in) :: npe, i0
2120  integer(kind=kint), allocatable, intent(in) :: add_imp_index(:)
2121  integer(kind=kint), allocatable, intent(out) :: add_imp_item_remote(:), add_imp_item_local(:)
2122  integer(kind=kint), allocatable :: cnt(:)
2123  integer(kind=kint) :: i, lid, rank, ipe
2124  allocate(add_imp_item_remote(add_imp_index(npe)))
2125  allocate(add_imp_item_local(add_imp_index(npe)))
2126  allocate(cnt(npe))
2127  cnt(:) = 0
2128  do i = 1, n_add_node
2129  lid = add_nodes(clid,i)
2130  rank = add_nodes(crank,i)
2131  ipe = rank + 1
2132  cnt(ipe) = cnt(ipe) + 1
2133  add_imp_item_remote(add_imp_index(ipe-1) + cnt(ipe)) = lid
2134  add_imp_item_local(add_imp_index(ipe-1) + cnt(ipe)) = i0 + i
2135  enddo
2136  deallocate(cnt)
2137  end subroutine make_add_imp_item
2138 
2139  subroutine send_recv_add_imp_exp_item(npe, add_imp_index, add_imp_item_remote, &
2140  add_exp_index, add_exp_item, mpi_comm)
2141  use m_hecmw_comm_f
2142  implicit none
2143  integer(kind=kint), intent(in) :: npe
2144  integer(kind=kint), allocatable, intent(in) :: add_imp_index(:), add_imp_item_remote(:)
2145  integer(kind=kint), allocatable, intent(in) :: add_exp_index(:)
2146  integer(kind=kint), allocatable, intent(out) :: add_exp_item(:)
2147  integer(kind=kint), intent(in) :: mpi_comm
2148  integer(kind=kint) :: n_send, i, irank, is, ie, len, tag
2149  integer(kind=kint), allocatable :: requests(:)
2150  integer(kind=kint), allocatable :: statuses(:,:)
2151  allocate(add_exp_item(add_exp_index(npe)))
2152  allocate(requests(npe))
2153  allocate(statuses(hecmw_status_size, npe))
2154  n_send = 0
2155  do i = 1, npe
2156  irank = i-1
2157  is = add_imp_index(i-1)+1
2158  ie = add_imp_index(i)
2159  len = ie - is + 1
2160  if (len == 0) cycle
2161  tag = 4001
2162  n_send = n_send + 1
2163  call hecmw_isend_int(add_imp_item_remote(is:ie), len, irank, tag, &
2164  mpi_comm, requests(n_send))
2165  enddo
2166  !
2167  do i = 1, npe
2168  irank = i-1
2169  is = add_exp_index(i-1)+1
2170  ie = add_exp_index(i)
2171  len = ie - is + 1
2172  if (len == 0) cycle
2173  tag = 4001
2174  call hecmw_recv_int(add_exp_item(is:ie), len, irank, tag, &
2175  mpi_comm, statuses(:,1))
2176  enddo
2177  call hecmw_waitall(n_send, requests, statuses)
2178  end subroutine send_recv_add_imp_exp_item
2179 
2180  subroutine count_new_comm_nodes(npe, org_nnb, org_nbpe, org_index, n_add, n_new)
2181  implicit none
2182  integer(kind=kint), intent(in) :: npe, org_nnb
2183  !integer(kind=kint), intent(in) :: org_nbpe(org_nnb), org_index(0:org_nnb), n_add(npe)
2184  integer(kind=kint), pointer, intent(in) :: org_nbpe(:), org_index(:)
2185  integer(kind=kint), intent(in) :: n_add(:)
2186  integer(kind=kint), allocatable, intent(out) :: n_new(:)
2187  integer(kind=kint) :: i, irank, n_org
2188  allocate(n_new(npe))
2189  n_new(:) = n_add(:)
2190  do i = 1, org_nnb
2191  irank = org_nbpe(i)
2192  n_org = org_index(i) - org_index(i-1)
2193  n_new(irank+1) = n_new(irank+1) + n_org
2194  enddo
2195  end subroutine count_new_comm_nodes
2196 
2197  subroutine update_neighbor_pe(npe, n_new_imp, n_new_exp, &
2198  new_nnb, new_nbpe)
2199  implicit none
2200  integer(kind=kint), intent(in) :: npe
2201  integer(kind=kint), intent(in) :: n_new_imp(npe), n_new_exp(npe)
2202  integer(kind=kint), intent(out) :: new_nnb
2203  integer(kind=kint), pointer, intent(out) :: new_nbpe(:)
2204  integer(kind=kint) :: i
2205  new_nnb = 0
2206  do i = 1, npe
2207  if (n_new_imp(i) > 0 .or. n_new_exp(i) > 0) new_nnb = new_nnb+1
2208  enddo
2209  allocate(new_nbpe(new_nnb))
2210  new_nnb = 0
2211  do i = 1, npe
2212  if (n_new_imp(i) > 0 .or. n_new_exp(i) > 0) then
2213  new_nnb = new_nnb+1
2214  new_nbpe(new_nnb) = i-1
2215  endif
2216  enddo
2217  end subroutine update_neighbor_pe
2218 
2219  subroutine merge_comm_table(npe, org_nnb, org_nbpe, org_index, org_item, &
2220  new_nnb, new_nbpe, add_index, add_item, n_add, n_new, new_index, new_item)
2221  implicit none
2222  integer(kind=kint), intent(in) :: npe, org_nnb
2223  !integer(kind=kint), intent(in) :: org_nbpe(org_nnb), org_index(0:org_nnb), org_item(:)
2224  integer(kind=kint), pointer, intent(in) :: org_nbpe(:), org_index(:), org_item(:)
2225  integer(kind=kint), intent(in) :: new_nnb
2226  !integer(kind=kint), intent(in) :: new_nbpe(new_nnb), add_index(0:npe), add_item(:)
2227  integer(kind=kint), pointer, intent(in) :: new_nbpe(:)
2228  integer(kind=kint), allocatable, intent(in) :: add_index(:), add_item(:)
2229  integer(kind=kint), intent(in) :: n_add(npe), n_new(npe)
2230  integer(kind=kint), pointer, intent(out) :: new_index(:), new_item(:)
2231  integer(kind=kint), allocatable :: cnt(:)
2232  integer(kind=kint) :: i, irank, j, jrank, i0, j0, len
2233  ! if (associated(new_index)) deallocate(new_index)
2234  ! if (associated(new_item)) deallocate(new_item)
2235  allocate(new_index(0:new_nnb))
2236  new_index(0) = 0
2237  do i = 1, new_nnb
2238  irank = new_nbpe(i)
2239  new_index(i) = new_index(i-1) + n_new(irank+1)
2240  enddo
2241  allocate(new_item(new_index(new_nnb)))
2242  allocate(cnt(npe))
2243  cnt(:) = 0
2244  j = 1
2245  jrank = new_nbpe(j)
2246  do i = 1, org_nnb
2247  if (org_index(i) - org_index(i-1) == 0) cycle
2248  irank = org_nbpe(i)
2249  do while (jrank < irank)
2250  j = j + 1
2251  if (j > new_nnb) exit
2252  jrank = new_nbpe(j)
2253  enddo
2254  if (jrank /= irank) stop 'ERROR: merging comm table: org into new'
2255  i0 = org_index(i-1)
2256  len = org_index(i) - i0
2257  j0 = new_index(j-1)
2258  new_item(j0+1:j0+len) = org_item(i0+1:i0+len)
2259  cnt(jrank+1) = len
2260  enddo
2261  j = 1
2262  jrank = new_nbpe(j)
2263  do i = 1, npe
2264  if (n_add(i) == 0) cycle
2265  irank = i-1
2266  do while (jrank < irank)
2267  j = j + 1
2268  jrank = new_nbpe(j)
2269  enddo
2270  if (jrank /= irank) stop 'ERROR: merging comm table: add into new'
2271  i0 = add_index(i-1)
2272  len = add_index(i) - i0
2273  j0 = new_index(j-1) + cnt(jrank+1)
2274  new_item(j0+1:j0+len) = add_item(i0+1:i0+len)
2275  cnt(jrank+1) = cnt(jrank+1) + len
2276  if (cnt(jrank+1) /= new_index(j)-new_index(j-1)) stop 'ERROR: merging comm table'
2277  enddo
2278  deallocate(cnt)
2279  end subroutine merge_comm_table
2280 
2281  subroutine copy_vals_to_bt_int(nnb, imp_rows_index, imp_cols_index, &
2282  imp_rows_item, map, ndof2, imp_vals_item, BT_int)
2283  implicit none
2284  integer(kind=kint), intent(in) :: nnb
2285  integer(kind=kint), allocatable, intent(in) :: imp_rows_index(:), imp_cols_index(:)
2286  integer(kind=kint), intent(in) :: imp_rows_item(:,:), map(:)
2287  integer(kind=kint), intent(in) :: ndof2
2288  real(kind=kreal), intent(in) :: imp_vals_item(:)
2289  type (hecmwST_local_matrix), intent(inout) :: BT_int
2290  integer(kind=kint), allocatable :: cnt(:)
2291  integer(kind=kint) :: idom, is, ie, ic0, i, irow, ncol, j0, j
2292  allocate(cnt(bt_int%nr))
2293  cnt(:) = 0
2294  do idom = 1, nnb
2295  is = imp_rows_index(idom-1)+1
2296  ie = imp_rows_index(idom)
2297  ic0 = imp_cols_index(idom-1)
2298  do i = is, ie
2299  irow = imp_rows_item(1,i)
2300  ncol = imp_rows_item(2,i)
2301  if (irow < 1 .or. bt_int%nr < irow) stop 'ERROR: copy vals to BT_int: irow'
2302  j0 = bt_int%index(irow-1) + cnt(irow)
2303  do j = 1, ncol
2304  bt_int%item(j0+j) = map(ic0+j)
2305  bt_int%A(ndof2*(j0+j-1)+1:ndof2*(j0+j)) = imp_vals_item(ndof2*(ic0+j-1)+1:ndof2*(ic0+j))
2306  enddo
2307  cnt(irow) = cnt(irow) + ncol
2308  ic0 = ic0 + ncol
2309  enddo
2310  if (ic0 /= imp_cols_index(idom)) stop 'ERROR: copy vals to BT_int: ic0'
2311  enddo
2312  deallocate(cnt)
2313  end subroutine copy_vals_to_bt_int
2314 
2315  subroutine sort_and_uniq_rows(BTmat)
2316  use hecmw_array_util
2317  implicit none
2318  type (hecmwST_local_matrix), intent(inout) :: BTmat
2319  integer(kind=kint) :: nr, ndof, ndof2
2320  integer(kind=kint) :: irow, is, ie, is_new, ie_new, i, i_new
2321  integer(kind=kint) :: ndup, ndup_tot
2322  integer(kind=kint) :: js, je, js_new, je_new
2323  integer(kind=kint) :: new_nnz
2324  integer(kind=kint), allocatable :: cnt(:)
2325  integer(kind=kint), pointer :: sort_item(:), new_index(:), new_item(:)
2326  real(kind=kreal), pointer :: new_a(:)
2327  logical :: sorted
2328  real(kind=kreal) :: t0, t1
2329  t0 = hecmw_wtime()
2330  nr = btmat%nr
2331  ! check if already sorted
2332  sorted = .true.
2333  outer: do irow = 1, nr
2334  is = btmat%index(irow-1)+1
2335  ie = btmat%index(irow)
2336  do i = is, ie-1
2337  if (btmat%item(i) >= btmat%item(i+1)) then
2338  sorted = .false.
2339  exit outer
2340  endif
2341  enddo
2342  end do outer
2343  t1 = hecmw_wtime()
2344  if (timer >= 4) write(0, '(A,f10.4,L2)') "####### sort_and_uniq_rows (1) : ",t1-t0,sorted
2345  t0 = hecmw_wtime()
2346  if (sorted) return
2347  ! perform sort
2348  ndof = btmat%ndof
2349  ndof2 = ndof*ndof
2350  ! duplicate item array (sort_item)
2351  allocate(sort_item(btmat%nnz))
2352  do i = 1, btmat%nnz
2353  sort_item(i) = btmat%item(i)
2354  enddo
2355  ! sort and uniq item for each row
2356  allocate(cnt(nr))
2357  ndup_tot = 0
2358  !$omp parallel do default(none), &
2359  !$omp& schedule(dynamic,1), &
2360  !$omp& private(irow,is,ie,ndup), &
2361  !$omp& shared(nr,BTmat,sort_item,cnt), &
2362  !$omp& reduction(+:ndup_tot)
2363  do irow = 1, nr
2364  is = btmat%index(irow-1)+1
2365  ie = btmat%index(irow)
2366  call hecmw_qsort_int_array(sort_item, is, ie)
2367  call hecmw_uniq_int_array(sort_item, is, ie, ndup)
2368  cnt(irow) = (ie-is+1) - ndup
2369  ndup_tot = ndup_tot + ndup
2370  enddo
2371  !$omp end parallel do
2372  t1 = hecmw_wtime()
2373  if (timer >= 4) write(0, '(A,f10.4,I5)') "####### sort_and_uniq_rows (2) : ",t1-t0,ndup_tot
2374  t0 = hecmw_wtime()
2375  ! make new index and item array (new_index, new_item)
2376  if (ndup_tot == 0) then
2377  new_index => btmat%index
2378  new_nnz = btmat%nnz
2379  new_item => sort_item
2380  else
2381  allocate(new_index(0:nr))
2382  call make_index(nr, cnt, new_index)
2383  new_nnz = new_index(nr)
2384  allocate(new_item(new_nnz))
2385  do irow = 1, nr
2386  is = btmat%index(irow-1)+1
2387  ie = is+cnt(irow)-1
2388  is_new = new_index(irow-1)+1
2389  ie_new = is_new+cnt(irow)-1
2390  new_item(is_new:ie_new) = sort_item(is:ie)
2391  enddo
2392  deallocate(sort_item)
2393  endif
2394  deallocate(cnt)
2395  t1 = hecmw_wtime()
2396  if (timer >= 4) write(0, '(A,f10.4)') "####### sort_and_uniq_rows (3) : ",t1-t0
2397  t0 = hecmw_wtime()
2398  ! allocate and clear value array (new_A)
2399  allocate(new_a(ndof2*new_nnz))
2400  new_a(:) = 0.d0
2401  ! copy/add value from old A to new A
2402  !$omp parallel do default(none), &
2403  !$omp& schedule(dynamic,1), &
2404  !$omp& private(irow,is,ie,is_new,ie_new,i,i_new,js,je,js_new,je_new), &
2405  !$omp& shared(nr,BTmat,new_index,new_item,ndof2,new_A)
2406  do irow = 1, nr
2407  is = btmat%index(irow-1)+1
2408  ie = btmat%index(irow)
2409  is_new = new_index(irow-1)+1
2410  ie_new = new_index(irow)
2411  ! for each item in row
2412  do i = is, ie
2413  ! find place in new item
2414  call hecmw_bsearch_int_array(new_item, is_new, ie_new, btmat%item(i), i_new)
2415  if (i_new == -1) stop 'ERROR: sort_and_uniq_rows'
2416  js = ndof2*(i-1)+1
2417  je = ndof2*i
2418  js_new = ndof2*(i_new-1)+1
2419  je_new = ndof2*i_new
2420  new_a(js_new:je_new) = new_a(js_new:je_new) + btmat%A(js:je)
2421  enddo
2422  enddo
2423  !$omp end parallel do
2424  t1 = hecmw_wtime()
2425  if (timer >= 4) write(0, '(A,f10.4)') "####### sort_and_uniq_rows (4) : ",t1-t0
2426  t0 = hecmw_wtime()
2427  ! deallocate/update nnz, index, item, A
2428  if (ndup_tot == 0) then
2429  deallocate(btmat%item)
2430  btmat%item => new_item
2431  deallocate(btmat%A)
2432  btmat%A => new_a
2433  else
2434  btmat%nnz = new_nnz
2435  deallocate(btmat%index)
2436  btmat%index => new_index
2437  deallocate(btmat%item)
2438  btmat%item => new_item
2439  deallocate(btmat%A)
2440  btmat%A => new_a
2441  endif
2442  end subroutine sort_and_uniq_rows
2443 
2444  subroutine hecmw_localmat_add(Amat, Bmat, Cmat)
2445  implicit none
2446  type (hecmwst_local_matrix), intent(in) :: amat
2447  type (hecmwst_local_matrix), intent(in) :: bmat
2448  type (hecmwst_local_matrix), intent(out) :: cmat
2449  integer(kind=kint) :: ndof, ndof2, nr, nc, i, icnt, js, je, j, jcol, idx, i0, k
2450  integer(kind=kint), allocatable :: iw(:)
2451  if (amat%ndof /= bmat%ndof) stop 'ERROR: hecmw_localmat_add: non-matching ndof'
2452  ndof = amat%ndof
2453  ndof2 = ndof*ndof
2454  nr = min(amat%nr, bmat%nr)
2455  nc = max(amat%nc, bmat%nc)
2456  cmat%ndof = ndof
2457  cmat%nr = nr
2458  cmat%nc = nc
2459  cmat%nnz = 0
2460  allocate(cmat%index(0:nr))
2461  cmat%index(0) = 0
2462  allocate(iw(nc))
2463  do i = 1, nr
2464  icnt = 0
2465  ! Amat
2466  js = amat%index(i-1)+1
2467  je = amat%index(i)
2468  do j = js, je
2469  jcol = amat%item(j)
2470  icnt = icnt + 1
2471  iw(icnt) = jcol
2472  enddo
2473  ! Bmat
2474  js = bmat%index(i-1)+1
2475  je = bmat%index(i)
2476  lj1: do j = js, je
2477  jcol = bmat%item(j)
2478  do k = 1, icnt
2479  if (iw(k) == jcol) cycle lj1
2480  enddo
2481  icnt = icnt + 1
2482  iw(icnt) = jcol
2483  enddo lj1
2484  cmat%index(i) = cmat%index(i-1) + icnt
2485  enddo
2486  cmat%nnz = cmat%index(nr)
2487  allocate(cmat%item(cmat%nnz))
2488  allocate(cmat%A(ndof2*cmat%nnz))
2489  do i = 1, nr
2490  i0 = cmat%index(i-1)
2491  icnt = 0
2492  ! Amat
2493  js = amat%index(i-1)+1
2494  je = amat%index(i)
2495  do j = js, je
2496  jcol = amat%item(j)
2497  icnt = icnt + 1
2498  idx = i0 + icnt
2499  cmat%item(idx) = jcol
2500  cmat%A(ndof2*(idx-1)+1:ndof2*idx) = amat%A(ndof2*(j-1)+1:ndof2*j)
2501  enddo
2502  ! Bmat
2503  js = bmat%index(i-1)+1
2504  je = bmat%index(i)
2505  lj2: do j = js, je
2506  jcol = bmat%item(j)
2507  do k = 1, icnt
2508  idx = i0 + k
2509  if (cmat%item(idx) == jcol) then
2510  cmat%A(ndof2*(idx-1)+1:ndof2*idx) = &
2511  cmat%A(ndof2*(idx-1)+1:ndof2*idx) + bmat%A(ndof2*(j-1)+1:ndof2*j)
2512  cycle lj2
2513  endif
2514  enddo
2515  icnt = icnt + 1
2516  idx = i0 + icnt
2517  cmat%item(idx) = jcol
2518  cmat%A(ndof2*(idx-1)+1:ndof2*idx) = bmat%A(ndof2*(j-1)+1:ndof2*j)
2519  enddo lj2
2520  if (i0 + icnt /= cmat%index(i)) stop 'ERROR: merge localmat'
2521  enddo
2522  call sort_and_uniq_rows(cmat)
2523  end subroutine hecmw_localmat_add
2524 
2525  ! subroutine hecmw_localmat_add(Amat, Bmat, Cmat)
2526  ! implicit none
2527  ! type (hecmwST_local_matrix), intent(in) :: Amat
2528  ! type (hecmwST_local_matrix), intent(in) :: Bmat
2529  ! type (hecmwST_local_matrix), intent(out) :: Cmat
2530  ! integer(kind=kint) :: ndof, ndof2, nr, nc, i, js, je, j, jcol, nnz_row, idx, ks, ke, k, kcol
2531  ! if (Amat%ndof /= Bmat%ndof) stop 'ERROR: hecmw_localmat_add: non-matching ndof'
2532  ! ndof = Amat%ndof
2533  ! ndof2 = ndof*ndof
2534  ! nr = min(Amat%nr, Bmat%nr)
2535  ! nc = max(Amat%nc, Bmat%nc)
2536  ! Cmat%ndof = ndof
2537  ! Cmat%nr = nr
2538  ! Cmat%nc = nc
2539  ! Cmat%nnz = Amat%index(nr) + Bmat%index(nr)
2540  ! allocate(Cmat%index(0:nr))
2541  ! allocate(Cmat%item(Cmat%nnz))
2542  ! allocate(Cmat%A(ndof2 * Cmat%nnz))
2543  ! Cmat%index(0) = 0
2544  ! idx = 0
2545  ! do i = 1, nr
2546  ! ! Amat
2547  ! js = Amat%index(i-1)+1
2548  ! je = Amat%index(i)
2549  ! do j = js, je
2550  ! idx = idx + 1
2551  ! Cmat%item(idx) = Amat%item(j)
2552  ! Cmat%A(ndof2*(idx-1)+1:ndof2*idx) = Amat%A(ndof2*(j-1)+1:ndof2*j)
2553  ! enddo
2554  ! ! Bmat
2555  ! js = Bmat%index(i-1)+1
2556  ! je = Bmat%index(i)
2557  ! do j = js, je
2558  ! idx = idx + 1
2559  ! Cmat%item(idx) = Bmat%item(j)
2560  ! Cmat%A(ndof2*(idx-1)+1:ndof2*idx) = Bmat%A(ndof2*(j-1)+1:ndof2*j)
2561  ! enddo
2562  ! Cmat%index(i) = idx
2563  ! enddo
2564  ! if (Cmat%index(nr) /= Cmat%nnz) stop 'ERROR: merge localmat'
2565  ! call sort_and_uniq_rows(Cmat)
2566  ! end subroutine hecmw_localmat_add
2567 
2568  subroutine hecmw_localmat_init_with_hecmat(BKmat, hecMAT, num_lagrange)
2569  implicit none
2570  type (hecmwst_local_matrix), intent(inout) :: bkmat
2571  type (hecmwst_matrix), intent(in) :: hecmat
2572  integer(kind=kint), optional, intent(in) :: num_lagrange
2573  integer(kind=kint) :: ndof, ndof2, i, idx, idx2, js, je, j, k
2574  integer(kind=kint), allocatable :: incl_nz(:), cnt(:)
2575  logical :: check_nonzero
2576  check_nonzero = .false.
2577  !check_nonzero = .true. !!! always checking nonzero seems to be faster
2578  !
2579  ndof = hecmat%NDOF
2580  ndof2 = ndof*ndof
2581  ! nr, nc, nnz
2582  bkmat%nr = hecmat%NP
2583  bkmat%nc = hecmat%NP
2584  bkmat%ndof = ndof
2585  !
2586  if (present(num_lagrange)) then !!! TEMPORARY (DUE TO WRONG conMAT WHEN num_lagrange==0) !!!
2587  check_nonzero = .true.
2588  endif
2589  !
2590  if (check_nonzero) then
2591  allocate(incl_nz(hecmat%NPL + hecmat%NPU + hecmat%NP))
2592  allocate(cnt(bkmat%nr))
2593  incl_nz(:) = 0
2594  !$omp parallel default(none), &
2595  !$omp& private(i,idx,js,je,j,k), &
2596  !$omp& shared(BKmat,hecMAT,cnt,ndof2,incl_nz)
2597  !$omp do
2598  do i = 1, bkmat%nr
2599  idx = hecmat%indexL(i-1) + (i-1) + hecmat%indexU(i-1)
2600  cnt(i) = 0
2601  ! lower
2602  js = hecmat%indexL(i-1)+1
2603  je = hecmat%indexL(i)
2604  do j = js, je
2605  idx = idx + 1
2606  do k = 1, ndof2
2607  if (hecmat%AL(ndof2*(j-1)+k) /= 0.0d0) then
2608  incl_nz(idx) = 1
2609  cnt(i) = cnt(i) + 1
2610  exit
2611  endif
2612  enddo
2613  enddo
2614  ! diag
2615  idx = idx + 1
2616  do k = 1, ndof2
2617  if (hecmat%D(ndof2*(i-1)+k) /= 0.0d0) then
2618  incl_nz(idx) = 1
2619  cnt(i) = cnt(i) + 1
2620  exit
2621  endif
2622  enddo
2623  ! upper
2624  js = hecmat%indexU(i-1)+1
2625  je = hecmat%indexU(i)
2626  do j = js, je
2627  idx = idx + 1
2628  do k = 1, ndof2
2629  if (hecmat%AU(ndof2*(j-1)+k) /= 0.0d0) then
2630  incl_nz(idx) = 1
2631  cnt(i) = cnt(i) + 1
2632  exit
2633  endif
2634  enddo
2635  enddo
2636  if (idx /= hecmat%indexL(i) + i + hecmat%indexU(i)) stop 'ERROR: hecmw_localmat_init_with_hecmat: count'
2637  enddo
2638  !$omp end do
2639  !$omp end parallel
2640  ! index
2641  allocate(bkmat%index(0:bkmat%nr))
2642  call make_index(bkmat%nr, cnt, bkmat%index)
2643  deallocate(cnt)
2644  bkmat%nnz = bkmat%index(bkmat%nr)
2645  ! item, A
2646  allocate(bkmat%item(bkmat%nnz))
2647  allocate(bkmat%A(ndof2 * bkmat%nnz))
2648  !$omp parallel default(none), &
2649  !$omp& private(i,idx,idx2,js,je,j), &
2650  !$omp& shared(BKmat,hecMAT,ndof2,incl_nz)
2651  !$omp do
2652  do i = 1, bkmat%nr
2653  idx = hecmat%indexL(i-1) + (i-1) + hecmat%indexU(i-1)
2654  idx2 = bkmat%index(i-1)
2655  ! lower
2656  js = hecmat%indexL(i-1)+1
2657  je = hecmat%indexL(i)
2658  do j = js, je
2659  idx = idx + 1
2660  if (incl_nz(idx) == 1) then
2661  idx2 = idx2 + 1
2662  bkmat%item(idx2) = hecmat%itemL(j)
2663  bkmat%A(ndof2*(idx2-1)+1:ndof2*idx2) = hecmat%AL(ndof2*(j-1)+1:ndof2*j)
2664  endif
2665  enddo
2666  ! diag
2667  idx = idx + 1
2668  if (incl_nz(idx) == 1) then
2669  idx2 = idx2 + 1
2670  bkmat%item(idx2) = i
2671  bkmat%A(ndof2*(idx2-1)+1:ndof2*idx2) = hecmat%D(ndof2*(i-1)+1:ndof2*i)
2672  endif
2673  ! upper
2674  js = hecmat%indexU(i-1)+1
2675  je = hecmat%indexU(i)
2676  do j = js, je
2677  idx = idx + 1
2678  if (incl_nz(idx) == 1) then
2679  idx2 = idx2 + 1
2680  bkmat%item(idx2) = hecmat%itemU(j)
2681  bkmat%A(ndof2*(idx2-1)+1:ndof2*idx2) = hecmat%AU(ndof2*(j-1)+1:ndof2*j)
2682  endif
2683  enddo
2684  if (idx /= hecmat%indexL(i) + i + hecmat%indexU(i)) stop 'ERROR: hecmw_localmat_init_with_hecmat: copy'
2685  if (idx2 /= bkmat%index(i)) stop 'ERROR: hecmw_localmat_init_with_hecmat: index'
2686  enddo
2687  !$omp end do
2688  !$omp end parallel
2689  deallocate(incl_nz)
2690  else
2691  bkmat%nnz = hecmat%NPL + hecmat%NP + hecmat%NPU
2692  allocate(bkmat%index(0:bkmat%nr))
2693  allocate(bkmat%item(bkmat%nnz))
2694  allocate(bkmat%A(ndof2 * bkmat%nnz))
2695  bkmat%index(0) = 0
2696  !$omp parallel do default(none), &
2697  !$omp& private(i,idx,js,je,j), &
2698  !$omp& shared(BKmat,hecMAT,ndof2)
2699  do i = 1, bkmat%nr
2700  idx = hecmat%indexL(i-1) + (i-1) + hecmat%indexU(i-1)
2701  ! lower
2702  js = hecmat%indexL(i-1)+1
2703  je = hecmat%indexL(i)
2704  do j = js, je
2705  idx = idx + 1
2706  bkmat%item(idx) = hecmat%itemL(j)
2707  bkmat%A(ndof2*(idx-1)+1:ndof2*idx) = hecmat%AL(ndof2*(j-1)+1:ndof2*j)
2708  enddo
2709  ! diag
2710  idx = idx + 1
2711  bkmat%item(idx) = i
2712  bkmat%A(ndof2*(idx-1)+1:ndof2*idx) = hecmat%D(ndof2*(i-1)+1:ndof2*i)
2713  ! upper
2714  js = hecmat%indexU(i-1)+1
2715  je = hecmat%indexU(i)
2716  do j = js, je
2717  idx = idx + 1
2718  bkmat%item(idx) = hecmat%itemU(j)
2719  bkmat%A(ndof2*(idx-1)+1:ndof2*idx) = hecmat%AU(ndof2*(j-1)+1:ndof2*j)
2720  enddo
2721  bkmat%index(i) = idx
2722  if (idx /= hecmat%indexL(i) + i + hecmat%indexU(i)) stop 'ERROR: hecmw_localmat_init_with_hecmat: copy'
2723  enddo
2724  !$omp end parallel do
2725  endif
2726  end subroutine hecmw_localmat_init_with_hecmat
2727 
2728  subroutine hecmw_localmat_add_hecmat(BKmat, hecMAT)
2729  implicit none
2730  type (hecmwst_local_matrix), intent(inout) :: bkmat
2731  type (hecmwst_matrix), intent(in) :: hecmat
2732  type (hecmwst_local_matrix) :: w1mat, w2mat
2733  !! Should Be Simple If Non-Zero Profile Is Kept !!
2734  call hecmw_localmat_init_with_hecmat(w1mat, hecmat)
2735  call debug_write_matrix(w1mat, 'BKmat (hecMAT)', debug_matrix)
2736  call hecmw_localmat_add(bkmat, w1mat, w2mat)
2737  call hecmw_localmat_free(bkmat)
2738  call hecmw_localmat_free(w1mat)
2739  bkmat%nr = w2mat%nr
2740  bkmat%nc = w2mat%nc
2741  bkmat%nnz = w2mat%nnz
2742  bkmat%ndof = w2mat%ndof
2743  bkmat%index => w2mat%index
2744  bkmat%item => w2mat%item
2745  bkmat%A => w2mat%A
2746  end subroutine hecmw_localmat_add_hecmat
2747 
2748  subroutine hecmw_localmat_multmat(BKmat, BTmat, hecMESH, BKTmat)
2749  implicit none
2750  type (hecmwst_local_matrix), intent(in) :: bkmat
2751  type (hecmwst_local_matrix), intent(inout) :: btmat
2752  type (hecmwst_local_mesh), intent(inout) :: hecmesh
2753  type (hecmwst_local_matrix), intent(out) :: bktmat
2754  type (hecmwst_matrix_comm) :: heccomm
2755  type (hecmwst_local_mesh) :: hecmeshnew
2756  type (hecmwst_local_matrix), allocatable :: bt_exp(:)
2757  type (hecmwst_local_matrix) :: bt_imp, bt_all
2758  integer(kind=kint), allocatable :: exp_cols_index(:)
2759  integer(kind=kint), allocatable :: exp_cols_item(:,:)
2760  real(kind=kreal) :: t0, t1
2761  t0 = hecmw_wtime()
2762  !
2763  if (hecmesh%PETOT > 1) then
2764  call make_comm_table(bkmat, hecmesh, heccomm)
2765  if (debug >= 1) write(0,*) 'DEBUG: hecmw_localmat_multmat: make_comm_table done'
2766  t1 = hecmw_wtime()
2767  if (timer >= 2) write(0,'(A,f10.4)') '##### hecmw_localmat_multmat (1) : ',t1-t0
2768  t0 = hecmw_wtime()
2769  !
2770  if (btmat%nr > hecmesh%nn_internal) then
2771  ! consider only internal part of BTmat
2772  if (debug >= 1) write(0,'(A)') 'DEBUG: hecmw_localmat_multmat: ignore external part of BTmat'
2773  btmat%nr = hecmesh%nn_internal
2774  btmat%nnz = btmat%index(btmat%nr)
2775  endif
2776  !
2777  call extract_bt_exp(btmat, heccomm, bt_exp)
2778  if (debug >= 1) write(0,*) 'DEBUG: hecmw_localmat_multmat: extract_BT_exp done'
2779  t1 = hecmw_wtime()
2780  if (timer >= 2) write(0,'(A,f10.4)') '##### hecmw_localmat_multmat (2) : ',t1-t0
2781  t0 = hecmw_wtime()
2782  !
2783  call prepare_column_info(hecmesh, bt_exp, exp_cols_index, exp_cols_item)
2784  if (debug >= 1) write(0,*) 'DEBUG: hecmw_localmat_multmat: prepare column info done'
2785  t1 = hecmw_wtime()
2786  if (timer >= 2) write(0,'(A,f10.4)') '##### hecmw_localmat_multmat (3) : ',t1-t0
2787  t0 = hecmw_wtime()
2788  !
2789  call send_bt_exp_and_recv_bt_imp(hecmesh, heccomm, bt_exp, exp_cols_index, exp_cols_item, bt_imp, hecmeshnew)
2790  if (debug >= 1) write(0,*) 'DEBUG: hecmw_localmat_multmat: send BT_exp and recv BT_imp done'
2791  t1 = hecmw_wtime()
2792  if (timer >= 2) write(0,'(A,f10.4)') '##### hecmw_localmat_multmat (4) : ',t1-t0
2793  t0 = hecmw_wtime()
2794  call free_comm_table(heccomm)
2795  !
2796  call concat_btmat_and_bt_imp(btmat, bt_imp, bt_all)
2797  if (debug >= 1) write(0,*) 'DEBUG: hecmw_localmat_multmat: concat BTmat and BT_imp into BT_all done'
2798  t1 = hecmw_wtime()
2799  if (timer >= 2) write(0,'(A,f10.4)') '##### hecmw_localmat_multmat (5) : ',t1-t0
2800  t0 = hecmw_wtime()
2801  call hecmw_localmat_free(bt_imp)
2802  !
2803  call multiply_mat_mat(bkmat, bt_all, bktmat)
2804  if (debug >= 1) write(0,*) 'DEBUG: hecmw_localmat_multmat: multiply BKmat and BT_all into BKTmat done'
2805  t1 = hecmw_wtime()
2806  if (timer >= 2) write(0,'(A,f10.4)') '##### hecmw_localmat_multmat (6) : ',t1-t0
2807  t0 = hecmw_wtime()
2808  call hecmw_localmat_free(bt_all)
2809  !
2810  if (hecmesh%n_neighbor_pe > 0) then
2811  hecmesh%n_node = hecmeshnew%n_node
2812  hecmesh%n_neighbor_pe = hecmeshnew%n_neighbor_pe
2813  deallocate(hecmesh%neighbor_pe)
2814  deallocate(hecmesh%import_index)
2815  deallocate(hecmesh%export_index)
2816  deallocate(hecmesh%import_item)
2817  deallocate(hecmesh%export_item)
2818  deallocate(hecmesh%node_ID)
2819  deallocate(hecmesh%global_node_ID)
2820  hecmesh%neighbor_pe => hecmeshnew%neighbor_pe
2821  hecmesh%import_index => hecmeshnew%import_index
2822  hecmesh%export_index => hecmeshnew%export_index
2823  hecmesh%import_item => hecmeshnew%import_item
2824  hecmesh%export_item => hecmeshnew%export_item
2825  hecmesh%node_ID => hecmeshnew%node_ID
2826  hecmesh%global_node_ID => hecmeshnew%global_node_ID
2827  if (debug >= 1) write(0,*) 'DEBUG: hecmw_localmat_multmat: update hecMESH done'
2828  t1 = hecmw_wtime()
2829  if (timer >= 2) write(0,'(A,f10.4)') '##### hecmw_localmat_multmat (7) : ',t1-t0
2830  endif
2831  else
2832  call multiply_mat_mat(bkmat, btmat, bktmat)
2833  if (debug >= 1) write(0,*) 'DEBUG: hecmw_localmat_multmat: multiply BKmat and BTmat into BKTmat done'
2834  t1 = hecmw_wtime()
2835  if (timer >= 2) write(0,'(A,f10.4)') '##### hecmw_localmat_multmat : ',t1-t0
2836  endif
2837  end subroutine hecmw_localmat_multmat
2838 
2839  subroutine make_comm_table(BKmat, hecMESH, hecCOMM)
2840  use m_hecmw_comm_f
2841  implicit none
2842  type (hecmwst_local_matrix), intent(in) :: bkmat
2843  type (hecmwst_local_mesh), intent(in) :: hecmesh
2844  type (hecmwst_matrix_comm), intent(out) :: heccomm
2845  integer(kind=kint) :: nn_int, nn_ext, nnb, i, icol, irank, idom, idx, n_send, tag, js, je, len
2846  integer(kind=kint), allocatable :: is_nz_col(:), imp_cnt(:), exp_cnt(:), import_item_remote(:)
2847  integer(kind=kint), allocatable :: requests(:), statuses(:,:)
2848  heccomm%zero = hecmesh%zero
2849  heccomm%HECMW_COMM = hecmesh%MPI_COMM
2850  heccomm%PETOT = hecmesh%PETOT
2851  heccomm%PEsmpTOT = hecmesh%PEsmpTOT
2852  heccomm%my_rank = hecmesh%my_rank
2853  heccomm%errnof = hecmesh%errnof
2854  heccomm%n_subdomain = hecmesh%n_subdomain
2855  heccomm%n_neighbor_pe = hecmesh%n_neighbor_pe
2856  allocate(heccomm%neighbor_pe(heccomm%n_neighbor_pe))
2857  heccomm%neighbor_pe(:) = hecmesh%neighbor_pe(:)
2858  !
2859  nn_int = hecmesh%nn_internal
2860  nn_ext = hecmesh%n_node - hecmesh%nn_internal
2861  nnb = heccomm%n_neighbor_pe
2862  !
2863  ! check_external_nz_cols (by profile (not number))
2864  allocate(is_nz_col(nn_ext))
2865  is_nz_col(:) = 0
2866  do i = 1, bkmat%index(nn_int)
2867  icol = bkmat%item(i)
2868  if (icol > nn_int) is_nz_col(icol - nn_int) = 1
2869  enddo
2870  !
2871  ! count_nz_cols_per_rank
2872  allocate(imp_cnt(nnb))
2873  imp_cnt(:) = 0
2874  do i = 1, nn_ext
2875  if (is_nz_col(i) == 1) then
2876  irank = hecmesh%node_ID(2*(nn_int+i))
2877  call rank_to_idom(hecmesh, irank, idom)
2878  imp_cnt(idom) = imp_cnt(idom) + 1
2879  endif
2880  enddo
2881  if (debug >= 3) write(0,*) ' DEBUG3: imp_cnt',imp_cnt(:)
2882  !
2883  ! make_index
2884  allocate(heccomm%import_index(0:nnb))
2885  call make_index(nnb, imp_cnt, heccomm%import_index)
2886  if (debug >= 3) write(0,*) ' DEBUG3: import_index',heccomm%import_index(:)
2887  !
2888  ! fill item
2889  allocate(heccomm%import_item(heccomm%import_index(nnb)))
2890  imp_cnt(:) = 0
2891  do i = 1, nn_ext
2892  if (is_nz_col(i) == 1) then
2893  irank = hecmesh%node_ID(2*(nn_int+i))
2894  call rank_to_idom(hecmesh, irank, idom)
2895  imp_cnt(idom) = imp_cnt(idom) + 1
2896  idx = heccomm%import_index(idom-1)+imp_cnt(idom)
2897  heccomm%import_item(idx) = nn_int+i
2898  endif
2899  enddo
2900  if (debug >= 3) write(0,*) ' DEBUG3: import_item',heccomm%import_item(:)
2901  !
2902  allocate(import_item_remote(heccomm%import_index(nnb)))
2903  do i = 1, heccomm%import_index(nnb)
2904  import_item_remote(i) = hecmesh%node_ID(2*heccomm%import_item(i)-1)
2905  enddo
2906  if (debug >= 3) write(0,*) ' DEBUG3: import_item_remote',import_item_remote(:)
2907  !
2908  allocate(requests(2*nnb))
2909  allocate(statuses(hecmw_status_size, 2*nnb))
2910  !
2911  ! send/recv
2912  n_send = 0
2913  do idom = 1, nnb
2914  irank = heccomm%neighbor_pe(idom)
2915  n_send = n_send + 1
2916  tag = 6001
2917  call hecmw_isend_int(imp_cnt(idom), 1, irank, tag, heccomm%HECMW_COMM, requests(n_send))
2918  if (imp_cnt(idom) > 0) then
2919  js = heccomm%import_index(idom-1)+1
2920  je = heccomm%import_index(idom)
2921  len = je-js+1
2922  n_send = n_send + 1
2923  tag = 6002
2924  call hecmw_isend_int(import_item_remote(js:je), len, irank, tag, &
2925  heccomm%HECMW_COMM, requests(n_send))
2926  endif
2927  enddo
2928  !
2929  ! index
2930  allocate(exp_cnt(nnb))
2931  do idom = 1, nnb
2932  irank = heccomm%neighbor_pe(idom)
2933  tag = 6001
2934  call hecmw_recv_int(exp_cnt(idom), 1, irank, tag, heccomm%HECMW_COMM, statuses(:,1))
2935  enddo
2936  allocate(heccomm%export_index(0:nnb))
2937  call make_index(nnb, exp_cnt, heccomm%export_index)
2938  if (debug >= 3) write(0,*) ' DEBUG3: export_index',heccomm%export_index(:)
2939  !
2940  ! item
2941  allocate(heccomm%export_item(heccomm%export_index(nnb)))
2942  do idom = 1, nnb
2943  if (exp_cnt(idom) <= 0) cycle
2944  irank = heccomm%neighbor_pe(idom)
2945  js = heccomm%export_index(idom-1)+1
2946  je = heccomm%export_index(idom)
2947  len = je-js+1
2948  tag = 6002
2949  call hecmw_recv_int(heccomm%export_item(js:je), len, irank, tag, &
2950  heccomm%HECMW_COMM, statuses(:,1))
2951  enddo
2952  if (debug >= 3) write(0,*) ' DEBUG3: export_item',heccomm%export_item(:)
2953  call hecmw_waitall(n_send, requests, statuses)
2954  !
2955  deallocate(imp_cnt)
2956  deallocate(exp_cnt)
2957  deallocate(import_item_remote)
2958  end subroutine make_comm_table
2959 
2960  subroutine free_comm_table(hecCOMM)
2961  implicit none
2962  type (hecmwST_matrix_comm), intent(inout) :: hecCOMM
2963  deallocate(heccomm%neighbor_pe)
2964  deallocate(heccomm%import_index)
2965  deallocate(heccomm%import_item)
2966  deallocate(heccomm%export_index)
2967  deallocate(heccomm%export_item)
2968  end subroutine free_comm_table
2969 
2970  subroutine extract_bt_exp(BTmat, hecCOMM, BT_exp)
2971  implicit none
2972  type (hecmwST_local_matrix), intent(in) :: BTmat
2973  type (hecmwST_matrix_comm), intent(in) :: hecCOMM
2974  type (hecmwST_local_matrix), allocatable, intent(out) :: BT_exp(:)
2975  integer(kind=kint) :: ndof, ndof2, idom, idx_0, idx_n, j, jrow, nnz_row, idx, ks, ke, k
2976  if (heccomm%n_neighbor_pe == 0) return
2977  allocate(bt_exp(heccomm%n_neighbor_pe))
2978  ndof = btmat%ndof
2979  ndof2 = ndof * ndof
2980  do idom = 1, heccomm%n_neighbor_pe
2981  idx_0 = heccomm%export_index(idom-1)
2982  idx_n = heccomm%export_index(idom)
2983  bt_exp(idom)%nr = idx_n - idx_0
2984  bt_exp(idom)%nc = btmat%nc
2985  bt_exp(idom)%nnz = 0
2986  bt_exp(idom)%ndof = ndof
2987  allocate(bt_exp(idom)%index(0:bt_exp(idom)%nr))
2988  bt_exp(idom)%index(0) = 0
2989  do j = 1, bt_exp(idom)%nr
2990  jrow = heccomm%export_item(idx_0 + j)
2991  nnz_row = btmat%index(jrow) - btmat%index(jrow-1)
2992  bt_exp(idom)%index(j) = bt_exp(idom)%index(j-1) + nnz_row
2993  enddo
2994  bt_exp(idom)%nnz = bt_exp(idom)%index(bt_exp(idom)%nr)
2995  allocate(bt_exp(idom)%item(bt_exp(idom)%nnz))
2996  allocate(bt_exp(idom)%A(ndof2 * bt_exp(idom)%nnz))
2997  idx = 0
2998  do j = 1, bt_exp(idom)%nr
2999  jrow = heccomm%export_item(idx_0 + j)
3000  ks = btmat%index(jrow-1) + 1
3001  ke = btmat%index(jrow)
3002  do k = ks, ke
3003  idx = idx + 1
3004  bt_exp(idom)%item(idx) = btmat%item(k)
3005  bt_exp(idom)%A(ndof2*(idx-1)+1:ndof2*idx) = btmat%A(ndof2*(k-1)+1:ndof2*k)
3006  enddo
3007  if (idx /= bt_exp(idom)%index(j)) stop 'ERROR: extract BT_exp'
3008  enddo
3009  enddo
3010  end subroutine extract_bt_exp
3011 
3012  subroutine send_bt_exp_and_recv_bt_imp(hecMESH, hecCOMM, BT_exp, exp_cols_index, exp_cols_item, BT_imp, hecMESHnew)
3013  use m_hecmw_comm_f
3014  implicit none
3015  type (hecmwST_local_mesh), intent(in) :: hecMESH
3016  type (hecmwST_matrix_comm), intent(in) :: hecCOMM
3017  type (hecmwST_local_matrix), allocatable, intent(inout) :: BT_exp(:)
3018  integer(kind=kint), allocatable, intent(inout) :: exp_cols_index(:)
3019  integer(kind=kint), allocatable, intent(inout) :: exp_cols_item(:,:)
3020  type (hecmwST_local_matrix), intent(out) :: BT_imp
3021  type (hecmwST_local_mesh), intent(inout) :: hecMESHnew
3022  integer(kind=kint), allocatable :: nnz_imp(:), cnt(:), index_imp(:)
3023  integer(kind=kint), allocatable :: imp_cols_index(:)
3024  integer(kind=kint), allocatable :: imp_cols_item(:,:)
3025  real(kind=kreal), allocatable :: imp_vals_item(:)
3026  integer(kind=kint) :: nnb, ndof, ndof2, idom, irank, nr, n_send, tag, idx_0, idx_n, j, jj, nnz
3027  integer(kind=kint), allocatable :: requests(:)
3028  integer(kind=kint), allocatable :: statuses(:,:)
3029  integer(kind=kint), allocatable :: map(:), add_nodes(:,:)
3030  integer(kind=kint) :: n_add_node, i0
3031  nnb = heccomm%n_neighbor_pe
3032  if (nnb == 0) then
3033  bt_imp%nr = 0
3034  bt_imp%nc = 0
3035  bt_imp%nnz = 0
3036  bt_imp%ndof = 0
3037  allocate(bt_imp%index(0:0))
3038  bt_imp%index(0) = 0
3039  return
3040  endif
3041  ndof = bt_exp(1)%ndof
3042  ndof2 = ndof*ndof
3043  allocate(requests(nnb*3))
3044  allocate(statuses(hecmw_status_size, nnb*3))
3045  n_send = 0
3046  do idom = 1, nnb
3047  irank = heccomm%neighbor_pe(idom)
3048  nr = bt_exp(idom)%nr
3049  if (nr == 0) cycle
3050  n_send = n_send + 1
3051  tag = 3001
3052  call hecmw_isend_int(bt_exp(idom)%index(0:bt_exp(idom)%nr), bt_exp(idom)%nr + 1, &
3053  irank, tag, heccomm%HECMW_COMM, requests(n_send))
3054  if (bt_exp(idom)%nnz == 0) cycle
3055  n_send = n_send + 1
3056  tag = 3002
3057  call hecmw_isend_int(exp_cols_item(1,exp_cols_index(idom-1)+1), &
3058  cncol_item * bt_exp(idom)%nnz, irank, tag, heccomm%HECMW_COMM, requests(n_send))
3059  n_send = n_send + 1
3060  tag = 3003
3061  call hecmw_isend_r(bt_exp(idom)%A, ndof2 * bt_exp(idom)%nnz, &
3062  irank, tag, heccomm%HECMW_COMM, requests(n_send))
3063  enddo
3064  !
3065  ! BT_imp%nr = hecCOMM%import_index(nnb)
3066  bt_imp%nr = hecmesh%n_node - hecmesh%nn_internal
3067  bt_imp%nc = 0 !!! TEMPORARY
3068  bt_imp%nnz = 0
3069  bt_imp%ndof = ndof
3070  !
3071  allocate(nnz_imp(nnb))
3072  allocate(cnt(bt_imp%nr))
3073  !
3074  cnt(:) = 0
3075  do idom = 1, nnb
3076  irank = heccomm%neighbor_pe(idom)
3077  idx_0 = heccomm%import_index(idom-1)
3078  idx_n = heccomm%import_index(idom)
3079  nr = idx_n - idx_0
3080  if (nr == 0) then
3081  nnz_imp(idom) = 0
3082  cycle
3083  endif
3084  allocate(index_imp(0:nr))
3085  tag = 3001
3086  call hecmw_recv_int(index_imp(0:nr), nr+1, irank, tag, &
3087  heccomm%HECMW_COMM, statuses(:,1))
3088  nnz_imp(idom) = index_imp(nr)
3089  do j = 1, nr
3090  jj = heccomm%import_item(idx_0 + j) - hecmesh%nn_internal
3091  if (jj < 1 .or. bt_imp%nr < jj) stop 'ERROR: jj out of range'
3092  if (cnt(jj) /= 0) stop import rows?'
3093  cnt(jj) = index_imp(j) - index_imp(j-1)
3094  enddo
3095  deallocate(index_imp)
3096  enddo
3097  !
3098  allocate(imp_cols_index(0:nnb))
3099  call make_index(nnb, nnz_imp, imp_cols_index)
3100  deallocate(nnz_imp)
3101  !
3102  allocate(BT_imp%index(0:BT_imp%nr))
3103  call make_index(BT_imp%nr, cnt, BT_imp%index)
3104  deallocate(cnt)
3105  !
3106  BT_imp%nnz = BT_imp%index(BT_imp%nr)
3107  if (BT_imp%nnz /= imp_cols_index(nnb)) &
3108  stop 'error: total num of nonzero of bt_imp'
3109  !
3110  allocate(imp_cols_item(cNCOL_ITEM, BT_imp%nnz))
3111  allocate(imp_vals_item(ndof2 * BT_imp%nnz))
3112  !
3113  do idom = 1, nnb
3114  irank = hecCOMM%neighbor_pe(idom)
3115  idx_0 = imp_cols_index(idom-1)
3116  idx_n = imp_cols_index(idom)
3117  nnz = idx_n - idx_0
3118  if (nnz == 0) cycle
3119  tag = 3002
3120  call HECMW_RECV_INT(imp_cols_item(1, idx_0 + 1), cNCOL_ITEM * nnz, &
3121  irank, tag, hecCOMM%HECMW_COMM, statuses(:,1))
3122  tag = 3003
3123  call HECMW_RECV_R(imp_vals_item(ndof2*idx_0 + 1), ndof2 * nnz, &
3124  irank, tag, hecCOMM%HECMW_COMM, statuses(:,1))
3125  enddo
3126  call HECMW_Waitall(n_send, requests, statuses)
3127  if (DEBUG >= 2) write(0,*) ' debug2: send bt_imp and recv into temporary data done'
3128  !
3129  deallocate(requests)
3130  deallocate(statuses)
3131  !
3132  do idom = 1, nnb
3133  call hecmw_localmat_free(BT_exp(idom))
3134  enddo
3135  deallocate(BT_exp)
3136  deallocate(exp_cols_index)
3137  deallocate(exp_cols_item)
3138  !
3139  call copy_mesh(hecMESH, hecMESHnew)
3140  !
3141  call map_imported_cols(hecMESHnew, imp_cols_index(nnb), imp_cols_item, n_add_node, add_nodes, map, i0)
3142  if (DEBUG >= 2) write(0,*) ' debug2: map imported cols done'
3143  !
3144  call update_comm_table(hecMESHnew, n_add_node, add_nodes, i0)
3145  if (DEBUG >= 2) write(0,*) ' debug2: update comm_table done'
3146  !
3147  BT_imp%nc = hecMESHnew%n_node
3148  !
3149  allocate(BT_imp%item(BT_imp%nnz))
3150  allocate(BT_imp%A(ndof2 * BT_imp%nnz))
3151  call copy_vals_to_BT_imp(hecCOMM, hecMESH%nn_internal, imp_cols_index, map, imp_vals_item, BT_imp)
3152  if (DEBUG >= 2) write(0,*) ' debug2: copy vals to bt_imp done'
3153  !
3154  deallocate(imp_cols_index)
3155  deallocate(imp_cols_item)
3156  deallocate(imp_vals_item)
3157  deallocate(map)
3158  end subroutine send_BT_exp_and_recv_BT_imp
3159 
3160  subroutine copy_vals_to_BT_imp(hecCOMM, nn_internal, imp_cols_index, map, imp_vals_item, BT_imp)
3161  implicit none
3162  type (hecmwST_matrix_comm), intent(in) :: hecCOMM
3163  integer(kind=kint), intent(in) :: nn_internal
3164  integer(kind=kint), allocatable, intent(in) :: imp_cols_index(:)
3165  integer(kind=kint), intent(in) :: map(:)
3166  real(kind=kreal), intent(in) :: imp_vals_item(:)
3167  type (hecmwST_local_matrix), intent(inout) :: BT_imp
3168  integer(kind=kint) :: nnb, ndof2, idx, idom, idx_0, idx_n, nr, j, jrow, ks, ke, k
3169  nnb = hecCOMM%n_neighbor_pe
3170  ndof2 = BT_imp%ndof ** 2
3171  idx = 0
3172  do idom = 1, nnb
3173  idx_0 = hecCOMM%import_index(idom-1)
3174  idx_n = hecCOMM%import_index(idom)
3175  nr = idx_n - idx_0
3176  if (nr == 0) cycle
3177  do j = 1, nr
3178  jrow = hecCOMM%import_item(idx_0 + j) - nn_internal
3179  ks = BT_imp%index(jrow-1)+1
3180  ke = BT_imp%index(jrow)
3181  do k = ks, ke
3182  idx = idx + 1
3183  BT_imp%item(k) = map(idx)
3184  BT_imp%A(ndof2*(k-1)+1:ndof2*k) = imp_vals_item(ndof2*(idx-1)+1:ndof2*idx)
3185  enddo
3186  enddo
3187  if (idx /= imp_cols_index(idom)) stop 'error: copy vals to bt_imp'
3188  enddo
3189  end subroutine copy_vals_to_BT_imp
3190 
3191  subroutine concat_BTmat_and_BT_imp(BTmat, BT_imp, BT_all)
3192  implicit none
3193  type (hecmwST_local_matrix), intent(in) :: BTmat
3194  type (hecmwST_local_matrix), intent(in) :: BT_imp
3195  type (hecmwST_local_matrix), intent(out) :: BT_all
3196  integer(kind=kint) :: ndof, ndof2, i, ii
3197  ndof = BTmat%ndof
3198 .and. if (BT_imp%nr > 0 BT_imp%ndof /= ndof) stop 'error: concat btmat and bt_imp: ndof'
3199  ndof2 = ndof*ndof
3200  BT_all%nr = BTmat%nr + BT_imp%nr
3201  BT_all%nc = max(BTmat%nc, BT_imp%nc)
3202  BT_all%nnz = BTmat%nnz + BT_imp%nnz
3203  BT_all%ndof = ndof
3204  allocate(BT_all%index(0:BT_all%nr))
3205  allocate(BT_all%item(BT_all%nnz))
3206  allocate(BT_all%A(ndof2 * BT_all%nnz))
3207  BT_all%index(0) = 0
3208  do i = 1, BTmat%nr
3209  BT_all%index(i) = BTmat%index(i)
3210  enddo
3211  do i = 1, BT_imp%nr
3212  BT_all%index(BTmat%nr+i) = BT_all%index(BTmat%nr+i-1) + &
3213  BT_imp%index(i) - BT_imp%index(i-1)
3214  enddo
3215  do i = 1, BTmat%nnz
3216  BT_all%item(i) = BTmat%item(i)
3217  BT_all%A(ndof2*(i-1)+1:ndof2*i) = BTmat%A(ndof2*(i-1)+1:ndof2*i)
3218  enddo
3219  do i = 1, BT_imp%nnz
3220  ii = BTmat%nnz + i
3221  BT_all%item(ii) = BT_imp%item(i)
3222  BT_all%A(ndof2*(ii-1)+1:ndof2*ii) = BT_imp%A(ndof2*(i-1)+1:ndof2*i)
3223  enddo
3224  end subroutine concat_BTmat_and_BT_imp
3225 
3226  subroutine multiply_mat_mat(Amat, Bmat, Cmat)
3227  implicit none
3228  type (hecmwST_local_matrix), intent(in) :: Amat
3229  type (hecmwST_local_matrix), intent(in) :: Bmat
3230  type (hecmwST_local_matrix), intent(out) :: Cmat
3231  integer(kind=kint) :: ndof, ndof2, nr, nc, nnz, i, icnt
3232  integer(kind=kint) :: js, je, j, jj, ks, ke, k, kk, l, ll, l0
3233  integer(kind=kint), allocatable :: iw(:)
3234  real(kind=kreal), pointer :: Ap(:), Bp(:), Cp(:)
3235  real(kind=kreal) :: t0, t1
3236  t0 = hecmw_wtime()
3237  if (Amat%ndof /= Bmat%ndof) stop 'error: multiply_mat_mat: unmatching ndof'
3238  ndof = Amat%ndof
3239  ndof2 = ndof*ndof
3240  nr = Amat%nr
3241  nc = Bmat%nc
3242  if (Amat%nc /= Bmat%nr) then
3243  write(0,*) 'amat: nr, nc = ', Amat%nr, Amat%nc
3244  write(0,*) 'bmat: nr, nc = ', Bmat%nr, Bmat%nc
3245  stop 'error: multiply_mat_mat: unmatching size'
3246  endif
3247  Cmat%ndof = ndof
3248  Cmat%nr = nr
3249  Cmat%nc = nc
3250  allocate(Cmat%index(0:nr))
3251  Cmat%index(0) = 0
3252  !$omp parallel default(none), &
3253  !$omp& private(iw,i,icnt,js,je,j,jj,ks,ke,k,kk,l), &
3254  !$omp& shared(nr,nc,Amat,Bmat,Cmat)
3255  allocate(iw(nc))
3256  !$omp do
3257  do i = 1, nr
3258  icnt = 0
3259  js = Amat%index(i-1)+1
3260  je = Amat%index(i)
3261  do j = js, je
3262  jj = Amat%item(j)
3263  ks = Bmat%index(jj-1)+1
3264  ke = Bmat%index(jj)
3265  kl1: do k = ks, ke
3266  kk = Bmat%item(k)
3267  do l = 1, icnt
3268  if (iw(l) == kk) cycle kl1
3269  enddo
3270  icnt = icnt + 1
3271  iw(icnt) = kk
3272  enddo kl1
3273  enddo
3274  Cmat%index(i) = icnt
3275  enddo
3276  !$omp end do
3277  deallocate(iw)
3278  !$omp end parallel
3279  do i = 1, nr
3280  Cmat%index(i) = Cmat%index(i-1) + Cmat%index(i)
3281  enddo
3282  nnz = Cmat%index(nr)
3283  Cmat%nnz = nnz
3284  !write(0,*) 'nnz',nnz
3285  t1 = hecmw_wtime()
3286  if (TIMER >= 3) write(0, '(a,f10.4)') "###### multiply_mat_mat (1) : ",t1-t0
3287  t0 = hecmw_wtime()
3288  allocate(Cmat%item(nnz))
3289  allocate(Cmat%A(ndof2 * nnz))
3290  Cmat%A(:) = 0.0d0
3291  !$omp parallel default(none), &
3292  !$omp& private(i,icnt,l0,js,je,j,jj,Ap,ks,ke,k,kk,Bp,ll,l,Cp), &
3293  !$omp& shared(nr,Cmat,Amat,Bmat,ndof2,ndof)
3294  !$omp do
3295  do i = 1, nr
3296  icnt = 0
3297  l0 = Cmat%index(i-1)
3298  ! item
3299  js = Amat%index(i-1)+1
3300  je = Amat%index(i)
3301  do j = js, je
3302  jj = Amat%item(j)
3303  Ap => Amat%A(ndof2*(j-1)+1:ndof2*j)
3304  ks = Bmat%index(jj-1)+1
3305  ke = Bmat%index(jj)
3306  do k = ks, ke
3307  kk = Bmat%item(k)
3308  Bp => Bmat%A(ndof2*(k-1)+1:ndof2*k)
3309  ll = -1
3310  do l = 1, icnt
3311  if (Cmat%item(l0+l) == kk) then
3312  ll = l0 + l
3313  exit
3314  endif
3315  enddo
3316  if (ll < 0) then
3317  icnt = icnt + 1
3318  ll = l0 + icnt
3319  Cmat%item(ll) = kk
3320  endif
3321  Cp => Cmat%A(ndof2*(ll-1)+1:ndof2*ll)
3322  call blk_matmul_add(ndof, Ap, Bp, Cp)
3323  enddo
3324  enddo
3325  !write(0,*) 'l0,icnt,index(i)',Cmat%index(i-1),icnt,Cmat%index(i)
3326  if (l0+icnt /= Cmat%index(i)) stop 'error: multiply_mat_mat: unknown error'
3327  enddo
3328  !$omp end do
3329  !$omp end parallel
3330  t1 = hecmw_wtime()
3331  if (TIMER >= 3) write(0, '(a,f10.4)') "###### multiply_mat_mat (2) : ",t1-t0
3332  t0 = hecmw_wtime()
3333  call sort_and_uniq_rows(Cmat)
3334  t1 = hecmw_wtime()
3335  if (TIMER >= 3) write(0, '(a,f10.4)') "###### multiply_mat_mat (3) : ",t1-t0
3336  end subroutine multiply_mat_mat
3337 
3338  subroutine blk_matmul_add(ndof, A, B, AB)
3339  implicit none
3340  integer, intent(in) :: ndof
3341  real(kind=kreal), intent(in) :: A(:), B(:)
3342  real(kind=kreal), intent(inout) :: AB(:)
3343  integer :: ndof2, i, j, k, i0, j0, ij, ik, jk
3344  ndof2=ndof*ndof
3345  do i=1,ndof
3346  i0=(i-1)*ndof
3347  do j=1,ndof
3348  ij=i0+j
3349  j0=(j-1)*ndof
3350  do k=1,ndof
3351  ik=i0+k
3352  jk=j0+k
3353  !$omp atomic
3354  AB(ik)=AB(ik)+A(ij)*B(jk)
3355  enddo
3356  enddo
3357  enddo
3358  end subroutine blk_matmul_add
3359 
3360  subroutine hecmw_localmat_make_hecmat(hecMAT, BTtKTmat, hecTKT)
3361  implicit none
3362  type (hecmwST_matrix), intent(in) :: hecMAT
3363  type (hecmwST_local_matrix), intent(in) :: BTtKTmat
3364  type (hecmwST_matrix), intent(inout) :: hecTKT
3365  call make_new_hecmat(hecMAT, BTtKTmat, hecTKT)
3366  end subroutine hecmw_localmat_make_hecmat
3367 
3368  subroutine hecmw_localmat_shrink_comm_table(BKmat, hecMESH)
3369  implicit none
3370  type (hecmwST_local_matrix), intent(in) :: BKmat
3371  type (hecmwST_local_mesh), intent(inout) :: hecMESH
3372  type (hecmwST_matrix_comm) :: hecCOMM
3373  call make_comm_table(BKmat, hecMESH, hecCOMM)
3374  deallocate(hecMESH%import_index)
3375  deallocate(hecMESH%import_item)
3376  deallocate(hecMESH%export_index)
3377  deallocate(hecMESH%export_item)
3378  hecMESH%import_index => hecCOMM%import_index
3379  hecMESH%import_item => hecCOMM%import_item
3380  hecMESH%export_index => hecCOMM%export_index
3381  hecMESH%export_item => hecCOMM%export_item
3382  deallocate(hecCOMM%neighbor_pe)
3383  end subroutine hecmw_localmat_shrink_comm_table
3384 
3385  !> \brief Debug write matrix
3386  !>
3387  subroutine debug_write_matrix(Mat, label, level)
3388  type(hecmwST_local_matrix), intent(in) :: Mat !< matrix
3389  character(len=*), intent(in) :: label !< label for matrix
3390  integer(kind=kint), intent(in) :: level !< debug level
3391  !
3392  integer(kind=kint) :: iunit
3393 
3394  if (level <= 0) return
3395 
3396  iunit = 700 + hecmw_comm_get_rank()
3397  write(iunit,'(a,a)') trim(label),'============================================================'
3398  if (level == 1) then
3399  call hecmw_localmat_write_size(Mat, iunit)
3400  else if (level == 2) then
3401  call hecmw_localmat_write_ij(Mat, iunit)
3402  else
3403  call hecmw_localmat_write(Mat, iunit)
3404  endif
3405  end subroutine debug_write_matrix
3406 
3407 end module hecmw_local_matrix
hecmw_local_matrix::hecmw_trimatmul_ttkt_parallel
subroutine hecmw_trimatmul_ttkt_parallel(hecMESH, BTtmat, hecMAT, BTmat, iwS, num_lagrange, hecTKT)
Definition: hecmw_local_matrix.f90:258
hecmw_local_matrix
Definition: hecmw_local_matrix.f90:6
hecmw_local_matrix::hecmw_localmat_add
subroutine, public hecmw_localmat_add(Amat, Bmat, Cmat)
Definition: hecmw_local_matrix.f90:2445
hecmw_util::hecmw_wtime
real(kind=kreal) function hecmw_wtime()
Definition: hecmw_util_f.F90:549
hecmw_local_matrix::hecmw_localmat_blocking
subroutine, public hecmw_localmat_blocking(Tmat, ndof, BTmat)
Definition: hecmw_local_matrix.f90:109
hecmw_local_matrix::hecmw_localmat_make_hecmat
subroutine, public hecmw_localmat_make_hecmat(hecMAT, BTtKTmat, hecTKT)
Definition: hecmw_local_matrix.f90:3361
hecmw_pair_array::hecmw_pair_array_append
subroutine, public hecmw_pair_array_append(parray, id, i1, i2)
Definition: hecmw_pair_array.f90:45
hecmw_pair_array::hecmw_pair_array_sort
subroutine, public hecmw_pair_array_sort(parray)
Definition: hecmw_pair_array.f90:58
hecmw_local_matrix::hecmw_trimatmul_ttkt_serial
subroutine, public hecmw_trimatmul_ttkt_serial(hecMESH, BTtmat, hecMAT, BTmat, iwS, num_lagrange, hecTKT)
Definition: hecmw_local_matrix.f90:230
hecmw_pair_array
Definition: hecmw_pair_array.f90:1
hecmw_local_matrix::free_comm_table
subroutine free_comm_table(hecCOMM)
Definition: hecmw_local_matrix.f90:2961
hecmw_util::hecmw_status_size
integer(kind=kint), parameter hecmw_status_size
Definition: hecmw_util_f.F90:33
hecmw_util::hecmwst_matrix_comm
Definition: hecmw_util_f.F90:401
hecmw_array_util
Definition: hecmw_array_util.f90:6
hecmw_util
I/O and Utility.
Definition: hecmw_util_f.F90:7
hecmw_util::hecmwst_local_mesh
Definition: hecmw_util_f.F90:234
hecmw_local_matrix::hecmw_localmat_mulvec
subroutine, public hecmw_localmat_mulvec(BTmat, V, TV)
Definition: hecmw_local_matrix.f90:798
hecmw_array_util::hecmw_bsearch_int_array
subroutine, public hecmw_bsearch_int_array(array, istart, iend, val, idx)
Definition: hecmw_array_util.f90:63
m_hecmw_comm_f
Definition: hecmw_comm_f.F90:6
hecmw_pair_array::hecmw_pair_array_find_id
integer(kind=kint) function, public hecmw_pair_array_find_id(parray, i1, i2)
Definition: hecmw_pair_array.f90:64
hecmw_pair_array::hecmw_pair_array_finalize
subroutine, public hecmw_pair_array_finalize(parray)
Definition: hecmw_pair_array.f90:37
hecmw_local_matrix::hecmw_localmat_transpose
subroutine, public hecmw_localmat_transpose(Tmat, Ttmat)
Definition: hecmw_local_matrix.f90:1075
hecmw_util::kreal
integer(kind=4), parameter kreal
Definition: hecmw_util_f.F90:16
hecmw_local_matrix::hecmw_localmat_assemble
subroutine, public hecmw_localmat_assemble(BTmat, hecMESH, hecMESHnew)
Definition: hecmw_local_matrix.f90:1148
hecmw_local_matrix::allocate_bt_int
subroutine allocate_bt_int(hecMESH, ndof, imp_rows_index, imp_rows_item, BT_int)
Definition: hecmw_local_matrix.f90:1695
hecmw_local_matrix::count_add_imp_per_rank
subroutine count_add_imp_per_rank(n_add_node, add_nodes, npe, n_add_imp)
Definition: hecmw_local_matrix.f90:2100
m_hecmw_comm_f::hecmw_alltoall_int
subroutine hecmw_alltoall_int(sbuf, sc, rbuf, rc, comm)
Definition: hecmw_comm_f.F90:185
m_hecmw_comm_f::hecmw_recv_r
subroutine hecmw_recv_r(rbuf, rc, source, tag, comm, stat)
Definition: hecmw_comm_f.F90:300
hecmw_array_util::hecmw_qsort_int_array
recursive subroutine, public hecmw_qsort_int_array(array, istart, iend)
Definition: hecmw_array_util.f90:18
hecmw_local_matrix::trimatmul_ttkt
subroutine trimatmul_ttkt(BTtmat, hecMAT, BTmat, BTtKT)
Definition: hecmw_local_matrix.f90:318
m_hecmw_comm_f::hecmw_isend_r
subroutine hecmw_isend_r(sbuf, sc, dest, tag, comm, req)
Definition: hecmw_comm_f.F90:220
hecmw_local_matrix::hecmw_localmat_add_hecmat
subroutine, public hecmw_localmat_add_hecmat(BKmat, hecMAT)
Definition: hecmw_local_matrix.f90:2729
hecmw_array_util::hecmw_uniq_int_array
subroutine, public hecmw_uniq_int_array(array, istart, iend, ndup)
Definition: hecmw_array_util.f90:47
hecmw_local_matrix::hecmw_localmat_multmat
subroutine, public hecmw_localmat_multmat(BKmat, BTmat, hecMESH, BKTmat)
Definition: hecmw_local_matrix.f90:2749
hecmw_matrix_misc
Definition: hecmw_matrix_misc.f90:6
hecmw_local_matrix::hecmw_localmat_free
subroutine, public hecmw_localmat_free(Tmat)
Definition: hecmw_local_matrix.f90:198
hecmw_local_matrix::hecmw_localmat_write
subroutine, public hecmw_localmat_write(Tmat, iunit)
Definition: hecmw_local_matrix.f90:47
hecmw_local_matrix::count_new_comm_nodes
subroutine count_new_comm_nodes(npe, org_nnb, org_nbpe, org_index, n_add, n_new)
Definition: hecmw_local_matrix.f90:2181
hecmw_pair_array::hecmw_pair_array_init
subroutine, public hecmw_pair_array_init(parray, max_num)
Definition: hecmw_pair_array.f90:27
m_hecmw_comm_f::hecmw_recv_int
subroutine hecmw_recv_int(rbuf, rc, source, tag, comm, stat)
Definition: hecmw_comm_f.F90:283
hecmw_local_matrix::hecmw_trimatmul_ttkt
subroutine, public hecmw_trimatmul_ttkt(hecMESH, BTtmat, hecMAT, BTmat, iwS, num_lagrange, hecTKT)
Definition: hecmw_local_matrix.f90:211
hecmw_local_matrix::hecmw_trimatmul_ttkt_mpc
subroutine, public hecmw_trimatmul_ttkt_mpc(hecMESH, hecMAT, hecTKT)
Definition: hecmw_local_matrix.f90:842
m_hecmw_comm_f::hecmw_waitall
subroutine hecmw_waitall(cnt, reqs, stats)
Definition: hecmw_comm_f.F90:270
hecmw_local_matrix::send_recv_bt_ext_contents
subroutine send_recv_bt_ext_contents(hecMESH, BT_ext, exp_rows_index, exp_cols_index, exp_rows_item, exp_cols_item, imp_rows_index, imp_cols_index, imp_rows_item, imp_cols_item, imp_vals_item)
Definition: hecmw_local_matrix.f90:1631
hecmw_local_matrix::hecmw_localmat_init_with_hecmat
subroutine, public hecmw_localmat_init_with_hecmat(BKmat, hecMAT, num_lagrange)
Definition: hecmw_local_matrix.f90:2569
hecmw_util::hecmw_comm_get_rank
integer(kind=kint) function hecmw_comm_get_rank()
Definition: hecmw_util_f.F90:582
m_hecmw_comm_f::hecmw_isend_int
subroutine hecmw_isend_int(sbuf, sc, dest, tag, comm, req)
Definition: hecmw_comm_f.F90:203
hecmw_local_matrix::hecmw_localmat_shrink_comm_table
subroutine, public hecmw_localmat_shrink_comm_table(BKmat, hecMESH)
Definition: hecmw_local_matrix.f90:3369
hecmw_util::hecmwst_matrix
Definition: hecmw_util_f.F90:444