FrontISTR 5.7.0
Large-scale structural analysis program with the finite element method
hecmw_ClusterMKL_wrapper.F90
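The only public entry point is hecmw_clustermkl_wrapper(spMAT, phase_start, solution, istat): phase_start = 1 redoes analysis, factorization and solve, 2 reuses the previous analysis and runs factorization and solve, and 3 reuses the existing factors and only solves. A minimal calling sketch, assuming a sparse_matrix spMAT and a solution array sol set up elsewhere (these names are illustrative, not part of this file):

  use hecmw_util
  use m_sparse_matrix
  use m_hecmw_clustermkl_wrapper
  type(sparse_matrix) :: spMAT
  real(kind=kreal), pointer :: sol(:)
  integer(kind=kint) :: istat
  ! ... assemble spMAT, fill spMAT%rhs, allocate sol ...
  call hecmw_clustermkl_wrapper(spMAT, 1, sol, istat)  ! first solve: analysis + factorization + solve
  if (istat < 0) call hecmw_abort(hecmw_comm_get_comm())
  ! ... update spMAT%A and spMAT%rhs, keeping the sparsity pattern ...
  call hecmw_clustermkl_wrapper(spMAT, 2, sol, istat)  ! refactorize and solve, reusing the analysis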
1 !-------------------------------------------------------------------------------
2 ! Copyright (c) 2019 FrontISTR Commons
3 ! This software is released under the MIT License, see LICENSE.txt
4 !-------------------------------------------------------------------------------
5 !> \brief This module provides linear equation solver interface for Cluster Pardiso
6 #ifndef HECMW_SERIAL
7 # ifdef HECMW_WITH_MKL
8 # define WITH_CLUSTERMKL
9 # endif
10 #endif
11 
12 #ifdef WITH_CLUSTERMKL
13 include 'mkl_cluster_sparse_solver.f90'
14 #endif
15 
16 module m_hecmw_clustermkl_wrapper
17  use hecmw_util
18  use m_hecmw_comm_f
19  use m_sparse_matrix
20 
21 #ifdef WITH_CLUSTERMKL
22  use mkl_cluster_sparse_solver
23 #endif
24 
25  implicit none
26 
27  private ! default
28  public hecmw_clustermkl_wrapper ! only entry point of Parallel Direct Solver is public
29 
30  logical, save :: INITIALIZED = .false.
31 #ifdef WITH_CLUSTERMKL
32  type(MKL_CLUSTER_SPARSE_SOLVER_HANDLE) :: pt(64)
33 #endif
34  integer maxfct, mnum, mtype, nrhs, msglvl
35  integer idum(1), i
36  data nrhs /1/, maxfct /1/, mnum /1/
37 
38  integer, save :: iparm(64)
39  integer(kind=kint), save :: nn
40  integer(kind=kint), pointer, save :: irow(:), jcol(:)
41  real(kind=kreal), pointer, save :: aval(:), rhs(:), solx(:)
42 
43  integer(kind=kint), parameter :: debug=0
44 
45 contains
46 
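 ! Entry point of the Cluster Pardiso wrapper. The phase_start argument selects
 ! how much of the previous call is reused:
 !   1 : run analysis (phase 11), factorization (phase 22) and solve (phase 33);
 !       if the solver was already initialized, its old data is released first (phase -1)
 !   2 : reuse the previous analysis; run factorization and solve
 !   3 : reuse the existing factors; run the solve phase only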
47  subroutine hecmw_clustermkl_wrapper(spMAT, phase_start, solution, istat)
48  implicit none
49  type (sparse_matrix), intent(inout) :: spmat
50  integer(kind=kint), intent(in) :: phase_start
51  integer(kind=kint), intent(out) :: istat
52  real(kind=kreal), pointer, intent(inout) :: solution(:)
53 
54  integer(kind=kint) :: myrank, phase
55  real(kind=kreal) :: t1,t2,t3,t4,t5
56 
57 #ifdef WITH_CLUSTERMKL
58 
59  myrank = hecmw_comm_get_rank()
60 
61  if (.not. initialized) then
62  do i=1,64
63  pt(i)%dummy = 0
64  enddo
65  iparm(:) = 0
66  iparm(1) = 1 ! no solver default
67  iparm(2) = 3 ! fill-in reordering: parallel (OpenMP) nested dissection
68  iparm(3) = 1
69  iparm(8) = 2 ! max number of iterative refinement steps
70  if (spmat%symtype == sparse_matrix_symtype_asym) then
71  iparm(10) = 13 ! perturb the pivot elements with 1E-13
72  else
73  iparm(10) = 8 ! perturb the pivot elements with 1E-8
74  endif
75  iparm(11) = 1 ! Enable scaling
76  iparm(13) = 1 ! Enable matching
77  iparm(18) = 0 ! do not report the number of nonzeros in the factors
78  iparm(19) = 0 ! do not report the factorization Mflop count
79  msglvl = 0 ! do not print statistical information
80  initialized = .true.
81  else
82  if( phase_start == 1 ) then
83  phase = -1
84  call cluster_sparse_solver ( &
85  pt, maxfct, mnum, mtype, phase, nn, aval, irow, jcol, &
86  idum, nrhs, iparm, msglvl, rhs, solx, hecmw_comm_get_comm(), istat )
87  if (istat < 0) then
88  write(*,*) 'ERROR: MKL returned with error in phase -1', istat
89  return
90  endif
91  if( spmat%type == sparse_matrix_type_coo ) deallocate(irow,jcol,aval,rhs,solx)
92  end if
93  endif
94 
95  ! Additional setup
96  t1=hecmw_wtime()
97  if ( phase_start == 1 ) then
98  if (spmat%symtype == sparse_matrix_symtype_spd) then
99  mtype = 2
100  else if (spmat%symtype == sparse_matrix_symtype_sym) then
101  mtype = -2
102  else
103  mtype = 11
104  endif
105  endif
106 
107  if( spmat%type == sparse_matrix_type_csr ) then
108  iparm(40) = 2 ! Input: distributed matrix/rhs/solution format
109  iparm(41) = spmat%DISPLS(myrank+1)+1
110  iparm(42) = spmat%DISPLS(myrank+1)+spmat%N_COUNTS(myrank+1)
111  call sort_column_ascending_order(spmat,myrank)
112  nn = spmat%N
113  irow => spmat%IRN
114  jcol => spmat%JCN
115  aval => spmat%A
116  rhs => spmat%rhs
117  solx => solution
118  else if( spmat%type == sparse_matrix_type_coo ) then
119  iparm(40) = 0 ! Input: centralized input format (MKL has no distributed input for nonsymmetric matrices)
120  ! the input matrix is gathered to rank 0 when it is given in COO format
121  call export_spmat_to_centralizedcrs(spmat,myrank,nn,irow,jcol,aval,rhs,solx)
122  end if
123 
124  t2=hecmw_wtime()
125  if (myrank==0 .and. spmat%timelog > 0) &
126  write(*,'(A,f10.3)') ' [Cluster Pardiso]: Additional Setup completed. time(sec)=',t2-t1
127 
128  if ( phase_start == 1 ) then
129  ! ANALYSIS
130  t2=hecmw_wtime()
131 
132  phase = 11
133  call cluster_sparse_solver ( &
134  pt, maxfct, mnum, mtype, phase, nn, aval, irow, jcol, &
135  idum, nrhs, iparm, msglvl, rhs, solx, hecmw_comm_get_comm(), istat )
136  if (istat < 0) then
137  if (myrank==0 .and. spmat%timelog > 0) call print_iparm_parameters()
138  write(*,*) 'ERROR: MKL returned with error in phase 1', istat
139  return
140  endif
141 
142  t3=hecmw_wtime()
143  if (myrank==0 .and. spmat%timelog > 0) &
144  write(*,'(A,f10.3)') ' [Cluster Pardiso]: Analysis completed. time(sec)=',t3-t2
145  endif
146 
147  ! FACTORIZATION
148  if ( phase_start .le. 2 ) then
149  t3=hecmw_wtime()
150 
151  phase = 22
152  call cluster_sparse_solver ( &
153  pt, maxfct, mnum, mtype, phase, nn, aval, irow, jcol, &
154  idum, nrhs, iparm, msglvl, rhs, solx, hecmw_comm_get_comm(), istat )
155  if (istat < 0) then
156  if (myrank==0 .and. spmat%timelog > 0) call print_iparm_parameters()
157  write(*,*) 'ERROR: MKL returned with error in phase 2', istat
158  return
159  endif
160  t4=hecmw_wtime()
161  if (myrank==0 .and. spmat%timelog > 0) &
162  write(*,'(A,f10.3)') ' [Cluster Pardiso]: Factorization completed. time(sec)=',t4-t3
163  endif
164 
165  t4=hecmw_wtime()
166 
167  ! SOLUTION
168  phase = 33
169  call cluster_sparse_solver ( &
170  pt, maxfct, mnum, mtype, phase, nn, aval, irow, jcol, &
171  idum, nrhs, iparm, msglvl, rhs, solx, hecmw_comm_get_comm(), istat )
172  if (istat < 0) then
173  if (myrank==0 .and. spmat%timelog > 0) call print_iparm_parameters()
174  write(*,*) 'ERROR: MKL returned with error in phase 3', istat
175  return
176  endif
177 
178  if( spmat%type == sparse_matrix_type_coo ) then !scatter global solution
179  call sparse_matrix_scatter_rhs(spmat, solx)
180  do i=1,spmat%N_loc
181  solution(i) = spmat%rhs(i)
182  end do
183  endif
184 
185  ! The solution is written to hecMAT%X. You have to edit lib/solve_LINEQ.f90
186  ! to use this routine properly.
187 
188  t5=hecmw_wtime()
189  if (myrank==0 .and. spmat%timelog > 0) then
190  write(*,'(A,f10.3)') ' [Cluster Pardiso]: Solution completed. time(sec)=',t5-t4
191  end if
192  if( debug>0 .and. myrank==0 ) call print_iparm_parameters()
193 
194 #else
195  stop "MKL Pardiso not available"
196 #endif
197  end subroutine hecmw_clustermkl_wrapper
198 
199 #ifdef WITH_CLUSTERMKL
200  subroutine print_iparm_parameters()
201  write(*,'(A60,I8)') 'Number of iterative refinement steps performed: ',iparm(7)
202  write(*,'(A60,I8)') 'Number of perturbed pivots: ',iparm(14)
203  write(*,'(A60,I8)') 'Peak memory on symbolic factorization: ',iparm(15)
204  write(*,'(A60,I8)') 'Permanent memory on symbolic factorization: ',iparm(16)
205  write(*,'(A60,I8)') 'Size of factors/Peak memory on num. fact. and sol: ',iparm(17)
206  write(*,'(A60,I8)') 'The number of non-zero elements in the factors: ',iparm(18)
207  if( mtype < 11 ) then
208  write(*,'(A60,I8)') 'Number of positive eigenvalues: ',iparm(22)
209  write(*,'(A60,I8)') 'Number of negative eigenvalues: ',iparm(23)
210  end if
211  end subroutine
212 
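 ! Gather a distributed COO matrix onto rank 0 and convert it there to a
 ! centralized CRS structure, which is what cluster_sparse_solver expects when
 ! iparm(40) = 0. Explicit zero diagonal entries are appended before the
 ! conversion because Cluster Pardiso must be given every diagonal position;
 ! duplicate entries are summed later in coo2csr. Non-root ranks only receive
 ! length-1 dummy arrays.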
213  subroutine export_spmat_to_centralizedcrs(spMAT,myrank,n,ia,ja,a,b,x)
214  type (sparse_matrix), intent(in) :: spmat
215  integer(kind=kint), intent(in) :: myrank
216  integer(kind=kint), intent(out) :: n
217  integer(kind=kint), pointer, intent(inout) :: ia(:), ja(:)
218  real(kind=kreal), pointer, intent(inout) :: a(:), b(:), x(:)
219 
220  integer(kind=kint) :: i, nprocs, ierr, nnz
221  integer(kind=kint), allocatable :: dispmat(:), ncounts(:)
222  real(kind=kreal) :: t1,t2,t3,t4
223 
224  t1=hecmw_wtime()
225 
226  nprocs = hecmw_comm_get_size()
227  allocate(dispmat(nprocs),ncounts(nprocs))
228  if (nprocs > 1) then
229  call hecmw_allgather_int_1(spmat%NZ, ncounts, hecmw_comm_get_comm())
230  endif
231  n = spmat%N
232  nnz = 0
233  dispmat(1)=0
234  do i=1,nprocs-1
235  dispmat(i+1)=dispmat(i)+ncounts(i)
236  nnz = nnz + ncounts(i)
237  enddo
238  nnz = nnz + ncounts(nprocs)
239 
240  if( myrank == 0 ) then
241  ierr = 0
242  if( .not. associated(ia) ) allocate(ia(nnz+n), stat=ierr)
243  if (ierr /= 0) then
244  write(*,*) " Allocation error in Setup Cluster MKL, ia",ierr,nnz
245  call hecmw_abort(hecmw_comm_get_comm())
246  endif
247  if( .not. associated(ja) ) allocate(ja(nnz+n), stat=ierr)
248  if (ierr /= 0) then
249  write(*,*) " Allocation error in Setup Cluster MKL, ja",ierr,nnz
250  call hecmw_abort(hecmw_comm_get_comm())
251  endif
252  if( .not. associated(a) ) allocate(a(nnz+n), stat=ierr)
253  if (ierr /= 0) then
254  write(*,*) " Allocation error in Setup Cluster MKL, a",ierr,nnz
255  call hecmw_abort(hecmw_comm_get_comm())
256  endif
257  if( .not. associated(b) ) allocate(b(n), stat=ierr)
258  if (ierr /= 0) then
259  write(*,*) " Allocation error in Setup Cluster MKL, b",ierr,n
260  call hecmw_abort(hecmw_comm_get_comm())
261  endif
262  if( .not. associated(x) ) allocate(x(n), stat=ierr)
263  if (ierr /= 0) then
264  write(*,*) " Allocation error in Setup Cluster MKL, x",ierr,n
265  call hecmw_abort(hecmw_comm_get_comm())
266  endif
267  else !dummy
268  allocate(ia(1),ja(1),a(1),b(1),x(1))
269  end if
270 
271  t2=hecmw_wtime()
272  if (myrank==0 .and. spmat%timelog > 0 .and. debug > 0 ) &
273  write(*,'(A,f10.3)') ' [Cluster Pardiso]: - Allocate Matrix time(sec)=',t2-t1
274 
275  !gather matrix components to rank 0
276 
277  call hecmw_gatherv_int(spmat%IRN, spmat%NZ, ia, ncounts, dispmat, 0, hecmw_comm_get_comm())
278  call hecmw_gatherv_int(spmat%JCN, spmat%NZ, ja, ncounts, dispmat, 0, hecmw_comm_get_comm())
279  call hecmw_gatherv_real(spmat%A, spmat%NZ, a, ncounts, dispmat, 0, hecmw_comm_get_comm())
280  if( myrank == 0 ) then !cluster pardiso must be given diagonal components even if they are zero.
281  do i=1,n
282  ia(nnz+i) = i
283  ja(nnz+i) = i
284  a(nnz+i) = 0.d0
285  end do
286  nnz = nnz + n
287  endif
288  call sparse_matrix_gather_rhs(spmat, b)
289 
290  t3=hecmw_wtime()
291  if (myrank==0 .and. spmat%timelog > 0 .and. debug > 0 ) &
292  write(*,'(A,f10.3)') ' [Cluster Pardiso]: - Gather Matrix time(sec)=',t3-t2
293 
294  !convert COO to CRS
295  if( myrank==0 ) then
296  call coo2csr(n, nnz, ia, ja, a)
297  if( debug>0 ) call check_csr(n, nnz, ia, ja, a)
298  endif
299 
300  t4=hecmw_wtime()
301  if (myrank==0 .and. spmat%timelog > 0 .and. debug > 0 ) &
302  write(*,'(A,f10.3)') ' [Cluster Pardiso]: - Convert Matrix Format time(sec)=',t4-t3
303 
304  deallocate(dispmat,ncounts)
305  end subroutine
306 
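 ! In-place conversion of a COO matrix (possibly containing duplicate entries)
 ! to CRS: count the entries of each row, build row pointers by a prefix sum,
 ! scatter the entries into row-contiguous order, sort each row's column
 ! indices, then merge duplicate (row, column) pairs by summing their values
 ! and update nnz. A small worked example with illustrative values:
 !   input  (n=2, nnz=4):  ia = 2 1 1 2   ja = 1 2 1 1   a = 4. 2. 1. 3.
 !   output (nnz=3):       ia = 1 3 4     ja = 1 2 1     a = 1. 2. 7.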
307  subroutine coo2csr(n, nnz, ia, ja, a)
308  integer(kind=kint), intent(in) :: n
309  integer(kind=kint), intent(inout) :: nnz
310  integer(kind=kint), pointer, intent(inout) :: ia(:)
311  integer(kind=kint), pointer, intent(inout) :: ja(:)
312  real(kind=kreal), pointer, intent(inout) :: a(:)
313 
314  integer(kind=kint) :: i, k, idx, is, ie, nnz_new, prev
315  integer(kind=kint), allocatable :: counter(:), counter_curr(:), ia0(:), ja0(:)
316  real(kind=kreal), allocatable :: a0(:)
317 
318  allocate(counter(0:n),counter_curr(n),ia0(n+1),ja0(nnz),a0(nnz))
319 
320  counter(:) = 0
321  do i=1,nnz
322  idx = ia(i)
323  counter(idx) = counter(idx) + 1
324  end do
325  ia0(:)=0
326  do i=1,n
327  ia0(i+1) = ia0(i)+counter(i)
328  end do
329 
330  counter_curr(:) = 0
331  do i=1,nnz
332  idx = ia(i)
333  counter_curr(idx) = counter_curr(idx) + 1
334  idx = ia0(idx)+counter_curr(idx)
335  ja0(idx) = ja(i)
336  a0(idx) = a(i)
337  end do
338 
339  !$omp parallel private(i, iS, iE)
340  !$omp do
341  do i=1,n
342  is = ia0(i)+1
343  ie = ia0(i+1)
344  call sort_column_ascending_order0(ie-is+1,ja0(is:ie),a0(is:ie))
345  enddo
346  !$omp end do
347  !$omp end parallel
348 
349  ia(1)=1
350  do i=1,n
351  ia(i+1) = ia(i)+counter(i)
352  end do
353 
354  prev = 0
355  nnz_new = 0
356  ia(1) = 1
357  do i=1,n
358  do k=ia0(i)+1,ia0(i+1)
359  if( ja0(k) == prev ) then
360  a(nnz_new) = a(nnz_new) + a0(k)
361  else
362  nnz_new = nnz_new + 1
363  ja(nnz_new) = ja0(k)
364  a(nnz_new) = a0(k)
365  end if
366  prev = ja0(k)
367  end do
368  ia(i+1) = nnz_new + 1
369  prev = 0
370  end do
371 
372  nnz = nnz_new
373 
374  deallocate(counter,counter_curr,ia0,ja0,a0)
375 
376  end subroutine
377 
378  subroutine check_csr(n, nnz, ia, ja, a)
379  integer(kind=kint), intent(in) :: n
380  integer(kind=kint), intent(in) :: nnz
381  integer(kind=kint), pointer, intent(in) :: ia(:)
382  integer(kind=kint), pointer, intent(in) :: ja(:)
383  real(kind=kreal), pointer, intent(in) :: a(:)
384 
385  integer :: i,k
386 
387  if( ia(n+1)-1 /= nnz ) then
388  write(*,*) "Error in check_csr(1): ia(n+1)-1 /= nnz"
389  call hecmw_abort(hecmw_comm_get_comm())
390  end if
391 
392  do i=1,n
393  do k=ia(i),ia(i+1)-1
394  if( ja(k) <= 0 ) then
395  write(*,*) "Error in check_csr(2): ja(k) <= 0"
396  call hecmw_abort(hecmw_comm_get_comm())
397  end if
398  if( ja(k) > n ) then
399  write(*,*) "Error in check_csr(3): ja(k) > n"
400  call hecmw_abort(hecmw_comm_get_comm())
401  end if
402  if( k > ia(i) ) then
403  if( ja(k) <= ja(k-1) ) then
404  write(*,*) "Error in check_csr(4): ja(k) <= ja(k-1)"
405  call hecmw_abort(hecmw_comm_get_comm())
406  end if
407  end if
408  end do
409  end do
410 
411  end subroutine
412 
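 ! Cluster Pardiso requires the column indices within each CSR row to be in
 ! ascending order, so the distributed CSR input is sorted row by row here.
 ! The warning below flags very long rows because the per-row sort uses
 ! automatic (stack-allocated) work arrays of that length.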
413  subroutine sort_column_ascending_order(spMAT,myrank)
414  type (sparse_matrix), intent(inout) :: spmat
415  integer(kind=kint), intent(in) :: myrank
416 
417  integer(kind=kint) :: i, is, ie
418 
419  do i=2,spmat%N_loc+1
420  if(spmat%IRN(i)-spmat%IRN(i-1)>10000) &
421  & write(*,*) 'warning: n may be too large for sort_column_ascending_order0 for row ',i-1
422  enddo
423 
424  !!$omp parallel private(i, iS, iE)
425  !!$omp do
426  do i=1,spmat%N_loc
427  is = spmat%IRN(i)
428  ie = spmat%IRN(i+1)-1
429  call sort_column_ascending_order0(ie-is+1,spmat%JCN(is:ie),spmat%A(is:ie))
430  enddo
431  !!$omp end do
432  !!$omp end parallel
433  end subroutine sort_column_ascending_order
434 
435  subroutine sort_column_ascending_order0(n,ja,a)
436  integer(kind=kint), intent(in) :: n
437  integer(kind=kint), intent(inout) :: ja(:)
438  real(kind=kreal), intent(inout) :: a(:)
439 
440  integer(kind=kint) :: i, work(2,n)
441  real(kind=kreal) :: oa(n)
442 
443  do i=1,n
444  work(1,i) = ja(i)
445  work(2,i) = i
446  oa(i) = a(i)
447  enddo
448 
449  call myqsort(n,work)
450 
451  do i=1,n
452  ja(i) = work(1,i)
453  a(i) = oa(work(2,i))
454  enddo
455 
456  end subroutine sort_column_ascending_order0
457 
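 ! Recursive sort of (column index, original position) pairs held in work(1:2,:).
 ! Already-sorted input returns immediately; short or narrow-ranged input falls
 ! back to insertion sort; otherwise the pairs are partitioned about the midpoint
 ! of the index range and each part is sorted recursively.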
458  recursive subroutine myqsort(n,work)
459  integer(kind=kint), intent(in) :: n
460  integer(kind=kint), intent(inout) :: work(:,:)
461 
462  integer(kind=kint) :: i, key, work1(2,n), n_low, n_high, next, minidx, maxidx
463  logical :: sorted
464 
465  if(n<2) return
466  !return if work is already sorted
467  sorted = .true.
468  minidx = work(1,1)
469  maxidx = work(1,1)
470  do i=1,n-1
471  next = work(1,i+1)
472  if( work(1,i) > next ) sorted = .false.
473  if( next < minidx ) minidx = next
474  if( next > maxidx ) maxidx = next
475  enddo
476  if(sorted) return
477 
478  if( n<5 .or. maxidx-minidx < 2) then
479  call myinssort(n,work)
480  return
481  endif
482 
483  key = (minidx+maxidx)/2
484 
485  n_low=0
486  do i=1,n
487  if(work(1,i)<key) then
488  n_low=n_low+1
489  work1(1:2,n_low) = work(1:2,i)
490  endif
491  enddo
492 
493  n_high=0
494  do i=1,n
495  if(work(1,i)>=key) then
496  n_high=n_high+1
497  work1(1:2,n_low+n_high) = work(1:2,i)
498  endif
499  enddo
500 
501  if(n_low>0) call myqsort(n_low,work1(1:2,1:n_low))
502  if(n_high>0) call myqsort(n_high,work1(1:2,n_low+1:n_low+n_high))
503 
504  do i=1,n
505  work(1:2,i) = work1(1:2,i)
506  enddo
507 
508  end subroutine myqsort
509 
510  subroutine myinssort(n,work)
511  integer(kind=kint), intent(in) :: n
512  integer(kind=kint), intent(inout) :: work(:,:)
513 
514  integer(kind=kint) :: i,j,tmp(2)
515 
516  do i=2,n
517  do j=i,2,-1
518  if(work(1,j)<work(1,j-1)) then
519  tmp(1:2) = work(1:2,j)
520  work(1:2,j) = work(1:2,j-1)
521  work(1:2,j-1) = tmp(1:2)
522  endif
523  enddo
524  enddo
525 
526  end subroutine myinssort
527 
528 #endif
529 
530  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!