34 void SuperLu<double>::sequ(SuperMatrix *A,
double *r,
double *c,
double *rowcnd,
double *colcnd,
double *amax,
int *info)
36 dsequ(A, r, c, rowcnd, colcnd, amax, info);
40 void SuperLu<double>::laqgs(SuperMatrix *A,
float *r,
float *c,
float rowcnd,
float colcnd,
float amax,
char *equed)
42 dlaqgs(A, r, c, rowcnd, colcnd, amax, equed);
46 int SuperLu<double>::gstrf(superlu_options_t *options,
int m,
int n,
double anorm, LUstruct_t *LUstruct, gridinfo_t *grid, SuperLUStat_t *stat,
int *info)
48 return dgstrf(options, m, n, anorm, LUstruct, grid, stat, info);
52 float SuperLu<double>::pivotGrowth(
int ncols, SuperMatrix *A,
int *perm_c, SuperMatrix *L, SuperMatrix *U)
54 return dPivotGrowth(ncols, A, perm_c, L, U);
58 float SuperLu<double>::langs(
char *norm, SuperMatrix *A)
60 return dlangs(norm, A);
63 void SuperLu<double>::gscon(
char *norm, SuperMatrix *L, SuperMatrix *U,
float anorm,
float *rcond, SuperLUStat_t *stat,
int *info)
65 dgscon(norm, L, U, anorm, rcond, stat, info);
69 void SuperLu<double>::gstrs(trans_t trans, SuperMatrix *L, SuperMatrix *U,
int *perm_c,
int *perm_r, SuperMatrix *B, SuperLUStat_t *stat,
int *info)
71 dgstrs(trans, L, U, perm_c, perm_r, B, stat, info);
75 double SuperLu<double>::lamch_(
char *cmach)
77 return dlamch_(cmach);
81 int SuperLu<double>::querySpace(SuperMatrix *a, SuperMatrix *b, mem_usage_t *mu)
83 return dquerySpace(a, b, mu);
87 template<
typename Scalar>
88 bool SuperLUSolver<Scalar>::check_status(
unsigned int info)
95 else if (info <= m->get_size())
97 this->warn(
"SuperLU: Factor U is singular, solution could not be computed.");
100 else if (info == m->get_size() + 1)
102 this->warn(
"SuperLU: RCOND is less than machine precision "
103 "(system matrix is singular to working precision).");
106 else if (info > m->get_size() + 1)
108 this->warn(
"SuperLU: Not enough memory.\n Failure when %.3f MB were allocated.",
109 (info - m->get_size()) / 1e6);
116 template<
typename Scalar>
117 SuperLUSolver<Scalar>::SuperLUSolver(CSCMatrix<Scalar> *m, SimpleVector<Scalar> *rhs)
118 : DirectSolver<Scalar>(m, rhs), m(m), rhs(rhs), local_Ai(nullptr), local_Ap(nullptr)
119 , local_Ax(nullptr), local_rhs(nullptr)
134 char *nt_var = getenv(
"OMP_NUM_THREADS");
136 options.nprocs = std::max(1, atoi(nt_var));
141 options.fact = EQUILIBRATE;
143 options.trans = NOTRANS;
147 options.diag_pivot_thresh = 1.0;
151 options.drop_tol = 0.0;
153 options.SymmetricMode = NO;
156 options.panel_size = sp_ienv(1);
157 options.relax = sp_ienv(2);
172 set_default_options(&options);
176 options.PrintStat = YES;
178 has_A = has_B = inited =
false;
181 inline SuperLuType<std::complex<double> >::Scalar to_superlu(SuperLuType<std::complex<double> >::Scalar &a, std::complex<double>b)
188 inline SuperLuType<double>::Scalar to_superlu(SuperLuType<double>::Scalar &a,
double b)
194 template<
typename Scalar>
195 SuperLUSolver<Scalar>::~SuperLUSolver()
200 template<
typename Scalar>
201 int SuperLUSolver<Scalar>::get_matrix_size()
203 return m->get_size();
206 template<
typename Scalar>
207 void SuperLUSolver<Scalar>::solve()
209 assert(m !=
nullptr);
210 assert(rhs !=
nullptr);
216 SLU_INIT_STAT(&stat);
221 void *work =
nullptr;
233 slu_memusage_t memusage;
235 double rpivot_growth;
240 options.lwork = lwork;
243 if (!setup_factorization())
244 throw Exceptions::Exception(
"SuperLU: LU factorization could not be completed.");
259 free_with_check(local_Ai);
260 local_Ai = malloc_with_check<SuperLUSolver<Scalar>,
int>(m->get_nnz(),
this);
261 memcpy(local_Ai, m->get_Ai(), m->get_nnz() *
sizeof(int));
263 free_with_check(local_Ap);
264 local_Ap = malloc_with_check<SuperLUSolver<Scalar>,
int>(m->get_size() + 1,
this);
265 memcpy(local_Ap, m->get_Ap(), (m->get_size() + 1) *
sizeof(
int));
267 free_with_check(local_Ax);
268 local_Ax = malloc_with_check<SuperLUSolver<Scalar>,
typename SuperLuType<Scalar>::Scalar>(m->get_nnz(),
this);
269 for (
unsigned int i = 0; i < m->get_nnz(); i++)
270 to_superlu(local_Ax[i], m->get_Ax()[i]);
273 create_csc_matrix(&A, m->get_size(), m->get_size(), m->get_nnz(), local_Ax, local_Ai, local_Ap, SLU_NC, SLU_DTYPE, SLU_GE);
282 free_with_check(local_rhs);
283 local_rhs = malloc_with_check<SuperLUSolver<Scalar>,
typename SuperLuType<Scalar>::Scalar>(rhs->get_size(),
this);
284 for (
unsigned int i = 0; i < rhs->get_size(); i++)
285 to_superlu(local_rhs[i], rhs->v[i]);
287 create_dense_matrix(&B, rhs->get_size(), 1, local_rhs, rhs->get_size(), SLU_DN, SLU_DTYPE, SLU_GE);
293 typename SuperLuType<Scalar>::Scalar*x = malloc_with_check<SuperLUSolver<Scalar>,
typename SuperLuType<Scalar>::Scalar>(m->get_size(),
this);
294 create_dense_matrix(&X, m->get_size(), 1, x, m->get_size(), SLU_DN, SLU_DTYPE, SLU_GE);
300 if (options.refact == NO)
307 get_perm_c(1, &A, perm_c);
325 slu_mt_solver_driver(&options, &A, perm_c, perm_r, &AC, &equed, R, C,
326 &L, &U, &B, &X,
nullptr, &rcond,
nullptr,
nullptr,
327 &stat,
nullptr, &info);
339 solver_driver(&options, &A, perm_c, perm_r, etree, equed, R, C, &L, &U,
340 work, lwork, &B, &X, &rpivot_growth, &rcond, &ferr, &berr,
341 &memusage, &stat, &info);
349 A_changed = (equed != NOEQUIL);
351 A_changed = (*equed !=
'N');
354 bool factorized = check_status(info);
358 free_with_check(this->sln);
359 this->sln = malloc_with_check<SuperLUSolver<Scalar>, Scalar>(m->get_size(),
this);
361 Scalar *sol = (Scalar*)((DNformat*)X.Store)->nzval;
363 for (
unsigned int i = 0; i < rhs->get_size(); i++)
364 this->sln[i] = sol[i];
368 if (options.PrintStat)
369 SLU_PRINT_STAT(&stat);
375 Destroy_SuperMatrix_Store(&X);
378 this->time = this->accumulated();
381 throw Exceptions::LinearMatrixSolverException(
"SuperLU failed.");
384 template<
typename Scalar>
385 bool SuperLUSolver<Scalar>::setup_factorization()
387 unsigned int A_size = A.nrow < 0 ? 0 : A.nrow;
390 this->warn(
"You cannot reuse factorization structures for factorizing matrices of different sizes.");
399 eff_fact_scheme = this->reuse_scheme;
410 switch (eff_fact_scheme)
417 free_factorization_data();
420 if (!(perm_c = intMalloc(m->get_size())))
422 if (!(perm_r = intMalloc(m->get_size())))
426 if (!(R = (
double *)SUPERLU_MALLOC(m->get_size() *
sizeof(double))))
428 if (!(C = (
double *)SUPERLU_MALLOC(m->get_size() *
sizeof(double))))
432 options.fact = EQUILIBRATE;
434 options.perm_c = perm_c;
435 options.perm_r = perm_r;
439 if (!(etree = intMalloc(m->get_size())))
442 options.Fact = DOFACT;
450 options.fact = EQUILIBRATE;
451 options.refact = YES;
453 options.Fact = SamePattern;
465 options.fact = EQUILIBRATE;
466 options.refact = YES;
468 options.Fact = SamePattern_SameRowPerm;
475 options.fact = FACTORED;
476 options.refact = YES;
478 options.Fact = FACTORED;
488 template<
typename Scalar>
489 void SuperLUSolver<Scalar>::free_matrix()
493 Destroy_SuperMatrix_Store(&A);
498 template<
typename Scalar>
499 void SuperLUSolver<Scalar>::free_rhs()
503 Destroy_SuperMatrix_Store(&B);
508 template<
typename Scalar>
509 void SuperLUSolver<Scalar>::free()
511 free_factorization_data();
515 free_with_check(local_Ai);
516 free_with_check(local_Ap);
517 free_with_check(local_Ax);
518 free_with_check(local_rhs);
521 template<
typename Scalar>
522 void SuperLUSolver<Scalar>::free_factorization_data()
527 SUPERLU_FREE(options.etree);
528 SUPERLU_FREE(options.colcnt_h);
529 SUPERLU_FREE(options.part_super_h);
530 Destroy_CompCol_Permuted(&AC);
534 SUPERLU_FREE(perm_c);
535 SUPERLU_FREE(perm_r);
570 void slu_mt_solver_driver(slu_options_t *options, SuperMatrix *A,
571 int *perm_c,
int *perm_r, SuperMatrix *AC,
572 equed_t *equed,
double *R,
double *C,
573 SuperMatrix *L, SuperMatrix *U,
574 SuperMatrix *B, SuperMatrix *X,
575 double *recip_pivot_growth,
double *rcond,
576 double *ferr,
double *berr,
577 slu_stat_t *stat, slu_memusage_t *memusage,
585 int dofact = (options->fact == DOFACT);
586 int equil = (options->fact == EQUILIBRATE);
587 int notran = (options->trans == NOTRANS);
591 DNformat *Bstore = (DNformat*)B->Store;
592 DNformat *Xstore = (DNformat*)X->Store;
593 Scalar *Bmat = (Scalar*)Bstore->nzval;
594 Scalar *Xmat = (Scalar*)Xstore->nzval;
604 rowequ = colequ = FALSE;
608 rowequ = (*equed == ROW) || (*equed == BOTH);
609 colequ = (*equed == COL) || (*equed == BOTH);
614 t0 = SuperLU_timer_();
617 double rowcnd, colcnd, amax;
618 SLU_GSEQU(A, R, C, &rowcnd, &colcnd, &amax, &info1);
623 SLU_LAQGS(A, R, C, rowcnd, colcnd, amax, equed);
624 rowequ = (*equed == ROW) || (*equed == BOTH);
625 colequ = (*equed == COL) || (*equed == BOTH);
627 stat->utime[EQUIL] = SuperLU_timer_() - t0;
636 for (
int i = 0; i < A->nrow; ++i)
637 SLU_MULT(Bmat[i], R[i]);
641 for (
int i = 0; i < A->nrow; ++i)
642 SLU_MULT(Bmat[i], C[i]);
652 if (options->refact == NO)
654 t0 = SuperLU_timer_();
655 SLU_SP_COLORDER(A, perm_c, options, AC);
656 stat->utime[ETREE] = SuperLU_timer_() - t0;
660 t0 = SuperLU_timer_();
661 SLU_GSTRF(options, AC, perm_r, L, U, stat, info);
662 stat->utime[FACT] = SuperLU_timer_() - t0;
665 for (
int i = 0; i < options->nprocs; ++i) flopcnt += stat->procstat[i].fcops;
666 stat->ops[FACT] = flopcnt;
668 if (options->lwork == -1)
671 memusage->total_needed = *info - A->ncol;
678 if (*info <= A->ncol)
682 if (recip_pivot_growth)
683 *recip_pivot_growth = SLU_PIVOT_GROWTH(*info, A, perm_c, L, U);
691 if (recip_pivot_growth)
692 *recip_pivot_growth = SLU_PIVOT_GROWTH(A->ncol, A, perm_c, L, U);
699 t0 = SuperLU_timer_();
704 *(
unsigned char *)norm = (notran) ?
'1' :
'I';
706 double anorm = SLU_LANGS(norm, A);
707 SLU_GSCON(norm, L, U, anorm, rcond, info);
708 stat->utime[RCOND] = SuperLU_timer_() - t0;
715 memcpy(Xmat, Bmat, B->nrow *
sizeof(Scalar));
717 t0 = SuperLU_timer_();
718 SLU_GSTRS(options->trans, L, U, perm_r, perm_c, X, stat, info);
719 stat->utime[SOLVE] = SuperLU_timer_() - t0;
720 stat->ops[SOLVE] = stat->ops[TRISOLVE];
728 t0 = SuperLU_timer_();
729 SLU_GSRFS(options->trans, A, L, U, perm_r, perm_c, *equed,
730 R, C, B, X, ferr, berr, stat, info);
731 stat->utime[REFINE] = SuperLU_timer_() - t0;
741 for (
int i = 0; i < A->nrow; ++i)
742 SLU_MULT(Xmat[i], C[i]);
746 for (
int i = 0; i < A->nrow; ++i)
747 SLU_MULT(Xmat[i], R[i]);
752 char param[1]; param[0] =
'E';
753 if (rcond && *rcond < SLU_LAMCH_(param)) *info = A->ncol + 1;
757 SLU_QUERY_SPACE(options->nprocs, L, U, options->panel_size, memusage);
761 template class HERMES_API SuperLUSolver < double > ;
762 template class HERMES_API SuperLUSolver < std::complex<double> > ;
General namespace for the Hermes library.
Exception interface Basically a std::exception, but with a constructor with string and with print_msg...
File containing common definitions, and basic global enums etc. for HermesCommon. ...
existing factorization data.
File containing functionality for investigating call stack.
factorization / operator.
pattern as the one already factorized.
SuperLU solver interface.