HermesCommon  3.0
superlu_solver.cpp
Go to the documentation of this file.
1 // This file is part of HermesCommon
2 //
3 // Copyright (c) 2009 hp-FEM group at the University of Nevada, Reno (UNR).
4 // Email: hpfem-group@unr.edu, home page: http://www.hpfem.org/.
5 //
6 // Hermes2D is free software; you can redistribute it and/or modify
7 // it under the terms of the GNU General Public License as published
8 // by the Free Software Foundation; either version 2 of the License,
9 // or (at your option) any later version.
10 //
11 // Hermes2D is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 // GNU General Public License for more details.
15 //
16 // You should have received a copy of the GNU General Public License
17 // along with Hermes2D; if not, write to the Free Software
18 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
22 #include "config.h"
23 #ifdef WITH_SUPERLU
24 #include "superlu_solver.h"
25 #include "callstack.h"
26 #include "util/memory_handling.h"
27 
28 namespace Hermes
29 {
30  namespace Solvers
31  {
32 #ifdef SLU_MT
33  template <>
34  void SuperLu<double>::sequ(SuperMatrix *A, double *r, double *c, double *rowcnd, double *colcnd, double *amax, int *info)
35  {
36  dsequ(A, r, c, rowcnd, colcnd, amax, info);
37  }
38 
39  template <>
40  void SuperLu<double>::laqgs(SuperMatrix *A, float *r, float *c, float rowcnd, float colcnd, float amax, char *equed)
41  {
42  dlaqgs(A, r, c, rowcnd, colcnd, amax, equed);
43  }
44 
45  template <>
46  int SuperLu<double>::gstrf(superlu_options_t *options, int m, int n, double anorm, LUstruct_t *LUstruct, gridinfo_t *grid, SuperLUStat_t *stat, int *info)
47  {
48  return dgstrf(options, m, n, anorm, LUstruct, grid, stat, info);
49  }
50 
51  template <>
52  float SuperLu<double>::pivotGrowth(int ncols, SuperMatrix *A, int *perm_c, SuperMatrix *L, SuperMatrix *U)
53  {
54  return dPivotGrowth(ncols, A, perm_c, L, U);
55  }
56 
57  template <>
58  float SuperLu<double>::langs(char *norm, SuperMatrix *A)
59  {
60  return dlangs(norm, A);
61  }
62  template <>
63  void SuperLu<double>::gscon(char *norm, SuperMatrix *L, SuperMatrix *U, float anorm, float *rcond, SuperLUStat_t *stat, int *info)
64  {
65  dgscon(norm, L, U, anorm, rcond, stat, info);
66  }
67 
68  template <>
69  void SuperLu<double>::gstrs(trans_t trans, SuperMatrix *L, SuperMatrix *U, int *perm_c, int *perm_r, SuperMatrix *B, SuperLUStat_t *stat, int *info)
70  {
71  dgstrs(trans, L, U, perm_c, perm_r, B, stat, info);
72  }
73 
74  template <>
75  double SuperLu<double>::lamch_(char *cmach)
76  {
77  return dlamch_(cmach);
78  }
79 
80  template <>
81  int SuperLu<double>::querySpace(SuperMatrix *a, SuperMatrix *b, mem_usage_t *mu)
82  {
83  return dquerySpace(a, b, mu);
84  }
85 #endif
86 
87  template<typename Scalar>
88  bool SuperLUSolver<Scalar>::check_status(unsigned int info)
89  {
90  if (info == 0)
91  {
92  // Success.
93  return true;
94  }
95  else if (info <= m->get_size())
96  {
97  this->warn("SuperLU: Factor U is singular, solution could not be computed.");
98  return false;
99  }
100  else if (info == m->get_size() + 1)
101  {
102  this->warn("SuperLU: RCOND is less than machine precision "
103  "(system matrix is singular to working precision).");
104  return true;
105  }
106  else if (info > m->get_size() + 1)
107  {
108  this->warn("SuperLU: Not enough memory.\n Failure when %.3f MB were allocated.",
109  (info - m->get_size()) / 1e6);
110  return false;
111  }
112 
113  return false;
114  }
115 
116  template<typename Scalar>
117  SuperLUSolver<Scalar>::SuperLUSolver(CSCMatrix<Scalar> *m, SimpleVector<Scalar> *rhs)
118  : DirectSolver<Scalar>(m, rhs), m(m), rhs(rhs), local_Ai(nullptr), local_Ap(nullptr)
119  , local_Ax(nullptr), local_rhs(nullptr)
120  {
121  R = nullptr;
122  C = nullptr;
123  perm_r = nullptr;
124  perm_c = nullptr;
125  etree = nullptr;
126 #ifndef SLU_MT
127  *equed = '\0';
128 #endif
129 
130  // Set the default input options:
131 #ifdef SLU_MT
132  // I am not sure if this will work well on Windows:
133  // http://stackoverflow.com/questions/631664/accessing-environment-variables-in-c
134  char *nt_var = getenv("OMP_NUM_THREADS");
135  if (nt_var)
136  options.nprocs = std::max(1, atoi(nt_var));
137  else
138  options.nprocs = 1;
139 
140  // Rescale the matrix if neccessary.
141  options.fact = EQUILIBRATE;
142  // Not solving the transposed problem.
143  options.trans = NOTRANS;
144  // Factorize from scratch for the first time.
145  options.refact = NO;
146  // Use partial pivoting during GEM.
147  options.diag_pivot_thresh = 1.0;
148  // Let SuperLU compute the row permutations.
149  options.usepr = NO;
150  // Not yet implemented in SuperLU_MT 2.0.
151  options.drop_tol = 0.0;
152  // Assume general non-symmetric problem.
153  options.SymmetricMode = NO;
154 
155  // Default options related to the supernodal algorithm.
156  options.panel_size = sp_ienv(1);
157  options.relax = sp_ienv(2);
158 #else
159  /*
160  options.Fact = DOFACT;
161  options.Equil = YES;
162  options.ColPerm = COLAMD;
163  options.DiagPivotThresh = 1.0;
164  options.Trans = NOTRANS;
165  options.IterRefine = NOREFINE;
166  options.SymmetricMode = NO;
167  options.PivotGrowth = NO;
168  options.ConditionNumber = NO;
169  options.PrintStat = YES;
170  */
171  // This function is only present in the sequential SLU.
172  set_default_options(&options);
173 #endif
174 
175  // Set to NO to suppress output.
176  options.PrintStat = YES;
177 
178  has_A = has_B = inited = false;
179  }
180 
181  inline SuperLuType<std::complex<double> >::Scalar to_superlu(SuperLuType<std::complex<double> >::Scalar &a, std::complex<double>b)
182  {
183  a.r = b.real();
184  a.i = b.imag();
185  return a;
186  }
187 
188  inline SuperLuType<double>::Scalar to_superlu(SuperLuType<double>::Scalar &a, double b)
189  {
190  a = b;
191  return a;
192  }
193 
194  template<typename Scalar>
195  SuperLUSolver<Scalar>::~SuperLUSolver()
196  {
197  this->free();
198  }
199 
200  template<typename Scalar>
201  int SuperLUSolver<Scalar>::get_matrix_size()
202  {
203  return m->get_size();
204  }
205 
206  template<typename Scalar>
207  void SuperLUSolver<Scalar>::solve()
208  {
209  assert(m != nullptr);
210  assert(rhs != nullptr);
211 
212  this->tick();
213 
214  // Initialize the statistics variable.
215  slu_stat_t stat;
216  SLU_INIT_STAT(&stat);
217 
218  // Prepare data structures serving as input for the solver driver
219  // (according to the chosen factorization reuse strategy).
220  // Explicit pointer to the factorization workspace
221  void *work = nullptr;
222  // (unused, see below).
223  // Space for the factorization will be allocated
224  int lwork = 0;
225  // internally by system malloc.
226  // Estimated relative forward error
227  double ferr = 1.0;
228  // (unused unless iterative refinement is performed).
229  // Estimated relative backward error
230  double berr = 1.0;
231  // (unused unless iterative refinement is performed).
232  // Record the memory usage statistics.
233  slu_memusage_t memusage;
234  // The reciprocal pivot growth factor.
235  double rpivot_growth;
236  // The estimate of the reciprocal condition number.
237  double rcond;
238 #ifdef SLU_MT
239  options.work = work;
240  options.lwork = lwork;
241 #endif
242 
243  if (!setup_factorization())
244  throw Exceptions::Exception("SuperLU: LU factorization could not be completed.");
245 
246  // If the previous factorization of A is to be fully reused as an input for the solver driver,
247  // keep the (possibly rescaled) matrix from the last factorization, otherwise recreate it
248  // from the master CSCMatrix<Scalar> pointed to by this->m (this also applies to the case when
249  // A does not yet exist).
250  if (!has_A || this->reuse_scheme != HERMES_REUSE_MATRIX_STRUCTURE_COMPLETELY)
251  {
252  if (A_changed)
253  free_matrix();
254 
255  if (!has_A)
256  {
257  // A will be created from the local copy of the value and index arrays, because these
258  // may be modified by the solver driver.
259  free_with_check(local_Ai);
260  local_Ai = malloc_with_check<SuperLUSolver<Scalar>, int>(m->get_nnz(), this);
261  memcpy(local_Ai, m->get_Ai(), m->get_nnz() * sizeof(int));
262 
263  free_with_check(local_Ap);
264  local_Ap = malloc_with_check<SuperLUSolver<Scalar>, int>(m->get_size() + 1, this);
265  memcpy(local_Ap, m->get_Ap(), (m->get_size() + 1) * sizeof(int));
266 
267  free_with_check(local_Ax);
268  local_Ax = malloc_with_check<SuperLUSolver<Scalar>, typename SuperLuType<Scalar>::Scalar>(m->get_nnz(), this);
269  for (unsigned int i = 0; i < m->get_nnz(); i++)
270  to_superlu(local_Ax[i], m->get_Ax()[i]);
271 
272  // Create new_ general (non-symmetric), column-major, non-supernodal, size X size matrix.
273  create_csc_matrix(&A, m->get_size(), m->get_size(), m->get_nnz(), local_Ax, local_Ai, local_Ap, SLU_NC, SLU_DTYPE, SLU_GE);
274 
275  has_A = true;
276  }
277  }
278 
279  // Recreate the input rhs for the solver driver from a local copy of the new_ value array.
280  free_rhs();
281 
282  free_with_check(local_rhs);
283  local_rhs = malloc_with_check<SuperLUSolver<Scalar>, typename SuperLuType<Scalar>::Scalar>(rhs->get_size(), this);
284  for (unsigned int i = 0; i < rhs->get_size(); i++)
285  to_superlu(local_rhs[i], rhs->v[i]);
286 
287  create_dense_matrix(&B, rhs->get_size(), 1, local_rhs, rhs->get_size(), SLU_DN, SLU_DTYPE, SLU_GE);
288 
289  has_B = true;
290 
291  // Initialize the solution variable.
292  SuperMatrix X;
293  typename SuperLuType<Scalar>::Scalar*x = malloc_with_check<SuperLUSolver<Scalar>, typename SuperLuType<Scalar>::Scalar>(m->get_size(), this);
294  create_dense_matrix(&X, m->get_size(), 1, x, m->get_size(), SLU_DN, SLU_DTYPE, SLU_GE);
295 
296  // Solve the system.
297  int info;
298 
299 #ifdef SLU_MT
300  if (options.refact == NO)
301  {
302  // Get column permutation vector perm_c[], according to the first argument:
303  // 0: natural ordering
304  // 1: minimum degree ordering on structure of A'*A
305  // 2: minimum degree ordering on structure of A' + A
306  // 3: approximate minimum degree for unsymmetric matrices
307  get_perm_c(1, &A, perm_c);
308  }
309 
310  /*
311  // Compute reciprocal pivot growth, estimate reciprocal condition number of A, solve,
312  // perform iterative refinement of the solution and estimate forward and backward error.
313  // Memory usage will be acquired at the end. If A is singular, info will be set to A->ncol + 1.
314  //
315  slu_mt_solver_driver( &options, &A, perm_c, perm_r, &AC, &equed, R, C,
316  &L, &U, &B, &X, &rpivot_growth, &rcond, &ferr, &berr,
317  &stat, &memusage, &info );
318  */
319 
320  // ... OR ...
321 
322  // Estimate reciprocal condition number of A and solve the system. If A is singular, info
323  // will be set to A->ncol + 1.
324  //
325  slu_mt_solver_driver(&options, &A, perm_c, perm_r, &AC, &equed, R, C,
326  &L, &U, &B, &X, nullptr, &rcond, nullptr, nullptr,
327  &stat, nullptr, &info);
328 
329  // ... OR ...
330 
331  /*
332  // Do not check the regularity of A and just solve the system.
333  //
334  slu_mt_solver_driver( &options, &A, perm_c, perm_r, &AC, &equed, R, C,
335  &L, &U, &B, &X, nullptr, nullptr, nullptr, nullptr,
336  &stat, nullptr, &info );
337  */
338 #else
339  solver_driver(&options, &A, perm_c, perm_r, etree, equed, R, C, &L, &U,
340  work, lwork, &B, &X, &rpivot_growth, &rcond, &ferr, &berr,
341  &memusage, &stat, &info);
342 #endif
343 
344  // A and B may have been multiplied by the scaling vectors R and C on the output of the
345  // solver. If the next call to the solver should reuse factorization only partially,
346  // it will need the original unscaled matrix - this will indicate such situation
347  // (rhs is always recreated anew).
348 #ifdef SLU_MT
349  A_changed = (equed != NOEQUIL);
350 #else
351  A_changed = (*equed != 'N');
352 #endif
353 
354  bool factorized = check_status(info);
355 
356  if (factorized)
357  {
358  free_with_check(this->sln);
359  this->sln = malloc_with_check<SuperLUSolver<Scalar>, Scalar>(m->get_size(), this);
360 
361  Scalar *sol = (Scalar*)((DNformat*)X.Store)->nzval;
362 
363  for (unsigned int i = 0; i < rhs->get_size(); i++)
364  this->sln[i] = sol[i];
365  }
366 
367  // If required, print statistics.
368  if (options.PrintStat)
369  SLU_PRINT_STAT(&stat);
370 
371  // Free temporary local variables.
372  StatFree(&stat);
373 
374  free_with_check(x);
375  Destroy_SuperMatrix_Store(&X);
376 
377  this->tick();
378  this->time = this->accumulated();
379 
380  if (!factorized)
381  throw Exceptions::LinearMatrixSolverException("SuperLU failed.");
382  }
383 
384  template<typename Scalar>
385  bool SuperLUSolver<Scalar>::setup_factorization()
386  {
387  unsigned int A_size = A.nrow < 0 ? 0 : A.nrow;
388  if (has_A && this->reuse_scheme != HERMES_CREATE_STRUCTURE_FROM_SCRATCH && A_size != m->get_size())
389  {
390  this->warn("You cannot reuse factorization structures for factorizing matrices of different sizes.");
391  return false;
392  }
393 
394  // Always factorize from scratch for the first time.
395  int eff_fact_scheme;
396  if (!inited)
397  eff_fact_scheme = HERMES_CREATE_STRUCTURE_FROM_SCRATCH;
398  else
399  eff_fact_scheme = this->reuse_scheme;
400 
401  // Prepare factorization structures. In case of a particular reuse scheme, comments are given
402  // to clarify which arguments will be reused and which will be reset by the dgssvx (zgssvx) routine.
403  // It was determined empirically by running the dlinsolx2 example from SuperLU, setting options.Fact
404  // to the appropriate value and reallocating the various structures before the second run of dgssvx,
405  // and observing when segfault will happen and when not. It is actually not needed to reallocate
406  // the structures by hand, but comments at various places of the SuperLU 4.0 library contradict
407  // each other and often lead to segfault when the structures are reallocated according to them.
408  // It might thus bring some insight into how SuperLU works and how to correctly use it
409  // (the PDF documentation is, unfortunately, even less helpful).
410  switch (eff_fact_scheme)
411  {
413  // This case should generally allow for solving a completely new_ system, i.e. for a change of
414  // matrix and rhs size - for simplicity, we reallocate the structures every time.
415 
416  // Clear the structures emanating from previous factorization.
417  free_factorization_data();
418 
419  // Allocate the row/column reordering vectors.
420  if (!(perm_c = intMalloc(m->get_size())))
421  throw Hermes::Exceptions::Exception("Malloc fails for perm_c[].");
422  if (!(perm_r = intMalloc(m->get_size())))
423  throw Hermes::Exceptions::Exception("Malloc fails for perm_r[].");
424 
425  // Allocate vectors with row/column scaling factors.
426  if (!(R = (double *)SUPERLU_MALLOC(m->get_size() * sizeof(double))))
427  throw Hermes::Exceptions::Exception("SUPERLU_MALLOC fails for R[].");
428  if (!(C = (double *)SUPERLU_MALLOC(m->get_size() * sizeof(double))))
429  throw Hermes::Exceptions::Exception("SUPERLU_MALLOC fails for C[].");
430 
431 #ifdef SLU_MT
432  options.fact = EQUILIBRATE;
433  options.refact = NO;
434  options.perm_c = perm_c;
435  options.perm_r = perm_r;
436 #else
437  // Allocate additional structures used by the driver routine of sequential SuperLU.
438  // Elimination tree is contained in the options structure in SuperLU_MT.
439  if (!(etree = intMalloc(m->get_size())))
440  throw Hermes::Exceptions::Exception("Malloc fails for etree[].");
441 
442  options.Fact = DOFACT;
443 #endif
444  A_changed = true;
445  break;
447  // needed from previous: etree, perm_c
448  // not needed from previous: perm_r, R, C, L, U, equed
449 #ifdef SLU_MT
450  options.fact = EQUILIBRATE;
451  options.refact = YES;
452 #else
453  options.Fact = SamePattern;
454 #endif
455  // L, U matrices may be reused without reallocating.
456  // SLU_DESTROY_L(&L);
457  // SLU_DESTROY_U(&U);
458  break;
460  // needed from previous: etree, perm_c, perm_r, L, U
461  // not needed from previous: R, C, equed
462 #ifdef SLU_MT
463  // MT version of SLU cannot reuse the equilibration factors (R, C), so
464  // this is the same as the previous case.
465  options.fact = EQUILIBRATE;
466  options.refact = YES;
467 #else
468  options.Fact = SamePattern_SameRowPerm;
469 #endif
470  break;
472  // needed from previous: perm_c, perm_r, equed, L, U
473  // not needed from previous: etree, R, C
474 #ifdef SLU_MT
475  options.fact = FACTORED;
476  options.refact = YES;
477 #else
478  options.Fact = FACTORED;
479 #endif
480  break;
481  }
482 
483  inited = true;
484 
485  return true;
486  }
487 
488  template<typename Scalar>
489  void SuperLUSolver<Scalar>::free_matrix()
490  {
491  if (has_A)
492  {
493  Destroy_SuperMatrix_Store(&A);
494  has_A = false;
495  }
496  }
497 
498  template<typename Scalar>
499  void SuperLUSolver<Scalar>::free_rhs()
500  {
501  if (has_B)
502  {
503  Destroy_SuperMatrix_Store(&B);
504  has_B = false;
505  }
506  }
507 
508  template<typename Scalar>
509  void SuperLUSolver<Scalar>::free()
510  {
511  free_factorization_data();
512  free_matrix();
513  free_rhs();
514 
515  free_with_check(local_Ai);
516  free_with_check(local_Ap);
517  free_with_check(local_Ax);
518  free_with_check(local_rhs);
519  }
520 
521  template<typename Scalar>
522  void SuperLUSolver<Scalar>::free_factorization_data()
523  {
524  if (inited)
525  {
526 #ifdef SLU_MT
527  SUPERLU_FREE(options.etree);
528  SUPERLU_FREE(options.colcnt_h);
529  SUPERLU_FREE(options.part_super_h);
530  Destroy_CompCol_Permuted(&AC);
531 #else
532  SUPERLU_FREE(etree);
533 #endif
534  SUPERLU_FREE(perm_c);
535  SUPERLU_FREE(perm_r);
536  SUPERLU_FREE(R);
537  SUPERLU_FREE(C);
538  SLU_DESTROY_L(&L);
539  SLU_DESTROY_U(&U);
540  inited = false;
541  }
542  }
543 
544 #ifdef SLU_MT
545  // This is a modification of the original p*gssvx routines from the SuperLU_MT library.
546  //
547  // The original routines have been changed in view of our applications, i.e.
548  // * only one right hand side is allowed,
549  // * some initial parameter checks have been omitted,
550  // * macros allowing abstraction from the fundamental Scalar datatype have been used
551  // * some phases of the calculation may be omitted for speed-up (less information about
552  // the matrix/solution can then be acquired, however),
553  // * deallocation at the end of the routine has been removed (this was neccessary to
554  // enable factorization reuse).
555  //
556  // See the correspondingly named attributes of SuperLUSolver class for brief description
557  // of most parameters or the library source code for pdgssvx for more details. You may pass
558  // nullptr for
559  // * recip_pivot_growth - reciprocal pivot growth factor will then not be computed;
560  // reip_pivot_growth much less than one may indicate poor
561  // stability of the factorization;
562  // * rcond - estimate of the reciprocal condition number of matrix A will
563  // then not be computed; this will prevent detection of singularity
564  // of matrix A;
565  // * ferr or berr - iterative refinement of the solution will then not be performed;
566  // this also prevents computation of forward and backward error
567  // estimates of the computed solution;
568  // * memusage - memory usage during the factorization/solution will not be queried.
569  //
570  void slu_mt_solver_driver(slu_options_t *options, SuperMatrix *A,
571  int *perm_c, int *perm_r, SuperMatrix *AC,
572  equed_t *equed, double *R, double *C,
573  SuperMatrix *L, SuperMatrix *U,
574  SuperMatrix *B, SuperMatrix *X,
575  double *recip_pivot_growth, double *rcond,
576  double *ferr, double *berr,
577  slu_stat_t *stat, slu_memusage_t *memusage,
578  int *info)
579  {
580  /* Profiling variables. */
581  double t0;
582  flops_t flopcnt;
583 
584  /* State variables. */
585  int dofact = (options->fact == DOFACT);
586  int equil = (options->fact == EQUILIBRATE);
587  int notran = (options->trans == NOTRANS);
588  int colequ, rowequ;
589 
590  /* Right hand side and solution vectors. */
591  DNformat *Bstore = (DNformat*)B->Store;
592  DNformat *Xstore = (DNformat*)X->Store;
593  Scalar *Bmat = (Scalar*)Bstore->nzval;
594  Scalar *Xmat = (Scalar*)Xstore->nzval;
595 
596  *info = 0;
597 
598  /* ------------------------------------------------------------
599  Diagonal scaling to equilibrate the matrix.
600  ------------------------------------------------------------*/
601  if (dofact || equil)
602  {
603  *equed = NOEQUIL;
604  rowequ = colequ = FALSE;
605  }
606  else
607  {
608  rowequ = (*equed == ROW) || (*equed == BOTH);
609  colequ = (*equed == COL) || (*equed == BOTH);
610  }
611 
612  if (equil)
613  {
614  t0 = SuperLU_timer_();
615  /* Compute row and column scalings to equilibrate the matrix A. */
616  int info1;
617  double rowcnd, colcnd, amax;
618  SLU_GSEQU(A, R, C, &rowcnd, &colcnd, &amax, &info1);
619 
620  if (info1 == 0)
621  {
622  /* Equilibrate matrix A. */
623  SLU_LAQGS(A, R, C, rowcnd, colcnd, amax, equed);
624  rowequ = (*equed == ROW) || (*equed == BOTH);
625  colequ = (*equed == COL) || (*equed == BOTH);
626  }
627  stat->utime[EQUIL] = SuperLU_timer_() - t0;
628  }
629 
630  /* ------------------------------------------------------------
631  Scale the right hand side.
632  ------------------------------------------------------------*/
633  if (notran)
634  {
635  if (rowequ)
636  for (int i = 0; i < A->nrow; ++i)
637  SLU_MULT(Bmat[i], R[i]);
638  }
639  else if (colequ)
640  {
641  for (int i = 0; i < A->nrow; ++i)
642  SLU_MULT(Bmat[i], C[i]);
643  }
644 
645  /* ------------------------------------------------------------
646  Perform the LU factorization.
647  ------------------------------------------------------------*/
648  if (dofact || equil)
649  {
650  /* Obtain column etree, the column count (colcnt_h) and supernode
651  partition (part_super_h) for the Householder matrix. */
652  if (options->refact == NO)
653  {
654  t0 = SuperLU_timer_();
655  SLU_SP_COLORDER(A, perm_c, options, AC);
656  stat->utime[ETREE] = SuperLU_timer_() - t0;
657  }
658 
659  /* Compute the LU factorization of A*Pc. */
660  t0 = SuperLU_timer_();
661  SLU_GSTRF(options, AC, perm_r, L, U, stat, info);
662  stat->utime[FACT] = SuperLU_timer_() - t0;
663 
664  flopcnt = 0;
665  for (int i = 0; i < options->nprocs; ++i) flopcnt += stat->procstat[i].fcops;
666  stat->ops[FACT] = flopcnt;
667 
668  if (options->lwork == -1)
669  {
670  if (memusage)
671  memusage->total_needed = *info - A->ncol;
672  return;
673  }
674  }
675 
676  if (*info > 0)
677  {
678  if (*info <= A->ncol)
679  {
680  /* Compute the reciprocal pivot growth factor of the leading
681  rank-deficient *info columns of A. */
682  if (recip_pivot_growth)
683  *recip_pivot_growth = SLU_PIVOT_GROWTH(*info, A, perm_c, L, U);
684  }
685  }
686  else
687  {
688  /* ------------------------------------------------------------
689  Compute the reciprocal pivot growth factor *recip_pivot_growth.
690  ------------------------------------------------------------*/
691  if (recip_pivot_growth)
692  *recip_pivot_growth = SLU_PIVOT_GROWTH(A->ncol, A, perm_c, L, U);
693 
694  /* ------------------------------------------------------------
695  Estimate the reciprocal of the condition number of A.
696  ------------------------------------------------------------*/
697  if (rcond)
698  {
699  t0 = SuperLU_timer_();
700 
701  // Next two lines are a bit complicated, but taken as they appear
702  // in the original library function.
703  char norm[1];
704  *(unsigned char *)norm = (notran) ? '1' : 'I';
705 
706  double anorm = SLU_LANGS(norm, A);
707  SLU_GSCON(norm, L, U, anorm, rcond, info);
708  stat->utime[RCOND] = SuperLU_timer_() - t0;
709  }
710 
711  /* ------------------------------------------------------------
712  Compute the solution matrix X.
713  ------------------------------------------------------------*/
714  // Save a copy of the right hand side.
715  memcpy(Xmat, Bmat, B->nrow * sizeof(Scalar));
716 
717  t0 = SuperLU_timer_();
718  SLU_GSTRS(options->trans, L, U, perm_r, perm_c, X, stat, info);
719  stat->utime[SOLVE] = SuperLU_timer_() - t0;
720  stat->ops[SOLVE] = stat->ops[TRISOLVE];
721 
722  /* ------------------------------------------------------------
723  Use iterative refinement to improve the computed solution and
724  compute error bounds and backward error estimates for it.
725  ------------------------------------------------------------*/
726  if (ferr && berr)
727  {
728  t0 = SuperLU_timer_();
729  SLU_GSRFS(options->trans, A, L, U, perm_r, perm_c, *equed,
730  R, C, B, X, ferr, berr, stat, info);
731  stat->utime[REFINE] = SuperLU_timer_() - t0;
732  }
733 
734  /* ------------------------------------------------------------
735  Transform the solution matrix X to a solution of the original
736  system.
737  ------------------------------------------------------------*/
738  if (notran)
739  {
740  if (colequ)
741  for (int i = 0; i < A->nrow; ++i)
742  SLU_MULT(Xmat[i], C[i]);
743  }
744  else if (rowequ)
745  {
746  for (int i = 0; i < A->nrow; ++i)
747  SLU_MULT(Xmat[i], R[i]);
748  }
749 
750  /* Set INFO = A->ncol + 1 if the matrix is singular to
751  working precision.*/
752  char param[1]; param[0] = 'E';
753  if (rcond && *rcond < SLU_LAMCH_(param)) *info = A->ncol + 1;
754  }
755 
756  if (memusage)
757  SLU_QUERY_SPACE(options->nprocs, L, U, options->panel_size, memusage);
758  }
759 #endif
760 
761  template class HERMES_API SuperLUSolver < double > ;
762  template class HERMES_API SuperLUSolver < std::complex<double> > ;
763  }
764 }
765 #endif
General namespace for the Hermes library.
Exception interface Basically a std::exception, but with a constructor with string and with print_msg...
Definition: exceptions.h:49
File containing common definitions, and basic global enums etc. for HermesCommon. ...
File containing functionality for investigating call stack.
pattern as the one already factorized.
SuperLU solver interface.