HermesCommon  2.0
superlu_solver.cpp
Go to the documentation of this file.
1 // This file is part of HermesCommon
2 //
3 // Copyright (c) 2009 hp-FEM group at the University of Nevada, Reno (UNR).
4 // Email: hpfem-group@unr.edu, home page: http://hpfem.org/.
5 //
6 // Hermes2D is free software; you can redistribute it and/or modify
7 // it under the terms of the GNU General Public License as published
8 // by the Free Software Foundation; either version 2 of the License,
9 // or (at your option) any later version.
10 //
11 // Hermes2D is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 // GNU General Public License for more details.
15 //
16 // You should have received a copy of the GNU General Public License
17 // along with Hermes2D; if not, write to the Free Software
18 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
22 #include <algorithm>
23 #include "config.h"
24 #ifdef WITH_SUPERLU
25 #include "superlu_solver.h"
26 #include "callstack.h"
27 
28 namespace Hermes
29 {
30  namespace Solvers
31  {
32 #ifdef SLU_MT
33  template <>
34  void SuperLu<double>::sequ(SuperMatrix *A, double *r, double *c, double *rowcnd, double *colcnd, double *amax, int *info)
35  {
36  dsequ (A, r, c, rowcnd, colcnd, amax, info);
37  }
38 
39  template <>
40  void SuperLu<double>::laqgs (SuperMatrix *A, float *r, float *c, float rowcnd, float colcnd, float amax, char *equed)
41  {
42  dlaqgs (A, r, c, rowcnd, colcnd, amax, equed);
43  }
44 
45  template <>
46  int SuperLu<double>::gstrf (superlu_options_t *options, int m, int n, double anorm, LUstruct_t *LUstruct, gridinfo_t *grid, SuperLUStat_t *stat, int *info)
47  {
48  return dgstrf (options, m, n, anorm, LUstruct, grid, stat, info);
49  }
50 
51  template <>
52  float SuperLu<double>::pivotGrowth (int ncols, SuperMatrix *A, int *perm_c, SuperMatrix *L, SuperMatrix *U)
53  {
54  return dPivotGrowth (ncols, A, perm_c, L, U);
55  }
56 
57  template <>
58  float SuperLu<double>::langs (char *norm, SuperMatrix *A)
59  {
60  return dlangs (norm, A);
61  }
62  template <>
63  void SuperLu<double>::gscon (char *norm, SuperMatrix *L, SuperMatrix *U, float anorm, float *rcond, SuperLUStat_t *stat, int *info)
64  {
65  dgscon (norm, L, U, anorm, rcond, stat, info);
66  }
67 
68  template <>
69  void SuperLu<double>::gstrs (trans_t trans, SuperMatrix *L, SuperMatrix *U, int *perm_c, int *perm_r, SuperMatrix *B, SuperLUStat_t *stat, int *info)
70  {
71  dgstrs (trans, L, U, perm_c, perm_r, B, stat, info);
72  }
73 
74  template <>
75  double SuperLu<double>::lamch_ (char *cmach)
76  {
77  return dlamch_ (cmach);
78  }
79 
80  template <>
81  int SuperLu<double>::querySpace (SuperMatrix *a, SuperMatrix *b, mem_usage_t *mu)
82  {
83  return dquerySpace (a, b, mu);
84  }
85 #endif
86  }
87  namespace Algebra
88  {
95  static int find_position(int *Ai, int Alen, int idx)
96  {
97  assert(idx >= 0);
98 
99  register int lo = 0, hi = Alen - 1, mid;
100 
101  while (1)
102  {
103  mid = (lo + hi) >> 1;
104 
105  if(idx < Ai[mid]) hi = mid - 1;
106  else if(idx > Ai[mid]) lo = mid + 1;
107  else break;
108 
109  // Sparse matrix entry not found (raise an error when trying to add
110  // value to this position, return 0 when obtaining value there).
111  if(lo > hi) mid = -1;
112  }
113  return mid;
114  }
115 
116  template<typename Scalar>
117  SuperLUMatrix<Scalar>::SuperLUMatrix()
118  {
119  this->size = 0; nnz = 0;
120  Ax = NULL;
121  Ap = NULL;
122  Ai = NULL;
123  }
124 
125  template<typename Scalar>
126  SuperLUMatrix<Scalar>::~SuperLUMatrix()
127  {
128  this->free();
129  }
130 
131  template<typename Scalar>
132  void SuperLUMatrix<Scalar>::alloc()
133  {
134  assert(this->pages != NULL);
135 
136  // Initialize the arrays Ap and Ai.
137  Ap = new unsigned int[this->size + 1];
138  int aisize = this->get_num_indices();
139  Ai = new int[aisize];
140 
141  // sort the indices and remove duplicities, insert into Ai
142  unsigned int i, pos = 0;
143  for (i = 0; i < this->size; i++)
144  {
145  Ap[i] = pos;
146  pos += sort_and_store_indices(this->pages[i], Ai + pos, Ai + aisize);
147  }
148  Ap[i] = pos;
149 
150  delete [] this->pages;
151  this->pages = NULL;
152 
153  nnz = Ap[this->size];
154 
155  Ax = new Scalar[nnz];
156  memset(Ax, 0, sizeof(Scalar) * nnz);
157  }
158 
159  template<typename Scalar>
160  void SuperLUMatrix<Scalar>::free()
161  {
162  nnz = 0;
163  delete [] Ap; Ap = NULL;
164  delete [] Ai; Ai = NULL;
165  delete [] Ax; Ax = NULL;
166  }
167 
168  template<typename Scalar>
169  Scalar SuperLUMatrix<Scalar>::get(unsigned int m, unsigned int n)
170  {
171  // Find m-th row in the n-th column.
172  int mid = find_position(Ai + Ap[n], Ap[n + 1] - Ap[n], m);
173  // Return 0 if the entry has not been found.
174  if(mid < 0) return 0.0;
175  // Otherwise, add offset to the n-th column and return the value.
176  if(mid >= 0) mid += Ap[n];
177  return Ax[mid];
178  }
179 
180  template<typename Scalar>
181  void SuperLUMatrix<Scalar>::zero()
182  {
183  memset(Ax, 0, sizeof(Scalar) * nnz);
184  }
185 
186  template<typename Scalar>
187  void SuperLUMatrix<Scalar>::add(unsigned int m, unsigned int n, Scalar v)
188  {
189  if(v != 0.0) // ignore zero values.
190  {
191  // Find m-th row in the n-th column.
192  int pos = find_position(Ai + Ap[n], Ap[n + 1] - Ap[n], m);
193  // Make sure we are adding to an existing non-zero entry.
194  if(pos < 0)
195  throw Hermes::Exceptions::Exception("Sparse matrix entry not found");
196  // Add offset to the n-th column.
197  pos += Ap[n];
198  #pragma omp critical (SuperLUMatrix_add)
199  Ax[pos] += v;
200  }
201  }
202 
204 
205  template<typename Scalar>
206  void SuperLUMatrix<Scalar>::add_to_diagonal(Scalar v)
207  {
208  for (unsigned int i = 0; i<this->size; i++)
209  {
210  add(i, i, v);
211  }
212  };
213 
214  template<typename Scalar>
215  void SuperLUMatrix<Scalar>::add(unsigned int m, unsigned int n, Scalar **mat, int *rows, int *cols)
216  {
217  for (unsigned int i = 0; i < m; i++) // rows
218  for (unsigned int j = 0; j < n; j++) // cols
219  if(rows[i] >= 0 && cols[j] >= 0) // not Dir. dofs.
220  add(rows[i], cols[j], mat[i][j]);
221  }
222 
225  template<typename Scalar>
226  bool SuperLUMatrix<Scalar>::dump(FILE *file, const char *var_name, EMatrixDumpFormat fmt, char* number_format)
227  {
228  // TODO
229  switch (fmt)
230  {
231  case DF_MATLAB_SPARSE:
232  fprintf(file, "%% Size: %dx%d\n%% Nonzeros: %d\ntemp = zeros(%d, 3);\ntemp =[\n", this->size, this->size, Ap[this->size], Ap[this->size]);
233  for (unsigned int j = 0; j < this->size; j++)
234  for (unsigned int i = Ap[j]; i < Ap[j + 1]; i++)
235  {
236  fprintf(file, "%d %d " , Ai[i] + 1, j + 1);
237  Hermes::Helpers::fprint_num(file, Ax[i], number_format);
238  fprintf(file, "\n");
239  }
240  fprintf(file, "];\n%s = spconvert(temp);\n", var_name);
241 
242  return true;
243 
244  case DF_HERMES_BIN:
245  {
246  this->hermes_fwrite("HERMESX\001", 1, 8, file);
247  int ssize = sizeof(Scalar);
248  this->hermes_fwrite(&ssize, sizeof(int), 1, file);
249  this->hermes_fwrite(&this->size, sizeof(int), 1, file);
250  this->hermes_fwrite(&nnz, sizeof(int), 1, file);
251  this->hermes_fwrite(Ap, sizeof(int), this->size + 1, file);
252  this->hermes_fwrite(Ai, sizeof(int), nnz, file);
253  this->hermes_fwrite(Ax, sizeof(Scalar), nnz, file);
254  return true;
255  }
256 
257  default:
258  return false;
259  }
260  }
261 
262  template<typename Scalar>
263  unsigned int SuperLUMatrix<Scalar>::get_matrix_size() const
264  {
265  return this->size;
266  }
267 
268  template<typename Scalar>
269  unsigned int SuperLUMatrix<Scalar>::get_nnz() const
270  {
271  return nnz;
272  }
273 
274  template<typename Scalar>
275  double SuperLUMatrix<Scalar>::get_fill_in() const
276  {
277  return nnz / (double) (this->size * this->size);
278  }
279 
280  template<typename Scalar>
281  void SuperLUMatrix<Scalar>::add_matrix(SuperLUMatrix<Scalar>* mat)
282  {
283  add_as_block(0, 0, mat);
284  }
285 
286  template<typename Scalar>
287  void SuperLUMatrix<Scalar>::add_to_diagonal_blocks(int num_stages, SuperLUMatrix<Scalar>* mat)
288  {
289  int ndof = mat->get_size();
290  if(this->get_size() != (unsigned int) num_stages * ndof)
291  throw Hermes::Exceptions::Exception("Incompatible matrix sizes in PetscMatrix<Scalar>::add_to_diagonal_blocks()");
292 
293  for (int i = 0; i < num_stages; i++)
294  {
295  this->add_as_block(ndof*i, ndof*i, mat);
296  }
297  }
298 
299  template<typename Scalar>
300  void SuperLUMatrix<Scalar>::add_sparse_to_diagonal_blocks(int num_stages, SparseMatrix<Scalar>* mat)
301  {
302  add_to_diagonal_blocks(num_stages, static_cast<SuperLUMatrix*>(mat));
303  }
304 
305  template<typename Scalar>
306  void SuperLUMatrix<Scalar>::add_as_block(unsigned int i, unsigned int j, SuperLUMatrix<Scalar>* mat)
307  {
308  int idx;
309  for (unsigned int col = 0;col<mat->get_size();col++)
310  {
311  for (unsigned int n = mat->Ap[col];n<mat->Ap[col + 1];n++)
312  {
313  idx = find_position(Ai + Ap[col + j], Ap[col + 1 + j] - Ap[col + j], mat->Ai[n] + i);
314  if(idx<0)
315  throw Hermes::Exceptions::Exception("Sparse matrix entry not found");
316  idx += Ap[col + j];
317  Ax[idx] +=mat->Ax[n];
318  }
319  }
320  }
321 
322  // Applies the matrix to vector_in and saves result to vector_out.
323 
324  template<typename Scalar>
325  void SuperLUMatrix<Scalar>::multiply_with_vector(Scalar* vector_in, Scalar* vector_out)
326  {
327  for(unsigned int i = 0;i<this->size;i++)
328  {
329  vector_out[i] = 0;
330  }
331  for (unsigned int c = 0;c<this->size;c++)
332  {
333  for (unsigned int i = Ap[c];i<Ap[c + 1];i++)
334  {
335  vector_out[c] +=vector_in[Ai[i]]*Ax[i];
336  }
337  }
338  }
339 
340  // Multiplies matrix with a Scalar.
341  template<typename Scalar>
342  void SuperLUMatrix<Scalar>::multiply_with_Scalar(Scalar value)
343  {
344  int n = nnz;
345  for(int i = 0;i<n;i++)
346  {
347  Ax[i] = Ax[i]*value;
348  }
349  }
350  // Creates matrix using size, nnz, and the three arrays.
351 
352  template<typename Scalar>
353  void SuperLUMatrix<Scalar>::create(unsigned int size, unsigned int nnz, int* ap, int* ai, Scalar* ax)
354  {
355  this->nnz = nnz;
356  this->size = size;
357  this->Ap = new unsigned int[this->size + 1]; assert(this->Ap != NULL);
358  this->Ai = new int[nnz]; assert(this->Ai != NULL);
359  this->Ax = new Scalar[nnz]; assert(this->Ax != NULL);
360 
361  for (unsigned int i = 0; i < this->size + 1; i++)
362  {
363  this->Ap[i] = ap[i];
364  }
365  for (unsigned int i = 0; i < nnz; i++)
366  {
367  this->Ax[i] = ax[i];
368  this->Ai[i] = ai[i];
369  }
370  }
371  // Duplicates a matrix (including allocation).
372 
373  template<typename Scalar>
374  SuperLUMatrix<Scalar>* SuperLUMatrix<Scalar>::duplicate()
375  {
376  SuperLUMatrix<Scalar> * nmat = new SuperLUMatrix<Scalar>();
377 
378  nmat->nnz = nnz;
379  nmat->size = this->size;
380  nmat->Ap = new unsigned int[this->size + 1]; assert(nmat->Ap != NULL);
381  nmat->Ai = new int[nnz]; assert(nmat->Ai != NULL);
382  nmat->Ax = new Scalar[nnz]; assert(nmat->Ax != NULL);
383  for (unsigned int i = 0;i<nnz;i++)
384  {
385  nmat->Ai[i] = Ai[i];
386  nmat->Ax[i] = Ax[i];
387  }
388  for (unsigned int i = 0;i<this->size + 1;i++)
389  {
390  nmat->Ap[i] = Ap[i];
391  }
392  return nmat;
393  }
394 
395  // SuperLUVector<Scalar> /////////////////////////////////////////////////////////////////////////////////////
396 
397  template<typename Scalar>
398  SuperLUVector<Scalar>::SuperLUVector()
399  {
400  v = NULL;
401  this->size = 0;
402  }
403 
404  template<typename Scalar>
405  SuperLUVector<Scalar>::~SuperLUVector()
406  {
407  this->free();
408  }
409 
410  template<typename Scalar>
411  void SuperLUVector<Scalar>::alloc(unsigned int n)
412  {
413  this->free();
414  this->size = n;
415  v = new Scalar[n];
416  zero();
417  }
418 
419  template<typename Scalar>
420  void SuperLUVector<Scalar>::zero()
421  {
422  memset(v, 0, this->size * sizeof(Scalar));
423  }
424 
425  template<typename Scalar>
426  void SuperLUVector<Scalar>::change_sign()
427  {
428  for (unsigned int i = 0; i < this->size; i++) v[i] *= -1.;
429  }
430 
431  template<typename Scalar>
432  void SuperLUVector<Scalar>::free()
433  {
434  delete [] v;
435  v = NULL;
436  this->size = 0;
437  }
438 
439  template<typename Scalar>
440  void SuperLUVector<Scalar>::set(unsigned int idx, Scalar y)
441  {
442  v[idx] = y;
443  }
444 
445  template<typename Scalar>
446  void SuperLUVector<Scalar>::add(unsigned int idx, Scalar y)
447  {
448  v[idx] += y;
449  }
450 
451  template<typename Scalar>
452  void SuperLUVector<Scalar>::add(unsigned int n, unsigned int *idx, Scalar *y)
453  {
454  for (unsigned int i = 0; i < n; i++)
455  {
456  v[idx[i]] += y[i];
457  }
458  }
459 
460  template<typename Scalar>
461  Scalar SuperLUVector<Scalar>::get(unsigned int idx)
462  {
463  return v[idx];
464  }
465 
466  template<typename Scalar>
467  void SuperLUVector<Scalar>::extract(Scalar *v) const
468  {
469  memcpy(v, this->v, this->size * sizeof(Scalar));
470  }
471 
472  template<typename Scalar>
473  void SuperLUVector<Scalar>::add_vector(Vector<Scalar>* vec)
474  {
475  assert(this->length() == vec->length());
476  for (unsigned int i = 0; i < this->length(); i++) this->add(i, vec->get(i));
477  }
478 
479  template<typename Scalar>
480  void SuperLUVector<Scalar>::add_vector(Scalar* vec)
481  {
482  for (unsigned int i = 0; i < this->length(); i++) this->add(i, vec[i]);
483  }
484 
485  template<typename Scalar>
486  bool SuperLUVector<Scalar>::dump(FILE *file, const char *var_name, EMatrixDumpFormat fmt, char* number_format)
487  {
488  switch (fmt)
489  {
490  case DF_PLAIN_ASCII:
491  for (unsigned int i = 0; i < this->size; i++)
492  {
493  Hermes::Helpers::fprint_num(file, v[i], number_format);
494  fprintf(file, "\n");
495  }
496 
497  return true;
498 
499  case DF_MATLAB_SPARSE:
500  fprintf(file, "%% Size: %dx1\n%s =[\n", this->size, var_name);
501  for (unsigned int i = 0; i < this->size; i++)
502  {
503  Hermes::Helpers::fprint_num(file, v[i], number_format);
504  fprintf(file, "\n");
505  }
506  fprintf(file, " ];\n");
507  return true;
508 
509  case DF_HERMES_BIN:
510  {
511  this->hermes_fwrite("HERMESR\001", 1, 8, file);
512  int ssize = sizeof(Scalar);
513  this->hermes_fwrite(&ssize, sizeof(int), 1, file);
514  this->hermes_fwrite(&this->size, sizeof(int), 1, file);
515  this->hermes_fwrite(v, sizeof(Scalar), this->size, file);
516  return true;
517  }
518 
519  default:
520  return false;
521  }
522  }
523 
524  template class HERMES_API SuperLUMatrix<double>;
525  template class HERMES_API SuperLUMatrix<std::complex<double> >;
526  template class HERMES_API SuperLUVector<double>;
527  template class HERMES_API SuperLUVector<std::complex<double> >;
528  }
529  namespace Solvers
530  {
531  template<typename Scalar>
532  bool SuperLUSolver<Scalar>::check_status(unsigned int info)
533  {
534  if(info == 0)
535  {
536  // Success.
537  return true;
538  }
539  else if(info <= m->size)
540  {
541  this->warn("SuperLU: Factor U is singular, solution could not be computed.");
542  return false;
543  }
544  else if(info == m->size + 1)
545  {
546  this->warn("SuperLU: RCOND is less than machine precision "
547  "(system matrix is singular to working precision).");
548  return true;
549  }
550  else if(info > m->size + 1)
551  {
552  this->warn("SuperLU: Not enough memory.\n Failure when %.3f MB were allocated.",
553  (info - m->size)/1e6);
554  return false;
555  }
556 
557  return false;
558  }
559 
560  template<typename Scalar>
561  SuperLUSolver<Scalar>::SuperLUSolver(SuperLUMatrix<Scalar> *m, SuperLUVector<Scalar> *rhs)
562  : DirectSolver<Scalar>(HERMES_FACTORIZE_FROM_SCRATCH), m(m), rhs(rhs), local_Ai(NULL), local_Ap(NULL)
563  , local_Ax(NULL), local_rhs(NULL)
564  {
565  R = NULL;
566  C = NULL;
567  perm_r = NULL;
568  perm_c = NULL;
569  etree = NULL;
570 #ifndef SLU_MT
571  *equed = '\0';
572 #endif
573 
574  // Set the default input options:
575 #ifdef SLU_MT
576  // I am not sure if this will work well on Windows:
577  // http://stackoverflow.com/questions/631664/accessing-environment-variables-in-c
578  char *nt_var = getenv("OMP_NUM_THREADS");
579  if(nt_var)
580  options.nprocs = std::max(1, atoi(nt_var));
581  else
582  options.nprocs = 1;
583 
584  options.fact = EQUILIBRATE; // Rescale the matrix if neccessary.
585  options.trans = NOTRANS; // Not solving the transposed problem.
586  options.refact = NO; // Factorize from scratch for the first time.
587  options.diag_pivot_thresh = 1.0; // Use partial pivoting during GEM.
588  options.usepr = NO; // Let SuperLU compute the row permutations.
589  options.drop_tol = 0.0; // Not yet implemented in SuperLU_MT 2.0.
590  options.SymmetricMode = NO; // Assume general non-symmetric problem.
591 
592  // Default options related to the supernodal algorithm.
593  options.panel_size = sp_ienv(1);
594  options.relax = sp_ienv(2);
595 #else
596  /*
597  options.Fact = DOFACT;
598  options.Equil = YES;
599  options.ColPerm = COLAMD;
600  options.DiagPivotThresh = 1.0;
601  options.Trans = NOTRANS;
602  options.IterRefine = NOREFINE;
603  options.SymmetricMode = NO;
604  options.PivotGrowth = NO;
605  options.ConditionNumber = NO;
606  options.PrintStat = YES;
607  */
608  set_default_options(&options); // This function is only present in the sequential SLU.
609 #endif
610 
611  options.PrintStat = YES; // Set to NO to suppress output.
612 
613  has_A = has_B = inited = false;
614  }
615 
616  inline SuperLuType<std::complex<double> >::Scalar to_superlu(SuperLuType<std::complex<double> >::Scalar &a, std::complex<double>b)
617  {
618  a.r = b.real();
619  a.i = b.imag();
620  return a;
621  }
622 
623  inline SuperLuType<double>::Scalar to_superlu(SuperLuType<double>::Scalar &a, double b)
624  {
625  a = b;
626  return a;
627  }
628 
629  template<typename Scalar>
630  SuperLUSolver<Scalar>::~SuperLUSolver()
631  {
632  free_factorization_data();
633  free_matrix();
634  free_rhs();
635 
636  if(local_Ai) delete [] local_Ai;
637  if(local_Ap) delete [] local_Ap;
638  if(local_Ax) delete [] local_Ax;
639  if(local_rhs) delete [] local_rhs;
640  }
641 
642  template<typename Scalar>
643  int SuperLUSolver<Scalar>::get_matrix_size()
644  {
645  return m->size;
646  }
647 
648  template<typename Scalar>
649  bool SuperLUSolver<Scalar>::solve()
650  {
651  assert(m != NULL);
652  assert(rhs != NULL);
653 
654  this->tick();
655 
656  // Initialize the statistics variable.
657  slu_stat_t stat;
658  SLU_INIT_STAT(&stat);
659 
660  // Prepare data structures serving as input for the solver driver
661  // (according to the chosen factorization reuse strategy).
662  void *work = NULL; // Explicit pointer to the factorization workspace
663  // (unused, see below).
664  int lwork = 0; // Space for the factorization will be allocated
665  // internally by system malloc.
666  double ferr = 1.0; // Estimated relative forward error
667  // (unused unless iterative refinement is performed).
668  double berr = 1.0; // Estimated relative backward error
669  // (unused unless iterative refinement is performed).
670  slu_memusage_t memusage; // Record the memory usage statistics.
671  double rpivot_growth; // The reciprocal pivot growth factor.
672  double rcond; // The estimate of the reciprocal condition number.
673 #ifdef SLU_MT
674  options.work = work;
675  options.lwork = lwork;
676 #endif
677 
678  if( !setup_factorization() )
679  {
680  this->warn("LU factorization could not be completed.");
681  return false;
682  }
683 
684  // If the previous factorization of A is to be fully reused as an input for the solver driver,
685  // keep the (possibly rescaled) matrix from the last factorization, otherwise recreate it
686  // from the master SuperLUMatrix<Scalar> pointed to by this->m (this also applies to the case when
687  // A does not yet exist).
688  if(!has_A || this->factorization_scheme != HERMES_REUSE_FACTORIZATION_COMPLETELY)
689  {
690  if(A_changed)
691  free_matrix();
692 
693  if(!has_A)
694  {
695  // A will be created from the local copy of the value and index arrays, because these
696  // may be modified by the solver driver.
697  if(local_Ai) delete [] local_Ai;
698  local_Ai = new int[m->nnz];
699  memcpy(local_Ai, m->Ai, m->nnz * sizeof(int));
700 
701  if(local_Ap) delete [] local_Ap;
702  local_Ap = new int[m->size + 1];
703  memcpy(local_Ap, m->Ap, (m->size + 1) * sizeof(int));
704 
705  if(local_Ax) delete [] local_Ax;
706  local_Ax = new typename SuperLuType<Scalar>::Scalar[m->nnz];
707  for (unsigned int i = 0;i<m->nnz;i++)
708  to_superlu(local_Ax[i], m->Ax[i]);
709 
710  // Create new general (non-symmetric), column-major, non-supernodal, size X size matrix.
711  create_csc_matrix(&A, m->size, m->size, m->nnz, local_Ax, local_Ai, local_Ap, SLU_NC, SLU_DTYPE, SLU_GE);
712 
713  has_A = true;
714  }
715  }
716 
717  // Recreate the input rhs for the solver driver from a local copy of the new value array.
718  free_rhs();
719 
720  if(local_rhs) delete [] local_rhs;
721  local_rhs = new typename SuperLuType<Scalar>::Scalar[rhs->size];
722  for (unsigned int i = 0;i<rhs->size;i++)
723  to_superlu(local_rhs[i], rhs->v[i]);
724 
725  create_dense_matrix(&B, rhs->size, 1, local_rhs, rhs->size, SLU_DN, SLU_DTYPE, SLU_GE);
726 
727  has_B = true;
728 
729  // Initialize the solution variable.
730  SuperMatrix X;
731  typename SuperLuType<Scalar>::Scalar *x;
732  if( !(x = new typename SuperLuType<Scalar>::Scalar[m->size]) )
733  throw Hermes::Exceptions::Exception("Malloc fails for x[].");
734  create_dense_matrix(&X, m->size, 1, x, m->size, SLU_DN, SLU_DTYPE, SLU_GE);
735 
736  // Solve the system.
737  int info;
738 
739 #ifdef SLU_MT
740  if(options.refact == NO)
741  {
742  // Get column permutation vector perm_c[], according to the first argument:
743  // 0: natural ordering
744  // 1: minimum degree ordering on structure of A'*A
745  // 2: minimum degree ordering on structure of A' + A
746  // 3: approximate minimum degree for unsymmetric matrices
747  get_perm_c(1, &A, perm_c);
748  }
749 
750  /*
751  // Compute reciprocal pivot growth, estimate reciprocal condition number of A, solve,
752  // perform iterative refinement of the solution and estimate forward and backward error.
753  // Memory usage will be acquired at the end. If A is singular, info will be set to A->ncol + 1.
754  //
755  slu_mt_solver_driver( &options, &A, perm_c, perm_r, &AC, &equed, R, C,
756  &L, &U, &B, &X, &rpivot_growth, &rcond, &ferr, &berr,
757  &stat, &memusage, &info );
758  */
759 
760  // ... OR ...
761 
762  // Estimate reciprocal condition number of A and solve the system. If A is singular, info
763  // will be set to A->ncol + 1.
764  //
765  slu_mt_solver_driver( &options, &A, perm_c, perm_r, &AC, &equed, R, C,
766  &L, &U, &B, &X, NULL, &rcond, NULL, NULL,
767  &stat, NULL, &info );
768 
769  // ... OR ...
770 
771  /*
772  // Do not check the regularity of A and just solve the system.
773  //
774  slu_mt_solver_driver( &options, &A, perm_c, perm_r, &AC, &equed, R, C,
775  &L, &U, &B, &X, NULL, NULL, NULL, NULL,
776  &stat, NULL, &info );
777  */
778 #else
779  solver_driver(&options, &A, perm_c, perm_r, etree, equed, R, C, &L, &U,
780  work, lwork, &B, &X, &rpivot_growth, &rcond, &ferr, &berr,
781  &memusage, &stat, &info);
782 #endif
783 
784  // A and B may have been multiplied by the scaling vectors R and C on the output of the
785  // solver. If the next call to the solver should reuse factorization only partially,
786  // it will need the original unscaled matrix - this will indicate such situation
787  // (rhs is always recreated anew).
788 #ifdef SLU_MT
789  A_changed = (equed != NOEQUIL);
790 #else
791  A_changed = (*equed != 'N');
792 #endif
793 
794  bool factorized = check_status(info);
795 
796  if(factorized)
797  {
798  delete [] this->sln;
799  this->sln = new Scalar[m->size];
800 
801  Scalar *sol = (Scalar*) ((DNformat*) X.Store)->nzval;
802 
803  for (unsigned int i = 0; i < rhs->size; i++)
804  this->sln[i] = sol[i];
805  }
806 
807  // If required, print statistics.
808  if( options.PrintStat ) SLU_PRINT_STAT(&stat);
809 
810  // Free temporary local variables.
811  StatFree(&stat);
812  //SUPERLU_FREE (x);
813  delete x;
814  Destroy_SuperMatrix_Store(&X);
815 
816  this->tick();
817  this->time = this->accumulated();
818 
819  return factorized;
820  }
821 
822  template<typename Scalar>
823  bool SuperLUSolver<Scalar>::setup_factorization()
824  {
825  unsigned int A_size = A.nrow < 0 ? 0 : A.nrow;
826  if(has_A && this->factorization_scheme != HERMES_FACTORIZE_FROM_SCRATCH && A_size != m->size)
827  {
828  this->warn("You cannot reuse factorization structures for factorizing matrices of different sizes.");
829  return false;
830  }
831 
832  // Always factorize from scratch for the first time.
833  int eff_fact_scheme;
834  if(!inited)
835  eff_fact_scheme = HERMES_FACTORIZE_FROM_SCRATCH;
836  else
837  eff_fact_scheme = this->factorization_scheme;
838 
839  // Prepare factorization structures. In case of a particular reuse scheme, comments are given
840  // to clarify which arguments will be reused and which will be reset by the dgssvx (zgssvx) routine.
841  // It was determined empirically by running the dlinsolx2 example from SuperLU, setting options.Fact
842  // to the appropriate value and reallocating the various structures before the second run of dgssvx,
843  // and observing when segfault will happen and when not. It is actually not needed to reallocate
844  // the structures by hand, but comments at various places of the SuperLU 4.0 library contradict
845  // each other and often lead to segfault when the structures are reallocated according to them.
846  // It might thus bring some insight into how SuperLU works and how to correctly use it
847  // (the PDF documentation is, unfortunately, even less helpful).
848  switch (eff_fact_scheme)
849  {
851  // This case should generally allow for solving a completely new system, i.e. for a change of
852  // matrix and rhs size - for simplicity, we reallocate the structures every time.
853 
854  // Clear the structures emanating from previous factorization.
855  free_factorization_data();
856 
857  // Allocate the row/column reordering vectors.
858  if( !(perm_c = intMalloc(m->size)) )
859  throw Hermes::Exceptions::Exception("Malloc fails for perm_c[].");
860  if( !(perm_r = intMalloc(m->size)) )
861  throw Hermes::Exceptions::Exception("Malloc fails for perm_r[].");
862 
863  // Allocate vectors with row/column scaling factors.
864  if( !(R = (double *) SUPERLU_MALLOC(m->size * sizeof(double))) )
865  throw Hermes::Exceptions::Exception("SUPERLU_MALLOC fails for R[].");
866  if( !(C = (double *) SUPERLU_MALLOC(m->size * sizeof(double))) )
867  throw Hermes::Exceptions::Exception("SUPERLU_MALLOC fails for C[].");
868 
869 #ifdef SLU_MT
870  options.fact = EQUILIBRATE;
871  options.refact = NO;
872  options.perm_c = perm_c;
873  options.perm_r = perm_r;
874 #else
875  // Allocate additional structures used by the driver routine of sequential SuperLU.
876  // Elimination tree is contained in the options structure in SuperLU_MT.
877  if( !(etree = intMalloc(m->size)) )
878  throw Hermes::Exceptions::Exception("Malloc fails for etree[].");
879 
880  options.Fact = DOFACT;
881 #endif
882  A_changed = true;
883  break;
885  // needed from previous: etree, perm_c
886  // not needed from previous: perm_r, R, C, L, U, equed
887 #ifdef SLU_MT
888  options.fact = EQUILIBRATE;
889  options.refact = YES;
890 #else
891  options.Fact = SamePattern;
892 #endif
893  // L, U matrices may be reused without reallocating.
894  // SLU_DESTROY_L(&L);
895  // SLU_DESTROY_U(&U);
896  break;
898  // needed from previous: etree, perm_c, perm_r, L, U
899  // not needed from previous: R, C, equed
900 #ifdef SLU_MT
901  // MT version of SLU cannot reuse the equilibration factors (R, C), so
902  // this is the same as the previous case.
903  options.fact = EQUILIBRATE;
904  options.refact = YES;
905 #else
906  options.Fact = SamePattern_SameRowPerm;
907 #endif
908  break;
910  // needed from previous: perm_c, perm_r, equed, L, U
911  // not needed from previous: etree, R, C
912 #ifdef SLU_MT
913  options.fact = FACTORED;
914  options.refact = YES;
915 #else
916  options.Fact = FACTORED;
917 #endif
918  break;
919  }
920 
921  inited = true;
922 
923  return true;
924  }
925 
926  template<typename Scalar>
927  void SuperLUSolver<Scalar>::free_matrix()
928  {
929  if(has_A)
930  {
931  Destroy_SuperMatrix_Store(&A);
932  has_A = false;
933  }
934  }
935 
936  template<typename Scalar>
937  void SuperLUSolver<Scalar>::free_rhs()
938  {
939  if(has_B)
940  {
941  Destroy_SuperMatrix_Store(&B);
942  has_B = false;
943  }
944  }
945 
946  template<typename Scalar>
947  void SuperLUSolver<Scalar>::free_factorization_data()
948  {
949  if(inited)
950  {
951 #ifdef SLU_MT
952  SUPERLU_FREE(options.etree);
953  SUPERLU_FREE(options.colcnt_h);
954  SUPERLU_FREE(options.part_super_h);
955  Destroy_CompCol_Permuted(&AC);
956 #else
957  SUPERLU_FREE (etree);
958 #endif
959  SUPERLU_FREE (perm_c);
960  SUPERLU_FREE (perm_r);
961  SUPERLU_FREE (R);
962  SUPERLU_FREE (C);
963  SLU_DESTROY_L(&L);
964  SLU_DESTROY_U(&U);
965  inited = false;
966  }
967  }
968 
969 #ifdef SLU_MT
970  // This is a modification of the original p*gssvx routines from the SuperLU_MT library.
971  //
972  // The original routines have been changed in view of our applications, i.e.
973  // * only one right hand side is allowed,
974  // * some initial parameter checks have been omitted,
975  // * macros allowing abstraction from the fundamental Scalar datatype have been used
976  // * some phases of the calculation may be omitted for speed-up (less information about
977  // the matrix/solution can then be acquired, however),
978  // * deallocation at the end of the routine has been removed (this was neccessary to
979  // enable factorization reuse).
980  //
981  // See the correspondingly named attributes of SuperLUSolver class for brief description
982  // of most parameters or the library source code for pdgssvx for more details. You may pass
983  // NULL for
984  // * recip_pivot_growth - reciprocal pivot growth factor will then not be computed;
985  // reip_pivot_growth much less than one may indicate poor
986  // stability of the factorization;
987  // * rcond - estimate of the reciprocal condition number of matrix A will
988  // then not be computed; this will prevent detection of singularity
989  // of matrix A;
990  // * ferr or berr - iterative refinement of the solution will then not be performed;
991  // this also prevents computation of forward and backward error
992  // estimates of the computed solution;
993  // * memusage - memory usage during the factorization/solution will not be queried.
994  //
995  void slu_mt_solver_driver(slu_options_t *options, SuperMatrix *A,
996  int *perm_c, int *perm_r, SuperMatrix *AC,
997  equed_t *equed, double *R, double *C,
998  SuperMatrix *L, SuperMatrix *U,
999  SuperMatrix *B, SuperMatrix *X,
1000  double *recip_pivot_growth, double *rcond,
1001  double *ferr, double *berr,
1002  slu_stat_t *stat, slu_memusage_t *memusage,
1003  int *info)
1004  {
1005  /* Profiling variables. */
1006  double t0;
1007  flops_t flopcnt;
1008 
1009  /* State variables. */
1010  int dofact = (options->fact == DOFACT);
1011  int equil = (options->fact == EQUILIBRATE);
1012  int notran = (options->trans == NOTRANS);
1013  int colequ, rowequ;
1014 
1015  /* Right hand side and solution vectors. */
1016  DNformat *Bstore = (DNformat*) B->Store;
1017  DNformat *Xstore = (DNformat*) X->Store;
1018  Scalar *Bmat = (Scalar*) Bstore->nzval;
1019  Scalar *Xmat = (Scalar*) Xstore->nzval;
1020 
1021  *info = 0;
1022 
1023  /* ------------------------------------------------------------
1024  Diagonal scaling to equilibrate the matrix.
1025  ------------------------------------------------------------*/
1026  if(dofact || equil)
1027  {
1028  *equed = NOEQUIL;
1029  rowequ = colequ = FALSE;
1030  }
1031  else
1032  {
1033  rowequ = (*equed == ROW) || (*equed == BOTH);
1034  colequ = (*equed == COL) || (*equed == BOTH);
1035  }
1036 
1037  if( equil )
1038  {
1039  t0 = SuperLU_timer_();
1040  /* Compute row and column scalings to equilibrate the matrix A. */
1041  int info1;
1042  double rowcnd, colcnd, amax;
1043  SLU_GSEQU(A, R, C, &rowcnd, &colcnd, &amax, &info1);
1044 
1045  if( info1 == 0 )
1046  {
1047  /* Equilibrate matrix A. */
1048  SLU_LAQGS(A, R, C, rowcnd, colcnd, amax, equed);
1049  rowequ = (*equed == ROW) || (*equed == BOTH);
1050  colequ = (*equed == COL) || (*equed == BOTH);
1051  }
1052  stat->utime[EQUIL] = SuperLU_timer_() - t0;
1053  }
1054 
1055  /* ------------------------------------------------------------
1056  Scale the right hand side.
1057  ------------------------------------------------------------*/
1058  if( notran )
1059  {
1060  if( rowequ )
1061  for (int i = 0; i < A->nrow; ++i)
1062  SLU_MULT(Bmat[i], R[i]);
1063  }
1064  else if( colequ )
1065  {
1066  for (int i = 0; i < A->nrow; ++i)
1067  SLU_MULT(Bmat[i], C[i]);
1068  }
1069 
1070  /* ------------------------------------------------------------
1071  Perform the LU factorization.
1072  ------------------------------------------------------------*/
1073  if( dofact || equil )
1074  {
1075  /* Obtain column etree, the column count (colcnt_h) and supernode
1076  partition (part_super_h) for the Householder matrix. */
1077  if(options->refact == NO)
1078  {
1079  t0 = SuperLU_timer_();
1080  SLU_SP_COLORDER(A, perm_c, options, AC);
1081  stat->utime[ETREE] = SuperLU_timer_() - t0;
1082  }
1083 
1084  /* Compute the LU factorization of A*Pc. */
1085  t0 = SuperLU_timer_();
1086  SLU_GSTRF(options, AC, perm_r, L, U, stat, info);
1087  stat->utime[FACT] = SuperLU_timer_() - t0;
1088 
1089  flopcnt = 0;
1090  for (int i = 0; i < options->nprocs; ++i) flopcnt += stat->procstat[i].fcops;
1091  stat->ops[FACT] = flopcnt;
1092 
1093  if( options->lwork == -1 )
1094  {
1095  if(memusage)
1096  memusage->total_needed = *info - A->ncol;
1097  return;
1098  }
1099  }
1100 
1101  if( *info > 0 )
1102  {
1103  if( *info <= A->ncol )
1104  {
1105  /* Compute the reciprocal pivot growth factor of the leading
1106  rank-deficient *info columns of A. */
1107  if(recip_pivot_growth)
1108  *recip_pivot_growth = SLU_PIVOT_GROWTH(*info, A, perm_c, L, U);
1109  }
1110  }
1111  else
1112  {
1113  /* ------------------------------------------------------------
1114  Compute the reciprocal pivot growth factor *recip_pivot_growth.
1115  ------------------------------------------------------------*/
1116  if(recip_pivot_growth)
1117  *recip_pivot_growth = SLU_PIVOT_GROWTH(A->ncol, A, perm_c, L, U);
1118 
1119  /* ------------------------------------------------------------
1120  Estimate the reciprocal of the condition number of A.
1121  ------------------------------------------------------------*/
1122  if(rcond)
1123  {
1124  t0 = SuperLU_timer_();
1125 
1126  // Next two lines are a bit complicated, but taken as they appear
1127  // in the original library function.
1128  char norm[1];
1129  *(unsigned char *)norm = (notran) ? '1' : 'I';
1130 
1131  double anorm = SLU_LANGS(norm, A);
1132  SLU_GSCON(norm, L, U, anorm, rcond, info);
1133  stat->utime[RCOND] = SuperLU_timer_() - t0;
1134  }
1135 
1136  /* ------------------------------------------------------------
1137  Compute the solution matrix X.
1138  ------------------------------------------------------------*/
1139  // Save a copy of the right hand side.
1140  memcpy(Xmat, Bmat, B->nrow * sizeof(Scalar));
1141 
1142  t0 = SuperLU_timer_();
1143  SLU_GSTRS(options->trans, L, U, perm_r, perm_c, X, stat, info);
1144  stat->utime[SOLVE] = SuperLU_timer_() - t0;
1145  stat->ops[SOLVE] = stat->ops[TRISOLVE];
1146 
1147  /* ------------------------------------------------------------
1148  Use iterative refinement to improve the computed solution and
1149  compute error bounds and backward error estimates for it.
1150  ------------------------------------------------------------*/
1151  if(ferr && berr)
1152  {
1153  t0 = SuperLU_timer_();
1154  SLU_GSRFS(options->trans, A, L, U, perm_r, perm_c, *equed,
1155  R, C, B, X, ferr, berr, stat, info);
1156  stat->utime[REFINE] = SuperLU_timer_() - t0;
1157  }
1158 
1159  /* ------------------------------------------------------------
1160  Transform the solution matrix X to a solution of the original
1161  system.
1162  ------------------------------------------------------------*/
1163  if( notran )
1164  {
1165  if( colequ )
1166  for (int i = 0; i < A->nrow; ++i)
1167  SLU_MULT(Xmat[i], C[i]);
1168  }
1169  else if( rowequ )
1170  {
1171  for (int i = 0; i < A->nrow; ++i)
1172  SLU_MULT(Xmat[i], R[i]);
1173  }
1174 
1175  /* Set INFO = A->ncol + 1 if the matrix is singular to
1176  working precision.*/
1177  char param[1]; param[0] = 'E';
1178  if( rcond && *rcond < SLU_LAMCH_(param) ) *info = A->ncol + 1;
1179  }
1180 
1181  if(memusage)
1182  SLU_QUERY_SPACE(options->nprocs, L, U, options->panel_size, memusage);
1183  }
1184 #endif
1185 
1186  template class HERMES_API SuperLUSolver<double>;
1187  template class HERMES_API SuperLUSolver<std::complex<double> >;
1188  }
1189 }
1190 #endif