34 void SuperLu<double>::sequ(SuperMatrix *A,
double *r,
double *c,
double *rowcnd,
double *colcnd,
double *amax,
int *info)
36 dsequ (A, r, c, rowcnd, colcnd, amax, info);
40 void SuperLu<double>::laqgs (SuperMatrix *A,
float *r,
float *c,
float rowcnd,
float colcnd,
float amax,
char *equed)
42 dlaqgs (A, r, c, rowcnd, colcnd, amax, equed);
46 int SuperLu<double>::gstrf (superlu_options_t *options,
int m,
int n,
double anorm, LUstruct_t *LUstruct, gridinfo_t *grid, SuperLUStat_t *stat,
int *info)
48 return dgstrf (options, m, n, anorm, LUstruct, grid, stat, info);
52 float SuperLu<double>::pivotGrowth (
int ncols, SuperMatrix *A,
int *perm_c, SuperMatrix *L, SuperMatrix *U)
54 return dPivotGrowth (ncols, A, perm_c, L, U);
58 float SuperLu<double>::langs (
char *norm, SuperMatrix *A)
60 return dlangs (norm, A);
63 void SuperLu<double>::gscon (
char *norm, SuperMatrix *L, SuperMatrix *U,
float anorm,
float *rcond, SuperLUStat_t *stat,
int *info)
65 dgscon (norm, L, U, anorm, rcond, stat, info);
69 void SuperLu<double>::gstrs (trans_t trans, SuperMatrix *L, SuperMatrix *U,
int *perm_c,
int *perm_r, SuperMatrix *B, SuperLUStat_t *stat,
int *info)
71 dgstrs (trans, L, U, perm_c, perm_r, B, stat, info);
75 double SuperLu<double>::lamch_ (
char *cmach)
77 return dlamch_ (cmach);
81 int SuperLu<double>::querySpace (SuperMatrix *a, SuperMatrix *b, mem_usage_t *mu)
83 return dquerySpace (a, b, mu);
95 static int find_position(
int *Ai,
int Alen,
int idx)
99 register int lo = 0, hi = Alen - 1, mid;
103 mid = (lo + hi) >> 1;
105 if(idx < Ai[mid]) hi = mid - 1;
106 else if(idx > Ai[mid]) lo = mid + 1;
111 if(lo > hi) mid = -1;
116 template<
typename Scalar>
117 SuperLUMatrix<Scalar>::SuperLUMatrix()
119 this->size = 0; nnz = 0;
125 template<
typename Scalar>
126 SuperLUMatrix<Scalar>::~SuperLUMatrix()
131 template<
typename Scalar>
132 void SuperLUMatrix<Scalar>::alloc()
134 assert(this->pages != NULL);
137 Ap =
new unsigned int[this->size + 1];
138 int aisize = this->get_num_indices();
139 Ai =
new int[aisize];
142 unsigned int i, pos = 0;
143 for (i = 0; i < this->size; i++)
146 pos += sort_and_store_indices(this->pages[i], Ai + pos, Ai + aisize);
150 delete [] this->pages;
153 nnz = Ap[this->size];
155 Ax =
new Scalar[nnz];
156 memset(Ax, 0,
sizeof(Scalar) * nnz);
159 template<
typename Scalar>
160 void SuperLUMatrix<Scalar>::free()
163 delete [] Ap; Ap = NULL;
164 delete [] Ai; Ai = NULL;
165 delete [] Ax; Ax = NULL;
168 template<
typename Scalar>
169 Scalar SuperLUMatrix<Scalar>::get(
unsigned int m,
unsigned int n)
172 int mid = find_position(Ai + Ap[n], Ap[n + 1] - Ap[n], m);
174 if(mid < 0)
return 0.0;
176 if(mid >= 0) mid += Ap[n];
180 template<
typename Scalar>
181 void SuperLUMatrix<Scalar>::zero()
183 memset(Ax, 0,
sizeof(Scalar) * nnz);
186 template<
typename Scalar>
187 void SuperLUMatrix<Scalar>::add(
unsigned int m,
unsigned int n, Scalar v)
192 int pos = find_position(Ai + Ap[n], Ap[n + 1] - Ap[n], m);
198 #pragma omp critical (SuperLUMatrix_add)
205 template<
typename Scalar>
206 void SuperLUMatrix<Scalar>::add_to_diagonal(Scalar v)
208 for (
unsigned int i = 0; i<this->size; i++)
214 template<
typename Scalar>
215 void SuperLUMatrix<Scalar>::add(
unsigned int m,
unsigned int n, Scalar **mat,
int *rows,
int *cols)
217 for (
unsigned int i = 0; i < m; i++)
218 for (
unsigned int j = 0; j < n; j++)
219 if(rows[i] >= 0 && cols[j] >= 0)
220 add(rows[i], cols[j], mat[i][j]);
225 template<
typename Scalar>
226 bool SuperLUMatrix<Scalar>::dump(FILE *file,
const char *var_name,
EMatrixDumpFormat fmt,
char* number_format)
232 fprintf(file,
"%% Size: %dx%d\n%% Nonzeros: %d\ntemp = zeros(%d, 3);\ntemp =[\n", this->size, this->size, Ap[this->size], Ap[this->size]);
233 for (
unsigned int j = 0; j < this->size; j++)
234 for (
unsigned int i = Ap[j]; i < Ap[j + 1]; i++)
236 fprintf(file,
"%d %d " , Ai[i] + 1, j + 1);
237 Hermes::Helpers::fprint_num(file, Ax[i], number_format);
240 fprintf(file,
"];\n%s = spconvert(temp);\n", var_name);
246 this->hermes_fwrite(
"HERMESX\001", 1, 8, file);
247 int ssize =
sizeof(Scalar);
248 this->hermes_fwrite(&ssize,
sizeof(
int), 1, file);
249 this->hermes_fwrite(&this->size,
sizeof(
int), 1, file);
250 this->hermes_fwrite(&nnz,
sizeof(
int), 1, file);
251 this->hermes_fwrite(Ap,
sizeof(
int), this->size + 1, file);
252 this->hermes_fwrite(Ai,
sizeof(
int), nnz, file);
253 this->hermes_fwrite(Ax,
sizeof(Scalar), nnz, file);
262 template<
typename Scalar>
263 unsigned int SuperLUMatrix<Scalar>::get_matrix_size()
const
268 template<
typename Scalar>
269 unsigned int SuperLUMatrix<Scalar>::get_nnz()
const
274 template<
typename Scalar>
275 double SuperLUMatrix<Scalar>::get_fill_in()
const
277 return nnz / (double) (this->size * this->size);
280 template<
typename Scalar>
281 void SuperLUMatrix<Scalar>::add_matrix(SuperLUMatrix<Scalar>* mat)
283 add_as_block(0, 0, mat);
286 template<
typename Scalar>
287 void SuperLUMatrix<Scalar>::add_to_diagonal_blocks(
int num_stages, SuperLUMatrix<Scalar>* mat)
289 int ndof = mat->get_size();
290 if(this->get_size() != (
unsigned int) num_stages * ndof)
293 for (
int i = 0; i < num_stages; i++)
295 this->add_as_block(ndof*i, ndof*i, mat);
299 template<
typename Scalar>
300 void SuperLUMatrix<Scalar>::add_sparse_to_diagonal_blocks(
int num_stages, SparseMatrix<Scalar>* mat)
302 add_to_diagonal_blocks(num_stages, static_cast<SuperLUMatrix*>(mat));
305 template<
typename Scalar>
306 void SuperLUMatrix<Scalar>::add_as_block(
unsigned int i,
unsigned int j, SuperLUMatrix<Scalar>* mat)
309 for (
unsigned int col = 0;col<mat->get_size();col++)
311 for (
unsigned int n = mat->Ap[col];n<mat->Ap[col + 1];n++)
313 idx = find_position(Ai + Ap[col + j], Ap[col + 1 + j] - Ap[col + j], mat->Ai[n] + i);
317 Ax[idx] +=mat->Ax[n];
324 template<
typename Scalar>
325 void SuperLUMatrix<Scalar>::multiply_with_vector(Scalar* vector_in, Scalar* vector_out)
327 for(
unsigned int i = 0;i<this->size;i++)
331 for (
unsigned int c = 0;c<this->size;c++)
333 for (
unsigned int i = Ap[c];i<Ap[c + 1];i++)
335 vector_out[c] +=vector_in[Ai[i]]*Ax[i];
341 template<
typename Scalar>
342 void SuperLUMatrix<Scalar>::multiply_with_Scalar(Scalar value)
345 for(
int i = 0;i<n;i++)
352 template<
typename Scalar>
353 void SuperLUMatrix<Scalar>::create(
unsigned int size,
unsigned int nnz,
int* ap,
int* ai, Scalar* ax)
357 this->Ap =
new unsigned int[this->size + 1]; assert(this->Ap != NULL);
358 this->Ai =
new int[nnz]; assert(this->Ai != NULL);
359 this->Ax =
new Scalar[nnz]; assert(this->Ax != NULL);
361 for (
unsigned int i = 0; i < this->size + 1; i++)
365 for (
unsigned int i = 0; i < nnz; i++)
373 template<
typename Scalar>
374 SuperLUMatrix<Scalar>* SuperLUMatrix<Scalar>::duplicate()
376 SuperLUMatrix<Scalar> * nmat =
new SuperLUMatrix<Scalar>();
379 nmat->size = this->size;
380 nmat->Ap =
new unsigned int[this->size + 1]; assert(nmat->Ap != NULL);
381 nmat->Ai =
new int[nnz]; assert(nmat->Ai != NULL);
382 nmat->Ax =
new Scalar[nnz]; assert(nmat->Ax != NULL);
383 for (
unsigned int i = 0;i<nnz;i++)
388 for (
unsigned int i = 0;i<this->size + 1;i++)
397 template<
typename Scalar>
398 SuperLUVector<Scalar>::SuperLUVector()
404 template<
typename Scalar>
405 SuperLUVector<Scalar>::~SuperLUVector()
410 template<
typename Scalar>
411 void SuperLUVector<Scalar>::alloc(
unsigned int n)
419 template<
typename Scalar>
420 void SuperLUVector<Scalar>::zero()
422 memset(v, 0, this->size *
sizeof(Scalar));
425 template<
typename Scalar>
426 void SuperLUVector<Scalar>::change_sign()
428 for (
unsigned int i = 0; i < this->size; i++) v[i] *= -1.;
431 template<
typename Scalar>
432 void SuperLUVector<Scalar>::free()
439 template<
typename Scalar>
440 void SuperLUVector<Scalar>::set(
unsigned int idx, Scalar y)
445 template<
typename Scalar>
446 void SuperLUVector<Scalar>::add(
unsigned int idx, Scalar y)
451 template<
typename Scalar>
452 void SuperLUVector<Scalar>::add(
unsigned int n,
unsigned int *idx, Scalar *y)
454 for (
unsigned int i = 0; i < n; i++)
460 template<
typename Scalar>
461 Scalar SuperLUVector<Scalar>::get(
unsigned int idx)
466 template<
typename Scalar>
467 void SuperLUVector<Scalar>::extract(Scalar *v)
const
469 memcpy(v, this->v, this->size *
sizeof(Scalar));
472 template<
typename Scalar>
473 void SuperLUVector<Scalar>::add_vector(Vector<Scalar>* vec)
475 assert(this->length() == vec->length());
476 for (
unsigned int i = 0; i < this->length(); i++) this->add(i, vec->get(i));
479 template<
typename Scalar>
480 void SuperLUVector<Scalar>::add_vector(Scalar* vec)
482 for (
unsigned int i = 0; i < this->length(); i++) this->add(i, vec[i]);
485 template<
typename Scalar>
486 bool SuperLUVector<Scalar>::dump(FILE *file,
const char *var_name,
EMatrixDumpFormat fmt,
char* number_format)
491 for (
unsigned int i = 0; i < this->size; i++)
493 Hermes::Helpers::fprint_num(file, v[i], number_format);
500 fprintf(file,
"%% Size: %dx1\n%s =[\n", this->size, var_name);
501 for (
unsigned int i = 0; i < this->size; i++)
503 Hermes::Helpers::fprint_num(file, v[i], number_format);
506 fprintf(file,
" ];\n");
511 this->hermes_fwrite(
"HERMESR\001", 1, 8, file);
512 int ssize =
sizeof(Scalar);
513 this->hermes_fwrite(&ssize,
sizeof(
int), 1, file);
514 this->hermes_fwrite(&this->size,
sizeof(
int), 1, file);
515 this->hermes_fwrite(v,
sizeof(Scalar), this->size, file);
524 template class HERMES_API SuperLUMatrix<double>;
525 template class HERMES_API SuperLUMatrix<std::complex<double> >;
526 template class HERMES_API SuperLUVector<double>;
527 template class HERMES_API SuperLUVector<std::complex<double> >;
531 template<
typename Scalar>
532 bool SuperLUSolver<Scalar>::check_status(
unsigned int info)
539 else if(info <= m->size)
541 this->warn(
"SuperLU: Factor U is singular, solution could not be computed.");
544 else if(info == m->size + 1)
546 this->warn(
"SuperLU: RCOND is less than machine precision "
547 "(system matrix is singular to working precision).");
550 else if(info > m->size + 1)
552 this->warn(
"SuperLU: Not enough memory.\n Failure when %.3f MB were allocated.",
553 (info - m->size)/1e6);
560 template<
typename Scalar>
561 SuperLUSolver<Scalar>::SuperLUSolver(SuperLUMatrix<Scalar> *m, SuperLUVector<Scalar> *rhs)
563 , local_Ax(NULL), local_rhs(NULL)
578 char *nt_var = getenv(
"OMP_NUM_THREADS");
580 options.nprocs = std::max(1, atoi(nt_var));
584 options.fact = EQUILIBRATE;
585 options.trans = NOTRANS;
587 options.diag_pivot_thresh = 1.0;
589 options.drop_tol = 0.0;
590 options.SymmetricMode = NO;
593 options.panel_size = sp_ienv(1);
594 options.relax = sp_ienv(2);
608 set_default_options(&options);
611 options.PrintStat = YES;
613 has_A = has_B = inited =
false;
616 inline SuperLuType<std::complex<double> >::Scalar to_superlu(SuperLuType<std::complex<double> >::Scalar &a, std::complex<double>b)
623 inline SuperLuType<double>::Scalar to_superlu(SuperLuType<double>::Scalar &a,
double b)
629 template<
typename Scalar>
630 SuperLUSolver<Scalar>::~SuperLUSolver()
632 free_factorization_data();
636 if(local_Ai)
delete [] local_Ai;
637 if(local_Ap)
delete [] local_Ap;
638 if(local_Ax)
delete [] local_Ax;
639 if(local_rhs)
delete [] local_rhs;
642 template<
typename Scalar>
643 int SuperLUSolver<Scalar>::get_matrix_size()
648 template<
typename Scalar>
649 bool SuperLUSolver<Scalar>::solve()
658 SLU_INIT_STAT(&stat);
670 slu_memusage_t memusage;
671 double rpivot_growth;
675 options.lwork = lwork;
678 if( !setup_factorization() )
680 this->warn(
"LU factorization could not be completed.");
688 if(!has_A || this->factorization_scheme != HERMES_REUSE_FACTORIZATION_COMPLETELY)
697 if(local_Ai)
delete [] local_Ai;
698 local_Ai =
new int[m->nnz];
699 memcpy(local_Ai, m->Ai, m->nnz *
sizeof(
int));
701 if(local_Ap)
delete [] local_Ap;
702 local_Ap =
new int[m->size + 1];
703 memcpy(local_Ap, m->Ap, (m->size + 1) *
sizeof(
int));
705 if(local_Ax)
delete [] local_Ax;
706 local_Ax =
new typename SuperLuType<Scalar>::Scalar[m->nnz];
707 for (
unsigned int i = 0;i<m->nnz;i++)
708 to_superlu(local_Ax[i], m->Ax[i]);
711 create_csc_matrix(&A, m->size, m->size, m->nnz, local_Ax, local_Ai, local_Ap, SLU_NC, SLU_DTYPE, SLU_GE);
720 if(local_rhs)
delete [] local_rhs;
721 local_rhs =
new typename SuperLuType<Scalar>::Scalar[rhs->size];
722 for (
unsigned int i = 0;i<rhs->size;i++)
723 to_superlu(local_rhs[i], rhs->v[i]);
725 create_dense_matrix(&B, rhs->size, 1, local_rhs, rhs->size, SLU_DN, SLU_DTYPE, SLU_GE);
731 typename SuperLuType<Scalar>::Scalar *x;
732 if( !(x =
new typename SuperLuType<Scalar>::Scalar[m->size]) )
734 create_dense_matrix(&X, m->size, 1, x, m->size, SLU_DN, SLU_DTYPE, SLU_GE);
740 if(options.refact == NO)
747 get_perm_c(1, &A, perm_c);
765 slu_mt_solver_driver( &options, &A, perm_c, perm_r, &AC, &equed, R, C,
766 &L, &U, &B, &X, NULL, &rcond, NULL, NULL,
767 &stat, NULL, &info );
779 solver_driver(&options, &A, perm_c, perm_r, etree, equed, R, C, &L, &U,
780 work, lwork, &B, &X, &rpivot_growth, &rcond, &ferr, &berr,
781 &memusage, &stat, &info);
789 A_changed = (equed != NOEQUIL);
791 A_changed = (*equed !=
'N');
794 bool factorized = check_status(info);
799 this->sln =
new Scalar[m->size];
801 Scalar *sol = (Scalar*) ((DNformat*) X.Store)->nzval;
803 for (
unsigned int i = 0; i < rhs->size; i++)
804 this->sln[i] = sol[i];
808 if( options.PrintStat ) SLU_PRINT_STAT(&stat);
814 Destroy_SuperMatrix_Store(&X);
817 this->time = this->accumulated();
822 template<
typename Scalar>
823 bool SuperLUSolver<Scalar>::setup_factorization()
825 unsigned int A_size = A.nrow < 0 ? 0 : A.nrow;
826 if(has_A && this->factorization_scheme != HERMES_FACTORIZE_FROM_SCRATCH && A_size != m->size)
828 this->warn(
"You cannot reuse factorization structures for factorizing matrices of different sizes.");
837 eff_fact_scheme = this->factorization_scheme;
848 switch (eff_fact_scheme)
855 free_factorization_data();
858 if( !(perm_c = intMalloc(m->size)) )
860 if( !(perm_r = intMalloc(m->size)) )
864 if( !(R = (
double *) SUPERLU_MALLOC(m->size *
sizeof(
double))) )
866 if( !(C = (
double *) SUPERLU_MALLOC(m->size *
sizeof(
double))) )
870 options.fact = EQUILIBRATE;
872 options.perm_c = perm_c;
873 options.perm_r = perm_r;
877 if( !(etree = intMalloc(m->size)) )
880 options.Fact = DOFACT;
888 options.fact = EQUILIBRATE;
889 options.refact = YES;
891 options.Fact = SamePattern;
903 options.fact = EQUILIBRATE;
904 options.refact = YES;
906 options.Fact = SamePattern_SameRowPerm;
913 options.fact = FACTORED;
914 options.refact = YES;
916 options.Fact = FACTORED;
926 template<
typename Scalar>
927 void SuperLUSolver<Scalar>::free_matrix()
931 Destroy_SuperMatrix_Store(&A);
936 template<
typename Scalar>
937 void SuperLUSolver<Scalar>::free_rhs()
941 Destroy_SuperMatrix_Store(&B);
946 template<
typename Scalar>
947 void SuperLUSolver<Scalar>::free_factorization_data()
952 SUPERLU_FREE(options.etree);
953 SUPERLU_FREE(options.colcnt_h);
954 SUPERLU_FREE(options.part_super_h);
955 Destroy_CompCol_Permuted(&AC);
957 SUPERLU_FREE (etree);
959 SUPERLU_FREE (perm_c);
960 SUPERLU_FREE (perm_r);
995 void slu_mt_solver_driver(slu_options_t *options, SuperMatrix *A,
996 int *perm_c,
int *perm_r, SuperMatrix *AC,
997 equed_t *equed,
double *R,
double *C,
998 SuperMatrix *L, SuperMatrix *U,
999 SuperMatrix *B, SuperMatrix *X,
1000 double *recip_pivot_growth,
double *rcond,
1001 double *ferr,
double *berr,
1002 slu_stat_t *stat, slu_memusage_t *memusage,
1010 int dofact = (options->fact == DOFACT);
1011 int equil = (options->fact == EQUILIBRATE);
1012 int notran = (options->trans == NOTRANS);
1016 DNformat *Bstore = (DNformat*) B->Store;
1017 DNformat *Xstore = (DNformat*) X->Store;
1018 Scalar *Bmat = (Scalar*) Bstore->nzval;
1019 Scalar *Xmat = (Scalar*) Xstore->nzval;
1029 rowequ = colequ = FALSE;
1033 rowequ = (*equed == ROW) || (*equed == BOTH);
1034 colequ = (*equed == COL) || (*equed == BOTH);
1039 t0 = SuperLU_timer_();
1042 double rowcnd, colcnd, amax;
1043 SLU_GSEQU(A, R, C, &rowcnd, &colcnd, &amax, &info1);
1048 SLU_LAQGS(A, R, C, rowcnd, colcnd, amax, equed);
1049 rowequ = (*equed == ROW) || (*equed == BOTH);
1050 colequ = (*equed == COL) || (*equed == BOTH);
1052 stat->utime[EQUIL] = SuperLU_timer_() - t0;
1061 for (
int i = 0; i < A->nrow; ++i)
1062 SLU_MULT(Bmat[i], R[i]);
1066 for (
int i = 0; i < A->nrow; ++i)
1067 SLU_MULT(Bmat[i], C[i]);
1073 if( dofact || equil )
1077 if(options->refact == NO)
1079 t0 = SuperLU_timer_();
1080 SLU_SP_COLORDER(A, perm_c, options, AC);
1081 stat->utime[ETREE] = SuperLU_timer_() - t0;
1085 t0 = SuperLU_timer_();
1086 SLU_GSTRF(options, AC, perm_r, L, U, stat, info);
1087 stat->utime[FACT] = SuperLU_timer_() - t0;
1090 for (
int i = 0; i < options->nprocs; ++i) flopcnt += stat->procstat[i].fcops;
1091 stat->ops[FACT] = flopcnt;
1093 if( options->lwork == -1 )
1096 memusage->total_needed = *info - A->ncol;
1103 if( *info <= A->ncol )
1107 if(recip_pivot_growth)
1108 *recip_pivot_growth = SLU_PIVOT_GROWTH(*info, A, perm_c, L, U);
1116 if(recip_pivot_growth)
1117 *recip_pivot_growth = SLU_PIVOT_GROWTH(A->ncol, A, perm_c, L, U);
1124 t0 = SuperLU_timer_();
1129 *(
unsigned char *)norm = (notran) ?
'1' :
'I';
1131 double anorm = SLU_LANGS(norm, A);
1132 SLU_GSCON(norm, L, U, anorm, rcond, info);
1133 stat->utime[RCOND] = SuperLU_timer_() - t0;
1140 memcpy(Xmat, Bmat, B->nrow *
sizeof(Scalar));
1142 t0 = SuperLU_timer_();
1143 SLU_GSTRS(options->trans, L, U, perm_r, perm_c, X, stat, info);
1144 stat->utime[SOLVE] = SuperLU_timer_() - t0;
1145 stat->ops[SOLVE] = stat->ops[TRISOLVE];
1153 t0 = SuperLU_timer_();
1154 SLU_GSRFS(options->trans, A, L, U, perm_r, perm_c, *equed,
1155 R, C, B, X, ferr, berr, stat, info);
1156 stat->utime[REFINE] = SuperLU_timer_() - t0;
1166 for (
int i = 0; i < A->nrow; ++i)
1167 SLU_MULT(Xmat[i], C[i]);
1171 for (
int i = 0; i < A->nrow; ++i)
1172 SLU_MULT(Xmat[i], R[i]);
1177 char param[1]; param[0] =
'E';
1178 if( rcond && *rcond < SLU_LAMCH_(param) ) *info = A->ncol + 1;
1182 SLU_QUERY_SPACE(options->nprocs, L, U, options->panel_size, memusage);
1186 template class HERMES_API SuperLUSolver<double>;
1187 template class HERMES_API SuperLUSolver<std::complex<double> >;