Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 31 additions & 13 deletions Common/include/geometry/CGeometry.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,19 @@ using namespace std;
* \author F. Palacios
*/
class CGeometry {
public:
/*!
* \brief Aggregates the full symmetric CSR and its LDU split (L strictly-lower, U strictly-upper).
* Built together lazily via GetSparsePattern; all three are always valid once non-empty.
*/
struct LDUSparsePattern {
CCompressedSparsePatternUL csr; /*!< Full symmetric pattern (with diagonal pointer). */
CCompressedSparsePatternUL l; /*!< Strictly-lower part. */
CCompressedSparsePatternUL u; /*!< Strictly-upper part. */

bool empty() const { return csr.empty(); }
};

protected:
enum : size_t { OMP_MIN_SIZE = 32 }; /*!< \brief Chunk size for small loops. */
enum : size_t { MAXNDIM = 3 };
Expand Down Expand Up @@ -187,12 +200,15 @@ class CGeometry {

/*--- Sparsity patterns associated with the geometry. ---*/

CCompressedSparsePatternUL finiteVolumeCSRFill0, /*!< \brief 0-fill FVM sparsity. */
finiteVolumeCSRFillN, /*!< \brief N-fill FVM sparsity (e.g. for ILUn preconditioner). */
finiteElementCSRFill0, /*!< \brief 0-fill FEM sparsity. */
finiteElementCSRFillN; /*!< \brief N-fill FEM sparsity (e.g. for ILUn preconditioner). */
LDUSparsePattern finiteVolumePatternFill0; /*!< \brief FVM sparsity with 0-fill (structural pattern). */
LDUSparsePattern finiteVolumePatternFillN; /*!< \brief FVM sparsity with N-fill (e.g. for ILU-N). */
LDUSparsePattern finiteElementPatternFill0; /*!< \brief FEM sparsity with 0-fill (structural pattern). */
LDUSparsePattern finiteElementPatternFillN; /*!< \brief FEM sparsity with N-fill (e.g. for ILU-N). */

CEdgeToNonZeroMapUL edgeToCSRMap; /*!< \brief Map edges to CSR entries referenced by them (i,j) and (j,i). */
su2vector<unsigned long> finiteVolumeLToUTranspMap; /*!< \brief FVM L-entry -> U-entry of its transpose. */
su2vector<unsigned long> finiteVolumeUToLTranspMap; /*!< \brief FVM U-entry -> L-entry of its transpose. */
su2vector<unsigned long> finiteElementLToUTranspMap; /*!< \brief FEM L-entry -> U-entry of its transpose. */
su2vector<unsigned long> finiteElementUToLTranspMap; /*!< \brief FEM U-entry -> L-entry of its transpose. */

/*--- Edge and element colorings. ---*/

Expand Down Expand Up @@ -1868,21 +1884,23 @@ class CGeometry {
* \param[in] fillLvl - Level of fill of the pattern.
* \return Reference to the sparse pattern.
*/
const CCompressedSparsePatternUL& GetSparsePattern(ConnectivityType type, unsigned long fillLvl = 0);
const LDUSparsePattern& GetSparsePattern(ConnectivityType type, unsigned long fillLvl = 0);

/*!
* \brief Get the edge to sparse pattern map.
* \note This method builds the map and required pattern (0-fill FVM) if that has not been done yet.
* \return Reference to the map.
* \brief Get the bijective map from L-entry indices to U-entry indices of their transposes.
* \note Requires symmetric pattern. Builds both LU transpose maps if not already built.
* \param[in] type - Finite volume or finite element.
* \return Reference to the l_to_u map.
*/
const CEdgeToNonZeroMapUL& GetEdgeToSparsePatternMap();
const su2vector<unsigned long>& GetLToUTransposeSparsePatternMap(ConnectivityType type);

/*!
* \brief Get the transpose of the (main, i.e 0 fill) sparse pattern (e.g. CSR becomes CSC).
* \brief Get the bijective map from U-entry indices to L-entry indices of their transposes.
* \note Requires symmetric pattern. Builds both LU transpose maps if not already built.
* \param[in] type - Finite volume or finite element.
* \return Reference to the map.
* \return Reference to the u_to_l map.
*/
const su2vector<unsigned long>& GetTransposeSparsePatternMap(ConnectivityType type);
const su2vector<unsigned long>& GetUToLTransposeSparsePatternMap(ConnectivityType type);

/*!
* \brief Get the edge coloring.
Expand Down
64 changes: 48 additions & 16 deletions Common/include/linear_algebra/CPastixWrapper.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -62,21 +62,28 @@ class CPastixWrapper {
vector<pastix_int_t> perm; /*!< \brief Ordering computed by PaStiX. */
vector<su2mixedfloat> workvec; /*!< \brief RHS vector which then becomes the solution. */

vector<unsigned long> csr_row_ptr; /*!< \brief Owned CSR row pointers (built from LDU). */
vector<unsigned long> csr_col_ind; /*!< \brief Owned CSR column indices (built from LDU). */

pastix_int_t iparm[IPARM_SIZE]; /*!< \brief Integer parameters for PaStiX. */
double dparm[DPARM_SIZE]; /*!< \brief Floating point parameters for PaStiX. */

struct {
unsigned long nVar = 0;
unsigned long nPoint = 0;
unsigned long nPointDomain = 0;
const unsigned long* rowptr = nullptr;
const unsigned long* colidx = nullptr;
const ScalarType* values = nullptr;
unsigned long blkSz = 0; /*!< \brief Block size (nVar * nVar) for value assembly. */

const unsigned long* row_ptr_l = nullptr; /*!< \brief LDU lower row pointers (geometry-owned). */
const unsigned long* row_ptr_u = nullptr; /*!< \brief LDU upper row pointers (geometry-owned). */
const ScalarType* d = nullptr; /*!< \brief Diagonal blocks (matrix-owned). */
const ScalarType* l = nullptr; /*!< \brief Lower blocks (matrix-owned). */
const ScalarType* u = nullptr; /*!< \brief Upper blocks (matrix-owned). */

unsigned long size_rhs() const { return nPointDomain * nVar; }
} matrix; /*!< \brief Pointers and sizes of the input matrix. */
} matrix; /*!< \brief Dimensions and LDU pointers captured from the owning CSysMatrix. */

bool issetup{}; /*!< \brief Signals that the matrix data has been provided. */
bool issetup{}; /*!< \brief Signals that the structure has been provided. */
bool isinitialized{}; /*!< \brief Signals that the sparsity pattern has been set. */
bool isfactorized{}; /*!< \brief Signals that a factorization has been computed. */
bool transpose{}; /*!< \brief Solve A^T x = b instead of A x = b. */
Expand Down Expand Up @@ -110,6 +117,11 @@ class CPastixWrapper {
*/
void Initialize(CGeometry* geometry, const CConfig* config);

/*!
* \brief Assemble CSR values from the stored LDU pointers directly into the values buffer.
*/
void AssembleValues();

public:
CPastixWrapper() = default;

Expand All @@ -125,23 +137,43 @@ class CPastixWrapper {
~CPastixWrapper() { Clean(); }

/*!
* \brief Set matrix data, only once.
* \param[in] nVar - DOF per point.
* \brief Returns true once SetLDU has been called.
*/
bool IsSetup() const { return issetup; }

/*!
* \brief Set LDU structure and value pointers; builds and owns assembled CSR (called once).
* \param[in] nVar - DOF per point (square blocks: nVar x nVar).
* \param[in] nPoint - Total number of points including halos.
* \param[in] nPointDomain - Number of internal points.
* \param[in] rowptr - Array, where column index data starts for each matrix row.
* \param[in] colidx - Non zeros column indices.
* \param[in] values - Matrix coefficients.
* \param[in] nPointDomain - Number of internal points (domain rows).
* \param[in] row_ptr_l/u - LDU lower/upper row pointers (geometry-owned, must outlive wrapper).
* \param[in] col_ind_l/u - LDU lower/upper column indices (geometry-owned).
* \param[in] d/l/u - LDU value blocks (matrix-owned, must outlive wrapper).
*/
void SetMatrix(unsigned long nVar, unsigned long nPoint, unsigned long nPointDomain, const unsigned long* rowptr,
const unsigned long* colidx, const ScalarType* values) {
void SetLDU(unsigned long nVar, unsigned long nPoint, unsigned long nPointDomain, const unsigned long* row_ptr_l,
const unsigned long* col_ind_l, const unsigned long* row_ptr_u, const unsigned long* col_ind_u,
const ScalarType* d, const ScalarType* l, const ScalarType* u) {
if (issetup) return;
matrix.nVar = nVar;
matrix.nPoint = nPoint;
matrix.nPointDomain = nPointDomain;
matrix.rowptr = rowptr;
matrix.colidx = colidx;
matrix.values = values;
matrix.row_ptr_l = row_ptr_l;
matrix.row_ptr_u = row_ptr_u;
matrix.d = d;
matrix.l = l;
matrix.u = u;
matrix.blkSz = nVar * nVar;

const unsigned long nnz_domain = row_ptr_l[nPointDomain] + nPointDomain + row_ptr_u[nPointDomain];
csr_row_ptr.resize(nPointDomain + 1);
csr_col_ind.reserve(nnz_domain);
for (auto i = 0ul; i < nPointDomain; ++i) {
csr_row_ptr[i] = static_cast<unsigned long>(csr_col_ind.size());
for (auto k = row_ptr_l[i]; k < row_ptr_l[i + 1]; ++k) csr_col_ind.push_back(col_ind_l[k]);
csr_col_ind.push_back(i);
for (auto k = row_ptr_u[i]; k < row_ptr_u[i + 1]; ++k) csr_col_ind.push_back(col_ind_u[k]);
}
csr_row_ptr[nPointDomain] = static_cast<unsigned long>(csr_col_ind.size());
issetup = true;
}

Expand Down
122 changes: 63 additions & 59 deletions Common/include/linear_algebra/CSysMatrix.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -138,25 +138,40 @@ class CSysMatrix {
unsigned long nVar; /*!< \brief Number of variables (and rows of the blocks). */
unsigned long nEqn; /*!< \brief Number of equations (and columns of the blocks). */

ScalarType* matrix; /*!< \brief Entries of the sparse matrix. */
unsigned long nnz; /*!< \brief Number of possible nonzero entries in the matrix. */
const unsigned long* row_ptr; /*!< \brief Pointers to the first element in each row. */
const unsigned long* dia_ptr; /*!< \brief Pointers to the diagonal element in each row. */
const unsigned long* col_ind; /*!< \brief Column index for each of the elements in val(). */
const unsigned long* col_ptr; /*!< \brief The transpose of col_ind, pointer to blocks with the same column index. */

ScalarType* d_matrix; /*!< \brief Device Pointer to store the matrix values on the GPU. */
const unsigned long* d_row_ptr; /*!< \brief Device Pointers to the first element in each row. */
const unsigned long* d_col_ind; /*!< \brief Device Column index for each of the elements in val(). */
bool useCuda = false; /*!< \brief Boolean that indicates whether user has enabled CUDA or not.
Mainly used to conditionally free GPU memory in the class destructor. */

ScalarType* ILU_matrix; /*!< \brief Entries of the ILU sparse matrix. */
unsigned long nnz_ilu; /*!< \brief Number of possible nonzero entries in the matrix (ILU). */
const unsigned long* row_ptr_ilu; /*!< \brief Pointers to the first element in each row (ILU). */
const unsigned long* dia_ptr_ilu; /*!< \brief Pointers to the diagonal element in each row (ILU). */
const unsigned long* col_ind_ilu; /*!< \brief Column index for each of the elements in val() (ILU). */
unsigned short ilu_fill_in; /*!< \brief Fill in level for the ILU preconditioner. */
/*!
* \brief Aggregates value arrays and sparse-structure pointers for an LDU-partitioned matrix.
* Each CSysMatrix holds three LDU instances: the host matrix (mat), its device copy (gpu),
* and the ILU factorization (ilu). Ownership of the value arrays (d/l/u) and whether
* the pointers address host or device memory is managed by CSysMatrix.
*/
struct LDU {
ScalarType* d = nullptr; /*!< \brief Diagonal block values. */
ScalarType* l = nullptr; /*!< \brief Strictly-lower block values. */
ScalarType* u = nullptr; /*!< \brief Strictly-upper block values. */
const unsigned long* row_ptr_l = nullptr; /*!< \brief Row pointers for L (geometry-owned or GPU copy). */
const unsigned long* col_ind_l = nullptr; /*!< \brief Column indices for L. */
const unsigned long* row_ptr_u = nullptr; /*!< \brief Row pointers for U. */
const unsigned long* col_ind_u = nullptr; /*!< \brief Column indices for U. */
unsigned long nnz_l = 0; /*!< \brief Number of L nonzeros. */
unsigned long nnz_u = 0; /*!< \brief Number of U nonzeros. */
};

LDU mat; /*!< \brief Host matrix (values owned via aligned_alloc; pattern from geometry). */
LDU gpu; /*!< \brief Device matrix (all pointers to GPU memory). */
LDU ilu; /*!< \brief ILU factorization, host (values owned; pattern from geometry). */

bool useCuda = false; /*!< \brief Whether CUDA is enabled. */
const unsigned long* l_to_u_transp; /*!< \brief L-entry index -> U-entry index of its transpose. */
const unsigned long* u_to_l_transp; /*!< \brief U-entry index -> L-entry index of its transpose. */

/*!
* \brief Lookup table from edges to the L-index in the LDU split.
* U-index == edge index by construction (edges are ordered 1:1 with the U pattern).
* Therefore, edge_ptr_l == u_to_l_transp, but we keep a separate member for clarity.
*/
const unsigned long* edge_ptr_l;

unsigned short ilu_fill_in; /*!< \brief Fill level for the ILU preconditioner. */

/*!< \brief Level structure for alternative shared memory parallelization of ILU. */
CCompressedSparsePatternUL levels_ilu;
Expand Down Expand Up @@ -188,21 +203,6 @@ class CSysMatrix {
mutable CPastixWrapper<ScalarType> pastix_wrapper;
#endif

/*!
* \brief Auxilary object to wrap the edge map pointer used in fast block updates, i.e. without linear searches.
*/
struct {
const unsigned long* ptr = nullptr;
unsigned long nEdge = 0;

operator bool() { return nEdge != 0; }

inline unsigned long operator()(unsigned long edge, unsigned long node) const { return ptr[2 * edge + node]; }
inline unsigned long ij(unsigned long edge) const { return ptr[2 * edge]; }
inline unsigned long ji(unsigned long edge) const { return ptr[2 * edge + 1]; }

} edge_ptr;

/*!
* \brief Handle type conversion for when we Set, Add, etc. blocks, preserving derivative information (if supported by
* types).
Expand Down Expand Up @@ -392,7 +392,7 @@ class CSysMatrix {
* \param[in] neqn - Number of equations (and columns of the blocks).
* \param[in] geometry - Geometrical definition of the problem.
* \param[in] config - Definition of the particular problem.
* \param[in] needTranspPtr - If "col_ptr" should be created, used for "SetDiagonalAsColumnSum".
* \param[in] needTranspPtr - If the L/U transpose maps should be built, used for "SetDiagonalAsColumnSum".
*/
void Initialize(unsigned long npoint, unsigned long npointdomain, unsigned short nvar, unsigned short neqn,
bool EdgeConnect, CGeometry* geometry, const CConfig* config, bool needTranspPtr = false,
Expand Down Expand Up @@ -421,10 +421,14 @@ class CSysMatrix {
* \return Pointer to location in memory where the block starts.
*/
FORCEINLINE const ScalarType* GetBlock(unsigned long block_i, unsigned long block_j) const {
/*--- The position of the diagonal block is known which allows halving the search space. ---*/
const auto end = (block_j < block_i) ? dia_ptr[block_i] : row_ptr[block_i + 1];
for (auto index = (block_j < block_i) ? row_ptr[block_i] : dia_ptr[block_i]; index < end; ++index)
if (col_ind[index] == block_j) return &matrix[index * nVar * nEqn];
if (block_i == block_j) return &mat.d[block_i * nVar * nEqn];
if (block_j < block_i) {
for (auto index = mat.row_ptr_l[block_i]; index < mat.row_ptr_l[block_i + 1]; ++index)
if (mat.col_ind_l[index] == block_j) return &mat.l[index * nVar * nEqn];
return nullptr;
}
for (auto index = mat.row_ptr_u[block_i]; index < mat.row_ptr_u[block_i + 1]; ++index)
if (mat.col_ind_u[index] == block_j) return &mat.u[index * nVar * nEqn];
return nullptr;
}

Expand Down Expand Up @@ -574,10 +578,11 @@ class CSysMatrix {
*/
inline void GetBlocks(unsigned long iEdge, unsigned long iPoint, unsigned long jPoint, ScalarType*& bii,
ScalarType*& bij, ScalarType*& bji, ScalarType*& bjj) {
bii = &matrix[dia_ptr[iPoint] * nVar * nEqn];
bjj = &matrix[dia_ptr[jPoint] * nVar * nEqn];
bij = &matrix[edge_ptr(iEdge, 0) * nVar * nEqn];
bji = &matrix[edge_ptr(iEdge, 1) * nVar * nEqn];
const auto blkSz = nVar * nEqn;
bii = &mat.d[iPoint * blkSz];
bjj = &mat.d[jPoint * blkSz];
bij = &mat.u[iEdge * blkSz];
bji = &mat.l[edge_ptr_l[iEdge] * blkSz];
}

/*!
Expand Down Expand Up @@ -652,10 +657,10 @@ class CSysMatrix {
if (mask[k] == 0) continue;

/*--- Fetch the blocks. ---*/
auto bii = &matrix[dia_ptr[iPoint[k]] * blkSz];
auto bjj = &matrix[dia_ptr[jPoint[k]] * blkSz];
auto bij = &matrix[edge_ptr(iEdge[k], 0) * blkSz];
auto bji = &matrix[edge_ptr(iEdge[k], 1) * blkSz];
auto bii = &mat.d[iPoint[k] * blkSz];
auto bjj = &mat.d[jPoint[k] * blkSz];
auto bij = &mat.u[iEdge[k] * blkSz];
auto bji = &mat.l[edge_ptr_l[iEdge[k]] * blkSz];

/*--- Update, block i was negated during transpose in the
* hope the assignments below become non-temporal stores. ---*/
Expand All @@ -682,8 +687,9 @@ class CSysMatrix {
template <class MatrixType, class OtherType = ScalarType, bool Overwrite = true>
inline void SetBlocks(unsigned long iEdge, const MatrixType& block_i, const MatrixType& block_j,
OtherType scale = 1) {
ScalarType* bij = &matrix[edge_ptr(iEdge, 0) * nVar * nEqn];
ScalarType* bji = &matrix[edge_ptr(iEdge, 1) * nVar * nEqn];
const auto blkSz = nVar * nEqn;
ScalarType* bij = &mat.u[iEdge * blkSz];
ScalarType* bji = &mat.l[edge_ptr_l[iEdge] * blkSz];

unsigned long iVar, jVar, offset = 0;

Expand Down Expand Up @@ -742,8 +748,8 @@ class CSysMatrix {
if (mask[k] == 0) continue;

/*--- Fetch the blocks. ---*/
auto bij = &matrix[edge_ptr(iEdge[k], 0) * blkSz];
auto bji = &matrix[edge_ptr(iEdge[k], 1) * blkSz];
ScalarType* bij = &mat.u[iEdge[k] * blkSz];
ScalarType* bji = &mat.l[edge_ptr_l[iEdge[k]] * blkSz];

/*--- Update, block i was negated during transpose in the
* hope the assignments below become non-temporal stores. ---*/
Expand All @@ -765,7 +771,7 @@ class CSysMatrix {
*/
template <class OtherType, bool Overwrite = true, class T = ScalarType>
inline void SetBlock2Diag(unsigned long block_i, const OtherType& val_block, T alpha = 1.0) {
auto mat_ii = &matrix[dia_ptr[block_i] * nVar * nEqn];
auto mat_ii = &mat.d[block_i * nVar * nEqn];

for (auto iVar = 0ul; iVar < nVar; iVar++)
for (auto jVar = 0ul; jVar < nEqn; jVar++) {
Expand Down Expand Up @@ -798,8 +804,8 @@ class CSysMatrix {
*/
template <class OtherType>
inline void AddVal2Diag(unsigned long block_i, OtherType val_matrix) {
for (auto iVar = 0ul; iVar < nVar; iVar++)
matrix[dia_ptr[block_i] * nVar * nVar + iVar * (nVar + 1)] += PassiveAssign(val_matrix);
auto d = &mat.d[block_i * nVar * nVar];
for (auto iVar = 0ul; iVar < nVar; iVar++) d[iVar * (nVar + 1)] += PassiveAssign(val_matrix);
}

/*!
Expand All @@ -811,7 +817,7 @@ class CSysMatrix {
*/
template <class OtherType>
inline void AddVal2Diag(unsigned long block_i, unsigned long iVar, OtherType val) {
matrix[dia_ptr[block_i] * nVar * nVar + iVar * (nVar + 1)] += PassiveAssign(val);
mat.d[block_i * nVar * nVar + iVar * (nVar + 1)] += PassiveAssign(val);
}

/*!
Expand All @@ -822,13 +828,11 @@ class CSysMatrix {
*/
template <class OtherType>
inline void SetVal2Diag(unsigned long block_i, OtherType val_matrix) {
unsigned long iVar, index = dia_ptr[block_i] * nVar * nVar;

/*--- Clear entire block before setting its diagonal. ---*/
SU2_OMP_SIMD
for (iVar = 0; iVar < nVar * nVar; iVar++) matrix[index + iVar] = 0.0;
for (auto iVar = 0ul; iVar < nVar * nVar; iVar++) mat.d[block_i * nVar * nVar + iVar] = 0.0;

for (iVar = 0; iVar < nVar; iVar++) matrix[index + iVar * (nVar + 1)] = PassiveAssign(val_matrix);
AddVal2Diag(block_i, val_matrix);
}

/*!
Expand Down
Loading
Loading