diff --git a/Common/include/geometry/CGeometry.hpp b/Common/include/geometry/CGeometry.hpp index 7958110fb23..5d1e7e5a83a 100644 --- a/Common/include/geometry/CGeometry.hpp +++ b/Common/include/geometry/CGeometry.hpp @@ -73,6 +73,19 @@ using namespace std; * \author F. Palacios */ class CGeometry { + public: + /*! + * \brief Aggregates the full symmetric CSR and its LDU split (L strictly-lower, U strictly-upper). + * Built together lazily via GetSparsePattern; all three are always valid once non-empty. + */ + struct LDUSparsePattern { + CCompressedSparsePatternUL csr; /*!< Full symmetric pattern (with diagonal pointer). */ + CCompressedSparsePatternUL l; /*!< Strictly-lower part. */ + CCompressedSparsePatternUL u; /*!< Strictly-upper part. */ + + bool empty() const { return csr.empty(); } + }; + protected: enum : size_t { OMP_MIN_SIZE = 32 }; /*!< \brief Chunk size for small loops. */ enum : size_t { MAXNDIM = 3 }; @@ -187,12 +200,15 @@ class CGeometry { /*--- Sparsity patterns associated with the geometry. ---*/ - CCompressedSparsePatternUL finiteVolumeCSRFill0, /*!< \brief 0-fill FVM sparsity. */ - finiteVolumeCSRFillN, /*!< \brief N-fill FVM sparsity (e.g. for ILUn preconditioner). */ - finiteElementCSRFill0, /*!< \brief 0-fill FEM sparsity. */ - finiteElementCSRFillN; /*!< \brief N-fill FEM sparsity (e.g. for ILUn preconditioner). */ + LDUSparsePattern finiteVolumePatternFill0; /*!< \brief FVM sparsity with 0-fill (structural pattern). */ + LDUSparsePattern finiteVolumePatternFillN; /*!< \brief FVM sparsity with N-fill (e.g. for ILU-N). */ + LDUSparsePattern finiteElementPatternFill0; /*!< \brief FEM sparsity with 0-fill (structural pattern). */ + LDUSparsePattern finiteElementPatternFillN; /*!< \brief FEM sparsity with N-fill (e.g. for ILU-N). */ - CEdgeToNonZeroMapUL edgeToCSRMap; /*!< \brief Map edges to CSR entries referenced by them (i,j) and (j,i). */ + su2vector finiteVolumeLToUTranspMap; /*!< \brief FVM L-entry -> U-entry of its transpose. */ + su2vector finiteVolumeUToLTranspMap; /*!< \brief FVM U-entry -> L-entry of its transpose. */ + su2vector finiteElementLToUTranspMap; /*!< \brief FEM L-entry -> U-entry of its transpose. */ + su2vector finiteElementUToLTranspMap; /*!< \brief FEM U-entry -> L-entry of its transpose. */ /*--- Edge and element colorings. ---*/ @@ -1868,21 +1884,23 @@ class CGeometry { * \param[in] fillLvl - Level of fill of the pattern. * \return Reference to the sparse pattern. */ - const CCompressedSparsePatternUL& GetSparsePattern(ConnectivityType type, unsigned long fillLvl = 0); + const LDUSparsePattern& GetSparsePattern(ConnectivityType type, unsigned long fillLvl = 0); /*! - * \brief Get the edge to sparse pattern map. - * \note This method builds the map and required pattern (0-fill FVM) if that has not been done yet. - * \return Reference to the map. + * \brief Get the bijective map from L-entry indices to U-entry indices of their transposes. + * \note Requires symmetric pattern. Builds both LU transpose maps if not already built. + * \param[in] type - Finite volume or finite element. + * \return Reference to the l_to_u map. */ - const CEdgeToNonZeroMapUL& GetEdgeToSparsePatternMap(); + const su2vector& GetLToUTransposeSparsePatternMap(ConnectivityType type); /*! - * \brief Get the transpose of the (main, i.e 0 fill) sparse pattern (e.g. CSR becomes CSC). + * \brief Get the bijective map from U-entry indices to L-entry indices of their transposes. + * \note Requires symmetric pattern. Builds both LU transpose maps if not already built. * \param[in] type - Finite volume or finite element. - * \return Reference to the map. + * \return Reference to the u_to_l map. */ - const su2vector& GetTransposeSparsePatternMap(ConnectivityType type); + const su2vector& GetUToLTransposeSparsePatternMap(ConnectivityType type); /*! * \brief Get the edge coloring. diff --git a/Common/include/linear_algebra/CPastixWrapper.hpp b/Common/include/linear_algebra/CPastixWrapper.hpp index dcc320debe1..a471a60eaff 100644 --- a/Common/include/linear_algebra/CPastixWrapper.hpp +++ b/Common/include/linear_algebra/CPastixWrapper.hpp @@ -62,6 +62,9 @@ class CPastixWrapper { vector perm; /*!< \brief Ordering computed by PaStiX. */ vector workvec; /*!< \brief RHS vector which then becomes the solution. */ + vector csr_row_ptr; /*!< \brief Owned CSR row pointers (built from LDU). */ + vector csr_col_ind; /*!< \brief Owned CSR column indices (built from LDU). */ + pastix_int_t iparm[IPARM_SIZE]; /*!< \brief Integer parameters for PaStiX. */ double dparm[DPARM_SIZE]; /*!< \brief Floating point parameters for PaStiX. */ @@ -69,14 +72,18 @@ class CPastixWrapper { unsigned long nVar = 0; unsigned long nPoint = 0; unsigned long nPointDomain = 0; - const unsigned long* rowptr = nullptr; - const unsigned long* colidx = nullptr; - const ScalarType* values = nullptr; + unsigned long blkSz = 0; /*!< \brief Block size (nVar * nVar) for value assembly. */ + + const unsigned long* row_ptr_l = nullptr; /*!< \brief LDU lower row pointers (geometry-owned). */ + const unsigned long* row_ptr_u = nullptr; /*!< \brief LDU upper row pointers (geometry-owned). */ + const ScalarType* d = nullptr; /*!< \brief Diagonal blocks (matrix-owned). */ + const ScalarType* l = nullptr; /*!< \brief Lower blocks (matrix-owned). */ + const ScalarType* u = nullptr; /*!< \brief Upper blocks (matrix-owned). */ unsigned long size_rhs() const { return nPointDomain * nVar; } - } matrix; /*!< \brief Pointers and sizes of the input matrix. */ + } matrix; /*!< \brief Dimensions and LDU pointers captured from the owning CSysMatrix. */ - bool issetup{}; /*!< \brief Signals that the matrix data has been provided. */ + bool issetup{}; /*!< \brief Signals that the structure has been provided. */ bool isinitialized{}; /*!< \brief Signals that the sparsity pattern has been set. */ bool isfactorized{}; /*!< \brief Signals that a factorization has been computed. */ bool transpose{}; /*!< \brief Solve A^T x = b instead of A x = b. */ @@ -110,6 +117,11 @@ class CPastixWrapper { */ void Initialize(CGeometry* geometry, const CConfig* config); + /*! + * \brief Assemble CSR values from the stored LDU pointers directly into the values buffer. + */ + void AssembleValues(); + public: CPastixWrapper() = default; @@ -125,23 +137,43 @@ class CPastixWrapper { ~CPastixWrapper() { Clean(); } /*! - * \brief Set matrix data, only once. - * \param[in] nVar - DOF per point. + * \brief Returns true once SetLDU has been called. + */ + bool IsSetup() const { return issetup; } + + /*! + * \brief Set LDU structure and value pointers; builds and owns assembled CSR (called once). + * \param[in] nVar - DOF per point (square blocks: nVar x nVar). * \param[in] nPoint - Total number of points including halos. - * \param[in] nPointDomain - Number of internal points. - * \param[in] rowptr - Array, where column index data starts for each matrix row. - * \param[in] colidx - Non zeros column indices. - * \param[in] values - Matrix coefficients. + * \param[in] nPointDomain - Number of internal points (domain rows). + * \param[in] row_ptr_l/u - LDU lower/upper row pointers (geometry-owned, must outlive wrapper). + * \param[in] col_ind_l/u - LDU lower/upper column indices (geometry-owned). + * \param[in] d/l/u - LDU value blocks (matrix-owned, must outlive wrapper). */ - void SetMatrix(unsigned long nVar, unsigned long nPoint, unsigned long nPointDomain, const unsigned long* rowptr, - const unsigned long* colidx, const ScalarType* values) { + void SetLDU(unsigned long nVar, unsigned long nPoint, unsigned long nPointDomain, const unsigned long* row_ptr_l, + const unsigned long* col_ind_l, const unsigned long* row_ptr_u, const unsigned long* col_ind_u, + const ScalarType* d, const ScalarType* l, const ScalarType* u) { if (issetup) return; matrix.nVar = nVar; matrix.nPoint = nPoint; matrix.nPointDomain = nPointDomain; - matrix.rowptr = rowptr; - matrix.colidx = colidx; - matrix.values = values; + matrix.row_ptr_l = row_ptr_l; + matrix.row_ptr_u = row_ptr_u; + matrix.d = d; + matrix.l = l; + matrix.u = u; + matrix.blkSz = nVar * nVar; + + const unsigned long nnz_domain = row_ptr_l[nPointDomain] + nPointDomain + row_ptr_u[nPointDomain]; + csr_row_ptr.resize(nPointDomain + 1); + csr_col_ind.reserve(nnz_domain); + for (auto i = 0ul; i < nPointDomain; ++i) { + csr_row_ptr[i] = static_cast(csr_col_ind.size()); + for (auto k = row_ptr_l[i]; k < row_ptr_l[i + 1]; ++k) csr_col_ind.push_back(col_ind_l[k]); + csr_col_ind.push_back(i); + for (auto k = row_ptr_u[i]; k < row_ptr_u[i + 1]; ++k) csr_col_ind.push_back(col_ind_u[k]); + } + csr_row_ptr[nPointDomain] = static_cast(csr_col_ind.size()); issetup = true; } diff --git a/Common/include/linear_algebra/CSysMatrix.hpp b/Common/include/linear_algebra/CSysMatrix.hpp index a71d31c1c08..11baf6edf8e 100644 --- a/Common/include/linear_algebra/CSysMatrix.hpp +++ b/Common/include/linear_algebra/CSysMatrix.hpp @@ -138,25 +138,40 @@ class CSysMatrix { unsigned long nVar; /*!< \brief Number of variables (and rows of the blocks). */ unsigned long nEqn; /*!< \brief Number of equations (and columns of the blocks). */ - ScalarType* matrix; /*!< \brief Entries of the sparse matrix. */ - unsigned long nnz; /*!< \brief Number of possible nonzero entries in the matrix. */ - const unsigned long* row_ptr; /*!< \brief Pointers to the first element in each row. */ - const unsigned long* dia_ptr; /*!< \brief Pointers to the diagonal element in each row. */ - const unsigned long* col_ind; /*!< \brief Column index for each of the elements in val(). */ - const unsigned long* col_ptr; /*!< \brief The transpose of col_ind, pointer to blocks with the same column index. */ - - ScalarType* d_matrix; /*!< \brief Device Pointer to store the matrix values on the GPU. */ - const unsigned long* d_row_ptr; /*!< \brief Device Pointers to the first element in each row. */ - const unsigned long* d_col_ind; /*!< \brief Device Column index for each of the elements in val(). */ - bool useCuda = false; /*!< \brief Boolean that indicates whether user has enabled CUDA or not. - Mainly used to conditionally free GPU memory in the class destructor. */ - - ScalarType* ILU_matrix; /*!< \brief Entries of the ILU sparse matrix. */ - unsigned long nnz_ilu; /*!< \brief Number of possible nonzero entries in the matrix (ILU). */ - const unsigned long* row_ptr_ilu; /*!< \brief Pointers to the first element in each row (ILU). */ - const unsigned long* dia_ptr_ilu; /*!< \brief Pointers to the diagonal element in each row (ILU). */ - const unsigned long* col_ind_ilu; /*!< \brief Column index for each of the elements in val() (ILU). */ - unsigned short ilu_fill_in; /*!< \brief Fill in level for the ILU preconditioner. */ + /*! + * \brief Aggregates value arrays and sparse-structure pointers for an LDU-partitioned matrix. + * Each CSysMatrix holds three LDU instances: the host matrix (mat), its device copy (gpu), + * and the ILU factorization (ilu). Ownership of the value arrays (d/l/u) and whether + * the pointers address host or device memory is managed by CSysMatrix. + */ + struct LDU { + ScalarType* d = nullptr; /*!< \brief Diagonal block values. */ + ScalarType* l = nullptr; /*!< \brief Strictly-lower block values. */ + ScalarType* u = nullptr; /*!< \brief Strictly-upper block values. */ + const unsigned long* row_ptr_l = nullptr; /*!< \brief Row pointers for L (geometry-owned or GPU copy). */ + const unsigned long* col_ind_l = nullptr; /*!< \brief Column indices for L. */ + const unsigned long* row_ptr_u = nullptr; /*!< \brief Row pointers for U. */ + const unsigned long* col_ind_u = nullptr; /*!< \brief Column indices for U. */ + unsigned long nnz_l = 0; /*!< \brief Number of L nonzeros. */ + unsigned long nnz_u = 0; /*!< \brief Number of U nonzeros. */ + }; + + LDU mat; /*!< \brief Host matrix (values owned via aligned_alloc; pattern from geometry). */ + LDU gpu; /*!< \brief Device matrix (all pointers to GPU memory). */ + LDU ilu; /*!< \brief ILU factorization, host (values owned; pattern from geometry). */ + + bool useCuda = false; /*!< \brief Whether CUDA is enabled. */ + const unsigned long* l_to_u_transp; /*!< \brief L-entry index -> U-entry index of its transpose. */ + const unsigned long* u_to_l_transp; /*!< \brief U-entry index -> L-entry index of its transpose. */ + + /*! + * \brief Lookup table from edges to the L-index in the LDU split. + * U-index == edge index by construction (edges are ordered 1:1 with the U pattern). + * Therefore, edge_ptr_l == u_to_l_transp, but we keep a separate member for clarity. + */ + const unsigned long* edge_ptr_l; + + unsigned short ilu_fill_in; /*!< \brief Fill level for the ILU preconditioner. */ /*!< \brief Level structure for alternative shared memory parallelization of ILU. */ CCompressedSparsePatternUL levels_ilu; @@ -188,21 +203,6 @@ class CSysMatrix { mutable CPastixWrapper pastix_wrapper; #endif - /*! - * \brief Auxilary object to wrap the edge map pointer used in fast block updates, i.e. without linear searches. - */ - struct { - const unsigned long* ptr = nullptr; - unsigned long nEdge = 0; - - operator bool() { return nEdge != 0; } - - inline unsigned long operator()(unsigned long edge, unsigned long node) const { return ptr[2 * edge + node]; } - inline unsigned long ij(unsigned long edge) const { return ptr[2 * edge]; } - inline unsigned long ji(unsigned long edge) const { return ptr[2 * edge + 1]; } - - } edge_ptr; - /*! * \brief Handle type conversion for when we Set, Add, etc. blocks, preserving derivative information (if supported by * types). @@ -322,20 +322,11 @@ class CSysMatrix { inline const ScalarType* InvertDiagonalBlockILUMatrix(unsigned long block_i); /*! - * \brief Copies the block (i, j) of the matrix-by-blocks structure in the internal variable *block. - * \param[in] block_i - Indexes of the block in the matrix-by-blocks structure. - * \param[in] block_j - Indexes of the block in the matrix-by-blocks structure. + * \brief Returns the start of the ILU block or nullptr if (i,j) is not a nonzero. + * \param[in] block_i/j - Indexes of the block in the matrix-by-blocks structure. */ inline ScalarType* GetBlock_ILUMatrix(unsigned long block_i, unsigned long block_j); - /*! - * \brief Set the value of a block in the sparse matrix. - * \param[in] block_i - Indexes of the block in the matrix-by-blocks structure. - * \param[in] block_j - Indexes of the block in the matrix-by-blocks structure. - * \param[in] **val_block - Block to set to A(i, j). - */ - inline void SetBlock_ILUMatrix(unsigned long block_i, unsigned long block_j, ScalarType* val_block); - /*! * \brief Performs the product of i-th row of the upper part of a sparse matrix by a vector. * \param[in] vec - Vector to be multiplied by the upper part of the sparse matrix A. @@ -392,7 +383,7 @@ class CSysMatrix { * \param[in] neqn - Number of equations (and columns of the blocks). * \param[in] geometry - Geometrical definition of the problem. * \param[in] config - Definition of the particular problem. - * \param[in] needTranspPtr - If "col_ptr" should be created, used for "SetDiagonalAsColumnSum". + * \param[in] needTranspPtr - If the L/U transpose maps should be built, used for "SetDiagonalAsColumnSum". */ void Initialize(unsigned long npoint, unsigned long npointdomain, unsigned short nvar, unsigned short neqn, bool EdgeConnect, CGeometry* geometry, const CConfig* config, bool needTranspPtr = false, @@ -421,10 +412,14 @@ class CSysMatrix { * \return Pointer to location in memory where the block starts. */ FORCEINLINE const ScalarType* GetBlock(unsigned long block_i, unsigned long block_j) const { - /*--- The position of the diagonal block is known which allows halving the search space. ---*/ - const auto end = (block_j < block_i) ? dia_ptr[block_i] : row_ptr[block_i + 1]; - for (auto index = (block_j < block_i) ? row_ptr[block_i] : dia_ptr[block_i]; index < end; ++index) - if (col_ind[index] == block_j) return &matrix[index * nVar * nEqn]; + if (block_i == block_j) return &mat.d[block_i * nVar * nEqn]; + if (block_j < block_i) { + for (auto index = mat.row_ptr_l[block_i]; index < mat.row_ptr_l[block_i + 1]; ++index) + if (mat.col_ind_l[index] == block_j) return &mat.l[index * nVar * nEqn]; + return nullptr; + } + for (auto index = mat.row_ptr_u[block_i]; index < mat.row_ptr_u[block_i + 1]; ++index) + if (mat.col_ind_u[index] == block_j) return &mat.u[index * nVar * nEqn]; return nullptr; } @@ -574,10 +569,11 @@ class CSysMatrix { */ inline void GetBlocks(unsigned long iEdge, unsigned long iPoint, unsigned long jPoint, ScalarType*& bii, ScalarType*& bij, ScalarType*& bji, ScalarType*& bjj) { - bii = &matrix[dia_ptr[iPoint] * nVar * nEqn]; - bjj = &matrix[dia_ptr[jPoint] * nVar * nEqn]; - bij = &matrix[edge_ptr(iEdge, 0) * nVar * nEqn]; - bji = &matrix[edge_ptr(iEdge, 1) * nVar * nEqn]; + const auto blkSz = nVar * nEqn; + bii = &mat.d[iPoint * blkSz]; + bjj = &mat.d[jPoint * blkSz]; + bij = &mat.u[iEdge * blkSz]; + bji = &mat.l[edge_ptr_l[iEdge] * blkSz]; } /*! @@ -652,10 +648,10 @@ class CSysMatrix { if (mask[k] == 0) continue; /*--- Fetch the blocks. ---*/ - auto bii = &matrix[dia_ptr[iPoint[k]] * blkSz]; - auto bjj = &matrix[dia_ptr[jPoint[k]] * blkSz]; - auto bij = &matrix[edge_ptr(iEdge[k], 0) * blkSz]; - auto bji = &matrix[edge_ptr(iEdge[k], 1) * blkSz]; + auto bii = &mat.d[iPoint[k] * blkSz]; + auto bjj = &mat.d[jPoint[k] * blkSz]; + auto bij = &mat.u[iEdge[k] * blkSz]; + auto bji = &mat.l[edge_ptr_l[iEdge[k]] * blkSz]; /*--- Update, block i was negated during transpose in the * hope the assignments below become non-temporal stores. ---*/ @@ -682,8 +678,9 @@ class CSysMatrix { template inline void SetBlocks(unsigned long iEdge, const MatrixType& block_i, const MatrixType& block_j, OtherType scale = 1) { - ScalarType* bij = &matrix[edge_ptr(iEdge, 0) * nVar * nEqn]; - ScalarType* bji = &matrix[edge_ptr(iEdge, 1) * nVar * nEqn]; + const auto blkSz = nVar * nEqn; + ScalarType* bij = &mat.u[iEdge * blkSz]; + ScalarType* bji = &mat.l[edge_ptr_l[iEdge] * blkSz]; unsigned long iVar, jVar, offset = 0; @@ -742,8 +739,8 @@ class CSysMatrix { if (mask[k] == 0) continue; /*--- Fetch the blocks. ---*/ - auto bij = &matrix[edge_ptr(iEdge[k], 0) * blkSz]; - auto bji = &matrix[edge_ptr(iEdge[k], 1) * blkSz]; + ScalarType* bij = &mat.u[iEdge[k] * blkSz]; + ScalarType* bji = &mat.l[edge_ptr_l[iEdge[k]] * blkSz]; /*--- Update, block i was negated during transpose in the * hope the assignments below become non-temporal stores. ---*/ @@ -765,7 +762,7 @@ class CSysMatrix { */ template inline void SetBlock2Diag(unsigned long block_i, const OtherType& val_block, T alpha = 1.0) { - auto mat_ii = &matrix[dia_ptr[block_i] * nVar * nEqn]; + auto mat_ii = &mat.d[block_i * nVar * nEqn]; for (auto iVar = 0ul; iVar < nVar; iVar++) for (auto jVar = 0ul; jVar < nEqn; jVar++) { @@ -798,8 +795,8 @@ class CSysMatrix { */ template inline void AddVal2Diag(unsigned long block_i, OtherType val_matrix) { - for (auto iVar = 0ul; iVar < nVar; iVar++) - matrix[dia_ptr[block_i] * nVar * nVar + iVar * (nVar + 1)] += PassiveAssign(val_matrix); + auto d = &mat.d[block_i * nVar * nVar]; + for (auto iVar = 0ul; iVar < nVar; iVar++) d[iVar * (nVar + 1)] += PassiveAssign(val_matrix); } /*! @@ -811,7 +808,7 @@ class CSysMatrix { */ template inline void AddVal2Diag(unsigned long block_i, unsigned long iVar, OtherType val) { - matrix[dia_ptr[block_i] * nVar * nVar + iVar * (nVar + 1)] += PassiveAssign(val); + mat.d[block_i * nVar * nVar + iVar * (nVar + 1)] += PassiveAssign(val); } /*! @@ -822,13 +819,11 @@ class CSysMatrix { */ template inline void SetVal2Diag(unsigned long block_i, OtherType val_matrix) { - unsigned long iVar, index = dia_ptr[block_i] * nVar * nVar; - /*--- Clear entire block before setting its diagonal. ---*/ SU2_OMP_SIMD - for (iVar = 0; iVar < nVar * nVar; iVar++) matrix[index + iVar] = 0.0; + for (auto iVar = 0ul; iVar < nVar * nVar; iVar++) mat.d[block_i * nVar * nVar + iVar] = 0.0; - for (iVar = 0; iVar < nVar; iVar++) matrix[index + iVar * (nVar + 1)] = PassiveAssign(val_matrix); + AddVal2Diag(block_i, val_matrix); } /*! diff --git a/Common/include/linear_algebra/CSysMatrix.inl b/Common/include/linear_algebra/CSysMatrix.inl index 163e6fb08da..d40d68b3a4f 100644 --- a/Common/include/linear_algebra/CSysMatrix.inl +++ b/Common/include/linear_algebra/CSysMatrix.inl @@ -34,21 +34,16 @@ template FORCEINLINE ScalarType* CSysMatrix::GetBlock_ILUMatrix(unsigned long block_i, unsigned long block_j) { - /*--- The position of the diagonal block is known which allows halving the search space. ---*/ - const auto end = (block_j < block_i) ? dia_ptr_ilu[block_i] : row_ptr_ilu[block_i + 1]; - for (auto index = (block_j < block_i) ? row_ptr_ilu[block_i] : dia_ptr_ilu[block_i]; index < end; ++index) - if (col_ind_ilu[index] == block_j) return &ILU_matrix[index * nVar * nVar]; + if (block_i == block_j) return &ilu.d[block_i * nVar * nVar]; + const auto* __restrict row_ptr = block_j < block_i ? ilu.row_ptr_l : ilu.row_ptr_u; + const auto* __restrict col_ind = block_j < block_i ? ilu.col_ind_l : ilu.col_ind_u; + auto* __restrict vals = block_j < block_i ? ilu.l : ilu.u; + for (auto k = row_ptr[block_i]; k < row_ptr[block_i + 1]; ++k) { + if (col_ind[k] == block_j) return vals + k * nVar * nVar; + } return nullptr; } -template -FORCEINLINE void CSysMatrix::SetBlock_ILUMatrix(unsigned long block_i, unsigned long block_j, - ScalarType* val_block) { - auto ilu_ij = GetBlock_ILUMatrix(block_i, block_j); - if (!ilu_ij) return; - MatrixCopy(val_block, ilu_ij); -} - namespace { template @@ -145,7 +140,7 @@ template FORCEINLINE void CSysMatrix::Gauss_Elimination(unsigned long block_i, ScalarType* rhs) const { /*--- Copy block, as the algorithm modifies the matrix ---*/ ScalarType block[MAXNVAR * MAXNVAR]; - MatrixCopy(&matrix[dia_ptr[block_i] * nVar * nVar], block); + MatrixCopy(&mat.d[block_i * nVar * nVar], block); Gauss_Elimination(block, rhs); } @@ -154,7 +149,7 @@ template FORCEINLINE void CSysMatrix::InverseDiagonalBlock(unsigned long block_i, ScalarType* invBlock) const { /*--- Copy block, as the algorithm modifies the matrix ---*/ ScalarType block[MAXNVAR * MAXNVAR]; - MatrixCopy(&matrix[dia_ptr[block_i] * nVar * nVar], block); + MatrixCopy(&mat.d[block_i * nVar * nVar], block); MatrixInverse(block, invBlock); } @@ -162,7 +157,7 @@ FORCEINLINE void CSysMatrix::InverseDiagonalBlock(unsigned long bloc template FORCEINLINE const ScalarType* CSysMatrix::InvertDiagonalBlockILUMatrix(unsigned long block_i) { /*--- Copy block, as the algorithm modifies the matrix ---*/ - auto* Uii = &ILU_matrix[dia_ptr_ilu[block_i] * nVar * nVar]; + auto* Uii = &ilu.d[block_i * nVar * nVar]; ScalarType block[MAXNVAR * MAXNVAR]; MatrixCopy(Uii, block); MatrixInverse(block, Uii); @@ -174,10 +169,13 @@ FORCEINLINE void CSysMatrix::RowProduct(const CSysVector ScalarType* prod) const { for (auto iVar = 0ul; iVar < nVar; iVar++) prod[iVar] = 0.0; - for (auto index = row_ptr[row_i]; index < row_ptr[row_i + 1]; index++) { - auto col_j = col_ind[index]; - MatrixVectorProductAdd(&matrix[index * nVar * nEqn], &vec[col_j * nEqn], prod); - } + for (auto index = mat.row_ptr_l[row_i]; index < mat.row_ptr_l[row_i + 1]; index++) + MatrixVectorProductAdd(&mat.l[index * nVar * nEqn], &vec[mat.col_ind_l[index] * nEqn], prod); + + MatrixVectorProductAdd(&mat.d[row_i * nVar * nEqn], &vec[row_i * nEqn], prod); + + for (auto index = mat.row_ptr_u[row_i]; index < mat.row_ptr_u[row_i + 1]; index++) + MatrixVectorProductAdd(&mat.u[index * nVar * nEqn], &vec[mat.col_ind_u[index] * nEqn], prod); } template @@ -185,11 +183,11 @@ FORCEINLINE void CSysMatrix::UpperProduct(const CSysVector= nPointDomain) - MatrixVectorProductAdd(&matrix[index * nVar * nEqn], &vec[col_j * nEqn], prod); + MatrixVectorProductAdd(&mat.u[index * nVar * nEqn], &vec[col_j * nEqn], prod); } } @@ -198,14 +196,14 @@ FORCEINLINE void CSysMatrix::LowerProduct(const CSysVector= col_lb) MatrixVectorProductAdd(&matrix[index * nVar * nEqn], &vec[col_j * nEqn], prod); + for (auto index = mat.row_ptr_l[row_i]; index < mat.row_ptr_l[row_i + 1]; index++) { + auto col_j = mat.col_ind_l[index]; + if (col_j >= col_lb) MatrixVectorProductAdd(&mat.l[index * nVar * nEqn], &vec[col_j * nEqn], prod); } } template FORCEINLINE void CSysMatrix::DiagonalProduct(const CSysVector& vec, unsigned long row_i, ScalarType* prod) const { - MatrixVectorProduct(&matrix[dia_ptr[row_i] * nVar * nEqn], &vec[row_i * nEqn], prod); + MatrixVectorProduct(&mat.d[row_i * nVar * nEqn], &vec[row_i * nEqn], prod); } diff --git a/Common/include/toolboxes/graph_toolbox.hpp b/Common/include/toolboxes/graph_toolbox.hpp index 344e08ccf7c..170b973ea1f 100644 --- a/Common/include/toolboxes/graph_toolbox.hpp +++ b/Common/include/toolboxes/graph_toolbox.hpp @@ -417,30 +417,86 @@ CCompressedSparsePattern buildCSRPattern(Geometry_t& geometry, Connecti } /*! - * \brief Build a lookup table of the absolute positions of the non zero entries - * of a compressed sparse pattern, accessed when visiting the FVM edges - * of a grid. The table can then be used for fast access (avoids searches) - * to the non zero entries of a sparse matrix associated with the pattern. - * \param[in] geometry - Definition of the grid. - * \param[in] pattern - Sparse pattern. - * \return nEdge by 2 matrix. + * \brief Extract the strictly-lower part of a symmetric compressed sparse pattern. + * For each row i, the lower entries are those at positions [outerPtr[i], diagPtr[i]). + * \param[in] csr - Full symmetric pattern with diagonal pointer already built. + * \return Strictly-lower CSR pattern. */ -template -CEdgeToNonZeroMap mapEdgesToSparsePattern(Geometry_t& geometry, - const CCompressedSparsePattern& pattern) { - assert(!pattern.empty()); +template +CCompressedSparsePattern buildLowerPattern(const CCompressedSparsePattern& csr) { + assert(!csr.empty()); + const auto nPoint = csr.getOuterSize(); + const auto* outerPtr = csr.outerPtr(); + const auto* innerIdx = csr.innerIdx(); + const auto* diagPtr = csr.diagPtr(); + + su2vector outerPtrL(nPoint + 1); + outerPtrL(0) = 0; + for (auto i = 0ul; i < nPoint; ++i) outerPtrL(i + 1) = outerPtrL(i) + static_cast(diagPtr[i] - outerPtr[i]); + + su2vector innerIdxL(outerPtrL(nPoint)); + Index_t k = 0; + for (auto i = 0ul; i < nPoint; ++i) + for (auto p = outerPtr[i]; p < diagPtr[i]; ++p) innerIdxL(k++) = innerIdx[p]; - CEdgeToNonZeroMap edgeMap(geometry.GetnEdge(), 2); + return CCompressedSparsePattern(std::move(outerPtrL), std::move(innerIdxL)); +} + +/*! + * \brief Extract the strictly-upper part of a symmetric compressed sparse pattern. + * For each row i, the upper entries are those at positions (diagPtr[i], outerPtr[i+1]). + * \param[in] csr - Full symmetric pattern with diagonal pointer already built. + * \return Strictly-upper CSR pattern. + */ +template +CCompressedSparsePattern buildUpperPattern(const CCompressedSparsePattern& csr) { + assert(!csr.empty()); + const auto nPoint = csr.getOuterSize(); + const auto* outerPtr = csr.outerPtr(); + const auto* innerIdx = csr.innerIdx(); + const auto* diagPtr = csr.diagPtr(); + + su2vector outerPtrU(nPoint + 1); + outerPtrU(0) = 0; + for (auto i = 0ul; i < nPoint; ++i) + outerPtrU(i + 1) = outerPtrU(i) + static_cast(outerPtr[i + 1] - diagPtr[i] - 1); + + su2vector innerIdxU(outerPtrU(nPoint)); + Index_t k = 0; + for (auto i = 0ul; i < nPoint; ++i) + for (auto p = diagPtr[i] + 1; p < outerPtr[i + 1]; ++p) innerIdxU(k++) = innerIdx[p]; - for (Index_t iEdge = 0; iEdge < geometry.GetnEdge(); ++iEdge) { - Index_t iPoint = geometry.edges->GetNode(iEdge, 0); - Index_t jPoint = geometry.edges->GetNode(iEdge, 1); + return CCompressedSparsePattern(std::move(outerPtrU), std::move(innerIdxU)); +} - edgeMap(iEdge, 0) = pattern.quickFindInnerIdx(iPoint, jPoint); - edgeMap(iEdge, 1) = pattern.quickFindInnerIdx(jPoint, iPoint); +/*! + * \brief Build bijective maps between strictly-lower (L) and strictly-upper (U) non-zero entries + * that are each other's transposes. Requires a symmetric pattern. + * l_to_u[k_l] = k_u such that U-entry k_u is the transpose of L-entry k_l, and vice-versa. + * \param[in] pattern_l - Strictly-lower CSR pattern. + * \param[in] pattern_u - Strictly-upper CSR pattern. + * \param[out] l_to_u - For each L-entry index, the U-entry index of its transpose. + * \param[out] u_to_l - For each U-entry index, the L-entry index of its transpose. + */ +template +void buildLUTransposeMaps(const CCompressedSparsePattern& pattern_l, + const CCompressedSparsePattern& pattern_u, su2vector& l_to_u, + su2vector& u_to_l) { + const auto nnz_l = pattern_l.getNumNonZeros(); + const auto nnz_u = pattern_u.getNumNonZeros(); + assert(nnz_l == nnz_u && "L and U must have the same NNZ (symmetric pattern)."); + + l_to_u.resize(nnz_l); + u_to_l.resize(nnz_u); + + for (Index_t i = 0; i < pattern_l.getOuterSize(); ++i) { + for (Index_t k_l = pattern_l.outerPtr()[i]; k_l < pattern_l.outerPtr()[i + 1]; ++k_l) { + const Index_t j = pattern_l.innerIdx()[k_l]; // j < i (strictly lower) + const Index_t k_u = pattern_u.quickFindInnerIdx(j, i); // (j,i) is in U since jempty()) { - *pattern = buildCSRPattern(*this, type, fillLvl); - pattern->buildDiagPtr(); + auto& grp = fillLvl == 0 ? (fvm ? finiteVolumePatternFill0 : finiteElementPatternFill0) + : (fvm ? finiteVolumePatternFillN : finiteElementPatternFillN); + if (grp.empty()) { + grp.csr = buildCSRPattern(*this, type, fillLvl); + grp.csr.buildDiagPtr(); + grp.l = buildLowerPattern(grp.csr); + grp.u = buildUpperPattern(grp.csr); } - - return *pattern; + return grp; } -const CEdgeToNonZeroMapUL& CGeometry::GetEdgeToSparsePatternMap() { - if (edgeToCSRMap.empty()) { - if (finiteVolumeCSRFill0.empty()) { - finiteVolumeCSRFill0 = buildCSRPattern(*this, ConnectivityType::FiniteVolume, 0ul); - } - edgeToCSRMap = mapEdgesToSparsePattern(*this, finiteVolumeCSRFill0); +const su2vector& CGeometry::GetLToUTransposeSparsePatternMap(ConnectivityType type) { + bool fvm = (type == ConnectivityType::FiniteVolume); + auto& l_to_u = fvm ? finiteVolumeLToUTranspMap : finiteElementLToUTranspMap; + if (l_to_u.empty()) { + auto& u_to_l = fvm ? finiteVolumeUToLTranspMap : finiteElementUToLTranspMap; + const auto& pat = GetSparsePattern(type); + buildLUTransposeMaps(pat.l, pat.u, l_to_u, u_to_l); } - return edgeToCSRMap; + return l_to_u; } -const su2vector& CGeometry::GetTransposeSparsePatternMap(ConnectivityType type) { - /*--- Yes the const cast is weird but it is still better than repeating code. ---*/ - auto& pattern = const_cast(GetSparsePattern(type)); - pattern.buildTransposePtr(); - return pattern.transposePtr(); +const su2vector& CGeometry::GetUToLTransposeSparsePatternMap(ConnectivityType type) { + bool fvm = (type == ConnectivityType::FiniteVolume); + auto& u_to_l = fvm ? finiteVolumeUToLTranspMap : finiteElementUToLTranspMap; + if (u_to_l.empty()) { + auto& l_to_u = fvm ? finiteVolumeLToUTranspMap : finiteElementLToUTranspMap; + const auto& pat = GetSparsePattern(type); + buildLUTransposeMaps(pat.l, pat.u, l_to_u, u_to_l); + } + return u_to_l; } const CCompressedSparsePatternUL& CGeometry::GetEdgeColoring(su2double* efficiency, bool maximizeEdgeColorGroupSize) { diff --git a/Common/src/geometry/CMultiGridGeometry.cpp b/Common/src/geometry/CMultiGridGeometry.cpp index 347a633e2a7..684da742b13 100644 --- a/Common/src/geometry/CMultiGridGeometry.cpp +++ b/Common/src/geometry/CMultiGridGeometry.cpp @@ -896,6 +896,9 @@ void CMultiGridGeometry::SetPoint_Connectivity(const CGeometry* fine_grid) { } } + /*--- See CPhysicalGeometry::SetPoint_Connectivity for why we sort. ---*/ + sort(points[iCoarsePoint].begin(), points[iCoarsePoint].end()); + /*--- Set the number of neighbors variable, this is important for JST and multigrid in parallel ---*/ nodes->SetnNeighbor(iCoarsePoint, points[iCoarsePoint].size()); diff --git a/Common/src/geometry/CPhysicalGeometry.cpp b/Common/src/geometry/CPhysicalGeometry.cpp index 79421eded3e..a965b248b67 100644 --- a/Common/src/geometry/CPhysicalGeometry.cpp +++ b/Common/src/geometry/CPhysicalGeometry.cpp @@ -4463,6 +4463,11 @@ void CPhysicalGeometry::SetPoint_Connectivity() { } } + /*--- Sort the neighbors in ascending order so that the edge numbering done in + * SetEdges matches the upper-CSR ordering of the sparse pattern. This makes + * the edge->upper-block map the identity for the CSysMatrix LDU storage. ---*/ + sort(points[iPoint].begin(), points[iPoint].end()); + /*--- Set the number of neighbors variable, this is important for JST and multigrid in parallel. ---*/ nodes->SetnNeighbor(iPoint, points[iPoint].size()); } diff --git a/Common/src/linear_algebra/CPastixWrapper.cpp b/Common/src/linear_algebra/CPastixWrapper.cpp index f4db581a879..f213939eaf6 100644 --- a/Common/src/linear_algebra/CPastixWrapper.cpp +++ b/Common/src/linear_algebra/CPastixWrapper.cpp @@ -41,7 +41,7 @@ void CPastixWrapper::Initialize(CGeometry* geometry, const CConfig* if (isinitialized) return; // only need to do this once const unsigned long nVar = matrix.nVar, nPoint = matrix.nPoint, nPointDomain = matrix.nPointDomain; - const unsigned long *row_ptr = matrix.rowptr, *col_ind = matrix.colidx; + const unsigned long *row_ptr = csr_row_ptr.data(), *col_ind = csr_col_ind.data(); const unsigned long nNonZero = row_ptr[nPointDomain]; /*--- Allocate ---*/ @@ -209,6 +209,22 @@ void CPastixWrapper::Initialize(CGeometry* geometry, const CConfig* isinitialized = true; } +template +void CPastixWrapper::AssembleValues() { + const auto nDomain = matrix.nPointDomain; + const auto blkSz = matrix.blkSz; + const auto *d = matrix.d, *l = matrix.l, *u = matrix.u; + for (auto iPoint = 0ul; iPoint < nDomain; ++iPoint) { + auto* dst = values.data() + csr_row_ptr[iPoint] * blkSz; + for (auto k = matrix.row_ptr_l[iPoint]; k < matrix.row_ptr_l[iPoint + 1]; ++k, dst += blkSz) + for (auto b = 0ul; b < blkSz; ++b) dst[b] = SU2_TYPE::GetValue(l[k * blkSz + b]); + for (auto b = 0ul; b < blkSz; ++b) dst[b] = SU2_TYPE::GetValue(d[iPoint * blkSz + b]); + dst += blkSz; + for (auto k = matrix.row_ptr_u[iPoint]; k < matrix.row_ptr_u[iPoint + 1]; ++k, dst += blkSz) + for (auto b = 0ul; b < blkSz; ++b) dst[b] = SU2_TYPE::GetValue(u[k * blkSz + b]); + } +} + template void CPastixWrapper::Factorize(CGeometry* geometry, const CConfig* config, unsigned short kind_fact) { /*--- Detect a possible change of settings between direct and adjoint that requires a reset ---*/ @@ -246,28 +262,30 @@ void CPastixWrapper::Factorize(CGeometry* geometry, const CConfig* c if (isfactorized && !factorize) return; // No - /*--- Yes ---*/ + /*--- Yes: assemble LDU blocks into the flat CSR buffer ---*/ + AssembleValues(); if (mpi_rank == MASTER_NODE && verb > 0) { cout << "\n+-------------------------------------------------+"; cout << "\n+ PaStiX : Parallel Sparse matriX package +" << endl; } - const unsigned long szBlk = matrix.nVar * matrix.nVar, nNonZero = values.size(); + const auto blkSz = matrix.blkSz; - /*--- Copy matrix values and swap blocks as required ---*/ - - for (auto i = 0ul; i < nNonZero; ++i) values[i] = SU2_TYPE::GetValue(matrix.values[i]); + /*--- Permute blocks for rows with halo columns into global sorted order. + AssembleValues wrote them in LDU order; copy to tmp then write back sorted. ---*/ + vector tmp; for (auto i = 0ul; i < sort_rows.size(); ++i) { const auto iRow = sort_rows[i]; - const auto begin = matrix.rowptr[iRow]; - - for (auto j = 0ul; j < sort_order[i].size(); ++j) { - const auto target = (begin + j) * szBlk; - const auto source = sort_order[i][j] * szBlk; - - for (auto k = 0ul; k < szBlk; ++k) values[target + k] = SU2_TYPE::GetValue(matrix.values[source + k]); + /*--- colptr is 1-based Fortran numbering: row start = colptr[iRow] - 1. ---*/ + const auto begin = static_cast(colptr[iRow] - 1); + const auto nnz_row = sort_order[i].size(); + + tmp.assign(values.begin() + begin * blkSz, values.begin() + (begin + nnz_row) * blkSz); + for (auto j = 0ul; j < nnz_row; ++j) { + const auto src_pos = sort_order[i][j] - begin; + for (auto k = 0ul; k < blkSz; ++k) values[(begin + j) * blkSz + k] = tmp[src_pos * blkSz + k]; } } diff --git a/Common/src/linear_algebra/CSysMatrix.cpp b/Common/src/linear_algebra/CSysMatrix.cpp index e90f9e1c704..f02d908a059 100644 --- a/Common/src/linear_algebra/CSysMatrix.cpp +++ b/Common/src/linear_algebra/CSysMatrix.cpp @@ -31,6 +31,7 @@ #include "../../include/toolboxes/allocation_toolbox.hpp" #include +#include namespace { /*--- Helper function to regularize small pivots ---*/ @@ -51,21 +52,36 @@ CSysMatrix::CSysMatrix() : rank(SU2_MPI::GetRank()), size(SU2_MPI::G SU2_ZONE_SCOPED nPoint = nPointDomain = nVar = nEqn = 0; - nnz = nnz_ilu = 0; + mat.nnz_l = mat.nnz_u = 0; + gpu.nnz_l = gpu.nnz_u = 0; + ilu.nnz_l = ilu.nnz_u = 0; ilu_fill_in = 0; omp_partitions = nullptr; - matrix = nullptr; - row_ptr = nullptr; - dia_ptr = nullptr; - col_ind = nullptr; - col_ptr = nullptr; - - ILU_matrix = nullptr; - row_ptr_ilu = nullptr; - dia_ptr_ilu = nullptr; - col_ind_ilu = nullptr; + mat.row_ptr_l = nullptr; + mat.col_ind_l = nullptr; + mat.row_ptr_u = nullptr; + mat.col_ind_u = nullptr; + l_to_u_transp = nullptr; + u_to_l_transp = nullptr; + edge_ptr_l = nullptr; + + mat.d = nullptr; + mat.l = nullptr; + mat.u = nullptr; + + gpu.d = nullptr; + gpu.l = nullptr; + gpu.u = nullptr; + gpu.row_ptr_l = nullptr; + gpu.col_ind_l = nullptr; + gpu.row_ptr_u = nullptr; + gpu.col_ind_u = nullptr; + + ilu.l = nullptr; + ilu.d = nullptr; + ilu.u = nullptr; invM = nullptr; @@ -82,14 +98,22 @@ CSysMatrix::~CSysMatrix() { SU2_ZONE_SCOPED delete[] omp_partitions; - MemoryAllocation::aligned_free(ILU_matrix); - MemoryAllocation::aligned_free(matrix); + MemoryAllocation::aligned_free(ilu.l); + MemoryAllocation::aligned_free(ilu.d); + MemoryAllocation::aligned_free(ilu.u); + MemoryAllocation::aligned_free(mat.d); + MemoryAllocation::aligned_free(mat.l); + MemoryAllocation::aligned_free(mat.u); MemoryAllocation::aligned_free(invM); if (useCuda) { - GPUMemoryAllocation::gpu_free(d_matrix); - GPUMemoryAllocation::gpu_free(d_row_ptr); - GPUMemoryAllocation::gpu_free(d_col_ind); + GPUMemoryAllocation::gpu_free(gpu.d); + GPUMemoryAllocation::gpu_free(gpu.l); + GPUMemoryAllocation::gpu_free(gpu.u); + GPUMemoryAllocation::gpu_free(gpu.row_ptr_l); + GPUMemoryAllocation::gpu_free(gpu.col_ind_l); + GPUMemoryAllocation::gpu_free(gpu.row_ptr_u); + GPUMemoryAllocation::gpu_free(gpu.col_ind_u); } #ifdef USE_MKL @@ -109,7 +133,7 @@ void CSysMatrix::Initialize(unsigned long npoint, unsigned long npoi if (npoint == 0) return; - if (matrix != nullptr) { + if (mat.d != nullptr) { SU2_MPI::Error("CSysMatrix can only be initialized once.", CURRENT_FUNCTION); } @@ -146,45 +170,49 @@ void CSysMatrix::Initialize(unsigned long npoint, unsigned long npoi nPoint = npoint; nPointDomain = npointdomain; - /*--- Get sparse structure pointers from geometry, - * the data is managed by CGeometry to allow re-use. ---*/ - - const auto& csr = geometry->GetSparsePattern(type, 0); - - nnz = csr.getNumNonZeros(); - row_ptr = csr.outerPtr(); - col_ind = csr.innerIdx(); - dia_ptr = csr.diagPtr(); - - /*--- Allocate data. ---*/ + /*--- Allocate host data. ---*/ auto allocAndInit = [](ScalarType*& ptr, unsigned long num) { ptr = MemoryAllocation::aligned_alloc(64, num * sizeof(ScalarType)); }; - allocAndInit(matrix, nnz * nVar * nEqn); - useCuda = config->GetCUDA(); + /*--- L/D/U index structures and value arrays. ---*/ + { + const auto& pat = geometry->GetSparsePattern(type, 0); + mat.row_ptr_l = pat.l.outerPtr(); + mat.col_ind_l = pat.l.innerIdx(); + mat.nnz_l = pat.l.getNumNonZeros(); + mat.row_ptr_u = pat.u.outerPtr(); + mat.col_ind_u = pat.u.innerIdx(); + mat.nnz_u = pat.u.getNumNonZeros(); + } + allocAndInit(mat.d, nPoint * nVar * nEqn); + allocAndInit(mat.l, mat.nnz_l * nVar * nEqn); + allocAndInit(mat.u, mat.nnz_u * nVar * nEqn); + if (useCuda) { - /*--- Allocate GPU data. ---*/ auto GPUAllocAndInit = [](ScalarType*& ptr, unsigned long num) { ptr = GPUMemoryAllocation::gpu_alloc(num * sizeof(ScalarType)); }; - - auto GPUAllocAndCopy = [](const unsigned long*& ptr, const unsigned long*& src_ptr, unsigned long num) { - ptr = GPUMemoryAllocation::gpu_alloc_cpy(src_ptr, num * sizeof(const unsigned long)); + auto GPUAllocAndCopy = [](const unsigned long*& ptr, const unsigned long* src_ptr, unsigned long num) { + ptr = GPUMemoryAllocation::gpu_alloc_cpy(src_ptr, num * sizeof(unsigned long)); }; - - GPUAllocAndInit(d_matrix, nnz * nVar * nEqn); - GPUAllocAndCopy(d_row_ptr, row_ptr, (nPointDomain + 1)); - GPUAllocAndCopy(d_col_ind, col_ind, nnz); + GPUAllocAndInit(gpu.d, nPoint * nVar * nEqn); + GPUAllocAndInit(gpu.l, mat.nnz_l * nVar * nEqn); + GPUAllocAndInit(gpu.u, mat.nnz_u * nVar * nEqn); + GPUAllocAndCopy(gpu.row_ptr_l, mat.row_ptr_l, nPointDomain + 1); + GPUAllocAndCopy(gpu.col_ind_l, mat.col_ind_l, mat.nnz_l); + GPUAllocAndCopy(gpu.row_ptr_u, mat.row_ptr_u, nPointDomain + 1); + GPUAllocAndCopy(gpu.col_ind_u, mat.col_ind_u, mat.nnz_u); } - if (needTranspPtr) col_ptr = geometry->GetTransposeSparsePatternMap(type).data(); - if (type == ConnectivityType::FiniteVolume) { - edge_ptr.ptr = geometry->GetEdgeToSparsePatternMap().data(); - edge_ptr.nEdge = geometry->GetnEdge(); + edge_ptr_l = geometry->GetUToLTransposeSparsePatternMap(type).data(); + } + if (needTranspPtr) { + l_to_u_transp = geometry->GetLToUTransposeSparsePatternMap(type).data(); + u_to_l_transp = geometry->GetUToLTransposeSparsePatternMap(type).data(); } /*--- Get ILU sparse pattern, if fill is 0 no new data is allocated. --*/ @@ -192,21 +220,26 @@ void CSysMatrix::Initialize(unsigned long npoint, unsigned long npoi if (ilu_needed) { ilu_fill_in = config->GetLinear_Solver_ILU_n(); - const auto& csr_ilu = geometry->GetSparsePattern(type, ilu_fill_in); - - row_ptr_ilu = csr_ilu.outerPtr(); - col_ind_ilu = csr_ilu.innerIdx(); - dia_ptr_ilu = csr_ilu.diagPtr(); - nnz_ilu = csr_ilu.getNumNonZeros(); + const auto& pat_ilu = geometry->GetSparsePattern(type, ilu_fill_in); + ilu.row_ptr_l = pat_ilu.l.outerPtr(); + ilu.col_ind_l = pat_ilu.l.innerIdx(); + ilu.nnz_l = pat_ilu.l.getNumNonZeros(); + ilu.row_ptr_u = pat_ilu.u.outerPtr(); + ilu.col_ind_u = pat_ilu.u.innerIdx(); + ilu.nnz_u = pat_ilu.u.getNumNonZeros(); if (omp_get_max_threads() > 1 && config->GetLinear_Solver_ILU_levels()) { - levels_ilu = computeLevels(csr_ilu); + levels_ilu = computeLevels(pat_ilu.l); } } /*--- Preconditioners. ---*/ - if (ilu_needed) allocAndInit(ILU_matrix, nnz_ilu * nVar * nEqn); + if (ilu_needed) { + allocAndInit(ilu.l, ilu.nnz_l * nVar * nEqn); + allocAndInit(ilu.d, nPointDomain * nVar * nEqn); + allocAndInit(ilu.u, ilu.nnz_u * nVar * nEqn); + } if (diag_needed) allocAndInit(invM, nPointDomain * nVar * nEqn); @@ -216,7 +249,7 @@ void CSysMatrix::Initialize(unsigned long npoint, unsigned long npoi /*--- Set suitable chunk sizes for light static for loops, and heavy dynamic ones, such that threads are approximately evenly loaded. ---*/ - omp_light_size = computeStaticChunkSize(nnz * nVar * nEqn, num_threads, OMP_MAX_SIZE_L); + omp_light_size = computeStaticChunkSize((mat.nnz_l + mat.nnz_u + nPoint) * nVar * nEqn, num_threads, OMP_MAX_SIZE_L); omp_heavy_size = computeStaticChunkSize(nPointDomain, num_threads, OMP_MAX_SIZE_H); omp_num_parts = config->GetLinear_Solver_Prec_Threads(); @@ -228,13 +261,16 @@ void CSysMatrix::Initialize(unsigned long npoint, unsigned long npoi /*--- Work estimate based on non-zeros to produce balanced partitions. ---*/ - const auto row_ptr_prec = ilu_needed ? row_ptr_ilu : row_ptr; - const auto nnz_prec = row_ptr_prec[nPointDomain]; - + /*--- Cumulative nnz up to row iPoint for the preconditioner's LDU pattern. ---*/ + auto nnz_up_to = [&](unsigned long iPoint) -> unsigned long { + if (ilu_needed) return ilu.row_ptr_l[iPoint] + iPoint + ilu.row_ptr_u[iPoint]; + return mat.row_ptr_l[iPoint] + iPoint + mat.row_ptr_u[iPoint]; + }; + const auto nnz_prec = nnz_up_to(nPointDomain); const auto nnz_per_part = roundUpDiv(nnz_prec, omp_num_parts); for (auto iPoint = 0ul, part = 0ul; iPoint < nPointDomain; ++iPoint) { - if (row_ptr_prec[iPoint] >= part * nnz_per_part) omp_partitions[part++] = iPoint; + if (nnz_up_to(iPoint) >= part * nnz_per_part) omp_partitions[part++] = iPoint; } for (unsigned long thread = 0; thread < omp_num_parts; ++thread) { @@ -505,11 +541,18 @@ void CSysMatrixComms::Complete(CSysVector& x, CGeometry* geometry, const CCon template void CSysMatrix::SetValZero() { SU2_ZONE_SCOPED - const auto size = nnz * nVar * nEqn; - const auto chunk = roundUpDiv(size, omp_get_num_threads()); - const auto begin = chunk * omp_get_thread_num(); - const auto mySize = min(chunk, size - begin) * sizeof(ScalarType); - memset(&matrix[begin], 0, mySize); + const auto nThreads = static_cast(omp_get_num_threads()); + const auto iThread = static_cast(omp_get_thread_num()); + auto zeroChunk = [&](ScalarType* arr, unsigned long n) { + if (n == 0) return; + const auto chunk = roundUpDiv(n, nThreads); + const auto begin = min(chunk * iThread, n); + const auto mySize = min(chunk, n - begin) * sizeof(ScalarType); + if (mySize) memset(&arr[begin], 0, mySize); + }; + zeroChunk(mat.d, nPoint * nVar * nEqn); + zeroChunk(mat.l, mat.nnz_l * nVar * nEqn); + zeroChunk(mat.u, mat.nnz_u * nVar * nEqn); SU2_OMP_BARRIER } @@ -517,8 +560,7 @@ template void CSysMatrix::SetValDiagonalZero() { SU2_ZONE_SCOPED SU2_OMP_FOR_STAT(omp_heavy_size) - for (auto iPoint = 0ul; iPoint < nPointDomain; ++iPoint) - for (auto index = 0ul; index < nVar * nEqn; ++index) matrix[dia_ptr[iPoint] * nVar * nEqn + index] = 0.0; + for (auto iVar = 0ul; iVar < nPointDomain * nVar * nEqn; ++iVar) mat.d[iVar] = 0; END_SU2_OMP_FOR } @@ -631,12 +673,13 @@ void CSysMatrix::MatrixInverse(ScalarType* matrix, ScalarType* inver template void CSysMatrix::DeleteValsRowi(unsigned long block_i, unsigned long row) { SU2_ZONE_SCOPED - for (auto index = row_ptr[block_i]; index < row_ptr[block_i + 1]; index++) { - for (auto iVar = 0u; iVar < nVar; iVar++) - matrix[index * nVar * nVar + row * nVar + iVar] = 0.0; // Delete row values in the block - if (col_ind[index] == block_i) - matrix[index * nVar * nVar + row * nVar + row] = 1.0; // Set 1 to the diagonal element - } + for (auto k = mat.row_ptr_l[block_i]; k < mat.row_ptr_l[block_i + 1]; ++k) + for (auto iVar = 0u; iVar < nVar; iVar++) mat.l[k * nVar * nEqn + row * nEqn + iVar] = 0.0; + auto* d = &mat.d[block_i * nVar * nEqn]; + for (auto iVar = 0u; iVar < nVar; iVar++) d[row * nEqn + iVar] = 0.0; + d[row * nEqn + row] = 1.0; + for (auto k = mat.row_ptr_u[block_i]; k < mat.row_ptr_u[block_i + 1]; ++k) + for (auto iVar = 0u; iVar < nVar; iVar++) mat.u[k * nVar * nEqn + row * nEqn + iVar] = 0.0; } template @@ -700,49 +743,53 @@ void CSysMatrix::ComputeJacobiPreconditioner(const CSysVector void CSysMatrix::BuildILUPreconditioner() { + SU2_ZONE_SCOPED const auto blockSize = nVar * nVar; ScalarType Lij[MAXNVAR * MAXNVAR], Lij_Ujk[MAXNVAR * MAXNVAR]; /*--- Helper to copy block matrix to compute factorization in-place. ---*/ auto InitIluRow = [&](const auto iPoint) { + MatrixCopy(&mat.d[iPoint * blockSize], &ilu.d[iPoint * blockSize]); + if (ilu_fill_in == 0) { - /*--- ILU0, direct copy to initialize. ---*/ - const auto begin = row_ptr_ilu[iPoint] * blockSize; - const auto end = row_ptr_ilu[iPoint + 1] * blockSize; - SU2_OMP_SIMD - for (unsigned long k = begin; k < end; ++k) ILU_matrix[k] = matrix[k]; + /*--- ILU0: Same sparse pattern, copy L and U blocks directly. ---*/ + auto copy = [&](const unsigned long* row_ptr, const ScalarType* mat, ScalarType* ilu) { + const unsigned long begin = row_ptr[iPoint] * blockSize; + const unsigned long end = row_ptr[iPoint + 1] * blockSize; + SU2_OMP_SIMD + for (auto k = begin; k < end; ++k) ilu[k] = mat[k]; + }; + copy(ilu.row_ptr_l, mat.l, ilu.l); + copy(ilu.row_ptr_u, mat.u, ilu.u); return; } - /*--- ILUn, clear or copy the entries of the matrix. ---*/ - auto indexMat = row_ptr[iPoint]; - const auto endMat = row_ptr[iPoint + 1]; - for (auto index = row_ptr_ilu[iPoint]; index < row_ptr_ilu[iPoint + 1];) { - const auto jPoint = col_ind_ilu[index]; - const auto jPointMat = col_ind[indexMat]; - if (jPoint < jPointMat || indexMat == endMat) { - /*--- ILU column has not caught up with matrix column or all matrix columns were used. ---*/ - ZeroMatrix(&ILU_matrix[index * blockSize]); - ++index; - } else { - /*--- Columns match, copy the matrix block. ---*/ - if (jPoint == jPointMat) { - MatrixCopy(&matrix[indexMat * blockSize], &ILU_matrix[index * blockSize]); - ++index; + /*--- ILUn: Merge-scan L and U via shared lambda. ---*/ + auto scatterPart = [&](const unsigned long* mat_row_ptr, const unsigned long* mat_col_ind, + const ScalarType* mat_vals, const unsigned long* ilu_row_ptr, + const unsigned long* ilu_col_ind, ScalarType* ilu_vals) { + auto km = mat_row_ptr[iPoint], km_end = mat_row_ptr[iPoint + 1]; + for (auto k = ilu_row_ptr[iPoint]; k < ilu_row_ptr[iPoint + 1]; ++k) { + const auto jPoint = ilu_col_ind[k]; + while (km < km_end && mat_col_ind[km] < jPoint) ++km; + if (km < km_end && mat_col_ind[km] == jPoint) { + MatrixCopy(&mat_vals[km * blockSize], &ilu_vals[k * blockSize]); + } else { + ZeroMatrix(&ilu_vals[k * blockSize]); } - /*--- We've either copied the matrix column or it has not caught up with the ILU column. ---*/ - ++indexMat; } - } + }; + scatterPart(mat.row_ptr_l, mat.col_ind_l, mat.l, ilu.row_ptr_l, ilu.col_ind_l, ilu.l); + scatterPart(mat.row_ptr_u, mat.col_ind_u, mat.u, ilu.row_ptr_u, ilu.col_ind_u, ilu.u); }; /*--- Update one row of the LU matrix. ---*/ auto BuildIluRow = [&](const auto iPoint, const auto begin, const auto end) { /*--- For this row (unknown), loop over its lower diagonal entries. ---*/ - for (auto index = row_ptr_ilu[iPoint]; index < dia_ptr_ilu[iPoint]; ++index) { + for (auto kl = ilu.row_ptr_l[iPoint]; kl < ilu.row_ptr_l[iPoint + 1]; ++kl) { /*--- jPoint is the column index (jPoint < iPoint). ---*/ - const auto jPoint = col_ind_ilu[index]; + const auto jPoint = ilu.col_ind_l[kl]; /*--- We only care about the sub matrix within "begin" and "end-1". ---*/ @@ -750,16 +797,16 @@ void CSysMatrix::BuildILUPreconditioner() { /*--- Multiply the block by the inverse of the corresponding diagonal block. ---*/ - auto* Block_ij = &ILU_matrix[index * blockSize]; - const auto* invUjj = &ILU_matrix[dia_ptr_ilu[jPoint] * blockSize]; + auto* Block_ij = &ilu.l[kl * blockSize]; + const auto* invUjj = &ilu.d[jPoint * blockSize]; MatrixMatrixProduct(Block_ij, invUjj, Lij); /*--- Lij holds Aij*inv(Ujj). Jump to the upper part of the jPoint row. ---*/ - for (auto index_ = dia_ptr_ilu[jPoint] + 1; index_ < row_ptr_ilu[jPoint + 1]; ++index_) { + for (auto ku = ilu.row_ptr_u[jPoint]; ku < ilu.row_ptr_u[jPoint + 1]; ++ku) { /*--- Get the column index (kPoint > jPoint). ---*/ - const auto kPoint = col_ind_ilu[index_]; + const auto kPoint = ilu.col_ind_u[ku]; if (kPoint >= end) break; /*--- If Aik exists, update it: Aik -= Lij * Ujk ---*/ @@ -767,7 +814,7 @@ void CSysMatrix::BuildILUPreconditioner() { auto* Block_ik = GetBlock_ILUMatrix(iPoint, kPoint); if (Block_ik == nullptr) continue; - const auto* Ujk = &ILU_matrix[index_ * blockSize]; + const auto* Ujk = &ilu.u[ku * blockSize]; MatrixMatrixProduct(Lij, Ujk, Lij_Ujk); MatrixSubtraction(Block_ik, Lij_Ujk, Block_ik); } @@ -827,6 +874,7 @@ void CSysMatrix::BuildILUPreconditioner() { template void CSysMatrix::ComputeILUPreconditioner(const CSysVector& vec, CSysVector& prod, CGeometry* geometry, const CConfig* config) const { + SU2_ZONE_SCOPED /*--- Coherent view of vectors. ---*/ SU2_OMP_BARRIER @@ -839,11 +887,10 @@ void CSysMatrix::ComputeILUPreconditioner(const CSysVector::ComputeILUPreconditioner(const CSysVector= end) break; - const auto* Block_ij = &ILU_matrix[index * blockSize]; + const auto* Block_ij = &ilu.u[ku * blockSize]; MatrixVectorProductSub(Block_ij, &prod[jPoint * nVar], aux_vec); } @@ -1045,14 +1092,14 @@ void CSysMatrix::ComputeLineletPreconditioner(const CSysVector::EnforceSolutionAtNode(const unsigned long node_i, c * symmetric the entire column may not be eliminated, the result (matrix and vector) is still correct. * The vector is updated with the product of column i by the known (enforced) solution at node i. ---*/ - for (auto index = row_ptr[node_i]; index < row_ptr[node_i + 1]; ++index) { - auto node_j = col_ind[index]; - - /*--- The diagonal block is handled outside the loop. ---*/ - if (node_j == node_i) continue; - - /*--- Delete block j on row i (bij) and ATTEMPT to delete block i on row j (bji). ---*/ - auto bij = &matrix[index * nVar * nVar]; + /*--- Visit off-diagonal columns (L then U; diagonal is handled by SetVal2Diag outside). ---*/ + auto processOffDiag = [&](unsigned long node_j) { + auto bij = GetBlock(node_i, node_j); auto bji = GetBlock(node_j, node_i); - - /*--- The "attempt" part. ---*/ if (bji == nullptr) { node_j = node_i; bji = bij; } - for (auto iVar = 0ul; iVar < nVar; ++iVar) { for (auto jVar = 0ul; jVar < nVar; ++jVar) { - /*--- Column product. ---*/ b[node_j * nVar + iVar] -= bji[iVar * nVar + jVar] * x_i[jVar]; - /*--- Delete blocks. ---*/ bij[iVar * nVar + jVar] = bji[iVar * nVar + jVar] = 0.0; } } - } + }; + for (auto k = mat.row_ptr_l[node_i]; k < mat.row_ptr_l[node_i + 1]; ++k) processOffDiag(mat.col_ind_l[k]); + for (auto k = mat.row_ptr_u[node_i]; k < mat.row_ptr_u[node_i + 1]; ++k) processOffDiag(mat.col_ind_u[k]); /*--- Set the diagonal block to the identity. ---*/ SetVal2Diag(node_i, 1.0); @@ -1169,54 +1208,33 @@ template void CSysMatrix::EnforceZeroProjection(unsigned long node_i, const OtherType* n, CSysVector& b) { SU2_ZONE_SCOPED - for (auto index = row_ptr[node_i]; index < row_ptr[node_i + 1]; ++index) { - const auto node_j = col_ind[index]; - - /*--- Remove product components of block j on row i (bij) and ATTEMPT - * to remove solution components of block i on row j (bji). - * This is identical to symmetry correction applied to gradients - * but extended to the entire matrix. ---*/ - - auto bij = &matrix[index * nVar * nVar]; + /*--- Visit all columns (L, diagonal, U) of row node_i. ---*/ + auto processCol = [&](unsigned long node_j, bool isDiag) { + auto bij = GetBlock(node_i, node_j); auto bji = GetBlock(node_j, node_i); - - /*--- Attempt to remove solution components. ---*/ ScalarType nbn{}; if (bji != nullptr) { for (auto iVar = 0ul; iVar < nVar; ++iVar) { ScalarType proj{}; - for (auto jVar = 0ul; jVar < nVar; ++jVar) { - proj += bji[iVar * nVar + jVar] * PassiveAssign(n[jVar]); - } - for (auto jVar = 0ul; jVar < nVar; ++jVar) { - bji[iVar * nVar + jVar] -= proj * PassiveAssign(n[jVar]); - } + for (auto jVar = 0ul; jVar < nVar; ++jVar) proj += bji[iVar * nVar + jVar] * PassiveAssign(n[jVar]); + for (auto jVar = 0ul; jVar < nVar; ++jVar) bji[iVar * nVar + jVar] -= proj * PassiveAssign(n[jVar]); nbn += proj * PassiveAssign(n[iVar]); } } - - /*--- Product components. ---*/ for (auto jVar = 0ul; jVar < nVar; ++jVar) { ScalarType proj{}; - for (auto iVar = 0ul; iVar < nVar; ++iVar) { - proj += bij[iVar * nVar + jVar] * PassiveAssign(n[iVar]); - } - for (auto iVar = 0ul; iVar < nVar; ++iVar) { - bij[iVar * nVar + jVar] -= proj * PassiveAssign(n[iVar]); - } + for (auto iVar = 0ul; iVar < nVar; ++iVar) proj += bij[iVar * nVar + jVar] * PassiveAssign(n[iVar]); + for (auto iVar = 0ul; iVar < nVar; ++iVar) bij[iVar * nVar + jVar] -= proj * PassiveAssign(n[iVar]); } - - /*--- This part doesn't have the "*2" factor because the product components - * were removed from the result of removing the solution components - * instead of from the original block (bji == bij). ---*/ - if (node_i == node_j) { - for (auto iVar = 0ul; iVar < nVar; ++iVar) { - for (auto jVar = 0ul; jVar < nVar; ++jVar) { + if (isDiag) { + for (auto iVar = 0ul; iVar < nVar; ++iVar) + for (auto jVar = 0ul; jVar < nVar; ++jVar) bij[iVar * nVar + jVar] += PassiveAssign(n[iVar]) * nbn * PassiveAssign(n[jVar]); - } - } } - } + }; + for (auto k = mat.row_ptr_l[node_i]; k < mat.row_ptr_l[node_i + 1]; ++k) processCol(mat.col_ind_l[k], false); + processCol(node_i, true); + for (auto k = mat.row_ptr_u[node_i]; k < mat.row_ptr_u[node_i + 1]; ++k) processCol(mat.col_ind_u[k], false); OtherType proj{}; for (auto iVar = 0ul; iVar < nVar; ++iVar) proj += b(node_i, iVar) * n[iVar]; @@ -1229,14 +1247,16 @@ void CSysMatrix::SetDiagonalAsColumnSum() { SU2_OMP_FOR_DYN(omp_heavy_size) for (auto iPoint = 0ul; iPoint < nPoint; ++iPoint) { - auto block_ii = &matrix[dia_ptr[iPoint] * nVar * nEqn]; + auto* d_i = &mat.d[iPoint * nVar * nEqn]; + for (auto k = 0ul; k < nVar * nEqn; ++k) d_i[k] = 0.0; - for (auto k = 0ul; k < nVar * nEqn; ++k) block_ii[k] = 0.0; + /*--- For each L entry (iPoint, j): subtract its U-transpose (j, iPoint). ---*/ + for (auto k_l = mat.row_ptr_l[iPoint]; k_l < mat.row_ptr_l[iPoint + 1]; ++k_l) + MatrixSubtraction(d_i, &mat.u[l_to_u_transp[k_l] * nVar * nEqn], d_i); - for (auto k = row_ptr[iPoint]; k < row_ptr[iPoint + 1]; ++k) { - auto block_ji = &matrix[col_ptr[k] * nVar * nEqn]; - if (block_ji != block_ii) MatrixSubtraction(block_ii, block_ji, block_ii); - } + /*--- For each U entry (iPoint, j): subtract its L-transpose (j, iPoint). ---*/ + for (auto k_u = mat.row_ptr_u[iPoint]; k_u < mat.row_ptr_u[iPoint + 1]; ++k_u) + MatrixSubtraction(d_i, &mat.l[u_to_l_transp[k_u] * nVar * nEqn], d_i); } END_SU2_OMP_FOR } @@ -1262,38 +1282,34 @@ void CSysMatrix::TransposeInPlace() { /*--- Swap ij with ji and transpose them. ---*/ - if (edge_ptr) { - /*--- The FV way. ---*/ + if (edge_ptr_l) { + /*--- FV path: each edge maps to one U and one L block. ---*/ SU2_OMP_FOR_DYN(omp_heavy_size * 2) - for (auto iEdge = 0ul; iEdge < edge_ptr.nEdge; ++iEdge) { - auto bij = &matrix[edge_ptr(iEdge, 0) * nVar * nVar]; - auto bji = &matrix[edge_ptr(iEdge, 1) * nVar * nVar]; - - swapAndTransp(nVar, bij, bji); + for (auto iEdge = 0ul; iEdge < mat.nnz_l; ++iEdge) { + auto* bij_u = &mat.u[iEdge * nVar * nVar]; + auto* bji_l = &mat.l[edge_ptr_l[iEdge] * nVar * nVar]; + swapAndTransp(nVar, bij_u, bji_l); } END_SU2_OMP_FOR - } else if (col_ptr) { - /*--- If the column pointer was built. ---*/ + } else if (l_to_u_transp) { + /*--- FEM/general path: use the L→U transpose map (one L entry per pair). ---*/ SU2_OMP_FOR_DYN(omp_heavy_size) for (auto iPoint = 0ul; iPoint < nPoint; ++iPoint) { - for (auto k = row_ptr[iPoint]; k < dia_ptr[iPoint]; ++k) { - auto bij = &matrix[k * nVar * nVar]; - auto bji = &matrix[col_ptr[k] * nVar * nVar]; - - swapAndTransp(nVar, bij, bji); + for (auto k_l = mat.row_ptr_l[iPoint]; k_l < mat.row_ptr_l[iPoint + 1]; ++k_l) { + const auto k_u = l_to_u_transp[k_l]; + swapAndTransp(nVar, &mat.u[k_u * nVar * nVar], &mat.l[k_l * nVar * nVar]); } } END_SU2_OMP_FOR } else { - /*--- Slow fallback, needs to search for ji. ---*/ + /*--- Slow fallback: search for each U entry's L partner via GetBlock. ---*/ SU2_OMP_FOR_DYN(omp_heavy_size) for (auto iPoint = 0ul; iPoint < nPoint; ++iPoint) { - for (auto k = dia_ptr[iPoint] + 1ul; k < row_ptr[iPoint + 1]; ++k) { - const auto jPoint = col_ind[k]; - auto bij = &matrix[k * nVar * nVar]; - auto bji = GetBlock(jPoint, iPoint); + for (auto k_u = mat.row_ptr_u[iPoint]; k_u < mat.row_ptr_u[iPoint + 1]; ++k_u) { + const auto jPoint = mat.col_ind_u[k_u]; + auto* bij = &mat.u[k_u * nVar * nVar]; + auto* bji = GetBlock(jPoint, iPoint); assert(bji && "Pattern is not symmetric."); - swapAndTransp(nVar, bij, bji); } } @@ -1304,7 +1320,7 @@ void CSysMatrix::TransposeInPlace() { SU2_OMP_FOR_STAT(omp_heavy_size) for (auto iPoint = 0ul; iPoint < nPoint; ++iPoint) { - auto bii = &matrix[dia_ptr[iPoint] * nVar * nVar]; + auto bii = &mat.d[iPoint * nVar * nVar]; for (auto i = 0ul; i < nVar; ++i) for (auto j = 0ul; j < i; ++j) std::swap(bii[i * nVar + j], bii[j * nVar + i]); } @@ -1320,16 +1336,20 @@ void CSysMatrix::TransposeInPlace() { template void CSysMatrix::MatrixMatrixAddition(ScalarType alpha, const CSysMatrix& B) { SU2_ZONE_SCOPED - /*--- Check that the sparse structure is shared between the two matrices, - * comparing pointers is ok as they are obtained from CGeometry. ---*/ - bool ok = (row_ptr == B.row_ptr) && (col_ind == B.col_ind) && (nVar == B.nVar) && (nEqn == B.nEqn) && (nnz == B.nnz); - - if (!ok) { - SU2_MPI::Error("Matrices do not have compatible sparsity.", CURRENT_FUNCTION); - } + /*--- Check that the LDU structure is shared (pointer equality since both come from CGeometry). ---*/ + const bool ok = (mat.row_ptr_l == B.mat.row_ptr_l) && (mat.col_ind_l == B.mat.col_ind_l) && + (mat.row_ptr_u == B.mat.row_ptr_u) && (mat.col_ind_u == B.mat.col_ind_u) && (nVar == B.nVar) && + (nEqn == B.nEqn) && (nPoint == B.nPoint) && (mat.nnz_l == B.mat.nnz_l) && (mat.nnz_u == B.mat.nnz_u); + if (!ok) SU2_MPI::Error("Matrices do not have compatible sparsity.", CURRENT_FUNCTION); SU2_OMP_FOR_STAT(omp_light_size) - for (auto i = 0ul; i < nnz * nVar * nEqn; ++i) matrix[i] += alpha * B.matrix[i]; + for (auto i = 0ul; i < nPoint * nVar * nEqn; ++i) mat.d[i] += alpha * B.mat.d[i]; + END_SU2_OMP_FOR + SU2_OMP_FOR_STAT(omp_light_size) + for (auto i = 0ul; i < mat.nnz_l * nVar * nEqn; ++i) mat.l[i] += alpha * B.mat.l[i]; + END_SU2_OMP_FOR + SU2_OMP_FOR_STAT(omp_light_size) + for (auto i = 0ul; i < mat.nnz_u * nVar * nEqn; ++i) mat.u[i] += alpha * B.mat.u[i]; END_SU2_OMP_FOR } @@ -1338,9 +1358,9 @@ void CSysMatrix::BuildPastixPreconditioner(CGeometry* geometry, cons unsigned short kind_fact) { SU2_ZONE_SCOPED #ifdef HAVE_PASTIX - /*--- Pastix will launch nested threads. ---*/ BEGIN_SU2_OMP_SAFE_GLOBAL_ACCESS { - pastix_wrapper.SetMatrix(nVar, nPoint, nPointDomain, row_ptr, col_ind, matrix); + pastix_wrapper.SetLDU(nVar, nPoint, nPointDomain, mat.row_ptr_l, mat.col_ind_l, mat.row_ptr_u, mat.col_ind_u, mat.d, + mat.l, mat.u); pastix_wrapper.Factorize(geometry, config, kind_fact); } END_SU2_OMP_SAFE_GLOBAL_ACCESS diff --git a/Common/src/linear_algebra/CSysMatrixGPU.cu b/Common/src/linear_algebra/CSysMatrixGPU.cu index a3f1a77ca40..1ebba30097c 100644 --- a/Common/src/linear_algebra/CSysMatrixGPU.cu +++ b/Common/src/linear_algebra/CSysMatrixGPU.cu @@ -28,112 +28,70 @@ #include "../../include/linear_algebra/CSysMatrix.hpp" #include "../../include/linear_algebra/GPUComms.cuh" -#include -#include -#include - -inline void cusparseAssert(cusparseStatus_t code, const char* file, int line, bool abort = true) { - if (code != CUSPARSE_STATUS_SUCCESS) { - fprintf(stderr, "cuSPARSEassert: %s %s %d\n", cusparseGetErrorString(code), file, line); - if (abort) exit(static_cast(code)); +/*! + * \brief Block-LDU SpMV kernel: y[iRow] = (L + D + U) * x per block-row. + * One CUDA block per block-row; threadIdx.x indexes output variable (0..nVar-1). + */ +template +__global__ void BlockLDU_SpMV_kernel(unsigned long nRows, unsigned long nVar, + const unsigned long* __restrict__ row_ptr_l, + const unsigned long* __restrict__ col_ind_l, + const ScalarType* __restrict__ mat_l, + const ScalarType* __restrict__ mat_d, + const unsigned long* __restrict__ row_ptr_u, + const unsigned long* __restrict__ col_ind_u, + const ScalarType* __restrict__ mat_u, + const ScalarType* __restrict__ x, ScalarType* __restrict__ y) { + const unsigned long iRow = blockIdx.x; + const unsigned long iVar = threadIdx.x; + if (iRow >= nRows || iVar >= nVar) return; + + ScalarType sum = 0; + /* Lower */ + for (auto k = row_ptr_l[iRow]; k < row_ptr_l[iRow + 1]; ++k) { + const auto col = col_ind_l[k]; + const ScalarType* blk = mat_l + k * nVar * nVar + iVar * nVar; + for (unsigned long jVar = 0; jVar < nVar; ++jVar) sum += blk[jVar] * x[col * nVar + jVar]; } -} - -#define cusparseErrChk(ans) \ - { \ - cusparseAssert((ans), __FILE__, __LINE__); \ + /* Diagonal */ + { + const ScalarType* blk = mat_d + iRow * nVar * nVar + iVar * nVar; + for (unsigned long jVar = 0; jVar < nVar; ++jVar) sum += blk[jVar] * x[iRow * nVar + jVar]; } - -inline cusparseIndexType_t GetCusparseIndexType() { - if constexpr (sizeof(unsigned long) == 4) { - return CUSPARSE_INDEX_32I; - } else if constexpr (sizeof(unsigned long) == 8) { - return CUSPARSE_INDEX_64I; - } else { - static_assert(sizeof(unsigned long) == 4 || sizeof(unsigned long) == 8, - "cuSPARSE BSR SpMV only supports 32-bit or 64-bit index arrays in this path."); + /* Upper */ + for (auto k = row_ptr_u[iRow]; k < row_ptr_u[iRow + 1]; ++k) { + const auto col = col_ind_u[k]; + const ScalarType* blk = mat_u + k * nVar * nVar + iVar * nVar; + for (unsigned long jVar = 0; jVar < nVar; ++jVar) sum += blk[jVar] * x[col * nVar + jVar]; } + y[iRow * nVar + iVar] = sum; } template -constexpr cudaDataType GetCudaDataType() { - if constexpr (std::is_same::value) { - return CUDA_R_32F; - } else if constexpr (std::is_same::value) { - return CUDA_R_64F; - } else { - static_assert(std::is_same::value || std::is_same::value, - "cuSPARSE BSR SpMV only supports float and double in this path."); - } -} - -template -void CSysMatrix::HtDTransfer(bool trigger) const -{ - if(trigger) gpuErrChk(cudaMemcpy((void*)(d_matrix), (void*)&matrix[0], (sizeof(ScalarType)*nnz*nVar*nEqn), cudaMemcpyHostToDevice)); +void CSysMatrix::HtDTransfer(bool trigger) const { + if (!trigger) return; + gpuErrChk(cudaMemcpy(gpu.d, mat.d, sizeof(ScalarType) * nPoint * nVar * nEqn, cudaMemcpyHostToDevice)); + gpuErrChk(cudaMemcpy(gpu.l, mat.l, sizeof(ScalarType) * mat.nnz_l * nVar * nEqn, cudaMemcpyHostToDevice)); + gpuErrChk(cudaMemcpy(gpu.u, mat.u, sizeof(ScalarType) * mat.nnz_u * nVar * nEqn, cudaMemcpyHostToDevice)); } template void CSysMatrix::GPUMatrixVectorProduct(const CSysVector& vec, CSysVector& prod, CGeometry* geometry, const CConfig* config) const { if (nVar != nEqn) { - SU2_MPI::Error("CUDA CSysMatrix matvec with cuSPARSE BSR requires square blocks.", CURRENT_FUNCTION); + SU2_MPI::Error("CUDA CSysMatrix block-LDU SpMV requires square blocks.", CURRENT_FUNCTION); } ScalarType* d_vec = vec.GetDevicePointer(); ScalarType* d_prod = prod.GetDevicePointer(); - vec.HtDTransfer(); - const auto indexType = GetCusparseIndexType(); - const auto valueType = GetCudaDataType(); - - const std::int64_t blockSize = static_cast(nVar); - - const std::int64_t brows = static_cast(nPointDomain); - const std::int64_t bcols = static_cast(nPoint); - const std::int64_t bnnz = static_cast(nnz); - - const std::int64_t xSize = static_cast(nPoint) * blockSize; - const std::int64_t ySize = static_cast(nPointDomain) * blockSize; - - const ScalarType alpha = 1.0; - const ScalarType beta = 0.0; - - cusparseHandle_t handle = nullptr; - cusparseConstSpMatDescr_t matA = nullptr; - cusparseDnVecDescr_t vecX = nullptr; - cusparseDnVecDescr_t vecY = nullptr; - - cusparseErrChk(cusparseCreate(&handle)); - - cusparseErrChk(cusparseCreateConstBsr(&matA, brows, bcols, bnnz, blockSize, blockSize, d_row_ptr, d_col_ind, d_matrix, - indexType, indexType, CUSPARSE_INDEX_BASE_ZERO, valueType, CUSPARSE_ORDER_ROW)); - - cusparseErrChk(cusparseCreateDnVec(&vecX, xSize, d_vec, valueType)); - cusparseErrChk(cusparseCreateDnVec(&vecY, ySize, d_prod, valueType)); - - size_t bufferSize = 0; - void* dBuffer = nullptr; - - cusparseErrChk(cusparseSpMV_bufferSize(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, &alpha, matA, vecX, &beta, vecY, - valueType, CUSPARSE_SPMV_BSR_ALG1, &bufferSize)); - - if (bufferSize > 0) { - gpuErrChk(cudaMalloc(&dBuffer, bufferSize)); - } - - cusparseErrChk(cusparseSpMV(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, &alpha, matA, vecX, &beta, vecY, valueType, - CUSPARSE_SPMV_BSR_ALG1, dBuffer)); - - if (dBuffer != nullptr) { - gpuErrChk(cudaFree(dBuffer)); - } - - cusparseErrChk(cusparseDestroyDnVec(vecY)); - cusparseErrChk(cusparseDestroyDnVec(vecX)); - cusparseErrChk(cusparseDestroySpMat(matA)); - cusparseErrChk(cusparseDestroy(handle)); + dim3 blockDim(static_cast(nVar), 1, 1); + dim3 gridDim(static_cast(nPointDomain), 1, 1); + BlockLDU_SpMV_kernel<<>>( + nPointDomain, nVar, gpu.row_ptr_l, gpu.col_ind_l, gpu.l, gpu.d, + gpu.row_ptr_u, gpu.col_ind_u, gpu.u, d_vec, d_prod); + gpuErrChk(cudaGetLastError()); prod.DtHTransfer(); } diff --git a/Common/src/linear_algebra/CSysSolve.cpp b/Common/src/linear_algebra/CSysSolve.cpp index cd100f311ab..aae5d9ce707 100644 --- a/Common/src/linear_algebra/CSysSolve.cpp +++ b/Common/src/linear_algebra/CSysSolve.cpp @@ -211,12 +211,14 @@ bool CSysSolve::ModGramSchmidt(bool shared_hsbg, int i, su2matrix::multiDot(w, i + 1, 1, w, i + 1); LinearCombination( shared_hsbg, i + 1, w, [&h_i](int k) { return -h_i(0, k); }, w[i + 1], true); - - const auto& dh_i = CSysVector::multiDot(w, i + 1, 1, w, i + 1); - LinearCombination( - shared_hsbg, i + 1, w, [&dh_i](int k) { return -dh_i(0, k); }, w[i + 1], true); - - for (int k = 0; k < i + 1; k++) SetHsbg(k, i, h_i(0, k) + dh_i(0, k)); + if (i < 5) { + for (int k = 0; k < i + 1; k++) SetHsbg(k, i, h_i(0, k)); + } else { + const auto& dh_i = CSysVector::multiDot(w, i + 1, 1, w, i + 1); + LinearCombination( + shared_hsbg, i + 1, w, [&dh_i](int k) { return -dh_i(0, k); }, w[i + 1], true); + for (int k = 0; k < i + 1; k++) SetHsbg(k, i, h_i(0, k) + dh_i(0, k)); + } /*--- The norm of w[i+1] is 0 or NaN: the input vector from mat_vec is * zero or contains NaN. Cannot proceed with orthogonalization. ---*/ diff --git a/TestCases/hybrid_regression.py b/TestCases/hybrid_regression.py index 612afb6c50a..03d23ae1357 100644 --- a/TestCases/hybrid_regression.py +++ b/TestCases/hybrid_regression.py @@ -51,7 +51,7 @@ def main(): channel.cfg_dir = "euler/channel" channel.cfg_file = "inv_channel_RK.cfg" channel.test_iter = 20 - channel.test_vals = [-2.000215, 3.536936, 0.034212, 0.193725] + channel.test_vals = [-2.000214, 3.536937, 0.034200, 0.193725] test_list.append(channel) # NACA0012 @@ -75,7 +75,7 @@ def main(): oneram6.cfg_dir = "euler/oneram6" oneram6.cfg_file = "inv_ONERAM6.cfg" oneram6.test_iter = 10 - oneram6.test_vals = [0.280800, 0.008623] + oneram6.test_vals = [0.280803, 0.008625] test_list.append(oneram6) # Fixed CL NACA0012 @@ -83,7 +83,7 @@ def main(): fixedCL_naca0012.cfg_dir = "fixed_cl/naca0012" fixedCL_naca0012.cfg_file = "inv_NACA0012.cfg" fixedCL_naca0012.test_iter = 10 - fixedCL_naca0012.test_vals = [-4.017883, 1.513039, 0.300961, 0.019472] + fixedCL_naca0012.test_vals = [-4.010006, 1.521019, 0.300952, 0.019472] test_list.append(fixedCL_naca0012) # HYPERSONIC FLOW PAST BLUNT BODY @@ -111,7 +111,7 @@ def main(): cylinder.cfg_dir = "navierstokes/cylinder" cylinder.cfg_file = "lam_cylinder.cfg" cylinder.test_iter = 25 - cylinder.test_vals = [-8.463699, -2.981890, 0.045839, 1.659615, 0.000000] + cylinder.test_vals = [-8.500692, -3.024889, 0.037977, 1.664043, 0.000000] test_list.append(cylinder) # Laminar cylinder (low Mach correction) @@ -119,7 +119,7 @@ def main(): cylinder_lowmach.cfg_dir = "navierstokes/cylinder" cylinder_lowmach.cfg_file = "cylinder_lowmach.cfg" cylinder_lowmach.test_iter = 25 - cylinder_lowmach.test_vals = [-6.473668, -1.011938, 0.275173, 71.130747, 0.000000] + cylinder_lowmach.test_vals = [-6.476617, -1.014894, 0.594405, 70.855527, 0.000000] cylinder_lowmach.test_vals_aarch64 = [-6.830996, -1.368850, -0.143956, 73.963354, 0] test_list.append(cylinder_lowmach) @@ -128,7 +128,7 @@ def main(): poiseuille.cfg_dir = "navierstokes/poiseuille" poiseuille.cfg_file = "lam_poiseuille.cfg" poiseuille.test_iter = 10 - poiseuille.test_vals = [-5.046139, 0.652976, 0.008353, 13.735637, 0] + poiseuille.test_vals = [-5.046138, 0.652977, 0.008358, 13.735661, 0.000000] test_list.append(poiseuille) # 2D Poiseuille flow (inlet profile file) @@ -181,7 +181,7 @@ def main(): turb_flatplate.cfg_dir = "rans/flatplate" turb_flatplate.cfg_file = "turb_SA_flatplate.cfg" turb_flatplate.test_iter = 20 - turb_flatplate.test_vals = [-4.958138, -7.438030, -0.187473, 0.015060] + turb_flatplate.test_vals = [-4.958328, -7.438303, -0.187472, 0.015052] test_list.append(turb_flatplate) # ONERA M6 Wing @@ -266,7 +266,7 @@ def main(): turb_naca0012_sst_restart_mg.cfg_file = "turb_NACA0012_sst_multigrid_restart.cfg" turb_naca0012_sst_restart_mg.test_iter = 20 turb_naca0012_sst_restart_mg.ntest_vals = 5 - turb_naca0012_sst_restart_mg.test_vals = [-6.589114, -5.057151, 0.830239, -0.008808, 0.078150] + turb_naca0012_sst_restart_mg.test_vals = [-6.589092, -5.057151, 0.830239, -0.008809, 0.078148] test_list.append(turb_naca0012_sst_restart_mg) ############################# @@ -347,7 +347,7 @@ def main(): inc_euler_naca0012.cfg_dir = "incomp_euler/naca0012" inc_euler_naca0012.cfg_file = "incomp_NACA0012.cfg" inc_euler_naca0012.test_iter = 20 - inc_euler_naca0012.test_vals = [-5.846118, -4.941076, 0.519913, 0.008955] + inc_euler_naca0012.test_vals = [-5.858106, -4.937298, 0.519817, 0.008958] test_list.append(inc_euler_naca0012) # C-D nozzle with pressure inlet and mass flow outlet @@ -355,7 +355,7 @@ def main(): inc_nozzle.cfg_dir = "incomp_euler/nozzle" inc_nozzle.cfg_file = "inv_nozzle.cfg" inc_nozzle.test_iter = 20 - inc_nozzle.test_vals = [-6.171980, -5.419034, 0.009267, 0.127577] + inc_nozzle.test_vals = [-6.171116, -5.413059, 0.008862, 0.127559] test_list.append(inc_nozzle) ############################# @@ -367,7 +367,7 @@ def main(): inc_lam_cylinder.cfg_dir = "incomp_navierstokes/cylinder" inc_lam_cylinder.cfg_file = "incomp_cylinder.cfg" inc_lam_cylinder.test_iter = 10 - inc_lam_cylinder.test_vals = [-4.159236, -3.569077, 0.017653, 4.934964] + inc_lam_cylinder.test_vals = [-4.161196, -3.573053, 0.025533, 4.944646] test_list.append(inc_lam_cylinder) # Buoyancy-driven cavity @@ -432,7 +432,7 @@ def main(): cavity.cfg_dir = "moving_wall/cavity" cavity.cfg_file = "lam_cavity.cfg" cavity.test_iter = 25 - cavity.test_vals = [-7.938907, -2.490199, 0.013042, 0.004995] + cavity.test_vals = [-7.927914, -2.479035, 0.012837, 0.005129] test_list.append(cavity) # Spinning cylinder @@ -440,7 +440,7 @@ def main(): spinning_cylinder.cfg_dir = "moving_wall/spinning_cylinder" spinning_cylinder.cfg_file = "spinning_cylinder.cfg" spinning_cylinder.test_iter = 25 - spinning_cylinder.test_vals = [-7.547526, -2.080576, 1.888705, 1.812327] + spinning_cylinder.test_vals = [-7.533967, -2.066687, 1.832254, 1.843017] spinning_cylinder.test_vals_aarch64 = [-8.008023, -2.611064, 1.497308, 1.487483] test_list.append(spinning_cylinder) @@ -453,7 +453,7 @@ def main(): square_cylinder.cfg_dir = "unsteady/square_cylinder" square_cylinder.cfg_file = "turb_square.cfg" square_cylinder.test_iter = 3 - square_cylinder.test_vals = [-2.560678, -1.175981, 0.062203, 1.399351, 2.219223, 1.399297, 2.217470, 0.000000] + square_cylinder.test_vals = [-2.560678, -1.175980, 0.062203, 1.399351, 2.219223, 1.399297, 2.217470, 0.000000] square_cylinder.unsteady = True test_list.append(square_cylinder) @@ -462,7 +462,7 @@ def main(): sine_gust.cfg_dir = "gust" sine_gust.cfg_file = "inv_gust_NACA0012.cfg" sine_gust.test_iter = 5 - sine_gust.test_vals = [-1.977498, 3.481817, -0.009944, -0.004266] + sine_gust.test_vals = [-1.977498, 3.481817, -0.010188, -0.004302] sine_gust.unsteady = True test_list.append(sine_gust) @@ -471,7 +471,7 @@ def main(): cosine_gust.cfg_dir = "gust" cosine_gust.cfg_file = "cosine_gust_zdir.cfg" cosine_gust.test_iter = 79 - cosine_gust.test_vals = [-2.418805, 0.002211, -0.001258, 0.000438, -0.000592] + cosine_gust.test_vals = [-2.418805, 0.002212, -0.001249, 0.000439, -0.000592] cosine_gust.unsteady = True cosine_gust.enabled_with_tsan = False test_list.append(cosine_gust) @@ -491,7 +491,7 @@ def main(): aeroelastic.cfg_dir = "aeroelastic" aeroelastic.cfg_file = "aeroelastic_NACA64A010.cfg" aeroelastic.test_iter = 2 - aeroelastic.test_vals = [-1.876626, 4.021083, 0.081436, 0.027726, -0.001638, -0.000130, -1.056269] + aeroelastic.test_vals = [-1.876631, 4.021073, 0.081270, 0.027580, -0.001642, -0.000127, -1.052231] aeroelastic.unsteady = True aeroelastic.enabled_on_cpu_arch = ["x86_64"] # Requires AVX-capable architecture test_list.append(aeroelastic) @@ -654,7 +654,7 @@ def main(): bars_SST_2D.cfg_dir = "sliding_interface/bars_SST_2D" bars_SST_2D.cfg_file = "bars.cfg" bars_SST_2D.test_iter = 13 - bars_SST_2D.test_vals = [13.000000, -0.396746, -1.461254] + bars_SST_2D.test_vals = [13.000000, -0.391567, -1.460874] bars_SST_2D.multizone = True test_list.append(bars_SST_2D) diff --git a/TestCases/hybrid_regression_AD.py b/TestCases/hybrid_regression_AD.py index b3c9e3ad7fd..4c153f8c51e 100644 --- a/TestCases/hybrid_regression_AD.py +++ b/TestCases/hybrid_regression_AD.py @@ -86,7 +86,7 @@ def main(): discadj_rans_naca0012_sst.cfg_dir = "disc_adj_rans/naca0012" discadj_rans_naca0012_sst.cfg_file = "turb_NACA0012_sst.cfg" discadj_rans_naca0012_sst.test_iter = 10 - discadj_rans_naca0012_sst.test_vals = [-2.201517, -0.175212, 3.044200, -0.041842] + discadj_rans_naca0012_sst.test_vals = [-2.201514, -0.175212, 3.044200, -0.041842] discadj_rans_naca0012_sst.test_vals_aarch64 = [-2.201855, -0.172443, 3.043400, -0.041820] test_list.append(discadj_rans_naca0012_sst) @@ -99,7 +99,7 @@ def main(): discadj_incomp_NACA0012.cfg_dir = "disc_adj_incomp_euler/naca0012" discadj_incomp_NACA0012.cfg_file = "incomp_NACA0012_disc.cfg" discadj_incomp_NACA0012.test_iter = 20 - discadj_incomp_NACA0012.test_vals = [20.000000, -3.122291, -2.287328, 0.000000] + discadj_incomp_NACA0012.test_vals = [20.000000, -3.122052, -2.290797, 0.000000] test_list.append(discadj_incomp_NACA0012) ##################################### @@ -133,7 +133,7 @@ def main(): discadj_incomp_turb_NACA0012_sst.cfg_dir = "disc_adj_incomp_rans/naca0012" discadj_incomp_turb_NACA0012_sst.cfg_file = "turb_naca0012_sst.cfg" discadj_incomp_turb_NACA0012_sst.test_iter = 10 - discadj_incomp_turb_NACA0012_sst.test_vals = [-3.775388, -3.089117, -7.143490, 0.000000, -0.896797] + discadj_incomp_turb_NACA0012_sst.test_vals = [-3.775383, -3.089125, -7.143499, 0.000000, -0.896795] test_list.append(discadj_incomp_turb_NACA0012_sst) ####################################################### @@ -173,7 +173,7 @@ def main(): discadj_DT_1ST_cylinder.cfg_dir = "disc_adj_rans/cylinder_DT_1ST" discadj_DT_1ST_cylinder.cfg_file = "cylinder.cfg" discadj_DT_1ST_cylinder.test_iter = 9 - discadj_DT_1ST_cylinder.test_vals = [1.196347, -3.339014, -0.006212, 0.000020] + discadj_DT_1ST_cylinder.test_vals = [1.196349, -3.339012, -0.006213, 0.000020] discadj_DT_1ST_cylinder.unsteady = True discadj_DT_1ST_cylinder.enabled_with_tsan = False test_list.append(discadj_DT_1ST_cylinder) @@ -187,7 +187,7 @@ def main(): discadj_pitchingNACA0012.cfg_dir = "disc_adj_euler/naca0012_pitching" discadj_pitchingNACA0012.cfg_file = "inv_NACA0012_pitching.cfg" discadj_pitchingNACA0012.test_iter = 4 - discadj_pitchingNACA0012.test_vals = [-1.041883, -1.512874, -0.006211, 0.000012] + discadj_pitchingNACA0012.test_vals = [-1.041978, -1.513013, -0.006202, 0.000012] discadj_pitchingNACA0012.tol = 0.01 discadj_pitchingNACA0012.unsteady = True discadj_pitchingNACA0012.enabled_with_tsan = False diff --git a/TestCases/parallel_regression.py b/TestCases/parallel_regression.py index faeca39742f..6cf6ccf4fbf 100755 --- a/TestCases/parallel_regression.py +++ b/TestCases/parallel_regression.py @@ -74,7 +74,7 @@ def main(): cfd_flamelet_h2.cfg_dir = "flamelet/07_laminar_premixed_h2_flame_cfd" cfd_flamelet_h2.cfg_file = "laminar_premixed_h2_flame_cfd.cfg" cfd_flamelet_h2.test_iter = 5 - cfd_flamelet_h2.test_vals = [-8.036794, -8.372668, -1.842800, -9.388446] + cfd_flamelet_h2.test_vals = [-8.036794, -8.372668, -1.842800, -9.388447] test_list.append(cfd_flamelet_h2) # Flame ignition methods @@ -95,7 +95,7 @@ def main(): thermalbath.cfg_dir = "nonequilibrium/thermalbath/finitechemistry" thermalbath.cfg_file = "thermalbath.cfg" thermalbath.test_iter = 10 - thermalbath.test_vals = [0.945997, 0.945997, -11.860991, -11.857906, -32.000000, 10.013239] + thermalbath.test_vals = [0.945997, 0.945997, -11.860991, -11.890652, -32.000000, 10.013239] test_list.append(thermalbath) # Adiabatic thermal bath @@ -112,7 +112,7 @@ def main(): thermalbath_frozen.cfg_dir = "nonequilibrium/thermalbath/frozen" thermalbath_frozen.cfg_file = "thermalbath_frozen.cfg" thermalbath_frozen.test_iter = 10 - thermalbath_frozen.test_vals = [-32.000000, -32.000000, -11.818647, -11.857909, -32.000000, 10.013545] + thermalbath_frozen.test_vals = [-32.000000, -32.000000, -11.818647, -11.848553, -32.000000, 10.013545] test_list.append(thermalbath_frozen) # Inviscid single wedge, ausm, implicit @@ -120,7 +120,7 @@ def main(): invwedge_a.cfg_dir = "nonequilibrium/invwedge" invwedge_a.cfg_file = "invwedge_ausm.cfg" invwedge_a.test_iter = 10 - invwedge_a.test_vals = [-1.069669, -1.594432, -18.299010, -18.626399, -18.572410, 2.245727, 1.874088, 5.290291, 0.847735] + invwedge_a.test_vals = [-1.069665, -1.594428, -18.299923, -18.627315, -18.573325, 2.245732, 1.874096, 5.290295, 0.847739] invwedge_a.test_vals_aarch64 = [-1.069675, -1.594438, -18.299736, -18.627126, -18.573137, 2.245721, 1.874105, 5.290285, 0.847729] test_list.append(invwedge_a) @@ -138,7 +138,7 @@ def main(): invwedge_msw.cfg_dir = "nonequilibrium/invwedge" invwedge_msw.cfg_file = "invwedge_msw.cfg" invwedge_msw.test_iter = 10 - invwedge_msw.test_vals = [-1.212335, -1.737098, -18.299375, -18.626782, -18.572771, 2.106171, 1.651949, 5.143958, 0.704444] + invwedge_msw.test_vals = [-1.212335, -1.737098, -18.301768, -18.629149, -18.575169, 2.106171, 1.651949, 5.143958, 0.704444] invwedge_msw.test_vals_aarch64 = [-1.212335, -1.737098, -18.299279, -18.626656, -18.572683, 2.106171, 1.651949, 5.143958, 0.704444] test_list.append(invwedge_msw) @@ -147,7 +147,7 @@ def main(): invwedge_roe.cfg_dir = "nonequilibrium/invwedge" invwedge_roe.cfg_file = "invwedge_roe.cfg" invwedge_roe.test_iter = 10 - invwedge_roe.test_vals = [-1.054108, -1.578871, -17.782421, -18.111535, -18.055538, 2.264661, 1.841727, 5.302630, 0.897714] + invwedge_roe.test_vals = [-1.023216, -1.547979, -17.814656, -18.143616, -18.087775, 2.295078, 1.885054, 5.338487, 0.926120] invwedge_roe.test_vals_aarch64 = [-1.052398, -1.577160, -17.794015, -18.122997, -18.067131, 2.266042, 1.849686, 5.304700, 0.899584] test_list.append(invwedge_roe) @@ -174,7 +174,7 @@ def main(): invwedge_ss_inlet.cfg_dir = "nonequilibrium/invwedge" invwedge_ss_inlet.cfg_file = "invwedge_ss_inlet.cfg" invwedge_ss_inlet.test_iter = 10 - invwedge_ss_inlet.test_vals = [-1.068592, -1.593355, -18.250183, -18.579524, -18.523255, 2.246972, 1.874197, 5.291273, 0.848771] + invwedge_ss_inlet.test_vals = [-1.068634, -1.593397, -18.246265, -18.575529, -18.519338, 2.246925, 1.874200, 5.291234, 0.848731] invwedge_ss_inlet.test_vals_aarch64 = [-1.068592, -1.593355, -18.250183, -18.579524, -18.523255, 2.246972, 1.874197, 5.291273, 0.848771] test_list.append(invwedge_ss_inlet) @@ -183,7 +183,7 @@ def main(): visc_cone.cfg_dir = "nonequilibrium/visc_wedge" visc_cone.cfg_file = "axi_visccone.cfg" visc_cone.test_iter = 10 - visc_cone.test_vals = [-5.222269, -5.746523, -20.560216, -20.510119, -20.409089, 1.255762, -3.208384, -0.016011, 0.093461, 32619] + visc_cone.test_vals = [-5.215235, -5.739371, -20.559852, -20.509281, -20.408911, 1.262701, -3.205457, -0.015696, 0.093205, 32637.000000] visc_cone.test_vals_aarch64 = [-5.222270, -5.746525, -20.560286, -20.510152, -20.409101, 1.255758, -3.208382, -0.016014, 0.093462, 32619.000000] test_list.append(visc_cone) @@ -216,7 +216,7 @@ def main(): ion_gy.cfg_dir = "nonequilibrium/visc_cylinder" ion_gy.cfg_file = "cyl_ion_gy.cfg" ion_gy.test_iter = 10 - ion_gy.test_vals = [-11.629873, -4.165563, -4.702662, -4.950351, -5.146155, -4.993878, -6.893332, 5.990109, 5.990004, -0.014849, 0.000000, 90090.000000] + ion_gy.test_vals = [-11.629873, -4.165562, -4.702662, -4.950351, -5.146155, -4.993878, -6.893332, 5.990109, 5.990004, -0.014849, 0.000000, 90090.000000] test_list.append(ion_gy) ########################## @@ -228,7 +228,7 @@ def main(): channel.cfg_dir = "euler/channel" channel.cfg_file = "inv_channel_RK.cfg" channel.test_iter = 20 - channel.test_vals = [-1.988782, 3.546674, 0.032065, 0.194399] + channel.test_vals = [-1.990518, 3.545643, 0.031745, 0.194289] test_list.append(channel) # NACA0012 @@ -252,7 +252,7 @@ def main(): oneram6.cfg_dir = "euler/oneram6" oneram6.cfg_file = "inv_ONERAM6.cfg" oneram6.test_iter = 10 - oneram6.test_vals = [-11.530230, -11.006685, 0.280800, 0.008623] + oneram6.test_vals = [-11.534670, -11.009582, 0.280800, 0.008623] oneram6.timeout = 3200 test_list.append(oneram6) @@ -261,7 +261,7 @@ def main(): fixedCL_naca0012.cfg_dir = "fixed_cl/naca0012" fixedCL_naca0012.cfg_file = "inv_NACA0012.cfg" fixedCL_naca0012.test_iter = 10 - fixedCL_naca0012.test_vals = [-4.006904, 1.523677, 0.300969, 0.019473] + fixedCL_naca0012.test_vals = [-3.962426, 1.570772, 0.300985, 0.019475] test_list.append(fixedCL_naca0012) # Polar sweep of the inviscid NACA0012 @@ -315,7 +315,7 @@ def main(): MFR_coupling.cfg_dir = "euler/turbofan_MFR_coupling" MFR_coupling.cfg_file = "MFR_coupling.cfg" MFR_coupling.test_iter = 100 - MFR_coupling.test_vals = [-2.1124e+02, 1.5003e+02, 2.0151e+01] + MFR_coupling.test_vals = [-211.250000, 150.030000, 20.151000] test_list.append(MFR_coupling) ########################## @@ -327,7 +327,7 @@ def main(): flatplate.cfg_dir = "navierstokes/flatplate" flatplate.cfg_file = "lam_flatplate.cfg" flatplate.test_iter = 100 - flatplate.test_vals = [-6.499563, -1.020731, 0.001223, 0.028373, 2.361500, -2.333200, 0.000000, 0.000000] + flatplate.test_vals = [-6.496808, -1.017942, 0.001224, 0.028377, 2.361500, -2.333200, 0.000000, 0.000000] test_list.append(flatplate) # Custom objective function @@ -352,7 +352,7 @@ def main(): cylinder.cfg_dir = "navierstokes/cylinder" cylinder.cfg_file = "lam_cylinder.cfg" cylinder.test_iter = 25 - cylinder.test_vals = [-8.411856, -2.937460, -0.002360, 1.643164, 0.000000] + cylinder.test_vals = [-8.427810, -2.952885, -0.007651, 1.646675, 0.000000] test_list.append(cylinder) # Laminar cylinder (low Mach correction) @@ -360,7 +360,7 @@ def main(): cylinder_lowmach.cfg_dir = "navierstokes/cylinder" cylinder_lowmach.cfg_file = "cylinder_lowmach.cfg" cylinder_lowmach.test_iter = 25 - cylinder_lowmach.test_vals = [-6.469910, -1.008163, -0.381561, 78.514909, 0.000000] + cylinder_lowmach.test_vals = [-6.469226, -1.007489, -0.631813, 78.583596, 0.000000] test_list.append(cylinder_lowmach) @@ -416,7 +416,7 @@ def main(): turb_flatplate.cfg_dir = "rans/flatplate" turb_flatplate.cfg_file = "turb_SA_flatplate.cfg" turb_flatplate.test_iter = 20 - turb_flatplate.test_vals = [-4.926218, -7.441083, -0.187477, 0.015330] + turb_flatplate.test_vals = [-4.949425, -7.443679, -0.187485, 0.015345] test_list.append(turb_flatplate) # Flat plate (compressible) with species inlet @@ -424,7 +424,7 @@ def main(): turb_flatplate_species.cfg_dir = "rans/flatplate" turb_flatplate_species.cfg_file = "turb_SA_flatplate_species.cfg" turb_flatplate_species.test_iter = 20 - turb_flatplate_species.test_vals = [-4.728172, -1.517257, -2.296200, 0.801649, -3.574284, 3.000000, -0.964853, 2.000000, -1.529003, 3.000000, -0.705161, 0.999933, 0.999933] + turb_flatplate_species.test_vals = [-4.795630, -1.532152, -2.318309, 0.717570, -3.574258, 3.000000, -0.931729, 2.000000, -1.526521, 3.000000, -0.705481, 0.999934, 0.999934] test_list.append(turb_flatplate_species) # Flat plate SST compressibility correction Wilcox @@ -596,7 +596,7 @@ def main(): turb_naca0012_sst_restart_mg.cfg_file = "turb_NACA0012_sst_multigrid_restart.cfg" turb_naca0012_sst_restart_mg.test_iter = 20 turb_naca0012_sst_restart_mg.ntest_vals = 5 - turb_naca0012_sst_restart_mg.test_vals = [-6.566570, -5.057149, 0.830238, -0.008695, 0.078156] + turb_naca0012_sst_restart_mg.test_vals = [-6.566317, -5.057149, 0.830238, -0.008702, 0.078154] turb_naca0012_sst_restart_mg.timeout = 3200 turb_naca0012_sst_restart_mg.tol = 0.000001 test_list.append(turb_naca0012_sst_restart_mg) @@ -610,7 +610,7 @@ def main(): inc_euler_naca0012.cfg_dir = "incomp_euler/naca0012" inc_euler_naca0012.cfg_file = "incomp_NACA0012.cfg" inc_euler_naca0012.test_iter = 20 - inc_euler_naca0012.test_vals = [-6.058441, -5.126560, 0.525681, 0.008778] + inc_euler_naca0012.test_vals = [-6.067964, -5.125607, 0.525745, 0.008772] test_list.append(inc_euler_naca0012) # C-D nozzle with pressure inlet and mass flow outlet @@ -618,7 +618,7 @@ def main(): inc_nozzle.cfg_dir = "incomp_euler/nozzle" inc_nozzle.cfg_file = "inv_nozzle.cfg" inc_nozzle.test_iter = 20 - inc_nozzle.test_vals = [-6.058643, -5.286249, 0.002391, 0.124107] + inc_nozzle.test_vals = [-6.053549, -5.296666, 0.001330, 0.124087] test_list.append(inc_nozzle) # Laminar wall mounted cylinder, Euler walls, cylinder wall diagonally split @@ -638,7 +638,7 @@ def main(): inc_lam_cylinder.cfg_dir = "incomp_navierstokes/cylinder" inc_lam_cylinder.cfg_file = "incomp_cylinder.cfg" inc_lam_cylinder.test_iter = 10 - inc_lam_cylinder.test_vals = [-4.156152, -3.554127, -0.017923, 5.101126] + inc_lam_cylinder.test_vals = [-4.156113, -3.553508, -0.024563, 5.105605] test_list.append(inc_lam_cylinder) # Laminar sphere, Re=1. Last column: Cd=24/Re @@ -646,7 +646,7 @@ def main(): inc_lam_sphere.cfg_dir = "incomp_navierstokes/sphere" inc_lam_sphere.cfg_file = "sphere.cfg" inc_lam_sphere.test_iter = 5 - inc_lam_sphere.test_vals = [-8.165744, -8.968003, 0.121003, 25.782691] + inc_lam_sphere.test_vals = [-8.190948, -8.992588, 0.121003, 25.782691] test_list.append(inc_lam_sphere) # Buoyancy-driven cavity @@ -662,7 +662,7 @@ def main(): inc_poly_cylinder.cfg_dir = "incomp_navierstokes/cylinder" inc_poly_cylinder.cfg_file = "poly_cylinder.cfg" inc_poly_cylinder.test_iter = 20 - inc_poly_cylinder.test_vals = [-2.362995, 0.006829, 1.923532, -172.590000] + inc_poly_cylinder.test_vals = [-2.363389, 0.005552, 1.922971, -172.380000] test_list.append(inc_poly_cylinder) # X-coarse laminar bend as a mixed element CGNS test @@ -670,7 +670,7 @@ def main(): inc_lam_bend.cfg_dir = "incomp_navierstokes/bend" inc_lam_bend.cfg_file = "lam_bend.cfg" inc_lam_bend.test_iter = 10 - inc_lam_bend.test_vals = [-3.588863, -3.101606, -0.022302, 1.062971] + inc_lam_bend.test_vals = [-3.585943, -3.096592, -0.022111, 1.064110] test_list.append(inc_lam_bend) # 3D laminar channnel with 1 cell in flow direction, streamwise periodic @@ -849,7 +849,7 @@ def main(): contadj_naca0012.cfg_dir = "cont_adj_euler/naca0012" contadj_naca0012.cfg_file = "inv_NACA0012.cfg" contadj_naca0012.test_iter = 5 - contadj_naca0012.test_vals = [-9.526111, -15.089167, -0.726250, 0.020280] + contadj_naca0012.test_vals = [-9.527707, -15.088310, -0.726250, 0.020280] contadj_naca0012.test_vals_aarch64 = [-9.662546, -14.998818, -0.726250, 0.020280] test_list.append(contadj_naca0012) @@ -858,7 +858,7 @@ def main(): contadj_oneram6.cfg_dir = "cont_adj_euler/oneram6" contadj_oneram6.cfg_file = "inv_ONERAM6.cfg" contadj_oneram6.test_iter = 10 - contadj_oneram6.test_vals = [-12.086022, -12.648069, -1.086100, 0.007556] + contadj_oneram6.test_vals = [-12.087421, -12.650762, -1.086100, 0.007556] test_list.append(contadj_oneram6) # Inviscid WEDGE: tests averaged outflow total pressure adjoint @@ -874,7 +874,7 @@ def main(): contadj_fixed_CL_naca0012.cfg_dir = "fixed_cl/naca0012" contadj_fixed_CL_naca0012.cfg_file = "inv_NACA0012_ContAdj.cfg" contadj_fixed_CL_naca0012.test_iter = 100 - contadj_fixed_CL_naca0012.test_vals = [1.382576, -4.042295, -0.008696, 0.003238] + contadj_fixed_CL_naca0012.test_vals = [1.377921, -4.048140, -0.008264, 0.003369] test_list.append(contadj_fixed_CL_naca0012) ################################### @@ -886,7 +886,7 @@ def main(): contadj_ns_cylinder.cfg_dir = "cont_adj_navierstokes/cylinder" contadj_ns_cylinder.cfg_file = "lam_cylinder.cfg" contadj_ns_cylinder.test_iter = 20 - contadj_ns_cylinder.test_vals = [-3.632833, -9.087692, 2.056700, -0.000000] + contadj_ns_cylinder.test_vals = [-3.634298, -9.089945, 2.056700, -0.000000] test_list.append(contadj_ns_cylinder) # Adjoint laminar naca0012 subsonic @@ -930,7 +930,7 @@ def main(): contadj_rans_rae2822.cfg_dir = "cont_adj_rans/rae2822" contadj_rans_rae2822.cfg_file = "turb_SA_RAE2822.cfg" contadj_rans_rae2822.test_iter = 20 - contadj_rans_rae2822.test_vals = [-5.399744, -10.904916, -0.212470, 0.005448] + contadj_rans_rae2822.test_vals = [-5.399567, -10.904741, -0.212470, 0.005448] test_list.append(contadj_rans_rae2822) ############################# @@ -1018,7 +1018,7 @@ def main(): cavity.cfg_dir = "moving_wall/cavity" cavity.cfg_file = "lam_cavity.cfg" cavity.test_iter = 25 - cavity.test_vals = [-7.828480, -2.367075, 0.008928, 0.007370] + cavity.test_vals = [-7.838584, -2.378333, 0.009057, 0.006632] test_list.append(cavity) # Spinning cylinder @@ -1026,7 +1026,7 @@ def main(): spinning_cylinder.cfg_dir = "moving_wall/spinning_cylinder" spinning_cylinder.cfg_file = "spinning_cylinder.cfg" spinning_cylinder.test_iter = 25 - spinning_cylinder.test_vals = [-7.383987, -1.919413, 2.601945, 1.976038] + spinning_cylinder.test_vals = [-7.401660, -1.942949, 2.534412, 1.997391] test_list.append(spinning_cylinder) ###################################### @@ -1047,7 +1047,7 @@ def main(): sine_gust.cfg_dir = "gust" sine_gust.cfg_file = "inv_gust_NACA0012.cfg" sine_gust.test_iter = 5 - sine_gust.test_vals = [-1.977498, 3.481817, -0.010337, -0.004460] + sine_gust.test_vals = [-1.977498, 3.481817, -0.010210, -0.004556] sine_gust.unsteady = True test_list.append(sine_gust) @@ -1153,7 +1153,7 @@ def main(): Jones_tc_restart.cfg_dir = "turbomachinery/APU_turbocharger" Jones_tc_restart.cfg_file = "Jones_restart.cfg" Jones_tc_restart.test_iter = 5 - Jones_tc_restart.test_vals = [-7.645871, -5.849734, -15.337010, -9.825760, -13.216108, -7.752293, 73286.000000, 73286.000000, 0.020055, 82.286000] + Jones_tc_restart.test_vals = [-7.645871, -5.849734, -15.337010, -9.825759, -13.216108, -7.752293, 73286.000000, 73286.000000, 0.020055, 82.286000] test_list.append(Jones_tc_restart) # 2D axial stage @@ -1169,7 +1169,7 @@ def main(): transonic_stator_restart.cfg_dir = "turbomachinery/transonic_stator_2D" transonic_stator_restart.cfg_file = "transonic_stator_restart.cfg" transonic_stator_restart.test_iter = 20 - transonic_stator_restart.test_vals = [-4.363703, -2.480017, -2.079311, 1.731709, -1.467573, 3.229965, -471620.000000, 94.838000, -0.046906] + transonic_stator_restart.test_vals = [-4.365633, -2.482254, -2.081248, 1.729767, -1.467304, 3.231076, -471620.000000, 94.837000, -0.046836] transonic_stator_restart.test_vals_aarch64 = [-4.437809, -2.553049, -2.164729, 1.657542, -1.356823, 3.178788, -471620.000000, 94.842000, -0.040365] test_list.append(transonic_stator_restart) @@ -1191,7 +1191,7 @@ def main(): uniform_flow.cfg_dir = "sliding_interface/uniform_flow" uniform_flow.cfg_file = "uniform_NN.cfg" uniform_flow.test_iter = 5 - uniform_flow.test_vals = [5.000000, 0.000000, -0.195002, -10.624448] + uniform_flow.test_vals = [5.000000, 0.000000, -0.195001, -10.624457] uniform_flow.unsteady = True uniform_flow.multizone = True test_list.append(uniform_flow) @@ -1253,7 +1253,7 @@ def main(): bars_SST_2D.cfg_dir = "sliding_interface/bars_SST_2D" bars_SST_2D.cfg_file = "bars.cfg" bars_SST_2D.test_iter = 13 - bars_SST_2D.test_vals = [13.000000, -0.397743, -1.458724] + bars_SST_2D.test_vals = [13.000000, -0.395917, -1.480217] bars_SST_2D.multizone = True test_list.append(bars_SST_2D) @@ -1262,7 +1262,7 @@ def main(): slinc_steady.cfg_dir = "sliding_interface/incompressible_steady" slinc_steady.cfg_file = "config.cfg" slinc_steady.test_iter = 19 - slinc_steady.test_vals = [19.000000, -1.131557, -1.370471] + slinc_steady.test_vals = [19.000000, -1.129117, -1.399143] slinc_steady.timeout = 100 slinc_steady.tol = 0.00002 slinc_steady.multizone = True @@ -1297,7 +1297,7 @@ def main(): thermal_beam_3d.cfg_dir = "fea_fsi/ThermalBeam_3d" thermal_beam_3d.cfg_file = "configBeam_3d.cfg" thermal_beam_3d.test_iter = 4 - thermal_beam_3d.test_vals = [-8.147213, -7.841446, -7.904153, -13.978110, 217, -4.095071, 39, -4.072614, 136760, 75] + thermal_beam_3d.test_vals = [-8.070340, -7.802437, -7.856284, -13.978110, 217.000000, -4.047750, 39.000000, -4.072613, 136760.000000, 75.000000] test_list.append(thermal_beam_3d) # Static beam, 3d with coupled temperature, nonlinear elasticity @@ -1325,7 +1325,7 @@ def main(): linear_plane_strain.cfg_dir = "fea_fsi/VonMissesVerif" linear_plane_strain.cfg_file = "linear_plane_strain_2d.cfg" linear_plane_strain.test_iter = 0 - linear_plane_strain.test_vals = [-6.036048, -6.001905, 0, 120140, 325, -8.025024] + linear_plane_strain.test_vals = [-6.406458, -5.995503, 0, 120140, 144, -8.122248] test_list.append(linear_plane_strain) # 2D beam in plain stress with thermal expansion. This tests fixes to the 2D von Mises stress calculation, @@ -1334,7 +1334,7 @@ def main(): nonlinear_plane_stress.cfg_dir = "fea_fsi/VonMissesVerif" nonlinear_plane_stress.cfg_file = "nonlinear_plane_stress_2d.cfg" nonlinear_plane_stress.test_iter = 16 - nonlinear_plane_stress.test_vals = [-7.131557, -2.945301, -13.165302, 1.6248e+05, 32, -4.129942] + nonlinear_plane_stress.test_vals = [-6.443023, -2.221455, -11.742638, 162480, 32, -4.347864] nonlinear_plane_stress.tol = [2e-4, 2e-4, 2e-4, 1e-5, 1e-5, 4e-4] test_list.append(nonlinear_plane_stress) @@ -1363,7 +1363,7 @@ def main(): dyn_fsi.cfg_dir = "fea_fsi/dyn_fsi" dyn_fsi.cfg_file = "config.cfg" dyn_fsi.test_iter = 4 - dyn_fsi.test_vals = [-4.330741, -4.152826, 0, 97] + dyn_fsi.test_vals = [-4.330741, -4.152826, 0, 75] dyn_fsi.multizone = True dyn_fsi.unsteady = True test_list.append(dyn_fsi) @@ -1390,7 +1390,7 @@ def main(): solid_periodic_pins.cfg_dir = "solid_heat_conduction/periodic_pins" solid_periodic_pins.cfg_file = "configSolid.cfg" solid_periodic_pins.test_iter = 750 - solid_periodic_pins.test_vals = [-15.878973, -14.569206, 300.900000, 425.320000, 5.000000, -1.672670] + solid_periodic_pins.test_vals = [-15.878945, -14.569206, 300.900000, 425.320000, 5.000000, -1.672702] solid_periodic_pins.test_vals_aarch64 = [-15.879016, -14.569206, 300.900000, 425.320000, 5.000000, -1.672666] test_list.append(solid_periodic_pins) @@ -1412,7 +1412,7 @@ def main(): cht_compressible.cfg_dir = "coupled_cht/comp_2d" cht_compressible.cfg_file = "cht_2d_3cylinders.cfg" cht_compressible.test_iter = 10 - cht_compressible.test_vals = [-4.256053, -0.532725, -0.532725, -0.532726] + cht_compressible.test_vals = [-4.256051, -0.532725, -0.532724, -0.532725] cht_compressible.multizone = True test_list.append(cht_compressible) @@ -1464,7 +1464,7 @@ def main(): pywrapper_square_cylinder.cfg_dir = "unsteady/square_cylinder" pywrapper_square_cylinder.cfg_file = "turb_square.cfg" pywrapper_square_cylinder.test_iter = 10 - pywrapper_square_cylinder.test_vals = [-1.178522, -0.349773, 1.401402, 2.359089, 1.401728, 2.300969, 0.000000] + pywrapper_square_cylinder.test_vals = [-1.178521, -0.349772, 1.401402, 2.359105, 1.401729, 2.300974, 0.000000] pywrapper_square_cylinder.command = TestCase.Command("mpirun -np 2", "SU2_CFD.py", "--parallel -f") pywrapper_square_cylinder.unsteady = True test_list.append(pywrapper_square_cylinder) @@ -1474,7 +1474,7 @@ def main(): pywrapper_aeroelastic.cfg_dir = "aeroelastic" pywrapper_aeroelastic.cfg_file = "aeroelastic_NACA64A010.cfg" pywrapper_aeroelastic.test_iter = 2 - pywrapper_aeroelastic.test_vals = [-1.876633, 4.021069, 0.082654, 0.027642, -0.001643, -0.000126, -0.966946] + pywrapper_aeroelastic.test_vals = [-1.876630, 4.021073, 0.082632, 0.027609, -0.001642, -0.000127, -0.965661] pywrapper_aeroelastic.command = TestCase.Command("mpirun -np 2", "SU2_CFD.py", "--parallel -f") pywrapper_aeroelastic.unsteady = True test_list.append(pywrapper_aeroelastic) @@ -1484,7 +1484,7 @@ def main(): pywrapper_custom_fea_load.cfg_dir = "py_wrapper/custom_load_fea" pywrapper_custom_fea_load.cfg_file = "config.cfg" pywrapper_custom_fea_load.test_iter = 13 - pywrapper_custom_fea_load.test_vals = [-7.262040, -4.945686, -14.163208, 34, -6.029883, 362.230000] + pywrapper_custom_fea_load.test_vals = [-7.262040, -4.945686, -14.163208, 27.000000, -6.282188, 362.230000] pywrapper_custom_fea_load.command = TestCase.Command("mpirun -np 2", "python", "run.py") test_list.append(pywrapper_custom_fea_load) @@ -1504,7 +1504,7 @@ def main(): pywrapper_unsteadyFSI.cfg_dir = "py_wrapper/dyn_fsi" pywrapper_unsteadyFSI.cfg_file = "config.cfg" pywrapper_unsteadyFSI.test_iter = 4 - pywrapper_unsteadyFSI.test_vals = [0, 31, 5, 58, -1.756677, -2.828286, -7.638545, -6.863930, 0.000156] + pywrapper_unsteadyFSI.test_vals = [0, 31, 5, 47, -1.756677, -2.828286, -7.638545, -6.863930, 0.000156] pywrapper_unsteadyFSI.command = TestCase.Command("mpirun -np 2", "python", "run.py") pywrapper_unsteadyFSI.unsteady = True pywrapper_unsteadyFSI.multizone = True @@ -1515,7 +1515,7 @@ def main(): pywrapper_unsteadyCHT.cfg_dir = "py_wrapper/flatPlate_unsteady_CHT" pywrapper_unsteadyCHT.cfg_file = "unsteady_CHT_FlatPlate_Conf.cfg" pywrapper_unsteadyCHT.test_iter = 5 - pywrapper_unsteadyCHT.test_vals = [-1.614168, 2.259968, -0.010622, 0.171621] + pywrapper_unsteadyCHT.test_vals = [-1.614169, 2.260087, 0.018775, 0.168073] pywrapper_unsteadyCHT.command = TestCase.Command("mpirun -np 2", "python", "launch_unsteady_CHT_FlatPlate.py --parallel -f") pywrapper_unsteadyCHT.unsteady = True test_list.append(pywrapper_unsteadyCHT) @@ -1525,7 +1525,7 @@ def main(): pywrapper_rigidMotion.cfg_dir = "py_wrapper/flatPlate_rigidMotion" pywrapper_rigidMotion.cfg_file = "flatPlate_rigidMotion_Conf.cfg" pywrapper_rigidMotion.test_iter = 5 - pywrapper_rigidMotion.test_vals = [-1.614166, 2.255135, 0.350194, 0.089496] + pywrapper_rigidMotion.test_vals = [-1.614166, 2.255135, 0.350196, 0.089496] pywrapper_rigidMotion.command = TestCase.Command("mpirun -np 2", "python", "launch_flatPlate_rigidMotion.py --parallel -f") pywrapper_rigidMotion.unsteady = True test_list.append(pywrapper_rigidMotion) @@ -1597,7 +1597,7 @@ def main(): mms_fvm_inc_euler.cfg_dir = "mms/fvm_incomp_euler" mms_fvm_inc_euler.cfg_file = "inv_mms_jst.cfg" mms_fvm_inc_euler.test_iter = 20 - mms_fvm_inc_euler.test_vals = [-9.128660, -9.441806, 0.000000, 0.000000] + mms_fvm_inc_euler.test_vals = [-9.128735, -9.441757, 0.000000, 0.000000] mms_fvm_inc_euler.tol = 0.0001 test_list.append(mms_fvm_inc_euler) @@ -1743,7 +1743,7 @@ def main(): species_passive_val.cfg_dir = "species_transport/passive_transport_validation" species_passive_val.cfg_file = "passive_transport.cfg" species_passive_val.test_iter = 50 - species_passive_val.test_vals = [-16.576049, -16.349168, -16.916647, -4.257599, 10, -4.232357, 8, -5.193350, 0.186610, 0] + species_passive_val.test_vals = [-16.493002, -16.246291, -16.871574, -4.257599, 10.000000, -4.526973, 8.000000, -5.193350, 0.186610, -0.000000] species_passive_val.test_vals_aarch64 = [-16.517744, -16.282420, -16.871663, -4.257599, 10.000000, -4.278151, 8.000000, -5.193350, 0.186610, 0.000000] test_list.append(species_passive_val) diff --git a/TestCases/parallel_regression_AD.py b/TestCases/parallel_regression_AD.py index f03615fba02..c39d9aba5a8 100644 --- a/TestCases/parallel_regression_AD.py +++ b/TestCases/parallel_regression_AD.py @@ -55,7 +55,7 @@ def main(): discadj_cylinder3D.cfg_dir = "disc_adj_euler/cylinder3D" discadj_cylinder3D.cfg_file = "inv_cylinder3D.cfg" discadj_cylinder3D.test_iter = 5 - discadj_cylinder3D.test_vals = [-3.693714, -3.889422, 0.000000, 0.000000] + discadj_cylinder3D.test_vals = [-3.693793, -3.889408, 0.000000, 0.000000] test_list.append(discadj_cylinder3D) # Arina nozzle 2D @@ -63,7 +63,7 @@ def main(): discadj_arina2k.cfg_dir = "disc_adj_euler/arina2k" discadj_arina2k.cfg_file = "Arina2KRS.cfg" discadj_arina2k.test_iter = 20 - discadj_arina2k.test_vals = [-2.931649, -3.356322, 0.073332, 0.000000] + discadj_arina2k.test_vals = [-2.933011, -3.357218, 0.073338, 0.000000] test_list.append(discadj_arina2k) # Equivalent area NACA64-206 @@ -104,7 +104,7 @@ def main(): discadj_incomp_NACA0012.cfg_dir = "disc_adj_incomp_euler/naca0012" discadj_incomp_NACA0012.cfg_file = "incomp_NACA0012_disc.cfg" discadj_incomp_NACA0012.test_iter = 20 - discadj_incomp_NACA0012.test_vals = [20.000000, -3.095094, -2.322528, 0.000000] + discadj_incomp_NACA0012.test_vals = [20.000000, -3.096582, -2.323911, 0.000000] test_list.append(discadj_incomp_NACA0012) ##################################### @@ -116,7 +116,7 @@ def main(): discadj_incomp_cylinder.cfg_dir = "disc_adj_incomp_navierstokes/cylinder" discadj_incomp_cylinder.cfg_file = "heated_cylinder.cfg" discadj_incomp_cylinder.test_iter = 20 - discadj_incomp_cylinder.test_vals = [20.000000, -1.652493, -6.202452, 0.000000] + discadj_incomp_cylinder.test_vals = [20.000000, -1.658160, -6.215433, 0.000000] test_list.append(discadj_incomp_cylinder) ###################################### @@ -163,7 +163,7 @@ def main(): discadj_cylinder.cfg_dir = "disc_adj_rans/cylinder" discadj_cylinder.cfg_file = "cylinder.cfg" discadj_cylinder.test_iter = 9 - discadj_cylinder.test_vals = [1.639372, -2.834295, -0.009538, 0.000020] #last 4 columns + discadj_cylinder.test_vals = [1.639372, -2.834293, -0.009538, 0.000020] discadj_cylinder.unsteady = True test_list.append(discadj_cylinder) @@ -176,7 +176,7 @@ def main(): discadj_cylinder.cfg_dir = "disc_adj_rans/cylinder" discadj_cylinder.cfg_file = "cylinder_Windowing_AD.cfg" discadj_cylinder.test_iter = 9 - discadj_cylinder.test_vals = [2.183366] #last column + discadj_cylinder.test_vals = [2.183361] discadj_cylinder.unsteady = True test_list.append(discadj_cylinder) @@ -204,7 +204,7 @@ def main(): discadj_DT_1ST_cylinder.cfg_dir = "disc_adj_rans/cylinder_DT_1ST" discadj_DT_1ST_cylinder.cfg_file = "cylinder.cfg" discadj_DT_1ST_cylinder.test_iter = 9 - discadj_DT_1ST_cylinder.test_vals = [1.196414, -3.339025, -0.006212, 0.000020] + discadj_DT_1ST_cylinder.test_vals = [1.196414, -3.339029, -0.006212, 0.000020] discadj_DT_1ST_cylinder.unsteady = True test_list.append(discadj_DT_1ST_cylinder) @@ -217,7 +217,7 @@ def main(): discadj_pitchingNACA0012.cfg_dir = "disc_adj_euler/naca0012_pitching" discadj_pitchingNACA0012.cfg_file = "inv_NACA0012_pitching.cfg" discadj_pitchingNACA0012.test_iter = 4 - discadj_pitchingNACA0012.test_vals = [-1.040215, -1.508864, -0.006076, 0.000012] + discadj_pitchingNACA0012.test_vals = [-1.040019, -1.508690, -0.006062, 0.000012] discadj_pitchingNACA0012.tol = 0.01 discadj_pitchingNACA0012.unsteady = True test_list.append(discadj_pitchingNACA0012) @@ -231,7 +231,7 @@ def main(): discadj_trans_stator.cfg_dir = "disc_adj_turbomachinery/transonic_stator_2D" discadj_trans_stator.cfg_file = "transonic_stator.cfg" discadj_trans_stator.test_iter = 79 - discadj_trans_stator.test_vals = [79.000000, 2.555965, 2.327109, 2.115689, 0.745501] + discadj_trans_stator.test_vals = [79.000000, 2.549036, 2.313067, 2.139714, 0.736741] discadj_trans_stator.test_vals_aarch64 = [79.000000, 0.696755, 0.485950, 0.569475, -0.990065] test_list.append(discadj_trans_stator) @@ -244,7 +244,7 @@ def main(): discadj_fea.cfg_dir = "disc_adj_fea" discadj_fea.cfg_file = "configAD_fem.cfg" discadj_fea.test_iter = 4 - discadj_fea.test_vals = [-2.849687, -3.238608, -0.000364, -8.708700] #last 4 columns + discadj_fea.test_vals = [-2.849947, -3.238801, -0.000364, -8.708700] discadj_fea.test_vals_aarch64 = [-2.849646, -3.238577, -0.000364, -8.708700] #last 4 columns test_list.append(discadj_fea) @@ -254,7 +254,7 @@ def main(): discadj_thermoelastic.cfg_dir = "fea_fsi/ThermalBeam_3d" discadj_thermoelastic.cfg_file = "configBeamNonlinear_3d_ad.cfg" discadj_thermoelastic.test_iter = 10 - discadj_thermoelastic.test_vals = [-5.355531, -5.293380, -6.164482, -6.433863, 43, -4.049760, 27, -4.164183, 0, 0.192640, 0] + discadj_thermoelastic.test_vals = [-5.355530, -5.293381, -6.164472, -6.433864, 43, -4.049760, 27, -4.164193, 0, 0.192640, 0] test_list.append(discadj_thermoelastic) ################################### @@ -286,7 +286,7 @@ def main(): discadj_fsi2.cfg_dir = "disc_adj_fsi/Airfoil_2d" discadj_fsi2.cfg_file = "config.cfg" discadj_fsi2.test_iter = 8 - discadj_fsi2.test_vals = [-3.824630, 1.979615, -3.863368, 0.295450, 3.839800] + discadj_fsi2.test_vals = [-3.824635, 1.979557, -3.863368, 0.295450, 3.839800] discadj_fsi2.test_vals_aarch64 = [-3.824870, 1.979160, -3.863368, 0.295450, 3.839800] discadj_fsi2.tol = 0.00001 test_list.append(discadj_fsi2) @@ -342,7 +342,7 @@ def main(): discadj_flamelet_ch4_hx.cfg_file = "lam_prem_ch4_hx_ad.cfg" discadj_flamelet_ch4_hx.multizone = False discadj_flamelet_ch4_hx.test_iter = 10 - discadj_flamelet_ch4_hx.test_vals = [-9.078706, -9.025745, -9.516205, -8.434002, -15.386905, -8.887596, -18.881167] + discadj_flamelet_ch4_hx.test_vals = [-9.083502, -9.033520, -9.529644, -8.442388, -15.399568, -6.170002, -18.881156] test_list.append(discadj_flamelet_ch4_hx) # 2D planar laminar premixed flame on isothermal burner with conjugate heat transfer (restart) @@ -351,7 +351,7 @@ def main(): discadj_flamelet_ch4_cht.cfg_file = "lam_prem_ch4_cht_ad_master.cfg" discadj_flamelet_ch4_cht.multizone = True discadj_flamelet_ch4_cht.test_iter = 10 - discadj_flamelet_ch4_cht.test_vals = [-1.545058, 0.628666, -6.533534, -18.651078, -18.648552, -3.794724, -6.561913, 11.000000] + discadj_flamelet_ch4_cht.test_vals = [-1.545058, 0.628666, -6.533534, -18.651078, -18.648552, -3.794724, -6.572494, 11.000000] test_list.append(discadj_flamelet_ch4_cht) ###################################### @@ -508,7 +508,7 @@ def main(): pywrapper_CFD_AD_MeshDisp.cfg_dir = "py_wrapper/disc_adj_flow/mesh_disp_sens" pywrapper_CFD_AD_MeshDisp.cfg_file = "configAD_flow.cfg" pywrapper_CFD_AD_MeshDisp.test_iter = 1000 - pywrapper_CFD_AD_MeshDisp.test_vals = [30.000000, -2.496560, 1.440884, 0.000000] + pywrapper_CFD_AD_MeshDisp.test_vals = [30.000000, -2.496452, 1.441022, 0.000000] pywrapper_CFD_AD_MeshDisp.command = TestCase.Command("mpirun -n 2", "python", "run_adjoint.py --parallel -f") pywrapper_CFD_AD_MeshDisp.timeout = 1600 pywrapper_CFD_AD_MeshDisp.tol = 0.000001 diff --git a/TestCases/serial_regression.py b/TestCases/serial_regression.py index 830ce48e4a5..a1cdb43eac7 100755 --- a/TestCases/serial_regression.py +++ b/TestCases/serial_regression.py @@ -64,7 +64,7 @@ def main(): invwedge.cfg_dir = "nonequilibrium/invwedge" invwedge.cfg_file = "invwedge_ausm.cfg" invwedge.test_iter = 10 - invwedge.test_vals = [-1.073693, -1.598456, -18.298997, -18.626405, -18.572419, 2.241766, 1.868557, 5.286079, 0.843747] + invwedge.test_vals = [-1.073689, -1.598452, -18.299910, -18.627322, -18.573334, 2.241771, 1.868566, 5.286082, 0.843751] invwedge.test_vals_aarch64 = [-1.073699, -1.598462, -18.299723, -18.627132, -18.573146, 2.241760, 1.868575, 5.286072, 0.843741] test_list.append(invwedge) @@ -93,7 +93,7 @@ def main(): channel.cfg_dir = "euler/channel" channel.cfg_file = "inv_channel_RK.cfg" channel.test_iter = 10 - channel.test_vals = [-1.969337, 3.565024, 0.000246, 0.160624] + channel.test_vals = [-1.969337, 3.565024, 0.000242, 0.160624] test_list.append(channel) # NACA0012 @@ -126,7 +126,7 @@ def main(): fixedCL_naca0012.cfg_dir = "fixed_cl/naca0012" fixedCL_naca0012.cfg_file = "inv_NACA0012.cfg" fixedCL_naca0012.test_iter = 10 - fixedCL_naca0012.test_vals = [-3.962510, 1.570618, 0.301016, 0.019477] + fixedCL_naca0012.test_vals = [-3.949168, 1.584810, 0.301017, 0.019479] test_list.append(fixedCL_naca0012) # Polar sweep of the inviscid NACA0012 @@ -166,7 +166,7 @@ def main(): flatplate.cfg_dir = "navierstokes/flatplate" flatplate.cfg_file = "lam_flatplate.cfg" flatplate.test_iter = 20 - flatplate.test_vals = [-5.492595, -0.012547, 0.002524, 0.011875, 2.361500, -2.349600, 0.000000, 0.000000] + flatplate.test_vals = [-5.499362, -0.019046, 0.002526, 0.011870, 2.361500, -2.349600, 0.000000, 0.000000] test_list.append(flatplate) # Laminar cylinder (steady) @@ -174,7 +174,7 @@ def main(): cylinder.cfg_dir = "navierstokes/cylinder" cylinder.cfg_file = "lam_cylinder.cfg" cylinder.test_iter = 25 - cylinder.test_vals = [-8.480930, -3.006218, -0.028387, 1.634020, 0.000000] + cylinder.test_vals = [-8.508933, -3.034987, -0.014846, 1.644215, 0.000000] test_list.append(cylinder) # Laminar cylinder (low Mach correction) @@ -182,7 +182,7 @@ def main(): cylinder_lowmach.cfg_dir = "navierstokes/cylinder" cylinder_lowmach.cfg_file = "cylinder_lowmach.cfg" cylinder_lowmach.test_iter = 25 - cylinder_lowmach.test_vals = [-6.447834, -0.986069, 0.825992, 65.209800, 0.000000] + cylinder_lowmach.test_vals = [-6.453096, -0.991328, 0.722165, 66.048089, 0.000000] test_list.append(cylinder_lowmach) # 2D Poiseuille flow (body force driven with periodic inlet / outlet) @@ -242,7 +242,7 @@ def main(): turb_flatplate.cfg_dir = "rans/flatplate" turb_flatplate.cfg_file = "turb_SA_flatplate.cfg" turb_flatplate.test_iter = 20 - turb_flatplate.test_vals = [-4.958238, -7.438031, -0.187473, 0.015059] + turb_flatplate.test_vals = [-4.958115, -7.438257, -0.187473, 0.015056] test_list.append(turb_flatplate) # FLAT PLATE, WALL FUNCTIONS, COMPRESSIBLE SST @@ -359,7 +359,7 @@ def main(): turb_naca0012_sst_restart_mg.cfg_file = "turb_NACA0012_sst_multigrid_restart.cfg" turb_naca0012_sst_restart_mg.test_iter = 50 turb_naca0012_sst_restart_mg.ntest_vals = 5 - turb_naca0012_sst_restart_mg.test_vals = [-6.610290, -5.081422, 0.810881, -0.008844, 0.077940] + turb_naca0012_sst_restart_mg.test_vals = [-6.610405, -5.081422, 0.810881, -0.008846, 0.077934] turb_naca0012_sst_restart_mg.timeout = 3200 turb_naca0012_sst_restart_mg.tol = 0.000001 test_list.append(turb_naca0012_sst_restart_mg) @@ -380,7 +380,7 @@ def main(): inc_euler_naca0012.cfg_dir = "incomp_euler/naca0012" inc_euler_naca0012.cfg_file = "incomp_NACA0012.cfg" inc_euler_naca0012.test_iter = 20 - inc_euler_naca0012.test_vals = [-5.968755, -5.003709, 0.522550, 0.008867] + inc_euler_naca0012.test_vals = [-5.988713, -5.020635, 0.522968, 0.008854] test_list.append(inc_euler_naca0012) # C-D nozzle with pressure inlet and mass flow outlet @@ -407,7 +407,7 @@ def main(): inc_lam_cylinder.cfg_dir = "incomp_navierstokes/cylinder" inc_lam_cylinder.cfg_file = "incomp_cylinder.cfg" inc_lam_cylinder.test_iter = 10 - inc_lam_cylinder.test_vals = [-4.159153, -3.569142, 0.011445, 4.936802] + inc_lam_cylinder.test_vals = [-4.161215, -3.573002, 0.019888, 4.945923] test_list.append(inc_lam_cylinder) # Buoyancy-driven cavity @@ -423,7 +423,7 @@ def main(): inc_poly_cylinder.cfg_dir = "incomp_navierstokes/cylinder" inc_poly_cylinder.cfg_file = "poly_cylinder.cfg" inc_poly_cylinder.test_iter = 20 - inc_poly_cylinder.test_vals = [-8.232551, -2.412865, 0.010163, 1.895333, -172.760000] + inc_poly_cylinder.test_vals = [-8.081227, -2.399756, 0.009794, 1.900988, -172.970000] test_list.append(inc_poly_cylinder) # X-coarse laminar bend as a mixed element CGNS test @@ -431,7 +431,7 @@ def main(): inc_lam_bend.cfg_dir = "incomp_navierstokes/bend" inc_lam_bend.cfg_file = "lam_bend.cfg" inc_lam_bend.test_iter = 10 - inc_lam_bend.test_vals = [-3.647474, -3.230291, -0.016108, 1.085750] + inc_lam_bend.test_vals = [-3.639664, -3.218039, -0.016067, 1.090645] test_list.append(inc_lam_bend) ############################ @@ -586,7 +586,7 @@ def main(): contadj_naca0012.cfg_dir = "cont_adj_euler/naca0012" contadj_naca0012.cfg_file = "inv_NACA0012.cfg" contadj_naca0012.test_iter = 5 - contadj_naca0012.test_vals = [-9.531049, -15.087710, -0.726250, 0.020280] + contadj_naca0012.test_vals = [-9.531733, -15.088205, -0.726250, 0.020280] contadj_naca0012.tol = 0.001 test_list.append(contadj_naca0012) @@ -595,7 +595,7 @@ def main(): contadj_oneram6.cfg_dir = "cont_adj_euler/oneram6" contadj_oneram6.cfg_file = "inv_ONERAM6.cfg" contadj_oneram6.test_iter = 10 - contadj_oneram6.test_vals = [-12.080232, -12.641294, -1.086100, 0.007556] + contadj_oneram6.test_vals = [-12.083706, -12.645329, -1.086100, 0.007556] test_list.append(contadj_oneram6) # Inviscid WEDGE: tests averaged outflow total pressure adjoint @@ -611,7 +611,7 @@ def main(): contadj_fixedCL_naca0012.cfg_dir = "fixed_cl/naca0012" contadj_fixedCL_naca0012.cfg_file = "inv_NACA0012_ContAdj.cfg" contadj_fixedCL_naca0012.test_iter = 100 - contadj_fixedCL_naca0012.test_vals = [1.381080, -4.043374, -0.033478, 0.003350] + contadj_fixedCL_naca0012.test_vals = [1.378116, -4.047513, -0.030259, 0.003488] test_list.append(contadj_fixedCL_naca0012) ################################### @@ -630,7 +630,7 @@ def main(): contadj_ns_cylinder.cfg_dir = "cont_adj_navierstokes/cylinder" contadj_ns_cylinder.cfg_file = "lam_cylinder.cfg" contadj_ns_cylinder.test_iter = 20 - contadj_ns_cylinder.test_vals = [-3.628790, -9.082444, 2.056700, -0.000000] + contadj_ns_cylinder.test_vals = [-3.628460, -9.081344, 2.056700, -0.000000] test_list.append(contadj_ns_cylinder) # Adjoint laminar naca0012 subsonic @@ -674,7 +674,7 @@ def main(): contadj_rans_rae2822.cfg_dir = "cont_adj_rans/rae2822" contadj_rans_rae2822.cfg_file = "turb_SA_RAE2822.cfg" contadj_rans_rae2822.test_iter = 20 - contadj_rans_rae2822.test_vals = [-5.399819, -10.904997, -0.212470, 0.005448] + contadj_rans_rae2822.test_vals = [-5.399778, -10.904866, -0.212470, 0.005448] test_list.append(contadj_rans_rae2822) ############################# @@ -768,7 +768,7 @@ def main(): spinning_cylinder.cfg_dir = "moving_wall/spinning_cylinder" spinning_cylinder.cfg_file = "spinning_cylinder.cfg" spinning_cylinder.test_iter = 25 - spinning_cylinder.test_vals = [-7.543538, -2.078580, 1.956630, 1.859963] + spinning_cylinder.test_vals = [-7.549394, -2.082578, 1.841595, 1.853229] test_list.append(spinning_cylinder) ###################################### @@ -789,7 +789,7 @@ def main(): sine_gust.cfg_dir = "gust" sine_gust.cfg_file = "inv_gust_NACA0012.cfg" sine_gust.test_iter = 5 - sine_gust.test_vals = [-1.977498, 3.481817, -0.010134, -0.004283] + sine_gust.test_vals = [-1.977498, 3.481817, -0.010301, -0.004334] sine_gust.unsteady = True test_list.append(sine_gust) @@ -798,7 +798,7 @@ def main(): aeroelastic.cfg_dir = "aeroelastic" aeroelastic.cfg_file = "aeroelastic_NACA64A010.cfg" aeroelastic.test_iter = 2 - aeroelastic.test_vals = [-1.876626, 4.021083, 0.081596, 0.027684, -0.001638, -0.000130, -1.140510] + aeroelastic.test_vals = [-1.876631, 4.021073, 0.081373, 0.027542, -0.001642, -0.000127, -1.133902] aeroelastic.unsteady = True test_list.append(aeroelastic) @@ -824,7 +824,7 @@ def main(): unst_pitching_naca64a010_rans.cfg_dir = "unsteady/pitching_naca64a010_rans" unst_pitching_naca64a010_rans.cfg_file = "turb_NACA64A010.cfg" unst_pitching_naca64a010_rans.test_iter = 2 - unst_pitching_naca64a010_rans.test_vals = [-1.299045, -3.951366, 0.010128, 0.008245] + unst_pitching_naca64a010_rans.test_vals = [-1.299045, -3.951363, 0.010176, 0.008237] unst_pitching_naca64a010_rans.unsteady = True test_list.append(unst_pitching_naca64a010_rans) # unsteady pitching NACA64A010, Euler @@ -832,7 +832,7 @@ def main(): unst_pitching_naca64a010_euler.cfg_dir = "unsteady/pitching_naca64a010_euler" unst_pitching_naca64a010_euler.cfg_file = "pitching_NACA64A010.cfg" unst_pitching_naca64a010_euler.test_iter = 2 - unst_pitching_naca64a010_euler.test_vals = [-1.186839, 4.280301, -0.039488, 0.000918] + unst_pitching_naca64a010_euler.test_vals = [-1.186839, 4.280301, -0.039479, 0.000910] unst_pitching_naca64a010_euler.unsteady = True test_list.append(unst_pitching_naca64a010_euler) # unsteady plunging NACA0012, Laminar NS @@ -840,7 +840,7 @@ def main(): unst_plunging_naca0012.cfg_dir = "unsteady/plunging_naca0012" unst_plunging_naca0012.cfg_file = "plunging_NACA0012.cfg" unst_plunging_naca0012.test_iter = 2 - unst_plunging_naca0012.test_vals = [-4.083462, 1.366757, -6.470859, -0.078768] + unst_plunging_naca0012.test_vals = [-4.083462, 1.366757, -6.456450, -0.082788] unst_plunging_naca0012.unsteady = True test_list.append(unst_plunging_naca0012) @@ -849,7 +849,7 @@ def main(): unst_deforming_naca0012.cfg_dir = "disc_adj_euler/naca0012_pitching_def" unst_deforming_naca0012.cfg_file = "inv_NACA0012_pitching_deform.cfg" unst_deforming_naca0012.test_iter = 5 - unst_deforming_naca0012.test_vals = [-3.665263, -3.794184, -3.716978, -3.148551] + unst_deforming_naca0012.test_vals = [-3.665271, -3.794214, -3.716999, -3.148565] unst_deforming_naca0012.unsteady = True test_list.append(unst_deforming_naca0012) @@ -916,7 +916,7 @@ def main(): transonic_stator_restart.cfg_dir = "turbomachinery/transonic_stator_2D" transonic_stator_restart.cfg_file = "transonic_stator_restart.cfg" transonic_stator_restart.test_iter = 20 - transonic_stator_restart.test_vals = [-4.367851, -2.492866, -2.082422, 1.727424, -1.466963, 3.224518, -471620.000000, 94.839000, -0.052025] + transonic_stator_restart.test_vals = [-4.367780, -2.492918, -2.082410, 1.727494, -1.466974, 3.224733, -471620.000000, 94.839000, -0.052084] transonic_stator_restart.test_vals_aarch64 = [-4.443401, -2.566759, -2.169302, 1.651815, -1.356398, 3.172527, -471620.000000, 94.843000, -0.044669] test_list.append(transonic_stator_restart) @@ -946,7 +946,7 @@ def main(): uniform_flow.cfg_dir = "sliding_interface/uniform_flow" uniform_flow.cfg_file = "uniform_NN.cfg" uniform_flow.test_iter = 2 - uniform_flow.test_vals = [2.000000, 0.000000, -0.230641, -13.251039] + uniform_flow.test_vals = [2.000000, 0.000000, -0.230639, -13.250786] uniform_flow.test_vals_aarch64 = [2.000000, 0.000000, -0.230641, -13.249000] uniform_flow.tol = 0.000001 uniform_flow.unsteady = True @@ -1011,7 +1011,7 @@ def main(): bars_SST_2D.cfg_dir = "sliding_interface/bars_SST_2D" bars_SST_2D.cfg_file = "bars.cfg" bars_SST_2D.test_iter = 13 - bars_SST_2D.test_vals = [13.000000, -0.397243, -1.461873] + bars_SST_2D.test_vals = [13.000000, -0.393225, -1.462257] bars_SST_2D.multizone = True test_list.append(bars_SST_2D) @@ -1020,7 +1020,7 @@ def main(): slinc_steady.cfg_dir = "sliding_interface/incompressible_steady" slinc_steady.cfg_file = "config.cfg" slinc_steady.test_iter = 19 - slinc_steady.test_vals = [19.000000, -1.148249, -1.398402] + slinc_steady.test_vals = [19.000000, -1.141747, -1.450721] slinc_steady.timeout = 100 slinc_steady.multizone = True test_list.append(slinc_steady) @@ -1089,7 +1089,7 @@ def main(): fsi_cht.cfg_dir = "fea_fsi/stat_fsi" fsi_cht.cfg_file = "config.cfg" fsi_cht.test_iter = 20 - fsi_cht.test_vals = [5, -5.077012, -5.379779, -9.247804, -9.277819, -9.183796, 6.0835e+02, -1.2973e-02, 5.7607e-08, 29] + fsi_cht.test_vals = [5, -5.077014, -5.379795, -9.247813, -9.320514, -9.185010, 608.35, -0.012973, 5.7607e-08, 30] fsi_cht.multizone = True test_list.append(fsi_cht) @@ -1098,7 +1098,7 @@ def main(): dyn_fsi.cfg_dir = "fea_fsi/dyn_fsi" dyn_fsi.cfg_file = "config.cfg" dyn_fsi.test_iter = 4 - dyn_fsi.test_vals = [-4.330728, -4.152820, 5.3831e-08, 85] + dyn_fsi.test_vals = [-4.330728, -4.152820, 5.3831e-08, 86] dyn_fsi.multizone = True dyn_fsi.unsteady = True test_list.append(dyn_fsi) @@ -1108,7 +1108,7 @@ def main(): airfoilRBF.cfg_dir = "fea_fsi/Airfoil_RBF" airfoilRBF.cfg_file = "config.cfg" airfoilRBF.test_iter = 1 - airfoilRBF.test_vals = [1.000000, 0.028165, -3.530075] + airfoilRBF.test_vals = [1.000000, 0.026697, -3.532043] airfoilRBF.tol = 0.0001 airfoilRBF.multizone = True test_list.append(airfoilRBF) @@ -1150,7 +1150,7 @@ def main(): cht_incompressible.cfg_dir = "coupled_cht/comp_2d" cht_incompressible.cfg_file = "cht_2d_3cylinders.cfg" cht_incompressible.test_iter = 10 - cht_incompressible.test_vals = [-4.256032, -0.532728, -0.532729, -0.532728] + cht_incompressible.test_vals = [-4.256032, -0.532728, -0.532728, -0.532728] cht_incompressible.multizone = True test_list.append(cht_incompressible) @@ -1601,7 +1601,7 @@ def main(): pywrapper_unsteadyCHT.cfg_dir = "py_wrapper/flatPlate_unsteady_CHT" pywrapper_unsteadyCHT.cfg_file = "unsteady_CHT_FlatPlate_Conf.cfg" pywrapper_unsteadyCHT.test_iter = 5 - pywrapper_unsteadyCHT.test_vals = [-1.614168, 2.260078, -0.020704, 0.172871] + pywrapper_unsteadyCHT.test_vals = [-1.614169, 2.260215, -0.019432, 0.203751] pywrapper_unsteadyCHT.command = TestCase.Command(exec = "python", param = "launch_unsteady_CHT_FlatPlate.py -f") pywrapper_unsteadyCHT.timeout = 1600 pywrapper_unsteadyCHT.tol = 0.00001 diff --git a/TestCases/serial_regression_AD.py b/TestCases/serial_regression_AD.py index 6e11587b8cf..4354bd0dfd9 100644 --- a/TestCases/serial_regression_AD.py +++ b/TestCases/serial_regression_AD.py @@ -58,7 +58,7 @@ def main(): discadj_naca0012_via_mz.cfg_dir = "cont_adj_euler/naca0012" discadj_naca0012_via_mz.cfg_file = "inv_NACA0012_discadj_multizone.cfg" discadj_naca0012_via_mz.test_iter = 100 - discadj_naca0012_via_mz.test_vals = [-3.563784, -5.975640, -6.326231, -8.929567] + discadj_naca0012_via_mz.test_vals = [-3.563784, -5.975641, -6.326231, -8.929567] discadj_naca0012_via_mz.enabled_with_tapetests = True discadj_naca0012_via_mz.tapetest_vals = [0] test_list.append(discadj_naca0012_via_mz) @@ -68,7 +68,7 @@ def main(): discadj_cylinder3D.cfg_dir = "disc_adj_euler/cylinder3D" discadj_cylinder3D.cfg_file = "inv_cylinder3D.cfg" discadj_cylinder3D.test_iter = 5 - discadj_cylinder3D.test_vals = [-3.702105, -3.895140, -0.000000, 0.000000] + discadj_cylinder3D.test_vals = [-3.702109, -3.895140, -0.000000, 0.000000] test_list.append(discadj_cylinder3D) # Arina nozzle 2D @@ -108,7 +108,7 @@ def main(): discadj_incomp_NACA0012.cfg_dir = "disc_adj_incomp_euler/naca0012" discadj_incomp_NACA0012.cfg_file = "incomp_NACA0012_disc.cfg" discadj_incomp_NACA0012.test_iter = 20 - discadj_incomp_NACA0012.test_vals = [20.000000, -3.122293, -2.287328, 0.000000] + discadj_incomp_NACA0012.test_vals = [20.000000, -3.122056, -2.290799, 0.000000] test_list.append(discadj_incomp_NACA0012) ##################################### @@ -120,7 +120,7 @@ def main(): discadj_incomp_cylinder.cfg_dir = "disc_adj_incomp_navierstokes/cylinder" discadj_incomp_cylinder.cfg_file = "heated_cylinder.cfg" discadj_incomp_cylinder.test_iter = 20 - discadj_incomp_cylinder.test_vals = [20.000000, -1.672307, -6.247612, 0.000000] + discadj_incomp_cylinder.test_vals = [20.000000, -1.666057, -6.234535, 0.000000] test_list.append(discadj_incomp_cylinder) ####################################################### @@ -132,7 +132,7 @@ def main(): discadj_cylinder.cfg_dir = "disc_adj_rans/cylinder" discadj_cylinder.cfg_file = "cylinder.cfg" discadj_cylinder.test_iter = 9 - discadj_cylinder.test_vals = [1.639372, -2.834285, -0.009537, 0.000020] + discadj_cylinder.test_vals = [1.639373, -2.834286, -0.009537, 0.000020] discadj_cylinder.unsteady = True test_list.append(discadj_cylinder) @@ -145,7 +145,7 @@ def main(): discadj_DT_1ST_cylinder.cfg_dir = "disc_adj_rans/cylinder_DT_1ST" discadj_DT_1ST_cylinder.cfg_file = "cylinder.cfg" discadj_DT_1ST_cylinder.test_iter = 9 - discadj_DT_1ST_cylinder.test_vals = [1.196421, -3.339043, -0.006211, 0.000020] + discadj_DT_1ST_cylinder.test_vals = [1.196421, -3.339044, -0.006211, 0.000020] discadj_DT_1ST_cylinder.unsteady = True test_list.append(discadj_DT_1ST_cylinder) @@ -158,7 +158,7 @@ def main(): discadj_pitchingNACA0012.cfg_dir = "disc_adj_euler/naca0012_pitching" discadj_pitchingNACA0012.cfg_file = "inv_NACA0012_pitching.cfg" discadj_pitchingNACA0012.test_iter = 4 - discadj_pitchingNACA0012.test_vals = [-1.049761, -1.501951, -0.004853, 0.000013] + discadj_pitchingNACA0012.test_vals = [-1.050725, -1.504630, -0.004855, 0.000013] discadj_pitchingNACA0012.unsteady = True test_list.append(discadj_pitchingNACA0012) @@ -167,7 +167,7 @@ def main(): unst_deforming_naca0012.cfg_dir = "disc_adj_euler/naca0012_pitching_def" unst_deforming_naca0012.cfg_file = "inv_NACA0012_pitching_deform_ad.cfg" unst_deforming_naca0012.test_iter = 4 - unst_deforming_naca0012.test_vals = [-1.885816, -1.781193, 3920.300000, 0.000003] + unst_deforming_naca0012.test_vals = [-1.886194, -1.780629, 3882.900000, 0.000003] unst_deforming_naca0012.unsteady = True test_list.append(unst_deforming_naca0012) @@ -349,7 +349,7 @@ def main(): pywrapper_CFD_AD_MeshDisp.cfg_dir = "py_wrapper/disc_adj_flow/mesh_disp_sens" pywrapper_CFD_AD_MeshDisp.cfg_file = "configAD_flow.cfg" pywrapper_CFD_AD_MeshDisp.test_iter = 1000 - pywrapper_CFD_AD_MeshDisp.test_vals = [30.000000, -2.496380, 1.441373, 0.000000] + pywrapper_CFD_AD_MeshDisp.test_vals = [30.000000, -2.496258, 1.441670, 0.000000] pywrapper_CFD_AD_MeshDisp.command = TestCase.Command(exec = "python", param = "run_adjoint.py -f") pywrapper_CFD_AD_MeshDisp.timeout = 1600 pywrapper_CFD_AD_MeshDisp.tol = 0.000001 diff --git a/TestCases/tutorials.py b/TestCases/tutorials.py index 305ab9fb234..d11050e4bfd 100644 --- a/TestCases/tutorials.py +++ b/TestCases/tutorials.py @@ -49,7 +49,7 @@ def main(): cht_incompressible_unsteady.cfg_dir = "../Tutorials/multiphysics/unsteady_cht/" cht_incompressible_unsteady.cfg_file = "cht_2d_3cylinders.cfg" cht_incompressible_unsteady.test_iter = 2 - cht_incompressible_unsteady.test_vals = [-3.075372, -0.080399, -0.080399, -0.080399, -11.163219, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 238.240000] + cht_incompressible_unsteady.test_vals = [-3.204738, -0.080399, -0.080399, -0.080399, -11.531676, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 238.240000] cht_incompressible_unsteady.multizone = True cht_incompressible_unsteady.unsteady = True test_list.append(cht_incompressible_unsteady) @@ -165,7 +165,7 @@ def main(): premixed_hydrogen.cfg_dir = "../Tutorials/incompressible_flow/Inc_Combustion/1__premixed_hydrogen" premixed_hydrogen.cfg_file = "H2_burner.cfg" premixed_hydrogen.test_iter = 10 - premixed_hydrogen.test_vals = [-8.772073, -9.721083, -11.354959, -4.381554, -12.832787] + premixed_hydrogen.test_vals = [-8.771332, -9.721253, -11.354950, -4.381656, -12.832815] test_list.append(premixed_hydrogen) ### Compressible Flow @@ -201,7 +201,7 @@ def main(): tutorial_lam_cylinder.cfg_dir = "../Tutorials/compressible_flow/Laminar_Cylinder" tutorial_lam_cylinder.cfg_file = "lam_cylinder.cfg" tutorial_lam_cylinder.test_iter = 0 - tutorial_lam_cylinder.test_vals = [-6.162141, -0.699617, -0.124663, 31.721714] + tutorial_lam_cylinder.test_vals = [-6.162141, -0.699617, -0.085099, 31.790369] tutorial_lam_cylinder.no_restart = True test_list.append(tutorial_lam_cylinder) @@ -228,7 +228,7 @@ def main(): tutorial_trans_flatplate.cfg_dir = "../Tutorials/compressible_flow/Transitional_Flat_Plate" tutorial_trans_flatplate.cfg_file = "transitional_BC_model_ConfigFile.cfg" tutorial_trans_flatplate.test_iter = 0 - tutorial_trans_flatplate.test_vals = [-22.021786, -15.330766, 0.000000, 0.023944] + tutorial_trans_flatplate.test_vals = [-22.025101, -15.330766, 0.000000, 0.023944] tutorial_trans_flatplate.no_restart = True test_list.append(tutorial_trans_flatplate) @@ -292,7 +292,7 @@ def main(): tutorial_nicfd_nozzle_pinn.cfg_dir = "../Tutorials/compressible_flow/NICFD_nozzle/PhysicsInformed" tutorial_nicfd_nozzle_pinn.cfg_file = "config_NICFD_PINN.cfg" tutorial_nicfd_nozzle_pinn.test_iter = 20 - tutorial_nicfd_nozzle_pinn.test_vals = [-2.728179, -0.849337, -1.224542, 2.898995, -11.420290] + tutorial_nicfd_nozzle_pinn.test_vals = [-2.728179, -0.849337, -1.224543, 2.898995, -11.420290] tutorial_nicfd_nozzle_pinn.no_restart = True test_list.append(tutorial_nicfd_nozzle_pinn) diff --git a/UnitTests/Common/geometry/CGeometry_test.cpp b/UnitTests/Common/geometry/CGeometry_test.cpp index cad2157a804..22556b16055 100644 --- a/UnitTests/Common/geometry/CGeometry_test.cpp +++ b/UnitTests/Common/geometry/CGeometry_test.cpp @@ -122,9 +122,9 @@ TEST_CASE("Set control volume", "[Geometry]") { CHECK(TestCase->geometry->nodes->GetVolume(42) == Approx(0.015625)); - CHECK(TestCase->geometry->edges->GetNormal(32)[0] == 0.03125); + CHECK(TestCase->geometry->edges->GetNormal(31)[0] == 0.03125); CHECK(TestCase->geometry->edges->GetNormal(5)[1] == 0.0); - CHECK(TestCase->geometry->edges->GetNormal(10)[2] == 0.03125); + CHECK(TestCase->geometry->edges->GetNormal(11)[2] == 0.03125); CHECK(TestCase->config->GetDomainVolume() == Approx(1.0)); } diff --git a/meson.build b/meson.build index d43e2b031d8..e76354a6e86 100644 --- a/meson.build +++ b/meson.build @@ -20,13 +20,10 @@ python = pymod.find_installation() if get_option('enable-cuda') add_languages('cuda') add_global_arguments('-arch=sm_86', language : 'cuda') - cuda_deps = [meson.get_compiler('cuda').find_library('cusparse', required : true)] -else - cuda_deps = [] endif su2_cpp_args = [] -su2_deps = [declare_dependency(include_directories: 'externals/CLI11')] + cuda_deps +su2_deps = [declare_dependency(include_directories: 'externals/CLI11')] default_warning_flags = [] if build_machine.system() != 'windows'