diff --git a/Common/include/geometry/CGeometry.hpp b/Common/include/geometry/CGeometry.hpp
index 7958110fb23..5d1e7e5a83a 100644
--- a/Common/include/geometry/CGeometry.hpp
+++ b/Common/include/geometry/CGeometry.hpp
@@ -73,6 +73,19 @@ using namespace std;
  * \author F. Palacios
  */
 class CGeometry {
+ public:
+  /*!
+   * \brief Aggregates the full symmetric CSR and its LDU split (L strictly-lower, U strictly-upper).
+   *        Built together lazily via GetSparsePattern; all three are always valid once non-empty.
+   */
+  struct LDUSparsePattern {
+    CCompressedSparsePatternUL csr; /*!< Full symmetric pattern (with diagonal pointer). */
+    CCompressedSparsePatternUL l;   /*!< Strictly-lower part. */
+    CCompressedSparsePatternUL u;   /*!< Strictly-upper part. */
+
+    bool empty() const { return csr.empty(); }
+  };
+
  protected:
   enum : size_t { OMP_MIN_SIZE = 32 }; /*!< \brief Chunk size for small loops. */
   enum : size_t { MAXNDIM = 3 };
@@ -187,12 +200,15 @@ class CGeometry {
 
   /*--- Sparsity patterns associated with the geometry. ---*/
 
-  CCompressedSparsePatternUL finiteVolumeCSRFill0, /*!< \brief 0-fill FVM sparsity. */
-      finiteVolumeCSRFillN,                        /*!< \brief N-fill FVM sparsity (e.g. for ILUn preconditioner). */
-      finiteElementCSRFill0,                       /*!< \brief 0-fill FEM sparsity. */
-      finiteElementCSRFillN;                       /*!< \brief N-fill FEM sparsity (e.g. for ILUn preconditioner). */
+  LDUSparsePattern finiteVolumePatternFill0;  /*!< \brief FVM sparsity with 0-fill (structural pattern). */
+  LDUSparsePattern finiteVolumePatternFillN;  /*!< \brief FVM sparsity with N-fill (e.g. for ILU-N). */
+  LDUSparsePattern finiteElementPatternFill0; /*!< \brief FEM sparsity with 0-fill (structural pattern). */
+  LDUSparsePattern finiteElementPatternFillN; /*!< \brief FEM sparsity with N-fill (e.g. for ILU-N). */
 
-  CEdgeToNonZeroMapUL edgeToCSRMap; /*!< \brief Map edges to CSR entries referenced by them (i,j) and (j,i). */
+  su2vector<unsigned long> finiteVolumeLToUTranspMap;  /*!< \brief FVM L-entry -> U-entry of its transpose. */
+  su2vector<unsigned long> finiteVolumeUToLTranspMap;  /*!< \brief FVM U-entry -> L-entry of its transpose. */
+  su2vector<unsigned long> finiteElementLToUTranspMap; /*!< \brief FEM L-entry -> U-entry of its transpose. */
+  su2vector<unsigned long> finiteElementUToLTranspMap; /*!< \brief FEM U-entry -> L-entry of its transpose. */
 
   /*--- Edge and element colorings. ---*/
 
@@ -1868,21 +1884,23 @@ class CGeometry {
    * \param[in] fillLvl - Level of fill of the pattern.
    * \return Reference to the sparse pattern.
    */
-  const CCompressedSparsePatternUL& GetSparsePattern(ConnectivityType type, unsigned long fillLvl = 0);
+  const LDUSparsePattern& GetSparsePattern(ConnectivityType type, unsigned long fillLvl = 0);
 
   /*!
-   * \brief Get the edge to sparse pattern map.
-   * \note This method builds the map and required pattern (0-fill FVM) if that has not been done yet.
-   * \return Reference to the map.
+   * \brief Get the bijective map from L-entry indices to U-entry indices of their transposes.
+   * \note Requires symmetric pattern. Builds both LU transpose maps if not already built.
+   * \param[in] type - Finite volume or finite element.
+   * \return Reference to the l_to_u map.
    */
-  const CEdgeToNonZeroMapUL& GetEdgeToSparsePatternMap();
+  const su2vector<unsigned long>& GetLToUTransposeSparsePatternMap(ConnectivityType type);
 
   /*!
-   * \brief Get the transpose of the (main, i.e 0 fill) sparse pattern (e.g. CSR becomes CSC).
+   * \brief Get the bijective map from U-entry indices to L-entry indices of their transposes.
+   * \note Requires symmetric pattern. Builds both LU transpose maps if not already built.
    * \param[in] type - Finite volume or finite element.
-   * \return Reference to the map.
+   * \return Reference to the u_to_l map.
    */
-  const su2vector<unsigned long>& GetTransposeSparsePatternMap(ConnectivityType type);
+  const su2vector<unsigned long>& GetUToLTransposeSparsePatternMap(ConnectivityType type);
 
   /*!
    * \brief Get the edge coloring.
diff --git a/Common/include/linear_algebra/CPastixWrapper.hpp b/Common/include/linear_algebra/CPastixWrapper.hpp
index dcc320debe1..a471a60eaff 100644
--- a/Common/include/linear_algebra/CPastixWrapper.hpp
+++ b/Common/include/linear_algebra/CPastixWrapper.hpp
@@ -62,6 +62,9 @@ class CPastixWrapper {
   vector<pastix_int_t> perm;     /*!< \brief Ordering computed by PaStiX. */
   vector<su2mixedfloat> workvec; /*!< \brief RHS vector which then becomes the solution. */
 
+  vector<unsigned long> csr_row_ptr; /*!< \brief Owned CSR row pointers (built from LDU). */
+  vector<unsigned long> csr_col_ind; /*!< \brief Owned CSR column indices (built from LDU). */
+
   pastix_int_t iparm[IPARM_SIZE]; /*!< \brief Integer parameters for PaStiX. */
   double dparm[DPARM_SIZE];       /*!< \brief Floating point parameters for PaStiX. */
 
@@ -69,14 +72,18 @@ class CPastixWrapper {
     unsigned long nVar = 0;
     unsigned long nPoint = 0;
     unsigned long nPointDomain = 0;
-    const unsigned long* rowptr = nullptr;
-    const unsigned long* colidx = nullptr;
-    const ScalarType* values = nullptr;
+    unsigned long blkSz = 0; /*!< \brief Block size (nVar * nVar) for value assembly. */
+
+    const unsigned long* row_ptr_l = nullptr; /*!< \brief LDU lower row pointers (geometry-owned). */
+    const unsigned long* row_ptr_u = nullptr; /*!< \brief LDU upper row pointers (geometry-owned). */
+    const ScalarType* d = nullptr;            /*!< \brief Diagonal blocks (matrix-owned). */
+    const ScalarType* l = nullptr;            /*!< \brief Lower blocks (matrix-owned). */
+    const ScalarType* u = nullptr;            /*!< \brief Upper blocks (matrix-owned). */
 
     unsigned long size_rhs() const { return nPointDomain * nVar; }
-  } matrix; /*!< \brief Pointers and sizes of the input matrix. */
+  } matrix; /*!< \brief Dimensions and LDU pointers captured from the owning CSysMatrix. */
 
-  bool issetup{};        /*!< \brief Signals that the matrix data has been provided. */
+  bool issetup{};        /*!< \brief Signals that the structure has been provided. */
   bool isinitialized{};  /*!< \brief Signals that the sparsity pattern has been set. */
   bool isfactorized{};   /*!< \brief Signals that a factorization has been computed. */
   bool transpose{};      /*!< \brief Solve A^T x = b instead of A x = b. */
@@ -110,6 +117,11 @@ class CPastixWrapper {
    */
   void Initialize(CGeometry* geometry, const CConfig* config);
 
+  /*!
+   * \brief Assemble CSR values from the stored LDU pointers directly into the values buffer.
+   */
+  void AssembleValues();
+
  public:
   CPastixWrapper() = default;
 
@@ -125,23 +137,43 @@ class CPastixWrapper {
   ~CPastixWrapper() { Clean(); }
 
   /*!
-   * \brief Set matrix data, only once.
-   * \param[in] nVar - DOF per point.
+   * \brief Returns true once SetLDU has been called.
+   */
+  bool IsSetup() const { return issetup; }
+
+  /*!
+   * \brief Set LDU structure and value pointers; builds and owns assembled CSR (called once).
+   * \param[in] nVar - DOF per point (square blocks: nVar x nVar).
    * \param[in] nPoint - Total number of points including halos.
-   * \param[in] nPointDomain - Number of internal points.
-   * \param[in] rowptr - Array, where column index data starts for each matrix row.
-   * \param[in] colidx - Non zeros column indices.
-   * \param[in] values - Matrix coefficients.
+   * \param[in] nPointDomain - Number of internal points (domain rows).
+   * \param[in] row_ptr_l/u - LDU lower/upper row pointers (geometry-owned, must outlive wrapper).
+   * \param[in] col_ind_l/u - LDU lower/upper column indices (geometry-owned).
+   * \param[in] d/l/u - LDU value blocks (matrix-owned, must outlive wrapper).
    */
-  void SetMatrix(unsigned long nVar, unsigned long nPoint, unsigned long nPointDomain, const unsigned long* rowptr,
-                 const unsigned long* colidx, const ScalarType* values) {
+  void SetLDU(unsigned long nVar, unsigned long nPoint, unsigned long nPointDomain, const unsigned long* row_ptr_l,
+              const unsigned long* col_ind_l, const unsigned long* row_ptr_u, const unsigned long* col_ind_u,
+              const ScalarType* d, const ScalarType* l, const ScalarType* u) {
     if (issetup) return;
     matrix.nVar = nVar;
     matrix.nPoint = nPoint;
     matrix.nPointDomain = nPointDomain;
-    matrix.rowptr = rowptr;
-    matrix.colidx = colidx;
-    matrix.values = values;
+    matrix.row_ptr_l = row_ptr_l;
+    matrix.row_ptr_u = row_ptr_u;
+    matrix.d = d;
+    matrix.l = l;
+    matrix.u = u;
+    matrix.blkSz = nVar * nVar;
+
+    const unsigned long nnz_domain = row_ptr_l[nPointDomain] + nPointDomain + row_ptr_u[nPointDomain];
+    csr_row_ptr.resize(nPointDomain + 1);
+    csr_col_ind.reserve(nnz_domain);
+    for (auto i = 0ul; i < nPointDomain; ++i) {
+      csr_row_ptr[i] = static_cast<unsigned long>(csr_col_ind.size());
+      for (auto k = row_ptr_l[i]; k < row_ptr_l[i + 1]; ++k) csr_col_ind.push_back(col_ind_l[k]);
+      csr_col_ind.push_back(i);
+      for (auto k = row_ptr_u[i]; k < row_ptr_u[i + 1]; ++k) csr_col_ind.push_back(col_ind_u[k]);
+    }
+    csr_row_ptr[nPointDomain] = static_cast<unsigned long>(csr_col_ind.size());
     issetup = true;
   }
 
diff --git a/Common/include/linear_algebra/CSysMatrix.hpp b/Common/include/linear_algebra/CSysMatrix.hpp
index a71d31c1c08..11baf6edf8e 100644
--- a/Common/include/linear_algebra/CSysMatrix.hpp
+++ b/Common/include/linear_algebra/CSysMatrix.hpp
@@ -138,25 +138,40 @@ class CSysMatrix {
   unsigned long nVar;         /*!< \brief Number of variables (and rows of the blocks). */
   unsigned long nEqn;         /*!< \brief Number of equations (and columns of the blocks). */
 
-  ScalarType* matrix;           /*!< \brief Entries of the sparse matrix. */
-  unsigned long nnz;            /*!< \brief Number of possible nonzero entries in the matrix. */
-  const unsigned long* row_ptr; /*!< \brief Pointers to the first element in each row. */
-  const unsigned long* dia_ptr; /*!< \brief Pointers to the diagonal element in each row. */
-  const unsigned long* col_ind; /*!< \brief Column index for each of the elements in val(). */
-  const unsigned long* col_ptr; /*!< \brief The transpose of col_ind, pointer to blocks with the same column index. */
-
-  ScalarType* d_matrix;           /*!< \brief Device Pointer to store the matrix values on the GPU. */
-  const unsigned long* d_row_ptr; /*!< \brief Device Pointers to the first element in each row. */
-  const unsigned long* d_col_ind; /*!< \brief Device Column index for each of the elements in val(). */
-  bool useCuda = false;           /*!< \brief Boolean that indicates whether user has enabled CUDA or not.
-                                     Mainly used to conditionally free GPU memory in the class destructor. */
-
-  ScalarType* ILU_matrix;           /*!< \brief Entries of the ILU sparse matrix. */
-  unsigned long nnz_ilu;            /*!< \brief Number of possible nonzero entries in the matrix (ILU). */
-  const unsigned long* row_ptr_ilu; /*!< \brief Pointers to the first element in each row (ILU). */
-  const unsigned long* dia_ptr_ilu; /*!< \brief Pointers to the diagonal element in each row (ILU). */
-  const unsigned long* col_ind_ilu; /*!< \brief Column index for each of the elements in val() (ILU). */
-  unsigned short ilu_fill_in;       /*!< \brief Fill in level for the ILU preconditioner. */
+  /*!
+   * \brief Aggregates value arrays and sparse-structure pointers for an LDU-partitioned matrix.
+   *        Each CSysMatrix holds three LDU instances: the host matrix (mat), its device copy (gpu),
+   *        and the ILU factorization (ilu). Ownership of the value arrays (d/l/u) and whether
+   *        the pointers address host or device memory is managed by CSysMatrix.
+   */
+  struct LDU {
+    ScalarType* d = nullptr;                  /*!< \brief Diagonal block values. */
+    ScalarType* l = nullptr;                  /*!< \brief Strictly-lower block values. */
+    ScalarType* u = nullptr;                  /*!< \brief Strictly-upper block values. */
+    const unsigned long* row_ptr_l = nullptr; /*!< \brief Row pointers for L (geometry-owned or GPU copy). */
+    const unsigned long* col_ind_l = nullptr; /*!< \brief Column indices for L. */
+    const unsigned long* row_ptr_u = nullptr; /*!< \brief Row pointers for U. */
+    const unsigned long* col_ind_u = nullptr; /*!< \brief Column indices for U. */
+    unsigned long nnz_l = 0;                  /*!< \brief Number of L nonzeros. */
+    unsigned long nnz_u = 0;                  /*!< \brief Number of U nonzeros. */
+  };
+
+  LDU mat; /*!< \brief Host matrix (values owned via aligned_alloc; pattern from geometry). */
+  LDU gpu; /*!< \brief Device matrix (all pointers to GPU memory). */
+  LDU ilu; /*!< \brief ILU factorization, host (values owned; pattern from geometry). */
+
+  bool useCuda = false;               /*!< \brief Whether CUDA is enabled. */
+  const unsigned long* l_to_u_transp; /*!< \brief L-entry index -> U-entry index of its transpose. */
+  const unsigned long* u_to_l_transp; /*!< \brief U-entry index -> L-entry index of its transpose. */
+
+  /*!
+   * \brief Lookup table from edges to the L-index in the LDU split.
+   * U-index == edge index by construction (edges are ordered 1:1 with the U pattern).
+   * Therefore, edge_ptr_l == u_to_l_transp, but we keep a separate member for clarity.
+   */
+  const unsigned long* edge_ptr_l;
+
+  unsigned short ilu_fill_in; /*!< \brief Fill level for the ILU preconditioner. */
 
   /*!< \brief Level structure for alternative shared memory parallelization of ILU. */
   CCompressedSparsePatternUL levels_ilu;
@@ -188,21 +203,6 @@ class CSysMatrix {
   mutable CPastixWrapper<ScalarType> pastix_wrapper;
 #endif
 
-  /*!
-   * \brief Auxilary object to wrap the edge map pointer used in fast block updates, i.e. without linear searches.
-   */
-  struct {
-    const unsigned long* ptr = nullptr;
-    unsigned long nEdge = 0;
-
-    operator bool() { return nEdge != 0; }
-
-    inline unsigned long operator()(unsigned long edge, unsigned long node) const { return ptr[2 * edge + node]; }
-    inline unsigned long ij(unsigned long edge) const { return ptr[2 * edge]; }
-    inline unsigned long ji(unsigned long edge) const { return ptr[2 * edge + 1]; }
-
-  } edge_ptr;
-
   /*!
    * \brief Handle type conversion for when we Set, Add, etc. blocks, preserving derivative information (if supported by
    * types).
@@ -322,20 +322,11 @@ class CSysMatrix {
   inline const ScalarType* InvertDiagonalBlockILUMatrix(unsigned long block_i);
 
   /*!
-   * \brief Copies the block (i, j) of the matrix-by-blocks structure in the internal variable *block.
-   * \param[in] block_i - Indexes of the block in the matrix-by-blocks structure.
-   * \param[in] block_j - Indexes of the block in the matrix-by-blocks structure.
+   * \brief Returns the start of the ILU block or nullptr if (i,j) is not a nonzero.
+   * \param[in] block_i/j - Indexes of the block in the matrix-by-blocks structure.
    */
   inline ScalarType* GetBlock_ILUMatrix(unsigned long block_i, unsigned long block_j);
 
-  /*!
-   * \brief Set the value of a block in the sparse matrix.
-   * \param[in] block_i - Indexes of the block in the matrix-by-blocks structure.
-   * \param[in] block_j - Indexes of the block in the matrix-by-blocks structure.
-   * \param[in] **val_block - Block to set to A(i, j).
-   */
-  inline void SetBlock_ILUMatrix(unsigned long block_i, unsigned long block_j, ScalarType* val_block);
-
   /*!
    * \brief Performs the product of i-th row of the upper part of a sparse matrix by a vector.
    * \param[in] vec - Vector to be multiplied by the upper part of the sparse matrix A.
@@ -392,7 +383,7 @@ class CSysMatrix {
    * \param[in] neqn - Number of equations (and columns of the blocks).
    * \param[in] geometry - Geometrical definition of the problem.
    * \param[in] config - Definition of the particular problem.
-   * \param[in] needTranspPtr - If "col_ptr" should be created, used for "SetDiagonalAsColumnSum".
+   * \param[in] needTranspPtr - If the L/U transpose maps should be built, used for "SetDiagonalAsColumnSum".
    */
   void Initialize(unsigned long npoint, unsigned long npointdomain, unsigned short nvar, unsigned short neqn,
                   bool EdgeConnect, CGeometry* geometry, const CConfig* config, bool needTranspPtr = false,
@@ -421,10 +412,14 @@ class CSysMatrix {
    * \return Pointer to location in memory where the block starts.
    */
   FORCEINLINE const ScalarType* GetBlock(unsigned long block_i, unsigned long block_j) const {
-    /*--- The position of the diagonal block is known which allows halving the search space. ---*/
-    const auto end = (block_j < block_i) ? dia_ptr[block_i] : row_ptr[block_i + 1];
-    for (auto index = (block_j < block_i) ? row_ptr[block_i] : dia_ptr[block_i]; index < end; ++index)
-      if (col_ind[index] == block_j) return &matrix[index * nVar * nEqn];
+    if (block_i == block_j) return &mat.d[block_i * nVar * nEqn];
+    if (block_j < block_i) {
+      for (auto index = mat.row_ptr_l[block_i]; index < mat.row_ptr_l[block_i + 1]; ++index)
+        if (mat.col_ind_l[index] == block_j) return &mat.l[index * nVar * nEqn];
+      return nullptr;
+    }
+    for (auto index = mat.row_ptr_u[block_i]; index < mat.row_ptr_u[block_i + 1]; ++index)
+      if (mat.col_ind_u[index] == block_j) return &mat.u[index * nVar * nEqn];
     return nullptr;
   }
 
@@ -574,10 +569,11 @@ class CSysMatrix {
    */
   inline void GetBlocks(unsigned long iEdge, unsigned long iPoint, unsigned long jPoint, ScalarType*& bii,
                         ScalarType*& bij, ScalarType*& bji, ScalarType*& bjj) {
-    bii = &matrix[dia_ptr[iPoint] * nVar * nEqn];
-    bjj = &matrix[dia_ptr[jPoint] * nVar * nEqn];
-    bij = &matrix[edge_ptr(iEdge, 0) * nVar * nEqn];
-    bji = &matrix[edge_ptr(iEdge, 1) * nVar * nEqn];
+    const auto blkSz = nVar * nEqn;
+    bii = &mat.d[iPoint * blkSz];
+    bjj = &mat.d[jPoint * blkSz];
+    bij = &mat.u[iEdge * blkSz];
+    bji = &mat.l[edge_ptr_l[iEdge] * blkSz];
   }
 
   /*!
@@ -652,10 +648,10 @@ class CSysMatrix {
       if (mask[k] == 0) continue;
 
       /*--- Fetch the blocks. ---*/
-      auto bii = &matrix[dia_ptr[iPoint[k]] * blkSz];
-      auto bjj = &matrix[dia_ptr[jPoint[k]] * blkSz];
-      auto bij = &matrix[edge_ptr(iEdge[k], 0) * blkSz];
-      auto bji = &matrix[edge_ptr(iEdge[k], 1) * blkSz];
+      auto bii = &mat.d[iPoint[k] * blkSz];
+      auto bjj = &mat.d[jPoint[k] * blkSz];
+      auto bij = &mat.u[iEdge[k] * blkSz];
+      auto bji = &mat.l[edge_ptr_l[iEdge[k]] * blkSz];
 
       /*--- Update, block i was negated during transpose in the
        * hope the assignments below become non-temporal stores. ---*/
@@ -682,8 +678,9 @@ class CSysMatrix {
   template <class MatrixType, class OtherType = ScalarType, bool Overwrite = true>
   inline void SetBlocks(unsigned long iEdge, const MatrixType& block_i, const MatrixType& block_j,
                         OtherType scale = 1) {
-    ScalarType* bij = &matrix[edge_ptr(iEdge, 0) * nVar * nEqn];
-    ScalarType* bji = &matrix[edge_ptr(iEdge, 1) * nVar * nEqn];
+    const auto blkSz = nVar * nEqn;
+    ScalarType* bij = &mat.u[iEdge * blkSz];
+    ScalarType* bji = &mat.l[edge_ptr_l[iEdge] * blkSz];
 
     unsigned long iVar, jVar, offset = 0;
 
@@ -742,8 +739,8 @@ class CSysMatrix {
       if (mask[k] == 0) continue;
 
       /*--- Fetch the blocks. ---*/
-      auto bij = &matrix[edge_ptr(iEdge[k], 0) * blkSz];
-      auto bji = &matrix[edge_ptr(iEdge[k], 1) * blkSz];
+      ScalarType* bij = &mat.u[iEdge[k] * blkSz];
+      ScalarType* bji = &mat.l[edge_ptr_l[iEdge[k]] * blkSz];
 
       /*--- Update, block i was negated during transpose in the
        * hope the assignments below become non-temporal stores. ---*/
@@ -765,7 +762,7 @@ class CSysMatrix {
    */
   template <class OtherType, bool Overwrite = true, class T = ScalarType>
   inline void SetBlock2Diag(unsigned long block_i, const OtherType& val_block, T alpha = 1.0) {
-    auto mat_ii = &matrix[dia_ptr[block_i] * nVar * nEqn];
+    auto mat_ii = &mat.d[block_i * nVar * nEqn];
 
     for (auto iVar = 0ul; iVar < nVar; iVar++)
       for (auto jVar = 0ul; jVar < nEqn; jVar++) {
@@ -798,8 +795,8 @@ class CSysMatrix {
    */
   template <class OtherType>
   inline void AddVal2Diag(unsigned long block_i, OtherType val_matrix) {
-    for (auto iVar = 0ul; iVar < nVar; iVar++)
-      matrix[dia_ptr[block_i] * nVar * nVar + iVar * (nVar + 1)] += PassiveAssign(val_matrix);
+    auto d = &mat.d[block_i * nVar * nVar];
+    for (auto iVar = 0ul; iVar < nVar; iVar++) d[iVar * (nVar + 1)] += PassiveAssign(val_matrix);
   }
 
   /*!
@@ -811,7 +808,7 @@ class CSysMatrix {
    */
   template <class OtherType>
   inline void AddVal2Diag(unsigned long block_i, unsigned long iVar, OtherType val) {
-    matrix[dia_ptr[block_i] * nVar * nVar + iVar * (nVar + 1)] += PassiveAssign(val);
+    mat.d[block_i * nVar * nVar + iVar * (nVar + 1)] += PassiveAssign(val);
   }
 
   /*!
@@ -822,13 +819,11 @@ class CSysMatrix {
    */
   template <class OtherType>
   inline void SetVal2Diag(unsigned long block_i, OtherType val_matrix) {
-    unsigned long iVar, index = dia_ptr[block_i] * nVar * nVar;
-
     /*--- Clear entire block before setting its diagonal. ---*/
     SU2_OMP_SIMD
-    for (iVar = 0; iVar < nVar * nVar; iVar++) matrix[index + iVar] = 0.0;
+    for (auto iVar = 0ul; iVar < nVar * nVar; iVar++) mat.d[block_i * nVar * nVar + iVar] = 0.0;
 
-    for (iVar = 0; iVar < nVar; iVar++) matrix[index + iVar * (nVar + 1)] = PassiveAssign(val_matrix);
+    AddVal2Diag(block_i, val_matrix);
   }
 
   /*!
diff --git a/Common/include/linear_algebra/CSysMatrix.inl b/Common/include/linear_algebra/CSysMatrix.inl
index 163e6fb08da..d40d68b3a4f 100644
--- a/Common/include/linear_algebra/CSysMatrix.inl
+++ b/Common/include/linear_algebra/CSysMatrix.inl
@@ -34,21 +34,16 @@
 
 template <class ScalarType>
 FORCEINLINE ScalarType* CSysMatrix<ScalarType>::GetBlock_ILUMatrix(unsigned long block_i, unsigned long block_j) {
-  /*--- The position of the diagonal block is known which allows halving the search space. ---*/
-  const auto end = (block_j < block_i) ? dia_ptr_ilu[block_i] : row_ptr_ilu[block_i + 1];
-  for (auto index = (block_j < block_i) ? row_ptr_ilu[block_i] : dia_ptr_ilu[block_i]; index < end; ++index)
-    if (col_ind_ilu[index] == block_j) return &ILU_matrix[index * nVar * nVar];
+  if (block_i == block_j) return &ilu.d[block_i * nVar * nVar];
+  const auto* __restrict row_ptr = block_j < block_i ? ilu.row_ptr_l : ilu.row_ptr_u;
+  const auto* __restrict col_ind = block_j < block_i ? ilu.col_ind_l : ilu.col_ind_u;
+  auto* __restrict vals = block_j < block_i ? ilu.l : ilu.u;
+  for (auto k = row_ptr[block_i]; k < row_ptr[block_i + 1]; ++k) {
+    if (col_ind[k] == block_j) return vals + k * nVar * nVar;
+  }
   return nullptr;
 }
 
-template <class ScalarType>
-FORCEINLINE void CSysMatrix<ScalarType>::SetBlock_ILUMatrix(unsigned long block_i, unsigned long block_j,
-                                                            ScalarType* val_block) {
-  auto ilu_ij = GetBlock_ILUMatrix(block_i, block_j);
-  if (!ilu_ij) return;
-  MatrixCopy(val_block, ilu_ij);
-}
-
 namespace {
 
 template <class T, bool alpha, bool beta, bool transp>
@@ -145,7 +140,7 @@ template <class ScalarType>
 FORCEINLINE void CSysMatrix<ScalarType>::Gauss_Elimination(unsigned long block_i, ScalarType* rhs) const {
   /*--- Copy block, as the algorithm modifies the matrix ---*/
   ScalarType block[MAXNVAR * MAXNVAR];
-  MatrixCopy(&matrix[dia_ptr[block_i] * nVar * nVar], block);
+  MatrixCopy(&mat.d[block_i * nVar * nVar], block);
 
   Gauss_Elimination(block, rhs);
 }
@@ -154,7 +149,7 @@ template <class ScalarType>
 FORCEINLINE void CSysMatrix<ScalarType>::InverseDiagonalBlock(unsigned long block_i, ScalarType* invBlock) const {
   /*--- Copy block, as the algorithm modifies the matrix ---*/
   ScalarType block[MAXNVAR * MAXNVAR];
-  MatrixCopy(&matrix[dia_ptr[block_i] * nVar * nVar], block);
+  MatrixCopy(&mat.d[block_i * nVar * nVar], block);
 
   MatrixInverse(block, invBlock);
 }
@@ -162,7 +157,7 @@ FORCEINLINE void CSysMatrix<ScalarType>::InverseDiagonalBlock(unsigned long bloc
 template <class ScalarType>
 FORCEINLINE const ScalarType* CSysMatrix<ScalarType>::InvertDiagonalBlockILUMatrix(unsigned long block_i) {
   /*--- Copy block, as the algorithm modifies the matrix ---*/
-  auto* Uii = &ILU_matrix[dia_ptr_ilu[block_i] * nVar * nVar];
+  auto* Uii = &ilu.d[block_i * nVar * nVar];
   ScalarType block[MAXNVAR * MAXNVAR];
   MatrixCopy(Uii, block);
   MatrixInverse(block, Uii);
@@ -174,10 +169,13 @@ FORCEINLINE void CSysMatrix<ScalarType>::RowProduct(const CSysVector<ScalarType>
                                                     ScalarType* prod) const {
   for (auto iVar = 0ul; iVar < nVar; iVar++) prod[iVar] = 0.0;
 
-  for (auto index = row_ptr[row_i]; index < row_ptr[row_i + 1]; index++) {
-    auto col_j = col_ind[index];
-    MatrixVectorProductAdd(&matrix[index * nVar * nEqn], &vec[col_j * nEqn], prod);
-  }
+  for (auto index = mat.row_ptr_l[row_i]; index < mat.row_ptr_l[row_i + 1]; index++)
+    MatrixVectorProductAdd(&mat.l[index * nVar * nEqn], &vec[mat.col_ind_l[index] * nEqn], prod);
+
+  MatrixVectorProductAdd(&mat.d[row_i * nVar * nEqn], &vec[row_i * nEqn], prod);
+
+  for (auto index = mat.row_ptr_u[row_i]; index < mat.row_ptr_u[row_i + 1]; index++)
+    MatrixVectorProductAdd(&mat.u[index * nVar * nEqn], &vec[mat.col_ind_u[index] * nEqn], prod);
 }
 
 template <class ScalarType>
@@ -185,11 +183,11 @@ FORCEINLINE void CSysMatrix<ScalarType>::UpperProduct(const CSysVector<ScalarTyp
                                                       unsigned long col_ub, ScalarType* prod) const {
   for (auto iVar = 0ul; iVar < nVar; iVar++) prod[iVar] = 0.0;
 
-  for (auto index = dia_ptr[row_i] + 1; index < row_ptr[row_i + 1]; index++) {
-    auto col_j = col_ind[index];
+  for (auto index = mat.row_ptr_u[row_i]; index < mat.row_ptr_u[row_i + 1]; index++) {
+    auto col_j = mat.col_ind_u[index];
     /*--- Always include halos. ---*/
     if (col_j < col_ub || col_j >= nPointDomain)
-      MatrixVectorProductAdd(&matrix[index * nVar * nEqn], &vec[col_j * nEqn], prod);
+      MatrixVectorProductAdd(&mat.u[index * nVar * nEqn], &vec[col_j * nEqn], prod);
   }
 }
 
@@ -198,14 +196,14 @@ FORCEINLINE void CSysMatrix<ScalarType>::LowerProduct(const CSysVector<ScalarTyp
                                                       unsigned long col_lb, ScalarType* prod) const {
   for (auto iVar = 0ul; iVar < nVar; iVar++) prod[iVar] = 0.0;
 
-  for (auto index = row_ptr[row_i]; index < dia_ptr[row_i]; index++) {
-    auto col_j = col_ind[index];
-    if (col_j >= col_lb) MatrixVectorProductAdd(&matrix[index * nVar * nEqn], &vec[col_j * nEqn], prod);
+  for (auto index = mat.row_ptr_l[row_i]; index < mat.row_ptr_l[row_i + 1]; index++) {
+    auto col_j = mat.col_ind_l[index];
+    if (col_j >= col_lb) MatrixVectorProductAdd(&mat.l[index * nVar * nEqn], &vec[col_j * nEqn], prod);
   }
 }
 
 template <class ScalarType>
 FORCEINLINE void CSysMatrix<ScalarType>::DiagonalProduct(const CSysVector<ScalarType>& vec, unsigned long row_i,
                                                          ScalarType* prod) const {
-  MatrixVectorProduct(&matrix[dia_ptr[row_i] * nVar * nEqn], &vec[row_i * nEqn], prod);
+  MatrixVectorProduct(&mat.d[row_i * nVar * nEqn], &vec[row_i * nEqn], prod);
 }
diff --git a/Common/include/toolboxes/graph_toolbox.hpp b/Common/include/toolboxes/graph_toolbox.hpp
index 344e08ccf7c..170b973ea1f 100644
--- a/Common/include/toolboxes/graph_toolbox.hpp
+++ b/Common/include/toolboxes/graph_toolbox.hpp
@@ -417,30 +417,86 @@ CCompressedSparsePattern<Index_t> buildCSRPattern(Geometry_t& geometry, Connecti
 }
 
 /*!
- * \brief Build a lookup table of the absolute positions of the non zero entries
- *        of a compressed sparse pattern, accessed when visiting the FVM edges
- *        of a grid. The table can then be used for fast access (avoids searches)
- *        to the non zero entries of a sparse matrix associated with the pattern.
- * \param[in] geometry - Definition of the grid.
- * \param[in] pattern - Sparse pattern.
- * \return nEdge by 2 matrix.
+ * \brief Extract the strictly-lower part of a symmetric compressed sparse pattern.
+ *        For each row i, the lower entries are those at positions [outerPtr[i], diagPtr[i]).
+ * \param[in] csr - Full symmetric pattern with diagonal pointer already built.
+ * \return Strictly-lower CSR pattern.
  */
-template <class Geometry_t, typename Index_t>
-CEdgeToNonZeroMap<Index_t> mapEdgesToSparsePattern(Geometry_t& geometry,
-                                                   const CCompressedSparsePattern<Index_t>& pattern) {
-  assert(!pattern.empty());
+template <typename Index_t>
+CCompressedSparsePattern<Index_t> buildLowerPattern(const CCompressedSparsePattern<Index_t>& csr) {
+  assert(!csr.empty());
+  const auto nPoint = csr.getOuterSize();
+  const auto* outerPtr = csr.outerPtr();
+  const auto* innerIdx = csr.innerIdx();
+  const auto* diagPtr = csr.diagPtr();
+
+  su2vector<Index_t> outerPtrL(nPoint + 1);
+  outerPtrL(0) = 0;
+  for (auto i = 0ul; i < nPoint; ++i) outerPtrL(i + 1) = outerPtrL(i) + static_cast<Index_t>(diagPtr[i] - outerPtr[i]);
+
+  su2vector<Index_t> innerIdxL(outerPtrL(nPoint));
+  Index_t k = 0;
+  for (auto i = 0ul; i < nPoint; ++i)
+    for (auto p = outerPtr[i]; p < diagPtr[i]; ++p) innerIdxL(k++) = innerIdx[p];
 
-  CEdgeToNonZeroMap<Index_t> edgeMap(geometry.GetnEdge(), 2);
+  return CCompressedSparsePattern<Index_t>(std::move(outerPtrL), std::move(innerIdxL));
+}
+
+/*!
+ * \brief Extract the strictly-upper part of a symmetric compressed sparse pattern.
+ *        For each row i, the upper entries are those at positions (diagPtr[i], outerPtr[i+1]).
+ * \param[in] csr - Full symmetric pattern with diagonal pointer already built.
+ * \return Strictly-upper CSR pattern.
+ */
+template <typename Index_t>
+CCompressedSparsePattern<Index_t> buildUpperPattern(const CCompressedSparsePattern<Index_t>& csr) {
+  assert(!csr.empty());
+  const auto nPoint = csr.getOuterSize();
+  const auto* outerPtr = csr.outerPtr();
+  const auto* innerIdx = csr.innerIdx();
+  const auto* diagPtr = csr.diagPtr();
+
+  su2vector<Index_t> outerPtrU(nPoint + 1);
+  outerPtrU(0) = 0;
+  for (auto i = 0ul; i < nPoint; ++i)
+    outerPtrU(i + 1) = outerPtrU(i) + static_cast<Index_t>(outerPtr[i + 1] - diagPtr[i] - 1);
+
+  su2vector<Index_t> innerIdxU(outerPtrU(nPoint));
+  Index_t k = 0;
+  for (auto i = 0ul; i < nPoint; ++i)
+    for (auto p = diagPtr[i] + 1; p < outerPtr[i + 1]; ++p) innerIdxU(k++) = innerIdx[p];
 
-  for (Index_t iEdge = 0; iEdge < geometry.GetnEdge(); ++iEdge) {
-    Index_t iPoint = geometry.edges->GetNode(iEdge, 0);
-    Index_t jPoint = geometry.edges->GetNode(iEdge, 1);
+  return CCompressedSparsePattern<Index_t>(std::move(outerPtrU), std::move(innerIdxU));
+}
 
-    edgeMap(iEdge, 0) = pattern.quickFindInnerIdx(iPoint, jPoint);
-    edgeMap(iEdge, 1) = pattern.quickFindInnerIdx(jPoint, iPoint);
+/*!
+ * \brief Build bijective maps between strictly-lower (L) and strictly-upper (U) non-zero entries
+ *        that are each other's transposes. Requires a symmetric pattern.
+ *        l_to_u[k_l] = k_u such that U-entry k_u is the transpose of L-entry k_l, and vice-versa.
+ * \param[in] pattern_l - Strictly-lower CSR pattern.
+ * \param[in] pattern_u - Strictly-upper CSR pattern.
+ * \param[out] l_to_u - For each L-entry index, the U-entry index of its transpose.
+ * \param[out] u_to_l - For each U-entry index, the L-entry index of its transpose.
+ */
+template <typename Index_t>
+void buildLUTransposeMaps(const CCompressedSparsePattern<Index_t>& pattern_l,
+                          const CCompressedSparsePattern<Index_t>& pattern_u, su2vector<Index_t>& l_to_u,
+                          su2vector<Index_t>& u_to_l) {
+  const auto nnz_l = pattern_l.getNumNonZeros();
+  const auto nnz_u = pattern_u.getNumNonZeros();
+  assert(nnz_l == nnz_u && "L and U must have the same NNZ (symmetric pattern).");
+
+  l_to_u.resize(nnz_l);
+  u_to_l.resize(nnz_u);
+
+  for (Index_t i = 0; i < pattern_l.getOuterSize(); ++i) {
+    for (Index_t k_l = pattern_l.outerPtr()[i]; k_l < pattern_l.outerPtr()[i + 1]; ++k_l) {
+      const Index_t j = pattern_l.innerIdx()[k_l];            // j < i (strictly lower)
+      const Index_t k_u = pattern_u.quickFindInnerIdx(j, i);  // (j,i) is in U since j<i
+      l_to_u(k_l) = k_u;
+      u_to_l(k_u) = k_l;
+    }
   }
-
-  return edgeMap;
 }
 
 /*!
diff --git a/Common/src/geometry/CGeometry.cpp b/Common/src/geometry/CGeometry.cpp
index 258e5612be8..f6f423761e6 100644
--- a/Common/src/geometry/CGeometry.cpp
+++ b/Common/src/geometry/CGeometry.cpp
@@ -4123,39 +4123,39 @@ void CGeometry::SetGridVelocity(const CConfig* config) {
   }
 }
 
-const CCompressedSparsePatternUL& CGeometry::GetSparsePattern(ConnectivityType type, unsigned long fillLvl) {
+const CGeometry::LDUSparsePattern& CGeometry::GetSparsePattern(ConnectivityType type, unsigned long fillLvl) {
   bool fvm = (type == ConnectivityType::FiniteVolume);
-
-  CCompressedSparsePatternUL* pattern = nullptr;
-
-  if (fillLvl == 0)
-    pattern = fvm ? &finiteVolumeCSRFill0 : &finiteElementCSRFill0;
-  else
-    pattern = fvm ? &finiteVolumeCSRFillN : &finiteElementCSRFillN;
-
-  if (pattern->empty()) {
-    *pattern = buildCSRPattern(*this, type, fillLvl);
-    pattern->buildDiagPtr();
+  auto& grp = fillLvl == 0 ? (fvm ? finiteVolumePatternFill0 : finiteElementPatternFill0)
+                           : (fvm ? finiteVolumePatternFillN : finiteElementPatternFillN);
+  if (grp.empty()) {
+    grp.csr = buildCSRPattern(*this, type, fillLvl);
+    grp.csr.buildDiagPtr();
+    grp.l = buildLowerPattern(grp.csr);
+    grp.u = buildUpperPattern(grp.csr);
   }
-
-  return *pattern;
+  return grp;
 }
 
-const CEdgeToNonZeroMapUL& CGeometry::GetEdgeToSparsePatternMap() {
-  if (edgeToCSRMap.empty()) {
-    if (finiteVolumeCSRFill0.empty()) {
-      finiteVolumeCSRFill0 = buildCSRPattern(*this, ConnectivityType::FiniteVolume, 0ul);
-    }
-    edgeToCSRMap = mapEdgesToSparsePattern(*this, finiteVolumeCSRFill0);
+const su2vector<unsigned long>& CGeometry::GetLToUTransposeSparsePatternMap(ConnectivityType type) {
+  bool fvm = (type == ConnectivityType::FiniteVolume);
+  auto& l_to_u = fvm ? finiteVolumeLToUTranspMap : finiteElementLToUTranspMap;
+  if (l_to_u.empty()) {
+    auto& u_to_l = fvm ? finiteVolumeUToLTranspMap : finiteElementUToLTranspMap;
+    const auto& pat = GetSparsePattern(type);
+    buildLUTransposeMaps(pat.l, pat.u, l_to_u, u_to_l);
   }
-  return edgeToCSRMap;
+  return l_to_u;
 }
 
-const su2vector<unsigned long>& CGeometry::GetTransposeSparsePatternMap(ConnectivityType type) {
-  /*--- Yes the const cast is weird but it is still better than repeating code. ---*/
-  auto& pattern = const_cast<CCompressedSparsePatternUL&>(GetSparsePattern(type));
-  pattern.buildTransposePtr();
-  return pattern.transposePtr();
+const su2vector<unsigned long>& CGeometry::GetUToLTransposeSparsePatternMap(ConnectivityType type) {
+  bool fvm = (type == ConnectivityType::FiniteVolume);
+  auto& u_to_l = fvm ? finiteVolumeUToLTranspMap : finiteElementUToLTranspMap;
+  if (u_to_l.empty()) {
+    auto& l_to_u = fvm ? finiteVolumeLToUTranspMap : finiteElementLToUTranspMap;
+    const auto& pat = GetSparsePattern(type);
+    buildLUTransposeMaps(pat.l, pat.u, l_to_u, u_to_l);
+  }
+  return u_to_l;
 }
 
 const CCompressedSparsePatternUL& CGeometry::GetEdgeColoring(su2double* efficiency, bool maximizeEdgeColorGroupSize) {
diff --git a/Common/src/geometry/CMultiGridGeometry.cpp b/Common/src/geometry/CMultiGridGeometry.cpp
index 347a633e2a7..684da742b13 100644
--- a/Common/src/geometry/CMultiGridGeometry.cpp
+++ b/Common/src/geometry/CMultiGridGeometry.cpp
@@ -896,6 +896,9 @@ void CMultiGridGeometry::SetPoint_Connectivity(const CGeometry* fine_grid) {
       }
     }
 
+    /*--- See CPhysicalGeometry::SetPoint_Connectivity for why we sort. ---*/
+    sort(points[iCoarsePoint].begin(), points[iCoarsePoint].end());
+
     /*--- Set the number of neighbors variable, this is
      important for JST and multigrid in parallel ---*/
     nodes->SetnNeighbor(iCoarsePoint, points[iCoarsePoint].size());
diff --git a/Common/src/geometry/CPhysicalGeometry.cpp b/Common/src/geometry/CPhysicalGeometry.cpp
index 79421eded3e..a965b248b67 100644
--- a/Common/src/geometry/CPhysicalGeometry.cpp
+++ b/Common/src/geometry/CPhysicalGeometry.cpp
@@ -4463,6 +4463,11 @@ void CPhysicalGeometry::SetPoint_Connectivity() {
         }
       }
 
+      /*--- Sort the neighbors in ascending order so that the edge numbering done in
+       *    SetEdges matches the upper-CSR ordering of the sparse pattern. This makes
+       *    the edge->upper-block map the identity for the CSysMatrix LDU storage. ---*/
+      sort(points[iPoint].begin(), points[iPoint].end());
+
       /*--- Set the number of neighbors variable, this is important for JST and multigrid in parallel. ---*/
       nodes->SetnNeighbor(iPoint, points[iPoint].size());
     }
diff --git a/Common/src/linear_algebra/CPastixWrapper.cpp b/Common/src/linear_algebra/CPastixWrapper.cpp
index f4db581a879..f213939eaf6 100644
--- a/Common/src/linear_algebra/CPastixWrapper.cpp
+++ b/Common/src/linear_algebra/CPastixWrapper.cpp
@@ -41,7 +41,7 @@ void CPastixWrapper<ScalarType>::Initialize(CGeometry* geometry, const CConfig*
   if (isinitialized) return;  // only need to do this once
 
   const unsigned long nVar = matrix.nVar, nPoint = matrix.nPoint, nPointDomain = matrix.nPointDomain;
-  const unsigned long *row_ptr = matrix.rowptr, *col_ind = matrix.colidx;
+  const unsigned long *row_ptr = csr_row_ptr.data(), *col_ind = csr_col_ind.data();
   const unsigned long nNonZero = row_ptr[nPointDomain];
 
   /*--- Allocate ---*/
@@ -209,6 +209,22 @@ void CPastixWrapper<ScalarType>::Initialize(CGeometry* geometry, const CConfig*
   isinitialized = true;
 }
 
+template <class ScalarType>
+void CPastixWrapper<ScalarType>::AssembleValues() {
+  const auto nDomain = matrix.nPointDomain;
+  const auto blkSz = matrix.blkSz;
+  const auto *d = matrix.d, *l = matrix.l, *u = matrix.u;
+  for (auto iPoint = 0ul; iPoint < nDomain; ++iPoint) {
+    auto* dst = values.data() + csr_row_ptr[iPoint] * blkSz;
+    for (auto k = matrix.row_ptr_l[iPoint]; k < matrix.row_ptr_l[iPoint + 1]; ++k, dst += blkSz)
+      for (auto b = 0ul; b < blkSz; ++b) dst[b] = SU2_TYPE::GetValue(l[k * blkSz + b]);
+    for (auto b = 0ul; b < blkSz; ++b) dst[b] = SU2_TYPE::GetValue(d[iPoint * blkSz + b]);
+    dst += blkSz;
+    for (auto k = matrix.row_ptr_u[iPoint]; k < matrix.row_ptr_u[iPoint + 1]; ++k, dst += blkSz)
+      for (auto b = 0ul; b < blkSz; ++b) dst[b] = SU2_TYPE::GetValue(u[k * blkSz + b]);
+  }
+}
+
 template <class ScalarType>
 void CPastixWrapper<ScalarType>::Factorize(CGeometry* geometry, const CConfig* config, unsigned short kind_fact) {
   /*--- Detect a possible change of settings between direct and adjoint that requires a reset ---*/
@@ -246,28 +262,30 @@ void CPastixWrapper<ScalarType>::Factorize(CGeometry* geometry, const CConfig* c
 
   if (isfactorized && !factorize) return;  // No
 
-  /*--- Yes ---*/
+  /*--- Yes: assemble LDU blocks into the flat CSR buffer ---*/
+  AssembleValues();
 
   if (mpi_rank == MASTER_NODE && verb > 0) {
     cout << "\n+-------------------------------------------------+";
     cout << "\n+     PaStiX : Parallel Sparse matriX package     +" << endl;
   }
 
-  const unsigned long szBlk = matrix.nVar * matrix.nVar, nNonZero = values.size();
+  const auto blkSz = matrix.blkSz;
 
-  /*--- Copy matrix values and swap blocks as required ---*/
-
-  for (auto i = 0ul; i < nNonZero; ++i) values[i] = SU2_TYPE::GetValue(matrix.values[i]);
+  /*--- Permute blocks for rows with halo columns into global sorted order.
+        AssembleValues wrote them in LDU order; copy to tmp then write back sorted. ---*/
 
+  vector<su2mixedfloat> tmp;
   for (auto i = 0ul; i < sort_rows.size(); ++i) {
     const auto iRow = sort_rows[i];
-    const auto begin = matrix.rowptr[iRow];
-
-    for (auto j = 0ul; j < sort_order[i].size(); ++j) {
-      const auto target = (begin + j) * szBlk;
-      const auto source = sort_order[i][j] * szBlk;
-
-      for (auto k = 0ul; k < szBlk; ++k) values[target + k] = SU2_TYPE::GetValue(matrix.values[source + k]);
+    /*--- colptr is 1-based Fortran numbering: row start = colptr[iRow] - 1. ---*/
+    const auto begin = static_cast<unsigned long>(colptr[iRow] - 1);
+    const auto nnz_row = sort_order[i].size();
+
+    tmp.assign(values.begin() + begin * blkSz, values.begin() + (begin + nnz_row) * blkSz);
+    for (auto j = 0ul; j < nnz_row; ++j) {
+      const auto src_pos = sort_order[i][j] - begin;
+      for (auto k = 0ul; k < blkSz; ++k) values[(begin + j) * blkSz + k] = tmp[src_pos * blkSz + k];
     }
   }
 
diff --git a/Common/src/linear_algebra/CSysMatrix.cpp b/Common/src/linear_algebra/CSysMatrix.cpp
index e90f9e1c704..f02d908a059 100644
--- a/Common/src/linear_algebra/CSysMatrix.cpp
+++ b/Common/src/linear_algebra/CSysMatrix.cpp
@@ -31,6 +31,7 @@
 #include "../../include/toolboxes/allocation_toolbox.hpp"
 
 #include <cmath>
+#include <vector>
 
 namespace {
 /*--- Helper function to regularize small pivots ---*/
@@ -51,21 +52,36 @@ CSysMatrix<ScalarType>::CSysMatrix() : rank(SU2_MPI::GetRank()), size(SU2_MPI::G
   SU2_ZONE_SCOPED
 
   nPoint = nPointDomain = nVar = nEqn = 0;
-  nnz = nnz_ilu = 0;
+  mat.nnz_l = mat.nnz_u = 0;
+  gpu.nnz_l = gpu.nnz_u = 0;
+  ilu.nnz_l = ilu.nnz_u = 0;
   ilu_fill_in = 0;
 
   omp_partitions = nullptr;
 
-  matrix = nullptr;
-  row_ptr = nullptr;
-  dia_ptr = nullptr;
-  col_ind = nullptr;
-  col_ptr = nullptr;
-
-  ILU_matrix = nullptr;
-  row_ptr_ilu = nullptr;
-  dia_ptr_ilu = nullptr;
-  col_ind_ilu = nullptr;
+  mat.row_ptr_l = nullptr;
+  mat.col_ind_l = nullptr;
+  mat.row_ptr_u = nullptr;
+  mat.col_ind_u = nullptr;
+  l_to_u_transp = nullptr;
+  u_to_l_transp = nullptr;
+  edge_ptr_l = nullptr;
+
+  mat.d = nullptr;
+  mat.l = nullptr;
+  mat.u = nullptr;
+
+  gpu.d = nullptr;
+  gpu.l = nullptr;
+  gpu.u = nullptr;
+  gpu.row_ptr_l = nullptr;
+  gpu.col_ind_l = nullptr;
+  gpu.row_ptr_u = nullptr;
+  gpu.col_ind_u = nullptr;
+
+  ilu.l = nullptr;
+  ilu.d = nullptr;
+  ilu.u = nullptr;
 
   invM = nullptr;
 
@@ -82,14 +98,22 @@ CSysMatrix<ScalarType>::~CSysMatrix() {
   SU2_ZONE_SCOPED
 
   delete[] omp_partitions;
-  MemoryAllocation::aligned_free(ILU_matrix);
-  MemoryAllocation::aligned_free(matrix);
+  MemoryAllocation::aligned_free(ilu.l);
+  MemoryAllocation::aligned_free(ilu.d);
+  MemoryAllocation::aligned_free(ilu.u);
+  MemoryAllocation::aligned_free(mat.d);
+  MemoryAllocation::aligned_free(mat.l);
+  MemoryAllocation::aligned_free(mat.u);
   MemoryAllocation::aligned_free(invM);
 
   if (useCuda) {
-    GPUMemoryAllocation::gpu_free(d_matrix);
-    GPUMemoryAllocation::gpu_free(d_row_ptr);
-    GPUMemoryAllocation::gpu_free(d_col_ind);
+    GPUMemoryAllocation::gpu_free(gpu.d);
+    GPUMemoryAllocation::gpu_free(gpu.l);
+    GPUMemoryAllocation::gpu_free(gpu.u);
+    GPUMemoryAllocation::gpu_free(gpu.row_ptr_l);
+    GPUMemoryAllocation::gpu_free(gpu.col_ind_l);
+    GPUMemoryAllocation::gpu_free(gpu.row_ptr_u);
+    GPUMemoryAllocation::gpu_free(gpu.col_ind_u);
   }
 
 #ifdef USE_MKL
@@ -109,7 +133,7 @@ void CSysMatrix<ScalarType>::Initialize(unsigned long npoint, unsigned long npoi
 
   if (npoint == 0) return;
 
-  if (matrix != nullptr) {
+  if (mat.d != nullptr) {
     SU2_MPI::Error("CSysMatrix can only be initialized once.", CURRENT_FUNCTION);
   }
 
@@ -146,45 +170,49 @@ void CSysMatrix<ScalarType>::Initialize(unsigned long npoint, unsigned long npoi
   nPoint = npoint;
   nPointDomain = npointdomain;
 
-  /*--- Get sparse structure pointers from geometry,
-   *    the data is managed by CGeometry to allow re-use. ---*/
-
-  const auto& csr = geometry->GetSparsePattern(type, 0);
-
-  nnz = csr.getNumNonZeros();
-  row_ptr = csr.outerPtr();
-  col_ind = csr.innerIdx();
-  dia_ptr = csr.diagPtr();
-
-  /*--- Allocate data. ---*/
+  /*--- Allocate host data. ---*/
   auto allocAndInit = [](ScalarType*& ptr, unsigned long num) {
     ptr = MemoryAllocation::aligned_alloc<ScalarType, true>(64, num * sizeof(ScalarType));
   };
 
-  allocAndInit(matrix, nnz * nVar * nEqn);
-
   useCuda = config->GetCUDA();
 
+  /*--- L/D/U index structures and value arrays. ---*/
+  {
+    const auto& pat = geometry->GetSparsePattern(type, 0);
+    mat.row_ptr_l = pat.l.outerPtr();
+    mat.col_ind_l = pat.l.innerIdx();
+    mat.nnz_l = pat.l.getNumNonZeros();
+    mat.row_ptr_u = pat.u.outerPtr();
+    mat.col_ind_u = pat.u.innerIdx();
+    mat.nnz_u = pat.u.getNumNonZeros();
+  }
+  allocAndInit(mat.d, nPoint * nVar * nEqn);
+  allocAndInit(mat.l, mat.nnz_l * nVar * nEqn);
+  allocAndInit(mat.u, mat.nnz_u * nVar * nEqn);
+
   if (useCuda) {
-    /*--- Allocate GPU data. ---*/
     auto GPUAllocAndInit = [](ScalarType*& ptr, unsigned long num) {
       ptr = GPUMemoryAllocation::gpu_alloc<ScalarType, true>(num * sizeof(ScalarType));
     };
-
-    auto GPUAllocAndCopy = [](const unsigned long*& ptr, const unsigned long*& src_ptr, unsigned long num) {
-      ptr = GPUMemoryAllocation::gpu_alloc_cpy<const unsigned long>(src_ptr, num * sizeof(const unsigned long));
+    auto GPUAllocAndCopy = [](const unsigned long*& ptr, const unsigned long* src_ptr, unsigned long num) {
+      ptr = GPUMemoryAllocation::gpu_alloc_cpy<unsigned long>(src_ptr, num * sizeof(unsigned long));
     };
-
-    GPUAllocAndInit(d_matrix, nnz * nVar * nEqn);
-    GPUAllocAndCopy(d_row_ptr, row_ptr, (nPointDomain + 1));
-    GPUAllocAndCopy(d_col_ind, col_ind, nnz);
+    GPUAllocAndInit(gpu.d, nPoint * nVar * nEqn);
+    GPUAllocAndInit(gpu.l, mat.nnz_l * nVar * nEqn);
+    GPUAllocAndInit(gpu.u, mat.nnz_u * nVar * nEqn);
+    GPUAllocAndCopy(gpu.row_ptr_l, mat.row_ptr_l, nPointDomain + 1);
+    GPUAllocAndCopy(gpu.col_ind_l, mat.col_ind_l, mat.nnz_l);
+    GPUAllocAndCopy(gpu.row_ptr_u, mat.row_ptr_u, nPointDomain + 1);
+    GPUAllocAndCopy(gpu.col_ind_u, mat.col_ind_u, mat.nnz_u);
   }
 
-  if (needTranspPtr) col_ptr = geometry->GetTransposeSparsePatternMap(type).data();
-
   if (type == ConnectivityType::FiniteVolume) {
-    edge_ptr.ptr = geometry->GetEdgeToSparsePatternMap().data();
-    edge_ptr.nEdge = geometry->GetnEdge();
+    edge_ptr_l = geometry->GetUToLTransposeSparsePatternMap(type).data();
+  }
+  if (needTranspPtr) {
+    l_to_u_transp = geometry->GetLToUTransposeSparsePatternMap(type).data();
+    u_to_l_transp = geometry->GetUToLTransposeSparsePatternMap(type).data();
   }
 
   /*--- Get ILU sparse pattern, if fill is 0 no new data is allocated. --*/
@@ -192,21 +220,26 @@ void CSysMatrix<ScalarType>::Initialize(unsigned long npoint, unsigned long npoi
   if (ilu_needed) {
     ilu_fill_in = config->GetLinear_Solver_ILU_n();
 
-    const auto& csr_ilu = geometry->GetSparsePattern(type, ilu_fill_in);
-
-    row_ptr_ilu = csr_ilu.outerPtr();
-    col_ind_ilu = csr_ilu.innerIdx();
-    dia_ptr_ilu = csr_ilu.diagPtr();
-    nnz_ilu = csr_ilu.getNumNonZeros();
+    const auto& pat_ilu = geometry->GetSparsePattern(type, ilu_fill_in);
+    ilu.row_ptr_l = pat_ilu.l.outerPtr();
+    ilu.col_ind_l = pat_ilu.l.innerIdx();
+    ilu.nnz_l = pat_ilu.l.getNumNonZeros();
+    ilu.row_ptr_u = pat_ilu.u.outerPtr();
+    ilu.col_ind_u = pat_ilu.u.innerIdx();
+    ilu.nnz_u = pat_ilu.u.getNumNonZeros();
 
     if (omp_get_max_threads() > 1 && config->GetLinear_Solver_ILU_levels()) {
-      levels_ilu = computeLevels(csr_ilu);
+      levels_ilu = computeLevels(pat_ilu.l);
     }
   }
 
   /*--- Preconditioners. ---*/
 
-  if (ilu_needed) allocAndInit(ILU_matrix, nnz_ilu * nVar * nEqn);
+  if (ilu_needed) {
+    allocAndInit(ilu.l, ilu.nnz_l * nVar * nEqn);
+    allocAndInit(ilu.d, nPointDomain * nVar * nEqn);
+    allocAndInit(ilu.u, ilu.nnz_u * nVar * nEqn);
+  }
 
   if (diag_needed) allocAndInit(invM, nPointDomain * nVar * nEqn);
 
@@ -216,7 +249,7 @@ void CSysMatrix<ScalarType>::Initialize(unsigned long npoint, unsigned long npoi
 
   /*--- Set suitable chunk sizes for light static for loops, and heavy
    dynamic ones, such that threads are approximately evenly loaded. ---*/
-  omp_light_size = computeStaticChunkSize(nnz * nVar * nEqn, num_threads, OMP_MAX_SIZE_L);
+  omp_light_size = computeStaticChunkSize((mat.nnz_l + mat.nnz_u + nPoint) * nVar * nEqn, num_threads, OMP_MAX_SIZE_L);
   omp_heavy_size = computeStaticChunkSize(nPointDomain, num_threads, OMP_MAX_SIZE_H);
 
   omp_num_parts = config->GetLinear_Solver_Prec_Threads();
@@ -228,13 +261,16 @@ void CSysMatrix<ScalarType>::Initialize(unsigned long npoint, unsigned long npoi
 
   /*--- Work estimate based on non-zeros to produce balanced partitions. ---*/
 
-  const auto row_ptr_prec = ilu_needed ? row_ptr_ilu : row_ptr;
-  const auto nnz_prec = row_ptr_prec[nPointDomain];
-
+  /*--- Cumulative nnz up to row iPoint for the preconditioner's LDU pattern. ---*/
+  auto nnz_up_to = [&](unsigned long iPoint) -> unsigned long {
+    if (ilu_needed) return ilu.row_ptr_l[iPoint] + iPoint + ilu.row_ptr_u[iPoint];
+    return mat.row_ptr_l[iPoint] + iPoint + mat.row_ptr_u[iPoint];
+  };
+  const auto nnz_prec = nnz_up_to(nPointDomain);
   const auto nnz_per_part = roundUpDiv(nnz_prec, omp_num_parts);
 
   for (auto iPoint = 0ul, part = 0ul; iPoint < nPointDomain; ++iPoint) {
-    if (row_ptr_prec[iPoint] >= part * nnz_per_part) omp_partitions[part++] = iPoint;
+    if (nnz_up_to(iPoint) >= part * nnz_per_part) omp_partitions[part++] = iPoint;
   }
 
   for (unsigned long thread = 0; thread < omp_num_parts; ++thread) {
@@ -505,11 +541,18 @@ void CSysMatrixComms::Complete(CSysVector<T>& x, CGeometry* geometry, const CCon
 template <class ScalarType>
 void CSysMatrix<ScalarType>::SetValZero() {
   SU2_ZONE_SCOPED
-  const auto size = nnz * nVar * nEqn;
-  const auto chunk = roundUpDiv(size, omp_get_num_threads());
-  const auto begin = chunk * omp_get_thread_num();
-  const auto mySize = min(chunk, size - begin) * sizeof(ScalarType);
-  memset(&matrix[begin], 0, mySize);
+  const auto nThreads = static_cast<unsigned long>(omp_get_num_threads());
+  const auto iThread = static_cast<unsigned long>(omp_get_thread_num());
+  auto zeroChunk = [&](ScalarType* arr, unsigned long n) {
+    if (n == 0) return;
+    const auto chunk = roundUpDiv(n, nThreads);
+    const auto begin = min(chunk * iThread, n);
+    const auto mySize = min(chunk, n - begin) * sizeof(ScalarType);
+    if (mySize) memset(&arr[begin], 0, mySize);
+  };
+  zeroChunk(mat.d, nPoint * nVar * nEqn);
+  zeroChunk(mat.l, mat.nnz_l * nVar * nEqn);
+  zeroChunk(mat.u, mat.nnz_u * nVar * nEqn);
   SU2_OMP_BARRIER
 }
 
@@ -517,8 +560,7 @@ template <class ScalarType>
 void CSysMatrix<ScalarType>::SetValDiagonalZero() {
   SU2_ZONE_SCOPED
   SU2_OMP_FOR_STAT(omp_heavy_size)
-  for (auto iPoint = 0ul; iPoint < nPointDomain; ++iPoint)
-    for (auto index = 0ul; index < nVar * nEqn; ++index) matrix[dia_ptr[iPoint] * nVar * nEqn + index] = 0.0;
+  for (auto iVar = 0ul; iVar < nPointDomain * nVar * nEqn; ++iVar) mat.d[iVar] = 0;
   END_SU2_OMP_FOR
 }
 
@@ -631,12 +673,13 @@ void CSysMatrix<ScalarType>::MatrixInverse(ScalarType* matrix, ScalarType* inver
 template <class ScalarType>
 void CSysMatrix<ScalarType>::DeleteValsRowi(unsigned long block_i, unsigned long row) {
   SU2_ZONE_SCOPED
-  for (auto index = row_ptr[block_i]; index < row_ptr[block_i + 1]; index++) {
-    for (auto iVar = 0u; iVar < nVar; iVar++)
-      matrix[index * nVar * nVar + row * nVar + iVar] = 0.0;  // Delete row values in the block
-    if (col_ind[index] == block_i)
-      matrix[index * nVar * nVar + row * nVar + row] = 1.0;  // Set 1 to the diagonal element
-  }
+  for (auto k = mat.row_ptr_l[block_i]; k < mat.row_ptr_l[block_i + 1]; ++k)
+    for (auto iVar = 0u; iVar < nVar; iVar++) mat.l[k * nVar * nEqn + row * nEqn + iVar] = 0.0;
+  auto* d = &mat.d[block_i * nVar * nEqn];
+  for (auto iVar = 0u; iVar < nVar; iVar++) d[row * nEqn + iVar] = 0.0;
+  d[row * nEqn + row] = 1.0;
+  for (auto k = mat.row_ptr_u[block_i]; k < mat.row_ptr_u[block_i + 1]; ++k)
+    for (auto iVar = 0u; iVar < nVar; iVar++) mat.u[k * nVar * nEqn + row * nEqn + iVar] = 0.0;
 }
 
 template <class ScalarType>
@@ -700,49 +743,53 @@ void CSysMatrix<ScalarType>::ComputeJacobiPreconditioner(const CSysVector<Scalar
 
 template <class ScalarType>
 void CSysMatrix<ScalarType>::BuildILUPreconditioner() {
+  SU2_ZONE_SCOPED
   const auto blockSize = nVar * nVar;
   ScalarType Lij[MAXNVAR * MAXNVAR], Lij_Ujk[MAXNVAR * MAXNVAR];
 
   /*--- Helper to copy block matrix to compute factorization in-place. ---*/
   auto InitIluRow = [&](const auto iPoint) {
+    MatrixCopy(&mat.d[iPoint * blockSize], &ilu.d[iPoint * blockSize]);
+
     if (ilu_fill_in == 0) {
-      /*--- ILU0, direct copy to initialize. ---*/
-      const auto begin = row_ptr_ilu[iPoint] * blockSize;
-      const auto end = row_ptr_ilu[iPoint + 1] * blockSize;
-      SU2_OMP_SIMD
-      for (unsigned long k = begin; k < end; ++k) ILU_matrix[k] = matrix[k];
+      /*--- ILU0: Same sparse pattern, copy L and U blocks directly. ---*/
+      auto copy = [&](const unsigned long* row_ptr, const ScalarType* mat, ScalarType* ilu) {
+        const unsigned long begin = row_ptr[iPoint] * blockSize;
+        const unsigned long end = row_ptr[iPoint + 1] * blockSize;
+        SU2_OMP_SIMD
+        for (auto k = begin; k < end; ++k) ilu[k] = mat[k];
+      };
+      copy(ilu.row_ptr_l, mat.l, ilu.l);
+      copy(ilu.row_ptr_u, mat.u, ilu.u);
       return;
     }
-    /*--- ILUn, clear or copy the entries of the matrix. ---*/
-    auto indexMat = row_ptr[iPoint];
-    const auto endMat = row_ptr[iPoint + 1];
-    for (auto index = row_ptr_ilu[iPoint]; index < row_ptr_ilu[iPoint + 1];) {
-      const auto jPoint = col_ind_ilu[index];
-      const auto jPointMat = col_ind[indexMat];
-      if (jPoint < jPointMat || indexMat == endMat) {
-        /*--- ILU column has not caught up with matrix column or all matrix columns were used. ---*/
-        ZeroMatrix(&ILU_matrix[index * blockSize]);
-        ++index;
-      } else {
-        /*--- Columns match, copy the matrix block. ---*/
-        if (jPoint == jPointMat) {
-          MatrixCopy(&matrix[indexMat * blockSize], &ILU_matrix[index * blockSize]);
-          ++index;
+    /*--- ILUn: Merge-scan L and U via shared lambda. ---*/
+    auto scatterPart = [&](const unsigned long* mat_row_ptr, const unsigned long* mat_col_ind,
+                           const ScalarType* mat_vals, const unsigned long* ilu_row_ptr,
+                           const unsigned long* ilu_col_ind, ScalarType* ilu_vals) {
+      auto km = mat_row_ptr[iPoint], km_end = mat_row_ptr[iPoint + 1];
+      for (auto k = ilu_row_ptr[iPoint]; k < ilu_row_ptr[iPoint + 1]; ++k) {
+        const auto jPoint = ilu_col_ind[k];
+        while (km < km_end && mat_col_ind[km] < jPoint) ++km;
+        if (km < km_end && mat_col_ind[km] == jPoint) {
+          MatrixCopy(&mat_vals[km * blockSize], &ilu_vals[k * blockSize]);
+        } else {
+          ZeroMatrix(&ilu_vals[k * blockSize]);
         }
-        /*--- We've either copied the matrix column or it has not caught up with the ILU column. ---*/
-        ++indexMat;
       }
-    }
+    };
+    scatterPart(mat.row_ptr_l, mat.col_ind_l, mat.l, ilu.row_ptr_l, ilu.col_ind_l, ilu.l);
+    scatterPart(mat.row_ptr_u, mat.col_ind_u, mat.u, ilu.row_ptr_u, ilu.col_ind_u, ilu.u);
   };
 
   /*--- Update one row of the LU matrix. ---*/
   auto BuildIluRow = [&](const auto iPoint, const auto begin, const auto end) {
     /*--- For this row (unknown), loop over its lower diagonal entries. ---*/
 
-    for (auto index = row_ptr_ilu[iPoint]; index < dia_ptr_ilu[iPoint]; ++index) {
+    for (auto kl = ilu.row_ptr_l[iPoint]; kl < ilu.row_ptr_l[iPoint + 1]; ++kl) {
       /*--- jPoint is the column index (jPoint < iPoint). ---*/
 
-      const auto jPoint = col_ind_ilu[index];
+      const auto jPoint = ilu.col_ind_l[kl];
 
       /*--- We only care about the sub matrix within "begin" and "end-1". ---*/
 
@@ -750,16 +797,16 @@ void CSysMatrix<ScalarType>::BuildILUPreconditioner() {
 
       /*--- Multiply the block by the inverse of the corresponding diagonal block. ---*/
 
-      auto* Block_ij = &ILU_matrix[index * blockSize];
-      const auto* invUjj = &ILU_matrix[dia_ptr_ilu[jPoint] * blockSize];
+      auto* Block_ij = &ilu.l[kl * blockSize];
+      const auto* invUjj = &ilu.d[jPoint * blockSize];
       MatrixMatrixProduct(Block_ij, invUjj, Lij);
 
       /*--- Lij holds Aij*inv(Ujj). Jump to the upper part of the jPoint row. ---*/
 
-      for (auto index_ = dia_ptr_ilu[jPoint] + 1; index_ < row_ptr_ilu[jPoint + 1]; ++index_) {
+      for (auto ku = ilu.row_ptr_u[jPoint]; ku < ilu.row_ptr_u[jPoint + 1]; ++ku) {
         /*--- Get the column index (kPoint > jPoint). ---*/
 
-        const auto kPoint = col_ind_ilu[index_];
+        const auto kPoint = ilu.col_ind_u[ku];
         if (kPoint >= end) break;
 
         /*--- If Aik exists, update it: Aik -= Lij * Ujk ---*/
@@ -767,7 +814,7 @@ void CSysMatrix<ScalarType>::BuildILUPreconditioner() {
         auto* Block_ik = GetBlock_ILUMatrix(iPoint, kPoint);
         if (Block_ik == nullptr) continue;
 
-        const auto* Ujk = &ILU_matrix[index_ * blockSize];
+        const auto* Ujk = &ilu.u[ku * blockSize];
         MatrixMatrixProduct(Lij, Ujk, Lij_Ujk);
         MatrixSubtraction(Block_ik, Lij_Ujk, Block_ik);
       }
@@ -827,6 +874,7 @@ void CSysMatrix<ScalarType>::BuildILUPreconditioner() {
 template <class ScalarType>
 void CSysMatrix<ScalarType>::ComputeILUPreconditioner(const CSysVector<ScalarType>& vec, CSysVector<ScalarType>& prod,
                                                       CGeometry* geometry, const CConfig* config) const {
+  SU2_ZONE_SCOPED
   /*--- Coherent view of vectors. ---*/
   SU2_OMP_BARRIER
 
@@ -839,11 +887,10 @@ void CSysMatrix<ScalarType>::ComputeILUPreconditioner(const CSysVector<ScalarTyp
   auto ForwardSolve = [&](const auto iPoint, const auto begin) {
     for (auto iVar = 0ul; iVar < nVar; ++iVar) prod(iPoint, iVar) = vec(iPoint, iVar);
 
-    for (auto index = row_ptr_ilu[iPoint]; index < dia_ptr_ilu[iPoint]; ++index) {
-      const auto jPoint = col_ind_ilu[index];
+    for (auto kl = ilu.row_ptr_l[iPoint]; kl < ilu.row_ptr_l[iPoint + 1]; ++kl) {
+      const auto jPoint = ilu.col_ind_l[kl];
       if (jPoint < begin) continue;
-      const auto* Block_ij = &ILU_matrix[index * blockSize];
-      MatrixVectorProductSub(Block_ij, &prod[jPoint * nVar], &prod[iPoint * nVar]);
+      MatrixVectorProductSub(&ilu.l[kl * blockSize], &prod[jPoint * nVar], &prod[iPoint * nVar]);
     }
   };
 
@@ -851,12 +898,12 @@ void CSysMatrix<ScalarType>::ComputeILUPreconditioner(const CSysVector<ScalarTyp
     ScalarType aux_vec[MAXNVAR];
     for (auto iVar = 0ul; iVar < nVar; ++iVar) aux_vec[iVar] = prod(iPoint, iVar);
 
-    const auto* invUii = &ILU_matrix[dia_ptr_ilu[iPoint] * blockSize];
+    const auto* invUii = &ilu.d[iPoint * blockSize];
 
-    for (auto index = dia_ptr_ilu[iPoint] + 1; index < row_ptr_ilu[iPoint + 1]; ++index) {
-      const auto jPoint = col_ind_ilu[index];
+    for (auto ku = ilu.row_ptr_u[iPoint]; ku < ilu.row_ptr_u[iPoint + 1]; ++ku) {
+      const auto jPoint = ilu.col_ind_u[ku];
       if (jPoint >= end) break;
-      const auto* Block_ij = &ILU_matrix[index * blockSize];
+      const auto* Block_ij = &ilu.u[ku * blockSize];
       MatrixVectorProductSub(Block_ij, &prod[jPoint * nVar], aux_vec);
     }
 
@@ -1045,14 +1092,14 @@ void CSysMatrix<ScalarType>::ComputeLineletPreconditioner(const CSysVector<Scala
     ScalarType aux_block[MAXNVAR * MAXNVAR], aux_vector[MAXNVAR];
 
     /*--- Copy diagonal block for first point in this linelet. ---*/
-    MatrixCopy(&matrix[dia_ptr[li.linelets[iLinelet][0]] * nVar * nVar], lineletInvDiag.data());
+    MatrixCopy(&mat.d[li.linelets[iLinelet][0] * nVar * nEqn], lineletInvDiag.data());
 
     for (auto iElem = 1ul; iElem < nElem; iElem++) {
       /*--- Setup pointers to required matrices and vectors ---*/
       const auto im1Point = li.linelets[iLinelet][iElem - 1];
       const auto iPoint = li.linelets[iLinelet][iElem];
 
-      const auto* d = &matrix[dia_ptr[iPoint] * nVar * nVar];
+      const auto* d = GetBlock(iPoint, iPoint);
       const auto* l = GetBlock(iPoint, im1Point);
       const auto* u = GetBlock(im1Point, iPoint);
 
@@ -1131,31 +1178,23 @@ void CSysMatrix<ScalarType>::EnforceSolutionAtNode(const unsigned long node_i, c
    *    symmetric the entire column may not be eliminated, the result (matrix and vector) is still correct.
    *    The vector is updated with the product of column i by the known (enforced) solution at node i. ---*/
 
-  for (auto index = row_ptr[node_i]; index < row_ptr[node_i + 1]; ++index) {
-    auto node_j = col_ind[index];
-
-    /*--- The diagonal block is handled outside the loop. ---*/
-    if (node_j == node_i) continue;
-
-    /*--- Delete block j on row i (bij) and ATTEMPT to delete block i on row j (bji). ---*/
-    auto bij = &matrix[index * nVar * nVar];
+  /*--- Visit off-diagonal columns (L then U; diagonal is handled by SetVal2Diag outside). ---*/
+  auto processOffDiag = [&](unsigned long node_j) {
+    auto bij = GetBlock(node_i, node_j);
     auto bji = GetBlock(node_j, node_i);
-
-    /*--- The "attempt" part. ---*/
     if (bji == nullptr) {
       node_j = node_i;
       bji = bij;
     }
-
     for (auto iVar = 0ul; iVar < nVar; ++iVar) {
       for (auto jVar = 0ul; jVar < nVar; ++jVar) {
-        /*--- Column product. ---*/
         b[node_j * nVar + iVar] -= bji[iVar * nVar + jVar] * x_i[jVar];
-        /*--- Delete blocks. ---*/
         bij[iVar * nVar + jVar] = bji[iVar * nVar + jVar] = 0.0;
       }
     }
-  }
+  };
+  for (auto k = mat.row_ptr_l[node_i]; k < mat.row_ptr_l[node_i + 1]; ++k) processOffDiag(mat.col_ind_l[k]);
+  for (auto k = mat.row_ptr_u[node_i]; k < mat.row_ptr_u[node_i + 1]; ++k) processOffDiag(mat.col_ind_u[k]);
 
   /*--- Set the diagonal block to the identity. ---*/
   SetVal2Diag(node_i, 1.0);
@@ -1169,54 +1208,33 @@ template <class OtherType>
 void CSysMatrix<ScalarType>::EnforceZeroProjection(unsigned long node_i, const OtherType* n, CSysVector<OtherType>& b) {
   SU2_ZONE_SCOPED
 
-  for (auto index = row_ptr[node_i]; index < row_ptr[node_i + 1]; ++index) {
-    const auto node_j = col_ind[index];
-
-    /*--- Remove product components of block j on row i (bij) and ATTEMPT
-     *    to remove solution components of block i on row j (bji).
-     *    This is identical to symmetry correction applied to gradients
-     *    but extended to the entire matrix. ---*/
-
-    auto bij = &matrix[index * nVar * nVar];
+  /*--- Visit all columns (L, diagonal, U) of row node_i. ---*/
+  auto processCol = [&](unsigned long node_j, bool isDiag) {
+    auto bij = GetBlock(node_i, node_j);
     auto bji = GetBlock(node_j, node_i);
-
-    /*--- Attempt to remove solution components. ---*/
     ScalarType nbn{};
     if (bji != nullptr) {
       for (auto iVar = 0ul; iVar < nVar; ++iVar) {
         ScalarType proj{};
-        for (auto jVar = 0ul; jVar < nVar; ++jVar) {
-          proj += bji[iVar * nVar + jVar] * PassiveAssign(n[jVar]);
-        }
-        for (auto jVar = 0ul; jVar < nVar; ++jVar) {
-          bji[iVar * nVar + jVar] -= proj * PassiveAssign(n[jVar]);
-        }
+        for (auto jVar = 0ul; jVar < nVar; ++jVar) proj += bji[iVar * nVar + jVar] * PassiveAssign(n[jVar]);
+        for (auto jVar = 0ul; jVar < nVar; ++jVar) bji[iVar * nVar + jVar] -= proj * PassiveAssign(n[jVar]);
         nbn += proj * PassiveAssign(n[iVar]);
       }
     }
-
-    /*--- Product components. ---*/
     for (auto jVar = 0ul; jVar < nVar; ++jVar) {
       ScalarType proj{};
-      for (auto iVar = 0ul; iVar < nVar; ++iVar) {
-        proj += bij[iVar * nVar + jVar] * PassiveAssign(n[iVar]);
-      }
-      for (auto iVar = 0ul; iVar < nVar; ++iVar) {
-        bij[iVar * nVar + jVar] -= proj * PassiveAssign(n[iVar]);
-      }
+      for (auto iVar = 0ul; iVar < nVar; ++iVar) proj += bij[iVar * nVar + jVar] * PassiveAssign(n[iVar]);
+      for (auto iVar = 0ul; iVar < nVar; ++iVar) bij[iVar * nVar + jVar] -= proj * PassiveAssign(n[iVar]);
     }
-
-    /*--- This part doesn't have the "*2" factor because the product components
-     *    were removed from the result of removing the solution components
-     *    instead of from the original block (bji == bij). ---*/
-    if (node_i == node_j) {
-      for (auto iVar = 0ul; iVar < nVar; ++iVar) {
-        for (auto jVar = 0ul; jVar < nVar; ++jVar) {
+    if (isDiag) {
+      for (auto iVar = 0ul; iVar < nVar; ++iVar)
+        for (auto jVar = 0ul; jVar < nVar; ++jVar)
           bij[iVar * nVar + jVar] += PassiveAssign(n[iVar]) * nbn * PassiveAssign(n[jVar]);
-        }
-      }
     }
-  }
+  };
+  for (auto k = mat.row_ptr_l[node_i]; k < mat.row_ptr_l[node_i + 1]; ++k) processCol(mat.col_ind_l[k], false);
+  processCol(node_i, true);
+  for (auto k = mat.row_ptr_u[node_i]; k < mat.row_ptr_u[node_i + 1]; ++k) processCol(mat.col_ind_u[k], false);
 
   OtherType proj{};
   for (auto iVar = 0ul; iVar < nVar; ++iVar) proj += b(node_i, iVar) * n[iVar];
@@ -1229,14 +1247,16 @@ void CSysMatrix<ScalarType>::SetDiagonalAsColumnSum() {
 
   SU2_OMP_FOR_DYN(omp_heavy_size)
   for (auto iPoint = 0ul; iPoint < nPoint; ++iPoint) {
-    auto block_ii = &matrix[dia_ptr[iPoint] * nVar * nEqn];
+    auto* d_i = &mat.d[iPoint * nVar * nEqn];
+    for (auto k = 0ul; k < nVar * nEqn; ++k) d_i[k] = 0.0;
 
-    for (auto k = 0ul; k < nVar * nEqn; ++k) block_ii[k] = 0.0;
+    /*--- For each L entry (iPoint, j): subtract its U-transpose (j, iPoint). ---*/
+    for (auto k_l = mat.row_ptr_l[iPoint]; k_l < mat.row_ptr_l[iPoint + 1]; ++k_l)
+      MatrixSubtraction(d_i, &mat.u[l_to_u_transp[k_l] * nVar * nEqn], d_i);
 
-    for (auto k = row_ptr[iPoint]; k < row_ptr[iPoint + 1]; ++k) {
-      auto block_ji = &matrix[col_ptr[k] * nVar * nEqn];
-      if (block_ji != block_ii) MatrixSubtraction(block_ii, block_ji, block_ii);
-    }
+    /*--- For each U entry (iPoint, j): subtract its L-transpose (j, iPoint). ---*/
+    for (auto k_u = mat.row_ptr_u[iPoint]; k_u < mat.row_ptr_u[iPoint + 1]; ++k_u)
+      MatrixSubtraction(d_i, &mat.l[u_to_l_transp[k_u] * nVar * nEqn], d_i);
   }
   END_SU2_OMP_FOR
 }
@@ -1262,38 +1282,34 @@ void CSysMatrix<ScalarType>::TransposeInPlace() {
 
   /*--- Swap ij with ji and transpose them. ---*/
 
-  if (edge_ptr) {
-    /*--- The FV way. ---*/
+  if (edge_ptr_l) {
+    /*--- FV path: each edge maps to one U and one L block. ---*/
     SU2_OMP_FOR_DYN(omp_heavy_size * 2)
-    for (auto iEdge = 0ul; iEdge < edge_ptr.nEdge; ++iEdge) {
-      auto bij = &matrix[edge_ptr(iEdge, 0) * nVar * nVar];
-      auto bji = &matrix[edge_ptr(iEdge, 1) * nVar * nVar];
-
-      swapAndTransp(nVar, bij, bji);
+    for (auto iEdge = 0ul; iEdge < mat.nnz_l; ++iEdge) {
+      auto* bij_u = &mat.u[iEdge * nVar * nVar];
+      auto* bji_l = &mat.l[edge_ptr_l[iEdge] * nVar * nVar];
+      swapAndTransp(nVar, bij_u, bji_l);
     }
     END_SU2_OMP_FOR
-  } else if (col_ptr) {
-    /*--- If the column pointer was built. ---*/
+  } else if (l_to_u_transp) {
+    /*--- FEM/general path: use the L→U transpose map (one L entry per pair). ---*/
     SU2_OMP_FOR_DYN(omp_heavy_size)
     for (auto iPoint = 0ul; iPoint < nPoint; ++iPoint) {
-      for (auto k = row_ptr[iPoint]; k < dia_ptr[iPoint]; ++k) {
-        auto bij = &matrix[k * nVar * nVar];
-        auto bji = &matrix[col_ptr[k] * nVar * nVar];
-
-        swapAndTransp(nVar, bij, bji);
+      for (auto k_l = mat.row_ptr_l[iPoint]; k_l < mat.row_ptr_l[iPoint + 1]; ++k_l) {
+        const auto k_u = l_to_u_transp[k_l];
+        swapAndTransp(nVar, &mat.u[k_u * nVar * nVar], &mat.l[k_l * nVar * nVar]);
       }
     }
     END_SU2_OMP_FOR
   } else {
-    /*--- Slow fallback, needs to search for ji. ---*/
+    /*--- Slow fallback: search for each U entry's L partner via GetBlock. ---*/
     SU2_OMP_FOR_DYN(omp_heavy_size)
     for (auto iPoint = 0ul; iPoint < nPoint; ++iPoint) {
-      for (auto k = dia_ptr[iPoint] + 1ul; k < row_ptr[iPoint + 1]; ++k) {
-        const auto jPoint = col_ind[k];
-        auto bij = &matrix[k * nVar * nVar];
-        auto bji = GetBlock(jPoint, iPoint);
+      for (auto k_u = mat.row_ptr_u[iPoint]; k_u < mat.row_ptr_u[iPoint + 1]; ++k_u) {
+        const auto jPoint = mat.col_ind_u[k_u];
+        auto* bij = &mat.u[k_u * nVar * nVar];
+        auto* bji = GetBlock(jPoint, iPoint);
         assert(bji && "Pattern is not symmetric.");
-
         swapAndTransp(nVar, bij, bji);
       }
     }
@@ -1304,7 +1320,7 @@ void CSysMatrix<ScalarType>::TransposeInPlace() {
 
   SU2_OMP_FOR_STAT(omp_heavy_size)
   for (auto iPoint = 0ul; iPoint < nPoint; ++iPoint) {
-    auto bii = &matrix[dia_ptr[iPoint] * nVar * nVar];
+    auto bii = &mat.d[iPoint * nVar * nVar];
     for (auto i = 0ul; i < nVar; ++i)
       for (auto j = 0ul; j < i; ++j) std::swap(bii[i * nVar + j], bii[j * nVar + i]);
   }
@@ -1320,16 +1336,20 @@ void CSysMatrix<ScalarType>::TransposeInPlace() {
 template <class ScalarType>
 void CSysMatrix<ScalarType>::MatrixMatrixAddition(ScalarType alpha, const CSysMatrix<ScalarType>& B) {
   SU2_ZONE_SCOPED
-  /*--- Check that the sparse structure is shared between the two matrices,
-   *    comparing pointers is ok as they are obtained from CGeometry. ---*/
-  bool ok = (row_ptr == B.row_ptr) && (col_ind == B.col_ind) && (nVar == B.nVar) && (nEqn == B.nEqn) && (nnz == B.nnz);
-
-  if (!ok) {
-    SU2_MPI::Error("Matrices do not have compatible sparsity.", CURRENT_FUNCTION);
-  }
+  /*--- Check that the LDU structure is shared (pointer equality since both come from CGeometry). ---*/
+  const bool ok = (mat.row_ptr_l == B.mat.row_ptr_l) && (mat.col_ind_l == B.mat.col_ind_l) &&
+                  (mat.row_ptr_u == B.mat.row_ptr_u) && (mat.col_ind_u == B.mat.col_ind_u) && (nVar == B.nVar) &&
+                  (nEqn == B.nEqn) && (nPoint == B.nPoint) && (mat.nnz_l == B.mat.nnz_l) && (mat.nnz_u == B.mat.nnz_u);
+  if (!ok) SU2_MPI::Error("Matrices do not have compatible sparsity.", CURRENT_FUNCTION);
 
   SU2_OMP_FOR_STAT(omp_light_size)
-  for (auto i = 0ul; i < nnz * nVar * nEqn; ++i) matrix[i] += alpha * B.matrix[i];
+  for (auto i = 0ul; i < nPoint * nVar * nEqn; ++i) mat.d[i] += alpha * B.mat.d[i];
+  END_SU2_OMP_FOR
+  SU2_OMP_FOR_STAT(omp_light_size)
+  for (auto i = 0ul; i < mat.nnz_l * nVar * nEqn; ++i) mat.l[i] += alpha * B.mat.l[i];
+  END_SU2_OMP_FOR
+  SU2_OMP_FOR_STAT(omp_light_size)
+  for (auto i = 0ul; i < mat.nnz_u * nVar * nEqn; ++i) mat.u[i] += alpha * B.mat.u[i];
   END_SU2_OMP_FOR
 }
 
@@ -1338,9 +1358,9 @@ void CSysMatrix<ScalarType>::BuildPastixPreconditioner(CGeometry* geometry, cons
                                                        unsigned short kind_fact) {
   SU2_ZONE_SCOPED
 #ifdef HAVE_PASTIX
-  /*--- Pastix will launch nested threads. ---*/
   BEGIN_SU2_OMP_SAFE_GLOBAL_ACCESS {
-    pastix_wrapper.SetMatrix(nVar, nPoint, nPointDomain, row_ptr, col_ind, matrix);
+    pastix_wrapper.SetLDU(nVar, nPoint, nPointDomain, mat.row_ptr_l, mat.col_ind_l, mat.row_ptr_u, mat.col_ind_u, mat.d,
+                          mat.l, mat.u);
     pastix_wrapper.Factorize(geometry, config, kind_fact);
   }
   END_SU2_OMP_SAFE_GLOBAL_ACCESS
diff --git a/Common/src/linear_algebra/CSysMatrixGPU.cu b/Common/src/linear_algebra/CSysMatrixGPU.cu
index a3f1a77ca40..1ebba30097c 100644
--- a/Common/src/linear_algebra/CSysMatrixGPU.cu
+++ b/Common/src/linear_algebra/CSysMatrixGPU.cu
@@ -28,112 +28,70 @@
 #include "../../include/linear_algebra/CSysMatrix.hpp"
 #include "../../include/linear_algebra/GPUComms.cuh"
 
-#include <cusparse.h>
-#include <cstdint>
-#include <type_traits>
-
-inline void cusparseAssert(cusparseStatus_t code, const char* file, int line, bool abort = true) {
-  if (code != CUSPARSE_STATUS_SUCCESS) {
-    fprintf(stderr, "cuSPARSEassert: %s %s %d\n", cusparseGetErrorString(code), file, line);
-    if (abort) exit(static_cast<int>(code));
+/*!
+ * \brief Block-LDU SpMV kernel: y[iRow] = (L + D + U) * x per block-row.
+ *        One CUDA block per block-row; threadIdx.x indexes output variable (0..nVar-1).
+ */
+template <class ScalarType>
+__global__ void BlockLDU_SpMV_kernel(unsigned long nRows, unsigned long nVar,
+                                     const unsigned long* __restrict__ row_ptr_l,
+                                     const unsigned long* __restrict__ col_ind_l,
+                                     const ScalarType* __restrict__ mat_l,
+                                     const ScalarType* __restrict__ mat_d,
+                                     const unsigned long* __restrict__ row_ptr_u,
+                                     const unsigned long* __restrict__ col_ind_u,
+                                     const ScalarType* __restrict__ mat_u,
+                                     const ScalarType* __restrict__ x, ScalarType* __restrict__ y) {
+  const unsigned long iRow = blockIdx.x;
+  const unsigned long iVar = threadIdx.x;
+  if (iRow >= nRows || iVar >= nVar) return;
+
+  ScalarType sum = 0;
+  /* Lower */
+  for (auto k = row_ptr_l[iRow]; k < row_ptr_l[iRow + 1]; ++k) {
+    const auto col = col_ind_l[k];
+    const ScalarType* blk = mat_l + k * nVar * nVar + iVar * nVar;
+    for (unsigned long jVar = 0; jVar < nVar; ++jVar) sum += blk[jVar] * x[col * nVar + jVar];
   }
-}
-
-#define cusparseErrChk(ans)                    \
-  {                                            \
-    cusparseAssert((ans), __FILE__, __LINE__); \
+  /* Diagonal */
+  {
+    const ScalarType* blk = mat_d + iRow * nVar * nVar + iVar * nVar;
+    for (unsigned long jVar = 0; jVar < nVar; ++jVar) sum += blk[jVar] * x[iRow * nVar + jVar];
   }
-
-inline cusparseIndexType_t GetCusparseIndexType() {
-  if constexpr (sizeof(unsigned long) == 4) {
-    return CUSPARSE_INDEX_32I;
-  } else if constexpr (sizeof(unsigned long) == 8) {
-    return CUSPARSE_INDEX_64I;
-  } else {
-    static_assert(sizeof(unsigned long) == 4 || sizeof(unsigned long) == 8,
-                  "cuSPARSE BSR SpMV only supports 32-bit or 64-bit index arrays in this path.");
+  /* Upper */
+  for (auto k = row_ptr_u[iRow]; k < row_ptr_u[iRow + 1]; ++k) {
+    const auto col = col_ind_u[k];
+    const ScalarType* blk = mat_u + k * nVar * nVar + iVar * nVar;
+    for (unsigned long jVar = 0; jVar < nVar; ++jVar) sum += blk[jVar] * x[col * nVar + jVar];
   }
+  y[iRow * nVar + iVar] = sum;
 }
 
 template <class ScalarType>
-constexpr cudaDataType GetCudaDataType() {
-  if constexpr (std::is_same<ScalarType, float>::value) {
-    return CUDA_R_32F;
-  } else if constexpr (std::is_same<ScalarType, double>::value) {
-    return CUDA_R_64F;
-  } else {
-    static_assert(std::is_same<ScalarType, float>::value || std::is_same<ScalarType, double>::value,
-                  "cuSPARSE BSR SpMV only supports float and double in this path.");
-  }
-}
-
-template<class ScalarType>
-void CSysMatrix<ScalarType>::HtDTransfer(bool trigger) const
-{
-   if(trigger) gpuErrChk(cudaMemcpy((void*)(d_matrix), (void*)&matrix[0], (sizeof(ScalarType)*nnz*nVar*nEqn), cudaMemcpyHostToDevice));
+void CSysMatrix<ScalarType>::HtDTransfer(bool trigger) const {
+  if (!trigger) return;
+  gpuErrChk(cudaMemcpy(gpu.d, mat.d, sizeof(ScalarType) * nPoint * nVar * nEqn, cudaMemcpyHostToDevice));
+  gpuErrChk(cudaMemcpy(gpu.l, mat.l, sizeof(ScalarType) * mat.nnz_l * nVar * nEqn, cudaMemcpyHostToDevice));
+  gpuErrChk(cudaMemcpy(gpu.u, mat.u, sizeof(ScalarType) * mat.nnz_u * nVar * nEqn, cudaMemcpyHostToDevice));
 }
 
 template <class ScalarType>
 void CSysMatrix<ScalarType>::GPUMatrixVectorProduct(const CSysVector<ScalarType>& vec, CSysVector<ScalarType>& prod,
                                                     CGeometry* geometry, const CConfig* config) const {
   if (nVar != nEqn) {
-    SU2_MPI::Error("CUDA CSysMatrix matvec with cuSPARSE BSR requires square blocks.", CURRENT_FUNCTION);
+    SU2_MPI::Error("CUDA CSysMatrix block-LDU SpMV requires square blocks.", CURRENT_FUNCTION);
   }
 
   ScalarType* d_vec = vec.GetDevicePointer();
   ScalarType* d_prod = prod.GetDevicePointer();
-
   vec.HtDTransfer();
 
-  const auto indexType = GetCusparseIndexType();
-  const auto valueType = GetCudaDataType<ScalarType>();
-
-  const std::int64_t blockSize = static_cast<std::int64_t>(nVar);
-
-  const std::int64_t brows = static_cast<std::int64_t>(nPointDomain);
-  const std::int64_t bcols = static_cast<std::int64_t>(nPoint);
-  const std::int64_t bnnz = static_cast<std::int64_t>(nnz);
-
-  const std::int64_t xSize = static_cast<std::int64_t>(nPoint) * blockSize;
-  const std::int64_t ySize = static_cast<std::int64_t>(nPointDomain) * blockSize;
-
-  const ScalarType alpha = 1.0;
-  const ScalarType beta = 0.0;
-
-  cusparseHandle_t handle = nullptr;
-  cusparseConstSpMatDescr_t matA = nullptr;
-  cusparseDnVecDescr_t vecX = nullptr;
-  cusparseDnVecDescr_t vecY = nullptr;
-
-  cusparseErrChk(cusparseCreate(&handle));
-
-  cusparseErrChk(cusparseCreateConstBsr(&matA, brows, bcols, bnnz, blockSize, blockSize, d_row_ptr, d_col_ind, d_matrix,
-                                        indexType, indexType, CUSPARSE_INDEX_BASE_ZERO, valueType, CUSPARSE_ORDER_ROW));
-
-  cusparseErrChk(cusparseCreateDnVec(&vecX, xSize, d_vec, valueType));
-  cusparseErrChk(cusparseCreateDnVec(&vecY, ySize, d_prod, valueType));
-
-  size_t bufferSize = 0;
-  void* dBuffer = nullptr;
-
-  cusparseErrChk(cusparseSpMV_bufferSize(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, &alpha, matA, vecX, &beta, vecY,
-                                         valueType, CUSPARSE_SPMV_BSR_ALG1, &bufferSize));
-
-  if (bufferSize > 0) {
-    gpuErrChk(cudaMalloc(&dBuffer, bufferSize));
-  }
-
-  cusparseErrChk(cusparseSpMV(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, &alpha, matA, vecX, &beta, vecY, valueType,
-                              CUSPARSE_SPMV_BSR_ALG1, dBuffer));
-
-  if (dBuffer != nullptr) {
-    gpuErrChk(cudaFree(dBuffer));
-  }
-
-  cusparseErrChk(cusparseDestroyDnVec(vecY));
-  cusparseErrChk(cusparseDestroyDnVec(vecX));
-  cusparseErrChk(cusparseDestroySpMat(matA));
-  cusparseErrChk(cusparseDestroy(handle));
+  dim3 blockDim(static_cast<unsigned>(nVar), 1, 1);
+  dim3 gridDim(static_cast<unsigned>(nPointDomain), 1, 1);
+  BlockLDU_SpMV_kernel<ScalarType><<<gridDim, blockDim>>>(
+      nPointDomain, nVar, gpu.row_ptr_l, gpu.col_ind_l, gpu.l, gpu.d,
+      gpu.row_ptr_u, gpu.col_ind_u, gpu.u, d_vec, d_prod);
+  gpuErrChk(cudaGetLastError());
 
   prod.DtHTransfer();
 }
diff --git a/Common/src/linear_algebra/CSysSolve.cpp b/Common/src/linear_algebra/CSysSolve.cpp
index cd100f311ab..aae5d9ce707 100644
--- a/Common/src/linear_algebra/CSysSolve.cpp
+++ b/Common/src/linear_algebra/CSysSolve.cpp
@@ -211,12 +211,14 @@ bool CSysSolve<ScalarType>::ModGramSchmidt(bool shared_hsbg, int i, su2matrix<Sc
   const auto h_i = CSysVector<ScalarType>::multiDot(w, i + 1, 1, w, i + 1);
   LinearCombination(
       shared_hsbg, i + 1, w, [&h_i](int k) { return -h_i(0, k); }, w[i + 1], true);
-
-  const auto& dh_i = CSysVector<ScalarType>::multiDot(w, i + 1, 1, w, i + 1);
-  LinearCombination(
-      shared_hsbg, i + 1, w, [&dh_i](int k) { return -dh_i(0, k); }, w[i + 1], true);
-
-  for (int k = 0; k < i + 1; k++) SetHsbg(k, i, h_i(0, k) + dh_i(0, k));
+  if (i < 5) {
+    for (int k = 0; k < i + 1; k++) SetHsbg(k, i, h_i(0, k));
+  } else {
+    const auto& dh_i = CSysVector<ScalarType>::multiDot(w, i + 1, 1, w, i + 1);
+    LinearCombination(
+        shared_hsbg, i + 1, w, [&dh_i](int k) { return -dh_i(0, k); }, w[i + 1], true);
+    for (int k = 0; k < i + 1; k++) SetHsbg(k, i, h_i(0, k) + dh_i(0, k));
+  }
 
   /*--- The norm of w[i+1] is 0 or NaN: the input vector from mat_vec is
    * zero or contains NaN. Cannot proceed with orthogonalization. ---*/
diff --git a/TestCases/hybrid_regression.py b/TestCases/hybrid_regression.py
index 612afb6c50a..03d23ae1357 100644
--- a/TestCases/hybrid_regression.py
+++ b/TestCases/hybrid_regression.py
@@ -51,7 +51,7 @@ def main():
     channel.cfg_dir   = "euler/channel"
     channel.cfg_file  = "inv_channel_RK.cfg"
     channel.test_iter = 20
-    channel.test_vals = [-2.000215, 3.536936, 0.034212, 0.193725]
+    channel.test_vals = [-2.000214, 3.536937, 0.034200, 0.193725]
     test_list.append(channel)
 
     # NACA0012
@@ -75,7 +75,7 @@ def main():
     oneram6.cfg_dir   = "euler/oneram6"
     oneram6.cfg_file  = "inv_ONERAM6.cfg"
     oneram6.test_iter = 10
-    oneram6.test_vals = [0.280800, 0.008623]
+    oneram6.test_vals = [0.280803, 0.008625]
     test_list.append(oneram6)
 
     # Fixed CL NACA0012
@@ -83,7 +83,7 @@ def main():
     fixedCL_naca0012.cfg_dir   = "fixed_cl/naca0012"
     fixedCL_naca0012.cfg_file  = "inv_NACA0012.cfg"
     fixedCL_naca0012.test_iter = 10
-    fixedCL_naca0012.test_vals = [-4.017883, 1.513039, 0.300961, 0.019472]
+    fixedCL_naca0012.test_vals = [-4.010006, 1.521019, 0.300952, 0.019472]
     test_list.append(fixedCL_naca0012)
 
     # HYPERSONIC FLOW PAST BLUNT BODY
@@ -111,7 +111,7 @@ def main():
     cylinder.cfg_dir   = "navierstokes/cylinder"
     cylinder.cfg_file  = "lam_cylinder.cfg"
     cylinder.test_iter = 25
-    cylinder.test_vals = [-8.463699, -2.981890, 0.045839, 1.659615, 0.000000]
+    cylinder.test_vals = [-8.500692, -3.024889, 0.037977, 1.664043, 0.000000]
     test_list.append(cylinder)
 
     # Laminar cylinder (low Mach correction)
@@ -119,7 +119,7 @@ def main():
     cylinder_lowmach.cfg_dir   = "navierstokes/cylinder"
     cylinder_lowmach.cfg_file  = "cylinder_lowmach.cfg"
     cylinder_lowmach.test_iter = 25
-    cylinder_lowmach.test_vals         = [-6.473668, -1.011938, 0.275173, 71.130747, 0.000000]
+    cylinder_lowmach.test_vals         = [-6.476617, -1.014894, 0.594405, 70.855527, 0.000000]
     cylinder_lowmach.test_vals_aarch64 = [-6.830996, -1.368850, -0.143956, 73.963354, 0]
     test_list.append(cylinder_lowmach)
 
@@ -128,7 +128,7 @@ def main():
     poiseuille.cfg_dir   = "navierstokes/poiseuille"
     poiseuille.cfg_file  = "lam_poiseuille.cfg"
     poiseuille.test_iter = 10
-    poiseuille.test_vals = [-5.046139, 0.652976, 0.008353, 13.735637, 0]
+    poiseuille.test_vals = [-5.046138, 0.652977, 0.008358, 13.735661, 0.000000]
     test_list.append(poiseuille)
 
     # 2D Poiseuille flow (inlet profile file)
@@ -181,7 +181,7 @@ def main():
     turb_flatplate.cfg_dir   = "rans/flatplate"
     turb_flatplate.cfg_file  = "turb_SA_flatplate.cfg"
     turb_flatplate.test_iter = 20
-    turb_flatplate.test_vals = [-4.958138, -7.438030, -0.187473, 0.015060]
+    turb_flatplate.test_vals = [-4.958328, -7.438303, -0.187472, 0.015052]
     test_list.append(turb_flatplate)
 
     # ONERA M6 Wing
@@ -266,7 +266,7 @@ def main():
     turb_naca0012_sst_restart_mg.cfg_file  = "turb_NACA0012_sst_multigrid_restart.cfg"
     turb_naca0012_sst_restart_mg.test_iter = 20
     turb_naca0012_sst_restart_mg.ntest_vals = 5
-    turb_naca0012_sst_restart_mg.test_vals = [-6.589114, -5.057151, 0.830239, -0.008808, 0.078150]
+    turb_naca0012_sst_restart_mg.test_vals = [-6.589092, -5.057151, 0.830239, -0.008809, 0.078148]
     test_list.append(turb_naca0012_sst_restart_mg)
 
     #############################
@@ -347,7 +347,7 @@ def main():
     inc_euler_naca0012.cfg_dir   = "incomp_euler/naca0012"
     inc_euler_naca0012.cfg_file  = "incomp_NACA0012.cfg"
     inc_euler_naca0012.test_iter = 20
-    inc_euler_naca0012.test_vals = [-5.846118, -4.941076, 0.519913, 0.008955]
+    inc_euler_naca0012.test_vals = [-5.858106, -4.937298, 0.519817, 0.008958]
     test_list.append(inc_euler_naca0012)
 
     # C-D nozzle with pressure inlet and mass flow outlet
@@ -355,7 +355,7 @@ def main():
     inc_nozzle.cfg_dir   = "incomp_euler/nozzle"
     inc_nozzle.cfg_file  = "inv_nozzle.cfg"
     inc_nozzle.test_iter = 20
-    inc_nozzle.test_vals = [-6.171980, -5.419034, 0.009267, 0.127577]
+    inc_nozzle.test_vals = [-6.171116, -5.413059, 0.008862, 0.127559]
     test_list.append(inc_nozzle)
 
     #############################
@@ -367,7 +367,7 @@ def main():
     inc_lam_cylinder.cfg_dir   = "incomp_navierstokes/cylinder"
     inc_lam_cylinder.cfg_file  = "incomp_cylinder.cfg"
     inc_lam_cylinder.test_iter = 10
-    inc_lam_cylinder.test_vals = [-4.159236, -3.569077, 0.017653, 4.934964]
+    inc_lam_cylinder.test_vals = [-4.161196, -3.573053, 0.025533, 4.944646]
     test_list.append(inc_lam_cylinder)
 
     # Buoyancy-driven cavity
@@ -432,7 +432,7 @@ def main():
     cavity.cfg_dir   = "moving_wall/cavity"
     cavity.cfg_file  = "lam_cavity.cfg"
     cavity.test_iter = 25
-    cavity.test_vals = [-7.938907, -2.490199, 0.013042, 0.004995]
+    cavity.test_vals = [-7.927914, -2.479035, 0.012837, 0.005129]
     test_list.append(cavity)
 
     # Spinning cylinder
@@ -440,7 +440,7 @@ def main():
     spinning_cylinder.cfg_dir   = "moving_wall/spinning_cylinder"
     spinning_cylinder.cfg_file  = "spinning_cylinder.cfg"
     spinning_cylinder.test_iter = 25
-    spinning_cylinder.test_vals         = [-7.547526, -2.080576, 1.888705, 1.812327]
+    spinning_cylinder.test_vals         = [-7.533967, -2.066687, 1.832254, 1.843017]
     spinning_cylinder.test_vals_aarch64 = [-8.008023, -2.611064, 1.497308, 1.487483]
     test_list.append(spinning_cylinder)
 
@@ -453,7 +453,7 @@ def main():
     square_cylinder.cfg_dir   = "unsteady/square_cylinder"
     square_cylinder.cfg_file  = "turb_square.cfg"
     square_cylinder.test_iter = 3
-    square_cylinder.test_vals = [-2.560678, -1.175981, 0.062203, 1.399351, 2.219223, 1.399297, 2.217470, 0.000000]
+    square_cylinder.test_vals = [-2.560678, -1.175980, 0.062203, 1.399351, 2.219223, 1.399297, 2.217470, 0.000000]
     square_cylinder.unsteady  = True
     test_list.append(square_cylinder)
 
@@ -462,7 +462,7 @@ def main():
     sine_gust.cfg_dir   = "gust"
     sine_gust.cfg_file  = "inv_gust_NACA0012.cfg"
     sine_gust.test_iter = 5
-    sine_gust.test_vals = [-1.977498, 3.481817, -0.009944, -0.004266]
+    sine_gust.test_vals = [-1.977498, 3.481817, -0.010188, -0.004302]
     sine_gust.unsteady  = True
     test_list.append(sine_gust)
 
@@ -471,7 +471,7 @@ def main():
     cosine_gust.cfg_dir   = "gust"
     cosine_gust.cfg_file  = "cosine_gust_zdir.cfg"
     cosine_gust.test_iter = 79
-    cosine_gust.test_vals = [-2.418805, 0.002211, -0.001258, 0.000438, -0.000592]
+    cosine_gust.test_vals = [-2.418805, 0.002212, -0.001249, 0.000439, -0.000592]
     cosine_gust.unsteady  = True
     cosine_gust.enabled_with_tsan = False
     test_list.append(cosine_gust)
@@ -491,7 +491,7 @@ def main():
     aeroelastic.cfg_dir   = "aeroelastic"
     aeroelastic.cfg_file  = "aeroelastic_NACA64A010.cfg"
     aeroelastic.test_iter = 2
-    aeroelastic.test_vals = [-1.876626, 4.021083, 0.081436, 0.027726, -0.001638, -0.000130, -1.056269]
+    aeroelastic.test_vals = [-1.876631, 4.021073, 0.081270, 0.027580, -0.001642, -0.000127, -1.052231]
     aeroelastic.unsteady  = True
     aeroelastic.enabled_on_cpu_arch = ["x86_64"] # Requires AVX-capable architecture
     test_list.append(aeroelastic)
@@ -654,7 +654,7 @@ def main():
     bars_SST_2D.cfg_dir   = "sliding_interface/bars_SST_2D"
     bars_SST_2D.cfg_file  = "bars.cfg"
     bars_SST_2D.test_iter = 13
-    bars_SST_2D.test_vals = [13.000000, -0.396746, -1.461254]
+    bars_SST_2D.test_vals = [13.000000, -0.391567, -1.460874]
     bars_SST_2D.multizone = True
     test_list.append(bars_SST_2D)
 
diff --git a/TestCases/hybrid_regression_AD.py b/TestCases/hybrid_regression_AD.py
index b3c9e3ad7fd..4c153f8c51e 100644
--- a/TestCases/hybrid_regression_AD.py
+++ b/TestCases/hybrid_regression_AD.py
@@ -86,7 +86,7 @@ def main():
     discadj_rans_naca0012_sst.cfg_dir   = "disc_adj_rans/naca0012"
     discadj_rans_naca0012_sst.cfg_file  = "turb_NACA0012_sst.cfg"
     discadj_rans_naca0012_sst.test_iter = 10
-    discadj_rans_naca0012_sst.test_vals = [-2.201517, -0.175212, 3.044200, -0.041842]
+    discadj_rans_naca0012_sst.test_vals = [-2.201514, -0.175212, 3.044200, -0.041842]
     discadj_rans_naca0012_sst.test_vals_aarch64 = [-2.201855, -0.172443, 3.043400, -0.041820]
     test_list.append(discadj_rans_naca0012_sst)
 
@@ -99,7 +99,7 @@ def main():
     discadj_incomp_NACA0012.cfg_dir   = "disc_adj_incomp_euler/naca0012"
     discadj_incomp_NACA0012.cfg_file  = "incomp_NACA0012_disc.cfg"
     discadj_incomp_NACA0012.test_iter = 20
-    discadj_incomp_NACA0012.test_vals = [20.000000, -3.122291, -2.287328, 0.000000]
+    discadj_incomp_NACA0012.test_vals = [20.000000, -3.122052, -2.290797, 0.000000]
     test_list.append(discadj_incomp_NACA0012)
 
     #####################################
@@ -133,7 +133,7 @@ def main():
     discadj_incomp_turb_NACA0012_sst.cfg_dir   = "disc_adj_incomp_rans/naca0012"
     discadj_incomp_turb_NACA0012_sst.cfg_file  = "turb_naca0012_sst.cfg"
     discadj_incomp_turb_NACA0012_sst.test_iter = 10
-    discadj_incomp_turb_NACA0012_sst.test_vals = [-3.775388, -3.089117, -7.143490, 0.000000, -0.896797]
+    discadj_incomp_turb_NACA0012_sst.test_vals = [-3.775383, -3.089125, -7.143499, 0.000000, -0.896795]
     test_list.append(discadj_incomp_turb_NACA0012_sst)
 
     #######################################################
@@ -173,7 +173,7 @@ def main():
     discadj_DT_1ST_cylinder.cfg_dir   = "disc_adj_rans/cylinder_DT_1ST"
     discadj_DT_1ST_cylinder.cfg_file  = "cylinder.cfg"
     discadj_DT_1ST_cylinder.test_iter = 9
-    discadj_DT_1ST_cylinder.test_vals = [1.196347, -3.339014, -0.006212, 0.000020]
+    discadj_DT_1ST_cylinder.test_vals = [1.196349, -3.339012, -0.006213, 0.000020]
     discadj_DT_1ST_cylinder.unsteady  = True
     discadj_DT_1ST_cylinder.enabled_with_tsan = False
     test_list.append(discadj_DT_1ST_cylinder)
@@ -187,7 +187,7 @@ def main():
     discadj_pitchingNACA0012.cfg_dir   = "disc_adj_euler/naca0012_pitching"
     discadj_pitchingNACA0012.cfg_file  = "inv_NACA0012_pitching.cfg"
     discadj_pitchingNACA0012.test_iter = 4
-    discadj_pitchingNACA0012.test_vals = [-1.041883, -1.512874, -0.006211, 0.000012]
+    discadj_pitchingNACA0012.test_vals = [-1.041978, -1.513013, -0.006202, 0.000012]
     discadj_pitchingNACA0012.tol = 0.01
     discadj_pitchingNACA0012.unsteady  = True
     discadj_pitchingNACA0012.enabled_with_tsan = False
diff --git a/TestCases/parallel_regression.py b/TestCases/parallel_regression.py
index faeca39742f..6cf6ccf4fbf 100755
--- a/TestCases/parallel_regression.py
+++ b/TestCases/parallel_regression.py
@@ -74,7 +74,7 @@ def main():
     cfd_flamelet_h2.cfg_dir = "flamelet/07_laminar_premixed_h2_flame_cfd"
     cfd_flamelet_h2.cfg_file = "laminar_premixed_h2_flame_cfd.cfg"
     cfd_flamelet_h2.test_iter = 5
-    cfd_flamelet_h2.test_vals = [-8.036794, -8.372668, -1.842800, -9.388446]
+    cfd_flamelet_h2.test_vals = [-8.036794, -8.372668, -1.842800, -9.388447]
     test_list.append(cfd_flamelet_h2)
 
     # Flame ignition methods
@@ -95,7 +95,7 @@ def main():
     thermalbath.cfg_dir = "nonequilibrium/thermalbath/finitechemistry"
     thermalbath.cfg_file = "thermalbath.cfg"
     thermalbath.test_iter = 10
-    thermalbath.test_vals = [0.945997, 0.945997, -11.860991, -11.857906, -32.000000, 10.013239]
+    thermalbath.test_vals = [0.945997, 0.945997, -11.860991, -11.890652, -32.000000, 10.013239]
     test_list.append(thermalbath)
 
     # Adiabatic thermal bath
@@ -112,7 +112,7 @@ def main():
     thermalbath_frozen.cfg_dir = "nonequilibrium/thermalbath/frozen"
     thermalbath_frozen.cfg_file = "thermalbath_frozen.cfg"
     thermalbath_frozen.test_iter = 10
-    thermalbath_frozen.test_vals = [-32.000000, -32.000000, -11.818647, -11.857909, -32.000000, 10.013545]
+    thermalbath_frozen.test_vals = [-32.000000, -32.000000, -11.818647, -11.848553, -32.000000, 10.013545]
     test_list.append(thermalbath_frozen)
 
     # Inviscid single wedge, ausm, implicit
@@ -120,7 +120,7 @@ def main():
     invwedge_a.cfg_dir = "nonequilibrium/invwedge"
     invwedge_a.cfg_file = "invwedge_ausm.cfg"
     invwedge_a.test_iter = 10
-    invwedge_a.test_vals = [-1.069669, -1.594432, -18.299010, -18.626399, -18.572410, 2.245727, 1.874088, 5.290291, 0.847735]
+    invwedge_a.test_vals = [-1.069665, -1.594428, -18.299923, -18.627315, -18.573325, 2.245732, 1.874096, 5.290295, 0.847739]
     invwedge_a.test_vals_aarch64 = [-1.069675, -1.594438, -18.299736, -18.627126, -18.573137, 2.245721, 1.874105, 5.290285, 0.847729]
     test_list.append(invwedge_a)
 
@@ -138,7 +138,7 @@ def main():
     invwedge_msw.cfg_dir = "nonequilibrium/invwedge"
     invwedge_msw.cfg_file = "invwedge_msw.cfg"
     invwedge_msw.test_iter = 10
-    invwedge_msw.test_vals = [-1.212335, -1.737098, -18.299375, -18.626782, -18.572771, 2.106171, 1.651949, 5.143958, 0.704444]
+    invwedge_msw.test_vals = [-1.212335, -1.737098, -18.301768, -18.629149, -18.575169, 2.106171, 1.651949, 5.143958, 0.704444]
     invwedge_msw.test_vals_aarch64 = [-1.212335, -1.737098, -18.299279, -18.626656, -18.572683, 2.106171, 1.651949, 5.143958, 0.704444]
     test_list.append(invwedge_msw)
 
@@ -147,7 +147,7 @@ def main():
     invwedge_roe.cfg_dir = "nonequilibrium/invwedge"
     invwedge_roe.cfg_file = "invwedge_roe.cfg"
     invwedge_roe.test_iter = 10
-    invwedge_roe.test_vals = [-1.054108, -1.578871, -17.782421, -18.111535, -18.055538, 2.264661, 1.841727, 5.302630, 0.897714]
+    invwedge_roe.test_vals = [-1.023216, -1.547979, -17.814656, -18.143616, -18.087775, 2.295078, 1.885054, 5.338487, 0.926120]
     invwedge_roe.test_vals_aarch64 = [-1.052398, -1.577160, -17.794015, -18.122997, -18.067131, 2.266042, 1.849686, 5.304700, 0.899584]
     test_list.append(invwedge_roe)
 
@@ -174,7 +174,7 @@ def main():
     invwedge_ss_inlet.cfg_dir = "nonequilibrium/invwedge"
     invwedge_ss_inlet.cfg_file = "invwedge_ss_inlet.cfg"
     invwedge_ss_inlet.test_iter = 10
-    invwedge_ss_inlet.test_vals = [-1.068592, -1.593355, -18.250183, -18.579524, -18.523255, 2.246972, 1.874197, 5.291273, 0.848771]
+    invwedge_ss_inlet.test_vals = [-1.068634, -1.593397, -18.246265, -18.575529, -18.519338, 2.246925, 1.874200, 5.291234, 0.848731]
     invwedge_ss_inlet.test_vals_aarch64 = [-1.068592, -1.593355, -18.250183, -18.579524, -18.523255, 2.246972, 1.874197, 5.291273, 0.848771]
     test_list.append(invwedge_ss_inlet)
 
@@ -183,7 +183,7 @@ def main():
     visc_cone.cfg_dir = "nonequilibrium/visc_wedge"
     visc_cone.cfg_file = "axi_visccone.cfg"
     visc_cone.test_iter = 10
-    visc_cone.test_vals = [-5.222269, -5.746523, -20.560216, -20.510119, -20.409089, 1.255762, -3.208384, -0.016011, 0.093461, 32619]
+    visc_cone.test_vals = [-5.215235, -5.739371, -20.559852, -20.509281, -20.408911, 1.262701, -3.205457, -0.015696, 0.093205, 32637.000000]
     visc_cone.test_vals_aarch64 = [-5.222270, -5.746525, -20.560286, -20.510152, -20.409101, 1.255758, -3.208382, -0.016014, 0.093462, 32619.000000]
     test_list.append(visc_cone)
 
@@ -216,7 +216,7 @@ def main():
     ion_gy.cfg_dir = "nonequilibrium/visc_cylinder"
     ion_gy.cfg_file = "cyl_ion_gy.cfg"
     ion_gy.test_iter = 10
-    ion_gy.test_vals = [-11.629873, -4.165563, -4.702662, -4.950351, -5.146155, -4.993878, -6.893332, 5.990109, 5.990004, -0.014849, 0.000000, 90090.000000]
+    ion_gy.test_vals = [-11.629873, -4.165562, -4.702662, -4.950351, -5.146155, -4.993878, -6.893332, 5.990109, 5.990004, -0.014849, 0.000000, 90090.000000]
     test_list.append(ion_gy)
 
     ##########################
@@ -228,7 +228,7 @@ def main():
     channel.cfg_dir   = "euler/channel"
     channel.cfg_file  = "inv_channel_RK.cfg"
     channel.test_iter = 20
-    channel.test_vals = [-1.988782, 3.546674, 0.032065, 0.194399]
+    channel.test_vals = [-1.990518, 3.545643, 0.031745, 0.194289]
     test_list.append(channel)
 
     # NACA0012
@@ -252,7 +252,7 @@ def main():
     oneram6.cfg_dir   = "euler/oneram6"
     oneram6.cfg_file  = "inv_ONERAM6.cfg"
     oneram6.test_iter = 10
-    oneram6.test_vals = [-11.530230, -11.006685, 0.280800, 0.008623]
+    oneram6.test_vals = [-11.534670, -11.009582, 0.280800, 0.008623]
     oneram6.timeout   = 3200
     test_list.append(oneram6)
 
@@ -261,7 +261,7 @@ def main():
     fixedCL_naca0012.cfg_dir   = "fixed_cl/naca0012"
     fixedCL_naca0012.cfg_file  = "inv_NACA0012.cfg"
     fixedCL_naca0012.test_iter = 10
-    fixedCL_naca0012.test_vals = [-4.006904, 1.523677, 0.300969, 0.019473]
+    fixedCL_naca0012.test_vals = [-3.962426, 1.570772, 0.300985, 0.019475]
     test_list.append(fixedCL_naca0012)
 
     # Polar sweep of the inviscid NACA0012
@@ -315,7 +315,7 @@ def main():
     MFR_coupling.cfg_dir   = "euler/turbofan_MFR_coupling"
     MFR_coupling.cfg_file  = "MFR_coupling.cfg"
     MFR_coupling.test_iter = 100
-    MFR_coupling.test_vals = [-2.1124e+02, 1.5003e+02, 2.0151e+01]
+    MFR_coupling.test_vals = [-211.250000, 150.030000, 20.151000]
     test_list.append(MFR_coupling)
 
     ##########################
@@ -327,7 +327,7 @@ def main():
     flatplate.cfg_dir   = "navierstokes/flatplate"
     flatplate.cfg_file  = "lam_flatplate.cfg"
     flatplate.test_iter = 100
-    flatplate.test_vals = [-6.499563, -1.020731, 0.001223, 0.028373, 2.361500, -2.333200, 0.000000, 0.000000]
+    flatplate.test_vals = [-6.496808, -1.017942, 0.001224, 0.028377, 2.361500, -2.333200, 0.000000, 0.000000]
     test_list.append(flatplate)
 
     # Custom objective function
@@ -352,7 +352,7 @@ def main():
     cylinder.cfg_dir   = "navierstokes/cylinder"
     cylinder.cfg_file  = "lam_cylinder.cfg"
     cylinder.test_iter = 25
-    cylinder.test_vals = [-8.411856, -2.937460, -0.002360, 1.643164, 0.000000]
+    cylinder.test_vals = [-8.427810, -2.952885, -0.007651, 1.646675, 0.000000]
     test_list.append(cylinder)
 
     # Laminar cylinder (low Mach correction)
@@ -360,7 +360,7 @@ def main():
     cylinder_lowmach.cfg_dir   = "navierstokes/cylinder"
     cylinder_lowmach.cfg_file  = "cylinder_lowmach.cfg"
     cylinder_lowmach.test_iter = 25
-    cylinder_lowmach.test_vals = [-6.469910, -1.008163, -0.381561, 78.514909, 0.000000]
+    cylinder_lowmach.test_vals = [-6.469226, -1.007489, -0.631813, 78.583596, 0.000000]
 
     test_list.append(cylinder_lowmach)
 
@@ -416,7 +416,7 @@ def main():
     turb_flatplate.cfg_dir   = "rans/flatplate"
     turb_flatplate.cfg_file  = "turb_SA_flatplate.cfg"
     turb_flatplate.test_iter = 20
-    turb_flatplate.test_vals = [-4.926218, -7.441083, -0.187477, 0.015330]
+    turb_flatplate.test_vals = [-4.949425, -7.443679, -0.187485, 0.015345]
     test_list.append(turb_flatplate)
 
     # Flat plate (compressible) with species inlet
@@ -424,7 +424,7 @@ def main():
     turb_flatplate_species.cfg_dir   = "rans/flatplate"
     turb_flatplate_species.cfg_file  = "turb_SA_flatplate_species.cfg"
     turb_flatplate_species.test_iter = 20
-    turb_flatplate_species.test_vals = [-4.728172, -1.517257, -2.296200, 0.801649, -3.574284, 3.000000, -0.964853, 2.000000, -1.529003, 3.000000, -0.705161, 0.999933, 0.999933]
+    turb_flatplate_species.test_vals = [-4.795630, -1.532152, -2.318309, 0.717570, -3.574258, 3.000000, -0.931729, 2.000000, -1.526521, 3.000000, -0.705481, 0.999934, 0.999934]
     test_list.append(turb_flatplate_species)
 
     # Flat plate SST compressibility correction Wilcox
@@ -596,7 +596,7 @@ def main():
     turb_naca0012_sst_restart_mg.cfg_file  = "turb_NACA0012_sst_multigrid_restart.cfg"
     turb_naca0012_sst_restart_mg.test_iter = 20
     turb_naca0012_sst_restart_mg.ntest_vals = 5
-    turb_naca0012_sst_restart_mg.test_vals = [-6.566570, -5.057149, 0.830238, -0.008695, 0.078156]
+    turb_naca0012_sst_restart_mg.test_vals = [-6.566317, -5.057149, 0.830238, -0.008702, 0.078154]
     turb_naca0012_sst_restart_mg.timeout   = 3200
     turb_naca0012_sst_restart_mg.tol       = 0.000001
     test_list.append(turb_naca0012_sst_restart_mg)
@@ -610,7 +610,7 @@ def main():
     inc_euler_naca0012.cfg_dir   = "incomp_euler/naca0012"
     inc_euler_naca0012.cfg_file  = "incomp_NACA0012.cfg"
     inc_euler_naca0012.test_iter = 20
-    inc_euler_naca0012.test_vals = [-6.058441, -5.126560, 0.525681, 0.008778]
+    inc_euler_naca0012.test_vals = [-6.067964, -5.125607, 0.525745, 0.008772]
     test_list.append(inc_euler_naca0012)
 
     # C-D nozzle with pressure inlet and mass flow outlet
@@ -618,7 +618,7 @@ def main():
     inc_nozzle.cfg_dir   = "incomp_euler/nozzle"
     inc_nozzle.cfg_file  = "inv_nozzle.cfg"
     inc_nozzle.test_iter = 20
-    inc_nozzle.test_vals = [-6.058643, -5.286249, 0.002391, 0.124107]
+    inc_nozzle.test_vals = [-6.053549, -5.296666, 0.001330, 0.124087]
     test_list.append(inc_nozzle)
 
     # Laminar wall mounted cylinder, Euler walls, cylinder wall diagonally split
@@ -638,7 +638,7 @@ def main():
     inc_lam_cylinder.cfg_dir   = "incomp_navierstokes/cylinder"
     inc_lam_cylinder.cfg_file  = "incomp_cylinder.cfg"
     inc_lam_cylinder.test_iter = 10
-    inc_lam_cylinder.test_vals = [-4.156152, -3.554127, -0.017923, 5.101126]
+    inc_lam_cylinder.test_vals = [-4.156113, -3.553508, -0.024563, 5.105605]
     test_list.append(inc_lam_cylinder)
 
     # Laminar sphere, Re=1. Last column: Cd=24/Re
@@ -646,7 +646,7 @@ def main():
     inc_lam_sphere.cfg_dir   = "incomp_navierstokes/sphere"
     inc_lam_sphere.cfg_file  = "sphere.cfg"
     inc_lam_sphere.test_iter = 5
-    inc_lam_sphere.test_vals = [-8.165744, -8.968003, 0.121003, 25.782691]
+    inc_lam_sphere.test_vals = [-8.190948, -8.992588, 0.121003, 25.782691]
     test_list.append(inc_lam_sphere)
 
     # Buoyancy-driven cavity
@@ -662,7 +662,7 @@ def main():
     inc_poly_cylinder.cfg_dir   = "incomp_navierstokes/cylinder"
     inc_poly_cylinder.cfg_file  = "poly_cylinder.cfg"
     inc_poly_cylinder.test_iter = 20
-    inc_poly_cylinder.test_vals = [-2.362995, 0.006829, 1.923532, -172.590000]
+    inc_poly_cylinder.test_vals = [-2.363389, 0.005552, 1.922971, -172.380000]
     test_list.append(inc_poly_cylinder)
 
     # X-coarse laminar bend as a mixed element CGNS test
@@ -670,7 +670,7 @@ def main():
     inc_lam_bend.cfg_dir    = "incomp_navierstokes/bend"
     inc_lam_bend.cfg_file   = "lam_bend.cfg"
     inc_lam_bend.test_iter = 10
-    inc_lam_bend.test_vals = [-3.588863, -3.101606, -0.022302, 1.062971]
+    inc_lam_bend.test_vals = [-3.585943, -3.096592, -0.022111, 1.064110]
     test_list.append(inc_lam_bend)
 
     # 3D laminar channnel with 1 cell in flow direction, streamwise periodic
@@ -849,7 +849,7 @@ def main():
     contadj_naca0012.cfg_dir   = "cont_adj_euler/naca0012"
     contadj_naca0012.cfg_file  = "inv_NACA0012.cfg"
     contadj_naca0012.test_iter = 5
-    contadj_naca0012.test_vals = [-9.526111, -15.089167, -0.726250, 0.020280]
+    contadj_naca0012.test_vals = [-9.527707, -15.088310, -0.726250, 0.020280]
     contadj_naca0012.test_vals_aarch64 = [-9.662546, -14.998818, -0.726250, 0.020280]
     test_list.append(contadj_naca0012)
 
@@ -858,7 +858,7 @@ def main():
     contadj_oneram6.cfg_dir   = "cont_adj_euler/oneram6"
     contadj_oneram6.cfg_file  = "inv_ONERAM6.cfg"
     contadj_oneram6.test_iter = 10
-    contadj_oneram6.test_vals = [-12.086022, -12.648069, -1.086100, 0.007556]
+    contadj_oneram6.test_vals = [-12.087421, -12.650762, -1.086100, 0.007556]
     test_list.append(contadj_oneram6)
 
     # Inviscid WEDGE: tests averaged outflow total pressure adjoint
@@ -874,7 +874,7 @@ def main():
     contadj_fixed_CL_naca0012.cfg_dir   = "fixed_cl/naca0012"
     contadj_fixed_CL_naca0012.cfg_file  = "inv_NACA0012_ContAdj.cfg"
     contadj_fixed_CL_naca0012.test_iter = 100
-    contadj_fixed_CL_naca0012.test_vals = [1.382576, -4.042295, -0.008696, 0.003238]
+    contadj_fixed_CL_naca0012.test_vals = [1.377921, -4.048140, -0.008264, 0.003369]
     test_list.append(contadj_fixed_CL_naca0012)
 
     ###################################
@@ -886,7 +886,7 @@ def main():
     contadj_ns_cylinder.cfg_dir   = "cont_adj_navierstokes/cylinder"
     contadj_ns_cylinder.cfg_file  = "lam_cylinder.cfg"
     contadj_ns_cylinder.test_iter = 20
-    contadj_ns_cylinder.test_vals = [-3.632833, -9.087692, 2.056700, -0.000000]
+    contadj_ns_cylinder.test_vals = [-3.634298, -9.089945, 2.056700, -0.000000]
     test_list.append(contadj_ns_cylinder)
 
     # Adjoint laminar naca0012 subsonic
@@ -930,7 +930,7 @@ def main():
     contadj_rans_rae2822.cfg_dir   = "cont_adj_rans/rae2822"
     contadj_rans_rae2822.cfg_file  = "turb_SA_RAE2822.cfg"
     contadj_rans_rae2822.test_iter = 20
-    contadj_rans_rae2822.test_vals = [-5.399744, -10.904916, -0.212470, 0.005448]
+    contadj_rans_rae2822.test_vals = [-5.399567, -10.904741, -0.212470, 0.005448]
     test_list.append(contadj_rans_rae2822)
 
     #############################
@@ -1018,7 +1018,7 @@ def main():
     cavity.cfg_dir   = "moving_wall/cavity"
     cavity.cfg_file  = "lam_cavity.cfg"
     cavity.test_iter = 25
-    cavity.test_vals = [-7.828480, -2.367075, 0.008928, 0.007370]
+    cavity.test_vals = [-7.838584, -2.378333, 0.009057, 0.006632]
     test_list.append(cavity)
 
     # Spinning cylinder
@@ -1026,7 +1026,7 @@ def main():
     spinning_cylinder.cfg_dir   = "moving_wall/spinning_cylinder"
     spinning_cylinder.cfg_file  = "spinning_cylinder.cfg"
     spinning_cylinder.test_iter = 25
-    spinning_cylinder.test_vals = [-7.383987, -1.919413, 2.601945, 1.976038]
+    spinning_cylinder.test_vals = [-7.401660, -1.942949, 2.534412, 1.997391]
     test_list.append(spinning_cylinder)
 
     ######################################
@@ -1047,7 +1047,7 @@ def main():
     sine_gust.cfg_dir   = "gust"
     sine_gust.cfg_file  = "inv_gust_NACA0012.cfg"
     sine_gust.test_iter = 5
-    sine_gust.test_vals = [-1.977498, 3.481817, -0.010337, -0.004460]
+    sine_gust.test_vals = [-1.977498, 3.481817, -0.010210, -0.004556]
     sine_gust.unsteady  = True
     test_list.append(sine_gust)
 
@@ -1153,7 +1153,7 @@ def main():
     Jones_tc_restart.cfg_dir   = "turbomachinery/APU_turbocharger"
     Jones_tc_restart.cfg_file  = "Jones_restart.cfg"
     Jones_tc_restart.test_iter = 5
-    Jones_tc_restart.test_vals = [-7.645871, -5.849734, -15.337010, -9.825760, -13.216108, -7.752293, 73286.000000, 73286.000000, 0.020055, 82.286000]
+    Jones_tc_restart.test_vals = [-7.645871, -5.849734, -15.337010, -9.825759, -13.216108, -7.752293, 73286.000000, 73286.000000, 0.020055, 82.286000]
     test_list.append(Jones_tc_restart)
 
     # 2D axial stage
@@ -1169,7 +1169,7 @@ def main():
     transonic_stator_restart.cfg_dir   = "turbomachinery/transonic_stator_2D"
     transonic_stator_restart.cfg_file  = "transonic_stator_restart.cfg"
     transonic_stator_restart.test_iter = 20
-    transonic_stator_restart.test_vals = [-4.363703, -2.480017, -2.079311, 1.731709, -1.467573, 3.229965, -471620.000000, 94.838000, -0.046906]
+    transonic_stator_restart.test_vals = [-4.365633, -2.482254, -2.081248, 1.729767, -1.467304, 3.231076, -471620.000000, 94.837000, -0.046836]
     transonic_stator_restart.test_vals_aarch64 = [-4.437809, -2.553049, -2.164729, 1.657542, -1.356823, 3.178788, -471620.000000, 94.842000, -0.040365]
     test_list.append(transonic_stator_restart)
 
@@ -1191,7 +1191,7 @@ def main():
     uniform_flow.cfg_dir   = "sliding_interface/uniform_flow"
     uniform_flow.cfg_file  = "uniform_NN.cfg"
     uniform_flow.test_iter = 5
-    uniform_flow.test_vals = [5.000000, 0.000000, -0.195002, -10.624448]
+    uniform_flow.test_vals = [5.000000, 0.000000, -0.195001, -10.624457]
     uniform_flow.unsteady  = True
     uniform_flow.multizone = True
     test_list.append(uniform_flow)
@@ -1253,7 +1253,7 @@ def main():
     bars_SST_2D.cfg_dir   = "sliding_interface/bars_SST_2D"
     bars_SST_2D.cfg_file  = "bars.cfg"
     bars_SST_2D.test_iter = 13
-    bars_SST_2D.test_vals = [13.000000, -0.397743, -1.458724]
+    bars_SST_2D.test_vals = [13.000000, -0.395917, -1.480217]
     bars_SST_2D.multizone = True
     test_list.append(bars_SST_2D)
 
@@ -1262,7 +1262,7 @@ def main():
     slinc_steady.cfg_dir   = "sliding_interface/incompressible_steady"
     slinc_steady.cfg_file  = "config.cfg"
     slinc_steady.test_iter = 19
-    slinc_steady.test_vals = [19.000000, -1.131557, -1.370471]
+    slinc_steady.test_vals = [19.000000, -1.129117, -1.399143]
     slinc_steady.timeout   = 100
     slinc_steady.tol       = 0.00002
     slinc_steady.multizone = True
@@ -1297,7 +1297,7 @@ def main():
     thermal_beam_3d.cfg_dir = "fea_fsi/ThermalBeam_3d"
     thermal_beam_3d.cfg_file = "configBeam_3d.cfg"
     thermal_beam_3d.test_iter = 4
-    thermal_beam_3d.test_vals = [-8.147213, -7.841446, -7.904153, -13.978110, 217, -4.095071, 39, -4.072614, 136760, 75]
+    thermal_beam_3d.test_vals = [-8.070340, -7.802437, -7.856284, -13.978110, 217.000000, -4.047750, 39.000000, -4.072613, 136760.000000, 75.000000]
     test_list.append(thermal_beam_3d)
 
     # Static beam, 3d with coupled temperature, nonlinear elasticity
@@ -1325,7 +1325,7 @@ def main():
     linear_plane_strain.cfg_dir = "fea_fsi/VonMissesVerif"
     linear_plane_strain.cfg_file = "linear_plane_strain_2d.cfg"
     linear_plane_strain.test_iter = 0
-    linear_plane_strain.test_vals = [-6.036048, -6.001905, 0, 120140, 325, -8.025024]
+    linear_plane_strain.test_vals = [-6.406458, -5.995503, 0, 120140, 144, -8.122248]
     test_list.append(linear_plane_strain)
 
     # 2D beam in plain stress with thermal expansion. This tests fixes to the 2D von Mises stress calculation,
@@ -1334,7 +1334,7 @@ def main():
     nonlinear_plane_stress.cfg_dir = "fea_fsi/VonMissesVerif"
     nonlinear_plane_stress.cfg_file = "nonlinear_plane_stress_2d.cfg"
     nonlinear_plane_stress.test_iter = 16
-    nonlinear_plane_stress.test_vals = [-7.131557, -2.945301, -13.165302, 1.6248e+05, 32, -4.129942]
+    nonlinear_plane_stress.test_vals = [-6.443023, -2.221455, -11.742638, 162480, 32, -4.347864]
     nonlinear_plane_stress.tol = [2e-4, 2e-4, 2e-4, 1e-5, 1e-5, 4e-4]
     test_list.append(nonlinear_plane_stress)
 
@@ -1363,7 +1363,7 @@ def main():
     dyn_fsi.cfg_dir   = "fea_fsi/dyn_fsi"
     dyn_fsi.cfg_file  = "config.cfg"
     dyn_fsi.test_iter = 4
-    dyn_fsi.test_vals = [-4.330741, -4.152826, 0, 97]
+    dyn_fsi.test_vals = [-4.330741, -4.152826, 0, 75]
     dyn_fsi.multizone = True
     dyn_fsi.unsteady  = True
     test_list.append(dyn_fsi)
@@ -1390,7 +1390,7 @@ def main():
     solid_periodic_pins.cfg_dir   = "solid_heat_conduction/periodic_pins"
     solid_periodic_pins.cfg_file  = "configSolid.cfg"
     solid_periodic_pins.test_iter = 750
-    solid_periodic_pins.test_vals = [-15.878973, -14.569206, 300.900000, 425.320000, 5.000000, -1.672670]
+    solid_periodic_pins.test_vals = [-15.878945, -14.569206, 300.900000, 425.320000, 5.000000, -1.672702]
     solid_periodic_pins.test_vals_aarch64 = [-15.879016, -14.569206, 300.900000, 425.320000, 5.000000, -1.672666]
     test_list.append(solid_periodic_pins)
 
@@ -1412,7 +1412,7 @@ def main():
     cht_compressible.cfg_dir   = "coupled_cht/comp_2d"
     cht_compressible.cfg_file  = "cht_2d_3cylinders.cfg"
     cht_compressible.test_iter = 10
-    cht_compressible.test_vals = [-4.256053, -0.532725, -0.532725, -0.532726]
+    cht_compressible.test_vals = [-4.256051, -0.532725, -0.532724, -0.532725]
     cht_compressible.multizone = True
     test_list.append(cht_compressible)
 
@@ -1464,7 +1464,7 @@ def main():
     pywrapper_square_cylinder.cfg_dir   = "unsteady/square_cylinder"
     pywrapper_square_cylinder.cfg_file  = "turb_square.cfg"
     pywrapper_square_cylinder.test_iter = 10
-    pywrapper_square_cylinder.test_vals = [-1.178522, -0.349773, 1.401402, 2.359089, 1.401728, 2.300969, 0.000000]
+    pywrapper_square_cylinder.test_vals = [-1.178521, -0.349772, 1.401402, 2.359105, 1.401729, 2.300974, 0.000000]
     pywrapper_square_cylinder.command   = TestCase.Command("mpirun -np 2", "SU2_CFD.py", "--parallel -f")
     pywrapper_square_cylinder.unsteady  = True
     test_list.append(pywrapper_square_cylinder)
@@ -1474,7 +1474,7 @@ def main():
     pywrapper_aeroelastic.cfg_dir   = "aeroelastic"
     pywrapper_aeroelastic.cfg_file  = "aeroelastic_NACA64A010.cfg"
     pywrapper_aeroelastic.test_iter = 2
-    pywrapper_aeroelastic.test_vals = [-1.876633, 4.021069, 0.082654, 0.027642, -0.001643, -0.000126, -0.966946]
+    pywrapper_aeroelastic.test_vals = [-1.876630, 4.021073, 0.082632, 0.027609, -0.001642, -0.000127, -0.965661]
     pywrapper_aeroelastic.command   = TestCase.Command("mpirun -np 2", "SU2_CFD.py", "--parallel -f")
     pywrapper_aeroelastic.unsteady  = True
     test_list.append(pywrapper_aeroelastic)
@@ -1484,7 +1484,7 @@ def main():
     pywrapper_custom_fea_load.cfg_dir = "py_wrapper/custom_load_fea"
     pywrapper_custom_fea_load.cfg_file = "config.cfg"
     pywrapper_custom_fea_load.test_iter = 13
-    pywrapper_custom_fea_load.test_vals = [-7.262040, -4.945686, -14.163208, 34, -6.029883, 362.230000]
+    pywrapper_custom_fea_load.test_vals = [-7.262040, -4.945686, -14.163208, 27.000000, -6.282188, 362.230000]
     pywrapper_custom_fea_load.command = TestCase.Command("mpirun -np 2", "python", "run.py")
     test_list.append(pywrapper_custom_fea_load)
 
@@ -1504,7 +1504,7 @@ def main():
     pywrapper_unsteadyFSI.cfg_dir = "py_wrapper/dyn_fsi"
     pywrapper_unsteadyFSI.cfg_file = "config.cfg"
     pywrapper_unsteadyFSI.test_iter = 4
-    pywrapper_unsteadyFSI.test_vals = [0, 31, 5, 58, -1.756677, -2.828286, -7.638545, -6.863930, 0.000156]
+    pywrapper_unsteadyFSI.test_vals = [0, 31, 5, 47, -1.756677, -2.828286, -7.638545, -6.863930, 0.000156]
     pywrapper_unsteadyFSI.command = TestCase.Command("mpirun -np 2", "python", "run.py")
     pywrapper_unsteadyFSI.unsteady = True
     pywrapper_unsteadyFSI.multizone = True
@@ -1515,7 +1515,7 @@ def main():
     pywrapper_unsteadyCHT.cfg_dir = "py_wrapper/flatPlate_unsteady_CHT"
     pywrapper_unsteadyCHT.cfg_file = "unsteady_CHT_FlatPlate_Conf.cfg"
     pywrapper_unsteadyCHT.test_iter = 5
-    pywrapper_unsteadyCHT.test_vals = [-1.614168, 2.259968, -0.010622, 0.171621]
+    pywrapper_unsteadyCHT.test_vals = [-1.614169, 2.260087, 0.018775, 0.168073]
     pywrapper_unsteadyCHT.command = TestCase.Command("mpirun -np 2", "python", "launch_unsteady_CHT_FlatPlate.py --parallel -f")
     pywrapper_unsteadyCHT.unsteady = True
     test_list.append(pywrapper_unsteadyCHT)
@@ -1525,7 +1525,7 @@ def main():
     pywrapper_rigidMotion.cfg_dir = "py_wrapper/flatPlate_rigidMotion"
     pywrapper_rigidMotion.cfg_file = "flatPlate_rigidMotion_Conf.cfg"
     pywrapper_rigidMotion.test_iter = 5
-    pywrapper_rigidMotion.test_vals = [-1.614166, 2.255135, 0.350194, 0.089496]
+    pywrapper_rigidMotion.test_vals = [-1.614166, 2.255135, 0.350196, 0.089496]
     pywrapper_rigidMotion.command = TestCase.Command("mpirun -np 2", "python", "launch_flatPlate_rigidMotion.py --parallel -f")
     pywrapper_rigidMotion.unsteady = True
     test_list.append(pywrapper_rigidMotion)
@@ -1597,7 +1597,7 @@ def main():
     mms_fvm_inc_euler.cfg_dir   = "mms/fvm_incomp_euler"
     mms_fvm_inc_euler.cfg_file  = "inv_mms_jst.cfg"
     mms_fvm_inc_euler.test_iter = 20
-    mms_fvm_inc_euler.test_vals = [-9.128660, -9.441806, 0.000000, 0.000000]
+    mms_fvm_inc_euler.test_vals = [-9.128735, -9.441757, 0.000000, 0.000000]
     mms_fvm_inc_euler.tol       = 0.0001
     test_list.append(mms_fvm_inc_euler)
 
@@ -1743,7 +1743,7 @@ def main():
     species_passive_val.cfg_dir   = "species_transport/passive_transport_validation"
     species_passive_val.cfg_file  = "passive_transport.cfg"
     species_passive_val.test_iter = 50
-    species_passive_val.test_vals = [-16.576049, -16.349168, -16.916647, -4.257599, 10, -4.232357, 8, -5.193350, 0.186610, 0]
+    species_passive_val.test_vals = [-16.493002, -16.246291, -16.871574, -4.257599, 10.000000, -4.526973, 8.000000, -5.193350, 0.186610, -0.000000]
     species_passive_val.test_vals_aarch64 = [-16.517744, -16.282420, -16.871663, -4.257599, 10.000000, -4.278151, 8.000000, -5.193350, 0.186610, 0.000000]
     test_list.append(species_passive_val)
 
diff --git a/TestCases/parallel_regression_AD.py b/TestCases/parallel_regression_AD.py
index f03615fba02..c39d9aba5a8 100644
--- a/TestCases/parallel_regression_AD.py
+++ b/TestCases/parallel_regression_AD.py
@@ -55,7 +55,7 @@ def main():
     discadj_cylinder3D.cfg_dir   = "disc_adj_euler/cylinder3D"
     discadj_cylinder3D.cfg_file  = "inv_cylinder3D.cfg"
     discadj_cylinder3D.test_iter = 5
-    discadj_cylinder3D.test_vals = [-3.693714, -3.889422, 0.000000, 0.000000]
+    discadj_cylinder3D.test_vals = [-3.693793, -3.889408, 0.000000, 0.000000]
     test_list.append(discadj_cylinder3D)
 
     # Arina nozzle 2D
@@ -63,7 +63,7 @@ def main():
     discadj_arina2k.cfg_dir      = "disc_adj_euler/arina2k"
     discadj_arina2k.cfg_file     = "Arina2KRS.cfg"
     discadj_arina2k.test_iter    = 20
-    discadj_arina2k.test_vals    = [-2.931649, -3.356322, 0.073332, 0.000000]
+    discadj_arina2k.test_vals    = [-2.933011, -3.357218, 0.073338, 0.000000]
     test_list.append(discadj_arina2k)
 
     # Equivalent area NACA64-206
@@ -104,7 +104,7 @@ def main():
     discadj_incomp_NACA0012.cfg_dir   = "disc_adj_incomp_euler/naca0012"
     discadj_incomp_NACA0012.cfg_file  = "incomp_NACA0012_disc.cfg"
     discadj_incomp_NACA0012.test_iter = 20
-    discadj_incomp_NACA0012.test_vals = [20.000000, -3.095094, -2.322528, 0.000000]
+    discadj_incomp_NACA0012.test_vals = [20.000000, -3.096582, -2.323911, 0.000000]
     test_list.append(discadj_incomp_NACA0012)
 
     #####################################
@@ -116,7 +116,7 @@ def main():
     discadj_incomp_cylinder.cfg_dir   = "disc_adj_incomp_navierstokes/cylinder"
     discadj_incomp_cylinder.cfg_file  = "heated_cylinder.cfg"
     discadj_incomp_cylinder.test_iter = 20
-    discadj_incomp_cylinder.test_vals = [20.000000, -1.652493, -6.202452, 0.000000]
+    discadj_incomp_cylinder.test_vals = [20.000000, -1.658160, -6.215433, 0.000000]
     test_list.append(discadj_incomp_cylinder)
 
     ######################################
@@ -163,7 +163,7 @@ def main():
     discadj_cylinder.cfg_dir   = "disc_adj_rans/cylinder"
     discadj_cylinder.cfg_file  = "cylinder.cfg"
     discadj_cylinder.test_iter = 9
-    discadj_cylinder.test_vals = [1.639372, -2.834295, -0.009538, 0.000020] #last 4 columns
+    discadj_cylinder.test_vals = [1.639372, -2.834293, -0.009538, 0.000020]
     discadj_cylinder.unsteady  = True
     test_list.append(discadj_cylinder)
 
@@ -176,7 +176,7 @@ def main():
     discadj_cylinder.cfg_dir   = "disc_adj_rans/cylinder"
     discadj_cylinder.cfg_file  = "cylinder_Windowing_AD.cfg"
     discadj_cylinder.test_iter = 9
-    discadj_cylinder.test_vals = [2.183366] #last column
+    discadj_cylinder.test_vals = [2.183361]
     discadj_cylinder.unsteady  = True
     test_list.append(discadj_cylinder)
 
@@ -204,7 +204,7 @@ def main():
     discadj_DT_1ST_cylinder.cfg_dir   = "disc_adj_rans/cylinder_DT_1ST"
     discadj_DT_1ST_cylinder.cfg_file  = "cylinder.cfg"
     discadj_DT_1ST_cylinder.test_iter = 9
-    discadj_DT_1ST_cylinder.test_vals = [1.196414, -3.339025, -0.006212, 0.000020]
+    discadj_DT_1ST_cylinder.test_vals = [1.196414, -3.339029, -0.006212, 0.000020]
     discadj_DT_1ST_cylinder.unsteady  = True
     test_list.append(discadj_DT_1ST_cylinder)
 
@@ -217,7 +217,7 @@ def main():
     discadj_pitchingNACA0012.cfg_dir   = "disc_adj_euler/naca0012_pitching"
     discadj_pitchingNACA0012.cfg_file  = "inv_NACA0012_pitching.cfg"
     discadj_pitchingNACA0012.test_iter = 4
-    discadj_pitchingNACA0012.test_vals = [-1.040215, -1.508864, -0.006076, 0.000012]
+    discadj_pitchingNACA0012.test_vals = [-1.040019, -1.508690, -0.006062, 0.000012]
     discadj_pitchingNACA0012.tol = 0.01
     discadj_pitchingNACA0012.unsteady  = True
     test_list.append(discadj_pitchingNACA0012)
@@ -231,7 +231,7 @@ def main():
     discadj_trans_stator.cfg_dir   = "disc_adj_turbomachinery/transonic_stator_2D"
     discadj_trans_stator.cfg_file  = "transonic_stator.cfg"
     discadj_trans_stator.test_iter = 79
-    discadj_trans_stator.test_vals = [79.000000, 2.555965, 2.327109, 2.115689, 0.745501]
+    discadj_trans_stator.test_vals = [79.000000, 2.549036, 2.313067, 2.139714, 0.736741]
     discadj_trans_stator.test_vals_aarch64 = [79.000000, 0.696755, 0.485950, 0.569475, -0.990065]
     test_list.append(discadj_trans_stator)
 
@@ -244,7 +244,7 @@ def main():
     discadj_fea.cfg_dir   = "disc_adj_fea"
     discadj_fea.cfg_file  = "configAD_fem.cfg"
     discadj_fea.test_iter = 4
-    discadj_fea.test_vals         = [-2.849687, -3.238608, -0.000364, -8.708700] #last 4 columns
+    discadj_fea.test_vals         = [-2.849947, -3.238801, -0.000364, -8.708700]
     discadj_fea.test_vals_aarch64 = [-2.849646, -3.238577, -0.000364, -8.708700] #last 4 columns
     test_list.append(discadj_fea)
 
@@ -254,7 +254,7 @@ def main():
     discadj_thermoelastic.cfg_dir = "fea_fsi/ThermalBeam_3d"
     discadj_thermoelastic.cfg_file = "configBeamNonlinear_3d_ad.cfg"
     discadj_thermoelastic.test_iter = 10
-    discadj_thermoelastic.test_vals = [-5.355531, -5.293380, -6.164482, -6.433863, 43, -4.049760, 27, -4.164183, 0, 0.192640, 0]
+    discadj_thermoelastic.test_vals = [-5.355530, -5.293381, -6.164472, -6.433864, 43, -4.049760, 27, -4.164193, 0, 0.192640, 0]
     test_list.append(discadj_thermoelastic)
 
     ###################################
@@ -286,7 +286,7 @@ def main():
     discadj_fsi2.cfg_dir   = "disc_adj_fsi/Airfoil_2d"
     discadj_fsi2.cfg_file  = "config.cfg"
     discadj_fsi2.test_iter = 8
-    discadj_fsi2.test_vals         = [-3.824630, 1.979615, -3.863368, 0.295450, 3.839800]
+    discadj_fsi2.test_vals         = [-3.824635, 1.979557, -3.863368, 0.295450, 3.839800]
     discadj_fsi2.test_vals_aarch64 = [-3.824870, 1.979160, -3.863368, 0.295450, 3.839800]
     discadj_fsi2.tol       = 0.00001
     test_list.append(discadj_fsi2)
@@ -342,7 +342,7 @@ def main():
     discadj_flamelet_ch4_hx.cfg_file         = "lam_prem_ch4_hx_ad.cfg"
     discadj_flamelet_ch4_hx.multizone        = False
     discadj_flamelet_ch4_hx.test_iter        = 10
-    discadj_flamelet_ch4_hx.test_vals        = [-9.078706, -9.025745, -9.516205, -8.434002, -15.386905, -8.887596, -18.881167]
+    discadj_flamelet_ch4_hx.test_vals        = [-9.083502, -9.033520, -9.529644, -8.442388, -15.399568, -6.170002, -18.881156]
     test_list.append(discadj_flamelet_ch4_hx)
 
     # 2D planar laminar premixed flame on isothermal burner with conjugate heat transfer (restart)
@@ -351,7 +351,7 @@ def main():
     discadj_flamelet_ch4_cht.cfg_file         = "lam_prem_ch4_cht_ad_master.cfg"
     discadj_flamelet_ch4_cht.multizone        = True
     discadj_flamelet_ch4_cht.test_iter        = 10
-    discadj_flamelet_ch4_cht.test_vals        = [-1.545058, 0.628666, -6.533534, -18.651078, -18.648552, -3.794724, -6.561913, 11.000000]
+    discadj_flamelet_ch4_cht.test_vals        = [-1.545058, 0.628666, -6.533534, -18.651078, -18.648552, -3.794724, -6.572494, 11.000000]
     test_list.append(discadj_flamelet_ch4_cht)
 
     ######################################
@@ -508,7 +508,7 @@ def main():
     pywrapper_CFD_AD_MeshDisp.cfg_dir       = "py_wrapper/disc_adj_flow/mesh_disp_sens"
     pywrapper_CFD_AD_MeshDisp.cfg_file      = "configAD_flow.cfg"
     pywrapper_CFD_AD_MeshDisp.test_iter     = 1000
-    pywrapper_CFD_AD_MeshDisp.test_vals     = [30.000000, -2.496560, 1.440884, 0.000000]
+    pywrapper_CFD_AD_MeshDisp.test_vals     = [30.000000, -2.496452, 1.441022, 0.000000]
     pywrapper_CFD_AD_MeshDisp.command       = TestCase.Command("mpirun -n 2", "python", "run_adjoint.py --parallel -f")
     pywrapper_CFD_AD_MeshDisp.timeout       = 1600
     pywrapper_CFD_AD_MeshDisp.tol           = 0.000001
diff --git a/TestCases/serial_regression.py b/TestCases/serial_regression.py
index 830ce48e4a5..a1cdb43eac7 100755
--- a/TestCases/serial_regression.py
+++ b/TestCases/serial_regression.py
@@ -64,7 +64,7 @@ def main():
     invwedge.cfg_dir = "nonequilibrium/invwedge"
     invwedge.cfg_file = "invwedge_ausm.cfg"
     invwedge.test_iter = 10
-    invwedge.test_vals = [-1.073693, -1.598456, -18.298997, -18.626405, -18.572419, 2.241766, 1.868557, 5.286079, 0.843747]
+    invwedge.test_vals = [-1.073689, -1.598452, -18.299910, -18.627322, -18.573334, 2.241771, 1.868566, 5.286082, 0.843751]
     invwedge.test_vals_aarch64 = [-1.073699, -1.598462, -18.299723, -18.627132, -18.573146, 2.241760, 1.868575, 5.286072, 0.843741]
     test_list.append(invwedge)
 
@@ -93,7 +93,7 @@ def main():
     channel.cfg_dir   = "euler/channel"
     channel.cfg_file  = "inv_channel_RK.cfg"
     channel.test_iter = 10
-    channel.test_vals = [-1.969337, 3.565024, 0.000246, 0.160624]
+    channel.test_vals = [-1.969337, 3.565024, 0.000242, 0.160624]
     test_list.append(channel)
 
     # NACA0012
@@ -126,7 +126,7 @@ def main():
     fixedCL_naca0012.cfg_dir   = "fixed_cl/naca0012"
     fixedCL_naca0012.cfg_file  = "inv_NACA0012.cfg"
     fixedCL_naca0012.test_iter = 10
-    fixedCL_naca0012.test_vals = [-3.962510, 1.570618, 0.301016, 0.019477]
+    fixedCL_naca0012.test_vals = [-3.949168, 1.584810, 0.301017, 0.019479]
     test_list.append(fixedCL_naca0012)
 
     # Polar sweep of the inviscid NACA0012
@@ -166,7 +166,7 @@ def main():
     flatplate.cfg_dir   = "navierstokes/flatplate"
     flatplate.cfg_file  = "lam_flatplate.cfg"
     flatplate.test_iter = 20
-    flatplate.test_vals = [-5.492595, -0.012547, 0.002524, 0.011875, 2.361500, -2.349600, 0.000000, 0.000000]
+    flatplate.test_vals = [-5.499362, -0.019046, 0.002526, 0.011870, 2.361500, -2.349600, 0.000000, 0.000000]
     test_list.append(flatplate)
 
     # Laminar cylinder (steady)
@@ -174,7 +174,7 @@ def main():
     cylinder.cfg_dir   = "navierstokes/cylinder"
     cylinder.cfg_file  = "lam_cylinder.cfg"
     cylinder.test_iter = 25
-    cylinder.test_vals = [-8.480930, -3.006218, -0.028387, 1.634020, 0.000000]
+    cylinder.test_vals = [-8.508933, -3.034987, -0.014846, 1.644215, 0.000000]
     test_list.append(cylinder)
 
     # Laminar cylinder (low Mach correction)
@@ -182,7 +182,7 @@ def main():
     cylinder_lowmach.cfg_dir   = "navierstokes/cylinder"
     cylinder_lowmach.cfg_file  = "cylinder_lowmach.cfg"
     cylinder_lowmach.test_iter = 25
-    cylinder_lowmach.test_vals = [-6.447834, -0.986069, 0.825992, 65.209800, 0.000000]
+    cylinder_lowmach.test_vals = [-6.453096, -0.991328, 0.722165, 66.048089, 0.000000]
     test_list.append(cylinder_lowmach)
 
     # 2D Poiseuille flow (body force driven with periodic inlet / outlet)
@@ -242,7 +242,7 @@ def main():
     turb_flatplate.cfg_dir   = "rans/flatplate"
     turb_flatplate.cfg_file  = "turb_SA_flatplate.cfg"
     turb_flatplate.test_iter = 20
-    turb_flatplate.test_vals = [-4.958238, -7.438031, -0.187473, 0.015059]
+    turb_flatplate.test_vals = [-4.958115, -7.438257, -0.187473, 0.015056]
     test_list.append(turb_flatplate)
 
     # FLAT PLATE, WALL FUNCTIONS, COMPRESSIBLE SST
@@ -359,7 +359,7 @@ def main():
     turb_naca0012_sst_restart_mg.cfg_file  = "turb_NACA0012_sst_multigrid_restart.cfg"
     turb_naca0012_sst_restart_mg.test_iter = 50
     turb_naca0012_sst_restart_mg.ntest_vals = 5
-    turb_naca0012_sst_restart_mg.test_vals = [-6.610290, -5.081422, 0.810881, -0.008844, 0.077940]
+    turb_naca0012_sst_restart_mg.test_vals = [-6.610405, -5.081422, 0.810881, -0.008846, 0.077934]
     turb_naca0012_sst_restart_mg.timeout   = 3200
     turb_naca0012_sst_restart_mg.tol       = 0.000001
     test_list.append(turb_naca0012_sst_restart_mg)
@@ -380,7 +380,7 @@ def main():
     inc_euler_naca0012.cfg_dir   = "incomp_euler/naca0012"
     inc_euler_naca0012.cfg_file  = "incomp_NACA0012.cfg"
     inc_euler_naca0012.test_iter = 20
-    inc_euler_naca0012.test_vals = [-5.968755, -5.003709, 0.522550, 0.008867]
+    inc_euler_naca0012.test_vals = [-5.988713, -5.020635, 0.522968, 0.008854]
     test_list.append(inc_euler_naca0012)
 
     # C-D nozzle with pressure inlet and mass flow outlet
@@ -407,7 +407,7 @@ def main():
     inc_lam_cylinder.cfg_dir   = "incomp_navierstokes/cylinder"
     inc_lam_cylinder.cfg_file  = "incomp_cylinder.cfg"
     inc_lam_cylinder.test_iter = 10
-    inc_lam_cylinder.test_vals = [-4.159153, -3.569142, 0.011445, 4.936802]
+    inc_lam_cylinder.test_vals = [-4.161215, -3.573002, 0.019888, 4.945923]
     test_list.append(inc_lam_cylinder)
 
     # Buoyancy-driven cavity
@@ -423,7 +423,7 @@ def main():
     inc_poly_cylinder.cfg_dir   = "incomp_navierstokes/cylinder"
     inc_poly_cylinder.cfg_file  = "poly_cylinder.cfg"
     inc_poly_cylinder.test_iter = 20
-    inc_poly_cylinder.test_vals = [-8.232551, -2.412865, 0.010163, 1.895333, -172.760000]
+    inc_poly_cylinder.test_vals = [-8.081227, -2.399756, 0.009794, 1.900988, -172.970000]
     test_list.append(inc_poly_cylinder)
 
     # X-coarse laminar bend as a mixed element CGNS test
@@ -431,7 +431,7 @@ def main():
     inc_lam_bend.cfg_dir   = "incomp_navierstokes/bend"
     inc_lam_bend.cfg_file  = "lam_bend.cfg"
     inc_lam_bend.test_iter = 10
-    inc_lam_bend.test_vals = [-3.647474, -3.230291, -0.016108, 1.085750]
+    inc_lam_bend.test_vals = [-3.639664, -3.218039, -0.016067, 1.090645]
     test_list.append(inc_lam_bend)
 
     ############################
@@ -586,7 +586,7 @@ def main():
     contadj_naca0012.cfg_dir   = "cont_adj_euler/naca0012"
     contadj_naca0012.cfg_file  = "inv_NACA0012.cfg"
     contadj_naca0012.test_iter = 5
-    contadj_naca0012.test_vals = [-9.531049, -15.087710, -0.726250, 0.020280]
+    contadj_naca0012.test_vals = [-9.531733, -15.088205, -0.726250, 0.020280]
     contadj_naca0012.tol       = 0.001
     test_list.append(contadj_naca0012)
 
@@ -595,7 +595,7 @@ def main():
     contadj_oneram6.cfg_dir   = "cont_adj_euler/oneram6"
     contadj_oneram6.cfg_file  = "inv_ONERAM6.cfg"
     contadj_oneram6.test_iter = 10
-    contadj_oneram6.test_vals = [-12.080232, -12.641294, -1.086100, 0.007556]
+    contadj_oneram6.test_vals = [-12.083706, -12.645329, -1.086100, 0.007556]
     test_list.append(contadj_oneram6)
 
     # Inviscid WEDGE: tests averaged outflow total pressure adjoint
@@ -611,7 +611,7 @@ def main():
     contadj_fixedCL_naca0012.cfg_dir   = "fixed_cl/naca0012"
     contadj_fixedCL_naca0012.cfg_file  = "inv_NACA0012_ContAdj.cfg"
     contadj_fixedCL_naca0012.test_iter = 100
-    contadj_fixedCL_naca0012.test_vals = [1.381080, -4.043374, -0.033478, 0.003350]
+    contadj_fixedCL_naca0012.test_vals = [1.378116, -4.047513, -0.030259, 0.003488]
     test_list.append(contadj_fixedCL_naca0012)
 
     ###################################
@@ -630,7 +630,7 @@ def main():
     contadj_ns_cylinder.cfg_dir   = "cont_adj_navierstokes/cylinder"
     contadj_ns_cylinder.cfg_file  = "lam_cylinder.cfg"
     contadj_ns_cylinder.test_iter = 20
-    contadj_ns_cylinder.test_vals = [-3.628790, -9.082444, 2.056700, -0.000000]
+    contadj_ns_cylinder.test_vals = [-3.628460, -9.081344, 2.056700, -0.000000]
     test_list.append(contadj_ns_cylinder)
 
     # Adjoint laminar naca0012 subsonic
@@ -674,7 +674,7 @@ def main():
     contadj_rans_rae2822.cfg_dir   = "cont_adj_rans/rae2822"
     contadj_rans_rae2822.cfg_file  = "turb_SA_RAE2822.cfg"
     contadj_rans_rae2822.test_iter = 20
-    contadj_rans_rae2822.test_vals = [-5.399819, -10.904997, -0.212470, 0.005448]
+    contadj_rans_rae2822.test_vals = [-5.399778, -10.904866, -0.212470, 0.005448]
     test_list.append(contadj_rans_rae2822)
 
     #############################
@@ -768,7 +768,7 @@ def main():
     spinning_cylinder.cfg_dir   = "moving_wall/spinning_cylinder"
     spinning_cylinder.cfg_file  = "spinning_cylinder.cfg"
     spinning_cylinder.test_iter = 25
-    spinning_cylinder.test_vals = [-7.543538, -2.078580, 1.956630, 1.859963]
+    spinning_cylinder.test_vals = [-7.549394, -2.082578, 1.841595, 1.853229]
     test_list.append(spinning_cylinder)
 
     ######################################
@@ -789,7 +789,7 @@ def main():
     sine_gust.cfg_dir   = "gust"
     sine_gust.cfg_file  = "inv_gust_NACA0012.cfg"
     sine_gust.test_iter = 5
-    sine_gust.test_vals = [-1.977498, 3.481817, -0.010134, -0.004283]
+    sine_gust.test_vals = [-1.977498, 3.481817, -0.010301, -0.004334]
     sine_gust.unsteady  = True
     test_list.append(sine_gust)
 
@@ -798,7 +798,7 @@ def main():
     aeroelastic.cfg_dir   = "aeroelastic"
     aeroelastic.cfg_file  = "aeroelastic_NACA64A010.cfg"
     aeroelastic.test_iter = 2
-    aeroelastic.test_vals = [-1.876626, 4.021083, 0.081596, 0.027684, -0.001638, -0.000130, -1.140510]
+    aeroelastic.test_vals = [-1.876631, 4.021073, 0.081373, 0.027542, -0.001642, -0.000127, -1.133902]
     aeroelastic.unsteady  = True
     test_list.append(aeroelastic)
 
@@ -824,7 +824,7 @@ def main():
     unst_pitching_naca64a010_rans.cfg_dir   = "unsteady/pitching_naca64a010_rans"
     unst_pitching_naca64a010_rans.cfg_file  = "turb_NACA64A010.cfg"
     unst_pitching_naca64a010_rans.test_iter = 2
-    unst_pitching_naca64a010_rans.test_vals = [-1.299045, -3.951366, 0.010128, 0.008245]
+    unst_pitching_naca64a010_rans.test_vals = [-1.299045, -3.951363, 0.010176, 0.008237]
     unst_pitching_naca64a010_rans.unsteady  = True
     test_list.append(unst_pitching_naca64a010_rans)
     # unsteady pitching NACA64A010, Euler
@@ -832,7 +832,7 @@ def main():
     unst_pitching_naca64a010_euler.cfg_dir   = "unsteady/pitching_naca64a010_euler"
     unst_pitching_naca64a010_euler.cfg_file  = "pitching_NACA64A010.cfg"
     unst_pitching_naca64a010_euler.test_iter = 2
-    unst_pitching_naca64a010_euler.test_vals = [-1.186839, 4.280301, -0.039488, 0.000918]
+    unst_pitching_naca64a010_euler.test_vals = [-1.186839, 4.280301, -0.039479, 0.000910]
     unst_pitching_naca64a010_euler.unsteady  = True
     test_list.append(unst_pitching_naca64a010_euler)
     # unsteady plunging NACA0012, Laminar NS
@@ -840,7 +840,7 @@ def main():
     unst_plunging_naca0012.cfg_dir   = "unsteady/plunging_naca0012"
     unst_plunging_naca0012.cfg_file  = "plunging_NACA0012.cfg"
     unst_plunging_naca0012.test_iter = 2
-    unst_plunging_naca0012.test_vals = [-4.083462, 1.366757, -6.470859, -0.078768]
+    unst_plunging_naca0012.test_vals = [-4.083462, 1.366757, -6.456450, -0.082788]
     unst_plunging_naca0012.unsteady  = True
     test_list.append(unst_plunging_naca0012)
 
@@ -849,7 +849,7 @@ def main():
     unst_deforming_naca0012.cfg_dir   = "disc_adj_euler/naca0012_pitching_def"
     unst_deforming_naca0012.cfg_file  = "inv_NACA0012_pitching_deform.cfg"
     unst_deforming_naca0012.test_iter = 5
-    unst_deforming_naca0012.test_vals = [-3.665263, -3.794184, -3.716978, -3.148551]
+    unst_deforming_naca0012.test_vals = [-3.665271, -3.794214, -3.716999, -3.148565]
     unst_deforming_naca0012.unsteady  = True
     test_list.append(unst_deforming_naca0012)
 
@@ -916,7 +916,7 @@ def main():
     transonic_stator_restart.cfg_dir   = "turbomachinery/transonic_stator_2D"
     transonic_stator_restart.cfg_file  = "transonic_stator_restart.cfg"
     transonic_stator_restart.test_iter = 20
-    transonic_stator_restart.test_vals         = [-4.367851, -2.492866, -2.082422, 1.727424, -1.466963, 3.224518, -471620.000000, 94.839000, -0.052025]
+    transonic_stator_restart.test_vals         = [-4.367780, -2.492918, -2.082410, 1.727494, -1.466974, 3.224733, -471620.000000, 94.839000, -0.052084]
     transonic_stator_restart.test_vals_aarch64 = [-4.443401, -2.566759, -2.169302, 1.651815, -1.356398, 3.172527, -471620.000000, 94.843000, -0.044669]
     test_list.append(transonic_stator_restart)
 
@@ -946,7 +946,7 @@ def main():
     uniform_flow.cfg_dir   = "sliding_interface/uniform_flow"
     uniform_flow.cfg_file  = "uniform_NN.cfg"
     uniform_flow.test_iter = 2
-    uniform_flow.test_vals = [2.000000, 0.000000, -0.230641, -13.251039]
+    uniform_flow.test_vals = [2.000000, 0.000000, -0.230639, -13.250786]
     uniform_flow.test_vals_aarch64 = [2.000000, 0.000000, -0.230641, -13.249000]
     uniform_flow.tol       = 0.000001
     uniform_flow.unsteady  = True
@@ -1011,7 +1011,7 @@ def main():
     bars_SST_2D.cfg_dir   = "sliding_interface/bars_SST_2D"
     bars_SST_2D.cfg_file  = "bars.cfg"
     bars_SST_2D.test_iter = 13
-    bars_SST_2D.test_vals = [13.000000, -0.397243, -1.461873]
+    bars_SST_2D.test_vals = [13.000000, -0.393225, -1.462257]
     bars_SST_2D.multizone = True
     test_list.append(bars_SST_2D)
 
@@ -1020,7 +1020,7 @@ def main():
     slinc_steady.cfg_dir   = "sliding_interface/incompressible_steady"
     slinc_steady.cfg_file  = "config.cfg"
     slinc_steady.test_iter = 19
-    slinc_steady.test_vals = [19.000000, -1.148249, -1.398402]
+    slinc_steady.test_vals = [19.000000, -1.141747, -1.450721]
     slinc_steady.timeout   = 100
     slinc_steady.multizone = True
     test_list.append(slinc_steady)
@@ -1089,7 +1089,7 @@ def main():
     fsi_cht.cfg_dir   = "fea_fsi/stat_fsi"
     fsi_cht.cfg_file  = "config.cfg"
     fsi_cht.test_iter = 20
-    fsi_cht.test_vals = [5, -5.077012, -5.379779, -9.247804, -9.277819, -9.183796, 6.0835e+02, -1.2973e-02, 5.7607e-08, 29]
+    fsi_cht.test_vals = [5, -5.077014, -5.379795, -9.247813, -9.320514, -9.185010, 608.35, -0.012973, 5.7607e-08, 30]
     fsi_cht.multizone = True
     test_list.append(fsi_cht)
 
@@ -1098,7 +1098,7 @@ def main():
     dyn_fsi.cfg_dir   = "fea_fsi/dyn_fsi"
     dyn_fsi.cfg_file  = "config.cfg"
     dyn_fsi.test_iter = 4
-    dyn_fsi.test_vals = [-4.330728, -4.152820, 5.3831e-08, 85]
+    dyn_fsi.test_vals = [-4.330728, -4.152820, 5.3831e-08, 86]
     dyn_fsi.multizone = True
     dyn_fsi.unsteady  = True
     test_list.append(dyn_fsi)
@@ -1108,7 +1108,7 @@ def main():
     airfoilRBF.cfg_dir   = "fea_fsi/Airfoil_RBF"
     airfoilRBF.cfg_file  = "config.cfg"
     airfoilRBF.test_iter = 1
-    airfoilRBF.test_vals = [1.000000, 0.028165, -3.530075]
+    airfoilRBF.test_vals = [1.000000, 0.026697, -3.532043]
     airfoilRBF.tol       = 0.0001
     airfoilRBF.multizone = True
     test_list.append(airfoilRBF)
@@ -1150,7 +1150,7 @@ def main():
     cht_incompressible.cfg_dir   = "coupled_cht/comp_2d"
     cht_incompressible.cfg_file  = "cht_2d_3cylinders.cfg"
     cht_incompressible.test_iter = 10
-    cht_incompressible.test_vals = [-4.256032, -0.532728, -0.532729, -0.532728]
+    cht_incompressible.test_vals = [-4.256032, -0.532728, -0.532728, -0.532728]
     cht_incompressible.multizone = True
     test_list.append(cht_incompressible)
 
@@ -1601,7 +1601,7 @@ def main():
     pywrapper_unsteadyCHT.cfg_dir       = "py_wrapper/flatPlate_unsteady_CHT"
     pywrapper_unsteadyCHT.cfg_file      = "unsteady_CHT_FlatPlate_Conf.cfg"
     pywrapper_unsteadyCHT.test_iter     = 5
-    pywrapper_unsteadyCHT.test_vals     = [-1.614168, 2.260078, -0.020704, 0.172871]
+    pywrapper_unsteadyCHT.test_vals     = [-1.614169, 2.260215, -0.019432, 0.203751]
     pywrapper_unsteadyCHT.command       =  TestCase.Command(exec = "python", param = "launch_unsteady_CHT_FlatPlate.py -f")
     pywrapper_unsteadyCHT.timeout       = 1600
     pywrapper_unsteadyCHT.tol           = 0.00001
diff --git a/TestCases/serial_regression_AD.py b/TestCases/serial_regression_AD.py
index 6e11587b8cf..4354bd0dfd9 100644
--- a/TestCases/serial_regression_AD.py
+++ b/TestCases/serial_regression_AD.py
@@ -58,7 +58,7 @@ def main():
     discadj_naca0012_via_mz.cfg_dir = "cont_adj_euler/naca0012"
     discadj_naca0012_via_mz.cfg_file = "inv_NACA0012_discadj_multizone.cfg"
     discadj_naca0012_via_mz.test_iter = 100
-    discadj_naca0012_via_mz.test_vals = [-3.563784, -5.975640, -6.326231, -8.929567]
+    discadj_naca0012_via_mz.test_vals = [-3.563784, -5.975641, -6.326231, -8.929567]
     discadj_naca0012_via_mz.enabled_with_tapetests = True
     discadj_naca0012_via_mz.tapetest_vals = [0]
     test_list.append(discadj_naca0012_via_mz)
@@ -68,7 +68,7 @@ def main():
     discadj_cylinder3D.cfg_dir   = "disc_adj_euler/cylinder3D"
     discadj_cylinder3D.cfg_file  = "inv_cylinder3D.cfg"
     discadj_cylinder3D.test_iter = 5
-    discadj_cylinder3D.test_vals = [-3.702105, -3.895140, -0.000000, 0.000000]
+    discadj_cylinder3D.test_vals = [-3.702109, -3.895140, -0.000000, 0.000000]
     test_list.append(discadj_cylinder3D)
 
     # Arina nozzle 2D
@@ -108,7 +108,7 @@ def main():
     discadj_incomp_NACA0012.cfg_dir   = "disc_adj_incomp_euler/naca0012"
     discadj_incomp_NACA0012.cfg_file  = "incomp_NACA0012_disc.cfg"
     discadj_incomp_NACA0012.test_iter = 20
-    discadj_incomp_NACA0012.test_vals = [20.000000, -3.122293, -2.287328, 0.000000]
+    discadj_incomp_NACA0012.test_vals = [20.000000, -3.122056, -2.290799, 0.000000]
     test_list.append(discadj_incomp_NACA0012)
 
     #####################################
@@ -120,7 +120,7 @@ def main():
     discadj_incomp_cylinder.cfg_dir   = "disc_adj_incomp_navierstokes/cylinder"
     discadj_incomp_cylinder.cfg_file  = "heated_cylinder.cfg"
     discadj_incomp_cylinder.test_iter = 20
-    discadj_incomp_cylinder.test_vals = [20.000000, -1.672307, -6.247612, 0.000000]
+    discadj_incomp_cylinder.test_vals = [20.000000, -1.666057, -6.234535, 0.000000]
     test_list.append(discadj_incomp_cylinder)
 
     #######################################################
@@ -132,7 +132,7 @@ def main():
     discadj_cylinder.cfg_dir   = "disc_adj_rans/cylinder"
     discadj_cylinder.cfg_file  = "cylinder.cfg"
     discadj_cylinder.test_iter = 9
-    discadj_cylinder.test_vals = [1.639372, -2.834285, -0.009537, 0.000020]
+    discadj_cylinder.test_vals = [1.639373, -2.834286, -0.009537, 0.000020]
     discadj_cylinder.unsteady  = True
     test_list.append(discadj_cylinder)
 
@@ -145,7 +145,7 @@ def main():
     discadj_DT_1ST_cylinder.cfg_dir   = "disc_adj_rans/cylinder_DT_1ST"
     discadj_DT_1ST_cylinder.cfg_file  = "cylinder.cfg"
     discadj_DT_1ST_cylinder.test_iter = 9
-    discadj_DT_1ST_cylinder.test_vals = [1.196421, -3.339043, -0.006211, 0.000020]
+    discadj_DT_1ST_cylinder.test_vals = [1.196421, -3.339044, -0.006211, 0.000020]
     discadj_DT_1ST_cylinder.unsteady  = True
     test_list.append(discadj_DT_1ST_cylinder)
 
@@ -158,7 +158,7 @@ def main():
     discadj_pitchingNACA0012.cfg_dir   = "disc_adj_euler/naca0012_pitching"
     discadj_pitchingNACA0012.cfg_file  = "inv_NACA0012_pitching.cfg"
     discadj_pitchingNACA0012.test_iter = 4
-    discadj_pitchingNACA0012.test_vals = [-1.049761, -1.501951, -0.004853, 0.000013]
+    discadj_pitchingNACA0012.test_vals = [-1.050725, -1.504630, -0.004855, 0.000013]
     discadj_pitchingNACA0012.unsteady  = True
     test_list.append(discadj_pitchingNACA0012)
 
@@ -167,7 +167,7 @@ def main():
     unst_deforming_naca0012.cfg_dir   = "disc_adj_euler/naca0012_pitching_def"
     unst_deforming_naca0012.cfg_file  = "inv_NACA0012_pitching_deform_ad.cfg"
     unst_deforming_naca0012.test_iter = 4
-    unst_deforming_naca0012.test_vals = [-1.885816, -1.781193, 3920.300000, 0.000003]
+    unst_deforming_naca0012.test_vals = [-1.886194, -1.780629, 3882.900000, 0.000003]
     unst_deforming_naca0012.unsteady  = True
     test_list.append(unst_deforming_naca0012)
 
@@ -349,7 +349,7 @@ def main():
     pywrapper_CFD_AD_MeshDisp.cfg_dir       = "py_wrapper/disc_adj_flow/mesh_disp_sens"
     pywrapper_CFD_AD_MeshDisp.cfg_file      = "configAD_flow.cfg"
     pywrapper_CFD_AD_MeshDisp.test_iter     = 1000
-    pywrapper_CFD_AD_MeshDisp.test_vals     = [30.000000, -2.496380, 1.441373, 0.000000]
+    pywrapper_CFD_AD_MeshDisp.test_vals     = [30.000000, -2.496258, 1.441670, 0.000000]
     pywrapper_CFD_AD_MeshDisp.command       = TestCase.Command(exec = "python", param = "run_adjoint.py -f")
     pywrapper_CFD_AD_MeshDisp.timeout       = 1600
     pywrapper_CFD_AD_MeshDisp.tol           = 0.000001
diff --git a/TestCases/tutorials.py b/TestCases/tutorials.py
index 305ab9fb234..d11050e4bfd 100644
--- a/TestCases/tutorials.py
+++ b/TestCases/tutorials.py
@@ -49,7 +49,7 @@ def main():
     cht_incompressible_unsteady.cfg_dir   = "../Tutorials/multiphysics/unsteady_cht/"
     cht_incompressible_unsteady.cfg_file  = "cht_2d_3cylinders.cfg"
     cht_incompressible_unsteady.test_iter = 2
-    cht_incompressible_unsteady.test_vals = [-3.075372, -0.080399, -0.080399, -0.080399, -11.163219, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 238.240000]
+    cht_incompressible_unsteady.test_vals = [-3.204738, -0.080399, -0.080399, -0.080399, -11.531676, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 238.240000]
     cht_incompressible_unsteady.multizone = True
     cht_incompressible_unsteady.unsteady  = True
     test_list.append(cht_incompressible_unsteady)
@@ -165,7 +165,7 @@ def main():
     premixed_hydrogen.cfg_dir = "../Tutorials/incompressible_flow/Inc_Combustion/1__premixed_hydrogen"
     premixed_hydrogen.cfg_file = "H2_burner.cfg"
     premixed_hydrogen.test_iter = 10
-    premixed_hydrogen.test_vals = [-8.772073, -9.721083, -11.354959, -4.381554, -12.832787]
+    premixed_hydrogen.test_vals = [-8.771332, -9.721253, -11.354950, -4.381656, -12.832815]
     test_list.append(premixed_hydrogen)
 
     ### Compressible Flow
@@ -201,7 +201,7 @@ def main():
     tutorial_lam_cylinder.cfg_dir    = "../Tutorials/compressible_flow/Laminar_Cylinder"
     tutorial_lam_cylinder.cfg_file   = "lam_cylinder.cfg"
     tutorial_lam_cylinder.test_iter  = 0
-    tutorial_lam_cylinder.test_vals  = [-6.162141, -0.699617, -0.124663, 31.721714]
+    tutorial_lam_cylinder.test_vals  = [-6.162141, -0.699617, -0.085099, 31.790369]
     tutorial_lam_cylinder.no_restart = True
     test_list.append(tutorial_lam_cylinder)
 
@@ -228,7 +228,7 @@ def main():
     tutorial_trans_flatplate.cfg_dir    = "../Tutorials/compressible_flow/Transitional_Flat_Plate"
     tutorial_trans_flatplate.cfg_file   = "transitional_BC_model_ConfigFile.cfg"
     tutorial_trans_flatplate.test_iter  = 0
-    tutorial_trans_flatplate.test_vals  = [-22.021786, -15.330766, 0.000000, 0.023944]
+    tutorial_trans_flatplate.test_vals  = [-22.025101, -15.330766, 0.000000, 0.023944]
     tutorial_trans_flatplate.no_restart = True
     test_list.append(tutorial_trans_flatplate)
 
@@ -292,7 +292,7 @@ def main():
     tutorial_nicfd_nozzle_pinn.cfg_dir   = "../Tutorials/compressible_flow/NICFD_nozzle/PhysicsInformed"
     tutorial_nicfd_nozzle_pinn.cfg_file  = "config_NICFD_PINN.cfg"
     tutorial_nicfd_nozzle_pinn.test_iter = 20
-    tutorial_nicfd_nozzle_pinn.test_vals = [-2.728179, -0.849337, -1.224542, 2.898995, -11.420290]
+    tutorial_nicfd_nozzle_pinn.test_vals = [-2.728179, -0.849337, -1.224543, 2.898995, -11.420290]
     tutorial_nicfd_nozzle_pinn.no_restart = True
     test_list.append(tutorial_nicfd_nozzle_pinn)
 
diff --git a/UnitTests/Common/geometry/CGeometry_test.cpp b/UnitTests/Common/geometry/CGeometry_test.cpp
index cad2157a804..22556b16055 100644
--- a/UnitTests/Common/geometry/CGeometry_test.cpp
+++ b/UnitTests/Common/geometry/CGeometry_test.cpp
@@ -122,9 +122,9 @@ TEST_CASE("Set control volume", "[Geometry]") {
 
   CHECK(TestCase->geometry->nodes->GetVolume(42) == Approx(0.015625));
 
-  CHECK(TestCase->geometry->edges->GetNormal(32)[0] == 0.03125);
+  CHECK(TestCase->geometry->edges->GetNormal(31)[0] == 0.03125);
   CHECK(TestCase->geometry->edges->GetNormal(5)[1] == 0.0);
-  CHECK(TestCase->geometry->edges->GetNormal(10)[2] == 0.03125);
+  CHECK(TestCase->geometry->edges->GetNormal(11)[2] == 0.03125);
 
   CHECK(TestCase->config->GetDomainVolume() == Approx(1.0));
 }
diff --git a/meson.build b/meson.build
index d43e2b031d8..e76354a6e86 100644
--- a/meson.build
+++ b/meson.build
@@ -20,13 +20,10 @@ python = pymod.find_installation()
 if get_option('enable-cuda')
   add_languages('cuda')
   add_global_arguments('-arch=sm_86', language : 'cuda')
-  cuda_deps = [meson.get_compiler('cuda').find_library('cusparse', required : true)]
-else
-  cuda_deps = []
 endif
 
 su2_cpp_args = []
-su2_deps     = [declare_dependency(include_directories: 'externals/CLI11')] + cuda_deps
+su2_deps     = [declare_dependency(include_directories: 'externals/CLI11')]
 
 default_warning_flags = []
 if build_machine.system() != 'windows'