Add assembly of rectangular sparse matrices (#1279)

termi-official · web-flow · commit a3477615a9ed · 2026-03-07T10:53:47.000+01:00
diff --git a/ext/FerriteSparseMatrixCSR.jl b/ext/FerriteSparseMatrixCSR.jl
@@ -7,34 +7,39 @@ import Base: @propagate_inbounds
 # Could be generalized if https://github.com/JuliaSparse/SparseArrays.jl/pull/546 is merged
 function Ferrite.start_assemble(K::SparseMatrixCSR{<:Any, T}, f::Vector = T[]; fillzero::Bool = true, maxcelldofs_hint::Int = 0) where {T}
     fillzero && (Ferrite.fillzero!(K); Ferrite.fillzero!(f))
-    return CSRAssembler(K, f, zeros(Int, maxcelldofs_hint), zeros(Int, maxcelldofs_hint))
+    return CSRAssembler(K, f, zeros(Int, maxcelldofs_hint), zeros(Int, maxcelldofs_hint), zeros(Int, maxcelldofs_hint), zeros(Int, maxcelldofs_hint))
 end
 
-@propagate_inbounds function Ferrite._assemble_inner!(K::SparseMatrixCSR, Ke::AbstractMatrix, dofs::AbstractVector, sorteddofs::AbstractVector, permutation::AbstractVector, sym::Bool)
+@propagate_inbounds function Ferrite._assemble_inner!(
+        K::SparseMatrixCSR, Ke::AbstractMatrix,
+        rowdofs::AbstractVector, sortedrowdofs::AbstractVector, rowpermutation::AbstractVector,
+        coldofs::AbstractVector, sortedcoldofs::AbstractVector, colpermutation::AbstractVector,
+        sym::Bool
+    )
     current_row = 1
-    ld = length(dofs)
-    return @inbounds for Krow in sorteddofs
+    ld = length(coldofs)
+    return @inbounds for Krow in sortedrowdofs
         maxlookups = sym ? current_row : ld
-        Kerow = permutation[current_row]
+        Kerow = rowpermutation[current_row]
         ci = 1 # col index pointer for the local matrix
         Ci = 1 # col index pointer for the global matrix
         nzr = nzrange(K, Krow)
         while Ci <= length(nzr) && ci <= maxlookups
             C = nzr[Ci]
             Kcol = K.colval[C]
-            Kecol = permutation[ci]
+            Kecol = colpermutation[ci]
             val = Ke[Kerow, Kecol]
-            if Kcol == dofs[Kecol]
+            if Kcol == coldofs[Kecol]
                 # Match: add the value (if non-zero) and advance the pointers
                 if !iszero(val)
                     K.nzval[C] += val
                 end
                 ci += 1
                 Ci += 1
-            elseif Kcol < dofs[Kecol]
+            elseif Kcol < coldofs[Kecol]
                 # No match yet: advance the global matrix row pointer
                 Ci += 1
-            else # Kcol > dofs[Kecol]
+            else # Kcol > coldofs[Kecol]
                 # No match: no entry exist in the global matrix for this row. This is
                 # allowed as long as the value which would have been inserted is zero.
                 iszero(val) || Ferrite._missing_sparsity_pattern_error(Krow, Kcol)
@@ -44,8 +49,8 @@ end
         end
         # Make sure that remaining entries in this column of the local matrix are all zero
         for i in ci:maxlookups
-            if !iszero(Ke[Kerow, permutation[i]])
-                Ferrite._missing_sparsity_pattern_error(Krow, sorteddofs[i])
+            if !iszero(Ke[Kerow, colpermutation[i]])
+                Ferrite._missing_sparsity_pattern_error(Krow, sortedcoldofs[i])
             end
         end
         current_row += 1
diff --git a/src/assembler.jl b/src/assembler.jl
@@ -176,8 +176,10 @@ Assembler for sparse matrix with CSC storage type.
 struct CSCAssembler{Tv, Ti, MT <: AbstractSparseMatrixCSC{Tv, Ti}} <: AbstractCSCAssembler
     K::MT
     f::Vector{Tv}
-    permutation::Vector{Int}
-    sorteddofs::Vector{Int}
+    rowpermutation::Vector{Int}
+    colpermutation::Vector{Int}
+    sortedrowdofs::Vector{Int}
+    sortedcoldofs::Vector{Int}
 end
 
 """
@@ -186,8 +188,10 @@ Assembler for sparse matrix with CSR storage type.
 struct CSRAssembler{Tv, Ti, MT <: AbstractSparseMatrix{Tv, Ti}} <: AbstractCSRAssembler #AbstractSparseMatrixCSR does not exist
     K::MT
     f::Vector{Tv}
-    permutation::Vector{Int}
-    sorteddofs::Vector{Int}
+    rowpermutation::Vector{Int}
+    colpermutation::Vector{Int}
+    sortedrowdofs::Vector{Int}
+    sortedcoldofs::Vector{Int}
 end
 
 """
@@ -196,8 +200,10 @@ Assembler for symmetric sparse matrix with CSC storage type.
 struct SymmetricCSCAssembler{Tv, Ti, MT <: Symmetric{Tv, <:AbstractSparseMatrixCSC{Tv, Ti}}} <: AbstractCSCAssembler
     K::MT
     f::Vector{Tv}
-    permutation::Vector{Int}
-    sorteddofs::Vector{Int}
+    rowpermutation::Vector{Int} # Symmetric assembly does not need separate row and column
+    colpermutation::Vector{Int} # buffers; both are kept to simplify code reuse with the
+    sortedrowdofs::Vector{Int}  # non-symmetric cases. start_assemble aliases rowpermutation
+    sortedcoldofs::Vector{Int}  # with colpermutation and sortedrowdofs with sortedcoldofs.
 end
 
 function Base.show(io::IO, ::MIME"text/plain", a::Union{CSCAssembler, CSRAssembler, SymmetricCSCAssembler})
@@ -239,11 +245,13 @@ start_assemble(K::Union{AbstractSparseMatrixCSC, Symmetric{<:Any, <:AbstractSpar
 
 function start_assemble(K::AbstractSparseMatrixCSC{T}, f::Vector = T[]; fillzero::Bool = true, maxcelldofs_hint::Int = 0) where {T}
     fillzero && (fillzero!(K); fillzero!(f))
-    return CSCAssembler(K, f, zeros(Int, maxcelldofs_hint), zeros(Int, maxcelldofs_hint))
+    return CSCAssembler(K, f, zeros(Int, maxcelldofs_hint), zeros(Int, maxcelldofs_hint), zeros(Int, maxcelldofs_hint), zeros(Int, maxcelldofs_hint))
 end
 function start_assemble(K::Symmetric{T, <:SparseMatrixCSC}, f::Vector = T[]; fillzero::Bool = true, maxcelldofs_hint::Int = 0) where {T}
     fillzero && (fillzero!(K); fillzero!(f))
-    return SymmetricCSCAssembler(K, f, zeros(Int, maxcelldofs_hint), zeros(Int, maxcelldofs_hint))
+    permutation = zeros(Int, maxcelldofs_hint)
+    sorteddofs = zeros(Int, maxcelldofs_hint)
+    return SymmetricCSCAssembler(K, f, permutation, permutation, sorteddofs, sorteddofs)
 end
 
 function finish_assemble(a::Union{CSCAssembler, CSRAssembler, SymmetricCSCAssembler})
@@ -254,19 +262,29 @@ end
     assemble!(A::AbstractAssembler, dofs::AbstractVector{Int}, Ke::AbstractMatrix)
     assemble!(A::AbstractAssembler, dofs::AbstractVector{Int}, Ke::AbstractMatrix, fe::AbstractVector)
 
-Assemble the element stiffness matrix `Ke` (and optional force vector `fe`) into the global
+Assemble the square element stiffness matrix `Ke` (and optional force vector `fe`) into the global
 stiffness (and force) in `A`, given the element degrees of freedom `dofs`.
 
-This is equivalent to `K[dofs, dofs] += Ke` and `f[dofs] += fe`, where `K` is the global
-stiffness matrix and `f` the global force/residual vector, but more efficient.
+This is equivalent to `K[dofs, dofs] += Ke` and `f[dofs] += fe`, where `K` is the global stiffness matrix and `f` the global force/residual vector, but more efficient.
+
+    assemble!(A::AbstractAssembler, rowdofs::AbstractVector{Int}, coldofs::AbstractVector{Int}, Ke::AbstractMatrix)
+    assemble!(A::AbstractAssembler, rowdofs::AbstractVector{Int}, coldofs::AbstractVector{Int}, Ke::AbstractMatrix, fe::AbstractVector)
+
+Assemble the element stiffness matrix `Ke` (and optional force vector `fe`) into the global
+stiffness (and force) in `A`, given the element row degrees of freedom, `rowdofs`, and element column degrees of freedom, `coldofs`.
+This is equivalent to `K[rowdofs, coldofs] += Ke` and `f[rowdofs] += fe`, but more efficient.
 """
 assemble!(::AbstractAssembler, ::AbstractVector{<:Integer}, ::AbstractMatrix, ::AbstractVector)
 
 @propagate_inbounds function assemble!(A::AbstractAssembler, dofs::AbstractVector{<:Integer}, Ke::AbstractMatrix, fe::Union{AbstractVector, Nothing} = nothing)
-    return _assemble!(A, dofs, Ke, fe, false)
+    size(Ke, 1) == size(Ke, 2) || throw(ArgumentError("Ke is rectangular, but only a single `dofs` vector is provided. Please call assemble!(A, rowdofs, coldofs, Ke, fe) instead."))
+    return _assemble!(A, dofs, dofs, Ke, fe, false)
+end
+@propagate_inbounds function assemble!(A::AbstractAssembler, rowdofs::AbstractVector{<:Integer}, coldofs::AbstractVector{<:Integer}, Ke::AbstractMatrix, fe::Union{AbstractVector, Nothing} = nothing)
+    return _assemble!(A, rowdofs, coldofs, Ke, fe, false)
 end
 @propagate_inbounds function assemble!(A::SymmetricCSCAssembler, dofs::AbstractVector{<:Integer}, Ke::AbstractMatrix, fe::Union{AbstractVector, Nothing} = nothing)
-    return _assemble!(A, dofs, Ke, fe, true)
+    return _assemble!(A, dofs, dofs, Ke, fe, true)
 end
 
 """
@@ -283,53 +301,62 @@ Sorts the dofs into a separate buffer and returns it together with a permutation
     return sorteddofs, permutation
 end
 
-@propagate_inbounds function _assemble!(A::Union{AbstractCSCAssembler, AbstractCSRAssembler}, dofs::AbstractVector{<:Integer}, Ke::AbstractMatrix, fe::Union{AbstractVector, Nothing}, sym::Bool)
-    ld = length(dofs)
-    @boundscheck checkbounds(Ke, keys(dofs), keys(dofs))
+@propagate_inbounds function _assemble!(A::Union{AbstractCSCAssembler, AbstractCSRAssembler}, rowdofs::AbstractVector{<:Integer}, coldofs::AbstractVector{<:Integer}, Ke::AbstractMatrix, fe::Union{AbstractVector, Nothing}, sym::Bool)
+    @boundscheck checkbounds(Ke, keys(rowdofs), keys(coldofs))
     if fe !== nothing
-        @boundscheck checkbounds(fe, keys(dofs))
-        @boundscheck checkbounds(A.f, dofs)
-        @inbounds assemble!(A.f, dofs, fe)
+        @boundscheck checkbounds(fe, keys(rowdofs))
+        @boundscheck checkbounds(A.f, rowdofs)
+        @inbounds assemble!(A.f, rowdofs, fe)
     end
 
     K = matrix_handle(A)
-    @boundscheck checkbounds(K, dofs, dofs)
+    @boundscheck checkbounds(K, rowdofs, coldofs)
 
     # We assume that the input dofs are not sorted, because the cells need the dofs in
     # a specific order, which might not be the sorted order. Hence we sort them.
     # Note that we are not allowed to mutate `dofs` in the process.
-    sorteddofs, permutation = _sortdofs_for_assembly!(A.permutation, A.sorteddofs, dofs)
+    sortedcoldofs, colpermutation = _sortdofs_for_assembly!(A.colpermutation, A.sortedcoldofs, coldofs)
+    sortedrowdofs, rowpermutation = if rowdofs !== coldofs
+        _sortdofs_for_assembly!(A.rowpermutation, A.sortedrowdofs, rowdofs)
+    else
+        sortedcoldofs, colpermutation
+    end
 
-    return _assemble_inner!(K, Ke, dofs, sorteddofs, permutation, sym)
+    return _assemble_inner!(K, Ke, rowdofs, sortedrowdofs, rowpermutation, coldofs, sortedcoldofs, colpermutation, sym)
 end
 
-@propagate_inbounds function _assemble_inner!(K::SparseMatrixCSC, Ke::AbstractMatrix, dofs::AbstractVector, sorteddofs::AbstractVector, permutation::AbstractVector, sym::Bool)
+@propagate_inbounds function _assemble_inner!(
+        K::SparseMatrixCSC, Ke::AbstractMatrix,
+        rowdofs::AbstractVector, sortedrowdofs::AbstractVector, rowpermutation::AbstractVector,
+        coldofs::AbstractVector, sortedcoldofs::AbstractVector, colpermutation::AbstractVector,
+        sym::Bool
+    )
     current_col = 1
     Krows = rowvals(K)
     Kvals = nonzeros(K)
-    ld = length(dofs)
-    @inbounds for Kcol in sorteddofs
+    ld = length(rowdofs)
+    @inbounds for Kcol in sortedcoldofs
         maxlookups = sym ? current_col : ld
-        Kecol = permutation[current_col]
+        Kecol = colpermutation[current_col]
         ri = 1 # row index pointer for the local matrix
         Ri = 1 # row index pointer for the global matrix
         nzr = nzrange(K, Kcol)
         while Ri <= length(nzr) && ri <= maxlookups
             R = nzr[Ri]
             Krow = Krows[R]
-            Kerow = permutation[ri]
+            Kerow = rowpermutation[ri]
             val = Ke[Kerow, Kecol]
-            if Krow == dofs[Kerow]
+            if Krow == rowdofs[Kerow]
                 # Match: add the value (if non-zero) and advance the pointers
                 if !iszero(val)
                     Kvals[R] += val
                 end
                 ri += 1
                 Ri += 1
-            elseif Krow < dofs[Kerow]
+            elseif Krow < rowdofs[Kerow]
                 # No match yet: advance the global matrix row pointer
                 Ri += 1
-            else # Krow > dofs[Kerow]
+            else # Krow > rowdofs[Kerow]
                 # No match: no entry exist in the global matrix for this row. This is
                 # allowed as long as the value which would have been inserted is zero.
                 iszero(val) || _missing_sparsity_pattern_error(Krow, Kcol)
@@ -339,8 +366,8 @@ end
         end
         # Make sure that remaining entries in this column of the local matrix are all zero
         for i in ri:maxlookups
-            if !iszero(Ke[permutation[i], Kecol])
-                _missing_sparsity_pattern_error(sorteddofs[i], Kcol)
+            if !iszero(Ke[rowpermutation[i], Kecol])
+                _missing_sparsity_pattern_error(sortedrowdofs[i], Kcol)
             end
         end
         current_col += 1
diff --git a/test/test_assemble.jl b/test/test_assemble.jl
@@ -1,3 +1,6 @@
+using Ferrite, SparseArrays
+import LinearAlgebra: Symmetric
+
 @testset "assemble" begin
     dofs = [1, 3, 5, 7]
     maxd = maximum(dofs)
@@ -43,14 +46,44 @@
     @test size(K) == (10, 10)
     @test length(f) == 10
 
-    # assemble with different row and col dofs
+    # COOAssembler: assemble with different row and col dofs
     rdofs = [1, 4, 6]
     cdofs = [1, 7]
     a = Ferrite.COOAssembler()
     Ke = rand(length(rdofs), length(cdofs))
     assemble!(a, rdofs, cdofs, Ke)
     K, _ = finish_assemble(a)
-    @test (K[rdofs, cdofs] .== Ke) |> all
+    @test all(K[rdofs, cdofs] .== Ke)
+
+    # CSCAssembler: assemble with different row and col dofs
+    I = [1, 1, 4, 4, 6, 6]
+    J = [1, 3, 1, 3, 1, 3]
+    V = zeros(length(I))
+    K = sparse(I, J, V)
+    f = zeros(6)
+    assembler = start_assemble(K, f)
+    rdofs = [1, 4, 6]
+    cdofs = [1, 3]
+    Ke = rand(length(rdofs), length(cdofs))
+    fe = rand(length(rdofs))
+    assemble!(assembler, rdofs, cdofs, Ke, fe)
+    assemble!(assembler, rdofs, cdofs, Ke, fe)
+    @test_throws ArgumentError assemble!(assembler, rdofs, Ke, fe) # Rectangular Ke with a single dofs vector
+    @test all(K[rdofs, cdofs] .== 2Ke)
+    @test all(f[rdofs] .== 2fe)
+
+    # CSCAssembler: assemble a rectangular block into a square matrix
+    K = SparseMatrixCSC(6, 6, [K.colptr..., 7, 7, 7], K.rowval, K.nzval)
+    assembler = start_assemble(K, f)
+    rdofs = [1, 4, 6]
+    cdofs = [1, 3]
+    Ke = rand(length(rdofs), length(cdofs))
+    fe = rand(length(rdofs))
+    assemble!(assembler, rdofs, cdofs, Ke, fe)
+    assemble!(assembler, rdofs, cdofs, Ke, fe)
+    @test_throws ArgumentError assemble!(assembler, rdofs, Ke, fe) # Rectangular Ke with a single dofs vector
+    @test all(K[rdofs, cdofs] .== 2Ke)
+    @test all(f[rdofs] .== 2fe)
 
     # SparseMatrix assembler
     K = spzeros(10, 10)
diff --git a/test/test_assembler_extensions.jl b/test/test_assembler_extensions.jl
@@ -1,3 +1,4 @@
+using Ferrite
 import SparseMatricesCSR: SparseMatrixCSR, sparsecsr
 using SparseArrays, LinearAlgebra
 
@@ -84,6 +85,36 @@ using SparseArrays, LinearAlgebra
         @test K ≈ sparsecsr(I, J, V)
         @test f ≈ [4 / 3, 2.0, 1.0]
 
+        # CSRAssembler: assemble with different row and col dofs
+        I = [1, 1, 4, 4, 6, 6]
+        J = [1, 3, 1, 3, 1, 3]
+        V = zeros(length(I))
+        K = sparsecsr(I, J, V)
+        f = zeros(6)
+        assembler = start_assemble(K, f)
+        rdofs = [1, 4, 6]
+        cdofs = [1, 3]
+        Ke = rand(length(rdofs), length(cdofs))
+        fe = rand(length(rdofs))
+        assemble!(assembler, rdofs, cdofs, Ke, fe)
+        assemble!(assembler, rdofs, cdofs, Ke, fe)
+        @test_throws ArgumentError assemble!(assembler, rdofs, Ke, fe) # Rectangular Ke with a single dofs vector
+        @test all(K[rdofs, cdofs] .== 2Ke)
+        @test all(f[rdofs] .== 2fe)
+
+        # CSRAssembler: assemble a rectangular block into a square matrix
+        K = SparseMatrixCSR{1}(6, 6, K.rowptr, K.colval, K.nzval)
+        assembler = start_assemble(K, f)
+        rdofs = [1, 4, 6]
+        cdofs = [1, 3]
+        Ke = rand(length(rdofs), length(cdofs))
+        fe = rand(length(rdofs))
+        assemble!(assembler, rdofs, cdofs, Ke, fe)
+        assemble!(assembler, rdofs, cdofs, Ke, fe)
+        @test_throws ArgumentError assemble!(assembler, rdofs, Ke, fe) # Rectangular Ke with a single dofs vector
+        @test all(K[rdofs, cdofs] .== 2Ke)
+        @test all(f[rdofs] .== 2fe)
+
         # Check if coupling works
         grid = generate_grid(Quadrilateral, (2, 2))
         ip = Lagrange{RefQuadrilateral, 1}()