Skip to content

Commit ed2c8a8

Browse files
authored
FastSparsityPattern (#1302)
This patch adds a faster internal path for allocation of matrices using only cell coupling via allocate_matrix. It could be extended to support more cases or merged with SparsityPattern in the future, see #1302 for further details and discussions.
1 parent ef210a5 commit ed2c8a8

6 files changed

Lines changed: 383 additions & 1 deletion

File tree

benchmark/Project.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
[deps]
22
BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf"
3+
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
34
Ferrite = "c061ca5d-56c9-439f-9c0e-210fe06d3992"
45
PkgBenchmark = "32113eaa-f34f-5b0d-bd6c-c81e245fc73d"
6+
SparseMatricesCSR = "a0a7dd2c-ebf4-11e9-1f05-cf50bc540ca1"
Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
using Ferrite
2+
using SparseMatricesCSR: SparseMatrixCSR
3+
using SparseArrays: SparseMatrixCSC
4+
using DataFrames
5+
using Ferrite: FastSparsityPattern
6+
7+
function dh_scalar(grid)
8+
CT = getcelltype(grid)
9+
RS = getrefshape(CT)
10+
return close!(add!(DofHandler(grid), :u, Lagrange{RS, 1}()))
11+
end
12+
13+
function dh_complex(grid::Grid{sdim}) where {sdim}
14+
CT = getcelltype(grid)
15+
RS = getrefshape(CT)
16+
dh = DofHandler(grid)
17+
add!(dh, :u, Lagrange{RS, 2}())
18+
add!(dh, :v, Lagrange{RS, 1}()^sdim)
19+
return close!(dh)
20+
end
21+
22+
create_sp(dh) = add_sparsity_entries!(init_sparsity_pattern(dh), dh)
23+
create_fsp(dh) = FastSparsityPattern(dh)
24+
25+
grid_2d = generate_grid(Triangle, 1000 .* (1, 1))
26+
grid_3d = generate_grid(Hexahedron, 80 .* (1, 1, 1))
27+
28+
dofhandlers = [
29+
"dh_2d_scalar" => dh_scalar(grid_2d),
30+
"dh_3d_scalar" => dh_scalar(grid_3d),
31+
"dh_2d_complex" => dh_complex(grid_2d),
32+
"dh_3d_complex" => dh_complex(grid_3d),
33+
]
34+
35+
function timef(f::F, ::Type{MatrixType}, args...; kwargs...) where {F, MatrixType}
36+
sp0 = f(args...; kwargs...) # Compile
37+
allocate_matrix(MatrixType, sp0) # Compile
38+
GC.gc()
39+
sp_allocs = @allocations (sp_runtime = @elapsed (sp = f(args...; kwargs...)))
40+
m_allocs = @allocations (m_runtime = @elapsed allocate_matrix(MatrixType, sp))
41+
return (; sp_t = sp_runtime, sp_a = sp_allocs, m_t = m_runtime, m_a = m_allocs)
42+
end
43+
44+
function fmt_time(t::Number; digits = 2)
45+
units = ["ns", "μs", "ms", "s", "min", "h"]
46+
values = [1.0e-9, 1.0e-6, 1.0e-3, 1.0, 60.0, 3600]
47+
idx = findfirst(v -> v > t, values)
48+
i = max(idx === nothing ? length(values) : idx - 1, 1)
49+
return string(round(t / values[i]; digits)) * " " * units[i]
50+
end
51+
52+
function fmt_count(n::Integer; digits = 2)
53+
units = ["k", "M", "G"]
54+
values = [1.0e3, 1.0e6, 1.0e9]
55+
n < 1000 && return string(n)
56+
i = findlast(v -> v n, values)
57+
return string(round(n / values[i]; digits)) * units[i]
58+
end
59+
60+
function make_timings(MatrixType)
61+
return map([create_sp, create_fsp]) do f
62+
[key => timef(f, MatrixType, dh) for (key, dh) in dofhandlers]
63+
end
64+
end
65+
66+
function make_df(timings)
67+
_getdata(v, k) = getindex.(last.(v), k)
68+
return DataFrame(
69+
"case" => first.(dofhandlers),
70+
"t (sp) [s]" => fmt_time.(_getdata(timings[1], :sp_t)),
71+
"t (fsp) [s]" => fmt_time.(_getdata(timings[2], :sp_t)),
72+
"allocs (sp)" => fmt_count.(_getdata(timings[1], :sp_a)),
73+
"allocs (fsp)" => fmt_count.(_getdata(timings[2], :sp_a)),
74+
"t (K,sp) [s]" => fmt_time.(_getdata(timings[1], :m_t)),
75+
"t (K,fsp) [s]" => fmt_time.(_getdata(timings[2], :m_t)),
76+
"allocs (K,sp)" => fmt_count.(_getdata(timings[1], :m_a)),
77+
"allocs (K,fsp)" => fmt_count.(_getdata(timings[2], :m_a))
78+
)
79+
end
80+
81+
display(make_df(make_timings(SparseMatrixCSC{Float64, Int})))
82+
# display(make_df(make_timings(SparseMatrixCSR)))

docs/src/devdocs/special_datastructures.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,3 +14,8 @@ Ferrite.ArrayOfVectorViews
1414
Ferrite.ConstructionBuffer
1515
Ferrite.push_at_index!
1616
```
17+
18+
## `FastSparsityPattern`
19+
```@docs
20+
Ferrite.FastSparsityPattern
21+
```

ext/FerriteSparseMatrixCSR.jl

Lines changed: 38 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
module FerriteSparseMatrixCSR
22

33
using Ferrite, SparseArrays, SparseMatricesCSR
4-
import Ferrite: AbstractSparsityPattern, CSRAssembler
4+
import Ferrite: AbstractSparsityPattern, CSRAssembler, FastSparsityPattern, getnrows, getncols
55
import Base: @propagate_inbounds
66

77
# Could be generalized if https://github.com/JuliaSparse/SparseArrays.jl/pull/546 is merged
@@ -113,4 +113,41 @@ function _allocate_matrix(::Type{SparseMatrixCSR{1, Tv, Ti}}, sp::AbstractSparsi
113113
return S
114114
end
115115

116+
## ================= ##
117+
# FastSparsityPattern #
118+
## ================= ##
119+
120+
function _allocate_matrix(::Type{SparseMatrixCSR{1, Tv, Ti}}, sp::FastSparsityPattern{Ti}, sym::Bool) where {Tv, Ti}
121+
sym && throw(ArgumentError("FastSparsityPattern does not support symmetric matrices yet"))
122+
sp.is_colidx_sorted || sort_rows_threaded!(sp) # Require sorted rows
123+
nzval = zeros(Tv, length(sp.colidx))
124+
return SparseMatrixCSR{1}(getnrows(sp), getncols(sp), sp.rowptr, sp.colidx, nzval)
125+
end
126+
127+
function sort_rows!(sp::FastSparsityPattern, rowrange::UnitRange)
128+
@inbounds for row in rowrange
129+
i1 = sp.rowptr[row]
130+
i2 = sp.rowptr[row + 1] - 1
131+
if i1 < i2
132+
sort!(view(sp.colidx, i1:i2); alg = QuickSort)
133+
end
134+
end
135+
return sp
136+
end
137+
138+
function sort_rows_threaded!(
139+
sp::FastSparsityPattern, # Default ΔN ≥ 1000 and `n_tasks ≥ 1`
140+
ntasks = max(min(Threads.nthreads() * 100, getnrows(sp) ÷ 1000), 1)
141+
) # Otherwise, 100 per thread for load balancing
142+
nrows = getnrows(sp)
143+
ΔN = cld(nrows, ntasks)
144+
Threads.@threads for taskid in 1:ntasks
145+
first_idx = 1 + ΔN * (taskid - 1)
146+
last_idx = min(first_idx + ΔN - 1, nrows)
147+
sort_rows!(sp, first_idx:last_idx)
148+
end
149+
sp.is_colidx_sorted = true
150+
return sp
151+
end
152+
116153
end

src/Dofs/sparsity_pattern.jl

Lines changed: 201 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -413,6 +413,12 @@ arguments `args` and keyword arguments `kwargs`.
413413
copy(K)`) instead.
414414
"""
415415
function allocate_matrix(::Type{MatrixType}, dh::DofHandler, args...; kwargs...) where {MatrixType}
416+
_get_Ti(::Type{<:AbstractMatrix}) = Int
417+
_get_Ti(::Type{<:AbstractSparseMatrix{<:Any, Ti}}) where {Ti} = Ti
418+
if _can_use_fastsp(MatrixType, args...; kwargs...)
419+
fsp = FastSparsityPattern(_get_Ti(MatrixType), dh, args...; kwargs...)
420+
return allocate_matrix(MatrixType, fsp)
421+
end
416422
sp = init_sparsity_pattern(dh)
417423
add_sparsity_entries!(sp, dh, args...; kwargs...)
418424
return allocate_matrix(MatrixType, sp)
@@ -681,3 +687,198 @@ function _allocate_matrix(::Type{SparseMatrixCSC{Tv, Ti}}, sp::AbstractSparsityP
681687
S = SparseMatrixCSC(getnrows(sp), getncols(sp), colptr, rowval, nzval)
682688
return S
683689
end
690+
691+
## ================= ##
692+
# FastSparsityPattern #
693+
## ================= ##
694+
695+
"""
696+
FastSparsityPattern([Ti = Int64], dh::DofHandler)
697+
698+
This sparsity does not currently support the full `AbstractSparsityPattern` interface,
699+
but is used as an internal fast-path for `allocate_matrix(MatrixType, dh)` for some
700+
supported `MatrixType`s. It can be extended in the future or potentially be merged
701+
with `SparsityPattern`.
702+
See [#1302](https://github.com/Ferrite-FEM/Ferrite.jl/pull/1302) for details.
703+
704+
!!! warning "Internal"
705+
`FastSparsityPattern` is strictly internal and its interface and implementation
706+
may change at any time.
707+
708+
"""
709+
mutable struct FastSparsityPattern{Ti} <: AbstractSparsityPattern
710+
const rowlen::Vector{Ti} # Number of stored entries in each row
711+
const marker::Vector{Ti} # Marker if column has been "visited" by certain row
712+
const rowptr::Vector{Ti} # Index of stored entries at the start of each row
713+
const colidx::Vector{Ti} # colidx[i] gives the column number of the ith stored entry
714+
is_colidx_sorted::Bool # Is colidx sorted for each row
715+
end
716+
function FastSparsityPattern(::Type{Ti}, ncols, nrows) where {Ti <: Integer}
717+
rowlen = zeros(Ti, nrows)
718+
marker = zeros(Ti, ncols)
719+
rowptr = Vector{Ti}(undef, nrows + 1)
720+
colidx = Vector{Ti}(undef, 0) # To be resized later
721+
return FastSparsityPattern(rowlen, marker, rowptr, colidx, false)
722+
end
723+
724+
# _can_use_fastsp(MatrixType, args...; kwargs...) where args and kwargs are those passed to
725+
# `allocate_matrix`. See `add_sparsity_entries!` for a description of args/kwargs.
726+
function _can_use_fastsp(
727+
::Type{MatrixType},
728+
ch = nothing;
729+
topology = nothing,
730+
keep_constrained = true,
731+
coupling = nothing,
732+
interface_coupling = nothing
733+
) where {MatrixType}
734+
if ch === topology === coupling === interface_coupling === nothing
735+
if MatrixType <: AbstractSparseMatrix # Symmetric/Block matrices not supported
736+
return keep_constrained
737+
end
738+
end
739+
return false
740+
end
741+
742+
FastSparsityPattern(dh::DofHandler) = FastSparsityPattern(Int, dh)
743+
function FastSparsityPattern(::Type{Ti}, dh::DofHandler) where {Ti}
744+
sp = FastSparsityPattern(Ti, ndofs(dh), ndofs(dh))
745+
# Step 1: Create cell_dof_views::ArrayOfVectorViews (would be nice in `DofHandler` directly)
746+
cell_dofs_views = create_celldofs(dh)
747+
# Step 2: Define mapping rownr to cells
748+
row_to_cells = create_row_to_cells(cell_dofs_views, sp)
749+
# Step 3: Count how many cols stored for each row
750+
count_row_sizes!(sp, row_to_cells, cell_dofs_views)
751+
# Step 4: Build the rowptr (indices for s)
752+
build_rowptr!(sp)
753+
fill_colidx!(sp, row_to_cells, cell_dofs_views)
754+
return sp
755+
end
756+
757+
getncols(sp::FastSparsityPattern) = length(sp.marker)
758+
getnrows(sp::FastSparsityPattern) = length(sp.rowlen)
759+
760+
function create_celldofs(dh::DofHandler)
761+
isclosed(dh) || throw(ArgumentError("DofHandler must be closed"))
762+
ncells = getncells(dh.grid)
763+
indices = similar(dh.cell_dofs_offset, ncells + 1)
764+
cell_dofs = similar(dh.cell_dofs)
765+
n = 1
766+
for cell_idx in 1:ncells
767+
indices[cell_idx] = n
768+
num = ndofs_per_cell(dh, cell_idx)
769+
num == 0 && continue
770+
r = n:(n + num - 1)
771+
#celldofs!(view(cell_dofs, r), dh, cell_idx), but faster without view:
772+
soffs = dh.cell_dofs_offset[cell_idx]
773+
copyto!(cell_dofs, n, dh.cell_dofs, soffs, num)
774+
n = last(r) + 1
775+
end
776+
indices[end] = n
777+
return ArrayOfVectorViews(indices, cell_dofs, LinearIndices((ncells,)))
778+
end
779+
780+
function create_row_to_cells(cell_dofs::ArrayOfVectorViews, sp)
781+
nrows = getnrows(sp)
782+
num_cells = zeros(Int, nrows)
783+
# 1: Figure out how many cells are connected to each dof
784+
n_connected = 0
785+
@inbounds for rows in cell_dofs # dof = row
786+
for row in rows
787+
num_cells[row] += 1
788+
n_connected += 1
789+
end
790+
end
791+
792+
# 2: Create the correct datastructure
793+
data = Vector{Int}(undef, n_connected)
794+
indices = Vector{Int}(undef, nrows + 1)
795+
indices[1] = 1
796+
@inbounds for row in 1:nrows
797+
indices[row + 1] = indices[row] + num_cells[row]
798+
end
799+
fill!(num_cells, 0) # Now we use this to count how many have been added
800+
@inbounds for (cellnr, rows) in enumerate(cell_dofs)
801+
for row in rows
802+
data[indices[row] + num_cells[row]] = cellnr
803+
num_cells[row] += 1
804+
end
805+
end
806+
return ArrayOfVectorViews(indices, data, LinearIndices((nrows,)))
807+
end
808+
809+
function count_row_sizes!(sp::FastSparsityPattern, row_to_cells::AbstractVector, cell_dofs::ArrayOfVectorViews)
810+
@inbounds for row in 1:getnrows(sp)
811+
for cnr in row_to_cells[row]
812+
for col in cell_dofs[cnr]
813+
if sp.marker[col] != row
814+
sp.marker[col] = row
815+
sp.rowlen[row] += 1
816+
end
817+
end
818+
end
819+
end
820+
return sp
821+
end
822+
823+
function build_rowptr!(sp)
824+
sp.rowptr[1] = 1
825+
@inbounds for row in 1:getnrows(sp)
826+
sp.rowptr[row + 1] = sp.rowptr[row] + sp.rowlen[row]
827+
end
828+
return sp
829+
end
830+
831+
function fill_colidx!(sp::FastSparsityPattern, row_to_cells::AbstractVector, cell_dofs::ArrayOfVectorViews)
832+
resize!(sp.colidx, sp.rowptr[end] - 1) # nnz
833+
fill!(sp.marker, 0)
834+
@inbounds for row in 1:getnrows(sp)
835+
pos = sp.rowptr[row]
836+
for cnr in row_to_cells[row]
837+
for col in cell_dofs[cnr]
838+
if sp.marker[col] != row
839+
sp.marker[col] = row
840+
sp.colidx[pos] = col
841+
pos += 1
842+
end
843+
end
844+
end
845+
end
846+
return sp
847+
end
848+
849+
allocate_matrix(sp::FastSparsityPattern) = allocate_matrix(SparseMatrixCSC, sp)
850+
allocate_matrix(::Type{SparseMatrixCSC}, sp::FastSparsityPattern{Int}) = allocate_matrix(SparseMatrixCSC{Float64, Int}, sp)
851+
function allocate_matrix(::Type{<:SparseMatrixCSC{Tv, Ti}}, sp::FastSparsityPattern{Ti}) where {Ti, Tv}
852+
nnz = length(sp.colidx)
853+
ncols = getncols(sp)
854+
nrows = getnrows(sp)
855+
856+
# Number of stored entries per column
857+
collen = zeros(Ti, ncols)
858+
@inbounds for col in sp.colidx
859+
collen[col] += 1
860+
end
861+
862+
# Index of stored entries at the start of each column
863+
colptr = Vector{Ti}(undef, ncols + 1)
864+
colptr[1] = 1
865+
@inbounds for col in 1:ncols
866+
colptr[col + 1] = colptr[col] + collen[col]
867+
end
868+
869+
# Build rowidx[i] giving the row number of the ith stored entry
870+
rowidx = Vector{Ti}(undef, nnz)
871+
next = copy(colptr)
872+
@inbounds for row in 1:nrows
873+
for p in sp.rowptr[row]:(sp.rowptr[row + 1] - 1)
874+
col = sp.colidx[p]
875+
q = next[col]
876+
rowidx[q] = row
877+
next[col] = q + 1
878+
# For a given col, next[col] is increasing, and row is
879+
# increasing in the outer loop -> rowidx sorted for each col
880+
end
881+
end
882+
nzval = zeros(Tv, nnz)
883+
return SparseMatrixCSC(nrows, ncols, colptr, rowidx, nzval)
884+
end

0 commit comments

Comments
 (0)