@@ -681,3 +681,195 @@ function _allocate_matrix(::Type{SparseMatrixCSC{Tv, Ti}}, sp::AbstractSparsityP
681681 S = SparseMatrixCSC (getnrows (sp), getncols (sp), colptr, rowval, nzval)
682682 return S
683683end
684+
685+ # # ================== ##
686+ # FastSparsityPattern #
687+ # # ================== ##
688+
689+ # Full `AbstractSparsityPattern` interface not supported
690+ mutable struct FastSparsityPattern{Ti} <: AbstractSparsityPattern
691+ const rowlen:: Vector{Ti} # Number of stored entries in each row
692+ const marker:: Vector{Ti} # Marker if column has been "visited" by certain row
693+ const rowptr:: Vector{Ti} # Index of stored entries at the start of each row
694+ const colidx:: Vector{Ti} # colidx[i] gives the column number of the ith stored entry
695+ is_colidx_sorted:: Bool # Is colidx sorted for each row
696+ end
697+ function FastSparsityPattern (:: Type{Ti} , ncols, nrows) where {Ti <: Integer }
698+ rowlen = zeros (Ti, nrows)
699+ marker = zeros (Ti, ncols)
700+ rowptr = Vector {Ti} (undef, nrows + 1 )
701+ colidx = Vector {Ti} (undef, 0 ) # To be resized later
702+ return FastSparsityPattern (rowlen, marker, rowptr, colidx, false )
703+ end
704+
705+ FastSparsityPattern (dh:: DofHandler ) = FastSparsityPattern (Int, dh)
706+ function FastSparsityPattern (:: Type{Ti} , dh:: DofHandler ) where {Ti}
707+ sp = FastSparsityPattern (Ti, ndofs (dh), ndofs (dh))
708+ # Step 1: Create cell_dof_views::ArrayOfVectorViews (would be nice in `DofHandler` directly)
709+ ncells = getncells (dh. grid)
710+ indices = copy (dh. cell_dofs_offset)
711+ push! (indices, length (dh. cell_dofs) + 1 )
712+ cell_dofs_views = ArrayOfVectorViews (indices, dh. cell_dofs, LinearIndices ((ncells,)))
713+ # Step 2: Define mapping rownr to cells
714+ row_to_cells = create_row_to_cells (cell_dofs_views, sp)
715+ # Step 3: Count how many cols stored for each row
716+ count_row_sizes! (sp, row_to_cells, cell_dofs_views)
717+ # Step 4: Build the rowptr (indices for s)
718+ build_rowptr! (sp)
719+ fill_colidx! (sp, row_to_cells, cell_dofs_views)
720+ return sp
721+ end
722+
723+ getncols (sp:: FastSparsityPattern ) = length (sp. marker)
724+ getnrows (sp:: FastSparsityPattern ) = length (sp. rowlen)
725+
726+ function create_row_to_cells (cell_dofs:: ArrayOfVectorViews , sp)
727+ nrows = getnrows (sp)
728+ num_cells = zeros (Int, nrows)
729+ # 1: Figure out how many cells are connected to each dof
730+ n_connected = 0
731+ @inbounds for rows in cell_dofs # dof = row
732+ for row in rows
733+ num_cells[row] += 1
734+ n_connected += 1
735+ end
736+ end
737+ # n_connected = sum(num_cells) better?
738+
739+ # 2: Create the correct datastructure
740+ data = Vector {Int} (undef, n_connected)
741+ indices = Vector {Int} (undef, nrows + 1 )
742+ indices[1 ] = 1
743+ @inbounds for row in 1 : nrows
744+ indices[row + 1 ] = indices[row] + num_cells[row]
745+ end
746+ fill! (num_cells, 0 ) # Now we use this to count how many have been added
747+ @inbounds for (cellnr, rows) in enumerate (cell_dofs)
748+ for row in rows
749+ data[indices[row] + num_cells[row]] = cellnr
750+ num_cells[row] += 1
751+ end
752+ end
753+ return ArrayOfVectorViews (indices, data, LinearIndices ((nrows,)))
754+ end
755+
756+ function count_row_sizes! (sp:: FastSparsityPattern , row_to_cells:: AbstractVector , cell_dofs:: ArrayOfVectorViews )
757+ @inbounds for row in 1 : getnrows (sp)
758+ for cnr in row_to_cells[row]
759+ for col in cell_dofs[cnr]
760+ if sp. marker[col] != row
761+ sp. marker[col] = row
762+ sp. rowlen[row] += 1
763+ end
764+ end
765+ end
766+ end
767+ return sp
768+ end
769+
770+ function build_rowptr! (sp)
771+ sp. rowptr[1 ] = 1
772+ @inbounds for row in 1 : getnrows (sp)
773+ sp. rowptr[row + 1 ] = sp. rowptr[row] + sp. rowlen[row]
774+ end
775+ return sp
776+ end
777+
778+ function fill_colidx! (sp:: FastSparsityPattern , row_to_cells:: AbstractVector , cell_dofs:: ArrayOfVectorViews )
779+ resize! (sp. colidx, sp. rowptr[end ] - 1 ) # nnz
780+ fill! (sp. marker, 0 )
781+ @inbounds for row in 1 : getnrows (sp)
782+ pos = sp. rowptr[row]
783+ for cnr in row_to_cells[row]
784+ for col in cell_dofs[cnr]
785+ if sp. marker[col] != row
786+ sp. marker[col] = row
787+ sp. colidx[pos] = col
788+ pos += 1
789+ end
790+ end
791+ end
792+ end
793+ return sp
794+ end
795+
796+ allocate_matrix (sp:: FastSparsityPattern ) = allocate_matrix (SparseMatrixCSC, sp)
797+ allocate_matrix (:: Type{SparseMatrixCSC} , sp:: FastSparsityPattern{Int} ) = allocate_matrix (SparseMatrixCSC{Float64, Int}, sp)
798+ function allocate_matrix (:: Type{<:SparseMatrixCSC{Tv, Ti}} , sp:: FastSparsityPattern{Ti} ) where {Ti, Tv}
799+ nnz = length (sp. colidx)
800+ ncols = getncols (sp)
801+ nrows = getnrows (sp)
802+
803+ # Number of stored entries per column
804+ collen = zeros (Ti, ncols)
805+ @inbounds for col in sp. colidx
806+ collen[col] += 1
807+ end
808+
809+ # Index of stored entries at the start of each column
810+ colptr = Vector {Ti} (undef, ncols + 1 )
811+ colptr[1 ] = 1
812+ @inbounds for col in 1 : ncols
813+ colptr[col + 1 ] = colptr[col] + collen[col]
814+ end
815+
816+ # Build rowidx[i] giving the row number of the ith stored entry
817+ rowidx = Vector {Ti} (undef, nnz)
818+ next = copy (colptr)
819+ @inbounds for row in 1 : nrows
820+ for p in sp. rowptr[row]: (sp. rowptr[row + 1 ] - 1 )
821+ col = sp. colidx[p]
822+ q = next[col]
823+ rowidx[q] = row
824+ next[col] = q + 1
825+ # For a given col, next[col] is increasing, and row is
826+ # increasing in the outer loop -> rowidx sorted for each col
827+ end
828+ end
829+ nzval = zeros (Tv, nnz)
830+ return SparseMatrixCSC (nrows, ncols, colptr, rowidx, nzval)
831+ end
832+
833+ #= # TODO : Move to extension
834+ function sort_rows!(sp)
835+ sort_rows!(sp, 1:getnrows(sp))
836+ sp.is_sorted = true
837+ return sp
838+ end
839+
840+ function sort_rows!(sp::FastSparsityPattern, rowrange::UnitRange)
841+ @inbounds for row in rowrange
842+ i1 = sp.rowptr[row]
843+ i2 = sp.rowptr[row + 1] - 1
844+ if i1 < i2
845+ sort!(view(sp.colidx, i1:i2))
846+ end
847+ end
848+ return sp
849+ end
850+
851+ function sort_rows_threaded!(
852+ sp::FastSparsityPattern, # Default ΔN ≥ 1000 and `n_tasks ≥ 1`
853+ ntasks = max(min(Threads.nthreads() * 100, getnrows(sp) ÷ 1000), 1)
854+ ) # Otherwise, 100 per thread for load balancing
855+ nrows = getnrows(sp)
856+ ΔN = nrows ÷ ntasks
857+ Base.Experimental.@sync begin
858+ for taskid in 1:ntasks
859+ Threads.@spawn begin
860+ first_idx = 1 + ΔN * (taskid - 1)
861+ last_idx = min(first_idx + ΔN - 1, nrows)
862+ sort_rows!(sp, first_idx:last_idx)
863+ end
864+ end
865+ end
866+ sp.is_sorted = true
867+ return sp
868+ end
869+
870+ function allocate_matrix(::Type{<:SparseMatrixCSR}, sp::FastSparsityPattern{Ti}) where {Ti}
871+ sp.is_sorted || sort_rows_threaded!(sp) # Require sorted rows
872+ nzval = zeros(Float64, length(sp.colidx))
873+ return SparseMatrixCSR{1}(ndofs(sp), ndofs(sp), sp.rowptr, sp.colidx, nzval)
874+ end
875+ =#
0 commit comments