5959@kernel function ka_assembly_kernel (assembler, @Const (color), cc, cv, Kes, fes)
6060 # This is the classical grid-stride-loop
6161 task_index = @index (Global, Linear)
62- stride = KA. @groupsize ()[1 ]
62+ stride = KA. @groupsize ()[1 ]
6363 for i in task_index: stride: length (color)
6464 # Work item index
6565 cellid = color[i]
@@ -88,9 +88,9 @@ function assemble_global_ka!(backend, cv::CellValues, K, f, cc, colors::Vector,
8888 # We divide the work into blocks and fire up the kernel.
8989 n = length (color)
9090 threads = min (NUM_THREADS, n)
91- blocks = cld (length (color), threads)
91+ blocks = cld (length (color), threads)
9292 ka_kernel = ka_assembly_kernel (backend, threads)
93- ka_kernel (assembler, color, cc, cv, Ke, fe, ndrange= length (color))
93+ ka_kernel (assembler, color, cc, cv, Ke, fe, ndrange = length (color))
9494 # Since the kernel launches asynchronously we need to add a synchronization
9595 # point before proceeding here. Otherwise we will start assembling the next color,
9696 # while there are still threads working on the current color, therefore potentially
103103# Next comes the CUDA variant. See above for details, as the kernels are almost the same.
104104function cuda_assembly_kernel (assembler, color, cc, cv, Kes, fes)
105105 task_index = (blockIdx (). x - Int32 (1 )) * blockDim (). x + threadIdx (). x
106- stride = gridDim (). x * blockDim (). x
106+ stride = gridDim (). x * blockDim (). x
107107 for i in task_index: stride: length (color)
108108 cellid = color[i]
109109 cv_i = get_substruct (task_index, cv)
@@ -120,15 +120,15 @@ function assemble_global_cuda!(cv::CellValues, K, f, cc, colors::Vector, Ke, fe)
120120 for color in colors
121121 n = length (color)
122122 threads = min (NUM_THREADS, n)
123- blocks = cld (length (color), threads)
123+ blocks = cld (length (color), threads)
124124 @cuda threads = threads blocks = blocks cuda_assembly_kernel (assembler, color, cc, cv, Ke, fe)
125125 CUDA. synchronize ()
126126 end
127127 return nothing
128128end
129129
130130# Reference for internal testing #hide
131- function assemble_global! (cv:: CellValues , K:: SparseMatrixCSC , f, dh:: DofHandler )# hide
131+ function assemble_global! (cv:: CellValues , K:: SparseMatrixCSC , f, dh:: DofHandler ) # hide
132132 n_basefuncs = getnbasefunctions (cv) # hide
133133 Ke = zeros (Float32, n_basefuncs, n_basefuncs) # hide
134134 fe = zeros (Float32, n_basefuncs) # hide
0 commit comments