From 2882b8bc98883436d60d4801865c641f8b4dacf6 Mon Sep 17 00:00:00 2001 From: David Ellis Date: Thu, 30 Apr 2026 21:29:00 -0500 Subject: [PATCH 1/4] Better local group selection based on GPU capabilities and global workgroup size --- alan_compiler/src/std/root.ln | 23 +++++++------ alan_std.js | 65 ++++++++++++++++++++++++++--------- alan_std/src/lib.rs | 43 +++++++++++++++-------- 3 files changed, 90 insertions(+), 41 deletions(-) diff --git a/alan_compiler/src/std/root.ln b/alan_compiler/src/std/root.ln index 13bc969f..dac261d5 100644 --- a/alan_compiler/src/std/root.ln +++ b/alan_compiler/src/std/root.ln @@ -131,8 +131,8 @@ type arm_64 = Env{"ALAN_ARCH"} == "arm_64"; type Browser = Env{"ALAN_PLATFORM"} == "browser"; // Technically not necessary, always also Js // Importing the Root Scope backing implementation and supporting 3rd party libraries -type{Rs} RootBacking = Rust{"alan_std" @ "https://github.com/alantech/alan.git"}; -type{Js} RootBacking = Nodejs{"alan_std" @ "https://github.com/alantech/alan.git"}; +type{Rs} RootBacking = Rust{"alan_std" @ "https://github.com/alantech/alan.git#better-local-group-selection"}; +type{Js} RootBacking = Nodejs{"alan_std" @ "https://github.com/alantech/alan.git#better-local-group-selection"}; // Defining derived types type void = (); @@ -1850,8 +1850,8 @@ fn{Rs} id(b: GBufferRaw) = {"alan_std::buffer_id" <- RootBacking :: GBufferRaw - fn{Js} id(b: GBufferRaw) = {"alan_std.bufferid" <- RootBacking :: GBufferRaw -> string}(b); fn id{T}(b: GBuffer{T}) = b.rawBuffer.id; fn id(b: GBufferTagged) = b.rawBuffer.id; -fn{Rs} optimalLocalGroup() = {"alan_std::optimal_local_group" <- RootBacking :: () -> Buffer{i64, 3}}(); -fn{Js} optimalLocalGroup() = {"alan_std.optimalLocalGroup" <- RootBacking :: () -> Buffer{i64, 3}}(); +fn{Rs} optimalLocalGroup(global: Buffer{i64, 3}) = {"alan_std::optimal_local_group" <- RootBacking :: Deref{Buffer{i64, 3}} -> Buffer{i64, 3}}(global); +fn{Js} optimalLocalGroup(global: Buffer{i64, 3}) = {"alan_std.optimalLocalGroup" <- RootBacking :: Buffer{i64, 3} -> Buffer{i64, 3}}(global); fn{Rs} GPGPU "alan_std::GPGPU::new" <- RootBacking :: (Own{string}, Own{Array{Array{GBufferRaw}}}, Deref{Buffer{i64, 3}}, Deref{Buffer{i64, 3}}) -> GPGPU; fn{Js} GPGPU (src: string, gbuffers: Array{Array{GBufferRaw}}, idx: Buffer{i64, 3}, local: Buffer{i64, 3}) = {"new alan_std.GPGPU" <- RootBacking :: (string, Array{Array{GBufferRaw}}, Buffer{i32, 3}, Buffer{i32, 3}) -> GPGPU}(src, gbuffers, idx.map(i32), local.map(i32)); fn GPGPU{T}(src: string, buf: GBuffer{T}, local: Buffer{i64, 3}) -> GPGPU { @@ -1906,13 +1906,16 @@ fn build{N}(ret: N) { maxGlobalIdArray[1] ?? 0, maxGlobalIdArray[2] ?? 0 ); - let localGroup = optimalLocalGroup(); + // Sentinel values (-1 for width, -2 for height) are resolved at runtime by the window + // dispatch path, so we can't compute optimal local group size at compile time. Fall back + // to a reasonable default; dispatch counts will be calculated correctly at runtime. + let hasSentinels = maxGlobalId.0 < 0 || maxGlobalId.1 < 0 || maxGlobalId.2 < 0; + let localGroup = if(hasSentinels, {i64[3]}(8, 8, 1), optimalLocalGroup(maxGlobalId)); // Clamp each local dimension so it doesn't exceed the corresponding global dimension. - // If local > global, one workgroup would launch more invocations than there are valid - // indices, causing out-of-bounds writes since shaders have no bounds checking. - let localX = max(min(localGroup.0, maxGlobalId.0), 1); - let localY = max(min(localGroup.1, maxGlobalId.1), 1); - let localZ = max(min(localGroup.2, maxGlobalId.2), 1); + // Skip clamping when sentinels are present since actual dimensions are unknown. + let localX = if(hasSentinels, localGroup.0, max(min(localGroup.0, maxGlobalId.0), 1)); + let localY = if(hasSentinels, localGroup.1, max(min(localGroup.1, maxGlobalId.1), 1)); + let localZ = if(hasSentinels, localGroup.2, max(min(localGroup.2, maxGlobalId.2), 1)); let wgslHeader = bufferArray.map(fn (gb: GBufferTagged, i: i64) { return "@group(0)\n@binding(" .concat(i.string) diff --git a/alan_std.js b/alan_std.js index 9ad562ed..86ae362a 100644 --- a/alan_std.js +++ b/alan_std.js @@ -792,26 +792,57 @@ export class GPU { } let GPUS = null; -let OPTIMAL_LOCAL_GROUP = null; - -export function optimalLocalGroup() { - if (OPTIMAL_LOCAL_GROUP === null) { +let SUBGROUP_MAX_SIZE = null; + +// Typical subgroup_max_size values from wgpu docs (https://docs.rs/wgpu/28.0.0/wgpu/struct.AdapterInfo.html#structfield.subgroup_max_size) +const SUBGROUP_MAX_SIZE_BY_VENDOR = { + "Intel": 32, // Intel: 16 or 32, using upper bound + "AMD": 64, // AMD GCN/Vega: 64, RDNA+: 64 + "Apple": 32, // Apple M-series (Metal backend) + "Google": 32, // ChromeOS (typically AMD/Intel) + "Qualcomm": 128, // Qualcomm: 128 + "NVIDIA": 32, // NVIDIA: 32 + "Microsoft": 128, // WARP software rasterizer: 4 or 128, using upper bound +}; + +function getSubgroupMaxSize() { + if (SUBGROUP_MAX_SIZE === null) { if (GPUS !== null && GPUS.length > 0) { - let maxInvocations = GPUS[0].device.limits.maxComputeInvocationsPerWorkgroup; - let n = maxInvocations; - let sqrt = Math.floor(Math.sqrt(n)); - if (sqrt * sqrt === n) { - OPTIMAL_LOCAL_GROUP = [sqrt, sqrt, 1]; - return OPTIMAL_LOCAL_GROUP; - } - if (n % 8 === 0) { - OPTIMAL_LOCAL_GROUP = [n / 8, 8, 1]; - return OPTIMAL_LOCAL_GROUP; - } + let vendor = GPUS[0].adapter.info.vendor || ""; + // Try exact match first, then partial match + SUBGROUP_MAX_SIZE = SUBGROUP_MAX_SIZE_BY_VENDOR[vendor] || 64; // 64 is safe middle ground + } else { + SUBGROUP_MAX_SIZE = 64; } - OPTIMAL_LOCAL_GROUP = [8, 8, 1]; } - return OPTIMAL_LOCAL_GROUP; + return SUBGROUP_MAX_SIZE; +} + +export function optimalLocalGroup(global) { + let totalGlobal = global[0] * global[1] * global[2]; + if (totalGlobal === 0) { + return [1, 1, 1]; + } + let subMax = getSubgroupMaxSize(); + let maxInvocations = 256; + if (GPUS !== null && GPUS.length > 0) { + maxInvocations = GPUS[0].device.limits.maxComputeInvocationsPerWorkgroup; + } + // Target totalInvocationsPerWorkgroup so that totalWorkgroups ~ subgroup_max_size + let target = Math.ceil(totalGlobal / subMax); + // Clamp to [8, maxInvocations] + target = Math.max(8, Math.min(target, maxInvocations)); + // Snap to nearest multiple of 8 (hardware alignment) + target = Math.ceil(target / 8) * 8; + // Shape: prefer S*S*1, then D*8*1 + let sqrt = Math.floor(Math.sqrt(target)); + if (sqrt >= 8 && sqrt * sqrt === target) { + return [sqrt, sqrt, 1]; + } + if (target % 8 === 0) { + return [target / 8, 8, 1]; + } + return [target, 1, 1]; } export async function gpu() { diff --git a/alan_std/src/lib.rs b/alan_std/src/lib.rs index d7186ba1..f6f3736f 100644 --- a/alan_std/src/lib.rs +++ b/alan_std/src/lib.rs @@ -823,7 +823,7 @@ impl GPU { } static GPUS: OnceLock> = OnceLock::new(); -static OPTIMAL_LOCAL_GROUP: OnceLock<[i64; 3]> = OnceLock::new(); +static SUBGROUP_MAX_SIZE: OnceLock = OnceLock::new(); fn gpu() -> &'static GPU { match GPUS.get_or_init(|| GPU::init(GPU::list())).first() { @@ -834,23 +834,38 @@ fn gpu() -> &'static GPU { } } -pub fn optimal_local_group() -> [i64; 3] { - *OPTIMAL_LOCAL_GROUP.get_or_init(|| { +fn subgroup_max_size() -> u32 { + *SUBGROUP_MAX_SIZE.get_or_init(|| { let g = gpu(); - let max_invocations = g.adapter.limits().max_compute_invocations_per_workgroup; - let n = max_invocations as u64; - let sqrt = (n as f64).sqrt() as u64; - if sqrt * sqrt == n { - return [sqrt as i64, sqrt as i64, 1]; - } - if n % 8 == 0 { - let d = n / 8; - return [d as i64, 8, 1]; - } - [8, 8, 1] + g.adapter.get_info().subgroup_max_size }) } +pub fn optimal_local_group(global: [i64; 3]) -> [i64; 3] { + let total_global = (global[0] as u64) * (global[1] as u64) * (global[2] as u64); + if total_global == 0 { + return [1, 1, 1]; + } + let g = gpu(); + let sub_max = subgroup_max_size(); + let max_invocations = g.adapter.limits().max_compute_invocations_per_workgroup as u64; + // Target totalInvocationsPerWorkgroup so that totalWorkgroups ~ subgroup_max_size + let mut target = (total_global as f64 / sub_max as f64).ceil() as u64; + // Clamp to [8, maxInvocations] + target = target.max(8).min(max_invocations); + // Snap to nearest multiple of 8 (hardware alignment) + target = ((target + 7) / 8) * 8; + // Shape: prefer S*S*1, then D*8*1 + let sqrt = (target as f64).sqrt() as u64; + if sqrt >= 8 && sqrt * sqrt == target { + return [sqrt as i64, sqrt as i64, 1]; + } + if target % 8 == 0 { + return [(target / 8) as i64, 8, 1]; + } + [target as i64, 1, 1] +} + #[derive(Clone)] pub struct GBuffer { buffer: Rc, From be6329629000da5488a01e213306fbb6f2c1d4e8 Mon Sep 17 00:00:00 2001 From: David Ellis Date: Thu, 30 Apr 2026 22:55:11 -0500 Subject: [PATCH 2/4] Attempt to remove the hardwired local group sizing and the sentinel hackery for the window and frame --- alan_compiler/src/std/root.ln | 21 ++++++------ alan_std.js | 61 +++++++++++------------------------ alan_std/src/lib.rs | 42 ++++++++---------------- 3 files changed, 42 insertions(+), 82 deletions(-) diff --git a/alan_compiler/src/std/root.ln b/alan_compiler/src/std/root.ln index dac261d5..9709f45d 100644 --- a/alan_compiler/src/std/root.ln +++ b/alan_compiler/src/std/root.ln @@ -1906,16 +1906,12 @@ fn build{N}(ret: N) { maxGlobalIdArray[1] ?? 0, maxGlobalIdArray[2] ?? 0 ); - // Sentinel values (-1 for width, -2 for height) are resolved at runtime by the window - // dispatch path, so we can't compute optimal local group size at compile time. Fall back - // to a reasonable default; dispatch counts will be calculated correctly at runtime. - let hasSentinels = maxGlobalId.0 < 0 || maxGlobalId.1 < 0 || maxGlobalId.2 < 0; - let localGroup = if(hasSentinels, {i64[3]}(8, 8, 1), optimalLocalGroup(maxGlobalId)); + // Compute optimal local group size based on total work to perform + let localGroup = optimalLocalGroup(maxGlobalId); // Clamp each local dimension so it doesn't exceed the corresponding global dimension. - // Skip clamping when sentinels are present since actual dimensions are unknown. - let localX = if(hasSentinels, localGroup.0, max(min(localGroup.0, maxGlobalId.0), 1)); - let localY = if(hasSentinels, localGroup.1, max(min(localGroup.1, maxGlobalId.1), 1)); - let localZ = if(hasSentinels, localGroup.2, max(min(localGroup.2, maxGlobalId.2), 1)); + let localX = max(min(localGroup.0, maxGlobalId.0), 1); + let localY = max(min(localGroup.1, maxGlobalId.1), 1); + let localZ = max(min(localGroup.2, maxGlobalId.2), 1); let wgslHeader = bufferArray.map(fn (gb: GBufferTagged, i: i64) { return "@group(0)\n@binding(" .concat(i.string) @@ -2698,6 +2694,7 @@ fn gFor{T}(x: T, y: T, z: T) = gFor(x.u32, y.u32, z.u32); fn gFor{T}(x: T, y: T) = gFor(x.u32, y.u32, 1.u32); fn gFor{T}(x: T) = gFor(x.u32, 1.u32, 1.u32).x; // TODO: More hackery to eliminate +fn gFor(x: u32, y: u32) = gFor(x, y, 1.u32); fn gFor(x: i64, y: i64) { let initialStatement = "@builtin(global_invocation_id) id: vec3u"; let statements = Dict(initialStatement, x.string.concat(',').concat(y.string).concat(',1')); @@ -5091,7 +5088,11 @@ fn{Rs} context(f: Frame) = GBuffer{u32}({Property{"context.clone()"} :: Frame -> fn{Js} context(f: Frame) = GBuffer{u32}({"alan_std.frameContext" <- RootBacking :: Frame -> GBufferRaw}(f)); fn{Rs} framebuffer(f: Frame) = GBuffer{u32}({Property{"framebuffer.clone()"} :: Frame -> GBufferRaw}(f)); fn{Js} framebuffer(f: Frame) = GBuffer{u32}({"alan_std.frameFramebuffer" <- RootBacking :: Frame -> GBufferRaw}(f)); -fn pixel Frame = gFor(-1, -2); // Magic numbers for the binding +fn{Rs} width(f: Frame) = {Property{"width"} :: Frame -> u32}(f); +fn{Js} width(f: Frame) = {"alan_std.frameWidth" <- RootBacking :: Frame -> u32}(f); +fn{Rs} height(f: Frame) = {Property{"height"} :: Frame -> u32}(f); +fn{Js} height(f: Frame) = {"alan_std.frameHeight" <- RootBacking :: Frame -> u32}(f); +fn pixel(f: Frame) = gFor(f.width, f.height); /// Process exit-related functions fn{Rs} ExitCode "std::process::ExitCode::from" :: Own{u8} -> ExitCode; diff --git a/alan_std.js b/alan_std.js index 86ae362a..90310ea0 100644 --- a/alan_std.js +++ b/alan_std.js @@ -1136,6 +1136,14 @@ export function frameFramebuffer(frame) { return frame.framebuffer; } +export function frameWidth(frame) { + return frame.width; +} + +export function frameHeight(frame) { + return frame.height; +} + export async function runWindow(initialContextFn, contextFn, gpgpuShaderFn) { // None of this can run before `document.body` exists, so let's wait for that if (document.readyState !== "complete" && document.readyState !== "loaded") { @@ -1208,39 +1216,25 @@ export async function runWindow(initialContextFn, contextFn, gpgpuShaderFn) { label: `buffer_${uuidv4().replaceAll('-', '_')}`, }); buffer.ValKind = U32; - let gpgpuShaders = await gpgpuShaderFn({ context: contextBuffer, framebuffer: buffer }); + let gpgpuShaders = await gpgpuShaderFn({ context: contextBuffer, framebuffer: buffer, width: width, height: height }); let redraw = async function() { // First resize things if necessary if (width !== context.canvas.width || height !== context.canvas.height) { - width = context.canvas.width; - height = context.canvas.height; + width = Math.max(1, context.canvas.width); + height = Math.max(1, context.canvas.height); context.bufferWidth = (4 * width) % 256 === 0 ? 4 * width : 4 * width + (256 - ((4 * width) % 256)); bufferHeight = height; bufferSize = context.bufferWidth * bufferHeight; - let oldBufferId = buffer.label; let newBuffer = await device.createBuffer({ size: bufferSize, usage: storageBufferType(), label: `buffer_${uuidv4().replaceAll('-', '_')}`, }); newBuffer.ValKind = U32; - for (let shader of gpgpuShaders) { - for (let group of shader.buffers) { - let idx = undefined; - for (let i = 0; i < group.length; i++) { - let buffer = group[i]; - if (buffer.label == oldBufferId) { - idx = i; - break; - } - } - if (typeof(idx) !== 'undefined') { - group[idx] = newBuffer; - } - } - } buffer.destroy(); buffer = newBuffer; + // Re-invoke the shader callback with new dimensions so shaders are regenerated + gpgpuShaders = await gpgpuShaderFn({ context: contextBuffer, framebuffer: buffer, width: width, height: height }); } // Now, actually start drawing let frame = surface.getCurrentTexture(); @@ -1291,32 +1285,13 @@ export async function runWindow(initialContextFn, contextFn, gpgpuShaderFn) { for (let i = 0; i < gg.buffers.length; i++) { cpass.setBindGroup(i, bindGroups[i]); } - let x = 0; - let y = 0; let lx = gg.localWorkgroupSize[0] ?? 8; let ly = gg.localWorkgroupSize[1] ?? 8; - switch (gg.workgroupSizes[0].val) { - case -1: - x = Math.ceil(width / lx); - break; - case -2: - x = Math.ceil(height / lx); - break; - default: - x = Math.ceil(gg.workgroupSizes[0].val / lx); - } - switch (gg.workgroupSizes[1].val) { - case -1: - y = Math.ceil(width / ly); - break; - case -2: - y = Math.ceil(height / ly); - break; - default: - y = Math.ceil(gg.workgroupSizes[1].val / ly); - } - let z = gg.workgroupSizes[2].val; - cpass.dispatchWorkgroups(x, y, z); + cpass.dispatchWorkgroups( + Math.ceil(gg.workgroupSizes[0].val / lx), + Math.ceil(gg.workgroupSizes[1].val / ly), + gg.workgroupSizes[2].val + ); cpass.end(); } encoder.copyBufferToTexture({ diff --git a/alan_std/src/lib.rs b/alan_std/src/lib.rs index f6f3736f..a8edaed7 100644 --- a/alan_std/src/lib.rs +++ b/alan_std/src/lib.rs @@ -1327,6 +1327,8 @@ impl AlanWindowContext { pub struct AlanWindowFrame { pub context: GBuffer, pub framebuffer: GBuffer, + pub width: u32, + pub height: u32, } pub struct AlanWindow @@ -1442,6 +1444,8 @@ where self.gpgpu_shaders = Some((self.gpgpu_shader_fn)(&AlanWindowFrame { context: self.context_buffer.as_ref().unwrap().clone(), framebuffer: self.buffer.as_ref().unwrap().clone(), + width: size.width, + height: size.height, })); } self.inited = true; @@ -1490,8 +1494,6 @@ where if !self.inited { self.window_gpu_init(); } - // We need to create a new buffer with the right size *and* replace all instances - // of the old buffer in the GPGPU array with the new one. let device = self.device.as_ref().unwrap(); new_size.width = new_size.width.max(1); new_size.height = new_size.height.max(1); @@ -1503,7 +1505,6 @@ where let buffer_height = new_size.height; let buffer_size = (self.context.buffer_width.unwrap() as u64) * (buffer_height as u64); - let old_buffer_id = self.buffer.as_ref().unwrap().id.clone(); let new_buffer = GBuffer { buffer: Rc::new(device.create_buffer(&wgpu::BufferDescriptor { label: None, @@ -1514,24 +1515,17 @@ where id: format!("buffer_{}", format!("{}", Uuid::new_v4()).replace("-", "_")), element_size: 4, }; - for shader in self.gpgpu_shaders.as_mut().unwrap() { - for group in &mut shader.buffers { - let mut idx = None; - for (i, buffer) in group.iter().enumerate() { - if buffer.id == old_buffer_id { - idx = Some(i); - break; - } - } - if let Some(id) = idx { - group[id] = new_buffer.clone(); - } - } - } if let Some(b) = &self.buffer { b.destroy(); } self.buffer = Some(new_buffer); + // Re-invoke the shader callback with new dimensions so shaders are regenerated + self.gpgpu_shaders = Some((self.gpgpu_shader_fn)(&AlanWindowFrame { + context: self.context_buffer.as_ref().unwrap().clone(), + framebuffer: self.buffer.as_ref().unwrap().clone(), + width: new_size.width, + height: new_size.height, + })); self.context.window.as_ref().unwrap().request_redraw(); } WindowEvent::RedrawRequested => { @@ -1639,19 +1633,9 @@ where } let lx = gg.local_workgroup_size[0]; let ly = gg.local_workgroup_size[1]; - let wx = match gg.workgroup_sizes[0] { - -1 => size.width as i64, - -2 => size.height as i64, - _ => gg.workgroup_sizes[0], - }; - let wy = match gg.workgroup_sizes[1] { - -1 => size.width as i64, - -2 => size.height as i64, - _ => gg.workgroup_sizes[1], - }; cpass.dispatch_workgroups( - ((wx + lx - 1) / lx) as u32, - ((wy + ly - 1) / ly) as u32, + ((gg.workgroup_sizes[0] + lx - 1) / lx) as u32, + ((gg.workgroup_sizes[1] + ly - 1) / ly) as u32, gg.workgroup_sizes[2] as u32, ); } From 86db7a42b299945471a4639609d8303908b17883 Mon Sep 17 00:00:00 2001 From: David Ellis Date: Thu, 30 Apr 2026 23:44:10 -0500 Subject: [PATCH 3/4] Fix root scope --- alan_compiler/src/std/root.ln | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/alan_compiler/src/std/root.ln b/alan_compiler/src/std/root.ln index 9709f45d..986f8de4 100644 --- a/alan_compiler/src/std/root.ln +++ b/alan_compiler/src/std/root.ln @@ -131,8 +131,8 @@ type arm_64 = Env{"ALAN_ARCH"} == "arm_64"; type Browser = Env{"ALAN_PLATFORM"} == "browser"; // Technically not necessary, always also Js // Importing the Root Scope backing implementation and supporting 3rd party libraries -type{Rs} RootBacking = Rust{"alan_std" @ "https://github.com/alantech/alan.git#better-local-group-selection"}; -type{Js} RootBacking = Nodejs{"alan_std" @ "https://github.com/alantech/alan.git#better-local-group-selection"}; +type{Rs} RootBacking = Rust{"alan_std" @ "https://github.com/alantech/alan.git"}; +type{Js} RootBacking = Nodejs{"alan_std" @ "https://github.com/alantech/alan.git"}; // Defining derived types type void = (); From c470c8ad858c40830aadd245b1cf413aea47f365 Mon Sep 17 00:00:00 2001 From: David Ellis Date: Fri, 1 May 2026 00:45:02 -0500 Subject: [PATCH 4/4] Attempt to fix JS failures --- alan_std.js | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/alan_std.js b/alan_std.js index 90310ea0..ad1eea5b 100644 --- a/alan_std.js +++ b/alan_std.js @@ -819,9 +819,9 @@ function getSubgroupMaxSize() { } export function optimalLocalGroup(global) { - let totalGlobal = global[0] * global[1] * global[2]; + let totalGlobal = Number(global[0]) * Number(global[1]) * Number(global[2]); if (totalGlobal === 0) { - return [1, 1, 1]; + return [new I64(1), new I64(1), new I64(1)]; } let subMax = getSubgroupMaxSize(); let maxInvocations = 256; @@ -837,12 +837,12 @@ export function optimalLocalGroup(global) { // Shape: prefer S*S*1, then D*8*1 let sqrt = Math.floor(Math.sqrt(target)); if (sqrt >= 8 && sqrt * sqrt === target) { - return [sqrt, sqrt, 1]; + return [new I64(sqrt), new I64(sqrt), new I64(1)]; } if (target % 8 === 0) { - return [target / 8, 8, 1]; + return [new I64(target / 8), new I64(8), new I64(1)]; } - return [target, 1, 1]; + return [new I64(target), new I64(1), new I64(1)]; } export async function gpu() { @@ -1285,12 +1285,12 @@ export async function runWindow(initialContextFn, contextFn, gpgpuShaderFn) { for (let i = 0; i < gg.buffers.length; i++) { cpass.setBindGroup(i, bindGroups[i]); } - let lx = gg.localWorkgroupSize[0] ?? 8; - let ly = gg.localWorkgroupSize[1] ?? 8; + let lx = Number(gg.localWorkgroupSize[0]) ?? 8; + let ly = Number(gg.localWorkgroupSize[1]) ?? 8; cpass.dispatchWorkgroups( - Math.ceil(gg.workgroupSizes[0].val / lx), - Math.ceil(gg.workgroupSizes[1].val / ly), - gg.workgroupSizes[2].val + Math.ceil(Number(gg.workgroupSizes[0]) / lx), + Math.ceil(Number(gg.workgroupSizes[1]) / ly), + Number(gg.workgroupSizes[2]) ); cpass.end(); }