From e5a4a07b6c2e031bc431c474e073d653c18af82f Mon Sep 17 00:00:00 2001 From: Harley Mellifont Date: Fri, 20 Mar 2026 10:25:29 +1100 Subject: [PATCH 01/10] bilinear texture sampling with textureSample instead of textureLoad --- .../vello_hybrid/src/render/webgl.rs | 4 +- sparse_strips/vello_hybrid/src/render/wgpu.rs | 55 ++++++++++++++----- .../shaders/render_strips.wgsl | 36 ++++++++---- 3 files changed, 67 insertions(+), 28 deletions(-) diff --git a/sparse_strips/vello_hybrid/src/render/webgl.rs b/sparse_strips/vello_hybrid/src/render/webgl.rs index 654d5d2bfa..5a372c7981 100644 --- a/sparse_strips/vello_hybrid/src/render/webgl.rs +++ b/sparse_strips/vello_hybrid/src/render/webgl.rs @@ -1842,12 +1842,12 @@ fn create_texture_inner(gl: &WebGl2RenderingContext, target: u32) -> WebGlTextur gl.tex_parameteri( target, WebGl2RenderingContext::TEXTURE_MIN_FILTER, - WebGl2RenderingContext::NEAREST as i32, + WebGl2RenderingContext::LINEAR as i32, ); gl.tex_parameteri( target, WebGl2RenderingContext::TEXTURE_MAG_FILTER, - WebGl2RenderingContext::NEAREST as i32, + WebGl2RenderingContext::LINEAR as i32, ); gl.tex_parameteri( target, diff --git a/sparse_strips/vello_hybrid/src/render/wgpu.rs b/sparse_strips/vello_hybrid/src/render/wgpu.rs index f08e372bed..421e9fe59f 100644 --- a/sparse_strips/vello_hybrid/src/render/wgpu.rs +++ b/sparse_strips/vello_hybrid/src/render/wgpu.rs @@ -846,6 +846,8 @@ struct GpuResources { atlas_texture_array: Texture, /// View for atlas texture array atlas_texture_array_view: TextureView, + /// Bilinear sampler for GPU-native image sampling + atlas_sampler: Sampler, /// Bind group for atlas textures (as texture array) atlas_bind_group: BindGroup, /// Filter atlas textures and their associated views/bind groups. 
@@ -963,18 +965,33 @@ impl Programs { let atlas_bind_group_layout = device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor { label: Some("Atlas Texture Bind Group Layout"), - entries: &[wgpu::BindGroupLayoutEntry { - binding: 0, - visibility: wgpu::ShaderStages::VERTEX | wgpu::ShaderStages::FRAGMENT, - ty: wgpu::BindingType::Texture { - sample_type: wgpu::TextureSampleType::Float { filterable: true }, - view_dimension: wgpu::TextureViewDimension::D2Array, - multisampled: false, + entries: &[ + wgpu::BindGroupLayoutEntry { + binding: 0, + visibility: wgpu::ShaderStages::VERTEX | wgpu::ShaderStages::FRAGMENT, + ty: wgpu::BindingType::Texture { + sample_type: wgpu::TextureSampleType::Float { filterable: true }, + view_dimension: wgpu::TextureViewDimension::D2Array, + multisampled: false, + }, + count: None, }, - count: None, - }], + wgpu::BindGroupLayoutEntry { + binding: 1, + visibility: wgpu::ShaderStages::FRAGMENT, + ty: wgpu::BindingType::Sampler(wgpu::SamplerBindingType::Filtering), + count: None, + }, + ], }); + let atlas_sampler = device.create_sampler(&wgpu::SamplerDescriptor { + label: Some("Atlas Bilinear Sampler"), + mag_filter: wgpu::FilterMode::Linear, + min_filter: wgpu::FilterMode::Linear, + ..Default::default() + }); + let encoded_paints_bind_group_layout = device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor { label: Some("Encoded Paints Bind Group Layout"), @@ -1347,6 +1364,7 @@ impl Programs { device, &atlas_bind_group_layout, &atlas_texture_array_view, + &atlas_sampler, ); // Create a 1x1 stub atlas texture array for use during render_to_atlas. 
@@ -1355,7 +1373,7 @@ impl Programs { let (_stub_atlas_texture, stub_atlas_view) = Self::create_atlas_texture_array(device, 1, 1, 1); let stub_atlas_bind_group = - Self::create_atlas_bind_group(device, &atlas_bind_group_layout, &stub_atlas_view); + Self::create_atlas_bind_group(device, &atlas_bind_group_layout, &stub_atlas_view, &atlas_sampler); const INITIAL_ENCODED_PAINTS_TEXTURE_HEIGHT: u32 = 1; let encoded_paints_data = vec![ @@ -1432,6 +1450,7 @@ impl Programs { alphas_texture, atlas_texture_array, atlas_texture_array_view, + atlas_sampler, atlas_bind_group, filter_atlas, stub_atlas_bind_group, @@ -1612,14 +1631,21 @@ impl Programs { device: &Device, atlas_bind_group_layout: &BindGroupLayout, atlas_texture_array_view: &TextureView, + atlas_sampler: &Sampler, ) -> BindGroup { device.create_bind_group(&wgpu::BindGroupDescriptor { label: Some("Atlas Bind Group"), layout: atlas_bind_group_layout, - entries: &[wgpu::BindGroupEntry { - binding: 0, - resource: wgpu::BindingResource::TextureView(atlas_texture_array_view), - }], + entries: &[ + wgpu::BindGroupEntry { + binding: 0, + resource: wgpu::BindingResource::TextureView(atlas_texture_array_view), + }, + wgpu::BindGroupEntry { + binding: 1, + resource: wgpu::BindingResource::Sampler(atlas_sampler), + }, + ], }) } @@ -2002,6 +2028,7 @@ impl Programs { device, atlas_bind_group_layout, &new_atlas_texture_array_view, + &resources.atlas_sampler, ); // Replace the old resources diff --git a/sparse_strips/vello_sparse_shaders/shaders/render_strips.wgsl b/sparse_strips/vello_sparse_shaders/shaders/render_strips.wgsl index f15a0535a8..9855b0d010 100644 --- a/sparse_strips/vello_sparse_shaders/shaders/render_strips.wgsl +++ b/sparse_strips/vello_sparse_shaders/shaders/render_strips.wgsl @@ -240,6 +240,9 @@ var config: Config; @group(1) @binding(0) var atlas_texture_array: texture_2d_array; +@group(1) @binding(1) +var atlas_sampler: sampler; + @group(2) @binding(0) var encoded_paints_texture: texture_2d; @@ -933,10 +936,26 
@@ fn extend_mode_normalized(t: f32, mode: u32) -> f32 { } } -// Bilinear filtering +// Convert atlas-space pixel coordinates to normalized UVs suitable for textureSample. +// +// To prevent bleeding from neighboring atlas images, we clamp the UVs so that the boundary +// samples land on the CENTER of the boundary texels, not their edges. // -// Bilinear filtering consists of sampling the 4 surrounding pixels of the target point and -// interpolating them with a bilinear filter. +// The input `sample_xy` is in atlas pixel space (after extend-mode and offset are applied). +fn atlas_to_normalized_uv( + sample_xy: vec2, + image_offset: vec2, + image_size: vec2, + extend_modes: vec2, +) -> vec2 { + let atlas_dim = vec2(textureDimensions(atlas_texture_array)); + let uv_min = (image_offset + 0.5) / atlas_dim; + let uv_max = (image_offset + image_size - 0.5) / atlas_dim; + let uv = ((sample_xy + 0.5) / atlas_dim); + return clamp(uv, uv_min, uv_max); +} + +// Bilinear filtering via hardware textureSample. 
fn bilinear_sample( tex: texture_2d_array, coords: vec2, @@ -946,15 +965,8 @@ fn bilinear_sample( extend_modes: vec2, image_padding: f32, ) -> vec4 { - let atlas_max = image_offset + image_size - vec2(1.0); - let atlas_uv_clamped = clamp(coords, image_offset, atlas_max); - let uv_quad = vec4(floor(atlas_uv_clamped), ceil(atlas_uv_clamped)); - let uv_frac = fract(coords); - let a = textureLoad(tex, vec2(uv_quad.xy), atlas_idx, 0); - let b = textureLoad(tex, vec2(uv_quad.xw), atlas_idx, 0); - let c = textureLoad(tex, vec2(uv_quad.zy), atlas_idx, 0); - let d = textureLoad(tex, vec2(uv_quad.zw), atlas_idx, 0); - return mix(mix(a, b, uv_frac.y), mix(c, d, uv_frac.y), uv_frac.x); + let uv = atlas_to_normalized_uv(coords, image_offset, image_size, extend_modes); + return textureSample(tex, atlas_sampler, uv, atlas_idx); } // Bicubic filtering using Mitchell filter with B=1/3, C=1/3 From 4aa02e937e3732b2762300499f1574e13b69410b Mon Sep 17 00:00:00 2001 From: Harley Mellifont Date: Fri, 20 Mar 2026 11:58:12 +1100 Subject: [PATCH 02/10] test 0.0 offset in CI --- sparse_strips/vello_sparse_shaders/shaders/render_strips.wgsl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sparse_strips/vello_sparse_shaders/shaders/render_strips.wgsl b/sparse_strips/vello_sparse_shaders/shaders/render_strips.wgsl index 9855b0d010..10de42ba3a 100644 --- a/sparse_strips/vello_sparse_shaders/shaders/render_strips.wgsl +++ b/sparse_strips/vello_sparse_shaders/shaders/render_strips.wgsl @@ -388,7 +388,7 @@ fn fs_main(in: VertexOutput) -> @location(0) vec4 { // This offset doesn't exist in vello_cpu, and we use it because 45 degree skewing seems to cause // artifacts on the GPU. We have something similar in place for gradients. It might be worth revisiting // this to see whether a better approach is possible. 
- let offset = 0.00001; + let offset = 0.0; let extended_xy = vec2( extend_mode(local_xy.x + offset, encoded_image.extend_modes.x, image_size.x), extend_mode(local_xy.y + offset, encoded_image.extend_modes.y, image_size.y) From 83c726139eda328ca658a30d686c8c4e0da38baf Mon Sep 17 00:00:00 2001 From: Harley Mellifont Date: Fri, 20 Mar 2026 12:12:08 +1100 Subject: [PATCH 03/10] Revert "test 0.0 offset in CI" This reverts commit 4aa02e937e3732b2762300499f1574e13b69410b. --- sparse_strips/vello_sparse_shaders/shaders/render_strips.wgsl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sparse_strips/vello_sparse_shaders/shaders/render_strips.wgsl b/sparse_strips/vello_sparse_shaders/shaders/render_strips.wgsl index 10de42ba3a..9855b0d010 100644 --- a/sparse_strips/vello_sparse_shaders/shaders/render_strips.wgsl +++ b/sparse_strips/vello_sparse_shaders/shaders/render_strips.wgsl @@ -388,7 +388,7 @@ fn fs_main(in: VertexOutput) -> @location(0) vec4 { // This offset doesn't exist in vello_cpu, and we use it because 45 degree skewing seems to cause // artifacts on the GPU. We have something similar in place for gradients. It might be worth revisiting // this to see whether a better approach is possible. 
- let offset = 0.0; + let offset = 0.00001; let extended_xy = vec2( extend_mode(local_xy.x + offset, encoded_image.extend_modes.x, image_size.x), extend_mode(local_xy.y + offset, encoded_image.extend_modes.y, image_size.y) From 3e379292875cafb68870085dffcb98126a123eee Mon Sep 17 00:00:00 2001 From: Harley Mellifont Date: Fri, 20 Mar 2026 15:47:03 +1100 Subject: [PATCH 04/10] simplify --- .../vello_sparse_shaders/shaders/render_strips.wgsl | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/sparse_strips/vello_sparse_shaders/shaders/render_strips.wgsl b/sparse_strips/vello_sparse_shaders/shaders/render_strips.wgsl index 9855b0d010..d7f4a2b03e 100644 --- a/sparse_strips/vello_sparse_shaders/shaders/render_strips.wgsl +++ b/sparse_strips/vello_sparse_shaders/shaders/render_strips.wgsl @@ -410,7 +410,7 @@ fn fs_main(in: VertexOutput) -> @location(0) vec4 { encoded_image.image_padding, ); } else if encoded_image.quality == IMAGE_QUALITY_MEDIUM { - let final_xy = image_offset + extended_xy - vec2(0.5); + let final_xy = image_offset + extended_xy; sample_color = bilinear_sample( atlas_texture_array, final_xy, @@ -948,10 +948,10 @@ fn atlas_to_normalized_uv( image_size: vec2, extend_modes: vec2, ) -> vec2 { - let atlas_dim = vec2(textureDimensions(atlas_texture_array)); - let uv_min = (image_offset + 0.5) / atlas_dim; - let uv_max = (image_offset + image_size - 0.5) / atlas_dim; - let uv = ((sample_xy + 0.5) / atlas_dim); + let inv_atlas_dim = 1.0 / vec2(textureDimensions(atlas_texture_array)); + let uv_min = (image_offset + 0.5) * inv_atlas_dim; + let uv_max = (image_offset + image_size - 0.5) * inv_atlas_dim; + let uv = sample_xy * inv_atlas_dim; return clamp(uv, uv_min, uv_max); } From d2ca24f165b28d8dddbe881bd2e91a7f97964340 Mon Sep 17 00:00:00 2001 From: Harley Mellifont Date: Fri, 20 Mar 2026 15:49:27 +1100 Subject: [PATCH 05/10] fmt --- sparse_strips/vello_hybrid/src/render/wgpu.rs | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) 
diff --git a/sparse_strips/vello_hybrid/src/render/wgpu.rs b/sparse_strips/vello_hybrid/src/render/wgpu.rs index 421e9fe59f..bccdcc8e70 100644 --- a/sparse_strips/vello_hybrid/src/render/wgpu.rs +++ b/sparse_strips/vello_hybrid/src/render/wgpu.rs @@ -1372,8 +1372,12 @@ impl Programs { // a shader input (bind group) and render target in the same pass. let (_stub_atlas_texture, stub_atlas_view) = Self::create_atlas_texture_array(device, 1, 1, 1); - let stub_atlas_bind_group = - Self::create_atlas_bind_group(device, &atlas_bind_group_layout, &stub_atlas_view, &atlas_sampler); + let stub_atlas_bind_group = Self::create_atlas_bind_group( + device, + &atlas_bind_group_layout, + &stub_atlas_view, + &atlas_sampler, + ); const INITIAL_ENCODED_PAINTS_TEXTURE_HEIGHT: u32 = 1; let encoded_paints_data = vec![ From 7d29b89635fc0450e52034e7475de6b7b82eda26 Mon Sep 17 00:00:00 2001 From: Harley Mellifont Date: Fri, 20 Mar 2026 16:15:58 +1100 Subject: [PATCH 06/10] webgl: split texture_array and texture path --- .../vello_hybrid/src/render/webgl.rs | 35 +++++++++++++++++-- 1 file changed, 32 insertions(+), 3 deletions(-) diff --git a/sparse_strips/vello_hybrid/src/render/webgl.rs b/sparse_strips/vello_hybrid/src/render/webgl.rs index 5a372c7981..12d7c8bebb 100644 --- a/sparse_strips/vello_hybrid/src/render/webgl.rs +++ b/sparse_strips/vello_hybrid/src/render/webgl.rs @@ -1829,7 +1829,36 @@ fn create_texture(gl: &WebGl2RenderingContext) -> WebGlTexture { /// Create a texture array with nearest neighbor sampling and /// clamp-to-edge wrapping. 
fn create_texture_array(gl: &WebGl2RenderingContext) -> WebGlTexture { - create_texture_inner(gl, WebGl2RenderingContext::TEXTURE_2D_ARRAY) + let target = WebGl2RenderingContext::TEXTURE_2D_ARRAY; + let texture = gl.create_texture().unwrap(); + gl.active_texture(WebGl2RenderingContext::TEXTURE0); + gl.bind_texture(target, Some(&texture)); + // Linear min/mag filtering matters here: the shader + // (`render_strips.wgsl`) now samples the atlas with `textureSample` for + // bilinear image quality, which uses these filter parameters. + gl.tex_parameteri( + target, + WebGl2RenderingContext::TEXTURE_MIN_FILTER, + WebGl2RenderingContext::LINEAR as i32, + ); + gl.tex_parameteri( + target, + WebGl2RenderingContext::TEXTURE_MAG_FILTER, + WebGl2RenderingContext::LINEAR as i32, + ); + gl.tex_parameteri( + target, + WebGl2RenderingContext::TEXTURE_WRAP_S, + WebGl2RenderingContext::CLAMP_TO_EDGE as i32, + ); + gl.tex_parameteri( + target, + WebGl2RenderingContext::TEXTURE_WRAP_T, + WebGl2RenderingContext::CLAMP_TO_EDGE as i32, + ); + gl.tex_parameteri(target, WebGl2RenderingContext::TEXTURE_MAX_LEVEL, 0); + + texture } fn create_texture_inner(gl: &WebGl2RenderingContext, target: u32) -> WebGlTexture { @@ -1842,12 +1871,12 @@ fn create_texture_inner(gl: &WebGl2RenderingContext, target: u32) -> WebGlTextur gl.tex_parameteri( target, WebGl2RenderingContext::TEXTURE_MIN_FILTER, - WebGl2RenderingContext::LINEAR as i32, + WebGl2RenderingContext::NEAREST as i32, ); gl.tex_parameteri( target, WebGl2RenderingContext::TEXTURE_MAG_FILTER, - WebGl2RenderingContext::LINEAR as i32, + WebGl2RenderingContext::NEAREST as i32, ); gl.tex_parameteri( target, From 2e1ac4e7712d423346070eb01b8ee686f36cdfbb Mon Sep 17 00:00:00 2001 From: Harley Mellifont Date: Fri, 20 Mar 2026 16:22:07 +1100 Subject: [PATCH 07/10] hybrid tolerance for 8-bit fixed-point on windows --- sparse_strips/vello_sparse_tests/tests/image.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git 
a/sparse_strips/vello_sparse_tests/tests/image.rs b/sparse_strips/vello_sparse_tests/tests/image.rs index 4c95b6b261..4ed3d90a50 100644 --- a/sparse_strips/vello_sparse_tests/tests/image.rs +++ b/sparse_strips/vello_sparse_tests/tests/image.rs @@ -372,7 +372,7 @@ fn image_bilinear_identity(ctx: &mut impl Renderer) { ); } -#[vello_test] +#[vello_test(hybrid_tolerance = 2)] fn image_bilinear_2x_scale(ctx: &mut impl Renderer) { let image_source = rgb_img_2x2(ctx); quality( @@ -384,7 +384,7 @@ fn image_bilinear_2x_scale(ctx: &mut impl Renderer) { ); } -#[vello_test] +#[vello_test(hybrid_tolerance = 2)] fn image_bilinear_5x_scale(ctx: &mut impl Renderer) { let image_source = rgb_img_2x2(ctx); quality( @@ -396,7 +396,7 @@ fn image_bilinear_5x_scale(ctx: &mut impl Renderer) { ); } -#[vello_test] +#[vello_test(hybrid_tolerance = 2)] fn image_bilinear_10x_scale(ctx: &mut impl Renderer) { let image_source = rgb_img_2x2(ctx); quality( @@ -408,7 +408,7 @@ fn image_bilinear_10x_scale(ctx: &mut impl Renderer) { ); } -#[vello_test] +#[vello_test(hybrid_tolerance = 2)] fn image_bilinear_with_rotation(ctx: &mut impl Renderer) { let image_source = rgb_img_2x2(ctx); quality( @@ -420,7 +420,7 @@ fn image_bilinear_with_rotation(ctx: &mut impl Renderer) { ); } -#[vello_test] +#[vello_test(hybrid_tolerance = 2)] fn image_bilinear_with_translation(ctx: &mut impl Renderer) { let image_source = rgb_img_2x2(ctx); quality( @@ -432,7 +432,7 @@ fn image_bilinear_with_translation(ctx: &mut impl Renderer) { ); } -#[vello_test] +#[vello_test(hybrid_tolerance = 2)] fn image_bilinear_10x_scale_2(ctx: &mut impl Renderer) { let image_source = rgb_img_2x3(ctx); quality( From 36e3f3e4add3627515e7c2491c40d6cd8c7836d5 Mon Sep 17 00:00:00 2001 From: Taj Pereira Date: Sun, 29 Mar 2026 06:32:09 +1030 Subject: [PATCH 08/10] 30-40% faster via atlas_dim_bits + vertex UV --- .../vello_hybrid/src/render/common.rs | 6 +- .../vello_hybrid/src/render/webgl.rs | 15 ++++- sparse_strips/vello_hybrid/src/render/wgpu.rs 
| 36 ++++++++---- sparse_strips/vello_hybrid/src/scene.rs | 4 ++ .../shaders/render_strips.wgsl | 58 +++++-------------- 5 files changed, 59 insertions(+), 60 deletions(-) diff --git a/sparse_strips/vello_hybrid/src/render/common.rs b/sparse_strips/vello_hybrid/src/render/common.rs index 38da25a6de..6fa501bb6a 100644 --- a/sparse_strips/vello_hybrid/src/render/common.rs +++ b/sparse_strips/vello_hybrid/src/render/common.rs @@ -51,8 +51,10 @@ pub struct Config { pub strip_offset_x: i32, /// A vertical offset to apply to strips. pub strip_offset_y: i32, - /// Padding to satisfy WebGL's 16-byte alignment requirement for uniform buffers. - pub _padding: u32, + /// Number of trailing zeros in the atlas texture dimension (log2 of the square atlas size). + /// Used to normalize pixel coordinates to UVs for `textureSample` without calling + /// `textureDimensions` per pixel. + pub atlas_dim_bits: u32, } /// A GPU strip instance for rendering. diff --git a/sparse_strips/vello_hybrid/src/render/webgl.rs b/sparse_strips/vello_hybrid/src/render/webgl.rs index 4d8e8e2038..e2ed0f6f68 100644 --- a/sparse_strips/vello_hybrid/src/render/webgl.rs +++ b/sparse_strips/vello_hybrid/src/render/webgl.rs @@ -812,6 +812,9 @@ struct WebGlResources { filter_atlas_width: u32, /// Cached atlas height for creating new filter atlas textures. filter_atlas_height: u32, + /// log2 of the square atlas texture dimension, passed to the shader uniform + /// so it can normalize pixel coords to UVs without `textureDimensions`. + atlas_dim_bits: u32, } /// Config for the clear slots pipeline. 
@@ -1167,7 +1170,7 @@ impl WebGlPrograms { encoded_paints_tex_width_bits: max_texture_dimension_2d.trailing_zeros(), strip_offset_x: 0, strip_offset_y: 0, - _padding: 0, + atlas_dim_bits: self.resources.atlas_dim_bits, }; gl.bind_buffer( @@ -1193,7 +1196,7 @@ impl WebGlPrograms { encoded_paints_tex_width_bits: max_texture_dimension_2d.trailing_zeros(), strip_offset_x: 0, strip_offset_y: 0, - _padding: 0, + atlas_dim_bits: self.resources.atlas_dim_bits, }; gl.bind_buffer( @@ -1918,6 +1921,11 @@ fn create_webgl_resources( initial_atlas_count, .. } = image_cache.atlas_manager().config(); + debug_assert_eq!( + atlas_width, atlas_height, + "Atlas must be square for atlas_dim_bits to work" + ); + let atlas_dim_bits = atlas_width.trailing_zeros(); let atlas_texture_array = create_atlas_texture_array(gl, *atlas_width, *atlas_height, *initial_atlas_count as u32); @@ -1989,6 +1997,7 @@ fn create_webgl_resources( filter_config_buffer, filter_atlas_width: *filter_atlas_width, filter_atlas_height: *filter_atlas_height, + atlas_dim_bits, } } @@ -2180,7 +2189,7 @@ impl WebGlRendererContext<'_> { .trailing_zeros(), strip_offset_x, strip_offset_y, - _padding: 0, + atlas_dim_bits: self.programs.resources.atlas_dim_bits, }; let buf = &self.programs.resources.filter_config_buffer; self.gl diff --git a/sparse_strips/vello_hybrid/src/render/wgpu.rs b/sparse_strips/vello_hybrid/src/render/wgpu.rs index 7e6c47e83e..ab29654e99 100644 --- a/sparse_strips/vello_hybrid/src/render/wgpu.rs +++ b/sparse_strips/vello_hybrid/src/render/wgpu.rs @@ -883,6 +883,10 @@ struct GpuResources { /// Placeholder atlas bind group with a 1x1 dummy texture, used during /// `render_to_atlas` to avoid a read-write conflict on the real atlas texture. stub_atlas_bind_group: BindGroup, + + /// log2 of the square atlas texture dimension, passed to the shader uniform + /// so it can normalize pixel coords to UVs without `textureDimensions`. 
+ atlas_dim_bits: u32, } const SIZE_OF_CONFIG: NonZeroU64 = NonZeroU64::new(size_of::() as u64).unwrap(); @@ -1321,16 +1325,29 @@ impl Programs { slot_count as u64 * size_of::() as u64, ); + let max_texture_dimension_2d = device.limits().max_texture_dimension_2d; + + let AtlasConfig { + atlas_size: (atlas_width, atlas_height), + initial_atlas_count, + .. + } = image_cache.atlas_manager().config(); + debug_assert_eq!( + atlas_width, atlas_height, + "Atlas must be square for atlas_dim_bits to work" + ); + let atlas_dim_bits = atlas_width.trailing_zeros(); + let slot_config_buffer = Self::create_config_buffer( device, &RenderSize { width: u32::from(WideTile::WIDTH), height: u32::from(Tile::HEIGHT) * slot_count as u32, }, - device.limits().max_texture_dimension_2d, + max_texture_dimension_2d, + atlas_dim_bits, ); - let max_texture_dimension_2d = device.limits().max_texture_dimension_2d; const INITIAL_ALPHA_TEXTURE_HEIGHT: u32 = 1; let alphas_texture = Self::create_alphas_texture( device, @@ -1344,13 +1361,8 @@ impl Programs { height: render_target_config.height, }, max_texture_dimension_2d, + atlas_dim_bits, ); - - let AtlasConfig { - atlas_size: (atlas_width, atlas_height), - initial_atlas_count, - .. 
- } = image_cache.atlas_manager().config(); let (atlas_texture_array, atlas_texture_array_view) = Self::create_atlas_texture_array( device, *atlas_width, @@ -1462,6 +1474,7 @@ impl Programs { filter_data_texture, filter_base_bind_group, view_config_buffer, + atlas_dim_bits, }; Self { @@ -1516,6 +1529,7 @@ impl Programs { device: &Device, render_size: &RenderSize, alpha_texture_width: u32, + atlas_dim_bits: u32, ) -> Buffer { device.create_buffer_init(&wgpu::util::BufferInitDescriptor { label: Some("Config Buffer"), @@ -1527,7 +1541,7 @@ impl Programs { encoded_paints_tex_width_bits: alpha_texture_width.trailing_zeros(), strip_offset_x: 0, strip_offset_y: 0, - _padding: 0, + atlas_dim_bits, }), usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST, }) @@ -1985,7 +1999,7 @@ impl Programs { encoded_paints_tex_width_bits: max_texture_dimension_2d.trailing_zeros(), strip_offset_x: 0, strip_offset_y: 0, - _padding: 0, + atlas_dim_bits: self.resources.atlas_dim_bits, }; let mut buffer = queue .write_buffer_with(&self.resources.view_config_buffer, 0, SIZE_OF_CONFIG) @@ -2325,7 +2339,7 @@ impl RendererContext<'_> { .trailing_zeros(), strip_offset_x, strip_offset_y, - _padding: 0, + atlas_dim_bits: self.programs.resources.atlas_dim_bits, }), usage: wgpu::BufferUsages::UNIFORM, }); diff --git a/sparse_strips/vello_hybrid/src/scene.rs b/sparse_strips/vello_hybrid/src/scene.rs index 19d08faca2..8fc2b665c2 100644 --- a/sparse_strips/vello_hybrid/src/scene.rs +++ b/sparse_strips/vello_hybrid/src/scene.rs @@ -324,6 +324,10 @@ impl Scene { } } + pub fn paint_transform(&self) -> &Affine { + &self.render_state.paint_transform + } + /// Encode the current paint into a `Paint` that can be used for rendering. /// /// For solid colors, this is a simple conversion. 
For gradients and images, diff --git a/sparse_strips/vello_sparse_shaders/shaders/render_strips.wgsl b/sparse_strips/vello_sparse_shaders/shaders/render_strips.wgsl index d7f4a2b03e..2f9a8b5b80 100644 --- a/sparse_strips/vello_sparse_shaders/shaders/render_strips.wgsl +++ b/sparse_strips/vello_sparse_shaders/shaders/render_strips.wgsl @@ -123,8 +123,9 @@ struct Config { // within the atlas where the filter layer will be rendered to. strip_offset_x: i32, strip_offset_y: i32, - // Padding to satisfy WebGL's 16-byte alignment requirement for uniform buffers. - _padding0: u32, + // log2 of the square atlas texture dimension, used to normalize pixel coordinates + // to UVs for textureSample without calling textureDimensions per pixel. + atlas_dim_bits: u32, } // A `StripInstance` can represent either a **normal strip** (representing a sparse fill or alpha fill of height @@ -229,6 +230,10 @@ struct VertexOutput { // Bits 0-7: x0, 8-15: y0, 16-23: x1, 24-31: y1. // Zero for normal strips. @location(5) @interpolate(flat) rect_frac: u32, + // Pre-computed UV clamping bounds for bilinear image sampling. + // xy = uv_min, zw = uv_max (prevents atlas bleeding at image boundaries). + // Only meaningful for PAINT_TYPE_IMAGE with IMAGE_QUALITY_MEDIUM. 
+ @location(6) @interpolate(flat) uv_bounds: vec4, // Normalized device coordinates (NDC) for the current vertex @builtin(position) position: vec4, }; @@ -297,6 +302,10 @@ fn vs_main( // Use view coordinates for image sampling (always in global view space) let pos = vec2(f32(scene_strip_x) + x * f32(width), f32(scene_strip_y) + y * f32(height)); out.sample_xy = encoded_image.translate + encoded_image.image_offset + encoded_image.transform * pos; + let inv_atlas_dim = 1.0 / f32(1u << config.atlas_dim_bits); + let uv_min = (encoded_image.image_offset + 0.5) * inv_atlas_dim; + let uv_max = (encoded_image.image_offset + encoded_image.image_size - 0.5) * inv_atlas_dim; + out.uv_bounds = vec4(uv_min, uv_max); } else if paint_type == PAINT_TYPE_LINEAR_GRADIENT || paint_type == PAINT_TYPE_RADIAL_GRADIENT || paint_type == PAINT_TYPE_SWEEP_GRADIENT { // Use view coordinates for gradient transform (always in global view space) out.sample_xy = vec2( @@ -411,15 +420,9 @@ fn fs_main(in: VertexOutput) -> @location(0) vec4 { ); } else if encoded_image.quality == IMAGE_QUALITY_MEDIUM { let final_xy = image_offset + extended_xy; - sample_color = bilinear_sample( - atlas_texture_array, - final_xy, - i32(encoded_image.atlas_index), - image_offset, - image_size, - encoded_image.extend_modes, - encoded_image.image_padding, - ); + let inv_atlas_dim = 1.0 / f32(1u << config.atlas_dim_bits); + let uv = clamp(final_xy * inv_atlas_dim, in.uv_bounds.xy, in.uv_bounds.zw); + sample_color = textureSample(atlas_texture_array, atlas_sampler, uv, i32(encoded_image.atlas_index)); } else { let final_xy = image_offset + extended_xy; sample_color = textureLoad( @@ -936,39 +939,6 @@ fn extend_mode_normalized(t: f32, mode: u32) -> f32 { } } -// Convert atlas-space pixel coordinates to normalized UVs suitable for textureSample. -// -// To prevent bleeding from neighboring atlas images, we clamp the UVs so that the boundary -// samples land on the CENTER of the boundary texels, not their edges. 
-// -// The input `sample_xy` is in atlas pixel space (after extend-mode and offset are applied). -fn atlas_to_normalized_uv( - sample_xy: vec2, - image_offset: vec2, - image_size: vec2, - extend_modes: vec2, -) -> vec2 { - let inv_atlas_dim = 1.0 / vec2(textureDimensions(atlas_texture_array)); - let uv_min = (image_offset + 0.5) * inv_atlas_dim; - let uv_max = (image_offset + image_size - 0.5) * inv_atlas_dim; - let uv = sample_xy * inv_atlas_dim; - return clamp(uv, uv_min, uv_max); -} - -// Bilinear filtering via hardware textureSample. -fn bilinear_sample( - tex: texture_2d_array, - coords: vec2, - atlas_idx: i32, - image_offset: vec2, - image_size: vec2, - extend_modes: vec2, - image_padding: f32, -) -> vec4 { - let uv = atlas_to_normalized_uv(coords, image_offset, image_size, extend_modes); - return textureSample(tex, atlas_sampler, uv, atlas_idx); -} - // Bicubic filtering using Mitchell filter with B=1/3, C=1/3 // // Cubic resampling consists of sampling the 16 surrounding pixels of the target point and From b50aaee8c1a755bf570cb15538a29099e8822690 Mon Sep 17 00:00:00 2001 From: Taj Pereira Date: Sun, 29 Mar 2026 06:36:17 +1030 Subject: [PATCH 09/10] Uhhhh 50% faster again? 
--- .../shaders/render_strips.wgsl | 46 +++++++++---------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/sparse_strips/vello_sparse_shaders/shaders/render_strips.wgsl b/sparse_strips/vello_sparse_shaders/shaders/render_strips.wgsl index 2f9a8b5b80..6cf1c5cfe3 100644 --- a/sparse_strips/vello_sparse_shaders/shaders/render_strips.wgsl +++ b/sparse_strips/vello_sparse_shaders/shaders/render_strips.wgsl @@ -406,32 +406,32 @@ fn fs_main(in: VertexOutput) -> @location(0) vec4 { // TODO: add a fast path for images where we are using bilinear sampling and want transparent pixels, // using GPU-native bilinear sampling - var sample_color: vec4; - if encoded_image.quality == IMAGE_QUALITY_HIGH { - let final_xy = image_offset + extended_xy; - sample_color = bicubic_sample( - atlas_texture_array, - final_xy, - i32(encoded_image.atlas_index), - image_offset, - image_size, - encoded_image.extend_modes, - encoded_image.image_padding, - ); - } else if encoded_image.quality == IMAGE_QUALITY_MEDIUM { + //var sample_color: vec4; + //if encoded_image.quality == IMAGE_QUALITY_HIGH { + // let final_xy = image_offset + extended_xy; + // sample_color = bicubic_sample( + // atlas_texture_array, + // final_xy, + // i32(encoded_image.atlas_index), + // image_offset, + // image_size, + // encoded_image.extend_modes, + // encoded_image.image_padding, + // ); + //} else if encoded_image.quality == IMAGE_QUALITY_MEDIUM { let final_xy = image_offset + extended_xy; let inv_atlas_dim = 1.0 / f32(1u << config.atlas_dim_bits); let uv = clamp(final_xy * inv_atlas_dim, in.uv_bounds.xy, in.uv_bounds.zw); - sample_color = textureSample(atlas_texture_array, atlas_sampler, uv, i32(encoded_image.atlas_index)); - } else { - let final_xy = image_offset + extended_xy; - sample_color = textureLoad( - atlas_texture_array, - vec2(final_xy), - i32(encoded_image.atlas_index), - 0, - ); - } + let sample_color = textureSample(atlas_texture_array, atlas_sampler, uv, i32(encoded_image.atlas_index)); + 
//} else { + // let final_xy = image_offset + extended_xy; + // sample_color = textureLoad( + // atlas_texture_array, + // vec2(final_xy), + // i32(encoded_image.atlas_index), + // 0, + // ); + //} let is_multiply = bool(encoded_image.tint_mode); final_color = alpha * select( From 58ea8fe740b0fe4ad346122b989a64605db90ec9 Mon Sep 17 00:00:00 2001 From: Taj Pereira Date: Sun, 29 Mar 2026 07:17:45 +1030 Subject: [PATCH 10/10] Remove tint cost --- .../shaders/render_strips.wgsl | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/sparse_strips/vello_sparse_shaders/shaders/render_strips.wgsl b/sparse_strips/vello_sparse_shaders/shaders/render_strips.wgsl index 6cf1c5cfe3..1d10faad18 100644 --- a/sparse_strips/vello_sparse_shaders/shaders/render_strips.wgsl +++ b/sparse_strips/vello_sparse_shaders/shaders/render_strips.wgsl @@ -380,14 +380,13 @@ fn fs_main(in: VertexOutput) -> @location(0) vec4 { } // Apply the alpha value to the unpacked RGBA color or slot index let color_source = (in.paint_and_rect_flag >> 29u) & 0x3u; - var final_color: vec4; if color_source == COLOR_SOURCE_PAYLOAD { let paint_type = (in.paint_and_rect_flag >> 26u) & 0x7u; // in.payload encodes a color for PAINT_TYPE_SOLID or sample_xy for PAINT_TYPE_IMAGE if paint_type == PAINT_TYPE_SOLID { - final_color = alpha * unpack4x8unorm(in.payload); + return alpha * unpack4x8unorm(in.payload); } else if paint_type == PAINT_TYPE_IMAGE { let paint_tex_idx = in.paint_and_rect_flag & PAINT_TEXTURE_INDEX_MASK; let encoded_image = unpack_encoded_image(paint_tex_idx); @@ -434,7 +433,7 @@ fn fs_main(in: VertexOutput) -> @location(0) vec4 { //} let is_multiply = bool(encoded_image.tint_mode); - final_color = alpha * select( + return alpha * select( encoded_image.tint * sample_color.a, sample_color * encoded_image.tint, is_multiply @@ -456,7 +455,7 @@ fn fs_main(in: VertexOutput) -> @location(0) vec4 { linear_gradient.texture_width, true ); - final_color = alpha * gradient_color; + 
return alpha * gradient_color; } else if paint_type == PAINT_TYPE_RADIAL_GRADIENT { let paint_tex_idx = in.paint_and_rect_flag & PAINT_TEXTURE_INDEX_MASK; let radial_gradient = unpack_radial_gradient(paint_tex_idx); @@ -474,7 +473,7 @@ fn fs_main(in: VertexOutput) -> @location(0) vec4 { radial_gradient.texture_width, gradient_result.is_valid ); - final_color = alpha * gradient_color; + return alpha * gradient_color; } else if paint_type == PAINT_TYPE_SWEEP_GRADIENT { let paint_tex_idx = in.paint_and_rect_flag & PAINT_TEXTURE_INDEX_MASK; let sweep_gradient = unpack_sweep_gradient(paint_tex_idx); @@ -506,7 +505,7 @@ fn fs_main(in: VertexOutput) -> @location(0) vec4 { sweep_gradient.texture_width, true ); - final_color = alpha * gradient_color; + return alpha * gradient_color; } } else if color_source == COLOR_SOURCE_SLOT { // in.payload encodes a slot in the source clip texture. @@ -524,7 +523,7 @@ fn fs_main(in: VertexOutput) -> @location(0) vec4 { // Extract opacity from first 8 bits (quantized from [0, 255]) let opacity = f32(in.paint_and_rect_flag & 0xFFu) * (1.0 / 255.0); - final_color = alpha * opacity * clip_in_color; + return alpha * opacity * clip_in_color; } else if color_source == COLOR_SOURCE_BLEND { let opacity = f32((in.paint_and_rect_flag >> 16u) & 0xFFu) * (1.0 / 255.0); let mix_mode = (in.paint_and_rect_flag >> 8u) & 0xFFu; @@ -543,9 +542,9 @@ fn fs_main(in: VertexOutput) -> @location(0) vec4 { let dest_y = clip_y_in_strip + dest_slot * config.strip_height; let dest_color = textureLoad(clip_input_texture, vec2(clip_x, dest_y), 0); - final_color = blend_mix_compose(dest_color, src_color * opacity * alpha, compose_mode, mix_mode); + return blend_mix_compose(dest_color, src_color * opacity * alpha, compose_mode, mix_mode); } - return final_color; + return vec4(0); } // Apply color mixing and composition. Both input and output colors are premultiplied RGB.