Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 62 additions & 0 deletions examples/scenes/src/test_scenes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,7 @@ export_scenes!(
fn many_draw_objects(many_draw_objects)
fn blurred_rounded_rect(blurred_rounded_rect)
fn image_sampling(impls::image_sampling(), "image_sampling", false)
fn image_sampling_bicubic(impls::image_sampling_bicubic(), "image_sampling_bicubic", false)
fn image_extend_modes_bilinear(impls::image_extend_modes(ImageQuality::Medium), "image_extend_modes (bilinear)", false)
fn image_extend_modes_nearest_neighbor(impls::image_extend_modes(ImageQuality::Low), "image_extend_modes (nearest neighbor)", false)
fn luminance_mask(luminance_mask)
Expand Down Expand Up @@ -157,6 +158,38 @@ mod impls {
}
}

/// Builds the 16x16 RGBA8 test pattern used by the bicubic sampling scene.
///
/// The pattern is a black/white checkerboard of 2x2 cells, overlaid (in increasing
/// priority) with a red cross on row/column 8, blue diagonals, and two lime marker
/// texels at `(2, 13)` and `(13, 2)`.
fn sample_bicubic_image_data() -> ImageData {
    // Pick the colour of a single texel. The branches are ordered from the highest
    // priority overlay down to the checkerboard background.
    let texel = |x: i32, y: i32| {
        if (x == 2 && y == 13) || (x == 13 && y == 2) {
            palette::css::LIME
        } else if x == y || x + y == 15 {
            palette::css::BLUE
        } else if x == 8 || y == 8 {
            palette::css::RED
        } else if ((x / 2) + (y / 2)) % 2 == 0 {
            palette::css::BLACK
        } else {
            palette::css::WHITE
        }
    };

    // Row-major pixel buffer, four bytes per texel.
    let mut pixels: Vec<u8> = Vec::with_capacity(16 * 16 * 4);
    for row in 0..16 {
        for col in 0..16 {
            pixels.extend(texel(col, row).to_rgba8().to_u8_array());
        }
    }

    ImageData {
        data: Blob::new(Arc::new(pixels)),
        format: ImageFormat::Rgba8,
        width: 16,
        height: 16,
        alpha_type: ImageAlphaType::Alpha,
    }
}

pub(super) fn emoji(scene: &mut Scene, params: &mut SceneParams<'_>) {
let text_size = 120. + 20. * (params.time * 2.).sin() as f32;
let s = "🎉🤠✅";
Expand Down Expand Up @@ -1969,6 +2002,35 @@ mod impls {
}
}

/// Returns a scene callback that draws the bicubic test pattern at every image quality.
///
/// Two differently transformed copies of the pattern are drawn. For each one the
/// `Low`, `Medium` and `High` quality brushes are laid out left to right, 420 units apart,
/// so the sampling modes can be compared side by side.
pub(super) fn image_sampling_bicubic() -> impl FnMut(&mut Scene, &mut SceneParams<'_>) {
    let pattern = sample_bicubic_image_data();
    let low_brush = ImageBrush::new(pattern.clone()).with_quality(ImageQuality::Low);
    let medium_brush = ImageBrush::new(pattern.clone()).with_quality(ImageQuality::Medium);
    let high_brush = ImageBrush::new(pattern).with_quality(ImageQuality::High);

    move |scene, params| {
        params.resolution = Some(Vec2::new(1400., 900.));
        params.base_color = Some(palette::css::WHITE);

        // Top row: pattern centred on the origin, rotated, stretched, then placed.
        let rotated = Affine::translate((-8.0, -8.0))
            .then_rotate(PI / 5.0)
            .then_scale_non_uniform(18.0, 14.0)
            .then_translate(Vec2::new(250.0, 270.0));
        // Bottom row: pattern centred on the origin, skewed, stretched, then placed.
        let skewed = Affine::translate((250.0, 670.0))
            * Affine::scale_non_uniform(20.0, 10.0)
            * Affine::skew(0.35, -0.15)
            * Affine::translate((-8.0, -8.0));

        // Horizontal offsets of the medium- and high-quality columns.
        let medium_shift = Vec2::new(420.0, 0.0);
        let high_shift = Vec2::new(840.0, 0.0);

        for placement in [rotated, skewed] {
            scene.draw_image(&low_brush, placement);
            scene.draw_image(&medium_brush, placement.then_translate(medium_shift));
            scene.draw_image(&high_brush, placement.then_translate(high_shift));
        }
    }
}

pub(super) fn image_extend_modes(
quality: ImageQuality,
) -> impl FnMut(&mut Scene, &mut SceneParams<'_>) {
Expand Down
115 changes: 114 additions & 1 deletion vello_shaders/shader/fine.wgsl
Original file line number Diff line number Diff line change
Expand Up @@ -892,6 +892,106 @@ fn extend_mode(t: f32, mode: u32, max: f32) -> f32 {
}
}

// Cubic resampler logic borrowed from Skia (same as CPU cubic_resampler function)
// Mitchell-Netravali cubic filter coefficients with parameters B=1/3 and C=1/3
//
// Layout: one `vec4` per filter tap (4 taps per axis). Within each `vec4` the components
// are the polynomial coefficients ordered from the constant term to the cubic term, i.e.
// `cubic_weights` feeds them to `single_weight(t, a, b, c, d)` which evaluates
// a + b*t + c*t^2 + d*t^3. In the expressions below the trailing `/ 3.0` on the first
// fraction is the B = 1/3 factor and the `1.0 / 3.0` terms are the C = 1/3 factor.
const MF: array<vec4<f32>, 4> = array<vec4<f32>, 4>(
// Tap 0: B/6, -(3/6)B - C, (3/6)B + 2C, -(1/6)B - C
vec4<f32>(
(1.0 / 6.0) / 3.0,
-(3.0 / 6.0) / 3.0 - 1.0 / 3.0,
(3.0 / 6.0) / 3.0 + 2.0 * 1.0 / 3.0,
-(1.0 / 6.0) / 3.0 - 1.0 / 3.0
),
// Tap 1: 1 - (2/6)B, 0, -3 + (12/6)B + C, 2 - (9/6)B - C
vec4<f32>(
1.0 - (2.0 / 6.0) / 3.0,
0.0,
-3.0 + (12.0 / 6.0) / 3.0 + 1.0 / 3.0,
2.0 - (9.0 / 6.0) / 3.0 - 1.0 / 3.0
),
// Tap 2: B/6, (3/6)B + C, 3 - (15/6)B - 2C, -2 + (9/6)B + C
vec4<f32>(
(1.0 / 6.0) / 3.0,
(3.0 / 6.0) / 3.0 + 1.0 / 3.0,
3.0 - (15.0 / 6.0) / 3.0 - 2.0 * 1.0 / 3.0,
-2.0 + (9.0 / 6.0) / 3.0 + 1.0 / 3.0
),
// Tap 3: 0, 0, -C, (1/6)B + C
vec4<f32>(
0.0,
0.0,
-1.0 / 3.0,
(1.0 / 6.0) / 3.0 + 1.0 / 3.0
)
);

// Compute the four per-tap filter weights for one axis at fractional offset `fract`
// (same as CPU weights function). Component `i` of the result is the cubic polynomial
// whose coefficients are stored in `MF[i]`, evaluated at `fract`.
fn cubic_weights(fract: f32) -> vec4<f32> {
    let tap0 = MF[0];
    let tap1 = MF[1];
    let tap2 = MF[2];
    let tap3 = MF[3];
    return vec4<f32>(
        single_weight(fract, tap0.x, tap0.y, tap0.z, tap0.w),
        single_weight(fract, tap1.x, tap1.y, tap1.z, tap1.w),
        single_weight(fract, tap2.x, tap2.y, tap2.z, tap2.w),
        single_weight(fract, tap3.x, tap3.y, tap3.z, tap3.w)
    );
}

// Calculate a weight based on the fractional value t and the cubic coefficients.
//
// Evaluates the polynomial a + b*t + c*t^2 + d*t^3 in Horner form, so `a` is the
// constant term and `d` the cubic term.
// NOTE(review): the original author states this matches the CPU implementation exactly;
// that implementation is not visible here, so confirm before changing the evaluation order.
fn single_weight(t: f32, a: f32, b: f32, c: f32, d: f32) -> f32 {
return t * (t * (t * d + c) + b) + a;
}

// Bicubic filtering using Mitchell filter with B=1/3, C=1/3
//
// Cubic resampling consists of sampling the 16 pixels surrounding the target point and
// interpolating them with a cubic filter. The generated matrix is 4x4 and represents the coefficients
// of the cubic function used to calculate weights based on the `x_fract` and `y_fract` of the
// location we are looking at.
//
// This is adapted from the sparse-strips shader for the main Vello image path:
// - the atlas is a single `texture_2d`, not a texture array
// - each tap is premultiplied before filtering, matching the existing bilinear path
fn bicubic_sample(
coords: vec2<f32>,
atlas_offset: vec2<f32>,
atlas_max: vec2<f32>,
alpha_type: u32,
) -> vec4<f32> {
let frac_coords = fract(coords + vec2(0.5));
// Get cubic weights for x and y directions
let cx = cubic_weights(frac_coords.x);
let cy = cubic_weights(frac_coords.y);

// Sample 4x4 grid around coords
let s00 = maybe_premul_alpha(textureLoad(image_atlas, vec2<i32>(clamp(coords + vec2(-1.5, -1.5), atlas_offset, atlas_max)), 0), alpha_type);

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Outside the scope of this PR, but looking at this, I think I would handle "maybe premul" a bit differently. The atlas would always be premultiplied, so the upload of the image into the atlas would do premultiplication when needed. That would get rid of the branch here, which is especially significant for bicubic because there are so many of them. That said, I see the complication – uploads are currently done with queue.write_texture, which doesn't have any mechanism to perform the premultiplication, so there's an additional pass that would be required in the separate alpha case. I'm thinking about ways to reduce the additional memory traffic, and what comes to mind is uploading the data from the CPU into a staging texture (or buffer; I'm not sure it matters much), then using either a compute shader or a draw call to read from the staging texture and write the premultiplied pixels to the atlas.

More important than the branch, storing the atlas as always-premultiplied opens the door to using textureSample rather than textureLoad, as is being explored in #1547 (and likely other related work). That's a clear win for bilinear filtering, but less so for bicubic as it doesn't work with the negative weights. There are various tricks (Bicubic Filtering in Fewer Taps is one), but not clear it's worth the lift in the bicubic case.

I should probably capture this in an issue, especially because similar considerations apply across hybrid and classic.

let s10 = maybe_premul_alpha(textureLoad(image_atlas, vec2<i32>(clamp(coords + vec2(-0.5, -1.5), atlas_offset, atlas_max)), 0), alpha_type);
let s20 = maybe_premul_alpha(textureLoad(image_atlas, vec2<i32>(clamp(coords + vec2(0.5, -1.5), atlas_offset, atlas_max)), 0), alpha_type);
let s30 = maybe_premul_alpha(textureLoad(image_atlas, vec2<i32>(clamp(coords + vec2(1.5, -1.5), atlas_offset, atlas_max)), 0), alpha_type);

let s01 = maybe_premul_alpha(textureLoad(image_atlas, vec2<i32>(clamp(coords + vec2(-1.5, -0.5), atlas_offset, atlas_max)), 0), alpha_type);
let s11 = maybe_premul_alpha(textureLoad(image_atlas, vec2<i32>(clamp(coords + vec2(-0.5, -0.5), atlas_offset, atlas_max)), 0), alpha_type);
let s21 = maybe_premul_alpha(textureLoad(image_atlas, vec2<i32>(clamp(coords + vec2(0.5, -0.5), atlas_offset, atlas_max)), 0), alpha_type);
let s31 = maybe_premul_alpha(textureLoad(image_atlas, vec2<i32>(clamp(coords + vec2(1.5, -0.5), atlas_offset, atlas_max)), 0), alpha_type);

let s02 = maybe_premul_alpha(textureLoad(image_atlas, vec2<i32>(clamp(coords + vec2(-1.5, 0.5), atlas_offset, atlas_max)), 0), alpha_type);
let s12 = maybe_premul_alpha(textureLoad(image_atlas, vec2<i32>(clamp(coords + vec2(-0.5, 0.5), atlas_offset, atlas_max)), 0), alpha_type);
let s22 = maybe_premul_alpha(textureLoad(image_atlas, vec2<i32>(clamp(coords + vec2(0.5, 0.5), atlas_offset, atlas_max)), 0), alpha_type);
let s32 = maybe_premul_alpha(textureLoad(image_atlas, vec2<i32>(clamp(coords + vec2(1.5, 0.5), atlas_offset, atlas_max)), 0), alpha_type);

let s03 = maybe_premul_alpha(textureLoad(image_atlas, vec2<i32>(clamp(coords + vec2(-1.5, 1.5), atlas_offset, atlas_max)), 0), alpha_type);
let s13 = maybe_premul_alpha(textureLoad(image_atlas, vec2<i32>(clamp(coords + vec2(-0.5, 1.5), atlas_offset, atlas_max)), 0), alpha_type);
let s23 = maybe_premul_alpha(textureLoad(image_atlas, vec2<i32>(clamp(coords + vec2(0.5, 1.5), atlas_offset, atlas_max)), 0), alpha_type);
let s33 = maybe_premul_alpha(textureLoad(image_atlas, vec2<i32>(clamp(coords + vec2(1.5, 1.5), atlas_offset, atlas_max)), 0), alpha_type);

// Interpolate in x direction for each row
let row0 = cx.x * s00 + cx.y * s10 + cx.z * s20 + cx.w * s30;
let row1 = cx.x * s01 + cx.y * s11 + cx.z * s21 + cx.w * s31;
let row2 = cx.x * s02 + cx.y * s12 + cx.z * s22 + cx.w * s32;
let row3 = cx.x * s03 + cx.y * s13 + cx.z * s23 + cx.w * s33;
// Interpolate in y direction
let result = cy.x * row0 + cy.y * row1 + cy.z * row2 + cy.w * row3;

// Clamp alpha first, then clamp premultiplied color channels against it.
let a = clamp(result.a, 0.0, 1.0);
return vec4<f32>(clamp(result.rgb, vec3(0.0), vec3(a)), a);
}

const PIXELS_PER_THREAD = 4u;

#ifndef msaa
Expand Down Expand Up @@ -1235,7 +1335,6 @@ fn main(
}
}
case IMAGE_QUALITY_MEDIUM, default: {
// We don't have an implementation for `IMAGE_QUALITY_HIGH` yet, just use the same as medium
for (var i = 0u; i < PIXELS_PER_THREAD; i += 1u) {
// We only need to load from the textures if the value will be used.
if area[i] != 0.0 {
Expand All @@ -1261,6 +1360,20 @@ fn main(
}
}
}
case IMAGE_QUALITY_HIGH: {
for (var i = 0u; i < PIXELS_PER_THREAD; i += 1u) {
if area[i] != 0.0 {
let my_xy = vec2(xy.x + f32(i), xy.y);
var atlas_uv = image.matrx.xy * my_xy.x + image.matrx.zw * my_xy.y + image.xlat;
atlas_uv.x = extend_mode(atlas_uv.x, image.x_extend_mode, image.extents.x);
atlas_uv.y = extend_mode(atlas_uv.y, image.y_extend_mode, image.extents.y);
atlas_uv = atlas_uv + image.atlas_offset;
let fg_rgba = bicubic_sample(atlas_uv, image.atlas_offset, atlas_max, image.alpha_type);
let fg_i = pixel_format(fg_rgba * area[i] * image.alpha, image.format);
rgba[i] = rgba[i] * (1.0 - fg_i.a) + fg_i;
}
}
}
}
cmd_ix += 2u;
}
Expand Down
3 changes: 3 additions & 0 deletions vello_tests/snapshots/image_sampling_bicubic.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
8 changes: 8 additions & 0 deletions vello_tests/tests/snapshot_test_scenes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,14 @@ fn snapshot_image_sampling() {
snapshot_test_scene(test_scene, params);
}

/// Snapshot test for the `image_sampling_bicubic` scene, rendered with 520x336 test params.
#[test]
#[cfg_attr(skip_gpu_tests, ignore)]
fn snapshot_image_sampling_bicubic() {
    snapshot_test_scene(
        test_scenes::image_sampling_bicubic(),
        TestParams::new("image_sampling_bicubic", 520, 336),
    );
}

#[test]
#[cfg_attr(skip_gpu_tests, ignore)]
fn snapshot_image_extend_modes_bilinear() {
Expand Down
Loading