Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions sparse_strips/vello_common/src/clip.rs
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,7 @@ pub fn intersect(
///
/// This is all that this method does. It just looks more complicated as the logic for iterating
/// in lock step is a bit tricky.
#[inline(always)]
fn intersect_impl<S: Simd>(
simd: S,
path_1: PathDataRef<'_>,
Expand Down
10 changes: 10 additions & 0 deletions sparse_strips/vello_common/src/encode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -985,6 +985,7 @@ pub trait FromF32Color: Sized + Debug + Copy + Clone {
impl FromF32Color for f32 {
const ZERO: Self = 0.0;

/// Converts the `f32x4` vector into a `[f32; 4]` array.
///
/// The conversion is delegated entirely to the vector's `Into` implementation;
/// no scaling or rounding is applied in this method.
// NOTE(review): `#[inline(always)]` is presumably there so the conversion is
// folded into its SIMD-specialised caller — confirm with a benchmark/profile.
#[inline(always)]
fn from_f32<S: Simd>(color: f32x4<S>) -> [Self; 4] {
color.into()
}
Expand All @@ -993,6 +994,7 @@ impl FromF32Color for f32 {
impl FromF32Color for u8 {
const ZERO: Self = 0;

#[inline(always)]
fn from_f32<S: Simd>(mut color: f32x4<S>) -> [Self; 4] {
let simd = color.simd;
color = color.mul_add(f32x4::splat(simd, 255.0), f32x4::splat(simd, 0.5));
Expand All @@ -1016,6 +1018,14 @@ pub struct GradientLut<T: FromF32Color> {
impl<T: FromF32Color> GradientLut<T> {
/// Create a new lookup table.
///
/// This is a thin entry point: the table itself is built by
/// `Self::new_inner`, which is called from a closure handed to
/// `simd.vectorize`. `simd` and `ranges` are forwarded unchanged.
fn new<S: Simd>(simd: S, ranges: &[GradientRange]) -> Self {
// NOTE(review): presumably `vectorize` runs the closure with the target
// features of this SIMD level enabled, so the force-inlined body is compiled
// for them — confirm against the `Simd::vectorize` documentation.
simd.vectorize(
#[inline(always)]
|| Self::new_inner(simd, ranges),
)
}

#[inline(always)]
fn new_inner<S: Simd>(simd: S, ranges: &[GradientRange]) -> Self {
let lut_size = determine_lut_size(ranges);
let mut lut = vec![[T::ZERO; 4]; lut_size];

Expand Down
1 change: 1 addition & 0 deletions sparse_strips/vello_common/src/rect.rs
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ pub fn render(level: Level, rect: Rect, strip_buf: &mut Vec<Strip>, alpha_buf: &
///
/// The x-alpha masks for the left/right edge tiles are y-independent, so they
/// are precomputed once and reused across all interior rows.
#[inline(always)]
fn render_impl<S: Simd>(s: S, rect: Rect, strip_buf: &mut Vec<Strip>, alpha_buf: &mut Vec<u8>) {
if rect.is_zero_area() {
return;
Expand Down
1 change: 1 addition & 0 deletions sparse_strips/vello_common/src/tile.rs
Original file line number Diff line number Diff line change
Expand Up @@ -503,6 +503,7 @@ impl Tiles {
))
}

#[inline(always)]
fn make_tiles_analytic_aa_impl<S: Simd>(
&mut self,
s: S,
Expand Down
27 changes: 16 additions & 11 deletions sparse_strips/vello_cpu/src/fine/common/gradient/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -53,17 +53,22 @@ pub(crate) struct GradientPainter<'a, S: Simd> {

impl<'a, S: Simd> GradientPainter<'a, S> {
pub(crate) fn new(simd: S, gradient: &'a EncodedGradient, t_vals: &'a [f32]) -> Self {
let lut = gradient.f32_lut(simd);
let scale_factor: f32x8<S> = f32x8::splat(simd, lut.scale_factor());

Self {
gradient,
scale_factor,
lut,
t_vals: t_vals.chunks_exact(8),
has_undefined: gradient.has_undefined,
simd,
}
simd.vectorize(
#[inline(always)]
|| {
let lut = gradient.f32_lut(simd);
let scale_factor: f32x8<S> = f32x8::splat(simd, lut.scale_factor());

Self {
gradient,
scale_factor,
lut,
t_vals: t_vals.chunks_exact(8),
has_undefined: gradient.has_undefined,
simd,
}
},
)
}
}

Expand Down
43 changes: 24 additions & 19 deletions sparse_strips/vello_cpu/src/fine/common/gradient/radial.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,26 +26,31 @@ pub(crate) struct SimdRadialKind<S: Simd> {

impl<S: Simd> SimdRadialKind<S> {
pub(crate) fn new(simd: S, kind: &RadialKind) -> Self {
let inner = match kind {
RadialKind::Radial { bias, scale } => SimdRadialKindInner::Radial {
bias: f32x8::splat(simd, *bias),
scale: f32x8::splat(simd, *scale),
},
RadialKind::Strip { scaled_r0_squared } => SimdRadialKindInner::Strip {
scaled_r0_squared: f32x8::splat(simd, *scaled_r0_squared),
},
RadialKind::Focal {
focal_data,
fp0,
fp1,
} => SimdRadialKindInner::Focal {
fp0: f32x8::splat(simd, *fp0),
fp1: f32x8::splat(simd, *fp1),
focal_data: *focal_data,
},
};
simd.vectorize(
#[inline(always)]
|| {
let inner = match kind {
RadialKind::Radial { bias, scale } => SimdRadialKindInner::Radial {
bias: f32x8::splat(simd, *bias),
scale: f32x8::splat(simd, *scale),
},
RadialKind::Strip { scaled_r0_squared } => SimdRadialKindInner::Strip {
scaled_r0_squared: f32x8::splat(simd, *scaled_r0_squared),
},
RadialKind::Focal {
focal_data,
fp0,
fp1,
} => SimdRadialKindInner::Focal {
fp0: f32x8::splat(simd, *fp0),
fp1: f32x8::splat(simd, *fp1),
focal_data: *focal_data,
},
};

Self { inner }
Self { inner }
},
)
}
}

Expand Down
13 changes: 8 additions & 5 deletions sparse_strips/vello_cpu/src/fine/common/gradient/sweep.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,14 @@ pub(crate) struct SimdSweepKind<S: Simd> {

impl<S: Simd> SimdSweepKind<S> {
pub(crate) fn new(simd: S, kind: &SweepKind) -> Self {
Self {
start_angle: f32x8::splat(simd, kind.start_angle),
inv_angle_delta: f32x8::splat(simd, kind.inv_angle_delta),
simd,
}
simd.vectorize(
#[inline(always)]
|| Self {
start_angle: f32x8::splat(simd, kind.start_angle),
inv_angle_delta: f32x8::splat(simd, kind.inv_angle_delta),
simd,
},
)
}
}

Expand Down
116 changes: 64 additions & 52 deletions sparse_strips/vello_cpu/src/fine/common/image.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,33 +29,38 @@ impl<'a, S: Simd> PlainNNImagePainter<'a, S> {
) -> Self {
let data = ImagePainterData::new(simd, image, pixmap, start_x, start_y);

let y_positions = extend(
simd,
f32x4::splat_pos(
simd,
data.cur_pos.y as f32,
data.x_advances.1,
data.y_advances.1,
),
image.sampler.y_extend,
data.height,
data.height_inv,
);

let cur_x_pos = f32x4::splat_pos(
simd,
data.cur_pos.x as f32,
data.x_advances.0,
data.y_advances.0,
);

Self {
data,
advance: image.x_advance.x as f32,
y_positions,
cur_x_pos,
simd,
}
simd.vectorize(
#[inline(always)]
|| {
let y_positions = extend(
simd,
f32x4::splat_pos(
simd,
data.cur_pos.y as f32,
data.x_advances.1,
data.y_advances.1,
),
image.sampler.y_extend,
data.height,
data.height_inv,
);

let cur_x_pos = f32x4::splat_pos(
simd,
data.cur_pos.x as f32,
data.x_advances.0,
data.y_advances.0,
);

Self {
data,
advance: image.x_advance.x as f32,
y_positions,
cur_x_pos,
simd,
}
},
)
}
}

Expand Down Expand Up @@ -106,6 +111,7 @@ impl<'a, S: Simd> NNImagePainter<'a, S> {
impl<S: Simd> Iterator for NNImagePainter<'_, S> {
type Item = u8x16<S>;

#[inline(always)]
fn next(&mut self) -> Option<Self::Item> {
let x_positions = extend(
self.simd,
Expand Down Expand Up @@ -175,6 +181,7 @@ impl<'a, S: Simd, const QUALITY: u8> FilteredImagePainter<'a, S, QUALITY> {
impl<S: Simd, const QUALITY: u8> Iterator for FilteredImagePainter<'_, S, QUALITY> {
type Item = f32x16<S>;

#[inline(always)]
fn next(&mut self) -> Option<Self::Item> {
let x_positions = f32x4::splat_pos(
self.simd,
Expand Down Expand Up @@ -366,31 +373,36 @@ impl<'a, S: Simd> ImagePainterData<'a, S> {
start_x: f64,
start_y: f64,
) -> Self {
let width = pixmap.width() as f32;
let height = pixmap.height() as f32;
let start_pos = image.transform * Point::new(start_x, start_y);

let width_inv = f32x4::splat(simd, 1.0 / width);
let height_inv = f32x4::splat(simd, 1.0 / height);
let width = f32x4::splat(simd, width);
let width_u32 = u32x4::splat(simd, pixmap.width() as u32);
let height = f32x4::splat(simd, height);

let x_advances = (image.x_advance.x as f32, image.x_advance.y as f32);
let y_advances = (image.y_advance.x as f32, image.y_advance.y as f32);

Self {
cur_pos: start_pos,
pixmap,
x_advances,
y_advances,
image,
width,
height,
width_u32,
width_inv,
height_inv,
}
simd.vectorize(
#[inline(always)]
|| {
let width = pixmap.width() as f32;
let height = pixmap.height() as f32;
let start_pos = image.transform * Point::new(start_x, start_y);

let width_inv = f32x4::splat(simd, 1.0 / width);
let height_inv = f32x4::splat(simd, 1.0 / height);
let width = f32x4::splat(simd, width);
let width_u32 = u32x4::splat(simd, pixmap.width() as u32);
let height = f32x4::splat(simd, height);

let x_advances = (image.x_advance.x as f32, image.x_advance.y as f32);
let y_advances = (image.y_advance.x as f32, image.y_advance.y as f32);

Self {
cur_pos: start_pos,
pixmap,
x_advances,
y_advances,
image,
width,
height,
width_u32,
width_inv,
height_inv,
}
},
)
}
}

Expand Down
Loading
Loading