diff --git a/sparse_strips/vello_common/src/filter/color_matrix.rs b/sparse_strips/vello_common/src/filter/color_matrix.rs
new file mode 100644
index 0000000000..1b801c2f5e
--- /dev/null
+++ b/sparse_strips/vello_common/src/filter/color_matrix.rs
@@ -0,0 +1,65 @@
+// Copyright 2026 the Vello Authors
+// SPDX-License-Identifier: Apache-2.0 OR MIT
+
+//! The color matrix filter.
+
+/// Matrix-based color transformation filter.
+///
+/// The matrix is stored as four rows of five values. Each row computes one output channel
+/// (`R`, `G`, `B`, `A`) as a weighted sum of the input `R`, `G`, `B`, `A` plus a constant offset.
+#[derive(Clone, Copy, Debug, PartialEq)]
+pub struct ColorMatrix {
+    /// The 4x5 matrix in row-major order: entry `5 * row + column`, column 4 being the offset.
+    pub matrix: [f32; 20],
+}
+
+impl ColorMatrix {
+    /// Wrap a row-major 4x5 `matrix` in a color matrix filter.
+    pub fn new(matrix: [f32; 20]) -> Self {
+        Self { matrix }
+    }
+
+    /// Report whether the matrix may be applied to premultiplied colors as-is.
+    ///
+    /// This holds when the alpha row is exactly `[0, 0, 0, 1, 0]` and no RGB row
+    /// reads alpha or adds a constant offset. For this subset, renderers can
+    /// apply the RGB rows directly to premultiplied RGB and clamp each result
+    /// to `[0, alpha]`, where alpha is the unchanged alpha channel.
+    pub fn is_premul_compatible(&self) -> bool {
+        let m = &self.matrix;
+
+        // Alpha coefficient (column 3) and offset (column 4) of the R, G, B rows.
+        let rgb_rows_ok = [3_usize, 4, 8, 9, 13, 14]
+            .iter()
+            .all(|&index| m[index] == 0.0);
+
+        // The alpha row must pass alpha through untouched.
+        let alpha_row_ok = m[15..] == [0.0, 0.0, 0.0, 1.0, 0.0];
+
+        rgb_rows_ok && alpha_row_ok
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::filter_effects::matrices;
+
+    #[test]
+    fn premul_compatible_matrices_are_rgb_only_and_alpha_preserving() {
+        assert!(ColorMatrix::new(matrices::GRAYSCALE).is_premul_compatible());
+        assert!(ColorMatrix::new(matrices::SEPIA).is_premul_compatible());
+        assert!(!ColorMatrix::new(matrices::ALPHA_TO_BLACK).is_premul_compatible());
+    }
+
+    #[test]
+    fn premul_compatible_matrix_rejects_rgb_offsets_and_alpha_changes() {
+        let mut offset_matrix = matrices::IDENTITY;
+        offset_matrix[4] = 0.25; // constant offset of the red row
+        assert!(!ColorMatrix::new(offset_matrix).is_premul_compatible());
+
+        let mut opacity_matrix = matrices::IDENTITY;
+        opacity_matrix[18] = 0.5; // alpha coefficient of the alpha row
+        assert!(!ColorMatrix::new(opacity_matrix).is_premul_compatible());
+    }
+}
diff --git a/sparse_strips/vello_common/src/filter/mod.rs b/sparse_strips/vello_common/src/filter/mod.rs
index 4da635d61a..f8a3fbb101 100644
--- a/sparse_strips/vello_common/src/filter/mod.rs
+++ b/sparse_strips/vello_common/src/filter/mod.rs
@@ -7,6 +7,7 @@
 //! represent a special representation of each filter to be used as the basis for rendering in
 //! `vello_hybrid` and `vello_cpu`.
 
+use crate::filter::color_matrix::ColorMatrix;
 use crate::filter::drop_shadow::{DropShadow, transform_shadow_params};
 use crate::filter::flood::Flood;
 use crate::filter::gaussian_blur::{GaussianBlur, transform_blur_params};
@@ -14,6 +15,7 @@ use crate::filter::offset::Offset;
 use crate::filter_effects::{Filter, FilterPrimitive};
 use crate::kurbo::{Affine, Vec2};
 
+pub mod color_matrix;
 pub mod drop_shadow;
 pub mod flood;
 pub mod gaussian_blur;
@@ -30,6 +32,8 @@ pub enum PreparedFilter {
     Offset(Offset),
     /// A drop shadow filter.
     DropShadow(DropShadow),
+    /// A color matrix filter.
+    ColorMatrix(ColorMatrix),
 }
 
 impl PreparedFilter {
@@ -73,8 +77,9 @@ impl PreparedFilter {
 
                 Self::Offset(offset)
             }
+            FilterPrimitive::ColorMatrix { matrix } => Self::ColorMatrix(ColorMatrix::new(*matrix)),
             _ => {
-                // Other primitives like Blend, ColorMatrix, ComponentTransfer, etc.
+                // Other primitives like Blend, ComponentTransfer, etc.
                 // are not yet implemented
                 unimplemented!("Other filter primitives not yet implemented");
             }
@@ -92,3 +97,23 @@ fn transform_offset_params(dx: f32, dy: f32, transform: &Affine) -> (f32, f32) {
     let transformed_offset = Vec2::new(a * offset.x + c * offset.y, b * offset.x + d * offset.y);
     (transformed_offset.x as f32, transformed_offset.y as f32)
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::filter_effects::matrices;
+
+    #[test]
+    fn prepares_color_matrix_filter() {
+        let filter = Filter::from_primitive(FilterPrimitive::ColorMatrix {
+            matrix: matrices::SEPIA,
+        });
+        let prepared = PreparedFilter::new(&filter, &Affine::IDENTITY);
+
+        let PreparedFilter::ColorMatrix(color_matrix) = prepared else {
+            panic!("expected color matrix filter");
+        };
+
+        assert_eq!(color_matrix.matrix, matrices::SEPIA); // matrix is copied through as-is
+    }
+}
diff --git a/sparse_strips/vello_common/src/filter_effects.rs b/sparse_strips/vello_common/src/filter_effects.rs
index 40d7f82b9a..0e5efc7eb5 100644
--- a/sparse_strips/vello_common/src/filter_effects.rs
+++ b/sparse_strips/vello_common/src/filter_effects.rs
@@ -20,6 +20,7 @@
 //! - `Flood` - Solid color fill
 //! - `GaussianBlur` - Gaussian blur filter
 //! - `DropShadow` - Drop shadow effect (compound primitive)
+//! - `ColorMatrix` - Matrix-based color transformation
 //! - `Offset` - Translation/shift (single primitive)
 //!
 //! **Note:** Currently only single primitive filters are supported. Filter graphs with
@@ -36,7 +37,6 @@
 //!   `Opacity`, `Saturate`, `Sepia`
 //!
 //! **Filter Primitives:**
-//! - `ColorMatrix` - Matrix-based color transformation
 //! - `Composite` - Porter-Duff compositing operations
 //! - `Blend` - Blend mode operations
 //! - `Morphology` - Dilate/erode operations
@@ -400,11 +400,6 @@ pub enum FilterPrimitive {
         /// Default is `EdgeMode::None` per SVG spec.
         edge_mode: EdgeMode,
     },
-    //
-    // ============================================================
-    // TODO: The following filter primitives are not yet implemented
-    // ============================================================
-    //
     /// Matrix-based color transformation.
     ///
     /// Applies a 4x5 matrix transformation to colors, allowing arbitrary
@@ -425,6 +420,11 @@ pub enum FilterPrimitive {
         dy: f32,
     },
 
+    //
+    // ============================================================
+    // TODO: The following filter primitives are not yet implemented
+    // ============================================================
+    //
     /// Composite two inputs using Porter-Duff compositing operations.
     ///
     /// Combines two input images using standard compositing operators
diff --git a/sparse_strips/vello_cpu/src/filter/color_matrix.rs b/sparse_strips/vello_cpu/src/filter/color_matrix.rs
new file mode 100644
index 0000000000..dc1b48c60f
--- /dev/null
+++ b/sparse_strips/vello_cpu/src/filter/color_matrix.rs
@@ -0,0 +1,303 @@
+// Copyright 2026 the Vello Authors
+// SPDX-License-Identifier: Apache-2.0 OR MIT
+
+//! Color matrix filter implementation.
+
+use super::FilterEffect;
+use super::pixel::{
+    clamp_unit, norm_to_u8, premul_rgba8_to_straight_f32, straight_f32_to_premul_rgba8, u8_to_norm,
+};
+use crate::layer_manager::LayerManager;
+use vello_common::filter::color_matrix::ColorMatrix;
+use vello_common::filter_effects::matrices;
+use vello_common::peniko::color::PremulRgba8;
+use vello_common::pixmap::Pixmap;
+
+impl FilterEffect for ColorMatrix {
+    fn execute_lowp(&self, pixmap: &mut Pixmap, _layer_manager: &mut LayerManager) {
+        apply_color_matrix(pixmap, self); // shares one implementation with `execute_highp`
+    }
+
+    fn execute_highp(&self, pixmap: &mut Pixmap, _layer_manager: &mut LayerManager) {
+        apply_color_matrix(pixmap, self); // shares one implementation with `execute_lowp`
+    }
+}
+
+fn apply_color_matrix(pixmap: &mut Pixmap, filter: &ColorMatrix) {
+    let matrix = &filter.matrix;
+    if matrix == &matrices::IDENTITY {
+        return; // identity: neither the pixels nor the transparency flag are touched
+    }
+
+    let mut may_have_transparency = false; // set once any output alpha is below 255
+    if filter.is_premul_compatible() {
+        // For RGB-only, alpha-preserving matrices, applying the RGB matrix to
+        // premultiplied channels is equivalent to unpremultiply -> matrix ->
+        // premultiply. This avoids the per-pixel alpha division.
+        for pixel in pixmap.data_mut() {
+            let transformed = apply_premul_color_matrix_to_pixel(*pixel, matrix);
+            may_have_transparency |= transformed.a != 255;
+            *pixel = transformed;
+        }
+    } else {
+        for pixel in pixmap.data_mut() {
+            let transformed = apply_color_matrix_to_pixel(*pixel, matrix);
+            may_have_transparency |= transformed.a != 255;
+            *pixel = transformed;
+        }
+    }
+
+    pixmap.set_may_have_transparency(may_have_transparency);
+}
+
+fn apply_color_matrix_to_pixel(pixel: PremulRgba8, matrix: &[f32; 20]) -> PremulRgba8 {
+    // The general path evaluates the matrix on straight-alpha channels.
+    let [red, green, blue, alpha] = premul_rgba8_to_straight_f32(pixel);
+
+    // Rows start at indices 0, 5, 10 and 15 and produce R, G, B and A in that order.
+    let row = |start: usize| apply_row(matrix, start, red, green, blue, alpha);
+    let (out_red, out_green, out_blue, out_alpha) = (row(0), row(5), row(10), row(15));
+
+    straight_f32_to_premul_rgba8(out_red, out_green, out_blue, out_alpha)
+}
+
+fn apply_premul_color_matrix_to_pixel(pixel: PremulRgba8, matrix: &[f32; 20]) -> PremulRgba8 {
+    let [r, g, b] = [pixel.r, pixel.g, pixel.b].map(u8_to_norm);
+    let alpha = u8_to_norm(pixel.a);
+    // Evaluates the RGB coefficients of the row starting at index `i` on the
+    // premultiplied channels; the row's alpha coefficient and offset are not read.
+    let channel = |i: usize| {
+        let value = matrix[i] * r + matrix[i + 1] * g + matrix[i + 2] * b;
+        // Straight-alpha clamping before re-premultiplication becomes
+        // clamping to [0, alpha] in premultiplied space.
+        norm_to_u8(value.clamp(0.0, alpha))
+    };
+
+    PremulRgba8 {
+        r: channel(0),
+        g: channel(5),
+        b: channel(10),
+        a: pixel.a,
+    }
+}
+
+fn apply_row(matrix: &[f32; 20], offset: usize, r: f32, g: f32, b: f32, a: f32) -> f32 {
+    // A row is four channel coefficients followed by a constant term.
+    let weighted_r = matrix[offset] * r;
+    let weighted_g = matrix[offset + 1] * g;
+    let weighted_b = matrix[offset + 2] * b;
+    let weighted_a = matrix[offset + 3] * a;
+    let constant = matrix[offset + 4];
+    clamp_unit(weighted_r + weighted_g + weighted_b + weighted_a + constant)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn identity_preserves_premultiplied_pixel() {
+        let pixel = PremulRgba8 {
+            r: 128,
+            g: 64,
+            b: 32,
+            a: 128,
+        };
+
+        assert_eq!(
+            apply_color_matrix_to_pixel(pixel, &matrices::IDENTITY),
+            pixel
+        );
+    }
+
+    #[test]
+    fn identity_matrix_leaves_pixmap_unchanged() {
+        let mut pixmap = Pixmap::from_parts_with_opacity(
+            alloc::vec![
+                PremulRgba8 {
+                    r: 255,
+                    g: 0,
+                    b: 0,
+                    a: 255,
+                },
+                PremulRgba8 {
+                    r: 128,
+                    g: 64,
+                    b: 32,
+                    a: 128,
+                },
+            ],
+            2,
+            1,
+            true,
+        );
+        let before = [pixmap.sample(0, 0), pixmap.sample(1, 0)];
+
+        apply_color_matrix(&mut pixmap, &ColorMatrix::new(matrices::IDENTITY));
+
+        assert_eq!([pixmap.sample(0, 0), pixmap.sample(1, 0)], before);
+        assert!(pixmap.may_have_transparency());
+    }
+
+    #[test]
+    fn grayscale_uses_unpremultiplied_color_channels() {
+        let pixel = PremulRgba8 {
+            r: 128,
+            g: 0,
+            b: 0,
+            a: 128,
+        };
+
+        assert_eq!(
+            apply_color_matrix_to_pixel(pixel, &matrices::GRAYSCALE),
+            PremulRgba8 {
+                r: 27,
+                g: 27,
+                b: 27,
+                a: 128,
+            }
+        );
+    }
+
+    #[test]
+    fn premul_compatible_matrices_skip_straight_alpha_conversion() {
+        assert!(ColorMatrix::new(matrices::GRAYSCALE).is_premul_compatible());
+        assert!(ColorMatrix::new(matrices::SEPIA).is_premul_compatible());
+        assert!(!ColorMatrix::new(matrices::ALPHA_TO_BLACK).is_premul_compatible());
+    }
+
+    #[test]
+    fn premul_compatible_path_matches_straight_alpha_path() {
+        let pixel = PremulRgba8 {
+            r: 80,
+            g: 32,
+            b: 16,
+            a: 128,
+        };
+
+        let premul = apply_premul_color_matrix_to_pixel(pixel, &matrices::SEPIA);
+        let straight = apply_color_matrix_to_pixel(pixel, &matrices::SEPIA);
+
+        assert_eq!(premul, straight);
+    }
+
+    #[test]
+    fn premul_compatible_path_clamps_rgb_to_alpha() {
+        let pixel = PremulRgba8 {
+            r: 128,
+            g: 0,
+            b: 0,
+            a: 128,
+        };
+        let matrix = [
+            2.0, 0.0, 0.0, 0.0, 0.0, // R row: doubles red
+            0.0, 1.0, 0.0, 0.0, 0.0, // G row: identity
+            0.0, 0.0, 1.0, 0.0, 0.0, // B row: identity
+            0.0, 0.0, 0.0, 1.0, 0.0, // A row: alpha passes through
+        ];
+
+        let premul = apply_premul_color_matrix_to_pixel(pixel, &matrix);
+        let straight = apply_color_matrix_to_pixel(pixel, &matrix);
+
+        assert_eq!(premul, straight);
+        assert_eq!(
+            premul,
+            PremulRgba8 {
+                r: 128,
+                g: 0,
+                b: 0,
+                a: 128,
+            }
+        );
+    }
+
+    #[test]
+    fn matrix_offsets_can_create_color_from_transparent_black() {
+        let matrix = [
+            0.0, 0.0, 0.0, 0.0, 0.5, // R row: constant 0.5
+            0.0, 0.0, 0.0, 0.0, 0.25, // G row: constant 0.25
+            0.0, 0.0, 0.0, 0.0, 0.0, // B row: constant 0.0
+            0.0, 0.0, 0.0, 0.0, 0.5, // A row: constant 0.5
+        ];
+
+        assert_eq!(
+            apply_color_matrix_to_pixel(PremulRgba8::from_u32(0), &matrix),
+            PremulRgba8 {
+                r: 64,
+                g: 32,
+                b: 0,
+                a: 128,
+            }
+        );
+    }
+
+    #[test]
+    fn execution_updates_pixmap_transparency_flag() {
+        let mut pixmap = Pixmap::from_parts_with_opacity(
+            alloc::vec![PremulRgba8 {
+                r: 255,
+                g: 0,
+                b: 0,
+                a: 255,
+            }],
+            1,
+            1,
+            false,
+        );
+        let mut layer_manager = LayerManager::new();
+        let matrix = [
+            1.0, 0.0, 0.0, 0.0, 0.0, // R row: identity
+            0.0, 1.0, 0.0, 0.0, 0.0, // G row: identity
+            0.0, 0.0, 1.0, 0.0, 0.0, // B row: identity
+            0.0, 0.0, 0.0, 0.0, 0.5, // A row: constant 0.5
+        ];
+        let filter = ColorMatrix::new(matrix);
+
+        filter.execute_lowp(&mut pixmap, &mut layer_manager);
+
+        assert!(pixmap.may_have_transparency());
+        assert_eq!(
+            pixmap.sample(0, 0),
+            PremulRgba8 {
+                r: 128,
+                g: 0,
+                b: 0,
+                a: 128,
+            }
+        );
+    }
+
+    #[test]
+    fn execution_updates_transparency_flag_for_opaque_output() {
+        let mut pixmap = Pixmap::from_parts_with_opacity(
+            alloc::vec![PremulRgba8 {
+                r: 128,
+                g: 0,
+                b: 0,
+                a: 128,
+            }],
+            1,
+            1,
+            true,
+        );
+        let matrix = [
+            1.0, 0.0, 0.0, 0.0, 0.0, // R row: identity
+            0.0, 1.0, 0.0, 0.0, 0.0, // G row: identity
+            0.0, 0.0, 1.0, 0.0, 0.0, // B row: identity
+            0.0, 0.0, 0.0, 0.0, 1.0, // A row: constant 1.0, so the output is opaque
+        ];
+
+        apply_color_matrix(&mut pixmap, &ColorMatrix::new(matrix));
+
+        assert!(!pixmap.may_have_transparency());
+        assert_eq!(
+            pixmap.sample(0, 0),
+            PremulRgba8 {
+                r: 255,
+                g: 0,
+                b: 0,
+                a: 255,
+            }
+        );
+    }
+}
diff --git a/sparse_strips/vello_cpu/src/filter/drop_shadow.rs b/sparse_strips/vello_cpu/src/filter/drop_shadow.rs
index 83f0cf3be6..734aaf0ffc 100644
--- a/sparse_strips/vello_cpu/src/filter/drop_shadow.rs
+++ b/sparse_strips/vello_cpu/src/filter/drop_shadow.rs
@@ -15,14 +15,15 @@
 
 use super::FilterEffect;
 use super::gaussian_blur::apply_blur;
+use super::pixel::{norm_to_u8, premultiply_u8, u8_to_norm};
 use super::shift::offset_pixels;
 use crate::layer_manager::LayerManager;
 use vello_common::color::{AlphaColor, Srgb};
 use vello_common::filter::drop_shadow::DropShadow;
 use vello_common::filter_effects::EdgeMode;
-use vello_common::peniko::color::PremulRgba8;
 #[cfg(not(feature = "std"))]
-use vello_common::peniko::kurbo::common::FloatFuncs as _;
+use vello_common::kurbo::common::FloatFuncs as _;
+use vello_common::peniko::color::PremulRgba8;
 use vello_common::pixmap::Pixmap;
 
 impl FilterEffect for DropShadow {
@@ -109,14 +110,10 @@ fn compose_shadow_direct(shadow: &Pixmap, dst: &mut Pixmap, color: AlphaColor<Sr
             let shadow_alpha = (u8_to_norm(alpha) * color.components[3]).min(1.0);
             let final_alpha = norm_to_u8(shadow_alpha);
 
-            // Premultiply RGB by alpha as required by PremulRgba8
-            let alpha_u16 = u16::from(final_alpha);
-            let premultiply = |channel: u8| ((u16::from(channel) * alpha_u16) / 255) as u8;
-
             let colored_shadow = PremulRgba8 {
-                r: premultiply(shadow_r),
-                g: premultiply(shadow_g),
-                b: premultiply(shadow_b),
+                r: premultiply_u8(shadow_r, final_alpha),
+                g: premultiply_u8(shadow_g, final_alpha),
+                b: premultiply_u8(shadow_b, final_alpha),
                 a: final_alpha,
             };
 
@@ -155,48 +152,11 @@ fn src_over_channel(src: u8, dst: u8, src_alpha: f32) -> u8 {
     norm_to_u8(result)
 }
 
-/// Convert a u8 color component (0-255) to normalized f32 (0.0-1.0).
-#[inline]
-fn u8_to_norm(value: u8) -> f32 {
-    value as f32 / 255.0
-}
-
-/// Convert a normalized f32 (0.0-1.0) to u8 color component (0-255).
-#[inline]
-fn norm_to_u8(value: f32) -> u8 {
-    (value * 255.0).round() as u8
-}
-
 #[cfg(test)]
 mod tests {
     use super::*;
     use vello_common::color::Srgb;
 
-    /// Test `u8_to_norm` conversion.
-    #[test]
-    fn test_u8_to_norm() {
-        assert_eq!(u8_to_norm(0), 0.0);
-        assert!((u8_to_norm(255) - 1.0).abs() < 1e-6);
-    }
-
-    /// Test `norm_to_u8` conversion.
-    #[test]
-    fn test_norm_to_u8() {
-        assert_eq!(norm_to_u8(0.0), 0);
-        assert_eq!(norm_to_u8(1.0), 255);
-        assert_eq!(norm_to_u8(0.5), 128); // 0.5 * 255 = 127.5 → 128
-    }
-
-    /// Test round-trip conversion u8 → norm → u8.
-    #[test]
-    fn test_conversion_roundtrip() {
-        for value in [0, 1, 50, 127, 128, 200, 254, 255] {
-            let normalized = u8_to_norm(value);
-            let back = norm_to_u8(normalized);
-            assert_eq!(back, value);
-        }
-    }
-
     /// Test Porter-Duff source-over with fully opaque source.
     #[test]
     fn test_compose_src_over_opaque_source() {
diff --git a/sparse_strips/vello_cpu/src/filter/mod.rs b/sparse_strips/vello_cpu/src/filter/mod.rs
index c0d56ccc13..542873abbb 100644
--- a/sparse_strips/vello_cpu/src/filter/mod.rs
+++ b/sparse_strips/vello_cpu/src/filter/mod.rs
@@ -8,10 +8,12 @@
 //! Filters are applied to layers through the layer manager, which handles
 //! intermediate storage.
 
+mod color_matrix;
 mod drop_shadow;
 mod flood;
 mod gaussian_blur;
 mod offset;
+mod pixel;
 mod shift;
 
 use crate::layer_manager::LayerManager;
@@ -78,6 +80,9 @@ pub(crate) fn filter_lowp(
         PreparedFilter::DropShadow(drop_shadow) => {
             drop_shadow.execute_lowp(pixmap, layer_manager);
         }
+        PreparedFilter::ColorMatrix(color_matrix) => {
+            color_matrix.execute_lowp(pixmap, layer_manager);
+        }
     }
 }
 
@@ -116,5 +121,8 @@ pub(crate) fn filter_highp(
         PreparedFilter::DropShadow(drop_shadow) => {
             drop_shadow.execute_highp(pixmap, layer_manager);
         }
+        PreparedFilter::ColorMatrix(color_matrix) => {
+            color_matrix.execute_highp(pixmap, layer_manager);
+        }
     }
 }
diff --git a/sparse_strips/vello_cpu/src/filter/pixel.rs b/sparse_strips/vello_cpu/src/filter/pixel.rs
new file mode 100644
index 0000000000..3f4213768a
--- /dev/null
+++ b/sparse_strips/vello_cpu/src/filter/pixel.rs
@@ -0,0 +1,116 @@
+// Copyright 2026 the Vello Authors
+// SPDX-License-Identifier: Apache-2.0 OR MIT
+
+//! Pixel helpers for CPU filter effects.
+
+use vello_common::peniko::color::PremulRgba8;
+#[cfg(not(feature = "std"))]
+use vello_common::peniko::kurbo::common::FloatFuncs as _;
+
+const INV_255: f32 = 1.0 / 255.0; // reciprocal of 255, so normalizing is a multiply
+
+/// Convert a u8 color component (0-255) to normalized f32 (0.0-1.0).
+#[inline]
+pub(super) fn u8_to_norm(value: u8) -> f32 {
+    f32::from(value) * INV_255
+}
+
+/// Convert a normalized f32 color component to u8, clamping to 0.0-1.0 and rounding.
+#[inline]
+pub(super) fn norm_to_u8(value: f32) -> u8 {
+    (clamp_unit(value) * 255.0).round() as u8
+}
+
+/// Premultiply a u8 color component by a u8 alpha value, truncating the quotient.
+#[inline]
+pub(super) fn premultiply_u8(channel: u8, alpha: u8) -> u8 {
+    ((u16::from(channel) * u16::from(alpha)) / 255) as u8 // product <= 65025 fits in u16
+}
+
+/// Convert a premultiplied RGBA8 pixel to normalized straight-alpha components.
+///
+/// Returns `[r, g, b, a]`. The color channels are divided by alpha unless the
+/// pixel is fully transparent or fully opaque.
+#[inline]
+pub(super) fn premul_rgba8_to_straight_f32(pixel: PremulRgba8) -> [f32; 4] {
+    // Fully transparent: report transparent black instead of dividing by zero.
+    if pixel.a == 0 {
+        return [0.0; 4];
+    }
+
+    let [r, g, b] = [pixel.r, pixel.g, pixel.b].map(u8_to_norm);
+
+    // Fully opaque: premultiplied and straight colors coincide, and alpha is
+    // reported as exactly 1.0.
+    if pixel.a == 255 {
+        return [r, g, b, 1.0];
+    }
+
+    // Partially transparent: undo the premultiplication with one reciprocal.
+    let alpha = u8_to_norm(pixel.a);
+    let inv_alpha = 1.0 / alpha;
+    [r * inv_alpha, g * inv_alpha, b * inv_alpha, alpha]
+}
+
+/// Convert normalized straight-alpha components to a premultiplied RGBA8 pixel.
+#[inline]
+pub(super) fn straight_f32_to_premul_rgba8(r: f32, g: f32, b: f32, a: f32) -> PremulRgba8 {
+    let alpha = clamp_unit(a); // clamp before scaling the color channels
+    let premultiplied = |channel: f32| norm_to_u8(channel * alpha);
+    PremulRgba8 {
+        r: premultiplied(r),
+        g: premultiplied(g),
+        b: premultiplied(b),
+        a: norm_to_u8(alpha),
+    }
+}
+
+#[inline]
+pub(super) fn clamp_unit(value: f32) -> f32 {
+    value.clamp(0.0, 1.0) // restricts to [0.0, 1.0]; `f32::clamp` passes NaN through
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn u8_to_norm_converts_endpoints() {
+        assert_eq!(u8_to_norm(0), 0.0);
+        assert!((u8_to_norm(255) - 1.0).abs() < 1e-6);
+    }
+
+    #[test]
+    fn norm_to_u8_rounds_to_nearest() {
+        assert_eq!(norm_to_u8(0.0), 0);
+        assert_eq!(norm_to_u8(1.0), 255);
+        assert_eq!(norm_to_u8(0.5), 128); // 0.5 * 255 = 127.5, which rounds to 128
+    }
+
+    #[test]
+    fn norm_to_u8_clamps() {
+        assert_eq!(norm_to_u8(-1.0), 0); // below the unit range
+        assert_eq!(norm_to_u8(2.0), 255); // above the unit range
+    }
+
+    #[test]
+    fn premul_to_straight_handles_transparent_black() {
+        assert_eq!(
+            premul_rgba8_to_straight_f32(PremulRgba8::from_u32(0)),
+            [0.0, 0.0, 0.0, 0.0]
+        );
+    }
+
+    #[test]
+    fn straight_to_premul_premultiplies_rgb() {
+        assert_eq!(
+            straight_f32_to_premul_rgba8(1.0, 0.5, 0.0, 0.5),
+            PremulRgba8 {
+                r: 128,
+                g: 64,
+                b: 0,
+                a: 128,
+            }
+        );
+    }
+}
diff --git a/sparse_strips/vello_hybrid/src/filter.rs b/sparse_strips/vello_hybrid/src/filter.rs
index cd3f71299b..f44d9e6fc0 100644
--- a/sparse_strips/vello_hybrid/src/filter.rs
+++ b/sparse_strips/vello_hybrid/src/filter.rs
@@ -32,6 +32,7 @@ use hashbrown::HashMap;
 use vello_common::coarse::{WideTile, WideTilesBbox};
 use vello_common::encode::{EncodedImage, EncodedPaint};
 use vello_common::filter::PreparedFilter;
+use vello_common::filter::color_matrix::ColorMatrix;
 use vello_common::filter::drop_shadow::DropShadow;
 use vello_common::filter::flood::Flood;
 use vello_common::filter::gaussian_blur::{DecimationSizer, GaussianBlur, MAX_KERNEL_SIZE};
@@ -61,9 +62,13 @@ const FILTER_ATLAS_PADDING: u16 = MAX_KERNEL_SIZE as u16 / 2;
 
 // Since we store in RGBA32 texture.
 const BYTES_PER_TEXEL: usize = 16;
-const FILTER_SIZE_BYTES: usize = 48;
+const FILTER_SIZE_BYTES: usize = 96;
 const FILTER_SIZE_U32: usize = FILTER_SIZE_BYTES / 4;
 
+// Header bit 13: set for color matrix filters to select the shader path that works directly
+// in premultiplied-alpha space. Mirrored by the same constant in `filters.wgsl`.
+const COLOR_MATRIX_PREMUL_COMPATIBLE_FLAG: u32 = 1 << 13;
+
 const _: () = assert!(
     size_of::<GpuFilterData>() == FILTER_SIZE_BYTES,
     "memory size of filters need to match"
@@ -84,12 +89,17 @@ const _: () = assert!(
     size_of::<GpuGaussianBlur>() == FILTER_SIZE_BYTES,
     "memory size of filters need to match"
 );
+const _: () = assert!(
+    size_of::<GpuColorMatrix>() == FILTER_SIZE_BYTES, // 4 + 20 * 4 + 3 * 4 = 96 bytes
+    "memory size of filters need to match"
+);
 
 pub(crate) mod filter_type {
     pub(crate) const OFFSET: u32 = 0;
     pub(crate) const FLOOD: u32 = 1;
     pub(crate) const GAUSSIAN_BLUR: u32 = 2;
     pub(crate) const DROP_SHADOW: u32 = 3;
+    pub(crate) const COLOR_MATRIX: u32 = 4;
 }
 
 pub(crate) mod edge_mode {
@@ -109,6 +119,7 @@ pub(crate) mod pass_kind {
     pub(crate) const BLUR_V: u32 = 5;
     pub(crate) const UPSCALE: u32 = 6;
     pub(crate) const COMPOSITE_DROP_SHADOW: u32 = 7;
+    pub(crate) const COLOR_MATRIX: u32 = 8;
 }
 
 pub(crate) fn edge_mode_to_gpu(mode: EdgeMode) -> u32 {
@@ -126,6 +137,14 @@ fn pack_header(filter_type: u32) -> u32 {
     filter_type
 }
 
+fn pack_color_matrix_header(color_matrix: &ColorMatrix) -> u32 {
+    let base = pack_header(filter_type::COLOR_MATRIX);
+    if !color_matrix.is_premul_compatible() {
+        return base;
+    }
+    base | COLOR_MATRIX_PREMUL_COMPATIBLE_FLAG // header bit 13 marks the premultiplied path
+}
+
 fn pack_header_with_gaussian_params(
     filter_type: u32,
     edge_mode: u32,
@@ -220,7 +239,7 @@ pub(crate) struct GpuOffset {
     pub header: u32,
     pub dx: f32,
     pub dy: f32,
-    pub _padding: [u32; 9],
+    pub _padding: [u32; 21],
 }
 
 impl From<&Offset> for GpuOffset {
@@ -229,7 +248,7 @@ impl From<&Offset> for GpuOffset {
             header: pack_header(filter_type::OFFSET),
             dx: offset.dx,
             dy: offset.dy,
-            _padding: [0; 9],
+            _padding: [0; 21],
         }
     }
 }
@@ -239,7 +258,7 @@ impl From<&Offset> for GpuOffset {
 pub(crate) struct GpuFlood {
     pub header: u32,
     pub color: u32,
-    pub _padding: [u32; 10],
+    pub _padding: [u32; 22],
 }
 
 impl From<&Flood> for GpuFlood {
@@ -247,7 +266,7 @@ impl From<&Flood> for GpuFlood {
         Self {
             header: pack_header(filter_type::FLOOD),
             color: flood.color.premultiply().to_rgba8().to_u32(),
-            _padding: [0; 10],
+            _padding: [0; 22],
         }
     }
 }
@@ -260,7 +279,7 @@ pub(crate) struct GpuGaussianBlur {
     pub linear_weights: [f32; MAX_TAPS_PER_SIDE],
     pub linear_offsets: [f32; MAX_TAPS_PER_SIDE],
     // Needed since drop shadow has a bigger footprint.
-    pub _padding: [u32; 4],
+    pub _padding: [u32; 16],
 }
 
 impl From<&GaussianBlur> for GpuGaussianBlur {
@@ -284,7 +303,7 @@ impl From<&GaussianBlur> for GpuGaussianBlur {
             center_weight: lk.center_weight,
             linear_weights: lk.weights,
             linear_offsets: lk.offsets,
-            _padding: [0; 4],
+            _padding: [0; 16],
         }
     }
 }
@@ -299,7 +318,7 @@ pub(crate) struct GpuDropShadow {
     pub dx: f32,
     pub dy: f32,
     pub color: u32,
-    pub _padding: [u32; 1],
+    pub _padding: [u32; 13],
 }
 
 impl From<&DropShadow> for GpuDropShadow {
@@ -322,7 +341,25 @@ impl From<&DropShadow> for GpuDropShadow {
             dx: shadow.dx,
             dy: shadow.dy,
             color: shadow.color.premultiply().to_rgba8().to_u32(),
-            _padding: [0; 1],
+            _padding: [0; 13],
+        }
+    }
+}
+
+#[repr(C, align(16))]
+#[derive(Debug, Clone, Copy, PartialEq, Zeroable, Pod)]
+pub(crate) struct GpuColorMatrix {
+    pub header: u32, // filter type, plus the premul-compatible flag when applicable
+    pub matrix: [f32; 20], // the 4x5 matrix, copied verbatim from `ColorMatrix`
+    pub _padding: [u32; 3], // pads the struct to `FILTER_SIZE_BYTES`
+}
+
+impl From<&ColorMatrix> for GpuColorMatrix {
+    fn from(color_matrix: &ColorMatrix) -> Self {
+        Self {
+            header: pack_color_matrix_header(color_matrix), // type tag plus premul flag
+            matrix: color_matrix.matrix,
+            _padding: [0; 3], // explicit zero padding
         }
     }
 }
@@ -364,6 +401,7 @@ impl CastToFilterData for GpuOffset {}
 impl CastToFilterData for GpuFlood {}
 impl CastToFilterData for GpuGaussianBlur {}
 impl CastToFilterData for GpuDropShadow {}
+impl CastToFilterData for GpuColorMatrix {}
 
 impl<T: CastToFilterData> From<T> for GpuFilterData {
     fn from(filter: T) -> Self {
@@ -378,6 +416,7 @@ impl From<&PreparedFilter> for GpuFilterData {
             PreparedFilter::Flood(f) => GpuFlood::from(f).into(),
             PreparedFilter::GaussianBlur(f) => GpuGaussianBlur::from(f).into(),
             PreparedFilter::DropShadow(f) => GpuDropShadow::from(f).into(),
+            PreparedFilter::ColorMatrix(f) => GpuColorMatrix::from(f).into(),
         }
     }
 }
@@ -945,6 +984,7 @@ impl FilterContext {
             let pass = match filter_type {
                 filter_type::OFFSET => pass_kind::OFFSET,
                 filter_type::FLOOD => pass_kind::FLOOD,
+                filter_type::COLOR_MATRIX => pass_kind::COLOR_MATRIX,
                 // The above are the only single-pass filters currently implemented.
                 _ => unimplemented!(),
             };
@@ -1059,6 +1099,7 @@ mod tests {
     use super::*;
     use vello_common::color::AlphaColor;
     use vello_common::filter::gaussian_blur::{compute_gaussian_kernel, plan_decimated_blur};
+    use vello_common::filter_effects::matrices;
 
     #[test]
     fn test_offset_conversion() {
@@ -1114,6 +1155,52 @@ mod tests {
         );
     }
 
+    #[test]
+    fn test_color_matrix_round_trip() {
+        check_round_trip(
+            GpuColorMatrix::from(&ColorMatrix::new(matrices::SEPIA)),
+            filter_type::COLOR_MATRIX, // presumably the filter type expected back - confirm
+        );
+    }
+
+    #[test]
+    fn color_matrix_header_marks_premul_compatible_matrices() {
+        assert!(
+            GpuColorMatrix::from(&ColorMatrix::new(matrices::GRAYSCALE)).header
+                & COLOR_MATRIX_PREMUL_COMPATIBLE_FLAG
+                != 0 // flag set: GRAYSCALE is premul-compatible
+        );
+        assert!(
+            GpuColorMatrix::from(&ColorMatrix::new(matrices::SEPIA)).header
+                & COLOR_MATRIX_PREMUL_COMPATIBLE_FLAG
+                != 0 // flag set: SEPIA is premul-compatible
+        );
+        assert_eq!(
+            GpuColorMatrix::from(&ColorMatrix::new(matrices::ALPHA_TO_BLACK)).header
+                & COLOR_MATRIX_PREMUL_COMPATIBLE_FLAG,
+            0 // flag clear: ALPHA_TO_BLACK is not premul-compatible
+        );
+    }
+
+    #[test]
+    fn color_matrix_header_rejects_offsets_and_alpha_changes() {
+        let mut offset_matrix = matrices::IDENTITY;
+        offset_matrix[4] = 0.25; // constant offset of the red row
+        assert_eq!(
+            GpuColorMatrix::from(&ColorMatrix::new(offset_matrix)).header
+                & COLOR_MATRIX_PREMUL_COMPATIBLE_FLAG,
+            0
+        );
+
+        let mut opacity_matrix = matrices::IDENTITY;
+        opacity_matrix[18] = 0.5; // alpha coefficient of the alpha row
+        assert_eq!(
+            GpuColorMatrix::from(&ColorMatrix::new(opacity_matrix)).header
+                & COLOR_MATRIX_PREMUL_COMPATIBLE_FLAG,
+            0
+        );
+    }
+
     fn check_linear_kernel(kernel: &[f32; MAX_KERNEL_SIZE], size: u8, expected_taps: u8) {
         let lk = LinearKernel::new(kernel, size);
         assert_eq!(lk.n_taps, expected_taps);
diff --git a/sparse_strips/vello_sparse_shaders/shaders/filters.wgsl b/sparse_strips/vello_sparse_shaders/shaders/filters.wgsl
index 5619a5fe91..764f1bd0b2 100644
--- a/sparse_strips/vello_sparse_shaders/shaders/filters.wgsl
+++ b/sparse_strips/vello_sparse_shaders/shaders/filters.wgsl
@@ -13,7 +13,7 @@
 
 // Keep these variables and structs in sync with the ones in `filter.rs`!
 
-const FILTER_SIZE_BYTES: u32 = 48;
+const FILTER_SIZE_BYTES: u32 = 96;
 const FILTER_SIZE_U32: u32 = FILTER_SIZE_BYTES / 4;
 const TEXELS_PER_FILTER: u32 = FILTER_SIZE_U32 / 4u;
 
@@ -21,6 +21,7 @@ const FILTER_TYPE_OFFSET: u32 = 0u;
 const FILTER_TYPE_FLOOD: u32 = 1u;
 const FILTER_TYPE_GAUSSIAN_BLUR: u32 = 2u;
 const FILTER_TYPE_DROP_SHADOW: u32 = 3u;
+const FILTER_TYPE_COLOR_MATRIX: u32 = 4u;
 
 const PASS_COPY: u32 = 0u;
 const PASS_FLOOD: u32 = 1u;
@@ -30,6 +31,7 @@ const PASS_BLUR_H: u32 = 4u;
 const PASS_BLUR_V: u32 = 5u;
 const PASS_UPSCALE: u32 = 6u;
 const PASS_COMPOSITE_DROP_SHADOW: u32 = 7u;
+const PASS_COLOR_MATRIX: u32 = 8u;
 
 const MAX_TAPS_PER_SIDE: u32 = 3u;
 
@@ -66,10 +68,15 @@ struct DropShadowFilter {
 //   bits [5:6]   = edge_mode     (2 bits, only for blur filters), currently ignored.
 //   bits [7:10]  = n_decimations (4 bits, only for blur filters), only read on the CPU side.
 //   bits [11:12] = n_linear_taps (2 bits, only for blur filters)
-//   bits [13:32] = reserved for future use
+//   bit  [13]    = premultiplied-compatible matrix (only for color matrix filters)
+//   bits [14:31] = reserved for future use
+const COLOR_MATRIX_PREMUL_COMPATIBLE_FLAG: u32 = 1u << 13u;
 
 fn unpack_filter_type(data: GpuFilterData) -> u32 { return data.data[0] & 0x1Fu; }
 fn unpack_header_n_linear_taps(header: u32) -> u32 { return (header >> 11u) & 0x3u; }
+fn unpack_header_color_matrix_premul_compatible(header: u32) -> bool {
+    return (header & COLOR_MATRIX_PREMUL_COMPATIBLE_FLAG) != 0u; // true when the flag bit is set
+}
 
 fn unpack_offset_filter(data: GpuFilterData) -> OffsetFilter {
     return OffsetFilter(
@@ -114,7 +121,17 @@ fn load_filter_data(texel_offset: u32) -> GpuFilterData {
     let t0 = textureLoad(filter_data, vec2((texel_offset     ) % w, (texel_offset     ) / w), 0);
     let t1 = textureLoad(filter_data, vec2((texel_offset + 1u) % w, (texel_offset + 1u) / w), 0);
     let t2 = textureLoad(filter_data, vec2((texel_offset + 2u) % w, (texel_offset + 2u) / w), 0);
-    return GpuFilterData(array(t0.x, t0.y, t0.z, t0.w, t1.x, t1.y, t1.z, t1.w, t2.x, t2.y, t2.z, t2.w));
+    let t3 = textureLoad(filter_data, vec2((texel_offset + 3u) % w, (texel_offset + 3u) / w), 0);
+    let t4 = textureLoad(filter_data, vec2((texel_offset + 4u) % w, (texel_offset + 4u) / w), 0);
+    let t5 = textureLoad(filter_data, vec2((texel_offset + 5u) % w, (texel_offset + 5u) / w), 0);
+    return GpuFilterData(array(
+        t0.x, t0.y, t0.z, t0.w,
+        t1.x, t1.y, t1.z, t1.w,
+        t2.x, t2.y, t2.z, t2.w,
+        t3.x, t3.y, t3.z, t3.w,
+        t4.x, t4.y, t4.z, t4.w,
+        t5.x, t5.y, t5.z, t5.w,
+    ));
 }
 
 struct FilterInstanceData {
@@ -311,6 +328,61 @@ fn convolve(
     return color;
 }
 
+// Evaluates one matrix row on `color`: four channel coefficients starting at
+// `data.data[base]`, followed by a constant offset at `base + 4`. The sum is
+// clamped to [0, 1].
+fn color_matrix_row(data: GpuFilterData, base: u32, color: vec4<f32>) -> f32 {
+    let r_term = bitcast<f32>(data.data[base]) * color.r;
+    let g_term = bitcast<f32>(data.data[base + 1u]) * color.g;
+    let b_term = bitcast<f32>(data.data[base + 2u]) * color.b;
+    let a_term = bitcast<f32>(data.data[base + 3u]) * color.a;
+    let offset = bitcast<f32>(data.data[base + 4u]);
+    return clamp(r_term + g_term + b_term + a_term + offset, 0.0, 1.0);
+}
+
+// RGB-only row product: the alpha coefficient and offset are not read; unclamped.
+fn color_matrix_premul_row(data: GpuFilterData, base: u32, rgb: vec3<f32>) -> f32 {
+    let r_term = bitcast<f32>(data.data[base]) * rgb.r;
+    let g_term = bitcast<f32>(data.data[base + 1u]) * rgb.g;
+    return r_term + g_term + bitcast<f32>(data.data[base + 2u]) * rgb.b;
+}
+
+// Fast path for RGB-only, alpha-preserving matrices: works directly on the
+// premultiplied `pixel` and passes its alpha through unchanged.
+//
+// Running the RGB rows straight on premultiplied channels matches the
+// unpremultiply -> matrix -> premultiply sequence for such matrices, and the
+// straight-alpha [0, 1] clamp turns into a clamp of RGB to [0, alpha].
+fn apply_premul_compatible_color_matrix(data: GpuFilterData, pixel: vec4<f32>) -> vec4<f32> {
+    let premul = pixel.rgb;
+    // The R, G and B rows start at data words 1, 6 and 11.
+    let red = color_matrix_premul_row(data, 1u, premul);
+    let green = color_matrix_premul_row(data, 6u, premul);
+    let blue = color_matrix_premul_row(data, 11u, premul);
+    let bounded = clamp(vec3<f32>(red, green, blue), vec3<f32>(0.0), vec3<f32>(pixel.a));
+    return vec4<f32>(bounded, pixel.a);
+}
+
+// Applies the color matrix stored in `data` to a premultiplied `pixel` and
+// returns a premultiplied result.
+fn apply_color_matrix(data: GpuFilterData, pixel: vec4<f32>) -> vec4<f32> {
+    if unpack_header_color_matrix_premul_compatible(data.data[0]) {
+        return apply_premul_compatible_color_matrix(data, pixel);
+    }
+
+    // General path: convert to straight alpha first. Pixels whose alpha is not
+    // positive keep zero RGB, so the division is skipped for them.
+    var straight = vec4<f32>(0.0, 0.0, 0.0, pixel.a);
+    if pixel.a > 0.0 {
+        straight = vec4<f32>(pixel.rgb / pixel.a, pixel.a);
+    }
+    let out_r = color_matrix_row(data, 1u, straight);
+    let out_g = color_matrix_row(data, 6u, straight);
+    let out_b = color_matrix_row(data, 11u, straight);
+    let out_a = color_matrix_row(data, 16u, straight);
+    return vec4<f32>(vec3<f32>(out_r, out_g, out_b) * out_a, out_a);
+}
+
 const HORIZONTAL: vec2<f32> = vec2<f32>(1.0, 0.0);
 const VERTICAL: vec2<f32> = vec2<f32>(0.0, 1.0);
 
@@ -378,6 +450,10 @@ fn fs_main(in: FilterVertexOutput) -> @location(0) vec4<f32> {
             // Simple source-over compositing.
             return original + shadow_result * (1.0 - original.a);
         }
+        case PASS_COLOR_MATRIX: {
+            let data = load_filter_data(in.filter_offset);
+            return apply_color_matrix(data, sample_input(in, rel_coord));
+        }
         // Shouldn't be reached.
         default: {
             return vec4<f32>(0.0);
diff --git a/sparse_strips/vello_sparse_tests/snapshots/filter_color_matrix_full_sepia.png b/sparse_strips/vello_sparse_tests/snapshots/filter_color_matrix_full_sepia.png
new file mode 100644
index 0000000000..e4cbe2c85c
--- /dev/null
+++ b/sparse_strips/vello_sparse_tests/snapshots/filter_color_matrix_full_sepia.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8e894206089d727bc79c5330f6834a11d2fa623b66ec2d2a43effadc0b040c34
+size 151
diff --git a/sparse_strips/vello_sparse_tests/tests/filter.rs b/sparse_strips/vello_sparse_tests/tests/filter.rs
index 815f6e3625..57a9b2d66c 100644
--- a/sparse_strips/vello_sparse_tests/tests/filter.rs
+++ b/sparse_strips/vello_sparse_tests/tests/filter.rs
@@ -10,7 +10,7 @@ use vello_common::color::AlphaColor;
 use vello_common::color::palette::css::{
     BLACK, LIME, PURPLE, REBECCA_PURPLE, ROYAL_BLUE, SEA_GREEN, TOMATO, VIOLET,
 };
-use vello_common::filter_effects::{EdgeMode, Filter, FilterPrimitive};
+use vello_common::filter_effects::{EdgeMode, Filter, FilterPrimitive, matrices};
 use vello_common::kurbo::{Affine, BezPath, Circle, Point, Rect, Shape, Stroke};
 use vello_common::paint::Image;
 use vello_common::peniko::{
@@ -83,6 +83,24 @@ fn filter_offset_nested(ctx: &mut impl Renderer) {
     ctx.pop_layer();
 }
 
+/// Test the sepia color matrix on three opaque swatches and one translucent swatch.
+#[vello_test(skip_multithreaded, width = 120, height = 80, hybrid_tolerance = 1)]
+fn filter_color_matrix_full_sepia(ctx: &mut impl Renderer) {
+    let matrix = matrices::SEPIA;
+    let sepia = Filter::from_primitive(FilterPrimitive::ColorMatrix { matrix });
+
+    ctx.push_filter_layer(sepia);
+    ctx.set_paint(RED);
+    ctx.fill_rect(&Rect::new(10.0, 10.0, 55.0, 35.0));
+    ctx.set_paint(GREEN);
+    ctx.fill_rect(&Rect::new(65.0, 10.0, 110.0, 35.0));
+    ctx.set_paint(BLUE);
+    ctx.fill_rect(&Rect::new(10.0, 45.0, 55.0, 70.0));
+    ctx.set_paint(AlphaColor::from_rgba8(60, 120, 240, 128));
+    ctx.fill_rect(&Rect::new(65.0, 45.0, 110.0, 70.0));
+    ctx.pop_layer();
+}
+
 /// Test Gaussian blur with small radius (`std_deviation` = 2.0, no decimation).
 /// Uses direct separable convolution at full resolution.
 #[vello_test(skip_multithreaded)]