Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 27 additions & 6 deletions .github/workflows/pull_request.yml
Original file line number Diff line number Diff line change
Expand Up @@ -254,14 +254,35 @@ jobs:
- name: Install jxl-perfhistory
run: cargo install --git https://github.com/zond/jxl-perfhistory

- name: Select a few test images
- name: Select benchmark corpus (autoresearch 8 images)
id: select-images
run: |
mkdir testimages
cp jxl/resources/test/conformance_test_images/sunset_logo.jxl testimages
cp jxl/resources/test/conformance_test_images/bike.jxl testimages
cp jxl/resources/test/green_queen_modular_e3.jxl testimages
cp jxl/resources/test/green_queen_vardct_e3.jxl testimages
mkdir -p testimages

# Keep this aligned with the autoresearch image set so CI numbers stay
# comparable while avoiding benchmark timeouts.
image_map=(
"jxl/resources/test/conformance_test_images/sunset_logo.jxl:sunset_logo.jxl"
"jxl/resources/test/conformance_test_images/bike.jxl:bike.jxl"
"jxl/resources/test/green_queen_modular_e3.jxl:green_queen_modular_e3.jxl"
"jxl/resources/test/green_queen_vardct_e3.jxl:green_queen_vardct_e3.jxl"
"jxl/resources/test/conformance_test_images/bicycles.jxl:bicycles.jxl"
"jxl/resources/test/conformance_test_images/delta_palette.jxl:delta_palette.jxl"
"jxl/resources/test/conformance_test_images/lz77_flower.jxl:lz77_flower.jxl"
"jxl/resources/test/conformance_test_images/patches.jxl:patches_lossless.jxl"
)

for entry in "${image_map[@]}"; do
src="${entry%%:*}"
dest="${entry##*:}"
if [ ! -f "$src" ]; then
echo "Missing benchmark image: $src"
exit 1
fi
ln -s "$GITHUB_WORKSPACE/$src" "testimages/$dest"
done

echo "Selected $(find testimages -maxdepth 1 -name '*.jxl' | wc -l) images for benchmark"

- name: Cache benchmark binaries
uses: actions/cache@v4
Expand Down
3 changes: 3 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
[profile.release]
debug = true
lto = "thin"
panic = "abort"
overflow-checks = false

[profile.bench]
debug = true
Expand Down
12 changes: 9 additions & 3 deletions jxl/src/api/color.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1259,7 +1259,9 @@ impl JxlColorProfile {
_ => false,
}
}
// ICC profiles require CMS
// Identical ICC profile bytes mean the same color encoding -- skip CMS
(Self::Icc(a), Self::Icc(b)) => a == b,
// Mixed Simple/ICC always requires CMS
_ => false,
}
}
Expand Down Expand Up @@ -2736,7 +2738,7 @@ mod test {

#[test]
fn test_same_color_encoding_icc_profile() {
// ICC profiles are never considered same (even with themselves)
// ICC vs Simple are never the same
let srgb = JxlColorProfile::Simple(JxlColorEncoding::RgbColorSpace {
white_point: JxlWhitePoint::D65,
primaries: JxlPrimaries::SRGB,
Expand All @@ -2746,7 +2748,11 @@ mod test {
let icc = JxlColorProfile::Icc(vec![0u8; 100]); // Dummy ICC profile
assert!(!srgb.same_color_encoding(&icc));
assert!(!icc.same_color_encoding(&srgb));
assert!(!icc.same_color_encoding(&icc));
// Same ICC bytes ARE the same encoding (skip CMS identity transform)
assert!(icc.same_color_encoding(&icc));
// Different ICC bytes are NOT the same
let icc2 = JxlColorProfile::Icc(vec![1u8; 100]);
assert!(!icc.same_color_encoding(&icc2));
}

#[test]
Expand Down
81 changes: 49 additions & 32 deletions jxl/src/api/inner/codestream_parser/sections.rs
Original file line number Diff line number Diff line change
Expand Up @@ -62,8 +62,11 @@ impl CodestreamParser {
.front()
.is_some_and(|s| s.len <= self.ready_section_data)
{
let s = self.sections.pop_front().unwrap();
let mut s = self.sections.pop_front().unwrap();
self.ready_section_data -= s.len;
// Add 8 zero-padding bytes so BitReader::refill() always takes the
// fast path (avoids refill_slow for small/tail sections).
s.data.extend_from_slice(&[0u8; 8]);

match s.section {
Section::LfGlobal => {
Expand All @@ -87,32 +90,31 @@ impl CodestreamParser {
let pixel_format = self.pixel_format.as_ref().unwrap();

let complete_lf_global;
let (lf_global, lf_global_is_complete) = if let Some(d) = self.lf_global_section.take() {
complete_lf_global = d;
(
Some(&complete_lf_global.data[..complete_lf_global.len]),
true,
)
} else if do_flush
&& self
.sections
.front()
.is_some_and(|s| s.section == Section::LfGlobal)
&& 2 * self.ready_section_data > 3 * self.section_state.lf_global_flush_len
&& frame_header.encoding == Encoding::Modular
&& matches!(
frame_header.frame_type,
FrameType::RegularFrame | FrameType::LFFrame
)
{
self.section_state.lf_global_flush_len = self.ready_section_data;
(
Some(&self.sections[0].data[..self.ready_section_data]),
false,
)
} else {
(None, false)
};
// lf_global_real_len: the actual data length (excluding padding bytes)
let (lf_global, lf_global_real_len, lf_global_is_complete) =
if let Some(d) = self.lf_global_section.take() {
complete_lf_global = d;
// Use full data slice (includes 8-byte padding from dequeue)
let real_len = complete_lf_global.len;
(Some(&complete_lf_global.data[..]), real_len, true)
} else if do_flush
&& self
.sections
.front()
.is_some_and(|s| s.section == Section::LfGlobal)
&& 2 * self.ready_section_data > 3 * self.section_state.lf_global_flush_len
&& frame_header.encoding == Encoding::Modular
&& matches!(
frame_header.frame_type,
FrameType::RegularFrame | FrameType::LFFrame
)
{
self.section_state.lf_global_flush_len = self.ready_section_data;
let rsd = self.ready_section_data;
(Some(&self.sections[0].data[..rsd]), rsd, false)
} else {
(None, 0, false)
};

'process: {
if frame_header.num_groups() == 1 && frame_header.passes.num_passes == 1 {
Expand All @@ -121,7 +123,11 @@ impl CodestreamParser {
break 'process;
};
assert!(self.sections.is_empty() || !lf_global_is_complete);
let mut br = BitReader::new(buf);
let mut br = if lf_global_is_complete {
BitReader::new_padded(buf, lf_global_real_len)
} else {
BitReader::new(buf)
};
let res = (|| -> Result<()> {
frame.decode_lf_global(&mut br, !lf_global_is_complete)?;
frame.decode_lf_group(0, &mut br)?;
Expand All @@ -148,7 +154,12 @@ impl CodestreamParser {
}
} else {
if let Some(buf) = lf_global {
match frame.decode_lf_global(&mut BitReader::new(buf), !lf_global_is_complete) {
let mut br = if lf_global_is_complete {
BitReader::new_padded(buf, lf_global_real_len)
} else {
BitReader::new(buf)
};
match frame.decode_lf_global(&mut br, !lf_global_is_complete) {
Ok(_) => {
self.section_state.lf_global_done = true;
processed_section = true;
Expand All @@ -168,7 +179,10 @@ impl CodestreamParser {
let Section::Lf { group } = lf_section.section else {
unreachable!()
};
frame.decode_lf_group(group, &mut BitReader::new(&lf_section.data))?;
frame.decode_lf_group(
group,
&mut BitReader::new_padded(&lf_section.data, lf_section.len),
)?;
processed_section = true;
self.section_state.remaining_lf -= 1;
}
Expand All @@ -178,7 +192,10 @@ impl CodestreamParser {
}

if let Some(hf_global) = self.hf_global_section.take() {
frame.decode_hf_global(&mut BitReader::new(&hf_global.data))?;
frame.decode_hf_global(&mut BitReader::new_padded(
&hf_global.data,
hf_global.len,
))?;
frame.finalize_lf()?;
self.section_state.hf_global_done = true;
processed_section = true;
Expand All @@ -202,7 +219,7 @@ impl CodestreamParser {
break;
};
self.section_state.completed_passes[g] += 1;
sections.push((pass, BitReader::new(&s.data)));
sections.push((pass, BitReader::new_padded(&s.data, s.len)));
}
if !sections.is_empty() {
group_readers.push((g, sections));
Expand Down
25 changes: 20 additions & 5 deletions jxl/src/bit_reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,23 @@ impl<'a> BitReader<'a> {
}
}

/// Builds a `BitReader` over a buffer that carries trailing zero padding.
///
/// `data` is expected to hold the `real_len` payload bytes followed by at
/// least 8 bytes of zero padding (only verified in debug builds).
/// `initial_bits` is derived from `real_len` rather than from `data.len()`,
/// so it reflects the unpadded payload size used for error checking.
/// The padding is intended to let `refill()` stay on its fast path instead
/// of falling back to `refill_slow`.
pub fn new_padded(data: &[u8], real_len: usize) -> BitReader<'_> {
    let padded_len = data.len();
    debug_assert!(real_len + 8 <= padded_len);
    // Bit budget covers the real payload only, not the appended padding.
    let initial_bits = real_len * 8;
    BitReader {
        initial_bits,
        total_bits_read: 0,
        bits_in_buf: 0,
        bit_buf: 0,
        data,
    }
}

/// Reads `num` bits from the buffer without consuming them.
#[inline]
#[inline(always)]
pub fn peek(&mut self, num: usize) -> u64 {
debug_assert!(num <= MAX_BITS_PER_CALL);
if self.bits_in_buf < num {
Expand All @@ -66,7 +81,7 @@ impl<'a> BitReader<'a> {
Ok(())
}

#[inline]
#[inline(always)]
pub fn consume_optimistic(&mut self, num: usize) {
self.bit_buf >>= num;
self.bits_in_buf = self.bits_in_buf.saturating_sub(num);
Expand All @@ -84,7 +99,7 @@ impl<'a> BitReader<'a> {
/// assert!(br.read(1).is_err());
/// # Ok::<(), jxl::error::Error>(())
/// ```
#[inline]
#[inline(always)]
pub fn read(&mut self, num: usize) -> Result<u64, Error> {
let ret = self.peek(num);
self.consume(num)?;
Expand All @@ -97,7 +112,7 @@ impl<'a> BitReader<'a> {
self.read(num)
}

#[inline]
#[inline(always)]
pub fn read_optimistic(&mut self, num: usize) -> u64 {
let ret = self.peek(num);
self.consume_optimistic(num);
Expand Down Expand Up @@ -201,7 +216,7 @@ impl<'a> BitReader<'a> {
Ok(())
}

#[inline]
#[inline(always)]
fn refill(&mut self) {
// See Refill() in C++ code.
if self.data.len() >= 8 {
Expand Down
10 changes: 7 additions & 3 deletions jxl/src/entropy_coding/ans.rs
Original file line number Diff line number Diff line change
Expand Up @@ -352,7 +352,7 @@ impl AnsHistogram {
}

impl AnsHistogram {
#[inline]
#[inline(always)]
pub fn read(&self, br: &mut BitReader, state: &mut u32) -> u32 {
let idx = *state & 0xfff;
let i = (idx >> self.log_bucket_size) as usize;
Expand Down Expand Up @@ -433,9 +433,13 @@ impl AnsReader {
Ok(Self(initial_state))
}

#[inline]
#[inline(always)]
#[allow(unsafe_code)]
pub fn read(&mut self, codes: &AnsCodes, br: &mut BitReader, ctx: usize) -> u32 {
codes.histograms[ctx].read(br, &mut self.0)
debug_assert!(ctx < codes.histograms.len());
// SAFETY: ctx is a validated cluster ID from the context map,
// checked during Histograms::decode() to be < histograms.len().
unsafe { codes.histograms.get_unchecked(ctx) }.read(br, &mut self.0)
}

pub fn check_final_state(self) -> Result<()> {
Expand Down
Loading
Loading