diff --git a/AUTHORS b/AUTHORS index 23d5a1679..e33b6cf2a 100644 --- a/AUTHORS +++ b/AUTHORS @@ -26,9 +26,10 @@ Jonathan Brown (Jonnyawsom3) Louis Dispa Luca Versari Martin Bruse +Matthew Little Moritz Firsching Mrmaxmeier <3913977+Mrmaxmeier@users.noreply.github.com> Sami Boukortt Tomáš Král Wonwoo Choi -Zoltan Szabadka +Zoltan Szabadka \ No newline at end of file diff --git a/Cargo.lock b/Cargo.lock index 8bc4fab48..6d5916039 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -26,6 +26,21 @@ dependencies = [ "memchr", ] +[[package]] +name = "alloc-no-stdlib" +version = "2.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc7bb162ec39d46ab1ca8c77bf72e890535becd1751bb45f64c597edb4c8c6b3" + +[[package]] +name = "alloc-stdlib" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94fb8275041c72129eb51b7d0322c29b8387a0386127718b096429201a5d6ece" +dependencies = [ + "alloc-no-stdlib", +] + [[package]] name = "anes" version = "0.1.6" @@ -142,6 +157,27 @@ version = "2.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "812e12b5285cc515a9c72a5c1d3b6d46a19dac5acfef5265968c166106e31dd3" +[[package]] +name = "brotli" +version = "8.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4bd8b9603c7aa97359dbd97ecf258968c95f3adddd6db2f7e7a5bef101c84560" +dependencies = [ + "alloc-no-stdlib", + "alloc-stdlib", + "brotli-decompressor", +] + +[[package]] +name = "brotli-decompressor" +version = "5.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "874bb8112abecc98cbd6d81ea4fa7e94fb9449648c93cc89aa40c81c24d7de03" +dependencies = [ + "alloc-no-stdlib", + "alloc-stdlib", +] + [[package]] name = "bumpalo" version = "3.19.0" @@ -642,6 +678,7 @@ version = "0.3.0" dependencies = [ "arbtest", "array-init", + "brotli", "byteorder", "jxl_macros", "jxl_simd", @@ -661,6 +698,7 @@ name = "jxl_cli" version = "0.3.0" dependencies = [ 
"anyhow", + "brotli", "bytemuck", "clap", "color-eyre", diff --git a/jxl/Cargo.toml b/jxl/Cargo.toml index 76ea6ab10..4df64b558 100644 --- a/jxl/Cargo.toml +++ b/jxl/Cargo.toml @@ -25,6 +25,7 @@ jxl_simd = { path = "../jxl_simd", version = "=0.3.0" } [dev-dependencies] arbtest = "0.3.2" +brotli = "8.0" paste = "1.0.15" rand = "0.9.2" rand_xorshift = "0.4.0" diff --git a/jxl/resources/test/metadata_test_images/all_metadata.jxl b/jxl/resources/test/metadata_test_images/all_metadata.jxl new file mode 100644 index 000000000..e5169b351 Binary files /dev/null and b/jxl/resources/test/metadata_test_images/all_metadata.jxl differ diff --git a/jxl/resources/test/metadata_test_images/all_metadata_brob.jxl b/jxl/resources/test/metadata_test_images/all_metadata_brob.jxl new file mode 100644 index 000000000..b44ff59c9 Binary files /dev/null and b/jxl/resources/test/metadata_test_images/all_metadata_brob.jxl differ diff --git a/jxl/resources/test/metadata_test_images/large_exif.jxl b/jxl/resources/test/metadata_test_images/large_exif.jxl new file mode 100644 index 000000000..6cb68cb0e Binary files /dev/null and b/jxl/resources/test/metadata_test_images/large_exif.jxl differ diff --git a/jxl/resources/test/metadata_test_images/mixed_compression.jxl b/jxl/resources/test/metadata_test_images/mixed_compression.jxl new file mode 100644 index 000000000..004aba62c Binary files /dev/null and b/jxl/resources/test/metadata_test_images/mixed_compression.jxl differ diff --git a/jxl/resources/test/metadata_test_images/multi_exif.jxl b/jxl/resources/test/metadata_test_images/multi_exif.jxl new file mode 100644 index 000000000..d12cb025e Binary files /dev/null and b/jxl/resources/test/metadata_test_images/multi_exif.jxl differ diff --git a/jxl/resources/test/metadata_test_images/multi_exif_brob.jxl b/jxl/resources/test/metadata_test_images/multi_exif_brob.jxl new file mode 100644 index 000000000..4775779f7 Binary files /dev/null and b/jxl/resources/test/metadata_test_images/multi_exif_brob.jxl 
differ diff --git a/jxl/resources/test/metadata_test_images/multi_jumbf.jxl b/jxl/resources/test/metadata_test_images/multi_jumbf.jxl new file mode 100644 index 000000000..e14653a49 Binary files /dev/null and b/jxl/resources/test/metadata_test_images/multi_jumbf.jxl differ diff --git a/jxl/resources/test/metadata_test_images/multi_xmp.jxl b/jxl/resources/test/metadata_test_images/multi_xmp.jxl new file mode 100644 index 000000000..351eb7eb7 Binary files /dev/null and b/jxl/resources/test/metadata_test_images/multi_xmp.jxl differ diff --git a/jxl/resources/test/metadata_test_images/single_exif.jxl b/jxl/resources/test/metadata_test_images/single_exif.jxl new file mode 100644 index 000000000..f13955b19 Binary files /dev/null and b/jxl/resources/test/metadata_test_images/single_exif.jxl differ diff --git a/jxl/resources/test/metadata_test_images/single_exif_brob.jxl b/jxl/resources/test/metadata_test_images/single_exif_brob.jxl new file mode 100644 index 000000000..b1d2cabe6 Binary files /dev/null and b/jxl/resources/test/metadata_test_images/single_exif_brob.jxl differ diff --git a/jxl/resources/test/metadata_test_images/single_jumbf.jxl b/jxl/resources/test/metadata_test_images/single_jumbf.jxl new file mode 100644 index 000000000..4d8246eb9 Binary files /dev/null and b/jxl/resources/test/metadata_test_images/single_jumbf.jxl differ diff --git a/jxl/resources/test/metadata_test_images/single_jumbf_brob.jxl b/jxl/resources/test/metadata_test_images/single_jumbf_brob.jxl new file mode 100644 index 000000000..18b7b1679 Binary files /dev/null and b/jxl/resources/test/metadata_test_images/single_jumbf_brob.jxl differ diff --git a/jxl/resources/test/metadata_test_images/single_xmp.jxl b/jxl/resources/test/metadata_test_images/single_xmp.jxl new file mode 100644 index 000000000..b2c6668fb Binary files /dev/null and b/jxl/resources/test/metadata_test_images/single_xmp.jxl differ diff --git a/jxl/resources/test/metadata_test_images/single_xmp_brob.jxl 
b/jxl/resources/test/metadata_test_images/single_xmp_brob.jxl new file mode 100644 index 000000000..80c1c275f Binary files /dev/null and b/jxl/resources/test/metadata_test_images/single_xmp_brob.jxl differ diff --git a/jxl/src/api/data_types.rs b/jxl/src/api/data_types.rs index 966033c5a..675357f68 100644 --- a/jxl/src/api/data_types.rs +++ b/jxl/src/api/data_types.rs @@ -264,3 +264,13 @@ pub struct JxlFrameHeader { /// Frame size (width, height) pub size: (usize, usize), } + +/// Captured metadata box from a JXL container. +#[derive(Clone, Debug)] +pub struct JxlMetadataBox { + /// Raw box content bytes. + pub data: Vec, + /// True if this was a Brotli-compressed (brob) box. + /// Caller must decompress if needed. + pub is_brotli_compressed: bool, +} diff --git a/jxl/src/api/decoder.rs b/jxl/src/api/decoder.rs index fa0821fec..ea6f4a396 100644 --- a/jxl/src/api/decoder.rs +++ b/jxl/src/api/decoder.rs @@ -5,7 +5,7 @@ use super::{ JxlBasicInfo, JxlBitstreamInput, JxlColorProfile, JxlDecoderInner, JxlDecoderOptions, - JxlOutputBuffer, JxlPixelFormat, ProcessingResult, + JxlMetadataBox, JxlOutputBuffer, JxlPixelFormat, ProcessingResult, }; #[cfg(test)] use crate::frame::Frame; @@ -141,6 +141,29 @@ impl JxlDecoder { self.inner.has_more_frames() } + /// Returns captured EXIF boxes from the container. + /// + /// Each box's data includes the 4-byte TIFF offset followed by TIFF/EXIF data. + /// Returns `None` if capture was disabled, `Some(...)` if enabled (empty slice if no boxes found). + pub fn exif_boxes(&self) -> Option<&[JxlMetadataBox]> { + self.inner.exif_boxes() + } + + /// Returns captured XMP boxes from the container. + /// + /// Returns `None` if capture was disabled, `Some(...)` if enabled (empty slice if no boxes found). + pub fn xmp_boxes(&self) -> Option<&[JxlMetadataBox]> { + self.inner.xmp_boxes() + } + + /// Returns captured JUMBF boxes from the container. + /// + /// JUMBF (JPEG Universal Metadata Box Format) is used for C2PA content credentials. 
+ /// Returns `None` if capture was disabled, `Some(...)` if enabled (empty slice if no boxes found). + pub fn jumbf_boxes(&self) -> Option<&[JxlMetadataBox]> { + self.inner.jumbf_boxes() + } + #[cfg(test)] pub(crate) fn set_use_simple_pipeline(&mut self, u: bool) { self.inner.set_use_simple_pipeline(u); @@ -193,7 +216,7 @@ impl JxlDecoder { #[cfg(test)] pub(crate) mod tests { use super::*; - use crate::api::{JxlDataFormat, JxlDecoderOptions}; + use crate::api::{JxlDataFormat, JxlDecoderOptions, JxlMetadataCaptureOptions}; use crate::error::Error; use crate::image::{Image, Rect}; use jxl_macros::for_each_test_file; @@ -1325,4 +1348,670 @@ pub(crate) mod tests { let _ = profile.try_as_icc(); } } + + // ========================================= + // Metadata capture tests + // ========================================= + + /// Helper to decode a file to WithImageInfo state with custom options + fn decode_to_image_info( + file: &[u8], + options: JxlDecoderOptions, + ) -> JxlDecoder { + let mut decoder = JxlDecoder::::new(options); + let mut input = file; + loop { + match decoder.process(&mut input).unwrap() { + ProcessingResult::Complete { result } => return result, + ProcessingResult::NeedsMoreInput { fallback, .. } => { + if input.is_empty() { + panic!("Unexpected end of input"); + } + decoder = fallback; + } + } + } + } + + /// Extracts ImageDescription (tag 0x010E) from EXIF TIFF data. + /// EXIF in JXL has a 4-byte offset prefix, then standard TIFF structure: + /// - Bytes 0-1: byte order ("II" = little-endian, "MM" = big-endian) + /// - Bytes 2-3: TIFF magic (42) + /// - Bytes 4-7: offset to IFD0 + /// - IFD: 2-byte entry count, then 12-byte entries (tag, type, count, value/offset) + fn extract_exif_image_description(exif_data: &[u8]) -> Option { + let d = exif_data.get(4..)?; // Skip 4-byte TIFF offset (JXL-specific prefix) + let le = d.get(0..2)? 
== [0x49, 0x49]; // "II" = Intel = little-endian + let u16_at = |o: usize| { + d.get(o..o + 2).map(|b| { + if le { + u16::from_le_bytes([b[0], b[1]]) + } else { + u16::from_be_bytes([b[0], b[1]]) + } + }) + }; + let u32_at = |o: usize| { + d.get(o..o + 4).map(|b| { + if le { + u32::from_le_bytes([b[0], b[1], b[2], b[3]]) + } else { + u32::from_be_bytes([b[0], b[1], b[2], b[3]]) + } + }) + }; + + if u16_at(2)? != 42 { + return None; + } // TIFF magic number + let ifd = u32_at(4)? as usize; // IFD0 offset + let count = u16_at(ifd)? as usize; // Number of directory entries + + for i in 0..count { + let e = ifd + 2 + i * 12; // Each IFD entry is 12 bytes + if u16_at(e)? != 0x010E { + continue; + } // ImageDescription tag + if u16_at(e + 2)? != 2 { + return None; + } // Type 2 = ASCII string + let len = u32_at(e + 4)? as usize; // String length including null + let off = if len <= 4 { + e + 8 + } else { + u32_at(e + 8)? as usize + }; // Inline if ≤4 bytes, else offset + let s = d.get(off..off + len)?; + let s = s.strip_suffix(&[0]).unwrap_or(s); // Remove null terminator + return std::str::from_utf8(s).ok().map(|s| s.to_string()); + } + None + } + + /// XMP template with {DESCRIPTION} placeholder for test assertions. 
+ const XMP_TEMPLATE: &str = r#" + + + + {DESCRIPTION} + + +"#; + + mod metadata_tests { + use super::*; + + #[test] + fn empty_for_bare_codestream() { + let file = include_bytes!("../../resources/test/basic.jxl"); + let options = JxlDecoderOptions { + metadata_capture: JxlMetadataCaptureOptions::capture_all_with_limits(), + ..Default::default() + }; + let decoder = decode_to_image_info(file, options); + // Bare codestream has no container, metadata capture returns Some but empty + assert!(decoder.exif_boxes().is_none_or(|b| b.is_empty())); + assert!(decoder.xmp_boxes().is_none_or(|b| b.is_empty())); + assert!(decoder.jumbf_boxes().is_none_or(|b| b.is_empty())); + } + + #[test] + fn empty_for_container_without_metadata() { + let file = include_bytes!("../../resources/test/has_permutation_with_container.jxl"); + let options = JxlDecoderOptions { + metadata_capture: JxlMetadataCaptureOptions::capture_all_with_limits(), + ..Default::default() + }; + let decoder = decode_to_image_info(file, options); + // Container present but no metadata boxes + let exif = decoder.exif_boxes(); + assert!(exif.is_some() && exif.unwrap().is_empty()); + } + + #[test] + fn capture_disabled_by_default() { + let file = include_bytes!("../../resources/test/basic.jxl"); + let options = JxlDecoderOptions::default(); + let decoder = decode_to_image_info(file, options); + // Default has capture disabled, so returns None + assert!(decoder.exif_boxes().is_none()); + assert!(decoder.xmp_boxes().is_none()); + assert!(decoder.jumbf_boxes().is_none()); + } + + #[test] + fn single_exif_box_content() { + let file = include_bytes!("../../resources/test/metadata_test_images/single_exif.jxl"); + let options = JxlDecoderOptions { + metadata_capture: JxlMetadataCaptureOptions::capture_all_with_limits(), + ..Default::default() + }; + let decoder = decode_to_image_info(file, options); + + let boxes = decoder.exif_boxes().expect("EXIF should be captured"); + assert_eq!(boxes.len(), 1); + + let description = 
extract_exif_image_description(&boxes[0].data); + assert_eq!(description, Some("Test EXIF content".to_string())); + } + + #[test] + fn single_xmp_box_content() { + let file = include_bytes!("../../resources/test/metadata_test_images/single_xmp.jxl"); + let options = JxlDecoderOptions { + metadata_capture: JxlMetadataCaptureOptions::capture_all_with_limits(), + ..Default::default() + }; + let decoder = decode_to_image_info(file, options); + + let boxes = decoder.xmp_boxes().expect("XMP should be captured"); + assert_eq!(boxes.len(), 1); + + let xmp_str = std::str::from_utf8(&boxes[0].data).unwrap(); + assert_eq!( + xmp_str, + XMP_TEMPLATE.replace("{DESCRIPTION}", "Test XMP content") + ); + } + + #[test] + fn single_jumbf_box_content() { + let file = include_bytes!("../../resources/test/metadata_test_images/single_jumbf.jxl"); + let options = JxlDecoderOptions { + metadata_capture: JxlMetadataCaptureOptions::capture_all_with_limits(), + ..Default::default() + }; + let decoder = decode_to_image_info(file, options); + + let boxes = decoder.jumbf_boxes().expect("JUMBF should be captured"); + assert_eq!(boxes.len(), 1); + + let jumbf_str = std::str::from_utf8(&boxes[0].data).unwrap(); + assert!(jumbf_str.contains(r#"{"test": "Test JUMBF content"}"#)); + } + + #[test] + fn multiple_exif_boxes_content() { + let file = include_bytes!("../../resources/test/metadata_test_images/multi_exif.jxl"); + let options = JxlDecoderOptions { + metadata_capture: JxlMetadataCaptureOptions::capture_all_with_limits(), + ..Default::default() + }; + let decoder = decode_to_image_info(file, options); + + let boxes = decoder.exif_boxes().expect("EXIF should be captured"); + assert_eq!( + boxes.len(), + 2, + "Expected exactly 2 boxes, got {}", + boxes.len() + ); + + assert_eq!( + extract_exif_image_description(&boxes[0].data), + Some("First EXIF".to_string()) + ); + assert_eq!( + extract_exif_image_description(&boxes[1].data), + Some("Second EXIF".to_string()) + ); + } + + #[test] + fn 
multiple_xmp_boxes_content() { + let file = include_bytes!("../../resources/test/metadata_test_images/multi_xmp.jxl"); + let options = JxlDecoderOptions { + metadata_capture: JxlMetadataCaptureOptions::capture_all_with_limits(), + ..Default::default() + }; + let decoder = decode_to_image_info(file, options); + + let boxes = decoder.xmp_boxes().expect("XMP should be captured"); + assert_eq!( + boxes.len(), + 2, + "Expected exactly 2 boxes, got {}", + boxes.len() + ); + + let xmp0 = std::str::from_utf8(&boxes[0].data).unwrap(); + let xmp1 = std::str::from_utf8(&boxes[1].data).unwrap(); + assert_eq!(xmp0, XMP_TEMPLATE.replace("{DESCRIPTION}", "First XMP")); + assert_eq!(xmp1, XMP_TEMPLATE.replace("{DESCRIPTION}", "Second XMP")); + } + + #[test] + fn multiple_jumbf_boxes_content() { + let file = include_bytes!("../../resources/test/metadata_test_images/multi_jumbf.jxl"); + let options = JxlDecoderOptions { + metadata_capture: JxlMetadataCaptureOptions::capture_all_with_limits(), + ..Default::default() + }; + let decoder = decode_to_image_info(file, options); + + let boxes = decoder.jumbf_boxes().expect("JUMBF should be captured"); + assert_eq!( + boxes.len(), + 2, + "Expected exactly 2 boxes, got {}", + boxes.len() + ); + + let jumbf0 = std::str::from_utf8(&boxes[0].data).unwrap(); + let jumbf1 = std::str::from_utf8(&boxes[1].data).unwrap(); + assert!(jumbf0.contains(r#"{"test": "First JUMBF"}"#)); + assert!(jumbf1.contains(r#"{"test": "Second JUMBF"}"#)); + } + + #[test] + fn all_metadata_types_content() { + let file = include_bytes!("../../resources/test/metadata_test_images/all_metadata.jxl"); + let options = JxlDecoderOptions { + metadata_capture: JxlMetadataCaptureOptions::capture_all_with_limits(), + ..Default::default() + }; + let decoder = decode_to_image_info(file, options); + + // EXIF + let exif = decoder.exif_boxes().expect("EXIF should be captured"); + assert!(!exif.is_empty(), "Should have EXIF"); + assert_eq!( + 
extract_exif_image_description(&exif[0].data), + Some("EXIF data".to_string()) + ); + + // XMP + let xmp = decoder.xmp_boxes().expect("XMP should be captured"); + assert!(!xmp.is_empty(), "Should have XMP"); + let xmp_str = std::str::from_utf8(&xmp[0].data).unwrap(); + assert_eq!(xmp_str, XMP_TEMPLATE.replace("{DESCRIPTION}", "XMP data")); + + // JUMBF + let jumbf = decoder.jumbf_boxes().expect("JUMBF should be captured"); + assert!(!jumbf.is_empty(), "Should have JUMBF"); + let jumbf_str = std::str::from_utf8(&jumbf[0].data).unwrap(); + assert!(jumbf_str.contains(r#"{"test": "JUMBF data"}"#)); + } + + #[test] + fn exif_capture_disabled() { + let file = include_bytes!("../../resources/test/metadata_test_images/single_exif.jxl"); + let options = JxlDecoderOptions { + metadata_capture: JxlMetadataCaptureOptions { + capture_exif: false, + capture_xmp: true, + capture_jumbf: true, + ..JxlMetadataCaptureOptions::capture_all_with_limits() + }, + ..Default::default() + }; + let decoder = decode_to_image_info(file, options); + + assert!( + decoder.exif_boxes().is_none(), + "EXIF should not be captured" + ); + } + + #[test] + fn xmp_capture_disabled() { + let file = include_bytes!("../../resources/test/metadata_test_images/single_xmp.jxl"); + let options = JxlDecoderOptions { + metadata_capture: JxlMetadataCaptureOptions { + capture_exif: true, + capture_xmp: false, + capture_jumbf: true, + ..JxlMetadataCaptureOptions::capture_all_with_limits() + }, + ..Default::default() + }; + let decoder = decode_to_image_info(file, options); + + assert!(decoder.xmp_boxes().is_none(), "XMP should not be captured"); + } + + #[test] + fn jumbf_capture_disabled() { + let file = include_bytes!("../../resources/test/metadata_test_images/single_jumbf.jxl"); + let options = JxlDecoderOptions { + metadata_capture: JxlMetadataCaptureOptions { + capture_exif: true, + capture_xmp: true, + capture_jumbf: false, + ..JxlMetadataCaptureOptions::capture_all_with_limits() + }, + ..Default::default() + }; 
+ let decoder = decode_to_image_info(file, options); + + assert!( + decoder.jumbf_boxes().is_none(), + "JUMBF should not be captured" + ); + } + + #[test] + fn all_capture_disabled() { + let file = include_bytes!("../../resources/test/metadata_test_images/all_metadata.jxl"); + let options = JxlDecoderOptions { + metadata_capture: JxlMetadataCaptureOptions::no_capture(), + ..Default::default() + }; + let decoder = decode_to_image_info(file, options); + + assert!(decoder.exif_boxes().is_none()); + assert!(decoder.xmp_boxes().is_none()); + assert!(decoder.jumbf_boxes().is_none()); + } + + #[test] + fn exif_size_limit_exceeded() { + // large_exif.jxl has EXIF > 200 bytes + let file = include_bytes!("../../resources/test/metadata_test_images/large_exif.jxl"); + let options = JxlDecoderOptions { + metadata_capture: JxlMetadataCaptureOptions { + capture_exif: true, + exif_size_limit: Some(50), // Too small + ..Default::default() + }, + ..Default::default() + }; + let decoder = decode_to_image_info(file, options); + + let boxes = decoder.exif_boxes().expect("EXIF capture enabled"); + assert!(boxes.is_empty(), "EXIF should be skipped due to size limit"); + } + + #[test] + fn exif_size_limit_allows() { + let file = include_bytes!("../../resources/test/metadata_test_images/single_exif.jxl"); + let options = JxlDecoderOptions { + metadata_capture: JxlMetadataCaptureOptions { + capture_exif: true, + exif_size_limit: Some(10000), // Generous + ..Default::default() + }, + ..Default::default() + }; + let decoder = decode_to_image_info(file, options); + + let boxes = decoder.exif_boxes().expect("EXIF capture enabled"); + assert!(!boxes.is_empty(), "EXIF should be captured"); + assert_eq!( + extract_exif_image_description(&boxes[0].data), + Some("Test EXIF content".to_string()) + ); + } + + #[test] + fn xmp_size_limit_exceeded() { + // single_xmp.jxl has XMP > 300 bytes + let file = include_bytes!("../../resources/test/metadata_test_images/single_xmp.jxl"); + let options = 
JxlDecoderOptions { + metadata_capture: JxlMetadataCaptureOptions { + capture_xmp: true, + xmp_size_limit: Some(50), // Too small + ..Default::default() + }, + ..Default::default() + }; + let decoder = decode_to_image_info(file, options); + + let boxes = decoder.xmp_boxes().expect("XMP capture enabled"); + assert!(boxes.is_empty(), "XMP should be skipped due to size limit"); + } + + #[test] + fn jumbf_size_limit_exceeded() { + let file = include_bytes!("../../resources/test/metadata_test_images/single_jumbf.jxl"); + let options = JxlDecoderOptions { + metadata_capture: JxlMetadataCaptureOptions { + capture_jumbf: true, + jumbf_size_limit: Some(10), // Too small + ..Default::default() + }, + ..Default::default() + }; + let decoder = decode_to_image_info(file, options); + + let boxes = decoder.jumbf_boxes().expect("JUMBF capture enabled"); + assert!( + boxes.is_empty(), + "JUMBF should be skipped due to size limit" + ); + } + + #[test] + fn multiple_boxes_aggregate_size_limit() { + // multi_exif.jxl has 2 EXIF boxes + let file = include_bytes!("../../resources/test/metadata_test_images/multi_exif.jxl"); + + // Set limit that allows first box but not both + let options = JxlDecoderOptions { + metadata_capture: JxlMetadataCaptureOptions { + capture_exif: true, + exif_size_limit: Some(60), // Allows ~1 box + ..Default::default() + }, + ..Default::default() + }; + let decoder = decode_to_image_info(file, options); + + let boxes = decoder.exif_boxes().expect("EXIF capture enabled"); + // Should have exactly 1 box (second exceeds aggregate limit) + assert_eq!(boxes.len(), 1, "Should have captured only first box"); + assert_eq!( + extract_exif_image_description(&boxes[0].data), + Some("First EXIF".to_string()) + ); + } + } + + mod brob_tests { + use super::*; + + /// Decompress brotli data for test assertions + fn brotli_decompress(data: &[u8]) -> Vec { + let mut output = Vec::new(); + brotli::BrotliDecompress(&mut std::io::Cursor::new(data), &mut output) + .expect("Brotli 
decompression failed"); + output + } + + #[test] + fn single_exif_box() { + let file = + include_bytes!("../../resources/test/metadata_test_images/single_exif_brob.jxl"); + let options = JxlDecoderOptions { + metadata_capture: JxlMetadataCaptureOptions::capture_all_with_limits(), + ..Default::default() + }; + let decoder = decode_to_image_info(file, options); + + let boxes = decoder.exif_boxes().expect("EXIF should be captured"); + assert_eq!(boxes.len(), 1); + assert!( + boxes[0].is_brotli_compressed, + "EXIF box should be marked as brotli-compressed" + ); + + // Decompress and verify content + let decompressed = brotli_decompress(&boxes[0].data); + let description = extract_exif_image_description(&decompressed); + assert_eq!(description, Some("Test EXIF content".to_string())); + } + + #[test] + fn single_xmp_box() { + let file = + include_bytes!("../../resources/test/metadata_test_images/single_xmp_brob.jxl"); + let options = JxlDecoderOptions { + metadata_capture: JxlMetadataCaptureOptions::capture_all_with_limits(), + ..Default::default() + }; + let decoder = decode_to_image_info(file, options); + + let boxes = decoder.xmp_boxes().expect("XMP should be captured"); + assert_eq!(boxes.len(), 1); + assert!( + boxes[0].is_brotli_compressed, + "XMP box should be marked as brotli-compressed" + ); + + // Decompress and verify content + let decompressed = brotli_decompress(&boxes[0].data); + let xmp_str = std::str::from_utf8(&decompressed).unwrap(); + assert_eq!( + xmp_str, + XMP_TEMPLATE.replace("{DESCRIPTION}", "Test XMP content") + ); + } + + #[test] + fn single_jumbf_box() { + let file = + include_bytes!("../../resources/test/metadata_test_images/single_jumbf_brob.jxl"); + let options = JxlDecoderOptions { + metadata_capture: JxlMetadataCaptureOptions::capture_all_with_limits(), + ..Default::default() + }; + let decoder = decode_to_image_info(file, options); + + let boxes = decoder.jumbf_boxes().expect("JUMBF should be captured"); + assert_eq!(boxes.len(), 1); + 
assert!( + boxes[0].is_brotli_compressed, + "JUMBF box should be marked as brotli-compressed" + ); + + // Decompress and verify content + let decompressed = brotli_decompress(&boxes[0].data); + let jumbf_str = std::str::from_utf8(&decompressed).unwrap(); + assert!(jumbf_str.contains(r#"{"test": "Test JUMBF content"}"#)); + } + + #[test] + fn multiple_exif_boxes() { + let file = + include_bytes!("../../resources/test/metadata_test_images/multi_exif_brob.jxl"); + let options = JxlDecoderOptions { + metadata_capture: JxlMetadataCaptureOptions::capture_all_with_limits(), + ..Default::default() + }; + let decoder = decode_to_image_info(file, options); + + let boxes = decoder.exif_boxes().expect("EXIF should be captured"); + assert_eq!( + boxes.len(), + 2, + "Expected exactly 2 boxes, got {}", + boxes.len() + ); + + // Both should be compressed + assert!( + boxes[0].is_brotli_compressed, + "First box should be compressed" + ); + assert!( + boxes[1].is_brotli_compressed, + "Second box should be compressed" + ); + + // Decompress and verify content + let desc0 = extract_exif_image_description(&brotli_decompress(&boxes[0].data)); + let desc1 = extract_exif_image_description(&brotli_decompress(&boxes[1].data)); + assert_eq!(desc0, Some("First EXIF".to_string())); + assert_eq!(desc1, Some("Second EXIF".to_string())); + } + + #[test] + fn mixed_compression() { + let file = + include_bytes!("../../resources/test/metadata_test_images/mixed_compression.jxl"); + let options = JxlDecoderOptions { + metadata_capture: JxlMetadataCaptureOptions::capture_all_with_limits(), + ..Default::default() + }; + let decoder = decode_to_image_info(file, options); + + let boxes = decoder.exif_boxes().expect("EXIF should be captured"); + assert_eq!( + boxes.len(), + 2, + "Expected exactly 2 boxes, got {}", + boxes.len() + ); + + // First should be uncompressed, second should be compressed + assert!( + !boxes[0].is_brotli_compressed, + "First box should be uncompressed" + ); + assert!( + 
boxes[1].is_brotli_compressed, + "Second box should be brotli-compressed" + ); + + // Verify content - first box data is raw, second needs decompression + let desc0 = extract_exif_image_description(&boxes[0].data); + let desc1 = extract_exif_image_description(&brotli_decompress(&boxes[1].data)); + assert_eq!(desc0, Some("Uncompressed EXIF".to_string())); + assert_eq!(desc1, Some("Compressed EXIF".to_string())); + } + + #[test] + fn all_metadata_types() { + let file = + include_bytes!("../../resources/test/metadata_test_images/all_metadata_brob.jxl"); + let options = JxlDecoderOptions { + metadata_capture: JxlMetadataCaptureOptions::capture_all_with_limits(), + ..Default::default() + }; + let decoder = decode_to_image_info(file, options); + + // EXIF + let exif = decoder.exif_boxes().expect("EXIF should be captured"); + assert!(!exif.is_empty(), "Should have EXIF"); + assert!(exif[0].is_brotli_compressed, "EXIF should be compressed"); + assert_eq!( + extract_exif_image_description(&brotli_decompress(&exif[0].data)), + Some("EXIF data".to_string()) + ); + + // XMP + let xmp = decoder.xmp_boxes().expect("XMP should be captured"); + assert!(!xmp.is_empty(), "Should have XMP"); + assert!(xmp[0].is_brotli_compressed, "XMP should be compressed"); + let xmp_decompressed = brotli_decompress(&xmp[0].data); + let xmp_str = std::str::from_utf8(&xmp_decompressed).unwrap(); + assert_eq!(xmp_str, XMP_TEMPLATE.replace("{DESCRIPTION}", "XMP data")); + + // JUMBF + let jumbf = decoder.jumbf_boxes().expect("JUMBF should be captured"); + assert!(!jumbf.is_empty(), "Should have JUMBF"); + assert!(jumbf[0].is_brotli_compressed, "JUMBF should be compressed"); + let jumbf_decompressed = brotli_decompress(&jumbf[0].data); + let jumbf_str = std::str::from_utf8(&jumbf_decompressed).unwrap(); + assert!(jumbf_str.contains(r#"{"test": "JUMBF data"}"#)); + } + + #[test] + fn uncompressed_box_flag_is_false() { + // Verify that uncompressed boxes have is_brotli_compressed = false + let file = 
include_bytes!("../../resources/test/metadata_test_images/single_exif.jxl"); + let options = JxlDecoderOptions { + metadata_capture: JxlMetadataCaptureOptions::capture_all_with_limits(), + ..Default::default() + }; + let decoder = decode_to_image_info(file, options); + + let boxes = decoder.exif_boxes().expect("EXIF should be captured"); + assert_eq!(boxes.len(), 1); + assert!( + !boxes[0].is_brotli_compressed, + "Uncompressed EXIF box should have is_brotli_compressed = false" + ); + } + } } diff --git a/jxl/src/api/inner/box_parser.rs b/jxl/src/api/inner/box_parser.rs index eb66cb3b1..4ae676f5f 100644 --- a/jxl/src/api/inner/box_parser.rs +++ b/jxl/src/api/inner/box_parser.rs @@ -3,18 +3,43 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -use crate::error::{Error, Result}; +use crate::{ + container::ContainerBoxType, + error::{Error, Result}, +}; +use std::io::IoSliceMut; use crate::api::{ - JxlBitstreamInput, JxlSignatureType, check_signature_internal, inner::process::SmallBuffer, + JxlBitstreamInput, JxlMetadataBox, JxlMetadataCaptureOptions, JxlSignatureType, + check_signature_internal, inner::process::SmallBuffer, }; +/// Type of metadata box being captured. 
+#[derive(Clone, Copy)] +enum MetadataBoxType { + Exif, + Xmp, + Jumbf, +} + #[derive(Clone)] enum ParseState { SignatureNeeded, BoxNeeded, CodestreamBox(u64), SkippableBox(u64), + /// Reading metadata box content (EXIF, XMP, or JUMBF) + MetadataBox { + box_type: MetadataBoxType, + bytes_left: u64, + buffer: Vec, + is_brotli_compressed: bool, + }, + /// Reading brob header (4-byte inner type) before deciding what to do with content + BrotliBoxHeader { + /// Total content length remaining (including the 4-byte inner type) + bytes_left: u64, + }, } enum CodestreamBoxType { @@ -28,14 +53,44 @@ pub(super) struct BoxParser { pub(super) box_buffer: SmallBuffer, state: ParseState, box_type: CodestreamBoxType, + + // Captured metadata boxes + pub(super) exif_boxes: Vec, + pub(super) xmp_boxes: Vec, + pub(super) jumbf_boxes: Vec, + + // Aggregate sizes for limit tracking + exif_total_size: u64, + xmp_total_size: u64, + jumbf_total_size: u64, + + // Capture options + capture_exif: bool, + capture_xmp: bool, + capture_jumbf: bool, + exif_size_limit: Option, + xmp_size_limit: Option, + jumbf_size_limit: Option, } impl BoxParser { - pub(super) fn new() -> Self { + pub(super) fn new(opts: &JxlMetadataCaptureOptions) -> Self { BoxParser { box_buffer: SmallBuffer::new(128), state: ParseState::SignatureNeeded, box_type: CodestreamBoxType::None, + exif_boxes: Vec::new(), + xmp_boxes: Vec::new(), + jumbf_boxes: Vec::new(), + exif_total_size: 0, + xmp_total_size: 0, + jumbf_total_size: 0, + capture_exif: opts.capture_exif, + capture_xmp: opts.capture_xmp, + capture_jumbf: opts.capture_jumbf, + exif_size_limit: opts.exif_size_limit, + xmp_size_limit: opts.xmp_size_limit, + jumbf_size_limit: opts.jumbf_size_limit, } } @@ -83,6 +138,108 @@ impl BoxParser { self.state = ParseState::SkippableBox(s); } } + ParseState::MetadataBox { + box_type, + mut bytes_left, + mut buffer, + is_brotli_compressed, + } => { + let num = bytes_left.min(usize::MAX as u64) as usize; + // First consume any 
buffered data + if !self.box_buffer.is_empty() { + let to_read = num.min(self.box_buffer.len()); + buffer.extend_from_slice(&self.box_buffer[..to_read]); + self.box_buffer.consume(to_read); + bytes_left -= to_read as u64; + } else { + // Read directly from input using IoSliceMut + let mut read_buf = vec![0u8; num.min(8192)]; + let read = input.read(&mut [IoSliceMut::new(&mut read_buf)])?; + if read == 0 { + return Err(Error::OutOfBounds(num)); + } + buffer.extend_from_slice(&read_buf[..read]); + bytes_left -= read as u64; + } + if bytes_left == 0 { + // Store completed metadata box and update aggregate size + let box_size = buffer.len() as u64; + let metadata_box = JxlMetadataBox { + data: buffer, + is_brotli_compressed, + }; + match box_type { + MetadataBoxType::Exif => { + self.exif_total_size += box_size; + self.exif_boxes.push(metadata_box); + } + MetadataBoxType::Xmp => { + self.xmp_total_size += box_size; + self.xmp_boxes.push(metadata_box); + } + MetadataBoxType::Jumbf => { + self.jumbf_total_size += box_size; + self.jumbf_boxes.push(metadata_box); + } + } + self.state = ParseState::BoxNeeded; + } else { + self.state = ParseState::MetadataBox { + box_type, + bytes_left, + buffer, + is_brotli_compressed, + }; + } + } + ParseState::BrotliBoxHeader { bytes_left } => { + // We need at least 4 bytes to read the inner box type + self.box_buffer.refill(|b| input.read(b), None)?; + if self.box_buffer.len() < 4 { + return Err(Error::OutOfBounds(4 - self.box_buffer.len())); + } + let inner_ty = ContainerBoxType(self.box_buffer[0..4].try_into().unwrap()); + self.box_buffer.consume(4); + let content_len = bytes_left - 4; + + // Check if we should capture this brob based on inner type + let (should_capture, box_type, size_limit, current_size) = match inner_ty { + ContainerBoxType::EXIF => ( + self.capture_exif, + MetadataBoxType::Exif, + self.exif_size_limit, + self.exif_total_size, + ), + ContainerBoxType::XML => ( + self.capture_xmp, + MetadataBoxType::Xmp, + 
self.xmp_size_limit, + self.xmp_total_size, + ), + ContainerBoxType::JUMBF => ( + self.capture_jumbf, + MetadataBoxType::Jumbf, + self.jumbf_size_limit, + self.jumbf_total_size, + ), + _ => (false, MetadataBoxType::Exif, None, 0), // Won't be used + }; + + let within_limit = size_limit + .map(|limit| current_size.saturating_add(content_len) <= limit) + .unwrap_or(true); + + if should_capture && within_limit { + self.state = ParseState::MetadataBox { + box_type, + bytes_left: content_len, + buffer: Vec::with_capacity(content_len.min(65536) as usize), + is_brotli_compressed: true, + }; + } else { + self.state = ParseState::SkippableBox(content_len); + } + } ParseState::BoxNeeded => { self.box_buffer.refill(|b| input.read(b), None)?; let min_len = match &self.box_buffer[..] { @@ -92,8 +249,12 @@ impl BoxParser { if self.box_buffer.len() <= min_len { return Err(Error::OutOfBounds(min_len - self.box_buffer.len())); } - let ty: [_; 4] = self.box_buffer[4..8].try_into().unwrap(); - let extra_len = if &ty == b"jxlp" { 4 } else { 0 }; + let ty = ContainerBoxType(self.box_buffer[4..8].try_into().unwrap()); + let extra_len = if ty == ContainerBoxType::PARTIAL_CODESTREAM { + 4 + } else { + 0 + }; if self.box_buffer.len() <= min_len + extra_len { return Err(Error::OutOfBounds( min_len + extra_len - self.box_buffer.len(), @@ -106,7 +267,10 @@ impl BoxParser { _ => u32::from_be_bytes(self.box_buffer[0..4].try_into().unwrap()) as u64, }; // Per JXL spec: jxlc box with length 0 has special meaning "extends to EOF" - let content_len = if box_len == 0 && (&ty == b"jxlp" || &ty == b"jxlc") { + let content_len = if box_len == 0 + && (ty == ContainerBoxType::PARTIAL_CODESTREAM + || ty == ContainerBoxType::CODESTREAM) + { u64::MAX } else { if box_len <= (min_len + extra_len) as u64 { @@ -114,8 +278,8 @@ impl BoxParser { } box_len - min_len as u64 - extra_len as u64 }; - match &ty { - b"jxlc" => { + match ty { + ContainerBoxType::CODESTREAM => { if matches!( self.box_type, 
CodestreamBoxType::Jxlp(..) | CodestreamBoxType::LastJxlp @@ -125,7 +289,7 @@ impl BoxParser { self.box_type = CodestreamBoxType::Jxlc; self.state = ParseState::CodestreamBox(content_len); } - b"jxlp" => { + ContainerBoxType::PARTIAL_CODESTREAM => { let index = u32::from_be_bytes( self.box_buffer[min_len..min_len + 4].try_into().unwrap(), ); @@ -148,6 +312,82 @@ impl BoxParser { }; self.state = ParseState::CodestreamBox(content_len); } + ContainerBoxType::EXIF => { + // Capture EXIF metadata box if enabled and within aggregate limit + let within_limit = self + .exif_size_limit + .map(|limit| { + self.exif_total_size.saturating_add(content_len) <= limit + }) + .unwrap_or(true); + // u64::MAX is a sentinel for unbounded boxes (extends to EOF) + let is_bounded = content_len < u64::MAX; + if self.capture_exif && is_bounded && within_limit { + self.state = ParseState::MetadataBox { + box_type: MetadataBoxType::Exif, + bytes_left: content_len, + buffer: Vec::with_capacity(content_len.min(65536) as usize), + is_brotli_compressed: false, + }; + } else { + self.state = ParseState::SkippableBox(content_len); + } + } + ContainerBoxType::XML => { + // Capture XMP metadata box if enabled and within aggregate limit + let within_limit = self + .xmp_size_limit + .map(|limit| { + self.xmp_total_size.saturating_add(content_len) <= limit + }) + .unwrap_or(true); + // u64::MAX is a sentinel for unbounded boxes (extends to EOF) + let is_bounded = content_len < u64::MAX; + if self.capture_xmp && is_bounded && within_limit { + self.state = ParseState::MetadataBox { + box_type: MetadataBoxType::Xmp, + bytes_left: content_len, + buffer: Vec::with_capacity(content_len.min(65536) as usize), + is_brotli_compressed: false, + }; + } else { + self.state = ParseState::SkippableBox(content_len); + } + } + ContainerBoxType::JUMBF => { + // Capture JUMBF metadata box if enabled and within aggregate limit + let within_limit = self + .jumbf_size_limit + .map(|limit| { + 
self.jumbf_total_size.saturating_add(content_len) <= limit + }) + .unwrap_or(true); + // u64::MAX is a sentinel for unbounded boxes (extends to EOF) + let is_bounded = content_len < u64::MAX; + if self.capture_jumbf && is_bounded && within_limit { + self.state = ParseState::MetadataBox { + box_type: MetadataBoxType::Jumbf, + bytes_left: content_len, + buffer: Vec::with_capacity(content_len.min(65536) as usize), + is_brotli_compressed: false, + }; + } else { + self.state = ParseState::SkippableBox(content_len); + } + } + ContainerBoxType::BROTLI_COMPRESSED => { + // Brotli-compressed box - read 4-byte inner type to decide action + // u64::MAX is a sentinel for unbounded boxes (extends to EOF) + let is_bounded = content_len < u64::MAX; + // brob needs at least 4 bytes for inner type + if is_bounded && content_len >= 4 { + self.state = ParseState::BrotliBoxHeader { + bytes_left: content_len, + }; + } else { + self.state = ParseState::SkippableBox(content_len); + } + } _ => { self.state = ParseState::SkippableBox(content_len); } @@ -168,4 +408,16 @@ impl BoxParser { unreachable!() } } + + pub(super) fn exif_boxes(&self) -> Option<&[JxlMetadataBox]> { + self.capture_exif.then_some(&self.exif_boxes[..]) + } + + pub(super) fn xmp_boxes(&self) -> Option<&[JxlMetadataBox]> { + self.capture_xmp.then_some(&self.xmp_boxes[..]) + } + + pub(super) fn jumbf_boxes(&self) -> Option<&[JxlMetadataBox]> { + self.capture_jumbf.then_some(&self.jumbf_boxes[..]) + } } diff --git a/jxl/src/api/inner/mod.rs b/jxl/src/api/inner/mod.rs index 977a6dfb9..27d3a2147 100644 --- a/jxl/src/api/inner/mod.rs +++ b/jxl/src/api/inner/mod.rs @@ -10,7 +10,7 @@ use crate::{ error::{Error, Result}, }; -use super::{JxlBasicInfo, JxlColorProfile, JxlDecoderOptions, JxlPixelFormat}; +use super::{JxlBasicInfo, JxlColorProfile, JxlDecoderOptions, JxlMetadataBox, JxlPixelFormat}; use box_parser::BoxParser; use codestream_parser::CodestreamParser; @@ -28,9 +28,10 @@ pub struct JxlDecoderInner { impl 
JxlDecoderInner { /// Creates a new decoder with the given options and, optionally, CMS. pub fn new(options: JxlDecoderOptions) -> Self { + let box_parser = BoxParser::new(&options.metadata_capture); JxlDecoderInner { options, - box_parser: BoxParser::new(), + box_parser, codestream_parser: CodestreamParser::new(), } } @@ -113,7 +114,7 @@ impl JxlDecoderInner { /// After calling this, the caller should provide input from the beginning of the file. pub fn reset(&mut self) { // TODO(veluca): keep track of frame offsets for skipping. - self.box_parser = BoxParser::new(); + self.box_parser = BoxParser::new(&self.options.metadata_capture); self.codestream_parser = CodestreamParser::new(); } @@ -127,7 +128,7 @@ impl JxlDecoderInner { /// /// Returns `true` if pixel_format was preserved, `false` if none was set. pub fn rewind(&mut self) -> bool { - self.box_parser = BoxParser::new(); + self.box_parser = BoxParser::new(&self.options.metadata_capture); self.codestream_parser.rewind().is_some() } @@ -135,6 +136,29 @@ impl JxlDecoderInner { self.codestream_parser.has_more_frames } + /// Returns captured EXIF boxes from the container. + /// + /// Each box's data includes the 4-byte TIFF offset followed by TIFF/EXIF data. + /// Returns `None` if capture was disabled, `Some(...)` if enabled (empty slice if no boxes found). + pub fn exif_boxes(&self) -> Option<&[JxlMetadataBox]> { + self.box_parser.exif_boxes() + } + + /// Returns captured XMP boxes from the container. + /// + /// Returns `None` if capture was disabled, `Some(...)` if enabled (empty slice if no boxes found). + pub fn xmp_boxes(&self) -> Option<&[JxlMetadataBox]> { + self.box_parser.xmp_boxes() + } + + /// Returns captured JUMBF boxes from the container. + /// + /// JUMBF (JPEG Universal Metadata Box Format) is used for C2PA content credentials. + /// Returns `None` if capture was disabled, `Some(...)` if enabled (empty slice if no boxes found). 
+ pub fn jumbf_boxes(&self) -> Option<&[JxlMetadataBox]> { + self.box_parser.jumbf_boxes() + } + #[cfg(test)] pub(crate) fn set_use_simple_pipeline(&mut self, u: bool) { self.codestream_parser.set_use_simple_pipeline(u); diff --git a/jxl/src/api/options.rs b/jxl/src/api/options.rs index 0c0b5e35c..d7f5bd112 100644 --- a/jxl/src/api/options.rs +++ b/jxl/src/api/options.rs @@ -5,6 +5,18 @@ use crate::api::JxlCms; +/// Default maximum aggregate size for EXIF metadata (1MB). +/// Typical EXIF data is 10-64KB. +pub const DEFAULT_EXIF_SIZE_LIMIT: u64 = 1024 * 1024; + +/// Default maximum aggregate size for XMP metadata (1MB). +/// Typical XMP data is 1-100KB. +pub const DEFAULT_XMP_SIZE_LIMIT: u64 = 1024 * 1024; + +/// Default maximum aggregate size for JUMBF metadata (16MB). +/// JUMBF can be larger due to C2PA embedded images. +pub const DEFAULT_JUMBF_SIZE_LIMIT: u64 = 16 * 1024 * 1024; + pub enum JxlProgressiveMode { /// Renders all pixels in every call to Process. Eager, @@ -14,6 +26,75 @@ pub enum JxlProgressiveMode { FullFrame, } +/// Options for capturing metadata boxes during container parsing. +/// All capture flags default to false (opt-in) to avoid memory overhead. +#[derive(Debug, Clone, Default)] +pub struct JxlMetadataCaptureOptions { + /// Whether to capture EXIF metadata boxes during container parsing. + /// When enabled, EXIF boxes can be retrieved via `exif_boxes()` after parsing. + pub capture_exif: bool, + /// Whether to capture XMP metadata boxes during container parsing. + /// When enabled, XMP boxes can be retrieved via `xmp_boxes()` after parsing. + pub capture_xmp: bool, + /// Whether to capture JUMBF metadata boxes during container parsing. + /// JUMBF (JPEG Universal Metadata Box Format) is used for C2PA content credentials. + /// When enabled, JUMBF boxes can be retrieved via `jumbf_boxes()` after parsing. + pub capture_jumbf: bool, + /// Maximum aggregate size in bytes for all EXIF boxes combined. 
+ /// Once this limit is reached, additional EXIF boxes are skipped. + /// Set to `None` to disable the limit. + /// Default: 1MB ([`DEFAULT_EXIF_SIZE_LIMIT`]) + pub exif_size_limit: Option, + /// Maximum aggregate size in bytes for all XMP boxes combined. + /// Once this limit is reached, additional XMP boxes are skipped. + /// Set to `None` to disable the limit. + /// Default: 1MB ([`DEFAULT_XMP_SIZE_LIMIT`]) + pub xmp_size_limit: Option, + /// Maximum aggregate size in bytes for all JUMBF boxes combined. + /// Once this limit is reached, additional JUMBF boxes are skipped. + /// Set to `None` to disable the limit. + /// Default: 16MB ([`DEFAULT_JUMBF_SIZE_LIMIT`]) + pub jumbf_size_limit: Option, +} + +impl JxlMetadataCaptureOptions { + /// Create options with all metadata capture enabled and default size limits. + pub fn capture_all_with_limits() -> Self { + Self { + capture_exif: true, + capture_xmp: true, + capture_jumbf: true, + exif_size_limit: Some(DEFAULT_EXIF_SIZE_LIMIT), + xmp_size_limit: Some(DEFAULT_XMP_SIZE_LIMIT), + jumbf_size_limit: Some(DEFAULT_JUMBF_SIZE_LIMIT), + } + } + + /// Create options with all metadata capture enabled and no size limits. + pub fn capture_all() -> Self { + Self { + capture_exif: true, + capture_xmp: true, + capture_jumbf: true, + exif_size_limit: None, + xmp_size_limit: None, + jumbf_size_limit: None, + } + } + + /// Create options with all metadata capture disabled + pub fn no_capture() -> Self { + Self { + capture_exif: false, + capture_xmp: false, + capture_jumbf: false, + exif_size_limit: Some(0), + xmp_size_limit: Some(0), + jumbf_size_limit: Some(0), + } + } +} + #[non_exhaustive] pub struct JxlDecoderOptions { pub adjust_orientation: bool, @@ -39,6 +120,8 @@ pub struct JxlDecoderOptions { /// This produces premultiplied alpha output, which is useful for compositing. 
/// Default: false (output straight alpha) pub premultiply_output: bool, + /// Options for capturing metadata boxes (EXIF, XMP, JUMBF) during container parsing. + pub metadata_capture: JxlMetadataCaptureOptions, } impl Default for JxlDecoderOptions { @@ -54,6 +137,7 @@ impl Default for JxlDecoderOptions { pixel_limit: None, high_precision: false, premultiply_output: false, + metadata_capture: JxlMetadataCaptureOptions::no_capture(), } } } diff --git a/jxl/src/container/mod.rs b/jxl/src/container/mod.rs index c6e9e5050..ad7749e7f 100644 --- a/jxl/src/container/mod.rs +++ b/jxl/src/container/mod.rs @@ -8,6 +8,7 @@ pub mod box_header; pub mod parse; +pub use box_header::ContainerBoxType; use box_header::*; pub use parse::ParseEvent; use parse::*; @@ -27,7 +28,6 @@ enum DetectState { WaitingBoxHeader, WaitingJxlpIndex(ContainerBoxHeader), InAuxBox { - #[allow(unused)] header: ContainerBoxHeader, bytes_left: Option, }, @@ -108,6 +108,9 @@ impl ContainerParser { ParseEvent::Codestream(buf) => { codestream.extend_from_slice(buf); } + ParseEvent::AuxiliaryBox { .. 
} => { + // Ignore auxiliary boxes when collecting codestream + } } } Ok(codestream) diff --git a/jxl/src/container/parse.rs b/jxl/src/container/parse.rs index 726516d23..89972cc14 100644 --- a/jxl/src/container/parse.rs +++ b/jxl/src/container/parse.rs @@ -187,29 +187,43 @@ impl<'inner, 'buf> ParseEvents<'inner, 'buf> { return Ok(Some(ParseEvent::Codestream(payload))); } DetectState::InAuxBox { - header: _, + header, bytes_left: None, } => { - let _payload = *buf; + let payload = *buf; + let box_type = header.box_type(); *buf = &[]; - // FIXME: emit auxiliary box event + if !payload.is_empty() { + return Ok(Some(ParseEvent::AuxiliaryBox { + box_type, + data: payload, + is_last: false, // unbounded box - we don't know when it ends + })); + } } DetectState::InAuxBox { - header: _, + header, bytes_left: Some(bytes_left), } => { - let _payload = if buf.len() >= *bytes_left { + let box_type = header.box_type(); + let (payload, is_last) = if buf.len() >= *bytes_left { let (payload, remaining) = buf.split_at(*bytes_left); *state = DetectState::WaitingBoxHeader; *buf = remaining; - payload + (payload, true) } else { let payload = *buf; *bytes_left -= buf.len(); *buf = &[]; - payload + (payload, false) }; - // FIXME: emit auxiliary box event + if !payload.is_empty() || is_last { + return Ok(Some(ParseEvent::AuxiliaryBox { + box_type, + data: payload, + is_last, + })); + } } } } @@ -258,6 +272,18 @@ pub enum ParseEvent<'buf> { /// Returned data may be partial. Complete codestream can be obtained by concatenating all data /// of `Codestream` events. Codestream(&'buf [u8]), + /// Auxiliary box data is read (EXIF, XMP, JUMBF, etc.). + /// + /// Returned data may be partial. Complete box data can be obtained by concatenating all data + /// of `AuxiliaryBox` events with the same box type. + AuxiliaryBox { + /// The type of the auxiliary box (e.g., EXIF, XMP, JUMBF). + box_type: ContainerBoxType, + /// The data payload of this chunk. 
+ data: &'buf [u8], + /// True if this is the final chunk of data for this box. + is_last: bool, + }, } impl std::fmt::Debug for ParseEvent<'_> { @@ -268,6 +294,16 @@ impl std::fmt::Debug for ParseEvent<'_> { .debug_tuple("Codestream") .field(&format_args!("{} byte(s)", buf.len())) .finish(), + Self::AuxiliaryBox { + box_type, + data, + is_last, + } => f + .debug_struct("AuxiliaryBox") + .field("box_type", box_type) + .field("data", &format_args!("{} byte(s)", data.len())) + .field("is_last", is_last) + .finish(), } } } diff --git a/jxl_cli/Cargo.toml b/jxl_cli/Cargo.toml index aeaf3c96e..a2537ac26 100644 --- a/jxl_cli/Cargo.toml +++ b/jxl_cli/Cargo.toml @@ -17,6 +17,7 @@ half = "2.4.1" png = "0.18.0" exr = { version = "1.73.0", optional = true } color-eyre = "0.6.5" +brotli = "8.0" [dev-dependencies] jxl_macros = { path = "../jxl_macros", features = ["test"], version = "=0.3.0" } diff --git a/jxl_cli/src/dec/mod.rs b/jxl_cli/src/dec/mod.rs index 73e15f2e0..7c79c11bf 100644 --- a/jxl_cli/src/dec/mod.rs +++ b/jxl_cli/src/dec/mod.rs @@ -13,8 +13,8 @@ use color_eyre::eyre::{Result, eyre}; use jxl::{ api::{ Endianness, JxlAnimation, JxlBitDepth, JxlBitstreamInput, JxlColorProfile, JxlColorType, - JxlDataFormat, JxlDecoder, JxlDecoderOptions, JxlOutputBuffer, JxlPixelFormat, - ProcessingResult, states::WithImageInfo, + JxlDataFormat, JxlDecoder, JxlDecoderOptions, JxlMetadataBox, JxlOutputBuffer, + JxlPixelFormat, ProcessingResult, states::WithImageInfo, }, headers::extra_channels::ExtraChannel, image::{OwnedRawImage, Rect}, @@ -35,6 +35,9 @@ pub struct DecodeOutput { pub output_profile: JxlColorProfile, pub embedded_profile: JxlColorProfile, pub jxl_animation: Option, + pub exif_boxes: Option>, + pub xmp_boxes: Option>, + pub jumbf_boxes: Option>, } pub fn decode_header( @@ -213,6 +216,9 @@ pub fn decode_frames( output_profile, embedded_profile, jxl_animation: info.animation.clone(), + exif_boxes: None, + xmp_boxes: None, + jumbf_boxes: None, }; let extra_channels = 
info.extra_channels.len() - if interleave_alpha { 1 } else { 0 }; @@ -304,6 +310,9 @@ pub fn decode_frames( }); if !decoder_with_image_info.has_more_frames() { + image_data.exif_boxes = decoder_with_image_info.exif_boxes().map(|b| b.to_vec()); + image_data.xmp_boxes = decoder_with_image_info.xmp_boxes().map(|b| b.to_vec()); + image_data.jumbf_boxes = decoder_with_image_info.jumbf_boxes().map(|b| b.to_vec()); break; } } diff --git a/jxl_cli/src/lib.rs b/jxl_cli/src/lib.rs index 96782db21..6bddd5283 100644 --- a/jxl_cli/src/lib.rs +++ b/jxl_cli/src/lib.rs @@ -6,11 +6,12 @@ pub mod cms; pub mod dec; pub mod enc; +pub mod metadata; #[cfg(test)] mod tests { use crate::dec::{DecodeOutput, OutputDataType, decode_frames}; - use jxl::api::JxlDecoderOptions; + use jxl::api::{JxlDecoderOptions, JxlMetadataCaptureOptions}; use std::path::PathBuf; fn get_test_file(name: &str) -> PathBuf { @@ -193,4 +194,101 @@ mod tests { .unwrap(); } } + + fn do_decode_with_metadata( + mut input: &[u8], + metadata_capture: JxlMetadataCaptureOptions, + ) -> DecodeOutput { + let mut options = JxlDecoderOptions::default(); + options.metadata_capture = metadata_capture; + decode_frames( + &mut input, + options, + None, + None, + &[OutputDataType::U8], + true, + false, + None, + false, + ) + .unwrap() + .0 + } + + fn get_metadata_test_file(name: &str) -> PathBuf { + get_test_file(&format!("metadata_test_images/{}", name)) + } + + #[test] + fn test_metadata_capture_disabled() { + let path = get_metadata_test_file("single_exif.jxl"); + if !path.exists() { + eprintln!("Skipping (metadata test images not found)"); + return; + } + let file = std::fs::read(&path).unwrap(); + let output = do_decode_with_metadata(&file, JxlMetadataCaptureOptions::no_capture()); + assert!(output.exif_boxes.is_none()); + assert!(output.xmp_boxes.is_none()); + assert!(output.jumbf_boxes.is_none()); + } + + #[test] + fn test_single_exif_capture() { + let path = get_metadata_test_file("single_exif.jxl"); + if !path.exists() { + 
eprintln!("Skipping (metadata test images not found)"); + return; + } + let file = std::fs::read(&path).unwrap(); + let output = do_decode_with_metadata(&file, JxlMetadataCaptureOptions::capture_all()); + let exif = output.exif_boxes.unwrap(); + assert_eq!(exif.len(), 1); + assert!(!exif[0].data.is_empty()); + } + + #[test] + fn test_multi_exif_capture() { + let path = get_metadata_test_file("multi_exif.jxl"); + if !path.exists() { + eprintln!("Skipping (metadata test images not found)"); + return; + } + let file = std::fs::read(&path).unwrap(); + let output = do_decode_with_metadata(&file, JxlMetadataCaptureOptions::capture_all()); + let exif = output.exif_boxes.unwrap(); + assert_eq!(exif.len(), 2); + assert!(!exif[0].data.is_empty()); + assert!(!exif[1].data.is_empty()); + } + + #[test] + fn test_all_metadata_capture() { + let path = get_metadata_test_file("all_metadata.jxl"); + if !path.exists() { + eprintln!("Skipping (metadata test images not found)"); + return; + } + let file = std::fs::read(&path).unwrap(); + let output = do_decode_with_metadata(&file, JxlMetadataCaptureOptions::capture_all()); + assert!(!output.exif_boxes.unwrap().is_empty()); + assert!(!output.xmp_boxes.unwrap().is_empty()); + assert!(!output.jumbf_boxes.unwrap().is_empty()); + } + + #[test] + fn test_metadata_brotli_compressed() { + let path = get_metadata_test_file("single_exif_brob.jxl"); + if !path.exists() { + eprintln!("Skipping (metadata test images not found)"); + return; + } + let file = std::fs::read(&path).unwrap(); + let output = do_decode_with_metadata(&file, JxlMetadataCaptureOptions::capture_all()); + let exif = output.exif_boxes.unwrap(); + assert_eq!(exif.len(), 1); + assert!(exif[0].is_brotli_compressed); + assert!(!exif[0].data.is_empty()); + } } diff --git a/jxl_cli/src/main.rs b/jxl_cli/src/main.rs index 5c9ac6b32..84d37c00a 100644 --- a/jxl_cli/src/main.rs +++ b/jxl_cli/src/main.rs @@ -5,9 +5,10 @@ use clap::Parser; use color_eyre::eyre::{Result, WrapErr, eyre}; -use 
jxl::api::JxlDecoderOptions; +use jxl::api::{JxlDecoderOptions, JxlMetadataBox, JxlMetadataCaptureOptions, ProcessingResult}; use jxl_cli::dec::OutputDataType; use jxl_cli::enc::OutputFormat; +use jxl_cli::metadata::{compress_metadata_boxes, print_metadata_info, save_metadata_boxes}; use jxl_cli::{cms::Lcms2Cms, dec}; use std::fs; use std::io::{BufReader, Read, Seek}; @@ -29,7 +30,7 @@ struct Opt { /// Output image file, should end in .ppm, .pgm, .png, .apng or .npy /// (optional with --speedtest or --info) - #[clap(required_unless_present_any = ["speedtest", "info"])] + #[clap(required_unless_present_any = ["speedtest", "info", "exif_out", "xmp_out", "jumbf_out", "metadata_out", "compress_metadata"])] output: Option, /// Print measured decoding speed. @@ -52,6 +53,22 @@ struct Opt { #[clap(long)] original_icc_out: Option, + /// If specified, writes EXIF metadata boxes to files based on this path + #[clap(long)] + exif_out: Option, + + /// If specified, writes XMP metadata boxes to files based on this path + #[clap(long)] + xmp_out: Option, + + /// If specified, writes JUMBF metadata boxes to files based on this path + #[clap(long)] + jumbf_out: Option, + + /// If specified, writes all metadata boxes to the given directory + #[clap(long)] + metadata_out: Option, + /// If specified, takes precedence over the bit depth in the input metadata #[clap(long)] override_bitdepth: Option, @@ -81,6 +98,10 @@ struct Opt { /// Force a partial render every `render_interval` bytes. #[clap(long)] render_interval: Option, + + /// Brotli-compress metadata boxes (Exif, XMP, JUMBF) and write to output path + #[clap(long)] + compress_metadata: Option, } fn save_icc(icc_bytes: &[u8], icc_filename: Option<&PathBuf>) -> Result<()> { @@ -90,6 +111,27 @@ fn save_icc(icc_bytes: &[u8], icc_filename: Option<&PathBuf>) -> Result<()> { }) } +/// Save all metadata boxes according to CLI options. 
+fn save_all_metadata( + opt: &Opt, + exif: &Option>, + xmp: &Option>, + jumbf: &Option>, +) -> Result<()> { + save_metadata_boxes(opt.exif_out.as_ref(), exif, true)?; + save_metadata_boxes(opt.xmp_out.as_ref(), xmp, false)?; + save_metadata_boxes(opt.jumbf_out.as_ref(), jumbf, false)?; + + if let Some(dir) = &opt.metadata_out { + fs::create_dir_all(dir) + .wrap_err_with(|| format!("Failed to create metadata output directory {:?}", dir))?; + save_metadata_boxes(Some(&dir.join("metadata_exif.exif")), exif, true)?; + save_metadata_boxes(Some(&dir.join("metadata_xmp.xmp")), xmp, false)?; + save_metadata_boxes(Some(&dir.join("metadata_jumbf.bin")), jumbf, false)?; + } + Ok(()) +} + fn main() -> Result<()> { #[cfg(feature = "tracing-subscriber")] { @@ -101,6 +143,12 @@ fn main() -> Result<()> { } let opt = Opt::parse(); + + // Handle --compress-metadata mode (no decoding needed) + if let Some(output_path) = &opt.compress_metadata { + return compress_metadata_boxes(&opt.input, output_path); + } + let mut file = fs::File::open(opt.input.clone()) .wrap_err_with(|| format!("Failed to read source image from {:?}", opt.input))?; @@ -111,16 +159,28 @@ fn main() -> Result<()> { .transpose()?; let high_precision = opt.high_precision; + let wants_metadata = opt.metadata_out.is_some() + || opt.exif_out.is_some() + || opt.xmp_out.is_some() + || opt.jumbf_out.is_some(); + + let metadata_capture = JxlMetadataCaptureOptions { + capture_exif: wants_metadata || opt.exif_out.is_some(), + capture_xmp: wants_metadata || opt.xmp_out.is_some(), + capture_jumbf: wants_metadata || opt.jumbf_out.is_some(), + ..JxlMetadataCaptureOptions::capture_all() + }; let options = |skip_preview: bool| { let mut options = JxlDecoderOptions::default(); options.render_spot_colors = !matches!(output_format, Some(OutputFormat::Npy)); options.skip_preview = skip_preview; options.high_precision = high_precision; options.cms = Some(Box::new(Lcms2Cms)); + options.metadata_capture = metadata_capture.clone(); options 
}; - // Handle --info flag: print image info and exit + // Handle --info flag: print image info and exit (unless metadata extraction is also requested) if opt.info { let mut reader = BufReader::new(&mut file); let decoder = dec::decode_header(&mut reader, options(true))?; @@ -140,6 +200,55 @@ fn main() -> Result<()> { ); } println!("Extra channels: {}", info.extra_channels.len()); + if !wants_metadata { + return Ok(()); + } + // Seek back to start so the metadata-only path can re-read the file + file.seek(std::io::SeekFrom::Start(0))?; + } + + // Fast path: metadata extraction without pixel decoding + if opt.output.is_none() && !opt.speedtest && wants_metadata { + let mut reader = BufReader::new(file); + let mut decoder = dec::decode_header(&mut reader, options(true))?; + + // Skip through all frames to consume the codestream and discover + // any trailing metadata boxes (e.g. EXIF/XMP after codestream data) + loop { + let frame_decoder = match decoder.process(&mut reader)? { + ProcessingResult::Complete { result } => result, + ProcessingResult::NeedsMoreInput { .. } => { + return Err(eyre!("Source file truncated")); + } + }; + decoder = match frame_decoder.skip_frame(&mut reader)? { + ProcessingResult::Complete { result } => result, + ProcessingResult::NeedsMoreInput { .. 
} => { + return Err(eyre!("Source file truncated")); + } + }; + if !decoder.has_more_frames() { + break; + } + } + let exif = decoder.exif_boxes().map(|b| b.to_vec()); + let xmp = decoder.xmp_boxes().map(|b| b.to_vec()); + let jumbf = decoder.jumbf_boxes().map(|b| b.to_vec()); + + if opt.info { + if metadata_capture.capture_exif { + print_metadata_info("EXIF", &exif); + } + if metadata_capture.capture_xmp { + print_metadata_info("XMP", &xmp); + } + if metadata_capture.capture_jumbf { + print_metadata_info("JUMBF", &jumbf); + } + } + + save_all_metadata(&opt, &exif, &xmp, &jumbf)?; + return Ok(()); } @@ -240,5 +349,12 @@ fn main() -> Result<()> { save_icc(&output_icc, opt.icc_out.as_ref())?; save_icc(&embedded_icc, opt.original_icc_out.as_ref())?; + save_all_metadata( + &opt, + &output.exif_boxes, + &output.xmp_boxes, + &output.jumbf_boxes, + )?; + Ok(()) } diff --git a/jxl_cli/src/metadata.rs b/jxl_cli/src/metadata.rs new file mode 100644 index 000000000..d1cff1d5a --- /dev/null +++ b/jxl_cli/src/metadata.rs @@ -0,0 +1,410 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +use color_eyre::eyre::{Result, WrapErr, eyre}; +use jxl::api::JxlMetadataBox; +use jxl::container::{BitstreamKind, ContainerBoxType, ContainerParser, ParseEvent}; +use std::io::{BufWriter, Write}; +use std::path::{Path, PathBuf}; + +/// Decompress a Brotli-compressed metadata box, or return the data as-is if not compressed. 
+pub fn decompress_metadata(metadata_box: &JxlMetadataBox) -> Result> { + if metadata_box.is_brotli_compressed { + let mut decompressed = Vec::new(); + let mut reader = brotli::Decompressor::new(std::io::Cursor::new(&metadata_box.data), 4096); + std::io::Read::read_to_end(&mut reader, &mut decompressed) + .wrap_err("Failed to decompress Brotli-compressed metadata box")?; + Ok(decompressed) + } else { + Ok(metadata_box.data.clone()) + } +} + +/// Generate the output path for the Nth metadata box (0-indexed). +/// If there's only one box, use the base path as-is. +/// For additional boxes (index >= 1), insert a 1-based number before the extension. +pub fn numbered_path(base: &Path, index: usize, total: usize) -> PathBuf { + if total == 1 { + return base.to_path_buf(); + } + let stem = base.file_stem().unwrap_or_default().to_string_lossy(); + let ext = base.extension(); + let name = if index == 0 { + match ext { + Some(e) => format!("{}.{}", stem, e.to_string_lossy()), + None => stem.into_owned(), + } + } else { + let num = index + 1; // 1-based, so second box is _2 + match ext { + Some(e) => format!("{}_{}.{}", stem, num, e.to_string_lossy()), + None => format!("{}_{}", stem, num), + } + }; + base.with_file_name(name) +} + +/// Strip the 4-byte TIFF header offset prefix from EXIF box data. +/// The JXL Exif box prepends a 4-byte big-endian offset before the TIFF data. +/// In practice this is always zero, but we warn if it's not. +fn strip_exif_tiff_offset(data: &[u8]) -> &[u8] { + if data.len() < 4 { + return data; + } + let offset = u32::from_be_bytes(data[..4].try_into().unwrap()); + if offset != 0 { + eprintln!( + "Warning: EXIF box has non-zero TIFF header offset ({offset}), stripping 4-byte prefix anyway" + ); + } + &data[4..] +} + +/// Save metadata boxes to files at the given base path. 
+pub fn save_metadata_boxes( + base_path: Option<&PathBuf>, + boxes: &Option>, + is_exif: bool, +) -> Result<()> { + let Some(base) = base_path else { + return Ok(()); + }; + let Some(metadata_boxes) = boxes else { + return Ok(()); + }; + let total = metadata_boxes.len(); + for (i, metadata_box) in metadata_boxes.iter().enumerate() { + let data = decompress_metadata(metadata_box)?; + let data = if is_exif { + strip_exif_tiff_offset(&data).to_vec() + } else { + data + }; + let path = numbered_path(base, i, total); + std::fs::write(&path, &data) + .wrap_err_with(|| format!("Failed to write metadata to {:?}", path))?; + } + Ok(()) +} + +/// Print metadata box info for --info output. +pub fn print_metadata_info(label: &str, boxes: &Option>) { + match boxes { + Some(b) if !b.is_empty() => { + let sizes: Vec = b + .iter() + .map(|m| { + if m.is_brotli_compressed { + format!("{} bytes (brotli)", m.data.len()) + } else { + format!("{} bytes", m.data.len()) + } + }) + .collect(); + println!("{}: {} box(es): {}", label, b.len(), sizes.join(", ")); + } + _ => println!("{}: none", label), + } +} + +/// JXL container signature: the first box is always `JXL ` (12 bytes). +const JXL_CONTAINER_SIGNATURE: &[u8] = b"\x00\x00\x00\x0cJXL \x0d\x0a\x87\x0a"; + +/// Brotli-compress the given data at maximum quality. +fn brotli_compress(data: &[u8]) -> Result> { + let mut compressed = Vec::new(); + let mut compressor = brotli::CompressorWriter::new(&mut compressed, 4096, 11, 22); + compressor.write_all(data)?; + drop(compressor); + Ok(compressed) +} + +/// Write a standard or extended box header. 
+///
+/// Boxes whose total size (8-byte header plus `content_size`) fits in a `u32` get the
+/// standard header: a 4-byte big-endian size followed by the 4-byte type. Larger boxes get
+/// the extended form: a size field of 1, the type, then a 64-bit big-endian size that
+/// also counts the 16 header bytes.
+///
+/// # Errors
+/// Fails if the box size does not fit in a `u64` or if writing to `out` fails.
+fn write_box_header(
+    out: &mut impl Write,
+    box_type: ContainerBoxType,
+    content_size: u64,
+) -> Result<()> {
+    // `checked_add` keeps an absurd `content_size` from wrapping into a small, wrong size.
+    let standard_size = content_size
+        .checked_add(8)
+        .and_then(|total| u32::try_from(total).ok());
+    match standard_size {
+        Some(total_size) => {
+            // Standard header (8 bytes)
+            out.write_all(&total_size.to_be_bytes())?;
+            out.write_all(&box_type.0)?;
+        }
+        None => {
+            // Extended size header (16 bytes)
+            let extended_size = content_size
+                .checked_add(16)
+                .ok_or_else(|| eyre!("Box content size {} is too large", content_size))?;
+            out.write_all(&1u32.to_be_bytes())?;
+            out.write_all(&box_type.0)?;
+            out.write_all(&extended_size.to_be_bytes())?;
+        }
+    }
+    Ok(())
+}
+
+/// Write the buffered codestream as a single `jxlc` box and clear the buffer.
+/// Does nothing when the buffer is empty.
+fn flush_codestream(out: &mut impl Write, codestream: &mut Vec<u8>) -> Result<()> {
+    if codestream.is_empty() {
+        return Ok(());
+    }
+    write_box_header(out, ContainerBoxType::CODESTREAM, codestream.len() as u64)?;
+    out.write_all(codestream)?;
+    codestream.clear();
+    Ok(())
+}
+
+/// Brotli-compress metadata boxes in a JXL container file.
+///
+/// Reads the input file, parses boxes using `ContainerParser`, compresses `Exif`, `xml `,
+/// and `jumb` boxes as `brob` boxes, and writes the result to the output path.
+/// Already-compressed `brob` boxes and all other box types are reconstructed as-is.
+///
+/// The output file is only created after the whole input has been parsed successfully, so
+/// a failure never leaves a truncated output file behind.
+///
+/// # Errors
+/// Fails if the input cannot be read or parsed, if it is not a JXL container, if it ends
+/// in the middle of an auxiliary box, or if the output cannot be written.
+pub fn compress_metadata_boxes(input_path: &Path, output_path: &Path) -> Result<()> {
+    let data = std::fs::read(input_path)
+        .wrap_err_with(|| format!("Failed to read input file {:?}", input_path))?;
+
+    let mut parser = ContainerParser::new();
+    // Assemble the complete output in memory; the file system is touched only at the end.
+    let mut output: Vec<u8> = Vec::with_capacity(data.len());
+    let mut codestream_buf: Vec<u8> = Vec::new();
+    let mut summary: Vec<(String, usize, usize)> = Vec::new();
+    // Accumulator for auxiliary boxes that arrive in multiple chunks.
+    let mut aux_acc: Option<(ContainerBoxType, Vec<u8>)> = None;
+
+    for event in parser.process_bytes(&data) {
+        let event = event.map_err(|e| eyre!("Failed to parse JXL container: {:?}", e))?;
+
+        // When we leave codestream events, flush the accumulated codestream as a jxlc box.
+        if !matches!(event, ParseEvent::Codestream(_)) {
+            flush_codestream(&mut output, &mut codestream_buf)?;
+        }
+
+        match event {
+            ParseEvent::BitstreamKind(BitstreamKind::Container) => {
+                output.write_all(JXL_CONTAINER_SIGNATURE)?;
+            }
+            ParseEvent::BitstreamKind(_) => {
+                return Err(eyre!(
+                    "Not a JXL container file (bare codestream?). Cannot compress metadata."
+                ));
+            }
+            ParseEvent::Codestream(chunk) => {
+                codestream_buf.extend_from_slice(chunk);
+            }
+            ParseEvent::AuxiliaryBox {
+                box_type,
+                data: chunk,
+                is_last,
+            } => {
+                // Accumulate partial box data until we have the complete box.
+                let (bt, full_data) = if let Some((acc_type, mut acc_data)) = aux_acc.take() {
+                    acc_data.extend_from_slice(chunk);
+                    if !is_last {
+                        aux_acc = Some((acc_type, acc_data));
+                        continue;
+                    }
+                    (acc_type, acc_data)
+                } else if is_last {
+                    (box_type, chunk.to_vec())
+                } else {
+                    aux_acc = Some((box_type, chunk.to_vec()));
+                    continue;
+                };
+
+                if bt == ContainerBoxType::EXIF
+                    || bt == ContainerBoxType::XML
+                    || bt == ContainerBoxType::JUMBF
+                {
+                    let compressed = brotli_compress(&full_data)?;
+                    // A brob box stores the original 4-byte box type before the payload.
+                    let brob_content_size = 4 + compressed.len() as u64;
+                    write_box_header(
+                        &mut output,
+                        ContainerBoxType::BROTLI_COMPRESSED,
+                        brob_content_size,
+                    )?;
+                    output.write_all(&bt.0)?;
+                    output.write_all(&compressed)?;
+
+                    let type_str = String::from_utf8_lossy(&bt.0);
+                    summary.push((type_str.into_owned(), full_data.len(), compressed.len()));
+                } else {
+                    write_box_header(&mut output, bt, full_data.len() as u64)?;
+                    output.write_all(&full_data)?;
+                }
+            }
+        }
+    }
+
+    // A box that never delivered its final chunk means the input ended mid-box. Report it
+    // instead of silently dropping the accumulated data.
+    // NOTE(review): assumes the parser marks the last chunk of every complete box with
+    // `is_last`; confirm for boxes that extend to end of file.
+    if let Some((box_type, partial)) = aux_acc {
+        return Err(eyre!(
+            "Truncated JXL container: box {:?} ended after {} bytes without its final chunk",
+            String::from_utf8_lossy(&box_type.0),
+            partial.len()
+        ));
+    }
+
+    // Flush any remaining codestream data.
+    flush_codestream(&mut output, &mut codestream_buf)?;
+
+    let mut file = BufWriter::new(
+        std::fs::File::create(output_path)
+            .wrap_err_with(|| format!("Failed to create output file {:?}", output_path))?,
+    );
+    file.write_all(&output)?;
+    file.flush()?;
+
+    if !summary.is_empty() {
+        println!("Compressed metadata boxes:");
+        for (type_name, orig, comp) in &summary {
+            println!("  {}: {} bytes -> {} bytes (brotli)", type_name, orig, comp);
+        }
+    } else {
+        println!("No metadata boxes found to compress.");
+    }
+    println!("Output written to: {}", output_path.display());
+
+    Ok(())
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_numbered_path_single() {
+        let base = Path::new("/tmp/metadata.bin");
+        assert_eq!(
+            numbered_path(base, 0, 1),
+            PathBuf::from("/tmp/metadata.bin")
+        );
+    }
+
+    #[test]
+    fn test_numbered_path_multi_first() {
+        let base = Path::new("/tmp/metadata.bin");
+        assert_eq!(
+            numbered_path(base, 0, 3),
+            PathBuf::from("/tmp/metadata.bin")
+        );
+    }
+
+    #[test]
+    fn test_numbered_path_multi_second() {
+        let base = Path::new("/tmp/metadata.bin");
+        assert_eq!(
+            numbered_path(base, 1, 3),
+            PathBuf::from("/tmp/metadata_2.bin")
+        );
+    }
+
+    #[test]
+    fn test_numbered_path_multi_third() {
+        let base = Path::new("/tmp/metadata.bin");
+        assert_eq!(
+            numbered_path(base, 2, 3),
+            PathBuf::from("/tmp/metadata_3.bin")
+        );
+    }
+
+    #[test]
+    fn test_numbered_path_no_extension() {
+        let base = Path::new("/tmp/metadata");
+        assert_eq!(numbered_path(base, 0, 1), PathBuf::from("/tmp/metadata"));
+        assert_eq!(numbered_path(base, 1, 2), PathBuf::from("/tmp/metadata_2"));
+    }
+
+    fn get_metadata_test_file(name: &str) -> PathBuf {
+        let root = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
+        root.parent()
+            .unwrap()
+            .join("jxl/resources/test/metadata_test_images")
+            .join(name)
+    }
+
+    /// Create a per-process scratch directory so concurrent test runs cannot collide.
+    fn scratch_dir(name: &str) -> PathBuf {
+        let dir = std::env::temp_dir().join(format!("{}_{}", name, std::process::id()));
+        std::fs::create_dir_all(&dir).unwrap();
+        dir
+    }
+
+    #[test]
+    fn test_compress_metadata_boxes() {
+        let input = get_metadata_test_file("single_exif.jxl");
+        if !input.exists() {
+            eprintln!("Skipping (metadata test images not found)");
+            return;
+        }
+
+        let dir = scratch_dir("jxl_test_compress_metadata");
+        let output = dir.join("compressed.jxl");
+
+        compress_metadata_boxes(&input, &output).unwrap();
+
+        // Output should exist and be smaller (compressed metadata)
+        let input_size = std::fs::metadata(&input).unwrap().len();
+        let output_size = std::fs::metadata(&output).unwrap().len();
+        assert!(output_size < input_size);
+
+        // Output should start with JXL container signature
+        let output_data = std::fs::read(&output).unwrap();
+        assert!(output_data.starts_with(JXL_CONTAINER_SIGNATURE));
+
+        // Output should contain brob boxes, not Exif boxes
+        assert!(!contains_box_type(&output_data, b"Exif"));
+        assert!(contains_box_type(&output_data, b"brob"));
+
+        std::fs::remove_dir_all(&dir).ok();
+    }
+
+    #[test]
+    fn test_compress_already_compressed_is_noop() {
+        let input = get_metadata_test_file("single_exif_brob.jxl");
+        if !input.exists() {
+            eprintln!("Skipping (metadata test images not found)");
+            return;
+        }
+
+        let dir = scratch_dir("jxl_test_compress_noop");
+        let output = dir.join("compressed.jxl");
+
+        compress_metadata_boxes(&input, &output).unwrap();
+
+        // Output should be identical to input (no uncompressed metadata to compress)
+        let input_data = std::fs::read(&input).unwrap();
+        let output_data = std::fs::read(&output).unwrap();
+        assert_eq!(input_data, output_data);
+
+        std::fs::remove_dir_all(&dir).ok();
+    }
+
+    #[test]
+    fn test_compress_bare_codestream_errors() {
+        let input = PathBuf::from(env!("CARGO_MANIFEST_DIR"))
+            .parent()
+            .unwrap()
+            .join("jxl/resources/test/3x3_srgb_lossless.jxl");
+        if !input.exists() {
+            eprintln!("Skipping (test images not found)");
+            return;
+        }
+
+        let dir = scratch_dir("jxl_test_compress_bare");
+        let output = dir.join("output.jxl");
+
+        let result = compress_metadata_boxes(&input, &output);
+        assert!(result.is_err());
+        // A rejected input must not leave a partial output file behind.
+        assert!(!output.exists());
+
+        std::fs::remove_dir_all(&dir).ok();
+    }
+
+    /// Check if raw file data contains a top-level box with the given 4-byte type.
+    /// Stops scanning (returning `false`) at a box that extends to end of file or at a
+    /// malformed size field, instead of panicking on arithmetic underflow.
+    fn contains_box_type(data: &[u8], box_type: &[u8; 4]) -> bool {
+        let mut pos = 0usize;
+        while let Some(header) = data.get(pos..).filter(|rest| rest.len() >= 8) {
+            let size_field = u32::from_be_bytes(header[..4].try_into().unwrap());
+            if header[4..8] == box_type[..] {
+                return true;
+            }
+            let (box_len, header_len): (u64, u64) = match size_field {
+                // Size 0 means the box runs to end of file: nothing follows it.
+                0 => break,
+                // Size 1 means a 64-bit size follows the type.
+                1 => match header.get(8..16) {
+                    Some(ext) => (u64::from_be_bytes(ext.try_into().unwrap()), 16),
+                    None => break,
+                },
+                n => (u64::from(n), 8),
+            };
+            if box_len < header_len {
+                // Malformed: a box cannot be smaller than its own header.
+                break;
+            }
+            match usize::try_from(box_len)
+                .ok()
+                .and_then(|len| pos.checked_add(len))
+            {
+                Some(next) => pos = next,
+                None => break,
+            }
+        }
+        false
+    }
+}
diff --git a/tools/inspect_metadata.sh b/tools/inspect_metadata.sh
new file mode 100755
index 000000000..4a2e341d6
--- /dev/null
+++ b/tools/inspect_metadata.sh
@@ -0,0 +1,67 @@
+#!/bin/bash
+# Copyright (c) the JPEG XL Project Authors. All rights reserved.
+#
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+# Inspects metadata in a JXL file using jxl_cli and exiftool.
+
+set -e
+
+INPUT="$1"
+OUTPUT_DIR="$2"
+
+if [ -z "$INPUT" ]; then
+  echo "Usage: $0 <input.jxl> [output_dir]"
+  echo ""
+  echo "Extracts and inspects metadata from a JXL file."
+  echo "If output_dir is not specified, a temporary directory is used."
+  exit 1
+fi
+
+if [ ! -f "$INPUT" ]; then
+  echo "Error: Input file not found at $INPUT"
+  exit 1
+fi
+
+echo "Building jxl_cli..."
+cargo build --bin jxl_cli --release
+
+BINARY="./target/release/jxl_cli"
+
+if [ ! -f "$BINARY" ]; then
+  echo "Error: Binary not found at $BINARY after build."
+  exit 1
+fi
+
+CLEANUP_DIR=""
+if [ -z "$OUTPUT_DIR" ]; then
+  OUTPUT_DIR=$(mktemp -d)
+  CLEANUP_DIR="$OUTPUT_DIR"
+  # Remove the temporary directory on every exit path. With `set -e`, a
+  # failing command below would otherwise end the script before any trailing
+  # cleanup code runs, leaking the directory.
+  trap 'rm -rf "$CLEANUP_DIR"' EXIT
+fi
+
+mkdir -p "$OUTPUT_DIR"
+
+echo ""
+echo "=== jxl_cli --info ==="
+"$BINARY" "$INPUT" --info --metadata-out "$OUTPUT_DIR"
+
+echo ""
+echo "=== exiftool (JXL file) ==="
+exiftool "$INPUT"
+
+if [ -f "$OUTPUT_DIR/metadata_exif.exif" ]; then
+  echo ""
+  echo "=== exiftool (extracted EXIF) ==="
+  exiftool "$OUTPUT_DIR/metadata_exif.exif"
+fi
+
+if [ -f "$OUTPUT_DIR/metadata_xmp.xmp" ]; then
+  echo ""
+  echo "=== exiftool (extracted XMP) ==="
+  exiftool "$OUTPUT_DIR/metadata_xmp.xmp"
+fi