Skip to content
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 3 additions & 6 deletions jxl/src/color/tf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -452,8 +452,7 @@ pub fn scene_to_hlg_precise(samples: &mut [f32]) {
let y = if a <= 1.0 / 12.0 {
(3.0 * a).sqrt()
} else {
// TODO(tirr-c): maybe use mul_add?
HLG_A * (12.0 * a - HLG_B).ln() + HLG_C
HLG_A.mul_add((12.0 * a - HLG_B).ln(), HLG_C)
};
*s = (y as f32).copysign(*s);
}
Expand Down Expand Up @@ -484,10 +483,9 @@ pub fn scene_to_hlg(samples: &mut [f32]) {
let y = if a <= 1.0 / 12.0 {
(3.0 * a).sqrt()
} else {
// TODO(tirr-c): maybe use mul_add?
let log = crate::util::fast_log2f(12.0 * a - HLG_B as f32);
// log2 x = ln x / ln 2, therefore ln x = (ln 2)(log2 x)
(HLG_A * std::f64::consts::LN_2) as f32 * log + HLG_C as f32
((HLG_A * std::f64::consts::LN_2) as f32).mul_add(log, HLG_C as f32)
};
*s = y.copysign(*s);
}
Expand All @@ -512,8 +510,7 @@ pub fn hlg_to_scene(samples: &mut [f32]) {
// Constant: 0.003_639_807_079_052_639
const MUL: f32 = 0.003_639_807;

// TODO(OneDeuxTriSeiGo): maybe use mul_add?
crate::util::fast_pow2f(a * POW) * MUL + ADD
crate::util::fast_pow2f(a * POW).mul_add(MUL, ADD)
};
*s = y.copysign(*s);
}
Expand Down
7 changes: 7 additions & 0 deletions jxl/src/entropy_coding/ans.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ struct AnsHistogram {
bucket_mask: u32,
// For optimizing fast-lossless case.
single_symbol: Option<u32>,
alphabet_size: usize,
}

// log_alphabet_size <= 8 and log_bucket_size <= 7, so u8 is sufficient for symbols and cutoffs.
Expand Down Expand Up @@ -310,6 +311,7 @@ impl AnsHistogram {
log_bucket_size,
bucket_mask,
single_symbol,
alphabet_size,
})
}

Expand Down Expand Up @@ -415,6 +417,11 @@ impl AnsCodes {
pub fn single_symbol(&self, ctx: usize) -> Option<u32> {
    // Forward the query to the histogram stored at index `ctx`.
    let histogram = &self.histograms[ctx];
    histogram.single_symbol()
}

/// Largest symbol that can appear in the histogram of `cluster`.
///
/// This is the histogram's `alphabet_size` minus one; the subtraction saturates, so an
/// `alphabet_size` of zero yields `0` rather than wrapping.
pub fn max_symbol_for_cluster(&self, cluster: usize) -> u32 {
    let alphabet_size = self.histograms[cluster].alphabet_size;
    alphabet_size.saturating_sub(1) as u32
}
}

#[derive(Debug)]
Expand Down
17 changes: 17 additions & 0 deletions jxl/src/entropy_coding/decode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -577,6 +577,23 @@ impl Histograms {
self.codes.single_symbol(lz_dist_cluster) == Some(1) && lz_conf.is_split_exponent_zero()
}

/// Upper bound on the number of bits any decoded value can require, over all clusters.
///
/// For ANS-coded streams, each entry of `uint_configs` is evaluated at the largest symbol of
/// the histogram with the same index, and the widest result is returned (`0` when there are
/// no configurations). Huffman-coded streams conservatively report `usize::MAX`.
///
/// Mirrors libjxl's `max_num_bits` field in `AnsCodes` / `dec_ans.cc`.
// NOTE(review): LZ77 length symbols are not treated separately here — confirm that this is
// intended.
pub fn max_num_bits(&self) -> usize {
    match &self.codes {
        Codes::Huffman(_) => usize::MAX,
        Codes::Ans(ans) => self
            .uint_configs
            .iter()
            .enumerate()
            .map(|(cluster, config)| {
                config.max_bits_for_symbol(ans.max_symbol_for_cluster(cluster))
            })
            .fold(0, usize::max),
    }
}

/// Returns the `lz77_params` value stored in `self`, by value.
pub(crate) fn lz77_params(&self) -> Lz77Params {
self.lz77_params
}
Expand Down
15 changes: 15 additions & 0 deletions jxl/src/entropy_coding/hybrid_uint.rs
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,21 @@ impl HybridUint {
})
}

/// Maximum number of output bits when the largest possible input symbol is `max_symbol`.
///
/// When `max_symbol` is below `split_token` the result is `split_exponent`. Otherwise it is
/// the number of in-token bits (`lsb_in_token + msb_in_token`) plus the number of extra bits
/// computed for `max_symbol`, plus one.
///
/// Mirrors libjxl's `UpdateMaxNumBits` from `dec_ans.cc`.
pub fn max_bits_for_symbol(&self, max_symbol: u32) -> usize {
    if max_symbol < self.split_token {
        return self.split_exponent as usize;
    }

    let token_bits = self.lsb_in_token + self.msb_in_token;
    // split_exponent >= token_bits is guaranteed by decode() validation, so this
    // subtraction does not underflow.
    let base_extra = self.split_exponent - token_bits;
    let growth = (max_symbol - self.split_token) >> token_bits;
    let extra_bits = base_extra + growth;
    (token_bits + extra_bits + 1) as usize
}

/// Returns true if this config matches the 420 pattern (common in e3 images):
/// split_exponent=4, msb_in_token=2, lsb_in_token=0
#[inline(always)]
Expand Down
44 changes: 34 additions & 10 deletions jxl/src/frame/decode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,8 @@ use crate::{
error::Result,
features::{noise::Noise, patches::PatchesDictionary, spline::Splines},
frame::{
DecoderState, Frame, HfGlobalState, HfMetadata, LfGlobalState, PassState, coeff_order,
DecoderState, Frame, HfCoefficients, HfGlobalState, HfMetadata, LfGlobalState, PassState,
coeff_order,
},
headers::{
color_encoding::ColorSpace,
Expand Down Expand Up @@ -508,24 +509,47 @@ impl Frame {
// keep around the coefficients, so allocate coefficients under those conditions
// too.
// TODO(veluca): evaluate whether we can make this check more precise.
let hf_coefficients = if passes.len() <= 1
&& !(self
let need_hf_coefficients = passes.len() > 1
|| (self
.lf_global
.as_mut()
.unwrap()
.modular_global
.can_do_partial_render()
&& self.header.num_extra_channels > 0)
{
&& self.header.num_extra_channels > 0);

// Use i16 storage when the histogram guarantees all values fit: mirrors libjxl's
// dec_frame.cc `use_16_bit` check (max_num_bits + CeilLog2Nonzero(num_passes) < 16).
let max_bits = passes
.iter()
.map(|p| p.histograms.max_num_bits())
.max()
.unwrap_or(0);
let pass_log = if passes.len() <= 1 {
0
} else {
(passes.len() - 1).ilog2() as usize + 1
};
let use_i16 = max_bits.saturating_add(pass_log) < 16;

let hf_coefficients = if !need_hf_coefficients {
None
} else {
let xs = GROUP_DIM * GROUP_DIM;
let ys = self.header.num_groups();
Some((
Image::new((xs, ys))?,
Image::new((xs, ys))?,
Image::new((xs, ys))?,
))
if use_i16 {
Some(HfCoefficients::I16(
Image::new((xs, ys))?,
Image::new((xs, ys))?,
Image::new((xs, ys))?,
))
} else {
Some(HfCoefficients::I32(
Image::new((xs, ys))?,
Image::new((xs, ys))?,
Image::new((xs, ys))?,
))
}
};

self.hf_global = Some(HfGlobalState {
Expand Down
Loading
Loading