From 0d19aac93c31faa824228e352e20cd70f0594dc9 Mon Sep 17 00:00:00 2001 From: Helmut Januschka Date: Mon, 8 Jun 2026 10:44:43 +0200 Subject: [PATCH 1/4] Avoid storing unused partial renders Only retain progressive render snapshots in the CLI when they can be written to an output. --- jxl_cli/benches/decode.rs | 1 + jxl_cli/src/dec/mod.rs | 5 +++-- jxl_cli/src/lib.rs | 2 ++ jxl_cli/src/main.rs | 2 ++ 4 files changed, 8 insertions(+), 2 deletions(-) diff --git a/jxl_cli/benches/decode.rs b/jxl_cli/benches/decode.rs index a066c146e..f15a4bccd 100644 --- a/jxl_cli/benches/decode.rs +++ b/jxl_cli/benches/decode.rs @@ -68,6 +68,7 @@ fn decode_benches(c: &mut Criterion) { false, None, false, + false, ) .unwrap(); }) diff --git a/jxl_cli/src/dec/mod.rs b/jxl_cli/src/dec/mod.rs index d8e5e737f..b1d6b4f09 100644 --- a/jxl_cli/src/dec/mod.rs +++ b/jxl_cli/src/dec/mod.rs @@ -139,6 +139,7 @@ pub fn decode_frames( linear_output: bool, render_interval: Option, allow_partial_files: bool, + store_partial_renders: bool, ) -> Result<(DecodeOutput, Duration)> { let start = Instant::now(); @@ -282,7 +283,7 @@ pub fn decode_frames( // render and retry. if render_interval.is_some() && input.available_bytes()? > 0 { has_rendered_data |= fallback.flush_pixels(&mut output_bufs)?; - if has_rendered_data { + if has_rendered_data && store_partial_renders { partial_renders.push( outputs .iter() @@ -332,7 +333,7 @@ pub fn decode_frames( // render and retry. if render_interval.is_some() && input.available_bytes()? > 0 { has_rendered_data |= fallback.flush_pixels(&mut output_bufs)?; - if has_rendered_data { + if has_rendered_data && store_partial_renders { partial_renders.push( outputs .iter() diff --git a/jxl_cli/src/lib.rs b/jxl_cli/src/lib.rs index 4f4b627ba..bf71087e0 100644 --- a/jxl_cli/src/lib.rs +++ b/jxl_cli/src/lib.rs @@ -77,6 +77,7 @@ mod tests { false, None, false, + false, ) .unwrap() .0 @@ -188,6 +189,7 @@ mod tests { false, None, false, + false, ) .unwrap(); } diff --git a/jxl_cli/src/main.rs b/jxl_cli/src/main.rs index d8abf491c..64faf4e94 100644 --- a/jxl_cli/src/main.rs +++ b/jxl_cli/src/main.rs @@ -164,6 +164,7 @@ fn main() -> Result<()> { let linear_output = matches!(output_format, Some(OutputFormat::Exr)); #[cfg(not(feature = "exr"))] let linear_output = false; + let store_partial_renders = output_format.is_some() && opt.render_interval.is_some(); let (mut output, duration) = dec::decode_frames( $input, options(skip_preview), @@ -176,6 +177,7 @@ fn main() -> Result<()> { linear_output, opt.render_interval, opt.allow_partial_files, + store_partial_renders, )?; if opt.preview { output.frames.truncate(1); From ea3af6eab7d52b61240adcf361e9786cab16fdc6 Mon Sep 17 00:00:00 2001 From: Helmut Januschka Date: Mon, 8 Jun 2026 11:07:51 +0200 Subject: [PATCH 2/4] Free squeeze neighbor buffers after final render Inverse squeeze steps read neighbor grids (next average and previous decoded) that the transform graph counts as buffer uses, but the per-step code never released them, so those intermediate modular buffers stayed allocated for the whole frame. Mark them used on the final render so they are freed once consumed. --- jxl/src/frame/modular/transforms/apply.rs | 28 +++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/jxl/src/frame/modular/transforms/apply.rs b/jxl/src/frame/modular/transforms/apply.rs index 33fe5bd2a..101fc92df 100644 --- a/jxl/src/frame/modular/transforms/apply.rs +++ b/jxl/src/frame/modular/transforms/apply.rs @@ -382,6 +382,20 @@ impl TransformStepChunk { } buffers[buf_in[0]].buffer_grid[in_grid].mark_used(is_final); buffers[buf_in[1]].buffer_grid[res_grid].mark_used(is_final); + // Release the weak neighbor grids read above (next average and previous + // decoded), which are counted as uses in the transform graph. + let (gx, gy) = self.grid_pos; + if gx + 1 < buffers[*buf_out].grid_shape.0 { + let next_avg_grid = + buffers[buf_in[0]].get_grid_idx(out_grid_kind, (gx + 1, gy)); + if next_avg_grid != in_grid { + buffers[buf_in[0]].buffer_grid[next_avg_grid].mark_used(is_final); + } + } + if gx > 0 { + let prev_out_grid = buffers[*buf_out].get_grid_idx(out_grid_kind, (gx - 1, gy)); + buffers[*buf_out].buffer_grid[prev_out_grid].mark_used(is_final); + } } TransformStep::VSqueeze { buf_in, @@ -491,6 +505,20 @@ impl TransformStepChunk { } buffers[buf_in[0]].buffer_grid[in_grid].mark_used(is_final); buffers[buf_in[1]].buffer_grid[res_grid].mark_used(is_final); + // Release the weak neighbor grids read above (next average and previous + // decoded), which are counted as uses in the transform graph. + let (gx, gy) = self.grid_pos; + if gy + 1 < buffers[*buf_out].grid_shape.1 { + let next_avg_grid = + buffers[buf_in[0]].get_grid_idx(out_grid_kind, (gx, gy + 1)); + if next_avg_grid != in_grid { + buffers[buf_in[0]].buffer_grid[next_avg_grid].mark_used(is_final); + } + } + if gy > 0 { + let prev_out_grid = buffers[*buf_out].get_grid_idx(out_grid_kind, (gx, gy - 1)); + buffers[*buf_out].buffer_grid[prev_out_grid].mark_used(is_final); + } } }; From 6489571551e49f1a56d6760c2e8f861c55b92e80 Mon Sep 17 00:00:00 2001 From: Helmut Januschka Date: Mon, 8 Jun 2026 11:33:05 +0200 Subject: [PATCH 3/4] Bound render pipeline scratch buffer pool Modular frames never reclaim center group buffers via get_buffer, so the scratch pool grew to a full-frame copy that was retained for the pipeline's lifetime. Cap it to the few buffers sequential rendering can actually reuse. --- jxl/src/render/low_memory_pipeline/group_scheduler.rs | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/jxl/src/render/low_memory_pipeline/group_scheduler.rs b/jxl/src/render/low_memory_pipeline/group_scheduler.rs index abc810ef8..e3213282b 100644 --- a/jxl/src/render/low_memory_pipeline/group_scheduler.rs +++ b/jxl/src/render/low_memory_pipeline/group_scheduler.rs @@ -113,7 +113,15 @@ impl LowMemoryRenderPipeline { } fn store_scratch_buffer(&mut self, channel: usize, kind: usize, image: OwnedRawImage) { - self.scratch_channel_buffers[channel * 3 + kind].push(image) + // The scratch pool only exists to recycle buffers for upcoming groups. Sequential + // rendering never needs more than a couple of buffers per (channel, kind) in flight, so + // bound the pool; otherwise pure-modular frames (which never reclaim center buffers via + // `get_buffer`) would retain a full-frame copy for the pipeline's lifetime. + const MAX_SCRATCH_BUFFERS: usize = 4; + let pool = &mut self.scratch_channel_buffers[channel * 3 + kind]; + if pool.len() < MAX_SCRATCH_BUFFERS { + pool.push(image); + } } pub(super) fn render_with_new_group( From 4446cea80e5d325f2c1bd8842207770fe3583126 Mon Sep 17 00:00:00 2001 From: Helmut Januschka Date: Mon, 8 Jun 2026 14:13:59 +0200 Subject: [PATCH 4/4] Revert "Bound render pipeline scratch buffer pool" This reverts commit 6489571551e49f1a56d6760c2e8f861c55b92e80. --- jxl/src/render/low_memory_pipeline/group_scheduler.rs | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/jxl/src/render/low_memory_pipeline/group_scheduler.rs b/jxl/src/render/low_memory_pipeline/group_scheduler.rs index e3213282b..abc810ef8 100644 --- a/jxl/src/render/low_memory_pipeline/group_scheduler.rs +++ b/jxl/src/render/low_memory_pipeline/group_scheduler.rs @@ -113,15 +113,7 @@ impl LowMemoryRenderPipeline { } fn store_scratch_buffer(&mut self, channel: usize, kind: usize, image: OwnedRawImage) { - // The scratch pool only exists to recycle buffers for upcoming groups. Sequential - // rendering never needs more than a couple of buffers per (channel, kind) in flight, so - // bound the pool; otherwise pure-modular frames (which never reclaim center buffers via - // `get_buffer`) would retain a full-frame copy for the pipeline's lifetime. - const MAX_SCRATCH_BUFFERS: usize = 4; - let pool = &mut self.scratch_channel_buffers[channel * 3 + kind]; - if pool.len() < MAX_SCRATCH_BUFFERS { - pool.push(image); - } + self.scratch_channel_buffers[channel * 3 + kind].push(image) } pub(super) fn render_with_new_group(