Skip to content
Closed
75 changes: 59 additions & 16 deletions crates/bevy_pbr/src/render/gpu_preprocess.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ use bevy_core_pipeline::{
DeferredPrepass, DepthPrepass, MotionVectorPrepass, NormalPrepass, PreviousViewData,
PreviousViewUniformOffset, PreviousViewUniforms,
},
schedule::{Core3d, Core3dSystems},
schedule::{Core3d, Core3dSystems, RootNonCameraView},
};
use bevy_derive::{Deref, DerefMut};
use bevy_ecs::{
Expand All @@ -45,6 +45,7 @@ use bevy_render::{
PreprocessWorkItemBuffers, UntypedPhaseBatchedInstanceBuffers,
UntypedPhaseIndirectParametersBuffers,
},
camera::ExtractedCamera,
diagnostic::RecordDiagnostics as _,
occlusion_culling::OcclusionCulling,
render_phase::GpuRenderBinnedMeshInstance,
Expand All @@ -69,6 +70,7 @@ use bevy_render::{
use bevy_shader::Shader;
use bevy_utils::{default, TypeIdMap};
use bitflags::bitflags;
use bytemuck::{Pod, Zeroable};
use smallvec::{smallvec, SmallVec};
use tracing::warn;

Expand Down Expand Up @@ -291,7 +293,7 @@ pub enum PhasePreprocessBindGroups {
},

/// The bind groups used for the compute shader when indirect drawing is
/// being used, but occlusion culling isn't being used.
/// being used and occlusion culling is being used.
///
/// Because indirect drawing requires splitting the meshes into indexed and
/// non-indexed meshes, and because occlusion culling requires splitting
Expand All @@ -313,6 +315,13 @@ pub enum PhasePreprocessBindGroups {
},
}

/// Immediate data handed to the mesh preprocessing compute pass through
/// `set_immediates`.
///
/// The struct is `#[repr(C)]` and `Pod` so that `bytemuck::bytes_of` can view
/// it as raw bytes. Its fields are three `f32`s followed by one `u32`, and
/// their names and order match the `Immediates` struct in
/// `mesh_preprocess.wgsl` when both `FRUSTUM_CULLING` and `OCCLUSION_CULLING`
/// are defined.
#[derive(Clone, Copy, Pod, Zeroable)]
#[repr(C)]
struct PreprocessImmediates {
/// World-space position of the view that visibility range culling measures
/// distances from.
cur_view_world_position: [f32; 3],
/// Offset into the `late_preprocess_work_item_indirect_parameters` buffer.
late_preprocess_work_item_indirect_offset: u32,
}

/// The bind groups for the compute shaders that reset indirect draw counts and
/// build indirect parameters.
///
Expand Down Expand Up @@ -600,7 +609,15 @@ pub fn unpack_bins(
}

pub fn early_gpu_preprocess(
current_view: ViewQuery<Option<&ViewLightEntities>, Without<SkipGpuPreprocess>>,
current_view: ViewQuery<
(
Option<&ViewLightEntities>,
&ExtractedView,
Has<RootNonCameraView>,
),
Without<SkipGpuPreprocess>,
>,
camera_views: Query<&ExtractedView, (With<ExtractedCamera>, Without<SkipGpuPreprocess>)>,
view_query: Query<
(
&ExtractedView,
Expand Down Expand Up @@ -630,7 +647,7 @@ pub fn early_gpu_preprocess(
let pass_span = diagnostics.pass_span(&mut compute_pass, "early_mesh_preprocessing");

let view_entity = current_view.entity();
let shadow_cascade_views = current_view.into_inner();
let (shadow_cascade_views, extracted_view, has_non_root_view) = current_view.into_inner();
let all_views =
gather_shadow_cascades_for_view(view_entity, shadow_cascade_views, &light_query);

Expand All @@ -641,6 +658,23 @@ pub fn early_gpu_preprocess(
else {
continue;
};
// Set the camera position that will be used for visibility range culling.
let cur_view_world_position = if !has_non_root_view {
extracted_view.world_from_view.translation().to_array()
Comment thread
kfc35 marked this conversation as resolved.
} else {
// TODO: We need to handle this case better.
// As written, point and spot light shadows will just use the first user camera
// that is returned by the query.
// If there is only one user camera, this is fine, but with multiple user cameras,
// an arbitrary one of them ends up being used for visibility range culling.
let camera_view: Option<&ExtractedView> = camera_views.iter().next();
if let Some(camera_view) = camera_view {
camera_view.world_from_view.translation().to_array()
} else {
// No camera views to render to.
continue;

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Doesn't this continue skip the entire preprocess dispatch for the view?

@kfc35 kfc35 May 13, 2026

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For the PointLight/SpotLight shadow view, yes

But I would consider that a user configuration error at this point, so the shadows just wouldn’t be rendered.

(However we do not need to continue this conversation because I will close this PR! It seems the consensus is converging on reverting the offending PR)

}
};

let Some(bind_groups) = bind_groups else {
continue;
Expand Down Expand Up @@ -746,10 +780,15 @@ pub fn early_gpu_preprocess(
..
} = *work_item_buffers
{
compute_pass.set_immediates(
0,
bytemuck::bytes_of(&late_indirect_parameters_indexed_offset),
);
let immediates = PreprocessImmediates {
cur_view_world_position,
late_preprocess_work_item_indirect_offset:
late_indirect_parameters_indexed_offset,
};
compute_pass.set_immediates(0, bytemuck::bytes_of(&immediates));
} else {
compute_pass
.set_immediates(0, bytemuck::bytes_of(&cur_view_world_position));
}

compute_pass.set_bind_group(0, indexed_bind_group, &dynamic_offsets);
Expand All @@ -770,10 +809,15 @@ pub fn early_gpu_preprocess(
..
} = *work_item_buffers
{
compute_pass.set_immediates(
0,
bytemuck::bytes_of(&late_indirect_parameters_non_indexed_offset),
);
let immediates = PreprocessImmediates {
cur_view_world_position,
late_preprocess_work_item_indirect_offset:
late_indirect_parameters_non_indexed_offset,
};
compute_pass.set_immediates(0, bytemuck::bytes_of(&immediates));
} else {
compute_pass
.set_immediates(0, bytemuck::bytes_of(&cur_view_world_position));
}

compute_pass.set_bind_group(0, non_indexed_bind_group, &dynamic_offsets);
Expand Down Expand Up @@ -1144,9 +1188,6 @@ impl PreprocessPipelines {
GpuPreprocessingMode::None => false,
GpuPreprocessingMode::PreprocessingOnly => {
self.direct_preprocess.is_loaded(pipeline_cache)
&& self
.gpu_frustum_culling_preprocess
.is_loaded(pipeline_cache)
Comment thread
kfc35 marked this conversation as resolved.
}
GpuPreprocessingMode::Culling => {
self.direct_preprocess.is_loaded(pipeline_cache)
Expand Down Expand Up @@ -1247,7 +1288,9 @@ impl SpecializedComputePipeline for PreprocessPipeline {
),
layout: vec![self.bind_group_layout.clone()],
immediate_size: if key.contains(PreprocessPipelineKey::OCCLUSION_CULLING) {
4
16
} else if key.contains(PreprocessPipelineKey::FRUSTUM_CULLING) {
12
} else {
0
},
Expand Down
18 changes: 15 additions & 3 deletions crates/bevy_pbr/src/render/mesh_preprocess.wgsl
Original file line number Diff line number Diff line change
Expand Up @@ -64,12 +64,24 @@ struct LatePreprocessWorkItemIndirectParameters {
pad: vec4<u32>,
}

// Immediate data supplied by the host for each preprocessing dispatch.
//
// The host side (`gpu_preprocess.rs`) uploads either the view position
// followed by the late work item offset, or the view position alone, so the
// field order here must stay in sync with what is passed to `set_immediates`.
#ifdef FRUSTUM_CULLING
// These have to be in a structure because of Naga limitations on DX12.
struct Immediates {
// The world position of the `CurrentView`. The visibility range check
// measures `camera_distance` from this point.
cur_view_world_position: vec3<f32>,
#ifdef OCCLUSION_CULLING
// The offset into the `late_preprocess_work_item_indirect_parameters`
// buffer.
late_preprocess_work_item_indirect_offset: u32,
#endif // OCCLUSION_CULLING
}
#else // FRUSTUM_CULLING
// Variant without the view position, used when frustum culling is compiled
// out.
// NOTE(review): the `var<immediate>` declaration appears to live inside a
// `FRUSTUM_CULLING` block, so this variant may be declared without ever being
// bound — confirm.
struct Immediates {
// The offset into the `late_preprocess_work_item_indirect_parameters`
// buffer.
late_preprocess_work_item_indirect_offset: u32,
}
#endif // FRUSTUM_CULLING

// The current frame's `MeshInput`.
@group(0) @binding(3) var<storage> current_input: array<MeshInput>;
Expand Down Expand Up @@ -98,6 +110,8 @@ struct Immediates {
@group(0) @binding(9) var<storage> mesh_culling_data: array<MeshCullingData>;

@group(0) @binding(10) var<storage> visibility_ranges: array<vec4<f32>>;

var<immediate> immediates: Immediates;
#endif // FRUSTUM_CULLING

#ifdef OCCLUSION_CULLING
Expand All @@ -115,8 +129,6 @@ struct Immediates {
@group(0) @binding(13) var<storage, read> late_preprocess_work_item_indirect_parameters:
array<LatePreprocessWorkItemIndirectParameters>;
#endif // LATE_PHASE

var<immediate> immediates: Immediates;
#endif // OCCLUSION_CULLING

#ifdef FRUSTUM_CULLING
Expand Down Expand Up @@ -224,7 +236,7 @@ fn main(@builtin(global_invocation_id) global_invocation_id: vec3<u32>) {
world_pos = world_from_local[3].xyz;
}

let camera_distance = length(position_world_to_view(world_pos));
let camera_distance = length(immediates.cur_view_world_position - world_pos);
// `x` is the minimum range; `w` is the largest range.
if (camera_distance < lod_range.x || camera_distance >= lod_range.w) {
return;
Expand Down
1 change: 1 addition & 0 deletions crates/bevy_render/src/batching/gpu_preprocessing.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1337,6 +1337,7 @@ impl FromWorld for GpuPreprocessingSupport {
crate::get_pixel10_driver_version(adapter_info).is_some()
}

// Includes occlusion culling and frustum culling
let culling_feature_support = device
.features()
.contains(Features::INDIRECT_FIRST_INSTANCE | Features::IMMEDIATES);
Expand Down
Loading