Skip to content

Commit 212c2c2

Browse files
committed
added support for distance-on-stack culling in shrinking ray queries
1 parent af214cf commit 212c2c2

2 files changed

Lines changed: 99 additions & 37 deletions

File tree

CMakeLists.txt

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,17 @@
1-
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
1+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA
2+
# CORPORATION & AFFILIATES. All rights reserved.
23
# SPDX-License-Identifier: Apache-2.0
34

45
cmake_minimum_required(VERSION 3.16)
56

7+
set(CUBQL_VERSION_MAJOR 1)
8+
set(CUBQL_VERSION_MINOR 3)
9+
set(CUBQL_VERSION_PATCH 0)
10+
set(CUBQL_VERSION ${CUBQL_VERSION_MAJOR}.${CUBQL_VERSION_MINOR}.${CUBQL_VERSION_PATCH})
11+
612
cmake_policy(SET CMP0048 NEW)
713
set(CMAKE_BUILD_TYPE_INIT "Release")
8-
project(cuBQL VERSION 1.2.0 LANGUAGES C CXX)
14+
project(cuBQL VERSION ${CUBQL_VERSION} LANGUAGES C CXX)
915

1016
if (CUBQL_OMP)
1117
set(CUBQL_DISABLE_CUDA ON)

cuBQL/traversal/rayQueries.h

Lines changed: 91 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@
88
#include "cuBQL/math/Ray.h"
99
#include "cuBQL/traversal/fixedBoxQuery.h"
1010

11+
#define CUBQL_DIST_STACK 1
12+
1113
namespace cuBQL {
1214

1315
// ******************************************************************
@@ -263,15 +265,17 @@ namespace cuBQL {
263265
template<typename Lambda, typename T, int D>
264266
inline __cubql_both
265267
void fixedRayQuery::forEachLeaf(const Lambda &lambdaToCallOnEachLeaf,
266-
cuBQL::bvh_t<T, D> bvh,
267-
cuBQL::ray3f ray,
268+
cuBQL::bvh_t<T,D> bvh,
269+
cuBQL::ray_t<T> ray,
268270
bool dbg)
269271
{
272+
using node_admin_t = typename bvh_t<T,D>::node_t::Admin;
273+
enum { stackDepth = 64 };
270274
struct StackEntry {
271275
uint32_t idx;
272276
};
273-
bvh3f::node_t::Admin traversalStack[64], *stackPtr = traversalStack;
274-
bvh3f::node_t::Admin node = bvh.nodes[0].admin;
277+
node_admin_t traversalStack[stackDepth], *stackPtr = traversalStack;
278+
node_admin_t node = bvh.nodes[0].admin;
275279
// ------------------------------------------------------------------
276280
// traverse until there's nothing left to traverse:
277281
// ------------------------------------------------------------------
@@ -472,12 +476,18 @@ namespace cuBQL {
472476
ray_t ray,
473477
bool dbg)
474478
{
479+
using scalar_t = T;
475480
using node_t = typename bvh_t<T, D>::node_t;
481+
using node_admin_t = typename node_t::Admin;
476482
struct StackEntry {
477483
uint32_t idx;
478484
};
479-
typename node_t::Admin traversalStack[64], *stackPtr = traversalStack;
480-
typename node_t::Admin node = bvh.nodes[0].admin;
485+
enum { stackDepth = 64 };
486+
node_admin_t traversalStack[stackDepth], *stackPtr = traversalStack;
487+
node_admin_t node = bvh.nodes[0].admin;
488+
#if CUBQL_DIST_STACK
489+
T distStack[stackDepth], *distStackPtr = distStack;
490+
#endif
481491

482492
if (ray.direction.x == (T)0) ray.direction.x = T(1e-20);
483493
if (ray.direction.y == (T)0) ray.direction.y = T(1e-20);
@@ -510,7 +520,12 @@ namespace cuBQL {
510520

511521
if (o0) {
512522
if (o1) {
513-
*stackPtr++ = (node_t0 < node_t1) ? n1.admin : n0.admin;
523+
*stackPtr++
524+
= (node_t0 < node_t1) ? n1.admin : n0.admin;
525+
#if CUBQL_DIST_STACK
526+
*distStackPtr++
527+
= (node_t0 < node_t1) ? node_t1 : node_t0;
528+
#endif
514529
node = (node_t0 < node_t1) ? n0.admin : n1.admin;
515530
} else {
516531
node = n0.admin;
@@ -536,9 +551,20 @@ namespace cuBQL {
536551
// pop next un-traversed node from stack, discarding any nodes
537552
// that are more distant than whatever query radius we now have
538553
// ------------------------------------------------------------------
539-
if (stackPtr == traversalStack)
540-
return ray.tMax;
541-
node = *--stackPtr;
554+
555+
while (true) {
556+
if (stackPtr == traversalStack)
557+
return ray.tMax;
558+
#if CUBQL_DIST_STACK
559+
scalar_t tFromStack = *--distStackPtr;
560+
if (tFromStack >= ray.tMax) {
561+
--stackPtr;
562+
continue;
563+
}
564+
#endif
565+
node = *--stackPtr;
566+
break;
567+
}
542568
}
543569
}
544570

@@ -668,28 +694,38 @@ namespace cuBQL {
668694
bvh_t bvh,
669695
/*! REFERENCE to a ray, so 'enterBlas()' can modify it */
670696
ray_t &ray,
671-
bool dbg)
697+
bool _dbg)
672698
{
699+
#ifdef NDEBUG
700+
const bool dbg = false;
701+
#else
702+
bool dbg = _dbg;
703+
#endif
673704
using node_t = typename bvh_t::node_t;
674705
using T = typename bvh_t::scalar_t;
706+
using scalar_t = typename bvh_t::scalar_t;
707+
using vec3_t = typename cuBQL::vec_t<scalar_t,3>;
675708
struct StackEntry {
676709
uint32_t idx;
677710
};
678-
enum { STACK_DEPTH=128 };
711+
enum { STACK_DEPTH=64 };
679712
typename node_t::Admin
680713
traversalStack[STACK_DEPTH],
681714
*stackPtr = traversalStack,
682715
*blasStackBase = nullptr;
683716
typename node_t::Admin node = bvh.nodes[0].admin;
717+
#if CUBQL_DIST_STACK
718+
T distStack[STACK_DEPTH], *distStackPtr = distStack;
719+
#endif
684720

685721
node_t *tlasSavedNodePtr = 0;
686722
uint32_t *tlasSavedPrimIDs = 0;
687-
vec_t<T,3> saved_dir, saved_org;
723+
vec3_t saved_dir, saved_org;
688724

689-
if (ray.direction.x == (T)0) ray.direction.x = T(1e-20);
690-
if (ray.direction.y == (T)0) ray.direction.y = T(1e-20);
691-
if (ray.direction.z == (T)0) ray.direction.z = T(1e-20);
692-
vec_t<T,3> rcp_dir = rcp(ray.direction);
725+
if (ray.direction.x == (scalar_t)0) ray.direction.x = scalar_t(1e-20);
726+
if (ray.direction.y == (scalar_t)0) ray.direction.y = scalar_t(1e-20);
727+
if (ray.direction.z == (scalar_t)0) ray.direction.z = scalar_t(1e-20);
728+
vec3_t rcp_dir = rcp(ray.direction);
693729

694730
// ------------------------------------------------------------------
695731
// traverse until there's nothing left to traverse:
@@ -714,8 +750,10 @@ namespace cuBQL {
714750
// it's not a real leaf, so this must be an instance node
715751
tlasSavedNodePtr = bvh.nodes;
716752
tlasSavedPrimIDs = bvh.primIDs;
753+
#ifndef NDEBUG
717754
if (node.count != 1)
718755
printf("TWO-LEVEL BVH MUST BE BUILT WITH 1 PRIM PER LEAF!\n");
756+
#endif
719757
if (dbg)
720758
printf("inner-leaf primIDs %p ofs %i count %i\n",
721759
bvh.primIDs,
@@ -732,11 +770,14 @@ namespace cuBQL {
732770
bvh_t blas;
733771
ray_t transformed_ray = ray;
734772
enterBlas(transformed_ray,blas,instID);
735-
ray.origin = transformed_ray.origin;
773+
ray.origin = transformed_ray.origin;
736774
ray.direction = transformed_ray.direction;
737-
if (ray.direction.x == (T)0) ray.direction.x = T(1e-20);
738-
if (ray.direction.y == (T)0) ray.direction.y = T(1e-20);
739-
if (ray.direction.z == (T)0) ray.direction.z = T(1e-20);
775+
if (ray.direction.x == (scalar_t)0)
776+
ray.direction.x = scalar_t(1e-20);
777+
if (ray.direction.y == (scalar_t)0)
778+
ray.direction.y = scalar_t(1e-20);
779+
if (ray.direction.z == (scalar_t)0)
780+
ray.direction.z = scalar_t(1e-20);
740781
rcp_dir = rcp(ray.direction);
741782
bvh.nodes = blas.nodes;
742783
bvh.primIDs = blas.primIDs;
@@ -745,14 +786,14 @@ namespace cuBQL {
745786
// now check if that blas root node is _also_ a leaf:
746787
if (node.count != 0)
747788
break;
748-
if (dbg) printf("new node %i.%i\n",(int)node.offset,(int)node.count);
789+
// if (dbg) printf("new node %i.%i\n",(int)node.offset,(int)node.count);
749790
}
750791

751792
uint32_t n0Idx = (uint32_t)node.offset+0;
752793
uint32_t n1Idx = (uint32_t)node.offset+1;
753794
node_t n0 = bvh.nodes[n0Idx];
754795
node_t n1 = bvh.nodes[n1Idx];
755-
T node_t0 = T(0), node_t1 = T(0);
796+
scalar_t node_t0 = scalar_t(0), node_t1 = scalar_t(0);
756797
bool o0 = rayIntersectsBox(node_t0,ray,rcp_dir,n0.bounds);
757798
bool o1 = rayIntersectsBox(node_t1,ray,rcp_dir,n1.bounds);
758799

@@ -769,7 +810,12 @@ namespace cuBQL {
769810
return;
770811
}
771812

772-
*stackPtr++ = (node_t0 < node_t1) ? n1.admin : n0.admin;
813+
*stackPtr++
814+
= (node_t0 < node_t1) ? n1.admin : n0.admin;
815+
#if CUBQL_DIST_STACK
816+
*distStackPtr++
817+
= (node_t0 < node_t1) ? node_t1 : node_t0;
818+
#endif
773819
node = (node_t0 < node_t1) ? n0.admin : n1.admin;
774820
} else {
775821
node = n0.admin;
@@ -798,18 +844,28 @@ namespace cuBQL {
798844
// pop next un-traversed node from stack, discarding any nodes
799845
// that are more distant than whatever query radius we now have
800846
// ------------------------------------------------------------------
801-
if (stackPtr == blasStackBase) {
802-
leaveBlas();
803-
ray.direction = saved_dir;
804-
ray.origin = saved_org;
805-
rcp_dir = rcp(ray.direction);
806-
blasStackBase = nullptr;
807-
bvh.nodes = tlasSavedNodePtr;
808-
bvh.primIDs = tlasSavedPrimIDs;
847+
while (true) {
848+
if (stackPtr == blasStackBase) {
849+
leaveBlas();
850+
ray.direction = saved_dir;
851+
ray.origin = saved_org;
852+
rcp_dir = rcp(ray.direction);
853+
blasStackBase = nullptr;
854+
bvh.nodes = tlasSavedNodePtr;
855+
bvh.primIDs = tlasSavedPrimIDs;
856+
}
857+
if (stackPtr == traversalStack)
858+
return;// ray.tMax;
859+
#if CUBQL_DIST_STACK
860+
scalar_t tFromStack = *--distStackPtr;
861+
if (tFromStack >= ray.tMax) {
862+
--stackPtr;
863+
continue;
864+
}
865+
#endif
866+
node = *--stackPtr;
867+
break;
809868
}
810-
if (stackPtr == traversalStack)
811-
return;// ray.tMax;
812-
node = *--stackPtr;
813869
}
814870
}
815871

0 commit comments

Comments
 (0)