Skip to content

Commit 0106260

Browse files
authored
Merge pull request NVIDIA#20 from manuelkNVDA/main
Implement WideBVH fixedRadius and shrinkingRadius ray intersection queries
2 parents 2ec7128 + 6c48fa6 commit 0106260

1 file changed

Lines changed: 223 additions & 16 deletions

File tree

cuBQL/traversal/rayQueries.h

Lines changed: 223 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -21,17 +21,24 @@ namespace cuBQL {
2121
_terminate_ a traversal, but ordering child nodes is not required
2222
because ordering shouldn't matter */
2323
namespace fixedRayQuery {
24-
template<typename Lambda>
24+
template<typename Lambda, typename T, int D>
2525
inline __cubql_both
2626
void forEachLeaf(const Lambda &lambdaToExecuteForEachCandidate,
27-
cuBQL::bvh3f bvh,
27+
cuBQL::bvh_t<T, D> bvh,
28+
cuBQL::ray3f ray,
29+
bool dbg=false);
30+
31+
template<typename Lambda, typename T, int D, int W>
32+
inline __cubql_both
33+
void forEachLeaf(const Lambda &lambdaToExecuteForEachCandidate,
34+
cuBQL::WideBVH<T, D, W> bvh,
2835
cuBQL::ray3f ray,
2936
bool dbg=false);
3037

31-
template<typename Lambda>
38+
template<typename Lambda, typename bvh_t>
3239
inline __cubql_both
3340
void forEachPrim(const Lambda &lambdaToExecuteForEachCandidate,
34-
cuBQL::bvh3f bvh,
41+
bvh_t bvh,
3542
cuBQL::ray3f ray,
3643
bool dbg=false);
3744

@@ -77,10 +84,20 @@ namespace cuBQL {
7784
/*! single level BVH ray traversal, provided lambda covers what
7885
happens when a ray wants to intersect a given prim within that
7986
bvh */
80-
template<typename Lambda, typename bvh_t, typename ray_t>
87+
template<typename Lambda, typename T, int D, typename ray_t>
88+
inline __cubql_both
89+
float forEachLeaf(const Lambda &lambdaToCallOnEachLeaf,
90+
bvh_t<T, D> bvh,
91+
ray_t ray,
92+
bool dbg=false);
93+
94+
/*! single level BVH ray traversal, provided lambda covers what
95+
happens when a ray wants to intersect a given prim within that
96+
bvh */
97+
template<typename Lambda, typename T, int D, int W, typename ray_t>
8198
inline __cubql_both
8299
float forEachLeaf(const Lambda &lambdaToCallOnEachLeaf,
83-
bvh_t bvh,
100+
WideBVH<T, D, W> bvh,
84101
ray_t ray,
85102
bool dbg=false);
86103

@@ -243,12 +260,10 @@ namespace cuBQL {
243260
forEachLeaf(leafCode,bvh,ray,dbg);
244261
}
245262

246-
247-
248-
template<typename Lambda>
263+
template<typename Lambda, typename T, int D>
249264
inline __cubql_both
250265
void fixedRayQuery::forEachLeaf(const Lambda &lambdaToCallOnEachLeaf,
251-
cuBQL::bvh3f bvh,
266+
cuBQL::bvh_t<T, D> bvh,
252267
cuBQL::ray3f ray,
253268
bool dbg)
254269
{
@@ -314,12 +329,121 @@ namespace cuBQL {
314329
}
315330
}
316331

332+
template<int N>
333+
struct ChildOrder {
334+
inline __cubql_both void clear(int i) { v[i] = (uint64_t)-1; }
335+
inline __cubql_both void set(int i, float dist, uint32_t payload) {
336+
v[i] = (uint64_t(__float_as_int(dist)) << 32) | payload;
337+
}
338+
uint64_t v[N];
339+
};
340+
341+
template<int N>
342+
inline __cubql_both void sort(ChildOrder<N>& children)
343+
{
344+
#pragma unroll
345+
for (int i = N - 1; i > 0; --i) {
346+
#pragma unroll
347+
for (int j = 0; j < i; j++) {
348+
uint64_t c0 = children.v[j + 0];
349+
uint64_t c1 = children.v[j + 1];
350+
children.v[j + 0] = min(c0, c1);
351+
children.v[j + 1] = max(c0, c1);
352+
}
353+
}
354+
}
355+
356+
template<typename Lambda, typename T, int D, int W>
357+
inline __cubql_both
358+
void fixedRayQuery::forEachLeaf(const Lambda& lambdaToCallOnEachLeaf,
359+
cuBQL::WideBVH<T, D, W> bvh,
360+
cuBQL::ray3f ray,
361+
bool dbg)
362+
{
363+
using node_t = typename WideBVH<T, D, W>::node_t;
364+
365+
int traversalStack[64], * stackPtr = traversalStack;
366+
int nodeID = 0;
367+
368+
if (ray.direction.x == (T)0) ray.direction.x = T(1e-20);
369+
if (ray.direction.y == (T)0) ray.direction.y = T(1e-20);
370+
if (ray.direction.z == (T)0) ray.direction.z = T(1e-20);
371+
vec_t<T, 3> rcp_dir = rcp(ray.direction);
372+
373+
ChildOrder<W> childOrder;
374+
375+
// ------------------------------------------------------------------
376+
// traverse until there's nothing left to traverse:
377+
// ------------------------------------------------------------------
378+
while (true) {
379+
while (true) {
380+
while (nodeID == -1) {
381+
if (stackPtr == traversalStack)
382+
return;
383+
nodeID = *--stackPtr;
384+
// pop....
385+
}
386+
if (nodeID & (1 << 31))
387+
break;
388+
389+
node_t const& node = bvh.nodes[nodeID];
390+
#pragma unroll
391+
for (int c = 0; c < W; c++) {
392+
const auto child = node.children[c];
393+
if (!node.children[c].valid)
394+
childOrder.clear(c);
395+
else {
396+
float dist2;
397+
bool o = rayIntersectsBox(dist2, ray, rcp_dir, node.children[c].bounds);
398+
if (!o)
399+
childOrder.clear(c);
400+
else {
401+
uint32_t payload = child.count ?
402+
((1 << 31) | (nodeID << log_of<W>::value) | c) : child.offset;
403+
childOrder.set(c, dist2, payload);
404+
}
405+
}
406+
}
407+
sort(childOrder);
408+
#pragma unroll
409+
for (int c = W - 1; c > 0; --c) {
410+
uint64_t coc = childOrder.v[c];
411+
if (coc != uint64_t(-1)) {
412+
*stackPtr++ = coc;
413+
// if (stackPtr - stackBase == stackSize)
414+
// printf("stack overrun!\n");
415+
}
416+
}
417+
if (childOrder.v[0] == uint64_t(-1)) {
418+
nodeID = -1;
419+
continue;
420+
}
421+
nodeID = uint32_t(childOrder.v[0]);
422+
}
423+
424+
int c = nodeID & ((1 << log_of<W>::value) - 1);
425+
int n = (nodeID & 0x7fffffff) >> log_of<W>::value;
426+
int offset = bvh.nodes[n].children[c].offset;
427+
int count = bvh.nodes[n].children[c].count;
428+
429+
if (count != 0) {
430+
// we're at a valid leaf: call the lambda and see if that gave
431+
// us a new, closer cull radius
432+
int leafResult
433+
= lambdaToCallOnEachLeaf(bvh.primIDs + offset, count);
434+
if (leafResult == CUBQL_TERMINATE_TRAVERSAL)
435+
return;
436+
}
437+
nodeID = -1;
438+
}
439+
}
440+
317441
/*! this query assumes lambdas that return CUBQL_CONTINUE_TRAVERSAL
318442
or CUBQL_TERMINATE_TRAVERSAL */
319-
template<typename Lambda>
443+
template<typename Lambda, typename bvh_t>
320444
inline __cubql_both
321445
void fixedRayQuery::forEachPrim(const Lambda &lambdaToExecuteForEachCandidate,
322-
cuBQL::bvh3f bvh,
446+
bvh_t bvh,
323447
cuBQL::ray3f ray,
324448
bool dbg)
325449
{
@@ -341,15 +465,14 @@ namespace cuBQL {
341465
forEachLeaf(leafCode,bvh,ray,dbg);
342466
}
343467

344-
template<typename Lambda, typename bvh_t, typename ray_t>
468+
template<typename Lambda, typename T, int D, typename ray_t>
345469
inline __cubql_both
346470
float shrinkingRayQuery::forEachLeaf(const Lambda &lambdaToCallOnEachLeaf,
347-
bvh_t bvh,
471+
bvh_t<T, D> bvh,
348472
ray_t ray,
349473
bool dbg)
350474
{
351-
using node_t = typename bvh_t::node_t;
352-
using T = typename bvh_t::scalar_t;
475+
using node_t = typename bvh_t<T, D>::node_t;
353476
struct StackEntry {
354477
uint32_t idx;
355478
};
@@ -419,6 +542,90 @@ namespace cuBQL {
419542
}
420543
}
421544

545+
template<typename Lambda, typename T, int D, int W, typename ray_t>
546+
inline __cubql_both
547+
float shrinkingRayQuery::forEachLeaf(const Lambda& lambdaToCallOnEachLeaf,
548+
WideBVH<T, D, W> bvh,
549+
ray_t ray,
550+
bool dbg)
551+
{
552+
using node_t = typename WideBVH<T, D, W>::node_t;
553+
554+
int traversalStack[64], * stackPtr = traversalStack;
555+
int nodeID = 0;
556+
557+
if (ray.direction.x == (T)0) ray.direction.x = T(1e-20);
558+
if (ray.direction.y == (T)0) ray.direction.y = T(1e-20);
559+
if (ray.direction.z == (T)0) ray.direction.z = T(1e-20);
560+
vec_t<T, 3> rcp_dir = rcp(ray.direction);
561+
562+
ChildOrder<W> childOrder;
563+
564+
// ------------------------------------------------------------------
565+
// traverse until there's nothing left to traverse:
566+
// ------------------------------------------------------------------
567+
while (true) {
568+
while (true) {
569+
while (nodeID == -1) {
570+
if (stackPtr == traversalStack)
571+
return ray.tMax;
572+
nodeID = *--stackPtr;
573+
// pop....
574+
}
575+
if (nodeID & (1 << 31))
576+
break;
577+
578+
node_t const& node = bvh.nodes[nodeID];
579+
#pragma unroll
580+
for (int c = 0; c < W; c++) {
581+
const auto child = node.children[c];
582+
if (!node.children[c].valid)
583+
childOrder.clear(c);
584+
else {
585+
float dist2;
586+
bool o = rayIntersectsBox(dist2, ray, rcp_dir, node.children[c].bounds);
587+
if (!o)
588+
childOrder.clear(c);
589+
else {
590+
uint32_t payload = child.count ?
591+
((1 << 31) | (nodeID << log_of<W>::value) | c) : child.offset;
592+
childOrder.set(c, dist2, payload);
593+
}
594+
}
595+
}
596+
sort(childOrder);
597+
#pragma unroll
598+
for (int c = W - 1; c > 0; --c) {
599+
uint64_t coc = childOrder.v[c];
600+
if (coc != uint64_t(-1)) {
601+
*stackPtr++ = coc;
602+
// if (stackPtr - stackBase == stackSize)
603+
// printf("stack overrun!\n");
604+
}
605+
}
606+
if (childOrder.v[0] == uint64_t(-1)) {
607+
nodeID = -1;
608+
continue;
609+
}
610+
nodeID = uint32_t(childOrder.v[0]);
611+
}
612+
613+
int c = nodeID & ((1 << log_of<W>::value) - 1);
614+
int n = (nodeID & 0x7fffffff) >> log_of<W>::value;
615+
int offset = bvh.nodes[n].children[c].offset;
616+
int count = bvh.nodes[n].children[c].count;
617+
618+
if (count != 0) {
619+
// we're at a valid leaf: call the lambda and see if that gave
620+
// us a new, closer cull radius
621+
ray.tMax
622+
= lambdaToCallOnEachLeaf(bvh.primIDs + offset, count);
623+
}
624+
nodeID = -1;
625+
}
626+
return T(CUBQL_INF);
627+
}
628+
422629
template<typename Lambda, typename bvh_t, typename ray_t>
423630
inline __cubql_both
424631
void shrinkingRayQuery::forEachPrim(const Lambda &lambdaToExecuteForEachCandidate,

0 commit comments

Comments
 (0)