@@ -21,17 +21,24 @@ namespace cuBQL {
2121 _terminate_ a traversal, but ordering child nodes is not required
2222 because ordering shouldn't matter */
2323 namespace fixedRayQuery {
24- template <typename Lambda>
24+ template <typename Lambda, typename T, int D >
2525 inline __cubql_both
2626 void forEachLeaf (const Lambda &lambdaToExecuteForEachCandidate,
27- cuBQL::bvh3f bvh,
27+ cuBQL::bvh_t <T, D> bvh,
28+ cuBQL::ray3f ray,
29+ bool dbg=false );
30+
31+ template <typename Lambda, typename T, int D, int W>
32+ inline __cubql_both
33+ void forEachLeaf (const Lambda &lambdaToExecuteForEachCandidate,
34+ cuBQL::WideBVH<T, D, W> bvh,
2835 cuBQL::ray3f ray,
2936 bool dbg=false );
3037
31- template <typename Lambda>
38+ template <typename Lambda, typename bvh_t >
3239 inline __cubql_both
3340 void forEachPrim (const Lambda &lambdaToExecuteForEachCandidate,
34- cuBQL::bvh3f bvh,
41+ bvh_t bvh,
3542 cuBQL::ray3f ray,
3643 bool dbg=false );
3744
@@ -77,10 +84,20 @@ namespace cuBQL {
7784 /* ! single level BVH ray traversal, provided lambda covers what
7885 happens when a ray wants to intersect a given leaf within that
7986 bvh */
80- template <typename Lambda, typename bvh_t , typename ray_t >
87+ template <typename Lambda, typename T, int D, typename ray_t >
88+ inline __cubql_both
89+ float forEachLeaf (const Lambda &lambdaToCallOnEachLeaf,
90+ bvh_t <T, D> bvh,
91+ ray_t ray,
92+ bool dbg=false );
93+
94+ /* ! single level wide-BVH ray traversal, provided lambda covers what
95+ happens when a ray wants to intersect a given leaf within that
96+ bvh */
97+ template <typename Lambda, typename T, int D, int W, typename ray_t >
8198 inline __cubql_both
8299 float forEachLeaf (const Lambda &lambdaToCallOnEachLeaf,
83- bvh_t bvh,
100+ WideBVH<T, D, W> bvh,
84101 ray_t ray,
85102 bool dbg=false );
86103
@@ -243,12 +260,10 @@ namespace cuBQL {
243260 forEachLeaf (leafCode,bvh,ray,dbg);
244261 }
245262
246-
247-
248- template <typename Lambda>
263+ template <typename Lambda, typename T, int D>
249264 inline __cubql_both
250265 void fixedRayQuery::forEachLeaf (const Lambda &lambdaToCallOnEachLeaf,
251- cuBQL::bvh3f bvh,
266+ cuBQL::bvh_t <T, D> bvh,
252267 cuBQL::ray3f ray,
253268 bool dbg)
254269 {
@@ -314,12 +329,121 @@ namespace cuBQL {
314329 }
315330 }
316331
332+ template <int N>
333+ struct ChildOrder {
334+ inline __cubql_both void clear (int i) { v[i] = (uint64_t )-1 ; }
335+ inline __cubql_both void set (int i, float dist, uint32_t payload) {
336+ v[i] = (uint64_t (__float_as_int (dist)) << 32 ) | payload;
337+ }
338+ uint64_t v[N];
339+ };
340+
341+ template <int N>
342+ inline __cubql_both void sort (ChildOrder<N>& children)
343+ {
344+ #pragma unroll
345+ for (int i = N - 1 ; i > 0 ; --i) {
346+ #pragma unroll
347+ for (int j = 0 ; j < i; j++) {
348+ uint64_t c0 = children.v [j + 0 ];
349+ uint64_t c1 = children.v [j + 1 ];
350+ children.v [j + 0 ] = min (c0, c1);
351+ children.v [j + 1 ] = max (c0, c1);
352+ }
353+ }
354+ }
355+
356+ template <typename Lambda, typename T, int D, int W>
357+ inline __cubql_both
358+ void fixedRayQuery::forEachLeaf (const Lambda& lambdaToCallOnEachLeaf,
359+ cuBQL::WideBVH<T, D, W> bvh,
360+ cuBQL::ray3f ray,
361+ bool dbg)
362+ {
363+ using node_t = typename WideBVH<T, D, W>::node_t ;
364+
365+ int traversalStack[64 ], * stackPtr = traversalStack;
366+ int nodeID = 0 ;
367+
368+ if (ray.direction .x == (T)0 ) ray.direction .x = T (1e-20 );
369+ if (ray.direction .y == (T)0 ) ray.direction .y = T (1e-20 );
370+ if (ray.direction .z == (T)0 ) ray.direction .z = T (1e-20 );
371+ vec_t <T, 3 > rcp_dir = rcp (ray.direction );
372+
373+ ChildOrder<W> childOrder;
374+
375+ // ------------------------------------------------------------------
376+ // traverse until there's nothing left to traverse:
377+ // ------------------------------------------------------------------
378+ while (true ) {
379+ while (true ) {
380+ while (nodeID == -1 ) {
381+ if (stackPtr == traversalStack)
382+ return ;
383+ nodeID = *--stackPtr;
384+ // pop....
385+ }
386+ if (nodeID & (1 << 31 ))
387+ break ;
388+
389+ node_t const & node = bvh.nodes [nodeID];
390+ #pragma unroll
391+ for (int c = 0 ; c < W; c++) {
392+ const auto child = node.children [c];
393+ if (!node.children [c].valid )
394+ childOrder.clear (c);
395+ else {
396+ float dist2;
397+ bool o = rayIntersectsBox (dist2, ray, rcp_dir, node.children [c].bounds );
398+ if (!o)
399+ childOrder.clear (c);
400+ else {
401+ uint32_t payload = child.count ?
402+ ((1 << 31 ) | (nodeID << log_of<W>::value) | c) : child.offset ;
403+ childOrder.set (c, dist2, payload);
404+ }
405+ }
406+ }
407+ sort (childOrder);
408+ #pragma unroll
409+ for (int c = W - 1 ; c > 0 ; --c) {
410+ uint64_t coc = childOrder.v [c];
411+ if (coc != uint64_t (-1 )) {
412+ *stackPtr++ = coc;
413+ // if (stackPtr - stackBase == stackSize)
414+ // printf("stack overrun!\n");
415+ }
416+ }
417+ if (childOrder.v [0 ] == uint64_t (-1 )) {
418+ nodeID = -1 ;
419+ continue ;
420+ }
421+ nodeID = uint32_t (childOrder.v [0 ]);
422+ }
423+
424+ int c = nodeID & ((1 << log_of<W>::value) - 1 );
425+ int n = (nodeID & 0x7fffffff ) >> log_of<W>::value;
426+ int offset = bvh.nodes [n].children [c].offset ;
427+ int count = bvh.nodes [n].children [c].count ;
428+
429+ if (count != 0 ) {
430+ // we're at a valid leaf: call the lambda and see if that gave
431+ // us a new, closer cull radius
432+ int leafResult
433+ = lambdaToCallOnEachLeaf (bvh.primIDs + offset, count);
434+ if (leafResult == CUBQL_TERMINATE_TRAVERSAL)
435+ return ;
436+ }
437+ nodeID = -1 ;
438+ }
439+ }
440+
317441 /* ! this query assumes lambdas that return CUBQL_CONTINUE_TRAVERSAL
318442 or CUBQL_TERMINATE_TRAVERSAL */
319- template <typename Lambda>
443+ template <typename Lambda, typename bvh_t >
320444 inline __cubql_both
321445 void fixedRayQuery::forEachPrim (const Lambda &lambdaToExecuteForEachCandidate,
322- cuBQL::bvh3f bvh,
446+ bvh_t bvh,
323447 cuBQL::ray3f ray,
324448 bool dbg)
325449 {
@@ -341,15 +465,14 @@ namespace cuBQL {
341465 forEachLeaf (leafCode,bvh,ray,dbg);
342466 }
343467
344- template <typename Lambda, typename bvh_t , typename ray_t >
468+ template <typename Lambda, typename T, int D , typename ray_t >
345469 inline __cubql_both
346470 float shrinkingRayQuery::forEachLeaf (const Lambda &lambdaToCallOnEachLeaf,
347- bvh_t bvh,
471+ bvh_t <T, D> bvh,
348472 ray_t ray,
349473 bool dbg)
350474 {
351- using node_t = typename bvh_t ::node_t ;
352- using T = typename bvh_t ::scalar_t ;
475+ using node_t = typename bvh_t <T, D>::node_t ;
353476 struct StackEntry {
354477 uint32_t idx;
355478 };
@@ -419,6 +542,90 @@ namespace cuBQL {
419542 }
420543 }
421544
545+ template <typename Lambda, typename T, int D, int W, typename ray_t >
546+ inline __cubql_both
547+ float shrinkingRayQuery::forEachLeaf (const Lambda& lambdaToCallOnEachLeaf,
548+ WideBVH<T, D, W> bvh,
549+ ray_t ray,
550+ bool dbg)
551+ {
552+ using node_t = typename WideBVH<T, D, W>::node_t ;
553+
554+ int traversalStack[64 ], * stackPtr = traversalStack;
555+ int nodeID = 0 ;
556+
557+ if (ray.direction .x == (T)0 ) ray.direction .x = T (1e-20 );
558+ if (ray.direction .y == (T)0 ) ray.direction .y = T (1e-20 );
559+ if (ray.direction .z == (T)0 ) ray.direction .z = T (1e-20 );
560+ vec_t <T, 3 > rcp_dir = rcp (ray.direction );
561+
562+ ChildOrder<W> childOrder;
563+
564+ // ------------------------------------------------------------------
565+ // traverse until there's nothing left to traverse:
566+ // ------------------------------------------------------------------
567+ while (true ) {
568+ while (true ) {
569+ while (nodeID == -1 ) {
570+ if (stackPtr == traversalStack)
571+ return ray.tMax ;
572+ nodeID = *--stackPtr;
573+ // pop....
574+ }
575+ if (nodeID & (1 << 31 ))
576+ break ;
577+
578+ node_t const & node = bvh.nodes [nodeID];
579+ #pragma unroll
580+ for (int c = 0 ; c < W; c++) {
581+ const auto child = node.children [c];
582+ if (!node.children [c].valid )
583+ childOrder.clear (c);
584+ else {
585+ float dist2;
586+ bool o = rayIntersectsBox (dist2, ray, rcp_dir, node.children [c].bounds );
587+ if (!o)
588+ childOrder.clear (c);
589+ else {
590+ uint32_t payload = child.count ?
591+ ((1 << 31 ) | (nodeID << log_of<W>::value) | c) : child.offset ;
592+ childOrder.set (c, dist2, payload);
593+ }
594+ }
595+ }
596+ sort (childOrder);
597+ #pragma unroll
598+ for (int c = W - 1 ; c > 0 ; --c) {
599+ uint64_t coc = childOrder.v [c];
600+ if (coc != uint64_t (-1 )) {
601+ *stackPtr++ = coc;
602+ // if (stackPtr - stackBase == stackSize)
603+ // printf("stack overrun!\n");
604+ }
605+ }
606+ if (childOrder.v [0 ] == uint64_t (-1 )) {
607+ nodeID = -1 ;
608+ continue ;
609+ }
610+ nodeID = uint32_t (childOrder.v [0 ]);
611+ }
612+
613+ int c = nodeID & ((1 << log_of<W>::value) - 1 );
614+ int n = (nodeID & 0x7fffffff ) >> log_of<W>::value;
615+ int offset = bvh.nodes [n].children [c].offset ;
616+ int count = bvh.nodes [n].children [c].count ;
617+
618+ if (count != 0 ) {
619+ // we're at a valid leaf: call the lambda and see if that gave
620+ // us a new, closer cull radius
621+ ray.tMax
622+ = lambdaToCallOnEachLeaf (bvh.primIDs + offset, count);
623+ }
624+ nodeID = -1 ;
625+ }
626+ return T (CUBQL_INF);
627+ }
628+
422629 template <typename Lambda, typename bvh_t , typename ray_t >
423630 inline __cubql_both
424631 void shrinkingRayQuery::forEachPrim (const Lambda &lambdaToExecuteForEachCandidate,
0 commit comments