88#include " cuBQL/math/Ray.h"
99#include " cuBQL/traversal/fixedBoxQuery.h"
1010
11+ #define CUBQL_DIST_STACK 1
12+
1113namespace cuBQL {
1214
1315 // ******************************************************************
@@ -263,15 +265,17 @@ namespace cuBQL {
263265 template <typename Lambda, typename T, int D>
264266 inline __cubql_both
265267 void fixedRayQuery::forEachLeaf (const Lambda &lambdaToCallOnEachLeaf,
266- cuBQL::bvh_t <T, D> bvh,
267- cuBQL::ray3f ray,
268+ cuBQL::bvh_t <T,D> bvh,
269+ cuBQL::ray_t <T> ray,
268270 bool dbg)
269271 {
272+ using node_admin_t = typename bvh_t <T,D>::node_t ::Admin;
273+ enum { stackDepth = 64 };
270274 struct StackEntry {
271275 uint32_t idx;
272276 };
273- bvh3f:: node_t ::Admin traversalStack[64 ], *stackPtr = traversalStack;
274- bvh3f:: node_t ::Admin node = bvh.nodes [0 ].admin ;
277+ node_admin_t traversalStack[stackDepth ], *stackPtr = traversalStack;
278+ node_admin_t node = bvh.nodes [0 ].admin ;
275279 // ------------------------------------------------------------------
276280 // traverse until there's nothing left to traverse:
277281 // ------------------------------------------------------------------
@@ -472,12 +476,18 @@ namespace cuBQL {
472476 ray_t ray,
473477 bool dbg)
474478 {
479+ using scalar_t = T;
475480 using node_t = typename bvh_t <T, D>::node_t ;
481+ using node_admin_t = typename node_t ::Admin;
476482 struct StackEntry {
477483 uint32_t idx;
478484 };
479- typename node_t ::Admin traversalStack[64 ], *stackPtr = traversalStack;
480- typename node_t ::Admin node = bvh.nodes [0 ].admin ;
485+ enum { stackDepth = 64 };
486+ node_admin_t traversalStack[stackDepth], *stackPtr = traversalStack;
487+ node_admin_t node = bvh.nodes [0 ].admin ;
488+ #if CUBQL_DIST_STACK
489+ T distStack[stackDepth], *distStackPtr = distStack;
490+ #endif
481491
482492 if (ray.direction .x == (T)0 ) ray.direction .x = T (1e-20 );
483493 if (ray.direction .y == (T)0 ) ray.direction .y = T (1e-20 );
@@ -510,7 +520,12 @@ namespace cuBQL {
510520
511521 if (o0) {
512522 if (o1) {
513- *stackPtr++ = (node_t0 < node_t1) ? n1.admin : n0.admin ;
523+ *stackPtr++
524+ = (node_t0 < node_t1) ? n1.admin : n0.admin ;
525+ #if CUBQL_DIST_STACK
526+ *distStackPtr++
527+ = (node_t0 < node_t1) ? node_t1 : node_t0;
528+ #endif
514529 node = (node_t0 < node_t1) ? n0.admin : n1.admin ;
515530 } else {
516531 node = n0.admin ;
@@ -536,9 +551,20 @@ namespace cuBQL {
536551 // pop next un-traversed node from stack, discarding any nodes
537552 // that are more distant than whatever query radius we now have
538553 // ------------------------------------------------------------------
539- if (stackPtr == traversalStack)
540- return ray.tMax ;
541- node = *--stackPtr;
554+
555+ while (true ) {
556+ if (stackPtr == traversalStack)
557+ return ray.tMax ;
558+ #if CUBQL_DIST_STACK
559+ scalar_t tFromStack = *--distStackPtr;
560+ if (tFromStack >= ray.tMax ) {
561+ --stackPtr;
562+ continue ;
563+ }
564+ #endif
565+ node = *--stackPtr;
566+ break ;
567+ }
542568 }
543569 }
544570
@@ -668,28 +694,38 @@ namespace cuBQL {
668694 bvh_t bvh,
669695 /* ! REFERENCE to a ray, so 'enterBlas()' can modify it */
670696 ray_t &ray,
671- bool dbg )
697+ bool _dbg )
672698 {
699+ #ifdef NDEBUG
700+ const bool dbg = false ;
701+ #else
702+ bool dbg = _dbg;
703+ #endif
673704 using node_t = typename bvh_t ::node_t ;
674705 using T = typename bvh_t ::scalar_t ;
706+ using scalar_t = typename bvh_t ::scalar_t ;
707+ using vec3_t = typename cuBQL::vec_t <scalar_t ,3 >;
675708 struct StackEntry {
676709 uint32_t idx;
677710 };
678- enum { STACK_DEPTH=128 };
711+ enum { STACK_DEPTH=64 };
679712 typename node_t ::Admin
680713 traversalStack[STACK_DEPTH],
681714 *stackPtr = traversalStack,
682715 *blasStackBase = nullptr ;
683716 typename node_t ::Admin node = bvh.nodes [0 ].admin ;
717+ #if CUBQL_DIST_STACK
718+ T distStack[STACK_DEPTH], *distStackPtr = distStack;
719+ #endif
684720
685721 node_t *tlasSavedNodePtr = 0 ;
686722 uint32_t *tlasSavedPrimIDs = 0 ;
687- vec_t <T, 3 > saved_dir, saved_org;
723+ vec3_t saved_dir, saved_org;
688724
689- if (ray.direction .x == (T )0 ) ray.direction .x = T (1e-20 );
690- if (ray.direction .y == (T )0 ) ray.direction .y = T (1e-20 );
691- if (ray.direction .z == (T )0 ) ray.direction .z = T (1e-20 );
692- vec_t <T, 3 > rcp_dir = rcp (ray.direction );
725+ if (ray.direction .x == (scalar_t )0 ) ray.direction .x = scalar_t (1e-20 );
726+ if (ray.direction .y == (scalar_t )0 ) ray.direction .y = scalar_t (1e-20 );
727+ if (ray.direction .z == (scalar_t )0 ) ray.direction .z = scalar_t (1e-20 );
728+ vec3_t rcp_dir = rcp (ray.direction );
693729
694730 // ------------------------------------------------------------------
695731 // traverse until there's nothing left to traverse:
@@ -714,8 +750,10 @@ namespace cuBQL {
714750 // it's not a real leaf, so this must be an instance node
715751 tlasSavedNodePtr = bvh.nodes ;
716752 tlasSavedPrimIDs = bvh.primIDs ;
753+ #ifndef NDEBUG
717754 if (node.count != 1 )
718755 printf (" TWO-LEVEL BVH MUST BE BUILT WITH 1 PRIM PER LEAF!\n " );
756+ #endif
719757 if (dbg)
720758 printf (" inner-leaf primIDs %p ofs %i count %i\n " ,
721759 bvh.primIDs ,
@@ -732,11 +770,14 @@ namespace cuBQL {
732770 bvh_t blas;
733771 ray_t transformed_ray = ray;
734772 enterBlas (transformed_ray,blas,instID);
735- ray.origin = transformed_ray.origin ;
773+ ray.origin = transformed_ray.origin ;
736774 ray.direction = transformed_ray.direction ;
737- if (ray.direction .x == (T)0 ) ray.direction .x = T (1e-20 );
738- if (ray.direction .y == (T)0 ) ray.direction .y = T (1e-20 );
739- if (ray.direction .z == (T)0 ) ray.direction .z = T (1e-20 );
775+ if (ray.direction .x == (scalar_t )0 )
776+ ray.direction .x = scalar_t (1e-20 );
777+ if (ray.direction .y == (scalar_t )0 )
778+ ray.direction .y = scalar_t (1e-20 );
779+ if (ray.direction .z == (scalar_t )0 )
780+ ray.direction .z = scalar_t (1e-20 );
740781 rcp_dir = rcp (ray.direction );
741782 bvh.nodes = blas.nodes ;
742783 bvh.primIDs = blas.primIDs ;
@@ -745,14 +786,14 @@ namespace cuBQL {
745786 // now check if that blas root node is _also_ a leaf:
746787 if (node.count != 0 )
747788 break ;
748- if (dbg) printf (" new node %i.%i\n " ,(int )node.offset ,(int )node.count );
789+ // if (dbg) printf("new node %i.%i\n",(int)node.offset,(int)node.count);
749790 }
750791
751792 uint32_t n0Idx = (uint32_t )node.offset +0 ;
752793 uint32_t n1Idx = (uint32_t )node.offset +1 ;
753794 node_t n0 = bvh.nodes [n0Idx];
754795 node_t n1 = bvh.nodes [n1Idx];
755- T node_t0 = T (0 ), node_t1 = T (0 );
796+ scalar_t node_t0 = scalar_t (0 ), node_t1 = scalar_t (0 );
756797 bool o0 = rayIntersectsBox (node_t0,ray,rcp_dir,n0.bounds );
757798 bool o1 = rayIntersectsBox (node_t1,ray,rcp_dir,n1.bounds );
758799
@@ -769,7 +810,12 @@ namespace cuBQL {
769810 return ;
770811 }
771812
772- *stackPtr++ = (node_t0 < node_t1) ? n1.admin : n0.admin ;
813+ *stackPtr++
814+ = (node_t0 < node_t1) ? n1.admin : n0.admin ;
815+ #if CUBQL_DIST_STACK
816+ *distStackPtr++
817+ = (node_t0 < node_t1) ? node_t1 : node_t0;
818+ #endif
773819 node = (node_t0 < node_t1) ? n0.admin : n1.admin ;
774820 } else {
775821 node = n0.admin ;
@@ -798,18 +844,28 @@ namespace cuBQL {
798844 // pop next un-traversed node from stack, discarding any nodes
799845 // that are more distant than whatever query radius we now have
800846 // ------------------------------------------------------------------
801- if (stackPtr == blasStackBase) {
802- leaveBlas ();
803- ray.direction = saved_dir;
804- ray.origin = saved_org;
805- rcp_dir = rcp (ray.direction );
806- blasStackBase = nullptr ;
807- bvh.nodes = tlasSavedNodePtr;
808- bvh.primIDs = tlasSavedPrimIDs;
847+ while (true ) {
848+ if (stackPtr == blasStackBase) {
849+ leaveBlas ();
850+ ray.direction = saved_dir;
851+ ray.origin = saved_org;
852+ rcp_dir = rcp (ray.direction );
853+ blasStackBase = nullptr ;
854+ bvh.nodes = tlasSavedNodePtr;
855+ bvh.primIDs = tlasSavedPrimIDs;
856+ }
857+ if (stackPtr == traversalStack)
858+ return ;// ray.tMax;
859+ #if CUBQL_DIST_STACK
860+ scalar_t tFromStack = *--distStackPtr;
861+ if (tFromStack >= ray.tMax ) {
862+ --stackPtr;
863+ continue ;
864+ }
865+ #endif
866+ node = *--stackPtr;
867+ break ;
809868 }
810- if (stackPtr == traversalStack)
811- return ;// ray.tMax;
812- node = *--stackPtr;
813869 }
814870 }
815871
0 commit comments