// NOTE(review): this is a unified diff against source/MRMesh/MRRayBoxIntersection.h, not a compilable
// header. These notes sit above the "diff --git" header, where patch tools skip leading text.
//
// What the hunks below show:
//  * Adds `#include "MRMesh/MRMacros.h"`.
//  * Marks member `p` of the generic RayOrigin with MR_BIND_IGNORE; the in-diff comment says this is done
//    to match the specialization whose `__m128 p` already carries MR_BIND_IGNORE.
//  * Moves `#endif` up so it closes directly after the `__m128` RayOrigin specialization.
//  * Removes the standalone non-template SSE overload of rayBoxIntersect (taking Box3f / float) together
//    with its trailing `#else` / `#pragma message` / `#endif`.
//  * Folds that SSE body into the template rayBoxIntersect, guarded by
//    `#if defined(__x86_64__) || defined(_M_X64)` and an `if constexpr (std::is_same_v...)` test; the
//    scalar code (per-axis std::min / std::max updates of t1 / t0 using prec.sign and prec.invDir)
//    becomes the `else` block, and the only block when the #if is false (where the #pragma message fires).
//
// SSE branch, as written in the hunk:
//  * `_mm_set_ps( x, y, z, t )` puts t in lane 0 and z, y, x in lanes 1..3, so the incoming t0 / t1 ride
//    along in lane 0 of `l` / `r`. RayOrigin's constructor stores 0 in lane 0, so the subtraction leaves
//    them unchanged; lane 0 of prec.invDir is not visible here - presumably 1, TODO confirm.
//  * `a` / `b` are the per-lane min / max of the two slab products; the movehl + shuffle( .., 1 ) + *_ss
//    sequence reduces all four lanes, so t0 becomes the maximum lane of `a` and t1 the minimum lane of `b`.
//  * Both branches return `t0 <= t1`.
//
// NOTE(review): template argument lists appear to have been stripped from this text (e.g. `template struct
// RayOrigin`, `std::is_same_v)`) - TODO restore from the real patch before applying.
// NOTE(review): `std::is_same_v` lives in <type_traits>; this diff does not add that include - presumably
// it arrives transitively through the existing headers, verify.
diff --git a/source/MRMesh/MRRayBoxIntersection.h b/source/MRMesh/MRRayBoxIntersection.h index acf17339d9c1..40657260175d 100644 --- a/source/MRMesh/MRRayBoxIntersection.h +++ b/source/MRMesh/MRRayBoxIntersection.h @@ -1,6 +1,7 @@ #pragma once #include "MRBox.h" #include "MRIntersectionPrecomputes.h" +#include "MRMesh/MRMacros.h" #include "MRPch/MRBindingMacros.h" namespace MR @@ -13,7 +14,8 @@ namespace MR template struct RayOrigin { - Vector3 p; + // This is hidden to match the specialization below. + MR_BIND_IGNORE Vector3 p; RayOrigin( const Vector3 & ro ) : p( ro ) { } }; @@ -25,57 +27,61 @@ struct RayOrigin MR_BIND_IGNORE __m128 p; RayOrigin( const Vector3f & ro ) { p = _mm_set_ps( ro.x, ro.y, ro.z, 0 ); } }; +#endif /// finds intersection between the Ray and the Box. /// Precomputed values could be useful for several calls with the same direction, /// see "An Efficient and Robust Ray-Box Intersection Algorithm" at https://people.csail.mit.edu/amy/papers/box-jgt.pdf -inline bool rayBoxIntersect( const Box3f& box, const RayOrigin & rayOrigin, float & t0, float & t1, const IntersectionPrecomputes& prec ) -{ - __m128 l = _mm_set_ps( box.min.x, box.min.y, box.min.z, t0 ); - __m128 r = _mm_set_ps( box.max.x, box.max.y, box.max.z, t1 ); - l = _mm_sub_ps( l, rayOrigin.p ); - r = _mm_sub_ps( r, rayOrigin.p ); - l = _mm_mul_ps( l, prec.invDir ); - r = _mm_mul_ps( r, prec.invDir ); - - __m128 a = _mm_min_ps( l, r ); - __m128 b = _mm_max_ps( l, r ); - - __m128 aa = _mm_movehl_ps( a, a ); - aa = _mm_max_ps( aa, a ); - __m128 aaa = _mm_shuffle_ps( aa, aa, 1 ); - aaa = _mm_max_ss( aaa, aa ); - t0 = _mm_cvtss_f32( aaa ); - - __m128 bb = _mm_movehl_ps( b, b ); - bb = _mm_min_ps( bb, b ); - __m128 bbb = _mm_shuffle_ps( bb, bb, 1 ); - bbb = _mm_min_ss( bbb, bb ); - t1 = _mm_cvtss_f32( bbb ); - - return t0 <= t1; -} -#else - #pragma message("rayBoxIntersect: no hardware optimized instructions") -#endif - -template +template bool rayBoxIntersect( const Box3& box, const RayOrigin 
& rayOrigin, T & t0, T & t1, const IntersectionPrecomputes& prec ) { - const Vector3i& sign = prec.sign; - - // compare and update x-dimension with t0-t1 - t1 = std::min( (box[sign.x].x - rayOrigin.p.x) * prec.invDir.x, t1 ); - t0 = std::max( (box[1 - sign.x].x - rayOrigin.p.x) * prec.invDir.x, t0 ); - - // compare and update y-dimension with t0-t1 - t1 = std::min( (box[sign.y].y - rayOrigin.p.y) * prec.invDir.y, t1 ); - t0 = std::max( (box[1 - sign.y].y - rayOrigin.p.y) * prec.invDir.y, t0 ); - - // compare and update z-dimension with t0-t1 - t1 = std::min( (box[sign.z].z - rayOrigin.p.z) * prec.invDir.z, t1 ); - t0 = std::max( (box[1 - sign.z].z - rayOrigin.p.z) * prec.invDir.z, t0 ); - return t0 <= t1; + #if defined(__x86_64__) || defined(_M_X64) + if constexpr (std::is_same_v) + { + __m128 l = _mm_set_ps( box.min.x, box.min.y, box.min.z, t0 ); + __m128 r = _mm_set_ps( box.max.x, box.max.y, box.max.z, t1 ); + l = _mm_sub_ps( l, rayOrigin.p ); + r = _mm_sub_ps( r, rayOrigin.p ); + l = _mm_mul_ps( l, prec.invDir ); + r = _mm_mul_ps( r, prec.invDir ); + + __m128 a = _mm_min_ps( l, r ); + __m128 b = _mm_max_ps( l, r ); + + __m128 aa = _mm_movehl_ps( a, a ); + aa = _mm_max_ps( aa, a ); + __m128 aaa = _mm_shuffle_ps( aa, aa, 1 ); + aaa = _mm_max_ss( aaa, aa ); + t0 = _mm_cvtss_f32( aaa ); + + __m128 bb = _mm_movehl_ps( b, b ); + bb = _mm_min_ps( bb, b ); + __m128 bbb = _mm_shuffle_ps( bb, bb, 1 ); + bbb = _mm_min_ss( bbb, bb ); + t1 = _mm_cvtss_f32( bbb ); + + return t0 <= t1; + } + else + #else + #pragma message("rayBoxIntersect: no hardware optimized instructions") + #endif + { + const Vector3i& sign = prec.sign; + + // compare and update x-dimension with t0-t1 + t1 = std::min( (box[sign.x].x - rayOrigin.p.x) * prec.invDir.x, t1 ); + t0 = std::max( (box[1 - sign.x].x - rayOrigin.p.x) * prec.invDir.x, t0 ); + + // compare and update y-dimension with t0-t1 + t1 = std::min( (box[sign.y].y - rayOrigin.p.y) * prec.invDir.y, t1 ); + t0 = std::max( (box[1 - sign.y].y - 
rayOrigin.p.y) * prec.invDir.y, t0 ); + + // compare and update z-dimension with t0-t1 + t1 = std::min( (box[sign.z].z - rayOrigin.p.z) * prec.invDir.z, t1 ); + t0 = std::max( (box[1 - sign.z].z - rayOrigin.p.z) * prec.invDir.z, t0 ); + return t0 <= t1; + } } template