2525#include < cuda/__fwd/mdspan.h>
2626#include < cuda/__numeric/add_overflow.h>
2727#include < cuda/__numeric/mul_overflow.h>
28- #include < cuda/__numeric/overflow_cast.h>
2928#include < cuda/std/__concepts/concept_macros.h>
3029#include < cuda/std/__cstddef/types.h>
3130#include < cuda/std/__mdspan/concepts.h>
@@ -132,7 +131,7 @@ class _CCCL_DECLSPEC_EMPTY_BASES layout_stride_relaxed::mapping
132131 {
133132 for (rank_type __d = 0 ; __d < __rank_; ++__d)
134133 {
135- _CCCL_ASSERT (! ::cuda::overflow_cast <offset_type>(__other.stride (__d)),
134+ _CCCL_ASSERT (::cuda::std::in_range <offset_type>(__other.stride (__d)),
136135 " layout_stride_relaxed::mapping: stride is out of range" );
137136 __init_strides[__d] = static_cast <offset_type>(__other.stride (__d));
138137 }
@@ -261,25 +260,44 @@ class _CCCL_DECLSPEC_EMPTY_BASES layout_stride_relaxed::mapping
261260 // The dot product of indices and strides is linear.
262261 // Thus, over all valid indices, the max value of the dot product is achieved at the extrema: either the min
263262 // index (0) if the stride is negative, or the max index (extent(r) - 1) if the stride is non-negative.
264- // For non-negative stride: contribution is (extent - 1) * stride
265- // For negative stride: contribution is 0 (max achieved at index 0)
263+ // For non-negative stride: max contribution is (extent - 1) * stride, min contribution is 0
264+ // For negative stride: max contribution is 0 (max achieved at index 0), min is -(extent - 1) * |stride|
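265+ // __min_dot accumulates (extent - 1) * |stride| over the negative-stride dimensions, i.e. the magnitude of the most negative dot product, which the offset must be at least as large as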
266266 index_type __dot{1 };
267+ offset_type __min_dot{0 };
267268 for (rank_type __r = 0 ; __r < __rank_; ++__r)
268269 {
269270 const auto __ext = extents ().extent (__r);
270271 if (__ext == index_type{0 })
271272 {
272273 return index_type{0 };
273274 }
274- _CCCL_ASSERT (!::cuda::overflow_cast<index_type>(::cuda::uabs (strides ().stride (__r))),
275+ const auto __stride_val = strides ().stride (__r);
276+ _CCCL_ASSERT (::cuda::std::in_range<index_type>(::cuda::uabs (__stride_val)),
275277 " layout_stride_relaxed::mapping: stride is out of range" );
276- const auto __max_index = strides ().stride (__r) < 0 ? index_type{0 } : static_cast <index_type>(__ext - 1 );
277- const auto __stride = static_cast <index_type>(strides ().stride (__r));
278+ if (__stride_val < 0 )
279+ {
280+ _CCCL_ASSERT (::cuda::std::in_range<offset_type>(__ext - 1 ),
281+ " layout_stride_relaxed::mapping: extent - 1 is not representable as offset_type" );
282+ const auto __min_extent = static_cast <offset_type>(__ext - 1 );
283+ const auto __abs_stride_u = ::cuda::uabs (__stride_val);
284+ _CCCL_ASSERT (::cuda::std::in_range<offset_type>(__abs_stride_u),
285+ " layout_stride_relaxed::mapping: absolute stride is not representable as offset_type" );
286+ const auto __abs_stride = static_cast <offset_type>(__abs_stride_u);
287+ _CCCL_ASSERT (!::cuda::mul_overflow (__min_extent, __abs_stride)
288+ && !::cuda::add_overflow (__min_extent * __abs_stride, __min_dot),
289+ " layout_stride_relaxed::mapping: minimum mapped index is not representable" );
290+ __min_dot += __min_extent * __abs_stride;
291+ }
292+ const auto __max_index = __stride_val < 0 ? index_type{0 } : static_cast <index_type>(__ext - 1 );
293+ const auto __stride = static_cast <index_type>(__stride_val);
278294 _CCCL_ASSERT (!::cuda::mul_overflow<index_type>(__max_index, __stride)
279295 && !::cuda::add_overflow (__max_index * __stride, __dot),
280296 " layout_stride_relaxed::mapping: required_span_size is not representable as index_type" );
281297 __dot += __max_index * __stride;
282298 }
299+ _CCCL_ASSERT (::cuda::std::cmp_greater_equal (__offset_val, __min_dot),
300+ " layout_stride_relaxed::mapping: offset is insufficient for negative strides" );
283301 _CCCL_ASSERT (!::cuda::add_overflow<index_type>(__offset_val, __dot),
284302 " layout_stride_relaxed::mapping: required_span_size is not representable as index_type" );
285303 return static_cast <index_type>(__offset_val + __dot);
@@ -291,7 +309,7 @@ class _CCCL_DECLSPEC_EMPTY_BASES layout_stride_relaxed::mapping
291309 {
292310 if constexpr (::cuda::std::__cccl_is_integer_v<_Index>)
293311 {
294- return ::cuda::std::cmp_greater_equal (__index, index_type{0 }) && ! ::cuda::overflow_cast <index_type>(__index);
312+ return ::cuda::std::cmp_greater_equal (__index, index_type{0 }) && ::cuda::std::in_range <index_type>(__index);
295313 }
296314 else
297315 {
0 commit comments