Skip to content

Commit 2f55b2e

Browse files
authored
additional constrains for layout_stride_relaxed (#8354)
* layout_stride_relaxed additional constrains * use cuda::std::in_range
1 parent 4bed546 commit 2f55b2e

File tree

4 files changed

+78
-10
lines changed

4 files changed

+78
-10
lines changed

libcudacxx/include/cuda/__mdspan/layout_stride_relaxed.h

Lines changed: 26 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@
2525
#include <cuda/__fwd/mdspan.h>
2626
#include <cuda/__numeric/add_overflow.h>
2727
#include <cuda/__numeric/mul_overflow.h>
28-
#include <cuda/__numeric/overflow_cast.h>
2928
#include <cuda/std/__concepts/concept_macros.h>
3029
#include <cuda/std/__cstddef/types.h>
3130
#include <cuda/std/__mdspan/concepts.h>
@@ -132,7 +131,7 @@ class _CCCL_DECLSPEC_EMPTY_BASES layout_stride_relaxed::mapping
132131
{
133132
for (rank_type __d = 0; __d < __rank_; ++__d)
134133
{
135-
_CCCL_ASSERT(!::cuda::overflow_cast<offset_type>(__other.stride(__d)),
134+
_CCCL_ASSERT(::cuda::std::in_range<offset_type>(__other.stride(__d)),
136135
"layout_stride_relaxed::mapping: stride is out of range");
137136
__init_strides[__d] = static_cast<offset_type>(__other.stride(__d));
138137
}
@@ -261,25 +260,44 @@ class _CCCL_DECLSPEC_EMPTY_BASES layout_stride_relaxed::mapping
261260
// The dot product of indices and strides is linear.
262261
// Thus, over all valid indices, the max value of the dot product is achieved at the extrema: either the min
263262
// index (0) if the stride is negative, or the max index (extent(r) - 1) if the stride is non-negative.
264-
// For non-negative stride: contribution is (extent - 1) * stride
265-
// For negative stride: contribution is 0 (max achieved at index 0)
263+
// For non-negative stride: max contribution is (extent - 1) * stride, min contribution is 0
264+
// For negative stride: max contribution is 0 (max achieved at index 0), min is -(extent - 1) * |stride|
265+
// __min_dot tracks the total positive magnitude of the negative contributions
266266
index_type __dot{1};
267+
offset_type __min_dot{0};
267268
for (rank_type __r = 0; __r < __rank_; ++__r)
268269
{
269270
const auto __ext = extents().extent(__r);
270271
if (__ext == index_type{0})
271272
{
272273
return index_type{0};
273274
}
274-
_CCCL_ASSERT(!::cuda::overflow_cast<index_type>(::cuda::uabs(strides().stride(__r))),
275+
const auto __stride_val = strides().stride(__r);
276+
_CCCL_ASSERT(::cuda::std::in_range<index_type>(::cuda::uabs(__stride_val)),
275277
"layout_stride_relaxed::mapping: stride is out of range");
276-
const auto __max_index = strides().stride(__r) < 0 ? index_type{0} : static_cast<index_type>(__ext - 1);
277-
const auto __stride = static_cast<index_type>(strides().stride(__r));
278+
if (__stride_val < 0)
279+
{
280+
_CCCL_ASSERT(::cuda::std::in_range<offset_type>(__ext - 1),
281+
"layout_stride_relaxed::mapping: extent - 1 is not representable as offset_type");
282+
const auto __min_extent = static_cast<offset_type>(__ext - 1);
283+
const auto __abs_stride_u = ::cuda::uabs(__stride_val);
284+
_CCCL_ASSERT(::cuda::std::in_range<offset_type>(__abs_stride_u),
285+
"layout_stride_relaxed::mapping: absolute stride is not representable as offset_type");
286+
const auto __abs_stride = static_cast<offset_type>(__abs_stride_u);
287+
_CCCL_ASSERT(!::cuda::mul_overflow(__min_extent, __abs_stride)
288+
&& !::cuda::add_overflow(__min_extent * __abs_stride, __min_dot),
289+
"layout_stride_relaxed::mapping: minimum mapped index is not representable");
290+
__min_dot += __min_extent * __abs_stride;
291+
}
292+
const auto __max_index = __stride_val < 0 ? index_type{0} : static_cast<index_type>(__ext - 1);
293+
const auto __stride = static_cast<index_type>(__stride_val);
278294
_CCCL_ASSERT(!::cuda::mul_overflow<index_type>(__max_index, __stride)
279295
&& !::cuda::add_overflow(__max_index * __stride, __dot),
280296
"layout_stride_relaxed::mapping: required_span_size is not representable as index_type");
281297
__dot += __max_index * __stride;
282298
}
299+
_CCCL_ASSERT(::cuda::std::cmp_greater_equal(__offset_val, __min_dot),
300+
"layout_stride_relaxed::mapping: offset is insufficient for negative strides");
283301
_CCCL_ASSERT(!::cuda::add_overflow<index_type>(__offset_val, __dot),
284302
"layout_stride_relaxed::mapping: required_span_size is not representable as index_type");
285303
return static_cast<index_type>(__offset_val + __dot);
@@ -291,7 +309,7 @@ class _CCCL_DECLSPEC_EMPTY_BASES layout_stride_relaxed::mapping
291309
{
292310
if constexpr (::cuda::std::__cccl_is_integer_v<_Index>)
293311
{
294-
return ::cuda::std::cmp_greater_equal(__index, index_type{0}) && !::cuda::overflow_cast<index_type>(__index);
312+
return ::cuda::std::cmp_greater_equal(__index, index_type{0}) && ::cuda::std::in_range<index_type>(__index);
295313
}
296314
else
297315
{

libcudacxx/include/cuda/__mdspan/strides.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,6 @@
2222
#endif // no system header
2323

2424
#include <cuda/__fwd/mdspan.h>
25-
#include <cuda/__numeric/overflow_cast.h>
2625
#include <cuda/std/__concepts/concept_macros.h>
2726
#include <cuda/std/__cstddef/types.h>
2827
#include <cuda/std/__mdspan/extents.h>
@@ -67,7 +66,7 @@ class strides
6766
[[nodiscard]] _CCCL_API static constexpr bool __is_representable_as(_From... __values) noexcept
6867
{
6968
return (
70-
(!::cuda::overflow_cast<offset_type>(__values) || static_cast<::cuda::std::ptrdiff_t>(__values) == dynamic_stride)
69+
(::cuda::std::in_range<offset_type>(__values) || static_cast<::cuda::std::ptrdiff_t>(__values) == dynamic_stride)
7170
&& ...);
7271
}
7372

libcudacxx/test/libcudacxx/cuda/containers/views/mdspan/layout_stride_relaxed/assertions.pass.cpp

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,10 +63,45 @@ void test_strides_narrowing_assertion()
6363
TEST_CCCL_ASSERT_FAILURE((small_strides(too_big)), "strides construction: stride is out of range");
6464
}
6565

66+
void test_insufficient_offset_for_negative_strides()
67+
{
68+
// 1D: extent=4, stride=-1 requires offset >= 3
69+
{
70+
using extents_t = cuda::std::extents<int, 4>;
71+
using mapping_t = cuda::layout_stride_relaxed::mapping<extents_t>;
72+
using strides_t = typename mapping_t::strides_type;
73+
using offset_t = typename mapping_t::offset_type;
74+
75+
TEST_CCCL_ASSERT_FAILURE(mapping_t(extents_t{}, strides_t(-1), static_cast<offset_t>(2)),
76+
"layout_stride_relaxed::mapping: offset is insufficient for negative strides");
77+
}
78+
// 2D: extents(3,4), strides(4,-1) requires offset >= (4-1)*1 = 3
79+
{
80+
using extents_t = cuda::std::extents<int, 3, 4>;
81+
using mapping_t = cuda::layout_stride_relaxed::mapping<extents_t>;
82+
using strides_t = typename mapping_t::strides_type;
83+
using offset_t = typename mapping_t::offset_type;
84+
85+
TEST_CCCL_ASSERT_FAILURE(mapping_t(extents_t{}, strides_t(4, -1), static_cast<offset_t>(2)),
86+
"layout_stride_relaxed::mapping: offset is insufficient for negative strides");
87+
}
88+
// 2D: extents(3,4), strides(-2,-1) requires offset >= (3-1)*2 + (4-1)*1 = 7
89+
{
90+
using extents_t = cuda::std::extents<int, 3, 4>;
91+
using mapping_t = cuda::layout_stride_relaxed::mapping<extents_t>;
92+
using strides_t = typename mapping_t::strides_type;
93+
using offset_t = typename mapping_t::offset_type;
94+
95+
TEST_CCCL_ASSERT_FAILURE(mapping_t(extents_t{}, strides_t(-2, -1), static_cast<offset_t>(6)),
96+
"layout_stride_relaxed::mapping: offset is insufficient for negative strides");
97+
}
98+
}
99+
66100
int main(int, char**)
67101
{
68102
test_negative_offset_assertion();
69103
test_static_stride_comparison();
70104
test_strides_narrowing_assertion();
105+
test_insufficient_offset_for_negative_strides();
71106
return 0;
72107
}

libcudacxx/test/libcudacxx/cuda/containers/views/mdspan/layout_stride_relaxed/required_span_size.pass.cpp

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -172,6 +172,22 @@ __host__ __device__ constexpr bool test()
172172
test_required_span_size(cuda::std::extents<int, D, D, D>(5, 0, 3), cuda::std::array<intptr_t, 3>{-1, 0, 1}, 4, 0);
173173
test_required_span_size(cuda::std::extents<int, D, D, D>(5, 3, 0), cuda::std::array<intptr_t, 3>{-1, 0, 1}, 4, 0);
174174

175+
// ============================================================================
176+
// Minimum valid offset for negative strides (boundary cases)
177+
// ============================================================================
178+
179+
// 1D: extent=4, stride=-1 requires offset >= (4-1)*1 = 3
180+
// mapped values: 3, 2, 1, 0 → required_span_size = 3 + 1 = 4
181+
test_required_span_size(cuda::std::extents<int, 4>(), cuda::std::array<intptr_t, 1>{-1}, 3, 4);
182+
183+
// 2D: extents(3,4), strides(4,-1) requires offset >= (4-1)*1 = 3 (only dim 1 is negative)
184+
// max mapped = 3 + (3-1)*4 + 0 = 11 → required_span_size = 12
185+
test_required_span_size(cuda::std::extents<int, 3, 4>(), cuda::std::array<intptr_t, 2>{4, -1}, 3, 12);
186+
187+
// 2D: extents(3,4), strides(-2,-1), both negative, requires offset >= (3-1)*2 + (4-1)*1 = 7
188+
// max mapped = 7 + 0 + 0 = 7 → required_span_size = 7 + 1 = 8
189+
test_required_span_size(cuda::std::extents<int, 3, 4>(), cuda::std::array<intptr_t, 2>{-2, -1}, 7, 8);
190+
175191
return true;
176192
}
177193

0 commit comments

Comments
 (0)