-
Notifications
You must be signed in to change notification settings - Fork 117
Expand file tree
/
Copy pathreduce_sycl_reduction.cpp
More file actions
66 lines (49 loc) · 1.65 KB
/
reduce_sycl_reduction.cpp
File metadata and controls
66 lines (49 loc) · 1.65 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
/*
* This code performs a reduce operation over an array using a SYCL reduction
* variable.
*
*/
#include <iostream>

#include <benchmark.h>
#include <sycl/sycl.hpp>

#include "../helpers.hpp"
// Element type of the input array and of the reduction result.
using T = float;
// Number of elements in the array that gets reduced.
constexpr size_t dataSize = 32'768;
// Local (work-group) size of the nd_range the reduction kernel is launched on.
constexpr size_t workGroupSize = 1024;
// Iteration count handed to util::benchmark.
constexpr int numIters = 100;

// An nd_range needs its global size to be a whole number of work-groups.
// Reject an inconsistent edit to the constants at compile time rather than
// letting it surface as a launch error at run time.
static_assert(dataSize % workGroupSize == 0,
              "dataSize must be a multiple of workGroupSize");
/*
 * Benchmarks a sum reduction over `dataSize` elements using sycl::reduction,
 * then prints the device result next to a serial host sum for comparison.
 *
 * argc / argv are accepted but not used.
 *
 * Returns 0 on success, 1 if a device allocation fails.
 */
int main(int argc, char* argv[]) {
  // Host input: a[i] == i.
  T a[dataSize];
  for (size_t i = 0; i < dataSize; ++i) {
    a[i] = static_cast<T>(i);
  }

  auto q = sycl::queue{};

  T* devA = sycl::malloc_device<T>(dataSize, q);
  T* devReduced = sycl::malloc_device<T>(1, q);  // Receives the reduced sum

  // USM allocation functions report failure by returning a null pointer;
  // bail out instead of handing a null pointer to memcpy / the kernel.
  if (devA == nullptr || devReduced == nullptr) {
    std::cerr << "Failed to allocate device memory\n";
    if (devA != nullptr) {
      sycl::free(devA, q);
    }
    if (devReduced != nullptr) {
      sycl::free(devReduced, q);
    }
    return 1;
  }

  // Asynchronous host-to-device copies; the kernel waits on both events.
  T zeroVal = 0;
  auto e1 = q.memcpy(devA, a, sizeof(T) * dataSize);
  auto e2 = q.memcpy(devReduced, &zeroVal, sizeof(T));

  auto myNd = sycl::nd_range(sycl::range(dataSize), sycl::range(workGroupSize));

  util::benchmark(
      [&]() {
        q.submit([&](sycl::handler& cgh) {
           cgh.depends_on({e1, e2});

           // initialize_to_identity makes the reduction start from the
           // identity of plus rather than from the current content of
           // *devReduced, so a later submission overwrites the result of an
           // earlier one instead of adding to it.
           auto myReduction = sycl::reduction(
               devReduced, sycl::plus<T>(),
               sycl::property::reduction::initialize_to_identity{});

           // One work-item per element; each contributes its element to sum.
           cgh.parallel_for(myNd, myReduction,
                            [=](sycl::nd_item<1> item, auto& sum) {
                              sum += devA[item.get_global_linear_id()];
                            });
         }).wait();
      },
      numIters, "Reduction using sycl::reduction");

  // Fetch the device result.
  T devAns = 0;
  q.memcpy(&devAns, devReduced, sizeof(T)).wait();

  // Serial reference sum on the host.
  // NOTE(review): T is float and the running sum grows past 2^24, so the two
  // answers may differ in their low-order digits if the device adds the
  // elements in a different order - confirm this is acceptable.
  T serialAns = 0;
  for (size_t i = 0; i < dataSize; ++i) {
    serialAns += a[i];
  }

  std::cout << "Got device ans " << devAns << '\n';
  std::cout << "vs serial ans " << serialAns << "\n\n";

  sycl::free(devA, q);
  sycl::free(devReduced, q);
  return 0;
}