Skip to content

Commit 70eb1d4

Browse files
authored
Merge pull request #45 from bkille/SIMD-hwy
Add support for SIMD highway
2 parents a3c98ed + ff6f54f commit 70eb1d4

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

41 files changed

+670
-652
lines changed

.github/workflows/coverage.yml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -62,7 +62,7 @@ jobs:
6262
# See https://cmake.org/cmake/help/latest/variable/CMAKE_BUILD_TYPE.html?highlight=cmake_build_type
6363
run: |
6464
which cpplint
65-
cmake -Bbuild -DBITLIB_TEST=1 -DCMAKE_CXX_CPPCHECK="cppcheck;--std=c++17;--file-filter=*BitLib*" -DCMAKE_CXX_CPPLINT="cpplint;--linelength=100;"
65+
cmake -Bbuild -DBITLIB_TEST=1 -DBITLIB_COVERAGE=1 -DCMAKE_CXX_CPPCHECK="cppcheck;--std=c++17;--file-filter=*BitLib*" -DCMAKE_CXX_CPPLINT="cpplint;--linelength=140;"
6666
6767
6868
- name: Build

CMakeLists.txt

Lines changed: 4 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -7,9 +7,6 @@ project(Bit-Vector VERSION 0.1.1)
77
# set output directory of builds
88
#set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
99

10-
# set build type
11-
set(CMAKE_BUILD_TYPE RelWithDebInfo)
12-
1310
# set CXX standard
1411
# Things seem to be faster in cxx 20, and there is also std::shift_*
1512
# Should fall back on 17 if 20 is not supported
@@ -33,7 +30,6 @@ target_include_directories(bitlib INTERFACE
3330

3431

3532
# specify global compiler flags
36-
add_compile_options(-march=native -pedantic -Wall -Wextra -Wfatal-errors)
3733
include_directories("include/" "utils/" )
3834

3935
# Add fmt library (useful for printing words in binary and other debugging stuff)
@@ -46,13 +42,14 @@ include_directories("include/" "utils/" )
4642
#FetchContent_MakeAvailable(fmt)
4743

4844

49-
option(BITLIB_SIMDPP "Build with simdpp SIMD extensions" OFF)
45+
option(BITLIB_HWY "Build with google highway SIMD extensions" OFF)
5046
option(BITLIB_BENCHMARK "Build bitlib benchmarks" OFF)
5147
option(BITLIB_EXAMPLE "Build bitlib examples" OFF)
5248
option(BITLIB_TEST "Build bitlib tests" OFF)
49+
option(BITLIB_COVERAGE "Compute test coverage" OFF)
5350

54-
if (BITLIB_SIMDPP)
55-
add_definitions(-DBITLIB_SIMDPP)
51+
if (BITLIB_HWY)
52+
add_definitions(-DBITLIB_HWY)
5653
endif()
5754

5855
if(BITLIB_BENCHMARK)

benchmark/CMakeLists.txt

Lines changed: 1 addition & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,4 @@
11
set(BENCHMARK_ENABLE_GTEST_TESTS OFF)
2-
set(DEBUG_FLAGS -O0 -g -fno-omit-frame-pointer -pg -fprofile-arcs -ftest-coverage -fsanitize=address -Wextra-semi)
3-
# specify the C++ standard
4-
cmake_minimum_required(VERSION 3.14)
52

63
find_package(benchmark REQUIRED)
74

@@ -19,5 +16,5 @@ add_executable(bitlib-bench ${BENCH_SOURCES})
1916
include_directories(${googlebench_SOURCE_DIR}/benchmark/include src/utils)
2017
target_link_libraries(bitlib-bench PRIVATE benchmark::benchmark -pthread)
2118

22-
target_compile_options(bitlib-bench PUBLIC -O3 -DNDEBUG -mtune=native)
19+
target_compile_options(bitlib-bench PUBLIC -O3 -DNDEBUG -march=native -Wpedantic)
2320
install(TARGETS bitlib-bench DESTINATION .)

benchmark/src/benchmark_main.cc

Lines changed: 56 additions & 33 deletions
Original file line number | Diff line number | Diff line change
@@ -24,6 +24,7 @@
2424
#include "rotate_bench.hpp"
2525
#include "reverse_bench.hpp"
2626
#include "fill_bench.hpp"
27+
#include "find_bench.hpp"
2728
#include "shift_bench.hpp"
2829
#include "copy_bench.hpp"
2930
#include "move_bench.hpp"
@@ -116,15 +117,15 @@ int main(int argc, char** argv) {
116117
register_word_containers<decltype(BM_BitShiftLeft), std::vector>(
117118
BM_BitShiftLeft,
118119
"bit::shift_left (large) (AA)",
119-
size_large);
120+
size_huge);
120121
register_word_containers<decltype(BM_BitShiftLeft_UU), std::vector>(
121122
BM_BitShiftLeft_UU,
122123
"bit::shift_left (large) (UU)",
123-
size_large);
124+
size_huge);
124125
register_bool_containers<decltype(BM_BoolShiftLeft), std::vector>(
125126
BM_BoolShiftLeft,
126127
"std::shift_left (large)",
127-
size_large);
128+
size_huge);
128129
register_word_containers<decltype(BM_BitShiftRight_UU), std::vector>(
129130
BM_BitShiftRight_UU,
130131
"bit::shift_right (small) (UU)",
@@ -136,11 +137,15 @@ int main(int argc, char** argv) {
136137
register_word_containers<decltype(BM_BitShiftRight), std::vector>(
137138
BM_BitShiftRight,
138139
"bit::shift_right (large) (AA)",
139-
size_large);
140+
size_huge);
141+
register_word_containers<decltype(BM_BitShiftRight_UU), std::vector>(
142+
BM_BitShiftRight_UU,
143+
"bit::shift_right (large) (UU)",
144+
size_huge);
140145
register_bool_containers<decltype(BM_BoolShiftRight), std::vector>(
141146
BM_BoolShiftRight,
142147
"std::shift_right (large)",
143-
size_large);
148+
size_huge);
144149

145150
// Reverse benchmarks
146151
register_word_containers<decltype(BM_BitReverse_UU), std::vector>(
@@ -154,15 +159,15 @@ int main(int argc, char** argv) {
154159
register_word_containers<decltype(BM_BitReverse), std::vector>(
155160
BM_BitReverse,
156161
"bit::reverse (large) (AA)",
157-
size_large);
162+
size_huge);
158163
register_word_containers<decltype(BM_BitReverse_UU), std::vector>(
159164
BM_BitReverse_UU,
160165
"bit::reverse (large) (UU)",
161-
size_large);
166+
size_huge);
162167
register_bool_containers<decltype(BM_BoolReverse), std::vector>(
163168
BM_BoolReverse,
164169
"std::reverse (large)",
165-
size_large);
170+
size_huge);
166171

167172
// transform benchmarks
168173
register_word_containers<decltype(BM_BitTransformUnaryAA), std::vector>(
@@ -180,15 +185,15 @@ int main(int argc, char** argv) {
180185
register_word_containers<decltype(BM_BitTransformUnaryAA), std::vector>(
181186
BM_BitTransformUnaryAA,
182187
"bit::transform(UnaryOp) (large) (AA)",
183-
size_large);
188+
size_huge);
184189
register_word_containers<decltype(BM_BitTransformUnaryUU), std::vector>(
185190
BM_BitTransformUnaryUU,
186191
"bit::transform(UnaryOp) (large) (UU)",
187-
size_large);
192+
size_huge);
188193
register_bool_containers<decltype(BM_BoolTransformUnary), std::vector>(
189194
BM_BoolTransformUnary,
190195
"std::transform(UnaryOp) (large)",
191-
size_large);
196+
size_huge);
192197
register_word_containers<decltype(BM_BitTransformBinaryAA), std::vector>(
193198
BM_BitTransformBinaryAA,
194199
"bit::transform(BinaryOp) (small) (AA)",
@@ -204,15 +209,15 @@ int main(int argc, char** argv) {
204209
register_word_containers<decltype(BM_BitTransformBinaryAA), std::vector>(
205210
BM_BitTransformBinaryAA,
206211
"bit::transform(BinaryOp) (large) (AA)",
207-
size_large);
212+
size_huge);
208213
register_word_containers<decltype(BM_BitTransformBinaryUU), std::vector>(
209214
BM_BitTransformBinaryUU,
210215
"bit::transform(BinaryOp) (large) (UU)",
211-
size_large);
216+
size_huge);
212217
register_bool_containers<decltype(BM_BoolTransformBinary), std::vector>(
213218
BM_BoolTransformBinary,
214219
"std::transform(BinaryOp) (large)",
215-
size_large);
220+
size_huge);
216221

217222
// Rotate benchmarks
218223
register_word_containers<decltype(BM_BitRotate), std::vector>(
@@ -226,11 +231,11 @@ int main(int argc, char** argv) {
226231
register_word_containers<decltype(BM_BitRotate), std::vector>(
227232
BM_BitRotate,
228233
"bit::rotate (large) (ARA)",
229-
size_large);
234+
size_huge);
230235
register_bool_containers<decltype(BM_BoolRotate), std::vector>(
231236
BM_BoolRotate,
232237
"std::rotate (large)",
233-
size_large);
238+
size_huge);
234239

235240
// Count benchmarks
236241
register_word_containers<decltype(BM_BitCount), std::vector>(
@@ -244,11 +249,11 @@ int main(int argc, char** argv) {
244249
register_word_containers<decltype(BM_BitCount), std::vector>(
245250
BM_BitCount,
246251
"bit::count (large) (AA)",
247-
size_large);
252+
size_huge);
248253
register_bool_containers<decltype(BM_BoolCount), std::vector>(
249254
BM_BoolCount,
250255
"std::count (large)",
251-
size_large);
256+
size_huge);
252257

253258
// swap_ranges benchmarks
254259
register_word_containers<decltype(BM_BitSwapRangesAA), std::vector>(
@@ -266,15 +271,15 @@ int main(int argc, char** argv) {
266271
register_word_containers<decltype(BM_BitSwapRangesAA), std::vector>(
267272
BM_BitSwapRangesAA,
268273
"bit::swap_ranges (large) (AA)",
269-
size_large);
274+
size_huge);
270275
register_word_containers<decltype(BM_BitSwapRangesUU), std::vector>(
271276
BM_BitSwapRangesUU,
272277
"bit::swap_ranges (large) (UU)",
273-
size_large);
278+
size_huge);
274279
register_bool_containers<decltype(BM_BoolSwapRanges), std::vector>(
275280
BM_BoolSwapRanges,
276281
"std::swap_ranges (large)",
277-
size_large);
282+
size_huge);
278283

279284
// copy benchmarks
280285
register_word_containers<decltype(BM_BitCopy), std::vector>(
@@ -288,11 +293,11 @@ int main(int argc, char** argv) {
288293
register_word_containers<decltype(BM_BitCopy), std::vector>(
289294
BM_BitCopy,
290295
"bit::copy (large) (UU)",
291-
size_large);
296+
size_huge);
292297
register_bool_containers<decltype(BM_BoolCopy), std::vector>(
293298
BM_BoolCopy,
294299
"std::copy (large)",
295-
size_large);
300+
size_huge);
296301

297302
// Equal benchmarks
298303
register_word_containers<decltype(BM_BitEqual), std::vector>(
@@ -306,11 +311,11 @@ int main(int argc, char** argv) {
306311
register_word_containers<decltype(BM_BitEqual), std::vector>(
307312
BM_BitEqual,
308313
"bit::equal (large) (UU)",
309-
size_large);
314+
size_huge);
310315
register_bool_containers<decltype(BM_BoolEqual), std::vector>(
311316
BM_BoolEqual,
312317
"std::equal (large)",
313-
size_large);
318+
size_huge);
314319

315320
// move benchmarks
316321
register_word_containers<decltype(BM_BitMove), std::vector>(
@@ -324,11 +329,11 @@ int main(int argc, char** argv) {
324329
register_word_containers<decltype(BM_BitMove), std::vector>(
325330
BM_BitMove,
326331
"bit::move (large) (UU)",
327-
size_large);
332+
size_huge);
328333
register_bool_containers<decltype(BM_BoolMove), std::vector>(
329334
BM_BoolMove,
330335
"std::move (large)",
331-
size_large);
336+
size_huge);
332337

333338
// copy_backward benchmarks
334339
register_word_containers<decltype(BM_BitCopyBackward), std::vector>(
@@ -342,11 +347,11 @@ int main(int argc, char** argv) {
342347
register_word_containers<decltype(BM_BitCopyBackward), std::vector>(
343348
BM_BitCopyBackward,
344349
"bit::copy_backward (large) (UU)",
345-
size_large);
350+
size_huge);
346351
register_bool_containers<decltype(BM_BoolCopyBackward), std::vector>(
347352
BM_BoolCopyBackward,
348353
"std::copy_backward (large)",
349-
size_large);
354+
size_huge);
350355

351356
// fill benchmarks
352357
register_word_containers<decltype(BM_BitFill), std::vector>(
@@ -366,23 +371,41 @@ int main(int argc, char** argv) {
366371
"std::fill (huge)",
367372
size_huge);
368373

374+
// find benchmarks
375+
register_word_containers<decltype(BM_BitFind), std::vector>(
376+
BM_BitFind,
377+
"bit::find (small) (UU)",
378+
size_small);
379+
register_bool_containers<decltype(BM_BoolFind), std::vector>(
380+
BM_BoolFind,
381+
"std::find (small)",
382+
size_small);
383+
register_word_containers<decltype(BM_BitFind), std::vector>(
384+
BM_BitFind,
385+
"bit::find (huge) (UU)",
386+
size_huge);
387+
register_bool_containers<decltype(BM_BoolFind), std::vector>(
388+
BM_BoolFind,
389+
"std::find (huge)",
390+
size_huge);
391+
369392
//// Search benchmarks
370393
//register_word_containers<decltype(BM_BitSearch), std::vector>(
371394
//BM_BitSearch,
372395
//"Search_Bit_Large",
373-
//size_large);
396+
//size_huge);
374397
//register_bool_containers<decltype(BM_BoolSearch), std::vector>(
375398
//BM_BoolSearch,
376399
//"Search_Bool_Large",
377-
//size_large);
400+
//size_huge);
378401
//register_word_containers<decltype(BM_BitSearch_WorstCase), std::vector>(
379402
//BM_BitSearch_WorstCase,
380403
//"Search_Bit_Large_WorstCase",
381-
//size_large);
404+
//size_huge);
382405
//register_bool_containers<decltype(BM_BoolSearch_WorstCase), std::vector>(
383406
//BM_BoolSearch_WorstCase,
384407
//"Search_Bool_Large_WorstCase",
385-
//size_large);
408+
//size_huge);
386409
benchmark::Initialize(&argc, argv);
387410
benchmark::RunSpecifiedBenchmarks();
388411
}

benchmark/src/find_bench.hpp

Lines changed: 6 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -8,24 +8,26 @@ auto BM_BitFind = [](benchmark::State& state, auto input) {
88
unsigned int total_bits = std::get<2>(input);
99
auto digits = bit::binary_digits<word_type>::value;
1010
auto container_size = ceil(float(total_bits) / digits);
11-
container_type bitcont = make_random_container<container_type>(container_size);
11+
container_type bitcont(container_size);
1212
auto first = bit::bit_iterator<decltype(std::begin(bitcont))>(std::begin(bitcont));
1313
auto last = bit::bit_iterator<decltype(std::end(bitcont))>(std::end(bitcont));
14+
*(first + (bitcont.size() / 2) + 4) = bit::bit1;
1415
for (auto _ : state) {
1516
benchmark::DoNotOptimize(bit::find(first + 2, last - 3, bit::bit1));
16-
//benchmark::ClobberMemory();
17+
benchmark::ClobberMemory();
1718
}
1819
};
1920

2021
auto BM_BoolFind = [](benchmark::State& state, auto input) {
2122
using container_type = std::vector<bool>;
2223
using num_type = typename container_type::value_type;
2324
unsigned int container_size = std::get<2>(input);
24-
container_type cont = make_random_container<container_type>(container_size);
25+
container_type cont(container_size);
26+
cont[(cont.size() / 2) + 4] = true;
2527
auto first = cont.begin();
2628
auto last = cont.end();
2729
for (auto _ : state) {
2830
benchmark::DoNotOptimize(std::find(first + 2, last - 3, true));
29-
//benchmark::ClobberMemory();
31+
benchmark::ClobberMemory();
3032
}
3133
};

benchmark/src/shift_bench.hpp

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -30,7 +30,7 @@ auto BM_BitShiftLeft_UU = [](benchmark::State& state, auto input) {
3030
container_type bitcont = make_random_container<container_type>(container_size);
3131
bit::bit_iterator<iterator_type> first = bit::bit_iterator<iterator_type>(bitcont.begin()) + 1;
3232
bit::bit_iterator<iterator_type> last = bit::bit_iterator<iterator_type>(bitcont.end()) - 1;
33-
auto n = bit::distance(first, last) / 2 + 3;
33+
auto n = bit::distance(first, last) / 2 + 6;
3434
for (auto _ : state) {
3535
benchmark::DoNotOptimize(bit::shift_left(first, last, n));
3636
benchmark::ClobberMemory();
@@ -44,7 +44,7 @@ auto BM_BoolShiftLeft = [](benchmark::State& state, auto input) {
4444
container_type cont = make_random_container<container_type>(container_size);
4545
auto first = cont.begin();
4646
auto last = cont.end();
47-
auto n = std::distance(first, last) / 2 + 3;
47+
auto n = std::distance(first, last) / 2 + 6;
4848
for (auto _ : state) {
4949
benchmark::DoNotOptimize(bit::word_shift_left(first, last, n));
5050
benchmark::ClobberMemory();
@@ -76,7 +76,7 @@ auto BM_BitShiftRight_UU = [](benchmark::State& state, auto input) {
7676
container_type bitcont = make_random_container<container_type>(container_size);
7777
auto first = bit::bit_iterator<decltype(std::begin(bitcont))>(std::begin(bitcont)) + 2;
7878
auto last = bit::bit_iterator<decltype(std::end(bitcont))>(std::end(bitcont)) - 3;
79-
auto n = bit::distance(first, last) / 2 + 3;
79+
auto n = bit::distance(first, last) / 2 + 6;
8080
for (auto _ : state) {
8181
benchmark::DoNotOptimize(bit::shift_right(first, last, n));
8282
benchmark::ClobberMemory();
@@ -90,7 +90,7 @@ auto BM_BoolShiftRight = [](benchmark::State& state, auto input) {
9090
container_type cont = make_random_container<container_type>(container_size);
9191
auto first = cont.begin();
9292
auto last = cont.end();
93-
auto n = std::distance(first, last) / 2 + 3;
93+
auto n = std::distance(first, last) / 2 + 6;
9494
for (auto _ : state) {
9595
benchmark::DoNotOptimize(bit::word_shift_right(first, last, n));
9696
benchmark::ClobberMemory();

0 commit comments

Comments
 (0)