From f6655d9db20410a4a28bb989b11c16b02ec428a5 Mon Sep 17 00:00:00 2001 From: niranda perera Date: Tue, 10 Sep 2024 16:20:16 -0400 Subject: [PATCH 01/11] adding api changes --- .gitignore | 1 + conda/environments/cylon.yml | 21 +- cpp/CMakeLists.txt | 9 +- cpp/src/cylon/arrow/arrow_comparator.cpp | 2 +- .../cylon/arrow/arrow_partition_kernels.cpp | 12 +- cpp/src/cylon/arrow/arrow_type_traits.hpp | 4 +- cpp/src/cylon/arrow/arrow_types.cpp | 297 ++++++++++++------ cpp/src/cylon/compute/aggregate_utils.hpp | 6 +- cpp/src/cylon/compute/aggregates.cpp | 7 + cpp/src/cylon/ctx/arrow_memory_pool_utils.hpp | 43 ++- cpp/src/cylon/ctx/memory_pool.hpp | 6 + cpp/src/cylon/indexing/index.cpp | 4 +- cpp/src/cylon/join/sort_join.cpp | 10 +- cpp/src/cylon/util/flatten_array.cpp | 2 +- cpp/src/examples/indexing_example.cpp | 60 ++-- cpp/test/comparator_test.cpp | 14 +- cpp/test/sorting_test.cpp | 6 +- cpp/test/test_arrow_utils.hpp | 8 +- 18 files changed, 310 insertions(+), 202 deletions(-) diff --git a/.gitignore b/.gitignore index d754c966a..1d769c496 100644 --- a/.gitignore +++ b/.gitignore @@ -38,6 +38,7 @@ # ignore these folders build/ +debug/ **/bin/ diff --git a/conda/environments/cylon.yml b/conda/environments/cylon.yml index 35849906a..937b9ddf5 100644 --- a/conda/environments/cylon.yml +++ b/conda/environments/cylon.yml @@ -3,19 +3,22 @@ channels: - conda-forge - defaults dependencies: - - python>=3.8,<3.10 - - cmake>=3.23.1,!=3.25.0 - - arrow-cpp=9 - - pyarrow=9.0.0 + - python>=3.9,<3.12 + - cmake + - pyarrow=16.1.0 + - libarrow-acero==16.1.0.* + - libarrow-dataset==16.1.0.* + - libarrow==16.1.0.* - glog - - openmpi=4.1.3=ha1ae619_105 - - ucx>=1.12.1 - - cython>=0.29.31,<3 - - numpy<1.24.4 - - pandas>=1.0,<2.0.0 + - openmpi + - ucx + - cython>=0.29.31 + - numpy>=1.23,<2.0a0 + - pandas>=2.0,<2.2.3dev - fsspec>=0.6.0 - setuptools # they are not needed for using pygcylon or compiling it - pytest - pytest-mpi - mpi4py + \ No newline at end of file diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index e7581731d..1cb8fbea3 100755 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -24,9 +24,10 @@ cmake_minimum_required(VERSION 3.17 FATAL_ERROR) set(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE) -project(CYLON VERSION 0.6.0) +set(CYLON_VERSION 0.7.0) + +project(CYLON VERSION ${CYLON_VERSION}) -set(CYLON_VERSION 0.6.0) ## defaults to release build if (NOT CMAKE_BUILD_TYPE) @@ -34,7 +35,7 @@ if (NOT CMAKE_BUILD_TYPE) endif () # cmake modules directories -set(CYLON_ARROW_VERSION 9.0.0) +set(CYLON_ARROW_VERSION 16.1.0) set(CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/CMake/Modules/" ${CMAKE_MODULE_PATH}) list(APPEND CMAKE_MODULE_PATH ${CYLON_SOURCE_DIR}/CMake) @@ -74,7 +75,7 @@ else () endif () # C++ standard -set(CMAKE_CXX_STANDARD 14) +set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_CXX_EXTENSIONS OFF) set(GCC_ABI_COMPILE_FLAGS "-D_GLIBCXX_USE_CXX11_ABI=0") diff --git a/cpp/src/cylon/arrow/arrow_comparator.cpp b/cpp/src/cylon/arrow/arrow_comparator.cpp index bbc149dc5..7d5372525 100644 --- a/cpp/src/cylon/arrow/arrow_comparator.cpp +++ b/cpp/src/cylon/arrow/arrow_comparator.cpp @@ -150,7 +150,7 @@ struct CompareFunc> { template struct CompareFunc> { - static int compare(const arrow::util::string_view &v1, const arrow::util::string_view &v2) { + static int compare(const std::string_view &v1, const std::string_view &v2) { if (Asc) { return v1.compare(v2); } else { diff --git a/cpp/src/cylon/arrow/arrow_partition_kernels.cpp b/cpp/src/cylon/arrow/arrow_partition_kernels.cpp index e827b15c6..112b93e99 100644 --- a/cpp/src/cylon/arrow/arrow_partition_kernels.cpp +++ b/cpp/src/cylon/arrow/arrow_partition_kernels.cpp @@ -260,7 +260,7 @@ class FixedSizeBinaryHashPartitionKernel : public HashPartitionKernel { return visit_chunked_array( idx_col, - [&](uint64_t global_idx, arrow::util::string_view val) { + [&](uint64_t global_idx, std::string_view val) { uint32_t hash = 0; util::MurmurHash3_x86_32(&val, len, 0, &hash); hash += 31 * target_partitions[global_idx]; @@ -276,7 +276,7 @@ class FixedSizeBinaryHashPartitionKernel : public HashPartitionKernel { } else { return visit_chunked_array( idx_col, - [&](uint64_t global_idx, arrow::util::string_view val) { + [&](uint64_t global_idx, std::string_view val) { uint32_t hash = 0; util::MurmurHash3_x86_32(&val, len, 0, &hash); hash += 31 * target_partitions[global_idx]; @@ -302,7 +302,7 @@ class FixedSizeBinaryHashPartitionKernel : public HashPartitionKernel { return visit_chunked_array (idx_col, - [&](uint64_t global_idx, arrow::util::string_view val) { + [&](uint64_t global_idx, std::string_view val) { uint32_t hash = 0; util::MurmurHash3_x86_32(&val, byte_width, 0, &hash); hash += 31 * partial_hashes[global_idx]; @@ -348,7 +348,7 @@ class BinaryHashPartitionKernel : public HashPartitionKernel { return visit_chunked_array( idx_col, - [&](uint64_t global_idx, arrow::util::string_view val) { + [&](uint64_t global_idx, std::string_view val) { uint32_t hash = 0; util::MurmurHash3_x86_32(&val, static_cast(val.size()), 0, &hash); hash += 31 * target_partitions[global_idx]; @@ -365,7 +365,7 @@ class BinaryHashPartitionKernel : public HashPartitionKernel { return visit_chunked_array( idx_col, - [&](uint64_t global_idx, arrow::util::string_view val) { + [&](uint64_t global_idx, std::string_view val) { uint32_t hash = 0; util::MurmurHash3_x86_32(&val, static_cast(val.size()), 0, &hash); hash += 31 * target_partitions[global_idx]; @@ -390,7 +390,7 @@ class BinaryHashPartitionKernel : public HashPartitionKernel { return visit_chunked_array( idx_col, - [&](uint64_t global_idx, arrow::util::string_view val) { + [&](uint64_t global_idx, std::string_view val) { uint32_t hash = 0; util::MurmurHash3_x86_32(&val, static_cast(val.size()), 0, &hash); hash += 31 * partial_hashes[global_idx]; diff --git a/cpp/src/cylon/arrow/arrow_type_traits.hpp b/cpp/src/cylon/arrow/arrow_type_traits.hpp index d995412a5..8054412c4 100644 --- a/cpp/src/cylon/arrow/arrow_type_traits.hpp +++ b/cpp/src/cylon/arrow/arrow_type_traits.hpp @@ -15,6 +15,8 @@ #ifndef CYLON_CPP_SRC_CYLON_ARROW_ARROW_TYPE_TRAITS_HPP_ #define CYLON_CPP_SRC_CYLON_ARROW_ARROW_TYPE_TRAITS_HPP_ +#include + namespace cylon { template @@ -35,7 +37,7 @@ template struct ArrowTypeTraits> { using ScalarT = typename arrow::TypeTraits::ScalarType; using ArrayT = typename arrow::TypeTraits::ArrayType; - using ValueT = arrow::util::string_view; + using ValueT = std::string_view; static ValueT ExtractFromScalar(const std::shared_ptr &scalar) { return ValueT(*(std::static_pointer_cast(scalar))->value); diff --git a/cpp/src/cylon/arrow/arrow_types.cpp b/cpp/src/cylon/arrow/arrow_types.cpp index 9eb820ff2..7af88119d 100644 --- a/cpp/src/cylon/arrow/arrow_types.cpp +++ b/cpp/src/cylon/arrow/arrow_types.cpp @@ -12,66 +12,93 @@ * limitations under the License. */ +#include + #include #include -#include - namespace cylon { namespace tarrow { std::shared_ptr ToArrowType(const std::shared_ptr &type) { switch (type->getType()) { - case Type::BOOL:return std::make_shared(); - case Type::UINT8:return std::make_shared(); - case Type::INT8:return std::make_shared(); - case Type::UINT16:return std::make_shared(); - case Type::INT16:return std::make_shared(); - case Type::UINT32:return std::make_shared(); - case Type::INT32:return std::make_shared(); - case Type::UINT64:return std::make_shared(); - case Type::INT64:return std::make_shared(); - case Type::HALF_FLOAT:return std::make_shared(); - case Type::FLOAT:return std::make_shared(); - case Type::DOUBLE:return std::make_shared(); - case Type::STRING:return std::make_shared(); - case Type::BINARY:return std::make_shared(); + case Type::BOOL: + return std::make_shared(); + case Type::UINT8: + return std::make_shared(); + case Type::INT8: + return std::make_shared(); + case Type::UINT16: + return std::make_shared(); + case Type::INT16: + return std::make_shared(); + case Type::UINT32: + return std::make_shared(); + case Type::INT32: + return std::make_shared(); + case Type::UINT64: + return std::make_shared(); + case Type::INT64: + return std::make_shared(); + case Type::HALF_FLOAT: + return std::make_shared(); + case Type::FLOAT: + return std::make_shared(); + case Type::DOUBLE: + return std::make_shared(); + case Type::STRING: + return std::make_shared(); + case Type::BINARY: + return std::make_shared(); case Type::FIXED_SIZE_BINARY: - return arrow::fixed_size_binary(std::static_pointer_cast(type) - ->byte_width_); - case Type::DATE32:return std::make_shared(); - case Type::DATE64:return std::make_shared(); + return arrow::fixed_size_binary( + std::static_pointer_cast(type)->byte_width_); + case Type::DATE32: + return std::make_shared(); + case Type::DATE64: + return std::make_shared(); case Type::TIMESTAMP: { const auto &casted = std::static_pointer_cast(type); return arrow::timestamp(ToArrowTimeUnit(casted->unit_), casted->timezone_); } - case Type::TIME32:return std::make_shared(); - case Type::TIME64:return std::make_shared(); + case Type::TIME32: + return std::make_shared(); + case Type::TIME64: + return std::make_shared(); case Type::DURATION: { const auto &casted = std::static_pointer_cast(type); return std::make_shared(ToArrowTimeUnit(casted->unit_)); } - case Type::LARGE_STRING:return std::make_shared(); - case Type::LARGE_BINARY:return std::make_shared(); + case Type::LARGE_STRING: + return std::make_shared(); + case Type::LARGE_BINARY: + return std::make_shared(); case Type::DECIMAL: { const auto &casted = std::static_pointer_cast(type); - if (casted->byte_width_ == 16) return arrow::decimal128(casted->precision_, casted->scale_); + if (casted->byte_width_ == 16) + return arrow::decimal128(casted->precision_, casted->scale_); else if (casted->byte_width_ == 32) return arrow::decimal256(casted->precision_, casted->scale_); - else break; + else + break; } - case Type::INTERVAL:break; - case Type::LIST:break; - case Type::FIXED_SIZE_LIST:break; - case Type::EXTENSION:break; - case Type::MAX_ID:break; + case Type::INTERVAL: + break; + case Type::LIST: + break; + case Type::FIXED_SIZE_LIST: + break; + case Type::EXTENSION: + break; + case Type::MAX_ID: + break; } return nullptr; } Status CheckSupportedTypes(const std::shared_ptr &table) { const auto &schema = table->schema(); - for (const auto &t: schema->fields()) { + for (const auto &t : schema->fields()) { switch (t->type()->id()) { /* following types are supported. go to next column type */ case arrow::Type::BOOL: @@ -95,7 +122,8 @@ Status CheckSupportedTypes(const std::shared_ptr &table) { case arrow::Type::DATE64: case arrow::Type::TIMESTAMP: case arrow::Type::TIME32: - case arrow::Type::TIME64: continue; + case arrow::Type::TIME64: + continue; case arrow::Type::LIST: { const auto &t_value = std::static_pointer_cast(t->type()); switch (t_value->value_type()->id()) { @@ -110,13 +138,16 @@ Status CheckSupportedTypes(const std::shared_ptr &table) { case arrow::Type::INT64: case arrow::Type::HALF_FLOAT: case arrow::Type::FLOAT: - case arrow::Type::DOUBLE:continue; + case arrow::Type::DOUBLE: + continue; default: - return {Code::NotImplemented, - "unsupported value type for lists " + t_value->value_type()->ToString()};; + return {Code::NotImplemented, "unsupported value type for lists " + + t_value->value_type()->ToString()}; + ; } } - default: return {Code::NotImplemented, "unsupported type " + t->type()->ToString()}; + default: + return {Code::NotImplemented, "unsupported type " + t->type()->ToString()}; } } return Status::OK(); @@ -124,53 +155,81 @@ Status CheckSupportedTypes(const std::shared_ptr &table) { TimeUnit::type ToCylonTimeUnit(arrow::TimeUnit::type a_time_unit) { switch (a_time_unit) { - case arrow::TimeUnit::MICRO: return TimeUnit::MICRO; - case arrow::TimeUnit::SECOND: return TimeUnit::SECOND; - case arrow::TimeUnit::MILLI: return TimeUnit::MILLI; - case arrow::TimeUnit::NANO: return TimeUnit::NANO; + case arrow::TimeUnit::MICRO: + return TimeUnit::MICRO; + case arrow::TimeUnit::SECOND: + return TimeUnit::SECOND; + case arrow::TimeUnit::MILLI: + return TimeUnit::MILLI; + case arrow::TimeUnit::NANO: + return TimeUnit::NANO; } return TimeUnit::MICRO; } arrow::TimeUnit::type ToArrowTimeUnit(TimeUnit::type time_unit) { switch (time_unit) { - case TimeUnit::MICRO: return arrow::TimeUnit::MICRO; - case TimeUnit::SECOND: return arrow::TimeUnit::SECOND; - case TimeUnit::MILLI: return arrow::TimeUnit::MILLI; - case TimeUnit::NANO: return arrow::TimeUnit::NANO; + case TimeUnit::MICRO: + return arrow::TimeUnit::MICRO; + case TimeUnit::SECOND: + return arrow::TimeUnit::SECOND; + case TimeUnit::MILLI: + return arrow::TimeUnit::MILLI; + case TimeUnit::NANO: + return arrow::TimeUnit::NANO; } return arrow::TimeUnit::MICRO; } std::shared_ptr ToCylonType(const std::shared_ptr &a_type) { switch (a_type->id()) { - case arrow::Type::BOOL:return cylon::Bool(); - case arrow::Type::UINT8:return cylon::UInt8(); - case arrow::Type::INT8:return cylon::Int8(); - case arrow::Type::UINT16:return cylon::UInt16(); - case arrow::Type::INT16:return cylon::Int16(); - case arrow::Type::UINT32:return cylon::UInt32(); - case arrow::Type::INT32:return cylon::Int32(); - case arrow::Type::UINT64:return cylon::UInt64(); - case arrow::Type::INT64:return cylon::Int64(); - case arrow::Type::HALF_FLOAT:return cylon::HalfFloat(); - case arrow::Type::FLOAT:return cylon::Float(); - case arrow::Type::DOUBLE:return cylon::Double(); + case arrow::Type::BOOL: + return cylon::Bool(); + case arrow::Type::UINT8: + return cylon::UInt8(); + case arrow::Type::INT8: + return cylon::Int8(); + case arrow::Type::UINT16: + return cylon::UInt16(); + case arrow::Type::INT16: + return cylon::Int16(); + case arrow::Type::UINT32: + return cylon::UInt32(); + case arrow::Type::INT32: + return cylon::Int32(); + case arrow::Type::UINT64: + return cylon::UInt64(); + case arrow::Type::INT64: + return cylon::Int64(); + case arrow::Type::HALF_FLOAT: + return cylon::HalfFloat(); + case arrow::Type::FLOAT: + return cylon::Float(); + case arrow::Type::DOUBLE: + return cylon::Double(); case arrow::Type::FIXED_SIZE_BINARY: - return cylon::FixedSizeBinary(std::static_pointer_cast(a_type) - ->byte_width()); - case arrow::Type::BINARY:return cylon::Binary(); - case arrow::Type::STRING:return cylon::String(); - case arrow::Type::LARGE_STRING: return cylon::LargeString(); - case arrow::Type::LARGE_BINARY: return cylon::LargeBinary(); - case arrow::Type::DATE32:return cylon::Date32(); - case arrow::Type::DATE64:return cylon::Date64(); + return cylon::FixedSizeBinary( + std::static_pointer_cast(a_type)->byte_width()); + case arrow::Type::BINARY: + return cylon::Binary(); + case arrow::Type::STRING: + return cylon::String(); + case arrow::Type::LARGE_STRING: + return cylon::LargeString(); + case arrow::Type::LARGE_BINARY: + return cylon::LargeBinary(); + case arrow::Type::DATE32: + return cylon::Date32(); + case arrow::Type::DATE64: + return cylon::Date64(); case arrow::Type::TIMESTAMP: { const auto &casted = std::static_pointer_cast(a_type); return cylon::Timestamp(ToCylonTimeUnit(casted->unit()), casted->timezone()); } - case arrow::Type::TIME32:return cylon::Time32(); - case arrow::Type::TIME64:return cylon::Time64(); + case arrow::Type::TIME32: + return cylon::Time32(); + case arrow::Type::TIME64: + return cylon::Time64(); case arrow::Type::DECIMAL128: { const auto &casted = std::static_pointer_cast(a_type); return cylon::Decimal(16, casted->precision(), casted->scale()); @@ -179,49 +238,79 @@ std::shared_ptr ToCylonType(const std::shared_ptr &a_ const auto &casted = std::static_pointer_cast(a_type); return cylon::Decimal(32, casted->precision(), casted->scale()); } - case arrow::Type::NA:break; - case arrow::Type::INTERVAL_MONTHS:break; - case arrow::Type::INTERVAL_DAY_TIME:break; - case arrow::Type::LIST:break; - case arrow::Type::STRUCT:break; - case arrow::Type::SPARSE_UNION:break; - case arrow::Type::DENSE_UNION:break; - case arrow::Type::DICTIONARY:break; - case arrow::Type::MAP:break; - case arrow::Type::EXTENSION:break; - case arrow::Type::FIXED_SIZE_LIST:break; - case arrow::Type::DURATION:break; - case arrow::Type::LARGE_LIST:break; - case arrow::Type::MAX_ID:break; + case arrow::Type::NA: + case arrow::Type::INTERVAL_MONTHS: + case arrow::Type::INTERVAL_DAY_TIME: + case arrow::Type::LIST: + case arrow::Type::STRUCT: + case arrow::Type::SPARSE_UNION: + case arrow::Type::DENSE_UNION: + case arrow::Type::DICTIONARY: + case arrow::Type::MAP: + case arrow::Type::EXTENSION: + case arrow::Type::FIXED_SIZE_LIST: + case arrow::Type::DURATION: + case arrow::Type::LARGE_LIST: + case arrow::Type::MAX_ID: + case arrow::Type::INTERVAL_MONTH_DAY_NANO: + case arrow::Type::RUN_END_ENCODED: + case arrow::Type::STRING_VIEW: + case arrow::Type::BINARY_VIEW: + case arrow::Type::LIST_VIEW: + case arrow::Type::LARGE_LIST_VIEW: + break; } return nullptr; } Type::type ToCylonTypeId(const std::shared_ptr &type) { switch (type->id()) { - case arrow::Type::BOOL:return Type::BOOL; - case arrow::Type::UINT8:return Type::UINT8; - case arrow::Type::INT8:return Type::INT8; - case arrow::Type::UINT16:return Type::UINT16; - case arrow::Type::INT16:return Type::INT16; - case arrow::Type::UINT32:return Type::UINT32; - case arrow::Type::INT32:return Type::INT32; - case arrow::Type::UINT64:return Type::UINT64; - case arrow::Type::INT64:return Type::INT64; - case arrow::Type::HALF_FLOAT:return Type::HALF_FLOAT; - case arrow::Type::FLOAT:return Type::FLOAT; - case arrow::Type::DOUBLE:return Type::DOUBLE; - case arrow::Type::STRING:return Type::STRING; - case arrow::Type::BINARY:return Type::BINARY; - case arrow::Type::FIXED_SIZE_BINARY:return Type::FIXED_SIZE_BINARY; - case arrow::Type::DATE32:return Type::DATE32; - case arrow::Type::DATE64:return Type::DATE64; - case arrow::Type::TIMESTAMP:return Type::TIMESTAMP; - case arrow::Type::TIME32:return Type::TIME32; - case arrow::Type::TIME64:return Type::TIME64; - case arrow::Type::LARGE_STRING:return Type::LARGE_STRING; - case arrow::Type::LARGE_BINARY:return Type::LARGE_BINARY; - default:return Type::MAX_ID; + case arrow::Type::BOOL: + return Type::BOOL; + case arrow::Type::UINT8: + return Type::UINT8; + case arrow::Type::INT8: + return Type::INT8; + case arrow::Type::UINT16: + return Type::UINT16; + case arrow::Type::INT16: + return Type::INT16; + case arrow::Type::UINT32: + return Type::UINT32; + case arrow::Type::INT32: + return Type::INT32; + case arrow::Type::UINT64: + return Type::UINT64; + case arrow::Type::INT64: + return Type::INT64; + case arrow::Type::HALF_FLOAT: + return Type::HALF_FLOAT; + case arrow::Type::FLOAT: + return Type::FLOAT; + case arrow::Type::DOUBLE: + return Type::DOUBLE; + case arrow::Type::STRING: + return Type::STRING; + case arrow::Type::BINARY: + return Type::BINARY; + case arrow::Type::FIXED_SIZE_BINARY: + return Type::FIXED_SIZE_BINARY; + case arrow::Type::DATE32: + return Type::DATE32; + case arrow::Type::DATE64: + return Type::DATE64; + case arrow::Type::TIMESTAMP: + return Type::TIMESTAMP; + case arrow::Type::TIME32: + return Type::TIME32; + case arrow::Type::TIME64: + return Type::TIME64; + case arrow::Type::LARGE_STRING: + return Type::LARGE_STRING; + case arrow::Type::LARGE_BINARY: + return Type::LARGE_BINARY; + default: + return Type::MAX_ID; } } diff --git a/cpp/src/cylon/compute/aggregate_utils.hpp b/cpp/src/cylon/compute/aggregate_utils.hpp index db5b30ff2..1c1f9ff71 100644 --- a/cpp/src/cylon/compute/aggregate_utils.hpp +++ b/cpp/src/cylon/compute/aggregate_utils.hpp @@ -99,9 +99,9 @@ cylon::Status AllReduce(const std::shared_ptr &ctx, auto rcv_scalar = std::make_shared(*send_scalar); std::memset(&rcv_scalar->value, 0, sizeof(CType)); - RETURN_CYLON_STATUS_IF_FAILED(cylon::mpi::AllReduce(ctx, send_scalar->mutable_data(), - rcv_scalar->mutable_data(), - 1, data_type, reduce_ops[i])); + RETURN_CYLON_STATUS_IF_FAILED(cylon::mpi::AllReduce( + ctx, send_scalar->data(), const_cast(rcv_scalar->data()), 1, + data_type, reduce_ops[i])); rcv_scalar_vector.push_back(rcv_scalar); } auto rcv_struct_scalar = std::make_shared(rcv_scalar_vector, diff --git a/cpp/src/cylon/compute/aggregates.cpp b/cpp/src/cylon/compute/aggregates.cpp index 94f686572..324e064de 100644 --- a/cpp/src/cylon/compute/aggregates.cpp +++ b/cpp/src/cylon/compute/aggregates.cpp @@ -274,6 +274,13 @@ cylon::Status CreateTableFromScalar(const std::shared_ptr &input, case arrow::Type::DENSE_UNION:break; case arrow::Type::MAX_ID:break; case arrow::Type::DECIMAL256:break; + case arrow::Type::INTERVAL_MONTH_DAY_NANO:break; + case arrow::Type::RUN_END_ENCODED:break; + case arrow::Type::STRING_VIEW:break; + case arrow::Type::BINARY_VIEW:break; + case arrow::Type::LIST_VIEW:break; + case arrow::Type::LARGE_LIST_VIEW:break; + } return cylon::Status(Code::NotImplemented, "Not Supported Type"); } diff --git a/cpp/src/cylon/ctx/arrow_memory_pool_utils.hpp b/cpp/src/cylon/ctx/arrow_memory_pool_utils.hpp index 09d2a9e5f..0f91b401d 100644 --- a/cpp/src/cylon/ctx/arrow_memory_pool_utils.hpp +++ b/cpp/src/cylon/ctx/arrow_memory_pool_utils.hpp @@ -16,55 +16,54 @@ #define CYLON_SRC_CYLON_CTX_ARROW_MEMORY_POOL_UTILS_HPP_ #include + #include namespace cylon { inline arrow::Status ArrowStatus(const cylon::Status &status) { - return arrow::Status(static_cast(status.get_code()), status.get_msg()); + return arrow::Status(static_cast(status.get_code()), + status.get_msg()); } class ProxyMemoryPool : public arrow::MemoryPool { public: - explicit ProxyMemoryPool(cylon::MemoryPool *tx_memory) { - this->tx_memory = tx_memory; - } + explicit ProxyMemoryPool(cylon::MemoryPool *tx_memory) { this->tx_memory = tx_memory; } - ~ProxyMemoryPool() override { - delete tx_memory; - } + ~ProxyMemoryPool() override { delete tx_memory; } - arrow::Status Allocate(int64_t size, uint8_t **out) override { + arrow::Status Allocate(int64_t size, int64_t /* alignment */, uint8_t **out) override { return ArrowStatus(tx_memory->Allocate(size, out)); } - arrow::Status Reallocate(int64_t old_size, int64_t new_size, uint8_t **ptr) override { + arrow::Status Reallocate(int64_t old_size, int64_t new_size, int64_t /* alignment */, + uint8_t **ptr) override { return ArrowStatus(tx_memory->Reallocate(old_size, new_size, ptr)); }; - void Free(uint8_t *buffer, int64_t size) override { + void Free(uint8_t *buffer, int64_t size, int64_t /* alignment */) override { tx_memory->Free(buffer, size); } - int64_t bytes_allocated() const override { - return this->tx_memory->bytes_allocated(); - } + int64_t bytes_allocated() const override { return this->tx_memory->bytes_allocated(); } - int64_t max_memory() const override { - return this->tx_memory->max_memory(); - } + int64_t max_memory() const override { return this->tx_memory->max_memory(); } - std::string backend_name() const override { - return this->tx_memory->backend_name(); + int64_t total_bytes_allocated() const override { + return this->tx_memory->total_bytes_allocated(); } + int64_t num_allocations() const override { return this->tx_memory->num_allocations(); } + + std::string backend_name() const override { return this->tx_memory->backend_name(); } + private: cylon::MemoryPool *tx_memory; }; arrow::MemoryPool *ToArrowPool(const std::shared_ptr &ctx); -arrow::MemoryPool *ToArrowPool(cylon::CylonContext* ctx); -arrow::MemoryPool *ToArrowPool(MemoryPool* pool); -} +arrow::MemoryPool *ToArrowPool(cylon::CylonContext *ctx); +arrow::MemoryPool *ToArrowPool(MemoryPool *pool); +} // namespace cylon -#endif //CYLON_SRC_CYLON_CTX_ARROW_MEMORY_POOL_UTILS_HPP_ +#endif // CYLON_SRC_CYLON_CTX_ARROW_MEMORY_POOL_UTILS_HPP_ diff --git a/cpp/src/cylon/ctx/memory_pool.hpp b/cpp/src/cylon/ctx/memory_pool.hpp index 5f96b723e..699b3e555 100644 --- a/cpp/src/cylon/ctx/memory_pool.hpp +++ b/cpp/src/cylon/ctx/memory_pool.hpp @@ -56,6 +56,12 @@ class MemoryPool { /// returns -1 virtual int64_t max_memory() const = 0; + /// The number of bytes that were allocated. + virtual int64_t total_bytes_allocated() const = 0; + + /// The number of allocations or reallocations that were requested. + virtual int64_t num_allocations() const = 0; + /// The name of the backend used by this MemoryPool (e.g. "system" or "jemalloc"); virtual std::string backend_name() const = 0; diff --git a/cpp/src/cylon/indexing/index.cpp b/cpp/src/cylon/indexing/index.cpp index 4da77f815..300e7f5a1 100644 --- a/cpp/src/cylon/indexing/index.cpp +++ b/cpp/src/cylon/indexing/index.cpp @@ -118,7 +118,7 @@ Status ArrowLinearIndex::LocationByValue(const std::shared_ptr &s auto cast_val = search_param->CastTo(index_array_->type()).ValueOrDie(); for (int64_t ix = 0; ix < index_array_->length(); ix++) { auto val = index_array_->GetScalar(ix).ValueOrDie(); - if (cast_val->Equals(val)) { + if (cast_val->Equals(*val)) { find_index.push_back(ix); } } @@ -128,7 +128,7 @@ Status ArrowLinearIndex::LocationByValue(const std::shared_ptr &s auto cast_val = search_param->CastTo(index_array_->type()).ValueOrDie(); for (int64_t ix = 0; ix < index_array_->length(); ix++) { auto val = index_array_->GetScalar(ix).ValueOrDie(); - if (cast_val->Equals(val)) { + if (cast_val->Equals(*val)) { *find_index = ix; break; } diff --git a/cpp/src/cylon/join/sort_join.cpp b/cpp/src/cylon/join/sort_join.cpp index e5717d8c6..5addbe44d 100644 --- a/cpp/src/cylon/join/sort_join.cpp +++ b/cpp/src/cylon/join/sort_join.cpp @@ -654,27 +654,27 @@ Status SortJoin(const std::shared_ptr &left_tab, joined_table, memory_pool); case arrow::Type::STRING: - return do_single_column_join( + return do_single_column_join( left_tab, right_tab, left_indices[0], right_indices[0], join_config.GetType(), join_config.GetLeftTableSuffix(), join_config.GetRightTableSuffix(), joined_table, memory_pool); case arrow::Type::BINARY: - return do_single_column_join( + return do_single_column_join( left_tab, right_tab, left_indices[0], right_indices[0], join_config.GetType(), join_config.GetLeftTableSuffix(), join_config.GetRightTableSuffix(), joined_table, memory_pool); case arrow::Type::LARGE_STRING: - return do_single_column_join( + return do_single_column_join( left_tab, right_tab, left_indices[0], right_indices[0], join_config.GetType(), join_config.GetLeftTableSuffix(), join_config.GetRightTableSuffix(), joined_table, memory_pool); case arrow::Type::LARGE_BINARY: - return do_single_column_join( + return do_single_column_join( left_tab, right_tab, left_indices[0], right_indices[0], join_config.GetType(), join_config.GetLeftTableSuffix(), join_config.GetRightTableSuffix(), joined_table, memory_pool); case arrow::Type::FIXED_SIZE_BINARY: - return do_single_column_join( + return do_single_column_join( left_tab, right_tab, left_indices[0], right_indices[0], join_config.GetType(), join_config.GetLeftTableSuffix(), join_config.GetRightTableSuffix(), joined_table, memory_pool); diff --git a/cpp/src/cylon/util/flatten_array.cpp b/cpp/src/cylon/util/flatten_array.cpp index 9298d7d93..baad9b469 100644 --- a/cpp/src/cylon/util/flatten_array.cpp +++ b/cpp/src/cylon/util/flatten_array.cpp @@ -166,7 +166,7 @@ struct BinaryColumnFlattenKernelImpl : public ColumnFlattenKernel { } else { int64_t i = 1; // dont update offsets[0] arrow::VisitArraySpanInline(*array_data, - [&](const arrow::util::string_view &val) { + [&](const std::string_view &val) { offsets[i] += static_cast(val.size()); i++; }, diff --git a/cpp/src/examples/indexing_example.cpp b/cpp/src/examples/indexing_example.cpp index 34a59f363..93a55ae7c 100644 --- a/cpp/src/examples/indexing_example.cpp +++ b/cpp/src/examples/indexing_example.cpp @@ -137,7 +137,7 @@ int arrow_take_test(std::shared_ptr &ctx, std::shared_ptr arrow::Int64Builder idx_builder(pool); const arrow::Datum input_table(input1->get_table()); - idx_builder.AppendValues({0, 1, 3}); + std::ignore = idx_builder.AppendValues({0, 1, 3}); arrow_status = idx_builder.Finish(&out_idx); const arrow::Datum filter_indices(out_idx); @@ -175,7 +175,7 @@ int build_int_index_from_values(std::shared_ptr &ctx) { auto pool = cylon::ToArrowPool(ctx); arrow::Int32Builder int_32_builder(pool); - int_32_builder.AppendValues(ix_vals); + std::ignore = int_32_builder.AppendValues(ix_vals); arrow_status = int_32_builder.Finish(&index_values); if (!arrow_status.ok()) { @@ -712,8 +712,8 @@ int arrow_indexer_test_4() { arrow::Int64Builder builder(pool); std::shared_ptr search_index_array; std::vector search_index_values = {7, 10}; - builder.AppendValues(search_index_values); - builder.Finish(&search_index_array); + std::ignore = builder.AppendValues(search_index_values); + std::ignore = builder.Finish(&search_index_array); std::shared_ptr index; cylon::IndexingType schema = cylon::IndexingType::Linear; @@ -777,8 +777,8 @@ int arrow_indexer_str_test_4() { arrow::StringBuilder builder(pool); std::shared_ptr search_index_array; std::vector search_index_values = {"f", "h"}; - builder.AppendValues(search_index_values); - builder.Finish(&search_index_array); + std::ignore = builder.AppendValues(search_index_values); + std::ignore = builder.Finish(&search_index_array); std::shared_ptr index; cylon::IndexingType schema = cylon::IndexingType::Linear; @@ -842,8 +842,8 @@ int arrow_indexer_test_5() { arrow::Int64Builder builder(pool); std::shared_ptr search_index_array; std::vector search_index_values = {7, 10}; - builder.AppendValues(search_index_values); - builder.Finish(&search_index_array); + std::ignore = builder.AppendValues(search_index_values); + std::ignore = builder.Finish(&search_index_array); int start_column_idx = 0; int end_column_idx = 1; @@ -909,8 +909,8 @@ int arrow_indexer_str_test_5() { arrow::StringBuilder builder(pool); std::shared_ptr search_index_array; std::vector search_index_values = {"f", "h"}; - builder.AppendValues(search_index_values); - builder.Finish(&search_index_array); + std::ignore = builder.AppendValues(search_index_values); + std::ignore = builder.Finish(&search_index_array); int start_column_idx = 0; int end_column_idx = 1; @@ -976,8 +976,8 @@ int arrow_indexer_test_6() { arrow::Int64Builder builder(pool); std::shared_ptr search_index_array; std::vector search_index_values = {7, 10}; - builder.AppendValues(search_index_values); - builder.Finish(&search_index_array); + std::ignore = builder.AppendValues(search_index_values); + std::ignore = builder.Finish(&search_index_array); std::vector columns = {0, 1}; std::shared_ptr index; @@ -1042,8 +1042,8 @@ int arrow_indexer_str_test_6() { arrow::StringBuilder builder(pool); std::shared_ptr search_index_array; std::vector search_index_values = {"f", "h"}; - builder.AppendValues(search_index_values); - builder.Finish(&search_index_array); + std::ignore = builder.AppendValues(search_index_values); + std::ignore = builder.Finish(&search_index_array); std::vector columns = {0, 1}; std::shared_ptr index; @@ -1310,8 +1310,8 @@ int arrow_iloc_indexer_test_4() { arrow::Int64Builder builder(pool); std::shared_ptr search_index_array; std::vector search_index_values = {0, 1, 2, 3, 4, 5}; - builder.AppendValues(search_index_values); - builder.Finish(&search_index_array); + std::ignore = builder.AppendValues(search_index_values); + std::ignore = builder.Finish(&search_index_array); std::shared_ptr index; cylon::IndexingType schema = cylon::IndexingType::Range; @@ -1375,8 +1375,8 @@ int arrow_iloc_indexer_test_5() { arrow::Int64Builder builder(pool); std::shared_ptr search_index_array; std::vector search_index_values = {0, 1, 2, 3, 4, 5}; - builder.AppendValues(search_index_values); - builder.Finish(&search_index_array); + std::ignore = builder.AppendValues(search_index_values); + std::ignore = builder.Finish(&search_index_array); int start_column_idx = 0; int end_column_idx = 1; @@ -1443,8 +1443,8 @@ int arrow_iloc_indexer_test_6() { arrow::Int64Builder builder(pool); std::shared_ptr search_index_array; std::vector search_index_values = {0, 1, 2, 3, 4, 5}; - builder.AppendValues(search_index_values); - builder.Finish(&search_index_array); + std::ignore = builder.AppendValues(search_index_values); + std::ignore = builder.Finish(&search_index_array); std::vector columns = {0, 1}; @@ -1484,11 +1484,11 @@ int create_int64_arrow_array(arrow::Int64Builder &builder, int64_t offset, std::shared_ptr &out_array) { - builder.Reserve(capacity); + std::ignore = builder.Reserve(capacity); for (int64_t ix = 0 + offset; ix < capacity + offset; ix++) { - builder.Append(ix); + std::ignore = builder.Append(ix); } - builder.Finish(&out_array); + std::ignore = builder.Finish(&out_array); return 0; } @@ -1532,8 +1532,8 @@ int arrow_filter_example() { arrow::Int64Builder builder(pool); std::vector search_index_values = {7, 10}; - builder.AppendValues(search_index_values); - builder.Finish(&search_index_array); + std::ignore = builder.AppendValues(search_index_values); + std::ignore = builder.Finish(&search_index_array); std::vector columns = {0, 1}; @@ -1592,7 +1592,7 @@ int arrow_filter_example() { arrow::Int64Builder filter_index_builder(pool); std::shared_ptr filter_index_array; - filter_index_builder.Reserve(arr_isin->length()); + std::ignore = filter_index_builder.Reserve(arr_isin->length()); auto bool_scalar_true = arrow::MakeScalar(true); std::shared_ptr @@ -1604,10 +1604,10 @@ int arrow_filter_example() { auto val = arr_isin_bool_array->Value(ix); if (val) { - filter_index_builder.Append(ix); + std::ignore = filter_index_builder.Append(ix); } } - filter_index_builder.Finish(&filter_index_array); + std::ignore = filter_index_builder.Finish(&filter_index_array); std::shared_ptr casted_index_array = std::static_pointer_cast(filter_index_array); print_arrow_array(casted_index_array); @@ -1645,8 +1645,8 @@ int arrow_range_indexer_test() { arrow::Int64Builder builder(pool); std::shared_ptr search_index_array; std::vector search_index_values = {10}; - builder.AppendValues(search_index_values); - builder.Finish(&search_index_array); + std::ignore = builder.AppendValues(search_index_values); + std::ignore = builder.Finish(&search_index_array); std::shared_ptr index; cylon::IndexingType schema = cylon::IndexingType::Range; diff --git a/cpp/test/comparator_test.cpp b/cpp/test/comparator_test.cpp index 2e6658591..642df20f9 100644 --- a/cpp/test/comparator_test.cpp +++ b/cpp/test/comparator_test.cpp @@ -35,13 +35,13 @@ struct Helper::value>> { }; template<> -struct Helper { - static arrow::util::string_view max() { return "ZZZZ"; } - static arrow::util::string_view min() { return ""; } +struct Helper { + static std::string_view max() { return "ZZZZ"; } + static std::string_view min() { return ""; } static int compare(bool asc, - const arrow::util::string_view &v1, - const arrow::util::string_view &v2) { + const std::string_view &v1, + const std::string_view &v2) { return asc ? v1.compare(v2) : v2.compare(v1); } }; @@ -229,7 +229,7 @@ TEST_CASE("test table", "[comp]") { return std::all_of(cols.begin(), cols.end(), [&](auto c) { auto v1 = *c->chunk(0)->GetScalar(i); auto v2 = *c->chunk(0)->GetScalar(j); - return v1->Equals(v2); + return v1->Equals(*v2); }); }; @@ -249,7 +249,7 @@ TEST_CASE("test table", "[comp]") { for (const auto &c: table->columns()) { auto v1 = *c->chunk(0)->GetScalar(i); auto v2 = *c->chunk(0)->GetScalar(j); - if (v1->Equals(v2)) { + if (v1->Equals(*v2)) { continue; } diff --git a/cpp/test/sorting_test.cpp b/cpp/test/sorting_test.cpp index 347f2d7bf..d2c479411 100644 --- a/cpp/test/sorting_test.cpp +++ b/cpp/test/sorting_test.cpp @@ -29,12 +29,12 @@ namespace test { Status create_table(std::shared_ptr &table) { arrow::Int64Builder b0; for (const auto &x: {0, 1, 4, 3, 2, 3}) { - b0.Append(x); + std::ignore = b0.Append(x); } arrow::StringBuilder b1; for (const auto &x: {"d", "a", "b", "e", "c", "b"}) { - b1.Append(x); + std::ignore = b1.Append(x); } const std::shared_ptr @@ -42,7 +42,7 @@ Status create_table(std::shared_ptr &table) { auto atable = arrow::Table::Make(schema, {b0.Finish().ValueOrDie(), b1.Finish().ValueOrDie()}); - Table::FromArrowTable(ctx, atable, table); + std::ignore = Table::FromArrowTable(ctx, atable, table); return Status::OK(); } diff --git a/cpp/test/test_arrow_utils.hpp b/cpp/test/test_arrow_utils.hpp index 8b2c21944..508ec7fa3 100644 --- a/cpp/test/test_arrow_utils.hpp +++ b/cpp/test/test_arrow_utils.hpp @@ -58,15 +58,15 @@ using ArrowBinaryTypes = std::tuple ArrayFromJSON(const std::shared_ptr &type, - arrow::util::string_view json) { + std::string_view json) { const auto &res = arrow::ipc::internal::json::ArrayFromJSON(type, json); ARROW_ABORT_NOT_OK(res.status()); return res.ValueOrDie(); } std::shared_ptr DictArrayFromJSON(const std::shared_ptr &type, - arrow::util::string_view indices_json, - arrow::util::string_view dictionary_json) { + std::string_view indices_json, + std::string_view dictionary_json) { std::shared_ptr out; ARROW_ABORT_NOT_OK(arrow::ipc::internal::json::DictArrayFromJSON(type, indices_json, dictionary_json, &out)); return out; @@ -105,7 +105,7 @@ std::shared_ptr ChunkedArrayFromJSON(const std::shared_ptr< ])"); */ std::shared_ptr RecordBatchFromJSON(const std::shared_ptr &schema, - arrow::util::string_view json) { + std::string_view json) { // Parse as a StructArray auto struct_type = struct_(schema->fields()); std::shared_ptr struct_array = ArrayFromJSON(struct_type, json); From f083ad37c73b1e0704455affda5f3609597ec2e3 Mon Sep 17 00:00:00 2001 From: niranda perera Date: Wed, 11 Sep 2024 21:52:51 -0400 Subject: [PATCH 02/11] fixing site packages --- cpp/CMakeLists.txt | 14 +++++++++++++- cpp/src/cylon/util/to_string.hpp | 8 +++++++- 2 files changed, 20 insertions(+), 2 deletions(-) mode change 100755 => 100644 cpp/CMakeLists.txt diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt old mode 100755 new mode 100644 index 1cb8fbea3..d0052b36a --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -399,7 +399,19 @@ if (${ARROW_BUILD_TYPE} STREQUAL "SYSTEM") message(STATUS "Arrow dataset lib: ${ARROW_DATASET_LIB}") if (PYCYLON_BUILD) - find_library(ARROW_PY_LIB arrow_python ${CYLON_ARROW_VERSION} REQUIRED) + find_package (Python3 COMPONENTS Interpreter) + write_file("find_site_packages.py" [=[import sysconfig; print(sysconfig.get_paths()["purelib"])]=]) + execute_process(COMMAND ${Python3_EXECUTABLE} "find_site_packages.py" + RESULT_VARIABLE PROC_RES + OUTPUT_VARIABLE PROC_OUT + OUTPUT_STRIP_TRAILING_WHITESPACE + ERROR_STRIP_TRAILING_WHITESPACE) + if (PROC_RES) + message(FATAL_ERROR "Unable to find python site packages dir: ${PROC_OUT}") + else() + message(STATUS "python site packages dir: ${PROC_OUT}") + endif() + find_library(ARROW_PY_LIB arrow_python PATHS "${PROC_OUT}" PATH_SUFFIXES "pyarrow" REQUIRED NO_DEFAULT_PATH) message(STATUS "Arrow py lib: ${ARROW_PY_LIB}") endif (PYCYLON_BUILD) diff --git a/cpp/src/cylon/util/to_string.hpp b/cpp/src/cylon/util/to_string.hpp index 75d306d6d..6f983de6c 100644 --- a/cpp/src/cylon/util/to_string.hpp +++ b/cpp/src/cylon/util/to_string.hpp @@ -53,7 +53,7 @@ std::string array_to_string(const std::shared_ptr &array, int inde case arrow::Type::TIMESTAMP:return do_to_string_numeric(array, index); case arrow::Type::TIME32:return do_to_string_numeric(array, index); case arrow::Type::TIME64:return do_to_string_numeric(array, index); - case arrow::Type::DECIMAL:break; + case arrow::Type::DECIMAL: case arrow::Type::LIST:break; case arrow::Type::STRUCT:break; case arrow::Type::DICTIONARY:break; @@ -70,6 +70,12 @@ std::string array_to_string(const std::shared_ptr &array, int inde case arrow::Type::DENSE_UNION:break; case arrow::Type::MAX_ID:break; case arrow::Type::DECIMAL256:break; + case arrow::Type::INTERVAL_MONTH_DAY_NANO:break; + case arrow::Type::RUN_END_ENCODED:break; + case arrow::Type::STRING_VIEW:break; + case arrow::Type::BINARY_VIEW:break; + case arrow::Type::LIST_VIEW:break; + case arrow::Type::LARGE_LIST_VIEW:break; } return "NA"; } From 3d3e28bc1dc9fb8e672553b3dca2a7cd2b97db52 Mon Sep 17 00:00:00 2001 From: niranda perera Date: Wed, 11 Sep 2024 22:11:09 -0400 Subject: [PATCH 03/11] fixing pycylon --- python/pycylon/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pycylon/setup.py b/python/pycylon/setup.py index 3c1fb3c79..7c9d37d7a 100644 --- a/python/pycylon/setup.py +++ b/python/pycylon/setup.py @@ -75,7 +75,7 @@ extra_compile_args = [] extra_link_args = [] -std_version = '-std=c++14' +std_version = '-std=c++17' extra_compile_args.extend([std_version, '-DARROW_METADATA_V4 -DNEED_EXCLUSIVE_SCAN']) extra_compile_args.append('-DOMPI_SKIP_MPICXX=1') From 4810d0921b655e72b04d3bea4fc826e1eb0f3648 Mon Sep 17 00:00:00 2001 From: mstaylor Date: Thu, 12 Dec 2024 15:01:08 -0500 Subject: [PATCH 04/11] Arrow 16 Updates - fixes actions --- .github/workflows/conda-actions.yml | 2 ++ .github/workflows/conda-cpp-redis.yml | 2 ++ .github/workflows/conda-cpp.yml | 2 ++ .github/workflows/macos.yml | 2 ++ .github/workflows/windows.yml | 2 ++ conda/environments/cylon_NoUCX.yml | 18 ++++++++++-------- cpp/CMake/Modules/SetupCxxFlags.cmake | 6 +++--- 7 files changed, 23 insertions(+), 11 deletions(-) diff --git a/.github/workflows/conda-actions.yml b/.github/workflows/conda-actions.yml index 00602edad..3c4de50e1 100644 --- a/.github/workflows/conda-actions.yml +++ b/.github/workflows/conda-actions.yml @@ -51,6 +51,8 @@ jobs: - uses: conda-incubator/setup-miniconda@v2 with: + miniconda-version: "latest" # You can specify a version or leave it as "latest" + auto-update-conda: true activate-environment: gcylon_dev environment-file: conda/environments/gcylon.yml diff --git a/.github/workflows/conda-cpp-redis.yml b/.github/workflows/conda-cpp-redis.yml index 6e7f51b61..9b7c70857 100644 --- a/.github/workflows/conda-cpp-redis.yml +++ b/.github/workflows/conda-cpp-redis.yml @@ -38,6 +38,8 @@ jobs: - uses: conda-incubator/setup-miniconda@v2 with: + miniconda-version: "latest" # You can specify a version or leave it as "latest" + auto-update-conda: true activate-environment: cylon_dev environment-file: conda/environments/cylon.yml diff --git a/.github/workflows/conda-cpp.yml b/.github/workflows/conda-cpp.yml index 00111c5aa..b9bc7e1d4 100644 --- a/.github/workflows/conda-cpp.yml +++ b/.github/workflows/conda-cpp.yml @@ -39,6 +39,8 @@ jobs: - uses: conda-incubator/setup-miniconda@v2 with: + miniconda-version: "latest" # You can specify a version or leave it as "latest" + auto-update-conda: true activate-environment: cylon_dev environment-file: conda/environments/cylon_NoUCX.yml diff --git a/.github/workflows/macos.yml b/.github/workflows/macos.yml index 15cbb3d38..fb08cda68 100644 --- a/.github/workflows/macos.yml +++ b/.github/workflows/macos.yml @@ -31,6 +31,8 @@ jobs: - uses: conda-incubator/setup-miniconda@v2 with: + miniconda-version: "latest" # You can specify a version or leave it as "latest" + auto-update-conda: true activate-environment: cylon_dev environment-file: conda/environments/cylon_MacOS.yml diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml index 581f8d004..f723713a7 100644 --- a/.github/workflows/windows.yml +++ b/.github/workflows/windows.yml @@ -27,6 +27,8 @@ jobs: - uses: actions/checkout@v2 - uses: conda-incubator/setup-miniconda@v2 with: + miniconda-version: "latest" # You can specify a version or leave it as "latest" + auto-update-conda: true activate-environment: cylon_dev environment-file: conda/environments/windows.yml diff --git a/conda/environments/cylon_NoUCX.yml b/conda/environments/cylon_NoUCX.yml index 39d6f9ad7..b94c10648 100644 --- a/conda/environments/cylon_NoUCX.yml +++ b/conda/environments/cylon_NoUCX.yml @@ -3,15 +3,17 @@ channels: - conda-forge - defaults dependencies: - - python>=3.8,<3.10 - - cmake>=3.23.1,!=3.25.0 - - arrow-cpp=9 - - pyarrow=9.0.0 + - python>=3.9,<3.12 + - cmake + - pyarrow=16.1.0 + - libarrow-acero==16.1.0.* + - libarrow-dataset==16.1.0.* + - libarrow==16.1.0.* - glog - - openmpi=4.1.3=ha1ae619_105 - - cython>=0.29.31,<3 - - numpy<1.24.4 - - pandas>=1.0,<2.0.0 + - openmpi + - cython>=0.29.31 + - numpy>=1.23,<2.0a0 + - pandas>=2.0,<2.2.3dev - fsspec>=0.6.0 - setuptools # they are not needed for using pygcylon or compiling it diff --git a/cpp/CMake/Modules/SetupCxxFlags.cmake b/cpp/CMake/Modules/SetupCxxFlags.cmake index abad875c3..9b85f498b 100644 --- a/cpp/CMake/Modules/SetupCxxFlags.cmake +++ b/cpp/CMake/Modules/SetupCxxFlags.cmake @@ -117,9 +117,9 @@ if(CYLON_CPU_FLAG STREQUAL "armv8") if(NOT CXX_SUPPORTS_ARMV8_ARCH) message(FATAL_ERROR "Unsupported arch flag: ${CYLON_ARMV8_ARCH_FLAG}.") endif() - if(CYLON_ARMV8_ARCH_FLAG MATCHES "native") - message(FATAL_ERROR "native arch not allowed, please specify arch explicitly.") - endif() +# if(CYLON_ARMV8_ARCH_FLAG MATCHES "native") +# message(FATAL_ERROR "native arch not allowed, please specify arch explicitly.") + #endif() set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} ${CYLON_ARMV8_ARCH_FLAG}") add_definitions(-DCYLON_HAVE_NEON) From e9f80b015b6da74f8903413e84b207795a0fa0a8 Mon Sep 17 00:00:00 2001 From: bud Date: Thu, 21 Aug 2025 16:30:53 -0400 Subject: [PATCH 05/11] removes windows and macos from workflows --- .github/workflows/macos.yml | 46 ----------------------------------- .github/workflows/windows.yml | 40 ------------------------------ 2 files changed, 86 deletions(-) delete mode 100644 .github/workflows/macos.yml delete mode 100644 .github/workflows/windows.yml diff --git a/.github/workflows/macos.yml b/.github/workflows/macos.yml deleted file mode 100644 index fb08cda68..000000000 --- a/.github/workflows/macos.yml +++ /dev/null @@ -1,46 +0,0 @@ -name: MacOS C++/Python - Clang,OpenMPI - -on: - push: - branches: - - main - - 0.** - pull_request: - branches: - - main - - 0.** - -jobs: - build: - runs-on: ${{ matrix.os }} - defaults: - run: - shell: bash -l {0} - strategy: - fail-fast: false - matrix: - include: - - os: macos-latest - - steps: - - uses: actions/checkout@v2 - - name: Remove link for preventing an error - run: rm -f /usr/local/bin/2to3 - - name: Install dependencies - run: brew install re2 automake boost brotli c-ares ccache flatbuffers grpc llvm lz4 minio ninja openssl@1.1 protobuf rapidjson snappy thrift wget zstd - - - uses: conda-incubator/setup-miniconda@v2 - with: - miniconda-version: "latest" # You can specify a version or leave it as "latest" - auto-update-conda: true - activate-environment: cylon_dev - environment-file: conda/environments/cylon_MacOS.yml - - - name: activate conda - run: conda activate cylon_dev - - - name: Build cylon, pycylon and run cpp test - run: python build.py --cpp --python --test - - - name: Run pytest - run: python build.py --pytest diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml deleted file mode 100644 index f723713a7..000000000 --- a/.github/workflows/windows.yml +++ /dev/null @@ -1,40 +0,0 @@ -name: Windows C++/Python - MSVC, MSMPI - -on: - push: - branches: - - main - - 0.** - pull_request: - branches: - - main - - 0.** - -jobs: - build: - runs-on: ${{ matrix.os }} - defaults: - run: - shell: bash -l {0} - strategy: - fail-fast: false - # explicit include-based build matrix, of known valid options - matrix: - include: - - os: windows-latest - - steps: - - uses: actions/checkout@v2 - - uses: conda-incubator/setup-miniconda@v2 - with: - miniconda-version: "latest" # You can specify a version or leave it as "latest" - auto-update-conda: true - activate-environment: cylon_dev - environment-file: conda/environments/windows.yml - - - name: Activate conda - run: conda activate cylon_dev - - name: Build cylon, pycylon and run c++ tests - run: python build.py --cpp --python --test - - name: Run python tests - run: python build.py --pytest From 6818a4352cdbe5a0009af72e5d63d28dfbea9311 Mon Sep 17 00:00:00 2001 From: bud Date: Thu, 21 Aug 2025 16:56:58 -0400 Subject: [PATCH 06/11] removes windows and macos from workflows --- .github/workflows/c-cpp.yml | 2 +- .github/workflows/conda-actions.yml | 2 +- .github/workflows/conda-cpp-redis.yml | 2 +- .github/workflows/conda-cpp.yml | 2 +- conda/environments/cylon_MacOS.yml | 20 -------------------- conda/environments/windows.yml | 20 -------------------- 6 files changed, 4 insertions(+), 44 deletions(-) delete mode 100644 conda/environments/cylon_MacOS.yml delete mode 100644 conda/environments/windows.yml diff --git a/.github/workflows/c-cpp.yml b/.github/workflows/c-cpp.yml index c8249cd42..7bd512634 100644 --- a/.github/workflows/c-cpp.yml +++ b/.github/workflows/c-cpp.yml @@ -13,7 +13,7 @@ on: jobs: build: - runs-on: ubuntu-20.04 + runs-on: ubuntu-22.04 steps: - uses: actions/checkout@v2 diff --git a/.github/workflows/conda-actions.yml b/.github/workflows/conda-actions.yml index 3c4de50e1..279bb4df7 100644 --- a/.github/workflows/conda-actions.yml +++ b/.github/workflows/conda-actions.yml @@ -22,7 +22,7 @@ jobs: matrix: include: # 20.04 supports CUDA 11.0+ - - os: ubuntu-20.04 + - os: ubuntu-22.04 cuda: "11.5.2" gcc: 9 diff --git a/.github/workflows/conda-cpp-redis.yml b/.github/workflows/conda-cpp-redis.yml index 9b7c70857..b046bbaf7 100644 --- a/.github/workflows/conda-cpp-redis.yml +++ b/.github/workflows/conda-cpp-redis.yml @@ -22,7 +22,7 @@ jobs: matrix: include: # 20.04 supports CUDA 11.0+ - - os: ubuntu-20.04 + - os: ubuntu-22.04 gcc: 9 ucc: "master" diff --git a/.github/workflows/conda-cpp.yml b/.github/workflows/conda-cpp.yml index b9bc7e1d4..844eb1e79 100644 --- a/.github/workflows/conda-cpp.yml +++ b/.github/workflows/conda-cpp.yml @@ -22,7 +22,7 @@ jobs: matrix: include: # 20.04 supports CUDA 11.0+ - - os: ubuntu-20.04 + - os: ubuntu-22.04 gcc: 9 ucc: "master" ucx: "override-remote-address3" diff --git a/conda/environments/cylon_MacOS.yml b/conda/environments/cylon_MacOS.yml deleted file mode 100644 index e41c71556..000000000 --- a/conda/environments/cylon_MacOS.yml +++ /dev/null @@ -1,20 +0,0 @@ -name: cylon_dev -channels: - - conda-forge - - defaults -dependencies: - - python>=3.9,<3.10 - - cmake>=3.23.1,!=3.25.0 - - arrow-cpp=9 - - pyarrow=9.0.0 - - glog - - openmpi>=4.1.2 - - cython>=0.29.31,<3 - - numpy<1.24.4 - - pandas>=1.0,<2.0.0 - - fsspec>=0.6.0 - - setuptools - # they are not needed for using pygcylon or compiling it - - pytest - - pytest-mpi - - mpi4py diff --git a/conda/environments/windows.yml b/conda/environments/windows.yml deleted file mode 100644 index 4c6411bcf..000000000 --- a/conda/environments/windows.yml +++ /dev/null @@ -1,20 +0,0 @@ -name: cylon_dev -channels: - - conda-forge - - defaults -dependencies: - - python>=3.8,<3.10 - - cmake>=3.23.1,!=3.25.0 - - arrow-cpp=9 - - pyarrow=9.0.0 - - glog - - msmpi - - cython>=0.29.31,<3 - - numpy<1.24.4 - - pandas>=1.0,<2.0.0 - - fsspec>=0.6.0 - - setuptools - # they are not needed for using pygcylon or compiling it - - pytest - - pytest-mpi - - mpi4py From 9d6dee4635d348cdfadb8b7f3b733eab713f1ec3 Mon Sep 17 00:00:00 2001 From: bud Date: Thu, 21 Aug 2025 17:12:21 -0400 Subject: [PATCH 07/11] removes windows and macos from workflows --- .github/workflows/conda-cpp.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/conda-cpp.yml b/.github/workflows/conda-cpp.yml index 844eb1e79..a673522f6 100644 --- a/.github/workflows/conda-cpp.yml +++ b/.github/workflows/conda-cpp.yml @@ -33,7 +33,7 @@ jobs: # Specify the correct host compilers - name: Install/Select gcc and g++ run: | - sudo apt-get install -y gcc-${{ matrix.gcc }} g++-${{ matrix.gcc }} git + sudo apt-get install -y wget bzip2 ca-certificates curl git build-essential echo "CC=/usr/bin/gcc-${{ matrix.gcc }}" >> $GITHUB_ENV echo "CXX=/usr/bin/g++-${{ matrix.gcc }}" >> $GITHUB_ENV From 92c413e7d0bc27e31c825fb8314081172ddc711d Mon Sep 17 00:00:00 2001 From: bud Date: Thu, 21 Aug 2025 17:22:03 -0400 Subject: [PATCH 08/11] removes windows and macos from workflows --- .github/workflows/conda-cpp.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/conda-cpp.yml b/.github/workflows/conda-cpp.yml index a673522f6..4c027022e 100644 --- a/.github/workflows/conda-cpp.yml +++ b/.github/workflows/conda-cpp.yml @@ -51,7 +51,7 @@ jobs: run: | git clone --single-branch https://github.com/nirandaperera/gloo.git $HOME/gloo mkdir -p $HOME/gloo/build && cd $HOME/gloo/build - cmake .. -DBUILD_SHARED_LIBS=1 -DUSE_MPI=1 -DCMAKE_INSTALL_PREFIX=$HOME/gloo/install + cmake .. -DBUILD_SHARED_LIBS=1 -DUSE_MPI=1 -DCMAKE_INSTALL_PREFIX=$HOME/gloo/install -DCMAKE_POLICY_VERSION_MINIMUM=3.5 make install - name: Install UCX From 61225b909012f92b7bb1928511603033abbb84be Mon Sep 17 00:00:00 2001 From: bud Date: Thu, 21 Aug 2025 17:50:49 -0400 Subject: [PATCH 09/11] removes windows and macos from workflows --- .github/workflows/conda-cpp.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/conda-cpp.yml b/.github/workflows/conda-cpp.yml index 4c027022e..b9054ce7e 100644 --- a/.github/workflows/conda-cpp.yml +++ b/.github/workflows/conda-cpp.yml @@ -24,7 +24,7 @@ jobs: # 20.04 supports CUDA 11.0+ - os: ubuntu-22.04 gcc: 9 - ucc: "master" + ucc: "ucc-v1.2.0" ucx: "override-remote-address3" steps: From c71b23e71339126df7d1dde61f6459cd39e54cbb Mon Sep 17 00:00:00 2001 From: bud Date: Thu, 21 Aug 2025 18:42:46 -0400 Subject: [PATCH 10/11] removes windows and macos from workflows --- .github/workflows/conda-cpp.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/conda-cpp.yml b/.github/workflows/conda-cpp.yml index b9054ce7e..c7606c724 100644 --- a/.github/workflows/conda-cpp.yml +++ b/.github/workflows/conda-cpp.yml @@ -64,7 +64,7 @@ jobs: - name: Install UCC run: | - git clone --single-branch -b ${{ matrix.ucc }} https://github.com/openucx/ucc.git $HOME/ucc + git clone --single-branch -b ${{ matrix.ucc }} https://github.com/mstaylor/ucc.git $HOME/ucc cd $HOME/ucc ./autogen.sh ./configure --prefix=$HOME/ucc/install --with-ucx=$HOME/ucx/install From 7711bbd63cca81947570a1801867e49210f5fd6d Mon Sep 17 00:00:00 2001 From: APKAI5AM767AKRTSIELQ Date: Sun, 24 Aug 2025 14:23:31 -0400 Subject: [PATCH 11/11] updates env variable style --- python/pycylon/pycylon/ctx/context.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pycylon/pycylon/ctx/context.pyx b/python/pycylon/pycylon/ctx/context.pyx index 8ccdd6e2d..6cc7b2d8f 100644 --- a/python/pycylon/pycylon/ctx/context.pyx +++ b/python/pycylon/pycylon/ctx/context.pyx @@ -80,7 +80,7 @@ cdef class CylonContext: if config is None: raise ValueError("No config passed for a distributed context") - status = CCylonContext.InitDistributed(self.init_dist(config), &self.ctx_shd_ptr) + cdef CStatus status = CCylonContext.InitDistributed(self.init_dist(config), &self.ctx_shd_ptr) if not status.is_ok(): raise Exception(f"Ctx initialization failed: {status.get_msg().decode()}")