Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 26 additions & 5 deletions sdk/python/feast/type_map.py
Original file line number Diff line number Diff line change
Expand Up @@ -739,7 +739,11 @@ def _validate_collection_item_types(
"""
if sample is None:
return
if all(type(item) in valid_types for item in sample):
# Arrow/Athena deserialises Array columns as numpy.ndarray with object dtype;
# coerce to a plain list so element-level checks work uniformly.
items = sample.tolist() if isinstance(sample, np.ndarray) else sample
# None elements are valid in nullable Arrow columns — skip them when checking types.
if all(type(item) in valid_types for item in items if item is not None):
return

# to_numpy() upcasts INT32/INT64 with NULL to Float64 automatically
Expand All @@ -749,11 +753,13 @@ def _validate_collection_item_types(
ValueType.INT32_SET,
ValueType.INT64_SET,
]
for item in sample:
for item in items:
if item is None:
continue
if type(item) not in valid_types:
if feast_value_type in int_collection_types:
# Check if the float values are due to NULL upcast
if not any(np.isnan(i) for i in sample if isinstance(i, float)):
if not any(np.isnan(i) for i in items if isinstance(i, float)):
logger.error(
f"{feast_value_type.name} has NULL values. to_numpy() upcasts to Float64 automatically."
)
Expand Down Expand Up @@ -945,9 +951,24 @@ def _convert_list_values_to_proto(
for value in values
]

# Generic list conversion
# Generic list conversion.
# Arrow/Athena may return each row as a numpy.ndarray (object dtype) rather
# than a plain Python list. Protobuf rejects ndarray directly, so coerce to
# list and strip None elements (which protobuf fixed-type lists cannot hold).
return [
ProtoValue(**{field_name: proto_type(val=value)}) # type: ignore[arg-type]
ProtoValue(
**{
field_name: proto_type( # type: ignore[arg-type]
val=[
v
for v in (
value.tolist() if isinstance(value, np.ndarray) else value
)
if v is not None
]
)
}
)
if value is not None
else ProtoValue()
for value in values
Expand Down
45 changes: 45 additions & 0 deletions sdk/python/tests/unit/test_type_map.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,51 @@ def test_python_values_to_proto_values_int_list_with_null_not_supported():
_ = python_values_to_proto_values(arr, ValueType.INT32_LIST)


class TestAthenaArrayStringConversion:
    """Regression coverage for STRING_LIST conversion of ndarray-backed rows.

    Array(String) columns read through Arrow/Athena arrive as object-dtype
    ``numpy.ndarray`` values instead of plain Python lists. These tests guard
    against two earlier failures:

    - ``_validate_collection_item_types`` raising ``TypeError`` when an ndarray
      contained ``None`` elements.
    - The generic list conversion path handing the ndarray straight to
      protobuf, which rejects non-list inputs with ``TypeError``.
    """

    def test_string_list_from_ndarray(self):
        """An object-dtype ndarray of strings converts to the same strings."""
        row = np.array(["a", "b", "c"], dtype=object)
        protos = python_values_to_proto_values([row], ValueType.STRING_LIST)
        converted = feast_value_type_to_python_type(protos[0])
        assert converted == ["a", "b", "c"]

    def test_string_list_from_ndarray_with_none_elements(self):
        """``None`` elements inside an ndarray row do not raise ``TypeError``."""
        row = np.array(["a", None, "c"], dtype=object)
        protos = python_values_to_proto_values([row], ValueType.STRING_LIST)
        converted = feast_value_type_to_python_type(protos[0])
        # The None element is dropped: a protobuf StringList cannot hold nulls.
        assert converted == ["a", "c"]

    def test_string_list_from_empty_ndarray(self):
        """A zero-length ndarray row converts to an empty list."""
        row = np.array([], dtype=object)
        protos = python_values_to_proto_values([row], ValueType.STRING_LIST)
        converted = feast_value_type_to_python_type(protos[0])
        assert converted == []

    def test_string_list_mixed_null_and_ndarray_rows(self):
        """``None`` rows and ndarray rows in the same batch convert row by row."""
        first_row = np.array(["x", "y"], dtype=object)
        last_row = np.array(["z"], dtype=object)
        rows = [first_row, None, last_row]
        protos = python_values_to_proto_values(rows, ValueType.STRING_LIST)
        to_python = feast_value_type_to_python_type
        assert to_python(protos[0]) == ["x", "y"]
        # A null row stays null rather than becoming an empty list.
        assert to_python(protos[1]) is None
        assert to_python(protos[2]) == ["z"]


class TestMapTypes:
"""Test cases for MAP and MAP_LIST value types."""

Expand Down
Loading