# NOTE: these statements live inside ``_convert_list_values_to_proto``, after
# the sample type check and before ``_validate_collection_item_types`` runs.
#
# Array columns may arrive as ``numpy.ndarray`` (object dtype) rather than
# plain Python lists. Each value is turned into a list so the protobuf
# constructors accept it, and ``None`` elements are swapped for a per-type
# filler because protobuf repeated fields reject ``None``.
_fill_by_list_type: Dict[ValueType, Any] = {
    # String-backed list types fall back to the empty string.
    **dict.fromkeys(
        (
            ValueType.STRING_LIST,
            ValueType.UUID_LIST,
            ValueType.TIME_UUID_LIST,
            ValueType.DECIMAL_LIST,
        ),
        "",
    ),
    ValueType.BYTES_LIST: b"",
    **dict.fromkeys((ValueType.INT32_LIST, ValueType.INT64_LIST), 0),
    **dict.fromkeys((ValueType.FLOAT_LIST, ValueType.DOUBLE_LIST), 0.0),
    ValueType.BOOL_LIST: False,
    ValueType.UNIX_TIMESTAMP_LIST: NULL_TIMESTAMP_INT_VALUE,
}
# ``None`` here means "no filler registered for this value type".
none_default = _fill_by_list_type.get(feast_value_type)


def _sanitize(value: Any) -> Any:
    """Normalise one list-column value.

    An ``ndarray`` is converted with ``tolist()``. An empty list collapses
    to ``None``. Otherwise, when a filler exists for the current value
    type, every ``None`` element is replaced by it. Anything that is not a
    list after the ndarray conversion is returned untouched.
    """
    as_list = value.tolist() if isinstance(value, np.ndarray) else value
    if not isinstance(as_list, list):
        return as_list
    if not as_list:
        return None
    if none_default is None:
        return as_list
    return [none_default if item is None else item for item in as_list]


# Top-level ``None`` entries are passed through without sanitising.
values = [None if entry is None else _sanitize(entry) for entry in values]
sample = None if sample is None else _sanitize(sample)
class TestNdarrayListConversion:
    """Regression coverage for https://github.com/feast-dev/feast/issues/6325

    Arrow/Athena hands back Array(String) columns as object-dtype
    ``numpy.ndarray`` values rather than Python lists. These tests check
    that such values convert to proto without a ValueError or TypeError.
    """

    def test_ndarray_string_list_roundtrip(self):
        """A string ndarray survives the STRING_LIST proto round trip."""
        column = [np.array(["tag1", "tag2"], dtype=object)]
        first = python_values_to_proto_values(column, ValueType.STRING_LIST)[0]
        assert feast_value_type_to_python_type(first) == ["tag1", "tag2"]

    def test_ndarray_string_list_with_none_elements(self):
        """A None element in a string ndarray comes back as ''."""
        column = [np.array(["tag1", None, "tag3"], dtype=object)]
        first = python_values_to_proto_values(column, ValueType.STRING_LIST)[0]
        assert feast_value_type_to_python_type(first) == ["tag1", "", "tag3"]

    def test_ndarray_empty_string_list(self):
        """An empty ndarray converts to an unset (null) ProtoValue."""
        from feast.protos.feast.types.Value_pb2 import Value as ProtoValue

        column = [np.array([], dtype=object)]
        first = python_values_to_proto_values(column, ValueType.STRING_LIST)[0]
        assert first == ProtoValue()

    def test_ndarray_string_list_mixed_batch(self):
        """One batch mixing a populated ndarray, None and an empty ndarray."""
        from feast.protos.feast.types.Value_pb2 import Value as ProtoValue

        populated = np.array(["a", "b"], dtype=object)
        empty = np.array([], dtype=object)
        protos = python_values_to_proto_values(
            [populated, None, empty], ValueType.STRING_LIST
        )

        assert feast_value_type_to_python_type(protos[0]) == ["a", "b"]
        assert protos[1] == ProtoValue()
        assert protos[2] == ProtoValue()

    def test_ndarray_int64_list_roundtrip(self):
        """An int ndarray survives the INT64_LIST proto round trip."""
        column = [np.array([1, 2, 3], dtype=object)]
        first = python_values_to_proto_values(column, ValueType.INT64_LIST)[0]
        assert feast_value_type_to_python_type(first) == [1, 2, 3]

    def test_ndarray_double_list_with_none_elements(self):
        """A None element in a DOUBLE_LIST ndarray comes back as 0.0."""
        column = [np.array([1.5, None, 3.5], dtype=object)]
        first = python_values_to_proto_values(column, ValueType.DOUBLE_LIST)[0]
        assert feast_value_type_to_python_type(first) == [1.5, 0.0, 3.5]

    def test_ndarray_bool_list_roundtrip(self):
        """A bool ndarray survives the BOOL_LIST proto round trip."""
        column = [np.array([True, False, True], dtype=object)]
        first = python_values_to_proto_values(column, ValueType.BOOL_LIST)[0]
        assert feast_value_type_to_python_type(first) == [True, False, True]