"""Utilities for safely checking null/missing values in scalar columns.

The standard pd.isnull() is vectorized: when given a numpy array (including
an empty one), it returns an array of booleans instead of a scalar bool.
Applying Python's `not` operator to such an array raises:

    ValueError: The truth value of an empty array is ambiguous.

These helpers wrap pd.isnull() to handle array-like values safely.

See: https://github.com/feast-dev/feast/issues/6255
"""

from typing import Any

import numpy as np
import pandas as pd


def is_scalar_null(value: Any) -> bool:
    """Return whether a scalar-column value should be treated as null.

    Unlike a bare ``pd.isnull()`` call, this always returns a plain ``bool``,
    even when an array-like object ended up in a scalar feature column.

    Rules, in order:

    * ``None`` is null.
    * ``str`` / ``bytes`` are never null (an empty string is a real value).
    * An empty numpy array (``size == 0``) is null.
    * Any other empty sized container (``len(value) == 0``), such as ``[]``,
      is null, consistent with the empty-array rule.
    * A non-empty array-like is null if ``pd.isnull`` flags any element.
    * Everything else is delegated to ``pd.isnull``.

    Args:
        value: A scalar value that might be None, NaN, or an array-like
            object that ended up in a scalar feature column.

    Returns:
        True if the value should be treated as null/missing, else False.
        Values that ``pd.isnull`` or ``len`` cannot handle (``ValueError`` /
        ``TypeError``) are reported as not null rather than raising.
    """
    if value is None:
        return True
    # Strings and bytes are sized, but they are genuine scalar values.
    if isinstance(value, (str, bytes)):
        return False

    if isinstance(value, np.ndarray):
        # ``size`` (not ``len``) so shapes such as (3, 0) count as empty and
        # 0-d arrays, which have no length, are still handled.
        if value.size == 0:
            return True
        return bool(np.asarray(pd.isnull(value)).any())

    # Other sized objects (lists, tuples, ...).
    if hasattr(value, "__len__"):
        try:
            # ``.any()`` over an empty mask is False, so emptiness has to be
            # checked explicitly; otherwise ``[]`` would be reported as
            # non-null while ``np.array([])`` is null.
            if len(value) == 0:
                return True
            # pd.isnull may return a bool or an array depending on the input
            # type; np.asarray lets both be reduced the same way.
            return bool(np.asarray(pd.isnull(value)).any())
        except (ValueError, TypeError):
            # e.g. ragged nested lists, or objects whose __len__ is unusable.
            return False

    # Plain scalar.
    try:
        return bool(pd.isnull(value))
    except (ValueError, TypeError):
        return False
class TestIsScalarNull:
    """Exercise ``is_scalar_null`` with null, non-null, and array-like inputs.

    Every check compares against the ``True`` / ``False`` singletons with
    ``is``, so a truthy non-bool return value would fail the test.
    """

    def test_none_is_null(self):
        """``None`` is reported as null."""
        outcome = is_scalar_null(None)
        assert outcome is True

    def test_nan_is_null(self):
        """A builtin float NaN is reported as null."""
        outcome = is_scalar_null(float("nan"))
        assert outcome is True

    def test_np_nan_is_null(self):
        """``np.nan`` is reported as null."""
        outcome = is_scalar_null(np.nan)
        assert outcome is True

    def test_empty_numpy_array_is_null(self):
        """An empty numpy array is null (the crash scenario of issue #6255)."""
        outcome = is_scalar_null(np.array([]))
        assert outcome is True

    def test_numpy_array_with_nan_is_null(self):
        """An array holding a NaN is reported as null."""
        outcome = is_scalar_null(np.array([np.nan]))
        assert outcome is True

    def test_numpy_array_with_values_is_not_null(self):
        """An array of ordinary floats is not null."""
        outcome = is_scalar_null(np.array([1.0, 2.0]))
        assert outcome is False

    def test_int_is_not_null(self):
        """A non-zero integer is not null."""
        outcome = is_scalar_null(42)
        assert outcome is False

    def test_zero_is_not_null(self):
        """Zero is falsy but must not be mistaken for null."""
        outcome = is_scalar_null(0)
        assert outcome is False

    def test_float_is_not_null(self):
        """An ordinary float is not null."""
        outcome = is_scalar_null(3.14)
        assert outcome is False

    def test_string_is_not_null(self):
        """A non-empty string is not null."""
        outcome = is_scalar_null("hello")
        assert outcome is False

    def test_empty_string_is_not_null(self):
        """An empty string is still a value, not null."""
        outcome = is_scalar_null("")
        assert outcome is False

    def test_bytes_is_not_null(self):
        """A bytes value is not null."""
        outcome = is_scalar_null(b"data")
        assert outcome is False

    def test_bool_true_is_not_null(self):
        """``True`` is not null."""
        outcome = is_scalar_null(True)
        assert outcome is False

    def test_bool_false_is_not_null(self):
        """``False`` is falsy but must not be mistaken for null."""
        outcome = is_scalar_null(False)
        assert outcome is False

    def test_np_bool_is_not_null(self):
        """A numpy boolean scalar is not null."""
        outcome = is_scalar_null(np.bool_(True))
        assert outcome is False

    def test_empty_list_is_null(self):
        """An empty list in a scalar column is expected to count as null."""
        outcome = is_scalar_null([])
        assert outcome is True

    def test_list_with_values_is_not_null(self):
        """A list of ordinary integers is not null."""
        outcome = is_scalar_null([1, 2, 3])
        assert outcome is False