From 14a3bbab8698bfa6a80616fec4dbd7259edf64de Mon Sep 17 00:00:00 2001
From: Anas Khan <83116240+anxkhn@users.noreply.github.com>
Date: Mon, 29 Jun 2026 01:33:47 +0530
Subject: [PATCH] feat(expressions): add truthiness semantics for constant
 boolean expressions

`AlwaysTrue`/`AlwaysFalse` extend `IcebergRootModel[bool]` but defined no
`__bool__`, so they used default object truthiness and were both always truthy.
That made `bool(AlwaysFalse())` return `True`, so intuitive control-flow patterns
(`if not visit_starts_with(...)`, `expr or AlwaysTrue()`) behaved incorrectly for
the FALSE constant.

Define `__bool__` on the two constants (`True`/`False`) and make the base
`BooleanExpression.__bool__` raise `TypeError`, so truthiness is only defined for
the constants and an accidental `if predicate:` is an explicit error rather than a
silently-true no-op. `not expr` returns a Python bool; `~expr` still returns the
negated expression.

Per the audit kevinjqliu requested on the issue, fix the one optional-expression
check that relied on truthiness instead of presence: REST scan-planning residual
handling (`FileScanTask.from_rest_response`) now uses `is not None`, so an explicit
residual filter is preserved and not treated as missing. Also update the
`expr or AlwaysTrue()` default in the pyarrow `project` test helper.

Tests cover constant truthiness, control-flow branching, `TypeError` for
non-constant expressions, and the REST residual path.

Closes #3543
---
 pyiceberg/expressions/__init__.py          | 22 ++++++++++++++++++
 pyiceberg/table/__init__.py                |  2 +-
 tests/catalog/test_scan_planning_models.py | 19 +++++++++++++++
 tests/expressions/test_expressions.py      | 27 ++++++++++++++++++++++
 tests/io/test_pyarrow.py                   |  2 +-
 5 files changed, 70 insertions(+), 2 deletions(-)

diff --git a/pyiceberg/expressions/__init__.py b/pyiceberg/expressions/__init__.py
index ef4cb2506e..8928c1c3a3 100644
--- a/pyiceberg/expressions/__init__.py
+++ b/pyiceberg/expressions/__init__.py
@@ -70,6 +70,20 @@ def __or__(self, other: BooleanExpression) -> BooleanExpression:
 
         return Or(self, other)
 
+    def __bool__(self) -> bool:
+        """Reject truthiness checks on non-constant expressions.
+
+        Truthiness is only defined for the constant expressions ``AlwaysTrue`` and
+        ``AlwaysFalse``, which override this method. Evaluating a predicate such as
+        ``if EqualTo("x", 1):`` is almost always a mistake; use ``~expr`` to negate
+        an expression or compare explicitly against ``AlwaysTrue()``/``AlwaysFalse()``.
+        """
+        raise TypeError(
+            f"The truth value of {type(self).__name__} is ambiguous. "
+            "Truthiness is only defined for AlwaysTrue() and AlwaysFalse(); "
+            "use ~expr to negate an expression or compare against AlwaysTrue()/AlwaysFalse()."
+        )
+
     @model_validator(mode="wrap")
     @classmethod
     def handle_primitive_type(cls, v: Any, handler: ValidatorFunctionWrapHandler) -> BooleanExpression:
@@ -455,6 +469,10 @@ def __invert__(self) -> AlwaysFalse:
         """Transform the Expression into its negated version."""
         return AlwaysFalse()
 
+    def __bool__(self) -> bool:
+        """Return True, the constant value of this expression."""
+        return True
+
     def __str__(self) -> str:
         """Return the string representation of the AlwaysTrue class."""
         return "AlwaysTrue()"
@@ -473,6 +491,10 @@ def __invert__(self) -> AlwaysTrue:
         """Transform the Expression into its negated version."""
         return AlwaysTrue()
 
+    def __bool__(self) -> bool:
+        """Return False, the constant value of this expression."""
+        return False
+
     def __str__(self) -> str:
         """Return the string representation of the AlwaysFalse class."""
         return "AlwaysFalse()"
diff --git a/pyiceberg/table/__init__.py b/pyiceberg/table/__init__.py
index 63b87d290e..bc4ac92624 100644
--- a/pyiceberg/table/__init__.py
+++ b/pyiceberg/table/__init__.py
@@ -2103,7 +2103,7 @@ def from_rest_response(
         return FileScanTask(
             data_file=data_file,
             delete_files=resolved_deletes,
-            residual=rest_task.residual_filter if rest_task.residual_filter else ALWAYS_TRUE,
+            residual=rest_task.residual_filter if rest_task.residual_filter is not None else ALWAYS_TRUE,
         )
 
 
diff --git a/tests/catalog/test_scan_planning_models.py b/tests/catalog/test_scan_planning_models.py
index f2c80cfb9b..e6af338ebe 100644
--- a/tests/catalog/test_scan_planning_models.py
+++ b/tests/catalog/test_scan_planning_models.py
@@ -39,6 +39,7 @@
 )
 from pyiceberg.expressions import AlwaysTrue, EqualTo, Reference
 from pyiceberg.manifest import FileFormat
+from pyiceberg.table import FileScanTask
 
 TEST_URI = "https://iceberg-test-catalog/"
 
@@ -242,6 +243,24 @@ def test_scan_task_with_residual_filter_true() -> None:
     assert isinstance(task.residual_filter, AlwaysTrue)
 
 
+def test_from_rest_response_preserves_non_constant_residual_filter() -> None:
+    data = {
+        "data-file": _rest_data_file(),
+        "residual-filter": {"type": "eq", "term": "x", "value": 1},
+    }
+    rest_task = RESTFileScanTask.model_validate(data)
+    task = FileScanTask.from_rest_response(rest_task, [])
+    assert task.residual == EqualTo(Reference("x"), 1)
+
+
+def test_from_rest_response_defaults_missing_residual_filter_to_always_true() -> None:
+    data = {"data-file": _rest_data_file()}
+    rest_task = RESTFileScanTask.model_validate(data)
+    assert rest_task.residual_filter is None
+    task = FileScanTask.from_rest_response(rest_task, [])
+    assert task.residual == AlwaysTrue()
+
+
 def test_empty_scan_tasks() -> None:
     data: dict[str, Any] = {
         "delete-files": [],
diff --git a/tests/expressions/test_expressions.py b/tests/expressions/test_expressions.py
index 8ce48a6897..2e93466425 100644
--- a/tests/expressions/test_expressions.py
+++ b/tests/expressions/test_expressions.py
@@ -639,6 +639,33 @@ def test_invert_always() -> None:
     assert ~AlwaysTrue() == AlwaysFalse()
 
 
+def test_always_bool() -> None:
+    assert bool(AlwaysTrue()) is True
+    assert bool(AlwaysFalse()) is False
+
+
+def test_always_bool_control_flow() -> None:
+    assert (1 if AlwaysTrue() else 0) == 1
+    assert (1 if AlwaysFalse() else 0) == 0
+    assert not AlwaysFalse()
+    assert not (not AlwaysTrue())
+
+
+@pytest.mark.parametrize(
+    "expression",
+    [
+        EqualTo("x", 1),
+        IsNull("x"),
+        And(EqualTo("x", 1), IsNull("y")),
+        Or(EqualTo("x", 1), IsNull("y")),
+        Not(EqualTo("x", 1)),
+    ],
+)
+def test_non_constant_expression_bool_raises(expression: BooleanExpression) -> None:
+    with pytest.raises(TypeError, match="truth value"):
+        bool(expression)
+
+
 def test_accessor_base_class() -> None:
     """Test retrieving a value at a position of a container using an accessor"""
 
diff --git a/tests/io/test_pyarrow.py b/tests/io/test_pyarrow.py
index 532311899d..0fa5ce2a03 100644
--- a/tests/io/test_pyarrow.py
+++ b/tests/io/test_pyarrow.py
@@ -1137,7 +1137,7 @@ def _set_spec_id(datafile: DataFile) -> DataFile:
         ),
         io=PyArrowFileIO(),
         projected_schema=schema,
-        row_filter=expr or AlwaysTrue(),
+        row_filter=expr if expr is not None else AlwaysTrue(),
         case_sensitive=True,
     ).to_table(
         tasks=[