Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions pyiceberg/expressions/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,20 @@ def __or__(self, other: BooleanExpression) -> BooleanExpression:

return Or(self, other)

def __bool__(self) -> bool:
    """Refuse to coerce a non-constant expression to a boolean.

    Only the constant expressions ``AlwaysTrue`` and ``AlwaysFalse`` define a
    truth value, and they do so by overriding this method. Writing something
    like ``if EqualTo("x", 1):`` is almost always a mistake; negate with
    ``~expr`` or compare explicitly against ``AlwaysTrue()``/``AlwaysFalse()``.

    Raises:
        TypeError: Always; the message names the concrete expression class.
    """
    expression_kind = type(self).__name__
    message = (
        f"The truth value of {expression_kind} is ambiguous. "
        "Truthiness is only defined for AlwaysTrue() and AlwaysFalse(); "
        "use ~expr to negate an expression or compare against AlwaysTrue()/AlwaysFalse()."
    )
    raise TypeError(message)

@model_validator(mode="wrap")
@classmethod
def handle_primitive_type(cls, v: Any, handler: ValidatorFunctionWrapHandler) -> BooleanExpression:
Expand Down Expand Up @@ -455,6 +469,10 @@ def __invert__(self) -> AlwaysFalse:
"""Transform the Expression into its negated version."""
return AlwaysFalse()

def __bool__(self) -> bool:
    """Report this constant expression as truthy."""
    return True

def __str__(self) -> str:
    """Render this expression as the literal text ``AlwaysTrue()``."""
    return "AlwaysTrue()"
Expand All @@ -473,6 +491,10 @@ def __invert__(self) -> AlwaysTrue:
"""Transform the Expression into its negated version."""
return AlwaysTrue()

def __bool__(self) -> bool:
    """Report this constant expression as falsy."""
    return False

def __str__(self) -> str:
    """Render this expression as the literal text ``AlwaysFalse()``."""
    return "AlwaysFalse()"
Expand Down
2 changes: 1 addition & 1 deletion pyiceberg/table/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2103,7 +2103,7 @@ def from_rest_response(
return FileScanTask(
data_file=data_file,
delete_files=resolved_deletes,
residual=rest_task.residual_filter if rest_task.residual_filter else ALWAYS_TRUE,
residual=rest_task.residual_filter if rest_task.residual_filter is not None else ALWAYS_TRUE,
)


Expand Down
19 changes: 19 additions & 0 deletions tests/catalog/test_scan_planning_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
)
from pyiceberg.expressions import AlwaysTrue, EqualTo, Reference
from pyiceberg.manifest import FileFormat
from pyiceberg.table import FileScanTask

TEST_URI = "https://iceberg-test-catalog/"

Expand Down Expand Up @@ -242,6 +243,24 @@ def test_scan_task_with_residual_filter_true() -> None:
assert isinstance(task.residual_filter, AlwaysTrue)


def test_from_rest_response_preserves_non_constant_residual_filter() -> None:
    """A non-constant residual filter on the REST task is carried over to the scan task."""
    payload = {
        "data-file": _rest_data_file(),
        "residual-filter": {"type": "eq", "term": "x", "value": 1},
    }
    parsed_task = RESTFileScanTask.model_validate(payload)
    scan_task = FileScanTask.from_rest_response(parsed_task, [])
    expected_residual = EqualTo(Reference("x"), 1)
    assert scan_task.residual == expected_residual


def test_from_rest_response_defaults_missing_residual_filter_to_always_true() -> None:
    """With no residual filter in the payload, the scan task's residual is AlwaysTrue."""
    parsed_task = RESTFileScanTask.model_validate({"data-file": _rest_data_file()})
    # Precondition: the REST model itself leaves the filter unset.
    assert parsed_task.residual_filter is None
    scan_task = FileScanTask.from_rest_response(parsed_task, [])
    assert scan_task.residual == AlwaysTrue()


def test_empty_scan_tasks() -> None:
data: dict[str, Any] = {
"delete-files": [],
Expand Down
27 changes: 27 additions & 0 deletions tests/expressions/test_expressions.py
Original file line number Diff line number Diff line change
Expand Up @@ -639,6 +639,33 @@ def test_invert_always() -> None:
assert ~AlwaysTrue() == AlwaysFalse()


def test_always_bool() -> None:
    """``bool()`` on the constant expressions yields the real ``True``/``False`` objects."""
    truthy = bool(AlwaysTrue())
    assert truthy is True
    falsy = bool(AlwaysFalse())
    assert falsy is False


def test_always_bool_control_flow() -> None:
    """Constant expressions work in conditional expressions and under ``not``."""
    picked_for_true = 1 if AlwaysTrue() else 0
    assert picked_for_true == 1
    picked_for_false = 1 if AlwaysFalse() else 0
    assert picked_for_false == 0
    assert not AlwaysFalse()
    assert not (not AlwaysTrue())


@pytest.mark.parametrize(
    "expression",
    [
        EqualTo("x", 1),
        IsNull("x"),
        And(EqualTo("x", 1), IsNull("y")),
        Or(EqualTo("x", 1), IsNull("y")),
        Not(EqualTo("x", 1)),
    ],
)
def test_non_constant_expression_bool_raises(expression: BooleanExpression) -> None:
    """Coercing any of the parametrized non-constant expressions to bool raises TypeError."""
    with pytest.raises(TypeError, match="truth value"):
        bool(expression)


def test_accessor_base_class() -> None:
"""Test retrieving a value at a position of a container using an accessor"""

Expand Down
2 changes: 1 addition & 1 deletion tests/io/test_pyarrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -1137,7 +1137,7 @@ def _set_spec_id(datafile: DataFile) -> DataFile:
),
io=PyArrowFileIO(),
projected_schema=schema,
row_filter=expr or AlwaysTrue(),
row_filter=expr if expr is not None else AlwaysTrue(),
case_sensitive=True,
).to_table(
tasks=[
Expand Down