Skip to content
24 changes: 20 additions & 4 deletions strands-py/src/strands/hooks/events.py
Comment thread
Zelys-DFKH marked this conversation as resolved.
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
"""

import uuid
import warnings
from dataclasses import dataclass, field
from typing import TYPE_CHECKING, Any

Expand Down Expand Up @@ -330,18 +331,33 @@ class BeforeNodeCallEvent(BaseHookEvent, _Interruptible):
source: The multi-agent orchestrator instance
node_id: ID of the node about to execute
invocation_state: Configuration that user passes in
cancel_node: A user defined message that when set, will cancel the node execution with status FAILED.
The message will be emitted under a MultiAgentNodeCancel event. If set to `True`, Strands will cancel the
node using a default cancel message.
skip_node: A user defined message that when set, will skip the node execution and emit a
:class:`~strands.types._events.MultiAgentNodeSkipEvent`. If set to ``True``, a default
skip message is used. Any falsy value (``False``, ``""`` etc.) means "do not skip".
Takes precedence over ``cancel_node`` when both are truthy.
cancel_node: Deprecated. Use ``skip_node`` instead. When set to a truthy value, behaves
identically to ``skip_node`` but also emits a ``DeprecationWarning`` at the assignment
site.
"""

source: "MultiAgentBase"
node_id: str
invocation_state: dict[str, Any] | None = None
skip_node: bool | str = False
cancel_node: bool | str = False

def _can_write(self, name: str) -> bool:
return name in ["cancel_node"]
return name in ["skip_node", "cancel_node"]

def __setattr__(self, name: str, value: Any) -> None:
    """Assign ``value`` to ``name``, warning first when the deprecated ``cancel_node`` is enabled.

    Args:
        name: Attribute being assigned.
        value: New value. A truthy value for ``cancel_node`` triggers a ``DeprecationWarning``;
            falsy values and all other attribute names are assigned silently.
    """
    deprecated_write = name == "cancel_node" and bool(value)
    if deprecated_write:
        # stacklevel=2 attributes the warning to the code performing the assignment
        # rather than to this method.
        warnings.warn(
            message="BeforeNodeCallEvent.cancel_node is deprecated; use skip_node instead",
            category=DeprecationWarning,
            stacklevel=2,
        )
    # The assignment itself is always delegated to the parent implementation.
    super().__setattr__(name, value)

@override
def _interrupt_id(self, name: str) -> str:
Expand Down
22 changes: 14 additions & 8 deletions strands-py/src/strands/multiagent/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,13 +29,15 @@ class Status(Enum):
PENDING: Task has not started execution yet.
EXECUTING: Task is currently running.
COMPLETED: Task finished successfully.
SKIPPED: Task was intentionally bypassed via skip_node (or the deprecated cancel_node alias); downstream nodes still execute.
FAILED: Task encountered an error and could not complete.
INTERRUPTED: Task was interrupted by user.
"""

PENDING = "pending"
EXECUTING = "executing"
COMPLETED = "completed"
SKIPPED = "skipped"
FAILED = "failed"
INTERRUPTED = "interrupted"

Expand All @@ -44,8 +46,8 @@ class Status(Enum):
class NodeResult:
"""Unified result from node execution - handles both Agent and nested MultiAgentBase results."""

# Core result data - single AgentResult, nested MultiAgentResult, or Exception
result: Union[AgentResult, "MultiAgentResult", Exception]
# Core result data - single AgentResult, nested MultiAgentResult, Exception, or None (skipped)
result: Union[AgentResult, "MultiAgentResult", Exception, None]

# Execution metadata
execution_time: int = 0
Expand All @@ -59,8 +61,8 @@ class NodeResult:

def get_agent_results(self) -> list[AgentResult]:
"""Get all AgentResult objects from this node, flattened if nested."""
if isinstance(self.result, Exception):
return [] # No agent results for exceptions
if self.result is None or isinstance(self.result, Exception):
return []
elif isinstance(self.result, AgentResult):
return [self.result]
else:
Expand All @@ -72,8 +74,10 @@ def get_agent_results(self) -> list[AgentResult]:

def to_dict(self) -> dict[str, Any]:
"""Convert NodeResult to JSON-serializable dict, ignoring state field."""
if isinstance(self.result, Exception):
result_data: dict[str, Any] = {"type": "exception", "message": str(self.result)}
if self.result is None:
result_data: dict[str, Any] = {"type": "skipped"}
elif isinstance(self.result, Exception):
result_data = {"type": "exception", "message": str(self.result)}
elif isinstance(self.result, AgentResult):
result_data = self.result.to_dict()
else:
Expand All @@ -97,8 +101,10 @@ def from_dict(cls, data: dict[str, Any]) -> "NodeResult":
raise TypeError("NodeResult.from_dict: missing 'result'")
raw = data["result"]

result: AgentResult | MultiAgentResult | Exception
if isinstance(raw, dict) and raw.get("type") == "agent_result":
result: AgentResult | MultiAgentResult | Exception | None
if isinstance(raw, dict) and raw.get("type") == "skipped":
result = None
elif isinstance(raw, dict) and raw.get("type") == "agent_result":
result = AgentResult.from_dict(raw)
elif isinstance(raw, dict) and raw.get("type") == "exception":
result = Exception(str(raw.get("message", "node failed")))
Expand Down
48 changes: 36 additions & 12 deletions strands-py/src/strands/multiagent/graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,8 @@
from ..telemetry import get_tracer
from ..types._events import (
MultiAgentHandoffEvent,
MultiAgentNodeCancelEvent,
MultiAgentNodeInterruptEvent,
MultiAgentNodeSkipEvent,
MultiAgentNodeStartEvent,
MultiAgentNodeStopEvent,
MultiAgentNodeStreamEvent,
Expand All @@ -68,7 +68,8 @@ class GraphState:

Attributes:
status: Current execution status of the graph.
completed_nodes: Set of nodes that have completed execution.
completed_nodes: Set of nodes whose execution is settled — either completed normally or skipped via skip_node.
Both statuses satisfy downstream readiness checks; inspect node.execution_status to distinguish them.
failed_nodes: Set of nodes that failed during execution.
interrupted_nodes: Set of nodes that user interrupted during execution.
execution_order: List of nodes in the order they were executed.
Expand Down Expand Up @@ -134,6 +135,9 @@ class GraphResult(MultiAgentResult):

total_nodes: int = 0
completed_nodes: int = 0
"""Number of nodes that successfully ran to completion (excludes skipped nodes)."""
skipped_nodes: int = 0

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Issue: The GraphResult.completed_nodes field previously counted all settled nodes but now excludes skipped nodes. This is a behavioral change to an existing public field that users may be relying on (e.g., if result.completed_nodes == result.total_nodes: ... to check "all done").

Suggestion: Document this breaking change prominently in the PR description. Users who relied on completed_nodes == total_nodes to mean "graph finished all work" would now need completed_nodes + skipped_nodes == total_nodes (excluding failures/interrupts). Consider whether this warrants a note in a changelog or migration guide.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The direction is the opposite — skipped nodes are added to completed_nodes (the skip path calls self.state.completed_nodes.add(node)). Before this PR, cancel_node raised RuntimeError, so skip didn't exist as a concept; nothing assumed completed_nodes excluded skipped nodes. Callers that want to distinguish can check node.execution_status == Status.SKIPPED on individual results.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Correction to my previous reply: GraphResult.completed_nodes (the public int in _build_result) only counts Status.COMPLETED nodes — skipped ones are filtered by the if n.execution_status == Status.COMPLETED guard. The bot's observation about completed_nodes == total_nodes is right for new callers using skip_node: they'd need completed_nodes + skipped_nodes == total_nodes to mean "all work settled." That's what skipped_nodes is for. The backward-compat point holds: cancel_node raised RuntimeError before this PR, so no existing code relied on skip behavior and there's nothing to migrate.

"""Number of nodes bypassed via skip_node or cancel_node; downstream nodes continued executing."""
failed_nodes: int = 0
interrupted_nodes: int = 0
execution_order: list["GraphNode"] = field(default_factory=list)
Expand Down Expand Up @@ -929,13 +933,29 @@ async def _execute_node(self, node: GraphNode, invocation_state: dict[str, Any])
yield self._activate_interrupt(node, interrupts, from_hook=True)
return

if before_event.cancel_node:
cancel_message = (
before_event.cancel_node if isinstance(before_event.cancel_node, str) else "node cancelled by user"
skip_value = before_event.skip_node or before_event.cancel_node

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The variable is still named cancel_message and the old cancel_node path now also routes through this "skipped" branch, which is a behavior change.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

+1 on this observation. The cancel_message variable should be renamed to skip_message in both graph.py and swarm.py to match the new semantics. Same applies in swarm.py line 780.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed — cancel_message is now skip_message in both files. The behavior change is intentional: cancel_node is now a deprecated alias for skip_node; assigning a truthy value routes to the skip path and emits a DeprecationWarning. Both fields bypass the node identically.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Issue: The skip_node or cancel_node short-circuit means if both are set, only skip_node's value is used (since it's checked first with or). This interaction isn't documented and could surprise users who set both fields.

Suggestion: Add a brief note in the BeforeNodeCallEvent docstring about precedence, e.g., "If both skip_node and cancel_node are set, skip_node takes precedence." Alternatively, you could log a warning when both are truthy.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good catch. Added a note to the skip_node docstring: "Takes precedence over cancel_node when both are truthy."


if skip_value:
skip_message = skip_value if isinstance(skip_value, str) else "node skipped by user"
logger.debug("reason=<%s> | node skipped, graph continues", skip_message)
yield MultiAgentNodeSkipEvent(node.node_id, skip_message)
node_result = NodeResult(
result=None,
execution_time=0,
status=Status.SKIPPED,
accumulated_usage=Usage(inputTokens=0, outputTokens=0, totalTokens=0),
accumulated_metrics=Metrics(latencyMs=0),
execution_count=0,
)
logger.debug("reason=<%s> | cancelling execution", cancel_message)
yield MultiAgentNodeCancelEvent(node.node_id, cancel_message)
raise RuntimeError(cancel_message)
node.result = node_result
node.execution_time = 0
node.execution_status = Status.SKIPPED
self.state.completed_nodes.add(node)
self.state.results[node.node_id] = node_result
self.state.execution_order.append(node)
self._accumulate_metrics(node_result)
yield MultiAgentNodeStopEvent(node_id=node.node_id, node_result=node_result)
return

# Build node input from satisfied dependencies
node_input = self._build_node_input(node)
Expand Down Expand Up @@ -1120,7 +1140,7 @@ def _build_node_input(self, node: GraphNode) -> list[ContentBlock]:

return node_responses

# Get satisfied dependencies
# Get satisfied dependencies, excluding skipped nodes (they produced no output)
dependency_results = {}
for edge in self.edges:
if (
Expand All @@ -1129,7 +1149,9 @@ def _build_node_input(self, node: GraphNode) -> list[ContentBlock]:
and edge.from_node.node_id in self.state.results
):
if edge.should_traverse(self.state):
dependency_results[edge.from_node.node_id] = self.state.results[edge.from_node.node_id]
nr = self.state.results[edge.from_node.node_id]
if nr.status != Status.SKIPPED:
dependency_results[edge.from_node.node_id] = nr

if not dependency_results:
# No dependencies - return task as ContentBlocks
Expand Down Expand Up @@ -1180,7 +1202,8 @@ def _build_result(self, interrupts: list[Interrupt]) -> GraphResult:
execution_count=self.state.execution_count,
execution_time=self.state.execution_time,
total_nodes=self.state.total_nodes,
completed_nodes=len(self.state.completed_nodes),
completed_nodes=sum(1 for n in self.state.completed_nodes if n.execution_status == Status.COMPLETED),
skipped_nodes=sum(1 for n in self.state.completed_nodes if n.execution_status == Status.SKIPPED),
failed_nodes=len(self.state.failed_nodes),
interrupted_nodes=len(self.state.interrupted_nodes),
execution_order=self.state.execution_order,
Expand Down Expand Up @@ -1285,7 +1308,8 @@ def _from_dict(self, payload: dict[str, Any]) -> None:
self.nodes[node_id] for node_id in (payload.get("completed_nodes") or []) if node_id in self.nodes
)
for node in self.state.completed_nodes:
node.execution_status = Status.COMPLETED
nr = results.get(node.node_id)
node.execution_status = Status.SKIPPED if (nr and nr.status == Status.SKIPPED) else Status.COMPLETED

# Execution order (only nodes that still exist)
order_node_ids = payload.get("execution_order") or []
Expand Down
17 changes: 8 additions & 9 deletions strands-py/src/strands/multiagent/swarm.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,8 @@
from ..tools.decorator import tool
from ..types._events import (
MultiAgentHandoffEvent,
MultiAgentNodeCancelEvent,
MultiAgentNodeInterruptEvent,
MultiAgentNodeSkipEvent,
MultiAgentNodeStartEvent,
MultiAgentNodeStopEvent,
MultiAgentNodeStreamEvent,
Expand Down Expand Up @@ -774,14 +774,13 @@ async def _execute_swarm(self, invocation_state: dict[str, Any]) -> AsyncIterato
yield self._activate_interrupt(current_node, interrupts)
break

if before_event.cancel_node:
cancel_message = (
before_event.cancel_node
if isinstance(before_event.cancel_node, str)
else "node cancelled by user"
)
logger.debug("reason=<%s> | cancelling execution", cancel_message)
yield MultiAgentNodeCancelEvent(current_node.node_id, cancel_message)
skip_value = before_event.skip_node or before_event.cancel_node

if skip_value:
skip_message = skip_value if isinstance(skip_value, str) else "node skipped by user"
logger.debug("reason=<%s> | node skipped, stopping swarm sequence", skip_message)
yield MultiAgentNodeSkipEvent(current_node.node_id, skip_message)
# Linear swarm: nodes depend on prior output; skip sets FAILED (unlike graph, which continues).
self.state.completion_status = Status.FAILED

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Issue: Setting completion_status = Status.FAILED when a node is intentionally skipped seems semantically incorrect. A user deliberately skipping a node is not a failure — it's an intentional control-flow decision. This could confuse monitoring/alerting systems that watch for FAILED status.

Suggestion: Consider introducing Status.SKIPPED handling in the swarm as well (perhaps a new terminal status), or at minimum document in a code comment why FAILED is the correct outcome here (e.g., "swarm treats skip as abort — no handoff target available, so it cannot complete successfully"). If this is intentional behavior preserved from before, a brief comment explaining the rationale would help future maintainers.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Intentional. In a linear swarm, each node's output feeds the next — skip one and downstream nodes never get their input, so the sequence can't complete. FAILED is the accurate status: the run didn't finish. Added an inline comment at line 782 to document this. A dedicated CANCELLED / abort-graph status is tracked in #2401.

break

Expand Down
30 changes: 29 additions & 1 deletion strands-py/src/strands/types/_events.py
Original file line number Diff line number Diff line change
Expand Up @@ -564,7 +564,11 @@ def __init__(self, node_id: str, agent_event: dict[str, Any]) -> None:


class MultiAgentNodeCancelEvent(TypedEvent):

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Issue: The .. deprecated:: directive in the docstring says "Do not use this event" but the class remains importable and isn't deprecated at the Python level (no warnings.warn on instantiation). Users who discover it via autocomplete won't see the RST directive.

Suggestion: Either add a runtime deprecation warning in __init__ (consistent with other deprecated classes in this repo that use warnings.warn), or remove the .. deprecated:: RST directive and just use plain docstring text noting it's reserved for future use — since the event isn't actually deprecated yet, it's just unused.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No .. deprecated:: in the current source — the docstring notes the class is planned but not yet emitted. Since it isn't deprecated (just reserved for #2401), no runtime warning is appropriate.

"""Event emitted when a user cancels node execution from their BeforeNodeCallEvent hook."""
"""Planned event for when a node stops graph execution entirely (see issue #2401).

Not currently emitted by the library. To handle bypassed nodes, subscribe to
:class:`MultiAgentNodeSkipEvent` (type ``multiagent_node_skip``) instead.
"""

def __init__(self, node_id: str, message: str) -> None:
"""Initialize with cancel message.
Expand All @@ -582,6 +586,30 @@ def __init__(self, node_id: str, message: str) -> None:
)


class MultiAgentNodeSkipEvent(TypedEvent):
    """Typed event announcing that a node was bypassed through :attr:`BeforeNodeCallEvent.skip_node`.

    The deprecated :attr:`BeforeNodeCallEvent.cancel_node` alias triggers it as well. What the
    orchestrator does after a skip depends on its type: a graph keeps executing downstream
    nodes, whereas a swarm stops the current run.
    """

    def __init__(self, node_id: str, message: str) -> None:
        """Build the skip event payload.

        Args:
            node_id: Unique identifier for the node.
            message: The node skip message.
        """
        payload = {
            "type": "multiagent_node_skip",
            "node_id": node_id,
            "message": message,
        }
        super().__init__(payload)


class MultiAgentNodeInterruptEvent(TypedEvent):
"""Event emitted when a node is interrupted."""

Expand Down
Loading