From 6155935191c7cae95780704bdedb04155a8e9fb6 Mon Sep 17 00:00:00 2001 From: Smoke Test Date: Tue, 2 Jun 2026 00:58:52 -0700 Subject: [PATCH 1/4] Improve bootstrap Python discovery --- scripts/bootstrap.sh | 136 +++++++++++++++++++++++++++++++- tests/test_scripts_bootstrap.py | 44 +++++++++++ 2 files changed, 179 insertions(+), 1 deletion(-) create mode 100644 tests/test_scripts_bootstrap.py diff --git a/scripts/bootstrap.sh b/scripts/bootstrap.sh index 705b634..3a05585 100755 --- a/scripts/bootstrap.sh +++ b/scripts/bootstrap.sh @@ -4,10 +4,144 @@ set -euo pipefail ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" VENV_DIR="${ROOT_DIR}/.venv" -PYTHON_BIN="${PYTHON:-python3.12}" PIP_BIN="${VENV_DIR}/bin/pip" UV_BIN="${VENV_DIR}/bin/uv" +resolve_command() { + local candidate="$1" + + if [[ "${candidate}" == */* ]]; then + if [[ -x "${candidate}" ]]; then + printf '%s\n' "${candidate}" + return 0 + fi + return 1 + fi + + command -v "${candidate}" 2>/dev/null +} + +is_python_312() { + "$1" -c 'import sys; raise SystemExit(0 if sys.version_info[:2] == (3, 12) else 1)' >/dev/null 2>&1 +} + +print_python_install_hint() { + local os_name + os_name="$(uname -s 2>/dev/null || printf 'unknown')" + + printf 'Python 3.12 is required to bootstrap Foundation CLI.\n' >&2 + + case "${os_name}" in + Darwin) + if command -v brew >/dev/null 2>&1; then + cat >&2 <<'EOF' + +Install it with Homebrew: + brew install python@3.12 + +Then rerun: + ./scripts/bootstrap.sh + +If Homebrew installed Python outside your PATH, rerun with: + PYTHON=/opt/homebrew/bin/python3.12 ./scripts/bootstrap.sh +EOF + else + cat >&2 <<'EOF' + +Install Homebrew from https://brew.sh, then run: + brew install python@3.12 + ./scripts/bootstrap.sh + +Or install Python 3.12 from https://www.python.org/downloads/ and rerun with: + PYTHON=/path/to/python3.12 ./scripts/bootstrap.sh +EOF + fi + ;; + Linux) + if command -v apt-get >/dev/null 2>&1; then + cat >&2 <<'EOF' + +Install it with apt: + sudo apt-get 
update + sudo apt-get install -y python3.12 python3.12-venv +EOF + elif command -v dnf >/dev/null 2>&1; then + cat >&2 <<'EOF' + +Install it with dnf: + sudo dnf install python3.12 +EOF + elif command -v yum >/dev/null 2>&1; then + cat >&2 <<'EOF' + +Install it with yum: + sudo yum install python3.12 +EOF + elif command -v pacman >/dev/null 2>&1; then + cat >&2 <<'EOF' + +Install it with pacman: + sudo pacman -S python +EOF + elif command -v apk >/dev/null 2>&1; then + cat >&2 <<'EOF' + +Install it with apk: + sudo apk add python3 +EOF + else + cat >&2 <<'EOF' + +Install Python 3.12 with your system package manager, then rerun: + ./scripts/bootstrap.sh +EOF + fi + ;; + *) + cat >&2 <<'EOF' + +Install Python 3.12, then rerun: + ./scripts/bootstrap.sh +EOF + ;; + esac +} + +find_python_312() { + local python_override="${PYTHON:-}" + local candidate + local resolved + + if [[ -n "${python_override}" ]]; then + if ! resolved="$(resolve_command "${python_override}")"; then + printf 'Requested Python interpreter not found: PYTHON=%s\n\n' "${python_override}" >&2 + print_python_install_hint + return 1 + fi + + if ! is_python_312 "${resolved}"; then + printf 'Requested Python interpreter is not Python 3.12: %s\n\n' "${resolved}" >&2 + print_python_install_hint + return 1 + fi + + printf '%s\n' "${resolved}" + return 0 + fi + + for candidate in python3.12 /opt/homebrew/bin/python3.12 /usr/local/bin/python3.12; do + if resolved="$(resolve_command "${candidate}")" && is_python_312 "${resolved}"; then + printf '%s\n' "${resolved}" + return 0 + fi + done + + print_python_install_hint + return 1 +} + +PYTHON_BIN="$(find_python_312)" + if [[ ! 
-d "${VENV_DIR}" ]]; then "${PYTHON_BIN}" -m venv "${VENV_DIR}" fi diff --git a/tests/test_scripts_bootstrap.py b/tests/test_scripts_bootstrap.py new file mode 100644 index 0000000..23b459c --- /dev/null +++ b/tests/test_scripts_bootstrap.py @@ -0,0 +1,44 @@ +from __future__ import annotations + +import os +import shutil +import subprocess +from pathlib import Path + + +def _make_repo_copy(tmp_path: Path) -> Path: + repo_root = tmp_path / "repo" + scripts_dir = repo_root / "scripts" + scripts_dir.mkdir(parents=True) + + source_script = Path(__file__).resolve().parents[1] / "scripts" / "bootstrap.sh" + shutil.copy2(source_script, scripts_dir / "bootstrap.sh") + (scripts_dir / "bootstrap.sh").chmod(0o755) + return repo_root + + +def test_bootstrap_prints_homebrew_python312_hint_when_requested_python_is_missing( + tmp_path: Path, +) -> None: + repo_root = _make_repo_copy(tmp_path) + bin_dir = tmp_path / "bin" + bin_dir.mkdir() + for name, body in (("brew", "exit 0"), ("uname", "echo Darwin")): # pin the Darwin hint + (bin_dir / name).write_text(f"#!/bin/sh\n{body}\n", encoding="utf-8") + (bin_dir / name).chmod(0o755) + + env = os.environ.copy() + env["PATH"] = f"{bin_dir}:/usr/bin:/bin" + env["PYTHON"] = "python3.12-missing-for-test" + + completed = subprocess.run( + ["/bin/bash", str(repo_root / "scripts" / "bootstrap.sh")], + check=False, + capture_output=True, + text=True, + env=env, + ) + + assert completed.returncode == 1 + assert "Python 3.12 is required" in completed.stderr + assert "brew install python@3.12" in completed.stderr From 255fb2fae4212424e6514956b2cf01c18a6560df Mon Sep 17 00:00:00 2001 From: Smoke Test Date: Tue, 2 Jun 2026 09:24:35 -0700 Subject: [PATCH 2/4] Document Ollama setup in quickstart --- README.md | 3 +- docs/how-to-use.md | 92 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 94 insertions(+), 1 deletion(-) create mode 100644 docs/how-to-use.md diff --git a/README.md b/README.md index 9a999d5..ceb818d 100644 --- a/README.md +++ b/README.md @@ -36,10 +36,11 @@ If any of that is a dealbreaker, that's
totally fair — come back in a few mont ./scripts/uv run foundation ``` -You'll need Python 3.12 and an API key for either OpenAI or Ollama. See [`docs/TECHNICAL.md`](docs/TECHNICAL.md) for full setup, configuration, the CLI surface, and architecture notes. +You'll need Python 3.12 and an API key for either OpenAI or Ollama Cloud (a local Ollama server usually needs no key). On a fresh Mac, start with the macOS walkthrough in [`docs/how-to-use.md`](docs/how-to-use.md). See [`docs/TECHNICAL.md`](docs/TECHNICAL.md) for the full configuration surface, CLI commands, and architecture notes. ## Where things live +- [`docs/how-to-use.md`](docs/how-to-use.md) — macOS setup and first-run commands. - [`docs/TECHNICAL.md`](docs/TECHNICAL.md) — the detailed README: features, configuration, commands, layout, limitations. - [`docs/monitor-protocol.md`](docs/monitor-protocol.md) — event-log wire format and live transports. - [`plans/`](plans/) — stage-by-stage implementation plans. These are the prompts/specs the agent worked from. Probably the most honest record of how the project actually got built. diff --git a/docs/how-to-use.md b/docs/how-to-use.md new file mode 100644 index 0000000..76599d2 --- /dev/null +++ b/docs/how-to-use.md @@ -0,0 +1,92 @@ +# How to Use Foundation CLI + +This is the shortest path for a macOS user trying the repo from a fresh machine. + +## 1. Install Python 3.12 + +Foundation CLI requires Python 3.12. On macOS, Homebrew is the simplest install path: + +```bash +brew install python@3.12 +python3.12 --version +``` + +If `python3.12` is still not on your `PATH`, use the explicit Homebrew path when bootstrapping: + +```bash +PYTHON=/opt/homebrew/bin/python3.12 ./scripts/bootstrap.sh +``` + +On Intel Macs, the Homebrew path may be: + +```bash +PYTHON=/usr/local/bin/python3.12 ./scripts/bootstrap.sh +``` + +## 2. Bootstrap the repo + +```bash +./scripts/bootstrap.sh +./scripts/uv run foundation --help +``` + +## 3.
Configure Ollama Cloud + +Create the Foundation config directory: + +```bash +mkdir -p "$HOME/Library/Application Support/foundation" +``` + +Create `config.toml`: + +```bash +cat > "$HOME/Library/Application Support/foundation/config.toml" <<'EOF' +[provider] +name = "ollama" +model = "qwen3.5:397b-cloud" +base_url = "https://ollama.com/api" +request_timeout_seconds = 180 +api_key_env_var = "OLLAMA_API_KEY" +EOF +``` + +Add your Ollama Cloud API key to the paired env file: + +```bash +cat > "$HOME/Library/Application Support/foundation/foundation.env" <<'EOF' +OLLAMA_API_KEY=your-ollama-cloud-api-key +EOF +``` + +Then verify the setup: + +```bash +./scripts/uv run foundation doctor +``` + +You want to see `Provider: ollama`, `Base URL: https://ollama.com/api`, and a secret lookup line saying credentials resolved from `$OLLAMA_API_KEY`. + +## 4. Start Foundation + +```bash +./scripts/uv run foundation +``` + +## Local Ollama + +Local Ollama usually does not need an API key. Use a local model name and local base URL instead: + +```toml +[provider] +name = "ollama" +model = "gpt-oss:20b" +base_url = "http://localhost:11434/api" +request_timeout_seconds = 180 +``` + +Then verify with: + +```bash +./scripts/uv run foundation doctor +``` From 9578ec11195652a413c147cf2a1315f9711f71b7 Mon Sep 17 00:00:00 2001 From: Smoke Test Date: Tue, 2 Jun 2026 09:31:12 -0700 Subject: [PATCH 3/4] Add OpenAI provider setup example --- docs/how-to-use.md | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/docs/how-to-use.md b/docs/how-to-use.md index 76599d2..7fabf33 100644 --- a/docs/how-to-use.md +++ b/docs/how-to-use.md @@ -67,6 +67,36 @@ Then verify the setup: You want to see `Provider: ollama`, `Base URL: https://ollama.com/api`, and a secret lookup line saying credentials resolved from `$OLLAMA_API_KEY`. 
+## OpenAI + +To use OpenAI instead, use this provider config: + +```bash +cat > "$HOME/Library/Application Support/foundation/config.toml" <<'EOF' +[provider] +name = "openai" +model = "gpt-5-mini" +request_timeout_seconds = 180 +api_key_env_var = "OPENAI_API_KEY" +EOF +``` + +Add your OpenAI API key to the paired env file: + +```bash +cat > "$HOME/Library/Application Support/foundation/foundation.env" <<'EOF' +OPENAI_API_KEY=your-openai-api-key +EOF +``` + +Then verify with: + +```bash +./scripts/uv run foundation doctor +``` + +You want to see `Provider: openai`, `Base URL: https://api.openai.com/v1`, and a secret lookup line saying credentials resolved from `$OPENAI_API_KEY`. + ## 4. Start Foundation ```bash From 4bf8dbd3b3a5fe90fe0aeb20c9829542060c8436 Mon Sep 17 00:00:00 2001 From: Smoke Test Date: Tue, 2 Jun 2026 12:20:07 -0700 Subject: [PATCH 4/4] Harden out-of-scope write handling --- AGENTS.md | 2 + src/foundation/models/orchestration.py | 1 + src/foundation/services/orchestrator.py | 259 ++++++++++++++++++++---- src/foundation/services/planner.py | 5 + tests/test_orchestrator.py | 126 ++++++++++++ 5 files changed, 355 insertions(+), 38 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index fa73586..5280bd0 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -6,6 +6,8 @@ Rules for AI coding agents (Claude Code, Codex, Aider, etc.) helping work on thi Agents do not open PRs against this repo. The human contributor reviews the change end-to-end first, *then* opens the PR themselves. If a maintainer sees a PR opened directly by a `[bot]` account or a fresh account that obviously didn't write the description, it gets closed. +Exception: when AnmolNoor is the author actively working on the change, agents may follow direct PR instructions from AnmolNoor without this rule blocking the PR. + ## Surgical changes - Touch only what the task requires. 
diff --git a/src/foundation/models/orchestration.py b/src/foundation/models/orchestration.py index 91e752e..30c95b3 100644 --- a/src/foundation/models/orchestration.py +++ b/src/foundation/models/orchestration.py @@ -108,6 +108,7 @@ class LoopStopReason(StrEnum): MAX_ITERATIONS = "max_iterations" MAX_ACTIONS = "max_actions" NO_PROGRESS = "no_progress" + TERMINAL_POLICY_BLOCK = "terminal_policy_block" class VerificationOutcome(StrEnum): diff --git a/src/foundation/services/orchestrator.py b/src/foundation/services/orchestrator.py index da3156b..0777808 100644 --- a/src/foundation/services/orchestrator.py +++ b/src/foundation/services/orchestrator.py @@ -33,6 +33,7 @@ PolicyDecision, PolicyDecisionType, PolicyEvaluationRecord, + PolicyReasonCode, ProviderMessage, ProviderMessageRole, ProviderPrompt, @@ -193,6 +194,9 @@ def _worst_verification_outcome( "\n\n[Loop stopped: a fatal execution failure occurred.]" ), LoopStopReason.NO_PROGRESS: ("\n\n[Loop stopped: no progress detected across iterations.]"), + LoopStopReason.TERMINAL_POLICY_BLOCK: ( + "\n\n[Loop stopped: terminal policy block. 
Work was not performed.]" + ), } # v4 stage 03 — soft-completion notice when NO_PROGRESS fires *after* the @@ -211,6 +215,23 @@ def _worst_verification_outcome( "foundation.git.commit", ) +_FILE_MUTATION_CAPABILITY_IDS = frozenset( + { + "foundation.file.write", + "foundation.file.edit", + "foundation.file.apply_diff", + } +) + +_FINAL_MUTATION_CLAIM_RE = re.compile( + r"\b(created|wrote|written|saved|updated|edited|modified|generated)\b", + re.IGNORECASE, +) +_FILESYSTEM_CLAIM_RE = re.compile( + r"\b(file|files|folder|folders|directory|directories|path|workspace)\b|[/\\][^\s]+|\.[A-Za-z0-9]{1,8}\b", + re.IGNORECASE, +) + def _is_side_effecting_capability(capability_id: str | None) -> bool: if not capability_id: @@ -221,6 +242,14 @@ def _is_side_effecting_capability(capability_id: str | None) -> bool: ) +def _is_file_mutation_action(action: PlannedAction) -> bool: + return ( + action.kind is ActionKind.TOOL_CALL + and action.tool_call is not None + and action.tool_call.capability_id in _FILE_MUTATION_CAPABILITY_IDS + ) + + _TOOL_CALL_LOG_KEYS: tuple[str, ...] = ( "path", "paths", @@ -1056,8 +1085,14 @@ def _run_replan_loop( r.status is ExecutionStatus.AWAITING_INPUT for r in execution_results ) has_fatal = any(self._is_fatal_result(r) for r in execution_results) + has_terminal_policy_block = self._has_terminal_policy_block( + actions_to_execute, + decisions, + ) - if has_pending: + if has_terminal_policy_block: + stop_reason = LoopStopReason.TERMINAL_POLICY_BLOCK + elif has_pending: stop_reason = LoopStopReason.PENDING_APPROVAL elif has_awaiting_input: stop_reason = LoopStopReason.AWAITING_USER_INPUT @@ -1165,47 +1200,59 @@ def _run_replan_loop( had_fatal_failure = any(self._is_fatal_result(r) for r in all_results) - # When the loop is structurally stuck (missing capability, bad path, or - # no progress), reframe the raw failure as a graceful capability-gap - # handoff: the chat surface shows a plain-language message and options - # instead of an error. 
The underlying failure stays in execution_results - # and is recorded to the trace + event log via EVENT_CAPABILITY_GAP. - gap_handoff = build_gap_handoff( - request=request.message, - stop_reason=stop_reason, - results=all_results, - iteration=len(iterations), - had_cumulative_changes=bool(cumulative_changed_paths), - phraser=make_provider_phraser(self._provider), - ) - if gap_handoff is not None: - msg_content = gap_handoff.message - self._observer.emit( - EVENT_CAPABILITY_GAP, - payload=gap_handoff.report.model_dump(mode="json"), - session_id=session_id, - logger_name="foundation.services.orchestrator", - level=logging.WARNING, + if stop_reason is LoopStopReason.TERMINAL_POLICY_BLOCK: + gap_handoff = None + msg_content = self._terminal_policy_block_message( + request=request, + decisions=all_decisions, ) else: - command_error_message = ( - self._command_usage_failure_message( - terminal_plan.assistant_message, - iterations, - ) - if stop_reason is LoopStopReason.NO_PROGRESS - else None + # When the loop is structurally stuck (missing capability, bad path, or + # no progress), reframe the raw failure as a graceful capability-gap + # handoff: the chat surface shows a plain-language message and options + # instead of an error. The underlying failure stays in execution_results + # and is recorded to the trace + event log via EVENT_CAPABILITY_GAP. 
+ gap_handoff = build_gap_handoff( + request=request.message, + stop_reason=stop_reason, + results=all_results, + iteration=len(iterations), + had_cumulative_changes=bool(cumulative_changed_paths), + phraser=make_provider_phraser(self._provider), ) - msg_content = ( - command_error_message - if command_error_message is not None - else self._augment_message_with_stop_reason( - terminal_plan.assistant_message, - stop_reason, - cumulative_changed_paths=cumulative_changed_paths, - had_fatal=had_fatal_failure, + if gap_handoff is not None: + msg_content = gap_handoff.message + self._observer.emit( + EVENT_CAPABILITY_GAP, + payload=gap_handoff.report.model_dump(mode="json"), + session_id=session_id, + logger_name="foundation.services.orchestrator", + level=logging.WARNING, ) - ) + else: + command_error_message = ( + self._command_usage_failure_message( + terminal_plan.assistant_message, + iterations, + ) + if stop_reason is LoopStopReason.NO_PROGRESS + else None + ) + msg_content = ( + command_error_message + if command_error_message is not None + else self._augment_message_with_stop_reason( + terminal_plan.assistant_message, + stop_reason, + cumulative_changed_paths=cumulative_changed_paths, + had_fatal=had_fatal_failure, + ) + ) + msg_content = self._guard_unverified_final_message( + msg_content, + stop_reason=stop_reason, + results=all_results, + ) assistant_message = AssistantMessage(content=msg_content) verification_notice = self._build_verification_notice( @@ -1566,6 +1613,140 @@ def _build_verification_notice( reason=reason_by_outcome[outcome], ) + @staticmethod + def _has_terminal_policy_block( + actions: list[PlannedAction], + decisions: list[PolicyDecision], + ) -> bool: + for action, decision in zip(actions, decisions, strict=True): + if decision.decision is not PolicyDecisionType.BLOCK: + continue + if PolicyReasonCode.PATH_OUT_OF_SCOPE not in decision.reason_codes: + continue + if "workspace_write" in decision.risk_categories: + return True + if 
_is_file_mutation_action(action): + return True + return False + + def _terminal_policy_block_message( + self, + *, + request: UserRequest, + decisions: list[PolicyDecision], + ) -> str: + paths = self._terminal_policy_block_paths(decisions) + target = ", ".join(paths) if paths else "the requested path" + fallback = ( + f"I did not write to {target} because it is outside the workspace root. " + "This is a terminal policy block: outside-workspace writes stay blocked " + "for safety. To perform this, open fcli in that directory or start a " + "workspace rooted at that path, then ask again." + ) + phrased = self._phrase_terminal_policy_block( + request=request.message, + target=target, + fallback=fallback, + ) + return phrased or fallback + + @staticmethod + def _terminal_policy_block_paths(decisions: list[PolicyDecision]) -> list[str]: + paths: list[str] = [] + seen: set[str] = set() + for decision in decisions: + if decision.decision is not PolicyDecisionType.BLOCK: + continue + if PolicyReasonCode.PATH_OUT_OF_SCOPE not in decision.reason_codes: + continue + if "workspace_write" not in decision.risk_categories: + continue + for path in decision.paths: + if path in seen: + continue + seen.add(path) + paths.append(path) + return paths + + def _phrase_terminal_policy_block( + self, + *, + request: str, + target: str, + fallback: str, + ) -> str | None: + developer = ( + "You are fcli, a local coding-agent CLI. The runtime verified that " + "an outside-workspace write was blocked and no such write was " + "performed. Explain this in one short, calm paragraph. Keep these " + "facts: the path is outside the active workspace, fcli did not do " + "the write, the block is for safety, and the user can retry by " + "opening fcli in that directory or using that directory as the " + "workspace root. Do NOT claim the task was completed. Output only prose." 
+ ) + user = ( + f"Original request: {request}\n" + f"Blocked target: {target}\n\n" + f"Fallback wording to preserve: {fallback}" + ) + try: + response = self._provider.complete( + ProviderPrompt( + messages=[ + ProviderMessage(role=ProviderMessageRole.DEVELOPER, content=developer), + ProviderMessage(role=ProviderMessageRole.USER, content=user), + ], + response_format=ProviderResponseFormat.TEXT, + ) + ) + except ProviderError: + return None + except Exception: # pragma: no cover - defensive on a terminal message path + logger.debug("terminal policy block phrasing failed", exc_info=True) + return None + return self._sanitize_terminal_policy_block_message(response.content) + + @staticmethod + def _sanitize_terminal_policy_block_message(content: str | None) -> str | None: + text = (content or "").strip() + if not text: + return None + if text[0] in "{[" or text.startswith("```"): + return None + if '"actions"' in text or '"assistant_message"' in text: + return None + text = " ".join(text.split()) + lower = text.lower() + if "outside" not in lower or "workspace" not in lower: + return None + if "open" not in lower and "workspace root" not in lower: + return None + if _FINAL_MUTATION_CLAIM_RE.search(text): + return None + if len(text) > 500: + text = text[:500].rsplit(" ", 1)[0].rstrip() + "..." + return text + + @staticmethod + def _guard_unverified_final_message( + message: str, + *, + stop_reason: LoopStopReason, + results: list[ExecutionResult], + ) -> str: + if stop_reason is not LoopStopReason.ZERO_ACTION_PLAN: + return message + if results: + return message + if not _FINAL_MUTATION_CLAIM_RE.search(message): + return message + if not _FILESYSTEM_CLAIM_RE.search(message): + return message + return ( + "I did not create or modify any files because no actions were executed. " + "I can only report filesystem changes after a successful write or edit action." 
+ ) + @staticmethod def _augment_message_with_stop_reason( message: str, @@ -1758,6 +1939,8 @@ def _session_status_for_result( # Stopped to ask the user something we couldn't prompt for inline # (non-interactive run, or the user dismissed the prompt). return SessionStatus.COMPLETED_INCONCLUSIVE + if stop_reason is LoopStopReason.TERMINAL_POLICY_BLOCK: + return SessionStatus.COMPLETED_INCONCLUSIVE if stop_reason is LoopStopReason.FATAL_EXECUTION_FAILURE: return SessionStatus.FAILED if stop_reason is LoopStopReason.ZERO_ACTION_PLAN: diff --git a/src/foundation/services/planner.py b/src/foundation/services/planner.py index a00e996..1170127 100644 --- a/src/foundation/services/planner.py +++ b/src/foundation/services/planner.py @@ -322,6 +322,11 @@ def _base_plan_messages( "foundation.file.read with its absolute path; the user will be asked " "to approve out-of-scope read access rather than it being silently " "blocked. Do not refuse preemptively. " + "Writes, edits, apply-diffs, and shell commands that create or mutate " + "files outside the workspace root are hard-blocked by policy and cannot " + "be approved from inside this workspace. If the user asks for that, " + "return zero actions and explain that they should open fcli in the " + "target directory or use that directory as the workspace root. " "Shell args are passed directly to the target binary via execve, " "NOT interpreted by a shell. Do NOT wrap args in single or double " "quotes, do NOT expect glob expansion or variable substitution, " diff --git a/tests/test_orchestrator.py b/tests/test_orchestrator.py index 32a9bf2..a1ff8e4 100644 --- a/tests/test_orchestrator.py +++ b/tests/test_orchestrator.py @@ -917,10 +917,136 @@ def callback(question: QuestionAction) -> str: # Writes are never escalated: no prompt, blocked, nothing written. 
assert prompts == [] + assert result.stop_reason is LoopStopReason.TERMINAL_POLICY_BLOCK + assert len(result.iterations) == 1 assert any(r.status is ExecutionStatus.BLOCKED for r in result.execution_results) + assert result.summary.blocked_actions == 1 + assert "outside the workspace" in result.assistant_message.content + assert "open" in result.assistant_message.content.lower() assert not target.exists() +def test_terminal_policy_block_message_is_model_phrased( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + outside = tmp_path / "outside" + outside.mkdir() + target = outside / "evil.md" + phrased = ( + "I did not write outside the workspace root. Open fcli in that directory " + "or use it as the workspace root, then ask again." + ) + provider = StubProvider( + [ + _provider_response( + { + "assistant_message": "Writing outside the workspace.", + "actions": [ + { + "id": "write_evil", + "kind": "tool_call", + "summary": "Write outside the workspace", + "tool_call": { + "capability_id": "foundation.file.write", + "arguments": {"path": str(target), "content": "x"}, + }, + } + ], + } + ), + _text_response(phrased), + ] + ) + orchestrator, _, _ = _orchestrator(tmp_path, monkeypatch, provider) + + result = orchestrator.orchestrate(UserRequest(message="write outside")) + + assert result.stop_reason is LoopStopReason.TERMINAL_POLICY_BLOCK + assert result.assistant_message.content == phrased + assert not target.exists() + + +def test_out_of_scope_shell_write_is_terminal_policy_block( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + outside = tmp_path / "outside-shell-dir" + provider = StubProvider( + [ + _provider_response( + { + "assistant_message": "Creating the directory.", + "actions": [ + { + "id": "mkdir_outside", + "kind": "shell", + "summary": "Create an outside directory", + "shell": {"command": "mkdir", "args": [str(outside)]}, + } + ], + } + ) + ] + ) + orchestrator, runtime, _ = _orchestrator(tmp_path, monkeypatch, provider) 
+ + result = orchestrator.orchestrate(UserRequest(message=f"make {outside}")) + + assert result.stop_reason is LoopStopReason.TERMINAL_POLICY_BLOCK + assert runtime.calls == 0 + assert result.summary.blocked_actions == 1 + assert not outside.exists() + + +def test_zero_action_file_creation_claim_is_not_reported_as_done( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + target = tmp_path / "outside" / "cheatsheet.md" + provider = StubProvider( + [ + _provider_response( + { + "assistant_message": f"Done, I created {target}.", + "actions": [], + } + ) + ] + ) + orchestrator, _, _ = _orchestrator(tmp_path, monkeypatch, provider) + + result = orchestrator.orchestrate(UserRequest(message=f"create a file at {target}")) + + assert result.stop_reason is LoopStopReason.ZERO_ACTION_PLAN + assert result.summary.executed_actions == 0 + assert "created" not in result.assistant_message.content.lower() + assert "no actions were executed" in result.assistant_message.content.lower() + assert not target.exists() + + +def test_zero_action_non_file_generation_claim_is_left_alone( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + provider = StubProvider( + [ + _provider_response( + { + "assistant_message": "Generated a short reminder: run git status first.", + "actions": [], + } + ) + ] + ) + orchestrator, _, _ = _orchestrator(tmp_path, monkeypatch, provider) + + result = orchestrator.orchestrate(UserRequest(message="generate a git reminder")) + + assert result.stop_reason is LoopStopReason.ZERO_ACTION_PLAN + assert result.assistant_message.content == "Generated a short reminder: run git status first." + + def test_orchestrator_retries_shell_cat_plan_without_executing_it( tmp_path: Path, monkeypatch: pytest.MonkeyPatch,