Coverage for cli/commands/mission_cmd.py: 88%
502 statements
« prev ^ index » next coverage.py v7.14.1, created at 2026-06-15 15:07 +0000
« prev ^ index » next coverage.py v7.14.1, created at 2026-06-15 15:07 +0000
1"""Mission goal-directed iteration loop CLI commands.
3The whole subcommand group is gated by ``GCO_ENABLE_MISSION``: when
4the env var is unset, the group prints a one-line hint and exits with
5code 2 before dispatching to any subcommand. With the flag set, the
6nine subcommands talk directly to the persistence backend and the
7:class:`mission.engine.MissionEngine` — no MCP round-trip is involved
8so the CLI works without the MCP server running.
10Subcommands:
12* ``start`` — validate inputs, resolve sampling state, persist a new
13 ``SessionState``. With ``--run``, iterate to completion synchronously.
14* ``status`` — read the full session JSON.
15* ``iterate`` — drive one or more iterations of an existing session.
16* ``checkpoint`` — re-run the verdict cascade on the latest iteration.
17* ``complete`` — force a session into ``completed``.
18* ``abort`` — pause or terminate a session.
19* ``resume`` — transition ``paused`` to ``running``.
20* ``history`` — return the iteration history (full or summary).
21* ``list`` — list sessions across the configured backend.
23Output formats: every subcommand defaults to ``--output json``; pass
24``--output table`` for a human-readable summary.
25"""
27from __future__ import annotations
29import asyncio
30import json
31import os
32import secrets
33import sys
34from collections.abc import Mapping
35from datetime import UTC, datetime
36from pathlib import Path
37from typing import TYPE_CHECKING, Any, cast
39import click
41# The Mission package lives under ``mcp/mission/`` and is imported as
42# ``mission.*``. Match the path-injection pattern used throughout the
43# MCP module surface and the ``test_mission_*`` test files so the
44# imports below resolve regardless of how this module is loaded.
45sys.path.insert(0, str(Path(__file__).resolve().parent.parent.parent / "mcp"))
48if TYPE_CHECKING: # pragma: no cover - import only for type checkers
49 from mission.types import SessionState
# One-line hint written to stderr by ``_check_feature_flag`` when the
# Mission command group is invoked without its gating variable set.
_FEATURE_FLAG_HINT = (
    "Mission tools are gated. "
    "Set GCO_ENABLE_MISSION=true (or GCO_ENABLE_ALL_TOOLS=true) to enable."
)
def _flag_enabled() -> bool:
    """Report whether the Mission command group is switched on.

    The group counts as enabled when either ``GCO_ENABLE_MISSION`` or the
    umbrella ``GCO_ENABLE_ALL_TOOLS`` environment variable holds one of
    ``true``, ``1``, ``yes`` or ``on``. Surrounding whitespace and letter
    case are ignored; an unset or empty variable counts as disabled.
    """
    accepted = ("true", "1", "yes", "on")
    return any(
        os.environ.get(var_name, "").strip().lower() in accepted
        for var_name in ("GCO_ENABLE_MISSION", "GCO_ENABLE_ALL_TOOLS")
    )
def _check_feature_flag() -> None:
    """Abort with exit status 2 unless the Mission feature flag is enabled.

    When :func:`_flag_enabled` reports False, the enablement hint is
    written to stderr and ``SystemExit(2)`` is raised. Otherwise the
    function returns without doing anything.
    """
    if _flag_enabled():
        return
    click.echo(_FEATURE_FLAG_HINT, err=True)
    raise SystemExit(2)
73# ---------------------------------------------------------------------------
74# Output helpers
75# ---------------------------------------------------------------------------
def _strip_private_criteria(session: Mapping[str, Any]) -> dict[str, Any]:
    """Return ``session`` as cleaned by ``mission.validation.strip_private_fields``.

    This is a pass-through wrapper: the real work happens in the
    canonical helper, which is imported lazily on each call. The wrapper
    keeps the older ``_strip_private_criteria`` name so the call sites in
    this file (every ``save_session`` and JSON dump of a session) stay
    unchanged.
    """
    from mission.validation import strip_private_fields as _strip  # noqa: PLC0415

    cleaned: dict[str, Any] = _strip(session)
    return cleaned
def _strip_iteration(iteration: Any) -> Any:
    """Clean one iteration record via the canonical iterations helper.

    A mapping is wrapped in a one-element list, handed to
    ``mission.validation.strip_private_fields_iterations``, and the single
    result is returned. Anything that is not a mapping is returned
    untouched, so a corrupt history entry stays visible to the caller.
    """
    if isinstance(iteration, Mapping):
        from mission.validation import (  # noqa: PLC0415
            strip_private_fields_iterations as _strip_many,
        )

        return _strip_many([iteration])[0]
    return iteration
def _emit_json(payload: Any, *, err: bool = False) -> None:
    """Write ``payload`` as one line of JSON to stdout, or stderr if ``err``.

    Values the JSON encoder cannot handle natively (for example datetime
    or Path objects) are rendered through ``str`` rather than raising, so
    every output path succeeds even for a partially built dict.
    """
    serialized = json.dumps(payload, default=str)
    click.echo(serialized, err=err)
def _emit_error(code: str, details: dict[str, Any] | None = None) -> None:
    """Write a structured error envelope to stderr as one JSON line.

    The envelope always carries ``code``. The ``details`` key is included
    only when a value other than ``None`` is supplied.
    """
    envelope: dict[str, Any]
    if details is None:
        envelope = {"code": code}
    else:
        envelope = {"code": code, "details": details}
    _emit_json(envelope, err=True)
125# ---------------------------------------------------------------------------
126# Stub dispatcher
127# ---------------------------------------------------------------------------
def _make_stub_dispatcher() -> Any:
    """Build the stub tool dispatcher from the engine factory module.

    Forwards to ``mission._engine_factory.make_stub_dispatcher`` (imported
    lazily) and returns its result unchanged. The name is retained for the
    tests that import it from this module; the commands in this file go
    through ``build_mission_engine`` with ``use_stub_dispatcher`` instead.
    """
    from mission._engine_factory import make_stub_dispatcher as _factory  # noqa: PLC0415

    dispatcher = _factory()
    return dispatcher
144# ---------------------------------------------------------------------------
145# Click group
146# ---------------------------------------------------------------------------
# NOTE: the docstring below doubles as the ``mission --help`` text, so it
# is user-facing output rather than internal documentation.
@click.group("mission")
def mission_cmd() -> None:
    """Mission goal-directed iteration loop commands.

    Subcommands manage Mission sessions: ``start``, ``status``,
    ``iterate``, ``checkpoint``, ``complete``, ``abort``, ``resume``,
    ``history``, ``list``.

    Gated by the ``GCO_ENABLE_MISSION`` environment variable. With
    the flag unset, every subcommand prints a one-line hint to stderr
    and exits with code 2.
    """
    # The group callback runs before any subcommand is dispatched, so the
    # feature-flag gate is enforced once here for the whole group.
    _check_feature_flag()
164# ---------------------------------------------------------------------------
165# start
166# ---------------------------------------------------------------------------
@mission_cmd.command("start")
@click.option("--directive", required=True, help="Natural-language goal description.")
@click.option(
    "--criteria-file",
    default=None,
    type=click.Path(exists=True, dir_okay=False),
    help="JSON file containing the criteria list. Required unless --with-defaults is set.",
)
@click.option(
    "--max-iterations",
    required=True,
    type=int,
    help="Hard cap on the iteration count. Pass -1 to opt out (uncapped).",
)
@click.option(
    "--max-wall-clock",
    required=True,
    type=int,
    help="Hard cap on wall-clock seconds. Pass -1 to opt out (uncapped).",
)
@click.option(
    "--tool-allowlist",
    multiple=True,
    help="Tool name to allowlist; pass multiple times. Optional with --allow-all-tools.",
)
@click.option(
    "--allow-all-tools",
    is_flag=True,
    help=(
        "Resolve the session's tool allowlist to every registered MCP tool "
        "(minus the mission_* control tools). Makes --tool-allowlist optional; "
        "mutually exclusive with it."
    ),
)
@click.option(
    "--cadence",
    type=click.Choice(["every_iteration", "every_n_iterations", "every_t_seconds", "on_event"]),
    default="every_iteration",
    show_default=True,
    help="Checkpoint cadence kind.",
)
@click.option("--cadence-n", type=int, default=None, help="Cadence n parameter.")
@click.option("--cadence-t", type=int, default=None, help="Cadence t parameter (seconds).")
@click.option("--cadence-event", default=None, help="Cadence event_name parameter.")
@click.option(
    "--stagnation-threshold",
    type=int,
    default=3,
    show_default=True,
    help="Iterations of no progress before terminate.",
)
@click.option(
    "--use-sampling/--no-sampling",
    "use_sampling",
    default=None,
    help="Enable/disable LLM sampling (default: auto-detect).",
)
@click.option(
    "--bedrock-model-id",
    default=None,
    help="Override the Bedrock model id used by the CLI sampling backend.",
)
@click.option(
    "--allow-scripted-strategies",
    is_flag=True,
    help="Allow scripted strategies to run via the Mission sandbox.",
)
@click.option(
    "--with-defaults",
    is_flag=True,
    help="Use a basic placeholder predicate criterion when no --criteria-file is provided.",
)
@click.option(
    "--run",
    "run_mode",
    is_flag=True,
    help="Iterate to completion synchronously after creating the session.",
)
@click.option(
    "--dry-run",
    "dry_run",
    is_flag=True,
    help=(
        "Use a stub tool dispatcher and disable Strategy_Revision sampling "
        "during iteration. Useful for smoke-testing the loop bookkeeping "
        "without spending Bedrock or AWS credits. Only meaningful with --run."
    ),
)
@click.option(
    "--output",
    type=click.Choice(["json", "table"]),
    default="json",
    show_default=True,
    help="Output format.",
)
def mission_start(
    directive: str,
    criteria_file: str | None,
    max_iterations: int,
    max_wall_clock: int,
    tool_allowlist: tuple[str, ...],
    allow_all_tools: bool,
    cadence: str,
    cadence_n: int | None,
    cadence_t: int | None,
    cadence_event: str | None,
    stagnation_threshold: int,
    use_sampling: bool | None,
    bedrock_model_id: str | None,
    allow_scripted_strategies: bool,
    with_defaults: bool,
    run_mode: bool,
    dry_run: bool,
    output: str,
) -> None:
    """Start a new Mission session.

    Validates inputs through the shared validators in
    ``mission.validation``, resolves the sampling state via
    ``mission.sampling.resolve_sampling_state``, and persists the
    session through the configured backend (``GCO_MISSION_STATE_BACKEND``,
    defaults to filesystem under ``~/.gco/missions``).

    With ``--run``, iterates to completion synchronously: each verdict
    is printed as one JSON line to stderr; the final stdout is the
    Final_Report JSON.
    """
    # Deferred imports: the Mission package is loaded only when the command
    # actually executes, which keeps help-only invocations cheap.
    from mission import sampling as mission_sampling  # noqa: PLC0415
    from mission import state as mission_state  # noqa: PLC0415
    from mission import validation as mission_validation  # noqa: PLC0415
    from mission.types import SCHEMA_VERSION  # noqa: PLC0415
    from mission.validation import MissionValidationError  # noqa: PLC0415

    # Criteria come from the JSON file when one is given, otherwise from the
    # single placeholder predicate that --with-defaults enables. Supplying
    # neither is rejected before anything else is looked at.
    if not criteria_file and not with_defaults:
        _emit_error(
            "validation_error",
            {
                "field": "criteria",
                "reason": "either --criteria-file or --with-defaults is required",
            },
        )
        sys.exit(1)

    criteria: list[dict[str, Any]]
    if criteria_file:
        try:
            with open(criteria_file, encoding="utf-8") as handle:
                criteria = json.load(handle)
        except (OSError, ValueError) as load_error:
            # ValueError covers malformed JSON as well as undecodable bytes.
            _emit_error(
                "validation_error",
                {"field": "criteria-file", "reason": str(load_error)},
            )
            sys.exit(1)
    else:
        criteria = [
            {
                "criterion_id": "default",
                "kind": "predicate",
                "required": True,
                "expression": "True",
            }
        ]

    # Raw budget and cadence payloads, in the shape the validators receive.
    budget: dict[str, Any] = {
        "max_iterations": max_iterations,
        "max_wall_clock_seconds": max_wall_clock,
    }
    cadence_dict: dict[str, Any] = {"kind": cadence}
    optional_cadence_params = (
        ("n", cadence_n),
        ("t", cadence_t),
        ("event_name", cadence_event),
    )
    for param_name, param_value in optional_cadence_params:
        # Only parameters the operator actually passed are forwarded.
        if param_value is not None:
            cadence_dict[param_name] = param_value

    # Resolve the effective allowlist before any persistence. A rejection
    # here emits a structured envelope and exits before a session is built.
    allowlist_resolved = _resolve_cli_allowlist(
        tool_allowlist=tool_allowlist, allow_all_tools=allow_all_tools
    )

    # The budget validator is handed an empty tag map on this path; per the
    # original design notes the CLI is a deliberately thin path and the MCP
    # tool surface performs the full validation.
    try:
        directive_clean = mission_validation.validate_directive(directive)
        criteria_clean = mission_validation.validate_criteria(criteria)
        budget_clean = mission_validation.validate_budget(budget, allowlist_resolved, {})
        cadence_clean = mission_validation.validate_cadence(cadence_dict)
    except MissionValidationError as invalid:
        _emit_error(invalid.code, invalid.details)
        sys.exit(1)

    threshold_is_valid = isinstance(stagnation_threshold, int) and stagnation_threshold > 0
    if not threshold_is_valid:
        _emit_error(
            "validation_error",
            {"field": "stagnation-threshold", "reason": "must_be_positive_int"},
        )
        sys.exit(1)

    # ``None`` is passed as the context because there is no MCP context on
    # the CLI path; the helper decides the sampling backend from there.
    use_sampling_resolved, backend_resolved = mission_sampling.resolve_sampling_state(
        None, use_sampling
    )

    session_id = "mission-" + secrets.token_hex(8)
    session: dict[str, Any] = {
        "version": SCHEMA_VERSION,
        "session_id": session_id,
        "directive_text": directive_clean,
        "criteria": criteria_clean,
        "budget": budget_clean,
        "tool_allowlist": allowlist_resolved,
        "checkpoint_cadence": cadence_clean,
        "stagnation_threshold": stagnation_threshold,
        "use_sampling": use_sampling_resolved,
        "sampling_backend_resolved": backend_resolved,
        "allow_scripted_strategies": bool(allow_scripted_strategies),
        "status": "pending",
        "created_at": datetime.now(UTC).isoformat(),
        "iterations": [],
        "no_progress_counter": 0,
    }
    if bedrock_model_id:
        session["bedrock_model_id"] = bedrock_model_id

    # Private validator artefacts (the cached ``_parsed_ast`` on predicate
    # criteria) are stripped just before persistence; the in-memory copy
    # keeps them.
    mission_state.get_backend().save_session(
        cast("SessionState", _strip_private_criteria(session))
    )

    if run_mode:
        # --run: hand over to the synchronous driver, which produces all
        # further output itself.
        _run_to_completion(session_id, dry_run=dry_run)
        return

    if output != "table":
        _emit_json(
            {
                "session_id": session_id,
                "status": "pending",
                "use_sampling": use_sampling_resolved,
                "sampling_backend_resolved": backend_resolved,
            }
        )
        return

    sampling_label = "on" if use_sampling_resolved else "off"
    click.echo(f"Session ID: {session_id}")
    click.echo("Status: pending")
    click.echo(f"Sampling: {sampling_label} ({backend_resolved})")
def _run_to_completion(session_id: str, *, dry_run: bool = False) -> None:
    """Iterate ``session_id`` until the engine returns a terminal verdict.

    The engine is built through ``build_mission_engine`` with
    ``use_stub_dispatcher=dry_run``. On the non-dry-run path the tool
    registry is populated first via :func:`_ensure_tool_registry`.

    Output contract:

    * one JSON line per iteration (index, verdict, reason) on stderr;
    * on stdout, the contents of ``<session_id>.report.json`` when the
      backend is a ``FilesystemBackend`` and that file exists, otherwise
      the persisted session JSON.

    Exits with status 1 when the session cannot be loaded (before or after
    the run) or when the engine raises ``MissionEngineError``.
    """
    from mission import state as mission_state  # noqa: PLC0415
    from mission._engine_factory import build_mission_engine  # noqa: PLC0415
    from mission.engine import MissionEngineError  # noqa: PLC0415
    from mission.state import FilesystemBackend  # noqa: PLC0415

    backend = mission_state.get_backend()
    initial_session = backend.load_session(session_id)
    if initial_session is None:
        _emit_error("session_not_found", {"session_id": session_id})
        sys.exit(1)

    # The stub dispatcher is not expected to consult the registry, so
    # registration is skipped on the dry-run path.
    if not dry_run:
        _ensure_tool_registry()

    terminal_verdicts = ("complete", "terminate")

    async def _drive() -> None:
        engine = await build_mission_engine(
            initial_session, ctx=None, use_stub_dispatcher=dry_run
        )
        latest_verdict = None
        while latest_verdict not in terminal_verdicts:
            try:
                record = await engine.run_iteration(session_id, ctx=None)
            except MissionEngineError as engine_error:
                _emit_error(engine_error.code, {"session_id": session_id})
                sys.exit(1)
            progress = {
                "iteration_index": record["iteration_index"],
                "verdict": record["verdict"],
                "verdict_reason": record["verdict_reason"],
            }
            _emit_json(progress, err=True)
            latest_verdict = record["verdict"]

    asyncio.run(_drive())

    # Reload after the run so the fallback output reflects what the engine
    # persisted rather than the pre-run snapshot.
    final_session = backend.load_session(session_id)

    report_path = None
    if isinstance(backend, FilesystemBackend):
        report_path = backend.root / f"{session_id}.report.json"
    if report_path is not None and report_path.exists():
        click.echo(report_path.read_text(encoding="utf-8"))
        return

    if final_session is None:
        _emit_error("session_disappeared", {"session_id": session_id})
        sys.exit(1)
    _emit_json(_strip_private_criteria(final_session))
def _ensure_tool_registry() -> None:
    """Make the MCP ``tools`` package importable and register its tools.

    The CLI does not boot the MCP server, so the tools have to be
    registered explicitly before the live dispatcher (or the all-tools
    allowlist resolver) can look them up. This helper is called from
    several commands and may run more than once per process.

    The ``mcp`` directory is put at the front of ``sys.path`` only when it
    is not already the first entry. Repeated calls therefore no longer
    stack duplicate path entries, while the directory keeps the same
    import priority it had before.

    NOTE(review): repeated calls are assumed to be harmless because
    ``register_all_tools`` is described as idempotent by its callers —
    confirm against ``tools.register_all_tools``.
    """
    mcp_dir = str(Path(__file__).resolve().parent.parent.parent / "mcp")
    if sys.path[:1] != [mcp_dir]:
        sys.path.insert(0, mcp_dir)
    from tools import register_all_tools  # noqa: PLC0415

    register_all_tools()
def _resolve_registered_tools_for_cli() -> tuple[dict[str, Any], set[str]]:
    """Populate the tool registry and return a snapshot of it.

    Calls :func:`_ensure_tool_registry`, then lists the tools through
    ``server.mcp._list_tools()`` on a fresh event loop.

    Returns:
        A pair ``(tools_by_name, control_names)``. ``tools_by_name`` maps
        each tool's ``name`` to the tool object. ``control_names`` holds
        the names of tools whose ``tags`` contain ``"mission"``; a missing
        or empty ``tags`` attribute counts as no tags. Both are empty when
        the registry lists no tools.
    """
    _ensure_tool_registry()
    from server import mcp  # noqa: PLC0415 — lazy

    async def _snapshot() -> list[Any]:
        listed = await mcp._list_tools()
        return list(listed)

    tools_by_name: dict[str, Any] = {}
    control_names: set[str] = set()
    for tool in asyncio.run(_snapshot()):
        tools_by_name[tool.name] = tool
        tags = getattr(tool, "tags", None) or set()
        if "mission" in tags:
            control_names.add(tool.name)
    return tools_by_name, control_names
def _resolve_cli_allowlist(*, allow_all_tools: bool, tool_allowlist: tuple[str, ...]) -> list[str]:
    """Return the effective tool allowlist, or exit with status 1.

    Without ``allow_all_tools`` the explicit names are returned as a list
    with no per-name registry check; an empty tuple is rejected with the
    ``validation_error`` / ``empty`` envelope.

    With ``allow_all_tools`` the registry is populated on demand and the
    list is resolved by ``mission.validation.resolve_effective_allowlist``.
    Any ``MissionValidationError`` it raises is emitted as a structured
    envelope before the process exits 1.
    """
    from mission import validation as mission_validation  # noqa: PLC0415
    from mission.validation import MissionValidationError  # noqa: PLC0415

    if not allow_all_tools:
        if tool_allowlist:
            return list(tool_allowlist)
        _emit_error("validation_error", {"field": "tool_allowlist", "reason": "empty"})
        sys.exit(1)

    registered_tools, control_tools = _resolve_registered_tools_for_cli()
    try:
        effective: list[str] = mission_validation.resolve_effective_allowlist(
            allow_all_tools=True,
            explicit_allowlist=list(tool_allowlist),
            registered_tools=registered_tools,
            control_tools=control_tools,
        )
    except MissionValidationError as rejection:
        _emit_error(rejection.code, rejection.details)
        sys.exit(1)
    return effective
600# ---------------------------------------------------------------------------
601# status
602# ---------------------------------------------------------------------------
@mission_cmd.command("status")
@click.argument("session_id")
@click.option("--output", type=click.Choice(["json", "table"]), default="json", show_default=True)
def mission_status_cmd(session_id: str, output: str) -> None:
    """Get the full state of a Mission session."""
    from mission.state import get_backend  # noqa: PLC0415

    stored = get_backend().load_session(session_id)
    if stored is None:
        _emit_error("session_not_found", {"session_id": session_id})
        sys.exit(1)

    # Private criterion keys are removed before anything is shown.
    cleaned = _strip_private_criteria(stored)
    if output != "table":
        _emit_json(cleaned)
        return

    # Table view: a fixed set of headline fields, one per line. Missing
    # keys fall back to an empty value instead of raising.
    click.echo(f"Session ID: {cleaned.get('session_id', '')}")
    click.echo(f"Status: {cleaned.get('status', '')}")
    click.echo(f"Directive: {cleaned.get('directive_text', '')}")
    iteration_count = len(cleaned.get("iterations", []) or [])
    click.echo(f"Iterations: {iteration_count}")
    allowlist = cleaned.get("tool_allowlist", []) or []
    click.echo(f"Allowlist: {', '.join(allowlist)}")
    sampling_label = "on" if cleaned.get("use_sampling") else "off"
    click.echo(
        f"Sampling: {sampling_label} ({cleaned.get('sampling_backend_resolved', 'none')})"
    )
638# ---------------------------------------------------------------------------
639# iterate
640# ---------------------------------------------------------------------------
@mission_cmd.command("iterate")
@click.argument("session_id")
@click.option(
    "--max-iterations",
    default=1,
    type=int,
    show_default=True,
    help="How many iterations to run in this call.",
)
@click.option(
    "--dry-run",
    "dry_run",
    is_flag=True,
    help=(
        "Use a stub tool dispatcher and disable Strategy_Revision sampling. "
        "Useful for smoke-testing the loop without spending Bedrock or AWS credits."
    ),
)
@click.option("--output", type=click.Choice(["json", "table"]), default="json", show_default=True)
def mission_iterate_cmd(session_id: str, max_iterations: int, dry_run: bool, output: str) -> None:
    """Run one or more iterations on a Mission session.

    Stops early on a terminal verdict. By default the engine is wired
    with the live FastMCP tool dispatcher and the Strategy_Revision
    sampling callable so the loop iterates against real tool results
    and lets the model revise the strategy between iterations.

    Pass ``--dry-run`` to substitute the canned-stub dispatcher and
    disable sampling — useful for smoke-testing the bookkeeping
    without spending Bedrock or AWS credits.
    """
    from mission._engine_factory import build_mission_engine  # noqa: PLC0415
    from mission.engine import MissionEngineError  # noqa: PLC0415
    from mission.state import get_backend  # noqa: PLC0415

    # ``max_iterations`` here is the number of iterations for THIS call,
    # not the session-wide ``budget.max_iterations`` cap. The cap accepts
    # ``-1`` as "uncapped", but a per-call count below one would make the
    # invocation a no-op, so it is rejected.
    if max_iterations < 1:
        _emit_error(
            "validation_error",
            {"field": "max-iterations", "reason": "must_be_positive_int"},
        )
        sys.exit(1)

    stored_session = get_backend().load_session(session_id)
    if stored_session is None:
        _emit_error("session_not_found", {"session_id": session_id})
        sys.exit(1)

    if not dry_run:
        _ensure_tool_registry()

    async def _drive() -> dict[str, Any]:
        engine = await build_mission_engine(
            stored_session, ctx=None, use_stub_dispatcher=dry_run
        )
        outcome: dict[str, Any] = {"session_id": session_id}
        completed: list[dict[str, Any]] = []
        remaining = max_iterations
        while remaining > 0:
            remaining -= 1
            try:
                record = await engine.run_iteration(session_id, ctx=None)
            except MissionEngineError as engine_error:
                # Report the failure to the caller; the loop stops here.
                outcome["error"] = {"code": engine_error.code}
                break
            completed.append(
                {
                    "iteration_index": record["iteration_index"],
                    "verdict": record["verdict"],
                    "verdict_reason": record["verdict_reason"],
                }
            )
            if record["verdict"] in ("complete", "terminate"):
                break
        outcome["iterations"] = completed
        return outcome

    result = asyncio.run(_drive())

    failure = result.get("error")
    if failure is not None:
        _emit_error(failure["code"], {"session_id": session_id})
        sys.exit(1)

    if output != "table":
        _emit_json(result)
        return
    for entry in result.get("iterations", []):
        click.echo(
            f" Iteration {entry['iteration_index']}: {entry['verdict']} ({entry['verdict_reason']})"
        )
744# ---------------------------------------------------------------------------
745# checkpoint
746# ---------------------------------------------------------------------------
@mission_cmd.command("checkpoint")
@click.argument("session_id")
@click.option("--output", type=click.Choice(["json", "table"]), default="json", show_default=True)
def mission_checkpoint_cmd(session_id: str, output: str) -> None:
    """Re-run the verdict cascade on the latest iteration of a session."""
    from mission.decide import decide_verdict  # noqa: PLC0415
    from mission.state import get_backend  # noqa: PLC0415

    session = get_backend().load_session(session_id)
    if session is None:
        _emit_error("session_not_found", {"session_id": session_id})
        sys.exit(1)

    history = session.get("iterations") or []
    if not history:
        # Nothing has been iterated yet, so there is no verdict to recompute.
        _emit_error("no_iterations", {"session_id": session_id})
        sys.exit(1)

    latest = history[-1]
    # The cascade is evaluated against the current UTC time; nothing is
    # written back to the backend by this command.
    verdict, reason = decide_verdict(session, latest, datetime.now(UTC))
    iteration_index = latest.get("iteration_index")

    if output == "table":
        click.echo(f"Iteration {iteration_index}: {verdict} ({reason})")
        return
    _emit_json(
        {
            "session_id": session_id,
            "iteration_index": iteration_index,
            "verdict": verdict,
            "verdict_reason": reason,
        }
    )
785# ---------------------------------------------------------------------------
786# complete
787# ---------------------------------------------------------------------------
@mission_cmd.command("complete")
@click.argument("session_id")
@click.option("--output", type=click.Choice(["json", "table"]), default="json", show_default=True)
def mission_complete_cmd(session_id: str, output: str) -> None:
    """Force a Mission session into ``completed`` status."""
    from mission.state import get_backend  # noqa: PLC0415
    from mission.types import TERMINAL_STATES  # noqa: PLC0415

    backend = get_backend()
    session = backend.load_session(session_id)
    if session is None:
        _emit_error("session_not_found", {"session_id": session_id})
        sys.exit(1)

    current_status = session["status"]
    if current_status in TERMINAL_STATES:
        # A session already in a terminal state is never rewritten.
        _emit_error(
            "session_terminal",
            {"session_id": session_id, "status": current_status},
        )
        sys.exit(1)

    ended_at = datetime.now(UTC).isoformat()
    session["status"] = "completed"
    session["final_verdict"] = "complete"
    session["ended_at"] = ended_at
    backend.save_session(cast("SessionState", _strip_private_criteria(session)))

    if output == "table":
        click.echo(f"Session {session_id}: completed (forced)")
        return
    _emit_json(
        {
            "session_id": session_id,
            "status": "completed",
            "final_verdict": "complete",
        }
    )
830# ---------------------------------------------------------------------------
831# abort
832# ---------------------------------------------------------------------------
@mission_cmd.command("abort")
@click.argument("session_id")
@click.option("--pause", is_flag=True, help="Pause the session instead of terminating.")
@click.option("--output", type=click.Choice(["json", "table"]), default="json", show_default=True)
def mission_abort_cmd(session_id: str, pause: bool, output: str) -> None:
    """Pause or terminate a Mission session.

    With ``--pause``, transitions the session to ``paused`` (resumable).
    Without ``--pause``, transitions to ``terminated`` and stamps the
    final verdict.
    """
    from mission.state import get_backend  # noqa: PLC0415
    from mission.types import TERMINAL_STATES  # noqa: PLC0415

    backend = get_backend()
    session = backend.load_session(session_id)
    if session is None:
        _emit_error("session_not_found", {"session_id": session_id})
        sys.exit(1)

    current_status = session["status"]
    if current_status in TERMINAL_STATES:
        # A session already in a terminal state is never rewritten.
        _emit_error(
            "session_terminal",
            {"session_id": session_id, "status": current_status},
        )
        sys.exit(1)

    if not pause:
        # Termination stamps the final verdict and the end timestamp;
        # pausing changes only the status.
        ended_at = datetime.now(UTC).isoformat()
        session["status"] = "terminated"
        session["final_verdict"] = "terminate"
        session["ended_at"] = ended_at
    else:
        session["status"] = "paused"
    backend.save_session(cast("SessionState", _strip_private_criteria(session)))

    new_status = session["status"]
    if output == "table":
        click.echo(f"Session {session_id}: {new_status}")
        return
    _emit_json({"session_id": session_id, "status": new_status})
880# ---------------------------------------------------------------------------
881# resume
882# ---------------------------------------------------------------------------
@mission_cmd.command("resume")
@click.argument("session_id")
@click.option(
    "--output",
    type=click.Choice(["json", "table"]),
    default="json",
    show_default=True,
)
def mission_resume_cmd(session_id: str, output: str) -> None:
    """Resume a paused Mission session."""
    # Lazy import: the Mission package is only loaded when the command runs.
    from mission.state import get_backend  # noqa: PLC0415

    store = get_backend()
    record = store.load_session(session_id)

    # Unknown session id -> structured error, exit code 1.
    if record is None:
        _emit_error("session_not_found", {"session_id": session_id})
        sys.exit(1)

    # Only a ``paused`` session may be resumed; every other status is
    # reported back to the caller together with the rejection.
    if record["status"] != "paused":
        _emit_error(
            "invalid_state",
            {"session_id": session_id, "status": record["status"]},
        )
        sys.exit(1)

    record["status"] = "running"
    store.save_session(cast("SessionState", _strip_private_criteria(record)))

    if output != "table":
        _emit_json({"session_id": session_id, "status": "running"})
        return
    click.echo(f"Session {session_id}: running")
917# ---------------------------------------------------------------------------
918# history
919# ---------------------------------------------------------------------------
@mission_cmd.command("history")
@click.argument("session_id")
@click.option(
    "--format",
    "fmt",
    type=click.Choice(["full", "summary"]),
    default="summary",
    show_default=True,
    help="Iteration history detail level.",
)
@click.option(
    "--include-observations",
    "include_obs",
    is_flag=True,
    help=(
        "Include the observation and strategy dicts in each iteration's "
        "output. Only meaningful with --format full. Useful for debugging "
        "what each tool returned and what strategy was proposed."
    ),
)
@click.option(
    "--output",
    type=click.Choice(["json", "table"]),
    default="json",
    show_default=True,
)
def mission_history_cmd(session_id: str, fmt: str, include_obs: bool, output: str) -> None:
    """Get the iteration history of a Mission session."""
    # Parameters (bound by click):
    #   session_id  -- id of the session whose history is printed.
    #   fmt         -- "full" (cleaned iteration dicts) or "summary"
    #                  (six headline fields per iteration).
    #   include_obs -- keep ``observation`` / ``strategy`` in "full" output.
    #   output      -- "json" or "table".
    # Exits with status 1 when the session does not exist.
    from mission.state import get_backend  # noqa: PLC0415

    backend = get_backend()
    session = backend.load_session(session_id)
    if session is None:
        _emit_error("session_not_found", {"session_id": session_id})
        sys.exit(1)
    # A missing or null ``iterations`` field is treated as "no iterations yet".
    iterations = session.get("iterations") or []

    if fmt == "full":
        cleaned = [_strip_iteration(it) for it in iterations]
        if not include_obs:
            # Strip observation and strategy from the output to keep it
            # concise. Operators who need the full shape pass
            # --include-observations.
            for it in cleaned:
                if isinstance(it, dict):
                    it.pop("observation", None)
                    it.pop("strategy", None)

        if output != "table":
            _emit_json({"session_id": session_id, "iterations": cleaned})
            return

        for it in cleaned:
            if not isinstance(it, dict):
                continue
            idx = it.get("iteration_index", "?")
            verdict = it.get("verdict", "?")
            reason = it.get("verdict_reason", "?")
            click.echo(f" Iteration {idx}: {verdict} ({reason})")
            if not include_obs:
                continue
            # ``dict.get(key, {})`` only covers a *missing* key; a key stored
            # as null comes back as None and would break the ``.get`` calls
            # below. Normalise anything that is not a dict to an empty dict
            # so one odd iteration cannot abort the whole listing.
            obs = it.get("observation")
            if not isinstance(obs, dict):
                obs = {}
            strat = it.get("strategy")
            if not isinstance(strat, dict):
                strat = {}
            results = obs.get("tool_results") or []
            errors = obs.get("errors") or []
            # The rationale is cut to 100 characters for the one-line view.
            rationale = str(strat.get("rationale") or "")[:100]
            calls = strat.get("tool_calls") or []
            tool_names = [c.get("tool_name", "?") for c in calls if isinstance(c, dict)]
            click.echo(f" tools: {tool_names}")
            click.echo(f" rationale: {rationale}")
            click.echo(f" tool_results: {len(results)} entries, errors: {len(errors)}")
        return

    # Summary mode: project each iteration onto its headline fields.
    # Non-dict entries are skipped, mirroring the guard used by the
    # full-format table above, instead of raising on ``.get``.
    summaries = [
        {
            "iteration_index": it.get("iteration_index"),
            "verdict": it.get("verdict"),
            "verdict_reason": it.get("verdict_reason"),
            "started_at": it.get("started_at"),
            "ended_at": it.get("ended_at"),
            "checkpoint_evaluated": it.get("checkpoint_evaluated", False),
        }
        for it in iterations
        if isinstance(it, dict)
    ]
    if output == "table":
        for s in summaries:
            click.echo(
                f" Iteration {s['iteration_index']}: {s['verdict']} ({s['verdict_reason']})"
            )
    else:
        _emit_json({"session_id": session_id, "iterations": summaries})
1012# ---------------------------------------------------------------------------
1013# list
1014# ---------------------------------------------------------------------------
@mission_cmd.command("list")
@click.option(
    "--status",
    default=None,
    help="Filter sessions by status (pending, running, paused, ...).",
)
@click.option(
    "--output",
    type=click.Choice(["json", "table"]),
    default="json",
    show_default=True,
)
def mission_list_cmd(status: str | None, output: str) -> None:
    """List Mission sessions."""
    # Parameters (bound by click):
    #   status -- optional status filter; None or "" means "no filter".
    #   output -- "json" (raw session list) or "table" (aligned columns).
    from mission.state import get_backend  # noqa: PLC0415

    backend = get_backend()
    # The backend receives either a ``{"status": ...}`` filter or None.
    filter_dict = {"status": status} if status else None
    sessions = backend.list_sessions(filter_dict)

    if output != "table":
        _emit_json({"sessions": sessions})
        return

    header = f" {'SESSION ID':<40} {'STATUS':<11} {'ITER':>5} CREATED"
    click.echo(header)
    click.echo(" " + "-" * (len(header) - 2))
    for s in sessions:
        # Every column coalesces a missing or null field to a printable
        # default. ``iteration_count`` needs this too: ``None`` rejects the
        # ``>5`` format spec with a TypeError, which would abort the listing.
        sid = (s.get("session_id") or "")[:40]
        st = (s.get("status") or "")[:11]
        it = s.get("iteration_count") or 0
        ca = (s.get("created_at") or "")[:19]
        click.echo(f" {sid:<40} {st:<11} {it:>5} {ca}")
1051# ---------------------------------------------------------------------------
1052# scaffold-criteria
1053# ---------------------------------------------------------------------------
def _echo_criteria_table(criteria: list[dict[str, Any]]) -> None:
    """Print one aligned summary row per scaffolded criterion."""
    for entry in criteria:
        # Stringify before applying the width spec: a missing field comes
        # back as None, and None rejects ``:<32`` with a TypeError.
        criterion_id = str(entry.get("criterion_id"))
        kind = str(entry.get("kind"))
        click.echo(f" {criterion_id:<32} kind={kind:<16} required={entry.get('required')}")


@mission_cmd.command("scaffold-criteria")
@click.option(
    "--directive",
    required=True,
    help="Natural-language goal description used to seed the criteria.",
)
@click.option(
    "--allowlist",
    "allowlist",
    multiple=True,
    help=(
        "Optional tool names that the resulting session would be "
        "configured with. Used informationally on the deterministic "
        "path; on the sampling path, shapes the prompt so the model "
        "picks metric/event names plausibly produced by the listed tools."
    ),
)
@click.option(
    "--use-sampling/--no-sampling",
    "use_sampling",
    default=None,
    help=(
        "Force the sampling path on/off. Default auto-detects: MCP "
        "host capability, then Bedrock credentials, then deterministic."
    ),
)
@click.option(
    "--bedrock-model-id",
    default=None,
    help="Override the Bedrock model id used by the CLI sampling backend.",
)
@click.option(
    "--max-criteria",
    type=int,
    default=5,
    show_default=True,
    help="Cap on the number of criterion entries scaffolded.",
)
@click.option(
    "--retries",
    type=int,
    default=3,
    show_default=True,
    help="Sampling-path retry budget on validator rejections.",
)
@click.option(
    "--output-file",
    "output_file",
    type=click.Path(dir_okay=False),
    default=None,
    help="Write the JSON to this file instead of stdout.",
)
@click.option(
    "--output",
    type=click.Choice(["json", "table"]),
    default="json",
    show_default=True,
    help="Output format (table mode prints a per-entry summary alongside the JSON).",
)
def mission_scaffold_criteria_cmd(
    directive: str,
    allowlist: tuple[str, ...],
    use_sampling: bool | None,
    bedrock_model_id: str | None,
    max_criteria: int,
    retries: int,
    output_file: str | None,
    output: str,
) -> None:
    """Scaffold a criteria.json from a natural-language directive.

    Resolves the sampling state via ``mission.sampling.resolve_sampling_state``;
    when a backend resolves and ``--use-sampling`` permits, the resolved
    backend is asked for a JSON array. The response is validated through
    ``validate_criteria`` and retried up to ``--retries`` times on
    rejection. Falls back to the deterministic keyword-template
    generator when sampling is unavailable, disabled, or after the
    retry budget is exhausted.

    The output always validates through ``validate_criteria`` so the
    resulting file is immediately usable with ``mission start
    --criteria-file``.
    """
    # Lazy imports: avoid the Mission package import cost for ``--help``.
    from mission import criteria_scaffold  # noqa: PLC0415
    from mission import sampling as mission_sampling  # noqa: PLC0415

    # Cheap argument checks first, so a bad flag never reaches a backend.
    if max_criteria < 1:
        _emit_error(
            "validation_error",
            {"field": "max-criteria", "reason": "must_be_positive_int"},
        )
        sys.exit(1)
    if retries < 0:
        _emit_error(
            "validation_error",
            {"field": "retries", "reason": "must_be_non_negative_int"},
        )
        sys.exit(1)

    use_sampling_resolved, backend_resolved = mission_sampling.resolve_sampling_state(
        None, use_sampling
    )

    # ``criteria`` stays None until one of the two generators produces a
    # list; ``sampling_path_taken`` records which generator succeeded.
    criteria: list[dict[str, Any]] | None = None
    sampling_path_taken = False
    if use_sampling_resolved and backend_resolved != "none":
        backend_obj = mission_sampling.select_sampling_backend(
            None,
            model_id=bedrock_model_id,
            prefs=None,
        )
        if backend_obj is not None:
            try:
                criteria = asyncio.run(
                    criteria_scaffold.generate_sampled_criteria(
                        backend_obj,
                        directive,
                        allowlist=list(allowlist),
                        max_criteria=max_criteria,
                        retries=retries,
                    )
                )
                sampling_path_taken = True
            except criteria_scaffold.ScaffoldSamplingError as exc:
                # The sampling path failed; emit a one-line warning to
                # stderr so the operator sees what happened, then fall
                # through to the deterministic generator.
                click.echo(
                    f"sampling path failed ({exc.last_reason}); "
                    "falling back to deterministic templates.",
                    err=True,
                )
                criteria = None

    if criteria is None:
        # Deterministic fallback; an empty allowlist is passed as None.
        criteria = criteria_scaffold.generate_deterministic_criteria(
            directive,
            allowlist=list(allowlist) or None,
            max_criteria=max_criteria,
        )

    payload = json.dumps(criteria, indent=2, sort_keys=False)

    if output_file:
        Path(output_file).write_text(payload + "\n", encoding="utf-8")
        # Echo a structured summary on the chosen format so the operator
        # can see what was written without re-reading the file.
        if output == "table":
            _echo_criteria_table(criteria)
            click.echo(f" written to {output_file}")
        else:
            _emit_json(
                {
                    "output_file": output_file,
                    "criteria_count": len(criteria),
                    "sampling_path": sampling_path_taken,
                }
            )
        return

    # No --output-file: write to stdout.
    if output == "table":
        # NOTE(review): in this branch only the summary rows are printed and
        # the criteria JSON itself is emitted nowhere, while the ``--output``
        # help text says table mode prints the summary "alongside the JSON".
        # Behaviour is kept as-is; confirm which of the two is intended.
        _echo_criteria_table(criteria)
        return
    click.echo(payload)
1235# ---------------------------------------------------------------------------
1236# run — chain scaffold + start + iterate-to-completion in one call
1237# ---------------------------------------------------------------------------
@mission_cmd.command("run")
@click.option(
    "--directive",
    required=True,
    help="Natural-language goal description.",
)
@click.option(
    "--tool-allowlist",
    multiple=True,
    help="Tool name to allowlist; pass multiple times. Optional with --allow-all-tools.",
)
@click.option(
    "--allow-all-tools",
    is_flag=True,
    help=(
        "Resolve the session's tool allowlist to every registered MCP tool "
        "(minus the mission_* control tools). Makes --tool-allowlist optional; "
        "mutually exclusive with it."
    ),
)
@click.option(
    "--max-iterations",
    type=int,
    default=5,
    show_default=True,
    help="Hard cap on the iteration count. Pass -1 to opt out (uncapped).",
)
@click.option(
    "--max-wall-clock",
    type=int,
    default=300,
    show_default=True,
    help="Hard cap on wall-clock seconds. Pass -1 to opt out (uncapped).",
)
@click.option(
    "--max-criteria",
    type=int,
    default=5,
    show_default=True,
    help="Cap on the number of criterion entries scaffolded.",
)
@click.option(
    "--retries",
    type=int,
    default=3,
    show_default=True,
    help="Sampling-path retry budget on validator rejections during scaffolding.",
)
@click.option(
    "--use-sampling/--no-sampling",
    "use_sampling",
    default=None,
    help=(
        "Force the sampling path on/off for both the scaffolder and "
        "the loop's Strategy_Revision sampler. Default auto-detects: "
        "MCP host capability, then Bedrock credentials, then deterministic."
    ),
)
@click.option(
    "--bedrock-model-id",
    default=None,
    help="Override the Bedrock model id used by the CLI sampling backend.",
)
@click.option(
    "--allow-scripted-strategies",
    is_flag=True,
    help="Allow scripted strategies to run via the Mission sandbox.",
)
@click.option(
    "--save-criteria",
    "save_criteria",
    type=click.Path(dir_okay=False),
    default=None,
    help="Optional path to also persist the scaffolded criteria JSON to disk.",
)
@click.option(
    "--stagnation-threshold",
    type=int,
    default=3,
    show_default=True,
    help="Iterations of no progress before terminate.",
)
@click.option(
    "--cadence",
    type=click.Choice(["every_iteration", "every_n_iterations", "every_t_seconds", "on_event"]),
    default="every_iteration",
    show_default=True,
    help="Checkpoint cadence kind.",
)
@click.option(
    "--dry-run",
    "dry_run",
    is_flag=True,
    help=(
        "Use a stub tool dispatcher and disable Strategy_Revision sampling "
        "during iteration. The criteria scaffolder still runs through "
        "Bedrock when sampling is enabled. Useful for smoke-testing the "
        "loop without spending live tool credits."
    ),
)
def mission_run_cmd(
    directive: str,
    tool_allowlist: tuple[str, ...],
    allow_all_tools: bool,
    max_iterations: int,
    max_wall_clock: int,
    max_criteria: int,
    retries: int,
    use_sampling: bool | None,
    bedrock_model_id: str | None,
    allow_scripted_strategies: bool,
    save_criteria: str | None,
    stagnation_threshold: int,
    cadence: str,
    dry_run: bool,
) -> None:
    """Scaffold criteria and run a Mission session to completion in one call.

    The chained shorthand for the most common Mission invocation: turn
    a natural-language directive into a criteria file via
    ``scaffold-criteria`` (sampling path with deterministic fallback),
    persist a new session with ``start``'s validators, then drive it
    through ``run-to-completion`` with the same per-call verdict
    streaming as ``mission start --run``.

    Per-iteration verdict updates land on stderr as JSON lines; the
    Final_Report (or persisted session JSON when no Final_Report file
    was written) lands on stdout when the loop terminates.

    With ``--save-criteria PATH``, the scaffolded criteria JSON is
    also written to ``PATH`` so the operator can inspect / re-use it
    without re-running the scaffold step.
    """
    # Lazy imports: avoid the Mission package import cost for ``--help``.
    from mission import criteria_scaffold  # noqa: PLC0415
    from mission import sampling as mission_sampling  # noqa: PLC0415
    from mission import state as mission_state  # noqa: PLC0415
    from mission import validation as mission_validation  # noqa: PLC0415
    from mission.types import SCHEMA_VERSION  # noqa: PLC0415
    from mission.validation import MissionValidationError  # noqa: PLC0415

    # ---- Step 0: reject bad input before anything costs money or writes. --
    # Every check in this step is independent of the scaffolded criteria, so
    # all of them run before the scaffolder, the --save-criteria write and
    # the session write.
    if max_criteria < 1:
        _emit_error(
            "validation_error",
            {"field": "max-criteria", "reason": "must_be_positive_int"},
        )
        sys.exit(1)
    if retries < 0:
        _emit_error(
            "validation_error",
            {"field": "retries", "reason": "must_be_non_negative_int"},
        )
        sys.exit(1)
    if not isinstance(stagnation_threshold, int) or stagnation_threshold <= 0:
        _emit_error(
            "validation_error",
            {"field": "stagnation-threshold", "reason": "must_be_positive_int"},
        )
        sys.exit(1)

    # Resolve the effective allowlist up front, before scaffolding or any
    # persistence. A mutual-exclusivity or empty-registry rejection exits here
    # with no sampling spend, no criteria file write, and no state write. The
    # scaffolder below still consults the explicit ``tool_allowlist`` (empty
    # under --allow-all-tools, which routes it to the directive-only
    # deterministic path); ``allowlist_resolved`` fills the persisted session.
    allowlist_resolved = _resolve_cli_allowlist(
        allow_all_tools=allow_all_tools, tool_allowlist=tool_allowlist
    )

    # Directive, budget and cadence do not depend on the scaffold output, so
    # validate them now: a rejection here likewise costs no sampling call and
    # leaves no criteria file behind.
    budget: dict[str, Any] = {
        "max_iterations": max_iterations,
        "max_wall_clock_seconds": max_wall_clock,
    }
    cadence_dict: dict[str, Any] = {"kind": cadence}
    try:
        directive_clean = mission_validation.validate_directive(directive)
        budget_clean = mission_validation.validate_budget(budget, allowlist_resolved, {})
        cadence_clean = mission_validation.validate_cadence(cadence_dict)
    except MissionValidationError as exc:
        _emit_error(exc.code, exc.details)
        sys.exit(1)

    # ---- Step 1: scaffold criteria. -------------------------------------
    # Resolve the sampling state once; reuse it for both the scaffold
    # call and the persisted session's ``use_sampling`` field so the
    # operator's --use-sampling/--no-sampling intent applies end-to-end.
    use_sampling_resolved, backend_resolved = mission_sampling.resolve_sampling_state(
        None, use_sampling
    )

    criteria: list[dict[str, Any]] | None = None
    sampling_path_taken = False
    if use_sampling_resolved and backend_resolved != "none":
        backend_obj = mission_sampling.select_sampling_backend(
            None,
            model_id=bedrock_model_id,
            prefs=None,
        )
        if backend_obj is not None:
            try:
                criteria = asyncio.run(
                    criteria_scaffold.generate_sampled_criteria(
                        backend_obj,
                        directive,
                        allowlist=list(tool_allowlist),
                        max_criteria=max_criteria,
                        retries=retries,
                    )
                )
                sampling_path_taken = True
            except criteria_scaffold.ScaffoldSamplingError as exc:
                # Warn on stderr, then fall through to the deterministic
                # generator below.
                click.echo(
                    f"sampling path failed ({exc.last_reason}); "
                    "falling back to deterministic templates.",
                    err=True,
                )
                criteria = None

    if criteria is None:
        criteria = criteria_scaffold.generate_deterministic_criteria(
            directive,
            allowlist=list(tool_allowlist) or None,
            max_criteria=max_criteria,
        )

    if save_criteria:
        # Written before criteria validation, so the file holds exactly what
        # the scaffolder produced.
        Path(save_criteria).write_text(
            json.dumps(criteria, indent=2, sort_keys=False) + "\n",
            encoding="utf-8",
        )

    # ---- Step 2: validate the criteria and persist the session. ---------
    try:
        criteria_clean = mission_validation.validate_criteria(criteria)
    except MissionValidationError as exc:
        _emit_error(exc.code, exc.details)
        sys.exit(1)

    session_id = f"mission-{secrets.token_hex(8)}"
    now_iso = datetime.now(UTC).isoformat()
    session: dict[str, Any] = {
        "version": SCHEMA_VERSION,
        "session_id": session_id,
        "directive_text": directive_clean,
        "criteria": criteria_clean,
        "budget": budget_clean,
        "tool_allowlist": allowlist_resolved,
        "checkpoint_cadence": cadence_clean,
        "stagnation_threshold": stagnation_threshold,
        "use_sampling": use_sampling_resolved,
        "sampling_backend_resolved": backend_resolved,
        "allow_scripted_strategies": bool(allow_scripted_strategies),
        "status": "pending",
        "created_at": now_iso,
        "iterations": [],
        "no_progress_counter": 0,
    }
    if bedrock_model_id:
        # Only recorded when the operator supplied an override.
        session["bedrock_model_id"] = bedrock_model_id

    backend = mission_state.get_backend()
    backend.save_session(cast("SessionState", _strip_private_criteria(session)))

    # Emit a one-line scaffold summary to stderr so the operator can see
    # what shape the criteria landed in before the loop starts. Stdout is
    # reserved for the Final_Report at the end.
    _emit_json(
        {
            "event": "mission.run.scaffolded",
            "session_id": session_id,
            "criteria_count": len(criteria),
            "sampling_path": sampling_path_taken,
            "sampling_backend_resolved": backend_resolved,
        },
        err=True,
    )

    # ---- Step 3: iterate to completion. ---------------------------------
    _run_to_completion(session_id, dry_run=dry_run)