Coverage for cli/commands/mission

"""Mission goal-directed iteration loop CLI commands.

The whole subcommand group is gated by ``GCO_ENABLE_MISSION`` (or the
umbrella ``GCO_ENABLE_ALL_TOOLS``): when neither env var is truthy, the
group prints a one-line hint to stderr and exits with code 2 before
dispatching to any subcommand. With the flag set, the nine subcommands
talk directly to the persistence backend and the
:class:`mission.engine.MissionEngine` — no MCP round-trip is involved,
so the CLI works without the MCP server running.

Subcommands:

* ``start`` — validate inputs, resolve sampling state, persist a new
  ``SessionState``. With ``--run``, iterate to completion synchronously.
* ``status`` — read the full session JSON.
* ``iterate`` — drive one or more iterations of an existing session.
* ``checkpoint`` — re-run the verdict cascade on the latest iteration.
* ``complete`` — force a session into ``completed``.
* ``abort`` — pause or terminate a session.
* ``resume`` — transition ``paused`` to ``running``.
* ``history`` — return the iteration history (full or summary).
* ``list`` — list sessions across the configured backend.

Output formats: every subcommand defaults to ``--output json``; pass
``--output table`` for a human-readable summary.
"""

27from __future__ import annotations

29import asyncio

30import json

31import os

32import secrets

33import sys

34from collections.abc import Mapping

35from datetime import UTC, datetime

36from pathlib import Path

37from typing import TYPE_CHECKING, Any, cast

39import click

41# The Mission package lives under ``gco_mcp/mission/`` and is imported as

42# ``mission.*``. Match the path-injection pattern used throughout the

43# MCP module surface and the ``test_mission_*`` test files so the

44# imports below resolve regardless of how this module is loaded.

45sys.path.insert(0, str(Path(__file__).resolve().parent.parent.parent / "gco_mcp"))

47from gco.bedrock import BedrockFTUFormNotAcceptedError # noqa: E402

49if TYPE_CHECKING: # pragma: no cover - import only for type checkers

50 from mission.types import SessionState

# One-line hint that ``_check_feature_flag`` writes to stderr when neither
# gating environment variable is truthy.
_FEATURE_FLAG_HINT = (
    "Mission tools are gated. Set GCO_ENABLE_MISSION=true (or GCO_ENABLE_ALL_TOOLS=true) to enable."
)

def _flag_enabled() -> bool:
    """Report whether the Mission command group is switched on.

    Checks ``GCO_ENABLE_MISSION`` first and the umbrella
    ``GCO_ENABLE_ALL_TOOLS`` second. A variable counts as enabled when its
    value, stripped of surrounding whitespace and lower-cased, is one of
    ``true``, ``1``, ``yes`` or ``on``. Unset variables count as disabled.
    """
    accepted = {"true", "1", "yes", "on"}
    for var_name in ("GCO_ENABLE_MISSION", "GCO_ENABLE_ALL_TOOLS"):
        raw_value = os.environ.get(var_name, "")
        if raw_value.strip().lower() in accepted:
            return True
    return False

def _check_feature_flag() -> None:
    """Abort the process with exit code 2 unless the gating flag is enabled.

    When the flag is disabled, the feature-flag hint is written to stderr
    before ``SystemExit(2)`` is raised. When it is enabled, the function
    returns without doing anything.
    """
    if _flag_enabled():
        return
    click.echo(_FEATURE_FLAG_HINT, err=True)
    raise SystemExit(2)

74# ---------------------------------------------------------------------------

75# Output helpers

76# ---------------------------------------------------------------------------

def _strip_private_criteria(session: Mapping[str, Any]) -> dict[str, Any]:
    """Return ``session`` as processed by ``strip_private_fields``.

    This is a pure pass-through to
    :func:`mission.validation.strip_private_fields`; all of the stripping
    logic lives there. The wrapper exists so call sites in this module can
    keep using the older ``_strip_private_criteria`` name.
    """
    # Imported lazily so the Mission package is only loaded when needed.
    from mission.validation import strip_private_fields  # noqa: PLC0415

    stripped: dict[str, Any] = strip_private_fields(session)
    return stripped

def _strip_iteration(iteration: Any) -> Any:
    """Run a single iteration record through the iteration-stripping helper.

    Mapping inputs are wrapped in a one-element list, passed to
    :func:`mission.validation.strip_private_fields_iterations`, and the
    first element of the result is returned. Anything that is not a
    mapping is handed back untouched, so a malformed history entry remains
    visible to the caller.
    """
    if isinstance(iteration, Mapping):
        from mission.validation import strip_private_fields_iterations  # noqa: PLC0415

        processed = strip_private_fields_iterations([iteration])
        return processed[0]
    return iteration

106

107

def _emit_json(payload: Any, *, err: bool = False) -> None:
    """Serialise ``payload`` to JSON and echo it as one line.

    Args:
        payload: Object to serialise. Values ``json`` cannot encode
            natively (datetimes, paths, ...) are rendered through ``str``
            instead of raising, so every output path succeeds.
        err: When true the line goes to stderr; otherwise to stdout.
    """
    rendered = json.dumps(payload, default=str)
    click.echo(rendered, err=err)

117

118

def _emit_error(code: str, details: dict[str, Any] | None = None) -> None:
    """Write a structured error envelope to stderr as a single JSON line.

    The envelope always carries ``code``. ``details`` is included only
    when the caller supplied one (an empty dict is still included).
    """
    envelope: dict[str, Any]
    if details is None:
        envelope = {"code": code}
    else:
        envelope = {"code": code, "details": details}
    _emit_json(envelope, err=True)

125

126

127# ---------------------------------------------------------------------------

128# Stub dispatcher

129# ---------------------------------------------------------------------------

130

131

def _make_stub_dispatcher() -> Any:
    """Build the stub tool dispatcher.

    Delegates to :func:`mission._engine_factory.make_stub_dispatcher` and
    returns whatever it produces. The wrapper keeps this module-level name
    importable for callers that still reference it directly.
    """
    from mission._engine_factory import make_stub_dispatcher  # noqa: PLC0415

    dispatcher = make_stub_dispatcher()
    return dispatcher

144

145

146# ---------------------------------------------------------------------------

147# Click group

148# ---------------------------------------------------------------------------

149

150

@click.group("mission")
def mission_cmd() -> None:
    """Mission goal-directed iteration loop commands.

    Subcommands manage Mission sessions: ``start``, ``status``,
    ``iterate``, ``checkpoint``, ``complete``, ``abort``, ``resume``,
    ``history``, ``list``.

    Gated by the ``GCO_ENABLE_MISSION`` environment variable. With
    the flag unset, every subcommand prints a one-line hint to stderr
    and exits with code 2.
    """
    # The docstring above doubles as the group's ``--help`` text, so it is
    # user-facing. The gate lives in the group callback so that it runs
    # ahead of whichever subcommand was requested; it raises SystemExit(2)
    # when the feature flag is not enabled.
    _check_feature_flag()

164

165

166# ---------------------------------------------------------------------------

167# start

168# ---------------------------------------------------------------------------

169

170

@mission_cmd.command("start")
@click.option("--directive", required=True, help="Natural-language goal description.")
@click.option(
    "--criteria-file",
    type=click.Path(exists=True, dir_okay=False),
    default=None,
    help="JSON file containing the criteria list. Required unless --with-defaults is set.",
)
@click.option(
    "--max-iterations",
    type=int,
    required=True,
    help="Hard cap on the iteration count. Pass -1 to opt out (uncapped).",
)
@click.option(
    "--max-wall-clock",
    type=int,
    required=True,
    help="Hard cap on wall-clock seconds. Pass -1 to opt out (uncapped).",
)
@click.option(
    "--tool-allowlist",
    multiple=True,
    help="Tool name to allowlist; pass multiple times. Optional with --allow-all-tools.",
)
@click.option(
    "--allow-all-tools",
    is_flag=True,
    help=(
        "Resolve the session's tool allowlist to every registered MCP tool "
        "(minus the mission_* control tools). Makes --tool-allowlist optional; "
        "mutually exclusive with it."
    ),
)
@click.option(
    "--cadence",
    type=click.Choice(["every_iteration", "every_n_iterations", "every_t_seconds", "on_event"]),
    default="every_iteration",
    show_default=True,
    help="Checkpoint cadence kind.",
)
@click.option("--cadence-n", type=int, default=None, help="Cadence n parameter.")
@click.option(
    "--cadence-t",
    type=int,
    default=None,
    help="Cadence t parameter (seconds).",
)
@click.option(
    "--cadence-event",
    default=None,
    help="Cadence event_name parameter.",
)
@click.option(
    "--stagnation-threshold",
    type=int,
    default=3,
    show_default=True,
    help="Iterations of no progress before terminate.",
)
@click.option(
    "--use-sampling/--no-sampling",
    "use_sampling",
    default=None,
    help="Enable/disable LLM sampling (default: auto-detect).",
)
@click.option(
    "--bedrock-model-id",
    default=None,
    help="Override the Bedrock model id used by the CLI sampling backend.",
)
@click.option(
    "--allow-scripted-strategies",
    is_flag=True,
    help="Allow scripted strategies to run via the Mission sandbox.",
)
@click.option(
    "--with-defaults",
    is_flag=True,
    help="Use a basic placeholder predicate criterion when no --criteria-file is provided.",
)
@click.option(
    "--run",
    "run_mode",
    is_flag=True,
    help="Iterate to completion synchronously after creating the session.",
)
@click.option(
    "--dry-run",
    "dry_run",
    is_flag=True,
    help=(
        "Use a stub tool dispatcher and disable Strategy_Revision sampling "
        "during iteration. Useful for smoke-testing the loop bookkeeping "
        "without spending Bedrock or AWS credits. Only meaningful with --run."
    ),
)
@click.option(
    "--output",
    type=click.Choice(["json", "table"]),
    default="json",
    show_default=True,
    help="Output format.",
)
def mission_start(
    directive: str,
    criteria_file: str | None,
    max_iterations: int,
    max_wall_clock: int,
    tool_allowlist: tuple[str, ...],
    allow_all_tools: bool,
    cadence: str,
    cadence_n: int | None,
    cadence_t: int | None,
    cadence_event: str | None,
    stagnation_threshold: int,
    use_sampling: bool | None,
    bedrock_model_id: str | None,
    allow_scripted_strategies: bool,
    with_defaults: bool,
    run_mode: bool,
    dry_run: bool,
    output: str,
) -> None:
    """Start a new Mission session.

    Validates inputs through the shared validators in
    ``mission.validation``, resolves the sampling state via
    ``mission.sampling.resolve_sampling_state``, and persists the
    session through the configured backend (``GCO_MISSION_STATE_BACKEND``,
    defaults to filesystem under ``~/.gco/missions``).

    With ``--run``, iterates to completion synchronously: each verdict
    is printed as one JSON line to stderr; the final stdout is the
    Final_Report JSON.
    """
    # Lazy imports: the Mission package is only loaded when the command
    # actually runs, not when help is rendered.
    from mission import sampling as mission_sampling  # noqa: PLC0415
    from mission import state as mission_state  # noqa: PLC0415
    from mission import validation as mission_validation  # noqa: PLC0415
    from mission.types import SCHEMA_VERSION  # noqa: PLC0415
    from mission.validation import MissionValidationError  # noqa: PLC0415

    # --- Criteria: from the JSON file, or the placeholder default. -------
    if not criteria_file and not with_defaults:
        _emit_error(
            "validation_error",
            {
                "field": "criteria",
                "reason": "either --criteria-file or --with-defaults is required",
            },
        )
        sys.exit(1)

    criteria: list[dict[str, Any]]
    if criteria_file:
        try:
            with open(criteria_file, encoding="utf-8") as fp:
                criteria = json.load(fp)
        except (OSError, ValueError) as exc:
            # Unreadable file or malformed JSON: report it as a validation
            # problem on the option rather than letting a traceback escape.
            _emit_error(
                "validation_error",
                {"field": "criteria-file", "reason": str(exc)},
            )
            sys.exit(1)
    else:
        placeholder = {
            "criterion_id": "default",
            "kind": "predicate",
            "required": True,
            "expression": "True",
        }
        criteria = [placeholder]

    # --- Budget and cadence payloads handed to the validators. -----------
    budget: dict[str, Any] = {
        "max_iterations": max_iterations,
        "max_wall_clock_seconds": max_wall_clock,
    }

    cadence_dict: dict[str, Any] = {"kind": cadence}
    optional_cadence_params = (
        ("n", cadence_n),
        ("t", cadence_t),
        ("event_name", cadence_event),
    )
    for param_name, param_value in optional_cadence_params:
        if param_value is not None:
            cadence_dict[param_name] = param_value

    # --- Allowlist: resolved first; the helper exits with code 1 on a ----
    # rejection, so nothing below runs (and nothing is persisted) then.
    allowlist_resolved = _resolve_cli_allowlist(
        allow_all_tools=allow_all_tools, tool_allowlist=tool_allowlist
    )

    # --- Shared validators. The budget validator receives an empty tag ---
    # map here, and no per-name tool-allowlist validation is performed on
    # this path.
    try:
        directive_clean = mission_validation.validate_directive(directive)
        criteria_clean = mission_validation.validate_criteria(criteria)
        budget_clean = mission_validation.validate_budget(budget, allowlist_resolved, {})
        cadence_clean = mission_validation.validate_cadence(cadence_dict)
    except MissionValidationError as exc:
        _emit_error(exc.code, exc.details)
        sys.exit(1)

    threshold_ok = isinstance(stagnation_threshold, int) and stagnation_threshold > 0
    if not threshold_ok:
        _emit_error(
            "validation_error",
            {"field": "stagnation-threshold", "reason": "must_be_positive_int"},
        )
        sys.exit(1)

    # --- Sampling state: there is no MCP context on the CLI path, so -----
    # ``None`` is passed in its place.
    use_sampling_resolved, backend_resolved = mission_sampling.resolve_sampling_state(
        None, use_sampling
    )

    # --- Assemble and persist the session record. ------------------------
    session_id = f"mission-{secrets.token_hex(8)}"
    now_iso = datetime.now(UTC).isoformat()
    session: dict[str, Any] = {
        "version": SCHEMA_VERSION,
        "session_id": session_id,
        "directive_text": directive_clean,
        "criteria": criteria_clean,
        "budget": budget_clean,
        "tool_allowlist": allowlist_resolved,
        "checkpoint_cadence": cadence_clean,
        "stagnation_threshold": stagnation_threshold,
        "use_sampling": use_sampling_resolved,
        "sampling_backend_resolved": backend_resolved,
        "allow_scripted_strategies": bool(allow_scripted_strategies),
        "status": "pending",
        "created_at": now_iso,
        "iterations": [],
        "no_progress_counter": 0,
    }
    if bedrock_model_id:
        session["bedrock_model_id"] = bedrock_model_id

    # Private fields are stripped from the copy that gets persisted; the
    # in-memory ``session`` dict is left as the validators produced it.
    persistable = _strip_private_criteria(session)
    backend = mission_state.get_backend()
    backend.save_session(cast("SessionState", persistable))

    if run_mode:
        # --run: hand over to the synchronous iterate-to-completion driver,
        # which produces its own output.
        _run_to_completion(session_id, dry_run=dry_run)
        return

    if output == "table":
        click.echo(f"Session ID: {session_id}")
        click.echo("Status: pending")
        click.echo(
            f"Sampling: {'on' if use_sampling_resolved else 'off'} ({backend_resolved})"
        )
        return

    _emit_json(
        {
            "session_id": session_id,
            "status": "pending",
            "use_sampling": use_sampling_resolved,
            "sampling_backend_resolved": backend_resolved,
        }
    )

455

456

def _run_to_completion(session_id: str, *, dry_run: bool = False) -> None:
    """Iterate ``session_id`` until the engine returns a terminal verdict.

    The engine is obtained from
    :func:`mission._engine_factory.build_mission_engine`. ``dry_run`` is
    forwarded as ``use_stub_dispatcher``; when it is false the MCP tool
    registry is populated first so the live dispatcher can find tools.

    Output contract:

    * one JSON line per iteration (index, verdict, reason) on stderr;
    * on stdout, the contents of ``<session_id>.report.json`` when the
      backend is a ``FilesystemBackend`` and that file exists, otherwise
      the persisted session with private fields stripped.

    Exits with code 1 when the session cannot be loaded (before or after
    the run) or when the engine raises ``MissionEngineError``.
    """
    from mission import state as mission_state  # noqa: PLC0415
    from mission._engine_factory import build_mission_engine  # noqa: PLC0415
    from mission.engine import MissionEngineError  # noqa: PLC0415
    from mission.state import FilesystemBackend  # noqa: PLC0415

    backend = mission_state.get_backend()
    loaded_session = backend.load_session(session_id)
    if loaded_session is None:
        _emit_error("session_not_found", {"session_id": session_id})
        sys.exit(1)

    # The registry is not populated on the dry-run path; only the live
    # path registers the tools.
    if not dry_run:
        _ensure_tool_registry()

    terminal_verdicts = ("complete", "terminate")

    async def _drive() -> None:
        engine = await build_mission_engine(
            loaded_session, ctx=None, use_stub_dispatcher=dry_run
        )
        last_verdict: Any = None
        while last_verdict not in terminal_verdicts:
            try:
                record = await engine.run_iteration(session_id, ctx=None)
            except MissionEngineError as exc:
                _emit_error(exc.code, {"session_id": session_id})
                sys.exit(1)
            progress_line = {
                "iteration_index": record["iteration_index"],
                "verdict": record["verdict"],
                "verdict_reason": record["verdict_reason"],
            }
            _emit_json(progress_line, err=True)
            last_verdict = record["verdict"]

    asyncio.run(_drive())

    # Reload before choosing what to print: the report file wins when the
    # filesystem backend produced one, the persisted session is the
    # fallback for everything else.
    final_session = backend.load_session(session_id)
    if isinstance(backend, FilesystemBackend):
        report_path = backend.root / f"{session_id}.report.json"
        if report_path.exists():
            click.echo(report_path.read_text(encoding="utf-8"))
            return

    if final_session is None:
        _emit_error("session_disappeared", {"session_id": session_id})
        sys.exit(1)
    _emit_json(_strip_private_criteria(final_session))

528

529

def _ensure_tool_registry() -> None:
    """Register every MCP tool by calling ``tools.register_all_tools``.

    The CLI does not boot the MCP server, so the tool groups have to be
    registered explicitly before the live dispatcher (or the all-tools
    allowlist resolver) can look tools up.

    The ``gco_mcp`` directory is placed at the front of ``sys.path`` so the
    top-level ``tools`` package resolves from there. The entry is only
    inserted when it is not already first: this function may be called more
    than once per process, and inserting unconditionally would add one
    duplicate ``sys.path`` entry per call.

    Raises:
        ImportError: if the ``tools`` package cannot be imported.
    """
    gco_mcp_dir = str(Path(__file__).resolve().parent.parent.parent / "gco_mcp")
    # Same import precedence as an unconditional insert (the directory is
    # first either way), without growing sys.path on repeated calls.
    if not sys.path or sys.path[0] != gco_mcp_dir:
        sys.path.insert(0, gco_mcp_dir)

    from tools import register_all_tools  # noqa: PLC0415

    register_all_tools()

545

546

def _resolve_registered_tools_for_cli() -> tuple[dict[str, Any], set[str]]:
    """Populate the tool registry and take a snapshot of it.

    Returns:
        A pair ``(registered, control)``. ``registered`` maps each tool
        name to its tool object, as listed by ``mcp._list_tools()``.
        ``control`` is the set of names whose ``tags`` attribute contains
        ``"mission"``; a tool with no ``tags`` attribute, or with empty
        tags, is never treated as a control tool. Both are empty when the
        registry lists no tools.
    """
    _ensure_tool_registry()
    from server import mcp  # noqa: PLC0415 — lazy

    async def _snapshot() -> list[Any]:
        listed = await mcp._list_tools()
        return list(listed)

    registered: dict[str, Any] = {}
    control: set[str] = set()
    for tool in asyncio.run(_snapshot()):
        registered[tool.name] = tool
        tool_tags = getattr(tool, "tags", None) or set()
        if "mission" in tool_tags:
            control.add(tool.name)
    return registered, control

568

569

def _resolve_cli_allowlist(*, allow_all_tools: bool, tool_allowlist: tuple[str, ...]) -> list[str]:
    """Compute the effective tool allowlist, or exit the process with code 1.

    Explicit path (``allow_all_tools`` false): the supplied names are
    returned as a list without consulting the registry; an empty tuple is
    rejected with a ``validation_error`` / ``empty`` envelope.

    All-tools path: the registry is populated on demand and the decision
    is delegated to ``mission.validation.resolve_effective_allowlist``.
    A ``MissionValidationError`` from it is emitted as a structured
    envelope before exiting.
    """
    from mission import validation as mission_validation  # noqa: PLC0415
    from mission.validation import MissionValidationError  # noqa: PLC0415

    if not allow_all_tools:
        if not tool_allowlist:
            _emit_error("validation_error", {"field": "tool_allowlist", "reason": "empty"})
            sys.exit(1)
        return list(tool_allowlist)

    registered_tools, control_tools = _resolve_registered_tools_for_cli()
    try:
        effective: list[str] = mission_validation.resolve_effective_allowlist(
            allow_all_tools=True,
            explicit_allowlist=list(tool_allowlist),
            registered_tools=registered_tools,
            control_tools=control_tools,
        )
    except MissionValidationError as exc:
        _emit_error(exc.code, exc.details)
        sys.exit(1)
    return effective

600

601

602# ---------------------------------------------------------------------------

603# status

604# ---------------------------------------------------------------------------

605

606

@mission_cmd.command("status")
@click.argument("session_id")
@click.option("--output", type=click.Choice(["json", "table"]), default="json", show_default=True)
def mission_status_cmd(session_id: str, output: str) -> None:
    """Get the full state of a Mission session."""
    from mission.state import get_backend  # noqa: PLC0415

    session = get_backend().load_session(session_id)
    if session is None:
        _emit_error("session_not_found", {"session_id": session_id})
        sys.exit(1)

    cleaned = _strip_private_criteria(session)

    # JSON (the default) dumps the whole stripped session and is done.
    if output != "table":
        _emit_json(cleaned)
        return

    # Table view: a fixed set of summary lines; missing keys render empty.
    click.echo(f"Session ID: {cleaned.get('session_id', '')}")
    click.echo(f"Status: {cleaned.get('status', '')}")
    click.echo(f"Directive: {cleaned.get('directive_text', '')}")
    click.echo(f"Iterations: {len(cleaned.get('iterations', []) or [])}")
    allowed_tools = cleaned.get("tool_allowlist", []) or []
    click.echo(f"Allowlist: {', '.join(allowed_tools)}")
    click.echo(
        f"Sampling: {'on' if cleaned.get('use_sampling') else 'off'} "
        f"({cleaned.get('sampling_backend_resolved', 'none')})"
    )

638

639

640# ---------------------------------------------------------------------------

641# iterate

642# ---------------------------------------------------------------------------

643

644

@mission_cmd.command("iterate")
@click.argument("session_id")
@click.option(
    "--max-iterations",
    type=int,
    default=1,
    show_default=True,
    help="How many iterations to run in this call.",
)
@click.option(
    "--dry-run",
    "dry_run",
    is_flag=True,
    help=(
        "Use a stub tool dispatcher and disable Strategy_Revision sampling. "
        "Useful for smoke-testing the loop without spending Bedrock or AWS credits."
    ),
)
@click.option("--output", type=click.Choice(["json", "table"]), default="json", show_default=True)
def mission_iterate_cmd(session_id: str, max_iterations: int, dry_run: bool, output: str) -> None:
    """Run one or more iterations on a Mission session.

    Stops early on a terminal verdict. By default the engine is wired
    with the live FastMCP tool dispatcher and the Strategy_Revision
    sampling callable so the loop iterates against real tool results
    and lets the model revise the strategy between iterations.

    Pass ``--dry-run`` to substitute the canned-stub dispatcher and
    disable sampling — useful for smoke-testing the bookkeeping
    without spending Bedrock or AWS credits.
    """
    from mission._engine_factory import build_mission_engine  # noqa: PLC0415
    from mission.engine import MissionEngineError  # noqa: PLC0415
    from mission.state import get_backend  # noqa: PLC0415

    # ``max_iterations`` here is the number of iterations to run in THIS
    # invocation, not the session-wide budget cap (which allows -1 for
    # "uncapped"). Zero or a negative count would be a no-op, so it is
    # rejected outright.
    if max_iterations <= 0:
        _emit_error(
            "validation_error",
            {"field": "max-iterations", "reason": "must_be_positive_int"},
        )
        sys.exit(1)

    loaded_session = get_backend().load_session(session_id)
    if loaded_session is None:
        _emit_error("session_not_found", {"session_id": session_id})
        sys.exit(1)

    # Only the live path registers the tools.
    if not dry_run:
        _ensure_tool_registry()

    summary_keys = ("iteration_index", "verdict", "verdict_reason")

    async def _drive() -> dict[str, Any]:
        engine = await build_mission_engine(
            loaded_session, ctx=None, use_stub_dispatcher=dry_run
        )
        completed: list[dict[str, Any]] = []
        remaining = max_iterations
        while remaining > 0:
            remaining -= 1
            try:
                record = await engine.run_iteration(session_id, ctx=None)
            except MissionEngineError as exc:
                # Report the failure through the return value so the exit
                # happens outside the event loop.
                return {
                    "session_id": session_id,
                    "error": {"code": exc.code},
                    "iterations": completed,
                }
            completed.append({key: record[key] for key in summary_keys})
            if record["verdict"] in ("complete", "terminate"):
                break
        return {"session_id": session_id, "iterations": completed}

    result = asyncio.run(_drive())

    if "error" in result:
        _emit_error(result["error"]["code"], {"session_id": session_id})
        sys.exit(1)

    if output != "table":
        _emit_json(result)
        return

    for it in result["iterations"]:
        click.echo(
            f" Iteration {it['iteration_index']}: {it['verdict']} ({it['verdict_reason']})"
        )

744

745

746# ---------------------------------------------------------------------------

747# checkpoint

748# ---------------------------------------------------------------------------

749

750

@mission_cmd.command("checkpoint")
@click.argument("session_id")
@click.option("--output", type=click.Choice(["json", "table"]), default="json", show_default=True)
def mission_checkpoint_cmd(session_id: str, output: str) -> None:
    """Re-run the verdict cascade on the latest iteration of a session."""
    from mission.decide import decide_verdict  # noqa: PLC0415
    from mission.state import get_backend  # noqa: PLC0415

    session = get_backend().load_session(session_id)
    if session is None:
        _emit_error("session_not_found", {"session_id": session_id})
        sys.exit(1)

    history = session.get("iterations") or []
    if not history:
        # There is nothing to evaluate until at least one iteration exists.
        _emit_error("no_iterations", {"session_id": session_id})
        sys.exit(1)

    latest = history[-1]
    evaluated_at = datetime.now(UTC)
    verdict, reason = decide_verdict(session, latest, evaluated_at)
    iteration_index = latest.get("iteration_index")

    if output == "table":
        click.echo(f"Iteration {iteration_index}: {verdict} ({reason})")
        return

    _emit_json(
        {
            "session_id": session_id,
            "iteration_index": iteration_index,
            "verdict": verdict,
            "verdict_reason": reason,
        }
    )

785

786

787# ---------------------------------------------------------------------------

788# complete

789# ---------------------------------------------------------------------------

790

791

@mission_cmd.command("complete")
@click.argument("session_id")
@click.option("--output", type=click.Choice(["json", "table"]), default="json", show_default=True)
def mission_complete_cmd(session_id: str, output: str) -> None:
    """Force a Mission session into ``completed`` status."""
    from mission.state import get_backend  # noqa: PLC0415
    from mission.types import TERMINAL_STATES  # noqa: PLC0415

    backend = get_backend()
    session = backend.load_session(session_id)
    if session is None:
        _emit_error("session_not_found", {"session_id": session_id})
        sys.exit(1)

    # A session already in a terminal state cannot be completed again.
    if session["status"] in TERMINAL_STATES:
        _emit_error(
            "session_terminal",
            {"session_id": session_id, "status": session["status"]},
        )
        sys.exit(1)

    ended_at = datetime.now(UTC).isoformat()
    session["status"] = "completed"
    session["final_verdict"] = "complete"
    session["ended_at"] = ended_at
    persistable = _strip_private_criteria(session)
    backend.save_session(cast("SessionState", persistable))

    if output == "table":
        click.echo(f"Session {session_id}: completed (forced)")
        return

    _emit_json(
        {
            "session_id": session_id,
            "status": "completed",
            "final_verdict": "complete",
        }
    )

830

831

832# ---------------------------------------------------------------------------

833# abort

834# ---------------------------------------------------------------------------

835

836

@mission_cmd.command("abort")
@click.argument("session_id")
@click.option("--pause", is_flag=True, help="Pause the session instead of terminating.")
@click.option("--output", type=click.Choice(["json", "table"]), default="json", show_default=True)
def mission_abort_cmd(session_id: str, pause: bool, output: str) -> None:
    """Pause or terminate a Mission session.

    With ``--pause``, transitions the session to ``paused`` (resumable).
    Without ``--pause``, transitions to ``terminated`` and stamps the
    final verdict.
    """
    from mission.state import get_backend  # noqa: PLC0415
    from mission.types import TERMINAL_STATES  # noqa: PLC0415

    backend = get_backend()
    session = backend.load_session(session_id)
    if session is None:
        _emit_error("session_not_found", {"session_id": session_id})
        sys.exit(1)

    # Sessions already in a terminal state can be neither paused nor
    # terminated again.
    if session["status"] in TERMINAL_STATES:
        _emit_error(
            "session_terminal",
            {"session_id": session_id, "status": session["status"]},
        )
        sys.exit(1)

    if not pause:
        # Termination also records the final verdict and the end timestamp.
        ended_at = datetime.now(UTC).isoformat()
        session["status"] = "terminated"
        session["final_verdict"] = "terminate"
        session["ended_at"] = ended_at
    else:
        session["status"] = "paused"

    persistable = _strip_private_criteria(session)
    backend.save_session(cast("SessionState", persistable))

    if output == "table":
        click.echo(f"Session {session_id}: {session['status']}")
        return

    _emit_json({"session_id": session_id, "status": session["status"]})

880

881

882# ---------------------------------------------------------------------------

883# resume

884# ---------------------------------------------------------------------------

885

886

@mission_cmd.command("resume")
@click.argument("session_id")
@click.option(
    "--output",
    type=click.Choice(["json", "table"]),
    default="json",
    show_default=True,
)
def mission_resume_cmd(session_id: str, output: str) -> None:
    """Resume a paused Mission session."""
    # Only the ``paused`` -> ``running`` transition is accepted; any other
    # current status is reported as ``invalid_state`` with exit code 1.
    from mission.state import get_backend  # noqa: PLC0415

    store = get_backend()
    record = store.load_session(session_id)
    if record is None:
        _emit_error("session_not_found", {"session_id": session_id})
        sys.exit(1)

    current_status = record["status"]
    if current_status != "paused":
        _emit_error(
            "invalid_state",
            {"session_id": session_id, "status": current_status},
        )
        sys.exit(1)

    record["status"] = "running"
    store.save_session(cast("SessionState", _strip_private_criteria(record)))

    if output != "table":
        _emit_json({"session_id": session_id, "status": "running"})
        return
    click.echo(f"Session {session_id}: running")

917

918

919# ---------------------------------------------------------------------------

920# history

921# ---------------------------------------------------------------------------

922

923

@mission_cmd.command("history")
@click.argument("session_id")
@click.option(
    "--format",
    "fmt",
    type=click.Choice(["full", "summary"]),
    default="summary",
    show_default=True,
    help="Iteration history detail level.",
)
@click.option(
    "--include-observations",
    "include_obs",
    is_flag=True,
    help=(
        "Include the observation and strategy dicts in each iteration's "
        "output. Only meaningful with --format full. Useful for debugging "
        "what each tool returned and what strategy was proposed."
    ),
)
@click.option(
    "--output",
    type=click.Choice(["json", "table"]),
    default="json",
    show_default=True,
)
def mission_history_cmd(session_id: str, fmt: str, include_obs: bool, output: str) -> None:
    """Get the iteration history of a Mission session."""
    # Two shapes are produced:
    #   * ``--format full``: every iteration passed through
    #     ``_strip_iteration``; observation/strategy are dropped unless
    #     ``--include-observations`` is given.
    #   * ``--format summary`` (default): a fixed six-key projection of
    #     each iteration; ``--include-observations`` has no effect here.
    from mission.state import get_backend  # noqa: PLC0415

    backend = get_backend()
    session = backend.load_session(session_id)
    if session is None:
        _emit_error("session_not_found", {"session_id": session_id})
        sys.exit(1)
    iterations = session.get("iterations") or []

    if fmt == "full":
        cleaned = [_strip_iteration(it) for it in iterations]
        if not include_obs:
            # Strip observation and strategy from the output to keep it
            # concise. Operators who need the full shape pass
            # --include-observations.
            for it in cleaned:
                if isinstance(it, dict):
                    it.pop("observation", None)
                    it.pop("strategy", None)
        if output == "table":
            for it in cleaned:
                if not isinstance(it, dict):
                    continue
                idx = it.get("iteration_index", "?")
                verdict = it.get("verdict", "?")
                reason = it.get("verdict_reason", "?")
                click.echo(f" Iteration {idx}: {verdict} ({reason})")
                if include_obs:
                    # ``dict.get(key, default)`` returns a stored ``None``
                    # as-is, so normalise every nested value before calling
                    # ``.get`` / slicing / ``len`` on it. This keeps the
                    # debugging view from raising on a partially populated
                    # iteration record.
                    obs = it.get("observation")
                    if not isinstance(obs, Mapping):
                        obs = {}
                    strat = it.get("strategy")
                    if not isinstance(strat, Mapping):
                        strat = {}
                    results = obs.get("tool_results") or []
                    errors = obs.get("errors") or []
                    rationale = str(strat.get("rationale") or "")[:100]
                    calls = strat.get("tool_calls") or []
                    tool_names = [c.get("tool_name", "?") for c in calls if isinstance(c, dict)]
                    click.echo(f" tools: {tool_names}")
                    click.echo(f" rationale: {rationale}")
                    click.echo(f" tool_results: {len(results)} entries, errors: {len(errors)}")
        else:
            _emit_json({"session_id": session_id, "iterations": cleaned})
        return

    summaries = [
        {
            "iteration_index": it.get("iteration_index"),
            "verdict": it.get("verdict"),
            "verdict_reason": it.get("verdict_reason"),
            "started_at": it.get("started_at"),
            "ended_at": it.get("ended_at"),
            "checkpoint_evaluated": it.get("checkpoint_evaluated", False),
        }
        for it in iterations
    ]
    if output == "table":
        for s in summaries:
            click.echo(
                f" Iteration {s['iteration_index']}: {s['verdict']} ({s['verdict_reason']})"
            )
    else:
        _emit_json({"session_id": session_id, "iterations": summaries})

1012

1013

1014# ---------------------------------------------------------------------------

1015# list

1016# ---------------------------------------------------------------------------

1017

1018

@mission_cmd.command("list")
@click.option(
    "--status",
    default=None,
    help="Filter sessions by status (pending, running, paused, ...).",
)
@click.option(
    "--output",
    type=click.Choice(["json", "table"]),
    default="json",
    show_default=True,
)
def mission_list_cmd(status: str | None, output: str) -> None:
    """List Mission sessions."""
    # ``--status`` is forwarded to the backend as ``{"status": ...}``; an
    # omitted or empty value means "no filter" (``None``).
    from mission.state import get_backend  # noqa: PLC0415

    backend = get_backend()
    filter_dict = {"status": status} if status else None
    sessions = backend.list_sessions(filter_dict)

    if output == "table":
        header = f" {'SESSION ID':<40} {'STATUS':<11} {'ITER':>5} CREATED"
        click.echo(header)
        click.echo(" " + "-" * (len(header) - 2))
        for s in sessions:
            # Columns are truncated to their header widths; created_at is
            # cut to 19 characters.
            sid = (s.get("session_id") or "")[:40]
            st = (s.get("status") or "")[:11]
            # Guard a stored ``None`` like the sibling columns do: ``None``
            # does not accept the ``:>5`` format spec and would raise
            # ``TypeError`` mid-table.
            iter_count = s.get("iteration_count") or 0
            ca = (s.get("created_at") or "")[:19]
            click.echo(f" {sid:<40} {st:<11} {iter_count:>5} {ca}")
    else:
        _emit_json({"sessions": sessions})

1051

1052

1053# ---------------------------------------------------------------------------

1054# scaffold-criteria

1055# ---------------------------------------------------------------------------

1056

1057

def _echo_scaffold_criteria_table(criteria: list[dict[str, Any]]) -> None:
    """Print one summary line per scaffolded criterion to stdout."""
    for c in criteria:
        # ``!s`` stringifies before padding so a missing key (``None``)
        # renders as "None" instead of raising ``TypeError`` on the
        # width/alignment format spec.
        click.echo(
            f" {c.get('criterion_id')!s:<32} "
            f"kind={c.get('kind')!s:<16} required={c.get('required')}"
        )


@mission_cmd.command("scaffold-criteria")
@click.option(
    "--directive",
    required=True,
    help="Natural-language goal description used to seed the criteria.",
)
@click.option(
    "--allowlist",
    "allowlist",
    multiple=True,
    help=(
        "Optional tool names that the resulting session would be "
        "configured with. Used informationally on the deterministic "
        "path; on the sampling path, shapes the prompt so the model "
        "picks metric/event names plausibly produced by the listed tools."
    ),
)
@click.option(
    "--use-sampling/--no-sampling",
    "use_sampling",
    default=None,
    help=(
        "Force the sampling path on/off. Default auto-detects: MCP "
        "host capability, then Bedrock credentials, then deterministic."
    ),
)
@click.option(
    "--bedrock-model-id",
    default=None,
    help="Override the Bedrock model id used by the CLI sampling backend.",
)
@click.option(
    "--max-criteria",
    type=int,
    default=5,
    show_default=True,
    help="Cap on the number of criterion entries scaffolded.",
)
@click.option(
    "--retries",
    type=int,
    default=3,
    show_default=True,
    help="Sampling-path retry budget on validator rejections.",
)
@click.option(
    "--output-file",
    "output_file",
    type=click.Path(dir_okay=False),
    default=None,
    help="Write the JSON to this file instead of stdout.",
)
@click.option(
    "--output",
    type=click.Choice(["json", "table"]),
    default="json",
    show_default=True,
    help="Output format (table mode prints a per-entry summary alongside the JSON).",
)
def mission_scaffold_criteria_cmd(
    directive: str,
    allowlist: tuple[str, ...],
    use_sampling: bool | None,
    bedrock_model_id: str | None,
    max_criteria: int,
    retries: int,
    output_file: str | None,
    output: str,
) -> None:
    """Scaffold a criteria.json from a natural-language directive.

    Resolves the sampling state via ``mission.sampling.resolve_sampling_state``;
    when a backend resolves and ``--use-sampling`` permits, the resolved
    backend is asked for a JSON array. The response is validated through
    ``validate_criteria`` and retried up to ``--retries`` times on
    rejection. Falls back to the deterministic keyword-template
    generator when sampling is unavailable, disabled, or after the
    retry budget is exhausted.

    The output always validates through ``validate_criteria`` so the
    resulting file is immediately usable with ``mission start
    --criteria-file``.
    """
    import mission.criteria_scaffold as criteria_scaffold  # noqa: PLC0415 — lazy: avoids cost when help-only
    from mission import (
        sampling as mission_sampling,
    )

    # Reject out-of-range numeric options before any sampling work.
    if max_criteria < 1:
        _emit_error(
            "validation_error",
            {"field": "max-criteria", "reason": "must_be_positive_int"},
        )
        sys.exit(1)
    if retries < 0:
        _emit_error(
            "validation_error",
            {"field": "retries", "reason": "must_be_non_negative_int"},
        )
        sys.exit(1)

    use_sampling_resolved, backend_resolved = mission_sampling.resolve_sampling_state(
        None, use_sampling
    )

    criteria: list[dict[str, Any]] | None = None
    sampling_path_taken = False
    if use_sampling_resolved and backend_resolved != "none":
        backend_obj = mission_sampling.select_sampling_backend(
            None,
            model_id=bedrock_model_id,
            prefs=None,
        )
        if backend_obj is not None:
            try:
                criteria = asyncio.run(
                    criteria_scaffold.generate_sampled_criteria(
                        backend_obj,
                        directive,
                        allowlist=list(allowlist),
                        max_criteria=max_criteria,
                        retries=retries,
                    )
                )
                sampling_path_taken = True
            except BedrockFTUFormNotAcceptedError as exc:
                # Not a fallback case: the account cannot invoke any Anthropic
                # model until the one-time form is submitted, so report it.
                raise click.ClickException(str(exc)) from exc
            except criteria_scaffold.ScaffoldSamplingError as exc:
                # The sampling path failed; emit a one-line warning to
                # stderr so the operator sees what happened, then fall
                # through to the deterministic generator.
                click.echo(
                    f"sampling path failed ({exc.last_reason}); "
                    "falling back to deterministic templates.",
                    err=True,
                )
                criteria = None

    if criteria is None:
        criteria = criteria_scaffold.generate_deterministic_criteria(
            directive,
            allowlist=list(allowlist) or None,
            max_criteria=max_criteria,
        )

    payload = json.dumps(criteria, indent=2, sort_keys=False)

    if output_file:
        Path(output_file).write_text(payload + "\n", encoding="utf-8")
        # Echo a structured summary on the chosen format so the operator
        # can see what was written without re-reading the file.
        if output == "table":
            _echo_scaffold_criteria_table(criteria)
            click.echo(f" written to {output_file}")
        else:
            _emit_json(
                {
                    "output_file": output_file,
                    "criteria_count": len(criteria),
                    "sampling_path": sampling_path_taken,
                }
            )
        return

    # No --output-file: results go to stdout.
    # NOTE(review): in table mode only the per-entry summary is printed and
    # the criteria JSON itself is not emitted anywhere, although the
    # --output help says the summary is printed "alongside the JSON".
    # Behaviour is kept as-is; confirm which one is intended.
    if output == "table":
        _echo_scaffold_criteria_table(criteria)
        return
    click.echo(payload)

1237

1238

1239# ---------------------------------------------------------------------------

1240# run — chain scaffold + start + iterate-to-completion in one call

1241# ---------------------------------------------------------------------------

1242

1243

@mission_cmd.command("run")
@click.option(
    "--directive",
    required=True,
    help="Natural-language goal description.",
)
@click.option(
    "--tool-allowlist",
    multiple=True,
    help="Tool name to allowlist; pass multiple times. Optional with --allow-all-tools.",
)
@click.option(
    "--allow-all-tools",
    is_flag=True,
    help=(
        "Resolve the session's tool allowlist to every registered MCP tool "
        "(minus the mission_* control tools). Makes --tool-allowlist optional; "
        "mutually exclusive with it."
    ),
)
@click.option(
    "--max-iterations",
    type=int,
    default=5,
    show_default=True,
    help="Hard cap on the iteration count. Pass -1 to opt out (uncapped).",
)
@click.option(
    "--max-wall-clock",
    type=int,
    default=300,
    show_default=True,
    help="Hard cap on wall-clock seconds. Pass -1 to opt out (uncapped).",
)
@click.option(
    "--max-criteria",
    type=int,
    default=5,
    show_default=True,
    help="Cap on the number of criterion entries scaffolded.",
)
@click.option(
    "--retries",
    type=int,
    default=3,
    show_default=True,
    help="Sampling-path retry budget on validator rejections during scaffolding.",
)
@click.option(
    "--use-sampling/--no-sampling",
    "use_sampling",
    default=None,
    help=(
        "Force the sampling path on/off for both the scaffolder and "
        "the loop's Strategy_Revision sampler. Default auto-detects: "
        "MCP host capability, then Bedrock credentials, then deterministic."
    ),
)
@click.option(
    "--bedrock-model-id",
    default=None,
    help="Override the Bedrock model id used by the CLI sampling backend.",
)
@click.option(
    "--allow-scripted-strategies",
    is_flag=True,
    help="Allow scripted strategies to run via the Mission sandbox.",
)
@click.option(
    "--save-criteria",
    "save_criteria",
    type=click.Path(dir_okay=False),
    default=None,
    help="Optional path to also persist the scaffolded criteria JSON to disk.",
)
@click.option(
    "--stagnation-threshold",
    type=int,
    default=3,
    show_default=True,
    help="Iterations of no progress before terminate.",
)
@click.option(
    "--cadence",
    type=click.Choice(["every_iteration", "every_n_iterations", "every_t_seconds", "on_event"]),
    default="every_iteration",
    show_default=True,
    help="Checkpoint cadence kind.",
)
@click.option(
    "--dry-run",
    "dry_run",
    is_flag=True,
    help=(
        "Use a stub tool dispatcher and disable Strategy_Revision sampling "
        "during iteration. The criteria scaffolder still runs through "
        "Bedrock when sampling is enabled. Useful for smoke-testing the "
        "loop without spending live tool credits."
    ),
)
def mission_run_cmd(
    directive: str,
    tool_allowlist: tuple[str, ...],
    allow_all_tools: bool,
    max_iterations: int,
    max_wall_clock: int,
    max_criteria: int,
    retries: int,
    use_sampling: bool | None,
    bedrock_model_id: str | None,
    allow_scripted_strategies: bool,
    save_criteria: str | None,
    stagnation_threshold: int,
    cadence: str,
    dry_run: bool,
) -> None:
    """Scaffold criteria and run a Mission session to completion in one call.

    The chained shorthand for the most common Mission invocation: turn
    a natural-language directive into a criteria file via
    ``scaffold-criteria`` (sampling path with deterministic fallback),
    persist a new session with ``start``'s validators, then drive it
    through ``run-to-completion`` with the same per-call verdict
    streaming as ``mission start --run``.

    Per-iteration verdict updates land on stderr as JSON lines; the
    Final_Report (or persisted session JSON when no Final_Report file
    was written) lands on stdout when the loop terminates.

    With ``--save-criteria PATH``, the scaffolded criteria JSON is
    also written to ``PATH`` so the operator can inspect / re-use it
    without re-running the scaffold step.
    """
    import mission.criteria_scaffold as criteria_scaffold  # noqa: PLC0415 — lazy
    from mission import (
        sampling as mission_sampling,
    )
    from mission import (
        state as mission_state,
    )
    from mission import (
        validation as mission_validation,
    )
    from mission.types import SCHEMA_VERSION
    from mission.validation import MissionValidationError

    if max_criteria < 1:
        _emit_error(
            "validation_error",
            {"field": "max-criteria", "reason": "must_be_positive_int"},
        )
        sys.exit(1)
    if retries < 0:
        _emit_error(
            "validation_error",
            {"field": "retries", "reason": "must_be_non_negative_int"},
        )
        sys.exit(1)

    # Resolve the effective allowlist up front, before scaffolding or any
    # persistence. A mutual-exclusivity or empty-registry rejection exits here
    # with no sampling spend, no criteria file write, and no state write. The
    # scaffolder below still consults the explicit ``tool_allowlist`` (empty
    # under --allow-all-tools, which routes it to the directive-only
    # deterministic path); ``allowlist_resolved`` fills the persisted session.
    allowlist_resolved = _resolve_cli_allowlist(
        allow_all_tools=allow_all_tools, tool_allowlist=tool_allowlist
    )

    # ---- Step 1: validate operator-supplied inputs. ----------------------
    # Everything that does not depend on the scaffolded criteria is checked
    # here, for the same reason as the allowlist above: an invalid
    # directive, budget, cadence or stagnation threshold must be rejected
    # before any sampling spend or --save-criteria file write.
    budget: dict[str, Any] = {
        "max_iterations": max_iterations,
        "max_wall_clock_seconds": max_wall_clock,
    }
    cadence_dict: dict[str, Any] = {"kind": cadence}

    try:
        directive_clean = mission_validation.validate_directive(directive)
        budget_clean = mission_validation.validate_budget(budget, allowlist_resolved, {})
        cadence_clean = mission_validation.validate_cadence(cadence_dict)
    except MissionValidationError as exc:
        _emit_error(exc.code, exc.details)
        sys.exit(1)

    if not isinstance(stagnation_threshold, int) or stagnation_threshold <= 0:
        _emit_error(
            "validation_error",
            {"field": "stagnation-threshold", "reason": "must_be_positive_int"},
        )
        sys.exit(1)

    # ---- Step 2: scaffold criteria. -------------------------------------
    # Resolve the sampling state once; reuse it for both the scaffold
    # call and the persisted session's ``use_sampling`` field so the
    # operator's --use-sampling/--no-sampling intent applies end-to-end.
    use_sampling_resolved, backend_resolved = mission_sampling.resolve_sampling_state(
        None, use_sampling
    )

    criteria: list[dict[str, Any]] | None = None
    sampling_path_taken = False
    if use_sampling_resolved and backend_resolved != "none":
        backend_obj = mission_sampling.select_sampling_backend(
            None,
            model_id=bedrock_model_id,
            prefs=None,
        )
        if backend_obj is not None:
            try:
                # The scaffolder receives the raw directive, as before.
                criteria = asyncio.run(
                    criteria_scaffold.generate_sampled_criteria(
                        backend_obj,
                        directive,
                        allowlist=list(tool_allowlist),
                        max_criteria=max_criteria,
                        retries=retries,
                    )
                )
                sampling_path_taken = True
            except BedrockFTUFormNotAcceptedError as exc:
                # Not a fallback case: the account cannot invoke any Anthropic
                # model until the one-time form is submitted, so report it.
                raise click.ClickException(str(exc)) from exc
            except criteria_scaffold.ScaffoldSamplingError as exc:
                # Sampling failed: warn on stderr, then fall through to the
                # deterministic generator below.
                click.echo(
                    f"sampling path failed ({exc.last_reason}); "
                    "falling back to deterministic templates.",
                    err=True,
                )
                criteria = None

    if criteria is None:
        criteria = criteria_scaffold.generate_deterministic_criteria(
            directive,
            allowlist=list(tool_allowlist) or None,
            max_criteria=max_criteria,
        )

    # Written before criteria validation so the operator can still inspect
    # the scaffold output if the validator rejects it.
    if save_criteria:
        Path(save_criteria).write_text(
            json.dumps(criteria, indent=2, sort_keys=False) + "\n",
            encoding="utf-8",
        )

    # ---- Step 3: validate the criteria and persist the session. ---------
    try:
        criteria_clean = mission_validation.validate_criteria(criteria)
    except MissionValidationError as exc:
        _emit_error(exc.code, exc.details)
        sys.exit(1)

    session_id = f"mission-{secrets.token_hex(8)}"
    now_iso = datetime.now(UTC).isoformat()
    session: dict[str, Any] = {
        "version": SCHEMA_VERSION,
        "session_id": session_id,
        "directive_text": directive_clean,
        "criteria": criteria_clean,
        "budget": budget_clean,
        "tool_allowlist": allowlist_resolved,
        "checkpoint_cadence": cadence_clean,
        "stagnation_threshold": stagnation_threshold,
        "use_sampling": use_sampling_resolved,
        "sampling_backend_resolved": backend_resolved,
        "allow_scripted_strategies": bool(allow_scripted_strategies),
        "status": "pending",
        "created_at": now_iso,
        "iterations": [],
        "no_progress_counter": 0,
    }
    # The model override is stored only when the operator supplied one.
    if bedrock_model_id:
        session["bedrock_model_id"] = bedrock_model_id

    backend = mission_state.get_backend()
    backend.save_session(cast("SessionState", _strip_private_criteria(session)))

    # Emit a one-line scaffold summary to stderr so the operator can see
    # what shape the criteria landed in before the loop starts. Stdout is
    # reserved for the Final_Report at the end.
    _emit_json(
        {
            "event": "mission.run.scaffolded",
            "session_id": session_id,
            "criteria_count": len(criteria),
            "sampling_path": sampling_path_taken,
            "sampling_backend_resolved": backend_resolved,
        },
        err=True,
    )

    # ---- Step 4: iterate to completion. ---------------------------------
    _run_to_completion(session_id, dry_run=dry_run)

Coverage for cli/commands/mission_cmd.py: 89.33%

508 statements