Coverage for cli/commands/mission_cmd.py: 88%

502 statements  

« prev     ^ index     » next       coverage.py v7.14.1, created at 2026-06-15 15:07 +0000

1"""Mission goal-directed iteration loop CLI commands. 

2 

3The whole subcommand group is gated by ``GCO_ENABLE_MISSION``: when 

4the env var is unset, the group prints a one-line hint and exits with 

5code 2 before dispatching to any subcommand. With the flag set, the 

6nine subcommands talk directly to the persistence backend and the 

7:class:`mission.engine.MissionEngine` — no MCP round-trip is involved 

8so the CLI works without the MCP server running. 

9 

10Subcommands: 

11 

12* ``start`` — validate inputs, resolve sampling state, persist a new 

13 ``SessionState``. With ``--run``, iterate to completion synchronously. 

14* ``status`` — read the full session JSON. 

15* ``iterate`` — drive one or more iterations of an existing session. 

16* ``checkpoint`` — re-run the verdict cascade on the latest iteration. 

17* ``complete`` — force a session into ``completed``. 

18* ``abort`` — pause or terminate a session. 

19* ``resume`` — transition ``paused`` to ``running``. 

20* ``history`` — return the iteration history (full or summary). 

21* ``list`` — list sessions across the configured backend. 

22 

23Output formats: every subcommand defaults to ``--output json``; pass 

24``--output table`` for a human-readable summary. 

25""" 

26 

27from __future__ import annotations 

28 

29import asyncio 

30import json 

31import os 

32import secrets 

33import sys 

34from collections.abc import Mapping 

35from datetime import UTC, datetime 

36from pathlib import Path 

37from typing import TYPE_CHECKING, Any, cast 

38 

39import click 

40 

# The Mission package lives under ``mcp/mission/`` and is imported as
# ``mission.*``. Match the path-injection pattern used throughout the
# MCP module surface and the ``test_mission_*`` test files so the
# imports below resolve regardless of how this module is loaded.
#
# The inserted entry is the ``mcp`` directory found three levels above
# this file's resolved location; it is placed at the front of
# ``sys.path`` so it takes precedence over later entries.
sys.path.insert(0, str(Path(__file__).resolve().parent.parent.parent / "mcp"))

46 

47 

48if TYPE_CHECKING: # pragma: no cover - import only for type checkers 

49 from mission.types import SessionState 

50 

51 

# One-line hint echoed to stderr by ``_check_feature_flag`` when neither
# gating environment variable is set to a truthy value.
_FEATURE_FLAG_HINT = (
    "Mission tools are gated. Set GCO_ENABLE_MISSION=true (or GCO_ENABLE_ALL_TOOLS=true) to enable."
)

55 

56 

57def _flag_enabled() -> bool: 

58 """Return True iff ``GCO_ENABLE_MISSION`` (or umbrella) is truthy.""" 

59 truthy = {"true", "1", "yes", "on"} 

60 return ( 

61 os.environ.get("GCO_ENABLE_MISSION", "").strip().lower() in truthy 

62 or os.environ.get("GCO_ENABLE_ALL_TOOLS", "").strip().lower() in truthy 

63 ) 

64 

65 

def _check_feature_flag() -> None:
    """Abort with exit code 2 unless the Mission feature gate is open.

    When ``_flag_enabled()`` is true this is a no-op. Otherwise the
    ``_FEATURE_FLAG_HINT`` text is echoed with ``err=True`` and
    ``SystemExit(2)`` is raised.
    """
    if _flag_enabled():
        return
    click.echo(_FEATURE_FLAG_HINT, err=True)
    raise SystemExit(2)

71 

72 

73# --------------------------------------------------------------------------- 

74# Output helpers 

75# --------------------------------------------------------------------------- 

76 

77 

def _strip_private_criteria(session: Mapping[str, Any]) -> dict[str, Any]:
    """Delegate to ``mission.validation.strip_private_fields`` for ``session``.

    This is a local alias kept so existing call sites in this module
    keep their name; the stripping logic itself lives entirely in
    ``mission.validation``. The import is deferred to call time.
    """
    from mission import validation as _validation  # noqa: PLC0415

    return _validation.strip_private_fields(session)

90 

91 

92def _strip_iteration(iteration: Any) -> Any: 

93 """Strip private keys from an iteration's ``criteria_evaluation`` shape. 

94 

95 Thin alias over the iteration variant of the canonical helper. 

96 Returns non-dict input verbatim so a corrupt history entry stays 

97 observable to the caller. 

98 """ 

99 if not isinstance(iteration, Mapping): 99 ↛ 100line 99 didn't jump to line 100 because the condition on line 99 was never true

100 return iteration 

101 from mission.validation import strip_private_fields_iterations # noqa: PLC0415 

102 

103 return strip_private_fields_iterations([iteration])[0] 

104 

105 

def _emit_json(payload: Any, *, err: bool = False) -> None:
    """Write ``payload`` as one line of JSON through ``click.echo``.

    Serialisation uses ``default=str`` so a value ``json`` cannot
    encode natively is stringified instead of raising. ``err`` is
    forwarded to ``click.echo`` to select the error stream.
    """
    line = json.dumps(payload, default=str)
    click.echo(line, err=err)

115 

116 

def _emit_error(code: str, details: dict[str, Any] | None = None) -> None:
    """Send a structured error envelope to the error stream.

    The envelope always carries ``code``; a ``details`` key is added
    only when ``details`` is not ``None`` (an empty dict is still
    included).
    """
    envelope: dict[str, Any] = (
        {"code": code} if details is None else {"code": code, "details": details}
    )
    _emit_json(envelope, err=True)

123 

124 

125# --------------------------------------------------------------------------- 

126# Stub dispatcher 

127# --------------------------------------------------------------------------- 

128 

129 

def _make_stub_dispatcher() -> Any:
    """Build the stub tool dispatcher from ``mission._engine_factory``.

    Pure pass-through to ``make_stub_dispatcher()``; the name is kept
    in this module for tests that import it from here. The import is
    deferred to call time.
    """
    from mission import _engine_factory  # noqa: PLC0415

    return _engine_factory.make_stub_dispatcher()

142 

143 

144# --------------------------------------------------------------------------- 

145# Click group 

146# --------------------------------------------------------------------------- 

147 

148 

@click.group("mission")
def mission_cmd() -> None:
    """Mission goal-directed iteration loop commands.

    Subcommands manage Mission sessions: ``start``, ``status``,
    ``iterate``, ``checkpoint``, ``complete``, ``abort``, ``resume``,
    ``history``, ``list``.

    Gated by the ``GCO_ENABLE_MISSION`` environment variable. With
    the flag unset, every subcommand prints a one-line hint to stderr
    and exits with code 2.
    """
    # The docstring above doubles as the group's ``--help`` text, so it is
    # user-facing. The gate check lives in the group callback, which click
    # invokes before dispatching to a subcommand; it raises SystemExit(2)
    # when neither gating environment variable is truthy.
    _check_feature_flag()

162 

163 

164# --------------------------------------------------------------------------- 

165# start 

166# --------------------------------------------------------------------------- 

167 

168 

@mission_cmd.command("start")
@click.option("--directive", required=True, help="Natural-language goal description.")
@click.option(
    "--criteria-file",
    type=click.Path(exists=True, dir_okay=False),
    default=None,
    help="JSON file containing the criteria list. Required unless --with-defaults is set.",
)
@click.option(
    "--max-iterations",
    type=int,
    required=True,
    help="Hard cap on the iteration count. Pass -1 to opt out (uncapped).",
)
@click.option(
    "--max-wall-clock",
    type=int,
    required=True,
    help="Hard cap on wall-clock seconds. Pass -1 to opt out (uncapped).",
)
@click.option(
    "--tool-allowlist",
    multiple=True,
    help="Tool name to allowlist; pass multiple times. Optional with --allow-all-tools.",
)
@click.option(
    "--allow-all-tools",
    is_flag=True,
    help=(
        "Resolve the session's tool allowlist to every registered MCP tool "
        "(minus the mission_* control tools). Makes --tool-allowlist optional; "
        "mutually exclusive with it."
    ),
)
@click.option(
    "--cadence",
    type=click.Choice(["every_iteration", "every_n_iterations", "every_t_seconds", "on_event"]),
    default="every_iteration",
    show_default=True,
    help="Checkpoint cadence kind.",
)
@click.option("--cadence-n", type=int, default=None, help="Cadence n parameter.")
@click.option(
    "--cadence-t",
    type=int,
    default=None,
    help="Cadence t parameter (seconds).",
)
@click.option(
    "--cadence-event",
    default=None,
    help="Cadence event_name parameter.",
)
@click.option(
    "--stagnation-threshold",
    type=int,
    default=3,
    show_default=True,
    help="Iterations of no progress before terminate.",
)
@click.option(
    "--use-sampling/--no-sampling",
    "use_sampling",
    default=None,
    help="Enable/disable LLM sampling (default: auto-detect).",
)
@click.option(
    "--bedrock-model-id",
    default=None,
    help="Override the Bedrock model id used by the CLI sampling backend.",
)
@click.option(
    "--allow-scripted-strategies",
    is_flag=True,
    help="Allow scripted strategies to run via the Mission sandbox.",
)
@click.option(
    "--with-defaults",
    is_flag=True,
    help="Use a basic placeholder predicate criterion when no --criteria-file is provided.",
)
@click.option(
    "--run",
    "run_mode",
    is_flag=True,
    help="Iterate to completion synchronously after creating the session.",
)
@click.option(
    "--dry-run",
    "dry_run",
    is_flag=True,
    help=(
        "Use a stub tool dispatcher and disable Strategy_Revision sampling "
        "during iteration. Useful for smoke-testing the loop bookkeeping "
        "without spending Bedrock or AWS credits. Only meaningful with --run."
    ),
)
@click.option(
    "--output",
    type=click.Choice(["json", "table"]),
    default="json",
    show_default=True,
    help="Output format.",
)
def mission_start(
    directive: str,
    criteria_file: str | None,
    max_iterations: int,
    max_wall_clock: int,
    tool_allowlist: tuple[str, ...],
    allow_all_tools: bool,
    cadence: str,
    cadence_n: int | None,
    cadence_t: int | None,
    cadence_event: str | None,
    stagnation_threshold: int,
    use_sampling: bool | None,
    bedrock_model_id: str | None,
    allow_scripted_strategies: bool,
    with_defaults: bool,
    run_mode: bool,
    dry_run: bool,
    output: str,
) -> None:
    """Start a new Mission session.

    Validates inputs through the shared validators in
    ``mission.validation``, resolves the sampling state via
    ``mission.sampling.resolve_sampling_state``, and persists the
    session through the configured backend (``GCO_MISSION_STATE_BACKEND``,
    defaults to filesystem under ``~/.gco/missions``).

    With ``--run``, iterates to completion synchronously: each verdict
    is printed as one JSON line to stderr; the final stdout is the
    Final_Report JSON.
    """
    # NOTE: the docstring above is the command's ``--help`` text, so it is
    # kept as-is. The Mission imports are deferred to call time so that a
    # help-only invocation does not pay for them.
    from mission import sampling as mission_sampling  # noqa: PLC0415
    from mission import state as mission_state  # noqa: PLC0415
    from mission import validation as mission_validation  # noqa: PLC0415
    from mission.types import SCHEMA_VERSION  # noqa: PLC0415
    from mission.validation import MissionValidationError  # noqa: PLC0415

    # --- 1. Criteria: a JSON file wins; otherwise the placeholder default. ---
    if not criteria_file and not with_defaults:
        _emit_error(
            "validation_error",
            {
                "field": "criteria",
                "reason": "either --criteria-file or --with-defaults is required",
            },
        )
        sys.exit(1)

    criteria: list[dict[str, Any]]
    if criteria_file:
        try:
            with open(criteria_file, encoding="utf-8") as handle:
                criteria = json.load(handle)
        except (OSError, ValueError) as load_err:
            # ValueError covers both malformed JSON and undecodable bytes.
            _emit_error(
                "validation_error",
                {"field": "criteria-file", "reason": str(load_err)},
            )
            sys.exit(1)
    else:
        criteria = [
            {
                "criterion_id": "default",
                "kind": "predicate",
                "required": True,
                "expression": "True",
            }
        ]

    # --- 2. Raw budget and cadence shapes handed to the validators. ---
    raw_budget: dict[str, Any] = {
        "max_iterations": max_iterations,
        "max_wall_clock_seconds": max_wall_clock,
    }
    raw_cadence: dict[str, Any] = {"kind": cadence}
    optional_cadence_params = (
        ("n", cadence_n),
        ("t", cadence_t),
        ("event_name", cadence_event),
    )
    # Only parameters the operator actually supplied are forwarded.
    raw_cadence.update(
        {key: value for key, value in optional_cadence_params if value is not None}
    )

    # --- 3. Effective allowlist. The helper emits an error envelope and
    # exits 1 on rejection, so nothing below runs for a rejected allowlist.
    allowlist_resolved = _resolve_cli_allowlist(
        allow_all_tools=allow_all_tools, tool_allowlist=tool_allowlist
    )

    # --- 4. Shared validators. The budget validator receives an empty tag
    # map here; per the original design notes the CLI is a thin path and
    # the MCP tool surface performs the full validation.
    try:
        directive_clean = mission_validation.validate_directive(directive)
        criteria_clean = mission_validation.validate_criteria(criteria)
        budget_clean = mission_validation.validate_budget(raw_budget, allowlist_resolved, {})
        cadence_clean = mission_validation.validate_cadence(raw_cadence)
    except MissionValidationError as rejection:
        _emit_error(rejection.code, rejection.details)
        sys.exit(1)

    if not (isinstance(stagnation_threshold, int) and stagnation_threshold > 0):
        _emit_error(
            "validation_error",
            {"field": "stagnation-threshold", "reason": "must_be_positive_int"},
        )
        sys.exit(1)

    # --- 5. Sampling state. ``None`` is passed as the context because there
    # is no MCP request context on the CLI path.
    use_sampling_resolved, backend_resolved = mission_sampling.resolve_sampling_state(
        None, use_sampling
    )

    # --- 6. Assemble the session record (key order is kept stable). ---
    session_id = f"mission-{secrets.token_hex(8)}"
    created_at = datetime.now(UTC).isoformat()
    session: dict[str, Any] = {
        "version": SCHEMA_VERSION,
        "session_id": session_id,
        "directive_text": directive_clean,
        "criteria": criteria_clean,
        "budget": budget_clean,
        "tool_allowlist": allowlist_resolved,
        "checkpoint_cadence": cadence_clean,
        "stagnation_threshold": stagnation_threshold,
        "use_sampling": use_sampling_resolved,
        "sampling_backend_resolved": backend_resolved,
        "allow_scripted_strategies": bool(allow_scripted_strategies),
        "status": "pending",
        "created_at": created_at,
        "iterations": [],
        "no_progress_counter": 0,
    }
    if bedrock_model_id:
        session["bedrock_model_id"] = bedrock_model_id

    # --- 7. Persist. Private criterion keys are stripped from the copy that
    # is written; the in-memory ``session`` keeps them.
    store = mission_state.get_backend()
    store.save_session(cast("SessionState", _strip_private_criteria(session)))

    # --- 8. Either hand over to the synchronous runner or print a summary. ---
    if run_mode:
        _run_to_completion(session_id, dry_run=dry_run)
        return

    if output == "table":
        sampling_label = "on" if use_sampling_resolved else "off"
        click.echo(f"Session ID: {session_id}")
        click.echo("Status: pending")
        click.echo(f"Sampling: {sampling_label} ({backend_resolved})")
        return

    _emit_json(
        {
            "session_id": session_id,
            "status": "pending",
            "use_sampling": use_sampling_resolved,
            "sampling_backend_resolved": backend_resolved,
        }
    )

453 

454 

def _run_to_completion(session_id: str, *, dry_run: bool = False) -> None:
    """Iterate ``session_id`` until the engine returns a terminal verdict.

    The session is loaded from the configured backend (exit 1 with
    ``session_not_found`` if absent) and an engine is built through
    ``build_mission_engine`` with ``use_stub_dispatcher=dry_run``.
    Unless ``dry_run`` is set, the tool registry is populated first.

    Each iteration's index, verdict and verdict reason are written as
    one JSON line to the error stream. A ``MissionEngineError`` is
    reported by its ``code`` and ends the process with exit code 1.

    Once the verdict is ``complete`` or ``terminate`` the final output
    goes to stdout: the ``<session_id>.report.json`` file when the
    backend is a ``FilesystemBackend`` and the file exists, otherwise
    the reloaded session with private keys stripped. If the session
    can no longer be loaded, ``session_disappeared`` is emitted and the
    process exits 1.
    """
    from mission import state as mission_state  # noqa: PLC0415
    from mission._engine_factory import build_mission_engine  # noqa: PLC0415
    from mission.engine import MissionEngineError  # noqa: PLC0415
    from mission.state import FilesystemBackend  # noqa: PLC0415

    store = mission_state.get_backend()
    loaded = store.load_session(session_id)
    if loaded is None:
        _emit_error("session_not_found", {"session_id": session_id})
        sys.exit(1)

    # The stub dispatcher path skips registry population entirely.
    if not dry_run:
        _ensure_tool_registry()

    terminal_verdicts = ("complete", "terminate")
    progress_fields = ("iteration_index", "verdict", "verdict_reason")

    async def _drive() -> None:
        engine = await build_mission_engine(loaded, ctx=None, use_stub_dispatcher=dry_run)
        finished = False
        while not finished:
            try:
                record = await engine.run_iteration(session_id, ctx=None)
            except MissionEngineError as failure:
                _emit_error(failure.code, {"session_id": session_id})
                sys.exit(1)
            _emit_json({field: record[field] for field in progress_fields}, err=True)
            finished = record["verdict"] in terminal_verdicts

    asyncio.run(_drive())

    # Reload first, then prefer the on-disk report when one was written.
    final_session = store.load_session(session_id)
    report_path = (
        store.root / f"{session_id}.report.json"
        if isinstance(store, FilesystemBackend)
        else None
    )
    if report_path is not None and report_path.exists():
        click.echo(report_path.read_text(encoding="utf-8"))
        return

    if final_session is None:
        _emit_error("session_disappeared", {"session_id": session_id})
        sys.exit(1)
    _emit_json(_strip_private_criteria(final_session))

526 

527 

528def _ensure_tool_registry() -> None: 

529 """Register every MCP tool against the shared FastMCP server, once. 

530 

531 The CLI doesn't normally boot the MCP server, so its FastMCP 

532 instance starts empty. The live tool dispatcher in the engine 

533 factory looks up tools on that instance, so we eagerly register 

534 every tool group up-front when the live path is selected. The 

535 underlying ``register_all_tools`` is import-time side-effects on 

536 module load; calling it twice is harmless because the per-module 

537 decorators only fire on the first import. 

538 """ 

539 sys.path.insert(0, str(Path(__file__).resolve().parent.parent.parent / "mcp")) 

540 from tools import register_all_tools # noqa: PLC0415 

541 

542 register_all_tools() 

543 

544 

def _resolve_registered_tools_for_cli() -> tuple[dict[str, Any], set[str]]:
    """Populate the tool registry and return a snapshot of it.

    After ``_ensure_tool_registry()`` runs, the tools are listed via
    ``server.mcp._list_tools()`` inside a fresh event loop. The result
    is a pair:

    * a dict mapping each tool's ``name`` to the tool object, and
    * the set of names whose ``tags`` contain ``"mission"`` (a missing
      or empty ``tags`` attribute counts as no tags).

    Both are empty when the listing returns no tools.
    """
    _ensure_tool_registry()
    from server import mcp  # noqa: PLC0415 — lazy

    async def _snapshot() -> list[Any]:
        listed = await mcp._list_tools()
        return list(listed)

    by_name: dict[str, Any] = {}
    mission_tagged: set[str] = set()
    for tool in asyncio.run(_snapshot()):
        by_name[tool.name] = tool
        tags = getattr(tool, "tags", None) or set()
        if "mission" in tags:
            mission_tagged.add(tool.name)
    return by_name, mission_tagged

566 

567 

def _resolve_cli_allowlist(*, allow_all_tools: bool, tool_allowlist: tuple[str, ...]) -> list[str]:
    """Return the effective tool allowlist, or emit an error and exit 1.

    Explicit mode (``allow_all_tools`` false): the supplied names are
    returned as a list without any registry lookup; an empty tuple is
    rejected with ``validation_error`` / ``reason: empty``.

    All-tools mode: the registry snapshot and the explicit names are
    handed to ``mission.validation.resolve_effective_allowlist``. A
    ``MissionValidationError`` from that call is reported through its
    ``code`` and ``details`` before exiting with code 1.
    """
    from mission import validation as mission_validation  # noqa: PLC0415
    from mission.validation import MissionValidationError  # noqa: PLC0415

    if not allow_all_tools:
        if tool_allowlist:
            return list(tool_allowlist)
        _emit_error("validation_error", {"field": "tool_allowlist", "reason": "empty"})
        sys.exit(1)

    # The registry is populated outside the ``try`` so that only the
    # resolver's own rejection is translated into an error envelope.
    registered_tools, control_tools = _resolve_registered_tools_for_cli()
    try:
        effective: list[str] = mission_validation.resolve_effective_allowlist(
            allow_all_tools=True,
            explicit_allowlist=list(tool_allowlist),
            registered_tools=registered_tools,
            control_tools=control_tools,
        )
    except MissionValidationError as rejection:
        _emit_error(rejection.code, rejection.details)
        sys.exit(1)
    return effective

598 

599 

600# --------------------------------------------------------------------------- 

601# status 

602# --------------------------------------------------------------------------- 

603 

604 

@mission_cmd.command("status")
@click.argument("session_id")
@click.option(
    "--output",
    type=click.Choice(["json", "table"]),
    default="json",
    show_default=True,
)
def mission_status_cmd(session_id: str, output: str) -> None:
    """Get the full state of a Mission session."""
    # The docstring is the command's help text and is kept verbatim.
    from mission.state import get_backend  # noqa: PLC0415

    stored = get_backend().load_session(session_id)
    if stored is None:
        _emit_error("session_not_found", {"session_id": session_id})
        sys.exit(1)

    cleaned = _strip_private_criteria(stored)
    if output != "table":
        _emit_json(cleaned)
        return

    # Table view: one labelled line per field, echoed in a fixed order.
    click.echo(f"Session ID: {cleaned.get('session_id', '')}")
    click.echo(f"Status: {cleaned.get('status', '')}")
    click.echo(f"Directive: {cleaned.get('directive_text', '')}")
    iteration_count = len(cleaned.get("iterations", []) or [])
    click.echo(f"Iterations: {iteration_count}")
    tool_names = cleaned.get("tool_allowlist", []) or []
    click.echo(f"Allowlist: {', '.join(tool_names)}")
    sampling_label = "on" if cleaned.get("use_sampling") else "off"
    click.echo(f"Sampling: {sampling_label} ({cleaned.get('sampling_backend_resolved', 'none')})")

636 

637 

638# --------------------------------------------------------------------------- 

639# iterate 

640# --------------------------------------------------------------------------- 

641 

642 

@mission_cmd.command("iterate")
@click.argument("session_id")
@click.option(
    "--max-iterations",
    type=int,
    default=1,
    show_default=True,
    help="How many iterations to run in this call.",
)
@click.option(
    "--dry-run",
    "dry_run",
    is_flag=True,
    help=(
        "Use a stub tool dispatcher and disable Strategy_Revision sampling. "
        "Useful for smoke-testing the loop without spending Bedrock or AWS credits."
    ),
)
@click.option(
    "--output",
    type=click.Choice(["json", "table"]),
    default="json",
    show_default=True,
)
def mission_iterate_cmd(session_id: str, max_iterations: int, dry_run: bool, output: str) -> None:
    """Run one or more iterations on a Mission session.

    Stops early on a terminal verdict. By default the engine is wired
    with the live FastMCP tool dispatcher and the Strategy_Revision
    sampling callable so the loop iterates against real tool results
    and lets the model revise the strategy between iterations.

    Pass ``--dry-run`` to substitute the canned-stub dispatcher and
    disable sampling — useful for smoke-testing the bookkeeping
    without spending Bedrock or AWS credits.
    """
    # The docstring is the command's help text and is kept verbatim.
    from mission._engine_factory import build_mission_engine  # noqa: PLC0415
    from mission.engine import MissionEngineError  # noqa: PLC0415
    from mission.state import get_backend  # noqa: PLC0415

    # ``max_iterations`` here is the number of iterations for THIS call,
    # not the session-wide budget cap (which allows -1 for "uncapped").
    # Anything below one would be a no-op invocation, so it is rejected.
    if max_iterations < 1:
        _emit_error(
            "validation_error",
            {"field": "max-iterations", "reason": "must_be_positive_int"},
        )
        sys.exit(1)

    session_for_runner = get_backend().load_session(session_id)
    if session_for_runner is None:
        _emit_error("session_not_found", {"session_id": session_id})
        sys.exit(1)

    # The stub dispatcher path skips registry population entirely.
    if not dry_run:
        _ensure_tool_registry()

    summary_fields = ("iteration_index", "verdict", "verdict_reason")

    async def _drive() -> dict[str, Any]:
        engine = await build_mission_engine(
            session_for_runner, ctx=None, use_stub_dispatcher=dry_run
        )
        outcome: dict[str, Any] = {"session_id": session_id}
        completed: list[dict[str, Any]] = []
        for _ in range(max_iterations):
            try:
                record = await engine.run_iteration(session_id, ctx=None)
            except MissionEngineError as failure:
                # Stop at the first engine error; it is reported after the loop.
                outcome["error"] = {"code": failure.code}
                break
            completed.append({field: record[field] for field in summary_fields})
            if record["verdict"] in ("complete", "terminate"):
                break
        outcome["iterations"] = completed
        return outcome

    result = asyncio.run(_drive())

    if "error" in result:
        _emit_error(result["error"]["code"], {"session_id": session_id})
        sys.exit(1)

    if output != "table":
        _emit_json(result)
        return

    for entry in result.get("iterations", []):
        click.echo(
            f" Iteration {entry['iteration_index']}: {entry['verdict']} ({entry['verdict_reason']})"
        )

742 

743 

744# --------------------------------------------------------------------------- 

745# checkpoint 

746# --------------------------------------------------------------------------- 

747 

748 

@mission_cmd.command("checkpoint")
@click.argument("session_id")
@click.option(
    "--output",
    type=click.Choice(["json", "table"]),
    default="json",
    show_default=True,
)
def mission_checkpoint_cmd(session_id: str, output: str) -> None:
    """Re-run the verdict cascade on the latest iteration of a session."""
    # The docstring is the command's help text and is kept verbatim.
    from mission.decide import decide_verdict  # noqa: PLC0415
    from mission.state import get_backend  # noqa: PLC0415

    session = get_backend().load_session(session_id)
    if session is None:
        _emit_error("session_not_found", {"session_id": session_id})
        sys.exit(1)

    history = session.get("iterations") or []
    if not history:
        # Nothing has run yet, so there is no iteration to evaluate.
        _emit_error("no_iterations", {"session_id": session_id})
        sys.exit(1)

    # Only the most recent iteration is evaluated, against the current UTC time.
    latest = history[-1]
    verdict, reason = decide_verdict(session, latest, datetime.now(UTC))
    iteration_index = latest.get("iteration_index")

    if output == "table":
        click.echo(f"Iteration {iteration_index}: {verdict} ({reason})")
        return

    _emit_json(
        {
            "session_id": session_id,
            "iteration_index": iteration_index,
            "verdict": verdict,
            "verdict_reason": reason,
        }
    )

783 

784 

785# --------------------------------------------------------------------------- 

786# complete 

787# --------------------------------------------------------------------------- 

788 

789 

@mission_cmd.command("complete")
@click.argument("session_id")
@click.option(
    "--output",
    type=click.Choice(["json", "table"]),
    default="json",
    show_default=True,
)
def mission_complete_cmd(session_id: str, output: str) -> None:
    """Force a Mission session into ``completed`` status."""
    # The docstring is the command's help text and is kept verbatim.
    from mission.state import get_backend  # noqa: PLC0415
    from mission.types import TERMINAL_STATES  # noqa: PLC0415

    store = get_backend()
    session = store.load_session(session_id)
    if session is None:
        _emit_error("session_not_found", {"session_id": session_id})
        sys.exit(1)

    current_status = session["status"]
    if current_status in TERMINAL_STATES:
        # A session already in a terminal state is reported, not overwritten.
        _emit_error(
            "session_terminal",
            {"session_id": session_id, "status": current_status},
        )
        sys.exit(1)

    # Stamp the forced completion and persist the stripped copy.
    ended_at = datetime.now(UTC).isoformat()
    session["status"] = "completed"
    session["final_verdict"] = "complete"
    session["ended_at"] = ended_at
    store.save_session(cast("SessionState", _strip_private_criteria(session)))

    if output == "table":
        click.echo(f"Session {session_id}: completed (forced)")
        return

    _emit_json(
        {
            "session_id": session_id,
            "status": "completed",
            "final_verdict": "complete",
        }
    )

828 

829 

830# --------------------------------------------------------------------------- 

831# abort 

832# --------------------------------------------------------------------------- 

833 

834 

@mission_cmd.command("abort")
@click.argument("session_id")
@click.option("--pause", is_flag=True, help="Pause the session instead of terminating.")
@click.option(
    "--output",
    type=click.Choice(["json", "table"]),
    default="json",
    show_default=True,
)
def mission_abort_cmd(session_id: str, pause: bool, output: str) -> None:
    """Pause or terminate a Mission session.

    With ``--pause``, transitions the session to ``paused`` (resumable).
    Without ``--pause``, transitions to ``terminated`` and stamps the
    final verdict.
    """
    # Imported lazily so the mission package is only loaded when the
    # command actually runs.
    from mission.state import get_backend  # noqa: PLC0415
    from mission.types import TERMINAL_STATES  # noqa: PLC0415

    store = get_backend()
    record = store.load_session(session_id)

    if record is None:
        _emit_error("session_not_found", {"session_id": session_id})
        sys.exit(1)

    if record["status"] in TERMINAL_STATES:
        # Terminal sessions can be neither paused nor terminated again.
        _emit_error(
            "session_terminal",
            {"session_id": session_id, "status": record["status"]},
        )
        sys.exit(1)

    if not pause:
        # Hard stop: record the terminal verdict and the end timestamp.
        stopped_at = datetime.now(UTC).isoformat()
        record["status"] = "terminated"
        record["final_verdict"] = "terminate"
        record["ended_at"] = stopped_at
    else:
        # Soft stop: only the status changes, so the session stays resumable.
        record["status"] = "paused"

    store.save_session(cast("SessionState", _strip_private_criteria(record)))

    # The reported status is read back from the record after the save.
    result = {"session_id": session_id, "status": record["status"]}
    if output != "table":
        _emit_json(result)
        return
    click.echo(f"Session {session_id}: {record['status']}")

878 

879 

880# --------------------------------------------------------------------------- 

881# resume 

882# --------------------------------------------------------------------------- 

883 

884 

@mission_cmd.command("resume")
@click.argument("session_id")
@click.option(
    "--output",
    type=click.Choice(["json", "table"]),
    default="json",
    show_default=True,
)
def mission_resume_cmd(session_id: str, output: str) -> None:
    """Resume a paused Mission session."""
    # Imported lazily so the mission package is only loaded when the
    # command actually runs.
    from mission.state import get_backend  # noqa: PLC0415

    store = get_backend()
    record = store.load_session(session_id)

    if record is None:
        _emit_error("session_not_found", {"session_id": session_id})
        sys.exit(1)

    # Only the ``paused`` -> ``running`` transition is allowed here.
    is_paused = record["status"] == "paused"
    if not is_paused:
        _emit_error(
            "invalid_state",
            {"session_id": session_id, "status": record["status"]},
        )
        sys.exit(1)

    record["status"] = "running"
    store.save_session(cast("SessionState", _strip_private_criteria(record)))

    if output != "table":
        _emit_json({"session_id": session_id, "status": "running"})
        return
    click.echo(f"Session {session_id}: running")

915 

916 

917# --------------------------------------------------------------------------- 

918# history 

919# --------------------------------------------------------------------------- 

920 

921 

@mission_cmd.command("history")
@click.argument("session_id")
@click.option(
    "--format",
    "fmt",
    type=click.Choice(["full", "summary"]),
    default="summary",
    show_default=True,
    help="Iteration history detail level.",
)
@click.option(
    "--include-observations",
    "include_obs",
    is_flag=True,
    help=(
        "Include the observation and strategy dicts in each iteration's "
        "output. Only meaningful with --format full. Useful for debugging "
        "what each tool returned and what strategy was proposed."
    ),
)
@click.option(
    "--output",
    type=click.Choice(["json", "table"]),
    default="json",
    show_default=True,
)
def mission_history_cmd(session_id: str, fmt: str, include_obs: bool, output: str) -> None:
    """Get the iteration history of a Mission session."""
    # Imported lazily so the mission package is only loaded when the
    # command actually runs.
    from mission.state import get_backend  # noqa: PLC0415

    backend = get_backend()
    session = backend.load_session(session_id)
    if session is None:
        _emit_error("session_not_found", {"session_id": session_id})
        sys.exit(1)
    # A missing or null ``iterations`` field is treated as "no iterations".
    iterations = session.get("iterations") or []

    if fmt == "full":
        cleaned = [_strip_iteration(it) for it in iterations]
        if not include_obs:
            # Strip observation and strategy from the output to keep it
            # concise. Operators who need the full shape pass
            # --include-observations.
            for it in cleaned:
                if isinstance(it, dict):
                    it.pop("observation", None)
                    it.pop("strategy", None)
        if output == "table":
            for it in cleaned:
                if not isinstance(it, dict):
                    continue
                idx = it.get("iteration_index", "?")
                verdict = it.get("verdict", "?")
                reason = it.get("verdict_reason", "?")
                click.echo(f" Iteration {idx}: {verdict} ({reason})")
                if include_obs:
                    # ``dict.get(key, default)`` only covers a *missing* key.
                    # A key that is present but null (or not a dict) used to
                    # crash the table rendering, so normalise every level
                    # before touching it.
                    obs = it.get("observation")
                    if not isinstance(obs, dict):
                        obs = {}
                    strat = it.get("strategy")
                    if not isinstance(strat, dict):
                        strat = {}
                    results = obs.get("tool_results") or []
                    errors = obs.get("errors") or []
                    # Cap the rationale at 100 characters for the table view.
                    rationale = str(strat.get("rationale") or "")[:100]
                    calls = strat.get("tool_calls") or []
                    tool_names = [c.get("tool_name", "?") for c in calls if isinstance(c, dict)]
                    click.echo(f" tools: {tool_names}")
                    click.echo(f" rationale: {rationale}")
                    click.echo(f" tool_results: {len(results)} entries, errors: {len(errors)}")
        else:
            _emit_json({"session_id": session_id, "iterations": cleaned})
        return

    # Summary format: project each iteration onto a fixed set of fields.
    summaries = [
        {
            "iteration_index": it.get("iteration_index"),
            "verdict": it.get("verdict"),
            "verdict_reason": it.get("verdict_reason"),
            "started_at": it.get("started_at"),
            "ended_at": it.get("ended_at"),
            "checkpoint_evaluated": it.get("checkpoint_evaluated", False),
        }
        for it in iterations
    ]
    if output == "table":
        for s in summaries:
            click.echo(
                f" Iteration {s['iteration_index']}: {s['verdict']} ({s['verdict_reason']})"
            )
    else:
        _emit_json({"session_id": session_id, "iterations": summaries})

1010 

1011 

1012# --------------------------------------------------------------------------- 

1013# list 

1014# --------------------------------------------------------------------------- 

1015 

1016 

@mission_cmd.command("list")
@click.option(
    "--status",
    default=None,
    help="Filter sessions by status (pending, running, paused, ...).",
)
@click.option(
    "--output",
    type=click.Choice(["json", "table"]),
    default="json",
    show_default=True,
)
def mission_list_cmd(status: str | None, output: str) -> None:
    """List Mission sessions."""
    # Imported lazily so the mission package is only loaded when the
    # command actually runs.
    from mission.state import get_backend  # noqa: PLC0415

    backend = get_backend()
    # No filter is passed unless the operator supplied --status.
    filter_dict = {"status": status} if status else None
    sessions = backend.list_sessions(filter_dict)

    if output == "table":
        header = f" {'SESSION ID':<40} {'STATUS':<11} {'ITER':>5} CREATED"
        click.echo(header)
        click.echo(" " + "-" * (len(header) - 2))
        for s in sessions:
            # Each column is truncated to its width; null values fall back
            # to an empty string so slicing never sees ``None``.
            sid = (s.get("session_id") or "")[:40]
            st = (s.get("status") or "")[:11]
            # Same null guard as the other columns: applying the ``:>5``
            # format spec to ``None`` raises TypeError.
            it = s.get("iteration_count") or 0
            ca = (s.get("created_at") or "")[:19]
            click.echo(f" {sid:<40} {st:<11} {it:>5} {ca}")
    else:
        _emit_json({"sessions": sessions})

1049 

1050 

1051# --------------------------------------------------------------------------- 

1052# scaffold-criteria 

1053# --------------------------------------------------------------------------- 

1054 

1055 

@mission_cmd.command("scaffold-criteria")
@click.option(
    "--directive",
    required=True,
    help="Natural-language goal description used to seed the criteria.",
)
@click.option(
    "--allowlist",
    "allowlist",
    multiple=True,
    help=(
        "Optional tool names that the resulting session would be "
        "configured with. Used informationally on the deterministic "
        "path; on the sampling path, shapes the prompt so the model "
        "picks metric/event names plausibly produced by the listed tools."
    ),
)
@click.option(
    "--use-sampling/--no-sampling",
    "use_sampling",
    default=None,
    help=(
        "Force the sampling path on/off. Default auto-detects: MCP "
        "host capability, then Bedrock credentials, then deterministic."
    ),
)
@click.option(
    "--bedrock-model-id",
    default=None,
    help="Override the Bedrock model id used by the CLI sampling backend.",
)
@click.option(
    "--max-criteria",
    type=int,
    default=5,
    show_default=True,
    help="Cap on the number of criterion entries scaffolded.",
)
@click.option(
    "--retries",
    type=int,
    default=3,
    show_default=True,
    help="Sampling-path retry budget on validator rejections.",
)
@click.option(
    "--output-file",
    "output_file",
    type=click.Path(dir_okay=False),
    default=None,
    help="Write the JSON to this file instead of stdout.",
)
@click.option(
    "--output",
    type=click.Choice(["json", "table"]),
    default="json",
    show_default=True,
    help="Output format (table mode prints a per-entry summary alongside the JSON).",
)
def mission_scaffold_criteria_cmd(
    directive: str,
    allowlist: tuple[str, ...],
    use_sampling: bool | None,
    bedrock_model_id: str | None,
    max_criteria: int,
    retries: int,
    output_file: str | None,
    output: str,
) -> None:
    """Scaffold a criteria.json from a natural-language directive.

    Resolves the sampling state via ``mission.sampling.resolve_sampling_state``;
    when a backend resolves and ``--use-sampling`` permits, the resolved
    backend is asked for a JSON array. The response is validated through
    ``validate_criteria`` and retried up to ``--retries`` times on
    rejection. Falls back to the deterministic keyword-template
    generator when sampling is unavailable, disabled, or after the
    retry budget is exhausted.

    The output always validates through ``validate_criteria`` so the
    resulting file is immediately usable with ``mission start
    --criteria-file``.
    """
    # Lazy imports: avoid the import cost when only --help is requested.
    from mission import criteria_scaffold  # noqa: PLC0415
    from mission import sampling as mission_sampling  # noqa: PLC0415

    # Reject out-of-range numeric flags, in the order the flags are checked.
    flag_checks = (
        (max_criteria < 1, "max-criteria", "must_be_positive_int"),
        (retries < 0, "retries", "must_be_non_negative_int"),
    )
    for is_invalid, flag_name, why in flag_checks:
        if is_invalid:
            _emit_error("validation_error", {"field": flag_name, "reason": why})
            sys.exit(1)

    use_sampling_resolved, backend_resolved = mission_sampling.resolve_sampling_state(
        None, use_sampling
    )

    criteria: list[dict[str, Any]] | None = None
    sampling_path_taken = False

    # Only look for a sampling backend when sampling is on and one resolved.
    sampler = None
    if use_sampling_resolved and backend_resolved != "none":
        sampler = mission_sampling.select_sampling_backend(
            None,
            model_id=bedrock_model_id,
            prefs=None,
        )

    if sampler is not None:
        try:
            criteria = asyncio.run(
                criteria_scaffold.generate_sampled_criteria(
                    sampler,
                    directive,
                    allowlist=list(allowlist),
                    max_criteria=max_criteria,
                    retries=retries,
                )
            )
            sampling_path_taken = True
        except criteria_scaffold.ScaffoldSamplingError as exc:
            # Warn on stderr, then let the deterministic generator below
            # take over.
            click.echo(
                f"sampling path failed ({exc.last_reason}); "
                "falling back to deterministic templates.",
                err=True,
            )
            criteria = None

    if criteria is None:
        criteria = criteria_scaffold.generate_deterministic_criteria(
            directive,
            allowlist=list(allowlist) or None,
            max_criteria=max_criteria,
        )

    serialized = json.dumps(criteria, indent=2, sort_keys=False)
    wants_table = output == "table"

    def _echo_rows() -> None:
        # One summary line per criterion: id, kind and required flag.
        for entry in criteria:
            click.echo(
                f" {entry.get('criterion_id'):<32} "
                f"kind={entry.get('kind'):<16} required={entry.get('required')}"
            )

    if not output_file:
        # No --output-file: everything goes to stdout.
        if wants_table:
            _echo_rows()
        else:
            click.echo(serialized)
        return

    Path(output_file).write_text(serialized + "\n", encoding="utf-8")
    # Echo a summary in the chosen format so the operator can see what
    # was written without re-reading the file.
    if wants_table:
        _echo_rows()
        click.echo(f" written to {output_file}")
    else:
        _emit_json(
            {
                "output_file": output_file,
                "criteria_count": len(criteria),
                "sampling_path": sampling_path_taken,
            }
        )

1233 

1234 

1235# --------------------------------------------------------------------------- 

1236# run — chain scaffold + start + iterate-to-completion in one call 

1237# --------------------------------------------------------------------------- 

1238 

1239 

@mission_cmd.command("run")
@click.option(
    "--directive",
    required=True,
    help="Natural-language goal description.",
)
@click.option(
    "--tool-allowlist",
    multiple=True,
    help="Tool name to allowlist; pass multiple times. Optional with --allow-all-tools.",
)
@click.option(
    "--allow-all-tools",
    is_flag=True,
    help=(
        "Resolve the session's tool allowlist to every registered MCP tool "
        "(minus the mission_* control tools). Makes --tool-allowlist optional; "
        "mutually exclusive with it."
    ),
)
@click.option(
    "--max-iterations",
    type=int,
    default=5,
    show_default=True,
    help="Hard cap on the iteration count. Pass -1 to opt out (uncapped).",
)
@click.option(
    "--max-wall-clock",
    type=int,
    default=300,
    show_default=True,
    help="Hard cap on wall-clock seconds. Pass -1 to opt out (uncapped).",
)
@click.option(
    "--max-criteria",
    type=int,
    default=5,
    show_default=True,
    help="Cap on the number of criterion entries scaffolded.",
)
@click.option(
    "--retries",
    type=int,
    default=3,
    show_default=True,
    help="Sampling-path retry budget on validator rejections during scaffolding.",
)
@click.option(
    "--use-sampling/--no-sampling",
    "use_sampling",
    default=None,
    help=(
        "Force the sampling path on/off for both the scaffolder and "
        "the loop's Strategy_Revision sampler. Default auto-detects: "
        "MCP host capability, then Bedrock credentials, then deterministic."
    ),
)
@click.option(
    "--bedrock-model-id",
    default=None,
    help="Override the Bedrock model id used by the CLI sampling backend.",
)
@click.option(
    "--allow-scripted-strategies",
    is_flag=True,
    help="Allow scripted strategies to run via the Mission sandbox.",
)
@click.option(
    "--save-criteria",
    "save_criteria",
    type=click.Path(dir_okay=False),
    default=None,
    help="Optional path to also persist the scaffolded criteria JSON to disk.",
)
@click.option(
    "--stagnation-threshold",
    type=int,
    default=3,
    show_default=True,
    help="Iterations of no progress before terminate.",
)
@click.option(
    "--cadence",
    type=click.Choice(["every_iteration", "every_n_iterations", "every_t_seconds", "on_event"]),
    default="every_iteration",
    show_default=True,
    help="Checkpoint cadence kind.",
)
@click.option(
    "--dry-run",
    "dry_run",
    is_flag=True,
    help=(
        "Use a stub tool dispatcher and disable Strategy_Revision sampling "
        "during iteration. The criteria scaffolder still runs through "
        "Bedrock when sampling is enabled. Useful for smoke-testing the "
        "loop without spending live tool credits."
    ),
)
def mission_run_cmd(
    directive: str,
    tool_allowlist: tuple[str, ...],
    allow_all_tools: bool,
    max_iterations: int,
    max_wall_clock: int,
    max_criteria: int,
    retries: int,
    use_sampling: bool | None,
    bedrock_model_id: str | None,
    allow_scripted_strategies: bool,
    save_criteria: str | None,
    stagnation_threshold: int,
    cadence: str,
    dry_run: bool,
) -> None:
    """Scaffold criteria and run a Mission session to completion in one call.

    The chained shorthand for the most common Mission invocation: turn
    a natural-language directive into a criteria file via
    ``scaffold-criteria`` (sampling path with deterministic fallback),
    persist a new session with ``start``'s validators, then drive it
    through ``run-to-completion`` with the same per-call verdict
    streaming as ``mission start --run``.

    Per-iteration verdict updates land on stderr as JSON lines; the
    Final_Report (or persisted session JSON when no Final_Report file
    was written) lands on stdout when the loop terminates.

    With ``--save-criteria PATH``, the scaffolded criteria JSON is
    also written to ``PATH`` so the operator can inspect / re-use it
    without re-running the scaffold step.
    """
    # Lazy imports: the mission package is only loaded when the command runs.
    from mission import criteria_scaffold  # noqa: PLC0415
    from mission import sampling as mission_sampling  # noqa: PLC0415
    from mission import state as mission_state  # noqa: PLC0415
    from mission import validation as mission_validation  # noqa: PLC0415
    from mission.types import SCHEMA_VERSION  # noqa: PLC0415
    from mission.validation import MissionValidationError  # noqa: PLC0415

    # ---- Step 0: reject invalid operator input before doing any work. ---
    # Every check that does not depend on the scaffolded criteria runs here,
    # so an invalid invocation exits with no sampling spend, no criteria
    # file write, and no state write.
    if max_criteria < 1:
        _emit_error(
            "validation_error",
            {"field": "max-criteria", "reason": "must_be_positive_int"},
        )
        sys.exit(1)
    if retries < 0:
        _emit_error(
            "validation_error",
            {"field": "retries", "reason": "must_be_non_negative_int"},
        )
        sys.exit(1)

    # Resolve the effective allowlist up front. A mutual-exclusivity or
    # empty-registry rejection exits here. The scaffolder below still
    # consults the explicit ``tool_allowlist`` (empty under
    # --allow-all-tools, which routes it to the directive-only deterministic
    # path); ``allowlist_resolved`` fills the persisted session.
    allowlist_resolved = _resolve_cli_allowlist(
        allow_all_tools=allow_all_tools, tool_allowlist=tool_allowlist
    )

    budget: dict[str, Any] = {
        "max_iterations": max_iterations,
        "max_wall_clock_seconds": max_wall_clock,
    }
    cadence_dict: dict[str, Any] = {"kind": cadence}

    try:
        directive_clean = mission_validation.validate_directive(directive)
        budget_clean = mission_validation.validate_budget(budget, allowlist_resolved, {})
        cadence_clean = mission_validation.validate_cadence(cadence_dict)
    except MissionValidationError as exc:
        _emit_error(exc.code, exc.details)
        sys.exit(1)

    if not isinstance(stagnation_threshold, int) or stagnation_threshold <= 0:
        _emit_error(
            "validation_error",
            {"field": "stagnation-threshold", "reason": "must_be_positive_int"},
        )
        sys.exit(1)

    # ---- Step 1: scaffold criteria. -------------------------------------
    # Resolve the sampling state once; reuse it for both the scaffold
    # call and the persisted session's ``use_sampling`` field so the
    # operator's --use-sampling/--no-sampling intent applies end-to-end.
    use_sampling_resolved, backend_resolved = mission_sampling.resolve_sampling_state(
        None, use_sampling
    )

    criteria: list[dict[str, Any]] | None = None
    sampling_path_taken = False
    if use_sampling_resolved and backend_resolved != "none":
        backend_obj = mission_sampling.select_sampling_backend(
            None,
            model_id=bedrock_model_id,
            prefs=None,
        )
        if backend_obj is not None:
            try:
                criteria = asyncio.run(
                    criteria_scaffold.generate_sampled_criteria(
                        backend_obj,
                        directive,
                        allowlist=list(tool_allowlist),
                        max_criteria=max_criteria,
                        retries=retries,
                    )
                )
                sampling_path_taken = True
            except criteria_scaffold.ScaffoldSamplingError as exc:
                # Warn on stderr, then fall through to the deterministic
                # generator below.
                click.echo(
                    f"sampling path failed ({exc.last_reason}); "
                    "falling back to deterministic templates.",
                    err=True,
                )
                criteria = None

    if criteria is None:
        criteria = criteria_scaffold.generate_deterministic_criteria(
            directive,
            allowlist=list(tool_allowlist) or None,
            max_criteria=max_criteria,
        )

    if save_criteria:
        # Written before the criteria are validated, so the operator can
        # inspect exactly what the scaffolder produced.
        Path(save_criteria).write_text(
            json.dumps(criteria, indent=2, sort_keys=False) + "\n",
            encoding="utf-8",
        )

    # ---- Step 2: validate the criteria and persist the session. ---------
    try:
        criteria_clean = mission_validation.validate_criteria(criteria)
    except MissionValidationError as exc:
        _emit_error(exc.code, exc.details)
        sys.exit(1)

    session_id = f"mission-{secrets.token_hex(8)}"
    now_iso = datetime.now(UTC).isoformat()
    session: dict[str, Any] = {
        "version": SCHEMA_VERSION,
        "session_id": session_id,
        "directive_text": directive_clean,
        "criteria": criteria_clean,
        "budget": budget_clean,
        "tool_allowlist": allowlist_resolved,
        "checkpoint_cadence": cadence_clean,
        "stagnation_threshold": stagnation_threshold,
        "use_sampling": use_sampling_resolved,
        "sampling_backend_resolved": backend_resolved,
        "allow_scripted_strategies": bool(allow_scripted_strategies),
        "status": "pending",
        "created_at": now_iso,
        "iterations": [],
        "no_progress_counter": 0,
    }
    if bedrock_model_id:
        # Only recorded when the operator overrode the model id.
        session["bedrock_model_id"] = bedrock_model_id

    backend = mission_state.get_backend()
    backend.save_session(cast("SessionState", _strip_private_criteria(session)))

    # Emit a one-line scaffold summary to stderr so the operator can see
    # what shape the criteria landed in before the loop starts. Stdout is
    # reserved for the Final_Report at the end.
    _emit_json(
        {
            "event": "mission.run.scaffolded",
            "session_id": session_id,
            "criteria_count": len(criteria),
            "sampling_path": sampling_path_taken,
            "sampling_backend_resolved": backend_resolved,
        },
        err=True,
    )

    # ---- Step 3: iterate to completion. ---------------------------------
    _run_to_completion(session_id, dry_run=dry_run)