Coverage for mcp/mission/validation.py: 93%

1"""Shared validators for Mission session inputs.

3Every Mission entry point — the MCP tools, the CLI subcommands, the engine's

4session loader — feeds operator-supplied JSON through this module before it

5ever touches state. The validators are intentionally pure: no I/O, no clocks,

6no environment lookups. The caller passes whatever external context is

7needed (the FastMCP tool catalog, the per-tool tag sets, the gating

8feature-flag lookup) as plain arguments. This keeps the validators trivial

9to unit-test and makes them safe to call from both async tool handlers and

10synchronous CLI code.

12Design notes:

14* Validators **return new normalized values**; they never mutate their

15 inputs. The :func:`validate_criteria` case attaches a cached parsed AST

16 under the private key ``_parsed_ast`` on each ``predicate`` criterion;

17 the original input dict is left untouched and a shallow copy carries the

18 added key.

20* Every rejection raises :class:`MissionValidationError` with a stable

21 short ``code`` (e.g. ``"validation_error"``) and a structured

22 ``details`` dict whose ``field`` key identifies the input that failed.

23 Tool wrappers render ``code`` and ``details`` as a structured FastMCP

24 tool error so clients can surface them without text parsing.

26* The script-strategy path forward-declares the sandbox: scripted

27 strategies are out of scope for this module and the sandbox module

28 lands in a later slice. The lazy import inside

29 :func:`validate_strategy` tolerates the missing module by raising a

30 dedicated ``script_sandbox_not_implemented`` code, so callers that hit

31 this path get a clear signal rather than an ``ImportError`` traceback.

32"""

34from __future__ import annotations

36from collections.abc import Collection, Mapping, Sequence

37from typing import Any, Final, cast

39from . import predicate

40from .types import (

41 BudgetControls,

42 Cadence,

43 Criterion,

44 Strategy,

45)

47# ---------------------------------------------------------------------------

48# Public exception

49# ---------------------------------------------------------------------------

class MissionValidationError(Exception):
    """Signal that a Mission validator refused an input.

    Each instance exposes two attributes:

    * ``code`` — a stable, short token identifying the rejection class.
    * ``details`` — an optional structured dict describing the rejection
      (``None`` when the raiser supplied nothing).

    FastMCP tool wrappers turn the exception into a structured tool-error
    response; CLI handlers print ``code`` together with the ``details`` JSON.

    Signature: ``(code, details=None, *, message=None)``. The exception's
    string form is ``message`` when one is supplied and ``code`` otherwise,
    so a log line always carries something meaningful.
    """

    def __init__(
        self,
        code: str,
        details: dict[str, Any] | None = None,
        *,
        message: str | None = None,
    ) -> None:
        # Hand the human-readable text to ``Exception`` first, then attach
        # the structured fields that callers inspect programmatically.
        super().__init__(code if message is None else message)
        self.code: str = code
        self.details: dict[str, Any] | None = details

77# ---------------------------------------------------------------------------

78# Constants

79# ---------------------------------------------------------------------------

_DIRECTIVE_MAX_LEN: Final[int] = 8192
"""Upper bound on ``directive_text`` length, counted in characters."""

_CRITERION_KINDS: Final[frozenset[str]] = frozenset(
    (
        "metric_threshold",
        "event",
        "predicate",
        "tool_call_succeeded",
        "metric_trend",
    )
)
"""Every accepted Criterion ``kind`` value (five in total)."""

_METRIC_OPS: Final[frozenset[str]] = frozenset(("<", "<=", ">", ">=", "==", "!="))
"""Comparison operators accepted on a ``metric_threshold`` criterion (six)."""

_METRIC_TREND_DIRECTIONS: Final[frozenset[str]] = frozenset(
    (
        "decreasing",
        "increasing",
        "non_increasing",
        "non_decreasing",
    )
)
"""Trend directions accepted on a ``metric_trend`` criterion (four)."""

_CADENCE_KINDS: Final[frozenset[str]] = frozenset(
    (
        "every_iteration",
        "every_n_iterations",
        "every_t_seconds",
        "on_event",
    )
)
"""Every accepted Cadence ``kind`` value (four in total)."""

101

102

103# ---------------------------------------------------------------------------

104# Helpers

105# ---------------------------------------------------------------------------

106

107

def _is_positive_int(value: Any) -> bool:
    """Report whether ``value`` is a real ``int`` strictly greater than zero.

    ``bool`` subclasses ``int``, so it is screened out up front; otherwise
    ``True`` would pass as the count ``1``.
    """
    if isinstance(value, bool) or not isinstance(value, int):
        return False
    return value > 0

113

114

def _is_positive_int_or_uncapped(value: Any) -> bool:
    """Report whether ``value`` is a strictly-positive int or the ``-1`` sentinel.

    The Mission budget caps (``max_iterations``, ``max_wall_clock_seconds``)
    treat ``-1`` as an explicit "uncapped" opt-out. Everything else that is
    not a positive int is refused: zero (it would end every session on
    iteration 1 / second 0), any other negative (it would hide typos),
    non-integers, and ``bool`` (an ``int`` subclass that must not pass as a
    count).
    """
    return (
        isinstance(value, int)
        and not isinstance(value, bool)
        and (value == -1 or value > 0)
    )

130

131

def _is_number(value: Any) -> bool:
    """Report whether ``value`` is an ``int`` or ``float``, excluding ``bool``."""
    if isinstance(value, bool):
        return False
    return isinstance(value, (int, float))

135

136

137# ---------------------------------------------------------------------------

138# Directive

139# ---------------------------------------------------------------------------

140

141

def validate_directive(text: str) -> str:
    """Normalize and check the operator-supplied directive.

    The directive is the natural-language goal for the session. Leading and
    trailing whitespace is stripped; what remains must be non-empty and at
    most :data:`_DIRECTIVE_MAX_LEN` characters long.

    Returns:
        The stripped directive.

    Raises:
        MissionValidationError: with ``code="validation_error"`` and
            ``details.field == "directive"`` when ``text`` is not a string
            (``reason="not_a_string"``), is blank after stripping
            (``reason="empty"``), or exceeds the cap (``reason="too_long"``,
            together with ``max_length`` and ``actual_length``).
    """
    if not isinstance(text, str):
        raise MissionValidationError(
            "validation_error",
            details={"field": "directive", "reason": "not_a_string"},
        )

    stripped = text.strip()
    length = len(stripped)

    if length == 0:
        raise MissionValidationError(
            "validation_error",
            details={"field": "directive", "reason": "empty"},
        )
    if length <= _DIRECTIVE_MAX_LEN:
        return stripped

    # Only the over-long case remains.
    raise MissionValidationError(
        "validation_error",
        details={
            "field": "directive",
            "reason": "too_long",
            "max_length": _DIRECTIVE_MAX_LEN,
            "actual_length": length,
        },
    )

173

174

175# ---------------------------------------------------------------------------

176# Criteria

177# ---------------------------------------------------------------------------

178

179

def _validate_metric_threshold(entry: dict[str, Any], criterion_id: str) -> None:
    """Check the kind-specific keys for a ``metric_threshold`` criterion.

    Required keys:

    * ``metric`` — non-empty string.
    * ``op`` — one of :data:`_METRIC_OPS`.
    * ``target`` — an int or float (``bool`` rejected).

    Args:
        entry: The raw criterion dict; it is only read, never mutated.
        criterion_id: The already-validated id, echoed into error details.

    Raises:
        MissionValidationError: ``code="validation_error"`` with
            ``details.reason`` set to ``metric_missing_or_invalid``,
            ``op_invalid`` (plus the sorted ``allowed`` list), or
            ``target_not_a_number``.
    """
    metric = entry.get("metric")
    if not isinstance(metric, str) or not metric:
        raise MissionValidationError(
            "validation_error",
            details={
                "field": "criteria",
                "criterion_id": criterion_id,
                "reason": "metric_missing_or_invalid",
            },
        )
    op = entry.get("op")
    # Type-check before the membership test: ``op`` comes from operator JSON
    # and may be a list or dict, and ``in`` on a frozenset raises TypeError
    # for unhashable values. Such input must surface as a structured
    # rejection, not a traceback.
    if not isinstance(op, str) or op not in _METRIC_OPS:
        raise MissionValidationError(
            "validation_error",
            details={
                "field": "criteria",
                "criterion_id": criterion_id,
                "reason": "op_invalid",
                "allowed": sorted(_METRIC_OPS),
            },
        )
    target = entry.get("target")
    if not _is_number(target):
        raise MissionValidationError(
            "validation_error",
            details={
                "field": "criteria",
                "criterion_id": criterion_id,
                "reason": "target_not_a_number",
            },
        )

213

214

def _validate_metric_trend(entry: dict[str, Any], criterion_id: str) -> None:
    """Check the kind-specific keys for a ``metric_trend`` criterion.

    Required: ``metric`` (non-empty dot-path string) and ``direction`` (one of
    the four :data:`_METRIC_TREND_DIRECTIONS`). Optional: ``window`` (positive
    int — how many of the most-recent points to consider) and ``min_points``
    (positive int — the minimum number of numeric points required before the
    criterion decides met/unmet rather than inconclusive).

    Unlike ``metric_threshold`` this kind has no ``op``/``target``: the
    comparison is "where did the metric go over the window?", evaluated by
    :meth:`MissionEngine._evaluate_metric_trend` against the cumulative metric
    history the engine accumulates across iterations.

    Args:
        entry: The raw criterion dict; it is only read, never mutated.
        criterion_id: The already-validated id, echoed into error details.

    Raises:
        MissionValidationError: ``code="validation_error"`` with
            ``details.reason`` set to ``metric_missing_or_invalid``,
            ``direction_invalid`` (plus the sorted ``allowed`` list),
            ``window_must_be_positive_int``, or
            ``min_points_must_be_positive_int``.
    """
    metric = entry.get("metric")
    if not isinstance(metric, str) or not metric:
        raise MissionValidationError(
            "validation_error",
            details={
                "field": "criteria",
                "criterion_id": criterion_id,
                "reason": "metric_missing_or_invalid",
            },
        )
    direction = entry.get("direction")
    # Type-check before the membership test: an unhashable JSON value (list
    # or dict) would make ``in`` on a frozenset raise TypeError instead of
    # producing the structured rejection below.
    if not isinstance(direction, str) or direction not in _METRIC_TREND_DIRECTIONS:
        raise MissionValidationError(
            "validation_error",
            details={
                "field": "criteria",
                "criterion_id": criterion_id,
                "reason": "direction_invalid",
                "allowed": sorted(_METRIC_TREND_DIRECTIONS),
            },
        )
    # ``window`` and ``min_points`` are optional, but when present each must be
    # a strictly-positive int (bool rejected). A missing value lets the engine
    # apply its defaults (window = all points; min_points = 2).
    if "window" in entry and not _is_positive_int(entry.get("window")):
        raise MissionValidationError(
            "validation_error",
            details={
                "field": "criteria",
                "criterion_id": criterion_id,
                "reason": "window_must_be_positive_int",
            },
        )
    if "min_points" in entry and not _is_positive_int(entry.get("min_points")):
        raise MissionValidationError(
            "validation_error",
            details={
                "field": "criteria",
                "criterion_id": criterion_id,
                "reason": "min_points_must_be_positive_int",
            },
        )

271

272

def _validate_event_criterion(entry: dict[str, Any], criterion_id: str) -> None:
    """Ensure an ``event`` criterion carries a usable ``event_name``.

    The ``event_name`` key must hold a non-empty string. Any other value, or
    a missing key, raises :class:`MissionValidationError` with
    ``details.reason == "event_name_missing_or_invalid"``.
    """
    name = entry.get("event_name")
    if isinstance(name, str) and name:
        return
    raise MissionValidationError(
        "validation_error",
        details={
            "field": "criteria",
            "criterion_id": criterion_id,
            "reason": "event_name_missing_or_invalid",
        },
    )

285

286

def _validate_predicate_criterion(entry: dict[str, Any], criterion_id: str) -> Any:
    """Validate a ``predicate`` criterion and parse its expression.

    The ``expression`` key must hold a non-empty string, which is handed to
    :func:`predicate.parse_predicate`.

    Returns:
        The parsed AST, which the caller stores under ``_parsed_ast`` on its
        normalized copy of the entry.

    Raises:
        MissionValidationError: ``reason="expression_missing_or_invalid"``
            when the key is absent, empty, or not a string. When the parser
            raises ``predicate.PredicateRejected``, its ``reason``,
            ``lineno`` and ``col_offset`` are carried into ``details``.
    """
    source = entry.get("expression")
    if not (isinstance(source, str) and source):
        raise MissionValidationError(
            "validation_error",
            details={
                "field": "criteria",
                "criterion_id": criterion_id,
                "reason": "expression_missing_or_invalid",
            },
        )

    try:
        tree = predicate.parse_predicate(source)
    except predicate.PredicateRejected as rejection:
        # Re-raise as the module's own exception type, chaining the original.
        raise MissionValidationError(
            "validation_error",
            details={
                "field": "criteria",
                "criterion_id": criterion_id,
                "reason": rejection.reason,
                "lineno": rejection.lineno,
                "col_offset": rejection.col_offset,
            },
        ) from rejection
    return tree

316

317

def _validate_tool_call_succeeded(entry: dict[str, Any], criterion_id: str) -> None:
    """Check the kind-specific keys for a ``tool_call_succeeded`` criterion.

    Required: ``tool_name`` (non-empty str). Optional: ``min_count``
    (positive int; default 1). The criterion is met when the
    Observation's ``tool_results`` list contains at least
    ``min_count`` entries whose ``tool_name`` field equals
    ``tool_name`` and whose ``_status`` equals ``"ok"``.

    This kind exists so the most common Mission goal — "this tool
    ran and succeeded N times" — does not require the operator (or
    a sampling model) to write a Python predicate. It is a strict
    subset of what ``predicate`` can express, but the engine
    evaluates it server-side without going through the AST sandbox,
    so the validator never needs to reason about syntax errors,
    method-call shapes, or attribute walks for this case.

    Args:
        entry: The raw criterion dict; it is only read, never mutated.
        criterion_id: The already-validated id, echoed into error details.

    Raises:
        MissionValidationError: ``code="validation_error"`` with
            ``details.reason`` set to ``tool_name_missing_or_invalid`` or
            ``min_count_must_be_positive_int``.
    """
    tool_name = entry.get("tool_name")
    if not isinstance(tool_name, str) or not tool_name:
        raise MissionValidationError(
            "validation_error",
            details={
                "field": "criteria",
                "criterion_id": criterion_id,
                "reason": "tool_name_missing_or_invalid",
            },
        )
    # ``min_count`` is optional; default 1 (any successful call). When it is
    # present, reuse the shared positive-int helper (which also rejects bool)
    # so this check cannot drift from the ``window``/``min_points`` checks on
    # ``metric_trend``.
    if "min_count" in entry and not _is_positive_int(entry.get("min_count")):
        raise MissionValidationError(
            "validation_error",
            details={
                "field": "criteria",
                "criterion_id": criterion_id,
                "reason": "min_count_must_be_positive_int",
            },
        )

359

360

def validate_criteria(criteria: list[dict[str, Any]]) -> list[Criterion]:
    """Validate a list of criteria and attach cached predicate ASTs.

    Required keys on every entry: ``criterion_id`` (non-empty str),
    ``kind`` (one of the :class:`CriterionKind` values), and
    ``required`` (bool). Each entry must also provide the kind-specific
    keys: ``metric``/``op``/``target`` for ``metric_threshold``,
    ``metric``/``direction`` (plus optional ``window``/``min_points``) for
    ``metric_trend``, ``event_name`` for ``event``, ``tool_name`` for
    ``tool_call_succeeded``, and ``expression`` for ``predicate``.

    The ``criterion_id`` must be unique across the list. For each
    ``predicate`` entry, the expression is parsed via
    :func:`predicate.parse_predicate` and the resulting AST is cached
    under the private key ``_parsed_ast`` on a shallow copy of the
    entry.

    Args:
        criteria: The operator-supplied list of criterion dicts.

    Returns:
        The normalized list, one shallow copy per input entry, in input
        order. The original input dicts are not mutated.

    Raises:
        MissionValidationError: ``code="validation_error"`` with
            ``details.field == "criteria"`` and a ``reason`` naming the
            first failed check (``not_a_list``, ``empty``, ``not_a_dict``,
            ``criterion_id_missing_or_invalid``, ``duplicate_criterion_id``,
            ``kind_invalid``, ``required_missing_or_not_a_bool``, or a
            kind-specific reason).
    """
    if not isinstance(criteria, list):
        raise MissionValidationError(
            "validation_error",
            details={"field": "criteria", "reason": "not_a_list"},
        )
    if not criteria:
        raise MissionValidationError(
            "validation_error",
            details={"field": "criteria", "reason": "empty"},
        )
    seen_ids: set[str] = set()
    normalized: list[Criterion] = []
    for index, entry in enumerate(criteria):
        if not isinstance(entry, dict):
            raise MissionValidationError(
                "validation_error",
                details={
                    "field": "criteria",
                    "index": index,
                    "reason": "not_a_dict",
                },
            )
        criterion_id = entry.get("criterion_id")
        if not isinstance(criterion_id, str) or not criterion_id:
            raise MissionValidationError(
                "validation_error",
                details={
                    "field": "criteria",
                    "index": index,
                    "reason": "criterion_id_missing_or_invalid",
                },
            )
        if criterion_id in seen_ids:
            raise MissionValidationError(
                "validation_error",
                details={
                    "field": "criteria",
                    "criterion_id": criterion_id,
                    "reason": "duplicate_criterion_id",
                },
            )
        seen_ids.add(criterion_id)
        kind = entry.get("kind")
        # Type-check before the membership test: ``kind`` comes from operator
        # JSON and may be a list or dict, and ``in`` on a frozenset raises
        # TypeError for unhashable values. Such input must surface as a
        # structured rejection, not a traceback.
        if not isinstance(kind, str) or kind not in _CRITERION_KINDS:
            raise MissionValidationError(
                "validation_error",
                details={
                    "field": "criteria",
                    "criterion_id": criterion_id,
                    "reason": "kind_invalid",
                    "allowed": sorted(_CRITERION_KINDS),
                },
            )
        if not isinstance(entry.get("required"), bool):
            raise MissionValidationError(
                "validation_error",
                details={
                    "field": "criteria",
                    "criterion_id": criterion_id,
                    "reason": "required_missing_or_not_a_bool",
                },
            )
        # Build a shallow copy so we never mutate the caller's dict; we
        # may need to attach _parsed_ast and we want the input to stay
        # exactly as it was passed in.
        normalized_entry: dict[str, Any] = dict(entry)
        if kind == "metric_threshold":
            _validate_metric_threshold(entry, criterion_id)
        elif kind == "metric_trend":
            _validate_metric_trend(entry, criterion_id)
        elif kind == "event":
            _validate_event_criterion(entry, criterion_id)
        elif kind == "tool_call_succeeded":
            _validate_tool_call_succeeded(entry, criterion_id)
        else:  # kind == "predicate"
            parsed = _validate_predicate_criterion(entry, criterion_id)
            normalized_entry["_parsed_ast"] = parsed
        normalized.append(cast("Criterion", normalized_entry))
    return normalized

458

459

460# ---------------------------------------------------------------------------

461# Budget

462# ---------------------------------------------------------------------------

463

464

def validate_budget(
    budget: dict[str, Any],
    allowlist: list[str],
    registered_tags: dict[str, set[str]],
) -> BudgetControls:
    """Validate a budget dict.

    Required keys: ``max_iterations`` and ``max_wall_clock_seconds``.
    Each accepts either a strictly-positive int OR the explicit
    sentinel ``-1`` ("uncapped"). The operator must pick one;
    omitting the key, passing zero, passing any other negative
    number, or passing a non-integer is rejected. **At least one** of
    the two caps must be a positive int — both being ``-1`` would be
    a runaway loop with no axis-driven termination, so the validator
    rejects that combination eagerly with
    ``reason="at_least_one_cap_required"``.

    Cost guardrails live out-of-band — Mission only enforces caps the
    loop has direct visibility into. ``allowlist`` and
    ``registered_tags`` are kept on the signature for API stability
    so existing callers don't have to change shape; both are unused.

    Args:
        budget: The operator-supplied budget dict.
        allowlist: Unused; accepted for API stability.
        registered_tags: Unused; accepted for API stability.

    Returns:
        A new dict holding exactly ``max_iterations`` and
        ``max_wall_clock_seconds``, cast to :class:`BudgetControls`.

    Raises:
        MissionValidationError: ``code="validation_error"`` with
            ``details.field == "budget"`` and ``reason`` one of
            ``not_a_dict``, ``missing_or_not_positive_int_or_minus_one``
            (with ``subfield`` naming the offending key), or
            ``at_least_one_cap_required``.
    """
    del allowlist, registered_tags  # accepted for API stability; unused
    if not isinstance(budget, dict):
        raise MissionValidationError(
            "validation_error",
            details={"field": "budget", "reason": "not_a_dict"},
        )
    max_iterations = budget.get("max_iterations")
    if not _is_positive_int_or_uncapped(max_iterations):
        raise MissionValidationError(
            "validation_error",
            details={
                "field": "budget",
                "subfield": "max_iterations",
                "reason": "missing_or_not_positive_int_or_minus_one",
            },
        )
    max_wall = budget.get("max_wall_clock_seconds")
    if not _is_positive_int_or_uncapped(max_wall):
        raise MissionValidationError(
            "validation_error",
            details={
                "field": "budget",
                "subfield": "max_wall_clock_seconds",
                "reason": "missing_or_not_positive_int_or_minus_one",
            },
        )
    # Each cap is individually valid here, so each is either positive or -1.
    # Both being -1 leaves the loop with no termination axis at all; refuse
    # that combination, as the contract above promises.
    if max_iterations == -1 and max_wall == -1:
        raise MissionValidationError(
            "validation_error",
            details={
                "field": "budget",
                "reason": "at_least_one_cap_required",
            },
        )
    normalized: dict[str, Any] = {
        "max_iterations": max_iterations,
        "max_wall_clock_seconds": max_wall,
    }
    return cast("BudgetControls", normalized)

520

521

522# ---------------------------------------------------------------------------

523# Tool allowlist

524# ---------------------------------------------------------------------------

525

526

def validate_tool_allowlist(
    allowlist: list[str],
    registered_tools: dict[str, Any],
    flag_lookup: dict[str, str] | None = None,
) -> list[str]:
    """Confirm that every allowlisted tool name is currently registered.

    ``registered_tools`` maps tool name to tool object (FastMCP's ``Tool``
    type, typed loosely so this module imports without the optional
    FastMCP dependency); only its keys are consulted.

    Args:
        allowlist: Operator-supplied tool names; must be a non-empty list
            of unique, non-empty strings.
        registered_tools: Mapping whose keys are the registered tool names.
        flag_lookup: Optional mapping from tool name to the feature flag
            that gates it. When an unregistered name appears here, the
            rejection's ``details.flag`` carries that flag name so the
            operator can be told why the tool is absent.

    Returns:
        A new list holding the same names in their original order.

    Raises:
        MissionValidationError: ``code="validation_error"`` with
            ``details.field == "tool_allowlist"`` and ``reason`` one of
            ``not_a_list``, ``empty``, ``tool_name_missing_or_invalid``,
            ``duplicate_tool_name``, or ``tool_not_registered``.
    """
    if not isinstance(allowlist, list):
        raise MissionValidationError(
            "validation_error",
            details={"field": "tool_allowlist", "reason": "not_a_list"},
        )
    if len(allowlist) == 0:
        raise MissionValidationError(
            "validation_error",
            details={"field": "tool_allowlist", "reason": "empty"},
        )

    already_seen: set[str] = set()
    accepted: list[str] = []
    for position, tool in enumerate(allowlist):
        if not (isinstance(tool, str) and tool):
            raise MissionValidationError(
                "validation_error",
                details={
                    "field": "tool_allowlist",
                    "index": position,
                    "reason": "tool_name_missing_or_invalid",
                },
            )
        if tool in already_seen:
            raise MissionValidationError(
                "validation_error",
                details={
                    "field": "tool_allowlist",
                    "tool_name": tool,
                    "reason": "duplicate_tool_name",
                },
            )
        already_seen.add(tool)

        if tool in registered_tools:
            accepted.append(tool)
            continue

        # Unregistered: build the rejection, adding the gating flag if known.
        rejection: dict[str, Any] = {
            "field": "tool_allowlist",
            "tool_name": tool,
            "reason": "tool_not_registered",
        }
        if flag_lookup is not None and tool in flag_lookup:
            rejection["flag"] = flag_lookup[tool]
        raise MissionValidationError("validation_error", details=rejection)
    return accepted

590

591

592# The nine session-management tool names. They are excluded from an

593# all-tools expansion so a session can never resolve an allowlist that lets

594# it recursively invoke the tools that start, drive, and tear down sessions.

595# This constant is the default exclusion set for

596# :func:`resolve_effective_allowlist`; callers holding a live tag map may pass

597# their own equivalent set instead.

MISSION_CONTROL_TOOLS: frozenset[str] = frozenset(
    (
        "mission_start",
        "mission_status",
        "mission_iterate",
        "mission_checkpoint",
        "mission_complete",
        "mission_abort",
        "mission_resume",
        "mission_history",
        "mission_list",
    )
)
"""Names of the nine control tools left out of an all-tools expansion."""

612

613

def resolve_effective_allowlist(
    *,
    allow_all_tools: bool,
    explicit_allowlist: list[str] | None,
    registered_tools: dict[str, Any],
    control_tools: Collection[str] = MISSION_CONTROL_TOOLS,
    flag_lookup: dict[str, str] | None = None,
) -> list[str]:
    """Work out which tools a session is actually allowed to call.

    Pure: no I/O, no clocks, no environment lookups. The caller supplies
    the registered tools (only the keys of ``registered_tools`` are read)
    and the control-tool names to leave out of an all-tools expansion
    (``control_tools``, defaulting to :data:`MISSION_CONTROL_TOOLS`).

    Three cases:

    * ``allow_all_tools`` is False — the call is delegated to
      :func:`validate_tool_allowlist` over ``explicit_allowlist or []``
      (with ``flag_lookup``), so an empty or absent list gets that
      function's ``empty`` rejection.
    * ``allow_all_tools`` is True and ``explicit_allowlist`` is non-empty —
      the inputs conflict; :class:`MissionValidationError` is raised with
      ``details.reason ==
      "allow_all_and_explicit_allowlist_mutually_exclusive"``.
    * ``allow_all_tools`` is True with no explicit list — the candidate is
      ``sorted(set(registered_tools) - set(control_tools))``. An empty
      candidate raises ``details.reason ==
      "allow_all_tools_empty_registry"``; otherwise the candidate goes
      through :func:`validate_tool_allowlist` so it meets the same
      invariants as an operator-supplied list.

    Returns:
        The normalized allowlist: sorted and duplicate-free on the
        all-tools path, order-preserving on the explicit path.
    """
    if not allow_all_tools:
        return validate_tool_allowlist(explicit_allowlist or [], registered_tools, flag_lookup)

    if explicit_allowlist:
        raise MissionValidationError(
            "validation_error",
            details={
                "field": "tool_allowlist",
                "reason": "allow_all_and_explicit_allowlist_mutually_exclusive",
            },
        )

    registered_names = set(registered_tools)
    excluded_names = set(control_tools)
    expanded = sorted(registered_names - excluded_names)
    if len(expanded) == 0:
        raise MissionValidationError(
            "validation_error",
            details={
                "field": "tool_allowlist",
                "reason": "allow_all_tools_empty_registry",
            },
        )
    return validate_tool_allowlist(expanded, registered_tools)

669

670

671# ---------------------------------------------------------------------------

672# Cadence

673# ---------------------------------------------------------------------------

674

675

def validate_cadence(cadence: dict[str, Any]) -> Cadence:
    """Validate a checkpoint cadence dict.

    The base ``every_iteration`` kind requires no extra keys.
    ``every_n_iterations`` requires a positive int ``n``.
    ``every_t_seconds`` requires a positive int ``t``. ``on_event``
    requires a non-empty str ``event_name``.

    Args:
        cadence: The operator-supplied cadence dict; it is only read.

    Returns:
        A new dict holding ``kind`` plus the single kind-specific key (if
        any), cast to :class:`Cadence`. Other input keys are not copied.

    Raises:
        MissionValidationError: ``code="validation_error"`` with
            ``details.field == "checkpoint_cadence"`` and ``reason`` one of
            ``not_a_dict``, ``kind_invalid`` (plus the sorted ``allowed``
            list), ``missing_or_not_positive_int`` (``subfield`` ``n`` or
            ``t``), or ``missing_or_empty`` (``subfield`` ``event_name``).
    """
    if not isinstance(cadence, dict):
        raise MissionValidationError(
            "validation_error",
            details={"field": "checkpoint_cadence", "reason": "not_a_dict"},
        )
    kind = cadence.get("kind")
    # Type-check before the membership test: an unhashable JSON value (list
    # or dict) would make ``in`` on a frozenset raise TypeError instead of
    # producing the structured rejection below.
    if not isinstance(kind, str) or kind not in _CADENCE_KINDS:
        raise MissionValidationError(
            "validation_error",
            details={
                "field": "checkpoint_cadence",
                "reason": "kind_invalid",
                "allowed": sorted(_CADENCE_KINDS),
            },
        )
    normalized: dict[str, Any] = {"kind": kind}
    if kind == "every_n_iterations":
        n = cadence.get("n")
        if not _is_positive_int(n):
            raise MissionValidationError(
                "validation_error",
                details={
                    "field": "checkpoint_cadence",
                    "subfield": "n",
                    "reason": "missing_or_not_positive_int",
                },
            )
        normalized["n"] = n
    elif kind == "every_t_seconds":
        t = cadence.get("t")
        if not _is_positive_int(t):
            raise MissionValidationError(
                "validation_error",
                details={
                    "field": "checkpoint_cadence",
                    "subfield": "t",
                    "reason": "missing_or_not_positive_int",
                },
            )
        normalized["t"] = t
    elif kind == "on_event":
        event_name = cadence.get("event_name")
        if not isinstance(event_name, str) or not event_name:
            raise MissionValidationError(
                "validation_error",
                details={
                    "field": "checkpoint_cadence",
                    "subfield": "event_name",
                    "reason": "missing_or_empty",
                },
            )
        normalized["event_name"] = event_name
    # every_iteration takes no extra keys; nothing else to copy.
    return cast("Cadence", normalized)

739

740

741# ---------------------------------------------------------------------------

742# Strategy

743# ---------------------------------------------------------------------------

744

745

def validate_strategy(
    strategy: dict[str, Any],
    allowlist: list[str],
    allow_scripts: bool,
) -> Strategy:
    """Validate a Propose_Phase Strategy dict.

    Exactly one of ``tool_calls`` (a non-empty list of dicts) or ``script``
    (a non-empty string) must be present. When ``script`` is present,
    ``allow_scripts`` must be ``True`` — sessions started with
    ``allow_scripted_strategies=False`` reject scripted proposals. The
    script is then handed to the sandbox AST validator
    (:func:`mission.sandbox.validate_script_ast`) for inspection
    against ``allowlist``. The sandbox module is imported lazily
    because it lands in a later slice; if it is missing at call time,
    :class:`MissionValidationError` is raised with the dedicated code
    ``script_sandbox_not_implemented`` so callers see a clear signal
    instead of an ``ImportError`` traceback.

    Args:
        strategy: The proposed strategy dict; it is only read.
        allowlist: Tool names passed to the sandbox validator on the
            script path; unused on the ``tool_calls`` path.
        allow_scripts: Whether the session permits scripted strategies.

    Returns:
        A normalized strategy dict carrying ``tool_calls`` (each call
        shallow-copied) or ``script``, plus the optional
        ``expected_observation_keys`` and ``rationale`` fields when
        present.

    Raises:
        MissionValidationError: ``code="validation_error"`` with
            ``details.field == "strategy"`` for every shape rejection, or
            ``code="script_sandbox_not_implemented"`` when the sandbox
            module cannot be imported.
    """
    if not isinstance(strategy, dict):
        raise MissionValidationError(
            "validation_error",
            details={"field": "strategy", "reason": "not_a_dict"},
        )
    has_tool_calls = "tool_calls" in strategy
    has_script = "script" in strategy
    if has_tool_calls == has_script:
        # Both present, or both absent — same error in either direction.
        raise MissionValidationError(
            "validation_error",
            details={
                "field": "strategy",
                "reason": "must_have_exactly_one_of_tool_calls_or_script",
            },
        )

    normalized: dict[str, Any] = {}
    if has_tool_calls:
        tool_calls = strategy["tool_calls"]
        if not isinstance(tool_calls, list) or not tool_calls:
            raise MissionValidationError(
                "validation_error",
                details={
                    "field": "strategy",
                    "subfield": "tool_calls",
                    "reason": "must_be_non_empty_list",
                },
            )
        # Shallow-copy each call dict so the caller's list/dicts stay
        # intact; we don't impose a deep schema on each call here
        # because the tool dispatcher validates the per-call args
        # against the registered tool's signature at execute time.
        # The one shape check made here is that each call is a dict:
        # ``dict(call)`` raises a bare TypeError/ValueError on scalars and
        # silently turns a list of pairs into a dict, and neither is an
        # acceptable outcome for operator-supplied input.
        copied_calls: list[dict[str, Any]] = []
        for index, call in enumerate(tool_calls):
            if not isinstance(call, dict):
                raise MissionValidationError(
                    "validation_error",
                    details={
                        "field": "strategy",
                        "subfield": "tool_calls",
                        "index": index,
                        "reason": "call_not_a_dict",
                    },
                )
            copied_calls.append(dict(call))
        normalized["tool_calls"] = copied_calls
    else:
        script = strategy["script"]
        if not isinstance(script, str) or not script:
            raise MissionValidationError(
                "validation_error",
                details={
                    "field": "strategy",
                    "subfield": "script",
                    "reason": "must_be_non_empty_string",
                },
            )
        if not allow_scripts:
            raise MissionValidationError(
                "validation_error",
                details={
                    "field": "strategy",
                    "subfield": "script",
                    "reason": "scripts_not_allowed_by_session",
                },
            )
        try:
            from mission.sandbox import (  # noqa: PLC0415 — lazy: sandbox is an optional runtime dep
                ScriptRejected,
                validate_script_ast,
            )
        except ModuleNotFoundError as exc:
            raise MissionValidationError(
                "script_sandbox_not_implemented",
                details={
                    "hint": "scripted strategies require the sandbox module",
                },
            ) from exc
        try:
            validate_script_ast(script, allowlist)
        except ScriptRejected as exc:
            # Translate the sandbox-level rejection into our structured
            # MissionValidationError so every operator-input rejection
            # comes back through the same exception type. The sandbox's
            # stable ``reason`` token, line, and column carry through
            # so callers can render a precise error.
            raise MissionValidationError(
                "validation_error",
                details={
                    "field": "strategy",
                    "subfield": "script",
                    "reason": exc.reason,
                    "lineno": exc.lineno,
                    "col_offset": exc.col_offset,
                },
            ) from exc
        normalized["script"] = script

    # Carry through the two optional pass-through fields when present.
    if "expected_observation_keys" in strategy:
        keys = strategy["expected_observation_keys"]
        if not isinstance(keys, list) or not all(isinstance(k, str) for k in keys):
            raise MissionValidationError(
                "validation_error",
                details={
                    "field": "strategy",
                    "subfield": "expected_observation_keys",
                    "reason": "must_be_list_of_strings",
                },
            )
        normalized["expected_observation_keys"] = list(keys)
    if "rationale" in strategy:
        rationale = strategy["rationale"]
        if not isinstance(rationale, str):
            raise MissionValidationError(
                "validation_error",
                details={
                    "field": "strategy",
                    "subfield": "rationale",
                    "reason": "not_a_string",
                },
            )
        normalized["rationale"] = rationale
    return cast("Strategy", normalized)

881

882

883# ---------------------------------------------------------------------------

884# JSON-safety strippers

885# ---------------------------------------------------------------------------

886#

887# Why these live here rather than next to the persistence backend or

888# next to each call site: the only key that needs stripping today is

889# ``_parsed_ast``, which is also created here (by ``validate_criteria``

890# attaching the cached :class:`ast.Expression` to predicate criteria).

891# Putting the strippers next to the producer keeps the lifecycle

892# obvious — anyone who reads ``validate_criteria`` sees the matching

893# ``strip_private_fields`` helper one screen down.

894#

895# Three earlier slices each had their own near-duplicate implementation

896# (``cli/commands/mission_cmd.py::_strip_private_criteria``,

897# ``mcp/tools/mission.py::_strip_private_fields`` plus the iterations

898# variant, ``mcp/resources/mission.py::_strip_private_fields``). Those

899# now delegate here so a single source of truth governs the JSON-safety

900# contract.

901

902# Prefix marking which keys count as "private" — anything starting

903# with an underscore. ``ast.Expression`` is the only object the

904# validators currently attach, but the rule is intentionally broad so

905# a future cache (a normalised JSON-Pointer for the metric path, a

906# pre-resolved tool-tag set) can ride on the same convention without

907# breaking persistence.

# Keys whose string form begins with this prefix are treated as private.
_PRIVATE_PREFIX: Final[str] = "_"


def _is_public_key(key: Any) -> bool:
    """Tell whether ``key`` should survive a private-field strip.

    The key is coerced with ``str`` before the check, so a non-string key is
    judged by its string form. Only a leading ``_PRIVATE_PREFIX`` marks a key
    as private; an underscore anywhere else in the key does not.

    Args:
        key: A dict key of any type.

    Returns:
        ``False`` when ``str(key)`` starts with ``_PRIVATE_PREFIX``,
        ``True`` otherwise.
    """
    is_private = str(key).startswith(_PRIVATE_PREFIX)
    return not is_private

914

915

def _strip_private_dict(d: Mapping[str, Any]) -> dict[str, Any]:
    """Build a new dict holding only the public entries of ``d``.

    The copy is shallow: values are carried over by reference, the original
    key order is kept, and ``d`` itself is left unmodified.

    Args:
        d: The mapping to filter.

    Returns:
        A fresh ``dict`` containing every item of ``d`` whose key passes
        :func:`_is_public_key`.
    """
    public_items = (
        (name, value) for name, value in d.items() if _is_public_key(name)
    )
    return dict(public_items)

919

920

def strip_private_fields(session: Mapping[str, Any]) -> dict[str, Any]:
    """Produce a JSON-safe shallow copy of ``session``.

    Only two shapes are rewritten:

    * ``session["criteria"]`` — when it is a list, every mapping entry is
      replaced by a copy without its leading-underscore keys; non-mapping
      entries are kept as they are.
    * ``session["iterations"]`` — when it is a list, it is replaced by the
      result of :func:`strip_private_fields_iterations`, which cleans each
      iteration's ``criteria_evaluation`` entries.

    Everything else is copied across untouched. The strip is deliberately
    narrow so that a top-level field which legitimately begins with an
    underscore (e.g. ``_meta`` kept for backwards compatibility) is not
    silently dropped outside the criterion / criterion-eval shapes.

    Args:
        session: Any session-shaped mapping. Usually a :class:`SessionState`
            ``TypedDict``, but the parameter is typed as
            ``Mapping[str, Any]`` so a partial session under construction
            can be passed without casting to the full type.

    Returns:
        A new dict; ``session`` itself is never mutated.
    """
    result: dict[str, Any] = dict(session)

    raw_criteria = result.get("criteria")
    if isinstance(raw_criteria, list):
        stripped: list[Any] = []
        for entry in raw_criteria:
            if isinstance(entry, Mapping):
                stripped.append(_strip_private_dict(entry))
            else:
                stripped.append(entry)
        result["criteria"] = stripped

    raw_iterations = result.get("iterations")
    if isinstance(raw_iterations, list):
        result["iterations"] = strip_private_fields_iterations(raw_iterations)

    return result

953

954

def strip_private_fields_iterations(
    iterations: Sequence[Mapping[str, Any]],
) -> list[dict[str, Any]]:
    """Make an iteration history JSON-safe by cleaning ``criteria_evaluation``.

    The Decide_Phase appends ``CriterionResult`` entries under
    ``iteration["criteria_evaluation"]``; for a ``predicate`` criterion the
    entry carries the same ``_parsed_ast`` cache as the source criterion.
    This helper removes such leading-underscore keys from every mapping
    entry of that list. Keys of the iteration dict itself are not filtered.

    Args:
        iterations: A sequence of iteration dicts. Entries that are not
            mappings (not expected in a typed iteration list, but possible
            when the data came from a corrupt on-disk file) are passed
            through verbatim so the caller can still observe the corruption.

    Returns:
        A new list. Each mapping entry is replaced by a shallow copy, with a
        rebuilt ``criteria_evaluation`` list when that value is a list. The
        input sequence and its dicts are never mutated.
    """
    cleaned: list[dict[str, Any]] = []
    for entry in iterations:
        if isinstance(entry, Mapping):
            shallow = dict(entry)
            results = shallow.get("criteria_evaluation")
            if isinstance(results, list):
                shallow["criteria_evaluation"] = [
                    item if not isinstance(item, Mapping) else _strip_private_dict(item)
                    for item in results
                ]
            cleaned.append(shallow)
        else:
            # Keep the malformed entry as-is; the cast only satisfies the
            # declared return type and changes nothing at runtime.
            cleaned.append(cast("dict[str, Any]", entry))
    return cleaned