Coverage for mcp/mission/validation.py: 93%

245 statements  

« prev     ^ index     » next       coverage.py v7.14.1, created at 2026-06-15 15:07 +0000

1"""Shared validators for Mission session inputs. 

2 

3Every Mission entry point — the MCP tools, the CLI subcommands, the engine's 

4session loader — feeds operator-supplied JSON through this module before it 

5ever touches state. The validators are intentionally pure: no I/O, no clocks, 

6no environment lookups. The caller passes whatever external context is 

7needed (the FastMCP tool catalog, the per-tool tag sets, the gating 

8feature-flag lookup) as plain arguments. This keeps the validators trivial 

9to unit-test and makes them safe to call from both async tool handlers and 

10synchronous CLI code. 

11 

12Design notes: 

13 

14* Validators **return new normalized values**; they never mutate their 

15 inputs. The :func:`validate_criteria` case attaches a cached parsed AST 

16 under the private key ``_parsed_ast`` on each ``predicate`` criterion; 

17 the original input dict is left untouched and a shallow copy carries the 

18 added key. 

19 

20* Every rejection raises :class:`MissionValidationError` with a stable 

21 short ``code`` (e.g. ``"validation_error"``) and a structured 

22 ``details`` dict whose ``field`` key identifies the input that failed. 

23 Tool wrappers render ``code`` and ``details`` as a structured FastMCP 

24 tool error so clients can surface them without text parsing. 

25 

26* The script-strategy path forward-declares the sandbox: scripted 

27 strategies are out of scope for this module and the sandbox module 

28 lands in a later slice. The lazy import inside 

29 :func:`validate_strategy` tolerates the missing module by raising a 

30 dedicated ``script_sandbox_not_implemented`` code, so callers that hit 

31 this path get a clear signal rather than an ``ImportError`` traceback. 

32""" 

33 

34from __future__ import annotations 

35 

36from collections.abc import Collection, Mapping, Sequence 

37from typing import Any, Final, cast 

38 

39from . import predicate 

40from .types import ( 

41 BudgetControls, 

42 Cadence, 

43 Criterion, 

44 Strategy, 

45) 

46 

47# --------------------------------------------------------------------------- 

48# Public exception 

49# --------------------------------------------------------------------------- 

50 

51 

52class MissionValidationError(Exception): 

53 """Raised when a validator rejects an input. 

54 

55 Carries a stable short ``code`` and an optional structured ``details`` 

56 dict. FastMCP tool wrappers convert this into a structured tool-error 

57 response; CLI handlers print ``code`` plus the ``details`` JSON. 

58 

59 The constructor accepts ``(code, details=None, *, message=None)``. 

60 When ``message`` is not provided, the exception's string form falls 

61 back to ``code`` so logs always show something meaningful. 

62 """ 

63 

64 def __init__( 

65 self, 

66 code: str, 

67 details: dict[str, Any] | None = None, 

68 *, 

69 message: str | None = None, 

70 ) -> None: 

71 self.code: str = code 

72 self.details: dict[str, Any] | None = details 

73 rendered = message if message is not None else code 

74 super().__init__(rendered) 

75 

76 

77# --------------------------------------------------------------------------- 

78# Constants 

79# --------------------------------------------------------------------------- 

80 

81_DIRECTIVE_MAX_LEN: Final[int] = 8192 

82"""The hard cap on directive_text length, in characters.""" 

83 

84_CRITERION_KINDS: Final[frozenset[str]] = frozenset( 

85 {"metric_threshold", "event", "predicate", "tool_call_succeeded", "metric_trend"} 

86) 

87"""The five valid Criterion ``kind`` values.""" 

88 

89_METRIC_OPS: Final[frozenset[str]] = frozenset({"<", "<=", ">", ">=", "==", "!="}) 

90"""The six valid comparison operators on a ``metric_threshold`` criterion.""" 

91 

92_METRIC_TREND_DIRECTIONS: Final[frozenset[str]] = frozenset( 

93 {"decreasing", "increasing", "non_increasing", "non_decreasing"} 

94) 

95"""The four valid trend directions on a ``metric_trend`` criterion.""" 

96 

97_CADENCE_KINDS: Final[frozenset[str]] = frozenset( 

98 {"every_iteration", "every_n_iterations", "every_t_seconds", "on_event"} 

99) 

100"""The four valid Cadence ``kind`` values.""" 

101 

102 

103# --------------------------------------------------------------------------- 

104# Helpers 

105# --------------------------------------------------------------------------- 

106 

107 

108def _is_positive_int(value: Any) -> bool: 

109 """Return True iff ``value`` is an int (not bool) and strictly > 0.""" 

110 # bool is a subclass of int; reject it explicitly so True/False cannot 

111 # silently masquerade as a positive integer count. 

112 return isinstance(value, int) and not isinstance(value, bool) and value > 0 

113 

114 

115def _is_positive_int_or_uncapped(value: Any) -> bool: 

116 """Return True iff ``value`` is a strictly-positive int OR the sentinel ``-1``. 

117 

118 The Mission budget caps (``max_iterations``, ``max_wall_clock_seconds``) 

119 accept ``-1`` as an explicit "uncapped" sentinel. Any other negative 

120 integer, zero, non-integer, or bool is rejected — the operator must 

121 pick exactly one of: a positive cap, or the explicit ``-1`` opt-out. 

122 Allowing zero would silently terminate every session on iteration 1 

123 / second 0; allowing arbitrary negatives would mask typos. 

124 """ 

125 if isinstance(value, bool): 

126 return False 

127 if not isinstance(value, int): 

128 return False 

129 return value > 0 or value == -1 

130 

131 

132def _is_number(value: Any) -> bool: 

133 """Return True iff ``value`` is an int or float (not bool).""" 

134 return isinstance(value, (int, float)) and not isinstance(value, bool) 

135 

136 

137# --------------------------------------------------------------------------- 

138# Directive 

139# --------------------------------------------------------------------------- 

140 

141 

def validate_directive(text: str) -> str:
    """Strip and check the operator-supplied directive.

    The directive is the natural-language goal for the session. After
    leading/trailing whitespace is removed it must be non-empty and no
    longer than :data:`_DIRECTIVE_MAX_LEN` characters.

    Returns the stripped string.

    Raises :class:`MissionValidationError` (``code="validation_error"``)
    with ``details.reason`` set to ``"not_a_string"``, ``"empty"``, or
    ``"too_long"`` (the latter also carries ``max_length`` and
    ``actual_length``).
    """
    cleaned = text.strip() if isinstance(text, str) else None

    # Work out which rule (if any) was broken, then raise from one place.
    problem: dict[str, Any] | None = None
    if cleaned is None:
        problem = {"field": "directive", "reason": "not_a_string"}
    elif not cleaned:
        problem = {"field": "directive", "reason": "empty"}
    elif len(cleaned) > _DIRECTIVE_MAX_LEN:
        # The length is measured on the stripped text, not the raw input.
        problem = {
            "field": "directive",
            "reason": "too_long",
            "max_length": _DIRECTIVE_MAX_LEN,
            "actual_length": len(cleaned),
        }

    if problem is not None:
        raise MissionValidationError("validation_error", details=problem)
    return cleaned

173 

174 

175# --------------------------------------------------------------------------- 

176# Criteria 

177# --------------------------------------------------------------------------- 

178 

179 

def _validate_metric_threshold(entry: dict[str, Any], criterion_id: str) -> None:
    """Check the kind-specific keys for a ``metric_threshold`` criterion.

    Required keys on ``entry``:

    * ``metric`` -- non-empty string.
    * ``op`` -- one of :data:`_METRIC_OPS`.
    * ``target`` -- int or float (bool rejected, see :func:`_is_number`).

    Returns ``None`` when all three are acceptable. Raises
    :class:`MissionValidationError` (``code="validation_error"``) whose
    ``details`` carry ``field="criteria"``, the ``criterion_id``, and a
    ``reason`` of ``"metric_missing_or_invalid"``, ``"op_invalid"`` (with
    the sorted ``allowed`` operators), or ``"target_not_a_number"``.
    """
    metric = entry.get("metric")
    if not isinstance(metric, str) or not metric:
        raise MissionValidationError(
            "validation_error",
            details={
                "field": "criteria",
                "criterion_id": criterion_id,
                "reason": "metric_missing_or_invalid",
            },
        )
    op = entry.get("op")
    # The isinstance guard must come first: operator-supplied JSON can put a
    # list or dict under ``op``, and testing an unhashable value for frozenset
    # membership raises TypeError rather than the structured rejection.
    if not isinstance(op, str) or op not in _METRIC_OPS:
        raise MissionValidationError(
            "validation_error",
            details={
                "field": "criteria",
                "criterion_id": criterion_id,
                "reason": "op_invalid",
                "allowed": sorted(_METRIC_OPS),
            },
        )
    target = entry.get("target")
    if not _is_number(target):
        raise MissionValidationError(
            "validation_error",
            details={
                "field": "criteria",
                "criterion_id": criterion_id,
                "reason": "target_not_a_number",
            },
        )

213 

214 

def _validate_metric_trend(entry: dict[str, Any], criterion_id: str) -> None:
    """Check the kind-specific keys for a ``metric_trend`` criterion.

    Required: ``metric`` (non-empty dot-path string) and ``direction`` (one of
    the four :data:`_METRIC_TREND_DIRECTIONS`). Optional: ``window`` (positive
    int -- how many of the most-recent points to consider) and ``min_points``
    (positive int -- the minimum number of numeric points required before the
    criterion decides met/unmet rather than inconclusive).

    Unlike ``metric_threshold`` this kind has no ``op``/``target``: the
    comparison is "where did the metric go over the window?", evaluated by
    :meth:`MissionEngine._evaluate_metric_trend` against the cumulative metric
    history the engine accumulates across iterations.

    Returns ``None`` on success. Raises :class:`MissionValidationError`
    (``code="validation_error"``) whose ``details`` carry
    ``field="criteria"``, the ``criterion_id``, and a ``reason`` of
    ``"metric_missing_or_invalid"``, ``"direction_invalid"`` (with the sorted
    ``allowed`` directions), ``"window_must_be_positive_int"``, or
    ``"min_points_must_be_positive_int"``.
    """
    metric = entry.get("metric")
    if not isinstance(metric, str) or not metric:
        raise MissionValidationError(
            "validation_error",
            details={
                "field": "criteria",
                "criterion_id": criterion_id,
                "reason": "metric_missing_or_invalid",
            },
        )
    direction = entry.get("direction")
    # Guard with isinstance before the membership test: a JSON list or dict
    # under ``direction`` is unhashable and would make ``in`` on the frozenset
    # raise TypeError instead of producing the structured rejection.
    if not isinstance(direction, str) or direction not in _METRIC_TREND_DIRECTIONS:
        raise MissionValidationError(
            "validation_error",
            details={
                "field": "criteria",
                "criterion_id": criterion_id,
                "reason": "direction_invalid",
                "allowed": sorted(_METRIC_TREND_DIRECTIONS),
            },
        )
    # ``window`` and ``min_points`` are optional, but when present each must be
    # a strictly-positive int (bool rejected). A missing value lets the engine
    # apply its defaults (window = all points; min_points = 2).
    if "window" in entry and not _is_positive_int(entry.get("window")):
        raise MissionValidationError(
            "validation_error",
            details={
                "field": "criteria",
                "criterion_id": criterion_id,
                "reason": "window_must_be_positive_int",
            },
        )
    if "min_points" in entry and not _is_positive_int(entry.get("min_points")):
        raise MissionValidationError(
            "validation_error",
            details={
                "field": "criteria",
                "criterion_id": criterion_id,
                "reason": "min_points_must_be_positive_int",
            },
        )

271 

272 

273def _validate_event_criterion(entry: dict[str, Any], criterion_id: str) -> None: 

274 """Check the kind-specific keys for an ``event`` criterion.""" 

275 event_name = entry.get("event_name") 

276 if not isinstance(event_name, str) or not event_name: 

277 raise MissionValidationError( 

278 "validation_error", 

279 details={ 

280 "field": "criteria", 

281 "criterion_id": criterion_id, 

282 "reason": "event_name_missing_or_invalid", 

283 }, 

284 ) 

285 

286 

def _validate_predicate_criterion(entry: dict[str, Any], criterion_id: str) -> Any:
    """Verify a ``predicate`` criterion and hand back its parsed AST.

    ``expression`` must be a non-empty string; it is then parsed with
    :func:`predicate.parse_predicate`. The parse result is returned so
    the caller can store it under ``_parsed_ast`` on its normalized copy.

    Raises :class:`MissionValidationError` when the expression is
    missing/invalid (``reason="expression_missing_or_invalid"``) or when
    the parser raises ``predicate.PredicateRejected`` -- in that case the
    parser's ``reason``, ``lineno`` and ``col_offset`` are forwarded in
    ``details`` and the original exception is chained as the cause.
    """
    source = entry.get("expression")
    if not (isinstance(source, str) and source):
        raise MissionValidationError(
            "validation_error",
            details={
                "field": "criteria",
                "criterion_id": criterion_id,
                "reason": "expression_missing_or_invalid",
            },
        )
    try:
        tree = predicate.parse_predicate(source)
    except predicate.PredicateRejected as exc:
        rejection = {
            "field": "criteria",
            "criterion_id": criterion_id,
            "reason": exc.reason,
            "lineno": exc.lineno,
            "col_offset": exc.col_offset,
        }
        raise MissionValidationError("validation_error", details=rejection) from exc
    return tree

316 

317 

318def _validate_tool_call_succeeded(entry: dict[str, Any], criterion_id: str) -> None: 

319 """Check the kind-specific keys for a ``tool_call_succeeded`` criterion. 

320 

321 Required: ``tool_name`` (non-empty str). Optional: ``min_count`` 

322 (positive int; default 1). The criterion is met when the 

323 Observation's ``tool_results`` list contains at least 

324 ``min_count`` entries whose ``tool_name`` field equals 

325 ``tool_name`` and whose ``_status`` equals ``"ok"``. 

326 

327 This kind exists so the most common Mission goal — "this tool 

328 ran and succeeded N times" — does not require the operator (or 

329 a sampling model) to write a Python predicate. It is a strict 

330 subset of what ``predicate`` can express, but the engine 

331 evaluates it server-side without going through the AST sandbox, 

332 so the validator never needs to reason about syntax errors, 

333 method-call shapes, or attribute walks for this case. 

334 """ 

335 tool_name = entry.get("tool_name") 

336 if not isinstance(tool_name, str) or not tool_name: 

337 raise MissionValidationError( 

338 "validation_error", 

339 details={ 

340 "field": "criteria", 

341 "criterion_id": criterion_id, 

342 "reason": "tool_name_missing_or_invalid", 

343 }, 

344 ) 

345 # ``min_count`` is optional; default 1 (any successful call). 

346 if "min_count" in entry: 

347 min_count = entry.get("min_count") 

348 # bool is a subclass of int — reject explicitly so True/False cannot 

349 # masquerade as 1/0 and silently pass through. 

350 if isinstance(min_count, bool) or not isinstance(min_count, int) or min_count < 1: 

351 raise MissionValidationError( 

352 "validation_error", 

353 details={ 

354 "field": "criteria", 

355 "criterion_id": criterion_id, 

356 "reason": "min_count_must_be_positive_int", 

357 }, 

358 ) 

359 

360 

def validate_criteria(criteria: list[dict[str, Any]]) -> list[Criterion]:
    """Validate a list of criteria and attach cached predicate ASTs.

    Required keys on every entry: ``criterion_id`` (non-empty str),
    ``kind`` (one of :data:`_CRITERION_KINDS`), and ``required`` (bool).
    Each entry must also provide the kind-specific keys:
    ``metric``/``op``/``target`` for ``metric_threshold``,
    ``metric``/``direction`` (plus optional ``window``/``min_points``) for
    ``metric_trend``, ``event_name`` for ``event``, ``tool_name`` for
    ``tool_call_succeeded``, and ``expression`` for ``predicate``.

    The ``criterion_id`` must be unique across the list. For each
    ``predicate`` entry, the expression is parsed via
    :func:`predicate.parse_predicate` and the resulting AST is cached
    under the private key ``_parsed_ast`` on a shallow copy of the
    entry.

    Returns the normalized list (one shallow copy per input entry, in
    input order). The original input dicts are not mutated.

    Raises :class:`MissionValidationError` (``code="validation_error"``,
    ``details.field == "criteria"``) on the first entry that fails; the
    ``details`` identify the entry by ``index`` until a valid
    ``criterion_id`` is known and by ``criterion_id`` afterwards.
    """
    if not isinstance(criteria, list):
        raise MissionValidationError(
            "validation_error",
            details={"field": "criteria", "reason": "not_a_list"},
        )
    if not criteria:
        raise MissionValidationError(
            "validation_error",
            details={"field": "criteria", "reason": "empty"},
        )
    seen_ids: set[str] = set()
    normalized: list[Criterion] = []
    for index, entry in enumerate(criteria):
        if not isinstance(entry, dict):
            raise MissionValidationError(
                "validation_error",
                details={
                    "field": "criteria",
                    "index": index,
                    "reason": "not_a_dict",
                },
            )
        criterion_id = entry.get("criterion_id")
        if not isinstance(criterion_id, str) or not criterion_id:
            raise MissionValidationError(
                "validation_error",
                details={
                    "field": "criteria",
                    "index": index,
                    "reason": "criterion_id_missing_or_invalid",
                },
            )
        if criterion_id in seen_ids:
            raise MissionValidationError(
                "validation_error",
                details={
                    "field": "criteria",
                    "criterion_id": criterion_id,
                    "reason": "duplicate_criterion_id",
                },
            )
        seen_ids.add(criterion_id)
        kind = entry.get("kind")
        # Check the type before the membership test: JSON input may put a
        # list or dict under ``kind``, and an unhashable value makes ``in`` on
        # a frozenset raise TypeError instead of the structured rejection.
        if not isinstance(kind, str) or kind not in _CRITERION_KINDS:
            raise MissionValidationError(
                "validation_error",
                details={
                    "field": "criteria",
                    "criterion_id": criterion_id,
                    "reason": "kind_invalid",
                    "allowed": sorted(_CRITERION_KINDS),
                },
            )
        if not isinstance(entry.get("required"), bool):
            raise MissionValidationError(
                "validation_error",
                details={
                    "field": "criteria",
                    "criterion_id": criterion_id,
                    "reason": "required_missing_or_not_a_bool",
                },
            )
        # Build a shallow copy so we never mutate the caller's dict; we
        # may need to attach _parsed_ast and we want the input to stay
        # exactly as it was passed in.
        normalized_entry: dict[str, Any] = dict(entry)
        if kind == "metric_threshold":
            _validate_metric_threshold(entry, criterion_id)
        elif kind == "metric_trend":
            _validate_metric_trend(entry, criterion_id)
        elif kind == "event":
            _validate_event_criterion(entry, criterion_id)
        elif kind == "tool_call_succeeded":
            _validate_tool_call_succeeded(entry, criterion_id)
        else:  # kind == "predicate" -- the only member of _CRITERION_KINDS left
            parsed = _validate_predicate_criterion(entry, criterion_id)
            normalized_entry["_parsed_ast"] = parsed
        normalized.append(cast("Criterion", normalized_entry))
    return normalized

458 

459 

460# --------------------------------------------------------------------------- 

461# Budget 

462# --------------------------------------------------------------------------- 

463 

464 

def validate_budget(
    budget: dict[str, Any],
    allowlist: list[str],
    registered_tags: dict[str, set[str]],
) -> BudgetControls:
    """Validate a budget dict.

    Required keys: ``max_iterations`` and ``max_wall_clock_seconds``.
    Each accepts either a strictly-positive int OR the explicit
    sentinel ``-1`` ("uncapped"). The operator must pick one;
    omitting the key, passing zero, passing any other negative
    number, or passing a non-integer is rejected. **At least one** of
    the two caps must be a positive int -- both being ``-1`` would be
    a runaway loop with no axis-driven termination, so the validator
    rejects that combination eagerly with
    ``reason="at_least_one_cap_required"``.

    Cost guardrails live out-of-band -- Mission only enforces caps the
    loop has direct visibility into. ``allowlist`` and
    ``registered_tags`` are kept on the signature for API stability
    so existing callers don't have to change shape; both are unused.

    Returns a new dict holding exactly the two validated caps, suitable
    for use as a :class:`BudgetControls`; any other keys on ``budget``
    are not carried over and ``budget`` itself is not modified.

    Raises :class:`MissionValidationError` (``code="validation_error"``,
    ``details.field == "budget"``) with ``reason`` ``"not_a_dict"``,
    ``"missing_or_not_positive_int_or_minus_one"`` (plus the offending
    ``subfield``), or ``"at_least_one_cap_required"``.
    """
    del allowlist, registered_tags  # accepted for API stability; unused
    if not isinstance(budget, dict):
        raise MissionValidationError(
            "validation_error",
            details={"field": "budget", "reason": "not_a_dict"},
        )
    max_iterations = budget.get("max_iterations")
    if not _is_positive_int_or_uncapped(max_iterations):
        raise MissionValidationError(
            "validation_error",
            details={
                "field": "budget",
                "subfield": "max_iterations",
                "reason": "missing_or_not_positive_int_or_minus_one",
            },
        )
    max_wall = budget.get("max_wall_clock_seconds")
    if not _is_positive_int_or_uncapped(max_wall):
        raise MissionValidationError(
            "validation_error",
            details={
                "field": "budget",
                "subfield": "max_wall_clock_seconds",
                "reason": "missing_or_not_positive_int_or_minus_one",
            },
        )
    # Each cap is individually valid at this point, so "both uncapped" is
    # exactly "both equal the -1 sentinel". Reject it: with neither axis
    # capped, no budget limit would ever end the session.
    if max_iterations == -1 and max_wall == -1:
        raise MissionValidationError(
            "validation_error",
            details={"field": "budget", "reason": "at_least_one_cap_required"},
        )
    normalized: dict[str, Any] = {
        "max_iterations": max_iterations,
        "max_wall_clock_seconds": max_wall,
    }
    return cast("BudgetControls", normalized)

520 

521 

522# --------------------------------------------------------------------------- 

523# Tool allowlist 

524# --------------------------------------------------------------------------- 

525 

526 

527def validate_tool_allowlist( 

528 allowlist: list[str], 

529 registered_tools: dict[str, Any], 

530 flag_lookup: dict[str, str] | None = None, 

531) -> list[str]: 

532 """Validate that every name in the allowlist is currently registered. 

533 

534 ``registered_tools`` is a structural mapping from tool name to the 

535 tool object (FastMCP's ``Tool`` type, but typed loosely here so the 

536 module imports cleanly without the optional FastMCP dependency). 

537 Only the dict keys are read. 

538 

539 When a name is missing from ``registered_tools``, the validator 

540 raises :class:`MissionValidationError`. If ``flag_lookup`` is 

541 provided and contains the missing tool's name, the rejection's 

542 ``details.flag`` field carries the gating feature-flag name (so 

543 the operator can be told *why* the tool is currently absent — 

544 typically because its feature flag is unset). Otherwise the 

545 rejection carries ``details.tool_name`` only. 

546 """ 

547 if not isinstance(allowlist, list): 547 ↛ 548line 547 didn't jump to line 548 because the condition on line 547 was never true

548 raise MissionValidationError( 

549 "validation_error", 

550 details={"field": "tool_allowlist", "reason": "not_a_list"}, 

551 ) 

552 if not allowlist: 

553 raise MissionValidationError( 

554 "validation_error", 

555 details={"field": "tool_allowlist", "reason": "empty"}, 

556 ) 

557 seen: set[str] = set() 

558 normalized: list[str] = [] 

559 for index, name in enumerate(allowlist): 

560 if not isinstance(name, str) or not name: 560 ↛ 561line 560 didn't jump to line 561 because the condition on line 560 was never true

561 raise MissionValidationError( 

562 "validation_error", 

563 details={ 

564 "field": "tool_allowlist", 

565 "index": index, 

566 "reason": "tool_name_missing_or_invalid", 

567 }, 

568 ) 

569 if name in seen: 

570 raise MissionValidationError( 

571 "validation_error", 

572 details={ 

573 "field": "tool_allowlist", 

574 "tool_name": name, 

575 "reason": "duplicate_tool_name", 

576 }, 

577 ) 

578 seen.add(name) 

579 if name not in registered_tools: 

580 details: dict[str, Any] = { 

581 "field": "tool_allowlist", 

582 "tool_name": name, 

583 "reason": "tool_not_registered", 

584 } 

585 if flag_lookup is not None and name in flag_lookup: 

586 details["flag"] = flag_lookup[name] 

587 raise MissionValidationError("validation_error", details=details) 

588 normalized.append(name) 

589 return normalized 

590 

591 

# Names of the nine tools that manage sessions themselves. An all-tools
# expansion leaves them out, so a session can never end up with an allowlist
# that lets it recursively call the tools that start, drive, and tear down
# sessions. :func:`resolve_effective_allowlist` uses this constant as its
# default exclusion set; a caller holding a live tag map may supply its own
# equivalent set instead.
MISSION_CONTROL_TOOLS: frozenset[str] = frozenset(
    f"mission_{verb}"
    for verb in (
        "start",
        "status",
        "iterate",
        "checkpoint",
        "complete",
        "abort",
        "resume",
        "history",
        "list",
    )
)
"""Control-tool names (nine) that an all-tools expansion never includes."""

612 

613 

def resolve_effective_allowlist(
    *,
    allow_all_tools: bool,
    explicit_allowlist: list[str] | None,
    registered_tools: dict[str, Any],
    control_tools: Collection[str] = MISSION_CONTROL_TOOLS,
    flag_lookup: dict[str, str] | None = None,
) -> list[str]:
    """Work out the tool allowlist a session will actually run with.

    Pure: no I/O, no clocks, no environment lookups. ``registered_tools``
    supplies the currently registered tool names (only its keys are read)
    and ``control_tools`` the names to leave out of an all-tools
    expansion (default :data:`MISSION_CONTROL_TOOLS`).

    Cases:

    * ``allow_all_tools`` False -- delegate to
      :func:`validate_tool_allowlist` over ``explicit_allowlist or []``
      (passing ``flag_lookup`` along), which keeps that function's
      ``empty`` rejection for an empty or absent list.
    * ``allow_all_tools`` True with a non-empty ``explicit_allowlist`` --
      the inputs conflict; raise :class:`MissionValidationError` with
      ``details.reason ==
      "allow_all_and_explicit_allowlist_mutually_exclusive"``.
    * ``allow_all_tools`` True with no explicit list -- the candidate is
      the sorted registered names minus ``control_tools``. An empty
      candidate (nothing registered, or only control tools registered)
      raises ``details.reason == "allow_all_tools_empty_registry"``;
      otherwise the candidate goes through
      :func:`validate_tool_allowlist` so it meets every invariant an
      operator-supplied list would.

    Returns the normalized allowlist: sorted and duplicate-free on the
    all-tools path, :func:`validate_tool_allowlist`'s order-preserving
    output on the explicit path.
    """
    # Explicit path first: nothing to expand, just validate what was given.
    if not allow_all_tools:
        return validate_tool_allowlist(explicit_allowlist or [], registered_tools, flag_lookup)

    if explicit_allowlist:
        raise MissionValidationError(
            "validation_error",
            details={
                "field": "tool_allowlist",
                "reason": "allow_all_and_explicit_allowlist_mutually_exclusive",
            },
        )

    # All-tools path: every registered name except the control tools.
    available = set(registered_tools)
    available.difference_update(control_tools)
    expanded = sorted(available)
    if not expanded:
        raise MissionValidationError(
            "validation_error",
            details={
                "field": "tool_allowlist",
                "reason": "allow_all_tools_empty_registry",
            },
        )
    return validate_tool_allowlist(expanded, registered_tools)

669 

670 

671# --------------------------------------------------------------------------- 

672# Cadence 

673# --------------------------------------------------------------------------- 

674 

675 

def validate_cadence(cadence: dict[str, Any]) -> Cadence:
    """Validate a checkpoint cadence dict.

    ``kind`` must be one of :data:`_CADENCE_KINDS`. The base
    ``every_iteration`` kind requires no extra keys.
    ``every_n_iterations`` requires a positive int ``n``.
    ``every_t_seconds`` requires a positive int ``t``. ``on_event``
    requires a non-empty str ``event_name``.

    Returns a new dict holding ``kind`` plus the single key that kind
    requires (if any), suitable for use as a :class:`Cadence`. Other keys
    on ``cadence`` are not carried over and ``cadence`` is not modified.

    Raises :class:`MissionValidationError` (``code="validation_error"``,
    ``details.field == "checkpoint_cadence"``) with ``reason``
    ``"not_a_dict"``, ``"kind_invalid"`` (plus the sorted ``allowed``
    kinds), ``"missing_or_not_positive_int"`` (``subfield`` ``n`` or
    ``t``), or ``"missing_or_empty"`` (``subfield`` ``event_name``).
    """
    if not isinstance(cadence, dict):
        raise MissionValidationError(
            "validation_error",
            details={"field": "checkpoint_cadence", "reason": "not_a_dict"},
        )
    kind = cadence.get("kind")
    # Type-check before the membership test: a JSON list or dict under
    # ``kind`` is unhashable, and ``in`` on a frozenset would then raise
    # TypeError instead of the structured rejection.
    if not isinstance(kind, str) or kind not in _CADENCE_KINDS:
        raise MissionValidationError(
            "validation_error",
            details={
                "field": "checkpoint_cadence",
                "reason": "kind_invalid",
                "allowed": sorted(_CADENCE_KINDS),
            },
        )
    normalized: dict[str, Any] = {"kind": kind}
    if kind == "every_n_iterations":
        n = cadence.get("n")
        if not _is_positive_int(n):
            raise MissionValidationError(
                "validation_error",
                details={
                    "field": "checkpoint_cadence",
                    "subfield": "n",
                    "reason": "missing_or_not_positive_int",
                },
            )
        normalized["n"] = n
    elif kind == "every_t_seconds":
        t = cadence.get("t")
        if not _is_positive_int(t):
            raise MissionValidationError(
                "validation_error",
                details={
                    "field": "checkpoint_cadence",
                    "subfield": "t",
                    "reason": "missing_or_not_positive_int",
                },
            )
        normalized["t"] = t
    elif kind == "on_event":
        event_name = cadence.get("event_name")
        if not isinstance(event_name, str) or not event_name:
            raise MissionValidationError(
                "validation_error",
                details={
                    "field": "checkpoint_cadence",
                    "subfield": "event_name",
                    "reason": "missing_or_empty",
                },
            )
        normalized["event_name"] = event_name
    # every_iteration takes no extra keys; nothing else to copy.
    return cast("Cadence", normalized)

739 

740 

741# --------------------------------------------------------------------------- 

742# Strategy 

743# --------------------------------------------------------------------------- 

744 

745 

746def validate_strategy( 

747 strategy: dict[str, Any], 

748 allowlist: list[str], 

749 allow_scripts: bool, 

750) -> Strategy: 

751 """Validate a Propose_Phase Strategy dict. 

752 

753 Exactly one of ``tool_calls`` (a non-empty list) or ``script`` (a 

754 non-empty string) must be present. When ``script`` is present, 

755 ``allow_scripts`` must be ``True`` — sessions started with 

756 ``allow_scripted_strategies=False`` reject scripted proposals. The 

757 script is then handed to the sandbox AST validator 

758 (:func:`mission.sandbox.validate_script_ast`) for inspection 

759 against ``allowlist``. The sandbox module is imported lazily 

760 because it lands in a later slice; if it is missing at call time, 

761 :class:`MissionValidationError` is raised with the dedicated code 

762 ``script_sandbox_not_implemented`` so callers see a clear signal 

763 instead of an ``ImportError`` traceback. 

764 

765 Returns a normalized strategy dict carrying through the optional 

766 ``expected_observation_keys`` and ``rationale`` fields when 

767 present. 

768 """ 

769 if not isinstance(strategy, dict): 

770 raise MissionValidationError( 

771 "validation_error", 

772 details={"field": "strategy", "reason": "not_a_dict"}, 

773 ) 

774 has_tool_calls = "tool_calls" in strategy 

775 has_script = "script" in strategy 

776 if has_tool_calls == has_script: 

777 # Both present, or both absent — same error in either direction. 

778 raise MissionValidationError( 

779 "validation_error", 

780 details={ 

781 "field": "strategy", 

782 "reason": "must_have_exactly_one_of_tool_calls_or_script", 

783 }, 

784 ) 

785 

786 normalized: dict[str, Any] = {} 

787 if has_tool_calls: 

788 tool_calls = strategy["tool_calls"] 

789 if not isinstance(tool_calls, list) or not tool_calls: 

790 raise MissionValidationError( 

791 "validation_error", 

792 details={ 

793 "field": "strategy", 

794 "subfield": "tool_calls", 

795 "reason": "must_be_non_empty_list", 

796 }, 

797 ) 

798 # Shallow-copy each call dict so the caller's list/dicts stay 

799 # intact; we don't impose a deep schema on each call here 

800 # because the tool dispatcher validates the per-call args 

801 # against the registered tool's signature at execute time. 

802 normalized["tool_calls"] = [dict(call) for call in tool_calls] 

803 else: 

804 script = strategy["script"] 

805 if not isinstance(script, str) or not script: 805 ↛ 806line 805 didn't jump to line 806 because the condition on line 805 was never true

806 raise MissionValidationError( 

807 "validation_error", 

808 details={ 

809 "field": "strategy", 

810 "subfield": "script", 

811 "reason": "must_be_non_empty_string", 

812 }, 

813 ) 

814 if not allow_scripts: 

815 raise MissionValidationError( 

816 "validation_error", 

817 details={ 

818 "field": "strategy", 

819 "subfield": "script", 

820 "reason": "scripts_not_allowed_by_session", 

821 }, 

822 ) 

823 try: 

824 from mission.sandbox import ( # noqa: PLC0415 — lazy: sandbox is an optional runtime dep 

825 ScriptRejected, 

826 validate_script_ast, 

827 ) 

828 except ModuleNotFoundError as exc: 

829 raise MissionValidationError( 

830 "script_sandbox_not_implemented", 

831 details={ 

832 "hint": "scripted strategies require the sandbox module", 

833 }, 

834 ) from exc 

835 try: 

836 validate_script_ast(script, allowlist) 

837 except ScriptRejected as exc: 

838 # Translate the sandbox-level rejection into our structured 

839 # MissionValidationError so every operator-input rejection 

840 # comes back through the same exception type. The sandbox's 

841 # stable ``reason`` token, line, and column carry through 

842 # so callers can render a precise error. 

843 raise MissionValidationError( 

844 "validation_error", 

845 details={ 

846 "field": "strategy", 

847 "subfield": "script", 

848 "reason": exc.reason, 

849 "lineno": exc.lineno, 

850 "col_offset": exc.col_offset, 

851 }, 

852 ) from exc 

853 normalized["script"] = script 

854 

855 # Carry through the two optional pass-through fields when present. 

856 if "expected_observation_keys" in strategy: 

857 keys = strategy["expected_observation_keys"] 

858 if not isinstance(keys, list) or not all(isinstance(k, str) for k in keys): 858 ↛ 867line 858 didn't jump to line 867 because the condition on line 858 was always true

859 raise MissionValidationError( 

860 "validation_error", 

861 details={ 

862 "field": "strategy", 

863 "subfield": "expected_observation_keys", 

864 "reason": "must_be_list_of_strings", 

865 }, 

866 ) 

867 normalized["expected_observation_keys"] = list(keys) 

868 if "rationale" in strategy: 

869 rationale = strategy["rationale"] 

870 if not isinstance(rationale, str): 870 ↛ 871line 870 didn't jump to line 871 because the condition on line 870 was never true

871 raise MissionValidationError( 

872 "validation_error", 

873 details={ 

874 "field": "strategy", 

875 "subfield": "rationale", 

876 "reason": "not_a_string", 

877 }, 

878 ) 

879 normalized["rationale"] = rationale 

880 return cast("Strategy", normalized) 

881 

882 

883# --------------------------------------------------------------------------- 

884# JSON-safety strippers 

885# --------------------------------------------------------------------------- 

886# 

887# Why these live here rather than next to the persistence backend or 

888# next to each call site: the only key that needs stripping today is 

889# ``_parsed_ast``, which is also created here (by ``validate_criteria`` 

890# attaching the cached :class:`ast.Expression` to predicate criteria). 

891# Putting the strippers next to the producer keeps the lifecycle 

892# obvious — anyone who reads ``validate_criteria`` sees the matching 

893# ``strip_private_fields`` helper one screen down. 

894# 

895# Three earlier slices each had their own near-duplicate implementation 

896# (``cli/commands/mission_cmd.py::_strip_private_criteria``, 

897# ``mcp/tools/mission.py::_strip_private_fields`` plus the iterations 

898# variant, ``mcp/resources/mission.py::_strip_private_fields``). Those 

899# now delegate here so a single source of truth governs the JSON-safety 

900# contract. 

901 

# Prefix that marks a dict key as "private": any key whose string form
# starts with an underscore. ``ast.Expression`` is the only object the
# validators currently attach, but the rule is intentionally broad so
# a future cache (a normalised JSON-Pointer for the metric path, a
# pre-resolved tool-tag set) can ride on the same convention without
# breaking persistence.
_PRIVATE_PREFIX: Final[str] = "_"

909 

910 

def _is_public_key(key: Any) -> bool:
    """Tell whether ``key`` should survive private-field stripping.

    The key is coerced with ``str`` before the check, so non-string keys
    are judged by their string form. A key is private exactly when that
    form begins with ``_PRIVATE_PREFIX``.

    Args:
        key: Any dict key.

    Returns:
        ``False`` for private keys, ``True`` for everything else.
    """
    key_text = str(key)
    is_private = key_text.startswith(_PRIVATE_PREFIX)
    return not is_private

914 

915 

def _strip_private_dict(d: Mapping[str, Any]) -> dict[str, Any]:
    """Build a new dict holding only the public entries of ``d``.

    The copy is shallow: values are carried over by reference and the
    input mapping is not modified. Entry order follows ``d``.

    Args:
        d: Mapping to filter.

    Returns:
        A fresh ``dict`` without the keys that ``_is_public_key`` rejects.
    """
    public_entries: dict[str, Any] = {}
    for name, value in d.items():
        if _is_public_key(name):
            public_entries[name] = value
    return public_entries

919 

920 

def strip_private_fields(session: Mapping[str, Any]) -> dict[str, Any]:
    """Produce a copy of ``session`` whose criterion shapes hold no private keys.

    Only two places are cleaned: the entries of ``session["criteria"]``
    and, through :func:`strip_private_fields_iterations`, the
    ``criteria_evaluation`` entries of ``session["iterations"]``. Every
    other top-level field is copied across untouched, including any
    top-level key that itself starts with an underscore. The strip is
    deliberately limited to the criterion / criterion-eval shapes.

    Args:
        session: A session-shaped mapping. Only ``Mapping[str, Any]``
            behaviour is relied on, so a partially built session is
            acceptable. ``criteria`` / ``iterations`` values that are
            missing or not lists are left as they are.

    Returns:
        A new shallow-copied dict. ``session`` itself is not mutated.
    """
    result: dict[str, Any] = dict(session)

    raw_criteria = result.get("criteria")
    if isinstance(raw_criteria, list):
        stripped_criteria: list[Any] = []
        for criterion in raw_criteria:
            if isinstance(criterion, Mapping):
                stripped_criteria.append(_strip_private_dict(criterion))
            else:
                # Non-mapping entries are kept verbatim.
                stripped_criteria.append(criterion)
        result["criteria"] = stripped_criteria

    raw_iterations = result.get("iterations")
    if isinstance(raw_iterations, list):
        result["iterations"] = strip_private_fields_iterations(raw_iterations)

    return result

953 

954 

def strip_private_fields_iterations(
    iterations: Sequence[Mapping[str, Any]],
) -> list[dict[str, Any]]:
    """Return iteration copies whose ``criteria_evaluation`` entries are public-only.

    For every mapping in ``iterations`` a shallow copy is made. If that
    copy has a list under ``"criteria_evaluation"``, each mapping entry
    in the list is replaced by a version without leading-underscore keys
    (for example a cached ``_parsed_ast``), which keeps the iteration
    history JSON-safe. All other iteration fields are carried over
    unchanged.

    Args:
        iterations: Sequence of iteration dicts. An element that is not
            a mapping is passed through as-is rather than dropped, so a
            caller can still see malformed data.

    Returns:
        A new list, one element per input element. The input sequence
        and its dicts are not mutated.
    """

    def _clean(entry: Any) -> dict[str, Any]:
        # Pass non-mapping entries through verbatim; the cast only
        # satisfies the declared return type.
        if not isinstance(entry, Mapping):
            return cast("dict[str, Any]", entry)
        cleaned = dict(entry)
        results = cleaned.get("criteria_evaluation")
        if isinstance(results, list):
            cleaned["criteria_evaluation"] = [
                result if not isinstance(result, Mapping) else _strip_private_dict(result)
                for result in results
            ]
        return cleaned

    return [_clean(entry) for entry in iterations]