Coverage for mcp/mission/validation.py: 93%
245 statements
« prev ^ index » next coverage.py v7.14.1, created at 2026-06-15 15:07 +0000
« prev ^ index » next coverage.py v7.14.1, created at 2026-06-15 15:07 +0000
1"""Shared validators for Mission session inputs.
3Every Mission entry point — the MCP tools, the CLI subcommands, the engine's
4session loader — feeds operator-supplied JSON through this module before it
5ever touches state. The validators are intentionally pure: no I/O, no clocks,
6no environment lookups. The caller passes whatever external context is
7needed (the FastMCP tool catalog, the per-tool tag sets, the gating
8feature-flag lookup) as plain arguments. This keeps the validators trivial
9to unit-test and makes them safe to call from both async tool handlers and
10synchronous CLI code.
12Design notes:
14* Validators **return new normalized values**; they never mutate their
15 inputs. The :func:`validate_criteria` case attaches a cached parsed AST
16 under the private key ``_parsed_ast`` on each ``predicate`` criterion;
17 the original input dict is left untouched and a shallow copy carries the
18 added key.
20* Every rejection raises :class:`MissionValidationError` with a stable
21 short ``code`` (e.g. ``"validation_error"``) and a structured
22 ``details`` dict whose ``field`` key identifies the input that failed.
23 Tool wrappers render ``code`` and ``details`` as a structured FastMCP
24 tool error so clients can surface them without text parsing.
26* The script-strategy path forward-declares the sandbox: scripted
27 strategies are out of scope for this module and the sandbox module
28 lands in a later slice. The lazy import inside
29 :func:`validate_strategy` tolerates the missing module by raising a
30 dedicated ``script_sandbox_not_implemented`` code, so callers that hit
31 this path get a clear signal rather than an ``ImportError`` traceback.
32"""
34from __future__ import annotations
36from collections.abc import Collection, Mapping, Sequence
37from typing import Any, Final, cast
39from . import predicate
40from .types import (
41 BudgetControls,
42 Cadence,
43 Criterion,
44 Strategy,
45)
47# ---------------------------------------------------------------------------
48# Public exception
49# ---------------------------------------------------------------------------
class MissionValidationError(Exception):
    """Signal that a validator refused an input.

    Two attributes are set by the constructor:

    * ``code`` — the short, stable identifier given as the first argument.
    * ``details`` — the optional structured dict given as the second
      argument, stored exactly as passed (``None`` when omitted).

    FastMCP tool wrappers turn these into a structured tool-error response;
    CLI handlers print ``code`` plus the ``details`` JSON.

    The constructor signature is ``(code, details=None, *, message=None)``.
    The string form of the exception is ``message`` when one is supplied
    and ``code`` otherwise, so logs always show something meaningful.
    """

    def __init__(
        self,
        code: str,
        details: dict[str, Any] | None = None,
        *,
        message: str | None = None,
    ) -> None:
        # ``Exception`` only receives the human-readable text; the structured
        # payload is kept on the instance attributes assigned below.
        super().__init__(code if message is None else message)
        self.code: str = code
        self.details: dict[str, Any] | None = details
77# ---------------------------------------------------------------------------
78# Constants
79# ---------------------------------------------------------------------------
81_DIRECTIVE_MAX_LEN: Final[int] = 8192
82"""The hard cap on directive_text length, in characters."""
84_CRITERION_KINDS: Final[frozenset[str]] = frozenset(
85 {"metric_threshold", "event", "predicate", "tool_call_succeeded", "metric_trend"}
86)
87"""The five valid Criterion ``kind`` values."""
89_METRIC_OPS: Final[frozenset[str]] = frozenset({"<", "<=", ">", ">=", "==", "!="})
90"""The six valid comparison operators on a ``metric_threshold`` criterion."""
92_METRIC_TREND_DIRECTIONS: Final[frozenset[str]] = frozenset(
93 {"decreasing", "increasing", "non_increasing", "non_decreasing"}
94)
95"""The four valid trend directions on a ``metric_trend`` criterion."""
97_CADENCE_KINDS: Final[frozenset[str]] = frozenset(
98 {"every_iteration", "every_n_iterations", "every_t_seconds", "on_event"}
99)
100"""The four valid Cadence ``kind`` values."""
103# ---------------------------------------------------------------------------
104# Helpers
105# ---------------------------------------------------------------------------
108def _is_positive_int(value: Any) -> bool:
109 """Return True iff ``value`` is an int (not bool) and strictly > 0."""
110 # bool is a subclass of int; reject it explicitly so True/False cannot
111 # silently masquerade as a positive integer count.
112 return isinstance(value, int) and not isinstance(value, bool) and value > 0
115def _is_positive_int_or_uncapped(value: Any) -> bool:
116 """Return True iff ``value`` is a strictly-positive int OR the sentinel ``-1``.
118 The Mission budget caps (``max_iterations``, ``max_wall_clock_seconds``)
119 accept ``-1`` as an explicit "uncapped" sentinel. Any other negative
120 integer, zero, non-integer, or bool is rejected — the operator must
121 pick exactly one of: a positive cap, or the explicit ``-1`` opt-out.
122 Allowing zero would silently terminate every session on iteration 1
123 / second 0; allowing arbitrary negatives would mask typos.
124 """
125 if isinstance(value, bool):
126 return False
127 if not isinstance(value, int):
128 return False
129 return value > 0 or value == -1
132def _is_number(value: Any) -> bool:
133 """Return True iff ``value`` is an int or float (not bool)."""
134 return isinstance(value, (int, float)) and not isinstance(value, bool)
137# ---------------------------------------------------------------------------
138# Directive
139# ---------------------------------------------------------------------------
def validate_directive(text: str) -> str:
    """Strip surrounding whitespace from a directive and validate it.

    The directive is the operator-supplied natural-language goal. It has
    to be a string, must still contain something once leading/trailing
    whitespace is removed, and the stripped form may not exceed
    :data:`_DIRECTIVE_MAX_LEN` characters.

    Returns the stripped string. Every rejection raises
    :class:`MissionValidationError` with ``code="validation_error"`` and a
    ``details`` dict whose ``field`` is ``"directive"``.
    """
    if not isinstance(text, str):
        problem: dict[str, Any] = {"field": "directive", "reason": "not_a_string"}
        raise MissionValidationError("validation_error", details=problem)

    stripped = text.strip()
    if not stripped:
        problem = {"field": "directive", "reason": "empty"}
        raise MissionValidationError("validation_error", details=problem)

    # The cap applies to the stripped text, not the raw input.
    length = len(stripped)
    if length > _DIRECTIVE_MAX_LEN:
        problem = {
            "field": "directive",
            "reason": "too_long",
            "max_length": _DIRECTIVE_MAX_LEN,
            "actual_length": length,
        }
        raise MissionValidationError("validation_error", details=problem)

    return stripped
175# ---------------------------------------------------------------------------
176# Criteria
177# ---------------------------------------------------------------------------
def _validate_metric_threshold(entry: dict[str, Any], criterion_id: str) -> None:
    """Check the kind-specific keys for a ``metric_threshold`` criterion.

    Required keys on ``entry``:

    * ``metric`` — a non-empty string.
    * ``op`` — one of the operators in :data:`_METRIC_OPS`.
    * ``target`` — an int or float (bool rejected, see :func:`_is_number`).

    Returns ``None`` when all three are acceptable. Otherwise raises
    :class:`MissionValidationError` with ``code="validation_error"``; the
    ``details`` dict carries ``field="criteria"``, the ``criterion_id``,
    and a ``reason`` naming the first key that failed.
    """
    metric = entry.get("metric")
    if not isinstance(metric, str) or not metric:
        raise MissionValidationError(
            "validation_error",
            details={
                "field": "criteria",
                "criterion_id": criterion_id,
                "reason": "metric_missing_or_invalid",
            },
        )
    op = entry.get("op")
    # The isinstance check must come first: operator-supplied JSON can put a
    # list or dict here, and testing an unhashable value for frozenset
    # membership raises TypeError instead of the structured rejection.
    if not isinstance(op, str) or op not in _METRIC_OPS:
        raise MissionValidationError(
            "validation_error",
            details={
                "field": "criteria",
                "criterion_id": criterion_id,
                "reason": "op_invalid",
                "allowed": sorted(_METRIC_OPS),
            },
        )
    target = entry.get("target")
    if not _is_number(target):
        raise MissionValidationError(
            "validation_error",
            details={
                "field": "criteria",
                "criterion_id": criterion_id,
                "reason": "target_not_a_number",
            },
        )
def _validate_metric_trend(entry: dict[str, Any], criterion_id: str) -> None:
    """Check the kind-specific keys for a ``metric_trend`` criterion.

    Required: ``metric`` (non-empty dot-path string) and ``direction`` (one of
    the four :data:`_METRIC_TREND_DIRECTIONS`). Optional: ``window`` (positive
    int — how many of the most-recent points to consider) and ``min_points``
    (positive int — the minimum number of numeric points required before the
    criterion decides met/unmet rather than inconclusive).

    Unlike ``metric_threshold`` this kind has no ``op``/``target``: the
    comparison is "where did the metric go over the window?", evaluated by
    :meth:`MissionEngine._evaluate_metric_trend` against the cumulative metric
    history the engine accumulates across iterations.

    Returns ``None`` on success. Raises :class:`MissionValidationError` with
    ``code="validation_error"`` and ``details`` carrying ``field="criteria"``,
    the ``criterion_id``, and a ``reason`` for the first key that failed.
    """
    metric = entry.get("metric")
    if not isinstance(metric, str) or not metric:
        raise MissionValidationError(
            "validation_error",
            details={
                "field": "criteria",
                "criterion_id": criterion_id,
                "reason": "metric_missing_or_invalid",
            },
        )
    direction = entry.get("direction")
    # Check the type before the membership test: a list or dict from
    # operator-supplied JSON is unhashable, and ``in`` on a frozenset would
    # raise TypeError rather than the structured rejection.
    if not isinstance(direction, str) or direction not in _METRIC_TREND_DIRECTIONS:
        raise MissionValidationError(
            "validation_error",
            details={
                "field": "criteria",
                "criterion_id": criterion_id,
                "reason": "direction_invalid",
                "allowed": sorted(_METRIC_TREND_DIRECTIONS),
            },
        )
    # ``window`` and ``min_points`` are optional, but when present each must be
    # a strictly-positive int (bool rejected). A missing value lets the engine
    # apply its defaults (window = all points; min_points = 2).
    if "window" in entry and not _is_positive_int(entry.get("window")):
        raise MissionValidationError(
            "validation_error",
            details={
                "field": "criteria",
                "criterion_id": criterion_id,
                "reason": "window_must_be_positive_int",
            },
        )
    if "min_points" in entry and not _is_positive_int(entry.get("min_points")):
        raise MissionValidationError(
            "validation_error",
            details={
                "field": "criteria",
                "criterion_id": criterion_id,
                "reason": "min_points_must_be_positive_int",
            },
        )
273def _validate_event_criterion(entry: dict[str, Any], criterion_id: str) -> None:
274 """Check the kind-specific keys for an ``event`` criterion."""
275 event_name = entry.get("event_name")
276 if not isinstance(event_name, str) or not event_name:
277 raise MissionValidationError(
278 "validation_error",
279 details={
280 "field": "criteria",
281 "criterion_id": criterion_id,
282 "reason": "event_name_missing_or_invalid",
283 },
284 )
def _validate_predicate_criterion(entry: dict[str, Any], criterion_id: str) -> Any:
    """Check the kind-specific keys for a ``predicate`` criterion.

    ``expression`` must be a non-empty string that
    :func:`predicate.parse_predicate` accepts. The value returned by the
    parser is handed back so the caller can store it under ``_parsed_ast``
    on its normalized copy of the entry.

    Raises :class:`MissionValidationError` (``code="validation_error"``)
    when the expression is missing/invalid, or when the parser raises
    ``predicate.PredicateRejected`` — in which case the rejection's
    ``reason``, ``lineno`` and ``col_offset`` are copied into ``details``.
    """
    source = entry.get("expression")
    if not (isinstance(source, str) and source):
        raise MissionValidationError(
            "validation_error",
            details={
                "field": "criteria",
                "criterion_id": criterion_id,
                "reason": "expression_missing_or_invalid",
            },
        )
    try:
        tree = predicate.parse_predicate(source)
    except predicate.PredicateRejected as rejection:
        # Re-raise as the module's own exception type, keeping the parser's
        # rejection as the chained cause.
        problem: dict[str, Any] = {
            "field": "criteria",
            "criterion_id": criterion_id,
            "reason": rejection.reason,
            "lineno": rejection.lineno,
            "col_offset": rejection.col_offset,
        }
        raise MissionValidationError("validation_error", details=problem) from rejection
    return tree
318def _validate_tool_call_succeeded(entry: dict[str, Any], criterion_id: str) -> None:
319 """Check the kind-specific keys for a ``tool_call_succeeded`` criterion.
321 Required: ``tool_name`` (non-empty str). Optional: ``min_count``
322 (positive int; default 1). The criterion is met when the
323 Observation's ``tool_results`` list contains at least
324 ``min_count`` entries whose ``tool_name`` field equals
325 ``tool_name`` and whose ``_status`` equals ``"ok"``.
327 This kind exists so the most common Mission goal — "this tool
328 ran and succeeded N times" — does not require the operator (or
329 a sampling model) to write a Python predicate. It is a strict
330 subset of what ``predicate`` can express, but the engine
331 evaluates it server-side without going through the AST sandbox,
332 so the validator never needs to reason about syntax errors,
333 method-call shapes, or attribute walks for this case.
334 """
335 tool_name = entry.get("tool_name")
336 if not isinstance(tool_name, str) or not tool_name:
337 raise MissionValidationError(
338 "validation_error",
339 details={
340 "field": "criteria",
341 "criterion_id": criterion_id,
342 "reason": "tool_name_missing_or_invalid",
343 },
344 )
345 # ``min_count`` is optional; default 1 (any successful call).
346 if "min_count" in entry:
347 min_count = entry.get("min_count")
348 # bool is a subclass of int — reject explicitly so True/False cannot
349 # masquerade as 1/0 and silently pass through.
350 if isinstance(min_count, bool) or not isinstance(min_count, int) or min_count < 1:
351 raise MissionValidationError(
352 "validation_error",
353 details={
354 "field": "criteria",
355 "criterion_id": criterion_id,
356 "reason": "min_count_must_be_positive_int",
357 },
358 )
def validate_criteria(criteria: list[dict[str, Any]]) -> list[Criterion]:
    """Validate a list of criteria and attach cached predicate ASTs.

    Required keys on every entry: ``criterion_id`` (non-empty str),
    ``kind`` (one of the :class:`CriterionKind` values), and
    ``required`` (bool). Each entry must also provide the kind-specific
    keys: ``metric``/``op``/``target`` for ``metric_threshold``,
    ``metric``/``direction`` (plus optional ``window``/``min_points``) for
    ``metric_trend``, ``event_name`` for ``event``, ``tool_name`` for
    ``tool_call_succeeded``, and ``expression`` for ``predicate``.

    The ``criterion_id`` must be unique across the list. For each
    ``predicate`` entry, the expression is parsed via
    :func:`predicate.parse_predicate` and the resulting AST is cached
    under the private key ``_parsed_ast`` on a shallow copy of the
    entry. Returns the normalized list. The original input dicts are
    not mutated.

    Raises :class:`MissionValidationError` with ``code="validation_error"``
    on the first problem found. ``details`` always carries
    ``field="criteria"`` and a ``reason``, plus either the list ``index``
    (before the id is known) or the ``criterion_id``.
    """
    if not isinstance(criteria, list):
        raise MissionValidationError(
            "validation_error",
            details={"field": "criteria", "reason": "not_a_list"},
        )
    if not criteria:
        raise MissionValidationError(
            "validation_error",
            details={"field": "criteria", "reason": "empty"},
        )
    seen_ids: set[str] = set()
    normalized: list[Criterion] = []
    for index, entry in enumerate(criteria):
        if not isinstance(entry, dict):
            raise MissionValidationError(
                "validation_error",
                details={
                    "field": "criteria",
                    "index": index,
                    "reason": "not_a_dict",
                },
            )
        criterion_id = entry.get("criterion_id")
        if not isinstance(criterion_id, str) or not criterion_id:
            raise MissionValidationError(
                "validation_error",
                details={
                    "field": "criteria",
                    "index": index,
                    "reason": "criterion_id_missing_or_invalid",
                },
            )
        if criterion_id in seen_ids:
            raise MissionValidationError(
                "validation_error",
                details={
                    "field": "criteria",
                    "criterion_id": criterion_id,
                    "reason": "duplicate_criterion_id",
                },
            )
        seen_ids.add(criterion_id)
        kind = entry.get("kind")
        # Type-check before the membership test: a list/dict ``kind`` from
        # operator-supplied JSON is unhashable, and ``in`` on a frozenset
        # would raise TypeError instead of the structured rejection.
        if not isinstance(kind, str) or kind not in _CRITERION_KINDS:
            raise MissionValidationError(
                "validation_error",
                details={
                    "field": "criteria",
                    "criterion_id": criterion_id,
                    "reason": "kind_invalid",
                    "allowed": sorted(_CRITERION_KINDS),
                },
            )
        if not isinstance(entry.get("required"), bool):
            raise MissionValidationError(
                "validation_error",
                details={
                    "field": "criteria",
                    "criterion_id": criterion_id,
                    "reason": "required_missing_or_not_a_bool",
                },
            )
        # Build a shallow copy so we never mutate the caller's dict; we
        # may need to attach _parsed_ast and we want the input to stay
        # exactly as it was passed in.
        normalized_entry: dict[str, Any] = dict(entry)
        if kind == "metric_threshold":
            _validate_metric_threshold(entry, criterion_id)
        elif kind == "metric_trend":
            _validate_metric_trend(entry, criterion_id)
        elif kind == "event":
            _validate_event_criterion(entry, criterion_id)
        elif kind == "tool_call_succeeded":
            _validate_tool_call_succeeded(entry, criterion_id)
        else:  # kind == "predicate"
            parsed = _validate_predicate_criterion(entry, criterion_id)
            normalized_entry["_parsed_ast"] = parsed
        normalized.append(cast("Criterion", normalized_entry))
    return normalized
460# ---------------------------------------------------------------------------
461# Budget
462# ---------------------------------------------------------------------------
def validate_budget(
    budget: dict[str, Any],
    allowlist: list[str],
    registered_tags: dict[str, set[str]],
) -> BudgetControls:
    """Validate a budget dict.

    Required keys: ``max_iterations`` and ``max_wall_clock_seconds``.
    Each accepts either a strictly-positive int OR the explicit
    sentinel ``-1`` ("uncapped"). The operator must pick one;
    omitting the key, passing zero, passing any other negative
    number, or passing a non-integer is rejected. **At least one** of
    the two caps must be a positive int — both being ``-1`` would be
    a runaway loop with no axis-driven termination, so the validator
    rejects that combination eagerly with
    ``reason="at_least_one_cap_required"``.

    Cost guardrails live out-of-band — Mission only enforces caps the
    loop has direct visibility into. ``allowlist`` and
    ``registered_tags`` are kept on the signature for API stability
    so existing callers don't have to change shape; both are unused.

    Returns a new dict holding exactly the two cap keys, suitable for use
    as a :class:`BudgetControls`. Raises :class:`MissionValidationError`
    with ``code="validation_error"`` and ``details.field == "budget"`` on
    any rejection.
    """
    del allowlist, registered_tags  # accepted for API stability; unused
    if not isinstance(budget, dict):
        raise MissionValidationError(
            "validation_error",
            details={"field": "budget", "reason": "not_a_dict"},
        )
    max_iterations = budget.get("max_iterations")
    if not _is_positive_int_or_uncapped(max_iterations):
        raise MissionValidationError(
            "validation_error",
            details={
                "field": "budget",
                "subfield": "max_iterations",
                "reason": "missing_or_not_positive_int_or_minus_one",
            },
        )
    max_wall = budget.get("max_wall_clock_seconds")
    if not _is_positive_int_or_uncapped(max_wall):
        raise MissionValidationError(
            "validation_error",
            details={
                "field": "budget",
                "subfield": "max_wall_clock_seconds",
                "reason": "missing_or_not_positive_int_or_minus_one",
            },
        )
    # Each value is individually valid at this point, so the only remaining
    # bad combination is both caps opted out: neither axis could ever end
    # the session.
    if max_iterations == -1 and max_wall == -1:
        raise MissionValidationError(
            "validation_error",
            details={
                "field": "budget",
                "reason": "at_least_one_cap_required",
            },
        )
    normalized: dict[str, Any] = {
        "max_iterations": max_iterations,
        "max_wall_clock_seconds": max_wall,
    }
    return cast("BudgetControls", normalized)
522# ---------------------------------------------------------------------------
523# Tool allowlist
524# ---------------------------------------------------------------------------
def validate_tool_allowlist(
    allowlist: list[str],
    registered_tools: dict[str, Any],
    flag_lookup: dict[str, str] | None = None,
) -> list[str]:
    """Check that every allowlisted tool name is currently registered.

    ``registered_tools`` maps tool name to tool object (FastMCP's ``Tool``
    type, typed loosely so this module imports without the optional
    FastMCP dependency); only key membership is consulted.

    The allowlist must be a non-empty list of unique, non-empty strings,
    each of which is a key of ``registered_tools``. On success a new list
    with the same names in the same order is returned.

    Every rejection raises :class:`MissionValidationError` with
    ``code="validation_error"`` and ``details.field == "tool_allowlist"``.
    For an unregistered name, ``details`` carries ``tool_name``; when
    ``flag_lookup`` is given and has an entry for that name, the entry is
    added as ``details.flag`` so the operator can be told which feature
    flag gates the missing tool.
    """
    if not isinstance(allowlist, list):
        raise MissionValidationError(
            "validation_error",
            details={"field": "tool_allowlist", "reason": "not_a_list"},
        )
    if not allowlist:
        raise MissionValidationError(
            "validation_error",
            details={"field": "tool_allowlist", "reason": "empty"},
        )

    accepted: list[str] = []
    already_listed: set[str] = set()
    for position, tool in enumerate(allowlist):
        if not (isinstance(tool, str) and tool):
            raise MissionValidationError(
                "validation_error",
                details={
                    "field": "tool_allowlist",
                    "index": position,
                    "reason": "tool_name_missing_or_invalid",
                },
            )
        if tool in already_listed:
            raise MissionValidationError(
                "validation_error",
                details={
                    "field": "tool_allowlist",
                    "tool_name": tool,
                    "reason": "duplicate_tool_name",
                },
            )
        already_listed.add(tool)

        if tool in registered_tools:
            accepted.append(tool)
            continue

        # Unregistered: build the rejection, adding the gating flag only
        # when the caller supplied a lookup that knows this tool.
        problem: dict[str, Any] = {
            "field": "tool_allowlist",
            "tool_name": tool,
            "reason": "tool_not_registered",
        }
        if flag_lookup is not None and tool in flag_lookup:
            problem["flag"] = flag_lookup[tool]
        raise MissionValidationError("validation_error", details=problem)
    return accepted
# Names of the nine session-management tools. An all-tools expansion leaves
# them out, so a session can never end up with an allowlist that lets it
# recursively call the tools that start, drive, and tear down sessions.
# :func:`resolve_effective_allowlist` uses this constant as its default
# exclusion set; a caller that holds a live tag map may supply its own
# equivalent set instead.
MISSION_CONTROL_TOOLS: frozenset[str] = frozenset(
    (
        "mission_abort",
        "mission_checkpoint",
        "mission_complete",
        "mission_history",
        "mission_iterate",
        "mission_list",
        "mission_resume",
        "mission_start",
        "mission_status",
    )
)
"""Control-tool names (nine) that an all-tools expansion never includes."""
def resolve_effective_allowlist(
    *,
    allow_all_tools: bool,
    explicit_allowlist: list[str] | None,
    registered_tools: dict[str, Any],
    control_tools: Collection[str] = MISSION_CONTROL_TOOLS,
    flag_lookup: dict[str, str] | None = None,
) -> list[str]:
    """Work out the tool allowlist a session will actually run with.

    Pure: no I/O, no clocks, no environment lookups. ``registered_tools``
    supplies the currently-registered tool names (only its keys are read)
    and ``control_tools`` names the tools to leave out of an all-tools
    expansion (default: :data:`MISSION_CONTROL_TOOLS`).

    Behaviour:

    * ``allow_all_tools`` False — delegates to
      :func:`validate_tool_allowlist` over ``explicit_allowlist or []``,
      passing ``flag_lookup`` through, so an empty or absent list gets that
      function's ``empty`` rejection. The result keeps the caller's order.
    * ``allow_all_tools`` True with a non-empty ``explicit_allowlist`` —
      the inputs conflict; raises :class:`MissionValidationError` with
      ``details.reason == "allow_all_and_explicit_allowlist_mutually_exclusive"``.
    * ``allow_all_tools`` True with no explicit list — the candidate is
      the sorted registered names minus ``control_tools``. An empty
      candidate raises ``details.reason == "allow_all_tools_empty_registry"``;
      otherwise the candidate goes through :func:`validate_tool_allowlist`
      so it meets the same invariants as an operator-supplied list.

    Returns the normalized allowlist.
    """
    if not allow_all_tools:
        return validate_tool_allowlist(explicit_allowlist or [], registered_tools, flag_lookup)

    if explicit_allowlist:
        raise MissionValidationError(
            "validation_error",
            details={
                "field": "tool_allowlist",
                "reason": "allow_all_and_explicit_allowlist_mutually_exclusive",
            },
        )

    # Everything registered, minus the control tools, in sorted order.
    expanded = sorted(set(registered_tools).difference(control_tools))
    if expanded:
        return validate_tool_allowlist(expanded, registered_tools)

    raise MissionValidationError(
        "validation_error",
        details={
            "field": "tool_allowlist",
            "reason": "allow_all_tools_empty_registry",
        },
    )
671# ---------------------------------------------------------------------------
672# Cadence
673# ---------------------------------------------------------------------------
def validate_cadence(cadence: dict[str, Any]) -> Cadence:
    """Validate a checkpoint cadence dict.

    The base ``every_iteration`` kind requires no extra keys.
    ``every_n_iterations`` requires a positive int ``n``.
    ``every_t_seconds`` requires a positive int ``t``. ``on_event``
    requires a non-empty str ``event_name``.

    Returns a new dict holding ``kind`` plus only the key that kind
    requires, suitable for use as a :class:`Cadence`. Raises
    :class:`MissionValidationError` with ``code="validation_error"`` and
    ``details.field == "checkpoint_cadence"`` on any rejection.
    """
    if not isinstance(cadence, dict):
        raise MissionValidationError(
            "validation_error",
            details={"field": "checkpoint_cadence", "reason": "not_a_dict"},
        )
    kind = cadence.get("kind")
    # Type-check before the membership test: a list/dict ``kind`` from
    # operator-supplied JSON is unhashable, and ``in`` on a frozenset would
    # raise TypeError instead of the structured rejection.
    if not isinstance(kind, str) or kind not in _CADENCE_KINDS:
        raise MissionValidationError(
            "validation_error",
            details={
                "field": "checkpoint_cadence",
                "reason": "kind_invalid",
                "allowed": sorted(_CADENCE_KINDS),
            },
        )
    normalized: dict[str, Any] = {"kind": kind}
    if kind == "every_n_iterations":
        n = cadence.get("n")
        if not _is_positive_int(n):
            raise MissionValidationError(
                "validation_error",
                details={
                    "field": "checkpoint_cadence",
                    "subfield": "n",
                    "reason": "missing_or_not_positive_int",
                },
            )
        normalized["n"] = n
    elif kind == "every_t_seconds":
        t = cadence.get("t")
        if not _is_positive_int(t):
            raise MissionValidationError(
                "validation_error",
                details={
                    "field": "checkpoint_cadence",
                    "subfield": "t",
                    "reason": "missing_or_not_positive_int",
                },
            )
        normalized["t"] = t
    elif kind == "on_event":
        event_name = cadence.get("event_name")
        if not isinstance(event_name, str) or not event_name:
            raise MissionValidationError(
                "validation_error",
                details={
                    "field": "checkpoint_cadence",
                    "subfield": "event_name",
                    "reason": "missing_or_empty",
                },
            )
        normalized["event_name"] = event_name
    # every_iteration takes no extra keys; nothing else to copy.
    return cast("Cadence", normalized)
741# ---------------------------------------------------------------------------
742# Strategy
743# ---------------------------------------------------------------------------
def validate_strategy(
    strategy: dict[str, Any],
    allowlist: list[str],
    allow_scripts: bool,
) -> Strategy:
    """Validate a Propose_Phase Strategy dict.

    Exactly one of ``tool_calls`` (a non-empty list of mappings) or
    ``script`` (a non-empty string) must be present. When ``script`` is
    present, ``allow_scripts`` must be ``True`` — sessions started with
    ``allow_scripted_strategies=False`` reject scripted proposals. The
    script is then handed to the sandbox AST validator
    (:func:`mission.sandbox.validate_script_ast`) for inspection
    against ``allowlist``. The sandbox module is imported lazily
    because it lands in a later slice; if it is missing at call time,
    :class:`MissionValidationError` is raised with the dedicated code
    ``script_sandbox_not_implemented`` so callers see a clear signal
    instead of an ``ImportError`` traceback.

    Returns a new normalized strategy dict. Each tool call is shallow-copied
    into it, and the optional ``expected_observation_keys`` (list of str)
    and ``rationale`` (str) fields are carried through when present.

    Raises :class:`MissionValidationError` with ``code="validation_error"``
    and ``details.field == "strategy"`` for every other rejection.
    """
    if not isinstance(strategy, dict):
        raise MissionValidationError(
            "validation_error",
            details={"field": "strategy", "reason": "not_a_dict"},
        )
    has_tool_calls = "tool_calls" in strategy
    has_script = "script" in strategy
    if has_tool_calls == has_script:
        # Both present, or both absent — same error in either direction.
        raise MissionValidationError(
            "validation_error",
            details={
                "field": "strategy",
                "reason": "must_have_exactly_one_of_tool_calls_or_script",
            },
        )

    normalized: dict[str, Any] = {}
    if has_tool_calls:
        tool_calls = strategy["tool_calls"]
        if not isinstance(tool_calls, list) or not tool_calls:
            raise MissionValidationError(
                "validation_error",
                details={
                    "field": "strategy",
                    "subfield": "tool_calls",
                    "reason": "must_be_non_empty_list",
                },
            )
        # Shallow-copy each call so the caller's list/dicts stay intact; no
        # deep schema is imposed here because the tool dispatcher validates
        # the per-call args against the registered tool's signature at
        # execute time. Each entry must at least be a mapping, though:
        # ``dict()`` on a string or number raises ValueError/TypeError,
        # which would escape as an unstructured error.
        copied_calls: list[dict[str, Any]] = []
        for index, call in enumerate(tool_calls):
            if not isinstance(call, Mapping):
                raise MissionValidationError(
                    "validation_error",
                    details={
                        "field": "strategy",
                        "subfield": "tool_calls",
                        "index": index,
                        "reason": "tool_call_not_a_dict",
                    },
                )
            copied_calls.append(dict(call))
        normalized["tool_calls"] = copied_calls
    else:
        script = strategy["script"]
        if not isinstance(script, str) or not script:
            raise MissionValidationError(
                "validation_error",
                details={
                    "field": "strategy",
                    "subfield": "script",
                    "reason": "must_be_non_empty_string",
                },
            )
        if not allow_scripts:
            raise MissionValidationError(
                "validation_error",
                details={
                    "field": "strategy",
                    "subfield": "script",
                    "reason": "scripts_not_allowed_by_session",
                },
            )
        try:
            from mission.sandbox import (  # noqa: PLC0415 — lazy: sandbox is an optional runtime dep
                ScriptRejected,
                validate_script_ast,
            )
        except ModuleNotFoundError as exc:
            raise MissionValidationError(
                "script_sandbox_not_implemented",
                details={
                    "hint": "scripted strategies require the sandbox module",
                },
            ) from exc
        try:
            validate_script_ast(script, allowlist)
        except ScriptRejected as exc:
            # Translate the sandbox-level rejection into our structured
            # MissionValidationError so every operator-input rejection
            # comes back through the same exception type. The sandbox's
            # stable ``reason`` token, line, and column carry through
            # so callers can render a precise error.
            raise MissionValidationError(
                "validation_error",
                details={
                    "field": "strategy",
                    "subfield": "script",
                    "reason": exc.reason,
                    "lineno": exc.lineno,
                    "col_offset": exc.col_offset,
                },
            ) from exc
        normalized["script"] = script

    # Carry through the two optional pass-through fields when present.
    if "expected_observation_keys" in strategy:
        keys = strategy["expected_observation_keys"]
        if not isinstance(keys, list) or not all(isinstance(k, str) for k in keys):
            raise MissionValidationError(
                "validation_error",
                details={
                    "field": "strategy",
                    "subfield": "expected_observation_keys",
                    "reason": "must_be_list_of_strings",
                },
            )
        normalized["expected_observation_keys"] = list(keys)
    if "rationale" in strategy:
        rationale = strategy["rationale"]
        if not isinstance(rationale, str):
            raise MissionValidationError(
                "validation_error",
                details={
                    "field": "strategy",
                    "subfield": "rationale",
                    "reason": "not_a_string",
                },
            )
        normalized["rationale"] = rationale
    return cast("Strategy", normalized)
# ---------------------------------------------------------------------------
# JSON-safety strippers
# ---------------------------------------------------------------------------
#
# Why these live here rather than next to the persistence backend or
# next to each call site: the only key that needs stripping today is
# ``_parsed_ast``, which is also created here (by ``validate_criteria``
# attaching the cached :class:`ast.Expression` to predicate criteria).
# Putting the strippers next to the producer keeps the lifecycle
# obvious — anyone who reads ``validate_criteria`` sees the matching
# ``strip_private_fields`` helper one screen down.
#
# Three earlier slices each had their own near-duplicate implementation
# (``cli/commands/mission_cmd.py::_strip_private_criteria``,
# ``mcp/tools/mission.py::_strip_private_fields`` plus the iterations
# variant, ``mcp/resources/mission.py::_strip_private_fields``). Those
# now delegate here so a single source of truth governs the JSON-safety
# contract.
902# Sentinel marking which keys count as "private" — anything starting
903# with an underscore. ``ast.Expression`` is the only object the
904# validators currently attach, but the rule is intentionally broad so
905# a future cache (a normalised JSON-Pointer for the metric path, a
906# pre-resolved tool-tag set) can ride on the same convention without
907# breaking persistence.
908_PRIVATE_PREFIX: Final[str] = "_"
def _is_public_key(key: Any) -> bool:
    """Return True iff ``key`` is a non-private dict key.

    A key counts as private when its string form starts with
    ``_PRIVATE_PREFIX``. The key is converted with ``str`` first, so
    non-string keys are classified by their string representation.
    """
    return not str(key).startswith(_PRIVATE_PREFIX)
def _strip_private_dict(d: Mapping[str, Any]) -> dict[str, Any]:
    """Return a shallow copy of ``d`` with private keys removed.

    Only the top-level keys of ``d`` are inspected. Values are carried
    over as-is (neither copied nor recursed into), and ``d`` itself is
    left unmodified.
    """
    return {k: v for k, v in d.items() if _is_public_key(k)}
def strip_private_fields(session: Mapping[str, Any]) -> dict[str, Any]:
    """Return a JSON-safe copy of ``session`` with private criterion keys dropped.

    Walks ``session["criteria"]`` and ``session["iterations"]`` and
    drops any leading-underscore keys from each Criterion dict and
    each ``criteria_evaluation`` entry on each iteration. Other
    fields pass through verbatim — the strip is intentionally narrow
    so a future field that legitimately starts with an underscore
    (e.g. ``_meta`` for backwards compatibility) doesn't get
    silently eaten outside the criterion / criterion-eval shapes.

    Args:
        session: Any session-shaped mapping; usually a
            :class:`SessionState` ``TypedDict`` but the function is
            duck-typed against ``Mapping[str, Any]`` so callers can
            pass a partial session under construction without first
            casting to the full type.

    Returns:
        A shallow copy of ``session`` with the criterion and
        criterion-eval shapes cleaned. The original is never mutated.
    """
    cleaned: dict[str, Any] = dict(session)

    # Only a real ``list`` is rewritten; a missing key or any other
    # value type is left exactly as the caller supplied it.
    criteria = cleaned.get("criteria")
    if isinstance(criteria, list):
        cleaned["criteria"] = [
            _strip_private_dict(c) if isinstance(c, Mapping) else c for c in criteria
        ]

    # Iterations carry their own per-criterion results; the dedicated
    # helper cleans those.
    iterations = cleaned.get("iterations")
    if isinstance(iterations, list):
        cleaned["iterations"] = strip_private_fields_iterations(iterations)

    return cleaned
def strip_private_fields_iterations(
    iterations: Sequence[Mapping[str, Any]],
) -> list[dict[str, Any]]:
    """Strip private keys from each iteration's ``criteria_evaluation`` shape.

    The Decide_Phase appends ``CriterionResult`` entries under
    ``iteration["criteria_evaluation"]``. When a criterion is a
    ``predicate``, the entry carries the same ``_parsed_ast`` cache
    as the source criterion. Drop those keys so the iteration
    history is JSON-safe.

    Args:
        iterations: A sequence of iteration dicts. Non-dict entries
            (which shouldn't appear in a typed iteration list, but
            could surface from a corrupt on-disk file) pass through
            verbatim so the caller can still observe the corruption.

    Returns:
        A new list of shallow-copied iteration dicts. The originals
        are never mutated.
    """
    out: list[dict[str, Any]] = []
    for iteration in iterations:
        if not isinstance(iteration, Mapping):
            # Keep malformed entries in place rather than dropping or
            # coercing them; the cast only satisfies the type checker.
            out.append(cast("dict[str, Any]", iteration))
            continue
        copy = dict(iteration)
        evals = copy.get("criteria_evaluation")
        # Only a real ``list`` of evaluations is rewritten. Private keys
        # on the iteration dict itself are deliberately left alone.
        if isinstance(evals, list):
            copy["criteria_evaluation"] = [
                _strip_private_dict(e) if isinstance(e, Mapping) else e for e in evals
            ]
        out.append(copy)
    return out