Coverage for mcp/mission/types.py: 100%
115 statements
« prev ^ index » next coverage.py v7.14.1, created at 2026-06-15 15:07 +0000
« prev ^ index » next coverage.py v7.14.1, created at 2026-06-15 15:07 +0000
"""Domain types for the Mission goal-directed iteration loop.

All structured types live in one module so the engine, validators, sampler,
and tool wrappers share the same shape. They are ``TypedDict`` classes (not
``dataclass`` classes) so that ``json.dumps`` / ``json.loads`` round-trip
without any custom serialization. The ``version`` field on
:class:`SessionState` is checked on every load against
:data:`mcp.mission.SCHEMA_VERSION` (re-exported here as
:data:`SCHEMA_VERSION` for callers that import only this module).

Optional keys use :data:`typing.NotRequired` so that ``mypy --strict`` accepts
absence on dict literals while still rejecting an unknown key.
"""
14from __future__ import annotations
16from typing import Any, Literal, NotRequired, TypedDict
18# Re-exported here so callers that import only ``mcp.mission.types`` can read
19# the schema version without an extra import. The canonical value lives on
20# the package ``__init__``.
21from . import SCHEMA_VERSION as SCHEMA_VERSION
23# ---------------------------------------------------------------------------
24# Literal type aliases
25# ---------------------------------------------------------------------------
VerdictLabel = Literal["continue", "adjust", "complete", "terminate"]
"""The four possible Decide_Phase outputs.

``continue`` keeps the loop running with the current strategy. ``adjust``
runs another iteration with a Strategy_Revision. ``complete`` ends the
session as success. ``terminate`` ends the session as give-up.
"""

VerdictReason = Literal[
    "in_progress",
    "cadence_skip",
    "criteria_met",
    "forced_complete",
    "heuristic_unproductive",
    "max_iterations",
    "max_wall_clock",
    "no_progress",
    "user_abort",
]
"""The exhaustive set of reasons that pair with a :data:`VerdictLabel`."""

StatusLabel = Literal["pending", "running", "paused", "completed", "terminated", "failed"]
"""The lifecycle states of a :class:`SessionState`."""

CriterionKind = Literal[
    "metric_threshold", "event", "predicate", "tool_call_succeeded", "metric_trend"
]
"""The five Criterion evaluator kinds.

``metric_trend`` is the history-aware kind: rather than comparing a single
point-in-time value to a fixed target (``metric_threshold``), it evaluates the
direction of a metric across iterations using the cumulative metric history the
engine accumulates in :meth:`MissionEngine._build_cumulative_observation`.
"""

MetricTrendDirection = Literal["decreasing", "increasing", "non_increasing", "non_decreasing"]
"""The four trend directions a ``metric_trend`` criterion can require.

``decreasing`` / ``increasing`` require a strict net change across the window
(last < first / last > first); ``non_increasing`` / ``non_decreasing`` allow a
flat series (last <= first / last >= first).
"""

SamplingStatus = Literal["used", "rejected", "fallback", "unavailable", "disabled"]
"""The terminal status of a single sampling attempt on an iteration."""

CadenceKind = Literal["every_iteration", "every_n_iterations", "every_t_seconds", "on_event"]
"""The four supported Checkpoint_Cadence kinds."""
77# ---------------------------------------------------------------------------
78# Terminal-state sets
79# ---------------------------------------------------------------------------
TERMINAL_STATES: frozenset[StatusLabel] = frozenset({"completed", "terminated", "failed"})
"""The :data:`StatusLabel` values from which a session cannot transition.

A session in any of these states refuses further ``mission_iterate`` calls
with ``session_terminal``. The engine consults this set on every iteration
entry to short-circuit before performing any work.
"""

TERMINAL_VERDICTS: frozenset[VerdictLabel] = frozenset({"complete", "terminate"})
"""The :data:`VerdictLabel` values that end a session.

When the Decide_Phase emits a verdict in this set, the engine writes a
Final_Report and transitions the session to ``completed`` or ``terminated``
(matching the verdict).
"""
98# ---------------------------------------------------------------------------
99# Criterion and CriterionResult
100# ---------------------------------------------------------------------------
103class Criterion(TypedDict):
104 """A single machine-checkable success condition.
106 The kind-specific keys (``metric``/``op``/``target`` for
107 ``metric_threshold``, ``event_name`` for ``event``, ``expression`` for
108 ``predicate``, ``tool_name``/``min_count`` for ``tool_call_succeeded``,
109 ``metric``/``direction``/``window``/``min_points`` for ``metric_trend``)
110 are not declared on the base ``TypedDict`` because they are mutually
111 exclusive per ``kind``. Validators in ``mcp.mission.validation`` verify
112 the right keys are present for each ``kind`` and may attach a private
113 cached AST under ``_parsed_ast`` for ``predicate`` entries.
114 """
116 criterion_id: str
117 kind: CriterionKind
118 required: bool
119 # Kind-specific keys (validator-enforced):
120 metric: NotRequired[str]
121 op: NotRequired[Literal["<", "<=", ">", ">=", "==", "!="]]
122 target: NotRequired[float]
123 event_name: NotRequired[str]
124 expression: NotRequired[str]
125 tool_name: NotRequired[str]
126 min_count: NotRequired[int]
127 # metric_trend keys: ``direction`` is required for the kind; ``window``
128 # bounds how many of the most-recent points are considered (default: all
129 # available); ``min_points`` is the minimum number of numeric points
130 # required before the criterion decides met/unmet rather than inconclusive.
131 direction: NotRequired[MetricTrendDirection]
132 window: NotRequired[int]
133 min_points: NotRequired[int]
134 # Cached parsed AST attached by ``validate_criteria`` for predicate entries.
135 _parsed_ast: NotRequired[Any]
class CriterionResult(TypedDict):
    """The outcome of evaluating one :class:`Criterion` at a checkpoint."""

    # Identifier of the criterion this result belongs to.
    criterion_id: str
    status: Literal["met", "unmet", "inconclusive"]
    # Free-form payload backing the status; its shape is not constrained here.
    evidence: Any
    evaluated_at: str  # ISO 8601 UTC
147# ---------------------------------------------------------------------------
148# Budget controls and cadence
149# ---------------------------------------------------------------------------
class BudgetControls(TypedDict):
    """Loop-control caps every Mission_Session declares at start time.

    These are **loop-control** caps — not financial budgets. Mission
    enforces only the caps the loop has direct visibility into:
    iteration count and wall-clock seconds. Cost guardrails live
    out-of-band; configure AWS Budgets and Cost Anomaly Detection at
    the account level for those.

    Both ``max_iterations`` and ``max_wall_clock_seconds`` accept
    either a strictly-positive integer cap or the explicit sentinel
    ``-1`` to opt out of that axis. The validator rejects every other
    shape (zero, other negatives, non-integer types, missing keys),
    and additionally rejects both caps being ``-1`` simultaneously
    (with ``reason="at_least_one_cap_required"``) since that would
    leave the loop with no axis-driven termination — a runaway-loop
    config error.
    """

    # Strictly-positive cap on iteration count, or ``-1`` to opt out.
    max_iterations: int
    # Strictly-positive cap on wall-clock seconds, or ``-1`` to opt out.
    max_wall_clock_seconds: int
175class Cadence(TypedDict):
176 """The Checkpoint_Cadence configuration on a session.
178 ``n`` is required for ``every_n_iterations``. ``t`` is required for
179 ``every_t_seconds``. ``event_name`` is required for ``on_event``. The
180 base ``every_iteration`` requires no extra keys.
181 """
183 kind: CadenceKind
184 n: NotRequired[int]
185 t: NotRequired[int]
186 event_name: NotRequired[str]
189# ---------------------------------------------------------------------------
190# Tool calls and strategy
191# ---------------------------------------------------------------------------
194class ToolCallRecord(TypedDict):
195 """A single tool invocation recorded during an Iteration's Execute_Phase.
197 Used both for direct ``tool_calls`` strategies and for in-script calls
198 captured by the Mission_Sandbox under ``IterationRecord.script_call_log``.
199 """
201 tool_name: str
202 args: dict[str, Any]
203 status: Literal["ok", "failed", "skipped_not_allowed"]
204 result_summary: Any
205 duration_ms: int
206 error_message: NotRequired[str]
class Strategy(TypedDict, total=False):
    """The Propose_Phase output. Carries one of ``tool_calls`` or ``script``.

    ``total=False`` because every key is optional in isolation; the
    ``validate_strategy`` validator enforces the mutual-exclusivity rule
    (exactly one of ``tool_calls`` or ``script`` must be present and
    non-empty).
    """

    # Exactly one of the next two keys is expected (validator-enforced).
    tool_calls: list[dict[str, Any]]
    script: str
    expected_observation_keys: list[str]
    rationale: str
224# ---------------------------------------------------------------------------
225# Observation, Phase, Iteration, Session
226# ---------------------------------------------------------------------------
229class Observation(TypedDict):
230 """The Observe_Phase output — a normalized view of Execute_Phase results."""
232 tool_results: list[Any]
233 metrics: dict[str, Any]
234 events: list[dict[str, Any]]
235 errors: NotRequired[list[dict[str, Any]]]
236 # Cumulative, history-aware view of every numeric metric seen across the
237 # session, keyed by metric name and ordered oldest→newest. Present only on
238 # the *cumulative* observation the Evaluate_Phase builds (see
239 # :meth:`MissionEngine._build_cumulative_observation`); the per-iteration
240 # Observation written to ``record["observation"]`` keeps ``metrics``
241 # strictly point-in-time and does not carry this key. Consumed by the
242 # ``metric_trend`` criterion and available to predicates.
243 metric_history: NotRequired[dict[str, list[float]]]
244 phase_started_at: str
245 phase_ended_at: str
248class PhaseRecord(TypedDict):
249 """One row in :attr:`IterationRecord.phases`. One per phase regardless of outcome."""
251 phase: Literal["propose", "execute", "observe", "evaluate", "decide"]
252 status: Literal["succeeded", "failed"]
253 started_at: str
254 ended_at: str
255 error_message: NotRequired[str]
258class IterationRecord(TypedDict):
259 """The complete record of one pass through the five-phase cycle.
261 Sampling-related fields (``sampling_status``, ``sampling_output``,
262 ``sampling_rejection_reason``) are present only when the iteration
263 triggered an advisory-path sampling call. ``script_call_log`` is
264 present only when the strategy carried a ``script``.
265 """
267 iteration_index: int
268 started_at: str
269 ended_at: str
270 phases: list[PhaseRecord]
271 strategy: Strategy
272 observation: Observation
273 criteria_evaluation: list[CriterionResult]
274 verdict: VerdictLabel
275 verdict_reason: VerdictReason
276 revision_rationale: NotRequired[str]
277 checkpoint_evaluated: bool
278 sampling_status: NotRequired[SamplingStatus]
279 sampling_output: NotRequired[str]
280 sampling_rejection_reason: NotRequired[str]
281 script_call_log: NotRequired[list[ToolCallRecord]]
282 # Set by ``_execute_script`` when the sandbox runner raises
283 # :class:`mcp.mission.sandbox.SandboxTerminated`. The Decide_Phase's
284 # cascade reads this sentinel before any other branch and emits
285 # ``("terminate", <reason>)`` so a sandbox cap propagates up to the
286 # budget-cap path rather than failing the iteration as a phase
287 # exception. Carries the wall-clock :data:`VerdictReason`
288 # ``max_wall_clock`` for duration / memory / runtime caps.
289 sandbox_terminated_reason: NotRequired[VerdictReason]
292class SessionState(TypedDict):
293 """The durable Mission_Session payload persisted by Mission_State_Backend.
295 The ``version`` field carries :data:`SCHEMA_VERSION`; loaders compare it
296 against the current value and reject mismatches. Optional fields are
297 populated as the session progresses (``started_at`` on first iteration,
298 ``ended_at`` and ``final_report_path`` on terminal verdict, etc.).
299 """
301 version: int
302 session_id: str
303 directive_text: str
304 criteria: list[Criterion]
305 budget: BudgetControls
306 tool_allowlist: list[str]
307 checkpoint_cadence: Cadence
308 stagnation_threshold: int
309 use_sampling: bool
310 sampling_backend_resolved: NotRequired[Literal["mcp", "bedrock", "none"]]
311 bedrock_model_id: NotRequired[str]
312 allow_scripted_strategies: bool
313 sampling_model_preferences: NotRequired[dict[str, Any]]
314 status: StatusLabel
315 created_at: str
316 started_at: NotRequired[str]
317 ended_at: NotRequired[str]
318 iterations: list[IterationRecord]
319 no_progress_counter: int
320 last_checkpoint_at: NotRequired[str]
321 final_verdict: NotRequired[VerdictLabel]
322 final_report_path: NotRequired[str]