Coverage for mcp/mission/_environment.py: 54%
48 statements
« prev ^ index » next coverage.py v7.14.1, created at 2026-06-15 15:07 +0000
« prev ^ index » next coverage.py v7.14.1, created at 2026-06-15 15:07 +0000
1"""Gather slow-moving live environment signals for the sampling prompt.
3The Mission Strategy_Revision prompt has an optional
4``=== Environment context ===`` block (see
5:class:`mcp.mission.sampling.SamplingPrompt.environment_context`) that
6carries cluster + queue snapshots the model would otherwise have to
7spend tool calls to discover. This module produces the dict that fills
8it.
10What lands in the block:
12* ``regions``: sorted list of deployed regions.
13* ``cluster_metrics``: per-region snapshot keyed by region. Each entry
14 carries ``queue_depth``, ``running_jobs``, ``pending_jobs``,
15 ``gpu_utilization``, ``cpu_utilization``, ``recommendation_score``.
16 The keys mirror the fields :class:`cli.capacity.multi_region.RegionCapacity`
17 exposes — the reuse keeps the gather path fast (one CloudWatch +
18 one SQS roundtrip per region) and the data shape consistent with
19 what ``ai_recommend`` already feeds into its own model prompts.
20* ``reservations``: ``{"active_count": int, "by_region": {region: int}}`` summary (counts only,
21 not the full reservation list — the full table is out of scope here
22 because the model can fetch it via the ``list_reservations`` tool
23 if it needs the detail).
25What deliberately does not land in the block:
27* Spot prices and on-demand prices — large, AZ-fanout, and cheap to
28 fetch on demand from the ``spot_prices`` tool when the model has
29 picked an instance shape. Putting them in the prompt up front
30 inflates the byte budget for every iteration even when the model
31 never touches that signal.
32* Capacity-block offerings — same reason; the ``reservation_check``
33 tool surfaces them when needed.
34* Anything timestamp-stamped to second precision. The byte-identical
35 determinism property in :func:`tests.test_mission_sampling.test_assemble_is_deterministic`
36 would start flapping if the prompt embedded a wall clock.
38Failure semantics: every AWS call is wrapped. A total credential failure
39or a missing capacity checker returns ``None`` so the sampling prompt
40omits the section entirely. Per-region partial failures land as the
41default zeroed :class:`RegionCapacity` shape — the model sees the region
42in the list with all-zero metrics rather than not at all, which is the
43honest representation.
44"""
46from __future__ import annotations
48import logging
49from typing import TYPE_CHECKING, Any
51if TYPE_CHECKING: # pragma: no cover - import-time only
52 from collections.abc import Mapping
54 from cli.capacity.multi_region import MultiRegionCapacityChecker
56logger = logging.getLogger(__name__)
59__all__ = ["gather_session_environment"]
def _safe_get_checker() -> MultiRegionCapacityChecker | None:
    """Build the multi-region capacity checker, tolerating any failure.

    The ``cli.capacity.multi_region`` import is performed inside the
    function rather than at module load, so merely importing this module
    does not pull that dependency in. Anything that goes wrong while
    importing the factory or calling it is logged at debug level and
    collapsed to ``None``; the caller treats ``None`` as "skip the
    environment section".

    Returns:
        The object produced by ``get_multi_region_capacity_checker()``,
        or ``None`` if the import or the factory call raised.
    """
    result = None
    try:
        from cli.capacity.multi_region import (  # noqa: PLC0415
            get_multi_region_capacity_checker,
        )

        result = get_multi_region_capacity_checker()
    except Exception as err:  # noqa: BLE001
        # Best-effort by design: a missing dependency or credential
        # problem must not break the caller, so report and fall through.
        logger.debug("environment context gather: checker init failed: %s", err)
    return result
def _summarise_reservations(
    checker: MultiRegionCapacityChecker,
    regions: list[str],
) -> dict[str, Any]:
    """Count active capacity reservations for each of ``regions``.

    A ``CapacityChecker`` is built from ``checker.config`` and asked for
    the ``state="active"`` reservations of every region in turn. Only
    the counts are kept, not the reservation records themselves.

    Args:
        checker: Multi-region checker whose ``config`` attribute is
            handed to ``CapacityChecker``.
        regions: Region names to probe, in the order they should appear
            in ``by_region``.

    Returns:
        ``{"active_count": int, "by_region": {region: int}}``. A region
        whose listing call raises is recorded as ``0`` so the shape does
        not depend on which probes succeeded. If the ``CapacityChecker``
        itself cannot be imported or constructed, the result is
        ``{"active_count": 0, "by_region": {}, "_error":
        "reservation_probe_failed"}``.
    """
    try:
        from cli.capacity.checker import CapacityChecker  # noqa: PLC0415

        probe = CapacityChecker(checker.config)
    except Exception as err:  # noqa: BLE001
        logger.debug("environment context gather: reservation checker init failed: %s", err)
        return {"active_count": 0, "by_region": {}, "_error": "reservation_probe_failed"}

    def _active_in(region: str) -> int:
        """Return the number of active reservations in ``region`` (0 on failure)."""
        try:
            return len(probe.list_capacity_reservations(region, state="active"))
        except Exception as err:  # noqa: BLE001
            logger.debug(
                "environment context gather: reservation list failed for %s: %s", region, err
            )
            return 0

    # Probe once per list entry (duplicates included) so the total matches
    # a running sum over the input; the mapping keeps the last count seen
    # for a repeated region.
    counts = [_active_in(region) for region in regions]
    return {"active_count": sum(counts), "by_region": dict(zip(regions, counts))}
def gather_session_environment(
    session: Mapping[str, Any] | None = None,
    *,
    multi_region_checker: MultiRegionCapacityChecker | None = None,
) -> dict[str, Any] | None:
    """Build the environment-context dict for a Mission session.

    Args:
        session: Optional Mission session state. Currently unused (the
            gather logic is session-independent); the parameter is
            reserved so call sites need not change if per-session
            scoping is added later.
        multi_region_checker: Optional pre-built checker. Only when this
            is ``None`` does the function build one via
            ``_safe_get_checker()``; any supplied object is used as-is,
            which lets tests pass a stub and bypass boto3 entirely.

    Returns:
        ``None`` when no checker is available or when
        ``get_all_regions_capacity()`` raises, so the prompt can omit
        the section entirely. Otherwise a dict with three keys:

        * ``regions``: sorted list of region names.
        * ``cluster_metrics``: per-region metrics, keyed by region and
          inserted in the same sorted order as ``regions``.
        * ``reservations``: the summary from
          ``_summarise_reservations``.

        When the checker reports no capacities, ``regions`` and
        ``cluster_metrics`` are empty rather than the result being
        ``None``, so the operator can see the gather did run. No
        timestamps are embedded, and regions are emitted in sorted
        order, so two calls over the same underlying state produce the
        same output regardless of the order the checker returns regions.
    """
    del session  # reserved; see docstring

    # Test identity, not truthiness: a caller-supplied checker that
    # happens to be falsy (e.g. a stub defining ``__len__``) must still
    # be used instead of being silently swapped for a real one.
    checker = multi_region_checker if multi_region_checker is not None else _safe_get_checker()
    if checker is None:
        return None

    try:
        capacities = checker.get_all_regions_capacity()
    except Exception as exc:  # noqa: BLE001
        logger.debug("environment context gather: get_all_regions_capacity failed: %s", exc)
        return None

    if not capacities:
        # No deployed regions discovered — return an empty shape rather
        # than ``None`` so the operator can see the gather did run.
        return {
            "regions": [],
            "cluster_metrics": {},
            "reservations": _summarise_reservations(checker, []),
        }

    # Collect in the checker's order first so that, if a region is
    # reported more than once, the last entry wins.
    snapshots: dict[str, dict[str, Any]] = {}
    for cap in capacities:
        snapshots[cap.region] = {
            "queue_depth": int(cap.queue_depth),
            "running_jobs": int(cap.running_jobs),
            "pending_jobs": int(cap.pending_jobs),
            "gpu_utilization": float(cap.gpu_utilization),
            "cpu_utilization": float(cap.cpu_utilization),
            "recommendation_score": float(cap.recommendation_score),
        }

    # Re-key in sorted region order: dicts preserve insertion order, so
    # this makes the emitted mapping independent of the order in which
    # the checker happened to return the regions.
    sorted_regions = sorted(snapshots)
    cluster_metrics = {region: snapshots[region] for region in sorted_regions}
    return {
        "regions": sorted_regions,
        "cluster_metrics": cluster_metrics,
        "reservations": _summarise_reservations(checker, sorted_regions),
    }