Coverage for mcp/mission/_environment.py: 54%

1"""Gather slow-moving live environment signals for the sampling prompt.

3The Mission Strategy_Revision prompt has an optional

4``=== Environment context ===`` block (see

5:class:`mcp.mission.sampling.SamplingPrompt.environment_context`) that

6carries cluster + queue snapshots the model would otherwise have to

7spend tool calls to discover. This module produces the dict that fills

8it.

10What lands in the block:

12* ``regions``: sorted list of deployed regions.

13* ``cluster_metrics``: per-region snapshot keyed by region. Each entry

14 carries ``queue_depth``, ``running_jobs``, ``pending_jobs``,

15 ``gpu_utilization``, ``cpu_utilization``, ``recommendation_score``.

16 The keys mirror the fields :class:`cli.capacity.multi_region.RegionCapacity`

17 exposes — the reuse keeps the gather path fast (one CloudWatch +

18 one SQS roundtrip per region) and the data shape consistent with

19 what ``ai_recommend`` already feeds into its own model prompts.

* ``reservations``: ``{"active_count": int, "by_region": {region: int}}``
  summary (counts only, not the full reservation list — the full table
  is out of scope here because the model can fetch it via the
  ``list_reservations`` tool if it needs the detail). An ``"_error"``
  key is added when the reservation probe cannot be initialised.

25What deliberately does not land in the block:

27* Spot prices and on-demand prices — large, AZ-fanout, and cheap to

28 fetch on demand from the ``spot_prices`` tool when the model has

29 picked an instance shape. Putting them in the prompt up front

30 inflates the byte budget for every iteration even when the model

31 never touches that signal.

32* Capacity-block offerings — same reason; the ``reservation_check``

33 tool surfaces them when needed.

34* Anything timestamp-stamped to second precision. The byte-identical

35 determinism property in :func:`tests.test_mission_sampling.test_assemble_is_deterministic`

36 would start flapping if the prompt embedded a wall clock.

38Failure semantics: every AWS call is wrapped. A total credential failure

39or a missing capacity checker returns ``None`` so the sampling prompt

40omits the section entirely. Per-region partial failures land as the

41default zeroed :class:`RegionCapacity` shape — the model sees the region

42in the list with all-zero metrics rather than not at all, which is the

43honest representation.

44"""

46from __future__ import annotations

48import logging

49from typing import TYPE_CHECKING, Any

51if TYPE_CHECKING: # pragma: no cover - import-time only

52 from collections.abc import Mapping

54 from cli.capacity.multi_region import MultiRegionCapacityChecker

56logger = logging.getLogger(__name__)

59__all__ = ["gather_session_environment"]

def _safe_get_checker() -> MultiRegionCapacityChecker | None:
    """Build the multi-region capacity checker on a best-effort basis.

    The ``cli.capacity.multi_region`` import happens inside the function
    body, so merely importing this module does not load it. Anything
    that goes wrong while importing the factory or while calling it is
    caught, logged at debug level, and reported as ``None``.

    Returns:
        The object produced by ``get_multi_region_capacity_checker()``,
        or ``None`` if the import or the factory call raised.
    """
    try:
        # Deferred import: keeps this dependency off the module-import path.
        from cli.capacity.multi_region import (  # noqa: PLC0415
            get_multi_region_capacity_checker as _build_checker,
        )

        built = _build_checker()
    except Exception as err:  # noqa: BLE001
        logger.debug("environment context gather: checker init failed: %s", err)
        return None
    return built

def _summarise_reservations(
    checker: MultiRegionCapacityChecker,
    regions: list[str],
) -> dict[str, Any]:
    """Count active capacity reservations for each region in ``regions``.

    A ``CapacityChecker`` is constructed from ``checker.config`` and asked,
    one region at a time, for reservations with ``state="active"``. Only
    the lengths of those results are kept; the detailed entries are
    discarded.

    Args:
        checker: Multi-region checker; only its ``config`` attribute is read.
        regions: Region names to probe, in the order they should appear in
            ``by_region``.

    Returns:
        ``{"active_count": int, "by_region": {region: int}}`` where
        ``active_count`` is the sum of the per-region counts. A region whose
        lookup raises contributes ``0``. If the ``CapacityChecker`` cannot be
        imported or constructed, the result is ``{"active_count": 0,
        "by_region": {}, "_error": "reservation_probe_failed"}``.
    """
    try:
        from cli.capacity.checker import CapacityChecker  # noqa: PLC0415

        probe = CapacityChecker(checker.config)
    except Exception as err:  # noqa: BLE001
        logger.debug("environment context gather: reservation checker init failed: %s", err)
        return {"active_count": 0, "by_region": {}, "_error": "reservation_probe_failed"}

    def _active_in(region_name: str) -> int:
        """Return the active-reservation count for one region, ``0`` on error."""
        try:
            return len(probe.list_capacity_reservations(region_name, state="active"))
        except Exception as err:  # noqa: BLE001
            logger.debug(
                "environment context gather: reservation list failed for %s: %s", region_name, err
            )
            return 0

    # Single pass over ``regions``; pairs keep per-occurrence counts so the
    # total matches a running sum even if a region name repeats.
    per_region = [(name, _active_in(name)) for name in regions]
    return {
        "active_count": sum(count for _, count in per_region),
        "by_region": dict(per_region),
    }

122

123

def gather_session_environment(
    session: Mapping[str, Any] | None = None,
    *,
    multi_region_checker: MultiRegionCapacityChecker | None = None,
) -> dict[str, Any] | None:
    """Build the environment-context dict for a Mission session.

    Args:
        session: Optional Mission session state. Currently unused (it is
            deleted on entry); the parameter exists so call sites do not
            have to change if session-dependent gathering is added later.
        multi_region_checker: Optional pre-built checker. When ``None``,
            one is obtained from ``_safe_get_checker()``. Any non-``None``
            object is used as given, even if it evaluates as falsy. Tests
            can pass a stub here to avoid building a real checker.

    Returns:
        ``None`` when no checker is available or when
        ``get_all_regions_capacity()`` raises, so the caller can omit the
        section entirely. Otherwise a dict with three keys:

        * ``"regions"``: region names in sorted order.
        * ``"cluster_metrics"``: per-region dict of ``queue_depth``,
          ``running_jobs``, ``pending_jobs`` (coerced to ``int``) and
          ``gpu_utilization``, ``cpu_utilization``,
          ``recommendation_score`` (coerced to ``float``), inserted in
          sorted region order.
        * ``"reservations"``: the result of ``_summarise_reservations``
          for those regions.

        When the checker reports no regions, ``"regions"`` and
        ``"cluster_metrics"`` are empty rather than the whole result being
        ``None``, so it is visible that the gather ran. No timestamps are
        embedded, and both ``regions`` and ``cluster_metrics`` are emitted
        in sorted order, so two calls over the same underlying state
        produce the same structure in the same order.
    """
    del session  # reserved; see docstring

    # Test for ``None`` explicitly rather than by truthiness: a supplied
    # checker that happens to be falsy (e.g. a stub defining ``__len__``)
    # must not be silently replaced by a freshly built one.
    checker = multi_region_checker
    if checker is None:
        checker = _safe_get_checker()
    if checker is None:
        return None

    try:
        capacities = checker.get_all_regions_capacity()
    except Exception as exc:  # noqa: BLE001
        logger.debug("environment context gather: get_all_regions_capacity failed: %s", exc)
        return None

    if not capacities:
        # No deployed regions discovered — return an empty shape rather
        # than ``None`` so the operator can see the gather did run.
        return {
            "regions": [],
            "cluster_metrics": {},
            "reservations": _summarise_reservations(checker, []),
        }

    # Keyed by region; if a region appears more than once the last entry wins.
    metrics_by_region: dict[str, dict[str, Any]] = {
        cap.region: {
            "queue_depth": int(cap.queue_depth),
            "running_jobs": int(cap.running_jobs),
            "pending_jobs": int(cap.pending_jobs),
            "gpu_utilization": float(cap.gpu_utilization),
            "cpu_utilization": float(cap.cpu_utilization),
            "recommendation_score": float(cap.recommendation_score),
        }
        for cap in capacities
    }

    sorted_regions = sorted(metrics_by_region)
    # Re-key in sorted order so the emitted mapping does not depend on the
    # order in which the checker happened to return regions.
    cluster_metrics = {region: metrics_by_region[region] for region in sorted_regions}
    return {
        "regions": sorted_regions,
        "cluster_metrics": cluster_metrics,
        "reservations": _summarise_reservations(checker, sorted_regions),
    }