Coverage for mcp/mission/_environment.py: 54%

48 statements  

« prev     ^ index     » next       coverage.py v7.14.1, created at 2026-06-15 15:07 +0000

1"""Gather slow-moving live environment signals for the sampling prompt. 

2 

3The Mission Strategy_Revision prompt has an optional 

4``=== Environment context ===`` block (see 

5:class:`mcp.mission.sampling.SamplingPrompt.environment_context`) that 

6carries cluster + queue snapshots the model would otherwise have to 

7spend tool calls to discover. This module produces the dict that fills 

8it. 

9 

10What lands in the block: 

11 

12* ``regions``: sorted list of deployed regions. 

13* ``cluster_metrics``: per-region snapshot keyed by region. Each entry 

14 carries ``queue_depth``, ``running_jobs``, ``pending_jobs``, 

15 ``gpu_utilization``, ``cpu_utilization``, ``recommendation_score``. 

16 The keys mirror the fields :class:`cli.capacity.multi_region.RegionCapacity` 

17 exposes — the reuse keeps the gather path fast (one CloudWatch + 

18 one SQS roundtrip per region) and the data shape consistent with 

19 what ``ai_recommend`` already feeds into its own model prompts. 

* ``reservations``: ``{"active_count": int, "by_region": {region: int}}``
  summary (counts only, not the full reservation list — the full table
  is out of scope here because the model can fetch it via the
  ``list_reservations`` tool if it needs the detail). An ``"_error"``
  marker is added when the reservation probe itself cannot be built.

24 

25What deliberately does not land in the block: 

26 

27* Spot prices and on-demand prices — large, AZ-fanout, and cheap to 

28 fetch on demand from the ``spot_prices`` tool when the model has 

29 picked an instance shape. Putting them in the prompt up front 

30 inflates the byte budget for every iteration even when the model 

31 never touches that signal. 

32* Capacity-block offerings — same reason; the ``reservation_check`` 

33 tool surfaces them when needed. 

34* Anything timestamp-stamped to second precision. The byte-identical 

35 determinism property in :func:`tests.test_mission_sampling.test_assemble_is_deterministic` 

36 would start flapping if the prompt embedded a wall clock. 

37 

38Failure semantics: every AWS call is wrapped. A total credential failure 

39or a missing capacity checker returns ``None`` so the sampling prompt 

40omits the section entirely. Per-region partial failures land as the 

41default zeroed :class:`RegionCapacity` shape — the model sees the region 

42in the list with all-zero metrics rather than not at all, which is the 

43honest representation. 

44""" 

45 

46from __future__ import annotations 

47 

48import logging 

49from typing import TYPE_CHECKING, Any 

50 

51if TYPE_CHECKING: # pragma: no cover - import-time only 

52 from collections.abc import Mapping 

53 

54 from cli.capacity.multi_region import MultiRegionCapacityChecker 

55 

56logger = logging.getLogger(__name__) 

57 

58 

59__all__ = ["gather_session_environment"] 

60 

61 

def _safe_get_checker() -> MultiRegionCapacityChecker | None:
    """Build the multi-region capacity checker, swallowing any failure.

    The import of ``cli.capacity.multi_region`` happens inside the
    function, so that module is only loaded when a checker is actually
    requested. Whatever goes wrong while importing or constructing the
    checker is logged at debug level and turned into ``None``; callers
    read ``None`` as "skip the env section".

    Returns:
        The object produced by ``get_multi_region_capacity_checker()``,
        or ``None`` if the import or the factory call raised.
    """
    try:
        from cli.capacity.multi_region import (  # noqa: PLC0415
            get_multi_region_capacity_checker as _build_checker,
        )

        instance = _build_checker()
    except Exception as err:  # noqa: BLE001
        # Deliberately broad: this is a best-effort probe, never fatal.
        logger.debug("environment context gather: checker init failed: %s", err)
        return None
    return instance

80 

81 

def _summarise_reservations(
    checker: MultiRegionCapacityChecker,
    regions: list[str],
) -> dict[str, Any]:
    """Count active capacity reservations for each entry in ``regions``.

    A ``CapacityChecker`` is built from ``checker.config`` and asked for
    ``list_capacity_reservations(region, state="active")`` once per
    region; only the length of each answer is kept. The full reservation
    records are intentionally not returned.

    Args:
        checker: Multi-region checker whose ``config`` attribute seeds
            the per-region ``CapacityChecker``.
        regions: Region names to probe, in the order they should appear
            in ``by_region``.

    Returns:
        ``{"active_count": int, "by_region": {region: int}}``. A region
        whose probe raises is recorded as ``0``. If the
        ``CapacityChecker`` itself cannot be imported or constructed the
        result is ``{"active_count": 0, "by_region": {}, "_error":
        "reservation_probe_failed"}``.
    """
    try:
        from cli.capacity.checker import CapacityChecker  # noqa: PLC0415

        probe = CapacityChecker(checker.config)
    except Exception as err:  # noqa: BLE001
        logger.debug("environment context gather: reservation checker init failed: %s", err)
        return {"active_count": 0, "by_region": {}, "_error": "reservation_probe_failed"}

    def _active_in(region_name: str) -> int:
        """Return the active-reservation count for one region, or 0 on failure."""
        try:
            return len(probe.list_capacity_reservations(region_name, state="active"))
        except Exception as err:  # noqa: BLE001
            logger.debug(
                "environment context gather: reservation list failed for %s: %s", region_name, err
            )
            return 0

    # Probe in input order; keep the pairs so the total counts every
    # occurrence while the mapping keeps the last value per region.
    tallies = [(name, _active_in(name)) for name in regions]
    return {
        "active_count": sum(hits for _, hits in tallies),
        "by_region": dict(tallies),
    }

122 

123 

def gather_session_environment(
    session: Mapping[str, Any] | None = None,
    *,
    multi_region_checker: MultiRegionCapacityChecker | None = None,
) -> dict[str, Any] | None:
    """Build the environment-context dict for a Mission session.

    Args:
        session: Optional Mission session state. Currently unused — the
            gather logic is session-independent; the parameter exists so
            call sites do not have to change if per-session scoping is
            added later.
        multi_region_checker: Optional pre-built checker. Only when this
            is ``None`` does the function build one via
            ``_safe_get_checker()``; any other object is used as given,
            even if it happens to evaluate as falsy.

    Returns:
        ``None`` when no checker could be obtained or when
        ``get_all_regions_capacity()`` raised, so the caller can omit
        the section entirely. Otherwise a dict with three keys:

        * ``regions``: sorted list of region names.
        * ``cluster_metrics``: per-region metrics, emitted in the same
          sorted order as ``regions`` so serialising the dict yields the
          same bytes no matter what order the checker reported in.
        * ``reservations``: the summary from ``_summarise_reservations``.

        If the checker reports no regions, ``regions`` and
        ``cluster_metrics`` are empty rather than the result being
        ``None``.
    """
    del session  # reserved; see docstring

    # Compare against None explicitly: ``a or b`` would discard a
    # caller-supplied checker that defines __bool__/__len__ as falsy and
    # fall through to the real AWS-backed factory.
    if multi_region_checker is not None:
        checker = multi_region_checker
    else:
        checker = _safe_get_checker()
    if checker is None:
        return None

    try:
        capacities = checker.get_all_regions_capacity()
    except Exception as exc:  # noqa: BLE001
        logger.debug("environment context gather: get_all_regions_capacity failed: %s", exc)
        return None

    if not capacities:
        # No deployed regions discovered — return an empty shape rather
        # than ``None`` so the operator can see the gather did run.
        return {
            "regions": [],
            "cluster_metrics": {},
            "reservations": _summarise_reservations(checker, []),
        }

    # Collect first (a later entry for the same region overwrites an
    # earlier one), then re-emit in sorted order below.
    snapshots: dict[str, dict[str, Any]] = {}
    for cap in capacities:
        snapshots[cap.region] = {
            "queue_depth": int(cap.queue_depth),
            "running_jobs": int(cap.running_jobs),
            "pending_jobs": int(cap.pending_jobs),
            "gpu_utilization": float(cap.gpu_utilization),
            "cpu_utilization": float(cap.cpu_utilization),
            "recommendation_score": float(cap.recommendation_score),
        }

    sorted_regions = sorted(snapshots)
    # Dicts preserve insertion order, so inserting by sorted region name
    # makes the emitted key order independent of the checker's ordering.
    cluster_metrics = {region: snapshots[region] for region in sorted_regions}
    return {
        "regions": sorted_regions,
        "cluster_metrics": cluster_metrics,
        "reservations": _summarise_reservations(checker, sorted_regions),
    }