Coverage for mcp/resources/inference.py: 100%
20 statements
« prev ^ index » next coverage.py v7.14.1, created at 2026-06-15 15:07 +0000
« prev ^ index » next coverage.py v7.14.1, created at 2026-06-15 15:07 +0000
1"""Live inference endpoint resources (gco://inference/...) for the GCO MCP server.
3Reads the desired-state record from the inference DynamoDB store via
4``cli/inference.py::InferenceManager.get_endpoint`` and returns it as
5JSON. The store is the source of truth that the per-region
6``inference_monitor`` reconciles against, so this is the most
7authoritative spec available without reaching into Kubernetes.
8"""
10from __future__ import annotations
12import json
13import re
14from typing import Any
16# Same RFC 1123 label rule as job names — endpoint names map to
17# Kubernetes deployments and services in every regional cluster.
18_ENDPOINT_NAME_RE = re.compile(r"^[a-z0-9](?:[-a-z0-9]{0,251}[a-z0-9])?$")
def _get_manager() -> Any:
    """Build and return a fresh ``InferenceManager``.

    The import lives inside the function so that importing this resource
    module at MCP server start-up does not pull boto3 in.
    """
    from cli.inference import InferenceManager as manager_cls

    manager = manager_cls()
    return manager
def _inference_resource(endpoint_name: str) -> str:
    """Return the stored spec for ``endpoint_name`` as a JSON string.

    Every outcome is reported as JSON text rather than raised:

    * name fails validation: an object with ``error`` set to
      ``"invalid endpoint_name"`` plus ``detail`` and the offending ``value``;
    * the lookup raises: an object with ``error`` set to the exception
      text plus ``endpoint_name``;
    * the lookup returns ``None``: an object with ``error`` set to
      ``"endpoint not found"`` plus ``endpoint_name``;
    * otherwise: the record itself, indented by two spaces, with values
      that are not JSON-serialisable rendered through ``str``.
    """
    # ``fullmatch`` rather than ``match``: under ``match`` the pattern's
    # trailing ``$`` also matches just before a final newline, so a name
    # with a trailing line break would pass validation and be handed to
    # the store lookup.
    if _ENDPOINT_NAME_RE.fullmatch(endpoint_name) is None:
        return json.dumps(
            {
                "error": "invalid endpoint_name",
                "detail": "must match ^[a-z0-9](?:[-a-z0-9]{0,251}[a-z0-9])?$",
                "value": endpoint_name,
            }
        )
    try:
        # Building the manager stays inside the ``try`` so that failures
        # while importing or constructing it are also reported as JSON.
        record = _get_manager().get_endpoint(endpoint_name)
    except Exception as e:  # noqa: BLE001
        # Deliberately broad: this function reports failures in-band
        # instead of propagating them to the caller.
        return json.dumps({"error": str(e), "endpoint_name": endpoint_name})
    if record is None:
        return json.dumps({"error": "endpoint not found", "endpoint_name": endpoint_name})
    return json.dumps(record, indent=2, default=str)
def register(mcp_instance: Any) -> None:
    """Attach the live inference-state resource to the shared MCP server.

    Binds ``_inference_resource`` to the
    ``gco://inference/{endpoint_name}`` URI template.
    """
    bind_resource = mcp_instance.resource("gco://inference/{endpoint_name}")
    bind_resource(_inference_resource)