Coverage for mcp/tools/capacity.py: 98%
79 statements
« prev ^ index » next coverage.py v7.14.1, created at 2026-06-15 15:07 +0000
« prev ^ index » next coverage.py v7.14.1, created at 2026-06-15 15:07 +0000
1"""Capacity checking and recommendation MCP tools."""
3import cli_runner
4from audit import audit_logged
5from feature_flags import FLAG_CAPACITY_PURCHASE, is_enabled
6from server import mcp
@mcp.tool(tags={"safe", "capacity"})
@audit_logged
def check_capacity(instance_type: str, region: str) -> str:
    """Check spot and on-demand capacity for a specific instance type.

    Args:
        instance_type: EC2 instance type (e.g. g4dn.xlarge, g5.2xlarge, p4d.24xlarge).
        region: AWS region to check.
    """
    # Both parameters are mandatory, so the argument vector has a fixed shape.
    command = ("capacity", "check", "-i", instance_type, "-r", region)
    return cli_runner._run_cli(*command)
@mcp.tool(tags={"safe", "capacity"})
@audit_logged
def capacity_status(region: str | None = None) -> str:
    """View capacity status across all deployed regions.

    Args:
        region: Specific region, or omit for all regions.
    """
    # A falsy region (None or "") adds nothing, leaving the bare status command.
    region_filter = ["-r", region] if region else []
    return cli_runner._run_cli("capacity", "status", *region_filter)
@mcp.tool(tags={"safe", "capacity"})
@audit_logged
def recommend_region(
    gpu: bool = False, instance_type: str | None = None, gpu_count: int = 0
) -> str:
    """Get optimal region recommendation based on capacity.

    Args:
        gpu: Whether the workload requires GPUs.
        instance_type: Specific instance type to check. When provided, uses weighted
            multi-signal scoring (spot placement scores, pricing, queue depth, etc.).
        gpu_count: Number of GPUs required for the workload.
    """
    # Each optional piece is empty unless its parameter is truthy; the pieces
    # are concatenated in a fixed order: --gpu, -i, --gpu-count.
    gpu_flag = ["--gpu"] if gpu else []
    type_opt = ["-i", instance_type] if instance_type else []
    count_opt = ["--gpu-count", str(gpu_count)] if gpu_count else []

    command = ["capacity", "recommend-region"] + gpu_flag + type_opt + count_opt
    return cli_runner._run_cli(*command)
@mcp.tool(tags={"safe", "capacity"})
@audit_logged
def spot_prices(instance_type: str, region: str) -> str:
    """Get current spot prices for an instance type.

    Args:
        instance_type: EC2 instance type.
        region: AWS region.
    """
    # No optional parameters: the full command is known up front.
    command = ("capacity", "spot-prices", "-i", instance_type, "-r", region)
    return cli_runner._run_cli(*command)
@mcp.tool(tags={"safe", "capacity"})
@audit_logged
def ai_recommend(
    workload: str,
    instance_type: str | None = None,
    region: str | None = None,
    gpu: bool = False,
    min_gpus: int = 0,
    min_memory_gb: int = 0,
    fault_tolerance: str = "low",
    max_cost: float | None = None,
    model: str = "anthropic.claude-sonnet-4-5-20250929-v1:0",
) -> str:
    """Get AI-powered capacity recommendation using Amazon Bedrock.

    Gathers comprehensive capacity data (spot scores, pricing, cluster
    utilization, queue depth) and sends it to an LLM for analysis.
    Returns a recommended region, instance type, capacity type, and reasoning.

    Requires AWS credentials with bedrock:InvokeModel permission and the
    specified model enabled in your account.

    Args:
        workload: Description of the workload (e.g. "Fine-tuning a 20B parameter LLM").
        instance_type: Specific instance type(s) to consider (e.g. "p4d.24xlarge").
        region: Specific region(s) to consider (e.g. "us-east-1").
        gpu: Whether the workload requires GPUs.
        min_gpus: Minimum number of GPUs required.
        min_memory_gb: Minimum GPU memory in GB.
        fault_tolerance: Tolerance for interruptions ("low", "medium", "high").
        max_cost: Maximum acceptable cost per hour in USD.
        model: Bedrock model ID to use for analysis.
    """
    # Same literal as the signature default; --model is only forwarded when
    # the caller picked something else.
    default_model = "anthropic.claude-sonnet-4-5-20250929-v1:0"

    command = ["capacity", "ai-recommend", "-w", workload]

    # (include?, tokens) pairs, listed in the order they must appear on the
    # command line. Options left at their default value are not forwarded.
    optional_parts = (
        (instance_type, ["-i", instance_type]),
        (region, ["-r", region]),
        (gpu, ["--gpu"]),
        (min_gpus > 0, ["--min-gpus", str(min_gpus)]),
        (min_memory_gb > 0, ["--min-memory-gb", str(min_memory_gb)]),
        (fault_tolerance != "low", ["--fault-tolerance", fault_tolerance]),
        (max_cost is not None, ["--max-cost", str(max_cost)]),
        (model != default_model, ["--model", model]),
    )
    for include, tokens in optional_parts:
        if include:
            command.extend(tokens)

    return cli_runner._run_cli(*command)
@mcp.tool(tags={"safe", "capacity"})
@audit_logged
def list_reservations(
    instance_type: str | None = None,
    region: str | None = None,
) -> str:
    """List On-Demand Capacity Reservations (ODCRs) across regions.

    Shows all active capacity reservations with utilization details.

    Args:
        instance_type: Filter by instance type (e.g. p5.48xlarge).
        region: Filter by specific region.
    """
    command = ["capacity", "reservations"]
    # Each filter is forwarded only when the caller supplied a truthy value.
    for flag, value in (("-i", instance_type), ("-r", region)):
        if value:
            command.extend((flag, value))
    return cli_runner._run_cli(*command)
@mcp.tool(tags={"safe", "capacity"})
@audit_logged
def reservation_check(
    instance_type: str,
    region: str | None = None,
    count: int = 1,
    include_blocks: bool = True,
    block_duration: int = 24,
) -> str:
    """Check reservation availability and Capacity Block offerings.

    Checks both existing ODCRs and purchasable Capacity Blocks for ML
    workloads. Capacity Blocks provide guaranteed GPU capacity for a
    fixed duration at a known price.

    Args:
        instance_type: GPU instance type (e.g. p4d.24xlarge, p5.48xlarge).
        region: Specific region to check (omit for all deployed regions).
        count: Minimum number of instances needed.
        include_blocks: Whether to include Capacity Block offerings.
        block_duration: Capacity Block duration in hours.
    """
    # The instance type and count are always sent, even when count is left at 1.
    command = ["capacity", "reservation-check", "-i", instance_type, "-c", str(count)]

    if region:
        command.extend(("-r", region))
    # Blocks are included unless the caller opts out, so only the negative
    # case needs a flag.
    if not include_blocks:
        command.append("--no-blocks")
    # 24 is the signature default; only a different duration is forwarded.
    if block_duration != 24:
        command.extend(("--block-duration", str(block_duration)))

    return cli_runner._run_cli(*command)
177# Capacity Block purchasing — disabled by default.
178# Set GCO_ENABLE_CAPACITY_PURCHASE=true to enable.
if is_enabled(FLAG_CAPACITY_PURCHASE):
    # The purchase tool is only defined (and therefore only decorated with
    # mcp.tool) when the capacity-purchase feature flag is on.

    @mcp.tool(tags={"cost-incurring", "capacity"})
    @audit_logged
    def reserve_capacity(
        offering_id: str,
        region: str,
        dry_run: bool = False,
    ) -> str:
        """Purchase a Capacity Block offering by its ID.

        Use reservation_check first to find available offerings and their IDs,
        then purchase with this tool. Use dry_run=True to validate without purchasing.

        Args:
            offering_id: Capacity Block offering ID (cb-xxx) from reservation_check.
            region: AWS region where the offering exists.
            dry_run: If True, validate the offering without purchasing (no cost).
        """
        # --dry-run is appended last and only when requested.
        dry_run_flag = ("--dry-run",) if dry_run else ()
        return cli_runner._run_cli(
            "capacity", "reserve", "-o", offering_id, "-r", region, *dry_run_flag
        )