Coverage for mcp/tools/capacity.py: 98%

1"""Capacity checking and recommendation MCP tools."""

3import cli_runner

4from audit import audit_logged

5from feature_flags import FLAG_CAPACITY_PURCHASE, is_enabled

6from server import mcp

@mcp.tool(tags={"safe", "capacity"})
@audit_logged
def check_capacity(instance_type: str, region: str) -> str:
    """Check spot and on-demand capacity for one instance type in one region.

    Args:
        instance_type: EC2 instance type (e.g. g4dn.xlarge, g5.2xlarge, p4d.24xlarge).
        region: AWS region to check.

    Returns:
        The result of running ``capacity check`` through ``cli_runner._run_cli``.
    """
    cli_args = ("capacity", "check", "-i", instance_type, "-r", region)
    return cli_runner._run_cli(*cli_args)

@mcp.tool(tags={"safe", "capacity"})
@audit_logged
def capacity_status(region: str | None = None) -> str:
    """Show capacity status across all deployed regions, or for a single one.

    Args:
        region: Specific region, or omit for all regions.

    Returns:
        The result of running ``capacity status`` through ``cli_runner._run_cli``.
    """
    # The region option is only forwarded when a non-empty region was supplied.
    region_opt = ["-r", region] if region else []
    return cli_runner._run_cli("capacity", "status", *region_opt)

@mcp.tool(tags={"safe", "capacity"})
@audit_logged
def recommend_region(
    gpu: bool = False, instance_type: str | None = None, gpu_count: int = 0
) -> str:
    """Recommend the best region for a workload based on capacity.

    Args:
        gpu: Whether the workload requires GPUs.
        instance_type: Specific instance type to check. When provided, uses weighted
            multi-signal scoring (spot placement scores, pricing, queue depth, etc.).
        gpu_count: Number of GPUs required for the workload.

    Returns:
        The result of running ``capacity recommend-region`` through
        ``cli_runner._run_cli``.
    """
    # Each optional piece collapses to an empty list when its argument is falsy,
    # so only explicitly requested options reach the CLI.
    gpu_flag = ["--gpu"] if gpu else []
    type_opt = ["-i", instance_type] if instance_type else []
    count_opt = ["--gpu-count", str(gpu_count)] if gpu_count else []
    return cli_runner._run_cli(
        "capacity", "recommend-region", *gpu_flag, *type_opt, *count_opt
    )

@mcp.tool(tags={"safe", "capacity"})
@audit_logged
def spot_prices(instance_type: str, region: str) -> str:
    """Fetch the current spot prices for an instance type in a region.

    Args:
        instance_type: EC2 instance type.
        region: AWS region.

    Returns:
        The result of running ``capacity spot-prices`` through ``cli_runner._run_cli``.
    """
    cli_args = ("capacity", "spot-prices", "-i", instance_type, "-r", region)
    return cli_runner._run_cli(*cli_args)

# Bedrock model used when the caller does not pick one. Defined once so the
# signature default and the "was it overridden?" check cannot drift apart.
_DEFAULT_BEDROCK_MODEL = "anthropic.claude-sonnet-4-5-20250929-v1:0"


@mcp.tool(tags={"safe", "capacity"})
@audit_logged
def ai_recommend(
    workload: str,
    instance_type: str | None = None,
    region: str | None = None,
    gpu: bool = False,
    min_gpus: int = 0,
    min_memory_gb: int = 0,
    fault_tolerance: str = "low",
    max_cost: float | None = None,
    model: str = _DEFAULT_BEDROCK_MODEL,
) -> str:
    """Get AI-powered capacity recommendation using Amazon Bedrock.

    Gathers comprehensive capacity data (spot scores, pricing, cluster
    utilization, queue depth) and sends it to an LLM for analysis.
    Returns a recommended region, instance type, capacity type, and reasoning.

    Requires AWS credentials with bedrock:InvokeModel permission and the
    specified model enabled in your account.

    Args:
        workload: Description of the workload (e.g. "Fine-tuning a 20B parameter LLM").
        instance_type: Specific instance type(s) to consider (e.g. "p4d.24xlarge").
        region: Specific region(s) to consider (e.g. "us-east-1").
        gpu: Whether the workload requires GPUs.
        min_gpus: Minimum number of GPUs required.
        min_memory_gb: Minimum GPU memory in GB.
        fault_tolerance: Tolerance for interruptions ("low", "medium", "high").
        max_cost: Maximum acceptable cost per hour in USD.
        model: Bedrock model ID to use for analysis.

    Returns:
        The result of running ``capacity ai-recommend`` through
        ``cli_runner._run_cli``.
    """
    args = ["capacity", "ai-recommend", "-w", workload]

    # Options are forwarded only when they differ from this tool's defaults;
    # otherwise the CLI is left to apply its own.
    if instance_type:
        args += ["-i", instance_type]
    if region:
        args += ["-r", region]
    if gpu:
        args.append("--gpu")
    if min_gpus > 0:
        args += ["--min-gpus", str(min_gpus)]
    if min_memory_gb > 0:
        args += ["--min-memory-gb", str(min_memory_gb)]
    if fault_tolerance != "low":
        args += ["--fault-tolerance", fault_tolerance]
    # ``is not None`` rather than truthiness so an explicit 0.0 is still forwarded.
    if max_cost is not None:
        args += ["--max-cost", str(max_cost)]
    if model != _DEFAULT_BEDROCK_MODEL:
        args += ["--model", model]
    return cli_runner._run_cli(*args)

121

122

@mcp.tool(tags={"safe", "capacity"})
@audit_logged
def list_reservations(
    instance_type: str | None = None,
    region: str | None = None,
) -> str:
    """List On-Demand Capacity Reservations (ODCRs) across regions.

    Shows all active capacity reservations with utilization details.

    Args:
        instance_type: Filter by instance type (e.g. p5.48xlarge).
        region: Filter by specific region.

    Returns:
        The result of running ``capacity reservations`` through
        ``cli_runner._run_cli``.
    """
    # A filter is forwarded only when a non-empty value was given for it.
    type_filter = ["-i", instance_type] if instance_type else []
    region_filter = ["-r", region] if region else []
    return cli_runner._run_cli("capacity", "reservations", *type_filter, *region_filter)

143

144

@mcp.tool(tags={"safe", "capacity"})
@audit_logged
def reservation_check(
    instance_type: str,
    region: str | None = None,
    count: int = 1,
    include_blocks: bool = True,
    block_duration: int = 24,
) -> str:
    """Check reservation availability and Capacity Block offerings.

    Checks both existing ODCRs and purchasable Capacity Blocks for ML
    workloads. Capacity Blocks provide guaranteed GPU capacity for a
    fixed duration at a known price.

    Args:
        instance_type: GPU instance type (e.g. p4d.24xlarge, p5.48xlarge).
        region: Specific region to check (omit for all deployed regions).
        count: Minimum number of instances needed.
        include_blocks: Whether to include Capacity Block offerings.
        block_duration: Capacity Block duration in hours.

    Returns:
        The result of running ``capacity reservation-check`` through
        ``cli_runner._run_cli``.
    """
    # The instance type and count are always sent; the rest only when set
    # to something other than this tool's defaults.
    cli_args = ["capacity", "reservation-check", "-i", instance_type, "-c", str(count)]
    if region:
        cli_args.extend(["-r", region])
    if not include_blocks:
        cli_args.append("--no-blocks")
    if block_duration != 24:
        cli_args.extend(["--block-duration", str(block_duration)])
    return cli_runner._run_cli(*cli_args)

175

176

# Capacity Block purchasing is disabled by default.
# Set GCO_ENABLE_CAPACITY_PURCHASE=true to enable it.
if is_enabled(FLAG_CAPACITY_PURCHASE):

    @mcp.tool(tags={"cost-incurring", "capacity"})
    @audit_logged
    def reserve_capacity(
        offering_id: str,
        region: str,
        dry_run: bool = False,
    ) -> str:
        """Purchase a Capacity Block offering by its ID.

        Use reservation_check first to find available offerings and their IDs,
        then purchase with this tool. Use dry_run=True to validate without purchasing.

        Args:
            offering_id: Capacity Block offering ID (cb-xxx) from reservation_check.
            region: AWS region where the offering exists.
            dry_run: If True, validate the offering without purchasing (no cost).

        Returns:
            The result of running ``capacity reserve`` through
            ``cli_runner._run_cli``.
        """
        # --dry-run is appended only when the caller asked for validation.
        dry_run_flag = ["--dry-run"] if dry_run else []
        return cli_runner._run_cli(
            "capacity", "reserve", "-o", offering_id, "-r", region, *dry_run_flag
        )

201 return cli_runner._run_cli(*args)