Coverage for mcp/tools/capacity.py: 98%

79 statements  

« prev     ^ index     » next       coverage.py v7.14.1, created at 2026-06-15 15:07 +0000

1"""Capacity checking and recommendation MCP tools.""" 

2 

3import cli_runner 

4from audit import audit_logged 

5from feature_flags import FLAG_CAPACITY_PURCHASE, is_enabled 

6from server import mcp 

7 

8 

@mcp.tool(tags={"safe", "capacity"})
@audit_logged
def check_capacity(instance_type: str, region: str) -> str:
    """Check spot and on-demand capacity for one instance type in one region.

    Args:
        instance_type: EC2 instance type (e.g. g4dn.xlarge, g5.2xlarge, p4d.24xlarge).
        region: AWS region to check.

    Returns:
        The result of running the ``capacity check`` CLI command.
    """
    # Both arguments are required, so the command line is fixed-shape.
    command = ["capacity", "check", "-i", instance_type, "-r", region]
    return cli_runner._run_cli(*command)

19 

20 

@mcp.tool(tags={"safe", "capacity"})
@audit_logged
def capacity_status(region: str | None = None) -> str:
    """View capacity status across deployed regions.

    Args:
        region: Specific region, or omit for all regions.

    Returns:
        The result of running the ``capacity status`` CLI command.
    """
    # The region filter is only forwarded when a non-empty value was given.
    region_filter = ["-r", region] if region else []
    return cli_runner._run_cli("capacity", "status", *region_filter)

33 

34 

@mcp.tool(tags={"safe", "capacity"})
@audit_logged
def recommend_region(
    gpu: bool = False, instance_type: str | None = None, gpu_count: int = 0
) -> str:
    """Get the optimal region recommendation based on capacity.

    Args:
        gpu: Whether the workload requires GPUs.
        instance_type: Specific instance type to check. When provided, uses weighted
            multi-signal scoring (spot placement scores, pricing, queue depth, etc.).
        gpu_count: Number of GPUs required for the workload.

    Returns:
        The result of running the ``capacity recommend-region`` CLI command.
    """
    command = ["capacity", "recommend-region"]
    # Each option is forwarded only when the caller supplied a truthy value;
    # a gpu_count of 0 therefore means "not specified".
    if gpu:
        command.append("--gpu")
    if instance_type:
        command.extend(["-i", instance_type])
    if gpu_count:
        command.extend(["--gpu-count", str(gpu_count)])
    return cli_runner._run_cli(*command)

56 

57 

@mcp.tool(tags={"safe", "capacity"})
@audit_logged
def spot_prices(instance_type: str, region: str) -> str:
    """Get current spot prices for an instance type in a region.

    Args:
        instance_type: EC2 instance type.
        region: AWS region.

    Returns:
        The result of running the ``capacity spot-prices`` CLI command.
    """
    command = ["capacity", "spot-prices", "-i", instance_type, "-r", region]
    return cli_runner._run_cli(*command)

68 

69 

# Default Bedrock model ID for ai_recommend. Kept in one place so the signature
# default and the "did the caller override it?" check below cannot drift apart.
_DEFAULT_AI_RECOMMEND_MODEL = "anthropic.claude-sonnet-4-5-20250929-v1:0"


@mcp.tool(tags={"safe", "capacity"})
@audit_logged
def ai_recommend(
    workload: str,
    instance_type: str | None = None,
    region: str | None = None,
    gpu: bool = False,
    min_gpus: int = 0,
    min_memory_gb: int = 0,
    fault_tolerance: str = "low",
    max_cost: float | None = None,
    model: str = _DEFAULT_AI_RECOMMEND_MODEL,
) -> str:
    """Get AI-powered capacity recommendation using Amazon Bedrock.

    Gathers comprehensive capacity data (spot scores, pricing, cluster
    utilization, queue depth) and sends it to an LLM for analysis.
    Returns a recommended region, instance type, capacity type, and reasoning.

    Requires AWS credentials with bedrock:InvokeModel permission and the
    specified model enabled in your account.

    Args:
        workload: Description of the workload (e.g. "Fine-tuning a 20B parameter LLM").
        instance_type: Specific instance type(s) to consider (e.g. "p4d.24xlarge").
        region: Specific region(s) to consider (e.g. "us-east-1").
        gpu: Whether the workload requires GPUs.
        min_gpus: Minimum number of GPUs required.
        min_memory_gb: Minimum GPU memory in GB.
        fault_tolerance: Tolerance for interruptions ("low", "medium", "high").
        max_cost: Maximum acceptable cost per hour in USD.
        model: Bedrock model ID to use for analysis.

    Returns:
        The result of running the ``capacity ai-recommend`` CLI command.
    """
    args = ["capacity", "ai-recommend", "-w", workload]
    if instance_type:
        args += ["-i", instance_type]
    if region:
        args += ["-r", region]
    if gpu:
        args.append("--gpu")
    # Non-positive minimums are treated as "no constraint" and not forwarded.
    if min_gpus > 0:
        args += ["--min-gpus", str(min_gpus)]
    if min_memory_gb > 0:
        args += ["--min-memory-gb", str(min_memory_gb)]
    # Options still at this function's default are omitted from the command
    # line (presumably the CLI applies the same defaults - verify against CLI).
    if fault_tolerance != "low":
        args += ["--fault-tolerance", fault_tolerance]
    # `is not None` rather than truthiness so an explicit 0 is still forwarded.
    if max_cost is not None:
        args += ["--max-cost", str(max_cost)]
    if model != _DEFAULT_AI_RECOMMEND_MODEL:
        args += ["--model", model]
    return cli_runner._run_cli(*args)

121 

122 

@mcp.tool(tags={"safe", "capacity"})
@audit_logged
def list_reservations(
    instance_type: str | None = None,
    region: str | None = None,
) -> str:
    """List On-Demand Capacity Reservations (ODCRs) across regions.

    Shows all active capacity reservations with utilization details.

    Args:
        instance_type: Filter by instance type (e.g. p5.48xlarge).
        region: Filter by specific region.

    Returns:
        The result of running the ``capacity reservations`` CLI command.
    """
    # Filters are optional; each is appended only when a non-empty value is given.
    command = [
        "capacity",
        "reservations",
        *(["-i", instance_type] if instance_type else []),
        *(["-r", region] if region else []),
    ]
    return cli_runner._run_cli(*command)

143 

144 

@mcp.tool(tags={"safe", "capacity"})
@audit_logged
def reservation_check(
    instance_type: str,
    region: str | None = None,
    count: int = 1,
    include_blocks: bool = True,
    block_duration: int = 24,
) -> str:
    """Check reservation availability and Capacity Block offerings.

    Checks both existing ODCRs and purchasable Capacity Blocks for ML
    workloads. Capacity Blocks provide guaranteed GPU capacity for a
    fixed duration at a known price.

    Args:
        instance_type: GPU instance type (e.g. p4d.24xlarge, p5.48xlarge).
        region: Specific region to check (omit for all deployed regions).
        count: Minimum number of instances needed.
        include_blocks: Whether to include Capacity Block offerings.
        block_duration: Capacity Block duration in hours.

    Returns:
        The result of running the ``capacity reservation-check`` CLI command.
    """
    # Instance type and count are always sent; the rest are opt-in flags.
    command = ["capacity", "reservation-check", "-i", instance_type, "-c", str(count)]
    if region:
        command.extend(["-r", region])
    if not include_blocks:
        command.append("--no-blocks")
    # The duration flag is only emitted when it differs from this function's default of 24.
    if block_duration != 24:
        command.extend(["--block-duration", str(block_duration)])
    return cli_runner._run_cli(*command)

175 

176 

# Capacity Block purchasing is disabled by default; the tool below is only
# defined and registered when the capacity-purchase feature flag is on.
# Set GCO_ENABLE_CAPACITY_PURCHASE=true to enable.
if is_enabled(FLAG_CAPACITY_PURCHASE):

    @mcp.tool(tags={"cost-incurring", "capacity"})
    @audit_logged
    def reserve_capacity(
        offering_id: str,
        region: str,
        dry_run: bool = False,
    ) -> str:
        """Purchase a Capacity Block offering by its ID.

        Use reservation_check first to find available offerings and their IDs,
        then purchase with this tool. Use dry_run=True to validate without purchasing.

        Args:
            offering_id: Capacity Block offering ID (cb-xxx) from reservation_check.
            region: AWS region where the offering exists.
            dry_run: If True, validate the offering without purchasing (no cost).

        Returns:
            The result of running the ``capacity reserve`` CLI command.
        """
        # --dry-run is appended last and only when explicitly requested.
        dry_run_flag = ["--dry-run"] if dry_run else []
        return cli_runner._run_cli(
            "capacity", "reserve", "-o", offering_id, "-r", region, *dry_run_flag
        )

199 if dry_run: 

200 args.append("--dry-run") 

201 return cli_runner._run_cli(*args)