Coverage for mcp/tools/stacks.py: 85%
162 statements
« prev ^ index » next coverage.py v7.14.1, created at 2026-06-15 15:07 +0000
« prev ^ index » next coverage.py v7.14.1, created at 2026-06-15 15:07 +0000
1"""Infrastructure stack management MCP tools."""
3from __future__ import annotations
5import asyncio
6from typing import Any
8import cli_runner
9from audit import audit_logged
10from feature_flags import (
11 FLAG_INFRASTRUCTURE_DEPLOY,
12 FLAG_INFRASTRUCTURE_DESTROY,
13 is_enabled,
14)
15from server import mcp
17from tools._long_task import _run_long_task
# FastMCP's Progress / Context dependencies are optional from this
# module's perspective — when ``fastmcp[tasks]`` is reachable they
# inject real instances per call (tests may pass fakes for ``ctx`` /
# ``progress`` instead). When the import fails, both names are set to
# ``None`` and the gated long-running tools below are not defined,
# because their definitions are guarded on both names being non-None.
23try:
24 from fastmcp.server.dependencies import CurrentContext, Progress
25except ImportError: # pragma: no cover - degraded fastmcp install
26 CurrentContext = None # type: ignore[assignment]
27 Progress = None # type: ignore[misc,assignment]
29# TaskConfig opts the gated stack-lifecycle tools into FastMCP's
30# task protocol with ``mode="optional"`` — clients that support the task
31# protocol receive a task ID immediately and poll for progress, while
32# clients without task-protocol support fall back to inline execution
33# with progress streamed through FastMCP's Progress dependency.
34# Required-mode would lock out clients that don't speak the task protocol
35# (e.g. the GCO MCP orchestrator's ``call_tool`` proxy), and these tools
36# are useful enough that the inline fallback is worth keeping.
37# If the import path moves between fastmcp versions, the tools register
38# without the task config and run synchronously.
39try:
40 from fastmcp.server.tasks.config import TaskConfig
42 _TASK_CONFIG_OPTIONAL: Any = TaskConfig(mode="optional")
43except ImportError: # pragma: no cover - degraded fastmcp install
44 _TASK_CONFIG_OPTIONAL = None
47def _expected_stack_count_for_all() -> int | None:
48 """Return the number of stacks ``deploy-all`` / ``destroy-all`` will touch.
50 Reads ``cdk.json``'s ``context.deployment_regions`` and counts the
51 fixed-position stacks (gco-global, gco-api-gateway, gco-monitoring)
52 plus one per regional region. Returns ``None`` when the config is
53 unreadable or empty so the caller falls back to indeterminate
54 progress instead of an inaccurate total.
56 The count drives ``progress.set_total(...)`` so MCP clients render
57 a real percentage during a multi-stack deploy or destroy.
58 """
59 try:
60 from cli.config import _load_cdk_json
61 except Exception: # noqa: BLE001 — best-effort
62 return None
63 try:
64 cdk_regions = _load_cdk_json()
65 except Exception: # noqa: BLE001 — best-effort
66 return None
67 if not isinstance(cdk_regions, dict): 67 ↛ 68line 67 didn't jump to line 68 because the condition on line 67 was never true
68 return None
69 regional = cdk_regions.get("regional") or []
70 if not isinstance(regional, list): 70 ↛ 71line 70 didn't jump to line 71 because the condition on line 70 was never true
71 return None
72 # Three fixed stacks (global / api-gateway / monitoring) plus one
73 # per regional region. Analytics is opt-in and omitted from the
74 # baseline count — when enabled it adds one more stack but
75 # under-reporting is preferable to over-reporting (the progress
76 # bar rolls over rather than stopping at 95 %).
77 return 3 + len(regional)
@mcp.tool(tags={"safe", "stacks"})
@audit_logged
def list_stacks() -> str:
    """List all GCO CDK stacks."""
    # Delegates to the CLI runner with ``stacks list`` and returns its
    # string result unchanged.
    cli_args = ("stacks", "list")
    return cli_runner._run_cli(*cli_args)
@mcp.tool(tags={"safe", "stacks"})
@audit_logged
def stack_status(stack_name: str, region: str) -> str:
    """Get detailed status of a CloudFormation stack.

    Args:
        stack_name: Stack name (e.g. gco-us-east-1).
        region: AWS region.
    """
    # Equivalent CLI invocation: ``stacks status <stack_name> -r <region>``.
    cli_args = ["stacks", "status", stack_name, "-r", region]
    return cli_runner._run_cli(*cli_args)
@mcp.tool(tags={"low-risk", "stacks"})
@audit_logged
def setup_cluster_access(cluster: str | None = None, region: str | None = None) -> str:
    """Configure kubectl access to a GCO EKS cluster.

    Updates kubeconfig, creates an EKS access entry for your IAM principal,
    and associates the cluster admin policy. Handles assumed roles automatically.

    Args:
        cluster: Cluster name (default: gco-{region}).
        region: AWS region (default: first deployment region from cdk.json).
    """
    # Each optional flag is forwarded only when a truthy value was given,
    # so the CLI applies its own defaults otherwise. Order is -c, then -r.
    optional_flags = (("-c", cluster), ("-r", region))
    cli_args = ["stacks", "access"]
    for flag, value in optional_flags:
        if value:
            cli_args += [flag, value]
    return cli_runner._run_cli(*cli_args)
@mcp.tool(tags={"safe", "stacks"})
@audit_logged
def fsx_status() -> str:
    """Check FSx for Lustre configuration status."""
    # Runs ``stacks fsx status`` through the CLI runner and returns its
    # string result unchanged.
    cli_args = ("stacks", "fsx", "status")
    return cli_runner._run_cli(*cli_args)
126# =============================================================================
127# Read-only inspection tools (async)
128# =============================================================================
@mcp.tool(tags={"safe", "stacks"})
@audit_logged
async def stack_diff(stack_name: str | None = None) -> str:
    """`gco stacks diff` — show CloudFormation diff for a stack.

    Args:
        stack_name: Stack to diff. If omitted, diffs all stacks.
    """
    # The stack name is appended only when truthy; without it the CLI is
    # invoked as plain ``stacks diff``.
    target = [stack_name] if stack_name else []
    # The blocking CLI call runs in a worker thread via asyncio.to_thread.
    return await asyncio.to_thread(cli_runner._run_cli, "stacks", "diff", *target)
@mcp.tool(tags={"safe", "stacks"})
@audit_logged
async def stack_outputs(stack_name: str, region: str) -> str:
    """`gco stacks outputs` — fetch CloudFormation outputs for a stack.

    Args:
        stack_name: Stack name (e.g. gco-us-east-1).
        region: AWS region.
    """
    # Equivalent CLI invocation: ``stacks outputs <stack_name> -r <region>``,
    # executed in a worker thread via asyncio.to_thread.
    cli_args = ("stacks", "outputs", stack_name, "-r", region)
    return await asyncio.to_thread(cli_runner._run_cli, *cli_args)
@mcp.tool(tags={"safe", "stacks"})
@audit_logged
async def stack_synth(stack_name: str | None = None, quiet: bool = True) -> str:
    """`gco stacks synth` — synthesize CloudFormation templates from CDK.

    Args:
        stack_name: Stack to synthesize. If omitted, synthesizes all stacks.
        quiet: When True, pass ``--quiet`` to suppress verbose CDK output.
    """
    # Optional pieces, in the order the CLI receives them: the stack name
    # first (when given), then ``--quiet`` (when requested).
    name_part = [stack_name] if stack_name else []
    quiet_part = ["--quiet"] if quiet else []
    return await asyncio.to_thread(
        cli_runner._run_cli, "stacks", "synth", *name_part, *quiet_part
    )
@mcp.tool(tags={"safe", "stacks"})
@audit_logged
async def valkey_status() -> str:
    """`gco stacks valkey status` — show Valkey cache stack status."""
    # Fixed argument list; the blocking CLI call runs in a worker thread.
    cli_args = ("stacks", "valkey", "status")
    return await asyncio.to_thread(cli_runner._run_cli, *cli_args)
@mcp.tool(tags={"safe", "stacks"})
@audit_logged
async def aurora_status() -> str:
    """`gco stacks aurora status` — show Aurora database stack status."""
    # Fixed argument list; the blocking CLI call runs in a worker thread.
    cli_args = ("stacks", "aurora", "status")
    return await asyncio.to_thread(cli_runner._run_cli, *cli_args)
190# =============================================================================
191# Mutating cdk.json toggles (low-risk)
192# =============================================================================
@mcp.tool(tags={"low-risk", "stacks"})
@audit_logged
async def enable_fsx() -> str:
    """`gco stacks fsx enable` — flip FSx Lustre on in cdk.json.

    Note: this only edits the cdk.json toggle. The change does not take effect
    until ``gco stacks deploy-all`` runs to provision the FSx file system.
    """
    # ``-y`` is always passed; the tool call has no interactive prompt path.
    cli_args = ("stacks", "fsx", "enable", "-y")
    return await asyncio.to_thread(cli_runner._run_cli, *cli_args)
@mcp.tool(tags={"low-risk", "stacks"})
@audit_logged
async def disable_fsx() -> str:
    """`gco stacks fsx disable` — flip FSx Lustre off in cdk.json.

    Note: this only edits the cdk.json toggle. The change does not take effect
    until ``gco stacks deploy-all`` runs to remove the FSx file system.
    """
    # ``-y`` is always passed; the tool call has no interactive prompt path.
    cli_args = ("stacks", "fsx", "disable", "-y")
    return await asyncio.to_thread(cli_runner._run_cli, *cli_args)
@mcp.tool(tags={"low-risk", "stacks"})
@audit_logged
async def enable_valkey() -> str:
    """`gco stacks valkey enable` — flip Valkey Serverless on in cdk.json.

    Note: this only edits the cdk.json toggle. The change does not take effect
    until ``gco stacks deploy-all`` runs to provision the Valkey cache.
    """
    # ``-y`` is always passed; the tool call has no interactive prompt path.
    cli_args = ("stacks", "valkey", "enable", "-y")
    return await asyncio.to_thread(cli_runner._run_cli, *cli_args)
@mcp.tool(tags={"low-risk", "stacks"})
@audit_logged
async def disable_valkey() -> str:
    """`gco stacks valkey disable` — flip Valkey Serverless off in cdk.json.

    Note: this only edits the cdk.json toggle. The change does not take effect
    until ``gco stacks deploy-all`` runs to remove the Valkey cache.
    """
    # ``-y`` is always passed; the tool call has no interactive prompt path.
    cli_args = ("stacks", "valkey", "disable", "-y")
    return await asyncio.to_thread(cli_runner._run_cli, *cli_args)
@mcp.tool(tags={"low-risk", "stacks"})
@audit_logged
async def enable_aurora() -> str:
    """`gco stacks aurora enable` — flip Aurora pgvector on in cdk.json.

    Note: this only edits the cdk.json toggle. The change does not take effect
    until ``gco stacks deploy-all`` runs to provision the Aurora cluster.
    """
    # ``-y`` is always passed; the tool call has no interactive prompt path.
    cli_args = ("stacks", "aurora", "enable", "-y")
    return await asyncio.to_thread(cli_runner._run_cli, *cli_args)
@mcp.tool(tags={"low-risk", "stacks"})
@audit_logged
async def disable_aurora() -> str:
    """`gco stacks aurora disable` — flip Aurora pgvector off in cdk.json.

    Note: this only edits the cdk.json toggle. The change does not take effect
    until ``gco stacks deploy-all`` runs to remove the Aurora cluster.
    """
    # ``-y`` is always passed; the tool call has no interactive prompt path.
    cli_args = ("stacks", "aurora", "disable", "-y")
    return await asyncio.to_thread(cli_runner._run_cli, *cli_args)
261# =============================================================================
262# Long-running stack lifecycle tools — gated by GCO_ENABLE_INFRASTRUCTURE_DEPLOY
263# =============================================================================
264#
265# deploy_stack / deploy_all / bootstrap_cdk drive CDK lifecycle operations
266# that exceed the short-running ``cli_runner._run_cli`` 120-second timeout.
267# They run via ``_run_long_task`` so progress streams back through the
268# FastMCP Progress dependency and clients can poll task status through
269# the standard MCP task protocol.
if is_enabled(FLAG_INFRASTRUCTURE_DEPLOY):
    # Decorator keyword arguments shared by every deploy-side tool. The
    # ``task`` entry is only added when TaskConfig imported successfully,
    # so a fastmcp build without it still gets plain tool registration.
    _deploy_decorator_kwargs: dict[str, Any] = {"tags": {"infrastructure", "stacks"}}
    if _TASK_CONFIG_OPTIONAL is not None:
        _deploy_decorator_kwargs.update(task=_TASK_CONFIG_OPTIONAL)

    # The signatures below call CurrentContext() / Progress() to build
    # their defaults, so the definitions are skipped when either name
    # fell back to None at import time.
    if Progress is not None and CurrentContext is not None:

        @mcp.tool(**_deploy_decorator_kwargs)  # type: ignore[untyped-decorator]
        @audit_logged
        async def deploy_stack(
            stack_name: str,
            yes: bool = True,
            outputs_file: str | None = None,
            tags: list[str] | None = None,
            *,
            ctx: Any = CurrentContext(),
            progress: Any = Progress(),
        ) -> str:
            """[gated by GCO_ENABLE_INFRASTRUCTURE_DEPLOY] long-running.

            `gco stacks deploy` — deploy a single CDK stack to AWS.

            Typical wall-clock: 15-30 minutes per regional stack. Clients that
            speak FastMCP's task protocol can receive a task ID immediately
            and poll `tasks://gco/{task_id}` for progress; clients that don't
            run the tool inline with progress streamed through the FastMCP
            Progress dependency. Cancellation sends SIGTERM to the running
            CDK process and partial CloudFormation state may remain — inspect
            via stack_status or the AWS console.

            Args:
                stack_name: Stack to deploy (e.g. ``gco-us-east-1``).
                yes: Skip approval prompts (passes ``-y``). Defaults to True.
                outputs_file: Optional path to write stack outputs JSON.
                tags: Optional list of ``key=value`` tag strings applied to the stack.
            """
            # Full command line, starting with the ``gco`` executable name.
            command = ["gco", "stacks", "deploy", stack_name]
            if yes:
                command.append("-y")
            if outputs_file:
                command.extend(("--outputs-file", outputs_file))
            # One ``--tag`` flag per entry; a missing list adds nothing.
            for tag_spec in tags or ():
                command.extend(("--tag", tag_spec))
            # A single stack is one unit of progress.
            return await _run_long_task(
                command,
                ctx=ctx,
                progress=progress,
                is_stack_op=True,
                total_units=1,
            )

        @mcp.tool(**_deploy_decorator_kwargs)  # type: ignore[untyped-decorator]
        @audit_logged
        async def deploy_all(
            yes: bool = True,
            outputs_file: str | None = None,
            tags: list[str] | None = None,
            parallel: bool = False,
            max_workers: int | None = None,
            *,
            ctx: Any = CurrentContext(),
            progress: Any = Progress(),
        ) -> str:
            """[gated by GCO_ENABLE_INFRASTRUCTURE_DEPLOY] long-running.

            `gco stacks deploy-all` — deploy every CDK stack in dependency order.

            Typical wall-clock: 30-60 minutes for a fresh multi-region deploy.
            Clients that speak FastMCP's task protocol can receive a task ID
            immediately and poll `tasks://gco/{task_id}` for progress; clients
            that don't run the tool inline with progress streamed through the
            FastMCP Progress dependency. Cancellation sends SIGTERM to the
            running CDK process and partial CloudFormation state may remain —
            inspect via stack_status or the AWS console.

            Args:
                yes: Skip approval prompts (passes ``-y``). Defaults to True.
                outputs_file: Optional path to write stack outputs JSON.
                tags: Optional list of ``key=value`` tag strings applied to every stack.
                parallel: Deploy regional stacks concurrently when True.
                max_workers: Cap on parallel deployments when ``parallel=True``.
            """
            command = ["gco", "stacks", "deploy-all"]
            if yes:
                command.append("-y")
            if outputs_file:
                command.extend(("--outputs-file", outputs_file))
            for tag_spec in tags or ():
                command.extend(("--tag", tag_spec))
            if parallel:
                command.append("--parallel")
            # Forwarded whenever a value is supplied, independent of
            # ``parallel``; 0 is forwarded too since only None is skipped.
            if max_workers is not None:
                command.extend(("--max-workers", str(max_workers)))
            # May be None, in which case no concrete total is supplied.
            expected_total = _expected_stack_count_for_all()
            return await _run_long_task(
                command,
                ctx=ctx,
                progress=progress,
                is_stack_op=True,
                total_units=expected_total,
            )

        @mcp.tool(**_deploy_decorator_kwargs)  # type: ignore[untyped-decorator]
        @audit_logged
        async def bootstrap_cdk(
            region: str,
            account: str | None = None,
            *,
            ctx: Any = CurrentContext(),
            progress: Any = Progress(),
        ) -> str:
            """[gated by GCO_ENABLE_INFRASTRUCTURE_DEPLOY] long-running.

            `gco stacks bootstrap` — bootstrap CDK in an AWS account/region.

            Typical wall-clock: 2-5 minutes. Required before any stack can be
            deployed to a new account/region. Clients that speak FastMCP's
            task protocol can receive a task ID immediately and poll
            `tasks://gco/{task_id}` for progress; clients that don't run the
            tool inline with progress streamed through the FastMCP Progress
            dependency. Cancellation sends SIGTERM to the running CDK process
            and partial CloudFormation state may remain — inspect via
            stack_status or the AWS console.

            Args:
                region: Target AWS region.
                account: Optional AWS account ID. Defaults to the caller's account.
            """
            command = ["gco", "stacks", "bootstrap", "--region", region]
            # ``--account`` is omitted when no account was given.
            if account:
                command.extend(("--account", account))
            return await _run_long_task(
                command,
                ctx=ctx,
                progress=progress,
                is_stack_op=True,
                total_units=1,
            )
421# =============================================================================
422# Long-running stack lifecycle tools — gated by GCO_ENABLE_INFRASTRUCTURE_DESTROY
423# =============================================================================
if is_enabled(FLAG_INFRASTRUCTURE_DESTROY):
    # Decorator keyword arguments shared by both destroy-side tools. The
    # ``task`` entry is only added when TaskConfig imported successfully.
    _destroy_decorator_kwargs: dict[str, Any] = {"tags": {"infrastructure", "stacks"}}
    if _TASK_CONFIG_OPTIONAL is not None:
        _destroy_decorator_kwargs.update(task=_TASK_CONFIG_OPTIONAL)

    # The signatures below call CurrentContext() / Progress() to build
    # their defaults, so the definitions are skipped when either name
    # fell back to None at import time.
    if Progress is not None and CurrentContext is not None:

        @mcp.tool(**_destroy_decorator_kwargs)  # type: ignore[untyped-decorator]
        @audit_logged
        async def destroy_stack(
            stack_name: str,
            yes: bool = True,
            *,
            ctx: Any = CurrentContext(),
            progress: Any = Progress(),
        ) -> str:
            """[gated by GCO_ENABLE_INFRASTRUCTURE_DESTROY] long-running.

            `gco stacks destroy` — destroy a single CDK stack.

            Typical wall-clock: 5-20 minutes per stack. Clients that speak
            FastMCP's task protocol can receive a task ID immediately and
            poll `tasks://gco/{task_id}` for progress; clients that don't
            run the tool inline with progress streamed through the FastMCP
            Progress dependency. Cancellation sends SIGTERM to the running
            CDK process and partial CloudFormation state may remain —
            inspect via stack_status or the AWS console before retrying.

            Args:
                stack_name: Stack to destroy (e.g. ``gco-us-east-1``).
                yes: Skip the confirmation prompt (passes ``-y``). Defaults to True.
            """
            # Full command line, starting with the ``gco`` executable name.
            command = ["gco", "stacks", "destroy", stack_name]
            if yes:
                command.append("-y")
            # A single stack is one unit of progress.
            return await _run_long_task(
                command,
                ctx=ctx,
                progress=progress,
                is_stack_op=True,
                total_units=1,
            )

        @mcp.tool(**_destroy_decorator_kwargs)  # type: ignore[untyped-decorator]
        @audit_logged
        async def destroy_all(
            yes: bool = True,
            parallel: bool = False,
            max_workers: int | None = None,
            *,
            ctx: Any = CurrentContext(),
            progress: Any = Progress(),
        ) -> str:
            """[gated by GCO_ENABLE_INFRASTRUCTURE_DESTROY] long-running.

            `gco stacks destroy-all` — destroy every CDK stack in reverse dependency order.

            Typical wall-clock: 20-40 minutes for a multi-region teardown.
            Clients that speak FastMCP's task protocol can receive a task
            ID immediately and poll `tasks://gco/{task_id}` for progress;
            clients that don't run the tool inline with progress streamed
            through the FastMCP Progress dependency. Cancellation sends
            SIGTERM to the running CDK process and partial CloudFormation
            state may remain — inspect via stack_status or the AWS console
            before retrying.

            Args:
                yes: Skip the confirmation prompt (passes ``-y``). Defaults to True.
                parallel: Destroy regional stacks concurrently when True.
                max_workers: Cap on parallel destructions when ``parallel=True``.
            """
            command = ["gco", "stacks", "destroy-all"]
            if yes:
                command.append("-y")
            if parallel:
                command.append("--parallel")
            # Forwarded whenever a value is supplied, independent of
            # ``parallel``; 0 is forwarded too since only None is skipped.
            if max_workers is not None:
                command.extend(("--max-workers", str(max_workers)))
            # May be None, in which case no concrete total is supplied.
            expected_total = _expected_stack_count_for_all()
            return await _run_long_task(
                command,
                ctx=ctx,
                progress=progress,
                is_stack_op=True,
                total_units=expected_total,
            )