Coverage for mcp/run_mcp.py: 93%
128 statements
« prev ^ index » next coverage.py v7.14.1, created at 2026-06-15 15:07 +0000
« prev ^ index » next coverage.py v7.14.1, created at 2026-06-15 15:07 +0000
#!/usr/bin/env python3
"""
GCO MCP Server — Exposes the GCO CLI as MCP tools for LLM interaction.

Run with:
    python mcp/run_mcp.py

Add to Kiro MCP config (.kiro/settings/mcp.json):
    {
      "mcpServers": {
        "gco": {
          "command": "python3",
          "args": ["mcp/run_mcp.py"],
          "cwd": "/path/to/GCO"
        }
      }
    }

This file is a thin entrypoint. The actual implementation lives in the
``mcp/`` directory:

    mcp/
    ├── server.py       — FastMCP instance and instructions
    ├── audit.py        — Audit logging, sanitization, decorator
    ├── iam.py          — IAM role assumption
    ├── cli_runner.py   — _run_cli() subprocess wrapper
    ├── version.py      — Project version management
    ├── tools/          — MCP tool definitions (one file per domain)
    │   ├── jobs.py
    │   ├── capacity.py
    │   ├── inference.py
    │   ├── costs.py
    │   ├── stacks.py
    │   ├── storage.py
    │   ├── models.py
    │   ├── analytics.py
    │   ├── config.py
    │   ├── dag.py
    │   ├── docs.py
    │   ├── examples.py
    │   ├── images.py
    │   ├── nodepools.py
    │   ├── queue.py
    │   ├── tasks.py
    │   ├── templates.py
    │   └── webhooks.py
    └── resources/      — MCP resource definitions (one file per scheme)
        ├── docs.py          — docs:// (documentation + examples with metadata)
        ├── source.py        — source:// (full source code browser)
        ├── k8s.py           — k8s:// (cluster manifests)
        ├── iam_policies.py  — iam:// (IAM policy templates)
        ├── infra.py         — infra:// (Dockerfiles, Helm, CI/CD)
        ├── ci.py            — ci:// (GitHub Actions, workflows)
        ├── demos.py         — demos:// (walkthroughs, scripts)
        ├── clients.py       — clients:// (API client examples)
        ├── scripts.py       — scripts:// (utility scripts)
        ├── tests.py         — tests:// (test suite docs and patterns)
        └── config.py        — config:// (CDK config, feature toggles, env vars)

Besides starting the server, this module re-exports the tool functions,
resource directory constants, and audit/IAM/version helpers so that
existing tests can keep addressing them as ``run_mcp.<name>``.
"""
import sys
from pathlib import Path

# Directory holding this entrypoint (``mcp/``) and its parent, the project
# root.
MCP_DIR = Path(__file__).parent
PROJECT_ROOT = MCP_DIR.parent

# Put both directories at the front of ``sys.path``:
#   * the project root, so CLI modules can be imported;
#   * the mcp/ directory, so internal modules can import each other without
#     a package prefix (avoids shadowing the ``mcp`` PyPI package that
#     fastmcp depends on).
# The mcp/ directory is inserted last and therefore wins the lookup order.
#
# Any copy already on the path is removed before re-inserting, so executing
# this module again (the test suite reloads it with ``importlib.reload``)
# restores the same search order without piling up duplicate entries.
for _entry in (str(PROJECT_ROOT), str(MCP_DIR)):
    sys.path[:] = [existing for existing in sys.path if existing != _entry]
    sys.path.insert(0, _entry)
# --- Re-export everything the existing tests expect on ``run_mcp.*`` ---
# These modules are imported without a package prefix, so the imports rely
# on the ``sys.path`` setup above and must stay below it (hence ``E402``).
# ``F401`` is silenced because the names are imported only to be re-exported.

from audit import (  # noqa: E402, F401
    _MCP_SERVER_VERSION,
    _sanitize_arguments,
    audit_logged,
    audit_logger,
    emit_startup_log,
)
from iam import assume_mcp_role  # noqa: E402, F401
from server import mcp  # noqa: E402, F401
from version import get_project_version  # noqa: E402, F401

# Re-export the project version for tests that check run_mcp._PROJECT_VERSION
_PROJECT_VERSION = get_project_version()

# --- Register all tools and resources ---
# Registration runs at import time, before the per-tool re-exports below.

from resources import register_all_resources  # noqa: E402
from tools import register_all_tools  # noqa: E402

register_all_tools()
register_all_resources()
# --- Re-export tool functions for backward compat with existing tests ---
# Tests call e.g. run_mcp.list_jobs(), so we import them into this namespace.

# Stdlib helpers for the gated re-exports further down, imported under
# private (underscore) aliases:
#   * contextlib.suppress — the idiomatic "swallow this exception" form,
#     used to conditionally re-export tools such as reserve_capacity only
#     when their module actually defines them;
#   * importlib and os — used to reload tool modules when the GCO_ENABLE_*
#     environment flags are set.
import contextlib as _contextlib  # noqa: E402
import importlib as _importlib  # noqa: E402
import os as _os  # noqa: E402
96from tools.analytics import ( # noqa: E402, F401
97 analytics_doctor,
98 analytics_login_url,
99 analytics_user_add,
100 analytics_users_list,
101 disable_analytics,
102 enable_analytics,
103)
104from tools.capacity import ( # noqa: E402, F401
105 ai_recommend,
106 capacity_status,
107 check_capacity,
108 list_reservations,
109 recommend_region,
110 reservation_check,
111 spot_prices,
112)
113from tools.config import config_get # noqa: E402, F401
114from tools.costs import cost_by_region, cost_forecast, cost_summary, cost_trend # noqa: E402, F401
115from tools.dag import dag_run, dag_validate # noqa: E402, F401
116from tools.docs import find_docs # noqa: E402, F401
117from tools.examples import find_examples # noqa: E402, F401
118from tools.images import ( # noqa: E402, F401
119 images_describe,
120 images_init,
121 images_lifecycle_get,
122 images_lifecycle_set,
123 images_list,
124 images_orphans,
125 images_replication_get,
126 images_replication_status,
127 images_replication_sync,
128 images_tags,
129 images_uri,
130)
131from tools.inference import ( # noqa: E402, F401
132 canary_deploy,
133 chat_inference,
134 deploy_inference,
135 inference_health,
136 inference_status,
137 invoke_inference,
138 list_endpoint_models,
139 list_inference_endpoints,
140 promote_canary,
141 rollback_canary,
142 scale_inference,
143 start_inference,
144 stop_inference,
145 update_inference_image,
146)
147from tools.jobs import ( # noqa: E402, F401
148 cluster_health,
149 get_job,
150 get_job_events,
151 get_job_logs,
152 list_jobs,
153 queue_status,
154 submit_job_api,
155 submit_job_sqs,
156)
157from tools.models import get_model_uri, list_models # noqa: E402, F401
158from tools.nodepools import ( # noqa: E402, F401
159 nodepools_create_odcr,
160 nodepools_describe,
161 nodepools_list,
162)
163from tools.queue import queue_get, queue_list, queue_stats, queue_submit # noqa: E402, F401
164from tools.stacks import ( # noqa: E402, F401
165 aurora_status,
166 disable_aurora,
167 disable_fsx,
168 disable_valkey,
169 enable_aurora,
170 enable_fsx,
171 enable_valkey,
172 fsx_status,
173 list_stacks,
174 setup_cluster_access,
175 stack_diff,
176 stack_outputs,
177 stack_status,
178 stack_synth,
179 valkey_status,
180)
181from tools.storage import ( # noqa: E402, F401
182 files_access_points,
183 files_get,
184 list_file_systems,
185 list_storage_contents,
186)
187from tools.tasks import task_status, task_tail # noqa: E402, F401
188from tools.templates import ( # noqa: E402, F401
189 templates_create,
190 templates_get,
191 templates_list,
192 templates_run,
193)
194from tools.webhooks import webhooks_create, webhooks_get, webhooks_list # noqa: E402, F401
# Optional re-exports. Each import sits in its own ``suppress(ImportError)``
# block, so a name that its tool module does not currently define is simply
# left unbound on ``run_mcp`` instead of aborting the import of this file,
# and one missing group does not prevent the following groups from binding.

# Capacity-purchase tool (see the GCO_ENABLE_CAPACITY_PURCHASE reload block
# below).
with _contextlib.suppress(ImportError):
    from tools.capacity import reserve_capacity  # noqa: F401

# Image build/push and image delete/cleanup tools (see the tools.images
# reload block below for the flags involved).
with _contextlib.suppress(ImportError):
    from tools.images import images_build, images_push  # noqa: F401

with _contextlib.suppress(ImportError):
    from tools.images import (  # noqa: F401
        images_cleanup,
        images_delete_repo,
        images_delete_tag,
        images_prune,
    )

# Infrastructure deploy / destroy tools (see the tools.stacks reload block
# below for the flags involved).
with _contextlib.suppress(ImportError):
    from tools.stacks import bootstrap_cdk, deploy_all, deploy_stack  # noqa: F401

with _contextlib.suppress(ImportError):
    from tools.stacks import destroy_all, destroy_stack  # noqa: F401

# Destructive-operations gated tools — present only when
# GCO_ENABLE_DESTRUCTIVE_OPERATIONS (or GCO_ENABLE_ALL_TOOLS) is set.
with _contextlib.suppress(ImportError):
    from tools.jobs import delete_job  # noqa: F401

with _contextlib.suppress(ImportError):
    from tools.inference import delete_inference  # noqa: F401

with _contextlib.suppress(ImportError):
    from tools.templates import delete_template  # noqa: F401

with _contextlib.suppress(ImportError):
    from tools.webhooks import delete_webhook  # noqa: F401

with _contextlib.suppress(ImportError):
    from tools.models import delete_model  # noqa: F401

with _contextlib.suppress(ImportError):
    from tools.nodepools import delete_nodepool  # noqa: F401

with _contextlib.suppress(ImportError):
    from tools.analytics import analytics_user_remove  # noqa: F401

with _contextlib.suppress(ImportError):
    from tools.queue import cancel_queue_job  # noqa: F401

# Model-upload gated tool — present only when GCO_ENABLE_MODEL_UPLOAD
# (or GCO_ENABLE_ALL_TOOLS) is set.
with _contextlib.suppress(ImportError):
    from tools.models import models_upload  # noqa: F401
# --- Flag-driven reloads of gated tool modules ---
# Tests set a GCO_ENABLE_* environment variable and then call
# ``importlib.reload(run_mcp)``. Reloading this file alone is not enough to
# make the gated names appear as module-level attributes, so each affected
# tool module is reloaded here and the gated names are rebound on this
# module. The umbrella flag GCO_ENABLE_ALL_TOOLS is honoured everywhere, so
# the per-feature flag and the umbrella yield the same rebinds.

_UMBRELLA_FLAG = "GCO_ENABLE_ALL_TOOLS"


def _flag_on(*flag_names: str) -> bool:
    """Return True if any of *flag_names*, or the umbrella flag, is enabled.

    A flag counts as enabled when its environment variable equals ``"true"``
    (case-insensitive). Unset variables and any other value count as off.
    """
    return any(
        _os.environ.get(flag, "").lower() == "true"
        for flag in (*flag_names, _UMBRELLA_FLAG)
    )


def _reload_and_rebind(module_name: str, *names: str) -> None:
    """Reload ``tools.<module_name>`` and rebind *names* on this module.

    Args:
        module_name: Name of the submodule inside the ``tools`` package.
        *names: Attributes to copy from the reloaded module into this
            module's globals. Names the reloaded module does not define
            are skipped rather than treated as an error.
    """
    module = _importlib.reload(_importlib.import_module(f"tools.{module_name}"))
    for name in names:
        if hasattr(module, name):
            globals()[name] = getattr(module, name)


# Make reserve_capacity available after module reload (tests use
# importlib.reload with GCO_ENABLE_CAPACITY_PURCHASE=true).
if _flag_on("GCO_ENABLE_CAPACITY_PURCHASE"):
    _reload_and_rebind("capacity", "reserve_capacity")

# tools.images is reloaded when the image-publish or destructive flag is
# set so the gated build/push/delete tools are present.
if _flag_on("GCO_ENABLE_IMAGE_PUBLISH", "GCO_ENABLE_DESTRUCTIVE_OPERATIONS"):
    _reload_and_rebind(
        "images",
        "images_build",
        "images_push",
        "images_cleanup",
        "images_prune",
        "images_delete_tag",
        "images_delete_repo",
    )

# tools.stacks is reloaded when either infrastructure flag is set so the
# gated deploy/destroy/bootstrap tools are present.
if _flag_on("GCO_ENABLE_INFRASTRUCTURE_DEPLOY", "GCO_ENABLE_INFRASTRUCTURE_DESTROY"):
    _reload_and_rebind(
        "stacks",
        "deploy_stack",
        "deploy_all",
        "bootstrap_cdk",
        "destroy_stack",
        "destroy_all",
    )

# Destructive-operations and model-upload flags, kept as module-level
# values because two of the blocks below share them.
_DESTRUCTIVE_FLAG_ON = _flag_on("GCO_ENABLE_DESTRUCTIVE_OPERATIONS")
_MODEL_UPLOAD_FLAG_ON = _flag_on("GCO_ENABLE_MODEL_UPLOAD")

if _DESTRUCTIVE_FLAG_ON:
    # One gated tool per module; the modules are reloaded in this order.
    for _module_name, _tool_name in (
        ("jobs", "delete_job"),
        ("inference", "delete_inference"),
        ("templates", "delete_template"),
        ("webhooks", "delete_webhook"),
        ("nodepools", "delete_nodepool"),
        ("analytics", "analytics_user_remove"),
        ("queue", "cancel_queue_job"),
    ):
        _reload_and_rebind(_module_name, _tool_name)

# tools.models is reloaded if either the destructive flag (delete_model)
# or the model-upload flag (models_upload) is set, so do it once here
# regardless of which (or both) flipped.
if _DESTRUCTIVE_FLAG_ON or _MODEL_UPLOAD_FLAG_ON:
    _reload_and_rebind("models", "delete_model", "models_upload")
370# --- Re-export resource directory constants for tests ---
371from resources.ci import ( # noqa: E402, F401
372 GITHUB_ACTIONS_DIR,
373 GITHUB_CODEQL_DIR,
374 GITHUB_DIR,
375 GITHUB_ISSUE_TEMPLATE_DIR,
376 GITHUB_KIND_DIR,
377 GITHUB_SCRIPTS_DIR,
378 GITHUB_WORKFLOWS_DIR,
379)
380from resources.docs import DOCS_DIR, EXAMPLES_DIR # noqa: E402, F401
381from resources.infra import DOCKERFILES_DIR, HELM_CHARTS_FILE # noqa: E402, F401
382from resources.k8s import MANIFESTS_DIR # noqa: E402, F401
# Declare every name that is intentionally re-exported for tests and
# downstream consumers. This silences unused-import warnings from static
# analyzers that don't recognise the per-line ruff markers above.
__all__ = [
    "DOCKERFILES_DIR",
    "DOCS_DIR",
    "EXAMPLES_DIR",
    "GITHUB_ACTIONS_DIR",
    "GITHUB_CODEQL_DIR",
    "GITHUB_DIR",
    "GITHUB_ISSUE_TEMPLATE_DIR",
    "GITHUB_KIND_DIR",
    "GITHUB_SCRIPTS_DIR",
    "GITHUB_WORKFLOWS_DIR",
    "HELM_CHARTS_FILE",
    "MANIFESTS_DIR",
    "_MCP_SERVER_VERSION",
    "_PROJECT_VERSION",
    "_sanitize_arguments",
    "ai_recommend",
    "analytics_doctor",
    "analytics_login_url",
    "analytics_user_add",
    "analytics_user_remove",
    "analytics_users_list",
    "assume_mcp_role",
    "audit_logged",
    "audit_logger",
    "aurora_status",
    "bootstrap_cdk",
    "canary_deploy",
    "cancel_queue_job",
    "capacity_status",
    "chat_inference",
    "check_capacity",
    "cluster_health",
    "config_get",
    "cost_by_region",
    "cost_forecast",
    "cost_summary",
    "cost_trend",
    "dag_run",
    "dag_validate",
    "delete_inference",
    "delete_job",
    "delete_model",
    "delete_nodepool",
    "delete_template",
    "delete_webhook",
    "deploy_all",
    "deploy_inference",
    "deploy_stack",
    "destroy_all",
    "destroy_stack",
    "disable_analytics",
    "disable_aurora",
    "disable_fsx",
    "disable_valkey",
    "emit_startup_log",
    "enable_analytics",
    "enable_aurora",
    "enable_fsx",
    "enable_valkey",
    "files_access_points",
    "files_get",
    "find_docs",
    "find_examples",
    "fsx_status",
    "get_job",
    "get_job_events",
    "get_job_logs",
    "get_model_uri",
    "get_project_version",
    "images_build",
    "images_cleanup",
    "images_delete_repo",
    "images_delete_tag",
    "images_describe",
    "images_init",
    "images_lifecycle_get",
    "images_lifecycle_set",
    "images_list",
    "images_orphans",
    "images_prune",
    "images_push",
    "images_replication_get",
    "images_replication_status",
    "images_replication_sync",
    "images_tags",
    "images_uri",
    "inference_health",
    "inference_status",
    "invoke_inference",
    "list_endpoint_models",
    "list_file_systems",
    "list_inference_endpoints",
    "list_jobs",
    "list_models",
    "list_reservations",
    "list_stacks",
    "list_storage_contents",
    "mcp",
    "models_upload",
    "nodepools_create_odcr",
    "nodepools_describe",
    "nodepools_list",
    "promote_canary",
    "queue_get",
    "queue_list",
    "queue_stats",
    "queue_status",
    "queue_submit",
    "recommend_region",
    "reservation_check",
    "rollback_canary",
    "scale_inference",
    "setup_cluster_access",
    "spot_prices",
    "stack_diff",
    "stack_outputs",
    "stack_status",
    "stack_synth",
    "start_inference",
    "stop_inference",
    "submit_job_api",
    "submit_job_sqs",
    "task_status",
    "task_tail",
    "templates_create",
    "templates_get",
    "templates_list",
    "templates_run",
    "update_inference_image",
    "valkey_status",
    "webhooks_create",
    "webhooks_get",
    "webhooks_list",
]

# The list above stays a plain literal so static analyzers can read it, but
# the gated tools in it are only bound when their feature flag is enabled.
# A name listed in ``__all__`` that the module does not define makes
# ``from run_mcp import *`` raise AttributeError, so drop the entries that
# are not bound in this process.
for _name in tuple(__all__):
    if _name not in globals():
        __all__.remove(_name)
# --- Startup ---
# Both calls run at import time, so they also execute when tests import or
# reload this module.

emit_startup_log()

# No handler is needed here: an exception raised while assuming the role
# propagates to whoever imported or ran this module, exactly as the former
# ``try``/``except Exception: raise`` wrapper did.
assume_mcp_role()

# =============================================================================
# ENTRYPOINT
# =============================================================================

if __name__ == "__main__":
    mcp.run()