Coverage for cli/commands/analytics_cmd.py: 90%
307 statements
« prev ^ index » next coverage.py v7.14.1, created at 2026-06-15 15:07 +0000
« prev ^ index » next coverage.py v7.14.1, created at 2026-06-15 15:07 +0000
1"""GCO analytics environment command group.

Provides the ``gco analytics`` sub-commands:

* ``enable`` / ``disable`` / ``status`` — flip (or show) the
  ``analytics_environment.enabled`` toggle in ``cdk.json``.
* ``users add`` / ``users list`` / ``users remove`` /
  ``users set-password`` — manage Cognito users against the
  auto-discovered pool id from ``gco-analytics``.
* ``studio login`` — SRP-authenticate against Cognito and fetch a
  SageMaker Studio presigned URL from ``/studio/login`` on the
  existing ``gco-api-gateway``.
* ``doctor`` — pre-flight checks before ``gco stacks deploy
  gco-analytics``.

The Click wiring mirrors ``stacks_cmd.py::fsx_cmd`` exactly. Every
command delegates to helpers in :mod:`cli.analytics_user_mgmt` and
:mod:`cli.stacks` so the command layer stays thin and testable via
``click.testing.CliRunner``.
18"""
20from __future__ import annotations
22import json
23import os
24import sys
25import urllib.error
26from typing import Any
28import click
30from ..config import GCOConfig
31from ..output import get_output_formatter
# Decorator that hands the shared ``GCOConfig`` object to a command as its
# first argument; ``ensure=True`` asks Click to create one if the context
# does not carry it yet.
pass_config = click.make_pass_decorator(GCOConfig, ensure=True)


# Error shown whenever a command cannot discover the Cognito resources it
# needs (see ``_require_cognito_pool_id`` and ``studio login``).
_STACK_MISSING_MESSAGE = (
    "gco-analytics stack not deployed — "
    "run `gco analytics enable` then `gco stacks deploy gco-analytics`"
)
@click.group()
@pass_config
def analytics(config: Any) -> None:
    """Manage the GCO analytics (SageMaker Studio + EMR) environment."""
    # Pure container group: it does no work itself. Sub-commands attach to
    # it through the ``@analytics.command`` / ``@analytics.group`` decorators.
48# ---------------------------------------------------------------------------
49# Toggle commands — enable / disable / status
50# ---------------------------------------------------------------------------
@analytics.command("status")
@pass_config
def analytics_status(config: Any) -> None:
    """Show the current analytics environment toggle state from cdk.json."""
    # Helper imports are function-local throughout this module.
    from ..stacks import get_analytics_config

    out = get_output_formatter(config)
    try:
        settings = get_analytics_config()
        out.print_info("Analytics environment config:")
        out.print(settings)
    except Exception as err:  # noqa: BLE001 — surface every loader error
        # Any failure becomes a one-line CLI error plus exit status 1.
        out.print_error(f"Failed to read analytics config: {err}")
        sys.exit(1)
@analytics.command("enable")
@click.option("--hyperpod", is_flag=True, help="Also enable SageMaker HyperPod job submission.")
@click.option(
    "--canvas",
    is_flag=True,
    help="Also enable the SageMaker Canvas no-code ML app.",
)
@click.option("--yes", "-y", is_flag=True, help="Skip confirmation.")
@pass_config
def analytics_enable(config: Any, hyperpod: bool, canvas: bool, yes: bool) -> None:
    """Enable the analytics environment in cdk.json.

    Flips ``analytics_environment.enabled`` to ``true``; ``--hyperpod``
    additionally flips ``analytics_environment.hyperpod.enabled``, and
    ``--canvas`` flips ``analytics_environment.canvas.enabled`` (which
    attaches ``AmazonSageMakerCanvasFullAccess`` to the SageMaker
    execution role). Prints the follow-up ``gco stacks deploy
    gco-analytics`` command — does not deploy automatically.
    """
    from ..stacks import get_analytics_config, update_analytics_config

    out = get_output_formatter(config)

    # Interactive path: describe what is about to change, then let Click
    # abort the command if the operator declines.
    if not yes:
        out.print_info("Analytics environment will be enabled in cdk.json.")
        if hyperpod:
            out.print_info(" Hyperpod sub-toggle will also be enabled.")
        if canvas:
            out.print_info(" Canvas sub-toggle will also be enabled.")
        click.confirm("\nEnable the analytics environment?", abort=True)

    try:
        existing = get_analytics_config()

        # The update helper replaces nested blocks wholesale, so each
        # sub-block is copied from the current config and only its
        # ``enabled`` field is touched: forced on when the flag was given,
        # defaulted to False when the key is absent.
        sub_blocks: dict[str, Any] = {}
        for key, requested in (("hyperpod", hyperpod), ("canvas", canvas)):
            block = dict(existing.get(key) or {})
            if requested:
                block["enabled"] = True
            else:
                block.setdefault("enabled", False)
            sub_blocks[key] = block

        update_analytics_config({"enabled": True, **sub_blocks})
        out.print_success("Analytics environment enabled in cdk.json")
        out.print_info("Run `gco stacks deploy gco-analytics` to apply changes")
    except Exception as err:  # noqa: BLE001 — user-facing error from file I/O
        out.print_error(f"Failed to enable analytics environment: {err}")
        sys.exit(1)
@analytics.command("disable")
@click.option("--yes", "-y", is_flag=True, help="Skip confirmation.")
@pass_config
def analytics_disable(config: Any, yes: bool) -> None:
    """Disable the analytics environment in cdk.json.

    Only flips ``analytics_environment.enabled`` to ``false``; the
    ``hyperpod`` / ``canvas`` / ``cognito`` / ``efs`` sub-blocks are
    left untouched so the operator's existing preferences survive a
    disable/enable cycle.
    """
    from ..stacks import update_analytics_config

    out = get_output_formatter(config)

    # Without --yes, warn about the consequences and require confirmation;
    # ``abort=True`` makes Click stop the command on a "no".
    if not yes:
        impact = "Existing SageMaker Studio / Cognito / EMR resources will be destroyed on next deploy."
        out.print_warning("This will disable the analytics environment.")
        out.print_warning(impact)
        click.confirm("Are you sure?", abort=True)

    try:
        # Only the top-level flag is sent; sub-blocks are not part of the update.
        update_analytics_config({"enabled": False})
        out.print_success("Analytics environment disabled in cdk.json")
        out.print_info("Run `gco stacks destroy gco-analytics` to tear down resources")
    except Exception as err:  # noqa: BLE001 — user-facing error from file I/O
        out.print_error(f"Failed to disable analytics environment: {err}")
        sys.exit(1)
160# ---------------------------------------------------------------------------
161# Users subgroup
162# ---------------------------------------------------------------------------
@analytics.group("users")
@pass_config
def users_cmd(config: Any) -> None:
    """Manage Cognito users who can sign in to SageMaker Studio."""
    # Container group only; the ``users`` sub-commands are registered on it
    # via ``@users_cmd.command``.
def _require_cognito_pool_id(config: Any) -> tuple[str, str]:
    """Resolve the Cognito user pool for the ``users`` sub-commands.

    Looks the pool id up via ``discover_cognito_pool_id`` using the
    config's API Gateway region and project name. Returns
    ``(pool_id, region)``; when no pool id is found, prints
    ``_STACK_MISSING_MESSAGE`` and exits with status 1.
    """
    from ..analytics_user_mgmt import discover_cognito_pool_id

    out = get_output_formatter(config)
    lookup_region = config.api_gateway_region
    discovered = discover_cognito_pool_id(lookup_region, config.project_name)
    if not discovered:
        out.print_error(_STACK_MISSING_MESSAGE)
        sys.exit(1)
    return discovered, lookup_region
@users_cmd.command("add")
@click.option("--username", required=True, help="Cognito username to create.")
@click.option("--email", help="Email address for the new user (optional).")
@click.option(
    "--no-email",
    is_flag=True,
    help="Suppress the Cognito welcome email (MessageAction=SUPPRESS).",
)
@click.option(
    "--password",
    envvar="GCO_STUDIO_PASSWORD",
    help=(
        "Set a permanent password via admin_set_user_password (also read "
        "from $GCO_STUDIO_PASSWORD). Mutually exclusive with --generate-password."
    ),
)
@click.option(
    "--generate-password",
    is_flag=True,
    help=(
        "Generate a strong random password, set it as permanent via "
        "admin_set_user_password, and print it once. Mutually exclusive "
        "with --password."
    ),
)
@pass_config
def users_add(
    config: Any,
    username: str,
    email: str | None,
    no_email: bool,
    password: str | None,
    generate_password: bool,
) -> None:
    """Create a Cognito user and print the temporary password exactly once.

    When ``--password`` or ``--generate-password`` is passed, the user is
    created and then has a permanent password set via
    ``admin_set_user_password`` — this skips the ``NEW_PASSWORD_REQUIRED``
    challenge on first login, so the resulting credentials work directly
    with ``gco analytics studio login``.
    """
    from botocore.exceptions import ClientError

    from ..analytics_user_mgmt import (
        admin_create_user,
        admin_set_user_password,
        generate_strong_password,
    )

    out = get_output_formatter(config)

    # Reject the conflicting flag combination before touching AWS.
    if password and generate_password:
        out.print_error("--password and --generate-password are mutually exclusive")
        sys.exit(1)

    pool_id, region = _require_cognito_pool_id(config)

    try:
        _, temporary_password = admin_create_user(
            pool_id=pool_id,
            region=region,
            username=username,
            email=email,
            suppress_email=no_email,
        )
    except ClientError as err:
        code = err.response.get("Error", {}).get("Code", "Unknown")
        out.print_error(f"Failed to create user {username}: {code}")
        sys.exit(1)

    out.print_success(f"Created Cognito user: {username}")

    # Temporary-password path: no explicit or generated password requested,
    # so report whatever the create call handed back.
    if not (password or generate_password):
        if temporary_password:
            out.print_info(f"Temporary password (printed exactly once): {temporary_password}")
        else:
            out.print_info(
                "Cognito did not return a temporary password. "
                "If --no-email was passed, set one via "
                "`aws cognito-idp admin-set-user-password` "
                "or re-run `gco analytics users add` with --password or --generate-password."
            )
        return

    # Permanent-password path: takes precedence over the temporary password
    # so first sign-in is not blocked by NEW_PASSWORD_REQUIRED.
    final_password = password or generate_strong_password()
    try:
        admin_set_user_password(
            pool_id=pool_id,
            region=region,
            username=username,
            password=final_password,
            permanent=True,
        )
    except ClientError as err:
        code = err.response.get("Error", {}).get("Code", "Unknown")
        # The user already exists at this point, so point at the manual retry.
        out.print_error(
            f"User {username} created, but setting the password "
            f"failed: {code}. Retry with "
            "`aws cognito-idp admin-set-user-password --permanent`."
        )
        sys.exit(1)

    if generate_password:
        out.print_info(f"Generated password (printed exactly once): {final_password}")
    else:
        out.print_info(f"Password set (permanent) for {username}")
@users_cmd.command("list")
@click.option("--as-json", "as_json", is_flag=True, help="Emit JSON instead of a table.")
@pass_config
def users_list(config: Any, as_json: bool) -> None:
    """List Cognito users in the analytics user pool."""
    from botocore.exceptions import ClientError

    from ..analytics_user_mgmt import list_users as fetch_users

    out = get_output_formatter(config)
    pool_id, region = _require_cognito_pool_id(config)

    try:
        found = fetch_users(pool_id, region)
    except ClientError as err:
        # Only the AWS error code is shown to the operator.
        code = err.response.get("Error", {}).get("Code", "Unknown")
        out.print_error(f"Failed to list users: {code}")
        sys.exit(1)

    if as_json:
        # --as-json bypasses the formatter and writes raw JSON to stdout.
        print(json.dumps(found, indent=2))
    else:
        out.print(found)
@users_cmd.command("remove")
@click.option("--username", required=True, help="Cognito username to remove.")
# ``-y`` added so this flag matches ``enable``, ``disable`` and
# ``users set-password``, which all accept ``--yes`` / ``-y``.
@click.option("--yes", "-y", is_flag=True, help="Skip the confirmation prompt.")
@pass_config
def users_remove(config: Any, username: str, yes: bool) -> None:
    """Delete a Cognito user from the analytics user pool."""
    from botocore.exceptions import ClientError

    from ..analytics_user_mgmt import admin_delete_user

    formatter = get_output_formatter(config)
    # Exits with the "stack not deployed" message when no pool is found.
    pool_id, region = _require_cognito_pool_id(config)

    # Deletion is destructive: confirm unless --yes/-y was given.
    # ``abort=True`` makes Click stop the command on a "no".
    if not yes:
        click.confirm(f"Delete Cognito user '{username}'?", abort=True)

    try:
        admin_delete_user(pool_id, region, username)
    except ClientError as exc:
        # Surface only the AWS error code and exit with status 1.
        error_code = exc.response.get("Error", {}).get("Code", "Unknown")
        formatter.print_error(f"Failed to delete user {username}: {error_code}")
        sys.exit(1)

    formatter.print_success(f"Deleted Cognito user: {username}")
@users_cmd.command("set-password")
@click.option("--username", required=True, help="Cognito username whose password to change.")
@click.option(
    "--password",
    envvar="GCO_STUDIO_PASSWORD",
    help=(
        "New password (also read from $GCO_STUDIO_PASSWORD; prompted "
        "otherwise). Mutually exclusive with --generate-password."
    ),
)
@click.option(
    "--generate-password",
    is_flag=True,
    help=(
        "Generate a strong random password, set it, and print it once. "
        "Mutually exclusive with --password."
    ),
)
@click.option(
    "--temporary",
    is_flag=True,
    help=(
        "Set the password as temporary so the user is forced to change "
        "it on first login (Permanent=false). Default is permanent."
    ),
)
@click.option("--yes", "-y", is_flag=True, help="Skip the confirmation prompt.")
@pass_config
def users_set_password(
    config: Any,
    username: str,
    password: str | None,
    generate_password: bool,
    temporary: bool,
    yes: bool,
) -> None:
    """Change a Cognito user's password via AdminSetUserPassword.

    By default the new password is marked ``Permanent=true`` so the
    user can sign in directly with ``gco analytics studio login``
    without the ``NEW_PASSWORD_REQUIRED`` challenge. Pass
    ``--temporary`` to require the user to choose their own password
    on first sign-in.
    """
    from botocore.exceptions import ClientError

    from ..analytics_user_mgmt import admin_set_user_password, generate_strong_password

    out = get_output_formatter(config)

    if password and generate_password:
        out.print_error("--password and --generate-password are mutually exclusive")
        sys.exit(1)

    pool_id, region = _require_cognito_pool_id(config)

    # Password source, in priority order: generated, supplied on the command
    # line / environment, or typed twice at a hidden prompt.
    if generate_password:
        new_password = generate_strong_password()
    elif password is None:
        new_password = click.prompt(
            "New password",
            hide_input=True,
            confirmation_prompt=True,
        )
    else:
        new_password = password

    # Used in both the confirmation prompt and the success message.
    qualifier = "temporary" if temporary else "permanent"

    if not yes:
        click.confirm(
            f"Set a new {qualifier} password for Cognito user '{username}'?",
            abort=True,
        )

    try:
        admin_set_user_password(
            pool_id=pool_id,
            region=region,
            username=username,
            password=new_password,
            permanent=not temporary,
        )
    except ClientError as err:
        code = err.response.get("Error", {}).get("Code", "Unknown")
        out.print_error(f"Failed to set password for {username}: {code}")
        sys.exit(1)

    out.print_success(f"Password set ({qualifier}) for {username}")
    if generate_password:
        # The generated value is only ever shown here.
        out.print_info(f"Generated password (printed exactly once): {new_password}")
440# ---------------------------------------------------------------------------
441# Studio login subgroup
442# ---------------------------------------------------------------------------
@analytics.group("studio")
@pass_config
def studio_cmd(config: Any) -> None:
    """SageMaker Studio helpers (login, etc.)."""
    # Container group only; ``login`` is registered on it via
    # ``@studio_cmd.command``.
@studio_cmd.command("login")
@click.option("--username", required=True, help="Cognito username to sign in with.")
@click.option(
    "--password",
    envvar="GCO_STUDIO_PASSWORD",
    help="Password (also read from $GCO_STUDIO_PASSWORD; prompted otherwise).",
)
@click.option("--api-url", help="Override the API Gateway base URL (otherwise auto-discovered).")
@click.option("--open", "open_browser", is_flag=True, help="Open the URL in the default browser.")
@pass_config
def studio_login(
    config: Any,
    username: str,
    password: str | None,
    api_url: str | None,
    open_browser: bool,
) -> None:
    """Sign in to SageMaker Studio via Cognito SRP and print the presigned URL."""
    # Exit statuses: 1 for discovery / authentication failures, 2 for
    # failures while fetching the Studio URL (HTTP, network, bad response,
    # or the user profile not becoming ready in time).
    import time as _time

    from botocore.exceptions import ClientError

    from ..analytics_user_mgmt import (
        discover_api_endpoint,
        discover_cognito_client_id,
        discover_cognito_pool_id,
        fetch_studio_url,
        srp_authenticate,
    )

    formatter = get_output_formatter(config)
    region = config.api_gateway_region
    project_name = config.project_name

    pool_id = discover_cognito_pool_id(region, project_name)
    client_id = discover_cognito_client_id(region, project_name)
    if not pool_id or not client_id:
        formatter.print_error(_STACK_MISSING_MESSAGE)
        sys.exit(1)

    # Endpoint precedence: explicit --api-url, then discovery, then the
    # GCO_API_GATEWAY_URL environment variable.
    api_base = (
        api_url
        or discover_api_endpoint(region, project_name)
        or os.environ.get("GCO_API_GATEWAY_URL")
    )
    if not api_base:
        formatter.print_error(
            "Could not resolve API Gateway endpoint — pass --api-url or deploy gco-api-gateway."
        )
        sys.exit(1)

    if password is None:
        password = click.prompt("Password", hide_input=True)

    try:
        tokens = srp_authenticate(
            pool_id=pool_id,
            client_id=client_id,
            username=username,
            password=password,
            region=region,
        )
    except ClientError as exc:
        error_code = exc.response.get("Error", {}).get("Code", "Unknown")
        formatter.print_error(f"Cognito authentication failed: {error_code}")
        sys.exit(1)

    id_token = tokens.get("IdToken")
    if not id_token:
        formatter.print_error("Cognito authentication failed: no IdToken returned")
        sys.exit(1)

    max_wait = 120  # seconds
    poll_interval = 5  # seconds
    elapsed = 0
    url = ""

    def _close_progress_line() -> None:
        # Once ``elapsed`` is non-zero the "Waiting..." dots line has been
        # started with nl=False; end it so an error starts on a fresh line.
        if elapsed > 0:
            click.echo("")

    try:
        # Poll until the Lambda returns HTTP 200 with the presigned URL.
        # First-time logins trigger user-profile provisioning (30-60s);
        # the Lambda returns HTTP 202 while the profile is pending.
        while True:
            url, _, _ = fetch_studio_url(api_base, id_token)
            # Stop on success, or after the attempt made once the full
            # ``max_wait`` has elapsed. Every sleep is therefore followed by
            # a fetch; previously the last sleep went straight to a timeout.
            if url or elapsed >= max_wait:
                break
            # 202 -- profile still provisioning.
            if elapsed == 0:
                click.echo(" Waiting for user profile to provision...", nl=False)
            click.echo(".", nl=False)
            _time.sleep(poll_interval)
            elapsed += poll_interval
    except urllib.error.HTTPError as exc:
        _close_progress_line()
        correlation_id = exc.headers.get("x-amzn-RequestId") if exc.headers else "N/A"
        formatter.print_error(
            f"login failed: HTTP {exc.code}, correlation_id={correlation_id or 'N/A'}"
        )
        sys.exit(2)
    except urllib.error.URLError as exc:
        _close_progress_line()
        formatter.print_error(f"login failed: network error: {exc.reason!r}")
        sys.exit(2)
    except ValueError as exc:
        _close_progress_line()
        formatter.print_error(f"login failed: {exc}")
        sys.exit(2)

    if not url:
        # Timed out: the dots line is necessarily open here, so end it first.
        click.echo("")
        formatter.print_error(
            f"User profile did not become ready within {max_wait}s. Try again in a minute."
        )
        sys.exit(2)
    if elapsed > 0:
        click.echo(" ready")

    # Print the URL on its own line for pipe-friendliness.
    click.echo(url)
    if open_browser:
        click.launch(url)
571# ---------------------------------------------------------------------------
572# Doctor subcommand
573# ---------------------------------------------------------------------------
@analytics.command("doctor")
@pass_config
def analytics_doctor(config: Any) -> None:
    """Run pre-flight checks before `gco stacks deploy gco-analytics`.

    Exits non-zero on any failing check. Each check prints ``✓``/``✗``
    plus a short remediation line so the operator knows exactly what
    to fix.
    """
    from ..analytics_user_mgmt import (
        check_ssm_parameter,
        check_stack_complete,
        scan_orphan_analytics_resources,
    )
    from ..config import _load_cdk_json
    from ..stacks import _find_cdk_json

    formatter = get_output_formatter(config)
    any_failed = False

    def _emit(name: str, ok: bool, remediation: str) -> None:
        # Print one check result; a failure also flips ``any_failed`` and,
        # when a remediation is supplied, prints it on the following line.
        nonlocal any_failed
        if ok:
            click.echo(f" ✓ {name}")
            return
        any_failed = True
        click.echo(f" ✗ {name}")
        if remediation:
            click.echo(f" → {remediation}")

    def _check_stack(region: str, stack_name: str) -> None:
        # Report one stack; fall back to a generic deploy hint when the
        # helper returns no remediation of its own.
        ok, remediation = check_stack_complete(region, stack_name)
        _emit(
            f"{stack_name} is CREATE_COMPLETE",
            ok,
            remediation or f"deploy with `gco stacks deploy {stack_name}`",
        )

    # 1. cdk.json parses
    cdk_json_path = _find_cdk_json()
    if cdk_json_path is None:
        _emit(
            "cdk.json present",
            False,
            "run `gco analytics doctor` from the project root (cdk.json not found).",
        )
    else:
        try:
            with open(cdk_json_path, encoding="utf-8") as fh:
                json.load(fh)
        except json.JSONDecodeError as exc:
            _emit(
                "cdk.json parses as JSON",
                False,
                f"fix malformed JSON at {cdk_json_path}: {exc.msg} (line {exc.lineno})",
            )
        except (OSError, UnicodeDecodeError) as exc:
            # An unreadable or non-UTF-8 file is a failed check, not a crash.
            _emit(
                "cdk.json is readable",
                False,
                f"make {cdk_json_path} readable as UTF-8 text: {exc}",
            )
        else:
            _emit("cdk.json parses as JSON", True, "")

    # 2. Prerequisite stacks healthy
    _check_stack(config.global_region, f"{config.project_name}-global")
    _check_stack(config.api_gateway_region, f"{config.project_name}-api-gateway")

    # NOTE(review): this runs even when check 1 failed — confirm that
    # _load_cdk_json tolerates a missing or malformed cdk.json.
    cdk_regions = _load_cdk_json()
    regional_regions = cdk_regions.get("regional", []) if isinstance(cdk_regions, dict) else []
    for region in regional_regions:
        _check_stack(region, f"{config.project_name}-{region}")

    # 3. SSM cluster-shared-bucket parameters exist
    ssm_prefix = "/gco/cluster-shared-bucket"
    for suffix in ("name", "arn", "region"):
        param = f"{ssm_prefix}/{suffix}"
        ok, remediation = check_ssm_parameter(config.global_region, param)
        # Always give a fix hint on failure; the helper's own detail is
        # appended when it provides one. (The previous ``remediation and …``
        # expression printed no hint at all when the detail was empty.)
        hint = f"deploy {config.project_name}-global first"
        if remediation:
            hint = f"{hint} ({remediation})"
        _emit(f"SSM parameter {param} exists", ok, hint)

    # 4. No orphaned retained analytics resources
    orphan_cmds = scan_orphan_analytics_resources(config.api_gateway_region)
    _emit(
        "no orphaned retained analytics resources",
        not orphan_cmds,
        "; ".join(orphan_cmds) if orphan_cmds else "",
    )

    if any_failed:
        formatter.print_error("Doctor checks failed — see remediation lines above.")
        sys.exit(1)
    formatter.print_success("All pre-flight checks passed.")