Coverage for cli/commands/stacks_cmd.py: 87%
520 statements
« prev ^ index » next coverage.py v7.14.1, created at 2026-06-15 15:07 +0000
« prev ^ index » next coverage.py v7.14.1, created at 2026-06-15 15:07 +0000
1"""Stack deployment and management commands."""
3import sys
4from typing import Any
6import click
8from ..config import GCOConfig
9from ..output import get_output_formatter
11pass_config = click.make_pass_decorator(GCOConfig, ensure=True)
@click.group()
@pass_config
def stacks(config: Any) -> None:
    """Deploy and manage GCO CDK stacks."""
    # Pure container group: the subcommands registered on it do all the work,
    # so the docstring (shown as the Click help text) is the whole body.
@stacks.command("list")
@click.option("--refresh", is_flag=True, help="Force refresh from AWS")
@pass_config
def list_stacks(config: Any, refresh: Any) -> None:
    """List all GCO stacks (local CDK and deployed)."""
    # NOTE(review): ``refresh`` is accepted from the command line but is not
    # forwarded anywhere in this function — confirm whether the stack manager
    # is supposed to receive it.
    from ..stacks import get_stack_manager

    out = get_output_formatter(config)

    try:
        stack_names = get_stack_manager(config).list_stacks()

        out.print_info("Available CDK stacks:")
        for name in stack_names:
            print(f" - {name}")
    except Exception as err:
        # CLI boundary: report the failure and exit non-zero.
        out.print_error(f"Failed to list stacks: {err}")
        sys.exit(1)
@stacks.command("synth")
@click.argument("stack_name", required=False)
@click.option(
    "--quiet/--no-quiet",
    "-q",
    default=True,
    help="Quiet output (default: on; pass --no-quiet for full CDK output)",
)
@pass_config
def synth_stack(config: Any, stack_name: Any, quiet: Any) -> None:
    """Synthesize CloudFormation templates."""
    # The option used to be ``is_flag=True, default=True``, which made it
    # impossible to turn quiet mode off from the command line. Declaring it
    # as an on/off pair keeps ``--quiet``/``-q`` and the default working
    # exactly as before while adding ``--no-quiet`` as the opt-out.
    from ..stacks import get_stack_manager

    formatter = get_output_formatter(config)

    try:
        manager = get_stack_manager(config)
        # ``stack_name`` is optional; it is passed through unchanged (None
        # when omitted) and interpreted by the stack manager.
        output = manager.synth(stack_name, quiet=quiet)
        if output:
            print(output)
        formatter.print_success("CDK synthesis completed")
    except Exception as e:
        # CLI boundary: report the failure and exit non-zero.
        formatter.print_error(f"CDK synth failed: {e}")
        sys.exit(1)
@stacks.command("diff")
@click.argument("stack_name", required=False)
@pass_config
def diff_stack(config: Any, stack_name: Any) -> None:
    """Show differences between deployed and local stacks."""
    from ..stacks import get_stack_manager

    out = get_output_formatter(config)

    try:
        changes = get_stack_manager(config).diff(stack_name)
        # A falsy diff result is reported as "nothing changed".
        if not changes:
            out.print_success("No differences found")
        else:
            print(changes)
    except Exception as err:
        out.print_error(f"CDK diff failed: {err}")
        sys.exit(1)
@stacks.command("deploy")
@click.argument("stack_name")
@click.option("--yes", "-y", is_flag=True, help="Skip approval prompts")
@click.option("--outputs-file", "-o", help="Write outputs to file")
@click.option("--tag", "-t", multiple=True, help="Add tags (key=value)")
@pass_config
def deploy_stack(config: Any, stack_name: Any, yes: Any, outputs_file: Any, tag: Any) -> None:
    """Deploy a single CDK stack to AWS.

    For deploying all stacks in the correct order, use 'deploy-all'.

    Examples:
        gco stacks deploy gco-us-east-1
        gco stacks deploy gco-global -y
        gco stacks deploy gco-us-east-1 -t Environment=prod
    """
    from ..stacks import get_stack_manager

    formatter = get_output_formatter(config)

    # Parse ``key=value`` tags. Only the first "=" splits, so values may
    # themselves contain "=". Entries without "=" used to be dropped
    # silently; they are still skipped, but the user is now told about it.
    tags: dict[str, str] = {}
    for raw_tag in tag:
        key, separator, value = raw_tag.partition("=")
        if not separator:
            formatter.print_warning(f"Ignoring malformed tag {raw_tag!r} (expected key=value)")
            continue
        tags[key] = value

    try:
        manager = get_stack_manager(config)

        formatter.print_info(f"Deploying {stack_name}...")

        success = manager.deploy(
            stack_name=stack_name,
            require_approval=not yes,
            outputs_file=outputs_file,
            # An empty tag set is passed as None rather than {}.
            tags=tags if tags else None,
        )

        if success:
            formatter.print_success("Deployment completed successfully")
        else:
            formatter.print_error("Deployment failed")
            # SystemExit is not an ``Exception`` subclass, so it is not
            # intercepted by the handler below.
            sys.exit(1)

    except Exception as e:
        formatter.print_error(f"Deployment failed: {e}")
        sys.exit(1)
@stacks.command("destroy")
@click.argument("stack_name")
@click.option("--yes", "-y", is_flag=True, help="Skip confirmation")
@pass_config
def destroy_stack(config: Any, stack_name: Any, yes: Any) -> None:
    """Destroy a single CDK stack.

    For destroying all stacks in the correct order, use 'destroy-all'.

    Examples:
        gco stacks destroy gco-us-east-1
        gco stacks destroy gco-us-east-1 -y
    """
    from ..stacks import get_stack_manager

    out = get_output_formatter(config)

    # Interactive safety net; ``abort=True`` makes Click abort the command
    # when the user declines.
    if not yes:
        click.confirm(f"Are you sure you want to destroy {stack_name}?", abort=True)

    try:
        stack_manager = get_stack_manager(config)
        out.print_info(f"Destroying {stack_name}...")

        destroyed = stack_manager.destroy(stack_name=stack_name, force=yes)
        if not destroyed:
            out.print_error("Destroy failed")
            sys.exit(1)
        out.print_success(f"Stack {stack_name} destroyed successfully")
    except Exception as err:
        out.print_error(f"Destroy failed: {err}")
        sys.exit(1)
@stacks.command("deploy-all")
@click.option("--yes", "-y", is_flag=True, help="Skip approval prompts")
@click.option("--outputs-file", "-o", help="Write outputs to file")
@click.option("--tag", "-t", multiple=True, help="Add tags (key=value)")
@click.option("--parallel", "-p", is_flag=True, help="Deploy regional stacks in parallel")
@click.option("--max-workers", "-w", default=4, help="Max parallel deployments (default: 4)")
@pass_config
def deploy_all_orchestrated(
    config: Any, yes: Any, outputs_file: Any, tag: Any, parallel: Any, max_workers: Any
) -> None:
    """Deploy all stacks in the correct order.

    Deploys in three phases:
    1. Global stacks (gco-global, gco-api-gateway)
    2. Regional stacks (gco-us-east-1, etc.) - can be parallelized
    3. Monitoring stack (gco-monitoring) - depends on regional stacks

    Use --parallel to deploy regional stacks concurrently, which can
    significantly reduce total deployment time when deploying to
    multiple regions.

    Examples:
        gco stacks deploy-all -y
        gco stacks deploy-all -y --parallel
        gco stacks deploy-all -y -p --max-workers 8
        gco stacks deploy-all -y -t Environment=prod
    """
    from ..stacks import get_stack_manager

    formatter = get_output_formatter(config)

    # Parse ``key=value`` tags. Only the first "=" splits, so values may
    # themselves contain "=". Entries without "=" used to be dropped
    # silently; they are still skipped, but the user is now told about it.
    tags: dict[str, str] = {}
    for raw_tag in tag:
        key, separator, value = raw_tag.partition("=")
        if not separator:
            formatter.print_warning(f"Ignoring malformed tag {raw_tag!r} (expected key=value)")
            continue
        tags[key] = value

    # Progress callbacks handed to the orchestrator; they only emit output.
    def on_start(stack_name: str) -> None:
        formatter.print_info(f"Deploying {stack_name}...")

    def on_complete(stack_name: str, success: bool) -> None:
        if success:
            formatter.print_success(f" ✓ {stack_name} deployed")
        else:
            formatter.print_error(f" ✗ {stack_name} failed")

    try:
        manager = get_stack_manager(config)
        # Named ``all_stacks`` so the local does not shadow the module-level
        # ``stacks`` Click group.
        all_stacks = manager.list_stacks()

        formatter.print_info(f"Found {len(all_stacks)} stacks to deploy")
        if parallel:
            formatter.print_info(f"Parallel mode enabled (max workers: {max_workers})")

        success, successful, failed = manager.deploy_orchestrated(
            require_approval=not yes,
            outputs_file=outputs_file,
            # An empty tag set is passed as None rather than {}.
            tags=tags if tags else None,
            on_stack_start=on_start,
            on_stack_complete=on_complete,
            parallel=parallel,
            max_workers=max_workers,
        )

        formatter.print_info("")
        formatter.print_info(f"Deployed: {len(successful)}/{len(all_stacks)} stacks")

        if success:
            formatter.print_success("All stacks deployed successfully")
        else:
            formatter.print_error(f"Deployment failed. Failed stacks: {', '.join(failed)}")
            sys.exit(1)

    except Exception as e:
        formatter.print_error(f"Deployment failed: {e}")
        sys.exit(1)
@stacks.command("destroy-all")
@click.option("--yes", "-y", is_flag=True, help="Skip confirmation")
@click.option("--parallel", "-p", is_flag=True, help="Destroy regional stacks in parallel")
@click.option("--max-workers", "-w", default=4, help="Max parallel destructions (default: 4)")
@pass_config
def destroy_all_orchestrated(config: Any, yes: Any, parallel: Any, max_workers: Any) -> None:
    """Destroy all stacks in the correct order.

    Destroys in three phases:
    1. Monitoring stack (gco-monitoring)
    2. Regional stacks (gco-us-east-1, etc.) - can be parallelized
    3. Global stacks (gco-api-gateway, gco-global)

    Automatically retries up to 3 times (with 30s waits) if any stacks fail,
    which handles transient issues like orphaned resources during teardown.

    Use --parallel to destroy regional stacks concurrently, which can
    significantly reduce total teardown time when destroying multiple
    regional stacks.

    Examples:
        gco stacks destroy-all -y
        gco stacks destroy-all -y --parallel
        gco stacks destroy-all -y -p --max-workers 8
    """
    import time

    from ..stacks import get_stack_destroy_order, get_stack_manager

    formatter = get_output_formatter(config)

    # Three attempts in total. Stack deletions can fail transiently (for
    # example, EKS can leave a cluster security group behind that blocks VPC
    # deletion until it is cleaned up), so later attempts run a cleanup and
    # pause for 30 seconds first.
    max_attempts = 3

    # Progress callbacks for the orchestrator; identical on every attempt,
    # so they are defined once up front.
    def on_start(stack_name: str) -> None:
        formatter.print_info(f"Destroying {stack_name}...")

    def on_complete(stack_name: str, success: bool) -> None:
        if success:
            formatter.print_success(f" ✓ {stack_name} destroyed")
        else:
            formatter.print_error(f" ✗ {stack_name} failed")

    try:
        manager = get_stack_manager(config)
        stack_names = manager.list_stacks()
        destroy_order = get_stack_destroy_order(stack_names)

        if not yes:
            # Show exactly what is about to go away before asking.
            formatter.print_warning("This will destroy ALL GCO stacks:")
            for name in destroy_order:
                formatter.print_info(f" - {name}")
            click.confirm("\nAre you sure you want to destroy all stacks?", abort=True)

        total_stacks = len(stack_names)

        for attempt in range(1, max_attempts + 1):
            is_retry = attempt > 1
            if is_retry:
                # Between attempts, remove EKS-managed security groups
                # (eks-cluster-sg-*) that may linger after the cluster is
                # deleted and block VPC deletion, then give AWS time to settle.
                formatter.print_info("Cleaning up orphaned EKS resources...")
                manager.cleanup_eks_security_groups()
                formatter.print_warning(
                    f"Attempt {attempt}/{max_attempts}: waiting 30 seconds before retrying..."
                )
                time.sleep(30)

            formatter.print_info(f"Destroying {total_stacks} stacks...")
            if parallel:
                formatter.print_info(f"Parallel mode enabled (max workers: {max_workers})")

            success, successful, failed = manager.destroy_orchestrated(
                force=True,
                on_stack_start=on_start,
                on_stack_complete=on_complete,
                parallel=parallel,
                max_workers=max_workers,
            )

            if success:
                break

            # Only announce the failures when another attempt will follow;
            # the final summary below covers the last attempt.
            if attempt != max_attempts:
                formatter.print_warning(f"{len(failed)} stack(s) failed: {', '.join(failed)}")

        formatter.print_info("")
        formatter.print_info(f"Destroyed: {total_stacks - len(failed)}/{total_stacks} stacks")

        if not success:
            formatter.print_error(f"Some stacks failed to destroy: {', '.join(failed)}")
            sys.exit(1)
        formatter.print_success("All stacks destroyed successfully")

    except Exception as err:
        formatter.print_error(f"Destroy failed: {err}")
        sys.exit(1)
@stacks.command("bootstrap")
@click.option("--account", "-a", help="AWS account ID")
@click.option("--region", "-r", required=True, help="AWS region")
@pass_config
def bootstrap_cdk(config: Any, account: Any, region: Any) -> None:
    """Bootstrap CDK in an AWS account/region.

    This is required before deploying stacks to a new account/region.

    Example:
        gco stacks bootstrap --region us-east-1
        gco stacks bootstrap -a 123456789012 -r eu-west-1
    """
    from ..stacks import get_stack_manager

    out = get_output_formatter(config)

    try:
        stack_manager = get_stack_manager(config)
        out.print_info(f"Bootstrapping CDK in {region}...")

        # ``account`` is optional and is passed through as given (None when
        # the option is omitted).
        bootstrapped = stack_manager.bootstrap(account=account, region=region)
        if not bootstrapped:
            out.print_error("Bootstrap failed")
            sys.exit(1)
        out.print_success(f"CDK bootstrapped in {region}")
    except Exception as err:
        out.print_error(f"Bootstrap failed: {err}")
        sys.exit(1)
@stacks.command("status")
@click.argument("stack_name")
@click.option("--region", "-r", required=True, help="AWS region")
@pass_config
def stack_status(config: Any, stack_name: Any, region: Any) -> None:
    """Get detailed status of a deployed stack."""
    from ..stacks import get_stack_manager

    out = get_output_formatter(config)

    try:
        found = get_stack_manager(config).get_stack_status(stack_name, region)
        # A falsy result is treated as "no such stack" and is a hard failure.
        if not found:
            out.print_error(f"Stack {stack_name} not found in {region}")
            sys.exit(1)
        out.print(found.to_dict())
    except Exception as err:
        out.print_error(f"Failed to get stack status: {err}")
        sys.exit(1)
@stacks.command("outputs")
@click.argument("stack_name")
@click.option("--region", "-r", required=True, help="AWS region")
@pass_config
def stack_outputs(config: Any, stack_name: Any, region: Any) -> None:
    """Get outputs from a deployed stack."""
    from ..stacks import get_stack_manager

    out = get_output_formatter(config)

    try:
        stack_values = get_stack_manager(config).get_outputs(stack_name, region)
        # Missing outputs only produce a warning; the command still exits 0.
        if not stack_values:
            out.print_warning(f"No outputs found for {stack_name}")
        else:
            out.print(stack_values)
    except Exception as err:
        out.print_error(f"Failed to get outputs: {err}")
        sys.exit(1)
@stacks.command("access")
@click.option("--cluster", "-c", help="Cluster name (default: gco-{region})")
@click.option("--region", "-r", help="AWS region (default: first deployment region)")
@pass_config
def setup_access(config: Any, cluster: Any, region: Any) -> None:
    """Configure kubectl access to a GCO EKS cluster.

    Updates kubeconfig, creates an EKS access entry for your IAM principal,
    and associates the cluster admin policy. Handles assumed roles automatically.

    Examples:
        gco stacks access
        gco stacks access -r us-west-2
        gco stacks access -c my-cluster -r eu-west-1
    """
    import json
    import re
    import subprocess
    import time

    from ..config import _load_cdk_json

    formatter = get_output_formatter(config)

    # Determine region: explicit option, else the first "regional" entry
    # from cdk.json, else the configured default, else us-east-1. An empty or
    # missing "regional" list falls through to the default instead of raising
    # IndexError on ``[0]``.
    if not region:
        cdk_regions = _load_cdk_json()
        regional = (
            cdk_regions["regional"] if cdk_regions and "regional" in cdk_regions else None
        )
        if regional:
            region = regional[0]
        else:
            region = config.default_region or "us-east-1"

    # Determine cluster name
    if not cluster:
        cluster = f"gco-{region}"

    formatter.print_info(f"Setting up access to cluster: {cluster} in region: {region}")

    # Cluster endpoint access mode — warn early if the API server is
    # private-only, since every kubectl call from outside the VPC will
    # fail. We still try every step so the access entry + policy
    # association land, but the verify step at the end will hit a
    # connection timeout from the laptop.
    private_endpoint_only = False
    public_cidrs: list[str] = []
    try:
        endpoint_check = subprocess.run(
            [
                "aws",
                "eks",
                "describe-cluster",
                "--name",
                cluster,
                "--region",
                region,
                "--query",
                # Explicit ``+`` rather than implicit string concatenation
                # so static analysers don't flag the multi-line literal as
                # a possibly-missing comma between two list elements. The
                # value is one JMESPath expression passed as a single
                # ``--query`` argument.
                "cluster.resourcesVpcConfig.{public:endpointPublicAccess,"
                + "private:endpointPrivateAccess,publicCidrs:publicAccessCidrs}",
                "--output",
                "json",
            ],
            check=True,
            capture_output=True,
            text=True,
        )

        endpoint_cfg = json.loads(endpoint_check.stdout or "{}")
        if not isinstance(endpoint_cfg, dict):
            # e.g. a JSON ``null``: treat like any other probe failure
            # rather than misreporting the cluster as private-only.
            raise ValueError(f"unexpected describe-cluster output: {endpoint_check.stdout!r}")

        is_public = bool(endpoint_cfg.get("public"))
        public_cidrs = endpoint_cfg.get("publicCidrs") or []
        if not is_public:
            private_endpoint_only = True
            formatter.print_warning(
                f"Cluster {cluster!r} has endpointPublicAccess=false — kubectl from "
                "outside the VPC will not be able to reach the API server. The access "
                "entry and policy association below still apply, but the verify step "
                "at the end will time out from this host."
            )
            formatter.print_warning(
                "To enable kubectl from your laptop or CI runner, set "
                '``eks_cluster.endpoint_access`` to ``"PUBLIC_AND_PRIVATE"`` in '
                "``cdk.json`` and redeploy the regional stack: ``gco stacks deploy "
                f"gco-{region} -y``."
            )
        elif public_cidrs:
            # Public access is on but restricted to a CIDR allowlist — the
            # caller's IP may or may not be in it.
            formatter.print_info(
                "Cluster API endpoint is public+private with a CIDR allowlist; "
                f"verify your egress IP is covered by one of: {', '.join(public_cidrs)}"
            )
    except (subprocess.CalledProcessError, FileNotFoundError, ValueError) as exc:
        # Don't block setup if describe-cluster fails or returns something
        # unparseable (json.JSONDecodeError is a ValueError) — the access
        # steps below may still succeed.
        formatter.print_info(f"Could not determine endpoint access mode: {exc}")

    try:
        # Step 1: Update kubeconfig
        formatter.print_info("Updating kubeconfig...")
        subprocess.run(
            ["aws", "eks", "update-kubeconfig", "--name", cluster, "--region", region],
            check=True,
            capture_output=True,
            text=True,
        )

        # Step 2: Get IAM principal
        formatter.print_info("Getting your IAM principal...")
        result = subprocess.run(
            ["aws", "sts", "get-caller-identity", "--query", "Arn", "--output", "text"],
            check=True,
            capture_output=True,
            text=True,
        )
        principal_arn = result.stdout.strip()
        formatter.print_info(f"Principal: {principal_arn}")

        # Handle assumed roles — rebuild the IAM role ARN from the
        # assumed-role (STS session) ARN.
        if ":assumed-role/" in principal_arn:
            role_name = re.search(r":assumed-role/([^/]+)/", principal_arn)
            if role_name:
                account_result = subprocess.run(
                    [
                        "aws",
                        "sts",
                        "get-caller-identity",
                        "--query",
                        "Account",
                        "--output",
                        "text",
                    ],
                    check=True,
                    capture_output=True,
                    text=True,
                )
                account_id = account_result.stdout.strip()
                # Keep the caller's partition (second ARN field, e.g. "aws",
                # "aws-cn", "aws-us-gov") instead of hard-coding "aws".
                arn_fields = principal_arn.split(":")
                partition = arn_fields[1] if len(arn_fields) > 1 and arn_fields[1] else "aws"
                principal_arn = f"arn:{partition}:iam::{account_id}:role/{role_name.group(1)}"
                formatter.print_info(f"Using role ARN: {principal_arn}")

        # Step 3: Create access entry. Any CLI failure here is reported as
        # "may already exist" and setup continues.
        formatter.print_info("Creating EKS access entry...")
        try:
            subprocess.run(
                [
                    "aws",
                    "eks",
                    "create-access-entry",
                    "--cluster-name",
                    cluster,
                    "--region",
                    region,
                    "--principal-arn",
                    principal_arn,
                ],
                check=True,
                capture_output=True,
                text=True,
            )
        except subprocess.CalledProcessError:
            formatter.print_info("Access entry may already exist")

        # Step 4: Associate admin policy (same best-effort handling).
        # NOTE(review): the policy ARN below still uses the "aws" partition —
        # confirm the correct form for other partitions before changing it.
        formatter.print_info("Associating cluster admin policy...")
        try:
            subprocess.run(
                [
                    "aws",
                    "eks",
                    "associate-access-policy",
                    "--cluster-name",
                    cluster,
                    "--region",
                    region,
                    "--principal-arn",
                    principal_arn,
                    "--policy-arn",
                    "arn:aws:eks::aws:cluster-access-policy/AmazonEKSClusterAdminPolicy",
                    "--access-scope",
                    "type=cluster",
                ],
                check=True,
                capture_output=True,
                text=True,
            )
        except subprocess.CalledProcessError:
            formatter.print_info("Policy may already be associated")

        # Step 5: Verify access
        formatter.print_info("Waiting for permissions to propagate...")
        time.sleep(10)

        # No ``check=True`` here: a non-zero exit is classified below
        # instead of being raised.
        result = subprocess.run(
            ["kubectl", "get", "nodes", "--request-timeout=10s"],
            capture_output=True,
            text=True,
        )
        if result.returncode == 0:
            # Count non-blank lines after the header row.
            node_count = len(
                [line for line in result.stdout.strip().split("\n")[1:] if line.strip()]
            )
            print(result.stdout)
            formatter.print_info(f"Access configured successfully. {node_count} node(s) ready.")
        elif private_endpoint_only:
            # Don't double-warn — we already explained this above. Just
            # restate the fix so the operator doesn't have to scroll up.
            formatter.print_warning(
                "kubectl could not reach the API server, as expected for a "
                "private-only cluster from outside the VPC. The IAM access entry "
                "and admin policy association above did succeed, so kubectl will "
                "work from inside the VPC (e.g. SSM Session Manager into a node) "
                "or after redeploying with endpoint_access=PUBLIC_AND_PRIVATE."
            )
        else:
            stderr = (result.stderr or "").strip()
            # If kubectl's stderr looks like a network-level failure (for
            # example an egress IP outside the CIDR allowlist), surface the
            # same actionable hint as the private-only case.
            looks_like_network_block = any(
                marker in stderr
                for marker in ("i/o timeout", "no route to host", "connection refused", "dial tcp")
            )
            if looks_like_network_block:
                formatter.print_warning(
                    "kubectl could not reach the API server. If the cluster's "
                    "endpoint_access is restricted to a CIDR allowlist, confirm "
                    "your egress IP is covered, or set endpoint_access to "
                    f'"PUBLIC_AND_PRIVATE" in cdk.json and run: gco stacks deploy gco-{region} -y'
                )
            else:
                # NOTE(review): this branch is reached on any other non-zero
                # kubectl exit, so the message may not describe the real
                # failure — consider surfacing ``stderr``.
                formatter.print_warning(
                    "kubectl connected but no nodes found (cluster may be scaling to zero)"
                )

    except subprocess.CalledProcessError as e:
        formatter.print_error(f"Command failed: {e.stderr or e.stdout or str(e)}")
        sys.exit(1)
    except FileNotFoundError as e:
        # Raised by subprocess when the ``aws`` or ``kubectl`` binary is missing.
        formatter.print_error(f"Required tool not found: {e}")
        sys.exit(1)
    except Exception as e:
        formatter.print_error(f"Failed to set up access: {e}")
        sys.exit(1)
@stacks.group("fsx")
@pass_config
def fsx_cmd(config: Any) -> None:
    """Manage FSx for Lustre configuration."""
    # Container group only; the ``fsx`` subcommands carry the behavior.
@fsx_cmd.command("status")
@click.option("--region", "-r", help="Show config for specific region")
@pass_config
def fsx_status(config: Any, region: Any) -> None:
    """Show current FSx for Lustre configuration status."""
    from ..stacks import get_fsx_config

    out = get_output_formatter(config)

    try:
        current = get_fsx_config(region)
        # The heading says whether a region-specific or the global config
        # was requested.
        heading = f"FSx config for region: {region}" if region else "Global FSx config:"
        out.print_info(heading)
        out.print(current)
    except Exception as err:
        out.print_error(f"Failed to get FSx config: {err}")
        sys.exit(1)
@fsx_cmd.command("enable")
@click.option("--region", "-r", help="Enable FSx for specific region only")
@click.option("--storage-capacity", "-s", default=1200, help="Storage capacity in GiB (min 1200)")
@click.option(
    "--deployment-type",
    "-d",
    type=click.Choice(["SCRATCH_1", "SCRATCH_2", "PERSISTENT_1", "PERSISTENT_2"]),
    default="SCRATCH_2",
    help="FSx deployment type",
)
@click.option("--throughput", "-t", default=200, help="Per-unit storage throughput (MB/s)")
@click.option("--compression", "-c", type=click.Choice(["LZ4", "NONE"]), default="LZ4")
@click.option("--import-path", help="S3 path for data import (s3://bucket/prefix)")
@click.option("--export-path", help="S3 path for data export (s3://bucket/prefix)")
@click.option("--yes", "-y", is_flag=True, help="Skip confirmation")
@pass_config
def fsx_enable(
    config: Any,
    region: Any,
    storage_capacity: Any,
    deployment_type: Any,
    throughput: Any,
    compression: Any,
    import_path: Any,
    export_path: Any,
    yes: Any,
) -> None:
    """Enable FSx for Lustre in the stack configuration.

    FSx for Lustre provides high-performance parallel file system storage
    ideal for ML training workloads requiring high throughput and low latency.

    Examples:
        gco stacks fsx enable
        gco stacks fsx enable --region us-east-1
        gco stacks fsx enable --storage-capacity 2400 --deployment-type PERSISTENT_2
        gco stacks fsx enable -r us-west-2 --import-path s3://my-bucket/training-data
    """
    from ..stacks import update_fsx_config

    formatter = get_output_formatter(config)

    # Reject undersized file systems before prompting or writing anything.
    if storage_capacity < 1200:
        formatter.print_error("Storage capacity must be at least 1200 GiB")
        sys.exit(1)

    scope = f"region {region}" if region else "all regions (global)"

    if not yes:
        # Echo the full configuration so the user confirms what is written.
        formatter.print_info(f"FSx for Lustre configuration for {scope}:")
        formatter.print_info(f" Storage Capacity: {storage_capacity} GiB")
        formatter.print_info(f" Deployment Type: {deployment_type}")
        formatter.print_info(f" Throughput: {throughput} MB/s per TiB")
        formatter.print_info(f" Compression: {compression}")
        if import_path:
            formatter.print_info(f" Import Path: {import_path}")
        if export_path:
            formatter.print_info(f" Export Path: {export_path}")
        click.confirm(f"\nEnable FSx for Lustre for {scope}?", abort=True)

    try:
        fsx_settings = {
            "enabled": True,
            "storage_capacity_gib": storage_capacity,
            "deployment_type": deployment_type,
            "per_unit_storage_throughput": throughput,
            "data_compression_type": compression,
            "import_path": import_path,
            "export_path": export_path,
            # An auto-import policy is only set when an import path is given.
            "auto_import_policy": "NEW_CHANGED_DELETED" if import_path else None,
        }

        update_fsx_config(fsx_settings, region)
        formatter.print_success(f"FSx for Lustre enabled in cdk.json for {scope}")
        if region:
            formatter.print_info(f"Run 'gco stacks deploy gco-{region}' to apply changes")
        else:
            # ``gco stacks deploy`` requires a stack name, so the global hint
            # points at ``deploy-all`` instead of a command that cannot run.
            formatter.print_info("Run 'gco stacks deploy-all' to apply changes")

    except Exception as e:
        formatter.print_error(f"Failed to enable FSx: {e}")
        sys.exit(1)
@fsx_cmd.command("disable")
@click.option("--region", "-r", help="Disable FSx for specific region only")
@click.option("--yes", "-y", is_flag=True, help="Skip confirmation")
@pass_config
def fsx_disable(config: Any, region: Any, yes: Any) -> None:
    """Disable FSx for Lustre in the stack configuration.

    Note: This only updates the configuration. Run 'gco stacks deploy-all'
    to apply changes. Existing FSx file systems will be deleted.

    Examples:
        gco stacks fsx disable
        gco stacks fsx disable --region us-east-1
    """
    from ..stacks import update_fsx_config

    formatter = get_output_formatter(config)

    scope = f"region {region}" if region else "all regions (global)"

    if not yes:
        # Spell out the consequence before asking for confirmation.
        formatter.print_warning(f"This will disable FSx for Lustre for {scope}.")
        formatter.print_warning("Existing FSx file systems will be deleted on next deploy.")
        click.confirm("Are you sure?", abort=True)

    try:
        # Only the ``enabled`` key is sent to the config updater.
        update_fsx_config({"enabled": False}, region)
        formatter.print_success(f"FSx for Lustre disabled in cdk.json for {scope}")
        if region:
            formatter.print_info(f"Run 'gco stacks deploy gco-{region}' to apply changes")
        else:
            # ``gco stacks deploy`` requires a stack name, so the global hint
            # points at ``deploy-all`` instead of a command that cannot run.
            formatter.print_info("Run 'gco stacks deploy-all' to apply changes")

    except Exception as e:
        formatter.print_error(f"Failed to disable FSx: {e}")
        sys.exit(1)
845# =============================================================================
846# Valkey commands
847# =============================================================================
@stacks.group("valkey")
@pass_config
def valkey_cmd(config: Any) -> None:
    """Manage Valkey Serverless cache configuration."""
    # Container group only; the ``valkey`` subcommands carry the behavior.
@valkey_cmd.command("status")
@pass_config
def valkey_status(config: Any) -> None:
    """Show current Valkey Serverless configuration status."""
    from ..stacks import get_valkey_config

    out = get_output_formatter(config)

    try:
        current = get_valkey_config()
        out.print_info("Valkey config:")
        out.print(current)
    except Exception as err:
        # CLI boundary: report the failure and exit non-zero.
        out.print_error(f"Failed to get Valkey config: {err}")
        sys.exit(1)
@valkey_cmd.command("enable")
@click.option("--max-storage", default=5, help="Max data storage in GB (default: 5)")
@click.option("--max-ecpu", default=5000, help="Max eCPU per second (default: 5000)")
@click.option("--snapshot-retention", default=1, help="Snapshot retention in days (default: 1)")
@click.option("--yes", "-y", is_flag=True, help="Skip confirmation")
@pass_config
def valkey_enable(
    config: Any,
    max_storage: Any,
    max_ecpu: Any,
    snapshot_retention: Any,
    yes: Any,
) -> None:
    """Enable Valkey Serverless cache in the stack configuration.

    Valkey provides a serverless key-value cache for prompt caching,
    feature stores, session state, and low-latency data access.

    Examples:
        gco stacks valkey enable
        gco stacks valkey enable --max-storage 10 --max-ecpu 10000
    """
    from ..stacks import update_valkey_config

    out = get_output_formatter(config)

    if not yes:
        # Show the values that will be written, then ask; Click aborts the
        # command if the user declines.
        for summary_line in (
            "Valkey Serverless configuration:",
            f" Max Data Storage: {max_storage} GB",
            f" Max eCPU/second: {max_ecpu}",
            f" Snapshot Retention: {snapshot_retention} days",
        ):
            out.print_info(summary_line)
        click.confirm("\nEnable Valkey Serverless?", abort=True)

    try:
        update_valkey_config(
            {
                "enabled": True,
                "max_data_storage_gb": max_storage,
                "max_ecpu_per_second": max_ecpu,
                "snapshot_retention_limit": snapshot_retention,
            }
        )
        out.print_success("Valkey Serverless enabled in cdk.json")
        out.print_info("Run 'gco stacks deploy-all -y' to apply changes")
    except Exception as err:
        out.print_error(f"Failed to enable Valkey: {err}")
        sys.exit(1)
@valkey_cmd.command("disable")
@click.option("--yes", "-y", is_flag=True, help="Skip confirmation")
@pass_config
def valkey_disable(config: Any, yes: Any) -> None:
    """Disable Valkey Serverless cache in the stack configuration.

    Note: This only updates the configuration. Run 'gco stacks deploy-all -y'
    to apply changes. Existing Valkey caches will be deleted.

    Examples:
        gco stacks valkey disable
    """
    from ..stacks import update_valkey_config

    out = get_output_formatter(config)

    if not yes:
        # Warn about the consequence before asking for confirmation.
        out.print_warning("This will disable Valkey Serverless.")
        out.print_warning("Existing Valkey caches will be deleted on next deploy.")
        click.confirm("Are you sure?", abort=True)

    try:
        # Only the ``enabled`` key is sent to the config updater.
        disabled_settings = {"enabled": False}
        update_valkey_config(disabled_settings)
        out.print_success("Valkey Serverless disabled in cdk.json")
        out.print_info("Run 'gco stacks deploy-all -y' to apply changes")
    except Exception as err:
        out.print_error(f"Failed to disable Valkey: {err}")
        sys.exit(1)
955# =============================================================================
956# Aurora pgvector commands
957# =============================================================================
@stacks.group("aurora")
@pass_config
def aurora_cmd(config: Any) -> None:
    """Manage Aurora PostgreSQL (pgvector) configuration."""
    # Container group only; the ``aurora`` subcommands carry the behavior.
@aurora_cmd.command("status")
@pass_config
def aurora_status(config: Any) -> None:
    """Show current Aurora PostgreSQL (pgvector) configuration status."""
    # Imported lazily, matching the other commands in this module.
    from ..stacks import get_aurora_config

    out = get_output_formatter(config)

    try:
        # Fetch first so the header is only printed once the lookup succeeded.
        current = get_aurora_config()
        out.print_info("Aurora pgvector config:")
        out.print(current)
    except Exception as exc:
        # Any failure is reported through the formatter and mapped to exit code 1.
        out.print_error(f"Failed to get Aurora config: {exc}")
        sys.exit(1)
@aurora_cmd.command("enable")
@click.option("--min-acu", default=0, help="Minimum ACU (0 = scale to zero, default: 0)")
@click.option("--max-acu", default=16, help="Maximum ACU (default: 16)")
@click.option("--backup-retention", default=7, help="Backup retention in days (default: 7)")
@click.option(
    "--deletion-protection/--no-deletion-protection",
    default=False,
    help="Enable deletion protection",
)
@click.option("--yes", "-y", is_flag=True, help="Skip confirmation")
@pass_config
def aurora_enable(
    config: Any,
    min_acu: Any,
    max_acu: Any,
    backup_retention: Any,
    deletion_protection: Any,
    yes: Any,
) -> None:
    """Enable Aurora PostgreSQL with pgvector in the stack configuration.

    Aurora Serverless v2 with pgvector provides vector similarity search
    for RAG applications, semantic search, and embedding storage.

    Option values are validated before the configuration is updated;
    an invalid value prints an error and exits with status 1.

    Examples:
        gco stacks aurora enable
        gco stacks aurora enable --min-acu 2 --max-acu 32 --deletion-protection
    """
    from ..stacks import update_aurora_config

    formatter = get_output_formatter(config)

    # Reject bad values up front so they never reach the stored configuration
    # and only surface later as a failed deployment.
    if min_acu < 0:
        formatter.print_error("Minimum ACU must be >= 0")
        sys.exit(1)
    if max_acu < 1:
        formatter.print_error("Maximum ACU must be >= 1")
        sys.exit(1)
    if max_acu < min_acu:
        formatter.print_error("Maximum ACU must be >= minimum ACU")
        sys.exit(1)
    # Aurora accepts an automated-backup retention period of 1 to 35 days.
    if not 1 <= backup_retention <= 35:
        formatter.print_error("Backup retention must be between 1 and 35 days")
        sys.exit(1)

    if not yes:
        # Show a summary of what will be written, then ask for confirmation.
        # click.confirm(abort=True) raises click.Abort when the user declines.
        scale_note = " (scale to zero)" if min_acu == 0 else ""
        formatter.print_info("Aurora pgvector configuration:")
        formatter.print_info(f" Min ACU: {min_acu}{scale_note}")
        formatter.print_info(f" Max ACU: {max_acu}")
        formatter.print_info(f" Backup Retention: {backup_retention} days")
        formatter.print_info(f" Deletion Protection: {deletion_protection}")
        click.confirm("\nEnable Aurora pgvector?", abort=True)

    try:
        aurora_settings = {
            "enabled": True,
            "min_acu": min_acu,
            "max_acu": max_acu,
            "backup_retention_days": backup_retention,
            "deletion_protection": deletion_protection,
        }

        update_aurora_config(aurora_settings)
        formatter.print_success("Aurora pgvector enabled in cdk.json")
        formatter.print_info("Run 'gco stacks deploy-all -y' to apply changes")
    except Exception as e:
        # Any failure is reported through the formatter and mapped to exit code 1.
        formatter.print_error(f"Failed to enable Aurora: {e}")
        sys.exit(1)
@aurora_cmd.command("disable")
@click.option("--yes", "-y", is_flag=True, help="Skip confirmation")
@pass_config
def aurora_disable(config: Any, yes: Any) -> None:
    """Disable Aurora PostgreSQL (pgvector) in the stack configuration.

    Note: This only updates the configuration. Run 'gco stacks deploy-all -y'
    to apply changes. Existing Aurora clusters will be deleted unless
    deletion protection is enabled.

    Examples:
        gco stacks aurora disable
    """
    # Imported lazily, matching the other commands in this module.
    from ..stacks import update_aurora_config

    out = get_output_formatter(config)

    # Without --yes, warn about the consequences and ask for confirmation.
    # click.confirm(abort=True) raises click.Abort when the user declines,
    # so nothing below runs in that case.
    if not yes:
        deletion_warning = (
            "Existing Aurora clusters will be deleted on next deploy "
            "(unless deletion protection is enabled)."
        )
        for warning in ("This will disable Aurora pgvector.", deletion_warning):
            out.print_warning(warning)
        click.confirm("Are you sure?", abort=True)

    try:
        update_aurora_config({"enabled": False})
        out.print_success("Aurora pgvector disabled in cdk.json")
        out.print_info("Run 'gco stacks deploy-all -y' to apply changes")
    except Exception as exc:
        # Any failure is reported through the formatter and mapped to exit code 1.
        out.print_error(f"Failed to disable Aurora: {exc}")
        sys.exit(1)