Coverage for cli/commands/stacks_cmd.py: 87%

520 statements  

« prev     ^ index     » next       coverage.py v7.14.1, created at 2026-06-15 15:07 +0000

1"""Stack deployment and management commands.""" 

2 

3import sys 

4from typing import Any 

5 

6import click 

7 

8from ..config import GCOConfig 

9from ..output import get_output_formatter 

10 

11pass_config = click.make_pass_decorator(GCOConfig, ensure=True) 

12 

13 

@click.group()
@pass_config
def stacks(config: Any) -> None:
    """Deploy and manage GCO CDK stacks."""
    # Pure container group: the sub-commands registered on it do the work, so
    # the body consists of the help docstring only.

19 

20 

@stacks.command("list")
@click.option("--refresh", is_flag=True, help="Force refresh from AWS")
@pass_config
def list_stacks(config: Any, refresh: Any) -> None:
    """List all GCO stacks (local CDK and deployed)."""
    # Imported inside the command, like every other command in this module.
    from ..stacks import get_stack_manager

    out = get_output_formatter(config)

    # NOTE: ``refresh`` is accepted on the command line but is not read
    # anywhere in this function.
    try:
        names = get_stack_manager(config).list_stacks()

        out.print_info("Available CDK stacks:")
        for name in names:
            print(f" - {name}")

    except Exception as err:
        # Any failure is reported through the formatter and mapped to exit code 1.
        out.print_error(f"Failed to list stacks: {err}")
        sys.exit(1)

41 

42 

@stacks.command("synth")
@click.argument("stack_name", required=False)
@click.option(
    # Declared as an on/off switch. The previous ``is_flag=True, default=True``
    # form evaluated to True whether or not the flag was given, so quiet mode
    # could never be turned off. ``--quiet`` and ``-q`` keep working.
    "--quiet/--no-quiet",
    "-q",
    default=True,
    help="Quiet output (default: on; use --no-quiet for full output)",
)
@pass_config
def synth_stack(config: Any, stack_name: Any, quiet: Any) -> None:
    """Synthesize CloudFormation templates."""
    # ``stack_name`` is optional and is passed to the manager as-is (None when
    # omitted). ``quiet`` is forwarded unchanged to ``manager.synth``.
    from ..stacks import get_stack_manager

    formatter = get_output_formatter(config)

    try:
        manager = get_stack_manager(config)
        output = manager.synth(stack_name, quiet=quiet)
        # Only echo when synth returned something to show.
        if output:
            print(output)
        formatter.print_success("CDK synthesis completed")
    except Exception as e:
        formatter.print_error(f"CDK synth failed: {e}")
        sys.exit(1)

62 

63 

@stacks.command("diff")
@click.argument("stack_name", required=False)
@pass_config
def diff_stack(config: Any, stack_name: Any) -> None:
    """Show differences between deployed and local stacks."""
    from ..stacks import get_stack_manager

    out = get_output_formatter(config)

    try:
        changes = get_stack_manager(config).diff(stack_name)
        # A falsy diff result is reported as "nothing to do"; anything else is
        # echoed verbatim.
        if not changes:
            out.print_success("No differences found")
        else:
            print(changes)
    except Exception as err:
        out.print_error(f"CDK diff failed: {err}")
        sys.exit(1)

83 

84 

@stacks.command("deploy")
@click.argument("stack_name")
@click.option("--yes", "-y", is_flag=True, help="Skip approval prompts")
@click.option("--outputs-file", "-o", help="Write outputs to file")
@click.option("--tag", "-t", multiple=True, help="Add tags (key=value)")
@pass_config
def deploy_stack(config: Any, stack_name: Any, yes: Any, outputs_file: Any, tag: Any) -> None:
    """Deploy a single CDK stack to AWS.

    For deploying all stacks in the correct order, use 'deploy-all'.

    Examples:
        gco stacks deploy gco-us-east-1
        gco stacks deploy gco-global -y
        gco stacks deploy gco-us-east-1 -t Environment=prod
    """
    from ..stacks import get_stack_manager

    formatter = get_output_formatter(config)

    # Parse tags. A value that is not of the form key=value is rejected up
    # front: silently dropping it would deploy the stack without a tag the
    # caller explicitly asked for.
    tags: dict[str, str] = {}
    for raw_tag in tag:
        key, separator, value = raw_tag.partition("=")
        if not separator or not key:
            formatter.print_error(f"Invalid tag '{raw_tag}': expected key=value")
            sys.exit(1)
        tags[key] = value

    try:
        manager = get_stack_manager(config)

        formatter.print_info(f"Deploying {stack_name}...")

        success = manager.deploy(
            stack_name=stack_name,
            # --yes suppresses the approval prompt.
            require_approval=not yes,
            outputs_file=outputs_file,
            # The manager receives None rather than an empty dict when no tags were given.
            tags=tags if tags else None,
        )

        if success:
            formatter.print_success("Deployment completed successfully")
        else:
            formatter.print_error("Deployment failed")
            sys.exit(1)

    except Exception as e:
        formatter.print_error(f"Deployment failed: {e}")
        sys.exit(1)

133 

134 

@stacks.command("destroy")
@click.argument("stack_name")
@click.option("--yes", "-y", is_flag=True, help="Skip confirmation")
@pass_config
def destroy_stack(config: Any, stack_name: Any, yes: Any) -> None:
    """Destroy a single CDK stack.

    For destroying all stacks in the correct order, use 'destroy-all'.

    Examples:
        gco stacks destroy gco-us-east-1
        gco stacks destroy gco-us-east-1 -y
    """
    from ..stacks import get_stack_manager

    out = get_output_formatter(config)

    # Interactive safety net; declining aborts the command before anything
    # is touched.
    if not yes:
        click.confirm(f"Are you sure you want to destroy {stack_name}?", abort=True)

    try:
        manager = get_stack_manager(config)

        out.print_info(f"Destroying {stack_name}...")

        # ``force`` mirrors the --yes flag.
        destroyed = manager.destroy(stack_name=stack_name, force=yes)

        if not destroyed:
            out.print_error("Destroy failed")
            sys.exit(1)

        out.print_success(f"Stack {stack_name} destroyed successfully")

    except Exception as err:
        out.print_error(f"Destroy failed: {err}")
        sys.exit(1)

174 

175 

@stacks.command("deploy-all")
@click.option("--yes", "-y", is_flag=True, help="Skip approval prompts")
@click.option("--outputs-file", "-o", help="Write outputs to file")
@click.option("--tag", "-t", multiple=True, help="Add tags (key=value)")
@click.option("--parallel", "-p", is_flag=True, help="Deploy regional stacks in parallel")
@click.option(
    "--max-workers",
    "-w",
    default=4,
    # A worker count below 1 is meaningless; reject it at parse time.
    type=click.IntRange(min=1),
    help="Max parallel deployments (default: 4)",
)
@pass_config
def deploy_all_orchestrated(
    config: Any, yes: Any, outputs_file: Any, tag: Any, parallel: Any, max_workers: Any
) -> None:
    """Deploy all stacks in the correct order.

    Deploys in three phases:
        1. Global stacks (gco-global, gco-api-gateway)
        2. Regional stacks (gco-us-east-1, etc.) - can be parallelized
        3. Monitoring stack (gco-monitoring) - depends on regional stacks

    Use --parallel to deploy regional stacks concurrently, which can
    significantly reduce total deployment time when deploying to
    multiple regions.

    Examples:
        gco stacks deploy-all -y
        gco stacks deploy-all -y --parallel
        gco stacks deploy-all -y -p --max-workers 8
        gco stacks deploy-all -y -t Environment=prod
    """
    from ..stacks import get_stack_manager

    formatter = get_output_formatter(config)

    # Parse tags. A value that is not of the form key=value is rejected up
    # front: silently dropping it would deploy every stack without a tag the
    # caller explicitly asked for.
    tags: dict[str, str] = {}
    for raw_tag in tag:
        key, separator, value = raw_tag.partition("=")
        if not separator or not key:
            formatter.print_error(f"Invalid tag '{raw_tag}': expected key=value")
            sys.exit(1)
        tags[key] = value

    # Progress callbacks handed to the orchestrator.
    def on_start(stack_name: str) -> None:
        formatter.print_info(f"Deploying {stack_name}...")

    def on_complete(stack_name: str, success: bool) -> None:
        if success:
            formatter.print_success(f"{stack_name} deployed")
        else:
            formatter.print_error(f"{stack_name} failed")

    try:
        manager = get_stack_manager(config)
        # Named ``stack_names`` so the local does not shadow the module-level
        # ``stacks`` click group.
        stack_names = manager.list_stacks()

        formatter.print_info(f"Found {len(stack_names)} stacks to deploy")
        if parallel:
            formatter.print_info(f"Parallel mode enabled (max workers: {max_workers})")

        success, successful, failed = manager.deploy_orchestrated(
            require_approval=not yes,
            outputs_file=outputs_file,
            tags=tags if tags else None,
            on_stack_start=on_start,
            on_stack_complete=on_complete,
            parallel=parallel,
            max_workers=max_workers,
        )

        formatter.print_info("")
        formatter.print_info(f"Deployed: {len(successful)}/{len(stack_names)} stacks")

        if success:
            formatter.print_success("All stacks deployed successfully")
        else:
            formatter.print_error(f"Deployment failed. Failed stacks: {', '.join(failed)}")
            sys.exit(1)

    except Exception as e:
        formatter.print_error(f"Deployment failed: {e}")
        sys.exit(1)

253 

254 

@stacks.command("destroy-all")
@click.option("--yes", "-y", is_flag=True, help="Skip confirmation")
@click.option("--parallel", "-p", is_flag=True, help="Destroy regional stacks in parallel")
@click.option(
    "--max-workers",
    "-w",
    default=4,
    # A worker count below 1 is meaningless; reject it at parse time.
    type=click.IntRange(min=1),
    help="Max parallel destructions (default: 4)",
)
@pass_config
def destroy_all_orchestrated(config: Any, yes: Any, parallel: Any, max_workers: Any) -> None:
    """Destroy all stacks in the correct order.

    Destroys in three phases:
        1. Monitoring stack (gco-monitoring)
        2. Regional stacks (gco-us-east-1, etc.) - can be parallelized
        3. Global stacks (gco-api-gateway, gco-global)

    Automatically retries up to 3 times (with 30s waits) if any stacks fail,
    which handles transient issues like orphaned resources during teardown.

    Use --parallel to destroy regional stacks concurrently, which can
    significantly reduce total teardown time when destroying multiple
    regional stacks.

    Examples:
        gco stacks destroy-all -y
        gco stacks destroy-all -y --parallel
        gco stacks destroy-all -y -p --max-workers 8
    """
    import time

    from ..stacks import get_stack_destroy_order, get_stack_manager

    formatter = get_output_formatter(config)
    # Retry up to 3 times total. CloudFormation stack deletions can fail
    # transiently — e.g., EKS leaves behind a cluster security group that
    # blocks VPC deletion, but it gets cleaned up async. A 30-second wait
    # between attempts is usually enough for the orphaned resources to clear.
    max_attempts = 3

    # Progress callbacks handed to the orchestrator. Defined once rather than
    # being rebuilt on every retry.
    def on_start(stack_name: str) -> None:
        formatter.print_info(f"Destroying {stack_name}...")

    def on_complete(stack_name: str, success: bool) -> None:
        if success:
            formatter.print_success(f"{stack_name} destroyed")
        else:
            formatter.print_error(f"{stack_name} failed")

    try:
        manager = get_stack_manager(config)
        # Named ``stack_names`` so the local does not shadow the module-level
        # ``stacks`` click group.
        stack_names = manager.list_stacks()
        ordered = get_stack_destroy_order(stack_names)

        if not yes:
            formatter.print_warning("This will destroy ALL GCO stacks:")
            for stack in ordered:
                formatter.print_info(f" - {stack}")
            click.confirm("\nAre you sure you want to destroy all stacks?", abort=True)

        total_stacks = len(stack_names)

        for attempt in range(1, max_attempts + 1):
            if attempt > 1:
                # Clean up EKS-managed security groups between retries.
                # After the first attempt, the EKS cluster is deleted but its
                # security group (eks-cluster-sg-*) may linger and block VPC deletion.
                formatter.print_info("Cleaning up orphaned EKS resources...")
                manager.cleanup_eks_security_groups()
                formatter.print_warning(
                    f"Attempt {attempt}/{max_attempts}: waiting 30 seconds before retrying..."
                )
                time.sleep(30)

            formatter.print_info(f"Destroying {len(stack_names)} stacks...")
            if parallel:
                formatter.print_info(f"Parallel mode enabled (max workers: {max_workers})")

            # The user has already confirmed (or passed --yes), so the
            # orchestrator is always asked to force.
            success, successful, failed = manager.destroy_orchestrated(
                force=True,
                on_stack_start=on_start,
                on_stack_complete=on_complete,
                parallel=parallel,
                max_workers=max_workers,
            )

            if success:
                break

            # On the final attempt the failure list is reported once, below.
            if attempt < max_attempts:
                formatter.print_warning(f"{len(failed)} stack(s) failed: {', '.join(failed)}")

        formatter.print_info("")
        formatter.print_info(f"Destroyed: {total_stacks - len(failed)}/{total_stacks} stacks")

        if success:
            formatter.print_success("All stacks destroyed successfully")
        else:
            formatter.print_error(f"Some stacks failed to destroy: {', '.join(failed)}")
            sys.exit(1)

    except click.Abort:
        # click.Abort derives from RuntimeError, so the generic handler below
        # would otherwise turn a declined confirmation into "Destroy failed: ".
        # Let click handle the abort itself.
        raise
    except Exception as e:
        formatter.print_error(f"Destroy failed: {e}")
        sys.exit(1)

355 

356 

@stacks.command("bootstrap")
@click.option("--account", "-a", help="AWS account ID")
@click.option("--region", "-r", required=True, help="AWS region")
@pass_config
def bootstrap_cdk(config: Any, account: Any, region: Any) -> None:
    """Bootstrap CDK in an AWS account/region.

    This is required before deploying stacks to a new account/region.

    Example:
        gco stacks bootstrap --region us-east-1
        gco stacks bootstrap -a 123456789012 -r eu-west-1
    """
    from ..stacks import get_stack_manager

    out = get_output_formatter(config)

    try:
        manager = get_stack_manager(config)
        out.print_info(f"Bootstrapping CDK in {region}...")

        # ``account`` is optional and forwarded as given (None when omitted).
        if not manager.bootstrap(account=account, region=region):
            out.print_error("Bootstrap failed")
            sys.exit(1)

        out.print_success(f"CDK bootstrapped in {region}")

    except Exception as err:
        out.print_error(f"Bootstrap failed: {err}")
        sys.exit(1)

389 

390 

@stacks.command("status")
@click.argument("stack_name")
@click.option("--region", "-r", required=True, help="AWS region")
@pass_config
def stack_status(config: Any, stack_name: Any, region: Any) -> None:
    """Get detailed status of a deployed stack."""
    from ..stacks import get_stack_manager

    out = get_output_formatter(config)

    try:
        found = get_stack_manager(config).get_stack_status(stack_name, region)

        # A falsy result is treated as "stack does not exist" and is an error.
        if not found:
            out.print_error(f"Stack {stack_name} not found in {region}")
            sys.exit(1)

        out.print(found.to_dict())

    except Exception as err:
        out.print_error(f"Failed to get stack status: {err}")
        sys.exit(1)

414 

415 

@stacks.command("outputs")
@click.argument("stack_name")
@click.option("--region", "-r", required=True, help="AWS region")
@pass_config
def stack_outputs(config: Any, stack_name: Any, region: Any) -> None:
    """Get outputs from a deployed stack."""
    from ..stacks import get_stack_manager

    out = get_output_formatter(config)

    try:
        values = get_stack_manager(config).get_outputs(stack_name, region)

        # Missing outputs only produce a warning; the command still exits 0.
        if not values:
            out.print_warning(f"No outputs found for {stack_name}")
        else:
            out.print(values)

    except Exception as err:
        out.print_error(f"Failed to get outputs: {err}")
        sys.exit(1)

438 

439 

@stacks.command("access")
@click.option("--cluster", "-c", help="Cluster name (default: gco-{region})")
@click.option("--region", "-r", help="AWS region (default: first deployment region)")
@pass_config
def setup_access(config: Any, cluster: Any, region: Any) -> None:
    """Configure kubectl access to a GCO EKS cluster.

    Updates kubeconfig, creates an EKS access entry for your IAM principal,
    and associates the cluster admin policy. Handles assumed roles automatically.

    Examples:
        gco stacks access
        gco stacks access -r us-west-2
        gco stacks access -c my-cluster -r eu-west-1
    """
    import json
    import re
    import subprocess
    import time

    from ..config import _load_cdk_json

    formatter = get_output_formatter(config)

    # Determine region: explicit flag, then the first regional entry from
    # cdk.json, then the configured default, then us-east-1.
    if not region:
        cdk_regions = _load_cdk_json()
        regional = cdk_regions["regional"] if cdk_regions and "regional" in cdk_regions else None
        # An empty "regional" list falls through to the default instead of
        # raising IndexError.
        region = regional[0] if regional else (config.default_region or "us-east-1")

    # Determine cluster name
    if not cluster:
        cluster = f"gco-{region}"

    formatter.print_info(f"Setting up access to cluster: {cluster} in region: {region}")

    # Cluster endpoint access mode — warn early if the API server is
    # private-only, since every kubectl call from outside the VPC will
    # fail. We still try every step so the access entry + policy
    # association land (those use the EKS control plane via boto3,
    # which doesn't go through the cluster endpoint), but the verify
    # step at the end will hit a connection timeout from the laptop.
    private_endpoint_only = False
    public_cidrs: list[str] = []
    try:
        endpoint_check = subprocess.run(
            [
                "aws",
                "eks",
                "describe-cluster",
                "--name",
                cluster,
                "--region",
                region,
                "--query",
                # Explicit ``+`` rather than implicit string concatenation
                # so static analysers don't flag the multi-line literal as
                # a possibly-missing comma between two list elements. The
                # value is one JMESPath expression passed as a single
                # ``--query`` argument.
                "cluster.resourcesVpcConfig.{public:endpointPublicAccess,"
                + "private:endpointPrivateAccess,publicCidrs:publicAccessCidrs}",
                "--output",
                "json",
            ],
            check=True,
            capture_output=True,
            text=True,
        )
        # ``or {}`` covers a JSON ``null`` result so the lookups below cannot
        # fail on None.
        endpoint_cfg = json.loads(endpoint_check.stdout or "{}") or {}
        if endpoint_cfg.get("public") is None:
            # No usable answer: do not guess "private-only" from missing data.
            formatter.print_info(
                "Could not determine endpoint access mode: no endpoint data returned"
            )
        else:
            is_public = bool(endpoint_cfg.get("public"))
            public_cidrs = endpoint_cfg.get("publicCidrs") or []
            if not is_public:
                private_endpoint_only = True
                formatter.print_warning(
                    f"Cluster {cluster!r} has endpointPublicAccess=false — kubectl from "
                    "outside the VPC will not be able to reach the API server. The access "
                    "entry and policy association below still apply, but the verify step "
                    "at the end will time out from this host."
                )
                formatter.print_warning(
                    "To enable kubectl from your laptop or CI runner, set "
                    '``eks_cluster.endpoint_access`` to ``"PUBLIC_AND_PRIVATE"`` in '
                    "``cdk.json`` and redeploy the regional stack: ``gco stacks deploy "
                    f"gco-{region} -y``."
                )
            elif public_cidrs:
                # Public access is on but restricted to a CIDR allowlist — the
                # caller's IP may or may not be in it.
                formatter.print_info(
                    "Cluster API endpoint is public+private with a CIDR allowlist; "
                    f"verify your egress IP is covered by one of: {', '.join(public_cidrs)}"
                )
    except (subprocess.CalledProcessError, FileNotFoundError, json.JSONDecodeError) as exc:
        # Don't block setup if describe-cluster fails or returns unparsable
        # output — the access steps below may still succeed (e.g. for a brand
        # new cluster the caller already has permission to update).
        formatter.print_info(f"Could not determine endpoint access mode: {exc}")

    try:
        # Step 1: Update kubeconfig
        formatter.print_info("Updating kubeconfig...")
        subprocess.run(
            ["aws", "eks", "update-kubeconfig", "--name", cluster, "--region", region],
            check=True,
            capture_output=True,
            text=True,
        )

        # Step 2: Get IAM principal
        formatter.print_info("Getting your IAM principal...")
        result = subprocess.run(
            ["aws", "sts", "get-caller-identity", "--query", "Arn", "--output", "text"],
            check=True,
            capture_output=True,
            text=True,
        )
        principal_arn = result.stdout.strip()
        formatter.print_info(f"Principal: {principal_arn}")

        # Handle assumed roles — rebuild the IAM role ARN from the assumed-role
        # ARN. Partition and account id are taken from the ARN itself, so no
        # second STS call is needed and non-"aws" partitions keep their prefix.
        assumed = re.match(r"arn:([^:]+):sts::(\d+):assumed-role/([^/]+)/", principal_arn)
        if assumed:
            partition, account_id, role_name = assumed.groups()
            principal_arn = f"arn:{partition}:iam::{account_id}:role/{role_name}"
            formatter.print_info(f"Using role ARN: {principal_arn}")

        # Step 3: Create access entry
        formatter.print_info("Creating EKS access entry...")
        try:
            subprocess.run(
                [
                    "aws",
                    "eks",
                    "create-access-entry",
                    "--cluster-name",
                    cluster,
                    "--region",
                    region,
                    "--principal-arn",
                    principal_arn,
                ],
                check=True,
                capture_output=True,
                text=True,
            )
        except subprocess.CalledProcessError as exc:
            # Best effort: an existing entry is fine. Any other failure is
            # surfaced so it is not mistaken for "already exists".
            detail = (exc.stderr or "").strip()
            if not detail or "ResourceInUseException" in detail:
                formatter.print_info("Access entry may already exist")
            else:
                formatter.print_warning(f"Could not create access entry: {detail}")

        # Step 4: Associate admin policy
        formatter.print_info("Associating cluster admin policy...")
        try:
            subprocess.run(
                [
                    "aws",
                    "eks",
                    "associate-access-policy",
                    "--cluster-name",
                    cluster,
                    "--region",
                    region,
                    "--principal-arn",
                    principal_arn,
                    "--policy-arn",
                    "arn:aws:eks::aws:cluster-access-policy/AmazonEKSClusterAdminPolicy",
                    "--access-scope",
                    "type=cluster",
                ],
                check=True,
                capture_output=True,
                text=True,
            )
        except subprocess.CalledProcessError as exc:
            # Still non-fatal, but show what the CLI reported when it said anything.
            detail = (exc.stderr or "").strip()
            if detail:
                formatter.print_warning(f"Could not associate admin policy: {detail}")
            else:
                formatter.print_info("Policy may already be associated")

        # Step 5: Verify access
        formatter.print_info("Waiting for permissions to propagate...")
        time.sleep(10)

        # No ``check=True`` here: a failing kubectl is diagnosed below rather
        # than treated as a fatal error.
        result = subprocess.run(
            ["kubectl", "get", "nodes", "--request-timeout=10s"],
            capture_output=True,
            text=True,
        )
        if result.returncode == 0:
            # First line of the table is the header; count the remaining rows.
            node_count = len(
                [line for line in result.stdout.strip().split("\n")[1:] if line.strip()]
            )
            print(result.stdout)
            if node_count:
                formatter.print_info(
                    f"Access configured successfully. {node_count} node(s) ready."
                )
            else:
                formatter.print_warning(
                    "kubectl connected but no nodes found (cluster may be scaling to zero)"
                )
        elif private_endpoint_only:
            # Don't double-warn — we already explained this above. Just
            # restate the fix so the operator doesn't have to scroll up.
            formatter.print_warning(
                "kubectl could not reach the API server, as expected for a "
                "private-only cluster from outside the VPC. The IAM access entry "
                "and admin policy association above did succeed, so kubectl will "
                "work from inside the VPC (e.g. SSM Session Manager into a node) "
                "or after redeploying with endpoint_access=PUBLIC_AND_PRIVATE."
            )
        else:
            stderr = (result.stderr or "").strip()
            # When the laptop's egress IP isn't in the CIDR allowlist, AWS
            # returns the API server endpoint but kubectl times out at the
            # TLS handshake. Surface the same actionable hint as the
            # private-only case.
            looks_like_network_block = (
                "i/o timeout" in stderr
                or "no route to host" in stderr
                or "connection refused" in stderr
                or "dial tcp" in stderr
            )
            if looks_like_network_block:
                formatter.print_warning(
                    "kubectl could not reach the API server. If the cluster's "
                    "endpoint_access is restricted to a CIDR allowlist, confirm "
                    "your egress IP is covered, or set endpoint_access to "
                    f'"PUBLIC_AND_PRIVATE" in cdk.json and run: gco stacks deploy gco-{region} -y'
                )
            else:
                # A non-zero exit is a failure, not "connected with zero
                # nodes"; report what kubectl actually said.
                formatter.print_warning(
                    f"kubectl could not list nodes: {stderr or 'no error output'}"
                )

    except subprocess.CalledProcessError as e:
        formatter.print_error(f"Command failed: {e.stderr or e.stdout or str(e)}")
        sys.exit(1)
    except FileNotFoundError as e:
        # Raised when the ``aws`` or ``kubectl`` executable is not on PATH.
        formatter.print_error(f"Required tool not found: {e}")
        sys.exit(1)
    except Exception as e:
        formatter.print_error(f"Failed to set up access: {e}")
        sys.exit(1)

693 

694 

@stacks.group("fsx")
@pass_config
def fsx_cmd(config: Any) -> None:
    """Manage FSx for Lustre configuration."""
    # Pure container group for the ``fsx`` sub-commands; the docstring is the
    # whole body.

700 

701 

@fsx_cmd.command("status")
@click.option("--region", "-r", help="Show config for specific region")
@pass_config
def fsx_status(config: Any, region: Any) -> None:
    """Show current FSx for Lustre configuration status."""
    from ..stacks import get_fsx_config

    out = get_output_formatter(config)

    try:
        current = get_fsx_config(region)
        # The heading says whether a regional or the global config is shown.
        heading = f"FSx config for region: {region}" if region else "Global FSx config:"
        out.print_info(heading)
        out.print(current)
    except Exception as err:
        out.print_error(f"Failed to get FSx config: {err}")
        sys.exit(1)

721 

722 

@fsx_cmd.command("enable")
@click.option("--region", "-r", help="Enable FSx for specific region only")
@click.option("--storage-capacity", "-s", default=1200, help="Storage capacity in GiB (min 1200)")
@click.option(
    "--deployment-type",
    "-d",
    type=click.Choice(["SCRATCH_1", "SCRATCH_2", "PERSISTENT_1", "PERSISTENT_2"]),
    default="SCRATCH_2",
    help="FSx deployment type",
)
@click.option("--throughput", "-t", default=200, help="Per-unit storage throughput (MB/s)")
@click.option("--compression", "-c", type=click.Choice(["LZ4", "NONE"]), default="LZ4")
@click.option("--import-path", help="S3 path for data import (s3://bucket/prefix)")
@click.option("--export-path", help="S3 path for data export (s3://bucket/prefix)")
@click.option("--yes", "-y", is_flag=True, help="Skip confirmation")
@pass_config
def fsx_enable(
    config: Any,
    region: Any,
    storage_capacity: Any,
    deployment_type: Any,
    throughput: Any,
    compression: Any,
    import_path: Any,
    export_path: Any,
    yes: Any,
) -> None:
    """Enable FSx for Lustre in the stack configuration.

    FSx for Lustre provides high-performance parallel file system storage
    ideal for ML training workloads requiring high throughput and low latency.

    Examples:
        gco stacks fsx enable
        gco stacks fsx enable --region us-east-1
        gco stacks fsx enable --storage-capacity 2400 --deployment-type PERSISTENT_2
        gco stacks fsx enable -r us-west-2 --import-path s3://my-bucket/training-data
    """
    from ..stacks import update_fsx_config

    formatter = get_output_formatter(config)

    # Reject undersized file systems before prompting or writing anything.
    if storage_capacity < 1200:
        formatter.print_error("Storage capacity must be at least 1200 GiB")
        sys.exit(1)

    scope = f"region {region}" if region else "all regions (global)"

    if not yes:
        # Show the settings that are about to be written, then ask.
        formatter.print_info(f"FSx for Lustre configuration for {scope}:")
        formatter.print_info(f" Storage Capacity: {storage_capacity} GiB")
        formatter.print_info(f" Deployment Type: {deployment_type}")
        formatter.print_info(f" Throughput: {throughput} MB/s per TiB")
        formatter.print_info(f" Compression: {compression}")
        if import_path:
            formatter.print_info(f" Import Path: {import_path}")
        if export_path:
            formatter.print_info(f" Export Path: {export_path}")
        click.confirm(f"\nEnable FSx for Lustre for {scope}?", abort=True)

    try:
        fsx_settings = {
            "enabled": True,
            "storage_capacity_gib": storage_capacity,
            "deployment_type": deployment_type,
            "per_unit_storage_throughput": throughput,
            "data_compression_type": compression,
            "import_path": import_path,
            "export_path": export_path,
            # An auto-import policy is only set when an import path was given.
            "auto_import_policy": "NEW_CHANGED_DELETED" if import_path else None,
        }

        update_fsx_config(fsx_settings, region)
        formatter.print_success(f"FSx for Lustre enabled in cdk.json for {scope}")
        if region:
            formatter.print_info(f"Run 'gco stacks deploy gco-{region}' to apply changes")
        else:
            # ``deploy`` requires a stack name, so a global change is applied
            # with ``deploy-all``.
            formatter.print_info("Run 'gco stacks deploy-all' to apply changes")

    except Exception as e:
        formatter.print_error(f"Failed to enable FSx: {e}")
        sys.exit(1)

805 

806 

@fsx_cmd.command("disable")
@click.option("--region", "-r", help="Disable FSx for specific region only")
@click.option("--yes", "-y", is_flag=True, help="Skip confirmation")
@pass_config
def fsx_disable(config: Any, region: Any, yes: Any) -> None:
    """Disable FSx for Lustre in the stack configuration.

    Note: This only updates the configuration. Run 'gco stacks deploy-all'
    (or 'gco stacks deploy gco-<region>' for a single region) to apply
    changes. Existing FSx file systems will be deleted.

    Examples:
        gco stacks fsx disable
        gco stacks fsx disable --region us-east-1
    """
    from ..stacks import update_fsx_config

    formatter = get_output_formatter(config)

    scope = f"region {region}" if region else "all regions (global)"

    if not yes:
        formatter.print_warning(f"This will disable FSx for Lustre for {scope}.")
        formatter.print_warning("Existing FSx file systems will be deleted on next deploy.")
        click.confirm("Are you sure?", abort=True)

    try:
        # Only the ``enabled`` key is sent to the config updater.
        update_fsx_config({"enabled": False}, region)
        formatter.print_success(f"FSx for Lustre disabled in cdk.json for {scope}")
        if region:
            formatter.print_info(f"Run 'gco stacks deploy gco-{region}' to apply changes")
        else:
            # ``deploy`` requires a stack name, so a global change is applied
            # with ``deploy-all``.
            formatter.print_info("Run 'gco stacks deploy-all' to apply changes")

    except Exception as e:
        formatter.print_error(f"Failed to disable FSx: {e}")
        sys.exit(1)

843 

844 

845# ============================================================================= 

846# Valkey commands 

847# ============================================================================= 

848 

849 

@stacks.group("valkey")
@pass_config
def valkey_cmd(config: Any) -> None:
    """Manage Valkey Serverless cache configuration."""
    # Pure container group for the ``valkey`` sub-commands; the docstring is
    # the whole body.

855 

856 

@valkey_cmd.command("status")
@pass_config
def valkey_status(config: Any) -> None:
    """Show current Valkey Serverless configuration status."""
    from ..stacks import get_valkey_config

    out = get_output_formatter(config)

    try:
        current = get_valkey_config()
        out.print_info("Valkey config:")
        out.print(current)
    except Exception as err:
        # Any failure is reported through the formatter and mapped to exit code 1.
        out.print_error(f"Failed to get Valkey config: {err}")
        sys.exit(1)

872 

873 

@valkey_cmd.command("enable")
@click.option("--max-storage", default=5, help="Max data storage in GB (default: 5)")
@click.option("--max-ecpu", default=5000, help="Max eCPU per second (default: 5000)")
@click.option("--snapshot-retention", default=1, help="Snapshot retention in days (default: 1)")
@click.option("--yes", "-y", is_flag=True, help="Skip confirmation")
@pass_config
def valkey_enable(
    config: Any,
    max_storage: Any,
    max_ecpu: Any,
    snapshot_retention: Any,
    yes: Any,
) -> None:
    """Enable Valkey Serverless cache in the stack configuration.

    Valkey provides a serverless key-value cache for prompt caching,
    feature stores, session state, and low-latency data access.

    Examples:
        gco stacks valkey enable
        gco stacks valkey enable --max-storage 10 --max-ecpu 10000
    """
    from ..stacks import update_valkey_config

    out = get_output_formatter(config)

    if not yes:
        # Summarise what is about to be written, then ask for confirmation.
        summary = (
            "Valkey Serverless configuration:",
            f" Max Data Storage: {max_storage} GB",
            f" Max eCPU/second: {max_ecpu}",
            f" Snapshot Retention: {snapshot_retention} days",
        )
        for line in summary:
            out.print_info(line)
        click.confirm("\nEnable Valkey Serverless?", abort=True)

    try:
        update_valkey_config(
            {
                "enabled": True,
                "max_data_storage_gb": max_storage,
                "max_ecpu_per_second": max_ecpu,
                "snapshot_retention_limit": snapshot_retention,
            }
        )
        out.print_success("Valkey Serverless enabled in cdk.json")
        out.print_info("Run 'gco stacks deploy-all -y' to apply changes")

    except Exception as err:
        out.print_error(f"Failed to enable Valkey: {err}")
        sys.exit(1)

922 

923 

@valkey_cmd.command("disable")
@click.option("--yes", "-y", is_flag=True, help="Skip confirmation")
@pass_config
def valkey_disable(config: Any, yes: Any) -> None:
    """Disable Valkey Serverless cache in the stack configuration.

    Note: This only updates the configuration. Run 'gco stacks deploy-all -y'
    to apply changes. Existing Valkey caches will be deleted.

    Examples:
        gco stacks valkey disable
    """
    from ..stacks import update_valkey_config

    out = get_output_formatter(config)

    if not yes:
        # Spell out the consequences before asking; declining aborts the command.
        for warning in (
            "This will disable Valkey Serverless.",
            "Existing Valkey caches will be deleted on next deploy.",
        ):
            out.print_warning(warning)
        click.confirm("Are you sure?", abort=True)

    try:
        # Only the "enabled" flag is sent to the config updater.
        update_valkey_config({"enabled": False})
        out.print_success("Valkey Serverless disabled in cdk.json")
        out.print_info("Run 'gco stacks deploy-all -y' to apply changes")

    except Exception as exc:
        out.print_error(f"Failed to disable Valkey: {exc}")
        sys.exit(1)

953 

954 

955# ============================================================================= 

956# Aurora pgvector commands 

957# ============================================================================= 

958 

959 

@stacks.group("aurora")
@pass_config
def aurora_cmd(config: Any) -> None:
    """Manage Aurora PostgreSQL (pgvector) configuration."""
    # Intentionally empty: this function only exists to anchor the
    # "aurora" subcommand group; the subcommands registered on it do the work.

965 

966 

@aurora_cmd.command("status")
@pass_config
def aurora_status(config: Any) -> None:
    """Show current Aurora PostgreSQL (pgvector) configuration status."""
    # Imported lazily, like the other commands in this module.
    from ..stacks import get_aurora_config

    out = get_output_formatter(config)

    try:
        current_settings = get_aurora_config()
        out.print_info("Aurora pgvector config:")
        out.print(current_settings)
    except Exception as exc:
        # Any failure (lookup or rendering) is reported and mapped to exit code 1.
        out.print_error(f"Failed to get Aurora config: {exc}")
        sys.exit(1)

982 

983 

@aurora_cmd.command("enable")
@click.option("--min-acu", default=0, help="Minimum ACU (0 = scale to zero, default: 0)")
@click.option("--max-acu", default=16, help="Maximum ACU (default: 16)")
@click.option("--backup-retention", default=7, help="Backup retention in days (default: 7)")
@click.option(
    "--deletion-protection/--no-deletion-protection",
    default=False,
    help="Enable deletion protection",
)
@click.option("--yes", "-y", is_flag=True, help="Skip confirmation")
@pass_config
def aurora_enable(
    config: Any,
    min_acu: Any,
    max_acu: Any,
    backup_retention: Any,
    deletion_protection: Any,
    yes: Any,
) -> None:
    """Enable Aurora PostgreSQL with pgvector in the stack configuration.

    Aurora Serverless v2 with pgvector provides vector similarity search
    for RAG applications, semantic search, and embedding storage.

    Examples:
        gco stacks aurora enable
        gco stacks aurora enable --min-acu 2 --max-acu 32 --deletion-protection
    """
    # NOTE: the docstring above is rendered by click as the command's help
    # text, so parameter documentation lives in comments instead:
    #   config              - GCOConfig injected by @pass_config.
    #   min_acu / max_acu   - capacity range in ACUs (min 0 = scale to zero).
    #   backup_retention    - automated backup retention, in days.
    #   deletion_protection - whether deletion protection is requested.
    #   yes                 - skip the interactive confirmation prompt.
    # Exits with status 1 on invalid input or when the config update fails;
    # click aborts the command if the user declines the confirmation.
    from ..stacks import update_aurora_config

    formatter = get_output_formatter(config)

    # Validate everything before prompting so bad values never reach cdk.json.
    if min_acu < 0:
        formatter.print_error("Minimum ACU must be >= 0")
        sys.exit(1)
    if max_acu < 1:
        formatter.print_error("Maximum ACU must be >= 1")
        sys.exit(1)
    if max_acu < min_acu:
        formatter.print_error("Maximum ACU must be >= minimum ACU")
        sys.exit(1)
    # Aurora accepts a backup retention period of 1 to 35 days; catching an
    # out-of-range value here avoids a failure at deploy time.
    if not 1 <= backup_retention <= 35:
        formatter.print_error("Backup retention must be between 1 and 35 days")
        sys.exit(1)

    if not yes:
        # Show the effective settings so the user confirms what will be written.
        formatter.print_info("Aurora pgvector configuration:")
        formatter.print_info(f" Min ACU: {min_acu} {'(scale to zero)' if min_acu == 0 else ''}")
        formatter.print_info(f" Max ACU: {max_acu}")
        formatter.print_info(f" Backup Retention: {backup_retention} days")
        formatter.print_info(f" Deletion Protection: {deletion_protection}")
        click.confirm("\nEnable Aurora pgvector?", abort=True)

    try:
        aurora_settings = {
            "enabled": True,
            "min_acu": min_acu,
            "max_acu": max_acu,
            "backup_retention_days": backup_retention,
            "deletion_protection": deletion_protection,
        }

        update_aurora_config(aurora_settings)
        formatter.print_success("Aurora pgvector enabled in cdk.json")
        # Only the configuration changes here; a deploy is needed to apply it.
        formatter.print_info("Run 'gco stacks deploy-all -y' to apply changes")

    except Exception as e:
        formatter.print_error(f"Failed to enable Aurora: {e}")
        sys.exit(1)

1050 

1051 

@aurora_cmd.command("disable")
@click.option("--yes", "-y", is_flag=True, help="Skip confirmation")
@pass_config
def aurora_disable(config: Any, yes: Any) -> None:
    """Disable Aurora PostgreSQL (pgvector) in the stack configuration.

    Note: This only updates the configuration. Run 'gco stacks deploy-all -y'
    to apply changes. Existing Aurora clusters will be deleted unless
    deletion protection is enabled.

    Examples:
        gco stacks aurora disable
    """
    from ..stacks import update_aurora_config

    out = get_output_formatter(config)

    if not yes:
        # Spell out the consequences before asking; declining aborts the command.
        deletion_notice = (
            "Existing Aurora clusters will be deleted on next deploy "
            "(unless deletion protection is enabled)."
        )
        out.print_warning("This will disable Aurora pgvector.")
        out.print_warning(deletion_notice)
        click.confirm("Are you sure?", abort=True)

    try:
        # Only the "enabled" flag is sent to the config updater.
        update_aurora_config({"enabled": False})
        out.print_success("Aurora pgvector disabled in cdk.json")
        out.print_info("Run 'gco stacks deploy-all -y' to apply changes")

    except Exception as exc:
        out.print_error(f"Failed to disable Aurora: {exc}")
        sys.exit(1)

1084 sys.exit(1)