Coverage for gco/stacks/api_gateway_global_stack.py: 99%
129 statements
« prev ^ index » next coverage.py v7.14.1, created at 2026-06-15 15:07 +0000
« prev ^ index » next coverage.py v7.14.1, created at 2026-06-15 15:07 +0000
1"""
2Global API Gateway stack - Single authenticated entry point for all regions.
4This stack creates the centralized API Gateway that serves as the authenticated
5entry point for all GCO API requests. It provides:
6- Edge-optimized endpoint with CloudFront for global edge caching and DDoS protection
7- IAM authentication (AWS SigV4) for all requests
8- Lambda proxy that adds secret header for backend validation
9- Secrets Manager secret with automatic rotation for request authentication
10- Multi-region replication for the auth secret
11- CloudWatch logging for audit and debugging
13Security Flow:
14 1. Client signs request with AWS credentials (SigV4)
15 2. CloudFront edge location receives request (managed by AWS)
16 3. API Gateway validates IAM permissions
17 4. Lambda proxy retrieves secret from Secrets Manager
18 5. Lambda adds X-GCO-Auth-Token header
19 6. Request forwarded to Global Accelerator
20 7. Backend services validate the secret header
22Secret Rotation:
23 The auth token is automatically rotated daily. During rotation:
24 - A new token is generated and stored as AWSPENDING
25 - Backend services accept both AWSCURRENT and AWSPENDING tokens
26 - After validation, AWSPENDING becomes AWSCURRENT
27 - Multi-region replication ensures all regions receive the new token
29This ensures all traffic goes through the authenticated path and prevents
30direct access to the Global Accelerator or regional ALBs.
31"""
33import json
34from dataclasses import dataclass
35from typing import Any
37from aws_cdk import (
38 CfnOutput,
39 Duration,
40 Fn,
41 RemovalPolicy,
42 Stack,
43)
44from aws_cdk import aws_apigateway as apigateway
45from aws_cdk import aws_cognito as cognito
46from aws_cdk import aws_iam as iam
47from aws_cdk import aws_lambda as lambda_
48from aws_cdk import aws_logs as logs
49from aws_cdk import aws_secretsmanager as secretsmanager
50from aws_cdk import aws_wafv2 as wafv2
51from constructs import Construct
53from gco.stacks.constants import LAMBDA_PYTHON_RUNTIME
55# <pyflowchart-code-diagram> BEGIN - auto-inserted, do not edit
56# Flowchart(s) generated from this file:
57# * ``GCOApiGatewayGlobalStack.__init__`` -> ``diagrams/code_diagrams/gco/stacks/api_gateway_global_stack.GCOApiGatewayGlobalStack___init__.html``
58# (PNG: ``diagrams/code_diagrams/gco/stacks/api_gateway_global_stack.GCOApiGatewayGlobalStack___init__.png``)
59# Regenerate with ``python diagrams/code_diagrams/generate.py``.
60# <pyflowchart-code-diagram> END
@dataclass(frozen=True)
class AnalyticsApiConfig:
    """Immutable hand-off of analytics settings to ``GCOApiGatewayGlobalStack``.

    An instance is built from ``GCOAnalyticsStack`` attributes and reaches
    the API gateway stack either as the ``analytics_config`` constructor
    argument or through
    :meth:`GCOApiGatewayGlobalStack.set_analytics_config`. Receiving one
    makes the gateway stack add its Cognito-authorized ``/studio/*`` route
    tree to the existing REST API. Without one, no ``/studio/*`` resources,
    no Cognito authorizer and no Studio-related ``CfnOutput`` entries are
    created.

    The class is declared with ``frozen=True``: instances reject attribute
    assignment and receive a field-based ``__hash__``, so a single config
    object can be shared between constructs without being changed after
    the synthesized template has started referencing its values.

    Attributes:
        user_pool_arn: Full ARN of the Cognito user pool that authenticates
            Studio logins, shaped like
            ``arn:aws:cognito-idp:<region>:<account>:userpool/<pool-id>``.
            The gateway stack imports the pool from this ARN to build the
            Cognito authorizer.
        user_pool_client_id: Client id of the Studio user-pool client
            (SRP auth), intended for the CLI's
            ``gco analytics studio login`` flow.
        presigned_url_lambda: The ``analytics-presigned-url`` Lambda
            function created by
            ``GCOAnalyticsStack._create_presigned_url_lambda``. It backs
            the ``GET /studio/login`` ``LambdaIntegration``.
        studio_domain_name: SageMaker Studio domain name, carried along
            as context; per the analytics stack's design the Lambda also
            receives it via its ``STUDIO_DOMAIN_NAME`` environment
            variable.
        callback_url: Concrete OAuth redirect target
            (``https://<api>/prod/studio/callback``) for use when the
            Cognito hosted UI is enabled. The matching
            ``/studio/callback`` route is wired as a stub so the URL is
            reachable right after deploy.
    """

    # Field order is part of the positional constructor signature — keep it.
    user_pool_arn: str
    user_pool_client_id: str
    presigned_url_lambda: lambda_.IFunction
    studio_domain_name: str
    callback_url: str
108class GCOApiGatewayGlobalStack(Stack):
109 """
110 Global API Gateway with IAM authentication.
112 This stack creates the single authenticated entry point for all GCO
113 API requests. All requests must be signed with AWS credentials.
115 Attributes:
116 secret: Secrets Manager secret for backend validation
117 proxy_lambda: Lambda function that proxies requests to Global Accelerator
118 aggregator_lambda: Lambda function for cross-region aggregation
119 api: REST API with IAM authentication
120 """
def __init__(
    self,
    scope: Construct,
    construct_id: str,
    global_accelerator_dns: str,
    regional_endpoints: dict[str, str] | None = None,
    analytics_config: AnalyticsApiConfig | None = None,
    **kwargs: Any,
) -> None:
    """Build every resource of the global API Gateway stack.

    Args:
        scope: Parent construct, forwarded to ``Stack.__init__``.
        construct_id: Construct id, forwarded to ``Stack.__init__``.
        global_accelerator_dns: DNS name of the Global Accelerator; kept
            on ``self.ga_dns`` and handed to the proxy Lambda.
        regional_endpoints: Optional mapping stored on
            ``self.regional_endpoints``; an empty dict is used when the
            argument is omitted or falsy.
        analytics_config: Optional analytics hand-off. When supplied, the
            ``/studio/*`` routes are wired before the constructor returns.
        **kwargs: Remaining keyword arguments for ``Stack.__init__``.
    """
    super().__init__(scope, construct_id, **kwargs)

    self.ga_dns = global_accelerator_dns
    self.regional_endpoints = regional_endpoints or {}
    # ``None`` is the default and means "analytics disabled". A real
    # config triggers ``_wire_studio_routes`` further down, after the
    # IAM-authorized ``/api/v1/*`` and ``/inference/*`` methods exist,
    # so Cognito and IAM authorization coexist per method instead of
    # being set for the whole API.
    self.analytics_config: AnalyticsApiConfig | None = analytics_config

    # Build order follows the dependencies: the Lambdas read
    # ``self.secret``, the REST API integrates the Lambdas, and the WAF
    # and the outputs both need ``self.api``.
    self.secret = self._create_secret()
    self.proxy_lambda = self._create_proxy_lambda()
    self.aggregator_lambda = self._create_aggregator_lambda()
    self.api = self._create_api_gateway()
    self._create_waf()
    self._create_outputs()

    # Constructor-time analytics wiring. Most deployments use
    # :meth:`set_analytics_config` instead, because ``GCOAnalyticsStack``
    # is built after this stack in ``app.py``.
    if self.analytics_config is not None:
        self._wire_studio_routes()

    # cdk-nag suppressions are applied last.
    self._apply_nag_suppressions()
def _apply_nag_suppressions(self) -> None:
    """Register this stack's cdk-nag suppressions via the shared helper."""
    # Function-scope import, as in the rest of this class.
    from gco.stacks.nag_suppressions import apply_all_suppressions as suppress_all

    # ``global_region`` is this stack's own region: the SSM-parameter
    # suppressions need it because the aggregator Lambda reads ALB
    # hostnames from SSM in the global region.
    suppress_all(
        self,
        stack_type="api_gateway",
        global_region=self.region,
    )
def _create_secret(self) -> secretsmanager.Secret:
    """Create the auth-token secret used to validate API Gateway traffic.

    The secret is named ``gco/api-gateway-auth-token`` and holds a JSON
    document whose ``token`` key is a generated 64-character value
    without punctuation. It is configured with:

    - Automatic rotation every day (``Duration.days(1)``)
    - A rotation Lambda, which is also stored on ``self.rotation_lambda``
    - ``RemovalPolicy.DESTROY``, so the secret is removed with the stack

    Replication to other regions is not configured here; it can be added
    on the returned secret with ``add_replica_region()``.

    Returns:
        The created ``secretsmanager.Secret``.
    """
    secret = secretsmanager.Secret(
        self,
        "GCOAuthSecret",
        secret_name="gco/api-gateway-auth-token",  # nosec B106 — this is the secret path, not a password
        description="Secret token for validating requests from API Gateway to ALB (auto-rotated)",
        generate_secret_string=secretsmanager.SecretStringGenerator(
            secret_string_template=json.dumps({"description": "GCO API Gateway auth token"}),
            generate_string_key="token",
            exclude_punctuation=True,
            password_length=64,
        ),
        removal_policy=RemovalPolicy.DESTROY,
    )

    # Keep the rotation Lambda on the instance so it can be monitored.
    self.rotation_lambda = self._create_rotation_lambda(secret)

    # Rotate once a day.
    secret.add_rotation_schedule(
        "RotationSchedule",
        automatically_after=Duration.days(1),
        rotation_lambda=self.rotation_lambda,
    )

    return secret
def _create_rotation_lambda(self, secret: secretsmanager.Secret) -> lambda_.Function:
    """Create the Lambda function that rotates ``secret``.

    The handler code is loaded from ``lambda/secret-rotation`` and is not
    part of this file. It is expected to implement the four-step Secrets
    Manager rotation protocol:

    1. createSecret - generate a new random token
    2. setSecret - no-op (there is no external system)
    3. testSecret - validate the token structure
    4. finishSecret - move AWSPENDING to AWSCURRENT

    Args:
        secret: The secret the function is allowed to read, write and
            rotate.

    Returns:
        The rotation function, invokable by the Secrets Manager service
        principal.
    """
    # Execution role: basic Lambda logging plus access to the secret.
    basic_execution = iam.ManagedPolicy.from_aws_managed_policy_name(
        "service-role/AWSLambdaBasicExecutionRole"
    )
    role = iam.Role(
        self,
        "RotationLambdaRole",
        assumed_by=iam.ServicePrincipal("lambda.amazonaws.com"),
        managed_policies=[basic_execution],
    )

    secret.grant_read(role)
    secret.grant_write(role)

    # Explicit statement with the actions used during rotation, scoped
    # to this one secret.
    rotation_actions = [
        "secretsmanager:DescribeSecret",
        "secretsmanager:GetSecretValue",
        "secretsmanager:PutSecretValue",
        "secretsmanager:UpdateSecretVersionStage",
    ]
    role.add_to_policy(
        iam.PolicyStatement(actions=rotation_actions, resources=[secret.secret_arn])
    )

    # Dedicated log group: one month retention, removed with the stack.
    log_group = logs.LogGroup(
        self,
        "RotationLambdaLogGroup",
        retention=logs.RetentionDays.ONE_MONTH,
        removal_policy=RemovalPolicy.DESTROY,
    )

    rotation_fn = lambda_.Function(
        self,
        "SecretRotationFunction",
        runtime=getattr(lambda_.Runtime, LAMBDA_PYTHON_RUNTIME),
        handler="handler.lambda_handler",
        code=lambda_.Code.from_asset("lambda/secret-rotation"),
        timeout=Duration.seconds(30),
        memory_size=128,
        role=role,
        log_group=log_group,
        description="Rotates the GCO API Gateway auth token",
        tracing=lambda_.Tracing.ACTIVE,
    )

    # Secrets Manager must be able to invoke the function.
    rotation_fn.grant_invoke(iam.ServicePrincipal("secretsmanager.amazonaws.com"))

    # Suppress the cdk-nag wildcard-resource finding on the role and its
    # children; the rationale is recorded in the reason string.
    from cdk_nag import NagSuppressions

    wildcard_suppression = {
        "id": "AwsSolutions-IAM5",
        "reason": (
            "The secret rotation Lambda needs secretsmanager:GetSecretValue "
            "and PutSecretValue on the rotation secret. CDK's grant methods "
            "generate Resource: * for the rotation function's execution role "
            "because the secret ARN includes a random suffix not known at "
            "synth time."
        ),
        "appliesTo": ["Resource::*"],
    }
    NagSuppressions.add_resource_suppressions(
        role,
        [wildcard_suppression],
        apply_to_children=True,
    )

    return rotation_fn
def _create_proxy_lambda(self) -> lambda_.Function:
    """Create the Lambda function that proxies requests to Global Accelerator.

    The function receives the accelerator DNS name
    (``GLOBAL_ACCELERATOR_ENDPOINT``) and the auth secret's ARN
    (``SECRET_ARN``) through its environment, and its role may read
    ``self.secret``.

    Returns:
        The proxy function.
    """
    # Execution role: basic Lambda logging plus read access to the secret.
    basic_execution = iam.ManagedPolicy.from_aws_managed_policy_name(
        "service-role/AWSLambdaBasicExecutionRole"
    )
    role = iam.Role(
        self,
        "ProxyLambdaRole",
        assumed_by=iam.ServicePrincipal("lambda.amazonaws.com"),
        managed_policies=[basic_execution],
    )
    self.secret.grant_read(role)

    # Dedicated log group: one week retention, removed with the stack.
    log_group = logs.LogGroup(
        self,
        "ProxyLambdaLogGroup",
        retention=logs.RetentionDays.ONE_WEEK,
        removal_policy=RemovalPolicy.DESTROY,
    )

    proxy_env = {
        "GLOBAL_ACCELERATOR_ENDPOINT": self.ga_dns,
        "SECRET_ARN": self.secret.secret_arn,
    }
    proxy_fn = lambda_.Function(
        self,
        "ApiGatewayProxyFunction",
        runtime=getattr(lambda_.Runtime, LAMBDA_PYTHON_RUNTIME),
        handler="handler.lambda_handler",
        code=lambda_.Code.from_asset("lambda/api-gateway-proxy"),
        # 29 seconds, the same value used for the API Gateway
        # integration timeout in ``_create_api_gateway``.
        timeout=Duration.seconds(29),
        memory_size=256,
        role=role,
        environment=proxy_env,
        log_group=log_group,
        tracing=lambda_.Tracing.ACTIVE,
    )

    # Suppress the cdk-nag wildcard-resource finding on the role and its
    # children.
    # NOTE(review): the recorded reason cites VPC Lambda execution, but no
    # ``vpc`` argument is passed to the function above — confirm the
    # rationale is still accurate.
    from cdk_nag import NagSuppressions

    wildcard_suppression = {
        "id": "AwsSolutions-IAM5",
        "reason": (
            "The API Gateway proxy Lambda forwards requests to regional ALBs. "
            "Its execution role needs broad network access "
            "(ec2:CreateNetworkInterface, etc.) for VPC Lambda execution. "
            "These APIs do not support resource-level scoping."
        ),
        "appliesTo": ["Resource::*"],
    }
    NagSuppressions.add_resource_suppressions(
        role,
        [wildcard_suppression],
        apply_to_children=True,
    )

    return proxy_fn
def _create_aggregator_lambda(self) -> lambda_.Function:
    """Create the Lambda function behind the cross-region aggregation routes.

    The handler lives in ``lambda/cross-region-aggregator`` (not part of
    this file) and is meant to query the regional ALBs and combine the
    results into global views of jobs, health and metrics. Its role may
    read ``self.secret`` and the SSM parameters under ``/gco/`` in this
    stack's region and account.

    Returns:
        The aggregator function.
    """
    # Execution role: basic Lambda logging plus read access to the secret.
    basic_execution = iam.ManagedPolicy.from_aws_managed_policy_name(
        "service-role/AWSLambdaBasicExecutionRole"
    )
    role = iam.Role(
        self,
        "AggregatorLambdaRole",
        assumed_by=iam.ServicePrincipal("lambda.amazonaws.com"),
        managed_policies=[basic_execution],
    )
    self.secret.grant_read(role)

    # SSM read access, used to discover the regional endpoints.
    ssm_read = iam.PolicyStatement(
        effect=iam.Effect.ALLOW,
        actions=["ssm:GetParametersByPath", "ssm:GetParameter"],
        resources=[f"arn:aws:ssm:{self.region}:{self.account}:parameter/gco/*"],
    )
    role.add_to_policy(ssm_read)

    # Dedicated log group: one week retention, removed with the stack.
    log_group = logs.LogGroup(
        self,
        "AggregatorLambdaLogGroup",
        retention=logs.RetentionDays.ONE_WEEK,
        removal_policy=RemovalPolicy.DESTROY,
    )

    aggregator_env = {
        "SECRET_ARN": self.secret.secret_arn,
        "PROJECT_NAME": "gco",
        "GLOBAL_REGION": self.region,
    }
    aggregator_fn = lambda_.Function(
        self,
        "CrossRegionAggregatorFunction",
        runtime=getattr(lambda_.Runtime, LAMBDA_PYTHON_RUNTIME),
        handler="handler.lambda_handler",
        code=lambda_.Code.from_asset("lambda/cross-region-aggregator"),
        timeout=Duration.seconds(29),
        memory_size=512,
        role=role,
        environment=aggregator_env,
        log_group=log_group,
        description="Aggregates data from all regional GCO clusters",
        tracing=lambda_.Tracing.ACTIVE,
    )

    # Suppress the cdk-nag wildcard-resource finding on the role and its
    # children; the rationale is recorded in the reason string.
    from cdk_nag import NagSuppressions

    wildcard_suppression = {
        "id": "AwsSolutions-IAM5",
        "reason": (
            "The cross-region aggregator Lambda reads SSM parameters and "
            "invokes regional endpoints. Its execution role needs "
            "ssm:GetParameter on the project's parameter namespace and "
            "secretsmanager:GetSecretValue for the auth token."
        ),
        "appliesTo": ["Resource::*"],
    }
    NagSuppressions.add_resource_suppressions(
        role,
        [wildcard_suppression],
        apply_to_children=True,
    )

    return aggregator_fn
def _create_api_gateway(self) -> apigateway.RestApi:
    """Create the edge-optimized REST API and its IAM-authorized routes.

    Builds, in this order: the access-log group, the ``RestApi`` with its
    ``prod`` stage, two resource-policy statements, the
    ``/api/v1/{proxy+}`` catch-all backed by the proxy Lambda, the
    ``/api/v1/global/*`` aggregation routes and the
    ``/inference/{proxy+}`` routes. Resources and methods are added in a
    fixed order; keep it stable when editing.

    Returns:
        The created ``RestApi``.
    """
    access_logs = logs.LogGroup(
        self,
        "ApiGatewayLogs",
        log_group_name="/aws/apigateway/gco-global",
        retention=logs.RetentionDays.ONE_MONTH,
        removal_policy=RemovalPolicy.DESTROY,
    )

    # JSON access-log line with the standard fields switched on.
    access_log_format = apigateway.AccessLogFormat.json_with_standard_fields(
        caller=True,
        http_method=True,
        ip=True,
        protocol=True,
        request_time=True,
        resource_path=True,
        response_length=True,
        status=True,
        user=True,
    )
    # NOTE(review): ``data_trace_enabled=True`` turns on data-trace
    # logging for the stage — confirm that is intended for ``prod``.
    prod_stage = apigateway.StageOptions(
        stage_name="prod",
        throttling_rate_limit=1000,
        throttling_burst_limit=2000,
        logging_level=apigateway.MethodLoggingLevel.INFO,
        data_trace_enabled=True,
        metrics_enabled=True,
        tracing_enabled=True,  # X-Ray tracing for request analysis
        access_log_destination=apigateway.LogGroupLogDestination(access_logs),
        access_log_format=access_log_format,
    )

    # EDGE endpoint type: the API is fronted by the AWS-managed
    # CloudFront distribution of an edge-optimized endpoint.
    rest_api = apigateway.RestApi(
        self,
        "GCOGlobalApi",
        rest_api_name="gco-global-api",
        description="Global authenticated API for GCO (Global Capacity Orchestrator on AWS) (edge-optimized)",
        endpoint_types=[apigateway.EndpointType.EDGE],
        deploy=True,
        deploy_options=prod_stage,
        cloud_watch_role=True,
    )

    # Resource policy, statement 1: any principal may invoke, provided
    # it belongs to this stack's account.
    same_account_only = iam.PolicyStatement(
        effect=iam.Effect.ALLOW,
        principals=[iam.AnyPrincipal()],
        actions=["execute-api:Invoke"],
        resources=["execute-api:/*"],
        conditions={"StringEquals": {"aws:PrincipalAccount": self.account}},
    )
    rest_api.add_to_resource_policy(same_account_only)

    # Resource policy, statement 2: GET on /studio/* is allowed without
    # the account condition. Cognito tokens carry no
    # aws:PrincipalAccount, so statement 1 alone would reject them before
    # the method-level Cognito authorizer could run. This statement is
    # added whether or not an analytics config is attached.
    studio_get_open = iam.PolicyStatement(
        effect=iam.Effect.ALLOW,
        principals=[iam.AnyPrincipal()],
        actions=["execute-api:Invoke"],
        resources=["execute-api:/*/GET/studio/*"],
    )
    rest_api.add_to_resource_policy(studio_get_open)

    # One proxy integration, shared by /api/v1/{proxy+} and /inference.
    proxy_integration = apigateway.LambdaIntegration(
        self.proxy_lambda, proxy=True, timeout=Duration.seconds(29)
    )

    # /api/v1/{proxy+} catches every path below /api/v1.
    v1 = rest_api.root.add_resource("api").add_resource("v1")
    catch_all = v1.add_resource("{proxy+}")

    # IAM-authorized methods on the catch-all.
    proxy_status_codes = ("200", "400", "403", "500")
    for verb in ("GET", "POST", "PUT", "DELETE", "PATCH"):
        catch_all.add_method(
            verb,
            proxy_integration,
            authorization_type=apigateway.AuthorizationType.IAM,
            method_responses=[
                apigateway.MethodResponse(status_code=code) for code in proxy_status_codes
            ],
        )

    # /api/v1/global/* aggregation routes.
    self._create_global_routes(rest_api, v1)

    # /inference/{proxy+} → proxy Lambda → GA → ALB → K8s Ingress
    self._create_inference_routes(rest_api, proxy_integration)

    return rest_api
def _create_global_routes(
    self, api: apigateway.RestApi, v1_resource: apigateway.Resource
) -> None:
    """Add the IAM-authorized cross-region aggregation routes.

    Routes (all served by the aggregator Lambda):
        GET    /api/v1/global/jobs   - List jobs across all regions
        DELETE /api/v1/global/jobs   - Bulk delete across all regions
        GET    /api/v1/global/health - Health status across all regions
        GET    /api/v1/global/status - Cluster status across all regions

    Args:
        api: The REST API being assembled. Not read by this method; kept
            in the signature for callers.
        v1_resource: The ``/api/v1`` resource under which ``global`` is
            created.
    """
    integration = apigateway.LambdaIntegration(
        self.aggregator_lambda, proxy=True, timeout=Duration.seconds(29)
    )
    iam_auth = apigateway.AuthorizationType.IAM

    global_root = v1_resource.add_resource("global")

    # /global/jobs: GET and DELETE; additionally declares a 400 response.
    jobs = global_root.add_resource("jobs")
    for verb in ("GET", "DELETE"):
        jobs.add_method(
            verb,
            integration,
            authorization_type=iam_auth,
            method_responses=[
                apigateway.MethodResponse(status_code=code)
                for code in ("200", "400", "500")
            ],
        )

    # /global/health and /global/status: read-only, 200/500 responses.
    for leaf_name in ("health", "status"):
        leaf = global_root.add_resource(leaf_name)
        leaf.add_method(
            "GET",
            integration,
            authorization_type=iam_auth,
            method_responses=[
                apigateway.MethodResponse(status_code=code) for code in ("200", "500")
            ],
        )
def _create_inference_routes(
    self,
    api: apigateway.RestApi,
    lambda_integration: apigateway.LambdaIntegration,
) -> None:
    """Add the IAM-authorized proxy routes for inference endpoints.

    Routes:
        GET, POST, PUT, DELETE and PATCH on ``/inference/{proxy+}``
        → proxy Lambda → GA → ALB → K8s Ingress

    Authenticated inference requests pass through API Gateway with IAM
    auth and are then proxied to the regional ALB, where the K8s Ingress
    routes them to the right inference Service.

    Args:
        api: The REST API whose root receives the ``inference`` resource.
        lambda_integration: Integration used for every inference method.
    """
    inference_catch_all = api.root.add_resource("inference").add_resource("{proxy+}")

    status_codes = ("200", "400", "404", "500", "502")
    for verb in ("GET", "POST", "PUT", "DELETE", "PATCH"):
        inference_catch_all.add_method(
            verb,
            lambda_integration,
            authorization_type=apigateway.AuthorizationType.IAM,
            method_responses=[
                apigateway.MethodResponse(status_code=code) for code in status_codes
            ],
        )
def _create_outputs(self) -> None:
    """Publish the API URL and the auth-secret ARN as exported stack outputs."""
    # (construct id, value, description, export name)
    exported = (
        (
            "ApiEndpoint",
            self.api.url,
            "Global API Gateway endpoint (IAM authenticated)",
            "gco-global-api-endpoint",
        ),
        (
            "SecretArn",
            self.secret.secret_arn,
            "Secret ARN for ALB validation",
            "gco-auth-secret-arn",
        ),
    )
    for output_id, value, description, export_name in exported:
        CfnOutput(
            self,
            output_id,
            value=value,
            description=description,
            export_name=export_name,
        )
def set_analytics_config(self, config: AnalyticsApiConfig) -> None:
    """Attach a post-construction ``AnalyticsApiConfig`` and wire ``/studio/*`` routes.

    ``GCOAnalyticsStack`` is created *after* ``GCOApiGatewayGlobalStack``
    in ``app.py`` (the regional stacks already declare a dependency on
    the API gateway stack, so re-ordering the two global stacks would
    ripple through the entire stack graph). This mutator lets ``app.py``
    defer attaching the analytics integration until both stacks exist,
    without changing the constructor contract or the existing cross-stack
    dependency wiring.

    MUST be called **at most once**, and only before stack synthesis
    finishes. A second call raises ``RuntimeError`` so the caller cannot
    accidentally double-wire the Cognito authorizer.

    Args:
        config: The ``AnalyticsApiConfig`` built from the
            ``GCOAnalyticsStack`` attributes. Must be non-``None`` —
            when analytics is disabled, simply do not call this method.

    Raises:
        RuntimeError: if the stack already has an attached
            ``analytics_config`` (from either the constructor argument
            or a prior ``set_analytics_config`` call).
        ValueError: if ``config`` is ``None``. The stack is left
            unchanged in that case.
    """
    if self.analytics_config is not None:
        raise RuntimeError(
            "GCOApiGatewayGlobalStack.set_analytics_config may only be called "
            "once. The stack already has an analytics_config attached."
        )
    # Enforce the documented contract here instead of relying on the
    # ``assert`` inside ``_wire_studio_routes``, which is stripped when
    # Python runs with ``-O``. Checking before the assignment keeps the
    # stack untouched on misuse.
    if config is None:
        raise ValueError(
            "GCOApiGatewayGlobalStack.set_analytics_config requires a non-None "
            "AnalyticsApiConfig; do not call it when analytics is disabled."
        )
    self.analytics_config = config
    self._wire_studio_routes()
def _wire_studio_routes(self) -> None:
    """Attach the Cognito-authorized ``/studio/*`` route tree.

    Invoked from ``__init__`` when an ``AnalyticsApiConfig`` was passed
    to the constructor, or from :meth:`set_analytics_config` when the
    config arrives later. The caller is responsible for making sure
    ``self.analytics_config`` is set.

    This runs after ``_create_api_gateway`` has attached the
    IAM-authorized ``/api/v1/*`` and ``/inference/*`` methods. The
    Cognito authorizer is passed per method, so those existing methods
    are not touched.

    Resources added, in order:

    * An imported reference to the user pool
      (``UserPool.from_user_pool_arn``) and a
      ``CognitoUserPoolsAuthorizer`` with construct id
      ``StudioCognitoAuthorizer``.
    * A ``RequestValidator`` with ``validate_request_parameters=True``,
      given to the ``/studio/login`` method through its
      ``request_validator`` argument.
    * The ``/studio``, ``/studio/login`` and ``/studio/callback``
      resources.
    * ``GET /studio/login`` — Cognito-authorized, proxy
      ``LambdaIntegration`` to ``presigned_url_lambda`` with a
      29-second timeout.
    * ``GET /studio/callback`` — unauthenticated MOCK integration that
      answers 200 with an empty body; the landing page for the OAuth
      redirect when the Cognito hosted UI is enabled.
    * ``CfnOutput`` ``CognitoAuthorizerId`` holding the authorizer id.
    * ``CfnOutput`` ``StudioLoginUrl`` — the concrete login URL,
      assembled with ``Fn.sub`` because the REST API id is only known at
      deploy time.
    """
    assert self.analytics_config is not None, (
        "_wire_studio_routes called without an AnalyticsApiConfig attached."
    )
    cfg = self.analytics_config

    # Import the existing user pool by ARN; no new Cognito resources
    # are created in this stack.
    studio_pool = cognito.UserPool.from_user_pool_arn(
        self,
        "StudioUserPoolRef",
        cfg.user_pool_arn,
    )
    # No explicit attach call follows: the authorizer is simply passed
    # to ``add_method`` below.
    cognito_authorizer = apigateway.CognitoUserPoolsAuthorizer(
        self,
        "StudioCognitoAuthorizer",
        cognito_user_pools=[studio_pool],
        authorizer_name="gco-studio-cognito-authorizer",
    )

    # Validates request parameters before the Lambda is invoked. The
    # Cognito ID token itself is checked by the authorizer, not here.
    parameter_validator = apigateway.RequestValidator(
        self,
        "StudioRequestValidator",
        rest_api=self.api,
        request_validator_name="gco-studio-request-validator",
        validate_request_parameters=True,
    )

    # /studio → /studio/login + /studio/callback
    studio = self.api.root.add_resource("studio")
    login = studio.add_resource("login")
    callback = studio.add_resource("callback")

    # GET /studio/login — Cognito-authorized proxy to the presigned-URL
    # Lambda. The 29-second integration timeout is meant to match the
    # Lambda's own timeout (configured in the analytics stack, not
    # visible here).
    login_responses = [
        apigateway.MethodResponse(status_code=code)
        for code in ("200", "400", "401", "404", "500")
    ]
    login.add_method(
        "GET",
        apigateway.LambdaIntegration(
            cfg.presigned_url_lambda,
            proxy=True,
            timeout=Duration.seconds(29),
        ),
        authorization_type=apigateway.AuthorizationType.COGNITO,
        authorizer=cognito_authorizer,
        request_validator=parameter_validator,
        method_responses=login_responses,
    )

    # GET /studio/callback — stub landing page for the hosted-UI OAuth
    # redirect. It is an unauthenticated MOCK integration so the page
    # is reachable without a signed request, and the body is
    # deliberately empty: the flow uses the ``code`` query-string
    # parameter, not the response body.
    empty_ok = apigateway.IntegrationResponse(
        status_code="200",
        response_templates={"application/json": ""},
    )
    mock_integration = apigateway.MockIntegration(
        integration_responses=[empty_ok],
        request_templates={"application/json": '{"statusCode": 200}'},
    )
    callback_get = callback.add_method(
        "GET",
        mock_integration,
        authorization_type=apigateway.AuthorizationType.NONE,
        method_responses=[apigateway.MethodResponse(status_code="200")],
    )

    # The missing authorization on the callback is deliberate (the
    # browser redirect from Cognito carries neither SigV4 nor an
    # id-token header), so the matching cdk-nag finding is suppressed
    # on that one method.
    from cdk_nag import NagSuppressions

    callback_suppression = {
        "id": "AwsSolutions-APIG4",
        "reason": (
            "/studio/callback is the Cognito hosted-UI OAuth "
            "redirect landing page. The browser redirect from "
            "Cognito carries the authorization code as a "
            "query-string parameter; it does NOT carry SigV4 "
            "or an id-token header. Adding IAM or Cognito "
            "authorization here would break the OAuth flow. "
            "The route is a MOCK integration that returns an "
            "empty 200 body; it does not expose any backend "
            "resources."
        ),
    }
    NagSuppressions.add_resource_suppressions(callback_get, [callback_suppression])

    # Stack outputs — read by the CLI for auto-discovery.
    CfnOutput(
        self,
        "CognitoAuthorizerId",
        value=cognito_authorizer.authorizer_id,
        description="API Gateway authorizer id for the Studio Cognito authorizer",
        export_name="gco-studio-cognito-authorizer-id",
    )

    # ``self.api.url`` points at the stage root, so the login URL is
    # assembled with ``Fn.sub`` to give operators a copy-pastable link.
    login_url_parts = {
        "ApiId": self.api.rest_api_id,
        "Stage": self.api.deployment_stage.stage_name,
    }
    login_url = Fn.sub(
        "https://${ApiId}.execute-api.${AWS::Region}.${AWS::URLSuffix}/${Stage}/studio/login",
        login_url_parts,
    )
    CfnOutput(
        self,
        "StudioLoginUrl",
        value=login_url,
        description="Concrete URL for the /studio/login route (Cognito-authenticated)",
        export_name="gco-studio-login-url",
    )
def _create_waf(self) -> None:
    """Create the WAF WebACL that protects the API Gateway stage.

    The WebACL contains, in evaluation order:

    - priority 0: ``PerIPRateLimit``, a rate-based rule that blocks a
      source IP once it exceeds the configured request limit.
    - priorities 1-6: AWS Managed Rule Groups covering common web
      exploits (OWASP Top 10), known bad inputs, SQL injection,
      Linux-specific attacks, IP reputation threats, and anonymous IP
      addresses (Tor, VPNs, proxies).

    Requests that match no rule are allowed (default action ``allow``).

    The per-IP limit is read from the CDK context key
    ``waf.per_ip_rate_limit`` and defaults to 100.

    Besides the WebACL (stored on ``self.web_acl``) this method creates the
    WAF log group, the logging configuration that sends WAF logs to
    CloudWatch Logs (kept for HIPAA, NIST 800-53 and PCI-DSS compliance),
    the association between the WebACL and the API Gateway deployment
    stage, and the ``WebAclArn`` stack output.
    """
    # WAF requires the log group name to start with "aws-waf-logs-".
    waf_log_group = logs.LogGroup(
        self,
        "WafLogGroup",
        log_group_name="aws-waf-logs-gco-api-gateway",
        retention=logs.RetentionDays.ONE_MONTH,
        removal_policy=RemovalPolicy.DESTROY,
    )

    # The limit is configurable via the `cdk.json` context key
    # `waf.per_ip_rate_limit` (default: 100 requests per window).
    waf_config = self.node.try_get_context("waf") or {}
    per_ip_rate_limit = int(waf_config.get("per_ip_rate_limit", 100))

    # AWS Managed Rule Groups in evaluation order. The position in this
    # tuple determines the rule priority (first entry -> priority 1), so
    # reordering entries changes the deployed WebACL.
    managed_rule_groups = (
        "AWSManagedRulesCommonRuleSet",  # common exploits (OWASP Top 10)
        "AWSManagedRulesKnownBadInputsRuleSet",  # known bad inputs
        "AWSManagedRulesSQLiRuleSet",  # SQL injection
        "AWSManagedRulesLinuxRuleSet",  # Linux-specific attacks
        "AWSManagedRulesAmazonIpReputationList",  # IP reputation threats
        "AWSManagedRulesAnonymousIpList",  # Tor, VPNs, proxies
    )

    def visibility(metric_name: str) -> wafv2.CfnWebACL.VisibilityConfigProperty:
        """Return a visibility config with metrics and request sampling enabled."""
        return wafv2.CfnWebACL.VisibilityConfigProperty(
            cloud_watch_metrics_enabled=True,
            metric_name=metric_name,
            sampled_requests_enabled=True,
        )

    def managed_rule(group_name: str, priority: int) -> wafv2.CfnWebACL.RuleProperty:
        """Return a rule that delegates to the AWS managed group ``group_name``."""
        return wafv2.CfnWebACL.RuleProperty(
            name=group_name,
            priority=priority,
            # ``none`` keeps the actions defined inside the managed group.
            override_action=wafv2.CfnWebACL.OverrideActionProperty(none={}),
            statement=wafv2.CfnWebACL.StatementProperty(
                managed_rule_group_statement=wafv2.CfnWebACL.ManagedRuleGroupStatementProperty(
                    vendor_name="AWS",
                    name=group_name,
                )
            ),
            visibility_config=visibility(group_name),
        )

    # Rule 0: per-source-IP rate limiting (HIGHEST PRIORITY).
    # Evaluated before any AWS Managed Rule Group so that abusive IPs are
    # blocked immediately without running the heavier managed rule groups.
    # Requests are aggregated per source IP over a rolling 5-minute window
    # (no custom evaluation window is configured on the statement).
    rate_limit_rule = wafv2.CfnWebACL.RuleProperty(
        name="PerIPRateLimit",
        priority=0,
        action=wafv2.CfnWebACL.RuleActionProperty(block={}),
        statement=wafv2.CfnWebACL.StatementProperty(
            rate_based_statement=wafv2.CfnWebACL.RateBasedStatementProperty(
                limit=per_ip_rate_limit,
                aggregate_key_type="IP",
            )
        ),
        visibility_config=visibility("PerIPRateLimit"),
    )

    # Rules 1-6: one rule per managed group, numbered in tuple order.
    managed_rules = [
        managed_rule(group_name, priority)
        for priority, group_name in enumerate(managed_rule_groups, start=1)
    ]

    # Note: for API Gateway (even edge-optimized) the scope is REGIONAL;
    # the WebACL is associated with the API Gateway stage, not with
    # CloudFront directly.
    self.web_acl = wafv2.CfnWebACL(
        self,
        "GCOWebAcl",
        name="gco-api-gateway-waf",
        description="WAF WebACL for GCO API Gateway with AWS Managed Rules",
        scope="REGIONAL",  # REGIONAL for API Gateway association
        default_action=wafv2.CfnWebACL.DefaultActionProperty(allow={}),
        visibility_config=visibility("GCOApiGatewayWaf"),
        rules=[rate_limit_rule, *managed_rules],
    )

    # Enable WAF logging to CloudWatch Logs.
    # This is required for HIPAA, NIST 800-53, and PCI-DSS compliance.
    wafv2.CfnLoggingConfiguration(
        self,
        "WafLoggingConfig",
        resource_arn=self.web_acl.attr_arn,
        log_destination_configs=[waf_log_group.log_group_arn],
    )

    # Associate the WebACL with the API Gateway stage (stage ARN format).
    wafv2.CfnWebACLAssociation(
        self,
        "GCOWebAclAssociation",
        resource_arn=self.api.deployment_stage.stage_arn,
        web_acl_arn=self.web_acl.attr_arn,
    )

    # Export the WebACL ARN as a stack output.
    CfnOutput(
        self,
        "WebAclArn",
        value=self.web_acl.attr_arn,
        description="WAF WebACL ARN for API Gateway protection",
        export_name="gco-waf-webacl-arn",
    )