Skip to content

Commit

Permalink
LIME-1208 - Align DL F.E scaling with HMRC KBV F.E with MinCapacity at 4
Browse files Browse the repository at this point in the history
	- MinCapacity 4 aimed at handling large initial burst traffic from 0% load
  • Loading branch information
smsgds committed Oct 21, 2024
1 parent 7084a0a commit 7e11ef1
Showing 1 changed file with 79 additions and 79 deletions.
158 changes: 79 additions & 79 deletions deploy/template.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ Conditions:
- !Equals [!Ref Environment, production]
IsProduction: !Equals [!Ref Environment, production]
IsPerformance: !Or
- !Equals [!Ref Environment, dev]
- !Equals [!Ref Environment, build]
- !Equals [!Ref Environment, production]
UsePermissionsBoundary: !Not
Expand All @@ -69,7 +70,6 @@ Mappings:
dynatraceSecretArn: arn:aws:secretsmanager:eu-west-2:216552277552:secret:DynatraceNonProductionVariables
fargateCPUsize: "256"
fargateRAMsize: "512"
desiredTaskCount: 2
ga4Disabled: "false"
uaDisabled: "false"
languageToggleDisabled: "false"
Expand All @@ -78,7 +78,6 @@ Mappings:
dynatraceSecretArn: arn:aws:secretsmanager:eu-west-2:216552277552:secret:DynatraceNonProductionVariables
fargateCPUsize: "1024"
fargateRAMsize: "2048"
desiredTaskCount: 2
ga4Disabled: "false"
uaDisabled: "false"
languageToggleDisabled: "false"
Expand All @@ -87,7 +86,6 @@ Mappings:
dynatraceSecretArn: arn:aws:secretsmanager:eu-west-2:216552277552:secret:DynatraceNonProductionVariables
fargateCPUsize: "256"
fargateRAMsize: "512"
desiredTaskCount: 2
ga4Disabled: "false"
uaDisabled: "false"
languageToggleDisabled: "false"
Expand All @@ -96,7 +94,6 @@ Mappings:
dynatraceSecretArn: arn:aws:secretsmanager:eu-west-2:216552277552:secret:DynatraceNonProductionVariables
fargateCPUsize: "512"
fargateRAMsize: "1024"
desiredTaskCount: 2
ga4Disabled: "false"
uaDisabled: "false"
languageToggleDisabled: "false"
Expand All @@ -105,7 +102,6 @@ Mappings:
dynatraceSecretArn: arn:aws:secretsmanager:eu-west-2:216552277552:secret:DynatraceProductionVariables
fargateCPUsize: "1024"
fargateRAMsize: "2048"
desiredTaskCount: 2
ga4Disabled: "false"
uaDisabled: "false"
languageToggleDisabled: "false"
Expand Down Expand Up @@ -284,6 +280,9 @@ Resources:
HealthCheckEnabled: TRUE
HealthCheckProtocol: HTTP
HealthCheckPath: /healthcheck
HealthCheckTimeoutSeconds: 2
HealthCheckIntervalSeconds: 5
HealthyThresholdCount: 2
Matcher:
HttpCode: 200
Port: 80
Expand Down Expand Up @@ -443,10 +442,6 @@ Resources:
- UseCanaryDeployment
- CODE_DEPLOY
- ECS
DesiredCount: !FindInMap
- EnvironmentConfiguration
- !Ref "Environment"
- desiredTaskCount
EnableECSManagedTags: false
HealthCheckGracePeriodSeconds: !If
- UseCanaryDeployment
Expand Down Expand Up @@ -808,12 +803,33 @@ Resources:
ArnLike:
"kms:EncryptionContext:aws:logs:arn": !Sub "arn:aws:logs:${AWS::Region}:${AWS::AccountId}:*"

PassportFrontSessionsTable:
Type: AWS::DynamoDB::Table
Properties:
# checkov:skip=CKV_AWS_28: Point in time recovery is not necessary for this table.
TableName: !Sub "cri-passport-front-sessions-${Environment}"
BillingMode: "PAY_PER_REQUEST"
AttributeDefinitions:
- AttributeName: "id"
AttributeType: "S"
KeySchema:
- AttributeName: "id"
KeyType: "HASH"
TimeToLiveSpecification:
AttributeName: "expires"
Enabled: true
SSESpecification:
# checkov:skip=CKV_AWS_119: Implement Customer Managed Keys in PYIC-1391
SSEEnabled: true
SSEType: KMS

# ECS Autoscaling
ECSAutoScalingTarget:
Condition: IsPerformance
Type: AWS::ApplicationAutoScaling::ScalableTarget
Properties:
MinCapacity: 4
MaxCapacity: 60
MinCapacity: 2
ResourceId: !Join
- '/'
- - "service"
Expand All @@ -823,109 +839,113 @@ Resources:
ScalableDimension: ecs:service:DesiredCount
ServiceNamespace: ecs

ECSAutoScalingPolicy:
EcsStepScaleOutPolicy:
Condition: IsPerformance
DependsOn: ECSAutoScalingTarget
Type: AWS::ApplicationAutoScaling::ScalingPolicy
Properties:
PolicyName: ECSAutoScalingPolicy
PolicyType: TargetTrackingScaling
ResourceId: !Join
- "/"
- - "service"
- !Ref PassportFrontEcsCluster
- !GetAtt PassportFrontEcsService.Name
ScalableDimension: ecs:service:DesiredCount
ServiceNamespace: ecs
TargetTrackingScalingPolicyConfiguration:
PredefinedMetricSpecification:
PredefinedMetricType: ECSServiceAverageCPUUtilization
TargetValue: 60
ScaleInCooldown: 420
ScaleOutCooldown: 60

StepScaleInPolicy:
Condition: IsPerformance
DependsOn: ECSAutoScalingTarget
Type: AWS::ApplicationAutoScaling::ScalingPolicy
Properties:
PolicyName: StepScalingInPolicy
PolicyName: EcsStepScalingOutPolicy
PolicyType: StepScaling
ResourceId: !Join
- '/'
- "/"
- - "service"
- !Ref PassportFrontEcsCluster
- !GetAtt PassportFrontEcsService.Name
ScalableDimension: ecs:service:DesiredCount
ServiceNamespace: ecs
StepScalingPolicyConfiguration:
AdjustmentType: PercentChangeInCapacity
Cooldown: 420
Cooldown:
180 # The policy will continue to respond to additional alarm breaches,
# even while a scaling activity is in progress. This means Application
# Auto Scaling will evaluate all alarm breaches as they occur.
# A cooldown period is used to protect against over-scaling due to
# multiple alarm breaches occurring in rapid succession.
MinAdjustmentMagnitude: 1
StepAdjustments:
- MetricIntervalUpperBound: -40
ScalingAdjustment: -50
- MetricIntervalUpperBound: 0 # 60%
ScalingAdjustment: 100 # Scale by 100% of containers if the metric is breached
# with <60% utilisation
- MetricIntervalLowerBound: 0 # 60%
MetricIntervalUpperBound: 30 # 90%
ScalingAdjustment: 200 # Scale by 200% of containers if the metric is breached
# with 60-90% utilisation
- MetricIntervalLowerBound: 30 # 90%
MetricIntervalUpperBound: 35 # 95%
ScalingAdjustment: 300 # Scale by 300% of containers if the metric is breached
# with 90-95% utilisation
- MetricIntervalLowerBound: 35 # 95%
ScalingAdjustment:
500 # Scale by 500% of containers if the metric is breached
# with >95% utilisation
# Note: CPU can scale greater than 100% in a burst mode
# on Fargate, so leave the upper bound open

StepScaleOutPolicy:
EcsStepScaleInPolicy:
Condition: IsPerformance
DependsOn: ECSAutoScalingTarget
Type: AWS::ApplicationAutoScaling::ScalingPolicy
Properties:
PolicyName: StepScalingOutPolicy
PolicyName: EcsStepScalingInPolicy
PolicyType: StepScaling
ResourceId: !Join
- '/'
- "/"
- - "service"
- !Ref PassportFrontEcsCluster
- !GetAtt PassportFrontEcsService.Name
ScalableDimension: ecs:service:DesiredCount
ServiceNamespace: ecs
StepScalingPolicyConfiguration:
AdjustmentType: PercentChangeInCapacity
Cooldown: 120
MinAdjustmentMagnitude: 5
Cooldown:
180 # The policy will continue to respond to additional alarm breaches,
# even while a scaling activity is in progress. This means Application
# Auto Scaling will evaluate all alarm breaches as they occur.
# A cooldown period is used to protect against under-scaling due to
# multiple alarm breaches occurring in rapid succession.
StepAdjustments:
- MetricIntervalLowerBound: 20
MetricIntervalUpperBound: 30
ScalingAdjustment: 200
- MetricIntervalLowerBound: 30
MetricIntervalUpperBound: 35
ScalingAdjustment: 300
- MetricIntervalLowerBound: 35
ScalingAdjustment: 500
- MetricIntervalUpperBound: -15 # 5%
ScalingAdjustment: -90 # Scale down by 90% of containers if the metric is breached
# with <5% utilisation
- MetricIntervalLowerBound: -15 # 5%
MetricIntervalUpperBound: 0 # 20%
ScalingAdjustment:
-50 # Scale down 50% of containers if the metric is breached
# with 5-20% utilisation

StepScaleOutAlarm:
EcsStepScaleOutAlarm:
Condition: IsPerformance
DependsOn: ECSAutoScalingTarget
Type: AWS::CloudWatch::Alarm
Properties:
ActionsEnabled: true
AlarmActions:
- !Ref StepScaleOutPolicy
AlarmDescription: "PassportFrontClusterOver60PercentCPU"
- !Ref EcsStepScaleOutPolicy
AlarmDescription: "EcsClusterOver60PercentCPU"
ComparisonOperator: "GreaterThanThreshold"
DatapointsToAlarm: "2"
DatapointsToAlarm: "1"
Dimensions:
- Name: ClusterName
Value: !Ref PassportFrontEcsCluster
- Name: ServiceName
Value: !GetAtt PassportFrontEcsService.Name
Unit: "Percent"
EvaluationPeriods: "2"
EvaluationPeriods: "1"
MetricName: "CPUUtilization"
Namespace: "AWS/ECS"
Statistic: "Average"
Period: "60"
Threshold: "60"

StepScaleInAlarm:
EcsStepScaleInAlarm:
Condition: IsPerformance
DependsOn: ECSAutoScalingTarget
Type: AWS::CloudWatch::Alarm
Properties:
ActionsEnabled: true
AlarmActions:
- !Ref StepScaleInPolicy
AlarmDescription: "PassportFrontClusterUnder60PercentCPU"
- !Ref EcsStepScaleInPolicy
AlarmDescription: "EcsClusterUnder60PercentCPU"
ComparisonOperator: "LessThanThreshold"
DatapointsToAlarm: "5"
Dimensions:
Expand All @@ -939,27 +959,7 @@ Resources:
Namespace: "AWS/ECS"
Statistic: "Average"
Period: "60"
Threshold: "60"

PassportFrontSessionsTable:
Type: AWS::DynamoDB::Table
Properties:
# checkov:skip=CKV_AWS_28: Point in time recovery is not necessary for this table.
TableName: !Sub "cri-passport-front-sessions-${Environment}"
BillingMode: "PAY_PER_REQUEST"
AttributeDefinitions:
- AttributeName: "id"
AttributeType: "S"
KeySchema:
- AttributeName: "id"
KeyType: "HASH"
TimeToLiveSpecification:
AttributeName: "expires"
Enabled: true
SSESpecification:
# checkov:skip=CKV_AWS_119: Implement Customer Managed Keys in PYIC-1391
SSEEnabled: true
SSEType: KMS
Threshold: "20"

####################################################################
# #
Expand Down

0 comments on commit 7e11ef1

Please sign in to comment.