메모리 예약(MemoryReservation)이 70%를 초과하면 EC2 Auto Scaling 그룹에 용량 단위 1개를 추가하도록 CloudWatch 경보를 설정했습니다. 경보는 적절한 시점에 트리거되었지만, 그 후 EC2 Auto Scaling 그룹에는 아무런 변화가 없는 채로 16시간 넘게 경보 상태가 유지되었습니다. 무엇이 잘못되었을까요?
제 ECS CloudFormation 템플릿은 다음과 같습니다.
# ECS cluster for this environment; its name is taken from the
# EnvironmentName parameter.
ECSCluster:
  Type: AWS::ECS::Cluster
  Properties:
    ClusterName:
      Ref: EnvironmentName
ECSAutoScalingGroup:
DependsOn: ECSCluster
Type: AWS::AutoScaling::AutoScalingGroup
Properties:
VPCZoneIdentifier: !Ref Subnets
LaunchConfigurationName: !Ref ECSLaunchConfiguration
MinSize: !Ref ClusterMinSize
MaxSize: !Ref ClusterMaxSize
DesiredCapacity: !Ref ClusterDesiredCapacity
CreationPolicy:
ResourceSignal:
Timeout: PT15M
UpdatePolicy:
AutoScalingRollingUpdate:
MinInstancesInService: 1
MaxBatchSize: 1
PauseTime: PT15M
SuspendProcesses:
- HealthCheck
- ReplaceUnhealthy
- AZRebalance
- AlarmNotification
- ScheduledActions
WaitOnResourceSignals: true
# Scaling policy that changes the capacity of ECSAutoScalingGroup by +1 each
# time it is invoked (MemoryReservationAlarmHigh lists it as an alarm action).
ScaleUpPolicy:
  Type: AWS::AutoScaling::ScalingPolicy
  Properties:
    AutoScalingGroupName:
      Ref: ECSAutoScalingGroup
    AdjustmentType: ChangeInCapacity
    ScalingAdjustment: '1'
    Cooldown: '1'
# Invokes ScaleUpPolicy when the cluster's average AWS/ECS MemoryReservation
# is greater than 70 for 2 evaluation periods of 60 each.
MemoryReservationAlarmHigh:
  Type: AWS::CloudWatch::Alarm
  Properties:
    AlarmDescription: Alarm if Cluster Memory Reservation is too high
    Namespace: AWS/ECS
    MetricName: MemoryReservation
    Dimensions:
      - Name: ClusterName
        Value:
          Ref: ECSCluster
    Statistic: Average
    Period: '60'
    EvaluationPeriods: '2'
    ComparisonOperator: GreaterThanThreshold
    Threshold: '70'
    AlarmActions:
      - !Ref ScaleUpPolicy
# Scaling policy that changes the capacity of ECSAutoScalingGroup by -1 each
# time it is invoked (MemoryReservationAlarmLow lists it as an alarm action).
ScaleDownPolicy:
  Type: AWS::AutoScaling::ScalingPolicy
  Properties:
    AutoScalingGroupName:
      Ref: ECSAutoScalingGroup
    AdjustmentType: ChangeInCapacity
    ScalingAdjustment: '-1'
    Cooldown: '1'
# Invokes ScaleDownPolicy when the cluster's average AWS/ECS MemoryReservation
# is less than 30 for 2 evaluation periods of 60 each.
MemoryReservationAlarmLow:
  Type: AWS::CloudWatch::Alarm
  Properties:
    AlarmDescription: Alarm if Cluster Memory Reservation is too Low
    Namespace: AWS/ECS
    MetricName: MemoryReservation
    Dimensions:
      - Name: ClusterName
        Value:
          Ref: ECSCluster
    Statistic: Average
    Period: '60'
    EvaluationPeriods: '2'
    ComparisonOperator: LessThanThreshold
    Threshold: '30'
    AlarmActions:
      - !Ref ScaleDownPolicy
# Launch configuration for the ECS container instances.
#
# UserData installs the SSM agent, the CloudWatch agent and the cfn helper
# scripts, appends proxy settings to the CloudWatch agent's common config,
# runs cfn-init against the Metadata below, and then reports cfn-init's exit
# status to ECSAutoScalingGroup with cfn-signal.
ECSLaunchConfiguration:
  Type: AWS::AutoScaling::LaunchConfiguration
  Properties:
    # No key pair is attached when the IsProd condition holds.
    KeyName: !If [IsProd, !Ref 'AWS::NoValue', !Ref KeyName]
    ImageId: !Ref ECSAMI
    InstanceType: !Ref InstanceType
    SecurityGroups:
      - !Ref SecurityGroup
    IamInstanceProfile: !Ref ECSInstanceProfile
    UserData:
      # "${!name}" is the Fn::Sub escape: it leaves a literal "${name}" for
      # the shell to expand at boot instead of substituting at deploy time.
      "Fn::Base64": !Sub |
        #!/bin/bash
        source /etc/profile.d/proxy.sh
        yum install -y https://s3.amazonaws.com/ec2-downloads-windows/SSMAgent/latest/linux_amd64/amazon-ssm-agent.rpm
        yum install -y https://s3.amazonaws.com/amazoncloudwatch-agent/amazon_linux/amd64/latest/amazon-cloudwatch-agent.rpm
        yum install -y aws-cfn-bootstrap hibagent
        cat >> /opt/aws/amazon-cloudwatch-agent/etc/common-config.toml <<EOF
        [proxy]
        http_proxy="${!http_proxy}"
        https_proxy="${!https_proxy}"
        no_proxy="${!no_proxy}"
        EOF
        /opt/aws/bin/cfn-init -v --region ${AWS::Region} --stack ${AWS::StackName} --resource ECSLaunchConfiguration
        /opt/aws/bin/cfn-signal -e $? --region ${AWS::Region} --stack ${AWS::StackName} --resource ECSAutoScalingGroup
        /usr/bin/enable-ec2-spot-hibernation
  Metadata:
    AWS::CloudFormation::Init:
      config:
        packages:
          yum:
            collectd: []
        commands:
          # Point the ECS agent on this instance at the stack's cluster.
          01_add_instance_to_cluster:
            command: !Sub echo ECS_CLUSTER=${ECSCluster} >> /etc/ecs/ecs.config
          # Load the CloudWatch agent configuration stored in the
          # ECSCloudWatchParameter SSM parameter.
          02_enable_cloudwatch_agent:
            command: !Sub /opt/aws/amazon-cloudwatch-agent/bin/amazon-cloudwatch-agent-ctl -a fetch-config -m ec2 -c ssm:${ECSCloudWatchParameter} -s
        files:
          /etc/cfn/cfn-hup.conf:
            # Quoted so YAML keeps the six-digit octal mode as a string;
            # an unquoted 000400 is resolved as a number, not as "000400".
            mode: '000400'
            owner: root
            group: root
            content: !Sub |
              [main]
              stack=${AWS::StackId}
              region=${AWS::Region}
          # cfn-hup hook: re-run cfn-init when this resource's Init metadata
          # changes in a stack update.
          /etc/cfn/hooks.d/cfn-auto-reloader.conf:
            content: !Sub |
              [cfn-auto-reloader-hook]
              triggers=post.update
              path=Resources.ECSLaunchConfiguration.Metadata.AWS::CloudFormation::Init
              action=/opt/aws/bin/cfn-init -v --region ${AWS::Region} --stack ${AWS::StackName} --resource ECSLaunchConfiguration
        services:
          sysvinit:
            # Keep cfn-hup enabled and running; the listed files are the
            # ones the service entry is tied to.
            cfn-hup:
              enabled: true
              ensureRunning: true
              files:
                - /etc/cfn/cfn-hup.conf
                - /etc/cfn/hooks.d/cfn-auto-reloader.conf
# IAM role for the ECS container instances (attached through
# ECSInstanceProfile). It follows the default instance role documented at:
# http://docs.aws.amazon.com/AmazonECS/latest/developerguide/instance_IAM_role.html
#
# Extra IAM statements can be added here to let the ECS hosts reach other AWS
# services. Keep in mind that every container running on an ECS host shares
# this role.
ECSRole:
  Type: AWS::IAM::Role
  Properties:
    RoleName: !Sub ${EnvironmentName}-ECSRole-${AWS::Region}
    Path: /
    # Trust policy: the EC2 service may assume this role.
    AssumeRolePolicyDocument: |
      {
        "Statement": [{
          "Action": "sts:AssumeRole",
          "Effect": "Allow",
          "Principal": {
            "Service": "ec2.amazonaws.com"
          }
        }]
      }
    # Three account-local policies plus the AWS-managed SSM and CloudWatch
    # agent policies.
    ManagedPolicyArns:
      - !Sub "arn:aws:iam::${AWS::AccountId}:policy/CSOPSRestrictionPolicy"
      - !Sub "arn:aws:iam::${AWS::AccountId}:policy/HIPIAMRestrictionPolicy"
      - !Sub "arn:aws:iam::${AWS::AccountId}:policy/HIPBasePolicy"
      - arn:aws:iam::aws:policy/service-role/AmazonEC2RoleforSSM
      - arn:aws:iam::aws:policy/CloudWatchAgentServerPolicy
    # Inline policy: ECS container-instance actions and ECR image pulls.
    Policies:
      - PolicyName: ecs-service
        PolicyDocument: |
          {
            "Statement": [{
              "Effect": "Allow",
              "Action": [
                "ecs:CreateCluster",
                "ecs:DeregisterContainerInstance",
                "ecs:DiscoverPollEndpoint",
                "ecs:Poll",
                "ecs:RegisterContainerInstance",
                "ecs:StartTelemetrySession",
                "ecs:Submit*",
                "ecr:BatchCheckLayerAvailability",
                "ecr:BatchGetImage",
                "ecr:GetDownloadUrlForLayer",
                "ecr:GetAuthorizationToken"
              ],
              "Resource": "*"
            }]
          }
# Instance profile wrapping ECSRole; ECSLaunchConfiguration references it as
# its IamInstanceProfile.
ECSInstanceProfile:
  Type: AWS::IAM::InstanceProfile
  Properties:
    Roles:
      - Ref: ECSRole
    Path: /
# IAM role assumable by Application Auto Scaling. Its ARN is published through
# ECSServiceAutoScalingRoleParameter.
ECSServiceAutoScalingRole:
  Type: AWS::IAM::Role
  Properties:
    Path: /
    # Trust policy: application-autoscaling.amazonaws.com may assume the role.
    AssumeRolePolicyDocument:
      Version: "2012-10-17"
      Statement:
        Effect: Allow
        Principal:
          Service:
            - application-autoscaling.amazonaws.com
        Action:
          - "sts:AssumeRole"
    ManagedPolicyArns:
      - !Sub "arn:aws:iam::${AWS::AccountId}:policy/CSOPSRestrictionPolicy"
      - !Sub "arn:aws:iam::${AWS::AccountId}:policy/HIPIAMRestrictionPolicy"
      - !Sub "arn:aws:iam::${AWS::AccountId}:policy/HIPBasePolicy"
    # Inline policy: manage scaling configuration and alarms, and describe or
    # update ECS services.
    Policies:
      - PolicyName: ecs-service-autoscaling
        PolicyDocument:
          Statement:
            Effect: Allow
            Resource: "*"
            Action:
              - application-autoscaling:*
              - cloudwatch:DescribeAlarms
              - cloudwatch:PutMetricAlarm
              - ecs:DescribeServices
              - ecs:UpdateService
# SSM parameter holding the CloudWatch agent configuration (JSON) that the
# instances load via "fetch-config ... -c ssm:<parameter>" in
# ECSLaunchConfiguration. Inside the !Sub body, "${ECSCluster}" is replaced at
# deploy time, while "${!aws:...}" is the Fn::Sub escape that leaves a literal
# "${aws:...}" placeholder in the stored value.
ECSCloudWatchParameter:
  Type: AWS::SSM::Parameter
  Properties:
    Name: !Sub AmazonCloudWatch-${ECSCluster}-ECS
    Type: String
    Description: CloudWatch Log configs for ECS cluster
    Value: !Sub |
      {
        "logs": {
          "force_flush_interval": 5,
          "logs_collected": {
            "files": {
              "collect_list": [
                {
                  "file_path": "/var/log/messages",
                  "log_group_name": "${ECSCluster}/var/log/messages",
                  "log_stream_name": "{instance_id}",
                  "timestamp_format": "%b %d %H:%M:%S"
                },
                {
                  "file_path": "/var/log/dmesg",
                  "log_group_name": "${ECSCluster}/var/log/dmesg",
                  "log_stream_name": "{instance_id}"
                },
                {
                  "file_path": "/var/log/docker",
                  "log_group_name": "${ECSCluster}/var/log/docker",
                  "log_stream_name": "{instance_id}",
                  "timestamp_format": "%Y-%m-%dT%H:%M:%S.%f"
                },
                {
                  "file_path": "/var/log/ecs/ecs-init.log",
                  "log_group_name": "${ECSCluster}/var/log/ecs/ecs-init.log",
                  "log_stream_name": "{instance_id}",
                  "timestamp_format": "%Y-%m-%dT%H:%M:%SZ"
                },
                {
                  "file_path": "/var/log/ecs/ecs-agent.log.*",
                  "log_group_name": "${ECSCluster}/var/log/ecs/ecs-agent.log",
                  "log_stream_name": "{instance_id}",
                  "timestamp_format": "%Y-%m-%dT%H:%M:%SZ"
                },
                {
                  "file_path": "/var/log/ecs/audit.log",
                  "log_group_name": "${ECSCluster}/var/log/ecs/audit.log",
                  "log_stream_name": "{instance_id}",
                  "timestamp_format": "%Y-%m-%dT%H:%M:%SZ"
                }
              ]
            }
          }
        },
        "metrics": {
          "append_dimensions": {
            "AutoScalingGroupName": "${!aws:AutoScalingGroupName}",
            "InstanceId": "${!aws:InstanceId}",
            "InstanceType": "${!aws:InstanceType}"
          },
          "metrics_collected": {
            "collectd": {
              "metrics_aggregation_interval": 60
            },
            "disk": {
              "measurement": [
                "used_percent"
              ],
              "metrics_collection_interval": 60,
              "resources": [
                "/"
              ]
            },
            "mem": {
              "measurement": [
                "mem_used_percent"
              ],
              "metrics_collection_interval": 60
            },
            "statsd": {
              "metrics_aggregation_interval": 60,
              "metrics_collection_interval": 10,
              "service_address": ":8125"
            }
          }
        }
      }
# Publishes the ECS cluster reference under /<EnvironmentName>/ecs-cluster in
# SSM Parameter Store.
ECSClusterParameter:
  Type: AWS::SSM::Parameter
  Properties:
    Name: !Sub /${EnvironmentName}/ecs-cluster
    Type: String
    Description: !Sub ${EnvironmentName} - ECS Cluster
    Value:
      Ref: ECSCluster
# Publishes the ARN of ECSServiceAutoScalingRole under
# /<EnvironmentName>/ecs-service-asg-role in SSM Parameter Store.
ECSServiceAutoScalingRoleParameter:
  Type: AWS::SSM::Parameter
  Properties:
    Name: !Sub /${EnvironmentName}/ecs-service-asg-role
    Type: String
    Description: !Sub ${EnvironmentName} - ECS Service ASG Role
    Value:
      Fn::GetAtt:
        - ECSServiceAutoScalingRole
        - Arn
알람 활동 내역 :
2019-12-26 11:40:54 Action Successfully executed action arn:aws:autoscaling:ap-southeast-2:031539715286:scalingPolicy:95e836b6-2f56-498d-b931-7ec4184bedc4:autoScalingGroupName/ECS-UEBZA8GAP8S7-ECSAutoScalingGroup-1BIBTJH5I50W9:policyName/ECS-UEBZA8GAP8S7-ScaleUpPolicy-17LUWE42DC7EO
2019-12-26 11:40:54 State update Alarm updated from OK to In alarm
일시 중단된 프로세스(Suspended Processes)가 없는지 확인하십시오. AlarmNotification 프로세스가 일시 중단되어 있으면 들어오는 경보가 조정 정책을 트리거하지 못합니다. Launch 프로세스가 일시 중단되어 있으면 원하는 용량(Desired Capacity)이 올라가더라도 인스턴스가 시작되지 않습니다.
이를 유발할 수 있는 기타 일반적인 문제:
인스턴스 가중치를 사용하면서 원하는 용량을 1씩 늘리는데 가장 낮은 가중치가 1이 아니라면 확장이 불가능할 수 있습니다.
이 정책을 재정의 할 수있는 다른 조정 정책이 트리거되지 않았는지 확인합니다.
활동 기록을 확인하여 상태 확인에 의한 인스턴스 교체가 계속 발생하고 있지 않은지 확인합니다. 그런 교체가 일어나면 5분 휴지 기간(휴지 기간이 조정 정책에만 설정되어 있고 ASG에는 설정되어 있지 않으므로 기본값이 적용됨)이 시작되어 단순 조정 정책이 차단되기 때문입니다.
원하는 용량(Desired)이 이미 최대값(Max)에 도달해 있지 않은지 확인하십시오.
경보 기록에서 경보가 트리거된 것뿐만 아니라 Auto Scaling '작업(Action)'이 실행되었는지도 확인해야 합니다(작업은 실제로는 경보가 경보 상태에 머무는 동안 1분마다 실행되며 평가 설정과는 무관하지만, 경보 기록에는 첫 번째 작업만 게시됩니다).
시작 실패가 있는지 ASG 활동 기록을 확인합니다. 이는 스팟 인스턴스를 사용하는 경우 특히 흔하며, 실패가 충분히 누적되면 ASG는 결국 백오프 상태로 전환됩니다. 그룹을 수동으로 업데이트하면 이 백오프가 재설정됩니다.
이 기사는 인터넷에서 수집됩니다. 재 인쇄 할 때 출처를 알려주십시오.
침해가 발생한 경우 연락 주시기 바랍니다[email protected] 삭제
몇 마디 만하겠습니다