Restauración de un volumen raíz a partir de la última instantánea - AWS Systems Manager

Restauración de un volumen raíz a partir de la última instantánea

El sistema operativo en un volumen raíz puede dañarse por varias razones. Por ejemplo, después de una operación de aplicación de revisiones, las instancias podrían arrancar de manera incorrecta debido a un kernel o un registro dañado. La automatización de tareas comunes de solución de problemas, como la restauración de un volumen raíz a partir de la última instantánea tomada antes de la operación de aplicación de revisiones, puede reducir el tiempo de inactividad y agilizar los esfuerzos de solución de problemas. Las acciones de Automation de AWS Systems Manager pueden ayudarlo a lograrlo. Automation es una capacidad de AWS Systems Manager.

El siguiente manual de procedimientos de ejemplo de AWS Systems Manager realiza estas acciones:

  • Utiliza la acción de automatización aws:executeAwsApi para recuperar detalles del volumen raíz de la instancia.

  • Utiliza la acción de automatización aws:executeScript para recuperar la última instantánea del volumen raíz.

  • Utiliza la acción de automatización aws:branch para continuar la automatización si se encuentra una instantánea para el volumen raíz.

YAML
---
# Example Automation runbook: swaps an EC2 instance's root EBS volume for a new
# volume created from the most recent snapshot of the current root volume.
description: Custom Automation Troubleshooting Example
schemaVersion: '0.3'
assumeRole: "{{ AutomationAssumeRole }}"
parameters:
  AutomationAssumeRole:
    type: String
    description: "(Required) The ARN of the role that allows Automation to perform the actions on your behalf. If no role is specified, Systems Manager Automation uses your IAM permissions to use this runbook."
    default: ''
  InstanceId:
    type: String
    description: "(Required) The Instance Id whose root EBS volume you want to restore the latest Snapshot."
    default: ''
mainSteps:
  # Step 1: read the instance's Availability Zone and root device name.
  - name: getInstanceDetails
    action: aws:executeAwsApi
    onFailure: Abort
    inputs:
      Service: ec2
      Api: DescribeInstances
      InstanceIds:
        - "{{ InstanceId }}"
    outputs:
      - Name: availabilityZone
        Selector: "$.Reservations[0].Instances[0].Placement.AvailabilityZone"
        Type: String
      - Name: rootDeviceName
        Selector: "$.Reservations[0].Instances[0].RootDeviceName"
        Type: String
    nextStep: getRootVolumeId

  # Step 2: find the volume attached to this instance at the root device name.
  - name: getRootVolumeId
    action: aws:executeAwsApi
    onFailure: Abort
    inputs:
      Service: ec2
      Api: DescribeVolumes
      Filters:
        - Name: attachment.device
          Values:
            - "{{ getInstanceDetails.rootDeviceName }}"
        - Name: attachment.instance-id
          Values:
            - "{{ InstanceId }}"
    outputs:
      - Name: rootVolumeId
        Selector: "$.Volumes[0].VolumeId"
        Type: String
    nextStep: getSnapshotsByStartTime

  # Step 3: run an inline Python script that returns the ID of the snapshot
  # with the newest StartTime, or a "NoSnapshotFound" marker when the volume
  # has no snapshots.
  - name: getSnapshotsByStartTime
    action: aws:executeScript
    timeoutSeconds: 45
    onFailure: Abort
    inputs:
      Runtime: python3.8
      Handler: getSnapshotsByStartTime
      InputPayload:
        rootVolumeId: "{{ getRootVolumeId.rootVolumeId }}"
      Script: |-
        def getSnapshotsByStartTime(events,context):
            """Return the newest snapshot ID for events['rootVolumeId'].

            Returns {'latestSnapshotId': <id>} when at least one snapshot is
            returned for the volume, otherwise
            {'noSnapshotFound': 'NoSnapshotFound'}.
            """
            import boto3

            #Initialize client
            ec2 = boto3.client('ec2')
            volume_id = events['rootVolumeId']
            response = ec2.describe_snapshots(
                Filters=[
                    {
                        "Name": "volume-id",
                        "Values": [volume_id]
                    }
                ]
            )
            snapshots = response['Snapshots']

            # Guard clause: nothing to restore from.
            if not snapshots:
                return { 'noSnapshotFound' : "NoSnapshotFound" }

            # Newest first, ordered by the snapshot's StartTime.
            newest_first = sorted(snapshots, key=lambda snap: snap['StartTime'], reverse=True)
            return { 'latestSnapshotId' : newest_first[0]['SnapshotId'] }
    outputs:
      - Name: Payload
        Selector: $.Payload
        Type: StringMap
      - Name: latestSnapshotId
        Selector: $.Payload.latestSnapshotId
        Type: String
      - Name: noSnapshotFound
        Selector: $.Payload.noSnapshotFound
        Type: String
    nextStep: branchFromResults

  # Step 4: continue to the restore steps only when the script did not report
  # "NoSnapshotFound"; this step is marked as an end step (isEnd: true).
  - name: branchFromResults
    action: aws:branch
    onFailure: Abort
    inputs:
      Choices:
        - NextStep: createNewRootVolumeFromSnapshot
          Not:
            Variable: "{{ getSnapshotsByStartTime.noSnapshotFound }}"
            StringEquals: "NoSnapshotFound"
    isEnd: true

  # Step 5: create a new volume from the latest snapshot in the instance's
  # Availability Zone.
  - name: createNewRootVolumeFromSnapshot
    action: aws:executeAwsApi
    onFailure: Abort
    inputs:
      Service: ec2
      Api: CreateVolume
      AvailabilityZone: "{{ getInstanceDetails.availabilityZone }}"
      SnapshotId: "{{ getSnapshotsByStartTime.latestSnapshotId }}"
    outputs:
      - Name: newRootVolumeId
        Selector: "$.VolumeId"
        Type: String
    nextStep: stopInstance

  # Step 6: request a stop of the instance.
  - name: stopInstance
    action: aws:executeAwsApi
    onFailure: Abort
    inputs:
      Service: ec2
      Api: StopInstances
      InstanceIds:
        - "{{ InstanceId }}"
    nextStep: verifyVolumeAvailability

  # Step 7: wait (up to 120 s) until the new volume reports "available".
  - name: verifyVolumeAvailability
    action: aws:waitForAwsResourceProperty
    timeoutSeconds: 120
    inputs:
      Service: ec2
      Api: DescribeVolumes
      VolumeIds:
        - "{{ createNewRootVolumeFromSnapshot.newRootVolumeId }}"
      PropertySelector: "$.Volumes[0].State"
      DesiredValues:
        - "available"
    nextStep: verifyInstanceStopped

  # Step 8: wait (up to 120 s) until the instance reports "stopped".
  - name: verifyInstanceStopped
    action: aws:waitForAwsResourceProperty
    timeoutSeconds: 120
    inputs:
      Service: ec2
      Api: DescribeInstances
      InstanceIds:
        - "{{ InstanceId }}"
      PropertySelector: "$.Reservations[0].Instances[0].State.Name"
      DesiredValues:
        - "stopped"
    nextStep: detachRootVolume

  # Step 9: detach the original root volume.
  - name: detachRootVolume
    action: aws:executeAwsApi
    onFailure: Abort
    inputs:
      Service: ec2
      Api: DetachVolume
      VolumeId: "{{ getRootVolumeId.rootVolumeId }}"
    nextStep: verifyRootVolumeDetached

  # Step 10: wait (up to 30 s) until the original volume reports "available".
  - name: verifyRootVolumeDetached
    action: aws:waitForAwsResourceProperty
    timeoutSeconds: 30
    inputs:
      Service: ec2
      Api: DescribeVolumes
      VolumeIds:
        - "{{ getRootVolumeId.rootVolumeId }}"
      PropertySelector: "$.Volumes[0].State"
      DesiredValues:
        - "available"
    nextStep: attachNewRootVolume

  # Step 11: attach the new volume at the original root device name.
  - name: attachNewRootVolume
    action: aws:executeAwsApi
    onFailure: Abort
    inputs:
      Service: ec2
      Api: AttachVolume
      Device: "{{ getInstanceDetails.rootDeviceName }}"
      InstanceId: "{{ InstanceId }}"
      VolumeId: "{{ createNewRootVolumeFromSnapshot.newRootVolumeId }}"
    nextStep: verifyNewRootVolumeAttached

  # Step 12: wait (up to 30 s) until the new volume's attachment reports
  # "attached".
  - name: verifyNewRootVolumeAttached
    action: aws:waitForAwsResourceProperty
    timeoutSeconds: 30
    inputs:
      Service: ec2
      Api: DescribeVolumes
      VolumeIds:
        - "{{ createNewRootVolumeFromSnapshot.newRootVolumeId }}"
      PropertySelector: "$.Volumes[0].Attachments[0].State"
      DesiredValues:
        - "attached"
    nextStep: startInstance

  # Step 13: start the instance again.
  - name: startInstance
    action: aws:executeAwsApi
    onFailure: Abort
    inputs:
      Service: ec2
      Api: StartInstances
      InstanceIds:
        - "{{ InstanceId }}"
JSON
{
  "description": "Custom Automation Troubleshooting Example",
  "schemaVersion": "0.3",
  "assumeRole": "{{ AutomationAssumeRole }}",
  "parameters": {
    "AutomationAssumeRole": {
      "type": "String",
      "description": "(Required) The ARN of the role that allows Automation to perform the actions on your behalf. If no role is specified, Systems Manager Automation uses your IAM permissions to run this runbook.",
      "default": ""
    },
    "InstanceId": {
      "type": "String",
      "description": "(Required) The Instance Id whose root EBS volume you want to restore the latest Snapshot.",
      "default": ""
    }
  },
  "mainSteps": [
    {
      "name": "getInstanceDetails",
      "action": "aws:executeAwsApi",
      "onFailure": "Abort",
      "inputs": {
        "Service": "ec2",
        "Api": "DescribeInstances",
        "InstanceIds": [
          "{{ InstanceId }}"
        ]
      },
      "outputs": [
        {
          "Name": "availabilityZone",
          "Selector": "$.Reservations[0].Instances[0].Placement.AvailabilityZone",
          "Type": "String"
        },
        {
          "Name": "rootDeviceName",
          "Selector": "$.Reservations[0].Instances[0].RootDeviceName",
          "Type": "String"
        }
      ],
      "nextStep": "getRootVolumeId"
    },
    {
      "name": "getRootVolumeId",
      "action": "aws:executeAwsApi",
      "onFailure": "Abort",
      "inputs": {
        "Service": "ec2",
        "Api": "DescribeVolumes",
        "Filters": [
          {
            "Name": "attachment.device",
            "Values": [
              "{{ getInstanceDetails.rootDeviceName }}"
            ]
          },
          {
            "Name": "attachment.instance-id",
            "Values": [
              "{{ InstanceId }}"
            ]
          }
        ]
      },
      "outputs": [
        {
          "Name": "rootVolumeId",
          "Selector": "$.Volumes[0].VolumeId",
          "Type": "String"
        }
      ],
      "nextStep": "getSnapshotsByStartTime"
    },
    {
      "name": "getSnapshotsByStartTime",
      "action": "aws:executeScript",
      "timeoutSeconds": 45,
      "onFailure": "Abort",
      "inputs": {
        "Runtime": "python3.8",
        "Handler": "getSnapshotsByStartTime",
        "InputPayload": {
          "rootVolumeId": "{{ getRootVolumeId.rootVolumeId }}"
        },
        "Attachment": "getSnapshotsByStartTime.py"
      },
      "outputs": [
        {
          "Name": "Payload",
          "Selector": "$.Payload",
          "Type": "StringMap"
        },
        {
          "Name": "latestSnapshotId",
          "Selector": "$.Payload.latestSnapshotId",
          "Type": "String"
        },
        {
          "Name": "noSnapshotFound",
          "Selector": "$.Payload.noSnapshotFound",
          "Type": "String"
        }
      ],
      "nextStep": "branchFromResults"
    },
    {
      "name": "branchFromResults",
      "action": "aws:branch",
      "onFailure": "Abort",
      "inputs": {
        "Choices": [
          {
            "NextStep": "createNewRootVolumeFromSnapshot",
            "Not": {
              "Variable": "{{ getSnapshotsByStartTime.noSnapshotFound }}",
              "StringEquals": "NoSnapshotFound"
            }
          }
        ]
      },
      "isEnd": true
    },
    {
      "name": "createNewRootVolumeFromSnapshot",
      "action": "aws:executeAwsApi",
      "onFailure": "Abort",
      "inputs": {
        "Service": "ec2",
        "Api": "CreateVolume",
        "AvailabilityZone": "{{ getInstanceDetails.availabilityZone }}",
        "SnapshotId": "{{ getSnapshotsByStartTime.latestSnapshotId }}"
      },
      "outputs": [
        {
          "Name": "newRootVolumeId",
          "Selector": "$.VolumeId",
          "Type": "String"
        }
      ],
      "nextStep": "stopInstance"
    },
    {
      "name": "stopInstance",
      "action": "aws:executeAwsApi",
      "onFailure": "Abort",
      "inputs": {
        "Service": "ec2",
        "Api": "StopInstances",
        "InstanceIds": [
          "{{ InstanceId }}"
        ]
      },
      "nextStep": "verifyVolumeAvailability"
    },
    {
      "name": "verifyVolumeAvailability",
      "action": "aws:waitForAwsResourceProperty",
      "timeoutSeconds": 120,
      "inputs": {
        "Service": "ec2",
        "Api": "DescribeVolumes",
        "VolumeIds": [
          "{{ createNewRootVolumeFromSnapshot.newRootVolumeId }}"
        ],
        "PropertySelector": "$.Volumes[0].State",
        "DesiredValues": [
          "available"
        ]
      },
      "nextStep": "verifyInstanceStopped"
    },
    {
      "name": "verifyInstanceStopped",
      "action": "aws:waitForAwsResourceProperty",
      "timeoutSeconds": 120,
      "inputs": {
        "Service": "ec2",
        "Api": "DescribeInstances",
        "InstanceIds": [
          "{{ InstanceId }}"
        ],
        "PropertySelector": "$.Reservations[0].Instances[0].State.Name",
        "DesiredValues": [
          "stopped"
        ]
      },
      "nextStep": "detachRootVolume"
    },
    {
      "name": "detachRootVolume",
      "action": "aws:executeAwsApi",
      "onFailure": "Abort",
      "inputs": {
        "Service": "ec2",
        "Api": "DetachVolume",
        "VolumeId": "{{ getRootVolumeId.rootVolumeId }}"
      },
      "nextStep": "verifyRootVolumeDetached"
    },
    {
      "name": "verifyRootVolumeDetached",
      "action": "aws:waitForAwsResourceProperty",
      "timeoutSeconds": 30,
      "inputs": {
        "Service": "ec2",
        "Api": "DescribeVolumes",
        "VolumeIds": [
          "{{ getRootVolumeId.rootVolumeId }}"
        ],
        "PropertySelector": "$.Volumes[0].State",
        "DesiredValues": [
          "available"
        ]
      },
      "nextStep": "attachNewRootVolume"
    },
    {
      "name": "attachNewRootVolume",
      "action": "aws:executeAwsApi",
      "onFailure": "Abort",
      "inputs": {
        "Service": "ec2",
        "Api": "AttachVolume",
        "Device": "{{ getInstanceDetails.rootDeviceName }}",
        "InstanceId": "{{ InstanceId }}",
        "VolumeId": "{{ createNewRootVolumeFromSnapshot.newRootVolumeId }}"
      },
      "nextStep": "verifyNewRootVolumeAttached"
    },
    {
      "name": "verifyNewRootVolumeAttached",
      "action": "aws:waitForAwsResourceProperty",
      "timeoutSeconds": 30,
      "inputs": {
        "Service": "ec2",
        "Api": "DescribeVolumes",
        "VolumeIds": [
          "{{ createNewRootVolumeFromSnapshot.newRootVolumeId }}"
        ],
        "PropertySelector": "$.Volumes[0].Attachments[0].State",
        "DesiredValues": [
          "attached"
        ]
      },
      "nextStep": "startInstance"
    },
    {
      "name": "startInstance",
      "action": "aws:executeAwsApi",
      "onFailure": "Abort",
      "inputs": {
        "Service": "ec2",
        "Api": "StartInstances",
        "InstanceIds": [
          "{{ InstanceId }}"
        ]
      }
    }
  ],
  "files": {
    "getSnapshotsByStartTime.py": {
      "checksums": {
        "sha256": "sampleETagValue"
      }
    }
  }
}