Anexe os volumes do EBS aos nós de cluster - AWS Data Pipeline

Anexe os volumes do EBS aos nós de cluster

Você pode anexar volumes do EBS a qualquer tipo de nó no cluster do EMR no seu pipeline. Para anexar volumes do EBS aos nós, use coreEbsConfiguration, masterEbsConfiguration e TaskEbsConfiguration na sua configuração EmrCluster.

Este exemplo de cluster do Amazon EMR usa volumes do Amazon EBS para seus nós central, principal e de tarefa. Para obter mais informações, consulte Volumes de Amazon EBS no Amazon EMR no Guia de gerenciamento do Amazon EMR.

Essas configurações são opcionais. Você pode usá-las em qualquer pipeline que usa um objeto EmrCluster.

No pipeline, clique na configuração de objeto EmrCluster, escolha Master EBS Configuration, Core EBS Configuration ou Task EBS Configuration e insira os detalhes de configuração semelhantes ao exemplo a seguir.

{ "objects": [ { "output": { "ref": "S3BackupLocation" }, "input": { "ref": "DDBSourceTable" }, "maximumRetries": "2", "name": "TableBackupActivity", "step": "s3://dynamodb-emr-#{myDDBRegion}/emr-ddb-storage-handler/2.1.0/emr-ddb-2.1.0.jar,org.apache.hadoop.dynamodb.tools.DynamoDbExport,#{output.directoryPath},#{input.tableName},#{input.readThroughputPercent}", "id": "TableBackupActivity", "runsOn": { "ref": "EmrClusterForBackup" }, "type": "EmrActivity", "resizeClusterBeforeRunning": "false" }, { "readThroughputPercent": "#{myDDBReadThroughputRatio}", "name": "DDBSourceTable", "id": "DDBSourceTable", "type": "DynamoDBDataNode", "tableName": "#{myDDBTableName}" }, { "directoryPath": "#{myOutputS3Loc}/#{format(@scheduledStartTime, 'YYYY-MM-dd-HH-mm-ss')}", "name": "S3BackupLocation", "id": "S3BackupLocation", "type": "S3DataNode" }, { "name": "EmrClusterForBackup", "coreInstanceCount": "1", "taskInstanceCount": "1", "taskInstanceType": "m4.xlarge", "coreInstanceType": "m4.xlarge", "releaseLabel": "emr-4.7.0", "masterInstanceType": "m4.xlarge", "id": "EmrClusterForBackup", "subnetId": "#{mySubnetId}", "emrManagedMasterSecurityGroupId": "#{myMasterSecurityGroup}", "emrManagedSlaveSecurityGroupId": "#{mySlaveSecurityGroup}", "region": "#{myDDBRegion}", "type": "EmrCluster", "coreEbsConfiguration": { "ref": "EBSConfiguration" }, "masterEbsConfiguration": { "ref": "EBSConfiguration" }, "taskEbsConfiguration": { "ref": "EBSConfiguration" }, "keyPair": "user-key-pair" }, { "name": "EBSConfiguration", "id": "EBSConfiguration", "ebsOptimized": "true", "ebsBlockDeviceConfig" : [ { "ref": "EbsBlockDeviceConfig" } ], "type": "EbsConfiguration" }, { "name": "EbsBlockDeviceConfig", "id": "EbsBlockDeviceConfig", "type": "EbsBlockDeviceConfig", "volumesPerInstance" : "2", "volumeSpecification" : { "ref": "VolumeSpecification" } }, { "name": "VolumeSpecification", "id": "VolumeSpecification", "type": "VolumeSpecification", "sizeInGB": "500", "volumeType": "io1", "iops": "1000" }, { "failureAndRerunMode": "CASCADE", "resourceRole": "DataPipelineDefaultResourceRole", "role": "DataPipelineDefaultRole", "pipelineLogUri": "#{myPipelineLogUri}", "scheduleType": "ONDEMAND", "name": "Default", "id": "Default" } ], "parameters": [ { "description": "Output S3 folder", "id": "myOutputS3Loc", "type": "AWS::S3::ObjectKey" }, { "description": "Source DynamoDB table name", "id": "myDDBTableName", "type": "String" }, { "default": "0.25", "watermark": "Enter value between 0.1-1.0", "description": "DynamoDB read throughput ratio", "id": "myDDBReadThroughputRatio", "type": "Double" }, { "default": "us-east-1", "watermark": "us-east-1", "description": "Region of the DynamoDB table", "id": "myDDBRegion", "type": "String" } ], "values": { "myDDBRegion": "us-east-1", "myDDBTableName": "ddb_table", "myDDBReadThroughputRatio": "0.25", "myOutputS3Loc": "s3://s3_path", "mySubnetId": "subnet_id", "mySlaveSecurityGroup": "slave security group", "myMasterSecurityGroup": "master security group", "myPipelineLogUri": "s3://s3_path" } }