Manage SageMaker with Step Functions - AWS Step Functions

Manage SageMaker with Step Functions

Step Functions can control certain AWS services directly from the Amazon States Language. For more information, see the following:

Supported SageMaker APIs and syntax:

SageMaker Transform Job Example

The following includes a Task state that creates an Amazon SageMaker transform job, specifying the Amazon S3 location for DataSource and TransformOutput.

{ "SageMaker CreateTransformJob": { "Type": "Task", "Resource": "arn:aws:states:::sagemaker:createTransformJob.sync", "Parameters": { "ModelName": "SageMakerCreateTransformJobModel-9iFBKsYti9vr", "TransformInput": { "CompressionType": "None", "ContentType": "text/csv", "DataSource": { "S3DataSource": { "S3DataType": "S3Prefix", "S3Uri": "s3://my-s3bucket-example-1/TransformJobDataInput.txt" } } }, "TransformOutput": { "S3OutputPath": "s3://my-s3bucket-example-1/TransformJobOutputPath" }, "TransformResources": { "InstanceCount": 1, "InstanceType": "ml.m4.xlarge" }, "TransformJobName": "sfn-binary-classification-prediction" }, "Next": "ValidateOutput" },

SageMaker Training Job Example

The following includes a Task state that creates an Amazon SageMaker training job.

{ "SageMaker CreateTrainingJob":{ "Type":"Task", "Resource":"arn:aws:states:::sagemaker:createTrainingJob.sync", "Parameters":{ "TrainingJobName":"search-model", "ResourceConfig":{ "InstanceCount":4, "InstanceType":"ml.c4.8xlarge", "VolumeSizeInGB":20 }, "HyperParameters":{ "mode":"batch_skipgram", "epochs":"5", "min_count":"5", "sampling_threshold":"0.0001", "learning_rate":"0.025", "window_size":"5", "vector_dim":"300", "negative_samples":"5", "batch_size":"11" }, "AlgorithmSpecification":{ "TrainingImage":"...", "TrainingInputMode":"File" }, "OutputDataConfig":{ "S3OutputPath":"s3://bucket-name/doc-search/model" }, "StoppingCondition":{ "MaxRuntimeInSeconds":100000 }, "RoleArn":"arn:aws:iam::123456789012:role/docsearch-stepfunction-iam-role", "InputDataConfig":[ { "ChannelName":"train", "DataSource":{ "S3DataSource":{ "S3DataType":"S3Prefix", "S3Uri":"s3://bucket-name/doc-search/interim-data/training-data/", "S3DataDistributionType":"FullyReplicated" } } } ] }, "Retry":[ { "ErrorEquals":[ "SageMaker.AmazonSageMakerException" ], "IntervalSeconds":1, "MaxAttempts":100, "BackoffRate":1.1 }, { "ErrorEquals":[ "SageMaker.ResourceLimitExceededException" ], "IntervalSeconds":60, "MaxAttempts":5000, "BackoffRate":1 }, { "ErrorEquals":[ "States.Timeout" ], "IntervalSeconds":1, "MaxAttempts":5, "BackoffRate":1 } ], "Catch":[ { "ErrorEquals":[ "States.ALL" ], "ResultPath":"$.cause", "Next":"Sagemaker Training Job Error" } ], "Next":"Delete Interim Data Job" } }

SageMaker Labeling Job Example

The following includes a Task state that creates an Amazon SageMaker labeling job.

{ "StartAt": "SageMaker CreaateLabelingJob", "TimeoutSeconds": 3600, "States": { "SageMaker CreaateLabelingJob": { "Type": "Task", "Resource": "arn:aws:states:::sagemaker:createLabelingJob.sync", "Parameters": { "HumanTaskConfig": { "AnnotationConsolidationConfig": { "AnnotationConsolidationLambdaArn": "arn:aws:lambda:us-west-2:123456789012:function:ACS-TextMultiClass" }, "NumberOfHumanWorkersPerDataObject": 1, "PreHumanTaskLambdaArn": "arn:aws:lambda:us-west-2:123456789012:function:PRE-TextMultiClass", "TaskDescription": "Classify the following text", "TaskKeywords": [ "tc", "Labeling" ], "TaskTimeLimitInSeconds": 300, "TaskTitle": "Classify short bits of text", "UiConfig": { "UiTemplateS3Uri": "s3://s3bucket-example/TextClassification.template" }, "WorkteamArn": "arn:aws:sagemaker:us-west-2:123456789012:workteam/private-crowd/ExampleTesting" }, "InputConfig": { "DataAttributes": { "ContentClassifiers": [ "FreeOfPersonallyIdentifiableInformation", "FreeOfAdultContent" ] }, "DataSource": { "S3DataSource": { "ManifestS3Uri": "s3://s3bucket-example/manifest.json" } } }, "LabelAttributeName": "Categories", "LabelCategoryConfigS3Uri": "s3://s3bucket-example/labelcategories.json", "LabelingJobName": "example-job-name", "OutputConfig": { "S3OutputPath": "s3://s3bucket-example/output" }, "RoleArn": "arn:aws:iam::123456789012:role/service-role/AmazonSageMaker-ExecutionRole", "StoppingConditions": { "MaxHumanLabeledObjectCount": 10000, "MaxPercentageOfInputDatasetLabeled": 100 } }, "Next": "ValidateOutput" }, "ValidateOutput": { "Type": "Choice", "Choices": [ { "Not": { "Variable": "$.LabelingJobArn", "StringEquals": "" }, "Next": "Succeed" } ], "Default": "Fail" }, "Succeed": { "Type": "Succeed" }, "Fail": { "Type": "Fail", "Error": "InvalidOutput", "Cause": "Output is not what was expected. This could be due to a service outage or a misconfigured service integration." } } }

SageMaker Processing Job Example

The following includes a Task state that creates an Amazon SageMaker processing job.

{ "StartAt": "SageMaker CreateProcessingJob Sync", "TimeoutSeconds": 3600, "States": { "SageMaker CreateProcessingJob Sync": { "Type": "Task", "Resource": "arn:aws:states:::sagemaker:createProcessingJob.sync", "Parameters": { "AppSpecification": { "ImageUri": "737474898029.dkr.ecr.sa-east-1.amazonaws.com/sagemaker-scikit-learn:0.20.0-cpu-py3" }, "ProcessingResources": { "ClusterConfig": { "InstanceCount": 1, "InstanceType": "ml.t3.medium", "VolumeSizeInGB": 10 } }, "RoleArn": "arn:aws:iam::123456789012:role/SM-003-CreateProcessingJobAPIExecutionRole", "ProcessingJobName.$": "$.id" }, "Next": "ValidateOutput" }, "ValidateOutput": { "Type": "Choice", "Choices": [ { "Not": { "Variable": "$.ProcessingJobArn", "StringEquals": "" }, "Next": "Succeed" } ], "Default": "Fail" }, "Succeed": { "Type": "Succeed" }, "Fail": { "Type": "Fail", "Error": "InvalidConnectorOutput", "Cause": "Connector output is not what was expected. This could be due to a service outage or a misconfigured connector." } } }