There are more AWS SDK examples available in the AWS Doc SDK Examples
AWS Batch examples using AWS CLI with Bash script
The following code examples show you how to perform actions and implement common scenarios by using the AWS Command Line Interface with Bash script with AWS Batch.
Scenarios are code examples that show you how to accomplish specific tasks by calling multiple functions within a service or combined with other AWS services.
Each example includes a link to the complete source code, where you can find instructions on how to set up and run the code in context.
Topics
Scenarios
The following code example shows how to:
Create an IAM execution role
Create a compute environment
Create a job queue
Create a job definition
Clean up resources
- AWS CLI with Bash script
-
Note
There's more on GitHub. Find the complete example and learn how to set up and run in the Sample developer tutorials
repository. #!/bin/bash # AWS Batch Fargate Getting Started Script - Security Hardened Version # This script demonstrates creating AWS Batch resources with Fargate orchestration # set -euo pipefail # Exit on any error, undefined variables, and pipe failures # Configuration SCRIPT_NAME="batch-fargate-tutorial" LOG_FILE="${SCRIPT_NAME}-$(date +%Y%m%d-%H%M%S).log" RANDOM_SUFFIX=$(openssl rand -hex 6) COMPUTE_ENV_NAME="batch-fargate-compute-${RANDOM_SUFFIX}" JOB_QUEUE_NAME="batch-fargate-queue-${RANDOM_SUFFIX}" JOB_DEF_NAME="batch-fargate-jobdef-${RANDOM_SUFFIX}" JOB_NAME="batch-hello-world-${RANDOM_SUFFIX}" ROLE_NAME="BatchEcsTaskExecutionRole-${RANDOM_SUFFIX}" TRUST_POLICY_FILE="batch-trust-policy-${RANDOM_SUFFIX}.json" # Security: Set restrictive umask umask 0077 # Array to track created resources for cleanup CREATED_RESOURCES=() # Logging function with sanitization log() { local message="${1//[$'\t\r\n']/}" echo "[$(date '+%Y-%m-%d %H:%M:%S')] ${message}" | tee -a "${LOG_FILE}" } # Error handling function handle_error() { log "ERROR: Script failed at line $1" log "Attempting to clean up resources created so far..." cleanup_resources exit 1 } # Set up error handling trap 'handle_error ${LINENO}' ERR # Validate AWS credentials validate_aws_credentials() { if ! aws sts get-caller-identity &>/dev/null; then log "ERROR: AWS credentials are not configured or invalid" exit 1 fi } # Function to wait for resource to be ready wait_for_compute_env() { local env_name="${1}" local max_attempts=60 local attempt=0 log "Waiting for compute environment ${env_name} to be VALID..." while [ ${attempt} -lt ${max_attempts} ]; do local status status=$(aws batch describe-compute-environments \ --compute-environments "${env_name}" \ --query 'computeEnvironments[0].status' \ --output text 2>/dev/null || echo "NOT_FOUND") if [ "${status}" = "VALID" ]; then log "Compute environment ${env_name} is ready" return 0 elif [ "${status}" = "INVALID" ] || [ "${status}" = "NOT_FOUND" ]; then log "ERROR: Compute environment ${env_name} failed to create properly" return 1 fi log "Compute environment status: ${status}. Waiting 10 seconds..." sleep 10 ((attempt++)) done log "ERROR: Timeout waiting for compute environment ${env_name}" return 1 } # Function to wait for job queue to be ready wait_for_job_queue() { local queue_name="${1}" local max_attempts=60 local attempt=0 log "Waiting for job queue ${queue_name} to be VALID..." while [ ${attempt} -lt ${max_attempts} ]; do local state state=$(aws batch describe-job-queues \ --job-queues "${queue_name}" \ --query 'jobQueues[0].state' \ --output text 2>/dev/null || echo "NOT_FOUND") if [ "${state}" = "ENABLED" ]; then log "Job queue ${queue_name} is ready" return 0 elif [ "${state}" = "DISABLED" ] || [ "${state}" = "NOT_FOUND" ]; then log "ERROR: Job queue ${queue_name} failed to create properly" return 1 fi log "Job queue state: ${state}. Waiting 10 seconds..." sleep 10 ((attempt++)) done log "ERROR: Timeout waiting for job queue ${queue_name}" return 1 } # Function to wait for job completion wait_for_job() { local job_id="${1}" local max_attempts=120 local attempt=0 log "Waiting for job ${job_id} to complete..." while [ ${attempt} -lt ${max_attempts} ]; do local status status=$(aws batch describe-jobs \ --jobs "${job_id}" \ --query 'jobs[0].status' \ --output text 2>/dev/null || echo "NOT_FOUND") if [ "${status}" = "SUCCEEDED" ]; then log "Job ${job_id} completed successfully" return 0 elif [ "${status}" = "FAILED" ]; then log "ERROR: Job ${job_id} failed" return 1 fi log "Job status: ${status}. Waiting 30 seconds..." sleep 30 ((attempt++)) done log "ERROR: Timeout waiting for job ${job_id}" return 1 } # Function to wait for resource state before deletion wait_for_resource_state() { local resource_type="${1}" local resource_name="${2}" local expected_state="${3}" local max_attempts=30 local attempt=0 log "Waiting for ${resource_type} ${resource_name} to reach state: ${expected_state}" while [ ${attempt} -lt ${max_attempts} ]; do local current_state="" case "${resource_type}" in "JOB_QUEUE") current_state=$(aws batch describe-job-queues \ --job-queues "${resource_name}" \ --query 'jobQueues[0].state' \ --output text 2>/dev/null || echo "NOT_FOUND") ;; "COMPUTE_ENV") current_state=$(aws batch describe-compute-environments \ --compute-environments "${resource_name}" \ --query 'computeEnvironments[0].status' \ --output text 2>/dev/null || echo "NOT_FOUND") ;; *) log "WARNING: Unknown resource type: ${resource_type}" return 1 ;; esac if [ "${current_state}" = "${expected_state}" ]; then log "${resource_type} ${resource_name} is now in state: ${expected_state}" return 0 fi log "${resource_type} ${resource_name} state: ${current_state} (waiting for ${expected_state})" sleep 10 ((attempt++)) done log "WARNING: ${resource_type} ${resource_name} did not reach expected state after ${max_attempts} attempts" return 1 } # Cleanup function cleanup_resources() { log "Starting cleanup of created resources..." # Clean up in reverse order of creation for ((i=${#CREATED_RESOURCES[@]}-1; i>=0; i--)); do local resource="${CREATED_RESOURCES[i]}" local resource_type local resource_name resource_type=$(echo "${resource}" | cut -d: -f1) resource_name=$(echo "${resource}" | cut -d: -f2-) log "Cleaning up ${resource_type}: ${resource_name}" case "${resource_type}" in "JOB_QUEUE") aws batch update-job-queue --job-queue "${resource_name}" --state DISABLED 2>/dev/null || true wait_for_resource_state "JOB_QUEUE" "${resource_name}" "DISABLED" || true aws batch delete-job-queue --job-queue "${resource_name}" 2>/dev/null || true ;; "COMPUTE_ENV") aws batch update-compute-environment --compute-environment "${resource_name}" --state DISABLED 2>/dev/null || true wait_for_resource_state "COMPUTE_ENV" "${resource_name}" "DISABLED" || true aws batch delete-compute-environment --compute-environment "${resource_name}" 2>/dev/null || true ;; "IAM_ROLE") aws iam detach-role-policy --role-name "${resource_name}" --policy-arn "arn:aws:iam::aws:policy/service-role/AmazonECSTaskExecutionRolePolicy" 2>/dev/null || true aws iam delete-role --role-name "${resource_name}" 2>/dev/null || true ;; "FILE") rm -f "${resource_name}" 2>/dev/null || true ;; *) log "WARNING: Unknown resource type for cleanup: ${resource_type}" ;; esac done log "Cleanup completed" } # Validate input parameters validate_inputs() { if [ -z "${ACCOUNT_ID:-}" ]; then log "ERROR: ACCOUNT_ID is not set" return 1 fi if [ -z "${DEFAULT_VPC:-}" ]; then log "ERROR: DEFAULT_VPC is not set" return 1 fi if [ -z "${SUBNETS:-}" ]; then log "ERROR: SUBNETS is not set" return 1 fi } # Validate container image format validate_container_image() { local image="${1}" # Check if image contains any shell metacharacters that could be dangerous if [[ "${image}" =~ [';$`|&<>()[]{}\\'] ]]; then log "ERROR: Container image contains potentially dangerous characters: ${image}" return 1 fi # Basic ECR/Docker image format validation if ! [[ "${image}" =~ ^[a-zA-Z0-9._/:-]+$ ]]; then log "ERROR: Container image format is invalid: ${image}" return 1 fi return 0 } # Main script execution main() { log "Starting AWS Batch Fargate tutorial script - Security Hardened Version" log "Log file: ${LOG_FILE}" # Validate AWS credentials validate_aws_credentials # Get AWS account ID log "Getting AWS account ID..." local ACCOUNT_ID ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text) if [ -z "${ACCOUNT_ID}" ] || [ "${ACCOUNT_ID}" = "None" ]; then log "ERROR: Could not retrieve AWS account ID" exit 1 fi log "Account ID: ${ACCOUNT_ID}" # Get default VPC and subnets log "Getting default VPC and subnets..." local DEFAULT_VPC DEFAULT_VPC=$(aws ec2 describe-vpcs \ --filters "Name=is-default,Values=true" \ --query 'Vpcs[0].VpcId' \ --output text) if [ "${DEFAULT_VPC}" = "None" ] || [ "${DEFAULT_VPC}" = "null" ] || [ -z "${DEFAULT_VPC}" ]; then log "ERROR: No default VPC found. Please create a VPC first." exit 1 fi log "Default VPC: ${DEFAULT_VPC}" # Get subnets in the default VPC local SUBNETS SUBNETS=$(aws ec2 describe-subnets \ --filters "Name=vpc-id,Values=${DEFAULT_VPC}" \ --query 'Subnets[*].SubnetId' \ --output text) if [ -z "${SUBNETS}" ]; then log "ERROR: No subnets found in default VPC" exit 1 fi # Convert tab/space-separated subnets to JSON array format local SUBNET_ARRAY SUBNET_ARRAY=$(echo "${SUBNETS}" | tr '\t ' '\n' | sed 's/^/"/;s/$/"/' | paste -sd ',' -) log "Subnets: ${SUBNETS}" log "Subnet array: [${SUBNET_ARRAY}]" # Get default security group for the VPC local DEFAULT_SG DEFAULT_SG=$(aws ec2 describe-security-groups \ --filters "Name=vpc-id,Values=${DEFAULT_VPC}" "Name=group-name,Values=default" \ --query 'SecurityGroups[0].GroupId' \ --output text) if [ "${DEFAULT_SG}" = "None" ] || [ "${DEFAULT_SG}" = "null" ] || [ -z "${DEFAULT_SG}" ]; then log "ERROR: No default security group found in VPC" exit 1 fi log "Default security group: ${DEFAULT_SG}" # Step 1: Create IAM execution role log "Step 1: Creating IAM execution role..." # Create trust policy document with proper escaping cat > "${TRUST_POLICY_FILE}" << 'EOFPOLICY' { "Version":"2012-10-17", "Statement": [ { "Effect": "Allow", "Principal": { "Service": "ecs-tasks.amazonaws.com" }, "Action": "sts:AssumeRole" } ] } EOFPOLICY CREATED_RESOURCES+=("FILE:${TRUST_POLICY_FILE}") # Validate trust policy file before using it if ! jq empty "${TRUST_POLICY_FILE}" 2>/dev/null; then log "ERROR: Trust policy file is not valid JSON" exit 1 fi # Create the role aws iam create-role \ --role-name "${ROLE_NAME}" \ --assume-role-policy-document "file://${TRUST_POLICY_FILE}" CREATED_RESOURCES+=("IAM_ROLE:${ROLE_NAME}") # Attach policy aws iam attach-role-policy \ --role-name "${ROLE_NAME}" \ --policy-arn "arn:aws:iam::aws:policy/service-role/AmazonECSTaskExecutionRolePolicy" log "IAM role created: ${ROLE_NAME}" # Wait for IAM role propagation log "Waiting for IAM role propagation (15 seconds)..." sleep 15 # Step 2: Create compute environment log "Step 2: Creating Fargate compute environment..." aws batch create-compute-environment \ --compute-environment-name "${COMPUTE_ENV_NAME}" \ --type MANAGED \ --state ENABLED \ --compute-resources "{ \"type\": \"FARGATE\", \"maxvCpus\": 256, \"subnets\": [${SUBNET_ARRAY}], \"securityGroupIds\": [\"${DEFAULT_SG}\"] }" CREATED_RESOURCES+=("COMPUTE_ENV:${COMPUTE_ENV_NAME}") # Wait for compute environment to be ready if ! wait_for_compute_env "${COMPUTE_ENV_NAME}"; then log "ERROR: Compute environment failed to reach VALID state" exit 1 fi # Step 3: Create job queue log "Step 3: Creating job queue..." aws batch create-job-queue \ --job-queue-name "${JOB_QUEUE_NAME}" \ --state ENABLED \ --priority 900 \ --compute-environment-order "order=1,computeEnvironment=${COMPUTE_ENV_NAME}" CREATED_RESOURCES+=("JOB_QUEUE:${JOB_QUEUE_NAME}") # Wait for job queue to be ready if ! wait_for_job_queue "${JOB_QUEUE_NAME}"; then log "ERROR: Job queue failed to reach ENABLED state" exit 1 fi # Step 4: Create job definition log "Step 4: Creating job definition..." local CONTAINER_IMAGE="busybox:latest" validate_container_image "${CONTAINER_IMAGE}" aws batch register-job-definition \ --job-definition-name "${JOB_DEF_NAME}" \ --type container \ --platform-capabilities FARGATE \ --container-properties "{ \"image\": \"${CONTAINER_IMAGE}\", \"resourceRequirements\": [ {\"type\": \"VCPU\", \"value\": \"0.25\"}, {\"type\": \"MEMORY\", \"value\": \"512\"} ], \"command\": [\"echo\", \"hello world\"], \"networkConfiguration\": { \"assignPublicIp\": \"DISABLED\" }, \"executionRoleArn\": \"arn:aws:iam::${ACCOUNT_ID}:role/${ROLE_NAME}\" }" log "Job definition created: ${JOB_DEF_NAME}" # Step 5: Submit job log "Step 5: Submitting job..." local JOB_ID JOB_ID=$(aws batch submit-job \ --job-name "${JOB_NAME}" \ --job-queue "${JOB_QUEUE_NAME}" \ --job-definition "${JOB_DEF_NAME}" \ --query 'jobId' \ --output text) if [ -z "${JOB_ID}" ] || [ "${JOB_ID}" = "None" ]; then log "ERROR: Failed to submit job" exit 1 fi log "Job submitted with ID: ${JOB_ID}" # Step 6: Wait for job completion and view output log "Step 6: Waiting for job completion..." if ! wait_for_job "${JOB_ID}"; then log "ERROR: Job failed or timed out" exit 1 fi # Get log stream name log "Getting job logs..." local LOG_STREAM LOG_STREAM=$(aws batch describe-jobs \ --jobs "${JOB_ID}" \ --query 'jobs[0].attempts[0].taskProperties.containers[0].logStreamName' \ --output text) if [ "${LOG_STREAM}" != "None" ] && [ "${LOG_STREAM}" != "null" ] && [ -n "${LOG_STREAM}" ]; then log "Log stream: ${LOG_STREAM}" log "Job output:" aws logs get-log-events \ --log-group-name "/aws/batch/job" \ --log-stream-name "${LOG_STREAM}" \ --query 'events[*].message' \ --output text 2>/dev/null | tee -a "${LOG_FILE}" || true else log "No log stream available for job" fi log "Tutorial completed successfully!" # Show created resources echo "" echo "===========================================" echo "CREATED RESOURCES" echo "===========================================" echo "The following resources were created:" for resource in "${CREATED_RESOURCES[@]}"; do echo " - ${resource}" done echo "" echo "===========================================" echo "CLEANUP" echo "===========================================" cleanup_resources log "All resources have been cleaned up" } # Run main function main "$@"-
For API details, see the following topics in AWS CLI Command Reference.
-