GlueScenario.java - AWS Code Sample

This version of the AWS Code Sample Catalog has been replaced by the AWS Code Library, which contains new and updated code examples.

GlueScenario.java

GlueScenario.java demonstrates how to perform multiple AWS Glue operations.

Note

To run this Java (SDK V2) code example, ensure that you have set up your development environment, including your credentials. For more information, see AWS SDK for Java 2.x Developer Guide.

/* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. SPDX-License-Identifier: Apache-2.0 */ package com.example.glue; import software.amazon.awssdk.auth.credentials.ProfileCredentialsProvider; import software.amazon.awssdk.regions.Region; import software.amazon.awssdk.services.glue.GlueClient; import java.time.Instant; import java.time.ZoneId; import java.time.format.DateTimeFormatter; import java.time.format.FormatStyle; import java.util.ArrayList; import java.util.List; import java.util.Locale; import java.util.concurrent.TimeUnit; import software.amazon.awssdk.services.glue.model.DatabaseInput; import software.amazon.awssdk.services.glue.model.CreateDatabaseRequest; import software.amazon.awssdk.services.glue.model.GlueException; import software.amazon.awssdk.services.glue.model.GetCrawlerResponse; import software.amazon.awssdk.services.glue.model.GetCrawlerRequest; import software.amazon.awssdk.services.glue.model.CreateCrawlerRequest; import software.amazon.awssdk.services.glue.model.GetTablesResponse; import software.amazon.awssdk.services.glue.model.GetTablesRequest; import software.amazon.awssdk.services.glue.model.StartCrawlerRequest; import software.amazon.awssdk.services.glue.model.GetDatabaseRequest; import software.amazon.awssdk.services.glue.model.CrawlerTargets; import software.amazon.awssdk.services.glue.model.GetDatabaseResponse; import software.amazon.awssdk.services.glue.model.Table; import software.amazon.awssdk.services.glue.model.StartJobRunResponse; import software.amazon.awssdk.services.glue.model.StartJobRunRequest; import software.amazon.awssdk.services.glue.model.GetJobsRequest; import software.amazon.awssdk.services.glue.model.CreateJobRequest; import software.amazon.awssdk.services.glue.model.GetJobRunsResponse; import software.amazon.awssdk.services.glue.model.GetJobsResponse; import software.amazon.awssdk.services.glue.model.Job; import software.amazon.awssdk.services.glue.model.WorkerType; import software.amazon.awssdk.services.glue.model.JobCommand; import software.amazon.awssdk.services.glue.model.GetJobRunsRequest; import software.amazon.awssdk.services.glue.model.JobRun; import software.amazon.awssdk.services.glue.model.S3Target; import software.amazon.awssdk.services.glue.model.DeleteJobRequest; import software.amazon.awssdk.services.glue.model.DeleteDatabaseRequest; import software.amazon.awssdk.services.glue.model.DeleteCrawlerRequest; /** * * Before running this Java V2 code example, set up your development environment, including your credentials. * * For more information, see the following documentation topic: * * https://docs.aws.amazon.com/sdk-for-java/latest/developer-guide/get-started.html * * To set up the resources, see this documentation topic: * * https://docs.aws.amazon.com/glue/latest/ug/tutorial-add-crawler.html * * This example performs the following tasks: * * 1. CreateDatabase * 2. CreateCrawler * 3. GetCrawler * 4. StartCrawler * 5. GetDatabase * 6. GetTables * 7. CreateJob * 8. StartJobRun * 9. ListJobs * 10. GetJobRuns * 11. DeleteJob * 12. DeleteDatabase * 13. DeleteCrawler */ public class GlueScenario { public static void main(String[] args) throws InterruptedException { final String usage = "\n" + "Usage:\n" + " <iam> <s3Path> <cron> <dbName> <crawlerName> <jobName> \n\n" + "Where:\n" + " iam - The ARN of the IAM role that has AWS Glue and S3 permissions. \n" + " s3Path - The Amazon Simple Storage Service (Amazon S3) target that contains data (for example, CSV data).\n" + " cron - A cron expression used to specify the schedule (i.e., cron(15 12 * * ? *).\n" + " dbName - The database name. \n" + " crawlerName - The name of the crawler. \n" + " jobName - The name you assign to this job definition."+ " scriptLocation - The Amazon S3 path to a script that runs a job." + " locationUri - The location of the database" ; if (args.length != 8) { System.out.println(usage); System.exit(1); } String iam = args[0]; String s3Path = args[1]; String cron = args[2]; String dbName = args[3]; String crawlerName = args[4]; String jobName = args[5]; String scriptLocation = args[6]; String locationUri = args[7]; Region region = Region.US_EAST_1; GlueClient glueClient = GlueClient.builder() .region(region) .credentialsProvider(ProfileCredentialsProvider.create()) .build(); System.out.println("About to start the AWS Glue Scenario"); createDatabase(glueClient, dbName, locationUri); createGlueCrawler(glueClient, iam, s3Path, cron, dbName, crawlerName); getSpecificCrawler(glueClient, crawlerName); startSpecificCrawler(glueClient, crawlerName); getSpecificDatabase(glueClient, dbName); getGlueTables(glueClient, dbName); createJob(glueClient, jobName, iam, scriptLocation); startJob(glueClient, jobName); getAllJobs(glueClient); getJobRuns(glueClient, jobName); deleteJob(glueClient, jobName); System.out.println("*** Wait 5 MIN for the "+crawlerName +" to stop"); TimeUnit.MINUTES.sleep(5); deleteDatabase(glueClient, dbName); deleteSpecificCrawler(glueClient, crawlerName); System.out.println("Successfully completed the AWS Glue Scenario"); } public static void createDatabase(GlueClient glueClient, String dbName, String locationUri ) { try { DatabaseInput input = DatabaseInput.builder() .description("Built with the AWS SDK for Java V2") .name(dbName) .locationUri(locationUri) .build(); CreateDatabaseRequest request = CreateDatabaseRequest.builder() .databaseInput(input) .build(); glueClient.createDatabase(request); System.out.println("The database was successfully created"); } catch (GlueException e) { System.err.println(e.awsErrorDetails().errorMessage()); System.exit(1); } } public static void createGlueCrawler(GlueClient glueClient, String iam, String s3Path, String cron, String dbName, String crawlerName) { try { S3Target s3Target = S3Target.builder() .path(s3Path) .build(); List<S3Target> targetList = new ArrayList<>(); targetList.add(s3Target); CrawlerTargets targets = CrawlerTargets.builder() .s3Targets(targetList) .build(); CreateCrawlerRequest crawlerRequest = CreateCrawlerRequest.builder() .databaseName(dbName) .name(crawlerName) .description("Created by the AWS Glue Java API") .targets(targets) .role(iam) .schedule(cron) .build(); glueClient.createCrawler(crawlerRequest); System.out.println(crawlerName +" was successfully created"); } catch (GlueException e) { System.err.println(e.awsErrorDetails().errorMessage()); System.exit(1); } } public static void getSpecificCrawler(GlueClient glueClient, String crawlerName) { try { GetCrawlerRequest crawlerRequest = GetCrawlerRequest.builder() .name(crawlerName) .build(); GetCrawlerResponse response = glueClient.getCrawler(crawlerRequest); Instant createDate = response.crawler().creationTime(); DateTimeFormatter formatter = DateTimeFormatter.ofLocalizedDateTime( FormatStyle.SHORT ) .withLocale( Locale.US) .withZone( ZoneId.systemDefault() ); formatter.format( createDate ); System.out.println("The create date of the Crawler is " + createDate ); } catch (GlueException e) { System.err.println(e.awsErrorDetails().errorMessage()); System.exit(1); } } public static void startSpecificCrawler(GlueClient glueClient, String crawlerName) { try { StartCrawlerRequest crawlerRequest = StartCrawlerRequest.builder() .name(crawlerName) .build(); glueClient.startCrawler(crawlerRequest); System.out.println(crawlerName +" was successfully started!"); } catch (GlueException e) { System.err.println(e.awsErrorDetails().errorMessage()); System.exit(1); } } public static void getSpecificDatabase(GlueClient glueClient, String databaseName) { try { GetDatabaseRequest databasesRequest = GetDatabaseRequest.builder() .name(databaseName) .build(); GetDatabaseResponse response = glueClient.getDatabase(databasesRequest); Instant createDate = response.database().createTime(); // Convert the Instant to readable date. DateTimeFormatter formatter = DateTimeFormatter.ofLocalizedDateTime( FormatStyle.SHORT ) .withLocale( Locale.US) .withZone( ZoneId.systemDefault() ); formatter.format( createDate ); System.out.println("The create date of the database is " + createDate ); } catch (GlueException e) { System.err.println(e.awsErrorDetails().errorMessage()); System.exit(1); } } public static void getGlueTables(GlueClient glueClient, String dbName){ try { GetTablesRequest tableRequest = GetTablesRequest.builder() .databaseName(dbName) .build(); GetTablesResponse response = glueClient.getTables(tableRequest); List<Table> tables = response.tableList(); for (Table table: tables) { System.out.println("Table name is: "+table.name()); } } catch (GlueException e) { System.err.println(e.awsErrorDetails().errorMessage()); System.exit(1); } } public static void startJob(GlueClient glueClient, String jobName) { try { StartJobRunRequest runRequest = StartJobRunRequest.builder() .workerType(WorkerType.G_1_X) .numberOfWorkers(10) .jobName(jobName) .build(); StartJobRunResponse response = glueClient.startJobRun(runRequest); System.out.println("The request Id of the job is "+ response.responseMetadata().requestId()); } catch (GlueException e) { System.err.println(e.awsErrorDetails().errorMessage()); System.exit(1); } } public static void createJob(GlueClient glueClient, String jobName, String iam, String scriptLocation) { try { JobCommand command = JobCommand.builder() .pythonVersion("3") .name("MyJob1") .scriptLocation(scriptLocation) .build(); CreateJobRequest jobRequest = CreateJobRequest.builder() .description("A Job created by using the AWS SDK for Java V2") .glueVersion("2.0") .workerType(WorkerType.G_1_X) .numberOfWorkers(10) .name(jobName) .role(iam) .command(command) .build(); glueClient.createJob(jobRequest); System.out.println(jobName +" was successfully created."); } catch (GlueException e) { System.err.println(e.awsErrorDetails().errorMessage()); System.exit(1); } } public static void getAllJobs(GlueClient glueClient) { try { GetJobsRequest jobsRequest = GetJobsRequest.builder() .maxResults(10) .build(); GetJobsResponse jobsResponse = glueClient.getJobs(jobsRequest); List<Job> jobs = jobsResponse.jobs(); for (Job job: jobs) { System.out.println("Job name is : "+job.name()); System.out.println("The job worker type is : "+job.workerType().name()); } } catch (GlueException e) { System.err.println(e.awsErrorDetails().errorMessage()); System.exit(1); } } public static void getJobRuns(GlueClient glueClient, String jobName) { try { GetJobRunsRequest runsRequest = GetJobRunsRequest.builder() .jobName(jobName) .maxResults(20) .build(); GetJobRunsResponse response = glueClient.getJobRuns(runsRequest); List<JobRun> jobRuns = response.jobRuns(); for (JobRun jobRun: jobRuns) { System.out.println("Job run state is "+jobRun.jobRunState().name()); System.out.println("Job run Id is "+jobRun.id()); System.out.println("The Glue version is "+jobRun.glueVersion()); } } catch (GlueException e) { System.err.println(e.awsErrorDetails().errorMessage()); System.exit(1); } } public static void deleteJob(GlueClient glueClient, String jobName) { try { DeleteJobRequest jobRequest = DeleteJobRequest.builder() .jobName(jobName) .build(); glueClient.deleteJob(jobRequest); System.out.println(jobName +" was successfully deleted"); } catch (GlueException e) { System.err.println(e.awsErrorDetails().errorMessage()); System.exit(1); } } public static void deleteDatabase(GlueClient glueClient, String databaseName) { try { DeleteDatabaseRequest request = DeleteDatabaseRequest.builder() .name(databaseName) .build(); glueClient.deleteDatabase(request); System.out.println(databaseName +" was successfully deleted"); } catch (GlueException e) { System.err.println(e.awsErrorDetails().errorMessage()); System.exit(1); } } public static void deleteSpecificCrawler(GlueClient glueClient, String crawlerName) { try { DeleteCrawlerRequest deleteCrawlerRequest = DeleteCrawlerRequest.builder() .name(crawlerName) .build(); glueClient.deleteCrawler(deleteCrawlerRequest); System.out.println(crawlerName +" was deleted"); } catch (GlueException e) { System.err.println(e.awsErrorDetails().errorMessage()); System.exit(1); } } }