AnalyzeDocument.java - AWS Code Sample

AnalyzeDocument.java

AnalyzeDocument.java demonstrates how to analyze a document.

/** * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. * * This file is licensed under the Apache License, Version 2.0 (the "License"). * You may not use this file except in compliance with the License. A copy of * the License is located at * * http://aws.amazon.com/apache2.0/ * * This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR * CONDITIONS OF ANY KIND, either express or implied. See the License for the * specific language governing permissions and limitations under the License. */ package com.example.textract; import software.amazon.awssdk.core.SdkBytes; import software.amazon.awssdk.regions.Region; import software.amazon.awssdk.services.textract.TextractClient; import software.amazon.awssdk.services.textract.model.AnalyzeDocumentRequest; import software.amazon.awssdk.services.textract.model.Document; import software.amazon.awssdk.services.textract.model.FeatureType; import software.amazon.awssdk.services.textract.model.AnalyzeDocumentResponse; import software.amazon.awssdk.services.textract.model.Block; import software.amazon.awssdk.services.textract.model.TextractException; import java.io.File; import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.InputStream; import java.util.ArrayList; import java.util.Iterator; import java.util.List; public class AnalyzeDocument { public static void main(String[] args) { final String USAGE = "\n" + "Usage:\n" + " AnalyzeDocument <sourceDoc> \n\n" + "Where:\n" + " sourceDoc - The path where the document is located (must be an image, i.e., C:/AWS/book.png) \n"; if (args.length < 1) { System.out.println(USAGE); System.exit(1); } String sourceDoc = args[0]; Region region = Region.US_EAST_2; TextractClient textractClient = TextractClient.builder() .region(region) .build(); analyzeDoc(textractClient, sourceDoc); } public static void analyzeDoc(TextractClient textractClient, String sourceDoc) { try { InputStream sourceStream = new FileInputStream(new File(sourceDoc)); SdkBytes sourceBytes = SdkBytes.fromInputStream(sourceStream); // Get the input Document object as bytes Document myDoc = Document.builder() .bytes(sourceBytes) .build(); List<FeatureType> featureTypes = new ArrayList<FeatureType>(); featureTypes.add(FeatureType.FORMS); featureTypes.add(FeatureType.TABLES); AnalyzeDocumentRequest analyzeDocumentRequest = AnalyzeDocumentRequest.builder() .featureTypes(featureTypes) .document(myDoc) .build(); AnalyzeDocumentResponse analyzeDocument = textractClient.analyzeDocument(analyzeDocumentRequest); List<Block> docInfo = analyzeDocument.blocks(); Iterator<Block> blockIterator = docInfo.iterator(); while(blockIterator.hasNext()) { Block block = blockIterator.next(); System.out.println("The block type is " +block.blockType().toString()); } } catch (TextractException | FileNotFoundException e) { System.err.println(e.getMessage()); System.exit(1); } } }

Sample Details

Service: Amazon Textract

Last tested: 7-8-2020

Author: scmacdon - AWS

Type: full-example