搭配 AWS SDK 或命令列工具使用 SelectObjectContent - Amazon Simple Storage Service

本文為英文版的機器翻譯版本,如內容有任何歧義或不一致之處,概以英文版為準。

搭配 AWS SDK 或命令列工具使用 SelectObjectContent

下列程式碼範例會示範如何使用 SelectObjectContent。

CLI
AWS CLI

根據 SQL 陳述式篩選 Amazon S3 物件的內容

下列 select-object-content 範例會使用指定的 SQL 陳述式篩選物件 my-data-file.csv,並將輸出傳送至檔案。

# Filter the contents of my-data-file.csv with a SQL expression and write the
# result to output.csv. Each trailing backslash must be the last character on
# its line so the shell treats the following line as part of the same command.
aws s3api select-object-content \
    --bucket my-bucket \
    --key my-data-file.csv \
    --expression "select * from s3object limit 100" \
    --expression-type 'SQL' \
    --input-serialization '{"CSV": {}, "CompressionType": "NONE"}' \
    --output-serialization '{"CSV": {}}' "output.csv"

此命令不會產生輸出。

Java
適用於 Java 2.x 的 SDK
注意

GitHub 上提供更多範例。請在 AWS 程式碼範例儲存庫中尋找完整範例,並了解如何設定和執行。

下列範例顯示使用 JSON 物件的查詢。完整範例也會顯示 CSV 物件的使用方式。

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import software.amazon.awssdk.core.async.AsyncRequestBody;
import software.amazon.awssdk.core.async.BlockingInputStreamAsyncRequestBody;
import software.amazon.awssdk.core.exception.SdkException;
import software.amazon.awssdk.services.s3.S3AsyncClient;
import software.amazon.awssdk.services.s3.model.CSVInput;
import software.amazon.awssdk.services.s3.model.CSVOutput;
import software.amazon.awssdk.services.s3.model.CompressionType;
import software.amazon.awssdk.services.s3.model.ExpressionType;
import software.amazon.awssdk.services.s3.model.FileHeaderInfo;
import software.amazon.awssdk.services.s3.model.InputSerialization;
import software.amazon.awssdk.services.s3.model.JSONInput;
import software.amazon.awssdk.services.s3.model.JSONOutput;
import software.amazon.awssdk.services.s3.model.JSONType;
import software.amazon.awssdk.services.s3.model.ObjectIdentifier;
import software.amazon.awssdk.services.s3.model.OutputSerialization;
import software.amazon.awssdk.services.s3.model.Progress;
import software.amazon.awssdk.services.s3.model.PutObjectResponse;
import software.amazon.awssdk.services.s3.model.SelectObjectContentRequest;
import software.amazon.awssdk.services.s3.model.SelectObjectContentResponseHandler;
import software.amazon.awssdk.services.s3.model.Stats;

import java.io.IOException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.UUID;
import java.util.concurrent.CompletableFuture;

/**
 * Demonstrates the S3 {@code SelectObjectContent} operation with the asynchronous client.
 *
 * <p>This excerpt shows the JSON query; {@code setUp}, {@code tearDown} and
 * {@code runSelectObjectContentMethodForCSV} are referenced here but defined outside this excerpt.
 */
public class SelectObjectContentExample {
    static final Logger logger = LoggerFactory.getLogger(SelectObjectContentExample.class);
    static final String BUCKET_NAME = "select-object-content-" + UUID.randomUUID();
    static final S3AsyncClient s3AsyncClient = S3AsyncClient.create();
    static String FILE_CSV = "csv";
    static String FILE_JSON = "json";
    static String URL_CSV = "https://raw.githubusercontent.com/mledoze/countries/master/dist/countries.csv";
    static String URL_JSON = "https://raw.githubusercontent.com/mledoze/countries/master/dist/countries.json";

    /**
     * Runs the example: set up, run the JSON and CSV queries, then tear down.
     *
     * <p>If an {@link SdkException} is thrown it is logged and the process exits with status 1,
     * but only after {@code tearDown()} has run.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        SelectObjectContentExample selectObjectContentExample = new SelectObjectContentExample();
        // Remember the failure instead of calling System.exit() inside the catch block:
        // System.exit() halts the JVM immediately, which would skip the finally block
        // and therefore skip tearDown().
        int exitCode = 0;
        try {
            SelectObjectContentExample.setUp();
            selectObjectContentExample.runSelectObjectContentMethodForJSON();
            selectObjectContentExample.runSelectObjectContentMethodForCSV();
        } catch (SdkException e) {
            logger.error(e.getMessage(), e);
            exitCode = 1;
        } finally {
            SelectObjectContentExample.tearDown();
        }
        if (exitCode != 0) {
            System.exit(exitCode);
        }
    }

    /**
     * Queries the JSON object stored under the {@code FILE_JSON} key in {@code BUCKET_NAME}
     * and logs what was gathered from the response event stream.
     *
     * @return the {@code EventStreamInfo} populated while the response stream was processed
     */
    EventStreamInfo runSelectObjectContentMethodForJSON() {
        // Set up request parameters.
        final String queryExpression = "select * from s3object[*][*] c where c.area < 350000";
        final String fileType = FILE_JSON;

        InputSerialization inputSerialization = InputSerialization.builder()
                .json(JSONInput.builder().type(JSONType.DOCUMENT).build())
                .compressionType(CompressionType.NONE)
                .build();

        OutputSerialization outputSerialization = OutputSerialization.builder()
                .json(JSONOutput.builder().recordDelimiter(null).build())
                .build();

        // Build the SelectObjectContentRequest.
        SelectObjectContentRequest select = SelectObjectContentRequest.builder()
                .bucket(BUCKET_NAME)
                .key(FILE_JSON)
                .expression(queryExpression)
                .expressionType(ExpressionType.SQL)
                .inputSerialization(inputSerialization)
                .outputSerialization(outputSerialization)
                .build();

        EventStreamInfo eventStreamInfo = new EventStreamInfo();
        // Call the selectObjectContent method with the request and a response handler.
        // Supply an EventStreamInfo object to the response handler to gather records and information from the response.
        // join() blocks until the returned future completes.
        s3AsyncClient.selectObjectContent(select, buildResponseHandler(eventStreamInfo)).join();

        // Log out information gathered while processing the response stream.
        // Each stored payload is split on newlines and the line counts are summed.
        long recordCount = eventStreamInfo.getRecords().stream()
                .mapToInt(record -> record.split("\n").length)
                .sum();
        logger.info("Total records {}: {}", fileType, recordCount);
        logger.info("Visitor onRecords for fileType {} called {} times", fileType, eventStreamInfo.getCountOnRecordsCalled());
        logger.info("Visitor onStats for fileType {}, {}", fileType, eventStreamInfo.getStats());
        logger.info("Visitor onContinuations for fileType {}, {}", fileType, eventStreamInfo.getCountContinuationEvents());
        return eventStreamInfo;
    }

    /**
     * Builds a response handler whose visitor logs each event and records details
     * in the supplied {@code EventStreamInfo}.
     *
     * @param eventStreamInfo collector that receives record payloads, event counts and stats
     * @return a handler that subscribes the visitor to the response event stream
     */
    static SelectObjectContentResponseHandler buildResponseHandler(EventStreamInfo eventStreamInfo) {
        // Use a Visitor to process the response stream. This visitor logs information and gathers details while processing.
        final SelectObjectContentResponseHandler.Visitor visitor = SelectObjectContentResponseHandler.Visitor.builder()
                .onRecords(r -> {
                    logger.info("Record event received.");
                    eventStreamInfo.addRecord(r.payload().asUtf8String());
                    eventStreamInfo.incrementOnRecordsCalled();
                })
                .onCont(ce -> {
                    logger.info("Continuation event received.");
                    eventStreamInfo.incrementContinuationEvents();
                })
                .onProgress(pe -> {
                    Progress progress = pe.details();
                    logger.info("Progress event received:\n bytesScanned:{}\nbytesProcessed: {}\nbytesReturned:{}",
                            progress.bytesScanned(),
                            progress.bytesProcessed(),
                            progress.bytesReturned());
                })
                .onEnd(ee -> logger.info("End event received."))
                .onStats(se -> {
                    logger.info("Stats event received.");
                    eventStreamInfo.addStats(se.details());
                })
                .build();

        // Build the SelectObjectContentResponseHandler with the visitor that processes the stream.
        return SelectObjectContentResponseHandler.builder()
                .subscriber(visitor).build();
    }

    // The EventStreamInfo class is used to store information gathered while processing the response stream.
/**
 * Mutable collector for what is observed on a response event stream:
 * the record payloads, how many record and continuation events arrived,
 * and the most recently stored {@code Stats}.
 *
 * <p>NOTE(review): nothing here is synchronized; this presumably relies on the
 * event callbacks being invoked one at a time — confirm against the SDK.
 */
static class EventStreamInfo {
    private final List<String> records = new ArrayList<>();
    // Primitive counters: incrementing a boxed Integer unboxes and reboxes on every event.
    private int countOnRecordsCalled = 0;
    private int countContinuationEvents = 0;
    private Stats stats;

    /** Counts one more records event. */
    void incrementOnRecordsCalled() {
        countOnRecordsCalled++;
    }

    /** Counts one more continuation event. */
    void incrementContinuationEvents() {
        countContinuationEvents++;
    }

    /**
     * Appends one record payload.
     *
     * @param record the payload text to store
     */
    void addRecord(String record) {
        records.add(record);
    }

    /**
     * Stores the stats, replacing any previously stored value.
     *
     * @param stats the stats to keep
     */
    void addStats(Stats stats) {
        this.stats = stats;
    }

    /**
     * @return the live internal list of stored payloads (not a copy)
     */
    public List<String> getRecords() {
        return records;
    }

    /**
     * @return the number of records events counted so far
     */
    public Integer getCountOnRecordsCalled() {
        return countOnRecordsCalled;
    }

    /**
     * @return the number of continuation events counted so far
     */
    public Integer getCountContinuationEvents() {
        return countContinuationEvents;
    }

    /**
     * @return the stored stats, or {@code null} if none have been stored
     */
    public Stats getStats() {
        return stats;
    }
}
  • 如需 API 詳細資訊,請參閱《AWS SDK for Java 2.x API 參考》中的 SelectObjectContent。

如需 AWS SDK 開發人員指南和程式碼範例的完整清單,請參閱透過 AWS SDK 使用此服務。此主題也包含有關入門的資訊和舊版 SDK 的詳細資訊。