file_transfer.py - AWS Code Sample

file_transfer.py

# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. # SPDX-License-Identifier: Apache-2.0 """ Use Boto 3 managed file transfers to manage multipart uploads to and downloads from an Amazon S3 bucket. When the file to transfer is larger than the specified threshold, the transfer manager automatically uses multipart uploads or downloads. This demonstration shows how to use several of the available transfer manager settings and reports thread usage and time to transfer. """ import sys import threading import boto3 from boto3.s3.transfer import TransferConfig MB = 1024 * 1024 s3 = boto3.resource('s3') class TransferCallback: """ Handle callbacks from the transfer manager. The transfer manager periodically calls the __call__ method throughout the upload and download process so that it can take action, such as displaying progress to the user and collecting data about the transfer. """ def __init__(self, target_size): self._target_size = target_size self._total_transferred = 0 self._lock = threading.Lock() self.thread_info = {} def __call__(self, bytes_transferred): """ The callback method that is called by the transfer manager. Display progress during file transfer and collect per-thread transfer data. This method can be called by multiple threads, so shared instance data is protected by a thread lock. """ thread = threading.current_thread() with self._lock: self._total_transferred += bytes_transferred if thread.ident not in self.thread_info.keys(): self.thread_info[thread.ident] = bytes_transferred else: self.thread_info[thread.ident] += bytes_transferred target = self._target_size * MB sys.stdout.write( f"\r{self._total_transferred} of {target} transferred " f"({(self._total_transferred / target) * 100:.2f}%).") sys.stdout.flush() def upload_with_default_configuration(local_file_path, bucket_name, object_key, file_size_mb): """ Upload a file from a local folder to an Amazon S3 bucket, using the default configuration. """ transfer_callback = TransferCallback(file_size_mb) s3.Bucket(bucket_name).upload_file( local_file_path, object_key, Callback=transfer_callback) return transfer_callback.thread_info def upload_with_chunksize_and_meta(local_file_path, bucket_name, object_key, file_size_mb, metadata=None): """ Upload a file from a local folder to an Amazon S3 bucket, setting a multipart chunk size and adding metadata to the Amazon S3 object. The multipart chunk size controls the size of the chunks of data that are sent in the request. A smaller chunk size typically results in the transfer manager using more threads for the upload. The metadata is a set of key-value pairs that are stored with the object in Amazon S3. """ transfer_callback = TransferCallback(file_size_mb) config = TransferConfig(multipart_chunksize=1 * MB) extra_args = {'Metadata': metadata} if metadata else None s3.Bucket(bucket_name).upload_file( local_file_path, object_key, Config=config, ExtraArgs=extra_args, Callback=transfer_callback) return transfer_callback.thread_info def upload_with_high_threshold(local_file_path, bucket_name, object_key, file_size_mb): """ Upload a file from a local folder to an Amazon S3 bucket, setting a multipart threshold larger than the size of the file. Setting a multipart threshold larger than the size of the file results in the transfer manager sending the file as a standard upload instead of a multipart upload. """ transfer_callback = TransferCallback(file_size_mb) config = TransferConfig(multipart_threshold=file_size_mb * 2 * MB) s3.Bucket(bucket_name).upload_file( local_file_path, object_key, Config=config, Callback=transfer_callback) return transfer_callback.thread_info def upload_with_sse(local_file_path, bucket_name, object_key, file_size_mb, sse_key=None): """ Upload a file from a local folder to an Amazon S3 bucket, adding server-side encryption with customer-provided encryption keys to the object. When this kind of encryption is specified, Amazon S3 encrypts the object at rest and allows downloads only when the expected encryption key is provided in the download request. """ transfer_callback = TransferCallback(file_size_mb) if sse_key: extra_args = { 'SSECustomerAlgorithm': 'AES256', 'SSECustomerKey': sse_key} else: extra_args = None s3.Bucket(bucket_name).upload_file( local_file_path, object_key, ExtraArgs=extra_args, Callback=transfer_callback) return transfer_callback.thread_info def download_with_default_configuration(bucket_name, object_key, download_file_path, file_size_mb): """ Download a file from an Amazon S3 bucket to a local folder, using the default configuration. """ transfer_callback = TransferCallback(file_size_mb) s3.Bucket(bucket_name).Object(object_key).download_file( download_file_path, Callback=transfer_callback) return transfer_callback.thread_info def download_with_single_thread(bucket_name, object_key, download_file_path, file_size_mb): """ Download a file from an Amazon S3 bucket to a local folder, using a single thread. """ transfer_callback = TransferCallback(file_size_mb) config = TransferConfig(use_threads=False) s3.Bucket(bucket_name).Object(object_key).download_file( download_file_path, Config=config, Callback=transfer_callback) return transfer_callback.thread_info def download_with_high_threshold(bucket_name, object_key, download_file_path, file_size_mb): """ Download a file from an Amazon S3 bucket to a local folder, setting a multipart threshold larger than the size of the file. Setting a multipart threshold larger than the size of the file results in the transfer manager sending the file as a standard download instead of a multipart download. """ transfer_callback = TransferCallback(file_size_mb) config = TransferConfig(multipart_threshold=file_size_mb * 2 * MB) s3.Bucket(bucket_name).Object(object_key).download_file( download_file_path, Config=config, Callback=transfer_callback) return transfer_callback.thread_info def download_with_sse(bucket_name, object_key, download_file_path, file_size_mb, sse_key): """ Download a file from an Amazon S3 bucket to a local folder, adding a customer-provided encryption key to the request. When this kind of encryption is specified, Amazon S3 encrypts the object at rest and allows downloads only when the expected encryption key is provided in the download request. """ transfer_callback = TransferCallback(file_size_mb) if sse_key: extra_args = { 'SSECustomerAlgorithm': 'AES256', 'SSECustomerKey': sse_key} else: extra_args = None s3.Bucket(bucket_name).Object(object_key).download_file( download_file_path, ExtraArgs=extra_args, Callback=transfer_callback) return transfer_callback.thread_info