- MXNet Module
import numpy as np
import json
import mxnet as mx
import neomx  # noqa: F401
from collections import namedtuple

Batch = namedtuple('Batch', ['data'])

# Change the context to mx.cpu() if deploying to a CPU endpoint
ctx = mx.gpu()

def model_fn(model_dir):
    # The compiled model artifacts are saved with the prefix 'compiled'
    sym, arg_params, aux_params = mx.model.load_checkpoint('compiled', 0)
    mod = mx.mod.Module(symbol=sym, context=ctx, label_names=None)
    exe = mod.bind(for_training=False,
                   data_shapes=[('data', (1, 3, 224, 224))],
                   label_shapes=mod._label_shapes)
    mod.set_params(arg_params, aux_params, allow_missing=True)
    # Run warm-up inference on empty data during model load (required for GPU)
    data = mx.nd.empty((1, 3, 224, 224), ctx=ctx)
    mod.forward(Batch([data]))
    return mod

def transform_fn(mod, image, input_content_type, output_content_type):
    # pre-processing
    decoded = mx.image.imdecode(image)
    resized = mx.image.resize_short(decoded, 224)
    cropped, crop_info = mx.image.center_crop(resized, (224, 224))
    normalized = mx.image.color_normalize(cropped.astype(np.float32) / 255,
                                          mean=mx.nd.array([0.485, 0.456, 0.406]),
                                          std=mx.nd.array([0.229, 0.224, 0.225]))
    transposed = normalized.transpose((2, 0, 1))
    batchified = transposed.expand_dims(axis=0)
    casted = batchified.astype(dtype='float32')
    processed_input = casted.as_in_context(ctx)

    # prediction/inference
    mod.forward(Batch([processed_input]))

    # post-processing
    prob = mod.get_outputs()[0].asnumpy().tolist()
    prob_json = json.dumps(prob)
    return prob_json, output_content_type
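
To sanity-check these handlers outside of a SageMaker endpoint, you can call them directly. The following is a minimal local sketch, assuming the compiled artifacts (compiled-symbol.json, compiled-0000.params) and a hypothetical sample image cat.jpg sit in the working directory; the content types shown are just examples.

# Illustrative local test only; not part of the inference script.
# Assumes the 'compiled' checkpoint files and 'cat.jpg' exist locally (hypothetical).
mod = model_fn('.')
with open('cat.jpg', 'rb') as f:
    payload = f.read()
body, content_type = transform_fn(mod, payload, 'application/x-image', 'application/json')
print(content_type, body[:80])
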
- MXNet Gluon
import numpy as np
import json
import mxnet as mx
import neomx  # noqa: F401

# Change the context to mx.cpu() if deploying to a CPU endpoint
ctx = mx.gpu()

def model_fn(model_dir):
    # The compiled model artifacts are saved with the prefix 'compiled'
    block = mx.gluon.nn.SymbolBlock.imports('compiled-symbol.json', ['data'],
                                            'compiled-0000.params', ctx=ctx)
    # Hybridize the model and pass the options required by Neo: static_alloc=True and static_shape=True
    block.hybridize(static_alloc=True, static_shape=True)
    # Run warm-up inference on empty data during model load (required for GPU)
    data = mx.nd.empty((1, 3, 224, 224), ctx=ctx)
    warm_up = block(data)
    return block

def input_fn(image, input_content_type):
    # pre-processing
    decoded = mx.image.imdecode(image)
    resized = mx.image.resize_short(decoded, 224)
    cropped, crop_info = mx.image.center_crop(resized, (224, 224))
    normalized = mx.image.color_normalize(cropped.astype(np.float32) / 255,
                                          mean=mx.nd.array([0.485, 0.456, 0.406]),
                                          std=mx.nd.array([0.229, 0.224, 0.225]))
    transposed = normalized.transpose((2, 0, 1))
    batchified = transposed.expand_dims(axis=0)
    casted = batchified.astype(dtype='float32')
    processed_input = casted.as_in_context(ctx)
    return processed_input

def predict_fn(processed_input_data, block):
    # prediction/inference
    prediction = block(processed_input_data)
    return prediction

def output_fn(prediction, output_content_type):
    # post-processing
    prob = prediction.asnumpy().tolist()
    prob_json = json.dumps(prob)
    return prob_json, output_content_type
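
Because the Gluon script splits request handling across input_fn, predict_fn, and output_fn, the serving stack invokes them in that order for each request. A minimal local sketch of that chain, again assuming the compiled artifacts and a hypothetical cat.jpg in the working directory:

# Illustrative local chain of the handlers; not part of the inference script.
# Assumes 'compiled-symbol.json', 'compiled-0000.params', and 'cat.jpg' exist locally (hypothetical).
block = model_fn('.')
with open('cat.jpg', 'rb') as f:
    payload = f.read()
processed = input_fn(payload, 'application/x-image')
prediction = predict_fn(processed, block)
body, content_type = output_fn(prediction, 'application/json')
print(content_type, body[:80])
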
- PyTorch 1.4 and Older
import os
import torch
import torch.nn.parallel
import torch.optim
import torch.utils.data
import torch.utils.data.distributed
import torchvision.transforms as transforms
from PIL import Image
import io
import json
import pickle

def model_fn(model_dir):
    """Load the model and return it.

    Providing this function is optional. There is a default model_fn
    available which will load the model compiled using SageMaker Neo.
    You can override it here.

    Keyword arguments:
    model_dir -- the directory path where the model artifacts are present
    """
    # The compiled model is saved as "compiled.pt"
    model_path = os.path.join(model_dir, 'compiled.pt')
    with torch.neo.config(model_dir=model_dir, neo_runtime=True):
        model = torch.jit.load(model_path)
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        model = model.to(device)
        # We recommend that you run warm-up inference during model load
        sample_input_path = os.path.join(model_dir, 'sample_input.pkl')
        with open(sample_input_path, 'rb') as input_file:
            model_input = pickle.load(input_file)
        if torch.is_tensor(model_input):
            model_input = model_input.to(device)
            model(model_input)
        elif isinstance(model_input, tuple):
            # Move tensor elements to the target device while keeping any
            # non-tensor elements in place, so the call arity is preserved
            model_input = tuple(inp.to(device) if torch.is_tensor(inp) else inp
                                for inp in model_input)
            model(*model_input)
        else:
            print("Only supports a torch tensor or a tuple of torch tensors")
    return model

def transform_fn(model, request_body, request_content_type,
                 response_content_type):
    """Run prediction and return the output.

    The function
    1. Pre-processes the input request
    2. Runs prediction
    3. Post-processes the prediction output.
    """
    # preprocess
    decoded = Image.open(io.BytesIO(request_body))
    preprocess = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ])
    normalized = preprocess(decoded)
    batchified = normalized.unsqueeze(0)

    # predict
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    batchified = batchified.to(device)
    output = model.forward(batchified)

    return json.dumps(output.cpu().numpy().tolist()), response_content_type
- PyTorch 1.5 and Newer
import os
import torch
import torch.nn.parallel
import torch.optim
import torch.utils.data
import torch.utils.data.distributed
import torchvision.transforms as transforms
from PIL import Image
import io
import json
import pickle

def model_fn(model_dir):
    """Load the model and return it.

    Providing this function is optional. There is a default_model_fn
    available, which will load the model compiled using SageMaker Neo.
    You can override the default here. The model_fn only needs to be
    defined if your model needs extra steps to load, and can otherwise
    be left undefined.

    Keyword arguments:
    model_dir -- the directory path where the model artifacts are present
    """
    # The compiled model is saved as "model.pt"
    model_path = os.path.join(model_dir, 'model.pt')
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = torch.jit.load(model_path, map_location=device)
    model = model.to(device)
    return model

def transform_fn(model, request_body, request_content_type,
                 response_content_type):
    """Run prediction and return the output.

    The function
    1. Pre-processes the input request
    2. Runs prediction
    3. Post-processes the prediction output.
    """
    # preprocess
    decoded = Image.open(io.BytesIO(request_body))
    preprocess = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ])
    normalized = preprocess(decoded)
    batchified = normalized.unsqueeze(0)

    # predict
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    batchified = batchified.to(device)
    output = model.forward(batchified)

    return json.dumps(output.cpu().numpy().tolist()), response_content_type
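
As with the MXNet examples, the handler can be exercised locally as a quick smoke test. The following is a minimal sketch, assuming the compiled model.pt sits in a hypothetical ./model directory and a hypothetical sample image cat.jpg is available; the same pattern applies to the PyTorch 1.4 script above (with compiled.pt and sample_input.pkl in the model directory).

# Illustrative local smoke test only; not part of the inference script.
# Assumes './model/model.pt' and 'cat.jpg' exist locally (hypothetical paths).
model = model_fn('./model')
with open('cat.jpg', 'rb') as f:
    payload = f.read()
body, content_type = transform_fn(model, payload, 'application/x-image', 'application/json')
print(content_type, body[:80])
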