DescribeEndpoint
Returns the description of an endpoint.
Request Syntax
{
"EndpointName": "string"
}
Request Parameters
For information about the parameters that are common to all actions, see Common Parameters.
The request accepts the following data in JSON format.
- EndpointName
-
The name of the endpoint.
Type: String
Length Constraints: Maximum length of 63.
Pattern:
^[a-zA-Z0-9](-*[a-zA-Z0-9]){0,62}
Required: Yes
Response Syntax
{
"AsyncInferenceConfig": {
"ClientConfig": {
"MaxConcurrentInvocationsPerInstance": number
},
"OutputConfig": {
"KmsKeyId": "string",
"NotificationConfig": {
"ErrorTopic": "string",
"IncludeInferenceResponseIn": [ "string" ],
"SuccessTopic": "string"
},
"S3FailurePath": "string",
"S3OutputPath": "string"
}
},
"CreationTime": number,
"DataCaptureConfig": {
"CaptureStatus": "string",
"CurrentSamplingPercentage": number,
"DestinationS3Uri": "string",
"EnableCapture": boolean,
"KmsKeyId": "string"
},
"EndpointArn": "string",
"EndpointConfigName": "string",
"EndpointName": "string",
"EndpointStatus": "string",
"ExplainerConfig": {
"ClarifyExplainerConfig": {
"EnableExplanations": "string",
"InferenceConfig": {
"ContentTemplate": "string",
"FeatureHeaders": [ "string" ],
"FeaturesAttribute": "string",
"FeatureTypes": [ "string" ],
"LabelAttribute": "string",
"LabelHeaders": [ "string" ],
"LabelIndex": number,
"MaxPayloadInMB": number,
"MaxRecordCount": number,
"ProbabilityAttribute": "string",
"ProbabilityIndex": number
},
"ShapConfig": {
"NumberOfSamples": number,
"Seed": number,
"ShapBaselineConfig": {
"MimeType": "string",
"ShapBaseline": "string",
"ShapBaselineUri": "string"
},
"TextConfig": {
"Granularity": "string",
"Language": "string"
},
"UseLogit": boolean
}
}
},
"FailureReason": "string",
"LastDeploymentConfig": {
"AutoRollbackConfiguration": {
"Alarms": [
{
"AlarmName": "string"
}
]
},
"BlueGreenUpdatePolicy": {
"MaximumExecutionTimeoutInSeconds": number,
"TerminationWaitInSeconds": number,
"TrafficRoutingConfiguration": {
"CanarySize": {
"Type": "string",
"Value": number
},
"LinearStepSize": {
"Type": "string",
"Value": number
},
"Type": "string",
"WaitIntervalInSeconds": number
}
},
"RollingUpdatePolicy": {
"MaximumBatchSize": {
"Type": "string",
"Value": number
},
"MaximumExecutionTimeoutInSeconds": number,
"RollbackMaximumBatchSize": {
"Type": "string",
"Value": number
},
"WaitIntervalInSeconds": number
}
},
"LastModifiedTime": number,
"PendingDeploymentSummary": {
"EndpointConfigName": "string",
"ProductionVariants": [
{
"AcceleratorType": "string",
"CurrentInstanceCount": number,
"CurrentServerlessConfig": {
"MaxConcurrency": number,
"MemorySizeInMB": number,
"ProvisionedConcurrency": number
},
"CurrentWeight": number,
"DeployedImages": [
{
"ResolutionTime": number,
"ResolvedImage": "string",
"SpecifiedImage": "string"
}
],
"DesiredInstanceCount": number,
"DesiredServerlessConfig": {
"MaxConcurrency": number,
"MemorySizeInMB": number,
"ProvisionedConcurrency": number
},
"DesiredWeight": number,
"InstanceType": "string",
"ManagedInstanceScaling": {
"MaxInstanceCount": number,
"MinInstanceCount": number,
"Status": "string"
},
"RoutingConfig": {
"RoutingStrategy": "string"
},
"VariantName": "string",
"VariantStatus": [
{
"StartTime": number,
"Status": "string",
"StatusMessage": "string"
}
]
}
],
"ShadowProductionVariants": [
{
"AcceleratorType": "string",
"CurrentInstanceCount": number,
"CurrentServerlessConfig": {
"MaxConcurrency": number,
"MemorySizeInMB": number,
"ProvisionedConcurrency": number
},
"CurrentWeight": number,
"DeployedImages": [
{
"ResolutionTime": number,
"ResolvedImage": "string",
"SpecifiedImage": "string"
}
],
"DesiredInstanceCount": number,
"DesiredServerlessConfig": {
"MaxConcurrency": number,
"MemorySizeInMB": number,
"ProvisionedConcurrency": number
},
"DesiredWeight": number,
"InstanceType": "string",
"ManagedInstanceScaling": {
"MaxInstanceCount": number,
"MinInstanceCount": number,
"Status": "string"
},
"RoutingConfig": {
"RoutingStrategy": "string"
},
"VariantName": "string",
"VariantStatus": [
{
"StartTime": number,
"Status": "string",
"StatusMessage": "string"
}
]
}
],
"StartTime": number
},
"ProductionVariants": [
{
"CurrentInstanceCount": number,
"CurrentServerlessConfig": {
"MaxConcurrency": number,
"MemorySizeInMB": number,
"ProvisionedConcurrency": number
},
"CurrentWeight": number,
"DeployedImages": [
{
"ResolutionTime": number,
"ResolvedImage": "string",
"SpecifiedImage": "string"
}
],
"DesiredInstanceCount": number,
"DesiredServerlessConfig": {
"MaxConcurrency": number,
"MemorySizeInMB": number,
"ProvisionedConcurrency": number
},
"DesiredWeight": number,
"ManagedInstanceScaling": {
"MaxInstanceCount": number,
"MinInstanceCount": number,
"Status": "string"
},
"RoutingConfig": {
"RoutingStrategy": "string"
},
"VariantName": "string",
"VariantStatus": [
{
"StartTime": number,
"Status": "string",
"StatusMessage": "string"
}
]
}
],
"ShadowProductionVariants": [
{
"CurrentInstanceCount": number,
"CurrentServerlessConfig": {
"MaxConcurrency": number,
"MemorySizeInMB": number,
"ProvisionedConcurrency": number
},
"CurrentWeight": number,
"DeployedImages": [
{
"ResolutionTime": number,
"ResolvedImage": "string",
"SpecifiedImage": "string"
}
],
"DesiredInstanceCount": number,
"DesiredServerlessConfig": {
"MaxConcurrency": number,
"MemorySizeInMB": number,
"ProvisionedConcurrency": number
},
"DesiredWeight": number,
"ManagedInstanceScaling": {
"MaxInstanceCount": number,
"MinInstanceCount": number,
"Status": "string"
},
"RoutingConfig": {
"RoutingStrategy": "string"
},
"VariantName": "string",
"VariantStatus": [
{
"StartTime": number,
"Status": "string",
"StatusMessage": "string"
}
]
}
]
}
Response Elements
If the action is successful, the service sends back an HTTP 200 response.
The following data is returned in JSON format by the service.
- AsyncInferenceConfig
-
Returns the description of an endpoint configuration created using the CreateEndpointConfig API.
Type: AsyncInferenceConfig object
- CreationTime
-
A timestamp that shows when the endpoint was created.
Type: Timestamp
- DataCaptureConfig
-
The currently active data capture configuration used by your Endpoint.
Type: DataCaptureConfigSummary object
- EndpointArn
-
The Amazon Resource Name (ARN) of the endpoint.
Type: String
Length Constraints: Minimum length of 20. Maximum length of 2048.
Pattern:
arn:aws[a-z\-]*:sagemaker:[a-z0-9\-]*:[0-9]{12}:endpoint/.*
- EndpointConfigName
-
The name of the endpoint configuration associated with this endpoint.
Type: String
Length Constraints: Maximum length of 63.
Pattern:
^[a-zA-Z0-9](-*[a-zA-Z0-9]){0,62}
- EndpointName
-
Name of the endpoint.
Type: String
Length Constraints: Maximum length of 63.
Pattern:
^[a-zA-Z0-9](-*[a-zA-Z0-9]){0,62}
- EndpointStatus
-
The status of the endpoint.
- OutOfService: Endpoint is not available to take incoming requests.
- Creating: CreateEndpoint is executing.
- Updating: UpdateEndpoint or UpdateEndpointWeightsAndCapacities is executing.
- SystemUpdating: Endpoint is undergoing maintenance and cannot be updated or deleted or re-scaled until it has completed. This maintenance operation does not change any customer-specified values such as VPC config, KMS encryption, model, instance type, or instance count.
- RollingBack: Endpoint fails to scale up or down or change its variant weight and is in the process of rolling back to its previous configuration. Once the rollback completes, endpoint returns to an InService status. This transitional status only applies to an endpoint that has autoscaling enabled and is undergoing variant weight or capacity changes as part of an UpdateEndpointWeightsAndCapacities call or when the UpdateEndpointWeightsAndCapacities operation is called explicitly.
- InService: Endpoint is available to process incoming requests.
- Deleting: DeleteEndpoint is executing.
- Failed: Endpoint could not be created, updated, or re-scaled. Use the FailureReason value returned by DescribeEndpoint for information about the failure. DeleteEndpoint is the only operation that can be performed on a failed endpoint.
- UpdateRollbackFailed: Both the rolling deployment and auto-rollback failed. Your endpoint is in service with a mix of the old and new endpoint configurations. For information about how to remedy this issue and restore the endpoint's status to InService, see Rolling Deployments.
Type: String
Valid Values:
OutOfService | Creating | Updating | SystemUpdating | RollingBack | InService | Deleting | Failed | UpdateRollbackFailed
-
- ExplainerConfig
-
The configuration parameters for an explainer.
Type: ExplainerConfig object
- FailureReason
-
If the status of the endpoint is Failed, the reason why it failed.
Type: String
Length Constraints: Maximum length of 1024.
- LastDeploymentConfig
-
The most recent deployment configuration for the endpoint.
Type: DeploymentConfig object
- LastModifiedTime
-
A timestamp that shows when the endpoint was last modified.
Type: Timestamp
- PendingDeploymentSummary
-
Returns the summary of an in-progress deployment. This field is only returned when the endpoint is creating or updating with a new endpoint configuration.
Type: PendingDeploymentSummary object
- ProductionVariants
-
An array of ProductionVariantSummary objects, one for each model hosted behind this endpoint.
Type: Array of ProductionVariantSummary objects
Array Members: Minimum number of 1 item.
- ShadowProductionVariants
-
An array of ProductionVariantSummary objects, one for each model that you want to host at this endpoint in shadow mode with production traffic replicated from the model specified on ProductionVariants.
Type: Array of ProductionVariantSummary objects
Array Members: Minimum number of 1 item.
Errors
For information about the errors that are common to all actions, see Common Errors.
See Also
For more information about using this API in one of the language-specific AWS SDKs, see the following: