Source code for autogluon.cloud.predictor.multimodal_cloud_predictor
import logging
import os
from typing import Optional, Tuple, Union
import pandas as pd
from autogluon.common.loaders import load_pd
from ..utils.ag_sagemaker import AutoGluonMultiModalRealtimePredictor
from ..utils.utils import convert_image_path_to_encoded_bytes_in_dataframe, is_image_file, read_image_bytes_and_encode
from .cloud_predictor import CloudPredictor
logger = logging.getLogger(__name__)
[docs]class MultiModalCloudPredictor(CloudPredictor):
predictor_file_name = "MultiModalCloudPredictor.pkl"
@property
def predictor_type(self) -> str:
"""
Type of the underneath AutoGluon Predictor
"""
return "multimodal"
@property
def _realtime_predictor_cls(self):
return AutoGluonMultiModalRealtimePredictor
def _get_local_predictor_cls(self):
from autogluon.multimodal import MultiModalPredictor
predictor_cls = MultiModalPredictor
return predictor_cls
def _load_predict_real_time_test_data(self, test_data, test_data_image_column):
import numpy as np
if isinstance(test_data, str):
if is_image_file(test_data):
test_data = [test_data]
else:
test_data = load_pd.load(test_data)
if isinstance(test_data, list):
test_data = np.array([read_image_bytes_and_encode(image) for image in test_data], dtype="object")
content_type = "application/x-npy"
if isinstance(test_data, pd.DataFrame):
if test_data_image_column is not None:
test_data = convert_image_path_to_encoded_bytes_in_dataframe(
dataframe=test_data, image_column=test_data_image_column
)
content_type = "application/x-parquet"
return test_data, content_type
[docs] def predict_real_time(
self,
test_data: Union[str, pd.DataFrame],
test_data_image_column: Optional[str] = None,
accept: str = "application/x-parquet",
) -> pd.Series:
"""
Predict with the deployed SageMaker endpoint. A deployed SageMaker endpoint is required.
This is intended to provide a low latency inference.
If you want to inference on a large dataset, use `predict()` instead.
Parameters
----------
test_data: Union(str, pandas.DataFrame)
The test data to be inferenced.
Can be a pandas.DataFrame or a local path to a csv file.
When predicting multimodality with image modality:
You need to specify `test_data_image_column`, and make sure the image column contains relative path to the image.
When predicting with only images:
Can be a pandas.DataFrame or a local path to a csv file.
Similarly, you need to specify `test_data_image_column`, and make sure the image column contains relative path to the image.
Or a local path to a single image file.
Or a list of local paths to image files.
test_data_image_column: default = None
If provided a csv file or pandas.DataFrame as the test_data and test_data involves image modality,
you must specify the column name corresponding to image paths.
The path MUST be an abspath
accept: str, default = application/x-parquet
Type of accept output content.
Valid options are application/x-parquet, text/csv, application/json
Returns
-------
Pandas.Series
Predict results in Series
"""
self._validate_predict_real_time_args(accept)
test_data, content_type = self._load_predict_real_time_test_data(
test_data=test_data, test_data_image_column=test_data_image_column
)
# Providing content type here because sagemaker serializer doesn't support change content type dynamically.
# Pass to `endpoint.predict()` call as `initial_args` instead
pred, _ = self._predict_real_time(test_data=test_data, accept=accept, ContentType=content_type)
return pred
[docs] def predict_proba_real_time(
self,
test_data: Union[str, pd.DataFrame],
test_data_image_column: Optional[str] = None,
accept: str = "application/x-parquet",
) -> Union[pd.DataFrame, pd.Series]:
"""
Predict with the deployed SageMaker endpoint. A deployed SageMaker endpoint is required.
This is intended to provide a low latency inference.
If you want to inference on a large dataset, use `predict()` instead.
Parameters
----------
test_data: Union(str, pandas.DataFrame)
The test data to be inferenced.
Can be a pandas.DataFrame or a local path to a csv file.
When predicting multimodality with image modality:
You need to specify `test_data_image_column`, and make sure the image column contains relative path to the image.
When predicting with only images:
Can be a pandas.DataFrame or a local path to a csv file.
Similarly, you need to specify `test_data_image_column`, and make sure the image column contains relative path to the image.
Or a local path to a single image file.
Or a list of local paths to image files.
test_data_image_column: default = None
If provided a csv file or pandas.DataFrame as the test_data and test_data involves image modality,
you must specify the column name corresponding to image paths.
The path MUST be an abspath
accept: str, default = application/x-parquet
Type of accept output content.
Valid options are application/x-parquet, text/csv, application/json
Returns
-------
Pandas.DataFrame or Pandas.Series
Will return a Pandas.Series when it's a regression problem. Will return a Pandas.DataFrame otherwise
"""
self._validate_predict_real_time_args(accept)
test_data, content_type = self._load_predict_real_time_test_data(
test_data=test_data, test_data_image_column=test_data_image_column
)
# Providing content type here because sagemaker serializer doesn't support change content type dynamically.
# Pass to `endpoint.predict()` call as `initial_args` instead
pred, proba = self._predict_real_time(test_data=test_data, accept=accept, ContentType=content_type)
if proba is None:
return pred
return proba
def _check_image_modality_only(self, test_data):
image_modality_only = False
if isinstance(test_data, str):
if os.path.isdir(test_data) or is_image_file(test_data):
image_modality_only = True
return image_modality_only
[docs] def predict(
self,
test_data: Union[str, pd.DataFrame],
test_data_image_column: Optional[str] = None,
**kwargs,
) -> Optional[pd.Series]:
"""
test_data: str
The test data to be inferenced.
Can be a pandas.DataFrame or a local path to a csv file.
When predicting multimodality with image modality:
You need to specify `test_data_image_column`, and make sure the image column contains relative path to the image.
When predicting with only images:
Can be a local path to a directory containing the images or a local path to a single image.
test_data_image_column: Optional(str)
If test_data involves image modality, you must specify the column name corresponding to image paths.
The path MUST be an abspath
kwargs:
Refer to `CloudPredictor.predict()`
"""
image_modality_only = self._check_image_modality_only(test_data)
if image_modality_only:
processed_args = self._prepare_image_predict_args(**kwargs)
return super().predict(
test_data,
test_data_image_column=None,
split_type=processed_args["split_type"],
content_type=processed_args["content_type"],
transformer_kwargs=processed_args["transformer_kwargs"],
**kwargs,
)
else:
return super().predict(
test_data,
test_data_image_column=test_data_image_column,
**kwargs,
)
[docs] def predict_proba(
self,
test_data: Union[str, pd.DataFrame],
test_data_image_column: Optional[str] = None,
**kwargs,
) -> Optional[Union[Tuple[pd.Series, Union[pd.DataFrame, pd.Series]], Union[pd.DataFrame, pd.Series]]]:
"""
test_data: str
The test data to be inferenced.
Can be a pandas.DataFrame or a local path to a csv file.
When predicting multimodality with image modality:
You need to specify `test_data_image_column`, and make sure the image column contains relative path to the image.
When predicting with only images:
Can be a local path to a directory containing the images or a local path to a single image.
test_data_image_column: Optional(str)
If test_data involves image modality, you must specify the column name corresponding to image paths.
The path MUST be an abspath
kwargs:
Refer to `CloudPredictor.predict()`
"""
image_modality_only = self._check_image_modality_only(test_data)
if image_modality_only:
processed_args = self._prepare_image_predict_args(**kwargs)
return super().predict_proba(
test_data,
test_data_image_column=None,
split_type=processed_args["split_type"],
content_type=processed_args["content_type"],
transformer_kwargs=processed_args["transformer_kwargs"],
**kwargs,
)
else:
return super().predict_proba(
test_data,
test_data_image_column=test_data_image_column,
**kwargs,
)