Source code for autogluon.cloud.predictor.tabular_cloud_predictor
import copy
import logging
import os
from typing import Optional, Union
import pandas as pd
import yaml
from autogluon.common.loaders import load_pd
from ..utils.utils import convert_image_path_to_encoded_bytes_in_dataframe
from .cloud_predictor import CloudPredictor
logger = logging.getLogger(__name__)
class TabularCloudPredictor(CloudPredictor):
    """
    Cloud predictor for AutoGluon tabular models.

    Reports ``"tabular"`` as its predictor type, resolves ``TabularPredictor`` as the
    local predictor class, and adds real-time prediction entry points that accept a
    file path or a pandas.DataFrame (optionally containing an image-path column).
    """

    # NOTE(review): presumably the file name the base class uses when persisting this
    # object; its consumer is not visible in this file - confirm against CloudPredictor.
    predictor_file_name = "TabularCloudPredictor.pkl"

    @property
    def predictor_type(self):
        """
        Type of the underneath AutoGluon Predictor
        """
        return "tabular"

    def _get_local_predictor_cls(self):
        """
        Return the local AutoGluon predictor class, ``TabularPredictor``.

        The import happens inside the method, so ``autogluon.tabular`` is only imported
        when this method is actually called.
        """
        from autogluon.tabular import TabularPredictor

        return TabularPredictor

    def _construct_config(self, predictor_init_args, predictor_fit_args, leaderboard, **kwargs):
        """
        Dump the predictor configuration to ``<local_output_path>/utils/config.yaml``.

        Parameters
        ----------
        predictor_init_args
            Stored under the ``predictor_init_args`` key of the config.
        predictor_fit_args
            Stored under the ``predictor_fit_args`` key of the config.
            If it contains ``feature_metadata``, that entry is removed from a copy
            (the caller's object is left untouched) and replaced by a top-level
            ``feature_metadata`` entry holding its ``type_map_raw`` and
            ``get_type_map_special()`` values.
        leaderboard
            Stored under the ``leaderboard`` key of the config.
        **kwargs
            Extra top-level entries of the config.
            Must not contain ``feature_metadata`` when ``predictor_fit_args`` does.

        Returns
        -------
        str
            Path to the written YAML file.
        """
        assert self.predictor_type is not None
        if "feature_metadata" in predictor_fit_args:
            # Work on a copy so that popping the key does not mutate the caller's arguments.
            predictor_fit_args = copy.deepcopy(predictor_fit_args)
            feature_metadata = predictor_fit_args.pop("feature_metadata")
            # Reduce the metadata object to two plain mappings before dumping it.
            feature_metadata = dict(
                type_map_raw=feature_metadata.type_map_raw,
                type_map_special=feature_metadata.get_type_map_special(),
            )
            assert (
                "feature_metadata" not in kwargs
            ), "feature_metadata in both `predictor_fit_args` and kwargs. This should not happen."
            kwargs["feature_metadata"] = feature_metadata
        config = dict(
            predictor_type=self.predictor_type,
            predictor_init_args=predictor_init_args,
            predictor_fit_args=predictor_fit_args,
            leaderboard=leaderboard,
            **kwargs,
        )
        path = os.path.join(self.local_output_path, "utils", "config.yaml")
        # Make sure the target directory exists so that open() cannot fail on a missing folder.
        os.makedirs(os.path.dirname(path), exist_ok=True)
        with open(path, "w") as f:
            yaml.dump(config, f)
        return path

    def _load_predict_real_time_test_data(self, test_data, test_data_image_column):
        """
        Normalize the test data passed to the real-time prediction methods.

        A ``str`` is loaded through ``load_pd.load``. If the (possibly just loaded) data
        is a pandas.DataFrame and `test_data_image_column` is given, that column is passed
        through ``convert_image_path_to_encoded_bytes_in_dataframe``. Any other input is
        returned unchanged.
        """
        if isinstance(test_data, str):
            test_data = load_pd.load(test_data)
        if isinstance(test_data, pd.DataFrame) and test_data_image_column is not None:
            test_data = convert_image_path_to_encoded_bytes_in_dataframe(test_data, test_data_image_column)
        return test_data

    def predict_real_time(
        self,
        test_data: Union[str, pd.DataFrame],
        test_data_image_column: Optional[str] = None,
        accept: str = "application/x-parquet",
    ):
        """
        Predict with the deployed SageMaker endpoint. A deployed SageMaker endpoint is required.
        This is intended to provide a low latency inference.
        If you want to inference on a large dataset, use `predict()` instead.

        Parameters
        ----------
        test_data: Union(str, pandas.DataFrame)
            The test data to be inferenced.
            Can be a pandas.DataFrame or a local path to a csv file.
            When predicting multimodality with image modality:
                You need to specify `test_data_image_column`, and make sure the image column contains
                the path to each image.
        test_data_image_column: default = None
            If test_data involves image modality, you must specify the column name corresponding to image paths.
            NOTE(review): earlier documentation asked for both a relative path and an absolute path here;
            confirm which form the image-encoding helper actually requires.
        accept: str, default = application/x-parquet
            Type of accept output content.
            Valid options are application/x-parquet, text/csv, application/json

        Returns
        -------
        Pandas.Series
            Predict results in Series
        """
        self._validate_predict_real_time_args(accept)
        test_data = self._load_predict_real_time_test_data(
            test_data=test_data, test_data_image_column=test_data_image_column
        )
        # The second element (probabilities) is not needed for plain predictions.
        pred, _ = self._predict_real_time(test_data=test_data, accept=accept)
        return pred

    def predict_proba_real_time(
        self,
        test_data: Union[str, pd.DataFrame],
        test_data_image_column: Optional[str] = None,
        accept: str = "application/x-parquet",
    ):
        """
        Predict probabilities with the deployed SageMaker endpoint. A deployed SageMaker endpoint is required.
        This is intended to provide a low latency inference.
        If no probabilities are available, the plain predictions are returned instead.

        Parameters
        ----------
        test_data: Union(str, pandas.DataFrame)
            The test data to be inferenced.
            Can be a pandas.DataFrame or a local path to a csv file.
            When predicting multimodality with image modality:
                You need to specify `test_data_image_column`, and make sure the image column contains
                the path to each image.
        test_data_image_column: default = None
            If test_data involves image modality, you must specify the column name corresponding to image paths.
            NOTE(review): earlier documentation asked for both a relative path and an absolute path here;
            confirm which form the image-encoding helper actually requires.
        accept: str, default = application/x-parquet
            Type of accept output content.
            Valid options are application/x-parquet, text/csv, application/json

        Returns
        -------
        Pandas.DataFrame or Pandas.Series
            Will return a Pandas.Series when it's a regression problem. Will return a Pandas.DataFrame otherwise
        """
        self._validate_predict_real_time_args(accept)
        test_data = self._load_predict_real_time_test_data(
            test_data=test_data, test_data_image_column=test_data_image_column
        )
        pred, proba = self._predict_real_time(test_data=test_data, accept=accept)
        # Fall back to the plain predictions when the endpoint produced no probabilities.
        if proba is None:
            return pred
        return proba