"""Wrapper of the MultiModalPredictor."""
import logging
import os
from typing import Dict, Optional

import pandas as pd

from autogluon.common.features.types import (
R_CATEGORY,
R_FLOAT,
R_INT,
R_OBJECT,
S_IMAGE_PATH,
S_TEXT,
S_TEXT_AS_CATEGORY,
S_TEXT_NGRAM,
S_TEXT_SPECIAL,
)
from autogluon.common.utils.resource_utils import ResourceManager
from autogluon.common.utils.try_import import try_import_autogluon_multimodal
from autogluon.core.constants import BINARY, MULTICLASS, REGRESSION
from autogluon.core.models import AbstractModel

logger = logging.getLogger(__name__)


class MultiModalPredictorModel(AbstractModel):
_NN_MODEL_NAME = "automm_model"

    def __init__(self, **kwargs):
"""Wrapper of autogluon.multimodal.MultiModalPredictor.
The features can be a mix of
- image column
- text column
- categorical column
- numerical column
The labels can be categorical or numerical.
Parameters
----------
path
The directory to store the modeling outputs.
name
Name of subdirectory inside path where model will be saved.
problem_type
Type of problem that this model will handle.
Valid options: ['binary', 'multiclass', 'regression'].
eval_metric
The evaluation metric.
num_classes
The number of classes.
stopping_metric
The stopping metric.
model
The internal model object.
hyperparameters
The hyperparameters of the model
features
Names of the features.
feature_metadata
The feature metadata.
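
        Examples
        --------
        A minimal usage sketch. The import path and data variables below are
        illustrative assumptions; within AutoGluon this model is normally
        constructed and fit by `TabularPredictor` rather than directly.

        >>> from autogluon.tabular.models import MultiModalPredictorModel
        >>> model = MultiModalPredictorModel(path="automm_dir", problem_type="binary", eval_metric="roc_auc")
        >>> model.fit(X=X_train, y=y_train, X_val=X_val, y_val=y_val)  # pandas DataFrame / Series inputs
        >>> y_pred_proba = model.predict_proba(X_test)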
"""
super().__init__(**kwargs)
self._label_column_name = None
self._load_model = None # Whether to load inner model when loading.

    def _get_default_auxiliary_params(self) -> dict:
default_auxiliary_params = super()._get_default_auxiliary_params()
extra_auxiliary_params = dict(
valid_raw_types=[R_INT, R_FLOAT, R_CATEGORY, R_OBJECT],
ignored_type_group_special=[S_TEXT_NGRAM, S_TEXT_AS_CATEGORY, S_TEXT_SPECIAL],
)
default_auxiliary_params.update(extra_auxiliary_params)
return default_auxiliary_params

    @classmethod
def _get_default_ag_args(cls) -> dict:
default_ag_args = super()._get_default_ag_args()
extra_ag_args = {
"valid_stacker": False,
"problem_types": [BINARY, MULTICLASS, REGRESSION],
}
default_ag_args.update(extra_ag_args)
return default_ag_args

    # FIXME: Enable parallel bagging once AutoMM supports being run within Ray without hanging
@classmethod
def _get_default_ag_args_ensemble(cls, **kwargs) -> dict:
default_ag_args_ensemble = super()._get_default_ag_args_ensemble(**kwargs)
extra_ag_args_ensemble = {"fold_fitting_strategy": "sequential_local"}
default_ag_args_ensemble.update(extra_ag_args_ensemble)
return default_ag_args_ensemble

    def _set_default_params(self):
super()._set_default_params()
try_import_autogluon_multimodal()

    def preprocess_fit(self, X, y, X_val=None, y_val=None, **kwargs):
        """
        Preprocess the training and validation data.

        This method is a placeholder for inheriting models to override with more complex functionality if needed.
        """
X = self.preprocess(X=X, **kwargs)
if X_val is not None:
X_val = self.preprocess(X=X_val, **kwargs)
return X, y, X_val, y_val

    def _fit(
self,
X: pd.DataFrame,
y: pd.Series,
X_val: Optional[pd.DataFrame] = None,
y_val: Optional[pd.Series] = None,
time_limit: Optional[int] = None,
sample_weight=None,
**kwargs,
):
"""The internal fit function
Parameters
----------
X
Features of the training dataset
y
Labels of the training dataset
X_val
Features of the validation dataset
y_val
Labels of the validation dataset
time_limit
The time limits for the fit function
sample_weight
The weights of the samples
kwargs
Other keyword arguments
"""
try_import_autogluon_multimodal()
from autogluon.multimodal import MultiModalPredictor
# Decide name of the label column
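        # e.g. if X already contains columns "label" and "label0", the name "label1" is chosen,
        # so the target column appended below never collides with an existing feature.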
if "label" in X.columns:
label_col_id = 0
while True:
self._label_column_name = "label{}".format(label_col_id)
if self._label_column_name not in X.columns:
break
label_col_id += 1
else:
self._label_column_name = "label"
X, y, X_val, y_val = self.preprocess_fit(X=X, y=y, X_val=X_val, y_val=y_val)
params = self._get_model_params()
max_features = params.pop("_max_features", None) # FIXME: `_max_features` is a hack. Instead use ag_args_fit and make generic
num_features = len(X.columns)
if max_features is not None and num_features > max_features:
            raise AssertionError(
                f"Feature count ({num_features}) is greater than max allowed features ({max_features}) for {self.name}. Skipping model... "
                f"To increase the max allowed features, specify the value via the `_max_features` parameter "
                f"(fully disable the check by specifying `None`). "
                f"`_max_features` is experimental and will likely change API without warning in future releases."
            )
# Get arguments from kwargs
        verbosity = kwargs.get("verbosity", 2)
        enable_progress_bar = verbosity > 2
num_gpus = kwargs.get("num_gpus", None)
        if sample_weight is not None:  # TODO: support
            logger.log(15, "sample_weight is not yet supported for MultiModalPredictorModel; the sample weights will be ignored during training.")
# Need to deep copy to avoid altering outer context
X = X.copy()
X.insert(len(X.columns), self._label_column_name, y)
if X_val is not None:
X_val = X_val.copy()
X_val.insert(len(X_val.columns), self._label_column_name, y_val)
column_types = self._construct_column_types()
verbosity_text = max(0, verbosity - 1)
root_logger = logging.getLogger("autogluon")
root_log_level = root_logger.level
# in self.save(), the model is saved to automm_nn_path
automm_nn_path = os.path.join(self.path, self._NN_MODEL_NAME)
self.model = MultiModalPredictor(
label=self._label_column_name,
problem_type=self.problem_type,
path=automm_nn_path,
eval_metric=self.eval_metric,
verbosity=verbosity_text,
enable_progress_bar=enable_progress_bar,
)
if num_gpus is not None:
params["env.num_gpus"] = num_gpus
presets = params.pop("presets", None)
seed = params.pop("seed", 0)
self.model.fit(
train_data=X,
tuning_data=X_val,
time_limit=time_limit,
presets=presets,
hyperparameters=params,
column_types=column_types,
seed=seed,
)
self.model.set_verbosity(verbosity)
root_logger.setLevel(root_log_level) # Reset log level

    def _predict_proba(self, X, **kwargs):
X = self.preprocess(X, **kwargs)
self.model._enable_progress_bar = False
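        # MultiModalPredictor does not provide class probabilities for regression, so return raw predictions.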
if self.problem_type == REGRESSION:
return self.model.predict(X, as_pandas=False)
y_pred_proba = self.model.predict_proba(X, as_pandas=False)
return self._convert_proba_to_unified_form(y_pred_proba)

    def save(self, path: str = None, verbose=True) -> str:
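        # The NN weights are persisted separately via MultiModalPredictor.save() below;
        # self.model is temporarily detached so the pickled wrapper stays lightweight.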
self._load_model = self.model is not None
__model = self.model
self.model = None
# save this AbstractModel object without NN weights
path = super().save(path=path, verbose=verbose)
self.model = __model
if self._load_model:
automm_nn_path = os.path.join(path, self._NN_MODEL_NAME)
self.model.save(automm_nn_path)
logger.log(15, f"\tSaved AutoMM model weights and model hyperparameters to '{automm_nn_path}'.")
self._load_model = None
return path

    @classmethod
def load(cls, path: str, reset_paths=True, verbose=True):
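        # Load the lightweight wrapper first, then reattach the separately saved AutoMM weights if they were persisted.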
model = super().load(path=path, reset_paths=reset_paths, verbose=verbose)
if model._load_model:
try_import_autogluon_multimodal()
from autogluon.multimodal import MultiModalPredictor
model.model = MultiModalPredictor.load(os.path.join(path, cls._NN_MODEL_NAME))
model._load_model = None
return model

    def _get_memory_size(self) -> int:
        """Return the memory size of the model in bytes.

        The size is derived from the predictor's reported model size, which is
        measured in megabytes.

        Returns
        -------
        memory_size
            The total memory size in bytes.
        """
        total_size = self.model.model_size * 1e6  # convert from megabytes to bytes
        return int(total_size)

    def _get_default_resources(self):
        num_cpus = ResourceManager.get_cpu_count()
        num_gpus = min(ResourceManager.get_gpu_count_torch(), 1)  # Use single-GPU training by default; consider revising later.
        return num_cpus, num_gpus

    def get_minimum_resources(self, is_gpu_available=False) -> Dict[str, int]:
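        # At least one GPU is required to fit this model, even as the minimum resource request.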
return {
"num_cpus": 1,
"num_gpus": 1,
}

    def _construct_column_types(self) -> dict:
        # Construct the per-column feature types input for MultiModalPredictor.
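        # Example result (feature names are illustrative):
        #   {"img": "image_path", "review": "text", "brand": "categorical", "price": "numerical"}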
features_image_path = set(self._feature_metadata.get_features(required_special_types=[S_IMAGE_PATH]))
features_text = set(self._feature_metadata.get_features(required_special_types=[S_TEXT]))
features_categorical = set(self._feature_metadata.get_features(valid_raw_types=[R_CATEGORY]))
features_numerical = set(self._feature_metadata.get_features(valid_raw_types=[R_INT, R_FLOAT]))
key_map = {
"image_path": features_image_path,
"text": features_text,
"categorical": features_categorical,
"numerical": features_numerical,
}
features = self._feature_metadata.get_features()
column_types = {}
for feature in features:
            for key, feature_set in key_map.items():
                if feature in feature_set:
                    column_types[feature] = key
                    break
return column_types

    def _more_tags(self):
        # `can_refit_full=False` because MultiModalPredictor does not report how many epochs are needed
        # to reach the best checkpoint, which refit_full would require to retrain without validation data.
return {"can_refit_full": False}

    @classmethod
def _class_tags(cls):
return {"handles_text": True}