# Source code for autogluon.eda.visualization.model

from typing import Any, Dict, Optional

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from yellowbrick.contrib.wrapper import REGRESSOR, ContribEstimator
from yellowbrick.regressor import residuals_plot

from autogluon.core.constants import REGRESSION

from ..state import AnalysisState
from .base import AbstractVisualization
from .jupyter import JupyterMixin

# Public names exported by `from <this module> import *`.
__all__ = ["ConfusionMatrix", "FeatureImportance", "RegressionEvaluation", "ModelLeaderboard"]


class ConfusionMatrix(AbstractVisualization, JupyterMixin):
    """
    Render confusion matrix for binary/multiclass classifier.

    This visualization depends on :py:class:`~autogluon.eda.analysis.model.AutoGluonModelEvaluator` analysis.

    Parameters
    ----------
    headers: bool, default = False
        if `True` then render headers
    namespace: str, default = None
        namespace to use; can be nested like `ns_a.ns_b.ns_c`
    fig_args: Optional[Dict[str, Any]] = None,
        kwargs to pass into chart figure
    kwargs
        extra keyword arguments forwarded to :py:func:`seaborn.heatmap`; they take precedence over
        the defaults used by this visualization (`cmap`, `annot`, `linewidths`, `linecolor`, `fmt`, `cbar`)

    Examples
    --------
    >>> import autogluon.eda.analysis as eda
    >>> import autogluon.eda.visualization as viz
    >>> import autogluon.eda.auto as auto
    >>>
    >>> df_train = ...
    >>> df_test = ...
    >>> predictor = ...
    >>>
    >>> auto.analyze(model=predictor, val_data=df_test, anlz_facets=[
    >>>     eda.model.AutoGluonModelEvaluator(),
    >>> ], viz_facets=[
    >>>     viz.model.ConfusionMatrix(fig_args=dict(figsize=(3,3)), annot_kws={"size": 12}),
    >>> ])

    See Also
    --------
    :py:class:`~autogluon.eda.analysis.model.AutoGluonModelEvaluator`
    """

    def __init__(
        self,
        fig_args: Optional[Dict[str, Any]] = None,
        headers: bool = False,
        namespace: Optional[str] = None,
        **kwargs,
    ) -> None:
        super().__init__(namespace, **kwargs)
        self.headers = headers
        self.fig_args = {} if fig_args is None else fig_args

    def can_handle(self, state: AnalysisState) -> bool:
        """Return True when `state.model_evaluation` exists and contains a `confusion_matrix` entry."""
        return "model_evaluation" in state and "confusion_matrix" in state.model_evaluation

    def _render(self, state: AnalysisState) -> None:
        """Draw the confusion matrix stored in `state.model_evaluation` as an annotated heatmap."""
        self.render_header_if_needed(state, "Confusion Matrix")

        evaluation = state.model_evaluation
        labels = evaluation.labels
        cm = pd.DataFrame(evaluation.confusion_matrix, columns=labels, index=labels)
        cm.index.name = "Actual"
        cm.columns.name = "Predicted"

        # Normalized matrices hold fractions, so show them as percentages; raw counts are integers.
        fmt = ",.2%" if evaluation.confusion_matrix_normalized else "d"

        # Default to one figure-size unit per class unless the caller supplied an explicit figsize.
        cells_num = len(cm)
        fig_args = {"figsize": (cells_num, cells_num), **self.fig_args}

        # User kwargs go last so they override the defaults instead of raising
        # "got multiple values for keyword argument".
        heatmap_args = {
            "cmap": "Blues",
            "annot": True,
            "linewidths": 0.5,
            "linecolor": "lightgrey",
            "fmt": fmt,
            "cbar": False,
            **self._kwargs,
        }

        fig, ax = plt.subplots(**fig_args)
        sns.heatmap(cm, ax=ax, **heatmap_args)
        # pyplot.show() only accepts the keyword-only `block` argument; it does not take a figure.
        plt.show()


class _YellowbrickAutoGluonWrapper(ContribEstimator):
    """
    Adapter that lets Yellowbrick regression plots work with precomputed predictions.

    Both methods receive already computed predictions in the argument position where the
    Yellowbrick API conventionally passes the feature matrix.
    """

    _estimator_type = REGRESSOR

    def score(self, y_pred, y_true, **kwargs):
        """Return the `r2` entry of the wrapped estimator's `evaluate_predictions` result."""
        # note: this is not conventional use of API: we pass y_pred since we already have predictions done
        # Keyword arguments are used because `evaluate_predictions` expects (y_true, y_pred) and
        # R^2 is not symmetric in its arguments.
        return self.estimator.evaluate_predictions(y_true=y_true, y_pred=y_pred)["r2"]

    def predict(self, y_pred, **kwargs):
        """Return the supplied predictions unchanged."""
        # note: this is not conventional use of API: we pass y_pred since we already have predictions done
        return y_pred


class RegressionEvaluation(AbstractVisualization, JupyterMixin):
    """
    This plot shows residuals on the vertical axis vs prediction on horizontal axis.

    This visualization depends on :py:class:`~autogluon.eda.analysis.model.AutoGluonModelEvaluator` analysis.

    Parameters
    ----------
    residuals_plot_mode: Optional[str], default = 'qoq'
        Additional plot to render to the right of the main plot. The supported values:
        - `qoq` (default) - Q-Q plot, which is a common way to check that residuals are normally distributed. If the residuals are normally distributed,
        then their quantiles when plotted against quantiles of normal distribution should form a straight line.
        - `hist` - display histogram that our error is normally distributed around zero, which also generally indicates a well fitted model
        - any other value - don't render additional details
    headers: bool, default = False
        if `True` then render headers
    namespace: str, default = None
        namespace to use; can be nested like `ns_a.ns_b.ns_c`
    fig_args: Optional[Dict[str, Any]] = None,
        kwargs to pass into chart figure; `figsize` defaults to `(12, 6)` when not provided

    Notes
    -----
    Extra keyword arguments are passed to the base class constructor; the rendering code of this
    class does not forward them to the residuals plot.

    Examples
    --------
    >>> import autogluon.eda.analysis as eda
    >>> import autogluon.eda.visualization as viz
    >>> import autogluon.eda.auto as auto
    >>>
    >>> df_train = ...
    >>> df_test = ...
    >>> predictor = ...
    >>>
    >>> auto.analyze(model=predictor, val_data=df_test, anlz_facets=[
    >>>     eda.model.AutoGluonModelEvaluator(),
    >>> ], viz_facets=[
    >>>     viz.model.RegressionEvaluation(fig_args=dict(figsize=(6,6)), marker='o', scatter_kws={'s':5}),
    >>> ])

    See Also
    --------
    :py:class:`~autogluon.eda.analysis.model.AutoGluonModelEvaluator`
    """

    def __init__(
        self,
        residuals_plot_mode: Optional[str] = "qoq",
        fig_args: Optional[Dict[str, Any]] = None,
        headers: bool = False,
        namespace: Optional[str] = None,
        **kwargs,
    ) -> None:
        super().__init__(namespace, **kwargs)
        self.headers = headers
        self.residuals_analysis_mode = residuals_plot_mode

        # Caller-supplied values override the default figure size.
        self.fig_args = {"figsize": (12, 6), **(fig_args or {})}

    def can_handle(self, state: AnalysisState) -> bool:
        """Return True when `state.model_evaluation` exists and its `problem_type` is regression."""
        return "model_evaluation" in state and state.model_evaluation.problem_type == REGRESSION

    def _get_plot_mode(self) -> Dict[str, bool]:
        """Translate the configured mode into the `hist`/`qqplot` flags passed to `residuals_plot`."""
        modes = {
            "hist": dict(hist=True, qqplot=False),
            "qoq": dict(hist=False, qqplot=True),
        }
        # Any unrecognized mode (including None) disables the side plot.
        return modes.get(self.residuals_analysis_mode, dict(hist=False, qqplot=False))  # type: ignore

    def _render(self, state: AnalysisState) -> None:
        """Draw the residuals plot from the predictions stored in `state.model_evaluation`."""
        self.render_header_if_needed(state, "Prediction vs Target")
        res_plot_kwargs = self._get_plot_mode()
        fig, ax = plt.subplots(**self.fig_args)
        y_pred_train, y_true_train, y_pred_test, y_true_test = RegressionEvaluation._repack_parameters(
            state.model_evaluation
        )
        # Predictions are passed where Yellowbrick expects features; the wrapper's
        # `predict` simply returns them unchanged.
        residuals_plot(
            _YellowbrickAutoGluonWrapper(state.model),
            y_pred_train,
            y_true_train,
            y_pred_test,
            y_true_test,
            show=False,
            ax=ax,
            **res_plot_kwargs,
        )
        # pyplot.show() only accepts the keyword-only `block` argument; it does not take a figure.
        plt.show()

    @staticmethod
    def _repack_parameters(ev):
        """
        Select the prediction/target pairs to plot as the 'train' and 'test' series.

        - 'train' series: train data when `y_pred_train` is present, otherwise validation data.
        - 'test' series: test data when `y_true_test` is present; otherwise validation data, but only
          if train data was used for the 'train' series; otherwise `None`.

        Returns
        -------
        tuple of (y_pred_train, y_true_train, y_pred_test, y_true_test)
        """
        has_train = "y_pred_train" in ev
        has_test = "y_true_test" in ev

        if has_train:
            y_pred_train, y_true_train = ev.y_pred_train, ev.y_true_train
        else:
            y_pred_train, y_true_train = ev.y_pred_val, ev.y_true_val

        if has_test:
            y_pred_test, y_true_test = ev.y_pred_test, ev.y_true_test
        elif has_train:
            # Validation data is still unused at this point, so plot it as the second series.
            y_pred_test, y_true_test = ev.y_pred_val, ev.y_true_val
        else:
            y_pred_test, y_true_test = None, None

        return y_pred_train, y_true_train, y_pred_test, y_true_test


class FeatureImportance(AbstractVisualization, JupyterMixin):
    """
    Render feature importance for the model.

    This visualization depends on :py:class:`~autogluon.eda.analysis.model.AutoGluonModelEvaluator` analysis.

    Parameters
    ----------
    show_barplots: bool, default = False
        render features barplots if True
    headers: bool, default = False
        if `True` then render headers
    namespace: str, default = None
        namespace to use; can be nested like `ns_a.ns_b.ns_c`
    fig_args: Optional[Dict[str, Any]] = None,
        kwargs to pass into chart figure
    kwargs
        extra keyword arguments forwarded to :py:func:`seaborn.barplot` when `show_barplots` is `True`

    Examples
    --------
    >>> import autogluon.eda.analysis as eda
    >>> import autogluon.eda.visualization as viz
    >>> import autogluon.eda.auto as auto
    >>>
    >>> df_train = ...
    >>> df_test = ...
    >>> predictor = ...
    >>>
    >>> auto.analyze(model=predictor, val_data=df_test, anlz_facets=[
    >>>     eda.model.AutoGluonModelEvaluator(),
    >>> ], viz_facets=[
    >>>     viz.model.FeatureImportance(show_barplots=True)
    >>> ])

    See Also
    --------
    :py:class:`~autogluon.eda.analysis.model.AutoGluonModelEvaluator`
    """

    def __init__(
        self,
        show_barplots: bool = False,
        fig_args: Optional[Dict[str, Any]] = None,
        headers: bool = False,
        namespace: Optional[str] = None,
        **kwargs,
    ) -> None:
        super().__init__(namespace, **kwargs)
        self.headers = headers
        self.fig_args = {} if fig_args is None else fig_args
        self.show_barplots = show_barplots

    def can_handle(self, state: AnalysisState) -> bool:
        """Return True when `state.model_evaluation` exists and contains an `importance` entry."""
        return "model_evaluation" in state and "importance" in state.model_evaluation

    def _render(self, state: AnalysisState) -> None:
        """Display the feature importance table and, optionally, a horizontal bar plot of it."""
        self.render_header_if_needed(state, "Feature Importance")
        importance = state.model_evaluation.importance

        # Show up to 100 rows in full; longer tables are truncated to 20 rows.
        max_rows = 100 if len(importance) <= 100 else 20
        with pd.option_context("display.max_rows", max_rows):
            self.display_obj(importance)

        # Skip the chart for an empty table: the default figure height would be 0,
        # which matplotlib rejects.
        if self.show_barplots and len(importance) > 0:
            # Default height grows with the number of features (a quarter unit per row).
            fig_args = {"figsize": (12, len(importance) / 4), **self.fig_args}

            # reset_index() puts the former index first; take its actual name rather than
            # assuming "index", which only holds for an unnamed index.
            data = importance.reset_index()
            feature_col = data.columns[0]

            fig, ax = plt.subplots(**fig_args)
            sns.barplot(ax=ax, data=data, y=feature_col, x="importance", **self._kwargs)
            # pyplot.show() only accepts the keyword-only `block` argument; it does not take a figure.
            plt.show()


class ModelLeaderboard(AbstractVisualization, JupyterMixin):
    """
    Render model leaderboard for trained model ensemble.

    Parameters
    ----------
    headers: bool, default = False
        if `True` then render headers
    namespace: str, default = None
        namespace to use; can be nested like `ns_a.ns_b.ns_c`

    Examples
    --------
    >>> import autogluon.eda.analysis as eda
    >>> import autogluon.eda.visualization as viz
    >>> import autogluon.eda.auto as auto
    >>>
    >>> df_train = ...
    >>> df_test = ...
    >>> predictor = ...
    >>>
    >>> auto.analyze(model=predictor, val_data=df_test, anlz_facets=[
    >>>     eda.model.AutoGluonModelEvaluator(),
    >>> ], viz_facets=[
    >>>     viz.model.ModelLeaderboard(),
    >>> ])

    See Also
    --------
    :py:class:`~autogluon.eda.analysis.model.AutoGluonModelEvaluator`
    """

    def __init__(self, namespace: Optional[str] = None, headers: bool = False, **kwargs) -> None:
        super().__init__(namespace, **kwargs)
        self.headers = headers

    def can_handle(self, state: AnalysisState) -> bool:
        """Return True when `state.model_evaluation` exists and contains a `leaderboard` entry."""
        return "model_evaluation" in state and "leaderboard" in state.model_evaluation

    def _render(self, state: AnalysisState) -> None:
        """Display the leaderboard table stored in `state.model_evaluation`."""
        self.render_header_if_needed(state, "Model Leaderboard")
        leaderboard = state.model_evaluation.leaderboard

        # Show up to 100 rows in full; longer tables are truncated to 20 rows.
        max_rows = 100 if len(leaderboard) <= 100 else 20
        with pd.option_context("display.max_rows", max_rows):
            self.display_obj(leaderboard)