# Source code for autogluon.features.generators.binned
import copy
import logging
import pandas as pd
from pandas import DataFrame
from autogluon.common.features.types import R_FLOAT, R_INT, S_BINNED
from .. import binning
from ..utils import get_smallest_valid_dtype_int
from .abstract import AbstractFeatureGenerator
logger = logging.getLogger(__name__)
# TODO: Add more parameters (possibly pass in binning function as an argument for full control)
# [docs]
class BinnedFeatureGenerator(AbstractFeatureGenerator):
    """Bin incoming int and float features to ``num_bins`` unique int values, maintaining relative rank order.

    Parameters
    ----------
    num_bins : int, default 10
        Requested number of bins per feature. Forwarded to ``binning.generate_bins`` as ``ideal_bins``.
    **kwargs
        Forwarded unchanged to ``AbstractFeatureGenerator.__init__``.
    """

    def __init__(self, num_bins=10, **kwargs):
        super().__init__(**kwargs)
        self.num_bins = num_bins
        # Fit state, populated by `_fit_transform`. Initialised here so that methods which
        # inspect it (e.g. `_remove_features_in`) see a falsy value on an unfitted generator
        # instead of raising AttributeError.
        self._bin_map = None  # feature name -> bins returned by `binning.generate_bins`
        self._astype_map = None  # feature name -> int dtype used for that feature's bin codes

    def _fit_transform(self, X: DataFrame, **kwargs) -> (DataFrame, dict):
        """Learn the bins for every column of ``X`` and return the binned data.

        Returns
        -------
        (DataFrame, dict)
            The binned features, and a copy of the input special type group map in which
            every output column has been added to the ``S_BINNED`` group.
        """
        self._bin_map = self._get_bin_map(X=X)
        # Pick, per feature, the smallest int dtype able to hold values in [0, len(bins)].
        self._astype_map = {
            feature: get_smallest_valid_dtype_int(min_val=0, max_val=len(bin_index))
            for feature, bin_index in self._bin_map.items()
        }
        X_out = self._transform(X)
        # Deep copy so the input feature metadata is not mutated by the update below.
        type_group_map_special = copy.deepcopy(self.feature_metadata_in.type_group_map_special)
        # NOTE(review): `+=` requires the S_BINNED key to resolve to a list; presumably the
        # map is a defaultdict(list) - confirm against FeatureMetadata.
        type_group_map_special[S_BINNED] += list(X_out.columns)
        return X_out, type_group_map_special

    def _transform(self, X: DataFrame) -> DataFrame:
        """Bin ``X`` using the bins learned during fit."""
        return self._transform_bin(X)

    @staticmethod
    def get_default_infer_features_in_args() -> dict:
        """Return the default feature-inference arguments: only raw int and float features are valid."""
        return dict(valid_raw_types=[R_INT, R_FLOAT])

    def _get_bin_map(self, X: DataFrame) -> dict:
        """Generate the bins for every column of ``X``, requesting ``self.num_bins`` bins per column."""
        return binning.generate_bins(X, list(X.columns), ideal_bins=self.num_bins)

    def _transform_bin(self, X: DataFrame):
        """Bin every fitted column of ``X`` and return the result as a DataFrame sharing ``X``'s index."""
        X_out = {
            column: binning.bin_column(series=X[column], bins=bins, dtype=self._astype_map[column])
            for column, bins in self._bin_map.items()
        }
        return pd.DataFrame(X_out, index=X.index)

    def _remove_features_in(self, features: list):
        """Remove ``features`` from the generator's inputs and drop their fitted bin/dtype entries."""
        super()._remove_features_in(features)
        # Skip maps that are unset (generator not fit yet) or already empty.
        for fit_map in (self._bin_map, self._astype_map):
            if fit_map:
                for feature in features:
                    # Features without an entry are ignored.
                    fit_map.pop(feature, None)

    def _more_tags(self):
        """Return generator tags; this generator reports ``feature_interactions`` as False."""
        return {"feature_interactions": False}