Source code for autogluon.features.generators.binned
import copy
import logging

import pandas as pd
from pandas import DataFrame

from autogluon.common.features.types import R_FLOAT, R_INT, S_BINNED

from .. import binning
from ..utils import get_smallest_valid_dtype_int
from .abstract import AbstractFeatureGenerator

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)


# TODO: Add more parameters (possibly pass in binning function as an argument for full control)
class BinnedFeatureGenerator(AbstractFeatureGenerator):
    """Bin incoming int and float features into integer-coded buckets.

    Incoming int and float features are binned to ``num_bins`` unique int
    values, maintaining relative rank order.

    Parameters
    ----------
    num_bins : int, default 10
        Forwarded to ``binning.generate_bins`` as ``ideal_bins`` when the bin
        boundaries are computed during fit.
    **kwargs
        Passed through unchanged to ``AbstractFeatureGenerator.__init__``.
    """

    def __init__(self, num_bins=10, **kwargs):
        super().__init__(**kwargs)
        self.num_bins = num_bins

    def _fit_transform(self, X: DataFrame, **kwargs) -> (DataFrame, dict):
        """Learn the bins from ``X``, then return the binned frame and special-type map.

        Sets ``self._bin_map`` (feature -> bins) and ``self._astype_map``
        (feature -> output integer dtype) as a side effect.
        """
        self._bin_map = self._get_bin_map(X=X)

        # Pick, per feature, the smallest integer dtype able to hold every
        # value in the range [0, len(bins)].
        dtype_by_feature = {}
        for name, bins in self._bin_map.items():
            dtype_by_feature[name] = get_smallest_valid_dtype_int(min_val=0, max_val=len(bins))
        self._astype_map = dtype_by_feature

        binned = self._transform(X)

        # Work on a deep copy so the input metadata's special-type map is not mutated.
        special_types = copy.deepcopy(self.feature_metadata_in.type_group_map_special)
        special_types[S_BINNED] += list(binned.columns)
        return binned, special_types

    def _transform(self, X: DataFrame) -> DataFrame:
        """Apply the previously fitted bins to ``X``."""
        return self._transform_bin(X)

    @staticmethod
    def get_default_infer_features_in_args() -> dict:
        """Return the default feature-inference arguments: raw int and float types only."""
        return {"valid_raw_types": [R_INT, R_FLOAT]}

    def _get_bin_map(self, X: DataFrame) -> dict:
        """Generate bins for every column of ``X`` via ``binning.generate_bins``."""
        return binning.generate_bins(X, list(X.columns), ideal_bins=self.num_bins)

    def _transform_bin(self, X: DataFrame):
        """Bin each fitted column of ``X`` and assemble the results on ``X``'s index."""
        binned_columns = {
            name: binning.bin_column(series=X[name], bins=bins, dtype=self._astype_map[name])
            for name, bins in self._bin_map.items()
        }
        return pd.DataFrame(binned_columns, index=X.index)

    def _remove_features_in(self, features: list):
        """Drop ``features`` from the generator's inputs and from both fitted maps."""
        super()._remove_features_in(features)

        def _forget(mapping):
            # An empty (or unset-to-falsy) map has nothing to prune.
            if not mapping:
                return
            for name in features:
                mapping.pop(name, None)

        _forget(self._bin_map)
        _forget(self._astype_map)

    def _more_tags(self):
        """Return this generator's tag overrides."""
        return dict(feature_interactions=False)