Source code for autogluon.features.generators.drop_unique
importloggingfrompandasimportDataFramefromautogluon.common.features.feature_metadataimportFeatureMetadatafromautogluon.common.features.typesimportR_CATEGORY,R_OBJECT,S_IMAGE_BYTEARRAY,S_IMAGE_PATH,S_TEXTfrom.abstractimportAbstractFeatureGeneratorlogger=logging.getLogger(__name__)# TODO: Not necessary to exist after fitting, can just update outer context feature_out/feature_in and then delete this
class DropUniqueFeatureGenerator(AbstractFeatureGenerator):
    """
    Feature generator that removes uninformative columns during fit.

    A column is removed when it holds exactly one unique value, or when it is of
    raw type category/object and its unique-value count exceeds
    ``len(X) * max_unique_ratio`` (i.e. almost no value repeats). Columns tagged
    with the text, image-path or image-bytearray special types are exempt from
    the second rule.

    Parameters
    ----------
    max_unique_ratio : float, default 0.99
        Multiplied by the row count of the fit data to obtain the unique-value
        count above which a category/object column is dropped.
    **kwargs
        Forwarded unchanged to ``AbstractFeatureGenerator.__init__``.
    """

    def __init__(self, max_unique_ratio=0.99, **kwargs):
        super().__init__(**kwargs)
        self.max_unique_ratio = max_unique_ratio

    def _fit_transform(self, X: DataFrame, **kwargs) -> (DataFrame, dict):
        """
        Decide which features to drop and return the remaining columns.

        Returns
        -------
        (DataFrame, dict)
            ``X`` restricted to ``self.features_in`` and
            ``self.feature_metadata_in.type_group_map_special``.
        """
        rejected = self._drop_unique_features(
            X,
            self.feature_metadata_in,
            max_unique_ratio=self.max_unique_ratio,
        )
        # Must run before `self.features_in` is read below.
        # NOTE(review): `_remove_features_in` is defined outside this block; presumably it
        # prunes `features_in` / `feature_metadata_in` - confirm against the base class.
        self._remove_features_in(rejected)
        kept_columns = self.features_in
        return X[kept_columns], self.feature_metadata_in.type_group_map_special

    def _transform(self, X: DataFrame) -> DataFrame:
        """Return ``X`` unchanged; no column is modified here."""
        return X

    @staticmethod
    def get_default_infer_features_in_args() -> dict:
        """Return an empty dict of default feature-inference arguments."""
        return {}

    # TODO: Consider NaN?
    @staticmethod
    def _drop_unique_features(X: DataFrame, feature_metadata: FeatureMetadata, max_unique_ratio) -> list:
        """
        Collect the names of the columns of ``X`` that should be dropped.

        Parameters
        ----------
        X : DataFrame
            Data whose columns are inspected.
        feature_metadata : FeatureMetadata
            Queried for the raw type and special types of each column.
        max_unique_ratio
            Scales ``len(X)`` into the unique-value count threshold.

        Returns
        -------
        list
            Column names to drop, in the iteration order of ``X``.
        """
        exempt_special_types = (S_TEXT, S_IMAGE_PATH, S_IMAGE_BYTEARRAY)
        threshold = len(X) * max_unique_ratio

        rejected = []
        for name in X:
            n_unique = len(X[name].unique())

            # Drop features that are always the same
            if n_unique == 1:
                rejected.append(name)
                continue

            # Only category/object columns can be dropped for being (nearly) all-unique.
            if feature_metadata.get_feature_type_raw(name) not in [R_CATEGORY, R_OBJECT]:
                continue
            if not (n_unique > threshold):
                continue

            # We should not drop a text, image path or image bytearray column
            special_types = feature_metadata.get_feature_types_special(name)
            if any(tag in special_types for tag in exempt_special_types):
                continue

            rejected.append(name)
        return rejected

    def _more_tags(self):
        """Return the extra tags of this generator."""
        return {"feature_interactions": False}