Source code for autogluon.features.generators.fillna
import logging
import warnings

import numpy as np
from pandas import DataFrame

from autogluon.common.features.types import R_OBJECT

from .abstract import AbstractFeatureGenerator

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)


# TODO: Add fillna_special_map, fillna_combined_map to increase options
# TODO: Add options to specify mean/median/mode for int/float
# TODO: Add fillna_features for feature specific fill values
class FillNaFeatureGenerator(AbstractFeatureGenerator):
    """
    Fills missing values in the data.

    Parameters
    ----------
    fillna_map : dict, default {'object': ''}
        Map which dictates the fill values of NaNs.
        Keys are the raw types of the features as in self.feature_metadata_in.type_map_raw.
        If a feature's raw type is not present in fillna_map, its NaN values are filled to fillna_default.
    fillna_default, default np.nan
        The default fillna value if the feature's raw type is not present in fillna_map.
        Be careful about setting this to anything other than np.nan, as not all raw types can handle int, float, or string values.
        Any float NaN (np.nan, float('nan'), np.float64('nan'), ...) means "leave missing values untouched".
    inplace : bool, default False
        If True, then the NaN values are filled inplace without copying the input data.
        This will alter the input data outside of the scope of this function.
    **kwargs :
        Refer to :class:`AbstractFeatureGenerator` documentation for details on valid key word arguments.

    """

    def __init__(self, fillna_map=None, fillna_default=np.nan, inplace=False, **kwargs):
        super().__init__(**kwargs)
        if fillna_map is None:
            # Build the default per call so no dict is shared between instances.
            fillna_map = {R_OBJECT: ""}
        self.fillna_map = fillna_map
        self.fillna_default = fillna_default
        # Per-feature fill values; populated in `_fit_transform`.
        self._fillna_feature_map = None
        self.inplace = inplace

    def _fit_transform(self, X: DataFrame, **kwargs) -> (DataFrame, dict):
        """
        Resolve a fill value for every input feature, then transform `X`.

        Each feature's raw type is looked up in `fillna_map`, falling back to
        `fillna_default`. Features whose resolved fill value is NaN are left out of
        the per-feature map, so `_transform` never touches them.

        Returns
        -------
        (DataFrame, dict)
            The transformed data and `self.feature_metadata_in.type_group_map_special`.
        """
        features = self.feature_metadata_in.get_features()
        self._fillna_feature_map = dict()
        for feature in features:
            feature_raw_type = self.feature_metadata_in.get_feature_type_raw(feature)
            feature_fillna_val = self.fillna_map.get(feature_raw_type, self.fillna_default)
            # Compare by value rather than identity: `x is not np.nan` only recognises
            # the np.nan singleton, so float('nan') or np.float64('nan') would be
            # registered as a real fill value. Filling NaN with NaN is pointless work
            # and would make those objects behave differently from np.nan.
            fill_is_nan = isinstance(feature_fillna_val, (float, np.floating)) and bool(np.isnan(feature_fillna_val))
            if not fill_is_nan:
                self._fillna_feature_map[feature] = feature_fillna_val
        return self._transform(X), self.feature_metadata_in.type_group_map_special

    def _transform(self, X: DataFrame) -> DataFrame:
        """
        Fill missing values in `X` using the per-feature map built during fit.

        If the map is empty (or unset), `X` is returned unchanged. When
        `self.inplace` is True the passed DataFrame itself is modified and returned;
        otherwise a filled copy is returned.
        """
        if self._fillna_feature_map:
            with warnings.catch_warnings():
                warnings.simplefilter(action="ignore", category=FutureWarning)
                # FIXME: v1.1 Remove this warning filter and resolve.
                #  In Pandas 2.1, the `downcast` argument was deprecated,
                #  but we need it to avoid incorrect type conversion.
                #  Pandas authors may have not considered our edge-case.
                #  We specifically want to have an object dtype not be converted to a numeric dtype,
                #  even if all of the values can be converted to numeric.
                #  However, without specifying `downcast=False`, it will be converted to numeric, which we don't want.
                #  Note: Non-trivial to keep current functionality without specifying `downcast=False`...
                #  Doing so may end up slowing down the code noticeably.
                if self.inplace:
                    X.fillna(self._fillna_feature_map, inplace=True, downcast=False)
                else:
                    X = X.fillna(self._fillna_feature_map, inplace=False, downcast=False)
        return X

    @staticmethod
    def get_default_infer_features_in_args() -> dict:
        """Return an empty dict of default feature-inference arguments."""
        return dict()

    def _remove_features_in(self, features):
        """Remove `features` from the generator and drop their fill values from the per-feature map."""
        super()._remove_features_in(features)
        if features:
            for feature in features:
                # `pop` with a default: features that never had a fill value are simply skipped.
                self._fillna_feature_map.pop(feature, None)

    def _more_tags(self):
        """Return this generator's extra tags: `feature_interactions` is False."""
        return {"feature_interactions": False}