# Source code for autogluon.core.models.ensemble.weighted_ensemble_model
from__future__importannotationsimportloggingfromcollectionsimportdefaultdictimportnumpyasnpimportpandasaspdfrom..greedy_ensemble.greedy_weighted_ensemble_modelimportGreedyWeightedEnsembleModelfrom.stacker_ensemble_modelimportStackerEnsembleModellogger=logging.getLogger(__name__)# TODO: v0.1 see if this can be removed and logic moved to greedy weighted ensemble model -> Use StackerEnsembleModel as stacker instead# TODO: Optimize predict speed when fit on kfold, can simply sum weights
class WeightedEnsembleModel(StackerEnsembleModel):
    """
    Weighted ensemble meta-model that implements Ensemble Selection:
    https://www.cs.cornell.edu/~alexn/papers/shotgun.icml04.revised.rev2.pdf

    A :class:`autogluon.core.models.GreedyWeightedEnsembleModel` must be specified
    as the `model_base` to properly function.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # Keep child models in memory: the ensemble children are tiny weight
        # vectors, so the memory savings of lazy loading are not worth the cost.
        self.low_memory = False

    def _fit(self, X, y, **kwargs):
        """
        Fit the stacker, then prune stacking metadata down to only the base
        models that received nonzero consideration by at least one child.
        Returns self.
        """
        super()._fit(X, y, **kwargs)
        # Union (order-preserving, deduplicated) of base-model names used by any child.
        stack_columns = []
        for model in self.models:
            model = self.load_child(model, verbose=False)
            stack_columns = stack_columns + [stack_column for stack_column in model.base_model_names if stack_column not in stack_columns]
        # Drop stack-column prefixes for base models no child actually uses.
        self.stack_column_prefix_lst = [stack_column for stack_column in self.stack_column_prefix_lst if stack_column in stack_columns]
        self.stack_columns, self.num_pred_cols_per_model = self.set_stack_columns(stack_column_prefix_lst=self.stack_column_prefix_lst)
        # Restrict the prefix -> model map and base model list to the surviving prefixes.
        min_stack_column_prefix_to_model_map = {k: v for k, v in self.stack_column_prefix_to_model_map.items() if k in self.stack_column_prefix_lst}
        self.base_model_names = [base_model_name for base_model_name in self.base_model_names if base_model_name in min_stack_column_prefix_to_model_map.values()]
        self.stack_column_prefix_to_model_map = min_stack_column_prefix_to_model_map
        return self

    def _get_model_weights(self) -> dict:
        """
        Return the per-base-model ensemble weights, averaged across all
        child (fold) models. Keys are base model names, values are floats.
        """
        weights_dict = defaultdict(int)
        num_models = len(self.models)
        # Sum each base model's weight over every child ensemble...
        for model in self.models:
            model: GreedyWeightedEnsembleModel = self.load_child(model, verbose=False)
            model_weight_dict = model._get_model_weights()
            for key in model_weight_dict.keys():
                weights_dict[key] += model_weight_dict[key]
        # ...then average. A base model absent from a child contributes 0 there.
        for key in weights_dict:
            weights_dict[key] = weights_dict[key] / num_models
        weights_dict = dict(weights_dict)
        return weights_dict

    def compute_feature_importance(self, X, y, features=None, is_oof=True, **kwargs) -> pd.DataFrame:
        """
        The weighted ensemble has no raw features, so genuine feature
        importance is undefined; return the ensemble weights instead, as a
        DataFrame indexed by base model name with an 'importance' column and
        NaN 'stddev'/'p_score'/'n' columns.
        """
        logger.warning("Warning: non-raw feature importance calculation is not valid for weighted ensemble since it does not have features, returning ensemble weights instead...")
        if is_oof:
            fi = pd.Series(self._get_model_weights()).sort_values(ascending=False)
        else:
            # Same fallback either way for now; only the warning differs.
            logger.warning("Warning: Feature importance calculation is not yet implemented for WeightedEnsembleModel on unseen data, returning generic feature importance...")
            fi = pd.Series(self._get_model_weights()).sort_values(ascending=False)
        fi_df = fi.to_frame(name="importance")
        # Statistical columns are not meaningful for weights; fill with NaN.
        fi_df["stddev"] = np.nan
        fi_df["p_score"] = np.nan
        fi_df["n"] = np.nan
        # TODO: Rewrite preprocess() in greedy_weighted_ensemble_model to enable
        # fi_df = super().compute_feature_importance(X=X, y=y, features_to_use=features_to_use, preprocess=preprocess, is_oof=is_oof, **kwargs)
        return fi_df

    def _set_default_params(self):
        # The ensemble consumes only stacked predictions, never original features.
        default_params = {"use_orig_features": False}
        for param, val in default_params.items():
            self._set_default_param_value(param, val)
        super()._set_default_params()

    def _more_tags(self):
        """
        This model can generate out-of-fold (oof) predictions by predicting
        directly on the training data. This will make the result slightly
        overfit, but the weighted ensemble has limited degrees of freedom
        intentionally, making the overfitting negligible.
        """
        tags = {
            "can_get_oof_from_train": True,
            "print_weights": True,
        }
        return tags