模型融合
方法1: 模型平均
class AveragingModels(BaseEstimator, RegressorMixin, TransformerMixin):
    """Ensemble regressor that averages the predictions of its base models.

    Parameters
    ----------
    models : sequence of estimators
        Unfitted base regressors. They are cloned in ``fit`` so the
        objects passed in are never fitted themselves.
    """

    def __init__(self, models):
        self.models = models

    def fit(self, X, y):
        """Clone every base model and fit each clone on the full (X, y).

        Returns ``self``; the fitted clones are stored in
        ``self.clone_models``.
        """
        # Clone everything up front so the stored list is complete before
        # any individual fit starts.
        self.clone_models = list(map(clone, self.models))
        for estimator in self.clone_models:
            estimator.fit(X, y)
        return self

    def predict(self, X):
        """Return the per-sample mean of all fitted clones' predictions."""
        per_model = [estimator.predict(X) for estimator in self.clone_models]
        # One column per base model; averaging across columns gives one
        # value per sample.
        stacked = np.column_stack(per_model)
        return np.mean(stacked, axis=1)


# Test case:
# Build the averaging ensemble from the base regressors defined elsewhere
# in this file and evaluate it with the file's cross-validation helper.
averaged_models = AveragingModels(
    models=[ENet, GBoost, KRR, lasso],
)
# NOTE(review): nmse_cv is defined outside this section; the code below only
# relies on its result exposing .mean() and .std().
score = nmse_cv(averaged_models)
print(
    'Averaged base models score: {:.4f} ({:.4f}) \n'.format(
        score.mean(), score.std()
    )
)

# Method 2: model stacking
class StackingAveragedModels(BaseEstimator, RegressorMixin, TransformerMixin):
    """Stacked ensemble: a meta-model trained on out-of-fold predictions.

    Each base model is cloned and fitted once per cross-validation fold. The
    predictions on the held-out part of every fold form one feature column
    per base model, and the meta-model is fitted on those columns.

    Parameters
    ----------
    base_models : sequence of estimators
        Unfitted base regressors (cloned per fold in ``fit``).
    meta_model : estimator
        Unfitted regressor trained on the out-of-fold predictions.
    n_folds : int, default=5
        Number of K-fold splits used to build the out-of-fold predictions.
    """

    def __init__(self, base_models, meta_model, n_folds=5):
        self.base_models = base_models
        self.meta_model = meta_model
        self.n_folds = n_folds

    @staticmethod
    def _take_rows(data, positions):
        """Select rows of ``data`` by integer position.

        The fold indices are positional. Plain ``[]`` on a pandas DataFrame
        selects columns and on a Series is label-based, so pandas objects
        must be sliced through ``.iloc``; everything else (NumPy arrays,
        sparse matrices) keeps using ordinary indexing.
        """
        if hasattr(data, "iloc"):
            return data.iloc[positions]
        return data[positions]

    def fit(self, X, y):
        """Fit per-fold clones of the base models, then the meta-model.

        Returns ``self``. The fitted fold clones are stored in
        ``self.clone_base_models`` (one list per base model) and the fitted
        meta-model in ``self.clone_meta_model``.
        """
        # Work on clones so the estimators passed in stay unfitted.
        self.clone_base_models = [[] for _ in self.base_models]
        self.clone_meta_model = clone(self.meta_model)
        kfold = KFold(n_splits=self.n_folds, shuffle=True, random_state=156)

        # One row per training sample, one column per base model.
        out_of_fold_predictions = np.zeros((X.shape[0], len(self.base_models)))

        # K-fold cross-validation: the prediction made for each held-out
        # sample becomes a new feature for the meta-model.
        for i, model in enumerate(self.base_models):
            for train_index, test_index in kfold.split(X, y):
                instance = clone(model)
                self.clone_base_models[i].append(instance)
                instance.fit(
                    self._take_rows(X, train_index),
                    self._take_rows(y, train_index),
                )
                y_pred = instance.predict(self._take_rows(X, test_index))
                out_of_fold_predictions[test_index, i] = y_pred

        # Train the meta-model on the out-of-fold predictions against the
        # training targets.
        self.clone_meta_model.fit(out_of_fold_predictions, y)
        return self

    def predict(self, X):
        """Predict with the meta-model on fold-averaged base predictions."""
        # 2-D array with one column per base model: the mean of the
        # predictions made by that model's per-fold clones.
        meta_features = np.column_stack([
            np.column_stack([model.predict(X) for model in base_models]).mean(axis=1)
            for base_models in self.clone_base_models
        ])
        return self.clone_meta_model.predict(meta_features)


# Test case:
# Build the stacking ensemble (three base regressors, lasso as meta-model)
# from estimators defined elsewhere in this file, then cross-validate it.
stacked_averaged_models = StackingAveragedModels(
    base_models=(ENet, GBoost, KRR),
    meta_model=lasso,
)
# NOTE(review): nmse_cv is defined outside this section; the code below only
# relies on its result exposing .mean() and .std().
score = nmse_cv(stacked_averaged_models)
print(
    'Stacking Averaged models score: {:.4f} ({:.4f})'.format(
        score.mean(), score.std()
    )
)
# Result: the stacked model scores lower than the averaged model, i.e. the
# stacked model performs better.

# Test project link:
https://www.kesci.com/home/project/5f7d31a6fab2e800300a4b3c