Python：拉普拉斯正则逻辑回归

科技2022-07-13 133

"""Laplacian-regularized logistic regression (semi-supervised) solved with CVXPY.

author: Deniu He
date: 2020-09-30
"""
from copy import deepcopy

import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
from scipy.spatial.distance import pdist, squareform
import cvxpy as cvx


class LRLR():
    """Binary logistic regression with L2 and graph-Laplacian regularization.

    ``fit`` maximizes

        sum_i [ y_i * x_i^T w - log(1 + exp(x_i^T w)) ]
            - lambd1 * ||w||_2^2
            - lambd2 * (X_pool w)^T L (X_pool w)

    where ``L = D - A`` is the Laplacian built from the adjacency matrix ``A``
    of the pool samples.  The likelihood term is written for 0/1 labels and no
    intercept term is fitted (the decision function is ``X @ w`` only).
    """

    def __init__(self, X_labeled=None, y_labeled=None, adjMatrix=None, X_pool=None):
        """Create an unfitted model.

        The constructor arguments are accepted but not stored or used; pass
        the data to :meth:`fit` instead.  All attributes start as ``None``.
        """
        # Labeled data.
        self.X = None
        self.y = None
        self.d = None
        # Pool (labeled + unlabeled) data and its graph.
        self.X_pool = None
        self.adjMatrix = None
        self.N = None
        self.lapMatrix = None
        # Regularization weights and optimization variable.
        self.lambd1 = None
        self.lambd2 = None
        self.w = None
        self.P = None
        # Pieces of the optimization problem.
        self.loglikehood = None
        self.L2regu = None
        self.Lapregu = None
        self.obj = None
        self.prob = None

    def fit(self, X_labeled, y_labeled, adjMatrix, X_pool, lambd1=0.1, lambd2=0.1):
        """Build and solve the regularized maximum-likelihood problem.

        Parameters
        ----------
        X_labeled : array of shape (n_labeled, d)
            Features of the labeled samples.
        y_labeled : array-like of length n_labeled
            Labels of the labeled samples; stored as a column vector.
        adjMatrix : array of shape (N, N)
            Adjacency matrix of the graph over the rows of ``X_pool``.
        X_pool : array of shape (N, d)
            Features of every sample covered by ``adjMatrix``.
        lambd1 : float, default 0.1
            Weight of the squared L2 penalty on ``w``.
        lambd2 : float, default 0.1
            Weight of the Laplacian smoothness penalty.

        Returns
        -------
        self
        """
        self.X = X_labeled
        # Column vector so it lines up elementwise with X @ w of shape (n, 1).
        self.y = np.asarray(y_labeled).reshape(-1, 1)
        self.d = X_labeled.shape[1]

        self.X_pool = X_pool
        self.adjMatrix = adjMatrix
        self.N = adjMatrix.shape[0]
        # Unnormalized graph Laplacian L = D - A.
        self.lapMatrix = np.diag(np.sum(adjMatrix, 0)) - adjMatrix

        self.lambd1 = lambd1
        self.lambd2 = lambd2
        self.w = cvx.Variable((self.d, 1))
        # Linear scores of every pool sample; smoothed over the graph below.
        self.P = self.X_pool @ self.w

        scores = self.X @ self.w
        self.loglikehood = cvx.sum(cvx.multiply(self.y, scores) - cvx.logistic(scores))
        self.L2regu = self.lambd1 * cvx.pnorm(self.w, p=2) ** 2
        self.Lapregu = self.lambd2 * cvx.quad_form(self.P, self.lapMatrix)
        self.obj = cvx.Maximize(self.loglikehood - self.Lapregu - self.L2regu)
        # ``solve()`` returns the optimal objective value, which is what is stored here.
        self.prob = cvx.Problem(self.obj).solve()
        return self

    def pred_prob(self, X_test):
        """Return sigmoid(X_test @ w) as an array of shape (n_test, 1).

        Raises
        ------
        RuntimeError
            If the model has not been fitted or the solver left ``w`` without
            a value.
        """
        if self.w is None or self.w.value is None:
            raise RuntimeError(
                "LRLR has no fitted weights: call fit() first and make sure "
                "the solver found a solution."
            )
        return 1 / (1 + np.exp(-(X_test @ self.w.value)))

    def pred(self, X_test):
        """Return 0/1 predictions (1-D array), thresholding the probability at 0.5."""
        return (self.pred_prob(X_test) >= 0.5).astype(int).ravel()


def adjacency_matrix(X, n_neighbors=5):
    """Build a symmetric k-nearest-neighbour adjacency matrix from ``X``.

    Samples ``i`` and ``j`` are connected (entry 1.0) when either one is among
    the other's ``n_neighbors`` nearest samples by Euclidean distance; a sample
    is never its own neighbour.  If there are fewer than ``n_neighbors + 1``
    samples, every sample uses all the others as neighbours.

    Parameters
    ----------
    X : array of shape (n, d)
        Sample features.
    n_neighbors : int, default 5
        Number of nearest neighbours per sample.

    Returns
    -------
    numpy.ndarray of shape (n, n)
        Symmetric 0/1 matrix with a zero diagonal.
    """
    X = np.asarray(X)
    n = X.shape[0]
    adj_matrix = np.zeros((n, n))
    k = min(n_neighbors, n - 1)
    if k <= 0:
        return adj_matrix

    dist = squareform(pdist(X, metric='euclidean'))
    # Push the self-distance to +inf so a sample is never picked as its own
    # neighbour, even when duplicate points tie with it at distance 0.
    np.fill_diagonal(dist, np.inf)
    nn_idx = np.argsort(dist, axis=1, kind='stable')[:, :k]

    adj_matrix[np.repeat(np.arange(n), k), nn_idx.ravel()] = 1
    # Symmetrize: an edge exists if either endpoint lists the other.
    return np.maximum(adj_matrix, adj_matrix.T)


def main():
    """Demo: compare LRLR with plain logistic regression on two Gaussian blobs."""
    X, y = datasets.make_blobs(n_samples=500, n_features=2, centers=2,
                               cluster_std=[3, 3], random_state=123)
    # Only 0.8% of the samples keep their label; the rest form the test set.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.992, random_state=1)
    print("测试数据个数=", len(y_test))

    plt.scatter(X[:, 0], X[:, 1], c=y)
    plt.scatter(X_train[:, 0], X_train[:, 1], c='r', marker="*", s=200,
                label="Labeled instance")
    plt.legend()
    plt.show()

    adjMatrix = adjacency_matrix(X=X)

    model = LRLR()
    model.fit(X_labeled=X_train, y_labeled=y_train, adjMatrix=adjMatrix, X_pool=X)
    y_pred = model.pred(X_test=X_test)
    acc = accuracy_score(y_true=y_test, y_pred=y_pred)
    print("精度：", acc)

    # Baseline: scikit-learn logistic regression trained on the labeled samples only.
    model2 = LogisticRegression()
    model2.fit(X=X_train, y=y_train)
    y_pred2 = model2.predict(X=X_test)
    acc2 = accuracy_score(y_true=y_test, y_pred=y_pred2)
    print("调包精度：", acc2)


if __name__ == '__main__':
    main()

Processed: 0.012, SQL: 8