Source: a Kaggle expert's tutorial (link: link). Personal review notes.
```python
# This Python 3 environment comes with many helpful analytics libraries installed.
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load:
import numpy as np   # linear algebra
import pandas as pd  # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter)
# will list the files in the input directory.

import warnings
warnings.filterwarnings('ignore')  # filter warnings

from subprocess import check_output
# print(check_output(["ls", "../input"]).decode("utf8"))
# Any results you write to the current directory are saved as output.
```

Dataset: https://pan.baidu.com/s/15jq7JF3aO10tEO6_S3XF6Q (extraction code: a0n4)
Import the dataset and display two sample images:

```python
# load the dataset
x_1 = np.load('E:/Anaconda/envs/torch160/Python_Test/Data/NLP/deep_learning_npy/X.npy')
Y_1 = np.load('E:/Anaconda/envs/torch160/Python_Test/Data/NLP/deep_learning_npy/Y.npy')

# display two of the images
img_size = 64
plt.subplot(1, 2, 1)
plt.imshow(x_1[260].reshape(img_size, img_size))
plt.axis('off')
plt.subplot(1, 2, 2)
plt.imshow(x_1[900].reshape(img_size, img_size))
plt.axis('off')

# We keep 410 images in total (the "zero" and "one" signs),
# each of size 64x64 pixels.
X = np.concatenate((x_1[204:409], x_1[822:1027]), axis=0)  # in X, indices 0-204 are the zero sign, 205-409 are the one sign
z = np.zeros(205)
o = np.ones(205)
# Y is a (410, 1) column vector of labels (not a flat (410,) array):
# the first half zeros, the second half ones
Y = np.concatenate((z, o), axis=0).reshape(X.shape[0], 1)
print("X shape:", X.shape)
print("Y shape:", Y.shape)
```
Split X and Y into training and test sets, reserving 15% for testing:

```python
# create the x_train, y_train, x_test, y_test arrays
from sklearn.model_selection import train_test_split

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.15, random_state=42)
number_of_train = X_train.shape[0]  # number of training samples (rows)
number_of_test = X_test.shape[0]    # number of test samples
```

X is three-dimensional, so it has to be flattened to two dimensions before it can serve as input to our first deep learning model. The label array Y is already 2-D and needs no change:
```python
X_train_flatten = X_train.reshape(number_of_train, X_train.shape[1] * X_train.shape[2])
X_test_flatten = X_test.reshape(number_of_test, X_test.shape[1] * X_test.shape[2])
print("X train flatten", X_train_flatten.shape)
print("X test flatten", X_test_flatten.shape)

# Transpose so that each column is one sample; this matches the matrix
# product in y = (w.T)x + b
x_train = X_train_flatten.T
x_test = X_test_flatten.T
y_train = Y_train.T
y_test = Y_test.T
print("x train: ", x_train.shape)
print("x test: ", x_test.shape)
print("y train: ", y_train.shape)
print("y test: ", y_test.shape)
```
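To see what the transpose buys us, write out the shapes, with $m$ denoting the number of samples: after flattening and transposing, each column of $x$ is one image, so a single matrix product computes $z$ for every sample at once:

$$ z = w^{\top}x + b, \qquad w \in \mathbb{R}^{4096 \times 1},\; x \in \mathbb{R}^{4096 \times m},\; z \in \mathbb{R}^{1 \times m} $$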
Next, define the functions used during neural-network propagation:

```python
# short description and example of a function definition (def)
def dummy(parameter):
    dummy_parameter = parameter + 5
    return dummy_parameter

result = dummy(3)  # result = 8

# Initialize the parameters: weights w and bias b
# (the initial values could actually be chosen better).
# Our initialize method (def) takes the dimension 4096, i.e. the number of pixels.
def initialize_weights_and_bias(dimension):
    w = np.full((dimension, 1), 0.01)  # np.full(shape, fill_value) builds an array of the given shape filled with fill_value
    b = 0.0
    return w, b

# forward propagation
# calculation of z: the sigmoid function
def sigmoid(z):
    y_head = 1 / (1 + np.exp(-z))
    return y_head

# ReLU function
def relu(x):
    return np.maximum(0, x)

y_head = sigmoid(0)  # here y_head is 0.5

# Forward propagation steps:
#   find z = w.T*x + b
#   y_head = sigmoid(z)
#   loss(error) = loss(y, y_head)
#   cost = sum(loss)
# The helper functions are ready; now call them in forward propagation.
def forward_propagation(w, b, x_train, y_train):
    z = np.dot(w.T, x_train) + b
    y_head = sigmoid(z)  # probabilistic, in (0, 1)
    loss = -y_train * np.log(y_head) - (1 - y_train) * np.log(1 - y_head)  # the cost is the sum of the per-image losses
    cost = (np.sum(loss)) / x_train.shape[1]  # x_train.shape[1] is for scaling (averaging over samples)
    return cost
```
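Written out, the loss computed in `forward_propagation` is the binary cross-entropy, and the cost is its average over the $m$ training images:

$$ \mathcal{L}(y, \hat{y}) = -\,y \log \hat{y} - (1 - y)\log(1 - \hat{y}), \qquad J = \frac{1}{m} \sum_{i=1}^{m} \mathcal{L}\!\left(y^{(i)}, \hat{y}^{(i)}\right) $$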
Next, combine forward and backward propagation in a single function:

```python
def forward_backward_propagation(w, b, x_train, y_train):
    # forward propagation, following the steps listed above
    z = np.dot(w.T, x_train) + b
    y_head = sigmoid(z)
    loss = -y_train * np.log(y_head) - (1 - y_train) * np.log(1 - y_head)
    cost = (np.sum(loss)) / x_train.shape[1]  # x_train.shape[1] is for scaling

    # backward propagation: gradients (partial derivatives) of the weights and bias
    derivative_weight = (np.dot(x_train, ((y_head - y_train).T))) / x_train.shape[1]  # x_train.shape[1] is for scaling
    derivative_bias = np.sum(y_head - y_train) / x_train.shape[1]
    gradients = {"derivative_weight": derivative_weight, "derivative_bias": derivative_bias}
    return cost, gradients
```
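The two derivative lines are the standard closed-form gradients of this cost with respect to $w$ and $b$:

$$ \frac{\partial J}{\partial w} = \frac{1}{m}\, x\,(\hat{y} - y)^{\top}, \qquad \frac{\partial J}{\partial b} = \frac{1}{m} \sum_{i=1}^{m} \left(\hat{y}^{(i)} - y^{(i)}\right) $$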
Now update (learn) the parameters:

```python
# Updating (learning) the parameters
def update(w, b, x_train, y_train, learning_rate, number_of_iteration):
    cost_list = []   # cost at every iteration
    cost_list2 = []  # cost sampled every 10 iterations, for plotting
    index = []       # iteration indices every 10 steps, used as x-axis ticks

    # updating (learning) the parameters number_of_iteration times
    for i in range(number_of_iteration):
        # run forward and backward propagation to get the cost and gradients
        cost, gradients = forward_backward_propagation(w, b, x_train, y_train)
        cost_list.append(cost)
        # update step
        w = w - learning_rate * gradients['derivative_weight']
        b = b - learning_rate * gradients['derivative_bias']
        if i % 10 == 0:
            cost_list2.append(cost)
            index.append(i)
            print("Cost after iteration %i: %f" % (i, cost))

    # the updated (learned) weights and bias
    parameters = {"weight": w, "bias": b}
    plt.plot(index, cost_list2)
    plt.xticks(index, rotation='vertical')  # rotate the x-axis tick labels
    plt.xlabel('number of iteration')
    plt.ylabel('cost')
    plt.show()
    return parameters, gradients, cost_list

# parameters, gradients, cost_list = update(w, b, x_train, y_train, learning_rate=0.009, number_of_iteration=200)
```
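Each pass through the loop is one step of plain gradient descent with learning rate $\alpha$:

$$ w \leftarrow w - \alpha \frac{\partial J}{\partial w}, \qquad b \leftarrow b - \alpha \frac{\partial J}{\partial b} $$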
Define the prediction function:

```python
# prediction
def predict(w, b, x_test):
    # x_test is the input to forward propagation; here we predict on the test set
    z = sigmoid(np.dot(w.T, x_test) + b)  # model output
    Y_prediction = np.zeros((1, x_test.shape[1]))  # initialize the prediction array
    # if z is bigger than 0.5, our prediction is the "one" sign (y_head = 1);
    # if z is smaller than or equal to 0.5, our prediction is the "zero" sign (y_head = 0)
    for i in range(z.shape[1]):
        if z[0, i] <= 0.5:
            Y_prediction[0, i] = 0
        else:
            Y_prediction[0, i] = 1
    return Y_prediction  # return the predictions

# predict(parameters["weight"], parameters["bias"], x_test)
```
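The loop implements the usual 0.5 threshold on the sigmoid output:

$$ \hat{y} = \begin{cases} 1 & \text{if } \sigma(w^{\top}x + b) > 0.5 \\ 0 & \text{otherwise} \end{cases} $$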
The prediction function is ready too, so we can now run logistic regression end to end:

```python
def logistic_regression(x_train, y_train, x_test, y_test, learning_rate, num_iterations):
    # initialize
    dimension = x_train.shape[0]  # that is 4096
    w, b = initialize_weights_and_bias(dimension)
    # do not change the learning rate
    parameters, gradients, cost_list = update(w, b, x_train, y_train, learning_rate, num_iterations)

    y_prediction_test = predict(parameters['weight'], parameters['bias'], x_test)
    y_prediction_train = predict(parameters['weight'], parameters['bias'], x_train)

    # print train/test errors
    print("train accuracy: {} %".format(100 - np.mean(np.abs(y_prediction_train - y_train)) * 100))
    print("test accuracy: {} %".format(100 - np.mean(np.abs(y_prediction_test - y_test)) * 100))

logistic_regression(x_train, y_train, x_test, y_test, learning_rate=0.01, num_iterations=150)
```

As the plot shows, the cost keeps decreasing as the number of iterations grows: the parameters are being trained better and better.
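A note on the printed accuracies: since both the predictions and the labels are 0 or 1, the mean absolute difference is exactly the error rate, so

$$ \text{accuracy} = 100\left(1 - \frac{1}{m} \sum_{i=1}^{m} \left|\hat{y}^{(i)} - y^{(i)}\right|\right) \% $$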
All of the code above can be replaced with a few lines from the sklearn library, and the result is the same:
```python
from sklearn.linear_model import LogisticRegression

# sklearn expects samples in rows, hence the transposes back;
# training the model and using it are combined into one chained call
logreg = LogisticRegression(random_state=42, max_iter=150)
print("test accuracy: {}".format(logreg.fit(x_train.T, y_train.T).score(x_test.T, y_test.T)))
print("train accuracy: {}".format(logreg.fit(x_train.T, y_train.T).score(x_train.T, y_train.T)))
```

Below is the implementation code for an L-layer neural network, but it is best run on a server:
```python
# L-layer neural network
# reshape back so that samples are in rows
x_train, x_test, y_train, y_test = x_train.T, x_test.T, y_train.T, y_test.T

# Evaluating the ANN
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import cross_val_score
from keras.models import Sequential  # initialize neural network library
from keras.layers import Dense       # build our layers library

def build_classifier():
    classifier = Sequential()  # initialize the neural network
    classifier.add(Dense(units=8, kernel_initializer='uniform', activation='relu', input_dim=x_train.shape[1]))
    classifier.add(Dense(units=4, kernel_initializer='uniform', activation='relu'))
    classifier.add(Dense(units=1, kernel_initializer='uniform', activation='sigmoid'))
    classifier.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return classifier

classifier = KerasClassifier(build_fn=build_classifier, epochs=100)
accuracies = cross_val_score(estimator=classifier, X=x_train, y=y_train, cv=3)
mean = accuracies.mean()
variance = accuracies.std()  # note: .std() is the standard deviation, despite the variable name
print("Accuracy mean: " + str(mean))
print("Accuracy variance: " + str(variance))
```
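One caveat: `keras.wrappers.scikit_learn` has been removed from recent TensorFlow/Keras releases. If the import above fails, here is a minimal sketch of the same cross-validation using the separate scikeras package instead (an assumption: it has been installed with `pip install scikeras`):

```python
# Minimal sketch replacing the removed keras.wrappers.scikit_learn wrapper.
# Assumes scikeras is installed (pip install scikeras) alongside TensorFlow.
from scikeras.wrappers import KerasClassifier
from sklearn.model_selection import cross_val_score

# reuse build_classifier from above; scikeras takes the model factory via `model=`
classifier = KerasClassifier(model=build_classifier, epochs=100, verbose=0)
accuracies = cross_val_score(estimator=classifier, X=x_train, y=y_train, cv=3)
print("Accuracy mean: " + str(accuracies.mean()))
```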