Source: a Kaggle expert's tutorial (link: link). Personal review notes.
```python
# This Python 3 environment comes with many helpful analytics libraries installed.
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load:
import numpy as np   # linear algebra
import pandas as pd  # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter)
# will list the files in the input directory.

import warnings
warnings.filterwarnings('ignore')  # filter warnings

from subprocess import check_output
# print(check_output(["ls", "../input"]).decode("utf8"))
# Any results you write to the current directory are saved as output.
```

Dataset: https://pan.baidu.com/s/15jq7JF3aO10tEO6_S3XF6Q (extraction code: a0n4)
Import the dataset and display two sample images:

```python
# load the dataset
x_1 = np.load('E:/Anaconda/envs/torch160/Python_Test/Data/NLP/deep_learning_npy/X.npy')
Y_1 = np.load('E:/Anaconda/envs/torch160/Python_Test/Data/NLP/deep_learning_npy/Y.npy')

# display two of the images
img_size = 64
plt.subplot(1, 2, 1)
plt.imshow(x_1[260].reshape(img_size, img_size))
plt.axis('off')
plt.subplot(1, 2, 2)
plt.imshow(x_1[900].reshape(img_size, img_size))
plt.axis('off')

# We keep 410 images in total (the "zero" and "one" signs),
# each of size 64x64 pixels.
X = np.concatenate((x_1[204:409], x_1[822:1027]), axis=0)  # in X, indices 0-204 are the zero sign, 205-409 are the one sign
z = np.zeros(205)
o = np.ones(205)
# Y is a (410, 1) column vector of labels (not a flat (410,) array):
# the first half zeros, the second half ones
Y = np.concatenate((z, o), axis=0).reshape(X.shape[0], 1)
print("X shape:", X.shape)
print("Y shape:", Y.shape)
```
Split X and Y into training and test sets, reserving 15% for testing:

```python
# create the x_train, y_train, x_test, y_test arrays
from sklearn.model_selection import train_test_split

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.15, random_state=42)
number_of_train = X_train.shape[0]  # number of training samples (rows)
number_of_test = X_test.shape[0]    # number of test samples
```

X is three-dimensional, so it has to be flattened to two dimensions before it can serve as input to our first deep learning model. The label array Y is already 2-D and needs no change:
```python
X_train_flatten = X_train.reshape(number_of_train, X_train.shape[1] * X_train.shape[2])
X_test_flatten = X_test.reshape(number_of_test, X_test.shape[1] * X_test.shape[2])
print("X train flatten", X_train_flatten.shape)
print("X test flatten", X_test_flatten.shape)

# Transpose so that each column is one sample; this matches the matrix
# product in y = (w.T)x + b
x_train = X_train_flatten.T
x_test = X_test_flatten.T
y_train = Y_train.T
y_test = Y_test.T
print("x train: ", x_train.shape)
print("x test: ", x_test.shape)
print("y train: ", y_train.shape)
print("y test: ", y_test.shape)
```
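To see what the transpose buys us, write out the shapes, with $m$ denoting the number of samples: after flattening and transposing, each column of $x$ is one image, so a single matrix product computes $z$ for every sample at once:

$$ z = w^{\top}x + b, \qquad w \in \mathbb{R}^{4096 \times 1},\; x \in \mathbb{R}^{4096 \times m},\; z \in \mathbb{R}^{1 \times m} $$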
Next, define the functions used during neural-network propagation:

```python
# short description and example of a function definition (def)
def dummy(parameter):
    dummy_parameter = parameter + 5
    return dummy_parameter

result = dummy(3)  # result = 8

# Initialize the parameters: weights w and bias b
# (the initial values could actually be chosen better).
# Our initialize method (def) takes the dimension 4096, i.e. the number of pixels.
def initialize_weights_and_bias(dimension):
    w = np.full((dimension, 1), 0.01)  # np.full(shape, fill_value) builds an array of the given shape filled with fill_value
    b = 0.0
    return w, b

# forward propagation
# calculation of z: the sigmoid function
def sigmoid(z):
    y_head = 1 / (1 + np.exp(-z))
    return y_head

# ReLU function
def relu(x):
    return np.maximum(0, x)

y_head = sigmoid(0)  # here y_head is 0.5

# Forward propagation steps:
#   find z = w.T*x + b
#   y_head = sigmoid(z)
#   loss(error) = loss(y, y_head)
#   cost = sum(loss)
# The helper functions are ready; now call them in forward propagation.
def forward_propagation(w, b, x_train, y_train):
    z = np.dot(w.T, x_train) + b
    y_head = sigmoid(z)  # probabilistic, in (0, 1)
    loss = -y_train * np.log(y_head) - (1 - y_train) * np.log(1 - y_head)  # the cost is the sum of the per-image losses
    cost = (np.sum(loss)) / x_train.shape[1]  # x_train.shape[1] is for scaling (averaging over samples)
    return cost
```
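Written out, the loss computed in `forward_propagation` is the binary cross-entropy, and the cost is its average over the $m$ training images:

$$ \mathcal{L}(y, \hat{y}) = -\,y \log \hat{y} - (1 - y)\log(1 - \hat{y}), \qquad J = \frac{1}{m} \sum_{i=1}^{m} \mathcal{L}\!\left(y^{(i)}, \hat{y}^{(i)}\right) $$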
Next, combine forward and backward propagation in a single function:

```python
def forward_backward_propagation(w, b, x_train, y_train):
    # forward propagation, following the steps listed above
    z = np.dot(w.T, x_train) + b
    y_head = sigmoid(z)
    loss = -y_train * np.log(y_head) - (1 - y_train) * np.log(1 - y_head)
    cost = (np.sum(loss)) / x_train.shape[1]  # x_train.shape[1] is for scaling

    # backward propagation: gradients (partial derivatives) of the weights and bias
    derivative_weight = (np.dot(x_train, ((y_head - y_train).T))) / x_train.shape[1]  # x_train.shape[1] is for scaling
    derivative_bias = np.sum(y_head - y_train) / x_train.shape[1]
    gradients = {"derivative_weight": derivative_weight, "derivative_bias": derivative_bias}
    return cost, gradients
```
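The two derivative lines are the standard closed-form gradients of this cost with respect to $w$ and $b$:

$$ \frac{\partial J}{\partial w} = \frac{1}{m}\, x\,(\hat{y} - y)^{\top}, \qquad \frac{\partial J}{\partial b} = \frac{1}{m} \sum_{i=1}^{m} \left(\hat{y}^{(i)} - y^{(i)}\right) $$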
Now update (learn) the parameters:

```python
# Updating (learning) the parameters
def update(w, b, x_train, y_train, learning_rate, number_of_iteration):
    cost_list = []   # cost at every iteration
    cost_list2 = []  # cost sampled every 10 iterations, for plotting
    index = []       # iteration indices every 10 steps, used as x-axis ticks

    # updating (learning) the parameters number_of_iteration times
    for i in range(number_of_iteration):
        # run forward and backward propagation to get the cost and gradients
        cost, gradients = forward_backward_propagation(w, b, x_train, y_train)
        cost_list.append(cost)
        # update step
        w = w - learning_rate * gradients['derivative_weight']
        b = b - learning_rate * gradients['derivative_bias']
        if i % 10 == 0:
            cost_list2.append(cost)
            index.append(i)
            print("Cost after iteration %i: %f" % (i, cost))

    # the updated (learned) weights and bias
    parameters = {"weight": w, "bias": b}
    plt.plot(index, cost_list2)
    plt.xticks(index, rotation='vertical')  # rotate the x-axis tick labels
    plt.xlabel('number of iteration')
    plt.ylabel('cost')
    plt.show()
    return parameters, gradients, cost_list

# parameters, gradients, cost_list = update(w, b, x_train, y_train, learning_rate=0.009, number_of_iteration=200)
```
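Each pass through the loop is one step of plain gradient descent with learning rate $\alpha$:

$$ w \leftarrow w - \alpha \frac{\partial J}{\partial w}, \qquad b \leftarrow b - \alpha \frac{\partial J}{\partial b} $$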
Define the prediction function:

```python
# prediction
def predict(w, b, x_test):
    # x_test is the input to forward propagation; here we predict on the test set
    z = sigmoid(np.dot(w.T, x_test) + b)  # model output
    Y_prediction = np.zeros((1, x_test.shape[1]))  # initialize the prediction array
    # if z is bigger than 0.5, our prediction is the "one" sign (y_head = 1);
    # if z is smaller than or equal to 0.5, our prediction is the "zero" sign (y_head = 0)
    for i in range(z.shape[1]):
        if z[0, i] <= 0.5:
            Y_prediction[0, i] = 0
        else:
            Y_prediction[0, i] = 1
    return Y_prediction  # return the predictions

# predict(parameters["weight"], parameters["bias"], x_test)
```
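The loop implements the usual 0.5 threshold on the sigmoid output:

$$ \hat{y} = \begin{cases} 1 & \text{if } \sigma(w^{\top}x + b) > 0.5 \\ 0 & \text{otherwise} \end{cases} $$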
The prediction function is ready too, so we can now run logistic regression end to end:

```python
def logistic_regression(x_train, y_train, x_test, y_test, learning_rate, num_iterations):
    # initialize
    dimension = x_train.shape[0]  # that is 4096
    w, b = initialize_weights_and_bias(dimension)
    # do not change the learning rate
    parameters, gradients, cost_list = update(w, b, x_train, y_train, learning_rate, num_iterations)

    y_prediction_test = predict(parameters['weight'], parameters['bias'], x_test)
    y_prediction_train = predict(parameters['weight'], parameters['bias'], x_train)

    # print train/test errors
    print("train accuracy: {} %".format(100 - np.mean(np.abs(y_prediction_train - y_train)) * 100))
    print("test accuracy: {} %".format(100 - np.mean(np.abs(y_prediction_test - y_test)) * 100))

logistic_regression(x_train, y_train, x_test, y_test, learning_rate=0.01, num_iterations=150)
```

As the plot shows, the cost keeps decreasing as the number of iterations grows: the parameters are being trained better and better.
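A note on the printed accuracies: since both the predictions and the labels are 0 or 1, the mean absolute difference is exactly the error rate, so

$$ \text{accuracy} = 100\left(1 - \frac{1}{m} \sum_{i=1}^{m} \left|\hat{y}^{(i)} - y^{(i)}\right|\right) \% $$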
All of the code above can be replaced with a few lines from the sklearn library, and the result is the same:
```python
from sklearn.linear_model import LogisticRegression

# sklearn expects samples in rows, hence the transposes back;
# training the model and using it are combined into one chained call
logreg = LogisticRegression(random_state=42, max_iter=150)
print("test accuracy: {}".format(logreg.fit(x_train.T, y_train.T).score(x_test.T, y_test.T)))
print("train accuracy: {}".format(logreg.fit(x_train.T, y_train.T).score(x_train.T, y_train.T)))
```

Below is the implementation code for an L-layer neural network, but it is best run on a server:
```python
# L-layer neural network
# reshape back so that samples are in rows
x_train, x_test, y_train, y_test = x_train.T, x_test.T, y_train.T, y_test.T

# Evaluating the ANN
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import cross_val_score
from keras.models import Sequential  # initialize neural network library
from keras.layers import Dense       # build our layers library

def build_classifier():
    classifier = Sequential()  # initialize the neural network
    classifier.add(Dense(units=8, kernel_initializer='uniform', activation='relu', input_dim=x_train.shape[1]))
    classifier.add(Dense(units=4, kernel_initializer='uniform', activation='relu'))
    classifier.add(Dense(units=1, kernel_initializer='uniform', activation='sigmoid'))
    classifier.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return classifier

classifier = KerasClassifier(build_fn=build_classifier, epochs=100)
accuracies = cross_val_score(estimator=classifier, X=x_train, y=y_train, cv=3)
mean = accuracies.mean()
variance = accuracies.std()  # note: .std() is the standard deviation, despite the variable name
print("Accuracy mean: " + str(mean))
print("Accuracy variance: " + str(variance))
```
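One caveat: `keras.wrappers.scikit_learn` has been removed from recent TensorFlow/Keras releases. If the import above fails, here is a minimal sketch of the same cross-validation using the separate scikeras package instead (an assumption: it has been installed with `pip install scikeras`):

```python
# Minimal sketch replacing the removed keras.wrappers.scikit_learn wrapper.
# Assumes scikeras is installed (pip install scikeras) alongside TensorFlow.
from scikeras.wrappers import KerasClassifier
from sklearn.model_selection import cross_val_score

# reuse build_classifier from above; scikeras takes the model factory via `model=`
classifier = KerasClassifier(model=build_classifier, epochs=100, verbose=0)
accuracies = cross_val_score(estimator=classifier, X=x_train, y=y_train, cv=3)
print("Accuracy mean: " + str(accuracies.mean()))
```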