逻辑回归 python实现 不用现成库

    科技2022-09-06  129

    函数名瞎写的,轻喷

    import numpy as np import pandas as pd import numpy.random import time import matplotlib.pyplot as plt %matplotlib inline f=open('data_banknote_authentication.txt','r',encoding='utf-8') data = f.read() fdata=data.split('\n') Mdata=pd.Series(fdata) adf=Mdata.str.split(',') adf 0 [3.6216, 8.6661, -2.8073, -0.44699, 0] 1 [4.5459, 8.1674, -2.4586, -1.4621, 0] 2 [3.866, -2.6383, 1.9242, 0.10645, 0] 3 [3.4566, 9.5228, -4.0112, -3.5944, 0] 4 [0.32924, -4.4552, 4.5718, -0.9888, 0] ... 1367 [0.40614, 1.3492, -1.4501, -0.55949, 1] 1368 [-1.3887, -4.8773, 6.4774, 0.34179, 1] 1369 [-3.7503, -13.4586, 17.5932, -2.7771, 1] 1370 [-3.5637, -8.3827, 12.393, -1.2823, 1] 1371 [-2.5419, -0.65804, 2.6842, 1.1952, 1] Length: 1372, dtype: object adf=pd.DataFrame(adf.values.tolist(),dtype=np.float32) adf 0123403.621608.66610-2.807300-0.446990.014.545908.16740-2.458600-1.462100.023.86600-2.638301.9242000.106450.033.456609.52280-4.011200-3.594400.040.32924-4.455204.571800-0.988800.0..................13670.406141.34920-1.450100-0.559491.01368-1.38870-4.877306.4774000.341791.01369-3.75030-13.4586017.593201-2.777101.01370-3.56370-8.3827012.393000-1.282301.01371-2.54190-0.658042.6842001.195201.0

    1372 rows × 5 columns

    # Prepend a constant column of ones so theta[0] acts as the intercept.
    adf.insert(0, 'f', 1)
    df = adf.values
    numpy.random.shuffle(df)  # shuffle the rows in place
    x = df[:, 0:5]  # constant column + the four feature columns
    y = df[:, 5:6]  # label column, kept 2-D (n, 1)


    def sigmoid(x):
        """Return 1 / (1 + exp(-x)), applied element-wise."""
        return 1 / (1 + np.exp(-x))


    def hx(x, theta):
        """Return the model output sigmoid(x . theta^T).

        The dot product forms the weighted sum of each row of ``x``; the
        sigmoid maps that value into the interval (0, 1) so it can be read
        as a probability.
        """
        return sigmoid(np.dot(x, theta.T))


    theta = np.zeros([1, 5])  # parameters
    theta
    # Out: array([[0., 0., 0., 0., 0.]])

    x[1370:1400, :]
    # Out:
    # array([[ 1.        , -1.87820005, -6.58650017,  4.84859991, -0.021566  ],
    #        [ 1.        ,  4.8906002 , -3.35840011,  3.42020011,  1.0905    ]])

    y
    # Out:
    # array([[1.],
    #        [0.],
    #        [1.],
    #        ...,
    #        [0.],
    #        [1.],
    #        [0.]])


    def loss(x, y, theta):
        """Return the mean binary cross-entropy of the model on ``(x, y)``.

        Computes ``sum(-y*log(h) - (1-y)*log(1-h)) / len(y)`` with
        ``h = hx(x, theta)``.
        """
        h = hx(x, theta)
        # Keep h strictly inside (0, 1): a saturated sigmoid would otherwise
        # feed log(0) into the sum and turn the loss into nan/inf.
        eps = np.finfo(h.dtype).eps
        h = np.clip(h, eps, 1 - eps)
        a = np.multiply(-y, np.log(h))  # element-wise product
        b = np.multiply(1 - y, np.log(1 - h))
        return np.sum(a - b) / len(y)


    loss(x, y, theta)
    # Out: 0.6931471805599454


    def pd(x, y, theta):
        """Return the gradient of ``loss`` with respect to ``theta``.

        The result has the same shape as ``theta``; entry ``i`` is
        ``sum((hx(x, theta) - y) * x[:, i]) / len(x)``.

        NOTE: this name rebinds ``pd``, so the pandas alias imported at the
        top of the file is no longer reachable once this definition runs.
        """
        error = (hx(x, theta) - y).ravel()  # flatten (n, 1) -> (n,)
        # One dot product yields every partial derivative at once.
        return (np.dot(error, x) / len(x)).reshape(theta.shape)


    def DM1(x, y, theta, YuZhi, learnning_Rate):
        """Fit ``theta`` by gradient descent using one sample per update.

        Parameters:
            x: sample matrix, one row per sample.
            y: labels, one row per sample.
            theta: initial parameters.
            YuZhi: iteration threshold; the loop stops as soon as the update
                counter exceeds it, i.e. after ``YuZhi + 1`` updates.
            learnning_Rate: step size applied to each gradient.

        Returns:
            A tuple ``(theta, i - 1, Loss, elapsed)``: the final parameters,
            the update counter minus one, the list of full-data losses (the
            initial loss followed by one entry per update) and the elapsed
            wall-clock time in seconds.
        """
        start_time = time.time()
        i = 0  # number of updates performed so far
        k = 0  # index of the next sample within the current pass
        Loss = [loss(x, y, theta)]
        while True:
            grad = pd(x[k:k + 1], y[k:k + 1], theta)
            k += 1
            if k >= len(x):
                # End of a pass: restart and reshuffle. The same permutation
                # is applied to x and y so rows and labels stay paired, and
                # only the arrays passed in are used (no global state).
                k = 0
                order = numpy.random.permutation(len(x))
                x = x[order]
                y = y[order]
            theta = theta - (learnning_Rate * grad)
            Loss.append(loss(x, y, theta))
            i += 1
            if i > YuZhi:
                break
        return theta, i - 1, Loss, time.time() - start_time


    ENDtheta, times, ENDcosts, spend = DM1(x, y, theta, 50000, 0.003)
    print('耗时:%f'%(spend))
    fig, ax = plt.subplots(figsize=(12, 4))
    ax.plot(np.arange(len(ENDcosts)), ENDcosts, 'r')
    ax.set_xlabel("Iter")
    ax.set_ylabel("Loss")
    # Out: 耗时:11.588989

    # Show the loss recorded after the last update.
    print(ENDcosts[-1])
    # Out: 0.03195626474525613
    Processed: 0.010, SQL: 9