K-means聚类步骤: 1.从数据中随机选取K个点作为初始中心点; 2.将每个数据点分配给距离最近的中心点; 3.用每个类内所有点的均值重新计算该类的中心点; 4.重复步骤2-3,直到中心点的变化小于允许误差或达到最大迭代次数。
import random

import numpy as np


class K_means():
    """K-means clustering using Lloyd's iteration.

    The algorithm picks ``n_clusters`` distinct rows of the data as initial
    centers, then alternates two steps until the centers stop moving or the
    iteration budget is used up:

    * assignment step (E-step): attach every point to its nearest center;
    * update step (M-step): move every center to the mean of its points.

    Attributes:
        k_: Number of clusters.
        tolerance: Allowed average absolute movement per center; iteration
            stops once the summed absolute change of all center coordinates
            is below ``tolerance * k_``.
        max_iter_: Upper bound on the number of E/M iterations.
        centers: ``(k_, n_features)`` float array of fitted centers, or
            ``None`` before ``fit`` has been called.
        fitted: ``True`` once ``fit`` has completed.
    """

    def __init__(self, n_clusters=2, tolerance=0.0001, max_iter=300):
        """Store the hyper-parameters; no computation happens here.

        Args:
            n_clusters: Number of clusters (k).
            tolerance: Convergence tolerance, see the class docstring.
            max_iter: Maximum number of iterations.
        """
        self.k_ = n_clusters
        self.tolerance = tolerance
        self.max_iter_ = max_iter
        # Defined up front so predict() can be called safely before fit().
        self.centers = None
        self.fitted = False

    @staticmethod
    def _nearest_center(points, centers):
        """Return, for each row of *points*, the index of the closest center."""
        # Broadcast (n, 1, d) - (1, k, d) -> (n, k, d), then reduce over d to
        # get an (n, k) matrix of Euclidean distances.
        offsets = points[:, np.newaxis, :] - centers[np.newaxis, :, :]
        distances = np.linalg.norm(offsets, axis=2)
        # argmin keeps the first index on ties, like a per-point argmin would.
        return np.argmin(distances, axis=1)

    def fit(self, data):
        """Estimate the cluster centers from *data*.

        Args:
            data: 2-D array-like of shape ``(n_samples, n_features)``.

        Raises:
            ValueError: Propagated from ``random.sample`` when ``k_`` is
                larger than the number of samples (or negative).
        """
        # Work in float so the means are not truncated for integer input,
        # and so plain nested lists are accepted.
        data = np.asarray(data, dtype=float)

        # random.sample(population, k) returns k distinct indices, so the
        # initial centers are k different rows of the data.
        seed_rows = random.sample(range(data.shape[0]), self.k_)
        centers = data[seed_rows]  # fancy indexing yields a copy

        for _ in range(self.max_iter_):
            old_centers = centers.copy()

            # E-step: memberships are recomputed from scratch on every pass.
            labels = self._nearest_center(data, old_centers)

            # M-step: each center moves to the mean of its members.
            for i in range(self.k_):
                members = data[labels == i]
                # An empty cluster keeps its previous center; taking the mean
                # of zero points would produce NaN.
                if len(members):
                    centers[i] = members.mean(axis=0)

            if np.sum(np.abs(old_centers - centers)) < self.tolerance * self.k_:
                break

        self.centers = centers
        self.fitted = True

    def predict(self, p_data):
        """Assign each point of *p_data* to its nearest fitted center.

        Args:
            p_data: 2-D array-like of shape ``(n_points, n_features)``.

        Returns:
            A list with one cluster index (``int``) per input point. If the
            model has not been fitted, prints ``"unfitted"`` and returns an
            empty list.
        """
        if not self.fitted:
            print("unfitted")
            return []

        p_data = np.asarray(p_data, dtype=float)
        if p_data.size == 0:
            return []
        return self._nearest_center(p_data, self.centers).tolist()