# 课后习题:K-均值聚类——计算各点最终的聚类归属和各自新的簇中心坐标,并在图中画出。

# 如图所示,假设平面上有8个点,坐标为A1=(2,10), A2=(2,5), A3=(8,4), A4=(5,8), A5=(7,5), A6=(6,4), A7=(1,2), A8=(4,9)。假设初始以A1和A4为两个簇的中心,请使用K-均值聚类方法,计算并画出各点最终的聚类归属;各自新的簇中心坐标也在图中画出。

from numpy import *
import matplotlib.pyplot as plt
import operator

# Large finite stand-in for "infinity" when searching for a minimum distance.
INF = 9_999_999.0

def distEclud(vecA, vecB):
	"""Return the Euclidean (L2) distance between ``vecA`` and ``vecB``.

	Both arguments must support element-wise subtraction (NumPy arrays or
	matrices of matching shape); the result is a single scalar.
	"""
	delta = vecA - vecB
	# Sum of squared component differences, then the square root.
	squaredLength = sum(square(delta))
	return sqrt(squaredLength)

def kMeans(dataSet, k, distMeans=distEclud, initCentroids=None):
	"""Cluster the rows of ``dataSet`` into ``k`` groups with k-means.

	Each pass assigns every sample to its nearest centroid and then moves
	each centroid to the mean of its samples; the loop stops once a pass
	changes no assignment.

	Args:
		dataSet: 2-D NumPy matrix with one sample per row.
		k: number of clusters to form.
		distMeans: callable ``(vecA, vecB) -> distance`` used to compare a
			centroid row with a sample row.
		initCentroids: optional initial centroids, one per row; at least
			``k`` rows are required and only the first ``k`` are used and
			updated. Defaults to ``[[2, 10], [5, 8]]``. The argument itself
			is never modified.

	Returns:
		``(centroids, clusterAssment)``: the float centroid matrix after
		convergence, and an ``(m, 2)`` matrix whose row ``i`` holds the
		cluster index of sample ``i`` and its squared distance to that
		cluster's centroid.

	Raises:
		ValueError: if fewer than ``k`` initial centroids are available.
	"""
	m = shape(dataSet)[0]
	# Column 0: assigned cluster index; column 1: squared distance to it.
	clusterAssment = asmatrix(zeros((m, 2)))
	# Start from an impossible index so the first pass always registers as a
	# change, even for samples whose nearest centroid is cluster 0.
	clusterAssment[:, 0] = -1
	if initCentroids is None:
		initCentroids = [[2, 10], [5, 8]]
	# Float copy: an integer matrix would silently truncate the means written
	# into it below, and the caller's centroids must stay untouched.
	centroids = matrix(initCentroids, dtype=float)
	if shape(centroids)[0] < k:
		raise ValueError(
			"kMeans needs at least k=%d initial centroids, got %d"
			% (k, shape(centroids)[0]))
	clusterChanged = True
	while clusterChanged:
		clusterChanged = False
		for i in range(m):  # assignment step: find the nearest centroid
			minDist = inf
			minIndex = -1
			for j in range(k):
				distJI = distMeans(centroids[j, :], dataSet[i, :])
				if distJI < minDist:
					minDist = distJI
					minIndex = j
			if clusterAssment[i, 0] != minIndex:
				clusterChanged = True
			clusterAssment[i, :] = minIndex, minDist ** 2
		for cent in range(k):  # update step: move centroids to cluster means
			ptsInClust = dataSet[nonzero(clusterAssment[:, 0].A == cent)[0]]
			# An empty cluster keeps its previous centroid; the mean of zero
			# rows would be NaN.
			if shape(ptsInClust)[0] > 0:
				centroids[cent, :] = mean(ptsInClust, axis=0)
	return centroids, clusterAssment

def plotFeature(dataSet, centroids, clusterAssment):
	"""Scatter-plot each cluster's samples and overlay the centroids.

	Args:
		dataSet: NumPy matrix of samples; columns 0 and 1 are plotted as x
			and y.
		centroids: NumPy matrix with one centroid per row; its row count is
			taken as the number of clusters.
		clusterAssment: NumPy matrix whose column 0 holds each sample's
			cluster index.

	A new figure is created on every call. The figure is not shown here;
	the caller is expected to invoke ``plt.show()``.
	"""
	def column(points, idx):
		# (n x 1) matrix column -> flat 1-D ndarray suitable for scatter().
		return points[:, idx].flatten().A[0]

	clusterCount = shape(centroids)[0]
	figure = plt.figure()
	# Marker/colour pairs are reused cyclically when there are more clusters
	# than entries.
	markers = ['s', 'o']
	colors = ['black', 'red']
	axes = figure.add_subplot(111)
	for clusterIdx in range(clusterCount):
		rowIdx = nonzero(clusterAssment[:, 0].A == clusterIdx)[0]
		members = dataSet[rowIdx, :]
		axes.scatter(
			column(members, 0),
			column(members, 1),
			marker=markers[clusterIdx % len(markers)],
			c=colors[clusterIdx % len(colors)],
			s=90,
		)
	# Centroids are drawn last as large red plus signs.
	axes.scatter(column(centroids, 0), column(centroids, 1), marker='+', c='red', s=300)

if __name__ == '__main__':
	# The eight sample points A1..A8 of the exercise, one (x, y) pair per row.
	# `matrix` is used instead of the `mat` alias, which NumPy 2.0 removed.
	dataSet = matrix([[2, 10], [2, 5], [8, 4], [5, 8], [7, 5], [6, 4], [1, 2], [4, 9]])
	# Two clusters; kMeans seeds them with A1=(2,10) and A4=(5,8) by default.
	resultCentroids, clustAssign = kMeans(dataSet, 2)
	print('*******************')
	print(resultCentroids)
	print('*******************')
	plotFeature(dataSet, resultCentroids, clustAssign)
	plt.show()