import numpy as np
import math
def get_initial_centrals(points):
centrals = []
new_points = [[points[i][0], points[i][1], i] for i in range(len(points))]
new_points_sorted1 = sorted(new_points, key=lambda x: (x[0], x[1], x[2])) # 按照x升序排列,若相同,按y升序排,再相同,按输入次序
initial_central1 = [new_points_sorted1[0][0], new_points_sorted1[0][1]]
new_points_sorted2 = sorted(new_points,key=lambda x:(x[0],x[1],-x[2]),reverse=True)
initial_central2 = [new_points_sorted2[0][0],new_points_sorted2[0][1]]
centrals.append(initial_central1)
centrals.append(initial_central2)
return centrals
def get_cluster(points,centrals):
cluster_content = [[] for i in range(len(centrals))]
for point in points:
all_dis = [math.sqrt((point[0]-central[0])**2+(point[1]-central[1])**2) for central in centrals]
cluster_ind = all_dis.index(min(all_dis))
cluster_content[cluster_ind].append(point)
return cluster_content
def update_central(cluster_content,centrals):
new_centrals = []
for i in range(len(centrals)):
if cluster_content[i]:
new_x = sum([ele[0] for ele in cluster_content[i]])/len(cluster_content[i])
new_y = sum([ele[1] for ele in cluster_content[i]])/len(cluster_content[i])
new_centrals.append([new_x,new_y])
else:
new_centrals.append(centrals[i])
return new_centrals
def cal_error(centrals,new_centrals):
error = 0
for i in range(len(centrals)):
error += math.sqrt((centrals[i][0]-new_centrals[i][0])**2 + (centrals[i][1]-new_centrals[i][1])**2)
return error
def cal_SSE(cluster_content,centrals):
all_dis = []
for i in range(len(centrals)):
sum_dis = 0
if cluster_content[i]:
for j in range(len(cluster_content[i])):
sum_dis += (cluster_content[i][j][0]-centrals[i][0])**2 + (cluster_content[i][j][1]-centrals[i][1])**2
all_dis.append(sum_dis)
return all_dis
# def sort_count(cluster_content):
# new = [len(content) for content in cluster_content]
# return sorted(new,reverse=True)
def k_means(points,centrals):
# centrals = get_initial_centrals(points)
T = 0
error = float('inf')
while T < 1000 or error >= 1e-6:
cluster_content = get_cluster(points,centrals)
new_centrals = update_central(cluster_content,centrals)
error = cal_error(centrals,new_centrals)
centrals = new_centrals
T += 1
cluster_content = get_cluster(points, centrals)
return centrals,cluster_content
def bi_k_means(N,points):
curr_cluster_content = [points] #当前所有簇的内容列表
curr_cluster_central = [] #当前所有簇心列表
emulate = [] #输出结果
if points:
#所有点分为一个簇,得到初始簇心
initial_central = [sum([p[0] for p in points]) /len(points),
sum([p[1] for p in points]) /len(points)]
curr_cluster_central = [initial_central]
while len(curr_cluster_content)<N:
#计算每个簇的sse
sse_list = []
for i,cluster in enumerate(curr_cluster_content):
if len(cluster) == 0:
sse_list.append(0)
else:
#计算当前簇的sse
sse = sum([(p[0]-curr_cluster_central[i][0])**2+(p[1]-curr_cluster_central[i][1])**2 for p in cluster])
sse_list.append(sse)
max_sse_inx = sse_list.index(max(sse_list))
cluster_to_split = curr_cluster_content[max_sse_inx]
if len(cluster_to_split) <= 1:
#无法再拆
break
#选好的簇进行二分
initial_centrals = get_initial_centrals(cluster_to_split)
# print(points)
new_centrals, new_cluster_content = k_means(cluster_to_split, initial_centrals)
curr_cluster_content.pop(max_sse_inx)
curr_cluster_content.extend(new_cluster_content) #每次都新添加两个簇的内容
#更新簇心列表
if new_centrals:
curr_cluster_central.pop(max_sse_inx)
curr_cluster_central.extend(new_centrals)
cluster_size = sorted([len(cluster) for cluster in curr_cluster_content],reverse=True)
emulate.append(cluster_size)
return emulate
if __name__ == '__main__':
lines = []
while True:
try:
line = input().split()
if line:
lines.append(line)
else:
break
except EOFError:
break
N,M = int(lines[0][0]),int(lines[1][0])
points = [[int(lines[i+2][0]),int(lines[i+2][1])] for i in range(M)]
emulate = bi_k_means(N,points)
for ele in emulate:
# print(str(ele[0]) +' ' + str(ele[1]))
str1 = ''
for id in ele:
str1 += str(id)+' '
print(str1)