import numpy as np import math def get_initial_centrals(points): centrals = [] new_points = [[points[i][0], points[i][1], i] for i in range(len(points))] new_points_sorted1 = sorted(new_points, key=lambda x: (x[0], x[1], x[2])) # 按照x升序排列,若相同,按y升序排,再相同,按输入次序 initial_central1 = [new_points_sorted1[0][0], new_points_sorted1[0][1]] new_points_sorted2 = sorted(new_points,key=lambda x:(x[0],x[1],-x[2]),reverse=True) initial_central2 = [new_points_sorted2[0][0],new_points_sorted2[0][1]] centrals.append(initial_central1) centrals.append(initial_central2) return centrals def get_cluster(points,centrals): cluster_content = [[] for i in range(len(centrals))] for point in points: all_dis = [math.sqrt((point[0]-central[0])**2+(point[1]-central[1])**2) for central in centrals] cluster_ind = all_dis.index(min(all_dis)) cluster_content[cluster_ind].append(point) return cluster_content def update_central(cluster_content,centrals): new_centrals = [] for i in range(len(centrals)): if cluster_content[i]: new_x = sum([ele[0] for ele in cluster_content[i]])/len(cluster_content[i]) new_y = sum([ele[1] for ele in cluster_content[i]])/len(cluster_content[i]) new_centrals.append([new_x,new_y]) else: new_centrals.append(centrals[i]) return new_centrals def cal_error(centrals,new_centrals): error = 0 for i in range(len(centrals)): error += math.sqrt((centrals[i][0]-new_centrals[i][0])**2 + (centrals[i][1]-new_centrals[i][1])**2) return error def cal_SSE(cluster_content,centrals): all_dis = [] for i in range(len(centrals)): sum_dis = 0 if cluster_content[i]: for j in range(len(cluster_content[i])): sum_dis += (cluster_content[i][j][0]-centrals[i][0])**2 + (cluster_content[i][j][1]-centrals[i][1])**2 all_dis.append(sum_dis) return all_dis # def sort_count(cluster_content): # new = [len(content) for content in cluster_content] # return sorted(new,reverse=True) def k_means(points,centrals): # centrals = get_initial_centrals(points) T = 0 error = float('inf') while T < 1000 or error >= 1e-6: cluster_content = get_cluster(points,centrals) new_centrals = update_central(cluster_content,centrals) error = cal_error(centrals,new_centrals) centrals = new_centrals T += 1 cluster_content = get_cluster(points, centrals) return centrals,cluster_content def bi_k_means(N,points): curr_cluster_content = [points] #当前所有簇的内容列表 curr_cluster_central = [] #当前所有簇心列表 emulate = [] #输出结果 if points: #所有点分为一个簇,得到初始簇心 initial_central = [sum([p[0] for p in points]) /len(points), sum([p[1] for p in points]) /len(points)] curr_cluster_central = [initial_central] while len(curr_cluster_content)<N: #计算每个簇的sse sse_list = [] for i,cluster in enumerate(curr_cluster_content): if len(cluster) == 0: sse_list.append(0) else: #计算当前簇的sse sse = sum([(p[0]-curr_cluster_central[i][0])**2+(p[1]-curr_cluster_central[i][1])**2 for p in cluster]) sse_list.append(sse) max_sse_inx = sse_list.index(max(sse_list)) cluster_to_split = curr_cluster_content[max_sse_inx] if len(cluster_to_split) <= 1: #无法再拆 break #选好的簇进行二分 initial_centrals = get_initial_centrals(cluster_to_split) # print(points) new_centrals, new_cluster_content = k_means(cluster_to_split, initial_centrals) curr_cluster_content.pop(max_sse_inx) curr_cluster_content.extend(new_cluster_content) #每次都新添加两个簇的内容 #更新簇心列表 if new_centrals: curr_cluster_central.pop(max_sse_inx) curr_cluster_central.extend(new_centrals) cluster_size = sorted([len(cluster) for cluster in curr_cluster_content],reverse=True) emulate.append(cluster_size) return emulate if __name__ == '__main__': lines = [] while True: try: line = input().split() if line: lines.append(line) else: break except EOFError: break N,M = int(lines[0][0]),int(lines[1][0]) points = [[int(lines[i+2][0]),int(lines[i+2][1])] for i in range(M)] emulate = bi_k_means(N,points) for ele in emulate: # print(str(ele[0]) +' ' + str(ele[1])) str1 = '' for id in ele: str1 += str(id)+' ' print(str1)