题目链接
题目描述
为了优化城市基站布局,需要将给定的 个基站坐标通过 K-Means 算法聚成
个簇。然后,通过计算每个簇的平均轮廓系数来评估其好坏。
任务是找到平均轮廓系数最低的那个簇,并输出该簇的质心坐标。
- K-Means 初始化:使用输入的前
个点作为初始质心。
- 轮廓系数:对于样本
,其轮廓系数
。
是
与其所在簇内其他样本的平均距离。
是
与其他各簇样本平均距离中的最小值。
- 若样本所在簇的大小小于等于 1,则
。
- 输出:平均轮廓系数最低的簇的质心坐标,四舍五入到两位小数,采用银行家舍入法。
解题思路
这是一个标准的算法实现题,需要分步完成 K-Means 聚类、轮廓系数计算和结果处理。
1. K-Means 聚类
K-Means 是一种迭代算法,用于将数据集划分为 个簇。
-
初始化:
- 根据题目要求,选择输入的前
个点作为初始的
个质心。
- 根据题目要求,选择输入的前
-
迭代: 重复以下两个步骤,直到簇的分配不再发生变化:
- 分配步骤 (Assignment Step):遍历每一个数据点,计算它到
个质心的欧几里得距离。将该点分配给距离最近的质心所代表的簇。
- 更新步骤 (Update Step):对于每个簇,重新计算其质心。新的质心是该簇内所有数据点坐标的平均值。
- 分配步骤 (Assignment Step):遍历每一个数据点,计算它到
2. 轮廓系数计算
在 K-Means 算法收敛后,我们得到了最终的簇划分。接下来,为每个簇计算其平均轮廓系数。
-
计算单个点的轮廓系数
: 对于每个点
,它属于簇
:
- 计算
:点
到簇
中所有其他点的平均距离。如果
只有一个点,则
可以视为 0。
- 计算
:对于任意其他簇
(
),计算点
到
中所有点的平均距离。
是这些平均距离中的最小值。
- 根据公式
计算得分。特殊地,如果一个簇的大小
,
直接记为 0。
- 计算
-
计算簇的平均轮廓系数:
- 对于每个簇
,其平均轮廓系数是该簇内所有点的
值的平均值。
- 对于每个簇
3. 寻找最差簇并输出
- 比较所有
个簇的平均轮廓系数,找到值最低的那个簇。
- 输出该簇的最终质心坐标。
4. 银行家舍入
银行家舍入法(Round Half to Even)是一种更精确的舍入规则:
- 如果要舍弃的部分
> 0.5
,则进位。 - 如果要舍弃的部分
< 0.5
,则舍去。 - 如果要舍弃的部分
= 0.5
,则舍入到最近的偶数。- 例如,
2.5
舍入为2
,3.5
舍入为4
。
- 例如,
实现:
- Python: 内置的
round()
函数默认就使用银行家舍入。 - Java: 使用
BigDecimal
的setScale(2, RoundingMode.HALF_EVEN)
。 - C++: 可以通过
rint()
函数或自定义逻辑实现。一个简单的方法是rint(value * 100.0) / 100.0
。
代码
#include <iostream>
#include <vector>
#include <cmath>
#include <iomanip>
#include <limits>
using namespace std;
struct Point {
double x, y;
};
double dist_sq(const Point& a, const Point& b) {
return (a.x - b.x) * (a.x - b.x) + (a.y - b.y) * (a.y - b.y);
}
// 银行家舍入
double bankers_round(double val) {
return rint(val * 100.0) / 100.0;
}
int main() {
ios::sync_with_stdio(false);
cin.tie(NULL);
int n, k;
cin >> n >> k;
vector<Point> points(n);
for (int i = 0; i < n; ++i) {
cin >> points[i].x >> points[i].y;
}
vector<Point> centroids(k);
for (int i = 0; i < k; ++i) {
centroids[i] = points[i];
}
vector<int> assignments(n);
bool changed = true;
while (changed) {
changed = false;
// 分配步骤
for (int i = 0; i < n; ++i) {
double min_dist_sq = numeric_limits<double>::max();
int best_cluster = -1;
for (int j = 0; j < k; ++j) {
double d = dist_sq(points[i], centroids[j]);
if (d < min_dist_sq) {
min_dist_sq = d;
best_cluster = j;
}
}
if (assignments[i] != best_cluster) {
assignments[i] = best_cluster;
changed = true;
}
}
// 更新步骤
vector<Point> new_centroids(k, {0.0, 0.0});
vector<int> counts(k, 0);
for (int i = 0; i < n; ++i) {
new_centroids[assignments[i]].x += points[i].x;
new_centroids[assignments[i]].y += points[i].y;
counts[assignments[i]]++;
}
for (int i = 0; i < k; ++i) {
if (counts[i] > 0) {
centroids[i] = {new_centroids[i].x / counts[i], new_centroids[i].y / counts[i]};
}
}
}
// 计算轮廓系数
vector<vector<int>> clusters(k);
for(int i = 0; i < n; ++i) clusters[assignments[i]].push_back(i);
double min_avg_silhouette = numeric_limits<double>::max();
int worst_cluster_idx = -1;
for (int i = 0; i < k; ++i) {
if (clusters[i].empty()) continue;
double total_silhouette = 0.0;
for (int p_idx : clusters[i]) {
double a_p = 0.0;
if (clusters[i].size() > 1) {
for (int other_idx : clusters[i]) {
if (p_idx != other_idx) {
a_p += sqrt(dist_sq(points[p_idx], points[other_idx]));
}
}
a_p /= (clusters[i].size() - 1);
}
double b_p = numeric_limits<double>::max();
for (int j = 0; j < k; ++j) {
if (i == j || clusters[j].empty()) continue;
double current_b = 0.0;
for (int other_idx : clusters[j]) {
current_b += sqrt(dist_sq(points[p_idx], points[other_idx]));
}
current_b /= clusters[j].size();
b_p = min(b_p, current_b);
}
double s_p = 0.0;
if (clusters[i].size() > 1 && b_p != numeric_limits<double>::max()) {
s_p = (b_p - a_p) / max(a_p, b_p);
}
total_silhouette += s_p;
}
double avg_silhouette = total_silhouette / clusters[i].size();
if (avg_silhouette < min_avg_silhouette) {
min_avg_silhouette = avg_silhouette;
worst_cluster_idx = i;
}
}
cout << fixed << setprecision(2) << bankers_round(centroids[worst_cluster_idx].x)
<< "," << bankers_round(centroids[worst_cluster_idx].y) << endl;
return 0;
}
import java.math.BigDecimal;
import java.math.RoundingMode;
import java.util.ArrayList;
import java.util.List;
import java.util.Scanner;
public class Main {
static class Point {
double x, y;
Point(double x, double y) { this.x = x; this.y = y; }
}
static double distSq(Point a, Point b) {
return (a.x - b.x) * (a.x - b.x) + (a.y - b.y) * (a.y - b.y);
}
public static void main(String[] args) {
Scanner sc = new Scanner(System.in);
int n = sc.nextInt();
int k = sc.nextInt();
List<Point> points = new ArrayList<>();
for (int i = 0; i < n; i++) {
points.add(new Point(sc.nextDouble(), sc.nextDouble()));
}
List<Point> centroids = new ArrayList<>();
for (int i = 0; i < k; i++) {
centroids.add(points.get(i));
}
int[] assignments = new int[n];
boolean changed = true;
while (changed) {
changed = false;
// 分配步骤
for (int i = 0; i < n; i++) {
double min_dist_sq = Double.MAX_VALUE;
int bestCluster = -1;
for (int j = 0; j < k; j++) {
double d = distSq(points.get(i), centroids.get(j));
if (d < min_dist_sq) {
min_dist_sq = d;
bestCluster = j;
}
}
if (assignments[i] != bestCluster) {
assignments[i] = bestCluster;
changed = true;
}
}
// 更新步骤
List<Point> newCentroids = new ArrayList<>();
for(int i = 0; i < k; i++) newCentroids.add(new Point(0, 0));
int[] counts = new int[k];
for (int i = 0; i < n; i++) {
int clusterIdx = assignments[i];
newCentroids.get(clusterIdx).x += points.get(i).x;
newCentroids.get(clusterIdx).y += points.get(i).y;
counts[clusterIdx]++;
}
for (int i = 0; i < k; i++) {
if (counts[i] > 0) {
centroids.set(i, new Point(newCentroids.get(i).x / counts[i], newCentroids.get(i).y / counts[i]));
}
}
}
List<List<Integer>> clusters = new ArrayList<>();
for(int i=0; i<k; i++) clusters.add(new ArrayList<>());
for(int i=0; i<n; i++) clusters.get(assignments[i]).add(i);
double minAvgSilhouette = Double.MAX_VALUE;
int worstClusterIdx = -1;
for (int i = 0; i < k; i++) {
if (clusters.get(i).isEmpty()) continue;
double totalSilhouette = 0.0;
for (int p_idx : clusters.get(i)) {
double a_p = 0.0;
if (clusters.get(i).size() > 1) {
for (int other_idx : clusters.get(i)) {
if (p_idx != other_idx) a_p += Math.sqrt(distSq(points.get(p_idx), points.get(other_idx)));
}
a_p /= (clusters.get(i).size() - 1);
}
double b_p = Double.MAX_VALUE;
for (int j = 0; j < k; j++) {
if (i == j || clusters.get(j).isEmpty()) continue;
double currentB = 0.0;
for (int other_idx : clusters.get(j)) {
currentB += Math.sqrt(distSq(points.get(p_idx), points.get(other_idx)));
}
b_p = Math.min(b_p, currentB / clusters.get(j).size());
}
double s_p = 0.0;
if (clusters.get(i).size() > 1 && b_p != Double.MAX_VALUE) {
s_p = (b_p - a_p) / Math.max(a_p, b_p);
}
totalSilhouette += s_p;
}
double avgSilhouette = totalSilhouette / clusters.get(i).size();
if (avgSilhouette < minAvgSilhouette) {
minAvgSilhouette = avgSilhouette;
worstClusterIdx = i;
}
}
BigDecimal x = new BigDecimal(centroids.get(worstClusterIdx).x).setScale(2, RoundingMode.HALF_EVEN);
BigDecimal y = new BigDecimal(centroids.get(worstClusterIdx).y).setScale(2, RoundingMode.HALF_EVEN);
System.out.printf("%.2f,%.2f\n", x, y);
}
}
import math
def dist_sq(p1, p2):
return (p1[0] - p2[0])**2 + (p1[1] - p2[1])**2
def solve():
n, k = map(int, input().split())
points = [tuple(map(int, input().split())) for _ in range(n)]
centroids = list(points[:k])
assignments = [-1] * n
while True:
new_assignments = list(assignments)
# 分配步骤
for i, p in enumerate(points):
min_dist_sq = float('inf')
best_cluster = -1
for j, c in enumerate(centroids):
d = dist_sq(p, c)
if d < min_dist_sq:
min_dist_sq = d
best_cluster = j
new_assignments[i] = best_cluster
if new_assignments == assignments:
break
assignments = new_assignments
# 更新步骤
new_centroids = [[0, 0] for _ in range(k)]
counts = [0] * k
for i, p in enumerate(points):
cluster_idx = assignments[i]
new_centroids[cluster_idx][0] += p[0]
new_centroids[cluster_idx][1] += p[1]
counts[cluster_idx] += 1
for i in range(k):
if counts[i] > 0:
centroids[i] = (new_centroids[i][0] / counts[i], new_centroids[i][1] / counts[i])
# 计算轮廓系数
clusters = [[] for _ in range(k)]
for i, cluster_idx in enumerate(assignments):
clusters[cluster_idx].append(i)
min_avg_silhouette = float('inf')
worst_cluster_idx = -1
for i in range(k):
if not clusters[i]:
continue
total_silhouette = 0
for p_idx in clusters[i]:
p = points[p_idx]
# Calculate a(p)
a_p = 0
if len(clusters[i]) > 1:
for other_idx in clusters[i]:
if p_idx != other_idx:
a_p += math.sqrt(dist_sq(p, points[other_idx]))
a_p /= (len(clusters[i]) - 1)
# Calculate b(p)
b_p = float('inf')
for j in range(k):
if i == j or not clusters[j]:
continue
current_b = 0
for other_idx in clusters[j]:
current_b += math.sqrt(dist_sq(p, points[other_idx]))
b_p = min(b_p, current_b / len(clusters[j]))
s_p = 0
if len(clusters[i]) > 1 and b_p != float('inf'):
s_p = (b_p - a_p) / max(a_p, b_p)
total_silhouette += s_p
avg_silhouette = total_silhouette / len(clusters[i])
if avg_silhouette < min_avg_silhouette:
min_avg_silhouette = avg_silhouette
worst_cluster_idx = i
worst_centroid = centroids[worst_cluster_idx]
# Python 的 round() 函数执行银行家舍入
x = round(worst_centroid[0], 2)
y = round(worst_centroid[1], 2)
print(f"{x:.2f},{y:.2f}")
solve()
算法及复杂度
- 算法:K-Means 聚类 + 轮廓系数计算
- 时间复杂度:
,其中
是 K-Means 的迭代次数,
是点数,
是簇数。K-Means 部分的复杂度是
。轮廓系数计算需要对每个点计算与其他所有点的距离,这部分的复杂度是
。
- 空间复杂度:
,用于存储数据点、质心和簇的分配信息。