划分问题即在原数组插板(高中的隔板法),所以分m组即枚举m-1个插板位置即可,比较当前loss和最小值,更新保存最小loss即可(这里可以进行一定的数学简化,标准差最小化等价于最小化各批次总成本的平方和(因为总成本固定)),代码如下:

#include <climits>
#include <iostream>
#include <vector>
using namespace std;

int main() {
    int n,m;
    cin>>n>>m;
    vector<int> table(n);
    for(auto& i : table) cin>>i;

    vector<int> preSum(n, table[0]);
    long long minLoss = LLONG_MAX;
    vector<int> bestSplits;

    vector<int> currentSplits;
    for(int i=1; i<n; i++) preSum[i] = preSum[i-1] + table[i];

    auto computeLoss = [&preSum](vector<int>& v){
        long long loss =0;
        for(int i=0; i<v.size();i++){
            int id = v[i];
            if(i==0) loss +=  preSum[id]*preSum[id];
            else loss += (preSum[id] - preSum[v[i-1]])* (preSum[id] - preSum[v[i-1]]);
        
            
        }
        loss += (preSum.back()-preSum[v.back()])* (preSum.back()-preSum[v.back()]);
        return loss;

    };

    auto dfs = [&](auto&& dfs, int start, int round){
        
        if( round == 0 ) {
            
            auto curloss = computeLoss(currentSplits);
            if(curloss < minLoss) {
                minLoss=curloss;
                bestSplits = currentSplits;
            }
            return; 
        }
        for(int i=start; i<n-1; i++){
            currentSplits.push_back(i);
            dfs(dfs,i+1,round-1);
            currentSplits.pop_back();

        }

    };
    dfs(dfs,0,m-1);
    vector<int> res;
    int last = -1;
    for (int split : bestSplits) {
        res.push_back(split - last);
        last = split;
    }
    res.push_back(n-1 - last);

    for (int i = 0; i < res.size(); i++) {
        if (i > 0) cout << " ";
        cout << res[i];
    }
    cout << endl;

    return 0;


}
// 64 位输出请用 printf("%lld")