发现可以用 bitset 优化,但是标准库自带的 std::bitset 不太方便实现区间翻转和区间统计这两种操作。

于是考虑手写 bitset:将每 64 位压成一个 ull,每次操作暴力翻转、暴力统计即可,单次复杂度为 O(n/64)。这个做法甚至比部分实现不够优秀的线段树跑得还要快。

#include <cstddef>
#include <iostream>
#include <string>
#include <vector>
using namespace std;

using ull = unsigned long long;

// Reads one test case from stdin and answers range queries on a binary string
// with a hand-rolled bitset that packs 64 positions into each 64-bit word.
//
// Input:  n q s, followed by q lines "op l r" (1-based, inclusive positions).
//   op == 1    flip every bit in [l, r]
//   otherwise  print the number of set bits in [l, r], followed by '\n'
//
// All state is local to the call, so the function may be invoked repeatedly
// without leftovers from a previous run, and the storage is sized from n
// instead of a fixed capacity. Reading stops quietly on malformed or truncated
// input. Query ranges are clamped to [1, n]; a range that is empty after
// clamping flips nothing and counts as 0.
void Solve() {
    int n = 0;
    int q = 0;
    std::string s;
    if (!(std::cin >> n >> q >> s) || n < 0) {
        return;
    }
    const std::size_t bit_count = static_cast<std::size_t>(n);

    // One spare word keeps words[r >> 6] addressable when r == n is a multiple
    // of 64 (the mask is empty there, but the word is still touched).
    std::vector<ull> words((bit_count >> 6) + 1, 0);

    // Never read past the end of s if it is shorter than the announced length.
    const std::size_t usable = s.size() < bit_count ? s.size() : bit_count;
    for (std::size_t i = 0; i < usable; ++i) {
        if (s[i] == '1') {
            words[i >> 6] |= 1ull << (i & 63);
        }
    }

    // Mask selecting the bits strictly below `pos` inside the word holding it.
    const auto low_mask = [](int pos) -> ull { return (1ull << (pos & 63)) - 1; };

    while (q-- > 0) {
        int op = 0;
        int l = 0;
        int r = 0;
        if (!(std::cin >> op >> l >> r)) {
            break;
        }
        if (l < 1) {
            l = 1;
        }
        if (r > n) {
            r = n;
        }
        if (l > r) {
            // Empty range: nothing to flip, and a count query still needs a line.
            if (op != 1) {
                std::cout << 0 << '\n';
            }
            continue;
        }

        --l;  // from here on the range is the half-open, 0-based [l, r)
        const int lb = l >> 6;
        const int rb = r >> 6;

        if (op == 1) {
            // Flip whole words [lb, rb), undo the bits below l in word lb,
            // then flip the bits below r in word rb.
            for (int i = lb; i < rb; ++i) {
                words[i] = ~words[i];
            }
            words[lb] ^= low_mask(l);
            words[rb] ^= low_mask(r);
            continue;
        }

        // ones[l, r) = ones below r in word rb - ones below l in word lb
        //              + ones in the whole words [lb, rb).
        int res = __builtin_popcountll(words[rb] & low_mask(r)) -
                  __builtin_popcountll(words[lb] & low_mask(l));
        for (int i = lb; i < rb; ++i) {
            res += __builtin_popcountll(words[i]);
        }
        std::cout << res << '\n';
    }
}

// Program entry point: configures the standard streams for bulk I/O and then
// hands control to Solve() for the single test case.
int main() {
    // Do not flush std::cout before every read from std::cin.
    std::cin.tie(nullptr);
    // Decouple the C++ streams from C stdio; this program never mixes the two.
    std::ios_base::sync_with_stdio(false);

    Solve();
    return 0;
}