只需要一组数据:a 数组 01 交替,操作全都是 2,1,n,1,因为只记录最大以及最小值时询问一定会递归到所有叶子,单次询问时间复杂度 \Theta\!\left(n\right),故即可将时间复杂度卡为 \Theta\!\left(nq\right)

附如下分块 + 排序 + 二分做法。如何选取块长:根据代码中的做法,设块长为 B,则初始化复杂度为 \Theta\!\left(\frac nB\cdot B\log B\right)=\Theta\!\left(n\log B\right),修改复杂度为 \Theta\!\left(B\log B+\frac nB\right),查询复杂度为 \Theta\!\left(B+\frac nB\log B\right),选取 B=\sqrt n最优,此时总时间复杂度为 \Theta\!\left(n\log\sqrt n+q\sqrt n\log\sqrt n\right)=\Theta\!\left(\left(n+q\sqrt n\right)\log n\right)

upd: 因为排序开销非常大,所以将块长减小可以很大程度优化运行时间,代码中设为了 81

#include <array>
#include <cstring>
#include <iostream>
#include <algorithm>
using namespace std;

const int MAXI = 1e5;
const int B = 81;
const int MAXB = MAXI / B;

using ll = long long;

struct Block {
    array<ll, B> sortedArr = {};
    ll tag = 0;
    int NumSmall(ll x) {
        return lower_bound(sortedArr.begin(), sortedArr.end(),
                           x - tag) - sortedArr.begin();
    }
};

int n;
int q;
array < ll, (MAXB + 1)* B > a = {};
array < Block, MAXB + 1 > blocks;

void InitBlock(int b) {
    auto& block = blocks[b];
    memcpy(block.sortedArr.data(), a.data() + b * B, B * sizeof(ll));
    sort(block.sortedArr.begin(), block.sortedArr.end());
}

void PushDownBlock(int b) {
    auto& block = blocks[b];
    if (block.tag) {
        for (int i = b * B; i < b * B + B; i++) {
            a[i] += block.tag;
        }
        block.tag = 0;
    }
}

void Init() {
    for (int i = 0; i <= n / B; i++) {
        InitBlock(i);
    }
}

void Addi(int l, int r, ll x) {
    int lb = l / B;
    int rb = r / B;
    if (lb == rb) {
        PushDownBlock(lb);
        for (int i = l; i <= r; i++) {
            a[i] += x;
        }
        InitBlock(lb);
        return;
    }
    PushDownBlock(lb);
    for (int i = l; i < lb * B + B; i++) {
        a[i] += x;
    }
    InitBlock(lb);
    PushDownBlock(rb);
    for (int i = rb * B; i <= r; i++) {
        a[i] += x;
    }
    InitBlock(rb);
    for (int i = lb + 1; i < rb; i++) {
        blocks[i].tag += x;
    }
}

int NumSmall(int l, int r, ll x) {
    int res = 0;
    int lb = l / B;
    int rb = r / B;
    // cout << lb << ',' << rb << endl;
    if (lb == rb) {
        for (int i = l; i <= r; i++) {
            if (a[i] + blocks[lb].tag < x) {
                res++;
            }
        }
        return res;
    }
    for (int i = l; i < lb * B + B; i++) {
        if (a[i] + blocks[lb].tag < x) {
            res++;
        }
    }
    for (int i = rb * B; i <= r; i++) {
        if (a[i] + blocks[rb].tag < x) {
            res++;
        }
    }
    for (int i = lb + 1; i < rb; i++) {
        res += blocks[i].NumSmall(x);
    }
    return res;
}

int main() {
    ios::sync_with_stdio(false);
    cin.tie(nullptr);
    cin >> n >> q;
    for (int i = 1; i <= n; i++) {
        cin >> a[i];
    }
    Init();
    while (q--) {
        int op, l, r, x;
        cin >> op >> l >> r >> x;
        if (op == 1) {
            Addi(l, r, x);
            continue;
        }
        cout << NumSmall(l, r, x) << '\n';
    }
}
// 64 位输出请用 printf("%lld")