题解:P11365 [Ynoi2024] 新本格魔法少女りすか
来一个无需树状数组的方法,可以避免清空的问题。
考虑序列分块。
散块对散块,一种求顺序对的方法是归并排序,注意同一个区间的不能算,所以需要提前排好序,稍微魔改一下,不要刻意清空。
整块对其他的,可以预处理
不要用 vector 来存区间,很慢。
交上去发现 TLE 的很惨烈,发现瓶颈在提前排序,使用大量的 sort 实在是太慢了!因此我们使用基数排序,这样就可以近似于线性复杂度排序了,这样就可以通过了。
排序部分代码:
#define Work(a,b,m)\
memset(s,0,256*sizeof(int));\
for(int i=0;i<n;++i)\
++s[a[i] m];\
for(int i=1;i<256;++i)\
s[i]+=s[i-1];\
for(int i=n-1;~i;--i)\
b[--s[a[i] m]]=a[i]
void sort(int *a, int n)
{
if (!n) return;
Work(a,bb,&255);
Work(bb,a,>>8&255);
Work(a,bb,>>16&255);
memcpy(a,bb,n*sizeof(int));
}
如果又 T 了用 C++98,会快一些,如果还 T 了也可以把一段 for 循环赋值换成 memcpy 以及加上 inline 和 register。
#include <bits/stdc++.h>
using namespace std;
#define ll long long
#define ull unsigned long long
#define i128 __int128
#define pii pair<int, int>
#define pll pair<ll, ll>
#define all(x) (x).begin(), (x).end()
#define rint register int
const int N = 5e5 + 5, b = 556;
int n, m, id, cnt, tot, slen, a[N], in[N], bb[N], c[N], tmp[N], buc[N], md[N], rnl[N], rnr[N], vis[N], s[256];
vector<int>v[N];
ll sum, ans[N], sp[N];
struct node {
int l, r, pl, pr;
}nd[N];
inline void solve(rint l, rint r)
{
if (l == r) return;
rint mid = l + r >> 1;
solve(l, mid), solve(mid + 1, r);
rint L = md[l - 1] + 1, M = md[mid], R = md[r];
rint i = L, j = M + 1, *p = tmp;
while (i <= M && j <= R) {
if (c[i] < c[j]) {
sum += R - j + 1;
*p++ = c[i++];
} else {
*p++ = c[j++];
}
}
memcpy(p, c + i, (M - i + 1) * sizeof(int));
p += M - i + 1;
memcpy(p, c + j, (R - j + 1) * sizeof(int));
memcpy(c + L, tmp, (R - L + 1) * sizeof(int));
}
#define Work(a,b,m)\
memset(s,0,256*sizeof(int));\
for(int i=0;i<n;++i)\
++s[a[i] m];\
for(int i=1;i<256;++i)\
s[i]+=s[i-1];\
for(int i=n-1;~i;--i)\
b[--s[a[i] m]]=a[i]
void sort(int *a, int n)
{
if (!n) return;
Work(a,bb,&255);
Work(bb,a,>>8&255);
Work(a,bb,>>16&255);
memcpy(a,bb,n*sizeof(int));
}
inline void work(rint &l, rint &r)
{
rint pre = cnt;
if (in[l] == in[r] && r - l + 1 < b) {
for (int i = l; i <= r; ++i) c[++cnt] = a[i];
l = 1, r = 0;
} else {
for (; in[l] == in[l - 1]; ++l) {
c[++cnt] = a[l];
}
for (; in[r] == in[r + 1]; --r) {
c[++cnt] = a[r];
}
v[in[l]].push_back(id);
v[in[r] + 1].push_back(id);
}
sort(c + pre + 1, cnt - pre);
}
int main()
{
ios::sync_with_stdio(false);
cin.tie(0);
cin >> n >> m;
for (rint i = 1; i <= n; ++i) cin >> a[i];
for (rint i = 1; i <= n + 1; ++i) {
in[i] = (i + b - 1) / b;
}
for (rint i = 1; i <= m; ++i) {
rint len;
cin >> len;
id = i;
cnt = tot = 0;
rnl[i] = slen + 1, rnr[i] = slen + len;
for (rint j = rnl[i]; j <= rnr[i]; ++j) {
cin >> nd[j].l >> nd[j].r;
nd[j].pl = nd[j].l, nd[j].pr = nd[j].r;
work(nd[j].pl, nd[j].pr);
if (md[tot] != cnt) {
md[++tot] = cnt;
}
}
slen += len;
if (cnt) {
sum = 0;
solve(1, tot);
ans[i] += sum;
}
}
for (rint i = 1; i <= in[n]; ++i) {
memset(buc + 1, 0, n * sizeof(int));
int up = min(n, i * b);
for (rint j = (i - 1) * b + 1; j <= up; ++j) ++buc[a[j]];
for (rint j = n - 1; j >= 1; --j) buc[j] += buc[j + 1];
for (rint j = 1; j <= (i - 1) * b; ++j) sp[j] = sp[j - 1] + buc[a[j]];
memset(buc + 1, 0, n * sizeof(int));
for (rint j = (i - 1) * b + 1; j <= up; ++j) ++buc[a[j]];
for (rint j = 2; j <= n; ++j) buc[j] += buc[j - 1];
if (i * b <= n) {
sp[i * b] = 0;
for (rint j = i * b + 1; j <= n; ++j) sp[j] = sp[j - 1] + buc[a[j]];
}
for (rint j = 0; j < v[i].size(); ++j) {
vis[v[i][j]] ^= 1;
}
for (rint j = 1; j <= m; ++j) {
if (!vis[j]) continue;
ll ts = 0;
bool ok = 0;
for (rint k = rnl[j]; k <= rnr[j]; ++k) {
rint l = nd[k].l, r = nd[k].r, pl = nd[k].pl, pr = nd[k].pr;
if (pl <= pr && in[pl] <= i && i <= in[pr]) {
ok = 1;
} else {
ts += sp[r] - sp[l - 1];
if (ok && pl <= pr) ts -= sp[pr] - sp[pl - 1];
}
}
ans[j] += ts;
}
}
for (rint i = 1; i <= m; ++i) cout << ans[i] << '\n';
return 0;
}