在很多场合下能够代替 SAM 的算法:对 height 数组建立笛卡尔树
szh_AK_all · · 算法·理论
其实这是一篇学习笔记。
阅读本文需要一定的后缀数组及单调栈基础。
有关后缀数组的常用知识点
一般的,设
而有关子串的问题也常常和后缀结合起来,因为字符串的子串可以看成是该字符串的某个后缀的一段前缀,最经典的问题就是求一个字符串的本质不同子串个数。考虑每两个字典序排名相邻的后缀本质相同的前缀个数其实就是
关于 height 数组——引出笛卡尔树
在很多情况下,我们需要考虑每对
这个问题是不是很可以用笛卡尔树来解决啊!考虑对 height 建立笛卡尔树,那么在建树之后,对于一个节点
显然
在笛卡尔树上,每个节点
所以对于一个节点
下面举个例子:
模版代码:
#include <bits/stdc++.h>
using namespace std;
#define int long long
const int N = 1000005;
int a[N], b[N], sa[N], h[N], rk[N], t[N], q[N];
string s;
int n, m;
void getsa() {
memset(t, 0, sizeof(t));
memset(sa, 0, sizeof(sa));
for (int i = 1; i <= n; i++) {
a[i] = s[i];
++t[a[i]];
}
for (int i = 2; i <= 128; i++)
t[i] += t[i - 1];
for (int i = n; i >= 1; i--)
sa[t[a[i]]--] = i;
int now = 128;
for (int k = 1; k <= n; k *= 2) {
int cnt = 0;
for (int i = n - k + 1; i <= n; i++)
b[++cnt] = i;
for (int i = 1; i <= n; i++)
if (sa[i] > k)
b[++cnt] = sa[i] - k;
memset(t, 0, sizeof(t));
for (int i = 1; i <= n; i++)
t[a[i]]++;
for (int i = 2; i <= now; i++)
t[i] += t[i - 1];
for (int i = n; i >= 1; i--)
sa[t[a[b[i]]]--] = b[i], b[i] = 0;
swap(a, b);
int tot = 1;
a[sa[1]] = 1;
for (int i = 2; i <= n; i++) {
if (b[sa[i]] == b[sa[i - 1]] && b[sa[i] + k] == b[sa[i - 1] + k])
a[sa[i]] = tot;
else
a[sa[i]] = ++tot;
}
if (tot == n)
break;
now = tot;
}
}
void gethi() {
memset(rk, 0, sizeof(rk));
memset(h, 0, sizeof(h));
for (int i = 1; i <= n; i++)
rk[sa[i]] = i;
int now = 0;
for (int i = 1; i <= n; i++) {
if (rk[i] == 1)
continue;
if (now)
now--;
int j = sa[rk[i] - 1];
while (i + now <= n && j + now <= n && s[i + now] == s[j + now])
now++;
h[rk[i]] = now;
}
for (int i = 1; i <= n; i++)
h[i] = h[i + 1];
}
struct node {
int l, r, hi;
int ql, qr;
} p[N];
void bu(int d, int l, int r) {
if (!d)
return;
p[d].ql = l, p[d].qr = r;
bu(p[d].l, l, d);
bu(p[d].r, d + 1, r);
}
int qq[N];
void dikaer() {
int top = 0;
h[n] = -1;
for (int i = 1; i <= n; i++) {
int now = 0;
while (top && h[i] <= h[qq[top]]) {
p[qq[top]].r = now;
now = qq[top];
top--;
}
p[i].l = now;
qq[++top] = i;
}
}
int ans;
void dfs(int x) {
if (p[x].l)
dfs(p[x].l);
if (p[x].r)
dfs(p[x].r);
//做一些事情
}
signed main() {
cin >> s;
n = s.size();
s = " " + s;
getsa();
gethi();
dikaer();
bu(p[n].l, 1, n);
dfs(p[n].l);
}
例题
P3804 【模板】后缀自动机(SAM)
首先对 height 数组建出笛卡尔树。
注意到每个子串可以表示成原串的某个后缀的一段前缀,观察题目要求,出现次数不为
对于一个节点
Code
#include <bits/stdc++.h>
using namespace std;
#define int long long
const int N = 1000005;
int a[N], b[N], sa[N], h[N], rk[N], t[N], q[N];
string s;
int n, m;
void getsa() {
memset(t, 0, sizeof(t));
memset(sa, 0, sizeof(sa));
for (int i = 1; i <= n; i++) {
a[i] = s[i];
++t[a[i]];
}
for (int i = 2; i <= 128; i++)
t[i] += t[i - 1];
for (int i = n; i >= 1; i--)
sa[t[a[i]]--] = i;
int now = 128;
for (int k = 1; k <= n; k *= 2) {
int cnt = 0;
for (int i = n - k + 1; i <= n; i++)
b[++cnt] = i;
for (int i = 1; i <= n; i++)
if (sa[i] > k)
b[++cnt] = sa[i] - k;
memset(t, 0, sizeof(t));
for (int i = 1; i <= n; i++)
t[a[i]]++;
for (int i = 2; i <= now; i++)
t[i] += t[i - 1];
for (int i = n; i >= 1; i--)
sa[t[a[b[i]]]--] = b[i], b[i] = 0;
swap(a, b);
int tot = 1;
a[sa[1]] = 1;
for (int i = 2; i <= n; i++) {
if (b[sa[i]] == b[sa[i - 1]] && b[sa[i] + k] == b[sa[i - 1] + k])
a[sa[i]] = tot;
else
a[sa[i]] = ++tot;
}
if (tot == n)
break;
now = tot;
}
}
void gethi() {
memset(rk, 0, sizeof(rk));
memset(h, 0, sizeof(h));
for (int i = 1; i <= n; i++)
rk[sa[i]] = i;
int now = 0;
for (int i = 1; i <= n; i++) {
if (rk[i] == 1)
continue;
if (now)
now--;
int j = sa[rk[i] - 1];
while (i + now <= n && j + now <= n && s[i + now] == s[j + now])
now++;
h[rk[i]] = now;
}
for (int i = 1; i <= n; i++)
h[i] = h[i + 1];
}
struct node {
int l, r, hi;
int ql, qr;
} p[N];
void bu(int d, int l, int r) {
if (!d)
return;
p[d].ql = l, p[d].qr = r;
bu(p[d].l, l, d);
bu(p[d].r, d + 1, r);
}
int qq[N];
void dikaer() {
int top = 0;
h[n] = -1;
for (int i = 1; i <= n; i++) {
int now = 0;
while (top && h[i] <= h[qq[top]]) {
p[qq[top]].r = now;
now = qq[top];
top--;
}
p[i].l = now;
qq[++top] = i;
}
}
int ans;
void dfs(int x) {
if (p[x].l)
dfs(p[x].l);
if (p[x].r)
dfs(p[x].r);
ans = max(ans, h[x] * (p[x].qr - p[x].ql + 1));
}
signed main() {
cin >> s;
n = s.size();
s = " " + s;
getsa();
gethi();
dikaer();
bu(p[n].l, 1, n);
dfs(p[n].l);
cout << ans;
}
SP10079 STAMMER - Stammering Aliens
还是先建出笛卡尔树,题目要求出现次数不低于
当然我们还需要求每个节点所代表的左右子树的 LCP 最后出现的位置,也就是该节点及其子树所涵盖的起始位置最大的后缀,这个可以用
#include <bits/stdc++.h>
using namespace std;
#define int long long
const int N = 40005;
int a[N], b[N], sa[N], h[N], rk[N], t[N], q[N];
string s;
int n, m;
void getsa() {
memset(t, 0, sizeof(t));
memset(sa, 0, sizeof(sa));
for (int i = 1; i <= n; i++) {
a[i] = s[i];
++t[a[i]];
}
for (int i = 2; i <= 128; i++)
t[i] += t[i - 1];
for (int i = n; i >= 1; i--)
sa[t[a[i]]--] = i;
int now = 128;
for (int k = 1; k <= n; k *= 2) {
int cnt = 0;
for (int i = n - k + 1; i <= n; i++)
b[++cnt] = i;
for (int i = 1; i <= n; i++)
if (sa[i] > k)
b[++cnt] = sa[i] - k;
memset(t, 0, sizeof(t));
for (int i = 1; i <= n; i++)
t[a[i]]++;
for (int i = 2; i <= now; i++)
t[i] += t[i - 1];
for (int i = n; i >= 1; i--)
sa[t[a[b[i]]]--] = b[i], b[i] = 0;
swap(a, b);
int tot = 1;
a[sa[1]] = 1;
for (int i = 2; i <= n; i++) {
if (b[sa[i]] == b[sa[i - 1]] && b[sa[i] + k] == b[sa[i - 1] + k])
a[sa[i]] = tot;
else
a[sa[i]] = ++tot;
}
if (tot == n)
break;
now = tot;
}
}
void gethi() {
memset(rk, 0, sizeof(rk));
memset(h, 0, sizeof(h));
for (int i = 1; i <= n; i++)
rk[sa[i]] = i;
int now = 0;
for (int i = 1; i <= n; i++) {
if (rk[i] == 1)
continue;
if (now)
now--;
int j = sa[rk[i] - 1];
while (i + now <= n && j + now <= n && s[i + now] == s[j + now])
now++;
h[rk[i]] = now;
}
for (int i = 1; i <= n; i++)
h[i] = h[i + 1];
}
struct node {
int l, r, hi;
int ql, qr;
} p[N];
void bu(int d, int l, int r) {
if (!d)
return;
p[d].ql = l, p[d].qr = r;
bu(p[d].l, l, d);
bu(p[d].r, d + 1, r);
}
int qq[N];
void dikaer() {
int top = 0;
h[n] = -1;
for (int i = 1; i <= n; i++) {
int now = 0;
while (top && h[i] <= h[qq[top]]) {
p[qq[top]].r = now;
now = qq[top];
top--;
}
p[i].l = now;
qq[++top] = i;
}
}
int ans, wei = -1, maxn[N];
void dfs(int x) {
maxn[x] = max(sa[x], sa[x + 1]);
if (p[x].l)
dfs(p[x].l);
if (p[x].r)
dfs(p[x].r);
maxn[x] = max(maxn[x], max(maxn[p[x].l], maxn[p[x].r]));
if (p[x].qr - p[x].ql + 1 >= m && h[x] != 0) {
if (h[x] > ans || (h[x] == ans && maxn[x] > wei)) {
ans = h[x];
wei = maxn[x];
}
}
}
signed main() {
while (1) {
cin >> m;
if (!m)
return 0;
cin >> s;
n = s.size();
s = " " + s;
if (m == 1) {
cout << n << " " << 0 << "\n";
continue;
}
getsa();
gethi();
dikaer();
bu(p[n].l, 1, n);
dfs(p[n].l);
if (wei == -1)
cout << "none\n";
else
cout << ans << " " << wei - 1 << "\n";
ans = 0;
wei = -1;
memset(p, 0, sizeof(p));
memset(maxn, 0, sizeof(maxn));
memset(qq, 0, sizeof(qq));
}
}
P5115 Check,Check,Check one two!
记
那么可以考虑枚举每个
我们仍然可以对 height 数组建出笛卡尔树,对于当前点
考虑计算