\begin{aligned}
& \sum\limits_{j = 1} ^ d\sum\limits_{i = 0} ^ n \min(i, r) \sum\limits_{k = i} ^ n (-1) ^ {k - i} \dbinom {k} {i} g(k, j) \\
& \sum\limits_{i = 1} ^ n \min(i, r) \sum\limits_{k = i} ^ n (-1) ^ {k - i} \dbinom k i \sum\limits_{j = 1} ^ d g(k, j) \\
\end{aligned}
对每个 k 预处理 h(k) = \sum\limits_{j = 1} ^ d g(k, j) 即可做到 \mathcal{O}(nm)。
#include <bits/stdc++.h>
using namespace std;
#define TIME 1e3 * clock() / CLOCKS_PER_SEC
constexpr int N = 1.5e7 + 5;
constexpr int mod = 998244353;
void add(int &x, int y) {x += y, x >= mod && (x -= mod);}
inline int ksm(int a, int b) {
int s = 1;
while(b) {
if(b & 1) s = 1ll * s * a % mod;
a = 1ll * a * a % mod, b >>= 1;
}
return s;
}
int fc[N], ifc[N];
int bin(int n, int m) {
if(n < 0 || m > n) return 0;
return 1ll * fc[n] * ifc[m] % mod * ifc[n - m] % mod;
}
int n, d, r, ans, g[N];
int main() {
#ifdef ALEX_WEI
FILE* IN = freopen("1.in", "r", stdin);
FILE* OUT = freopen("1.out", "w", stdout);
#endif
ios::sync_with_stdio(0), cin.tie(0), cout.tie(0);
cin >> n >> d >> r;
for(int i = fc[0] = 1; i < N; i++) fc[i] = 1ll * fc[i - 1] * i % mod;
ifc[N - 1] = ksm(fc[N - 1], mod - 2);
for(int i = N - 2; ~i; i--) ifc[i] = 1ll * ifc[i + 1] * (i + 1) % mod;
for(int k = 1; k <= n; k++)
for(int j = 1; j * k <= d; j++)
add(g[k], 1ll * bin(n, k) * bin(d - k * j + n - 1, n - 1) % mod);
for(int i = 1; i <= n; i++) {
int s = 0;
for(int k = i; k <= n; k++) {
int coef = 1ll * bin(k, i) * g[k] % mod;
if(k - i & 1) coef = mod - coef;
add(s, coef);
}
add(ans, 1ll * min(i, r) * s % mod);
}
cout << (1ll * ans * ksm(bin(n + d - 1, d), mod - 2) + r) % mod << "\n";
return cerr << "Time: " << TIME << " ms\n", 0;
}
$$
\begin{aligned}
& \sum\limits_{i = 1} ^ n \min(i, r) \sum\limits_{k = i} ^ n (-1) ^ {k - i} \dbinom k i h(k) \\
& \sum\limits_{k = 1} ^ n (-1) ^ k h(k) \sum\limits_{i = 1} ^ k (-1) ^ {i} \dbinom k i \min(i, r) \\
\end{aligned}
$$
设 $z(k) = \dbinom {d - k + n - 1} {n - 1}$,则 $h(k) = \sum z(kj)$,直接狄利克雷后缀和即可。问题转化为快速计算
$$
\begin{aligned}
& \sum\limits_{i = 1} ^ k (-1) ^ {i} \dbinom k i \min(i, r) \\
& \sum\limits_{i = 1} ^ k (-1) ^ {i} \dbinom k i \sum\limits_{j = 1} ^ {\min(i, r)} 1 \\
& \sum\limits_{j = 1} ^ r \sum\limits_{i = j} ^ k (-1) ^ i \dbinom k i
\end{aligned}
$$
后者是经典的杨辉三角第 $k$ 行带有交错正负号的部分和,推导方法为上指标反转,斜求和,上指标反转:
$$
\begin{aligned}
& \sum\limits_{j = 1} ^ r \sum\limits_{i = j} ^ k (-1) ^ i \dbinom k i \\
& \sum\limits_{j = 1} ^ r \sum\limits_{i = j} ^ k (-1) ^ {2i} \dbinom {i - k - 1} i \\
& \sum\limits_{j = 1} ^ r \left(\dbinom {(k + 1) - k - 1} k - \dbinom {j - k - 1} {j - 1} \right) \\
\end{aligned}
$$
因为 $k > 0$,所以 $\dbinom 0 k = 0$,得
$$
\begin{aligned}
& \sum\limits_{j = 1} ^ r -\dbinom {j - k - 1} {j - 1} \\
& \sum\limits_{j = 1} ^ r -(-1) ^ {j - 1} \dbinom {j - 1 - (j - k - 1) - 1} {j - 1} \\
& \sum\limits_{j = 1} ^ r (-1) ^ j \dbinom {k - 1} {j - 1} \\
\end{aligned}
$$
继续正负交错部分和,得
$$
\begin{aligned}
& \sum\limits_{j = 1} ^ r (-1) ^ j \dbinom {k - 1} {j - 1} \\
& -\sum\limits_{j = 0} ^ {r - 1} (-1) ^ j \dbinom {k - 1} j \\
& -\sum\limits_{j = 0} ^ {r - 1} (-1) ^ {2j} \dbinom {j - k} j \\
& -\dbinom {r - k} {r - 1} \\
& -(-1) ^ {r - 1} \dbinom {k - 2} {r - 1} \\
& (-1) ^ r \dbinom {k - 2} {r - 1} \\
\end{aligned}
$$
时间复杂度 $\mathcal{O}(n + d\log\log d)$。
直接按 $i\leq r$ 和 $i > r$ 分类讨论,做杨辉三角一行正负交错部分和也可得到 $\mathcal{O}(1)$ 计算的式子。
```cpp
#include <bits/stdc++.h>
using namespace std;
#define TIME 1e3 * clock() / CLOCKS_PER_SEC
constexpr int N = 1.5e7 + 5;
constexpr int mod = 998244353;
void add(int &x, int y) {x += y, x >= mod && (x -= mod);}
inline int ksm(int a, int b) {
int s = 1;
while(b) {
if(b & 1) s = 1ll * s * a % mod;
a = 1ll * a * a % mod, b >>= 1;
}
return s;
}
int fc[N << 1], ifc[N << 1];
int bin(int n, int m) {
if(n < 0 || m > n) return 0;
return 1ll * fc[n] * ifc[m] % mod * ifc[n - m] % mod;
}
int n, d, r, ans, z[N];
int cnt, pr[N / 12];
bool vis[N];
int main() {
#ifdef ALEX_WEI
FILE* IN = freopen("1.in", "r", stdin);
FILE* OUT = freopen("1.out", "w", stdout);
#endif
ios::sync_with_stdio(0), cin.tie(0), cout.tie(0);
cin >> n >> d >> r;
for(int i = fc[0] = 1; i <= n + d; i++) fc[i] = 1ll * fc[i - 1] * i % mod;
ifc[n + d] = ksm(fc[n + d], mod - 2);
for(int i = n + d - 1; ~i; i--) ifc[i] = 1ll * ifc[i + 1] * (i + 1) % mod;
for(int i = 1; i <= d; i++) z[i] = bin(d - i + n - 1, n - 1);
for(int i = 2; i <= d; i++) {
if(!vis[i]) pr[++cnt] = i;
for(int j = 1; j <= cnt && i * pr[j] <= d; j++) {
vis[i * pr[j]] = 1;
if(i % pr[j] == 0) break;
}
}
for(int i = 1; i <= cnt; i++)
for(int j = d / pr[i]; j; j--)
add(z[j], z[j * pr[i]]);
for(int k = 1; k <= n; k++) {
int h = 1ll * z[k] * bin(n, k) % mod, s = 0;
if(k == 1) s = mod - 1;
else s = r & 1 ? mod - bin(k - 2, r - 1) : bin(k - 2, r - 1);
if(k & 1) s = mod - s;
add(ans, 1ll * h * s % mod);
}
cout << (1ll * ans * ksm(bin(n + d - 1, d), mod - 2) + r) % mod << "\n";
return cerr << "Time: " << TIME << " ms\n", 0;
}
```