FFT好题。

首先我们考虑如何用组合数学来求解。先放一下结论:

\(\displaystyle Ans[i]=\sum_{j=1}^ia_jC_{j+k-2}^{j-1}C_{i-j+k-1}^{i-j}\)

给一个简略的证明:

还是组合数学的老套路,我们考虑每一个位置对答案的贡献,贡献就是 \(a_j \times\)包含\(j\)位置的\(1\)阶子段和个数。对于包含\(j\)位置的\(1\)阶子段和个数,我们从\(k\)往1算比较麻烦,我们考虑从\(1\)\(k\)算。因为在算子段和时我们计算的是它包含的子段,反过来就是我们将\([j,j]\)这个子段向外扩展能扩展出多少子段。

左边是从\(j\)扩展到1,右边是从\(j\)扩展到i。左边的扩展方案数\(\times\)右边的扩展方案数=总的扩展方案数=包含\(j\)位置的\(1\)阶子段和个数。

左边的拓展方案数怎么求呢?我们一共可以扩展k次,每次可以扩展的长度为[0,j-1],最终扩展的总长度是j-1。用生成函数来表示的话就是\(\displaystyle (\sum_{i=0}^{j-1}x^i)^k\)\(x^{j-1}\)的系数。

结论:这个式子的n次系数是 \(C_{n+k-1}^{k-1}\)(C是组合数)。

证明:回想一下\(\displaystyle \sum_{i=0}^{j-1}x^i\)的每一次相乘的含义,可知\(\displaystyle (\sum_{i=0}^{j-1}x^i)^k\)中n次系数的含义就是经过k次组成n的方案数,我们可以将n看成是n个小球,k看成是k个盒子,因为组成n的每个 “1”是一样的,每个多项式是不一样的,所以球相同,盒子不同,方案数就是\(C_{n+k-1}^{k-1}\)

我才不会告诉你上面这两段是我粘的我之前的博客

所以左边的扩展的方案数就是\(C_{j+k-2}^{j-1}\),同理右边的就是\(C_{i-j+k-1}^{i-j}\)

\(A_j=a_jC_{j+k-2}^{j-1}\),\(B_j=C_{j+k-1}^{j}\),我们带进去就知道

\(\displaystyle Ans[i]=\sum_{j=i}^iA_jB_{i-j}\).

\(Ans=A*B\)

NTT一下就可以了。

等等,k这么大,怎么求C?

取模后的k依然很大,我们将 \(C_n^m\)拆开\(\displaystyle C_n^m=\frac{n!}{m!(n-m)!}=\frac{\prod_{i=n-m+1}^{n}i}{\prod_{i=1}^{m}i}\), 又因为\(C_n^0=1\)然后我们就能递推组合数啦,具体请参考代码。

#include<iostream>
#include<cstdio>
#define LL long long
#define DB double
using namespace std;
int n, k;
const int N = 400010, mod = 998244353, G = 3, Ginv = (mod + 1) / 3;
int r[N];
LL a[N], A[N], B[N], inv[N];
inline int read() 
{
	int res = 0; char ch = getchar(); bool XX = false;
	for (; !isdigit(ch); ch = getchar())(ch == '-') && (XX = true);
	for (; isdigit(ch); ch = getchar())res = (res << 3) + (res << 1) + (ch ^ 48);
	return XX ? -res : res;
}
inline LL ksm(LL a, LL b, LL mod) 
{
	LL res = 1;
	for (; b; b >>= 1, a = a * a % mod)
		if (b & 1)res = res * a % mod;
	return res;
}
inline LL mo(LL x) {return x >= mod ? x - mod : x;}
inline void NTT(LL *A, int lim, int opt) 
{
	for (int i = 0; i < lim; ++i)
		if (i < r[i])swap(A[i], A[r[i]]);
	int len, mid, j, k;
	LL wn, w, x, y;
	for (mid = 1; mid < lim; mid <<= 1) 
	{
		len = mid << 1;
		wn = ksm(opt == 1 ? G : Ginv, (mod - 1) / len, mod);
		for (j = 0; j < lim; j += len) 
		{
			w = 1;
			for (k = j; k < j + mid; ++k, w = w * wn % mod) 
			{
				x = A[k]; y = A[k + mid] * w % mod;
				A[k] = mo(x + y);
				A[k + mid] = mo(x - y + mod);
			}
		}
	}
	if (opt == 1)return;
	int ni = ksm(lim, mod - 2, mod);
	for (int i = 0; i < lim; ++i)A[i] = A[i] * ni % mod;
}
inline void MUL(LL *A, int n, LL *B, int m) 
{
	int lim = 1, i;
	while (lim < (n + m))lim <<= 1;
	for (i = 1; i < lim; ++i)
		r[i] = (r[i >> 1] >> 1) | ((i & 1) ? (lim >> 1) : 0);
	NTT(A, lim, 1); NTT(B, lim, 1);
	for (i = 0; i < lim; ++i)A[i] = A[i] * B[i] % mod;
	NTT(A, lim, -1);
}
void Write(int x, int opt) 
{
	if (opt && !x)putchar('0');
	if (!x)return;
	Write(x / 10, 0);
	putchar((x - x / 10 * 10) + '0');
}
signed main() 
{
	int i;
	cin >> n >> k;
	for (i = 1; i <= n; ++i)a[i] = read();
	inv[1] = 1;
	A[1] = 1; B[0] = 1; B[1] = k;
	for (i = 2; i <= n; ++i) 
	{
		inv[i] = mod - mod / i * inv[mod % i] % mod;
		A[i] = A[i - 1] * (i + k - 2) % mod * inv[i - 1] % mod;
		B[i] = B[i - 1] * (i + k - 1) % mod * inv[i] % mod;
	}
	for (i = 1; i <= n; ++i)A[i] = A[i] * a[i] % mod;
	MUL(A, n, B, n);
	for (i = 1; i <= n; ++i)Write(A[i], 1), putchar(' ');
	return 0;
}