首先数相同,位置不同的算作不同的方案,每多出一个位置就能多转移一次,所以我们可以写出这样的转移。

\(\displaystyle C[k]=\sum_{i\times j \%m==k}A[i]\times B[j]\)

我们平时写的FFT/NTT都是加号,这里是乘号,想要把乘号变成加号就要取\(log\),又因为是在mod m的意义下,m又是一个质数,因为对于\(m\)的原根\(g\),\(1\le i\le m-1\),\(g^i\)两两互不相同,所以我们可以找到m的原根,将原根作为底数,就能做多项式乘法了。

因为\(n\)比较大,所以要用多项式快速幂,当时我比较菜,还不会ln和exp的\(O(nlogn)\)快速幂,但实测\(O(nlog^2n)\)快速幂可过。

另外需要注意一下给的元素里可能有0,需要忽略。从乘法上看,倘若有了0,结果一定是0。从原根角度看,对0取log无意义。

#include<iostream>
#include<cstdio>
//#define int long long
#define LL long long
using namespace std;
int n, m, x, s, tmp, k, tot;
const int N = 100010, mod = 1004535809, G = 3, Ginv = (mod + 1) / 3;
int r[N], mo[N], to[N];
LL F[N], ans[N];
int read() 
{
	char ch; int x = 0, f = 1;
	while (!isdigit(ch = getchar())) {(ch == '-')&&(f = -f);}
	while (isdigit(ch)) {x = (x << 1) + (x << 3) + (ch ^ 48); ch = getchar();}
	return x * f;
}
LL ksm(LL a, LL b, LL mod) 
{
	LL res = 1;
	for (; b; b >>= 1, a = a * a % mod)
		if (b & 1)res = res * a % mod;
	return  res;
}
void NTT(LL *A, int lim, int opt) 
{
	for (int i = 0; i < lim; ++i)r[i] = (r[i >> 1] >> 1) | ((i & 1) ? (lim >> 1) : 0);
	for (int i = 0; i < lim; ++i)
		if (i < r[i])swap(A[i], A[r[i]]);
	int len;
	LL wn, w, x, y;
	for (int mid = 1; mid < lim; mid <<= 1) 
	{
		len = mid << 1;
		wn = ksm(opt == 1 ? G : Ginv, (mod - 1) / len, mod);
		for (int j = 0; j < lim; j += len) 
		{
			w = 1;
			for (int k = j; k < j + mid; ++k, w = w * wn % mod) 
			{
				x = A[k]; y = A[k + mid] * w % mod;
				A[k] = (x + y) % mod;
				A[k + mid] = (x - y + mod) % mod;
			}
		}
	}
	if (opt == 1)return;
	int ni = ksm(lim, mod - 2, mod);
	for (int i = 0; i < lim; ++i)A[i] = A[i] * ni % mod;
}
void MUL(LL *A, int n, LL *B, int m, LL *C) 
{
	static LL X[N], Y[N];
	int lim = 1;
	while (lim <= (n + m))lim <<= 1;
	for (int i = 0; i <= n; ++i)X[i] = A[i];
	for(int i = n + 1;i <= lim;++ i)X[i]=0;
	for (int i = 0; i <= m; ++i)Y[i] = B[i];
	for(int i = m + 1;i <= lim;++ i)Y[i]=0;
	NTT(X, lim, 1); NTT(Y, lim, 1);
	for (int i = 0; i < lim; ++i)X[i] = X[i] * Y[i] % mod;
	NTT(X, lim, -1);
	for (int i = 0; i < m - 1; ++i)X[i] = (X[i] + X[i + m - 1]) % mod;
	for (int i = 0; i < m - 1; ++i)C[i] = X[i];
}
int GET_ROOT(int m) 
{
	tot = 0;
	int tmp = m - 1;
	for (int i = 2; i * i <= tmp; ++i)
		if (!(tmp % i)) 
		{
			mo[++tot] = i;
			while (!(tmp % i))tmp /= i;
		}
	if (tmp != 1)mo[++tot] = tmp;
	bool flag;
	for (int i = 2; i <= m; ++i) 
	{
		flag = 1;
		for (int j = 1; j <= tot; ++j)
			if (ksm(i, (m - 1) / mo[j], m) == 1) 
			{
				flag = 0;
				break;
			}
		if (flag)return i;
	}
	return -1;
}
void YYCH(LL *A, int b, LL *c) 
{
	c[to[1]] = 1;
	for (; b; b >>= 1, MUL(A, m, A, m, A))
		if (b & 1)MUL(A, m, c, m, c);
}
signed main() 
{
	cin >> n >> m >> x >> s;
	k = GET_ROOT(m);
	tmp = 1;
	for (int i = 0; i < m - 1; ++i, tmp = tmp * k % m)to[tmp] = i;
	for (int i = 1; i <= s; ++i) 
	{
		tmp = read();
		if (tmp)F[to[tmp]]++;
	}
	YYCH(F, n, ans);
	cout << ans[to[x]];
	return 0;
}