string string string

    Time Limit: 2000/1000 MS (Java/Others)    Memory Limit: 32768/32768 K (Java/Others)
    Total Submission(s): 2270    Accepted Submission(s): 711
Problem Description
Uncle Mao is a wonderful ACMER. One day he met an easy problem, but Uncle Mao was so lazy that he left the problem to you. I hope you can give him a solution.
Given a string s, we define a substring that happens exactly  k times as an important string, and you need to find out how many substrings which are important strings.
Input
The first line contains an integer  T ( T100) implying the number of test cases.
For each test case, there are two lines:
the first line contains an integer  k ( k1) which is described above;
the second line contain a string  s ( length(s)105).
It's guaranteed that  length(s)2106.
Output
For each test case, print the number of the important substrings in a line.
Sample Input
2
2
abcabc
3
abcabcabcabc
Sample Output
6
9
Source

题目大意:给出一个字符串以及一个整数k,统计所有子串中恰好出现k次的子串个数。


题解:后缀数组。仅出现k次的子串,他们后缀的rank一定是相邻在一起的。将所后缀排序后,k个一组依次统计。即第一次统计rank 1~k 的,第二次统计rank 2~k+1的……当统计到rank i~i+k-1 时,通过RMQ求得最长公共前缀L,这说明有L个子串重复出现至少k次,但是,这L个子串中长度较短的子串重复可能出现大于k次,必须减去。这时考虑rank i-1~i+k-1的LCP,设为x,说明L个子串中长度为前x个的至少出现k+1次,最后结果必须减去x,同理,考虑rank i~i+k 的LCP,设为y,说明L个子串中长度为的前y个是的至少出现k+1次的,最后结果必须减去y。综合考虑,L需要减去max(x,y).如图:


只有以图中阴影部分为终止点的子串才是仅重复k次的子串,共有L-max(x,y)=L-y个。累计所有统计结果就是答案。

代码:

#include<bits/stdc++.h>
#define N 1000010
#define LL long long
using namespace std;

char s[N];
int sa[N],t[N],t2[N],c[N],n,rak[N],height[N],f[N][22];

void build_sa(int m,char *s)
{
    int i,*x=t,*y=t2;
    for (i=0;i<m;i++)c[i]=0;
    for (i=0;i<n;i++)c[x[i]=s[i]]++;
    for (i=1;i<m;i++)c[i]+=c[i-1];
    for (i=n-1;i>=0;i--)sa[--c[x[i]]]=i;
    for (int k=1;k<=n;k<<=1)
    {
        int p=0;
        for (i=n-k;i<n;i++)y[p++]=i;
        for (i=0;i<n;i++)if (sa[i]>=k)y[p++]=sa[i]-k;
        for (i=0;i<m;i++)c[i]=0;
        for (i=0;i<n;i++)c[x[y[i]]]++;
        for (i=0;i<m;i++)c[i]+=c[i-1];
        for (i=n-1;i>=0;i--) sa[--c[x[y[i]]]]=y[i];
        swap(x,y);
        p=1; x[sa[0]]=0;
        for (i=1;i<n;i++)
            x[sa[i]]=y[sa[i-1]]==y[sa[i]]&&y[sa[i-1]+k]==y[sa[i]+k]?p-1:p++;
        if (p>=n) break;
        m=p;
    }
}

void getheight()
{
    int i,j,k=0;
    for (i=0;i<n;i++)rak[sa[i]]=i;
    for (i=0;i<n;i++)
    {
        if (k)k--;
        if (!rak[i])continue;
        j=sa[rak[i]-1];
        while (s[i+k]==s[j+k])k++;
        height[rak[i]]=k;
    }
}

int LCP(int l,int r)
{
    if (!l || r>=n)return 0;
    if (l==r)return n-sa[r]-1;
    l++;
    if (l==r)return height[r];
    int k=(int) log2(r-l);
    return min(f[l][k],f[r-(1<<k)+1][k]);
}

int main()
{
    int T,k;
    scanf("%d",&T);
    while (T--)
    {
        scanf("%d",&k);
        scanf("%s",s);
        n=strlen(s);s[n]='0';n++;
        build_sa(300,s);
        getheight();

        for (int i=0;i<n;i++) f[i][0]=height[i];
            for (int k=1;(1<<k)<n;k++)
                for (int i=0;i<n;i++) if (i+(1<<k-1)<n)
                    f[i][k]=min(f[i][k-1],f[i+(1<<k-1)][k-1]);
        LL ans=0;
        for (int i=1;i+k-1<n;i++)
        {
            ans+=max(0,LCP(i,i+k-1)-max(LCP(i-1,i+k-1),LCP(i,i+k)));
        }
        printf("%I64d\n",ans);
    }
}