题目

As we all know, Harry Porter learns magic at Hogwarts School. However, learning magical knowledge alone is insufficient to become a great magician. Sometimes, Harry also has to gain knowledge from other certain subjects, such as language, mathematics, English, and even algorithm.
Today, Harry is on his biological class, his teacher is doing experiment with DNA right now. But the clever teacher faces a difficult problem. He has lots of genes. Every time he picks gene a and gene b. If he want to connect gene a and gene b, he should calculate the length of longest part that the gene a’s suffix and gene b’s prefix can overlap together. For example gene a is "AAT" and gene b is "ATT", then the longest common part is "AT", so the answer is 2. And can you solve this difficult problem for him?

大意是给你一堆字符串,每次询问两个字符串的前缀和后缀的最大匹配。

思路

显然要用AC自动机做。

我们能用的有两棵树:Trie树和Fail树,一个代表前缀,一个代表后缀,很自然的往两棵树上去考虑。

对于前缀而言,在将字符串插入Trie树的时候,我们同时在它经过的每个节点上加入当前字符串的编号和它到这里时的长度。

离线询问。

然后在fail树上dfs,经过的点就将它上面的信息加入set中(每个字符串建一个set,表示当前字符串前面出现的前缀的值)。然后因为fail树记录的是后缀

,所以从根到某个节点路径上的每一个节点,都是当前点的后缀,这样前缀和后缀就建立匹配了。

因为i,j打错调了一天

代码

#include<bits/stdc++.h>
#define M 100005
#define clr(x,y) memset(x,y,sizeof(x))
using namespace std;
int n,m,mp[105];
int val[M],ttt[3],h[3][M],ans[M];
set<int>SS[M]; 
struct edge{
    int nxt,to,ex;  
}G[3][M<<1];
void Add(int a,int b,int c,int op){
    G[op][++ttt[op]]=(edge){h[op][a],b,c};
    h[op][a]=ttt[op];   
}
struct AC_automaton{
    int tt,tot;
    int pa[M][5],f[M];
    void clear(){
        clr(pa,0);
        clr(f,0);
        tt=tot=0;
    }
    void Insert(char *S,int d){
        int u=0,l=strlen(S);
        for(int i=0;i<l;i++){
            if(!pa[u][S[i]])pa[u][S[i]]=++tt;
            u=pa[u][S[i]];
            Add(u,d,i+1,1);
        }
        val[d]=u;
    }
    void get_fail(){
        queue<int>Q;
        for(int i=1;i<=4;i++){
            if(pa[0][i]!=0){
                f[pa[0][i]]=0;
                Q.push(pa[0][i]);   
            }
        }
        while(!Q.empty()){
            int u=Q.front();Q.pop();
            for(int i=1;i<=4;i++){
                if(pa[u][i]!=0){
                    f[pa[u][i]]=pa[f[u]][i];
                    Q.push(pa[u][i]);
                }
                else pa[u][i]=pa[f[u]][i];
            }
        }
        for(int i=1;i<=tt;i++)Add(f[i],i,-1,0);//fail树 
    }
}Tr;
char S[M];
void dfs(int x){
    for(int i=h[1][x];i;i=G[1][i].nxt)SS[G[1][i].to].insert(G[1][i].ex);
    for(int i=h[2][x];i;i=G[2][i].nxt){
        if(SS[G[2][i].to].empty())ans[G[2][i].ex]=0;
        else ans[G[2][i].ex]=*(--SS[G[2][i].to].end());
    }
    for(int i=h[0][x];i;i=G[0][i].nxt)dfs(G[0][i].to);
    for(int i=h[1][x];i;i=G[1][i].nxt)SS[G[1][i].to].erase(G[1][i].ex);
}
int main(){
    mp['A']=1;mp['T']=2;mp['C']=3;mp['G']=4;
    while(~scanf("%d%d",&n,&m)){
        Tr.clear();clr(h,0);clr(ttt,0);clr(val,0);
        for(int i=1;i<=n;i++){
            SS[i].clear();
            scanf("%s",S);
            int l=strlen(S);
            for(int j=0;j<l;j++)S[j]=mp[S[j]];
            Tr.Insert(S,i);
        }
        Tr.get_fail();
        for(int i=1,a,b;i<=m;i++){
            scanf("%d%d",&a,&b);
            Add(val[a],b,i,2);
        }
        dfs(0);
        for(int i=1;i<=m;i++)
            printf("%d\n",ans[i]);
    }
    return 0;
}

复杂度分析:

每个串的每个点,都会进出set一次,而串的总长不超过1e5,所以复杂度是\(O(nlogn)\)的。