[HNOI2006]最短母串问题——AC自动机+状压+bfs环形处理

Description

给定n个字符串（S1,S2,…,Sn），要求找到一个最短的字符串T，使得这n个字符串（S1,S2,…,Sn）都是T的子串。

内存限制：32MB

Input

第一行是一个正整数n（n<=12），表示给定的字符串的个数。

以下的n行，每行有一个全由大写字母组成的字符串。每个字符串的长度不超过50.

Output

只有一行，为找到的最短的字符串T。在保证最短的前提下，

如果有多个字符串都满足要求，那么必须输出按字典序排列的第一个。

Sample Input

2
ABCD
BCDABC

Sample Output

ABCDABC

Solution

一看是一个AC自动机。

一看是一个状压。

一看AC自动机节点再记录一个has包含的字符串集合。

一看要输出方案，肯定也要先考虑怎么弄出最短的长度。

f[i][S]表示：匹配到AC自动机上的i点、已包含的字符串集合为S时的最短长度；最终答案是所有f[i][(1<<n)-1]中的最小值。

一看转移有环，然后无法再加入新的阶段，因为会MLE会TLE

所以要环形处理。

一看是一个取min的dp

所以可以考虑最短路。

dij，spfa复杂度卡不过。

一看边权只有1……

BFS大法吼！

长度OK

怎么处理方案？

ywy_c_asm:

一遍bfs求出最短距离len，然后再一遍dfs找方案。

dfs时，相当于再把bfs的最短路怎么来的再访问一遍。如果dis[y]=dis[x]+1那么可以转移的，才可以访问。

还需要知道一个点到终点的最短路。

（反向多起点BFS???不行，或运算不可逆）

我们dfs时就可以实现的。类似树形dp

然后如果一个点到一个(1<<n)-1状态的点距离为juli的话，如果有dis[x]+juli[x]==len，那么，这次选择的这个y，所填的字符，就是最终答案的一个字符。

直接加入答案字符串。

char从A~Z枚举。保证第一次搜到的是字典序最小的。

而且一定是连续加入ans字符串。

dfs开头放上，如果tot==len return；

代码：

#include<bits/stdc++.h>

using namespace std;

typedef long long ll;

// Problem limits: at most N strings, each at most 50 upper-case letters.
const int N=12;

// Capacity of the automaton arrays: root plus up to N*50 inserted nodes, with a
// little slack so M-sized character buffers can also hold a 1-based string of
// maximal length together with its terminating NUL.
const int M=N*50+5;

// Number of search states. A state is indexed as node*(1<<n)+mask, so with
// node<=N*50 and mask<(1<<N) the largest index is (N*50)*(1<<N)+(1<<N)-1.
const int U=(N*50+1)*(1<<N);

// "Unreached" marker; equals the int produced by memset(...,0x3f,...).
const int inf=0x3f3f3f3f;

int n;       // number of input strings

char s[56];  // input buffer; a string occupies s[1..len] (1-based)

// Aho-Corasick automaton over 'A'..'Z'. Node 0 is the root.
struct trie{
    int fail[M],ch[M][26];  // failure links / transitions (complete after build())
    int has[M];             // has[x]: bitmask of input strings that end at node x
                            // (after build(): also those ending at any suffix of x)
    int cnt;                // index of the last allocated node

    // Inserts the 1-based string s[1..l] as pattern number id (1-based).
    // Only characters 'A'..'Z' are supported.
    void ins(char *s,int l,int id){
        int now=0;
        for(int i=1;i<=l;i++){
            int x=s[i]-'A';
            if(!ch[now][x]) ch[now][x]=++cnt;
            now=ch[now][x];
        }
        has[now]|=(1<<(id-1));
    }

    // Computes failure links in BFS order and fills every missing transition
    // with the transition of the failure node, so ch[x][c] is defined for all x,c.
    void build(){
        std::queue<int>q;
        for(int i=0;i<26;i++){
            if(ch[0][i]) fail[ch[0][i]]=0,q.push(ch[0][i]);
        }
        while(!q.empty()){
            int x=q.front();q.pop();
            // fail[x] is shallower than x, so it was dequeued earlier and its
            // mask is already final.
            has[x]|=has[fail[x]];
            for(int i=0;i<26;i++){
                if(ch[x][i]){
                    fail[ch[x][i]]=ch[fail[x]][i];
                    q.push(ch[x][i]);
                }
                else ch[x][i]=ch[fail[x]][i];
            }
        }
    }
}ac;

// Packs a search state (automaton node ptr, covered-set bitmask st) into a
// single array index. Requires 0<=st<(1<<n) so that distinct states get
// distinct indices.
int get(int ptr,int st){
    return ptr*(1<<n)+st;
}

int dis[U];

bool vis[U];

struct node{

    int P,S;

};

queue<node>q;

void bfs(){

    memset(dis,0x3f,sizeof dis);

    int str=get(,);

    dis[str]=;

    vis[str]=;

    node nn;nn.P=,nn.S=;

    q.push(nn);

    while(!q.empty()){

        node lp=q.front();q.pop();

        for(int i=;i<=;i++){

            int to=ac.ch[lp.P][i];

            int NS=lp.S|ac.has[ac.ch[lp.P][i]];

            int NID=get(to,NS);

            if(!vis[NID]){

                dis[NID]=dis[get(lp.P,lp.S)]+;

                vis[NID]=;

                node kk;

                kk.P=to;kk.S=NS;

                q.push(kk);

            }

        }

    }

}

int len;

int tot;

char ans[M];

int juli[U];

void dfs(int ptr,int st){

    int now=get(ptr,st);

    juli[now]=inf;

    if(tot==len) return;

    if(st==(<<n)-) {

        juli[now]=;return;

    }

    for(int i=;i<=;i++){

        int to=ac.ch[ptr][i];

        int NS=st|ac.has[to];

        int NID=get(to,NS);

        if(dis[NID]==dis[now]+){

            if(!vis[NID]){

                vis[NID]=;

                dfs(to,NS);

            }

            juli[now]=min(juli[now],juli[NID]+);

            if(dis[now]+juli[now]==len){

                ans[++tot]='A'+i;return;

            }

        }

    }

}

// Reads n and the n strings, builds the automaton, computes shortest distances
// with bfs(), then recovers the lexicographically smallest shortest superstring
// with dfs() and prints it.
int main(){
    if(std::scanf("%d",&n)!=1) return 0;
    for(int i=1;i<=n;i++){
        std::scanf("%s",s+1);                         // stored 1-based
        const int l=static_cast<int>(std::strlen(s+1));
        ac.ins(s,l,i);
    }
    ac.build();
    bfs();

    // Shortest length = minimum distance over all states whose mask is full.
    len=inf;
    for(int i=0;i<=ac.cnt;i++){
        const int id=get(i,(1<<n)-1);
        len=std::min(len,dis[id]);
    }

    // dfs() reuses vis[] as its own visited marker.
    std::memset(vis,0,sizeof vis);
    std::memset(juli,0x3f,sizeof juli);
    vis[get(0,0)]=1;
    dfs(0,0);

    // dfs() collected the answer back to front.
    for(int i=tot;i>=1;i--){
        std::printf("%c",ans[i]);
    }
    return 0;
}

但是不够优美。

为什么要bfs然后再dfs呢？

bfs也可以求前驱啊！！
bfs时，第一更新到的就是最短路。

如果我们char A~Z，那么更新到的char

也就叫from[y]，也就是到y这个点所形成的字典序最小字符串最后一个字符。

记录from，pre（也就是前驱）

bfs后，先找到len

再把所有f[i][(1<<n)-1]的字符找出来，cmp一下。

反正复杂度不超过600*600

代码：

#include<bits/stdc++.h>

using namespace std;

typedef long long ll;

// Problem limits: at most N strings, each at most 50 upper-case letters.
const int N=12;

// Capacity of the automaton arrays: root plus up to N*50 inserted nodes, with a
// little slack so M-sized character buffers can also hold a 1-based string of
// maximal length together with its terminating NUL.
const int M=N*50+5;

// Number of search states. A state is indexed as node*(1<<n)+mask, so with
// node<=N*50 and mask<(1<<N) the largest index is (N*50)*(1<<N)+(1<<N)-1.
const int U=(N*50+1)*(1<<N);

// "Unreached" marker; equals the int produced by memset(...,0x3f,...).
const int inf=0x3f3f3f3f;

int n;       // number of input strings

char s[56];  // input buffer; a string occupies s[1..len] (1-based)

// Aho-Corasick automaton over 'A'..'Z'. Node 0 is the root.
struct trie{
    int fail[M],ch[M][26];  // failure links / transitions (complete after build())
    int has[M];             // has[x]: bitmask of input strings that end at node x
                            // (after build(): also those ending at any suffix of x)
    int cnt;                // index of the last allocated node

    // Inserts the 1-based string s[1..l] as pattern number id (1-based).
    // Only characters 'A'..'Z' are supported.
    void ins(char *s,int l,int id){
        int now=0;
        for(int i=1;i<=l;i++){
            int x=s[i]-'A';
            if(!ch[now][x]) ch[now][x]=++cnt;
            now=ch[now][x];
        }
        has[now]|=(1<<(id-1));
    }

    // Computes failure links in BFS order and fills every missing transition
    // with the transition of the failure node, so ch[x][c] is defined for all x,c.
    void build(){
        std::queue<int>q;
        for(int i=0;i<26;i++){
            if(ch[0][i]) fail[ch[0][i]]=0,q.push(ch[0][i]);
        }
        while(!q.empty()){
            int x=q.front();q.pop();
            // fail[x] is shallower than x, so it was dequeued earlier and its
            // mask is already final.
            has[x]|=has[fail[x]];
            for(int i=0;i<26;i++){
                if(ch[x][i]){
                    fail[ch[x][i]]=ch[fail[x]][i];
                    q.push(ch[x][i]);
                }
                else ch[x][i]=ch[fail[x]][i];
            }
        }
    }
}ac;

// Packs a search state (automaton node ptr, covered-set bitmask st) into a
// single array index. Requires 0<=st<(1<<n) so that distinct states get
// distinct indices.
int get(int ptr,int st){
    return ptr*(1<<n)+st;
}

int dis[U];

bool vis[U];

struct node{

    int P,S;

};

queue<node>q;

int pre[U];

int from[U];

void bfs(){

    memset(dis,0x3f,sizeof dis);

    int str=get(,);

    dis[str]=;

    vis[str]=;

    pre[str]=-;//warning!!

    node nn;nn.P=,nn.S=;

    q.push(nn);

    while(!q.empty()){

        node lp=q.front();q.pop();

        for(int i=;i<=;i++){

            int to=ac.ch[lp.P][i];

            int NS=lp.S|ac.has[ac.ch[lp.P][i]];

            int NID=get(to,NS);

            if(!vis[NID]){

                dis[NID]=dis[get(lp.P,lp.S)]+;

                vis[NID]=;

                from[NID]=i+;//warning!!!!

                pre[NID]=get(lp.P,lp.S);

                node kk;

                kk.P=to;kk.S=NS;

                q.push(kk);

            }

        }

    }

}

int len;      // length of the shortest answer

int tot;

char ans[M];  // best candidate so far, 1-based, NUL-terminated

char a[M];    // candidate currently being reconstructed, 1-based

bool fl;      // true once ans holds a first candidate

// Returns true iff a[1..len] is lexicographically strictly smaller than
// b[1..len]. Equal strings compare as "not better"; previously control fell off
// the end of the function in that case, which is undefined behaviour.
bool cmp(char *a,char *b){
    for(int i=1;i<=len;i++){
        if(a[i]<b[i]) return true;
        if(a[i]>b[i]) return false;
    }
    return false;
}

// Reads n and the n strings, builds the automaton and runs bfs(). Then, for
// every full-mask state at minimal distance, rebuilds its string through the
// pre[]/from[] chain and keeps the lexicographically smallest one.
int main(){
    if(std::scanf("%d",&n)!=1) return 0;
    for(int i=1;i<=n;i++){
        std::scanf("%s",s+1);                         // stored 1-based
        const int l=static_cast<int>(std::strlen(s+1));
        ac.ins(s,l,i);
    }
    ac.build();
    bfs();

    // Shortest length = minimum distance over all states whose mask is full.
    len=inf;
    for(int i=0;i<=ac.cnt;i++){
        const int id=get(i,(1<<n)-1);
        len=std::min(len,dis[id]);
    }

    fl=false;
    for(int i=0;i<=ac.cnt;i++){
        const int id=get(i,(1<<n)-1);
        if(dis[id]==len){
            // Follow predecessors back to the start, filling a[len..1].
            int tmp=len;
            int z=id;
            while(pre[z]!=-1){
                a[tmp]='A'+(from[z]-1);
                z=pre[z];tmp--;
            }
            if(!fl){
                fl=true;
                std::memcpy(ans,a,sizeof a);
            }
            else{
                if(cmp(a,ans)) std::memcpy(ans,a,sizeof a);
            }
        }
    }
    // a[len+1] is never written and stays 0, so ans+1 is NUL-terminated.
    std::printf("%s",ans+1);
    return 0;
}

但是还不够优美！！

为什么bfs之后还要再比较一遍字符串呢？？

bfs中，第一次到达一个(1<<n)-1的点,

这个点就一定是最优解的最后一个节点！！！

因为，bfs分层图保证了最短。

for char A~Z保证了字典序最优。

直接输出即可。

代码：

#include<bits/stdc++.h>

using namespace std;

typedef long long ll;

// Problem limits: at most N strings, each at most 50 upper-case letters.
const int N=12;

// Capacity of the automaton arrays: root plus up to N*50 inserted nodes, with a
// little slack so M-sized character buffers can also hold a 1-based string of
// maximal length together with its terminating NUL.
const int M=N*50+5;

// Number of search states. A state is indexed as node*(1<<n)+mask, so with
// node<=N*50 and mask<(1<<N) the largest index is (N*50)*(1<<N)+(1<<N)-1.
const int U=(N*50+1)*(1<<N);

// "Unreached" marker; equals the int produced by memset(...,0x3f,...).
const int inf=0x3f3f3f3f;

int n;       // number of input strings

char s[56];  // input buffer; a string occupies s[1..len] (1-based)

// Aho-Corasick automaton over 'A'..'Z'. Node 0 is the root.
struct trie{
    int fail[M],ch[M][26];  // failure links / transitions (complete after build())
    int has[M];             // has[x]: bitmask of input strings that end at node x
                            // (after build(): also those ending at any suffix of x)
    int cnt;                // index of the last allocated node

    // Inserts the 1-based string s[1..l] as pattern number id (1-based).
    // Only characters 'A'..'Z' are supported.
    void ins(char *s,int l,int id){
        int now=0;
        for(int i=1;i<=l;i++){
            int x=s[i]-'A';
            if(!ch[now][x]) ch[now][x]=++cnt;
            now=ch[now][x];
        }
        has[now]|=(1<<(id-1));
    }

    // Computes failure links in BFS order and fills every missing transition
    // with the transition of the failure node, so ch[x][c] is defined for all x,c.
    void build(){
        std::queue<int>q;
        for(int i=0;i<26;i++){
            if(ch[0][i]) fail[ch[0][i]]=0,q.push(ch[0][i]);
        }
        while(!q.empty()){
            int x=q.front();q.pop();
            // fail[x] is shallower than x, so it was dequeued earlier and its
            // mask is already final.
            has[x]|=has[fail[x]];
            for(int i=0;i<26;i++){
                if(ch[x][i]){
                    fail[ch[x][i]]=ch[fail[x]][i];
                    q.push(ch[x][i]);
                }
                else ch[x][i]=ch[fail[x]][i];
            }
        }
    }
}ac;

// Packs a search state (automaton node ptr, covered-set bitmask st) into a
// single array index. Requires 0<=st<(1<<n) so that distinct states get
// distinct indices.
int get(int ptr,int st){
    return ptr*(1<<n)+st;
}

int dis[U];

bool vis[U];

struct node{

    int P,S;

};

queue<node>q;

int pre[U];

int from[U];

int len;

char ans[M];

void bfs(){

    memset(dis,0x3f,sizeof dis);

    int str=get(,);

    dis[str]=;

    vis[str]=;

    pre[str]=-;//warning!!

    node nn;nn.P=,nn.S=;

    q.push(nn);

    while(!q.empty()){

        node lp=q.front();q.pop();

        for(int i=;i<=;i++){

            int to=ac.ch[lp.P][i];

            int NS=lp.S|ac.has[ac.ch[lp.P][i]];

            int NID=get(to,NS);

            if(!vis[NID]){

                dis[NID]=dis[get(lp.P,lp.S)]+;

                vis[NID]=;

                from[NID]=i+;//warning!!!!

                pre[NID]=get(lp.P,lp.S);

                node kk;

                kk.P=to;kk.S=NS;

                q.push(kk);

                if(NS==(<<n)-){

                    int z=NID;

                    while(pre[z]!=-){

                        ans[++len]='A'+(from[z]-);

                        z=pre[z];

                    }

                    return;

                }

            }

        }

    }

}

// Reads n and the n strings, builds the automaton, lets bfs() find and record
// the answer, and prints it.
int main(){
    if(std::scanf("%d",&n)!=1) return 0;
    for(int i=1;i<=n;i++){
        std::scanf("%s",s+1);                         // stored 1-based
        const int l=static_cast<int>(std::strlen(s+1));
        ac.ins(s,l,i);
    }
    ac.build();
    bfs();
    // bfs() stored the answer back to front in ans[1..len].
    for(int i=len;i>=1;i--) std::printf("%c",ans[i]);
    return 0;
}

总结：

有的时候我们只关心最优答案。

但有的时候我们也关心方案。（毕竟知道方案比较实用）

方案的输出就要求高了一些。

但是肯定也是在最优答案的基础上的。

关于路径转移，凑字典序最小，经常通过松弛最优解的顺序，恰好可以保证松弛路径就是最小字典序。

本题就是一个很好的例子。