Ac自动机基础题集合

Ac_automaton的板子打熟以后发现碰到题不会做，而且还是比较纯的板子，只要改几处地方就可以，Ac_automation有许多优秀而fantasy的性质，下面粘几个题，来记录一下做题的心得。

1、Keywords Search

这个题最大的问题是我们很可能漏掉一些解。建Ac_automaton（Trie图），额外维护一个size变量，在单词末尾++。

然后我们拿主串在Ac_automaton上面跑，每到一个节点都遍历其fail链（即沿fail指针一路跳到根），因为fail指向当前串在Trie中出现的最长后缀，只要把沿途的size累加上，就能把可能漏掉的解全都找回来。节点遍历完后令其size=-1，通过这个操作，我们可以保证：当一个节点被遍历后，其fail链上所有可能更新答案的节点一定已经被遍历，所以下次碰到size=-1的节点就可以直接停止，时间复杂度更加优秀。而且照着代码画一画Trie图，发现只要把主串跑一遍，不需要别的操作，因为在Trie树的最底层，我们已经把空儿子指到了其fail的儿子，形成一张图，这种“类环”模型可以放心大胆地把主串“扔”到Ac_automaton上“跑”一遍。

#include<iostream>

#include<algorithm>

#include<cmath>

#include<cstdio>

#include<cstring>

#include<vector>

#include<queue>

#include<map>

#include<set>

using namespace std;

int T,n;

// Shared input buffer. Strings are stored 1-based: the first character is s[1].
// NOTE(review): the capacity literal was missing from the source; 1000010 assumes
// a text of at most 10^6 characters -- confirm against the problem limits.
char s[1000010];

/**
 * Aho-Corasick automaton (trie graph) over the lowercase alphabet 'a'..'z'.
 *
 * Usage: init(), then insert() once per pattern, then build(), then query().
 * Every method reads its string from the global buffer `s`, starting at s[1].
 * Characters outside 'a'..'z' are not checked and must not appear in the input.
 */
struct Ac_automation{

    int tot; // unused; kept so the public members of the struct stay unchanged

    struct node{
        node *son[26]; // child per letter; after build() also the goto transition
        int size;      // number of patterns ending here; -1 once consumed by query()
        node *fail;    // node of the longest proper suffix that exists in the trie

        node():size(0),fail(NULL){
            std::memset(son,0,sizeof(son));
        }
    };

    node *root;
    std::vector<node*> pool; // every node ever allocated, so init() can release them

    // Allocates a node and remembers it for later release.
    node *make_node(){
        node *p=new node();
        pool.push_back(p);
        return p;
    }

    // Starts a fresh automaton. Releases the nodes of the previous test case,
    // which would otherwise leak once per test case.
    void init(){
        for(std::size_t i=0;i<pool.size();i++) delete pool[i];
        pool.clear();
        root=make_node();
    }

    // Inserts the pattern stored in s[1..] and counts it at its final node.
    // Identical patterns accumulate in `size`.
    void insert(){
        int l=(int)std::strlen(s+1);
        node *now=root;
        for(int i=1;i<=l;i++){
            int c=s[i]-'a';
            if(!now->son[c]) now->son[c]=make_node();
            now=now->son[c];
        }
        now->size++;
    }

    // Computes fail pointers by BFS and redirects every missing child to the
    // matching child of the fail node, so query() never has to follow fail
    // pointers just to find a transition.
    void build(){
        std::queue<node*> q;
        for(int i=0;i<26;i++){
            if(root->son[i]){
                root->son[i]->fail=root;
                q.push(root->son[i]);
            }else root->son[i]=root;
        }
        while(!q.empty()){
            node *x=q.front();
            q.pop();
            for(int i=0;i<26;i++){
                if(x->son[i]){
                    x->son[i]->fail=x->fail->son[i];
                    q.push(x->son[i]);
                }else x->son[i]=x->fail->son[i];
            }
        }
    }

    // Runs the text stored in s[1..] through the automaton and returns how many
    // inserted patterns occur in it (each inserted pattern is counted once).
    // Counted nodes get size = -1, so a second call returns 0 until the
    // automaton is rebuilt.
    int query(){
        node *now=root;
        int l=(int)std::strlen(s+1),ans=0;
        for(int i=1;i<=l;i++){
            now=now->son[s[i]-'a'];
            // Walk the fail chain; a node with size == -1 was already counted,
            // and so was everything above it, so the walk can stop there.
            for(node *j=now;j!=root&&j->size!=-1;j=j->fail){
                ans+=j->size;
                j->size=-1;
            }
        }
        return ans;
    }

}Ac;

// Reads T test cases. Each case: n patterns, then one text; prints the number
// of patterns that occur in the text. Strings are read 1-based into `s`.
int main(){
    if(scanf("%d",&T)!=1) return 0;
    while(T--){
        Ac.init();
        if(scanf("%d",&n)!=1) return 0;
        for(int i=1;i<=n;i++){
            if(scanf("%s",s+1)!=1) return 0;
            Ac.insert();
        }
        Ac.build();
        if(scanf("%s",s+1)!=1) return 0;
        printf("%d\n",Ac.query());
    }
    return 0;
}

2、玄武密码

建立Ac_automaton时，维护一下fa关系，将单词末尾存下来。

建完以后，把主串在Ac_automaton上跑一遍，把所有其能到达的节点打上标记，还是一样，遍历fail链，当发现有一个位置已经被标记过，那么其以上的fail（原谅我这不标准的措辞）一定被标记过了，可以节省时间。

最后从单词尾沿其父亲向上走，同时记录没有被标记节点个数cnt，碰到一个标记节点，直接跳出，len-cnt就是被标记，即出现最长的那个啦。

#include<iostream>

#include<algorithm>

#include<cmath>

#include<cstdio>

#include<cstring>

#include<vector>

#include<queue>

#include<stack>

#include<map>

using namespace std;

// s  : the text, stored 1-based (first character at s[1]).
// s1 : the patterns, s1[k] stored 1-based as well.
// NOTE(review): the array bounds were missing from the source; these assume a
// text of at most 10^7 characters and at most 10^5 patterns of length <= 100
// -- confirm against the problem limits.
char s[10000010],s1[100010][110];

int n,m;

// Maps a direction letter to an alphabet index in 0..3.
// Returns -1 for any other character instead of falling off the end of the
// function, which was undefined behaviour.
int fuction(char x){
    switch(x){
        case 'W': return 0;
        case 'S': return 1;
        case 'N': return 2;
        case 'E': return 3;
    }
    return -1;
}

/**
 * Aho-Corasick automaton over the alphabet {W,S,N,E} that answers, for every
 * pattern, the length of its longest prefix occurring in the text.
 *
 * Usage: clear(), insert(k) for each pattern, generate(), running(), query(k).
 */
struct Ac_automation{

    struct node{
        node *fail;   // longest proper suffix that exists in the trie
        node *son[4]; // child per direction; after generate() also the goto transition
        node *fa;     // parent in the trie (NULL for the root)
        int v;        // 1 once running() has reached this node, else 0

        node():fail(NULL),fa(NULL),v(0){
            std::memset(son,0,sizeof(son));
        }
    };

    node *root;
    node *tail[100010]; // tail[k] = last trie node of pattern k

    // Starts a fresh automaton with an empty root.
    void clear(){
        root=new node();
    }

    // Inserts pattern s1[num][1..] and records its last node in tail[num].
    // Insertion stops at the first character that is not W/S/N/E; such a
    // character can never be matched, so nothing beyond it is needed.
    void insert(int num){
        node *now=root;
        int l=(int)std::strlen(s1[num]+1);
        for(int i=1;i<=l;i++){
            int x=fuction(s1[num][i]);
            if(x<0) break;
            if(!now->son[x]){
                now->son[x]=new node();
                now->son[x]->fa=now;
            }
            now=now->son[x];
        }
        tail[num]=now;
    }

    // BFS that sets fail pointers and redirects missing children to the
    // corresponding child of the fail node.
    void generate(){
        std::queue<node*> q;
        for(int i=0;i<4;i++)
            if(root->son[i]){
                root->son[i]->fail=root;
                q.push(root->son[i]);
            }else root->son[i]=root;
        while(!q.empty()){
            node *now=q.front();
            q.pop();
            for(int i=0;i<4;i++)
                if(now->son[i]){
                    now->son[i]->fail=now->fail->son[i];
                    q.push(now->son[i]);
                }else now->son[i]=now->fail->son[i];
        }
    }

    // Feeds the text s[1..] through the automaton and marks every node reached,
    // together with its fail chain. The walk up the fail chain stops at the
    // first node that is already marked, because marking always proceeds from
    // a node upwards, so everything above a marked node is marked too.
    void running(){
        node *now=root;
        int l=(int)std::strlen(s+1);
        for(int i=1;i<=l;i++){
            int x=fuction(s[i]);
            if(x<0){ now=root; continue; } // no pattern can match across an invalid character
            now=now->son[x];
            for(node *j=now;!j->v&&j!=root;j=j->fail)
                j->v=1;
        }
    }

    // Returns the length of the longest prefix of pattern x that occurs in the
    // text: the depth of the deepest marked node on the path root -> tail[x].
    int query(int x){
        node *i=tail[x];
        while(i!=root&&!i->v) i=i->fa; // skip the unmatched end of the pattern
        int len=0;
        for(;i!=root;i=i->fa) len++;
        return len;
    }

}Acm;

// Reads the text length n, the pattern count m, the text and the m patterns,
// then prints for each pattern the length of its longest prefix found in the
// text. All strings are read 1-based.
int main(){
    if(scanf("%d%d",&n,&m)!=2) return 0;
    if(scanf("%s",s+1)!=1) return 0;
    Acm.clear();
    for(int i=1;i<=m;i++){
        if(scanf("%s",s1[i]+1)!=1) return 0;
        Acm.insert(i);
    }
    Acm.generate();
    Acm.running();
    for(int i=1;i<=m;i++)
        printf("%d\n",Acm.query(i));
    return 0;
}

3、单词

由于我们建立Ac_automaton时会指出fail指针，fail指针一定指向以前“出现”过的节点，就和随机数据生成一颗树一样，把所有fail指针“倒”过来也会形成一棵树，我们称之为“fail树”。

这道题，我们建立Ac_automation，记录词尾，对每个插入节点权值++。然后统计fail树的子树和，词尾的sum值即是答案。

这样做的理由是，fail指向某个单词的最长后缀，即该单词为fail指向单词的“父串”，所以某个单词x出现在单词y中，则y的fail指针会指向x（这里仅仅是指后缀情况，非后缀情况可以转化为后缀情况），由于fail树把fail指针倒过来，求出子树和，就是该节点为末尾的单词出现的次数，因为我们把每个插入节点的贡献计算在内。

实现过程中，没有必要真的建出fail树。建Ac_automaton时，我们对trie进行bfs，节点的入队顺序就是fail树的一个拓扑序，把它存下来，再倒着把每个节点的sum累加到其fail指向的节点上，就求出了fail树的子树和，因为一个节点的fail肯定在它之前入队。

#include<iostream>

#include<algorithm>

#include<cmath>

#include<cstdio>

#include<cstring>

#include<vector>

#include<queue>

#include<stack>

using namespace std;

int n;

// s[k] holds word k, stored 1-based (first character at s[k][1]).
// NOTE(review): the array bounds were missing from the source; these assume at
// most 200 words of at most 10^6 characters each -- confirm against the
// problem limits. The array is large but only the pages actually written are
// touched.
char s[210][1000010];

/**
 * Aho-Corasick automaton over 'a'..'z' that counts, for every inserted word,
 * how many times it occurs as a substring across all inserted words.
 *
 * Usage: clear(), insert(k) for each word, generate(), handle() exactly once,
 * then query(k). Characters outside 'a'..'z' are not checked.
 */
struct Ac_automation{

    struct node{
        node *fail;    // longest proper suffix that exists in the trie
        node *son[26]; // child per letter; after generate() also the goto transition
        int sum;       // words passing through this node; after handle(): fail-tree subtree sum

        node():fail(NULL),sum(0){
            std::memset(son,0,sizeof(son));
        }
    };

    node *root;
    node *tail[210];        // tail[k] = last trie node of word k
    std::vector<node*> ans; // all non-root nodes in BFS order, filled by generate()
    int num;                // unused; kept so the public members stay unchanged

    // Starts a fresh automaton. Also empties the BFS order, which previously
    // survived a second clear().
    void clear(){
        root=new node();
        num=0;
        ans.clear();
    }

    // Inserts word s[x][1..], adding 1 to every node on its path, and records
    // the last node in tail[x].
    void insert(int x){
        node *now=root;
        int l=(int)std::strlen(s[x]+1);
        for(int i=1;i<=l;i++){
            int c=s[x][i]-'a';
            if(!now->son[c]) now->son[c]=new node();
            now=now->son[c];
            now->sum++;
        }
        tail[x]=now;
    }

    // BFS that sets fail pointers, redirects missing children, and records the
    // visiting order in `ans`. A node's fail target is shallower than the node,
    // so it always appears earlier in `ans` (or is the root).
    void generate(){
        std::queue<node*> q;
        for(int i=0;i<26;i++)
            if(root->son[i]){
                root->son[i]->fail=root;
                q.push(root->son[i]);
                ans.push_back(root->son[i]);
            }else root->son[i]=root;
        while(!q.empty()){
            node *now=q.front();
            q.pop();
            for(int i=0;i<26;i++)
                if(now->son[i]){
                    now->son[i]->fail=now->fail->son[i];
                    q.push(now->son[i]);
                    ans.push_back(now->son[i]);
                }else now->son[i]=now->fail->son[i];
        }
    }

    // Accumulates subtree sums of the fail tree by walking the BFS order
    // backwards, pushing each node's sum into its fail target.
    // Must be called once only; a second call would add the sums again.
    void handle(){
        for(int i=(int)ans.size()-1;i>=0;i--)
            ans[i]->fail->sum+=ans[i]->sum;
    }

    // Returns the number of occurrences of word x among all inserted words.
    int query(int x){
        return tail[x]->sum;
    }

}Acm;

// Reads n words and prints, for each word, how many times it occurs as a
// substring across all n words. Words are read 1-based into s[i].
int main(){
    if(scanf("%d",&n)!=1) return 0;
    Acm.clear();
    for(int i=1;i<=n;i++){
        if(scanf("%s",s[i]+1)!=1) return 0;
        Acm.insert(i);
    }
    Acm.generate();
    Acm.handle();
    for(int i=1;i<=n;i++)
        printf("%d\n",Acm.query(i));
    return 0;
}

4、病毒

就是找一个串，使之在Ac_automaton（Trie图）上不停地跑，可就是跑不到任何一个单词的结尾。这是环的特点。

我们对单词结尾打标记。因为fail指向的串是该节点所代表的串的后缀，该串出现，则它的后缀肯定也出现；若fail指向的节点有标记，则该节点也应该被标记，否则一旦走到该节点，那个有标记的后缀也就出现了，这是我们不希望看到的。标记完了以后判环，遇到标记节点直接continue，因为我们不能走到它。做完这些处理，如果还有环，那就TAK了。

判环的话用dfs加栈判断（有点像Tarjan？），当然，不用建栈，打个标记看在不在栈中就完事了。

#include<iostream>

#include<algorithm>

#include<cmath>

#include<cstring>

#include<cstdio>

#include<vector>

#include<queue>

#include<stack>

#include<set>

#include<map>

using namespace std;

int n;

// Input buffer for one pattern, stored 1-based (first character at s[1]).
// NOTE(review): the capacity literal was missing from the source; 30010 assumes
// a total pattern length of at most 30000 -- confirm against the problem limits.
char s[30010];

bool flag; // unused; kept so existing references to it still compile

/**
 * Aho-Corasick automaton over the binary alphabet {'0','1'} used to decide
 * whether an infinite string exists that contains none of the patterns.
 *
 * Usage: clear(), insert() for each pattern (read from s[1..]), generate(),
 * then dfs(root). Characters other than '0'/'1' are not checked.
 */
struct Ac_automaton{

    struct node{
        node *fail;   // longest proper suffix that exists in the trie
        node *son[2]; // child per bit; after generate() also the goto transition
        int v;        // 1 if reaching this node means some pattern has occurred
        int ins;      // 1 while the node is on the current DFS stack
        int vist;     // 1 once the node has been entered by the DFS

        node():fail(NULL),v(0),ins(0),vist(0){
            std::memset(son,0,sizeof(son));
        }
    };

    node *root;

    // Starts a fresh automaton with an empty root.
    void clear(){
        root=new node();
    }

    // Inserts the pattern stored in s[1..] and marks its last node as forbidden.
    void insert(){
        node *now=root;
        int l=(int)std::strlen(s+1);
        for(int i=1;i<=l;++i){
            int c=s[i]-'0';
            if(!now->son[c]) now->son[c]=new node();
            now=now->son[c];
        }
        now->v=1;
    }

    // BFS that sets fail pointers and redirects missing children. A node also
    // becomes forbidden when its fail target is forbidden: that target is a
    // suffix of the node's string, so the pattern occurs there as well. BFS
    // order guarantees the fail target's flag is final when it is read.
    void generate(){
        std::queue<node*> q;
        for(int i=0;i<=1;i++)
            if(root->son[i]){
                root->son[i]->fail=root;
                q.push(root->son[i]);
            }else root->son[i]=root;
        while(!q.empty()){
            node *now=q.front();
            q.pop();
            for(int i=0;i<=1;i++)
                if(now->son[i]){
                    now->son[i]->fail=now->fail->son[i];
                    q.push(now->son[i]);
                    now->son[i]->v|=now->son[i]->fail->v;
                }else now->son[i]=now->fail->son[i];
        }
    }

    // Depth-first search for a cycle that avoids forbidden nodes.
    // Returns true if such a cycle is reachable from `now`, i.e. an infinite
    // pattern-free string exists. Recursion depth is bounded by the node count.
    bool dfs(node *now){
        now->ins=1;
        now->vist=1;
        for(int i=0;i<=1;i++){
            node *nex=now->son[i];
            if(nex->ins) return true;   // back edge to the current path: cycle found
            if(nex->v) continue;        // forbidden state, must not be entered
            if(nex->vist) continue;     // already explored without finding a cycle
            if(dfs(nex)) return true;
        }
        now->ins=0;
        return false;
    }

}Acm;

// Reads n binary patterns and prints "TAK" if an infinite binary string
// avoiding all of them exists, otherwise "NIE".
int main(){
    if(scanf("%d",&n)!=1) return 0;
    Acm.clear();
    for(int i=1;i<=n;i++){
        if(scanf("%s",s+1)!=1) return 0;
        Acm.insert();
    }
    Acm.generate();
    puts(Acm.dfs(Acm.root)?"TAK":"NIE");
    return 0;
}

然后就可以去想想Ac_automaton上的dp啦（更加头疼）。

Ac自动机基础题集合的更多相关文章

hdu 2896 病毒侵袭 AC自动机基础题
病毒侵袭 Time Limit: 2000/1000 MS (Java/Others) Memory Limit: 32768/32768 K (Java/Others) Total Submi ...
HDU 2222 AC自动机模板题
题目: http://acm.hdu.edu.cn/showproblem.php?pid=2222 AC自动机模板题我现在对AC自动机的理解还一般,就贴一下我参考学习的两篇博客的链接: http: ...
HDU 3065 (AC自动机模板题)
题目链接: http://acm.hdu.edu.cn/showproblem.php?pid=3065 题目大意:多个模式串,范围是大写字母.匹配串的字符范围是(0~127).问匹配串中含有哪几种模 ...
HDU 2896 (AC自动机模板题)
题目链接: http://acm.hdu.edu.cn/showproblem.php?pid=2896 题目大意:多个模式串.多个匹配串.其中串的字符范围是(0~127).问匹配串中含有哪几个模式串 ...
HDU 2222（AC自动机模板题)
题目链接: http://acm.hdu.edu.cn/showproblem.php?pid=2222 题目大意:多个模式串.问匹配串中含有多少个模式串.注意模式串有重复,所以要累计重复结果. 解题 ...
HDU3695(AC自动机模板题)
题意:给你n个字符串,再给你一个大的字符串A,问你着n个字符串在正的A和反的A里出现多少个? 其实就是AC自动机模板题啊( ╯□╰ ) 正着query一次再反着query一次就好了 /* gyt Li ...
hdu2222 KeyWords Search AC自动机入门题
/** 链接:http://acm.hdu.edu.cn/showproblem.php?pid=2222 题意:题意:给定N(N <= 10000)个长度不大于50的模式串,再给定一个长度为L ...
HDu-2896 病毒侵袭,AC自动机模板题！
病毒侵袭模板题,不多说了.. 题意:n个不同的字符串分别代表病毒特征,给出m次查询,每次一个字符串(网址),求这个字符串中有几个病毒特征,分别从大到小输出编号,最后输出所有的带病毒网址个数.格式请看 ...
[Bzoj3940] [AC自动机，USACO 2015 February Gold] Censor [AC自动机模板题]
AC自动机模板题(膜jcvb代码) #include <iostream> #include <algorithm> #include <cstdio> #incl ...

随机推荐

Elasticsearch安装中文分词插件ik
Elasticsearch默认提供的分词器,会把每一个汉字分开,而不是我们想要的依据关键词来分词.比如: curl -XPOST "http://localhost:9200/userinf ...
Android之利用EventBus进行数据传递
在项目中,不可避免的要在两个页面之间进行数据的传递,就算不传递,也需要进行刷新之类的,我们根据Google提供的库类方法,也是可以做的,主要有广播broadcastreceiver,startacti ...
typescript 入门例子 Hello world——ts就是一个宿主机语言
安装 TypeScript TypeScript 的命令行工具安装方法如下: npm install -g typescript 安装完成之后,就有了 tsc 命令.编译一个 TypeScript 文 ...
在 CentOS 7上安装并配置 Python 3.6 环境
前言按照此方法安装保证以下报错什么的统统都没有! 基础环境系统:centos7.4 软件:python3 Retrying (Retry(total=0, connect=None, read=N ...
Web开发必须知道的知识点
Web前端必须知道一.常用那几种浏览器测试.有哪些内核(Layout Engine) 1.浏览器:IE,Chrome,FireFox,Safari,Opera. 2.内核:Trident,Gecko ...
Python/Django 批量下载Excel
一.前提项目上需求的变更总是时时发生的,应对需求的我们,也只能变更我们代码,所以.继前两篇之后,我们的批量下载诞生了二.安装本文使用zipstream库进行压缩,安装方式:pip install ...
微信小程序上传多张图片，及php后台处理
微信小程序上传多张图片,级小程序页面布局直接来代码index.wxml <view class='body' style='width:{{windowWidth}}px;height:{{wi ...
ACM_水题你要信了（修改版）
水题你要信了 Time Limit: 2000/1000ms (Java/Others) Problem Description: 某发最近又认识了很多妹(han)子,可是妹(han)子一多不免有时会 ...
BOM 标记
BOM 是 Byte Order Mark 的简称,即字节序标记.用于标记文本流: 表示文本流的字节顺序,是小端序(little-endian)还是大端序(big-endian); 表示文本流是 Un ...
转：java中static、final、static final的区别
http://blog.csdn.net/qq1623267754/article/details/36190715 final可以修饰:属性,方法,类,局部变量(方法中的变量) final修饰的属性 ...

Ac自动机基础题集合

Ac自动机基础题集合的更多相关文章

随机推荐

热门专题