POJ3294 Life Forms —— 后缀数组最长公共子串

题目链接：https://vjudge.net/problem/POJ-3294

Life Forms

Time Limit: 5000MS		Memory Limit: 65536K
Total Submissions: 16905		Accepted: 4970

Description

You may have wondered why most extraterrestrial life forms resemble humans, differing by superficial traits such as height, colour, wrinkles, ears, eyebrows and the like. A few bear no human resemblance; these typically have geometric or amorphous shapes like cubes, oil slicks or clouds of dust.

The answer is given in the 146th episode of Star Trek - The Next Generation, titled The Chase. It turns out that in the vast majority of the quadrant's life forms ended up with a large fragment of common DNA.

Given the DNA sequences of several life forms represented as strings of letters, you are to find the longest substring that is shared by more than half of them.

Input

Standard input contains several test cases. Each test case begins with 1 ≤ n ≤ 100, the number of life forms. n lines follow; each contains a string of lower case letters representing the DNA sequence of a life form. Each DNA sequence contains at least one and not more than 1000 letters. A line containing 0 follows the last test case.

Output

For each test case, output the longest string or strings shared by more than half of the life forms. If there are many, output all of them in alphabetical order. If there is no solution with at least one letter, output "?". Leave an empty line between test cases.

Sample Input

3

abcdefg

bcdefgh

cdefghi

3

xxx

yyy

zzz

0

Sample Output

bcdefg

cdefgh

?

Source

Waterloo Local Contest, 2006.9.30

题意：

给出n个字符串，问是否存在至少出现于n/2+1个字符串中的公共子串。如果存在，输出长度最大的；如果有多个答案，按字典序输出所有。

题解：

1.将n个字符串拼接在一起，并且相邻两个之间用分隔符隔开，并且分隔符应各异。因此得到新串。

2.求出新串的后缀数组，然后二分公共子串的长度mid：可知当前的mid可将新串的后缀按排名的顺序将其分成若干组，且每一组的最长公共前缀都大于等于mid，于是就在每一组内统计出现了多少个字符串，如果>n/2，即表明当前mid合法，否则不合法，因此可以根据此规则最终求得长度。

3.由于题目还要求按字典序输出所有答案。所以，在求得长度之后，再遍历一遍sa[]数组，并且判断每个分组是否满足要求，若满足，则输出答案。

注意点：

1.每个分隔符应该不一样，如果一样，在求后缀数组的时候就很可能从当前字符串匹配到下一个字符串，而这种跨串匹配是不允许的，因为每个后缀最多只能匹配到它所在字符串的串尾。

2.输出答案时，为了避免同一组内多次输出（每一组对应着一个子串），应该加个标记。

代码如下：

 #include <iostream>

 #include <cstdio>

 #include <cstring>

 #include <algorithm>

 #include <vector>

 #include <cmath>

 #include <queue>

 #include <stack>

 #include <map>

 #include <string>

 #include <set>

 using namespace std;

 typedef long long LL;

 const int INF = 2e9;

 const LL LNF = 9e18;

 const int MOD = 1e9+;

 const int MAXN = 2e5+;

 int id[MAXN];   //记录属于哪个字符串

 int r[MAXN], sa[MAXN], Rank[MAXN], height[MAXN];

 int t1[MAXN], t2[MAXN], c[MAXN];

 bool cmp(int *r, int a, int b, int l)

 {

     return r[a]==r[b] && r[a+l]==r[b+l];

 }

 void DA(int str[], int sa[], int Rank[], int height[], int n, int m)

 {

     n++;

     int i, j, p, *x = t1, *y = t2;

     for(i = ; i<m; i++) c[i] = ;

     for(i = ; i<n; i++) c[x[i] = str[i]]++;

     for(i = ; i<m; i++) c[i] += c[i-];

     for(i = n-; i>=; i--) sa[--c[x[i]]] = i;

     for(j = ; j<=n; j <<= )

     {

         p = ;

         for(i = n-j; i<n; i++) y[p++] = i;

         for(i = ; i<n; i++) if(sa[i]>=j) y[p++] = sa[i]-j;

         for(i = ; i<m; i++) c[i] = ;

         for(i = ; i<n; i++) c[x[y[i]]]++;

         for(i = ; i<m; i++) c[i] += c[i-];

         for(i = n-; i>=; i--) sa[--c[x[y[i]]]] = y[i];

         swap(x, y);

         p = ; x[sa[]] = ;

         for(i = ; i<n; i++)

             x[sa[i]] = cmp(y, sa[i-], sa[i], j)?p-:p++;

         if(p>=n) break;

         m = p;

     }

     int k = ;

     n--;

     for(i = ; i<=n; i++) Rank[sa[i]] = i;

     for(i = ; i<n; i++)

     {

         if(k) k--;

         j = sa[Rank[i]-];

         while(str[i+k]==str[j+k]) k++;

         height[Rank[i]] = k;

     }

 }

 bool vis[];

 bool test(int n, int len, int k)

 {

     int cnt = ;

     memset(vis, false, sizeof(vis));

     for(int i = ; i<=len; i++)

     {

         if(height[i]<k)

         {

             cnt = ;

             memset(vis, false, sizeof(vis));

         }

         else

         {

             if(!vis[id[sa[i-]]]) vis[id[sa[i-]]] = true, cnt++;

             if(!vis[id[sa[i]]]) vis[id[sa[i]]] = true, cnt++;

             if(cnt>n/) return true;

         }

     }

     return false;

 }

 void Print(int n, int len, int k)

 {

     int cnt = , flag = false;

     memset(vis, false, sizeof(vis));

     for(int i = ; i<=len; i++)

     {

         if(height[i]<k)

         {

             flag = false;

             cnt = ;

             memset(vis, false, sizeof(vis));

         }

         else

         {

             if(!vis[id[sa[i-]]]) vis[id[sa[i-]]] = true, cnt++;

             if(!vis[id[sa[i]]]) vis[id[sa[i]]] = true, cnt++;

             if(cnt>n/ &&!flag)

             {

                 flag = true;    //表明当前组已经输出了

                 for(int j = sa[i]; j<sa[i]+k; j++)

                     putchar(r[j]+'a'-);

                 putchar('\n');

             }

         }

     }

 }

 char str[MAXN];

 int main()

 {

     int n, firCase = false;

     while(scanf("%d", &n)&&n)

     {

         int len = ;

         for(int i = ; i<n; i++)

         {

             scanf("%s", str);

             int LEN = strlen(str);

             for(int j = ; j<LEN; j++)

             {

                 r[len] = str[j]-'a'+;

                 id[len++] = i;

             }

             r[len] = +i;  //分隔符要各异

             id[len++] = i;

         }

         r[len] = ;

         DA(r,sa,Rank,height,len,);

         int l = , r = ;

         while(l<=r)

         {

             int mid = (l+r)>>;

             if(test(n,len,mid))

                 l = mid + ;

             else

                 r = mid - ;

         }

         if(firCase) printf("\n");

         firCase = true;

         if(r==) puts("?");

         else Print(n, len, r);

     }

 }

POJ3294 Life Forms —— 后缀数组最长公共子串的更多相关文章

POJ 2217 (后缀数组+最长公共子串)
题目链接: http://poj.org/problem?id=2217 题目大意: 求两个串的最长公共子串,注意子串是连续的,而子序列可以不连续. 解题思路: 后缀数组解法是这类问题的模板解法. 对 ...
POJ-2774-Long Long Message(后缀数组-最长公共子串)
题意: 给定两个字符串 A 和 B,求最长公共子串. 分析: 字符串的任何一个子串都是这个字符串的某个后缀的前缀. 求 A 和 B 的最长公共子串等价于求 A 的后缀和 B 的后缀的最长公共前缀的最大 ...
[poj 2274]后缀数组+最长公共子串
题目链接:http://poj.org/problem?id=2774 后缀数组真的太强大了,原本dp是O(nm)的复杂度,在这里只需要O(n+m). 做法:将两个串中间夹一个未出现过的字符接起来,然 ...
CSU1632Repeated Substrings（后缀数组/最长公共前缀）
题意就是求一个字符串的重复出现(出现次数>=2)的不同子串的个数. 标准解法是后缀数组.最长公共前缀的应用,对于样例aabaab,先将所有后缀排序: aab 3 aabaab 1 a ...
POJ3450 Corporate Identity —— 后缀数组最长公共子序列
题目链接:https://vjudge.net/problem/POJ-3450 Corporate Identity Time Limit: 3000MS Memory Limit: 65536 ...
POJ3294 Life Forms(后缀数组)
引用罗穗骞论文中的话: 将n 个字符串连起来,中间用不相同的且没有出现在字符串中的字符隔开,求后缀数组.然后二分答案,用和例3 同样的方法将后缀分成若干组,判断每组的后缀是否出现在不小于k 个的原串中 ...
POJ3415 Common Substrings —— 后缀数组 + 单调栈公共子串个数
题目链接:https://vjudge.net/problem/POJ-3415 Common Substrings Time Limit: 5000MS Memory Limit: 65536K ...
[SPOJ1811]Longest Common Substring 后缀自动机最长公共子串
题目链接:http://www.spoj.com/problems/LCS/ 题意如题目,求两个串的最大公共子串LCS. 首先对其中一个字符串A建立SAM,然后用另一个字符串B在上面跑. 用一个变量L ...
POJ 2774 (后缀数组最长公共字串) Long Long Message
用一个特殊字符将两个字符串连接起来,然后找最大的height,而且要求这两个相邻的后缀的第一个字符不能在同一个字符串中. #include <cstdio> #include <cs ...

随机推荐

Drools学习笔记
Drools是一款基于Java的开源规则引擎实现了将业务决策从应用程序中分离出来. 优点: 1.简化系统架构,优化应用 2.提高系统的可维护性和维护成本 3.方便系统的整合 4.减少编写“硬代码”业 ...
maven仓库中有jar包pom还报错
maven仓库中有jar包pom还报错就报错,咋啦? 这个包来源不明,自己拷贝进来的吧?你当我mvn是傻子?我要去网上验证一下: 我自己有个_remote.respositories文件,如果自己用 ...
iOS SDK具体解释之UIDevice（系统版本号，设备型号...）
原创Blog,转载请注明出处 blog.csdn.net/hello_hwc 欢迎关注我的iOS SDK具体解释专栏 blog.csdn.net/column/details/huangwenchen ...
eclipse右下角总显示自动下载xml和jar,如何关闭
windows->preference->查找download-选择Models->右侧下方Enable auto-download去掉 2.Automatic Updates-&g ...
centos配置ip
/etc/sysconfig/network-scripts/ifcfg-eth0文件做如下修改 DEVICE=eth0 HWADDR=08:00:27:46:1D:E1 TYPE=Ethernet ...
RF--- selenium
HTML5之Canvas绘图(一) ——基础篇
HTML5火的正热,最近有个想法也是要用到HTML的相关功能,所以也要好好学习一把. 好好看了一下Canvas的功能,感觉HTML5在客户端交互的功能性越来越强了,今天看了一下Canvas绘图,下边是 ...
spring boot 发布成包所需插件
在pom.xml里配置 <plugin> <groupId>org.apache.maven.plugins</groupId> <artifactId> ...
jquery单选框radio绑定click事件实现方法
本文实例讲述了jquery单选框radio绑定click事件实现方法.分享给大家供大家参考. 具体实现方法如下: 复制代码代码如下: <html><head><title ...
TP 自动验证规则
#自动验证 protected $_validate=array( #参数最后代表1 表示必须验证,0表示当这个字段存在的时候验证 array('username','require','账号不能为空 ...

POJ3294 Life Forms —— 后缀数组 最长公共子串

POJ3294 Life Forms —— 后缀数组 最长公共子串的更多相关文章

随机推荐

热门专题

POJ3294 Life Forms —— 后缀数组最长公共子串

POJ3294 Life Forms —— 后缀数组最长公共子串的更多相关文章