Front compression

Time Limit: 5000/5000 MS (Java/Others) Memory Limit: 102400/102400 K (Java/Others)

Total Submission(s): 1339 Accepted Submission(s): 496

Problem Description

Front compression is a type of delta encoding compression algorithm whereby common prefixes and their lengths are recorded so that they need not be duplicated. For example:

The size of the input is 43 bytes, while the size of the compressed output is
40. Here, every space and newline is also counted as 1 byte.

Given the input, each line of which is a substring of a long string, what are sizes of it and corresponding compressed output?

Input

There are multiple test cases. Process to the End of File.

The first line of each test case is a long string S made up of lowercase letters, whose length doesn't exceed 100,000. The second line contains a integer 1 ≤ N ≤ 100,000, which is the number of lines in the input. Each of the following N lines contains two
integers 0 ≤ A < B ≤ length(S), indicating that that line of the input is substring [A, B) of S.

Output

For each test case, output the sizes of the input and corresponding compressed output.

Sample Input

frcode

2

0 6

0 6

unitedstatesofamerica

3

0 6

0 12

0 21

myxophytamyxopodnabnabbednabbingnabit

6

0 9

9 16

16 19

19 25

25 32

32 37

Sample Output

Author

Zejun Wu (watashi)

Source

field=problem&key=2013+Multi-University+Training+Contest+9&source=1&searchmode=source">2013 Multi-University Training Contest 9

Recommend

zhuyuanchen520 | We have carefully selected several similar problems for you:

pid=5061">5061
5060 5059

pid=5058">
5058

pid=5057">
5057

解题思路：后缀数组水题。试了两种模版。还是基数排序的快啊。。

板子1：

#include <iostream>

#include <cstdio>

#include <cstring>

#include <algorithm>

#include <cmath>

#define ll long long

#define maxn 100010

using namespace std;

char s[maxn];

int n,k,q;

int rank[maxn],sa[maxn],tmp[maxn],lcp[maxn];//lcp:0-n-1

bool cmp(int x,int y){

    if(rank[x]!=rank[y]) return rank[x]<rank[y];

    int sx=x+k<=n ?

rank[x+k]:-1;

    int sy=y+k<=n ? rank[y+k]:-1;

    return sx<sy;

}

void build_sa(){

    n=strlen(s);

    for(int i=0;i<=n;i++){

        sa[i]=i;

        rank[i]=i<n ?

s[i]:-1;

    }

    for(k=1;k<=n;k<<=1){

        sort(sa,sa+n+1,cmp);

        tmp[sa[0]]=0;

        for(int i=1;i<=n;i++){

            tmp[sa[i]]=tmp[sa[i-1]]+(cmp(sa[i-1],sa[i]) ? 1:0);

        }

        for(int i=0;i<=n;i++) rank[i]=tmp[i];

    }

}

void build_lcp(){

    n=strlen(s);

    //for(int i=0;i<=n;i++) rank[sa[i]]=i;

    int h=0;

    lcp[0]=0;

    for(int i=0;i<n;i++){

        int j=sa[rank[i]-1];

        if(h>0) h--;

        for(;j+h<n&&i+h<n;h++){

            if(s[j+h]!=s[i+h]) break;

        }

        lcp[rank[i]-1]=h;

    }

}

int dp[20][maxn],mm[maxn];

void init_RMQ(int n){

    mm[0]=-1;

    for(int i=1;i<=n;i++){//长度1-n

        mm[i]=(i&(i-1)) ? mm[i-1]:mm[i-1]+1;

    }

    for(int i=0;i<n;i++) dp[0][i]=lcp[i];

    for(int i=1;i<=mm[n];i++){

        for(int j=0;j+(1<<i)-1<n;j++){

            dp[i][j]=min(dp[i-1][j],dp[i-1][j+(1<<i>>1)]);

        }

    }

}

int RMQ(int x,int y){//[x,y-1]

    if(x==y) return n-x;

    x=rank[x],y=rank[y];

    if(x>y) swap(x,y);

    y--;

    int l=mm[y-x+1];

    return min(dp[l][x],dp[l][y-(1<<l)+1]);

}

void read(){

    scanf("%d",&q);

    ll sum1=0,sum2=0;

    int pl=-1,pr=-1,l,r;

    for(int i=0;i<q;i++){

        scanf("%d%d",&l,&r);

        sum1+=(r-l+1);

        if(pl==-1){

            sum2+=r-l+1;

        }else{

            int LCP=RMQ(pl,l);

            int ans=min(LCP,min(r-l,pr-pl));

            sum2+=(r-l-ans);

            if(ans==0) sum2+=1;

            else sum2+=(int)log10(ans*1.0)+1;

        }

        pl=l,pr=r;

    }

    printf("%I64d %I64d\n",sum1,sum2+2*q);

}

int main(){

    while(~scanf("%s",s)){

        build_sa();

        build_lcp();

        init_RMQ(n);

        read();

    }

    return 0;

}

板子2：

#include <iostream>

#include <cstdio>

#include <cstring>

#include <algorithm>

#include <cmath>

#define ll long long

#define maxn 100010

using namespace std;

char s[maxn];

int c[maxn],wa[maxn],wb[maxn],r[maxn];//求SA数组须要的中间变量，不须要赋值

 //待排序的字符串放在s数组中，从s[0]到s[n-1],长度为n,且最大值小于m,

 //除s[n-1]外的全部s[i]都大于0，r[n-1]=0

//函数结束以后结果放在sa数组中

int n,sa[maxn],lcp[maxn],rank[maxn];

bool cmp(int *r,int a,int b,int l){

    return r[a]==r[b]&&r[a+l]==r[b+l];

}

void build_sa(int n,int m){//数组长度，最大数字

    for(int i=0;i<=n;i++) r[i]=i<n ?

s[i]:0;

    n++;

    int i,j,p,*x=wa,*y=wb;

    //第一轮基数排序。假设s的最大值非常大，可改为高速排序

    for(i=0;i<m;i++) c[i]=0;

    for(i=0;i<n;i++) c[x[i]=r[i]]++;

    for(i=1;i<m;i++) c[i]+=c[i-1];

    for(i=n-1;i>=0;i--) sa[--c[x[i]]]=i;

    for(j=1;j<=n;j<<=1){

        p=0;

        //直接利用sa数组排序第二keyword

        for(i=n-j;i<n;i++) y[p++]=i;//后面的j个数第二keyword为空的最小

        for(i=0;i<n;i++) if(sa[i]>=j) y[p++]=sa[i]-j;

        //这样数组y保存的就是依照第二keyword排序的结果

        //基数排序第一keyword

        for(i=0;i<m;i++) c[i]=0;

        for(i=0;i<n;i++) c[x[y[i]]]++;

        for(i=1;i<m;i++) c[i]+=c[i-1];

        for(i=n-1;i>=0;i--) sa[--c[x[y[i]]]]=y[i];

        //依据sa和x数组计算新的x数组

        swap(x,y);

        p=1,x[sa[0]]=0;

        for(i=1;i<n;i++)

        x[sa[i]]=cmp(y,sa[i-1],sa[i],j)?

p-1:p++;

        if(p>=n) break;

        m=p;

    }

}

void build_lcp(int n){

    int i,j,k=0;

    for(i=0;i<=n;i++) rank[sa[i]]=i;

    lcp[0]=0;

    for(i=0;i<n;i++){

        j=sa[rank[i]-1];

        if(k) k--;

        while(s[i+k]==s[j+k]) k++;

        lcp[rank[i]-1]=k;

    }

}

int dp[20][maxn],mm[maxn];

void init_RMQ(int n){

    mm[0]=-1;

    for(int i=1;i<=n;i++){

        mm[i]=(i&(i-1)) ?

mm[i-1]:mm[i-1]+1;

    }

    for(int i=0;i<n;i++) dp[0][i]=lcp[i];

    for(int i=1;i<=mm[n];i++){

        for(int j=0;j+(1<<i)-1<n;j++){

            dp[i][j]=min(dp[i-1][j],dp[i-1][j+(1<<i>>1)]);

        }

    }

}

int RMQ(int x,int y){

    if(x==y) return n-x;

    x=rank[x],y=rank[y];

    if(x>y) swap(x,y);

    y--;

    int l=mm[y-x+1];

    return min(dp[l][x],dp[l][y-(1<<l)+1]);

}

int q;

void read(){

    scanf("%d",&q);

    ll sum1=0,sum2=0;

    int pl=-1,pr=-1,l,r;

    for(int i=0;i<q;i++){

        scanf("%d%d",&l,&r);

        sum1+=(r-l+1);

        if(pl==-1){

            sum2+=r-l+1;

        }else{

            int LCP=RMQ(pl,l);

            //cout<<i<<":"<<LCP<<endl;

            int ans=min(LCP,min(r-l,pr-pl));

            //cout<<i<<":"<<ans<<endl;

            sum2+=(r-l-ans);

            if(ans==0) sum2+=1;

            else sum2+=(int)log10(ans*1.0)+1;

        }

        pl=l,pr=r;

    }

    printf("%I64d %I64d\n",sum1,sum2+2*q);

}

int main(){

    while(~scanf("%s",s)){

        n=strlen(s);

        build_sa(n,128);

        build_lcp(n);

        /*for(int i=0;i<n;i++){

            cout<<i<<" "<<sa[i]<<" "<<lcp[i]<<endl;

        }*/

        init_RMQ(n);

        read();

    }

    return 0;

}

hdu4691 Front compression(后缀数组)的更多相关文章

HDU-4691 Front compression 后缀数组
题目链接:http://acm.hdu.edu.cn/showproblem.php?pid=4691 后缀数组模板题,求出Height数组后,对Height做RMQ,然后直接统计就可以了... // ...
hdu4691 Front compression ——暴力 || 后缀数组
link:http://acm.hdu.edu.cn/showproblem.php?pid=4691 暴力,数据明显太水了吧,n=10^5, O(n^2)的复杂度哎喂.想让大家暴力写直接让n=100 ...
hdu 4691 Front compression （后缀数组）
hdu 4691 Front compression 题意:很简单的,就是给一个字符串,然后给出n个区间,输出两个ans,一个是所有区间的长度和,另一个是区间i跟区间i-1的最长公共前缀的长度的数值的 ...
HDU 4691 Front compression （2013多校9 1006题后缀数组）
Front compression Time Limit: 5000/5000 MS (Java/Others) Memory Limit: 102400/102400 K (Java/Othe ...
HDU 4691 Front compression（后缀数组）
题目链接:http://acm.hdu.edu.cn/showproblem.php?pid=4691 题意:给出Input,求出Compressed output.输出各用多少字节. 思路:求后缀数 ...
hdu4691(后缀数组)
算是后缀数组的入门题吧. 思路无比简单,要是直接套模板的话应该很容易秒掉. 关于后缀数组看高中神犇的论文就可以学会了算法合集之<后缀数组——处理字符串的有力工具> 话说这题暴力是可以过了 ...
bzoj 3172 单词 ac自动机|后缀数组
题目大意: 给定n个字符串连成了一篇文章,问每个字符串在这篇文章中出现的次数,可重复覆盖这里ac自动机和后缀数组都可以做当然后缀数组很容易就解决,但是相对时间消耗高这里就只讲ac自动机了将每个 ...
HDU5853 Jong Hyok and String（二分 + 后缀数组）
题目 Source http://acm.hdu.edu.cn/showproblem.php?pid=5853 Description Jong Hyok loves strings. One da ...
Ural1297 Palindrome（后缀数组）
[题目链接] http://acm.hust.edu.cn/vjudge/problem/viewProblem.action?id=12406 [题意] 求最长回文子串. [思路] 将字符串 ...

随机推荐

IDEA创建Maven项目显示一直加载中的问题
使用IDEA这款工具创建Maven项目的时候出现过下面这种情况: 红色区域即maven骨架加载不出来... 或 loading loading loading ... 有时候需要很长一段时间才能加载出 ...
vue 键盘监听事件
<template> <div class="hello"> <input v-on:keyup.enter="submit" t ...
Webpack的作用（一个基础的打包编译工具在做什么?）
结论: 转换ES6语法成ES5 处理模块加载依赖生成一个可以在浏览器加载执行的 js 文件第一个问题,转换语法,其实我们可以通过babel来做.核心步骤也就是: 通过babylon生成AST 通过 ...
apicloud 上传/更新App版本到 ios store 流程步骤
app更新上传APP的地址: https://itunesconnect.apple.com/login 苹果开发者中心: https://developer.apple.com/ app正式包更新 ...
mysql 修改默认的引擎
需求: mysql 的默认的引擎为MyISAM 虽然该引擎访问的速度快,但并不支持存储事物,也不支持外键,所以我们修改为innob Linux修改MySql默认存储引擎为InnoDB 一 ...
thymeleaf 拼接超链接
<dd><a th:href="@{/get/{id}(id=${user.id})}">基本资料</a></dd>
lvm硬盘管理及LVM扩容
1,创建分区 [root@host-10-158-172-44 ~]# fdisk /dev/vda Welcome to fdisk (util-linux 2.23.2). Changes wil ...
Git学习总结（7）——Git GUI学习教程
前言之前一直想一篇这样的东西,因为最初接触时,我也认真看了廖雪峰的教程,但是似乎我觉得讲得有点多,而且还是会给我带来很多多余且重复的操作负担,所以我希望能压缩一下它在我工作中的成本,但是搜索了一下并 ...
面试题——ArrayList和LinkedList的区别
List概括先回顾一下List在Collection的框架图: 从图中可以看出: List是一个接口,他继承Collection接口,代表有序的队列. AbstractList是一个抽象类, ,它继 ...
DGA特征挖掘
摘自:https://paper.seebug.org/papers/Archive/drops2/%E7%94%A8%E6%9C%BA%E5%99%A8%E5%AD%A6%E4%B9%A0%E8%A ...

hdu4691 Front compression(后缀数组)

Front compression

hdu4691 Front compression(后缀数组)的更多相关文章

随机推荐

热门专题