C语言实现KMP模式匹配算法

/*!

 * Description:

 * author scictor <scictor@gmail.com>

 * date 2018/7/4

 */

#include <stdio.h>

#include <stdlib.h>

#include <string.h>

// https://tekmarathon.com/2013/05/14/algorithm-to-find-substring-in-a-string-kmp-algorithm/

/*What is Partial Match Table?

It is an array of size (pattern_length + 1) where, for each position i in the pattern P, b[i] is the length of the longest proper suffix of P[1…i-1] that is also a prefix of P.

What is the longest prefix/suffix match? A proper prefix of a string is a prefix that is not equal to the whole string, just as a proper subset is a subset that is not equal to the set itself.

Why should a prefix match a suffix of the pattern? Because when we shift the pattern, it is the prefix of P that moves onto the already-matched suffix. The key idea is that if we have successfully matched the prefix P[1…i-1] of the pattern with the substring T[j-(i-1)…j-1] of the input string and P(i) != T(j), then we don't need to re-examine any of T[j-(i-1)…j-1], since we know this portion of the text is exactly the prefix of the pattern that we have just matched.

*/

//"ababacb"

/**
 * Build the KMP partial-match ("next") table for a pattern.
 *
 * next[0] is the sentinel -1. For 1 <= k <= patternLen, next[k] is the
 * length of the widest border of pattern[0..k-1], i.e. the longest proper
 * prefix of that substring that is also its suffix.
 *
 * As a side effect the first patternLen table entries are printed to
 * stdout, each followed by a space.
 *
 * @param pattern    word that is to be searched for; indices
 *                   0..patternLen-1 are read
 * @param patternLen number of characters in pattern
 * @param next       output table; must have room for patternLen + 1 ints
 */
void kmp_next(const char *pattern, int patternLen, int *next) {
    int i = 0, j = -1;

    next[i] = j; // default next[0] = -1
    while (i < patternLen) {
        while (j >= 0 && pattern[i] != pattern[j]) {
            // On a mismatch fall back to the next widest border. The
            // candidates are visited in decreasing order:
            // next[j], next[next[j]], ...
            j = next[j];
        }
        i++;
        j++;
        next[i] = j;
    }

    for (int index = 0; index < patternLen; ++index) {
        printf("%d ", next[index]);
    }
}

/**
 * Search text for the first occurrence of pattern using the table built
 * by kmp_next().
 *
 * On a match, "found substring at index:<n>" is printed to stdout (in
 * addition to the table dump that kmp_next() prints).
 *
 * @param text       text over which the search happens; indices
 *                   0..textLen-1 are read
 * @param textLen    number of characters in text
 * @param pattern    pattern that is to be searched for
 * @param patternLen number of characters in pattern
 * @return index in text where the first match starts, 0 for an empty
 *         pattern, or -1 when there is no match (or patternLen is negative)
 */
int kmp_search(const char *text, int textLen, const char *pattern, int patternLen) {
    // An empty pattern matches at the start; a negative length is invalid
    // and must be rejected before it is used as an array dimension.
    if (patternLen == 0) {
        return 0;
    }
    if (patternLen < 0) {
        return -1;
    }

    int i = 0, j = 0;

    // kmp_next() fills next[0..patternLen], hence patternLen + 1 slots.
    int next[patternLen + 1];
    memset(next, 0x00, sizeof(next));
    kmp_next(pattern, patternLen, next);

    while (i < textLen) {
        while (j >= 0 && text[i] != pattern[j]) {
            // Mismatch: reuse the part of the text already known to match
            // a prefix of the pattern instead of rescanning it.
            j = next[j];
        }
        i++;
        j++;

        // The whole pattern has been matched; report the first hit only.
        if (j == patternLen) {
            printf("found substring at index:%d", (i - patternLen));
            return (i - patternLen);
        }
    }

    return -1;
}

/*

Index         0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16  17  18  19  20  21  22  23

Text(T)       a b c a b d a b c a b  d  a  b  c  a  b   d   a   b   d   a   b   c

Pattern(P)    a b c a b d a b c

PMT (next[i])   -1 0 0 0 1 2 0 1 2 3

 */

// 4ms

/**
 * Self-contained KMP search on NUL-terminated strings.
 *
 * Both loops stop at the terminating '\0' of pattern / text; textLen is
 * not consulted. patternLen only sizes the lookup table (patternLen + 1
 * ints) and bounds the debug dump, so it is expected to equal
 * strlen(pattern).
 *
 * As a side effect the first patternLen table entries are printed to
 * stdout, each followed by a space.
 *
 * @param text       NUL-terminated text to search
 * @param textLen    length of text (unused)
 * @param pattern    NUL-terminated pattern to search for
 * @param patternLen length of pattern
 * @return index in text where the first match starts, 0 for an empty
 *         pattern, or -1 when there is no match or the table cannot be
 *         allocated
 */
int kmp(const char *text, int textLen, const char *pattern, int patternLen)
{
    int *T;
    int i, j;
    int result = -1;

    (void) textLen;

    if (pattern[0] == '\0')
        return 0;

    // Construct the lookup table
    T = malloc((patternLen + 1) * sizeof *T);
    if (T == NULL)
        return -1;

    T[0] = -1;
    for (i = 0; pattern[i] != '\0'; i++) {
        T[i + 1] = T[i] + 1;
        // Shrink the candidate border until it can be extended by pattern[i].
        while (T[i + 1] > 0 && pattern[i] != pattern[T[i + 1] - 1])
            T[i + 1] = T[T[i + 1] - 1] + 1;
    }

    for (int k = 0; k < patternLen; ++k) printf("%d ", T[k]);

    // Perform the search
    for (i = j = 0; text[i] != '\0'; ) {
        if (j < 0 || text[i] == pattern[j]) {
            ++i, ++j;
            if (pattern[j] == '\0') {
                // Leave through the common exit so T is always released.
                result = i - j;
                break;
            }
        }
        else j = T[j];
    }

    free(T);
    return result;
}

/*const char *kmp_search(const char *text, const char *pattern)

{

    int *T;

    int i, j;

    const char *result = NULL;

    if (pattern[0] == '\0')

        return text;

    // Construct the lookup table

    T = (int*) malloc((strlen(pattern)+1) * sizeof(int) );

    T[0] = -1;

    for (i=0; pattern[i] != '\0'; i++) {

        T[i+1] = T[i] + 1;

        while (T[i+1] > 0 && pattern[i] != pattern[T[i+1]-1])

            T[i+1] = T[T[i+1]-1] + 1;

    }

    // Perform the search

    for (i=j=0; text[i] != '\0'; ) {

        if (j < 0 || text[i] == pattern[j]) {

            ++i, ++j;

            if (pattern[j] == '\0') {

                result = text+i-j;

                break;

            }

        }

        else j = T[j];

    }

    free(T);

    return result;

}

*/

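// nextval variant: an optimized next table that skips fallbacks which would compare the same character again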

/**
 * Build the optimized ("nextval") KMP table for pattern x.
 *
 * kmpNext[0] is the sentinel -1. For each later position i, the entry is
 * the border length j, unless x[i] == x[j]; in that case falling back to j
 * would repeat the same failed comparison, so kmpNext[j] is stored instead.
 *
 * @param x       pattern; indices 0..m are read, so x[m] must be readable
 *                (for a C string it is the terminating '\0')
 * @param m       number of characters in x
 * @param kmpNext output table; indices 0..m are written, so it must have
 *                room for m + 1 ints
 */
void preKmp(char *x, int m, int kmpNext[]) {
   int i, j;

   i = 0;
   j = kmpNext[0] = -1;
   while (i < m) {
      while (j > -1 && x[i] != x[j])
         j = kmpNext[j];
      i++;
      j++;
      if (x[i] == x[j])
         kmpNext[i] = kmpNext[j];
      else
         kmpNext[i] = j;
   }
}

/**
 * Find the first occurrence of pattern x (length m) in text y (length n)
 * using the table built by preKmp().
 *
 * @param y text to search; indices 0..n-1 are read
 * @param n number of characters in y
 * @param x pattern to search for; preKmp() also reads x[m], so x[m] must
 *          be readable (for a C string it is the terminating '\0')
 * @param m number of characters in x
 * @return index in y where the first match starts, 0 for an empty pattern,
 *         or -1 when there is no match
 */
int KMP(char *y, int n, char *x, int m) {
   /* An empty pattern matches at the start; a negative or too-long
      pattern can never match and must not size the table below. */
   if (m == 0)
      return 0;
   if (m < 0 || n < m)
      return -1;

   /* preKmp() writes kmpNext[0..m], so the table needs m + 1 slots. */
   int i, j, kmpNext[m + 1];

   /* Preprocessing */
   preKmp(x, m, kmpNext);

   /* Searching: i indexes the pattern, j indexes the text. */
   i = j = 0;
   while (j < n) {
      while (i > -1 && x[i] != y[j])
         i = kmpNext[i];
      i++;
      j++;
      if (i == m) {
         /* Whole pattern matched; report the first hit only. */
         return j - i;
      }
   }

   return -1;
}

/**
 * Return the index of the first occurrence of needle in haystack.
 *
 * @param haystack NUL-terminated text to search
 * @param needle   NUL-terminated string to look for
 * @return index of the first match, 0 when needle is empty, or -1 when
 *         needle does not occur in haystack
 */
int strStr(char* haystack, char* needle) {
    int needleLen = (int) strlen(needle);
    if (needleLen == 0) return 0;

    int hayLen = (int) strlen(haystack);
    if (hayLen == 0) return -1;

    return KMP(haystack, hayLen, needle, needleLen);
}

C语言实现KMP模式匹配算法的更多相关文章

[从今天开始修炼数据结构]串、KMP模式匹配算法
[从今天开始修炼数据结构]基本概念 [从今天开始修炼数据结构]线性表及其实现以及实现有Itertor的ArrayList和LinkedList [从今天开始修炼数据结构]栈.斐波那契数列.逆波兰四则运 ...
KMP模式匹配算法
KMP模式匹配算法相信很多人对于这个还有点不了解,或者说是不懂,下面,通过一道题,来解决软考中的这个问题! 正题: aaabaaa,其next函数值为多少? 对于这个问题,我们应该怎么做呢? 1.整 ...
线性表-串：KMP模式匹配算法
一.简单模式匹配算法(略,逐字符比较即可) 二.KMP模式匹配算法 next数组:j为字符序号,从1开始. (1)当j=1时,next=0: (2)当存在前缀=后缀情况,next=相同字符数+1: ( ...
C++编程练习(7)----“KMP模式匹配算法“字符串匹配
子串在主串中的定位操作通常称做串的模式匹配. KMP模式匹配算法实现: /* Index_KMP.h头文件 */ #include<string> #include<sstream& ...
详细解读KMP模式匹配算法
转载请注明出处:http://blog.csdn.net/fightlei/article/details/52712461 首先我们需要了解什么是模式匹配? 子串定位运算又称为模式匹配(Patter ...
字符串的模式匹配算法——KMP模式匹配算法
朴素的模式匹配算法(C++) 朴素的模式匹配算法,暴力,容易理解 #include<iostream> using namespace std; int main() { string m ...
串、KMP模式匹配算法
串是由0个或者多个字符组成的有限序列,又名叫字符串. 串的比较: 串的比较是通过组成串的字符之间的编码来进行的,而字符的编码指的是字符在对应字符集中的序号. 计算机中常用的ASCII编码,由8位二进制 ...
浅谈KMP模式匹配算法
普通的模式匹配算法(BF算法) 子串的定位操作通常称为模式匹配算法假设有一个需求,需要我们从串"a b a b c a b c a c b a b"中,寻找内容为"a ...
数据结构（三）串---KMP模式匹配算法
(一)定义由于BF模式匹配算法的低效(有太多不必要的回溯和匹配),于是某三个前辈发表了一个模式匹配算法,可以大大避免重复遍历的情况,称之为克努特-莫里斯-普拉特算法,简称KMP算法 (二)KMP算法 ...

随机推荐

如何在java项目中使用lucene
lucene是一个开源的全文检索引擎工具包,但它不是一个成型的搜索引擎,它的功能就是负责将文本数据按照某种分词算法进行分词,分词后的结果存储在索引库中,然后根据关键字从索引库检检索. 那么应该如何使用 ...
个人作业Week1
一.<构建之法>提问 1.需求是什么?需求的规范需要明确吗? 2.一个人开发效率非常高,多人开发,个人效率随团队人数上升而直线下降,我们一般需要将大项目拆为小项目,使协作耦合产生的效率负影 ...
【读书笔记】Linux内核设计与实现（第三章）
3.1 进程处于执行期的程序. 进程就是正在执行的程序代码的实时结果.内核需要有效而又透明地管理所有细节. 执行线程(简称线程):在进程中活动的对象.每个线程都拥有一个独立的程序计数器.进程栈和一组 ...
junit-test
一.题目简介: 用单元测试junit4测试calculator类的加减乘除四种方法,来初步学习junit4的学习方法. 二.源码的github链接 :https://github.com/weare ...
Cron任务调度CronNET
Cron任务调度CronNET 阅读目录 1.Cron介绍和工具 2.CronNET介绍和使用 3.cron-expression-descriptor使用 4.资源如果用知乎,可以关注专栏:.NE ...
vue props 用法（转载）
前面的话组件接受的选项大部分与Vue实例一样,而选项props是组件中非常重要的一个选项.在 Vue 中,父子组件的关系可以总结为 props down, events up.父组件通过 props ...
PAT L3-021 神坛
https://pintia.cn/problem-sets/994805046380707840/problems/994805046577840128 在古老的迈瑞城,巍然屹立着 n 块神石.长老 ...
Sonatype Nexus 2.11.1-01 使用入门
nexus安装与启动 linux下: 安装路径 /home/maven/nexus-2.11.1-01/ 启动方法 ./bin/nexus start windows下: 管理员模式运行cmd.exe ...
传输层中的协议 TCP & UDP
面向连接的TCP协议 “面向连接”就是在正式通信前必须要与对方建立起连接.比如你给别人打电话,必须等线路接通了.对方拿起话筒才能相互通话.TCP(Transmission Control Protoc ...
[转帖]什么是Asp.net Core？和 .net core有什么区别？
什么是Asp.net Core?和 .net core有什么区别? https://www.cnblogs.com/itzhangxp/p/8322364.html 知道微软开始用 kestrel了 ...

C语言实现KMP模式匹配算法

C语言实现KMP模式匹配算法的更多相关文章

随机推荐

热门专题