TSearch & TFileSearch Version 2.2 - Boyer-Moore-Horspool search algorithm
unit Searches; (*-----------------------------------------------------------------------------*
| Components TSearch & TFileSearch |
| Version: 2.2 |
| Last Update: 10 June 2004 |
| Compilers: Delphi 3 - Delphi 7 |
| Author: Angus Johnson - angusj-AT-myrealbox-DOT-com |
| Copyright: © 2001 -2004 Angus Johnson |
| |
| Description: Delphi implementation of the |
| Boyer-Moore-Horspool search algorithm. |
*-----------------------------------------------------------------------------*)

//10.06.2004: Added support for widestring searches

interface

uses
  windows, sysutils, classes;

type

  //Base class holding the search state (pattern, shift table, current
  //position) and the Find methods. It searches the byte range that starts at
  //fStart and is fDataLength bytes long; descendants decide where that data
  //comes from.
  TBaseSearch = class(TComponent)
  private
    fPos            : pchar;    //data byte currently aligned with the last
                                //character of the pattern
    fEnd            : pchar;    //fStart + fDataLength (set in SetData)
    fPattern        : string;
    fPatLen         : integer;  //pattern length in bytes
    fPatInitialized : boolean;  //false => InitPattern must rebuild Shift
    fCaseSensitive  : boolean;
    JumpShift       : integer;  //advance used when the last character matched
                                //but the full comparison failed
    Shift           : array[#0..#255] of integer;
    CaseBlindTable  : array[#0..#255] of char;
    procedure InitPattern;
    procedure MakeCaseBlindTable;
    procedure SetCaseSensitive(CaseSens: boolean);
    procedure SetPattern(const Pattern: string);
    procedure SetWsPattern(const WsPattern: widestring);
    function FindCaseSensitive: integer;
    function FindCaseInsensitive: integer;
  protected
    fStart          : pchar;
    fDataLength     : integer;
    procedure ClearData;
    procedure SetData(Data: pchar; DataLength: integer); virtual;
  public
    constructor Create(aowner: tcomponent); override;
    destructor Destroy; override;
    //The following Find functions return the 0 based offset of Pattern
    //else POSITION_EOF (-1) if the Pattern is not found  ...
    function FindFirst: integer;
    function FindNext: integer;
    function FindFrom(StartPos: integer): integer;
    //To simplify searching for widestring patterns -
    //assign the WsPattern property instead of the Pattern property
    property WsPattern: widestring write SetWsPattern;
    property Data: pchar read fStart;
    property DataSize: integer read fDataLength;
  published
    property CaseSensitive: boolean read fCaseSensitive write SetCaseSensitive;
    property Pattern: string read fPattern write SetPattern;
  end;

  TSearch = class(TBaseSearch)
  public
    //Changes visibility of base SetData() method to public ...
    //Note: TSearch does NOT own the data. To avoid the overhead of
    //copying it, it just gets a pointer to it.
    procedure SetData(Data: pchar; DataLength: integer); override;
  end;

  TFileSearch = class(TBaseSearch)
  private
    fFilename: string;
    procedure SetFilename(const Filename: string);
    procedure Closefile;
  public
    destructor Destroy; override;
  published
    //Assigning 'Filename' creates a memory map of the named file.
    //This memory mapping will be closed when either the Filename property is
    //assigned to '' or the FileSearch object is destroyed.
    property Filename: string read fFilename write SetFilename;
  end;

procedure Register;

const
  //Value returned by the Find functions when the pattern is not found.
  POSITION_EOF = -1;

implementation

//Registers TSearch and TFileSearch on the 'Samples' component palette page.
procedure Register;
begin
  RegisterComponents('Samples', [TSearch, TFileSearch]);
end;

//------------------------------------------------------------------------------
// TBaseSearch methods ...
//------------------------------------------------------------------------------

//Fills CaseBlindTable so that every character maps to the first character of
//its AnsiLowerCase() conversion. The case insensitive search compares
//characters through this table.
procedure TBaseSearch.MakeCaseBlindTable;
var
  i: char;
begin
  for i:= #0 to #255 do
    CaseBlindTable[i]:= ansilowercase(i)[1];
end;
//------------------------------------------------------------------------------

//Creates the component with no data, an empty pattern and case insensitive
//searching, and builds the case folding table once.
constructor TBaseSearch.Create(AOwner: TComponent);
begin
  inherited Create(AOwner);
  fStart := nil;
  fPattern := '';
  fPatLen := 0;
  MakeCaseBlindTable;
  fCaseSensitive := false;      //Default to case insensitive searches.
  fPatInitialized := false;
end;
//------------------------------------------------------------------------------

//Resets the data pointers before the inherited destructor runs.
destructor TBaseSearch.Destroy;
begin
  ClearData;
  inherited Destroy;
end;
//------------------------------------------------------------------------------

//Forgets the current data: all three data pointers become nil and the data
//length becomes zero. Nothing is freed here.
procedure TBaseSearch.ClearData;
begin
  fStart := nil;
  fPos := nil;
  fEnd := nil;
  fDataLength := 0;
end;
//------------------------------------------------------------------------------

//Property setter for Pattern. Stores the new pattern and its length and
//flags the shift table for rebuilding. Does nothing when the pattern is
//unchanged.
procedure TBaseSearch.SetPattern(const Pattern: string);
begin
  if fPattern = Pattern then exit;
  fPattern := Pattern;
  fPatLen := length(Pattern);
  fPatInitialized := false;
end;
//------------------------------------------------------------------------------

//Property setter for WsPattern. Copies the raw bytes of the widestring into
//fPattern so that the byte-wise search can look for them.
procedure TBaseSearch.SetWsPattern(const WsPattern: widestring);
begin
  fPatLen := length(WsPattern)*2;     //2 bytes per widechar
  fPatInitialized := false;
  if fPatLen = 0 then
  begin
    //Clear the stored pattern too. Leaving the previous text in fPattern
    //(with fPatLen = 0) would make a later 'Pattern := <previous text>' hit
    //the early exit in SetPattern and never restore fPatLen.
    fPattern := '';
    exit;
  end;
  SetString(fPattern, pchar(pointer(WsPattern)), fPatLen);
end;
//------------------------------------------------------------------------------

//Points the search at a new block of data. The data is not copied; only the
//pointer and length are stored. A nil pointer or a length below 1 simply
//leaves the object without data.
procedure TBaseSearch.SetData(Data: pchar; DataLength: integer);
begin
  ClearData;
  if (Data = nil) or (DataLength < 1) then exit;
  fStart := Data;
  fDataLength := DataLength;
  fEnd := fStart + fDataLength;
end;
//------------------------------------------------------------------------------

//Property setter for CaseSensitive. The shift table depends on the case
//mode, so a change flags it for rebuilding.
procedure TBaseSearch.SetCaseSensitive(CaseSens: boolean);
begin
  if fCaseSensitive = CaseSens then exit;
  fCaseSensitive := CaseSens;
  fPatInitialized := false;
end;
//------------------------------------------------------------------------------

//Builds the Shift table for the current pattern and case mode.
// - Characters that do not occur in the pattern shift by the full pattern
//   length.
// - Each of the first fPatLen-1 pattern characters shifts by its distance
//   from the end of the pattern (later occurrences overwrite earlier ones).
// - The shift of the last pattern character is saved in JumpShift and then
//   replaced by 0, which the Find loops read as 'last character matches'.
//An empty pattern leaves the table untouched and fPatInitialized false.
procedure TBaseSearch.InitPattern;
var
  j: integer;
  i: char;
begin
  if fPatLen = 0 then exit;
  for i := #0 to #255 do Shift[i]:= fPatLen;
  if fCaseSensitive then
  begin
    for j := 1 to fPatLen-1 do
      Shift[fPattern[j]]:= fPatLen - j;
    JumpShift := Shift[fPattern[fPatLen]];
    Shift[fPattern[fPatLen]] := 0;
  end else
  begin
    for j := 1 to fPatLen-1 do
      Shift[CaseBlindTable[fPattern[j]]]:= fPatLen - j;
    JumpShift := Shift[CaseBlindTable[fPattern[fPatLen]]];
    Shift[CaseBlindTable[fPattern[fPatLen]]] := 0;
  end;
  fPatInitialized := true;
end;
//------------------------------------------------------------------------------

//Searches from the start of the data. Returns the 0 based offset of the
//first match, or POSITION_EOF if there is none.
function TBaseSearch.FindFirst: integer;
begin
  //fPos tracks the data byte aligned with the LAST pattern character, so the
  //earliest possible position is fPatLen-1 bytes into the data.
  fPos := fStart+fPatLen-1;
  result := FindNext;
end;
//------------------------------------------------------------------------------

//Searches starting with fPos placed at offset StartPos (clamped to a minimum
//of fPatLen-1). Returns the 0 based offset of the match, or POSITION_EOF.
function TBaseSearch.FindFrom(StartPos: integer): integer;
begin
  if StartPos < fPatLen-1 then  //ie: StartPos must never be less than fPatLen-1
    fPos := fStart+fPatLen-1 else
    fPos := fStart+StartPos;
  result := FindNext;
end;
//------------------------------------------------------------------------------

//Continues the search from the current position. Returns the 0 based offset
//of the next match, or POSITION_EOF (leaving fPos at fEnd) when the pattern
//is empty, is longer than the data, or the position is already at or past
//the end of the data.
function TBaseSearch.FindNext: integer;
begin
  if not fPatInitialized then InitPattern;
  //nb: a pattern exactly as long as the data can still match once, so only
  //patterns LONGER than the data are rejected here.
  if (fPatLen = 0) or (fPatLen > fDataLength) or (fPos >= fEnd) then
  begin
    fPos := fEnd;
    result := POSITION_EOF;
    exit;
  end;
  if fCaseSensitive then
    result := FindCaseSensitive else
    result := FindCaseInsensitive;
end;
//------------------------------------------------------------------------------

//Case sensitive search loop. On entry fPos is the data byte to compare with
//the last pattern character. Returns the 0 based offset of the match and
//moves fPos one pattern length onward, or returns POSITION_EOF once fPos
//reaches fEnd.
function TBaseSearch.FindCaseSensitive: integer;
var
  i: integer;
  j: pchar;
begin
  result:= POSITION_EOF;
  while fPos < fEnd do
  begin
    i := Shift[fPos^];        //test last character first
    if i <> 0 then            //last char does not match
      inc(fPos,i)
    else
    begin                     //last char matches at least
      i := 1;
      j := fPos - fPatLen;    //so that (j+i)^ lines up with fPattern[i]
      while (i < fPatLen) and (fPattern[i] = (j+i)^) do inc(i);
      if (i = fPatLen) then
      begin
        result:= fPos-fStart-fPatLen+1;
        inc(fPos,fPatLen);
        break;                //FOUND!
      end
      else
        inc(fPos,JumpShift);
    end;
  end;
end;
//------------------------------------------------------------------------------

//Case insensitive search loop. Identical to FindCaseSensitive except that
//every data and pattern character is folded through CaseBlindTable before
//being compared or used as a Shift index.
function TBaseSearch.FindCaseInsensitive: integer;
var
  i: integer;
  j: pchar;
begin
  result:= POSITION_EOF;
  while fPos < fEnd do
  begin
    i := Shift[CaseBlindTable[fPos^]];   //test last character first
    if i <> 0 then                       //last char does not match
      inc(fPos,i)
    else
    begin                                //last char matches at least
      i := 1;
      j := fPos - fPatLen;    //so that (j+i)^ lines up with fPattern[i]
      while (i < fPatLen) and
        (CaseBlindTable[fPattern[i]] = CaseBlindTable[(j+i)^]) do inc(i);
      if (i = fPatLen) then
      begin
        result:= fPos-fStart-fPatLen+1;
        inc(fPos,fPatLen);
        break;                           //FOUND!
      end
      else
        inc(fPos,JumpShift);
    end;
  end;
end;

//------------------------------------------------------------------------------
// TSearch methods ...
//------------------------------------------------------------------------------

//Public override that only calls the inherited SetData. The data is not
//copied, so the caller must keep it alive while it is being searched.
procedure TSearch.SetData(Data: pchar; DataLength: integer);
begin
  inherited; //changes visibility of base method from protected to public
end;

//------------------------------------------------------------------------------
// TFileSearch methods ...
//------------------------------------------------------------------------------

//Unmaps any file view before the inherited destructor runs.
destructor TFileSearch.Destroy;
begin
  CloseFile;
  inherited Destroy;
end;
//------------------------------------------------------------------------------

//Property setter for Filename. At design time the name is only stored. At
//run time any previous view is released, then the named file is opened
//read-only and mapped into memory as the data to search. On any failure the
//object is left without data and Filename stays ''.
procedure TFileSearch.SetFilename(const Filename: string);
var
  filehandle: integer;
  filemappinghandle: thandle;
  size, highsize: integer;
begin
  if (csDesigning in ComponentState) then
  begin
    fFilename := Filename;
    exit;
  end;
  CloseFile;
  if (Filename = '') or not FileExists(Filename) then exit;
  filehandle := sysutils.FileOpen(Filename, fmopenread or fmsharedenynone);
  if filehandle = -1 then exit;               //error: FileOpen returns -1
  size := GetFileSize(filehandle, @highsize);
  if (size <= 0) or (highsize <> 0) then      //nb: files > 2 gig not supported
  begin
    CloseHandle(filehandle);
    exit;
  end;
  filemappinghandle :=
    CreateFileMapping(filehandle, nil, page_readonly, 0, 0, nil);
  if GetLastError = error_already_exists then filemappinghandle := 0;
  if filemappinghandle <> 0 then
    SetData(MapViewOfFile(filemappinghandle,file_map_read,0,0,0),size);
  if fStart <> nil then fFilename := Filename;
  //The handles are closed straight away; the view set up by MapViewOfFile is
  //what CloseFile later releases with UnmapViewOfFile.
  if filemappinghandle <> 0 then CloseHandle(filemappinghandle);
  CloseHandle(filehandle);
end;
//------------------------------------------------------------------------------

//Releases the mapped view (if any), clears Filename and resets the data
//pointers. Does nothing at design time.
procedure TFileSearch.CloseFile;
begin
  if (csDesigning in ComponentState) then exit;
  if (fStart <> nil) then UnmapViewOfFile(fStart);
  fFilename := '';
  ClearData;
end;

//------------------------------------------------------------------------------

end.
TSearch & TFileSearch Version 2.2 -Boyer-Moore-Horspool search algorithm的更多相关文章
- Leetcode OJ : Implement strStr() [ Boyer–Moore string search algorithm ] python solution
class Solution { public: int strStr(char *haystack, char *needle) { , skip[]; char *str = haystack, ...
- Moore majority vote algorithm(摩尔投票算法)
Boyer-Moore majority vote algorithm(摩尔投票算法) 简介 Boyer-Moore majority vote algorithm(摩尔投票算法)是一种在线性时间O( ...
- Boyer–Moore (BM)字符串搜索算法
在计算机科学里,Boyer-Moore字符串搜索算法是一种非常高效的字符串搜索算法.它由Bob Boyer和J Strother Moore设计于1977年.此算法仅对搜索目标字符串(关键字)进行预处 ...
- Boyer Moore算法(字符串匹配)
上一篇文章,我介绍了KMP算法. 但是,它并不是效率最高的算法,实际采用并不多.各种文本编辑器的"查找"功能(Ctrl+F),大多采用Boyer-Moore算法. Boyer-Mo ...
- Boyer-Moore 字符串匹配算法
字符串匹配问题的形式定义: 文本(Text)是一个长度为 n 的数组 T[1..n]: 模式(Pattern)是一个长度为 m 且 m≤n 的数组 P[1..m]: T 和 P 中的元素都属于有限的字 ...
- grep之字符串搜索算法Boyer-Moore由浅入深(比KMP快3-5倍)
这篇长文历时近两天终于完成了,前两天帮网站翻译一篇文章“为什么GNU grep如此之快?”,里面提及到grep速度快的一个重要原因是使用了Boyer-Moore算法作为字符串搜索算法,兴趣之下就想了解 ...
- grep之字符串搜索算法Boyer-Moore由浅入深(比KMP快3-5倍)(转)
这篇长文历时近两天终于完成了,前两天帮网站翻译一篇文章“为什么GNU grep如此之快?”,里面提及到grep速度快的一个重要原因是使用了Boyer-Moore算法作为字符串搜索算法,兴趣之下就想了解 ...
- KMP算法简单回顾
前言 虽从事企业应用的设计与开发,闲暇之时,还是偶尔涉猎数学和算法的东西,本篇根据个人角度来写一点关于KMP串匹配的东西,一方面向伟人致敬,另一方面也是练练手,头脑风暴.我在自娱自乐,路过的朋友别太认 ...
- Erlang/Elixir精选-第5期(20200106)
The forgotten ideas in computer science-Joe Armestrong 在2020年的第一期里面,一起回顾2018年Joe的 The forgotten idea ...
随机推荐
- MYSQL三种安装方式--二进制包安装
1. 把二进制包下载到/usr/local/src下 2. 如果是tar.gz包,则使用tar zxvf 进行解压 如果是tar包,则可以使用tar xvf 进行解压 3. $ mv mysql-5. ...
- C/C++——C语言数组名与指针
版权声明:原创文章,转载请注明出处. 1. 一维数组名与指针 对于一维数组来说,数组名就是指向该数组首地址的指针,对于: ]; array就是该数组的首地址,如果我们想定义一个指向该数组的指针,我们可 ...
- acm专题---键树
题目来源:http://hihocoder.com/problemset/problem/1014?sid=982973 #1014 : Trie树 时间限制:10000ms 单点时限:1000ms ...
- 如何将qlv格式的腾讯视频转换为mp4格式
一般来说,每个视频网站都会有自己的视频播放格式,如优酷的KUX.爱奇艺的QSV和腾讯的QLV等. 但是大家知道,优酷是有转码功能的,而就目前来说腾讯视频还没有转码功能,下面是将qlv格式的腾讯视频转换 ...
- shell视频
本帖最后由 Shell_HAT 于 2014-04-18 16:51 编辑 尚观全套RHCE视频http://pan.baidu.com/s/1pJvzVR1 马哥网络班-中级视频内容http://p ...
- python基础(8)--迭代器、生成器、装饰器
1.迭代器 迭代器是访问集合元素的一种方式.迭代器对象从集合的第一个元素开始访问,直到所有的元素被访问完结束.迭代器只能往前不会后退,不过这也没什么,因为人们很少在迭代途中往后退.另外,迭代器的一大优 ...
- linux中$的各种含义
我们先写一个简单的脚本,执行以后再解释各个变量的意义 # touch variable # vi variable 脚本内容如下: #!/bin/sh echo "number: ...
- day3修改配置文件
有如下配置文件,在指定文件位置添加一条新的记录: global log 127.0.0.1 local2 daemon maxconn log 127.0.0.1 local2 info defaul ...
- linux中top命令
经常用到top命令,也就简单看看进程多不多,卡不卡, 这次在网上找到一个归纳总结的,以供参考. 简介 top 命令是 Linux 下常用的性能分析工具,能够实时显示系统中各个进程的资源占用状况,类似于 ...
- 使用gradle建立java application
建立项目目录mkdir java-democd java-demo 初始化项目目录gradle init --type java-application 编译./gradlew build 运行./g ...