// TSearch & TFileSearch Version 2.2 - Boyer-Moore-Horspool search algorithm
unit Searches;

(*-----------------------------------------------------------------------------*
|  Components        TSearch & TFileSearch                                      |
|  Version:          2.2                                                        |
|  Last Update:      10 June 2004                                               |
|  Compilers:        Delphi 3 - Delphi 7                                        |
|  Author:           Angus Johnson - angusj-AT-myrealbox-DOT-com                |
|  Copyright:        © 2001 - 2004 Angus Johnson                                |
|                                                                               |
|  Description:      Delphi implementation of the                               |
|                    Boyer-Moore-Horspool search algorithm.                     |
*-----------------------------------------------------------------------------*)

//10.06.04: Added support for widestring searches

interface

uses
  windows, sysutils, classes;

type

  //Base class holding the search state and the Boyer-Moore-Horspool tables.
  //Descendants decide where the searched data comes from (see SetData).
  TBaseSearch = class(TComponent)
  private
    fPos            : pchar;   //current position in the data; it is compared
                               //against the LAST character of the pattern
    fEnd            : pchar;   //fStart + fDataLength (one past the last byte)
    fPattern        : string;
    fPatLen         : integer;
    fPatInitialized : boolean; //false whenever the tables must be rebuilt
    fCaseSensitive  : boolean;
    JumpShift       : integer; //shift used when the last character matched
                               //but the rest of the pattern did not
    Shift           : array[#0..#255] of integer; //one entry per char value
    CaseBlindTable  : array[#0..#255] of char;    //char -> lower-case char
    procedure InitPattern;
    procedure MakeCaseBlindTable;
    procedure SetCaseSensitive(CaseSens: boolean);
    procedure SetPattern(const Pattern: string);
    procedure SetWsPattern(const WsPattern: widestring);
    function FindCaseSensitive: integer;
    function FindCaseInsensitive: integer;
  protected
    fStart          : pchar;
    fDataLength     : integer;
    procedure ClearData;
    procedure SetData(Data: pchar; DataLength: integer); virtual;
  public
    constructor Create(aowner: tcomponent); override;
    destructor Destroy; override;
    //The following Find functions return the 0 based offset of Pattern
    //else POSITION_EOF (-1) if the Pattern is not found  ...
    function FindFirst: integer;
    function FindNext: integer;
    function FindFrom(StartPos: integer): integer;
    //To simplify searching for widestring patterns -
    //assign the WsPattern property instead of the Pattern property
    property WsPattern: widestring write SetWsPattern;
    property Data: pchar read fStart;
    property DataSize: integer read fDataLength;
  published
    property CaseSensitive: boolean read fCaseSensitive write SetCaseSensitive;
    property Pattern: string read fPattern write SetPattern;
  end;

  TSearch = class(TBaseSearch)
  public
    //Changes visibility of base SetData() method to public ...
    //Note: TSearch does NOT own the data. To avoid the overhead of
    //copying it, it just gets a pointer to it.
    procedure SetData(Data: pchar; DataLength: integer); override;
  end;

  TFileSearch = class(TBaseSearch)
  private
    fFilename: string;
    procedure SetFilename(const Filename: string);
    procedure Closefile;
  public
    destructor Destroy; override;
  published
    //Assigning 'Filename' creates a memory map of the named file.
    //This memory mapping will be closed when either the Filename property is
    //assigned to '' or the FileSearch object is destroyed.
    property Filename: string read fFilename write SetFilename;
  end;

procedure Register;

const
  //Value returned by the Find functions when the pattern is not found.
  POSITION_EOF = -1;

implementation

//Registers both components on the 'Samples' page of the component palette.
procedure Register;
begin
  RegisterComponents('Samples', [TSearch, TFileSearch]);
end;

//------------------------------------------------------------------------------
// TBaseSearch methods ...
//------------------------------------------------------------------------------

//Fills CaseBlindTable so that CaseBlindTable[c] is the lower-case form of c
//(as returned by AnsiLowerCase) for every possible char value.
procedure TBaseSearch.MakeCaseBlindTable;
var
  i: char;
begin
  for i:= #0 to #255 do
    CaseBlindTable[i]:= ansilowercase(i)[1]; //strings are 1 based
end;
//------------------------------------------------------------------------------

//Creates the component with no data, an empty pattern and
//case insensitive searching.
constructor TBaseSearch.Create(AOwner: TComponent);
begin
  inherited Create(AOwner);
  fStart := nil;
  fPattern := '';
  fPatLen := 0;
  MakeCaseBlindTable;
  fCaseSensitive := false; //Default to case insensitive searches.
  fPatInitialized := false;
end;
//------------------------------------------------------------------------------

//Resets the data pointers before handing over to the inherited destructor.
destructor TBaseSearch.Destroy;
begin
  ClearData;
  inherited Destroy;
end;
//------------------------------------------------------------------------------

//Forgets the current data: all three data pointers become nil and the
//data length becomes zero. Nothing is freed here.
procedure TBaseSearch.ClearData;
begin
  fStart := nil;
  fPos := nil;
  fEnd := nil;
  fDataLength := 0;
end;
//------------------------------------------------------------------------------

//Property setter for Pattern. Stores the pattern and its length and flags
//the shift table for rebuilding. Does nothing if the pattern is unchanged.
procedure TBaseSearch.SetPattern(const Pattern: string);
begin
  if fPattern = Pattern then exit;
  fPattern := Pattern;
  fPatLen := length(Pattern);
  fPatInitialized := false;
end;
//------------------------------------------------------------------------------

//Property setter for WsPattern. The raw bytes of the widestring
//(2 bytes per character) are copied into fPattern, so the search
//looks for that byte sequence in the data.
procedure TBaseSearch.SetWsPattern(const WsPattern: widestring);
begin
  fPatLen := length(WsPattern)*2;
  fPatInitialized := false;
  if fPatLen = 0 then
  begin
    //keep fPattern and fPatLen in step, otherwise a later assignment of the
    //previous pattern text would be ignored by SetPattern while fPatLen = 0
    fPattern := '';
    exit;
  end;
  SetString(fPattern, pchar(pointer(WsPattern)), fPatLen);
end;
//------------------------------------------------------------------------------

//Points the search at a caller supplied buffer. The buffer is not copied.
//  Data       - first byte of the buffer
//  DataLength - number of bytes in the buffer
//A nil pointer or a length below 1 just leaves the object without data.
procedure TBaseSearch.SetData(Data: pchar; DataLength: integer);
begin
  ClearData;
  if (Data = nil) or (DataLength < 1) then exit;
  fStart := Data;
  fDataLength := DataLength;
  fEnd := fStart + fDataLength; //one past the last byte
end;
//------------------------------------------------------------------------------

//Property setter for CaseSensitive. The shift table depends on the case
//mode, so a change flags the table for rebuilding.
procedure TBaseSearch.SetCaseSensitive(CaseSens: boolean);
begin
  if fCaseSensitive = CaseSens then exit;
  fCaseSensitive := CaseSens;
  fPatInitialized := false;
end;
//------------------------------------------------------------------------------

//Builds the Boyer-Moore-Horspool shift table for the current pattern.
//  Shift[c]  - distance to advance when the data char under fPos is c;
//              0 is used as the marker for 'c equals the last pattern char'
//  JumpShift - distance to advance when the last char matched but the
//              remainder of the pattern did not
//Does nothing (and leaves fPatInitialized false) for an empty pattern.
procedure TBaseSearch.InitPattern;
var
  j: integer;
  i: char;
begin
  if fPatLen = 0 then exit;
  //chars that do not occur in the pattern allow a full pattern length shift
  for i := #0 to #255 do Shift[i]:= fPatLen;
  if fCaseSensitive then
  begin
    //all pattern chars except the last one ...
    for j := 1 to fPatLen-1 do
      Shift[fPattern[j]]:= fPatLen - j;
    //remember the normal shift for the last char before overwriting it
    //with the 0 marker
    JumpShift := Shift[fPattern[fPatLen]];
    Shift[fPattern[fPatLen]] := 0;
  end else
  begin
    //same as above, but every pattern char is folded through CaseBlindTable
    for j := 1 to fPatLen-1 do
      Shift[CaseBlindTable[fPattern[j]]]:= fPatLen - j;
    JumpShift := Shift[CaseBlindTable[fPattern[fPatLen]]];
    Shift[CaseBlindTable[fPattern[fPatLen]]] := 0;
  end;
  fPatInitialized := true;
end;
//------------------------------------------------------------------------------

//Searches from the beginning of the data.
//Returns the 0 based offset of the first match, else POSITION_EOF.
function TBaseSearch.FindFirst: integer;
begin
  //fPos is compared against the LAST pattern char, so the first candidate
  //position is fPatLen-1 bytes into the data
  fPos := fStart+fPatLen-1;
  result := FindNext;
end;
//------------------------------------------------------------------------------

//Searches starting with fPos placed StartPos bytes into the data.
//Note that fPos is the position compared against the LAST pattern char.
//Returns the 0 based offset of the match found, else POSITION_EOF.
function TBaseSearch.FindFrom(StartPos: integer): integer;
begin
  if StartPos < fPatLen-1 then //ie: StartPos must never be less than fPatLen-1
    fPos := fStart+fPatLen-1 else
    fPos := fStart+StartPos;
  result := FindNext;
end;
//------------------------------------------------------------------------------

//Continues the search from the current position.
//Returns the 0 based offset of the next match, else POSITION_EOF
//(in which case the current position is left at the end of the data).
function TBaseSearch.FindNext: integer;
begin
  if not fPatInitialized then InitPattern;
  //nb: a pattern exactly as long as the data is still searchable, so only
  //patterns LONGER than the data are rejected here
  if (fPatLen = 0) or (fPatLen > fDataLength) or (fPos >= fEnd) then
  begin
    fPos := fEnd;
    result := POSITION_EOF;
    exit;
  end;
  //fPos is nil when FindNext is called before FindFirst/FindFrom; never let
  //it sit in front of the first valid candidate position
  if fPos < fStart+fPatLen-1 then fPos := fStart+fPatLen-1;
  if fCaseSensitive then
    result := FindCaseSensitive else
    result := FindCaseInsensitive;
end;
//------------------------------------------------------------------------------

//Case sensitive Boyer-Moore-Horspool scan from fPos to fEnd.
//Returns the 0 based offset of the match, else POSITION_EOF.
//After a match fPos has been advanced by fPatLen.
function TBaseSearch.FindCaseSensitive: integer;
var
  i: integer;
  j: pchar;
begin
  result:= POSITION_EOF;
  while fPos < fEnd do
  begin
    i := Shift[fPos^];        //test last character first
    if i <> 0 then            //last char does not match
      inc(fPos,i)
    else
    begin                     //last char matches at least
      i := 1;
      //(j+i)^ is the data char aligned with fPattern[i] (fPattern is 1 based)
      j := fPos - fPatLen;
      while (i < fPatLen) and (fPattern[i] = (j+i)^) do inc(i);
      if (i = fPatLen) then
      begin
        result:= fPos-fStart-fPatLen+1;
        inc(fPos,fPatLen);
        break;                //FOUND!
      end
      else
        inc(fPos,JumpShift);
    end;
  end;
end;
//------------------------------------------------------------------------------

//Case insensitive Boyer-Moore-Horspool scan from fPos to fEnd. Both the
//pattern chars and the data chars are folded through CaseBlindTable.
//Returns the 0 based offset of the match, else POSITION_EOF.
//After a match fPos has been advanced by fPatLen.
function TBaseSearch.FindCaseInsensitive: integer;
var
  i: integer;
  j: pchar;
begin
  result:= POSITION_EOF;
  while fPos < fEnd do
  begin
    i := Shift[CaseBlindTable[fPos^]];   //test last character first
    if i <> 0 then                       //last char does not match
      inc(fPos,i)
    else
    begin                                //last char matches at least
      i := 1;
      //(j+i)^ is the data char aligned with fPattern[i] (fPattern is 1 based)
      j := fPos - fPatLen;
      while (i < fPatLen) and
        (CaseBlindTable[fPattern[i]] = CaseBlindTable[(j+i)^]) do inc(i);
      if (i = fPatLen) then
      begin
        result:= fPos-fStart-fPatLen+1;
        inc(fPos,fPatLen);
        break;                           //FOUND!
      end
      else
        inc(fPos,JumpShift);
    end;
  end;
end;

//------------------------------------------------------------------------------
// TSearch methods ...
//------------------------------------------------------------------------------

//Simply forwards to the inherited SetData with the same arguments.
procedure TSearch.SetData(Data: pchar; DataLength: integer);
begin
  inherited; //changes visibility of base method from protected to public
end;

//------------------------------------------------------------------------------
// TFileSearch methods ...
//------------------------------------------------------------------------------

//Releases the mapped file view (if any) before destroying the component.
destructor TFileSearch.Destroy;
begin
  CloseFile;
  inherited Destroy;
end;
//------------------------------------------------------------------------------

//Property setter for Filename. At design time only the name is stored.
//At run time any previous view is released, then the named file is mapped
//read-only into memory and becomes the search data. On any failure the
//object is left without data and with an empty Filename.
procedure TFileSearch.SetFilename(const Filename: string);
var
  filehandle: integer;
  filemappinghandle: thandle;
  size, highsize: integer;
begin
  if (csDesigning in ComponentState) then
  begin
    fFilename := Filename;
    exit;
  end;
  CloseFile;
  if (Filename = '') or not FileExists(Filename) then exit;
  filehandle := sysutils.FileOpen(Filename, fmopenread or fmsharedenynone);
  //FileOpen reports failure with -1; 0 is rejected too as it is never a
  //usable file handle
  if (filehandle = -1) or (filehandle = 0) then exit; //error
  size := GetFileSize(filehandle, @highsize);
  if (size <= 0) or (highsize <> 0) then //nb: files > 2 gig not supported
  begin
    CloseHandle(filehandle);
    exit;
  end;
  filemappinghandle :=
    CreateFileMapping(filehandle, nil, page_readonly, 0, 0, nil);
  //GetLastError must be the first API call after CreateFileMapping
  if (filemappinghandle <> 0) and (GetLastError = error_already_exists) then
  begin
    CloseHandle(filemappinghandle); //don't leak the handle being discarded
    filemappinghandle := 0;
  end;
  if filemappinghandle <> 0 then
    SetData(MapViewOfFile(filemappinghandle,file_map_read,0,0,0),size);
  //SetData leaves fStart nil when MapViewOfFile failed
  if fStart <> nil then fFilename := Filename;
  //the view (if any) is released later by CloseFile via UnmapViewOfFile,
  //so both handles can be closed here
  if filemappinghandle <> 0 then CloseHandle(filemappinghandle);
  CloseHandle(filehandle);
end;
//------------------------------------------------------------------------------

//Releases the current file view (if any) and clears Filename and the
//data pointers. Does nothing at design time.
procedure TFileSearch.CloseFile;
begin
  if (csDesigning in ComponentState) then exit;
  if (fStart <> nil) then UnmapViewOfFile(fStart);
  fFilename := '';
  ClearData;
end;
//------------------------------------------------------------------------------

end.
TSearch & TFileSearch Version 2.2 -Boyer-Moore-Horspool search algorithm的更多相关文章
- Leetcode OJ : Implement strStr() [ Boyer–Moore string search algorithm ] python solution
class Solution { public: int strStr(char *haystack, char *needle) { , skip[]; char *str = haystack, ...
- Moore majority vote algorithm(摩尔投票算法)
Boyer-Moore majority vote algorithm(摩尔投票算法) 简介 Boyer-Moore majority vote algorithm(摩尔投票算法)是一种在线性时间O( ...
- Boyer–Moore (BM)字符串搜索算法
在计算机科学里,Boyer-Moore字符串搜索算法是一种非常高效的字符串搜索算法.它由Bob Boyer和J Strother Moore设计于1977年.此算法仅对搜索目标字符串(关键字)进行预处 ...
- Boyer Moore算法(字符串匹配)
上一篇文章,我介绍了KMP算法. 但是,它并不是效率最高的算法,实际采用并不多.各种文本编辑器的"查找"功能(Ctrl+F),大多采用Boyer-Moore算法. Boyer-Mo ...
- Boyer-Moore 字符串匹配算法
字符串匹配问题的形式定义: 文本(Text)是一个长度为 n 的数组 T[1..n]: 模式(Pattern)是一个长度为 m 且 m≤n 的数组 P[1..m]: T 和 P 中的元素都属于有限的字 ...
- grep之字符串搜索算法Boyer-Moore由浅入深(比KMP快3-5倍)
这篇长文历时近两天终于完成了,前两天帮网站翻译一篇文章“为什么GNU grep如此之快?”,里面提及到grep速度快的一个重要原因是使用了Boyer-Moore算法作为字符串搜索算法,兴趣之下就想了解 ...
- grep之字符串搜索算法Boyer-Moore由浅入深(比KMP快3-5倍)(转)
这篇长文历时近两天终于完成了,前两天帮网站翻译一篇文章“为什么GNU grep如此之快?”,里面提及到grep速度快的一个重要原因是使用了Boyer-Moore算法作为字符串搜索算法,兴趣之下就想了解 ...
- KMP算法简单回顾
前言 虽从事企业应用的设计与开发,闲暇之时,还是偶尔涉猎数学和算法的东西,本篇根据个人角度来写一点关于KMP串匹配的东西,一方面向伟人致敬,另一方面也是练练手,头脑风暴.我在自娱自乐,路过的朋友别太认 ...
- Erlang/Elixir精选-第5期(20200106)
The forgotten ideas in computer science-Joe Armstrong 在2020年的第一期里面,一起回顾2018年Joe的 The forgotten idea ...
随机推荐
- HOJ 1108
题目链接:HOJ-1108 题意为给定N和M,找出最小的K,使得K个N组成的数能被M整除.比如对于n=2,m=11,则k=2. 思路是抽屉原理,K个N组成的数modM的值最多只有M个. 具体看代码: ...
- eclipse maven jetty启动修改默认端口
如何修改eclipse中的maven项目jetty服务器的默认端口那?网上有很多办法,但配置上都没有效果,最后找到了简单.简洁的解决办法,就是在eclipse的jetty启动命令后面加上以下内容 je ...
- What I Learned as a Junior Developer Writing Tests for Legacy Code(转载)
I go to the gym and lift weights because I like the feeling of getting stronger and better. Two mont ...
- uWSGI+Nginx+Flask在Linux下的部署
搞了一天多,终于搞通了uWSGI的部署原理,下面总结一下遇到的一些坑,希望给读者能够少走弯路. 简单来说,uWSGI是一个web服务器,Nginx进行反向代理的其实跟这些服务器可以说没有 ...
- 当while read line 遇到 ssh
问题:while read line 中使用ssh只能读取一行? #!/bin/sh while read line do echo $line ssh root@$line "echo 1 ...
- 产生唯一的临时文件mkstemp()
LINUX下建立临时文件的方法(函数)有很多, mktemp, tmpfile等等. 今天只推荐最安全最好用的一种: mkstemp. mkstemp (建立唯一临时文件)头文件: #include < ...
- Funny Car Racing(最短路变形)
描述 There is a funny car racing in a city with n junctions and m directed roads. The funny part is: e ...
- git - git命令中文显示乱码
使用git add添加要提交的文件的时候,如果文件名是中文,会显示形如897\232\350\256...的乱码,解决办法: git config --global core.quotepath ...
- Python 的十个自然语言处理工具
原文 先mark,后续尝试. 1.NLTK NLTK 在用 Python 处理自然语言的工具中处于领先的地位.它提供了 WordNet 这种方便处理词汇资源的接口,还有分类.分词.除茎.标注.语法分析 ...
- html禁止浏览器默认行为,让页面更像应用。
在html或body行内写入:oncontextmenu="return false" ondragstart='return false;' onselectstart=&quo ...