先贴上这两天刚写好的 C++ 代码(借助 STL 省了不少功夫,代码仍有待优化)。

Head.h

 #ifndef HEAD_H
#define HEAD_H #include "D:\\LiYangGuang\\VSPRO\\MYLSH\\HashTable.h" #include <iostream>
#include <fstream>
#include <time.h>
#include <cstdlib>
#include <vector>
#include <map>
#include <set>
#include <string> using namespace std; void loadData(bool (*data)[], int n, char *filename);
void createTable(HashTable HTSet[], bool data[][], bool extDat[][n][k] );
void insert(HT HTSet[], bool (*extDat)[n][k]);
void standHash(HT HTSet[]);
void search(vector<int>& record, bool query[], HT HTSet[]);
/*int getPosition(int V[], std::string s, int N);*/ #endif

HashTable.h

// Types and size constants shared by the translation units of the LSH demo.
#include <string>
// NOTE(review): the enumerator values of k, l and n are absent from this listing and must be
// supplied before the header can compile. Elsewhere in the file, k sizes the R/RNum arrays and
// the bucket keys, l sizes the HTSet array, n sizes the data array, and M sizes Hash2.
// NOTE(review): the enum and the first typedef share a line with the #include directive below;
// they need to be moved onto lines of their own.
#include <vector> enum{ k = , l = , n = , M = n}; typedef struct
{
std::string key; // the bucket's key; insert() builds it from k sampled bits as '0'/'1' characters
std::vector<int> elem; // indices of the data items that fall into this bucket
// INT (declared on the next line) is one node of a singly linked chain hanging off a Hash2 slot.
} bucket; struct INT
{
bool used; // true once val has been assigned by standHash()
int val; // index of a bucket inside HashTable::BukSet
struct INT * next; // next node of the chain, or NULL at the tail
// A freshly constructed node is unused, holds value 0 and has no successor.
INT() : used(false), val(), next(NULL){}
// HashTable (declared on the next line) is one LSH table; HT is its short alias.
}; typedef struct HashTable
{
int R[k]; // k random dimensions; createTable() draws each from [0, 128)
int RNum[k]; // random coefficients, each less than M; passed to getPosition()
//string DC; // the contents of k dimensions
std::vector<bucket> BukSet; // the buckets of this table, in creation order
INT Hash2[M]; // chain heads, indexed by the value getPosition() returns for a bucket key
} HT;

getPosition.h

#include <string>
/// Maps a bucket key onto a slot index of a table's Hash2 array.
///
/// Computes the sum of V[i] * (s[i] - '0') over the first N key characters,
/// reducing modulo M after every term. For non-negative coefficients the
/// result lies in [0, M).
///
/// @param V  coefficients, one per key character; at least N entries.
/// @param s  key made of digit characters (the callers in this file pass
///           strings of '0' and '1').
/// @param N  number of leading key characters to hash; characters beyond the
///           end of s are not read.
/// @return   the slot index.
inline int getPosition(const int V[], const std::string& s, int N)
{
    // Honour the caller-supplied length, but never index past a short key.
    int limit = N;
    if (limit > static_cast<int>(s.size()))
        limit = static_cast<int>(s.size());

    int position = 0;
    for (int col = 0; col < limit; ++col)
    {
        position += V[col] * (s[col] - '0');
        position %= M;  // keep the running sum small so it cannot grow without bound
    }
    return position;
}

computeDistance.h

/// Counts the positions at which two boolean vectors differ (Hamming distance).
///
/// @param v1  first vector, at least N elements.
/// @param v2  second vector, at least N elements.
/// @param N   number of leading elements to compare.
/// @return    how many indices i in [0, N) have v1[i] != v2[i].
inline int distance(bool v1[], bool v2[], int N)
{
    int mismatches = 0;
    int idx = 0;
    while (idx < N)
    {
        if (v1[idx] != v2[idx])
            ++mismatches;
        ++idx;
    }
    return mismatches;
}

main.cpp

#include "Head.h"
#include "D:\\LiYangGuang\\VSPRO\\MYLSH\\computeDistance.h"
using namespace std;
// length of sub hashtable, as well the number of elements.
const int MAX_Q = 1000; HT HTSet[l]; bool data[n][128];
bool extDat[l][n][k]; bool query[MAX_Q][128]; // set the query item to 1000. int main(int argc, char *argv)
{
/************************************************************************/
/* Firstly, create the HashTables */
/************************************************************************/
char *filename = "D:\\LiYangGuang\\VSPRO\\MYLSH\\data.txt";
loadData(data, n, filename);
createTable(HTSet, data, extDat);
insert(HTSet,extDat);
standHash(HTSet); /************************************************************************/
/* Secondly, start the LSH search */
/************************************************************************/ char *queryFile = "D:\\LiYangGuang\\VSPRO\\MYLSH\\query.txt";
loadData(query, MAX_Q, queryFile);
clock_t time0 = clock();
for(int qId = 0; qId < MAX_Q; ++qId)
{
vector<int> record;
clock_t timeA = clock();
search(record, query[qId], HTSet);
set<int> Dis;
for(size_t i = 0; i < record.size(); ++i)
Dis.insert(distance(data[record[i]], query[qId]));
clock_t timeB = clock();
cout << "第 " << qId + 1 << " 次查询时间:" << timeB - timeA << endl;
}
clock_t time1 = clock();
cout << "总查询时间:" << time1 - time0 << endl; return 0; }

loadData.cpp

#include <string>
#include <fstream>

/// Loads a matrix of bits from a text file.
///
/// Each of the first n lines of the file supplies one row. For each of the
/// first 128 characters of a line, the low bit of (character - '0') becomes
/// one cell, so '0' yields false and '1' yields true.
///
/// Cells with no corresponding character (a short line, a missing line, or a
/// file that could not be opened) are set to false rather than being read from
/// beyond the end of the line buffer.
///
/// @param data      destination; at least n rows of 128 cells.
/// @param n         number of rows to fill.
/// @param filename  path of the text file to read.
void loadData(bool (*data)[128], int n, char* filename)
{
    const std::string::size_type columns = 128;

    std::ifstream ifs(filename, std::ios::in);
    std::string line;
    for (int row = 0; row < n; ++row)
    {
        // A failed read (end of file, unopened stream) counts as an empty line.
        if (!std::getline(ifs, line))
            line.clear();

        for (std::string::size_type col = 0; col < columns; ++col)
            data[row][col] = col < line.size() && ((line[col] - '0') & 1) != 0;
    }
    // The stream closes itself when it goes out of scope.
}

creatTable.cpp

#include "HashTable.h"
#include <ctime> void createTable(HT HTSet[], bool data[][128], bool extDat[][n][k] )
{
srand((unsigned)time(NULL));
for(int tableNum = 0; tableNum < l; ++tableNum)
{ /* creat the ith Table;*/ for(int randNum = 0; randNum < k; ++randNum)
{
HTSet[tableNum].R[randNum] = rand() % 128;
HTSet[tableNum].RNum[randNum] = rand() % M; for(int item = 0; item < n; ++item)
{
extDat[tableNum][item][randNum] =
data[item][HTSet[tableNum].R[randNum]];
}
}
}
}

insertData.cpp

#include "HashTable.h"
#include <iostream>
#include <map>
using namespace std;

// Maps a bucket key to the index of that bucket inside the BukSet of the table
// currently being filled; emptied again after every table.
map<string, int> deRepeat;

/// Tells whether the first n elements of two boolean arrays are identical.
///
/// @param V   first array, at least n elements.
/// @param V2  second array, at least n elements.
/// @param n   number of leading elements to compare.
/// @return    true when every compared pair is equal (also when n <= 0).
bool equal(bool V[], bool V2[], int n)
{
    // The index must advance on every pass; without that the loop never
    // terminates once the first pair of elements matches.
    for (int i = 0; i < n; ++i)
    {
        if (V[i] != V2[i])
            return false;
    }
    return true;
}

/// Appends the first n flags of v to s as '0'/'1' characters.
///
/// @param v  flags to encode, at least n elements.
/// @param n  number of flags to append.
/// @param s  prefix to extend (taken by value).
/// @return   s followed by the n encoded characters.
string itoa(bool *v, int n, string s)
{
    for (int i = 0; i < n; ++i)
        s.push_back(v[i] + '0');
    return s;
}

/// Groups the n data items of every table into buckets.
///
/// The key of an item is the string formed from its k sampled bits. Items with
/// equal keys share one bucket, and buckets are appended to the table's BukSet
/// in order of first appearance. Progress is written to standard output.
///
/// @param HTSet   the l hash tables whose BukSet vectors are filled.
/// @param extDat  sampled bits; extDat[t][i] holds the k bits of item i in table t.
void insert(HT HTSet[], bool (*extDat)[n][k])
{
    for (int t = 0; t < l; ++t) /* t: table */
    {
        int bktNum = 0;

        // Item 0 always opens the first bucket of the table.
        bucket bkt;
        bkt.key = itoa(extDat[t][0], k, string(""));
        bkt.elem.push_back(0);
        HTSet[t].BukSet.push_back(bkt);
        deRepeat.insert(make_pair(bkt.key, bktNum++));  // 0 is this bucket's position

        for (int item = 1; item < n; ++item)
        {
            cout << item << endl;
            const string key = itoa(extDat[t][item], k, string(""));

            // One lookup serves both the membership test and the access.
            map<string, int>::iterator found = deRepeat.find(key);
            if (found != deRepeat.end())
            {
                HTSet[t].BukSet[found->second].elem.push_back(item);
                cout << "exist" << endl;
            }
            else
            {
                bucket bkt2;
                bkt2.key = key;
                bkt2.elem.push_back(item);
                HTSet[t].BukSet.push_back(bkt2);
                deRepeat.insert(make_pair(bkt2.key, bktNum++));
                cout << "creat" << endl;
            }
        }
        deRepeat.clear();
    }
}

standHash.cpp

#include "HashTable.h"
#include <iostream>
#include "getPosition.h"

/// Registers every bucket of every table in that table's Hash2 array.
///
/// The slot of a bucket is getPosition() of its key with the table's RNum
/// coefficients. An unused head slot stores the bucket index directly;
/// otherwise a newly allocated node holding the index is linked after the
/// last node of the slot's chain. A line is printed after each table.
///
/// @param HTSet  the l hash tables; BukSet is read, Hash2 is filled.
void standHash(HT HTSet[])
{
    for (int t = 0; t < l; ++t)
    {
        HT &table = HTSet[t];
        int bucketCount = table.BukSet.size();
        for (int b = 0; b < bucketCount; ++b)
        {
            int slot = getPosition(table.RNum, table.BukSet[b].key, k);

            // Advance to the last node of the chain that starts at this slot.
            INT *node = &table.Hash2[slot];
            for (; node->used && node->next != NULL; node = node->next)
            {
            }

            if (!node->used)
            {
                node->val = b;
                node->used = true;
            }
            else
            {
                INT *fresh = new INT;
                node->next = fresh;
                fresh->val = b;
                fresh->used = true;
            }
        }
        std::cout << "the " << t << "th HashTable has been finished." << std::endl;
    }
}

search.cpp

#include "HashTable.h"
#include "getPosition.h"
#include <vector>
using namespace std; void search(vector<int>& record, bool query[128], HT HTSet[])
{
for(int t = 0; t < l; ++t)
{
string temKey;
int temPos = 0;
for(int c = 0; c < k; ++c)
temKey.push_back(query[HTSet[t].R[c]] + '0');
temPos = getPosition(HTSet[t].RNum, temKey, k);
vector<int> bktId;
INT *p = &HTSet[t].Hash2[temPos];
while(p != NULL && p->used)
{
bktId.push_back(p->val);
p = p->next;
}
for(size_t i = 0; i < bktId.size(); ++i)
{
bucket temB = HTSet[t].BukSet[bktId[i]];
if(temKey == temB.key)
{
for(size_t j = 0; j < temB.elem.size(); ++j)
record.push_back(temB.elem[j]);
}
}
}
}

稍后总结。

代码调整:

main.cpp

#include "Head.h"
#include "D:\\LiYangGuang\\VSPRO\\MYLSH\\MYLSH\\computeDistance.h"
using namespace std;
#pragma warning(disable: 4996)
// length of sub hashtable, as well the number of elements.
const int MAX_Q = 1000; HT HTSet[l]; bool data[n][128];
bool extDat[l][n][k]; bool query[MAX_Q][128]; // set the query item to 1000. void getFileName(int v, char *FileName)
{
itoa(v, FileName, 10);
strcat(FileName, ".txt");
} int main(int argc, char *argv)
{
/************************************************************************/
/* Firstly, create the HashTables */
/************************************************************************/
char *filename = "D:\\LiYangGuang\\VSPRO\\MYLSH\\data.txt";
loadData(data, n, filename);
createTable(HTSet, data, extDat);
insert(HTSet,extDat);
standHash(HTSet); char *queryFile = "D:\\LiYangGuang\\VSPRO\\MYLSH\\query.txt";
loadData(query, MAX_Q, queryFile);
/************************************************************************/
/* Secondly, start the linear Search */
// /************************************************************************/
//
// vector<RECORD> record2;
// clock_t LineTime1 = clock();
// for(int qId = 0; qId < MAX_Q; ++qId)
// {
// for(int i = 0; i < n; ++i)
// {
// RECORD tem;
// tem.Id = i;
// tem.Dis = distance(data[i], query[qId]);
// record2.push_back(tem);
// }
// record2.clear();
// }
// clock_t LineTime2 = clock();
// float LineTime = (float)(LineTime2 - LineTime1) / CLOCKS_PER_SEC;
// cout << "全部线性查询时间:" << LineTime << " s," << " 合"
// << LineTime / 60 << " minutes."<< endl;
//
// /************************************************************************/
// /* Thirdly, start the LSH search */
// /************************************************************************/
//
// clock_t time0 = clock();
// ofstream ofs;
// char outFileName[10] = { '\0'};
// int K = 1; /// define KNN
// getFileName(K, outFileName);
// ofs.out(outFileName);
//
// for(int qId = 0; qId < MAX_Q; ++qId)
// {
// vector<RECORD> record;
// clock_t timeA = clock();
// search(record, query[qId], HTSet, data);
// if(getkNN(record,K))
// clock_t timeB = clock();
// record.clear();
// cout << "第 " << qId + 1 << " 次查询时间:" <<
// (float)(timeB - timeA) / CLOCKS_PER_SEC << " s" << endl;
// }
// clock_t time1 = clock();
// cout << "总查询时间:" << (float)(time1 - time0) / CLOCKS_PER_SEC
// << " s." << endl;
/************************************************************************/
/* */
/************************************************************************/
ofstream ofs;
char outFileName[10] = { '\0'};
int K = 1; /// define KNN
getFileName(K, outFileName);
ofs.open(outFileName, ios::out);
//ofs.precision(3);
float TotalLinearTime, TotalLSHTime;
TotalLinearTime = TotalLSHTime = 0; float TotalError = 0;
int TotalMiss = 0; vector<RECORD> record2;
for(int qId = 0; qId < MAX_Q; ++qId)
{
cout << "第 " << qId << " 次查询" << endl;
clock_t LineTime1 = clock();
for(int i = 0; i < n; ++i)
{
RECORD tem;
tem.Id = i;
tem.Dis = computeDistance(data[i], query[qId], 128);
record2.push_back(tem);
}
getkNN(record2); // 利用其对距离排序
clock_t LineTime2 = clock();
float LineTime = (float)(LineTime2 - LineTime1) / CLOCKS_PER_SEC;
TotalLinearTime += LineTime; /************************************************************************/
/* Thirdly, start the LSH search */
/************************************************************************/ vector<RECORD> record;
clock_t timeA = clock();
search(record, query[qId], HTSet, data);
if(!getkNN(record, K))
{
float queryTime = (float)(clock() - timeA) / CLOCKS_PER_SEC;
TotalLSHTime += queryTime;
ofs << "Miss\t" << "LSH Time: " << queryTime
<< "s\tLinear time: " << LineTime << 's' << endl;
TotalMiss += 1;
}
else{
float queryTime = (float)(clock() - timeA) / CLOCKS_PER_SEC;
TotalLSHTime += queryTime;
float error = 0;
if(record[K-1].Dis == 0)
error = 1;
else
error = (float)record2[K-1].Dis / record[K-1].Dis;
ofs << "Error: " << error << "\tLSH Time: "
<< queryTime << "s\tLinear time: " << LineTime << 's' << endl;
TotalError += error; }
record.clear();
record2.clear();
}
ofs << "Average errror: " << TotalError / 817 << endl;//recitfy
ofs << "Miss ratio: " << TotalMiss / MAX_Q << endl;
ofs << "Total query time: " << "LSH, " << TotalLSHTime / 3600 << " h; "
<< "Linear, " << TotalLinearTime / 3600 << " h." << endl;
ofs.close(); return 0; }

computeDistance.h

/// Returns the Hamming distance between two boolean vectors.
///
/// @param v1  first vector, at least N elements.
/// @param v2  second vector, at least N elements.
/// @param N   number of leading elements to compare.
/// @return    the count of indices in [0, N) where the two vectors disagree.
inline int computeDistance(bool v1[], bool v2[], int N)
{
    int total = 0;
    for (int pos = 0; pos < N; ++pos)
    {
        const bool same = (v1[pos] == v2[pos]);
        total += same ? 0 : 1;
    }
    return total;
}

Search.cpp

#include "HashTable.h"
#include "getPosition.h"
#include "computeDistance.h"
#include <vector>
using namespace std; /*** 加入 data 项是为了计算距离 ***/
void search(vector<RECORD>& record, bool query[128], HT HTSet[], bool data[][128])
{
for(int t = 0; t < l; ++t)
{
string temKey;
int temPos = 0;
for(int c = 0; c < k; ++c)
temKey.push_back(query[HTSet[t].R[c]] + '0');
temPos = getPosition(HTSet[t].RNum, temKey, k);
vector<int> bktId;
INT *p = &HTSet[t].Hash2[temPos];
while(p != NULL && p->used)
{
bktId.push_back(p->val);
p = p->next;
}
for(size_t i = 0; i < bktId.size(); ++i)
{
bucket temB = HTSet[t].BukSet[bktId[i]];
if(temKey == temB.key)
{
for(size_t j = 0; j < temB.elem.size(); ++j)
{
RECORD temp;
temp.Id = temB.elem[j];
temp.Dis = computeDistance(data[temp.Id], query, 128);
record.push_back(temp);
} }
}
}
}

相关截图:

实习日记:图像检索算法 LSH 的总结与分析的更多相关文章

  1. 实习日记:图像检索算法 LSH 的总结与分析(matlab)

    最开始仿真和精度测试,基于 matlab 完成的. Demo_MakeTable.m (生成 Hash 表) %======================================== %** ...

  2. OpenCV学习笔记(27)KAZE 算法原理与源码分析(一)非线性扩散滤波

    http://blog.csdn.net/chenyusiyuan/article/details/8710462 OpenCV学习笔记(27)KAZE 算法原理与源码分析(一)非线性扩散滤波 201 ...

  3. 第2章 rsync算法原理和工作流程分析

    本文通过示例详细分析rsync算法原理和rsync的工作流程,是对rsync官方技术报告和官方推荐文章的解释. 以下是本文的姊妹篇: 1.rsync(一):基本命令和用法 2.rsync(二):ino ...

  4. rsync算法原理和工作流程分析

    本文通过示例详细分析rsync算法原理和rsync的工作流程,是对rsync官方技术报告和官方推荐文章的解释.本文不会介绍如何使用rsync命令(见rsync基本用法),而是详细解释它如何实现高效的增 ...

  5. Python实现的选择排序算法原理与用法实例分析

    Python实现的选择排序算法原理与用法实例分析 这篇文章主要介绍了Python实现的选择排序算法,简单描述了选择排序的原理,并结合实例形式分析了Python实现与应用选择排序的具体操作技巧,需要的朋 ...

  6. Bag of Features (BOF)图像检索算法

    1.首先.我们用surf算法生成图像库中每幅图的特征点及描写叙述符. 2.再用k-means算法对图像库中的特征点进行训练,生成类心. 3.生成每幅图像的BOF.详细方法为:推断图像的每一个特征点与哪 ...

  7. TW实习日记:前三天

    今天是2018年7月20号,周五.从周一开始实习到现在,终于想起来要写日记这种东西了,可以记录一下自己这一天所学所做所知也是蛮不错的.先简单总结一下自己的大学生活吧,算是多姿多彩,体验了很多东西.在大 ...

  8. TW实习日记:第31-32天

    不知不觉的,实习的净工作天数,已经都超过一个月了.因为对工作内容不是很满意,所以打算月底离职,也不知道是公司太缺人还是我真的能干活,领导竟然三番两次找我让我再考虑...明天又要找我了,哎...随机应变 ...

  9. $2015 武汉森果公司web后端开发实习日记----书写是为了更好的思考

    找暑期实习,3月份分别投了百度和腾讯的实习简历,都止步于笔试,总结的主要原因有两点:基础知识不扎实,缺乏项目经验.后来到拉勾网等网站上寻找实习,看了很多家,都还是处于观望状态.后来参加了武汉实习吧在大 ...

随机推荐

  1. 实现手机扫描二维码页面登录,类似web微信-第三篇,手机客户端

    转自:http://www.cnblogs.com/fengyun99/p/3541254.html 上一篇,介绍了二维码生成的机制,紧接着,我们就要开发手机客户端来识别这个二维码. 二维码,实际上是 ...

  2. 记录一些容易忘记的属性 -- UITabBarController

    UIViewController中的  @property(nonatomic,copy) NSString *title;  // Localized title for use by a pare ...

  3. Android webservice的用法详细讲解

    Android webservice的用法详细讲解 看到有很多朋友对WebService还不是很了解,在此就详细的讲讲WebService,争取说得明白吧.此文章采用的项目是我毕业设计的webserv ...

  4. tinymce 编辑器 上传图片

    tinymce编辑器进行本地图片上传 首先下载tinymce.js之后 在form中添加一个<textarea>元素 给其一个id和name 然后就可以初始化编辑器了 tinymce.in ...

  5. windows核心编程---第七章 用户模式下的线程同步

    用户模式下的线程同步 系统中的线程必须访问系统资源,如堆.串口.文件.窗口以及其他资源.如果一个线程独占了对某个资源的访问,其他线程就无法完成工作.我们也必须限制线程在任何时刻都能访问任何资源.比如在 ...

  6. OD调试篇12

    Delphi的逆向 先看看今天需要破解的程序. 打开程序先出现了一个nag窗口,然后是unregistered未注册的提示,以及关于里的需要注册. 拖进die看了看      就是delphi写的.那 ...

  7. C#中的委托

    public delegate void SayHello(string name); class Program { static void Main(string[] args) { SayHel ...

  8. colormap

    http://cn.mathworks.com/help/matlab/ref/colormap.html

  9. Node.js高级编程读书笔记 - 1 基本概念

    Outline 1 概述和安装 1.1 安装Node 1.2 Node简介 2 Node核心API基础 2.1 加载模块 2.2 应用缓冲区处理.编码和解码二进制数据 2.3 使用时间发射器模式简化事 ...

  10. Type Project has no default.properties file! Edit the project properties to set one.

    Description Resource Path Location Type Project has no default.properties file! Edit the project pro ...