数据挖掘 FP-tree算法C++实现及源码

FP-growth挖掘算法

步骤一

第一次扫描数据库，得到频繁1-项集，并把各项按支持度递减排序；第二次扫描数据库，建立FP-tree

步骤二

对每个项，生成它的条件模式库

步骤三

用条件模式库构造对应的条件FP-tree，递归构造条件FP-tree，同时增长其包含的频繁集；如果条件FP-tree只包含一条路径，则直接生成所包含的频繁集

C++源码

 #include<bits/stdc++.h>

 #include<algorithm>

 #include<cmath>

 #include<cstdlib>

 #include<fstream>

 #include<iostream>

 #include<map>

 #include<sstream>

 #include<string>

 #include<unordered_map>

 #include<vector>

 #define suport 0.001// minimum support, as a fraction of the number of lines read (Getheadlist uses ceil(suport*line))

 using namespace std;

  // One node of the FP-tree, stored with the child/sibling linked representation.
  struct FPtreeNode{
      int data;              // item id carried by this node
      int count;             // how many inserted transactions pass through this node
      FPtreeNode *father;    // parent node (the sentinel root has none)
      FPtreeNode *children;  // first child
      FPtreeNode *brother;   // next sibling under the same parent
      FPtreeNode *next;      // next tree node holding the same item (header-table chain)

      // Creates a detached node: item 0, count 0, no links.
      FPtreeNode()
          : data(0), count(0), father(NULL), children(NULL), brother(NULL), next(NULL) {}
  };

 // One mined frequent itemset together with its support count.
 struct resultNode{
     std::string data;  // items of the set as space-separated text
     int count;         // support count of the set

     // Starts with an empty item string and a zero count.
     resultNode() : count(0) {}
 };

 // One entry of the header table: a frequent item, its support count and the
 // first FP-tree node that carries the item.
 struct listNode{
     int data;                 // item id
     int count;                // support count of the item
     struct FPtreeNode *next;  // head of the chain of tree nodes holding this item

     // Zero-initialises every field so a default-constructed entry is never
     // indeterminate, matching the other structs in this file.
     listNode() : data(0), count(0), next(NULL) {}
 };

 // Item/count pair described by the original author as a node of a
 // sub-FP-tree used during recursion.
 struct fptreeNode{
     int data;   // item id
     int count;  // count associated with the item

     // Starts as item 0 with count 0.
     fptreeNode() : data(0), count(0) {}
 };

 int line=0;// number of lines read by Getfile; Getheadlist scales the support threshold by it

 vector<string> Getfile(){

     vector<string> file;

         ifstream ifile("D://retail.txt");

     if(!ifile){

         cout<<"open file error"<<endl;

     }

     else{

         string temp;

         while(getline(ifile,temp)){

             line++;

             file.insert(file.end(),temp);

         }

     }

     ifile.close();

     return file;

 }

 bool cmp( listNode &a,listNode &b){

     return a.count>b.count;

 }    

 vector<listNode> Getheadlist(vector<string> &file){

         vector<listNode>L1;

         map<string,int>r1;

         string temp;

             string lk;

         for(int f=;f<file.size();f++){//第一次扫描数据库

             temp=file[f];

             int i;

             for( i=;i<temp.size();i++){

                 while(temp[i]!=' '&&temp[i]!='\n'&&i!=temp.size()){

                 lk+=temp[i];

                     i++;

                 }

                 if(r1.find(lk)!=r1.end())

                 r1[lk]++;

                 else

                 r1[lk]=;

                 lk.clear();//

             }

         }

         temp.clear();

         map<string,int>::iterator it;

         for(it=r1.begin();it!=r1.end();it++){//待删除 

             if(it->second>=ceil(suport*line)){

             string s=it->first;

             int x=atoi(s.c_str());//转换成整数

             listNode xx;

             xx.data=x;

             xx.count=it->second;

             xx.next=NULL;

             L1.insert(L1.end(),xx);

             }

         }

         sort(L1.begin(),L1.end(),cmp);

         return L1;

 }

 // Tells whether `str` occurs as a whole whitespace-separated token of `temp`.
 //
 // temp: one transaction line. str: the token to look for.
 // Returns true on an exact token match, including when the match is the last
 // token of a line that has no trailing space.
 bool Isin(const std::string &temp,const std::string &str){
     std::istringstream tokens(temp);
     std::string item;
     while(tokens>>item){
         if(item==str)
             return true;
     }
     return false;
 }

 vector<vector<int> > Get_FPfile(vector<string> &file,vector<listNode> &L1){//第二次扫描数据库 删除非频繁一项

     string temp;

     vector<vector<int> > rfile;

     vector<int >ri;

     for(int f=;f<file.size();f++){

         temp=file[f];

             for(int k=;k<L1.size();k++){

                 string s;

                 int n=L1[k].data;

                 stringstream ss;

                 ss<<n;

                 ss>>s;

                 if(Isin(temp,s)){

                     ri.push_back(n);

                 }

             }

             if(ri.size()>){

             rfile.push_back(ri);

             ri.clear();

             }

             temp.clear();

         }

         file.clear();

         return rfile;

 }

 int c=0;// running total of FP-tree nodes allocated by Buildtree

 void Linknext(FPtreeNode *newNode,vector<listNode> &L1){

                     for(int m=;m<L1.size();m++){

                     if(L1[m].data==newNode->data){

                             if(L1[m].next==NULL){

                             L1[m].next=newNode;

                             }

                             else{

                             FPtreeNode *t1=L1[m].next;

                             FPtreeNode *t2=L1[m].next;

                             while(t1){

                             t2=t1;

                             t1=t1->next;

                             }

                             t2->next=newNode;

                             }

                             break;

                         }

                     } 

 }

 // Builds an FP-tree from filtered, header-ordered transactions.
 //
 // rfile: one row of item ids per transaction, already ordered like L1.
 // L1:    header table; each entry's `next` chain is extended with the tree
 //        nodes created for its item (via Linknext).
 // Returns the sentinel root, allocated with new; the caller owns the tree.
 // Side effect: the global `c` is incremented for every item node created.
 FPtreeNode*  Buildtree(    std::vector<std::vector<int> > &rfile,std::vector<listNode> &L1){
     FPtreeNode *head=new FPtreeNode;
     for(std::size_t i=0;i<rfile.size();++i){
         FPtreeNode *parent=head;
         for(std::size_t j=0;j<rfile[i].size();++j){
             const int item=rfile[i][j];

             // Look for an existing child carrying this item, remembering the
             // last sibling seen so a new node can be appended after it.
             FPtreeNode *child=parent->children;
             FPtreeNode *lastSibling=NULL;
             while(child&&child->data!=item){
                 lastSibling=child;
                 child=child->brother;
             }

             if(child){
                 child->count++;  // shared prefix: one more transaction passes through
             }
             else{
                 child=new FPtreeNode;
                 c++;
                 child->data=item;
                 child->count=1;
                 child->father=parent;
                 if(lastSibling)
                     lastSibling->brother=child;
                 else
                     parent->children=child;
                 Linknext(child,L1);
             }
             parent=child;
         }
     }
     return head;
 }

 vector<string> GetFrequentItems(listNode &lk,FPtreeNode* &head){//生成条件模式基 rfile

     FPtreeNode *p=lk.next;

     vector<string> rfile;

     while(p){

         FPtreeNode* q=p;

         vector<string> temp;

         while(q->father!=head){

             q=q->father;

             stringstream ss;

             string x;

             int n;

             n=q->data;

             ss<<n;

             ss>>x;

             temp.push_back(x+" ");

         }

         reverse(temp.begin(),temp.end());

         string s;

         for(int j=;j<temp.size();j++){

                 s+=temp[j];

             }

         for(int i=;i<p->count;i++){

             if(s.size()>)

             rfile.push_back(s);

         }

         s.clear();

         p=p->next;

     }

     return rfile;

 }

 // Every frequent itemset found so far; main passes it to Getresult, which appends to it, and Print writes it out.
 vector<resultNode> result;

 void Getresult(vector<listNode> &headlist,FPtreeNode* &head,string &base,vector<resultNode> &result){

     if(headlist.empty()){

         return;

     }

     for(auto p = headlist.rbegin(); p != headlist.rend(); p++){

         resultNode temp;

         int n;

         n=p->data;//int to string

         stringstream ss;

         string x;

         ss<<n;

         ss>>x;

         string xx=base+" "+x;

         temp.data=xx;

         temp.count=p->count;

         result.push_back(temp);

         /*****递归******/

         //产生条件模式基

         vector<string> file1=GetFrequentItems(*p,head);

         vector<listNode>headlist1= Getheadlist(file1);//getL1

         vector<vector<int> > rfile1=Get_FPfile(file1,headlist1);

         //建树

         FPtreeNode* Tree=Buildtree(rfile1,headlist1);

         string s=base+" "+x;

         //产生结果

         Getresult(headlist1,Tree,s,result);

     }

 }

 void Print(){

     for (auto p =result.cbegin(); p != result.cend(); p++)

     {

         cout << p->data << " "<<"("<<p->count<<")"<< endl;

     }

 }

 int main(){

     vector<string> file=Getfile();

     vector<listNode> headlist=Getheadlist(file);

     vector<vector<int> >rfile=Get_FPfile(file,headlist);

     FPtreeNode* head=Buildtree(rfile,headlist);

     string base=" ";

     Getresult(headlist,head,base,result);

     Print();

     cout<<result.size();

     return ;

 }

数据挖掘 FP-tree算法C++实现及源码的更多相关文章

FP Tree算法原理总结
在Apriori算法原理总结中,我们对Apriori算法的原理做了总结.作为一个挖掘频繁项集的算法,Apriori算法需要多次扫描数据,I/O是很大的瓶颈.为了解决这个问题,FP Tree算法(也称F ...
FP Tree算法原理总结（转载）
FP Tree算法原理总结在Apriori算法原理总结中,我们对Apriori算法的原理做了总结.作为一个挖掘频繁项集的算法,Apriori算法需要多次扫描数据,I/O是很大的瓶颈.为了解决这个问题 ...
数据挖掘：关联规则的apriori算法在weka的源码分析
相对于机器学习,关联规则的apriori算法更偏向于数据挖掘. 1) 测试文档中调用weka的关联规则apriori算法,如下 try { File file = new File("F:\ ...
中国象棋程序的设计与实现(六)--N皇后问题的算法设计与实现(源码+注释+截图)
八皇后问题,是一个古老而著名的问题,是回溯算法的典型例题. 该问题是十九世纪著名的数学家高斯1850年提出:在8X8格的国际象棋上摆放八个皇后,使其不能互相攻击,即任意两个皇后都不能处于同一行.同一列 ...
常用限流算法与Guava RateLimiter源码解析
在分布式系统中,应对高并发访问时,缓存.限流.降级是保护系统正常运行的常用方法.当请求量突发暴涨时,如果不加以限制访问,则可能导致整个系统崩溃,服务不可用.同时有一些业务场景,比如短信验证码,或者其它 ...
最快速的“高斯”模糊算法（附Android源码）
这是一个外国人的算法,本人是搬运工.参考:http://blog.ivank.net/fastest-gaussian-blur.html 1:高斯模糊算法(参考:http://www.rua ...
A*算法（附c源码）
关于A*算法网上介绍的有很多,我只是看了之后对这个算法用c写了一下,并测试无误后上传以分享一下,欢迎指正!下面是我找的一个介绍,并主要根据这个实现的. 寻路算法不止 A* 这一种, 还有递归, 非递归 ...
SIFT算法的教程及源码
1.ubc:DAVID LOWE---SIFT算法的创始人,两篇巨经典经典的文章http://www.cs.ubc.ca/~lowe/[1] 2.cmu:YanKe---PCASIFT,总结的SIFT ...
常见算法合集[java源码+持续更新中...]
一.引子本文搜集从各种资源上搜集高频面试算法,慢慢填充...每个算法都亲测可运行,原理有注释.Talk is cheap,show me the code! 走你~ 二.常见算法 2.1 判断单向链 ...

随机推荐

(18)C++ string和标准模板库
一.stringl类 1.string构造函数 string a1("abc");//初始化字符串 cout<<a1<<endl;//abc , '#'); ...
2019牛客多校第⑨场H Cutting Bamboos(主席树+二分)
原题:https://ac.nowcoder.com/acm/contest/889/H 题意: 给你一些竹子,q个询问,问你从第l到第r个竹子,如果你要用y次砍完它,并且每次砍下来的长度是相同的,问 ...
转 Jmeter参数化--Post请求的Post body 参数化
2018年01月22日 15:40:58 java2013liu 阅读数:2361收起个人分类: Jemter 一.使用body data设置参数: 1,首先,使用Fiddler录制post请求 ...
c++计算1到100以内的质数
自考c++实践的时候,有个求计算1-100的质数的问题,没搞出来由于考试使用的是Dev-C++开发工具,为了下次考试做准备,改用该工具,直接下载安装即可,不会涉及到什么破解等下载地址:https: ...
sass揭秘之@mixin，%，@function scss基本使用及操作函数
sass揭秘之@mixin,%,@function: 地址:https://www.w3cplus.com/preprocessor/sass-mixins-function-placeholder. ...
JavaScript去除数组中重复的数字
<!DOCTYPE html> <html> <head> <meta charset="UTF-8"> <title> ...
Docker 容器使用
Docker 客户端 docker 客户端非常简单 ,我们可以直接输入 docker 命令来查看到 Docker 客户端的所有命令选项. runoob@runoob:~# docker :~# doc ...
react 16.3+ 新生命周期
react 16.3版本出现了两个新的生命周期函数,并将逐渐废弃componentWillMount().componentWillReceiveProps().componentWillUpdate ...
【记录】API Gateway作用？与过滤器的区别？Nginx与Zuul区别？
网关(gateway)的作用: 网关可以拦截客户端所有请求,对该请求进行权限控制.负载均衡.日志管理.接口调用监控等过滤器与网关的区别是什么? 过滤器是拦截单个tomcat服务器请求. 网关是拦截整 ...
安装 sysbench的报错 /usr/bin/ld: cannot find -lmysqlclient_r 解决办法
首先你需要找到这个库的位置一般找的话需要将lib 给加上(注意:我这里是 -lmysqlclient_r 的报错,于是我找就找 libmysqlclient_r ) find / -name lib ...

数据挖掘 FP-tree算法C++实现及源码

数据挖掘 FP-tree算法C++实现及源码的更多相关文章

随机推荐

热门专题