Apriori算法-java

package com.yang;

import java.util.*;

public class Apriori {

private double minsup = 0.2;// 最小支持度
private double minconf = 0.2;// 最小置信度

// 注意使用IdentityHashMap，否则由于关联规则产生存在键值相同的会出现覆盖
private IdentityHashMap ruleMap = new IdentityHashMap();

//private String[] transSet = { "abc", "abc", "acde", "bcdf", "abcd", "abcdf" };// 事务集合
                                                                                   // ，
                                                                                   // 可以根据需要从构造函数里传入
   private String[] transSet = { "abe", "bd", "bc", "abd", "ac", "bc","ac","abce","abc" };// 事务集合
   private int itemCounts = 0;// 候选1项目集大小,即字母的个数
   private TreeSet[] frequencySet = new TreeSet[40];// 频繁项集数组，[0]:代表1频繁集...，TreeSet（）使用元素的自然顺序对元素进行排序
   private TreeSet maxFrequency = new TreeSet();// 最大频繁集[所有频繁的]
   private TreeSet candidate = new TreeSet();
   private TreeSet candidateSet[] = new TreeSet[40];// 候选集数组[0]:代表1候选集
   private int frequencyIndex;

public Apriori() {

maxFrequency = new TreeSet();
       itemCounts = counts();// 初始化1候选集的大小6个
       System.out.printf("1项集的大小"+itemCounts);
       // 初始化其他两个
       for (int i = 0; i < itemCounts; i++) {
           frequencySet[i] = new TreeSet();//初始化频繁项集数组
           candidateSet[i] = new TreeSet();//初始化候选集数组
       }
       candidateSet[0] = candidate;// 1候选集
   }

//主函数入口
   public static void main(String[] args) {
       Apriori ap = new Apriori();
       ap.run();
   }

   //方法运行
   public void run() {
       int k = 1;
       item1_gen();

do {
           k++;
           canditate_gen(k);
           frequent_gen(k);
       } while (!is_frequent_empty(k));
       frequencyIndex = k - 1;
       print_canditate();
       maxfrequent_gen();
       print_maxfrequent();
       ruleGen();
       rulePrint();
   }
   //记录每个事务中的元素出现次数，x在事务中出现的总次数。
   public double count_sup(String x) {
       int temp = 0;
       for (int i = 0; i < transSet.length; i++) {
           for (int j = 0; j < x.length(); j++) {
               if (transSet[i].indexOf(x.charAt(j)) == -1)//返回指定字符在此字符串中第一次出现处的索引，如果不作为一个字符串，返回-1
                   break;
               else if (j == (x.length() - 1))
                   temp++;
           }
       }
       return temp;
   }

   //统计1候选集的个数a,b,c,d,e,f,return值为6
   public int counts() {

String temp1 = null;
       char temp2 = 'a';
       // 遍历所有事务集String 加入集合，set自动去重了
       for (int i = 0; i < transSet.length; i++) {
           temp1 = transSet[i];
           for (int j = 0; j < temp1.length(); j++) {
               temp2 = temp1.charAt(j);//返回位置为j的temp1的值a
               candidate.add(String.valueOf(temp2));//treeSet添加会去掉重复的值
           }
       }
       return candidate.size();//中元素个数不重复，且递增排序
   }

//求1频繁集
   public void item1_gen() {
       String temp1 = "";
       double m = 0;

Iterator temp = candidateSet[0].iterator();//使用方法iterator()要求容器返回一个Iterator。
       while (temp.hasNext()) {//遍历temp（1候选集）
           temp1 = (String) temp.next();
           m = count_sup(temp1);//调用下面的方法，统计1候选集中每个元素个数，计算支持度时，用此m/transSet.length

// 符合条件的加入 1候选集
           if (m >= minsup * transSet.length) {//minsup * transSet.length的值为记录每个事务中的元素出现次数,判断是否1频繁集
               frequencySet[0].add(temp1);//1频繁集加入频繁项集数组，自动出去重复的集合
           }
       }
   }
   //求K候选集
   public void canditate_gen(int k) {
       String y = "", z = "", m = "";
       char c1 ,c2 ;

Iterator temp1 = frequencySet[k - 2].iterator();//iterator迭代器，用于数组遍历
Iterator temp2 = frequencySet[0].iterator();//遍历频繁项集数组，[0]:代表1频繁集
TreeSet h = new TreeSet();

while (temp1.hasNext()) {
y = (String) temp1.next();//
c1 = y.charAt(y.length() - 1);//返回指定y.length() - 1（数组的最后一个）的char值

while (temp2.hasNext()) {
z = (String) temp2.next();

c2 = z.charAt(0);//c2=a,b,c,d,e,f
               if (c1 >= c2)
                   continue;//大于最后一个字符才拼上。abd，而无adb
               else {
                   m = y + z;//m为字符串组合yz
                   h.add(m);//m加入TreeSet
               }
           }
           temp2 = frequencySet[0].iterator();
       }
       candidateSet[k - 1] = h;
   }

// k候选集=>k频繁集
public void frequent_gen(int k) {
String s1 = "";

Iterator ix = candidateSet[k - 1].iterator();//遍历K候选集ix
       while (ix.hasNext()) {
           s1 = (String) ix.next();//ix中的值s1
           if (count_sup(s1) >= (minsup * transSet.length)) {//s1项集支持度大于最小支持度
               frequencySet[k - 1].add(s1);//s1加入K频繁集中
           }
       }
   }
    //判断频繁集为空
   public boolean is_frequent_empty(int k) {
       if (frequencySet[k - 1].isEmpty())
           return true;
       else
           return false;
   }
   //打印候选集频繁集
   public void print_canditate() {

for (int i = 0; i < frequencySet[0].size(); i++) {
           Iterator ix = candidateSet[i].iterator();
           Iterator iy = frequencySet[i].iterator();
           System.out.print("候选集" + (i + 1) + ":");
           while (ix.hasNext()) {
               System.out.print((String) ix.next() + "\t");
           }
           System.out.print("\n" + "频繁集" + (i + 1) + ":");
           while (iy.hasNext()) {
               System.out.print((String) iy.next() + "\t");
           }
           System.out.println();
       }
   }

//求关联项集合
   public void maxfrequent_gen() {
       int i;
       for (i = 1; i < frequencyIndex; i++) {
           maxFrequency.addAll(frequencySet[i]);
       }
   }
    //打印频繁项集
   public void print_maxfrequent() {
       Iterator iterator = maxFrequency.iterator();
       System.out.print("频繁项集：");
       while (iterator.hasNext()) {
           System.out.print(((String) iterator.next()) + "\t");
       }
       System.out.println();
       System.out.println();
   }
   //关联规则项集
   public void ruleGen() {
       String s;
       Iterator iterator = maxFrequency.iterator();
       while (iterator.hasNext()) {
           s = (String) iterator.next();
           subGen(s);
       }
   }

//求关联规则
   //将1左移多少位，将s分成不重叠的两部分。生成所有关联规则。再判断支持度
   public void subGen(String s) {
       String x = "", y = "";
       for (int i = 1; i < (1 << s.length()) - 1; i++) {
           for (int j = 0; j < s.length(); j++) {
               if (((1 << j) & i) != 0) {
                   x += s.charAt(j);
               }
           }

for (int j = 0; j < s.length(); j++) {
if (((1 << j) & (~i)) != 0) {

y += s.charAt(j);

}
           }
           if (count_sup(x + y) / count_sup(x) >= minconf) {
               ruleMap.put(x, y);
           }
           x = "";
           y = "";

}
}

//打印关联规则
   public void rulePrint() {
       String x, y;
       float temp = 0;

Set hs = ruleMap.keySet();//迭代后只能用get取key，Set不包含重复元素的collection
       Iterator iterator = hs.iterator();
       System.out.println("关联规则：");
       while (iterator.hasNext()) {
           x = (String) iterator.next();

y = (String) ruleMap.get(x);

temp = (float) (count_sup(x + y) / count_sup(x));

System.out.println(x + (x.length() < 5 ? "\t" : "") + "-->" + y+ "\t" + "置信度: " + temp);

       }
   }
}
学习点：1.TreeSet.add自动去重

2.TreeSet[] frequencySet; TreeSet frequencySet[];两种方法定义的都是数组，似乎是相同的。

3.canditate_gen中，大于最后一个字符的时候才拼上，adb即abd，所以不存在adb这种。

4.生成关联规则时，是将1左移多少位。能够将所有的组合都生成。x与y不会重叠。

效果图：

Apriori算法-java的更多相关文章

频繁模式挖掘apriori算法介绍及Java实现
频繁模式是频繁地出如今数据集中的模式(如项集.子序列或者子结构).比如.频繁地同一时候出如今交易数据集中的商品(如牛奶和面包)的集合是频繁项集. 一些基本概念支持度:support(A=>B) ...
关联规则挖掘之apriori算法
前言: 众所周知,关联规则挖掘是数据挖掘中重要的一部分,如著名的啤酒和尿布的问题.今天要学习的是经典的关联规则挖掘算法--Apriori算法一.算法的基本原理由k项频繁集去导出k+1项频繁集. 二 ...
Apriori算法例子
1 Apriori介绍 Apriori算法使用频繁项集的先验知识,使用一种称作逐层搜索的迭代方法,k项集用于探索(k+1)项集.首先,通过扫描事务(交易)记录,找出所有的频繁1项集,该集合记做L1,然 ...
Apriori算法实例----Weka，R, Using Weka in my javacode
学习数据挖掘工具中,下面使用4种工具来对同一个数据集进行研究. 数据描述:下面这些数据是15个同学选修课程情况,在课程大纲中共有10门课程供学生选择,下面给出具体的选课情况,以ARFF数据文件保存,名 ...
玩转大数据：深入浅出大数据挖掘技术(Apriori算法、Tanagra工具、决策树)
一.本课程是怎么样的一门课程(全面介绍) 1.1.课程的背景 “大数据”作为时下最火热的IT行业的词汇,随之而来的数据仓库.数据分析.数据挖掘等等围绕大数据的商业价值的利用逐渐成为 ...
Apriori算法实现
Apriori算法原理:http://blog.csdn.net/kingzone_2008/article/details/8183768 import java.util.HashMap; imp ...
Apriori算法第二篇----详细分析和代码实现
1 Apriori介绍 Apriori算法使用频繁项集的先验知识,使用一种称作逐层搜索的迭代方法,k项集用于探索(k+1)项集.首先,通过扫描事务(交易)记录,找出所有的频繁1项集,该集合记做L1,然 ...
基于spark实现并行化Apriori算法
详细代码我已上传到github:click me 一. 实验要求在 Spark2.3 平台上实现 Apriori 频繁项集挖掘的并行化算法.要求程序利用 Spark 进行并行计算. ...
MapReduce实现Apriori算法
Apiroi算法在Hadoop MapReduce上的实现输入格式: 一行为一个Bucket 1 3 5 7 9 11 13 15 17 19 21 23 25 27 29 31 34 36 38 ...

随机推荐

C++设计模式-singleton单例模式_new
class nocopyable { protected: nocopyable(){}; virtual ~nocopyable(){}; nocopyable(const nocopyable ...
storm实战总结笔记
storm是一款开源的.分布式的.低延迟的.可扩展的.容错的实时计算框架,采用clojure和java的混合编程,总体两者的代码总量是55开的,但clojure语言具有很强的表现力,所以storm的核 ...
Multi-Objective Data Placement for Multi-Cloud Socially Aware Services---INFOCOM 2014
[标题] [作者] [来源] [对本文评价] [why] 存在的问题 [how] [不足] assumption in future work [相关方法或论文] [重点提示] [其它]
YII2.0 数据库增删改查
/*==================== dkhBaseModel 数据库增删改查方法 start ================================*/ //新增一条数据 publ ...
__stack_chk_fail栈检查失败
1. __stack_chk_fail的作用在了函数的局部变量和保存的指令指针(译注:此处指返回地址和EBP)之间.这个值被称作金丝雀(“canary”)值参考 http://www.freebu ...
android 5.0 -- Ripple 效果
Ripple 水波纹效果,也就是涟漪效果. 波纹效果有两种: 1,波纹有边界:波纹涟漪效果只是显示在控件内部 android:background="?android:attr/select ...
Linux后门账户控制
赋予用户sudo权限 vi /etc/sudoers 添加如下一行: USER ALL=(ALL) NOPASSWD: ALL (实现当前用户允许转换成任意用户及执行任意命令) 添加root权限账户 ...
[妙味JS基础]第五课：函数传参、重用、价格计算
知识点总结函数传参,传的参数＝数据类型(即:数值.字符串.布尔.函数.对象.未定义) 通过传参来重用代码 1.尽量保证 HTML 代码结构一致,可以通过父级选取子元素 2.把核心主程序实现,用函数包 ...
Xcode-App Transport Security has blocked a cleartext HTTP (http://) resource load since it is insecure.
在xcode中上报数据时候,logserver一直没有数据,后来发现控制台有一个提示: 找了半天是因为Xcode7禁止明码的HTTP请求,而自己使用的是Xcode7.2.1 解决办法:修改info.p ...
hdu 5874 Friends and Enemies icpc大连站网络赛 1007 数学
#include<stdio.h> #include<iostream> #include<algorithm> #include<math.h> #i ...

Apriori算法-java

Apriori算法-java的更多相关文章

随机推荐

热门专题