Weka——PrincipalComponents分析
package weka.filters.unsupervised.attribute;
PrincipalComponents
属性:
/** The data to analyse/transform. */
protected Instances m_TrainInstances;

/** Keep a copy for the class attribute (if set). */
protected Instances m_TrainCopy;

/** The header for the transformed data format. */
protected Instances m_TransformedFormat;

/** Data has a class set. */
protected boolean m_HasClass;

/** Class index. */
protected int m_ClassIndex;

/** Number of attributes. */
protected int m_NumAttribs;

/** Number of instances. */
protected int m_NumInstances;

/**
 * Correlation matrix for the original data (holds the covariance matrix
 * instead when {@code m_center} is true).
 */
protected double[][] m_Correlation;

/**
 * If true, center (rather than standardize) the data and
 * compute PCA from covariance (rather than correlation)
 * matrix.
 */
private boolean m_center = false;

/**
 * Will hold the unordered linear transformations of the (normalized)
 * original data.
 */
protected double[][] m_Eigenvectors;

/** Eigenvalues for the corresponding eigenvectors. */
protected double[] m_Eigenvalues = null;

/**
 * Indices into {@code m_Eigenvalues} in the order produced by
 * {@code Utils.sort}; used to visit the eigenvalues in sorted order.
 */
protected int[] m_SortedEigens;

/** Sum of the eigenvalues. */
protected double m_SumOfEigenValues = 0.0;

/** Filter for replacing missing values. */
protected ReplaceMissingValues m_ReplaceMissingFilter;

/** Filter for turning nominal values into numeric ones. */
protected NominalToBinary m_NominalToBinaryFilter;

/**
 * Filter for removing the class attribute and attributes with at most one
 * distinct value; only created when there is something to remove.
 */
protected Remove m_AttributeFilter;

/** Filter for standardizing the data. */
protected Standardize m_standardizeFilter;

/** Filter for centering the data. */
protected Center m_centerFilter;

/** The number of attributes in the pc transformed data. */
protected int m_OutputNumAtts = -1;

/**
 * The amount of variance to cover in the original data when
 * retaining the best n PC's.
 */
protected double m_CoverVariance = 0.95;

/** Maximum number of attributes in the transformed attribute name. */
protected int m_MaxAttrsInName = 5;

/** Maximum number of attributes in the transformed data (-1 for all). */
protected int m_MaxAttributes = -1;
计算协方差矩阵或相关系数矩阵
/**
 * Fills {@code m_Correlation} with the matrix used for the eigen
 * decomposition.
 * <p>
 * When {@code m_center} is false this simply delegates to
 * {@link #fillCorrelation()}. Otherwise the training data is replaced by the
 * output of a {@code Center} filter (kept in {@code m_centerFilter}) and the
 * covariance of every attribute pair is stored, using
 * {@code numInstances - 1} as the divisor.
 *
 * @throws Exception if the centering filter or {@link #fillCorrelation()}
 *           fails
 */
protected void fillCovariance() throws Exception {
  if (!m_center) {
    fillCorrelation();
    return;
  }

  // now center the data by subtracting the mean
  m_centerFilter = new Center();
  m_centerFilter.setInputFormat(m_TrainInstances);
  m_TrainInstances = Filter.useFilter(m_TrainInstances, m_centerFilter);

  // now compute the covariance matrix
  final double divisor = (double) (m_TrainInstances.numInstances() - 1);
  m_Correlation = new double[m_NumAttribs][m_NumAttribs];
  for (int i = 0; i < m_NumAttribs; i++) {
    // The matrix is symmetric, so each pair is computed once (j >= i) and
    // mirrored; the diagonal needs no special case because value(i) * value(j)
    // already is the squared value when i == j.
    for (int j = i; j < m_NumAttribs; j++) {
      double cov = 0;
      for (int k = 0; k < m_NumInstances; k++) {
        cov += m_TrainInstances.instance(k).value(i)
          * m_TrainInstances.instance(k).value(j);
      }
      cov /= divisor;
      m_Correlation[i][j] = cov;
      m_Correlation[j][i] = cov;
    }
  }
}
/**
 * Fills {@code m_Correlation} with the pairwise correlations of the
 * training attributes and then standardizes the training data.
 * <p>
 * Diagonal entries are set to 1.0; every other entry is the value returned
 * by {@code Utils.correlation} for the two attribute columns. Once the matrix
 * is filled, {@code m_TrainInstances} is replaced by the output of a
 * {@code Standardize} filter, which is kept in {@code m_standardizeFilter}.
 *
 * @throws Exception if standardizing the training data fails
 */
protected void fillCorrelation() throws Exception {
  m_Correlation = new double[m_NumAttribs][m_NumAttribs];
  double[] att1 = new double[m_NumInstances];
  double[] att2 = new double[m_NumInstances];

  for (int i = 0; i < m_NumAttribs; i++) {
    m_Correlation[i][i] = 1.0;

    // Column i is shared by every pair in this row, so extract it once.
    for (int k = 0; k < m_NumInstances; k++) {
      att1[k] = m_TrainInstances.instance(k).value(i);
    }

    // The matrix is symmetric: compute each pair once (j < i) and mirror it.
    for (int j = 0; j < i; j++) {
      for (int k = 0; k < m_NumInstances; k++) {
        att2[k] = m_TrainInstances.instance(k).value(j);
      }
      double corr = Utils.correlation(att1, att2, m_NumInstances);
      m_Correlation[i][j] = corr;
      m_Correlation[j][i] = corr;
    }
  }

  // now standardize the input data
  m_standardizeFilter = new Standardize();
  m_standardizeFilter.setInputFormat(m_TrainInstances);
  m_TrainInstances = Filter.useFilter(m_TrainInstances, m_standardizeFilter);
}
处理数据
/**
 * Transforms a single instance given in the original (unnormalized) format
 * into principal-component space.
 * <p>
 * A copy of the instance is pushed through the preprocessing filters held by
 * this object (missing-value replacement, nominal-to-binary, the optional
 * attribute removal, then either standardizing or centering depending on
 * {@code m_center}). The result is projected onto the eigenvectors, walking
 * {@code m_SortedEigens} from its last entry backwards, until the accumulated
 * share of {@code m_SumOfEigenValues} reaches {@code m_CoverVariance} or the
 * limit derived from {@code m_MaxAttributes} is hit. If {@code m_HasClass} is
 * set, the class value of the incoming instance is stored in the last output
 * slot.
 *
 * @param instance an instance in the original (unnormalized) format
 * @return a transformed instance
 * @throws Exception if instance can't be transformed
 */
protected Instance convertInstance(Instance instance) throws Exception {
  final double[] projected = new double[m_OutputNumAtts];

  // Work on a copy so the caller's instance is left untouched.
  Instance current = (Instance) instance.copy();

  m_ReplaceMissingFilter.input(current);
  m_ReplaceMissingFilter.batchFinished();
  current = m_ReplaceMissingFilter.output();

  m_NominalToBinaryFilter.input(current);
  m_NominalToBinaryFilter.batchFinished();
  current = m_NominalToBinaryFilter.output();

  if (m_AttributeFilter != null) {
    m_AttributeFilter.input(current);
    m_AttributeFilter.batchFinished();
    current = m_AttributeFilter.output();
  }

  if (m_center) {
    m_centerFilter.input(current);
    m_centerFilter.batchFinished();
    current = m_centerFilter.output();
  } else {
    m_standardizeFilter.input(current);
    m_standardizeFilter.batchFinished();
    current = m_standardizeFilter.output();
  }

  if (m_HasClass) {
    // the class value is taken from the unfiltered input instance
    projected[m_OutputNumAtts - 1] = instance.value(instance.classIndex());
  }

  // lowest position in m_SortedEigens that may still be used
  int lowestComponent = (m_MaxAttributes > 0) ? m_NumAttribs - m_MaxAttributes : 0;
  lowestComponent = Math.max(lowestComponent, 0);

  double coveredVariance = 0;
  boolean enoughVariance = false;
  int component = m_NumAttribs - 1;
  while (component >= lowestComponent && !enoughVariance) {
    double projection = 0.0;
    for (int att = 0; att < m_NumAttribs; att++) {
      projection += m_Eigenvectors[att][m_SortedEigens[component]] * current.value(att);
    }
    projected[m_NumAttribs - component - 1] = projection;

    coveredVariance += m_Eigenvalues[m_SortedEigens[component]];
    enoughVariance = (coveredVariance / m_SumOfEigenValues) >= m_CoverVariance;
    component--;
  }

  // create instance, keeping the sparse/dense flavour of the input
  if (instance instanceof SparseInstance) {
    return new SparseInstance(instance.weight(), projected);
  }
  return new DenseInstance(instance.weight(), projected);
}

/**
* Initializes the filter with the given input data.
*
* @param instances the data to process
* @throws Exception in case the processing goes wrong
* @see #batchFinished()
*/
protected void setup(Instances instances) throws Exception {
  m_TrainInstances = new Instances(instances);

  // make a copy of the training data so that we can get the class
  // column to append to the transformed data (if necessary)
  m_TrainCopy = new Instances(m_TrainInstances, 0);

  m_ReplaceMissingFilter = new ReplaceMissingValues();
  m_ReplaceMissingFilter.setInputFormat(m_TrainInstances);
  m_TrainInstances = Filter.useFilter(m_TrainInstances, m_ReplaceMissingFilter);

  m_NominalToBinaryFilter = new NominalToBinary();
  m_NominalToBinaryFilter.setInputFormat(m_TrainInstances);
  m_TrainInstances = Filter.useFilter(m_TrainInstances, m_NominalToBinaryFilter);

  // delete any attributes with only one distinct value or are all missing
  Vector<Integer> deleteCols = new Vector<Integer>();
  for (int i = 0; i < m_TrainInstances.numAttributes(); i++) {
    if (m_TrainInstances.numDistinctValues(i) <= 1) {
      deleteCols.addElement(i);
    }
  }

  // NOTE(review): m_HasClass is only ever set to true here, and
  // m_AttributeFilter is only assigned when something is removed. If setup
  // can run more than once on the same object, confirm that the caller resets
  // both beforehand.
  if (m_TrainInstances.classIndex() >= 0) {
    // get rid of the class column
    m_HasClass = true;
    m_ClassIndex = m_TrainInstances.classIndex();
    deleteCols.addElement(m_ClassIndex);
  }

  // remove columns from the data if necessary
  if (deleteCols.size() > 0) {
    m_AttributeFilter = new Remove();
    int[] todelete = new int[deleteCols.size()];
    for (int i = 0; i < deleteCols.size(); i++) {
      todelete[i] = deleteCols.elementAt(i);
    }
    m_AttributeFilter.setAttributeIndicesArray(todelete);
    m_AttributeFilter.setInvertSelection(false);
    m_AttributeFilter.setInputFormat(m_TrainInstances);
    m_TrainInstances = Filter.useFilter(m_TrainInstances, m_AttributeFilter);
  }

  // can evaluator handle the processed data ? e.g., enough attributes?
  getCapabilities().testWithFail(m_TrainInstances);

  m_NumInstances = m_TrainInstances.numInstances();
  m_NumAttribs = m_TrainInstances.numAttributes();

  // fills m_Correlation (correlation or covariance, depending on m_center)
  fillCovariance();

  // get eigen vectors/values
  Matrix corr = new Matrix(m_Correlation);
  EigenvalueDecomposition eig = corr.eig();
  Matrix V = eig.getV();
  double[][] v = new double[m_NumAttribs][m_NumAttribs];
  for (int i = 0; i < v.length; i++) {
    for (int j = 0; j < v[i].length; j++) {
      v[i][j] = V.get(i, j);
    }
  }
  // v is a fresh local array, so it can be stored directly
  m_Eigenvectors = v;
  // cloned because the values are modified below
  m_Eigenvalues = eig.getRealEigenvalues().clone();

  // any eigenvalues less than 0 are not worth anything --- change to 0
  for (int i = 0; i < m_Eigenvalues.length; i++) {
    if (m_Eigenvalues[i] < 0) {
      m_Eigenvalues[i] = 0.0;
    }
  }
  m_SortedEigens = Utils.sort(m_Eigenvalues);
  m_SumOfEigenValues = Utils.sum(m_Eigenvalues);

  m_TransformedFormat = determineOutputFormat(m_TrainInstances);
  setOutputFormat(m_TransformedFormat);

  // the training data is not needed once the output format is fixed
  m_TrainInstances = null;
}
Weka——PrincipalComponents分析的更多相关文章
- Weka关联规则分析
购物篮分析: Apriori算法: 参数设置: 1.car 如果设为真,则会挖掘类关联规则而不是全局关联规则. 2. classindex 类属性索引.如果设置为-1,最后的属性被当做类属性. 3. ...
- Weka算法Clusterers-DBSCAN源代码分析
假设说世界上仅仅能存在一种基于密度的聚类算法的话.那么它必须是DBSCAN(Density-based spatial clustering of applications with noise).D ...
- Weka算法Clusterers-Xmeans源代码分析(一)
上几篇博客都是分析的分类器算法(有监督学习),这次就分 ...
- Weka学习之关联规则分析
步骤: (一) 选择数据源 (二)选择要分析的字段 (三)选择需要的关联规则算法 (四)点击start运行 (五) 分析结果 算法选择: Apriori算法参数含义 1.car:如果设为真,则会挖掘类 ...
- Weka算法Classifier-meta-AdaBoostM1源代码分析(一)
多分类器组合算法简单的来讲经常使用的有voting,bagging和boosting,当中就效果来说Boosting略占优势,而AdaBoostM1算法又相当于Boosting算法的"经典款 ...
- Weka算法Classifier-tree-J48源代码分析(一个)基本数据结构和算法
大约一年,我没有照顾的博客,再次拿起笔不知从何写上,想来想去手从最近使用Weka要正确书写. Weka为一个Java基础上的机器学习工具.上手简单,并提供图形化界面.提供如分类.聚类.频繁项挖掘等工具 ...
- 数据挖掘:关联规则的apriori算法在weka的源码分析
相对于机器学习,关联规则的apriori算法更偏向于数据挖掘. 1) 测试文档中调用weka的关联规则apriori算法,如下 try { File file = new File("F:\ ...
- Weka中数据挖掘与机器学习系列之Exploer界面(七)
不多说,直接上干货! Weka的Explorer(探索者)界面,是Weka的主要图形化用户界面,其全部功能都可通过菜单选择或表单填写进行访问.本博客将详细介绍Weka探索者界面的图形化用户界面.预处理 ...
- Weka算法算法翻译(部分)
目录 Weka算法翻译(部分) 1. 属性选择算法(select attributes) 1.1 属性评估方法 1.2 搜索方法 2. 分类算法 2.1 贝叶斯算法 2.2 Functions 2.3 ...
随机推荐
- 【easyswoole】 解决安装报错
在使用swoole 创建项目时候,报错 创建命令 composer create-project easyswoole/app easyswoole 错误信息: 解决办法,切换composer 源 镜 ...
- Elasticsearch 学习之 Marvel概念
概要 含义如下: 搜索速率:对于单个索引,它是每秒查找次数*分片数.对于多个索引,它是每个索引的搜索速率的总和. 搜索延迟:每个分片中的平均延迟. 索引速率:对于单个索引,它是每秒索引的数量*分片数量 ...
- LeetCode 9 Palindrome Number(回文数字判断)
Long Time No See ! 题目链接https://leetcode.com/problems/palindrome-number/?tab=Description 首先确定该数字的 ...
- MYSQL的索引和常见函数
MySQL的索引 索引机制 MySQL属于关系型数据库,为了提高查询速度,可以创建索引. 索引:由表中的一个或多个字段生成的键组成,这些键存储在数据结构(B树或者hash表中),于是又分为B树索引(I ...
- 上传控件---淘宝kissy uploader+瀑布流显示
介绍Uploader : Uploader 是由阿里集团前端工程师们发起创建的一个开源 JS 框架.它具备模块化.高扩展性.组件齐全,接口一致.自主开发.适合多种应用场景等特性. Uploader是非 ...
- IOS 7 更改导航栏文字到白色
To hide status bar in any viewcontroller: -(BOOL) prefersStatusBarHidden { return YES; } To change t ...
- 【咸鱼教程】BitmapLabel位图字体使用
引擎版本3.2.6 教程目录一 为什么要使用位图字体二 如何使用位图字体2.1 TextureMerger制作位图字体2.2 exml中使用位图字体三 Demo源码 一 为什么要使用位图字体egre ...
- 【CF860E】Arkady and a Nobody-men 长链剖分
[CF860E]Arkady and a Nobody-men 题意:给你一棵n个点的有根树.如果b是a的祖先,定义$r(a,b)$为b的子树中深度小于等于a的深度的点的个数(包括a).定义$z(a) ...
- iOS - 自动化编译打包(Jenkins)
从xcodebuild到shenzhen,再到Jenkins,完美演绎自动化操作. Features xcodebuild自动构建命令 简介 构建 生成ipa文件 利用 shenzhen 进行打包 J ...
- strut2的标签
DIY部落 新闻中心 交流论坛 千寻搜索 点击浏览该栏目下的更多电子书 收藏本站 struts2标签详解 文章整理: www.diybl.com 文章来源: 网络 去论坛 建我的b ...