package weka.filters.unsupervised.attribute;

PrincipalComponents

Fields:

  /** The data to analyse/transform. */
  protected Instances m_TrainInstances;

  /** Keep a copy for the class attribute (if set). */
  protected Instances m_TrainCopy;

  /** The header for the transformed data format. */
  protected Instances m_TransformedFormat;

  /** Data has a class set. */
  protected boolean m_HasClass;

  /** Class index. */
  protected int m_ClassIndex;

  /** Number of attributes. */
  protected int m_NumAttribs;

  /** Number of instances. */
  protected int m_NumInstances;

  /** Correlation matrix for the original data. */
  protected double[][] m_Correlation;

  /**
   * If true, center (rather than standardize) the data and
   * compute PCA from covariance (rather than correlation)
   * matrix.
   */
  private boolean m_center = false;

  /** Will hold the unordered linear transformations of the (normalized)
      original data. */
  protected double[][] m_Eigenvectors;

  /** Eigenvalues for the corresponding eigenvectors. */
  protected double[] m_Eigenvalues = null;

  /** Sorted eigenvalues. */
  protected int[] m_SortedEigens;

  /** Sum of the eigenvalues. */
  protected double m_SumOfEigenValues = 0.0;

  /** Filter for replacing missing values. */
  protected ReplaceMissingValues m_ReplaceMissingFilter;

  /** Filter for turning nominal values into numeric ones. */
  protected NominalToBinary m_NominalToBinaryFilter;

  /** Filter for removing the class attribute and nominal attributes with 0 or 1 value. */
  protected Remove m_AttributeFilter;

  /** Filter for standardizing the data. */
  protected Standardize m_standardizeFilter;

  /** Filter for centering the data. */
  protected Center m_centerFilter;

  /** The number of attributes in the pc transformed data. */
  protected int m_OutputNumAtts = -1;

  /** The amount of variance to cover in the original data when
      retaining the best n PC's. */
  protected double m_CoverVariance = 0.95;

  /** Maximum number of attributes in the transformed attribute name. */
  protected int m_MaxAttrsInName = 5;

  /** Maximum number of attributes in the transformed data (-1 for all). */
  protected int m_MaxAttributes = -1;

Computing the covariance or correlation matrix:

  /**
   * Fills {@code m_Correlation} with either the correlation matrix
   * (default) or, when centering is enabled, the covariance matrix of
   * the centered training data.
   *
   * @throws Exception if one of the filters cannot be set up or applied
   */
  protected void fillCovariance() throws Exception {

    // Default behaviour: standardize the data and use the correlation matrix.
    if (!m_center) {
      fillCorrelation();
      return;
    }

    // Center the data by subtracting the mean of each attribute.
    m_centerFilter = new Center();
    m_centerFilter.setInputFormat(m_TrainInstances);
    m_TrainInstances = Filter.useFilter(m_TrainInstances, m_centerFilter);

    // Compute the covariance matrix of the centered data. The matrix is
    // symmetric, so each entry is computed once for the upper triangle and
    // mirrored (the original recomputed every off-diagonal entry twice and
    // carried a dead i == j special case identical to the general one).
    m_Correlation = new double[m_NumAttribs][m_NumAttribs];
    for (int i = 0; i < m_NumAttribs; i++) {
      for (int j = i; j < m_NumAttribs; j++) {
        double cov = 0;
        for (int k = 0; k < m_NumInstances; k++) {
          cov += m_TrainInstances.instance(k).value(i)
            * m_TrainInstances.instance(k).value(j);
        }
        // Unbiased estimate: divide by n - 1.
        cov /= (double) (m_TrainInstances.numInstances() - 1);
        m_Correlation[i][j] = cov;
        m_Correlation[j][i] = cov;
      }
    }
  }

  /**
* Fill the correlation matrix.
*/
protected void fillCorrelation() throws Exception {
int i;
int j;
int k;
double[] att1;
double[] att2;
double corr; m_Correlation = new double[m_NumAttribs][m_NumAttribs];
att1 = new double [m_NumInstances];
att2 = new double [m_NumInstances]; for (i = 0; i < m_NumAttribs; i++) {
for (j = 0; j < m_NumAttribs; j++) {
for (k = 0; k < m_NumInstances; k++) {
att1[k] = m_TrainInstances.instance(k).value(i);
att2[k] = m_TrainInstances.instance(k).value(j);
}
if (i == j) {
m_Correlation[i][j] = 1.0;
}
else {
corr = Utils.correlation(att1,att2,m_NumInstances);
m_Correlation[i][j] = corr;
m_Correlation[j][i] = corr;
}
}
} // now standardize the input data
m_standardizeFilter = new Standardize();
m_standardizeFilter.setInputFormat(m_TrainInstances);
m_TrainInstances = Filter.useFilter(m_TrainInstances, m_standardizeFilter);
}

Processing the data:

  /**
   * Transform an instance in original (unormalized) format.
   *
   * @param instance an instance in the original (unormalized) format
   * @return a transformed instance
   * @throws Exception if instance can't be transformed
   */
  protected Instance convertInstance(Instance instance) throws Exception {
    Instance result;
    double[] newVals;
    Instance tempInst;
    double cumulative;
    int i;
    int j;
    double tempval;
    int numAttsLowerBound;

    newVals = new double[m_OutputNumAtts];
    // Work on a copy so the caller's instance is left untouched.
    tempInst = (Instance) instance.copy();

    // Push the instance through the same preprocessing chain that was fitted
    // in setup(): missing-value replacement first ...
    m_ReplaceMissingFilter.input(tempInst);
    m_ReplaceMissingFilter.batchFinished();
    tempInst = m_ReplaceMissingFilter.output();

    // ... then nominal-to-binary conversion ...
    m_NominalToBinaryFilter.input(tempInst);
    m_NominalToBinaryFilter.batchFinished();
    tempInst = m_NominalToBinaryFilter.output();

    // ... then attribute removal (only present if setup() deleted columns).
    if (m_AttributeFilter != null) {
      m_AttributeFilter.input(tempInst);
      m_AttributeFilter.batchFinished();
      tempInst = m_AttributeFilter.output();
    }

    // Normalize the same way the training data was normalized: standardize
    // for correlation-based PCA, center for covariance-based PCA.
    if (!m_center) {
      m_standardizeFilter.input(tempInst);
      m_standardizeFilter.batchFinished();
      tempInst = m_standardizeFilter.output();
    } else {
      m_centerFilter.input(tempInst);
      m_centerFilter.batchFinished();
      tempInst = m_centerFilter.output();
    }

    // The class value (if any) is carried over unchanged as the last output attribute.
    if (m_HasClass)
      newVals[m_OutputNumAtts - 1] = instance.value(instance.classIndex());

    // Limit the number of components if m_MaxAttributes is set.
    if (m_MaxAttributes > 0)
      numAttsLowerBound = m_NumAttribs - m_MaxAttributes;
    else
      numAttsLowerBound = 0;
    if (numAttsLowerBound < 0)
      numAttsLowerBound = 0;

    // Project onto the eigenvectors, from largest eigenvalue downwards
    // (m_SortedEigens is ascending, hence the reverse iteration), stopping
    // once the requested fraction of variance is covered.
    cumulative = 0;
    for (i = m_NumAttribs - 1; i >= numAttsLowerBound; i--) {
      tempval = 0.0;
      for (j = 0; j < m_NumAttribs; j++)
        tempval += m_Eigenvectors[j][m_SortedEigens[i]] * tempInst.value(j);

      newVals[m_NumAttribs - i - 1] = tempval;
      cumulative += m_Eigenvalues[m_SortedEigens[i]];
      if ((cumulative / m_SumOfEigenValues) >= m_CoverVariance)
        break;
    }

    // create instance, preserving sparseness and weight of the input
    if (instance instanceof SparseInstance)
      result = new SparseInstance(instance.weight(), newVals);
    else
      result = new DenseInstance(instance.weight(), newVals);

    return result;
  }

  /**
* Initializes the filter with the given input data.
   *
   * @param instances the data to process
   * @throws Exception in case the processing goes wrong
   * @see #batchFinished()
   */
  protected void setup(Instances instances) throws Exception {
    m_TrainInstances = new Instances(instances);

    // Make an (empty) copy of the training data header so that we can get
    // the class column to append to the transformed data (if necessary).
    m_TrainCopy = new Instances(m_TrainInstances, 0);

    m_ReplaceMissingFilter = new ReplaceMissingValues();
    m_ReplaceMissingFilter.setInputFormat(m_TrainInstances);
    m_TrainInstances = Filter.useFilter(m_TrainInstances, m_ReplaceMissingFilter);

    m_NominalToBinaryFilter = new NominalToBinary();
    m_NominalToBinaryFilter.setInputFormat(m_TrainInstances);
    m_TrainInstances = Filter.useFilter(m_TrainInstances, m_NominalToBinaryFilter);

    // Delete any attributes with only one distinct value or that are all missing.
    Vector<Integer> deleteCols = new Vector<Integer>();
    for (int i = 0; i < m_TrainInstances.numAttributes(); i++) {
      if (m_TrainInstances.numDistinctValues(i) <= 1)
        deleteCols.addElement(i);
    }

    if (m_TrainInstances.classIndex() >= 0) {
      // Get rid of the class column; convertInstance re-attaches it later.
      m_HasClass = true;
      m_ClassIndex = m_TrainInstances.classIndex();
      // Autoboxing replaces the deprecated new Integer(int) constructor.
      deleteCols.addElement(m_ClassIndex);
    }

    // Remove columns from the data if necessary.
    if (deleteCols.size() > 0) {
      m_AttributeFilter = new Remove();
      int[] todelete = new int[deleteCols.size()];
      for (int i = 0; i < deleteCols.size(); i++)
        todelete[i] = deleteCols.get(i); // Vector<Integer> makes the old explicit cast redundant
      m_AttributeFilter.setAttributeIndicesArray(todelete);
      m_AttributeFilter.setInvertSelection(false);
      m_AttributeFilter.setInputFormat(m_TrainInstances);
      m_TrainInstances = Filter.useFilter(m_TrainInstances, m_AttributeFilter);
    }

    // Can the evaluator handle the processed data? E.g., enough attributes?
    getCapabilities().testWithFail(m_TrainInstances);

    m_NumInstances = m_TrainInstances.numInstances();
    m_NumAttribs = m_TrainInstances.numAttributes();

    // Build the correlation (or covariance) matrix ...
    fillCovariance();

    // ... and eigen-decompose it to get the principal components.
    Matrix corr = new Matrix(m_Correlation);
    EigenvalueDecomposition eig = corr.eig();
    Matrix V = eig.getV();
    double[][] v = new double[m_NumAttribs][m_NumAttribs];
    for (int i = 0; i < v.length; i++) {
      for (int j = 0; j < v[0].length; j++)
        v[i][j] = V.get(i, j);
    }
    m_Eigenvectors = (double[][]) v.clone();
    m_Eigenvalues = (double[]) eig.getRealEigenvalues().clone();

    // Any eigenvalues less than 0 are not worth anything --- change to 0.
    for (int i = 0; i < m_Eigenvalues.length; i++) {
      if (m_Eigenvalues[i] < 0)
        m_Eigenvalues[i] = 0.0;
    }
    m_SortedEigens = Utils.sort(m_Eigenvalues);
    m_SumOfEigenValues = Utils.sum(m_Eigenvalues);

    m_TransformedFormat = determineOutputFormat(m_TrainInstances);
    setOutputFormat(m_TransformedFormat);

    // Release the training data; only the fitted filters and eigen data are kept.
    m_TrainInstances = null;
  }

Weka——PrincipalComponents分析的更多相关文章

  1. Weka关联规则分析

    购物篮分析: Apriori算法: 参数设置: 1.car 如果设为真,则会挖掘类关联规则而不是全局关联规则. 2. classindex 类属性索引.如果设置为-1,最后的属性被当做类属性. 3. ...

  2. Weka算法Clusterers-DBSCAN源代码分析

    假设说世界上仅仅能存在一种基于密度的聚类算法的话.那么它必须是DBSCAN(Density-based spatial clustering of applications with noise).D ...

  3. Weka算法Clusterers-Xmeans源代码分析(一)

    <p></p><p><span style="font-size:18px">上几篇博客都是分析的分类器算法(有监督学习),这次就分 ...

  4. Weka学习之关联规则分析

    步骤: (一) 选择数据源 (二)选择要分析的字段 (三)选择需要的关联规则算法 (四)点击start运行 (五) 分析结果 算法选择: Apriori算法参数含义 1.car:如果设为真,则会挖掘类 ...

  5. Weka算法Classifier-meta-AdaBoostM1源代码分析(一)

    多分类器组合算法简单的来讲经常使用的有voting,bagging和boosting,当中就效果来说Boosting略占优势,而AdaBoostM1算法又相当于Boosting算法的"经典款 ...

  6. Weka算法Classifier-tree-J48源代码分析(一个)基本数据结构和算法

    大约一年,我没有照顾的博客,再次拿起笔不知从何写上,想来想去手从最近使用Weka要正确书写. Weka为一个Java基础上的机器学习工具.上手简单,并提供图形化界面.提供如分类.聚类.频繁项挖掘等工具 ...

  7. 数据挖掘:关联规则的apriori算法在weka的源码分析

    相对于机器学习,关联规则的apriori算法更偏向于数据挖掘. 1) 测试文档中调用weka的关联规则apriori算法,如下 try { File file = new File("F:\ ...

  8. Weka中数据挖掘与机器学习系列之Exploer界面(七)

    不多说,直接上干货! Weka的Explorer(探索者)界面,是Weka的主要图形化用户界面,其全部功能都可通过菜单选择或表单填写进行访问.本博客将详细介绍Weka探索者界面的图形化用户界面.预处理 ...

  9. Weka算法算法翻译(部分)

    目录 Weka算法翻译(部分) 1. 属性选择算法(select attributes) 1.1 属性评估方法 1.2 搜索方法 2. 分类算法 2.1 贝叶斯算法 2.2 Functions 2.3 ...

随机推荐

  1. 解决Win7启动时出现“windows未能启动。原因可能是最近更改了硬件或软件”的问题

    昨天公司终于大发慈悲,统一更换电脑配置,终于要摆脱“手扶拖拉机”的时代了,赶上“动车时代”了.不过不想换硬盘,因为重新要安装太多东西,环境配置一大堆,所以就硬盘没有换,不过当我开机启动的时候,悲剧发生 ...

  2. Sencha Touch 扩展集合

    https://market.sencha.com/extensions http://try.sencha.com/touch/2.1.0/ http://www.mitchellsimoens.c ...

  3. MySQL知识小结

    MySQL的知识面试中还是经常被问到的,简单的使用似乎无法达到面试官的要求,很多问题会关于Mysql存储引擎,所以这里还是需要系统学习一下Mysql的一些知识,面试过程中游刃有余. MySQL体系结构 ...

  4. Cracking the Coding Interview(String and array)

    1.1实现一个算法判断一个字符串是否存在重复字符.如果不能利用另外的数据结构又该如何实现? My solution: /** *利用类似一个hash table的计数 *然后检查这个hash tabl ...

  5. Coding和Git的环境搭建

    Github太慢了.打开网页慢,下载也只有几kb. 于是找了国内的Git,据说coding不错.就申请了个. 其实csdn也有...但是没人家的专业... 1 注册coding  https://co ...

  6. springMVC 报错:Unknown return value type: java.lang.Integer

    controller层返回值类型为Integer,运行报错: Unknown return value type: java.lang.Integer 解决办法:在此方法上写上注解 @Response ...

  7. Spring Boot 利用插件构造QueryDSL语句时报错:You need to run build with JDK or have tools.jar on the classpath.If this occur....

    You need to run build with JDK or have tools.jar on the classpath.If this occures during eclipse bui ...

  8. 9.9Dajngo MTV

    2018-9-9 14:53:53 mvc框架和 Django的MTV框架 框架参考 :https://www.cnblogs.com/liwenzhou/p/8296964.html 2018-9- ...

  9. ABP之应用服务(2)

    在上一篇的笔记中,已经大致对Application层的使用作了简要的使用说明,感觉还是有些东西需要研究一下,所以承接上文,对AutoMapper这个方便的东西,稍微研究一下. 一.初识AutoMapp ...

  10. 洛谷P2216 理想的正方形

    题目描述 有一个a*b的整数组成的矩阵,现请你从中找出一个n*n的正方形区域,使得该区域所有数中的最大值和最小值的差最小. 输入输出格式 输入格式: 第一行为3个整数,分别表示a,b,n的值 第二行至 ...