# file: dt_cls_dense_batch.py
#===============================================================================
# Copyright 2014-2018 Intel Corporation.
#
# This software and the related documents are Intel copyrighted materials, and
# your use of them is governed by the express license under which they were
# provided to you (License). Unless the License provides otherwise, you may not
# use, modify, copy, publish, distribute, disclose or transmit this software or
# the related documents without Intel's prior written permission.
#
# This software and the related documents are provided as is, with no express
# or implied warranties, other than those that are expressly stated in the
# License.
#=============================================================================== ## <a name="DAAL-EXAMPLE-PY-DT_CLS_DENSE_BATCH"></a>
## \example dt_cls_dense_batch.py
#
# Decision tree classification, batch processing mode.
# The script trains a model from a CSV training set, passes a separate CSV
# data set to the training algorithm for pruning, runs prediction on a CSV
# test set and prints the predictions next to the ground-truth labels.

import os
import sys

from daal.algorithms.decision_tree.classification import prediction, training
from daal.algorithms import classifier
from daal.data_management import (
    FileDataSource, DataSourceIface, NumericTableIface, HomogenNumericTable, MergedNumericTable
)

# The shared `utils` module is looked up in the parent of this script's folder
utils_folder = os.path.realpath(os.path.abspath(os.path.dirname(os.path.dirname(__file__))))
if utils_folder not in sys.path:
    sys.path.insert(0, utils_folder)
from utils import printNumericTables

DAAL_PREFIX = os.path.join('..', 'data')

# Input data set parameters
trainDatasetFileName = os.path.join(DAAL_PREFIX, 'batch', 'decision_tree_train.csv')
pruneDatasetFileName = os.path.join(DAAL_PREFIX, 'batch', 'decision_tree_prune.csv')
testDatasetFileName = os.path.join(DAAL_PREFIX, 'batch', 'decision_tree_test.csv')

nFeatures = 5
nClasses = 5

# Model object for the decision tree classification algorithm
model = None
predictionResult = None
testGroundTruth = None


def _loadDataAndLabels(datasetFileName):
    """Read a .csv file into a pair of numeric tables.

    The file is loaded through a MergedNumericTable built from an
    nFeatures-column table and a 1-column table, so one loadDataBlock() call
    fills both. Presumably the leading nFeatures columns of the file are the
    features and the following column is the label -- confirm against the
    data files.

    :param datasetFileName: path of the .csv file to read
    :return: tuple (data, groundTruth) of HomogenNumericTable objects
    """
    # Initialize FileDataSource<CSVFeatureManager> to retrieve the input data from a .csv file
    dataSource = FileDataSource(
        datasetFileName,
        DataSourceIface.notAllocateNumericTable,
        DataSourceIface.doDictionaryFromContext
    )

    # Create Numeric Tables for data and labels
    data = HomogenNumericTable(nFeatures, 0, NumericTableIface.notAllocate)
    groundTruth = HomogenNumericTable(1, 0, NumericTableIface.notAllocate)
    mergedData = MergedNumericTable(data, groundTruth)

    # Retrieve the data from the input file
    dataSource.loadDataBlock(mergedData)
    return data, groundTruth


def trainModel():
    """Train the decision tree classifier and store it in the global `model`.

    Loads the training and the pruning data sets, passes both to the training
    algorithm and keeps the resulting model for testModel().
    """
    global model

    trainData, trainGroundTruth = _loadDataAndLabels(trainDatasetFileName)
    pruneData, pruneGroundTruth = _loadDataAndLabels(pruneDatasetFileName)

    # Create an algorithm object to train the decision tree classification model
    algorithm = training.Batch(nClasses)

    # Pass the training data set and dependent values to the algorithm
    algorithm.input.set(classifier.training.data, trainData)
    algorithm.input.set(classifier.training.labels, trainGroundTruth)
    algorithm.input.setTable(training.dataForPruning, pruneData)
    algorithm.input.setTable(training.labelsForPruning, pruneGroundTruth)

    # Train the decision tree classification model and retrieve the results of the training algorithm
    trainingResult = algorithm.compute()
    model = trainingResult.get(classifier.training.model)


def testModel():
    """Run prediction on the test data set with the trained model.

    Stores the test labels in the global `testGroundTruth` and the prediction
    result in the global `predictionResult`. Reads the global `model`, so
    trainModel() has to be called first.
    """
    global testGroundTruth, predictionResult

    testData, testGroundTruth = _loadDataAndLabels(testDatasetFileName)

    # Create algorithm objects for decision tree classification prediction with the default method
    algorithm = prediction.Batch()

    # Pass the testing data set and trained model to the algorithm
    algorithm.input.setTable(classifier.prediction.data, testData)
    algorithm.input.setModel(classifier.prediction.model, model)

    # Compute prediction results and retrieve algorithm results
    # (Result class from classifier.prediction)
    predictionResult = algorithm.compute()


def printResults():
    """Print the ground-truth labels side by side with the predicted labels.

    Reads the globals filled in by testModel().
    """
    printNumericTables(
        testGroundTruth,
        predictionResult.get(classifier.prediction.prediction),
        "Ground truth", "Classification results",
        "Decision tree classification results (first 20 observations):",
        20, flt64=False
    )


if __name__ == "__main__":
    trainModel()
    testModel()
    printResults()

  

随机森林的示例代码:

# file: df_cls_dense_batch.py
#===============================================================================
# Copyright 2014-2018 Intel Corporation.
#
# This software and the related documents are Intel copyrighted materials, and
# your use of them is governed by the express license under which they were
# provided to you (License). Unless the License provides otherwise, you may not
# use, modify, copy, publish, distribute, disclose or transmit this software or
# the related documents without Intel's prior written permission.
#
# This software and the related documents are provided as is, with no express
# or implied warranties, other than those that are expressly stated in the
# License.
#=============================================================================== ## <a name="DAAL-EXAMPLE-PY-DF_CLS_DENSE_BATCH"></a>
## \example df_cls_dense_batch.py
#
# Decision forest classification, batch processing mode.
# The script trains a forest from a CSV training set, prints the variable
# importance and the out-of-bag error reported by training, runs prediction on
# a CSV test set and prints the predictions and the ground-truth labels.

import os
import sys

from daal.algorithms import decision_forest
from daal.algorithms.decision_forest.classification import prediction, training
from daal.algorithms import classifier
from daal.data_management import (
    FileDataSource, DataSourceIface, NumericTableIface, HomogenNumericTable,
    MergedNumericTable, features
)

# The shared `utils` module is looked up in the parent of this script's folder
utils_folder = os.path.realpath(os.path.abspath(os.path.dirname(os.path.dirname(__file__))))
if utils_folder not in sys.path:
    sys.path.insert(0, utils_folder)
from utils import printNumericTable, printNumericTables

DAAL_PREFIX = os.path.join('..', 'data')

# Input data set parameters
trainDatasetFileName = os.path.join(DAAL_PREFIX, 'batch', 'df_classification_train.csv')
testDatasetFileName = os.path.join(DAAL_PREFIX, 'batch', 'df_classification_test.csv')

nFeatures = 3
nClasses = 5

# Decision forest parameters
nTrees = 10
minObservationsInLeafNode = 8

# Model object for the decision forest classification algorithm
model = None
predictionResult = None
testGroundTruth = None


def _loadDataAndLabels(datasetFileName):
    """Read a .csv file into a pair of numeric tables.

    The file is loaded through a MergedNumericTable built from an
    nFeatures-column table and a 1-column table, so one loadDataBlock() call
    fills both. Presumably the leading nFeatures columns of the file are the
    features and the following column is the label -- confirm against the
    data files.

    :param datasetFileName: path of the .csv file to read
    :return: tuple (data, groundTruth) of HomogenNumericTable objects
    """
    # Initialize FileDataSource<CSVFeatureManager> to retrieve the input data from a .csv file
    dataSource = FileDataSource(
        datasetFileName,
        DataSourceIface.notAllocateNumericTable,
        DataSourceIface.doDictionaryFromContext
    )

    # Create Numeric Tables for data and labels
    data = HomogenNumericTable(nFeatures, 0, NumericTableIface.notAllocate)
    groundTruth = HomogenNumericTable(1, 0, NumericTableIface.notAllocate)
    mergedData = MergedNumericTable(data, groundTruth)

    # Retrieve the data from the input file
    dataSource.loadDataBlock(mergedData)
    return data, groundTruth


def _setFeatureTypes(table):
    """Mark features 0 and 1 of `table` as continuous and feature 2 as categorical."""
    # Get the dictionary and update it with additional information about data
    # (named `dictionary` so that the builtin `dict` is not shadowed)
    dictionary = table.getDictionary()

    # Add a feature type to the dictionary
    dictionary[0].featureType = features.DAAL_CONTINUOUS
    dictionary[1].featureType = features.DAAL_CONTINUOUS
    dictionary[2].featureType = features.DAAL_CATEGORICAL


def trainModel():
    """Train the decision forest classifier and store it in the global `model`.

    Also prints the variable importance and out-of-bag error tables taken
    from the training result.
    """
    global model

    trainData, trainGroundTruth = _loadDataAndLabels(trainDatasetFileName)
    _setFeatureTypes(trainData)

    # Create an algorithm object to train the decision forest classification model
    algorithm = training.Batch(nClasses)
    algorithm.parameter.nTrees = nTrees
    algorithm.parameter.minObservationsInLeafNode = minObservationsInLeafNode
    algorithm.parameter.featuresPerNode = nFeatures
    algorithm.parameter.varImportance = decision_forest.training.MDI
    algorithm.parameter.resultsToCompute = decision_forest.training.computeOutOfBagError

    # Pass the training data set and dependent values to the algorithm
    algorithm.input.set(classifier.training.data, trainData)
    algorithm.input.set(classifier.training.labels, trainGroundTruth)

    # Train the decision forest classification model and retrieve the results of the training algorithm
    trainingResult = algorithm.compute()
    model = trainingResult.get(classifier.training.model)
    printNumericTable(trainingResult.getTable(training.variableImportance), "Variable importance results: ")
    printNumericTable(trainingResult.getTable(training.outOfBagError), "OOB error: ")


def testModel():
    """Run prediction on the test data set with the trained model.

    Stores the test labels in the global `testGroundTruth` and the prediction
    result in the global `predictionResult`. Reads the global `model`, so
    trainModel() has to be called first.
    """
    global testGroundTruth, predictionResult

    testData, testGroundTruth = _loadDataAndLabels(testDatasetFileName)
    _setFeatureTypes(testData)

    # Create algorithm objects for decision forest classification prediction with the default method
    algorithm = prediction.Batch(nClasses)

    # Pass the testing data set and trained model to the algorithm
    algorithm.input.setTable(classifier.prediction.data, testData)
    algorithm.input.setModel(classifier.prediction.model, model)

    # Compute prediction results and retrieve algorithm results
    # (Result class from classifier.prediction)
    predictionResult = algorithm.compute()


def printResults():
    """Print the predicted labels and then the ground-truth labels.

    Reads the globals filled in by testModel().
    """
    printNumericTable(predictionResult.get(classifier.prediction.prediction),"Decision forest prediction results (first 10 rows):",10)
    printNumericTable(testGroundTruth, "Ground truth (first 10 rows):", 10)


if __name__ == "__main__":
    trainModel()
    testModel()
    printResults()

  

Intel DAAL AI加速 ——传统决策树和随机森林的更多相关文章

  1. Intel DAAL AI加速——支持从数据预处理到模型预测,数据源必须使用DAAL的底层封装库

    数据源加速见官方文档(必须使用DAAL自己的库): Data Management Numeric Tables Tensors Data Sources Data Dictionaries Data ...

  2. Intel DAAL AI加速——神经网络

    # file: neural_net_dense_batch.py #================================================================= ...

  3. R语言︱决策树族——随机森林算法

    每每以为攀得众山小,可.每每又切实来到起点,大牛们,缓缓脚步来俺笔记葩分享一下吧,please~ --------------------------- 笔者寄语:有一篇<有监督学习选择深度学习 ...

  4. [ML学习笔记] 决策树与随机森林(Decision Tree&Random Forest)

    [ML学习笔记] 决策树与随机森林(Decision Tree&Random Forest) 决策树 决策树算法以树状结构表示数据分类的结果.每个决策点实现一个具有离散输出的测试函数,记为分支 ...

  5. web安全之机器学习入门——3.2 决策树与随机森林

    目录 简介 决策树简单用法 决策树检测POP3爆破 决策树检测FTP爆破 随机森林检测FTP爆破 简介 决策树和随机森林算法是最常见的分类算法: 决策树,判断的逻辑很多时候和人的思维非常接近. 随机森 ...

  6. 逻辑斯蒂回归VS决策树VS随机森林

    LR 与SVM 不同 1.logistic regression适合需要得到一个分类概率的场景,SVM则没有分类概率 2.LR其实同样可以使用kernel,但是LR没有support vector在计 ...

  7. Machine Learning笔记整理 ------ (五)决策树、随机森林

    1. 决策树 一般的,一棵决策树包含一个根结点.若干内部结点和若干叶子结点,叶子节点对应决策结果,其他每个结点对应一个属性测试,每个结点包含的样本集合根据属性测试结果被划分到子结点中,而根结点包含样本 ...

  8. 美团店铺评价语言处理以及分类(tfidf,SVM,决策树,随机森林,Knn,ensemble)

    第一篇 数据清洗与分析部分 第二篇 可视化部分, 第三篇 朴素贝叶斯文本分类 支持向量机分类 支持向量机 网格搜索 临近法 决策树 随机森林 bagging方法 import pandas as pd ...

  9. chapter02 三种决策树模型:单一决策树、随机森林、GBDT(梯度提升决策树) 预测泰坦尼克号乘客生还情况

    单一标准的决策树:会根据每维特征对预测结果的影响程度进行排序,进而决定不同特征从上至下构建分类节点的顺序.Random Forest Classifier:使用相同的训练样本同时搭建多个独立的分类模型, ...

随机推荐

  1. 根据wsdl文件,soapUI生成webservice客户端代码

    根据wsdl文件,soapUI生成webservice客户端代码 功能介绍: 对于面向WebService接口开发时,当我们已经获取到WSDL文件后,可以使用soapUI工具生成对应的客户端和服务端代码 ...

  2. 解决国内 NPM 安装依赖速度慢问题

    不知道各位是否遇到这种情况,使用NPM(Node.js包管理工具)安装依赖时速度特别慢,为了安装Express,执行命令后两个多小时都没安装成功,最后只能取消安装,笔者20M带宽,应该不是我网络的原因 ...

  3. GitHub Desktop离线安装包

    GitHub Desktop离线安装包.上传时间是2017-02-05 版本3.3.4.0,Git shell版本是v2.11.0. 百度网盘的下载链接: http://pan.baidu.com/s ...

  4. 20145306 《网络攻防》 MSF基础应用

    20145306张文锦<网络对抗>MSF应用 Adobe阅读器渗透攻击 两台虚拟机,其中一台为kali,一台为windows xp sp3,并保证两台虚拟机可以ping通. 实验过程 进入 ...

  5. 判断某个ImageView的背景图是否是已知的背景图相等

    if (ibtn.getDrawable().getConstantState().equals(getResources().getDrawable(R.mipmap.image).getConst ...

  6. Python3基础 else 循环完整结束才执行

             Python : 3.7.0          OS : Ubuntu 18.04.1 LTS         IDE : PyCharm 2018.2.4       Conda ...

  7. JavaScript:值类型 引用类型

    JavaScript类型 (1)值类型:数值.布尔值.null.undefined. (2)引用类型:对象.数组.函数.  1.string var str="hongda"; v ...

  8. 常见几种校验方法(CS和校验、CRC16、CRC32、BCC异或校验)

    总结一些通讯协议中常用到的几种校验方法: 1.CS和校验(如:标准188协议校验方式) /// <summary> /// CS和校验 /// </summary> /// & ...

  9. C#学习笔记(二十):C#总结和月考讲解

    m1w1d2_console_variable_constant 输入Console.WriteLine(); 输出Console.ReadLine(); 快捷键 折叠代码:快捷键“Ctrl+ K + ...

  10. UVa 1025 城市里的间谍

    https://vjudge.net/problem/UVA-1025 题意:一个间谍要从第一个车站到第n个车站去会见另一个,在是期间有n个车站,有来回的车站,让你在时间T内时到达n,并且等车时间最短 ...