# file: dt_cls_dense_batch.py
#===============================================================================
# Copyright 2014-2018 Intel Corporation.
#
# This software and the related documents are Intel copyrighted materials, and
# your use of them is governed by the express license under which they were
# provided to you (License). Unless the License provides otherwise, you may not
# use, modify, copy, publish, distribute, disclose or transmit this software or
# the related documents without Intel's prior written permission.
#
# This software and the related documents are provided as is, with no express
# or implied warranties, other than those that are expressly stated in the
# License.
#===============================================================================

## <a name="DAAL-EXAMPLE-PY-DT_CLS_DENSE_BATCH"></a>
## \example dt_cls_dense_batch.py

import os
import sys

from daal.algorithms.decision_tree.classification import prediction, training
from daal.algorithms import classifier
from daal.data_management import (
    FileDataSource, DataSourceIface, NumericTableIface, HomogenNumericTable, MergedNumericTable
)

utils_folder = os.path.realpath(os.path.abspath(os.path.dirname(os.path.dirname(__file__))))
if utils_folder not in sys.path:
    sys.path.insert(0, utils_folder)
from utils import printNumericTables

DAAL_PREFIX = os.path.join('..', 'data')

# Input data set parameters
trainDatasetFileName = os.path.join(DAAL_PREFIX, 'batch', 'decision_tree_train.csv')
pruneDatasetFileName = os.path.join(DAAL_PREFIX, 'batch', 'decision_tree_prune.csv')
testDatasetFileName = os.path.join(DAAL_PREFIX, 'batch', 'decision_tree_test.csv')

nFeatures = 5
nClasses = 5

# Model object for the decision tree classification algorithm
model = None
predictionResult = None
testGroundTruth = None


def trainModel():
    global model

    # Initialize FileDataSource<CSVFeatureManager> to retrieve the input data from a .csv file
    trainDataSource = FileDataSource(
        trainDatasetFileName,
        DataSourceIface.notAllocateNumericTable,
        DataSourceIface.doDictionaryFromContext
    )

    # Create Numeric Tables for training data and labels
    trainData = HomogenNumericTable(nFeatures, 0, NumericTableIface.notAllocate)
    trainGroundTruth = HomogenNumericTable(1, 0, NumericTableIface.notAllocate)
    mergedData = MergedNumericTable(trainData, trainGroundTruth)

    # Retrieve the data from the input file
    trainDataSource.loadDataBlock(mergedData)

    # Initialize FileDataSource<CSVFeatureManager> to retrieve the pruning data from a .csv file
    pruneDataSource = FileDataSource(
        pruneDatasetFileName,
        DataSourceIface.notAllocateNumericTable,
        DataSourceIface.doDictionaryFromContext
    )

    # Create Numeric Tables for pruning data and labels
    pruneData = HomogenNumericTable(nFeatures, 0, NumericTableIface.notAllocate)
    pruneGroundTruth = HomogenNumericTable(1, 0, NumericTableIface.notAllocate)
    pruneMergedData = MergedNumericTable(pruneData, pruneGroundTruth)

    # Retrieve the data from the input file
    pruneDataSource.loadDataBlock(pruneMergedData)

    # Create an algorithm object to train the decision tree classification model
    algorithm = training.Batch(nClasses)

    # Pass the training data set and dependent values to the algorithm
    algorithm.input.set(classifier.training.data, trainData)
    algorithm.input.set(classifier.training.labels, trainGroundTruth)
    algorithm.input.setTable(training.dataForPruning, pruneData)
    algorithm.input.setTable(training.labelsForPruning, pruneGroundTruth)

    # Train the decision tree classification model and retrieve the results of the training algorithm
    trainingResult = algorithm.compute()
    model = trainingResult.get(classifier.training.model)


def testModel():
    global testGroundTruth, predictionResult

    # Initialize FileDataSource<CSVFeatureManager> to retrieve the test data from a .csv file
    testDataSource = FileDataSource(
        testDatasetFileName,
        DataSourceIface.notAllocateNumericTable,
        DataSourceIface.doDictionaryFromContext
    )

    # Create Numeric Tables for testing data and labels
    testData = HomogenNumericTable(nFeatures, 0, NumericTableIface.notAllocate)
    testGroundTruth = HomogenNumericTable(1, 0, NumericTableIface.notAllocate)
    mergedData = MergedNumericTable(testData, testGroundTruth)

    # Retrieve the data from the input file
    testDataSource.loadDataBlock(mergedData)

    # Create an algorithm object for decision tree classification prediction with the default method
    algorithm = prediction.Batch()

    # Pass the testing data set and trained model to the algorithm
    #print("Number of columns: {}".format(testData.getNumberOfColumns()))
    algorithm.input.setTable(classifier.prediction.data, testData)
    algorithm.input.setModel(classifier.prediction.model, model)

    # Compute prediction results and retrieve algorithm results
    # (Result class from classifier.prediction)
    predictionResult = algorithm.compute()


def printResults():
    printNumericTables(
        testGroundTruth,
        predictionResult.get(classifier.prediction.prediction),
        "Ground truth", "Classification results",
        "Decision tree classification results (first 20 observations):",
        20, flt64=False
    )


if __name__ == "__main__":
    trainModel()
    testModel()
    printResults()
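
The example expects three CSV files (decision_tree_train.csv, decision_tree_prune.csv, decision_tree_test.csv), each laid out the way the merged table implies: nFeatures = 5 feature columns followed by a single label column with class ids in 0..nClasses-1; the pruning files are a separate held-out sample that the algorithm uses to prune the trained tree. If you only want to smoke-test the script without the Intel-supplied data, the following sketch (my own helper, not part of the DAAL examples; the file names and the ../data/batch layout are simply copied from the constants above) writes toy CSVs in that shape. A tree trained on random features will of course predict at chance level; this only verifies that the pipeline runs end to end.

import os
import numpy as np

def write_toy_csv(path, n_rows, n_features=5, n_classes=5, seed=0):
    # n_features random continuous columns followed by one integer label column
    rng = np.random.RandomState(seed)
    X = rng.rand(n_rows, n_features)
    y = rng.randint(0, n_classes, size=(n_rows, 1))
    os.makedirs(os.path.dirname(path), exist_ok=True)
    np.savetxt(path, np.hstack([X, y]), delimiter=',', fmt='%.6f')

data_dir = os.path.join('..', 'data', 'batch')
write_toy_csv(os.path.join(data_dir, 'decision_tree_train.csv'), 1000, seed=1)
write_toy_csv(os.path.join(data_dir, 'decision_tree_prune.csv'), 200, seed=2)
write_toy_csv(os.path.join(data_dir, 'decision_tree_test.csv'), 200, seed=3)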

  

For the random forest (decision forest) version:

# file: df_cls_dense_batch.py
#===============================================================================
# Copyright 2014-2018 Intel Corporation.
#
# This software and the related documents are Intel copyrighted materials, and
# your use of them is governed by the express license under which they were
# provided to you (License). Unless the License provides otherwise, you may not
# use, modify, copy, publish, distribute, disclose or transmit this software or
# the related documents without Intel's prior written permission.
#
# This software and the related documents are provided as is, with no express
# or implied warranties, other than those that are expressly stated in the
# License.
#===============================================================================

## <a name="DAAL-EXAMPLE-PY-DF_CLS_DENSE_BATCH"></a>
## \example df_cls_dense_batch.py

import os
import sys

from daal.algorithms import decision_forest
from daal.algorithms.decision_forest.classification import prediction, training
from daal.algorithms import classifier
from daal.data_management import (
    FileDataSource, DataSourceIface, NumericTableIface, HomogenNumericTable,
    MergedNumericTable, features
)

utils_folder = os.path.realpath(os.path.abspath(os.path.dirname(os.path.dirname(__file__))))
if utils_folder not in sys.path:
    sys.path.insert(0, utils_folder)
from utils import printNumericTable, printNumericTables

DAAL_PREFIX = os.path.join('..', 'data')

# Input data set parameters
trainDatasetFileName = os.path.join(DAAL_PREFIX, 'batch', 'df_classification_train.csv')
testDatasetFileName = os.path.join(DAAL_PREFIX, 'batch', 'df_classification_test.csv')

nFeatures = 3
nClasses = 5

# Decision forest parameters
nTrees = 10
minObservationsInLeafNode = 8

# Model object for the decision forest classification algorithm
model = None
predictionResult = None
testGroundTruth = None


def trainModel():
    global model

    # Initialize FileDataSource<CSVFeatureManager> to retrieve the input data from a .csv file
    trainDataSource = FileDataSource(
        trainDatasetFileName,
        DataSourceIface.notAllocateNumericTable,
        DataSourceIface.doDictionaryFromContext
    )

    # Create Numeric Tables for training data and labels
    trainData = HomogenNumericTable(nFeatures, 0, NumericTableIface.notAllocate)
    trainGroundTruth = HomogenNumericTable(1, 0, NumericTableIface.notAllocate)
    mergedData = MergedNumericTable(trainData, trainGroundTruth)

    # Retrieve the data from the input file
    trainDataSource.loadDataBlock(mergedData)

    # Get the dictionary and update it with additional information about the data
    dict = trainData.getDictionary()

    # Add a feature type to the dictionary
    dict[0].featureType = features.DAAL_CONTINUOUS
    dict[1].featureType = features.DAAL_CONTINUOUS
    dict[2].featureType = features.DAAL_CATEGORICAL

    # Create an algorithm object to train the decision forest classification model
    algorithm = training.Batch(nClasses)
    algorithm.parameter.nTrees = nTrees
    algorithm.parameter.minObservationsInLeafNode = minObservationsInLeafNode
    algorithm.parameter.featuresPerNode = nFeatures
    algorithm.parameter.varImportance = decision_forest.training.MDI
    algorithm.parameter.resultsToCompute = decision_forest.training.computeOutOfBagError

    # Pass the training data set and dependent values to the algorithm
    algorithm.input.set(classifier.training.data, trainData)
    algorithm.input.set(classifier.training.labels, trainGroundTruth)

    # Train the decision forest classification model and retrieve the results of the training algorithm
    trainingResult = algorithm.compute()
    model = trainingResult.get(classifier.training.model)
    printNumericTable(trainingResult.getTable(training.variableImportance), "Variable importance results: ")
    printNumericTable(trainingResult.getTable(training.outOfBagError), "OOB error: ")


def testModel():
    global testGroundTruth, predictionResult

    # Initialize FileDataSource<CSVFeatureManager> to retrieve the test data from a .csv file
    testDataSource = FileDataSource(
        testDatasetFileName,
        DataSourceIface.notAllocateNumericTable,
        DataSourceIface.doDictionaryFromContext
    )

    # Create Numeric Tables for testing data and labels
    testData = HomogenNumericTable(nFeatures, 0, NumericTableIface.notAllocate)
    testGroundTruth = HomogenNumericTable(1, 0, NumericTableIface.notAllocate)
    mergedData = MergedNumericTable(testData, testGroundTruth)

    # Retrieve the data from the input file
    testDataSource.loadDataBlock(mergedData)

    # Get the dictionary and update it with additional information about the data
    dict = testData.getDictionary()

    # Add a feature type to the dictionary
    dict[0].featureType = features.DAAL_CONTINUOUS
    dict[1].featureType = features.DAAL_CONTINUOUS
    dict[2].featureType = features.DAAL_CATEGORICAL

    # Create an algorithm object for decision forest classification prediction with the default method
    algorithm = prediction.Batch(nClasses)

    # Pass the testing data set and trained model to the algorithm
    algorithm.input.setTable(classifier.prediction.data, testData)
    algorithm.input.setModel(classifier.prediction.model, model)

    # Compute prediction results and retrieve algorithm results
    # (Result class from classifier.prediction)
    predictionResult = algorithm.compute()


def printResults():
    printNumericTable(
        predictionResult.get(classifier.prediction.prediction),
        "Decision forest prediction results (first 10 rows):", 10
    )
    printNumericTable(testGroundTruth, "Ground truth (first 10 rows):", 10)


if __name__ == "__main__":
    trainModel()
    testModel()
    printResults()
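
For readers more familiar with scikit-learn than with DAAL, the forest parameters set in trainModel() map fairly directly onto RandomForestClassifier. The sketch below is only an illustration of that mapping under my own assumptions (same CSV layout of 3 feature columns plus a trailing label column); it is not the Intel example and is not numerically equivalent: DAAL returns an out-of-bag error table and MDI importances through its own result objects, and it is told that the third column is categorical, which scikit-learn's estimator ignores.

import numpy as np
from sklearn.ensemble import RandomForestClassifier

# Same layout as df_classification_train.csv: 3 feature columns, then the class label
train = np.loadtxt('../data/batch/df_classification_train.csv', delimiter=',')
X_train, y_train = train[:, :3], train[:, 3]

clf = RandomForestClassifier(
    n_estimators=10,       # nTrees
    min_samples_leaf=8,    # minObservationsInLeafNode
    max_features=3,        # featuresPerNode (all three features tried at every split)
    oob_score=True,        # roughly: resultsToCompute = computeOutOfBagError
    random_state=0,
)
clf.fit(X_train, y_train)

print("MDI variable importance:", clf.feature_importances_)  # varImportance = MDI
print("OOB accuracy:", clf.oob_score_)  # note: DAAL reports an OOB error rate, not accuracy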

  
