360确实很个性,哈哈,你个貔貅,只吃不吐!
Rtrees介绍!参考链接:http://docs.opencv.org/modules/ml/doc/random_trees.html

原文链接:Python opencv实现的手写字符串识别--SVM、神经网络、K近邻、Boosting、Rtrees

代码copy:
# Code adapted from: http://www.360doc.com/content/14/0811/10/13997450_400977867.shtml
# Dataset (MNIST): http://yann.lecun.com/exdb/mnist/
from cv2.cv import *
import cv2
import os
import struct
import numpy

# Number of distinct class labels (digits 0-9).
class_n = 10
number_of_training_set = 2000  # 0 for all, 60,000 max
number_of_test_set = 0  # 0 for all, 10,000 max

# Locations of the MNIST idx files.  The relative equivalents would be
# r'.\data\train-images.idx3-ubyte', r'.\data\train-labels.idx1-ubyte',
# r'.\data\t10k-images.idx3-ubyte' and r'.\data\t10k-labels.idx1-ubyte'.
trainimagepath = r'E:\Develope\EclipseWorks\MeachinLearning\Ch16_RandomTrees\data\train-images.idx3-ubyte'
trainlabelpath = r'E:\Develope\EclipseWorks\MeachinLearning\Ch16_RandomTrees\data\train-labels.idx1-ubyte'
testimagepath = r'E:\Develope\EclipseWorks\MeachinLearning\Ch16_RandomTrees\data\t10k-images.idx3-ubyte'
testlabelpath = r'E:\Develope\EclipseWorks\MeachinLearning\Ch16_RandomTrees\data\t10k-labels.idx1-ubyte'


def evalfun(method, y_val, test_labels, test_number_of_images):
    """Print and return the fraction of predictions that match the labels.

    method -- name printed in front of the score.
    y_val -- indexable sequence of predicted labels.
    test_labels -- indexable sequence of expected labels.
    test_number_of_images -- how many leading entries to compare.

    Prints ``method + ':' + accuracy`` and returns the accuracy as a float.
    Raises ZeroDivisionError when test_number_of_images is 0.
    """
    count = sum(1 for item in range(test_number_of_images)
                if y_val[item] == test_labels[item])
    accuracy = float(count) / test_number_of_images
    print(method + ':' + str(accuracy))
    return accuracy


def unroll_samples(samples):
    """Repeat every sample class_n times and append a class-index column.

    samples -- 2-D array of shape (sample_n, var_n).

    Returns a float32 array of shape (sample_n * class_n, var_n + 1) whose
    last column cycles through 0 .. class_n - 1 for each original sample.
    """
    sample_n, var_n = samples.shape
    new_samples = numpy.zeros((sample_n * class_n, var_n + 1), numpy.float32)
    new_samples[:, :-1] = numpy.repeat(samples, class_n, axis=0)
    new_samples[:, -1] = numpy.tile(numpy.arange(class_n), sample_n)
    return new_samples


def unroll_responses(responses):
    """Turn class labels into a flat 0/1 indicator vector.

    responses -- 1-D array of class labels in 0 .. class_n - 1.

    Returns an int32 array of length len(responses) * class_n in which entry
    ``i * class_n + responses[i]`` is 1 and every other entry is 0.
    """
    sample_n = len(responses)
    new_responses = numpy.zeros(sample_n * class_n, numpy.int32)
    resp_idx = numpy.int32(responses + numpy.arange(sample_n) * class_n)
    new_responses[resp_idx] = 1
    return new_responses


def _read_exact(f, size):
    """Read exactly `size` bytes from `f`; raise ValueError if the file is short."""
    data = f.read(size)
    if len(data) != size:
        raise ValueError('unexpected end of file: wanted %d bytes, got %d'
                         % (size, len(data)))
    return data


def _crop_to_digit(pixels):
    """Return the square region of `pixels` centred on its non-zero pixels.

    The square's side is the larger of the bounding box's height and width,
    clamped to the image borders.  An image without any non-zero pixel is
    returned unchanged, because its bounding box would be empty.
    """
    number_of_rows, number_of_cols = pixels.shape
    rows, cols = numpy.nonzero(pixels > 0)
    if rows.size == 0:
        return pixels
    top, bottom = int(rows.min()), int(rows.max())
    left, right = int(cols.min()), int(cols.max())
    length = max(bottom - top, right - left)
    midx = float(right + left) / 2
    midy = float(bottom + top) / 2
    left = max(int(midx - float(length) / 2), 0)
    right = min(int(midx + float(length) / 2) + 1, number_of_cols)
    top = max(int(midy - float(length) / 2), 0)
    bottom = min(int(midy + float(length) / 2) + 1, number_of_rows)
    # Rows are the first axis, so the vertical range has to come first.
    return pixels[top:bottom, left:right]


def readImage(filepath, re_size, number_of_images_set):
    """Load images from an idx3-ubyte file as cropped, resized feature rows.

    filepath -- path of the image file.
    re_size -- size tuple handed to cv2.resize for every cropped image.
    number_of_images_set -- number of images to read; 0 means the count
        stored in the file header.

    Returns (number_of_images, images); images is a float32 array with one
    row of re_size[0] * re_size[1] values, scaled to 0..1, per image.
    Raises ValueError when the file holds less data than requested.
    """
    with open(filepath, 'rb') as f:
        # Header: four big-endian 32-bit ints (magic, count, rows, columns).
        header = struct.unpack('>iiii', _read_exact(f, 16))
        magic_number, number_of_images, number_of_rows, number_of_cols = header
        if number_of_images_set != 0:
            number_of_images = number_of_images_set
        raw = _read_exact(f, number_of_images * number_of_rows * number_of_cols)

    raw_images = numpy.frombuffer(raw, dtype=numpy.uint8).reshape(
        number_of_images, number_of_rows, number_of_cols)
    # Pre-allocate the result; appending row by row copies the array each time.
    images = numpy.empty((number_of_images, re_size[0] * re_size[1]),
                         numpy.float32)
    for index, raw_image in enumerate(raw_images):
        pixels = raw_image.astype(numpy.float32) / numpy.float32(255)
        cropped = numpy.ascontiguousarray(_crop_to_digit(pixels))
        images[index] = cv2.resize(cropped, re_size).reshape(-1)
    return number_of_images, images


def readLabel(filepath, number_of_images_set):
    """Load labels from an idx1-ubyte file.

    filepath -- path of the label file.
    number_of_images_set -- number of labels to read; 0 means the count
        stored in the file header.

    Returns (number_of_images, labels) with labels as a 1-D float32 array.
    Raises ValueError when the file holds less data than requested.
    """
    with open(filepath, 'rb') as f:
        # Header: two big-endian 32-bit ints (magic, count).
        magic_number, number_of_images = struct.unpack('>ii', _read_exact(f, 8))
        if number_of_images_set != 0:
            number_of_images = number_of_images_set
        raw = _read_exact(f, number_of_images)
    labels = numpy.frombuffer(raw, dtype=numpy.uint8).astype(numpy.float32)
    return number_of_images, labels


def _load_dataset(to_size=(8, 8)):
    """Read the training and test sets configured at module level.

    Returns (train_images, train_labels, test_images, test_labels,
    test_number_of_images).
    """
    _, train_images = readImage(trainimagepath, to_size, number_of_training_set)
    _, train_labels = readLabel(trainlabelpath, number_of_training_set)
    _, test_images = readImage(testimagepath, to_size, number_of_test_set)
    test_number_of_images, test_labels = readLabel(testlabelpath, number_of_test_set)
    print('loaded images and labels.')
    return train_images, train_labels, test_images, test_labels, test_number_of_images


def _run_ann(train_images, train_labels, test_images, test_labels,
             test_number_of_images):
    """Train a cv2.ANN_MLP with one 100-unit hidden layer and report 'nn'."""
    modelnn = cv2.ANN_MLP()
    sample_n, var_n = train_images.shape
    # One output unit per class: a row of 0/1 indicators for every sample.
    new_train_labels = unroll_responses(train_labels).reshape(-1, class_n)
    layer_sizes = numpy.int32([var_n, 100, class_n])
    modelnn.create(layer_sizes)
    params = dict(term_crit=(cv2.TERM_CRITERIA_COUNT, 300, 0.01),
                  train_method=cv2.ANN_MLP_TRAIN_PARAMS_BACKPROP,
                  bp_dw_scale=0.001,
                  bp_moment_scale=0.0)
    modelnn.train(train_images, numpy.float32(new_train_labels), None,
                  params=params)
    ret, resp = modelnn.predict(test_images)
    y_val_nn = resp.argmax(-1)
    evalfun('nn', y_val_nn, test_labels, test_number_of_images)


def _run_knn(train_images, train_labels, test_images, test_labels,
             test_number_of_images):
    """Train a cv2.KNearest classifier (k = 3) and report 'knn'."""
    modelknn = cv2.KNearest()
    modelknn.train(train_images, train_labels)
    # Element 1 of the returned tuple holds one predicted label per row.
    y_val_knn = modelknn.find_nearest(test_images, k=3)[1]
    evalfun('knn', y_val_knn[:, 0], test_labels, test_number_of_images)


def _run_svm(train_images, train_labels, test_images, test_labels,
             test_number_of_images):
    """Train a cv2.SVM with its default parameters and report 'svm'."""
    modelsvm = cv2.SVM()
    modelsvm.train(train_images, train_labels)
    y_val_svm = [modelsvm.predict(test_image) for test_image in test_images]
    evalfun('svm', y_val_svm, test_labels, test_number_of_images)


def _run_rtrees(train_images, train_labels, test_images, test_labels,
                test_number_of_images):
    """Train a cv2.RTrees forest (max_depth = 10) and report 'RTtree'."""
    modelRTtree = cv2.RTrees()
    sample_n, var_n = train_images.shape
    # Every feature is numerical; the trailing entry describes the response.
    var_types = numpy.array(
        [cv2.CV_VAR_NUMERICAL] * var_n + [cv2.CV_VAR_CATEGORICAL], numpy.uint8)
    params = dict(max_depth=10)
    modelRTtree.train(train_images, cv2.CV_ROW_SAMPLE, train_labels,
                      varType=var_types, params=params)
    y_val_RTtree = numpy.float32([modelRTtree.predict(s) for s in test_images])
    evalfun('RTtree', y_val_RTtree, test_labels, test_number_of_images)


def _run_boost(train_images, train_labels, test_images, test_labels,
               test_number_of_images):
    """Train a cv2.Boost model on the unrolled two-class problem; report 'Boost'."""
    modelBoost = cv2.Boost()
    sample_n, var_n = train_images.shape
    # Each sample is repeated once per class with the class index appended as
    # an extra feature; the response says whether that class is the true one.
    new_train_images = unroll_samples(train_images)
    new_train_labels = unroll_responses(train_labels)
    var_types = numpy.array(
        [cv2.CV_VAR_NUMERICAL] * var_n
        + [cv2.CV_VAR_CATEGORICAL, cv2.CV_VAR_CATEGORICAL], numpy.uint8)
    params = dict(max_depth=5)
    modelBoost.train(new_train_images, cv2.CV_ROW_SAMPLE, new_train_labels,
                     varType=var_types, params=params)
    new_test_images = unroll_samples(test_images)
    y_val_Boost = numpy.array(
        [modelBoost.predict(s, returnSum=True) for s in new_test_images])
    # Pick, for every test image, the class with the highest summed vote.
    y_val_Boost = y_val_Boost.reshape(-1, class_n).argmax(1)
    evalfun('Boost', y_val_Boost, test_labels, test_number_of_images)


def ANN_Pridict():
    """Load the data, evaluate the neural network, and return []."""
    _run_ann(*_load_dataset())
    return []


def KNearestPridict():
    """Load the data, evaluate k-nearest-neighbours, and return []."""
    _run_knn(*_load_dataset())
    return []


def SVM_Pridict():
    """Load the data, evaluate the SVM, and return []."""
    _run_svm(*_load_dataset())
    return []


def RTrees_Pridict():
    """Load the data, evaluate the random forest, and return []."""
    _run_rtrees(*_load_dataset())
    return []


def Boost_Pridict():
    """Load the data, evaluate boosting, and return []."""
    _run_boost(*_load_dataset())
    return []


def ML_Pridict():
    """Load the data once, evaluate all five classifiers in turn, return []."""
    data = _load_dataset()
    for run in (_run_ann, _run_knn, _run_svm, _run_rtrees, _run_boost):
        run(*data)
    return []

运行结果:

tree.SVM_Pridict()
loaded images and labels.
svm:0.6251

PythonOpenCV--Rtrees随机森林的更多相关文章

  1. OpenCV:使用OpenCV3随机森林进行统计特征多类分析

    原文链接:在opencv3中的机器学习算法练习:对OCR进行分类 本文贴出的代码为自己的训练集所用,作为参考.可运行demo程序请拜访原作者. CNN作为图像识别和检测器,在分析物体结构分布的多类识别 ...

  2. OpenCV:使用 随机森林与GBDT

    随机森林顾名思义,是用随机的方式建立一个森林.简单来说,随机森林就是由多棵CART(Classification And Regression Tree)构成的.对于每棵树,它们使用的训练集是从总的训 ...

  3. scikit-learn随机森林调参小结

    在Bagging与随机森林算法原理小结中,我们对随机森林(Random Forest, 以下简称RF)的原理做了总结.本文就从实践的角度对RF做一个总结.重点讲述scikit-learn中RF的调参注 ...

  4. Bagging与随机森林算法原理小结

    在集成学习原理小结中,我们讲到了集成学习有两个流派,一个是boosting派系,它的特点是各个弱学习器之间有依赖关系.另一种是bagging流派,它的特点是各个弱学习器之间没有依赖关系,可以并行拟合. ...

  5. [Machine Learning & Algorithm] 随机森林(Random Forest)

    1 什么是随机森林? 作为新兴起的.高度灵活的一种机器学习算法,随机森林(Random Forest,简称RF)拥有广泛的应用前景,从市场营销到医疗保健保险,既可以用来做市场营销模拟的建模,统计客户来 ...

  6. 使用脚本自动配置matlab安装libsvm和随机森林工具箱

    前言 支持向量机(SVM)和随机森林 都是用于分类的机器学习算法. 这里我需要对网上的工具箱在matlab中进行配置. 效果演示: 1.双击运行“自动配置.bat” 2.matlab会自动启动,手动配 ...

  7. paper 130:MatLab分类器大全(svm,knn,随机森林等)

    train_data是训练特征数据, train_label是分类标签.Predict_label是预测的标签.MatLab训练数据, 得到语义标签向量 Scores(概率输出).1.逻辑回归(多项式 ...

  8. kaggle数据挖掘竞赛初步--Titanic<随机森林&特征重要性>

    完整代码: https://github.com/cindycindyhi/kaggle-Titanic 特征工程系列: Titanic系列之原始数据分析和数据处理 Titanic系列之数据变换 Ti ...

  9. 机器学习中的算法(1)-决策树模型组合之随机森林与GBDT

    版权声明: 本文由LeftNotEasy发布于http://leftnoteasy.cnblogs.com, 本文可以被全部的转载或者部分使用,但请注明出处,如果有问题,请联系wheeleast@gm ...

  10. ML—随机森林·1

    Introduction to Random forest(Simplified) With increase in computational power, we can now choose al ...

随机推荐

  1. vue: This relative module was not found

    这是今天运行vue项目报的一个错误,特地在此记录一下. 错误信息如下: ERROR Failed to compile with 1 errors This relative module was n ...

  2. SSH(远程登录)

    在linux中SSH服务对应两个配置文件: ssh特点:在传输数据的时候,对文件加密后传输. ssh作用:为远程登录会话和其他网络服务提供安全性协议. ssh小结: 1.SSH是安全的加密协议,用于远 ...

  3. PAT 1080. Graduate Admission

    It is said that in 2013, there were about 100 graduate schools ready to proceed over 40,000 applicat ...

  4. 使用idea创建maven项目时 需要注意的问题

    截几张图来说明吧 上面的红色框如果不选中  将来创建的工程中没有webapp目录以及下面的子目录 选中之后  然后一直“下一步”  直到工程创建 但是我第一次按照正确的方式操作时  工程中并没有src ...

  5. (4)Spring Boot使用别的json解析框架【从零开始学Spring Boot】

    此文章已经废弃,请看新版的博客的完美解决方案: 78. Spring Boot完美使用FastJson解析JSON数据[从零开始学Spring Boot] http://412887952-qq-co ...

  6. 洛谷 P2634 BZOJ 2152 【模板】点分治(聪聪可可)

    题目描述 聪聪和可可是兄弟俩,他们俩经常为了一些琐事打起来,例如家中只剩下最后一根冰棍而两人都想吃.两个人都想玩儿电脑(可是他们家只有一台电脑)……遇到这种问题,一般情况下石头剪刀布就好了,可是他们已 ...

  7. windows上开启winrm服务

    原文链接 因为进行远程powershell管理必须需要启用winrm的服务,所以需要在windows平台上启用winrm服务 winrm service 默认都是未启用的状态,先查看状态:如无返回信息 ...

  8. COGS——C610. 数对的个数

    http://cogs.pro/cogs/problem/problem.php?pid=610 Description出题是一件痛苦的事情!题目看多了也有审美疲劳,于是我舍弃了大家所熟悉的A+B P ...

  9. hdu1203--D - I NEED A OFFER!(转化01背包)

    D - I NEED A OFFER! Time Limit:1000MS     Memory Limit:32768KB     64bit IO Format:%I64d & %I64u ...

  10. [LeetCode]Wildcard Matching 通配符匹配(贪心)

    一開始採用递归写.TLE. class Solution { public: bool flag; int n,m; void dfs(int id0,const char *s,int id1,co ...