PythonOpenCV--Rtrees随机森林
360确实很个性,哈哈,你个貔貅,只吃不吐!
Rtrees介绍!参考链接:http://docs.opencv.org/modules/ml/doc/random_trees.html
原文链接:Python opencv实现的手写字符串识别--SVM 、神经网络、 K近邻、 Boosting、Rtrees
代码copy:
# Code from: http://www.360doc.com/content/14/0811/10/13997450_400977867.shtml
#dataset:http://yann.lecun.com/exdb/mnist/
# Handwritten-digit classification on the MNIST IDX files with the legacy
# OpenCV 2.4 machine-learning classes (ANN_MLP, KNearest, SVM, RTrees, Boost).
# NOTE(review): cv2.cv and these class names only exist in OpenCV 2.4.x, so the
# script presumably targets Python 2.7 + OpenCV 2.4 -- confirm before porting.
from cv2.cv import *  # wildcard import kept from the original script
import cv2
import os
import struct
import numpy

class_n = 10  # number of digit classes
number_of_training_set = 2000  # 0 for all, 60,000 max
number_of_test_set = 0  # 0 for all, 10,000 max

# Relative alternatives used by the original author:
#   r'.\data\train-images.idx3-ubyte', r'.\data\train-labels.idx1-ubyte',
#   r'.\data\t10k-images.idx3-ubyte',  r'.\data\t10k-labels.idx1-ubyte'
trainimagepath = r'E:\Develope\EclipseWorks\MeachinLearning\Ch16_RandomTrees\data\train-images.idx3-ubyte'
trainlabelpath = r'E:\Develope\EclipseWorks\MeachinLearning\Ch16_RandomTrees\data\train-labels.idx1-ubyte'
testimagepath = r'E:\Develope\EclipseWorks\MeachinLearning\Ch16_RandomTrees\data\t10k-images.idx3-ubyte'
testlabelpath = r'E:\Develope\EclipseWorks\MeachinLearning\Ch16_RandomTrees\data\t10k-labels.idx1-ubyte'


def evalfun(method, y_val, test_labels, test_number_of_images):
    """Print the accuracy of a classifier as '<method>:<fraction correct>'.

    method -- label printed in front of the score.
    y_val -- indexable predictions, one per test sample.
    test_labels -- indexable ground-truth labels.
    test_number_of_images -- how many leading samples to compare.

    Raises ZeroDivisionError when test_number_of_images is 0.
    """
    count = sum(1 for item in range(test_number_of_images)
                if y_val[item] == test_labels[item])
    print(method + ':' + str(float(count) / test_number_of_images))


def unroll_samples(samples):
    """Repeat every sample class_n times and append a class-index column.

    samples -- 2-D array of shape (sample_n, var_n).
    Returns a float32 array of shape (sample_n * class_n, var_n + 1) whose last
    column cycles through 0 .. class_n - 1 for each original sample.
    """
    sample_n, var_n = samples.shape
    new_samples = numpy.zeros((sample_n * class_n, var_n + 1), numpy.float32)
    new_samples[:, :-1] = numpy.repeat(samples, class_n, axis=0)
    new_samples[:, -1] = numpy.tile(numpy.arange(class_n), sample_n)
    return new_samples


def unroll_responses(responses):
    """One-hot encode class labels into a flat int32 vector.

    responses -- 1-D array of class indices in 0 .. class_n - 1.
    Returns an int32 array of length len(responses) * class_n in which entry
    i * class_n + responses[i] is 1 and every other entry is 0.
    """
    sample_n = len(responses)
    new_responses = numpy.zeros(sample_n * class_n, numpy.int32)
    resp_idx = numpy.int32(responses + numpy.arange(sample_n) * class_n)
    new_responses[resp_idx] = 1
    return new_responses


def _read_idx_header(f, field_count):
    """Read field_count big-endian int32 header fields from an open binary file."""
    size = 4 * field_count
    raw = f.read(size)
    if len(raw) != size:
        raise ValueError('truncated IDX header: expected %d bytes, got %d'
                         % (size, len(raw)))
    return struct.unpack('>' + 'i' * field_count, raw)


def _resolve_count(available, requested):
    """Return how many records to read: all when requested is 0, else at most requested."""
    if requested < 0:
        raise ValueError('requested record count must be >= 0, got %d' % requested)
    if requested == 0:
        return available
    # Never read more records than the header says the file holds.
    return min(available, requested)


def _crop_to_square_bbox(pixels):
    """Crop a 2-D image to a square window centred on its non-zero pixels.

    The window side is the larger of the bounding-box height and width, and
    the window is clipped to the image borders. A blank image is returned
    unchanged because it has no bounding box to crop to.
    """
    number_of_rows, number_of_cols = pixels.shape
    lit = pixels > 0
    rows_used = numpy.flatnonzero(lit.any(axis=1))
    cols_used = numpy.flatnonzero(lit.any(axis=0))
    if rows_used.size == 0:
        return pixels

    top, bottom = int(rows_used[0]), int(rows_used[-1])
    left, right = int(cols_used[0]), int(cols_used[-1])
    half = max(bottom - top, right - left) / 2.0
    midx = (right + left) / 2.0
    midy = (bottom + top) / 2.0

    left = max(int(midx - half), 0)
    right = min(int(midx + half) + 1, number_of_cols)
    top = max(int(midy - half), 0)
    bottom = min(int(midy + half) + 1, number_of_rows)
    # Rows are the first axis and columns the second; the original sliced
    # [left:right, top:bottom], which swapped the two axes.
    return pixels[top:bottom, left:right]


def readImage(filepath, re_size, number_of_images_set):
    """Load images from an IDX3 file, crop each to its digit and resize it.

    filepath -- path of the IDX3 image file.
    re_size -- (width, height) passed to cv2.resize.
    number_of_images_set -- number of images to read; 0 means all of them.

    Returns (number_of_images, images) where images is a float32 array of
    shape (number_of_images, re_size[0] * re_size[1]) with values scaled to
    0..1 before resizing.

    Raises ValueError when the file is shorter than its header promises or
    when number_of_images_set is negative.
    """
    with open(filepath, 'rb') as f:
        _magic, available, number_of_rows, number_of_cols = _read_idx_header(f, 4)
        number_of_images = _resolve_count(available, number_of_images_set)
        image_size = number_of_rows * number_of_cols
        raw = f.read(number_of_images * image_size)
    if len(raw) != number_of_images * image_size:
        raise ValueError('truncated image data in %s' % filepath)

    frames = numpy.frombuffer(raw, dtype=numpy.uint8).reshape(
        number_of_images, number_of_rows, number_of_cols)
    images = numpy.empty((number_of_images, re_size[0] * re_size[1]), numpy.float32)
    scale = numpy.float32(255)
    for index in range(number_of_images):
        # Convert one frame at a time so the float copy of the set is never held whole.
        pixels = frames[index].astype(numpy.float32) / scale
        cropped = numpy.ascontiguousarray(_crop_to_square_bbox(pixels))
        images[index] = cv2.resize(cropped, re_size).reshape(-1)
    return number_of_images, images


def readLabel(filepath, number_of_images_set):
    """Load labels from an IDX1 file.

    filepath -- path of the IDX1 label file.
    number_of_images_set -- number of labels to read; 0 means all of them.

    Returns (number_of_images, labels) where labels is a 1-D float32 array.

    Raises ValueError when the file is shorter than its header promises or
    when number_of_images_set is negative.
    """
    with open(filepath, 'rb') as f:
        _magic, available = _read_idx_header(f, 2)
        number_of_images = _resolve_count(available, number_of_images_set)
        raw = f.read(number_of_images)
    if len(raw) != number_of_images:
        raise ValueError('truncated label data in %s' % filepath)
    labels = numpy.frombuffer(raw, dtype=numpy.uint8).astype(numpy.float32)
    return number_of_images, labels


def _load_datasets(to_size=(8, 8)):
    """Load the training and test sets from the module-level paths.

    Uses the module-level number_of_training_set / number_of_test_set limits.
    Returns (train_images, train_labels, test_images, test_labels,
    test_number_of_images).
    """
    _count, train_images = readImage(trainimagepath, to_size, number_of_training_set)
    _count, train_labels = readLabel(trainlabelpath, number_of_training_set)
    test_number_of_images, test_images = readImage(testimagepath, to_size, number_of_test_set)
    test_number_of_images, test_labels = readLabel(testlabelpath, number_of_test_set)
    print('loaded images and labels.')
    return train_images, train_labels, test_images, test_labels, test_number_of_images


def _run_ann(train_images, train_labels, test_images, test_labels, test_number_of_images):
    """Train a cv2.ANN_MLP with one hidden layer of 100 units and print its accuracy."""
    modelnn = cv2.ANN_MLP()
    _sample_n, var_n = train_images.shape
    # One output unit per class, trained against one-hot rows.
    new_train_labels = unroll_responses(train_labels).reshape(-1, class_n)
    layer_sizes = numpy.int32([var_n, 100, class_n])
    modelnn.create(layer_sizes)
    params = dict(term_crit=(cv2.TERM_CRITERIA_COUNT, 300, 0.01),
                  train_method=cv2.ANN_MLP_TRAIN_PARAMS_BACKPROP,
                  bp_dw_scale=0.001,
                  bp_moment_scale=0.0)
    modelnn.train(train_images, numpy.float32(new_train_labels), None, params=params)
    _ret, resp = modelnn.predict(test_images)
    y_val_nn = resp.argmax(-1)  # predicted class = strongest output unit
    evalfun('nn', y_val_nn, test_labels, test_number_of_images)


def _run_knn(train_images, train_labels, test_images, test_labels, test_number_of_images):
    """Train a cv2.KNearest model, classify with k = 3 and print its accuracy."""
    modelknn = cv2.KNearest()
    modelknn.train(train_images, train_labels)
    y_val_knn = modelknn.find_nearest(test_images, k=3)
    # Element 1 of the returned tuple holds one prediction per row, in column 0.
    evalfun('knn', y_val_knn[1][:, 0], test_labels, test_number_of_images)


def _run_svm(train_images, train_labels, test_images, test_labels, test_number_of_images):
    """Train a cv2.SVM with its default parameters and print its accuracy."""
    modelsvm = cv2.SVM()
    modelsvm.train(train_images, train_labels)
    y_val_svm = [modelsvm.predict(test_image) for test_image in test_images]
    evalfun('svm', y_val_svm, test_labels, test_number_of_images)


def _run_rtrees(train_images, train_labels, test_images, test_labels, test_number_of_images):
    """Train a cv2.RTrees model with max_depth = 10 and print its accuracy."""
    modelRTtree = cv2.RTrees()
    _sample_n, var_n = train_images.shape
    # Numerical features followed by one categorical entry for the response.
    var_types = numpy.array([cv2.CV_VAR_NUMERICAL] * var_n + [cv2.CV_VAR_CATEGORICAL],
                            numpy.uint8)
    params = dict(max_depth=10)
    modelRTtree.train(train_images, cv2.CV_ROW_SAMPLE, train_labels,
                      varType=var_types, params=params)
    y_val_RTtree = numpy.float32([modelRTtree.predict(s) for s in test_images])
    evalfun('RTtree', y_val_RTtree, test_labels, test_number_of_images)


def _run_boost(train_images, train_labels, test_images, test_labels, test_number_of_images):
    """Train a cv2.Boost model on the unrolled samples and print its accuracy."""
    modelBoost = cv2.Boost()
    _sample_n, var_n = train_images.shape
    # Each sample is unrolled into class_n rows tagged with a candidate class,
    # and the response is 1 only on the row whose tag is the true class.
    new_train_images = unroll_samples(train_images)
    new_train_labels = unroll_responses(train_labels)
    var_types = numpy.array(
        [cv2.CV_VAR_NUMERICAL] * var_n + [cv2.CV_VAR_CATEGORICAL, cv2.CV_VAR_CATEGORICAL],
        numpy.uint8)
    params = dict(max_depth=5)  # , use_surrogates=False)
    modelBoost.train(new_train_images, cv2.CV_ROW_SAMPLE, new_train_labels,
                     varType=var_types, params=params)
    new_test_images = unroll_samples(test_images)
    y_val_Boost = numpy.array([modelBoost.predict(s, returnSum=True)
                               for s in new_test_images])
    # Predicted class = candidate class with the highest summed vote.
    y_val_Boost = y_val_Boost.reshape(-1, class_n).argmax(1)
    evalfun('Boost', y_val_Boost, test_labels, test_number_of_images)


def ANN_Pridict():
    """Load the data, evaluate the neural network and return an empty list."""
    _run_ann(*_load_datasets())
    return []


def KNearestPridict():
    """Load the data, evaluate the k-nearest-neighbour model and return an empty list."""
    _run_knn(*_load_datasets())
    return []


def SVM_Pridict():
    """Load the data, evaluate the SVM and return an empty list."""
    _run_svm(*_load_datasets())
    return []


def RTrees_Pridict():
    """Load the data, evaluate the random-trees model and return an empty list."""
    _run_rtrees(*_load_datasets())
    return []


def Boost_Pridict():
    """Load the data, evaluate the boosted-trees model and return an empty list."""
    _run_boost(*_load_datasets())
    return []


def ML_Pridict():
    """Load the data once, evaluate all five models in turn and return an empty list."""
    data = _load_datasets()
    for run_model in (_run_ann, _run_knn, _run_svm, _run_rtrees, _run_boost):
        run_model(*data)
    return []
运行结果:
tree.SVM_Pridict()
loaded images and labels.
svm:0.6251
PythonOpenCV--Rtrees随机森林的更多相关文章
- OpenCV:使用OpenCV3随机森林进行统计特征多类分析
原文链接:在opencv3中的机器学习算法练习:对OCR进行分类 本文贴出的代码为自己的训练集所用,作为参考.可运行demo程序请拜访原作者. CNN作为图像识别和检测器,在分析物体结构分布的多类识别 ...
- OpenCV:使用 随机森林与GBDT
随机森林顾名思义,是用随机的方式建立一个森林.简单来说,随机森林就是由多棵CART(Classification And Regression Tree)构成的.对于每棵树,它们使用的训练集是从总的训 ...
- scikit-learn随机森林调参小结
在Bagging与随机森林算法原理小结中,我们对随机森林(Random Forest, 以下简称RF)的原理做了总结.本文就从实践的角度对RF做一个总结.重点讲述scikit-learn中RF的调参注 ...
- Bagging与随机森林算法原理小结
在集成学习原理小结中,我们讲到了集成学习有两个流派,一个是boosting派系,它的特点是各个弱学习器之间有依赖关系.另一种是bagging流派,它的特点是各个弱学习器之间没有依赖关系,可以并行拟合. ...
- [Machine Learning & Algorithm] 随机森林(Random Forest)
1 什么是随机森林? 作为新兴起的.高度灵活的一种机器学习算法,随机森林(Random Forest,简称RF)拥有广泛的应用前景,从市场营销到医疗保健保险,既可以用来做市场营销模拟的建模,统计客户来 ...
- 使用脚本自动配置matlab安装libsvm和随机森林工具箱
前言 支持向量机(SVM)和随机森林 都是用于分类的机器学习算法. 这里我需要对网上的工具箱在matlab中进行配置. 效果演示: 1.双击运行“自动配置.bat” 2.matlab会自动启动,手动配 ...
- paper 130:MatLab分类器大全(svm,knn,随机森林等)
train_data是训练特征数据, train_label是分类标签.Predict_label是预测的标签.MatLab训练数据, 得到语义标签向量 Scores(概率输出).1.逻辑回归(多项式 ...
- kaggle数据挖掘竞赛初步--Titanic<随机森林&特征重要性>
完整代码: https://github.com/cindycindyhi/kaggle-Titanic 特征工程系列: Titanic系列之原始数据分析和数据处理 Titanic系列之数据变换 Ti ...
- 机器学习中的算法(1)-决策树模型组合之随机森林与GBDT
版权声明: 本文由LeftNotEasy发布于http://leftnoteasy.cnblogs.com, 本文可以被全部的转载或者部分使用,但请注明出处,如果有问题,请联系wheeleast@gm ...
- ML—随机森林·1
Introduction to Random forest(Simplified) With increase in computational power, we can now choose al ...
随机推荐
- js 中this到底指向哪里?
其实js的this指向很简单.我们记住下面3种情况. this 指向的是浏览器中的window.代码如下: function fn(){ this.name='yangkun'; this.age=2 ...
- [Ynoi2015]即便看不到未来
题目大意: 给定一个序列,每次询问,给出一个区间$[l,r]$. 设将区间内的元素去重后重排的数组为$p$,求$p$中长度为$1\sim 10$的极长值域连续段个数. 长度为$L$的极长值域连续段的定 ...
- [tyvj1935 Poetize3]导弹防御塔 (二分图多重匹配)
传送门 Description Freda控制着N座可以发射导弹的防御塔.每座塔都有足够数量的导弹,但是每座塔每次只能发射一枚.在发射导弹时,导弹需要T1秒才能从防御塔中射出,而在发射导弹后,发射这枚 ...
- 安装配置开源的laravel项目到本地环境
前言 从https://github.com 上down了一个laravel项目安装到本地环境的时候,其中遇到一些问题,这里梳理并记录下整个流程. git上下载项目代码,部署laravel项目的时候会 ...
- 学习EXTJS6(5)基本功能-进度条组件
Ext.ProgressBar有二种模式:手动和自动:手动:自己控制显示.进度.更新.清除.自动只需要调用Wait方法即可. 配置项: 配置项 类型 说明 renderTo String 指定页面 ...
- cxGrid主从表删除从表记录的困惑
cxgrid主从表显示方便直观. varADetailDC: TcxGridDataController;AView: TcxCustomGridTableView; with cxgrdbndtbl ...
- 教你 Shiro + SpringBoot 整合 JWT
本篇文章将教大家在 shiro + springBoot 的基础上整合 JWT (JSON Web Token) 如果对 shiro 如何整合 springBoot 还不了解的可以先去看我的上一篇文章 ...
- HDU - 2973 - YAPTCHA
先上题目: YAPTCHA Time Limit: 10000/5000 MS (Java/Others) Memory Limit: 32768/32768 K (Java/Others)To ...
- [bzoj3038/3211]上帝造题的七分钟2/花神游历各国_线段树
上帝造题的七分钟2 bzoj-3038 题目大意:给定一个序列,支持:区间开方:查询区间和. 注释:$1\le n\le 10^5$,$1\le val[i] \le 10^{12}$. 想法:这题还 ...
- linux程序设计——多线程(第十二章)
12.8 多线程 之前,总是让程序的主线程只创建一个线程.这节将演示怎样在同一个程序中创建多个线程,然后怎样以不同于其启动顺序将它们合并在一起.此外,还演示多线程编程时容易出现的时序问题. ...