吴裕雄 python 机器学习-DMT(2)
import matplotlib.pyplot as plt decisionNode = dict(boxstyle="sawtooth", fc="0.8")
leafNode = dict(boxstyle="round4", fc="0.8")
arrow_args = dict(arrowstyle="<-") def getNumLeafs(myTree):
numLeafs = 0
for i in myTree.keys():
firstStr = i
break
secondDict = myTree[firstStr]
for key in secondDict.keys():
if type(secondDict[key]).__name__=='dict':
numLeafs += getNumLeafs(secondDict[key])
else: numLeafs +=1
return numLeafs def getTreeDepth(myTree):
maxDepth = 0
for i in myTree.keys():
firstStr = i
break
secondDict = myTree[firstStr]
for key in secondDict.keys():
if type(secondDict[key]).__name__=='dict':
thisDepth = 1 + getTreeDepth(secondDict[key])
else: thisDepth = 1
if thisDepth > maxDepth: maxDepth = thisDepth
return maxDepth def plotNode(nodeTxt, centerPt, parentPt, nodeType):
createPlot.ax1.annotate(nodeTxt, xy=parentPt, xycoords='axes fraction',xytext=centerPt, textcoords='axes fraction',va="center", ha="center", bbox=nodeType, arrowprops=arrow_args ) def plotMidText(cntrPt, parentPt, txtString):
xMid = (parentPt[0]-cntrPt[0])/2.0 + cntrPt[0]
yMid = (parentPt[1]-cntrPt[1])/2.0 + cntrPt[1]
createPlot.ax1.text(xMid, yMid, txtString, va="center", ha="center", rotation=30) def plotTree(myTree, parentPt, nodeTxt):
numLeafs = getNumLeafs(myTree)
depth = getTreeDepth(myTree)
for i in myTree.keys():
firstStr = i
break
cntrPt = (plotTree.xOff + (1.0 + float(numLeafs))/2.0/plotTree.totalW, plotTree.yOff)
plotMidText(cntrPt, parentPt, nodeTxt)
plotNode(firstStr, cntrPt, parentPt, decisionNode)
secondDict = myTree[firstStr]
plotTree.yOff = plotTree.yOff - 1.0/plotTree.totalD
for key in secondDict.keys():
if type(secondDict[key]).__name__=='dict':
plotTree(secondDict[key],cntrPt,str(key))
else:
plotTree.xOff = plotTree.xOff + 1.0/plotTree.totalW
plotNode(secondDict[key], (plotTree.xOff, plotTree.yOff), cntrPt, leafNode)
plotMidText((plotTree.xOff, plotTree.yOff), cntrPt, str(key))
plotTree.yOff = plotTree.yOff + 1.0/plotTree.totalD def createPlot(inTree):
fig = plt.figure(1, facecolor='white')
fig.clf()
axprops = dict(xticks=[], yticks=[])
createPlot.ax1 = plt.subplot(111, frameon=False, **axprops)
#createPlot.ax1 = plt.subplot(111, frameon=False) #ticks for demo puropses
plotTree.totalW = float(getNumLeafs(inTree))
plotTree.totalD = float(getTreeDepth(inTree))
plotTree.xOff = -0.5/plotTree.totalW; plotTree.yOff = 1.0;
plotTree(inTree, (0.5,1.0), '')
plt.show() def retrieveTree(i):
listOfTrees =[{'no surfacing': {0: 'no', 1: {'flippers': {0: 'no', 1: 'yes'}}}},
{'no surfacing': {0: 'no', 1: {'flippers': {0: {'head': {0: 'no', 1: 'yes'}}, 1: 'no'}}}}
]
return listOfTrees[i] thisTree = retrieveTree(0)
createPlot(thisTree)
thisTree = retrieveTree(1)
createPlot(thisTree)
import numpy as np
import operator as op
from math import log def calcShannonEnt(dataSet):
labelCounts = {}
for featVec in dataSet:
currentLabel = featVec[-1]
if(currentLabel not in labelCounts.keys()):
labelCounts[currentLabel] = 0
labelCounts[currentLabel] += 1
shannonEnt = 0.0
rowNum = len(dataSet)
for key in labelCounts:
prob = float(labelCounts[key])/rowNum
shannonEnt -= prob * log(prob,2)
return shannonEnt def splitDataSet(dataSet, axis, value):
retDataSet = []
for featVec in dataSet:
if(featVec[axis] == value):
reducedFeatVec = featVec[:axis]
reducedFeatVec.extend(featVec[axis+1:])
retDataSet.append(reducedFeatVec)
return retDataSet def chooseBestFeatureToSplit(dataSet):
numFeatures = np.shape(dataSet)[1]-1
baseEntropy = calcShannonEnt(dataSet)
bestInfoGain = 0.0
bestFeature = -1
for i in range(numFeatures):
featList = [example[i] for example in dataSet]
uniqueVals = set(featList)
newEntropy = 0.0
for value in uniqueVals:
subDataSet = splitDataSet(dataSet, i, value)
prob = len(subDataSet)/float(len(dataSet))
newEntropy += prob * calcShannonEnt(subDataSet)
infoGain = baseEntropy - newEntropy
if (infoGain > bestInfoGain):
bestInfoGain = infoGain
bestFeature = i
return bestFeature def majorityCnt(classList):
classCount={}
for vote in classList:
if(vote not in classCount.keys()):
classCount[vote] = 0
classCount[vote] += 1
sortedClassCount = sorted(classCount.items(), key=op.itemgetter(1), reverse=True)
return sortedClassCount[0][0] def createTree(dataSet,labels):
classList = [example[-1] for example in dataSet]
if(classList.count(classList[0]) == len(classList)):
return classList[0]
if len(dataSet[0]) == 1:
return majorityCnt(classList)
bestFeat = chooseBestFeatureToSplit(dataSet)
bestFeatLabel = labels[bestFeat]
myTree = {bestFeatLabel:{}}
del(labels[bestFeat])
featValues = [example[bestFeat] for example in dataSet]
uniqueVals = set(featValues)
for value in uniqueVals:
subLabels = labels[:]
myTree[bestFeatLabel][value] = createTree(splitDataSet(dataSet, bestFeat, value),subLabels)
return myTree def classify(inputTree,featLabels,testVec):
for i in inputTree.keys():
firstStr = i
break
secondDict = inputTree[firstStr]
featIndex = featLabels.index(firstStr)
key = testVec[featIndex]
valueOfFeat = secondDict[key]
if isinstance(valueOfFeat, dict):
classLabel = classify(valueOfFeat, featLabels, testVec)
else:
classLabel = valueOfFeat
return classLabel data = open("D:\\LearningResource\\machinelearninginaction\\Ch03\\lenses.txt")
dataSet = [inst.strip().split("\t") for inst in data.readlines()]
print(dataSet)
print(np.shape(dataSet))
labels = ["age","prescript","astigmatic","tearRate"]
tree = createTree(dataSet,labels)
print(tree) import matplotlib.pyplot as plt decisionNode = dict(boxstyle="sawtooth", fc="0.8")
leafNode = dict(boxstyle="round4", fc="0.8")
arrow_args = dict(arrowstyle="<-") def getNumLeafs(myTree):
numLeafs = 0
for i in myTree.keys():
firstStr = i
break
secondDict = myTree[firstStr]
for key in secondDict.keys():
if type(secondDict[key]).__name__=='dict':
numLeafs += getNumLeafs(secondDict[key])
else: numLeafs +=1
return numLeafs def getTreeDepth(myTree):
maxDepth = 0
for i in myTree.keys():
firstStr = i
break
secondDict = myTree[firstStr]
for key in secondDict.keys():
if type(secondDict[key]).__name__=='dict':
thisDepth = 1 + getTreeDepth(secondDict[key])
else: thisDepth = 1
if thisDepth > maxDepth: maxDepth = thisDepth
return maxDepth def plotNode(nodeTxt, centerPt, parentPt, nodeType):
createPlot.ax1.annotate(nodeTxt, xy=parentPt, xycoords='axes fraction',xytext=centerPt, textcoords='axes fraction',va="center", ha="center", bbox=nodeType, arrowprops=arrow_args ) def plotMidText(cntrPt, parentPt, txtString):
xMid = (parentPt[0]-cntrPt[0])/2.0 + cntrPt[0]
yMid = (parentPt[1]-cntrPt[1])/2.0 + cntrPt[1]
createPlot.ax1.text(xMid, yMid, txtString, va="center", ha="center", rotation=30) def plotTree(myTree, parentPt, nodeTxt):
numLeafs = getNumLeafs(myTree)
depth = getTreeDepth(myTree)
for i in myTree.keys():
firstStr = i
break
cntrPt = (plotTree.xOff + (1.0 + float(numLeafs))/2.0/plotTree.totalW, plotTree.yOff)
plotMidText(cntrPt, parentPt, nodeTxt)
plotNode(firstStr, cntrPt, parentPt, decisionNode)
secondDict = myTree[firstStr]
plotTree.yOff = plotTree.yOff - 1.0/plotTree.totalD
for key in secondDict.keys():
if type(secondDict[key]).__name__=='dict':
plotTree(secondDict[key],cntrPt,str(key))
else:
plotTree.xOff = plotTree.xOff + 1.0/plotTree.totalW
plotNode(secondDict[key], (plotTree.xOff, plotTree.yOff), cntrPt, leafNode)
plotMidText((plotTree.xOff, plotTree.yOff), cntrPt, str(key))
plotTree.yOff = plotTree.yOff + 1.0/plotTree.totalD def createPlot(inTree):
fig = plt.figure(1, facecolor='white')
fig.clf()
axprops = dict(xticks=[], yticks=[])
createPlot.ax1 = plt.subplot(111, frameon=False, **axprops)
#createPlot.ax1 = plt.subplot(111, frameon=False) #ticks for demo puropses
plotTree.totalW = float(getNumLeafs(inTree))
plotTree.totalD = float(getTreeDepth(inTree))
plotTree.xOff = -0.5/plotTree.totalW; plotTree.yOff = 1.0;
plotTree(inTree, (0.5,1.0), '')
plt.show() createPlot(tree)
吴裕雄 python 机器学习-DMT(2)的更多相关文章
- 吴裕雄 python 机器学习-DMT(1)
import numpy as np import operator as op from math import log def createDataSet(): dataSet = [[1, 1, ...
- 吴裕雄 python 机器学习——分类决策树模型
import numpy as np import matplotlib.pyplot as plt from sklearn import datasets from sklearn.model_s ...
- 吴裕雄 python 机器学习——回归决策树模型
import numpy as np import matplotlib.pyplot as plt from sklearn import datasets from sklearn.model_s ...
- 吴裕雄 python 机器学习——线性判断分析LinearDiscriminantAnalysis
import numpy as np import matplotlib.pyplot as plt from matplotlib import cm from mpl_toolkits.mplot ...
- 吴裕雄 python 机器学习——逻辑回归
import numpy as np import matplotlib.pyplot as plt from matplotlib import cm from mpl_toolkits.mplot ...
- 吴裕雄 python 机器学习——ElasticNet回归
import numpy as np import matplotlib.pyplot as plt from matplotlib import cm from mpl_toolkits.mplot ...
- 吴裕雄 python 机器学习——Lasso回归
import numpy as np import matplotlib.pyplot as plt from sklearn import datasets, linear_model from s ...
- 吴裕雄 python 机器学习——岭回归
import numpy as np import matplotlib.pyplot as plt from sklearn import datasets, linear_model from s ...
- 吴裕雄 python 机器学习——线性回归模型
import numpy as np from sklearn import datasets,linear_model from sklearn.model_selection import tra ...
随机推荐
- Docker使用札记 - Dockerfile指令
ARG ARG跟ENV都可以定义变量,不同在于ARG在构建期结束时是销毁,而ENV定义的是系统中的环境变量,不会在构建结束时销毁,在以后的构建中直接使用. 当ARG和ENV定义相同名称的变量时,ENV ...
- 初学Jmeter添加Http请求,执行接口测试
最近测试并发,刚开始使用的是录制方法,后面发现录制后无任何界面,加参数也不知从何着手,于是查了很多文章,终于慢慢的着手从http请求来测试并发了. 当然这是个遗留问题,先放在这里后面清楚了再回来补充: ...
- 【死磕 Spring】—— IoC 之加载 BeanDefinition
本文主要基于 Spring 5.0.6.RELEASE 摘要: 原创出处 http://cmsblogs.com/?p=2658 「小明哥」,谢谢! 作为「小明哥」的忠实读者,「老艿艿」略作修改,记录 ...
- 关于oracle中varchar2与nvarchar2的一点认识
今天在oracle 10g下测试了下varchar2与nvarchar2这两种类型,网上有很多关于这两种类型的区别的帖子,我还是自己测试了下. varchar2(size type),size最大为4 ...
- [Unity插件]Lua行为树(十):通用行为和通用条件节点
在行为树中,需要扩展的主要是行为节点和条件节点.一般来说,每当要创建一个节点时,就要新建一个节点文件.而对于一些简单的行为节点和条件节点,为了去掉新建文件的过程,可以写一个通用版本的行为节点和条件节点 ...
- python pyqt面板切换
- 获取手机浏览器IP的函数
function GetIP() { if (!empty($_SERVER['HTTP_CLIENT_IP'])) { //如果变量是非空或非零的值,则 empty()返回 FALSE. $IP = ...
- 自动化脚本编写环境部署_win7(RF)
第一步 安装Python并设置环境变量 1.安装python: python下载地址https://www.python.org/,建议用2.7.x版本 2.设置环境变量: 方法如下所示 第二步 安 ...
- jupyter notebook 小笔记
安装直接按这个步骤了: https://hub.mybinder.org/user/ipython-ipython-in-depth-xj8fraz8/notebooks/binder/Index.i ...
- Apache-Axis小结
以前用过axis, 不过好久不弄, 有忘记了.很多很多东西放在收藏夹里面, 但是长时间不去看,结果就是还是不熟悉!现在再简单总结一下吧. Axis开发服务器端webservice其实很简单. 1 下载 ...