import numpy as np
import operator as op
from os import listdir

def classify0(inX, dataSet, labels, k):
    # Euclidean distance from inX to every row of dataSet.
    dataSetSize = dataSet.shape[0]
    diffMat = np.tile(inX, (dataSetSize, 1)) - dataSet
    sqDiffMat = diffMat**2
    sqDistances = sqDiffMat.sum(axis=1)
    distances = sqDistances**0.5
    sortedDistIndicies = distances.argsort()
    # Majority vote among the k nearest neighbours.
    classCount = {}
    for i in range(k):
        voteIlabel = labels[sortedDistIndicies[i]]
        classCount[voteIlabel] = classCount.get(voteIlabel, 0) + 1
    sortedClassCount = sorted(classCount.items(), key=op.itemgetter(1), reverse=True)
    return sortedClassCount[0][0]

def createDataSet():
    group = np.array([[1.0, 1.1], [1.0, 1.0], [0, 0], [0, 0.1]])
    labels = ['A', 'A', 'B', 'B']
    return group, labels

data, labels = createDataSet()
print(data)
print(labels)

test = np.array([[0, 0.5]])
result = classify0(test, data, labels, 3)
print(result)
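For comparison, here is a minimal sketch of the same k-nearest-neighbour vote written with NumPy broadcasting and collections.Counter instead of np.tile and a hand-built dictionary; the function name classify0_vectorized is my own and is not part of the original code.

import numpy as np
from collections import Counter

def classify0_vectorized(inX, dataSet, labels, k):
    # Broadcasting subtracts inX from every row of dataSet, so np.tile is not needed.
    distances = np.sqrt(((dataSet - inX) ** 2).sum(axis=1))
    kNearest = distances.argsort()[:k]            # indices of the k closest points
    votes = Counter(labels[i] for i in kNearest)  # majority vote among their labels
    return votes.most_common(1)[0][0]

group = np.array([[1.0, 1.1], [1.0, 1.0], [0, 0], [0, 0.1]])
labels = ['A', 'A', 'B', 'B']
print(classify0_vectorized(np.array([0, 0.5]), group, labels, 3))  # prints 'B', as above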

import numpy as np
import operator as op
from os import listdir

def classify0(inX, dataSet, labels, k):
    dataSetSize = dataSet.shape[0]
    diffMat = np.tile(inX, (dataSetSize, 1)) - dataSet
    sqDiffMat = diffMat**2
    sqDistances = sqDiffMat.sum(axis=1)
    distances = sqDistances**0.5
    sortedDistIndicies = distances.argsort()
    classCount = {}
    for i in range(k):
        voteIlabel = labels[sortedDistIndicies[i]]
        classCount[voteIlabel] = classCount.get(voteIlabel, 0) + 1
    sortedClassCount = sorted(classCount.items(), key=op.itemgetter(1), reverse=True)
    return sortedClassCount[0][0]

def file2matrix(filename):
    returnMat = []
    classLabelVector = []  # one label per row of the feature matrix
    with open(filename) as fr:
        for line in fr.readlines():
            line = line.strip()
            listFromLine = line.split('\t')
            returnMat.append([float(listFromLine[0]), float(listFromLine[1]), float(listFromLine[2])])
            classLabelVector.append(int(listFromLine[-1]))
    return np.array(returnMat), np.array(classLabelVector)

trainData, trainLabel = file2matrix("D:\\LearningResource\\machinelearninginaction\\Ch02\\datingTestSet2.txt")
print(trainData[0:4])
print(trainLabel[0:4])

def autoNorm(dataSet):
    minVals = dataSet.min(0)
    maxVals = dataSet.max(0)
    ranges = maxVals - minVals
    normDataSet = np.zeros(np.shape(dataSet))
    m = dataSet.shape[0]
    normDataSet = dataSet - np.tile(minVals, (m, 1))
    normDataSet = normDataSet / np.tile(ranges, (m, 1))  # element-wise divide
    return normDataSet, ranges, minVals

normDataSet, ranges, minVals = autoNorm(trainData)
print(ranges)
print(minVals)
print(normDataSet[0:4])
print(trainLabel[0:4])

testData = np.array([[0.5, 0.3, 0.5]])
result = classify0(testData, normDataSet, trainLabel, 5)
print(result)
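autoNorm implements min-max scaling, newValue = (oldValue - min) / (max - min), column by column. As a rough illustration (not from the original post), the same normalization can be written with broadcasting instead of np.tile:

import numpy as np

def autoNorm_broadcast(dataSet):
    minVals = dataSet.min(axis=0)
    ranges = dataSet.max(axis=0) - minVals
    # Broadcasting divides every row by the per-column range; no np.tile needed.
    return (dataSet - minVals) / ranges, ranges, minVals

X = np.array([[10.0, 2.0], [20.0, 4.0], [30.0, 6.0]])
normX, ranges, minVals = autoNorm_broadcast(X)
print(normX)    # each column is scaled to [0, 1]
# Optional cross-check, assuming scikit-learn is installed (it is not used elsewhere here):
# from sklearn.preprocessing import MinMaxScaler
# print(MinMaxScaler().fit_transform(X))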

import numpy as np
import operator as op
from os import listdir

def classify0(inX, dataSet, labels, k):
    dataSetSize = dataSet.shape[0]
    diffMat = np.tile(inX, (dataSetSize, 1)) - dataSet
    sqDiffMat = diffMat**2
    sqDistances = sqDiffMat.sum(axis=1)
    distances = sqDistances**0.5
    sortedDistIndicies = distances.argsort()
    classCount = {}
    for i in range(k):
        voteIlabel = labels[sortedDistIndicies[i]]
        classCount[voteIlabel] = classCount.get(voteIlabel, 0) + 1
    sortedClassCount = sorted(classCount.items(), key=op.itemgetter(1), reverse=True)
    return sortedClassCount[0][0]

def file2matrix(filename):
    returnMat = []
    classLabelVector = []  # here the labels stay as strings (datingTestSet.txt)
    with open(filename) as fr:
        for line in fr.readlines():
            line = line.strip()
            listFromLine = line.split('\t')
            returnMat.append([float(listFromLine[0]), float(listFromLine[1]), float(listFromLine[2])])
            classLabelVector.append(listFromLine[-1])
    return np.array(returnMat), np.array(classLabelVector)

def autoNorm(dataSet):
    minVals = dataSet.min(0)
    maxVals = dataSet.max(0)
    ranges = maxVals - minVals
    normDataSet = np.zeros(np.shape(dataSet))
    m = dataSet.shape[0]
    normDataSet = dataSet - np.tile(minVals, (m, 1))
    normDataSet = normDataSet / np.tile(ranges, (m, 1))  # element-wise divide
    return normDataSet, ranges, minVals

normDataSet, ranges, minVals = autoNorm(trainData)

def datingClassTest():
    hoRatio = 0.10  # hold out 10%
    datingDataMat, datingLabels = file2matrix("D:\\LearningResource\\machinelearninginaction\\Ch02\\datingTestSet.txt")
    normMat, ranges, minVals = autoNorm(datingDataMat)
    m = normMat.shape[0]
    numTestVecs = int(m * hoRatio)
    errorCount = 0.0
    for i in range(numTestVecs):
        classifierResult = classify0(normMat[i, :], normMat[numTestVecs:m, :], datingLabels[numTestVecs:m], 3)
        print('the classifier came back with: %s, the real answer is: %s' % (classifierResult, datingLabels[i]))
        if classifierResult != datingLabels[i]:
            errorCount += 1.0
    print('the total error rate is: %f' % (errorCount / float(numTestVecs)))
    print(errorCount)

datingClassTest()
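datingClassTest holds out the first 10% of rows as a test set and classifies each of them against the remaining 90%. A small, hypothetical extension of the same idea is to repeat the hold-out evaluation for several values of k and keep the one with the lowest error; the helper below (my own name, not from the post) reuses classify0 and expects an already-normalized matrix and its labels:

def holdOutErrorRate(normMat, labels, k, hoRatio=0.10):
    # Same protocol as datingClassTest: the first hoRatio of rows are the test set.
    m = normMat.shape[0]
    numTestVecs = int(m * hoRatio)
    errorCount = 0.0
    for i in range(numTestVecs):
        pred = classify0(normMat[i, :], normMat[numTestVecs:m, :], labels[numTestVecs:m], k)
        if pred != labels[i]:
            errorCount += 1.0
    return errorCount / float(numTestVecs)

# Hypothetical usage with the arrays computed inside datingClassTest:
# for k in (1, 3, 5, 7, 9):
#     print(k, holdOutErrorRate(normMat, datingLabels, k))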


import numpy as np
import operator as op
from os import listdir

def classify0(inX, dataSet, labels, k):
    dataSetSize = dataSet.shape[0]
    diffMat = np.tile(inX, (dataSetSize, 1)) - dataSet
    sqDiffMat = diffMat**2
    sqDistances = sqDiffMat.sum(axis=1)
    distances = sqDistances**0.5
    sortedDistIndicies = distances.argsort()
    classCount = {}
    for i in range(k):
        voteIlabel = labels[sortedDistIndicies[i]]
        classCount[voteIlabel] = classCount.get(voteIlabel, 0) + 1
    sortedClassCount = sorted(classCount.items(), key=op.itemgetter(1), reverse=True)
    return sortedClassCount[0][0]

def file2matrix(filename):
    returnMat = []
    classLabelVector = []  # integer labels (datingTestSet2.txt)
    with open(filename) as fr:
        for line in fr.readlines():
            line = line.strip()
            listFromLine = line.split('\t')
            returnMat.append([float(listFromLine[0]), float(listFromLine[1]), float(listFromLine[2])])
            classLabelVector.append(int(listFromLine[-1]))
    return np.array(returnMat), np.array(classLabelVector)

def autoNorm(dataSet):
    minVals = dataSet.min(0)
    maxVals = dataSet.max(0)
    ranges = maxVals - minVals
    normDataSet = np.zeros(np.shape(dataSet))
    m = dataSet.shape[0]
    normDataSet = dataSet - np.tile(minVals, (m, 1))
    normDataSet = normDataSet / np.tile(ranges, (m, 1))  # element-wise divide
    return normDataSet, ranges, minVals

def classifyPerson():
    resultList = ["not at all", "in small doses", "in large doses"]
    percentTats = float(input("percentage of time spent playing video games?"))
    ffMiles = float(input("frequent flier miles earned per year?"))
    iceCream = float(input("liters of ice cream consumed per year?"))
    testData = np.array([percentTats, ffMiles, iceCream])
    trainData, trainLabel = file2matrix("D:\\LearningResource\\machinelearninginaction\\Ch02\\datingTestSet2.txt")
    normDataSet, ranges, minVals = autoNorm(trainData)
    # Scale the query with the training-set minimums and ranges before classifying.
    result = classify0((testData - minVals) / ranges, normDataSet, trainLabel, 3)
    print("You will probably like this person: ", resultList[result - 1])

classifyPerson()

import numpy as np
import operator as op
from os import listdir

def classify0(inX, dataSet, labels, k):
    dataSetSize = dataSet.shape[0]
    diffMat = np.tile(inX, (dataSetSize, 1)) - dataSet
    sqDiffMat = diffMat**2
    sqDistances = sqDiffMat.sum(axis=1)
    distances = sqDistances**0.5
    sortedDistIndicies = distances.argsort()
    classCount = {}
    for i in range(k):
        voteIlabel = labels[sortedDistIndicies[i]]
        classCount[voteIlabel] = classCount.get(voteIlabel, 0) + 1
    sortedClassCount = sorted(classCount.items(), key=op.itemgetter(1), reverse=True)
    return sortedClassCount[0][0]

def img2vector(filename):
    # Read a 32x32 grid of '0'/'1' characters and flatten it into a 1x1024 vector.
    returnVect = []
    with open(filename) as fr:
        for i in range(32):
            lineStr = fr.readline()
            for j in range(32):
                returnVect.append(int(lineStr[j]))
    return np.array([returnVect])

def handwritingClassTest():
    hwLabels = []
    trainingFileList = listdir('D:\\LearningResource\\machinelearninginaction\\Ch02\\trainingDigits')  # load the training set
    m = len(trainingFileList)
    trainingMat = np.zeros((m, 1024))
    for i in range(m):
        fileNameStr = trainingFileList[i]
        fileStr = fileNameStr.split('.')[0]  # take off .txt
        classNumStr = int(fileStr.split('_')[0])
        hwLabels.append(classNumStr)
        trainingMat[i, :] = img2vector('D:\\LearningResource\\machinelearninginaction\\Ch02\\trainingDigits\\%s' % fileNameStr)
    testFileList = listdir('D:\\LearningResource\\machinelearninginaction\\Ch02\\testDigits')  # iterate through the test set
    mTest = len(testFileList)
    errorCount = 0.0
    for i in range(mTest):
        fileNameStr = testFileList[i]
        fileStr = fileNameStr.split('.')[0]  # take off .txt
        classNumStr = int(fileStr.split('_')[0])
        vectorUnderTest = img2vector('D:\\LearningResource\\machinelearninginaction\\Ch02\\testDigits\\%s' % fileNameStr)
        classifierResult = classify0(vectorUnderTest, trainingMat, hwLabels, 3)
        print("the classifier came back with: %d, the real answer is: %d" % (classifierResult, classNumStr))
        if classifierResult != classNumStr:
            errorCount += 1.0
    print("\nthe total number of errors is: %d" % errorCount)
    print("\nthe total error rate is: %f" % (errorCount / float(mTest)))

handwritingClassTest()

