import numpy as np
import operator as op
from os import listdir def classify0(inX, dataSet, labels, k):
dataSetSize = dataSet.shape[0]
diffMat = np.tile(inX, (dataSetSize,1)) - dataSet
sqDiffMat = diffMat**2
sqDistances = sqDiffMat.sum(axis=1)
distances = sqDistances**0.5
sortedDistIndicies = distances.argsort()
classCount={}
for i in range(k):
voteIlabel = labels[sortedDistIndicies[i]]
classCount[voteIlabel] = classCount.get(voteIlabel,0) + 1
sortedClassCount = sorted(classCount.items(), key=op.itemgetter(1), reverse=True)
return sortedClassCount[0][0] def createDataSet():
group = np.array([[1.0,1.1],[1.0,1.0],[0,0],[0,0.1]])
labels = ['A','A','B','B']
return group, labels data,labels = createDataSet()
print(data)
print(labels) test = np.array([[0,0.5]])
result = classify0(test,data,labels,3)
print(result)

import numpy as np
import operator as op
from os import listdir def classify0(inX, dataSet, labels, k):
dataSetSize = dataSet.shape[0]
diffMat = np.tile(inX, (dataSetSize,1)) - dataSet
sqDiffMat = diffMat**2
sqDistances = sqDiffMat.sum(axis=1)
distances = sqDistances**0.5
sortedDistIndicies = distances.argsort()
classCount={}
for i in range(k):
voteIlabel = labels[sortedDistIndicies[i]]
classCount[voteIlabel] = classCount.get(voteIlabel,0) + 1
sortedClassCount = sorted(classCount.items(), key=op.itemgetter(1), reverse=True)
return sortedClassCount[0][0] def file2matrix(filename):
fr = open(filename)
returnMat = []
classLabelVector = [] #prepare labels return
for line in fr.readlines():
line = line.strip()
listFromLine = line.split('\t')
returnMat.append([float(listFromLine[0]),float(listFromLine[1]),float(listFromLine[2])])
classLabelVector.append(int(listFromLine[-1]))
return np.array(returnMat),np.array(classLabelVector) trainData,trainLabel = file2matrix("D:\\LearningResource\\machinelearninginaction\\Ch02\\datingTestSet2.txt")
print(trainData[0:4])
print(trainLabel[0:4]) def autoNorm(dataSet):
minVals = dataSet.min(0)
maxVals = dataSet.max(0)
ranges = maxVals - minVals
normDataSet = np.zeros(np.shape(dataSet))
m = dataSet.shape[0]
normDataSet = dataSet - np.tile(minVals, (m,1))
normDataSet = normDataSet/np.tile(ranges, (m,1)) #element wise divide
return normDataSet, ranges, minVals normDataSet, ranges, minVals = autoNorm(trainData)
print(ranges)
print(minVals)
print(normDataSet[0:4])
print(trainLabel[0:4]) testData = np.array([[0.5,0.3,0.5]])
result = classify0(testData, normDataSet, trainLabel, 5)
print(result)

import numpy as np
import operator as op
from os import listdir def classify0(inX, dataSet, labels, k):
dataSetSize = dataSet.shape[0]
diffMat = np.tile(inX, (dataSetSize,1)) - dataSet
sqDiffMat = diffMat**2
sqDistances = sqDiffMat.sum(axis=1)
distances = sqDistances**0.5
sortedDistIndicies = distances.argsort()
classCount={}
for i in range(k):
voteIlabel = labels[sortedDistIndicies[i]]
classCount[voteIlabel] = classCount.get(voteIlabel,0) + 1
sortedClassCount = sorted(classCount.items(), key=op.itemgetter(1), reverse=True)
return sortedClassCount[0][0] def file2matrix(filename):
fr = open(filename)
returnMat = []
classLabelVector = [] #prepare labels return
for line in fr.readlines():
line = line.strip()
listFromLine = line.split('\t')
returnMat.append([float(listFromLine[0]),float(listFromLine[1]),float(listFromLine[2])])
classLabelVector.append(listFromLine[-1])
return np.array(returnMat),np.array(classLabelVector) def autoNorm(dataSet):
minVals = dataSet.min(0)
maxVals = dataSet.max(0)
ranges = maxVals - minVals
normDataSet = np.zeros(np.shape(dataSet))
m = dataSet.shape[0]
normDataSet = dataSet - np.tile(minVals, (m,1))
normDataSet = normDataSet/np.tile(ranges, (m,1)) #element wise divide
return normDataSet, ranges, minVals normDataSet, ranges, minVals = autoNorm(trainData) def datingClassTest():
hoRatio = 0.10 #hold out 10%
datingDataMat,datingLabels = file2matrix("D:\\LearningResource\\machinelearninginaction\\Ch02\\datingTestSet.txt")
normMat, ranges, minVals = autoNorm(datingDataMat)
m = normMat.shape[0]
numTestVecs = int(m*hoRatio)
errorCount = 0.0
for i in range(numTestVecs):
classifierResult = classify0(normMat[i,:],normMat[numTestVecs:m,:],datingLabels[numTestVecs:m],3)
print(('the classifier came back with: %s, the real answer is: %s') % (classifierResult, datingLabels[i]))
if (classifierResult != datingLabels[i]):
errorCount += 1.0
print(('the total error rate is: %f') % (errorCount/float(numTestVecs)))
print(errorCount) datingClassTest()
import numpy as np
import operator as op
from os import listdir def classify0(inX, dataSet, labels, k):
dataSetSize = dataSet.shape[0]
diffMat = np.tile(inX, (dataSetSize,1)) - dataSet
sqDiffMat = diffMat**2
sqDistances = sqDiffMat.sum(axis=1)
distances = sqDistances**0.5
sortedDistIndicies = distances.argsort()
classCount={}
for i in range(k):
voteIlabel = labels[sortedDistIndicies[i]]
classCount[voteIlabel] = classCount.get(voteIlabel,0) + 1
sortedClassCount = sorted(classCount.items(), key=op.itemgetter(1), reverse=True)
return sortedClassCount[0][0] def file2matrix(filename):
fr = open(filename)
returnMat = []
classLabelVector = [] #prepare labels return
for line in fr.readlines():
line = line.strip()
listFromLine = line.split('\t')
returnMat.append([float(listFromLine[0]),float(listFromLine[1]),float(listFromLine[2])])
classLabelVector.append(listFromLine[-1])
return np.array(returnMat),np.array(classLabelVector) def autoNorm(dataSet):
minVals = dataSet.min(0)
maxVals = dataSet.max(0)
ranges = maxVals - minVals
normDataSet = np.zeros(np.shape(dataSet))
m = dataSet.shape[0]
normDataSet = dataSet - np.tile(minVals, (m,1))
normDataSet = normDataSet/np.tile(ranges, (m,1)) #element wise divide
return normDataSet, ranges, minVals normDataSet, ranges, minVals = autoNorm(trainData) def datingClassTest():
hoRatio = 0.10 #hold out 10%
datingDataMat,datingLabels = file2matrix("D:\\LearningResource\\machinelearninginaction\\Ch02\\datingTestSet.txt")
normMat, ranges, minVals = autoNorm(datingDataMat)
m = normMat.shape[0]
numTestVecs = int(m*hoRatio)
errorCount = 0.0
for i in range(numTestVecs):
classifierResult = classify0(normMat[i,:],normMat[numTestVecs:m,:],datingLabels[numTestVecs:m],3)
print(('the classifier came back with: %s, the real answer is: %s') % (classifierResult, datingLabels[i]))
if (classifierResult != datingLabels[i]):
errorCount += 1.0
print(('the total error rate is: %f') % (errorCount/float(numTestVecs)))
print(errorCount) datingClassTest()

................................................

import numpy as np
import operator as op
from os import listdir def classify0(inX, dataSet, labels, k):
dataSetSize = dataSet.shape[0]
diffMat = np.tile(inX, (dataSetSize,1)) - dataSet
sqDiffMat = diffMat**2
sqDistances = sqDiffMat.sum(axis=1)
distances = sqDistances**0.5
sortedDistIndicies = distances.argsort()
classCount={}
for i in range(k):
voteIlabel = labels[sortedDistIndicies[i]]
classCount[voteIlabel] = classCount.get(voteIlabel,0) + 1
sortedClassCount = sorted(classCount.items(), key=op.itemgetter(1), reverse=True)
return sortedClassCount[0][0] def file2matrix(filename):
fr = open(filename)
returnMat = []
classLabelVector = [] #prepare labels return
for line in fr.readlines():
line = line.strip()
listFromLine = line.split('\t')
returnMat.append([float(listFromLine[0]),float(listFromLine[1]),float(listFromLine[2])])
classLabelVector.append(int(listFromLine[-1]))
return np.array(returnMat),np.array(classLabelVector) def autoNorm(dataSet):
minVals = dataSet.min(0)
maxVals = dataSet.max(0)
ranges = maxVals - minVals
normDataSet = np.zeros(np.shape(dataSet))
m = dataSet.shape[0]
normDataSet = dataSet - np.tile(minVals, (m,1))
normDataSet = normDataSet/np.tile(ranges, (m,1)) #element wise divide
return normDataSet, ranges, minVals def classifyPerson():
resultList = ["not at all", "in samll doses", "in large doses"]
percentTats = float(input("percentage of time spent playing video game?"))
ffMiles = float(input("frequent flier miles earned per year?"))
iceCream = float(input("liters of ice cream consumed per year?"))
testData = np.array([percentTats,ffMiles,iceCream])
trainData,trainLabel = file2matrix("D:\\LearningResource\\machinelearninginaction\\Ch02\\datingTestSet2.txt")
normDataSet, ranges, minVals = autoNorm(trainData)
result = classify0((testData-minVals)/ranges, normDataSet, trainLabel, 3)
print("You will probably like this person: ",resultList[result-1]) classifyPerson()
import numpy as np
import operator as op
from os import listdir def classify0(inX, dataSet, labels, k):
dataSetSize = dataSet.shape[0]
diffMat = np.tile(inX, (dataSetSize,1)) - dataSet
sqDiffMat = diffMat**2
sqDistances = sqDiffMat.sum(axis=1)
distances = sqDistances**0.5
sortedDistIndicies = distances.argsort()
classCount={}
for i in range(k):
voteIlabel = labels[sortedDistIndicies[i]]
classCount[voteIlabel] = classCount.get(voteIlabel,0) + 1
sortedClassCount = sorted(classCount.items(), key=op.itemgetter(1), reverse=True)
return sortedClassCount[0][0] def file2matrix(filename):
fr = open(filename)
returnMat = []
classLabelVector = [] #prepare labels return
for line in fr.readlines():
line = line.strip()
listFromLine = line.split('\t')
returnMat.append([float(listFromLine[0]),float(listFromLine[1]),float(listFromLine[2])])
classLabelVector.append(int(listFromLine[-1]))
return np.array(returnMat),np.array(classLabelVector) def autoNorm(dataSet):
minVals = dataSet.min(0)
maxVals = dataSet.max(0)
ranges = maxVals - minVals
normDataSet = np.zeros(np.shape(dataSet))
m = dataSet.shape[0]
normDataSet = dataSet - np.tile(minVals, (m,1))
normDataSet = normDataSet/np.tile(ranges, (m,1)) #element wise divide
return normDataSet, ranges, minVals def classifyPerson():
resultList = ["not at all", "in samll doses", "in large doses"]
percentTats = float(input("percentage of time spent playing video game?"))
ffMiles = float(input("frequent flier miles earned per year?"))
iceCream = float(input("liters of ice cream consumed per year?"))
testData = np.array([percentTats,ffMiles,iceCream])
trainData,trainLabel = file2matrix("D:\\LearningResource\\machinelearninginaction\\Ch02\\datingTestSet2.txt")
normDataSet, ranges, minVals = autoNorm(trainData)
result = classify0((testData-minVals)/ranges, normDataSet, trainLabel, 3)
print("You will probably like this person: ",resultList[result-1]) classifyPerson()

import numpy as np
import operator as op
from os import listdir def classify0(inX, dataSet, labels, k):
dataSetSize = dataSet.shape[0]
diffMat = np.tile(inX, (dataSetSize,1)) - dataSet
sqDiffMat = diffMat**2
sqDistances = sqDiffMat.sum(axis=1)
distances = sqDistances**0.5
sortedDistIndicies = distances.argsort()
classCount={}
for i in range(k):
voteIlabel = labels[sortedDistIndicies[i]]
classCount[voteIlabel] = classCount.get(voteIlabel,0) + 1
sortedClassCount = sorted(classCount.items(), key=op.itemgetter(1), reverse=True)
return sortedClassCount[0][0] def img2vector(filename):
returnVect = []
fr = open(filename)
for i in range(32):
lineStr = fr.readline()
for j in range(32):
returnVect.append(int(lineStr[j]))
return np.array([returnVect]) def handwritingClassTest():
hwLabels = []
trainingFileList = listdir('D:\\LearningResource\\machinelearninginaction\\Ch02\\trainingDigits') #load the training set
m = len(trainingFileList)
trainingMat = np.zeros((m,1024))
for i in range(m):
fileNameStr = trainingFileList[i]
fileStr = fileNameStr.split('.')[0] #take off .txt
classNumStr = int(fileStr.split('_')[0])
hwLabels.append(classNumStr)
trainingMat[i,:] = img2vector('D:\\LearningResource\\machinelearninginaction\\Ch02\\trainingDigits\\%s' % fileNameStr)
testFileList = listdir('D:\\LearningResource\\machinelearninginaction\\Ch02\\testDigits') #iterate through the test set
mTest = len(testFileList)
errorCount = 0.0
for i in range(mTest):
fileNameStr = testFileList[i]
fileStr = fileNameStr.split('.')[0] #take off .txt
classNumStr = int(fileStr.split('_')[0])
vectorUnderTest = img2vector('D:\\LearningResource\\machinelearninginaction\\Ch02\\testDigits\\%s' % fileNameStr)
classifierResult = classify0(vectorUnderTest, trainingMat, hwLabels, 3)
print("the classifier came back with: %d, the real answer is: %d" % (classifierResult, classNumStr))
if (classifierResult != classNumStr):
errorCount += 1.0
print("\nthe total number of errors is: %d" % errorCount)
print("\nthe total error rate is: %f" % (errorCount/float(mTest))) handwritingClassTest()

.......................................

吴裕雄 python 机器学习-KNN算法(1)的更多相关文章

  1. 吴裕雄 python 机器学习——KNN回归KNeighborsRegressor模型

    import numpy as np import matplotlib.pyplot as plt from sklearn import neighbors, datasets from skle ...

  2. 吴裕雄 python 机器学习——KNN分类KNeighborsClassifier模型

    import numpy as np import matplotlib.pyplot as plt from sklearn import neighbors, datasets from skle ...

  3. 吴裕雄 python 机器学习-KNN(2)

    import matplotlib import numpy as np import matplotlib.pyplot as plt from matplotlib.patches import ...

  4. 吴裕雄 python 机器学习——半监督学习标准迭代式标记传播算法LabelPropagation模型

    import numpy as np import matplotlib.pyplot as plt from sklearn import metrics from sklearn import d ...

  5. 吴裕雄 python 机器学习——集成学习AdaBoost算法回归模型

    import numpy as np import matplotlib.pyplot as plt from sklearn import datasets,ensemble from sklear ...

  6. 吴裕雄 python 机器学习——集成学习AdaBoost算法分类模型

    import numpy as np import matplotlib.pyplot as plt from sklearn import datasets,ensemble from sklear ...

  7. 吴裕雄 python 机器学习——人工神经网络感知机学习算法的应用

    import numpy as np from matplotlib import pyplot as plt from sklearn import neighbors, datasets from ...

  8. 吴裕雄 python 机器学习——半监督学习LabelSpreading模型

    import numpy as np import matplotlib.pyplot as plt from sklearn import metrics from sklearn import d ...

  9. 吴裕雄 python 机器学习——人工神经网络与原始感知机模型

    import numpy as np from matplotlib import pyplot as plt from mpl_toolkits.mplot3d import Axes3D from ...

随机推荐

  1. Structrued Streaming业务数据实时分析

    先启动spark-shell,记得启动nc服务 输入以下代码 scala> import org.apache.spark.sql.functions._ import org.apache.s ...

  2. 使用Nginx+Lua实现自定义WAF

    使用Nginx+Lua实现自定义WAF 版权声明:全部抄自赵班长的GitHub上waf项目 功能列表: 支持IP白名单和黑名单功能,直接将黑名单的IP访问拒绝. 支持URL白名单,将不需要过滤的URL ...

  3. Apache服务器下phalcon项目报Mod-Rewrite is not enabled问题

    问题如图: 项目已经按照官网的教程修改了.htaccess文件,仍旧报此错误,判断可能是apache未添加mod_rewrite,通过查询资料,经以下两步解决此问题: 1.执行sudo a2enmod ...

  4. liinux安装 mysql 及主从复制

    mariadb其实就是mysqlmysql已经被oracle收购,它即将闭源,马上要开始收费了因此还想免费试用开源的数据库mysql,就在centos7上,将mysql分支为mariadb 安装mar ...

  5. jmeter分布式压力测试实践+登录为例

    1.一张分布式压力的图解,如下 准备: 1.两台slave 2.一个master 3.待测目标地址 http://XXX 准备环境:linux环境,master如果可以最好有可视化电脑界面,便于jmx ...

  6. atom编辑器适用

    因为要在多平台下适用node,同事推荐atom.所以下载了进行试用. 1.下载 https://atom.io/

  7. SQL Server通过外部程序集注册正则表达式函数(CLR函数) [转]

    转自:http://blog.csdn.net/binguo168/article/details/76598581 1.下载dll程序集(通过C#编写的支持正则的方法),百度网盘下载: 1.1如果只 ...

  8. <转>如何用C++实现自动微分

    作者:李瞬生转摘链接:https://www.zhihu.com/question/48356514/answer/123290631来源:知乎著作权归作者所有. 实现 AD 有两种方式,函数重载与代 ...

  9. 【HQL】窗口函数

    LAG LAG(col,n,DEFAULT) :与lead相反,用于统计窗口内往上第n行值.第一个参数为列名,第二个参数为往上第n行(可选,默认为1),第三个参数为默认值(当往上第n行为NULL时候, ...

  10. socket-tcp

    server import socketip_port=('127.0.0.1',8080);back_log=5buffer_size=1024 serv=socket.socket(socket. ...