吴裕雄 python 机器学习-KNN算法（1）

import numpy as np

import operator as op

from os import listdir

def classify0(inX, dataSet, labels, k):
    """Classify ``inX`` by majority vote among its ``k`` nearest training rows.

    Distances are Euclidean: the query is repeated once per row of
    ``dataSet``, subtracted, squared, summed per row and square-rooted.

    Args:
        inX: Query point with as many features as ``dataSet`` has columns.
        dataSet: 2-D array of training samples, one sample per row.
        labels: Sequence of class labels; ``labels[i]`` belongs to row ``i``.
        k: Number of nearest neighbours that take part in the vote.

    Returns:
        The label with the most votes. On a tie the label met first among
        the neighbours wins (stable sort over an insertion-ordered dict).

    Raises:
        IndexError: If ``k`` exceeds the number of rows, or no vote was cast.
    """
    rowCount = dataSet.shape[0]
    # Repeat the query once per training row so the subtraction lines up.
    offsets = np.tile(inX, (rowCount, 1)) - dataSet
    distances = (offsets ** 2).sum(axis=1) ** 0.5
    nearestFirst = np.argsort(distances)

    votes = {}
    for rank in range(k):
        label = labels[nearestFirst[rank]]
        if label in votes:
            votes[label] += 1
        else:
            votes[label] = 1

    # Highest vote count first; only the winner's label is returned.
    ranked = sorted(votes.items(), key=op.itemgetter(1), reverse=True)
    winner, _ = ranked[0]
    return winner

def createDataSet():
    """Build the four-point toy training set used by the demo.

    Returns:
        A tuple ``(group, labels)``: ``group`` is a 4x2 float array of sample
        coordinates and ``labels`` is the list of class labels, one per row.
    """
    # Keep each point next to its label so the pairing is obvious.
    samples = [
        ([1.0, 1.1], 'A'),
        ([1.0, 1.0], 'A'),
        ([0, 0], 'B'),
        ([0, 0.1], 'B'),
    ]
    group = np.array([point for point, _ in samples])
    labels = [tag for _, tag in samples]
    return group, labels

# Demo: build the toy data set and show it.
data,labels = createDataSet()

print(data)

print(labels)

# Single query point, passed as a 1x2 array.
test = np.array([[0,0.5]])

# Classify the query using its 3 nearest neighbours and print the label.
result = classify0(test,data,labels,3)

print(result)

import numpy as np

import operator as op

from os import listdir

def classify0(inX, dataSet, labels, k):
    """Classify ``inX`` by majority vote among its ``k`` nearest training rows.

    Distances are Euclidean: the query is repeated once per row of
    ``dataSet``, subtracted, squared, summed per row and square-rooted.

    Args:
        inX: Query point with as many features as ``dataSet`` has columns.
        dataSet: 2-D array of training samples, one sample per row.
        labels: Sequence of class labels; ``labels[i]`` belongs to row ``i``.
        k: Number of nearest neighbours that take part in the vote.

    Returns:
        The label with the most votes. On a tie the label met first among
        the neighbours wins (stable sort over an insertion-ordered dict).

    Raises:
        IndexError: If ``k`` exceeds the number of rows, or no vote was cast.
    """
    rowCount = dataSet.shape[0]
    # Repeat the query once per training row so the subtraction lines up.
    offsets = np.tile(inX, (rowCount, 1)) - dataSet
    distances = (offsets ** 2).sum(axis=1) ** 0.5
    nearestFirst = np.argsort(distances)

    votes = {}
    for rank in range(k):
        label = labels[nearestFirst[rank]]
        if label in votes:
            votes[label] += 1
        else:
            votes[label] = 1

    # Highest vote count first; only the winner's label is returned.
    ranked = sorted(votes.items(), key=op.itemgetter(1), reverse=True)
    winner, _ = ranked[0]
    return winner

def file2matrix(filename):
    """Load a tab-separated data file into a feature matrix and label vector.

    Every non-blank line must hold three numeric feature columns followed by
    an integer class label in its last column.

    Args:
        filename: Path of the text file to read.

    Returns:
        A tuple ``(features, labels)`` of NumPy arrays. ``features`` holds the
        first three columns of each row as floats; ``labels`` holds the last
        column of each row converted with ``int``.

    Raises:
        ValueError: If a feature or label field cannot be converted.
        IndexError: If a line has fewer than three tab-separated fields.
    """
    returnMat = []
    classLabelVector = []
    # The context manager closes the file even when a row fails to parse.
    with open(filename) as fr:
        for line in fr:
            line = line.strip()
            if not line:
                # Tolerate blank lines, e.g. an empty line at end of file.
                continue
            fields = line.split('\t')
            returnMat.append([float(fields[0]), float(fields[1]), float(fields[2])])
            classLabelVector.append(int(fields[-1]))
    return np.array(returnMat), np.array(classLabelVector)

# Load the dating data set from a hard-coded local path and preview the first
# four feature rows and labels.
trainData,trainLabel = file2matrix("D:\\LearningResource\\machinelearninginaction\\Ch02\\datingTestSet2.txt")

print(trainData[0:4])

print(trainLabel[0:4])

def autoNorm(dataSet):
    """Min-max scale every column of ``dataSet`` into the range [0, 1].

    Each value becomes ``(value - column_min) / (column_max - column_min)``.

    Args:
        dataSet: 2-D NumPy array with one sample per row.

    Returns:
        A tuple ``(normDataSet, ranges, minVals)``: the scaled array, the
        per-column ``max - min`` and the per-column minimum. ``ranges`` is
        returned unmodified, so it is 0 for a constant column.
    """
    minVals = dataSet.min(0)
    maxVals = dataSet.max(0)
    ranges = maxVals - minVals
    # A constant column has zero range; dividing by it would fill the column
    # with NaN and corrupt every distance computed from the result. Divide by
    # 1 instead so such a column scales to all zeros.
    safeRanges = np.where(ranges == 0, 1, ranges)
    # Broadcasting applies the per-column vectors to every row.
    normDataSet = (dataSet - minVals) / safeRanges
    return normDataSet, ranges, minVals

# Normalize the loaded training features and show the scaling parameters plus
# a preview of the first four normalized rows and their labels.
normDataSet, ranges, minVals = autoNorm(trainData)

print(ranges)

print(minVals)

print(normDataSet[0:4])

print(trainLabel[0:4])

# Query point compared directly against the normalized rows
# (presumably already expressed on the normalized 0-1 scale; TODO confirm).
testData = np.array([[0.5,0.3,0.5]])

# Classify the query using its 5 nearest neighbours and print the label.
result = classify0(testData, normDataSet, trainLabel, 5)

print(result)

import numpy as np

import operator as op

from os import listdir

def classify0(inX, dataSet, labels, k):
    """Classify ``inX`` by majority vote among its ``k`` nearest training rows.

    Distances are Euclidean: the query is repeated once per row of
    ``dataSet``, subtracted, squared, summed per row and square-rooted.

    Args:
        inX: Query point with as many features as ``dataSet`` has columns.
        dataSet: 2-D array of training samples, one sample per row.
        labels: Sequence of class labels; ``labels[i]`` belongs to row ``i``.
        k: Number of nearest neighbours that take part in the vote.

    Returns:
        The label with the most votes. On a tie the label met first among
        the neighbours wins (stable sort over an insertion-ordered dict).

    Raises:
        IndexError: If ``k`` exceeds the number of rows, or no vote was cast.
    """
    rowCount = dataSet.shape[0]
    # Repeat the query once per training row so the subtraction lines up.
    offsets = np.tile(inX, (rowCount, 1)) - dataSet
    distances = (offsets ** 2).sum(axis=1) ** 0.5
    nearestFirst = np.argsort(distances)

    votes = {}
    for rank in range(k):
        label = labels[nearestFirst[rank]]
        if label in votes:
            votes[label] += 1
        else:
            votes[label] = 1

    # Highest vote count first; only the winner's label is returned.
    ranked = sorted(votes.items(), key=op.itemgetter(1), reverse=True)
    winner, _ = ranked[0]
    return winner

def file2matrix(filename):
    """Load a tab-separated data file into a feature matrix and label vector.

    Every non-blank line must hold three numeric feature columns followed by
    a class label in its last column. The label is kept as text.

    Args:
        filename: Path of the text file to read.

    Returns:
        A tuple ``(features, labels)`` of NumPy arrays. ``features`` holds the
        first three columns of each row as floats; ``labels`` holds the last
        column of each row as a string.

    Raises:
        ValueError: If a feature field cannot be converted to ``float``.
        IndexError: If a line has fewer than three tab-separated fields.
    """
    returnMat = []
    classLabelVector = []
    # The context manager closes the file even when a row fails to parse.
    with open(filename) as fr:
        for line in fr:
            line = line.strip()
            if not line:
                # Tolerate blank lines, e.g. an empty line at end of file.
                continue
            fields = line.split('\t')
            returnMat.append([float(fields[0]), float(fields[1]), float(fields[2])])
            classLabelVector.append(fields[-1])
    return np.array(returnMat), np.array(classLabelVector)

def autoNorm(dataSet):
    """Min-max scale every column of ``dataSet`` into the range [0, 1].

    Each value becomes ``(value - column_min) / (column_max - column_min)``.

    Args:
        dataSet: 2-D NumPy array with one sample per row.

    Returns:
        A tuple ``(normDataSet, ranges, minVals)``: the scaled array, the
        per-column ``max - min`` and the per-column minimum. ``ranges`` is
        returned unmodified, so it is 0 for a constant column.
    """
    minVals = dataSet.min(0)
    maxVals = dataSet.max(0)
    ranges = maxVals - minVals
    # A constant column has zero range; dividing by it would fill the column
    # with NaN and corrupt every distance computed from the result. Divide by
    # 1 instead so such a column scales to all zeros.
    safeRanges = np.where(ranges == 0, 1, ranges)
    # Broadcasting applies the per-column vectors to every row.
    normDataSet = (dataSet - minVals) / safeRanges
    return normDataSet, ranges, minVals

# NOTE(review): trainData is not assigned in this snippet; it is only set by
# the earlier snippet that loads datingTestSet2.txt, so this line fails with a
# NameError when run on its own. datingClassTest() below normalizes its own
# data and does not read these module-level results.
normDataSet, ranges, minVals = autoNorm(trainData)

def datingClassTest():
    """Run a hold-out evaluation of the k-NN classifier on the dating data.

    The first 10% of the rows in ``datingTestSet.txt`` are classified
    against the remaining 90% using 3 nearest neighbours. Every prediction
    is printed next to the true label, followed by the error rate and the
    raw error count.

    Raises:
        ZeroDivisionError: If the file has too few rows to hold any out.
    """
    holdOutFraction = 0.10  # share of rows used as test samples
    features, labels = file2matrix("D:\\LearningResource\\machinelearninginaction\\Ch02\\datingTestSet.txt")
    scaled, _ranges, _mins = autoNorm(features)

    total = scaled.shape[0]
    testCount = int(total * holdOutFraction)

    # Rows after the held-out prefix form the reference set for every query.
    referenceRows = scaled[testCount:total, :]
    referenceLabels = labels[testCount:total]

    errorCount = 0.0
    for row in range(testCount):
        predicted = classify0(scaled[row, :], referenceRows, referenceLabels, 3)
        actual = labels[row]
        print(('the classifier came back with: %s, the real answer is: %s') % (predicted, actual))
        if predicted != actual:
            errorCount += 1.0

    print(('the total error rate is: %f') % (errorCount / float(testCount)))
    print(errorCount)

# Run the hold-out evaluation; it prints each prediction and the error rate.
datingClassTest()

import numpy as np

import operator as op

from os import listdir

def classify0(inX, dataSet, labels, k):
    """Classify ``inX`` by majority vote among its ``k`` nearest training rows.

    Distances are Euclidean: the query is repeated once per row of
    ``dataSet``, subtracted, squared, summed per row and square-rooted.

    Args:
        inX: Query point with as many features as ``dataSet`` has columns.
        dataSet: 2-D array of training samples, one sample per row.
        labels: Sequence of class labels; ``labels[i]`` belongs to row ``i``.
        k: Number of nearest neighbours that take part in the vote.

    Returns:
        The label with the most votes. On a tie the label met first among
        the neighbours wins (stable sort over an insertion-ordered dict).

    Raises:
        IndexError: If ``k`` exceeds the number of rows, or no vote was cast.
    """
    rowCount = dataSet.shape[0]
    # Repeat the query once per training row so the subtraction lines up.
    offsets = np.tile(inX, (rowCount, 1)) - dataSet
    distances = (offsets ** 2).sum(axis=1) ** 0.5
    nearestFirst = np.argsort(distances)

    votes = {}
    for rank in range(k):
        label = labels[nearestFirst[rank]]
        if label in votes:
            votes[label] += 1
        else:
            votes[label] = 1

    # Highest vote count first; only the winner's label is returned.
    ranked = sorted(votes.items(), key=op.itemgetter(1), reverse=True)
    winner, _ = ranked[0]
    return winner

def file2matrix(filename):
    """Load a tab-separated data file into a feature matrix and label vector.

    Every non-blank line must hold three numeric feature columns followed by
    a class label in its last column. The label is kept as text.

    Args:
        filename: Path of the text file to read.

    Returns:
        A tuple ``(features, labels)`` of NumPy arrays. ``features`` holds the
        first three columns of each row as floats; ``labels`` holds the last
        column of each row as a string.

    Raises:
        ValueError: If a feature field cannot be converted to ``float``.
        IndexError: If a line has fewer than three tab-separated fields.
    """
    returnMat = []
    classLabelVector = []
    # The context manager closes the file even when a row fails to parse.
    with open(filename) as fr:
        for line in fr:
            line = line.strip()
            if not line:
                # Tolerate blank lines, e.g. an empty line at end of file.
                continue
            fields = line.split('\t')
            returnMat.append([float(fields[0]), float(fields[1]), float(fields[2])])
            classLabelVector.append(fields[-1])
    return np.array(returnMat), np.array(classLabelVector)

def autoNorm(dataSet):
    """Min-max scale every column of ``dataSet`` into the range [0, 1].

    Each value becomes ``(value - column_min) / (column_max - column_min)``.

    Args:
        dataSet: 2-D NumPy array with one sample per row.

    Returns:
        A tuple ``(normDataSet, ranges, minVals)``: the scaled array, the
        per-column ``max - min`` and the per-column minimum. ``ranges`` is
        returned unmodified, so it is 0 for a constant column.
    """
    minVals = dataSet.min(0)
    maxVals = dataSet.max(0)
    ranges = maxVals - minVals
    # A constant column has zero range; dividing by it would fill the column
    # with NaN and corrupt every distance computed from the result. Divide by
    # 1 instead so such a column scales to all zeros.
    safeRanges = np.where(ranges == 0, 1, ranges)
    # Broadcasting applies the per-column vectors to every row.
    normDataSet = (dataSet - minVals) / safeRanges
    return normDataSet, ranges, minVals

# NOTE(review): trainData is not assigned in this snippet; it is only set by
# the earlier snippet that loads datingTestSet2.txt, so this line fails with a
# NameError when run on its own. datingClassTest() below normalizes its own
# data and does not read these module-level results.
normDataSet, ranges, minVals = autoNorm(trainData)

def datingClassTest():
    """Run a hold-out evaluation of the k-NN classifier on the dating data.

    The first 10% of the rows in ``datingTestSet.txt`` are classified
    against the remaining 90% using 3 nearest neighbours. Every prediction
    is printed next to the true label, followed by the error rate and the
    raw error count.

    Raises:
        ZeroDivisionError: If the file has too few rows to hold any out.
    """
    holdOutFraction = 0.10  # share of rows used as test samples
    features, labels = file2matrix("D:\\LearningResource\\machinelearninginaction\\Ch02\\datingTestSet.txt")
    scaled, _ranges, _mins = autoNorm(features)

    total = scaled.shape[0]
    testCount = int(total * holdOutFraction)

    # Rows after the held-out prefix form the reference set for every query.
    referenceRows = scaled[testCount:total, :]
    referenceLabels = labels[testCount:total]

    errorCount = 0.0
    for row in range(testCount):
        predicted = classify0(scaled[row, :], referenceRows, referenceLabels, 3)
        actual = labels[row]
        print(('the classifier came back with: %s, the real answer is: %s') % (predicted, actual))
        if predicted != actual:
            errorCount += 1.0

    print(('the total error rate is: %f') % (errorCount / float(testCount)))
    print(errorCount)

# Run the hold-out evaluation; it prints each prediction and the error rate.
datingClassTest()

................................................

import numpy as np

import operator as op

from os import listdir

def classify0(inX, dataSet, labels, k):
    """Classify ``inX`` by majority vote among its ``k`` nearest training rows.

    Distances are Euclidean: the query is repeated once per row of
    ``dataSet``, subtracted, squared, summed per row and square-rooted.

    Args:
        inX: Query point with as many features as ``dataSet`` has columns.
        dataSet: 2-D array of training samples, one sample per row.
        labels: Sequence of class labels; ``labels[i]`` belongs to row ``i``.
        k: Number of nearest neighbours that take part in the vote.

    Returns:
        The label with the most votes. On a tie the label met first among
        the neighbours wins (stable sort over an insertion-ordered dict).

    Raises:
        IndexError: If ``k`` exceeds the number of rows, or no vote was cast.
    """
    rowCount = dataSet.shape[0]
    # Repeat the query once per training row so the subtraction lines up.
    offsets = np.tile(inX, (rowCount, 1)) - dataSet
    distances = (offsets ** 2).sum(axis=1) ** 0.5
    nearestFirst = np.argsort(distances)

    votes = {}
    for rank in range(k):
        label = labels[nearestFirst[rank]]
        if label in votes:
            votes[label] += 1
        else:
            votes[label] = 1

    # Highest vote count first; only the winner's label is returned.
    ranked = sorted(votes.items(), key=op.itemgetter(1), reverse=True)
    winner, _ = ranked[0]
    return winner

def file2matrix(filename):
    """Load a tab-separated data file into a feature matrix and label vector.

    Every non-blank line must hold three numeric feature columns followed by
    an integer class label in its last column.

    Args:
        filename: Path of the text file to read.

    Returns:
        A tuple ``(features, labels)`` of NumPy arrays. ``features`` holds the
        first three columns of each row as floats; ``labels`` holds the last
        column of each row converted with ``int``.

    Raises:
        ValueError: If a feature or label field cannot be converted.
        IndexError: If a line has fewer than three tab-separated fields.
    """
    returnMat = []
    classLabelVector = []
    # The context manager closes the file even when a row fails to parse.
    with open(filename) as fr:
        for line in fr:
            line = line.strip()
            if not line:
                # Tolerate blank lines, e.g. an empty line at end of file.
                continue
            fields = line.split('\t')
            returnMat.append([float(fields[0]), float(fields[1]), float(fields[2])])
            classLabelVector.append(int(fields[-1]))
    return np.array(returnMat), np.array(classLabelVector)

def autoNorm(dataSet):
    """Min-max scale every column of ``dataSet`` into the range [0, 1].

    Each value becomes ``(value - column_min) / (column_max - column_min)``.

    Args:
        dataSet: 2-D NumPy array with one sample per row.

    Returns:
        A tuple ``(normDataSet, ranges, minVals)``: the scaled array, the
        per-column ``max - min`` and the per-column minimum. ``ranges`` is
        returned unmodified, so it is 0 for a constant column.
    """
    minVals = dataSet.min(0)
    maxVals = dataSet.max(0)
    ranges = maxVals - minVals
    # A constant column has zero range; dividing by it would fill the column
    # with NaN and corrupt every distance computed from the result. Divide by
    # 1 instead so such a column scales to all zeros.
    safeRanges = np.where(ranges == 0, 1, ranges)
    # Broadcasting applies the per-column vectors to every row.
    normDataSet = (dataSet - minVals) / safeRanges
    return normDataSet, ranges, minVals

def classifyPerson():
    """Interactively predict how much the user will like a person.

    Prompts for three numeric features, normalizes them with the scaling
    parameters of the training data loaded from ``datingTestSet2.txt``,
    classifies the result with 3 nearest neighbours and prints the
    matching description.

    Raises:
        ValueError: If an answer cannot be converted to ``float``.
    """
    # Fixed the user-facing typo "in samll doses".
    # The predicted label is used as a 1-based index into this list, so the
    # labels in the data file are presumably 1, 2 and 3 - TODO confirm.
    resultList = ["not at all", "in small doses", "in large doses"]

    percentTats = float(input("percentage of time spent playing video game?"))
    ffMiles = float(input("frequent flier miles earned per year?"))
    iceCream = float(input("liters of ice cream consumed per year?"))
    testData = np.array([percentTats, ffMiles, iceCream])

    trainData, trainLabel = file2matrix("D:\\LearningResource\\machinelearninginaction\\Ch02\\datingTestSet2.txt")
    normDataSet, ranges, minVals = autoNorm(trainData)

    # Scale the query with the training minimum and range so it is
    # comparable with the normalized training rows.
    result = classify0((testData - minVals) / ranges, normDataSet, trainLabel, 3)
    print("You will probably like this person: ", resultList[result - 1])

# Start the interactive prediction; it prompts on standard input.
classifyPerson()

import numpy as np

import operator as op

from os import listdir

def classify0(inX, dataSet, labels, k):
    """Classify ``inX`` by majority vote among its ``k`` nearest training rows.

    Distances are Euclidean: the query is repeated once per row of
    ``dataSet``, subtracted, squared, summed per row and square-rooted.

    Args:
        inX: Query point with as many features as ``dataSet`` has columns.
        dataSet: 2-D array of training samples, one sample per row.
        labels: Sequence of class labels; ``labels[i]`` belongs to row ``i``.
        k: Number of nearest neighbours that take part in the vote.

    Returns:
        The label with the most votes. On a tie the label met first among
        the neighbours wins (stable sort over an insertion-ordered dict).

    Raises:
        IndexError: If ``k`` exceeds the number of rows, or no vote was cast.
    """
    rowCount = dataSet.shape[0]
    # Repeat the query once per training row so the subtraction lines up.
    offsets = np.tile(inX, (rowCount, 1)) - dataSet
    distances = (offsets ** 2).sum(axis=1) ** 0.5
    nearestFirst = np.argsort(distances)

    votes = {}
    for rank in range(k):
        label = labels[nearestFirst[rank]]
        if label in votes:
            votes[label] += 1
        else:
            votes[label] = 1

    # Highest vote count first; only the winner's label is returned.
    ranked = sorted(votes.items(), key=op.itemgetter(1), reverse=True)
    winner, _ = ranked[0]
    return winner

def file2matrix(filename):
    """Load a tab-separated data file into a feature matrix and label vector.

    Every non-blank line must hold three numeric feature columns followed by
    an integer class label in its last column.

    Args:
        filename: Path of the text file to read.

    Returns:
        A tuple ``(features, labels)`` of NumPy arrays. ``features`` holds the
        first three columns of each row as floats; ``labels`` holds the last
        column of each row converted with ``int``.

    Raises:
        ValueError: If a feature or label field cannot be converted.
        IndexError: If a line has fewer than three tab-separated fields.
    """
    returnMat = []
    classLabelVector = []
    # The context manager closes the file even when a row fails to parse.
    with open(filename) as fr:
        for line in fr:
            line = line.strip()
            if not line:
                # Tolerate blank lines, e.g. an empty line at end of file.
                continue
            fields = line.split('\t')
            returnMat.append([float(fields[0]), float(fields[1]), float(fields[2])])
            classLabelVector.append(int(fields[-1]))
    return np.array(returnMat), np.array(classLabelVector)

def autoNorm(dataSet):
    """Min-max scale every column of ``dataSet`` into the range [0, 1].

    Each value becomes ``(value - column_min) / (column_max - column_min)``.

    Args:
        dataSet: 2-D NumPy array with one sample per row.

    Returns:
        A tuple ``(normDataSet, ranges, minVals)``: the scaled array, the
        per-column ``max - min`` and the per-column minimum. ``ranges`` is
        returned unmodified, so it is 0 for a constant column.
    """
    minVals = dataSet.min(0)
    maxVals = dataSet.max(0)
    ranges = maxVals - minVals
    # A constant column has zero range; dividing by it would fill the column
    # with NaN and corrupt every distance computed from the result. Divide by
    # 1 instead so such a column scales to all zeros.
    safeRanges = np.where(ranges == 0, 1, ranges)
    # Broadcasting applies the per-column vectors to every row.
    normDataSet = (dataSet - minVals) / safeRanges
    return normDataSet, ranges, minVals

def classifyPerson():
    """Interactively predict how much the user will like a person.

    Prompts for three numeric features, normalizes them with the scaling
    parameters of the training data loaded from ``datingTestSet2.txt``,
    classifies the result with 3 nearest neighbours and prints the
    matching description.

    Raises:
        ValueError: If an answer cannot be converted to ``float``.
    """
    # Fixed the user-facing typo "in samll doses".
    # The predicted label is used as a 1-based index into this list, so the
    # labels in the data file are presumably 1, 2 and 3 - TODO confirm.
    resultList = ["not at all", "in small doses", "in large doses"]

    percentTats = float(input("percentage of time spent playing video game?"))
    ffMiles = float(input("frequent flier miles earned per year?"))
    iceCream = float(input("liters of ice cream consumed per year?"))
    testData = np.array([percentTats, ffMiles, iceCream])

    trainData, trainLabel = file2matrix("D:\\LearningResource\\machinelearninginaction\\Ch02\\datingTestSet2.txt")
    normDataSet, ranges, minVals = autoNorm(trainData)

    # Scale the query with the training minimum and range so it is
    # comparable with the normalized training rows.
    result = classify0((testData - minVals) / ranges, normDataSet, trainLabel, 3)
    print("You will probably like this person: ", resultList[result - 1])

# Start the interactive prediction; it prompts on standard input.
classifyPerson()

import numpy as np

import operator as op

from os import listdir

def classify0(inX, dataSet, labels, k):
    """Classify ``inX`` by majority vote among its ``k`` nearest training rows.

    Distances are Euclidean: the query is repeated once per row of
    ``dataSet``, subtracted, squared, summed per row and square-rooted.

    Args:
        inX: Query point with as many features as ``dataSet`` has columns.
        dataSet: 2-D array of training samples, one sample per row.
        labels: Sequence of class labels; ``labels[i]`` belongs to row ``i``.
        k: Number of nearest neighbours that take part in the vote.

    Returns:
        The label with the most votes. On a tie the label met first among
        the neighbours wins (stable sort over an insertion-ordered dict).

    Raises:
        IndexError: If ``k`` exceeds the number of rows, or no vote was cast.
    """
    rowCount = dataSet.shape[0]
    # Repeat the query once per training row so the subtraction lines up.
    offsets = np.tile(inX, (rowCount, 1)) - dataSet
    distances = (offsets ** 2).sum(axis=1) ** 0.5
    nearestFirst = np.argsort(distances)

    votes = {}
    for rank in range(k):
        label = labels[nearestFirst[rank]]
        if label in votes:
            votes[label] += 1
        else:
            votes[label] = 1

    # Highest vote count first; only the winner's label is returned.
    ranked = sorted(votes.items(), key=op.itemgetter(1), reverse=True)
    winner, _ = ranked[0]
    return winner

def img2vector(filename):
    """Flatten a 32x32 text image into a single-row integer array.

    Reads the first 32 lines of the file and converts the first 32
    characters of each line to integers, in row-major order.

    Args:
        filename: Path of the text file holding the image.

    Returns:
        A NumPy array of shape ``(1, 1024)``.

    Raises:
        IndexError: If a line has fewer than 32 characters (including the
            case where the file has fewer than 32 lines).
        ValueError: If a character is not a valid integer digit.
    """
    returnVect = []
    # The context manager closes the file even when parsing fails.
    with open(filename) as fr:
        for _ in range(32):
            lineStr = fr.readline()
            returnVect.extend(int(lineStr[col]) for col in range(32))
    # Wrap in an outer list so the result is a 1 x 1024 row vector.
    return np.array([returnVect])

def handwritingClassTest():
    """Evaluate the k-NN classifier on the handwritten-digit text images.

    Every file in the ``trainingDigits`` directory becomes one row of the
    training matrix; its label is the integer before the first ``_`` in the
    file name. Each file in ``testDigits`` is then classified with 3 nearest
    neighbours. Every prediction is printed next to the true digit,
    followed by the total error count and the error rate.

    Raises:
        ZeroDivisionError: If the test directory is empty.
    """
    # Build the training matrix, one flattened 1024-pixel image per row.
    trainNames = listdir('D:\\LearningResource\\machinelearninginaction\\Ch02\\trainingDigits')
    trainCount = len(trainNames)
    trainingMat = np.zeros((trainCount, 1024))
    hwLabels = []
    for row, name in enumerate(trainNames):
        stem = name.split('.')[0]  # drop the file extension
        hwLabels.append(int(stem.split('_')[0]))
        trainingMat[row, :] = img2vector('D:\\LearningResource\\machinelearninginaction\\Ch02\\trainingDigits\\%s' % name)

    # Classify every test image and count the mismatches.
    testNames = listdir('D:\\LearningResource\\machinelearninginaction\\Ch02\\testDigits')
    testCount = len(testNames)
    errorCount = 0.0
    for name in testNames:
        stem = name.split('.')[0]  # drop the file extension
        expected = int(stem.split('_')[0])
        vectorUnderTest = img2vector('D:\\LearningResource\\machinelearninginaction\\Ch02\\testDigits\\%s' % name)
        predicted = classify0(vectorUnderTest, trainingMat, hwLabels, 3)
        print("the classifier came back with: %d, the real answer is: %d" % (predicted, expected))
        if predicted != expected:
            errorCount += 1.0

    print("\nthe total number of errors is: %d" % errorCount)
    print("\nthe total error rate is: %f" % (errorCount / float(testCount)))

# Run the digit-recognition evaluation; it prints each prediction and the
# overall error rate.
handwritingClassTest()

.......................................

吴裕雄 python 机器学习-KNN算法（1）的更多相关文章

吴裕雄 python 机器学习——KNN回归KNeighborsRegressor模型
import numpy as np import matplotlib.pyplot as plt from sklearn import neighbors, datasets from skle ...
吴裕雄 python 机器学习——KNN分类KNeighborsClassifier模型
import numpy as np import matplotlib.pyplot as plt from sklearn import neighbors, datasets from skle ...
吴裕雄 python 机器学习-KNN（2）
import matplotlib import numpy as np import matplotlib.pyplot as plt from matplotlib.patches import ...
吴裕雄 python 机器学习——半监督学习标准迭代式标记传播算法LabelPropagation模型
import numpy as np import matplotlib.pyplot as plt from sklearn import metrics from sklearn import d ...
吴裕雄 python 机器学习——集成学习AdaBoost算法回归模型
import numpy as np import matplotlib.pyplot as plt from sklearn import datasets,ensemble from sklear ...
吴裕雄 python 机器学习——集成学习AdaBoost算法分类模型
import numpy as np import matplotlib.pyplot as plt from sklearn import datasets,ensemble from sklear ...
吴裕雄 python 机器学习——人工神经网络感知机学习算法的应用
import numpy as np from matplotlib import pyplot as plt from sklearn import neighbors, datasets from ...
吴裕雄 python 机器学习——半监督学习LabelSpreading模型
import numpy as np import matplotlib.pyplot as plt from sklearn import metrics from sklearn import d ...
吴裕雄 python 机器学习——人工神经网络与原始感知机模型
import numpy as np from matplotlib import pyplot as plt from mpl_toolkits.mplot3d import Axes3D from ...

随机推荐

JavaWeb项目中web.xml有关servlet的基本配置
JavaWeb项目中web.xml有关servlet的基本配置: 我们注意到,tomcat下的conf中也有一个web.xml文件,没错的,所有的JavaWeb项目中web.xml都继承自服务器下的w ...
Solr高效利用：Solr实现SQL的查询与统计
1.如何高效使用Solr查询功能 ?2.单个字段分组统计如何实现? 3.IN条件查询有几种方式? 4.多个字段分组统计是否只支持count? Cloudera公司已经推出了基于Hadoop平台的查询统 ...
初识vuejs
转行前端,时间也不短了,也见识到了前端行业的蓬勃发展,以及一些新鲜技术的层出不穷. 由于自身计算机基础的薄弱,更加上一直没有遇上一个公司力推新技术,所以一直以来基本上都是靠着jquery和则zepto ...
【Fiddler学习】Fiddler简介和Web抓包应用（转）
一.Fiddler是什么? Fiddler是一个http协议调试代理工具,它能够记录并检查所有你的电脑和互联网之间的http通讯,设置断点,查看所有的进出Fiddler的数据. Fiddler 要比其 ...
django, tornado
django 基于多线程实现，tornado 基于 epoll 机制
打包django项目
1.安装pip install pyinstaller2.在django项目目录下执行pyi-makespec -D manage.py # 生成manage.spec文件3.执行pyinstalle ...
HPUX and AIX SSH 互信
HPUX side: su - orasid check file AIX side: su - sidadm double check
webform之Repeater控件
一.Repeater控件数据循环编辑 1.repeater包括五大模板: (1)HeaderTemplate:标题模板,对开头进行编辑,只执行一次 (2)FooterTemplate:页尾结束模板, ...
windows系统安装
系统最新地址:https://www.microsoft.com/zh-cn/software-download/windows10
类方法@classmethod
通常情况下,如果我们要使用一个类的方法,那我们只能将一个类实例化成一个对象,进而调用对象使用方法. 示例 1 比如: class Hello(object): def __init__: ...

吴裕雄 python 机器学习-KNN算法（1）

吴裕雄 python 机器学习-KNN算法（1）的更多相关文章

随机推荐

热门专题