吴裕雄 python 机器学习-Logistic（1）

import numpy as np

def loadDataSet(fileName='D:\\LearningResource\\machinelearninginaction\\Ch05\\testSet.txt'):
    """Read a whitespace-delimited sample file into feature rows and labels.

    Every non-blank line must hold two float features followed by an
    integer class label.  A constant 1.0 is prepended to each feature row
    so that the first weight of a linear model acts as the intercept.

    Args:
        fileName: Path of the data file.  Defaults to the location the
            script originally hard-coded, so existing callers that pass no
            argument keep working.

    Returns:
        A tuple ``(dataMat, labelMat)`` where ``dataMat`` is a list of
        ``[1.0, x1, x2]`` rows and ``labelMat`` is the list of int labels.

    Raises:
        ValueError: If a field cannot be parsed as a number.
        IndexError: If a non-blank line has fewer than three fields.
    """
    dataMat = []
    labelMat = []
    # The context manager closes the handle even when parsing raises.
    with open(fileName) as fr:
        for line in fr:
            lineArr = line.split()
            if not lineArr:
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue
            dataMat.append([1.0, float(lineArr[0]), float(lineArr[1])])
            labelMat.append(int(lineArr[2]))
    return dataMat, labelMat

# Load the sample set once at module level and echo the parsed feature rows
# and labels; the training demos further down reuse these two globals.
dataMat,labelMat = loadDataSet()

print(dataMat)

print(labelMat)

def sigmoid(z):
    """Return the logistic function ``1 / (1 + exp(-z))`` of ``z``.

    Args:
        z: A scalar or a numpy array/matrix; the function is applied
            element-wise through ``np.exp``.

    Returns:
        The logistic value(s), with whatever type numpy produces for the
        given input (scalar in, scalar out; array in, array out).
    """
    # For very negative z, exp(-z) overflows to inf and the quotient
    # saturates at 0.0, which is the correct limit.  Silence only that
    # overflow warning; every other floating-point error stays reported.
    with np.errstate(over='ignore'):
        return 1.0 / (1 + np.exp(-z))

def gradAscent(dataMatIn, classLabels, alpha=0.001, maxCycles=500):
    """Fit logistic-regression weights by batch gradient ascent.

    Args:
        dataMatIn: 2-D sequence of samples, one row per sample and one
            column per feature.
        classLabels: Sequence with one class label per sample.
        alpha: Step size applied to every update (default 0.001, the value
            that was previously hard-coded).
        maxCycles: Number of full-batch updates to run (default 500, the
            value that was previously hard-coded).

    Returns:
        The weights as an n-by-1 column, where n is the number of columns
        of ``dataMatIn``.
    """
    # np.mat was removed in NumPy 2.0; np.asmatrix is the same conversion
    # and exists in both old and new NumPy releases.
    dataMatrix = np.asmatrix(dataMatIn)
    labelMat = np.asmatrix(classLabels).transpose()
    n = np.shape(dataMatrix)[1]
    weights = np.ones((n, 1))
    # alpha * X^T does not change between cycles, so compute it once.
    scaledTranspose = alpha * dataMatrix.transpose()
    for _ in range(maxCycles):
        # '*' is matrix multiplication here because the operands are
        # np.matrix objects.
        h = sigmoid(dataMatrix * weights)
        error = labelMat - h
        weights = weights + scaledTranspose * error
    return weights

# Demo: fit the weights with batch gradient ascent and print them.
weights = gradAscent(dataMat,labelMat)

print(weights)

def stocGradAscent0(dataMatrix, classLabels):
    """Run one stochastic gradient-ascent pass over the samples in order.

    The weights start as all ones and are updated once per sample, using a
    fixed step size of 0.01.

    Args:
        dataMatrix: 2-D collection of samples, one row per sample.
        classLabels: Sequence with one class label per sample.

    Returns:
        A 1-D numpy array holding one weight per feature column.
    """
    numSamples, numFeatures = np.shape(dataMatrix)
    stepSize = 0.01
    weights = np.ones(numFeatures)
    for idx in range(numSamples):
        sample = np.array(dataMatrix[idx])
        predicted = sigmoid(sum(sample * weights))
        residual = classLabels[idx] - predicted
        # Move the weights along this single sample's gradient.
        weights = weights + stepSize * residual * sample
    return weights

# Demo: a single in-order stochastic pass over the same data, then print the
# resulting weights (this rebinds the module-level `weights`).
weights = stocGradAscent0(dataMat,labelMat)

print(weights)

def stocGradAscent1(dataMatrix, classLabels, numIter=150):
    """Fit logistic-regression weights by randomized stochastic gradient ascent.

    Each pass visits every sample exactly once in random order, and the
    step size shrinks as the pass and sample counters grow.

    Args:
        dataMatrix: 2-D collection of samples, one row per sample.
        classLabels: Sequence with one class label per sample.
        numIter: Number of passes over the whole data set.

    Returns:
        A 1-D numpy array holding one weight per feature column.
    """
    m, n = np.shape(dataMatrix)
    weights = np.ones(n)
    for j in range(numIter):
        # Row indices not yet visited in this pass.
        dataIndex = list(range(m))
        for i in range(m):
            # Decaying step size; the constant term keeps it above zero.
            alpha = 4/(1.0+j+i)+0.0001
            randPos = int(np.random.uniform(0, len(dataIndex)))
            # randPos is a position inside dataIndex, not a row number.
            # Popping it yields the actual row and removes it from the
            # pool, so sampling really is without replacement.
            sampleIndex = dataIndex.pop(randPos)
            sample = np.array(dataMatrix[sampleIndex])
            h = sigmoid(sum(sample*weights))
            error = classLabels[sampleIndex] - h
            weights = weights + alpha * error * sample
    return weights

# Demo: randomized stochastic gradient ascent with its default number of
# passes, then print the resulting weights.
weights = stocGradAscent1(dataMat,labelMat)

print(weights)

import matplotlib.pyplot as plt

def plotBestFit():
    """Scatter-plot the sample set and draw the fitted decision boundary.

    Loads the data with ``loadDataSet``, fits weights with ``gradAscent``,
    plots label-1 samples as red squares and all other samples as green
    dots, then draws the line where w0 + w1*x1 + w2*x2 equals zero.
    """
    samples, labels = loadDataSet()
    weights = gradAscent(samples, labels)
    points = np.array(samples)

    # Split the two plotted feature columns by class label.
    posX, posY = [], []
    negX, negY = [], []
    for row, label in zip(points, labels):
        if int(label) == 1:
            posX.append(row[1])
            posY.append(row[2])
        else:
            negX.append(row[1])
            negY.append(row[2])

    axes = plt.figure().add_subplot(111)
    axes.scatter(posX, posY, s=30, c='red', marker='s')
    axes.scatter(negX, negY, s=30, c='green')

    # Solve w0 + w1*x1 + w2*x2 = 0 for x2 over the plotted x1 range.
    xs = np.arange(-3.0, 3.0, 0.1)
    ys = (-weights[0]-weights[1]*xs)/weights[2]
    # The weights may be matrix-shaped, so flatten the result to 1-D.
    axes.plot(xs, np.array(ys).reshape(len(xs)))

    plt.xlabel('X1')
    plt.ylabel('X2')
    plt.show()

# Demo: show the data and the fitted boundary (plt.show() is called inside).
plotBestFit()

def classifyVector(z, weights):
    """Classify one feature vector with the given logistic weights.

    Args:
        z: Feature vector of a single sample.
        weights: Weight vector, multiplied element-wise with ``z``.

    Returns:
        1.0 when the sigmoid of the weighted sum exceeds 0.5, else 0.0.
    """
    activation = sum(z * weights)
    return 1.0 if sigmoid(activation) > 0.5 else 0.0

def _loadColicSamples(fileName, numFeatures=21):
    """Parse a tab-delimited sample file into feature rows and float labels."""
    samples = []
    labels = []
    # The context manager closes the handle even when parsing raises.
    with open(fileName) as fr:
        for line in fr:
            currLine = line.strip().split('\t')
            if currLine == ['']:
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue
            samples.append([float(value) for value in currLine[:numFeatures]])
            # The label sits right after the features; a short line
            # raises IndexError here.
            labels.append(float(currLine[numFeatures]))
    return samples, labels


def colicTest(trainFile='D:\\LearningResource\\machinelearninginaction\\Ch05\\horseColicTraining.txt',
              testFile='D:\\LearningResource\\machinelearninginaction\\Ch05\\horseColicTest.txt',
              numIter=1000):
    """Train on the colic training file and measure the test error rate.

    Both files are tab-delimited with 21 feature columns followed by the
    class label.  Weights are fitted with ``stocGradAscent1`` and every
    test sample is classified with ``classifyVector``.

    Args:
        trainFile: Path of the training file (defaults to the path that
            was previously hard-coded).
        testFile: Path of the test file (defaults to the path that was
            previously hard-coded).
        numIter: Number of passes handed to ``stocGradAscent1``
            (default 1000, the value that was previously hard-coded).

    Returns:
        The fraction of misclassified test samples as a float.  The same
        value is also printed.

    Raises:
        ZeroDivisionError: If the test file contains no samples.
    """
    trainingSet, trainingLabels = _loadColicSamples(trainFile)
    testSet, testLabels = _loadColicSamples(testFile)

    trainWeights = stocGradAscent1(np.array(trainingSet), trainingLabels, numIter)

    errorCount = 0
    for lineArr, label in zip(testSet, testLabels):
        # Labels are parsed as floats, so "1" and "1.0" both compare
        # correctly after truncation to int.
        if int(classifyVector(np.array(lineArr), trainWeights)) != int(label):
            errorCount += 1

    errorRate = float(errorCount) / len(testSet)
    print("the error rate of this test is: %f" % errorRate)
    return errorRate

# Demo: one train/test round.  colicTest() already prints a formatted error
# rate, so the raw value appears a second time from the print below.
errorRate = colicTest()

print(errorRate)

def multiTest():
    """Run ``colicTest`` ten times and print the mean error rate."""
    numTests = 10
    # Each call retrains from scratch, so the rates vary between runs.
    errorRates = [colicTest() for _ in range(numTests)]
    meanError = sum(errorRates, 0.0) / float(numTests)
    print("after %d iterations the average error rate is: %f" % (numTests, meanError))

# Demo: average the test error over repeated train/test rounds.
multiTest()

吴裕雄 python 机器学习-Logistic（1）的更多相关文章

吴裕雄 python 机器学习——人工神经网络感知机学习算法的应用
import numpy as np from matplotlib import pyplot as plt from sklearn import neighbors, datasets from ...
吴裕雄 python 机器学习——分类决策树模型
import numpy as np import matplotlib.pyplot as plt from sklearn import datasets from sklearn.model_s ...
吴裕雄 python 机器学习——回归决策树模型
import numpy as np import matplotlib.pyplot as plt from sklearn import datasets from sklearn.model_s ...
吴裕雄 python 机器学习——线性判别分析LinearDiscriminantAnalysis
import numpy as np import matplotlib.pyplot as plt from matplotlib import cm from mpl_toolkits.mplot ...
吴裕雄 python 机器学习——逻辑回归
import numpy as np import matplotlib.pyplot as plt from matplotlib import cm from mpl_toolkits.mplot ...
吴裕雄 python 机器学习——ElasticNet回归
import numpy as np import matplotlib.pyplot as plt from matplotlib import cm from mpl_toolkits.mplot ...
吴裕雄 python 机器学习——Lasso回归
import numpy as np import matplotlib.pyplot as plt from sklearn import datasets, linear_model from s ...
吴裕雄 python 机器学习——岭回归
import numpy as np import matplotlib.pyplot as plt from sklearn import datasets, linear_model from s ...
吴裕雄 python 机器学习——线性回归模型
import numpy as np from sklearn import datasets,linear_model from sklearn.model_selection import tra ...

随机推荐

Getting Physical With Memory.CPU如何操作内存
原文标题:Getting Physical With Memory 原文地址:http://duartes.org/gustavo/blog/ [注:本人水平有限,只好挑一些国外高手的精彩文章翻译一下 ...
《机器学习实战》PCA降维
注释:由于各方面原因,理论部分不做介绍,网上很多自行百度吧! pca.py import numpy as np import matplotlib.pyplot as plt import math ...
Application failed to start because it could not find or load the QT platform plugin “windows”
只需要在.exe所在的目录下创建一个目录platforms添加进去缺失的dll即可如图: 参考链接:https://stackoverflow.com/questions/21268558/appli ...
java 怎样向一个已存在的文件中添加内容
如果想向某个文件最后添加内容,可使用FileWriter fw = new FileWriter("log.txt",true);在创建FileWriter时加个true就可以了. ...
Xcode6在iPhone5+iOS7模拟器上编译，上下有黑边问题
http://94it.net/a/jingxuanboke/2015/0113/447679.html
重识linux-linux的新增与删除用户组和切换命令
重识linux-linux的新增与删除用户组 1 相关文件 /etc/group /etc/gshadow 2操作相关 groupadd group1 groupmod group1 groupdel ...
代码：jquery小效果—— 吸顶
吸顶: 可以防止滚屏过程中,代码被多次调用 <script src="http://cdn.bootcss.com/jquery/1.11.1/jquery.min.js"& ...
hive随机采样
hive> select * from account limit 10;OKaccount.accountname account.accid account.platid ac ...
1. easyui tree 初始化的两种方式
/** * 查询角色分类 */function queryRoleCategoryTree(selectId) { var url = basePath + 'rest/roleCategoryCon ...
关于 Container ，Injection
1.容器的历史容器概念始于 1979 年提出的 UNIX chroot,它是一个 UNIX 操作系统的系统调用,将一个进程及其子进程的根目录改变到文件系统中的一个新位置,让这些进程只能访问到这个新的 ...

吴裕雄 python 机器学习-Logistic（1）

吴裕雄 python 机器学习-Logistic（1）的更多相关文章

随机推荐

热门专题