吴裕雄 python 机器学习-Logistic(1)
import numpy as np


def loadDataSet(fileName='D:\\LearningResource\\machinelearninginaction\\Ch05\\testSet.txt'):
    """Load the two-feature sample set used by the logistic-regression demos.

    Every non-blank line of the file is split on whitespace into three
    fields: two feature values followed by an integer class label.

    Args:
        fileName: Path of the text file to read. Defaults to the location
            hard-coded in the original tutorial.

    Returns:
        A ``(dataMat, labelMat)`` tuple. ``dataMat`` is a list of
        ``[1.0, x1, x2]`` rows (the constant 1.0 is the bias input) and
        ``labelMat`` is the list of integer labels, both in file order.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a field cannot be converted to a number.
        IndexError: If a non-blank line has fewer than three fields.
    """
    dataMat = []
    labelMat = []
    # The context manager closes the handle even when a line fails to parse.
    with open(fileName) as fr:
        for line in fr:
            lineArr = line.split()
            if not lineArr:
                # Blank line (e.g. trailing newline at end of file): skip it.
                continue
            dataMat.append([1.0, float(lineArr[0]), float(lineArr[1])])
            labelMat.append(int(lineArr[2]))
    return dataMat, labelMat


# Demo statements from the tutorial; guarded so that importing the module
# does not touch the file system.
if __name__ == "__main__":
    dataMat, labelMat = loadDataSet()
    print(dataMat)
    print(labelMat)

def sigmoid(z):
    """Return the logistic function ``1 / (1 + exp(-z))`` of *z*.

    ``z`` may be a scalar, a NumPy array or a NumPy matrix; arrays and
    matrices are processed element-wise.

    Note: for very large negative ``z`` the intermediate ``np.exp(-z)``
    overflows to ``inf``; NumPy then emits a RuntimeWarning and the result
    is 0.0.
    """
    return 1.0 / (1 + np.exp(-z))


def gradAscent(dataMatIn, classLabels, alpha=0.001, maxCycles=500):
    """Fit logistic-regression weights with batch gradient ascent.

    Args:
        dataMatIn: 2-D sequence of samples, one row per sample.
        classLabels: Sequence of class labels, one per sample.
        alpha: Step size applied to every update.
        maxCycles: Number of full-batch updates to perform.

    Returns:
        The weight column vector of shape ``(n, 1)`` where ``n`` is the
        number of columns in ``dataMatIn``. It is an ``np.matrix`` once at
        least one update has run (a plain array of ones if
        ``maxCycles`` is 0).
    """
    # np.asmatrix replaces np.mat, which was removed from the main namespace
    # in NumPy 2.0; it yields the same np.matrix object, so ``*`` below is
    # still matrix multiplication.
    dataMatrix = np.asmatrix(dataMatIn)
    labelMat = np.asmatrix(classLabels).transpose()  # 1 x m -> m x 1
    n = np.shape(dataMatrix)[1]
    weights = np.ones((n, 1))
    for _ in range(maxCycles):
        h = sigmoid(dataMatrix * weights)
        error = labelMat - h
        # Step along X^T (y - h), the gradient of the log-likelihood.
        weights = weights + alpha * dataMatrix.transpose() * error
    return weights


# Demo statements from the tutorial; they rely on the ``dataMat`` and
# ``labelMat`` globals created by the loadDataSet demo earlier in the file.
if __name__ == "__main__":
    weights = gradAscent(dataMat, labelMat)
    print(weights)

def stocGradAscent0(dataMatrix, classLabels, alpha=0.01):
    """Fit logistic-regression weights with one pass of stochastic gradient ascent.

    The samples are visited once each, in their given order, and the
    weights are updated after every sample.

    Args:
        dataMatrix: 2-D sequence (list of rows or 2-D array) of samples.
        classLabels: Sequence of class labels, one per sample.
        alpha: Fixed step size applied to every update.

    Returns:
        A 1-D array of ``n`` weights, where ``n`` is the number of columns
        in ``dataMatrix``.
    """
    m, n = np.shape(dataMatrix)
    weights = np.ones(n)
    for i in range(m):
        # Convert the row once and reuse it for prediction and update.
        row = np.array(dataMatrix[i])
        h = sigmoid(np.dot(row, weights))
        error = classLabels[i] - h
        weights = weights + alpha * error * row
    return weights


# Demo statements from the tutorial; they rely on the ``dataMat`` and
# ``labelMat`` globals created by the loadDataSet demo earlier in the file.
if __name__ == "__main__":
    weights = stocGradAscent0(dataMat, labelMat)
    print(weights)

def stocGradAscent1(dataMatrix, classLabels, numIter=150):
    """Fit logistic-regression weights with randomized stochastic gradient ascent.

    Each of the ``numIter`` passes visits every sample exactly once, in a
    random order, using a step size that shrinks as the pass and step
    counters grow but never drops below 0.0001.

    Args:
        dataMatrix: 2-D sequence (list of rows or 2-D array) of samples.
        classLabels: Sequence of class labels, one per sample.
        numIter: Number of passes over the data.

    Returns:
        A 1-D array of ``n`` weights, where ``n`` is the number of columns
        in ``dataMatrix``.
    """
    m, n = np.shape(dataMatrix)
    weights = np.ones(n)
    for j in range(numIter):
        # Indices of the samples not yet used in this pass.
        dataIndex = list(range(m))
        for i in range(m):
            alpha = 4 / (1.0 + j + i) + 0.0001
            randIndex = int(np.random.uniform(0, len(dataIndex)))
            # randIndex is a position inside dataIndex, not a row number:
            # translate it to the actual sample index and remove it so the
            # sample cannot be drawn again during this pass.
            sampleIndex = dataIndex.pop(randIndex)
            row = np.array(dataMatrix[sampleIndex])
            h = sigmoid(np.dot(row, weights))
            error = classLabels[sampleIndex] - h
            weights = weights + alpha * error * row
    return weights


# Demo statements from the tutorial; they rely on the ``dataMat`` and
# ``labelMat`` globals created by the loadDataSet demo earlier in the file.
if __name__ == "__main__":
    weights = stocGradAscent1(dataMat, labelMat)
    print(weights)

import matplotlib.pyplot as plt


def plotBestFit():
    """Plot the sample set together with the fitted decision boundary.

    Loads the data with ``loadDataSet``, fits weights with ``gradAscent``,
    draws samples labelled 1 as red squares and all other samples as green
    markers, then draws the line on which ``w0 + w1*x1 + w2*x2 == 0`` for
    ``x1`` from -3.0 up to (but not including) 3.0 and shows the figure.
    """
    dataMat, labelMat = loadDataSet()
    # Flatten the (n, 1) weight vector so that w[0], w[1], w[2] are scalars.
    weights = np.asarray(gradAscent(dataMat, labelMat)).ravel()
    dataArr = np.array(dataMat)
    isPositive = np.array(labelMat).astype(int) == 1

    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.scatter(dataArr[isPositive, 1], dataArr[isPositive, 2],
               s=30, c='red', marker='s')
    ax.scatter(dataArr[~isPositive, 1], dataArr[~isPositive, 2],
               s=30, c='green')

    x = np.arange(-3.0, 3.0, 0.1)
    # Solve w0 + w1*x + w2*y = 0 for y to get the boundary line.
    y = (-weights[0] - weights[1] * x) / weights[2]
    ax.plot(x, y)
    plt.xlabel('X1')
    plt.ylabel('X2')
    plt.show()


# Demo call from the tutorial; guarded so that importing the module does
# not open a plot window.
if __name__ == "__main__":
    plotBestFit()

def classifyVector(z, weights):
    """Classify one feature vector with the given logistic-regression weights.

    Args:
        z: 1-D array of feature values.
        weights: 1-D array of weights of the same length.

    Returns:
        1.0 if the predicted probability is greater than 0.5, else 0.0.
    """
    prob = sigmoid(sum(z * weights))
    return 1.0 if prob > 0.5 else 0.0


def _loadColicFile(fileName, numFeatures=21):
    """Read a tab-separated file into (feature rows, float labels).

    The first ``numFeatures`` fields of each non-blank line are the
    features and the field right after them is the label.
    """
    featureRows = []
    labels = []
    # The context manager closes the handle even when a line fails to parse.
    with open(fileName) as fr:
        for line in fr:
            currLine = line.strip().split('\t')
            if currLine == ['']:
                # Blank line: nothing to parse.
                continue
            featureRows.append([float(currLine[i]) for i in range(numFeatures)])
            # Parsing through float accepts both "1" and "1.000000".
            labels.append(float(currLine[numFeatures]))
    return featureRows, labels


def colicTest():
    """Train on the horse-colic training file and measure the test error rate.

    Weights are fitted with ``stocGradAscent1`` (1000 passes) and every row
    of the test file is classified with ``classifyVector``. The error rate
    is printed and returned.

    Returns:
        The fraction of test rows whose predicted class differs from the
        label in the file.

    Raises:
        ZeroDivisionError: If the test file contains no rows.
    """
    trainingSet, trainingLabels = _loadColicFile(
        'D:\\LearningResource\\machinelearninginaction\\Ch05\\horseColicTraining.txt')
    testSet, testLabels = _loadColicFile(
        'D:\\LearningResource\\machinelearninginaction\\Ch05\\horseColicTest.txt')

    trainWeights = stocGradAscent1(np.array(trainingSet), trainingLabels, 1000)

    errorCount = 0
    for lineArr, label in zip(testSet, testLabels):
        if int(classifyVector(np.array(lineArr), trainWeights)) != int(label):
            errorCount += 1
    numTestVec = float(len(testSet))
    errorRate = float(errorCount) / numTestVec
    print("the error rate of this test is: %f" % errorRate)
    return errorRate


def multiTest(numTests=10):
    """Run ``colicTest`` ``numTests`` times and print the average error rate."""
    errorSum = 0.0
    for _ in range(numTests):
        errorSum += colicTest()
    print("after %d iterations the average error rate is: %f" % (numTests, errorSum/float(numTests)))


# Demo statements from the tutorial; guarded so that importing the module
# does not start the (slow) training runs.
if __name__ == "__main__":
    errorRate = colicTest()
    print(errorRate)
    multiTest()

吴裕雄 python 机器学习-Logistic(1)的更多相关文章
- 吴裕雄 python 机器学习——人工神经网络感知机学习算法的应用
import numpy as np from matplotlib import pyplot as plt from sklearn import neighbors, datasets from ...
- 吴裕雄 python 机器学习——分类决策树模型
import numpy as np import matplotlib.pyplot as plt from sklearn import datasets from sklearn.model_s ...
- 吴裕雄 python 机器学习——回归决策树模型
import numpy as np import matplotlib.pyplot as plt from sklearn import datasets from sklearn.model_s ...
- 吴裕雄 python 机器学习——线性判断分析LinearDiscriminantAnalysis
import numpy as np import matplotlib.pyplot as plt from matplotlib import cm from mpl_toolkits.mplot ...
- 吴裕雄 python 机器学习——逻辑回归
import numpy as np import matplotlib.pyplot as plt from matplotlib import cm from mpl_toolkits.mplot ...
- 吴裕雄 python 机器学习——ElasticNet回归
import numpy as np import matplotlib.pyplot as plt from matplotlib import cm from mpl_toolkits.mplot ...
- 吴裕雄 python 机器学习——Lasso回归
import numpy as np import matplotlib.pyplot as plt from sklearn import datasets, linear_model from s ...
- 吴裕雄 python 机器学习——岭回归
import numpy as np import matplotlib.pyplot as plt from sklearn import datasets, linear_model from s ...
- 吴裕雄 python 机器学习——线性回归模型
import numpy as np from sklearn import datasets,linear_model from sklearn.model_selection import tra ...
随机推荐
- django-request获取数据
request 如果说 urls.py 是 Django 中前端页面和后台程序的桥梁,那么 request 就是桥上负责运输的小汽车。可以说后端接收到的来自前端的信息几乎全部来自于 request 中. ...
- 学习js第三天小结
1.冒泡排序分析: 例:将数组[9,8,7,6,5,4,3,2,1,0]按照从小到大的顺序进行冒泡排序. 演变过程: 第一趟: 8,7,6,5,4,3,2,1,0,9 比较了9次: 第二趟: 7 ...
- python—正则表达式
我们平时上网的时候,经常需要在一些网站上注册帐号,而注册帐号的时候对帐号信息会有一些要求. 比如: 上面的图片中,输入的邮件地址.密码.手机号 符合要求才可以注册成功. 我们是我们自己写的网站,那么我 ...
- win10 java1.7安装笔记
博主不选择安装C盘,选择在D盘安装,新建Java文件夹,在Java文件夹中新建两个子文件夹,一个命名jdk1.7,一个命名jre,如果不区分目录安装jdk和jre,会导致一些文件的缺失,导致一些错误, ...
- 01-spark基础
1.定义 Spark是一个由scala语言编写的实时计算系统 Spark支持的API包括Scala.Python.Java .R 2.功能 Spark Core: 将分布式数据抽象为弹性分布式数据集( ...
- 关于springboot
概念 Spring的优缺点 1. 优点(AOP和IOC简化开发) Spring是Java企业版(Java Enterprise Edition,JEE,也称J2EE)的轻量级代替品.无需开发重量级的E ...
- LeetCode 771 宝石和石头
Input: J = "aA", S = "aAAbbbb" Output: 3 解:J为宝石字符串,S为包含宝石的字符串,J中的字母如果在S中出现数字就➕1 ...
- 《C++数据结构-快速拾遗》 手写链表
注释:吕鑫老师C++对于找工作真的是很好的教程,基本什么方面都讲的很细致,但是对于大多数人只有快进快进再快进~~ 注释:基本链表信息自己百度,这里只是一个快速拾遗过程. 1.链表定义 typedef ...
- Vue.js模拟百度下拉框
<!DOCTYPE html> <html lang="en"> <head> <meta charset="UTF-8&quo ...
- gentoo AR9285 BCM57780 安装驱动
首先使用启动光盘启动, 然后 mount /dev/sda4 /mnt/gentoo 挂载硬盘 lspci -v 1>/mnt/gentoo/root/lspci_v.txt 输出信息到文件. ...