import numpy as np

def loadDataSet():
dataMat = []
labelMat = []
fr = open('D:\\LearningResource\\machinelearninginaction\\Ch05\\testSet.txt')
for line in fr.readlines():
lineArr = line.strip().split()
dataMat.append([1.0, float(lineArr[0]), float(lineArr[1])])
labelMat.append(int(lineArr[2]))
return dataMat,labelMat dataMat,labelMat = loadDataSet()
print(dataMat)
print(labelMat)

def sigmoid(z):
sigmoid = 1.0/(1+np.exp(-z))
return sigmoid def gradAscent(dataMatIn, classLabels):
dataMatrix = np.mat(dataMatIn)
labelMat = np.mat(classLabels).transpose()
m,n = np.shape(dataMatrix)
alpha = 0.001
maxCycles = 500
weights = np.ones((n,1))
for k in range(maxCycles):
h = sigmoid(dataMatrix*weights)
error = (labelMat - h)
weights = weights + alpha * dataMatrix.transpose()* error
return weights weights = gradAscent(dataMat,labelMat)
print(weights)

def stocGradAscent0(dataMatrix, classLabels):
m,n = np.shape(dataMatrix)
alpha = 0.01
weights = np.ones(n)
for i in range(m):
h = sigmoid(sum(np.array(dataMatrix[i])*weights))
error = classLabels[i] - h
weights = weights + alpha * error * np.array(dataMatrix[i])
return weights weights = stocGradAscent0(dataMat,labelMat)
print(weights)

def stocGradAscent1(dataMatrix, classLabels, numIter=150):
m,n = np.shape(dataMatrix)
weights = np.ones(n)
for j in range(numIter):
dataIndex = list(range(m))
for i in range(m):
alpha = 4/(1.0+j+i)+0.0001
randIndex = int(np.random.uniform(0,len(dataIndex)))
h = sigmoid(sum(np.array(dataMatrix[randIndex])*weights))
error = classLabels[randIndex] - h
weights = weights + alpha * error * np.array(dataMatrix[randIndex])
del(dataIndex[randIndex])
return weights weights = stocGradAscent1(dataMat,labelMat)
print(weights)

import matplotlib.pyplot as plt

def plotBestFit():
dataMat,labelMat=loadDataSet()
weights = gradAscent(dataMat,labelMat)
dataArr = np.array(dataMat)
n = np.shape(dataArr)[0]
xcord1 = []
ycord1 = []
xcord2 = []
ycord2 = []
for i in range(n):
if(int(labelMat[i])== 1):
xcord1.append(dataArr[i,1])
ycord1.append(dataArr[i,2])
else:
xcord2.append(dataArr[i,1])
ycord2.append(dataArr[i,2])
fig = plt.figure()
ax = fig.add_subplot(111)
ax.scatter(xcord1, ycord1, s=30, c='red', marker='s')
ax.scatter(xcord2, ycord2, s=30, c='green')
x = np.arange(-3.0, 3.0, 0.1)
y = (-weights[0]-weights[1]*x)/weights[2]
y = np.array(y).reshape(len(x))
ax.plot(x, y)
plt.xlabel('X1')
plt.ylabel('X2');
plt.show() plotBestFit()

def classifyVector(z, weights):
prob = sigmoid(sum(z*weights))
if(prob > 0.5):
return 1.0
else:
return 0.0 def colicTest():
frTrain = open('D:\\LearningResource\\machinelearninginaction\\Ch05\\horseColicTraining.txt')
frTest = open('D:\\LearningResource\\machinelearninginaction\\Ch05\\horseColicTest.txt')
trainingSet = []
trainingLabels = []
for line in frTrain.readlines():
currLine = line.strip().split('\t')
lineArr =[]
for i in range(21):
lineArr.append(float(currLine[i]))
trainingSet.append(lineArr)
trainingLabels.append(float(currLine[21]))
trainWeights = stocGradAscent1(np.array(trainingSet), trainingLabels, 1000)
errorCount = 0
numTestVec = 0.0
for line in frTest.readlines():
numTestVec += 1.0
currLine = line.strip().split('\t')
lineArr =[]
for i in range(21):
lineArr.append(float(currLine[i]))
if(int(classifyVector(np.array(lineArr), trainWeights))!= int(currLine[21])):
errorCount += 1
errorRate = (float(errorCount)/numTestVec)
print("the error rate of this test is: %f" % errorRate)
return errorRate errorRate = colicTest()
print(errorRate) def multiTest():
numTests = 10
errorSum=0.0
for k in range(numTests):
errorSum += colicTest()
print("after %d iterations the average error rate is: %f" % (numTests, errorSum/float(numTests))) multiTest()

吴裕雄 python 机器学习-Logistic(1)的更多相关文章

  1. 吴裕雄 python 机器学习——人工神经网络感知机学习算法的应用

    import numpy as np from matplotlib import pyplot as plt from sklearn import neighbors, datasets from ...

  2. 吴裕雄 python 机器学习——分类决策树模型

    import numpy as np import matplotlib.pyplot as plt from sklearn import datasets from sklearn.model_s ...

  3. 吴裕雄 python 机器学习——回归决策树模型

    import numpy as np import matplotlib.pyplot as plt from sklearn import datasets from sklearn.model_s ...

  4. 吴裕雄 python 机器学习——线性判断分析LinearDiscriminantAnalysis

    import numpy as np import matplotlib.pyplot as plt from matplotlib import cm from mpl_toolkits.mplot ...

  5. 吴裕雄 python 机器学习——逻辑回归

    import numpy as np import matplotlib.pyplot as plt from matplotlib import cm from mpl_toolkits.mplot ...

  6. 吴裕雄 python 机器学习——ElasticNet回归

    import numpy as np import matplotlib.pyplot as plt from matplotlib import cm from mpl_toolkits.mplot ...

  7. 吴裕雄 python 机器学习——Lasso回归

    import numpy as np import matplotlib.pyplot as plt from sklearn import datasets, linear_model from s ...

  8. 吴裕雄 python 机器学习——岭回归

    import numpy as np import matplotlib.pyplot as plt from sklearn import datasets, linear_model from s ...

  9. 吴裕雄 python 机器学习——线性回归模型

    import numpy as np from sklearn import datasets,linear_model from sklearn.model_selection import tra ...

随机推荐

  1. Solr合并索引方式

    索引合并并不会判断uniqueKey,所以主键有重复不会判断主键会重复. 官方的解释是不要有重复. 要合并索引,它们必须满足以下要求: 这两个索引必须兼容:它们的架构应该包含相同的字段,并且它们应该以 ...

  2. Celery 在Windows下启动worker时出现错误:ValueError: not enough values to unpack (expected 3, got 0)

    在公司Linux环境下没有出现问题,在回到家后直接在Windows10下运行出现错误: ValueError: not enough values to unpack (expected 3, got ...

  3. eclipse聚合工程如何提交SVN,如何从SVN下载

    提交: 比如聚合工程为taotao-manager,包含了taotao1,taotao2,taotao3等项目,在提交SVN只需 提交taotao-manager就可以了 1.右键taotao-man ...

  4. python-开放类优化内存性能

    开放类:在运行期间,可动态向实例或类添加新成员,方法 1.实例不能添加方法到类,反之可以 class A: pass a = A() a.func = lambda x: x+1 a.func # & ...

  5. django-chunks文件

    with open(file_save_path, 'wb') as f: for chunk in file_content.chunks(): f.write(chunk)

  6. Maven下载私服上的jar包(全局)

    <mirror> <id>maven-public</id> <mirrorOf>maven-public</mirrorOf> <n ...

  7. Impala SQL 使用小记

    1.  impala端创建的表,DROP. hive会自动同步到. 但是通过hive DROP时,数据还会在,只是表的元数据没有了. 所以完全DROP表,需要impala端的DROP 2. impal ...

  8. SQL 优化tips 及误区

    1.    几个表进行join,然后过滤 等价于  分别过滤为小表后,再join? 并不完全. 2)确实比1)效率高, 但要注意一些NULL值过滤.否则2)得到的结果比1)多 2. left join ...

  9. GridEh 当前行

    DataSet当前行,不是当前选中的行 int arow = 0;    arow = cds1->RecNo;    arow = cds1->RecordCount;    arow ...

  10. Group by与 having

    注意:select 后的字段,必须要么包含在group by中,要么包含在having 后的聚合函数里. 1. GROUP BY 是分组查询, 一般 GROUP BY 是和聚合函数配合使用 group ...