logistic 回归（线性和非线性）

一：线性logistic 回归

代码如下：

import numpy as np

import pandas as pd

import matplotlib.pyplot as plt

import scipy.optimize as opt

import seaborn as sns

# Load the data set: two exam scores per row plus the 0/1 'Admitted' label.
path = 'ex2data1.txt'
data = pd.read_csv(path, header=None, names=['Exam 1', 'Exam 2', 'Admitted'])

# Split the rows into the positive (label 1) and negative (label 0) samples.
positive = data.loc[data['Admitted'].eq(1)]
negative = data.loc[data['Admitted'].eq(0)]

'''

#查看分布

fig, ax = plt.subplots(figsize=(12, 8))

ax.scatter(positive['Exam 1'], positive['Exam 2'], s=60, c='b', marker='o', label='Admitted')

ax.scatter(negative['Exam 1'], negative['Exam 2'], s=50, c='r', marker='x', label='UnAdmitted')

ax.legend()

ax.set_xlabel('Exam 1 Score')

ax.set_ylabel('Exam 2 Score')

plt.show()

'''

# Sigmoid (logistic) function
def sigmoid(h):
    """Return the logistic function ``1 / (1 + exp(-h))`` of *h*.

    ``np.exp`` is applied element-wise, so *h* may be a scalar or a NumPy
    array/matrix; the result has the same shape as ``np.exp(-h)``.
    """
    denominator = 1 + np.exp(-h)
    return 1 / denominator

'''

#测试sigmoid函数

nums = np.arange(-10, 11, step=1)

fig, ax = plt.subplots(figsize=(12, 8))

ax.plot(nums, sigmoid(nums), 'k')

plt.show()

'''

# Compute the value of the (unregularized) logistic-regression cost function
def cost(theta, X, y):
    """Return the mean cross-entropy cost of ``theta`` on ``(X, y)``.

    All three arguments are converted with ``np.matrix``.  ``theta`` is used
    as a row vector, so the predicted probabilities are
    ``sigmoid(X * theta.T)`` (one row per sample).  The result is the sum of
    ``-y*log(p) - (1-y)*log(1-p)`` divided by the number of rows of ``X``.
    """
    theta = np.matrix(theta)
    X = np.matrix(X)
    y = np.matrix(y)

    hypothesis = sigmoid(X * theta.T)
    positive_term = np.multiply(-y, np.log(hypothesis))
    negative_term = np.multiply(1 - y, np.log(1 - hypothesis))
    return np.sum(positive_term - negative_term) / len(X)

# Prepend a column of ones (the intercept feature) before the first column.
data.insert(0, 'ones', 1)

# Every column except the last is a feature; the last column is the label.
cols = data.shape[1]
X = np.array(data.iloc[:, 0:cols - 1].values)
y = np.array(data.iloc[:, cols - 1:cols].values)

# Initial parameters: a 1-D array of three zeros (used as a row vector later).
theta = np.zeros(3)

# Return the gradient of the cost as a vector (one entry per parameter)
def gradient(theta, X, y):
    """Return the gradient of the unregularized logistic cost.

    Args:
        theta: Parameters; converted with ``np.matrix`` and used as a row
            vector.
        X: Design matrix, one sample per row.
        y: Labels; after ``np.matrix`` conversion they are subtracted from
            the ``(m, 1)`` prediction column, so they should have one row
            per sample.

    Returns:
        A ``1 x n`` ``np.matrix`` holding ``(sigmoid(X * theta.T) - y).T * X``
        divided by the number of rows of ``X``.
    """
    theta = np.matrix(theta)
    X = np.matrix(X)
    y = np.matrix(y)

    # Prediction error for every sample, shape (m, 1).
    error = sigmoid(X * theta.T) - y

    # Averaging error-weighted features over the samples gives all partial
    # derivatives at once; no per-parameter loop or pre-allocated buffer
    # is needed.
    return error.T.dot(X) / len(X)

# Let SciPy's truncated-Newton optimizer search for the best theta,
# using cost() as the objective and gradient() as its derivative.
result = opt.fmin_tnc(
    func=cost,
    x0=theta,
    fprime=gradient,
    args=(X, y),
)

# Uncomment to inspect the cost at the optimum:
#print(cost(result[0], X, y))

# Evaluate the fitted theta:
# compute the predicted class for every row of the given data set
def predict(theta, X):
    """Return a list of 0/1 predictions, one per row of ``X``.

    A row is labelled 1 when its probability ``sigmoid(X * theta.T)`` is
    strictly greater than 0.5, otherwise 0.
    """
    theta = np.matrix(theta)
    X = np.matrix(X)
    return [1 if row > 0.5 else 0 for row in sigmoid(X * theta.T)]

# fmin_tnc returns a tuple whose first element is the optimized parameters.
theta_min = result[0]
predictions = predict(theta_min, X)

# 1 where the prediction matches the true label of the same sample, else 0.
correct = [1 if ((a == 1 and b == 1) or (a == 0 and b == 0)) else 0
           for (a, b) in zip(predictions, y)]

# Accuracy as a percentage of all samples.  (A modulo, ``sum % len``, only
# happens to give the right number when there are exactly 100 samples.)
accuracy = round(100.0 * sum(correct) / len(correct), 2)
print('accuracy = {0}%'.format(accuracy))  # the original author reports 89% on the training set

# Plot the decision boundary.
# The boundary is theta0 + theta1 * x1 + theta2 * x2 = 0; after dividing all
# parameters by theta2 it becomes x2 = -(theta0' + theta1' * x1).
theta_temp = theta_min / theta_min[2]
x = np.arange(130, step=0.1)
# NOTE: this rebinds the module-level name `y` (previously the label array)
# to the x2 coordinates of the boundary line.
y = -(theta_temp[0] + theta_temp[1] * x)

# Scatter the original samples, coloured by their 'Admitted' label.
# x/y are passed by keyword and the figure size via `height`: current seaborn
# releases no longer accept positional x/y or the old `size` keyword.
sns.set(context='notebook', style='ticks', font_scale=1.5)
sns.lmplot(x='Exam 1', y='Exam 2', hue='Admitted', data=data,
           height=6,
           fit_reg=False,
           scatter_kws={"s": 25}
           )

# Draw the boundary line on top of the scatter plot.
plt.plot(x, y, 'grey')
plt.xlim(0, 130)
plt.ylim(0, 130)
plt.title('Decision Boundary')
plt.show()

二：非线性logistic 回归（正则化）

代码如下：

import pandas as pd

import numpy as np

import scipy.optimize as opt

import matplotlib.pyplot as plt

# Load the second data set: two test scores per row plus the 0/1 'Accepted' label.
path = 'ex2data2.txt'
data = pd.read_csv(path, header=None, names=['Test 1', 'Test 2', 'Accepted'])

# Split the rows into accepted (label 1) and rejected (label 0) samples.
positive = data.loc[data['Accepted'].eq(1)]
negative = data.loc[data['Accepted'].eq(0)]

'''

#显示原始数据的分布

fig, ax = plt.subplots(figsize=(12, 8))

ax.scatter(positive['Test 1'], positive['Test 2'], s=50, c='b', marker='o', label='Accepted')

ax.scatter(negative['Test 1'], negative['Test 2'], s=50, c='r', marker='x', label='Unaccepted')

ax.legend() #显示右上角的Accepted 和 Unaccepted标签

ax.set_xlabel('Test 1 Score')

ax.set_ylabel('Test 2 Score')

plt.show()

'''

# Polynomial terms are generated for total degree 1 .. degree - 1 (i.e. up to 4).
degree = 5
x1 = data['Test 1']
x2 = data['Test 2']

# Insert a column of ones at position 3, i.e. right after the three existing
# columns; it serves as the intercept feature.
data.insert(3, 'Ones', 1)

# Build the polynomial features x1^(i-j) * x2^j for every total degree i.
# j must run up to and including i: stopping at i - 1 would silently drop all
# pure-x2 terms (x2, x2^2, ...) and leave an incomplete polynomial basis.
for i in range(1, degree):
    for j in range(0, i + 1):
        data['F' + str(i) + str(j)] = np.power(x1, i - j) * np.power(x2, j)

# Remove the raw 'Test 1' and 'Test 2' columns; only the label, the ones
# column and the generated features remain.
data.drop(['Test 1', 'Test 2'], axis=1, inplace=True)

# Sigmoid (logistic) function
def sigmoid(h):
    """Map *h* through the logistic function ``1 / (1 + exp(-h))``.

    Works element-wise on NumPy arrays/matrices because ``np.exp`` does.
    """
    exp_neg = np.exp(-h)
    return 1 / (1 + exp_neg)

def cost(theta, X, y, learnRate):
    """Return the L2-regularized logistic-regression cost.

    ``theta``, ``X`` and ``y`` are converted with ``np.matrix``; ``theta`` is
    used as a row vector.  The cost is the mean cross-entropy of
    ``sigmoid(X * theta.T)`` against ``y`` plus the penalty
    ``learnRate / (2 * m) * sum(theta[1:] ** 2)``, where ``m`` is the number
    of rows of ``X``.  Despite its name, ``learnRate`` acts as the
    regularization weight; the first parameter is not penalized.
    """
    theta = np.matrix(theta)
    X = np.matrix(X)
    y = np.matrix(y)
    m = len(X)

    hypothesis = sigmoid(X * theta.T)
    first = np.multiply(-y, np.log(hypothesis))
    second = np.multiply(1 - y, np.log(1 - hypothesis))

    # Penalize every parameter except the first (intercept) one.
    penalty = (learnRate / (2 * m)) * np.sum(np.power(theta[:, 1:], 2))
    return np.sum(first - second) / m + penalty

# Passed to cost() / gradientReg() as the regularization weight.
learnRate = 1

# Column 0 holds the label; every remaining column is a feature.
cols = data.shape[1]
X = np.array(data.iloc[:, 1:cols])
y = np.array(data.iloc[:, 0:1])

# Start from all-zero parameters, one per feature column.
theta = np.zeros(X.shape[1])

# Compute the predicted class for every row of the given data set
def predict(theta, X):
    """Return a list of 0/1 predictions, one per row of ``X``.

    A row is labelled 1 when its probability ``sigmoid(X * theta.T)`` is
    strictly greater than 0.5, otherwise 0.
    """
    theta = np.matrix(theta)
    X = np.matrix(X)
    return [1 if row > 0.5 else 0 for row in sigmoid(X * theta.T)]

def gradientReg(theta, X, y, learnRate):
    """Return the gradient of the L2-regularized logistic cost.

    This is the derivative of the regularized ``cost`` defined in this file:
    the averaged error-weighted features plus ``learnRate / m * theta`` for
    every parameter except the first (intercept) one, which ``cost`` does not
    penalize.

    Args:
        theta: Parameters; converted with ``np.matrix`` and used as a row
            vector.
        X: Design matrix, one sample per row; its first column corresponds
            to the unpenalized first parameter.
        y: Labels; after ``np.matrix`` conversion they are subtracted from
            the ``(m, 1)`` prediction column.
        learnRate: Regularization weight (named ``learnRate`` for
            compatibility with existing callers).

    Returns:
        A ``1 x n`` ``np.matrix`` with one partial derivative per parameter.
    """
    theta = np.matrix(theta)
    X = np.matrix(X)
    y = np.matrix(y)
    m = len(X)

    error = sigmoid(X * theta.T) - y

    # Data term plus the penalty on *each* parameter.  The penalty must use
    # the whole theta row, not a single column picked by a stray global index.
    grad = (error.T * X) / m + (learnRate / m) * theta

    # Undo the penalty for the intercept only.  On a 1 x n matrix ``grad[0]``
    # selects the entire row, so the single element is addressed explicitly.
    grad[0, 0] = grad[0, 0] - (learnRate / m) * theta[0, 0]

    return grad

# Fit the regularized model with SciPy's truncated-Newton optimizer.
result = opt.fmin_tnc(func=cost, x0=theta, fprime=gradientReg, args=(X, y, learnRate))
print(result)

# The first element of the returned tuple holds the optimized parameters.
theta_min = np.matrix(result[0])
predictions = predict(theta_min, X)

# 1 where the prediction matches the true label of the same sample, else 0.
correct = [1 if ((a == 1 and b == 1) or (a == 0 and b == 0)) else 0
           for (a, b) in zip(predictions, y)]

# Accuracy as a percentage of all samples.  A modulo (``sum % len``) would
# just print the raw number of correct samples whenever it is below len.
accuracy = round(100.0 * sum(correct) / len(correct), 2)
print('accuracy = {0}%'.format(accuracy))

logistic 回归（线性和非线性）的更多相关文章

浅谈Logistic回归及过拟合
判断学习速率是否合适?每步都下降即可.这篇先不整理吧... 这节学习的是逻辑回归(Logistic Regression),也算进入了比较正统的机器学习算法.啥叫正统呢?我概念里面机器学习算法一般是这 ...
机器学习公开课笔记(3)：Logistic回归
Logistic 回归通常是二元分类器(也可以用于多元分类),例如以下的分类问题 Email: spam / not spam Tumor: Malignant / benign 假设 (Hypot ...
Logistic回归总结
原文:http://blog.csdn.net/dongtingzhizi/article/details/15962797 Logistic回归总结作者:洞庭之子微博:洞庭之子-Bing (2 ...
机器学习(4)之Logistic回归
机器学习(4)之Logistic回归 1. 算法推导与之前学过的梯度下降等不同,Logistic回归是一类分类问题,而前者是回归问题.回归问题中,尝试预测的变量y是连续的变量,而在分类问题中,y是一 ...
Logistic回归（逻辑回归）和softmax回归
一.Logistic回归 Logistic回归(Logistic Regression,简称LR)是一种常用的处理二类分类问题的模型. 在二类分类问题中,把因变量y可能属于的两个类分别称为负类和正类, ...
logistic回归学习
logistic回归是一种分类方法,用于两分类的问题,其基本思想为: 寻找合适的假设函数,即分类函数,用来预测输入数据的结果: 构造损失函数,用来表示预测的输出结果与训练数据中实际类别之间的偏差: 最 ...
Logistic回归和SVM的异同
这个问题在最近面试的时候被问了几次,让谈一下Logistic回归(以下简称LR)和SVM的异同.由于之前没有对比分析过,而且不知道从哪个角度去分析,一时语塞,只能不知为不知. 现在对这二者做一个对比分 ...
机器学习-- Logistic回归 Logistic Regression
转载自:http://blog.csdn.net/linuxcumt/article/details/8572746 1.假设随Tumor Size变化,预测病人的肿瘤是恶性(malignant)还是 ...
【转载】logistic回归
原文地址:https://www.cnblogs.com/zichun-zeng/p/3824745.html 1. logistic回归与一般线性回归模型的区别: (1) 线性回归的结果变量 ...

随机推荐

SSL握手中win xp和SNI的那点事
SSL握手中win xp和SNI的那点事一.背景需求server1-3使用不同的域名对外提供https服务,用nginx作为前端负载均衡器并负责https集中解密工作(以用户访问的域名为依据进行流量 ...
01. Go 语言简介
Go语言简介引用原文地址:http://m.biancheng.net/golang/ Go语言也称 Golang,兼具效率.性能.安全.健壮等特性.这套Go语言教程(Golang教程)通俗易懂,深 ...
Mysql中事务ACID实现原理
引言照例,我们先来一个场景~ 面试官:"知道事务的四大特性么?"你:"懂,ACID嘛,原子性(Atomicity).一致性(Consistency).隔离性(Isola ...
#3145. 「APIO 2019」桥梁
#3145. 「APIO 2019」桥梁题目描述圣彼得堡市内所有水路长度总和约 282 千米,市内水域面积占城市面积的 7%.--来自维基百科圣彼得堡位于由 \(m\) 座桥梁连接而成的 \(n ...
Xml之Schema XSD约束{详细}
问题: 学习Schema其他标签的定义约束引入的方式: 基本格式: 1构建schema: 1.1 最基本的单位元素 1.2 元素属性 1.3 simpleType 定义类型 1.4 复合结构类型 ...
【计算机网络】UDP基础知识总结
1. UDP概念相关 [!NOTE] UDP(User Datagram Protocol),又叫用户数据报协议. UDP是一个无连接的.不可靠.基于数据报的传输协议.UDP只是报文(报文可以理解为一 ...
C#截图操作（几种截图方法）
公共函数获取屏幕截图private Bitmap GetScreenCapture(){ Rectangle tScreenRect = new Rectangle(0, 0, Screen.Prim ...
GO Map的初步使用
一.集合(Map) 1.1 什么是Map 张三:13910101201 李四:13801010134 map是Go中的内置类型,它将一个值与一个键关联起来.可以使用相应的键检索值. Map 是一种无序 ...
fastjson对于yyyy-MM-dd HH:mm格式的反序列化问题
原创GrayHJX 发布于2017-03-14 22:56:33 阅读数 6851 收藏展开问题:最近在工作中遇到这么一个问题:有个实体类,它有个date类型的属性,当在这个属性加上fastjs ...
C# read dll config
public static SqlConnection GetSqlConnection() { Configuration myDllConfig = ConfigurationManager.Op ...

logistic 回归（线性和非线性）

logistic 回归（线性和非线性）的更多相关文章

随机推荐

热门专题