一元回归_ols参数解读(推荐AAA)
python机器学习-乳腺癌细胞挖掘(博主亲自录制视频)

项目合作QQ:231469242
多重共线性测试需要改进
文件夹需要两个包


python3.0 anaconda
normality_check.py 正太检验
# -*- coding: utf-8 -*-
'''
Author:Toby
QQ:231469242,all right reversed,no commercial use
normality_check.py
正态性检验脚本 ''' import scipy
from scipy.stats import f
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as stats
# additional packages
from statsmodels.stats.diagnostic import lillifors #正态分布测试
def check_normality(testData):
#20<样本数<50用normal test算法检验正态分布性
if 20<len(testData) <50:
p_value= stats.normaltest(testData)[1]
if p_value<0.05:
print("use normaltest")
print ("data are not normal distributed")
return False
else:
print("use normaltest")
print ("data are normal distributed")
return True #样本数小于50用Shapiro-Wilk算法检验正态分布性
if len(testData) <50:
p_value= stats.shapiro(testData)[1]
if p_value<0.05:
print ("use shapiro:")
print ("data are not normal distributed")
return False
else:
print ("use shapiro:")
print ("data are normal distributed")
return True if 300>=len(testData) >=50:
p_value= lillifors(testData)[1]
if p_value<0.05:
print ("use lillifors:")
print ("data are not normal distributed")
return False
else:
print ("use lillifors:")
print ("data are normal distributed")
return True if len(testData) >300:
p_value= stats.kstest(testData,'norm')[1]
if p_value<0.05:
print ("use kstest:")
print ("data are not normal distributed")
return False
else:
print ("use kstest:")
print ("data are normal distributed")
return True #对所有样本组进行正态性检验
def NormalTest(list_groups):
for group in list_groups:
#正态性检验
status=check_normality(group)
if status==False :
return False
return True
Rsquare_multimode.py 多种模型计算R平方
加入了线性显著检测和r相关系数显著检测,多重共线性,自相关,残差正太检验等等
# -*- coding: utf-8 -*-
#斯皮尔曼等级相关(Spearman’s correlation coefficient for ranked data)
import math,pylab,scipy
import numpy as np
import scipy.stats as stats
from scipy.stats import t
from scipy.stats import f
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.stats.diagnostic import lillifors
import normality_check
import statsmodels.formula.api as sm
x=[4.03,3.76,3.77,3.34,3.47,2.92,3.20,2.71,3.53,4.51]
y=[6.47,6.13,6.19,4.89,5.63,4.52,5.89,4.79,5.27,6.08] list_group=[x,y]
sample=len(x)
#显著性
a=0.05 #数据可视化
plt.plot(x,y,'ro')
#斯皮尔曼等级相关,非参数检验
def Spearmanr(x,y):
print("use spearmanr,Nonparametric tests")
#样本不一致时,发出警告
if len(x)!=len(y):
print ("warming,the samples are not equal!")
r,p=stats.spearmanr(x,y)
print("spearman r**2:",r**2)
print("spearman p:",p)
if sample<500 and p>0.05:
print("when sample < 500,p has no mean(>0.05)")
print("when sample > 500,p has mean") #皮尔森 ,参数检验
def Pearsonr(x,y):
print("use Pearson,parametric tests")
r,p=stats.pearsonr(x,y)
print("pearson r**2:",r**2)
print("pearson p:",p)
if sample<30:
print("when sample <30,pearson has no mean") #皮尔森 ,参数检验,带有详细参数
def Pearsonr_details(x,y,xLabel,yLabel,formula):
n=len(x)
df=n-2
data=pd.DataFrame({yLabel:y,xLabel:x})
result = sm.ols(formula, data).fit()
print(result.summary()) #模型F分布显著性分析
print('\n')
print("linear relation Significant test:...................................")
#如果F检验的P值<0.05,拒绝H0,x和y无显著关系,H1成立,x和y有显著关系
if result.f_pvalue<0.05:
print ("P value of f test<0.05,the linear relation is right.") #R的显著检验
print('\n')
print("R significant test:...................................")
r_square=result.rsquared
r=math.sqrt(r_square)
t_score=r*math.sqrt(n-2)/(math.sqrt(1-r**2))
t_std=t.isf(a/2,df)
if t_score<-t_std or t_score>t_std:
print ("R is significant according to its sample size")
else:
print ("R is not significant") #残差分析
print('\n')
print("residual error analysis:...................................")
states=normality_check.check_normality(result.resid)
if states==True:
print("the residual error are normal distributed")
else:
print("the residual error are not normal distributed") #残差偏态和峰态
Skew = stats.skew(result.resid, bias=True)
Kurtosis = stats.kurtosis(result.resid, fisher=False,bias=True)
if round(Skew,1)==0:
print("residual errors normality Skew:in middle,perfect match")
elif round(Skew,1)>0:
print("residual errors normality Skew:close right")
elif round(Skew,1)<0:
print("residual errors normality Skew:close left") if round(Kurtosis,1)==3:
print("residual errors normality Kurtosis:in middle,perfect match")
elif round(Kurtosis,1)>3:
print("residual errors normality Kurtosis:more peak")
elif round(Kurtosis,1)<3:
print("residual errors normality Kurtosis:more flat") #自相关分析autocorrelation
print('\n')
print("autocorrelation test:...................................")
DW = np.sum( np.diff( result.resid.values )**2.0 )/ result.ssr
if round(DW,1)==2:
print("Durbin-Watson close to 2,there is no autocorrelation.OLS model works well") #共线性检查
print('\n')
print("multicollinearity test:")
conditionNumber=result.condition_number
if conditionNumber>30:
print("conditionNumber>30,multicollinearity exists")
else:
print("conditionNumber<=30,multicollinearity not exists") #绘制残差图,用于方差齐性检验
Draw_residual(list(result.resid))
'''
result.rsquared
Out[28]: 0.61510660055413524
''' #kendalltau非参数检验
def Kendalltau(x,y):
print("use kendalltau,Nonparametric tests")
r,p=stats.kendalltau(x,y)
print("kendalltau r**2:",r**2)
print("kendalltau p:",p) #选择模型
def R_mode(x,y,xLabel,yLabel,formula):
#正态性检验
Normal_result=normality_check.NormalTest(list_group)
print ("normality result:",Normal_result)
if len(list_group)>2:
Kendalltau(x,y)
if Normal_result==False:
Spearmanr(x,y)
Kendalltau(x,y)
if Normal_result==True:
Pearsonr_details(x,y,xLabel,yLabel,formula) #调整的R方
def Adjust_Rsquare(r_square,n,k):
adjust_rSquare=1-((1-r_square)*(n-1)*1.0/(n-k-1))
return adjust_rSquare
'''
n=len(x)
n=10
k=1
r_square=0.615
Adjust_Rsquare(r_square,n,k)
Out[11]: 0.566875
''' #绘图
def Plot(x,y,yLabel,xLabel,Title):
plt.plot(x,y,'ro')
plt.ylabel(yLabel)
plt.xlabel(xLabel)
plt.title(Title)
plt.show() #绘图参数
yLabel='Alcohol'
xLabel='Tobacco'
Title='Sales in Several UK Regions'
Plot(x,y,yLabel,xLabel,Title)
formula='Alcohol ~ Tobacco' #绘制残点图
def Draw_residual(residual_list):
x=[i for i in range(1,len(residual_list)+1)]
y=residual_list
pylab.plot(x,y,'ro')
pylab.title("draw residual to check wrong number") # Pad margins so that markers don't get clipped by the axes,让点不与坐标轴重合
pylab.margins(0.3) #绘制网格
pylab.grid(True) pylab.show() R_mode(x,y,xLabel,yLabel,formula) '''
result.fittedvalues表示预测的y值阵列
result.fittedvalues
Out[42]:
0 6.094983
1 5.823391
2 5.833450
3 5.400915
4 5.531682
5 4.978439
6 5.260090
7 4.767201
8 5.592035
9 6.577813
dtype: float64 #计算残差的偏态
S = stats.skew(result.resid, bias=True)
Out[44]: -0.013678125910039975 K = stats.kurtosis(result.resid, fisher=False,bias=True)
K
Out[47]: 1.5271300905736027
'''
result.params 得到两个参数:x的系数和截距
截距
result.params[0]
x系数
result.params[1]




D.W统计量是用来检验残差分布是否为正态分布的,因为用OLS进行回归估计是假设模型残差服从正态分布的,因此,如果残差不服从正态分布,那么,模型将是有偏的,也就是说模型的解释能力是不强的。
D.W统计量在2左右说明残差是服从正态分布的,若偏离2太远,那么你所构建的模型
的解释能力就要受影响了。 jarque-bera解读
----样本是否符合正太分布



Jarque-Bera的P值接近于0,表明显著性高,数据服从正态分布。
Omnibus解读
Omnibus统计量的P值都接近于0,自变量的作用显著。 Omnibus tests are a kind of statistical test. They test whether the explained variance in a set of data is significantly greater than the unexplained variance, overall. One example is the F-test in the analysis of variance. There can be legitimate significant effects within a model even if the omnibus test is not significant. For instance, in a model with two independent variables, if only one variable exerts a significant effect on the dependent variable and the other does not, then the omnibus test may be non-significant. This fact does not affect the conclusions that may be drawn from the one significant variable. In order to test effects within an omnibus test, researchers often use contrasts. https://en.wikipedia.org/wiki/Omnibus_test
python信用评分卡建模(附代码,博主录制)

一元回归_ols参数解读(推荐AAA)的更多相关文章
- 一元回归1_基础(python代码实现)
python机器学习-乳腺癌细胞挖掘(博主亲自录制视频) https://study.163.com/course/introduction.htm?courseId=1005269003&u ...
- 机器学习(2):简单线性回归 | 一元回归 | 损失计算 | MSE
前文再续书接上一回,机器学习的主要目的,是根据特征进行预测.预测到的信息,叫标签. 从特征映射出标签的诸多算法中,有一个简单的算法,叫简单线性回归.本文介绍简单线性回归的概念. (1)什么是简单线性回 ...
- main(int argc, char **argv)参数解读
main(int argc, char **argv)参数解读 编译生成了test.exe ,然后在控制台下相应的目录下输入:test 1 2 3 4 argc就是一个输入了多少个参数,包括te ...
- Python_sklearn机器学习库学习笔记(一)_一元回归
一.引入相关库 %matplotlib inline import matplotlib.pyplot as plt from matplotlib.font_manager import FontP ...
- sklearn_随机森林random forest原理_乳腺癌分类器建模(推荐AAA)
sklearn实战-乳腺癌细胞数据挖掘(博主亲自录制视频) https://study.163.com/course/introduction.htm?courseId=1005269003& ...
- 支持向量机SVM原理_python sklearn建模乳腺癌细胞分类器(推荐AAA)
项目合作联系QQ:231469242 sklearn实战-乳腺癌细胞数据挖掘(博主亲自录制视频) https://study.163.com/course/introduction.htm?cours ...
- 因子分析factor analysis_spss运用_python建模(推荐AAA)
sklearn实战-乳腺癌细胞数据挖掘(博主亲自录制视频) https://study.163.com/course/introduction.htm?courseId=1005269003& ...
- 决策树decision tree原理介绍_python sklearn建模_乳腺癌细胞分类器(推荐AAA)
sklearn实战-乳腺癌细胞数据挖掘(博主亲自录制视频) https://study.163.com/course/introduction.htm?courseId=1005269003& ...
- Java8 JVM参数解读
附录:https://www.liangzl.com/get-article-detail-134315.html 摘要: 我们知道java虚拟机启动时会带有很多的启动参数,Java命令本身就是一个多 ...
随机推荐
- JS中自定义事件的使用与触发
1. 事件的创建 JS中,最简单的创建事件方法,是使用Event构造器: var myEvent = new Event('event_name'); 但是为了能够传递数据,就需要使用 CustomE ...
- mysql基础知识大全
前言:本文主要为mysql基础知识的大总结,mysql的基础知识很多,这里作简单概括性的介绍,具体的细节还是需要自行搜索.当然本文还有很多遗漏的地方,后续会慢慢补充完善. 数据库和数据库软件 数据库是 ...
- java不用任何已有方法完全自写的去重法
package aa; class InsertSort{ private long[] a; private int nElems; //构造方法 public InsertSort(int max ...
- 作业要求20181113-4 Beta阶段第1周/共2周 Scrum立会报告+燃尽图 03
作业要求:https://edu.cnblogs.com/campus/nenu/2018fall/homework/2385 版本控制:[https://git.coding.net/lglr201 ...
- 王者荣耀交流协会 - 第7次Scrum会议(第二周)
1.例会照片 照片由王超(本人)拍摄,组内成员刘耀泽,高远博,王磊,王玉玲,王超,任思佳,袁玥全部到齐. 2.时间跨度: 2017年10月26日 17:05 — 17:47 ,总计42分钟. 3.地 ...
- CS小分队第一阶段冲刺站立会议(5月12日)
昨日成果:2048整体界面效果经组员韩雪冬美化之后档次提升了好几个,我为其添加了保存并显示最高分数的功能. 遇到困难:当我想把access数据库由accdb改成mdb时,发生未知错误 ,导致数据库无法 ...
- shader language学习(1)——shader language简介背景
shader language,称为着色语言,shade在英语是阴影.颜色深浅的意思.shader language基于物体本身属性和光照条件,计算美格橡塑的颜色值. 实际上这种解释具有明显的时代局限 ...
- apache 部署web.py
一.安装Mod_wsgi 1.先yum -y install httpd-devel,否则会提示没有apxs 2.如果在make时 wsgi报错apxs:Error: Command failed w ...
- php面试必知必会常见问题
1 说出常用的10个数组方法 我觉得数组比较最能体现PHP基础语法的一个数据结构了,下面给大家列一下常用的10个关于操作数组的函数 in_array(判断数组中是否有某个元素) implode(将数组 ...
- 苹果手机连wifi跳不出来登录网页解决办法
1.点开要连接wifi后面的小叹号“!”,打开“自动登录” 2.打开设置,关闭SAFARI的“阻止弹窗” 3.重新连接wifi