吴裕雄--天生自然 R语言开发学习:回归















#------------------------------------------------------------#
# R in Action (2nd ed): Chapter 8 #
# Regression #
# requires packages car, gvlma, MASS, leaps to be installed #
# install.packages(c("car", "gvlma", "MASS", "leaps")) #
#------------------------------------------------------------# par(ask=TRUE)
opar <- par(no.readonly=TRUE) # Listing 8.1 - Simple linear regression
fit <- lm(weight ~ height, data=women)
summary(fit)
women$weight
fitted(fit)
residuals(fit)
plot(women$height,women$weight,
main="Women Age 30-39",
xlab="Height (in inches)",
ylab="Weight (in pounds)")
# add the line of best fit
abline(fit) # Listing 8.2 - Polynomial regression
fit2 <- lm(weight ~ height + I(height^2), data=women)
summary(fit2)
plot(women$height,women$weight,
main="Women Age 30-39",
xlab="Height (in inches)",
ylab="Weight (in lbs)")
lines(women$height,fitted(fit2)) # Enhanced scatterplot for women data
library(car)
library(car)
scatterplot(weight ~ height, data=women,
spread=FALSE, smoother.args=list(lty=2), pch=19,
main="Women Age 30-39",
xlab="Height (inches)",
ylab="Weight (lbs.)") # Listing 8.3 - Examining bivariate relationships
states <- as.data.frame(state.x77[,c("Murder", "Population",
"Illiteracy", "Income", "Frost")])
cor(states)
library(car)
scatterplotMatrix(states, spread=FALSE, smoother.args=list(lty=2),
main="Scatter Plot Matrix") # Listing 8.4 - Multiple linear regression
states <- as.data.frame(state.x77[,c("Murder", "Population",
"Illiteracy", "Income", "Frost")])
fit <- lm(Murder ~ Population + Illiteracy + Income + Frost, data=states)
summary(fit) # Listing 8.5 - Mutiple linear regression with a significant interaction term
fit <- lm(mpg ~ hp + wt + hp:wt, data=mtcars)
summary(fit) library(effects)
plot(effect("hp:wt", fit,, list(wt=c(2.2, 3.2, 4.2))), multiline=TRUE) # simple regression diagnostics
fit <- lm(weight ~ height, data=women)
par(mfrow=c(2,2))
plot(fit)
newfit <- lm(weight ~ height + I(height^2), data=women)
par(opar)
par(mfrow=c(2,2))
plot(newfit)
par(opar) # basic regression diagnostics for states data
opar <- par(no.readonly=TRUE)
fit <- lm(weight ~ height, data=women)
par(mfrow=c(2,2))
plot(fit)
par(opar) fit2 <- lm(weight ~ height + I(height^2), data=women)
opar <- par(no.readonly=TRUE)
par(mfrow=c(2,2))
plot(fit2)
par(opar) # Assessing normality
library(car)
states <- as.data.frame(state.x77[,c("Murder", "Population",
"Illiteracy", "Income", "Frost")])
fit <- lm(Murder ~ Population + Illiteracy + Income + Frost, data=states)
qqPlot(fit, labels=row.names(states), id.method="identify",
simulate=TRUE, main="Q-Q Plot") # Listing 8.6 - Function for plotting studentized residuals
residplot <- function(fit, nbreaks=10) {
z <- rstudent(fit)
hist(z, breaks=nbreaks, freq=FALSE,
xlab="Studentized Residual",
main="Distribution of Errors")
rug(jitter(z), col="brown")
curve(dnorm(x, mean=mean(z), sd=sd(z)),
add=TRUE, col="blue", lwd=2)
lines(density(z)$x, density(z)$y,
col="red", lwd=2, lty=2)
legend("topright",
legend = c( "Normal Curve", "Kernel Density Curve"),
lty=1:2, col=c("blue","red"), cex=.7)
} residplot(fit) # Assessing linearity
library(car)
crPlots(fit) # Listing 8.7 - Assessing homoscedasticity
library(car)
ncvTest(fit)
spreadLevelPlot(fit) # Listing 8.8 - Global test of linear model assumptions
library(gvlma)
gvmodel <- gvlma(fit)
summary(gvmodel) # Listing 8.9 - Evaluating multi-collinearity
library(car)
vif(fit)
sqrt(vif(fit)) > 2 # problem? # Assessing outliers
library(car)
outlierTest(fit) # Identifying high leverage points
hat.plot <- function(fit) {
p <- length(coefficients(fit))
n <- length(fitted(fit))
plot(hatvalues(fit), main="Index Plot of Hat Values")
abline(h=c(2,3)*p/n, col="red", lty=2)
identify(1:n, hatvalues(fit), names(hatvalues(fit)))
}
hat.plot(fit) # Identifying influential observations # Cooks Distance D
# identify D values > 4/(n-k-1)
cutoff <- 4/(nrow(states)-length(fit$coefficients)-2)
plot(fit, which=4, cook.levels=cutoff)
abline(h=cutoff, lty=2, col="red") # Added variable plots
# add id.method="identify" to interactively identify points
library(car)
avPlots(fit, ask=FALSE, id.method="identify") # Influence Plot
library(car)
influencePlot(fit, id.method="identify", main="Influence Plot",
sub="Circle size is proportial to Cook's Distance" ) # Listing 8.10 - Box-Cox Transformation to normality
library(car)
summary(powerTransform(states$Murder)) # Box-Tidwell Transformations to linearity
library(car)
boxTidwell(Murder~Population+Illiteracy,data=states) # Listing 8.11 - Comparing nested models using the anova function
states <- as.data.frame(state.x77[,c("Murder", "Population",
"Illiteracy", "Income", "Frost")])
fit1 <- lm(Murder ~ Population + Illiteracy + Income + Frost,
data=states)
fit2 <- lm(Murder ~ Population + Illiteracy, data=states)
anova(fit2, fit1) # Listing 8.12 - Comparing models with the AIC
fit1 <- lm(Murder ~ Population + Illiteracy + Income + Frost,
data=states)
fit2 <- lm(Murder ~ Population + Illiteracy, data=states)
AIC(fit1,fit2) # Listing 8.13 - Backward stepwise selection
library(MASS)
states <- as.data.frame(state.x77[,c("Murder", "Population",
"Illiteracy", "Income", "Frost")])
fit <- lm(Murder ~ Population + Illiteracy + Income + Frost,
data=states)
stepAIC(fit, direction="backward") # Listing 8.14 - All subsets regression
library(leaps)
states <- as.data.frame(state.x77[,c("Murder", "Population",
"Illiteracy", "Income", "Frost")])
leaps <-regsubsets(Murder ~ Population + Illiteracy + Income +
Frost, data=states, nbest=4)
plot(leaps, scale="adjr2")
library(car)
subsets(leaps, statistic="cp",
main="Cp Plot for All Subsets Regression")
abline(1,1,lty=2,col="red") # Listing 8.15 - Function for k-fold cross-validated R-square
shrinkage <- function(fit,k=10){
require(bootstrap) # define functions
theta.fit <- function(x,y){lsfit(x,y)}
theta.predict <- function(fit,x){cbind(1,x)%*%fit$coef} # matrix of predictors
x <- fit$model[,2:ncol(fit$model)]
# vector of predicted values
y <- fit$model[,1] results <- crossval(x,y,theta.fit,theta.predict,ngroup=k)
r2 <- cor(y, fit$fitted.values)**2 # raw R2
r2cv <- cor(y,results$cv.fit)**2 # cross-validated R2
cat("Original R-square =", r2, "\n")
cat(k, "Fold Cross-Validated R-square =", r2cv, "\n")
cat("Change =", r2-r2cv, "\n")
} # using it
states <- as.data.frame(state.x77[,c("Murder", "Population",
"Illiteracy", "Income", "Frost")])
fit <- lm(Murder ~ Population + Income + Illiteracy + Frost, data=states)
shrinkage(fit)
fit2 <- lm(Murder~Population+Illiteracy,data=states)
shrinkage(fit2) # Calculating standardized regression coefficients
states <- as.data.frame(state.x77[,c("Murder", "Population",
"Illiteracy", "Income", "Frost")])
zstates <- as.data.frame(scale(states))
zfit <- lm(Murder~Population + Income + Illiteracy + Frost, data=zstates)
coef(zfit) # Listing 8.16 rlweights function for clculating relative importance of predictors
relweights <- function(fit,...){
R <- cor(fit$model)
nvar <- ncol(R)
rxx <- R[2:nvar, 2:nvar]
rxy <- R[2:nvar, 1]
svd <- eigen(rxx)
evec <- svd$vectors
ev <- svd$values
delta <- diag(sqrt(ev))
lambda <- evec %*% delta %*% t(evec)
lambdasq <- lambda ^ 2
beta <- solve(lambda) %*% rxy
rsquare <- colSums(beta ^ 2)
rawwgt <- lambdasq %*% beta ^ 2
import <- (rawwgt / rsquare) * 100
import <- as.data.frame(import)
row.names(import) <- names(fit$model[2:nvar])
names(import) <- "Weights"
import <- import[order(import),1, drop=FALSE]
dotchart(import$Weights, labels=row.names(import),
xlab="% of R-Square", pch=19,
main="Relative Importance of Predictor Variables",
sub=paste("Total R-Square=", round(rsquare, digits=3)),
...)
return(import)
} # Listing 8.17 - Applying the relweights function
states <- as.data.frame(state.x77[,c("Murder", "Population",
"Illiteracy", "Income", "Frost")])
fit <- lm(Murder ~ Population + Illiteracy + Income + Frost, data=states)
relweights(fit, col="blue")
吴裕雄--天生自然 R语言开发学习:回归的更多相关文章
- 吴裕雄--天生自然 R语言开发学习:R语言的安装与配置
下载R语言和开发工具RStudio安装包 先安装R
- 吴裕雄--天生自然 R语言开发学习:数据集和数据结构
数据集的概念 数据集通常是由数据构成的一个矩形数组,行表示观测,列表示变量.表2-1提供了一个假想的病例数据集. 不同的行业对于数据集的行和列叫法不同.统计学家称它们为观测(observation)和 ...
- 吴裕雄--天生自然 R语言开发学习:导入数据
2.3.6 导入 SPSS 数据 IBM SPSS数据集可以通过foreign包中的函数read.spss()导入到R中,也可以使用Hmisc 包中的spss.get()函数.函数spss.get() ...
- 吴裕雄--天生自然 R语言开发学习:使用键盘、带分隔符的文本文件输入数据
R可从键盘.文本文件.Microsoft Excel和Access.流行的统计软件.特殊格 式的文件.多种关系型数据库管理系统.专业数据库.网站和在线服务中导入数据. 使用键盘了.有两种常见的方式:用 ...
- 吴裕雄--天生自然 R语言开发学习:R语言的简单介绍和使用
假设我们正在研究生理发育问 题,并收集了10名婴儿在出生后一年内的月龄和体重数据(见表1-).我们感兴趣的是体重的分 布及体重和月龄的关系. 可以使用函数c()以向量的形式输入月龄和体重数据,此函 数 ...
- 吴裕雄--天生自然 R语言开发学习:基础知识
1.基础数据结构 1.1 向量 # 创建向量a a <- c(1,2,3) print(a) 1.2 矩阵 #创建矩阵 mymat <- matrix(c(1:10), nrow=2, n ...
- 吴裕雄--天生自然 R语言开发学习:图形初阶(续二)
# ----------------------------------------------------# # R in Action (2nd ed): Chapter 3 # # Gettin ...
- 吴裕雄--天生自然 R语言开发学习:图形初阶(续一)
# ----------------------------------------------------# # R in Action (2nd ed): Chapter 3 # # Gettin ...
- 吴裕雄--天生自然 R语言开发学习:图形初阶
# ----------------------------------------------------# # R in Action (2nd ed): Chapter 3 # # Gettin ...
- 吴裕雄--天生自然 R语言开发学习:基本图形(续二)
#---------------------------------------------------------------# # R in Action (2nd ed): Chapter 6 ...
随机推荐
- spring学习之依赖注入DI与控制反转IOC
一 Ioc基础 1.什么是Ioc? Ioc(Inversion of Control)既控制反转,Ioc不是一种技术,而是一种思想,在Java开发中意味着将设计好的对象交给容器来进行控制,并不是像传统 ...
- vscode Cannot edit in read-only editor.
原因 使用了runcode插件 这个错误一般出现在使用命令行输入的时候出现. 但是output页面是只读的,只能输出,不能用来输入. 解决 解放方法是,将run code设置为在Teminal中运行: ...
- 在mybatis框架中,延迟加载与连表查询的差异
1.引子 mybatis的延迟加载,主要应用于一个实体类中有复杂数据类型的属性,包括一对一和一对多的关系(在xml中用collection.association标签标识).这个种属性往往还对应着另一 ...
- Linux系统如何记录时间
1.内核在开机启动的时候会读取RTC硬件获取一个时间作为初始基准时间,这个基准时间对应一个jiiffies值(这个基准时间换算成jiffies值的方法是:用这个时间减去1970-01-01 00:0 ...
- Velocity脚本入门教程
下面资料整理自网络 一.Velocity介绍 Velocity是Apache公司的开源产品,是一套基于Java语言的模板引擎,可以很灵活的将后台数据对象与模板文件结合在一起,说的直白一点,就是允许任何 ...
- Python笔记_第三篇_面向对象_6.继承(单继承和多继承)
1. 概念解释: 继承:有两个类:A类和B类.那么A类就拥有了B类中的属性和方法. * 例如:Object:是所有类的父亲,还可以成为基类或者超类(super()) * 继承者为子类,被继承者成为父类 ...
- 树莓派docker搭建
树莓派上 Docker 的安装和使用 Docker 是一个开源的应用容器引擎,可以让开发者打包他们的应用以及依赖包到一个轻量级.可移植的容器中,然后发布到任何流行的 Linux 机器上,也可以实现虚拟 ...
- 题解 P3061 【[USACO12DEC]疯狂的栅栏Crazy Fences】
这道题的思想是首先我们找到所有的栅栏围成的空间,然后求每一只奶牛在哪几个栅栏空间之中,最后比较他们在的所有栅栏空间-----如果奶牛a和b同时在空间c,d和e内,那么他们一定在同一群中. 测试围栏的方 ...
- [SDOI2010]魔法猪学院(k短路)
A*板子题.我的code只能在luogu上过,bzoj上RE/MLE不清楚为啥. 蒟蒻到AFO前2个月不到的时间才学A*,A*其实就是bfs过程中进行剪支删除没必要的搜索.然后其实上这样剪支即可:如果 ...
- IE浏览器弹出的文件下载工具条实现自动另存为
IE浏览器中如果有一个文件可以下载,用鼠标点击一下文件的超链接,会在IE浏览器下方出现一个工具条,工具条上主要有“打开” “保存”两个按钮. 如果点击“保存”按钮,会把附件保存到系统的“下载”或“Do ...