#---------------------------------------------------------------------#
# R in Action (2nd ed): Chapter 7 #
# Basic statistics #
# requires packages npmc, ggm, gmodels, vcd, Hmisc, #
# pastecs, psych, doBy to be installed #
# install.packages(c("ggm", "gmodels", "vcd", "Hmisc", #
# "pastecs", "psych", "doBy")) #
#---------------------------------------------------------------------# mt <- mtcars[c("mpg", "hp", "wt", "am")]
head(mt) # Listing 7.1 - Descriptive stats via summary
mt <- mtcars[c("mpg", "hp", "wt", "am")]
summary(mt) # Listing 7.2 - descriptive stats via sapply
mystats <- function(x, na.omit=FALSE){
if (na.omit)
x <- x[!is.na(x)]
m <- mean(x)
n <- length(x)
s <- sd(x)
skew <- sum((x-m)^3/s^3)/n
kurt <- sum((x-m)^4/s^4)/n - 3
return(c(n=n, mean=m, stdev=s, skew=skew, kurtosis=kurt))
} myvars <- c("mpg", "hp", "wt")
sapply(mtcars[myvars], mystats) # Listing 7.3 - Descriptive stats via describe (Hmisc)
library(Hmisc)
myvars <- c("mpg", "hp", "wt")
describe(mtcars[myvars]) # Listing 7.,4 - Descriptive stats via stat.desc (pastecs)
library(pastecs)
myvars <- c("mpg", "hp", "wt")
stat.desc(mtcars[myvars]) # Listing 7.5 - Descriptive stats via describe (psych)
library(psych)
myvars <- c("mpg", "hp", "wt")
describe(mtcars[myvars]) # Listing 7.6 - Descriptive stats by group with aggregate
myvars <- c("mpg", "hp", "wt")
aggregate(mtcars[myvars], by=list(am=mtcars$am), mean)
aggregate(mtcars[myvars], by=list(am=mtcars$am), sd) # Listing 7.7 - Descriptive stats by group via by
dstats <- function(x)sapply(x, mystats)
myvars <- c("mpg", "hp", "wt")
by(mtcars[myvars], mtcars$am, dstats) # Listing 7.8 - Descriptive stats by group via summaryBy
library(doBy)
summaryBy(mpg+hp+wt~am, data=mtcars, FUN=mystats) # Listing 7.9 - Descriptive stats by group via describe.by (psych)
library(psych)
myvars <- c("mpg", "hp", "wt")
describeBy(mtcars[myvars], list(am=mtcars$am)) # summary statistics by group via the reshape package
library(reshape)
dstats <- function(x)(c(n=length(x), mean=mean(x), sd=sd(x)))
dfm <- melt(mtcars, measure.vars=c("mpg", "hp", "wt"),
id.vars=c("am", "cyl"))
cast(dfm, am + cyl + variable ~ ., dstats) # frequency tables
library(vcd)
head(Arthritis) # one way table
mytable <- with(Arthritis, table(Improved))
mytable # frequencies
prop.table(mytable) # proportions
prop.table(mytable)*100 # percentages # two way table
mytable <- xtabs(~ Treatment+Improved, data=Arthritis)
mytable # frequencies
margin.table(mytable,1) #row sums
margin.table(mytable, 2) # column sums
prop.table(mytable) # cell proportions
prop.table(mytable, 1) # row proportions
prop.table(mytable, 2) # column proportions
addmargins(mytable) # add row and column sums to table # more complex tables
addmargins(prop.table(mytable))
addmargins(prop.table(mytable, 1), 2)
addmargins(prop.table(mytable, 2), 1) # Listing 7.10 - Two way table using CrossTable
library(gmodels)
CrossTable(Arthritis$Treatment, Arthritis$Improved) # Listing 7.11 - Three way table
mytable <- xtabs(~ Treatment+Sex+Improved, data=Arthritis)
mytable
ftable(mytable)
margin.table(mytable, 1)
margin.table(mytable, 2)
margin.table(mytable, 2)
margin.table(mytable, c(1,3))
ftable(prop.table(mytable, c(1,2)))
ftable(addmargins(prop.table(mytable, c(1, 2)), 3)) # Listing 7.12 - Chi-square test of independence
library(vcd)
mytable <- xtabs(~Treatment+Improved, data=Arthritis)
chisq.test(mytable)
mytable <- xtabs(~Improved+Sex, data=Arthritis)
chisq.test(mytable) # Fisher's exact test
mytable <- xtabs(~Treatment+Improved, data=Arthritis)
fisher.test(mytable) # Chochran-Mantel-Haenszel test
mytable <- xtabs(~Treatment+Improved+Sex, data=Arthritis)
mantelhaen.test(mytable) # Listing 7.13 - Measures of association for a two-way table
library(vcd)
mytable <- xtabs(~Treatment+Improved, data=Arthritis)
assocstats(mytable) # Listing 7.14 Covariances and correlations
states<- state.x77[,1:6]
cov(states)
cor(states)
cor(states, method="spearman") x <- states[,c("Population", "Income", "Illiteracy", "HS Grad")]
y <- states[,c("Life Exp", "Murder")]
cor(x,y) # partial correlations
library(ggm)
# partial correlation of population and murder rate, controlling
# for income, illiteracy rate, and HS graduation rate
pcor(c(1,5,2,3,6), cov(states)) # Listing 7.15 - Testing a correlation coefficient for significance
cor.test(states[,3], states[,5]) # Listing 7.16 - Correlation matrix and tests of significance via corr.test
library(psych)
corr.test(states, use="complete") # t test
library(MASS)
t.test(Prob ~ So, data=UScrime) # dependent t test
sapply(UScrime[c("U1","U2")], function(x)(c(mean=mean(x),sd=sd(x))))
with(UScrime, t.test(U1, U2, paired=TRUE)) # Wilcoxon two group comparison
with(UScrime, by(Prob, So, median))
wilcox.test(Prob ~ So, data=UScrime) sapply(UScrime[c("U1", "U2")], median)
with(UScrime, wilcox.test(U1, U2, paired=TRUE)) # Kruskal Wallis test
states <- data.frame(state.region, state.x77)
kruskal.test(Illiteracy ~ state.region, data=states) # Listing 7.17 - Nonparametric multiple comparisons
source("http://www.statmethods.net/RiA/wmc.txt")
states <- data.frame(state.region, state.x77)
wmc(Illiteracy ~ state.region, data=states, method="holm")

吴裕雄--天生自然 R语言开发学习:基本统计分析的更多相关文章

  1. 吴裕雄--天生自然 R语言开发学习:R语言的安装与配置

    下载R语言和开发工具RStudio安装包 先安装R

  2. 吴裕雄--天生自然 R语言开发学习:数据集和数据结构

    数据集的概念 数据集通常是由数据构成的一个矩形数组,行表示观测,列表示变量.表2-1提供了一个假想的病例数据集. 不同的行业对于数据集的行和列叫法不同.统计学家称它们为观测(observation)和 ...

  3. 吴裕雄--天生自然 R语言开发学习:导入数据

    2.3.6 导入 SPSS 数据 IBM SPSS数据集可以通过foreign包中的函数read.spss()导入到R中,也可以使用Hmisc 包中的spss.get()函数.函数spss.get() ...

  4. 吴裕雄--天生自然 R语言开发学习:使用键盘、带分隔符的文本文件输入数据

    R可从键盘.文本文件.Microsoft Excel和Access.流行的统计软件.特殊格 式的文件.多种关系型数据库管理系统.专业数据库.网站和在线服务中导入数据. 使用键盘了.有两种常见的方式:用 ...

  5. 吴裕雄--天生自然 R语言开发学习:R语言的简单介绍和使用

    假设我们正在研究生理发育问 题,并收集了10名婴儿在出生后一年内的月龄和体重数据(见表1-).我们感兴趣的是体重的分 布及体重和月龄的关系. 可以使用函数c()以向量的形式输入月龄和体重数据,此函 数 ...

  6. 吴裕雄--天生自然 R语言开发学习:基础知识

    1.基础数据结构 1.1 向量 # 创建向量a a <- c(1,2,3) print(a) 1.2 矩阵 #创建矩阵 mymat <- matrix(c(1:10), nrow=2, n ...

  7. 吴裕雄--天生自然 R语言开发学习:图形初阶(续二)

    # ----------------------------------------------------# # R in Action (2nd ed): Chapter 3 # # Gettin ...

  8. 吴裕雄--天生自然 R语言开发学习:图形初阶(续一)

    # ----------------------------------------------------# # R in Action (2nd ed): Chapter 3 # # Gettin ...

  9. 吴裕雄--天生自然 R语言开发学习:图形初阶

    # ----------------------------------------------------# # R in Action (2nd ed): Chapter 3 # # Gettin ...

  10. 吴裕雄--天生自然 R语言开发学习:基本图形(续二)

    #---------------------------------------------------------------# # R in Action (2nd ed): Chapter 6 ...

随机推荐

  1. dotnet core 链接mongodb

    导入命名空间 using MongoDB.Bson; using MongoDB.Driver; 测试示例: var client = new MongoClient("mongodb:// ...

  2. java.sql.BatchUpdateException: ORA-01691: Lob 段 CSASSSMBI.SYS_LOB0000076987C00003$$ 无法通过 128 (在表空间 HRDL_CSASS 中) 扩展

    问题: 在tomcat日志信息中出现:java.sql.BatchUpdateException: ORA-01691: Lob 段 CSASSSMBI.SYS_LOB0000076987C00003 ...

  3. Ubuntu---Git

    本篇文章简单总结了常用 Git 的使用 前言 设置用户信息 1, Git 是分布式的 SSH 代码管理工具,远程的代码管理是基于 SSH 的,所以要使用远程的 Git 则需要 SSH 的配置. ste ...

  4. console.log和alert的区别

    alert是同步的,如果不关闭弹出框,js代码就不会继续执行下去,这时候浏览器啥都干不了. console.log不会打断js的执行. 当要输出几十几百条信息的时候还是得用console.log,而且 ...

  5. 887C. Slava and tanks#轰炸弹坦克游戏(分析)

    题目出处:http://codeforces.com/problemset/problem/877/C 题目大意:按照游戏规则,求最小炸弹使用次数 #include<iostream> u ...

  6. Python实现自动处理表格,让你拥有更多的自由时间!

    相信有不少朋友日常工作会用到 Excel 处理各式表格文件,更有甚者可能要花大把时间来做繁琐耗时的表格整理工作.最近有朋友问可否编程来减轻表格整理工作量,今儿我们就通过实例来实现 Python 对表格 ...

  7. 如何用java实现图片与base64转换

    如果你是一个软件开发,不论前端后端工程师,图片的处理你是肯定要会的,关于图片的Base64编码,你可能有点陌生,但是这是一个软件工程师应该要掌握的知识点,现在很多网友把图片与base64转换都做成了小 ...

  8. ubantu中的mysql命令

    查看mysql的安装目录:which mysql 进入mysql的运行状态:mysql -uroot -p 56..a_

  9. kubernets基于容器日志的报警和服务自动恢复

    demo地址 https://github.com/cclient/kubernetes-filebeat-collector 高可用还谈不上,是对kubernete一种服务异常重启恢复的补充方案 之 ...

  10. Python语言学习前提:python安装和pycharm安装

    一.Windows系统python安装 1.python官网:https://www.python.org/downloads/ 2.官网首页:点击Downloads > Windows > ...