#-----------------------------------------------------------------------------#
# R in Action (2nd ed): Chapter 17 #
# Classification #
# requires packaged rpart, party, randomForest, kernlab, rattle #
# install.packages(c("rpart", "party", "randomForest", "e1071", "rpart.plot") #
# install.packages(rattle, dependencies = c("Depends", "Suggests")) #
#-----------------------------------------------------------------------------# par(ask=TRUE) # Listing 17.1 - Prepare the breast cancer data
loc <- "http://archive.ics.uci.edu/ml/machine-learning-databases/"
ds <- "breast-cancer-wisconsin/breast-cancer-wisconsin.data"
url <- paste(loc, ds, sep="") breast <- read.table(url, sep=",", header=FALSE, na.strings="?")
names(breast) <- c("ID", "clumpThickness", "sizeUniformity",
"shapeUniformity", "maginalAdhesion",
"singleEpithelialCellSize", "bareNuclei",
"blandChromatin", "normalNucleoli", "mitosis", "class") df <- breast[-1]
df$class <- factor(df$class, levels=c(2,4),
labels=c("benign", "malignant")) set.seed(1234)
train <- sample(nrow(df), 0.7*nrow(df))
df.train <- df[train,]
df.validate <- df[-train,]
table(df.train$class)
table(df.validate$class) # Listing 17.2 - Logistic regression with glm()
fit.logit <- glm(class~., data=df.train, family=binomial())
summary(fit.logit)
prob <- predict(fit.logit, df.validate, type="response")
logit.pred <- factor(prob > .5, levels=c(FALSE, TRUE),
labels=c("benign", "malignant"))
logit.perf <- table(df.validate$class, logit.pred,
dnn=c("Actual", "Predicted"))
logit.perf # Listing 17.3 - Creating a classical decision tree with rpart()
library(rpart)
set.seed(1234)
dtree <- rpart(class ~ ., data=df.train, method="class",
parms=list(split="information"))
dtree$cptable
plotcp(dtree) dtree.pruned <- prune(dtree, cp=.0125) library(rpart.plot)
prp(dtree.pruned, type = 2, extra = 104,
fallen.leaves = TRUE, main="Decision Tree") dtree.pred <- predict(dtree.pruned, df.validate, type="class")
dtree.perf <- table(df.validate$class, dtree.pred,
dnn=c("Actual", "Predicted"))
dtree.perf # Listing 17.4 - Creating a conditional inference tree with ctree()
library(party)
fit.ctree <- ctree(class~., data=df.train)
plot(fit.ctree, main="Conditional Inference Tree") ctree.pred <- predict(fit.ctree, df.validate, type="response")
ctree.perf <- table(df.validate$class, ctree.pred,
dnn=c("Actual", "Predicted"))
ctree.perf # Listing 17.5 - Random forest
library(randomForest)
set.seed(1234)
fit.forest <- randomForest(class~., data=df.train,
na.action=na.roughfix,
importance=TRUE)
fit.forest
importance(fit.forest, type=2)
forest.pred <- predict(fit.forest, df.validate)
forest.perf <- table(df.validate$class, forest.pred,
dnn=c("Actual", "Predicted"))
forest.perf # Listing 17.6 - A support vector machine
library(e1071)
set.seed(1234)
fit.svm <- svm(class~., data=df.train)
fit.svm
svm.pred <- predict(fit.svm, na.omit(df.validate))
svm.perf <- table(na.omit(df.validate)$class,
svm.pred, dnn=c("Actual", "Predicted"))
svm.perf # Listing 17.7 Tuning an RBF support vector machine (this can take a while)
set.seed(1234)
tuned <- tune.svm(class~., data=df.train,
gamma=10^(-6:1),
cost=10^(-10:10))
tuned
fit.svm <- svm(class~., data=df.train, gamma=.01, cost=1)
svm.pred <- predict(fit.svm, na.omit(df.validate))
svm.perf <- table(na.omit(df.validate)$class,
svm.pred, dnn=c("Actual", "Predicted"))
svm.perf # Listing 17.8 Function for assessing binary classification accuracy
performance <- function(table, n=2){
if(!all(dim(table) == c(2,2)))
stop("Must be a 2 x 2 table")
tn = table[1,1]
fp = table[1,2]
fn = table[2,1]
tp = table[2,2]
sensitivity = tp/(tp+fn)
specificity = tn/(tn+fp)
ppp = tp/(tp+fp)
npp = tn/(tn+fn)
hitrate = (tp+tn)/(tp+tn+fp+fn)
result <- paste("Sensitivity = ", round(sensitivity, n) ,
"\nSpecificity = ", round(specificity, n),
"\nPositive Predictive Value = ", round(ppp, n),
"\nNegative Predictive Value = ", round(npp, n),
"\nAccuracy = ", round(hitrate, n), "\n", sep="")
cat(result)
} # Listing 17.9 - Performance of breast cancer data classifiers
performance(dtree.perf)
performance(ctree.perf)
performance(forest.perf)
performance(svm.perf) # Using Rattle Package for data mining loc <- "http://archive.ics.uci.edu/ml/machine-learning-databases/"
ds <- "pima-indians-diabetes/pima-indians-diabetes.data"
url <- paste(loc, ds, sep="")
diabetes <- read.table(url, sep=",", header=FALSE)
names(diabetes) <- c("npregant", "plasma", "bp", "triceps",
"insulin", "bmi", "pedigree", "age", "class")
diabetes$class <- factor(diabetes$class, levels=c(0,1),
labels=c("normal", "diabetic"))
library(rattle)
rattle()

吴裕雄--天生自然 R语言开发学习:分类(续一)的更多相关文章

  1. 吴裕雄--天生自然 R语言开发学习:R语言的安装与配置

    下载R语言和开发工具RStudio安装包 先安装R

  2. 吴裕雄--天生自然 R语言开发学习:数据集和数据结构

    数据集的概念 数据集通常是由数据构成的一个矩形数组,行表示观测,列表示变量.表2-1提供了一个假想的病例数据集. 不同的行业对于数据集的行和列叫法不同.统计学家称它们为观测(observation)和 ...

  3. 吴裕雄--天生自然 R语言开发学习:导入数据

    2.3.6 导入 SPSS 数据 IBM SPSS数据集可以通过foreign包中的函数read.spss()导入到R中,也可以使用Hmisc 包中的spss.get()函数.函数spss.get() ...

  4. 吴裕雄--天生自然 R语言开发学习:使用键盘、带分隔符的文本文件输入数据

    R可从键盘.文本文件.Microsoft Excel和Access.流行的统计软件.特殊格 式的文件.多种关系型数据库管理系统.专业数据库.网站和在线服务中导入数据. 使用键盘了.有两种常见的方式:用 ...

  5. 吴裕雄--天生自然 R语言开发学习:R语言的简单介绍和使用

    假设我们正在研究生理发育问 题,并收集了10名婴儿在出生后一年内的月龄和体重数据(见表1-).我们感兴趣的是体重的分 布及体重和月龄的关系. 可以使用函数c()以向量的形式输入月龄和体重数据,此函 数 ...

  6. 吴裕雄--天生自然 R语言开发学习:基础知识

    1.基础数据结构 1.1 向量 # 创建向量a a <- c(1,2,3) print(a) 1.2 矩阵 #创建矩阵 mymat <- matrix(c(1:10), nrow=2, n ...

  7. 吴裕雄--天生自然 R语言开发学习:图形初阶(续二)

    # ----------------------------------------------------# # R in Action (2nd ed): Chapter 3 # # Gettin ...

  8. 吴裕雄--天生自然 R语言开发学习:图形初阶(续一)

    # ----------------------------------------------------# # R in Action (2nd ed): Chapter 3 # # Gettin ...

  9. 吴裕雄--天生自然 R语言开发学习:图形初阶

    # ----------------------------------------------------# # R in Action (2nd ed): Chapter 3 # # Gettin ...

  10. 吴裕雄--天生自然 R语言开发学习:基本图形(续二)

    #---------------------------------------------------------------# # R in Action (2nd ed): Chapter 6 ...

随机推荐

  1. @Component, @Repository, @Service,@Controller的区别

    @Component, @Service, @Controller, @Repository是spring注解,注解后可以被spring框架所扫描并注入到spring容器来进行管理 @Componen ...

  2. java 利用管道实现线程间通信

    package com.lb; import java.io.IOException;import java.io.PipedInputStream;import java.io.PipedOutpu ...

  3. Sublime Text3 作Markdown编辑器 配置

    准备 Sublime 配置 1 Package Control 2 安装插件 测试 其他 1. 准备 Windows操作系统 Sublime Text3 编辑器 Web浏览器 2. Sublime 配 ...

  4. 卷积神经网络---padding、 pool、 Activation layer

    #coding:utf-8 import tensorflow as tf tf.reset_default_graph() image = tf.random_normal([1, 112, 96, ...

  5. LFW Face Database下载

    http://vis-www.cs.umass.edu/lfw/ Download the database: All images as gzipped tar file (173MB, md5su ...

  6. 迅为iTOP-开发板-驱动-can和rfid配置

    在迅为开发板中,在 4412,4418 以及 6818 中,有的开发板默认配置 RFID,有的默认配 置 CAN 驱动(IMX6 默认都配置). 本文档介绍如何配置 CAN 和 RFID 的驱动. 截 ...

  7. ansible-playbook编写服务器初始化脚本

    1.原理:通过limit的参数,限制新定义的服务器.即可给新买的服务器初始化优化.(如下图所示) 首先我们编写一个总入口的palybook脚本: init.yml --- - hosts: all u ...

  8. shell_跳板机推送公钥

    #!/bin/bash#push publickey to aap-servers#将局域网内可以ping通的主机ip保存到一个文件> ip_up.txtfor i in {2..10}do { ...

  9. 不同SQL数据库之间表数据的实时同步-发布与订阅

    https://blog.csdn.net/rand_muse/article/details/81326879 上述文章中,如果是实时同步,选择  事务发布即可 在快照代理 那里,不需要勾选  运行 ...

  10. Fiddler发送get post测试 笔记

    0 环境 系统环境:win7 1 操作 1 post 类似 2 get