R: ggplot2 数据可视化（Part 1）

本教程分为三个部分，此篇为 Part 1，推荐先学习一些 R 基础知识后再阅读~

Part 1: Introduction to ggplot2, 覆盖构建简单图表并进行修饰的基础知识
Part 2: Customizing the Look and Feel, 更高级的自定义图形
Part 3: Top 50 Ggplot2 Visualizations - The Master List, 应用Part1、2部分知识创建进阶图形

1 理解ggplot语法

（1）对数据框类型数据进行可视化

（2）可以叠加层来不断丰富图形信息

让我们基于midwest数据集来初始化一个基本的图形：

# Setup ----
library(ggplot2)

# Print large numbers in full (1000000) rather than in scientific
# notation (1e+06).
options(scipen = 999)

# Load the `midwest` data set from the ggplot2 package.
data("midwest", package = "ggplot2")
# Alternative source:
# midwest <- read.csv("http://goo.gl/G1K41K")

# Initialise the plot ----
# `area` and `poptotal` are columns of `midwest`. No geom layer is added here.
ggplot(data = midwest, mapping = aes(x = area, y = poptotal))

aes()函数用来专门指定x和y轴，源数据框的任何信息都需要在这个函数中特意指定。此时只会得到一张空白画布，因为尚未添加任何几何对象（geom）图层。

2 线性模型拟合散点图

library(ggplot2)

# Scatter plot of population against area with a fitted linear-model line.
# Pass se = FALSE to geom_smooth() to turn off the confidence band.
g <- ggplot(data = midwest, mapping = aes(x = area, y = poptotal)) +
  geom_point() +
  geom_smooth(method = "lm")

plot(g)

运行 ?geom_smooth 可查看该函数的帮助文档。

3 调整x y轴范围

# Method 1: limit the axes by deleting the points outside the range ----
library(ggplot2)

# Base scatter plot with a linear-model line
# (pass se = FALSE to geom_smooth() to turn off the confidence band).
g <- ggplot(data = midwest, mapping = aes(x = area, y = poptotal)) +
  geom_point() +
  geom_smooth(method = "lm")

# xlim() / ylim() delete the points that fall outside the limits.
g +
  xlim(c(0, 0.1)) +
  ylim(c(0, 1000000))

# Method 2: zoom in ----
library(ggplot2)

# Base scatter plot with a linear-model line
# (pass se = FALSE to geom_smooth() to turn off the confidence band).
g <- ggplot(data = midwest, mapping = aes(x = area, y = poptotal)) +
  geom_point() +
  geom_smooth(method = "lm")

# coord_cartesian() zooms in without deleting the points outside the limits,
# so the line of best fit is the same as in the original plot.
g1 <- g +
  coord_cartesian(
    xlim = c(0, 0.1),
    ylim = c(0, 1000000)
  )

plot(g1)

4 改变标题

# Option 1: full plot call, setting every label through labs() ----
library(ggplot2)

ggplot(data = midwest, mapping = aes(x = area, y = poptotal)) +
  geom_point() +
  geom_smooth(method = "lm") +
  coord_cartesian(xlim = c(0, 0.1), ylim = c(0, 1000000)) +
  labs(
    title = "Area Vs Population",
    subtitle = "From midwest dataset",
    y = "Population",
    x = "Area",
    caption = "Midwest Demographics"
  )

# Option 2: add the title and axis labels to the existing zoomed plot `g1`
# with ggtitle() / xlab() / ylab().
g1 +
  ggtitle("Area Vs Population", subtitle = "From midwest dataset") +
  xlab("Area") +
  ylab("Population")

5 改变点的颜色和大小

library(ggplot2)

# Colour and size given outside aes() are static: they apply to every point
# (steelblue, size 3) and to the fitted line (firebrick).
ggplot(data = midwest, mapping = aes(x = area, y = poptotal)) +
  geom_point(col = "steelblue", size = 3) +
  geom_smooth(method = "lm", col = "firebrick") +
  coord_cartesian(xlim = c(0, 0.1), ylim = c(0, 1000000)) +
  labs(
    title = "Area Vs Population",
    subtitle = "From midwest dataset",
    y = "Population",
    x = "Area",
    caption = "Midwest Demographics"
  )

改变颜色以反映另一列变量的类别

library(ggplot2)

# Mapping `col` inside aes() makes the point colour vary with the `state`
# category; the size stays static.
# NOTE(review): ggplot2 >= 3.4.0 prefers `linewidth` over `size` for lines.
gg <- ggplot(data = midwest, mapping = aes(x = area, y = poptotal)) +
  geom_point(mapping = aes(col = state), size = 3) +
  geom_smooth(method = "lm", col = "firebrick", size = 2) +
  coord_cartesian(xlim = c(0, 0.1), ylim = c(0, 1000000)) +
  labs(
    title = "Area Vs Population",
    subtitle = "From midwest dataset",
    y = "Population",
    x = "Area",
    caption = "Midwest Demographics"
  )

plot(gg)

color, size, shape, stroke (thickness of boundary) and fill (fill color) 均可指定

也可以改变调色板

# Switch the colour scale of `gg` to the "Set1" brewer palette.
gg +
  scale_colour_brewer(palette = "Set1")

更多调色板可以在 RColorBrewer 包中找到

library(RColorBrewer)

# Show the first 10 palettes listed in `brewer.pal.info`.
head(brewer.pal.info, n = 10)
#>          maxcolors category colorblind
#> BrBG            11      div       TRUE
#> PiYG            11      div       TRUE
#> PRGn            11      div       TRUE
#> PuOr            11      div       TRUE
#> RdBu            11      div       TRUE
#> RdGy            11      div      FALSE
#> RdYlBu          11      div       TRUE
#> RdYlGn          11      div      FALSE
#> Spectral        11      div      FALSE
#> Accent           8     qual      FALSE

6 改变x轴文本和刻度位置

breaks and labels

Step 1: Set the breaks

scale_x_continuous —— X 轴变量是连续变量

scale_x_date ——日期变量

library(ggplot2)

# Base plot: points coloured by `state`, plus a linear-model line.
# NOTE(review): ggplot2 >= 3.4.0 prefers `linewidth` over `size` for lines.
gg <- ggplot(data = midwest, mapping = aes(x = area, y = poptotal)) +
  geom_point(mapping = aes(col = state), size = 3) +
  geom_smooth(method = "lm", col = "firebrick", size = 2) +
  coord_cartesian(xlim = c(0, 0.1), ylim = c(0, 1000000)) +
  labs(
    title = "Area Vs Population",
    subtitle = "From midwest dataset",
    y = "Population",
    x = "Area",
    caption = "Midwest Demographics"
  )

# Put an x-axis break at every 0.01 from 0 to 0.1.
gg +
  scale_x_continuous(breaks = seq(from = 0, to = 0.1, by = 0.01))

Step 2: Change the labels

改变坐标轴刻度处的标签（labels）。labels 向量的长度需要和 breaks 向量的长度保持一致。

# Fixed: the package is `ggplot2`; `library(ggplots)` fails because no such
# package exists.
library(ggplot2)

# Base plot: points coloured by `state`, plus a linear-model line.
# NOTE(review): ggplot2 >= 3.4.0 prefers `linewidth` over `size` for lines.
gg <- ggplot(midwest, aes(x = area, y = poptotal)) +
  geom_point(aes(col = state), size = 3) +
  geom_smooth(method = "lm", col = "firebrick", size = 2) +
  coord_cartesian(xlim = c(0, 0.1), ylim = c(0, 1000000)) +
  labs(
    title = "Area Vs Population",
    subtitle = "From midwest dataset",
    y = "Population",
    x = "Area",
    caption = "Midwest Demographics"
  )

# Change breaks and labels together. `labels` must have the same length as
# `breaks`: seq(0, 0.1, 0.01) has 11 values, matched by letters[1:11].
gg +
  scale_x_continuous(breaks = seq(0, 0.1, 0.01), labels = letters[1:11])

# Reverse the x-axis scale.
gg +
  scale_x_reverse()

为轴标签自定义文本

Method 1: Using sprintf(). (Have formatted it as % in below example)

Method 2: Using a custom user defined function. (Formatted 1000’s to 1K scale)

library(ggplot2)

# Base plot: points coloured by `state`, plus a linear-model line.
# NOTE(review): ggplot2 >= 3.4.0 prefers `linewidth` over `size` for lines.
gg <- ggplot(data = midwest, mapping = aes(x = area, y = poptotal)) +
  geom_point(mapping = aes(col = state), size = 3) +
  geom_smooth(method = "lm", col = "firebrick", size = 2) +
  coord_cartesian(xlim = c(0, 0.1), ylim = c(0, 1000000)) +
  labs(
    title = "Area Vs Population",
    subtitle = "From midwest dataset",
    y = "Population",
    x = "Area",
    caption = "Midwest Demographics"
  )

# Change the axis texts:
#   x axis - labels pre-built with sprintf(): two decimals plus a literal "%"
#   y axis - labels built by a function: the break value divided by 1000,
#            with a "K" suffix
gg +
  scale_x_continuous(
    breaks = seq(from = 0, to = 0.1, by = 0.01),
    labels = sprintf("%1.2f%%", seq(from = 0, to = 0.1, by = 0.01))
  ) +
  scale_y_continuous(
    breaks = seq(from = 0, to = 1000000, by = 200000),
    labels = function(x) paste0(x / 1000, "K")
  )

使用内置主题一次性自定义整个主题

?theme_bw

方法一：在绘制 ggplot 之前使用 theme_set() 设置主题。注意：该设置会影响之后绘制的所有图形。

方法二：先绘制 ggplot，再添加整体主题图层（例如 theme_bw()）。

library(ggplot2)

# Base plot: points coloured by `state`, a linear-model line, and x-axis
# breaks at every 0.01.
# NOTE(review): ggplot2 >= 3.4.0 prefers `linewidth` over `size` for lines.
gg <- ggplot(data = midwest, mapping = aes(x = area, y = poptotal)) +
  geom_point(mapping = aes(col = state), size = 3) +
  geom_smooth(method = "lm", col = "firebrick", size = 2) +
  coord_cartesian(xlim = c(0, 0.1), ylim = c(0, 1000000)) +
  labs(
    title = "Area Vs Population",
    subtitle = "From midwest dataset",
    y = "Population",
    x = "Area",
    caption = "Midwest Demographics"
  ) +
  scale_x_continuous(breaks = seq(from = 0, to = 0.1, by = 0.01))

# Method 1: theme_set() ----
# This call is executed as written and changes the session's default theme.
theme_set(theme_classic())
gg

# Method 2: add the theme as a layer on a single plot ----
gg +
  theme_bw() +
  labs(subtitle = "BW Theme")

gg +
  theme_classic() +
  labs(subtitle = "Classic Theme")

更多主题可以参考 ggthemes 包和 ggthemr 包。

参考：

英文教程：http://r-statistics.co/Complete-Ggplot2-Tutorial-Part1-With-R-Code.html