Univariate plotting with pandas

import pandas as pd
reviews = pd.read_csv("../input/wine-reviews/winemag-data_first150k.csv", index_col=)
reviews.head() //bar
reviews['province'].value_counts().head().plot.bar()
(reviews['province'].value_counts().head() / len(reviews)).plot.bar()
reviews['points'].value_counts().sort_index().plot.bar() //line chart
reviews['points'].value_counts().sort_index().plot.line() //area chart
reviews['points'].value_counts().sort_index().plot.area() //histograms
reviews[reviews['price'] < ]['price'].plot.hist()
reviews['price'].plot.hist()
reviews[reviews['price'] > ] //pie chart
reviews['province'].value_counts().head().plot.pie()

Bivariate plotting with pandas

import pandas as pd
reviews = pd.read_csv("../input/wine-reviews/winemag-data_first150k.csv", index_col=0)
reviews.head() //Scatter plot
reviews[reviews['price'] < 100].sample(100).plot.scatter(x='price', y='points') //hexplot 数据相关性
reviews[reviews['price'] < 100].plot.hexbin(x='price', y='points', gridsize=15) //stackplot 数据堆叠
wine_counts.plot.bar(stacked=True)
wine_counts.plot.area() //Bivariate line chart 线集成
wine_counts.plot.line()

Plotting with seaborn

import pandas as pd
reviews = pd.read_csv("../input/wine-reviews/winemag-data_first150k.csv", index_col=0)
import seaborn as sns //Countplot
sns.countplot(reviews['points']) //KDE Plot 平滑去噪
sns.kdeplot(reviews.query('price < 200').price)
//对比线图
reviews[reviews['price'] < 200]['price'].value_counts().sort_index().plot.line()
//二维ked
sns.kdeplot(reviews[reviews['price'] < 200].loc[:, ['price', 'points']].dropna().sample(5000)) //Distplot
sns.distplot(reviews['points'], bins=10, kde=False) //jointplot
sns.jointplot(x='price', y='points', data=reviews[reviews['price'] < 100])
sns.jointplot(x='price', y='points', data=reviews[reviews['price'] < 100], kind='hex', gridsize=20) //Boxplot and violin plot 25%-75%,中线
df = reviews[reviews.variety.isin(reviews.variety.value_counts().head(5).index)] sns.boxplot(
x='variety',
y='points',
data=df
)

Faceting with seaborn

import pandas as pd
pd.set_option('max_columns', None)
df = pd.read_csv("../input/fifa-18-demo-player-dataset/CompleteDataset.csv", index_col=0) import re
import numpy as np
import seaborn as sns footballers = df.copy()
footballers['Unit'] = df['Value'].str[-1]
footballers['Value (M)'] = np.where(footballers['Unit'] == '', 0,
footballers['Value'].str[1:-1].replace(r'[a-zA-Z]',''))
footballers['Value (M)'] = footballers['Value (M)'].astype(float)
footballers['Value (M)'] = np.where(footballers['Unit'] == 'M',
footballers['Value (M)'],
footballers['Value (M)']/1000)
footballers = footballers.assign(Value=footballers['Value (M)'],
Position=footballers['Preferred Positions'].str.split().str[0]) //The FacetGrid
df = footballers[footballers['Position'].isin(['ST', 'GK'])]
g = sns.FacetGrid(df, col="Position")
g.map(sns.kdeplot, "Overall") df = footballers
g = sns.FacetGrid(df, col="Position", col_wrap=6)//,每行6列
g.map(sns.kdeplot, "Overall") df = footballers[footballers['Position'].isin(['ST', 'GK'])]
df = df[df['Club'].isin(['Real Madrid CF', 'FC Barcelona', 'Atlético Madrid'])]
g = sns.FacetGrid(df, row="Position", col="Club",
row_order=['GK', 'ST'],
col_order=['Atlético Madrid', 'FC Barcelona', 'Real Madrid CF'])
g.map(sns.violinplot, "Overall") //violin图 //Pairplot 数据分析第一步
sns.pairplot(footballers[['Overall', 'Potential', 'Value']])

Multivariate plotting

import pandas as pd
pd.set_option('max_columns', None)
df = pd.read_csv("../input/fifa-18-demo-player-dataset/CompleteDataset.csv", index_col=0) import re
import numpy as np footballers = df.copy()
footballers['Unit'] = df['Value'].str[-1]
footballers['Value (M)'] = np.where(footballers['Unit'] == '', 0,
footballers['Value'].str[1:-1].replace(r'[a-zA-Z]',''))
footballers['Value (M)'] = footballers['Value (M)'].astype(float)
footballers['Value (M)'] = np.where(footballers['Unit'] == 'M',
footballers['Value (M)'],
footballers['Value (M)']/1000)
footballers = footballers.assign(Value=footballers['Value (M)'],
Position=footballers['Preferred Positions'].str.split().str[0]) //Multivariate scatter plots
import seaborn as sns
sns.lmplot(x='Value', y='Overall', hue='Position',
data=footballers.loc[footballers['Position'].isin(['ST', 'RW', 'LW'])],
fit_reg=False) sns.lmplot(x='Value', y='Overall', markers=['o', 'x', '*'], hue='Position',
data=footballers.loc[footballers['Position'].isin(['ST', 'RW', 'LW'])],
fit_reg=False
) //Grouped box plot 分组的优势
f = (footballers
.loc[footballers['Position'].isin(['ST', 'GK'])]
.loc[:, ['Value', 'Overall', 'Aggression', 'Position']]
)
f = f[f["Overall"] >= 80]
f = f[f["Overall"] < 85]
f['Aggression'] = f['Aggression'].astype(float)
sns.boxplot(x="Overall", y="Aggression", hue='Position', data=f) //Heatmap
f = (
footballers.loc[:, ['Acceleration', 'Aggression', 'Agility', 'Balance', 'Ball control']]
.applymap(lambda v: int(v) if str.isdecimal(v) else np.nan)
.dropna()
).corr()
sns.heatmap(f, annot=True) //Parallel Coordinates
from pandas.plotting import parallel_coordinates f = (
footballers.iloc[:, 12:17]
.loc[footballers['Position'].isin(['ST', 'GK'])]
.applymap(lambda v: int(v) if str.isdecimal(v) else np.nan)
.dropna()
)
f['Position'] = footballers['Position']
f = f.sample(200)
parallel_coordinates(f, 'Position')

plotly

import pandas as pd
reviews = pd.read_csv("../input/wine-reviews/winemag-data-130k-v2.csv", index_col=0)
reviews.head() from plotly.offline import init_notebook_mode, iplot
init_notebook_mode(connected=True) #离线注入笔记本模式 import plotly.graph_objs as go
iplot([go.Scatter(x=reviews.head(1000)['points'], y=reviews.head(1000)['price'], mode='markers')]) iplot([go.Histogram2dContour(x=reviews.head(500)['points'],
y=reviews.head(500)['price'],
contours=go.Contours(coloring='heatmap')),
go.Scatter(x=reviews.head(1000)['points'], y=reviews.head(1000)['price'], mode='markers')]) #surface图
df = reviews.assign(n=0).groupby(['points', 'price'])['n'].count().reset_index() #先point分组再price分,再添加的‘n’列上执行计数,最后对首列的index重新排序
df = df[df["price"] < 100]
v = df.pivot(index='price', columns='points', values='n').fillna(0).values.tolist() #重塑数组后用0填充NAN值,再把values列变成list
iplot([go.Surface(z=v)]) #地理图
df = reviews['country'].replace("US", "United States").value_counts() iplot([go.Choropleth(
locationmode='country names',
locations=df.index.values,
text=df.index,
z=df.values
)])

Data Visualisation Cheet Sheet的更多相关文章

  1. Object对象方法 cheet sheet

    defineProperty create Object.create(prototype [, propertiesObject ]) prototype:没什么可说的,指定对象的原型 proper ...

  2. 使用Python对Twitter进行数据挖掘(Mining Twitter Data with Python)

    目录 1.Collecting data 1.1 Register Your App 1.2 Accessing the Data 1.3 Streaming 2.Text Pre-processin ...

  3. 学习笔记之Data Visualization

    Data visualization - Wikipedia https://en.wikipedia.org/wiki/Data_visualization Data visualization o ...

  4. Mining Twitter Data with Python

    目录 1.Collecting data 1.1 Register Your App 1.2 Accessing the Data 1.3 Streaming 2.Text Pre-processin ...

  5. Import Data from *.xlsx file to DB Table through OAF page(转)

    Use  Poi.jar Import Data from *.xlsx file to DB Table through OAF page Use Jxl.jar Import Data from ...

  6. 【翻译】Awesome R资源大全中文版来了,全球最火的R工具包一网打尽,超过300+工具,还在等什么?

    0.前言 虽然很早就知道R被微软收购,也很早知道R在统计分析处理方面很强大,开始一直没有行动过...直到 直到12月初在微软技术大会,看到我软的工程师演示R的使用,我就震惊了,然后最近在网上到处了解和 ...

  7. R统计分析处理

    [翻译]Awesome R资源大全中文版来了,全球最火的R工具包一网打尽,超过300+工具,还在等什么? 阅读目录 0.前言 1.集成开发环境 2.语法 3.数据操作 4.图形显示 5.HTML部件 ...

  8. Sed&awk笔记之awk篇

    http://blog.csdn.net/a81895898/article/details/8482333 Awk是什么 Awk.sed与grep,俗称Linux下的三剑客,它们之间有很多相似点,但 ...

  9. R工具包一网打尽

    这里有很多非常不错的R包和工具. 该想法来自于awesome-machine-learning. 这里是包的导航清单,看起来更方便 >>>导航清单 通过这些翻译了解这些工具包,以后干 ...

随机推荐

  1. SPSS科普 | 统计描述

    SPSS科普 | 统计描述 统计描述的目的就是了解数据的基本特征和分布规律,为进一步合理地选择统计方法提供依据.常用的有Frequencies.Descriptives 和Explore过程. 一.F ...

  2. 2018-12-21-WPF-弹出-popup-里面的-TextBox-无法输入汉字

    title author date CreateTime categories WPF 弹出 popup 里面的 TextBox 无法输入汉字 lindexi 2018-12-21 18:10:30 ...

  3. Cat- Linux必学的60个命令

    1.作用 cat(“concatenate”的缩写)命令用于连接并显示指定的一个和多个文件的有关信息,它的使用权限是所有用户. 2.格式 cat [options] 文件1 文件2…… 3.[opti ...

  4. JZOJ5857 【NOIP提高组模拟A组2018.9.8】没有上司的舞会

    题目 Description "那么真的有果尔德施坦因这样一个人?"他问道. "是啊,有这样一个人,他还活着.至于在哪里,我就不知道了." "那么那个 ...

  5. html常用标签详解4-列表标签

    列表标签 列表标签分为3大类:无序列表.有序列表.自定义列表.线面我会依依简单介绍 一.无序列表 有个type属性: 默认:disc:实心小圆点:circle:空心小圆点:square:实心小方块:n ...

  6. 使用cmd查看windows端口占用情况,并关闭应用

    在做开发的时候常常会遇到端口被占用的问题,下面是我在网上找的比较好用的一种关闭占用端口进程的方法 1.在运行中输入cmd打开dos命令窗口,比如我想找到端口8888对应的PID(通过PID找到相应的进 ...

  7. KOA 学习(七) 路由koa-router

    一.基本用法 var app = require('koa')(); var router = require('koa-router')(); router.get('/', function *( ...

  8. mybatis学习:mybatis注解开发一对一的查询配置

    实体类: public class Account { private Integer id; private Integer uid; private Double money; private U ...

  9. 【vuex】vue2-happyfri

    我发现我对使用vuex并不擅长,现在跟我一起多多研究项目,好好补补vuex吧 这个开源项目地址为:https://github.com/bailicangdu/vue2-happyfri 这是一个答题 ...

  10. 【转载】TSN简介及相关资源

    原文:https://blog.csdn.net/u012692537/article/details/86188392 一.简介 1.1 什么是TSN TSN(Time Sensitive Netw ...