吴裕雄 python 数据处理(2)
import pandas as pd
# Load the weather CSV and preview its first rows.
weather_csv = "F:\\python3_pachongAndDatareduce\\data\\pandas data\\hz_weather.csv"
data = pd.read_csv(weather_csv)
print(data.head())

# stack() folds the column labels into an inner index level (wide -> long).
stacked = data.stack()
print(stacked)

# unstack() moves that inner index level back out into columns (long -> wide).
unstacked = stacked.unstack()
print(unstacked)

import pandas as pd
# Load the weather CSV and preview it with the default integer index.
weather_csv = "F:\\python3_pachongAndDatareduce\\data\\pandas data\\hz_weather.csv"
data = pd.read_csv(weather_csv)
print(data.head())

# Re-index the rows by the "日期" column; set_index returns a new frame.
df = data.set_index(keys="日期")
print(df.head())

import pandas as pd
data = pd.read_csv("F:\\python3_pachongAndDatareduce\\data\\pandas data\\hz_weather.csv")
# Cross-tabulate "最高气温" by "天气" (rows) and "风向" (columns);
# pivot_table aggregates each cell with the mean by default.
a = pd.pivot_table(data,values=["最高气温"],index=["天气"],columns=["风向"])
print(a)
# DataFrame.info() writes its report to stdout itself and returns None, so it
# must not be wrapped in print() -- that would emit a stray "None" line.
a.info()

import pandas as pd
# Load the free-trip CSV and show its first five rows.
free_trip_csv = "F:\\python3_pachongAndDatareduce\\data\\pandas data\\qunar_free_trip.csv"
data = pd.read_csv(free_trip_csv)
preview = data.head(5)
print(preview)

import pandas as pd
free_trip_csv = "F:\\python3_pachongAndDatareduce\\data\\pandas data\\qunar_free_trip.csv"
data = pd.read_csv(free_trip_csv)

# Mean "价格" for each ("出发地", "目的地") pair. The result is a Series
# indexed by those two keys.
route_groups = data.groupby(["出发地", "目的地"])
a = route_groups["价格"].mean()
print(a)

import pandas as pd
# Load the route-count CSV and show its first five rows.
route_cnt_csv = "F:\\python3_pachongAndDatareduce\\data\\pandas data\\qunar_route_cnt.csv"
data = pd.read_csv(route_cnt_csv)
preview = data.head(5)
print(preview)

import pandas as pd
data = pd.read_csv("F:\\python3_pachongAndDatareduce\\data\\pandas data\\qunar_free_trip.csv")
# Average every numeric column per ("出发地", "目的地") pair.
# as_index=False keeps the two keys as ordinary columns instead of an index.
# numeric_only=True: since pandas 2.0, mean() no longer silently skips
# non-numeric columns and raises TypeError on them; restricting the
# aggregation to numeric columns keeps the pre-2.0 result.
# NOTE(review): assumes the file has text columns besides the keys -- if it
# does not, numeric_only=True is simply a no-op.
a = data.groupby(["出发地","目的地"],as_index=False).mean(numeric_only=True)
print(a)

import pandas as pd
data = pd.read_csv("F:\\python3_pachongAndDatareduce\\data\\pandas data\\qunar_route_cnt.csv")
print(data.head())
data_1 = pd.read_csv("F:\\python3_pachongAndDatareduce\\data\\pandas data\\qunar_free_trip.csv")
print(data_1.head())
# Per-route averages of the numeric columns, with the keys kept as columns
# (as_index=False) so they can serve as join keys below.
# numeric_only=True: since pandas 2.0, mean() raises TypeError on
# non-numeric columns instead of silently dropping them.
a = data_1.groupby(["出发地","目的地"],as_index=False).mean(numeric_only=True)
print(a.head())
# With no `on=` argument, merge performs an inner join on every column name
# the two frames have in common.
# NOTE(review): presumably that is exactly "出发地" and "目的地" -- confirm
# against the route-count file's header.
b = pd.merge(a,data)
print(b.head())

import pandas as pd
free_trip_csv = "F:\\python3_pachongAndDatareduce\\data\\pandas data\\qunar_free_trip.csv"
data_1 = pd.read_csv(free_trip_csv)

# Mean "价格" laid out as a grid: one row per "出发地", one column per
# "目的地" (pivot_table aggregates with the mean by default).
a = data_1.pivot_table(
    values=["价格"],
    index=["出发地"],
    columns=["目的地"],
)
print(a.head())

import pandas as pd
free_trip_csv = "F:\\python3_pachongAndDatareduce\\data\\pandas data\\qunar_free_trip.csv"
data_1 = pd.read_csv(free_trip_csv)

# Keep only the rows whose "出发地" equals "杭州".
departs_here = data_1["出发地"] == "杭州"
subset = data_1[departs_here]

# Mean "价格" per ("出发地", "目的地") row, split into one column per "去程方式".
a = subset.pivot_table(
    values=["价格"],
    index=["出发地", "目的地"],
    columns=["去程方式"],
)
print(a)

import pandas as pd
weather_csv = "F:\\python3_pachongAndDatareduce\\data\\pandas data\\hz_weather.csv"
data_1 = pd.read_csv(weather_csv)
print(data_1.head())

# Boolean mask of missing cells in the raw table (isna is the same method as isnull).
raw_missing = data_1.isna()
print(raw_missing.head())

# Mean "最高气温" per "天气" row and "风向" column; combinations that never
# occur in the data come out as NaN.
a = data_1.pivot_table(values=["最高气温"], index=["天气"], columns=["风向"])
print(a)

# Boolean mask of the NaN cells in the pivoted table.
pivot_missing = a.isna()
print(pivot_missing)

import pandas as pd
weather_csv = "F:\\python3_pachongAndDatareduce\\data\\pandas data\\hz_weather.csv"
data_1 = pd.read_csv(weather_csv)

# Mean "最高气温" per "天气" row and "风向" column.
a = data_1.pivot_table(values=["最高气温"], index=["天气"], columns=["风向"])
print(a)

# Drop every row that contains at least one NaN.
b = a.dropna(axis="index")
print(b)

import pandas as pd
weather_csv = "F:\\python3_pachongAndDatareduce\\data\\pandas data\\hz_weather.csv"
data_1 = pd.read_csv(weather_csv)

# Mean "最高气温" per "天气" row and "风向" column.
a = data_1.pivot_table(values=["最高气温"], index=["天气"], columns=["风向"])
print(a)

# Drop every column that contains at least one NaN.
b = a.dropna(axis="columns")
print(b)

import pandas as pd
weather_csv = "F:\\python3_pachongAndDatareduce\\data\\pandas data\\hz_weather.csv"
data_1 = pd.read_csv(weather_csv)

# Mean "最高气温" per "天气" row and "风向" column.
a = data_1.pivot_table(values=["最高气温"], index=["天气"], columns=["风向"])
print(a)

# Replace every NaN cell with the literal string "missing".
placeholder = "missing"
b = a.fillna(value=placeholder)
print(b)

import pandas as pd
data_1 = pd.read_csv("F:\\python3_pachongAndDatareduce\\data\\pandas data\\hz_weather.csv")
# Mean "最高气温" per "天气" row and "风向" column.
a = pd.pivot_table(data_1,values=["最高气温"],index=["天气"],columns=["风向"])
print(a)
# Forward-fill: each NaN takes the last valid value above it in its column.
# ffill() is the supported spelling; fillna(method="pad") is deprecated in
# recent pandas releases and slated for removal.
b = a.ffill()
print(b)

import pandas as pd
data_1 = pd.read_csv("F:\\python3_pachongAndDatareduce\\data\\pandas data\\hz_weather.csv")
# Mean "最高气温" per "天气" row and "风向" column.
a = pd.pivot_table(data_1,values=["最高气温"],index=["天气"],columns=["风向"])
print(a)
# Backward-fill: each NaN takes the next valid value below it in its column.
# limit=1 fills at most one consecutive NaN per gap.
# bfill() is the supported spelling; fillna(method="bfill") is deprecated in
# recent pandas releases and slated for removal.
b = a.bfill(limit=1)
print(b)

import pandas as pd
weather_csv = "F:\\python3_pachongAndDatareduce\\data\\pandas data\\hz_weather.csv"
data_1 = pd.read_csv(weather_csv)

# Mean "最高气温" per "天气" row and "风向" column.
a = data_1.pivot_table(values=["最高气温"], index=["天气"], columns=["风向"])
print(a)

# Fill each column's NaN cells with that column's own mean
# (fillna aligns the Series of means on the column labels).
column_means = a.mean()
b = a.fillna(column_means)
print(b)

%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
weather_csv = "F:\\python3_pachongAndDatareduce\\data\\pandas data\\hz_weather.csv"
df = pd.read_csv(weather_csv)
print(df.shape)
print(df.head())

# Histogram of "最低气温" in 20 bins on a single 8x5 axes.
fig, ax = plt.subplots(figsize=(8, 5))
ax.hist(df["最低气温"], bins=20)
plt.show()

# Standardise the column (z-score) and flag rows lying more than three
# standard deviations from the mean.
d = df["最低气温"]
zscore = d.sub(d.mean()).div(d.std())
df["isOutlier"] = zscore.abs().gt(3)
print(df.head())

# How many rows were / were not flagged.
a = df["isOutlier"].value_counts()
print(a)


%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
df = pd.read_csv("F:\\python3_pachongAndDatareduce\\data\\pandas data\\sale_data.csv")
print(np.shape(df))
print(df.head())
# Restrict to one seller. .copy() makes `a` an independent frame, so adding
# a column below does not write into a slice of df (SettingWithCopyWarning).
a = df[df["卖家"]=="夏奈凤凰旗舰店"].copy()
fig,ax = plt.subplots(1,1,figsize=(8,5))
a.boxplot(column="成交量",ax=ax)
plt.show()
b = a["成交量"]
print(b.describe())
# Flag rows whose "成交量" exceeds this seller's own 75th percentile.
# The original compared against `d`, a name this cell never defines; in a
# live notebook it would silently pick up a stale Series from another cell.
a["isOutlier"]=b>b.quantile(0.75)
c = a[a["isOutlier"]]
print(c)

import numpy as np
import pandas as pd
weather_csv = "F:\\python3_pachongAndDatareduce\\data\\pandas data\\hz_weather.csv"
df = pd.read_csv(weather_csv)
print(df.shape)
print(df.head())

# One boolean per row: True when the whole row repeats an earlier row.
a = df.duplicated()
print(a.shape)
print(a.head(5))

import numpy as np
import pandas as pd
weather_csv = "F:\\python3_pachongAndDatareduce\\data\\pandas data\\hz_weather.csv"
df = pd.read_csv(weather_csv)
print(df.shape)

# Move "日期" into the index, so it no longer counts as a column when rows
# are compared for duplication below.
a = df.set_index(keys="日期")
print(a.head())

# True where all remaining columns repeat an earlier row.
b = a.duplicated()
print(b.iloc[:5])

import numpy as np
import pandas as pd
weather_csv = "F:\\python3_pachongAndDatareduce\\data\\pandas data\\hz_weather.csv"
df = pd.read_csv(weather_csv)
print(df.shape)

a = df.set_index(keys="日期")
print(a.head())

# Compare rows on "最高气温" only: True where that value was seen earlier.
b = a.duplicated(subset="最高气温")
print(b.iloc[:5])

import numpy as np
import pandas as pd
weather_csv = "F:\\python3_pachongAndDatareduce\\data\\pandas data\\hz_weather.csv"
df = pd.read_csv(weather_csv)
print(type(df))
print(df.shape)

a = df.set_index(keys="日期")
print(a.head())

# Keep only the first row for each distinct "最高气温" value.
b = a.drop_duplicates(subset="最高气温")
print(b.shape)
print(b.head())

import numpy as np
import pandas as pd
df = pd.read_csv("F:\\python3_pachongAndDatareduce\\data\\pandas data\\qunar_free_trip.csv")
print(df.head())
# DataFrame.info() writes its report to stdout itself and returns None, so it
# must not be wrapped in print() -- that would emit a stray "None" line.
df.info()

import numpy as np
import pandas as pd
free_trip_csv = "F:\\python3_pachongAndDatareduce\\data\\pandas data\\qunar_free_trip.csv"
df = pd.read_csv(free_trip_csv)

# Tally how many rows are (True) / are not (False) exact repeats of an earlier row.
is_repeat = df.duplicated()
a = is_repeat.value_counts()
print(a)

import numpy as np
import pandas as pd
free_trip_csv = "F:\\python3_pachongAndDatareduce\\data\\pandas data\\qunar_free_trip.csv"
df = pd.read_csv(free_trip_csv)

# Remove exact repeated rows, then re-run the duplicate tally on the result
# to confirm that none remain.
a = df.drop_duplicates()
still_repeated = a.duplicated()
b = still_repeated.value_counts()
print(b)

import numpy as np
import pandas as pd
free_trip_csv = "F:\\python3_pachongAndDatareduce\\data\\pandas data\\qunar_free_trip.csv"
df = pd.read_csv(free_trip_csv)

# Summary statistics computed after exact repeated rows are removed.
a = df.drop_duplicates()
summary = a.describe()
print(summary)

%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
free_trip_csv = "F:\\python3_pachongAndDatareduce\\data\\pandas data\\qunar_free_trip.csv"
df = pd.read_csv(free_trip_csv)
print(df.shape)

# Two side-by-side axes: a 20-bin histogram of "价格" on the left and a
# box plot of the same column on the right.
fig, (hist_ax, box_ax) = plt.subplots(1, 2, figsize=(12, 5))
hist_ax.hist(df["价格"], bins=20)
df.boxplot(column="价格", ax=box_ax)
plt.show()

%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
free_trip_csv = "F:\\python3_pachongAndDatareduce\\data\\pandas data\\qunar_free_trip.csv"
df = pd.read_csv(free_trip_csv)

# Standardise "价格" (z-score) and show the first three scores.
price = df["价格"]
zscore = price.sub(price.mean()).div(price.std())
print(zscore.head(3))

# Flag rows lying more than 3.5 standard deviations from the mean price.
df["isOutlier"] = zscore.abs().gt(3.5)
print(df["isOutlier"].value_counts())

# Show the first flagged rows.
a = df.loc[df["isOutlier"]]
print(a.head())

吴裕雄 python 数据处理(2)的更多相关文章
- 吴裕雄 python 数据处理(3)
import time a = time.time()print(a)b = time.localtime()print(b)c = time.strftime("%Y-%m-%d %X" ...
- 吴裕雄 python 数据处理(1)
import time print(time.time())print(time.localtime())print(time.strftime('%Y-%m-%d %X',time.localtim ...
- 吴裕雄 python 神经网络——TensorFlow 输入数据处理框架
import tensorflow as tf files = tf.train.match_filenames_once("E:\\MNIST_data\\output.tfrecords ...
- 吴裕雄 python神经网络 花朵图片识别(10)
import osimport numpy as npimport matplotlib.pyplot as pltfrom PIL import Image, ImageChopsfrom skim ...
- 吴裕雄 python神经网络 花朵图片识别(9)
import osimport numpy as npimport matplotlib.pyplot as pltfrom PIL import Image, ImageChopsfrom skim ...
- 吴裕雄 python 神经网络——TensorFlow pb文件保存方法
import tensorflow as tf from tensorflow.python.framework import graph_util v1 = tf.Variable(tf.const ...
- 吴裕雄 python 神经网络——TensorFlow 花瓣分类与迁移学习(4)
# -*- coding: utf-8 -*- import glob import os.path import numpy as np import tensorflow as tf from t ...
- 吴裕雄 python 神经网络——TensorFlow 花瓣分类与迁移学习(3)
import glob import os.path import numpy as np import tensorflow as tf from tensorflow.python.platfor ...
- 吴裕雄 python 神经网络——TensorFlow 花瓣分类与迁移学习(2)
import glob import os.path import numpy as np import tensorflow as tf from tensorflow.python.platfor ...
随机推荐
- qt 中文乱码
首先呢,声明一下,QString 是不存在中文支持问题的,很多人遇到问题,并不是本身 QString 的问题,而是没有将自己希望的字符串正确赋给QString. 很简单的问题,"我是中文" ...
- 从Exchanger数据交换到基于trade-off的系统设计
可以使用JDK提供的Exchanger类进行同步交换:进行数据交换的双方将互相等待对方,直到双方的数据都准备完毕,才进行交换.Exchanger类很少用到,但理解数据交换的时机却十分重要,这是一个基于tr ...
- 利用.pbk来实现ADSL开机自动拨号
当你新建拨号连接或者VPN连接之后在你的电脑里会创建一个.pbk的文件 这个.pbk的文件可以说是一个集合,将你电脑的所有连接都保存在一起. 同时你还可以将此连接复制起来传给其他人. 系统默认的.pb ...
- freePBX汉化方法记录——备忘
FreePBX汉化[root@bgcc69:/var/www/html/admin/i18n/zh_CN/LC_MESSAGES]$pwd/var/www/html/admin/i18n/zh_CN/ ...
- mysql设置索引
1.添加PRIMARY KEY(主键索引) 语法:ALTER TABLE `表名` ADD PRIMARY KEY ( `列名称` ) mysql>ALTER TABLE `table_name ...
- 常用模块:os模块,logging模块等
一 os模块 那么作为一个常用模块,os模块是与操作系统交互的一个模块. 那么os模块中我们常用的一般有以下几种: os.listdir('dirname') 以列表的形式列出指定目录下的所有文 ...
- BOM和DOM(cs)
前戏 到目前为止,我们已经学过了JavaScript的一些简单的语法.但是这些简单的语法,并没有和浏览器有任何交互. 也就是我们还不能制作一些我们经常看到的网页的一些交互,我们需要继续学习BOM和DO ...
- 内置锁(一)synchronized 介绍与用法
一.synchronized 的介绍 synchronized 是 Java语言的关键字,当它用来修饰一个方法或者一个代码块的时候,能够保证在同一时刻最多只有一个线程执行该段代码,而这段代码也被称 ...
- node实现缓存
内容: 1.缓存基本原理 2.node实现缓存 1.缓存基本原理 第一重要.缓存策略: cache-control:用于控制缓存,常见的取值有private.no-cache.max-age.must ...
- django中使用Form组件
内容: 1.Form组件介绍 2.Form组件常用字段 3.Form组件校验功能 4.Form组件内置正则校验 参考:https://www.cnblogs.com/liwenzhou/p/87478 ...