Python笔记 #14# Pandas: Selection

10 Minutes to pandas

import pandas as pd

import numpy as np

import matplotlib.pyplot as plt

# Daily DatetimeIndex with three entries: 2018-01-16, 2018-01-17, 2018-01-18.
# The start date is spelled out explicitly; date_range needs a real date to anchor the range.
dates = pd.date_range('20180116', periods=3)

# Two-dimensional table (3 rows x 4 columns) of random normal values:
# rows are labelled by `dates`, columns are labelled 'A', 'B', 'C', 'D'.
df = pd.DataFrame(np.random.randn(3, 4), index=dates, columns=list('ABCD'))

# Getting

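# print(df['A']) # select a single column (note: the sample output below is from a 6-row frame dated 2013-01-01..06, not the 3-row df defined above)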

# 2013-01-01    0.469112

# 2013-01-02    1.212112

# 2013-01-03   -0.861849

# 2013-01-04    0.721555

# 2013-01-05   -0.424972

# 2013-01-06   -0.673690

# Freq: D, Name: A, dtype: float64

# print(df[0:3]) # positional row slice: returns the rows at positions 0, 1 and 2; the end position 3 is excluded

#                    A         B         C         D

# 2018-01-16 -0.621070 -0.558260 -0.068434 -1.225484

# 2018-01-17  0.500783 -0.289074 -0.251468 -0.935832

# 2018-01-18  0.299410  2.279664  0.325912  0.461620

# print(df['20180116':'20180117']) # label (date) slice: unlike a positional slice, both endpoints are included

#                    A         B         C         D

# 2018-01-16 -0.009937  0.545212  0.682592  0.666001

# 2018-01-17  0.641140  0.539408  0.876006 -0.410707

# Selection by Label

# print(df)

# print(df.loc[dates[0]])

#                    A         B         C         D

# 2018-01-16 -1.531173  0.473732 -0.017051 -0.911358

# 2018-01-17 -2.153974  1.320710  1.970252 -1.992209

# 2018-01-18 -0.829090  1.096573  0.997688 -0.401185

# A   -1.531173

# B    0.473732

# C   -0.017051

# D   -0.911358

# Name: 2018-01-16 00:00:00, dtype: float64

# print(df)

# print(df.loc[:,['A','B']])

#                    A         B         C         D

# 2018-01-16  0.077497  1.364726  0.343679 -1.099019

# 2018-01-17 -0.573355 -0.939503  0.020275  1.073868

# 2018-01-18 -0.507676 -0.820279 -1.802128 -0.328978

#                    A         B

# 2018-01-16  0.077497  1.364726

# 2018-01-17 -0.573355 -0.939503

# 2018-01-18 -0.507676 -0.820279

# print(df)

# print(df.loc['20180116':'20180117',['A','B']])

#                    A         B         C         D

# 2018-01-16  2.526965  0.820404  0.095466  0.611306

# 2018-01-17 -1.359352  1.602012  0.337596  2.380324

# 2018-01-18 -0.453608  1.454857  1.443562  2.145979

#                    A         B

# 2018-01-16  2.526965  0.820404

# 2018-01-17 -1.359352  1.602012

# print(df)

# print(df.loc['20180116',['A','B']])

#                    A         B         C         D

# 2018-01-16 -0.143268 -0.954798  0.637066 -1.433980

# 2018-01-17  0.527822  1.673820  1.150244 -0.644368

# 2018-01-18  0.550647  0.012898  1.065985  2.614110

# A   -0.143268

# B   -0.954798

# Name: 2018-01-16 00:00:00, dtype: float64

# print(df)

# print(df.loc[dates[0],'A'])

#                    A         B         C         D

# 2018-01-16  0.557596 -0.140733  0.921194 -0.618365

# 2018-01-17  0.499742 -0.709669 -0.128449 -3.033026

# 2018-01-18  0.014871 -1.198496 -0.241682 -0.502687

# 0.5575964215814226

# print(df)

# print(df.at[dates[0],'A'])

# at的使用方法与loc类似，但是比loc有更快的访问数据的速度，而且只能访问单个元素，不能访问多个元素。

#                    A         B         C         D

# 2018-01-16  0.557596 -0.140733  0.921194 -0.618365

# 2018-01-17  0.499742 -0.709669 -0.128449 -3.033026

# 2018-01-18  0.014871 -1.198496 -0.241682 -0.502687

# 0.5575964215814226

# Selection by Position

# print(df)

# print(df.iloc[0])

# print(df.iloc[2])

#                    A         B         C         D

# 2018-01-16 -0.660315  0.116266 -0.914127  0.598307

# 2018-01-17 -1.882812  1.715777 -0.355752 -0.192475

# 2018-01-18  0.628092  0.700135  0.402080  0.949126

# A   -0.660315

# B    0.116266

# C   -0.914127

# D    0.598307

# Name: 2018-01-16 00:00:00, dtype: float64

# A    0.628092

# B    0.700135

# C    0.402080

# D    0.949126

# Name: 2018-01-18 00:00:00, dtype: float64

# print(df)

# print(df.iloc[0:1,1:3]) # [0:1] 不包括 1 ， [1:3] 不包括 3

#                    A         B         C         D

# 2018-01-16 -0.685245  1.835675 -0.630813 -0.408195

# 2018-01-17 -0.899057  0.257409  0.305275 -0.956311

# 2018-01-18 -1.111117  0.280925 -0.463713  0.882284

#                    B         C

# 2018-01-16  1.835675 -0.630813

# print(df)

# print(df.iloc[[1,2,0],[0,2]]) # rows at positions 1, 2, 0 (in that order) and columns at positions 0 and 2 (A and C)

# print(df.iloc[1:2,:])

# print(df.iloc[:,1:2])

#                    A         B         C         D

# 2018-01-16  0.221714  0.357890 -0.905870 -0.099446

# 2018-01-17 -0.636384 -1.428893 -0.471488 -1.197841

# 2018-01-18  1.044619 -0.346529 -0.164955  0.201145

#                    A         C

# 2018-01-17 -0.636384 -0.471488

# 2018-01-18  1.044619 -0.164955

# 2018-01-16  0.221714 -0.905870

#                    A         B         C         D

# 2018-01-17 -0.636384 -1.428893 -0.471488 -1.197841

#                    B

# 2018-01-16  0.357890

# 2018-01-17 -1.428893

# 2018-01-18 -0.346529

# print(df.iloc[1,1])

# print(df.iat[1,1]) # same scalar as df.iloc[1,1] above; iat only accesses a single value and is the faster accessor for that case

# -0.2891820477026986

# -0.2891820477026986

# Boolean Indexing

# print(df[df.A > 0]) # 多随机几次是有可能 empty set 的，选中的就是 df.A > 0 的那些行！

#                    A         B         C         D

# 2018-01-17  0.322452  0.803659 -0.982818  0.149446

# 2018-01-18  0.501591 -0.114393 -0.306871 -2.258557

# Every row shown above has a value greater than 0 in column A

# print(df[df > 0]) # element-wise selection over the whole frame: values that are not > 0 are shown as NaN

#                    A         B         C         D

# 2018-01-16  1.453356       NaN  0.120802  0.368208

# 2018-01-17  0.459706  0.802484       NaN       NaN

# 2018-01-18       NaN  0.569428  0.952326  0.541748

# Setting

# Setting a new column automatically aligns the data by the indexes

# s1 = pd.Series([1, 2, 3], index=pd.date_range('20180116', periods=3))

# print(s1)

# print(df)

# df['F'] = s1

# print(df)

#

# 2018-01-16    1

# 2018-01-17    2

# 2018-01-18    3

# Freq: D, dtype: int64

#                    A         B         C         D

# 2018-01-16 -0.261046 -0.561609 -2.263514  2.359545

# 2018-01-17  0.563822 -1.301185  0.906939  0.478209

# 2018-01-18  0.942304  1.231033 -0.016457  0.659738

#                    A         B         C         D  F

# 2018-01-16 -0.261046 -0.561609 -2.263514  2.359545  1

# 2018-01-17  0.563822 -1.301185  0.906939  0.478209  2

# 2018-01-18  0.942304  1.231033 -0.016457  0.659738  3

# print(df)

# df.at[dates[0],'A'] = 0 # Setting values by label

# df.iat[0, 1] = 0 # Setting values by position

# df.loc[:,'D'] = np.array([99] * len(df)) # Setting by assigning with a numpy array

# print(df)

#                    A         B         C         D

# 2018-01-16  1.113651 -0.978514 -0.852811  0.933365

# 2018-01-17 -1.395547 -0.158742 -1.509723 -0.917854

# 2018-01-18  0.672396 -1.248654 -1.430043 -1.133012

#                    A         B         C   D

# 2018-01-16  0.000000  0.000000 -0.852811  99

# 2018-01-17 -1.395547 -0.158742 -1.509723  99

# 2018-01-18  0.672396 -1.248654 -1.430043  99

# A where operation with setting.

# df2 = df.copy()

# print(df2)

# df2[df2 > 0] = -df2

# print(df2)

#                    A         B         C         D

# 2018-01-16  0.824635 -0.914218 -0.953014  0.166094

# 2018-01-17 -0.037925  0.018838  0.927026  0.322848

# 2018-01-18  0.596024  0.851863 -0.548556  0.243168

#                    A         B         C         D

# 2018-01-16 -0.824635 -0.914218 -0.953014 -0.166094

# 2018-01-17 -0.037925 -0.018838 -0.927026 -0.322848

# 2018-01-18 -0.596024 -0.851863 -0.548556 -0.243168

Python笔记 #14# Pandas: Selection的更多相关文章

Python笔记 #18# Pandas: Grouping
10 Minutes to pandas 引 By “group by” we are referring to a process involving one or more of the foll ...
Python笔记 #17# Pandas: Merge
10 Minutes to pandas Concat df = pd.DataFrame(np.random.randn(10, 4)) print(df) # break it into piec ...
Python笔记 #16# Pandas: Operations
10 Minutes to pandas #Stats # shift 这玩意儿有啥用??? s = pd.Series([1,5,np.nan], index=dates).shift(0) # s ...
Python笔记 #15# Pandas: Missing Data
10 Minutes to pandas import pandas as pd import numpy as np import matplotlib.pyplot as plt dates = ...
Python笔记 #13# Pandas: Viewing Data
感觉很详细:数据分析:pandas 基础 import pandas as pd import numpy as np import matplotlib.pyplot as plt dates = ...
python笔记14
今日内容带参数的装饰器: flask框架 + django缓存 + 写装饰器实现被装饰的函数要执行N次模块 os sys time(三种类型) datetime 和 timezone[了解] 内容 ...
学习笔记之pandas
Python Data Analysis Library — pandas: Python Data Analysis Library https://pandas.pydata.org/ panda ...
Python数据分析之Pandas操作大全
从头到尾都是手码的,文中的所有示例也都是在Pycharm中运行过的,自己整理笔记的最大好处在于可以按照自己的思路来构建框架,等到将来在需要的时候能够以最快的速度看懂并应用=_= 注:为方便表述,本章设 ...
Python数据分析之pandas学习
Python中的pandas模块进行数据分析. 接下来pandas介绍中将学习到如下8块内容:1.数据结构简介:DataFrame和Series2.数据索引index3.利用pandas查询数据4.利 ...

随机推荐

cocos2dx-3.x物理引擎Box2D介绍
物理引擎 Cocos2d-x引擎内置了两种物理引擎,它们分别是Box2D和Chipmunk,都是非常优秀的2D物理引擎,而且x引擎将它们都内置在SDK中.Box2D使用较为广泛,在这里选择Box2D来进 ...
Android 实现动态匹配输入的内容 AutoCompleteTextView和MultiAutoCompleteTextView
AutoCompleteTextView1.功能:动态匹配输入的内容,如百度搜索引擎当输入文本时可以根据内容显示匹配的热门信息.2.独特属性:android:completionThreshold 设 ...
[转]C++结构体|类内存对齐详解
内存地址对齐,是一种在计算机内存中排列数据(表现为变量的地址).访问数据(表现为CPU读取数据)的一种方式,包含了两种相互独立又相互关联的部分:基本数据对齐和结构体数据对齐 . 为什么需要内存对齐?对 ...
ubuntu的两种网络连接模式
ubuntu的网络连接分成两种类型,一种是modern 模式, 这种模式的配置通过ifconfig命令来进行配置,重启之后失效,这种模式就是在desktop 下右上角的网络连接.如图所示 modern模 ...
【python系列】python2.x和python3.x的区别
刚接触python使用的是python2.x的书籍,但是发现python3.x和python2.x有不小的区别,以下做一些记录性能 Py3.0运行 pystone benchmark的速度比Py2. ...
AB压力测试工具
1.安装AB工具: yum install httpd-tools 2.测试: ab -n -c http://localhost.com/ 其中-n表示请求数,-c表示并发数 3.测试结果 [roo ...
Hadoop讲解
1.简介 Hadoop是一款开源的大数据通用处理平台,其提供了分布式存储和分布式离线计算,适合大规模数据.流式数据(写一次,读多次),不适合低延时的访问.大量的小文件以及频繁修改的文件. *Hadoo ...
ZOJ 3210 A Stack or A Queue?
A Stack or A Queue? Time Limit: 1 Second Memory Limit: 32768 KB Do you know stack and queue? Th ...
HTML实例 - 购物商场页面
效果图代码 https://coding.net/u/James_Lian/p/Shopping-page/git 示例 https://coding.net/u/James_Lian/p/Shop ...
三个小时学会wordpress模板制作
最近接了一个项目需要用wordpress建站,版面相对简单,ytkah就琢磨着自己来设计wordpress模板,首页栏目页文章页(很多网站无外乎就这些页面),其中栏目页和首页又很像,都是调用文章列表. ...

Python笔记 #14# Pandas: Selection

Python笔记 #14# Pandas: Selection的更多相关文章

随机推荐

热门专题