Numpy

【Array Slicing】

In [115]: a = np.arange(12).reshape((3,4))

In [116]: a
Out[116]:
array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [117]: a[1:,1:3]
Out[117]:
array([[ 5,  6],
       [ 9, 10]])

【Boolean Indexing】Find the elements greater than 5

In [134]: a = [ random.randint(0,10) for i in range(20) ]

In [135]: a
Out[135]: [4, 4, 9, 2, 2, 5, 6, 4, 3, 9, 5, 7, 10, 4, 9, 10, 6, 10, 3, 8]

In [137]: a = np.array(a)

In [138]: a
Out[138]:
array([ 4,  4,  9,  2,  2,  5,  6,  4,  3,  9,  5,  7, 10,  4,  9, 10,  6,
       10,  3,  8])

In [139]: a>5
Out[139]:
array([False, False,  True, False, False, False,  True, False, False,
        True, False,  True,  True, False,  True,  True,  True,  True,
       False,  True])

In [140]: a[a>5]
Out[140]: array([ 9,  6,  9,  7, 10,  9, 10,  6, 10,  8])

【Boolean Indexing】

- Find the even elements greater than 5

- Find the elements that are greater than 5 or even

In [143]: b = a[a>5]

In [144]: b
Out[144]: array([ 9,  6,  9,  7, 10,  9, 10,  6, 10,  8])

In [145]: b[b%2==0]
Out[145]: array([ 6, 10, 10,  6, 10,  8])

In [146]: a[(a>5) & (a%2==0)]
Out[146]: array([ 6, 10, 10,  6, 10,  8])

In [147]: a[(a>5) and (a%2==0)]
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-147-fee222ad41de> in <module>
----> 1 a[(a>5) and (a%2==0)]

ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()

In [148]: a[(a>5) | (a%2==0)]
Out[148]: array([ 4,  4,  9,  2,  2,  6,  4,  9,  7, 10,  4,  9, 10,  6, 10,  8])

In [149]: a[(a>5) or (a%2==0)]
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-149-732531989282> in <module>
----> 1 a[(a>5) or (a%2==0)]

ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()
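
The two errors above are why boolean masks must be combined with the bitwise operators & and | (parenthesized, since they bind tighter than comparisons): Python's and/or try to collapse a whole array into one bool. NumPy's ufunc spelling is equivalent; a minimal sketch:

import numpy as np

a = np.array([4, 4, 9, 2, 2, 5, 6, 4, 3, 9, 5, 7, 10, 4, 9, 10, 6, 10, 3, 8])

# np.logical_and / np.logical_or are the functional forms of & and |
a[np.logical_and(a > 5, a % 2 == 0)]   # array([ 6, 10, 10,  6, 10,  8])
a[np.logical_or(a > 5, a % 2 == 0)]    # same result as a[(a>5) | (a%2==0)]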


【Fancy Indexing (for irregularly positioned indices)】

# Fancy indexing on a 1-D array
In [153]: a = np.arange(10,20)

In [154]: a
Out[154]: array([10, 11, 12, 13, 14, 15, 16, 17, 18, 19])

In [155]: a[[1,3,4,9]]
Out[155]: array([11, 13, 14, 19])

# 2-D fancy indexing, example 1 (take columns 2 and 3 of row 2)
In [165]: a
Out[165]:
array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14],
       [15, 16, 17, 18, 19]])

In [166]: a[2,[2,3]]
Out[166]: array([12, 13])

# 2-D fancy indexing, example 2 (rows: take rows 1 and 3; columns: take columns 2 and 4)
In [167]: a
Out[167]:
array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14],
       [15, 16, 17, 18, 19]])

In [168]: a[[1,3],[2,4]]   # Note: fancy-indexing rows and columns at the same time pairs the indices element-wise instead
Out[168]: array([ 7, 19])

In [169]: a[[1,3],:]
Out[169]:
array([[ 5,  6,  7,  8,  9],
       [15, 16, 17, 18, 19]])

In [170]: a[[1,3],:][:,[2,4]]
Out[170]:
array([[ 7,  9],
       [17, 19]])
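
np.ix_ builds the same rows-by-columns cross product in a single indexing step, avoiding the two-step chain above; a sketch:

import numpy as np

a = np.arange(20).reshape(4, 5)

# np.ix_ makes an open mesh from the two lists, selecting the full grid
# instead of pairing the indices element-wise
a[np.ix_([1, 3], [2, 4])]
# array([[ 7,  9],
#        [17, 19]])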

【Rounding Values to Integers】

        toward 0 (int)   round to nearest (round)   round up (math.ceil)   round down (math.floor)
 1.7          1                     2                        2                       1
-1.7         -1                    -2                       -1                      -2
 1.3          1                     1                        2                       1
-1.3         -1                    -1                       -1                      -2
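
A quick check of the table with Python's built-ins (a sketch; note that Python 3's round() sends exact .5 ties to the nearest even integer, which none of these values hit):

import math

for x in (1.7, -1.7, 1.3, -1.3):
    print(x, int(x), round(x), math.ceil(x), math.floor(x))
# 1.7 1 2 2 1
# -1.7 -1 -2 -1 -2
# 1.3 1 1 2 1
# -1.3 -1 -1 -1 -2
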
In [53]: a = np.arange(-5.5,5.5)

In [54]: a
Out[54]: array([-5.5, -4.5, -3.5, -2.5, -1.5, -0.5,  0.5,  1.5,  2.5,  3.5,  4.5])

In [55]: np.trunc(a)   # numpy: truncate toward zero
Out[55]: array([-5., -4., -3., -2., -1., -0.,  0.,  1.,  2.,  3.,  4.])

In [56]: np.round(a)   # numpy: round to nearest (.5 ties go to the nearest even number, as the output shows)
Out[56]: array([-6., -4., -4., -2., -2., -0.,  0.,  2.,  2.,  4.,  4.])

In [57]: np.rint(a)    # numpy.rint() is equivalent to numpy.round()
Out[57]: array([-6., -4., -4., -2., -2., -0.,  0.,  2.,  2.,  4.,  4.])

In [58]: np.ceil(a)    # numpy: round up
Out[58]: array([-5., -4., -3., -2., -1., -0.,  1.,  2.,  3.,  4.,  5.])

In [59]: np.floor(a)   # numpy: round down
Out[59]: array([-6., -5., -4., -3., -2., -1.,  0.,  1.,  2.,  3.,  4.])

numpy.modf(): split the fractional and integer parts

In [62]: a
Out[62]: array([-5.5, -4.5, -3.5, -2.5, -1.5, -0.5,  0.5,  1.5,  2.5,  3.5,  4.5])

In [63]: x,y = np.modf(a)

In [64]: x
Out[64]: array([-0.5, -0.5, -0.5, -0.5, -0.5, -0.5,  0.5,  0.5,  0.5,  0.5,  0.5])

In [65]: y
Out[65]: array([-5., -4., -3., -2., -1., -0.,  0.,  1.,  2.,  3.,  4.])

In [66]: x+y
Out[66]: array([-5.5, -4.5, -3.5, -2.5, -1.5, -0.5,  0.5,  1.5,  2.5,  3.5,  4.5])

numpy.nan

In [85]: np.nan == np.nan
Out[85]: False

In [86]: np.nan is np.nan
Out[86]: True

In [77]: a = np.arange(5)

In [78]: b = a/a
/usr/bin/ipython3:1: RuntimeWarning: invalid value encountered in true_divide
  #!/usr/local/python3.6/bin/python3.6

In [79]: b
Out[79]: array([nan,  1.,  1.,  1.,  1.])

In [80]: np.*nan?
np.isnan
np.nan

In [81]: np.isnan(b)
Out[81]: array([ True, False, False, False, False])

In [82]: b[np.isnan(b)]
Out[82]: array([nan])

In [83]: b[~np.isnan(b)]   # ~ negates the boolean mask
Out[83]: array([1., 1., 1., 1.])
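
To replace the NaNs instead of filtering them out, np.nan_to_num or np.where also work; a small sketch:

import numpy as np

b = np.array([np.nan, 1., 1., 1., 1.])

np.nan_to_num(b)               # NaN -> 0.0 by default
np.where(np.isnan(b), 0., b)   # same idea, with an explicit fill value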

numpy.inf

In [97]: np.inf == np.inf
Out[97]: True

In [98]: np.inf is np.inf
Out[98]: True

In [89]: a = np.arange(3,8)

In [90]: a
Out[90]: array([3, 4, 5, 6, 7])

In [91]: b = [1,0,1,0,1]

In [92]: c = a/b
/usr/bin/ipython3:1: RuntimeWarning: divide by zero encountered in true_divide
  #!/usr/local/python3.6/bin/python3.6

In [93]: c
Out[93]: array([ 3., inf,  5., inf,  7.])

In [94]: c[c!=np.inf]
Out[94]: array([3., 5., 7.])

In [96]: c[~np.isinf(c)]
Out[96]: array([3., 5., 7.])
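
np.isfinite filters nan, inf and -inf in a single pass; a sketch:

import numpy as np

c = np.array([3., np.inf, 5., -np.inf, np.nan])

# False for inf, -inf and nan, so one mask removes all of them
c[np.isfinite(c)]   # array([3., 5.])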


numpy.maximum and numpy.minimum

In [102]: a
Out[102]: array([3, 4, 5, 6, 7])

In [103]: b
Out[103]: array([2, 5, 3, 7, 4])

In [104]: np.maximum(a,b)
Out[104]: array([3, 5, 5, 7, 7])

In [105]: np.minimum(a,b)
Out[105]: array([2, 4, 3, 6, 4])

Pandas

Series integer-index pitfalls: prefer iloc

In [137]: s1 = pd.Series(np.arange(10))

In [138]: s2 = s1[5:].copy()

In [139]: s1
Out[139]:
0    0
1    1
2    2
3    3
4    4
5    5
6    6
7    7
8    8
9    9
dtype: int64

In [140]: s2
Out[140]:
5    5
6    6
7    7
8    8
9    9
dtype: int64

In [147]: s2[5]
Out[147]: 5

In [148]: s2.loc[5]
Out[148]: 5

In [149]: s2.iloc[0]
Out[149]: 5
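
The takeaway: with an integer index, plain s2[...] is label-based, so s2[5] means "the label 5", not "position 5"; loc makes the label lookup explicit, and iloc is purely positional. A sketch of the pitfall (assuming pandas' label-first lookup on integer indexes):

import numpy as np
import pandas as pd

s2 = pd.Series(np.arange(10))[5:].copy()   # index is 5..9

s2.loc[5]    # 5  -- explicit label lookup
s2.iloc[0]   # 5  -- explicit positional lookup
# s2[0] raises a KeyError here: 0 is treated as a label, and no label 0 exists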

Series data alignment

In [150]: a = pd.Series([12,23,34],['c','a','d'])

In [151]: b = pd.Series([11,30,9],['d','c','a'])

In [152]: a+b
Out[152]:
a    32
c    42
d    45
dtype: int64

In [153]: b = pd.Series([11,30,9,100],['d','c','a','b'])

In [154]: a+b
Out[154]:
a    32.0
b     NaN    # missing value
c    42.0
d    45.0
dtype: float64

In [155]: a.add(b, fill_value=0)
Out[155]:
a     32.0
b    100.0
c     42.0
d     45.0
dtype: float64

Series missing-value handling

In [158]: s = a+b

In [159]: s
Out[159]:
a    32.0
b     NaN
c    42.0
d    45.0
dtype: float64

In [160]: s.isnull()
Out[160]:
a    False
b     True
c    False
d    False
dtype: bool

In [161]: s.notnull()
Out[161]:
a     True
b    False
c     True
d     True
dtype: bool

# Drop the missing values
In [162]: s.dropna()
Out[162]:
a    32.0
c    42.0
d    45.0
dtype: float64

# Fill the missing values
In [163]: s.fillna(0)
Out[163]:
a    32.0
b     0.0
c    42.0
d    45.0
dtype: float64

# Fill the missing values (with the mean)
In [166]: s.fillna(s.mean())
Out[166]:
a    32.000000
b    39.666667
c    42.000000
d    45.000000
dtype: float64

DataFrame creation examples

In [169]: pd.DataFrame({'one':[1,2,3],'two':[10,20,30]})
Out[169]:
   one  two
0    1   10
1    2   20
2    3   30

In [170]: pd.DataFrame({'one':[1,2,3],'two':[10,20,30]},index=['A','B','C'])
Out[170]:
   one  two
A    1   10
B    2   20
C    3   30

In [171]: df = _

In [172]: df
Out[172]:
   one  two
A    1   10
B    2   20
C    3   30

In [173]: pd.DataFrame({'one':pd.Series([1,2,3],index=['A','B','C']),'two':pd.Series([10,20,30,40],index=['B','A','C','D'])})
Out[173]:
   one  two
A  1.0   20
B  2.0   10
C  3.0   30
D  NaN   40

Creating a DataFrame by reading a file

In [181]: !vim demo.csv

In [182]: !cat demo.csv
'one','two','three'
'A',1,2,3
'B',4,5,6
'C',7,8,9

In [183]: pd.read_csv('demo.csv')
Out[183]:
     'one'  'two'  'three'
'A'      1      2        3
'B'      4      5        6
'C'      7      8        9
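
Because the file quotes its fields with single quotes, the quotes survive into the parsed names and values above. read_csv's quotechar parameter strips them; a sketch (index_col=0 additionally uses the first column as the row index):

import pandas as pd

# assuming the demo.csv shown above
df = pd.read_csv('demo.csv', quotechar="'", index_col=0)
#    one  two  three
# A    1    2      3
# B    4    5      6
# C    7    8      9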

DataFrame indexing: getting a single element

In [218]: df
Out[218]:
   one  two
A  1.0   20
B  2.0   10
C  3.0   30
D  NaN   40

In [219]: df['two']['C']
Out[219]: 30

In [220]: df.two.C
Out[220]: 30

In [221]: df.loc['C','two']
Out[221]: 30

In [222]: df.iloc[2,1]
Out[222]: 30
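
pandas also has the scalar accessors at/iat, which mirror loc/iloc but are faster for single values; a sketch (rebuilding the df from above):

import numpy as np
import pandas as pd

df = pd.DataFrame({'one': [1, 2, 3, np.nan], 'two': [20, 10, 30, 40]},
                  index=list('ABCD'))

df.at['C', 'two']   # label-based scalar accessor    -> 30
df.iat[2, 1]        # position-based scalar accessor -> 30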

DataFrame indexing: getting multiple elements

In [234]: df
Out[234]:
   one  two
A  1.0   20
B  2.0   10
C  3.0   30
D  NaN   40

In [235]: df['one']
Out[235]:
A    1.0
B    2.0
C    3.0
D    NaN
Name: one, dtype: float64

In [236]: df.loc['B']
Out[236]:
one     2.0
two    10.0
Name: B, dtype: float64

In [237]: df.loc['B',]
Out[237]:
one     2.0
two    10.0
Name: B, dtype: float64

In [238]: df.loc['B',:]
Out[238]:
one     2.0
two    10.0
Name: B, dtype: float64

In [239]: df.iloc[1]
Out[239]:
one     2.0
two    10.0
Name: B, dtype: float64

In [240]: df.iloc[1,]
Out[240]:
one     2.0
two    10.0
Name: B, dtype: float64

In [241]: df.iloc[1,:]
Out[241]:
one     2.0
two    10.0
Name: B, dtype: float64

In [242]: df.iloc[1,:1]
Out[242]:
one    2.0
Name: B, dtype: float64

DataFrame data alignment

In [243]: df
Out[243]:
   one  two
A  1.0   20
B  2.0   10
C  3.0   30
D  NaN   40

In [244]: df = pd.DataFrame({'two':[1,2,3,4],'one':[4,5,6,7]},index=['C','D','B','A'])

In [245]: df2 = _243

In [246]: df
Out[246]:
   two  one
C    1    4
D    2    5
B    3    6
A    4    7

In [247]: df2
Out[247]:
   one  two
A  1.0   20
B  2.0   10
C  3.0   30
D  NaN   40

In [248]: df+df2
Out[248]:
   one  two
A  8.0   24
B  8.0   13
C  7.0   31
D  NaN   42

DataFrame missing-value handling

In [268]: df
Out[268]:
   one   two
A  1.0  20.0
B  2.0  10.0
C  3.0  30.0
D  NaN  40.0

In [269]: df.fillna(0)
Out[269]:
   one   two
A  1.0  20.0
B  2.0  10.0
C  3.0  30.0
D  0.0  40.0

In [270]: df.dropna()
Out[270]:
   one   two
A  1.0  20.0
B  2.0  10.0
C  3.0  30.0

In [271]: df.loc['D','two'] = np.nan

In [272]: df.loc['B','two'] = np.nan

In [273]: df
Out[273]:
   one   two
A  1.0  20.0
B  2.0   NaN
C  3.0  30.0
D  NaN   NaN

In [274]: df.dropna()   # drop every row that contains a NaN
Out[274]:
   one   two
A  1.0  20.0
C  3.0  30.0

In [275]: df.dropna(how='all')   # drop a row only if it is all NaN
Out[275]:
   one   two
A  1.0  20.0
B  2.0   NaN
C  3.0  30.0

In [276]: df.dropna(how='any')   # the default
Out[276]:
   one   two
A  1.0  20.0
C  3.0  30.0

Using axis to drop whole columns

In [282]: df
Out[282]:
   one   two
A  1.0  20.0
B  2.0  10.0
C  3.0  30.0
D  4.0  10.0

In [283]: df.iloc[2,1] = np.nan

In [284]: df
Out[284]:
   one   two
A  1.0  20.0
B  2.0  10.0
C  3.0   NaN
D  4.0  10.0

In [285]: df.dropna(axis=1)   # drop every column that contains a NaN
Out[285]:
   one
A  1.0
B  2.0
C  3.0
D  4.0

In [286]: df.dropna(axis=0)   # the default
Out[286]:
   one   two
A  1.0  20.0
B  2.0  10.0
D  4.0  10.0

DataFrame sorting

# Sort by values
In [17]: df
Out[17]:
   two  one
C    1  4.0
D    2  NaN
B    3  6.0
A    4  7.0

In [18]: df.mean()
Out[18]:
two    2.500000
one    5.666667
dtype: float64

In [19]: df.mean(axis=1)
Out[19]:
C    2.5
D    2.0
B    4.5
A    5.5
dtype: float64

In [20]: df.sort_values(by='one')
Out[20]:
   two  one
C    1  4.0
B    3  6.0
A    4  7.0
D    2  NaN

In [21]: df.sort_values(by='one',ascending=False)
Out[21]:
   two  one
A    4  7.0
B    3  6.0
C    1  4.0
D    2  NaN

In [23]: df.sort_values(by='B',axis=1)
Out[23]:
   two  one
C    1  4.0
D    2  NaN
B    3  6.0
A    4  7.0

In [24]: df.sort_values(by='B',axis=1,ascending=False)
Out[24]:
   one  two
C  4.0    1
D  NaN    2
B  6.0    3
A  7.0    4

# Sort by index labels
In [68]: df
Out[68]:
   two  one
C    1  4.0
D    2  NaN
B    3  6.0
A    4  7.0

In [69]: df2
Out[69]:
   four
C    50
A    60
D    70
B    80

In [70]: df3 = pd.concat([df, df2], axis=1, join_axes=[df.index])   # concat merges the two DataFrames

In [71]: df3
Out[71]:
   two  one  four
C    1  4.0    50
D    2  NaN    70
B    3  6.0    80
A    4  7.0    60

In [72]: df3.sort_index()
Out[72]:
   two  one  four
A    4  7.0    60
B    3  6.0    80
C    1  4.0    50
D    2  NaN    70

In [73]: df3.sort_index(ascending=False)
Out[73]:
   two  one  four
D    2  NaN    70
C    1  4.0    50
B    3  6.0    80
A    4  7.0    60

In [74]: df3.sort_index(ascending=False, axis=1)
Out[74]:
   two  one  four
C    1  4.0    50
D    2  NaN    70
B    3  6.0    80
A    4  7.0    60

In [75]: df3.sort_index(axis=1)
Out[75]:
   four  one  two
C    50  4.0    1
D    70  NaN    2
B    80  6.0    3
A    60  7.0    4
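
Note: the join_axes argument used in In [70] was deprecated in pandas 0.25 and removed in 1.0; on current pandas the same row order is kept with reindex. A sketch:

import pandas as pd

df  = pd.DataFrame({'two': [1, 2, 3, 4], 'one': [4.0, None, 6.0, 7.0]},
                   index=list('CDBA'))
df2 = pd.DataFrame({'four': [50, 60, 70, 80]}, index=list('CADB'))

# Equivalent of pd.concat(..., join_axes=[df.index]) on pandas >= 1.0
df3 = pd.concat([df, df2], axis=1).reindex(df.index)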

Parsing batches of datetimes with pandas

In [83]: pd.to_datetime(['2001-01-01','2010-Apr-09','02/04/2019','2019/02/03'])
Out[83]: DatetimeIndex(['2001-01-01', '2010-04-09', '2019-02-04', '2019-02-03'], dtype='datetime64[ns]', freq=None)

Generating datetime ranges with pandas

In [90]: pd.date_range?

In [91]: pd.date_range(start='2018-01-01',end='2018-02-01')
Out[91]:
DatetimeIndex(['2018-01-01', '2018-01-02', '2018-01-03', '2018-01-04',
               '2018-01-05', '2018-01-06', '2018-01-07', '2018-01-08',
               '2018-01-09', '2018-01-10', '2018-01-11', '2018-01-12',
               '2018-01-13', '2018-01-14', '2018-01-15', '2018-01-16',
               '2018-01-17', '2018-01-18', '2018-01-19', '2018-01-20',
               '2018-01-21', '2018-01-22', '2018-01-23', '2018-01-24',
               '2018-01-25', '2018-01-26', '2018-01-27', '2018-01-28',
               '2018-01-29', '2018-01-30', '2018-01-31', '2018-02-01'],
              dtype='datetime64[ns]', freq='D')

In [92]: pd.date_range(start='2018-01-01',periods=30)
Out[92]:
DatetimeIndex(['2018-01-01', '2018-01-02', '2018-01-03', '2018-01-04',
               '2018-01-05', '2018-01-06', '2018-01-07', '2018-01-08',
               '2018-01-09', '2018-01-10', '2018-01-11', '2018-01-12',
               '2018-01-13', '2018-01-14', '2018-01-15', '2018-01-16',
               '2018-01-17', '2018-01-18', '2018-01-19', '2018-01-20',
               '2018-01-21', '2018-01-22', '2018-01-23', '2018-01-24',
               '2018-01-25', '2018-01-26', '2018-01-27', '2018-01-28',
               '2018-01-29', '2018-01-30'],
              dtype='datetime64[ns]', freq='D')

In [96]: pd.date_range(start='2018-01-01',periods=30,freq='H')
Out[96]:
DatetimeIndex(['2018-01-01 00:00:00', '2018-01-01 01:00:00',
               '2018-01-01 02:00:00', '2018-01-01 03:00:00',
               '2018-01-01 04:00:00', '2018-01-01 05:00:00',
               '2018-01-01 06:00:00', '2018-01-01 07:00:00',
               '2018-01-01 08:00:00', '2018-01-01 09:00:00',
               '2018-01-01 10:00:00', '2018-01-01 11:00:00',
               '2018-01-01 12:00:00', '2018-01-01 13:00:00',
               '2018-01-01 14:00:00', '2018-01-01 15:00:00',
               '2018-01-01 16:00:00', '2018-01-01 17:00:00',
               '2018-01-01 18:00:00', '2018-01-01 19:00:00',
               '2018-01-01 20:00:00', '2018-01-01 21:00:00',
               '2018-01-01 22:00:00', '2018-01-01 23:00:00',
               '2018-01-02 00:00:00', '2018-01-02 01:00:00',
               '2018-01-02 02:00:00', '2018-01-02 03:00:00',
               '2018-01-02 04:00:00', '2018-01-02 05:00:00'],
              dtype='datetime64[ns]', freq='H')

In [97]: pd.date_range(start='2018-01-01',periods=30,freq='W')
Out[97]:
DatetimeIndex(['2018-01-07', '2018-01-14', '2018-01-21', '2018-01-28',
               '2018-02-04', '2018-02-11', '2018-02-18', '2018-02-25',
               '2018-03-04', '2018-03-11', '2018-03-18', '2018-03-25',
               '2018-04-01', '2018-04-08', '2018-04-15', '2018-04-22',
               '2018-04-29', '2018-05-06', '2018-05-13', '2018-05-20',
               '2018-05-27', '2018-06-03', '2018-06-10', '2018-06-17',
               '2018-06-24', '2018-07-01', '2018-07-08', '2018-07-15',
               '2018-07-22', '2018-07-29'],
              dtype='datetime64[ns]', freq='W-SUN')

In [98]: pd.date_range(start='2018-01-01',periods=30,freq='W-MON')
Out[98]:
DatetimeIndex(['2018-01-01', '2018-01-08', '2018-01-15', '2018-01-22',
               '2018-01-29', '2018-02-05', '2018-02-12', '2018-02-19',
               '2018-02-26', '2018-03-05', '2018-03-12', '2018-03-19',
               '2018-03-26', '2018-04-02', '2018-04-09', '2018-04-16',
               '2018-04-23', '2018-04-30', '2018-05-07', '2018-05-14',
               '2018-05-21', '2018-05-28', '2018-06-04', '2018-06-11',
               '2018-06-18', '2018-06-25', '2018-07-02', '2018-07-09',
               '2018-07-16', '2018-07-23'],
              dtype='datetime64[ns]', freq='W-MON')

In [99]: pd.date_range(start='2018-01-01',periods=30,freq='B')
Out[99]:
DatetimeIndex(['2018-01-01', '2018-01-02', '2018-01-03', '2018-01-04',
               '2018-01-05', '2018-01-08', '2018-01-09', '2018-01-10',
               '2018-01-11', '2018-01-12', '2018-01-15', '2018-01-16',
               '2018-01-17', '2018-01-18', '2018-01-19', '2018-01-22',
               '2018-01-23', '2018-01-24', '2018-01-25', '2018-01-26',
               '2018-01-29', '2018-01-30', '2018-01-31', '2018-02-01',
               '2018-02-02', '2018-02-05', '2018-02-06', '2018-02-07',
               '2018-02-08', '2018-02-09'],
              dtype='datetime64[ns]', freq='B')

In [100]: dt = _

In [101]: type(dt)
Out[101]: pandas.core.indexes.datetimes.DatetimeIndex

In [102]: dt[0]
Out[102]: Timestamp('2018-01-01 00:00:00', freq='B')

In [105]: dt[0].to_pydatetime()
Out[105]: datetime.datetime(2018, 1, 1, 0, 0)

Frequency strings are very flexible

In [107]: pd.date_range(start='2018-01-01',periods=30,freq='1h20min')
Out[107]:
DatetimeIndex(['2018-01-01 00:00:00', '2018-01-01 01:20:00',
               '2018-01-01 02:40:00', '2018-01-01 04:00:00',
               '2018-01-01 05:20:00', '2018-01-01 06:40:00',
               '2018-01-01 08:00:00', '2018-01-01 09:20:00',
               '2018-01-01 10:40:00', '2018-01-01 12:00:00',
               '2018-01-01 13:20:00', '2018-01-01 14:40:00',
               '2018-01-01 16:00:00', '2018-01-01 17:20:00',
               '2018-01-01 18:40:00', '2018-01-01 20:00:00',
               '2018-01-01 21:20:00', '2018-01-01 22:40:00',
               '2018-01-02 00:00:00', '2018-01-02 01:20:00',
               '2018-01-02 02:40:00', '2018-01-02 04:00:00',
               '2018-01-02 05:20:00', '2018-01-02 06:40:00',
               '2018-01-02 08:00:00', '2018-01-02 09:20:00',
               '2018-01-02 10:40:00', '2018-01-02 12:00:00',
               '2018-01-02 13:20:00', '2018-01-02 14:40:00'],
              dtype='datetime64[ns]', freq='80T')

Slicing/truncating pandas time series

In [121]: series = pd.Series(np.arange(1000),index=pd.date_range(start='2017-01-01',periods=1000))                                                                                                

In [122]: series
Out[122]:
2017-01-01 0
2017-01-02 1
2017-01-03 2
2017-01-04 3
2017-01-05 4
2017-01-06 5
2017-01-07 6
2017-01-08 7
2017-01-09 8
2017-01-10 9
2017-01-11 10
2017-01-12 11
2017-01-13 12
2017-01-14 13
2017-01-15 14
2017-01-16 15
2017-01-17 16
2017-01-18 17
2017-01-19 18
2017-01-20 19
2017-01-21 20
2017-01-22 21
2017-01-23 22
2017-01-24 23
2017-01-25 24
2017-01-26 25
2017-01-27 26
2017-01-28 27
2017-01-29 28
2017-01-30 29
...
2019-08-29 970
2019-08-30 971
2019-08-31 972
2019-09-01 973
2019-09-02 974
2019-09-03 975
2019-09-04 976
2019-09-05 977
2019-09-06 978
2019-09-07 979
2019-09-08 980
2019-09-09 981
2019-09-10 982
2019-09-11 983
2019-09-12 984
2019-09-13 985
2019-09-14 986
2019-09-15 987
2019-09-16 988
2019-09-17 989
2019-09-18 990
2019-09-19 991
2019-09-20 992
2019-09-21 993
2019-09-22 994
2019-09-23 995
2019-09-24 996
2019-09-25 997
2019-09-26 998
2019-09-27 999
Freq: D, Length: 1000, dtype: int64

In [123]: series.index
Out[123]:
DatetimeIndex(['2017-01-01', '2017-01-02', '2017-01-03', '2017-01-04',
'2017-01-05', '2017-01-06', '2017-01-07', '2017-01-08',
'2017-01-09', '2017-01-10',
...
'2019-09-18', '2019-09-19', '2019-09-20', '2019-09-21',
'2019-09-22', '2019-09-23', '2019-09-24', '2019-09-25',
'2019-09-26', '2019-09-27'],
dtype='datetime64[ns]', length=1000, freq='D')

In [124]: series.head()
Out[124]:
2017-01-01 0
2017-01-02 1
2017-01-03 2
2017-01-04 3
2017-01-05 4
Freq: D, dtype: int64

In [125]: series.tail()
Out[125]:
2019-09-23 995
2019-09-24 996
2019-09-25 997
2019-09-26 998
2019-09-27 999
Freq: D, dtype: int64

In [126]: series['2018-03']
Out[126]:
2018-03-01 424
2018-03-02 425
2018-03-03 426
2018-03-04 427
2018-03-05 428
2018-03-06 429
2018-03-07 430
2018-03-08 431
2018-03-09 432
2018-03-10 433
2018-03-11 434
2018-03-12 435
2018-03-13 436
2018-03-14 437
2018-03-15 438
2018-03-16 439
2018-03-17 440
2018-03-18 441
2018-03-19 442
2018-03-20 443
2018-03-21 444
2018-03-22 445
2018-03-23 446
2018-03-24 447
2018-03-25 448
2018-03-26 449
2018-03-27 450
2018-03-28 451
2018-03-29 452
2018-03-30 453
2018-03-31 454
Freq: D, dtype: int64

In [128]: series['2018-12':'2019-01-10']
Out[128]:
2018-12-01 699
2018-12-02 700
2018-12-03 701
2018-12-04 702
2018-12-05 703
2018-12-06 704
2018-12-07 705
2018-12-08 706
2018-12-09 707
2018-12-10 708
2018-12-11 709
2018-12-12 710
2018-12-13 711
2018-12-14 712
2018-12-15 713
2018-12-16 714
2018-12-17 715
2018-12-18 716
2018-12-19 717
2018-12-20 718
2018-12-21 719
2018-12-22 720
2018-12-23 721
2018-12-24 722
2018-12-25 723
2018-12-26 724
2018-12-27 725
2018-12-28 726
2018-12-29 727
2018-12-30 728
2018-12-31 729
2019-01-01 730
2019-01-02 731
2019-01-03 732
2019-01-04 733
2019-01-05 734
2019-01-06 735
2019-01-07 736
2019-01-08 737
2019-01-09 738
2019-01-10 739
Freq: D, dtype: int64
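
The same window can also be cut out with truncate, which takes explicit start/end dates; a sketch:

import numpy as np
import pandas as pd

series = pd.Series(np.arange(1000),
                   index=pd.date_range(start='2017-01-01', periods=1000))

# Equivalent to series['2018-12':'2019-01-10']
series.truncate(before='2018-12-01', after='2019-01-10')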

resample: resample and compute

In [132]: series.resample('M').sum()
Out[132]:
2017-01-31 465
2017-02-28 1246
2017-03-31 2294
2017-04-30 3135
2017-05-31 4185
2017-06-30 4965
2017-07-31 6076
2017-08-31 7037
2017-09-30 7725
2017-10-31 8928
2017-11-30 9555
2017-12-31 10819
2018-01-31 11780
2018-02-28 11466
2018-03-31 13609
2018-04-30 14085
2018-05-31 15500
2018-06-30 15915
2018-07-31 17391
2018-08-31 18352
2018-09-30 18675
2018-10-31 20243
2018-11-30 20505
2018-12-31 22134
2019-01-31 23095
2019-02-28 21686
2019-03-31 24924
2019-04-30 25035
2019-05-31 26815
2019-06-30 26865
2019-07-31 28706
2019-08-31 29667
2019-09-30 26622
Freq: M, dtype: int64

In [133]: series.resample('M').mean()
Out[133]:
2017-01-31 15.0
2017-02-28 44.5
2017-03-31 74.0
2017-04-30 104.5
2017-05-31 135.0
2017-06-30 165.5
2017-07-31 196.0
2017-08-31 227.0
2017-09-30 257.5
2017-10-31 288.0
2017-11-30 318.5
2017-12-31 349.0
2018-01-31 380.0
2018-02-28 409.5
2018-03-31 439.0
2018-04-30 469.5
2018-05-31 500.0
2018-06-30 530.5
2018-07-31 561.0
2018-08-31 592.0
2018-09-30 622.5
2018-10-31 653.0
2018-11-30 683.5
2018-12-31 714.0
2019-01-31 745.0
2019-02-28 774.5
2019-03-31 804.0
2019-04-30 834.5
2019-05-31 865.0
2019-06-30 895.5
2019-07-31 926.0
2019-08-31 957.0
2019-09-30 986.0
Freq: M, dtype: float64
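
resample can also apply several reducers in one pass via .agg; a sketch:

import numpy as np
import pandas as pd

series = pd.Series(np.arange(1000),
                   index=pd.date_range(start='2017-01-01', periods=1000))

# Monthly sum and mean side by side (agg accepts a list of reducers)
series.resample('M').agg(['sum', 'mean'])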

Reading files with pandas

In [14]: pd.read_csv('601318.csv', index_col='date', parse_dates=['date'])
Out[14]:
Unnamed: 0 open close high low volume code
date
2007-03-01 0 21.878 20.473 22.302 20.040 1977633.51 601318
2007-03-02 1 20.565 20.307 20.758 20.075 425048.32 601318
2007-03-05 2 20.119 19.419 20.202 19.047 419196.74 601318
2007-03-06 3 19.253 19.800 20.128 19.143 297727.88 601318
2007-03-07 4 19.817 20.338 20.522 19.651 287463.78 601318
2007-03-08 5 20.171 20.093 20.272 19.988 130983.83 601318
2007-03-09 6 20.084 19.922 20.171 19.559 160887.79 601318
2007-03-12 7 19.821 19.563 19.821 19.471 145353.06 601318
2007-03-13 8 19.607 19.642 19.804 19.524 102319.68 601318
2007-03-14 9 19.384 19.664 19.734 19.161 173306.56 601318
2007-03-15 10 19.918 19.673 20.342 19.603 152521.90 601318
2007-03-16 11 19.686 19.782 20.106 19.428 227547.24 601318
2007-03-20 12 20.478 20.031 20.530 19.909 222026.87 601318
2007-03-21 13 20.040 19.734 20.128 19.646 136728.32 601318
2007-03-22 14 19.887 19.848 20.093 19.791 167509.84 601318
2007-03-23 15 19.839 19.760 19.922 19.563 139810.14 601318
2007-03-26 16 19.778 20.101 20.215 19.769 223266.79 601318
2007-03-27 17 20.036 20.088 20.285 19.966 139338.19 601318
2007-03-28 18 20.084 20.382 20.522 19.944 258263.69 601318
2007-03-29 19 20.482 20.740 21.349 20.338 461986.18 601318
2007-03-30 20 20.548 20.587 20.946 20.443 144617.20 601318
2007-04-02 21 20.587 21.174 21.309 20.587 231445.03 601318
2007-04-03 22 21.187 21.095 21.335 20.959 132712.04 601318
2007-04-04 23 21.099 20.911 21.222 20.806 122454.69 601318
2007-04-05 24 20.915 20.968 21.003 20.653 122865.38 601318
2007-04-06 25 20.863 21.007 21.419 20.784 195208.52 601318
2007-04-09 26 21.042 22.582 22.705 20.872 462770.21 601318
2007-04-10 27 22.316 23.112 23.488 22.316 407823.90 601318
2007-04-11 28 23.138 23.427 24.145 23.016 243446.50 601318
2007-04-12 29 23.619 23.383 25.378 23.169 159270.43 601318
... ... ... ... ... ... ... ...
2017-11-06 2533 64.690 64.010 64.700 62.920 908570.00 601318
2017-11-07 2534 64.300 65.370 66.570 64.300 1173565.00 601318
2017-11-08 2535 65.400 64.610 66.350 64.320 867820.00 601318
2017-11-09 2536 64.500 66.330 66.390 64.400 708669.00 601318
2017-11-10 2537 66.000 69.890 69.950 65.930 1254060.00 601318
2017-11-13 2538 70.100 70.150 70.570 69.480 752207.00 601318
2017-11-14 2539 70.690 70.420 71.290 69.770 801748.00 601318
2017-11-15 2540 69.980 69.200 70.430 68.590 1009459.00 601318
2017-11-16 2541 68.800 73.010 73.110 68.750 1163764.00 601318
2017-11-17 2542 72.700 75.270 75.320 71.800 1580393.00 601318
2017-11-20 2543 74.780 75.710 76.490 74.070 1141281.00 601318
2017-11-21 2544 75.130 78.440 79.680 75.130 1445569.00 601318
2017-11-22 2545 79.500 77.450 79.960 76.580 1293487.00 601318
2017-11-23 2546 76.600 74.320 78.440 73.700 1576210.00 601318
2017-11-24 2547 74.150 74.620 75.460 72.710 1317843.00 601318
2017-11-27 2548 74.700 73.550 74.900 71.550 1637232.00 601318
2017-11-28 2549 72.700 72.730 73.540 71.880 786469.00 601318
2017-11-29 2550 73.540 72.420 74.190 71.260 875004.00 601318
2017-11-30 2551 71.370 69.920 71.670 69.550 1163733.00 601318
2017-12-01 2552 69.650 68.100 70.180 67.910 1393046.00 601318
2017-12-04 2553 67.600 69.390 70.350 67.370 1159283.00 601318
2017-12-05 2554 68.900 71.200 71.500 68.780 1692539.00 601318
2017-12-06 2555 70.900 69.400 71.100 68.000 1245607.00 601318
2017-12-07 2556 69.350 68.640 69.810 67.600 859703.00 601318
2017-12-08 2557 68.940 71.490 71.860 68.660 1095632.00 601318
2017-12-11 2558 71.200 73.250 73.310 70.820 1139927.00 601318
2017-12-12 2559 73.250 71.210 73.560 71.170 777900.00 601318
2017-12-13 2560 71.210 72.120 72.620 70.200 865117.00 601318
2017-12-14 2561 72.120 71.010 72.160 70.600 676186.00 601318
2017-12-15 2562 70.690 70.380 71.440 70.050 735547.00 601318

[2563 rows x 7 columns]

In [15]: df = _

In [16]: df.index
Out[16]:
DatetimeIndex(['2007-03-01', '2007-03-02', '2007-03-05', '2007-03-06',
'2007-03-07', '2007-03-08', '2007-03-09', '2007-03-12',
'2007-03-13', '2007-03-14',
...
'2017-12-04', '2017-12-05', '2017-12-06', '2017-12-07',
'2017-12-08', '2017-12-11', '2017-12-12', '2017-12-13',
'2017-12-14', '2017-12-15'],
dtype='datetime64[ns]', name='date', length=2563, freq=None)

If the csv file has no header row (no column names):

pd.read_csv('601318.csv', header=None, names=['A','B','C','D','E','F','G','H'])

If the csv file contains missing values written in different ways, e.g. some as NaN and some as None, how do you get pandas to interpret them all correctly?

Pass a list to the na_values parameter; every string in the list will be interpreted as numpy.nan:

pd.read_csv('601318.csv', na_values=['None','none','nan','NaN'])
In [8]: pd.read_*?
pd.read_clipboard
pd.read_csv
pd.read_excel
pd.read_feather
pd.read_fwf
pd.read_gbq
pd.read_hdf
pd.read_html
pd.read_json
pd.read_msgpack
pd.read_parquet
pd.read_pickle
pd.read_sas
pd.read_sql
pd.read_sql_query
pd.read_sql_table
pd.read_stata
pd.read_table

Aggregating data by index with groupby

Given the raw data below, it needs to be aggregated by index, i.e. aggregated over duplicate indices.

In [64]: df
Out[64]:
concurrence p2p_dl p2p_ul cdn_dl isp_local_p2p_ul isp_remote_p2p_ul isp_other_p2p_ul isp_unknown_p2p_ul
2019-07-23 00:00:00 2.0 0.0 952181.0 0.0 0.0 0.0 0.0 0.0
2019-07-23 00:00:00 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
2019-07-23 00:00:00 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
2019-07-23 00:00:00 3.0 0.0 288200.0 0.0 0.0 0.0 0.0 0.0
2019-07-23 00:00:00 4.0 0.0 11921229.0 0.0 0.0 0.0 0.0 0.0
2019-07-23 00:00:00 2.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
2019-07-23 00:00:00 5.0 0.0 8938038.0 0.0 0.0 0.0 0.0 0.0
2019-07-23 00:00:00 4.0 0.0 1967635.0 0.0 0.0 0.0 0.0 0.0
2019-07-23 00:00:00 6.0 0.0 19436976.0 0.0 0.0 0.0 0.0 0.0
2019-07-23 00:00:00 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
2019-07-23 00:00:00 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
2019-07-23 00:00:00 41.0 0.0 182659387.0 358400.0 0.0 0.0 0.0 0.0
2019-07-23 00:00:00 44.0 0.0 13396980.0 0.0 0.0 0.0 0.0 0.0
2019-07-23 00:00:00 13.0 0.0 4225576.0 0.0 0.0 0.0 0.0 0.0
2019-07-23 00:00:00 18.0 0.0 28843115.0 0.0 0.0 0.0 0.0 0.0
2019-07-23 00:00:00 1.0 0.0 15952.0 0.0 0.0 0.0 0.0 0.0
2019-07-23 00:00:00 23.0 0.0 64174376.0 0.0 0.0 0.0 0.0 0.0
2019-07-23 00:00:00 73.0 0.0 448441433.0 0.0 0.0 0.0 0.0 0.0
2019-07-23 00:00:00 2.0 0.0 1492338.0 0.0 0.0 0.0 0.0 0.0
2019-07-23 00:00:00 20.0 0.0 26001517.0 0.0 0.0 0.0 0.0 0.0
2019-07-23 00:00:00 67.0 0.0 189485455.0 0.0 0.0 0.0 0.0 0.0
2019-07-23 00:00:00 157.0 0.0 181990022.0 0.0 0.0 0.0 0.0 0.0
2019-07-23 00:00:00 4.0 0.0 4209738.0 0.0 0.0 0.0 0.0 0.0
2019-07-23 00:00:00 2.0 0.0 1887856.0 0.0 0.0 0.0 0.0 0.0
2019-07-23 00:00:00 25.0 0.0 61364395.0 0.0 0.0 0.0 0.0 0.0
2019-07-23 00:00:00 14.0 0.0 14395728.0 0.0 0.0 0.0 0.0 0.0
2019-07-23 00:00:00 47.0 0.0 62243987.0 0.0 0.0 0.0 0.0 0.0
2019-07-23 00:00:00 4.0 0.0 5284136.0 0.0 0.0 0.0 0.0 0.0
2019-07-23 00:00:00 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
2019-07-23 00:00:00 3.0 0.0 7591219.0 0.0 0.0 0.0 0.0 0.0
... ... ... ... ... ... ... ... ...
2019-07-23 00:10:00 3.0 0.0 29797700.0 0.0 0.0 0.0 0.0 0.0
2019-07-23 00:10:00 3.0 0.0 12962682.0 0.0 0.0 0.0 0.0 0.0
2019-07-23 00:10:00 5.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
2019-07-23 00:10:00 3.0 0.0 1107695.0 0.0 0.0 0.0 0.0 0.0
2019-07-23 00:10:00 13.0 0.0 102279733.0 1034525.0 0.0 0.0 0.0 0.0
2019-07-23 00:10:00 19.0 0.0 41296504.0 0.0 0.0 0.0 0.0 0.0
2019-07-23 00:10:00 2.0 0.0 8613982.0 0.0 0.0 0.0 0.0 0.0
2019-07-23 00:10:00 3.0 0.0 8017425.0 0.0 0.0 0.0 0.0 0.0
2019-07-23 00:10:00 1.0 0.0 1665251.0 0.0 0.0 0.0 0.0 0.0
2019-07-23 00:10:00 57.0 0.0 158300081.0 28603381.0 0.0 0.0 0.0 0.0
2019-07-23 00:10:00 22.0 0.0 106194450.0 216074.0 0.0 0.0 0.0 0.0
2019-07-23 00:10:00 132.0 0.0 238920037.0 6613339.0 0.0 0.0 0.0 0.0
2019-07-23 00:10:00 65.0 0.0 570891024.0 1917279.0 0.0 0.0 0.0 0.0
2019-07-23 00:10:00 19.0 0.0 265779751.0 1758985.0 0.0 0.0 0.0 0.0
2019-07-23 00:10:00 38.0 0.0 56797177.0 1384116.0 0.0 0.0 0.0 0.0
2019-07-23 00:10:00 42.0 0.0 985598578.0 3860560.0 0.0 0.0 0.0 0.0
2019-07-23 00:10:00 207.0 0.0 824804811.0 20935193.0 0.0 0.0 0.0 0.0
2019-07-23 00:10:00 24.0 0.0 115753257.0 1573962.0 0.0 0.0 0.0 0.0
2019-07-23 00:10:00 2.0 0.0 1635388.0 0.0 0.0 0.0 0.0 0.0
2019-07-23 00:10:00 10.0 0.0 1007358.0 219390.0 0.0 0.0 0.0 0.0
2019-07-23 00:10:00 93.0 0.0 401098219.0 2656469.0 0.0 0.0 0.0 0.0
2019-07-23 00:10:00 36.0 0.0 126658914.0 2714817.0 0.0 0.0 0.0 0.0
2019-07-23 00:10:00 14.0 0.0 52857937.0 811010.0 0.0 0.0 0.0 0.0
2019-07-23 00:10:00 52.0 0.0 252881233.0 2057686.0 0.0 0.0 0.0 0.0
2019-07-23 00:10:00 11.0 0.0 101013831.0 0.0 0.0 0.0 0.0 0.0
2019-07-23 00:10:00 26.0 0.0 48285406.0 904998.0 0.0 0.0 0.0 0.0
2019-07-23 00:10:00 1.0 0.0 1582081.0 0.0 0.0 0.0 0.0 0.0
2019-07-23 00:10:00 3.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
2019-07-23 00:10:00 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
2019-07-23 00:10:00 2.0 0.0 1380872.0 0.0 0.0 0.0 0.0 0.0

[201 rows x 8 columns]

Aggregate over duplicate indices

In [58]: df2 = df.groupby(df.index).sum()                                                                                                                                                                                                 

In [59]: df2
Out[59]:
concurrence p2p_dl p2p_ul cdn_dl isp_local_p2p_ul isp_remote_p2p_ul isp_other_p2p_ul isp_unknown_p2p_ul
2019-07-23 00:00:00 1624.0 0.0 6.363896e+09 358400.0 0.0 0.0 0.0 0.0
2019-07-23 00:05:00 1648.0 0.0 5.045862e+09 48245645.0 0.0 0.0 0.0 0.0
2019-07-23 00:10:00 1633.0 502475.0 6.116968e+09 116286357.0 0.0 0.0 0.0 0.0

In [60]: df2.to_dict()
Out[60]:
{'concurrence': {'2019-07-23 00:00:00': 1624.0,
'2019-07-23 00:05:00': 1648.0,
'2019-07-23 00:10:00': 1633.0},
'p2p_dl': {'2019-07-23 00:00:00': 0.0,
'2019-07-23 00:05:00': 0.0,
'2019-07-23 00:10:00': 502475.0},
'p2p_ul': {'2019-07-23 00:00:00': 6363895723.0,
'2019-07-23 00:05:00': 5045861525.0,
'2019-07-23 00:10:00': 6116968304.0},
'cdn_dl': {'2019-07-23 00:00:00': 358400.0,
'2019-07-23 00:05:00': 48245645.0,
'2019-07-23 00:10:00': 116286357.0},
'isp_local_p2p_ul': {'2019-07-23 00:00:00': 0.0,
'2019-07-23 00:05:00': 0.0,
'2019-07-23 00:10:00': 0.0},
'isp_remote_p2p_ul': {'2019-07-23 00:00:00': 0.0,
'2019-07-23 00:05:00': 0.0,
'2019-07-23 00:10:00': 0.0},
'isp_other_p2p_ul': {'2019-07-23 00:00:00': 0.0,
'2019-07-23 00:05:00': 0.0,
'2019-07-23 00:10:00': 0.0},
'isp_unknown_p2p_ul': {'2019-07-23 00:00:00': 0.0,
'2019-07-23 00:05:00': 0.0,
'2019-07-23 00:10:00': 0.0}}

In [76]: df2.to_dict(orient="list")
Out[76]:
{'concurrence': [1624.0, 1648.0, 1633.0],
'p2p_dl': [0.0, 0.0, 502475.0],
'p2p_ul': [6363895723.0, 5045861525.0, 6116968304.0],
'cdn_dl': [358400.0, 48245645.0, 116286357.0],
'isp_local_p2p_ul': [0.0, 0.0, 0.0],
'isp_remote_p2p_ul': [0.0, 0.0, 0.0],
'isp_other_p2p_ul': [0.0, 0.0, 0.0],
'isp_unknown_p2p_ul': [0.0, 0.0, 0.0]}

In [77]: df2.index
Out[77]: Index(['2019-07-23 00:00:00', '2019-07-23 00:05:00', '2019-07-23 00:10:00'], dtype='object')

In [78]: df2.index.tolist()
Out[78]: ['2019-07-23 00:00:00', '2019-07-23 00:05:00', '2019-07-23 00:10:00']
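
The same aggregation is often written with groupby(level=0), which groups on the index labels directly; a tiny sketch with made-up rows:

import pandas as pd

df = pd.DataFrame({'concurrence': [2.0, 1.0, 3.0, 5.0]},
                  index=['2019-07-23 00:00:00', '2019-07-23 00:00:00',
                         '2019-07-23 00:05:00', '2019-07-23 00:05:00'])

# level=0 means "group by the index itself" -- equivalent to df.groupby(df.index)
df.groupby(level=0).sum()
#                      concurrence
# 2019-07-23 00:00:00          3.0
# 2019-07-23 00:05:00          8.0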
