001_python实现数据分析

一、

# coding:utf8

# !/usr/bin/python

# import numpy as np

import pandas as pd

import np

def example2():
    """Print the summary statistics of a numeric ``Series``.

    Builds ``pd.Series([1, 2, 3])`` and prints the result of its
    ``describe()`` call to standard output.

    :return: None
    """
    numbers = pd.Series([1, 2, 3])

    # Single-argument call form: valid as a Python 2 print statement with a
    # parenthesised expression and as the Python 3 print function.
    print(numbers.describe())

    # Sample output recorded by the original author:
    #
    #     count    3.0
    #     mean     2.0
    #     std      1.0
    #     min      1.0
    #     25%      1.5
    #     50%      2.0
    #     75%      2.5
    #     max      3.0
    #     dtype: float64

def example3():
    """Print the summary statistics of a categorical (string) ``Series``.

    Builds ``pd.Series(['a', 'a', 'b', 'c'])`` and prints the result of its
    ``describe()`` call to standard output.

    :return: None
    """
    letters = pd.Series(['a', 'a', 'b', 'c'])

    # Single-argument call form: valid on both Python 2 and Python 3.
    print(letters.describe())

    # Sample output recorded by the original author:
    #
    #     count     4
    #     unique    3
    #     top       a
    #     freq      2
    #     dtype: object

def example4():
    """Print the summary statistics of a timestamp ``Series``.

    Builds a ``Series`` from three ``np.datetime64`` values (one date in
    2000 and the same 2010 date twice) and prints the result of its
    ``describe()`` call to standard output.

    :return: None
    """
    # Imported locally so this example does not depend on the module-level
    # ``import np`` resolving to NumPy.
    import numpy as np

    timestamps = pd.Series([
        np.datetime64("2000-01-01"),
        np.datetime64("2010-01-01"),
        np.datetime64("2010-01-01"),
    ])

    # Single-argument call form: valid on both Python 2 and Python 3.
    print(timestamps.describe())

    # Sample output recorded by the original author (the exact rows may
    # differ between pandas versions -- TODO confirm against the installed
    # version):
    #
    #     count                       3
    #     unique                      2
    #     top       2010-01-01 00:00:00
    #     freq                        2
    #     first     2000-01-01 00:00:00
    #     last      2010-01-01 00:00:00
    #     dtype: object

def example5():
    """Print the default summary statistics of a mixed-type ``DataFrame``.

    The frame has one categorical, one numeric and one object column.
    ``describe()`` is called with no arguments and the result is printed to
    standard output.

    :return: None
    """
    df = pd.DataFrame({
        'categorical': pd.Categorical(['d', 'e', 'f']),
        'numeric': [1, 2, 3],
        'object': ['a', 'b', 'c'],
    })

    # Single-argument call form: valid on both Python 2 and Python 3.
    print(df.describe())

    # Further variations kept from the original as reference notes (they are
    # not executed). ``object`` is used where the original wrote
    # ``np.object``, an alias that recent NumPy releases no longer provide.
    #
    # Describing all columns of a ``DataFrame`` regardless of data type:
    #     print(df.describe(include='all'))
    # Describing a column from a ``DataFrame`` by accessing it as an attribute:
    #     print(df.numeric.describe())
    # Including only numeric columns in a ``DataFrame`` description:
    #     print(df.describe(include=[np.number]))
    # Including only string columns in a ``DataFrame`` description:
    #     print(df.describe(include=[object]))
    # Including only categorical columns from a ``DataFrame`` description:
    #     print(df.describe(include=['category']))
    # Excluding numeric columns from a ``DataFrame`` description:
    #     print(df.describe(exclude=[np.number]))
    # Excluding object columns from a ``DataFrame`` description:
    #     print(df.describe(exclude=[object]))

def example1():
    """Print several aggregate statistics of a ``describe()`` summary table.

    A two-column ``DataFrame`` is built from a nested dict, summarised with
    ``describe()``, and then ``count``, ``median``, ``mode``, ``std``,
    ``var`` and ``skew`` of that summary are printed to standard output,
    each under its own label.

    NOTE(review): the aggregates are computed over the summary table
    returned by ``describe()``, not over the raw data in ``df2``. That is
    what the original code did and it is kept unchanged here -- confirm
    whether statistics of the raw data were intended.

    :return: None
    """
    dic1 = {'000': {'a': 1, 'b': 2, 'c': 3}, '001': {'d': 4, 'e': 5, 'f': 6}}
    df2 = pd.DataFrame(dic1)

    # Compute the summary once and reuse it instead of rebuilding it for
    # every statistic.
    summary = df2.describe()

    # Summary table recorded by the original author:
    #
    #            000  001
    #     count  3.0  3.0
    #     mean   2.0  5.0
    #     std    1.0  1.0
    #     min    1.0  4.0
    #     25%    1.5  4.5
    #     50%    2.0  5.0
    #     75%    2.5  5.5
    #     max    3.0  6.0

    # Each print uses the single-argument call form, which is valid on both
    # Python 2 and Python 3.
    print("返回非NAN数据项数量=>count()\n{count}\n".format(count=summary.count()))
    print("返回中位数,等价第50位百分位数的值=>median()\n{median}\n".format(median=summary.median()))
    print("返回数据的众值=>mode()\n{mode}\n".format(mode=summary.mode()))
    print("返回数据的标准差(描述离散度)=>std()\n{std}\n".format(std=summary.std()))
    print("返回方差=>var()\n{var}\n".format(var=summary.var()))
    print("偏态系数(skewness,表示数据分布的对称程度)=>skew()\n{skew}\n".format(skew=summary.skew()))

def main():
    """Script entry point: run the ``example1`` demonstration."""
    example1()

# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

输出=>

返回非NAN数据项数量=>count()

000    8

001    8

dtype: int64

返回中位数,等价第50位百分位数的值=>median()

000    2.00

001    4.75

dtype: float64

返回数据的众值=>mode()

   000  001

0  1.0  5.0

1  2.0  NaN

2  3.0  NaN

返回数据的标准差(描述离散度)=>std()

000    0.801784

001    1.603567

dtype: float64

返回方差=>var()

000    0.642857

001    2.571429

dtype: float64

偏态系数(skewness,表示数据分布的对称程度)=>skew()

000    0.000000

001   -1.299187

dtype: float64

001_python实现数据分析的更多相关文章

利用Python进行数据分析基础系列随笔汇总
一共 15 篇随笔,主要是为了记录数据分析过程中的一些小 demo,分享给其他需要的网友,更为了方便以后自己查看,15 篇随笔,每篇内容基本都是以一句说明加一段代码的方式, 保持简单小巧,看起来也清晰 ...
利用Python进行数据分析(10) pandas基础: 处理缺失数据
数据不完整在数据分析的过程中很常见. pandas使用浮点值NaN表示浮点和非浮点数组里的缺失数据. pandas使用isnull()和notnull()函数来判断缺失情况. 对于缺失数据一般处理 ...
利用Python进行数据分析(12) pandas基础: 数据合并
pandas 提供了三种主要方法可以对数据进行合并: pandas.merge()方法:数据库风格的合并: pandas.concat()方法:轴向连接,即沿着一条轴将多个对象堆叠到一起: 实例方法c ...
利用Python进行数据分析(5) NumPy基础: ndarray索引和切片
概念理解索引即通过一个无符号整数值获取数组里的值. 切片即对数组里某个片段的描述. 一维数组一维数组的索引一维数组的索引和Python列表的功能类似: 一维数组的切片一维数组的切片语法格式为a ...
利用Python进行数据分析(9) pandas基础: 汇总统计和计算
pandas 对象拥有一些常用的数学和统计方法. 例如,sum() 方法,进行列小计: sum() 方法传入 axis=1 指定为横向汇总,即行小计: idxmax() 获取最大值对应的索 ...
利用Python进行数据分析(8) pandas基础: Series和DataFrame的基本操作
一.reindex() 方法:重新索引针对 Series 重新索引指的是根据index参数重新进行排序. 如果传入的索引值在数据里不存在,则不会报错,而是添加缺失值的新行. 不想用缺失值,可以用 ...
利用Python进行数据分析(7) pandas基础: Series和DataFrame的简单介绍
一.pandas 是什么 pandas 是基于 NumPy 的一个 Python 数据分析包,主要目的是为了数据分析.它提供了大量高级的数据结构和对数据处理的方法. pandas 有两个主要的数据结构 ...
利用Python进行数据分析(4) NumPy基础: ndarray简单介绍
一.NumPy 是什么 NumPy 是 Python 科学计算的基础包,它专为进行严格的数字处理而产生.在之前的随笔里已有更加详细的介绍,这里不再赘述. 利用 Python 进行数据分析(一)简单介绍 ...
利用Python进行数据分析(3) 使用IPython提高开发效率
一.IPython 简介 IPython 是一个交互式的 Python 解释器,而且它更加高效. 它和大多传统工作模式(编辑 -> 编译 -> 运行)不同的是, 它采用的工作模式是:执 ...

随机推荐

vue2.0 日历日程表，可进行二次开发.
由于工作业务需求,要写一个日程表,日程表写之前要先生成日历,废话不多说,直接上代码: <!DOCTYPE html> <html lang="zh-CN"> ...
WebSocket整合SSM(Spring,Struts2,Maven)
一.WebSocket与HTTP长轮询 WebSocket 属于HTML5 规范的一部分,提供的一种在单个 TCP 连接上进行全双工通讯的协议.允许服务端主动向客户端推送数据.在 WebSocket ...
select * 和 select 所有字段的区别
阅读本文大概需要 1 分钟. 之前发过的文章中,关于 select * 和 select 所有字段的知识,有描述不恰当,这次重新纠正下,加深下理解. MySQL 5.1.37 表记录数 41,547, ...
Struts第一个案例搭建
1.引入依赖 <dependency> <groupId>javaee</groupId> <artifactId>javaee-api</art ...
每日分享！～ JavaScript（js数组如何在指定的位置插入一个元素）
这个想法是在一个面试题中看到的: 题目是这样的: // 一个数组,在指定的index 位置插入一个元素,返回一个新的数组,不改变原来的数组 <script> function inse ...
The connection to the server localhost:8080 was refused - did you specify the right host or port?
The connection to the server localhost:8080 was refused - did you specify the right host or port? 解决 ...
Fescar(Seata)-Springcloud流程分析-1阶段
Fescar是阿里18年开源的分布式事务的框架.Fescar的开源对分布式事务框架领域影响很大.作为开源大户,Fescar来自阿里的GTS,经历了好几次双十一的考验,一经开源便颇受关注.今天就来看了F ...
挖一挖@Bean这个东西
有Bean得治任何一个正常程序的访问都会在内存中创建非常多的对象,对象与对象之间还会出现很多依赖关系(一个处理业务逻辑的类中几乎都会使用到别的类的实例),一般的做法都是使用new关键字来创建对象,对 ...
Hadoop+Hbase分布式集群架构“完全篇”
本文收录在Linux运维企业架构实战系列前言:本篇博客是博主踩过无数坑,反复查阅资料,一步步搭建,操作完成后整理的个人心得,分享给大家~~~ 1.认识Hadoop和Hbase 1.1 hadoop简 ...
.NET ClrProfiler ILRewrite 商业级APM原理
Demo:https://github.com/caozhiyuan/ClrProfiler.Trace 背景为了实现自动.无依赖地跟踪分析应用程序性能(达到商业级APM效果),作者希望能动态修改应 ...

001_python实现数据分析

001_python实现数据分析的更多相关文章

随机推荐

热门专题