# import pandas
import pandas as pd # creating a DataFrame
pd.DataFrame({'Yes': [50, 31], 'No': [101, 2]})

# another example of creating a dataframe
pd.DataFrame({'Bob': ['I liked it.', 'It was awful.'], 'Sue': ['Pretty good.', 'Bland']})

pd.DataFrame({'Bob': ['I liked it.', 'It was awful.'],
'Sue': ['Pretty good.', 'Bland.']},
index = ['Product A', 'Product B'])

# creating a pandas series
pd.Series([1, 2, 3, 4, 5])

# we can think of a Series as a column of a DataFrame.
# we can assign index values to Series in same way as pandas DataFrame
pd.Series([10, 20, 30], index=['2015 sales', '2016 sales', '2017 sales'], name='Product A')

# reading a csv file and storing it in a variable
wine_reviews = pd.read_csv("F:\\kaggleDataSet\\wine-reviews\\winemag-data-130k-v2.csv")
# we can use the 'shape' attribute to check size of dataset
wine_reviews.shape

# To show first five rows of data, use 'head()' method
wine_reviews.head()

wine_reviews = pd.read_csv("F:\\kaggleDataSet\\wine-reviews\\winemag-data-130k-v2.csv", index_col=0)
wine_reviews.head()

wine_reviews.head().to_csv("F:\\wine_reviews.csv")

import pandas as pd
reviews = pd.read_csv("F:\\kaggleDataSet\\wine-reviews\\winemag-data-130k-v2.csv", index_col=0)
pd.set_option("display.max_rows", 5)
reviews

# access 'country' property (or column) of 'reviews'
reviews.country

# Another way to do above operation
# when a column name contains space, we have to use this method
reviews['country']

# To access first row of country column
reviews['country'][0]

# returns first row
reviews.iloc[0]

# returns first column (country) (all rows due to ':')
reviews.iloc[:, 0]

# retruns first 3 rows of first column
reviews.iloc[:3, 0]

# we can pass a list of indices of rows/columns to select
reviews.iloc[[0, 1, 2, 3], 0]

# We can also pass negative numbers as we do in Python
reviews.iloc[-5:]

# To select first entry in country column
reviews.loc[0, 'country']

# select columns by name using 'loc'
reviews.loc[:, ['taster_name', 'taster_twitter_handle', 'points']]

# 'set_index' to the 'title' field
reviews.set_index('title')

# 1. Find out whether wine is produced in Italy
reviews.country == 'Italy'

# 2. Now select all wines produced in Italy
reviews.loc[reviews.country == 'Italy'] #reviews[reviews.country == 'Italy']

# Add one more condition for points to find better than average wines produced in Italy
reviews.loc[(reviews.country == 'Italy') & (reviews.points >= 90)] # use | for 'OR' condition

reviews.loc[reviews.country.isin(['Italy', 'France'])]

reviews.loc[reviews.price.notnull()]

reviews['critic'] = 'everyone'
reviews.critic

# using iterable for assigning
reviews['index_backwards'] = range(len(reviews), 0, -1)
reviews['index_backwards']

吴裕雄--天生自然 python数据分析:葡萄酒分析的更多相关文章

  1. 吴裕雄--天生自然 PYTHON数据分析:所有美国股票和etf的历史日价格和成交量分析

    # This Python 3 environment comes with many helpful analytics libraries installed # It is defined by ...

  2. 吴裕雄--天生自然 python数据分析:健康指标聚集分析(健康分析)

    # This Python 3 environment comes with many helpful analytics libraries installed # It is defined by ...

  3. 吴裕雄--天生自然 PYTHON数据分析:基于Keras的CNN分析太空深处寻找系外行星数据

    #We import libraries for linear algebra, graphs, and evaluation of results import numpy as np import ...

  4. 吴裕雄--天生自然 PYTHON数据分析:钦奈水资源管理分析

    df = pd.read_csv("F:\\kaggleDataSet\\chennai-water\\chennai_reservoir_levels.csv") df[&quo ...

  5. 吴裕雄--天生自然 PYTHON数据分析:糖尿病视网膜病变数据分析(完整版)

    # This Python 3 environment comes with many helpful analytics libraries installed # It is defined by ...

  6. 吴裕雄--天生自然 PYTHON数据分析:人类发展报告——HDI, GDI,健康,全球人口数据数据分析

    import pandas as pd # Data analysis import numpy as np #Data analysis import seaborn as sns # Data v ...

  7. 吴裕雄--天生自然 python数据分析:医疗费数据分析

    import numpy as np import pandas as pd import os import matplotlib.pyplot as pl import seaborn as sn ...

  8. 吴裕雄--天生自然 python数据分析:基于Keras使用CNN神经网络处理手写数据集

    import pandas as pd import numpy as np import matplotlib.pyplot as plt import matplotlib.image as mp ...

  9. 吴裕雄--天生自然 PYTHON数据分析:医疗数据分析

    import numpy as np # linear algebra import pandas as pd # data processing, CSV file I/O (e.g. pd.rea ...

随机推荐

  1. ios 监控键盘状态

    增加键盘显示和隐藏事件监听 NotificationCenter.default.addObserver(self, selector: #selector(keyboardWillShow(noti ...

  2. Maven依赖三板大斧

    一:问题出现场景 记得有一次,面试时候面试官问了个问题,来哥们,“你们项目是maven搭建哈,你的项目里如果出现架包冲突了,你们怎么解决的?”. 我:......,装作很淡定,我们是通过报错,定位哪个 ...

  3. 解决 nginx 启动错误 nginx: [emerg] host not found in upstream

    解决办法来自于:https://blog.csdn.net/Hreticent/article/details/86074502 感谢这个朋友简单而清晰的解决办法. 在配置nginx支持PHP的时候, ...

  4. python 知识点补充

    python 知识点补充 简明 python 教程 r 或 R 来指定一个 原始(Raw) 字符串 Python 是强(Strongly)面向对象的,因为所有的一切都是对象, 包括数字.字符串与 函数 ...

  5. 学习4412开发板gdb和gdbserver的调试

    因为有很多的小伙伴是从单片机转过来的,用惯了单片机上的JLINK调试程序,换到Linux上非常的不习惯.确实,如果能设置断点,单步调试,查看变量,那确实是太爽了,那么在我们的Linux可以做到吗,答案 ...

  6. [JS]實作LinkedList鏈結串列

    由於自身資料結構的基礎薄弱,買了一本JavaScript資料結構與演算法實作的書來看,重新把LinkedList鏈結串列學習了一遍,並用JS實作出來. LinkedList鏈結串列 要存放多個元素,最 ...

  7. 吴裕雄--天生自然python学习笔记:python 用firebase实现英汉词典进阶版

    用 post 方法创建的数据会自动产生一个 id (Key ),但有时也常常为了取得这个 id 而让程序难以处理 . 以英汉词典标准版来说,它的数据结构如下: 如果将每条数据都改为{eword:cwo ...

  8. 华硕X450j清灰教程

    近期本人电脑咳嗽,电脑发烧,风扇呼呼的吹(电脑风扇好像出了问题),去了电脑上商城买了一支止咳糖浆(HEATSINK COMPOUNDS)硅脂, 废话不多说了 1 ,当然要把电脑关了,卸下三排螺丝,然后 ...

  9. day15-接口类

    # 一.接口类的作用是规范它的子类,跟后面学习的接口没关系. from abc import abstractmethod,ABCMeta class Pay(metaclass=ABCMeta): ...

  10. git 学习系列(一)

    目录 git 简介 git的升级 建立仓库 克隆仓库 查看主机名 查看仓库初始状态 将文件提交到暂存区 查看修改详情 提交修改 查看修改记录 查看个人配置信息(在 .gitconfig 文件中) 查看 ...