用Plotly处理数据的基本操作

import pandas as pd
# Load the dataset from a .csv file (the path below is a placeholder to fill in).
df = pd.read_csv(" .csv")
# Preview the first five rows.
df.head()
# Summary statistics of the data.
# Fixed: the method is `describe`; the original `descirbe` raised AttributeError.
df.describe()
# Shape of the data as (rows, columns).
df.shape
# Names of the columns contained in the dataset.
df.columns

# 对数据进行可视化
import matplotlib.pyplot as plt
import seaborn as sns
# 使用Jupyter Notebook
# import warnings
# warnings.filterwarnings("ignore")
# %matplotlib inline

# Create a custom figure; all arguments are passed exactly as in the original
# template and can be adjusted (figsize, dpi, colours, ...).
# Fixed: `figure` was called unqualified, but no such name is defined in this
# script; the function lives in matplotlib.pyplot, imported above as `plt`.
plt.figure(num=None, figsize=None, dpi=None, facecolor=None, edgecolor=None, frameon=True)
# Figure title (placeholder text).
plt.title(" ")
# Plot the distribution of one column (placeholder column name).
# NOTE(review): seaborn documents `displot` as a figure-level function that
# creates its own figure, so the figure/title set above may not apply to it;
# confirm whether an axes-level function such as `histplot` was intended.
sns.displot(df[" "])

# !pip install plotly # install the Plotly library
# Import the plotting tools.
# Note: plotly.offline is bound under two aliases below (`offline` and `py`).
import plotly.offline as offline
import plotly.graph_objs as go
import plotly.offline as py
from plotly.offline import init_notebook_mode, iplot
# Notebook mode is initialised twice: first with connected=True, then again
# through the `offline` alias with default arguments.
init_notebook_mode(connected=True)
offline.init_notebook_mode()

# Quick way to see how many distinct values a column holds, and how many
# times each distinct value occurs in that column.
temp = df[" "].value_counts()
# Bar chart showing the share of each distinct value, as a percentage of the total.
trace = [go.Bar(x = temp.index, y = (temp / temp.sum()) * 100)]
# Layout: chart title, axis titles, font sizes and colours.
# NOTE: the empty `size=` and `rgb(  ,  ,  )` slots are blanks left in this
# template; they must be filled in before the code is valid Python.
layout = go.Layout(
    title=" ",
    xaxis=dict(title=' ',
               tickfont=dict(size= , color='rgb(  ,  ,  )')),
    yaxis=dict(title=' ',
               titlefont=dict(size= , color='rgb(  ,  ,  )'),
               tickfont=dict(size= , color='rgb(  ,  ,  )'))
)
# Build the figure from the trace and layout, then display it.
fig = go.Figure(data=trace, layout=layout)
iplot(fig, filename=' ')

# Pie chart: one slice per distinct value, sized by that value's count.
trace = [
    go.Pie(
        labels=temp.index,
        values=temp.values,
    ),
]
# Layout carrying only the chart title (placeholder text).
layout = go.Layout(title=' ')
# Assemble the figure and display it.
fig = go.Figure(layout=layout, data=trace)
iplot(fig)

# Ring-shaped (donut) variant of the pie chart: same data, with a hole in the middle.
trace = [
    go.Pie(
        labels=temp.index,
        values=temp.values,
        hole=0.6,
    ),
]

# Value counts of the two flag columns; each one feeds its own pie chart below.
temp1 = df["FLAG_OWN_CAR"].value_counts()
temp2 = df["FLAG_OWN_REALTY"].value_counts()

# Draw two pie charts; the `domain` x-ranges give the first the span
# [0, .48] and the second the span [0.5, 1].
trace = [go.Pie(labels=temp1.index, values=temp1.values, domain={"x": [0, .48]}, hole=0.6),
         go.Pie(labels=temp2.index, values=temp2.values, domain={"x": [0.5, 1]}, hole=0.6)]
# Layout: chart title plus two annotations (font size, arrow flag, text, position).
# NOTE: the empty values after "size", "showarrow" and the incomplete `0. `
# coordinates are blanks left in this template; fill them in before running.
layout = go.Layout(
    title=' ',
    annotations=[{"font": {
        "size":  },
        "showarrow":  ,
        "text": " ",
        "x": 0. , # coordinates
        "y": 0. },
        {"font": {
         "size": },
         "showarrow": ,
         "text": " ",
         "x": 0. ,
         "y": 0. }])
# Build the figure from the traces and layout, then display it.
fig = go.Figure(data=trace, layout=layout)
iplot(fig)

# Counting: for every category in temp.index, count the rows of that "TYPE"
# whose "TARGET" equals 1 (temp_y1) and the rows whose "TARGET" equals 0 (temp_y0).
# Fixed: the original used `np` although NumPy is not imported in the visible
# part of this script, which raises NameError; it is imported here.
import numpy as np

temp_y1 = np.array(
    [np.sum(df["TARGET"][df["TYPE"] == val] == 1) for val in temp.index]
)
temp_y0 = np.array(
    [np.sum(df["TARGET"][df["TYPE"] == val] == 0) for val in temp.index]
)

# Drop every feature column that contains missing values.
df_drop = df.dropna(axis=1)
df_drop.head()

# Encode the non-numeric columns into numeric form.
from sklearn import preprocessing

# Collect the columns whose dtype is 'object' (the non-numeric ones).
categorical_feats = [
    name for name in df_drop.columns if df_drop[name].dtype == 'object'
]

# Label-encode each non-numeric column; values are cast to str first, and a
# fresh encoder is fitted per column.
for col in categorical_feats:
    lb = preprocessing.LabelEncoder()
    as_text = list(df_drop[col].values.astype('str'))
    df_drop[col] = lb.fit_transform(as_text)

df_drop.head()

# Split the data.
# Remove the ID column first.
df_drop1 = df_drop.drop(columns="ID")
# Separate the training features from the target (placeholder column names).
data_X = df_drop1.drop(columns=" ")
data_y = df_drop1[' ']
# Split into a training set and a test set.
# NOTE(review): test_size=0.8 holds out 80% of the rows for testing and
# trains on only 20% — confirm this ratio is intended.
from sklearn import model_selection

train_x, test_x, train_y, test_y = model_selection.train_test_split(
    data_X.values,
    data_y.values,
    test_size=0.8,
    random_state=0,
)

# Build the prediction model.
from sklearn.ensemble import RandomForestClassifier
model = RandomForestClassifier()  # create the model with default settings
model.fit(train_x, train_y)  # train on the training split
from sklearn import metrics
y_pred = model.predict(test_x)  # predict labels for the test split
# Evaluate the predictions.
# Fixed: scikit-learn metrics take (y_true, y_pred). The original passed
# (y_pred, test_y), which swaps precision/recall and reports the support of
# the predictions instead of the true labels in the classification report.
metrics.accuracy_score(test_y, y_pred)
print(metrics.classification_report(test_y, y_pred))

# Column names of the feature matrix, i.e. the feature names.
features = data_X.columns.values
# Pair each importance with its feature name, sort ascending, and split the
# sorted pairs back into two parallel lists: importances (x) and names (y).
x, y = map(list, zip(*sorted(zip(model.feature_importances_, features))))
# Horizontal bar chart, with bars coloured by importance.
trace2 = go.Bar(
    x=x,
    y=y,
    orientation='h',
    name=' ',
    marker={'color': x, 'colorscale': 'Viridis', 'reversescale': True},
)
# Title, figure size, y-axis appearance and left margin.
layout = {
    'title': ' ',
    'width': 900,
    'height': 2000,
    'yaxis': {'showgrid': False, 'showline': False, 'showticklabels': True},
    'margin': {'l': 300},
}
# Build the figure, apply the layout, and display it.
fig1 = go.Figure(data=[trace2])
fig1['layout'].update(layout)
iplot(fig1, filename='plots')

from sklearn.tree import DecisionTreeClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import BaggingClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression

# Build the 7 classifiers to compare.
models = [LogisticRegression(solver='lbfgs'),       # logistic regression
          RandomForestClassifier(n_estimators=100),  # random forest
          DecisionTreeClassifier(),                 # decision tree
          MLPClassifier(max_iter=100),              # multi-layer perceptron
          AdaBoostClassifier(),                     # AdaBoost (adaptive boosting)
          BaggingClassifier(),                      # bagging
          GradientBoostingClassifier()]             # gradient boosting

# Display names, listed in the same order as `models`.
model_name = ['LogisticRegression',
              'RandomForestClassifier',
              "DecisionTreeClassifier",
              'MLPClassifier',
              'AdaBoostClassifier',
              'BaggingClassifier',
              'GradientBoostingClassifier']

acc = []        # accuracy of each classifier
f1 = []         # F1 score of each classifier
recall = []     # recall of each classifier

for model in models:  # train and score every classifier on the same split
    model.fit(train_x, train_y)
    acc.append(model.score(test_x, test_y))
    y_pred = model.predict(test_x)
    # Fixed: scikit-learn metrics take (y_true, y_pred). With the original
    # (y_pred, test_y) order, recall_score actually returned precision.
    f1.append(metrics.f1_score(test_y, y_pred))
    recall.append(metrics.recall_score(test_y, y_pred))

# Table of the evaluation results, one row per classifier.
pd.DataFrame({"name": model_name, "acc": acc, "f1": f1, "recall": recall})

用Plotly处理数据的基本操作的更多相关文章

MySQL：数据表基本操作
数据表基本操作注意点: 1.数据表中已经有数据时,轻易修改数据类型,有可能因为不同的数据类型的数据在机器中存储的方式及长度并不相同,修改数据类型可能会影响到数据表中已有的数据类型. 2. 数据表 ...
MySQL之终端（Terminal）管理数据库、数据表、数据的基本操作(转)
MySQL有很多的可视化管理工具,比如“mysql-workbench”和“sequel-pro-”. 现在我写MySQL的终端命令操作的文章,是想强化一下自己对于MySQL的理解,总会比使用图形化的 ...
MySQL系列：数据表基本操作（2）
1. 指定数据库 mysql> use portal; 2. 数据库表基本操作 2.1 查看数据表 mysql> show tables; +------------------+ | T ...
pandas学习（创建数据，基本操作）
pandas学习(一) Pandas基本数据结构 Series类型数据 Dataframe类型基本操作 Pandas基本数据结构两种常用数据结构: Series 一维数组,与Numpy中的一维ar ...
MySQL 数据库、数据表、数据的基本操作
1.数据库(database)管理 1.1 create 创建数据库 create database firstDB; 1.2 show 查看所有数据库 mysql> show database ...
ASP.NET的一般处理程序对数据的基本操作
TableList.ashx: <%@ WebHandler Language="C#" Class="TableList" %> using Sy ...
mysql数据的基本操作
本文内容: 插入数据: 查询数据修改数据删除数据首发日期:2018-04-11 插入数据: 给所有字段插入数据: 插入单条记录:insert into 表名 values(值列表); 插入多条记 ...
MySQL开发——【数据的基本操作】
增加数据基本语法: insert into 数据表 [字段名称1,字段名称2..] values (数据1,数据2...); 特别注意:针对数据类型整型.浮点型数据可以不加单引或双引号,但是如果字段 ...
MySQL 5.6学习笔记（数据表基本操作）
1. 创建数据表 1.1 最基本的语法 CREATE TABLE tbl_name (col_name column_definition,...) [table_options] -column_d ...

随机推荐

11 ~ express ~ 解决 cookie 中文报错的问题
使用cookies包需要注意:1,cookie中是不能有中文的,一旦有中文,就会报错2,cookie是通过中间件的形式直接挂载到 req对象上的,那么cookies有的方法,req.cookies就 ...
安装kubernetes遇见coredns坑
安装kubernetes遇见问题 kubectl describe pod coredns -n kube-system, 查看发现coredns readiness 一直unhealthy, 并且一 ...
Python基础+爬虫基础
Python基础+爬虫基础一.python的安装: 1.建议安装Anaconda,会自己安装一些Python的类库以及自动的配置环境变量,比较方便. 二.基础介绍 1.什么是命名空间:x=1,1存在 ...
《新标准C++程序设计》3.1.1-3.1.3（C++学习笔记5）
构造函数 1.构造函数的概念和作用 (1)概念构造函数就是一类特殊的成员函数,其名字和类一样,不写返回值类型(void也不可以写),可以有参数,可以重载. 如果定义类时没写构造函数,则编译器生成一个 ...
JavaScript把两个数组对象合并成一个一一对应的数组对象
合并数组或者对象在数组或对象前面加...,是es6的新写法,然后数组的map方法会返回数组. var obj1 = [{ , "model": "XQG70-S1208F ...
Neo4j--常用的查询语句
参考 https://www.w3cschool.cn/neo4j 准备工作插入一堆朝代节点插入我大明皇帝节点创建大明皇帝统治大明王朝的关系看一下结果 WHERE WHERE 语法 WHERE ...
第22章—开启HTTPS
spring boot 系列学习记录:http://www.cnblogs.com/jinxiaohang/p/8111057.html 码云源码地址:https://gitee.com/jinxia ...
18 12 29 css background
background属性属性解释 background属性是css中应用比较多,且比较重要的一个属性,它是负责给盒子设置背景图片和背景颜色的,background是一个复合属性,它可以分解成如下几个 ...
（2）MongoDB副本集自动故障转移全流程原理
前文我们搭建MongoDB三成员副本集,了解集群基本特性,今天我们围绕下图聊一聊背后的细节. 默认搭建的replica set均在主节点读写,辅助节点冗余部署,形成高可用和备份, 具备自动故障转移的能 ...
C基础带你手写 redis ae 事件驱动模型
引言 - 整体认识 redis ae 事件驱动模型, 网上聊得很多. 但当你仔细看完一篇又一篇之后, 可能你看的很舒服, 但对于作者为什么要这么写, 出发点, 好处, 缺点 ... 可能还是好模糊, ...

用Plotly处理数据的基本操作

用Plotly处理数据的基本操作的更多相关文章

随机推荐

热门专题