Python 建模步骤
#%%
# Step 1: load the training data and inspect its basic structure.
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder

print('第一步:加载、查看数据')
file_path = r'D:\train\201905data\liwang.csv'
band_data = pd.read_csv(file_path, encoding='UTF-8')
# Column names, dtypes and non-null counts.
band_data.info()
# Printed explicitly: a bare expression shows nothing when run as a script.
print(band_data.shape)
#%%
#
print('第二步:清洗、处理数据,某些数据可以使用数据库处理数据代替')
# Step 2: data cleaning. Missing values are handled by dropping the rows.
# Show the number of missing values per column before dropping anything.
print(band_data.isnull().sum())
band_data = band_data.dropna()
#band_data = band_data.drop(['state'],axis=1)

# Strip surrounding whitespace from the text columns so the value mappings
# below match exactly.
for column in ('voice_mail_plan', 'intl_plan', 'churned'):
    band_data[column] = band_data[column].str.strip()

# Encode the yes/no plan flags as 1/0. Any other value becomes NaN.
yes_no_codes = {'no': 0, 'yes': 1}
band_data['voice_mail_plan'] = band_data['voice_mail_plan'].map(yes_no_codes)
band_data['intl_plan'] = band_data['intl_plan'].map(yes_no_codes)

# Label-encode every remaining text column (this includes the 'churned'
# target). The dtype is tested with the pandas predicates instead of
# `dtype == type(object)`: `type(object)` is the metaclass `type`, which only
# matched object columns by accident.
for column in band_data.columns:
    is_text_column = (
        pd.api.types.is_object_dtype(band_data[column])
        or pd.api.types.is_string_dtype(band_data[column])
    )
    if is_text_column:
        le = LabelEncoder()
        band_data[column] = le.fit_transform(band_data[column])

# Optional alternatives that were left disabled:
#band_data = band_data.drop(['phone_number'],axis=1)
#band_data['churned'] = band_data['churned'].replace([' True.',' False.'],[1,0])
#band_data['intl_plan'] = band_data['intl_plan'].replace([' yes',' no'],[1,0])
#band_data['voice_mail_plan'] = band_data['voice_mail_plan'].replace([' yes',' no'],[1,0])
#%%
# Step 3: choose and train the model (repeat / tune as needed).
print('第三步:选择、训练模型')
# Features are every column except the target 'churned'.
x = band_data.drop(['churned'], axis=1)
y = band_data['churned']

from sklearn import model_selection
# 70/30 train/test split; the fixed random_state keeps it reproducible.
train, test, t_train, t_test = model_selection.train_test_split(
    x, y, test_size=0.3, random_state=1)

from sklearn import tree
model = tree.DecisionTreeClassifier(max_depth=2)
model.fit(train, t_train)

# Feature importances of the fitted tree, one row per feature.
fea_res = pd.DataFrame(x.columns, columns=['features'])
fea_res['importance'] = model.feature_importances_

# Class frequencies of the target, kept for inspection only.
t_name = band_data['churned'].value_counts()

import graphviz
import os
# Make the Graphviz binaries discoverable for graph.render().
os.environ["PATH"] += os.pathsep + r'D:\software\developmentEnvironment\graphviz-2.38\release\bin'

# export_graphviz expects class names in ascending class order, which is the
# order of model.classes_. value_counts() orders by frequency instead, so it
# could attach the wrong label to each leaf.
dot_data = tree.export_graphviz(
    model,
    out_file=None,
    feature_names=list(x.columns),
    max_depth=2,
    class_names=[str(label) for label in model.classes_],
    filled=True,
    rounded=True,
    special_characters=False,
)
graph = graphviz.Source(dot_data)
#graph
graph.render("dtr")
#%%
from sklearn.metrics import classification_report, confusion_matrix

print('第四步:查看、分析模型')
# Step 4: predict on the held-out split and review the model quality.
res = model.predict(test)

# Confusion matrix of true vs. predicted labels.
confmat = confusion_matrix(y_true=t_test, y_pred=res)
print(confmat)

# Classification metrics report.
# Background reading: https://blog.csdn.net/akadiao/article/details/78788864
print(classification_report(y_true=t_test, y_pred=res))
#%%
print('第五步:保存模型')
# Step 5: persist the fitted model to disk.
# `sklearn.externals.joblib` has been removed from current scikit-learn;
# prefer the standalone package and fall back only for old installations.
try:
    import joblib
except ImportError:
    from sklearn.externals import joblib
joblib.dump(model, r'D:\train\201905data\mymodel.model')
#%%
print('第六步:加载新数据、使用模型')
# Step 6: load new data, clean it the same way as the training data, and
# predict with the saved model.
file_path_do = r'D:\train\201905data\do_liwang.csv'
deal_data = pd.read_csv(file_path_do, encoding='UTF-8')

# Data cleaning: drop rows with missing values.
deal_data = deal_data.dropna()

# Strip surrounding whitespace from the text columns so the value mappings
# below match exactly.
for column in ('voice_mail_plan', 'intl_plan', 'churned'):
    deal_data[column] = deal_data[column].str.strip()

# Encode the yes/no plan flags as 1/0. Any other value becomes NaN.
deal_data['voice_mail_plan'] = deal_data['voice_mail_plan'].map({'no': 0, 'yes': 1})
deal_data['intl_plan'] = deal_data['intl_plan'].map({'no': 0, 'yes': 1})

# Label-encode every remaining text column.
# NOTE(review): each encoder is fitted on the new data only, so its integer
# codes match the ones used in training only if both files contain the same
# set of values per column. Reusing the encoders fitted on the training data
# would remove that risk.
for column in deal_data.columns:
    is_text_column = (
        pd.api.types.is_object_dtype(deal_data[column])
        or pd.api.types.is_string_dtype(deal_data[column])
    )
    if is_text_column:
        le = LabelEncoder()
        deal_data[column] = le.fit_transform(deal_data[column])

# Load the persisted model.
model_file_path = r'D:\train\201905data\mymodel.model'
deal_model = joblib.load(model_file_path)

# Predict using every column except the target 'churned'.
res = deal_model.predict(deal_data.drop(['churned'], axis=1))
#%%
print('第七步:执行模型,提供数据')
# Step 7: attach the predictions to the new data and export them.
result_file_path = r'D:\train\201905data\result_liwang.csv'

# Insert the predictions as the second column. Running this cell twice raises
# ValueError because the column already exists.
deal_data.insert(1, 'pre_result', res)

# NOTE(review): 'state' has presumably been label-encoded by the cleaning
# step, so the file would hold integer codes rather than the original state
# names - confirm this is what consumers expect.
deal_data[['state', 'pre_result']].to_csv(
    result_file_path, sep=',', index=True, encoding='UTF-8')
Python 建模步骤的更多相关文章
- Python学习步骤如何安排?
一、清楚学习目标 无论是学习什么知识,都要有一个对学习目标的清楚认识。只有这样才能朝着目标持续前进,少走弯路,从学习中得到不断的提升,享受 Python 学习计划的过程。二、基本 Python 知识学习 ...
- Linux系统下升级Python版本步骤(suse系统)
Linux系统下升级Python版本步骤(suse系统) http://blog.csdn.net/lifengling1234/article/details/53536493
- 决策树python建模中的坑 :ValueError: Expected 2D array, got 1D array instead:
决策树python建模中的坑 代码 #coding=utf-8 from sklearn.feature_extraction import DictVectorizer import csv from ...
- odoo 14 python 单元测试步骤
# odoo 14 python 单元测试步骤 # 一.在模块根目录创建tests目录 # 二.在tests目录下创建__init__.py文件 # 三.继承TransactionCase(Singl ...
- 逻辑回归--美国挑战者号飞船事故_同盾分数与多头借贷Python建模实战
python信用评分卡(附代码,博主录制) https://study.163.com/course/introduction.htm?courseId=1005214003&utm_camp ...
- Python机器学习步骤
推荐学习顺序 学习机器学习得有个步骤, 下面大家就能按照自己所需, 来探索这个网站. 图中请找到 "Start", 然后依次沿着箭头, 看看有没有不了解/没学过的地方, 接着, 就 ...
- 正态分布-python建模
sklearn实战-乳腺癌细胞数据挖掘 https://study.163.com/course/introduction.htm?courseId=1005269003&utm_campai ...
- T分布在医药领域应用-python建模
sklearn实战-乳腺癌细胞数据挖掘 https://study.163.com/course/introduction.htm?courseId=1005269003&utm_campai ...
- 下载及安装Python详细步骤
安装python分三个步骤: *下载python *安装python *检查是否安装成功 1.下载Python (1)python下载地址https://www.python.org/download ...
随机推荐
- MySQL数据库 (5)
--视图,触发器,函数,存储过程,事务,索引
- 收藏 SpringBoot :thymeleaf 使用详解
转载大神们笔记 --比较详细的 http://www.importnew.com/25826.html http://www.niugebbs.com/xdmrhf/1046839.html http ...
- 线程池(4)Executors.newScheduledThreadPool-只执行1次
例子1:延迟3秒后,只执行1次 ScheduledExecutorService es = Executors.newScheduledThreadPool(5); log.info("开始 ...
- 058 Length of Last Word 最后一个单词的长度
给定一个字符串, 包含大小写字母.空格 ' ',请返回其最后一个单词的长度.如果不存在最后一个单词,请返回 0 .注意事项:一个单词的界定是,由字母组成,但不包含任何的空格.案例:输入: " ...
- Canada Cup 2016 D. Contest Balloons 好题。优先队列 + 简单贪心
http://codeforces.com/contest/725/problem/D 这题一看就是贪心的了,w - t最小的那个,肯定是优先打死. 但是一直都不会写,为什么呢,因为这个太像二分答案了 ...
- PHP&Java 调用C#的WCF
步骤一:用C#声明WCF [ServiceContract] public interface IService1 { [OperationContract] void DoWork(); [Oper ...
- eclipse查看jar包源文件
话不多说上链接 https://www.cnblogs.com/1995hxt/p/5252098.html这里介绍了完整的流程,亲自试过,可以的! 以防以后要用的时候找不到文件的下载地址,所以就先在 ...
- Volley与Picasso的对比
Volley与Picasso的对比 想写一篇文章来对比一下Volley以及Picasso,有人或许会说了,Volley和Picasso的服务对象都不同,Picasso是专注于图片的下载以及处理,而Vo ...
- 【javascript类库】zepto和jquery的md5加密插件
[javascript类库]zepto和jquery的md5加密插件 相信很多人对jQuery并不陌生,这款封装良好的插件被很多开发者使用. zepto可以说是jQuery在移动端的替代产品,它比jQ ...
- Writable和Comparable
WritableComparable接口相当于继承了上述两个接口的新接口 : Public interface WritableComparable<T>extends Writable, ...