# coding: utf-8

# In[1]:

import pandas as pd
import numpy as np
from sklearn import tree
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.preprocessing import binarize
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import Normalizer
from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score,recall_score,average_precision_score,auc
from imblearn.over_sampling import SMOTE

# In[37]:

data = pd.read_csv(r"D:\Users\sgg91044\Desktop\Copy of sampling.csv")

# Coerce the sensor columns to numeric and treat the label as categorical
data.iloc[:, 7:25] = data.iloc[:, 7:25].apply(pd.to_numeric, errors='coerce')
data.Target = data.Target.astype("category")

# Fill missing values in each numeric column with that column's median
for i in range(7, 25):
    med = np.median(data.iloc[:, i].dropna())
    data.iloc[:, i] = data.iloc[:, i].fillna(med)

# Normalize the two column groups sample-wise
nz = Normalizer()
data.iloc[:, 17:19] = pd.DataFrame(nz.fit_transform(data.iloc[:, 17:19]), columns=data.iloc[:, 17:19].columns)
data.iloc[:, 7:10] = pd.DataFrame(nz.fit_transform(data.iloc[:, 7:10]), columns=data.iloc[:, 7:10].columns)
data.to_csv(r"D:\Users\sgg91044\Desktop\impution\AEM214_imputed_normalized.csv")
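
# In[ ]:

# A minimal sketch of the same median imputation using sklearn's SimpleImputer,
# so that the fitted medians can be stored and reapplied to new data later.
# The column positions 7:25 are assumed to match the CSV layout used above;
# the Normalizer steps from the cell above would still be applied separately.
from sklearn.impute import SimpleImputer

imputer = SimpleImputer(strategy="median")
num_cols = data.columns[7:25]
data[num_cols] = imputer.fit_transform(data[num_cols])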

# In[2]:

data= pd.read_csv(r"D:\Users\sgg91044\Desktop\Copy of sampling.csv")
data.head()

# In[3]:

data.iloc[:,5:23] = data.iloc[:,5:23].apply(pd.to_numeric,errors='coerce')
data.Target = data.Target.astype("category")

# In[4]:

Y = data.Target
X = data.drop(columns='Target')

# In[5]:

X=X.drop(columns=['slotid','Recipe_Name','defect_count'])

# In[6]:

X

# In[7]:

X_train, X_test, y_train, y_test = train_test_split(
    X, Y, test_size=0.2, random_state=0)

# In[8]:

sm = SMOTE(random_state=12, sampling_strategy=1.0)
x_train_smote, y_train_smote = sm.fit_resample(X_train, y_train)

# In[9]:

print(y_train.value_counts(), pd.Series(y_train_smote).value_counts())

# In[10]:

from sklearn.ensemble import RandomForestClassifier

# Make the random forest classifier
random_forest = RandomForestClassifier(n_estimators = 100, random_state = 50, verbose = 1, oob_score = True, n_jobs = -1)

# In[11]:

# Train on the training data
random_forest.fit(x_train_smote,y_train_smote)
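
# In[ ]:

# Because the forest above was fit with oob_score=True, the out-of-bag estimate
# gives a quick sanity check on the balanced training data before the test set.
print("OOB accuracy of Random_forest:", round(random_forest.oob_score_ * 100, 2), "%")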

# In[ ]:

# Make class predictions on the test data (predict() is needed here because the
# metrics below expect hard labels, not the probabilities from predict_proba())
y_pred = random_forest.predict(X_test)

# In[13]:

print(classification_report(y_pred=y_pred,y_true=y_test))

# In[14]:

f1_score(y_pred=y_pred,y_true=y_test)

# In[15]:

print("Accuracy of Random_forest:",round(accuracy_score(y_pred=y_pred,y_true=y_test) * 100,2),"%")
print("Sensitivity of Random_forest:",round(recall_score(y_pred=y_pred,y_true=y_test)*100,2),"%")

# In[16]:

print(confusion_matrix(y_pred=y_pred,y_true=y_test))
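
# In[ ]:

# A minimal sketch of threshold tuning: take the positive-class probability and
# binarize it at a chosen cut-off instead of predict()'s default 0.5. Column 1 of
# predict_proba is assumed to be the positive class (label 1), and 0.3 is only an
# illustrative threshold, not a tuned value.
proba_pos = random_forest.predict_proba(X_test)[:, 1]
threshold = 0.3
y_pred_tuned = (proba_pos >= threshold).astype(int)
print(confusion_matrix(y_pred=y_pred_tuned, y_true=y_test))
print("Sensitivity at threshold", threshold, ":",
      round(recall_score(y_pred=y_pred_tuned, y_true=y_test) * 100, 2), "%")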

# In[21]:

svc = SVC(kernel='poly', degree=2, gamma=1, coef0=0)

# In[ ]:

svc.fit(x_train_smote,y_train_smote)
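
# In[ ]:

# A minimal sketch for scoring the polynomial SVC on the held-out set, mirroring
# the metrics already reported for the random forest above.
y_pred_svc = svc.predict(X_test)
print(confusion_matrix(y_pred=y_pred_svc, y_true=y_test))
print("Sensitivity of SVC:", round(recall_score(y_pred=y_pred_svc, y_true=y_test) * 100, 2), "%")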

# In[ ]:

from sklearn.neural_network import MLPClassifier
mlp = MLPClassifier(activation='relu', solver='adam', alpha=0.0001)
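
# In[ ]:

# The MLP above is only constructed; a minimal sketch of fitting it on the
# SMOTE-balanced training data and scoring it like the other models. (MLPs are
# sensitive to feature scale, so the normalized columns matter here.)
mlp.fit(x_train_smote, y_train_smote)
y_pred_mlp = mlp.predict(X_test)
print(classification_report(y_pred=y_pred_mlp, y_true=y_test))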

# In[17]:

tuned_parameters = [
    {'kernel': ['rbf'], 'gamma': [1e-3, 1e-4], 'C': [1, 10, 100, 1000]},
    {'kernel': ['linear'], 'C': [1, 10, 100, 1000]},
    {'kernel': ['poly'], 'degree': [2, 3, 5]},
]
clf = GridSearchCV(SVC(), param_grid=tuned_parameters, cv=3, scoring='recall', verbose=True)
clf.fit(x_train_smote, y_train_smote)
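
# In[ ]:

# A minimal sketch for inspecting the grid-search result and checking the refit
# best estimator on the held-out test set (assumes the fit above has completed).
print("Best parameters:", clf.best_params_)
print("Best CV recall:", round(clf.best_score_, 4))
y_pred_grid = clf.best_estimator_.predict(X_test)
print(classification_report(y_pred=y_pred_grid, y_true=y_test))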

# In[18]:

data= pd.read_csv(r"D:\Users\sgg91044\Desktop\impution\sampling1.csv")
data.iloc[:,7:26] = data.iloc[:,7:26].apply(pd.to_numeric,errors='coerce')
data.Target = data.Target.astype("category")
data.eqpid = data.eqpid.astype("category")
Y = data.Target
X = data.drop(columns='Target')
X=X.drop(columns=['eqpid','lotid','Chamber','slotid','Step','Recipie_Name','defect_count'])
X_train, X_test, y_train, y_test = train_test_split(
    X, Y, test_size=0.2, random_state=0)
sm = SMOTE(random_state=12, sampling_strategy=1.0)
x_train_smote, y_train_smote = sm.fit_resample(X_train, y_train)
print(y_train.value_counts(), pd.Series(y_train_smote).value_counts())
from sklearn.ensemble import RandomForestClassifier

# Make the random forest classifier
random_forest = RandomForestClassifier(n_estimators = 100, random_state = 50, verbose = 1, oob_score = True, n_jobs = -1)
# Train on the training data
random_forest.fit(x_train_smote,y_train_smote)
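
# In[ ]:

# A minimal sketch of the forest's feature importances, to see which columns of X
# drive the predictions for this dataset.
importances = pd.Series(random_forest.feature_importances_, index=X.columns)
print(importances.sort_values(ascending=False).head(10))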

# In[19]:

# Make predictions on the test data
y_pred = random_forest.predict(X_test)
print(classification_report(y_pred=y_pred,y_true=y_test))

# In[20]:

print(confusion_matrix(y_pred=y_pred,y_true=y_test))

# In[21]:

f1_score(y_pred=y_pred,y_true=y_test)

# In[22]:

print("Accuracy of Random_forest:",round(accuracy_score(y_pred=y_pred,y_true=y_test) * 100,2),"%")
print("Sensitivity of Random_forest:",round(recall_score(y_pred=y_pred,y_true=y_test)*100,2),"%")

# In[71]:

data= pd.read_csv(r"D:\Users\sgg91044\Desktop\impution\sampling3.csv")
data.iloc[:,7:25] = data.iloc[:,7:25].apply(pd.to_numeric,errors='coerce')
data.Target = data.Target.astype("category")
Y = data.Target
X = data.drop(columns='Target')
X=X.drop(columns=['eqpid','lotid','Chamber','slotid','Step','Recipie_Name','defect_count'])
X_train, X_test, y_train, y_test = train_test_split(
    X, Y, test_size=0.2, random_state=0)
sm = SMOTE(random_state=12, sampling_strategy=1.0)
x_train_smote, y_train_smote = sm.fit_resample(X_train, y_train)
print(y_train.value_counts(), pd.Series(y_train_smote).value_counts())
from sklearn.ensemble import RandomForestClassifier

# Make the random forest classifier
random_forest = RandomForestClassifier(n_estimators = 100, random_state = 50, verbose = 1, oob_score = True, n_jobs = -1)
# Train on the training data
random_forest.fit(x_train_smote,y_train_smote)

# In[72]:

# Make predictions on the test data
y_pred = random_forest.predict(X_test)
print(classification_report(y_pred=y_pred,y_true=y_test))

# In[53]:

f1_score(y_pred=y_pred,y_true=y_test)

# In[54]:

print("Accuracy of Random_forest:",round(accuracy_score(y_pred=y_pred,y_true=y_test) * 100,2),"%")
print("Sensitivity of Random_forest:",round(recall_score(y_pred=y_pred,y_true=y_test)*100,2),"%")

# In[55]:

data= pd.read_csv(r"D:\Users\sgg91044\Desktop\impution\sampling2.csv")
data.iloc[:,7:25] = data.iloc[:,7:25].apply(pd.to_numeric,errors='coerce')
data.Target = data.Target.astype("category")
Y = data.Target
X = data.drop(columns='Target')
X=X.drop(columns=['eqpid','lotid','Chamber','slotid','Step','Recipie_Name','defect_count'])
X_train, X_test, y_train, y_test = train_test_split(
    X, Y, test_size=0.2, random_state=0)
sm = SMOTE(random_state=12, sampling_strategy=1.0)
x_train_smote, y_train_smote = sm.fit_resample(X_train, y_train)
print(y_train.value_counts(), pd.Series(y_train_smote).value_counts())
from sklearn.ensemble import RandomForestClassifier

# Make the random forest classifier
random_forest = RandomForestClassifier(n_estimators = 100, random_state = 50, verbose = 1, oob_score = True, n_jobs = -1)
# Train on the training data
random_forest.fit(x_train_smote,y_train_smote)

# In[57]:

# Make predictions on the test data
y_pred = random_forest.predict(X_test)
print(classification_report(y_pred=y_pred,y_true=y_test))

# In[58]:

f1_score(y_pred=y_pred,y_true=y_test)

# In[59]:

print("Accuracy of Random_forest:",round(accuracy_score(y_pred=y_pred,y_true=y_test) * 100,2),"%")
print("Sensitivity of Random_forest:",round(recall_score(y_pred=y_pred,y_true=y_test)*100,2),"%")

# In[ ]:

import flask
