# coding: utf-8

# In[18]:

import pandas as pd
import numpy as np
from sklearn import tree
from sklearn.svm import SVC
from sklearn.grid_search import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.preprocessing import binarize
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import Normalizer
from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score,recall_score,average_precision_score,auc

# In[32]:

data=pd.read_csv(r"D:\Users\sgg91044\Desktop\bad_wafer_data_pivot.csv")

# In[33]:

data.head()

# In[34]:

index=data.drop(columns=["defect_count","ETCM_PHA4","ETCM_PHB4","ETCM_PHC4","HELK_MAX.","HELK_MEAN","HELK_SD","LOWERCHM_PRESS","PBK4","RR13_MAX.","RR13_MEAN","RR23_MAX.","RR23_MEAN","THR3_MAX.","THR3_MAX._DIFF","THR3_MEAN","THR3_MEAN_DIFF","THR3_MEAN_SLOPE","THR3_SD"])
index=index.drop(columns="Target")
index

# In[35]:

data=data.drop(columns=["lotid","Step","Recipie_Name","defect_count"])
data.head()

# In[36]:

ohe = OneHotEncoder()
le = LabelEncoder()

# In[37]:

data.head()

# In[40]:

data["eqp_encoded"] = le.fit_transform(data.iloc[:,0])
data["slot_encoded"] = le.fit_transform(data.iloc[:,1])
data['chamber_encoded'] = le.fit_transform(data.iloc[:,2])
data.head()

# In[41]:

data=data.drop(columns=["eqpid","slotid","Chamber"])
data.head()

# In[42]:

nz = Normalizer()
data.iloc[:,10:12]=pd.DataFrame(nz.fit_transform(data.iloc[:,10:12]),columns=data.iloc[:,10:12].columns)
data.iloc[:,0:3]=pd.DataFrame(nz.fit_transform(data.iloc[:,0:3]),columns=data.iloc[:,0:3].columns)
data.head()

# In[43]:

def cleaning():
data=pd.read_csv(r"D:\Users\sgg91044\Desktop\bad_wafer_data_pivot.csv")
data=data.drop(columns=["lotid","Step","Recipie_Name","defect_count"])
le = LabelEncoder()
data["eqp_encoded"] = le.fit_transform(data.iloc[:,0])
data["slot_encoded"] = le.fit_transform(data.iloc[:,1])
data['chamber_encoded'] = le.fit_transform(data.iloc[:,2])
data=data.drop(columns=["eqpid","slotid","Chamber"])
nz = Normalizer()
data.iloc[:,10:12]=pd.DataFrame(nz.fit_transform(data.iloc[:,10:12]),columns=data.iloc[:,10:12].columns)
data.iloc[:,0:3]=pd.DataFrame(nz.fit_transform(data.iloc[:,0:3]),columns=data.iloc[:,0:3].columns)

我的代码-cleaning的更多相关文章

  1. AGC010 - C: Cleaning

    原题链接 题意简述 给出一棵个节点的树,每个点有点权.每次可以选择两个叶节点并将连接它们的路径上的节点的点权-1(包括叶节点).求能否将所有节点的点权都变为0. 分析 先考虑最简单的情况.在这种情况下 ...

  2. 【bzoj1672】[USACO2005 Dec]Cleaning Shifts 清理牛棚

    题目描述 Farmer John's cows, pampered since birth, have reached new heights of fastidiousness. They now ...

  3. Coursera-Getting and Cleaning Data-week1-课程笔记

    博客总目录,记录学习R与数据分析的一切:http://www.cnblogs.com/weibaar/p/4507801.html -- Sunday, January 11, 2015 课程概述 G ...

  4. Coursera-Getting and Cleaning Data-Week2-课程笔记

    Coursera-Getting and Cleaning Data-Week2 Saturday, January 17, 2015 课程概述 week2主要是介绍从各个来源读取数据.包括MySql ...

  5. Coursera-Getting and Cleaning Data-Week3-dplyr+tidyr+lubridate的组合拳

    Coursera-Getting and Cleaning Data-Week3 Wednesday, February 04, 2015 好久不写笔记了,年底略忙.. Getting and Cle ...

  6. Coursera-Getting and Cleaning Data-week4-R语言中的正则表达式以及文本处理

    博客总目录:http://www.cnblogs.com/weibaar/p/4507801.html Thursday, January 29, 2015 补上第四周笔记,以及本次课程总结. 第四周 ...

  7. poj 2376 Cleaning Shifts

    http://poj.org/problem?id=2376 Cleaning Shifts Time Limit: 1000MS   Memory Limit: 65536K Total Submi ...

  8. JAVA版Kafka代码及配置解释

    伟大的程序员版权所有,转载请注明:http://www.lenggirl.com/bigdata/java-kafka.html.html 一.JAVA代码 kafka是吞吐量巨大的一个消息系统,它是 ...

  9. POJ 2376 Cleaning Shifts(轮班打扫)

    POJ 2376 Cleaning Shifts(轮班打扫) Time Limit: 1000MS   Memory Limit: 65536K [Description] [题目描述] Farmer ...

随机推荐

  1. CSS3 3D酷炫立方体变换动画

    我爱撸码,撸码使我感到快乐! 大家好,我是Counter,本章微博主要利用了CSS3的一些新特性, 主要用到关键帧来使3D图形运动起来,涉及到了一些抽象的思想,立体的想象. 先给大家看看完成的效果,代 ...

  2. linux驱动编写(pwm驱动)【转】

    本文转载自:https://blog.csdn.net/feixiaoxing/article/details/79889240 pwm方波可以用来控制很多的设备,比如它可以被用来控制电机.简单来说, ...

  3. MySQL中的decimal

    MySQL DECIMAL数据类型用于在数据库中存储精确的数值.我们经常将DECIMAL数据类型用于保留准确精确度的列,例如会计系统中的货币数据. 要定义数据类型为DECIMAL的列,请使用以下语法: ...

  4. Token国内地铁使用城市

    天津 广州 深圳 南京 武汉 台北 高雄

  5. Vue mixins(混入)

    建立一个公共组件,然后对该组件进行混入继承. 注意会走两个生命周期,谨慎使用 mixins混入,相当于生成new 组件:组件引用,相当与在父组件内开辟了一块单独的空间 mixins适用于,两个有非常相 ...

  6. 正则表达式中pw、IDCard和EM匹配

    1密码强度正则 //密码强度正则,最少6位,包括至少1个大写字母,1个小写字母,1个数字,1个特殊字符 var pPattern = /^.*(?=.{6,})(?=.*\d)(?=.*[A-Z])( ...

  7. MapServer Configuring with IIS

    MapServer Configuring with IIS 一.前言 1.MapServer简介 MapServer是一个用C语言编写的开源地理数据呈现引擎.除了浏览地理信息系统数据之外,MapSe ...

  8. Debian/Ubuntu/Deepin下AndroidStudio2/3打开AVD模拟器无反应

    Debian系AS无法启动模拟器 问题描述 早在半年前将开发环境从windows迁移到了Linux:当时用的是Debian系统,也是在安装完成AndroidStudio之后无法开启模拟器,也没出现什么 ...

  9. python全局变量

    定义函数里面的叫局部变量,出了函数外面就不能用了 局部变量函数被调用时,他的变量才生效 局部变量定义在内存里面,用完就会被释放,全局变量不会释放 当有相同名的局部变量和全局变量,函数会先找自己的变量, ...

  10. Java框架部分---面试题

    说说Spring? Spring的核心是控制反转.依赖注入,Aop(面向切面)相当于把每个bean与bean之间的关系交给第 三方容器进行管理. 说SpringIOC.SpringAOP? Sprin ...