# coding: utf-8

# In[18]:

import pandas as pd
import numpy as np
from sklearn import tree
from sklearn.svm import SVC
from sklearn.grid_search import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.preprocessing import binarize
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import Normalizer
from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score,recall_score,average_precision_score,auc

# In[32]:

# Load the pivoted wafer table from the hard-coded local CSV export.
data = pd.read_csv(r"D:\Users\sgg91044\Desktop\bad_wafer_data_pivot.csv")

# In[33]:

data.head()

# In[34]:

# `index` holds whatever columns are left once the listed measurement
# columns, "defect_count" and "Target" have been removed.
index = data.drop(
    columns=[
        "defect_count",
        "ETCM_PHA4",
        "ETCM_PHB4",
        "ETCM_PHC4",
        "HELK_MAX.",
        "HELK_MEAN",
        "HELK_SD",
        "LOWERCHM_PRESS",
        "PBK4",
        "RR13_MAX.",
        "RR13_MEAN",
        "RR23_MAX.",
        "RR23_MEAN",
        "THR3_MAX.",
        "THR3_MAX._DIFF",
        "THR3_MEAN",
        "THR3_MEAN_DIFF",
        "THR3_MEAN_SLOPE",
        "THR3_SD",
    ]
).drop(columns="Target")
index

# In[35]:

# Remove the lot/step/recipe identifiers and the defect count from the
# working frame.
data = data.drop(columns=["lotid", "Step", "Recipie_Name", "defect_count"])
data.head()

# In[36]:

le = LabelEncoder()
ohe = OneHotEncoder()

# In[37]:

data.head()

# In[40]:

# Integer-code the first three columns by position. The one encoder is refit
# for every column, so afterwards it only remembers the classes of the last.
# NOTE(review): positions 0-2 are presumably eqpid, slotid and Chamber (the
# columns dropped in the next cell) -- confirm against the CSV layout.
data = data.assign(
    eqp_encoded=le.fit_transform(data.iloc[:, 0]),
    slot_encoded=le.fit_transform(data.iloc[:, 1]),
    chamber_encoded=le.fit_transform(data.iloc[:, 2]),
)
data.head()

# In[41]:

# The raw categorical columns are replaced by the *_encoded columns above.
data = data.drop(columns=["eqpid", "slotid", "Chamber"])
data.head()

# In[42]:

# Apply the Normalizer to two positional column groups, each fitted and
# written back separately: columns 10-11 first, then columns 0-2.
nz = Normalizer()
data.iloc[:, 10:12] = pd.DataFrame(
    nz.fit_transform(data.iloc[:, 10:12]),
    columns=data.columns[10:12],
)
data.iloc[:, 0:3] = pd.DataFrame(
    nz.fit_transform(data.iloc[:, 0:3]),
    columns=data.columns[0:3],
)
data.head()

# In[43]:

def cleaning(path=r"D:\Users\sgg91044\Desktop\bad_wafer_data_pivot.csv"):
    """Load the wafer CSV and return it with the notebook's preprocessing applied.

    Steps, in order:

    1. Read the CSV at ``path``.
    2. Drop the "lotid", "Step", "Recipie_Name" and "defect_count" columns.
    3. Label-encode the first three remaining columns (by position) into
       "eqp_encoded", "slot_encoded" and "chamber_encoded".
    4. Drop the raw "eqpid", "slotid" and "Chamber" columns.
    5. Run a ``Normalizer`` over columns 10-11 and, separately, columns 0-2.

    Args:
        path: Location of the CSV file. Defaults to the original hard-coded
            desktop export, so ``cleaning()`` keeps working without arguments.

    Returns:
        The preprocessed ``pandas.DataFrame``.

    Raises:
        KeyError: If one of the columns to drop is missing from the CSV.
    """
    data = pd.read_csv(path)
    data = data.drop(columns=["lotid", "Step", "Recipie_Name", "defect_count"])

    # A single encoder is refit per column; only the transformed codes are
    # kept, so the fitted classes of earlier columns are not needed later.
    # NOTE(review): positions 0-2 are presumably eqpid, slotid and Chamber
    # (the columns dropped just below) -- confirm against the CSV layout.
    le = LabelEncoder()
    data["eqp_encoded"] = le.fit_transform(data.iloc[:, 0])
    data["slot_encoded"] = le.fit_transform(data.iloc[:, 1])
    data["chamber_encoded"] = le.fit_transform(data.iloc[:, 2])
    data = data.drop(columns=["eqpid", "slotid", "Chamber"])

    # NOTE(review): sklearn's Normalizer rescales each *row* to unit norm
    # within the selected columns; confirm that per-sample scaling (rather
    # than per-column scaling) is what is wanted here.
    nz = Normalizer()
    data.iloc[:, 10:12] = pd.DataFrame(
        nz.fit_transform(data.iloc[:, 10:12]),
        columns=data.iloc[:, 10:12].columns,
    )
    data.iloc[:, 0:3] = pd.DataFrame(
        nz.fit_transform(data.iloc[:, 0:3]),
        columns=data.iloc[:, 0:3].columns,
    )

    # Hand the result back; `data` is local to this function, so without a
    # return the cleaned frame would be lost to the caller.
    return data

我的代码-cleaning的更多相关文章

  1. AGC010 - C: Cleaning

    原题链接 题意简述 给出一棵 $N$ 个节点的树,每个点有点权.每次可以选择两个叶节点并将连接它们的路径上的节点的点权-1(包括叶节点).求能否将所有节点的点权都变为0. 分析 先考虑最简单的情况.在这种情况下 ...

  2. 【bzoj1672】[USACO2005 Dec]Cleaning Shifts 清理牛棚

    题目描述 Farmer John's cows, pampered since birth, have reached new heights of fastidiousness. They now ...

  3. Coursera-Getting and Cleaning Data-week1-课程笔记

    博客总目录,记录学习R与数据分析的一切:http://www.cnblogs.com/weibaar/p/4507801.html -- Sunday, January 11, 2015 课程概述 G ...

  4. Coursera-Getting and Cleaning Data-Week2-课程笔记

    Coursera-Getting and Cleaning Data-Week2 Saturday, January 17, 2015 课程概述 week2主要是介绍从各个来源读取数据.包括MySql ...

  5. Coursera-Getting and Cleaning Data-Week3-dplyr+tidyr+lubridate的组合拳

    Coursera-Getting and Cleaning Data-Week3 Wednesday, February 04, 2015 好久不写笔记了,年底略忙.. Getting and Cle ...

  6. Coursera-Getting and Cleaning Data-week4-R语言中的正则表达式以及文本处理

    博客总目录:http://www.cnblogs.com/weibaar/p/4507801.html Thursday, January 29, 2015 补上第四周笔记,以及本次课程总结. 第四周 ...

  7. poj 2376 Cleaning Shifts

    http://poj.org/problem?id=2376 Cleaning Shifts Time Limit: 1000MS   Memory Limit: 65536K Total Submi ...

  8. JAVA版Kafka代码及配置解释

    伟大的程序员版权所有,转载请注明:http://www.lenggirl.com/bigdata/java-kafka.html.html 一.JAVA代码 kafka是吞吐量巨大的一个消息系统,它是 ...

  9. POJ 2376 Cleaning Shifts(轮班打扫)

    POJ 2376 Cleaning Shifts(轮班打扫) Time Limit: 1000MS   Memory Limit: 65536K [Description] [题目描述] Farmer ...

随机推荐

  1. java程序员面试交流项目经验

    粘贴自:https://blog.csdn.net/wangyuxuan_java/article/details/8778211 1:请你介绍一下你自己 这是面试官常问的问题.一般人回答这个问题过于 ...

  2. 【HNOI 2018】寻宝游戏

    Problem Description 某大学每年都会有一次 \(Mystery\ Hunt\) 的活动,玩家需要根据设置的线索解谜,找到宝藏的位置,前一年获胜的队伍可以获得这一年出题的机会. 作为新 ...

  3. Vue:(四)Ajax(Vue-Resource)

    Vue 要实现异步加载需要使用到 vue-resource 库.(挂载到vue实例上) (一)Vue-Resource引入 <script src="https://cdn.stati ...

  4. java IO和NIO区别

    面向流与面向缓冲 Java NIO和IO之间第一个最大的区别是,IO是面向流的,NIO是面向缓冲区的. Java IO面向流意味着每次从流中读一个或多个字节,直至读取所有字节,它们没有被缓存在任何地方 ...

  5. 整理this笔记

    1.在浏览器全局环境中this指向的是Window console.log(this); //Window 2.在事件处理函数中的this,这个事件是由谁触发,this就指向谁 3.直接执行一个函数的 ...

  6. 『Python CoolBook』Cython_高效数组操作

    数组运算加速是至关科学计算重要的领域,本节我们以一个简单函数为例,使用C语言为python数组加速. 一.Cython 本函数为一维数组修剪最大最小值 version1 @cython.boundsc ...

  7. 【IDE】我的花里胡哨VS

    我的 VS2017 效果图,花里胡哨但十分养眼,利于C/C++ Coding~ 一.主题设置 工具 → 扩展和更新 下载插件 Color Theme Editor for Visual Studio ...

  8. win10下vs2015编译的程序如何运行在win7等系统(无需安装Redistributable)

    最近新写的程序要做beta测试,在做绿色版(免安装版)时遇到了问题,vs2015做的项目本以为像之前的vs版本一样把msvcrXXX.dll还有另外几个运行时库都放到exe旁边即可,然并卵...,在w ...

  9. LimeSDR环境安装与测试

    虚拟机:ubuntu虚拟机建议4g内存,64g硬盘,usb3.0已开启 //否则编译过程耗尽内存 1 换阿里云源(加速)# deb cdrom:[Ubuntu 16.04 LTS _Xenial Xe ...

  10. C# 用 WebClient 的 Post 方法向 WebServer 传输数据

    帮朋友做一个通过Web简单传输数据的例子,百度了一下抄了段代码,完成,效果如下: 其中textBox1里面是客户端需要传输过去的数据,textBox2里面是接收到的返回数据. 代码如下: using ...