# coding: utf-8

# In[1]:

import numpy as np
import pandas as pd
from sklearn import tree
from sklearn.ensemble import IsolationForest
from sklearn.metrics import accuracy_score,recall_score,average_precision_score,auc
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import Normalizer
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import binarize
from sklearn.svm import SVC

try:
    from sklearn.grid_search import GridSearchCV
except ImportError:  # sklearn.grid_search was removed in scikit-learn 0.20
    from sklearn.model_selection import GridSearchCV

# In[2]:

# Read the CSV export into a DataFrame (hard-coded local path).
data = pd.read_csv("D:/Users/SGG91044/Desktop/MEP_no_defect_data_pivot_test.csv")

# In[3]:

# Notebook preview of the first rows; has no effect when run as a script.
data.head()

# In[4]:

# Remove the columns listed below; everything that remains stays in `data`.
dropped_columns = [
    "lotid",
    "waferid",
    "defect_count",
    "eqpid",
    "Chamber",
    "Step",
    "Recipie_Name",
]
data = data.drop(columns=dropped_columns)
data

# In[5]:

# Coerce the first 17 columns to numeric; entries that cannot be parsed
# become NaN. Assigning by column label (rather than through .iloc) lets
# pandas replace the columns outright, so their dtype really becomes numeric.
feature_cols = data.columns[:17]
data[feature_cols] = data[feature_cols].apply(pd.to_numeric, errors="coerce")

# In[6]:

# Fill the remaining NaNs with each column's own median. DataFrame.median()
# skips NaN by default, which matches "median of the non-missing values".
# (The original per-column loop had an unindented body and did not parse.)
data[feature_cols] = data[feature_cols].fillna(data[feature_cols].median())

# In[10]:

# Scale every row (sample) of the 17 feature columns to unit norm.
# NOTE(review): Normalizer works per row, not per column -- confirm that
# row-wise scaling (rather than per-feature standardisation) is intended.
nz = Normalizer()
features = data.iloc[:, 0:17]
X = pd.DataFrame(
    nz.fit_transform(features),
    columns=features.columns,
    index=features.index,  # keep row labels aligned with `data`
)
# Write the normalised values back into the same 17 columns. The original
# targeted columns 0:19, which does not match the 17 columns transformed.
data.iloc[:, 0:17] = X.values

# In[11]:

# Notebook display of the feature matrix; no effect when run as a script.
X

# In[12]:

# Hold out 30% of the rows; the fixed seed keeps the split reproducible.
X_train, X_test = train_test_split(
    X, test_size=0.3, random_state=8)

# In[30]:

# Fit an Isolation Forest on the training split. Each tree is built from at
# most 10000 sampled rows; random_state fixes the sampling so runs repeat.
clf = IsolationForest(max_samples=10000, random_state=10)
clf.fit(X_train)

# predict() labels each row +1 (inlier) or -1 (outlier).
y_pred_train = clf.predict(X_train)
y_pred_test = clf.predict(X_test)

# In[35]:

# Anomaly scores for the training rows (lower means more abnormal). The
# DataFrame is passed as-is, the same way it was given to fit(), instead of
# the bare .values array, so fitting and scoring see consistent input.
scores_pred = clf.decision_function(X_train)
scores_pred

# In[36]:

# Anomaly scores for the held-out rows, kept in a variable instead of being
# computed and thrown away.
scores_test = clf.decision_function(X_test)
scores_test

我的代码-unsupervised learning的更多相关文章

  1. Machine Learning Algorithms Study Notes(4)—无监督学习(unsupervised learning)

    1    Unsupervised Learning 1.1    k-means clustering algorithm 1.1.1    算法思想 1.1.2    k-means的不足之处 1 ...

  2. Unsupervised Learning: Use Cases

    Unsupervised Learning: Use Cases Contents Visualization K-Means Clustering Transfer Learning K-Neare ...

  3. Unsupervised Learning and Text Mining of Emotion Terms Using R

    Unsupervised learning refers to data science approaches that involve learning without a prior knowle ...

  4. Supervised Learning and Unsupervised Learning

    Supervised Learning In supervised learning, we are given a data set and already know what our correc ...

  5. Unsupervised learning无监督学习

    Unsupervised learning allows us to approach problems with little or no idea what our results should ...

  6. PredNet --- Deep Predictive coding networks for video prediction and unsupervised learning --- 论文笔记

    PredNet --- Deep Predictive coding networks for video prediction and unsupervised learning   ICLR 20 ...

  7. 131.005 Unsupervised Learning - Cluster | 非监督学习 - 聚类

    @(131 - Machine Learning | 机器学习) 零. Goal How Unsupervised Learning fills in that model gap from the ...

  8. Unsupervised learning, attention, and other mysteries

    Unsupervised learning, attention, and other mysteries Get notified when our free report “Future of M ...

  9. Coursera 机器学习 第8章(上) Unsupervised Learning 学习笔记

    8 Unsupervised Learning 8.1 Clustering 8.1.1 Unsupervised Learning: Introduction 集群(聚类)的概念. 什么是无监督学习:对于无 ...

随机推荐

  1. null 和System.DBNull.Value

    row[column]的值为DBNull.Value的话,说明它是从数据库中取到值了,对应了数据库中的空值:但如果row[column]的值为null的话,说明没有从数据库中取到值. DBNull.V ...

  2. vue.js笔记总结

    一份不错的vue.js基础笔记!!!! 第一章 Vue.js是什么? Vue(法语)同view(英语) Vue.js是一套构建用户界面(view)的MVVM框架.Vue.js的核心库只关注视图层,并且 ...

  3. java切割音频文件

    工具: 一个jar包即可:jave-1.0.2.jar 可以切割wav格式的音频文件 完整工程目录 就一个jar包,一个main类 代码: package com.zit; import java.i ...

  4. ubuntu上安装并使用mysql数据库

    一.安装Mysql 最简单的方式就是apt-get安装 安装核心程序 sudo apt-get install mysql-client-core-5.6 安装客户端程序 sudo apt-get i ...

  5. python基础之作业1---用户登录

    作业:编写登陆接口 输入用户名密码 认证成功后显示欢迎信息 输错三次后锁定 import sys, os, getpass os.system('clear')i = 0while i < 3: ...

  6. angular $resource服务

    创建服务 var taskInstancesResource = function ($resource) { var resource = $resource('/ssc-cutover/rest/ ...

  7. 前端学习的roadmap

    引自http://www.cnblogs.com/IMxinu/p/9693041.html

  8. 谱聚类(Spectral Clustering)原理

    谱聚类(spectral clustering)是广泛使用的聚类算法,比起传统的K-Means算法,谱聚类对数据分布的适应性更强,聚类效果也很优秀,同时聚类的计算量也小很多,更加难能可贵的是实现起来也 ...

  9. 安装mavlink遇到的问题(future找不到)

    从官网下载mavlink(git clone https://github.com/mavlink/mavlink.git) 然后进入mavlink 目录执行 git submodule update ...

  10. visual c++如何显示行号

    工具 -> 选项 -> 文本编辑器