# coding: utf-8

# In[2]:

import pandas as pd
import numpy as np
from sklearn.preprocessing import binarize
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import Normalizer
# Load the example CSV and pass two of its column ranges through a Normalizer.
data=pd.read_csv(r"D:\Users\sgg91044\Desktop\normalizing_example.csv")
# Label encoding is disabled in this cell.
#le = LabelEncoder()
#data["eqp_encoded"] = le.fit_transform(data.iloc[:,0])
#data["slot_encoded"] = le.fit_transform(data.iloc[:,1])
#data['chamber_encoded'] = le.fit_transform(data.iloc[:,2])
# `nz` is reused by later cells through nz.transform(...).
# NOTE(review): scikit-learn documents Normalizer as a stateless, per-row
# scaler, so fitting it on this file presumably has no effect on the later
# transform calls -- confirm whether a fitted column-wise scaler was intended.
nz = Normalizer()
# Overwrite positional columns 8-9, then 0-2, with their normalized values.
data.iloc[:,8:10]=pd.DataFrame(nz.fit_transform(data.iloc[:,8:10]),columns=data.iloc[:,8:10].columns)
data.iloc[:,0:3]=pd.DataFrame(nz.fit_transform(data.iloc[:,0:3]),columns=data.iloc[:,0:3].columns)

# In[1]:

import pandas as pd
import numpy as np

# In[151]:

# Load the 5-minute export and reshape it to one row per wafer.
data=pd.read_csv(r"D:\Users\sgg91044\Desktop\auto_data\5mins_data_2.csv")
data.head()

# In[152]:

# Drop the first positional column and the columns that are not used below.
data=data.iloc[:,1:]
data.drop(['ooc','oos'],axis=1,inplace=True)
data.drop(["waferid","Step","finishtime","parametername"],axis=1,inplace=True)
# Positional rename: this requires exactly seven remaining columns.
data.columns = ["eqpid","chamber","lotid","wafer","param_name","recipe","data"]
# Long -> wide: one column per param_name, duplicates are summed.
pivoted = data.pivot_table(index=['eqpid','chamber','lotid','wafer','recipe'],columns="param_name",values="data",aggfunc=np.sum)
pivoted.reset_index(inplace=True)
# Expected output schema: five identifier columns followed by 15 parameters.
columns=["eqpid","chamber","lotid","wafer","recipe","ETCM_PHA4","ETCM_PHB4","ETCM_PHC4","HELK_MEAN","LOWERCHM_PRESS","PBK4","RR13_MAX.","RR13_MEAN","RR23_MAX.","RR23_MEAN","THR3_MAX.","THR3_MAX._DIFF","THR3_MEAN","THR3_MEAN_DIFF","THR3_MEAN_SLOPE"]
final = pd.DataFrame(columns = columns)
# DataFrame.reindex_axis was removed in pandas 1.0; reindex(columns=...) is
# its replacement: keep exactly `columns`, in that order, NaN where absent.
# NOTE(review): merging the empty template first looks redundant with the
# reindex -- pivoted.reindex(columns=columns) may be all that is needed; confirm.
final = final.merge(pivoted,how="right").reindex(columns=columns)

# In[153]:

# Display the merged frame.
final

# In[154]:

#normalize
# Keep only the 15 parameter columns, then drop every row with a missing value.
final=final.drop(columns=["eqpid","chamber","lotid","wafer","recipe"])
final= final.dropna(axis=0, how='any')

# In[155]:

final

# In[156]:

# Positional columns 8-9 are RR23_MAX. and RR23_MEAN in the schema built above.
final.iloc[:,8:10]=nz.transform(final.iloc[:,8:10])
final

# In[157]:

# Positional columns 0-2 are the three ETCM_PH* columns.
final.iloc[:,0:3]=nz.transform(final.iloc[:,0:3])

# In[158]:

final

# In[20]:

#SUM_ETCM
# Derived feature: element-wise sum of the three (already normalized) ETCM columns.
final["SUM_ETCM"]=np.array(final.ETCM_PHA4)+np.array(final.ETCM_PHB4)+np.array(final.ETCM_PHC4)
final

# In[166]:

# Load the Sep 01-03 export and reshape it to one row per wafer run.
data=pd.read_csv(r"D:\Users\sgg91044\Desktop\sep_oct_data\sep01-03.csv")
data.head()

# In[167]:

data=data.drop(columns=['ooc','oos','RNK'])
# Long -> wide: one column per Param_Name, duplicates are summed.
pivoted = data.pivot_table(index=['eqpid','Chamber','lotid','slotid','stage','Recipie_Name','finishtime'],columns="Param_Name",values="data1",aggfunc=np.sum)

# In[171]:

pivoted

# In[173]:

# In[177]:

import pandas as pd
import numpy as np
pivoted.reset_index(inplace=True)
# Expected output schema; 'stage' and 'finishtime' are intentionally not listed
# here, so the reindex below drops them.
columns=["eqpid","Chamber","lotid","slotid","Recipie_Name","ETCM_PHA4","ETCM_PHB4","ETCM_PHC4","HELK_MEAN","LOWERCHM_PRESS","PBK4","RR13_MAX.","RR13_MEAN","RR23_MAX.","RR23_MEAN","THR3_MAX.","THR3_MAX._DIFF","THR3_MEAN","THR3_MEAN_DIFF","THR3_MEAN_SLOPE"]
final = pd.DataFrame(columns = columns)
# DataFrame.reindex_axis was removed in pandas 1.0; reindex(columns=...) is
# its replacement: keep exactly `columns`, in that order, NaN where absent.
final = final.merge(pivoted,how="right").reindex(columns=columns)
# Only complete rows are scored.
final= final.dropna(axis=0, how='any')

# In[180]:

# Split the frame: `index` keeps the five identifier columns, `final` keeps the
# 15 parameter columns. Both retain the row labels left by the earlier dropna.
index=final.drop(columns=["ETCM_PHA4","ETCM_PHB4","ETCM_PHC4","HELK_MEAN","LOWERCHM_PRESS","PBK4","RR13_MAX.","RR13_MEAN","RR23_MAX.","RR23_MEAN","THR3_MAX.","THR3_MAX._DIFF","THR3_MEAN","THR3_MEAN_DIFF","THR3_MEAN_SLOPE"])
final=final.drop(columns=["eqpid","Chamber","lotid","slotid","Recipie_Name"])
final.head()

# In[181]:

# Normalize RR23_MAX./RR23_MEAN (positions 8-9) and the ETCM_PH* columns (0-2).
final.iloc[:,8:10]=nz.transform(final.iloc[:,8:10])
final.iloc[:,0:3]=nz.transform(final.iloc[:,0:3])
#SUM_ETCM
# Derived feature: element-wise sum of the three normalized ETCM columns.
final["SUM_ETCM"]=np.array(final.ETCM_PHA4)+np.array(final.ETCM_PHB4)+np.array(final.ETCM_PHC4)

# In[182]:

final.head()

# In[185]:

# Load the persisted random-forest model.
# NOTE(review): sklearn.externals.joblib is absent from newer scikit-learn
# releases (the standalone `joblib` package replaces it) -- confirm against the
# installed version. joblib.load unpickles the file, so only load trusted models.
from sklearn.externals import joblib
random_forest=joblib.load(r'D:\Users\sgg91044\Desktop\deployment\model_RF.pkl')

# In[186]:

# Class probabilities for every row of `final`.
final_pred = random_forest.predict_proba(final)

# In[194]:

final_pred
# Wrapping the array gives final_pred a fresh 0..n-1 index, whereas `index` and
# `final` keep their post-dropna labels: the three exports below line up by row
# position only, not by the index column written to the files.
final_pred=pd.DataFrame(final_pred)

# In[195]:

# Persist identifiers, features and probabilities as three separate files.
index.to_csv(r'D:\Users\sgg91044\Desktop\sep_oct_data\sep01-03index.csv')
final.to_csv(r'D:\Users\sgg91044\Desktop\sep_oct_data\sep01-03data.csv')
final_pred.to_csv(r'D:\Users\sgg91044\Desktop\sep_oct_data\sep01-03result.csv')

# In[196]:

# Second copy of the identifier export under a different file name.
index.to_csv(r'D:\Users\sgg91044\Desktop\sep_oct_data\sep01-03index1.csv')

# In[217]:

# Load the November export.
data=pd.read_csv(r'D:\Users\sgg91044\Desktop\sep_oct_data\Nov.csv')

# In[218]:

data.head()

# In[219]:

data=data.drop(columns=["layer_id","ooc","oos"])
# Long -> wide: one column per Param_Name, duplicates are summed.
pivoted = data.pivot_table(index=['eqpid','Chamber','lotid','slotid','defect_count','stage','Recipie_Name','finishtime'],columns="Param_Name",values="data1",aggfunc=np.sum)

# In[220]:

pivoted

# In[226]:

# Round-trip through CSV; the MultiIndex levels come back as ordinary columns.
pivoted.to_csv(r'D:\Users\sgg91044\Desktop\sep_oct_data\Nov_pivoted.csv')

# In[227]:

data=pd.read_csv(r'D:\Users\sgg91044\Desktop\sep_oct_data\Nov_pivoted.csv')
data.head()

# In[228]:

# Identifier columns only.
# NOTE(review): a "Target" column is dropped here, but neither the pivot index
# nor the visible code creates one -- presumably it was added to the CSV outside
# this notebook; confirm.
Index=data.drop(columns=["ETCM_PHA4","ETCM_PHB4","ETCM_PHC4","HELK_MEAN","LOWERCHM_PRESS","PBK4","RR13_MAX.","RR13_MEAN","RR23_MAX.","RR23_MEAN","THR3_MAX.","THR3_MAX._DIFF","THR3_MEAN","THR3_MEAN_DIFF","THR3_MEAN_SLOPE","Target"])
Index.head()

# In[229]:

# Parameter columns (plus Target) only.
data=data.drop(columns=["eqpid","Chamber","lotid","slotid","defect_count","stage","Recipie_Name","finishtime"])
data.head()

# In[232]:

# Select the rows labelled Target == 0 and round-trip them through CSV.
good_wafer= data[data.Target ==0]
good_wafer

# In[233]:

good_wafer.to_csv(r'D:\Users\sgg91044\Desktop\sep_oct_data\Nov_good.csv')

# In[234]:

good_wafer= pd.read_csv(r'D:\Users\sgg91044\Desktop\sep_oct_data\Nov_good.csv')
good_wafer.head()
# NOTE(review): these identifier columns were already removed from `data`
# before the export above, so this drop only succeeds if the file on disk was
# produced differently -- confirm which file is really being read.
good_wafer=good_wafer.drop(columns=["eqpid","Chamber","lotid","slotid","defect_count","stage","Recipie_Name","finishtime"])

# In[240]:

# Median-impute positional columns 0..15. The loop body had lost its
# indentation (a SyntaxError as written); it is restored here.
# NOTE(review): the bad-wafer cell uses range(0,15) -- confirm which is right.
for i in range(0,16):
    column = good_wafer.iloc[:,i]
    # Median of the non-missing values only.
    med = np.median(column[column.notna()])
    good_wafer.iloc[:,i] = column.fillna(med)

# In[242]:

good_wafer.to_csv(r'D:\Users\sgg91044\Desktop\sep_oct_data\Nov_good_imputed.csv')

# In[236]:

# Select the rows labelled Target == 1 and export them.
bad_wafer= data[data.Target ==1]
bad_wafer
bad_wafer.to_csv(r'D:\Users\sgg91044\Desktop\sep_oct_data\Nov_badd.csv')

# In[213]:

# NOTE(review): this reads sep_oct_bad.csv, not the Nov_badd.csv written above,
# yet the result is saved below as Nov_bad_imputed.csv -- the execution counts
# suggest the cells were run out of order; confirm the intended input file.
bad_wafer= pd.read_csv(r'D:\Users\sgg91044\Desktop\sep_oct_data\sep_oct_bad.csv')
bad_wafer.head()
bad_wafer=bad_wafer.drop(columns=["eqpid","Chamber","lotid","slotid","defect_count","stage","Recipie_Name","finishtime"])

# In[237]:

# Median-impute positional columns 0..14. The loop body had lost its
# indentation (a SyntaxError as written); it is restored here.
for i in range(0,15):
    column = bad_wafer.iloc[:,i]
    # Median of the non-missing values only.
    med = np.median(column[column.notna()])
    bad_wafer.iloc[:,i] = column.fillna(med)

# In[238]:

bad_wafer.to_csv(r'D:\Users\sgg91044\Desktop\sep_oct_data\Nov_bad_imputed.csv')

# In[259]:

# Load the imputed Sep/Oct good-wafer data.
data=pd.read_csv(r'D:\Users\sgg91044\Desktop\sep_oct_data\sep_oct_good_imputed.csv')

# In[260]:

data.head()

# In[261]:

data=data.drop(columns=['lotid','defect_count','stage','Recipie_Name','finishtime'])
data.head()

# In[262]:

data.eqpid = data.eqpid.astype("category")
data.Chamber = data.Chamber.astype("category")
# One LabelEncoder instance is re-fitted three times, so after this cell `le`
# only remembers the classes of the last column fitted (positional column 1).
# presumably positions 0/1/2 are eqpid/Chamber/slotid -- TODO confirm.
le = LabelEncoder()
data["eqp_encoded"] = le.fit_transform(data.iloc[:,0])
data["slot_encoded"] = le.fit_transform(data.iloc[:,2])
data['chamber_encoded'] = le.fit_transform(data.iloc[:,1])

# In[263]:

# Side-by-side table of original values and their codes, for mapping back.
Trace_back = pd.concat([data[["eqpid","Chamber","slotid"]],data[["eqp_encoded","chamber_encoded","slot_encoded"]]],axis=1)
data.head(20)

# In[264]:

# Keep only the encoded versions of the identifier columns.
data.drop(columns=['eqpid','Chamber','slotid'],inplace=True)
data.head()

# In[265]:

data.to_csv(r'D:\Users\sgg91044\Desktop\sep_oct_data\sep_oct_good_imputed_edcoded.csv')

# In[266]:

# Read back the file written by the previous cell.
data=pd.read_csv(r'D:\Users\sgg91044\Desktop\sep_oct_data\sep_oct_good_imputed_edcoded.csv')

# In[267]:

data.head()

# In[268]:

# `data` is replaced by the normalizing example; the frame loaded above is discarded.
data=pd.read_csv(r"D:\Users\sgg91044\Desktop\normalizing_example.csv")
#le = LabelEncoder()
#data["eqp_encoded"] = le.fit_transform(data.iloc[:,0])
#data["slot_encoded"] = le.fit_transform(data.iloc[:,1])
#data['chamber_encoded'] = le.fit_transform(data.iloc[:,2])
# NOTE(review): scikit-learn documents Normalizer as stateless (row-wise
# scaling), so this fit presumably does not influence later nz.transform
# calls -- confirm.
nz = Normalizer()
# Overwrite positional columns 8-9, then 0-2, with their normalized values.
data.iloc[:,8:10]=pd.DataFrame(nz.fit_transform(data.iloc[:,8:10]),columns=data.iloc[:,8:10].columns)
data.iloc[:,0:3]=pd.DataFrame(nz.fit_transform(data.iloc[:,0:3]),columns=data.iloc[:,0:3].columns)

# In[277]:

# Load the imputed November good-wafer data.
data1=pd.read_csv(r'D:\Users\sgg91044\Desktop\sep_oct_data\Nov_good_imputed.csv')

# In[278]:

data1.head(20)

# In[279]:

# Normalize positional columns 11-12 and 3-5.
# presumably the same RR23_* / ETCM_PH* columns as elsewhere, shifted by three
# leading columns -- TODO confirm against the file's header.
data1.iloc[:,11:13]=nz.transform(data1.iloc[:,11:13])
data1.iloc[:,3:6]=nz.transform(data1.iloc[:,3:6])

# In[280]:

# Derived feature: element-wise sum of the three normalized ETCM columns.
data1["SUM_ETCM"]=np.array(data1.ETCM_PHA4)+np.array(data1.ETCM_PHB4)+np.array(data1.ETCM_PHC4)

# In[281]:

# NOTE(review): `le` was last fitted on a single column in the label-encoding
# cell (positional column 1 of that frame), yet it is used here for three
# different columns -- two of these transforms are presumably using the wrong
# classes or will raise on unseen labels; confirm and use one encoder per column.
data1["eqp_encoded"] = le.transform(data1.iloc[:,0])
data1["slot_encoded"] = le.transform(data1.iloc[:,2])
data1['chamber_encoded'] = le.transform(data1.iloc[:,1])

# In[276]:

# NOTE(review): data1 was loaded from Nov_good_imputed.csv but is written to a
# sep_oct_* file name -- confirm the target path.
data1.to_csv(r'D:\Users\sgg91044\Desktop\sep_oct_data\sep_oct_good_imputed_encoded.csv')

# In[3]:

# Load the data_01-10 export.
data=pd.read_csv(r'D:\Users\sgg91044\Desktop\data_01-10\data1.csv')
data.head()

# In[4]:

data=data.drop(columns=["step",'parametername'])
data.head()

# In[5]:

# Long -> wide: one column per Param_Name, duplicates are summed.
pivoted = data.pivot_table(index=['eqpid','chamber','lotid','slotid','stage','recipe','finishtime'],columns="Param_Name",values="data1",aggfunc=np.sum)

# In[6]:

# Round-trip through CSV; the MultiIndex levels come back as ordinary columns.
pivoted.to_csv(r'D:\Users\sgg91044\Desktop\data_01-10\data1_pivoted.csv')

# In[30]:

data=pd.read_csv(r'D:\Users\sgg91044\Desktop\data_01-10\data1_pivoted.csv')
data

# In[31]:

# Drop the identifiers that are not features, plus the HELK_MAX. parameter.
data=data.drop(columns=["stage","finishtime","recipe",'HELK_MAX.'])
data.head()

# In[32]:

# Median-impute positional columns 4..18. The loop body had lost its
# indentation (a SyntaxError as written); it is restored here.
for i in range(4,19):
    column = data.iloc[:,i]
    # Median of the non-missing values only.
    med = np.median(column[column.notna()])
    data.iloc[:,i] = column.fillna(med)

# In[33]:

data1=pd.read_csv(r"D:\Users\sgg91044\Desktop\normalizing_example.csv")
#le = LabelEncoder()
#data["eqp_encoded"] = le.fit_transform(data.iloc[:,0])
#data["slot_encoded"] = le.fit_transform(data.iloc[:,1])
#data['chamber_encoded'] = le.fit_transform(data.iloc[:,2])
# NOTE(review): scikit-learn documents Normalizer as stateless (row-wise
# scaling), so fitting on the example file presumably does not affect the
# transforms of `data` below -- confirm.
nz = Normalizer()
data1.iloc[:,8:10]=pd.DataFrame(nz.fit_transform(data1.iloc[:,8:10]),columns=data1.iloc[:,8:10].columns)
data1.iloc[:,0:3]=pd.DataFrame(nz.fit_transform(data1.iloc[:,0:3]),columns=data1.iloc[:,0:3].columns)
# Same two column groups in `data`, which here has four leading identifier
# columns (hence 12:14 and 4:7 instead of 8:10 and 0:3).
data.iloc[:,12:14]=nz.transform(data.iloc[:,12:14])
data.iloc[:,4:7]=nz.transform(data.iloc[:,4:7])

# In[34]:

def encode_eqpid(eqpid):
    """Convert an equipment id string into a zero-based integer code.

    The last two characters of ``eqpid`` are parsed as an integer and reduced
    by one, e.g. ``"EQ07"`` -> ``6``.

    Raises:
        ValueError: if the last two characters are not an integer literal.
    """
    return int(eqpid[-2:]) - 1

def encode_chamber(chamber):
    """Map a chamber label to a binary code.

    Returns 0 for ``'A'`` and 1 for every other value.
    """
    if chamber == 'A':
        return 0
    else:
        return 1

def encode_slotid(slotid):
    """Convert a one-based slot number into a zero-based code.

    Returns ``slotid - 1`` when ``slotid`` is positive, otherwise ``None``.
    """
    if slotid > 0:
        return slotid - 1
    # Non-positive slot numbers have no code; the original fell off the end of
    # the function here, which is made explicit.
    return None

# Encode each identifier column and store the result as a pandas categorical.
data.eqpid = data.eqpid.apply(encode_eqpid).astype("category")
data.chamber = data.chamber.apply(encode_chamber).astype("category")
data.slotid = data.slotid.apply(encode_slotid).astype("category")

# In[35]:

# lotid is not kept as a feature.
data = data.drop(columns=["lotid"])

# In[36]:

data

# In[107]:

import pandas as pd
import numpy as np
from sklearn import tree
from sklearn.svm import SVC
from sklearn.grid_search import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.preprocessing import binarize
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import Normalizer
from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score,recall_score,average_precision_score,auc

# In[167]:

# Load the prepared modelling table.
data=pd.read_csv(r'D:\Users\sgg91044\Desktop\model_data_1.csv')

# In[168]:

# Store the identifier codes as categoricals.
data.eqpid1 = data.eqpid1.astype("category")
data.chamber1 = data.chamber1.astype("category")
data.wafer1 = data.wafer1.astype("category")

# In[169]:

# Target is the label; every other column is a feature.
data.Target = data.Target.astype("category")
y = data.Target
X = data.drop(columns='Target')
# 80/20 split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
X, y, test_size=0.2, random_state=8)
from imblearn.over_sampling import SMOTE
# Oversample the training split only; the test split is left untouched.
# NOTE(review): `ratio` and `fit_sample` are older imbalanced-learn spellings
# (newer releases use `sampling_strategy` / `fit_resample`) -- confirm against
# the installed version.
sm = SMOTE(random_state=12, ratio = 1.0)
x_train_smote, y_train_smote = sm.fit_sample(X_train, y_train)
from sklearn.ensemble import RandomForestClassifier

# Make the random forest classifier
random_forest = RandomForestClassifier(n_estimators = 100, random_state = 50, oob_score = True, verbose = 1, n_jobs = -1)
random_forest.fit(x_train_smote,y_train_smote)
y_pred = random_forest.predict(X_test)
#print(classification_report(y_pred=y_pred,y_true=y_test))

# In[170]:

# Per-class precision/recall/F1 on the held-out split.
print(classification_report(y_pred=y_pred,y_true=y_test))

# In[135]:

# Persist the fitted model, then load it back from the same path.
# NOTE(review): sklearn.externals.joblib is absent from newer scikit-learn
# releases (the standalone `joblib` package replaces it) -- confirm against the
# installed version.
from sklearn.externals import joblib

joblib.dump(random_forest, r'D:\Users\sgg91044\Desktop\deployment\model_RF_test.pkl')

# In[136]:

from sklearn.externals import joblib
# joblib.load unpickles the file; only load model files from a trusted source.
random_forest=joblib.load(r'D:\Users\sgg91044\Desktop\deployment\model_RF_test.pkl')

# In[221]:

# Load the pivoted "OK" example wafers.
data=pd.read_csv(r'D:\Users\sgg91044\Desktop\Some wafer Examples from Engg team\OK\test_AD_ok_pivoted_15.csv')

# In[222]:

data.head()

# In[223]:

# Keep the lot ids for a later export, then drop the non-feature columns.
lotid=data.lotid
data=data.drop(columns=['lotid','recipe','finishtime'])

# In[224]:

# Median-impute positional columns 3..17. The loop body had lost its
# indentation (a SyntaxError as written); it is restored here.
for i in range(3,18):
    column = data.iloc[:,i]
    # Median of the non-missing values only.
    med = np.median(column[column.notna()])
    data.iloc[:,i] = column.fillna(med)

# In[225]:

data1=pd.read_csv(r"D:\Users\sgg91044\Desktop\normalizing_example.csv")
#le = LabelEncoder()
#data["eqp_encoded"] = le.fit_transform(data.iloc[:,0])
#data["slot_encoded"] = le.fit_transform(data.iloc[:,1])
#data['chamber_encoded'] = le.fit_transform(data.iloc[:,2])
# NOTE(review): scikit-learn documents Normalizer as stateless (row-wise
# scaling), so fitting on the example file presumably does not affect the
# transforms of `data` below -- confirm.
nz = Normalizer()
data1.iloc[:,8:10]=pd.DataFrame(nz.fit_transform(data1.iloc[:,8:10]),columns=data1.iloc[:,8:10].columns)
data1.iloc[:,0:3]=pd.DataFrame(nz.fit_transform(data1.iloc[:,0:3]),columns=data1.iloc[:,0:3].columns)
# Normalize positional columns 11-12 and 3-5 of the wafer data.
data.iloc[:,11:13]=nz.transform(data.iloc[:,11:13])
data.iloc[:,3:6]=nz.transform(data.iloc[:,3:6])

# In[226]:

data.head()

# In[227]:

# Derived feature: element-wise sum of the three normalized ETCM columns.
data["SUM_ETCM"]=np.array(data.ETCM_PHA4)+np.array(data.ETCM_PHB4)+np.array(data.ETCM_PHC4)

# In[228]:

def encode_eqpid(eqpid1):
    """Convert an equipment id string into a zero-based integer code.

    The last two characters of ``eqpid1`` are parsed as an integer and reduced
    by one, e.g. ``"EQ07"`` -> ``6``.

    Raises:
        ValueError: if the last two characters are not an integer literal.
    """
    return int(eqpid1[-2:]) - 1

def encode_chamber(chamber1):
    """Map a chamber label to a binary code.

    Returns 0 for ``'A'`` and 1 for every other value.
    """
    if chamber1 == 'A':
        return 0
    else:
        return 1

def encode_slotid(wafer1):
    """Convert a one-based wafer/slot number into a zero-based code.

    Returns ``wafer1 - 1`` when ``wafer1`` is positive, otherwise ``None``.
    """
    if wafer1 > 0:
        return wafer1 - 1
    # Non-positive numbers have no code; the original fell off the end of the
    # function here, which is made explicit.
    return None

# Encode the identifier columns, then store them as categoricals.
data.eqpid1 = data.eqpid1.apply(encode_eqpid)
data.chamber1 = data.chamber1.apply(encode_chamber)
data.wafer1 = data.wafer1.apply(encode_slotid)
data.eqpid1 = data.eqpid1.astype("category")
data.chamber1 = data.chamber1.astype("category")
data.wafer1 = data.wafer1.astype("category")

# In[ ]:

# NOTE(review): called without a path, so no file is written and the returned
# CSV text is discarded -- presumably an unfinished cell; confirm.
data.to_csv()

# In[229]:

# Predicted class for every prepared row.
y_pred = random_forest.predict(data)

# In[230]:

y_pred=pd.DataFrame(y_pred)

# In[219]:

# Predictions and lot ids are exported to separate files.
y_pred.to_csv(r'D:\Users\sgg91044\Desktop\y_pred_ok.csv')

# In[220]:

lotid.to_csv(r'D:\Users\sgg91044\Desktop\lotid_ok.csv')

# In[37]:

# NOTE(review): the execution counts (37-43) suggest the cells below belong to
# the earlier data_01-10 workflow rather than to the OK-wafer scoring above --
# confirm which `data` frame they are meant to act on.
data["SUM_ETCM"]=np.array(data.ETCM_PHA4)+np.array(data.ETCM_PHB4)+np.array(data.ETCM_PHC4)

# In[38]:

data.head()

# In[43]:

data.to_csv(r'D:\Users\sgg91044\Desktop\data_01-10\data1_test.csv')

# In[137]:

# Load an already pivoted file and store its identifier codes as categoricals.
data1=pd.read_csv(r'D:\Users\sgg91044\Desktop\AEM2_pivotdata_12-13-10-29.csv')
data1.head()
data1.eqpid1 = data1.eqpid1.astype("category")
data1.chamber1 = data1.chamber1.astype("category")
data1.wafer1 = data1.wafer1.astype("category")

# In[138]:

# Predicted class for every row of the file.
y_pred = random_forest.predict(data1)

# In[139]:

print(y_pred)

# In[204]:

# Raw (long-format) "OK" example wafers.
data1=pd.read_csv(r'D:\Users\sgg91044\Desktop\Some wafer Examples from Engg team\OK\test_AD_ok.csv')
data1.head()

# In[205]:

data1=data1.drop(columns=['waferid','Step'])
# Long -> wide: one column per param_name, duplicates are summed.
pivoted = data1.pivot_table(index=['eqpid','chamber','lotid','wafer','recipe','finishtime'],columns="param_name",values="data",aggfunc=np.sum)

# In[206]:

pivoted.to_csv(r'D:\Users\sgg91044\Desktop\Some wafer Examples from Engg team\OK\test_AD_ok_pivoted.csv')

# In[152]:

# Full parameter export.
data2=pd.read_csv(r'D:\Users\sgg91044\Desktop\all_parameters.csv')
data2.head()

# In[153]:

data2=data2.drop(columns=['layerid','waferid','stg','stage','step','parametername','ooc','oos','RNK'])
# NOTE(review): 'sloitid' looks like a misspelling of 'slotid' (later cells use
# data.slotid), but it must match the CSV header -- confirm before renaming.
pivoted = data2.pivot_table(index=['eqpid','chamber','lotid','sloitid','defect','recipe','finishtime'],columns="param_name",values="data",aggfunc=np.sum)

# In[154]:

pivoted.to_csv(r'D:\Users\sgg91044\Desktop\all_parameters_pivoted.csv')

# In[159]:

# NOTE(review): this reads all_parameters_deleted.csv, which no visible cell
# writes -- presumably a hand-edited copy of all_parameters_pivoted.csv; confirm.
data=pd.read_csv(r'D:\Users\sgg91044\Desktop\all_parameters_deleted.csv')

# In[160]:

data.head()

# In[161]:

# Median-impute positional columns 7..60. The loop body had lost its
# indentation (a SyntaxError as written); it is restored here.
for i in range(7,61):
    column = data.iloc[:,i]
    # Median of the non-missing values only.
    med = np.median(column[column.notna()])
    data.iloc[:,i] = column.fillna(med)
data

# In[162]:

# Drop the identifier columns that are not used as features.
data=data.drop(columns=['lotid','defect','recipe','finishtime'])
data.head()

# In[163]:

def encode_eqpid(eqpid):
    """Convert an equipment id string into a zero-based integer code.

    The last two characters of ``eqpid`` are parsed as an integer and reduced
    by one, e.g. ``"EQ07"`` -> ``6``.

    Raises:
        ValueError: if the last two characters are not an integer literal.
    """
    return int(eqpid[-2:]) - 1

def encode_chamber(chamber):
    """Map a chamber label to a binary code.

    Returns 0 for ``'A'`` and 1 for every other value.
    """
    if chamber == 'A':
        return 0
    else:
        return 1

def encode_slotid(slotid):
    """Convert a one-based slot number into a zero-based code.

    Returns ``slotid - 1`` when ``slotid`` is positive, otherwise ``None``.
    """
    if slotid > 0:
        return slotid - 1
    # Non-positive slot numbers have no code; the original fell off the end of
    # the function here, which is made explicit.
    return None

# Encode each identifier column and store the result as a pandas categorical.
data.eqpid = data.eqpid.apply(encode_eqpid).astype("category")
data.chamber = data.chamber.apply(encode_chamber).astype("category")
data.slotid = data.slotid.apply(encode_slotid).astype("category")

# In[164]:

data.head()

# In[165]:

# Target is the label; every other column is a feature.
data.Target = data.Target.astype("category")
y = data.Target
X = data.drop(columns='Target')
# 80/20 split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
X, y, test_size=0.2, random_state=8)
from imblearn.over_sampling import SMOTE
# Oversample the training split only; the test split is left untouched.
# NOTE(review): `ratio` and `fit_sample` are older imbalanced-learn spellings
# (newer releases use `sampling_strategy` / `fit_resample`) -- confirm against
# the installed version.
sm = SMOTE(random_state=12, ratio = 1.0)
x_train_smote, y_train_smote = sm.fit_sample(X_train, y_train)
from sklearn.ensemble import RandomForestClassifier

# Make the random forest classifier
random_forest = RandomForestClassifier(n_estimators = 100, random_state = 50, oob_score = True, verbose = 1, n_jobs = -1)
random_forest.fit(x_train_smote,y_train_smote)
y_pred = random_forest.predict(X_test)
#print(classification_report(y_pred=y_pred,y_true=y_test))

# In[166]:

# Per-class precision/recall/F1 on the held-out split.
print(classification_report(y_pred=y_pred,y_true=y_test))

# In[234]:

# Load the pivoted data_01-10 table.
data=pd.read_csv(r'D:\Users\sgg91044\Desktop\data_01-10\data_pivoted.csv')
data

# In[236]:

# Drop the identifier columns that are not used as features.
data=data.drop(columns=['layer_id','stage','Recipie_Name','finishtime','defect_count','lotid'])
data.head()

# In[238]:

# Median-impute positional columns 3..17. The loop body had lost its
# indentation (a SyntaxError as written); it is restored here.
for i in range(3,18):
    column = data.iloc[:,i]
    # Median of the non-missing values only.
    med = np.median(column[column.notna()])
    data.iloc[:,i] = column.fillna(med)
data1=pd.read_csv(r"D:\Users\sgg91044\Desktop\normalizing_example.csv")
#le = LabelEncoder()
#data["eqp_encoded"] = le.fit_transform(data.iloc[:,0])
#data["slot_encoded"] = le.fit_transform(data.iloc[:,1])
#data['chamber_encoded'] = le.fit_transform(data.iloc[:,2])
# NOTE(review): scikit-learn documents Normalizer as stateless (row-wise
# scaling), so fitting on the example file presumably does not affect the
# transforms of `data` below -- confirm.
nz = Normalizer()
data1.iloc[:,8:10]=pd.DataFrame(nz.fit_transform(data1.iloc[:,8:10]),columns=data1.iloc[:,8:10].columns)
data1.iloc[:,0:3]=pd.DataFrame(nz.fit_transform(data1.iloc[:,0:3]),columns=data1.iloc[:,0:3].columns)
# Normalize positional columns 11-12 and 3-5 of the modelling data.
data.iloc[:,11:13]=nz.transform(data.iloc[:,11:13])
data.iloc[:,3:6]=nz.transform(data.iloc[:,3:6])

# In[239]:

def encode_eqpid(eqpid):
    """Convert an equipment id string into a zero-based integer code.

    The last two characters of ``eqpid`` are parsed as an integer and reduced
    by one, e.g. ``"EQ07"`` -> ``6``.

    Raises:
        ValueError: if the last two characters are not an integer literal.
    """
    return int(eqpid[-2:]) - 1

def encode_chamber(Chamber):
    """Map a chamber label to a binary code.

    Returns 0 for ``'A'`` and 1 for every other value.
    """
    if Chamber == 'A':
        return 0
    else:
        return 1

def encode_slotid(slotid):
    """Convert a one-based slot number into a zero-based code.

    Returns ``slotid - 1`` when ``slotid`` is positive, otherwise ``None``.
    """
    if slotid > 0:
        return slotid - 1
    # Non-positive slot numbers have no code; the original fell off the end of
    # the function here, which is made explicit.
    return None

# Encode each identifier column and store the result as a pandas categorical.
data.eqpid = data.eqpid.apply(encode_eqpid).astype("category")
data.Chamber = data.Chamber.apply(encode_chamber).astype("category")
data.slotid = data.slotid.apply(encode_slotid).astype("category")
data.head()

# In[240]:

# Derived feature: element-wise sum of the three normalized ETCM columns.
data["SUM_ETCM"]=np.array(data.ETCM_PHA4)+np.array(data.ETCM_PHB4)+np.array(data.ETCM_PHC4)
# Target is the label; every other column is a feature.
data.Target = data.Target.astype("category")
y = data.Target
X = data.drop(columns='Target')
# 80/20 split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
X, y, test_size=0.2, random_state=8)
from imblearn.over_sampling import SMOTE
# Oversample the training split only; the test split is left untouched.
# NOTE(review): `ratio` and `fit_sample` are older imbalanced-learn spellings
# (newer releases use `sampling_strategy` / `fit_resample`) -- confirm against
# the installed version.
sm = SMOTE(random_state=12, ratio = 1.0)
x_train_smote, y_train_smote = sm.fit_sample(X_train, y_train)
from sklearn.ensemble import RandomForestClassifier

# Make the random forest classifier
random_forest = RandomForestClassifier(n_estimators = 100, random_state = 50, oob_score = True, verbose = 1, n_jobs = -1)
random_forest.fit(x_train_smote,y_train_smote)
y_pred = random_forest.predict(X_test)
#print(classification_report(y_pred=y_pred,y_true=y_test))

# In[241]:

# Per-class precision/recall/F1 on the held-out split.
print(classification_report(y_pred=y_pred,y_true=y_test))

# In[242]:

# NOTE(review): sklearn.externals.joblib is absent from newer scikit-learn
# releases (the standalone `joblib` package replaces it) -- confirm against the
# installed version.
from sklearn.externals import joblib

# Persist the fitted model for the scoring cells.
joblib.dump(random_forest, r'D:\Users\sgg91044\Desktop\deployment\model_RF_Mclass.pkl')

# In[250]:

# Load the pivoted "Defective" example wafers.
data=pd.read_csv(r'D:\Users\sgg91044\Desktop\Some wafer Examples from Engg team\Defective\test_AD_15.csv')
# Keep the lot ids aside, then drop the non-feature columns.
lotid=data.lotid
data=data.drop(columns=['lotid','recipe','finishtime'])
# Median-impute positional columns 3..17. The loop body had lost its
# indentation (a SyntaxError as written); it is restored here.
for i in range(3,18):
    column = data.iloc[:,i]
    # Median of the non-missing values only.
    med = np.median(column[column.notna()])
    data.iloc[:,i] = column.fillna(med)
data1=pd.read_csv(r"D:\Users\sgg91044\Desktop\normalizing_example.csv")
#le = LabelEncoder()
#data["eqp_encoded"] = le.fit_transform(data.iloc[:,0])
#data["slot_encoded"] = le.fit_transform(data.iloc[:,1])
#data['chamber_encoded'] = le.fit_transform(data.iloc[:,2])
# NOTE(review): scikit-learn documents Normalizer as stateless (row-wise
# scaling), so fitting on the example file presumably does not affect the
# transforms of `data` below -- confirm.
nz = Normalizer()
data1.iloc[:,8:10]=pd.DataFrame(nz.fit_transform(data1.iloc[:,8:10]),columns=data1.iloc[:,8:10].columns)
data1.iloc[:,0:3]=pd.DataFrame(nz.fit_transform(data1.iloc[:,0:3]),columns=data1.iloc[:,0:3].columns)
# Normalize positional columns 11-12 and 3-5 of the wafer data.
data.iloc[:,11:13]=nz.transform(data.iloc[:,11:13])
data.iloc[:,3:6]=nz.transform(data.iloc[:,3:6])
data.head()

# In[251]:

def encode_eqpid(eqpid):
    """Convert an equipment id string into a zero-based integer code.

    The last two characters of ``eqpid`` are parsed as an integer and reduced
    by one, e.g. ``"EQ07"`` -> ``6``.

    Raises:
        ValueError: if the last two characters are not an integer literal.
    """
    return int(eqpid[-2:]) - 1

def encode_chamber(Chamber):
    """Map a chamber label to a binary code.

    Returns 0 for ``'A'`` and 1 for every other value.
    """
    if Chamber == 'A':
        return 0
    else:
        return 1

def encode_slotid(slotid):
    """Convert a one-based slot number into a zero-based code.

    Returns ``slotid - 1`` when ``slotid`` is positive, otherwise ``None``.
    """
    if slotid > 0:
        return slotid - 1
    # Non-positive slot numbers have no code; the original fell off the end of
    # the function here, which is made explicit.
    return None

# Encode the identifier columns, then store them as categoricals.
# NOTE(review): this cell accesses eqpid/Chamber/slotid, while the OK-wafer
# scoring cell uses eqpid1/chamber1/wafer1 on a similarly named file -- confirm
# the header of test_AD_15.csv.
data.eqpid = data.eqpid.apply(encode_eqpid)
data.Chamber = data.Chamber.apply(encode_chamber)
data.slotid = data.slotid.apply(encode_slotid)
data.eqpid = data.eqpid.astype("category")
data.Chamber = data.Chamber.astype("category")
data.slotid = data.slotid.astype("category")
data.head()

# In[252]:

# Derived feature: element-wise sum of the three normalized ETCM columns.
data["SUM_ETCM"]=np.array(data.ETCM_PHA4)+np.array(data.ETCM_PHB4)+np.array(data.ETCM_PHC4)
# NOTE(review): sklearn.externals.joblib is absent from newer scikit-learn
# releases (the standalone `joblib` package replaces it) -- confirm. joblib.load
# unpickles the file, so only load trusted models.
from sklearn.externals import joblib
random_forest=joblib.load(r'D:\Users\sgg91044\Desktop\deployment\model_RF_Mclass.pkl')

# In[253]:

# Predicted class for every prepared row.
y_pred = random_forest.predict(data)

# In[254]:

# Export the predictions; `lotid` captured when the file was loaded is not
# written by this cell.
y_pred=pd.DataFrame(y_pred)
y_pred.to_csv(r'D:\Users\sgg91044\Desktop\y_pred_defect.csv')

我的代码-test models的更多相关文章

  1. [Django]models定义choices 字典中的页面显示值

    问题: 在django的models.py 在.我们定义一些choices元组,类别似一些字典值.通常下拉框或单个复选框,例如 0相应的M 1妇女和其他有关 class Area(models.Mod ...

  2. 在线教育平台搭建 预览和models

    一.前言 1.1.项目介绍 在线演示地址:mxonline.mtianyan.cn 开发环境: python:3.6.4 Django:2.0.2 后台管理:xadmin 系统概括: 系统具有完整的用 ...

  3. Django框架----权限组件(具体代码实现)

    1.settings """ Django settings for day80 project. Generated by 'django-admin startpro ...

  4. python测试开发django-11.模型models详解

    前言 Django 模型是与数据库相关的,与数据库相关的代码一般写在 models.py 中,Django 支持 sqlite3, MySQL, PostgreSQL等数据库 只需要在settings ...

  5. 6 Django系列之关于models的sql语句日常用法总结

    preface Django提供了强大的ORM,我们可以通过ORM快速的写出我们想要对数据做什么样操作的代码.下面就说说我在日常工作中的用法: 外键关联精确查询 应用场景:表A host字段关联到了表 ...

  6. Django中models定义的choices字典使用get_FooName_display()在页面中显示值

    问题 在django的models.py 中,我们定义了一些choices的元组,类似一些字典值,一般都是下拉框或者单多选框,例如 0对应男 1对应女等等 看下例子: class Area(model ...

  7. 七:mvc使用CodeFirst(代码优先)创建数据库

    1. 理解EF CodeFirst模式特点 2. 使用CodeFirst模式生成数据库 1. CodeFirst模式(代码优先) Code First是Entity Framework提供的一种新的编 ...

  8. Django]models中定义的choices 字典在页面中显示值

    在django的models.py 中,我们定义了一些choices的元组,类似一些字典值,一般都是下拉框或者单多选框,例如 0对应男 1对应女等 class Area(models.Model): ...

  9. 网站开发学习Python实现-Django的models学习-生鲜项目(6.3.2)

    @ 目录 1.说明 2.模型类的设计 3.代码的具体实现 4.详情地址 关于作者 1.说明 models是django的很重要的部分,所以深入研究. 本文章的所研究项目为黑马教育python课程中的项 ...

随机推荐

  1. 安装easydict

    在运行lightheadrcnn做test时,提示缺少easydict 不知道什么原因,用pip install easydict或者conda install easydict都没有用,不能安装ea ...

  2. C# foreach 值类型及引用类型迭代变量改变的方式

    C#中foreach不能改变迭代变量的值 然而此种说法只适用与值类型,更改值类型时会改变在栈上的内存分布 引用类型由于是引用地址的变更,不影响内存分布,所以能够在foreach中更改 至于引用类型中的 ...

  3. R语言预测实战(游浩麟)笔记2

    特征构建技术 特征变换,对原始的某个特征通过一定的规则或映射得到新特征的方法,主要方法包括概念分层.标准化.离散化.函数变换以及深入表达.特征变换主要由人工完成,属于比较基础的特征构建方法. 概念分层 ...

  4. 安装SQl Server 报错 "需要 Microsoft.NET Framework 3.5 ServicePack 1" 解决方法

    前言 之前装Sql Server都没遇到过这样的问题, 昨天重装了系统之后, 然后安装SQl Server 报错,提示 "需要 Microsoft.NET Framework 3.5 Ser ...

  5. JAVA 第1课

    JAVA第一课 电脑识别的进制:二进制,八进制,十六进制 二进制来表示高低电压,类似于抗战时期的发报机.2进制的存储 8进制和16进制:计算器,在计算的时候有一定的临时存储,8位或者16位禁止的存储 ...

  6. Map集合练习题

    (Map)已知某学校的教学课程内容安排如下: 完成下列要求:1) 使用一个Map,以老师的名字作为键,以老师教授的课程名作为值,表示上述课程安排.2) 增加了一位新老师Allen 教JDBC3) Lu ...

  7. git上传到github时犯的错误

    以下是git的正确顺序 git config --global user.name "xxx" 全局注册名字 git config --global user.email &quo ...

  8. SSL backend error when using OpenSSL pycurl install error

    centos7 pip install pycurl 错误 pip uninstall pycurl export PYCURL_SSL_LIBRARY=nss pip install pycurl ...

  9. 二、redis持久化

    一.redis持久化 1 RDB持久化(定redis的数据定时dump到磁盘上的RDB持久化)RDB持久化是指在指定的时间间隔内将内存中的数据集快照写入磁盘,实际操作过程是fork一个子进程,先将数据 ...

  10. ASP.NET Core2.2 多用户验证和授权

    asp.net core2.2 用户验证 和授权有很详细和特贴心的介绍,我感兴趣的主要是这两篇: cookie身份验证 基于角色的授权 我的项目有两类用户: 微信公众号用户,用户名为公众号的openi ...