我的代码-data pulling
# coding: utf-8
import datetime
import time
from sqlalchemy.engine import create_engine
from sqlalchemy.pool import NullPool
import pyodbc
import pandas as pd
import numpy as np
import shutil
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import Normalizer
conn = create_engine("mssql+pyodbc://sedaldr:s1e6daldr@MSEDA101\MSEDA/SSMC_IEDA_DEV2?driver=ODBC+Driver+11+for+SQL+Server",connect_args={'connect_timeout':15},poolclass=NullPool)
def main():
#examples for normalizer
def encode_eqpid(eqpid):
return int(eqpid[-2:])-1
def encode_chamber(chamber):
if chamber == 'A':
return 0
else:
return 1
def encode_wafer(wafer):
if wafer > 0:
return wafer-1
data=pd.read_csv(r"D:\Users\sgg91044\Desktop\normalizing_example.csv")
#data=data.drop(columns=['lotid','defect_count','stage','Recipie_Name','finishtime'])
#data=data.drop(columns=['eqpid','Chamber','slotid'])
nz = Normalizer()
data.iloc[:,8:10]=pd.DataFrame(nz.fit_transform(data.iloc[:,8:10]),columns=data.iloc[:,8:10].columns)
data.iloc[:,0:3]=pd.DataFrame(nz.fit_transform(data.iloc[:,0:3]),columns=data.iloc[:,0:3].columns)
#pulling data
now = datetime.datetime.now()
print("1st time to pull the data",now)
now_string = '''%s-%s-%s %s:%s:%s'''%(now.year,now.month,now.day,now.hour,now.minute,now.second)
now_5df = now-datetime.timedelta(minutes=5)
now_5af = now+datetime.timedelta(minutes=5)
now_5af_string= '''%s-%s %s:%s:%s'''%(now_5af.month,now_5af.day,now_5af.hour,now_5af.minute,now_5af.second)
now_5bf_string= '''%s-%s-%s %s:%s:%s'''%(now_5df.year,now_5df.month,now_5df.day,now_5df.hour,now_5df.minute,now_5df.second)
query_time = ''' '%s' AND '%s' '''%(now_5bf_string,now_string)
#put SQL query part as string
query_part1 = "SELECT DISTINCT b.eqpid, SUBSTRING(b.ParameterName, CHARINDEX('-', b.parametername) +1,1) as Chamber, b.lotid,a.slotid, a.waferid, LEFT(b.ParameterName, CHARINDEX('-', b.ParameterName) -1) as Param_Name, SUBSTRING(b.ParameterName, CHARINDEX('-', b.parametername) +3,+1) as Step, SUBSTRING(b.ParameterName, CHARINDEX('-', b.parametername) +5, LEN( b.parametername)) as Recipie_Name, b.parametername, a.[data] as data1,a.finishtime,a.ooc,a.oos FROM [SSMC_RTM].[dbo].[rtm_tbl_massdata] a INNER JOIN [SSMC_RTM].[dbo].[rtm_tbl_datahist] b ON a.[datahist_fno]=b.[fno] WHERE b.parametername LIKE '%-4-%' AND b.eqpid LIKE 'AEM2%' AND a.finishtime BETWEEN"
query_part2 = "AND b.parametername NOT LIKE '%-$' AND( b.parametername LIKE 'ETCM_PHA4_A%' OR b.parametername LIKE 'ETCM_PHA4_B%' OR b.parametername LIKE 'ETCM_PHB4_A%' OR b.parametername LIKE 'ETCM_PHB4_B%' OR b.parametername LIKE 'ETCM_PHC4_A%' OR b.parametername LIKE 'ETCM_PHC4_B%' OR b.parametername LIKE 'HELK_MEAN_A%' OR b.parametername LIKE 'HELK_MEAN_B%' OR b.parametername LIKE 'LOWERCHM_PRESS_A%' OR b.parametername LIKE 'LOWERCHM_PRESS_B%' OR b.parametername LIKE 'PBK4_A%' OR b.parametername LIKE 'PBK4_B%' OR b.parametername LIKE 'RR23_MEAN_A%' OR b.parametername LIKE 'RR23_MEAN_B%' OR b.parametername LIKE 'RR23_MAX._A%' OR b.parametername LIKE 'RR23_MAX._B%' OR b.parametername LIKE 'RR13_MEAN_A%' OR b.parametername LIKE 'RR13_MEAN_B%' OR b.parametername LIKE 'RR13_MAX._A%' OR b.parametername LIKE 'RR13_MAX._B%' OR b.parametername LIKE 'THR3_MAX._A%' OR b.parametername LIKE 'THR3_MAX._B%' OR b.parametername LIKE 'THR3_MAX._DIFF_A%' OR b.parametername LIKE 'THR3_MAX._DIFF_B%' OR b.parametername LIKE 'THR3_MEAN_A%' OR b.parametername LIKE 'THR3_MEAN_B%' OR b.parametername LIKE 'THR3_MEAN_DIFF_A%' OR b.parametername LIKE 'THR3_MEAN_DIFF_B%' OR b.parametername LIKE 'THR3_MEAN_SLOPE_A%' OR b.parametername LIKE 'THR3_MEAN_SLOPE_B%' )"
#sum 3 strings to get query string
query_sum = '''%s %s %s'''%(query_part1,query_time,query_part2)
query_sum
#run SQL query by python
data=pd.read_sql_query(query_sum,conn)
#save the dataset
localtime = time.asctime( time.localtime(time.time()) )
time_name = time.strftime("%m-%d-%H-%M", time.localtime() )
data.to_csv(r'D:\Users\sgg91044\Desktop\deployment\RawData\AEM2_rowdata_%s.csv'%time_name)
#clean
data_clean=pd.read_csv(r'D:\Users\sgg91044\Desktop\deployment\RawData\AEM2_rowdata_%s.csv'%time_name)
data_clean=data_clean.iloc[:,1:]
data_clean.drop(['ooc','oos'],axis=1,inplace=True)
data_clean.drop(["waferid","Step","finishtime","parametername"],axis=1,inplace=True)
data_clean.columns = ["eqpid","chamber","lotid","wafer","param_name","recipe","data"]
#pivot
pivoted = data_clean.pivot_table(index=['eqpid','chamber','lotid','wafer','recipe'],columns="param_name",values="data",aggfunc=np.sum)
pivoted.reset_index(inplace=True)
columns=["eqpid","chamber","lotid","wafer","recipe","ETCM_PHA4","ETCM_PHB4","ETCM_PHC4","HELK_MEAN","LOWERCHM_PRESS","PBK4","RR13_MAX.","RR13_MEAN","RR23_MAX.","RR23_MEAN","THR3_MAX.","THR3_MAX._DIFF","THR3_MEAN","THR3_MEAN_DIFF","THR3_MEAN_SLOPE"]
final = pd.DataFrame(columns = columns)
final = final.merge(pivoted,how="right").reindex_axis(columns, axis=1)
#normalize
final= final.dropna(axis=0, how='any')
Index=final.drop(columns=["ETCM_PHA4","ETCM_PHB4","ETCM_PHC4","HELK_MEAN","LOWERCHM_PRESS","PBK4","RR13_MAX.","RR13_MEAN","RR23_MAX.","RR23_MEAN","THR3_MAX.","THR3_MAX._DIFF","THR3_MEAN","THR3_MEAN_DIFF","THR3_MEAN_SLOPE"])
Index.to_csv(r'D:\Users\sgg91044\Desktop\deployment\PredictIndex\AEM2_pivotindex_%s.csv'%time_name)
final=final.drop(columns=["lotid","recipe"])
final.eqpid = final.eqpid.apply(encode_eqpid)
final.chamber = final.chamber.apply(encode_chamber)
final.wafer = final.wafer.apply(encode_wafer)
final.rename(columns={'eqpid':'eqpid1','chamber':'chamber1','wafer':'wafer1'}, inplace=True)
final.eqpid1 = final.eqpid1.astype("category")
final.chamber1 = final.chamber1.astype("category")
final.wafer1 = final.wafer1.astype("category")
final.iloc[:,11:13]=nz.transform(final.iloc[:,11:13])
final.iloc[:,3:6]=nz.transform(final.iloc[:,3:6])
#SUM_ETCM
final["SUM_ETCM"]=np.array(final.ETCM_PHA4)+np.array(final.ETCM_PHB4)+np.array(final.ETCM_PHC4)
final.to_csv(r'D:\Users\sgg91044\Desktop\deployment\PredictData\AEM2_pivotdata_%s.csv'%time_name)
#shutil.move(r'E:\Data\Project-Etcher\RawData\AEM2_rowdata_%s.csv'%time_name,r'E:\Data\Project-Etcher\RwwData_Processed')
print('1st time to pull the data successfully')
localtime = time.asctime( time.localtime(time.time()) )
last_fini_time = time.strftime("%y-%m-%d %H:%M:%S", time.localtime() )
fo = open("time.txt", "w")
fo.write(last_fini_time)
fo.close()
fo = open("time.txt", "r+")
last_fini_time = fo.read()
last_fini_time1= last_fini_time[3:17]
var=1
while var==1:
for i in range(999999):
print ('%d time try to pull the data today'%(i+2))
# Decide if the time equals to what we setting
correct_time=datetime.datetime.strptime(now_5af_string,'%m-%d %H:%M:%S')
mth = now_5af.month
day = now_5af.day
hour = now_5af.hour
mins = now_5af.minute
print('5 mins after last start:',day,hour,mins)
lastfi_time1 = datetime.datetime.strptime(last_fini_time1,'%m-%d %H:%M:%S')
delta= correct_time - lastfi_time1
print('delta=',delta)
if delta.days == 0:
x=1
while x==1:
now= datetime.datetime.now()
if now.hour == hour and now.minute == mins and now.day == day:
break
# if not equal, sleep 20 seconds then start again
time.sleep(20)
print ('%d time try to pull the data today,test again...'%(i+2),now)
#run SQL query by python
print ('%d time try to pull the data is running,5mins'%(i+2),now)
now_string = '''%s-%s-%s %s:%s:%s'''%(now.year,now.month,now.day,now.hour,now.minute,now.second)
now_5bf = now-datetime.timedelta(minutes=5)
now_5af=now+datetime.timedelta(minutes=5)
now_5af_string= '''%s-%s %s:%s:%s'''%(now_5af.month,now_5af.day,now_5af.hour,now_5af.minute,now_5af.second)
now_5bf_string= '''%s-%s-%s %s:%s:%s'''%(now_5bf.year,now_5bf.month,now_5bf.day,now_5bf.hour,now_5bf.minute,now_5bf.second)
query_time = ''' '%s' AND '%s' '''%(now_5bf_string,now_string)
query_sum = '''%s %s %s'''%(query_part1,query_time,query_part2)
data=pd.read_sql_query(query_sum,conn)
else:
#run SQL query by python
print('%d time to pull the data is running,>5mins'%(i+2),now)
query_time = ''' '%s' AND '20%s' '''%(now_string,last_fini_time)
query_sum = '''%s %s %s'''%(query_part1,query_time,query_part2)
now= datetime.datetime.now()
now_string = '''%s-%s-%s %s:%s:%s'''%(now.year,now.month,now.day,now.hour,now.minute,now.second)
now_5af=now+datetime.timedelta(minutes=5)
now_5af_string= '''%s-%s %s:%s:%s'''%(now_5af.month,now_5af.day,now_5af.hour,now_5af.minute,now_5af.second)
data=pd.read_sql_query(query_sum,conn)
#save the dataset
fo = open("time.txt", "w")
fo.write('')
fo.close()
localtime = time.asctime( time.localtime(time.time()) )
time_name = time.strftime("%m-%d-%H-%M", time.localtime() )
data.to_csv(r'D:\Users\sgg91044\Desktop\deployment\RawData\AEM2_rowdata_%s.csv'%time_name)
#clean
data_clean=pd.read_csv(r'D:\Users\sgg91044\Desktop\deployment\RawData\AEM2_rowdata_%s.csv'%time_name)
data_clean=data_clean.iloc[:,1:]
data_clean.drop(['ooc','oos'],axis=1,inplace=True)
data_clean.drop(["waferid","Step","finishtime","parametername"],axis=1,inplace=True)
data_clean.columns = ["eqpid","chamber","lotid","wafer","param_name","recipe","data"]
#pivot
pivoted = data_clean.pivot_table(index=['eqpid','chamber','lotid','wafer','recipe'],columns="param_name",values="data",aggfunc=np.sum)
pivoted.reset_index(inplace=True)
columns=["eqpid","chamber","lotid","wafer","recipe","ETCM_PHA4","ETCM_PHB4","ETCM_PHC4","HELK_MEAN","LOWERCHM_PRESS","PBK4","RR13_MAX.","RR13_MEAN","RR23_MAX.","RR23_MEAN","THR3_MAX.","THR3_MAX._DIFF","THR3_MEAN","THR3_MEAN_DIFF","THR3_MEAN_SLOPE"]
final = pd.DataFrame(columns = columns)
final = final.merge(pivoted,how="right").reindex_axis(columns, axis=1)
#normalize
final= final.dropna(axis=0, how='any')
Index=final.drop(columns=["ETCM_PHA4","ETCM_PHB4","ETCM_PHC4","HELK_MEAN","LOWERCHM_PRESS","PBK4","RR13_MAX.","RR13_MEAN","RR23_MAX.","RR23_MEAN","THR3_MAX.","THR3_MAX._DIFF","THR3_MEAN","THR3_MEAN_DIFF","THR3_MEAN_SLOPE"])
Index.to_csv(r'D:\Users\sgg91044\Desktop\deployment\PredictIndex\AEM2_pivotindex_%s.csv'%time_name)
final=final.drop(columns=["lotid","recipe"])
final.eqpid = final.eqpid.apply(encode_eqpid)
final.chamber = final.chamber.apply(encode_chamber)
final.wafer = final.wafer.apply(encode_wafer)
final.rename(columns={'eqpid':'eqpid1','chamber':'chamber1','wafer':'wafer1'}, inplace=True)
final.eqpid1 = final.eqpid1.astype("category")
final.chamber1 = final.chamber1.astype("category")
final.wafer1 = final.wafer1.astype("category")
final.iloc[:,11:13]=nz.transform(final.iloc[:,11:13])
final.iloc[:,3:6]=nz.transform(final.iloc[:,3:6])
#SUM_ETCM
final["SUM_ETCM"]=np.array(final.ETCM_PHA4)+np.array(final.ETCM_PHB4)+np.array(final.ETCM_PHC4)
final.to_csv(r'D:\Users\sgg91044\Desktop\deployment\PredictData\AEM2_pivotdata_%s.csv'%time_name)
#shutil.move(r'E:\Data\Project-Etcher\RawData\AEM2_rowdata_%s.csv'%time_name,r'E:\Data\Project-Etcher\RwwData_Processed')
print('%d time to pull the data successfully'%(i+2))
i=+1
#time loop setting
#mins_timing = 5*i
#mins = mins+(mins_timing-((mins_timing//60)*60))
#hour = hour+(mins//60)
#if mins >59:
#mins = mins-60
#else :
#a=1
#if mth != now.month:
#mth = now.month
#day = now.day
#else:
#day = day +(hour//24)
#hour = hour%24
#year=now.year
localtime = time.asctime( time.localtime(time.time()) )
last_fini_time = time.strftime("%y-%m-%d %H:%M:%S", time.localtime() )
fo = open("time.txt", "w")
fo.write(last_fini_time)
fo.close()
fo = open("time.txt", "r+")
last_fini_time = fo.read()
last_fini_time1= last_fini_time[3:17]
main()
import os
localtime = time.asctime( time.localtime(time.time()) )
last_fini_time = time.strftime("%y-%m-%d %H:%M:%S", time.localtime() )
fo = open("time.txt", "w")
fo.write(last_fini_time)
fo.close()
fo = open("time.txt", "w")
fo.write('')
fo.close()
#fo = open("time.txt","r+")
#last_fini_time = fo.read()
#last_fini_time1= last_fini_time[3:17]
#fo.close()
#os.remove(path=r"C:\Users\sgg91044\time.txt")
now= datetime.datetime.now()
now_5af=now+datetime.timedelta(minutes=5)
localtime = time.asctime( time.localtime(time.time()) )
last_fini_time = time.strftime("%y-%m-%d %H:%M:%S", time.localtime() )
fo = open("time.txt", "w")
fo.write(last_fini_time)
fo.close()
fo = open("time.txt", "r+")
last_fini_time = fo.read()
last_fini_time1= last_fini_time[3:17]
query_time = ''' '18-%s' AND '%s' '''%(now_5af_string,last_fini_time)
query_time
import threading
def pulldata()
print('start to pull data')
print('son thread runs successfully')
if __name__ == '__main__':
print('we are in main thread ')
print('pulling data part run in son thread')
sub_thread = threading.Thread(target=pulldata)
print('set up son thread and run')
sub_thread.setDaemon(True)
sub_thread.start()
print('let main thread wait son thread for 3s')
sub_thread.join(3)
print('main thread finished')
我的代码-data pulling的更多相关文章
- iview table 勾选当前行代码 data key _checked: true
给 data 项设置特殊 key _checked: true 可以默认选中当前项
- JS魔法堂:Data URI Scheme介绍
一.前言 上周五公司内部的Any Topic Conf.上我和同事们分享了这个主题,有同事说这个有用,有同事说这个没啥用,后来还延伸到网站性能的话题上,大家讨论的激烈程度让我觉得这次选题还不错.本篇先 ...
- BaseHttpListActivity,几行代码搞定Android Http列表请求、加载和缓存
Android开发中,向服务器请求一个列表并显示是非常常见的需求,但实现起来比较麻烦,代码繁杂. 随着应用的更新迭代,这种需求越来越多,我渐渐发现了实现这种需求的代码的共同点. 于是我将Activit ...
- data:image/png;base64
大家可能注意到了,网页上有些图片的src或css背景图片的url后面跟了一大串字符,比如: data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAkAAAAJ ...
- 通过data:image/png;base64把图片直接写在src里
从网上下了个源文件查看时候发现了引用图片的地址不是在本地上的,而是后面跟了一大串字符data:image/png;base64...查了一下资料分析如下: 关于用base64存储图片 网页上有些图片的 ...
- data:image/png;base64是什么
大家可能注意到了,网页上有些图片的src或css背景图片的url后面跟了一大串字符,比如: data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAkAAAAJ ...
- html image -- data:image/png;base64
1, data:image/png;base64 <!DOCTYPE HTML> <html> <head> <meta http-equiv=" ...
- 【转】浏览器中的data类型的Url格式,data:image/png,data:image/jpeg!
所谓"data"类型的Url格式,是在RFC2397中 提出的,目的对于一些"小"的数据,可以在网页中直接嵌入,而不是从外部文件载入.例如对于img这个Tag, ...
- 使用Visual Studio 2010写Data Url生成工具C#版本
声明:本文系本人按照真实经历原创.未经许可,谢绝转载. 此文百度经验版本号:怎样用Visual Studio 2010打造Data Url生成工具 源代码下载:用Visual Studio 2010编 ...
随机推荐
- Python2.x与3.x对比
1.默认支持中文 2.不再兼容2.x 3.核心语法调整 4.新特性默认只支持3.x
- linux基础之grep
grep: Global search REgular expression and Print out the line 作用: 文本搜索工具,根据用户指定的模式对目标文本逐行进行匹配检查,打印匹配 ...
- Learning-Python【15】:内置函数
截止到Python版本3.6.2,一共为我们提供了68个内置函数.它们就是Python提供的直接可以拿来使用的所有函数. 这个表的顺序是按照首字母的排列顺序来的,都混乱的堆在一起.比如,oct和bin ...
- JS开发工具WebStorm使用快捷键
快捷键可以提高开发效率,最好用的就是这些! 代码编辑 Ctrl + d 复制整行 Ctrl + '-/+' 模块折叠 Ctrl + [ ] 括号匹配 Ctrl + F12 结构展示 Shif ...
- MySQL连接java
1.创建一个java project,建一个lib文件. 下载MySQL-connecor. 将下载好的文件打开,将mysql-connector-java-5.0.8-bin.jar文件粘贴复制到l ...
- appDesign
1原则 简约,实用,懒人模式 2模块划分 以实用目的,而非以工具
- ES6与ES5对比 模板字符串
var title = '你好' ES5 var tpl1 = '<div>' + '<span>' + title + '</span>' '</div&g ...
- 连手机logcat,出现read:unexpected EOF
使用logcat时,出现: 网上搜原因解释为log太多,普遍的解决方法是: adb logcat -G 20m 根本解决方法推荐:开发者设置,增大log size
- 详细解析HTML基础结构
<!DOCTYPE html> <html> <head> <meta charset="UTF-8"> <title> ...
- Docker创建Tomcat镜像
1.创建Dockerfile文件 vi Dockerfile Dockerfile内容 FROM debian ADD ./jdk-8u191-linux-x64.tar.gz /usr/local/ ...