ORDER BY today_used ASC' % (MAX_USED_TIMES)
python D:\pymine\clean\spider_map\get_bd_uid_rest_b.py
python D:\pymine\clean\spider_map\get_bd_uid_rest.py
python D:\pymine\clean\spider_map\get_bd_uid_28_other20_b.py
#MAX_USED_TIMES = 1900
python D:\pymine\clean\spider_map\get_bd_uid_28_other20.py
python D:\pymine\clean\spider_map\get_bd_uid.py
python D:\pymine\clean\spider_map\get_bd_uid.py
python D:\pymine\clean\spider_map\get_bd_uid.py

import xlrd
import time
import sys
import os
import requests
import sqlite3
import threading

# Directory that holds this script; every data file is resolved against it.
curPath = os.path.abspath(os.path.dirname(__file__))
# Parent of the script directory, appended to sys.path.
rootPath = os.path.split(curPath)[0]
sys.path.append(rootPath)

# MAX_USED_TIMES: a key is selected only while its today_used counter is at
#   or below this value.
# overrun_str: not referenced in the visible code; presumably the quota
#   message returned by the API -- TODO confirm.
# DB_KEY_EXHAUST: sentinel returned when no key satisfies the quota filter.
MAX_USED_TIMES, overrun_str, DB_KEY_EXHAUST = 1900, '天配额超限,限制访问', 'DB_KEY_EXHAUST'

# SQLite file with the per-key usage counters.
db = os.path.join(curPath, 'py_bdspider_status.db')

# Every third line of the text file is taken as a city name; spaces and the
# newline are stripped and the suffix '市' is appended.
pcity_list = []
pcity_file = os.path.join(curPath, '省会城市.txt')
with open(pcity_file, 'r', encoding='utf-8') as pf:
    for line_no, line in enumerate(pf, start=1):
        if line_no % 3 == 0:
            pcity_list.append(line.replace(' ', '').replace('\n', '') + '市')
pcity_sorted_list = sorted(pcity_list)

target_city_list_big = ['广州市', '厦门市', '深圳市', '北京市', '杭州市', '成都市', '上海市', '西安市']
# Cities this script must skip: the big cities plus every city read above.
# Built from a copy so that target_city_list_big itself is not mutated.
target_city_list_pass = list(target_city_list_big)
for i in pcity_list:
    if i not in target_city_list_pass:
        target_city_list_pass.append(i)
# def db_init_key_table():
# conn = sqlite3.connect(db)
# c = conn.cursor()
# sql = 'DELETE FROM baidu_map_key_used'
# c.execute(sql)
# conn.commit()
# pcity_file = '%s\\%s' % (curPath, 'bdmap_key.txt')
# with open(pcity_file, 'r', encoding='utf-8') as pf:
# c_ = 0
# for i in pf:
# if len(i) < 4:
# continue
# author, key = i.replace('\n', '').split('\t')
# localtime_ = time.strftime("%y%m%d%H%M%S", time.localtime())
# sql = 'INSERT INTO baidu_map_key_used (author,key,update_time,today_used) VALUES ("%s","%s","%s",%s) ' % (
# author, key, localtime_, 0)
# c.execute(sql)
# conn.commit()
# conn.close() # db_init_key_table()
# target_city_list = target_city_list[0:11]
# target_city_list = target_city_list[0:11]
def db_get_one_effective():
    """Return the least-used API key that still has quota.

    Selects from ``baidu_map_key_used`` the key with the smallest
    ``today_used`` among rows whose ``today_used`` is at most
    ``MAX_USED_TIMES``.

    Returns:
        The key string, or ``DB_KEY_EXHAUST`` when no row qualifies.
    """
    sql = ('SELECT key FROM baidu_map_key_used '
           'WHERE today_used<=? ORDER BY today_used ASC')
    conn = sqlite3.connect(db)
    try:
        res = conn.execute(sql, (MAX_USED_TIMES,)).fetchone()
    finally:
        # Always release the connection, including on the early-result path.
        conn.close()
    if res is None:
        return DB_KEY_EXHAUST
    return res[0]


def db_update_one_today_used(key):
    """Increment ``today_used`` for *key* and stamp ``update_time``.

    Args:
        key: The API key whose usage counter is bumped.
    """
    localtime_ = time.strftime("%y%m%d%H%M%S", time.localtime())
    # Bound parameters instead of string formatting: a key containing quote
    # characters can no longer break or alter the statement.
    sql = ('UPDATE baidu_map_key_used '
           'SET today_used = today_used+1, update_time=? WHERE key=?')
    conn = sqlite3.connect(db)
    try:
        conn.execute(sql, (localtime_, key))
        conn.commit()
    finally:
        conn.close()


# Sub-directory names for successful results and for failed requests.
dir_, dir_exception = 'baidu_map_uid', 'baidu_map_uid_exception'
# Stems (file name up to the first '.txt') of result files already on disk.
requested_file_list = []
# Output directories, each ending with a path separator because the writers
# below append the file name by plain concatenation.
requested_file_dir_str = os.path.join(curPath, dir_, '')
requested_file_dir_exception_str = os.path.join(curPath, dir_exception, '')
requested_file_dir = os.listdir(requested_file_dir_str)


def chk_if_requested_file():
    """Add the stems of all result files on disk to ``requested_file_list``.

    The directory is listed again on every call, so files written after
    import (by this or another process) are picked up.
    """
    global requested_file_dir
    requested_file_dir = os.listdir(requested_file_dir_str)
    known = set(requested_file_list)
    for f in requested_file_dir:
        to_in = f.split('.txt')[0]
        if to_in not in known:
            known.add(to_in)
            requested_file_list.append(to_in)


chk_if_requested_file()


def _write_text_file(dir_str, request_name, str_, type_):
    """Write *str_* as UTF-8 to ``dir_str + request_name + type_``."""
    fname = '%s%s%s' % (dir_str, request_name, type_)
    # '?' is dropped from the path, e.g. 上海市虹口区岳阳医院?.txt
    fname = fname.replace('?', '')
    with open(fname, 'w', encoding='utf-8') as ft:
        ft.write(str_)


def write_requested_res(request_name, str_, type_='.txt'):
    """Store a successful response and print a confirmation line.

    Args:
        request_name: File stem (city + district + request name).
        str_: Text to write.
        type_: File extension, including the dot.
    """
    _write_text_file(requested_file_dir_str, request_name, str_, type_)
    print('ok', threading.get_ident(), request_name)


def write_requested_exception_res(request_name, str_, type_='.txt'):
    """Store a failure marker in the exception directory.

    Args:
        request_name: File stem (city + district + request name).
        str_: Text to write.
        type_: File extension, including the dot.
    """
    _write_text_file(requested_file_dir_exception_str, request_name, str_, type_)


# city -> district -> list of request names still to be fetched.
request_dic = {}
# Cities found in the workbook that are not in target_city_list_pass.
target_city_list = []


def gen_request_dic_list():
    """Fill ``request_dic`` and ``target_city_list`` from the task workbook.

    Reads the first sheet starting at the second row (the first row is
    skipped, presumably a header). Rows whose city is in
    ``target_city_list_pass`` are ignored, as are rows whose result file
    already exists.
    """
    fname_source = '官方上传任务.csv_py170829093808-BD_request_name-REDUCTION170829142821'
    FEXCEL = os.path.join(curPath, fname_source + '.xlsx')
    # NOTE(review): xlrd 2.x no longer reads .xlsx files; this needs xlrd < 2.
    table = xlrd.open_workbook(FEXCEL).sheets()[0]
    already_requested = set(requested_file_list)
    for i in range(1, table.nrows):
        (dbid, area_code, name_, request_name, type_,
         city, district, addr, street) = table.row_values(i)
        if city in target_city_list_pass:
            continue
        if city not in target_city_list:
            target_city_list.append(city)
        request_name_chk = '%s%s%s' % (city, district, request_name)
        # Result files are saved without '?', so compare in the same form;
        # otherwise such names would be requested again on every run.
        if request_name_chk.replace('?', '') in already_requested:
            continue
        pending = request_dic.setdefault(city, {}).setdefault(district, [])
        if request_name not in pending:
            pending.append(request_name)


gen_request_dic_list()
fname_source = '官方上传任务.csv_py170829093808-BD_request_name-REDUCTION170829142821'
# Example request (API key replaced by a placeholder):
# http://api.map.baidu.com/place/v2/suggestion?query=瀛嘉天下&region=重庆市&city_limit=true&output=json&ak=<AK>
# Suggestion-API URL template; R-QUERY, R-CITY and R-AK are replaced per
# request. ('&region' had been corrupted to the '®' character.)
base_url = 'http://api.map.baidu.com/place/v2/suggestion?query=R-QUERY&region=R-CITY&city_limit=true&output=json&ak=R-AK'
# Seconds to wait for the API before the request counts as failed.
REQUEST_TIMEOUT = 10


def _build_url(request_name, city, ak):
    """Fill ``base_url`` with percent-encoded values."""
    from urllib.parse import quote  # local import keeps this block self-contained

    # The free-text query goes in last so placeholder-like text inside it
    # cannot be hit by a later replacement.
    url_ = base_url.replace('R-AK', quote(ak, safe=''))
    url_ = url_.replace('R-CITY', quote(city, safe=''))
    return url_.replace('R-QUERY', quote(request_name, safe=''))


def fun_(city):
    """Fetch and store suggestion results for every pending name of *city*.

    For each request name under ``request_dic[city]`` that has no result
    file yet, a key is taken from the database, the API is called, the
    key's usage counter is incremented and the response text is written to
    disk. Any exception in that sequence is recorded as a marker file in
    the exception directory and the loop goes on.

    Args:
        city: A key of ``request_dic``.
    """
    for district, request_names in request_dic[city].items():
        for request_name in request_names:
            request_name_chk = '%s%s%s' % (city, district, request_name)
            chk_if_requested_file()
            # Result files are saved without '?', so compare in that form.
            if request_name_chk.replace('?', '') in requested_file_list:
                continue
            ak = db_get_one_effective()
            if ak == DB_KEY_EXHAUST:
                print(DB_KEY_EXHAUST)
                # No key has quota left: stop this city entirely instead of
                # only leaving the current district.
                return
            try:
                url_ = _build_url(request_name, city, ak)
                bd_res_json_str = requests.get(url_, timeout=REQUEST_TIMEOUT).text
                db_update_one_today_used(ak)
                write_requested_res(request_name_chk, bd_res_json_str)
            except Exception as exc:
                # Best effort: record the failure and continue with the next name.
                bd_res_json_str = '请求百度-异常'
                write_requested_exception_res(request_name_chk, bd_res_json_str)
                print(bd_res_json_str, request_name_chk, repr(exc))


class MyThread(threading.Thread):
    """Thread that calls ``func(args)`` with a single argument object."""

    def __init__(self, func, args):
        super().__init__()
        self.func, self.args = func, args

    def run(self):
        self.func(self.args)


# Number of cities known at import time; main() starts at most this many threads.
thread_sum = len(target_city_list)


def main():
    """Run fun_ in one thread per city that has pending requests, then wait."""
    threads_list = []
    for city in target_city_list[:thread_sum]:
        # A city whose requests are all done has no entry in request_dic.
        if city not in request_dic:
            continue
        threads_list.append(MyThread(fun_, city))
    for t in threads_list:
        t.daemon = False
        t.start()
    for t in threads_list:
        t.join()


if __name__ == '__main__':
    main()
import xlrd
import time
import sys
import os
import requests
import sqlite3
import threading

# Directory that holds this script; every data file is resolved against it.
curPath = os.path.abspath(os.path.dirname(__file__))
# Parent of the script directory, appended to sys.path.
rootPath = os.path.split(curPath)[0]
sys.path.append(rootPath)

# MAX_USED_TIMES: a key is selected only while its today_used counter is at
#   or below this value.
# overrun_str: not referenced in the visible code; presumably the quota
#   message returned by the API -- TODO confirm.
# DB_KEY_EXHAUST: sentinel returned when no key satisfies the quota filter.
MAX_USED_TIMES, overrun_str, DB_KEY_EXHAUST = 1900, '天配额超限,限制访问', 'DB_KEY_EXHAUST'

# SQLite file with the per-key usage counters.
db = os.path.join(curPath, 'py_bdspider_status.db')

# Every third line of the text file is taken as a city name; spaces and the
# newline are stripped and the suffix '市' is appended.
pcity_list = []
pcity_file = os.path.join(curPath, '省会城市.txt')
with open(pcity_file, 'r', encoding='utf-8') as pf:
    for line_no, line in enumerate(pf, start=1):
        if line_no % 3 == 0:
            pcity_list.append(line.replace(' ', '').replace('\n', '') + '市')
pcity_sorted_list = sorted(pcity_list)

target_city_list_big = ['广州市', '厦门市', '深圳市', '北京市', '杭州市', '成都市', '上海市', '西安市']
# Cities read from the file that are not in the big-city list, in file order.
target_city_list = [c for c in pcity_list if c not in target_city_list_big]
# def db_init_key_table():
# conn = sqlite3.connect(db)
# c = conn.cursor()
# sql = 'DELETE FROM baidu_map_key_used'
# c.execute(sql)
# conn.commit()
# pcity_file = '%s\\%s' % (curPath, 'bdmap_key.txt')
# with open(pcity_file, 'r', encoding='utf-8') as pf:
# c_ = 0
# for i in pf:
# if len(i) < 4:
# continue
# author, key = i.replace('\n', '').split('\t')
# localtime_ = time.strftime("%y%m%d%H%M%S", time.localtime())
# sql = 'INSERT INTO baidu_map_key_used (author,key,update_time,today_used) VALUES ("%s","%s","%s",%s) ' % (
# author, key, localtime_, 0)
# c.execute(sql)
# conn.commit()
# conn.close() # db_init_key_table()
# Drop the first 11 cities; presumably they are handled by a separate run
# of the script -- TODO confirm.
target_city_list = target_city_list[11:]


def db_get_one_effective():
    """Return the least-used API key that still has quota.

    Selects from ``baidu_map_key_used`` the key with the smallest
    ``today_used`` among rows whose ``today_used`` is at most
    ``MAX_USED_TIMES``. The ``ORDER BY`` makes the choice deterministic and
    matches the other copies of this function in the file.

    Returns:
        The key string, or ``DB_KEY_EXHAUST`` when no row qualifies.
    """
    sql = ('SELECT key FROM baidu_map_key_used '
           'WHERE today_used<=? ORDER BY today_used ASC')
    conn = sqlite3.connect(db)
    try:
        res = conn.execute(sql, (MAX_USED_TIMES,)).fetchone()
    finally:
        # Always release the connection, including on the early-result path.
        conn.close()
    if res is None:
        return DB_KEY_EXHAUST
    return res[0]


def db_update_one_today_used(key):
    """Increment ``today_used`` for *key* and stamp ``update_time``.

    Args:
        key: The API key whose usage counter is bumped.
    """
    localtime_ = time.strftime("%y%m%d%H%M%S", time.localtime())
    # Bound parameters instead of string formatting: a key containing quote
    # characters can no longer break or alter the statement.
    sql = ('UPDATE baidu_map_key_used '
           'SET today_used = today_used+1, update_time=? WHERE key=?')
    conn = sqlite3.connect(db)
    try:
        conn.execute(sql, (localtime_, key))
        conn.commit()
    finally:
        conn.close()


# Sub-directory names for successful results and for failed requests.
dir_, dir_exception = 'baidu_map_uid', 'baidu_map_uid_exception'
# Stems (file name up to the first '.txt') of result files already on disk.
requested_file_list = []
# Output directories, each ending with a path separator because the writers
# below append the file name by plain concatenation.
requested_file_dir_str = os.path.join(curPath, dir_, '')
requested_file_dir_exception_str = os.path.join(curPath, dir_exception, '')
requested_file_dir = os.listdir(requested_file_dir_str)


def chk_if_requested_file():
    """Add the stems of all result files on disk to ``requested_file_list``.

    The directory is listed again on every call, so files written after
    import (by this or another process) are picked up.
    """
    global requested_file_dir
    requested_file_dir = os.listdir(requested_file_dir_str)
    known = set(requested_file_list)
    for f in requested_file_dir:
        to_in = f.split('.txt')[0]
        if to_in not in known:
            known.add(to_in)
            requested_file_list.append(to_in)


chk_if_requested_file()


def _write_text_file(dir_str, request_name, str_, type_):
    """Write *str_* as UTF-8 to ``dir_str + request_name + type_``."""
    fname = '%s%s%s' % (dir_str, request_name, type_)
    # '?' is dropped from the path, e.g. 上海市虹口区岳阳医院?.txt
    fname = fname.replace('?', '')
    with open(fname, 'w', encoding='utf-8') as ft:
        ft.write(str_)


def write_requested_res(request_name, str_, type_='.txt'):
    """Store a successful response and print a confirmation line.

    Args:
        request_name: File stem (city + district + request name).
        str_: Text to write.
        type_: File extension, including the dot.
    """
    _write_text_file(requested_file_dir_str, request_name, str_, type_)
    print('ok', threading.get_ident(), request_name)


def write_requested_exception_res(request_name, str_, type_='.txt'):
    """Store a failure marker in the exception directory.

    Args:
        request_name: File stem (city + district + request name).
        str_: Text to write.
        type_: File extension, including the dot.
    """
    _write_text_file(requested_file_dir_exception_str, request_name, str_, type_)


# city -> district -> list of request names still to be fetched.
request_dic = {}


def gen_request_dic_list():
    """Fill ``request_dic`` from the task workbook.

    Reads the first sheet starting at the second row (the first row is
    skipped, presumably a header). Only rows whose city is in
    ``target_city_list`` and whose result file does not exist yet are kept.
    """
    fname_source = '官方上传任务.csv_py170829093808-BD_request_name-REDUCTION170829142821'
    FEXCEL = os.path.join(curPath, fname_source + '.xlsx')
    # NOTE(review): xlrd 2.x no longer reads .xlsx files; this needs xlrd < 2.
    table = xlrd.open_workbook(FEXCEL).sheets()[0]
    already_requested = set(requested_file_list)
    for i in range(1, table.nrows):
        (dbid, area_code, name_, request_name, type_,
         city, district, addr, street) = table.row_values(i)
        if city not in target_city_list:
            continue
        request_name_chk = '%s%s%s' % (city, district, request_name)
        # Result files are saved without '?', so compare in the same form;
        # otherwise such names would be requested again on every run.
        if request_name_chk.replace('?', '') in already_requested:
            continue
        pending = request_dic.setdefault(city, {}).setdefault(district, [])
        if request_name not in pending:
            pending.append(request_name)


gen_request_dic_list()
fname_source = '官方上传任务.csv_py170829093808-BD_request_name-REDUCTION170829142821'
# Example request (API key replaced by a placeholder):
# http://api.map.baidu.com/place/v2/suggestion?query=瀛嘉天下&region=重庆市&city_limit=true&output=json&ak=<AK>
# Suggestion-API URL template; R-QUERY, R-CITY and R-AK are replaced per
# request. ('&region' had been corrupted to the '®' character.)
base_url = 'http://api.map.baidu.com/place/v2/suggestion?query=R-QUERY&region=R-CITY&city_limit=true&output=json&ak=R-AK'
# Seconds to wait for the API before the request counts as failed.
REQUEST_TIMEOUT = 10


def _build_url(request_name, city, ak):
    """Fill ``base_url`` with percent-encoded values."""
    from urllib.parse import quote  # local import keeps this block self-contained

    # The free-text query goes in last so placeholder-like text inside it
    # cannot be hit by a later replacement.
    url_ = base_url.replace('R-AK', quote(ak, safe=''))
    url_ = url_.replace('R-CITY', quote(city, safe=''))
    return url_.replace('R-QUERY', quote(request_name, safe=''))


def fun_(city):
    """Fetch and store suggestion results for every pending name of *city*.

    For each request name under ``request_dic[city]`` that has no result
    file yet, a key is taken from the database, the API is called, the
    key's usage counter is incremented and the response text is written to
    disk. Any exception in that sequence is recorded as a marker file in
    the exception directory and the loop goes on.

    Args:
        city: A key of ``request_dic``.
    """
    for district, request_names in request_dic[city].items():
        for request_name in request_names:
            request_name_chk = '%s%s%s' % (city, district, request_name)
            chk_if_requested_file()
            # Result files are saved without '?', so compare in that form.
            if request_name_chk.replace('?', '') in requested_file_list:
                continue
            ak = db_get_one_effective()
            if ak == DB_KEY_EXHAUST:
                print(DB_KEY_EXHAUST)
                # No key has quota left: stop this city entirely instead of
                # only leaving the current district.
                return
            try:
                url_ = _build_url(request_name, city, ak)
                bd_res_json_str = requests.get(url_, timeout=REQUEST_TIMEOUT).text
                db_update_one_today_used(ak)
                write_requested_res(request_name_chk, bd_res_json_str)
            except Exception as exc:
                # Best effort: record the failure and continue with the next name.
                bd_res_json_str = '请求百度-异常'
                write_requested_exception_res(request_name_chk, bd_res_json_str)
                print(bd_res_json_str, request_name_chk, repr(exc))


class MyThread(threading.Thread):
    """Thread that calls ``func(args)`` with a single argument object."""

    def __init__(self, func, args):
        super().__init__()
        self.func, self.args = func, args

    def run(self):
        self.func(self.args)


# Number of target cities at import time; main() starts at most this many threads.
thread_sum = len(target_city_list)


def main():
    """Run fun_ in one thread per city that has pending requests, then wait."""
    threads_list = []
    for city in target_city_list[:thread_sum]:
        # A city without pending requests has no entry in request_dic.
        if city not in request_dic:
            continue
        threads_list.append(MyThread(fun_, city))
    for t in threads_list:
        t.daemon = False
        t.start()
    for t in threads_list:
        t.join()


if __name__ == '__main__':
    main()
import xlrd
import time
import sys
import os
import requests
import sqlite3
import threading

# Directory that holds this script; every data file is resolved against it.
curPath = os.path.abspath(os.path.dirname(__file__))
# Parent of the script directory, appended to sys.path.
rootPath = os.path.split(curPath)[0]
sys.path.append(rootPath)

# MAX_USED_TIMES: a key is selected only while its today_used counter is at
#   or below this value.
# overrun_str: not referenced in the visible code; presumably the quota
#   message returned by the API -- TODO confirm.
# DB_KEY_EXHAUST: sentinel returned when no key satisfies the quota filter.
MAX_USED_TIMES, overrun_str, DB_KEY_EXHAUST = 1900, '天配额超限,限制访问', 'DB_KEY_EXHAUST'

# SQLite file with the per-key usage counters.
db = os.path.join(curPath, 'py_bdspider_status.db')

# Every third line of the text file is taken as a city name; spaces and the
# newline are stripped and the suffix '市' is appended.
pcity_list = []
pcity_file = os.path.join(curPath, '省会城市.txt')
with open(pcity_file, 'r', encoding='utf-8') as pf:
    for line_no, line in enumerate(pf, start=1):
        if line_no % 3 == 0:
            pcity_list.append(line.replace(' ', '').replace('\n', '') + '市')
pcity_sorted_list = sorted(pcity_list)

target_city_list_big = ['广州市', '厦门市', '深圳市', '北京市', '杭州市', '成都市', '上海市', '西安市']
# Cities read from the file that are not in the big-city list, in file order.
target_city_list = [c for c in pcity_list if c not in target_city_list_big]
# def db_init_key_table():
# conn = sqlite3.connect(db)
# c = conn.cursor()
# sql = 'DELETE FROM baidu_map_key_used'
# c.execute(sql)
# conn.commit()
# pcity_file = '%s\\%s' % (curPath, 'bdmap_key.txt')
# with open(pcity_file, 'r', encoding='utf-8') as pf:
# c_ = 0
# for i in pf:
# if len(i) < 4:
# continue
# author, key = i.replace('\n', '').split('\t')
# localtime_ = time.strftime("%y%m%d%H%M%S", time.localtime())
# sql = 'INSERT INTO baidu_map_key_used (author,key,update_time,today_used) VALUES ("%s","%s","%s",%s) ' % (
# author, key, localtime_, 0)
# c.execute(sql)
# conn.commit()
# conn.close() # db_init_key_table()
# target_city_list = target_city_list[0:11]
# target_city_list = target_city_list[0:11]
# Drop the first 11 cities; presumably they are handled by a separate run
# of the script -- TODO confirm.
target_city_list = target_city_list[11:]


def db_get_one_effective():
    """Return the least-used API key that still has quota.

    Selects from ``baidu_map_key_used`` the key with the smallest
    ``today_used`` among rows whose ``today_used`` is at most
    ``MAX_USED_TIMES``.

    Returns:
        The key string, or ``DB_KEY_EXHAUST`` when no row qualifies.
    """
    sql = ('SELECT key FROM baidu_map_key_used '
           'WHERE today_used<=? ORDER BY today_used ASC')
    conn = sqlite3.connect(db)
    try:
        res = conn.execute(sql, (MAX_USED_TIMES,)).fetchone()
    finally:
        # Always release the connection, including on the early-result path.
        conn.close()
    if res is None:
        return DB_KEY_EXHAUST
    return res[0]


def db_update_one_today_used(key):
    """Increment ``today_used`` for *key* and stamp ``update_time``.

    Args:
        key: The API key whose usage counter is bumped.
    """
    localtime_ = time.strftime("%y%m%d%H%M%S", time.localtime())
    # Bound parameters instead of string formatting: a key containing quote
    # characters can no longer break or alter the statement.
    sql = ('UPDATE baidu_map_key_used '
           'SET today_used = today_used+1, update_time=? WHERE key=?')
    conn = sqlite3.connect(db)
    try:
        conn.execute(sql, (localtime_, key))
        conn.commit()
    finally:
        conn.close()


# Sub-directory names for successful results and for failed requests.
dir_, dir_exception = 'baidu_map_uid', 'baidu_map_uid_exception'
# Stems (file name up to the first '.txt') of result files already on disk.
requested_file_list = []
# Output directories, each ending with a path separator because the writers
# below append the file name by plain concatenation.
requested_file_dir_str = os.path.join(curPath, dir_, '')
requested_file_dir_exception_str = os.path.join(curPath, dir_exception, '')
requested_file_dir = os.listdir(requested_file_dir_str)


def chk_if_requested_file():
    """Add the stems of all result files on disk to ``requested_file_list``.

    The directory is listed again on every call, so files written after
    import (by this or another process) are picked up.
    """
    global requested_file_dir
    requested_file_dir = os.listdir(requested_file_dir_str)
    known = set(requested_file_list)
    for f in requested_file_dir:
        to_in = f.split('.txt')[0]
        if to_in not in known:
            known.add(to_in)
            requested_file_list.append(to_in)


chk_if_requested_file()


def _write_text_file(dir_str, request_name, str_, type_):
    """Write *str_* as UTF-8 to ``dir_str + request_name + type_``."""
    fname = '%s%s%s' % (dir_str, request_name, type_)
    # '?' is dropped from the path, e.g. 上海市虹口区岳阳医院?.txt
    fname = fname.replace('?', '')
    with open(fname, 'w', encoding='utf-8') as ft:
        ft.write(str_)


def write_requested_res(request_name, str_, type_='.txt'):
    """Store a successful response and print a confirmation line.

    Args:
        request_name: File stem (city + district + request name).
        str_: Text to write.
        type_: File extension, including the dot.
    """
    _write_text_file(requested_file_dir_str, request_name, str_, type_)
    print('ok', threading.get_ident(), request_name)


def write_requested_exception_res(request_name, str_, type_='.txt'):
    """Store a failure marker in the exception directory.

    Args:
        request_name: File stem (city + district + request name).
        str_: Text to write.
        type_: File extension, including the dot.
    """
    _write_text_file(requested_file_dir_exception_str, request_name, str_, type_)


# city -> district -> list of request names still to be fetched.
request_dic = {}


def gen_request_dic_list():
    """Fill ``request_dic`` from the task workbook.

    Reads the first sheet starting at the second row (the first row is
    skipped, presumably a header). Only rows whose city is in
    ``target_city_list`` and whose result file does not exist yet are kept.
    """
    fname_source = '官方上传任务.csv_py170829093808-BD_request_name-REDUCTION170829142821'
    FEXCEL = os.path.join(curPath, fname_source + '.xlsx')
    # NOTE(review): xlrd 2.x no longer reads .xlsx files; this needs xlrd < 2.
    table = xlrd.open_workbook(FEXCEL).sheets()[0]
    already_requested = set(requested_file_list)
    for i in range(1, table.nrows):
        (dbid, area_code, name_, request_name, type_,
         city, district, addr, street) = table.row_values(i)
        if city not in target_city_list:
            continue
        request_name_chk = '%s%s%s' % (city, district, request_name)
        # Result files are saved without '?', so compare in the same form;
        # otherwise such names would be requested again on every run.
        if request_name_chk.replace('?', '') in already_requested:
            continue
        pending = request_dic.setdefault(city, {}).setdefault(district, [])
        if request_name not in pending:
            pending.append(request_name)


gen_request_dic_list()
fname_source = '官方上传任务.csv_py170829093808-BD_request_name-REDUCTION170829142821'
# Example request (API key replaced by a placeholder):
# http://api.map.baidu.com/place/v2/suggestion?query=瀛嘉天下&region=重庆市&city_limit=true&output=json&ak=<AK>
# Suggestion-API URL template; R-QUERY, R-CITY and R-AK are replaced per
# request. ('&region' had been corrupted to the '®' character.)
base_url = 'http://api.map.baidu.com/place/v2/suggestion?query=R-QUERY&region=R-CITY&city_limit=true&output=json&ak=R-AK'
# Seconds to wait for the API before the request counts as failed.
REQUEST_TIMEOUT = 10


def _build_url(request_name, city, ak):
    """Fill ``base_url`` with percent-encoded values."""
    from urllib.parse import quote  # local import keeps this block self-contained

    # The free-text query goes in last so placeholder-like text inside it
    # cannot be hit by a later replacement.
    url_ = base_url.replace('R-AK', quote(ak, safe=''))
    url_ = url_.replace('R-CITY', quote(city, safe=''))
    return url_.replace('R-QUERY', quote(request_name, safe=''))


def fun_(city):
    """Fetch and store suggestion results for every pending name of *city*.

    For each request name under ``request_dic[city]`` that has no result
    file yet, a key is taken from the database, the API is called, the
    key's usage counter is incremented and the response text is written to
    disk. Any exception in that sequence is recorded as a marker file in
    the exception directory and the loop goes on.

    Args:
        city: A key of ``request_dic``.
    """
    for district, request_names in request_dic[city].items():
        for request_name in request_names:
            request_name_chk = '%s%s%s' % (city, district, request_name)
            chk_if_requested_file()
            # Result files are saved without '?', so compare in that form.
            if request_name_chk.replace('?', '') in requested_file_list:
                continue
            ak = db_get_one_effective()
            if ak == DB_KEY_EXHAUST:
                print(DB_KEY_EXHAUST)
                # No key has quota left: stop this city entirely instead of
                # only leaving the current district.
                return
            try:
                url_ = _build_url(request_name, city, ak)
                bd_res_json_str = requests.get(url_, timeout=REQUEST_TIMEOUT).text
                db_update_one_today_used(ak)
                write_requested_res(request_name_chk, bd_res_json_str)
            except Exception as exc:
                # Best effort: record the failure and continue with the next name.
                bd_res_json_str = '请求百度-异常'
                write_requested_exception_res(request_name_chk, bd_res_json_str)
                print(bd_res_json_str, request_name_chk, repr(exc))


class MyThread(threading.Thread):
    """Thread that calls ``func(args)`` with a single argument object."""

    def __init__(self, func, args):
        super().__init__()
        self.func, self.args = func, args

    def run(self):
        self.func(self.args)


# Number of target cities at import time; main() starts at most this many threads.
thread_sum = len(target_city_list)


def main():
    """Run fun_ in one thread per city that has pending requests, then wait."""
    threads_list = []
    for city in target_city_list[:thread_sum]:
        # target_city_list comes from the city file, so a city may have no
        # pending requests; fun_ would raise KeyError for it.
        if city not in request_dic:
            continue
        threads_list.append(MyThread(fun_, city))
    for t in threads_list:
        t.daemon = False
        t.start()
    for t in threads_list:
        t.join()


if __name__ == '__main__':
    main()
ORDER BY today_used ASC' % (MAX_USED_TIMES)的更多相关文章
- order by id asc得出的排序是什么原理
我们要用order by id asc得出的排序应该是,4,好了原理就这么简. sql实现方法,代码如下: : 代码如下: $sql ="Select 字段 from 表名 where id ...
- select * from (select P.*,ROWNUM RN FROM(select * from Mp_Relatedart where pubbaseid=785 order by ID ASC )P)M WHERE M.RN>2 and M.RN <= 7
select * from (select P.*,ROWNUM RN FROM(select * from Mp_Relatedart where pubbaseid=785 order by ID ...
- oracle的row_number() OVER (ORDER BY COL2 asc)和row_number() OVER (PARTITION BY COL1 ORDER BY COL2)的用法
转自:https://jingyan.baidu.com/article/9989c74604a644f648ecfef3.html SELECT ROW_NUMBER() OVER(PARTITIO ...
- json.dumps(i['bd_res'], ensure_ascii=False)
json.dumps(i['bd_res'], ensure_ascii=False) import xlrd import time import sys import os import requ ...
- csv .xlsx
def gen_file_data(fodir, fname, sheet_index=0, ): if fname.find('.xlsx') > -1: fname_open = '%s\\ ...
- rm_invalid_file
import xlrd import time import sys import os import requests import sqlite3 import threading curPath ...
- 谈谈MySQL中的降序索引 order by id DESC / ASC
今天这篇主要讲order by 语句中的多个字段asc desc的问题.mysql5中,索引存储的排序方式是ASC的,没有DESC的索引.现在能够理解为啥order by 默认是按照ASC来排序的了吧 ...
- Oracle查询排序asc/desc 多列 order by
查询结果的排序 显示EMP表中不同的部门编号. 如果要在查询的同时排序显示结果,可以使用如下的语句: SELECT 字段列表 FROM 表名 WHERE 条件 ORDER BY 字段名1 [ASC|D ...
- Oracle 查询语句(where,order by ,like,in,distinct)
select * from production;alter table production add productionprice number(7,2); UPDATE production s ...
随机推荐
- 对Linux文件权限的理解
755,775,777,ugoa 等分别代表什么含义?这些数字是如何得到的? 1.常用的linux文件权限: 444 -r--r--r-- 600 -rw------- 644 -rw-r--r-- ...
- 2016.6.20 tomcat端口始终被占用
我在使用tomcat7时,服务开启时,始终提示端口被占用. 但是从进程中又看不到开启的tomcat. 最后在资源监视器中,结束重复开启的tomcat. (注意是资源监视器,刚开始的时候看成管理器,怎么 ...
- [ElasticSearch]Java API 之 词条查询(Term Level Query)
1. 词条查询(Term Query) 词条查询是ElasticSearch的一个简单查询.它仅匹配在给定字段中含有该词条的文档,而且是确切的.未经分析的词条.term 查询 会查找我们设定的准确值 ...
- cocos2d-x ios游戏开发初认识(六) 渲染的优化
做程序开发肯定要考虑到内存的优化,毕竟iphone本身的内存就不是非常大.这一节主要说这个cocos2d开发对内存的优化,详细表如今,既能够对同样的精灵(图片)仅仅渲染一次,也能够对不能的精灵仅仅渲染 ...
- angular controller的一些用法
最近公司的项目是es6+angular.其中的代码格式还在逐步摸索中.感谢今天同事每天帮我解惑. 今天简单梳理一下controller的一些用法 之前看书所熟知的都是 这是最普通的一种 //html ...
- 【BIEE】02_新建资料库并创建简单分析
一.新建资料库 1.开始→打开BI管理→点击新建资料库 2.文件→新建资料档案库 下一步 在下面的框中一次填入 连接类型:OCI 10g/11g(直接选择即可) 数据库名称:(DESCRIPTION ...
- JavaScript 取数组最值的方法
1.用Math的max,min函数 var array = [10,2,3,4,5,6,30,8,9]; Math.max.apply(null,array); Math.min.apply(null ...
- 500 lines or less
今天碰到一本书 <500 lines or less>突然就想在博客上记录一下自己的阅读经历了. 现在记录一下这本书的地址 http://aosabook.org/en/index.htm ...
- LeetCode LinkList 23. Merge k Sorted Lists
这两天一直也没有顾上记录一下自己做过的题目,回头看看,感觉忘的好快,今天做了一个hard,刚开始觉得挺难得,想了两种方法,一种是每次都从k个list中选取最小的一个,为空的直接跳过,再就是每次合并其中 ...
- sql CHARINDEX() 与 PATINDEX() LEN() substring() COLLATE RAISERROR
CHARINDEX() 在一个表达式中搜索另一个表达式并返回其起始位置(如果找到). CHARINDEX ( expressionToFind , expressionToSearch [ , st ...