文章编辑器 文本替换 操作dom 发帖 富文本 今日头条发布富文本的实现 键盘化的html
使用 JS 修改 iframe(UEditor 编辑区)中的内容:
it=document.getElementById('ueditor_0').contentWindow.document.getElementsByTagName("body")[0];
it.innerHTML='<li ><img src="https://www.baidu.com/img/baidu_jgylogo3.gif" alt="qq_383" title="qq_3503"></li><li >qq_3203</li><li >2017年04月13日 10:02</li><li ><span>3074</span></li>'
- from selenium import webdriver
- from time import sleep
- import time
- from selenium.webdriver.common.keys import Keys
- import os
- import requests
- import time
- import threading
- import logging
- import random
# ---------------------------------------------------------------------------
# Process start-up: derive the log file name from this script's own file name
# and configure the root logger to append to it.
# ---------------------------------------------------------------------------

# Timestamp taken once at start-up (local time, e.g. 20180416_101500).
start_time = time.strftime('%Y%m%d_%H%M%S')
os_sep = os.sep

# Directory and base name of this script.
_this_file = os.path.abspath(__file__)
this_file_abspath, this_file_name = os.path.dirname(_this_file), os.path.basename(_this_file)

# The log file is created relative to the current working directory.
logf = this_file_name + '.log'
try:
    logging.basicConfig(level=logging.INFO,
                        format='%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s[thread:%(thread)d][process:%(process)d]',
                        datefmt='%a, %d %b %Y %H:%M:%S',
                        filename=logf,
                        filemode='a')
except Exception as e:
    s = '%s%s%s' % ('logging.basicConfig EXCEPTION ', time.strftime('%Y%m%d_%H%M%S'), e)
    # Report on stdout first: the log file is the most likely cause of the
    # failure, so writing to it may fail as well and must not hide the message.
    print(s)
    try:
        with open(logf, 'a') as fo:
            fo.write(s + '\n')
    except OSError:
        # Already reported on stdout; nothing more can be done here.
        pass
    # Hard exit without running clean-up handlers; the script cannot run
    # without its log file.
    os._exit(4002)
logging.info('START')

# Sample image URL and the local directory downloaded images are written to.
# img_dir must keep its trailing separator because callers concatenate onto it.
img_url = 'https://s3.pstatp.com/toutiao/static/img/logo.201f80d.png'
img_dir = 'C:\\Users\\sas\\PycharmProjects\\py_win_to_unix\\crontab_chk_url\\personas\\trunk\\plugins\\spider\\dl_img_tmp\\'
def spider_webimg_dl_return_local_img_path(img_dir, img_url, local_default='default.DONOT_REMOVE.png'):
    """Download ``img_url`` into ``img_dir`` and return the local file path.

    This is a best-effort helper: on any failure (network error, HTTP error
    status, empty response body, or failure to write the file) it returns the
    path of the placeholder image ``img_dir + local_default`` instead, so the
    caller always receives a path string.

    Args:
        img_dir: Target directory INCLUDING its trailing path separator; the
            file name is concatenated onto it, not joined.
        img_url: URL of the image to fetch.
        local_default: File name of the fallback image inside ``img_dir``.

    Returns:
        The path of the newly written file, or the fallback path.
    """
    fallback_path = '%s%s' % (img_dir, local_default)
    try:
        response = requests.get(img_url, timeout=30)
        # Do not store an HTTP error page under a .png name.
        response.raise_for_status()
        content = response.content
        if not content:
            return fallback_path

        # Build a file-system friendly name out of the URL.
        url_part = img_url
        for char, token in (('/', '_xl_'), (':', '_fxl_'), ('?', '_fxlquestion_'),
                            ('=', '_fxlequal_'), ('&', '_fxland_')):
            url_part = url_part.replace(char, token)

        # Timestamp + thread id keep names distinct across threads.
        local_path = '%s%s%s%s%s' % (
            img_dir, time.strftime('%Y%m%d%H%M%S'), str(threading.get_ident()), url_part, '.png')
        with open(local_path, 'wb') as f:
            f.write(content)
    except Exception as e:
        # Deliberately broad: any problem degrades to the placeholder image.
        print(e)
        return fallback_path
    return local_path
- import pymysql
- h, pt, u, p, db = '192.168.2.210', 3306, 'root', 'joke_', 'star_media_helper'
def mysql_fetch(sql, res_type='tuple'):
    """Execute ``sql`` on a fresh connection and return all result rows.

    Connection settings come from the module-level ``h``, ``pt``, ``u``, ``p``
    and ``db`` values.

    Args:
        sql: Complete SQL statement. It is executed as-is, so any values
            interpolated into it must already be trusted or escaped.
        res_type: ``'dic'`` selects ``pymysql.cursors.DictCursor``; any other
            value uses the default cursor.

    Returns:
        Whatever ``cursor.fetchall()`` yields, or an empty tuple when the
        connection cannot be established.

    Raises:
        Errors raised while executing the statement propagate to the caller;
        only connection failures are swallowed.
    """
    try:
        conn = pymysql.connect(host=h, port=pt, user=u, passwd=p, db=db, charset='utf8')
    except Exception as e:
        print(e)
        return ()
    try:
        if res_type == 'dic':
            cursor = conn.cursor(pymysql.cursors.DictCursor)
        else:
            cursor = conn.cursor()
        try:
            cursor.execute(sql)
            # Read the rows while the cursor and connection are still open.
            rows = cursor.fetchall()
            conn.commit()
        finally:
            cursor.close()
    finally:
        # Always release the connection, even when the statement fails.
        conn.close()
    return rows
def mysql_write(sql):
    """Execute a data-modifying statement on a fresh connection and commit it.

    Connection settings come from the module-level ``h``, ``pt``, ``u``, ``p``
    and ``db`` values.

    Args:
        sql: Complete SQL statement. It is executed as-is, so any values
            interpolated into it must already be trusted or escaped.

    Returns:
        ``0`` after a successful commit, ``1`` when the connection cannot be
        established.

    Raises:
        Errors raised while executing or committing propagate to the caller;
        only connection failures are swallowed.
    """
    try:
        conn = pymysql.connect(host=h, port=pt, user=u, passwd=p, db=db, charset='utf8')
    except Exception as e:
        print(e)
        return 1
    try:
        cursor = conn.cursor()
        try:
            cursor.execute(sql)
            conn.commit()
        finally:
            cursor.close()
    finally:
        # Always release the connection, even when the statement fails.
        conn.close()
    return 0
- import random
- while True:
- logging.info('LOOP----')
- sql = 'SELECT username,password,toutiaoid FROM joke__star_helper_namepwd WHERE status=1 AND category=1 AND id>236 AND NOT (toutiaoid IS NULL OR toutiaoid="" )'
- sql = 'SELECT username,password,toutiaoid FROM joke__star_helper_namepwd WHERE status=1 AND category=1 AND id=7856582 AND NOT (toutiaoid IS NULL OR toutiaoid="" )'
- res = mysql_fetch(sql)
- ac_l = [{'u': i[0], 'p': i[1], 'toutiao_uid': i[2]} for i in res]
- for ac in ac_l:
- myid, mypwd, toutiao_uid = ac['u'], ac['p'], ac['toutiao_uid']
- # 发布限制条件逻辑
- sql = "SELECT * FROM joke__star_helper_relation_wukong_question WHERE INSTR(CONCAT(',',id_toutiao_uid_list,','),CONCAT(',','{}',',')) AND time_effective<={} ORDER BY id DESC; ".format(
- toutiao_uid, int(time.time()));
- sql = "SELECT * FROM joke__helper_article_publish WHERE INSTR(CONCAT(',',id_toutiao_uid_list,','),CONCAT(',','{}',',')) AND time_effective<={} ORDER BY id DESC; ".format(
- toutiao_uid, int(time.time()));
- print(sql)
- logging.info(sql)
- res_content = mysql_fetch(sql, 'dic')
- if len(res_content) == 0:
- continue
- id_article_list = [i['id_article_list'] for i in res_content]
- sql = 'SELECT * FROM joke__helper_article WHERE id IN ({}) AND id NOT IN (SELECT article_id FROM joke__helper_article_publish_result WHERE 1 AND toutiao_uid="{}" ) LIMIT 2; '.format(
- ','.join([i['id_article_list'] for i in res_content]), toutiao_uid)
- # sql = 'SELECT * FROM joke__star_helper_wukong_question WHERE id NOT IN (SELECT toutiao_uid FROM joke__star_helper_toutiaouser_wukong_question) LIMIT 1'
- logging.info(sql)
- res_content = mysql_fetch(sql, 'dic')
- if len(res_content) == 0:
- continue
- browser = webdriver.Chrome()
- f_url_l = ['https://www.toutiao.com/group/1589657566362638/',
- 'https://www.wukong.com/question/6388670742287876353/',
- 'https://www.wukong.com/tag/6215497898671475202/']
- f_url_l += ['https://www.wukong.com/question/6512777037948649741/',
- 'https://www.wukong.com/question/6469247721038414093/',
- 'https://www.wukong.com/question/6481502080249889037/']
- # f_url_l = []
- f_url_l = ['https://www.toutiao.com/a6514526304476332552/', 'https://www.toutiao.com/a6514661446876398088/',
- 'https://www.toutiao.com/a6514778729951003150/']
- f_url_l += ['https://www.toutiao.com/a6514216125151052291/', 'https://www.toutiao.com/a6512315164463727111/',
- 'https://www.toutiao.com/a6513334304318161411/']
- f_url_l_a = f_url_l[int(time.time()) % len(f_url_l)]
- # browser.get(random.choice(f_url_l))
- browser.get(f_url_l_a)
- time.sleep(random.randint(10, 20))
- js = 'window.location.href="https://sso.toutiao.com/login/";'
- js = 'window.location.href="https://sso.toutiao.com/login/?service=https://mp.toutiao.com/sso_confirm/?redirect_url=/";'
- browser.execute_script(js)
- time.sleep(random.randint(10, 20))
- # js = 'window.location.href="https://sso.toutiao.com/login/?service=https%3A%2F%2Fwww.wukong.com%2Fwenda%2Fwelcome%2F#type=0";'
- browser.execute_script(js)
- ac_type = 'qq'
- if ac_type == 'qq':
- myid, mypwd = ac['u'], ac['p']
- xp = '/html/body/div/div/div[2]/div/div/div/ul/li[3]'
- browser.find_element_by_xpath(xp).click()
- time.sleep(10)
- js = '%s%s%s' % ('document.getElementById("u").value="', myid, '"')
- browser.execute_script(js)
- js = '%s%s%s' % ('document.getElementById("p").value="', mypwd, '"')
- browser.execute_script(js)
- time.sleep(random.randint(5, 15))
- xp_newpage = '//*[@id="go"]'
- browser.find_element_by_xpath(xp_newpage).click()
- time.sleep(random.randint(10, 20))
- elif ac_type == 'mail_qq':
- continue
- time.sleep(5)
- browser.refresh()
- js = 'window.location.href="https://www.toutiao.com/";'
- browser.execute_script(js)
- browser.refresh()
- time.sleep(6)
- js = 'window.location.href="https://www.wukong.com/";'
- js = 'window.location.href="https://mp.toutiao.com/profile_v2/publish/";'
- js = 'window.location.href="https://mp.toutiao.com/profile_v3/graphic/publish";'
- browser.execute_script(js)
- time.sleep(6)
- # title
- js = '%s%s%s' % ('document.getElementById("title").value="', '林志玲捐款记录被翻出 单笔高达千万', '"')
- js = 'document.getElementById("title").value="{}"'.format('林志玲捐款记录被翻出 单笔高达千万')
- browser.execute_script(js)
- time.sleep(2)
- fhtml, dbhtml_str = 'toutaio.db.html', ''
- with open(fhtml, 'r', encoding='utf-8') as fr:
- for hi in fr:
- dbhtml_str = '{}{}'.format(dbhtml_str, hi.replace('\n', ''))
- db_html = dbhtml_str
- # db_html = '<li ><img src="https://www.baidu.com/img/baidu_jgylogo3.gif" alt="qq_383" title="qq_3503"></li><li >qq_3203</li><li >2017年04月13日 10:02</li><li ><span>3074</span></li>'
- js = 'it=document.getElementById("ueditor_0").contentWindow.document.getElementsByTagName("body")[0];it.innerHTML="{}"'.format(
- db_html)
- browser.execute_script(js)
- time.sleep(2)
- xp = '//*[@id="graphic"]/div/div/div[2]/div[3]/div[2]/div[1]'
- xp = '//*[@id="graphic"]/div/div/div[2]/div[3]/div[2]/div[1]'
- browser.find_element_by_xpath(xp).click()
- dd = 9
- # js = 'document.getElementsByClassName("ask")[0].click();'
- # browser.execute_script(js)
- # time.sleep(12)
- # time.sleep(random.randint(10, 20))
- # # 需要键盘事件 反爬虫
- # tmp_target = browser.find_element_by_class_name('input-box').find_element_by_tag_name('input')
- # tmp_target.send_keys(Keys.SPACE)
- # tmp_target.send_keys(Keys.CONTROL, 'a')
- # tmp_target.send_keys(Keys.CONTROL, 'x')
- # tmp_target.send_keys(Keys.CONTROL, 'v')
- # tmp_target.send_keys(Keys.BACK_SPACE)
- # time.sleep(random.randint(10, 20))
- # res_content = []
- for i in res_content[0:1]:
- dbid, content, img_list = i['id'], i['content'], i['img_list']
- tmp_l = ['口红', '指甲油', '护发素', '沐浴露', '洗手液', '洗发水', '牙膏']
- tmp_l_1 = ['老人', '小孩', '白领', '前台妹子', '行政妹子', '大学生', '高中生']
- tmp_l_2 = ['类型', '特质', '种类', '价位', '原材料', '主要成分', '价格']
- s = '{}{}{}{}{}{}{}'.format(str(random.randint(1, 12)), '月份,', random.choice(tmp_l_1), '适合使用什么',
- random.choice(tmp_l_2), '的', random.choice(tmp_l))
- js = 'document.getElementsByClassName("input-box")[0].childNodes[0].value="{}";'.format(s)
- browser.execute_script(js)
- time.sleep(12)
- #
- # tmp_target.send_keys(Keys.SPACE)
- js = 'document.getElementsByClassName("step-btn next")[0].click();'
- browser.execute_script(js)
- # step-btn submit
- js = 'document.getElementsByClassName("step-btn submit")[0].click();'
- browser.execute_script(js)
- time.sleep(12)
- #
- js = 'window.location.href="https://www.wukong.com/user/?uid={}&type=1";'.format(toutiao_uid)
- browser.execute_script(js)
- time.sleep(12)
- res_url = browser.find_element_by_class_name('question-title').find_elements_by_tag_name('a')[
- 0].get_attribute('href')
- # print(i)
- # xp_newpage = '/html/body/div/div[2]/div[2]/div[1]/div/div/div/div/textarea'
- # try:
- # browser.find_element_by_xpath(xp_newpage)
- # except Exception as e:
- # print(e)
- # break
- # browser.find_element_by_xpath(xp_newpage).click()
- # words = content
- # # Message: SyntaxError: unterminated string literal
- # mytxt = words.replace('\n', ' ').replace('\r', ' ').replace('\\br', ' ').replace('"', '“').replace("'", '‘')
- # # Message: SyntaxError: missing ; before statement
- # mytxt = mytxt.replace("'", '‘')
- # # 2000 头条
- # mytxt = mytxt[0:2000]
- # mytxt = '好消息' if len(mytxt.replace(' ', '')) == 0 else mytxt
- #
- # # 需要键盘事件 反爬虫
- # browser.find_element_by_xpath(xp_newpage).send_keys(Keys.SPACE)
- # # browser.find_element_by_xpath(xp_newpage).send_keys(Keys.CONTROL, 'a')
- # # browser.find_element_by_xpath(xp_newpage).send_keys(Keys.CONTROL, 'x')
- # # browser.find_element_by_xpath(xp_newpage).send_keys(Keys.CONTROL, 'v')
- # # browser.find_element_by_xpath(xp_newpage).send_keys(Keys.BACK_SPACE)
- # time.sleep(random.randint(2, 5))
- #
- # try:
- # # js = '%s%s%s' % ('document.getElementsByTagName("textarea")[0].value="', '', '"')
- # # browser.execute_script(js)
- # js = '%s%s%s' % ('document.getElementsByTagName("textarea")[0].value="', mytxt, '"')
- # browser.execute_script(js)
- # time.sleep(3)
- # except Exception as jse:
- # print('.getElementsByTagName("textarea")--log-', jse)
- # continue
- #
- # browser.find_element_by_xpath(xp_newpage).send_keys(Keys.SPACE)
- # xp_newpage = '/html/body/div/div[2]/div[2]/div[1]/div/div/div/div/div[1]/div[1]/span[1]/span'
- # browser.find_element_by_xpath(xp_newpage).click()
- # time.sleep(3)
- # try:
- # upload = browser.find_element_by_id('fileElem')
- #
- # logs_img = ''
- # img_url_list = img_list.split(',')
- #
- # for imgid in img_url_list:
- # img_url = 'http://192.168.2.212:83/file/get?type=star_helper&id=199'.replace('199', str(imgid))
- # local_img_path = spider_webimg_dl_return_local_img_path(img_dir, img_url,
- # local_default='default.DONOT_REMOVE.png')
- # print(local_img_path)
- # time.sleep(random.randint(2, 4))
- # logs_img += img_url
- # logs_img += local_img_path
- # upload.send_keys(local_img_path)
- # time.sleep(random.randint(3, 7))
- # except Exception as ee:
- # img_url_default = ''
- # img_url = img_url_default
- # local_img_path = spider_webimg_dl_return_local_img_path(img_dir, img_url,
- # local_default='default.DONOT_REMOVE.png')
- # sleep(2)
- # logs_img += img_url
- # logs_img += local_img_path
- # # upload.send_keys(local_img_path)
- # logging.exception(ee)
- #
- # try:
- # xp_newpage = '/html/body/div/div[2]/div[2]/div[1]/div/ul'
- # browser.find_element_by_xpath(xp_newpage).click()
- # xp_newpage = '/html/body/div/div[2]/div[2]/div[1]/div/div/div/div/div[1]/div[2]/a'
- # browser.find_element_by_xpath(xp_newpage).click()
- #
- # time.sleep(random.randint(8, 20))
- # js = 'document.getElementsByClassName("ugc-mode-content")[0].getElementsByTagName("a")[0].target="_self"'
- # browser.execute_script(js)
- #
- # time.sleep(random.randint(2, 5))
- # xp_newpage = '/html/body/div/div[2]/div[2]/div[2]/ul/li[1]/div/div[2]/div/div[2]/a'
- # browser.find_element_by_xpath(xp_newpage).click()
- # time.sleep(random.randint(3, 6))
- # url_curr = browser.current_url
- #
- # with open('toutiao_success.log', 'a', encoding='utf-8') as f:
- # logs = '%s%s%s%s%s\n' % (
- # time.strftime('%Y%m%d_%H%M%S', time.localtime(time.time())), ac_type, myid[0:4], mytxt,
- # logs_img)
- # print(logs)
- # f.write(logs)
- sql = 'INSERT INTO joke__helper_article_publish_result (article_id,article_url,time_script,toutiao_uid) VALUE("%s","%s","%s","%s");' % (
- dbid, res_url, int(time.time()), toutiao_uid)
- mysql_write(sql)
- print(sql)
- time.sleep(random.randint(20, 30))
- js = 'window.location.href="https://www.wukong.com/"'
- js = 'window.location.href="https://www.toutiao.com/"'
- browser.execute_script(js)
- # except Exception as e_url_jump:
- # print('e_url_jump', e_url_jump)
- try:
- browser.quit()
- except Exception as e1:
- print(e1)
- logging.exception(e1)
- time.sleep(random.randint(120, 300))
- xp_newpage = '//*[@id="title"]'
- mytxt = '林志玲捐款记录被翻出 单笔高达千万'
- browser.find_element_by_xpath(xp_newpage).send_keys(Keys.SPACE)
- browser.find_element_by_xpath(xp_newpage).send_keys(mytxt)
- <div class='article' id='artibody'>
- <div class='img_wrapper'>
- <img alt='林志玲' src='http://n.sinaimg.cn/ent/transform/703/w253h450/20180416/77p2-fzcyxmv1344655.jpg'>
- <span class='img_descr'>林志玲</span>
- </div>
- <div class='img_wrapper'>
- <img alt='林志玲捐款记录' src='http://n.sinaimg.cn/ent/transform/250/w600h450/20180416/CVWm-fzcyxmv1342897.jpg'>
- <span class='img_descr'>林志玲捐款记录</span>
- </div>
- <div class='img_wrapper'>
- <img alt='林志玲捐款记录' src='http://n.sinaimg.cn/ent/transform/250/w600h450/20180416/hXMn-fzcyxmv1342914.jpg'>
- <span class='img_descr'>林志玲捐款记录</span>
- </div> <!--video-list-->
- <div class='video-2017' id='videoList0'></div>
- <!--/video-list-->
- <p> 新浪娱乐讯 据台湾媒体报道,林志玲
- 自出道以来,热心公益,甚至创立了自己的基金会,每年固定发行公益年历。近日明星从事公益的话题发烧,她也被网友挖出,几乎每个月都在转帐捐款,且其中一笔高达1000万人民币,更让网友惊呼连连。
- </p>
- <div id='ad_44124' class='otherContent_01'
- style='display: block; margin: 10px 20px 10px 0px; float: left; overflow: hidden; clear: both; padding: 4px; width: 300px; height: 250px;'>
- </div>
- <p>
- 林志玲被网友翻出,2016年至2018年间的捐款纪录,几乎每个月都有记录,且最低都是人民币万元起跳,其中甚至有一笔高达1000万人民币,捐款项目是“筑巢行动”,不少人看到明细,都惊讶表示,原来志玲姐姐私下默默捐了这么多善款,还有人笑称:“她是不是拿着手机,无聊就转帐的那种人?”、“真的人美心也美”、“太圈粉了”、“志玲姐姐真的太低调了”。</p>
- <p>
- 43岁的林志玲1998年出道,从伸展台转战影视圈,尚未出名前就热心公益,更在2011年,主动以个人名义,成立“志玲姐姐慈善基金会”。她也固定每年拍摄公益写真年历,所得全数捐给儿福机构,或是帮助弱势孩童急难救助等,多年从不间断,且义卖期间,她从不公开做宣传,低调行善,受到不少人赞赏。</p>
- <p>
- 林志玲2016年受访曾透露,投入公益的契机,是因为身边罹癌友人的一句话,才让她下定决心。当时这位好友问她:“你希望离开后,怎样被大家记得?”她想了一想,认为既然是公众人物,就应该让大家记得自己微笑的样子,要用这样的身分,做些有影响力的事,从此将公益当做自我赋予的使命,一做就是好多年。ETtoday/文</p>
- <p class='article-editor'>(责编:kita)</p>
- <div style='font-size: 0px; height: 0px; clear: both;'></div>
- </div>
- <!-- 非定向300*250按钮 end -->
- </div>
- from selenium import webdriver
- from time import sleep
- import time
- from selenium.webdriver.common.keys import Keys
- import os
- import requests
- import time
- import threading
- import logging
- import random
# ---------------------------------------------------------------------------
# Process start-up: derive the log file name from this script's own file name
# and configure the root logger to append to it.
# ---------------------------------------------------------------------------

# Timestamp taken once at start-up (local time, e.g. 20180416_101500).
start_time = time.strftime('%Y%m%d_%H%M%S')
os_sep = os.sep

# Directory and base name of this script.
_this_file = os.path.abspath(__file__)
this_file_abspath, this_file_name = os.path.dirname(_this_file), os.path.basename(_this_file)

# The log file is created relative to the current working directory.
logf = this_file_name + '.log'
try:
    logging.basicConfig(level=logging.INFO,
                        format='%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s[thread:%(thread)d][process:%(process)d]',
                        datefmt='%a, %d %b %Y %H:%M:%S',
                        filename=logf,
                        filemode='a')
except Exception as e:
    s = '%s%s%s' % ('logging.basicConfig EXCEPTION ', time.strftime('%Y%m%d_%H%M%S'), e)
    # Report on stdout first: the log file is the most likely cause of the
    # failure, so writing to it may fail as well and must not hide the message.
    print(s)
    try:
        with open(logf, 'a') as fo:
            fo.write(s + '\n')
    except OSError:
        # Already reported on stdout; nothing more can be done here.
        pass
    # Hard exit without running clean-up handlers; the script cannot run
    # without its log file.
    os._exit(4002)
logging.info('START')

# Sample image URL and the local directory downloaded images are written to.
# img_dir must keep its trailing separator because callers concatenate onto it.
img_url = 'https://s3.pstatp.com/toutiao/static/img/logo.201f80d.png'
img_dir = 'C:\\Users\\sas\\PycharmProjects\\py_win_to_unix\\crontab_chk_url\\personas\\trunk\\plugins\\spider\\dl_img_tmp\\'
def spider_webimg_dl_return_local_img_path(img_dir, img_url, local_default='default.DONOT_REMOVE.png'):
    """Download ``img_url`` into ``img_dir`` and return the local file path.

    This is a best-effort helper: on any failure (network error, HTTP error
    status, empty response body, or failure to write the file) it returns the
    path of the placeholder image ``img_dir + local_default`` instead, so the
    caller always receives a path string.

    Args:
        img_dir: Target directory INCLUDING its trailing path separator; the
            file name is concatenated onto it, not joined.
        img_url: URL of the image to fetch.
        local_default: File name of the fallback image inside ``img_dir``.

    Returns:
        The path of the newly written file, or the fallback path.
    """
    fallback_path = '%s%s' % (img_dir, local_default)
    try:
        response = requests.get(img_url, timeout=30)
        # Do not store an HTTP error page under a .png name.
        response.raise_for_status()
        content = response.content
        if not content:
            return fallback_path

        # Build a file-system friendly name out of the URL.
        url_part = img_url
        for char, token in (('/', '_xl_'), (':', '_fxl_'), ('?', '_fxlquestion_'),
                            ('=', '_fxlequal_'), ('&', '_fxland_')):
            url_part = url_part.replace(char, token)

        # Timestamp + thread id keep names distinct across threads.
        local_path = '%s%s%s%s%s' % (
            img_dir, time.strftime('%Y%m%d%H%M%S'), str(threading.get_ident()), url_part, '.png')
        with open(local_path, 'wb') as f:
            f.write(content)
    except Exception as e:
        # Deliberately broad: any problem degrades to the placeholder image.
        print(e)
        return fallback_path
    return local_path
- import pymysql
- h, pt, u, p, db = '192.168.2.210', 3306, 'root', 'joke', 'star_media_joke'
def mysql_fetch(sql, res_type='tuple'):
    """Execute ``sql`` on a fresh connection and return all result rows.

    Connection settings come from the module-level ``h``, ``pt``, ``u``, ``p``
    and ``db`` values.

    Args:
        sql: Complete SQL statement. It is executed as-is, so any values
            interpolated into it must already be trusted or escaped.
        res_type: ``'dic'`` selects ``pymysql.cursors.DictCursor``; any other
            value uses the default cursor.

    Returns:
        Whatever ``cursor.fetchall()`` yields, or an empty tuple when the
        connection cannot be established.

    Raises:
        Errors raised while executing the statement propagate to the caller;
        only connection failures are swallowed.
    """
    try:
        conn = pymysql.connect(host=h, port=pt, user=u, passwd=p, db=db, charset='utf8')
    except Exception as e:
        print(e)
        return ()
    try:
        if res_type == 'dic':
            cursor = conn.cursor(pymysql.cursors.DictCursor)
        else:
            cursor = conn.cursor()
        try:
            cursor.execute(sql)
            # Read the rows while the cursor and connection are still open.
            rows = cursor.fetchall()
            conn.commit()
        finally:
            cursor.close()
    finally:
        # Always release the connection, even when the statement fails.
        conn.close()
    return rows
def mysql_write(sql):
    """Execute a data-modifying statement on a fresh connection and commit it.

    Connection settings come from the module-level ``h``, ``pt``, ``u``, ``p``
    and ``db`` values.

    Args:
        sql: Complete SQL statement. It is executed as-is, so any values
            interpolated into it must already be trusted or escaped.

    Returns:
        ``0`` after a successful commit, ``1`` when the connection cannot be
        established.

    Raises:
        Errors raised while executing or committing propagate to the caller;
        only connection failures are swallowed.
    """
    try:
        conn = pymysql.connect(host=h, port=pt, user=u, passwd=p, db=db, charset='utf8')
    except Exception as e:
        print(e)
        return 1
    try:
        cursor = conn.cursor()
        try:
            cursor.execute(sql)
            conn.commit()
        finally:
            cursor.close()
    finally:
        # Always release the connection, even when the statement fails.
        conn.close()
    return 0
- import random
- while True:
- logging.info('LOOP----')
- sql = 'SELECT username,password,toutiaoid FROM joke_star_joke_joke_namepwd WHERE status=1 AND category=1 AND id>236 AND NOT (toutiaoid IS NULL OR toutiaoid="" )'
- sql = 'SELECT username,password,toutiaoid FROM joke_star_joke_namepwd WHERE status=1 AND category=1 AND id=7856582 AND NOT (toutiaoid IS NULL OR toutiaoid="" )'
- res = mysql_fetch(sql)
- ac_l = [{'u': i[0], 'p': i[1], 'toutiao_uid': i[2]} for i in res]
- for ac in ac_l:
- myid, mypwd, toutiao_uid = ac['u'], ac['p'], ac['toutiao_uid']
- # 发布限制条件逻辑
- sql = "SELECT * FROM joke_star_joke_relation_wukong_question WHERE INSTR(CONCAT(',',id_toutiao_uid_list,','),CONCAT(',','{}',',')) AND time_effective<={} ORDER BY id DESC; ".format(
- toutiao_uid, int(time.time()));
- sql = "SELECT * FROM joke_joke_article_publish WHERE INSTR(CONCAT(',',id_toutiao_uid_list,','),CONCAT(',','{}',',')) AND time_effective<={} ORDER BY id DESC; ".format(
- toutiao_uid, int(time.time()));
- print(sql)
- logging.info(sql)
- res_content = mysql_fetch(sql, 'dic')
- if len(res_content) == 0:
- continue
- id_article_list = [i['id_article_list'] for i in res_content]
- sql = 'SELECT * FROM joke_joke_article WHERE id IN ({}) AND id NOT IN (SELECT article_id FROM joke_joke_article_publish_result WHERE 1 AND toutiao_uid="{}" ) LIMIT 2; '.format(
- ','.join([i['id_article_list'] for i in res_content]), toutiao_uid)
- # sql = 'SELECT * FROM joke_star_joke_wukong_question WHERE id NOT IN (SELECT toutiao_uid FROM joke_star_joke_toutiaouser_wukong_question) LIMIT 1'
- logging.info(sql)
- res_content = mysql_fetch(sql, 'dic')
- if len(res_content) == 0:
- continue
- browser = webdriver.Chrome()
- f_url_l = ['https://www.toutiao.com/group/1589657566362638/',
- 'https://www.wukong.com/question/6388670742287876353/',
- 'https://www.wukong.com/tag/6215497898671475202/']
- f_url_l += ['https://www.wukong.com/question/6512777037948649741/',
- 'https://www.wukong.com/question/6469247721038414093/',
- 'https://www.wukong.com/question/6481502080249889037/']
- # f_url_l = []
- f_url_l = ['https://www.toutiao.com/a6514526304476332552/', 'https://www.toutiao.com/a6514778729951003150/']
- f_url_l += ['https://www.toutiao.com/a6514216125151052291/', 'https://www.toutiao.com/a6512315164463727111/',
- 'https://www.toutiao.com/a6513334304318161411/']
- f_url_l_a = f_url_l[int(time.time()) % len(f_url_l)]
- # browser.get(random.choice(f_url_l))
- browser.get(f_url_l_a)
- time.sleep(random.randint(10, 20))
- js = 'window.location.href="https://sso.toutiao.com/login/";'
- js = 'window.location.href="https://sso.toutiao.com/login/?service=https://mp.toutiao.com/sso_confirm/?redirect_url=/";'
- browser.execute_script(js)
- time.sleep(random.randint(10, 20))
- # js = 'window.location.href="https://sso.toutiao.com/login/?service=https%3A%2F%2Fwww.wukong.com%2Fwenda%2Fwelcome%2F#type=0";'
- browser.execute_script(js)
- ac_type = 'qq'
- if ac_type == 'qq':
- myid, mypwd = ac['u'], ac['p']
- xp = '/html/body/div/div/div[2]/div/div/div/ul/li[3]'
- browser.find_element_by_xpath(xp).click()
- time.sleep(10)
- js = '%s%s%s' % ('document.getElementById("u").value="', myid, '"')
- browser.execute_script(js)
- js = '%s%s%s' % ('document.getElementById("p").value="', mypwd, '"')
- browser.execute_script(js)
- time.sleep(random.randint(5, 15))
- xp_newpage = '//*[@id="go"]'
- browser.find_element_by_xpath(xp_newpage).click()
- time.sleep(random.randint(10, 20))
- elif ac_type == 'mail_qq':
- continue
- time.sleep(5)
- browser.refresh()
- js = 'window.location.href="https://www.toutiao.com/";'
- browser.execute_script(js)
- browser.refresh()
- time.sleep(6)
- js = 'window.location.href="https://www.wukong.com/";'
- js = 'window.location.href="https://mp.toutiao.com/profile_v2/publish/";'
- js = 'window.location.href="https://mp.toutiao.com/profile_v3/graphic/publish";'
- browser.execute_script(js)
- time.sleep(6)
- # title
- # js = '%s%s%s' % ('document.getElementById("title").value="', '林志玲捐款记录被翻出 单笔高达千万', '"')
- # js = 'document.getElementById("title").value="{}"'.format('林志玲捐款记录被翻出 单笔高达千万')
- # browser.execute_script(js)
- xp_newpage = '//*[@id="title"]'
- mytxt = '林志玲捐款记录被翻出 单笔高达千万'
- browser.find_element_by_xpath(xp_newpage).send_keys(Keys.SPACE)
- browser.find_element_by_xpath(xp_newpage).send_keys(mytxt)
- time.sleep(2)
- fhtml, dbhtml_str = 'toutaio.db.html', ''
- with open(fhtml, 'r', encoding='utf-8') as fr:
- for hi in fr:
- dbhtml_str = '{}{}'.format(dbhtml_str, hi.replace('\n', ''))
- xp = '//*[@id="edui18_body"]/div[1]'
- # //*[@id="edui18_body"]/div[1]
- browser.find_element_by_xpath(xp).click()
- time.sleep(2)
- # //*[@id="images"]/div[1]/div
- xp = '//*[@id="images"]/div[1]/div'
- xp = '//*[@id="images"]/div[1]/div/span'
- browser.find_element_by_xpath(xp).click()
- time.sleep(1)
- db_html = dbhtml_str
- # db_html = '<li ><img src="https://www.baidu.com/img/baidu_jgylogo3.gif" alt="qq_383" title="qq_3503"></li><li >qq_3203</li><li >2017年04月13日 10:02</li><li ><span>3074</span></li>'
- js = 'it=document.getElementById("ueditor_0").contentWindow.document.getElementsByTagName("body")[0];it.innerHTML="{}"'.format(
- db_html)
- browser.execute_script(js)
- xp = '//*[@id="edui18_body"]/div[1]'
- # //*[@id="edui18_body"]/div[1]
- browser.find_element_by_xpath(xp).click()
- time.sleep(2)
- # //*[@id="images"]/div[1]/div
- xp = '//*[@id="images"]/div[1]/div'
- xp = '//*[@id="images"]/div[1]/div/span'
- browser.find_element_by_xpath(xp).click()
- time.sleep(1)
- # xp='//*[@id="graphic"]/div/div/div[2]/div[2]/div[1]/div[2]/div/div/div[2]/div[1]/div[2]/i'
- # browser.find_element_by_xpath(xp)
- # xp='//*[@id="pgc-text-img"]/div/div[1]/div[1]'
- # browser.find_element_by_xpath(xp)
- xp = '//*[@id="pgc-text-img"]/div/div[2]/div/button[1]'
- browser.find_element_by_xpath(xp)
- xp = '//*[@id="graphic"]/div/div/div[2]/div[2]/div[1]/div[2]/div/div/div/div/label[3]/div/input'
- browser.find_element_by_xpath(xp)
- time.sleep(2)
- time.sleep(2)
- xp = '//*[@id="graphic"]/div/div/div[2]/div[3]/div[2]/div[1]'
- xp = '//*[@id="graphic"]/div/div/div[2]/div[3]/div[2]/div[1]'
- browser.find_element_by_xpath(xp).click()
- dd = 9
- # js = 'document.getElementsByClassName("ask")[0].click();'
- # browser.execute_script(js)
- # time.sleep(12)
- # time.sleep(random.randint(10, 20))
- # # 需要键盘事件 反爬虫
- # tmp_target = browser.find_element_by_class_name('input-box').find_element_by_tag_name('input')
- # tmp_target.send_keys(Keys.SPACE)
- # tmp_target.send_keys(Keys.CONTROL, 'a')
- # tmp_target.send_keys(Keys.CONTROL, 'x')
- # tmp_target.send_keys(Keys.CONTROL, 'v')
- # tmp_target.send_keys(Keys.BACK_SPACE)
- # time.sleep(random.randint(10, 20))
- # res_content = []
- for i in res_content[0:1]:
- dbid, content, img_list = i['id'], i['content'], i['img_list']
- tmp_l = ['口红', '指甲油', '护发素', '沐浴露', '洗手液', '洗发水', '牙膏']
- tmp_l_1 = ['老人', '小孩', '白领', '前台妹子', '行政妹子', '大学生', '高中生']
- tmp_l_2 = ['类型', '特质', '种类', '价位', '原材料', '主要成分', '价格']
- s = '{}{}{}{}{}{}{}'.format(str(random.randint(1, 12)), '月份,', random.choice(tmp_l_1), '适合使用什么',
- random.choice(tmp_l_2), '的', random.choice(tmp_l))
- js = 'document.getElementsByClassName("input-box")[0].childNodes[0].value="{}";'.format(s)
- browser.execute_script(js)
- time.sleep(12)
- #
- # tmp_target.send_keys(Keys.SPACE)
- js = 'document.getElementsByClassName("step-btn next")[0].click();'
- browser.execute_script(js)
- # step-btn submit
- js = 'document.getElementsByClassName("step-btn submit")[0].click();'
- browser.execute_script(js)
- time.sleep(12)
- #
- js = 'window.location.href="https://www.wukong.com/user/?uid={}&type=1";'.format(toutiao_uid)
- browser.execute_script(js)
- time.sleep(12)
- res_url = browser.find_element_by_class_name('question-title').find_elements_by_tag_name('a')[
- 0].get_attribute('href')
- # print(i)
- # xp_newpage = '/html/body/div/div[2]/div[2]/div[1]/div/div/div/div/textarea'
- # try:
- # browser.find_element_by_xpath(xp_newpage)
- # except Exception as e:
- # print(e)
- # break
- # browser.find_element_by_xpath(xp_newpage).click()
- # words = content
- # # Message: SyntaxError: unterminated string literal
- # mytxt = words.replace('\n', ' ').replace('\r', ' ').replace('\\br', ' ').replace('"', '“').replace("'", '‘')
- # # Message: SyntaxError: missing ; before statement
- # mytxt = mytxt.replace("'", '‘')
- # # 2000 头条
- # mytxt = mytxt[0:2000]
- # mytxt = '好消息' if len(mytxt.replace(' ', '')) == 0 else mytxt
- #
- # # 需要键盘事件 反爬虫
- # browser.find_element_by_xpath(xp_newpage).send_keys(Keys.SPACE)
- # # browser.find_element_by_xpath(xp_newpage).send_keys(Keys.CONTROL, 'a')
- # # browser.find_element_by_xpath(xp_newpage).send_keys(Keys.CONTROL, 'x')
- # # browser.find_element_by_xpath(xp_newpage).send_keys(Keys.CONTROL, 'v')
- # # browser.find_element_by_xpath(xp_newpage).send_keys(Keys.BACK_SPACE)
- # time.sleep(random.randint(2, 5))
- #
- # try:
- # # js = '%s%s%s' % ('document.getElementsByTagName("textarea")[0].value="', '', '"')
- # # browser.execute_script(js)
- # js = '%s%s%s' % ('document.getElementsByTagName("textarea")[0].value="', mytxt, '"')
- # browser.execute_script(js)
- # time.sleep(3)
- # except Exception as jse:
- # print('.getElementsByTagName("textarea")--log-', jse)
- # continue
- #
- # browser.find_element_by_xpath(xp_newpage).send_keys(Keys.SPACE)
- # xp_newpage = '/html/body/div/div[2]/div[2]/div[1]/div/div/div/div/div[1]/div[1]/span[1]/span'
- # browser.find_element_by_xpath(xp_newpage).click()
- # time.sleep(3)
- # try:
- # upload = browser.find_element_by_id('fileElem')
- #
- # logs_img = ''
- # img_url_list = img_list.split(',')
- #
- # for imgid in img_url_list:
- # img_url = 'http://192.168.2.212:83/file/get?type=star_joke&id=199'.replace('199', str(imgid))
- # local_img_path = spider_webimg_dl_return_local_img_path(img_dir, img_url,
- # local_default='default.DONOT_REMOVE.png')
- # print(local_img_path)
- # time.sleep(random.randint(2, 4))
- # logs_img += img_url
- # logs_img += local_img_path
- # upload.send_keys(local_img_path)
- # time.sleep(random.randint(3, 7))
- # except Exception as ee:
- # img_url_default = ''
- # img_url = img_url_default
- # local_img_path = spider_webimg_dl_return_local_img_path(img_dir, img_url,
- # local_default='default.DONOT_REMOVE.png')
- # sleep(2)
- # logs_img += img_url
- # logs_img += local_img_path
- # # upload.send_keys(local_img_path)
- # logging.exception(ee)
- #
- # try:
- # xp_newpage = '/html/body/div/div[2]/div[2]/div[1]/div/ul'
- # browser.find_element_by_xpath(xp_newpage).click()
- # xp_newpage = '/html/body/div/div[2]/div[2]/div[1]/div/div/div/div/div[1]/div[2]/a'
- # browser.find_element_by_xpath(xp_newpage).click()
- #
- # time.sleep(random.randint(8, 20))
- # js = 'document.getElementsByClassName("ugc-mode-content")[0].getElementsByTagName("a")[0].target="_self"'
- # browser.execute_script(js)
- #
- # time.sleep(random.randint(2, 5))
- # xp_newpage = '/html/body/div/div[2]/div[2]/div[2]/ul/li[1]/div/div[2]/div/div[2]/a'
- # browser.find_element_by_xpath(xp_newpage).click()
- # time.sleep(random.randint(3, 6))
- # url_curr = browser.current_url
- #
- # with open('toutiao_success.log', 'a', encoding='utf-8') as f:
- # logs = '%s%s%s%s%s\n' % (
- # time.strftime('%Y%m%d_%H%M%S', time.localtime(time.time())), ac_type, myid[0:4], mytxt,
- # logs_img)
- # print(logs)
- # f.write(logs)
- sql = 'INSERT INTO joke_joke_article_publish_result (article_id,article_url,time_script,toutiao_uid) VALUE("%s","%s","%s","%s");' % (
- dbid, res_url, int(time.time()), toutiao_uid)
- mysql_write(sql)
- print(sql)
- time.sleep(random.randint(20, 30))
- js = 'window.location.href="https://www.wukong.com/"'
- js = 'window.location.href="https://www.toutiao.com/"'
- browser.execute_script(js)
- # except Exception as e_url_jump:
- # print('e_url_jump', e_url_jump)
- try:
- browser.quit()
- except Exception as e1:
- print(e1)
- logging.exception(e1)
- time.sleep(random.randint(120, 300))
- <img onload="editor.fireEvent('contentchange')" src="https://p1.pstatp.com/large/pgc-image/152385934210854ceb909ec" alt="pgc-image/152385934210854ceb909ec" _src="https://p1.pstatp.com/large/pgc-image/152385934210854ceb909ec" buttonadded="true">
- '''
- <img onload="editor.fireEvent('contentchange')" src="https://p1.pstatp.com/large/pgc-image/15238623686755f9e3c409a" _src="https://p1.pstatp.com/large/pgc-image/15238623686755f9e3c409a" alt="pgc-image/15238623686755f9e3c409a" buttonadded="true">
- '''
def _swap_in_uploaded_images(html, uploaded_urls, img_template):
    """Replace each ``<img ...>`` tag in ``html`` with ``img_template``.

    For the k-th tag found, every ``TTimgCode`` marker in ``img_template`` is
    replaced by the last path segment of ``uploaded_urls[k]``.

    Args:
        html: Article HTML whose image tags are to be rewritten.
        uploaded_urls: One URL per image, in the order the tags appear.
        img_template: Replacement tag text containing ``TTimgCode`` markers.

    Returns:
        A ``(new_html, tag_count)`` tuple; ``tag_count`` is the number of
        tags that were replaced.

    Raises:
        IndexError: when there are fewer URLs than image tags. The check runs
            before any replacement so no half-rewritten HTML is produced.
    """
    import re  # function-scope import so this snippet stays self-contained

    # A tag runs from "<img" + whitespace up to the first following ">".
    img_tag = re.compile(r'<img\s[^>]*>')
    tag_count = len(img_tag.findall(html))
    if tag_count > len(uploaded_urls):
        raise IndexError('%d <img> tags but only %d uploaded image URLs'
                         % (tag_count, len(uploaded_urls)))
    pending = iter(uploaded_urls)

    def _render(_match):
        return img_template.replace('TTimgCode', next(pending).split('/')[-1])

    return img_tag.sub(_render, html), tag_count


# Keep the untouched HTML around under a separate name.
dbhtml_str_ = dbhtml_str
# Template of the image tag to emit; TTimgCode is replaced per image.
s = '<img onload="editor.fireEvent(\'contentchange\')" src="https://p1.pstatp.com/large/pgc-image/TTimgCode" _src="https://p1.pstatp.com/large/pgc-image/TTimgCode" alt="pgc-image/TTimgCode" buttonadded="true">'
# NOTE(review): presumably the URLs of the already-uploaded images, in
# document order - confirm where pgc_img_url_l_toutiao is built.
l = pgc_img_url_l_toutiao
dbhtml_str, img_n = _swap_in_uploaded_images(dbhtml_str, l, s)
# As before, the URLs that were used are removed from the (shared) list.
del l[:img_n]
print('-----------------')
print(dbhtml_str)
- time.sleep(2)
- js = 'window.location.href="https://mp.toutiao.com/profile_v3/graphic/publish";'
- browser.execute_script(js)
- time.sleep(6)
- xp_newpage = '//*[@id="title"]'
- mytxt = d['title']
- browser.find_element_by_xpath(xp_newpage).send_keys(Keys.SPACE)
- browser.find_element_by_xpath(xp_newpage).send_keys(mytxt)
- time.sleep(2)
- # SAVE NOT DEL
- """
- xp = '//*[@id="edui18_body"]/div[1]'
- # //*[@id="edui18_body"]/div[1]
- browser.find_element_by_xpath(xp).click()
- time.sleep(2)
- xp = '//*[@id="images"]/div[1]/div/span'
- browser.find_element_by_xpath(xp).click()
- time.sleep(3)
- '''
- 'it=document.getElementById("ueditor_0").contentWindow.document.getElementsByTagName("body")[0];it.innerHTML="{}"'.format(dbhtml_str.replace('onload="editor.fireEvent(\'contentchange\')"','').replace('"',"'").replace('\n',''))
- '''
- # 结合浏览器控制台,拼接符合语法的js字符串
- r_d = {'onload="editor.fireEvent(\'contentchange\')"': '', '"': "'", '\n': ''}
- dbhtml_str_py_js = dbhtml_str
- for k in r_d:
- dbhtml_str_py_js = dbhtml_str_py_js.replace(k, r_d[k])
- dbhtml_str_py_js = dbhtml_str_py_js.replace('nbsp;', ' ')
- js = 'document.getElementById("ueditor_0").contentWindow.document.getElementsByTagName("body")[0].innerHTML="{}"'.format(
- dbhtml_str_py_js)
- browser.execute_script(js)
- """
- # 激活编辑区
- browser.find_element_by_class_name('ql-container').click()
- # 进行上传图片圆圈操作
- browser.find_element_by_class_name('icon-pic_tool').click()
- # 激活目标上传口
- browser.find_element_by_class_name('tui-tab-list').find_elements_by_class_name('tui-tab')[-1].click()
- # 关闭上传口
- browser.find_element_by_class_name('tui-tab-panel-active').find_elements_by_class_name('tui-btn')[
- -1].click()
- # 结合浏览器控制台,拼接符合语法的js字符串
- r_d = {'onload="editor.fireEvent(\'contentchange\')"': '', '"': "'", '\n': ''}
- dbhtml_str_py_js = dbhtml_str
- for k in r_d:
- dbhtml_str_py_js = dbhtml_str_py_js.replace(k, r_d[k])
- dbhtml_str_py_js = dbhtml_str_py_js.replace('nbsp;', ' ')
- #传入键盘化的html
- # document.getElementsByClassName("ql-editor ql-blank")[0].innerHTML = "44"
- js = 'document.getElementsByClassName("ql-editor ql-blank")[0].innerHTML ="{}"'.format(
- dbhtml_str_py_js)
- browser.execute_script(js)
文章编辑器 文本替换 操作dom 发帖 富文本 今日头条发布富文本的实现 键盘化的html的更多相关文章
- 【JavaScript实用技巧(二)】Js操作DOM(由问题引发的文章改版,新人大佬都可)
[JavaScript实用技巧(二)]Js操作DOM(由问题引发的文章改版,新人大佬都可!) 博客说明 文章所涉及的资料来自互联网整理和个人总结,意在于个人学习和经验汇总,如有什么地方侵权,请联系本人 ...
- Javascript - ExtJs - 整合百度文章编辑器
ExtJs - 整合百度文章编辑器(ExtJs UEditor) 第一步:去官网下载最新版本的UEditor,UEditor下载. 第二步:在编辑器根目录创建一个Extjs-Editor.js,录入以 ...
- 使用JavaScript操作DOM节点元素的常用方法(创建/删除/替换/复制等)
getElementById(id)这是通过id来访问某一元素,最常用的之一,例:<html><body><div id="myid">test ...
- JavaScript 操作DOM对象
1)JavaScript 操作DOM对象 1.DOM:是Document Object Model 的缩写,即文档对象模型 2.DOM通常分为三类:DOM Core(核心)、HTML-DOM 和 ...
- jQuery操作DOM节点的方法总结
1.parent():获得当前匹配元素集合中每个元素的父元素,该方法只会向上一级对 DOM 树进行遍历 $('li.item-a').parent().css('background-color', ...
- *jquery操作DOM总结 (原创:最全、最系统、实例展示)
jquery操作DOM包括八个方面: 一:jquery对DOM节点的基本操作:二:jquery对DOM节点的CSS样式操作:三:jquery遍历DOM节点:四:jquery创建DOM节点:五:jque ...
- Angular开发实践(七): 跨平台操作DOM及渲染器Renderer2
在《Angular开发实践(六):服务端渲染》这篇文章的最后,我们也提到了在服务端渲染中需要牢记的几件事情,其中就包括不要使用window、document、navigator等浏览器 ...
- jQuery——操作DOM
所谓Web体验,就是Web服务器与Web浏览器之间的合作.过去,都是由服务器生成HTML文档,然后浏览器负责解释并显示该文档.后来,我们可以用CSS技术来动态修改页面的外观. ###操作属性 jQue ...
- Javascript操作DOM常用API总结
基本概念 在讲解操作DOM的api之前,首先我们来复习一下一些基本概念,这些概念是掌握api的关键,必须理解它们. Node类型 DOM1级定义了一个Node接口,该接口由DOM中所有节点类型实现.这 ...
随机推荐
- 还是关于编码——decode & encode的探究
最近被py3.4中的编码折磨的不要不要的,decode & encode的使用.功能貌似在2.7—3.0有一个巨大的变化.网上查询的一些解答很多是基于2.7中的unicode功能,给出的解答是 ...
- 6.11 将分割数据转换为多值IN列表
问题 已经有了分隔数据,想要将其转换为WHERE子句IN列表中的项目.考虑下面的字符串: 7654,7698,7782,7788 要将该字符串用在WHERE子句中,但是下面的SQL语句是错误的,因为E ...
- parsley之验证属性设置
parsley.js添加表单验证功能,直接在html元素中添加对应属性: Name API Description Required #2.0必填 required HTML5 data-parsle ...
- Intel Processor Exception Handling
当一个进程的行为超出预期时,系统将把它kill掉. On Intel IA-32 and Intel 64 architecture processors, each architecturally- ...
- Vue实战之插件 sweetalert 的使用
安装npm install sweetalert2@7.15.1 --save 封装 sweetalertimport swal from 'sweetalert2' export default { ...
- vue基础---介绍
(1)声明式渲染 Vue.js 的核心是采用简洁的模板语法来声明式地将数据渲染进 DOM 的系统: ①文本 <div id="app"> {{ message }} & ...
- vuecli3.x config
module.exports = { // 基本路径 baseUrl: process.env.NODE_ENV === 'production' ? '/' : '/', // 输出文件目录 out ...
- 我的第一次"闭包"应用
结论: 闭包可以当作强类型语言如C++.Java的全局变量使用,非常巧妙 需求: ssm项目,使用pagehelper分页,在写前一页.后一页.第一页.最后一页等页面跳转时,遇到了问题,如果查询全部的 ...
- Crossword Answers UVA - 232
题目大意 感觉挺水的一道题.找出左面右面不存在或者是黑色的格子的白格,然后编号输出横向单词和竖向单词(具体看原题) 解析 ①找出各个格子的编号 ②对每个节点搜索一下 ③输出的时候注意最后一个数据后面 ...
- ORACLE 查看当前用户信息(用户,表视图,索引,表空间,同义词,存储过程,约束条件)
1.用户 查看当前用户的缺省表空间 SQL>select username,default_tablespace from user_users; 查看当前用户的角色 SQL>select ...