#!/usr/bin/env python
# coding:utf-8
#__author__ = 'Logan'
  
 
import MySQLdb
import sys
import datetime
import time
  
class ClassMigrate(object):
  def _get_argv(self):
    self.usage = """
      usage():
      python daily_migration.py --source=192.168.1.4:3306/db_name:tab_name/proxy/password \\
                    --dest=192.168.1.150:13301/db_name_archive:tab_name_201601/proxy/password \\
                    --delete_strategy=delete --primary_key=auto_id --date_col=ut --time_interval=180
      """
    if len(sys.argv) == 1:
      print self.usage
      sys.exit(1)
    elif sys.argv[1] == '--help' or sys.argv[1] == '-h':
        print self.usage
        sys.exit()
    elif len(sys.argv) > 2:
      for i in sys.argv[1:]:
        _argv = i.split('=')
        if _argv[0] == '--source':
          _list = _argv[1].split('/')
          self.source_host = _list[0].split(':')[0]
          self.source_port = int(_list[0].split(':')[1])
          self.source_db = _list[1].split(':')[0]
          self.source_tab = _list[1].split(':')[1]
          self.source_user = _list[2]
          self.source_password = _list[3]
        elif _argv[0] == '--dest':
          _list = _argv[1].split('/')
          self.dest_host = _list[0].split(':')[0]
          self.dest_port = int(_list[0].split(':')[1])
          self.dest_db = _list[1].split(':')[0]
          self.dest_tab = _list[1].split(':')[1]
          self.dest_user = _list[2]
          self.dest_password = _list[3]
        elif _argv[0] == '--delete_strategy':
          self.deleteStrategy = _argv[1]
          if self.deleteStrategy not in ('delete', 'drop'):
            print (self.usage)
            sys.exit(1)
        elif _argv[0] == '--primary_key':
          self.pk = _argv[1]
        elif _argv[0] == '--date_col':
          self.date_col = _argv[1]
        elif _argv[0] == '--time_interval':
          self.interval = _argv[1]
        else:
          print (self.usage)
          sys.exit(1)
  
  def __init__(self):
    self._get_argv()
## --------------------------------------------------------------------
    self.sourcedb_conn_str = MySQLdb.connect(host=self.source_host, port=self.source_port, user=self.source_user, passwd=self.source_password, db=self.source_db, charset='utf8')
    self.sourcedb_conn_str.autocommit(True)
    self.destdb_conn_str = MySQLdb.connect(host=self.dest_host, port=self.dest_port, user=self.dest_user, passwd=self.dest_password, db=self.dest_db, charset='utf8')
    self.destdb_conn_str.autocommit(True)
## --------------------------------------------------------------------
    self.template_tab = self.source_tab + '_template'
    self.step_size = 20000
## --------------------------------------------------------------------
    self._migCompleteState = False
    self._deleteCompleteState = False
## --------------------------------------------------------------------
    self.source_cnt = ''
    self.source_min_id = ''
    self.source_max_id = ''
    self.source_checksum = ''
    self.dest_cn = ''
## --------------------------------------------------------------------
    self.today = time.strftime("%Y-%m-%d")
    # self.today = '2016-05-30 09:59:40'
 
def sourcedb_query(self, sql, sql_type):
    try:
      cr = self.sourcedb_conn_str.cursor()
      cr.execute(sql)
      if sql_type == 'select':
        return cr.fetchall()
      elif sql_type == 'dml':
        rows = self.sourcedb_conn_str.affected_rows()
        return rows
      else:
        return True
    except Exception, e:
      print (str(e) + "<br>")
      return False
    finally:
      cr.close()
 
  def destdb_query(self, sql, sql_type, values=''):
    try:
      cr = self.destdb_conn_str.cursor()
      if sql_type == 'select':
        cr.execute(sql)
        return cr.fetchall()
      elif sql_type == 'insertmany':
        cr.executemany(sql, values)
        rows = self.destdb_conn_str.affected_rows()
        return rows
      else:
        cr.execute(sql)
        return True
    except Exception, e:
      print (str(e) + "<br>")
      return False
    finally:
      cr.close()
  
 def create_table_from_source(self):
    '''''因为tab_name表的数据需要迁移到archive引擎表,所以不适合使用这种方式。 预留作其他用途。'''
    try:
      sql = "show create table %s;" % self.source_tab
      create_str = self.sourcedb_query(sql, 'select')[0][1]
      create_str = create_str.replace('CREATE TABLE', 'CREATE TABLE IF NOT EXISTS')
      self.destdb_query(create_str, 'ddl')
      return True
    except Exception, e:
      print (str(e) + "<br>")
      return False
 
  def create_table_from_template(self):
    try:
      sql = 'CREATE TABLE IF NOT EXISTS %s like %s;' % (self.dest_tab, self.template_tab)
      state = self.destdb_query(sql, 'ddl')
      if state:
        return True
      else:
        return False
    except Exception, e:
      print (str(e + "<br>") + "<br>")
      return False
 
  def get_min_max(self):
    """ 创建目标表、并获取源表需要迁移的总条数、最小id、最大id """
    try:
      print ("\nStarting Migrate at -- %s <br>") % (datetime.datetime.now().__str__())
      sql = """select count(*),IFNULL(min(%s),-1),IFNULL(max(%s),-1) from %s where %s >= CONCAT(DATE_FORMAT(DATE_ADD('%s', INTERVAL -%s day),'%%Y-%%m-%%d'), ' 00:00:00') \
           and %s <= CONCAT(DATE_FORMAT(DATE_ADD('%s', INTERVAL -%s day),'%%Y-%%m-%%d'), ' 23:59:59') """ \
            % (self.pk, self.pk, self.source_tab, self.date_col, self.today, self.interval, self.date_col, self.today, self.interval)
      q = self.sourcedb_query(sql, 'select')
      self.source_cnt = q[0][0]
      self.source_min_id = q[0][1]
      self.source_max_id = q[0][2]
      self.source_checksum = str(self.source_cnt) + '_' + str(self.source_min_id) + '_' + str(self.source_max_id)
      if self.source_cnt == 0 or self.source_min_id == -1 or self.source_max_id == -1:
        print ("There is 0 record in source table been matched! <br>")
        return False
      else:
        return True
    except Exception, e:
      print (str(e) + "<br>")
      return False
  
  def migrate_2_destdb(self):
    try:
      get_min_max_id = self.get_min_max()
      if get_min_max_id:
        k = self.source_min_id
        desc_sql = "desc %s;" % self.source_tab
        # self.filed = []
        cols = self.sourcedb_query(desc_sql, 'select')
        # for j in cols:
        #   self.filed.append(j[0])
        fileds = "%s," * len(cols) # 源表有多少个字段,就拼凑多少个%s,拼接到insert语句
        fileds = fileds.rstrip(',')
        while k <= self.source_max_id:
          sql = """select * from %s where %s >= %d and %s< %d \
               and %s >= CONCAT(DATE_FORMAT(DATE_ADD('%s', INTERVAL -%s day),'%%Y-%%m-%%d'), ' 00:00:00') \
               and %s <= CONCAT(DATE_FORMAT(DATE_ADD('%s', INTERVAL -%s day),'%%Y-%%m-%%d'), ' 23:59:59') """\
             % (self.source_tab, self.pk, k, self.pk, k+self.step_size, self.date_col, self.today, self.interval, self.date_col, self.today, self.interval)
          print ("\n%s <br>") % sql
          starttime = datetime.datetime.now()
          results = self.sourcedb_query(sql, 'select')
          insert_sql = "insert into " + self.dest_tab + " values (%s)" % fileds
          rows = self.destdb_query(insert_sql, 'insertmany', results)
          if rows == False:
            print ("Insert failed!! <br>")
          else:
            print ("Inserted %s rows. <br>") % rows
          endtime = datetime.datetime.now()
          timeinterval = endtime - starttime
          print("Elapsed :" + str(timeinterval.seconds) + '.' + str(timeinterval.microseconds) + " seconds <br>")
          k += self.step_size
        print ("\nInsert complete at -- %s <br>") % (datetime.datetime.now().__str__())
        return True
      else:
        return False
    except Exception, e:
      print (str(e) + "<br>")
      return False
   
  def verify_total_cnt(self):
    try:
      sql = """select count(*),IFNULL(min(%s),-1),IFNULL(max(%s),-1) from %s where %s >= CONCAT(DATE_FORMAT(DATE_ADD('%s', INTERVAL -%s day),'%%Y-%%m-%%d'), ' 00:00:00') \
           and %s <= CONCAT(DATE_FORMAT(DATE_ADD('%s', INTERVAL -%s day),'%%Y-%%m-%%d'), ' 23:59:59') """ \
            % (self.pk, self.pk, self.dest_tab, self.date_col, self.today, self.interval, self.date_col, self.today, self.interval)
      dest_result = self.destdb_query(sql, 'select')
      self.dest_cnt = dest_result[0][0]
      dest_checksum = str(self.dest_cnt) + '_' + str(dest_result[0][1]) + '_' + str(dest_result[0][2])
      print ("source_checksum: %s, dest_checksum: %s <br>") % (self.source_checksum, dest_checksum)
      if self.source_cnt == dest_result[0][0] and dest_result[0][0] != 0 and self.source_checksum == dest_checksum:
        self._migCompleteState = True
        print ("Verify successfully !!<br>")
      else:
        print ("Verify failed !!<br>")
        sys.exit(77)
    except Exception, e:
      print (str(e) + "<br>")
   
  def drop_daily_partition(self):
    try:
      if self._migCompleteState:
        sql = """explain partitions select * from %s where %s >= CONCAT(DATE_FORMAT(DATE_ADD('%s', INTERVAL -%s day),'%%Y-%%m-%%d'), ' 00:00:00')
               and %s <= CONCAT(DATE_FORMAT(DATE_ADD('%s', INTERVAL -%s day),'%%Y-%%m-%%d'), ' 23:59:59') """\
             % (self.source_tab, self.date_col, self.today, self.interval, self.date_col, self.today, self.interval)
        partition_name = self.sourcedb_query(sql, 'select')
        partition_name = partition_name[0][3]
  
  
  
  
        sql = """select count(*),IFNULL(min(%s),-1),IFNULL(max(%s),-1) from %s partition (%s)""" \
            % (self.pk, self.pk, self.source_tab, partition_name)
        q = self.sourcedb_query(sql, 'select')
        source_cnt = q[0][0]
        source_min_id = q[0][1]
        source_max_id = q[0][2]
        checksum = str(source_cnt) + '_' + str(source_min_id) + '_' + str(source_max_id)
        if source_cnt == 0 or source_min_id == -1 or source_max_id == -1:
          print ("There is 0 record in source PARTITION been matched! <br>")
        else:
          if checksum == self.source_checksum:
            drop_par_sql = "alter table %s drop partition %s;" % (self.source_tab, partition_name)
            droped = self.sourcedb_query(drop_par_sql, 'ddl')
            if droped:
              print (drop_par_sql + " <br>")
              print ("\nDrop partition complete at -- %s <br>") % (datetime.datetime.now().__str__())
              self._deleteCompleteState = True
            else:
              print (drop_par_sql + " <br>")
              print ("Drop partition failed.. <br>")
          else:
            print ("The partition %s checksum failed !! Drop failed !!") % partition_name
            sys.exit(77)
    except Exception, e:
      print (str(e) + "<br>")
  
  def delete_data(self):
    try:
      if self._migCompleteState:
        k = self.source_min_id
        while k <= self.source_max_id:
          sql = """delete from %s where %s >= %d and %s< %d \
               and %s >= CONCAT(DATE_FORMAT(DATE_ADD('%s', INTERVAL -%s day),'%%Y-%%m-%%d'), ' 00:00:00') \
               and %s <= CONCAT(DATE_FORMAT(DATE_ADD('%s', INTERVAL -%s day),'%%Y-%%m-%%d'), ' 23:59:59') """ \
             % (self.source_tab, self.pk, k, self.pk, k+self.step_size, self.date_col, self.today, self.interval, self.date_col, self.today, self.interval)
          print ("\n%s <br>") % sql
          starttime = datetime.datetime.now()
          rows = self.sourcedb_query(sql, 'dml')
          if rows == False:
            print ("Delete failed!! <br>")
          else:
            print ("Deleted %s rows. <br>") % rows
          endtime = datetime.datetime.now()
          timeinterval = endtime - starttime
          print("Elapsed :" + str(timeinterval.seconds) + '.' + str(timeinterval.microseconds) + " seconds <br>")
          time.sleep(1)
          k += self.step_size
        print ("\nDelete complete at -- %s <br>") % (datetime.datetime.now().__str__())
        self._deleteCompleteState = True
    except Exception, e:
      print (str(e) + "<br>")
        
  def do(self):
    tab_create = self.create_table_from_template()
    if tab_create:
      migration = self.migrate_2_destdb()
      if migration:
        self.verify_total_cnt()
        if self._migCompleteState:
          if self.deleteStrategy == 'drop':
            self.drop_daily_partition()
          else:
            self.delete_data()
          print ("\n<br>")
          print ("====="*5 + '<br>')
          print ("source_total_cnt: %s <br>") % self.source_cnt
          print ("dest_total_cnt: %s <br>") % self.dest_cnt
          print ("====="*5 + '<br>')
          if self._deleteCompleteState:
            print ("\nFinal result: Successfully !! <br>")
            sys.exit(88)
          else:
            print ("\nFinal result: Failed !! <br>")
            sys.exit(254)
    else:
      print ("Create table failed ! Exiting. . .")
      sys.exit(255)
   
f = ClassMigrate()
f.do()

每天迁移MySQL历史数据到历史库Python脚本的更多相关文章

  1. 从零开始学安全(三十五)●mysql 盲注手工自定义python脚本

    import requests import string #mysql 手动注入 通用脚本 适用盲注 可以跟具自己的需求更改 def home(): url="url" list ...

  2. 迁移mysql数据到oracle上

    转自:http://www.cnblogs.com/Warmsunshine/p/4651283.html 我是生成的文件里面的master.sql里面的sql,一个一个拷出来的. 迁移mysql数据 ...

  3. 基于binlog来分析mysql的行记录修改情况(python脚本分析)

          最近写完mysql flashback,突然发现还有有这种使用场景:有些情况下,可能会统计在某个时间段内,MySQL修改了多少数据量?发生了多少事务?主要是哪些表格发生变动?变动的数量是怎 ...

  4. MySQL利用binlog恢复误操作数据(python脚本)

    在人工手动进行一些数据库写操作的时候(比方说数据订正),尤其是一些不可控的批量更新或删除,通常都建议备份后操作.不过不怕万一,就怕一万,有备无患总是好的.在线上或者测试环境误操作导致数据被删除或者更新 ...

  5. mysql更新(三)语句 库的操作 表的操作

    04-初始mysql语句   本节课先对mysql的基本语法初体验. 操作文件夹(库) 增 create database db1 charset utf8; 查 # 查看当前创建的数据库 show ...

  6. HBase——使用Put迁移MySql数据到Hbase

    先上code: /** * 功能:迁移mysql上电池历史数据到hbase * Created by liuhuichao on 2016/12/6. */ public class MySqlToH ...

  7. mysql数据库从删库到跑路之mysql基础

    一 数据库是什么 之前所学,数据要永久保存,比如用户注册的用户信息,都是保存于文件中,而文件只能存在于某一台机器上. 如果我们不考虑从文件中读取数据的效率问题,并且假设我们的程序所有的组件都运行在一台 ...

  8. 使用第三方库连接MySql数据库:PyMysql库和Pandas库

    使用PyMysql库和Pandas库链接Mysql 1 系统环境 系统版本:Win10 64位 Mysql版本: 8.0.15 MySQL Community Server - GPL pymysql ...

  9. 32.修改IK分词器源码来基于mysql热更新词库

    主要知识点, 修改IK分词器源码来基于mysql热更新词库     一.IK增加新词的原因 在第32小节中学习到了直接在es的词库中增加词语,来扩充自已的词库,但是这样做有以下缺点: (1)每次添加完 ...

随机推荐

  1. Rowid和Rownum

    Rowid和Rownum对于数据库开发人员来说基本很少用到,因为在企业数据库开发中大多都是进行数据批处理,但是对于其他数据库人员来说还是会用到的. rowid和rownum都是虚列,但含义完全不同.r ...

  2. Vue项目发布的问题--http://localhost:8088/static/fonts/fontawesome-webfont.af7ae50.woff2

    问题:ngnix将8080转成80对外访问,找不对woff2等文件 一\ 搭建环境 ngnix-->conf中 server { listen 80; server_name 10.9.240. ...

  3. jquery 中 live() 对于js的需求版本导致不可用解决办法

    $('body').on('click','.edit', function() {            var id = $(this).parent().attr('id');          ...

  4. 实现html页面转pdf

    实现方式比较简单,使用js代码实现的. 需要引入的js: 1.jQuery.js: 2.html2canvas.js(实现方式是先将Html页面转换成image图片然后在转换为pdf,所以转换之后会有 ...

  5. Mariadb常用管理操作

    一 Mariadb常用管理操作 纯干货,没有一点废话,全是使用频率最高和常用的操作,运维必不可少的基础资料. 1.1 创建数据库 >create database <db_name> ...

  6. 为ScrollView增加圆角的三种方式,及自定义属性【在Linearlayout中新增ScrollView支持滚动 后续】

    获取圆角的几种方案如下:方案一:通过shape来实现,给scrollView增加背景来实现方案二:通过自定义ScrollView,还要自定义属性,在dispatchDraw中不停的裁剪方案三:用And ...

  7. insert()与substr()函数

    insert()函数与substr()函数 insert()函数: insert ( pos, str2);--将字符串str2插入到原字符串下标为pos的字符前 insert (pos, n, c) ...

  8. Kong网关安装之Docker版(1)

    前言: Kong 是天生的微服务网关.她的官方简介是:Kong 是一个云原生,高效,可扩展的分布式 API 网关. 自 2015 年在 github 开源后,广泛受到关注,目前已收获 1.9w+ 的 ...

  9. Python包安装过程

    以下是paramiko-1.7.7.1的安装过程,可以看到整个过程分为步,第一步是build,就是拷贝源文件到build文件夹里, F:\VMFiles\tmpFiles\paramiko-1.7.7 ...

  10. FIFO 深度了解

    嘻哈第二篇,深度聊聊各种细节. 优化与跨时钟阈分析