gp sql

appendonly

-- drop table if exists test_appendonly;

-- create table test_appendonly with(appendonly=true, compresslevel=5) as

-- select generate_series(0, 1000) a, 'helloworld'::varchar(50) b

-- distributed by(a);

-- Look up the pg_class entry of the example append-only table.
select oid
from pg_class
where relname = 'test_appendonly';

-- Same lookup, plus every relation whose name embeds 212266.
-- NOTE(review): 212266 looks like the table oid from the author's session;
-- substitute the oid returned by the query above.
select
    oid,
    oid::regclass
from pg_class
where relname = 'test_appendonly'
   or relname like '%212266%';

-- \d pg_aoseg.pg_aoseg_212266

-- Compression ratio: built-in helper first, then the same figure computed by hand.
select *
from get_ao_compression_ratio('test_appendonly');

select sum(eofuncompressed) / sum(eof) as compression_ratio
from gp_dist_random('pg_aoseg.pg_aoseg_212266');

-- Row distribution: built-in helper first, then the per-segment tuple counts by hand.
select *
from get_ao_distribution('test_appendonly')
order by segmentid;

select
    gp_segment_id,
    tupcount
from gp_dist_random('pg_aoseg.pg_aoseg_212266')
order by gp_segment_id;

-- Row shape returned by public.get_table_info(); the table is only used as a type.
drop table if exists public.table_info cascade;

create table public.table_info (
    tablename         text,    -- table name
    subparname        text,    -- partition name
    tablecount        bigint,  -- number of rows in the table
    tablesize         bigint,  -- table size
    prettysize        text,    -- human-readable size
    max_div_avg       float,   -- skew: largest per-node data volume / average per-node data volume
    compression_ratio text     -- compression ratio
);

-- 获取表信息

-- Report row count, size, skew and compression ratio of an append-only table.
--
-- Argument:
--   tablename  relation name written as "schema.table".
-- Returns:
--   one table_info row for a non-partitioned table; for a table with exactly
--   one pg_partition row, a '###ALL###' summary row followed by one row per
--   partition (ordered by parruleord).
-- Errors:
--   raises when the name has no schema part, when the relation cannot be
--   resolved, or when the relation is not append-only (relstorage <> 'a').
-- Depends on public.regclass2text(), which must exist when this function is called.
-- NOTE(review): the function runs "set enable_seqscan=off" and never resets it,
-- so the setting stays changed for the rest of the session.
create or replace function public.get_table_info(tablename text) returns setof table_info as $$
import re

def sql_literal(value):
    # Double embedded single quotes so the value cannot terminate the
    # surrounding '...' SQL literal it is interpolated into.
    return str(value).replace("'", "''")

def version_tuple(text):
    # 'x.y.z' -> (x, y, z) so versions compare numerically instead of as
    # strings (as strings, '10.0' sorts before '3.4.0'). A missing version
    # gives an empty tuple, which compares lower than any real version.
    return tuple([int(part) for part in re.findall(r'\d+', text or '')])

def one_table_info(plpy, tablename, subparname, aosegname, privilege):
    # Build one table_info row from the aoseg relation `aosegname`.
    # privilege is '1' when the caller may select from that relation;
    # otherwise a placeholder row flagged 'permission denied' is returned.
    aosegsql = ""
    if privilege == '1':
        # prettysize is derived from sum(eof) -- the same value as tablesize --
        # not from the tuple count.
        aosegsql = '''
            select '%s' tablename, '%s' subparname,
                coalesce(sum(tupcount)::bigint, 0) tablecount,
                coalesce(sum(eof)::bigint, 0) tablesize,
                pg_size_pretty(coalesce(sum(eof)::bigint, 0)) prettysize,
                coalesce(max(tupcount)::bigint, 1)/(case when coalesce(avg(tupcount), 1.0) = 0 then 1
                    else coalesce(avg(tupcount), 1.0) end) max_div_avg,
                coalesce(sum(eofuncompressed), 1)/(case when coalesce(sum(eof), 1.0) = 0 then 1
                    else coalesce(sum(eof), 1.0) end) compression_ratio
            from gp_dist_random('%s');
        '''%(sql_literal(tablename), sql_literal(subparname), sql_literal(aosegname))
    else:
        aosegsql = '''
            select '%s' tablename, '%s' subparname,
                0 tablecount, 0 tablesize, 'permission denied' prettysize,
                0 max_div_avg, 0 compression_ratio;
        '''%(sql_literal(tablename), sql_literal(subparname))
    plpy.info(aosegsql)
    result_rv = plpy.execute(aosegsql)
    return result_rv[0]

# The argument must contain a schema part: "schema.table".
if len(tablename.lower().split('.')) < 2:
    plpy.error('Please in put "tableschema.table_name"')

# Check the database version; the catalog layout differs after 3.4.0.
check_version_sql = """
    select substring(version(), 'Database (.*) build') as version;
"""
rv = plpy.execute(check_version_sql)
uses_pg_appendonly = version_tuple(rv[0]['version']) > (3, 4, 0)

plpy.execute("set enable_seqscan=off")

# Resolve the table oid together with its aoseg relation and our privilege on it.
if uses_pg_appendonly:
    get_table_oid = """
        select a.oid, reloptions, b.segrelid, regclass2text(b.segrelid::regclass) aosegname, relstorage,
            case has_table_privilege(user, b.segrelid, 'select') when 't' then '1' else '0' end privilege
        from pg_class a left join pg_appendonly b on a.oid=b.relid where a.oid='%s'::regclass;
    """%(sql_literal(tablename))
else:
    get_table_oid = """
        select oid, reloptions, relaosegrelid, regclass2text(relaosegrelid::regclass) aosegname, relstorage,
            case has_table_privilege(user, relaosegrelid, 'select') when 't' then '1' else '0' end privilege
        from pg_class where oid='%s'::regclass;
    """%(sql_literal(tablename))

# A failing ::regclass cast and an empty result both mean "no such relation".
# (The original caught the undefined name `Error`, which raised NameError.)
try:
    rv_oid = plpy.execute(get_table_oid, 5)
except Exception:
    rv_oid = None
if rv_oid is None or rv_oid.nrows() == 0:
    plpy.error('Did not find any relation named "' + tablename + '".')

table_oid = rv_oid[0]['oid']
if rv_oid[0]['relstorage'] != 'a':
    plpy.error(tablename + ' is not appendonly table, this function only support appendonly talbe')

# Check whether the table is partitioned.
check_par_table = "select count(*) from pg_partition where parrelid=%s"%(table_oid)

if uses_pg_appendonly:
    tablecount_sql = """
        select regclass2text(pp.parrelid::regclass) tabname, prl.parname, parruleord, pa.segrelid,
            regclass2text(pa.segrelid::regclass) aosegname,
            case has_table_privilege(user, pa.segrelid, 'select') when 't' then '1' else '0' end privilege
        from pg_partition pp, pg_partition_rule prl, pg_appendonly pa
        where pp.paristemplate=false and pp.parrelid=%s and prl.paroid=pp.oid and pa.relid=prl.parchildrelid
        order by prl.parruleord;
    """%(table_oid)
else:
    tablecount_sql = """
        select regclass2text(pp.parrelid::regclass) tabname, prl.parname, parruleord, pc.relaosegrelid,
            regclass2text(pc.relaosegrelid::regclass) aosegname,
            case has_table_privilege(user, pc.relaosegrelid, 'select') when 't' then '1' else '0' end privilege
        from pg_partition pp, pg_partition_rule prl, pg_class pc
        where pp.paristemplate=false and pp.parrelid=%s and prl.paroid=pp.oid and pc.oid=prl.parchildrelid
        and relaosegrelid <> 0 order by prl.parruleord;
    """%(table_oid)

rv = plpy.execute(check_par_table)

# NOTE(review): only a table with exactly one pg_partition row takes the
# partitioned path -- confirm the intended handling of multi-level partitioning.
if rv[0]['count'] == 1:
    partitions = plpy.execute(tablecount_sql)
    partition_rows = []
    totalcount = 0
    totalsize = 0
    unzipsize = 0
    for part in partitions:
        rv_ao = one_table_info(plpy, tablename, part['parname'], part['aosegname'], str(part['privilege']))
        partition_rows.append(rv_ao)
        totalsize = totalsize + rv_ao['tablesize']
        totalcount = totalcount + rv_ao['tablecount']
        # Uncompressed bytes of this partition = stored bytes * its ratio.
        unzipsize = unzipsize + rv_ao['tablesize'] * rv_ao['compression_ratio']

    # An empty table has no meaningful ratio; report 1.
    if totalsize == 0:
        compression_ratio = 1
    else:
        compression_ratio = unzipsize/totalsize

    total_count_sql = """
        select '%s' as tablename, '###ALL###' as subparname, %d as tablecount, %d as tablesize,
            pg_size_pretty(%d::bigint) prettysize, null as max_div_avg, %f as compression_ratio;
    """%(sql_literal(tablename), totalcount, totalsize, totalsize, compression_ratio)
    summary = plpy.execute(total_count_sql)
    plpy.info('===' + total_count_sql)

    # Summary row first, then the partitions in parruleord order.
    return [summary[0]] + partition_rows
else:
    rv_ao = one_table_info(plpy, tablename, '', rv_oid[0]['aosegname'], str(rv_oid[0]['privilege']))
    return [rv_ao]
$$ language plpythonu;

-- Example call in the FROM clause: one output column per table_info field.
select * from get_table_info('public.test_appendonly');

-- Example call in the select list: each row comes back as a single composite value.
select get_table_info('public.test_appendonly');

hostname

-- create language plpythonu ;

-- Return the host name reported by Python's socket.gethostname() on the
-- node where the call is evaluated.
create or replace function public.hostname() returns text as $$
from socket import gethostname
return gethostname()
$$ language plpythonu;

-- Return the argument with its characters in reverse order; NULL in, NULL out.
-- NOTE(review): under Python 2 the argument is presumably a byte string, so
-- multi-byte characters would be reversed byte-wise -- confirm before using it
-- on non-ASCII text.
create or replace function public.reverse(str text) returns text as $$
# The original body had lost its indentation and did not compile.
if str is not None:
    return str[::-1]
return None
$$ language plpythonu;

-- Parse a JSON document of the form {"people": [{"firstName": .., "lastName": ..}, ..]}
-- and collect "firstName lastName" for every entry.
-- Any parsing or lookup failure yields ['Parse json error'] instead of an error.
-- NOTE(review): the function is declared "returns text" but returns a Python
-- list, so the caller presumably receives the list's string form; "setof text"
-- may have been intended -- confirm before changing the signature.
create or replace function public.json_parse(data text) returns text as $$
import json

# The original body had lost its indentation and did not compile.
try:
    mydata = json.loads(data)
except Exception:
    # Covers malformed JSON as well as a NULL argument.
    return ['Parse json error']

returndata = []
try:
    for people in mydata['people']:
        returndata.append(people['firstName'] + ' ' + people['lastName'])
except Exception:
    # Missing keys or an unexpected document shape.
    return ['Parse json error']

return returndata
$$ language plpythonu;

-- Host name as seen by a plain call.
select hostname();

-- Host name evaluated through gp_dist_random('gp_id').
-- NOTE(review): presumably this yields one row per segment -- confirm.
select hostname()
from gp_dist_random('gp_id');

-- Number of pg_class rows held by each segment.
select
    gp_segment_id,
    count(*)
from gp_dist_random('pg_class')
group by gp_segment_id
order by gp_segment_id;

-- Cluster configuration catalogs.
select * from gp_configuration;

select * from gp_segment_configuration;

select * from pg_filespace_entry;

select * from gp_configuration_history;

-- Check whether a table is partitioned.
select count(*)
from pg_partition
where parrelid = 'public.tb_partition_list_yyyymmdd'::regclass;

select * from pg_partition_rule;

-- One row per partition rule of every non-template partitioning definition:
-- parent table oid, child relation oid, partition name, partition kind,
-- upper bound (or list values), rule definition and position.
-- "if exists" keeps the script re-runnable on a database where the view has
-- not been created yet, matching the other drop statements in this file.
drop view if exists public.v_pg_partitions;

create view public.v_pg_partitions as
select
    pp.parrelid as tableoid,
    prl.parchildrelid,
    prl.parname as partitionname,
    case
        when pp.parkind = 'h'::"char" then 'hash'::text
        when pp.parkind = 'r'::"char" then 'range'::text
        when pp.parkind = 'l'::"char" then 'list'::text
        else NULL::text
    end as partitiontype,
    -- Fall back to the list values when the range end renders as an empty string.
    case
        when pg_get_expr(prl.parrangeend, prl.parchildrelid) = ''
            then pg_get_expr(prl.parlistvalues, prl.parchildrelid)
        else pg_get_expr(prl.parrangeend, prl.parchildrelid)
    end as high_value,
    pg_get_partition_rule_def(prl.oid, true) as partitionboundary,
    prl.parruleord as partitionposition
from pg_partition pp
inner join pg_partition_rule prl
    on prl.paroid = pp.oid
where pp.paristemplate = false;

-- Example: list the partitions of one table in rule order.
select * from public.v_pg_partitions where tableoid='tb_partition_list_yyyymmdd'::regclass order by partitionposition;

partition

-- Example append-only table, range-partitioned by day on yyyymmdd.
drop table if exists public.tb_partition_range_yyyymmdd cascade;

create table public.tb_partition_range_yyyymmdd (
    id       numeric,
    yyyymmdd date
)
with (appendonly=true, compresslevel=5)
distributed by (id)
partition by range (yyyymmdd)
(
    partition p20120811 start ('2012-08-11'::date) end ('2012-08-12'::date)
    -- partition p20120812 start ('2012-08-12'::date) end ('2012-08-13'::date)
);

-- Example append-only table, list-partitioned on a 'YYYYMMDD' string value.
drop table if exists public.tb_partition_list_yyyymmdd cascade;

create table public.tb_partition_list_yyyymmdd (
    id       numeric,
    yyyymmdd varchar(128)
)
with (appendonly=true, compresslevel=5)
distributed by (id)
partition by list (yyyymmdd)
(
    partition p20120811 values ('20120811'),
    partition p20120812 values ('20120812')
);

-- Helper view read by public.add_partition_info(): one row per partition rule
-- of every non-template partitioning definition, with the partition bounds
-- reduced to bare digit strings.
--
-- The translate(..., '<characters>', '') calls delete every character that
-- occurs in the second argument. That argument lists '-', the single quote,
-- ':' and the letters of the type names that can follow a cast, so a rendered
-- bound such as '2012-08-11'::date becomes 20120811. Because translate works
-- character by character, those letters are removed wherever they occur.
-- The range bounds are additionally cut to their first 8 characters.
--
-- listtype / rangetype pull the "consttype <oid>" number out of the stored
-- expression text and cast it to regtype.
drop view if exists public.v_pg_add_partitions cascade;

create view public.v_pg_add_partitions as

select pp.parrelid tableoid, prl.parchildrelid, prl.parname as partitionname,

case

when pp.parkind='h'::"char" then 'hash'::text

when pp.parkind='r'::"char" then 'range'::text

when pp.parkind='l'::"char" then 'list'::text

else NULL::text

end as partitiontype,

translate(pg_get_expr(prl.parlistvalues, prl.parchildrelid), '-''::date

character varying bpchar numeric double precision timestamp without time zone', '') as partitionlistvalue,

substring(translate(pg_get_expr(prl.parrangestart, prl.parchildrelid), '-''::date

character varying bpchar numeric double precision timestamp without time zone', ''), 1, 8) as partitionrangestart,

substring(translate(pg_get_expr(prl.parrangeend, prl.parchildrelid), '-''::date

character varying bpchar numeric double precision timestamp without time zone', ''), 1, 8) as partitionrangeend,

prl.parruleord as partitionposition,

substring(parlistvalues, 'consttype ([0-9]+)')::integer::regtype listtype,

substring(parrangeend, 'consttype ([0-9]+)')::integer::regtype rangetype

from pg_partition pp, pg_partition_rule prl where pp.paristemplate=false and prl.paroid=pp.oid;

-- Add the partitions a date-partitioned table needs between today and
-- today + days_from_now, and return the "alter table" statements executed.
--
-- Arguments:
--   tableoid       oid of the partitioned parent table.
--   days_from_now  how many days ahead partitions are created.
-- Returns:
--   one text row per executed statement; no rows when the table has no
--   partition named 'p20...', when the newest partition name is neither
--   'pYYYYMM' (monthly) nor 'pYYYYMMDD' (daily), or when the existing bounds
--   cannot be read as dates.
--
-- Only partitions whose name starts with 'p20' are considered. The step
-- between new partitions is taken from the newest existing partition (range:
-- end - start; list: newest value - previous value). New partitions reuse
-- the parent's reloptions.
-- NOTE(review): generation always starts at the current day or month, not
-- after the newest existing partition, so a partition that already exists
-- makes the "alter table" fail -- confirm this is the intended usage.
create or replace function public.add_partition_info(tableoid oid, days_from_now integer) returns setof text as $$
import datetime

DAY_FORMAT = '%Y%m%d'
MONTH_FORMAT = '%Y%m'

def parse_key(key):
    # 'YYYYMM' or 'YYYYMMDD' -> datetime; raises ValueError/TypeError otherwise.
    if len(key) == 6:
        return datetime.datetime.strptime(key, MONTH_FORMAT)
    return datetime.datetime.strptime(key, DAY_FORMAT)

def now():
    # Today as 'YYYYMMDD'.
    return datetime.datetime.now().strftime(DAY_FORMAT)

def add_day(d, n):
    # 'YYYYMMDD' shifted by n days.
    moved = datetime.datetime.strptime(d, DAY_FORMAT) + datetime.timedelta(days=n)
    return moved.strftime(DAY_FORMAT)

def add_month(d, n):
    # First day of the month n months after d, in d's own format
    # ('YYYYMM' stays 'YYYYMM', 'YYYYMMDD' becomes 'YYYYMM01').
    # Real calendar arithmetic: the former "31 * n days" step could skip a month.
    base = parse_key(d)
    months = base.year * 12 + (base.month - 1) + n
    moved = datetime.datetime(months // 12, months % 12 + 1, 1)
    if len(d) == 6:
        return moved.strftime(MONTH_FORMAT)
    return moved.strftime(DAY_FORMAT)

def day_interval(start, end):
    # Whole days between two keys. Subtracting the keys as integers is wrong
    # across month boundaries (20120901 - 20120831 = 70).
    return (parse_key(end) - parse_key(start)).days

def month_interval(start, end):
    # Whole months between two keys (201301 - 201212 as integers would be 89).
    first = parse_key(start)
    last = parse_key(end)
    return (last.year - first.year) * 12 + (last.month - first.month)

def first_day(key):
    # 'YYYYMM' -> 'YYYYMM01' so it compares correctly against 'YYYYMMDD'.
    if len(key) == 6:
        return key + '01'
    return key

relist = []

# pre_value is the value of the previous partition; it is mainly used for
# list partitioning. "order by rn" guarantees that row 0 is the newest partition.
sql = """select *, tableoid::regclass tablename, lead(case when partitionrangeend <>  '' then partitionrangeend
 else partitionlistvalue end) over(partition by tableoid order by partitionposition desc) as pre_value,
 row_number() over(partition by tableoid order by partitionposition desc) rn
 from v_pg_add_partitions where substr(partitionname, 1, 3) = 'p20' and tableoid=%s
 order by rn;"""%(tableoid)
rv = plpy.execute(sql)

# No matching partition: nothing to extend. (The original compared nrows()
# with -1, which never matches, and then failed on rv[0].)
if rv.nrows() == 0:
    return relist

sql_relation = "select array_to_string(reloptions, ',') reloptions from pg_class where oid=%s"%(tableoid)
rv_relation = plpy.execute(sql_relation)

newest = rv[0]
reloptions = rv_relation[0]['reloptions']
tablename = newest['tablename']
partitiontype = newest['partitiontype']
partitionname = newest['partitionname']
pre_value = newest['pre_value']

# Reuse the parent's storage options for every new partition.
if reloptions != None and reloptions != '':
    statement_tail = 'with(%s);'%(reloptions)
else:
    statement_tail = ';'

# The name length decides the granularity: 'pYYYYMM' monthly, 'pYYYYMMDD' daily.
if len(partitionname) == 7:
    monthly = True
    func_add = add_month
    func_interval = month_interval
elif len(partitionname) == 9:
    monthly = False
    func_add = add_day
    func_interval = day_interval
else:
    # Non-conforming partition name: do nothing.
    return relist

today = now()
limit = add_day(today, days_from_now)

# Range partitioning.
if partitiontype == 'range':
    rangetype = newest['rangetype']
    try:
        interval = func_interval(newest['partitionrangestart'], newest['partitionrangeend'])
    except (TypeError, ValueError):
        # Bounds are missing or are not dates.
        return relist
    # A non-positive step would never reach the limit.
    if interval <= 0:
        return relist

    # Monthly partitions start on the first day of the current month.
    if monthly:
        cursor = today[0:6] + '01'
    else:
        cursor = today

    while cursor < limit:
        range_end = func_add(cursor, interval)
        if monthly:
            new_name = 'p' + cursor[0:6]
        else:
            new_name = 'p' + cursor
        add_sql = "alter table %s add partition %s start ('%s'::%s) end ('%s'::%s)"%(tablename, new_name, cursor, rangetype, range_end, rangetype)
        add_sql += statement_tail
        plpy.execute(add_sql)
        relist.append(add_sql)
        cursor = range_end

# List partitioning.
elif partitiontype == 'list':
    listtype = newest['listtype']
    newest_value = newest['partitionlistvalue']
    # Values must look like 'YYYYMM' (monthly only) or 'YYYYMMDD'.
    if newest_value is None or len(newest_value) not in (6, 8):
        return relist
    if not monthly and len(newest_value) != 8:
        return relist

    if pre_value is None:
        # Only one partition exists: step by one day or month, per the name.
        interval = 1
    else:
        try:
            interval = func_interval(pre_value, newest_value)
        except (TypeError, ValueError):
            return relist
    if interval <= 0:
        return relist

    # Generate values in the same format as the existing ones.
    if len(newest_value) == 6:
        cursor = today[0:6]
    elif monthly:
        cursor = today[0:6] + '01'
    else:
        cursor = today

    while first_day(cursor) < limit:
        if monthly:
            new_name = 'p' + cursor[0:6]
        else:
            new_name = 'p' + cursor
        add_sql = "alter table %s add partition %s values('%s'::%s)"%(tablename, new_name, cursor, listtype)
        add_sql += statement_tail
        plpy.execute(add_sql)
        relist.append(add_sql)
        cursor = func_add(cursor, interval)

return relist
$$ language plpythonu;

-- Example: create the partitions needed for the next day on the list-partitioned table.
select add_partition_info('tb_partition_list_yyyymmdd'::regclass, 1);

-- Generate "grant select" statements for every user table that gpadmin cannot
-- read yet, leaving out partition children and temporary schemas.
select 'grant select on ' || nspname || '.' || relname || ' to gpadmin;'
from pg_class a
inner join pg_namespace b
    on a.relnamespace = b.oid
where relname not like '%_1_prt%'
  and relkind = 'r'
  and has_table_privilege('gpadmin', a.oid, 'select') = 'f'
  and nspname not in ('pg_catalog', 'information_schema')
  and nspname not like '%pg_tmp%';

view

-- 1. Column information of a table: the table name lives in pg_class, the
--    schema in pg_namespace, and the columns in pg_attribute.
select
    a.attname,
    pg_catalog.format_type(a.atttypid, a.atttypmod) as data_type
from pg_catalog.pg_attribute a
inner join (
    select c.oid
    from pg_catalog.pg_class c
    left join pg_catalog.pg_namespace n
        on n.oid = c.relnamespace
    where c.relname = 'pg_class'
      and n.nspname = 'pg_catalog'
) b
    on a.attrelid = b.oid
where a.attnum > 0
  and not a.attisdropped
order by a.attnum;

-- 1.1 Column information of a table, resolving the relation through regclass.
select
    a.attname,
    pg_catalog.format_type(a.atttypid, a.atttypmod) as data_type
from pg_catalog.pg_attribute a
where a.attrelid = 'pg_catalog.pg_class'::regclass
  and a.attnum > 0
  and not a.attisdropped
order by a.attnum;

-- 2. The distribution key of a table is stored in gp_distribution_policy.

-- 2.1 localoid joins to pg_class.oid; attrnums is an array of column numbers that joins to pg_attribute.attnum.

drop table if exists public.cxfa2 cascade;

create table public.cxfa2(a int, b int, c int, d int) distributed by(c, a);

select * from gp_distribution_policy where localoid='cxfa2'::regclass;

-- Expand attrnums into one row per key column: generate_series(1, 10) supplies
-- candidate array positions (so at most 10 key columns are shown) and
-- positions past the end of the array are dropped by the "is not null" test.
select * from gp_distribution_policy a, (select generate_series(1, 10)) i (i), pg_attribute b

where a.attrnums[i.i] is not null and a.localoid=b.attrelid and a.attrnums[i.i]=b.attnum

and a.localoid='public.cxfa2'::regclass order by i.i;

-- 3. Get the creation time of a table.

select * from pg_stat_file('pg_hba.conf');

-- Composite type returned by public.get_file_stat(): file size plus the
-- access, modification and change timestamps, all at second precision.
drop type if exists public.stat_file cascade;

create type public.stat_file as (
    size         bigint,
    access       timestamp(0),
    modification timestamp(0),
    change       timestamp(0)
);

-- Return size and access / modification / change times of a file as reported
-- by os.stat() on the node that runs the function.
-- Fields that could not be determined (for example when the file does not
-- exist) are returned as NULL; no error is raised.
create or replace function public.get_file_stat(filename text) returns stat_file as $$
import os, time

# Slots: size, access, modification, change.
stat_row = [None, None, None, None]

try:
    info = os.stat(filename)
    stat_row[0] = int(info.st_size)
    # Render each epoch value as a local-time timestamp string.
    for slot, epoch in enumerate((info.st_atime, info.st_mtime, info.st_ctime)):
        stat_row[slot + 1] = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(epoch))
except Exception:
    # Best effort: keep whatever was filled in and leave the rest NULL.
    pass

return stat_row
$$ language plpythonu;

-- Example: stat a file given by a relative path.
-- NOTE(review): a relative path is resolved against the server process's working directory -- confirm where that is.
select * from get_file_stat('pg_hba.conf');

-- For every ordinary table (relkind 'r') with relstorage 'a' or 'h' in the
-- current database, expose the access / modification / change times of its
-- data file, obtained through public.get_file_stat().
--
-- The file path is assembled as
--   <fselocation>/<directory>/<relfilenode>
-- where <directory> is 'global' for tablespace 1664, 'base/<database oid>'
-- for the default tablespace (reltablespace = 0), and
-- '<tablespace oid>/<database oid>' otherwise.
-- A reltablespace of 0 is mapped to tablespace oid 1663 for the join.
-- NOTE(review): fsedbid = 1 presumably selects the master's filespace entry,
-- so the timestamps describe the master's copy of the file -- confirm.
drop view if exists public.v_table_modify_time cascade;

create view public.v_table_modify_time as

select tab_oid, schemaname, tablename, (filestat).access, (filestat).modification, (filestat).change

from

(

select a.oid tab_oid, e.nspname as schemaname, a.relname as tablename,

get_file_stat(fselocation || '/' ||

case when reltablespace=1664 then 'global'

when reltablespace=0 then 'base' || '/' || d.oid

else reltablespace || '/' || d.oid

end

|| '/' || relfilenode) as filestat

from pg_class a, pg_tablespace b, pg_filespace_entry c, pg_namespace e, pg_database d

where d.datname=current_database()

and (case when a.reltablespace = 0 then 1663 else a.reltablespace end)=b.oid

and b.spcfsoid=c.fsefsoid

and e.oid=a.relnamespace

and c.fsedbid=1

and a.relstorage in ('a', 'h')

and a.relkind='r'

) t;

-- Generate (but do not run) "drop table ... cascade" statements for tables in
-- schema public whose data file was last accessed more than one day ago,
-- skipping relations whose name contains '_1_prt_p'.
-- WARNING: the output is destructive when executed; review every statement first.
select 'drop table ' || schemaname || '.' || tablename || ' cascade;'

from v_table_modify_time where access < now() - '1 days'::interval and tablename not like '%_1_prt_p%'

and schemaname='public' order by access;

-- Custom type casts

-- Look up the function that implements the text -> date cast.
select castfunc::regprocedure from pg_cast where castsource='text'::regtype and casttarget='date'::regtype;

-- select '20180526'::date;

-- select date('20180526');

-- select date('2018-05-26');

-- select cast('2018-05-26' as date);

-- Return the regclass argument unchanged, declared with a text result, so a
-- regclass value can be used where text is expected.
-- It backs the regclass -> text cast created below and is called by public.get_table_info().
create or replace function public.regclass2text(a regclass) returns text as $$

return a;

$$ language plpythonu;

-- Register regclass2text() as the explicit regclass -> text cast.
drop cast if exists (regclass as text) cascade;

create cast(regclass as text) with function regclass2text(a regclass);

-- Example: resolve an oid to its relation name as text.
-- NOTE(review): 57377 is an oid from the author's database; use one that exists locally.
select 57377::regclass::text;

-- Content ids of the segments that have an entry in the 'pg_system' filespace.
-- pg_filespace is aliased "c": the original aliased it with the reserved word
-- "create" while the query referenced "c", so the statement could not run.
create view v_gp_configuration as
select a.content
from gp_segment_configuration a
inner join pg_filespace_entry b
    on a.dbid = b.fsedbid
inner join pg_filespace c
    on b.fsefsoid = c.oid
where c.fsname = 'pg_system';

https://www.cnblogs.com/someblue/p/4225694.html

https://blog.csdn.net/menggudaoke/article/details/78843749

gp sql的更多相关文章

GP SQL 优化
1.收集统计信息vacuum full analyze ZCXT.ZCOT_PS_PROJECT; 2.检查表的数据量分布select gp_segment_id,count(*) from fact ...
GP数据库常用SQL语句
GP数据库常用SQL语句 --1,查看列名以及类型 select upper(column_name) ,data_type from information_schema.columns wher ...
使用sql查询mysql/oracle/sql server/gp数据库中指定表的字段信息(字段名/字段类型/字段长度/是否是主键/是否为空)
1,根据数据库类型拼接不同URL /** * 根据类型不同拼接连接的URL * @param dbType 1:mysql.2:oracle.3:sql server.4:gp * @param ip ...
GP查询表状态常用SQL
年终巨献史上最全 ——LINQ to SQL语句
LINQ to SQL语句(1)之Where 适用场景:实现过滤,查询等功能. 说明:与SQL命令中的Where作用相似,都是起到范围限定也就是过滤作用的,而判断条件就是它后面所接的子句.Where操 ...
Linq to SQL 语法查询(链接查询，子查询 & in操作 & join，分组统计等)
Linq to SQL 语法查询(链接查询,子查询 & in操作 & join,分组统计等) 子查询描述:查询订单数超过5的顾客信息查询句法: var 子查询 = from c i ...
LINQ to SQL语句(6)之Group By/Having
适用场景:分组数据,为我们查找数据缩小范围. 说明:分配并返回对传入参数进行分组操作后的可枚举对象.分组:延迟 1.简单形式: var q = from p in db.Products group ...
你必须知道的Microsoft SQL Server一
不知道为什么我Win10环境下安装的Sqlserver2012,智能提示的功能基本上没有用,没办法,我还是选择安装插件SQL Prompt 5吧.下载地址:http://www.uzzf.com/so ...
Oracle学习笔记十使用PL/SQL
PL/SQL 简介 PL/SQL 是过程语言(Procedural Language)与结构化查询语言(SQL)结合而成的编程语言,是对 SQL 的扩展,它支持多种数据类型,如大对象和集合类型,可使用 ...

随机推荐

BZOJ1718: [Usaco2006 Jan] Redundant Paths 分离的路径【边双模板】【傻逼题】
LINK 经典傻逼套路就是把所有边双缩点之后叶子节点的个数 //Author: dream_maker #include<bits/stdc++.h> using namespace s ...
渐进式 jpg 和交错式 gif png 提高图片站体验
渐进式 jpg 和交错式 gif png 提高图片站体验= 渐进式的JPG比原始JPG还要小!! 让图片性感的露给你看~google picasa 和 smashing magazine 都有用到搞 ...
CentOS 6.0 VNC远程桌面配置方法(转帖)
问题:新装开发机,安装VNC软件后,按照下面文档配置后,无法用VNC view连接,关闭防火墙后可以连上解决方法:说明问题出在防火墙配置上,除了允许15900端口外,还有其他要设,经过排查后,加上如 ...
git push 免密码
git push 免密码通用情况使用ssh协议 git add 使用tab键自动补全的中文文件名乱码 jupyter notebook 创建密码 git push 免密码通用情况 1.使用文件创 ...
Tomcat 容器的设计和实现
Tomcat 容器是对 Servlet 规范的实现,也称为 Servlet 引擎.在分析 Tomcat 容器的设计和实现之前,首先简单了解一下 Servlet 规范,弄清楚 Tomcat 究竟要实现什 ...
java正则表达式实战例子，持续更新，记下来后面就不用重新写了。。。
1.去掉HTML标签: /** * 去掉HTML外面的标签 * @author CY * */ public class TrimHTML { public static void main(Stri ...
PY安装模块
Python安装失败原因 0环境 , pip版本一般为 7.x , 所以一般需要先升级pip版本 , 也就是执行 ```shellpython -m pip install --upgrade pip ...
关于Android App开发知识体系的一个小总结
前言本文从热更新.异步并发.性能优化.网络请求等多个方面对Android App开发的知识体系进行了一个分类总结.欢迎大家沟通交流. 热更新 [原]热更新开源项目Tinker源码解析之Dex热更新 ...
Traits
'folly/Traits.h' Implements traits complementary to those provided in <type_traits> Implements ...
Linux nohup和&的功效
nohup和&究竟有啥区别?不少同学进行了回复,但并不是所有同学都理解得全对,今天把自己挖的坑自己填了. 测试代码如下: 是一个输出hello与循环轮数的死循环程序,每输出一行就休眠1秒. 使 ...

gp sql

appendonly

hostname

partition

view

gp sql的更多相关文章

随机推荐

热门专题