Spark-PySpark sql各种内置函数
_functions = {
'lit': 'Creates a :class:`Column` of literal value.',
'col': 'Returns a :class:`Column` based on the given column name.'根据给定的列名返回一个:class:`Column`
'column': 'Returns a :class:`Column` based on the given column name.',根据给定的列名返回一个:class:`Column`
'asc': 'Returns a sort expression based on the ascending order of the given column name.',返回基于给定列名的升序的排序表达式
'desc': 'Returns a sort expression based on the descending order of the given column name.',返回基于给定列名的降序的排序表达式
'upper': 'Converts a string expression to upper case.',将字符串表达式转换为大写
'lower': 'Converts a string expression to upper case.',将字符串表达式转换为大写
'sqrt': 'Computes the square root of the specified float value.',计算指定浮点值的平方根
'abs': 'Computes the absolute value.',计算绝对值
'max': 'Aggregate function: returns the maximum value of the expression in a group.',聚合函数:返回组中表达式的最大值
'min': 'Aggregate function: returns the minimum value of the expression in a group.',聚合函数:返回组中表达式的最小值
'count': 'Aggregate function: returns the number of items in a group.',聚合函数:返回组中的项目数
'sum': 'Aggregate function: returns the sum of all values in the expression.',聚合函数:返回表达式中所有值的总和
'avg': 'Aggregate function: returns the average of the values in a group.',聚合函数:返回组中值的平均值
'mean': 'Aggregate function: returns the average of the values in a group.',聚合函数:返回组中值的平均值
'sumDistinct': 'Aggregate function: returns the sum of distinct values in the expression.',聚合函数:返回表达式中不同值的总和
}
_functions_1_4 = {
# unary math functions
'acos': 'Computes the cosine inverse of the given value; the returned angle is in the range' +
'0.0 through pi.',
'asin': 'Computes the sine inverse of the given value; the returned angle is in the range' +
'-pi/2 through pi/2.',
'atan': 'Computes the tangent inverse of the given value.',
'cbrt': 'Computes the cube-root of the given value.',
'ceil': 'Computes the ceiling of the given value.',
'cos': 'Computes the cosine of the given value.',
'cosh': 'Computes the hyperbolic cosine of the given value.',
'exp': 'Computes the exponential of the given value.',
'expm1': 'Computes the exponential of the given value minus one.',
'floor': 'Computes the floor of the given value.',
'log': 'Computes the natural logarithm of the given value.',
'log10': 'Computes the logarithm of the given value in Base 10.',
'log1p': 'Computes the natural logarithm of the given value plus one.',
'rint': 'Returns the double value that is closest in value to the argument and' +
' is equal to a mathematical integer.',
'signum': 'Computes the signum of the given value.',
'sin': 'Computes the sine of the given value.',
'sinh': 'Computes the hyperbolic sine of the given value.',
'tan': 'Computes the tangent of the given value.',
'tanh': 'Computes the hyperbolic tangent of the given value.',
'toDegrees': '.. note:: Deprecated in 2.1, use degrees instead.',
'toRadians': '.. note:: Deprecated in 2.1, use radians instead.',
'bitwiseNOT': 'Computes bitwise not.',
}
_functions_1_6 = {
# unary math functions
'stddev': 'Aggregate function: returns the unbiased sample standard deviation of' +
' the expression in a group.',
'stddev_samp': 'Aggregate function: returns the unbiased sample standard deviation of' +
' the expression in a group.',
'stddev_pop': 'Aggregate function: returns population standard deviation of' +
' the expression in a group.',
'variance': 'Aggregate function: returns the population variance of the values in a group.',
'var_samp': 'Aggregate function: returns the unbiased variance of the values in a group.',
'var_pop': 'Aggregate function: returns the population variance of the values in a group.',
'skewness': 'Aggregate function: returns the skewness of the values in a group.',
'kurtosis': 'Aggregate function: returns the kurtosis of the values in a group.',
'collect_list': 'Aggregate function: returns a list of objects with duplicates.',
'collect_set': 'Aggregate function: returns a set of objects with duplicate elements' +
' eliminated.',
}
_functions_2_1 = {
# unary math functions
'degrees': 'Converts an angle measured in radians to an approximately equivalent angle ' +
'measured in degrees.',
'radians': 'Converts an angle measured in degrees to an approximately equivalent angle ' +
'measured in radians.',
}
_functions_2_2 = {
'to_date': 'Converts a string date into a DateType using the (optionally) specified format.',
'to_timestamp': 'Converts a string timestamp into a timestamp type using the ' +
'(optionally) specified format.',
}
# math functions that take two arguments as input
_binary_mathfunctions = {
'atan2': 'Returns the angle theta from the conversion of rectangular coordinates (x, y) to' +
'polar coordinates (r, theta).',
'hypot': 'Computes ``sqrt(a^2 + b^2)`` without intermediate overflow or underflow.',
'pow': 'Returns the value of the first argument raised to the power of the second argument.',
}
_window_functions = {
'row_number':
"""returns a sequential number starting at 1 within a window partition.""",
'dense_rank':
"""returns the rank of rows within a window partition, without any gaps.
The difference between rank and dense_rank is that dense_rank leaves no gaps in ranking
sequence when there are ties. That is, if you were ranking a competition using dense_rank
and had three people tie for second place, you would say that all three were in second
place and that the next person came in third. Rank would give me sequential numbers, making
the person that came in third place (after the ties) would register as coming in fifth.
This is equivalent to the DENSE_RANK function in SQL.""",
'rank':
"""returns the rank of rows within a window partition.
The difference between rank and dense_rank is that dense_rank leaves no gaps in ranking
sequence when there are ties. That is, if you were ranking a competition using dense_rank
and had three people tie for second place, you would say that all three were in second
place and that the next person came in third. Rank would give me sequential numbers, making
the person that came in third place (after the ties) would register as coming in fifth.
This is equivalent to the RANK function in SQL.""",
'cume_dist':
"""returns the cumulative distribution of values within a window partition,
i.e. the fraction of rows that are below the current row.""",
'percent_rank':
"""returns the relative rank (i.e. percentile) of rows within a window partition.""",
}
Spark-PySpark sql各种内置函数的更多相关文章
- SQL Server 内置函数、临时对象、流程控制
SQL Server 内置函数 日期时间函数 --返回当前系统日期时间 select getdate() as [datetime],sysdatetime() as [datetime2] getd ...
- [SQL]SUTFF内置函数的用法 (删除指定长度的字符并在指定的起始点插入另一组字符)
STUFF 删除指定长度的字符并在指定的起始点插入另一组字符. 语法 STUFF ( character_expression , start , length , character_express ...
- 10、SQL Server 内置函数、临时对象、流程控制
SQL Server 内置函数 日期时间函数 --返回当前系统日期时间 select getdate() as [datetime],sysdatetime() as [datetime2] getd ...
- sql server内置函数
MSDN标准文档:https://msdn.microsoft.com/zh-cn/library/ff848784(v=sql.120).aspx 配置函数 select @@servername ...
- Sql Server内置函数实现MD5加密
实例 MD5加密“123456”: HashBytes('MD5','123456') 结果:0xE10ADC3949BA59ABBE56E057F20F883E (提示:看完最后,结果要进行转换.) ...
- 总结Sql Server内置函数实现MD5加密
--MD5加密 --HashBytes ('加密方式', '待加密的值') --加密方式= MD2 | MD4 | MD5 | SHA | SHA1 --返回值类型:varbinary(maximum ...
- mysql 内置函数和sql server 内置函数的区别
以下函数均没有对参数做说明,使用的使用需要了解其参数内容 数据库 sql server mysql oracle 举例 获得当前系统时间 getdate() now() sysdate 注意不是函数 ...
- SQL Server 内置函数实现MD5加密
一.MD5加密 HASHBYTES ('加密方式', '待加密的值') 加密方式= MD2 | MD4 | MD5 | SHA | SHA1 返回值类型:varbinary(maxim ...
- [SQL]SUTFF内置函数的用法
STUFF 删除指定长度的字符并在指定的起始点插入另一组字符. 语法 STUFF ( character_expression , start , length , character_express ...
随机推荐
- IO库中的宽字符语言
wchar_t是C/C++的字符类型,是一种扩展的存储方式.wchar_t类型主要用在国际化程序的实现中,但它不等同于uni编码.uni编码的字符一般以wchar_t类型存. IO库为了支持宽字符语言 ...
- nigx下配置tp5.1路由
打开宝塔面板,找到你要配置路由的网站并找到配置文件(如图1) (图1) 2.在配置文件里添加一下代码 set $root = /www/wwwroot/www.blogs.test/public; # ...
- @Transactional spring事务回滚相关
还可以设置回滚点,看下面 /** * 用户登录接口 * * * 1明确事务方法前的命名规则 * 2保证事务方法执行的时间尽可能的短,不允许出现循环操作,不允许出现RPC等网络请求操作 * 3不允许所有 ...
- Postman之获得登录的token,并设置为全局变量
1.调通登录接口(可以参考上篇博客) 网址:Postman之简单使用 2.粘贴以下代码到Tests中 //把json字符串转化为对象 var data=JSON.parse(responseBody) ...
- Android开发build出现java.lang.NumberFormatException: For input string: "tle 0x7f0800aa"错误的解决方案
查看异常栈没有发现项目代码的问题,因为问题是出现在layout文件中. 全局查找tle这个,发现在某个layout文件中title一词被变成ti tle了,结果Android就xjb报错了. 参考
- Azure中配置和发布 Nginx docker到互联网
当Azure build一个Niginx docker 镜像时,无法通过浏览器访问这个镜像,于是想到了把这个网站发布到互联网中,这样就能验证网站是否正确. 本问跳过如何创建Azure Ubantu的虚 ...
- Qt表格导出图片
概述:qt中把某个控件导出保存为图片导出并不复杂,网上也有一堆方法.但是对于tableview中数据很多的情况下势必会出现滚动条,用传统的截屏抓图势会有滚动条,图片数据展示不全.在这我使用了一种折中方 ...
- spring boot入门与进阶教程
SpringBoot入门.SpringBoot进阶.Spring Cloud微服务.Spring Ecosystem 微服务相关.Spring Boot 入门 IDEA 版本.Spring Boot集 ...
- 如何编写一个路由器的界面1-Luci开发入门
Howto:如何写Module(模块)-----------------这一部分主要是翻译github上的document 注意:如果您打算将模块加入LUCI整合之前,您应该阅读Module参考. 本 ...
- 网络初级篇之OSPF(二)实验
一.实验目的: 下面关于OSPF的实验,仔细看配置过程,以增加对OSPF的理解. 二.实现目标: 使用OSPF实现所有主机之间的通信 三.配置过程: 1.AR1的配置过程: ...