Hadoop之Hive UDAF TopN函数实现
/**
 * Hive UDAF "top-N rows": {@code top_n(n, key, extra1, extra2, ...)}.
 *
 * <p>For each group it returns an array of exactly {@code n} structs sorted
 * descending by the key column (second argument). Each struct carries the key
 * plus any additional columns passed after it. If a group produces fewer than
 * {@code n} rows, the array is padded with all-null structs so the result
 * always has {@code n} elements (NOTE(review): this padding is visible to the
 * caller — confirm a shorter list is not preferred).
 */
public class GenericUDAFTopNRow extends AbstractGenericUDAFResolver {

    /**
     * Validates the call-site argument types and returns the evaluator.
     *
     * @param parameters type info of the arguments: (int n, comparable key, extras...)
     * @throws SemanticException if fewer than two arguments are supplied, the
     *         first is not an int, or the sort key is not a comparable type
     */
    @Override
    public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters)
            throws SemanticException {
        if (parameters.length < 2) {
            throw new UDFArgumentTypeException(parameters.length - 1,
                    "At least two arguments are expected.");
        }
        // First argument is n; it must resolve to an int inspector.
        if (!(TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(parameters[0])
                instanceof WritableIntObjectInspector)) {
            throw new UDFArgumentTypeException(0, "The first argument must be integer,"
                    + TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(parameters[0]).getClass());
        }
        // Second argument is the sort key; Hive cannot order map<> values
        // (or complex types containing them).
        if (!ObjectInspectorUtils.compareSupported(
                TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(parameters[1]))) {
            throw new UDFArgumentTypeException(1,
                    "Cannot support comparison of map<> type or complex type containing map<>.");
        }
        return new TopNEvaluator();
    }

    /** Aggregation state: the current candidate rows, sorted descending by key. */
    static class TopNBuffer implements AggregationBuffer {
        // Each element is one row: [key, extra1, extra2, ...].
        // A row whose first slot (the key) is null is a padding row.
        List<Object[]> container;
    }

    public static class TopNEvaluator extends GenericUDAFEvaluator {
        // Number of projected columns per row (argument count minus the leading n).
        int size;
        // Struct field names ("f0", "f1", ...) of the partial/final result.
        String[] fieldNM;
        // Standard object inspectors for each projected column.
        ObjectInspector[] fieldOI;
        // Inspectors of the raw call-site arguments; set only in PARTIAL1/COMPLETE.
        ObjectInspector[] originalOI;
        // Inspector of the partial aggregation (list<struct>); set only in PARTIAL2/FINAL.
        StandardListObjectInspector partialOI;
        StandardStructObjectInspector partialElemOI;

        /**
         * Captures the input inspectors for the current mode and returns the
         * output inspector: a list of structs with fields f0..f(size-1).
         */
        @Override
        public ObjectInspector init(Mode m, ObjectInspector[] parameters)
                throws HiveException {
            super.init(m, parameters);
            if (m == Mode.PARTIAL1 || m == Mode.COMPLETE) {
                // Raw rows come in: parameters are (n, key, extras...).
                this.originalOI = new ObjectInspector[parameters.length];
                System.arraycopy(parameters, 0, this.originalOI, 0, parameters.length);
                this.size = parameters.length - 1;
                this.fieldNM = new String[this.size];
                this.fieldOI = new ObjectInspector[this.size];
                for (int i = 0; i < this.size; i++) {
                    this.fieldNM[i] = "f" + i;
                    // Standard inspectors so buffered copies are self-contained.
                    this.fieldOI[i] = ObjectInspectorUtils.getStandardObjectInspector(parameters[i + 1]);
                }
                return ObjectInspectorFactory.getStandardListObjectInspector(
                        ObjectInspectorFactory.getStandardStructObjectInspector(
                                Arrays.asList(this.fieldNM), Arrays.asList(this.fieldOI)));
            } else if (m == Mode.PARTIAL2 || m == Mode.FINAL) {
                // Merging partials: the single parameter is list<struct<f0,...>>.
                this.partialOI = (StandardListObjectInspector) parameters[0];
                this.partialElemOI =
                        (StandardStructObjectInspector) this.partialOI.getListElementObjectInspector();
                List<? extends StructField> structFieldRefs = this.partialElemOI.getAllStructFieldRefs();
                this.size = structFieldRefs.size();
                this.fieldNM = new String[this.size];
                this.fieldOI = new ObjectInspector[this.size];
                for (int i = 0; i < this.size; i++) {
                    StructField sf = structFieldRefs.get(i);
                    this.fieldNM[i] = sf.getFieldName();
                    this.fieldOI[i] = sf.getFieldObjectInspector();
                }
                return ObjectInspectorUtils.getStandardObjectInspector(this.partialOI);
            }
            return null;
        }

        @Override
        public AggregationBuffer getNewAggregationBuffer() throws HiveException {
            TopNBuffer buffer = new TopNBuffer();
            reset(buffer);
            return buffer;
        }

        @Override
        public void reset(AggregationBuffer agg) throws HiveException {
            TopNBuffer buffer = (TopNBuffer) agg;
            buffer.container = new LinkedList<Object[]>();
        }

        /**
         * Consumes one raw row. The buffer is kept at exactly n rows, sorted
         * descending by key, padded with all-null rows while fewer than n real
         * rows have been seen.
         */
        @Override
        public void iterate(AggregationBuffer agg, Object[] parameters)
                throws HiveException {
            // Skip entirely-null input (e.g. an empty result set feeding the UDAF).
            if (isEmptySet(agg, parameters)) {
                return;
            }
            TopNBuffer buffer = (TopNBuffer) agg;
            int n = ((WritableIntObjectInspector) this.originalOI[0]).get(parameters[0]);
            int s = buffer.container.size();
            if (s < n) {
                // First row of the group: store it, then pad to n null rows so
                // every later call takes the sorted-insert path below.
                Object[] elemVal = new Object[this.size];
                for (int j = 0; j < this.size; j++) {
                    elemVal[j] = ObjectInspectorUtils.copyToStandardObject(
                            parameters[j + 1], this.originalOI[j + 1]);
                }
                buffer.container.add(elemVal);
                /* make sure the size should be n */
                while (buffer.container.size() < n) {
                    buffer.container.add(new Object[this.size]);
                }
            } else {
                // Sorted insert: place the new row before the first existing row
                // with a smaller key (null padding keys compare smallest).
                for (int i = 0; i < s; i++) {
                    if (ObjectInspectorUtils.compare(buffer.container.get(i)[0], this.fieldOI[0],
                            parameters[1], this.originalOI[1]) < 0) {
                        Object[] elemVal = new Object[this.size];
                        for (int j = 0; j < this.size; j++) {
                            elemVal[j] = ObjectInspectorUtils.copyToStandardObject(
                                    parameters[j + 1], this.originalOI[j + 1]);
                        }
                        buffer.container.add(i, elemVal);
                        break;
                    }
                }
                /* make sure the size should be n */
                while (buffer.container.size() > n) {
                    buffer.container.remove(n);
                }
            }
        }

        @Override
        public Object terminatePartial(AggregationBuffer agg)
                throws HiveException {
            TopNBuffer buffer = (TopNBuffer) agg;
            return buffer.container.isEmpty() ? null : buffer.container;
        }

        /**
         * Folds one partial result (a list of structs) into the buffer,
         * keeping the buffer sorted descending and fixed at its current
         * length (padding with null rows when fewer real rows exist).
         */
        @Override
        public void merge(AggregationBuffer agg, Object partial) {
            // Skip empty partials (e.g. a mapper that saw no rows).
            if (isEmptySet(agg, partial)) {
                return;
            }
            TopNBuffer buffer = (TopNBuffer) agg;
            List<?> listVal = this.partialOI.getList(partial);
            // Target length; both sides are normally already padded to n.
            final int cn = Math.max(buffer.container.size(), listVal.size());
            // Deep-copy the incoming structs into standard objects.
            List<Object[]> values = new LinkedList<Object[]>();
            for (Object elemObj : listVal) {
                List<Object> elemVal = this.partialElemOI.getStructFieldsDataAsList(elemObj);
                Object[] value = new Object[this.size];
                for (int i = 0, n = elemVal.size(); i < n; i++) {
                    value[i] = ObjectInspectorUtils.copyToStandardObject(elemVal.get(i), this.fieldOI[i]);
                }
                values.add(value);
            }
            buffer.container = mergeSortNotNull(buffer.container, values);
            while (buffer.container.size() < cn) {
                buffer.container.add(new Object[this.size]);
            }
            while (buffer.container.size() > cn) {
                buffer.container.remove(cn);
            }
        }

        @Override
        public Object terminate(AggregationBuffer agg) throws HiveException {
            TopNBuffer buffer = (TopNBuffer) agg;
            return buffer.container.isEmpty() ? null : buffer.container;
        }

        /**
         * Merges two descending-sorted row lists into one descending-sorted
         * list, skipping padding rows (null key). Rows with equal keys are
         * BOTH kept; the caller's trim loop bounds the final length.
         *
         * <p>Bug fix: the previous version advanced both cursors on a key tie
         * and emitted only list1's row, silently dropping a valid row from
         * list2 and later replacing it with a null padding row.
         */
        private List<Object[]> mergeSortNotNull(List<Object[]> list1, List<Object[]> list2) {
            List<Object[]> result = new LinkedList<Object[]>();
            int i1 = 0, i2 = 0, n1 = list1.size(), n2 = list2.size();
            while (i1 < n1 && i2 < n2) {
                if (list1.get(i1)[0] == null) {
                    i1++;
                    continue;
                }
                if (list2.get(i2)[0] == null) {
                    i2++;
                    continue;
                }
                int cp = ObjectInspectorUtils.compare(list1.get(i1)[0], this.fieldOI[0],
                        list2.get(i2)[0], this.fieldOI[0]);
                if (cp >= 0) {
                    // list1's row wins ties; list2's equal row is emitted on a
                    // later iteration instead of being dropped.
                    result.add(list1.get(i1));
                    i1++;
                } else {
                    result.add(list2.get(i2));
                    i2++;
                }
            }
            while (i1 < n1) {
                if (list1.get(i1)[0] == null) {
                    i1++;
                    continue;
                }
                result.add(list1.get(i1));
                i1++;
            }
            while (i2 < n2) {
                if (list2.get(i2)[0] == null) {
                    i2++;
                    continue;
                }
                result.add(list2.get(i2));
                i2++;
            }
            return result;
        }

        /** True when there is no buffer or every raw argument is null. */
        private boolean isEmptySet(AggregationBuffer agg, Object[] parameters) {
            if (agg == null || parameters == null) {
                return true;
            } else {
                for (int i = 0; i < parameters.length; i++) {
                    if (parameters[i] != null) {
                        return false;
                    }
                }
                return true;
            }
        }

        /** True when there is no buffer or no partial result to merge. */
        private boolean isEmptySet(AggregationBuffer agg, Object parameter) {
            return (agg == null) || (parameter == null);
        }
    }
}
Hadoop之Hive UDAF TopN函数实现的更多相关文章
- Hadoop生态圈-Hive的自定义函数之UDAF(User-Defined Aggregation Function)
Hadoop生态圈-Hive的自定义函数之UDAF(User-Defined Aggregation Function) 作者:尹正杰 版权声明:原创作品,谢绝转载!否则将追究法律责任.
- Hadoop生态圈-hive编写自定义函数
Hadoop生态圈-hive编写自定义函数 作者:尹正杰 版权声明:原创作品,谢绝转载!否则将追究法律责任.
- Hadoop生态圈-Hive的自定义函数之UDTF(User-Defined Table-Generating Functions)
Hadoop生态圈-Hive的自定义函数之UDTF(User-Defined Table-Generating Functions) 作者:尹正杰 版权声明:原创作品,谢绝转载!否则将追究法律责任.
- Hadoop生态圈-Hive的自定义函数之UDF(User-Defined-Function)
Hadoop生态圈-Hive的自定义函数之UDF(User-Defined-Function) 作者:尹正杰 版权声明:原创作品,谢绝转载!否则将追究法律责任.
- Hadoop生态圈-Hive函数
Hadoop生态圈-Hive函数 作者:尹正杰 版权声明:原创作品,谢绝转载!否则将追究法律责任.
- Hive执行count函数失败,Caused by: org.apache.hadoop.ipc.RemoteException(org.apache.hadoop.security.AccessControlException)
Hive执行count函数失败 1.现象: 0: jdbc:hive2://192.168.137.12:10000> select count(*) from emp; INFO : Numb ...
- Hive UDAF开发之同时计算最大值与最小值
卷首语 前一篇文章hive UDAF开发入门和运行过程详解(转)里面讲过UDAF的开发过程,其中说到如果要深入理解UDAF的执行,可以看看求平均值的UDF的源码 本人在看完源码后,也还是没能十分理解里 ...
- hive UDAF开发入门和运行过程详解(转)
介绍 hive的用户自定义聚合函数(UDAF)是一个很好的功能,集成了先进的数据处理.hive有两种UDAF:简单和通用.顾名思义,简单的UDAF,写的相当简单的,但因为使用Java反射导致性能损失, ...
- [Hive_12] Hive 的自定义函数
0. 说明 UDF //user define function //输入单行,输出单行,类似于 format_number(age,'000') UDTF //user define table-g ...
随机推荐
- 在java 中,数组与 List<T> 类型的相互转换
在java中,数组与List<T> 之前进行互相转换,转换方法可总结为以下几种: 一. 将 数组转换成List<T> 1. 使用 Collections 的addAll 方法 ...
- 二模10day2解题报告
T1.最多因子数(divisors) 给出范围l,r求其中约数和最大的最小整数. 非常深井冰的题目:如果特判加暴力的话分数低的可怜 AC做法要用到分解质因数和线性筛(这俩好写),然而,一个一个枚举还是 ...
- 图解CISCO 3550忘记密码解决方法
图解CISCO3550忘记密码解决方法 Cisco网络设备密码忘记怎么初始出厂默认值?这时网友常常会提出的问题,怎么解决,有人说啦,去百度去谷歌一下不就行啦,然而这对初学者任是个挑战,虽然步骤只有简单 ...
- LoadRunner用户行为模拟器 《第三篇》
用户行为模拟器简称VU,VU通过运行VU脚本模拟了用户对软件的操作行为.VU是基于网络协议的.很明显,被测服务器是通过各种各样的网络协议与客户端打交道的.VU要“骗过”被测服务器,当然就要遵守这些协议 ...
- Linux:系统的密码忘记了,登录不上
可先通过进入单用户模式,修改下密码再登录记录. 第一步: 重启系统,在进入系统之前不断的按键盘左上角的“Esc”键,会进入如下页面: 然后按e进入编辑页面 第二步: 进入如下页面后,通过键盘的上下方向 ...
- boost实现串口通信(一):小试牛刀
/************************************************************************/ /* 功能:boost实现串口通信类 */ /* ...
- Linux命令之type
1:linux命令可以分为很多类,其中常见得类型: (1):builtin --内置命令 (2):alias --命令别名 (3):file --外部命令 具体有哪些内置命令以及内置命令各个用法: [ ...
- pap与chap协议
1.pap:直接在网络上发送密码明文 2.chap: 网络上发送的是密码的密文;server给client发一段随机数(challenge),client利用随机数对密码进行加密,将用户名和加密后的密 ...
- C++ sstream 中处理字符串
C++引入ostringstream.istringstream.stringstream这三个类,要使用他们创建对象就必须包含<sstream>这个头文件. istringstream的 ...
- 无法运行maven项目
tomcat Server Location 选择 User Tomcat installation 设置CATALINA_HOME环境变量(tomcat start.bat启动不了)1.CATALI ...