public class GenericUDAFTopNRow extends AbstractGenericUDAFResolver {

@Override
public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters)
   throws SemanticException {
  if (parameters.length < 2) {
   throw new UDFArgumentTypeException(parameters.length - 1,"At least two argument is expected.");
  }

if(!(TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(parameters[0]) instanceof WritableIntObjectInspector)){
   throw new UDFArgumentTypeException(0,"The first argument must be integer,"+TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(parameters[0]).getClass());
  }
  if (!ObjectInspectorUtils.compareSupported(TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(parameters[1]))) {
   throw new UDFArgumentTypeException(1,"Cannot support comparison of map<> type or complex type containing map<>.");
  }

return new TopNEvaluator();
}

static class TopNBuffer implements AggregationBuffer {
  List<Object[]> container;
}

public static class TopNEvaluator extends GenericUDAFEvaluator {
  int size;
  String[] fieldNM;
  ObjectInspector[] fieldOI;
  ObjectInspector[] originalOI;
  StandardListObjectInspector partialOI;
  StandardStructObjectInspector partialElemOI;

@Override
  public ObjectInspector init(Mode m, ObjectInspector[] parameters)
    throws HiveException {
   super.init(m, parameters);
   if (m == Mode.PARTIAL1 || m == Mode.COMPLETE) {
    this.originalOI = new ObjectInspector[parameters.length];
    System.arraycopy(parameters, 0, this.originalOI, 0, parameters.length);
    this.size = parameters.length-1;
    this.fieldNM = new String[this.size];
    this.fieldOI = new ObjectInspector[this.size];
    for (int i = 0; i < this.size; i++) {
     this.fieldNM[i] = "f" + i;
     this.fieldOI[i] = ObjectInspectorUtils.getStandardObjectInspector(parameters[i+1]);
    }
    return ObjectInspectorFactory.getStandardListObjectInspector(ObjectInspectorFactory.getStandardStructObjectInspector(Arrays.asList(this.fieldNM),Arrays.asList(this.fieldOI)));
   } else if (m == Mode.PARTIAL2 || m == Mode.FINAL) {
    this.partialOI = (StandardListObjectInspector) parameters[0];
    this.partialElemOI=(StandardStructObjectInspector) this.partialOI.getListElementObjectInspector();
    List<? extends StructField> structFieldRefs = this.partialElemOI.getAllStructFieldRefs();
    this.size = structFieldRefs.size();
    this.fieldNM = new String[this.size];
    this.fieldOI = new ObjectInspector[this.size];
    for (int i = 0; i < this.size; i++) {
     StructField sf = structFieldRefs.get(i);
     this.fieldNM[i] = sf.getFieldName();
     this.fieldOI[i] = sf.getFieldObjectInspector();
    }
    return ObjectInspectorUtils.getStandardObjectInspector(this.partialOI);
   }

return null;

}
  @Override
  public AggregationBuffer getNewAggregationBuffer() throws HiveException {
   TopNBuffer buffer = new TopNBuffer();
   reset(buffer);
   return buffer;
  }

@Override
  public void reset(AggregationBuffer agg) throws HiveException {
   TopNBuffer buffer = (TopNBuffer) agg;
   buffer.container = new LinkedList<Object[]>();
  }

@Override
  public void iterate(AggregationBuffer agg, Object[] parameters)
    throws HiveException {
   /*如果查询结果为空,不作处理*/
   if(isEmptySet(agg,parameters)){
    return;
   }
   TopNBuffer buffer = (TopNBuffer) agg;
   int n = ((WritableIntObjectInspector)this.originalOI[0]).get(parameters[0]);
   int s = buffer.container.size();
   if(s < n){
    Object[] elemVal = new Object[this.size];
    for (int j = 0; j < this.size; j++) {
     elemVal[j] = ObjectInspectorUtils.copyToStandardObject(parameters[j+1],this.originalOI[j+1]);
    }
    buffer.container.add(elemVal);
    /*make sure the size should be n*/
    while(buffer.container.size() < n){
     buffer.container.add(new Object[this.size]);
    }
   }else{
    for(int i = 0;i < s;i++){
     if (ObjectInspectorUtils.compare(buffer.container.get(i)[0],this.fieldOI[0], parameters[1], this.originalOI[1]) < 0) {
      Object[] elemVal = new Object[this.size];
      for(int j=0;j<this.size;j++){
       elemVal[j] = ObjectInspectorUtils.copyToStandardObject(parameters[j+1],this.originalOI[j+1]);
      }
      buffer.container.add(i, elemVal);
      break;
     }
    }
    /*make sure the size should be n*/
    while(buffer.container.size() > n){
     buffer.container.remove(n);
    }
   }
  }

@Override
  public Object terminatePartial(AggregationBuffer agg)
    throws HiveException {
   TopNBuffer buffer = (TopNBuffer) agg;
   return buffer.container.isEmpty()?null:buffer.container;
  }

@Override
  public void merge(AggregationBuffer agg, Object partial) {
   /*如果查询结果为空,不作处理*/
   if(isEmptySet(agg,partial)){
    return;
   }
   TopNBuffer buffer = (TopNBuffer) agg;
   List<?> listVal = this.partialOI.getList(partial);
   final int cn = Math.max(buffer.container.size(), listVal.size());
   List<Object[]> values = new LinkedList<Object[]>();
   for(Object elemObj:listVal){
    List<Object> elemVal=this.partialElemOI.getStructFieldsDataAsList(elemObj);
    Object[] value=new Object[this.size];
    for(int i=0,n=elemVal.size();i<n;i++){
     value[i]=ObjectInspectorUtils.copyToStandardObject(elemVal.get(i), this.fieldOI[i]);
    }
    values.add(value);
   }
   buffer.container=mergeSortNotNull(buffer.container, values);
   while(buffer.container.size()<cn){
    buffer.container.add(new Object[this.size]);
   }
   while(buffer.container.size() > cn){
    buffer.container.remove(cn);
   }
  }

@Override
  public Object terminate(AggregationBuffer agg) throws HiveException {
   TopNBuffer buffer = (TopNBuffer) agg;
   return buffer.container.isEmpty()?null:buffer.container;
  }
  private List<Object[]> mergeSortNotNull(List<Object[]> list1, List<Object[]> list2){
   List<Object[]> result=new LinkedList<Object[]>();
   int i1=0, i2=0, n1=list1.size(), n2=list2.size();
   while(i1<n1 && i2<n2){
    if(list1.get(i1)[0]==null){
     i1++;
     continue;
    }
    if(list2.get(i2)[0]==null){
     i2++;
     continue;
    }
    int cp = ObjectInspectorUtils.compare(list1.get(i1)[0],this.fieldOI[0], list2.get(i2)[0], this.fieldOI[0]);
    if(cp > 0){
     result.add(list1.get(i1));
     i1++;
    }else if(cp<0){
     result.add(list2.get(i2));
     i2++;
    }else{
     result.add(list1.get(i1));
     i1++;
     i2++;
    }
   }
   while(i1<n1){
    if(list1.get(i1)[0]==null){
     i1++;
     continue;
    }
    result.add(list1.get(i1));
    i1++;
   }
   while(i2<n2){
    if(list2.get(i2)[0]==null){
     i2++;
     continue;
    }
    result.add(list2.get(i2));
    i2++;
   }
   return result;
  }
  private boolean isEmptySet(AggregationBuffer agg, Object[] parameters){
   if(agg==null || parameters==null){
    return true;
   }else{
    for(int i=0; i<parameters.length; i++){
     if(parameters[i]!=null){
      return false;
     }
    }
    return true;
   }
  }
  private boolean isEmptySet(AggregationBuffer agg, Object parameter){
   return (agg==null) || (parameter==null);
  }
}

}

Hadoop之Hive UDAF TopN函数实现的更多相关文章

  1. Hadoop生态圈-Hive的自定义函数之UDAF(User-Defined Aggregation Function)

    Hadoop生态圈-Hive的自定义函数之UDAF(User-Defined Aggregation Function) 作者:尹正杰 版权声明:原创作品,谢绝转载!否则将追究法律责任.

  2. Hadoop生态圈-hive编写自定义函数

    Hadoop生态圈-hive编写自定义函数 作者:尹正杰 版权声明:原创作品,谢绝转载!否则将追究法律责任.

  3. Hadoop生态圈-Hive的自定义函数之UDTF(User-Defined Table-Generating Functions)

    Hadoop生态圈-Hive的自定义函数之UDTF(User-Defined Table-Generating Functions) 作者:尹正杰 版权声明:原创作品,谢绝转载!否则将追究法律责任.

  4. Hadoop生态圈-Hive的自定义函数之UDF(User-Defined-Function)

    Hadoop生态圈-Hive的自定义函数之UDF(User-Defined-Function) 作者:尹正杰 版权声明:原创作品,谢绝转载!否则将追究法律责任.

  5. Hadoop生态圈-Hive函数

    Hadoop生态圈-Hive函数 作者:尹正杰 版权声明:原创作品,谢绝转载!否则将追究法律责任.

  6. Hive执行count函数失败,Caused by: org.apache.hadoop.ipc.RemoteException(org.apache.hadoop.security.AccessControlException)

    Hive执行count函数失败 1.现象: 0: jdbc:hive2://192.168.137.12:10000> select count(*) from emp; INFO : Numb ...

  7. Hive UDAF开发之同时计算最大值与最小值

    卷首语 前一篇文章hive UDAF开发入门和运行过程详解(转)里面讲过UDAF的开发过程,其中说到如果要深入理解UDAF的执行,可以看看求平均值的UDF的源码 本人在看完源码后,也还是没能十分理解里 ...

  8. hive UDAF开发入门和运行过程详解(转)

    介绍 hive的用户自定义聚合函数(UDAF)是一个很好的功能,集成了先进的数据处理.hive有两种UDAF:简单和通用.顾名思义,简单的UDAF,写的相当简单的,但因为使用Java反射导致性能损失, ...

  9. [Hive_12] Hive 的自定义函数

    0. 说明 UDF //user define function //输入单行,输出单行,类似于 format_number(age,'000') UDTF //user define table-g ...

随机推荐

  1. vsftp.conf

    anonymous_enable=NO local_enable=YES write_enable=YES dirmessage_enable=YES xferlog_enable=YES xferl ...

  2. python时间相关

    1.格式化时间time.strftime,返回字符串 import time time.strftime('%Y-%m-%d %H:%M:%S') 2.时间差 timedelta from datet ...

  3. mysql中字符集和校对规则

    首先,明确一下字符集和校对规则的概念:    字符集(charset):是一套符号和编码    校对规则(collation):是在字符集内用于比较字符的一套规则,比如有的规则区分大小写,有的则无视 ...

  4. Oracle建立表空间和用户

    Oracle建立表空间和用户 建立表空间和用户的步骤: 用户 建立:create user 用户名 identified by "密码"; 授权:grant create sess ...

  5. python 基础知识(一)

    python 基础知识(一) 一.python发展介绍 Python的创始人为Guido van Rossum.1989年圣诞节期间,在阿姆斯特丹,Guido为了打发圣诞节的无趣,决心开发一个新的脚本 ...

  6. 解决服务器复制中SID冲突问题

    解决服务器复制中SID冲突问题 如果你有多部的主机需要安装,最快的方法是什么?想必就是用像GHOST之类的软件来进行硬盘的复制.当然,如果是安装在虚拟机之中,则需要复制虚拟的硬盘档案即可,以微软的VP ...

  7. 父窗口调用iframe子窗口方法

    一.父窗口调用iframe子窗口方法 1.HTML语法:<iframe name="myFrame" src="child.html"></i ...

  8. HTTP长连接200万尝试及调优

    对于一个server,我们一般考虑他所能支撑的qps,但有那么一种应用, 我们需要关注的是它能支撑的连接数个数,而并非qps,当然qps也是我们需要考虑的性能点之一.这种应用常见于消息推送系统,也称为 ...

  9. Android WebRTC 音视频开发总结(四)-- webrtc传输模块

    在介绍WebRTC通讯之前我们先来看一个P2P视频聊天包括的主要过程,转载请说明出处(博客园RTC.Blacker): 音视频数据采集->编码->发送->接收->解码-> ...

  10. Configuring Report Manager

     Steps to configure and get Reports in Report manager. 1. Enable getting Reports in Report Manager. ...