CharsRefIntHashMap并不比HashMap<String, Integer>快
我模仿 Lucene 的 BytesRef 写了一个 CharsRefIntHashMap,实测效果并不如 HashMap<String, Integer>。代码如下:
package com.dp.arts.lucenex.utils;
import org.apache.lucene.util.CharsRef;
/**
 * A map from character slices to {@code int} counters.
 *
 * <p>Every key can be given either as a {@link CharsRef} or as the equivalent
 * {@code (char[] arr, int offset, int length)} triple describing the slice
 * {@code arr[offset .. offset + length)}.
 */
public interface CharsRefIntMap {

    /**
     * Callback invoked once per entry by {@link CharsRefIntMap#forEach}.
     */
    abstract class CharsRefIntEntryAccessor {
        /**
         * Receives one entry of the map.
         *
         * @param arr    array holding the key characters
         * @param offset index of the first key character in {@code arr}
         * @param length number of key characters
         * @param value  counter currently associated with the key
         */
        public abstract void access(char[] arr, int offset, int length, int value);
    }

    /** Adds 1 to the counter of {@code key}, creating the entry if it is absent. */
    void incKey(CharsRef key);

    /** Adds {@code add} to the counter of {@code key}, creating the entry if it is absent. */
    void incKey(CharsRef key, int add);

    /** Adds 1 to the counter of the slice {@code arr[offset .. offset + length)}. */
    void incKey(char[] arr, int offset, int length);

    /** Adds {@code add} to the counter of the slice {@code arr[offset .. offset + length)}. */
    void incKey(char[] arr, int offset, int length, int add);

    /**
     * Returns the counter of {@code key}.
     * The {@code CharsRefIntHashMap} implementation returns 0 for an absent key.
     */
    int get(CharsRef key);

    /** Returns the counter of {@code key}, or {@code noEntryValue} if the key is absent. */
    int get(CharsRef key, int noEntryValue);

    /**
     * Returns the counter of the slice {@code arr[offset .. offset + length)}.
     * The {@code CharsRefIntHashMap} implementation returns 0 for an absent key.
     */
    int get(char[] arr, int offset, int length);

    /**
     * Returns the counter of the slice {@code arr[offset .. offset + length)},
     * or {@code noEntryValue} if the key is absent.
     */
    int get(char[] arr, int offset, int length, int noEntryValue);

    /** Returns the number of distinct keys in the map. */
    int size();

    /** Calls {@code accessor} once for every entry in the map. */
    void forEach(CharsRefIntEntryAccessor accessor);
}
import java.util.Arrays;
import org.apache.lucene.util.CharsRef;
import com.dp.arts.lucenex.utils.CharsRefIntMap.CharsRefIntEntryAccessor;
/**
 * Open-addressing hash map that keeps an {@code int} counter per character slice.
 *
 * <p>A key is the slice {@code arr[offset .. offset + length)}; two keys are equal when
 * their slices hold the same characters. Entries live in parallel arrays indexed by an
 * entry ordinal assigned in insertion order, and {@code ords} maps a hash slot to that
 * ordinal ({@code -1} marks an empty slot). The table size is always a power of two and
 * doubles as soon as the number of entries reaches half the table size. There is no
 * removal operation.
 *
 * <p><b>Key aliasing:</b> inserting a new key stores a reference to the caller's array,
 * not a copy. Mutating that array afterwards (for example by reusing a buffer) silently
 * changes the stored key and breaks later lookups.
 *
 * <p>This class performs no synchronization.
 */
public class CharsRefIntHashMap implements CharsRefIntMap {
    public static final int DEFAULT_CAPACITY = 16;

    /** Smallest usable table: with a single slot the half-full growth trigger never fires. */
    private static final int MIN_CAPACITY = 2;
    /** Largest power of two that is a valid array length. */
    private static final int MAX_CAPACITY = 1 << 30;
    /** Marker stored in {@code ords} for an unoccupied hash slot. */
    private static final int EMPTY = -1;

    // Per-entry data, indexed by entry ordinal (0 .. count - 1).
    private char[][] arrs;
    private int[] offsets;
    private int[] lengths;
    private int[] values;
    // Hash slot -> entry ordinal, or EMPTY.
    private int[] ords;
    private int hashSize;
    private int halfHashSize;
    private int hashMask;
    private int count;

    /** Creates a map with {@link #DEFAULT_CAPACITY} hash slots. */
    public CharsRefIntHashMap() {
        this(DEFAULT_CAPACITY);
    }

    /**
     * Creates a map with at least {@code capacity} hash slots.
     *
     * @param capacity requested number of hash slots; rounded up to a power of two
     *                 (minimum 2) so that masking with {@code size - 1} is a valid modulo
     * @throws IllegalArgumentException if {@code capacity} is not in {@code [1, 2^30]}
     */
    public CharsRefIntHashMap(int capacity) {
        final int size = tableSizeFor(capacity);
        arrs = new char[size][];
        offsets = new int[size];
        lengths = new int[size];
        values = new int[size];
        ords = new int[size];
        Arrays.fill(ords, EMPTY);
        hashSize = size;
        halfHashSize = size >>> 1;
        hashMask = size - 1;
    }

    /** Validates {@code capacity} and rounds it up to a power of two that is at least 2. */
    private static int tableSizeFor(int capacity) {
        if (capacity <= 0 || capacity > MAX_CAPACITY) {
            throw new IllegalArgumentException(
                    "capacity must be in [1, " + MAX_CAPACITY + "]: " + capacity);
        }
        int size = MIN_CAPACITY;
        while (size < capacity) {
            size <<= 1;
        }
        return size;
    }

    /** Adds 1 to the counter of {@code key}, inserting the key if it is absent. */
    @Override
    public void incKey(CharsRef key) {
        incKey(key.chars, key.offset, key.length, 1);
    }

    /** Adds {@code add} to the counter of {@code key}, inserting the key if it is absent. */
    @Override
    public void incKey(CharsRef key, int add) {
        incKey(key.chars, key.offset, key.length, add);
    }

    /** Adds 1 to the counter of the slice {@code arr[offset .. offset + length)}. */
    @Override
    public void incKey(char[] arr, int offset, int length) {
        incKey(arr, offset, length, 1);
    }

    /**
     * Adds {@code add} to the counter of the slice {@code arr[offset .. offset + length)}.
     * A new key starts at {@code add}; {@code arr} is stored by reference, not copied.
     *
     * @throws IllegalStateException if a new key would require growing beyond 2^30 slots
     */
    @Override
    public void incKey(char[] arr, int offset, int length, int add) {
        final int code = charsHashCode(arr, offset, length);
        final int pos = findSlot(arr, offset, length, code);
        final int e = ords[pos];
        if (e != EMPTY) {
            values[e] += add;
            return;
        }
        // Refuse before touching any state so the map stays consistent after the throw.
        if (count + 1 == halfHashSize && hashSize == MAX_CAPACITY) {
            throw new IllegalStateException("map cannot grow beyond " + MAX_CAPACITY + " slots");
        }
        arrs[count] = arr;
        offsets[count] = offset;
        lengths[count] = length;
        values[count] = add;
        ords[pos] = count;
        ++count;
        if (count == halfHashSize) {
            rehash(hashSize << 1);
        }
    }

    /**
     * Returns the hash slot that holds the given key, or the empty slot where it belongs.
     *
     * <p>The probe stride is computed once from the key's hash and forced odd. An odd
     * stride is coprime with the power-of-two table size, so the probe visits every slot
     * and always terminates while at least one slot is empty.
     */
    private int findSlot(char[] arr, int offset, int length, int code) {
        int pos = code & hashMask;
        int e = ords[pos];
        if (e != EMPTY && !charsEquals(arrs[e], offsets[e], lengths[e], arr, offset, length)) {
            final int inc = ((code >> 8) + code) | 1;
            do {
                code += inc;
                pos = code & hashMask;
                e = ords[pos];
            } while (e != EMPTY
                    && !charsEquals(arrs[e], offsets[e], lengths[e], arr, offset, length));
        }
        return pos;
    }

    /**
     * Grows every array to {@code newSize} and re-inserts all entries into a fresh slot table.
     *
     * @param newSize new table size; must be a power of two larger than the current size
     */
    private void rehash(int newSize) {
        final char[][] newArrs = Arrays.copyOf(arrs, newSize);
        final int[] newOffsets = Arrays.copyOf(offsets, newSize);
        final int[] newLengths = Arrays.copyOf(lengths, newSize);
        final int[] newValues = Arrays.copyOf(values, newSize);
        final int[] newOrds = new int[newSize];
        Arrays.fill(newOrds, EMPTY);
        final int newHashMask = newSize - 1;
        // Entry ordinals are dense (0 .. count - 1), so walk them directly.
        for (int e = 0; e < count; ++e) {
            int code = charsHashCode(newArrs[e], newOffsets[e], newLengths[e]);
            int pos = code & newHashMask;
            if (newOrds[pos] != EMPTY) {
                // Same fixed odd stride as findSlot so later lookups follow the same path.
                final int inc = ((code >> 8) + code) | 1;
                do {
                    code += inc;
                    pos = code & newHashMask;
                } while (newOrds[pos] != EMPTY);
            }
            newOrds[pos] = e;
        }
        ords = newOrds;
        arrs = newArrs;
        offsets = newOffsets;
        lengths = newLengths;
        values = newValues;
        hashSize = newSize;
        halfHashSize = newSize >>> 1;
        hashMask = newHashMask;
    }

    /**
     * Hashes the slice {@code chars[offset .. offset + length)} with the polynomial
     * {@code h = 31 * h + c}, starting from 0.
     */
    public int charsHashCode(char[] chars, int offset, int length) {
        final int prime = 31;
        int result = 0;
        final int end = offset + length;
        for (int i = offset; i < end; i++) {
            result = prime * result + chars[i];
        }
        return result;
    }

    /** Returns {@code true} when both slices have the same length and the same characters. */
    public boolean charsEquals(char[] lhsArr, int lhsOffset, int lhsLength,
                               char[] rhsArr, int rhsOffset, int rhsLength) {
        if (lhsLength != rhsLength) {
            return false;
        }
        final int end = lhsOffset + lhsLength;
        for (int upto = lhsOffset, otherUpto = rhsOffset; upto < end; upto++, otherUpto++) {
            if (lhsArr[upto] != rhsArr[otherUpto]) {
                return false;
            }
        }
        return true;
    }

    /** Returns the counter of {@code key}, or 0 if the key is absent. */
    @Override
    public int get(CharsRef key) {
        return get(key.chars, key.offset, key.length, 0);
    }

    /** Returns the counter of {@code key}, or {@code noEntryValue} if the key is absent. */
    @Override
    public int get(CharsRef key, int noEntryValue) {
        return get(key.chars, key.offset, key.length, noEntryValue);
    }

    /** Returns the counter of the slice, or 0 if the key is absent. */
    @Override
    public int get(char[] arr, int offset, int length) {
        return get(arr, offset, length, 0);
    }

    /** Returns the counter of the slice, or {@code noEntryValue} if the key is absent. */
    @Override
    public int get(char[] arr, int offset, int length, int noEntryValue) {
        final int code = charsHashCode(arr, offset, length);
        final int e = ords[findSlot(arr, offset, length, code)];
        return e == EMPTY ? noEntryValue : values[e];
    }

    /** Calls {@code accessor} once per entry, in insertion order. */
    @Override
    public void forEach(CharsRefIntEntryAccessor accessor) {
        for (int e = 0; e < count; ++e) {
            accessor.access(arrs[e], offsets[e], lengths[e], values[e]);
        }
    }

    /** Returns the number of distinct keys stored. */
    @Override
    public int size() {
        return count;
    }

    /** Returns the current number of hash slots; for tests only. */
    public int hashSize() {
        return hashSize;
    }
}
package com.dp.arts.lucenex.utils;
import java.util.HashMap;
import java.util.Random;
import
org.apache.lucene.util.CharsRef;
/**
 * Rough timing comparison between {@code CharsRefIntHashMap} and
 * {@code HashMap<String, Integer>} used as occurrence counters.
 *
 * <p>Both maps are fed the same sequence of keys. There is no JIT warm-up phase and each
 * map is timed only once, so the printed numbers are indicative at best.
 */
public class CharsRefIntHashMapBenchmark {
    private static final Random randGen = new Random();
    private static final char[] numbersAndLetters = ("0123456789abcdefghijklmnopqrstuvwxyz" +
            "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ").toCharArray();

    /** Number of distinct random strings used as keys. */
    private static final int DISTINCT_KEYS = 10000;
    /** Number of key occurrences fed to each map per round. */
    private static final int MAX = 100000;
    /** Number of times the whole occurrence sequence is replayed. */
    private static final int ROUNDS = 100;

    /**
     * Builds a random string drawn from {@code numbersAndLetters}.
     *
     * @param length number of characters to generate
     * @return the random string, or {@code null} when {@code length < 1}
     */
    private static String randomString(int length) {
        if (length < 1) {
            return null;
        }
        char[] randBuffer = new char[length];
        for (int i = 0; i < randBuffer.length; i++) {
            // Bound by the real alphabet size: a hard-coded 71 never picked the last char.
            randBuffer[i] = numbersAndLetters[randGen.nextInt(numbersAndLetters.length)];
        }
        return new String(randBuffer);
    }

    public static void main(String[] args) {
        String[] strs = new String[DISTINCT_KEYS];
        for (int i = 0; i < DISTINCT_KEYS; ++i) {
            strs[i] = randomString(randGen.nextInt(10) + 1);
        }
        // values[i] is the index into strs of the i-th key occurrence.
        int[] values = new int[MAX];
        for (int i = 0; i < MAX; ++i) {
            values[i] = randGen.nextInt(DISTINCT_KEYS);
        }

        // Pre-extract the (array, offset, count) triple of every occurrence so that this
        // work is not part of the timed section.
        // NOTE(review): StringMisc is not visible here; presumably it exposes the String's
        // backing characters — confirm against its source.
        char[][] arrs = new char[MAX][];
        int[] offsets = new int[MAX];
        int[] counts = new int[MAX];
        for (int i = 0; i < MAX; ++i) {
            String s = strs[values[i]];
            arrs[i] = StringMisc.toCharArray(s);
            offsets[i] = StringMisc.getOffset(s);
            counts[i] = StringMisc.getCount(s);
        }

        long start = System.nanoTime();
        CharsRefIntHashMap map = new CharsRefIntHashMap();
        for (int j = 0; j < ROUNDS; ++j) {
            for (int i = 0; i < MAX; ++i) {
                map.incKey(arrs[i], offsets[i], counts[i]);
            }
        }
        System.err.println("CharsRefIntHashMap time elapsed: " + elapsedMillis(start) +
                "ms.");

        start = System.nanoTime();
        HashMap<String, Integer> oldMap = new HashMap<String, Integer>();
        for (int j = 0; j < ROUNDS; ++j) {
            for (int i = 0; i < MAX; ++i) {
                String s = strs[values[i]];
                Integer v = oldMap.get(s);
                // Integer is immutable: "v += 1" would only rebind the local variable and
                // leave the map untouched, so the new count must be stored back.
                oldMap.put(s, v == null ? 1 : v + 1);
            }
        }
        System.err.println("Origin string map time elapsed: " + elapsedMillis(start) +
                "ms.");
    }

    /** Returns the milliseconds elapsed since the {@code System.nanoTime()} reading {@code startNanos}. */
    private static long elapsedMillis(long startNanos) {
        return (System.nanoTime() - startNanos) / 1000000L;
    }
}
CharsRefIntHashMap并不比HashMap<String, Integer>快的更多相关文章
- string integer == equals 转
java中的数据类型,可分为两类: 1.基本数据类型,也称原始数据类型.byte,short,char,int,long,float,double,boolean 他们之间的比较,应用双等号(== ...
- String,Integer,int类型之间的相互转换
String, Integer, int 三种类型之间可以两两进行转换 1. 基本数据类型到包装数据类型的转换 int -> Integer (两种方法) Integer it1 = new I ...
- Junit + String/Integer/ArrayList/HashMap/TreeMap 基本使用Demo
package JavaTest.test; import java.util.ArrayList; import java.util.HashMap; import java.util.List; ...
- List<Map<String, Integer>> 同key的value全部累加合并
public static void main(String[] args){ List<Map<String,Object>> list1 = new ArrayList&l ...
- Delphi out 参数 string Integer
http://www.delphibasics.co.uk/RTL.asp?Name=Out http://stackoverflow.com/questions/14507310/whats-the ...
- string(Integer)类的equals和==区别和联系(验证密码的时候用得到)
“==”在八种原始数据类型中,判断的是两边的值是否相等.对于对象类型来说,判断的是内存地址,所以为true所满足的条件就是两边的引用指向同一个对象. 比如String s1 = "abcde ...
- java方法笔记: split,instanceof,StringBuilder(),append(),string ,Integer,Math,正则表达式
http://swiftlet.net/archives/709 如果分隔符在前方,会显示“”,在尾部不显示: *|做分割,需要加\\ API文档如下 1.instanceof 查看前者是否是后者的实 ...
- Java String Integer转换 练习:编程求字符串“100”和“150”按十进制数值做差后的结果以字符串形式输出。
package com.swift; public class String_To_Integer_Test { public static void main(String[] args) { /* ...
- Map<String,Integer>acount字符串出现的次数
随机推荐
- 二进制部署Kubernetes-v1.14.1集群
一.部署Kubernetes集群 1.1 Kubernetes介绍 Kubernetes(K8S)是Google开源的容器集群管理系统,K8S在Docker容器技术的基础之上,大大地提高了容器化部署应 ...
- 微信公众号开发之文本消息自动回复,以及系统关注自动回复,php代码
以tshop为例 直接上代码: 企业 cc_wx_sys表为自建,存储系统消息的配置的 字段: id type key status <?php /** * tpshop * ========= ...
- CAS配置(2)之主配置
WEB-INF目录 1.cas.properties文件(打开关闭SSL,主题,定制页面设置) #默认端口配置 #server.name=http://localhost:8080server.nam ...
- poj1958-汉诺四塔问题(三种方法)
链接:http://poj.org/problem?id=1958 大意:汉诺塔升级版,四根柱子,n个盘子,求最少移动次数: 两种方法 递推or递归(当然还有思路3--打表) 思路1:递推(或者DP? ...
- ajax-工作原理,包含技术,缺陷
1:原理 2:包含技术 3:缺陷
- OpenCV:OpenCV目标检测Boost方法训练
在古老的CNN方法出现以后,并不能适用于图像中目标检测.20世纪60年代,Hubel和Wiesel( 百度百科 )在研究猫脑皮层中用于局部敏感和方向选择的神经元时发现其独特的网络结构可以有效地降低反馈 ...
- VTK:VTK嵌入MFC成功
VTK作为医学显示库,得到较多使用.作为较为上层的设计,对OpenGL进行了封装,并且有Windows.Linux.安卓等开发版本,可移植性较强. 不过VES暂时没有编译成功. 以下是嵌入MFC-ID ...
- Python学习①. 基础语法
Python 简介 Python 是一种解释型,面向对象的语言.特点是语法简单,可跨平台 Python 基础语法 交互式编程 交互式编程不需要创建脚本文件,是通过 Python 解释器的交互模式进来编 ...
- python处理中文编码
python2 读取excle中的数据时,对于汉字的读取报错: 代码:data[num][4]={"content": "测试"} data=data[num] ...
- 浅析Python3中的bytes和str类型 (转)
原文出处:https://www.cnblogs.com/chownjy/p/6625299.html#undefined Python 3最重要的新特性之一是对字符串和二进制数据流做了明确的区分.文 ...