public class HyperLogLog extends Object implements NumDistinctValueEstimator
This is an implementation of the following variants of the HyperLogLog (HLL) algorithm:
- Original - the original HLL algorithm from Flajolet et al.: http://algo.inria.fr/flajolet/Publications/FlFuGaMe07.pdf
- HLLNoBias - Google's implementation of bias correction based on a lookup table: http://static.googleusercontent.com/media/research.google.com/en//pubs/archive/40671.pdf
- HLL++ - Google's implementation of the HLL++ algorithm, which uses SPARSE registers: http://static.googleusercontent.com/media/research.google.com/en//pubs/archive/40671.pdf
The following constructor parameters determine which algorithm is used:
- numRegisterIndexBits - number of LSB hashcode bits to be used as the register index. Default is 14. min = 4 and max = 16.
- numHashBits - number of bits for the hashcode. Default is 64. min = 32 and max = 128.
- encoding - type of encoding to use (SPARSE or DENSE). The algorithm automatically switches to DENSE beyond a threshold. Default: SPARSE.
- enableBitPacking - whether to enable bit packing. Bit packing improves compression at the cost of more CPU cycles. Default: true.
- noBias - use Google's bias table lookup for short-range bias correction. Enabling this greatly improves the estimation accuracy for short-range values. Default: true.
Modifier and Type | Class and Description |
---|---|
static class |
HyperLogLog.EncodingType |
static class |
HyperLogLog.HyperLogLogBuilder |
LOG
Modifier and Type | Method and Description |
---|---|
void |
add(long hashcode) |
void |
addBoolean(boolean val) |
void |
addByte(byte val) |
void |
addBytes(byte[] val) |
void |
addChar(char val) |
void |
addDouble(double val) |
void |
addFloat(float val) |
void |
addInt(int val) |
void |
addLong(long val) |
void |
addShort(short val) |
void |
addString(String val)
Java's default charset will be used for strings.
|
void |
addString(String val,
Charset charset) |
void |
addToEstimator(double d) |
void |
addToEstimator(org.apache.hadoop.hive.common.type.HiveDecimal decimal) |
void |
addToEstimator(long v) |
void |
addToEstimator(String s) |
static HyperLogLog.HyperLogLogBuilder |
builder() |
boolean |
canMerge(NumDistinctValueEstimator o) |
long |
count() |
NumDistinctValueEstimator |
deserialize(byte[] buf) |
boolean |
equals(Object obj) |
long |
estimateNumDistinctValues() |
HyperLogLog.EncodingType |
getEncoding() |
HLLDenseRegister |
getHLLDenseRegister() |
HLLSparseRegister |
getHLLSparseRegister() |
int |
getNumRegisterIndexBits() |
double |
getStandardError() |
int |
hashCode() |
int |
lengthFor(org.apache.hadoop.hive.ql.util.JavaDataModel model) |
void |
merge(HyperLogLog hll)
Merge the specified hyperloglog to the current one.
|
void |
mergeEstimators(NumDistinctValueEstimator o) |
void |
reset() |
byte[] |
serialize() |
void |
setCount(long count) |
void |
setEncoding(HyperLogLog.EncodingType encoding) |
void |
setHLLDenseRegister(byte[] reg)
Reconstruct dense registers from byte array
|
void |
setHLLSparseRegister(int[] reg)
Reconstruct sparse map from serialized integer list
|
HyperLogLog |
squash(int p0)
Reduces the accuracy of the HLL provided to a smaller size
|
String |
toString() |
String |
toStringExtended() |
public static HyperLogLog.HyperLogLogBuilder builder()
public void addBoolean(boolean val)
public void addByte(byte val)
public void addBytes(byte[] val)
public void addShort(short val)
public void addInt(int val)
public void addLong(long val)
public void addFloat(float val)
public void addDouble(double val)
public void addChar(char val)
public void addString(String val)
val - input string
public void add(long hashcode)
public long estimateNumDistinctValues()
estimateNumDistinctValues
in interface NumDistinctValueEstimator
public long count()
public void setCount(long count)
public double getStandardError()
public HLLDenseRegister getHLLDenseRegister()
public HLLSparseRegister getHLLSparseRegister()
public void setHLLSparseRegister(int[] reg)
reg - uncompressed and delta-decoded integer list
public void setHLLDenseRegister(byte[] reg)
reg - unpacked byte array
public void merge(HyperLogLog hll)
hll - hyperloglog to be merged
Throws: IllegalArgumentException
public HyperLogLog squash(int p0)
p0 - new p size for the new HyperLogLog (smaller or no change)
public String toStringExtended()
public int getNumRegisterIndexBits()
public HyperLogLog.EncodingType getEncoding()
public void setEncoding(HyperLogLog.EncodingType encoding)
public void reset()
reset
in interface NumDistinctValueEstimator
public byte[] serialize()
serialize
in interface NumDistinctValueEstimator
public NumDistinctValueEstimator deserialize(byte[] buf)
deserialize
in interface NumDistinctValueEstimator
public void addToEstimator(long v)
addToEstimator
in interface NumDistinctValueEstimator
public void addToEstimator(String s)
addToEstimator
in interface NumDistinctValueEstimator
public void addToEstimator(double d)
addToEstimator
in interface NumDistinctValueEstimator
public void addToEstimator(org.apache.hadoop.hive.common.type.HiveDecimal decimal)
addToEstimator
in interface NumDistinctValueEstimator
public void mergeEstimators(NumDistinctValueEstimator o)
mergeEstimators
in interface NumDistinctValueEstimator
public int lengthFor(org.apache.hadoop.hive.ql.util.JavaDataModel model)
lengthFor
in interface NumDistinctValueEstimator
public boolean canMerge(NumDistinctValueEstimator o)
canMerge
in interface NumDistinctValueEstimator
Copyright © 2022 The Apache Software Foundation. All rights reserved.