public class StatsUtils extends Object
Constructor and Description |
---|
StatsUtils() |
Modifier and Type | Method and Description |
---|---|
static Long |
addWithExpDecay(List<Long> distinctVals) |
static Statistics |
collectStatistics(HiveConf conf,
PrunedPartitionList partList,
Table table,
List<ColumnInfo> schema,
List<String> neededColumns,
List<String> referencedColumns,
boolean fetchColStats,
boolean fetchPartStats) |
static Statistics |
collectStatistics(HiveConf conf,
PrunedPartitionList partList,
Table table,
TableScanOperator tableScanOperator)
Collect table, partition and column level statistics
|
static int |
estimateRowSizeFromSchema(HiveConf conf,
List<ColumnInfo> schema,
List<String> neededColumns) |
static long |
getAvailableMemory(org.apache.hadoop.conf.Configuration conf) |
static long |
getAvgColLenOfFixedLengthTypes(String colType)
Get size of fixed length primitives
|
static long |
getAvgColLenOfVariableLengthTypes(HiveConf conf,
ObjectInspector oi,
String colType)
Get the raw data size of variable length data types
|
static List<Long> |
getBasicStatForPartitions(Table table,
List<Partition> parts,
String statType)
Get basic stats of partitions
|
static long |
getBasicStatForTable(Table table,
String statType)
Get basic stats of table
|
static ColStatistics |
getColStatistics(ColumnStatisticsObj cso,
String tabName,
String colName)
Convert ColumnStatisticsObj to ColStatistics
|
static ColStatistics |
getColStatisticsFromExpression(HiveConf conf,
Statistics parentStats,
ExprNodeDesc end)
Get column statistics from expression nodes
|
static List<ColStatistics> |
getColStatisticsFromExprMap(HiveConf conf,
Statistics parentStats,
Map<String,ExprNodeDesc> colExprMap,
RowSchema rowSchema)
Get column statistics from parent statistics.
|
static List<ColStatistics> |
getColStatisticsUpdatingTableAlias(Statistics parentStats,
RowSchema rowSchema)
Get column statistics from parent statistics given the
row schema of its child.
|
static long |
getDataSizeFromColumnStats(long numRows,
List<ColStatistics> colStats)
Compute raw data size from column statistics
|
static List<Long> |
getFileSizeForPartitions(HiveConf conf,
List<Partition> parts)
Find the bytes on disk occupied by a list of partitions
|
static long |
getFileSizeForTable(HiveConf conf,
Table table)
Find the bytes on disk occupied by a table
|
static String |
getFullyQualifiedTableName(String dbName,
String tabName) |
static long |
getMaxIfOverflow(long val)
Negative numbers of rows or data sizes are invalid.
|
static int |
getNDVPartitionColumn(Set<Partition> partitions,
String partColName) |
static int |
getNumBitVectorsForNDVEstimation(HiveConf conf) |
static long |
getNumRows(Table table)
Get number of rows of a given table
|
static List<String> |
getQualifedReducerKeyNames(List<String> keyExprs)
Get qualified column name from output key column names
|
static long |
getRawDataSize(Table table)
Get raw data size of a given table
|
static float |
getScaledSelectivity(ColStatistics csPK,
ColStatistics csFK)
Scale selectivity based on key range ratio.
|
static long |
getSizeOfComplexTypes(HiveConf conf,
ObjectInspector oi)
Get the size of complex data types
|
static long |
getSizeOfMap(StandardConstantMapObjectInspector scmoi)
Estimate the size of map object
|
static long |
getSizeOfPrimitiveTypeArraysFromType(String colType,
int length)
Get the size of arrays of primitive types
|
static long |
getSizeOfStruct(StandardConstantStructObjectInspector soi) |
static long |
getSumIgnoreNegatives(List<Long> vals)
Get sum of all values in the list that are >0
|
static List<ColStatistics> |
getTableColumnStats(Table table,
List<ColumnInfo> schema,
List<String> neededColumns)
Get table level column statistics from metastore for needed columns
|
static long |
getTotalSize(Table table)
Get total size of a given table
|
static long |
getWritableSize(ObjectInspector oi,
Object value)
Get size of primitive data types based on their respective writable object inspector
|
static void |
inferAndSetPrimaryKey(long numRows,
List<ColStatistics> colStats)
Based on the provided column statistics and number of rows, this method infers if the column
can be a primary key.
|
static boolean |
inferForeignKey(ColStatistics csPK,
ColStatistics csFK)
Infer foreign key relationship from given column statistics.
|
static long |
safeAdd(long a,
long b)
Bounded addition - overflows become MAX_VALUE
|
static long |
safeMult(long a,
double b)
Bounded multiplication - overflows become MAX_VALUE
|
static long |
safeMult(long a,
long b)
Bounded multiplication - overflows become MAX_VALUE
|
public static Statistics collectStatistics(HiveConf conf, PrunedPartitionList partList, Table table, TableScanOperator tableScanOperator) throws HiveException
conf - hive configuration
partList - partition list
table - table
tableScanOperator - table scan operator
Throws: HiveException
public static Statistics collectStatistics(HiveConf conf, PrunedPartitionList partList, Table table, List<ColumnInfo> schema, List<String> neededColumns, List<String> referencedColumns, boolean fetchColStats, boolean fetchPartStats) throws HiveException
Throws: HiveException
public static void inferAndSetPrimaryKey(long numRows, List<ColStatistics> colStats)
numRows - number of rows
colStats - column statistics
public static boolean inferForeignKey(ColStatistics csPK, ColStatistics csFK)
csPK - column statistics of primary key
csFK - column statistics of potential foreign key
public static float getScaledSelectivity(ColStatistics csPK, ColStatistics csFK)
csPK - column statistics of primary key
csFK - column statistics of potential foreign key
public static int getNDVPartitionColumn(Set<Partition> partitions, String partColName)
public static int estimateRowSizeFromSchema(HiveConf conf, List<ColumnInfo> schema, List<String> neededColumns)
public static long getFileSizeForTable(HiveConf conf, Table table)
conf - hive conf
table - table
public static List<Long> getFileSizeForPartitions(HiveConf conf, List<Partition> parts)
conf - hive conf
parts - partition list
public static long getSumIgnoreNegatives(List<Long> vals)
vals - list of values
public static ColStatistics getColStatistics(ColumnStatisticsObj cso, String tabName, String colName)
cso - ColumnStatisticsObj
tabName - table name
colName - column name
public static List<ColStatistics> getTableColumnStats(Table table, List<ColumnInfo> schema, List<String> neededColumns)
table - table
schema - output schema
neededColumns - list of needed columns
public static long getAvgColLenOfVariableLengthTypes(HiveConf conf, ObjectInspector oi, String colType)
conf - hive conf
oi - object inspector
colType - column type
public static long getSizeOfComplexTypes(HiveConf conf, ObjectInspector oi)
conf - hive conf
oi - object inspector
public static long getAvgColLenOfFixedLengthTypes(String colType)
colType - column type
public static long getSizeOfPrimitiveTypeArraysFromType(String colType, int length)
colType - column type
length - array length
public static long getSizeOfMap(StandardConstantMapObjectInspector scmoi)
scmoi - object inspector
public static long getSizeOfStruct(StandardConstantStructObjectInspector soi)
public static long getWritableSize(ObjectInspector oi, Object value)
oi - object inspector
value - value
public static List<ColStatistics> getColStatisticsFromExprMap(HiveConf conf, Statistics parentStats, Map<String,ExprNodeDesc> colExprMap, RowSchema rowSchema)
conf - hive conf
parentStats - parent statistics
colExprMap - column expression map
rowSchema - row schema
public static List<ColStatistics> getColStatisticsUpdatingTableAlias(Statistics parentStats, RowSchema rowSchema)
parentStats - parent statistics
rowSchema - row schema
public static ColStatistics getColStatisticsFromExpression(HiveConf conf, Statistics parentStats, ExprNodeDesc end)
conf - hive conf
parentStats - parent statistics
end - expression nodes
public static long getNumRows(Table table)
public static long getRawDataSize(Table table)
public static long getTotalSize(Table table)
public static long getBasicStatForTable(Table table, String statType)
table - table
statType - type of stats
public static List<Long> getBasicStatForPartitions(Table table, List<Partition> parts, String statType)
table - table
parts - partitions
statType - type of stats
public static long getDataSizeFromColumnStats(long numRows, List<ColStatistics> colStats)
numRows - number of rows
colStats - column statistics
public static String getFullyQualifiedTableName(String dbName, String tabName)
public static List<String> getQualifedReducerKeyNames(List<String> keyExprs)
keyExprs - output key names
public static long getAvailableMemory(org.apache.hadoop.conf.Configuration conf)
public static long getMaxIfOverflow(long val)
val - input value
public static long safeMult(long a, double b)
public static long safeAdd(long a, long b)
public static long safeMult(long a, long b)
public static int getNumBitVectorsForNDVEstimation(HiveConf conf) throws SemanticException
Throws: SemanticException
Copyright © 2016 The Apache Software Foundation. All rights reserved.