public final class Utilities extends Object
Modifier and Type | Class and Description |
---|---|
static class |
Utilities.CollectionPersistenceDelegate |
static class |
Utilities.CommonTokenDelegate
Need to serialize org.antlr.runtime.CommonToken
|
static class |
Utilities.DatePersistenceDelegate
DatePersistenceDelegate.
|
static class |
Utilities.EnumDelegate
Java 1.5 workaround.
|
static class |
Utilities.ListDelegate |
static class |
Utilities.MapDelegate |
static class |
Utilities.PathDelegate |
static class |
Utilities.ReduceField
ReduceField:
KEY: record key
VALUE: record value
|
static class |
Utilities.SetDelegate |
static class |
Utilities.SQLCommand<T> |
static class |
Utilities.StreamStatus
StreamStatus.
|
static class |
Utilities.TimestampPersistenceDelegate
TimestampPersistenceDelegate.
|
Modifier and Type | Field and Description |
---|---|
static int |
carriageReturnCode |
static int |
ctrlaCode |
static TableDesc |
defaultTd |
static String |
HADOOP_LOCAL_FS
The objects in the reducer are composed of these top-level fields.
|
static String |
HIVE_ADDED_JARS |
static String |
INDENT |
static String |
INPUT_NAME |
static String |
MAP_PLAN_NAME |
static String |
MAPNAME |
static String |
MAPRED_MAPPER_CLASS |
static String |
MAPRED_REDUCER_CLASS |
static String |
MERGE_PLAN_NAME |
static int |
newLineCode |
static String |
NSTR |
static String |
nullStringOutput |
static String |
nullStringStorage |
static Random |
randGen |
static String |
REDUCE_PLAN_NAME |
static List<String> |
reduceFieldNameList |
static String |
REDUCENAME |
static ThreadLocal<com.esotericsoftware.kryo.Kryo> |
runtimeSerializationKryo |
static ThreadLocal<com.esotericsoftware.kryo.Kryo> |
sparkSerializationKryo |
static char |
sqlEscapeChar |
static String |
suffix |
static int |
tabCode |
Modifier and Type | Method and Description |
---|---|
static String |
abbreviate(String str,
int max)
convert "From src insert blah blah" to "From src insert ...
|
static ClassLoader |
addToClassPath(ClassLoader cloader,
String[] newPaths)
Add new elements to the classpath.
|
static void |
cacheBaseWork(org.apache.hadoop.conf.Configuration conf,
String name,
BaseWork work,
org.apache.hadoop.fs.Path hiveScratchDir) |
static void |
cacheMapWork(org.apache.hadoop.conf.Configuration conf,
MapWork work,
org.apache.hadoop.fs.Path hiveScratchDir) |
static void |
clearWork(org.apache.hadoop.conf.Configuration conf) |
static void |
clearWorkMap() |
static void |
clearWorkMapForConf(org.apache.hadoop.conf.Configuration conf) |
static BaseWork |
cloneBaseWork(BaseWork plan)
Clones using the powers of XML.
|
static List<Operator<?>> |
cloneOperatorTree(org.apache.hadoop.conf.Configuration conf,
List<Operator<?>> roots) |
static MapredWork |
clonePlan(MapredWork plan)
Clones using the powers of XML.
|
static Connection |
connectWithRetry(String connectionString,
long waitWindow,
int maxRetries)
Retry connecting to a database with random backoff (same as the one implemented in HDFS-767).
|
static boolean |
contentsEqual(InputStream is1,
InputStream is2,
boolean ignoreWhitespace) |
static void |
copyTableJobPropertiesToConf(TableDesc tbl,
org.apache.hadoop.conf.Configuration job)
Copies the storage handler properties configured for a table descriptor to a runtime job
configuration.
|
static void |
copyTablePropertiesToConf(TableDesc tbl,
org.apache.hadoop.mapred.JobConf job)
Copies the storage handler properties configured for a table descriptor to a runtime job
configuration.
|
static OutputStream |
createCompressedStream(org.apache.hadoop.mapred.JobConf jc,
OutputStream out)
Convert an output stream to a compressed output stream based on codecs and compression options
specified in the Job Configuration.
|
static OutputStream |
createCompressedStream(org.apache.hadoop.mapred.JobConf jc,
OutputStream out,
boolean isCompressed)
Convert an output stream to a compressed output stream based on codecs in the Job
Configuration.
|
static boolean |
createDirsWithPermission(org.apache.hadoop.conf.Configuration conf,
org.apache.hadoop.fs.Path mkdirPath,
org.apache.hadoop.fs.permission.FsPermission fsPermission,
boolean recursive) |
static RCFile.Writer |
createRCFileWriter(org.apache.hadoop.mapred.JobConf jc,
org.apache.hadoop.fs.FileSystem fs,
org.apache.hadoop.fs.Path file,
boolean isCompressed,
org.apache.hadoop.util.Progressable progressable)
Create an RCFile output stream based on the job configuration. Uses the user-supplied compression flag
(rather than obtaining it from the Job Configuration).
|
static org.apache.hadoop.io.SequenceFile.Writer |
createSequenceWriter(org.apache.hadoop.mapred.JobConf jc,
org.apache.hadoop.fs.FileSystem fs,
org.apache.hadoop.fs.Path file,
Class<?> keyClass,
Class<?> valClass,
boolean isCompressed,
org.apache.hadoop.util.Progressable progressable)
Create a SequenceFile output stream based on the job configuration. Uses the user-supplied compression
flag (rather than obtaining it from the Job Configuration).
|
static org.apache.hadoop.io.SequenceFile.Writer |
createSequenceWriter(org.apache.hadoop.mapred.JobConf jc,
org.apache.hadoop.fs.FileSystem fs,
org.apache.hadoop.fs.Path file,
Class<?> keyClass,
Class<?> valClass,
org.apache.hadoop.util.Progressable progressable)
Create a SequenceFile output stream based on the job configuration.
|
static File |
createTempDir(String baseDir)
Create a temp dir in the specified baseDir.
This can go away once Hive moves to support only JDK 7
and can use Files.createTempDirectory;
Guava's Files.createTempDir() does not take a base dir.
|
static void |
createTmpDirs(org.apache.hadoop.conf.Configuration conf,
MapWork mWork)
Hive uses tmp directories to capture the output of each FileSinkOperator.
|
static void |
createTmpDirs(org.apache.hadoop.conf.Configuration conf,
ReduceWork rWork)
Hive uses tmp directories to capture the output of each FileSinkOperator.
|
static ExprNodeGenericFuncDesc |
deserializeExpression(String s) |
static ExprNodeGenericFuncDesc |
deserializeExpressionFromKryo(byte[] bytes)
Deserializes expression from Kryo.
|
static <T extends Serializable> |
deserializeObject(String s,
Class<T> clazz) |
static <T> T |
deserializePlan(InputStream in,
Class<T> planClass,
org.apache.hadoop.conf.Configuration conf)
Deserializes the plan.
|
static String |
escapeSqlLike(String key)
Escape the '_', '%', as well as the escape characters inside the string key.
|
static int |
estimateNumberOfReducers(HiveConf conf,
org.apache.hadoop.fs.ContentSummary inputSummary,
MapWork work,
boolean finalMapRed)
Estimate the number of reducers needed for this job, based on job input,
and configuration parameters.
|
static int |
estimateReducers(long totalInputFileSize,
long bytesPerReducer,
int maxReducers,
boolean powersOfTwo) |
static <T> T |
executeWithRetry(Utilities.SQLCommand<T> cmd,
PreparedStatement stmt,
long baseWindow,
int maxRetries)
Retry SQL execution with random backoff (same as the one implemented in HDFS-767).
|
static String |
formatBinaryString(byte[] array,
int start,
int length) |
static String |
formatMsecToStr(long msec)
Format a number of milliseconds as a string.
|
static String |
generateFileName(Byte tag,
String bigBucketFileName) |
static String |
generatePath(org.apache.hadoop.fs.Path baseURI,
String filename) |
static org.apache.hadoop.fs.Path |
generatePath(org.apache.hadoop.fs.Path basePath,
String dumpFilePrefix,
Byte tag,
String bigBucketFileName) |
static String |
generateTarFileName(String name) |
static org.apache.hadoop.fs.Path |
generateTarPath(org.apache.hadoop.fs.Path basePath,
String filename) |
static org.apache.hadoop.fs.Path |
generateTmpPath(org.apache.hadoop.fs.Path basePath,
String id) |
static String |
getBucketFileNameFromPathSubString(String bucketName) |
static List<String> |
getColumnNames(Properties props) |
static List<String> |
getColumnNamesFromFieldSchema(List<FieldSchema> partCols) |
static List<String> |
getColumnNamesFromSortCols(List<Order> sortCols) |
static List<String> |
getColumnTypes(Properties props) |
static String |
getDatabaseName(String dbTableName)
Accepts qualified name which is in the form of dbname.tablename and returns dbname from it
|
static String[] |
getDbTableName(String dbtable)
Extract db and table name from a dbtable string, where db and table are separated by "."
If there is no db name part, the current session's default db is used.
|
static String[] |
getDbTableName(String defaultDb,
String dbtable) |
static int |
getDefaultNotificationInterval(org.apache.hadoop.conf.Configuration hconf)
Gets the default notification interval to send progress updates to the tracker.
|
static List<String> |
getFieldSchemaString(List<FieldSchema> fl) |
static String |
getFileExtension(org.apache.hadoop.mapred.JobConf jc,
boolean isCompressed)
Deprecated.
|
static String |
getFileExtension(org.apache.hadoop.mapred.JobConf jc,
boolean isCompressed,
HiveOutputFormat<?,?> hiveOutputFormat)
Based on compression option, output format, and configured output codec -
get extension for output file.
|
static String |
getFileNameFromDirName(String dirName) |
static int |
getFooterCount(TableDesc table,
org.apache.hadoop.mapred.JobConf job)
Get footer line count for a table.
|
static List<LinkedHashMap<String,String>> |
getFullDPSpecs(org.apache.hadoop.conf.Configuration conf,
DynamicPartitionCtx dpCtx)
Construct a list of full partition spec from Dynamic Partition Context and the directory names
corresponding to these dynamic partitions.
|
static String |
getHashedStatsPrefix(String statsPrefix,
int maxPrefixLength)
If statsPrefix's length is greater than maxPrefixLength and maxPrefixLength > 0,
then it returns an MD5 hash of statsPrefix followed by a path separator; otherwise
it returns statsPrefix.
|
static int |
getHeaderCount(TableDesc table)
Get header line count for a table.
|
static double |
getHighestSamplePercentage(MapWork work)
Returns the highest sample percentage of any alias in the given MapWork
|
static List<org.apache.hadoop.fs.Path> |
getInputPaths(org.apache.hadoop.mapred.JobConf job,
MapWork work,
org.apache.hadoop.fs.Path hiveScratchDir,
Context ctx,
boolean skipDummy)
Computes a list of all input paths needed to compute the given MapWork.
|
static List<org.apache.hadoop.fs.Path> |
getInputPathsTez(org.apache.hadoop.mapred.JobConf job,
MapWork work)
On Tez we're not creating dummy files when getting/setting input paths.
|
static org.apache.hadoop.fs.ContentSummary |
getInputSummary(Context ctx,
MapWork work,
org.apache.hadoop.fs.PathFilter filter)
Calculate the total size of input files.
|
static List<String> |
getInternalColumnNamesFromSignature(List<ColumnInfo> colInfos) |
static Set<String> |
getJarFilesByPath(String path)
Get the jar files from a specified directory, or get jar files by several jar names separated by commas.
|
static MapredWork |
getMapRedWork(org.apache.hadoop.conf.Configuration conf) |
static MapWork |
getMapWork(org.apache.hadoop.conf.Configuration conf) |
static Map<Integer,String> |
getMapWorkVectorScratchColumnTypeMap(org.apache.hadoop.conf.Configuration hiveConf) |
static BaseWork |
getMergeWork(org.apache.hadoop.mapred.JobConf jconf) |
static BaseWork |
getMergeWork(org.apache.hadoop.mapred.JobConf jconf,
String prefix) |
static List<ExecDriver> |
getMRTasks(List<Task<? extends Serializable>> tasks) |
static String |
getNameMessage(Exception e) |
static String |
getOpTreeSkel(Operator<?> op) |
static PartitionDesc |
getPartitionDesc(Partition part) |
static PartitionDesc |
getPartitionDescFromTableDesc(TableDesc tblDesc,
Partition part) |
static org.apache.hadoop.fs.Path |
getPlanPath(org.apache.hadoop.conf.Configuration conf) |
static String |
getPrefixedTaskIdFromFilename(String filename)
Get the part-spec + task id from the filename.
|
static String |
getQualifiedPath(HiveConf conf,
org.apache.hadoop.fs.Path path)
Convert path to qualified path.
|
static long |
getRandomWaitTime(long baseWindow,
int failures,
Random r)
Introducing a random factor to the wait time before another retry.
|
static ReduceWork |
getReduceWork(org.apache.hadoop.conf.Configuration conf) |
static String |
getResourceFiles(org.apache.hadoop.conf.Configuration conf,
SessionState.ResourceType t) |
static ClassLoader |
getSessionSpecifiedClassLoader()
Get the session-specified class loader, or the current class loader if that fails.
|
static List<SparkTask> |
getSparkTasks(List<Task<? extends Serializable>> tasks) |
static StatsPublisher |
getStatsPublisher(org.apache.hadoop.mapred.JobConf jc) |
static TableDesc |
getTableDesc(String cols,
String colTypes) |
static TableDesc |
getTableDesc(Table tbl) |
static String |
getTableName(String dbTableName)
Accepts qualified name which is in the form of dbname.tablename and returns tablename from it
|
static String |
getTaskId(org.apache.hadoop.conf.Configuration hconf)
Gets the task id if we are running as a Hadoop job.
|
static String |
getTaskIdFromFilename(String filename)
Get the task id from the filename.
|
static List<TezTask> |
getTezTasks(List<Task<? extends Serializable>> tasks) |
static long |
getTotalInputFileSize(org.apache.hadoop.fs.ContentSummary inputSummary,
MapWork work,
double highestSamplePercentage)
Computes the total input file size.
|
static long |
getTotalInputNumFiles(org.apache.hadoop.fs.ContentSummary inputSummary,
MapWork work,
double highestSamplePercentage)
Computes the total number of input files.
|
static boolean |
isCopyFile(String filename) |
static boolean |
isDefaultNameNode(HiveConf conf)
Checks if the current Hive script was executed with a non-default NameNode.
|
static boolean |
isEmptyPath(org.apache.hadoop.mapred.JobConf job,
org.apache.hadoop.fs.Path dirPath) |
static boolean |
isEmptyPath(org.apache.hadoop.mapred.JobConf job,
org.apache.hadoop.fs.Path dirPath,
Context ctx) |
static boolean |
isPerfOrAboveLogging(HiveConf conf)
Checks if the current HiveServer2 logging operation level is >= PERFORMANCE.
|
static boolean |
isTempPath(org.apache.hadoop.fs.FileStatus file)
Detect if the supplied file is a temporary path.
|
static boolean |
isVectorMode(org.apache.hadoop.conf.Configuration conf)
Returns true if a plan is both configured for vectorized execution
and vectorization is allowed.
|
static String |
join(String... elements) |
static org.apache.hadoop.fs.FileStatus[] |
listStatusIfExists(org.apache.hadoop.fs.Path path,
org.apache.hadoop.fs.FileSystem fs)
Returns null if the path does not exist.
|
static ArrayList |
makeList(Object... olist) |
static HashMap |
makeMap(Object... olist) |
static Properties |
makeProperties(String... olist) |
static List<String> |
mergeUniqElems(List<String> src,
List<String> dest) |
static void |
mvFileToFinalPath(org.apache.hadoop.fs.Path specPath,
org.apache.hadoop.conf.Configuration hconf,
boolean success,
org.apache.commons.logging.Log log,
DynamicPartitionCtx dpCtx,
FileSinkDesc conf,
org.apache.hadoop.mapred.Reporter reporter) |
static String |
now() |
static PreparedStatement |
prepareWithRetry(Connection conn,
String stmt,
long waitWindow,
int maxRetries)
Retry preparing a SQL statement with random backoff (same as the one implemented in HDFS-767).
|
static Utilities.StreamStatus |
readColumn(DataInput in,
OutputStream out) |
static String |
realFile(String newFile,
org.apache.hadoop.conf.Configuration conf)
Shamelessly cloned from GenericOptionsParser.
|
protected static void |
removeField(com.esotericsoftware.kryo.Kryo kryo,
Class type,
String fieldName) |
static void |
removeFromClassPath(String[] pathsToRemove)
Remove elements from the classpath.
|
static HashMap<String,org.apache.hadoop.fs.FileStatus> |
removeTempOrDuplicateFiles(org.apache.hadoop.fs.FileStatus[] items,
org.apache.hadoop.fs.FileSystem fs) |
static void |
removeTempOrDuplicateFiles(org.apache.hadoop.fs.FileSystem fs,
org.apache.hadoop.fs.Path path)
Remove all temporary files and duplicate (double-committed) files from a given directory.
|
static ArrayList<String> |
removeTempOrDuplicateFiles(org.apache.hadoop.fs.FileSystem fs,
org.apache.hadoop.fs.Path path,
DynamicPartitionCtx dpCtx)
Remove all temporary files and duplicate (double-committed) files from a given directory.
|
static String |
removeValueTag(String column) |
static void |
rename(org.apache.hadoop.fs.FileSystem fs,
org.apache.hadoop.fs.Path src,
org.apache.hadoop.fs.Path dst)
Rename src to dst, or in the case dst already exists, move files in src to dst.
|
static void |
renameOrMoveFiles(org.apache.hadoop.fs.FileSystem fs,
org.apache.hadoop.fs.Path src,
org.apache.hadoop.fs.Path dst)
Rename src to dst, or in the case dst already exists, move files in src to dst.
|
static String |
replaceTaskIdFromFilename(String filename,
int bucketNum)
Replace the task id from the filename.
|
static String |
replaceTaskIdFromFilename(String filename,
String fileId) |
static void |
restoreSessionSpecifiedClassLoader(ClassLoader prev) |
static void |
reworkMapRedWork(Task<? extends Serializable> task,
boolean reworkMapredWork,
HiveConf conf)
The check here is kind of not clean.
|
static String |
serializeExpression(ExprNodeGenericFuncDesc expr) |
static byte[] |
serializeExpressionToKryo(ExprNodeGenericFuncDesc expr)
Serializes expression via Kryo.
|
static String |
serializeObject(Serializable expr) |
static void |
serializePlan(Object plan,
OutputStream out,
org.apache.hadoop.conf.Configuration conf)
Serializes the plan.
|
static void |
setBaseWork(org.apache.hadoop.conf.Configuration conf,
String name,
BaseWork work)
Pushes work into the global work map
|
static void |
setColumnNameList(org.apache.hadoop.mapred.JobConf jobConf,
Operator op) |
static void |
setColumnNameList(org.apache.hadoop.mapred.JobConf jobConf,
Operator op,
boolean excludeVCs) |
static void |
setColumnTypeList(org.apache.hadoop.mapred.JobConf jobConf,
Operator op) |
static void |
setColumnTypeList(org.apache.hadoop.mapred.JobConf jobConf,
Operator op,
boolean excludeVCs) |
static void |
setInputAttributes(org.apache.hadoop.conf.Configuration conf,
MapWork mWork)
Set hive input format, and input format file if necessary.
|
static void |
setInputPaths(org.apache.hadoop.mapred.JobConf job,
List<org.apache.hadoop.fs.Path> pathsToAdd)
setInputPaths adds all the paths in the provided list to the Job conf object
as input paths for the job.
|
static void |
setMapRedWork(org.apache.hadoop.conf.Configuration conf,
MapredWork w,
org.apache.hadoop.fs.Path hiveScratchDir) |
static void |
setMapWork(org.apache.hadoop.conf.Configuration conf,
MapWork work) |
static org.apache.hadoop.fs.Path |
setMapWork(org.apache.hadoop.conf.Configuration conf,
MapWork w,
org.apache.hadoop.fs.Path hiveScratchDir,
boolean useCache) |
static org.apache.hadoop.fs.Path |
setMergeWork(org.apache.hadoop.mapred.JobConf conf,
MergeJoinWork mergeJoinWork,
org.apache.hadoop.fs.Path mrScratchDir,
boolean useCache) |
static void |
setQueryTimeout(Statement stmt,
int timeout) |
static void |
setReduceWork(org.apache.hadoop.conf.Configuration conf,
ReduceWork work) |
static org.apache.hadoop.fs.Path |
setReduceWork(org.apache.hadoop.conf.Configuration conf,
ReduceWork w,
org.apache.hadoop.fs.Path hiveScratchDir,
boolean useCache) |
static void |
setWorkflowAdjacencies(org.apache.hadoop.conf.Configuration conf,
QueryPlan plan) |
static double |
showTime(long time) |
static boolean |
skipHeader(org.apache.hadoop.mapred.RecordReader<org.apache.hadoop.io.WritableComparable,org.apache.hadoop.io.Writable> currRecReader,
int headerCount,
org.apache.hadoop.io.WritableComparable key,
org.apache.hadoop.io.Writable value)
Skip header lines in the table file when reading the record.
|
static long |
sumOf(Map<String,Long> aliasToSize,
Set<String> aliases) |
static long |
sumOfExcept(Map<String,Long> aliasToSize,
Set<String> aliases,
Set<String> excepts) |
static org.apache.hadoop.fs.Path |
toTaskTempPath(org.apache.hadoop.fs.Path orig) |
static org.apache.hadoop.fs.Path |
toTempPath(org.apache.hadoop.fs.Path orig) |
static org.apache.hadoop.fs.Path |
toTempPath(String orig)
Given a path, convert to a temporary path.
|
static void |
validateColumnNames(List<String> colNames,
List<String> checkCols) |
public static String HADOOP_LOCAL_FS
public static String MAP_PLAN_NAME
public static String REDUCE_PLAN_NAME
public static String MERGE_PLAN_NAME
public static final String INPUT_NAME
public static final String MAPRED_MAPPER_CLASS
public static final String MAPRED_REDUCER_CLASS
public static final String HIVE_ADDED_JARS
public static String MAPNAME
public static String REDUCENAME
public static ThreadLocal<com.esotericsoftware.kryo.Kryo> runtimeSerializationKryo
public static ThreadLocal<com.esotericsoftware.kryo.Kryo> sparkSerializationKryo
public static TableDesc defaultTd
public static final int carriageReturnCode
public static final int newLineCode
public static final int tabCode
public static final int ctrlaCode
public static final String INDENT
public static String nullStringStorage
public static String nullStringOutput
public static Random randGen
public static final String NSTR
public static String suffix
public static final char sqlEscapeChar
public static void clearWork(org.apache.hadoop.conf.Configuration conf)
public static MapredWork getMapRedWork(org.apache.hadoop.conf.Configuration conf)
public static void cacheMapWork(org.apache.hadoop.conf.Configuration conf, MapWork work, org.apache.hadoop.fs.Path hiveScratchDir)
public static void setMapWork(org.apache.hadoop.conf.Configuration conf, MapWork work)
public static MapWork getMapWork(org.apache.hadoop.conf.Configuration conf)
public static void setReduceWork(org.apache.hadoop.conf.Configuration conf, ReduceWork work)
public static ReduceWork getReduceWork(org.apache.hadoop.conf.Configuration conf)
public static org.apache.hadoop.fs.Path setMergeWork(org.apache.hadoop.mapred.JobConf conf, MergeJoinWork mergeJoinWork, org.apache.hadoop.fs.Path mrScratchDir, boolean useCache)
public static BaseWork getMergeWork(org.apache.hadoop.mapred.JobConf jconf)
public static BaseWork getMergeWork(org.apache.hadoop.mapred.JobConf jconf, String prefix)
public static void cacheBaseWork(org.apache.hadoop.conf.Configuration conf, String name, BaseWork work, org.apache.hadoop.fs.Path hiveScratchDir)
public static void setBaseWork(org.apache.hadoop.conf.Configuration conf, String name, BaseWork work)
public static Map<Integer,String> getMapWorkVectorScratchColumnTypeMap(org.apache.hadoop.conf.Configuration hiveConf)
public static void setWorkflowAdjacencies(org.apache.hadoop.conf.Configuration conf, QueryPlan plan)
public static List<String> getFieldSchemaString(List<FieldSchema> fl)
public static void setMapRedWork(org.apache.hadoop.conf.Configuration conf, MapredWork w, org.apache.hadoop.fs.Path hiveScratchDir)
public static org.apache.hadoop.fs.Path setMapWork(org.apache.hadoop.conf.Configuration conf, MapWork w, org.apache.hadoop.fs.Path hiveScratchDir, boolean useCache)
public static org.apache.hadoop.fs.Path setReduceWork(org.apache.hadoop.conf.Configuration conf, ReduceWork w, org.apache.hadoop.fs.Path hiveScratchDir, boolean useCache)
public static org.apache.hadoop.fs.Path getPlanPath(org.apache.hadoop.conf.Configuration conf)
public static byte[] serializeExpressionToKryo(ExprNodeGenericFuncDesc expr)
expr - Expression.

public static ExprNodeGenericFuncDesc deserializeExpressionFromKryo(byte[] bytes)
bytes - Bytes containing the expression.

public static String serializeExpression(ExprNodeGenericFuncDesc expr)
public static ExprNodeGenericFuncDesc deserializeExpression(String s)
public static String serializeObject(Serializable expr)
public static <T extends Serializable> T deserializeObject(String s, Class<T> clazz)
public static List<Operator<?>> cloneOperatorTree(org.apache.hadoop.conf.Configuration conf, List<Operator<?>> roots)
public static void serializePlan(Object plan, OutputStream out, org.apache.hadoop.conf.Configuration conf)
plan - The plan, such as QueryPlan, MapredWork, etc.
out - The stream to write to.
conf - to pick which serialization format is desired.

public static <T> T deserializePlan(InputStream in, Class<T> planClass, org.apache.hadoop.conf.Configuration conf)
in - The stream to read from.
planClass - class of plan
conf - configuration

public static MapredWork clonePlan(MapredWork plan)
plan - The plan.

public static BaseWork cloneBaseWork(BaseWork plan)
plan - The plan.

protected static void removeField(com.esotericsoftware.kryo.Kryo kryo, Class type, String fieldName)
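As an illustrative sketch (not part of the Hive sources), the expression serialization helpers above can be combined to round-trip a predicate; the `expr` argument is assumed to be produced elsewhere during query compilation, and the Hive jars are assumed to be on the classpath:

```java
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;

public class ExprRoundTrip {
  // Round-trips an expression through the text form and the Kryo byte form.
  public static ExprNodeGenericFuncDesc roundTrip(ExprNodeGenericFuncDesc expr) {
    String text = Utilities.serializeExpression(expr);            // String form
    ExprNodeGenericFuncDesc fromText = Utilities.deserializeExpression(text);

    byte[] bytes = Utilities.serializeExpressionToKryo(fromText); // binary Kryo form
    return Utilities.deserializeExpressionFromKryo(bytes);
  }
}
```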
public static String getTaskId(org.apache.hadoop.conf.Configuration hconf)
public static Properties makeProperties(String... olist)
public static PartitionDesc getPartitionDesc(Partition part) throws HiveException
HiveException
public static PartitionDesc getPartitionDescFromTableDesc(TableDesc tblDesc, Partition part) throws HiveException
HiveException
public static boolean contentsEqual(InputStream is1, InputStream is2, boolean ignoreWhitespace) throws IOException
IOException
public static String abbreviate(String str, int max)
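A small usage sketch for abbreviate (the query string is hypothetical; the exact truncation and ellipsis behavior is whatever the implementation does):

```java
import org.apache.hadoop.hive.ql.exec.Utilities;

public class AbbreviateExample {
  public static void main(String[] args) {
    String sql = "INSERT OVERWRITE TABLE dst SELECT * FROM src WHERE key > 100";
    // Shorten long statements, e.g. for log messages.
    System.out.println(Utilities.abbreviate(sql, 40));
  }
}
```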
public static Utilities.StreamStatus readColumn(DataInput in, OutputStream out) throws IOException
IOException
public static OutputStream createCompressedStream(org.apache.hadoop.mapred.JobConf jc, OutputStream out) throws IOException
jc - Job Configuration
out - Output Stream to be converted into compressed output stream
IOException

public static OutputStream createCompressedStream(org.apache.hadoop.mapred.JobConf jc, OutputStream out, boolean isCompressed) throws IOException
jc - Job Configuration
out - Output Stream to be converted into compressed output stream
isCompressed - whether the output stream needs to be compressed or not
IOException
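A minimal sketch of wrapping an in-memory stream with createCompressedStream; whether and how the stream is compressed depends on the codec settings carried by the JobConf (here a default one), so treat this as illustrative only:

```java
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.nio.charset.StandardCharsets;

import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.mapred.JobConf;

public class CompressedStreamExample {
  public static void main(String[] args) throws IOException {
    JobConf jc = new JobConf();                       // default configuration
    ByteArrayOutputStream raw = new ByteArrayOutputStream();
    // Ask for compression explicitly; the codec is taken from the JobConf.
    OutputStream out = Utilities.createCompressedStream(jc, raw, true);
    out.write("hello".getBytes(StandardCharsets.UTF_8));
    out.close();
    System.out.println("bytes on the wire: " + raw.size());
  }
}
```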
@Deprecated public static String getFileExtension(org.apache.hadoop.mapred.JobConf jc, boolean isCompressed)
See also: getFileExtension(JobConf, boolean, HiveOutputFormat)
jc - Job Configuration
isCompressed - Whether the output file is compressed or not

public static String getFileExtension(org.apache.hadoop.mapred.JobConf jc, boolean isCompressed, HiveOutputFormat<?,?> hiveOutputFormat)
The property hive.output.file.extension is used to determine the extension - if set, it will override other logic for choosing an extension.
jc - Job Configuration
isCompressed - Whether the output file is compressed or not
hiveOutputFormat - The output format, used to detect if the format is text

public static org.apache.hadoop.io.SequenceFile.Writer createSequenceWriter(org.apache.hadoop.mapred.JobConf jc, org.apache.hadoop.fs.FileSystem fs, org.apache.hadoop.fs.Path file, Class<?> keyClass, Class<?> valClass, org.apache.hadoop.util.Progressable progressable) throws IOException
jc - Job configuration
fs - File System to create file in
file - Path to be created
keyClass - Java Class for key
valClass - Java Class for value
IOException
public static org.apache.hadoop.io.SequenceFile.Writer createSequenceWriter(org.apache.hadoop.mapred.JobConf jc, org.apache.hadoop.fs.FileSystem fs, org.apache.hadoop.fs.Path file, Class<?> keyClass, Class<?> valClass, boolean isCompressed, org.apache.hadoop.util.Progressable progressable) throws IOException
jc - Job configuration
fs - File System to create file in
file - Path to be created
keyClass - Java Class for key
valClass - Java Class for value
IOException

public static RCFile.Writer createRCFileWriter(org.apache.hadoop.mapred.JobConf jc, org.apache.hadoop.fs.FileSystem fs, org.apache.hadoop.fs.Path file, boolean isCompressed, org.apache.hadoop.util.Progressable progressable) throws IOException
jc - Job configuration
fs - File System to create file in
file - Path to be created
IOException
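A hedged sketch of createSequenceWriter against the local file system; the output path, key/value classes, and the no-op Progressable are illustrative choices, not anything mandated by the API:

```java
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.util.Progressable;

public class SequenceWriterExample {
  public static void main(String[] args) throws Exception {
    JobConf jc = new JobConf();
    FileSystem fs = FileSystem.getLocal(jc);            // local FS for illustration
    Path file = new Path("/tmp/utilities-seq-demo");    // hypothetical output path
    Progressable noop = () -> { };                      // nothing to report progress to

    SequenceFile.Writer writer = Utilities.createSequenceWriter(
        jc, fs, file, BytesWritable.class, Text.class, /* isCompressed */ false, noop);
    writer.append(new BytesWritable(new byte[] {1}), new Text("row"));
    writer.close();
  }
}
```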
public static String realFile(String newFile, org.apache.hadoop.conf.Configuration conf) throws IOException
IOException
public static org.apache.hadoop.fs.Path toTaskTempPath(org.apache.hadoop.fs.Path orig)
public static org.apache.hadoop.fs.Path toTempPath(org.apache.hadoop.fs.Path orig)
public static org.apache.hadoop.fs.Path toTempPath(String orig)
public static boolean isTempPath(org.apache.hadoop.fs.FileStatus file)
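The temp-path helpers only manipulate path names, so they can be sketched without a file system; the warehouse path below is hypothetical, and the exact temporary naming scheme is internal to Hive:

```java
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.exec.Utilities;

public class TempPathExample {
  public static void main(String[] args) {
    Path finalPath = new Path("/warehouse/db/tbl/000000_0");  // hypothetical final location
    Path jobTmp = Utilities.toTempPath(finalPath);            // temporary variant of the path
    Path taskTmp = Utilities.toTaskTempPath(finalPath);       // task-level temporary variant
    System.out.println(jobTmp + " | " + taskTmp);
  }
}
```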
public static void rename(org.apache.hadoop.fs.FileSystem fs, org.apache.hadoop.fs.Path src, org.apache.hadoop.fs.Path dst) throws IOException, HiveException
fs - the FileSystem where src and dst are on.
src - the src directory
dst - the target directory
IOException
HiveException

public static void renameOrMoveFiles(org.apache.hadoop.fs.FileSystem fs, org.apache.hadoop.fs.Path src, org.apache.hadoop.fs.Path dst) throws IOException, HiveException
fs - the FileSystem where src and dst are on.
src - the src directory
dst - the target directory
IOException
HiveException
public static String getTaskIdFromFilename(String filename)
filename - filename to extract taskid from

public static String getPrefixedTaskIdFromFilename(String filename)
filename - filename to extract taskid from

public static String replaceTaskIdFromFilename(String filename, int bucketNum)
filename - filename to replace taskid "0_0" or "0_0.gz" by 33 to "33_0" or "33_0.gz"

public static String replaceTaskIdFromFilename(String filename, String fileId)
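A small sketch of the task-id filename helpers, using the "0_0.gz" style of name mentioned above:

```java
import org.apache.hadoop.hive.ql.exec.Utilities;

public class TaskIdExample {
  public static void main(String[] args) {
    String name = "000001_0.gz";
    System.out.println(Utilities.getTaskIdFromFilename(name));         // task id portion
    System.out.println(Utilities.getPrefixedTaskIdFromFilename(name)); // part-spec + task id
    // Renumber the task id, e.g. "0_0.gz" -> "33_0.gz" style rewrites.
    System.out.println(Utilities.replaceTaskIdFromFilename(name, 33));
  }
}
```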
public static org.apache.hadoop.fs.FileStatus[] listStatusIfExists(org.apache.hadoop.fs.Path path, org.apache.hadoop.fs.FileSystem fs) throws IOException
IOException
public static void mvFileToFinalPath(org.apache.hadoop.fs.Path specPath, org.apache.hadoop.conf.Configuration hconf, boolean success, org.apache.commons.logging.Log log, DynamicPartitionCtx dpCtx, FileSinkDesc conf, org.apache.hadoop.mapred.Reporter reporter) throws IOException, HiveException
IOException
HiveException
public static void removeTempOrDuplicateFiles(org.apache.hadoop.fs.FileSystem fs, org.apache.hadoop.fs.Path path) throws IOException
IOException
public static ArrayList<String> removeTempOrDuplicateFiles(org.apache.hadoop.fs.FileSystem fs, org.apache.hadoop.fs.Path path, DynamicPartitionCtx dpCtx) throws IOException
IOException
public static HashMap<String,org.apache.hadoop.fs.FileStatus> removeTempOrDuplicateFiles(org.apache.hadoop.fs.FileStatus[] items, org.apache.hadoop.fs.FileSystem fs) throws IOException
IOException
public static boolean isCopyFile(String filename)
public static String getBucketFileNameFromPathSubString(String bucketName)
public static String getResourceFiles(org.apache.hadoop.conf.Configuration conf, SessionState.ResourceType t)
public static ClassLoader getSessionSpecifiedClassLoader()
public static void restoreSessionSpecifiedClassLoader(ClassLoader prev)
public static Set<String> getJarFilesByPath(String path)
path -

public static ClassLoader addToClassPath(ClassLoader cloader, String[] newPaths) throws Exception
newPaths - Array of classpath elements
Exception

public static void removeFromClassPath(String[] pathsToRemove) throws Exception
pathsToRemove - Array of classpath elements
Exception
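A hedged sketch of adding and later removing classpath entries; the jar path is hypothetical, and installing the returned loader as the thread context class loader is one common pattern rather than a requirement of the API:

```java
import org.apache.hadoop.hive.ql.exec.Utilities;

public class ClassPathExample {
  public static void main(String[] args) throws Exception {
    String[] jars = { "/tmp/my-udfs.jar" };  // hypothetical jar path
    ClassLoader current = Thread.currentThread().getContextClassLoader();

    ClassLoader extended = Utilities.addToClassPath(current, jars);
    Thread.currentThread().setContextClassLoader(extended);
    // ... load classes from the added jar ...

    Utilities.removeFromClassPath(jars);
  }
}
```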
public static String formatBinaryString(byte[] array, int start, int length)
public static List<String> getColumnNamesFromSortCols(List<Order> sortCols)
public static List<String> getColumnNamesFromFieldSchema(List<FieldSchema> partCols)
public static List<String> getInternalColumnNamesFromSignature(List<ColumnInfo> colInfos)
public static List<String> getColumnNames(Properties props)
public static List<String> getColumnTypes(Properties props)
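A sketch of reading column metadata from table properties. The property keys "columns" and "columns.types" and their comma/colon separated formats are the conventional Hive serde keys, assumed here rather than taken from this page:

```java
import java.util.Properties;
import org.apache.hadoop.hive.ql.exec.Utilities;

public class ColumnListExample {
  public static void main(String[] args) {
    Properties props = new Properties();
    props.setProperty("columns", "id,name,ts");                  // assumed key for column names
    props.setProperty("columns.types", "int:string:timestamp");  // assumed key for column types

    System.out.println(Utilities.getColumnNames(props));
    System.out.println(Utilities.getColumnTypes(props));
  }
}
```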
public static String[] getDbTableName(String dbtable) throws SemanticException
dbtable -
HiveException
SemanticException

public static String[] getDbTableName(String defaultDb, String dbtable) throws SemanticException
SemanticException

public static String getDatabaseName(String dbTableName) throws SemanticException
dbTableName -
SemanticException - input string is not qualified name

public static String getTableName(String dbTableName) throws SemanticException
dbTableName -
SemanticException - input string is not qualified name

public static void validateColumnNames(List<String> colNames, List<String> checkCols) throws SemanticException
SemanticException
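A small sketch of splitting qualified names; the two-argument overload is used so no active session is needed to supply the default database:

```java
import org.apache.hadoop.hive.ql.exec.Utilities;

public class DbTableNameExample {
  public static void main(String[] args) throws Exception {
    String[] qualified = Utilities.getDbTableName("default", "sales.orders"); // {"sales", "orders"}
    String[] unqualified = Utilities.getDbTableName("default", "orders");     // {"default", "orders"}
    System.out.println(qualified[0] + "." + qualified[1]);
    System.out.println(unqualified[0] + "." + unqualified[1]);
    // getDatabaseName/getTableName and the one-argument getDbTableName resolve the
    // default database from the current session instead.
  }
}
```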
public static int getDefaultNotificationInterval(org.apache.hadoop.conf.Configuration hconf)
hconf -

public static void copyTableJobPropertiesToConf(TableDesc tbl, org.apache.hadoop.conf.Configuration job)
tbl - table descriptor from which to read
job - configuration which receives configured properties

public static void copyTablePropertiesToConf(TableDesc tbl, org.apache.hadoop.mapred.JobConf job)
This differs from copyTableJobPropertiesToConf in that it does not allow parameters already set in the job to override the values from the table. This is important for setting the config up for reading, as the job may already have values in it from another table.
tbl -
job -

public static org.apache.hadoop.fs.ContentSummary getInputSummary(Context ctx, MapWork work, org.apache.hadoop.fs.PathFilter filter) throws IOException
ctx - the hadoop job context
work - map reduce job plan
filter - filter to apply to the input paths before calculating size
IOException
public static long sumOfExcept(Map<String,Long> aliasToSize, Set<String> aliases, Set<String> excepts)
public static boolean isEmptyPath(org.apache.hadoop.mapred.JobConf job, org.apache.hadoop.fs.Path dirPath, Context ctx) throws Exception
Exception
public static boolean isEmptyPath(org.apache.hadoop.mapred.JobConf job, org.apache.hadoop.fs.Path dirPath) throws Exception
Exception
public static List<TezTask> getTezTasks(List<Task<? extends Serializable>> tasks)
public static List<SparkTask> getSparkTasks(List<Task<? extends Serializable>> tasks)
public static List<ExecDriver> getMRTasks(List<Task<? extends Serializable>> tasks)
public static List<LinkedHashMap<String,String>> getFullDPSpecs(org.apache.hadoop.conf.Configuration conf, DynamicPartitionCtx dpCtx) throws HiveException
HiveException
public static StatsPublisher getStatsPublisher(org.apache.hadoop.mapred.JobConf jc)
public static String getHashedStatsPrefix(String statsPrefix, int maxPrefixLength)
statsPrefix - prefix of stats key
maxPrefixLength - max length of stats key

public static void setColumnNameList(org.apache.hadoop.mapred.JobConf jobConf, Operator op)
public static void setColumnNameList(org.apache.hadoop.mapred.JobConf jobConf, Operator op, boolean excludeVCs)
public static void setColumnTypeList(org.apache.hadoop.mapred.JobConf jobConf, Operator op)
public static void setColumnTypeList(org.apache.hadoop.mapred.JobConf jobConf, Operator op, boolean excludeVCs)
public static org.apache.hadoop.fs.Path generatePath(org.apache.hadoop.fs.Path basePath, String dumpFilePrefix, Byte tag, String bigBucketFileName)
public static org.apache.hadoop.fs.Path generateTmpPath(org.apache.hadoop.fs.Path basePath, String id)
public static org.apache.hadoop.fs.Path generateTarPath(org.apache.hadoop.fs.Path basePath, String filename)
public static String now()
public static double showTime(long time)
public static void reworkMapRedWork(Task<? extends Serializable> task, boolean reworkMapredWork, HiveConf conf) throws SemanticException
task -
reworkMapredWork -
conf -
SemanticException
public static <T> T executeWithRetry(Utilities.SQLCommand<T> cmd, PreparedStatement stmt, long baseWindow, int maxRetries) throws SQLException
cmd - the SQL command
stmt - the prepared statement of SQL.
baseWindow - The base time window (in milliseconds) before the next retry. See getRandomWaitTime(long, int, java.util.Random) for details.
maxRetries - the maximum # of retries when getting a SQLTransientException.
SQLException - throws SQLRecoverableException or SQLNonTransientException the first time it is caught, or SQLTransientException when maxRetries has been reached.

public static Connection connectWithRetry(String connectionString, long waitWindow, int maxRetries) throws SQLException
connectionString - the JDBC connection string.
waitWindow - The base time window (in milliseconds) before the next retry. See getRandomWaitTime(long, int, java.util.Random) for details.
maxRetries - the maximum # of retries when getting a SQLTransientException.
SQLException - throws SQLRecoverableException or SQLNonTransientException the first time it is caught, or SQLTransientException when maxRetries has been reached.

public static PreparedStatement prepareWithRetry(Connection conn, String stmt, long waitWindow, int maxRetries) throws SQLException
conn - a JDBC connection.
stmt - the SQL statement to be prepared.
waitWindow - The base time window (in milliseconds) before the next retry. See getRandomWaitTime(long, int, java.util.Random) for details.
maxRetries - the maximum # of retries when getting a SQLTransientException.
SQLException - throws SQLRecoverableException or SQLNonTransientException the first time it is caught, or SQLTransientException when maxRetries has been reached.

public static void setQueryTimeout(Statement stmt, int timeout) throws SQLException
SQLException
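A hedged sketch of the retrying JDBC helpers; the Derby URL and the query are placeholders (the corresponding driver is assumed to be on the classpath), and the base window and retry counts are arbitrary:

```java
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.SQLException;
import org.apache.hadoop.hive.ql.exec.Utilities;

public class RetryJdbcExample {
  public static void main(String[] args) throws SQLException {
    String url = "jdbc:derby:;databaseName=demo_db;create=true";  // placeholder JDBC URL

    // Transient failures are retried with a randomized backoff derived from the
    // base window (see getRandomWaitTime below).
    Connection conn = Utilities.connectWithRetry(url, 1000L, 3);
    PreparedStatement ps =
        Utilities.prepareWithRetry(conn, "VALUES 1", 1000L, 3);   // placeholder statement
    ps.close();
    conn.close();
  }
}
```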
public static long getRandomWaitTime(long baseWindow, int failures, Random r)
baseWindow - the base waiting window.
failures - number of failures so far.
r - a random generator.

public static String escapeSqlLike(String key)
key - the string that will be used for the SQL LIKE operator.

public static String formatMsecToStr(long msec)
msec - milliseconds

public static int estimateNumberOfReducers(HiveConf conf, org.apache.hadoop.fs.ContentSummary inputSummary, MapWork work, boolean finalMapRed) throws IOException
IOException
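For the escapeSqlLike and formatMsecToStr helpers documented above, a small usage sketch (the literal values are arbitrary):

```java
import org.apache.hadoop.hive.ql.exec.Utilities;

public class MiscHelpersExample {
  public static void main(String[] args) {
    // Escape '_' and '%' (and the escape character) before embedding in a LIKE pattern.
    String escaped = Utilities.escapeSqlLike("100%_done");
    System.out.println("... LIKE '" + escaped + "%'");

    // Human-readable elapsed time, e.g. for logging.
    System.out.println(Utilities.formatMsecToStr(3723000L));
  }
}
```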
public static int estimateReducers(long totalInputFileSize, long bytesPerReducer, int maxReducers, boolean powersOfTwo)
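A sketch of estimateReducers with hand-picked numbers; the description of the calculation in the comment is an approximation, not a statement of the exact implementation:

```java
import org.apache.hadoop.hive.ql.exec.Utilities;

public class ReducerEstimateExample {
  public static void main(String[] args) {
    long totalInputFileSize = 10L * 1024 * 1024 * 1024; // 10 GB of input
    long bytesPerReducer = 256L * 1024 * 1024;          // aim for ~256 MB per reducer
    int maxReducers = 1009;                             // upper bound from configuration

    // Roughly ceil(totalInputFileSize / bytesPerReducer), capped at maxReducers;
    // with powersOfTwo=true the estimate is additionally adjusted toward a power of two.
    int reducers = Utilities.estimateReducers(totalInputFileSize, bytesPerReducer, maxReducers, false);
    System.out.println(reducers);
  }
}
```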
public static long getTotalInputFileSize(org.apache.hadoop.fs.ContentSummary inputSummary, MapWork work, double highestSamplePercentage)
inputSummary -
work -
highestSamplePercentage -

public static long getTotalInputNumFiles(org.apache.hadoop.fs.ContentSummary inputSummary, MapWork work, double highestSamplePercentage)
inputSummary -
work -
highestSamplePercentage -

public static double getHighestSamplePercentage(MapWork work)
public static List<org.apache.hadoop.fs.Path> getInputPathsTez(org.apache.hadoop.mapred.JobConf job, MapWork work) throws Exception
Exception
public static List<org.apache.hadoop.fs.Path> getInputPaths(org.apache.hadoop.mapred.JobConf job, MapWork work, org.apache.hadoop.fs.Path hiveScratchDir, Context ctx, boolean skipDummy) throws Exception
job - JobConf used to run the job
work - MapWork encapsulating the info about the task
hiveScratchDir - The tmp dir used to create dummy files if needed
ctx - Context object
Exception

public static void setInputPaths(org.apache.hadoop.mapred.JobConf job, List<org.apache.hadoop.fs.Path> pathsToAdd)
job -
pathsToAdd -

public static void setInputAttributes(org.apache.hadoop.conf.Configuration conf, MapWork mWork)
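A minimal sketch of setInputPaths; the partition directories are hypothetical stand-ins for what getInputPaths would normally compute:

```java
import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.JobConf;

public class InputPathsExample {
  public static void main(String[] args) {
    JobConf job = new JobConf();
    List<Path> paths = Arrays.asList(
        new Path("/warehouse/db/tbl/ds=2017-01-01"),   // hypothetical partition dirs
        new Path("/warehouse/db/tbl/ds=2017-01-02"));

    Utilities.setInputPaths(job, paths);
    System.out.println(Arrays.toString(FileInputFormat.getInputPaths(job)));
  }
}
```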
public static void createTmpDirs(org.apache.hadoop.conf.Configuration conf, MapWork mWork) throws IOException
conf - Used to get the right FileSystem
mWork - Used to find FileSinkOperators
IOException

public static void createTmpDirs(org.apache.hadoop.conf.Configuration conf, ReduceWork rWork) throws IOException
conf - Used to get the right FileSystem
rWork - Used to find FileSinkOperators
IOException
public static boolean createDirsWithPermission(org.apache.hadoop.conf.Configuration conf, org.apache.hadoop.fs.Path mkdirPath, org.apache.hadoop.fs.permission.FsPermission fsPermission, boolean recursive) throws IOException
IOException
public static boolean isVectorMode(org.apache.hadoop.conf.Configuration conf)
public static void clearWorkMapForConf(org.apache.hadoop.conf.Configuration conf)
public static void clearWorkMap()
public static File createTempDir(String baseDir)
baseDir - directory under which new temp dir will be created

public static boolean skipHeader(org.apache.hadoop.mapred.RecordReader<org.apache.hadoop.io.WritableComparable,org.apache.hadoop.io.Writable> currRecReader, int headerCount, org.apache.hadoop.io.WritableComparable key, org.apache.hadoop.io.Writable value) throws IOException
currRecReader - Record reader.
headerCount - Header line number of the table files.
key - Key of current reading record.
value - Value of current reading record.
IOException

public static int getHeaderCount(TableDesc table) throws IOException
table - Table description for target table.
IOException

public static int getFooterCount(TableDesc table, org.apache.hadoop.mapred.JobConf job) throws IOException
table - Table description for target table.
job - Job configuration for current job.
IOException

public static String getQualifiedPath(HiveConf conf, org.apache.hadoop.fs.Path path) throws HiveException
conf - Hive configuration.
path - Path to convert.
HiveException
public static boolean isDefaultNameNode(HiveConf conf)
public static boolean isPerfOrAboveLogging(HiveConf conf)
conf - Hive configuration.