public final class HiveFileFormatUtils extends Object
Modifier and Type | Class and Description
---|---
static class | HiveFileFormatUtils.NullOutputCommitter
Modifier and Type | Method and Description
---|---
static boolean | checkInputFormat(org.apache.hadoop.fs.FileSystem fs, HiveConf conf, Class<? extends org.apache.hadoop.mapred.InputFormat> inputFormatCls, ArrayList<org.apache.hadoop.fs.FileStatus> files) Checks if the files are in the same format as the given input format.
static List<String> | doGetAliasesFromPath(Map<String,ArrayList<String>> pathToAliases, org.apache.hadoop.fs.Path dir) Get the list of aliases from the operator tree that are needed for the path.
static List<Operator<? extends OperatorDesc>> | doGetWorksFromPath(Map<String,ArrayList<String>> pathToAliases, Map<String,Operator<? extends OperatorDesc>> aliasToWork, org.apache.hadoop.fs.Path dir) Get the list of operators from the operator tree that are needed for the path.
static RecordUpdater | getAcidRecordUpdater(org.apache.hadoop.mapred.JobConf jc, TableDesc tableInfo, int bucket, FileSinkDesc conf, org.apache.hadoop.fs.Path outPath, ObjectInspector inspector, org.apache.hadoop.mapred.Reporter reporter, int rowIdColNum)
static HiveOutputFormat<?,?> | getHiveOutputFormat(org.apache.hadoop.conf.Configuration conf, PartitionDesc partDesc)
static HiveOutputFormat<?,?> | getHiveOutputFormat(org.apache.hadoop.conf.Configuration conf, TableDesc tableDesc)
static FileSinkOperator.RecordWriter | getHiveRecordWriter(org.apache.hadoop.mapred.JobConf jc, TableDesc tableInfo, Class<? extends org.apache.hadoop.io.Writable> outputClass, FileSinkDesc conf, org.apache.hadoop.fs.Path outPath, org.apache.hadoop.mapred.Reporter reporter)
static Class<? extends InputFormatChecker> | getInputFormatChecker(Class<?> inputFormat) Get an InputFormatChecker for a file format.
static org.apache.hadoop.fs.Path | getOutputFormatFinalPath(org.apache.hadoop.fs.Path parent, String taskId, org.apache.hadoop.mapred.JobConf jc, HiveOutputFormat<?,?> hiveOutputFormat, boolean isCompressed, org.apache.hadoop.fs.Path defaultFinalPath) Deprecated.
static Class<? extends org.apache.hadoop.mapred.OutputFormat> | getOutputFormatSubstitute(Class<?> origin) Get an OutputFormat's substitute HiveOutputFormat.
static PartitionDesc | getPartitionDescFromPathRecursively(Map<String,PartitionDesc> pathToPartitionInfo, org.apache.hadoop.fs.Path dir, Map<Map<String,PartitionDesc>,Map<String,PartitionDesc>> cacheMap)
static PartitionDesc | getPartitionDescFromPathRecursively(Map<String,PartitionDesc> pathToPartitionInfo, org.apache.hadoop.fs.Path dir, Map<Map<String,PartitionDesc>,Map<String,PartitionDesc>> cacheMap, boolean ignoreSchema)
static FileSinkOperator.RecordWriter | getRecordWriter(org.apache.hadoop.mapred.JobConf jc, org.apache.hadoop.mapred.OutputFormat<?,?> outputFormat, Class<? extends org.apache.hadoop.io.Writable> valueClass, boolean isCompressed, Properties tableProp, org.apache.hadoop.fs.Path outPath, org.apache.hadoop.mapred.Reporter reporter)
static void | prepareJobOutput(org.apache.hadoop.mapred.JobConf conf) Hive uses side-effect files exclusively for its output.
static void | registerInputFormatChecker(Class<? extends org.apache.hadoop.mapred.InputFormat> format, Class<? extends InputFormatChecker> checker) Register an InputFormatChecker for a given InputFormat.
static void | registerOutputFormatSubstitute(Class<?> origin, Class<? extends HiveOutputFormat> substitute) Register a substitute.
public static void registerOutputFormatSubstitute(Class<?> origin, Class<? extends HiveOutputFormat> substitute)
Parameters:
origin - the class that needs to be substituted
substitute -

public static Class<? extends org.apache.hadoop.mapred.OutputFormat> getOutputFormatSubstitute(Class<?> origin)
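For illustration, a minimal sketch of how these two calls pair up. The helper method and its parameters are hypothetical; only the two HiveFileFormatUtils calls come from this class.

```java
import org.apache.hadoop.hive.ql.io.HiveFileFormatUtils;
import org.apache.hadoop.hive.ql.io.HiveOutputFormat;
import org.apache.hadoop.mapred.OutputFormat;

public class OutputFormatSubstituteExample {
  // Registers 'substitute' for 'origin', then resolves it back.
  // Which concrete classes to pass is up to the caller; none are assumed here.
  static Class<? extends OutputFormat> registerAndResolve(
      Class<? extends OutputFormat> origin,
      Class<? extends HiveOutputFormat> substitute) {
    HiveFileFormatUtils.registerOutputFormatSubstitute(origin, substitute);
    return HiveFileFormatUtils.getOutputFormatSubstitute(origin);
  }
}
```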
@Deprecated
public static org.apache.hadoop.fs.Path getOutputFormatFinalPath(org.apache.hadoop.fs.Path parent, String taskId, org.apache.hadoop.mapred.JobConf jc, HiveOutputFormat<?,?> hiveOutputFormat, boolean isCompressed, org.apache.hadoop.fs.Path defaultFinalPath) throws IOException
Deprecated.
Parameters:
parent - parent dir of the expected final output path
jc - job configuration
Throws:
IOException
public static void registerInputFormatChecker(Class<? extends org.apache.hadoop.mapred.InputFormat> format, Class<? extends InputFormatChecker> checker)
Parameters:
format - the class that needs to be substituted
checker -

public static Class<? extends InputFormatChecker> getInputFormatChecker(Class<?> inputFormat)
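Likewise, a hypothetical helper sketching how a checker is registered for an InputFormat class and later looked up again; the method name and parameters are illustrative only.

```java
import org.apache.hadoop.hive.ql.io.HiveFileFormatUtils;
import org.apache.hadoop.hive.ql.io.InputFormatChecker;
import org.apache.hadoop.mapred.InputFormat;

public class InputFormatCheckerExample {
  // Registers 'checker' for 'format', then resolves it back via the same registry.
  static Class<? extends InputFormatChecker> registerAndResolve(
      Class<? extends InputFormat> format,
      Class<? extends InputFormatChecker> checker) {
    HiveFileFormatUtils.registerInputFormatChecker(format, checker);
    return HiveFileFormatUtils.getInputFormatChecker(format);
  }
}
```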
public static boolean checkInputFormat(org.apache.hadoop.fs.FileSystem fs, HiveConf conf, Class<? extends org.apache.hadoop.mapred.InputFormat> inputFormatCls, ArrayList<org.apache.hadoop.fs.FileStatus> files) throws HiveException
Throws:
HiveException
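A sketch of how checkInputFormat might be driven from a directory listing. SequenceFileInputFormat is used only as an example format, and the helper method is hypothetical.

```java
import java.util.ArrayList;
import java.util.Arrays;

import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.io.HiveFileFormatUtils;
import org.apache.hadoop.mapred.SequenceFileInputFormat;

public class InputFormatCheckExample {
  // Returns true if every file directly under 'dir' matches the SequenceFile format.
  static boolean filesAreSequenceFiles(HiveConf conf, Path dir) throws Exception {
    FileSystem fs = dir.getFileSystem(conf);
    ArrayList<FileStatus> files = new ArrayList<>(Arrays.asList(fs.listStatus(dir)));
    return HiveFileFormatUtils.checkInputFormat(fs, conf, SequenceFileInputFormat.class, files);
  }
}
```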
public static FileSinkOperator.RecordWriter getHiveRecordWriter(org.apache.hadoop.mapred.JobConf jc, TableDesc tableInfo, Class<? extends org.apache.hadoop.io.Writable> outputClass, FileSinkDesc conf, org.apache.hadoop.fs.Path outPath, org.apache.hadoop.mapred.Reporter reporter) throws HiveException
Throws:
HiveException
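A sketch of the call shape, assuming the TableDesc, FileSinkDesc, and output path come from a FileSinkOperator's plan; Text is just one possible Writable output class.

```java
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
import org.apache.hadoop.hive.ql.io.HiveFileFormatUtils;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.plan.FileSinkDesc;
import org.apache.hadoop.hive.ql.plan.TableDesc;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.Reporter;

public class HiveRecordWriterExample {
  // 'tableInfo' and 'fsDesc' are assumed to be taken from the query plan;
  // this only shows the parameter order of the call.
  static FileSinkOperator.RecordWriter openWriter(JobConf jc, TableDesc tableInfo,
      FileSinkDesc fsDesc, Path outPath) throws HiveException {
    return HiveFileFormatUtils.getHiveRecordWriter(
        jc, tableInfo, Text.class, fsDesc, outPath, Reporter.NULL);
  }
}
```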
public static FileSinkOperator.RecordWriter getRecordWriter(org.apache.hadoop.mapred.JobConf jc, org.apache.hadoop.mapred.OutputFormat<?,?> outputFormat, Class<? extends org.apache.hadoop.io.Writable> valueClass, boolean isCompressed, Properties tableProp, org.apache.hadoop.fs.Path outPath, org.apache.hadoop.mapred.Reporter reporter) throws IOException, HiveException
Throws:
IOException
HiveException
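The lower-level overload can be sketched the same way; here the OutputFormat instance and the table Properties are passed explicitly (both assumed to be derived from a TableDesc by the caller).

```java
import java.util.Properties;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
import org.apache.hadoop.hive.ql.io.HiveFileFormatUtils;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.OutputFormat;
import org.apache.hadoop.mapred.Reporter;

public class LowLevelRecordWriterExample {
  // Unlike getHiveRecordWriter, the caller supplies the OutputFormat and table
  // properties directly; the helper and its parameters are illustrative only.
  static FileSinkOperator.RecordWriter openWriter(JobConf jc, OutputFormat<?, ?> outputFormat,
      Properties tableProps, Path outPath, boolean isCompressed) throws Exception {
    return HiveFileFormatUtils.getRecordWriter(
        jc, outputFormat, Text.class, isCompressed, tableProps, outPath, Reporter.NULL);
  }
}
```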
public static HiveOutputFormat<?,?> getHiveOutputFormat(org.apache.hadoop.conf.Configuration conf, TableDesc tableDesc) throws HiveException
Throws:
HiveException

public static HiveOutputFormat<?,?> getHiveOutputFormat(org.apache.hadoop.conf.Configuration conf, PartitionDesc partDesc) throws HiveException
Throws:
HiveException
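A minimal sketch of both overloads, assuming the TableDesc or PartitionDesc is taken from the query plan; the wrapper methods are hypothetical.

```java
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.ql.io.HiveFileFormatUtils;
import org.apache.hadoop.hive.ql.io.HiveOutputFormat;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.plan.PartitionDesc;
import org.apache.hadoop.hive.ql.plan.TableDesc;

public class HiveOutputFormatLookupExample {
  // Resolves the HiveOutputFormat declared by a table descriptor.
  static HiveOutputFormat<?, ?> forTable(Configuration conf, TableDesc tableDesc) throws HiveException {
    return HiveFileFormatUtils.getHiveOutputFormat(conf, tableDesc);
  }

  // Same lookup, driven by a partition descriptor instead.
  static HiveOutputFormat<?, ?> forPartition(Configuration conf, PartitionDesc partDesc) throws HiveException {
    return HiveFileFormatUtils.getHiveOutputFormat(conf, partDesc);
  }
}
```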
public static RecordUpdater getAcidRecordUpdater(org.apache.hadoop.mapred.JobConf jc, TableDesc tableInfo, int bucket, FileSinkDesc conf, org.apache.hadoop.fs.Path outPath, ObjectInspector inspector, org.apache.hadoop.mapred.Reporter reporter, int rowIdColNum) throws HiveException, IOException
Throws:
HiveException
IOException
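A sketch of the call shape for ACID writes, assuming the TableDesc, FileSinkDesc, and row ObjectInspector come from the FileSinkOperator's plan and serde; the wrapper method is hypothetical.

```java
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.io.HiveFileFormatUtils;
import org.apache.hadoop.hive.ql.io.RecordUpdater;
import org.apache.hadoop.hive.ql.plan.FileSinkDesc;
import org.apache.hadoop.hive.ql.plan.TableDesc;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.Reporter;

public class AcidRecordUpdaterExample {
  // Opens a RecordUpdater for one bucket of an ACID table.
  // 'bucket' and 'rowIdColNum' are expected to come from the caller's plan.
  static RecordUpdater openUpdater(JobConf jc, TableDesc tableInfo, FileSinkDesc fsDesc,
      Path outPath, ObjectInspector rowInspector, int bucket, int rowIdColNum) throws Exception {
    return HiveFileFormatUtils.getAcidRecordUpdater(
        jc, tableInfo, bucket, fsDesc, outPath, rowInspector, Reporter.NULL, rowIdColNum);
  }
}
```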
public static PartitionDesc getPartitionDescFromPathRecursively(Map<String,PartitionDesc> pathToPartitionInfo, org.apache.hadoop.fs.Path dir, Map<Map<String,PartitionDesc>,Map<String,PartitionDesc>> cacheMap) throws IOException
Throws:
IOException

public static PartitionDesc getPartitionDescFromPathRecursively(Map<String,PartitionDesc> pathToPartitionInfo, org.apache.hadoop.fs.Path dir, Map<Map<String,PartitionDesc>,Map<String,PartitionDesc>> cacheMap, boolean ignoreSchema) throws IOException
Throws:
IOException
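A sketch of the lookup, assuming the pathToPartitionInfo map comes from the task's MapWork; the cache map is just a caller-owned map reused across lookups, and the helper itself is hypothetical.

```java
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.io.HiveFileFormatUtils;
import org.apache.hadoop.hive.ql.plan.PartitionDesc;

public class PartitionDescLookupExample {
  // Reused across lookups so repeated queries against the same
  // pathToPartitionInfo map can be served from the cache.
  private static final Map<Map<String, PartitionDesc>, Map<String, PartitionDesc>> CACHE =
      new HashMap<>();

  // Finds the PartitionDesc whose path prefix covers 'splitPath'.
  static PartitionDesc lookup(Map<String, PartitionDesc> pathToPartitionInfo, Path splitPath)
      throws IOException {
    return HiveFileFormatUtils.getPartitionDescFromPathRecursively(pathToPartitionInfo, splitPath, CACHE);
  }
}
```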
public static List<Operator<? extends OperatorDesc>> doGetWorksFromPath(Map<String,ArrayList<String>> pathToAliases, Map<String,Operator<? extends OperatorDesc>> aliasToWork, org.apache.hadoop.fs.Path dir)
Parameters:
pathToAliases - mapping from path to aliases
aliasToWork - the operator tree to be invoked for a given alias
dir - the path to look for

public static List<String> doGetAliasesFromPath(Map<String,ArrayList<String>> pathToAliases, org.apache.hadoop.fs.Path dir)
Parameters:
pathToAliases - mapping from path to aliases
dir - the path to look for

public static void prepareJobOutput(org.apache.hadoop.mapred.JobConf conf)
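A sketch of how these three methods are typically combined, assuming the pathToAliases and aliasToWork maps come from the task's MapWork; the wrapper class and method names are hypothetical.

```java
import java.util.ArrayList;
import java.util.List;
import java.util.Map;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.io.HiveFileFormatUtils;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;
import org.apache.hadoop.mapred.JobConf;

public class PathToWorkExample {
  // Aliases (table or subquery names) that read from this path.
  static List<String> aliasesForSplit(Map<String, ArrayList<String>> pathToAliases, Path splitPath) {
    return HiveFileFormatUtils.doGetAliasesFromPath(pathToAliases, splitPath);
  }

  // Operator trees that must be fed the rows coming from this path.
  static List<Operator<? extends OperatorDesc>> operatorsForSplit(
      Map<String, ArrayList<String>> pathToAliases,
      Map<String, Operator<? extends OperatorDesc>> aliasToWork,
      Path splitPath) {
    return HiveFileFormatUtils.doGetWorksFromPath(pathToAliases, aliasToWork, splitPath);
  }

  // Prepares the JobConf so Hive's side-effect files are used for the job's output,
  // per the prepareJobOutput description above.
  static void configureJob(JobConf job) {
    HiveFileFormatUtils.prepareJobOutput(job);
  }
}
```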
Copyright © 2017 The Apache Software Foundation. All rights reserved.