public final class HiveFileFormatUtils extends Object
Modifier and Type | Class and Description |
---|---|
static class |
HiveFileFormatUtils.FileChecker |
static class |
HiveFileFormatUtils.NullOutputCommitter |
Modifier and Type | Method and Description |
---|---|
static boolean |
checkInputFormat(org.apache.hadoop.fs.FileSystem fs,
HiveConf conf,
Class<? extends org.apache.hadoop.mapred.InputFormat> inputFormatCls,
List<org.apache.hadoop.fs.FileStatus> files)
Checks if files are in the same format as the given input format.
|
static List<String> |
doGetAliasesFromPath(Map<org.apache.hadoop.fs.Path,ArrayList<String>> pathToAliases,
org.apache.hadoop.fs.Path dir)
Get the list of aliases from the operator tree that are needed for the path
|
static List<Operator<? extends OperatorDesc>> |
doGetWorksFromPath(Map<org.apache.hadoop.fs.Path,ArrayList<String>> pathToAliases,
Map<String,Operator<? extends OperatorDesc>> aliasToWork,
org.apache.hadoop.fs.Path dir)
Get the list of operators from the operator tree that are needed for the path
|
static RecordUpdater |
getAcidRecordUpdater(org.apache.hadoop.mapred.JobConf jc,
TableDesc tableInfo,
int bucket,
FileSinkDesc conf,
org.apache.hadoop.fs.Path outPath,
ObjectInspector inspector,
org.apache.hadoop.mapred.Reporter reporter,
int rowIdColNum) |
static <T> T |
getFromPathRecursively(Map<org.apache.hadoop.fs.Path,T> pathToPartitionInfo,
org.apache.hadoop.fs.Path dir,
Map<Map<org.apache.hadoop.fs.Path,T>,Map<org.apache.hadoop.fs.Path,T>> cacheMap) |
static <T> T |
getFromPathRecursively(Map<org.apache.hadoop.fs.Path,T> pathToPartitionInfo,
org.apache.hadoop.fs.Path dir,
Map<Map<org.apache.hadoop.fs.Path,T>,Map<org.apache.hadoop.fs.Path,T>> cacheMap,
boolean ignoreSchema) |
static <T> T |
getFromPathRecursively(Map<org.apache.hadoop.fs.Path,T> pathToPartitionInfo,
org.apache.hadoop.fs.Path dir,
Map<Map<org.apache.hadoop.fs.Path,T>,Map<org.apache.hadoop.fs.Path,T>> cacheMap,
boolean ignoreSchema,
boolean ifPresent) |
static HiveOutputFormat<?,?> |
getHiveOutputFormat(org.apache.hadoop.conf.Configuration conf,
PartitionDesc partDesc) |
static HiveOutputFormat<?,?> |
getHiveOutputFormat(org.apache.hadoop.conf.Configuration conf,
TableDesc tableDesc) |
static FileSinkOperator.RecordWriter |
getHiveRecordWriter(org.apache.hadoop.mapred.JobConf jc,
TableDesc tableInfo,
Class<? extends org.apache.hadoop.io.Writable> outputClass,
FileSinkDesc conf,
org.apache.hadoop.fs.Path outPath,
org.apache.hadoop.mapred.Reporter reporter) |
static Class<? extends org.apache.hadoop.mapred.OutputFormat> |
getOutputFormatSubstitute(Class<?> origin)
Get an OutputFormat's substitute HiveOutputFormat.
|
static FileSinkOperator.RecordWriter |
getRecordWriter(org.apache.hadoop.mapred.JobConf jc,
org.apache.hadoop.mapred.OutputFormat<?,?> outputFormat,
Class<? extends org.apache.hadoop.io.Writable> valueClass,
boolean isCompressed,
Properties tableProp,
org.apache.hadoop.fs.Path outPath,
org.apache.hadoop.mapred.Reporter reporter) |
static void |
prepareJobOutput(org.apache.hadoop.mapred.JobConf conf)
Hive uses side effect files exclusively for its output.
|
public static Class<? extends org.apache.hadoop.mapred.OutputFormat> getOutputFormatSubstitute(Class<?> origin)
public static boolean checkInputFormat(org.apache.hadoop.fs.FileSystem fs, HiveConf conf, Class<? extends org.apache.hadoop.mapred.InputFormat> inputFormatCls, List<org.apache.hadoop.fs.FileStatus> files) throws HiveException
HiveException
public static FileSinkOperator.RecordWriter getHiveRecordWriter(org.apache.hadoop.mapred.JobConf jc, TableDesc tableInfo, Class<? extends org.apache.hadoop.io.Writable> outputClass, FileSinkDesc conf, org.apache.hadoop.fs.Path outPath, org.apache.hadoop.mapred.Reporter reporter) throws HiveException
HiveException
public static FileSinkOperator.RecordWriter getRecordWriter(org.apache.hadoop.mapred.JobConf jc, org.apache.hadoop.mapred.OutputFormat<?,?> outputFormat, Class<? extends org.apache.hadoop.io.Writable> valueClass, boolean isCompressed, Properties tableProp, org.apache.hadoop.fs.Path outPath, org.apache.hadoop.mapred.Reporter reporter) throws IOException, HiveException
IOException
HiveException
public static HiveOutputFormat<?,?> getHiveOutputFormat(org.apache.hadoop.conf.Configuration conf, TableDesc tableDesc) throws HiveException
HiveException
public static HiveOutputFormat<?,?> getHiveOutputFormat(org.apache.hadoop.conf.Configuration conf, PartitionDesc partDesc) throws HiveException
HiveException
public static RecordUpdater getAcidRecordUpdater(org.apache.hadoop.mapred.JobConf jc, TableDesc tableInfo, int bucket, FileSinkDesc conf, org.apache.hadoop.fs.Path outPath, ObjectInspector inspector, org.apache.hadoop.mapred.Reporter reporter, int rowIdColNum) throws HiveException, IOException
HiveException
IOException
public static <T> T getFromPathRecursively(Map<org.apache.hadoop.fs.Path,T> pathToPartitionInfo, org.apache.hadoop.fs.Path dir, Map<Map<org.apache.hadoop.fs.Path,T>,Map<org.apache.hadoop.fs.Path,T>> cacheMap) throws IOException
IOException
public static <T> T getFromPathRecursively(Map<org.apache.hadoop.fs.Path,T> pathToPartitionInfo, org.apache.hadoop.fs.Path dir, Map<Map<org.apache.hadoop.fs.Path,T>,Map<org.apache.hadoop.fs.Path,T>> cacheMap, boolean ignoreSchema) throws IOException
IOException
public static <T> T getFromPathRecursively(Map<org.apache.hadoop.fs.Path,T> pathToPartitionInfo, org.apache.hadoop.fs.Path dir, Map<Map<org.apache.hadoop.fs.Path,T>,Map<org.apache.hadoop.fs.Path,T>> cacheMap, boolean ignoreSchema, boolean ifPresent) throws IOException
IOException
public static List<Operator<? extends OperatorDesc>> doGetWorksFromPath(Map<org.apache.hadoop.fs.Path,ArrayList<String>> pathToAliases, Map<String,Operator<? extends OperatorDesc>> aliasToWork, org.apache.hadoop.fs.Path dir)
pathToAliases
- mapping from path to aliases
aliasToWork
- The operator tree to be invoked for a given alias
dir
- The path to look for
public static List<String> doGetAliasesFromPath(Map<org.apache.hadoop.fs.Path,ArrayList<String>> pathToAliases, org.apache.hadoop.fs.Path dir)
pathToAliases
- mapping from path to aliases
dir
- The path to look for
public static void prepareJobOutput(org.apache.hadoop.mapred.JobConf conf)
Copyright © 2022 The Apache Software Foundation. All rights reserved.