public class AcidUtils extends Object
Modifier and Type | Class and Description |
---|---|
static class |
AcidUtils.AcidBaseFileInfo
A simple wrapper class that stores the information about a base file and its type.
|
static class |
AcidUtils.AcidBaseFileType |
static class |
AcidUtils.AcidOperationalProperties
Current syntax for creating full acid transactional tables is any one of the following 3 ways:
create table T (a int, b int) stored as orc tblproperties('transactional'='true').
|
static class |
AcidUtils.AnyIdDirFilter |
static interface |
AcidUtils.Directory |
static class |
AcidUtils.DirectoryImpl |
static class |
AcidUtils.IdPathFilter |
static class |
AcidUtils.MetaDataFile
General facility to place a metadata file into a dir created by acid/compactor write.
|
static class |
AcidUtils.Operation |
static class |
AcidUtils.OrcAcidVersion
Logic related to versioning acid data format.
|
static class |
AcidUtils.ParsedDelta
Immutable
|
Modifier and Type | Field and Description |
---|---|
static String |
BASE_PREFIX |
static org.apache.hadoop.fs.PathFilter |
baseFileFilter |
static Pattern |
BUCKET_DIGIT_PATTERN |
static String |
BUCKET_DIGITS |
static Pattern |
BUCKET_PATTERN |
static String |
BUCKET_PREFIX |
static org.apache.hadoop.fs.PathFilter |
bucketFileFilter |
static String |
CONF_ACID_KEY |
static String |
DELETE_DELTA_PREFIX |
static org.apache.hadoop.fs.PathFilter |
deleteEventDeltaDirFilter |
static String |
DELTA_DIGITS |
static String |
DELTA_PREFIX |
static String |
DELTA_SIDE_FILE_SUFFIX
Acid Streaming Ingest writes multiple transactions to the same file.
|
static org.apache.hadoop.fs.PathFilter |
deltaFileFilter |
static org.apache.hadoop.fs.PathFilter |
hiddenFileFilter |
static Pattern |
LEGACY_BUCKET_DIGIT_PATTERN |
static String |
LEGACY_FILE_BUCKET_DIGITS |
static int |
MAX_STATEMENTS_PER_TXN
This must be in sync with
STATEMENT_DIGITS |
static Pattern |
ORIGINAL_PATTERN |
static Pattern |
ORIGINAL_PATTERN_COPY |
static org.apache.hadoop.fs.PathFilter |
originalBucketFilter
A write into a non-acid table produces files like 0000_0 or 0000_0_copy_1
(Unless via Load Data statement)
|
static String |
STATEMENT_DIGITS
10K statements per tx.
|
Modifier and Type | Method and Description |
---|---|
static String |
baseDir(long writeId) |
static String |
baseOrDeltaSubdir(boolean baseDirRequired,
long min,
long max,
int statementId)
Return a base or delta directory string
according to the given "baseDirRequired".
|
static org.apache.hadoop.fs.Path |
createBucketFile(org.apache.hadoop.fs.Path subdir,
int bucket)
Create the bucket filename in Acid format
|
static org.apache.hadoop.fs.Path |
createFilename(org.apache.hadoop.fs.Path directory,
AcidOutputFormat.Options options)
Create a filename for a bucket file.
|
static HadoopShims.HdfsFileStatusWithId |
createOriginalObj(HadoopShims.HdfsFileStatusWithId childWithId,
org.apache.hadoop.fs.FileStatus child) |
static String |
deleteDeltaSubdir(long min,
long max)
This is format of delete delta dir name prior to Hive 2.2.x
|
static String |
deleteDeltaSubdir(long min,
long max,
int statementId)
Each write statement in a transaction creates its own delete delta dir,
when split-update acid operational property is turned on.
|
static String |
deltaSubdir(long min,
long max)
This is format of delta dir name prior to Hive 1.3.x
|
static String |
deltaSubdir(long min,
long max,
int statementId)
Each write statement in a transaction creates its own delta dir.
|
static org.apache.hadoop.fs.Path[] |
deserializeDeleteDeltas(org.apache.hadoop.fs.Path root,
List<AcidInputFormat.DeltaMetaData> deleteDeltas)
Convert the list of begin/end write id pairs to a list of delete delta
directories.
|
static Long |
extractWriteId(org.apache.hadoop.fs.Path file) |
static void |
findOriginals(org.apache.hadoop.fs.FileSystem fs,
org.apache.hadoop.fs.FileStatus stat,
List<HadoopShims.HdfsFileStatusWithId> original,
Ref<Boolean> useFileIds,
boolean ignoreEmptyFiles,
boolean recursive)
Find the original files (non-ACID layout) recursively under the partition directory.
|
static List<org.apache.hadoop.fs.FileStatus> |
getAcidFilesForStats(Table table,
org.apache.hadoop.fs.Path dir,
org.apache.hadoop.conf.Configuration jc,
org.apache.hadoop.fs.FileSystem fs) |
static AcidUtils.AcidOperationalProperties |
getAcidOperationalProperties(org.apache.hadoop.conf.Configuration conf)
Returns the acidOperationalProperties for a given configuration.
|
static AcidUtils.AcidOperationalProperties |
getAcidOperationalProperties(Map<String,String> parameters)
Returns the acidOperationalProperties for a given map.
|
static AcidUtils.AcidOperationalProperties |
getAcidOperationalProperties(Properties props)
Returns the acidOperationalProperties for a given set of properties.
|
static AcidUtils.AcidOperationalProperties |
getAcidOperationalProperties(Table table)
Returns the acidOperationalProperties for a given table.
|
static AcidUtils.Directory |
getAcidState(org.apache.hadoop.fs.Path directory,
org.apache.hadoop.conf.Configuration conf,
org.apache.hadoop.hive.common.ValidWriteIdList writeIdList) |
static AcidUtils.Directory |
getAcidState(org.apache.hadoop.fs.Path directory,
org.apache.hadoop.conf.Configuration conf,
org.apache.hadoop.hive.common.ValidWriteIdList writeIdList,
boolean useFileIds,
boolean ignoreEmptyFiles)
Get the ACID state of the given directory.
|
static AcidUtils.Directory |
getAcidState(org.apache.hadoop.fs.Path directory,
org.apache.hadoop.conf.Configuration conf,
org.apache.hadoop.hive.common.ValidWriteIdList writeIdList,
Ref<Boolean> useFileIds,
boolean ignoreEmptyFiles,
Map<String,String> tblproperties) |
static String |
getAcidSubDir(org.apache.hadoop.fs.Path dataPath) |
static String |
getFullTableName(String dbName,
String tableName) |
static long |
getLogicalLength(org.apache.hadoop.fs.FileSystem fs,
org.apache.hadoop.fs.FileStatus file)
See comments at
DELTA_SIDE_FILE_SUFFIX . |
static org.apache.hadoop.fs.Path[] |
getPaths(List<AcidUtils.ParsedDelta> deltas)
Convert a list of deltas to a list of delta directories.
|
static org.apache.hadoop.hive.common.ValidWriteIdList |
getTableValidWriteIdList(org.apache.hadoop.conf.Configuration conf,
String fullTableName)
Extract the ValidWriteIdList for the given table from the list of tables' ValidWriteIdList.
|
static List<org.apache.hadoop.fs.Path> |
getValidDataPaths(org.apache.hadoop.fs.Path dataPath,
org.apache.hadoop.conf.Configuration conf,
String validWriteIdStr) |
static org.apache.hadoop.hive.common.ValidTxnWriteIdList |
getValidTxnWriteIdList(org.apache.hadoop.conf.Configuration conf)
Get the ValidTxnWriteIdList saved in the configuration.
|
static boolean |
isAcid(org.apache.hadoop.fs.Path directory,
org.apache.hadoop.conf.Configuration conf)
Is the given directory in ACID format?
|
static boolean |
isAcidEnabled(HiveConf hiveConf) |
static boolean |
isDeleteDelta(org.apache.hadoop.fs.Path p) |
static boolean |
isFullAcidScan(org.apache.hadoop.conf.Configuration conf) |
static boolean |
isFullAcidTable(CreateTableDesc td) |
static boolean |
isFullAcidTable(Map<String,String> params) |
static boolean |
isFullAcidTable(Table table)
Should produce the same result as
TxnUtils.isAcidTable(org.apache.hadoop.hive.metastore.api.Table) |
static boolean |
isFullAcidTable(Table table)
Should produce the same result as
TxnUtils.isAcidTable(org.apache.hadoop.hive.metastore.api.Table) |
static boolean |
isInsertOnlyTable(Map<String,String> params)
Checks if a table is a transactional table that only supports INSERT, but not UPDATE/DELETE
|
static boolean |
isInsertOnlyTable(Map<String,String> params,
boolean isCtas) |
static boolean |
isInsertOnlyTable(Properties params) |
static boolean |
isInsertOnlyTable(Table table) |
static boolean |
isRemovedInsertOnlyTable(Set<String> removedSet) |
static boolean |
isTablePropertyTransactional(org.apache.hadoop.conf.Configuration conf) |
static boolean |
isTablePropertyTransactional(Map<String,String> parameters) |
static boolean |
isTablePropertyTransactional(Properties props) |
static Boolean |
isToInsertOnlyTable(Table tbl,
Map<String,String> props)
The method for altering table props; may set the table to MM, non-MM, or not affect MM.
|
static boolean |
isTransactionalTable(CreateTableDesc table) |
static boolean |
isTransactionalTable(Map<String,String> props) |
static boolean |
isTransactionalTable(Table table) |
static boolean |
isTransactionalTable(Table table) |
static long |
parseBase(org.apache.hadoop.fs.Path path)
Get the write id from a base directory name.
|
static AcidOutputFormat.Options |
parseBaseOrDeltaBucketFilename(org.apache.hadoop.fs.Path bucketFile,
org.apache.hadoop.conf.Configuration conf)
Parse a bucket filename back into the options that would have created
the file.
|
static int |
parseBucketId(org.apache.hadoop.fs.Path bucketFile)
Get the bucket id from the file path
|
static AcidUtils.ParsedDelta |
parsedDelta(org.apache.hadoop.fs.Path deltaDir,
org.apache.hadoop.fs.FileSystem fs) |
static AcidUtils.ParsedDelta |
parsedDelta(org.apache.hadoop.fs.Path deltaDir,
String deltaPrefix,
org.apache.hadoop.fs.FileSystem fs) |
static List<AcidInputFormat.DeltaMetaData> |
serializeDeltas(List<AcidUtils.ParsedDelta> deltas)
Convert the list of deltas into an equivalent list of begin/end
write id pairs.
|
static void |
setAcidOperationalProperties(org.apache.hadoop.conf.Configuration conf,
boolean isTxnTable,
AcidUtils.AcidOperationalProperties properties)
Sets the acidOperationalProperties in the configuration object argument.
|
static void |
setAcidOperationalProperties(Map<String,String> parameters,
boolean isTxnTable,
AcidUtils.AcidOperationalProperties properties)
Sets the acidOperationalProperties in the map object argument.
|
static void |
setNonTransactional(Map<String,String> tblProps) |
static void |
setValidWriteIdList(org.apache.hadoop.conf.Configuration conf,
TableScanDesc tsDesc)
Set the valid write id list for the current table scan.
|
static void |
setValidWriteIdList(org.apache.hadoop.conf.Configuration conf,
org.apache.hadoop.hive.common.ValidWriteIdList validWriteIds)
Set the valid write id list for the current table scan.
|
static DataOperationType |
toDataOperationType(AcidUtils.Operation op)
Logically this should have been defined in Operation but that causes a dependency
on metastore package from exec jar (from the cluster) which is not allowed.
|
public static final String CONF_ACID_KEY
public static final String BASE_PREFIX
public static final org.apache.hadoop.fs.PathFilter baseFileFilter
public static final String DELTA_PREFIX
public static final String DELETE_DELTA_PREFIX
public static final String DELTA_SIDE_FILE_SUFFIX
OrcAcidUtils.getSideFile(Path)
side file which stores the length of
the primary file as of the last commit (OrcRecordUpdater.flush()
). That is the 'logical length'.
Once the primary is closed, the side file is deleted (logical length = actual length) but if
the writer dies or the primary file is being read while it's still being written to, anything
past the logical length should be ignored.
See also:
OrcAcidUtils.DELTA_SIDE_FILE_SUFFIX
,
OrcAcidUtils.getLastFlushLength(FileSystem, Path)
,
getLogicalLength(FileSystem, FileStatus)
,
Constant Field Values
public static final org.apache.hadoop.fs.PathFilter deltaFileFilter
public static final org.apache.hadoop.fs.PathFilter deleteEventDeltaDirFilter
public static final String BUCKET_PREFIX
public static final org.apache.hadoop.fs.PathFilter bucketFileFilter
public static final String BUCKET_DIGITS
public static final String LEGACY_FILE_BUCKET_DIGITS
public static final String DELTA_DIGITS
public static final String STATEMENT_DIGITS
public static final int MAX_STATEMENTS_PER_TXN
STATEMENT_DIGITS
public static final Pattern BUCKET_DIGIT_PATTERN
public static final Pattern LEGACY_BUCKET_DIGIT_PATTERN
public static final org.apache.hadoop.fs.PathFilter originalBucketFilter
public static final Pattern BUCKET_PATTERN
public static final Pattern ORIGINAL_PATTERN
public static final Pattern ORIGINAL_PATTERN_COPY
Utilities.COPY_KEYWORD
public static final org.apache.hadoop.fs.PathFilter hiddenFileFilter
public static org.apache.hadoop.fs.Path createBucketFile(org.apache.hadoop.fs.Path subdir, int bucket)
subdir
- the subdirectory for the bucket.
bucket
- the bucket number
public static String deltaSubdir(long min, long max)
public static String deltaSubdir(long min, long max, int statementId)
public static String deleteDeltaSubdir(long min, long max)
public static String deleteDeltaSubdir(long min, long max, int statementId)
public static String baseDir(long writeId)
public static String baseOrDeltaSubdir(boolean baseDirRequired, long min, long max, int statementId)
public static org.apache.hadoop.fs.Path createFilename(org.apache.hadoop.fs.Path directory, AcidOutputFormat.Options options)
directory
- the partition directory
options
- the options for writing the bucket
public static long parseBase(org.apache.hadoop.fs.Path path)
path
- the base directory name
public static int parseBucketId(org.apache.hadoop.fs.Path bucketFile)
bucketFile
- bucket file path
public static AcidOutputFormat.Options parseBaseOrDeltaBucketFilename(org.apache.hadoop.fs.Path bucketFile, org.apache.hadoop.conf.Configuration conf) throws IOException
bucketFile
- the path to a bucket file
conf
- the configuration
IOException
public static DataOperationType toDataOperationType(AcidUtils.Operation op)
public static org.apache.hadoop.fs.Path[] getPaths(List<AcidUtils.ParsedDelta> deltas)
deltas
- the list of deltas out of a Directory object.
public static List<AcidInputFormat.DeltaMetaData> serializeDeltas(List<AcidUtils.ParsedDelta> deltas)
deltas
is sorted.
deltas
-
public static org.apache.hadoop.fs.Path[] deserializeDeleteDeltas(org.apache.hadoop.fs.Path root, List<AcidInputFormat.DeltaMetaData> deleteDeltas) throws IOException
deltaSubdir(long, long, int)
root
- the root directory
deleteDeltas
- list of begin/end write id pairs
IOException
public static AcidUtils.ParsedDelta parsedDelta(org.apache.hadoop.fs.Path deltaDir, org.apache.hadoop.fs.FileSystem fs) throws IOException
IOException
public static AcidUtils.ParsedDelta parsedDelta(org.apache.hadoop.fs.Path deltaDir, String deltaPrefix, org.apache.hadoop.fs.FileSystem fs) throws IOException
IOException
public static boolean isAcid(org.apache.hadoop.fs.Path directory, org.apache.hadoop.conf.Configuration conf) throws IOException
directory
- the partition directory to check
conf
- the query configuration
IOException
public static AcidUtils.Directory getAcidState(org.apache.hadoop.fs.Path directory, org.apache.hadoop.conf.Configuration conf, org.apache.hadoop.hive.common.ValidWriteIdList writeIdList) throws IOException
IOException
public static AcidUtils.Directory getAcidState(org.apache.hadoop.fs.Path directory, org.apache.hadoop.conf.Configuration conf, org.apache.hadoop.hive.common.ValidWriteIdList writeIdList, boolean useFileIds, boolean ignoreEmptyFiles) throws IOException
directory
- the partition directory to analyze
conf
- the configuration
writeIdList
- the list of write ids that we are reading
IOException
public static AcidUtils.Directory getAcidState(org.apache.hadoop.fs.Path directory, org.apache.hadoop.conf.Configuration conf, org.apache.hadoop.hive.common.ValidWriteIdList writeIdList, Ref<Boolean> useFileIds, boolean ignoreEmptyFiles, Map<String,String> tblproperties) throws IOException
IOException
public static HadoopShims.HdfsFileStatusWithId createOriginalObj(HadoopShims.HdfsFileStatusWithId childWithId, org.apache.hadoop.fs.FileStatus child)
public static void findOriginals(org.apache.hadoop.fs.FileSystem fs, org.apache.hadoop.fs.FileStatus stat, List<HadoopShims.HdfsFileStatusWithId> original, Ref<Boolean> useFileIds, boolean ignoreEmptyFiles, boolean recursive) throws IOException
fs
- the file system
stat
- the directory to add
original
- the list of original files
IOException
public static boolean isTablePropertyTransactional(Properties props)
public static boolean isTablePropertyTransactional(Map<String,String> parameters)
public static boolean isTablePropertyTransactional(org.apache.hadoop.conf.Configuration conf)
public static boolean isDeleteDelta(org.apache.hadoop.fs.Path p)
p
- not null
public static boolean isTransactionalTable(CreateTableDesc table)
public static boolean isFullAcidTable(Table table)
TxnUtils.isAcidTable(org.apache.hadoop.hive.metastore.api.Table)
public static boolean isTransactionalTable(Table table)
public static boolean isFullAcidTable(Table table)
TxnUtils.isAcidTable(org.apache.hadoop.hive.metastore.api.Table)
public static boolean isTransactionalTable(Table table)
public static boolean isFullAcidTable(CreateTableDesc td)
public static boolean isFullAcidScan(org.apache.hadoop.conf.Configuration conf)
public static void setAcidOperationalProperties(org.apache.hadoop.conf.Configuration conf, boolean isTxnTable, AcidUtils.AcidOperationalProperties properties)
conf
- Mutable configuration object
properties
- An acidOperationalProperties object to initialize from. If this is null,
we assume this is a full transactional table.
public static void setAcidOperationalProperties(Map<String,String> parameters, boolean isTxnTable, AcidUtils.AcidOperationalProperties properties)
parameters
- Mutable map object
properties
- An acidOperationalProperties object to initialize from.
public static AcidUtils.AcidOperationalProperties getAcidOperationalProperties(Table table)
table
- A table object
public static AcidUtils.AcidOperationalProperties getAcidOperationalProperties(org.apache.hadoop.conf.Configuration conf)
conf
- A configuration object
public static AcidUtils.AcidOperationalProperties getAcidOperationalProperties(Properties props)
props
- A properties object
public static AcidUtils.AcidOperationalProperties getAcidOperationalProperties(Map<String,String> parameters)
parameters
- A parameters object
public static long getLogicalLength(org.apache.hadoop.fs.FileSystem fs, org.apache.hadoop.fs.FileStatus file) throws IOException
DELTA_SIDE_FILE_SUFFIX
.
Returns the logical end of file for an acid data file.
This relies on the fact that if delta_x_y has no committed transactions it will be filtered out
by getAcidState(Path, Configuration, ValidWriteIdList)
and so won't be read at all.
file
- data file to read/compute splits on
IOException
public static boolean isInsertOnlyTable(Map<String,String> params)
params
- table properties
public static boolean isInsertOnlyTable(Table table)
public static boolean isInsertOnlyTable(Map<String,String> params, boolean isCtas)
public static boolean isInsertOnlyTable(Properties params)
public static Boolean isToInsertOnlyTable(Table tbl, Map<String,String> props)
tbl
- object image before alter table command (or null if not retrieved yet).
props
- prop values set in this alter table command
public static org.apache.hadoop.hive.common.ValidTxnWriteIdList getValidTxnWriteIdList(org.apache.hadoop.conf.Configuration conf)
public static org.apache.hadoop.hive.common.ValidWriteIdList getTableValidWriteIdList(org.apache.hadoop.conf.Configuration conf, String fullTableName)
public static void setValidWriteIdList(org.apache.hadoop.conf.Configuration conf, org.apache.hadoop.hive.common.ValidWriteIdList validWriteIds)
public static void setValidWriteIdList(org.apache.hadoop.conf.Configuration conf, TableScanDesc tsDesc)
public static List<org.apache.hadoop.fs.FileStatus> getAcidFilesForStats(Table table, org.apache.hadoop.fs.Path dir, org.apache.hadoop.conf.Configuration jc, org.apache.hadoop.fs.FileSystem fs) throws IOException
IOException
public static List<org.apache.hadoop.fs.Path> getValidDataPaths(org.apache.hadoop.fs.Path dataPath, org.apache.hadoop.conf.Configuration conf, String validWriteIdStr) throws IOException
IOException
public static String getAcidSubDir(org.apache.hadoop.fs.Path dataPath)
public static boolean isAcidEnabled(HiveConf hiveConf)
public static Long extractWriteId(org.apache.hadoop.fs.Path file)
Copyright © 2022 The Apache Software Foundation. All rights reserved.