public class DagUtils extends Object
Modifier and Type | Class and Description |
---|---|
static class |
DagUtils.ValueHashPartitioner |
Modifier and Type | Field and Description |
---|---|
static String |
TEZ_MERGE_CURRENT_MERGE_FILE_PREFIX |
static String |
TEZ_MERGE_WORK_FILE_PREFIXES |
static String |
TEZ_TMP_DIR_KEY |
Modifier and Type | Method and Description |
---|---|
void |
addCredentials(BaseWork work,
org.apache.tez.dag.api.DAG dag)
Set up credentials for the base work on secure clusters
|
boolean |
checkOrWaitForTheFile(org.apache.hadoop.fs.FileSystem srcFs,
org.apache.hadoop.fs.Path src,
org.apache.hadoop.fs.Path dest,
org.apache.hadoop.conf.Configuration conf,
Object notifier,
int waitAttempts,
long sleepInterval,
boolean doLog) |
org.apache.hadoop.mapred.JobConf |
createConfiguration(HiveConf hiveConf)
Creates and initializes a JobConf object that can be used to execute
the DAG.
|
String |
createDagName(org.apache.hadoop.conf.Configuration conf,
QueryPlan plan) |
org.apache.tez.dag.api.Edge |
createEdge(org.apache.hadoop.mapred.JobConf vConf,
org.apache.tez.dag.api.Vertex v,
org.apache.tez.dag.api.Vertex w,
TezEdgeProperty edgeProp,
BaseWork work,
TezWork tezWork)
Given two vertices and the configuration for the source vertex, createEdge
will create an Edge object that connects the two.
|
org.apache.tez.dag.api.GroupInputEdge |
createEdge(org.apache.tez.dag.api.VertexGroup group,
org.apache.hadoop.mapred.JobConf vConf,
org.apache.tez.dag.api.Vertex w,
TezEdgeProperty edgeProp,
BaseWork work,
TezWork tezWork)
Given a Vertex group and a vertex createEdge will create an
Edge between them.
|
org.apache.tez.dag.api.PreWarmVertex |
createPreWarmVertex(org.apache.tez.dag.api.TezConfiguration conf,
int numContainers,
Map<String,org.apache.hadoop.yarn.api.records.LocalResource> localResources) |
org.apache.hadoop.fs.Path |
createTezDir(org.apache.hadoop.fs.Path scratchDir,
org.apache.hadoop.conf.Configuration conf)
createTezDir creates a temporary directory in the scratchDir folder to
be used with Tez.
|
static Map<String,org.apache.hadoop.yarn.api.records.LocalResource> |
createTezLrMap(org.apache.hadoop.yarn.api.records.LocalResource appJarLr,
Collection<org.apache.hadoop.yarn.api.records.LocalResource> additionalLr) |
org.apache.tez.dag.api.Vertex |
createVertex(org.apache.hadoop.mapred.JobConf conf,
BaseWork work,
org.apache.hadoop.fs.Path scratchDir,
org.apache.hadoop.fs.FileSystem fileSystem,
Context ctx,
boolean hasChildren,
TezWork tezWork,
TezWork.VertexType vertexType,
Map<String,org.apache.hadoop.yarn.api.records.LocalResource> localResources)
Create a vertex from a given work object.
|
static String |
getBaseName(org.apache.hadoop.yarn.api.records.LocalResource lr) |
static org.apache.hadoop.yarn.api.records.Resource |
getContainerResource(org.apache.hadoop.conf.Configuration conf) |
org.apache.hadoop.fs.Path |
getDefaultDestDir(org.apache.hadoop.conf.Configuration conf) |
String |
getExecJarPathLocal(org.apache.hadoop.conf.Configuration configuration) |
org.apache.hadoop.fs.FileStatus |
getHiveJarDirectory(org.apache.hadoop.conf.Configuration conf) |
static DagUtils |
getInstance()
Singleton
|
String |
getResourceBaseName(org.apache.hadoop.fs.Path path) |
static Map<String,org.apache.hadoop.yarn.api.records.LocalResource> |
getResourcesUpdatableForAm(Collection<org.apache.hadoop.yarn.api.records.LocalResource> allNonAppResources) |
static String[] |
getTempFilesFromConf(org.apache.hadoop.conf.Configuration conf) |
org.apache.hadoop.fs.Path |
getTezDir(org.apache.hadoop.fs.Path scratchDir)
Gets the tez dir that belongs to the hive scratch dir
|
static String |
getUserSpecifiedDagName(org.apache.hadoop.conf.Configuration conf) |
org.apache.hadoop.mapred.JobConf |
initializeVertexConf(org.apache.hadoop.mapred.JobConf conf,
Context context,
BaseWork work)
Creates and initializes the JobConf object for a given BaseWork object.
|
org.apache.hadoop.yarn.api.records.LocalResource |
localizeResource(org.apache.hadoop.fs.Path src,
org.apache.hadoop.fs.Path dest,
org.apache.hadoop.yarn.api.records.LocalResourceType type,
org.apache.hadoop.conf.Configuration conf)
Localizes a resource.
|
List<org.apache.hadoop.yarn.api.records.LocalResource> |
localizeTempFiles(String hdfsDirPathStr,
org.apache.hadoop.conf.Configuration conf,
String[] inputOutputJars,
String[] skipJars)
Localizes files, archives and jars from a provided array of names.
|
List<org.apache.hadoop.yarn.api.records.LocalResource> |
localizeTempFilesFromConf(String hdfsDirPathStr,
org.apache.hadoop.conf.Configuration conf)
Localizes files, archives and jars the user has instructed us
to provide on the cluster as resources for execution.
|
static org.apache.hadoop.fs.FileStatus |
validateTargetDir(org.apache.hadoop.fs.Path path,
org.apache.hadoop.conf.Configuration conf) |
public static final String TEZ_TMP_DIR_KEY
public static final String TEZ_MERGE_CURRENT_MERGE_FILE_PREFIX
public static final String TEZ_MERGE_WORK_FILE_PREFIXES
public org.apache.tez.dag.api.GroupInputEdge createEdge(org.apache.tez.dag.api.VertexGroup group, org.apache.hadoop.mapred.JobConf vConf, org.apache.tez.dag.api.Vertex w, TezEdgeProperty edgeProp, BaseWork work, TezWork tezWork) throws IOException
group
- The parent VertexGroup
vConf
- The job conf of one of the parent (grouped) vertices
w
- The child vertex
edgeProp
- the edge property of connection between the two
endpoints.
IOException
public org.apache.tez.dag.api.Edge createEdge(org.apache.hadoop.mapred.JobConf vConf, org.apache.tez.dag.api.Vertex v, org.apache.tez.dag.api.Vertex w, TezEdgeProperty edgeProp, BaseWork work, TezWork tezWork) throws IOException
vConf
- JobConf of the first (source) vertex
v
- The first vertex (source)
w
- The second vertex (sink)
IOException
public static org.apache.hadoop.yarn.api.records.Resource getContainerResource(org.apache.hadoop.conf.Configuration conf)
public static Map<String,org.apache.hadoop.yarn.api.records.LocalResource> createTezLrMap(org.apache.hadoop.yarn.api.records.LocalResource appJarLr, Collection<org.apache.hadoop.yarn.api.records.LocalResource> additionalLr)
public org.apache.tez.dag.api.PreWarmVertex createPreWarmVertex(org.apache.tez.dag.api.TezConfiguration conf, int numContainers, Map<String,org.apache.hadoop.yarn.api.records.LocalResource> localResources) throws IOException, org.apache.tez.dag.api.TezException
numContainers
- number of containers to pre-warm
localResources
- additional resources to pre-warm with
IOException
org.apache.tez.dag.api.TezException
public org.apache.hadoop.fs.Path getDefaultDestDir(org.apache.hadoop.conf.Configuration conf) throws LoginException, IOException
conf
-
LoginException
- if we are unable to figure out user information
IOException
- when any dfs operation fails.
public List<org.apache.hadoop.yarn.api.records.LocalResource> localizeTempFilesFromConf(String hdfsDirPathStr, org.apache.hadoop.conf.Configuration conf) throws IOException, LoginException
conf
-
IOException
- when hdfs operation fails
LoginException
- when getDefaultDestDir fails with the same exception
public static String[] getTempFilesFromConf(org.apache.hadoop.conf.Configuration conf)
public List<org.apache.hadoop.yarn.api.records.LocalResource> localizeTempFiles(String hdfsDirPathStr, org.apache.hadoop.conf.Configuration conf, String[] inputOutputJars, String[] skipJars) throws IOException, LoginException
hdfsDirPathStr
- Destination directory in HDFS.
conf
- Configuration.
inputOutputJars
- The file names to localize.
IOException
- when hdfs operation fails.
LoginException
- when getDefaultDestDir fails with the same exception
public org.apache.hadoop.fs.FileStatus getHiveJarDirectory(org.apache.hadoop.conf.Configuration conf) throws IOException, LoginException
IOException
LoginException
public static org.apache.hadoop.fs.FileStatus validateTargetDir(org.apache.hadoop.fs.Path path, org.apache.hadoop.conf.Configuration conf) throws IOException
IOException
public String getExecJarPathLocal(org.apache.hadoop.conf.Configuration configuration) throws URISyntaxException
URISyntaxException
public static String getBaseName(org.apache.hadoop.yarn.api.records.LocalResource lr)
public String getResourceBaseName(org.apache.hadoop.fs.Path path)
path
- the string from which we try to determine the resource base name
public org.apache.hadoop.yarn.api.records.LocalResource localizeResource(org.apache.hadoop.fs.Path src, org.apache.hadoop.fs.Path dest, org.apache.hadoop.yarn.api.records.LocalResourceType type, org.apache.hadoop.conf.Configuration conf) throws IOException
src
- path to the source for the resource
dest
- path in hdfs for the resource
type
- local resource type (File/Archive)
conf
-
IOException
- when any file system related call fails.
public boolean checkOrWaitForTheFile(org.apache.hadoop.fs.FileSystem srcFs, org.apache.hadoop.fs.Path src, org.apache.hadoop.fs.Path dest, org.apache.hadoop.conf.Configuration conf, Object notifier, int waitAttempts, long sleepInterval, boolean doLog) throws IOException
IOException
public org.apache.hadoop.mapred.JobConf createConfiguration(HiveConf hiveConf) throws IOException
hiveConf
- Current conf for the execution
IOException
public org.apache.hadoop.mapred.JobConf initializeVertexConf(org.apache.hadoop.mapred.JobConf conf, Context context, BaseWork work)
conf
- Any configurations in conf will be copied to the resulting new JobConf object.
work
- BaseWork will be used to populate the configuration object.
public org.apache.tez.dag.api.Vertex createVertex(org.apache.hadoop.mapred.JobConf conf, BaseWork work, org.apache.hadoop.fs.Path scratchDir, org.apache.hadoop.fs.FileSystem fileSystem, Context ctx, boolean hasChildren, TezWork tezWork, TezWork.VertexType vertexType, Map<String,org.apache.hadoop.yarn.api.records.LocalResource> localResources) throws Exception
conf
- JobConf to be used for this execution unit
work
- The instance of BaseWork representing the actual work to be performed
by this vertex.
scratchDir
- HDFS scratch dir for this execution unit.
fileSystem
- FS corresponding to scratchDir and LocalResources
ctx
- This query's context
Exception
public void addCredentials(BaseWork work, org.apache.tez.dag.api.DAG dag) throws IOException
IOException
public org.apache.hadoop.fs.Path createTezDir(org.apache.hadoop.fs.Path scratchDir, org.apache.hadoop.conf.Configuration conf) throws IOException
IOException
public org.apache.hadoop.fs.Path getTezDir(org.apache.hadoop.fs.Path scratchDir)
public static DagUtils getInstance()
public String createDagName(org.apache.hadoop.conf.Configuration conf, QueryPlan plan)
public static String getUserSpecifiedDagName(org.apache.hadoop.conf.Configuration conf)
public static Map<String,org.apache.hadoop.yarn.api.records.LocalResource> getResourcesUpdatableForAm(Collection<org.apache.hadoop.yarn.api.records.LocalResource> allNonAppResources)
Copyright © 2022 The Apache Software Foundation. All rights reserved.