public class DagUtils extends Object
Modifier and Type | Field and Description |
---|---|
static String |
TEZ_MERGE_CURRENT_MERGE_FILE_PREFIX |
static String |
TEZ_MERGE_WORK_FILE_PREFIXES |
static String |
TEZ_TMP_DIR_KEY |
Modifier and Type | Method and Description |
---|---|
void |
addCredentials(BaseWork work,
org.apache.tez.dag.api.DAG dag)
Set up credentials for the base work on secure clusters
|
org.apache.hadoop.mapred.JobConf |
createConfiguration(HiveConf hiveConf)
Creates and initializes a JobConf object that can be used to execute
the DAG.
|
org.apache.tez.dag.api.Edge |
createEdge(org.apache.hadoop.mapred.JobConf vConf,
org.apache.tez.dag.api.Vertex v,
org.apache.tez.dag.api.Vertex w,
TezEdgeProperty edgeProp,
TezWork.VertexType vertexType)
Given two vertices and the configuration for the source vertex, createEdge
will create an Edge object that connects the two.
|
org.apache.tez.dag.api.GroupInputEdge |
createEdge(org.apache.tez.dag.api.VertexGroup group,
org.apache.hadoop.mapred.JobConf vConf,
org.apache.tez.dag.api.Vertex w,
TezEdgeProperty edgeProp,
TezWork.VertexType vertexType)
Given a Vertex group and a vertex createEdge will create an
Edge between them.
|
org.apache.tez.dag.api.PreWarmVertex |
createPreWarmVertex(org.apache.tez.dag.api.TezConfiguration conf,
int numContainers,
Map<String,org.apache.hadoop.yarn.api.records.LocalResource> localResources) |
org.apache.hadoop.fs.Path |
createTezDir(org.apache.hadoop.fs.Path scratchDir,
org.apache.hadoop.conf.Configuration conf)
createTezDir creates a temporary directory in the scratchDir folder to
be used with Tez.
|
org.apache.tez.dag.api.Vertex |
createVertex(org.apache.hadoop.mapred.JobConf conf,
BaseWork work,
org.apache.hadoop.fs.Path scratchDir,
org.apache.hadoop.yarn.api.records.LocalResource appJarLr,
List<org.apache.hadoop.yarn.api.records.LocalResource> additionalLr,
org.apache.hadoop.fs.FileSystem fileSystem,
Context ctx,
boolean hasChildren,
TezWork tezWork,
TezWork.VertexType vertexType)
Create a vertex from a given work object.
|
String |
getBaseName(org.apache.hadoop.yarn.api.records.LocalResource lr) |
static org.apache.hadoop.yarn.api.records.Resource |
getContainerResource(org.apache.hadoop.conf.Configuration conf) |
org.apache.hadoop.fs.Path |
getDefaultDestDir(org.apache.hadoop.conf.Configuration conf) |
String |
getExecJarPathLocal() |
org.apache.hadoop.fs.FileStatus |
getHiveJarDirectory(org.apache.hadoop.conf.Configuration conf) |
static DagUtils |
getInstance()
Singleton
|
String |
getResourceBaseName(org.apache.hadoop.fs.Path path) |
org.apache.hadoop.fs.Path |
getTezDir(org.apache.hadoop.fs.Path scratchDir)
Gets the tez dir that belongs to the hive scratch dir
|
org.apache.hadoop.mapred.JobConf |
initializeVertexConf(org.apache.hadoop.mapred.JobConf conf,
Context context,
BaseWork work)
Creates and initializes the JobConf object for a given BaseWork object.
|
org.apache.hadoop.yarn.api.records.LocalResource |
localizeResource(org.apache.hadoop.fs.Path src,
org.apache.hadoop.fs.Path dest,
org.apache.hadoop.yarn.api.records.LocalResourceType type,
org.apache.hadoop.conf.Configuration conf) |
List<org.apache.hadoop.yarn.api.records.LocalResource> |
localizeTempFiles(String hdfsDirPathStr,
org.apache.hadoop.conf.Configuration conf,
String[] inputOutputJars)
Localizes files, archives and jars from a provided array of names.
|
List<org.apache.hadoop.yarn.api.records.LocalResource> |
localizeTempFilesFromConf(String hdfsDirPathStr,
org.apache.hadoop.conf.Configuration conf)
Localizes files, archives and jars the user has instructed us
to provide on the cluster as resources for execution.
|
static org.apache.hadoop.fs.FileStatus |
validateTargetDir(org.apache.hadoop.fs.Path path,
org.apache.hadoop.conf.Configuration conf) |
public static final String TEZ_TMP_DIR_KEY
public static final String TEZ_MERGE_CURRENT_MERGE_FILE_PREFIX
public static final String TEZ_MERGE_WORK_FILE_PREFIXES
public org.apache.tez.dag.api.GroupInputEdge createEdge(org.apache.tez.dag.api.VertexGroup group, org.apache.hadoop.mapred.JobConf vConf, org.apache.tez.dag.api.Vertex w, TezEdgeProperty edgeProp, TezWork.VertexType vertexType) throws IOException
Parameters:
group - The parent VertexGroup
vConf - The job conf of one of the parent (grouped) vertices
w - The child vertex
edgeProp - the edge property of connection between the two endpoints.
Throws:
IOException
public org.apache.tez.dag.api.Edge createEdge(org.apache.hadoop.mapred.JobConf vConf, org.apache.tez.dag.api.Vertex v, org.apache.tez.dag.api.Vertex w, TezEdgeProperty edgeProp, TezWork.VertexType vertexType) throws IOException
Parameters:
vConf - JobConf of the first (source) vertex
v - The first vertex (source)
w - The second vertex (sink)
Throws:
IOException
public static org.apache.hadoop.yarn.api.records.Resource getContainerResource(org.apache.hadoop.conf.Configuration conf)
public org.apache.tez.dag.api.PreWarmVertex createPreWarmVertex(org.apache.tez.dag.api.TezConfiguration conf, int numContainers, Map<String,org.apache.hadoop.yarn.api.records.LocalResource> localResources) throws IOException, org.apache.tez.dag.api.TezException
Parameters:
numContainers - number of containers to pre-warm
localResources - additional resources to pre-warm with
Throws:
IOException
org.apache.tez.dag.api.TezException
public org.apache.hadoop.fs.Path getDefaultDestDir(org.apache.hadoop.conf.Configuration conf) throws LoginException, IOException
Parameters:
conf -
Throws:
LoginException - if we are unable to figure out user information
IOException - when any dfs operation fails.
public List<org.apache.hadoop.yarn.api.records.LocalResource> localizeTempFilesFromConf(String hdfsDirPathStr, org.apache.hadoop.conf.Configuration conf) throws IOException, LoginException
Parameters:
conf -
Throws:
IOException - when hdfs operation fails
LoginException - when getDefaultDestDir fails with the same exception
public List<org.apache.hadoop.yarn.api.records.LocalResource> localizeTempFiles(String hdfsDirPathStr, org.apache.hadoop.conf.Configuration conf, String[] inputOutputJars) throws IOException, LoginException
Parameters:
hdfsDirPathStr - Destination directory in HDFS.
conf - Configuration.
inputOutputJars - The file names to localize.
Throws:
IOException - when hdfs operation fails.
LoginException - when getDefaultDestDir fails with the same exception
public org.apache.hadoop.fs.FileStatus getHiveJarDirectory(org.apache.hadoop.conf.Configuration conf) throws IOException, LoginException
IOException
LoginException
public static org.apache.hadoop.fs.FileStatus validateTargetDir(org.apache.hadoop.fs.Path path, org.apache.hadoop.conf.Configuration conf) throws IOException
IOException
public String getExecJarPathLocal() throws URISyntaxException
URISyntaxException
public String getBaseName(org.apache.hadoop.yarn.api.records.LocalResource lr)
public String getResourceBaseName(org.apache.hadoop.fs.Path path)
Parameters:
path - the path from which we try to determine the resource base name
public org.apache.hadoop.yarn.api.records.LocalResource localizeResource(org.apache.hadoop.fs.Path src, org.apache.hadoop.fs.Path dest, org.apache.hadoop.yarn.api.records.LocalResourceType type, org.apache.hadoop.conf.Configuration conf) throws IOException
Parameters:
src - path to the source for the resource
dest - path in hdfs for the resource
type - local resource type (File/Archive)
conf -
Throws:
IOException - when any file system related call fails.
public org.apache.hadoop.mapred.JobConf createConfiguration(HiveConf hiveConf) throws IOException
Parameters:
hiveConf - Current hiveConf for the execution
Throws:
IOException
public org.apache.hadoop.mapred.JobConf initializeVertexConf(org.apache.hadoop.mapred.JobConf conf, Context context, BaseWork work)
Parameters:
conf - Any configurations in conf will be copied to the resulting new JobConf object.
work - BaseWork will be used to populate the configuration object.
public org.apache.tez.dag.api.Vertex createVertex(org.apache.hadoop.mapred.JobConf conf, BaseWork work, org.apache.hadoop.fs.Path scratchDir, org.apache.hadoop.yarn.api.records.LocalResource appJarLr, List<org.apache.hadoop.yarn.api.records.LocalResource> additionalLr, org.apache.hadoop.fs.FileSystem fileSystem, Context ctx, boolean hasChildren, TezWork tezWork, TezWork.VertexType vertexType) throws Exception
Parameters:
conf - JobConf to be used for this execution unit
work - The instance of BaseWork representing the actual work to be performed by this vertex.
scratchDir - HDFS scratch dir for this execution unit.
appJarLr - Local resource for hive-exec.
additionalLr -
fileSystem - FS corresponding to scratchDir and LocalResources
ctx - This query's context
Throws:
Exception
public void addCredentials(BaseWork work, org.apache.tez.dag.api.DAG dag)
public org.apache.hadoop.fs.Path createTezDir(org.apache.hadoop.fs.Path scratchDir, org.apache.hadoop.conf.Configuration conf) throws IOException
IOException
public org.apache.hadoop.fs.Path getTezDir(org.apache.hadoop.fs.Path scratchDir)
public static DagUtils getInstance()
Copyright © 2017 The Apache Software Foundation. All rights reserved.