public class DagUtils extends Object
Modifier and Type | Field and Description |
---|---|
static String |
TEZ_MERGE_CURRENT_MERGE_FILE_PREFIX |
static String |
TEZ_MERGE_WORK_FILE_PREFIXES |
static String |
TEZ_TMP_DIR_KEY |
Modifier and Type | Method and Description |
---|---|
void |
addCredentials(BaseWork work,
org.apache.tez.dag.api.DAG dag)
Set up credentials for the base work on secure clusters
|
boolean |
checkOrWaitForTheFile(org.apache.hadoop.fs.Path src,
org.apache.hadoop.fs.Path dest,
org.apache.hadoop.conf.Configuration conf,
Object notifier,
int waitAttempts,
long sleepInterval,
boolean doLog) |
org.apache.hadoop.mapred.JobConf |
createConfiguration(HiveConf hiveConf)
Creates and initializes a JobConf object that can be used to execute
the DAG.
|
String |
createDagName(org.apache.hadoop.conf.Configuration conf,
QueryPlan plan) |
org.apache.tez.dag.api.Edge |
createEdge(org.apache.hadoop.mapred.JobConf vConf,
org.apache.tez.dag.api.Vertex v,
org.apache.tez.dag.api.Vertex w,
TezEdgeProperty edgeProp,
TezWork.VertexType vertexType)
Given two vertices and the configuration for the source vertex, createEdge
will create an Edge object that connects the two.
|
org.apache.tez.dag.api.GroupInputEdge |
createEdge(org.apache.tez.dag.api.VertexGroup group,
org.apache.hadoop.mapred.JobConf vConf,
org.apache.tez.dag.api.Vertex w,
TezEdgeProperty edgeProp,
TezWork.VertexType vertexType)
Given a Vertex group and a vertex createEdge will create an
Edge between them.
|
org.apache.tez.dag.api.PreWarmVertex |
createPreWarmVertex(org.apache.tez.dag.api.TezConfiguration conf,
int numContainers,
Map<String,org.apache.hadoop.yarn.api.records.LocalResource> localResources) |
org.apache.hadoop.fs.Path |
createTezDir(org.apache.hadoop.fs.Path scratchDir,
org.apache.hadoop.conf.Configuration conf)
createTezDir creates a temporary directory in the scratchDir folder to
be used with Tez.
|
org.apache.tez.dag.api.Vertex |
createVertex(org.apache.hadoop.mapred.JobConf conf,
BaseWork work,
org.apache.hadoop.fs.Path scratchDir,
org.apache.hadoop.yarn.api.records.LocalResource appJarLr,
List<org.apache.hadoop.yarn.api.records.LocalResource> additionalLr,
org.apache.hadoop.fs.FileSystem fileSystem,
Context ctx,
boolean hasChildren,
TezWork tezWork,
TezWork.VertexType vertexType)
Create a vertex from a given work object.
|
String |
getBaseName(org.apache.hadoop.yarn.api.records.LocalResource lr) |
static org.apache.hadoop.yarn.api.records.Resource |
getContainerResource(org.apache.hadoop.conf.Configuration conf) |
org.apache.hadoop.fs.Path |
getDefaultDestDir(org.apache.hadoop.conf.Configuration conf) |
String |
getExecJarPathLocal() |
org.apache.hadoop.fs.FileStatus |
getHiveJarDirectory(org.apache.hadoop.conf.Configuration conf) |
static DagUtils |
getInstance()
Singleton
|
String |
getResourceBaseName(org.apache.hadoop.fs.Path path) |
org.apache.hadoop.fs.Path |
getTezDir(org.apache.hadoop.fs.Path scratchDir)
Gets the tez dir that belongs to the hive scratch dir
|
static String |
getUserSpecifiedDagName(org.apache.hadoop.conf.Configuration conf) |
org.apache.hadoop.mapred.JobConf |
initializeVertexConf(org.apache.hadoop.mapred.JobConf conf,
Context context,
BaseWork work)
Creates and initializes the JobConf object for a given BaseWork object.
|
org.apache.hadoop.yarn.api.records.LocalResource |
localizeResource(org.apache.hadoop.fs.Path src,
org.apache.hadoop.fs.Path dest,
org.apache.hadoop.yarn.api.records.LocalResourceType type,
org.apache.hadoop.conf.Configuration conf)
Localizes a resource.
|
List<org.apache.hadoop.yarn.api.records.LocalResource> |
localizeTempFiles(String hdfsDirPathStr,
org.apache.hadoop.conf.Configuration conf,
String[] inputOutputJars)
Localizes files, archives and jars from a provided array of names.
|
List<org.apache.hadoop.yarn.api.records.LocalResource> |
localizeTempFilesFromConf(String hdfsDirPathStr,
org.apache.hadoop.conf.Configuration conf)
Localizes files, archives and jars the user has instructed us
to provide on the cluster as resources for execution.
|
static org.apache.hadoop.fs.FileStatus |
validateTargetDir(org.apache.hadoop.fs.Path path,
org.apache.hadoop.conf.Configuration conf) |
public static final String TEZ_TMP_DIR_KEY
public static final String TEZ_MERGE_CURRENT_MERGE_FILE_PREFIX
public static final String TEZ_MERGE_WORK_FILE_PREFIXES
public org.apache.tez.dag.api.GroupInputEdge createEdge(org.apache.tez.dag.api.VertexGroup group, org.apache.hadoop.mapred.JobConf vConf, org.apache.tez.dag.api.Vertex w, TezEdgeProperty edgeProp, TezWork.VertexType vertexType) throws IOException
group
- The parent VertexGroup
vConf
- The job conf of one of the parent (grouped) vertices
w
- The child vertex
edgeProp
- the edge property of the connection between the two
endpoints.
IOException
public org.apache.tez.dag.api.Edge createEdge(org.apache.hadoop.mapred.JobConf vConf, org.apache.tez.dag.api.Vertex v, org.apache.tez.dag.api.Vertex w, TezEdgeProperty edgeProp, TezWork.VertexType vertexType) throws IOException
vConf
- JobConf of the first (source) vertex
v
- The first vertex (source)
w
- The second vertex (sink)
IOException
public static org.apache.hadoop.yarn.api.records.Resource getContainerResource(org.apache.hadoop.conf.Configuration conf)
public org.apache.tez.dag.api.PreWarmVertex createPreWarmVertex(org.apache.tez.dag.api.TezConfiguration conf, int numContainers, Map<String,org.apache.hadoop.yarn.api.records.LocalResource> localResources) throws IOException, org.apache.tez.dag.api.TezException
numContainers
- number of containers to pre-warm
localResources
- additional resources to pre-warm with
IOException
org.apache.tez.dag.api.TezException
public org.apache.hadoop.fs.Path getDefaultDestDir(org.apache.hadoop.conf.Configuration conf) throws LoginException, IOException
conf
-
LoginException
- if we are unable to figure out user information
IOException
- when any dfs operation fails.
public List<org.apache.hadoop.yarn.api.records.LocalResource> localizeTempFilesFromConf(String hdfsDirPathStr, org.apache.hadoop.conf.Configuration conf) throws IOException, LoginException
conf
-
IOException
- when hdfs operation fails
LoginException
- when getDefaultDestDir fails with the same exception
public List<org.apache.hadoop.yarn.api.records.LocalResource> localizeTempFiles(String hdfsDirPathStr, org.apache.hadoop.conf.Configuration conf, String[] inputOutputJars) throws IOException, LoginException
hdfsDirPathStr
- Destination directory in HDFS.
conf
- Configuration.
inputOutputJars
- The file names to localize.
IOException
- when hdfs operation fails.
LoginException
- when getDefaultDestDir fails with the same exception
public org.apache.hadoop.fs.FileStatus getHiveJarDirectory(org.apache.hadoop.conf.Configuration conf) throws IOException, LoginException
IOException
LoginException
public static org.apache.hadoop.fs.FileStatus validateTargetDir(org.apache.hadoop.fs.Path path, org.apache.hadoop.conf.Configuration conf) throws IOException
IOException
public String getExecJarPathLocal() throws URISyntaxException
URISyntaxException
public String getBaseName(org.apache.hadoop.yarn.api.records.LocalResource lr)
public String getResourceBaseName(org.apache.hadoop.fs.Path path)
path
- the string from which we try to determine the resource base name
public org.apache.hadoop.yarn.api.records.LocalResource localizeResource(org.apache.hadoop.fs.Path src, org.apache.hadoop.fs.Path dest, org.apache.hadoop.yarn.api.records.LocalResourceType type, org.apache.hadoop.conf.Configuration conf) throws IOException
src
- path to the source for the resource
dest
- path in hdfs for the resource
type
- local resource type (File/Archive)
conf
-
IOException
- when any file system related call fails.
public boolean checkOrWaitForTheFile(org.apache.hadoop.fs.Path src, org.apache.hadoop.fs.Path dest, org.apache.hadoop.conf.Configuration conf, Object notifier, int waitAttempts, long sleepInterval, boolean doLog) throws IOException
IOException
public org.apache.hadoop.mapred.JobConf createConfiguration(HiveConf hiveConf) throws IOException
hiveConf
- Current hiveConf for the execution
IOException
public org.apache.hadoop.mapred.JobConf initializeVertexConf(org.apache.hadoop.mapred.JobConf conf, Context context, BaseWork work)
conf
- Any configurations in conf will be copied to the resulting new JobConf object.
work
- BaseWork will be used to populate the configuration object.
public org.apache.tez.dag.api.Vertex createVertex(org.apache.hadoop.mapred.JobConf conf, BaseWork work, org.apache.hadoop.fs.Path scratchDir, org.apache.hadoop.yarn.api.records.LocalResource appJarLr, List<org.apache.hadoop.yarn.api.records.LocalResource> additionalLr, org.apache.hadoop.fs.FileSystem fileSystem, Context ctx, boolean hasChildren, TezWork tezWork, TezWork.VertexType vertexType) throws Exception
conf
- JobConf to be used for this execution unit
work
- The instance of BaseWork representing the actual work to be performed
by this vertex.
scratchDir
- HDFS scratch dir for this execution unit.
appJarLr
- Local resource for hive-exec.
additionalLr
-
fileSystem
- FS corresponding to scratchDir and LocalResources
ctx
- This query's context
Exception
public void addCredentials(BaseWork work, org.apache.tez.dag.api.DAG dag)
public org.apache.hadoop.fs.Path createTezDir(org.apache.hadoop.fs.Path scratchDir, org.apache.hadoop.conf.Configuration conf) throws IOException
IOException
public org.apache.hadoop.fs.Path getTezDir(org.apache.hadoop.fs.Path scratchDir)
public static DagUtils getInstance()
public String createDagName(org.apache.hadoop.conf.Configuration conf, QueryPlan plan)
public static String getUserSpecifiedDagName(org.apache.hadoop.conf.Configuration conf)
Copyright © 2016 The Apache Software Foundation. All rights reserved.