public class SparkUtilities extends Object
Constructor and Description |
---|
SparkUtilities() |
Modifier and Type | Method and Description |
---|---|
static void |
collectOp(Collection<Operator<?>> result,
Operator<?> root,
Class<?> clazz)
Recursively find all operators under root, that are of class clazz or are the sub-class of clazz, and
put them in result.
|
static <T extends Operator<?>> |
collectOp(Operator<?> root,
Class<T> cls,
Collection<T> result,
Set<Operator<?>> seen)
Collect operators of type T starting from root.
|
static org.apache.hadoop.io.BytesWritable |
copyBytesWritable(org.apache.hadoop.io.BytesWritable bw) |
static HiveKey |
copyHiveKey(HiveKey key) |
static SparkTask |
createSparkTask(HiveConf conf) |
static SparkTask |
createSparkTask(SparkWork work,
HiveConf conf) |
static SparkPartitionPruningSinkOperator |
findReusableDPPSink(Operator<? extends OperatorDesc> branchingOP,
List<Operator<? extends OperatorDesc>> list) |
static org.apache.hadoop.fs.Path |
generateTmpPathForPartitionPruning(org.apache.hadoop.fs.Path basePath,
String id)
Generate a temporary path for dynamic partition pruning in Spark branch
TODO: no longer need this if we use accumulator!
|
static SparkSession |
getSparkSession(HiveConf conf,
SparkSessionManager sparkSessionManager) |
static String |
getWorkId(BaseWork work)
Return the ID for this BaseWork, in String form.
|
static boolean |
isDedicatedCluster(org.apache.hadoop.conf.Configuration conf) |
static boolean |
isDirectDPPBranch(Operator<?> op) |
static boolean |
needUploadToHDFS(URI source,
org.apache.spark.SparkConf sparkConf) |
static void |
removeEmptySparkTask(SparkTask currTask)
Removes currTask from the children list of its parent task, and
removes currTask from the parent list of each of its child tasks.
|
static void |
removeNestedDPP(OptimizeSparkProcContext procContext)
For DPP sinks w/ common join, we'll split the tree and what's above the branching
operator is computed multiple times.
|
static String |
reverseDNSLookupURL(String url) |
static URI |
uploadToHDFS(URI source,
HiveConf conf)
Uploads a local file to HDFS
|
public static org.apache.hadoop.io.BytesWritable copyBytesWritable(org.apache.hadoop.io.BytesWritable bw)
public static URI uploadToHDFS(URI source, HiveConf conf) throws IOException
source
- conf
- IOException
public static boolean needUploadToHDFS(URI source, org.apache.spark.SparkConf sparkConf)
public static boolean isDedicatedCluster(org.apache.hadoop.conf.Configuration conf)
public static SparkSession getSparkSession(HiveConf conf, SparkSessionManager sparkSessionManager) throws HiveException
HiveException
public static org.apache.hadoop.fs.Path generateTmpPathForPartitionPruning(org.apache.hadoop.fs.Path basePath, String id)
basePath
- id
-
public static String getWorkId(BaseWork work)
work
- the input BaseWork
public static void collectOp(Collection<Operator<?>> result, Operator<?> root, Class<?> clazz)
result
- all operators under root that are of class clazz
root
- the root operator under which all operators will be examined
clazz
- class to collect. Must NOT be null.
public static <T extends Operator<?>> void collectOp(Operator<?> root, Class<T> cls, Collection<T> result, Set<Operator<?>> seen)
public static void removeEmptySparkTask(SparkTask currTask)
currTask
-
public static SparkPartitionPruningSinkOperator findReusableDPPSink(Operator<? extends OperatorDesc> branchingOP, List<Operator<? extends OperatorDesc>> list)
public static void removeNestedDPP(OptimizeSparkProcContext procContext)
public static boolean isDirectDPPBranch(Operator<?> op)
public static String reverseDNSLookupURL(String url) throws UnknownHostException
UnknownHostException
Copyright © 2022 The Apache Software Foundation. All rights reserved.