public class CrawlDbReader extends Configured implements Closeable, Tool
Modifier and Type | Class and Description |
---|---|
static class | CrawlDbReader.CrawlDatumCsvOutputFormat |
static class | CrawlDbReader.CrawlDbDumpMapper |
static class | CrawlDbReader.CrawlDbStatCombiner |
static class | CrawlDbReader.CrawlDbStatMapper |
static class | CrawlDbReader.CrawlDbStatReducer |
static class | CrawlDbReader.CrawlDbTopNMapper |
static class | CrawlDbReader.CrawlDbTopNReducer |
Modifier and Type | Field and Description |
---|---|
static org.slf4j.Logger | LOG |
Constructor and Description |
---|
CrawlDbReader() |
Modifier and Type | Method and Description |
---|---|
void | close() |
CrawlDatum | get(String crawlDb, String url, JobConf config) |
static void | main(String[] args) |
void | processDumpJob(String crawlDb, String output, JobConf config, String format, String regex, String status, Integer retry) |
void | processStatJob(String crawlDb, Configuration config, boolean sort) |
void | processTopNJob(String crawlDb, long topN, float min, String output, JobConf config) |
Object | query(Map<String,String> args, Configuration conf, String type, String crawlId) |
void | readUrl(String crawlDb, String url, JobConf config) |
int | run(String[] args) |
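Methods inherited from class Configured: getConf, setConf
Methods inherited from class java.lang.Object: clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
Methods inherited from interface Configurable: getConf, setConf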
public void close()
Specified by: close in interface Closeable
Specified by: close in interface AutoCloseable
public void processStatJob(String crawlDb, Configuration config, boolean sort) throws IOException
Throws: IOException
public CrawlDatum get(String crawlDb, String url, JobConf config) throws IOException
Throws: IOException
public void readUrl(String crawlDb, String url, JobConf config) throws IOException
Throws: IOException
public void processDumpJob(String crawlDb, String output, JobConf config, String format, String regex, String status, Integer retry) throws IOException
Throws: IOException
public void processTopNJob(String crawlDb, long topN, float min, String output, JobConf config) throws IOException
Throws: IOException
public int run(String[] args) throws IOException
Specified by: run in interface Tool
Throws: IOException
Copyright © 2015 The Apache Software Foundation