public class CrawlDb extends NutchTool implements Tool
Modifier and Type | Field and Description |
---|---|
static String | CRAWLDB_ADDITIONS_ALLOWED |
static String | CRAWLDB_PURGE_404 |
static String | CURRENT_NAME |
static String | LOCK_NAME |
static org.slf4j.Logger | LOG |
Fields inherited from class NutchTool: currentJob, currentJobNum, numJobs, results, status
Constructor and Description |
---|
CrawlDb() |
CrawlDb(Configuration conf) |
Modifier and Type | Method and Description |
---|---|
static JobConf | createJob(Configuration config, Path crawlDb) |
static void | install(JobConf job, Path crawlDb) |
static void | main(String[] args) |
Map<String,Object> | run(Map<String,String> args, String crawlId): Runs the tool, using a map of arguments. |
int | run(String[] args) |
void | update(Path crawlDb, Path[] segments, boolean normalize, boolean filter) |
void | update(Path crawlDb, Path[] segments, boolean normalize, boolean filter, boolean additionsAllowed, boolean force) |
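Methods inherited from class NutchTool: getProgress, getStatus, killJob, stopJob
Methods inherited from class Configured: getConf, setConf
Methods inherited from class java.lang.Object: clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
Methods inherited from interface Configurable: getConf, setConf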
public static final org.slf4j.Logger LOG
public static final String CRAWLDB_ADDITIONS_ALLOWED
public static final String CRAWLDB_PURGE_404
public static final String CURRENT_NAME
public static final String LOCK_NAME
public CrawlDb()
public CrawlDb(Configuration conf)
public void update(Path crawlDb, Path[] segments, boolean normalize, boolean filter) throws IOException
Throws: IOException
public void update(Path crawlDb, Path[] segments, boolean normalize, boolean filter, boolean additionsAllowed, boolean force) throws IOException
Throws: IOException
public static JobConf createJob(Configuration config, Path crawlDb) throws IOException
Throws: IOException
public static void install(JobConf job, Path crawlDb) throws IOException
Throws: IOException
Copyright © 2015 The Apache Software Foundation