org.apache.nutch.crawl
Class CrawlDb

java.lang.Object
  extended by org.apache.hadoop.conf.Configured
      extended by org.apache.nutch.crawl.CrawlDb
All Implemented Interfaces:
Configurable, Tool

public class CrawlDb
extends Configured
implements Tool

This class takes the output of the fetcher and updates the crawldb accordingly.


Field Summary
static String CRAWLDB_ADDITIONS_ALLOWED
           
static String CRAWLDB_PURGE_404
           
static String CURRENT_NAME
           
static String LOCK_NAME
           
static org.slf4j.Logger LOG
           
 
Constructor Summary
CrawlDb()
           
CrawlDb(Configuration conf)
           
 
Method Summary
static JobConf createJob(Configuration config, Path crawlDb)
           
static void install(JobConf job, Path crawlDb)
           
static void main(String[] args)
           
 int run(String[] args)
           
 void update(Path crawlDb, Path[] segments, boolean normalize, boolean filter)
           
 void update(Path crawlDb, Path[] segments, boolean normalize, boolean filter, boolean additionsAllowed, boolean force)
           
 
Methods inherited from class org.apache.hadoop.conf.Configured
getConf, setConf
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 
Methods inherited from interface org.apache.hadoop.conf.Configurable
getConf, setConf
 

Field Detail

LOG

public static final org.slf4j.Logger LOG

CRAWLDB_ADDITIONS_ALLOWED

public static final String CRAWLDB_ADDITIONS_ALLOWED
See Also:
Constant Field Values

CRAWLDB_PURGE_404

public static final String CRAWLDB_PURGE_404
See Also:
Constant Field Values

CURRENT_NAME

public static final String CURRENT_NAME
See Also:
Constant Field Values

LOCK_NAME

public static final String LOCK_NAME
See Also:
Constant Field Values

Constructor Detail

CrawlDb

public CrawlDb()

CrawlDb

public CrawlDb(Configuration conf)

Method Detail

update

public void update(Path crawlDb,
                   Path[] segments,
                   boolean normalize,
                   boolean filter)
            throws IOException
Throws:
IOException

update

public void update(Path crawlDb,
                   Path[] segments,
                   boolean normalize,
                   boolean filter,
                   boolean additionsAllowed,
                   boolean force)
            throws IOException
Throws:
IOException

createJob

public static JobConf createJob(Configuration config,
                                Path crawlDb)
                         throws IOException
Throws:
IOException

install

public static void install(JobConf job,
                           Path crawlDb)
                    throws IOException
Throws:
IOException

main

public static void main(String[] args)
                 throws Exception
Throws:
Exception

run

public int run(String[] args)
        throws Exception
Specified by:
run in interface Tool
Throws:
Exception


Copyright © 2011 The Apache Software Foundation