org.apache.nutch.crawl
Class CrawlDbReader

java.lang.Object
  extended by org.apache.nutch.crawl.CrawlDbReader
All Implemented Interfaces:
Closeable

public class CrawlDbReader
extends Object
implements Closeable

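Read-only utility for the CrawlDb. As the method summary below shows, it offers a statistics job, a dump job, a top-N job, and lookup of a single URL's CrawlDatum.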

Author:
Andrzej Bialecki

Nested Class Summary
static class CrawlDbReader.CrawlDatumCsvOutputFormat
           
static class CrawlDbReader.CrawlDbDumpMapper
           
static class CrawlDbReader.CrawlDbStatCombiner
           
static class CrawlDbReader.CrawlDbStatMapper
           
static class CrawlDbReader.CrawlDbStatReducer
           
static class CrawlDbReader.CrawlDbTopNMapper
           
static class CrawlDbReader.CrawlDbTopNReducer
           
 
Field Summary
static org.slf4j.Logger LOG
           
 
Constructor Summary
CrawlDbReader()
           
 
Method Summary
 void close()
           
 CrawlDatum get(String crawlDb, String url, Configuration config)
           
static void main(String[] args)
           
 void processDumpJob(String crawlDb, String output, Configuration config, String format, String regex, String status)
           
 void processStatJob(String crawlDb, Configuration config, boolean sort)
           
 void processTopNJob(String crawlDb, long topN, float min, String output, Configuration config)
           
 void readUrl(String crawlDb, String url, Configuration config)
           
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Field Detail

LOG

public static final org.slf4j.Logger LOG
Constructor Detail

CrawlDbReader

public CrawlDbReader()
Method Detail

close

public void close()
Specified by:
close in interface Closeable

processStatJob

public void processStatJob(String crawlDb,
                           Configuration config,
                           boolean sort)
                    throws IOException
Throws:
IOException

get

public CrawlDatum get(String crawlDb,
                      String url,
                      Configuration config)
               throws IOException
Throws:
IOException

readUrl

public void readUrl(String crawlDb,
                    String url,
                    Configuration config)
             throws IOException
Throws:
IOException

processDumpJob

public void processDumpJob(String crawlDb,
                           String output,
                           Configuration config,
                           String format,
                           String regex,
                           String status)
                    throws IOException
Throws:
IOException

processTopNJob

public void processTopNJob(String crawlDb,
                           long topN,
                           float min,
                           String output,
                           Configuration config)
                    throws IOException
Throws:
IOException

main

public static void main(String[] args)
                 throws IOException
Throws:
IOException


Copyright © 2012 The Apache Software Foundation