org.apache.nutch.indexer
Class IndexerMapReduce

java.lang.Object
  extended by org.apache.hadoop.conf.Configured
      extended by org.apache.nutch.indexer.IndexerMapReduce
All Implemented Interfaces:
Closeable, Configurable, JobConfigurable, Mapper<Text,Writable,Text,NutchWritable>, Reducer<Text,NutchWritable,Text,org.apache.nutch.indexer.NutchIndexAction>

public class IndexerMapReduce
extends Configured
implements Mapper<Text,Writable,Text,NutchWritable>, Reducer<Text,NutchWritable,Text,org.apache.nutch.indexer.NutchIndexAction>


Field Summary
static String INDEXER_DELETE
           
static String INDEXER_DELETE_ROBOTS_NOINDEX
           
static String INDEXER_SKIP_NOTMODIFIED
           
static org.slf4j.Logger LOG
           
static String URL_FILTERING
           
static String URL_NORMALIZING
           
 
Constructor Summary
IndexerMapReduce()
           
 
Method Summary
 void close()
           
 void configure(JobConf job)
           
static void initMRJob(Path crawlDb, Path linkDb, Collection<Path> segments, JobConf job)
           
 void map(Text key, Writable value, OutputCollector<Text,NutchWritable> output, Reporter reporter)
           
 void reduce(Text key, Iterator<NutchWritable> values, OutputCollector<Text,org.apache.nutch.indexer.NutchIndexAction> output, Reporter reporter)
           
 
Methods inherited from class org.apache.hadoop.conf.Configured
getConf, setConf
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Field Detail

LOG

public static final org.slf4j.Logger LOG

INDEXER_DELETE

public static final String INDEXER_DELETE
See Also:
Constant Field Values

INDEXER_DELETE_ROBOTS_NOINDEX

public static final String INDEXER_DELETE_ROBOTS_NOINDEX
See Also:
Constant Field Values

INDEXER_SKIP_NOTMODIFIED

public static final String INDEXER_SKIP_NOTMODIFIED
See Also:
Constant Field Values

URL_FILTERING

public static final String URL_FILTERING
See Also:
Constant Field Values

URL_NORMALIZING

public static final String URL_NORMALIZING
See Also:
Constant Field Values

Constructor Detail

IndexerMapReduce

public IndexerMapReduce()

Method Detail

configure

public void configure(JobConf job)
Specified by:
configure in interface JobConfigurable

map

public void map(Text key,
                Writable value,
                OutputCollector<Text,NutchWritable> output,
                Reporter reporter)
         throws IOException
Specified by:
map in interface Mapper<Text,Writable,Text,NutchWritable>
Throws:
IOException

reduce

public void reduce(Text key,
                   Iterator<NutchWritable> values,
                   OutputCollector<Text,org.apache.nutch.indexer.NutchIndexAction> output,
                   Reporter reporter)
            throws IOException
Specified by:
reduce in interface Reducer<Text,NutchWritable,Text,org.apache.nutch.indexer.NutchIndexAction>
Throws:
IOException

close

public void close()
           throws IOException
Specified by:
close in interface Closeable
Throws:
IOException

initMRJob

public static void initMRJob(Path crawlDb,
                             Path linkDb,
                             Collection<Path> segments,
                             JobConf job)


Copyright © 2012 The Apache Software Foundation