public class IndexerMapReduce extends org.apache.hadoop.conf.Configured implements org.apache.hadoop.mapred.Mapper<org.apache.hadoop.io.Text,org.apache.hadoop.io.Writable,org.apache.hadoop.io.Text,NutchWritable>, org.apache.hadoop.mapred.Reducer<org.apache.hadoop.io.Text,NutchWritable,org.apache.hadoop.io.Text,NutchIndexAction>
Modifier and Type | Field and Description |
---|---|
static String | INDEXER_DELETE |
static String | INDEXER_DELETE_ROBOTS_NOINDEX |
static String | INDEXER_PARAMS |
static String | INDEXER_SKIP_NOTMODIFIED |
static org.slf4j.Logger | LOG |
static String | URL_FILTERING |
static String | URL_NORMALIZING |
Constructor and Description |
---|
IndexerMapReduce() |
Modifier and Type | Method and Description |
---|---|
void | close() |
void | configure(org.apache.hadoop.mapred.JobConf job) |
static void | initMRJob(org.apache.hadoop.fs.Path crawlDb, org.apache.hadoop.fs.Path linkDb, Collection<org.apache.hadoop.fs.Path> segments, org.apache.hadoop.mapred.JobConf job) |
void | map(org.apache.hadoop.io.Text key, org.apache.hadoop.io.Writable value, org.apache.hadoop.mapred.OutputCollector<org.apache.hadoop.io.Text,NutchWritable> output, org.apache.hadoop.mapred.Reporter reporter) |
void | reduce(org.apache.hadoop.io.Text key, Iterator<NutchWritable> values, org.apache.hadoop.mapred.OutputCollector<org.apache.hadoop.io.Text,NutchIndexAction> output, org.apache.hadoop.mapred.Reporter reporter) |
public static final org.slf4j.Logger LOG
public static final String INDEXER_PARAMS
public static final String INDEXER_DELETE
public static final String INDEXER_DELETE_ROBOTS_NOINDEX
public static final String INDEXER_SKIP_NOTMODIFIED
public static final String URL_FILTERING
public static final String URL_NORMALIZING
public void configure(org.apache.hadoop.mapred.JobConf job)
Specified by: configure in interface org.apache.hadoop.mapred.JobConfigurable
public void map(org.apache.hadoop.io.Text key, org.apache.hadoop.io.Writable value, org.apache.hadoop.mapred.OutputCollector<org.apache.hadoop.io.Text,NutchWritable> output, org.apache.hadoop.mapred.Reporter reporter) throws IOException
Specified by: map in interface org.apache.hadoop.mapred.Mapper<org.apache.hadoop.io.Text,org.apache.hadoop.io.Writable,org.apache.hadoop.io.Text,NutchWritable>
Throws: IOException
public void reduce(org.apache.hadoop.io.Text key, Iterator<NutchWritable> values, org.apache.hadoop.mapred.OutputCollector<org.apache.hadoop.io.Text,NutchIndexAction> output, org.apache.hadoop.mapred.Reporter reporter) throws IOException
Specified by: reduce in interface org.apache.hadoop.mapred.Reducer<org.apache.hadoop.io.Text,NutchWritable,org.apache.hadoop.io.Text,NutchIndexAction>
Throws: IOException
public void close() throws IOException
Specified by: close in interface Closeable
Specified by: close in interface AutoCloseable
Throws: IOException
public static void initMRJob(org.apache.hadoop.fs.Path crawlDb, org.apache.hadoop.fs.Path linkDb, Collection<org.apache.hadoop.fs.Path> segments, org.apache.hadoop.mapred.JobConf job)
Copyright © 2014 The Apache Software Foundation