public class CrawlDbFilter extends Object implements org.apache.hadoop.mapred.Mapper<org.apache.hadoop.io.Text,CrawlDatum,org.apache.hadoop.io.Text,CrawlDatum>
Modifier and Type | Field and Description |
---|---|
static org.slf4j.Logger | LOG |
static String | URL_FILTERING |
static String | URL_NORMALIZING |
static String | URL_NORMALIZING_SCOPE |
Constructor and Description |
---|
CrawlDbFilter() |
Modifier and Type | Method and Description |
---|---|
void | close() |
void | configure(org.apache.hadoop.mapred.JobConf job) |
void | map(org.apache.hadoop.io.Text key, CrawlDatum value, org.apache.hadoop.mapred.OutputCollector<org.apache.hadoop.io.Text,CrawlDatum> output, org.apache.hadoop.mapred.Reporter reporter) |
public static final String URL_FILTERING
public static final String URL_NORMALIZING
public static final String URL_NORMALIZING_SCOPE
public static final org.slf4j.Logger LOG
public void configure(org.apache.hadoop.mapred.JobConf job)
Specified by: configure in interface org.apache.hadoop.mapred.JobConfigurable
public void close()
Specified by: close in interface Closeable
Specified by: close in interface AutoCloseable
public void map(org.apache.hadoop.io.Text key, CrawlDatum value, org.apache.hadoop.mapred.OutputCollector<org.apache.hadoop.io.Text,CrawlDatum> output, org.apache.hadoop.mapred.Reporter reporter) throws IOException
Specified by: map in interface org.apache.hadoop.mapred.Mapper<org.apache.hadoop.io.Text,CrawlDatum,org.apache.hadoop.io.Text,CrawlDatum>
Throws: IOException
Copyright © 2014 The Apache Software Foundation