public class ScoreUpdater extends org.apache.hadoop.conf.Configured implements org.apache.hadoop.util.Tool, org.apache.hadoop.mapred.Mapper<org.apache.hadoop.io.Text,org.apache.hadoop.io.Writable,org.apache.hadoop.io.Text,org.apache.hadoop.io.ObjectWritable>, org.apache.hadoop.mapred.Reducer<org.apache.hadoop.io.Text,org.apache.hadoop.io.ObjectWritable,org.apache.hadoop.io.Text,CrawlDatum>
Modifier and Type | Field and Description |
---|---|
static org.slf4j.Logger |
LOG |
Constructor and Description |
---|
ScoreUpdater() |
Modifier and Type | Method and Description |
---|---|
void |
close() |
void |
configure(org.apache.hadoop.mapred.JobConf conf) |
static void |
main(String[] args) |
void |
map(org.apache.hadoop.io.Text key,
org.apache.hadoop.io.Writable value,
org.apache.hadoop.mapred.OutputCollector<org.apache.hadoop.io.Text,org.apache.hadoop.io.ObjectWritable> output,
org.apache.hadoop.mapred.Reporter reporter)
Changes input into ObjectWritables.
|
void |
reduce(org.apache.hadoop.io.Text key,
Iterator<org.apache.hadoop.io.ObjectWritable> values,
org.apache.hadoop.mapred.OutputCollector<org.apache.hadoop.io.Text,CrawlDatum> output,
org.apache.hadoop.mapred.Reporter reporter)
Creates new CrawlDatum objects with the updated score from the NodeDb or
with a cleared score.
|
int |
run(String[] args)
Runs the ScoreUpdater tool.
|
void |
update(org.apache.hadoop.fs.Path crawlDb,
org.apache.hadoop.fs.Path webGraphDb)
Updates the crawl database with the inlink scores from the web graph node
database.
|
public void configure(org.apache.hadoop.mapred.JobConf conf)
configure
in interface org.apache.hadoop.mapred.JobConfigurable
public void map(org.apache.hadoop.io.Text key, org.apache.hadoop.io.Writable value, org.apache.hadoop.mapred.OutputCollector<org.apache.hadoop.io.Text,org.apache.hadoop.io.ObjectWritable> output, org.apache.hadoop.mapred.Reporter reporter) throws IOException
map
in interface org.apache.hadoop.mapred.Mapper<org.apache.hadoop.io.Text,org.apache.hadoop.io.Writable,org.apache.hadoop.io.Text,org.apache.hadoop.io.ObjectWritable>
IOException
public void reduce(org.apache.hadoop.io.Text key, Iterator<org.apache.hadoop.io.ObjectWritable> values, org.apache.hadoop.mapred.OutputCollector<org.apache.hadoop.io.Text,CrawlDatum> output, org.apache.hadoop.mapred.Reporter reporter) throws IOException
reduce
in interface org.apache.hadoop.mapred.Reducer<org.apache.hadoop.io.Text,org.apache.hadoop.io.ObjectWritable,org.apache.hadoop.io.Text,CrawlDatum>
IOException
public void close()
close
in interface Closeable
close
in interface AutoCloseable
public void update(org.apache.hadoop.fs.Path crawlDb, org.apache.hadoop.fs.Path webGraphDb) throws IOException
crawlDb - The crawl database to update.
webGraphDb - The webgraph database to use.
IOException - If an error occurs while updating the scores.

Copyright © 2014 The Apache Software Foundation