org.apache.nutch.util.domain
Class DomainStatistics

java.lang.Object
  extended by org.apache.hadoop.mapred.MapReduceBase
      extended by org.apache.nutch.util.domain.DomainStatistics
All Implemented Interfaces:
Closeable, Configurable, JobConfigurable, Mapper<Text,CrawlDatum,Text,LongWritable>, Reducer<Text,LongWritable,LongWritable,Text>, Tool

public class DomainStatistics
extends MapReduceBase
implements Tool, Mapper<Text,CrawlDatum,Text,LongWritable>, Reducer<Text,LongWritable,LongWritable,Text>

Extracts basic statistics about domains from the crawldb.


Nested Class Summary
static class DomainStatistics.DomainStatisticsCombiner
           
static class DomainStatistics.MyCounter
           
 
Constructor Summary
DomainStatistics()
           
 
Method Summary
 void configure(JobConf job)
           
 Configuration getConf()
           
static void main(String[] args)
           
 void map(Text urlText, CrawlDatum datum, OutputCollector<Text,LongWritable> output, Reporter reporter)
           
 void reduce(Text key, Iterator<LongWritable> values, OutputCollector<LongWritable,Text> output, Reporter reporter)
           
 int run(String[] args)
           
 void setConf(Configuration conf)
           
 
Methods inherited from class org.apache.hadoop.mapred.MapReduceBase
close
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 
Methods inherited from interface java.io.Closeable
close
 

Constructor Detail

DomainStatistics

public DomainStatistics()

Method Detail

run

public int run(String[] args)
        throws IOException
Specified by:
run in interface Tool
Throws:
IOException

configure

public void configure(JobConf job)
Specified by:
configure in interface JobConfigurable
Overrides:
configure in class MapReduceBase

getConf

public Configuration getConf()
Specified by:
getConf in interface Configurable

setConf

public void setConf(Configuration conf)
Specified by:
setConf in interface Configurable

map

public void map(Text urlText,
                CrawlDatum datum,
                OutputCollector<Text,LongWritable> output,
                Reporter reporter)
         throws IOException
Specified by:
map in interface Mapper<Text,CrawlDatum,Text,LongWritable>
Throws:
IOException

reduce

public void reduce(Text key,
                   Iterator<LongWritable> values,
                   OutputCollector<LongWritable,Text> output,
                   Reporter reporter)
            throws IOException
Specified by:
reduce in interface Reducer<Text,LongWritable,LongWritable,Text>
Throws:
IOException

main

public static void main(String[] args)
                 throws Exception
Throws:
Exception


Copyright © 2011 The Apache Software Foundation