public class ParseSegment extends NutchTool implements Tool, Mapper<WritableComparable<?>,Content,Text,ParseImpl>, Reducer<Text,Writable,Text,Writable>
Modifier and Type | Field and Description |
---|---|
static org.slf4j.Logger | LOG |
static String | SKIP_TRUNCATED |
Fields inherited from class NutchTool: currentJob, currentJobNum, numJobs, results, status
Constructor and Description |
---|
ParseSegment() |
ParseSegment(Configuration conf) |
Modifier and Type | Method and Description |
---|---|
void | close() |
void | configure(JobConf job) |
static boolean | isTruncated(Content content) Checks if the page's content is truncated. |
static void | main(String[] args) |
void | map(WritableComparable<?> key, Content content, OutputCollector<Text,ParseImpl> output, Reporter reporter) |
void | parse(Path segment) |
void | reduce(Text key, Iterator<Writable> values, OutputCollector<Text,Writable> output, Reporter reporter) |
Map<String,Object> | run(Map<String,String> args, String crawlId) Runs the tool, using a map of arguments. |
int | run(String[] args) |
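Methods inherited from class NutchTool: getProgress, getStatus, killJob, stopJob
Methods inherited from class Configured: getConf, setConf
Methods inherited from class java.lang.Object: clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
Methods inherited from interface Configurable: getConf, setConf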
public static final org.slf4j.Logger LOG
public static final String SKIP_TRUNCATED
public ParseSegment()
public ParseSegment(Configuration conf)
public void configure(JobConf job)
Specified by: configure in interface JobConfigurable
public void close()
Specified by: close in interface Closeable
Specified by: close in interface AutoCloseable
public void map(WritableComparable<?> key, Content content, OutputCollector<Text,ParseImpl> output, Reporter reporter) throws IOException
Specified by: map in interface Mapper<WritableComparable<?>,Content,Text,ParseImpl>
Throws: IOException
public static boolean isTruncated(Content content)
Parameters: content - the page content to check
Returns: true if the page is truncated; false when it is not, or when it could not be determined.
public void reduce(Text key, Iterator<Writable> values, OutputCollector<Text,Writable> output, Reporter reporter) throws IOException
public void parse(Path segment) throws IOException
Throws: IOException
Copyright © 2015 The Apache Software Foundation