org.apache.nutch.parse
Class ParseData

java.lang.Object
  extended by org.apache.hadoop.io.VersionedWritable
      extended by org.apache.nutch.parse.ParseData
All Implemented Interfaces:
Writable

public final class ParseData
extends VersionedWritable

Data extracted from a page's content.

See Also:
Parse.getData()

Field Summary
static String DIR_NAME
           
 
Constructor Summary
ParseData()
           
ParseData(ParseStatus status, String title, Outlink[] outlinks, Metadata contentMeta)
           
ParseData(ParseStatus status, String title, Outlink[] outlinks, Metadata contentMeta, Metadata parseMeta)
           
 
Method Summary
 boolean equals(Object o)
           
 Metadata getContentMeta()
          The original Metadata retrieved from content.
 String getMeta(String name)
          Get a single metadata value.
 Outlink[] getOutlinks()
          The outlinks of the page.
 Metadata getParseMeta()
          Other content properties.
 ParseStatus getStatus()
          The status of parsing the page.
 String getTitle()
          The title of the page.
 byte getVersion()
           
static void main(String[] argv)
           
static ParseData read(DataInput in)
           
 void readFields(DataInput in)
           
 void setParseMeta(Metadata parseMeta)
           
 String toString()
           
 void write(DataOutput out)
           
 
Methods inherited from class java.lang.Object
clone, finalize, getClass, hashCode, notify, notifyAll, wait, wait, wait
 

Field Detail

DIR_NAME

public static final String DIR_NAME
See Also:
Constant Field Values
Constructor Detail

ParseData

public ParseData()

ParseData

public ParseData(ParseStatus status,
                 String title,
                 Outlink[] outlinks,
                 Metadata contentMeta)

ParseData

public ParseData(ParseStatus status,
                 String title,
                 Outlink[] outlinks,
                 Metadata contentMeta,
                 Metadata parseMeta)
Method Detail

getStatus

public ParseStatus getStatus()
The status of parsing the page.


getTitle

public String getTitle()
The title of the page.


getOutlinks

public Outlink[] getOutlinks()
The outlinks of the page.


getContentMeta

public Metadata getContentMeta()
The original Metadata retrieved from content.


getParseMeta

public Metadata getParseMeta()
Other content properties. This is the place to find format-specific properties. Different parser implementations for different content types will populate this differently.


setParseMeta

public void setParseMeta(Metadata parseMeta)

getMeta

public String getMeta(String name)
Get a single metadata value. This method first looks for the metadata value in the parse metadata. If no value is found, it then looks for the metadata value in the content metadata.

See Also:
getContentMeta(), getParseMeta()

getVersion

public byte getVersion()
Specified by:
getVersion in class VersionedWritable

readFields

public final void readFields(DataInput in)
                      throws IOException
Specified by:
readFields in interface Writable
Overrides:
readFields in class VersionedWritable
Throws:
IOException

write

public final void write(DataOutput out)
                 throws IOException
Specified by:
write in interface Writable
Overrides:
write in class VersionedWritable
Throws:
IOException

read

public static ParseData read(DataInput in)
                      throws IOException
Throws:
IOException

equals

public boolean equals(Object o)
Overrides:
equals in class Object

toString

public String toString()
Overrides:
toString in class Object

main

public static void main(String[] argv)
                 throws Exception
Throws:
Exception


Copyright © 2011 The Apache Software Foundation