public class HeadingsParseFilter extends Object implements HtmlParseFilter
Modifier and Type | Field and Description |
---|---|
protected static Pattern |
whitespacePattern
Pattern used to strip surplus whitespace
|
Fields inherited from interface HtmlParseFilter: X_POINT_ID
Constructor and Description |
---|
HeadingsParseFilter() |
Modifier and Type | Method and Description |
---|---|
ParseResult |
filter(Content content,
ParseResult parseResult,
HTMLMetaTags metaTags,
DocumentFragment doc)
Adds metadata or otherwise modifies a parse of HTML content, given
the DOM tree of a page.
|
org.apache.hadoop.conf.Configuration |
getConf() |
protected List<String> |
getElement(DocumentFragment doc,
String element)
Finds the specified element and returns its value
|
protected static String |
getNodeValue(Node node)
Returns the text value of the specified Node and child nodes
|
void |
setConf(org.apache.hadoop.conf.Configuration conf) |
protected static Pattern whitespacePattern
public ParseResult filter(Content content, ParseResult parseResult, HTMLMetaTags metaTags, DocumentFragment doc)
HtmlParseFilter
filter
in interface HtmlParseFilter
public void setConf(org.apache.hadoop.conf.Configuration conf)
setConf
in interface org.apache.hadoop.conf.Configurable
public org.apache.hadoop.conf.Configuration getConf()
getConf
in interface org.apache.hadoop.conf.Configurable
protected List<String> getElement(DocumentFragment doc, String element)
Copyright © 2014 The Apache Software Foundation