org.apache.jackrabbit.extractor
Class HTMLParser

java.lang.Object
  extended by org.apache.xerces.parsers.XMLParser
      extended by org.apache.xerces.parsers.AbstractXMLDocumentParser
          extended by org.apache.xerces.parsers.AbstractSAXParser
              extended by org.apache.jackrabbit.extractor.HTMLParser
All Implemented Interfaces:
org.apache.xerces.xni.XMLDocumentHandler, org.apache.xerces.xni.XMLDTDContentModelHandler, org.apache.xerces.xni.XMLDTDHandler, org.apache.xerces.xs.PSVIProvider, Parser, XMLReader

public class HTMLParser
extends org.apache.xerces.parsers.AbstractSAXParser

Helper class for HTML parsing.


Nested Class Summary
 
Nested classes/interfaces inherited from class org.apache.xerces.parsers.AbstractSAXParser
org.apache.xerces.parsers.AbstractSAXParser.AttributesProxy, org.apache.xerces.parsers.AbstractSAXParser.LocatorProxy
 
Field Summary
 
Fields inherited from class org.apache.xerces.parsers.AbstractSAXParser
ALLOW_UE_AND_NOTATION_EVENTS, DECLARATION_HANDLER, DOM_NODE, fContentHandler, fDeclaredAttrs, fDeclHandler, fDocumentHandler, fDTDHandler, fLexicalHandler, fLexicalHandlerParameterEntities, fNamespaceContext, fNamespacePrefixes, fNamespaces, fParseInProgress, fQName, fResolveDTDURIs, fStandalone, fUseEntityResolver2, fVersion, fXMLNSURIs, LEXICAL_HANDLER, NAMESPACES, STRING_INTERNING
 
Fields inherited from class org.apache.xerces.parsers.AbstractXMLDocumentParser
fDocumentSource, fDTDContentModelSource, fDTDSource, fInDTD
 
Fields inherited from class org.apache.xerces.parsers.XMLParser
ENTITY_RESOLVER, ERROR_HANDLER, fConfiguration
 
Fields inherited from interface org.apache.xerces.xni.XMLDTDHandler
CONDITIONAL_IGNORE, CONDITIONAL_INCLUDE
 
Fields inherited from interface org.apache.xerces.xni.XMLDTDContentModelHandler
OCCURS_ONE_OR_MORE, OCCURS_ZERO_OR_MORE, OCCURS_ZERO_OR_ONE, SEPARATOR_CHOICE, SEPARATOR_SEQUENCE
 
Constructor Summary
HTMLParser()
           
 
Method Summary
 void characters(org.apache.xerces.xni.XMLString xmlString, org.apache.xerces.xni.Augmentations augmentations)
           
 String getContents()
          Returns the parsed content.
 void startDocument(org.apache.xerces.xni.XMLLocator arg0, String arg1, org.apache.xerces.xni.NamespaceContext arg2, org.apache.xerces.xni.Augmentations arg3)
           
 
Methods inherited from class org.apache.xerces.parsers.AbstractSAXParser
attributeDecl, comment, doctypeDecl, elementDecl, endCDATA, endDocument, endDTD, endElement, endExternalSubset, endGeneralEntity, endNamespaceMapping, endParameterEntity, externalEntityDecl, getAttributePSVI, getAttributePSVIByName, getContentHandler, getDeclHandler, getDTDHandler, getElementPSVI, getEntityResolver, getErrorHandler, getFeature, getLexicalHandler, getProperty, ignorableWhitespace, internalEntityDecl, notationDecl, parse, parse, processingInstruction, reset, setContentHandler, setDeclHandler, setDocumentHandler, setDTDHandler, setEntityResolver, setErrorHandler, setFeature, setLexicalHandler, setLocale, setProperty, startCDATA, startElement, startExternalSubset, startGeneralEntity, startNamespaceMapping, startParameterEntity, unparsedEntityDecl, xmlDecl
 
Methods inherited from class org.apache.xerces.parsers.AbstractXMLDocumentParser
any, element, empty, emptyElement, endAttlist, endConditional, endContentModel, endGroup, getDocumentSource, getDTDContentModelSource, getDTDSource, ignoredCharacters, occurrence, pcdata, separator, setDocumentSource, setDTDContentModelSource, setDTDSource, startAttlist, startConditional, startContentModel, startDTD, startGroup, textDecl
 
Methods inherited from class org.apache.xerces.parsers.XMLParser
parse
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Constructor Detail

HTMLParser

public HTMLParser()
Method Detail

startDocument

public void startDocument(org.apache.xerces.xni.XMLLocator arg0,
                          String arg1,
                          org.apache.xerces.xni.NamespaceContext arg2,
                          org.apache.xerces.xni.Augmentations arg3)
                   throws org.apache.xerces.xni.XNIException
Specified by:
startDocument in interface org.apache.xerces.xni.XMLDocumentHandler
Overrides:
startDocument in class org.apache.xerces.parsers.AbstractSAXParser
Throws:
org.apache.xerces.xni.XNIException

characters

public void characters(org.apache.xerces.xni.XMLString xmlString,
                       org.apache.xerces.xni.Augmentations augmentations)
                throws org.apache.xerces.xni.XNIException
Specified by:
characters in interface org.apache.xerces.xni.XMLDocumentHandler
Overrides:
characters in class org.apache.xerces.parsers.AbstractSAXParser
Throws:
org.apache.xerces.xni.XNIException

getContents

public String getContents()
Returns the parsed content.

Returns:
the parsed content, as a String


Copyright © 2004-2009 The Apache Software Foundation. All Rights Reserved.