http://xml.apache.org/ http://www.apache.org/ http://www.w3.org/

Home

Readme
Installation
Build

API Docs
Samples
Programming
Migration
FAQs

Releases
Feedback

PDF Document

Download
Dev Snapshots
CVS Repository
Mail Archive

API Docs for SAX and DOM
 

Main Page   Class Hierarchy   Alphabetical List   Compound List   File List   Compound Members   File Members  

SAXParser.hpp

Go to the documentation of this file.
00001 /*
00002  * The Apache Software License, Version 1.1
00003  * 
00004  * Copyright (c) 1999-2000 The Apache Software Foundation.  All rights
00005  * reserved.
00006  * 
00007  * Redistribution and use in source and binary forms, with or without
00008  * modification, are permitted provided that the following conditions
00009  * are met:
00010  * 
00011  * 1. Redistributions of source code must retain the above copyright
00012  *    notice, this list of conditions and the following disclaimer. 
00013  * 
00014  * 2. Redistributions in binary form must reproduce the above copyright
00015  *    notice, this list of conditions and the following disclaimer in
00016  *    the documentation and/or other materials provided with the
00017  *    distribution.
00018  * 
00019  * 3. The end-user documentation included with the redistribution,
00020  *    if any, must include the following acknowledgment:  
00021  *       "This product includes software developed by the
00022  *        Apache Software Foundation (http://www.apache.org/)."
00023  *    Alternately, this acknowledgment may appear in the software itself,
00024  *    if and wherever such third-party acknowledgments normally appear.
00025  * 
00026  * 4. The names "Xerces" and "Apache Software Foundation" must
00027  *    not be used to endorse or promote products derived from this
00028  *    software without prior written permission. For written 
00029  *    permission, please contact apache\@apache.org.
00030  * 
00031  * 5. Products derived from this software may not be called "Apache",
00032  *    nor may "Apache" appear in their name, without prior written
00033  *    permission of the Apache Software Foundation.
00034  * 
00035  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
00036  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
00037  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
00038  * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
00039  * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
00040  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
00041  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
00042  * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
00043  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
00044  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
00045  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
00046  * SUCH DAMAGE.
00047  * ====================================================================
00048  * 
00049  * This software consists of voluntary contributions made by many
00050  * individuals on behalf of the Apache Software Foundation, and was
00051  * originally based on software copyright (c) 1999, International
00052  * Business Machines, Inc., http://www.ibm.com .  For more information
00053  * on the Apache Software Foundation, please see
00054  * <http://www.apache.org/>.
00055  */
00056 
00057 /*
00058  * $Log: SAXParser.hpp,v $
00059  * Revision 1.9  2000/08/02 18:05:15  jpolast
00060  * changes required for sax2
00061  * (changed private members to protected)
00062  *
00063  * Revision 1.8  2000/04/12 22:58:30  roddey
00064  * Added support for 'auto validate' mode.
00065  *
00066  * Revision 1.7  2000/03/03 01:29:34  roddey
00067  * Added a scanReset()/parseReset() method to the scanner and
00068  * parsers, to allow for reset after early exit from a progressive parse.
00069  * Added calls to new Terminate() call to all of the samples. Improved
00070  * documentation in SAX and DOM parsers.
00071  *
00072  * Revision 1.6  2000/02/17 03:54:27  rahulj
00073  * Added some new getters to query the parser state and
00074  * clarified the documentation.
00075  *
00076  * Revision 1.5  2000/02/16 03:42:58  rahulj
00077  * Finished documenting the SAX Driver implementation.
00078  *
00079  * Revision 1.4  2000/02/15 04:47:37  rahulj
00080  * Documenting the SAXParser framework. Not done yet.
00081  *
00082  * Revision 1.3  2000/02/06 07:47:56  rahulj
00083  * Year 2K copyright swat.
00084  *
00085  * Revision 1.2  1999/12/15 19:57:48  roddey
00086  * Got rid of redundant 'const' on boolean return value. Some compilers choke
00087  * on this and it's useless.
00088  *
00089  * Revision 1.1.1.1  1999/11/09 01:07:51  twl
00090  * Initial checkin
00091  *
00092  * Revision 1.6  1999/11/08 20:44:54  rahul
00093  * Swat for adding in Product name and CVS comment log variable.
00094  *
00095  */
00096 
00097 #if !defined(SAXPARSER_HPP)
00098 #define SAXPARSER_HPP
00099 
00100 #include <sax/Parser.hpp>
00101 #include <internal/VecAttrListImpl.hpp>
00102 #include <framework/XMLDocumentHandler.hpp>
00103 #include <framework/XMLElementDecl.hpp>
00104 #include <framework/XMLEntityHandler.hpp>
00105 #include <framework/XMLErrorReporter.hpp>
00106 #include <validators/DTD/DocTypeHandler.hpp>
00107 
00108 class DocumentHandler;   // Forward declarations: this header only uses these
00109 class EntityResolver;    // five types through pointers or references, so the
00110 class XMLPScanToken;     // full definitions are not needed here.
00111 class XMLScanner;        // NOTE(review): ErrorHandler, DTDHandler and InputSource are used below without a
00112 class XMLValidator;      // forward declaration; presumably supplied by the includes above - confirm.
00113 
00114 
00124 
00125 class  SAXParser :
00126 
00127     public Parser
00128     , public XMLDocumentHandler
00129     , public XMLErrorReporter
00130     , public XMLEntityHandler
00131     , public DocTypeHandler
00132 {
          // SAX parser class. It derives from Parser and from the four callback
          // interfaces listed above (XMLDocumentHandler, XMLErrorReporter,
          // XMLEntityHandler, DocTypeHandler); the virtual overrides for each
          // interface are grouped in their own section further down.
00133 public :
00134     // -----------------------------------------------------------------------
00135     //  Class types
00136     // -----------------------------------------------------------------------
          // Validation scheme values: the parameter type of setValidationScheme()
          // and the return type of getValidationScheme().
00137     enum ValSchemes
00138     {
00139         Val_Never
00140         , Val_Always
00141         , Val_Auto
00142     };
00143 
00144 
00145     // -----------------------------------------------------------------------
00146     //  Constructors and Destructor
00147     // -----------------------------------------------------------------------
00148 
00150     //@{
          // valToAdopt is optional (defaults to 0). Per the fValidator notes near
          // the end of this class, a DTD validator is created and installed when
          // none is provided.
          // NOTE(review): the "ToAdopt" name suggests the parser takes ownership
          // of the validator - confirm against the implementation file.
00156     SAXParser(XMLValidator* const valToAdopt = 0);
00157 
00161     ~SAXParser();
00162     //@}
00163 
00164 
          // -----------------------------------------------------------------------
          //  Getter methods
          //
          //  Only the handler/resolver getters and getScanner() are defined
          //  inline after the class in this header; the remaining getters are
          //  declared here only.
          // -----------------------------------------------------------------------
00166     //@{
00173     DocumentHandler* getDocumentHandler();
00174 
00181     const DocumentHandler* getDocumentHandler() const;
00182 
00189     EntityResolver* getEntityResolver();
00190 
00197     const EntityResolver* getEntityResolver() const;
00198 
00205     ErrorHandler* getErrorHandler();
00206 
00213     const ErrorHandler* getErrorHandler() const;
00214 
00221     const XMLScanner& getScanner() const;
00222 
00229     const XMLValidator& getValidator() const;
00230 
00237     ValSchemes getValidationScheme() const;
00238 
00248     bool getDoNamespaces() const;
00249 
00259     bool getExitOnFirstFatalError() const;
00260     //@}
00261 
00262 
00263     // -----------------------------------------------------------------------
00264     //  Setter methods
00265     // -----------------------------------------------------------------------
00266 
00268     //@{
00286     void setDoNamespaces(const bool newState);
00287 
00299     void setValidationScheme(const ValSchemes newScheme);
00300 
00316     void setExitOnFirstFatalError(const bool newState);
00317     //@}
00318 
00319 
00320     // -----------------------------------------------------------------------
00321     //  Advanced document handler list maintenance methods
00322     // -----------------------------------------------------------------------
00323 
          // These maintain the fAdvDHList array described in the data member
          // notes below. removeAdvDocHandler() returns a bool.
          // NOTE(review): presumably true when toRemove was found - confirm.
00325     //@{
00339     void installAdvDocHandler(XMLDocumentHandler* const toInstall);
00340 
00350     bool removeAdvDocHandler(XMLDocumentHandler* const toRemove);
00351     //@}
00352 
00353 
00354     // -----------------------------------------------------------------------
00355     //  Implementation of the SAXParser interface
00356     // -----------------------------------------------------------------------
00357 
          // NOTE(review): the heading says "SAXParser interface", but these
          // virtuals look like the overrides of the Parser base class - confirm.
          // NOTE(review): the parse() overloads are virtual and carry a default
          // argument (reuseValidator = false). Default arguments are bound from
          // the static type at the call site, so any overrider should repeat
          // the same default.
00359     //@{
00372     virtual void parse(const InputSource& source, const bool reuseValidator = false);
00373 
00386     virtual void parse(const XMLCh* const systemId, const bool reuseValidator = false);
00387 
00398     virtual void parse(const char* const systemId, const bool reuseValidator = false);
00399 
00410     virtual void setDocumentHandler(DocumentHandler* const handler);
00411 
00421     virtual void setDTDHandler(DTDHandler* const handler);
00422 
00433     virtual void setErrorHandler(ErrorHandler* const handler);
00434 
00446     virtual void setEntityResolver(EntityResolver* const resolver);
00447     //@}
00448 
00449 
00450     // -----------------------------------------------------------------------
00451     //  Progressive scan methods
00452     // -----------------------------------------------------------------------
00453 
          // parseFirst() has one overload per source form (XMLCh system id,
          // char system id, InputSource); each fills in an XMLPScanToken that
          // is then passed to parseNext() and parseReset(). The revision 1.7
          // log entry at the top of this file describes parseReset() as the
          // way to reset after an early exit from a progressive parse.
00455     //@{
00456 
00488     bool parseFirst
00489     (
00490         const   XMLCh* const    systemId
00491         ,       XMLPScanToken&  toFill
00492         , const bool            reuseValidator = false
00493     );
00494 
00525     bool parseFirst
00526     (
00527         const   char* const     systemId
00528         ,       XMLPScanToken&  toFill
00529         , const bool            reuseValidator = false
00530     );
00531 
00562     bool parseFirst
00563     (
00564         const   InputSource&    source
00565         ,       XMLPScanToken&  toFill
00566         , const bool            reuseValidator = false
00567     );
00568 
00593     bool parseNext(XMLPScanToken& token);
00594 
00616     void parseReset(XMLPScanToken& token);
00617 
00618     //@}
00619 
00620 
00621 
00622     // -----------------------------------------------------------------------
00623     //  Implementation of the DocTypeHandler Interface
00624     // -----------------------------------------------------------------------
00625 
00627     //@{
00642     virtual void attDef
00643     (
00644         const   DTDElementDecl& elemDecl
00645         , const DTDAttDef&      attDef
00646         , const bool            ignoring
00647     );
00648 
00658     virtual void doctypeComment
00659     (
00660         const   XMLCh* const    comment
00661     );
00662 
00679     virtual void doctypeDecl
00680     (
00681         const   DTDElementDecl& elemDecl
00682         , const XMLCh* const    publicId
00683         , const XMLCh* const    systemId
00684         , const bool            hasIntSubset
00685     );
00686 
00700     virtual void doctypePI
00701     (
00702         const   XMLCh* const    target
00703         , const XMLCh* const    data
00704     );
00705 
00717     virtual void doctypeWhitespace
00718     (
00719         const   XMLCh* const    chars
00720         , const unsigned int    length
00721     );
00722 
00735     virtual void elementDecl
00736     (
00737         const   DTDElementDecl& decl
00738         , const bool            isIgnored
00739     );
00740 
00751     virtual void endAttList
00752     (
00753         const   DTDElementDecl& elemDecl
00754     );
00755 
00762     virtual void endIntSubset();
00763 
00770     virtual void endExtSubset();
00771 
00786     virtual void entityDecl
00787     (
00788         const   DTDEntityDecl&  entityDecl
00789         , const bool            isPEDecl
00790         , const bool            isIgnored
00791     );
00792 
00797     virtual void resetDocType();
00798 
00811     virtual void notationDecl
00812     (
00813         const   XMLNotationDecl&    notDecl
00814         , const bool                isIgnored
00815     );
00816 
00827     virtual void startAttList
00828     (
00829         const   DTDElementDecl& elemDecl
00830     );
00831 
00838     virtual void startIntSubset();
00839 
00846     virtual void startExtSubset();
00847 
00860     virtual void TextDecl
00861     (
00862         const   XMLCh* const    versionStr
00863         , const XMLCh* const    encodingStr
00864     );
00865     //@}
00866 
00867 
00868     // -----------------------------------------------------------------------
00869     //  Implementation of the XMLDocumentHandler interface
00870     // -----------------------------------------------------------------------
00871 
00873     //@{
00889     virtual void docCharacters
00890     (
00891         const   XMLCh* const    chars
00892         , const unsigned int    length
00893         , const bool            cdataSection
00894     );
00895 
00905     virtual void docComment
00906     (
00907         const   XMLCh* const    comment
00908     );
00909 
00929     virtual void docPI
00930     (
00931         const   XMLCh* const    target
00932         , const XMLCh* const    data
00933     );
00934 
00946     virtual void endDocument();
00947 
00964     virtual void endElement
00965     (
00966         const   XMLElementDecl& elemDecl
00967         , const unsigned int    urlId
00968         , const bool            isRoot
00969     );
00970 
00981     virtual void endEntityReference
00982     (
00983         const   XMLEntityDecl&  entDecl
00984     );
00985 
01005     virtual void ignorableWhitespace
01006     (
01007         const   XMLCh* const    chars
01008         , const unsigned int    length
01009         , const bool            cdataSection
01010     );
01011 
01016     virtual void resetDocument();
01017 
01028     virtual void startDocument();
01029 
01056     virtual void startElement
01057     (
01058         const   XMLElementDecl&         elemDecl
01059         , const unsigned int            urlId
01060         , const XMLCh* const            elemPrefix
01061         , const RefVectorOf<XMLAttr>&   attrList
01062         , const unsigned int            attrCount
01063         , const bool                    isEmpty
01064         , const bool                    isRoot
01065     );
01066 
01076     virtual void startEntityReference
01077     (
01078         const   XMLEntityDecl&  entDecl
01079     );
01080 
01098     virtual void XMLDecl
01099     (
01100         const   XMLCh* const    versionStr
01101         , const XMLCh* const    encodingStr
01102         , const XMLCh* const    standaloneStr
01103         , const XMLCh* const    actualEncodingStr
01104     );
01105     //@}
01106 
01107 
01108     // -----------------------------------------------------------------------
01109     //  Implementation of the XMLErrorReporter interface
01110     // -----------------------------------------------------------------------
01111 
01113     //@{
01137     virtual void error
01138     (
01139         const   unsigned int                errCode
01140         , const XMLCh* const                msgDomain
01141         , const XMLErrorReporter::ErrTypes  errType
01142         , const XMLCh* const                errorText
01143         , const XMLCh* const                systemId
01144         , const XMLCh* const                publicId
01145         , const unsigned int                lineNum
01146         , const unsigned int                colNum
01147     );
01148 
01157     virtual void resetErrors();
01158     //@}
01159 
01160 
01161     // -----------------------------------------------------------------------
01162     //  Implementation of the XMLEntityHandler interface
01163     // -----------------------------------------------------------------------
01164 
01166     //@{
01178     virtual void endInputSource(const InputSource& inputSource);
01179 
01194     virtual bool expandSystemId
01195     (
01196         const   XMLCh* const    systemId
01197         ,       XMLBuffer&      toFill
01198     );
01199 
01207     virtual void resetEntities();
01208 
01223     virtual InputSource* resolveEntity
01224     (
01225         const   XMLCh* const    publicId
01226         , const XMLCh* const    systemId
01227     );
01228 
01240     virtual void startInputSource(const InputSource& inputSource);
01241     //@}
01242 
01243 
          // NOTE(review): this group has no section heading in the listing. It
          // offers a boolean validation switch next to the ValSchemes-based
          // get/setValidationScheme() above; presumably a legacy/compatibility
          // pair - confirm against the original header's doc comments.
01245     //@{
01256     bool getDoValidation() const;
01257 
01271     void setDoValidation(const bool newState);
01272     //@}
01273 
01274 
01275 protected :
01276     // -----------------------------------------------------------------------
01277     //  Unimplemented constructors and operators
01278     // -----------------------------------------------------------------------
          // Declared only; per the heading above they have no definition. Being
          // protected, they cannot be called from outside the class hierarchy.
01279     SAXParser(const SAXParser&);
01280     void operator=(const SAXParser&);
01281 
01282 
01283     // -----------------------------------------------------------------------
01284     //  Protected data members
01285     //
01286     //  fAttrList
01287     //      A temporary implementation of the basic SAX attribute list
01288     //      interface. We use this one over and over on each startElement
01289     //      event to allow SAX-like access to the element attributes.
01290     //
01291     //  fDocHandler
01292     //      The installed SAX doc handler, if any. Null if none.
01293     //
01294     //  fDTDHandler
01295     //      The installed SAX DTD handler, if any. Null if none.
01296     //
01297     //  fElemDepth
01298     //      This is used to track the element nesting depth, so that we can
01299     //      know when we are inside content. This is so we can ignore char
01300     //      data outside of content.
01301     //
01302     //  fEntityResolver
01303     //      The installed SAX entity handler, if any. Null if none.
01304     //
01305     //  fErrorHandler
01306     //      The installed SAX error handler, if any. Null if none.
01307     //
01308     //  fAdvDHCount
01309     //  fAdvDHList
01310     //  fAdvDHListSize
01311     //      This is an array of pointers to XMLDocumentHandlers, which is
01312     //      how we see installed advanced document handlers. There will
01313     //      usually not be very many at all, so a simple array is used
01314     //      instead of a collection, for performance. It will grow if needed,
01315     //      but that is unlikely.
01316     //
01317     //      The count is how many handlers are currently installed. The size
01318     //      is how big the array itself is (for expansion purposes.) When
01319     //      count == size, it is time to expand.
01320     //
01321     //  fParseInProgress
01322     //      This flag is set once a parse starts. It is used to prevent
01323     //      multiple entrance or reentrance of the parser.
01324     //
01325     //  fScanner
01326     //      The scanner being used by this parser. It is created internally
01327     //      during construction.
01328     //
01329     //  fValidator
01330     //      The validator that is installed. If none is provided, we will
01331     //      create and install a DTD validator. We install this on the
01332     //      scanner we create, which it will use to do validation. We set
01333     //      ourself on it as the error reporter for validity errors.
01334     // -----------------------------------------------------------------------
          // These members are protected rather than private: the revision 1.9
          // log entry at the top of this file records the change, made for sax2.
01335     VecAttrListImpl         fAttrList;
01336     DocumentHandler*        fDocHandler;
01337     DTDHandler*             fDTDHandler;
01338     unsigned int            fElemDepth;
01339     EntityResolver*         fEntityResolver;
01340     ErrorHandler*           fErrorHandler;
01341     unsigned int            fAdvDHCount;
01342     XMLDocumentHandler**    fAdvDHList;
01343     unsigned int            fAdvDHListSize;
01344     bool                    fParseInProgress;
01345     XMLScanner*             fScanner;
01346     XMLValidator*           fValidator;
01347 };
01348 
01349 
01350 // ---------------------------------------------------------------------------
01351 //  SAXParser: Getter methods
01352 // ---------------------------------------------------------------------------
01353 inline DocumentHandler* SAXParser::getDocumentHandler()
01354 {
          // Non-const accessor: returns the stored fDocHandler pointer unchanged.
          // The class's member notes describe it as null when no SAX doc handler
          // is installed, so callers must be prepared for a null result.
01355     return fDocHandler;
01356 }
01357 
01358 inline const DocumentHandler* SAXParser::getDocumentHandler() const
01359 {
          // Const overload: same stored fDocHandler pointer, exposed as
          // pointer-to-const. May be null when no SAX doc handler is installed
          // (per the class's member notes).
01360     return fDocHandler;
01361 }
01362 
01363 inline EntityResolver* SAXParser::getEntityResolver()
01364 {
          // Non-const accessor: returns the stored fEntityResolver pointer
          // unchanged. The class's member notes describe it as null when no SAX
          // entity handler is installed.
01365     return fEntityResolver;
01366 }
01367 
01368 inline const EntityResolver* SAXParser::getEntityResolver() const
01369 {
          // Const overload: same stored fEntityResolver pointer, exposed as
          // pointer-to-const. May be null when none is installed (per the
          // class's member notes).
01370     return fEntityResolver;
01371 }
01372 
01373 inline ErrorHandler* SAXParser::getErrorHandler()
01374 {
          // Non-const accessor: returns the stored fErrorHandler pointer
          // unchanged. The class's member notes describe it as null when no SAX
          // error handler is installed.
01375     return fErrorHandler;
01376 }
01377 
01378 inline const ErrorHandler* SAXParser::getErrorHandler() const
01379 {
          // Const overload: same stored fErrorHandler pointer, exposed as
          // pointer-to-const. May be null when none is installed (per the
          // class's member notes).
01380     return fErrorHandler;
01381 }
01382 
01383 inline const XMLScanner& SAXParser::getScanner() const
01384 {
          // Returns a const reference obtained by dereferencing fScanner; there
          // is no null check here. The class's member notes state the scanner is
          // created internally during construction.
01385     return *fScanner;
01386 }
01387 
01388 #endif


Copyright © 2000 The Apache Software Foundation. All Rights Reserved.