http://xml.apache.org/ http://www.apache.org/ http://www.w3.org/

Home

Readme
Installation
Build

API Docs
Samples
Programming
Migration
FAQs

Releases
Feedback

PDF Document

Download
Dev Snapshots
CVS Repository
Mail Archive

API Docs for SAX and DOM
 

Main Page   Class Hierarchy   Alphabetical List   Compound List   File List   Compound Members   File Members  

DOMParser.hpp

Go to the documentation of this file.
00001 /*
00002  * The Apache Software License, Version 1.1
00003  *
00004  * Copyright (c) 1999-2000 The Apache Software Foundation.  All rights
00005  * reserved.
00006  *
00007  * Redistribution and use in source and binary forms, with or without
00008  * modification, are permitted provided that the following conditions
00009  * are met:
00010  *
00011  * 1. Redistributions of source code must retain the above copyright
00012  *    notice, this list of conditions and the following disclaimer.
00013  *
00014  * 2. Redistributions in binary form must reproduce the above copyright
00015  *    notice, this list of conditions and the following disclaimer in
00016  *    the documentation and/or other materials provided with the
00017  *    distribution.
00018  *
00019  * 3. The end-user documentation included with the redistribution,
00020  *    if any, must include the following acknowledgment:
00021  *       "This product includes software developed by the
00022  *        Apache Software Foundation (http://www.apache.org/)."
00023  *    Alternately, this acknowledgment may appear in the software itself,
00024  *    if and wherever such third-party acknowledgments normally appear.
00025  *
00026  * 4. The names "Xerces" and "Apache Software Foundation" must
00027  *    not be used to endorse or promote products derived from this
00028  *    software without prior written permission. For written
00029  *    permission, please contact apache\@apache.org.
00030  *
00031  * 5. Products derived from this software may not be called "Apache",
00032  *    nor may "Apache" appear in their name, without prior written
00033  *    permission of the Apache Software Foundation.
00034  *
00035  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
00036  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
00037  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
00038  * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
00039  * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
00040  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
00041  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
00042  * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
00043  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
00044  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
00045  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
00046  * SUCH DAMAGE.
00047  * ====================================================================
00048  *
00049  * This software consists of voluntary contributions made by many
00050  * individuals on behalf of the Apache Software Foundation, and was
00051  * originally based on software copyright (c) 1999, International
00052  * Business Machines, Inc., http://www.ibm.com .  For more information
00053  * on the Apache Software Foundation, please see
00054  * <http://www.apache.org/>.
00055  */
00056 
00057 /*
00058  * $Id: DOMParser.hpp,v 1.13 2000/09/12 23:05:30 aruna1 Exp $
00059  *
00060  */
00061 
00062 #if !defined(DOMPARSER_HPP)
00063 #define DOMPARSER_HPP
00064 
00065 
00066 #include <dom/DOM_Document.hpp>
00067 #include <framework/XMLDocumentHandler.hpp>
00068 #include <framework/XMLErrorReporter.hpp>
00069 #include <framework/XMLEntityHandler.hpp>
00070 #include <util/ValueStackOf.hpp>
00071 
00072 #include <validators/DTD/DocTypeHandler.hpp>
00073 #include <dom/DOM_DocumentType.hpp>
00074 #include <validators/DTD/DTDElementDecl.hpp>
00075 #include <validators/DTD/DTDValidator.hpp>
00076 
00077 class EntityResolver;
00078 class ErrorHandler;
00079 class XMLPScanToken;
00080 class XMLScanner;
00081 class XMLValidator;
00082 
00083 
00094 class  DOMParser :
      //  DOMParser derives from four callback interfaces (XMLDocumentHandler,
      //  XMLErrorReporter, XMLEntityHandler and DocTypeHandler, listed below).
      //  Per the private data member notes further down, it creates an
      //  XMLScanner (fScanner) in its constructor and fills fDocument with the
      //  document contents.
00095 
00096     public XMLDocumentHandler
00097     , public XMLErrorReporter
00098     , public XMLEntityHandler
00099     , public DocTypeHandler
00100 {
00101 public :
00102     // -----------------------------------------------------------------------
00103     //  Class types
00104     // -----------------------------------------------------------------------
          //  ValSchemes is the type taken by setValidationScheme() and returned
          //  by getValidationScheme().
00105     enum ValSchemes
00106     {
00107         Val_Never
00108         , Val_Always
00109         , Val_Auto
00110     };
00111 
00112 
00113     // -----------------------------------------------------------------------
00114     //  Constructors and Destructor
00115     // -----------------------------------------------------------------------
00116 
00118     //@{
          //  valToAdopt defaults to 0. Per the fValidator member notes, a DTD
          //  validator is created and installed when none is provided.
00128     DOMParser(XMLValidator* const valToAdopt = 0);
00129 
00133     ~DOMParser();
00134 
00135     //@}
00136 
00142     void reset();
00143 
00144 
00145     // -----------------------------------------------------------------------
00146     //  Getter methods
00147     // -----------------------------------------------------------------------
          //  getDocument, getErrorHandler, getEntityResolver, getScanner,
          //  getExpandEntityReferences, getCreateEntityReferenceNodes,
          //  getIncludeIgnorableWhitespace and getToCreateXMLDeclTypeNode are
          //  defined inline after the class. The remaining getters are only
          //  declared in this header.
00148 
00150     //@{
00151 
00161     DOM_Document getDocument();
00162 
00170     ErrorHandler* getErrorHandler();
00171 
00179     const ErrorHandler* getErrorHandler() const;
00180 
00188     EntityResolver* getEntityResolver();
00189 
00197     const EntityResolver* getEntityResolver() const;
00198 
00206     const XMLScanner& getScanner() const;
00207 
00215     const XMLValidator& getValidator() const;
00216 
00223     ValSchemes getValidationScheme() const;
00224 
00235     bool getDoNamespaces() const;
00236 
00249     bool getExitOnFirstFatalError() const;
00250 
          //  NOTE(review): the inline definitions after the class make
          //  getExpandEntityReferences() and getCreateEntityReferenceNodes()
          //  return the same member, fCreateEntityReferenceNodes. Confirm the
          //  two names are meant to be synonyms rather than opposites.
00262     bool getExpandEntityReferences() const;
00263 
00281     bool  getCreateEntityReferenceNodes()const;
00282 
00293     bool getIncludeIgnorableWhitespace() const;
00294 
00304     bool getToCreateXMLDeclTypeNode() const;
00305 
00306 
00307     //@}
00308 
00309 
00310     // -----------------------------------------------------------------------
00311     //  Setter methods
00312     // -----------------------------------------------------------------------
          //  setExpandEntityReferences, setCreateEntityReferenceNodes,
          //  setIncludeIgnorableWhitespace and setToCreateXMLDeclTypeNode are
          //  defined inline after the class. The remaining setters are only
          //  declared in this header.
00313 
00315     //@{
00316 
00330     void setErrorHandler(ErrorHandler* const handler);
00331 
00347     void setEntityResolver(EntityResolver* const handler);
00348 
00367     void setDoNamespaces(const bool newState);
00368 
00385     void setExitOnFirstFatalError(const bool newState);
00386 
          //  NOTE(review): the inline definitions after the class make
          //  setExpandEntityReferences() and setCreateEntityReferenceNodes()
          //  store their argument, unchanged, into the same member
          //  (fCreateEntityReferenceNodes). Confirm that is intended.
00401     void setExpandEntityReferences(const bool expand);
00402 
00419     void setCreateEntityReferenceNodes(const bool create);
00420 
00439     void setIncludeIgnorableWhitespace(const bool include);
00440 
00452     void setValidationScheme(const ValSchemes newScheme);
00453 
00464     void setToCreateXMLDeclTypeNode(const bool create);
00465 
00466     //@}
00467 
00468 
00469     // -----------------------------------------------------------------------
00470     //  Parsing methods
00471     // -----------------------------------------------------------------------
          //  parse() and parseFirst() are each overloaded on how the input is
          //  named: an InputSource, an XMLCh system id, or a char system id.
          //  reuseValidator defaults to false in every overload.
          //  parseFirst()/parseNext()/parseReset() all exchange an
          //  XMLPScanToken; presumably they form a progressive-parse API --
          //  TODO confirm against the implementation file.
00472 
00474     //@{
00475 
00489     void parse(const InputSource& source, const bool reuseValidator = false);
00490 
00505     void parse(const XMLCh* const systemId, const bool reuseValidator = false);
00506 
00518     void parse(const char* const systemId, const bool reuseValidator = false);
00519 
00549     bool parseFirst
00550     (
00551         const   XMLCh* const    systemId
00552         ,       XMLPScanToken&  toFill
00553         , const bool            reuseValidator = false
00554     );
00555 
00586     bool parseFirst
00587     (
00588         const   char* const     systemId
00589         ,       XMLPScanToken&  toFill
00590         , const bool            reuseValidator = false
00591     );
00592 
00623     bool parseFirst
00624     (
00625         const   InputSource&    source
00626         ,       XMLPScanToken&  toFill
00627         , const bool            reuseValidator = false
00628     );
00629 
00652     bool parseNext(XMLPScanToken& token);
00653 
00679     void parseReset(XMLPScanToken& token);
00680 
00681     //@}
00682 
00683 
00684 
00685     // -----------------------------------------------------------------------
00686     //  Implementation of the XMLErrorReporter interface.
00687     // -----------------------------------------------------------------------
00688 
00690     //@{
00691 
00716     virtual void error
00717     (
00718         const   unsigned int                errCode
00719         , const XMLCh* const                msgDomain
00720         , const XMLErrorReporter::ErrTypes  errType
00721         , const XMLCh* const                errorText
00722         , const XMLCh* const                systemId
00723         , const XMLCh* const                publicId
00724         , const unsigned int                lineNum
00725         , const unsigned int                colNum
00726     );
00727 
00736     virtual void resetErrors();
00737     //@}
00738 
00739 
00740     // -----------------------------------------------------------------------
00741     //  Implementation of the XMLEntityHandler interface.
00742     // -----------------------------------------------------------------------
          //  endInputSource, expandSystemId, resetEntities and startInputSource
          //  have do-nothing inline definitions after the class; resolveEntity
          //  is only declared in this header.
00743 
00745     //@{
00746 
00759     virtual void endInputSource(const InputSource& inputSource);
00760 
00776     virtual bool expandSystemId
00777     (
00778         const   XMLCh* const    systemId
00779         ,       XMLBuffer&      toFill
00780     );
00781 
00790     virtual void resetEntities();
00791 
00807     virtual InputSource* resolveEntity
00808     (
00809         const   XMLCh* const    publicId
00810         , const XMLCh* const    systemId
00811     );
00812 
00825     virtual void startInputSource(const InputSource& inputSource);
00826 
00827     //@}
00828 
00829 
00830 
00831     // -----------------------------------------------------------------------
00832     //  Implementation of the XMLDocumentHandler interface.
00833     // -----------------------------------------------------------------------
          //  endDocument has an empty inline definition after the class; the
          //  other callbacks are only declared in this header.
00834 
00836     //@{
00837 
00850     virtual void docCharacters
00851     (
00852         const   XMLCh* const    chars
00853         , const unsigned int    length
00854         , const bool            cdataSection
00855     );
00856 
00865     virtual void docComment
00866     (
00867         const   XMLCh* const    comment
00868     );
00869 
00882     virtual void docPI
00883     (
00884         const   XMLCh* const    target
00885         , const XMLCh* const    data
00886     );
00887 
00892     virtual void endDocument();
00893 
00907     virtual void endElement
00908     (
00909         const   XMLElementDecl& elemDecl
00910         , const unsigned int    urlId
00911         , const bool            isRoot
00912     );
00913 
00922     virtual void endEntityReference
00923     (
00924         const   XMLEntityDecl&  entDecl
00925     );
00926 
00945     virtual void ignorableWhitespace
00946     (
00947         const   XMLCh* const    chars
00948         , const unsigned int    length
00949         , const bool            cdataSection
00950     );
00951 
00958     virtual void resetDocument();
00959 
00964     virtual void startDocument();
00965 
00993     virtual void startElement
00994     (
00995         const   XMLElementDecl&         elemDecl
00996         , const unsigned int            urlId
00997         , const XMLCh* const            elemPrefix
00998         , const RefVectorOf<XMLAttr>&   attrList
00999         , const unsigned int            attrCount
01000         , const bool                    isEmpty
01001         , const bool                    isRoot
01002     );
01003 
01013     virtual void startEntityReference
01014     (
01015         const   XMLEntityDecl&  entDecl
01016     );
01017 
01036     virtual void XMLDecl
01037     (
01038         const   XMLCh* const    versionStr
01039         , const XMLCh* const    encodingStr
01040         , const XMLCh* const    standaloneStr
01041         , const XMLCh* const    actualEncStr
01042     );
01043     //@}
01044 
01045 
01047     //@{
          //  NOTE(review): getDoValidation()/setDoValidation() sit alongside the
          //  ValSchemes-based getValidationScheme()/setValidationScheme(); how
          //  the two pairs relate is not visible in this header -- confirm
          //  against the implementation file.
01058     bool getDoValidation() const;
01059 
01073     void setDoValidation(const bool newState);
01074     // DocTypeHandler interface callbacks
01075     virtual void attDef
01076     (
01077         const   DTDElementDecl&     elemDecl
01078         , const DTDAttDef&          attDef
01079         , const bool                ignoring
01080     );
01081 
01082     virtual void doctypeComment
01083     (
01084         const   XMLCh* const    comment
01085     );
01086 
01087     virtual void doctypeDecl
01088     (
01089         const   DTDElementDecl& elemDecl
01090         , const XMLCh* const    publicId
01091         , const XMLCh* const    systemId
01092         , const bool            hasIntSubset
01093     );
01094 
01095     virtual void doctypePI
01096     (
01097         const   XMLCh* const    target
01098         , const XMLCh* const    data
01099     );
01100 
01101     virtual void doctypeWhitespace
01102     (
01103         const   XMLCh* const    chars
01104         , const unsigned int    length
01105     );
01106 
01107     virtual void elementDecl
01108     (
01109         const   DTDElementDecl& decl
01110         , const bool            isIgnored
01111     );
01112 
01113     virtual void endAttList
01114     (
01115         const   DTDElementDecl& elemDecl
01116     );
01117 
01118     virtual void endIntSubset();
01119 
01120     virtual void endExtSubset();
01121 
01122     virtual void entityDecl
01123     (
01124         const   DTDEntityDecl&  entityDecl
01125         , const bool            isPEDecl
01126         , const bool            isIgnored
01127     );
01128 
01129     virtual void resetDocType();
01130 
01131     virtual void notationDecl
01132     (
01133         const   XMLNotationDecl&    notDecl
01134         , const bool                isIgnored
01135     );
01136 
01137     virtual void startAttList
01138     (
01139         const   DTDElementDecl& elemDecl
01140     );
01141 
01142     virtual void startIntSubset();
01143 
01144     virtual void startExtSubset();
01145 
01146     virtual void TextDecl
01147     (
01148         const   XMLCh* const    versionStr
01149         , const XMLCh* const    encodingStr
01150     );
01151 
01152 
01153     //@}
01154 
01155 
01156 protected :
01157     // -----------------------------------------------------------------------
01158     //  Protected getter methods
01159     // -----------------------------------------------------------------------
01160 
01162     //@{
01168     DOM_Node getCurrentNode();
01169 
01170     //@}
01171 
01172 
01173     // -----------------------------------------------------------------------
01174     //  Protected setter methods
01175     // -----------------------------------------------------------------------
01176 
01178     //@{
01179 
01187     void setCurrentNode(DOM_Node toSet);
01188 
01195     void setDocument(DOM_Document toSet);
01196     //@}
01197 
01198 
01199 private :
01200     // Local private function to populate the doctype data
01201     virtual void populateDocumentType();
01202 
01203     // -----------------------------------------------------------------------
01204     //  Private data members
01205     //
01206     //  fCurrentNode
01207     //  fCurrentParent
01208     //      Used to track the current node during nested element events. Since
01209     //      the tree must be built from a set of disjoint callbacks, we need
01210     //      these to keep up with where we currently are.
01211     //
01212     //  fDocument
01213     //      The root document object, filled with the document contents.
01214     //
01215     //  fEntityResolver
01216     //      The installed SAX entity resolver, if any. Null if none.
01217     //
01218     //  fErrorHandler
01219     //      The installed SAX error handler, if any. Null if none.
01220     //
01221     //  fCreateEntityReferenceNodes
01222     //      Indicates whether entity reference nodes should be created.
01223     //
01224     //  fIncludeIgnorableWhitespace
01225     //      Indicates whether ignorable whitespace should be added to
01226     //      the DOM tree for validating parsers.
01227     //
01228     //  fNodeStack
01229     //      Used to track previous parent nodes during nested element events.
01230     //
01231     //  fParseInProgress
01232     //      Used to prevent multiple entrance to the parser while it's doing
01233     //      a parse.
01234     //
01235     //  fScanner
01236     //      The scanner used for this parser. This is created during the
01237     //      constructor.
01238     //
01239     //  fValidator
01240     //      The validator that is installed. If none is provided, we will
01241     //      create and install a DTD validator. We install this on the
01242     //      scanner we create, which it will use to do validation. We set
01243     //      ourself on it as the error reporter for validity errors.
01244     //
01245     //  fWithinElement
01246     //      A flag to indicate that the parser is within at least one level
01247     //      of element processing.
01248     //
01249     //  fDocumentType
01250     //      Used to store and update the documentType variable information
01251     //      in fDocument
01252     //
01253     //  fOldDocTypeHandler
01254     //      Used to chain the old documentType node if the user has set it
01255     //      from outside
01256     //
01257     //  fToCreateXMLDeclTypeNode
01258     //      A flag to create a DOM_XMLDecl node in the DOM tree if it exists
01259     //      This is an extension to xerces implementation
01260     //
01261     // -----------------------------------------------------------------------
01262     DOM_Node                fCurrentParent;
01263     DOM_Node                fCurrentNode;
01264     DOM_Document            fDocument;
01265     EntityResolver*         fEntityResolver;
01266     ErrorHandler*           fErrorHandler;
01267     bool                    fCreateEntityReferenceNodes;
01268     bool                    fIncludeIgnorableWhitespace;
01269     ValueStackOf<DOM_Node>* fNodeStack;
01270     bool                    fParseInProgress;
01271     XMLScanner*             fScanner;
01272     XMLValidator*           fValidator;
01273     bool                    fWithinElement;
01274     DocumentTypeImpl*       fDocumentType;
01275     DocTypeHandler*         fOldDocTypeHandler;
01276     bool                    fToCreateXMLDeclTypeNode;
01277 };
01278 
01279 
01280 
01281 // ---------------------------------------------------------------------------
01282 //  DOMParser: Handlers for the XMLEntityHandler interface
01283 // ---------------------------------------------------------------------------
01284 inline void DOMParser::endInputSource(const InputSource&)
01285 {
01286     // Deliberately empty: the InputSource argument is left unnamed and is
          // ignored, because the DOM entity resolver doesn't handle this event.
01287 }
01288 
01289 inline bool DOMParser::expandSystemId(const XMLCh* const, XMLBuffer&)
01290 {
01291     // The DOM entity resolver doesn't handle this: both arguments are unnamed
          // and ignored, so the XMLBuffer is never written, and the result is
          // always false.
01292     return false;
01293 }
01294 
01295 inline void DOMParser::resetEntities()
01296 {
01297     // Nothing to do for this callback; the body is intentionally empty.
01298 }
01299 
01300 inline void DOMParser::startInputSource(const InputSource&)
01301 {
01302     // Deliberately empty: the InputSource argument is left unnamed and is
          // ignored, because the DOM entity resolver doesn't handle this event.
01303 }
01304 
01305 
01306 // ---------------------------------------------------------------------------
01307 //  DOMParser: Handlers for the XMLDocumentHandler interface
01308 // ---------------------------------------------------------------------------
01309 inline void DOMParser::endDocument()
01310 {
01311     // Not used in DOM at this time; the body is intentionally empty.
01312 }
01313 
01314 
01315 
01316 // ---------------------------------------------------------------------------
01317 //  DOMParser: Getter methods
01318 // ---------------------------------------------------------------------------
01319 inline DOM_Document DOMParser::getDocument()
01320 {
          // Returns the fDocument member by value.
01321     return fDocument;
01322 }
01323 
01324 inline ErrorHandler* DOMParser::getErrorHandler()
01325 {
          // Returns the stored fErrorHandler pointer as-is. Per the class's
          // member notes it is null when no handler is installed.
01326     return fErrorHandler;
01327 }
01328 
01329 inline const ErrorHandler* DOMParser::getErrorHandler() const
01330 {
          // Const overload: returns the same fErrorHandler pointer, typed as
          // pointer-to-const. Per the class's member notes it is null when no
          // handler is installed.
01331     return fErrorHandler;
01332 }
01333 
01334 inline EntityResolver* DOMParser::getEntityResolver()
01335 {
          // Returns the stored fEntityResolver pointer as-is. Per the class's
          // member notes it is null when no resolver is installed.
01336     return fEntityResolver;
01337 }
01338 
01339 inline const EntityResolver* DOMParser::getEntityResolver() const
01340 {
          // Const overload: returns the same fEntityResolver pointer, typed as
          // pointer-to-const. Per the class's member notes it is null when no
          // resolver is installed.
01341     return fEntityResolver;
01342 }
01343 
01344 inline bool DOMParser::getExpandEntityReferences() const
01345 {
          // Returns fCreateEntityReferenceNodes unchanged, i.e. exactly what
          // getCreateEntityReferenceNodes() returns.
          // NOTE(review): "expand references" and "create reference nodes" read
          // like opposite settings; confirm that returning the flag without
          // negation is the intended contract.
01346     return fCreateEntityReferenceNodes;
01347 }
01348 inline bool DOMParser::getCreateEntityReferenceNodes() const
01349 {
          // Returns the fCreateEntityReferenceNodes flag, which the class's
          // member notes describe as "whether entity reference nodes should be
          // created".
01350     return fCreateEntityReferenceNodes;
01351 }
01352 
01353 inline bool DOMParser::getIncludeIgnorableWhitespace() const
01354 {
          // Returns the fIncludeIgnorableWhitespace flag. Per the class's member
          // notes it controls whether ignorable whitespace is added to the DOM
          // tree for validating parsers.
01355     return fIncludeIgnorableWhitespace;
01356 }
01357 
01358 inline const XMLScanner& DOMParser::getScanner() const
01359 {
          // Dereferences fScanner and returns it as a const reference.
          // Assumes fScanner is non-null; the class's member notes say it is
          // created during the constructor -- TODO confirm it is never reset.
01360     return *fScanner;
01361 }
01362 
01363 inline bool DOMParser::getToCreateXMLDeclTypeNode() const
01364 {
          // Returns the fToCreateXMLDeclTypeNode flag as stored.
01365     return fToCreateXMLDeclTypeNode;
01366 }
01367 
01368 
01369 // ---------------------------------------------------------------------------
01370 //  DOMParser: Setter methods
01371 // ---------------------------------------------------------------------------
01372 inline void DOMParser::setExpandEntityReferences(const bool expand)
01373 {
          // Stores `expand` unchanged into fCreateEntityReferenceNodes, the same
          // member that setCreateEntityReferenceNodes() writes.
          // NOTE(review): "expand references" and "create reference nodes" read
          // like opposite settings; confirm that storing the value without
          // negation is the intended contract.
01374     fCreateEntityReferenceNodes = expand;
01375 }
01376 
01377 inline void DOMParser::setCreateEntityReferenceNodes(const bool create)
01378 {
          // Stores `create` into the fCreateEntityReferenceNodes flag; nothing
          // else is modified here.
01379     fCreateEntityReferenceNodes = create;
01380 }
01381 
01382 inline void DOMParser::setIncludeIgnorableWhitespace(const bool include)
01383 {
          // Stores `include` into the fIncludeIgnorableWhitespace flag; nothing
          // else is modified here.
01384     fIncludeIgnorableWhitespace = include;
01385 }
01386 
01387 inline void DOMParser::setToCreateXMLDeclTypeNode(const bool create)
01388 {
          // Stores `create` into the fToCreateXMLDeclTypeNode flag; nothing else
          // is modified here.
01389     fToCreateXMLDeclTypeNode = create;
01390 }
01391 
01392 
01393 // ---------------------------------------------------------------------------
01394 //  DOMParser: Protected getter methods
01395 // ---------------------------------------------------------------------------
01396 inline DOM_Node DOMParser::getCurrentNode()
01397 {
          // Returns the fCurrentNode member by value.
01398     return fCurrentNode;
01399 }
01400 
01401 
01402 // ---------------------------------------------------------------------------
01403 //  DOMParser: Protected setter methods
01404 // ---------------------------------------------------------------------------
01405 inline void DOMParser::setCurrentNode(DOM_Node toSet)
01406 {
          // Overwrites fCurrentNode with `toSet`. fCurrentParent and fNodeStack
          // are not touched here.
01407     fCurrentNode = toSet;
01408 }
01409 
01410 inline void DOMParser::setDocument(DOM_Document toSet)
01411 {
          // Overwrites fDocument with `toSet`; no other member is updated here.
01412     fDocument = toSet;
01413 }
01414 
01415 #endif


Copyright © 2000 The Apache Software Foundation. All Rights Reserved.