00001 /*
00002 * The Apache Software License, Version 1.1
00003 *
00004 * Copyright (c) 1999-2000 The Apache Software Foundation. All rights
00005 * reserved.
00006 *
00007 * Redistribution and use in source and binary forms, with or without
00008 * modification, are permitted provided that the following conditions
00009 * are met:
00010 *
00011 * 1. Redistributions of source code must retain the above copyright
00012 * notice, this list of conditions and the following disclaimer.
00013 *
00014 * 2. Redistributions in binary form must reproduce the above copyright
00015 * notice, this list of conditions and the following disclaimer in
00016 * the documentation and/or other materials provided with the
00017 * distribution.
00018 *
00019 * 3. The end-user documentation included with the redistribution,
00020 * if any, must include the following acknowledgment:
00021 * "This product includes software developed by the
00022 * Apache Software Foundation (http://www.apache.org/)."
00023 * Alternately, this acknowledgment may appear in the software itself,
00024 * if and wherever such third-party acknowledgments normally appear.
00025 *
00026 * 4. The names "Xerces" and "Apache Software Foundation" must
00027 * not be used to endorse or promote products derived from this
00028 * software without prior written permission. For written
00029 * permission, please contact apache\@apache.org.
00030 *
00031 * 5. Products derived from this software may not be called "Apache",
00032 * nor may "Apache" appear in their name, without prior written
00033 * permission of the Apache Software Foundation.
00034 *
00035 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
00036 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
00037 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
00038 * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
00039 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
00040 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
00041 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
00042 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
00043 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
00044 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
00045 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
00046 * SUCH DAMAGE.
00047 * ====================================================================
00048 *
00049 * This software consists of voluntary contributions made by many
00050 * individuals on behalf of the Apache Software Foundation, and was
00051 * originally based on software copyright (c) 1999, International
00052 * Business Machines, Inc., http://www.ibm.com . For more information
00053 * on the Apache Software Foundation, please see
00054 * <http://www.apache.org/>.
00055 */
00056
00057 /*
00058 * $Id: DOMParser.hpp,v 1.11 2000/05/02 19:22:38 aruna1 Exp $
00059 *
00060 */
00061
00062 #if !defined(DOMPARSER_HPP)
00063 #define DOMPARSER_HPP
00064
00065
00066 #include <dom/DOM_Document.hpp>
00067 #include <framework/XMLDocumentHandler.hpp>
00068 #include <framework/XMLErrorReporter.hpp>
00069 #include <framework/XMLEntityHandler.hpp>
00070 #include <util/ValueStackOf.hpp>
00071
00072 #include <validators/DTD/DocTypeHandler.hpp>
00073 #include <dom/DOM_DocumentType.hpp>
00074 #include <validators/DTD/DTDElementDecl.hpp>
00075 #include <validators/DTD/DTDValidator.hpp>
00076 #include <util/NameIdPool.hpp>
00077
00078 class EntityResolver;
00079 class ErrorHandler;
00080 class XMLPScanToken;
00081 class XMLScanner;
00082 class XMLValidator;
00083
00084
00095 class DOMParser :
00096
00097 public XMLDocumentHandler
00098 , public XMLErrorReporter
00099 , public XMLEntityHandler
00100 , public DocTypeHandler
00101 {
00102 public :
00103 // -----------------------------------------------------------------------
00104 // Class types
00105 // -----------------------------------------------------------------------
00106 enum ValSchemes
00107 {
00108 Val_Never
00109 , Val_Always
00110 , Val_Auto
00111 };
00112
00113
00114 // -----------------------------------------------------------------------
00115 // Constructors and Detructor
00116 // -----------------------------------------------------------------------
00117
00119 //@{
00129 DOMParser(XMLValidator* const valToAdopt = 0);
00130
00134 ~DOMParser();
00135
00136 //@}
00137
00143 void reset();
00144
00145
00146 // -----------------------------------------------------------------------
00147 // Getter methods
00148 // -----------------------------------------------------------------------
00149
00151 //@{
00152
00162 DOM_Document getDocument();
00163
00171 ErrorHandler* getErrorHandler();
00172
00180 const ErrorHandler* getErrorHandler() const;
00181
00189 EntityResolver* getEntityResolver();
00190
00198 const EntityResolver* getEntityResolver() const;
00199
00207 const XMLScanner& getScanner() const;
00208
00216 const XMLValidator& getValidator() const;
00217
00224 ValSchemes getValidationScheme() const;
00225
00236 bool getDoNamespaces() const;
00237
00250 bool getExitOnFirstFatalError() const;
00251
00262 bool getExpandEntityReferences() const;
00263
00274 bool getIncludeIgnorableWhitespace() const;
00275
00285 bool getToCreateXMLDeclTypeNode() const;
00286
00287
00288 //@}
00289
00290
00291 // -----------------------------------------------------------------------
00292 // Setter methods
00293 // -----------------------------------------------------------------------
00294
00296 //@{
00297
00311 void setErrorHandler(ErrorHandler* const handler);
00312
00328 void setEntityResolver(EntityResolver* const handler);
00329
00348 void setDoNamespaces(const bool newState);
00349
00366 void setExitOnFirstFatalError(const bool newState);
00367
00381 void setExpandEntityReferences(const bool expand);
00382
00401 void setIncludeIgnorableWhitespace(const bool include);
00402
00414 void setValidationScheme(const ValSchemes newScheme);
00415
00426 void setToCreateXMLDeclTypeNode(const bool create);
00427
00428 //@}
00429
00430
00431 // -----------------------------------------------------------------------
00432 // Parsing methods
00433 // -----------------------------------------------------------------------
00434
00436 //@{
00437
00451 void parse(const InputSource& source, const bool reuseValidator = false);
00452
00467 void parse(const XMLCh* const systemId, const bool reuseValidator = false);
00468
00480 void parse(const char* const systemId, const bool reuseValidator = false);
00481
00511 bool parseFirst
00512 (
00513 const XMLCh* const systemId
00514 , XMLPScanToken& toFill
00515 , const bool reuseValidator = false
00516 );
00517
00548 bool parseFirst
00549 (
00550 const char* const systemId
00551 , XMLPScanToken& toFill
00552 , const bool reuseValidator = false
00553 );
00554
00585 bool parseFirst
00586 (
00587 const InputSource& source
00588 , XMLPScanToken& toFill
00589 , const bool reuseValidator = false
00590 );
00591
00614 bool parseNext(XMLPScanToken& token);
00615
00641 void parseReset(XMLPScanToken& token);
00642
00643 //@}
00644
00645
00646
00647 // -----------------------------------------------------------------------
00648 // Implementation of the XMLErrorReporter interface.
00649 // -----------------------------------------------------------------------
00650
00652 //@{
00653
00678 virtual void error
00679 (
00680 const unsigned int errCode
00681 , const XMLCh* const msgDomain
00682 , const XMLErrorReporter::ErrTypes errType
00683 , const XMLCh* const errorText
00684 , const XMLCh* const systemId
00685 , const XMLCh* const publicId
00686 , const unsigned int lineNum
00687 , const unsigned int colNum
00688 );
00689
00698 virtual void resetErrors();
00699 //@}
00700
00701
00702 // -----------------------------------------------------------------------
00703 // Implementation of the XMLEntityHandler interface.
00704 // -----------------------------------------------------------------------
00705
00707 //@{
00708
00721 virtual void endInputSource(const InputSource& inputSource);
00722
00738 virtual bool expandSystemId
00739 (
00740 const XMLCh* const systemId
00741 , XMLBuffer& toFill
00742 );
00743
00752 virtual void resetEntities();
00753
00769 virtual InputSource* resolveEntity
00770 (
00771 const XMLCh* const publicId
00772 , const XMLCh* const systemId
00773 );
00774
00787 virtual void startInputSource(const InputSource& inputSource);
00788
00789 //@}
00790
00791
00792
00793 // -----------------------------------------------------------------------
00794 // Implementation of the XMLDocumentHandler interface.
00795 // -----------------------------------------------------------------------
00796
00798 //@{
00799
00812 virtual void docCharacters
00813 (
00814 const XMLCh* const chars
00815 , const unsigned int length
00816 , const bool cdataSection
00817 );
00818
00827 virtual void docComment
00828 (
00829 const XMLCh* const comment
00830 );
00831
00844 virtual void docPI
00845 (
00846 const XMLCh* const target
00847 , const XMLCh* const data
00848 );
00849
00854 virtual void endDocument();
00855
00869 virtual void endElement
00870 (
00871 const XMLElementDecl& elemDecl
00872 , const unsigned int urlId
00873 , const bool isRoot
00874 );
00875
00884 virtual void endEntityReference
00885 (
00886 const XMLEntityDecl& entDecl
00887 );
00888
00907 virtual void ignorableWhitespace
00908 (
00909 const XMLCh* const chars
00910 , const unsigned int length
00911 , const bool cdataSection
00912 );
00913
00920 virtual void resetDocument();
00921
00926 virtual void startDocument();
00927
00955 virtual void startElement
00956 (
00957 const XMLElementDecl& elemDecl
00958 , const unsigned int urlId
00959 , const XMLCh* const elemPrefix
00960 , const RefVectorOf<XMLAttr>& attrList
00961 , const unsigned int attrCount
00962 , const bool isEmpty
00963 , const bool isRoot
00964 );
00965
00975 virtual void startEntityReference
00976 (
00977 const XMLEntityDecl& entDecl
00978 );
00979
00998 virtual void XMLDecl
00999 (
01000 const XMLCh* const versionStr
01001 , const XMLCh* const encodingStr
01002 , const XMLCh* const standaloneStr
01003 , const XMLCh* const actualEncStr
01004 );
01005 //@}
01006
01007
01009 //@{
01020 bool getDoValidation() const;
01021
01035 void setDoValidation(const bool newState);
01036 //doctypehandler interfaces
01037 virtual void attDef
01038 (
01039 const DTDElementDecl& elemDecl
01040 , const DTDAttDef& attDef
01041 , const bool ignoring
01042 );
01043
01044 virtual void doctypeComment
01045 (
01046 const XMLCh* const comment
01047 );
01048
01049 virtual void doctypeDecl
01050 (
01051 const DTDElementDecl& elemDecl
01052 , const XMLCh* const publicId
01053 , const XMLCh* const systemId
01054 , const bool hasIntSubset
01055 );
01056
01057 virtual void doctypePI
01058 (
01059 const XMLCh* const target
01060 , const XMLCh* const data
01061 );
01062
01063 virtual void doctypeWhitespace
01064 (
01065 const XMLCh* const chars
01066 , const unsigned int length
01067 );
01068
01069 virtual void elementDecl
01070 (
01071 const DTDElementDecl& decl
01072 , const bool isIgnored
01073 );
01074
01075 virtual void endAttList
01076 (
01077 const DTDElementDecl& elemDecl
01078 );
01079
01080 virtual void endIntSubset();
01081
01082 virtual void endExtSubset();
01083
01084 virtual void entityDecl
01085 (
01086 const DTDEntityDecl& entityDecl
01087 , const bool isPEDecl
01088 , const bool isIgnored
01089 );
01090
01091 virtual void resetDocType();
01092
01093 virtual void notationDecl
01094 (
01095 const XMLNotationDecl& notDecl
01096 , const bool isIgnored
01097 );
01098
01099 virtual void startAttList
01100 (
01101 const DTDElementDecl& elemDecl
01102 );
01103
01104 virtual void startIntSubset();
01105
01106 virtual void startExtSubset();
01107
01108 virtual void TextDecl
01109 (
01110 const XMLCh* const versionStr
01111 , const XMLCh* const encodingStr
01112 );
01113
01114
01115 //@}
01116
01117
01118 protected :
01119 // -----------------------------------------------------------------------
01120 // Protected getter methods
01121 // -----------------------------------------------------------------------
01122
01124 //@{
01130 DOM_Node getCurrentNode();
01131
01132 //@}
01133
01134
01135 // -----------------------------------------------------------------------
01136 // Protected setter methods
01137 // -----------------------------------------------------------------------
01138
01140 //@{
01141
01149 void setCurrentNode(DOM_Node toSet);
01150
01157 void setDocument(DOM_Document toSet);
01158 //@}
01159
01160
01161 private :
01162 //local private function to populate the doctype data
01163 virtual void populateDocumentType();
01164
01165 // -----------------------------------------------------------------------
01166 // Private data members
01167 //
01168 // fCurrentNode
01169 // fCurrentParent
01170 // Used to track the current node during nested element events. Since
01171 // the tree must be built from a set of disjoint callbacks, we need
01172 // these to keep up with where we currently are.
01173 //
01174 // fDocument
01175 // The root document object, filled with the document contents.
01176 //
01177 // fEntityResolver
01178 // The installed SAX entity resolver, if any. Null if none.
01179 //
01180 // fErrorHandler
01181 // The installed SAX error handler, if any. Null if none.
01182 //
01183 // fExpandEntityReference
01184 // Indicates whether entity reference nodes should be expanded to
01185 // its constituent text nodes or just created a single (end result)
01186 // text node.
01187 //
01188 // fIncludeIgnorableWhitespace
01189 // Indicates whether ignorable whiltespace should be added to
01190 // the DOM tree for validating parsers.
01191 //
01192 // fNodeStack
01193 // Used to track previous parent nodes during nested element events.
01194 //
01195 // fParseInProgress
01196 // Used to prevent multiple entrance to the parser while its doing
01197 // a parse.
01198 //
01199 // fScanner
01200 // The scanner used for this parser. This is created during the
01201 // constructor.
01202 //
01203 // fValidator
01204 // The validator that is installed. If none is provided, we will
01205 // create and install a DTD validator. We install this on the
01206 // scanner we create, which it will use to do validation. We set
01207 // ourself on it as the error reporter for validity errors.
01208 //
01209 // fWithinElement
01210 // A flag to indicate that the parser is within at least one level
01211 // of element processing.
01212 //
01213 // fDocumentType
01214 // Used to store and update the documentType variable information
01215 // in fDocument
01216 //
01217 // fOldDocTypeHandler
01218 // Used to chain the old documentType node if the user has set it
01219 // from outside
01220 //
01221 // fToCreateXMLDecTypeNode
01222 // A flag to create a DOM_XMLDecl node in the ODM tree if it exists
01223 // This is an extension to xerces implementation
01224 //
01225 // -----------------------------------------------------------------------
01226 DOM_Node fCurrentParent;
01227 DOM_Node fCurrentNode;
01228 DOM_Document fDocument;
01229 EntityResolver* fEntityResolver;
01230 ErrorHandler* fErrorHandler;
01231 bool fExpandEntityReferences;
01232 bool fIncludeIgnorableWhitespace;
01233 ValueStackOf<DOM_Node>* fNodeStack;
01234 bool fParseInProgress;
01235 XMLScanner* fScanner;
01236 XMLValidator* fValidator;
01237 bool fWithinElement;
01238 DocumentTypeImpl* fDocumentType;
01239 DocTypeHandler* fOldDocTypeHandler;
01240 bool fToCreateXMLDeclTypeNode;
01241 };
01242
01243
01244
01245 // ---------------------------------------------------------------------------
01246 // DOMParser: Handlers for the XMLEntityHandler interface
01247 // ---------------------------------------------------------------------------
01248 inline void DOMParser::endInputSource(const InputSource&)
01249 {
01250 // The DOM entity resolver doesn't handle this
01251 }
01252
01253 inline bool DOMParser::expandSystemId(const XMLCh* const, XMLBuffer&)
01254 {
01255 // The DOM entity resolver doesn't handle this
01256 return false;
01257 }
01258
01259 inline void DOMParser::resetEntities()
01260 {
01261 // Nothing to do on this one
01262 }
01263
01264 inline void DOMParser::startInputSource(const InputSource&)
01265 {
01266 // The DOM entity resolver doesn't handle this
01267 }
01268
01269
01270 // ---------------------------------------------------------------------------
01271 // DOMParser: Handlers for the XMLDocumentHandler interface
01272 // ---------------------------------------------------------------------------
01273 inline void DOMParser::endDocument()
01274 {
01275 // Not used in DOM at this time
01276 }
01277
01278
01279
01280 // ---------------------------------------------------------------------------
01281 // DOMParser: Getter methods
01282 // ---------------------------------------------------------------------------
01283 inline DOM_Document DOMParser::getDocument()
01284 {
01285 return fDocument;
01286 }
01287
01288 inline ErrorHandler* DOMParser::getErrorHandler()
01289 {
01290 return fErrorHandler;
01291 }
01292
01293 inline const ErrorHandler* DOMParser::getErrorHandler() const
01294 {
01295 return fErrorHandler;
01296 }
01297
01298 inline EntityResolver* DOMParser::getEntityResolver()
01299 {
01300 return fEntityResolver;
01301 }
01302
01303 inline const EntityResolver* DOMParser::getEntityResolver() const
01304 {
01305 return fEntityResolver;
01306 }
01307
01308 inline bool DOMParser::getExpandEntityReferences() const
01309 {
01310 return fExpandEntityReferences;
01311 }
01312
01313 inline bool DOMParser::getIncludeIgnorableWhitespace() const
01314 {
01315 return fIncludeIgnorableWhitespace;
01316 }
01317
01318 inline const XMLScanner& DOMParser::getScanner() const
01319 {
01320 return *fScanner;
01321 }
01322
01323 inline bool DOMParser::getToCreateXMLDeclTypeNode() const
01324 {
01325 return fToCreateXMLDeclTypeNode;
01326 }
01327
01328
01329 // ---------------------------------------------------------------------------
01330 // DOMParser: Setter methods
01331 // ---------------------------------------------------------------------------
01332 inline void DOMParser::setExpandEntityReferences(const bool expand)
01333 {
01334 fExpandEntityReferences = expand;
01335 }
01336
01337 inline void DOMParser::setIncludeIgnorableWhitespace(const bool include)
01338 {
01339 fIncludeIgnorableWhitespace = include;
01340 }
01341
01342 inline void DOMParser::setToCreateXMLDeclTypeNode(const bool create)
01343 {
01344 fToCreateXMLDeclTypeNode = create;
01345 }
01346
01347
01348 // ---------------------------------------------------------------------------
01349 // DOMParser: Protected getter methods
01350 // ---------------------------------------------------------------------------
01351 inline DOM_Node DOMParser::getCurrentNode()
01352 {
01353 return fCurrentNode;
01354 }
01355
01356
01357 // ---------------------------------------------------------------------------
01358 // DOMParser: Protected setter methods
01359 // ---------------------------------------------------------------------------
01360 inline void DOMParser::setCurrentNode(DOM_Node toSet)
01361 {
01362 fCurrentNode = toSet;
01363 }
01364
01365 inline void DOMParser::setDocument(DOM_Document toSet)
01366 {
01367 fDocument = toSet;
01368 }
01369
01370 #endif