00001 /*
00002 * The Apache Software License, Version 1.1
00003 *
00004 * Copyright (c) 1999-2000 The Apache Software Foundation. All rights
00005 * reserved.
00006 *
00007 * Redistribution and use in source and binary forms, with or without
00008 * modification, are permitted provided that the following conditions
00009 * are met:
00010 *
00011 * 1. Redistributions of source code must retain the above copyright
00012 * notice, this list of conditions and the following disclaimer.
00013 *
00014 * 2. Redistributions in binary form must reproduce the above copyright
00015 * notice, this list of conditions and the following disclaimer in
00016 * the documentation and/or other materials provided with the
00017 * distribution.
00018 *
00019 * 3. The end-user documentation included with the redistribution,
00020 * if any, must include the following acknowledgment:
00021 * "This product includes software developed by the
00022 * Apache Software Foundation (http://www.apache.org/)."
00023 * Alternately, this acknowledgment may appear in the software itself,
00024 * if and wherever such third-party acknowledgments normally appear.
00025 *
00026 * 4. The names "Xerces" and "Apache Software Foundation" must
00027 * not be used to endorse or promote products derived from this
00028 * software without prior written permission. For written
00029 * permission, please contact apache\@apache.org.
00030 *
00031 * 5. Products derived from this software may not be called "Apache",
00032 * nor may "Apache" appear in their name, without prior written
00033 * permission of the Apache Software Foundation.
00034 *
00035 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
00036 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
00037 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
00038 * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
00039 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
00040 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
00041 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
00042 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
00043 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
00044 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
00045 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
00046 * SUCH DAMAGE.
00047 * ====================================================================
00048 *
00049 * This software consists of voluntary contributions made by many
00050 * individuals on behalf of the Apache Software Foundation, and was
00051 * originally based on software copyright (c) 1999, International
00052 * Business Machines, Inc., http://www.ibm.com . For more information
00053 * on the Apache Software Foundation, please see
00054 * <http://www.apache.org/>.
00055 */
00056
00057 /*
00058 * $Log: SAXParser.hpp,v $
00059 * Revision 1.8 2000/04/12 22:58:30 roddey
00060 * Added support for 'auto validate' mode.
00061 *
00062 * Revision 1.7 2000/03/03 01:29:34 roddey
00063 * Added a scanReset()/parseReset() method to the scanner and
00064 * parsers, to allow for reset after early exit from a progressive parse.
00065 * Added calls to new Terminate() call to all of the samples. Improved
00066 * documentation in SAX and DOM parsers.
00067 *
00068 * Revision 1.6 2000/02/17 03:54:27 rahulj
00069 * Added some new getters to query the parser state and
00070 * clarified the documentation.
00071 *
00072 * Revision 1.5 2000/02/16 03:42:58 rahulj
00073 * Finished documenting the SAX Driver implementation.
00074 *
00075 * Revision 1.4 2000/02/15 04:47:37 rahulj
00076 * Documenting the SAXParser framework. Not done yet.
00077 *
00078 * Revision 1.3 2000/02/06 07:47:56 rahulj
00079 * Year 2K copyright swat.
00080 *
00081 * Revision 1.2 1999/12/15 19:57:48 roddey
00082 * Got rid of redundant 'const' on boolean return value. Some compilers choke
00083 * on this and it's useless.
00084 *
00085 * Revision 1.1.1.1 1999/11/09 01:07:51 twl
00086 * Initial checkin
00087 *
00088 * Revision 1.6 1999/11/08 20:44:54 rahul
00089 * Swat for adding in Product name and CVS comment log variable.
00090 *
00091 */
00092
00093 #if !defined(SAXPARSER_HPP)
00094 #define SAXPARSER_HPP
00095
00096 #include <sax/Parser.hpp>
00097 #include <internal/VecAttrListImpl.hpp>
00098 #include <framework/XMLDocumentHandler.hpp>
00099 #include <framework/XMLElementDecl.hpp>
00100 #include <framework/XMLEntityHandler.hpp>
00101 #include <framework/XMLErrorReporter.hpp>
00102 #include <validators/DTD/DocTypeHandler.hpp>
00103
// Forward declarations. Within this header these types appear only as
// pointer or reference types (parameters, return types, data members), so
// their full definitions are not required here.
00104 class DocumentHandler;
00105 class EntityResolver;
00106 class XMLPScanToken;
00107 class XMLScanner;
00108 class XMLValidator;
00109
00110
00120
// SAXParser
//
// Parser class that derives from the SAX Parser interface and from the
// XMLDocumentHandler, XMLErrorReporter, XMLEntityHandler and DocTypeHandler
// callback interfaces. As described in the private data member notes at the
// end of the class, it holds an internally created XMLScanner, an installed
// XMLValidator, the user-installed SAX handlers, and a small list of
// "advanced" XMLDocumentHandler objects.
//
// Copy construction and assignment are declared private and described as
// unimplemented, so instances are not meant to be copied.
00121 class SAXParser :
00122 
00123 public Parser
00124 , public XMLDocumentHandler
00125 , public XMLErrorReporter
00126 , public XMLEntityHandler
00127 , public DocTypeHandler
00128 {
00129 public :
00130 // -----------------------------------------------------------------------
00131 // Class types
00132 // -----------------------------------------------------------------------
      // Validation scheme selector used by setValidationScheme() and
      // getValidationScheme().
      // NOTE(review): the enumerator names suggest "never validate", "always
      // validate" and an automatic mode (the revision log mentions an
      // 'auto validate' mode); the exact rules live in the implementation --
      // confirm there.
00133 enum ValSchemes
00134 {
00135 Val_Never
00136 , Val_Always
00137 , Val_Auto
00138 };
00139 
00140 
00141 // -----------------------------------------------------------------------
00142 // Constructors and Destructor
00143 // -----------------------------------------------------------------------
00144 
00146 //@{
      // Constructs a parser.
      //   valToAdopt  Optional validator; defaults to 0 (none supplied). Per
      //               the fValidator note below, a DTD validator is created
      //               and installed when none is provided.
      // NOTE(review): the "ToAdopt" suffix suggests ownership passes to the
      // parser -- confirm against the implementation.
00152 SAXParser(XMLValidator* const valToAdopt = 0);
00153 
      // Destructor; defined out of line.
00157 ~SAXParser();
00158 //@}
00159 
00160 
      // -----------------------------------------------------------------------
      //  Getter methods
      //
      //  Each handler getter comes in a const and a non-const flavour. The
      //  handler pointers are documented below as null when nothing has been
      //  installed, so callers should be prepared for a null result.
      // -----------------------------------------------------------------------
00162 //@{
      // Returns the installed SAX document handler (may be null).
00169 DocumentHandler* getDocumentHandler();
00170 
      // Const overload of the above.
00177 const DocumentHandler* getDocumentHandler() const;
00178 
      // Returns the installed SAX entity resolver (may be null).
00185 EntityResolver* getEntityResolver();
00186 
      // Const overload of the above.
00193 const EntityResolver* getEntityResolver() const;
00194 
      // Returns the installed SAX error handler (may be null).
00201 ErrorHandler* getErrorHandler();
00202 
      // Const overload of the above.
00209 const ErrorHandler* getErrorHandler() const;
00210 
      // Returns a read-only reference to the scanner used by this parser.
00217 const XMLScanner& getScanner() const;
00218 
      // Returns a read-only reference to a validator.
      // NOTE(review): presumably the one held in fValidator -- confirm.
00225 const XMLValidator& getValidator() const;
00226 
      // Returns the current validation scheme as a ValSchemes value.
00233 ValSchemes getValidationScheme() const;
00234 
      // Returns the namespace-processing flag.
      // NOTE(review): presumably mirrors setDoNamespaces() -- confirm.
00244 bool getDoNamespaces() const;
00245 
      // Returns the exit-on-first-fatal-error flag.
      // NOTE(review): presumably mirrors setExitOnFirstFatalError() -- confirm.
00255 bool getExitOnFirstFatalError() const;
00256 //@}
00257 
00258 
00259 // -----------------------------------------------------------------------
00260 // Setter methods
00261 // -----------------------------------------------------------------------
00262 
00264 //@{
      // Sets the namespace-processing flag to newState.
00282 void setDoNamespaces(const bool newState);
00283 
      // Selects the validation scheme (one of the ValSchemes values).
00295 void setValidationScheme(const ValSchemes newScheme);
00296 
      // Sets the exit-on-first-fatal-error flag to newState.
00312 void setExitOnFirstFatalError(const bool newState);
00313 //@}
00314 
00315 
00316 // -----------------------------------------------------------------------
00317 // Advanced document handler list maintenance methods
00318 // -----------------------------------------------------------------------
00319 
00321 //@{
      // Adds toInstall to the list of advanced document handlers (see the
      // fAdvDHList notes below; the list is described as growing on demand).
00335 void installAdvDocHandler(XMLDocumentHandler* const toInstall);
00336 
      // Removes toRemove from the list of advanced document handlers.
      // NOTE(review): the bool result presumably reports whether the handler
      // was found and removed -- confirm against the implementation.
00346 bool removeAdvDocHandler(XMLDocumentHandler* const toRemove);
00347 //@}
00348 
00349 
00350 // -----------------------------------------------------------------------
00351 // Implementation of the SAX Parser interface
00352 // -----------------------------------------------------------------------
00353 
00355 //@{
      // Three parse() overloads that differ only in how the input is named:
      // an InputSource, an XMLCh system id, or a char system id.
      //   reuseValidator  Defaults to false.
      // NOTE(review): presumably controls whether validator state from a
      // previous parse is kept -- confirm against the implementation.
      // Per the fParseInProgress note below, a flag set when a parse starts
      // is used to prevent multiple entrance or reentrance of the parser.
00368 virtual void parse(const InputSource& source, const bool reuseValidator = false);
00369 
00382 virtual void parse(const XMLCh* const systemId, const bool reuseValidator = false);
00383 
00394 virtual void parse(const char* const systemId, const bool reuseValidator = false);
00395 
      // Installs the SAX document handler.
00406 virtual void setDocumentHandler(DocumentHandler* const handler);
00407 
      // Installs the SAX DTD handler.
00417 virtual void setDTDHandler(DTDHandler* const handler);
00418 
      // Installs the SAX error handler.
00429 virtual void setErrorHandler(ErrorHandler* const handler);
00430 
      // Installs the SAX entity resolver.
00442 virtual void setEntityResolver(EntityResolver* const resolver);
00443 //@}
00444 
00445 
00446 // -----------------------------------------------------------------------
00447 // Progressive scan methods
00448 // -----------------------------------------------------------------------
      // parseFirst() begins a progressive parse and fills in a scan token,
      // parseNext() takes that token to continue, and parseReset() takes it
      // to reset. The revision log describes parseReset() as allowing a reset
      // after an early exit from a progressive parse.
      // NOTE(review): the meaning of the bool results of parseFirst() and
      // parseNext() is not visible in this header -- confirm before relying
      // on it.
00449 
00451 //@{
00452 
      // Starts a progressive parse of the entity named by an XMLCh system id.
00484 bool parseFirst
00485 (
00486 const XMLCh* const systemId
00487 , XMLPScanToken& toFill
00488 , const bool reuseValidator = false
00489 );
00490 
      // As above, with the system id given as a char string.
00521 bool parseFirst
00522 (
00523 const char* const systemId
00524 , XMLPScanToken& toFill
00525 , const bool reuseValidator = false
00526 );
00527 
      // As above, reading from an InputSource.
00558 bool parseFirst
00559 (
00560 const InputSource& source
00561 , XMLPScanToken& toFill
00562 , const bool reuseValidator = false
00563 );
00564 
      // Continues a progressive parse identified by token.
00589 bool parseNext(XMLPScanToken& token);
00590 
      // Resets a progressive parse identified by token.
00612 void parseReset(XMLPScanToken& token);
00613 
00614 //@}
00615 
00616 
00617 
00618 // -----------------------------------------------------------------------
00619 // Implementation of the DocTypeHandler Interface
00620 // -----------------------------------------------------------------------
      // Callbacks for DTD-level events (attribute and element declarations,
      // comments, PIs and whitespace inside the doctype, internal/external
      // subset boundaries, entity and notation declarations, text decls).
      // NOTE(review): what each callback does with its event is defined in
      // the implementation, which is not visible here.
00621 
00623 //@{
00638 virtual void attDef
00639 (
00640 const DTDElementDecl& elemDecl
00641 , const DTDAttDef& attDef
00642 , const bool ignoring
00643 );
00644 
00654 virtual void doctypeComment
00655 (
00656 const XMLCh* const comment
00657 );
00658 
00675 virtual void doctypeDecl
00676 (
00677 const DTDElementDecl& elemDecl
00678 , const XMLCh* const publicId
00679 , const XMLCh* const systemId
00680 , const bool hasIntSubset
00681 );
00682 
00696 virtual void doctypePI
00697 (
00698 const XMLCh* const target
00699 , const XMLCh* const data
00700 );
00701 
00713 virtual void doctypeWhitespace
00714 (
00715 const XMLCh* const chars
00716 , const unsigned int length
00717 );
00718 
00731 virtual void elementDecl
00732 (
00733 const DTDElementDecl& decl
00734 , const bool isIgnored
00735 );
00736 
00747 virtual void endAttList
00748 (
00749 const DTDElementDecl& elemDecl
00750 );
00751 
00758 virtual void endIntSubset();
00759 
00766 virtual void endExtSubset();
00767 
00782 virtual void entityDecl
00783 (
00784 const DTDEntityDecl& entityDecl
00785 , const bool isPEDecl
00786 , const bool isIgnored
00787 );
00788 
00793 virtual void resetDocType();
00794 
00807 virtual void notationDecl
00808 (
00809 const XMLNotationDecl& notDecl
00810 , const bool isIgnored
00811 );
00812 
00823 virtual void startAttList
00824 (
00825 const DTDElementDecl& elemDecl
00826 );
00827 
00834 virtual void startIntSubset();
00835 
00842 virtual void startExtSubset();
00843 
00856 virtual void TextDecl
00857 (
00858 const XMLCh* const versionStr
00859 , const XMLCh* const encodingStr
00860 );
00861 //@}
00862 
00863 
00864 // -----------------------------------------------------------------------
00865 // Implementation of the XMLDocumentHandler interface
00866 // -----------------------------------------------------------------------
      // Callbacks for document-content events (character data, comments,
      // PIs, element and entity-reference boundaries, document start/end,
      // the XML declaration). Per the fElemDepth note below, the element
      // nesting depth is tracked so that character data outside of content
      // can be ignored; per the fAttrList note, one attribute list object is
      // reused on each startElement event.
00867 
00869 //@{
00885 virtual void docCharacters
00886 (
00887 const XMLCh* const chars
00888 , const unsigned int length
00889 , const bool cdataSection
00890 );
00891 
00901 virtual void docComment
00902 (
00903 const XMLCh* const comment
00904 );
00905 
00925 virtual void docPI
00926 (
00927 const XMLCh* const target
00928 , const XMLCh* const data
00929 );
00930 
00942 virtual void endDocument();
00943 
00960 virtual void endElement
00961 (
00962 const XMLElementDecl& elemDecl
00963 , const unsigned int urlId
00964 , const bool isRoot
00965 );
00966 
00977 virtual void endEntityReference
00978 (
00979 const XMLEntityDecl& entDecl
00980 );
00981 
01001 virtual void ignorableWhitespace
01002 (
01003 const XMLCh* const chars
01004 , const unsigned int length
01005 , const bool cdataSection
01006 );
01007 
01012 virtual void resetDocument();
01013 
01024 virtual void startDocument();
01025 
01052 virtual void startElement
01053 (
01054 const XMLElementDecl& elemDecl
01055 , const unsigned int urlId
01056 , const XMLCh* const elemPrefix
01057 , const RefVectorOf<XMLAttr>& attrList
01058 , const unsigned int attrCount
01059 , const bool isEmpty
01060 , const bool isRoot
01061 );
01062 
01072 virtual void startEntityReference
01073 (
01074 const XMLEntityDecl& entDecl
01075 );
01076 
01094 virtual void XMLDecl
01095 (
01096 const XMLCh* const versionStr
01097 , const XMLCh* const encodingStr
01098 , const XMLCh* const standaloneStr
01099 , const XMLCh* const actualEncodingStr
01100 );
01101 //@}
01102 
01103 
01104 // -----------------------------------------------------------------------
01105 // Implementation of the XMLErrorReporter interface
01106 // -----------------------------------------------------------------------
01107 
01109 //@{
      // Error callback carrying the error code, message domain, error type,
      // message text, the system/public ids of the entity involved, and the
      // line and column numbers.
      // NOTE(review): presumably forwards to the installed SAX ErrorHandler
      // (fErrorHandler) -- confirm against the implementation.
01133 virtual void error
01134 (
01135 const unsigned int errCode
01136 , const XMLCh* const msgDomain
01137 , const XMLErrorReporter::ErrTypes errType
01138 , const XMLCh* const errorText
01139 , const XMLCh* const systemId
01140 , const XMLCh* const publicId
01141 , const unsigned int lineNum
01142 , const unsigned int colNum
01143 );
01144 
01153 virtual void resetErrors();
01154 //@}
01155 
01156 
01157 // -----------------------------------------------------------------------
01158 // Implementation of the XMLEntityHandler interface
01159 // -----------------------------------------------------------------------
      // Callbacks related to entity/input-source handling.
      // NOTE(review): resolveEntity() presumably consults the installed SAX
      // EntityResolver (fEntityResolver); ownership of the returned
      // InputSource is not visible here -- confirm both.
01160 
01162 //@{
01174 virtual void endInputSource(const InputSource& inputSource);
01175 
01190 virtual bool expandSystemId
01191 (
01192 const XMLCh* const systemId
01193 , XMLBuffer& toFill
01194 );
01195 
01203 virtual void resetEntities();
01204 
01219 virtual InputSource* resolveEntity
01220 (
01221 const XMLCh* const publicId
01222 , const XMLCh* const systemId
01223 );
01224 
01236 virtual void startInputSource(const InputSource& inputSource);
01237 //@}
01238 
01239 
      // Boolean validation accessors.
      // NOTE(review): these look like an older on/off counterpart of
      // getValidationScheme()/setValidationScheme(); how a bool maps onto
      // ValSchemes is not visible in this header -- confirm.
01241 //@{
01252 bool getDoValidation() const;
01253 
01267 void setDoValidation(const bool newState);
01268 //@}
01269 
01270 
01271 private :
01272 // -----------------------------------------------------------------------
01273 // Unimplemented constructors and operators
01274 // -----------------------------------------------------------------------
      // Declared private (and, per the heading above, left unimplemented) so
      // that a SAXParser cannot be copied or assigned.
01275 SAXParser(const SAXParser&);
01276 void operator=(const SAXParser&);
01277 
01278 
01279 // -----------------------------------------------------------------------
01280 // Private data members
01281 //
01282 // fAttrList
01283 // A temporary implementation of the basic SAX attribute list
01284 // interface. We use this one over and over on each startElement
01285 // event to allow SAX-like access to the element attributes.
01286 //
01287 // fDocHandler
01288 // The installed SAX doc handler, if any. Null if none.
01289 //
01290 // fDTDHandler
01291 // The installed SAX DTD handler, if any. Null if none.
01292 //
01293 // fElemDepth
01294 // This is used to track the element nesting depth, so that we can
01295 // know when we are inside content. This is so we can ignore char
01296 // data outside of content.
01297 //
01298 // fEntityResolver
01299 // The installed SAX entity resolver, if any. Null if none.
01300 //
01301 // fErrorHandler
01302 // The installed SAX error handler, if any. Null if none.
01303 //
01304 // fAdvDHCount
01305 // fAdvDHList
01306 // fAdvDHListSize
01307 // This is an array of pointers to XMLDocumentHandlers, which is
01308 // how we see installed advanced document handlers. There will
01309 // usually not be very many at all, so a simple array is used
01310 // instead of a collection, for performance. It will grow if needed,
01311 // but that is unlikely.
01312 //
01313 // The count is how many handlers are currently installed. The size
01314 // is how big the array itself is (for expansion purposes.) When
01315 // count == size, it is time to expand.
01316 //
01317 // fParseInProgress
01318 // This flag is set once a parse starts. It is used to prevent
01319 // multiple entrance or reentrance of the parser.
01320 //
01321 // fScanner
01322 // The scanner being used by this parser. It is created internally
01323 // during construction.
01324 //
01325 // fValidator
01326 // The validator that is installed. If none is provided, we will
01327 // create and install a DTD validator. We install this on the
01328 // scanner we create, which it will use to do validation. We set
01329 // ourself on it as the error reporter for validity errors.
01330 // -----------------------------------------------------------------------
01331 VecAttrListImpl fAttrList;
01332 DocumentHandler* fDocHandler;
01333 DTDHandler* fDTDHandler;
01334 unsigned int fElemDepth;
01335 EntityResolver* fEntityResolver;
01336 ErrorHandler* fErrorHandler;
01337 unsigned int fAdvDHCount;
01338 XMLDocumentHandler** fAdvDHList;
01339 unsigned int fAdvDHListSize;
01340 bool fParseInProgress;
01341 XMLScanner* fScanner;
01342 XMLValidator* fValidator;
01343 };
01344
01345
01346 // ---------------------------------------------------------------------------
01347 // SAXParser: Getter methods
01348 // ---------------------------------------------------------------------------
// Mutable accessor for the SAX document handler currently stored in
// fDocHandler. The pointer is handed back as-is, so it is null when no
// handler has been installed.
01349 inline DocumentHandler* SAXParser::getDocumentHandler()
01350 {
01351 return this->fDocHandler;
01352 }
01353
// Read-only accessor for the SAX document handler stored in fDocHandler;
// yields a pointer-to-const, which is null when no handler is installed.
01354 inline const DocumentHandler* SAXParser::getDocumentHandler() const
01355 {
01356 return this->fDocHandler;
01357 }
01358
// Mutable accessor for the SAX entity resolver currently stored in
// fEntityResolver. The pointer is handed back as-is, so it is null when no
// resolver has been installed.
01359 inline EntityResolver* SAXParser::getEntityResolver()
01360 {
01361 return this->fEntityResolver;
01362 }
01363
// Read-only accessor for the SAX entity resolver stored in fEntityResolver;
// yields a pointer-to-const, which is null when no resolver is installed.
01364 inline const EntityResolver* SAXParser::getEntityResolver() const
01365 {
01366 return this->fEntityResolver;
01367 }
01368
// Mutable accessor for the SAX error handler currently stored in
// fErrorHandler. The pointer is handed back as-is, so it is null when no
// handler has been installed.
01369 inline ErrorHandler* SAXParser::getErrorHandler()
01370 {
01371 return this->fErrorHandler;
01372 }
01373
// Read-only accessor for the SAX error handler stored in fErrorHandler;
// yields a pointer-to-const, which is null when no handler is installed.
01374 inline const ErrorHandler* SAXParser::getErrorHandler() const
01375 {
01376 return this->fErrorHandler;
01377 }
01378
// Read-only access to the scanner object that fScanner points at. The
// pointer is dereferenced without a null check; the class notes describe
// the scanner as being created internally during construction.
01379 inline const XMLScanner& SAXParser::getScanner() const
01380 {
01381 return *(this->fScanner);
01382 }
01383
01384 #endif