View Javadoc

1   /*
2    * Copyright 2001-2004 The Apache Software Foundation.
3    * 
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    * 
8    *      http://www.apache.org/licenses/LICENSE-2.0
9    * 
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS,
12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13   * See the License for the specific language governing permissions and
14   * limitations under the License.
15   */
16  
17  package org.apache.mirae.j2me.xml;
18  
19  import java.io.IOException;
20  import java.io.InputStream;
21  import java.io.InputStreamReader;
22  import java.io.Reader;
23  import java.util.Hashtable;
24  import java.util.Stack;
25  
26  import javax.microedition.io.Connector;
27  
28  import org.apache.mirae.j2me.xml.sax.AttributesImpl;
29  import org.apache.mirae.j2me.xml.sax.LocatorImpl;
30  import org.xml.sax.InputSource;
31  import org.xml.sax.SAXException;
32  import org.xml.sax.SAXParseException;
33  import org.xml.sax.helpers.DefaultHandler;
34  
35  /***
36   * This is a non-validating parser implementation for J2ME JAXP
37   * @author Ias (iasandcb@tmax.co.kr)
38   *  
39   */
40  public class NonValidatingParser {
41  
    // Per-element tables of attribute default values collected from <!ATTLIST> declarations;
    // keyed by element name, each value is a Hashtable of attribute name -> default value.
    protected Hashtable attributeDefaultValues;
    // Per-element tables of attribute types collected from <!ATTLIST> declarations;
    // keyed by element name, each value is a Hashtable of attribute name -> type text.
    protected Hashtable attributeTypes;
    // Content-model text recorded by <!ELEMENT> declarations, keyed by element name.
    private Hashtable elementTypes;
    // Set once an element whose qualified name equals rootElement has been closed.
    private boolean endOfDocument;
    // Resolver used for '&' entity references in element content and attribute values.
    private EntityResolver entityResolver;

    // Receiver of all SAX callbacks (content, DTD and error events).
    private DefaultHandler handler;
    // Location object handed to the handler and to the push-back reader.
    private LocatorImpl locator;
    // Currently declared namespace prefixes mapped to their URIs ("" is the default namespace).
    private Hashtable namespaces;

    // Resolver used for '%' parameter-entity references inside the DTD.
    private EntityResolver parameterEntityResolver;
    // Prefixes in declaration order, so that an element can undo the ones it declared.
    private Stack prefixes;
    // Character source for the whole parse; supports unread() and addString().
    private PushBackReader reader;

    // Root element name, taken from the DOCTYPE or else from the first element parsed.
    private String rootElement;
    // Owning parser, queried for the namespace-related feature flags.
    private SAXParserImpl saxParser;
58  
59      /***
60  	 * Constructor with source, handler and SAXParserImpl
61  	 * 
62  	 * @param source
63  	 * @param handler
64  	 * @param saxParser
65  	 * @throws IOException
66  	 */
67      public NonValidatingParser(InputSource source, DefaultHandler handler, SAXParserImpl saxParser) throws IOException {
68      	
69      	Reader sourceReader = source.getCharacterStream();
70          if (sourceReader == null) {
71              InputStream ips = source.getByteStream();
72              if (ips == null) {
73                  String systemId = source.getSystemId();      
74                  
75                  ips = Connector.openInputStream(systemId);                
76                  
77                  if (ips == null) {
78                      throw new IOException();
79                  }
80              }
81              sourceReader = new InputStreamReader(ips);
82          }
83  
84          locator = new LocatorImpl(source.getPublicId(), source.getSystemId(), 1, 1);
85          reader = new PushBackReader(sourceReader, locator);
86          namespaces = new Hashtable();
87          prefixes = new Stack();
88          this.handler = handler;
89          this.saxParser = saxParser;
90  
91          attributeDefaultValues = new Hashtable();
92          attributeTypes = new Hashtable();
93  
94          entityResolver = new EntityResolver();
95          parameterEntityResolver = new EntityResolver();
96  
97          elementTypes = new Hashtable();
98      }
99  
100     private boolean checkLiteral(String literal) throws Exception {
101         for (int i = 0; i < literal.length(); i++) {
102             if (reader.read() != literal.charAt(i)) {
103                 return false;
104             }
105         }
106 
107         return true;
108     }
109 
110     private String closeCdataTag() throws Exception {
111         boolean closed = false;
112         StringBuffer buffer = new StringBuffer();
113         while (!closed) {
114             char ch = reader.read();
115 
116             if (ch == ']') {
117                 char ch2 = reader.read();
118 
119                 if (ch2 == ']') {
120                     char ch3 = reader.read();
121 
122                     if (ch3 == '>') {
123                         closed = true;
124                     }
125                     else {
126                         buffer.append(ch);
127                         buffer.append(ch2);
128                         buffer.append(ch3);
129                     }
130                 }
131                 else {
132                     buffer.append(ch);
133                     buffer.append(ch2);
134                 }
135             }
136             else {
137                 buffer.append(ch);
138             }
139         }
140         return buffer.toString();
141     }
142 
143     private String closeDtdTag() throws Exception {
144         boolean closed = false;
145         StringBuffer buffer = new StringBuffer();
146         while (!closed) {
147             char ch = reader.read();
148 
149             if (ch == '?') {
150                 char ch2 = reader.read();
151 
152                 if (ch2 == '>') {
153                     closed = true;
154                 }
155                 else {
156                     buffer.append(ch);
157                     buffer.append(ch2);
158                 }
159             }
160             else {
161                 buffer.append(ch);
162             }
163         }
164         return buffer.toString();
165     }
166 
167     private String closeInstructionTag() throws Exception {
168         boolean closed = false;
169         StringBuffer buffer = new StringBuffer();
170         while (!closed) {
171             char ch = reader.read();
172 
173             if (ch == '?') {
174                 char ch2 = reader.read();
175 
176                 if (ch2 == '>') {
177                     closed = true;
178                 }
179                 else {
180                     buffer.append(ch);
181                     buffer.append(ch2);
182                 }
183             }
184             else {
185                 buffer.append(ch);
186             }
187         }
188         return buffer.toString();
189     }
190 
191     /***
192 	 * start to parse the given document
193 	 * 
194 	 * @throws SAXException
195 	 * @throws IOException
196 	 */
197     public void parse() throws SAXException, IOException {
198         handler.setDocumentLocator(locator);
199         String token;
200         try {
201             while (!reader.isEof()) {
202                 token = readToken('&');
203                 char leading = token.charAt(0);
204                 switch (leading) {
205                     case '<' :
206                         if (endOfDocument) {
207                             handler.fatalError(new SAXParseException("End of Document", locator));
208                             break;
209                         }
210                         parseTag(false);
211                         break;
212 
213                     case '\n' :
214                         break;
215                     case ' ' :
216                         break;
217                     case '\t' :
218                         break;
219                     case '\r' :
220                         break;
221 
222                     default :
223                         if (reader.isEof()) {
224                             break;
225                         }
226                         if (endOfDocument) {
227                             handler.fatalError(new SAXParseException("End of Document", locator));
228                             break;
229                         }
230                         handler.fatalError(new SAXParseException("'" + leading + "' invaid input", locator));
231                 }
232             }
233         }
234         catch (SAXException se) {
235             throw se;
236         }
237         catch (IOException ie) {
238             throw ie;
239         }
240         catch (Exception e) {
241             throw new SAXException(e.getMessage());
242         }
243         finally {
244             handler.endDocument();
245         }
246 
247     }
248 
249     protected void parseAttribute(AttributesImpl attributes) throws Exception {
250         String key = readIdentifier();
251         skipWhitespace();
252 
253         if (!readToken('&').equals("=")) {
254             handler.fatalError(new SAXParseException("'=' expected", locator));
255         }
256 
257         skipWhitespace();
258         String type = attributes.getTypeFromDeclaration(key);
259         String value = readAttributeValue(!type.equals("CDATA"), entityResolver);
260         if (key.equals("xmlns")) {
261             String prefix = "";
262             namespaces.put(prefix, value);
263             prefixes.push(prefix);
264             if (saxParser.isNamespacePrefixAware()) {
265                 attributes.add(key, type, value);
266             }
267             else {
268                 handler.startPrefixMapping(prefix, value);
269             }
270         }
271         else if (key.startsWith("xmlns:")) {
272             String prefix = key.substring(6);
273             namespaces.put(prefix, value);
274             prefixes.push(prefix);
275             if (saxParser.isNamespacePrefixAware()) {
276                 attributes.add(key, type, value);
277             }
278             else {
279                 handler.startPrefixMapping(prefix, value);
280             }
281         }
282         else {
283             attributes.add(key, type, value);
284         }
285     }
286 
287     private void parseCdata() throws Exception {
288         if (checkLiteral("CDATA[")) {
289             String data = closeCdataTag();
290             handler.characters(data.toCharArray(), 0, data.length());
291         }
292         else {
293             handler.fatalError(new SAXParseException("'<![[CDATA[' expected", locator));
294         }
295     }
296 
297     private char parseCharLiteral(String entity) throws Exception {
298         if (entity.charAt(2) == 'x') {
299             entity = entity.substring(3, entity.length() - 1);
300             return (char) Integer.parseInt(entity, 16);
301         }
302         else {
303             entity = entity.substring(2, entity.length() - 1);
304             return (char) Integer.parseInt(entity, 10);
305         }
306     }
307 
308     private void parseDocType() throws Exception {
309         if (!checkLiteral("OCTYPE")) {
310             handler.fatalError(new SAXParseException("'<!DOCTYPE' expected", locator));
311         }
312 
313         skipWhitespace();
314         String systemID = null;
315         StringBuffer publicID = new StringBuffer();
316         rootElement = readIdentifier();
317         skipWhitespace();
318         char ch = reader.read();
319         if (ch == 'P') {
320             systemID = readPublicId(publicID);
321             handler.skippedEntity("[dtd]");
322             skipWhitespace();
323             ch = reader.read();
324         }
325         else if (ch == 'S') {
326             systemID = readSystemId();
327             handler.skippedEntity("[dtd]");
328             skipWhitespace();
329             ch = reader.read();
330         }
331 
332         if (ch == '[') {
333             parseDtd();
334             skipWhitespace();
335             ch = reader.read();
336         }
337 
338         if (ch != '>') {
339             handler.fatalError(new SAXParseException("'>' expected", locator));
340         }
341 
342     }
343 
344     private void parseDtd() throws Exception {
345         skipWhitespace();
346         for (;;) {
347             String str = readToken('%');
348             char ch = str.charAt(0);
349 
350             if (ch == '%') {
351                 parseDtdEntity(parameterEntityResolver);
352                 continue;
353             }
354             else if (ch == '<') {
355                 parseDtdDeclaration(entityResolver);
356             }
357             else if (ch == ']') {
358                 return; // end internal DTD
359             }
360             else {
361                 handler.fatalError(new SAXParseException("'" + str + "' invalid input", locator));
362             }
363             do {
364                 ch = reader.read();
365             }
366             while ((ch == ' ') || (ch == '\t') || (ch == '\n') || (ch == '\r'));
367 
368             reader.unread();
369         }
370     }
371 
372     protected void parseDtdAttributeList(EntityResolver entityResolver) throws Exception {
373         if (!checkLiteral("TTLIST")) {
374             skipTag();
375             return;
376         }
377 
378         skipWhitespace();
379         String str = readToken('%');
380         char ch = str.charAt(0);
381         while (ch == '%') {
382             parseEntity(str, parameterEntityResolver);
383             str = readToken('%');
384             ch = str.charAt(0);
385         }
386         reader.unread();
387         String elementName = readIdentifier();
388         skipWhitespace();
389 
390         str = readToken('%');
391         ch = str.charAt(0);
392         while (ch == '%') {
393             parseEntity(str, parameterEntityResolver);
394             str = readToken('%');
395             ch = str.charAt(0);
396         }
397 
398         Hashtable defaultValues = new Hashtable();
399         Hashtable types = new Hashtable();
400 
401         while (ch != '>') {
402             reader.unread();
403             String attName = readIdentifier();
404             skipWhitespace();
405             str = readToken('%');
406             ch = str.charAt(0);
407             while (ch == '%') {
408                 parseEntity(str, parameterEntityResolver);
409                 str = readToken('%');
410                 ch = str.charAt(0);
411             }
412 
413             if (ch == '(') {
414                 StringBuffer typeBuffer = new StringBuffer();
415                 typeBuffer.append(str);
416                 while (ch != ')') {
417                     str = readToken('%');
418                     ch = str.charAt(0);
419                     while (ch == '%') {
420                         parseEntity(str, parameterEntityResolver);
421                         str = readToken('%');
422                         ch = str.charAt(0);
423                     }
424                     typeBuffer.append(str);
425                 }
426                 types.put(attName, typeBuffer.toString());
427             }
428             else {
429                 reader.unread();
430                 String type = readIdentifier();
431                 types.put(attName, type);
432             }
433 
434             skipWhitespace();
435             str = readToken('%');
436             ch = str.charAt(0);
437             while (ch == '%') {
438                 parseEntity(str, parameterEntityResolver);
439                 str = readToken('%');
440                 ch = str.charAt(0);
441             }
442 
443             if (ch == '#') {
444                 str = readIdentifier();
445                 skipWhitespace();
446 
447                 if (!str.equals("FIXED")) {
448                     skipWhitespace();
449 
450                     str = readToken('%');
451                     ch = str.charAt(0);
452                     while (ch == '%') {
453                         parseEntity(str, parameterEntityResolver);
454                         str = readToken('%');
455                         ch = str.charAt(0);
456                     }
457 
458                     continue;
459                 }
460             }
461             else {
462                 reader.unread();
463             }
464 
465             String value = readString('%', parameterEntityResolver);
466             defaultValues.put(attName, value);
467             skipWhitespace();
468 
469             str = readToken('%');
470             ch = str.charAt(0);
471             while (ch == '%') {
472                 parseEntity(str, parameterEntityResolver);
473                 str = readToken('%');
474                 ch = str.charAt(0);
475             }
476         }
477 
478         if (!types.isEmpty()) {
479             attributeTypes.put(elementName, types);
480         }
481 
482         if (!defaultValues.isEmpty()) {
483             attributeDefaultValues.put(elementName, defaultValues);
484         }
485     }
486 
487     private void parseDtdConditionalSection(EntityResolver entityResolver) throws Exception {
488         skipWhitespace();
489 
490         // paramete entity should be supported. call readString instead.
491         String str = readString('%', entityResolver);
492         char ch = str.charAt(0);
493 
494         if (ch != 'I') {
495             skipTag();
496             return;
497         }
498 
499         str = readToken('%');
500         ch = str.charAt(0);
501 
502         switch (ch) {
503             case 'G' :
504                 parseDtdIgnoreSection(entityResolver);
505                 return;
506 
507             case 'N' :
508                 break;
509 
510             default :
511                 skipTag();
512                 return;
513         }
514 
515         if (!checkLiteral("CLUDE")) {
516             skipTag();
517             return;
518         }
519 
520         skipWhitespace();
521 
522         str = readToken('%');
523         ch = str.charAt(0);
524 
525         if (ch != '[') {
526             skipTag();
527             return;
528         }
529 
530         reader.addString(closeCdataTag());
531     }
532 
533     private void parseDtdDeclaration(EntityResolver entityResolver) throws Exception {
534         String str = readToken('%');
535         char ch = str.charAt(0);
536 
537         if (ch != '!') {
538             skipTag();
539             return;
540         }
541 
542         str = readToken('%');
543         ch = str.charAt(0);
544 
545         switch (ch) {
546             case '-' :
547                 skipComment();
548                 break;
549 
550             case '[' :
551                 parseDtdConditionalSection(entityResolver);
552                 break;
553 
554             case 'E' :
555                 ch = reader.read();
556                 if (ch == 'L') {
557                     parseDtdElement(entityResolver);
558                 }
559                 else {
560                     reader.unread();
561                     parseDtdEntity(entityResolver);
562                 }
563                 break;
564 
565             case 'A' :
566                 parseDtdAttributeList(entityResolver);
567                 break;
568 
569             case 'N' :
570                 parseDtdNotation(entityResolver);
571                 break;
572 
573             default :
574                 skipTag();
575         }
576     }
577 
578     protected void parseDtdElement(EntityResolver entityResolver) throws Exception {
579         if (!checkLiteral("EMENT")) {
580             skipTag();
581             return;
582         }
583 
584         skipWhitespace();
585         String str = readToken('%');
586         char ch = str.charAt(0);
587         while (ch == '%') {
588             parseEntity(str, parameterEntityResolver);
589             str = readToken('%');
590             ch = str.charAt(0);
591         }
592         reader.unread();
593         String elementName = readIdentifier();
594         skipWhitespace();
595 
596         str = readToken('%');
597         ch = str.charAt(0);
598         while (ch == '%') {
599             parseEntity(str, parameterEntityResolver);
600             str = readToken('%');
601             ch = str.charAt(0);
602         }
603 
604         StringBuffer typeBuffer = new StringBuffer();
605         reader.unread();
606         skipWhitespace();
607         while (ch != '>') {
608             str = readToken('%');
609             ch = str.charAt(0);
610             while (ch == '%') {
611                 parseEntity(str, parameterEntityResolver);
612                 str = readToken('%');
613                 ch = str.charAt(0);
614             }
615             typeBuffer.append(str);
616         }
617         elementTypes.put(elementName, typeBuffer.toString());
618 
619     }
620 
621     private void parseDtdEntity(EntityResolver entityResolver) throws Exception {
622         if (!checkLiteral("NTITY")) {
623             skipTag();
624             return;
625         }
626 
627         skipWhitespace();
628         char ch = readChar('\0');
629 
630         if (ch == '%') {
631             skipWhitespace();
632             entityResolver = parameterEntityResolver;
633         }
634         else {
635             reader.unread();
636         }
637 
638         String name = readIdentifier();
639         skipWhitespace();
640         ch = readChar('%');
641         String systemId = null;
642         String publicId = null;
643         String notationMark = null;
644         String notationName = null;
645 
646         switch (ch) {
647             case 'P' :
648                 if (!checkLiteral("UBLIC")) {
649                     skipTag();
650                     return;
651                 }
652 
653                 skipWhitespace();
654                 publicId = readString('%', parameterEntityResolver);
655                 skipWhitespace();
656                 systemId = readString('%', parameterEntityResolver);
657                 skipWhitespace();
658                 notationMark = readString('%', parameterEntityResolver);
659                 if (notationMark.startsWith("NDATA")) {
660                     skipWhitespace();
661                     notationName = readString('%', parameterEntityResolver);
662                 }
663                 break;
664 
665             case 'S' :
666                 if (!checkLiteral("YSTEM")) {
667                     skipTag();
668                     return;
669                 }
670 
671                 skipWhitespace();
672                 systemId = readString('%', parameterEntityResolver);
673                 skipWhitespace();
674                 notationMark = readString('%', parameterEntityResolver);
675                 if (notationMark.startsWith("NDATA")) {
676                     skipWhitespace();
677                     notationName = readString('%', parameterEntityResolver);
678                 }
679                 break;
680 
681             case '"' :
682             case '\'' :
683                 reader.unread();
684                 String value = readString('%', parameterEntityResolver);
685                 entityResolver.addInternalEntity(name, value);
686                 skipWhitespace();
687                 readChar('%');
688                 break;
689 
690             default :
691                 skipTag();
692         }
693 
694         if (notationName != null) {
695             handler.unparsedEntityDecl(name, publicId, systemId, notationName);
696         }
697         if (systemId != null) {
698             entityResolver.addExternalEntity(name, publicId, systemId);
699         }
700     }
701 
702     private void parseDtdIgnoreSection(EntityResolver entityResolver) throws Exception {
703         if (!checkLiteral("NORE")) {
704             skipTag();
705             return;
706         }
707 
708         skipWhitespace();
709 
710         String str = readToken('%');
711         char ch = str.charAt(0);
712 
713         if (ch != '[') {
714             skipTag();
715             return;
716         }
717 
718         closeCdataTag();
719     }
720 
721     private void parseDtdNotation(EntityResolver entityResolver) throws Exception {
722         if (!checkLiteral("OTATION")) {
723             skipTag();
724             return;
725         }
726         skipWhitespace();
727         char ch = readChar('\0');
728 
729         if (ch == '%') {
730             skipWhitespace();
731             entityResolver = parameterEntityResolver;
732         }
733         else {
734             reader.unread();
735         }
736 
737         String name = readIdentifier();
738         skipWhitespace();
739         ch = readChar('%');
740         String systemId = null;
741         String publicId = null;
742 
743         switch (ch) {
744             case 'P' :
745                 if (!checkLiteral("UBLIC")) {
746                     skipTag();
747                     return;
748                 }
749 
750                 skipWhitespace();
751                 publicId = readString('%', parameterEntityResolver);
752                 skipWhitespace();
753                 systemId = readString('%', parameterEntityResolver);
754                 skipWhitespace();
755                 readChar('%');
756                 break;
757 
758             case 'S' :
759                 if (!checkLiteral("YSTEM")) {
760                     skipTag();
761                     return;
762                 }
763 
764                 skipWhitespace();
765                 systemId = readString('%', parameterEntityResolver);
766                 skipWhitespace();
767                 readChar('%');
768                 break;
769 
770             default :
771                 skipTag();
772         }
773 
774         handler.notationDecl(name, publicId, systemId);
775     }
776 
777     protected void parseElement() throws Exception {
778         String qName = readIdentifier();
779         String localName = qName;
780         skipWhitespace();
781         String prefix = null;
782         int colonIndex = qName.indexOf(':');
783 
784         if (colonIndex > 0) {
785             prefix = qName.substring(0, colonIndex);
786             localName = qName.substring(colonIndex + 1);
787         }
788 
789         Hashtable belongedAttributeDefaultValues = (Hashtable) attributeDefaultValues.get(qName);
790         Hashtable belongedAttributeTypes = (Hashtable) attributeTypes.get(qName);
791         AttributesImpl attributes = new AttributesImpl(namespaces, belongedAttributeDefaultValues, belongedAttributeTypes);
792         int previousPrefixesSize = prefixes.size();
793 
794         char ch;
795         for (;;) {
796             ch = reader.read();
797 
798             if ((ch == '/') || (ch == '>')) {
799                 break;
800             }
801 
802             reader.unread();
803 
804             parseAttribute(attributes);
805             skipWhitespace();
806         }
807 
808         String namespaceUri = "";
809         if (saxParser.isNamespaceAware()) {
810             if (prefix == null) {
811                 String defaultNamespaceUri = (String) namespaces.get("");
812                 if (defaultNamespaceUri != null) {
813                     namespaceUri = defaultNamespaceUri;
814                 }
815             }
816             else {
817                 String mappedNamespaceUri = (String) namespaces.get(prefix);
818                 if (mappedNamespaceUri != null) {
819                     namespaceUri = mappedNamespaceUri;
820                 }
821             }
822         }
823         if (rootElement == null) {
824             rootElement = qName;
825         }
826         handler.startElement(namespaceUri, localName, qName, attributes);
827 
828         if (ch == '/') {
829             if (reader.read() != '>') {
830                 handler.fatalError(new SAXParseException("'>' expected", locator));
831             }
832 
833             handler.endElement(namespaceUri, localName, qName);
834             int currentPrefixesSize = prefixes.size();
835             for (int i = currentPrefixesSize; i > previousPrefixesSize; i--) {
836                 String mappedPrefix = (String) prefixes.pop();
837                 handler.endPrefixMapping(mappedPrefix);
838                 namespaces.remove(mappedPrefix);
839             }
840             if (qName.equals(rootElement)) {
841                 endOfDocument = true;
842             }
843             return;
844         }
845 
846         StringBuffer buffer = new StringBuffer();
847         String str;
848 
849         for (;;) {
850 
851             for (;;) {
852                 if (buffer.length() == 0) {
853                     String elementType = (String) elementTypes.get(qName);
854                     if ((elementType != null) && ((elementType.equals("EMPTY") || elementType.indexOf("#PCDATA") < 0))) {
855                         char[] whitespaces = parseWhitespace();
856                         if (whitespaces.length > 0) {
857                             handler.ignorableWhitespace(whitespaces, 0, whitespaces.length);
858                         }
859                     }
860                     else {
861                         skipWhitespace();
862                     }
863                 }
864                 str = readToken('&');
865 
866                 if ((str.charAt(0) == '&') && (str.charAt(1) != '#')) {
867                     parseEntity(str, entityResolver);
868                 }
869                 else {
870                     break;
871                 }
872             }
873 
874             if (str.charAt(0) == '<') {
875                 if (buffer.length() > 0) {
876                     char[] characters = new char[buffer.length()];
877                     buffer.getChars(0, buffer.length(), characters, 0);
878                     handler.characters(characters, 0, buffer.length());
879                     buffer.setLength(0);
880                 }
881                 str = readToken('\0');
882 
883                 if (str.charAt(0) == '/') {
884                     skipWhitespace();
885                     str = readIdentifier();
886 
887                     if (!str.equals(qName)) {
888                         handler.fatalError(new SAXParseException("Wrong closing tag: " + str, locator));
889                     }
890 
891                     skipWhitespace();
892 
893                     if (reader.read() != '>') {
894                         handler.fatalError(new SAXParseException("closing tag must be empty", locator));
895                     }
896 
897                     handler.endElement(null, localName, qName);
898                     int currentPrefixesSize = prefixes.size();
899                     for (int i = currentPrefixesSize; i > previousPrefixesSize; i--) {
900                         String mappedPrefix = (String) prefixes.pop();
901                         handler.endPrefixMapping(mappedPrefix);
902                         namespaces.remove(mappedPrefix);
903                     }
904                     if (qName.equals(rootElement)) {
905                         endOfDocument = true;
906                     }
907                     break;
908                 }
909                 else { // <[^/]
910                     reader.unread();
911                     parseTag(true);
912                 }
913             }
914             else { // [^<]
915                 if (str.charAt(0) == '&') {
916                     ch = parseCharLiteral(str);
917                     buffer.append(ch);
918                 }
919                 else {
920                     buffer.append(str);
921                 }
922             }
923         }
924     }
925 
926     private void parseEntity(String entity, EntityResolver entityResolver) throws Exception {
927         entity = entity.substring(1, entity.length() - 1);
928         Object obj = entityResolver.getEntity(entity);
929 
930         if (obj == null) {
931             handler.warning(new SAXParseException("'" + entity + "' invalid entity", locator));
932         }
933         else if (obj instanceof java.lang.String) {
934             reader.addString((String) obj);
935         }
936         else {
937             String[] id = (String[]) obj;
938             // NOTE: The following implementation depends on my decision on how this parser should work.
939             String publicId = id[0];
940             String systemId = id[1];
941             InputSource entitySource = handler.resolveEntity(publicId, systemId);
942             if (entitySource == null) {
943                 if (systemId == null) {
944                     handler.skippedEntity(entityResolver == parameterEntityResolver ? "&" + entity : entity);
945                     return;
946                 }
947                 entitySource = new InputSource(systemId);
948             }
949             Reader entityReader = entitySource.getCharacterStream();
950             if (entityReader == null) {
951                 InputStream ips = entitySource.getByteStream();
952                 if (ips == null) {
953                     String entitySystemId = entitySource.getSystemId();
954                     ips = Connector.openInputStream(entitySystemId);
955                     if (ips == null) {
956                         throw new IOException();
957                     }
958                 }
959                 entityReader = new InputStreamReader(ips);
960             }
961 
962             StringBuffer replaceTextBuffer = new StringBuffer();
963             char[] buffer = new char[256];
964             int length;
965             while (((length = entityReader.read(buffer)) != -1)) {
966                 replaceTextBuffer.append(buffer, 0, length);
967             }
968             reader.addString(replaceTextBuffer.toString());
969         }
970 
971     }
972 
973     private void parsePi() throws Exception {
974         skipWhitespace();
975         String target = readIdentifier();
976         skipWhitespace();
977         if (target.equals("xml")) {
978             closeInstructionTag();
979             handler.startDocument();
980         }
981         else {
982             String data = closeInstructionTag();
983             handler.processingInstruction(target, data);
984         }
985     }
986 
987     protected void parseSpecialTag(boolean allowCdata) throws Exception {
988         String str = readToken('&');
989         char ch = str.charAt(0);
990 
991         if (ch == '&') {
992             handler.fatalError(new SAXParseException("No entity reference is expected here (" + str + ")", locator));
993         }
994 
995         switch (ch) {
996             case '[' :
997                 if (allowCdata) {
998                     parseCdata();
999                 }
1000                 else {
1001                     handler.fatalError(new SAXParseException("No CDATA section is expected here", locator));
1002                 }
1003 
1004                 return;
1005 
1006             case 'D' :
1007                 parseDocType();
1008                 return;
1009 
1010             case '-' :
1011                 skipComment();
1012                 return;
1013         }
1014     }
1015 
1016     private void parseTag(boolean allowCdata) throws Exception {
1017         String token = readToken('&');
1018         char leading = token.charAt(0);
1019         if (leading == '&') {
1020             handler.fatalError(new SAXParseException("No entity reference is expected here (" + token + ")", locator));
1021         }
1022         switch (leading) {
1023             case '?' :
1024                 parsePi();
1025                 break;
1026 
1027             case '!' :
1028                 parseSpecialTag(allowCdata);
1029                 break;
1030 
1031             default :
1032                 reader.unread();
1033                 parseElement();
1034         }
1035 
1036     }
1037 
1038     private char[] parseWhitespace() throws Exception {
1039         char ch;
1040         StringBuffer whitespaces = new StringBuffer();
1041         do {
1042             ch = reader.read();
1043             switch (ch) {
1044                 case '\t' :
1045                     whitespaces.append(ch);
1046                     break;
1047                 case '\r' :
1048                     break;
1049                 case '\n' :
1050                     whitespaces.append(ch);
1051                     break;
1052                 case ' ' :
1053                     whitespaces.append(ch);
1054                     break;
1055                 default :
1056                     reader.unread();
1057             }
1058         }
1059         while ((ch == ' ') || (ch == '\t') || (ch == '\n') || (ch == '\r'));
1060         return whitespaces.toString().toCharArray();
1061     }
1062 
1063     private char readChar(char entityChar) throws Exception {
1064         String str = readToken(entityChar);
1065         char ch = str.charAt(0);
1066 
1067         if (ch == entityChar) {
1068             handler.fatalError(new SAXParseException("No entity reference is expected here (" + str + ")", locator));
1069         }
1070 
1071         return ch;
1072     }
1073 
1074     private String readPublicId(StringBuffer publicId) throws Exception {
1075         if (!checkLiteral("UBLIC")) {
1076             return null;
1077         }
1078 
1079         skipWhitespace();
1080         publicId.append(readString('\0', null));
1081         skipWhitespace();
1082         return readString('\0', null);
1083     }
1084 
1085     private String readString(char entityChar, EntityResolver entityResolver) throws Exception {
1086         StringBuffer result = new StringBuffer();
1087         char delim = reader.read();
1088 
1089         if ((delim != '\'') && (delim != '"')) {
1090             handler.fatalError(new SAXParseException("delimitor expected", locator));
1091         }
1092 
1093         for (;;) {
1094             String str = readToken(entityChar);
1095             char ch = str.charAt(0);
1096 
1097             // time to normalize
1098 
1099             if (ch == entityChar) {
1100                 if (str.charAt(1) == '#') {
1101                     result.append(parseCharLiteral(str));
1102                 }
1103                 else {
1104                     parseEntity(str, entityResolver);
1105                 }
1106             }
1107             else if (ch == '&') {
1108                 if (str.charAt(1) == '#') {
1109                     result.append(parseCharLiteral(str));
1110                 }
1111                 else {
1112                     result.append(str);
1113                 }
1114             }
1115             else if (ch == delim) {
1116                 break;
1117             }
1118             else {
1119                 result.append(ch);
1120             }
1121         }
1122 
1123         return result.toString();
1124     }
1125 
1126     private String readAttributeValue(boolean isNotCdata, EntityResolver entityResolver) throws Exception {
1127         StringBuffer resultBuffer = new StringBuffer();
1128         boolean wasSpace = false;
1129         char delim = reader.read();
1130 
1131         if ((delim != '\'') && (delim != '"')) {
1132             handler.fatalError(new SAXParseException("delimitor expected", locator));
1133         }
1134 
1135         for (;;) {
1136             String str = readToken('&');
1137             char ch = str.charAt(0);
1138 
1139             // end of line normalization
1140             if (ch == '\r') {
1141                 str = readToken('&');
1142                 ch = str.charAt(0);
1143                 if (ch != '\n') {
1144                     ch = '\n';
1145                 }
1146             }
1147 
1148             if (ch == '&') {
1149                 if (str.charAt(1) == '#') {
1150                     resultBuffer.append(parseCharLiteral(str));
1151                 }
1152                 else {
1153                     parseEntity(str, entityResolver);
1154                 }
1155             }
1156             else if (ch == delim) {
1157                 break;
1158             }
1159             else {
1160                 if (ch == '\n' || ch == '\t') {
1161                     ch = ' ';
1162                 }
1163                 if (ch == ' ' && isNotCdata && wasSpace) {
1164                     continue;
1165                 }
1166                 resultBuffer.append(ch);
1167                 if (ch == ' ' && isNotCdata && !wasSpace) {
1168                     wasSpace = true;
1169                 }
1170                 if (ch != ' ' && isNotCdata && wasSpace) {
1171                     wasSpace = false;
1172                 }
1173             }
1174         }
1175 
1176         String result = resultBuffer.toString();
1177         if (isNotCdata) {
1178             result = result.trim();
1179         }
1180         return result;
1181     }
1182 
1183     private String readSystemId() throws Exception {
1184         if (!checkLiteral("YSTEM")) {
1185             return null;
1186         }
1187 
1188         skipWhitespace();
1189         return readString('\0', null);
1190     }
1191 
1192     private String readToken(char entityChar) throws Exception {
1193         char ch = reader.read();
1194         StringBuffer buf = new StringBuffer();
1195         buf.append(ch);
1196 
1197         if (ch == entityChar) {
1198             while (ch != ';') {
1199                 ch = reader.read();
1200                 buf.append(ch);
1201             }
1202         }
1203 
1204         return buf.toString();
1205     }
1206     private String readIdentifier() throws Exception {
1207         StringBuffer result = new StringBuffer();
1208 
1209         for (;;) {
1210             char ch = reader.read();
1211 
1212             if ((ch == '_')
1213                 || (ch == ':')
1214                 || (ch == '-')
1215                 || (ch == '.')
1216                 || ((ch >= 'a') && (ch <= 'z'))
1217                 || ((ch >= 'A') && (ch <= 'Z'))
1218                 || ((ch >= '0') && (ch <= '9'))
1219                 || (ch > '\u007E')) {
1220                 result.append(ch);
1221             }
1222             else {
1223                 reader.unread();
1224                 break;
1225             }
1226         }
1227 
1228         return result.toString();
1229     }
1230 
1231     private void skipComment() throws Exception {
1232         if (reader.read() != '-') {
1233             handler.fatalError(new SAXParseException("'<!--' expected", locator));
1234         }
1235 
1236         int dashesRead = 0;
1237 
1238         for (;;) {
1239             char ch = reader.read();
1240 
1241             switch (ch) {
1242                 case '-' :
1243                     dashesRead++;
1244                     break;
1245 
1246                 case '>' :
1247                     if (dashesRead == 2) {
1248                         return;
1249                     }
1250 
1251                 default :
1252                     dashesRead = 0;
1253             }
1254         }
1255     }
1256 
1257     private void skipTag() throws Exception {
1258         int level = 1;
1259 
1260         while (level > 0) {
1261             char ch = reader.read();
1262 
1263             switch (ch) {
1264                 case '<' :
1265                     ++level;
1266                     break;
1267 
1268                 case '>' :
1269                     --level;
1270                     break;
1271             }
1272         }
1273     }
1274 
1275     private void skipWhitespace() throws Exception {
1276         char ch;
1277         do {
1278             ch = reader.read();
1279             switch (ch) {
1280                 case '\t' :
1281                     break;
1282                 case '\r' :
1283                     break;
1284                 case '\n' :
1285                     break;
1286                 case ' ' :
1287                     break;
1288                 default :
1289                     reader.unread();
1290             }
1291         }
1292         while ((ch == ' ') || (ch == '\t') || (ch == '\n') || (ch == '\r'));
1293     }
1294 
1295 }