// $Id$

options
{
  STATIC = false;
  DEBUG_PARSER = false;
  DEBUG_TOKEN_MANAGER = false;
}

PARSER_BEGIN(SimpleDataLanguageParser)

// Copyright 2004 The Apache Software Foundation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

///CLOVER:OFF

package org.apache.hivemind.sdl.parser;

import org.xml.sax.*;
import org.xml.sax.helpers.*;

public class SimpleDataLanguageParser
{
  /** Receiver of the SAX events produced while parsing; assigned by parse(). */
  private ContentHandler _handler;

  /** Locator handed to the handler and refreshed from the current token. */
  private SDLLocator _locator = new SDLLocator();

  /** True until the first element has been seen; used to fire startDocument() once. */
  private boolean _firstElement = true;

  ///CLOVER:ON

  /**
   * Rethrows a SAXException raised by the content handler as a
   * SystemParseException wrapping the original exception.
   */
  private void handleException(SAXException ex)
  throws ParseException
  {
    throw new SystemParseException(ex);
  }

  /**
   * Returns the locator used by this parser instance; which is needed to generate
   * SAXParseExceptions in SDLResourceParser.
   */
  public Locator getLocator()
  {
    return _locator;
  }

  /** Moves the locator to the begin line and column of the current token. */
  private void updateLocator()
  {
    _locator.update(token.beginLine, token.beginColumn);
  }

  /** Forwards startDocument() to the handler, converting any SAXException. */
  private void fireStartDocument()
  throws ParseException
  {
    try
    {
      _handler.startDocument();
    }
    catch (SAXException ex)
    {
      handleException(ex);
    }
  }

  /** Forwards endDocument() to the handler, converting any SAXException. */
  private void fireEndDocument()
  throws ParseException
  {
    try
    {
      _handler.endDocument();
    }
    catch (SAXException ex)
    {
      handleException(ex);
    }
  }

  /**
   * Forwards startElement() to the handler, converting any SAXException.
   *
   * @param elementName the element name, reported as both local and qualified name
   * @param attributes the attributes collected for the element
   */
  private void fireStartElement(String elementName, Attributes attributes)
  throws ParseException
  {
    // No namespace. The qualified name is reported as the plain element name
    // (never null), exactly as fireEndElement() and addAttribute() do, so the
    // start and end events of one element agree with each other.
    try
    {
      _handler.startElement("", elementName, elementName, attributes);
    }
    catch (SAXException ex)
    {
      handleException(ex);
    }
  }

  /**
   * Forwards endElement() to the handler, converting any SAXException.
   *
   * @param elementName the element name, reported as both local and qualified name
   */
  private void fireEndElement(String elementName)
  throws ParseException
  {
    // No namespace; the qualified name is the plain element name.
    try
    {
      _handler.endElement("", elementName, elementName);
    }
    catch (SAXException ex)
    {
      handleException(ex);
    }
  }

  /** Sends the whole string to the handler as character data. */
  private void fireCharacters(String string)
  throws ParseException
  {
    try
    {
      _handler.characters(string.toCharArray(), 0, string.length());
    }
    catch (SAXException ex)
    {
      handleException(ex);
    }
  }

  /**
   * Sends an extended literal to the handler as character data, leaving out
   * the first two and the last two characters (the "<<" and ">>" delimiters).
   */
  private void fireCharactersForExtendedLiteral(String string)
  throws ParseException
  {
    try
    {
      _handler.characters(string.toCharArray(), 2, string.length() - 4);
    }
    catch (SAXException ex)
    {
      handleException(ex);
    }
  }

  /** Adds a CDATA attribute with no namespace; the name doubles as the qualified name. */
  private void addAttribute(AttributesImpl attributes, String name, String value)
  {
    attributes.addAttribute("", name, name, "CDATA", value);
  }

  /**
   * Strips the first and last character (the surrounding quotes) from a quoted
   * literal and resolves backslash escapes in between: \\ and \" yield the
   * escaped character, \n, \t and \r yield newline, tab and carriage return.
   * Any other escaped character is kept together with its backslash.
   *
   * @param input the literal including its delimiting first and last character
   * @return the unescaped content between the delimiters
   */
  private String unquote(String input)
  {
    StringBuffer buffer = new StringBuffer(input.length());

    // True when the previous character was an unconsumed backslash.
    boolean escaped = false;
    int last = input.length() - 1;

    // Index 0 and index 'last' are the delimiters and are skipped.
    for (int i = 1; i < last; i++)
    {
      char ch = input.charAt(i);

      if (!escaped)
      {
        if (ch == '\\')
          escaped = true;
        else
          buffer.append(ch);

        continue;
      }

      escaped = false;

      switch (ch)
      {
        case '\\':
        case '"':
          buffer.append(ch);
          break;

        case 'n':
          buffer.append('\n');
          break;

        case 't':
          buffer.append('\t');
          break;

        case 'r':
          buffer.append('\r');
          break;

        default:
          // Unknown escape: keep the backslash and the character as written.
          buffer.append('\\');
          buffer.append(ch);
          break;
      }
    }

    // A pending escape here means a backslash just before the closing delimiter;
    // it is kept literally. (The original author questioned whether this is the
    // right thing to do.)
    if (escaped)
      buffer.append('\\');

    return buffer.toString();
  }

  // Removes the "<<" and ">>" from an extended literal string.
  private String defang(String input)
  {
    int length = input.length();

    return input.substring(2, length - 2);

    // The remainder of this class is generated by JavaCC.

    ///CLOVER:OFF
  }
}

PARSER_END(SimpleDataLanguageParser)

// NOTE(review): in this copy of the grammar, references written in angle
// brackets appear to have been lost: several regular expressions contain empty
// groups such as "()?", several productions contain empty alternatives such as
// "( | | | | )", and the SKIP sections that return to DEFAULT carry no lexical
// state prefix. Those spots are reproduced exactly as found and must be
// restored from the authoritative grammar before running JavaCC.

/* Standard whitespace to ignore between tokens. */

SKIP :
{
  " "
| "\t"
| "\n"
| "\r"
}

/* This is interesting; order is important (first match wins). needs to be ahead of and friends or you get an error. */
// NOTE(review): the token names in the sentence above are missing in this copy.

TOKEN :
{
  < NUMERIC_LITERAL: ()? ( (()+ ()?) | (()* ()+) ) >
| < SIMPLE_ID: ["a" - "z", "A" - "Z", "_"] ( [ "a" - "z", "A" - "Z", "0" - "9", "_", "-"] )* >
| < COMPLEX_ID: ( )* >
| < SEGMENTED_ID: ( )* >
| < QUOTED_LITERAL: ((~["\""]) | ("\\" ["\"", "\\", "n", "r", "t"]))* >
| < EXTENDED_LITERAL: "<<" ( (~[">"]) | ( ">" ~[">"]) )* ">>" >
// Ant-syntax symbol.
| < SYMBOL: "${" "}" >
| < #DIGIT: ["0" - "9"] >
| < #SIGN: ("+" | "-") >
| < OPAREN: "(" >
| < CPAREN: ")" >
| < OBRACE: "{" >
| < CBRACE: "}" >
| < EQ: "=" >
| < #QUOTE: "\"" >
| < #DOT: "." >
| < #COLON: ":" >
}

// Comment openers switch the token manager into a comment state.
SKIP:
{
  "//": SINGLE_LINE_COMMENT
| "/*": MULTILINE_COMMENT
}

SKIP:
{
  "\"": DEFAULT
}

// Skips everything up to a line terminator, then returns to DEFAULT.
SKIP:
{
  < ["\n", "\r"] > : DEFAULT
| < ~[] >
}

// Skips everything up to the closing "*/", then returns to DEFAULT.
SKIP:
{
  "*/": DEFAULT
| < ~[] >
}

/**
 * Parses an SDL document from a stream provided in the constructor. An instance
 * of SimpleDataLanguageParser should be used once and then discarded ... it will
 * be left in an unknown state after parsing a stream (especially if an error
 * occurs).
 *
 */

void parse(ContentHandler handler) :
{
  _handler = handler;
}
{
  {
    // setDocumentLocator() is invoked once, before any other method
    // is invoked.

    _handler.setDocumentLocator(_locator);
  }

  element()

  {
    fireEndDocument();
  }
}

// Parses one element: fires startDocument() ahead of the very first element,
// then startElement() once the optional attributes have been collected, and
// endElement() after the optional body.
void element() :
{
  String elementName;
  AttributesImpl attributes = new AttributesImpl();
}
{
  {
    updateLocator();

    elementName = token.image;

    if (_firstElement)
    {
      fireStartDocument();
      _firstElement = false;
    }
  }

  (element_attributes(attributes))?

  {
    fireStartElement(elementName, attributes);
  }

  (element_body())?

  {
    updateLocator();

    fireEndElement(elementName);
  }
}

// Collects zero or more attributes into the supplied AttributesImpl.
void element_attributes(AttributesImpl attributes) :
{}
{
  ( (attribute(attributes))* )
}

// Parses a single attribute; the name is taken from the current token's image.
void attribute(AttributesImpl attributes) :
{
  String attributeName;
}
{
  {
    attributeName = token.image;
  }

  attribute_value(attributeName, attributes)
}

// Adds the attribute value: the first group is stored as written, the second
// alternative is passed through unquote(), the third through defang().
void attribute_value(String attributeName, AttributesImpl attributes) :
{}
{
  // Because of matching rules I just don't understand, it is not possible to
  // create an ATTRIBUTE_VALUE token for this list of possible values.

  ( | | | | )
  {
    addAttribute(attributes, attributeName, token.image);
  }
|
  {
    addAttribute(attributes, attributeName, unquote(token.image));
  }
|
  {
    addAttribute(attributes, attributeName, defang(token.image));
  }
}

// Parses zero or more pieces of body content.
void element_body() :
{}
{
  (body_content())*
}

// One piece of body content: either a nested element or character data.
void body_content() :
{}
{
  element()
|
  {
    fireCharacters(unquote(token.image));
  }
  // A may not be a literal, since it will look like a new element.
  // NOTE(review): a token name is missing from the sentence above in this copy.
| ( | | )
  {
    fireCharacters(token.image);
  }
|
  {
    fireCharactersForExtendedLiteral(token.image);
  }
}