// $Id$

// JavaCC generator options for this grammar.
options
{
  // Generate instance (non-static) parser members; the parser class below keeps
  // per-instance state (_handler, _locator, _firstElement).
  STATIC = false;  
  // Tracing of the generated parser and token manager is switched off.
  DEBUG_PARSER = false;
  DEBUG_TOKEN_MANAGER = false;
}

PARSER_BEGIN(SimpleDataLanguageParser)
//  Copyright 2004 The Apache Software Foundation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

///CLOVER:OFF
package org.apache.hivemind.sdl.parser;

import org.xml.sax.*;
import org.xml.sax.helpers.*;

public class SimpleDataLanguageParser
{
	// SAX handler that receives the events fired by this parser; assigned in parse().
	private ContentHandler _handler;
	// Locator passed to the handler in parse() and refreshed from the current
	// token by updateLocator().
	private SDLLocator _locator = new SDLLocator();
	// True until the first element name has been matched; element() uses it to
	// fire startDocument() only once.
	private boolean _firstElement = true;
		
///CLOVER:ON		
	/**
	 * Wraps a SAXException raised by the content handler in a
	 * SystemParseException and throws it. This method never returns normally.
	 */
	private void handleException(SAXException ex)
	throws ParseException
	{
		SystemParseException wrapped = new SystemParseException(ex);
		
		throw wrapped;
	}
	
	/**
	 * Exposes the locator maintained by this parser instance. SDLResourceParser
	 * needs it in order to generate SAXParseExceptions.
	 */
	 
	public Locator getLocator()
	{
		return this._locator;
	}
	
	/**
	 * Copies the starting line and column of the current token into the locator.
	 */
	private void updateLocator()
	{
		int line = token.beginLine;
		int column = token.beginColumn;
		
		_locator.update(line, column);
	}

    /**
     * Notifies the handler that the document has started; a SAXException from
     * the handler is passed to handleException().
     */
    private void fireStartDocument() throws ParseException
    {
        try
        {
            _handler.startDocument();
        }
        catch (SAXException ex)
        {
            handleException(ex);
        }
    }
		
    /**
     * Notifies the handler that the document has ended; a SAXException from
     * the handler is passed to handleException().
     */
    private void fireEndDocument() throws ParseException
    {
        try
        {
            _handler.endDocument();
        }
        catch (SAXException ex)
        {
            handleException(ex);
        }
    }

	/**
	 * Notifies the handler that an element has started.
	 *
	 * @param elementName name of the element, reported as both the local name
	 *        and the qualified name (with an empty namespace URI)
	 * @param attributes attributes collected for the element
	 * @throws ParseException if the handler raises a SAXException
	 *         (see handleException())
	 */
	private void fireStartElement(String elementName, Attributes attributes) throws ParseException
	{
		// No namespace. The element name doubles as the qualified name, matching
		// fireEndElement() and addAttribute(); passing null here would hand
		// handlers a null qName, which SAX does not allow for.
		
		try
		{
			_handler.startElement("", elementName, elementName, attributes);
		}
		catch (SAXException ex)
		{
			handleException(ex);
		}
	}

	/**
	 * Notifies the handler that the named element has ended; a SAXException
	 * from the handler is passed to handleException().
	 */
	private void fireEndElement(String elementName) throws ParseException
	{
		// The namespace URI is always empty; the element name is passed as both
		// the local name and the qualified name.
		
		try
		{
			_handler.endElement("", elementName, elementName);
		}
		catch (SAXException ex)
		{
			handleException(ex);
		}
	}

    /**
     * Sends the entire string to the handler as character data; a SAXException
     * from the handler is passed to handleException().
     */
    private void fireCharacters(String string) throws ParseException
    {
        char[] buffer = string.toCharArray();

        try
        {
            _handler.characters(buffer, 0, buffer.length);
        }
        catch (SAXException ex)
        {
            handleException(ex);
        }
    }
    
    /**
     * Sends the content of an extended literal to the handler as character
     * data, leaving out the first two and the last two characters of the
     * string (the delimiters of the literal).
     */
    private void fireCharactersForExtendedLiteral(String string) throws ParseException
    {
        final int delimiterLength = 2;

        char[] buffer = string.toCharArray();
        int contentLength = buffer.length - 2 * delimiterLength;

        try
        {
            _handler.characters(buffer, delimiterLength, contentLength);
        }
        catch (SAXException ex)
        {
            handleException(ex);
        }
    }

	/**
	 * Appends an attribute of type CDATA to the given attribute list. The name
	 * is used as both the local name and the qualified name, with an empty
	 * namespace URI.
	 */
	private void addAttribute(AttributesImpl attributes, String name, String value)
	{
		String namespaceURI = "";
		String type = "CDATA";
		
		attributes.addAttribute(namespaceURI, name, name, type, value);
	}

	/**
	 * Strips the first and last character of the input (the enclosing quotes of
	 * a quoted literal) and resolves backslash escapes in what remains:
	 * \\ and \" yield the escaped character, \n, \t and \r yield newline, tab
	 * and carriage return. Any other escaped character is kept together with
	 * its backslash.
	 */
	private String unquote(String input)
	{
		int last = input.length() - 1;
		StringBuffer result = new StringBuffer(input.length());
		
		// True when the previous character was an unconsumed backslash.
		boolean escaped = false;
		
		for (int index = 1; index < last; index++)
		{
			char current = input.charAt(index);
			
			if (!escaped)
			{
				if (current == '\\')
					escaped = true;
				else
					result.append(current);
				
				continue;
			}
			
			escaped = false;
			
			switch (current)
			{
				case '\\':
				case '"':
					result.append(current);
					break;
				
				case 'n':
					result.append('\n');
					break;
				
				case 't':
					result.append('\t');
					break;
				
				case 'r':
					result.append('\r');
					break;
				
				default:
					// Unrecognized escape: keep it verbatim.
					result.append('\\');
					result.append(current);
					break;
			}
		}
		
		// A backslash immediately before the closing quote is kept as a
		// literal backslash.
		
		if (escaped)
			result.append('\\');
		
		return result.toString();
	}

	// Strips the two-character "<<" and ">>" delimiters from an extended
	// literal string, returning only the text between them.
	
	private String defang(String input)
	{
		int contentEnd = input.length() - 2;
		String content = input.substring(2, contentEnd);
		
		return content;
		
// The remainder of this class is generated by JavaCC.
///CLOVER:OFF		
	}


}
PARSER_END(SimpleDataLanguageParser)


/* Standard whitespace to ignore between tokens: space, tab, newline and
   carriage return. */

SKIP :
{
  " "
| "\t"
| "\n"
| "\r"
}

/* Token definitions. Order is important (first match wins): <SIMPLE_ID>,
   <COMPLEX_ID> and <SEGMENTED_ID> overlap, and a plain identifier satisfies all
   three patterns, so the order of these entries decides which kind is reported.
   NOTE(review): this comment used to say that <LITERAL> needs to be ahead of
   <NUMERIC_LITERAL> and friends or you get an error; no token named LITERAL is
   defined in this file, so that remark looks stale -- confirm. */
   
TOKEN :
{

  // Optional sign, then either digits with an optional trailing dot, or
  // optional digits, a dot, and at least one digit.
  < NUMERIC_LITERAL: (<SIGN>)? ( ((<DIGIT>)+ (<DOT>)?) | ((<DIGIT>)* <DOT> (<DIGIT>)+) ) >

  // SIMPLE_ID: a letter or underscore followed by letters, digits, underscores or dashes.
  // COMPLEX_ID: one or more SIMPLE_IDs joined by dots.
  // SEGMENTED_ID: one or more COMPLEX_IDs joined by colons.
| < SIMPLE_ID: ["a" - "z", "A" - "Z", "_"] ( [ "a" - "z", "A" - "Z", "0" - "9", "_", "-"] )* >
| < COMPLEX_ID: <SIMPLE_ID> ( <DOT> <SIMPLE_ID> )* >
| < SEGMENTED_ID: <COMPLEX_ID> ( <COLON> <COMPLEX_ID> )* >

  // Double-quoted string; the escapes \" \\ \n \r \t are listed explicitly.
  // NOTE(review): the first alternative, any character except a double quote,
  // also matches a backslash, so it overlaps with the escape alternative.
  // Confirm that a literal ending in an escaped backslash is tokenized as
  // intended when another double quote appears later in the input.
| < QUOTED_LITERAL: <QUOTE> ((~["\""]) | ("\\"  ["\"", "\\", "n", "r", "t"]))* <QUOTE> >

  // Text enclosed in << and >>; a single > may appear inside when it is
  // followed by a character other than >.
| < EXTENDED_LITERAL: "<<" ( (~[">"]) | ( ">" ~[">"]) )* ">>" >

	// Ant-syntax symbol. 

| < SYMBOL: "${" <COMPLEX_ID> "}" >

  // Entries marked with # are private helper expressions, usable only inside
  // other token definitions.
| < #DIGIT: ["0" - "9"] >
| < #SIGN: ("+" | "-") >
| < OPAREN: "(" >
| < CPAREN: ")" >
| < OBRACE: "{" >
| < CBRACE: "}" >
| < EQ: "=" >
| < #QUOTE: "\"" >
| < #DOT: "." >
| < #COLON: ":" >
}

// Comment openers: each one is skipped and switches the token manager into a
// dedicated lexical state that discards the comment text.
SKIP:
{
  "//": SINGLE_LINE_COMMENT   
| "/*": MULTILINE_COMMENT
}

// In the EXTRACTING_QUOTED_LITERAL state a double quote is skipped and the
// token manager returns to DEFAULT.
// NOTE(review): no rule visible in this file switches into this state; it may
// be a leftover -- confirm before relying on or removing it.
<EXTRACTING_QUOTED_LITERAL> SKIP:
{
  "\"": DEFAULT
}

// Inside a single-line comment: a newline or carriage return ends the comment
// and returns to DEFAULT; every other character is skipped.
<SINGLE_LINE_COMMENT> SKIP:
{
  < ["\n", "\r"] > : DEFAULT
| < ~[] >
}

// Inside a multi-line comment: the closing delimiter returns to DEFAULT; every
// other character is skipped.
<MULTILINE_COMMENT> SKIP:
{
  "*/": DEFAULT
| < ~[] >
}

/**
 * Parses an SDL document from a stream provided in the constructor. An instance
 * of SimpleDataLanguageParser should be used once and then discarded ... it will
 * be left in an unknown state after parsing a stream (especially if an error
 * occurs).
 *
 * The document consists of exactly one root element followed by the end of
 * input. The handler receives setDocumentLocator() first; startDocument() is
 * fired by element() when the root element name is matched; endDocument() is
 * fired only once the end of input has actually been matched, so a handler is
 * not told the document ended when unexpected trailing input causes a
 * ParseException.
 *
 * @param handler the SAX handler that receives the parse events
 */
 
void parse(ContentHandler handler) : 
{
  _handler = handler;  

}
{
	{ 	
		// setDocumentLocator() is invoked once, before any other method
		// is invoked.
		
		_handler.setDocumentLocator(_locator);		
	}

	element() <EOF> { fireEndDocument(); }

}

// Matches one element: a name, an optional parenthesized attribute list and an
// optional brace-delimited body. Fires startElement once the attributes (if
// any) have been collected, and endElement after the body (if any).
void element() : 
{
  String elementName;
  AttributesImpl attributes = new AttributesImpl();
}
{
  <SIMPLE_ID>
  
  	{
  		// Record the position of the element name and remember the name.
  		updateLocator(); 
  		elementName = token.image; 
  		
  		// The first element name matched by this parser also marks the
  		// start of the document.
  		if (_firstElement) 
  		{ 
  			fireStartDocument(); 
  			_firstElement = false;
  		}
  	}
  
  (element_attributes(attributes))?
  							{ fireStartElement(elementName, attributes); }
  
  
  // The locator is refreshed from the current token before endElement is fired.
  (element_body())?			{ updateLocator(); fireEndElement(elementName); }
}

// Matches a parenthesized list of zero or more attributes, adding each one to
// the supplied attribute list.
void element_attributes(AttributesImpl attributes) : {}
{
  (<OPAREN> (attribute(attributes))* <CPAREN>)
}

// Matches a single "name = value" attribute; the value is added to the
// supplied attribute list by attribute_value().
void attribute(AttributesImpl attributes) : 
{
  String attributeName;
}
{
  <SIMPLE_ID> 		{ attributeName = token.image; }
  
  <EQ> attribute_value(attributeName, attributes)
}

// Matches an attribute value and adds it to the attribute list under the given
// name. Numbers, identifiers and symbols are stored as written; quoted literals
// go through unquote() and extended literals through defang().
void attribute_value(String attributeName, AttributesImpl attributes) : {}
{
 
   // The accepted token kinds are listed here individually rather than being
   // folded into a single ATTRIBUTE_VALUE token; the original author reports
   // that such a token could not be made to work.
   // NOTE(review): presumably a combined token would compete with SIMPLE_ID and
   // the other kinds under the tokenizer's matching rules -- confirm.
   
 ( <NUMERIC_LITERAL> | <SIMPLE_ID> | <COMPLEX_ID> | <SEGMENTED_ID> | <SYMBOL> ) { addAttribute(attributes, attributeName, token.image); }
| <QUOTED_LITERAL>  { addAttribute(attributes, attributeName, unquote(token.image)); }
| <EXTENDED_LITERAL> { addAttribute(attributes, attributeName, defang(token.image)); }
}

// Matches a brace-delimited element body holding zero or more content items.
void element_body() : {}
{
  <OBRACE> (body_content())* <CBRACE> 
}

// Matches one item of element content: a nested element, or a literal whose
// text is sent to the handler as character data. Quoted literals are unquoted
// first; extended literals are sent without their two-character delimiters.
// <SEGMENTED_ID> is not among the accepted literal kinds here.
void body_content() : {}
{
  element()
| <QUOTED_LITERAL>   { fireCharacters(unquote(token.image)); }

  // A <SIMPLE_ID> may not be a literal, since it will look like a new element.
  
| ( <COMPLEX_ID> | <NUMERIC_LITERAL> | <SYMBOL> ) { fireCharacters(token.image); }
| <EXTENDED_LITERAL> { fireCharactersForExtendedLiteral(token.image); }
}