// JavaCC grammar for STL_Parser (package lang.parser).
//
// Structure, as defined by the productions below:
//   parse()       - entry point; returns the Item built by List().
//   List()        - creates a list Item positioned at its opening token and fills it via BareList().
//   BareList()    - appends zero or more WordOrList() elements to the list it is given.
//   WordOrList()  - either a Word() or a nested List().
//   Word()        - wraps a plain word, a GraphTerm() node or a variable in an Item,
//                   passing a source line and column for each Item it creates.
//   GraphTerm()   - an IRI reference or qname, an RDF literal, a numeric literal or a blank node.
//
// Options: JAVA_UNICODE_ESCAPE is on, UNICODE_INPUT is off and STATIC is off
// (see the comment inside the options block for the intent).
//
// NOTE(review): this copy appears to have lost its line breaks and its
// angle-bracketed token references and token names (for example "t = " is
// followed directly by an action block).  Restore them from version control
// before regenerating the parser.
// (c) Copyright 2006 Hewlett-Packard Development Company, LP options { // Use \ u escapes in streams AND use a reader for the query // => get both raw and escaped unicode JAVA_UNICODE_ESCAPE = true ; UNICODE_INPUT = false ; STATIC = false ; // DEBUG_PARSER = true ; // DEBUG_TOKEN_MANAGER = true ; } PARSER_BEGIN(STL_Parser) /* * (c) Copyright 2006 Hewlett-Packard Development Company, LP * All rights reserved. */ package lang.parser ; import lang.* ; import com.hp.hpl.jena.graph.* ; import com.hp.hpl.jena.query.core.Var; public class STL_Parser extends ParserBase { } PARSER_END(STL_Parser) // --- Entry point Item parse() : { Item list = Item.createList() ; Item elt ; } { // list = BareList() list = List() { return list ; } } Item List() : { Token t ; } { ( t = { Item list = Item.createList(t.beginLine, t.beginColumn) ; } BareList(list) // | list = BareList() // | list = BareList() ) { return list ; } } Item BareList(Item list) : { Item elt ; } { ( elt = WordOrList() { list.getList().add(elt) ; } )* { return list ; } } Item Word() : { String str ; Token t ; Node node ; Var v ; } { (t = |t = ) { return Item.createWord(t.image, t.beginLine, t.beginColumn) ; } | node = GraphTerm() // Not quite right for line/col - this is the last token { return Item.createNode(node, token.beginLine, token.beginColumn) ; } | ( t = | t = ) { v = createVariable(t.image, t.beginLine, t.beginColumn) ; return Item.createNode(v, t.beginLine, t.beginColumn) ; } | t = { v = createVariable() ; return Item.createNode(v, t.beginLine, t.beginColumn) ; } } Item WordOrList() : { Item item ; } { item = Word() { return item ; } | item = List() { return item ; } } // Abstract terminals (wrapped in grammar rules) Node GraphTerm() : { Node n ; } { n = IRIref() { return n ; } | n = RDFLiteral() { return n ; } | n = NumericLiteral() { return n ; } // | n = BooleanLiteral() { return n ; } | n = BlankNode() { return n ; } //| // { return nRDFnil ; } // { return nRDFnil ; } } Node RDFLiteral() : { Token t ; String lex = null ; } { lex = 
String() // Optional lang tag and datatype. { String lang = null ; Node uri = null ; String qname = null ; } ( ( t = { lang = stripChars(t.image, 1) ; } ) | uri = IRIref() )? { return makeNode(lex, lang, uri) ; } } Node NumericLiteral() : { Token t ; } { t = { return makeNodeInteger(t.image) ; } | t = { return makeNodeDecimal(t.image) ; } | t = { return makeNodeDouble(t.image) ; } } // Node BooleanLiteral() : {} // { // { return XSD_TRUE ; } // | // { return XSD_FALSE ; } // } String String() : { Token t ; String lex ; } { ( t = { lex = stripQuotes(t.image) ; } | t = { lex = stripQuotes(t.image) ; } | t = { lex = stripQuotes3(t.image) ; } | t = { lex = stripQuotes3(t.image) ; } ) { lex = unescapeStr(lex, t.beginLine, t.beginColumn) ; return lex ; } } Node IRIref() : { Node n ; } { n = Q_IRI_REF() { return n ; } | n = QName() { return n ; } } Node QName() : { Token t ; Node n ; } { t = { return createNodeFromQName(t.image, t.beginLine, t.beginColumn) ; } } Node BlankNode() : { Token t = null ; } { t = { return createBNode(t.image, t.beginLine, t.beginColumn) ; } //| // { return createBNode() ; } // { return createBNode() ; } } Node Q_IRI_REF() : { Token t ; } { t = { return createNodeFromURI(t.image, t.beginLine, t.beginColumn) ; } } // (c) Copyright 2006 Hewlett-Packard Development Company, LP // ------------------------------------------ // Tokens // Comments and whitespace SKIP : { " " | "\t" | "\n" | "\r" | "\f" } TOKEN: { <#WS: " " | "\t" | "\n" | "\r" | "\f"> } SPECIAL_TOKEN : { } // TOKEN: // { // // | // } TOKEN: { // Includes # for relative URIs ","<","\u0000"-"\u0020"])* ">" > // | )? ":" > | )? ":" ()? > | > //| > | > | > | ()+("-" ()+)* > | <#A2Z: ["a"-"z","A"-"Z"]> | <#A2ZN: ["a"-"z","A"-"Z","0"-"9"]> } // ------------------------------------------------- //TOKEN [IGNORE_CASE] : { } TOKEN : { < INTEGER: (["-","+"])? > | < DECIMAL: (["-","+"])? ( ()+ "." ()* | "." ()+ ) > | < DOUBLE: (["+","-"])? ( (["0"-"9"])+ "." (["0"-"9"])* | "." 
(["0"-"9"])+ () | (["0"-"9"])+ ) > | < #EXPONENT: ["e","E"] (["+","-"])? (["0"-"9"])+ > | < #QUOTE_3D: "\"\"\""> | < #QUOTE_3S: "'''"> | | < STRING_LITERAL1: // Single quoted string "'" ( (~["'","\\","\n","\r"]) | )* "'" > | < STRING_LITERAL2: // Double quoted string "\"" ( (~["\"","\\","\n","\r"]) | )* "\"" > | < STRING_LITERAL_LONG1: ( ("'" | "''")? (~["'","\\"] | ))* > | < STRING_LITERAL_LONG2: ( ("\"" | "\"\"")? (~["\"","\\"] | ))* > | < DIGITS: (["0"-"9"])+> //| } TOKEN : { < LPAREN: "(" > | < RPAREN: ")" > | < LBRACE: "{" > | < RBRACE: "}" > | < LBRACKET: "[" > | < RBRACKET: "]" > | < DATATYPE: "^^"> | < AT: "@"> //| < ANON: (|)* > //| < NIL: (|)* > | < HOOK: "?" > // | < SEMICOLON: ";" > // | < COMMA: "," > // | < DOT: "." > } // Operator - let this all drop through to Word? // TOKEN : // { // < EQ: "=" > // | < NE: "!=" > // | < GT: ">" > // | < LT: "<" > // | < LE: "<=" > // | < GE: ">=" > // // | < BANG: "!" > // | < TILDE: "~" > // //| < COLON: ":" > // // | < SC_OR: "||" > // | < SC_AND: "&&" > // // | < PLUS: "+" > // | < MINUS: "-" > // | < STAR: "*" > // | < SLASH: "/" > // // //| < AMP: "&" > // //| < REM: "%" > // // | | | | | | | // | | // | | // | | | > // } TOKEN: { // XML 1.1 NCNameStartChar without "_" <#NCCHAR1P: ["A"-"Z"] | ["a"-"z"] | ["\u00C0"-"\u00D6"] | ["\u00D8"-"\u00F6"] | ["\u00F8"-"\u02FF"] | ["\u0370"-"\u037D"] | ["\u037F"-"\u1FFF"] | ["\u200C"-"\u200D"] | ["\u2070"-"\u218F"] | ["\u2C00"-"\u2FEF"] | ["\u3001"-"\uD7FF"] | ["\uF900"-"\uFFFD"] > // [#x10000-#xEFFFF] | <#NCCHAR1: | "_" > // No trailing DOTs in qnames. | // #NCCHAR without "." <#NCCHAR: ( | "-" | ["0"-"9"] | "\u00B7" | ["\u0300"-"\u036F"] | ["\u203F"-"\u2040"] ) > | // NCNAME but no leading "_", no trailing ".", can have dot inside prefix name. <#NCNAME_PREFIX: ((|".")* )? > | // With a leading "_", no dot at end of local name. <#NCNAME: ((|".")* )? > | // NCNAME without "-" and ".", allowing leading digits. 
<#VARNAME: ( | ["0"-"9"] ) ( | ["0"-"9"] | "\u00B7" | ["\u0300"-"\u036F"] | ["\u203F"-"\u2040"] )* > } TOKEN: { > // Anything that isn't structural (LPAREN and RPAREN) | } // Catch-all tokens. Must be last. // Any non-whitespace. Causes a parser exception, rather than a // token manager error (with hidden line numbers). // Only bad IRIs (e.g. spaces) now give unhelpful parse errors. TOKEN: { <#UNKNOWN: (~[" ","\t","\n","\r","\f" ])+ > } /* # Local Variables: # tab-width: 4 # indent-tabs-mode: nil # comment-default-style: "//" # End: */ /* * (c) Copyright 2006 Hewlett-Packard Development Company, LP * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ /* # Local Variables: # tab-width: 4 # indent-tabs-mode: nil # comment-default-style: "//" # End: */