// NOTE(review): this copy of the grammar appears to have lost text. Token references
// written in angle brackets seem to be missing from the productions (e.g. PrefixDecl()
// reads "t = n = QuotedURIref()" and SelectClause() reads "(t = )?"), and some token
// definitions look truncated (e.g. SPECIAL_TOKEN is empty). Presumably it will not pass
// JavaCC as-is; confirm against the original .jj file before regenerating the parser.
// NOTE(review): the keyword token for "base" is declared as SPARQLParserBase, the same
// name as the parser's superclass, while its siblings use names such as PREFIX and
// SELECT - looks unintended; verify which token name BaseDecl() expects.
// ARQ/SPARQL Grammar - native syntax for the query engine // // Author: Andy Seaborne andy.seaborne@hp.com // (c) Copyright 2004, 2005 Hewlett-Packard Development Company, LP // All rights reserved. // See end of file for details. // // Constraint expression is derived from Java : // example java1.2-a.jj grammar in JavaCC distribution // Much modified over time. options { // Use \ u escapes in streams AND use a reader for the query // => get both raw and escaped unicode JAVA_UNICODE_ESCAPE = true; UNICODE_INPUT = false ; STATIC = false ; // DEBUG_PARSER = true ; // DEBUG_TOKEN_MANAGER = true ; } PARSER_BEGIN(SPARQLParser) /* * (c) Copyright 2004, 2005 Hewlett-Packard Development Company, LP * All rights reserved. */ package com.hp.hpl.jena.query.parser.sparql ; import java.util.* ; import com.hp.hpl.jena.graph.* ; import com.hp.hpl.jena.query.* ; import com.hp.hpl.jena.query.core.* ; import com.hp.hpl.jena.query.expr.* ; public class SPARQLParser extends SPARQLParserBase { } PARSER_END(SPARQLParser) void CompilationUnit(): { } { Query() } void Query() : { } { Prolog() ( SelectClause() | ConstructClause() | DescribeClause() | AskClause() ) ( WhereClause() ) ? ( LimitClause() ) ? } void Prolog() : {} { ( BaseDecl() ) ? ( PrefixDecl() )* } void BaseDecl() : { Node n ; } { n = QuotedURIref() { getQuery().setBaseURI(n.getURI()) ; } } void PrefixDecl() : { Token t ; Node n ; } { ( LOOKAHEAD(2) t = n = QuotedURIref() // Has the trailing ":" { getQuery().setPrefix(fixupPrefix(t), n.getURI()) ; } | n = QuotedURIref() { getQuery().setPrefix("", n.getURI()) ; } ) } // ---- Query type clauses void SelectClause() : { Node v ; Token t = null ;} { ( LOOKAHEAD(3) // Need to hop "SELECT" and DISTINCT to look at ?var or STAR (t = )? 
{ getQuery().setQueryResultStar(true) ; } ) { getQuery().setType(Query.QueryTypeSelect) ; getQuery().setDistinct( t != null ) ; } } void DescribeClause() : { Node n ; } { { getQuery().setType(Query.QueryTypeDescribe) ; } ( LOOKAHEAD(2) // Extra lookahead needed for URIs or Vars. ( n = VarOrURI() { getQuery().addDescribeNode(n) ; } )+ { getQuery().setQueryResultStar(false) ; } | { getQuery().setQueryResultStar(true) ; } ) } void ConstructClause() : { Template t ; } { { getQuery().setType(Query.QueryTypeConstruct) ; } // ( LOOKAHEAD(2) t = ConstructTemplate() { getQuery().setQueryResultStar(false) ; getQuery().setConstructTemplate(t) ; } // | // // { getQuery().setQueryResultStar(true) ; } // ) } void AskClause() : {} { { getQuery().setType(Query.QueryTypeAsk) ; } } void WhereClause() : { Element el ; } { ()? el = GraphPattern() { getQuery().setQueryElement(el) ; } } void LimitClause() : { Token t ; } { t = { getQuery().setLimit(integerValue(t.image)) ; } } // ---- General Graph Pattern // Basic building block. Element GraphPattern() : { Element el ; } { { ElementGroup elg = new ElementGroup() ; } PatternElement(elg) ( LOOKAHEAD(2) PatternElement(elg) )* ()? 
{return elg ; } } // ---- All the elements that can make up a pattern void PatternElement(ElementGroup elg) : { Element el = null ; Constraint c = null ; } { ( Triples(elg) | el = OptionalGraphPattern() { elg.addElement(el) ; } | LOOKAHEAD(3) // LOOKAHEAD needed to distinguish nested graph patterns // (plain {} in a group) {} UNION {} el = UnionGraphPattern() { elg.addElement(el) ; } | el = GraphPattern() { elg.addElement(el) ; } | el = GraphGraphPattern() { elg.addElement(el) ; } | el = Constraint() { elg.addElement(el) ; } ) } // ---- Definitions of each pattern element Element OptionalGraphPattern() : { Element el ; } { el = GraphPattern() { return new ElementOptional(el) ; } } Element GraphGraphPattern() : { Element el ; Node n ;} { ( LOOKAHEAD(2) el = GraphPattern() { el = new ElementNamedGraph(el) ; } | n = VarOrURI() el = GraphPattern() { el = new ElementNamedGraph(n, el) ; } ) { return el ; } } Element UnionGraphPattern() : { Element el = null ; ElementUnion el2 = null ; } { el = GraphPattern() ( { if ( el2 == null ) { el2 = new ElementUnion() ; el2.addElement(el) ; } } el = GraphPattern() { el2.addElement(el) ; } )* { return (el2==null)? el : el2 ; } } Element Constraint() : { Constraint c ; } { c = Expression() { ElementConstraints ec = new ElementConstraints() ; ec.addConstraint(c) ; return ec ; } } // -------- Construct patterns Template ConstructTemplate() : { Template t ; } { { TemplateGroup g = new TemplateGroup() ; } Triples(g) ( LOOKAHEAD(2) Triples(g) )* ()? { return g ; } } // -------- Triple lists with property and object lists // The grammar construct between the DOTs void Triples(TripleCollector acc) : { Node s ; } { LOOKAHEAD(2) // Find [] s = VarOrTerm() PropertyList(s, acc) | // Any of the triple-generating syntax elements s = TriplesNode(acc) (PropertyList(s, acc))? 
} void PropertyList(Node s, TripleCollector acc) : { Node p ; } { ( p = VarOrURI() | { p = nRDFtype ; } ) ObjectList(s, p, acc) ( LOOKAHEAD(2) PropertyList(s, acc) )* | Reification(s, acc) } void ObjectList(Node s, Node p, TripleCollector acc): { Node o ; } { // The mark ensures that triples end up in a nicer order // with t { int mark = acc.mark() ; } ( LOOKAHEAD(2) // [] or [ :p :q ] o = VarOrTerm() | o = TriplesNode(acc) ) { Triple t = new Triple(s,p,o) ; acc.addTriple(mark, t) ; } ( LOOKAHEAD(2) ObjectList(s, p, acc ) ) * } // -------- Triple expansions // Anything that can stand in a node slot and which is // a number of triples Node TriplesNode(TripleCollector acc) : { Node n ; } { n = Collection(acc) { return n ; } | n = BlankNodePropertyList(acc) { return n ; } | n = Reification(null, acc) { return n ; } } Node BlankNodePropertyList(TripleCollector acc) : { } { { Node n = createBNode() ; } PropertyList(n, acc) { return n ; } } Node Reification(Node id, TripleCollector acc) : { Node s , p , o ; int mark ; } { // For generality, should be AndyNode for s/p/o // Insert reification triple before the resulting subtriples (if any) "<<" { if ( id == null ) id = createBNode() ; mark = acc.mark() ; } s = GraphNode(acc) // VarOrTerm() { insert(acc, mark, id, nRDFsubject, s) ; } { mark = acc.mark() ; } p = GraphNode(acc) // VarOrURI() { insert(acc, mark, id, nRDFpredicate, p) ; } { mark = acc.mark() ; } o = GraphNode(acc) // VarOrTerm() { insert(acc, mark, id, nRDFobject, o) ; } ">>" { return id ; } } // ------- RDF collections Node Collection(TripleCollector acc) : { Node n ; } { n = CollectionTail(acc) { return n ; } } Node CollectionTail(TripleCollector acc) : { Node n ; int mark ; } { ( { mark = acc.mark() ; } n = GraphNode(acc) { Node item = createBNode() ; insert(acc, mark, item, nRDFfirst, n) ; mark = acc.mark() ; } n = CollectionTail(acc) { insert(acc, mark, item, nRDFrest, n) ; return item ; } | { return nRDFnil ; } ) } // -------- Nodes in a graph pattern or 
template Node GraphNode(TripleCollector acc) : { Node n ; } { LOOKAHEAD(2) // [] or [ :p :q ] n = VarOrTerm() { return n ; } | n = TriplesNode(acc) { return n ; } } Node VarOrTerm() : {Node n = null ; } { ( n = Var() | n = GraphTerm() ) { return n ; } } // Property + DESCRIBE Node VarOrURI() : {Node n = null ; } { ( n = Var() | n = URI() ) { return n ; } } Node Var() : { Token t ;} { t = { return Node.createVariable(t.image.substring(1)) ; } } Node GraphTerm() : { Node n ; } { n = RDFTerm() { return n ; } | { return nRDFnil ; } // Maybe could move BlankNode() here from RDFTerm. } // Constraint syntax follows. // **** Debug point Expr Expression() : { Expr n ; } { n = ConditionalOrExpression() { return n ; } } Expr ConditionalOrExpression() : { Expr n1, n2 ; } { n1 = ConditionalXorExpression() ( n2 = ConditionalXorExpression() { n1 = new E_LogicalOr(n1,n2) ; } )* { return n1 ; } } Expr ConditionalXorExpression() : { Expr n ; } { n = ConditionalAndExpression() // Skip this //( ConditionalAndExpression() { return n ; } } Expr ConditionalAndExpression() : { Expr n1, n2 ;} { n1 = ValueLogical() ( n2 = ValueLogical() { n1 = new E_LogicalAnd(n1,n2) ; } )* { return n1 ; } } // End of boolean expressions Expr ValueLogical() : { Expr n ; } { n = RelationalExpression() { return n ; } } Expr RelationalExpression() : { Expr n1, n2 ; } { n1 = NumericExpression() ( n2 = NumericExpression() { n1 = new E_Equal(n1,n2) ; } | n2 = NumericExpression() { n1 = new E_NotEqual(n1,n2) ; } | n2 = NumericExpression() { n1 = new E_LessThan(n1,n2) ; } | n2 = NumericExpression() { n1 = new E_GreaterThan(n1,n2) ; } | n2 = NumericExpression() { n1 = new E_LessThanOrEqual(n1,n2) ; } | n2 = NumericExpression() { n1 = new E_GreaterThanOrEqual(n1,n2) ; } )? 
{ return n1 ; } } // **** Debug point Expr NumericExpression () : { Expr n ; } { n = AdditiveExpression() { return n ; } } Expr AdditiveExpression() : { Expr n1,n2 ; } { n1 = MultiplicativeExpression() ( n2 = MultiplicativeExpression() { n1 = new E_Add(n1, n2) ; } | n2 = MultiplicativeExpression() { n1 = new E_Subtract(n1, n2) ; } )* { return n1 ; } } Expr MultiplicativeExpression() : { Expr n1,n2 ; } { n1 = UnaryExpression() ( n2 = UnaryExpression() { n1 = new E_Multiply(n1, n2) ; } | n2 = UnaryExpression() { n1 = new E_Divide(n1, n2) ; } // | n2 = UnaryExpression() // { n1 = new E_Modulus(n1, n2) ; } )* { return n1 ; } } Expr UnaryExpression() : { Expr n ; } { ( | ) n = CallExpression() { return new E_LogicalNot(n) ; } | n = CallExpression() { return new E_UnaryPlus(n) ; } | n = CallExpression() { return new E_UnaryMinus(n) ; } | n = CallExpression() { return n ; } } Expr CallExpression() : { Expr expr ; Node gn ; } { // STR expr = Expression() { return new E_Str(expr) ; } | // Lang expr = Expression() { return new E_Lang(expr) ; } | // Datatype expr = Expression() { return new E_Datatype(expr) ; } | // Regular expression matcher { String s1 = null ; String s2 = null ; } expr = Expression() s1 = String() ( s2 = String() )? 
{ return new E_Regex(expr, s1, s2) ; } | gn = Var() { return new E_Bound(new NodeVar(gn)) ; } | expr = Expression() { return new E_IsURI(expr) ; } | expr = Expression() { return new E_IsBlank(expr) ; } | expr = Expression() { return new E_IsLiteral(expr) ; } // | gn = Var() // { return new E_IsURI(asExpr(gn)) ; } // // | gn = Var() // { return new E_IsBlank(asExpr(gn)) ; } // // | gn = Var() // { return new E_IsLiteral(asExpr(gn)) ; } | //Casts and functions LOOKAHEAD(2) // Differentiate "q:name()" and "q:name" expr = FunctionCall() { return expr ; } | expr = PrimaryExpression() { return expr ; } } Expr PrimaryExpression() : { Expr expr ; Node gn ; } { ( gn = Var() { return asExpr(gn) ; } | gn = RDFTerm() { return asExpr(gn) ; } | expr = Expression() { return expr ; } ) } Expr FunctionCall() : { Node fname ; List a ; } { fname = URI() a = ArgList() { Expr e = new E_Function(fname.getURI(), a) ; return e ; } } List ArgList() : { Expr expr ; } { { List args = new ArrayList() ; } ( expr = Expression() { args.add(expr) ; } ( expr = Expression() { args.add(expr) ; } )* )? { return args ; } } // Term as used in expressions Node RDFTerm() : { Node n ; } { n = URI() { return n ; } | n = RDFLiteral() { return n ; } | n = NumericLiteral() { return n ; } | n = BooleanLiteral() { return n ; } | n = BlankNode() { return n ; } } Node NumericLiteral() : { Node n ; } { n = Integer() { return n ; } | n = FloatingPoint() { return n ; } } Node RDFLiteral() : { Token t ; String lex = null ; } { lex = String() // Optional lang tag and datatype. { String lang = null ; Node uri = null ; } ( t = { lang = stripChars(t.image, 1) ; } ) ? ( uri = URI() )? 
{ return makeNode(lex, lang, uri) ; } } Node BooleanLiteral() : {} { { return XSD_TRUE ; } | { return XSD_FALSE ; } } String String() : { Token t ; String lex = null ; } { ( t = | t = ) { lex = stripQuotes(t.image) ; lex = unescape(lex) ; return lex ; } } Node URI() : { Node n ; } { n = QuotedURIref() { return n ; } | n = QName() { return n ; } } Node QName() : { Token t ; } { ( t = { return Node.createURI(fixupQName(t)) ; } | t = { return Node.createURI(fixupQName(t)) ; } ) } Node BlankNode() : { Token t = null ; } { t = { return createBNode(t.image) ; } | { return createBNode() ; } } Node QuotedURIref() : { Token t ; } { t = { String s = stripQuotes(t.image) ; s = fixupURI(s) ; return Node.createURI(s) ; } } Node Integer() : { Token t ; } { t = { return makeNodeInteger(t.image) ; } // | // Loses the lexical form. // t = { return makeNodeInteger(integerValue(t.image)) ; } } Node FloatingPoint() : { Token t ; } { t = { return makeNodeDouble(t.image) ; } } // ------------------------------------------ // Tokens // Comments and whitespace SKIP : { " " | "\t" | "\n" | "\r" | "\f" } SPECIAL_TOKEN : { } // Main tokens */ TOKEN: { // Includes # for relative URIs |"#"|"_"|"/"|"&") (~[">"," "])* ">" > // The QName() rule uses both of these - PrefixDecl() just the first | )? ":" > | )? ":" (|)? > | |) > | |) > | ()+("-" ()+)? > | <#A2Z: ["a"-"z","A"-"Z"]> | <#A2ZN: ["a"-"z","A"-"Z","0"-"9"]> } // ------------------------------------------------- // LITERALS TOKEN : { // No sign - that is done in the grammar. < INTEGER_10: /*(["-","+"])?*/ > // | < INTEGER_16: "0" ["x","X"] (["0"-"9","a"-"f","A"-"F"])+ > | < FLOATING_POINT: //(["+","-"])? (["0"-"9"])+ "." (["0"-"9"])* ()? | "." (["0"-"9"])+ ()? | (["0"-"9"])+ //NB Must have exponent for this case. > | < #EXPONENT: ["e","E"] (["+","-"])? 
(["0"-"9"])+ > | < STRING_LITERAL1: // Single quoted string "'" ( (~["'","\\","\n","\r"]) | ("\\" ~["\n","\r"]) )* "'" > | < STRING_LITERAL2: // Double quoted string "\"" ( (~["\"","\\","\n","\r"]) | ("\\" ~["\n","\r"]) )* "\"" > | < DIGITS: (["0"-"9"])+> } // ------------------------------------------------- // Keywords : includes operators that are words and should be // before general things like IDENTIFIER which swallow almost // anything TOKEN [IGNORE_CASE] : { // Prologue < SPARQLParserBase: "base" > | < PREFIX: "prefix" > // Result forms | < SELECT: "select" > | < DISTINCT: "distinct" > | < LIMIT: "limit" > | < DESCRIBE: "describe" > | < CONSTRUCT: "construct" > | < ASK: "ask" > // Graph pattern operators | < WHERE: "where" > | < AND: "and" > | < GRAPH: "graph" > | < OR: "or" > | < UNION: "union" > | < OPTIONAL: "optional" > | < FILTER: "filter" > // Expression operators | < BOUND: "bound" > | < STR: "str" > | < DTYPE: "datatype" > | < LANG: "lang" > | < IS_URI: "isURI" > | < IS_BNODE: "isBlank" > | < IS_LITERAL: "isLiteral" > | < REGEX: "regex" > | | | } TOKEN : { < LPAREN: "(" > | < RPAREN: ")" > | < LBRACE: "{" > | < RBRACE: "}" > | < LBRACKET: "[" > | < RBRACKET: "]" > | < SEMICOLON: ";" > | < COMMA: "," > | < DOT: "." > } // Operator TOKEN : { < EQ: "=" > | < NE: "!=" > | < GT: ">" > | < LT: "<" > | < LE: "<=" > // Maybe: | "=>" > | < GE: ">=" > // Maybe: | "=<" > | < BANG: "!" 
> | < TILDE: "~" > | < COLON: ":" > | < SC_OR: "||" > | < SC_AND: "&&" > // | < INCR: "++" > // | < DECR: "--" > | < PLUS: "+" > | < MINUS: "-" > | < STAR: "*" > | < SLASH: "/" > //| < AMP: "&" > //| < REM: "%" > //| < LSHIFT: "<<" > //| < RSIGNEDSHIFT: ">>" > // | < STR_MATCH: ("=~"|"~~") > // | < STR_NMATCH: "!~"> | < DATATYPE: "^^"> | < AT: "@"> } // Notes: // XML 1.1 http://www.w3.org/TR/xml11/ // XML Namespaces 1.1 http://www.w3.org/TR/xml-names11/ // Prefix ':' LocalPart // Prefix is an NCName // LocalPart is an NCName // // // An XML Name, minus the ":" // NCName ::= NCNameStartChar NCNameChar* // NCNameChar ::= NameChar - ':' // NCNameStartChar ::= NameStartChar - ':' // NameChar and NameStartChar defined in XML 1.1 // NameStartChar := ":" | [A-Z] | "_" | [a-z] | // was: [#xC0-#x2FF] | // now: [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | // [#x370-#x37D] | [#x37F-#x1FFF] | // [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | // [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | // [#x3001-#xD7FF] | [#xF900-#xEFFFF] // NameChar := NameStartChar | "-" | "." | [0-9] | #xB7 | // [#x0300-#x036F] | [#x203F-#x2040] TOKEN: { // XML 1.1 NCNameStartChar without "_" <#NCCHAR1: ["A"-"Z"] | ["a"-"z"] | ["\u00C0"-"\u00D6"] | ["\u00D8"-"\u00F6"] | ["\u00F8"-"\u02FF"] | ["\u0370"-"\u037D"] | ["\u037F"-"\u1FFF"] | ["\u200C"-"\u200D"] | ["\u2070"-"\u218F"] | ["\u2C00"-"\u2FEF"] | ["\u3001"-"\uD7FF"] | ["\uF900"-"\uFFFF"] > | // #NCCHAR without a dot. <#NCCHAR_END: ( | "_" | "-" | ["0"-"9"] | "\u00B7" )> | // Full NCNAME, putting back the "." <#NCCHAR_FULL: ( | "." ) > | // NCNAME but no leading "_", no trailing "." <#NCNAME1: (()* )? > | // With a leading "_", no trailing "." <#NCNAME2: "_" (()* )? 
> | // With trailing dot, no "_" to start <#NCNAME_PREFIX: ()* > } /* * (c) Copyright 2004, 2005 Hewlett-Packard Development Company, LP * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /* # Local Variables: # tab-width: 4 # indent-tabs-mode: nil # comment-default-style: "//" # End: */