// NOTE(review): this is a JavaCC grammar (.jj), not plain Java. This copy appears to have lost its
// original line breaks and the bare <TOKEN> references inside productions (see "(t = )?" in
// SelectClause, "t = {" in LimitClause/Var/QName, and the empty SPECIAL_TOKEN body); several named
// token definitions in the main TOKEN block look truncated as well. TODO: restore from a pristine
// copy of the grammar before running JavaCC on it -- do not guess the missing tokens.
//
// Overview of what is visible in this file:
//  * options: JAVA_UNICODE_ESCAPE = true, UNICODE_INPUT = false, STATIC = false.
//  * PARSER_BEGIN/PARSER_END declare class ARQParser extends ARQParserBase with an empty body; the
//    helpers called from the actions (getQuery, createBNode, reification, listFirst, listRest,
//    listNil, asExpr, makeNode, fixupQName, ...) are not defined here -- presumably inherited from
//    ARQParserBase; confirm.
//  * Query() is Prolog(), then one of SelectClause/ConstructClause/DescribeClause/AskClause, then
//    zero or more GraphClause/NamedGraphClause, an optional WhereClause and an optional LimitClause.
//  * GraphPattern() collects one or more PatternElement()s into an ElementGroup; PatternElement()
//    chooses between triple patterns, optional, union, nested group, graph, unsaid and constraint.
//  * Triples: TriplesBySubject -> PropertyList -> ObjectList add Triple objects to a TripleCollector;
//    Collection/CollectionTail emit list structure via listFirst/listRest/listNil.
//  * Expressions descend Expression -> ConditionalOrExpression -> ConditionalXorExpression (currently
//    a pass-through) -> ConditionalAndExpression -> ValueLogical -> RelationalExpression (at most one
//    comparison) -> NumericExpression -> AdditiveExpression -> MultiplicativeExpression ->
//    UnaryExpression -> CallExpression -> PrimaryExpression.
//  * Token sections follow the productions: SKIP whitespace, literals, case-insensitive keywords,
//    punctuation, operators, and the private NCName character classes.
// ARQ Grammar - native syntax for the query engine // Author: Andy Seaborne andy.seaborne@hp.com // (c) Copyright 2004, 2005 Hewlett-Packard Development Company, LP // All rights reserved. // See end of file for details. // // Constraint expression is derived from Java : // example java1.2-a.jj grammar in JavaCC distribution // Much modified over time. options { // Use \ u escapes in streams AND use a reader for the query // => get both raw and escaped unicode JAVA_UNICODE_ESCAPE = true; UNICODE_INPUT = false ; STATIC = false ; // DEBUG_PARSER = true ; // DEBUG_TOKEN_MANAGER = true ; } PARSER_BEGIN(ARQParser) /* * (c) Copyright 2004, 2005 Hewlett-Packard Development Company, LP * All rights reserved. */ package com.hp.hpl.jena.query.parser.arq ; import java.util.* ; import com.hp.hpl.jena.graph.* ; import com.hp.hpl.jena.query.* ; import com.hp.hpl.jena.query.core.* ; import com.hp.hpl.jena.query.expr.* ; public class ARQParser extends ARQParserBase { } PARSER_END(ARQParser) void CompilationUnit(): { } { Query() } void Query() : { } { Prolog() ( SelectClause() | ConstructClause() | DescribeClause() | AskClause() ) // ARQ=1 ( LOOKAHEAD(2) GraphClause() | NamedGraphClause() )* // ARQ=0 ( WhereClause() ) ? ( LimitClause() ) ? } void Prolog() : {} { ( BaseDecl() ) ? ( LOOKAHEAD(2) PrefixDecl() )* } // ---- Query type clauses void SelectClause() : { Node v ; Token t = null ;} { ( LOOKAHEAD(3) // Need to hop "SELECT" and DISTINCT to look at ?var or STAR (t = )? { getQuery().setQueryResultStar(true) ; } ) { getQuery().setType(Query.QueryTypeSelect) ; getQuery().setDistinct( t != null ) ; } } void DescribeClause() : { Node n ; } { { getQuery().setType(Query.QueryTypeDescribe) ; } ( LOOKAHEAD(2) // Extra lookahead needed for URIs or Vars. 
( n = VarOrURI() { getQuery().addDescribeNode(n) ; } )+ { getQuery().setQueryResultStar(false) ; } | { getQuery().setQueryResultStar(true) ; } ) } void ConstructClause() : { Template t ; } { { getQuery().setType(Query.QueryTypeConstruct) ; } ( LOOKAHEAD(2) t = ConstructTemplate() { getQuery().setQueryResultStar(false) ; getQuery().setConstructTemplate(t) ; } | { getQuery().setQueryResultStar(true) ; } ) } void AskClause() : {} { { getQuery().setType(Query.QueryTypeAsk) ; } } // ---- Dataset setting - not in SPARQL void GraphClause() : {Node n ;} { (n = SourceSelector() { getQuery().addGraphURI(n.getURI()) ; } )+ } void NamedGraphClause() : {Node n ;} { ( n = SourceSelector() { getQuery().addNamedGraphURI(n.getURI()) ; } )+ } Node SourceSelector() : { Node n ; } { n = URI() { if ( ! n.isURI() ) throw new QueryException("Not a URI: "+n.toString()) ; return n ; } } void WhereClause() : { Element el ; } { ()? el = GraphPattern() { getQuery().setQueryElement(el) ; } } void LimitClause() : { Token t ; } { t = { getQuery().setLimit(integerValue(t.image)) ; } } void BaseDecl() : { Node n ; } { n = QuotedURIref() { getQuery().setBaseURI(n.getURI()) ; } } void PrefixDecl() : { Token t ; Node n ; } { ( LOOKAHEAD(2) t = n = QuotedURIref() // Has the trailing ":" { getQuery().setPrefix(fixupPrefix(t), n.getURI()) ; } | n = QuotedURIref() { getQuery().setPrefix("", n.getURI()) ; } ) } // ---- General Graph Pattern // Basic building block. Element GraphPattern() : { Element el ; } { { ElementGroup elg = new ElementGroup() ; } PatternElement(elg) ( LOOKAHEAD(2) PatternElement(elg) )* ()? 
{return elg ; } } // ---- All the elements that can make up a pattern void PatternElement(ElementGroup elg) : { Element el = null ; Constraint c = null ; } { ( //LOOKAHEAD(2) -- Needed if no FILTER keyword TriplePatternList(elg) | el = OptionalGraphPattern() { elg.addElement(el) ; } | LOOKAHEAD(3) // LOOKAHEAD needed to distinguish nested graph patterns // (plain {} in a group) {} UNION {} el = UnionGraphPattern() { elg.addElement(el) ; } | el = GraphPattern() { elg.addElement(el) ; } | el = GraphGraphPattern() { elg.addElement(el) ; } | el = UnsaidGraphPattern() { elg.addElement(el) ; } | el = Constraint() { elg.addElement(el) ; } ) } // ---- Definitions of each pattern element Element OptionalGraphPattern() : { Element el ; } { el = GraphPattern() { return new ElementOptional(el) ; } } Element GraphGraphPattern() : { Element el ; Node n ;} { ( LOOKAHEAD(2) el = GraphPattern() { el = new ElementNamedGraph(el) ; } | n = VarOrURI() el = GraphPattern() { el = new ElementNamedGraph(n, el) ; } ) { return el ; } } Element UnionGraphPattern() : { Element el = null ; ElementUnion el2 = null ; } { el = GraphPattern() ( { if ( el2 == null ) { el2 = new ElementUnion() ; el2.addElement(el) ; } } el = GraphPattern() { el2.addElement(el) ; } )* { return (el2==null)? el : el2 ; } } Element UnsaidGraphPattern() : { Element el ; } { el = GraphPattern() { el = new ElementUnsaid(el) ; return el ; } } Element Constraint() : { Constraint c ; } { c = Expression() { ElementConstraints ec = new ElementConstraints() ; ec.addConstraint(c) ; return ec ; } } // -------- Construct patterns Template ConstructTemplate() : { Template t ; } { { TemplateGroup g = new TemplateGroup() ; } TripleTemplate(g) ( LOOKAHEAD(2) TripleTemplate(g) )* ()? 
{ return g ; } } void TripleTemplate(TripleCollector acc) : {} { // Lookahead over all the tokens making up a literal : lexical ^^datatype LOOKAHEAD(3) Reification(null, acc) | TriplesBySubject(acc) } // -------- Triple patterns with property and object lists // The grammar construct between the DOTs void TriplePatternList(TripleCollector acc) : { } { // Lookahead over all the tokens making up a literal : lexical ^^datatype LOOKAHEAD(3) Reification(null, acc) | TriplesBySubject(acc) } void TriplesBySubject(TripleCollector acc) : { Node s ; } { LOOKAHEAD(2) // Find [] s = VarOrTerm() PropertyList(s, acc) | s = Collection(acc) (PropertyList(s, acc))? | s = BlankNodePropertyList(acc) (PropertyList(s, acc))? } void PropertyList(Node s, TripleCollector acc) : { Node p ; } { p = VarOrURI() ObjectList(s, p, acc) ( LOOKAHEAD(2) PropertyList(s, acc) )* | Reification(s, acc) } void ObjectList(Node s, Node p, TripleCollector acc): { Node o ; } { (LOOKAHEAD(2) o = VarOrTerm() | o = Collection(acc) | o = BlankNodePropertyList(acc) ) // Not ideal order : better to alloc bNode. 
insert then do prop list { Triple t = new Triple(s,p,o) ; acc.addTriple(t) ; } ( LOOKAHEAD(2) ObjectList(s, p, acc ) ) * } Node BlankNodePropertyList(TripleCollector acc) : { } { { Node n = createBNode() ; } PropertyList(n, acc) { return n ; } } Node Reification(Node id, TripleCollector acc) : { Node s , p , o ; } { { if ( id == null ) id = createBNode() ; } // For generality, should be VarOrTermOrList for s and o // Also should allow reification like lists or bNodes[] "<<" s = VarOrTerm() p = VarOrURI() o = VarOrTerm() ">>" { reification(acc, id, s, p, o) ; return id ; } } // -------- Items in a graph pattern or template Node RDFTerm() : { Node n = null ; } { ( n = BlankNode() | n = Constant() ) { return n ; } } // Node VarOrTermOrList(TripleCollector acc) : { Node n ; } // { // n = VarOrTerm() { return n ; } // | // n = Collection(elg) { return n ; } // } Node VarOrTerm() : {Node n = null ; } { ( n = Var() | n = RDFTerm() ) { return n ; } } // Property + DESCRIBE Node VarOrURI() : {Node n = null ; } { ( n = Var() | n = URI() ) { return n ; } } Node VarOrLiteral() : { Node n = null ; } { n = Var() { return n ; } | n = Constant() { return n ; } } Node Collection(TripleCollector acc) : { Node n ; } { n = CollectionTail(null, acc) { return n ; } } Node CollectionTail(Node previous, TripleCollector acc) : { Node n ; } { ( n = CollectionElement(acc) { Node item = Node.createAnon() ; if ( previous != null ) listRest(acc, previous, item) ; listFirst(acc, item, n) ; } CollectionTail(item, acc) { return item ; } | { if ( previous != null ) listRest(acc, previous, listNil()) ; return listNil() ; } ) } Node CollectionElement(TripleCollector acc) : { Node n ; } { LOOKAHEAD(2) // [] n = VarOrTerm() { return n ; } | n = Collection(acc) { return n ; } | n = BlankNodePropertyList(acc) { return n ; } } Node Var() : { Token t ;} { t = { return Node.createVariable(t.image.substring(1)) ; } } /******************************************************************/ // Constraint syntax 
follows. // **** Debug point Expr Expression() : { Expr n ; } { n = ConditionalOrExpression() { return n ; } } Expr ConditionalOrExpression() : { Expr n1, n2 ; } { n1 = ConditionalXorExpression() ( n2 = ConditionalXorExpression() { n1 = new E_LogicalOr(n1,n2) ; } )* { return n1 ; } } Expr ConditionalXorExpression() : { Expr n ; } { n = ConditionalAndExpression() // Skip this //( ConditionalAndExpression() { return n ; } } Expr ConditionalAndExpression() : { Expr n1, n2 ;} { n1 = ValueLogical() ( n2 = ValueLogical() { n1 = new E_LogicalAnd(n1,n2) ; } )* { return n1 ; } } // End of boolean expressions /******************************************************************/ Expr ValueLogical() : { Expr n ; } { n = RelationalExpression() { return n ; } } Expr RelationalExpression() : { Expr n1, n2 ; } { n1 = NumericExpression() ( n2 = NumericExpression() { n1 = new E_Equal(n1,n2) ; } | n2 = NumericExpression() { n1 = new E_NotEqual(n1,n2) ; } | n2 = NumericExpression() { n1 = new E_LessThan(n1,n2) ; } | n2 = NumericExpression() { n1 = new E_GreaterThan(n1,n2) ; } | n2 = NumericExpression() { n1 = new E_LessThanOrEqual(n1,n2) ; } | n2 = NumericExpression() { n1 = new E_GreaterThanOrEqual(n1,n2) ; } )? 
{ return n1 ; } } /******************************************************************/ // **** Debug point Expr NumericExpression () : { Expr n ; } { n = AdditiveExpression() { return n ; } } Expr AdditiveExpression() : { Expr n1,n2 ; } { n1 = MultiplicativeExpression() ( n2 = MultiplicativeExpression() { n1 = new E_Add(n1, n2) ; } | n2 = MultiplicativeExpression() { n1 = new E_Subtract(n1, n2) ; } )* { return n1 ; } } Expr MultiplicativeExpression() : { Expr n1,n2 ; } { n1 = UnaryExpression() ( n2 = UnaryExpression() { n1 = new E_Multiply(n1, n2) ; } | n2 = UnaryExpression() { n1 = new E_Divide(n1, n2) ; } // | n2 = UnaryExpression() // { n1 = new E_Modulus(n1, n2) ; } )* { return n1 ; } } Expr UnaryExpression() : { Expr n ; } { ( | ) n = CallExpression() { return new E_LogicalNot(n) ; } | n = CallExpression() { return new E_UnaryPlus(n) ; } | n = CallExpression() { return new E_UnaryMinus(n) ; } | n = CallExpression() { return n ; } } Expr CallExpression() : { Expr expr ; Node gn ; } { expr = Expression() { return new E_Str(expr) ; } | gn = VarOrLiteral() { return new E_Lang(asExpr(gn)) ; } | expr = Expression() { return new E_Datatype(expr) ; } | { String s1 = null ; String s2 = null ; } expr = Expression() s1 = String() ( s2 = String() )? 
{ return new E_Regexp(expr, s1, s2) ; } | gn = Var() { return new E_Bound(new NodeVar(gn)) ; } | gn = Var() { return new E_IsURI(asExpr(gn)) ; } | gn = Var() { return new E_IsBlank(asExpr(gn)) ; } | gn = Var() { return new E_IsLiteral(asExpr(gn)) ; } | LOOKAHEAD(2) expr = FunctionCall() { return expr ; } // // cast and function // { Node uri ; } uri = URI() n = Expression() // { return new E_Function(uri, n) ; } | expr = PrimaryExpression() { return expr ; } } Expr PrimaryExpression() : { Expr expr ; Node gn ; } { ( gn = Var() { return asExpr(gn) ; } | gn = Constant() { return asExpr(gn) ; } | expr = Expression() { return expr ; } ) } Expr FunctionCall() : { Node fname ; List a ; } { fname = URI() a = ArgList() { Expr e = new E_Function(fname.getURI(), a) ; return e ; } } List ArgList() : { Node n ; } { { List args = new ArrayList() ; } ( n = VarOrLiteral() { args.add(asExpr(n)) ; } ( n = VarOrLiteral() { args.add(asExpr(n)) ; } )* )? { return args ; } } /******************************************************************/ Node Constant() : { Node n ; } { n = URI() { return n ; } | n = NumericLiteral() { return n ; } | n = RDFLiteral() { return n ; } | n = BooleanLiteral() { return n ; } } Node NumericLiteral() : { Node n ; } { n = Integer() { return n ; } | n = FloatingPoint() { return n ; } } Node RDFLiteral() : { Token t ; String lex = null ; } { lex = String() // Optional lang tag and datatype. { String lang = null ; Node uri = null ; } ( t = { lang = stripChars(t.image, 1) ; } ) ? ( uri = URI() )? 
{ return makeNode(lex, lang, uri) ; } } Node BooleanLiteral() : {} { { return XSD_TRUE ; } | { return XSD_FALSE ; } } String String() : { Token t ; String lex = null ; } { ( t = | t = ) { lex = stripQuotes(t.image) ; lex = unescape(lex) ; return lex ; } } Node URI() : { Node n ; } { n = QuotedURIref() { return n ; } | n = QName() { return n ; } } Node QName() : { Token t ; } { ( t = { return Node.createURI(fixupQName(t)) ; } | t = { return Node.createURI(fixupQName(t)) ; } ) } Node BlankNode() : { Token t = null ; } { t = { return createBNode(t.image) ; } | { return createBNode() ; } } Node QuotedURIref() : { Token t ; } { t = { String s = stripQuotes(t.image) ; s = fixupURI(s) ; return Node.createURI(s) ; } } Node Integer() : { Token t ; } { t = { return makeNodeInteger(t.image) ; } // | // Loses the lexical form. // t = { return makeNodeInteger(integerValue(t.image)) ; } } Node FloatingPoint() : { Token t ; } { t = { return makeNodeDouble(t.image) ; } } // ------------------------------------------ // Tokens // Comments and whitespace SKIP : { " " | "\t" | "\n" | "\r" | "\f" } SPECIAL_TOKEN : { } // Main tokens */ TOKEN: { // Includes # for relative URIs |"#"|"_") (~[">"," "])* ">" > // The QName() rule uses both of these - PrefixDecl() just the first | )? ":" > | )? ":" (|)? > | |) > | |) > | ()+("-" ()+)? > | <#A2Z: ["a"-"z","A"-"Z"]> | <#A2ZN: ["a"-"z","A"-"Z","0"-"9"]> // Can't write a useful pattern token because the delimiter is variable. // | } // ------------------------------------------------- // LITERALS TOKEN : { // No sign - that is done in the grammar. < INTEGER_10: /*(["-","+"])?*/ > // | < INTEGER_16: "0" ["x","X"] (["0"-"9","a"-"f","A"-"F"])+ > | < FLOATING_POINT: //(["+","-"])? (["0"-"9"])+ "." (["0"-"9"])* ()? | "." (["0"-"9"])+ ()? | (["0"-"9"])+ //NB Must have exponent for this case. > | < #EXPONENT: ["e","E"] (["+","-"])? 
(["0"-"9"])+ > | < STRING_LITERAL1: // Single quoted string "'" ( (~["'","\\","\n","\r"]) | ("\\" ~["\n","\r"]) )* "'" > | < STRING_LITERAL2: // Double quoted string "\"" ( (~["\"","\\","\n","\r"]) | ("\\" ~["\n","\r"]) )* "\"" > | < DIGITS: (["0"-"9"])+> } // ------------------------------------------------- // Keywords : includes operators that are words and should be // before general things like IDENTIFIER which swallow almost // anything TOKEN [IGNORE_CASE] : { // Prologue < BASE: "base" > | < PREFIX: "prefix" > // Result forms | < SELECT: "select" > | < DISTINCT: "distinct" > | < LIMIT: "limit" > | < DESCRIBE: "describe" > | < CONSTRUCT: "construct" > | < ASK: "ask" > // Dataset | < NAMED: "named" > | < FROM: "from" > // Graph pattern operators | < WHERE: "where" > | < AND: "and" > | < GRAPH: "graph" > | < OR: "or" > | < UNION: "union" > | < OPTIONAL: "optional" > | < UNSAID: "unsaid" > | < FILTER: "filter" > // Expression operators | < BOUND: "bound" > | < STR: "str" > | < DTYPE: "datatype" > | < LANG: "lang" > | < IS_URI: "isURI" > | < IS_BNODE: "isBlank" > | < IS_LITERAL: "isLiteral" > | < REGEXP: "regexp" > | | } TOKEN : { < LPAREN: "(" > | < RPAREN: ")" > | < LBRACE: "{" > | < RBRACE: "}" > | < LBRACKET: "[" > | < RBRACKET: "]" > | < SEMICOLON: ";" > | < COMMA: "," > | < DOT: "." > } // Operator TOKEN : { < EQ: "=" > | < NE: "!=" > | < GT: ">" > | < LT: "<" > | < LE: "<=" > // Maybe: | "=>" > | < GE: ">=" > // Maybe: | "=<" > | < BANG: "!" 
> | < TILDE: "~" > | < COLON: ":" > | < SC_OR: "||" > | < SC_AND: "&&" > // | < INCR: "++" > // | < DECR: "--" > | < PLUS: "+" > | < MINUS: "-" > | < STAR: "*" > | < SLASH: "/" > //| < AMP: "&" > //| < REM: "%" > //| < LSHIFT: "<<" > //| < RSIGNEDSHIFT: ">>" > // | < STR_MATCH: ("=~"|"~~") > // | < STR_NMATCH: "!~"> | < DATATYPE: "^^"> | < AT: "@"> } // Notes: // XML 1.1 http://www.w3.org/TR/xml11/ // XML Namespaces 1.1 http://www.w3.org/TR/xml-names11/ // Prefix ':' LocalPart // Prefix is an NCName // LocalPart is an NCName // // // An XML Name, minus the ":" // NCName ::= NCNameStartChar NCNameChar* // NCNameChar ::= NameChar - ':' // NCNameStartChar ::= NameStartChar - ':' // NameChar and NameStartChar defined in XML 1.1 // NameStartChar := ":" | [A-Z] | "_" | [a-z] | // was: [#xC0-#x2FF] | // now: [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | // [#x370-#x37D] | [#x37F-#x1FFF] | // [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | // [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | // [#x3001-#xD7FF] | [#xF900-#xEFFFF] // NameChar := NameStartChar | "-" | "." | [0-9] | #xB7 | // [#x0300-#x036F] | [#x203F-#x2040] // TOKEN: // { // // The use of qnames with starting "_:" is confusing. // // They are legal XML 1.1 but look like bNode ids from // // some RDF syntaxes. // // // This can't be purely done by tokenizing without states. // // // // XML 1.1 NCNameStartChar // <#NCCHAR1: // ["A"-"Z"] | "_" |["a"-"z"] | // ["\u00C0"-"\u00D6"] | ["\u00D8"-"\u00F6"] | ["\u00F8"-"\u02FF"] | // ["\u0370"-"\u037D"] | ["\u037F"-"\u1FFF"] | // ["\u200C"-"\u200D"] | ["\u2070"-"\u218F"] | ["\u2C00"-"\u2FEF"] | // ["\u3001"-"\uD7FF"] | ["\uF900"-"\uFFFF"] > // | // // Full NCNAME // ( | "." | "-" | ["0"-"9"] | "\u00B7" )* > // } // SPARQL addition: // "Prefix" can't start with "_" because some RDF // syntaxes use "_:" // for bNodes. 
TOKEN: { // XML 1.1 NCNameStartChar without "_" <#NCCHAR1: ["A"-"Z"] | ["a"-"z"] | ["\u00C0"-"\u00D6"] | ["\u00D8"-"\u00F6"] | ["\u00F8"-"\u02FF"] | ["\u0370"-"\u037D"] | ["\u037F"-"\u1FFF"] | ["\u200C"-"\u200D"] | ["\u2070"-"\u218F"] | ["\u2C00"-"\u2FEF"] | ["\u3001"-"\uD7FF"] | ["\uF900"-"\uFFFF"] > | // Full NCNAME, putting back the "_" <#NCCHAR_FULL: ( | "_" | "." | "-" | ["0"-"9"] | "\u00B7" )> | // Does not allow leading "_" <#NCNAME1: ()* > | // With a leading "_" <#NCNAME2: "_" ()* > } /* * (c) Copyright 2004, 2005 Hewlett-Packard Development Company, LP * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ /* # Local Variables: # tab-width: 4 # indent-tabs-mode: nil # comment-default-style: "//" # End: */