// SPARQL Syntax
//
// Author: Andy Seaborne andy.seaborne@hp.com
//
// (c) Copyright 2004, 2005 Hewlett-Packard Development Company, LP
// All rights reserved.
// See end of file for details.

// Constraint expression is derived from Java :
// example java1.2-a.jj grammar in JavaCC distribution
// Much modified over time.

// NOTE(review): in this copy several productions contain an assignment with
// no right-hand side (e.g. "t = )") or an empty alternative ("| )").
// Terminal references appear to be missing at those points - confirm against
// the upstream grammar before regenerating the parser. Line breaks were also
// reconstructed, so check where each "//" comment is meant to end.

options
{
    // Use \ u escapes in streams AND use a reader for the query
    // => get both raw and escaped unicode
    // CHECK
    JAVA_UNICODE_ESCAPE = true;
    UNICODE_INPUT = false ;

    // Non-static parser: state lives in the parser instance.
    STATIC = false ;
    // DEBUG_PARSER = true ;
    // DEBUG_TOKEN_MANAGER = true ;
}

PARSER_BEGIN(SPARQLParser)
/*
 * (c) Copyright 2004, 2005 Hewlett-Packard Development Company, LP
 * All rights reserved.
 */

package com.hp.hpl.jena.query.parser.sparql ;

import java.util.* ;
import com.hp.hpl.jena.graph.* ;
import com.hp.hpl.jena.query.* ;
import com.hp.hpl.jena.query.core.* ;
import com.hp.hpl.jena.query.expr.* ;
import com.hp.hpl.jena.rdf.model.AnonId ;

// Generated parser class. Helper methods used by the actions below
// (fixupPrefix, stripQuotes, integerValue, ...) are not defined in this file;
// presumably they come from SPARQLParserBase - verify.
public class SPARQLParser extends SPARQLParserBase
{
    // Stores the Query object that the grammar actions fill in; it is
    // assigned to the inherited "query" field.
    public void setQuery(Query query) { super.query = query ; }
}
PARSER_END(SPARQLParser)

// Entry production: parses one Query().
void CompilationUnit(): {}
{
    // The tests for trailing junk
    // but does not work for "//..."
    Query()
}

// **** Debug point

// A query is a prolog, exactly one result form (select / construct /
// describe / ask), then any number of graph clauses and named-graph clauses,
// an optional where clause and an optional limit clause - in that order.
// The local "el" is declared but not used in the visible body.
void Query() : { Element el ; }
{
    Prolog()
    ( SelectClause()
    | ConstructClause()
    | DescribeClause()
    | AskClause()
    )
    ( GraphClause() )*
    ( NamedGraphClause() )*
    ( WhereClause() ) ?
    ( LimitClause() ) ?
}

// Prolog: an optional base declaration followed by zero or more prefix
// declarations.
void Prolog() : {}
{
    ( BaseDecl() ) ?
    ( PrefixDecl() )*
}

// Marks the query as a SELECT query and records DISTINCT when token "t"
// was matched (t stays null otherwise).
// NOTE(review): the terminals and the projection list are not visible here;
// "v" is declared but never assigned in what remains - confirm upstream.
void SelectClause() : { Node v ; Token t = null ;}
{
    ( LOOKAHEAD(3)
      // Need to hop "SELECT" and DISTINCT to look at ?var or STAR
      (t = )?
    )
    {
        query.setType(Query.QueryTypeSelect) ;
        query.setDistinct( t != null ) ;
    }
}

// Marks the query as a DESCRIBE query and adds each variable or URI that
// follows as a describe node.
// NOTE(review): the second alternative is empty in this copy.
void DescribeClause() : { Node n ; }
{
    { query.setType(Query.QueryTypeDescribe) ; }
    ( LOOKAHEAD(2)
      // Extra lookahead needs for URIs or Vars.
      ( n = VarOrURI() { query.addDescribeNode(n) ; } )+
    |
    )
}

// Marks the query as a CONSTRUCT query and stores the construct pattern.
// NOTE(review): the second alternative is empty apart from its comment.
void ConstructClause() : { Element el ; }
{
    { query.setType(Query.QueryTypeConstruct) ; }
    ( LOOKAHEAD(2)
      el = ConstructPattern() { query.setConstructPattern(el) ; }
    |
      // Fixup later
    )
}

// Marks the query as an ASK query.
void AskClause() : {}
{
    { query.setType(Query.QueryTypeAsk) ; }
}

// One or more source selectors; each URI is added to the query's graph URIs.
void GraphClause() : {Node n ;}
{
    (n = SourceSelector() { query.addGraphURI(n.getURI()) ; } )+
}

// One or more source selectors; each URI is added to the query's named
// graph URIs.
void NamedGraphClause() : {Node n ;}
{
    ( n = SourceSelector() { query.addNamedGraphURI(n.getURI()) ; } )+
}

// Parses a URI and returns it as a Node.
// Throws QueryException if the resulting node is not a URI node.
Node SourceSelector() : { Node n ; }
{
    n = URI()
    {
        if ( ! n.isURI() )
            throw new QueryException("Not a URI: "+n.toString()) ;
        return n ;
    }
}

// Parses the graph pattern and installs it as the query element.
void WhereClause() : { Element el ; }
{
    el = GraphPattern() { query.setQueryElement(el) ; }
}

// Parses an integer and sets it as the query limit.
void LimitClause() : { long v ; }
{
    v = Integer() { query.setLimit(v) ; }
}

// ---- General Graph Pattern

// Top of graph patterns
Element GraphPattern() : { Element el ; }
{
    el = PatternGroup() { return el ; }
}

// A group made up of a list of triple patterns and other things.
// No outer brackets here.
Element PatternGroup() : {Element el ; }
{
    el = GraphOrPattern() { return el ; }
}

// Parses one GraphAndPattern followed by zero or more further ones.
// The ElementUnion is created lazily on the first repetition and seeded with
// the first element; with no repetition the single element is returned as is.
// NOTE(review): the separator terminal that starts each repetition is not
// visible in this copy.
Element GraphOrPattern() : { Element el = null ; ElementUnion el2 = null ; }
{
    el = GraphAndPattern()
    (
      {
        if ( el2 == null )
        {
            el2 = new ElementUnion() ;
            el2.addElement(el) ;
        }
      }
      el = GraphAndPattern() { el2.addElement(el) ; }
    )*
    { return (el2==null)? el : el2 ; }
}

// TIDY: Check all ()* productions and convert to ()+

// One or more pattern elements collected into a fresh ElementGroup.
Element GraphAndPattern() : { Element el ; }
{
    { ElementGroup eg = new ElementGroup() ; }
    ( el = PatternElement() { eg.addElement(el) ; } )+
    { return eg ; }
}

// A single thing -- one of
// + a single triple pattern
// + A group { ... }
// + OPTIONAL
// + GRAPH
// + UNSAID - removed
// + Constraint

// Dispatches to one of the element productions and returns its result.
// The local "c" is declared but not used in the visible body.
Element PatternElement() : { Element el = null ; Constraint c = null ; }
{
    ( LOOKAHEAD(3)
      el = TriplePattern()
    |
      el = GroupGraphPattern()
    |
      el = NamedGraphPattern()
    |
      el = OptionalGraphPattern()
    // |
    //   el = UnsaidGraphPattern()
    |
      el = ConstraintPattern()
    )
    { return el ; }
}

// Parses a pattern element and guarantees the result is an ElementGroup:
// anything else is wrapped in a new single-member group.
Element PatternElementAsGroup() : { Element el ; }
{
    el = PatternElement()
    {
        if ( ! ( el instanceof ElementGroup ) )
        {
            ElementGroup elg = new ElementGroup() ;
            elg.addElement(el) ;
            el = elg ;
        }
        return el ;
    }
}

// ---- The various components of a graph pattern.

// A nested pattern group.
// NOTE(review): the enclosing delimiter terminals are not visible here.
Element GroupGraphPattern() : { Element el ; }
{
    el = PatternGroup() { return el ; }
}

// Builds an ElementNamedGraph, either without a graph node (first
// alternative) or with the variable/URI that precedes the pattern (second).
Element NamedGraphPattern() : { Element el ; Node n ;}
{
    ( LOOKAHEAD(2)
      el = PatternElementAsGroup()
      { el = new ElementNamedGraph(el) ; }
    |
      n = VarOrURI() el = PatternElementAsGroup()
      { el = new ElementNamedGraph(n, el) ; }
    )
    { return el ; }
}

// Element UnsaidGraphPattern() : { Element el ; }
// {
//     el = PatternElementAsGroup()
//     { el = new ElementUnsaid(el) ;
//       return el ;
//     }
// }

// Wraps the parsed pattern (element-as-group or pattern group) in an
// ElementOptional.
Element OptionalGraphPattern() : { Element el ; }
{
    ( el = PatternElementAsGroup()
    | el = PatternGroup()
    )
    { return new ElementOptional(el) ; }
}

// Parses an expression and returns it as the single constraint of a new
// ElementConstraints.
Element ConstraintPattern() : { Constraint c ; }
{
    c = Expression()
    {
        ElementConstraints ec = new ElementConstraints() ;
        ec.addConstraint(c) ;
        return ec ;
    }
}

// ---- Triple Patterns

// Merge with next as rename as TriplePattern
// Subject and predicate are variables or URIs; the object is a variable or
// literal. The three nodes become a Triple inside an ElementTriplePattern.
Element TriplePattern() : {Node s; Node p ; Node o ; }
{
    s = VarOrURI()
    p = VarOrURI()
    o = VarOrLiteral()
    {
        Triple t = new Triple(s,p,o) ;
        ElementTriplePattern e = new ElementTriplePattern() ;
        e.setTriple(t) ;
        return e ;
    }
}

// -------- -- Construct patterns

// One or more construct elements collected into an ElementGroup.
Element ConstructPattern() : { Element el ; }
{
    // A sequence of elements without needing delimiters
    { ElementGroup elg = new ElementGroup() ; }
    ( el = ConstructElement() { elg.addElement(el) ;} )+
    { return elg ; }
}

// Element ConstructGroup() : { Element el ; }
// {
//     { ElementGroup elg = new ElementGroup() ; }
//     ( el = ConstructElement() { elg.addElement(el) ;} )+
//     { return elg ; }
// }

// A construct element is currently just a triple template.
Element ConstructElement() : { Element el ; }
{
    el = TripleTemplate() { return el ; }
}

// Like a triple pattern but with possible bNodes.
// Blank nodes are accepted in the subject and object positions only.
Element TripleTemplate() : {Node s; Node p ; Node o ; }
{
    ( s = VarOrURI() | s = BNode() )
    p = VarOrURI()
    ( o = VarOrLiteral() | o = BNode() )
    {
        Triple t = new Triple(s,p,o) ;
        ElementTriplePattern e = new ElementTriplePattern() ;
        e.setTriple(t) ;
        return e ;
    }
}

// ----

// Returns a variable node or a URI node.
Node VarOrURI() : {Node n = null ; }
{
    ( n = VarAsNode() | n = URI() )
    { return n ; }
}

// Returns a variable node, or the graph node of a parsed literal value.
Node VarOrLiteral() : { Node n = null ; NodeValue v = null ; }
{
    ( n = VarAsNode() { return n ; }
    | v = Literal() { return v.getNode() ; }
    )
}

// Creates a variable node named by the token image minus its first
// character (presumably the variable marker - confirm against the variable
// token definition, which is not visible in this copy).
Node VarAsNode() : { Token t ;}
{
    t =
    { return Node.createVariable(t.image.substring(1)) ; }
}

// Same as VarAsNode() but yields an expression node (NodeVar).
ExprNode VarAsExpr() : { Token t ;}
{
    t =
    { return new NodeVar(t.image.substring(1)) ; }
}

// Sets the query base URI from a quoted URI.
void BaseDecl() : { Node n ; }
{
    n = QuotedURI()
    { query.setBaseURI(n.getURI()) ; }
}

// Registers a prefix mapping. The first alternative derives the prefix from
// token "t" via fixupPrefix(); the second registers the empty prefix "".
void PrefixDecl() : { Token t ; Node n ; }
{
    ( LOOKAHEAD(2)
      t = n = QuotedURI()
      // Has the trailing ":"
      { query.setPrefix(fixupPrefix(t), n.getURI()) ; }
    |
      n = QuotedURI()
      { query.setPrefix("", n.getURI()) ; }
    )
}

// -------------------------------------------------
// Constraint syntax follows.
// **** Debug point

// NOTE(review): throughout this section the operator/keyword terminals that
// introduce each alternative are not visible (alternatives start directly
// with "n2 = ..."), and several token definitions have lost their names.
// Confirm against the upstream grammar before regenerating the parser.
// Line breaks were reconstructed, so check where each "//" comment ends.

// Top of the expression grammar.
ExprNode Expression() : { ExprNode n ; }
{
    n = ConditionalOrExpression() { return n ; }
}

// Left-associative chain: each repetition wraps the running result and the
// next operand in E_LogicalOr.
ExprNode ConditionalOrExpression() : { ExprNode n1, n2 ; }
{
    n1 = ConditionalXorExpression()
    ( n2 = ConditionalXorExpression()
      { n1 = new E_LogicalOr(n1,n2) ; }
    )*
    { return n1 ; }
}

// Pass-through level; the XOR repetition is commented out.
ExprNode ConditionalXorExpression() : { ExprNode n ; }
{
    n = ConditionalAndExpression()
    // Skip this
    //( ConditionalAndExpression()
    { return n ; }
}

// Left-associative chain building E_LogicalAnd nodes.
ExprNode ConditionalAndExpression() : { ExprNode n1, n2 ;}
{
    n1 = ValueLogical()
    ( n2 = ValueLogical()
      { n1 = new E_LogicalAnd(n1,n2) ; }
    )*
    { return n1 ; }
}

// End of boolean expressions
// (except unary NOT which is very tightly binding)

// PatternElements that are not operations on boolean terms.
ExprNode ValueLogical() : { ExprNode n ; }
{
    n = StringEqualityExpression() { return n ; }
}

// String comparison level: string equal / not-equal against another operand,
// or match / not-match against a pattern literal. Repeats, folding left.
ExprNode StringEqualityExpression() : { ExprNode n1,n2; NodePatternLiteral r ; }
{
    n1 = NumericalLogical()
    (
      n2 = NumericalLogical()
      { n1 = new E_StringEqual(n1, n2) ; }
    |
      n2 = NumericalLogical()
      { n1 = new E_StringNotEqual(n1, n2) ; }
    |
      r = PatternLiteral()
      { n1 = new E_StringMatch(n1, r) ; }
    |
      r = PatternLiteral()
      { n1 = new E_StringNotMatch(n1, r) ; }
    )*
    { return n1 ; }
}

// Expressions that involve comparing numbers.
ExprNode NumericalLogical() : { ExprNode n ; }
{
    n = EqualityExpression() { return n ; }
}

// At most one equality test (the group is optional, not repeated).
ExprNode EqualityExpression() : { ExprNode n1, n2 ; }
{
    n1 = RelationalExpression()
    (
      n2 = RelationalExpression()
      { n1 = new E_Equal(n1,n2) ; }
    |
      n2 = RelationalExpression()
      { n1 = new E_NotEqual(n1,n2) ; }
    )?
    { return n1 ; }
}

// At most one relational test: less-than, greater-than, less-or-equal,
// greater-or-equal.
ExprNode RelationalExpression() : { ExprNode n1, n2 ; }
{
    n1 = NumericExpression()
    (
      n2 = NumericExpression()
      { n1 = new E_LessThan(n1,n2) ; }
    |
      n2 = NumericExpression()
      { n1 = new E_GreaterThan(n1,n2) ; }
    |
      n2 = NumericExpression()
      { n1 = new E_LessThanOrEqual(n1,n2) ; }
    |
      n2 = NumericExpression()
      { n1 = new E_GreaterThanOrEqual(n1,n2) ; }
    )?
    { return n1 ; }
}

// **** Debug point

// Top of the arithmetic part of the expression grammar.
ExprNode NumericExpression () : { ExprNode n ; }
{
    n = AdditiveExpression() { return n ; }
}

// Left-associative chain building E_Add / E_Subtract nodes.
ExprNode AdditiveExpression() : { ExprNode n1,n2 ; }
{
    n1 = MultiplicativeExpression()
    (
      n2 = MultiplicativeExpression()
      { n1 = new E_Add(n1, n2) ; }
    |
      n2 = MultiplicativeExpression()
      { n1 = new E_Subtract(n1, n2) ; }
    )*
    { return n1 ; }
}

// Left-associative chain building E_Multiply / E_Divide / E_Modulus nodes.
ExprNode MultiplicativeExpression() : { ExprNode n1,n2 ; }
{
    n1 = UnaryExpression()
    (
      n2 = UnaryExpression()
      { n1 = new E_Multiply(n1, n2) ; }
    |
      n2 = UnaryExpression()
      { n1 = new E_Divide(n1, n2) ; }
    |
      n2 = UnaryExpression()
      { n1 = new E_Modulus(n1, n2) ; }
    )*
    { return n1 ; }
}

// Optional unary operator applied to a builtin expression: logical not,
// unary plus, unary minus, or no operator at all.
// NOTE(review): "( | )" is an empty choice in this copy; presumably two
// alternative spellings of the negation operator - confirm.
ExprNode UnaryExpression() : { ExprNode n ; }
{
    ( | ) n = BuiltinExpression()
    { return new E_LogicalNot(n) ; }
  |
    n = BuiltinExpression()
    { return new E_UnaryPlus(n) ; }
  |
    n = BuiltinExpression()
    { return new E_UnaryMinus(n) ; }
  |
    n = BuiltinExpression()
    { return n ; }
}

// Built-in tests and accessors (bound, str, lang, datatype, isURI, isBlank,
// isLiteral), a cast of an expression to a URI-named type, or a primary
// expression. The keyword terminals selecting each builtin are not visible.
ExprNode BuiltinExpression() : { ExprNode n ; }
{
    n = VarAsExpr()
    { return new E_Bound(n) ; }
  |
    n = VarOrLiteralAsExpr()
    { return new E_Str(n) ; }
  |
    n = VarOrLiteralAsExpr()
    { return new E_Lang(n) ; }
  |
    n = VarOrLiteralAsExpr()
    { return new E_Datatype(n) ; }
  |
    n = VarOrLiteralAsExpr()
    { return new E_IsURI(n) ; }
  |
    n = VarOrLiteralAsExpr()
    { return new E_IsBlank(n) ; }
  |
    n = VarOrLiteralAsExpr()
    { return new E_IsLiteral(n) ; }
  |
    LOOKAHEAD(2)
    { Node uri ; }
    uri = URI() n = Expression()
    { return new E_Cast(uri, n) ; }
  |
    n = PrimaryExpression()
    { return n ; }
}

// A variable, a literal, a function call, or a nested expression.
ExprNode PrimaryExpression() : {ExprNode n ; }
{
    ( n = VarAsExpr()
    | n = Literal()
    | n = FunctionCall()
    | n = Expression()
    )
    { return n ; }
}

// A function call: the function is named by a URI and applied to an
// argument list; yields E_Function(uri string, args).
ExprNode FunctionCall() : { Node fname ; List a ; }
{
    fname = URI() a = ArgList()
    {
        ExprNode e = new E_Function(fname.getURI(), a) ;
        return e ;
    }
}

// Zero or more variable/literal arguments, returned as a (possibly empty)
// ArrayList in source order.
List ArgList() : { ExprNode e ; }
{
    { List args = new ArrayList() ; }
    ( e = VarOrLiteralAsExpr() { args.add(e) ; }
      ( e = VarOrLiteralAsExpr() { args.add(e) ; } )*
    )?
    { return args ; }
}

// A literal or a variable, as an expression node.
ExprNode VarOrLiteralAsExpr() : { ExprNode n ; }
{
    ( n = Literal() { return n ; }
    | n = VarAsExpr() { return n ; }
    )
}

// Literal terms (as in query literals - any value in the query)
// Not "RDF literals".
// A URI (wrapped with NodeValue.makeNode), a numeric literal or a text
// literal.
NodeValue Literal() : { NodeValue n ; }
{
    { Node gn ; }
    gn = URI() { return NodeValue.makeNode(gn) ; }
  |
    n = NumericLiteral() { return n ; }
  |
    n = TextLiteral() { return n ; }
}

// An integer becomes NodeValue.makeInt, a floating point number becomes
// NodeValue.makeDouble.
NodeValue NumericLiteral() : { long v; double d ; }
{
    v = Integer() { return NodeValue.makeInt(v) ; }
  |
    d = FloatingPoint() { return NodeValue.makeDouble(d) ; }
}

// A quoted string with optional language tag and optional datatype URI.
// The lexical form has its quotes stripped and escapes processed; the
// language tag is the token image with one leading character removed via
// stripChars(image, 1).
NodeValue TextLiteral() : { Token t ; String lex = null ; }
{
    ( t = | t = )
    {
        lex = stripQuotes(t.image) ;
        lex = unescape(lex) ;
    }
    // Optional lang tag and datatype.
    { String lang = null ; Node uri = null ; }
    ( t = { lang = stripChars(t.image, 1) ; } ) ?
    ( uri = URI() )?
    { return NodeValue.makeNode(lex, lang, uri) ; }
}

// Effectively turn off tokenizing.
// NB : Layout (for HTML-ization scripts)
// NOTE(review): PatternLiteral() below switches to a lexical state named
// READ_REGEX, but no state prefix or token body is visible on this section.
TOKEN : { }

// Reads a regular-expression literal by hand: remembers the current lexical
// state, switches the token manager to READ_REGEX, lets the helper methods
// regexPattern()/regexModifiers() consume the pattern and its modifiers,
// then restores the saved state before returning.
NodePatternLiteral PatternLiteral() : {}
{
    {
        int state = token_source.curLexState ;
        token_source.SwitchTo(READ_REGEX) ;
        String pattern = regexPattern(this) ;
        String modifiers = regexModifiers(this) ;
    }
    // Pattern language is: [m]/pattern/[i][m][s][x]
    // Note the leading "m" is optional because // is
    // often in conflict with URIs so the convenience
    // of, say "!", as a leading marker is good.
    {
        token_source.SwitchTo(state) ;
        return new NodePatternLiteral(pattern, modifiers) ;
    }
}

// Node URL() : { Node n ; }
// {
//     //QuotedURI()
//     n = URI() { return n ; }
// }

// A URI is written either as a quoted URI or as a qname.
Node URI() : { Node n ; }
{
    n = QuotedURI() { return n ; }
  |
    n = QName() { return n ; }
}

// Expands a qname token via fixupQName() and returns it as a URI node.
// Both alternatives run the same action on different (missing) token kinds.
Node QName() : { Token t = null ; }
{
    ( t =
      { return Node.createURI(fixupQName(t)) ; }
    |
      t =
      { return Node.createURI(fixupQName(t)) ; }
    )
}

// Creates an anonymous node whose AnonId is the full token image.
Node BNode() : { Token t = null ; }
{
    t =
    { return Node.createAnon(new AnonId(t.image)) ; }
}

// Strips the quoting from the token image, passes the result through
// fixupURI(), and returns it as a URI node.
Node QuotedURI() : { Token t ; }
{
    t =
    {
        String s = stripQuotes(t.image) ;
        s = fixupURI(s) ;
        return Node.createURI(s) ;
    }
}

// Converts an integer token to a long via integerValue(). Two token kinds
// are accepted (presumably INTEGER_10 and INTEGER_16 - confirm).
long Integer() : { Token t ; }
{
    t =
    { return integerValue(t.image) ; }
  |
    t =
    { return integerValue(t.image) ; }
}

// Converts a floating point token to a double via doubleValue().
double FloatingPoint() : { Token t ; }
{
    t =
    { return doubleValue(t.image) ; }
}

// ------------------------------------------
// Tokens

// Comments and whitespace

SKIP : { " " | "\t" | "\n" | "\r" | "\f" }

// NOTE(review): empty in this copy; given the heading above, presumably the
// comment token was defined here - confirm.
SPECIAL_TOKEN : { }

// Main tokens */

// NOTE(review): most definitions in this section have lost their names and
// inner references; only the private helpers A2Z and A2ZN are intact.
TOKEN :
{
    // Includes # for relative URIs
    |"#"|"_") (~[">"," "])* ">" >
    // The QName() rule uses both of these - PrefixDecl() just the first
  | )? ":" >
  | )? ":" (|)? >
  | |) >
  | |) >
  | ()+("-" ()+)? >
  | <#A2Z: ["a"-"z","A"-"Z"]>
  | <#A2ZN: ["a"-"z","A"-"Z","0"-"9"]>
    // Can't write a useful pattern token because the delimiter is variable.
    // |
}

// -------------------------------------------------
// LITERALS

TOKEN :
{
    // No sign - that is done in the grammar.
    // Otherwise 1+2 tends to get "1" and "+2" (maximal tokens)
    < INTEGER_10: /*(["-","+"])?*/ >
  | < INTEGER_16: "0" ["x","X"] (["0"-"9","a"-"f","A"-"F"])+ >
  | < FLOATING_POINT:
        //(["+","-"])?
        (["0"-"9"])+ "." (["0"-"9"])* ()?
      | "." (["0"-"9"])+ ()?
      | (["0"-"9"])+ //NB Must have exponent for this case.
    >
    // Private helper: exponent marker, optional sign, one or more digits.
  | < #EXPONENT: ["e","E"] (["+","-"])? (["0"-"9"])+ >
    // Strings may not contain raw newlines; a backslash escapes any
    // character other than a newline or carriage return.
  | < STRING_LITERAL1:
        // Single quoted string
        "'" ( (~["'","\\","\n","\r"]) | ("\\" ~["\n","\r"]) )* "'" >
  | < STRING_LITERAL2:
        // Double quoted string
        "\"" ( (~["\"","\\","\n","\r"]) | ("\\" ~["\n","\r"]) )* "\"" >
  | < DIGITS: (["0"-"9"])+>
}

// -------------------------------------------------
// Keywords : includes operators that are words and should be
// before general things like IDENTIFIER which swallow almost
// anything

// All keywords are matched case-insensitively.
TOKEN [IGNORE_CASE] :
{
    // Prologue
    < BASE: "base" >
  | < PREFIX: "prefix" >
    // Result forms
  | < SELECT: "select" >
  | < DISTINCT: "distinct" >
  | < LIMIT: "limit" >
  | < DESCRIBE: "describe" >
  | < CONSTRUCT: "construct" >
  | < ASK: "ask" >
    // Dataset
  | < WITH: "with" >
  | < FROM: "from" >
    // Graph pattern operators
  | < WHERE: "where" >
  | < AND: "and" >
  | < GRAPH: "graph" >
  | < OR: "or" >
  | < UNION: "union" >
  | < OPTIONAL: "optional" >
  //| < UNSAID: "unsaid" >
    // Expression operators
  | < STR_EQ: "eq" >
  | < STR_NE: "ne" >
  | < BOUND: "bound" >
  | < STR: "str" >
  | < DTYPE: "datatype" >
  | < LANG: "lang" >
  | < IS_URI: "isURI" >
  | < IS_BNODE: "isBlank" >
  | < IS_LITERAL: "isLiteral" >
  | < AS: "as">
}

// Punctuation.
TOKEN :
{
    < LPAREN: "(" >
  | < RPAREN: ")" >
  | < LBRACE: "{" >
  | < RBRACE: "}" >
  | < LBRACKET: "[" >
  | < RBRACKET: "]" >
  | < SEMICOLON: ";" >
  | < COMMA: "," >
  | < DOT: "." >
}

// Operator
TOKEN :
{
    < GT: ">" >
  | < LT: "<" >
  | < BANG: "!" >
  | < TILDE: "~" >
  | < HOOK: "?" >
  | < COLON: ":" >
  | < EQ: "==" >
  | < NEQ: "!=" >
  | < LE: "<=" > // Maybe: | "=>" >
  | < GE: ">=" > // Maybe: | "=<" >
  | < SC_OR: "||" >
  | < SC_AND: "&&" >
  //| < INCR: "++" >
  //| < DECR: "--" >
  | < PLUS: "+" >
  | < MINUS: "-" >
  | < STAR: "*" >
  | < SLASH: "/" >
  | < AMP: "&" >
  | < REM: "%" >
  | < LSHIFT: "<<" >
  | < RSIGNEDSHIFT: ">>" >
    // The tokens for string EQ and string NE are done as keywords
  | < STR_MATCH: ("=~"|"~~") >
  | < STR_NMATCH: "!~">
  | < DATATYPE: "^^">
  | < AT: "@">
}

// Notes:
// XML 1.1 http://www.w3.org/TR/xml11/
// XML Namespaces 1.1 http://www.w3.org/TR/xml-names11/
// Prefix ':' LocalPart
// Prefix is an NCName
// LocalPart is an NCName
//
//
// An XML Name, minus the ":"
// NCName ::= NCNameStartChar NCNameChar*
// NCNameChar ::= NameChar - ':'
// NCNameStartChar ::= NameStartChar - ':'
// NameChar and NameStartChar defined in XML 1.1
// NameStartChar := ":" | [A-Z] | "_" | [a-z] |
// was: [#xC0-#x2FF] |
// now: [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] |
// [#x370-#x37D] | [#x37F-#x1FFF] |
// [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] |
// [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] |
// [#x3001-#xD7FF] | [#xF900-#xEFFFF]
// NameChar := NameStartChar | "-" | "." | [0-9] | #xB7 |
// [#x0300-#x036F] | [#x203F-#x2040]

// TOKEN:
// {
//   // The use of qnames with starting "_:" is confusing.
//   // They are legal XML 1.1 but look like bNode ids from
//   // some RDF syntaxes.
//
//   // This can't be purely done by tokenizing without states.
//   //
//   // XML 1.1 NCNameStartChar
//   <#NCCHAR1:
//     ["A"-"Z"] | "_" |["a"-"z"] |
//     ["\u00C0"-"\u00D6"] | ["\u00D8"-"\u00F6"] | ["\u00F8"-"\u02FF"] |
//     ["\u0370"-"\u037D"] | ["\u037F"-"\u1FFF"] |
//     ["\u200C"-"\u200D"] | ["\u2070"-"\u218F"] | ["\u2C00"-"\u2FEF"] |
//     ["\u3001"-"\uD7FF"] | ["\uF900"-"\uFFFF"] >
//   |
//   // Full NCNAME
//   ( | "." | "-" | ["0"-"9"] | "\u00B7" )* >
// }

// SPARQL addition:
// "Prefix" can't start with "_" because some RDF
// syntaxes use "_:"
// for bNodes.
// Private (#) regular expressions for XML-style names. They produce no
// tokens themselves and are only usable from other token definitions.
// NOTE(review): the inner references in NCCHAR_FULL, NCNAME1 and NCNAME2
// are not visible in this copy ("( | ..." and "()*"); presumably they named
// NCCHAR1 / NCCHAR_FULL - confirm against the upstream grammar.
TOKEN:
{
    // XML 1.1 NCNameStartChar without "_"
    <#NCCHAR1:
        ["A"-"Z"] | ["a"-"z"] |
        ["\u00C0"-"\u00D6"] | ["\u00D8"-"\u00F6"] | ["\u00F8"-"\u02FF"] |
        ["\u0370"-"\u037D"] | ["\u037F"-"\u1FFF"] |
        ["\u200C"-"\u200D"] | ["\u2070"-"\u218F"] | ["\u2C00"-"\u2FEF"] |
        ["\u3001"-"\uD7FF"] | ["\uF900"-"\uFFFF"] >
  |
    // Full NCNAME, putting back the "_"
    <#NCCHAR_FULL: ( | "_" | "." | "-" | ["0"-"9"] | "\u00B7" )>
  |
    // Does not allow leading "_"
    <#NCNAME1: ()* >
  |
    // With a leading "_"
    <#NCNAME2: "_" ()* >
}

/*
 * (c) Copyright 2004, 2005 Hewlett-Packard Development Company, LP
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 * derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
# Local Variables:
# tab-width: 4
# indent-tabs-mode: nil
# comment-default-style: "//"
# End:
*/