// SPARQL Syntax // // Author: Andy Seaborne andy.seaborne@hp.com // // (c) Copyright 2004, 2005 Hewlett-Packard Development Company, LP // All rights reserved. // See end of file for details. // Constraint expression is derived from Java : // example java1.2-a.jj grammar in JavaCC distribution // Much modified over time. options { // Use \ u escapes in streams AND use a reader for the query // => get both raw and escaped unicode // CHECK JAVA_UNICODE_ESCAPE = true; UNICODE_INPUT = false ; STATIC = false ; // DEBUG_PARSER = true ; // DEBUG_TOKEN_MANAGER = true ; } PARSER_BEGIN(SPARQLaltParser) /* * (c) Copyright 2004, 2005 Hewlett-Packard Development Company, LP * All rights reserved. */ package com.hp.hpl.jena.query.parser.sparql_alt ; import java.util.* ; import com.hp.hpl.jena.graph.* ; import com.hp.hpl.jena.query.* ; import com.hp.hpl.jena.query.core.* ; import com.hp.hpl.jena.query.expr.* ; public class SPARQLaltParser extends SPARQLaltParserBase { } PARSER_END(SPARQLaltParser) void CompilationUnit(): {} { // The tests for trailing junk // but does not work for "//..." Query() } // **** Debug point void Query() : { Element el ; } { Prolog() ( SelectClause() | ConstructClause() | DescribeClause() | AskClause() ) ( GraphClause() )* ( NamedGraphClause() )* ( WhereClause() ) ? ( LimitClause() ) ? } void Prolog() : {} { ( BaseDecl() ) ? ( PrefixDecl() )* } void SelectClause() : { Node v ; Token t = null ;} { ( LOOKAHEAD(3) // Need to hop "SELECT" and DISTINCT to look at ?var or STAR (t = )? { getQuery().setQueryResultStar(true) ; } ) { getQuery().setType(Query.QueryTypeSelect) ; getQuery().setDistinct( t != null ) ; } } void DescribeClause() : { Node n ; } { { getQuery().setType(Query.QueryTypeDescribe) ; } ( LOOKAHEAD(2) // Extra lookahead needs for URIs or Vars. 
( n = VarOrURI() { getQuery().addDescribeNode(n) ; } )+ { getQuery().setQueryResultStar(false) ; } | { getQuery().setQueryResultStar(true) ; } ) } void ConstructClause() : { Template t ; } { { getQuery().setType(Query.QueryTypeConstruct) ; } ( LOOKAHEAD(2) t = ConstructTemplate() { getQuery().setQueryResultStar(false) ; getQuery().setConstructTemplate(t) ; } | { getQuery().setQueryResultStar(true) ; } ) } void AskClause() : {} { { getQuery().setType(Query.QueryTypeAsk) ; } } void GraphClause() : {Node n ;} { (n = SourceSelector() { getQuery().addGraphURI(n.getURI()) ; } )+ } void NamedGraphClause() : {Node n ;} { ( n = SourceSelector() { getQuery().addNamedGraphURI(n.getURI()) ; } )+ } Node SourceSelector() : { Node n ; } { n = URIref() { if ( ! n.isURI() ) throw new QueryException("Not a URI: "+n.toString()) ; return n ; } } void WhereClause() : { Element el ; } { el = GraphPattern() { getQuery().setQueryElement(el) ; } } void LimitClause() : { Token t ; } { t = { getQuery().setLimit(integerValue(t.image)) ; } } // ---- General Graph Pattern // Top of graph patterns Element GraphPattern() : { Element el ; } { el = PatternGroup() { return el ; } } // A group made up of a list of triple patterns and other things. // No outer brackets here. Element PatternGroup() : {Element el ; } { el = GraphOrPattern() { return el ; } } Element GraphOrPattern() : { Element el = null ; ElementUnion el2 = null ; } { el = GraphAndPattern() ( { if ( el2 == null ) { el2 = new ElementUnion() ; el2.addElement(el) ; } } el = GraphAndPattern() { el2.addElement(el) ; } )* { return (el2==null)? el : el2 ; } } // TIDY: Check all ()* productions and convert to ()+ Element GraphAndPattern() : { Element el ; } { { ElementGroup eg = new ElementGroup() ; } ( PatternElement(eg) )+ { return eg ; } } // A single thing -- one of // + a single triple pattern // + A group { ... 
} // + OPTIONAL // + GRAPH // + UNSAID - removed // + Constraint void PatternElement(ElementGroup elg) : { Element el = null ; Constraint c = null ; } { ( LOOKAHEAD(3) TriplePattern(elg) | el = GroupGraphPattern() { elg.addElement(el) ; } | el = NamedGraphPattern() { elg.addElement(el) ; } | el = OptionalGraphPattern() { elg.addElement(el) ; } // | // el = UnsaidGraphPattern() { elg.addElement(el) ; } | el = ConstraintPattern() { elg.addElement(el) ; } // LOOKAHEAD(3?) between constraints and TriplePattern needed // el = ConstraintPattern() // el = ConstraintPattern() ) } Element PatternElementAsGroup() : { Element el ; } { { ElementGroup elg = new ElementGroup() ; } ( TriplePattern(elg) | el = GroupGraphPattern() { elg.addElement(el) ; } ) { return elg ; } // { ElementGroup elg = new ElementGroup() ; } // PatternElement(elg) ; // { if ( ! ( el instanceof ElementGroup ) ) // { ElementGroup elg = new ElementGroup() ; elg.addElement(el) ; el = elg ; } // return elg ; // } } // ---- The various components of a graph pattern. 
Element GroupGraphPattern() : { Element el ; } { el = PatternGroup() { return el ; } } Element NamedGraphPattern() : { Element el ; Node n ;} { ( LOOKAHEAD(2) el = PatternElementAsGroup() { el = new ElementNamedGraph(el) ; } | n = VarOrURI() el = PatternElementAsGroup() { el = new ElementNamedGraph(n, el) ; } ) { return el ; } } // Element UnsaidGraphPattern() : { Element el ; } // { // el = PatternElementAsGroup() // { el = new ElementUnsaid(el) ; // return el ; // } // } Element OptionalGraphPattern() : { Element el ; } { el = PatternElementAsGroup() { return new ElementOptional(el) ; } } Element ConstraintPattern() : { Constraint c ; } { c = Expression() { ElementConstraints ec = new ElementConstraints() ; ec.addConstraint(c) ; return ec ; } } // ---- Triple Patterns // Merge with next as rename as TriplePattern void TriplePattern(TripleCollector acc) : {Node s; Node p ; Node o ; } { s = VarOrURI() p = VarOrURI() o = VarOrLiteral() { Triple t = new Triple(s,p,o) ; acc.addTriple(t) ; } } // -------- -- Construct patterns Template ConstructTemplate() : { Template t ; } { { TemplateGroup g = new TemplateGroup() ; } // There is only one kind of thing in a ConstructTemplate // so we skip any idea of "ConstructTemplate" (all possible single items) ( t = TripleTemplate() { g.add(t) ; } )+ { return g ; } } // Template ConstructGroup() : { Element el ; } // { // { TemplateGroup g = new TemplateGroup() ; } // ( t = ConstructElement() { g.add(t) ;} )+ // { return elg ; } // } // // Template ConstructElement() : { Template t ; } // { // t = TripleTemplate() // { return t ; } // } // Like a triple pattern but with possible bNodes. 
Template TripleTemplate() : {Node s; Node p ; Node o ; } { ( s = VarOrURI() | s = BNode() ) p = VarOrURI() ( o = VarOrLiteral() | o = BNode() ) { return new TemplateTriple(s,p,o) ; } } // ---- Node VarOrURI() : {Node n = null ; } { ( n = Var() | n = URIref() ) { return n ; } } Node VarOrLiteral() : { Node n = null ; } { n = Var() { return n ; } | n = Literal() { return n ; } } Node Var() : { Token t ;} { t = { return Node.createVariable(t.image.substring(1)) ; } } void BaseDecl() : { Node n ; } { n = QuotedURIref() { getQuery().setBaseURI(n.getURI()) ; } } void PrefixDecl() : { Token t ; Node n ; } { ( LOOKAHEAD(2) t = n = QuotedURIref() // Has the trailing ":" { getQuery().setPrefix(fixupPrefix(t), n.getURI()) ; } | n = QuotedURIref() { getQuery().setPrefix("", n.getURI()) ; } ) } // ------------------------------------------------- // Constraint syntax follows. // **** Debug point Expr Expression() : { Expr n ; } { n = ConditionalOrExpression() { return n ; } } Expr ConditionalOrExpression() : { Expr n1, n2 ; } { n1 = ConditionalXorExpression() ( n2 = ConditionalXorExpression() { n1 = new E_LogicalOr(n1,n2) ; } )* { return n1 ; } } Expr ConditionalXorExpression() : { Expr n ; } { n = ConditionalAndExpression() // Skip this //( ConditionalAndExpression() { return n ; } } Expr ConditionalAndExpression() : { Expr n1, n2 ;} { n1 = ValueLogical() ( n2 = ValueLogical() { n1 = new E_LogicalAnd(n1,n2) ; } )* { return n1 ; } } // End of boolean expressions // (except unary NOT which is very tightly binding) // PatternElements that are not operations on boolean terms. 
Expr ValueLogical() : { Expr n ; } { n = StringEqualityExpression() { return n ; } } Expr StringEqualityExpression() : { Expr n1,n2; NodePatternLiteral r ; } { n1 = EqualityExpression() ( r = PatternLiteral() { n1 = new E_StringMatch(n1, r) ; } | r = PatternLiteral() { n1 = new E_StringNotMatch(n1, r) ; } )* { return n1 ; } } Expr EqualityExpression() : { Expr n1, n2 ; } { n1 = RelationalExpression() ( n2 = RelationalExpression() { n1 = new E_Equal(n1,n2) ; } | n2 = RelationalExpression() { n1 = new E_NotEqual(n1,n2) ; } )? { return n1 ; } } Expr RelationalExpression() : { Expr n1, n2 ; } { n1 = NumericExpression() ( n2 = NumericExpression() { n1 = new E_LessThan(n1,n2) ; } | n2 = NumericExpression() { n1 = new E_GreaterThan(n1,n2) ; } | n2 = NumericExpression() { n1 = new E_LessThanOrEqual(n1,n2) ; } | n2 = NumericExpression() { n1 = new E_GreaterThanOrEqual(n1,n2) ; } )? { return n1 ; } } // **** Debug point Expr NumericExpression () : { Expr n ; } { n = AdditiveExpression() { return n ; } } Expr AdditiveExpression() : { Expr n1,n2 ; } { n1 = MultiplicativeExpression() ( n2 = MultiplicativeExpression() { n1 = new E_Add(n1, n2) ; } | n2 = MultiplicativeExpression() { n1 = new E_Subtract(n1, n2) ; } )* { return n1 ; } } Expr MultiplicativeExpression() : { Expr n1,n2 ; } { n1 = UnaryExpression() ( n2 = UnaryExpression() { n1 = new E_Multiply(n1, n2) ; } | n2 = UnaryExpression() { n1 = new E_Divide(n1, n2) ; } | n2 = UnaryExpression() { n1 = new E_Modulus(n1, n2) ; } )* { return n1 ; } } Expr UnaryExpression() : { Expr n ; } { ( | ) n = BuiltinExpression() { return new E_LogicalNot(n) ; } | n = BuiltinExpression() { return new E_UnaryPlus(n) ; } | n = BuiltinExpression() { return new E_UnaryMinus(n) ; } | n = BuiltinExpression() { return n ; } } Expr BuiltinExpression() : { Expr n ; Node gn ; } { n = Expression() { return new E_Str(n) ; } | gn = VarOrLiteral() { return new E_Lang(asExpr(gn)) ; } | n = Expression() { return new E_Datatype(n) ; } // Builtins | gn = 
Var() { return new E_Bound(new NodeVar(gn)) ; } | gn = Var() { return new E_IsURI(asExpr(gn)) ; } | gn = Var() { return new E_IsBlank(asExpr(gn)) ; } | gn = Var() { return new E_IsLiteral(asExpr(gn)) ; } | LOOKAHEAD(2) // cast { Node uri ; } uri = URIref() n = Expression() { return new E_Cast(uri, n) ; } | n = PrimaryExpression() { return n ; } } Expr PrimaryExpression() : {Expr n ; Node gn ; } { ( gn = Var() { return asExpr(gn) ; } | gn = Literal() { return asExpr(gn) ; } | n = FunctionCall() { return n ; } | n = Expression() { return n ; } ) } Expr FunctionCall() : { Node fname ; List a ; } { fname = URIref() a = ArgList() { Expr e = new E_Function(fname.getURI(), a) ; return e ; } } List ArgList() : { Node n ; } { { List args = new ArrayList() ; } ( n = VarOrLiteral() { args.add(asExpr(n)) ; } ( n = VarOrLiteral() { args.add(asExpr(n)) ; } )* )? { return args ; } } // ---------------------------------- // Literal terms (as in query literals - any value in the query) Node Literal() : { Node n ; } { n = URIref() { return n ; } | n = NumericLiteral() { return n ; } | n = TextLiteral() { return n ; } } Node NumericLiteral() : { Node n ; } { n = Integer() { return n ; } | n = FloatingPoint() { return n ; } } Node TextLiteral() : { Token t ; String lex = null ; } { ( t = | t = ) { lex = stripQuotes(t.image) ; lex = unescape(lex) ; } // Optional lang tag and datatype. { String lang = null ; Node uri = null ; } ( t = { lang = stripChars(t.image, 1) ; } ) ? ( uri = URIref() )? { return makeNode(lex, lang, uri) ; } } // Effectively turn off tokenizing. 
// NB : Layout (for HTML-ization scripts) TOKEN : { } NodePatternLiteral PatternLiteral() : {} { { int state = token_source.curLexState ; token_source.SwitchTo(READ_REGEX) ; String pattern = regexPattern(this) ; String modifiers = regexModifiers(this) ; } // Pattern language is: [m]/pattern/[i][m][s][x] // Note the leading "m" is optional because // is // often in conflict with URIs so the convenience // of, say "!", as a leading marker is good. { token_source.SwitchTo(state) ; return new NodePatternLiteral(pattern, modifiers) ; } } // Node URL() : { Node n ; } // { // //QuotedURI() // n = URI() { return n ; } // } Node URIref() : { Node n ; } { n = QuotedURIref() { return n ; } | n = QName() { return n ; } } Node QName() : { Token t = null ; } { ( t = { return Node.createURI(fixupQName(t)) ; } | t = { return Node.createURI(fixupQName(t)) ; } ) } Node BNode() : { Token t = null ; } { t = { return createBNode(t.image) ; } } Node QuotedURIref() : { Token t ; } { t = { String s = stripQuotes(t.image) ; s = fixupURI(s) ; return Node.createURI(s) ; } } Node Integer() : { Token t ; } { t = { return makeNodeInteger(t.image) ; } // | // Loses the lexical form. // t = { return makeNodeInteger(integerValue(t.image)) ; } } Node FloatingPoint() : { Token t ; } { t = { return makeNodeDouble(t.image) ; } } // ------------------------------------------ // Tokens // Comments and whitespace SKIP : { " " | "\t" | "\n" | "\r" | "\f" } SPECIAL_TOKEN : { } // Main tokens */ TOKEN : { // Includes # for relative URIs |"#"|"_") (~[">"," "])* ">" > // The QName() rule uses both of these - PrefixDecl() just the first | )? ":" > | )? ":" (|)? > | |) > | |) > | ()+("-" ()+)? > | <#A2Z: ["a"-"z","A"-"Z"]> | <#A2ZN: ["a"-"z","A"-"Z","0"-"9"]> // Can't write a useful pattern token because the delimiter is variable. // | } // ------------------------------------------------- // LITERALS TOKEN : { // No sign - that is done in the grammar. 
// Otherwise 1+2 tends to get "1" and "+2" (maximal tokens) < INTEGER_10: /*(["-","+"])?*/ > //| < INTEGER_16: "0" ["x","X"] (["0"-"9","a"-"f","A"-"F"])+ > | < FLOATING_POINT: //(["+","-"])? (["0"-"9"])+ "." (["0"-"9"])* ()? | "." (["0"-"9"])+ ()? | (["0"-"9"])+ //NB Must have exponent for this case. > | < #EXPONENT: ["e","E"] (["+","-"])? (["0"-"9"])+ > | < STRING_LITERAL1: // Single quoted string "'" ( (~["'","\\","\n","\r"]) | ("\\" ~["\n","\r"]) )* "'" > | < STRING_LITERAL2: // Double quoted string "\"" ( (~["\"","\\","\n","\r"]) | ("\\" ~["\n","\r"]) )* "\"" > | < DIGITS: (["0"-"9"])+> } // ------------------------------------------------- // Keywords : includes operators that are words and should be // before general things like IDENTIFIER which swallow almost // anything TOKEN [IGNORE_CASE] : { // Prologue < BASE: "base" > | < PREFIX: "prefix" > // Result forms | < SELECT: "select" > | < DISTINCT: "distinct" > | < LIMIT: "limit" > | < DESCRIBE: "describe" > | < CONSTRUCT: "construct" > | < ASK: "ask" > // Dataset | < WITH: "with" > | < FROM: "from" > // Graph pattern operators | < WHERE: "where" > | < AND: "and" > | < GRAPH: "graph" > | < OR: "or" > | < UNION: "union" > | < OPTIONAL: "optional" > //| < UNSAID: "unsaid" > // Expression operators | < KW_EQ: "eq" > | < KW_NE: "ne" > | < KW_GT: "gt" > | < KW_GE: "ge" > | < KW_LT: "lt" > | < KW_LE: "le" > | < BOUND: "bound" > | < STR: "str" > | < DTYPE: "datatype" > | < LANG: "lang" > | < IS_URI: "isURI" > | < IS_BNODE: "isBlank" > | < IS_LITERAL: "isLiteral" > | < AS: "as"> } TOKEN : { < LPAREN: "(" > | < RPAREN: ")" > | < LBRACE: "{" > | < RBRACE: "}" > | < LBRACKET: "[" > | < RBRACKET: "]" > | < SEMICOLON: ";" > | < COMMA: "," > | < DOT: "." > } // Operator TOKEN : { < GT: ">" > | < LT: "<" > | < BANG: "!" > | < TILDE: "~" > | < HOOK: "?" 
> | < COLON: ":" > | < EQ: "=" > | < NE: "!=" > | < LE: "<=" > // Maybe: | "=>" > | < GE: ">=" > // Maybe: | "=<" > | < SC_OR: "||" > | < SC_AND: "&&" > //| < INCR: "++" > //| < DECR: "--" > | < PLUS: "+" > | < MINUS: "-" > | < STAR: "*" > | < SLASH: "/" > | < AMP: "&" > | < REM: "%" > | < LSHIFT: "<<" > | < RSIGNEDSHIFT: ">>" > // The tokens for string EQ and string NE are done as keywords | < STR_MATCH: ("=~"|"~~") > | < STR_NMATCH: "!~"> | < DATATYPE: "^^"> | < AT: "@"> } // Notes: // XML 1.1 http://www.w3.org/TR/xml11/ // XML Namespaces 1.1 http://www.w3.org/TR/xml-names11/ // Prefix ':' LocalPart // Prefix is an NCName // LocalPart is an NCName // // // An XML Name, minus the ":" // NCName ::= NCNameStartChar NCNameChar* // NCNameChar ::= NameChar - ':' // NCNameStartChar ::= NameStartChar - ':' // NameChar and NameStartChar defined in XML 1.1 // NameStartChar := ":" | [A-Z] | "_" | [a-z] | // was: [#xC0-#x2FF] | // now: [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | // [#x370-#x37D] | [#x37F-#x1FFF] | // [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | // [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | // [#x3001-#xD7FF] | [#xF900-#xEFFFF] // NameChar := NameStartChar | "-" | "." | [0-9] | #xB7 | // [#x0300-#x036F] | [#x203F-#x2040] // TOKEN: // { // // The use of qnames with starting "_:" is confusing. // // They are legal XML 1.1 but look like bNode ids from // // some RDF syntaxes. // // // This can't be purely done by tokenizing without states. // // // // XML 1.1 NCNameStartChar // <#NCCHAR1: // ["A"-"Z"] | "_" |["a"-"z"] | // ["\u00C0"-"\u00D6"] | ["\u00D8"-"\u00F6"] | ["\u00F8"-"\u02FF"] | // ["\u0370"-"\u037D"] | ["\u037F"-"\u1FFF"] | // ["\u200C"-"\u200D"] | ["\u2070"-"\u218F"] | ["\u2C00"-"\u2FEF"] | // ["\u3001"-"\uD7FF"] | ["\uF900"-"\uFFFF"] > // | // // Full NCNAME // ( | "." | "-" | ["0"-"9"] | "\u00B7" )* > // } // SPARQL addition: // "Prefix" can't start with "_" because some RDF // syntaxes use "_:" // for bNodes. 
TOKEN: { // XML 1.1 NCNameStartChar without "_" <#NCCHAR1: ["A"-"Z"] | ["a"-"z"] | ["\u00C0"-"\u00D6"] | ["\u00D8"-"\u00F6"] | ["\u00F8"-"\u02FF"] | ["\u0370"-"\u037D"] | ["\u037F"-"\u1FFF"] | ["\u200C"-"\u200D"] | ["\u2070"-"\u218F"] | ["\u2C00"-"\u2FEF"] | ["\u3001"-"\uD7FF"] | ["\uF900"-"\uFFFF"] > | // Full NCNAME, putting back the "_" <#NCCHAR_FULL: ( | "_" | "." | "-" | ["0"-"9"] | "\u00B7" )> | // Does not allow leading "_" <#NCNAME1: ()* > | // With a leading "_" <#NCNAME2: "_" ()* > } /* * (c) Copyright 2004, 2005 Hewlett-Packard Development Company, LP * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ /* # Local Variables: # tab-width: 4 # indent-tabs-mode: nil # comment-default-style: "//" # End: */