// NOTE(review): this is a JavaCC grammar (options block, PARSER_BEGIN/PARSER_END, TOKEN/SKIP sections),
// not a plain Java source file.
// NOTE(review): in this copy the line breaks are collapsed and the token references inside the
// productions appear to be missing (e.g. Integer() reads "t = { return ... }" with nothing between
// "=" and "{") -- presumably lost in extraction; verify against the upstream grammar before editing.
// Outline of the text that follows: parser options; the generated class PropertyPath (extends
// com.hp.hpl.jena.query.lang.ParserBase); the N3() entry point, which creates an ElementGroup and
// optionally parses Triples into it; then the path productions Path, PathExpr and PathElt.
// PathExpr returns P_Alt(p, p2) or P_Seq(p, p2) when a second PathExpr follows, otherwise p itself.
// (c) Copyright 2005 Hewlett-Packard Development Company, LP // Development grammar for paths. options { // Use \ u escapes in streams AND use a reader for the query // => get both raw and escaped unicode JAVA_UNICODE_ESCAPE = true; UNICODE_INPUT = false ; // Do \ u processing outside javacc // JAVA_UNICODE_ESCAPE = false ; // UNICODE_INPUT = true ; STATIC = false ; // DEBUG_PARSER = true ; // DEBUG_TOKEN_MANAGER = true ; } PARSER_BEGIN(PropertyPath) /* * (c) Copyright 2005 Hewlett-Packard Development Company, LP * All rights reserved. */ package dev.path ; import com.hp.hpl.jena.graph.* ; import com.hp.hpl.jena.query.core.* ; public class PropertyPath extends com.hp.hpl.jena.query.lang.ParserBase { } PARSER_END(PropertyPath) // --- Entry point void N3() : {} { { ElementGroup elg = new ElementGroup() ; } (Triples(elg))? } // -------- PATH // Wiring: the cases of // s p o // s p1 p2 o // should lead to mere triple patterns accumulated. // A Node is returned by PathExpr? // Use of a modifier or an "|" leads to new E_PathFOO() in the group P_Path Path() : { P_Path p ; } { p = PathExpr() { return p ; } } P_Path PathExpr() : { P_Path p , p2 ; } { p = PathElt() ( p2 = PathExpr() // OR { return new P_Alt(p, p2) ; } // // Different from N3 meaning // | PathExpr() // ! Forward path // | PathExpr() // ^ Reverse path // // // Confusing - not "s -> p -> o" // Only forward paths % & ~ @ + = | p2 = PathExpr() // -> Forward path :p/:q { return new P_Seq(p, p2) ; } // | p2 = PathExpr() // <- Reverse path : // { return new P_SeqR(p, p2) ; } | p2 = PathExpr() { return new P_Seq(p, p2) ; } // | PathExpr() )? { return p ; } } P_Path PathElt() : { Node n ; P_Path p ; } { //n = Verb() n = IRIref() { p = new P_Node(n) ; } ( p = PathMod(p))? { return p ; } | p = Path() ( p = PathMod(p))? 
// (continues PathElt.) PathMod wraps p in P_Mod with bounds (0,-1), (0,1), (1,-1) or the parsed
// integers i1/i2, each of which stays -1 when absent. Integer() converts the token image via
// makePositiveInteger. Then: path operator tokens (PLING, VBAR, CARROT, FPATH, RPATH) and the
// triple productions Triples, Triples1, PropertyList, PropertyListNotEmpty, ObjectList.
// NOTE(review): in PropertyListNotEmpty a parsed Path() is discarded and the predicate becomes
// Node.createURI("temp") (marked TEMP HACK), so path structure never reaches the emitted Triple.
{ return p ; } } P_Path PathMod(P_Path p) : { int i1 =-1 ; int i2 = -1 ; } { ( { return new P_Mod(p, 0, -1) ; } | { return new P_Mod(p, 0, 1) ; } | { return new P_Mod(p, 1, -1) ; } | // Excludes empty {} ( i1 = Integer() ( ( i2 = Integer())?)? | (i2 = Integer())? ) { return new P_Mod(p, i1, i2) ; } ) } int Integer() : {Token t ;} { t = { return makePositiveInteger(t.image) ; } } // -------- PATH TOKEN: { < PLING: "!" > | < VBAR: "|" > | < CARROT: "^" > | < FPATH: "->" > | < RPATH: "<-" > } // N3 // ---- TRIPLES // -------- Triple lists with property and object lists void Triples(TripleCollector acc) : { } { Triples1(acc) ( (Triples(acc))?)? } void Triples1(TripleCollector acc) : { Node s ; } { s = VarOrTerm() PropertyListNotEmpty(s, acc) | // Any of the triple generating syntax elements s = TriplesNode(acc) PropertyList(s, acc) } void PropertyList(Node s, TripleCollector acc) : { } { ( PropertyListNotEmpty(s, acc) ) ? } void PropertyListNotEmpty(Node s, TripleCollector acc) : { Node p ; } { ( p = Verb() | Path() { p = Node.createURI("temp") ; } // TEMP HACK ) ObjectList(s, p, acc) ( PropertyList(s, acc) ) ? } void ObjectList(Node s, Node p, TripleCollector acc): { Node o ; } { // The mark ensures that triples end up in a nicer order o = GraphNode(acc) { Triple t = new Triple(s,p,o) ; acc.addTriple(t) ; } ( ObjectList(s, p , acc) )? 
// (continues ObjectList.) Verb yields a variable/blank node/IRI or nRDFtype. TriplesNode covers
// Collection and BlankNodePropertyList, both of which add triples to acc and return the node that
// stands for them. Collection creates one blank-node cell per element, links cells with nRDFrest,
// attaches each element with nRDFfirst, closes the list with nRDFnil and returns the first cell.
// Then the node-level productions GraphNode, VarOrTerm, VarOrIRIref, VarOrBlankNodeOrIRIref, Var
// and the start of GraphTerm.
} Node Verb() : {Node p ;} { ( p = VarOrBlankNodeOrIRIref() | { p = nRDFtype ; } ) { return p ; } } // -------- Triple expansions // Anything that can stand in a node slot and which is // a number of triples Node TriplesNode(TripleCollector acc) : { Node n ; } { n = Collection(acc) { return n ; } | n = BlankNodePropertyList(acc) { return n ; } } Node BlankNodePropertyList(TripleCollector acc) : { } { { Node n = createBNode() ; } PropertyListNotEmpty(n, acc) { return n ; } } // ------- RDF collections Node Collection(TripleCollector acc) : { Node listHead = nRDFnil ; Node lastCell = null ; int mark ; Node n ; } { ( { Node cell = createBNode() ; if ( listHead == nRDFnil ) listHead = cell ; if ( lastCell != null ) insert(acc, lastCell, nRDFrest, cell) ; mark = acc.mark() ; } n = GraphNode(acc) { insert(acc, mark, cell, nRDFfirst, n) ; lastCell = cell ; } ) + // Not * here - "()" is handled separately. { if ( lastCell != null ) insert(acc, lastCell, nRDFrest, nRDFnil) ; return listHead ; } } // -------- Nodes in a graph pattern or template Node GraphNode(TripleCollector acc) : { Node n ; } { n = VarOrTerm() { return n ; } | n = TriplesNode(acc) { return n ; } } Node VarOrTerm() : {Node n = null ; } { ( n = Var() | n = GraphTerm() ) { return n ; } } // Property (if no bNodes) + DESCRIBE Node VarOrIRIref() : {Node n = null ; } { ( n = Var() | n = IRIref() ) { return n ; } } // GRAPH and property if including blank nodes are properties. Node VarOrBlankNodeOrIRIref() : {Node n = null ; } { ( n = Var() | n = BlankNode() | n = IRIref() ) { return n ; } } Node Var() : { Token t ;} { ( t = | t = ) { return createVariable(t.image, t.beginLine, t.beginColumn) ; } } Node GraphTerm() : { Node n ; } { n = IRIref() { return n ; } | n = RDFLiteral() { return n ; } | { boolean positive = true ; } ({positive=false;}|)? 
// (continues GraphTerm.) Literal and term productions: NumericLiteral, RDFLiteral (lexical form plus
// optional language tag or datatype IRI), BooleanLiteral (XSD_TRUE / XSD_FALSE), String (strips
// quotes, then unescapes), IRIref, QName, BlankNode, Q_IRI_REF. Then the lexer section begins:
// whitespace SKIP list, the private WS token, and the start of the main token definitions.
n = NumericLiteral(true) { return n ; } | n = BooleanLiteral() { return n ; } | n = BlankNode() { return n ; } | // { return nRDFnil ; } { return nRDFnil ; } } // ---- Basic terms Node NumericLiteral(boolean positive) : { Token t ; } { t = { return makeNodeInteger(positive, t.image) ; } | t = { return makeNodeDouble(positive, t.image) ; } // | // t = { return makeNodeDouble(positive, t.image) ; } } Node RDFLiteral() : { Token t ; String lex = null ; } { lex = String() // Optional lang tag and datatype. { String lang = null ; Node uri = null ; } ( ( t = { lang = stripChars(t.image, 1) ; } ) | ( uri = IRIref() ) )? { return makeNode(lex, lang, uri) ; } } Node BooleanLiteral() : {} { { return XSD_TRUE ; } | { return XSD_FALSE ; } } String String() : { Token t ; } { ( t = | t = | t = | t = ) { String lex = stripQuotes(t.image) ; lex = unescapeStr(lex, t.beginLine, t.beginColumn) ; return lex ; } } Node IRIref() : { Node n ; } { n = Q_IRI_REF() { return n ; } | n = QName() { return n ; } } Node QName() : { Token t ; } { ( t = { return createURIfromQName(t.image, t.beginLine, t.beginColumn) ; } | t = { return createURIfromQName(t.image, t.beginLine, t.beginColumn) ; } ) } Node BlankNode() : { Token t = null ; } { t = { return createBNode(t.image, t.beginLine, t.beginColumn) ; } | // { return createBNode() ; } { return createBNode() ; } } Node Q_IRI_REF() : { Token t ; } { t = { return createNodeFromURI(t.image, t.beginLine, t.beginColumn) ; } } // ------------------------------------------ // Tokens // Comments and whitespace SKIP : { " " | "\t" | "\n" | "\r" | "\f" } TOKEN: { <#WS: " " | "\t" | "\n" | "\r" | "\f"> } SPECIAL_TOKEN : { } // Main tokens */ TOKEN: { // Includes # for relative URIs ","<","\u0000"-"\u0020"])* ">" > | )? ":" > | )? ":" ()? 
// (continues the main tokens.) Helper character classes A2Z / A2ZN, the case-insensitive keyword
// tokens (BASE ... REGEX), and the numeric tokens INTEGER, FLOATING_POINT and the private EXPONENT.
> | > //| > | > | > | ()+("-" ()+)* > | <#A2Z: ["a"-"z","A"-"Z"]> | <#A2ZN: ["a"-"z","A"-"Z","0"-"9"]> } // ------------------------------------------------- // Keywords : includes operators that are words and should be // before general things like IDENTIFIER which swallow almost // anything TOKEN : { } // Before HEX rule! TOKEN [IGNORE_CASE] : { // Prologue < BASE: "base" > | < PREFIX: "prefix" > // Result forms | < SELECT: "select" > | < DISTINCT: "distinct" > | < DESCRIBE: "describe" > | < CONSTRUCT: "construct" > | < ASK: "ask" > | < LIMIT: "limit" > | < OFFSET: "offset" > | < ORDER: "order" > | < BY: "by" > | < ASC: "asc" > | < DESC: "desc" > // Dataset | < NAMED: "named" > | < FROM: "from" > // Graph pattern operators | < WHERE: "where" > | < AND: "and" > | < GRAPH: "graph" > | < OPTIONAL: "optional" > | < UNION: "union" > | < FILTER: "filter" > // Expression operators | < BOUND: "bound" > | < STR: "str" > // | < URI: "uri" > | < DTYPE: "datatype" > | < LANG: "lang" > | < LANGMATCHES: "langmatches" > //| < IS_IRI: ("isIRI"|"isURI") > | < IS_URI: "isURI" > | < IS_IRI: "isIRI" > | < IS_BLANK: "isBlank" > | < IS_LITERAL: "isLiteral" > | < REGEX: "regex" > | | } // ------------------------------------------------- TOKEN : { < INTEGER: /*(["-","+"])?*/ > // | // < DECIMAL: /*(["-","+"])?*/ ()+ "." ()* | "." ()+ > // Required exponent. // | < FLOATING_POINT: // //(["+","-"])? // (["0"-"9"])+ "." (["0"-"9"])* () // | "." (["0"-"9"])+ () // | (["0"-"9"])+ // > | < FLOATING_POINT: //(["+","-"])? (["0"-"9"])+ "." (["0"-"9"])* ()? | "." (["0"-"9"])+ ()? | (["0"-"9"])+ > | < #EXPONENT: ["e","E"] (["+","-"])? 
// (continues EXPONENT.) Quote helpers, the four string-literal tokens, DIGITS, bracket/punctuation
// tokens, operator tokens, and the XML-1.1-derived name character classes (NCCHAR1p, NCCHAR1,
// NCCHAR, NCNAME_PREFIX, NCNAME) up to the start of VARNAME.
(["0"-"9"])+ > | < #QUOTE_3D: "\"\"\""> | < #QUOTE_3S: "'''"> // | ) // | ("U" )> // // // | // | ) > | | < STRING_LITERAL1: // Single quoted string "'" ( (~["'","\\","\n","\r"]) | )* "'" > | < STRING_LITERAL2: // Double quoted string "\"" ( (~["\"","\\","\n","\r"]) | )* "\"" > | < STRING_LITERAL_LONG1: ( ~["'","\\"] | | ("'" ~["'"]) | ("''" ~["'"]))* > | < STRING_LITERAL_LONG2: ( ~["\"","\\"] | | ("\"" ~["\""]) | ("\"\"" ~["\""]))* > | < DIGITS: (["0"-"9"])+> | } TOKEN : { < LPAREN: "(" > | < RPAREN: ")" > // I don't know why white space is needed to be included here but not ANON | ()* > | < LBRACE: "{" > | < RBRACE: "}" > | < LBRACKET: "[" > | < RBRACKET: "]" > // ()* is not needed but is in NIL | < ANON: ()* > | < SEMICOLON: ";" > | < COMMA: "," > | < DOT: "." > } // Operator TOKEN : { < EQ: "=" > | < NE: "!=" > | < GT: ">" > | < LT: "<" > | < LE: "<=" > // Maybe: | "=>" > | < GE: ">=" > // Maybe: | "=<" > | < DOLLAR: "$"> | < QMARK: "?"> | < TILDE: "~" > | < COLON: ":" > | < SC_OR: "||" > | < SC_AND: "&&" > | < PLUS: "+" > | < MINUS: "-" > | < STAR: "*" > | < SLASH: "/" > | < RSLASH: "\\" > //| < AMP: "&" > //| < REM: "%" > | < DATATYPE: "^^"> | < AT: "@"> } // See XML chars.txt for notes TOKEN: { // XML 1.1 NCNameStartChar without "_" <#NCCHAR1p: ["A"-"Z"] | ["a"-"z"] | ["\u00C0"-"\u00D6"] | ["\u00D8"-"\u00F6"] | ["\u00F8"-"\u02FF"] | ["\u0370"-"\u037D"] | ["\u037F"-"\u1FFF"] | ["\u200C"-"\u200D"] | ["\u2070"-"\u218F"] | ["\u2C00"-"\u2FEF"] | ["\u3001"-"\uD7FF"] | ["\uF900"-"\uFFFD"] > // [#x10000-#xEFFFF] | <#NCCHAR1: | "_" > // No trailing DOTs in qnames. | // #NCCHAR without "." <#NCCHAR: ( | "-" | ["0"-"9"] | "\u00B7" | ["\u0300"-"\u036F"] | ["\u203F"-"\u2040"] ) > | // NCNAME but no leading "_", no trailing ".", can have dot inside prefix name. <#NCNAME_PREFIX: ((|".")* )? > | // With a leading "_", no dot at end of local name. <#NCNAME: ((|".")* )? > | // NCNAME without "-" and ".", allowing leading digits. 
// (continues VARNAME.) The remainder is the licence text and the Emacs local-variables footer.
<#VARNAME: ( | ["0"-"9"] ) ( | ["0"-"9"] | "\u00B7" | ["\u0300"-"\u036F"] | ["\u203F"-"\u2040"] )* > } /* * (c) Copyright 2005 Hewlett-Packard Development Company, LP * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /* # Local Variables: # tab-width: 4 # indent-tabs-mode: nil # comment-default-style: "//" # End: */