/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */
// JavaCC grammar for the class TurtleParser (declared below as extending ParserBase).
// Productions delegate node creation and triple emission to inherited helpers such as
// createBNode, createNodeFromURI, makeNode and emitTriple.
// NOTE(review): this copy appears to be whitespace-collapsed, and token references seem
// to be missing in places (for example after "t =" in Directive, Var and NumericLiteral,
// and inside the TOKEN sections) - confirm against the upstream grammar before regenerating.
// Turtle & N3 for Jena // N3 compatible: // Projection to Turtle is done by filtering non-Turtle forms. // A consequence is that where Turtle and N3 are incompatible (e.g. // whitespace processing) it follows N3. // // All legal Turtle documents are accepted by this grammar. options { // Use \ u escapes in streams AND use a reader for the query // => get both raw and escaped unicode JAVA_UNICODE_ESCAPE = true ; // We use a UTF-8 encoded stream anyway so the setting of // this is not important. // and it does not make any difference to the code generated! // Only a warning is issued if true. UNICODE_INPUT = false ; STATIC = false ; // DEBUG_PARSER = true ; // DEBUG_TOKEN_MANAGER = true ; } PARSER_BEGIN(TurtleParser) /* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. 
You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.hp.hpl.jena.n3.turtle.parser ; import com.hp.hpl.jena.n3.turtle.ParserBase ; import com.hp.hpl.jena.graph.* ; public class TurtleParser extends ParserBase { } PARSER_END(TurtleParser) // --- Entry point void parse() : {} { (Statement())* } void Statement() : {} { (Directive() | TriplesSameSubject() ) } void Directive() : { Token t ; Node n ; } { // Need a token for @prefix? // Looks like a LANGTAG t = n = Q_IRI_REF() { String s = fixupPrefix(t.image, t.beginLine, t.beginColumn) ; setPrefix(t.beginLine, t.beginColumn, s, n.getURI()) ; } } // -------- PATH TOKEN: { < PLING: "!" > | < VBAR: "|" > | < CARROT: "^" > | < FPATH: "->" > | < RPATH: "<-" > } // N3 // ---- TRIPLES // <<<<< SPARQL extract void TriplesSameSubject() : { Node s ; } { s = VarOrTerm() PropertyListNotEmpty(s) | // Any of the triple generating syntax elements s = TriplesNode() PropertyList(s) } void PropertyList(Node s) : { } { ( PropertyListNotEmpty(s) ) ? } // >>>>> SPARQL extract // Non-recursive for Turtle long PropertyList tests void PropertyListNotEmpty(Node s) : { Node p ; } { p = Verb() ObjectList(s, p) ( (p = Verb() ObjectList(s, p))? 
)* } // Non-recursive for Turtle long PropertyList tests void ObjectList(Node s, Node p): { Node o ; } { Object(s, p) ( Object(s, p) )* } /* Parses one object node and emits the triple (s, p, o) through emitTriple. */ void Object(Node s, Node p): { Node o ; } { o = GraphNode() { Triple t = new Triple(s,p,o) ; emitTriple(token.beginLine, token.beginColumn, t) ; } } // <<<<< SPARQL extract Node Verb() : {Node p ;} { ( p = IRIref() | { p = nRDFtype ; } | /* NOTE(review): the exception message below reads "legalin" (missing space); it is a runtime string, so it is left untouched here. */ { p = nLogImplies ; if ( strictTurtle ) raiseException("=> (log:implies) not legalin Turtle", token.beginLine, token.beginColumn ) ; } ) { return p ; } } // -------- Triple expansions // Anything that can stand in a node slot and which is // a number of triples Node TriplesNode() : { Node n ; } { n = Collection() { return n ; } | n = BlankNodePropertyList() { return n ; } } /* Creates a fresh blank node, parses a non-empty property list with it as subject, and returns it. */ Node BlankNodePropertyList() : { } { { Node n = createBNode() ; } PropertyListNotEmpty(n) { return n ; } } // ------- RDF collections // Code not as SPARQL/ARQ because of output ordering. Node Collection() : { Node listHead = nRDFnil ; Node lastCell = null ; Node n ; } { ( { Node cell = createBNode() ; if ( listHead == nRDFnil ) listHead = cell ; if ( lastCell != null ) emitTriple(token.beginLine, token.beginColumn, new Triple(lastCell, nRDFrest, cell)) ; } n = GraphNode() { emitTriple(token.beginLine, token.beginColumn, new Triple(cell, nRDFfirst, n)) ; lastCell = cell ; } ) + // Not * here - "()" is handled separately. { if ( lastCell != null ) emitTriple(token.beginLine, token.beginColumn, new Triple(lastCell, nRDFrest, nRDFnil)) ; return listHead ; } } // -------- Nodes in a graph pattern or template Node GraphNode() : { Node n ; } { n = VarOrTerm() { return n ; } | n = TriplesNode() { return n ; } } Node VarOrTerm() : {Node n = null ; } { ( n = Var() | n = GraphTerm() | n = Formula() ) { return n ; } } /* Brackets the nested statements with startFormula / endFormula calls; always returns null. */ Node Formula() : {Token t ; } { t = { startFormula(t.beginLine, t.beginColumn) ; } // Need to sort this out and merge with Statement above TriplesSameSubject() ( (TriplesSameSubject())? 
)* t = { endFormula(t.beginLine, t.beginColumn) ; } { return null ; } } // >>>>> SPARQL extract Node Var() : { Token t ;} { ( t = | t = ) { return createVariable(t.image, t.beginLine, t.beginColumn) ; } } Node GraphTerm() : { Node n ; } { n = IRIref() { return n ; } | n = RDFLiteral() { return n ; } | // Cleaner sign handling in Turtle. n = NumericLiteral() { return n ; } // | // n = BooleanLiteral() { return n ; } | n = BlankNode() { return n ; } | // { return nRDFnil ; } { return nRDFnil ; } } // ---- Basic terms Node NumericLiteral() : { Token t ; } { t = { return makeNodeInteger(t.image) ; } | t = { return makeNodeDecimal(t.image) ; } | t = { return makeNodeDouble(t.image) ; } } // >>>>> SPARQL extract // Langtag oddity. Node RDFLiteral() : { Token t ; String lex = null ; } { lex = String() // Optional lang tag and datatype. { String lang = null ; Node uri = null ; } ( lang = Langtag() | ( uri = IRIref() ) )? { return makeNode(lex, lang, uri) ; } } /* Returns stripChars(t.image, 1) for the matched token - presumably this drops the leading "@"; confirm against ParserBase. */ String Langtag() : { Token t ; } { // Enumerate the directives here ( t = | t = ) { String lang = stripChars(t.image, 1) ; return lang ; } } // >>>>> SPARQL extract // Node BooleanLiteral() : {} // { // { return XSD_TRUE ; } // | // { return XSD_FALSE ; } // } // <<<<< SPARQL extract String String() : { Token t ; String lex ; } { ( t = { lex = stripQuotes(t.image) ; } | t = { lex = stripQuotes(t.image) ; } | t = { lex = stripQuotes3(t.image) ; } | t = { lex = stripQuotes3(t.image) ; } ) { lex = unescapeStr(lex, t.beginLine, t.beginColumn) ; return lex ; } } Node IRIref() : { Node n ; } { n = Q_IRI_REF() { return n ; } | n = QName() { return n ; } } Node QName() : { Token t ; } { ( t = { return createURIfromQName(t.image, t.beginLine, t.beginColumn) ; } | t = { return createURIfromQName(t.image, t.beginLine, t.beginColumn) ; } ) } Node BlankNode() : { Token t = null ; } { t = { return createBNode(t.image, t.beginLine, t.beginColumn) ; } | // { return createBNode() ; } { return createBNode() ; } } Node Q_IRI_REF() : { 
Token t ; } { t = { return createNodeFromURI(t.image, t.beginLine, t.beginColumn) ; } } // ------------------------------------------ // Tokens // Comments and whitespace SKIP : { " " | "\t" | "\n" | "\r" | "\f" } TOKEN: { <#WS: " " | "\t" | "\n" | "\r" | "\f"> } SPECIAL_TOKEN : { } // ------------------------------------------------- // Keywords : directives before LANGTAG TOKEN : { } // Before HEX rule! TOKEN [IGNORE_CASE] : { // Prologue < PREFIX: "@prefix" > | < BASE: "@base" > //| < TRUE: "true" > //| < FALSE: "false" > } // ------------------------------------------------- TOKEN : { < INTEGER: (["-","+"])? > | < DECIMAL: (["-","+"])? (()+ "." ()* | "." ()+) > // Required exponent. | < DOUBLE: (["+","-"])? ( (["0"-"9"])+ "." (["0"-"9"])* | "." (["0"-"9"])+ () | (["0"-"9"])+ ) > // Optional exponent. // | < DOUBLE: // //(["+","-"])? // (["0"-"9"])+ "." (["0"-"9"])* ()? // | "." (["0"-"9"])+ ()? // | (["0"-"9"])+ // > | < #EXPONENT: ["e","E"] (["+","-"])? (["0"-"9"])+ > | < #QUOTE_3D: "\"\"\""> | < #QUOTE_3S: "'''"> // | ) // | ("U" )> // // // | // | ) > | | < STRING_LITERAL1: // Single quoted string "'" ( (~["'","\\","\n","\r"]) | )* "'" > | < STRING_LITERAL2: // Double quoted string "\"" ( (~["\"","\\","\n","\r"]) | )* "\"" > | < STRING_LITERAL_LONG1: ( ~["'","\\"] | | ("'" ~["'"]) | ("''" ~["'"]))* > | < STRING_LITERAL_LONG2: ( ~["\"","\\"] | | ("\"" ~["\""]) | ("\"\"" ~["\""]))* > | < DIGITS: (["0"-"9"])+> | } TOKEN: { // Includes # for relative URIs ","<","\u0000"-"\u0020"])* ">" > | )? ":" > | )? ":" ()? > | > //| > | > | > | ()+("-" ()+)* > | <#A2Z: ["a"-"z","A"-"Z"]> | <#A2ZN: ["a"-"z","A"-"Z","0"-"9"]> } TOKEN : { < LPAREN: "(" > | < RPAREN: ")" > | (|)* > | < LBRACE: "{" > | < RBRACE: "}" > | < LBRACKET: "[" > | < RBRACKET: "]" > | < ANON: (|)* > | < SEMICOLON: ";" > | < COMMA: "," > | < DOT: "." 
> } // Operator TOKEN : { < EQ: "=" > //| < NE: "!=" > //| < GT: ">" > //| < LT: "<" > //| < LE: "<=" > // Maybe: | "=>" > //| < GE: ">=" > // Maybe: | "=<" > | "> | < DOLLAR: "$"> | < QMARK: "?"> | < TILDE: "~" > | < COLON: ":" > // | < PLUS: "+" > // | < MINUS: "-" > | < STAR: "*" > | < SLASH: "/" > | < RSLASH: "\\" > //| < AMP: "&" > //| < REM: "%" > | < DATATYPE: "^^"> | < AT: "@"> } TOKEN: { // XML 1.1 NCNameStartChar without "_" <#NCCHAR1p: ["A"-"Z"] | ["a"-"z"] | ["\u00C0"-"\u00D6"] | ["\u00D8"-"\u00F6"] | ["\u00F8"-"\u02FF"] | ["\u0370"-"\u037D"] | ["\u037F"-"\u1FFF"] | ["\u200C"-"\u200D"] | ["\u2070"-"\u218F"] | ["\u2C00"-"\u2FEF"] | ["\u3001"-"\uD7FF"] | ["\uF900"-"\uFFFD"] > // [#x10000-#xEFFFF] | <#NCCHAR1: | "_" > // No trailing DOTs in qnames. | // #NCCHAR without "." <#NCCHAR: ( | "-" | ["0"-"9"] | "\u00B7" | ["\u0300"-"\u036F"] | ["\u203F"-"\u2040"] ) > | // NCNAME but no leading "_", no trailing ".", can have dot inside prefix name. <#NCNAME_PREFIX: ((|".")* )? > | // With a leading "_", no dot at end of local name. <#NCNAME: ((|".")* )? > | // NCNAME without "-" and ".", allowing leading digits. <#VARNAME: ( | ["0"-"9"] ) ( | ["0"-"9"] | "\u00B7" | ["\u0300"-"\u036F"] | ["\u203F"-"\u2040"] )* > } /* # Local Variables: # tab-width: 4 # indent-tabs-mode: nil # comment-default-style: "//" # End: */