/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

// Turtle & N3 for Jena
// N3 compatible:
//   All legal Turtle documents are accepted by this grammar.
//   Some N3 features, which are still RDF, are provided.

options
{
    // Use \ u escapes in streams AND use a reader for the query
    // => get both raw and escaped unicode

    JAVA_UNICODE_ESCAPE   = true ;
    UNICODE_INPUT         = false ;

    STATIC                = false ;
//    DEBUG_PARSER        = true ;
//    DEBUG_TOKEN_MANAGER = true ;
}

PARSER_BEGIN(TurtleParser)
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.hp.hpl.jena.n3.turtle.parser ;

import com.hp.hpl.jena.n3.turtle.ParserBase ;
import com.hp.hpl.jena.graph.* ;

public class TurtleParser extends ParserBase
{
}
PARSER_END(TurtleParser)

// --- Entry point

void parse() : {}
{
    (<BOM>)?
    (Statement())*
    <EOF>
}

void Statement() : {}
{
    ( Directive() | TriplesSameSubject() ) <DOT>
}

void Directive() : { Token t ; String iri ; }
{
    // Note that @prefix looks like a LANGTAG
    <PREFIX> t = <PNAME_NS> iri = IRI_REF()
    { String s = fixupPrefix(t.image, t.beginLine, t.beginColumn) ;
      setPrefix(t.beginLine, t.beginColumn, s, iri) ; }
 |
    t = <BASE> iri = IRI_REF()
    { setBase(iri, t.beginLine, t.beginColumn) ; }
}

// -------- PATH

TOKEN:
{ < PLING:  "!" >
| < VBAR:   "|" >
| < CARROT: "^" >
| < FPATH:  "->" >
| < RPATH:  "<-" >
}

// ---- TRIPLES

void TriplesSameSubject() : { Node s ; }
{
  s = VarOrTerm()
  // Emit Triple checks it's a valid triple.
  PropertyListNotEmpty(s)
|
  // Any of the triple generating syntax elements
  s = TriplesNode()
  PropertyList(s)
}

void PropertyList(Node s) : { }
{
  ( PropertyListNotEmpty(s) ) ?
}

// Non-recursive for Turtle long PropertyList tests
void PropertyListNotEmpty(Node s) : { Node p ; }
{
    p = Verb()
    ObjectList(s, p)
    ( <SEMICOLON> (p = Verb() ObjectList(s, p))? )*
}
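// Illustrative example (not from the original grammar file) of the input
// shape handled by PropertyListNotEmpty above and ObjectList below:
// <SEMICOLON> separates predicate-object groups sharing one subject and
// <COMMA> separates objects sharing one predicate.  The prefix "ex:" is a
// hypothetical name used only for this sketch:
//
//   @prefix ex: <http://example/> .
//   ex:book ex:title   "A title" ;
//           ex:creator "Author A" , "Author B" .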
// Non-recursive for Turtle long PropertyList tests
void ObjectList(Node s, Node p): { Node o ; }
{
  Object(s, p)
  ( <COMMA> Object(s, p) )*
}

void Object(Node s, Node p): { Node o ; }
{
  o = GraphNode()
  { Triple t = new Triple(s,p,o) ;
    emitTriple(token.beginLine, token.beginColumn, t) ; }
}

Node Verb() : { Node p ; String iri ; }
{
  ( iri = IRIref() { p = createNode(iri) ; }
  | <KW_A>   { p = nRDFtype ; }
  | <EQ>     { p = nOwlSameAs ;
               if ( strictTurtle )
                   throwParseException("= (owl:sameAs) not legal in Turtle",
                                       token.beginLine, token.beginColumn ) ; }
  | <ARROW>  { p = nLogImplies ;
               if ( strictTurtle )
                   throwParseException("=> (log:implies) not legal in Turtle",
                                       token.beginLine, token.beginColumn ) ; }
  )
  { return p ; }
}

// -------- Triple expansions

// Anything that can stand in a node slot and which is
// a number of triples

Node TriplesNode() : { Node n ; }
{
  n = Collection()             { return n ; }
 |
  n = BlankNodePropertyList()  { return n ; }
}

Node BlankNodePropertyList() : { }
{
  <LBRACKET>
    { Node n = createBNode() ; }
    PropertyListNotEmpty(n)
  <RBRACKET>
    { return n ; }
}

// ------- RDF collections

// Code not as SPARQL/ARQ because of output ordering.
Node Collection() :
    { Node listHead = nRDFnil ; Node lastCell = null ; Node n ; }
{
  <LPAREN>
  (
    { Node cell = createBNode() ;
      if ( listHead == nRDFnil )
          listHead = cell ;
      if ( lastCell != null )
          emitTriple(token.beginLine, token.beginColumn,
                     new Triple(lastCell, nRDFrest, cell)) ;
    }
    n = GraphNode()
    { emitTriple(token.beginLine, token.beginColumn,
                 new Triple(cell, nRDFfirst, n)) ;
      lastCell = cell ;
    }
  ) +
  // Not * here - "()" is handled separately.
  <RPAREN>
  { if ( lastCell != null )
        emitTriple(token.beginLine, token.beginColumn,
                   new Triple(lastCell, nRDFrest, nRDFnil)) ;
    return listHead ; }
}

// -------- Nodes in a graph pattern or template

Node GraphNode() : { Node n ; }
{
  n = VarOrTerm()    { return n ; }
 |
  n = TriplesNode()  { return n ; }
}

Node VarOrTerm() : { Node n = null ; }
{
  ( n = Var() | n = GraphTerm() | n = Formula() )
  { return n ; }
}

Node Formula() : { Token t ; }
{
    t = <LBRACE>
    { startFormula(t.beginLine, t.beginColumn) ; }
    TriplesSameSubject()
    ( <DOT> (TriplesSameSubject())? )*
    t = <RBRACE>
    { endFormula(t.beginLine, t.beginColumn) ; }
    { return null ; }
}

Node Var() : { Token t ; }
{
    t = <VAR>
    { return createVariable(t.image, t.beginLine, t.beginColumn) ; }
}

Node GraphTerm() : { Node n ; String iri ; }
{
  iri = IRIref()        { return createNode(iri) ; }
 |
  n = RDFLiteral()      { return n ; }
 |
  // Cleaner sign handling in Turtle.
  n = NumericLiteral()  { return n ; }
 |
  n = BooleanLiteral()  { return n ; }
 |
  n = BlankNode()       { return n ; }
 |
  //<LPAREN> <RPAREN> { return nRDFnil ; }
  <NIL>                 { return nRDFnil ; }
}

// ---- Basic terms

Node NumericLiteral() : { Token t ; }
{
  t = <INTEGER> { return createLiteralInteger(t.image) ; }
| t = <DECIMAL> { return createLiteralDecimal(t.image) ; }
| t = <DOUBLE>  { return createLiteralDouble(t.image) ; }
}

Node RDFLiteral() : { Token t ; String lex = null ; }
{
  lex = String()
  // Optional lang tag and datatype.
  { String lang = null ; String dt = null ; }
  (
    lang = Langtag()
  |
    ( <DATATYPE> dt = IRIref() )
  )?
    { return createLiteral(lex, lang, dt) ; }
}
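// Illustrative examples (not from the original grammar file) of the literal
// forms recognised by String()/RDFLiteral() above, assuming a prefix "xsd:"
// has been declared for the XML Schema namespace:
//
//   "chat"                      plain literal       (STRING_LITERAL2)
//   'chat'@fr                   with language tag   (LANGTAG)
//   """line 1
//   line 2"""                   long string         (STRING_LITERAL_LONG2)
//   "5"^^xsd:integer            typed literal       (DATATYPE then IRIref)
//   -2.5                        numeric literal     (DECIMAL)
//   true                        boolean literal     (TRUE)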
String Langtag() : { Token t ; }
{
  // Enumerate the directives here because they look like language tags.
  ( t = <LANGTAG> | t = AnyDirective() )
  { String lang = stripChars(t.image, 1) ;
    return lang ; }
}

Token AnyDirective() : { Token t ; }
{
    ( t = <PREFIX> | t = <BASE> ) { return t ; }
}

Node BooleanLiteral() : {}
{
  <TRUE>  { return XSD_TRUE ; }
 |
  <FALSE> { return XSD_FALSE ; }
}

String String() : { Token t ; String lex ; }
{
  ( t = <STRING_LITERAL1>      { lex = stripQuotes(t.image) ; }
  | t = <STRING_LITERAL2>      { lex = stripQuotes(t.image) ; }
  | t = <STRING_LITERAL_LONG1> { lex = stripQuotes3(t.image) ; }
  | t = <STRING_LITERAL_LONG2> { lex = stripQuotes3(t.image) ; }
  )
  { lex = unescapeStr(lex, t.beginLine, t.beginColumn) ;
    return lex ; }
}

String IRIref() : { String iri ; }
{
  iri = IRI_REF()       { return iri ; }
 |
  iri = PrefixedName()  { return iri ; }
}

String PrefixedName() : { Token t ; }
{
  ( t = <PNAME_LN>
    { return resolvePName(t.image, t.beginLine, t.beginColumn) ; }
  | t = <PNAME_NS>
    { return resolvePName(t.image, t.beginLine, t.beginColumn) ; }
  )
}

Node BlankNode() : { Token t = null ; }
{
  t = <BLANK_NODE_LABEL>
    { return createBNode(t.image, t.beginLine, t.beginColumn) ; }
|
  //<LBRACKET> <RBRACKET> { return createBNode() ; }
  <ANON> { return createBNode() ; }
}

String IRI_REF() : { Token t ; }
{
  t = <Q_IRIref>
  { return resolveQuotedIRI(t.image, t.beginLine, t.beginColumn) ; }
}

// ------------------------------------------
// Tokens

// Comments and whitespace

SKIP : { " " | "\t" | "\n" | "\r" | "\f" }

TOKEN: { <#WS: " " | "\t" | "\n" | "\r" | "\f"> }

SPECIAL_TOKEN :
{ <SINGLE_LINE_COMMENT: "#" (~["\n","\r"])* ("\n"|"\r"|"\r\n")? > }

// -------------------------------------------------
// Keywords : directives before LANGTAG

TOKEN :
{
   // Prologue
   <KW_A: "a">
 | < PREFIX: "@prefix" >
 | < BASE:   "@base" >
}

TOKEN [IGNORE_CASE] :
{
   < TRUE:  "true" >
 | < FALSE: "false" >

// -------------------------------------------------

 | < INTEGER: (["-","+"])? <DIGITS> >

 | < DECIMAL: (["-","+"])? ((<DIGITS>)+ "." (<DIGITS>)* | "." (<DIGITS>)+) >

   // Required exponent.
 | < DOUBLE: (["+","-"])?
       ( (["0"-"9"])+ "." (["0"-"9"])* <EXPONENT>
       | "." (["0"-"9"])+ (<EXPONENT>)
       | (["0"-"9"])+ <EXPONENT>
       ) >

 | < #EXPONENT: ["e","E"] (["+","-"])? (["0"-"9"])+ >

 | < #QUOTE_3D: "\"\"\"" >
 | < #QUOTE_3S: "'''" >

   // "u" done by javacc input stream.
   // "U" escapes not supported yet for Java strings
 | <ECHAR: "\\" ( "t"|"b"|"n"|"r"|"f"|"\\"|"\""|"'" ) >

 | < STRING_LITERAL1:
      // Single quoted string
      "'" ( (~["'","\\","\n","\r"]) | <ECHAR> )* "'" >

 | < STRING_LITERAL2:
      // Double quoted string
      "\"" ( (~["\"","\\","\n","\r"]) | <ECHAR> )* "\"" >

 | < STRING_LITERAL_LONG1:
      <QUOTE_3S>
      ( ~["'","\\"] | <ECHAR> | ("'" ~["'"]) | ("''" ~["'"]) )*
      <QUOTE_3S> >

 | < STRING_LITERAL_LONG2:
      <QUOTE_3D>
      ( ~["\"","\\"] | <ECHAR> | ("\"" ~["\""]) | ("\"\"" ~["\""]) )*
      <QUOTE_3D> >

 | < DIGITS: (["0"-"9"])+ >
   // |
}

TOKEN:
{
   // Includes # for relative URIs
   <Q_IRIref: "<" (~[ ">","<", "\"", "{", "}", "^", "\\", "|", "`",
                      "\u0000"-"\u0020"])* ">" >

|  <PNAME_NS: (<PN_PREFIX>)? ":" >
|  <PNAME_LN: <PNAME_NS> <PN_LOCAL> >
|  <BLANK_NODE_LABEL: "_:" <PN_LOCAL> >
|  <VAR: "?" <VARNAME> >

|  <LANGTAG: <AT> (<A2Z>)+ ("-" (<A2ZN>)+)* >

|  <#A2Z: ["a"-"z","A"-"Z"]>
|  <#A2ZN: ["a"-"z","A"-"Z","0"-"9"]>
}

TOKEN :
{
  < LPAREN:    "(" >
| < RPAREN:    ")" >

| <NIL: <LPAREN> (<WS>|<SINGLE_LINE_COMMENT>)* <RPAREN> >

| < LBRACE:    "{" >
| < RBRACE:    "}" >

| < LBRACKET:  "[" >
| < RBRACKET:  "]" >

| < ANON: <LBRACKET> (<WS>|<SINGLE_LINE_COMMENT>)* <RBRACKET> >

| < SEMICOLON: ";" >
| < COMMA:     "," >
| < DOT:       "." >
}

// Operator

TOKEN :
{
  < EQ:       "=" >
| <ARROW:     "=>">

| < DOLLAR:   "$">
| < QMARK:    "?">

| < TILDE:    "~" >
| < COLON:    ":" >

// | < PLUS:     "+" >
// | < MINUS:    "-" >
| < STAR:     "*" >
| < SLASH:    "/" >
| < RSLASH:   "\\" >

| < BOM:      "\uFEFF">

//| < AMP: "&" >
//| < REM: "%" >

| < DATATYPE: "^^">
| < AT: "@">
}
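// Illustrative examples (not from the original grammar file) of terms matched
// by the name tokens above and the PN_* fragments below:
//
//   <http://example/ns#x>   quoted IRI            (Q_IRIref)
//   ex:                     namespace prefix      (PNAME_NS)
//   ex:a.b                  prefixed name; a dot may appear inside, but not
//                           at the end of, the local part (PNAME_LN)
//   _:b0                    labelled blank node   (BLANK_NODE_LABEL)
//   [ ]                     anonymous blank node  (ANON)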
TOKEN:
{
  <#PN_CHARS_BASE:
          ["A"-"Z"] | ["a"-"z"] |
          ["\u00C0"-"\u00D6"] | ["\u00D8"-"\u00F6"] | ["\u00F8"-"\u02FF"] |
          ["\u0370"-"\u037D"] | ["\u037F"-"\u1FFF"] | ["\u200C"-"\u200D"] |
          ["\u2070"-"\u218F"] | ["\u2C00"-"\u2FEF"] | ["\u3001"-"\uD7FF"] |
          ["\uF900"-"\uFFFD"] >
          // [#x10000-#xEFFFF]
|
  <#PN_CHARS_U: <PN_CHARS_BASE> | "_" >
|
  // No DOT
  <#PN_CHARS: ( <PN_CHARS_U> | "-" | ["0"-"9"] | "\u00B7" |
                ["\u0300"-"\u036F"] | ["\u203F"-"\u2040"] ) >
|
  // No leading "_", no trailing ".", can have dot inside prefix name.
  <#PN_PREFIX: <PN_CHARS_BASE> ((<PN_CHARS>|".")* <PN_CHARS>)? >
|
  // With a leading "_", no dot at end of local name.
  <#PN_LOCAL: (<PN_CHARS_U> | ["0"-"9"]) ((<PN_CHARS>|".")* <PN_CHARS>)? >
|
  // NCNAME without "-" and ".", allowing leading digits.
  <#VARNAME: ( <PN_CHARS_U> | ["0"-"9"] )
             ( <PN_CHARS_U> | ["0"-"9"] | "\u00B7" |
               ["\u0300"-"\u036F"] | ["\u203F"-"\u2040"] )* >
}

// Catch-all tokens.  Must be last.
// Any non-whitespace.  Causes a parser exception, rather than a
// token manager error (with hidden line numbers).
// Only bad IRIs (e.g. spaces) now give unhelpful parse errors.
TOKEN:
{
  <#UNKNOWN: (~[" ","\t","\n","\r","\f" ])+ >
}

/*
# Local Variables:
# tab-width: 4
# indent-tabs-mode: nil
# comment-default-style: "//"
# End:
*/
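// ------------------------------------------------------------------
// Minimal driver sketch (illustrative only, not part of the original
// grammar).  It relies only on what JavaCC generates from this file with
// STATIC=false: a TurtleParser(java.io.Reader) constructor and a public
// parse() method throwing ParseException.  How triples are delivered is
// decided by ParserBase (setPrefix/setBase/emitTriple are called from the
// semantic actions above); its handler wiring is not shown here.  The class
// name RunTurtleParser and the file name "data.ttl" are hypothetical.
//
//   import java.io.FileInputStream ;
//   import java.io.InputStreamReader ;
//   import java.io.Reader ;
//   import com.hp.hpl.jena.n3.turtle.parser.TurtleParser ;
//
//   public class RunTurtleParser {
//       public static void main(String[] args) throws Exception {
//           Reader in = new InputStreamReader(
//               new FileInputStream("data.ttl"), "UTF-8") ;
//           TurtleParser parser = new TurtleParser(in) ;
//           parser.parse() ;   // entry production defined at the top of this file
//           in.close() ;
//       }
//   }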