// (c) Copyright 2007 Hewlett-Packard Development Company, LP

// JavaCC grammar for a parser of single RDF terms (IRIs, variables,
// literals, numbers, blank nodes).  Semantic actions delegate to helper
// methods (makeNode, createVariable, stripQuotes, ...) that are not defined
// in this file; presumably they come from ParserTermBase.
//
// NOTE(review): token references of the form <NAME> had been lost from this
// file.  References to tokens whose names are declared below were put back
// where the surrounding syntax requires them.  Token definitions marked
// "reconstructed" were rebuilt from remaining fragments and the SPARQL
// terminal grammar -- confirm against the upstream grammar.

options
{
  // Use \ u escapes in streams AND use a reader for the query
  // => get both raw and escaped unicode
  JAVA_UNICODE_ESCAPE   = true ;
  UNICODE_INPUT         = false ;
  STATIC                = false ;
//  DEBUG_PARSER          = true ;
//  DEBUG_TOKEN_MANAGER   = true ;
}

PARSER_BEGIN(Term_Parser)
/*
 * (c) Copyright 2007 Hewlett-Packard Development Company, LP
 * All rights reserved.
 */

package com.hp.hpl.jena.sparql.sse.parser ;

import com.hp.hpl.jena.sparql.sse.* ;
import com.hp.hpl.jena.graph.* ;
import com.hp.hpl.jena.sparql.core.Var;

public class Term_Parser extends ParserTermBase
{
}
PARSER_END(Term_Parser)

// --- Entry point

// Parse one RDF term.  No whitespace is accepted around the term.
Node term() : { Node node ; }
{
  node = RDFTerm()
  { return node ; }
}

// Parse one RDF term, accepting any number of WS tokens before and after it.
Node termStripWS() : { Node node ; }
{
  (<WS>)* node = RDFTerm() (<WS>)*
  { return node ; }
}

// Any single term: IRI, variable, string literal, number or blank node.
Node RDFTerm() : { Node n ; }
{
  n = IRIref()          { return n ; }
| n = Var()             { return n ; }
| n = RDFLiteral()      { return n ; }
| n = NumericLiteral()  { return n ; }
// | n = BooleanLiteral()  { return n ; }
| n = BlankNode()       { return n ; }
//|
//   { return nRDFnil ; }
//   { return nRDFnil ; }
}

// A string followed by at most one of: a language tag, or "^^" and an IRI.
Node RDFLiteral() : { Token t ; String lex = null ; }
{
  lex = String()
  // Optional lang tag and datatype.
  { String lang = null ; Node uri = null ; }
  (
    // stripChars(.., 1) drops the first character of the tag image
    // (the leading AT character of LANGTAG).
    ( t = <LANGTAG>  { lang = stripChars(t.image, 1) ; } )
  |
    <DATATYPE> uri = IRIref()
  )?
  { return makeNode(lex, lang, uri) ; }
}

// Integer, decimal or double, each built from the raw token image.
Node NumericLiteral() : { Token t ; }
{
  t = <INTEGER> { return makeNodeInteger(t.image) ; }
| t = <DECIMAL> { return makeNodeDecimal(t.image) ; }
| t = <DOUBLE>  { return makeNodeDouble(t.image) ; }
}

// Node BooleanLiteral() : {}
// {
//    { return XSD_TRUE ; }
//   |
//    { return XSD_FALSE ; }
// }

// A named variable (VAR1 or VAR2 form), or a bare HOOK which yields
// whatever the no-argument createVariable() produces.
Node Var() : { Token t ; }
{
    ( t = <VAR1> | t = <VAR2> )
    { return createVariable(t.image, t.beginLine, t.beginColumn) ; }
  |
    t = <HOOK>
    { return createVariable() ; }
}

// Lexical form of a string: quotes removed (1 or 3 on each side, depending
// on the token kind) and then passed through unescapeStr.
String String() : { Token t ; String lex ; }
{
  ( t = <STRING_LITERAL1> { lex = stripQuotes(t.image) ; }
  | t = <STRING_LITERAL2> { lex = stripQuotes(t.image) ; }
  | t = <STRING_LITERAL_LONG1> { lex = stripQuotes3(t.image) ; }
  | t = <STRING_LITERAL_LONG2> { lex = stripQuotes3(t.image) ; }
  )
    { lex = unescapeStr(lex, t.beginLine, t.beginColumn) ;
      return lex ;
    }
}

// An IRI.  Only the angle-bracket form is enabled; the prefixed-name
// alternative is commented out.
Node IRIref() : { Node n ; }
{
  n = Q_IRI_REF() { return n ; }
// | n = QName() { return n ; }
}

// Prefixed name.  Not reachable from RDFTerm while the alternative in
// IRIref is commented out.
Node QName() : { Token t ; }
{
  t = <QNAME>
  { return createNodeFromPrefixedName(t.image, t.beginLine, t.beginColumn) ; }
}

// A labelled blank node, or ANON ("[]") for a fresh one.
Node BlankNode() : { Token t = null ; }
{
  t = <BLANK_NODE_LABEL>
  { return createBNode(t.image, t.beginLine, t.beginColumn) ; }
|
  <ANON> { return createBNode() ; }
}

// IRI written between angle brackets.
Node Q_IRI_REF() : { Token t ; }
{
  t = <Q_IRIref>
  { return createNodeFromURI(t.image, t.beginLine, t.beginColumn) ; }
}

// NO WHITESPACE SKIPPING
// SKIP : { " " | "\t" | "\n" | "\r" | "\f" }

// NOTE(review): reconstructed.  Whitespace is a real token (there is no
// SKIP section) and is referenced by termStripWS.  The character set is
// assumed to be the one in the commented-out SKIP above; whether the
// original token was one character or a run cannot be told from here.
TOKEN: { <WS: " " | "\t" | "\n" | "\r" | "\f"> }

TOKEN:
{
   // NOTE(review): the names Q_IRIref, QNAME, BLANK_NODE_LABEL, VAR1, VAR2
   // and LANGTAG, and the literal prefixes "_:", "?" and "$", are
   // reconstructed -- confirm against the upstream grammar.
   <Q_IRIref: "<" (~[ ">","<","\u0000"-"\u0020"])* ">" >
|  <QNAME: (<NCNAME_PREFIX>)? ":" (<NCNAME>)? >
|  <BLANK_NODE_LABEL: "_:" <NCNAME> >
|  <VAR1: "?" <VARNAME> >
|  <VAR2: "$" <VARNAME> >
|  <LANGTAG: <AT> (<A2Z>)+("-" (<A2ZN>)+)* >
|  <#A2Z: ["a"-"z","A"-"Z"]>
|  <#A2ZN: ["a"-"z","A"-"Z","0"-"9"]>
}

TOKEN :
{
  < #DIGITS: (["0"-"9"])+>
| < INTEGER: (["+","-"])? <DIGITS> >
| < DECIMAL: (["+","-"])? ( <DIGITS> "." (<DIGITS>)* | "." <DIGITS> ) >
| < DOUBLE:   // Required exponent.
      // The optional sign is placed outside the alternation so that it
      // applies to every form (as for INTEGER and DECIMAL).  Previously it
      // was part of the first alternative only, so signed inputs such as
      // -1e5 or +.5e3 could not be matched as DOUBLE.
      (["+","-"])?
      (
        (["0"-"9"])+ "." (["0"-"9"])* <EXPONENT>
        | "." (["0"-"9"])+ (<EXPONENT>)
        | (["0"-"9"])+ <EXPONENT>
      )
      >
| < #EXPONENT: ["e","E"] (["+","-"])? (["0"-"9"])+ >
| < #QUOTE_3D: "\"\"\"">
| < #QUOTE_3S: "'''">
  // NOTE(review): reconstructed from the SPARQL ECHAR terminal; the
  // original definition was lost entirely.
| <ECHAR: "\\" ( "t"|"b"|"n"|"r"|"f"|"\\"|"\""|"'") >
| < STRING_LITERAL1:
      // Single quoted string
      "'" ( (~["'","\\","\n","\r"]) | <ECHAR> )* "'" >
| < STRING_LITERAL2:
      // Double quoted string
      "\"" ( (~["\"","\\","\n","\r"]) | <ECHAR> )* "\"" >
| < STRING_LITERAL_LONG1:
     <QUOTE_3S>
      ( ("'" | "''")? (~["'","\\"] | <ECHAR> ))*
     <QUOTE_3S> >
| < STRING_LITERAL_LONG2:
     <QUOTE_3D>
      ( ("\"" | "\"\"")? (~["\"","\\"] | <ECHAR> ))*
     <QUOTE_3D> >
}

TOKEN :
{
//   < LPAREN:    "(" >
// | < RPAREN:    ")" >
//
//
// | < LBRACE:    "{" >
// | < RBRACE:    "}" >
//
// | < LBRACKET:  "[" >
// | < RBRACKET:  "]" >
  < ANON:      "[]">
| < DATATYPE: "^^">
| < #AT: "@">
| < HOOK: "?" >
}

TOKEN:
{
  // XML 1.1 NCNameStartChar without "_"
  <#NCCHAR1P:
          ["A"-"Z"] | ["a"-"z"] |
          ["\u00C0"-"\u00D6"] | ["\u00D8"-"\u00F6"] | ["\u00F8"-"\u02FF"] |
          ["\u0370"-"\u037D"] | ["\u037F"-"\u1FFF"] |
          ["\u200C"-"\u200D"] | ["\u2070"-"\u218F"] | ["\u2C00"-"\u2FEF"] |
          ["\u3001"-"\uD7FF"] | ["\uF900"-"\uFFFD"]
          >
          // [#x10000-#xEFFFF]
|
  <#NCCHAR1: <NCCHAR1P> | "_" >
  // No trailing DOTs in qnames.
|
  // #NCCHAR without "."
  <#NCCHAR: ( <NCCHAR1> | "-" | ["0"-"9"] | "\u00B7" |
              ["\u0300"-"\u036F"] | ["\u203F"-"\u2040"] ) >
|
  // NCNAME but no leading "_", no trailing ".", can have dot inside prefix name.
  <#NCNAME_PREFIX: <NCCHAR1P> ((<NCCHAR>|".")* <NCCHAR>)? >
|
  // With a leading "_", no dot at end of local name.
  <#NCNAME: <NCCHAR1> ((<NCCHAR>|".")* <NCCHAR>)? >
|
  // NCNAME without "-" and ".", allowing leading digits.
  <#VARNAME: ( <NCCHAR1> | ["0"-"9"] )
             ( <NCCHAR1> | ["0"-"9"] | "\u00B7" |
               ["\u0300"-"\u036F"] | ["\u203F"-"\u2040"] )* >
}

// Can we add @foo?
TOKEN:
{
  // NOTE(review): reconstructed.  Only the closing ">" of this definition
  // remained; both the name SYMBOL and the body NCNAME are assumptions.
  // No production in this file refers to the token.
  <SYMBOL: <NCNAME> >
  // Anything that isn't structural (LPAREN and RPAREN / LBRACKET/RBRACKET)
  //|
}

// Catch-all tokens.  Must be last.
// Any non-whitespace.  Causes a parser exception, rather than a
// token manager error (with hidden line numbers).
// Only bad IRIs (e.g. spaces) now give unhelpful parse errors.
// NOTE(review): the leading "#" declares UNKNOWN as a private regular
// expression, so as written it is never produced as a token on its own.
// Left unchanged: with no whitespace skipping, a public version would win
// longest-match against most other tokens.
TOKEN:
{
  <#UNKNOWN: (~[" ","\t","\n","\r","\f" ])+ >
}

#include "copyright.inc"

/*
# Local Variables:
# tab-width: 4
# indent-tabs-mode: nil
# comment-default-style: "//"
# End:
*/