/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ /** * This grammar is used to process %declare and %default commands * within pig script, and substitute parameter values */ options { // Generate non-static functions STATIC = false; IGNORE_CASE = true; JAVA_UNICODE_ESCAPE = true; //DEBUG_PARSER = true; LOOKAHEAD = 2; } PARSER_BEGIN(PigFileParser) package org.apache.pig.tools.parameters; import java.io.IOException; import java.io.PrintWriter; import java.util.Hashtable; import java.util.Stack; import java.io.Writer; import java.lang.StringBuilder; //warnings in by code generated by javacc cannot be fixed here, // so suppressing all warnings for this class. 
// But this does not help in
// suppressing warnings in other classes generated by this .jj file.
/**
 * Parser for the Pig parameter-substitution preprocessor.
 *
 * The grammar productions of this file write the processed script to
 * {@code out} and use {@code pc} to substitute parameter values and to
 * register parameter definitions. Both must be set before parsing starts.
 */
@SuppressWarnings("all")
public class PigFileParser {
    /** Context the productions call to define and substitute parameters. */
    private PreprocessorContext pc;
    /** Destination for the processed script text. */
    private Writer out;

    /** Sets the preprocessor context used by the grammar actions. */
    public void setContext(PreprocessorContext pc) {
        this.pc = pc;
    }

    /** Sets the writer that receives the processed output. */
    public void setOutputWriter(Writer out) {
        this.out = out;
    }

    /**
     * Strips one pair of matching surrounding quotes from a string.
     *
     * @param s the text to unquote; may be null or empty
     * @return {@code s} without its first and last character when both are
     *         the same quote character (single or double quote); otherwise
     *         {@code s} unchanged
     */
    private static String unquote(String s) {
        // Fewer than two characters cannot hold an opening and a closing
        // quote; returning early avoids charAt(0) on "" and substring(1, 0)
        // on a lone quote, both of which used to throw.
        if (s == null || s.length() < 2) {
            return s;
        }
        char first = s.charAt(0);
        char last = s.charAt(s.length() - 1);
        if ((first == '\'' || first == '"') && first == last) {
            return s.substring(1, s.length() - 1);
        }
        return s;
    }
}
PARSER_END(PigFileParser)

// State shared by the lexer actions below.
TOKEN_MGR_DECLS : {
    // Nesting depth of '{' ... '}' counted by the block-level rules.
    int pigBlockLevel = 0;
    // Not referenced by any lexer action visible in this grammar.
    int funcBlockLevel = 0;
    // Nesting depth of '(' ... ')' counted by the schema rules.
    int tupleSchemaLevel = 0;
    // Nesting depth of '{' ... '}' counted by the schema rules.
    int bagSchemaLevel = 0;
    // Nesting depth of '{' ... '}' counted by the bag-constant rules.
    int bagConstantLevel = 0;
    // Lexical state that nested states switch back to via SwitchTo(prevState).
    int prevState = DEFAULT;
    // Saved lexical states; typed so that pop() can be returned as an int.
    Stack<Integer> stack = new Stack<Integer>();

    // Returns the most recently saved state (removing it from the stack),
    // or the given fallback state when nothing has been saved.
    public int getState(int state) {
        if(!stack.empty()) return stack.pop();
        return state;
    }

    // Saves a lexical state for a later getState() call.
    public void saveState(int state) {
        stack.push(state);
    }
}

// NOTE(review): in this copy the lexical-state prefixes that normally precede
// MORE/TOKEN and the named-token references inside the regular expressions are
// not visible (see the empty "()" groups below). The state named in each
// comment is inferred from the actions - confirm against the upstream grammar.

// Presumably the DEFAULT state: a "define ... (" prefix starts a Pig statement.
MORE : { <"define" ()+ ()* "(" > : PIG_START }

// Presumably the PIG_START state: accumulates a Pig statement. Nested
// constructs record PIG_START in prevState so they can switch back to it.
MORE :
{
    <"'"> {prevState = PIG_START;} : IN_STRING
|   <"`"> {prevState = PIG_START;} : IN_COMMAND
|   <(" " | "\t")+["A","a"]["S","s"](" " | "\t")+ > {prevState = PIG_START;} : SCHEMA_DEFINITION
|   <(" " | "\t")+["G","g"]["E","e"]["N","n"]["E","e"]["R","r"]["A","a"]["T","t"]["E","e"](" " | "\t")+ > {prevState = PIG_START;} : GENERATE
|   <"{"> {pigBlockLevel = 1;} : IN_BLOCK
    // "if (true)" presumably keeps the generated code after the throw reachable for javac.
|   <"}"> {if (true) throw new TokenMgrError("Unmatched '}'", TokenMgrError.LEXICAL_ERROR);}
|   <";"> : PIG_END
|   <"--"> {prevState = PIG_START;} : SINGLE_LINE_COMMENT
|   <"/*"> {prevState = PIG_START;} : MULTI_LINE_COMMENT
|   <("\n" | "\r" | "\r\n")>
|   <(~[])>
}

// Presumably SINGLE_LINE_COMMENT: a line break returns to the saved state.
MORE :
{
    <("\n" | "\r" | "\r\n")> {SwitchTo(prevState);}
|   <(~[])>
}

// Presumably MULTI_LINE_COMMENT: the closing "*/" returns to the saved state.
MORE :
{
    <"*/"> {SwitchTo(prevState);}
|   <("\n" | "\r" | "\r\n")>
|   <(~[])>
}

// Presumably IN_STRING: a backslash-escaped quote is consumed as a unit and
// does not end the string; a bare quote returns to the saved state.
MORE :
{
    <"\\'">
|   <"'"> { SwitchTo(prevState);}
|   <("\n" | "\r" | "\r\n")>
|   <(~[])>
}

// Presumably IN_COMMAND: same scheme as above for backtick-quoted text.
MORE :
{
    <"\\`">
|   <"`"> { SwitchTo(prevState);}
|   <("\n" | "\r" | "\r\n")>
|   <(~[])>
}

// Presumably GENERATE: nested constructs switch back here via prevState.
MORE :
{
    <"{"> { bagConstantLevel++; prevState =
getState(prevState); saveState(prevState); prevState = GENERATE; } : BAG_CONSTANT
    // The action above and the one below share a pattern: getState() pops the
    // saved state if there is one (else keeps prevState), saveState() pushes it
    // back, and prevState is then set to GENERATE so the nested state's
    // SwitchTo(prevState) returns here. The stack thus remembers the state
    // to return to once this clause ends.
|   <(" " | "\t")+["A","a"]["S","s"](" " | "\t")+>
    {
        prevState = getState(prevState);
        saveState(prevState);
        prevState = GENERATE;
    } : SCHEMA_DEFINITION
    // ';' ends the clause: restore the saved state. When that state is
    // PIG_START the ';' is pushed back onto the input and removed from the
    // image, presumably so that it is scanned again in that state.
|   <";">
    {
        prevState = getState(prevState);
        if(prevState == PIG_START) {
            input_stream.backup(1);
            image.deleteCharAt(image.length()-1);
        }
        SwitchTo(prevState);
    }
|   <("\n" | "\r" | "\r\n")>
|   <(~[])>
}

// NOTE(review): the lexical-state prefixes of the MORE/TOKEN blocks and several
// token definitions are not visible in this copy; the state named in each
// comment below is inferred from the actions - confirm against upstream.

// Presumably SCHEMA_DEFINITION: tracks '(' ')' and '{' '}' nesting and returns
// to prevState once both nesting counters are back at zero.
MORE :
{
    <"("> {tupleSchemaLevel++;}
|   <")"> {tupleSchemaLevel--; if ((tupleSchemaLevel == 0) && (bagSchemaLevel == 0)) SwitchTo(prevState); }
|   <"{"> {bagSchemaLevel++;}
|   <"}"> {bagSchemaLevel--; if ((tupleSchemaLevel == 0) && (bagSchemaLevel == 0)) SwitchTo(prevState); }
    // A ',' or ';' outside any nesting ends the schema; the character is pushed
    // back and dropped from the image before switching to prevState.
|   <("," | ";" )>
    {
        if ((tupleSchemaLevel == 0) && (bagSchemaLevel == 0)) {
            input_stream.backup(1);
            image.deleteCharAt(image.length()-1);
            SwitchTo(prevState);
        }
    }
|   <("\n" | "\r" | "\r\n")>
|   <(~[])>
}

// Presumably BAG_CONSTANT: counts brace nesting and returns to prevState when
// the outermost '}' is reached.
MORE :
{
    <"{"> {bagConstantLevel++;}
|   <"}"> {bagConstantLevel--; if (bagConstantLevel == 0) SwitchTo(prevState);}
|   <("\n" | "\r" | "\r\n")>
|   <(~[])>
}

// Presumably IN_BLOCK: text inside a '{' ... '}' block. Nested constructs
// record IN_BLOCK in prevState; the '}' that brings pigBlockLevel to zero
// (with an optional trailing ';') switches to PIG_END.
MORE :
{
    <"\""> {prevState = IN_BLOCK;} : IN_DOUBLE_QUOTED_STRING
|   <(" " | "\t")+["A","a"]["S","s"](" " | "\t")+ > {prevState = IN_BLOCK;} : SCHEMA_DEFINITION
|   <(" " | "\t")+["G","g"]["E","e"]["N","n"]["E","e"]["R","r"]["A","a"]["T","t"]["E","e"](" " | "\t")+> {prevState = IN_BLOCK;} : GENERATE
|   <"{"> {pigBlockLevel++;}
|   <"}"(";")?> {pigBlockLevel--; if (pigBlockLevel == 0) SwitchTo(PIG_END);}
|   <"'"> {prevState = IN_BLOCK;} : IN_STRING
|   <"`"> {prevState = IN_BLOCK;} : IN_COMMAND
|   <"--"> {prevState = IN_BLOCK;} : SINGLE_LINE_COMMENT
|   <"/*"> {prevState = IN_BLOCK;} : MULTI_LINE_COMMENT
|   <("\n" | "\r" | "\r\n")>
|   <(~[])>
}

// Presumably IN_DOUBLE_QUOTED_STRING: an escaped double quote is consumed as a
// unit; a bare double quote returns to prevState.
MORE :
{
    <"\\\"">
|   <"\""> { SwitchTo(prevState);}
|   <("\n" | "\r" | "\r\n")>
|   <(~[])>
}

// Emits the text accumulated by the MORE rules as one token and returns to
// DEFAULT. The token definition itself is not visible in this copy.
TOKEN : { { matchedToken.image = image.toString(); }: DEFAULT }

// NOTE(review): token definitions missing from this copy.
TOKEN : { | | }

// comments(single line and multi-line)
TOKEN : { }

// Private helper character classes ('#' tokens are usable only inside other
// token definitions).
TOKEN: { <#LETTER : ["a"-"z", "A"-"Z"] > | <#DIGIT : ["0"-"9"] > |
<#SPECIALCHAR : ["_"] >
| <#DOLLAR : ["$"]>
}

// NOTE(review): the token definitions of the next two TOKEN blocks, and the
// token references inside the productions below (the right-hand side of
// "strTok =", "id=", "val="), are not visible in this copy - confirm against
// the upstream grammar.
TOKEN : { | }

TOKEN : { )*( | | )*> | | |
    // see others() rule for use of OTHER and NOT_OTHER_CHAR
    // others() is supposed to match 'everything else'. To ensure that others()
    // doesn't swallow the other tokens (all the ones above), it uses two tokens,
    // OTHER and NOT_OTHER_CHAR.
    // NOT_OTHER_CHAR consists of the first characters of the other tokens, and
    // OTHER consists of one or more chars that don't belong to NOT_OTHER_CHAR.
    // Since the tokeniser prefers the longest match, the other tokens will get
    // matched instead of NOT_OTHER_CHAR.
    | }

// Top-level production: parses zero or more input() units.
void Parse() throws IOException : {}
{
    (input())*
}

// One unit of the script. Depending on the alternative matched, it copies a
// token image to the output writer, processes a parameter definition via
// param_value(), or writes a line after parameter substitution.
void input() throws IOException :
{
    String s;
    Token strTok;
}
{
    // A token whose image is copied to the output unchanged.
    strTok =
    {
        //System.out.println("Pig image: \n" + strTok.image);
        out.append(strTok.image );
    }
    |
    // Parameter definition that may overwrite an existing value
    // (presumably the %declare command - confirm).
    (
        param_value(true) // overwrite=true
    )
    |
    // Parameter definition with overwrite disabled
    // (presumably the %default command - confirm).
    (
        param_value(false) // overwrite=false
    )
    |
    s = paramString(){}
    {
        //process an ordinary pig line - perform substitution
        String sub_line = pc.substitute(s);
        out.append(sub_line );
    }
    |
    // Another token whose image is copied to the output unchanged.
    strTok = {}
    {
        out.append(strTok.image );
    }
    |
    write_ignore_toks()
}

// Parses the name and value of a parameter definition and registers it with
// the preprocessor context.
//   overwrite - passed through unchanged to pc.processOrdLine / pc.processShellCmd.
// The locals 'strTok' is declared but not used in the visible production.
void param_value(boolean overwrite) throws IOException:
{
    Token id, val;
    String s;
    String other;
    Token strTok;
}
{
    (ignore_toks_nonewline())*
    id=
    (ignore_toks_nonewline())*
    (
        // Value matched by others(): registered once the newline is written.
        s=others()
        (ignore_toks_nonewline())*
        write_newline()
        { pc.processOrdLine(id.image , s, overwrite);}
        |
        val= // this construct is for cases like a.123
        {s = val.image;}
        // Optional remainder matched by others() is appended to the value.
        (
            LOOKAHEAD( 2 )
            other = others()
            (ignore_toks_nonewline())*
            write_newline()
            { s += other; }
        )?
{pc.processOrdLine(id.image , s, overwrite);}
        |
        // Value handed to the context as a shell command.
        val=
        { pc.processShellCmd(id.image , val.image, overwrite); }
        |
        // Value with one pair of surrounding quotes removed by unquote().
        val=
        {
            s = unquote(val.image);
            pc.processOrdLine(id.image, s, overwrite);
        }
    )
}

// NOTE(review): the token references after "t=" / "t =" in the productions
// below are not visible in this copy - confirm against the upstream grammar.

//match others, see comments above
//on description of OTHER , NOT_OTHER_CHAR
// Concatenates the images of one leading token and any number of following
// tokens, and returns the combined text.
String others() throws IOException :
{
    Token t;
    StringBuilder sb = new StringBuilder();
}
{
    // First token: one of two alternatives.
    (
        t=
        {
            sb.append(t.image);
        }
        |
        t=
        {
            sb.append(t.image);
        }
    )
    // Zero or more further tokens, each appended in order.
    (
        t=
        {
            sb.append(t.image);
        }
        |
        t=
        {
            sb.append(t.image);
        }
        |
        t=
        {
            //eg of this match is a unquoted filename - /d1/abc/
            sb.append(t.image);
        }
        |
        t=
        {
            sb.append(t.image);
        }
        |
        t=
        {
            sb.append(t.image);
        }
    )*{ return sb.toString(); }
}

// a string that can contain parameter
// Returns either the text matched by others() or the image of a single token.
String paramString() throws IOException :
{Token t; String str; }
{
    str = others() { return str; }
    |
    t= { return t.image; }
    |
    t = {} { return t.image; }
}

// write the newlines,spaces and comments to preserve formatting
// Appends the text returned by ignore_toks() to the output writer.
void write_ignore_toks() throws IOException :
{ String str;}
{
    str = ignore_toks(){ out.append(str); }
}

// Matches one ignorable token and returns its image; going by the name,
// newlines are excluded here (token names not visible - confirm).
String ignore_toks_nonewline() throws IOException :
{ Token t; }
{
    t = { return t.image; }
    |
    t = { return t.image; }
}

// match the newlines,spaces and comments
// Returns the image of a single token, or the text from space_or_newline().
String ignore_toks() throws IOException :
{ Token t; String str; }
{
    t = { return t.image; }
    |
    str = space_or_newline(){ return str; }
}

// Matches one of two tokens (a space or a newline, going by the name) and
// returns its image.
String space_or_newline() :
{ Token t; }
{
    t = { return t.image; }
    |
    t = { return t.image; }
}

// Matches one token (a newline, going by the name) and copies its image to
// the output writer.
void write_newline() throws IOException:
{ Token t; }
{
    t = { out.append(t.image); }
}