View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.imaging.common;
18  
19  import java.io.ByteArrayInputStream;
20  import java.io.ByteArrayOutputStream;
21  import java.io.IOException;
22  import java.io.InputStream;
23  import java.io.PushbackInputStream;
24  import java.util.Map;
25  
26  import org.apache.commons.imaging.ImagingException;
27  
28  /**
29   * A rudimentary preprocessor and parser for the C programming language.
30   *
31   * FIXME replace this by a parser generated via ANTLR (if we really need it?!)
32   */
33  public class BasicCParser {
34      /**
35       * Parses the hexadecimal-base escape-sequence found at index {@code i} of {@code string}.
36       *
37       * <p>
38       * Helper-function for {@code unescapeString()}.
39       * </p>
40       *
41       * @param i             the index of the escape-sequence in the string
42       * @param stringBuilder the stringBuilder to append the escape-char to
43       * @param string        the string whose chars are parsed
44       * @return the new index i
45       * @since 1.0-alpha3
46       */
47      private static int appendHex(int i, final StringBuilder stringBuilder, final String string) throws ImagingException {
48          if (i + 2 >= string.length()) {
49              throw new ImagingException("Parsing XPM file failed, " + "hex constant in string too short");
50          }
51          final char hex1 = string.charAt(i + 1);
52          final char hex2 = string.charAt(i + 2);
53          i += 2;
54          int constant;
55          try {
56              constant = Integer.parseInt(hex1 + Character.toString(hex2), 16);
57          } catch (final NumberFormatException nfe) {
58              throw new ImagingException("Parsing XPM file failed, " + "hex constant invalid", nfe);
59          }
60          stringBuilder.append((char) constant);
61          return i;
62      }
63  
64      /**
65       * Parses the octal-base escape-sequence found at index {@code i} of {@code string}.
66       *
67       * <p>
68       * Helper-function for {@code unescapeString()}.
69       * </p>
70       *
71       * @param i             the index of the escape-sequence in the string
72       * @param stringBuilder the stringBuilder to append the escape-char to
73       * @param string        the string whose chars are parsed
74       * @return the new index i
75       * @since 1.0-alpha3
76       */
77      private static int appendOct(int i, final StringBuilder stringBuilder, final String string) {
78          int length = 1;
79          if (i + 1 < string.length() && '0' <= string.charAt(i + 1) && string.charAt(i + 1) <= '7') {
80              ++length;
81          }
82          if (i + 2 < string.length() && '0' <= string.charAt(i + 2) && string.charAt(i + 2) <= '7') {
83              ++length;
84          }
85          int constant = 0;
86          for (int j = 0; j < length; j++) {
87              constant *= 8;
88              constant += string.charAt(i + j) - '0';
89          }
90          i += length - 1;
91          stringBuilder.append((char) constant);
92          return i;
93      }
94  
95      /**
96       * Parses the {@code i:th} escape-char in the input {@code string} and appends it to {@code stringBuilder}.
97       *
98       * <p>
99       * Helper-function for {@code unescapeString()}.
100      * </p>
101      *
102      * @param i             the index of the escape-char in the string
103      * @param stringBuilder the stringBuilder to append the escape-char to
104      * @param string        the string whose chars are parsed
105      * @return the new index i
106      * @since 1.0-alpha3
107      */
108     private static int parseEscape(int i, final StringBuilder stringBuilder, final String string) throws ImagingException {
109         final char c = string.charAt(i);
110         switch (c) {
111         case '\\':
112             stringBuilder.append('\\');
113             break;
114         case '"':
115             stringBuilder.append('"');
116             break;
117         case '\'':
118             stringBuilder.append('\'');
119             break;
120         case 'x':
121             i = appendHex(i, stringBuilder, string);
122             break;
123         case '0':
124         case '1':
125         case '2':
126         case '3':
127         case '4':
128         case '5':
129         case '6':
130         case '7':
131             i = appendOct(i, stringBuilder, string);
132             break;
133         case 'a':
134             stringBuilder.append((char) 0x07);
135             break;
136         case 'b':
137             stringBuilder.append((char) 0x08);
138             break;
139         case 'f':
140             stringBuilder.append((char) 0x0c);
141             break;
142         case 'n':
143             stringBuilder.append((char) 0x0a);
144             break;
145         case 'r':
146             stringBuilder.append((char) 0x0d);
147             break;
148         case 't':
149             stringBuilder.append((char) 0x09);
150             break;
151         case 'v':
152             stringBuilder.append((char) 0x0b);
153             break;
154         default:
155             throw new ImagingException("Parsing XPM file failed, " + "invalid escape sequence");
156         }
157         return i;
158 
159     }
160 
161     public static ByteArrayOutputStream preprocess(final InputStream is, final StringBuilder firstComment, final Map<String, String> defines)
162             throws IOException, ImagingException {
163         boolean inSingleQuotes = false;
164         boolean inString = false;
165         boolean inComment = false;
166         boolean inDirective = false;
167         boolean hadSlash = false;
168         boolean hadStar = false;
169         boolean hadBackSlash = false;
170         final ByteArrayOutputStream out = new ByteArrayOutputStream();
171         boolean seenFirstComment = firstComment == null;
172         final StringBuilder directiveBuffer = new StringBuilder();
173         for (int c = is.read(); c != -1; c = is.read()) {
174             if (inComment) {
175                 if (c == '*') {
176                     if (hadStar && !seenFirstComment) {
177                         firstComment.append('*');
178                     }
179                     hadStar = true;
180                 } else if (c == '/') {
181                     if (hadStar) {
182                         hadStar = false;
183                         inComment = false;
184                         seenFirstComment = true;
185                     } else if (!seenFirstComment) {
186                         firstComment.append((char) c);
187                     }
188                 } else {
189                     if (hadStar && !seenFirstComment) {
190                         firstComment.append('*');
191                     }
192                     hadStar = false;
193                     if (!seenFirstComment) {
194                         firstComment.append((char) c);
195                     }
196                 }
197             } else if (inSingleQuotes) {
198                 switch (c) {
199                 case '\\':
200                     if (hadBackSlash) {
201                         out.write('\\');
202                         out.write('\\');
203                         hadBackSlash = false;
204                     } else {
205                         hadBackSlash = true;
206                     }
207                     break;
208                 case '\'':
209                     if (hadBackSlash) {
210                         out.write('\\');
211                         hadBackSlash = false;
212                     } else {
213                         inSingleQuotes = false;
214                     }
215                     out.write('\'');
216                     break;
217                 case '\r':
218                 case '\n':
219                     throw new ImagingException("Unterminated single quote in file");
220                 default:
221                     if (hadBackSlash) {
222                         out.write('\\');
223                         hadBackSlash = false;
224                     }
225                     out.write(c);
226                     break;
227                 }
228             } else if (inString) {
229                 switch (c) {
230                 case '\\':
231                     if (hadBackSlash) {
232                         out.write('\\');
233                         out.write('\\');
234                         hadBackSlash = false;
235                     } else {
236                         hadBackSlash = true;
237                     }
238                     break;
239                 case '"':
240                     if (hadBackSlash) {
241                         out.write('\\');
242                         hadBackSlash = false;
243                     } else {
244                         inString = false;
245                     }
246                     out.write('"');
247                     break;
248                 case '\r':
249                 case '\n':
250                     throw new ImagingException("Unterminated string in file");
251                 default:
252                     if (hadBackSlash) {
253                         out.write('\\');
254                         hadBackSlash = false;
255                     }
256                     out.write(c);
257                     break;
258                 }
259             } else if (inDirective) {
260                 if (c == '\r' || c == '\n') {
261                     inDirective = false;
262                     final String[] tokens = tokenizeRow(directiveBuffer.toString());
263                     if (tokens.length < 2 || tokens.length > 3) {
264                         throw new ImagingException("Bad preprocessor directive");
265                     }
266                     if (!tokens[0].equals("define")) {
267                         throw new ImagingException("Invalid/unsupported " + "preprocessor directive '" + tokens[0] + "'");
268                     }
269                     defines.put(tokens[1], tokens.length == 3 ? tokens[2] : null);
270                     directiveBuffer.setLength(0);
271                 } else {
272                     directiveBuffer.append((char) c);
273                 }
274             } else {
275                 switch (c) {
276                 case '/':
277                     if (hadSlash) {
278                         out.write('/');
279                     }
280                     hadSlash = true;
281                     break;
282                 case '*':
283                     if (hadSlash) {
284                         inComment = true;
285                         hadSlash = false;
286                     } else {
287                         out.write(c);
288                     }
289                     break;
290                 case '\'':
291                     if (hadSlash) {
292                         out.write('/');
293                     }
294                     hadSlash = false;
295                     out.write(c);
296                     inSingleQuotes = true;
297                     break;
298                 case '"':
299                     if (hadSlash) {
300                         out.write('/');
301                     }
302                     hadSlash = false;
303                     out.write(c);
304                     inString = true;
305                     break;
306                 case '#':
307                     if (defines == null) {
308                         throw new ImagingException("Unexpected preprocessor directive");
309                     }
310                     inDirective = true;
311                     break;
312                 default:
313                     if (hadSlash) {
314                         out.write('/');
315                     }
316                     hadSlash = false;
317                     out.write(c);
318                     // Only whitespace allowed before first comment:
319                     if (c != ' ' && c != '\t' && c != '\r' && c != '\n') {
320                         seenFirstComment = true;
321                     }
322                     break;
323                 }
324             }
325         }
326         if (hadSlash) {
327             out.write('/');
328         }
329         if (hadStar) {
330             out.write('*');
331         }
332         if (inString) {
333             throw new ImagingException("Unterminated string at the end of file");
334         }
335         if (inComment) {
336             throw new ImagingException("Unterminated comment at the end of file");
337         }
338         return out;
339     }
340 
341     public static String[] tokenizeRow(final String row) {
342         final String[] tokens = row.split("[ \t]");
343         int numLiveTokens = 0;
344         for (final String token : tokens) {
345             if (token != null && !token.isEmpty()) {
346                 ++numLiveTokens;
347             }
348         }
349         final String[] liveTokens = Allocator.array(numLiveTokens, String[]::new, 24);
350         int next = 0;
351         for (final String token : tokens) {
352             if (token != null && !token.isEmpty()) {
353                 liveTokens[next++] = token;
354             }
355         }
356         return liveTokens;
357     }
358 
359     public static void unescapeString(final StringBuilder stringBuilder, final String string) throws ImagingException {
360         if (string.length() < 2) {
361             throw new ImagingException("Parsing XPM file failed, " + "string is too short");
362         }
363         if (string.charAt(0) != '"' || string.charAt(string.length() - 1) != '"') {
364             throw new ImagingException("Parsing XPM file failed, " + "string not surrounded by '\"'");
365         }
366         boolean hadBackSlash = false;
367         for (int i = 1; i < string.length() - 1; i++) {
368             final char c = string.charAt(i);
369             if (hadBackSlash) {
370                 i = parseEscape(i, stringBuilder, string);
371                 hadBackSlash = false;
372             } else if (c == '\\') {
373                 hadBackSlash = true;
374             } else if (c == '"') {
375                 throw new ImagingException("Parsing XPM file failed, " + "extra '\"' found in string");
376             } else {
377                 stringBuilder.append(c);
378             }
379         }
380         if (hadBackSlash) {
381             throw new ImagingException("Parsing XPM file failed, " + "unterminated escape sequence found in string");
382         }
383     }
384 
    /**
     * The source being tokenized. It is a pushback stream so that a byte read one too far can be handed back; the single-argument
     * {@code PushbackInputStream} constructor used below provides a one-byte pushback buffer.
     */
    private final PushbackInputStream is;

    /**
     * Constructs a parser that reads its tokens from {@code is}.
     *
     * @param is the in-memory source to tokenize
     */
    public BasicCParser(final ByteArrayInputStream is) {
        this.is = new PushbackInputStream(is);
    }
390 
391     public String nextToken() throws IOException, ImagingException {
392         // I don't know how complete the C parsing in an XPM file
393         // is meant to be, this is just the very basics...
394 
395         boolean inString = false;
396         boolean inIdentifier = false;
397         boolean hadBackSlash = false;
398         final StringBuilder token = new StringBuilder();
399         for (int c = is.read(); c != -1; c = is.read()) {
400             if (inString) {
401                 switch (c) {
402                 case '\\':
403                     token.append('\\');
404                     hadBackSlash = !hadBackSlash;
405                     break;
406                 case '"':
407                     token.append('"');
408                     if (!hadBackSlash) {
409                         return token.toString();
410                     }
411                     hadBackSlash = false;
412                     break;
413                 case '\r':
414                 case '\n':
415                     throw new ImagingException("Unterminated string in XPM file");
416                 default:
417                     token.append((char) c);
418                     hadBackSlash = false;
419                     break;
420                 }
421             } else if (inIdentifier) {
422                 if (!Character.isLetterOrDigit(c) && c != '_') {
423                     is.unread(c);
424                     return token.toString();
425                 }
426                 token.append((char) c);
427             } else if (c == '"') {
428                 token.append('"');
429                 inString = true;
430             } else if (Character.isLetterOrDigit(c) || c == '_') {
431                 token.append((char) c);
432                 inIdentifier = true;
433             } else if (c == '{' || c == '}' || c == '[' || c == ']' || c == '*' || c == ';' || c == '=' || c == ',') {
434                 token.append((char) c);
435                 return token.toString();
436             } else if (c == ' ' || c == '\t' || c == '\r' || c == '\n') {
437                 // ignore
438             } else {
439                 throw new ImagingException("Unhandled/invalid character '" + (char) c + "' found in XPM file");
440             }
441         }
442 
443         if (inIdentifier) {
444             return token.toString();
445         }
446         if (inString) {
447             throw new ImagingException("Unterminated string ends XMP file");
448         }
449         return null;
450     }
451 
452 }