View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  
19  package org.apache.hadoop.chukwa.util;
20  
21  
/**
 * Constants and helper methods for escaping and un-escaping record separators
 * inside a record's text.
 *
 * <p>Escaping means inserting {@link #RECORD_SEPARATOR_ESCAPE_SEQ} immediately
 * before an occurrence of the record separator; recovering removes it again.
 * Separators are always matched as literal text, never as regular expressions.
 */
public class RecordConstants {
  static final char[] CTRL_A = { '\u0001' };
  static final char[] CTRL_B = { '\u0002' };
  static final char[] CTRL_C = { '\u0003' };
  static final char[] CTRL_D = { '\u0004' };
  // public static final String FIELD_SEPARATOR = new String(CTRL_A);
  public static final String DEFAULT_FIELD_SEPARATOR = "-#-";
  public static final String DEFAULT_RECORD_SEPARATOR = "\n";

  /**
   * Sequence inserted in front of a record separator to mark it as escaped.
   *
   * <p>May want this to be very obscure, e.g.
   * <code>new String(CTRL_B) + new String(CTRL_C) + new String(CTRL_D)</code>.
   */
  public static final String RECORD_SEPARATOR_ESCAPE_SEQ = new String(CTRL_D);

  /**
   * Insert the default chukwa escape sequence in <code>record</code> before all
   * occurrences of <code>recordSeparator</code> <i>except</i> the final one if
   * the final record separator occurs at the end of the <code>record</code>.
   * If <code>record</code> does not end with <code>recordSeparator</code>,
   * every occurrence is escaped and the record is otherwise returned intact.
   *
   * @param recordSeparator The record separator that we are escaping, matched
   *        as literal text. This is chunk source application specific
   * @param record The string representing the entire record, including the
   *        final record delimiter
   * @return The string with appropriate <code>recordSeparator</code>s escaped
   */
  public static String escapeAllButLastRecordSeparator(String recordSeparator,
      String record) {
    if (!record.endsWith(recordSeparator)) {
      // No trailing separator to preserve: escape everything rather than
      // discarding the record's content.
      return escapeAllRecordSeparators(recordSeparator, record);
    }
    // Escape the part in front of the trailing separator, then re-append the
    // trailing separator unescaped.
    String withoutTrailingSeparator = record.substring(0, record.length()
        - recordSeparator.length());
    return escapeAllRecordSeparators(recordSeparator, withoutTrailingSeparator)
        + recordSeparator;
  }

  /**
   * Insert the default chukwa escape sequence in <code>record</code> before all
   * occurrences of <code>recordSeparator</code>. This is assuming that you are
   * not passing the final record separator in with the <code>record</code>,
   * because it would be escaped too.
   *
   * @param recordSeparator The record separator that we are escaping, matched
   *        as literal text. This is chunk source application specific
   * @param record The string representing the record; any trailing record
   *        separator it contains is escaped like every other occurrence
   * @return The string with all <code>recordSeparator</code>s escaped
   */
  public static String escapeAllRecordSeparators(String recordSeparator,
      String record) {
    // String.replace is literal; replaceAll would interpret the separator as a
    // regex and '$' or '\' in the replacement as group references.
    return record.replace(recordSeparator, RECORD_SEPARATOR_ESCAPE_SEQ
        + recordSeparator);
  }

  /**
   * Remove the default chukwa escape sequence from in front of every escaped
   * occurrence of <code>recordSeparator</code> in <code>record</code>, i.e.
   * replace each escape-sequence-plus-separator pair with the bare separator.
   *
   * @param recordSeparator The record separator whose escaping is being
   *        removed, matched as literal text
   * @param record The string containing escaped record separators
   * @return The string with the escape sequence removed from in front of each
   *         <code>recordSeparator</code>
   */
  public static String recoverRecordSeparators(String recordSeparator,
      String record) {
    return record.replace(RECORD_SEPARATOR_ESCAPE_SEQ + recordSeparator,
        recordSeparator);
  }

}