View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *     http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.accumulo.core.iterators.user;
18  
19  import java.io.IOException;
20  import java.util.Arrays;
21  import java.util.Map;
22  
23  import org.apache.accumulo.core.client.IteratorSetting;
24  import org.apache.accumulo.core.data.ByteSequence;
25  import org.apache.accumulo.core.data.Key;
26  import org.apache.accumulo.core.data.Value;
27  import org.apache.accumulo.core.iterators.Filter;
28  import org.apache.accumulo.core.iterators.IteratorEnvironment;
29  import org.apache.accumulo.core.iterators.SortedKeyValueIterator;
30  
31  /**
32   * This iterator provides exact string matching. It searches both the Key and Value for the string. The string to match is specified by the "term" option.
33   */
34  public class GrepIterator extends Filter {
35    
36    private byte term[];
37    
38    @Override
39    public boolean accept(Key k, Value v) {
40      return match(v.get()) || match(k.getRowData()) || match(k.getColumnFamilyData()) || match(k.getColumnQualifierData());
41    }
42    
43    private boolean match(ByteSequence bs) {
44      return indexOf(bs.getBackingArray(), bs.offset(), bs.length(), term) >= 0;
45    }
46    
47    private boolean match(byte[] ba) {
48      return indexOf(ba, 0, ba.length, term) >= 0;
49    }
50    
51    // copied code below from java string and modified
52    
53    private static int indexOf(byte[] source, int sourceOffset, int sourceCount, byte[] target) {
54      byte first = target[0];
55      int targetCount = target.length;
56      int max = sourceOffset + (sourceCount - targetCount);
57      
58      for (int i = sourceOffset; i <= max; i++) {
59        /* Look for first character. */
60        if (source[i] != first) {
61          while (++i <= max && source[i] != first)
62            continue;
63        }
64        
65        /* Found first character, now look at the rest of v2 */
66        if (i <= max) {
67          int j = i + 1;
68          int end = j + targetCount - 1;
69          for (int k = 1; j < end && source[j] == target[k]; j++, k++)
70            continue;
71          
72          if (j == end) {
73            /* Found whole string. */
74            return i - sourceOffset;
75          }
76        }
77      }
78      return -1;
79    }
80    
81    @Override
82    public SortedKeyValueIterator<Key,Value> deepCopy(IteratorEnvironment env) {
83      GrepIterator copy = (GrepIterator) super.deepCopy(env);
84      copy.term = Arrays.copyOf(term, term.length);
85      return copy;
86    }
87    
88    @Override
89    public void init(SortedKeyValueIterator<Key,Value> source, Map<String,String> options, IteratorEnvironment env) throws IOException {
90      super.init(source, options, env);
91      term = options.get("term").getBytes();
92    }
93    
94    /**
95     * Encode the grep term as an option for a ScanIterator
96     * 
97     * @param cfg
98     * @param term
99     */
100   public static void setTerm(IteratorSetting cfg, String term) {
101     cfg.addOption("term", term);
102   }
103 }