View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *     http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.accumulo.core.iterators.user;
18  
import java.io.IOException;
import java.nio.charset.Charset;
import java.util.Collection;
import java.util.HashSet;
import java.util.Map;

import org.apache.accumulo.core.data.ArrayByteSequence;
import org.apache.accumulo.core.data.ByteSequence;
import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.data.Range;
import org.apache.accumulo.core.data.Value;
import org.apache.accumulo.core.iterators.IteratorEnvironment;
import org.apache.accumulo.core.iterators.IteratorUtil.IteratorScope;
import org.apache.accumulo.core.iterators.SortedKeyValueIterator;
32  
33  /**
34   * An iterator for deleting whole rows.
35   * 
36   * After setting this iterator up for your table, to delete a row insert a row with empty column family, empty column qualifier, empty column visibility, and a
37   * value of DEL_ROW. Do not use empty columns for anything else when using this iterator.
38   * 
39   * When using this iterator the locality group containing the row deletes will always be read. The locality group containing the empty column family will
40   * contain row deletes. Always reading this locality group can have an impact on performance.
41   * 
42   * For example assume there are two locality groups, one containing large images and one containing small metadata about the images. If row deletes are in the
43   * same locality group as the images, then this will significantly slow down scans and major compactions that are only reading the metadata locality group.
44   * Therefore, you would want to put the empty column family in the locality group that contains the metadata. Another option is to put the empty column in its
45   * own locality group. Which is best depends on your data.
46   * 
47   */
48  
49  public class RowDeletingIterator implements SortedKeyValueIterator<Key,Value> {
50    
51    public static final Value DELETE_ROW_VALUE = new Value("DEL_ROW".getBytes());
52    private SortedKeyValueIterator<Key,Value> source;
53    private boolean propogateDeletes;
54    private ByteSequence currentRow;
55    private boolean currentRowDeleted;
56    private long deleteTS;
57    
58    private boolean dropEmptyColFams;
59    
60    private static final ByteSequence EMPTY = new ArrayByteSequence(new byte[] {});
61    
62    private RowDeletingIterator(SortedKeyValueIterator<Key,Value> source, boolean propogateDeletes2) {
63      this.source = source;
64      this.propogateDeletes = propogateDeletes2;
65    }
66    
67    public RowDeletingIterator() {}
68    
69    @Override
70    public SortedKeyValueIterator<Key,Value> deepCopy(IteratorEnvironment env) {
71      return new RowDeletingIterator(source.deepCopy(env), propogateDeletes);
72    }
73    
74    @Override
75    public Key getTopKey() {
76      return source.getTopKey();
77    }
78    
79    @Override
80    public Value getTopValue() {
81      return source.getTopValue();
82    }
83    
84    @Override
85    public boolean hasTop() {
86      return source.hasTop();
87    }
88    
89    @Override
90    public void init(SortedKeyValueIterator<Key,Value> source, Map<String,String> options, IteratorEnvironment env) throws IOException {
91      this.source = source;
92      this.propogateDeletes = (env.getIteratorScope() == IteratorScope.majc && !env.isFullMajorCompaction()) || env.getIteratorScope() == IteratorScope.minc;
93    }
94    
95    @Override
96    public void next() throws IOException {
97      source.next();
98      consumeDeleted();
99      consumeEmptyColFams();
100   }
101   
102   private void consumeEmptyColFams() throws IOException {
103     while (dropEmptyColFams && source.hasTop() && source.getTopKey().getColumnFamilyData().length() == 0) {
104       source.next();
105       consumeDeleted();
106     }
107   }
108   
109   private boolean isDeleteMarker(Key key, Value val) {
110     return key.getColumnFamilyData().length() == 0 && key.getColumnQualifierData().length() == 0 && key.getColumnVisibilityData().length() == 0
111         && val.equals(DELETE_ROW_VALUE);
112   }
113   
114   private void consumeDeleted() throws IOException {
115     // this method tries to do as little work as possible when nothing is deleted
116     while (source.hasTop()) {
117       if (currentRowDeleted) {
118         while (source.hasTop() && currentRow.equals(source.getTopKey().getRowData()) && source.getTopKey().getTimestamp() <= deleteTS) {
119           source.next();
120         }
121         
122         if (source.hasTop() && !currentRow.equals(source.getTopKey().getRowData())) {
123           currentRowDeleted = false;
124         }
125       }
126       
127       if (!currentRowDeleted && source.hasTop() && isDeleteMarker(source.getTopKey(), source.getTopValue())) {
128         currentRow = source.getTopKey().getRowData();
129         currentRowDeleted = true;
130         deleteTS = source.getTopKey().getTimestamp();
131         
132         if (propogateDeletes)
133           break;
134       } else {
135         break;
136       }
137     }
138     
139   }
140   
141   @Override
142   public void seek(Range range, Collection<ByteSequence> columnFamilies, boolean inclusive) throws IOException {
143     
144     if (inclusive && !columnFamilies.contains(EMPTY)) {
145       columnFamilies = new HashSet<ByteSequence>(columnFamilies);
146       columnFamilies.add(EMPTY);
147       dropEmptyColFams = true;
148     } else if (!inclusive && columnFamilies.contains(EMPTY)) {
149       columnFamilies = new HashSet<ByteSequence>(columnFamilies);
150       columnFamilies.remove(EMPTY);
151       dropEmptyColFams = true;
152     } else {
153       dropEmptyColFams = false;
154     }
155     
156     currentRowDeleted = false;
157     
158     if (range.getStartKey() != null) {
159       // seek to beginning of row
160       Range newRange = new Range(new Key(range.getStartKey().getRow()), true, range.getEndKey(), range.isEndKeyInclusive());
161       source.seek(newRange, columnFamilies, inclusive);
162       consumeDeleted();
163       consumeEmptyColFams();
164       
165       if (source.hasTop() && range.beforeStartKey(source.getTopKey())) {
166         source.seek(range, columnFamilies, inclusive);
167         consumeDeleted();
168         consumeEmptyColFams();
169       }
170     } else {
171       source.seek(range, columnFamilies, inclusive);
172       consumeDeleted();
173       consumeEmptyColFams();
174     }
175     
176   }
177   
178 }