1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17 package org.apache.accumulo.core.iterators.user;
18
19 import java.io.IOException;
20 import java.util.Collection;
21 import java.util.HashSet;
22 import java.util.Map;
23
24 import org.apache.accumulo.core.data.ArrayByteSequence;
25 import org.apache.accumulo.core.data.ByteSequence;
26 import org.apache.accumulo.core.data.Key;
27 import org.apache.accumulo.core.data.Range;
28 import org.apache.accumulo.core.data.Value;
29 import org.apache.accumulo.core.iterators.IteratorEnvironment;
30 import org.apache.accumulo.core.iterators.IteratorUtil.IteratorScope;
31 import org.apache.accumulo.core.iterators.SortedKeyValueIterator;
32
33 /**
34 * An iterator for deleting whole rows.
35 *
36 * After setting this iterator up for your table, to delete a row insert a row with empty column family, empty column qualifier, empty column visibility, and a
37 * value of DEL_ROW. Do not use empty columns for anything else when using this iterator.
38 *
39 * When using this iterator the locality group containing the row deletes will always be read. The locality group containing the empty column family will
40 * contain row deletes. Always reading this locality group can have an impact on performance.
41 *
42 * For example assume there are two locality groups, one containing large images and one containing small metadata about the images. If row deletes are in the
43 * same locality group as the images, then this will significantly slow down scans and major compactions that are only reading the metadata locality group.
44 * Therefore, you would want to put the empty column family in the locality group that contains the metadata. Another option is to put the empty column in its
45 * own locality group. Which is best depends on your data.
46 *
47 */
48
49 public class RowDeletingIterator implements SortedKeyValueIterator<Key,Value> {
50
51 public static final Value DELETE_ROW_VALUE = new Value("DEL_ROW".getBytes());
52 private SortedKeyValueIterator<Key,Value> source;
53 private boolean propogateDeletes;
54 private ByteSequence currentRow;
55 private boolean currentRowDeleted;
56 private long deleteTS;
57
58 private boolean dropEmptyColFams;
59
60 private static final ByteSequence EMPTY = new ArrayByteSequence(new byte[] {});
61
62 private RowDeletingIterator(SortedKeyValueIterator<Key,Value> source, boolean propogateDeletes2) {
63 this.source = source;
64 this.propogateDeletes = propogateDeletes2;
65 }
66
67 public RowDeletingIterator() {}
68
69 @Override
70 public SortedKeyValueIterator<Key,Value> deepCopy(IteratorEnvironment env) {
71 return new RowDeletingIterator(source.deepCopy(env), propogateDeletes);
72 }
73
74 @Override
75 public Key getTopKey() {
76 return source.getTopKey();
77 }
78
79 @Override
80 public Value getTopValue() {
81 return source.getTopValue();
82 }
83
84 @Override
85 public boolean hasTop() {
86 return source.hasTop();
87 }
88
89 @Override
90 public void init(SortedKeyValueIterator<Key,Value> source, Map<String,String> options, IteratorEnvironment env) throws IOException {
91 this.source = source;
92 this.propogateDeletes = (env.getIteratorScope() == IteratorScope.majc && !env.isFullMajorCompaction()) || env.getIteratorScope() == IteratorScope.minc;
93 }
94
95 @Override
96 public void next() throws IOException {
97 source.next();
98 consumeDeleted();
99 consumeEmptyColFams();
100 }
101
102 private void consumeEmptyColFams() throws IOException {
103 while (dropEmptyColFams && source.hasTop() && source.getTopKey().getColumnFamilyData().length() == 0) {
104 source.next();
105 consumeDeleted();
106 }
107 }
108
109 private boolean isDeleteMarker(Key key, Value val) {
110 return key.getColumnFamilyData().length() == 0 && key.getColumnQualifierData().length() == 0 && key.getColumnVisibilityData().length() == 0
111 && val.equals(DELETE_ROW_VALUE);
112 }
113
114 private void consumeDeleted() throws IOException {
115
116 while (source.hasTop()) {
117 if (currentRowDeleted) {
118 while (source.hasTop() && currentRow.equals(source.getTopKey().getRowData()) && source.getTopKey().getTimestamp() <= deleteTS) {
119 source.next();
120 }
121
122 if (source.hasTop() && !currentRow.equals(source.getTopKey().getRowData())) {
123 currentRowDeleted = false;
124 }
125 }
126
127 if (!currentRowDeleted && source.hasTop() && isDeleteMarker(source.getTopKey(), source.getTopValue())) {
128 currentRow = source.getTopKey().getRowData();
129 currentRowDeleted = true;
130 deleteTS = source.getTopKey().getTimestamp();
131
132 if (propogateDeletes)
133 break;
134 } else {
135 break;
136 }
137 }
138
139 }
140
141 @Override
142 public void seek(Range range, Collection<ByteSequence> columnFamilies, boolean inclusive) throws IOException {
143
144 if (inclusive && !columnFamilies.contains(EMPTY)) {
145 columnFamilies = new HashSet<ByteSequence>(columnFamilies);
146 columnFamilies.add(EMPTY);
147 dropEmptyColFams = true;
148 } else if (!inclusive && columnFamilies.contains(EMPTY)) {
149 columnFamilies = new HashSet<ByteSequence>(columnFamilies);
150 columnFamilies.remove(EMPTY);
151 dropEmptyColFams = true;
152 } else {
153 dropEmptyColFams = false;
154 }
155
156 currentRowDeleted = false;
157
158 if (range.getStartKey() != null) {
159
160 Range newRange = new Range(new Key(range.getStartKey().getRow()), true, range.getEndKey(), range.isEndKeyInclusive());
161 source.seek(newRange, columnFamilies, inclusive);
162 consumeDeleted();
163 consumeEmptyColFams();
164
165 if (source.hasTop() && range.beforeStartKey(source.getTopKey())) {
166 source.seek(range, columnFamilies, inclusive);
167 consumeDeleted();
168 consumeEmptyColFams();
169 }
170 } else {
171 source.seek(range, columnFamilies, inclusive);
172 consumeDeleted();
173 consumeEmptyColFams();
174 }
175
176 }
177
178 }