View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.regionserver;
19  
20  import java.io.IOException;
21  import java.util.Collection;
22  import java.util.List;
23  import java.util.Map;
24  
25  import org.apache.hadoop.hbase.Cell;
26  import org.apache.hadoop.hbase.CellComparator;
27  import org.apache.hadoop.hbase.HBaseInterfaceAudience;
28  import org.apache.hadoop.hbase.HDFSBlocksDistribution;
29  import org.apache.hadoop.hbase.HRegionInfo;
30  import org.apache.hadoop.hbase.HTableDescriptor;
31  import org.apache.hadoop.hbase.classification.InterfaceAudience;
32  import org.apache.hadoop.hbase.classification.InterfaceStability;
33  import org.apache.hadoop.hbase.client.Append;
34  import org.apache.hadoop.hbase.client.Delete;
35  import org.apache.hadoop.hbase.client.Get;
36  import org.apache.hadoop.hbase.client.Increment;
37  import org.apache.hadoop.hbase.client.IsolationLevel;
38  import org.apache.hadoop.hbase.client.Mutation;
39  import org.apache.hadoop.hbase.client.Put;
40  import org.apache.hadoop.hbase.client.Result;
41  import org.apache.hadoop.hbase.client.RowMutations;
42  import org.apache.hadoop.hbase.client.Scan;
43  import org.apache.hadoop.hbase.conf.ConfigurationObserver;
44  import org.apache.hadoop.hbase.exceptions.FailedSanityCheckException;
45  import org.apache.hadoop.hbase.filter.ByteArrayComparable;
46  import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp;
47  import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.GetRegionInfoResponse.CompactionState;
48  import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.CoprocessorServiceCall;
49  import org.apache.hadoop.hbase.util.Pair;
50  import org.apache.hadoop.hbase.wal.WALSplitter.MutationReplay;
51  
52  import com.google.common.annotations.VisibleForTesting;
53  import com.google.protobuf.Message;
54  import com.google.protobuf.RpcController;
55  import com.google.protobuf.Service;
56  
57  /**
58   * Regions store data for a certain region of a table.  It stores all columns
59   * for each row. A given table consists of one or more Regions.
60   *
61   * <p>A Region is defined by its table and its key extent.
62   *
63   * <p>Locking at the Region level serves only one purpose: preventing the
64   * region from being closed (and consequently split) while other operations
65   * are ongoing. Each row level operation obtains both a row lock and a region
66   * read lock for the duration of the operation. While a scanner is being
67   * constructed, getScanner holds a read lock. If the scanner is successfully
68   * constructed, it holds a read lock until it is closed. A close takes out a
69   * write lock and consequently will block for ongoing operations and will block
70   * new operations from starting while the close is in progress.
71   */
72  @InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.COPROC)
73  @InterfaceStability.Evolving
74  public interface Region extends ConfigurationObserver {
75  
76    ///////////////////////////////////////////////////////////////////////////
77    // Region state
78  
79    /** @return region information for this region */
80    HRegionInfo getRegionInfo();
81  
82    /** @return table descriptor for this region */
83    HTableDescriptor getTableDesc();
84  
85    /** @return true if region is available (not closed and not closing) */
86    boolean isAvailable();
87  
88    /** @return true if region is closed */
89    boolean isClosed();
90  
91    /** @return True if closing process has started */
92    boolean isClosing();
93  
94    /** @return True if region is in recovering state */
95    boolean isRecovering();
96  
97    /** @return True if region is read only */
98    boolean isReadOnly();
99  
100   /**
101    * Return the list of Stores managed by this region
102    * <p>Use with caution.  Exposed for use of fixup utilities.
103    * @return a list of the Stores managed by this region
104    */
105   List<Store> getStores();
106 
107   /**
108    * Return the Store for the given family
109    * <p>Use with caution.  Exposed for use of fixup utilities.
110    * @return the Store for the given family
111    */
112   Store getStore(byte[] family);
113 
114   /** @return list of store file names for the given families */
115   List<String> getStoreFileList(byte [][] columns);
116 
117   /**
118    * Check the region's underlying store files, open the files that have not
119    * been opened yet, and remove the store file readers for store files no
120    * longer available.
121    * @throws IOException
122    */
123   boolean refreshStoreFiles() throws IOException;
124 
125   /** @return the latest sequence number that was read from storage when this region was opened */
126   long getOpenSeqNum();
127 
128   /** @return the max sequence id of flushed data on this region; no edit in memory will have
129    * a sequence id that is less than what is returned here.
130    */
131   long getMaxFlushedSeqId();
132 
133   /** @return the oldest flushed sequence id for the given family; can be beyond
134    * {@link #getMaxFlushedSeqId()} in case where we've flushed a subset of a region's column
135    * families
136    * @deprecated Since version 1.2.0. Exposes too much about our internals; shutting it down.
137    * Do not use.
138    */
139   @VisibleForTesting
140   @Deprecated
141   public long getOldestSeqIdOfStore(byte[] familyName);
142 
143   /**
144    * This can be used to determine the last time all files of this region were major compacted.
145    * @param majorCompactioOnly Only consider HFiles that are the result of major compaction
146    * @return the timestamp of the oldest HFile for all stores of this region
147    */
148   long getOldestHfileTs(boolean majorCompactioOnly) throws IOException;
149 
150   /**
151    * @return map of column family names to max sequence id that was read from storage when this
152    * region was opened
153    */
154   public Map<byte[], Long> getMaxStoreSeqId();
155 
156   /** @return true if loading column families on demand by default */
157   boolean isLoadingCfsOnDemandDefault();
158 
159   /** @return readpoint considering given IsolationLevel */
160   long getReadpoint(IsolationLevel isolationLevel);
161 
162   /**
163    * @return The earliest time a store in the region was flushed. All
164    *         other stores in the region would have been flushed either at, or
165    *         after this time.
166    */
167   long getEarliestFlushTimeForAllStores();
168 
169   ///////////////////////////////////////////////////////////////////////////
170   // Metrics
171 
172   /** @return read requests count for this region */
173   long getReadRequestsCount();
174 
175   /**
176    * Update the read request count for this region
177    * @param i increment
178    */
179   void updateReadRequestsCount(long i);
180 
181   /** @return write request count for this region */
182   long getWriteRequestsCount();
183 
184   /**
185    * Update the write request count for this region
186    * @param i increment
187    */
188   void updateWriteRequestsCount(long i);
189 
190   /** @return memstore size for this region, in bytes */
191   long getMemstoreSize();
192 
193   /** @return the number of mutations processed bypassing the WAL */
194   long getNumMutationsWithoutWAL();
195 
196   /** @return the size of data processed bypassing the WAL, in bytes */
197   long getDataInMemoryWithoutWAL();
198 
199   /** @return the number of blocked requests */
200   long getBlockedRequestsCount();
201 
202   /** @return the number of checkAndMutate guards that passed */
203   long getCheckAndMutateChecksPassed();
204 
205   /** @return the number of failed checkAndMutate guards */
206   long getCheckAndMutateChecksFailed();
207 
208   /** @return the MetricsRegion for this region */
209   MetricsRegion getMetrics();
210 
211   /** @return the block distribution for all Stores managed by this region */
212   HDFSBlocksDistribution getHDFSBlocksDistribution();
213 
214   ///////////////////////////////////////////////////////////////////////////
215   // Locking
216 
217   // Region read locks
218 
219   /**
220    * Operation enum is used in {@link Region#startRegionOperation} to provide context for
221    * various checks before any region operation begins.
222    */
223   enum Operation {
224     ANY, GET, PUT, DELETE, SCAN, APPEND, INCREMENT, SPLIT_REGION, MERGE_REGION, BATCH_MUTATE,
225     REPLAY_BATCH_MUTATE, COMPACT_REGION, REPLAY_EVENT
226   }
227 
228   /**
229    * This method needs to be called before any public call that reads or
230    * modifies data.
231    * Acquires a read lock and checks if the region is closing or closed.
232    * <p>{@link #closeRegionOperation} MUST then always be called after
233    * the operation has completed, whether it succeeded or failed.
234    * @throws IOException
235    */
236   void startRegionOperation() throws IOException;
237 
238   /**
239    * This method needs to be called before any public call that reads or
240    * modifies data.
241    * Acquires a read lock and checks if the region is closing or closed.
242    * <p>{@link #closeRegionOperation} MUST then always be called after
243    * the operation has completed, whether it succeeded or failed.
244    * @param op The operation that is about to be taken on the region
245    * @throws IOException
246    */
247   void startRegionOperation(Operation op) throws IOException;
248 
249   /**
250    * Closes the region operation lock.
251    * @throws IOException
252    */
253   void closeRegionOperation() throws IOException;
254 
255   // Row write locks
256 
257   /**
258    * Row lock held by a given thread.
259    * One thread may acquire multiple locks on the same row simultaneously.
260    * The locks must be released by calling release() from the same thread.
261    */
262   public interface RowLock {
263     /**
264      * Release the given lock.  If there are no remaining locks held by the current thread
265      * then unlock the row and allow other threads to acquire the lock.
266      * @throws IllegalArgumentException if called by a different thread than the lock owning
267      *     thread
268      */
269     void release();
270   }
271 
272   /**
273    * Tries to acquire a lock on the given row.
274    * @param waitForLock if true, will block until the lock is available.
275    *        Otherwise, just tries to obtain the lock and returns
276    *        null if unavailable.
277    * @return the row lock if acquired,
278    *   null if waitForLock was false and the lock was not acquired
279    * @throws IOException if waitForLock was true and the lock could not be acquired after waiting
280    */
281   RowLock getRowLock(byte[] row, boolean waitForLock) throws IOException;
282 
283   /**
284    * If the given list of row locks is not null, releases all locks.
285    */
286   void releaseRowLocks(List<RowLock> rowLocks);
287 
288   ///////////////////////////////////////////////////////////////////////////
289   // Region operations
290 
291   /**
292    * Perform one or more append operations on a row.
293    * @param append the append operation(s) to perform on the row
294    * @param nonceGroup Optional nonce group of the operation (client Id)
295    * @param nonce Optional nonce of the operation (unique random id to ensure "more idempotence")
296    * @return result of the operation
297    * @throws IOException
298    */
299   Result append(Append append, long nonceGroup, long nonce) throws IOException;
300 
301   /**
302    * Perform a batch of mutations.
303    * <p>
304    * Note this supports only Put and Delete mutations and will ignore other types passed.
305    * @param mutations the list of mutations
306    * @param nonceGroup Optional nonce group of the operation (client Id)
307    * @param nonce Optional nonce of the operation (unique random id to ensure "more idempotence")
308    * @return an array of OperationStatus which internally contains the
309    *         OperationStatusCode and the exceptionMessage if any.
310    * @throws IOException
311    */
312   OperationStatus[] batchMutate(Mutation[] mutations, long nonceGroup, long nonce)
313       throws IOException;
314 
315   /**
316    * Replay a batch of mutations.
317    * @param mutations mutations to replay.
318    * @param replaySeqId
319    * @return an array of OperationStatus which internally contains the
320    *         OperationStatusCode and the exceptionMessage if any.
321    * @throws IOException
322    */
323    OperationStatus[] batchReplay(MutationReplay[] mutations, long replaySeqId) throws IOException;
324 
325   /**
326    * Atomically checks if a row/family/qualifier value matches the expected value.
327    * If it does, it performs the row mutations.  If the passed value is null, the check
328    * is for the lack of column (ie: non-existence)
329    * @param row to check
330    * @param family column family to check
331    * @param qualifier column qualifier to check
332    * @param compareOp the comparison operator
333    * @param comparator
334    * @param mutation the mutation to perform if the check succeeds
335    * @param writeToWAL
336    * @return true if mutation was applied, false otherwise
337    * @throws IOException
338    */
339   boolean checkAndMutate(byte [] row, byte [] family, byte [] qualifier, CompareOp compareOp,
340       ByteArrayComparable comparator, Mutation mutation, boolean writeToWAL) throws IOException;
341 
342   /**
343    * Atomically checks if a row/family/qualifier value matches the expected value.
344    * If it does, it performs the row mutations.  If the passed value is null, the check
345    * is for the lack of column (ie: non-existence)
346    * @param row to check
347    * @param family column family to check
348    * @param qualifier column qualifier to check
349    * @param compareOp the comparison operator
350    * @param comparator
351    * @param mutations the row mutations to perform if the check succeeds
352    * @param writeToWAL
353    * @return true if the mutations were applied, false otherwise
354    * @throws IOException
355    */
356   boolean checkAndRowMutate(byte [] row, byte [] family, byte [] qualifier, CompareOp compareOp,
357       ByteArrayComparable comparator, RowMutations mutations, boolean writeToWAL)
358       throws IOException;
359 
360   /**
361    * Deletes the specified cells/row.
362    * @param delete specifies the cells/row to delete
363    * @throws IOException
364    */
365   void delete(Delete delete) throws IOException;
366 
367   /**
368    * Do a get based on the get parameter.
369    * @param get query parameters
370    * @return result of the operation
371    */
372   Result get(Get get) throws IOException;
373 
374   /**
375    * Do a get based on the get parameter.
376    * @param get query parameters
377    * @param withCoprocessor invoke coprocessor or not. We don't want to
378    * always invoke cp.
379    * @return list of cells resulting from the operation
380    */
381   List<Cell> get(Get get, boolean withCoprocessor) throws IOException;
382 
383   /**
384    * Return an iterator that scans over the HRegion, returning the indicated
385    * columns and rows specified by the {@link Scan}.
386    * <p>
387    * This Iterator must be closed by the caller.
388    *
389    * @param scan configured {@link Scan}
390    * @return RegionScanner
391    * @throws IOException read exceptions
392    */
393   RegionScanner getScanner(Scan scan) throws IOException;
394 
395   /** @return The comparator to be used with the region */
396   CellComparator getCellCompartor();
397 
398   /**
399    * Perform one or more increment operations on a row.
400    * @param increment the increment operation(s) to perform on the row
401    * @param nonceGroup Optional nonce group of the operation (client Id)
402    * @param nonce Optional nonce of the operation (unique random id to ensure "more idempotence")
403    * @return result of the operation
404    * @throws IOException
405    */
406   Result increment(Increment increment, long nonceGroup, long nonce) throws IOException;
407 
408   /**
409    * Performs multiple mutations atomically on a single row. Currently
410    * {@link Put} and {@link Delete} are supported.
411    *
412    * @param mutations object that specifies the set of mutations to perform atomically
413    * @throws IOException
414    */
415   void mutateRow(RowMutations mutations) throws IOException;
416 
417   /**
418    * Perform atomic mutations within the region.
419    *
420    * @param mutations The list of mutations to perform.
421    * <code>mutations</code> can contain operations for multiple rows.
422    * Caller has to ensure that all rows are contained in this region.
423    * @param rowsToLock Rows to lock.
424    * If multiple rows are locked care should be taken that
425    * <code>rowsToLock</code> is sorted in order to avoid deadlocks.
426    * @param nonceGroup Optional nonce group of the operation (client Id)
427    * @param nonce Optional nonce of the operation (unique random id to ensure "more idempotence")
428    * @throws IOException
429    */
430   void mutateRowsWithLocks(Collection<Mutation> mutations, Collection<byte[]> rowsToLock,
431       long nonceGroup, long nonce) throws IOException;
432 
433   /**
434    * Performs atomic multiple reads and writes on a given row.
435    *
436    * @param processor The object defines the reads and writes to a row.
437    */
438   void processRowsWithLocks(RowProcessor<?,?> processor) throws IOException;
439 
440   /**
441    * Performs atomic multiple reads and writes on a given row.
442    *
443    * @param processor The object defines the reads and writes to a row.
444    * @param nonceGroup Optional nonce group of the operation (client Id)
445    * @param nonce Optional nonce of the operation (unique random id to ensure "more idempotence")
446    */
447   void processRowsWithLocks(RowProcessor<?,?> processor, long nonceGroup, long nonce)
448       throws IOException;
449 
450   /**
451    * Performs atomic multiple reads and writes on a given row.
452    *
453    * @param processor The object defines the reads and writes to a row.
454    * @param timeout The timeout of the processor.process() execution
455    *                Use a negative number to switch off the time bound
456    * @param nonceGroup Optional nonce group of the operation (client Id)
457    * @param nonce Optional nonce of the operation (unique random id to ensure "more idempotence")
458    */
459   void processRowsWithLocks(RowProcessor<?,?> processor, long timeout, long nonceGroup, long nonce)
460       throws IOException;
461 
462   /**
463    * Puts some data in the table.
464    * @param put the {@link Put} to apply
465    * @throws IOException
466    */
467   void put(Put put) throws IOException;
468 
469   /**
470    * Listener class to enable callers of
471    * bulkLoadHFile() to perform any necessary
472    * pre/post processing of a given bulkload call
473    */
474   interface BulkLoadListener {
475 
476     /**
477      * Called before an HFile is actually loaded
478      * @param family family being loaded to
479      * @param srcPath path of HFile
480      * @return final path to be used for actual loading
481      * @throws IOException
482      */
483     String prepareBulkLoad(byte[] family, String srcPath) throws IOException;
484 
485     /**
486      * Called after a successful HFile load
487      * @param family family being loaded to
488      * @param srcPath path of HFile
489      * @throws IOException
490      */
491     void doneBulkLoad(byte[] family, String srcPath) throws IOException;
492 
493     /**
494      * Called after a failed HFile load
495      * @param family family being loaded to
496      * @param srcPath path of HFile
497      * @throws IOException
498      */
499     void failedBulkLoad(byte[] family, String srcPath) throws IOException;
500   }
501 
502   /**
503    * Attempts to atomically load a group of hfiles.  This is critical for loading
504    * rows with multiple column families atomically.
505    *
506    * @param familyPaths List of Pair&lt;byte[] column family, String hfilePath&gt;
507    * @param assignSeqId
508    * @param bulkLoadListener Internal hooks enabling massaging/preparation of a
509    * file about to be bulk loaded
510    * @return true if successful, false if failed recoverably
511    * @throws IOException if failed unrecoverably.
512    */
513   boolean bulkLoadHFiles(Collection<Pair<byte[], String>> familyPaths, boolean assignSeqId,
514       BulkLoadListener bulkLoadListener) throws IOException;
515 
516   ///////////////////////////////////////////////////////////////////////////
517   // Coprocessors
518 
519   /** @return the coprocessor host */
520   RegionCoprocessorHost getCoprocessorHost();
521 
522   /**
523    * Executes a single protocol buffer coprocessor endpoint {@link Service} method using
524    * the registered protocol handlers.  {@link Service} implementations must be registered via the
525    * {@link Region#registerService(com.google.protobuf.Service)}
526    * method before they are available.
527    *
528    * @param controller an {@code RpcController} implementation to pass to the invoked service
529    * @param call a {@code CoprocessorServiceCall} instance identifying the service, method,
530    *     and parameters for the method invocation
531    * @return a protocol buffer {@code Message} instance containing the method's result
532    * @throws IOException if no registered service handler is found or an error
533    *     occurs during the invocation
534    * @see org.apache.hadoop.hbase.regionserver.Region#registerService(com.google.protobuf.Service)
535    */
536   Message execService(RpcController controller, CoprocessorServiceCall call) throws IOException;
537 
538   /**
539    * Registers a new protocol buffer {@link Service} subclass as a coprocessor endpoint to
540    * be available for handling
541    * {@link Region#execService(com.google.protobuf.RpcController,
542    *    org.apache.hadoop.hbase.protobuf.generated.ClientProtos.CoprocessorServiceCall)} calls.
543    *
544    * <p>
545    * Only a single instance may be registered per region for a given {@link Service} subclass (the
546    * instances are keyed on {@link com.google.protobuf.Descriptors.ServiceDescriptor#getFullName()}).
547    * After the first registration, subsequent calls with the same service name will fail with
548    * a return value of {@code false}.
549    * </p>
550    * @param instance the {@code Service} subclass instance to expose as a coprocessor endpoint
551    * @return {@code true} if the registration was successful, {@code false}
552    * otherwise
553    */
554   boolean registerService(Service instance);
555 
556   ///////////////////////////////////////////////////////////////////////////
557   // RowMutation processor support
558 
559   /**
560    * Check the collection of families for validity.
561    * @param families the collection of families to check
562    * @throws NoSuchColumnFamilyException
563    */
564   void checkFamilies(Collection<byte[]> families) throws NoSuchColumnFamilyException;
565 
566   /**
567    * Check the collection of families for valid timestamps
568    * @param familyMap
569    * @param now current timestamp
570    * @throws FailedSanityCheckException
571    */
572   void checkTimestamps(Map<byte[], List<Cell>> familyMap, long now)
573       throws FailedSanityCheckException;
574 
575   /**
576    * Prepare a delete for a row mutation processor
577    * @param delete The passed delete is modified by this method. WARNING!
578    * @throws IOException
579    */
580   void prepareDelete(Delete delete) throws IOException;
581 
582   /**
583    * Set up correct timestamps in the KVs in Delete object.
584    * <p>Caller should have the row and region locks.
585    * @param mutation
586    * @param familyCellMap
587    * @param now
588    * @throws IOException
589    */
590   void prepareDeleteTimestamps(Mutation mutation, Map<byte[], List<Cell>> familyCellMap,
591       byte[] now) throws IOException;
592 
593   /**
594    * Replace any cell timestamps set to {@link org.apache.hadoop.hbase.HConstants#LATEST_TIMESTAMP}
595    * with the provided current timestamp.
596    * @param values
597    * @param now
598    */
599   void updateCellTimestamps(final Iterable<List<Cell>> values, final byte[] now)
600       throws IOException;
601 
602   ///////////////////////////////////////////////////////////////////////////
603   // Flushes, compactions, splits, etc.
604   // Wizards only, please
605 
606   interface FlushResult {
607     enum Result {
608       FLUSHED_NO_COMPACTION_NEEDED,
609       FLUSHED_COMPACTION_NEEDED,
610       // Special case where a flush didn't run because there's nothing in the memstores. Used when
611       // bulk loading to know when we can still load even if a flush didn't happen.
612       CANNOT_FLUSH_MEMSTORE_EMPTY,
613       CANNOT_FLUSH
614     }
615 
616     /** @return the detailed result code */
617     Result getResult();
618 
619     /** @return true if the memstores were flushed, else false */
620     boolean isFlushSucceeded();
621 
622     /** @return True if the flush requested a compaction, else false */
623     boolean isCompactionNeeded();
624   }
625 
626   /**
627    * Flush the cache.
628    *
629    * <p>When this method is called the cache will be flushed unless:
630    * <ol>
631    *   <li>the cache is empty</li>
632    *   <li>the region is closed.</li>
633    *   <li>a flush is already in progress</li>
634    *   <li>writes are disabled</li>
635    * </ol>
636    *
637    * <p>This method may block for some time, so it should not be called from a
638    * time-sensitive thread.
639    * @param force whether we want to force a flush of all stores
640    * @return FlushResult indicating whether the flush was successful or not and if
641    * the region needs compacting
642    *
643    * @throws IOException general io exceptions, for example
644    * because a snapshot was not properly persisted.
645    */
646   FlushResult flush(boolean force) throws IOException;
647 
648   /**
649    * Synchronously compact all stores in the region.
650    * <p>This operation could block for a long time, so don't call it from a
651    * time-sensitive thread.
652    * <p>Note that no locks are taken to prevent possible conflicts between
653    * compaction and splitting activities. The regionserver does not normally compact
654    * and split in parallel. However by calling this method you may introduce
655    * unexpected and unhandled concurrency. Don't do this unless you know what
656    * you are doing.
657    *
658    * @param majorCompaction True to force a major compaction regardless of thresholds
659    * @throws IOException
660    */
661   void compact(final boolean majorCompaction) throws IOException;
662 
663   /**
664    * Trigger major compaction on all stores in the region.
665    * <p>
666    * Compaction will be performed asynchronously to this call by the RegionServer's
667    * CompactSplitThread. See also {@link Store#triggerMajorCompaction()}
668    * @throws IOException
669    */
670   void triggerMajorCompaction() throws IOException;
671 
672   /**
673    * @return the compaction state of this region, i.e. whether it is in compaction now.
674    */
675   CompactionState getCompactionState();
676 
677   /** Wait for all current flushes and compactions of the region to complete */
678   void waitForFlushesAndCompactions();
679 
680 }