/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.regionserver;

import java.io.IOException;
import java.util.Collection;
import java.util.List;
import java.util.Map;

import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellComparator;
import org.apache.hadoop.hbase.HBaseInterfaceAudience;
import org.apache.hadoop.hbase.HDFSBlocksDistribution;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.classification.InterfaceStability;
import org.apache.hadoop.hbase.client.Append;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.Increment;
import org.apache.hadoop.hbase.client.IsolationLevel;
import org.apache.hadoop.hbase.client.Mutation;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.RowMutations;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.conf.ConfigurationObserver;
import org.apache.hadoop.hbase.exceptions.FailedSanityCheckException;
import org.apache.hadoop.hbase.filter.ByteArrayComparable;
import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp;
import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.GetRegionInfoResponse.CompactionState;
import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.CoprocessorServiceCall;
import org.apache.hadoop.hbase.util.Pair;
import org.apache.hadoop.hbase.wal.WALSplitter.MutationReplay;

import com.google.common.annotations.VisibleForTesting;
import com.google.protobuf.Message;
import com.google.protobuf.RpcController;
import com.google.protobuf.Service;

/**
 * Regions store data for a certain region of a table.  It stores all columns
 * for each row. A given table consists of one or more Regions.
 *
 * <p>A Region is defined by its table and its key extent.
 *
 * <p>Locking at the Region level serves only one purpose: preventing the
 * region from being closed (and consequently split) while other operations
 * are ongoing. Each row level operation obtains both a row lock and a region
 * read lock for the duration of the operation. While a scanner is being
 * constructed, getScanner holds a read lock. If the scanner is successfully
 * constructed, it holds a read lock until it is closed. A close takes out a
 * write lock and consequently will block for ongoing operations and will block
 * new operations from starting while the close is in progress.
 */
@InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.COPROC)
@InterfaceStability.Evolving
public interface Region extends ConfigurationObserver {

  ///////////////////////////////////////////////////////////////////////////
  // Region state

  /** @return region information for this region */
  HRegionInfo getRegionInfo();

  /** @return table descriptor for this region */
  HTableDescriptor getTableDesc();

  /** @return true if region is available (not closed and not closing) */
  boolean isAvailable();

  /** @return true if region is closed */
  boolean isClosed();

  /** @return True if closing process has started */
  boolean isClosing();

  /** @return True if region is in recovering state */
  boolean isRecovering();

  /** @return True if region is read only */
  boolean isReadOnly();

  /**
   * Return the list of Stores managed by this region
   * <p>Use with caution.  Exposed for use of fixup utilities.
   * @return a list of the Stores managed by this region
   */
  List<Store> getStores();

  /**
   * Return the Store for the given family
   * <p>Use with caution.  Exposed for use of fixup utilities.
   * @param family the column family whose Store is wanted
   * @return the Store for the given family
   */
  Store getStore(byte[] family);

  /**
   * @param columns the families whose store file names are wanted
   * @return list of store file names for the given families
   */
  List<String> getStoreFileList(byte[][] columns);

  /**
   * Check the region's underlying store files, open the files that have not
   * been opened yet, and remove the store file readers for store files no
   * longer available.
   * @throws IOException
   */
  boolean refreshStoreFiles() throws IOException;

  /** @return the latest sequence number that was read from storage when this region was opened */
  long getOpenSeqNum();

  /**
   * @return the max sequence id of flushed data on this region; no edit in memory will have
   *   a sequence id that is less than what is returned here.
   */
  long getMaxFlushedSeqId();

  /**
   * @param familyName the column family to look up
   * @return the oldest flushed sequence id for the given family; can be beyond
   *   {@link #getMaxFlushedSeqId()} in case where we've flushed a subset of a region's column
   *   families
   * @deprecated Since version 1.2.0. Exposes too much about our internals; shutting it down.
   *   Do not use.
   */
  @VisibleForTesting
  @Deprecated
  long getOldestSeqIdOfStore(byte[] familyName);

  /**
   * This can be used to determine the last time all files of this region were major compacted.
   * @param majorCompactioOnly Only consider HFiles that are the result of major compaction
   * @return the timestamp of the oldest HFile for all stores of this region
   * @throws IOException
   */
  long getOldestHfileTs(boolean majorCompactioOnly) throws IOException;

  /**
   * @return map of column family names to max sequence id that was read from storage when this
   *   region was opened
   */
  Map<byte[], Long> getMaxStoreSeqId();

  /** @return true if loading column families on demand by default */
  boolean isLoadingCfsOnDemandDefault();

  /**
   * @param isolationLevel the isolation level to consider
   * @return readpoint considering given IsolationLevel
   */
  long getReadpoint(IsolationLevel isolationLevel);

  /**
   * @return The earliest time a store in the region was flushed. All
   *   other stores in the region would have been flushed either at, or
   *   after this time.
   */
  long getEarliestFlushTimeForAllStores();

  ///////////////////////////////////////////////////////////////////////////
  // Metrics

  /** @return read requests count for this region */
  long getReadRequestsCount();

  /**
   * Update the read request count for this region
   * @param i increment
   */
  void updateReadRequestsCount(long i);

  /** @return write request count for this region */
  long getWriteRequestsCount();

  /**
   * Update the write request count for this region
   * @param i increment
   */
  void updateWriteRequestsCount(long i);

  /** @return memstore size for this region, in bytes */
  long getMemstoreSize();

  /** @return the number of mutations processed bypassing the WAL */
  long getNumMutationsWithoutWAL();

  /** @return the size of data processed bypassing the WAL, in bytes */
  long getDataInMemoryWithoutWAL();

  /** @return the number of blocked requests */
  long getBlockedRequestsCount();

  /** @return the number of checkAndMutate guards that passed */
  long getCheckAndMutateChecksPassed();

  /** @return the number of failed checkAndMutate guards */
  long getCheckAndMutateChecksFailed();

  /** @return the MetricsRegion for this region */
  MetricsRegion getMetrics();

  /** @return the block distribution for all Stores managed by this region */
  HDFSBlocksDistribution getHDFSBlocksDistribution();

  ///////////////////////////////////////////////////////////////////////////
  // Locking

  // Region read locks

  /**
   * Operation enum is used in {@link Region#startRegionOperation} to provide context for
   * various checks before any region operation begins.
   */
  enum Operation {
    ANY, GET, PUT, DELETE, SCAN, APPEND, INCREMENT, SPLIT_REGION, MERGE_REGION, BATCH_MUTATE,
    REPLAY_BATCH_MUTATE, COMPACT_REGION, REPLAY_EVENT
  }

  /**
   * This method needs to be called before any public call that reads or
   * modifies data.
   * Acquires a read lock and checks if the region is closing or closed.
   * <p>{@link #closeRegionOperation} MUST then always be called after
   * the operation has completed, whether it succeeded or failed.
   * @throws IOException
   */
  void startRegionOperation() throws IOException;

  /**
   * This method needs to be called before any public call that reads or
   * modifies data.
   * Acquires a read lock and checks if the region is closing or closed.
   * <p>{@link #closeRegionOperation} MUST then always be called after
   * the operation has completed, whether it succeeded or failed.
   * @param op The operation that is about to be taken on the region
   * @throws IOException
   */
  void startRegionOperation(Operation op) throws IOException;

  /**
   * Closes the region operation lock.
   * @throws IOException
   */
  void closeRegionOperation() throws IOException;

  // Row write locks

  /**
   * Row lock held by a given thread.
   * One thread may acquire multiple locks on the same row simultaneously.
   * The locks must be released by calling release() from the same thread.
   */
  interface RowLock {
    /**
     * Release the given lock.  If there are no remaining locks held by the current thread
     * then unlock the row and allow other threads to acquire the lock.
     * @throws IllegalArgumentException if called by a different thread than the lock owning
     *   thread
     */
    void release();
  }

  /**
   * Tries to acquire a lock on the given row.
   * @param row the row to lock
   * @param waitForLock if true, will block until the lock is available.
   *   Otherwise, just tries to obtain the lock and returns
   *   null if unavailable.
   * @return the row lock if acquired,
   *   null if waitForLock was false and the lock was not acquired
   * @throws IOException if waitForLock was true and the lock could not be acquired after waiting
   */
  RowLock getRowLock(byte[] row, boolean waitForLock) throws IOException;

  /**
   * If the given list of row locks is not null, releases all locks.
   * @param rowLocks the row locks to release; may be null
   */
  void releaseRowLocks(List<RowLock> rowLocks);

  ///////////////////////////////////////////////////////////////////////////
  // Region operations

  /**
   * Perform one or more append operations on a row.
   * @param append the append operation(s) to perform
   * @param nonceGroup nonce group of the operation (client Id)
   * @param nonce nonce of the operation (unique random id to ensure "more idempotence")
   * @return result of the operation
   * @throws IOException
   */
  Result append(Append append, long nonceGroup, long nonce) throws IOException;

  /**
   * Perform a batch of mutations.
   * <p>
   * Note this supports only Put and Delete mutations and will ignore other types passed.
   * @param mutations the list of mutations
   * @param nonceGroup nonce group of the operation (client Id)
   * @param nonce nonce of the operation (unique random id to ensure "more idempotence")
   * @return an array of OperationStatus which internally contains the
   *   OperationStatusCode and the exceptionMessage if any.
   * @throws IOException
   */
  OperationStatus[] batchMutate(Mutation[] mutations, long nonceGroup, long nonce)
      throws IOException;

  /**
   * Replay a batch of mutations.
   * @param mutations mutations to replay.
   * @param replaySeqId the replay sequence id
   * @return an array of OperationStatus which internally contains the
   *   OperationStatusCode and the exceptionMessage if any.
   * @throws IOException
   */
  OperationStatus[] batchReplay(MutationReplay[] mutations, long replaySeqId) throws IOException;

  /**
   * Atomically checks if a row/family/qualifier value matches the expected value.
   * If it does, it performs the mutation. If the passed value is null, the check
   * is for the lack of column (ie: non-existence)
   * @param row to check
   * @param family column family to check
   * @param qualifier column qualifier to check
   * @param compareOp the comparison operator
   * @param comparator the comparable carrying the expected value
   * @param mutation the mutation to perform if the check passes
   * @param writeToWAL whether to write to the WAL
   * @return true if mutation was applied, false otherwise
   * @throws IOException
   */
  boolean checkAndMutate(byte[] row, byte[] family, byte[] qualifier, CompareOp compareOp,
      ByteArrayComparable comparator, Mutation mutation, boolean writeToWAL) throws IOException;

  /**
   * Atomically checks if a row/family/qualifier value matches the expected value.
   * If it does, it performs the row mutations. If the passed value is null, the check
   * is for the lack of column (ie: non-existence)
   * @param row to check
   * @param family column family to check
   * @param qualifier column qualifier to check
   * @param compareOp the comparison operator
   * @param comparator the comparable carrying the expected value
   * @param mutations the row mutations to perform if the check passes
   * @param writeToWAL whether to write to the WAL
   * @return true if mutation was applied, false otherwise
   * @throws IOException
   */
  boolean checkAndRowMutate(byte[] row, byte[] family, byte[] qualifier, CompareOp compareOp,
      ByteArrayComparable comparator, RowMutations mutations, boolean writeToWAL)
      throws IOException;

  /**
   * Deletes the specified cells/row.
   * @param delete specifies the cells/row to delete
   * @throws IOException
   */
  void delete(Delete delete) throws IOException;

  /**
   * Do a get based on the get parameter.
   * @param get query parameters
   * @return result of the operation
   * @throws IOException
   */
  Result get(Get get) throws IOException;

  /**
   * Do a get based on the get parameter.
   * @param get query parameters
   * @param withCoprocessor invoke coprocessor or not. We don't want to
   *   always invoke cp.
   * @return list of cells resulting from the operation
   * @throws IOException
   */
  List<Cell> get(Get get, boolean withCoprocessor) throws IOException;

  /**
   * Return an iterator that scans over the Region, returning the indicated
   * columns and rows specified by the {@link Scan}.
   * <p>
   * This Iterator must be closed by the caller.
   *
   * @param scan configured {@link Scan}
   * @return RegionScanner
   * @throws IOException read exceptions
   */
  RegionScanner getScanner(Scan scan) throws IOException;

  /**
   * The comparator to be used with the region.
   * <p>The misspelled method name is retained as-is; renaming it would break
   * existing implementors and callers.
   * @return the comparator to be used with the region
   */
  CellComparator getCellCompartor();

  /**
   * Perform one or more increment operations on a row.
   * @param increment the increment operation(s) to perform
   * @param nonceGroup nonce group of the operation (client Id)
   * @param nonce nonce of the operation (unique random id to ensure "more idempotence")
   * @return result of the operation
   * @throws IOException
   */
  Result increment(Increment increment, long nonceGroup, long nonce) throws IOException;

  /**
   * Performs multiple mutations atomically on a single row. Currently
   * {@link Put} and {@link Delete} are supported.
   *
   * @param mutations object that specifies the set of mutations to perform atomically
   * @throws IOException
   */
  void mutateRow(RowMutations mutations) throws IOException;

  /**
   * Perform atomic mutations within the region.
   *
   * @param mutations The list of mutations to perform.
   *   <code>mutations</code> can contain operations for multiple rows.
   *   Caller has to ensure that all rows are contained in this region.
   * @param rowsToLock Rows to lock.
   *   If multiple rows are locked care should be taken that
   *   <code>rowsToLock</code> is sorted in order to avoid deadlocks.
   * @param nonceGroup Optional nonce group of the operation (client Id)
   * @param nonce Optional nonce of the operation (unique random id to ensure "more idempotence")
   * @throws IOException
   */
  void mutateRowsWithLocks(Collection<Mutation> mutations, Collection<byte[]> rowsToLock,
      long nonceGroup, long nonce) throws IOException;

  /**
   * Performs atomic multiple reads and writes on a given row.
   *
   * @param processor The object defines the reads and writes to a row.
   * @throws IOException
   */
  void processRowsWithLocks(RowProcessor<?,?> processor) throws IOException;

  /**
   * Performs atomic multiple reads and writes on a given row.
   *
   * @param processor The object defines the reads and writes to a row.
   * @param nonceGroup Optional nonce group of the operation (client Id)
   * @param nonce Optional nonce of the operation (unique random id to ensure "more idempotence")
   * @throws IOException
   */
  void processRowsWithLocks(RowProcessor<?,?> processor, long nonceGroup, long nonce)
      throws IOException;

  /**
   * Performs atomic multiple reads and writes on a given row.
   *
   * @param processor The object defines the reads and writes to a row.
   * @param timeout The timeout of the processor.process() execution
   *   Use a negative number to switch off the time bound
   * @param nonceGroup Optional nonce group of the operation (client Id)
   * @param nonce Optional nonce of the operation (unique random id to ensure "more idempotence")
   * @throws IOException
   */
  void processRowsWithLocks(RowProcessor<?,?> processor, long timeout, long nonceGroup, long nonce)
      throws IOException;

  /**
   * Puts some data in the table.
   * @param put the data to put
   * @throws IOException
   */
  void put(Put put) throws IOException;

  /**
   * Listener class to enable callers of
   * bulkLoadHFile() to perform any necessary
   * pre/post processing of a given bulkload call
   */
  interface BulkLoadListener {

    /**
     * Called before an HFile is actually loaded
     * @param family family being loaded to
     * @param srcPath path of HFile
     * @return final path to be used for actual loading
     * @throws IOException
     */
    String prepareBulkLoad(byte[] family, String srcPath) throws IOException;

    /**
     * Called after a successful HFile load
     * @param family family being loaded to
     * @param srcPath path of HFile
     * @throws IOException
     */
    void doneBulkLoad(byte[] family, String srcPath) throws IOException;

    /**
     * Called after a failed HFile load
     * @param family family being loaded to
     * @param srcPath path of HFile
     * @throws IOException
     */
    void failedBulkLoad(byte[] family, String srcPath) throws IOException;
  }

  /**
   * Attempts to atomically load a group of hfiles.  This is critical for loading
   * rows with multiple column families atomically.
   *
   * @param familyPaths List of {@code Pair<byte[] column family, String hfilePath>}
   * @param assignSeqId whether a sequence id should be assigned
   * @param bulkLoadListener Internal hooks enabling massaging/preparation of a
   *   file about to be bulk loaded
   * @return true if successful, false if failed recoverably
   * @throws IOException if failed unrecoverably.
   */
  boolean bulkLoadHFiles(Collection<Pair<byte[], String>> familyPaths, boolean assignSeqId,
      BulkLoadListener bulkLoadListener) throws IOException;

  ///////////////////////////////////////////////////////////////////////////
  // Coprocessors

  /** @return the coprocessor host */
  RegionCoprocessorHost getCoprocessorHost();

  /**
   * Executes a single protocol buffer coprocessor endpoint {@link Service} method using
   * the registered protocol handlers.  {@link Service} implementations must be registered via the
   * {@link Region#registerService(com.google.protobuf.Service)}
   * method before they are available.
   *
   * @param controller an {@code RpcController} implementation to pass to the invoked service
   * @param call a {@code CoprocessorServiceCall} instance identifying the service, method,
   *   and parameters for the method invocation
   * @return a protocol buffer {@code Message} instance containing the method's result
   * @throws IOException if no registered service handler is found or an error
   *   occurs during the invocation
   * @see org.apache.hadoop.hbase.regionserver.Region#registerService(com.google.protobuf.Service)
   */
  Message execService(RpcController controller, CoprocessorServiceCall call) throws IOException;

  /**
   * Registers a new protocol buffer {@link Service} subclass as a coprocessor endpoint to
   * be available for handling
   * {@link Region#execService(com.google.protobuf.RpcController,
   * org.apache.hadoop.hbase.protobuf.generated.ClientProtos.CoprocessorServiceCall)} calls.
   *
   * <p>
   * Only a single instance may be registered per region for a given {@link Service} subclass (the
   * instances are keyed on {@link com.google.protobuf.Descriptors.ServiceDescriptor#getFullName()}).
   * After the first registration, subsequent calls with the same service name will fail with
   * a return value of {@code false}.
   * </p>
   * @param instance the {@code Service} subclass instance to expose as a coprocessor endpoint
   * @return {@code true} if the registration was successful, {@code false}
   *   otherwise
   */
  boolean registerService(Service instance);

  ///////////////////////////////////////////////////////////////////////////
  // RowMutation processor support

  /**
   * Check the collection of families for validity.
   * @param families the families to check
   * @throws NoSuchColumnFamilyException
   */
  void checkFamilies(Collection<byte[]> families) throws NoSuchColumnFamilyException;

  /**
   * Check the collection of families for valid timestamps
   * @param familyMap the families, with their cells, to check
   * @param now current timestamp
   * @throws FailedSanityCheckException
   */
  void checkTimestamps(Map<byte[], List<Cell>> familyMap, long now)
      throws FailedSanityCheckException;

  /**
   * Prepare a delete for a row mutation processor
   * @param delete The passed delete is modified by this method. WARNING!
   * @throws IOException
   */
  void prepareDelete(Delete delete) throws IOException;

  /**
   * Set up correct timestamps in the KVs in Delete object.
   * <p>Caller should have the row and region locks.
   * @param mutation the mutation being prepared
   * @param familyCellMap the cells of the mutation, keyed by family
   * @param now the current timestamp, as a byte array
   * @throws IOException
   */
  void prepareDeleteTimestamps(Mutation mutation, Map<byte[], List<Cell>> familyCellMap,
      byte[] now) throws IOException;

  /**
   * Replace any cell timestamps set to {@link org.apache.hadoop.hbase.HConstants#LATEST_TIMESTAMP}
   * with the provided current timestamp.
   * @param values the lists of cells to update
   * @param now the current timestamp, as a byte array
   * @throws IOException
   */
  void updateCellTimestamps(Iterable<List<Cell>> values, byte[] now)
      throws IOException;

  ///////////////////////////////////////////////////////////////////////////
  // Flushes, compactions, splits, etc.
  // Wizards only, please

  /** Outcome of a {@link #flush(boolean)} call. */
  interface FlushResult {
    /** Detailed result codes for a flush. */
    enum Result {
      FLUSHED_NO_COMPACTION_NEEDED,
      FLUSHED_COMPACTION_NEEDED,
      // Special case where a flush didn't run because there's nothing in the memstores. Used when
      // bulk loading to know when we can still load even if a flush didn't happen.
      CANNOT_FLUSH_MEMSTORE_EMPTY,
      CANNOT_FLUSH
    }

    /** @return the detailed result code */
    Result getResult();

    /** @return true if the memstores were flushed, else false */
    boolean isFlushSucceeded();

    /** @return True if the flush requested a compaction, else false */
    boolean isCompactionNeeded();
  }

  /**
   * Flush the cache.
   *
   * <p>When this method is called the cache will be flushed unless:
   * <ol>
   *   <li>the cache is empty</li>
   *   <li>the region is closed.</li>
   *   <li>a flush is already in progress</li>
   *   <li>writes are disabled</li>
   * </ol>
   *
   * <p>This method may block for some time, so it should not be called from a
   * time-sensitive thread.
   * @param force whether we want to force a flush of all stores
   * @return FlushResult indicating whether the flush was successful or not and if
   *   the region needs compacting
   *
   * @throws IOException general io exceptions, for instance
   *   because a snapshot was not properly persisted.
   */
  FlushResult flush(boolean force) throws IOException;

  /**
   * Synchronously compact all stores in the region.
   * <p>This operation could block for a long time, so don't call it from a
   * time-sensitive thread.
   * <p>Note that no locks are taken to prevent possible conflicts between
   * compaction and splitting activities. The regionserver does not normally compact
   * and split in parallel. However by calling this method you may introduce
   * unexpected and unhandled concurrency. Don't do this unless you know what
   * you are doing.
   *
   * @param majorCompaction True to force a major compaction regardless of thresholds
   * @throws IOException
   */
  void compact(boolean majorCompaction) throws IOException;

  /**
   * Trigger major compaction on all stores in the region.
   * <p>
   * Compaction will be performed asynchronously to this call by the RegionServer's
   * CompactSplitThread. See also {@link Store#triggerMajorCompaction()}
   * @throws IOException
   */
  void triggerMajorCompaction() throws IOException;

  /**
   * @return the compaction state of this region, i.e. whether it is in compaction now.
   */
  CompactionState getCompactionState();

  /** Wait for all current flushes and compactions of the region to complete */
  void waitForFlushesAndCompactions();

}