View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.master;
20  
21  import java.io.FileNotFoundException;
22  import java.io.IOException;
23  import java.util.Comparator;
24  import java.util.HashSet;
25  import java.util.Map;
26  import java.util.TreeMap;
27  import java.util.concurrent.atomic.AtomicBoolean;
28  import java.util.concurrent.atomic.AtomicInteger;
29  
30  import org.apache.commons.logging.Log;
31  import org.apache.commons.logging.LogFactory;
32  import org.apache.hadoop.fs.FileSystem;
33  import org.apache.hadoop.fs.Path;
34  import org.apache.hadoop.hbase.HColumnDescriptor;
35  import org.apache.hadoop.hbase.HConstants;
36  import org.apache.hadoop.hbase.HRegionInfo;
37  import org.apache.hadoop.hbase.HTableDescriptor;
38  import org.apache.hadoop.hbase.MetaTableAccessor;
39  import org.apache.hadoop.hbase.ScheduledChore;
40  import org.apache.hadoop.hbase.Server;
41  import org.apache.hadoop.hbase.TableName;
42  import org.apache.hadoop.hbase.backup.HFileArchiver;
43  import org.apache.hadoop.hbase.classification.InterfaceAudience;
44  import org.apache.hadoop.hbase.client.Connection;
45  import org.apache.hadoop.hbase.client.Result;
46  import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
47  import org.apache.hadoop.hbase.util.Bytes;
48  import org.apache.hadoop.hbase.util.FSUtils;
49  import org.apache.hadoop.hbase.util.Pair;
50  import org.apache.hadoop.hbase.util.PairOfSameType;
51  import org.apache.hadoop.hbase.util.Triple;
52  
53  /**
54   * A janitor for the catalog tables.  Scans the <code>hbase:meta</code> catalog
55   * table on a period looking for unused regions to garbage collect.
56   */
57  @InterfaceAudience.Private
58  public class CatalogJanitor extends ScheduledChore {
59    private static final Log LOG = LogFactory.getLog(CatalogJanitor.class.getName());
60    private final Server server;
61    private final MasterServices services;
62    private AtomicBoolean enabled = new AtomicBoolean(true);
63    private AtomicBoolean alreadyRunning = new AtomicBoolean(false);
64    private final Connection connection;
65  
66    CatalogJanitor(final Server server, final MasterServices services) {
67      super("CatalogJanitor-" + server.getServerName().toShortString(), server, server
68          .getConfiguration().getInt("hbase.catalogjanitor.interval", 300000));
69      this.server = server;
70      this.services = services;
71      this.connection = server.getConnection();
72    }
73  
74    @Override
75    protected boolean initialChore() {
76      try {
77        if (this.enabled.get()) scan();
78      } catch (IOException e) {
79        LOG.warn("Failed initial scan of catalog table", e);
80        return false;
81      }
82      return true;
83    }
84  
85    /**
86     * @param enabled
87     */
88    public boolean setEnabled(final boolean enabled) {
89      return this.enabled.getAndSet(enabled);
90    }
91  
92    boolean getEnabled() {
93      return this.enabled.get();
94    }
95  
96    @Override
97    protected void chore() {
98      try {
99        if (this.enabled.get()) {
100         scan();
101       } else {
102         LOG.warn("CatalogJanitor disabled! Not running scan.");
103       }
104     } catch (IOException e) {
105       LOG.warn("Failed scan of catalog table", e);
106     }
107   }
108 
109   /**
110    * Scans hbase:meta and returns a number of scanned rows, and a map of merged
111    * regions, and an ordered map of split parents.
112    * @return triple of scanned rows, map of merged regions and map of split
113    *         parent regioninfos
114    * @throws IOException
115    */
116   Triple<Integer, Map<HRegionInfo, Result>, Map<HRegionInfo, Result>>
117     getMergedRegionsAndSplitParents() throws IOException {
118     return getMergedRegionsAndSplitParents(null);
119   }
120 
121   /**
122    * Scans hbase:meta and returns a number of scanned rows, and a map of merged
123    * regions, and an ordered map of split parents. if the given table name is
124    * null, return merged regions and split parents of all tables, else only the
125    * specified table
126    * @param tableName null represents all tables
127    * @return triple of scanned rows, and map of merged regions, and map of split
128    *         parent regioninfos
129    * @throws IOException
130    */
131   Triple<Integer, Map<HRegionInfo, Result>, Map<HRegionInfo, Result>>
132     getMergedRegionsAndSplitParents(final TableName tableName) throws IOException {
133     final boolean isTableSpecified = (tableName != null);
134     // TODO: Only works with single hbase:meta region currently.  Fix.
135     final AtomicInteger count = new AtomicInteger(0);
136     // Keep Map of found split parents.  There are candidates for cleanup.
137     // Use a comparator that has split parents come before its daughters.
138     final Map<HRegionInfo, Result> splitParents =
139       new TreeMap<HRegionInfo, Result>(new SplitParentFirstComparator());
140     final Map<HRegionInfo, Result> mergedRegions = new TreeMap<HRegionInfo, Result>();
141     // This visitor collects split parents and counts rows in the hbase:meta table
142 
143     MetaTableAccessor.Visitor visitor = new MetaTableAccessor.Visitor() {
144       @Override
145       public boolean visit(Result r) throws IOException {
146         if (r == null || r.isEmpty()) return true;
147         count.incrementAndGet();
148         HRegionInfo info = MetaTableAccessor.getHRegionInfo(r);
149         if (info == null) return true; // Keep scanning
150         if (isTableSpecified
151             && info.getTable().compareTo(tableName) > 0) {
152           // Another table, stop scanning
153           return false;
154         }
155         if (info.isSplitParent()) splitParents.put(info, r);
156         if (r.getValue(HConstants.CATALOG_FAMILY, HConstants.MERGEA_QUALIFIER) != null) {
157           mergedRegions.put(info, r);
158         }
159         // Returning true means "keep scanning"
160         return true;
161       }
162     };
163 
164     // Run full scan of hbase:meta catalog table passing in our custom visitor with
165     // the start row
166     MetaTableAccessor.scanMetaForTableRegions(this.connection, visitor, tableName);
167 
168     return new Triple<Integer, Map<HRegionInfo, Result>, Map<HRegionInfo, Result>>(
169         count.get(), mergedRegions, splitParents);
170   }
171 
172   /**
173    * If merged region no longer holds reference to the merge regions, archive
174    * merge region on hdfs and perform deleting references in hbase:meta
175    * @param mergedRegion
176    * @param regionA
177    * @param regionB
178    * @return true if we delete references in merged region on hbase:meta and archive
179    *         the files on the file system
180    * @throws IOException
181    */
182   boolean cleanMergeRegion(final HRegionInfo mergedRegion,
183       final HRegionInfo regionA, final HRegionInfo regionB) throws IOException {
184     FileSystem fs = this.services.getMasterFileSystem().getFileSystem();
185     Path rootdir = this.services.getMasterFileSystem().getRootDir();
186     Path tabledir = FSUtils.getTableDir(rootdir, mergedRegion.getTable());
187     HTableDescriptor htd = getTableDescriptor(mergedRegion.getTable());
188     HRegionFileSystem regionFs = null;
189     try {
190       regionFs = HRegionFileSystem.openRegionFromFileSystem(
191           this.services.getConfiguration(), fs, tabledir, mergedRegion, true);
192     } catch (IOException e) {
193       LOG.warn("Merged region does not exist: " + mergedRegion.getEncodedName());
194     }
195     if (regionFs == null || !regionFs.hasReferences(htd)) {
196       LOG.debug("Deleting region " + regionA.getRegionNameAsString() + " and "
197           + regionB.getRegionNameAsString()
198           + " from fs because merged region no longer holds references");
199       HFileArchiver.archiveRegion(this.services.getConfiguration(), fs, regionA);
200       HFileArchiver.archiveRegion(this.services.getConfiguration(), fs, regionB);
201       MetaTableAccessor.deleteMergeQualifiers(server.getConnection(),
202         mergedRegion);
203       return true;
204     }
205     return false;
206   }
207 
208   /**
209    * Run janitorial scan of catalog <code>hbase:meta</code> table looking for
210    * garbage to collect.
211    * @return number of cleaned regions
212    * @throws IOException
213    */
214   int scan() throws IOException {
215     try {
216       if (!alreadyRunning.compareAndSet(false, true)) {
217         return 0;
218       }
219       Triple<Integer, Map<HRegionInfo, Result>, Map<HRegionInfo, Result>> scanTriple =
220         getMergedRegionsAndSplitParents();
221       int count = scanTriple.getFirst();
222       /**
223        * clean merge regions first
224        */
225       int mergeCleaned = 0;
226       Map<HRegionInfo, Result> mergedRegions = scanTriple.getSecond();
227       for (Map.Entry<HRegionInfo, Result> e : mergedRegions.entrySet()) {
228         PairOfSameType<HRegionInfo> p = MetaTableAccessor.getMergeRegions(e.getValue());
229         HRegionInfo regionA = p.getFirst();
230         HRegionInfo regionB = p.getSecond();
231         if (regionA == null || regionB == null) {
232           LOG.warn("Unexpected references regionA="
233               + (regionA == null ? "null" : regionA.getRegionNameAsString())
234               + ",regionB="
235               + (regionB == null ? "null" : regionB.getRegionNameAsString())
236               + " in merged region " + e.getKey().getRegionNameAsString());
237         } else {
238           if (cleanMergeRegion(e.getKey(), regionA, regionB)) {
239             mergeCleaned++;
240           }
241         }
242       }
243       /**
244        * clean split parents
245        */
246       Map<HRegionInfo, Result> splitParents = scanTriple.getThird();
247 
248       // Now work on our list of found parents. See if any we can clean up.
249       int splitCleaned = 0;
250       // regions whose parents are still around
251       HashSet<String> parentNotCleaned = new HashSet<String>();
252       for (Map.Entry<HRegionInfo, Result> e : splitParents.entrySet()) {
253         if (!parentNotCleaned.contains(e.getKey().getEncodedName()) &&
254             cleanParent(e.getKey(), e.getValue())) {
255           splitCleaned++;
256         } else {
257           // We could not clean the parent, so it's daughters should not be
258           // cleaned either (HBASE-6160)
259           PairOfSameType<HRegionInfo> daughters =
260               MetaTableAccessor.getDaughterRegions(e.getValue());
261           parentNotCleaned.add(daughters.getFirst().getEncodedName());
262           parentNotCleaned.add(daughters.getSecond().getEncodedName());
263         }
264       }
265       if ((mergeCleaned + splitCleaned) != 0) {
266         LOG.info("Scanned " + count + " catalog row(s), gc'd " + mergeCleaned
267             + " unreferenced merged region(s) and " + splitCleaned
268             + " unreferenced parent region(s)");
269       } else if (LOG.isTraceEnabled()) {
270         LOG.trace("Scanned " + count + " catalog row(s), gc'd " + mergeCleaned
271             + " unreferenced merged region(s) and " + splitCleaned
272             + " unreferenced parent region(s)");
273       }
274       return mergeCleaned + splitCleaned;
275     } finally {
276       alreadyRunning.set(false);
277     }
278   }
279 
280   /**
281    * Compare HRegionInfos in a way that has split parents sort BEFORE their
282    * daughters.
283    */
284   static class SplitParentFirstComparator implements Comparator<HRegionInfo> {
285     Comparator<byte[]> rowEndKeyComparator = new Bytes.RowEndKeyComparator();
286     @Override
287     public int compare(HRegionInfo left, HRegionInfo right) {
288       // This comparator differs from the one HRegionInfo in that it sorts
289       // parent before daughters.
290       if (left == null) return -1;
291       if (right == null) return 1;
292       // Same table name.
293       int result = left.getTable().compareTo(right.getTable());
294       if (result != 0) return result;
295       // Compare start keys.
296       result = Bytes.compareTo(left.getStartKey(), right.getStartKey());
297       if (result != 0) return result;
298       // Compare end keys, but flip the operands so parent comes first
299       result = rowEndKeyComparator.compare(right.getEndKey(), left.getEndKey());
300 
301       return result;
302     }
303   }
304 
305   /**
306    * If daughters no longer hold reference to the parents, delete the parent.
307    * @param parent HRegionInfo of split offlined parent
308    * @param rowContent Content of <code>parent</code> row in
309    * <code>metaRegionName</code>
310    * @return True if we removed <code>parent</code> from meta table and from
311    * the filesystem.
312    * @throws IOException
313    */
314   boolean cleanParent(final HRegionInfo parent, Result rowContent)
315   throws IOException {
316     boolean result = false;
317     // Check whether it is a merged region and not clean reference
318     // No necessary to check MERGEB_QUALIFIER because these two qualifiers will
319     // be inserted/deleted together
320     if (rowContent.getValue(HConstants.CATALOG_FAMILY,
321         HConstants.MERGEA_QUALIFIER) != null) {
322       // wait cleaning merge region first
323       return result;
324     }
325     // Run checks on each daughter split.
326     PairOfSameType<HRegionInfo> daughters = MetaTableAccessor.getDaughterRegions(rowContent);
327     Pair<Boolean, Boolean> a = checkDaughterInFs(parent, daughters.getFirst());
328     Pair<Boolean, Boolean> b = checkDaughterInFs(parent, daughters.getSecond());
329     if (hasNoReferences(a) && hasNoReferences(b)) {
330       LOG.debug("Deleting region " + parent.getRegionNameAsString() +
331         " because daughter splits no longer hold references");
332       FileSystem fs = this.services.getMasterFileSystem().getFileSystem();
333       if (LOG.isTraceEnabled()) LOG.trace("Archiving parent region: " + parent);
334       HFileArchiver.archiveRegion(this.services.getConfiguration(), fs, parent);
335       MetaTableAccessor.deleteRegion(this.connection, parent);
336       result = true;
337     }
338     return result;
339   }
340 
341   /**
342    * @param p A pair where the first boolean says whether or not the daughter
343    * region directory exists in the filesystem and then the second boolean says
344    * whether the daughter has references to the parent.
345    * @return True the passed <code>p</code> signifies no references.
346    */
347   private boolean hasNoReferences(final Pair<Boolean, Boolean> p) {
348     return !p.getFirst() || !p.getSecond();
349   }
350 
351   /**
352    * Checks if a daughter region -- either splitA or splitB -- still holds
353    * references to parent.
354    * @param parent Parent region
355    * @param daughter Daughter region
356    * @return A pair where the first boolean says whether or not the daughter
357    * region directory exists in the filesystem and then the second boolean says
358    * whether the daughter has references to the parent.
359    * @throws IOException
360    */
361   Pair<Boolean, Boolean> checkDaughterInFs(final HRegionInfo parent, final HRegionInfo daughter)
362   throws IOException {
363     if (daughter == null)  {
364       return new Pair<Boolean, Boolean>(Boolean.FALSE, Boolean.FALSE);
365     }
366 
367     FileSystem fs = this.services.getMasterFileSystem().getFileSystem();
368     Path rootdir = this.services.getMasterFileSystem().getRootDir();
369     Path tabledir = FSUtils.getTableDir(rootdir, daughter.getTable());
370 
371     Path daughterRegionDir = new Path(tabledir, daughter.getEncodedName());
372 
373     HRegionFileSystem regionFs = null;
374 
375     try {
376       if (!FSUtils.isExists(fs, daughterRegionDir)) {
377         return new Pair<Boolean, Boolean>(Boolean.FALSE, Boolean.FALSE);
378       }
379     } catch (IOException ioe) {
380       LOG.warn("Error trying to determine if daughter region exists, " +
381                "assuming exists and has references", ioe);
382       return new Pair<Boolean, Boolean>(Boolean.TRUE, Boolean.TRUE);
383     }
384 
385     try {
386       regionFs = HRegionFileSystem.openRegionFromFileSystem(
387           this.services.getConfiguration(), fs, tabledir, daughter, true);
388     } catch (IOException e) {
389       LOG.warn("Error trying to determine referenced files from : " + daughter.getEncodedName()
390           + ", to: " + parent.getEncodedName() + " assuming has references", e);
391       return new Pair<Boolean, Boolean>(Boolean.TRUE, Boolean.TRUE);
392     }
393 
394     boolean references = false;
395     HTableDescriptor parentDescriptor = getTableDescriptor(parent.getTable());
396     for (HColumnDescriptor family: parentDescriptor.getFamilies()) {
397       if ((references = regionFs.hasReferences(family.getNameAsString()))) {
398         break;
399       }
400     }
401     return new Pair<Boolean, Boolean>(Boolean.TRUE, Boolean.valueOf(references));
402   }
403 
404   private HTableDescriptor getTableDescriptor(final TableName tableName)
405       throws FileNotFoundException, IOException {
406     return this.services.getTableDescriptors().get(tableName);
407   }
408 
409   /**
410    * Checks if the specified region has merge qualifiers, if so, try to clean
411    * them
412    * @param region
413    * @return true if the specified region doesn't have merge qualifier now
414    * @throws IOException
415    */
416   public boolean cleanMergeQualifier(final HRegionInfo region)
417       throws IOException {
418     // Get merge regions if it is a merged region and already has merge
419     // qualifier
420     Pair<HRegionInfo, HRegionInfo> mergeRegions = MetaTableAccessor
421         .getRegionsFromMergeQualifier(this.services.getConnection(),
422           region.getRegionName());
423     if (mergeRegions == null
424         || (mergeRegions.getFirst() == null && mergeRegions.getSecond() == null)) {
425       // It doesn't have merge qualifier, no need to clean
426       return true;
427     }
428     // It shouldn't happen, we must insert/delete these two qualifiers together
429     if (mergeRegions.getFirst() == null || mergeRegions.getSecond() == null) {
430       LOG.error("Merged region " + region.getRegionNameAsString()
431           + " has only one merge qualifier in META.");
432       return false;
433     }
434     return cleanMergeRegion(region, mergeRegions.getFirst(),
435         mergeRegions.getSecond());
436   }
437 }