View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  
20  package org.apache.hadoop.hbase.regionserver;
21  
22  import java.io.FileNotFoundException;
23  import java.io.IOException;
24  import java.util.regex.Matcher;
25  import java.util.regex.Pattern;
26  
27  import org.apache.commons.logging.Log;
28  import org.apache.commons.logging.LogFactory;
29  import org.apache.hadoop.hbase.classification.InterfaceAudience;
30  import org.apache.hadoop.conf.Configuration;
31  import org.apache.hadoop.fs.FileStatus;
32  import org.apache.hadoop.fs.FileSystem;
33  import org.apache.hadoop.fs.Path;
34  import org.apache.hadoop.hbase.HDFSBlocksDistribution;
35  import org.apache.hadoop.hbase.io.FSDataInputStreamWrapper;
36  import org.apache.hadoop.hbase.io.HFileLink;
37  import org.apache.hadoop.hbase.io.HalfStoreFileReader;
38  import org.apache.hadoop.hbase.io.Reference;
39  import org.apache.hadoop.hbase.io.hfile.CacheConfig;
40  import org.apache.hadoop.hbase.util.FSUtils;
41  
42  /**
43   * Describe a StoreFile (hfile, reference, link)
44   */
45  @InterfaceAudience.Private
46  public class StoreFileInfo {
47    private static final Log LOG = LogFactory.getLog(StoreFileInfo.class);
48  
49    /**
50     * A non-capture group, for hfiles, so that this can be embedded.
51     * HFiles are uuid ([0-9a-z]+). Bulk loaded hfiles has (_SeqId_[0-9]+_) has suffix.
52     * The mob del file has (_del) as suffix.
53     */
54    public static final String HFILE_NAME_REGEX = "[0-9a-f]+(?:(?:_SeqId_[0-9]+_)|(?:_del))?";
55  
56    /** Regex that will work for hfiles */
57    private static final Pattern HFILE_NAME_PATTERN =
58      Pattern.compile("^(" + HFILE_NAME_REGEX + ")");
59  
60    /**
61     * A non-capture group, for del files, so that this can be embedded.
62     * A del file has (_del) as suffix.
63     */
64    public static final String DELFILE_NAME_REGEX = "[0-9a-f]+(?:_del)";
65  
66    /** Regex that will work for del files */
67    private static final Pattern DELFILE_NAME_PATTERN =
68      Pattern.compile("^(" + DELFILE_NAME_REGEX + ")");
69  
70    /**
71     * Regex that will work for straight reference names (<hfile>.<parentEncRegion>)
72     * and hfilelink reference names (<table>=<region>-<hfile>.<parentEncRegion>)
73     * If reference, then the regex has more than just one group.
74     * Group 1, hfile/hfilelink pattern, is this file's id.
75     * Group 2 '(.+)' is the reference's parent region name.
76     */
77    private static final Pattern REF_NAME_PATTERN =
78      Pattern.compile(String.format("^(%s|%s)\\.(.+)$",
79        HFILE_NAME_REGEX, HFileLink.LINK_NAME_REGEX));
80  
81    // Configuration
82    private Configuration conf;
83  
84    // FileSystem handle
85    private final FileSystem fs;
86  
87    // HDFS blocks distribution information
88    private HDFSBlocksDistribution hdfsBlocksDistribution = null;
89  
90    // If this storefile references another, this is the reference instance.
91    private final Reference reference;
92  
93    // If this storefile is a link to another, this is the link instance.
94    private final HFileLink link;
95  
96    private final Path initialPath;
97  
98    private RegionCoprocessorHost coprocessorHost;
99  
100   /**
101    * Create a Store File Info
102    * @param conf the {@link Configuration} to use
103    * @param fs The current file system to use.
104    * @param initialPath The {@link Path} of the file
105    */
106   public StoreFileInfo(final Configuration conf, final FileSystem fs, final Path initialPath)
107       throws IOException {
108     assert fs != null;
109     assert initialPath != null;
110     assert conf != null;
111 
112     this.fs = fs;
113     this.conf = conf;
114     this.initialPath = initialPath;
115     Path p = initialPath;
116     if (HFileLink.isHFileLink(p)) {
117       // HFileLink
118       this.reference = null;
119       this.link = HFileLink.buildFromHFileLinkPattern(conf, p);
120       if (LOG.isTraceEnabled()) LOG.trace(p + " is a link");
121     } else if (isReference(p)) {
122       this.reference = Reference.read(fs, p);
123       Path referencePath = getReferredToFile(p);
124       if (HFileLink.isHFileLink(referencePath)) {
125         // HFileLink Reference
126         this.link = HFileLink.buildFromHFileLinkPattern(conf, referencePath);
127       } else {
128         // Reference
129         this.link = null;
130       }
131       if (LOG.isTraceEnabled()) LOG.trace(p + " is a " + reference.getFileRegion() +
132               " reference to " + referencePath);
133     } else if (isHFile(p)) {
134       // HFile
135       this.reference = null;
136       this.link = null;
137     } else {
138       throw new IOException("path=" + p + " doesn't look like a valid StoreFile");
139     }
140   }
141 
/**
 * Create a Store File Info from a {@link FileStatus}; only the status' path is used and
 * the work is delegated to the {@link Path}-based constructor.
 * @param conf the {@link Configuration} to use
 * @param fs The current file system to use.
 * @param fileStatus The {@link FileStatus} of the file
 * @throws IOException if the path-based constructor fails
 */
public StoreFileInfo(
    final Configuration conf,
    final FileSystem fs,
    final FileStatus fileStatus) throws IOException {
  this(conf, fs, fileStatus.getPath());
}
152 
/**
 * Create a Store File Info from an already built HFileLink.
 * @param conf the {@link Configuration} to use
 * @param fs The current file system to use.
 * @param fileStatus The {@link FileStatus} of the file; may be null
 * @param link the {@link HFileLink} this store file stands for
 * @throws IOException declared for callers; not thrown by the visible body
 */
public StoreFileInfo(final Configuration conf, final FileSystem fs, final FileStatus fileStatus,
    final HFileLink link) throws IOException {
  this.conf = conf;
  this.fs = fs;
  // A link-backed info never carries a reference.
  this.link = link;
  this.reference = null;
  // initialPath can be null only if we get a link.
  if (fileStatus == null) {
    this.initialPath = null;
  } else {
    this.initialPath = fileStatus.getPath();
  }
}
170 
/**
 * Create a Store File Info from an already loaded {@link Reference}. No link is attached.
 * @param conf the {@link Configuration} to use
 * @param fs The current file system to use.
 * @param fileStatus The {@link FileStatus} of the reference file; its path becomes the
 *        path of this info
 * @param reference the {@link Reference} this store file stands for
 * @throws IOException declared for callers; not thrown by the visible body
 */
public StoreFileInfo(final Configuration conf, final FileSystem fs, final FileStatus fileStatus,
    final Reference reference) throws IOException {
  this.conf = conf;
  this.fs = fs;
  this.reference = reference;
  this.link = null;
  this.initialPath = fileStatus.getPath();
}
188 
189   /**
190    * Sets the region coprocessor env.
191    * @param coprocessorHost
192    */
193   public void setRegionCoprocessorHost(RegionCoprocessorHost coprocessorHost) {
194     this.coprocessorHost = coprocessorHost;
195   }
196 
197   /*
198    * @return the Reference object associated to this StoreFileInfo.
199    *         null if the StoreFile is not a reference.
200    */
201   public Reference getReference() {
202     return this.reference;
203   }
204 
205   /** @return True if the store file is a Reference */
206   public boolean isReference() {
207     return this.reference != null;
208   }
209 
210   /** @return True if the store file is a top Reference */
211   public boolean isTopReference() {
212     return this.reference != null && Reference.isTopFileRegion(this.reference.getFileRegion());
213   }
214 
215   /** @return True if the store file is a link */
216   public boolean isLink() {
217     return this.link != null && this.reference == null;
218   }
219 
220   /** @return the HDFS block distribution */
221   public HDFSBlocksDistribution getHDFSBlockDistribution() {
222     return this.hdfsBlocksDistribution;
223   }
224 
225   /**
226    * Open a Reader for the StoreFile
227    * @param fs The current file system to use.
228    * @param cacheConf The cache configuration and block cache reference.
229    * @return The StoreFile.Reader for the file
230    */
231   public StoreFile.Reader open(final FileSystem fs,
232       final CacheConfig cacheConf, final boolean canUseDropBehind) throws IOException {
233     FSDataInputStreamWrapper in;
234     FileStatus status;
235 
236     final boolean doDropBehind = canUseDropBehind && cacheConf.shouldDropBehindCompaction();
237     if (this.link != null) {
238       // HFileLink
239       in = new FSDataInputStreamWrapper(fs, this.link, doDropBehind);
240       status = this.link.getFileStatus(fs);
241     } else if (this.reference != null) {
242       // HFile Reference
243       Path referencePath = getReferredToFile(this.getPath());
244       in = new FSDataInputStreamWrapper(fs, referencePath,
245           doDropBehind);
246       status = fs.getFileStatus(referencePath);
247     } else {
248       in = new FSDataInputStreamWrapper(fs, this.getPath(),
249           doDropBehind);
250       status = fs.getFileStatus(initialPath);
251     }
252     long length = status.getLen();
253     hdfsBlocksDistribution = computeHDFSBlocksDistribution(fs);
254 
255     StoreFile.Reader reader = null;
256     if (this.coprocessorHost != null) {
257       reader = this.coprocessorHost.preStoreFileReaderOpen(fs, this.getPath(), in, length,
258         cacheConf, reference);
259     }
260     if (reader == null) {
261       if (this.reference != null) {
262         reader = new HalfStoreFileReader(fs, this.getPath(), in, length, cacheConf, reference,
263           conf);
264       } else {
265         reader = new StoreFile.Reader(fs, status.getPath(), in, length, cacheConf, conf);
266       }
267     }
268     if (this.coprocessorHost != null) {
269       reader = this.coprocessorHost.postStoreFileReaderOpen(fs, this.getPath(), in, length,
270         cacheConf, reference, reader);
271     }
272     return reader;
273   }
274 
275   /**
276    * Compute the HDFS Block Distribution for this StoreFile
277    */
278   public HDFSBlocksDistribution computeHDFSBlocksDistribution(final FileSystem fs)
279       throws IOException {
280 
281     // guard against the case where we get the FileStatus from link, but by the time we
282     // call compute the file is moved again
283     if (this.link != null) {
284       FileNotFoundException exToThrow = null;
285       for (int i = 0; i < this.link.getLocations().length; i++) {
286         try {
287           return computeHDFSBlocksDistributionInternal(fs);
288         } catch (FileNotFoundException ex) {
289           // try the other location
290           exToThrow = ex;
291         }
292       }
293       throw exToThrow;
294     } else {
295       return computeHDFSBlocksDistributionInternal(fs);
296     }
297   }
298 
299   private HDFSBlocksDistribution computeHDFSBlocksDistributionInternal(final FileSystem fs)
300       throws IOException {
301     FileStatus status = getReferencedFileStatus(fs);
302     if (this.reference != null) {
303       return computeRefFileHDFSBlockDistribution(fs, reference, status);
304     } else {
305       return FSUtils.computeHDFSBlocksDistribution(fs, status, 0, status.getLen());
306     }
307   }
308 
309   /**
310    * Get the {@link FileStatus} of the file referenced by this StoreFileInfo
311    * @param fs The current file system to use.
312    * @return The {@link FileStatus} of the file referenced by this StoreFileInfo
313    */
314   public FileStatus getReferencedFileStatus(final FileSystem fs) throws IOException {
315     FileStatus status;
316     if (this.reference != null) {
317       if (this.link != null) {
318         FileNotFoundException exToThrow = null;
319         for (int i = 0; i < this.link.getLocations().length; i++) {
320           // HFileLink Reference
321           try {
322             return link.getFileStatus(fs);
323           } catch (FileNotFoundException ex) {
324             // try the other location
325             exToThrow = ex;
326           }
327         }
328         throw exToThrow;
329       } else {
330         // HFile Reference
331         Path referencePath = getReferredToFile(this.getPath());
332         status = fs.getFileStatus(referencePath);
333       }
334     } else {
335       if (this.link != null) {
336         FileNotFoundException exToThrow = null;
337         for (int i = 0; i < this.link.getLocations().length; i++) {
338           // HFileLink
339           try {
340             return link.getFileStatus(fs);
341           } catch (FileNotFoundException ex) {
342             // try the other location
343             exToThrow = ex;
344           }
345         }
346         throw exToThrow;
347       } else {
348         status = fs.getFileStatus(initialPath);
349       }
350     }
351     return status;
352   }
353 
354   /** @return The {@link Path} of the file */
355   public Path getPath() {
356     return initialPath;
357   }
358 
359   /** @return The {@link FileStatus} of the file */
360   public FileStatus getFileStatus() throws IOException {
361     return getReferencedFileStatus(fs);
362   }
363 
364   /** @return Get the modification time of the file. */
365   public long getModificationTime() throws IOException {
366     return getFileStatus().getModificationTime();
367   }
368 
369   @Override
370   public String toString() {
371     return this.getPath() +
372       (isReference() ? "-" + getReferredToFile(this.getPath()) + "-" + reference : "");
373   }
374 
375   /**
376    * @param path Path to check.
377    * @return True if the path has format of a HFile.
378    */
379   public static boolean isHFile(final Path path) {
380     return isHFile(path.getName());
381   }
382 
383   public static boolean isHFile(final String fileName) {
384     Matcher m = HFILE_NAME_PATTERN.matcher(fileName);
385     return m.matches() && m.groupCount() > 0;
386   }
387 
388   /**
389    * @param path Path to check.
390    * @return True if the path has format of a del file.
391    */
392   public static boolean isDelFile(final Path path) {
393     return isDelFile(path.getName());
394   }
395 
396   /**
397    * @param fileName Sting version of path to validate.
398    * @return True if the file name has format of a del file.
399    */
400   public static boolean isDelFile(final String fileName) {
401     Matcher m = DELFILE_NAME_PATTERN.matcher(fileName);
402     return m.matches() && m.groupCount() > 0;
403   }
404 
405   /**
406    * @param path Path to check.
407    * @return True if the path has format of a HStoreFile reference.
408    */
409   public static boolean isReference(final Path path) {
410     return isReference(path.getName());
411   }
412 
413   /**
414    * @param name file name to check.
415    * @return True if the path has format of a HStoreFile reference.
416    */
417   public static boolean isReference(final String name) {
418     Matcher m = REF_NAME_PATTERN.matcher(name);
419     return m.matches() && m.groupCount() > 1;
420   }
421 
422   /*
423    * Return path to the file referred to by a Reference.  Presumes a directory
424    * hierarchy of <code>${hbase.rootdir}/data/${namespace}/tablename/regionname/familyname</code>.
425    * @param p Path to a Reference file.
426    * @return Calculated path to parent region file.
427    * @throws IllegalArgumentException when path regex fails to match.
428    */
429   public static Path getReferredToFile(final Path p) {
430     Matcher m = REF_NAME_PATTERN.matcher(p.getName());
431     if (m == null || !m.matches()) {
432       LOG.warn("Failed match of store file name " + p.toString());
433       throw new IllegalArgumentException("Failed match of store file name " +
434           p.toString());
435     }
436 
437     // Other region name is suffix on the passed Reference file name
438     String otherRegion = m.group(2);
439     // Tabledir is up two directories from where Reference was written.
440     Path tableDir = p.getParent().getParent().getParent();
441     String nameStrippedOfSuffix = m.group(1);
442     if (LOG.isDebugEnabled()) {
443       LOG.debug("reference '" + p + "' to region=" + otherRegion
444         + " hfile=" + nameStrippedOfSuffix);
445     }
446 
447     // Build up new path with the referenced region in place of our current
448     // region in the reference path.  Also strip regionname suffix from name.
449     return new Path(new Path(new Path(tableDir, otherRegion),
450       p.getParent().getName()), nameStrippedOfSuffix);
451   }
452 
453   /**
454    * Validate the store file name.
455    * @param fileName name of the file to validate
456    * @return <tt>true</tt> if the file could be a valid store file, <tt>false</tt> otherwise
457    */
458   public static boolean validateStoreFileName(final String fileName) {
459     if (HFileLink.isHFileLink(fileName) || isReference(fileName))
460       return(true);
461     return !fileName.contains("-");
462   }
463 
464   /**
465    * Return if the specified file is a valid store file or not.
466    * @param fileStatus The {@link FileStatus} of the file
467    * @return <tt>true</tt> if the file is valid
468    */
469   public static boolean isValid(final FileStatus fileStatus)
470       throws IOException {
471     final Path p = fileStatus.getPath();
472 
473     if (fileStatus.isDirectory())
474       return false;
475 
476     // Check for empty hfile. Should never be the case but can happen
477     // after data loss in hdfs for whatever reason (upgrade, etc.): HBASE-646
478     // NOTE: that the HFileLink is just a name, so it's an empty file.
479     if (!HFileLink.isHFileLink(p) && fileStatus.getLen() <= 0) {
480       LOG.warn("Skipping " + p + " because it is empty. HBASE-646 DATA LOSS?");
481       return false;
482     }
483 
484     return validateStoreFileName(p.getName());
485   }
486 
487   /**
488    * helper function to compute HDFS blocks distribution of a given reference
489    * file.For reference file, we don't compute the exact value. We use some
490    * estimate instead given it might be good enough. we assume bottom part
491    * takes the first half of reference file, top part takes the second half
492    * of the reference file. This is just estimate, given
493    * midkey ofregion != midkey of HFile, also the number and size of keys vary.
494    * If this estimate isn't good enough, we can improve it later.
495    * @param fs  The FileSystem
496    * @param reference  The reference
497    * @param status  The reference FileStatus
498    * @return HDFS blocks distribution
499    */
500   private static HDFSBlocksDistribution computeRefFileHDFSBlockDistribution(
501       final FileSystem fs, final Reference reference, final FileStatus status)
502       throws IOException {
503     if (status == null) {
504       return null;
505     }
506 
507     long start = 0;
508     long length = 0;
509 
510     if (Reference.isTopFileRegion(reference.getFileRegion())) {
511       start = status.getLen()/2;
512       length = status.getLen() - status.getLen()/2;
513     } else {
514       start = 0;
515       length = status.getLen()/2;
516     }
517     return FSUtils.computeHDFSBlocksDistribution(fs, status, start, length);
518   }
519 
520   @Override
521   public boolean equals(Object that) {
522     if (this == that) return true;
523     if (that == null) return false;
524 
525     if (!(that instanceof StoreFileInfo)) return false;
526 
527     StoreFileInfo o = (StoreFileInfo)that;
528     if (initialPath != null && o.initialPath == null) return false;
529     if (initialPath == null && o.initialPath != null) return false;
530     if (initialPath != o.initialPath && initialPath != null
531             && !initialPath.equals(o.initialPath)) return false;
532 
533     if (reference != null && o.reference == null) return false;
534     if (reference == null && o.reference != null) return false;
535     if (reference != o.reference && reference != null
536             && !reference.equals(o.reference)) return false;
537 
538     if (link != null && o.link == null) return false;
539     if (link == null && o.link != null) return false;
540     if (link != o.link && link != null && !link.equals(o.link)) return false;
541 
542     return true;
543   };
544 
545 
546   @Override
547   public int hashCode() {
548     int hash = 17;
549     hash = hash * 31 + ((reference == null) ? 0 : reference.hashCode());
550     hash = hash * 31 + ((initialPath ==  null) ? 0 : initialPath.hashCode());
551     hash = hash * 31 + ((link == null) ? 0 : link.hashCode());
552     return  hash;
553   }
554 }