View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.util;
19  
20  import javax.annotation.Nullable;
21  import java.io.FileNotFoundException;
22  import java.io.IOException;
23  import java.util.Comparator;
24  import java.util.List;
25  import java.util.Map;
26  import java.util.TreeMap;
27  import java.util.concurrent.ConcurrentHashMap;
28  import java.util.regex.Matcher;
29  import java.util.regex.Pattern;
30  
31  import com.google.common.annotations.VisibleForTesting;
32  import com.google.common.primitives.Ints;
33  import org.apache.commons.lang.NotImplementedException;
34  import org.apache.commons.logging.Log;
35  import org.apache.commons.logging.LogFactory;
36  import org.apache.hadoop.conf.Configuration;
37  import org.apache.hadoop.fs.FSDataInputStream;
38  import org.apache.hadoop.fs.FSDataOutputStream;
39  import org.apache.hadoop.fs.FileStatus;
40  import org.apache.hadoop.fs.FileSystem;
41  import org.apache.hadoop.fs.Path;
42  import org.apache.hadoop.fs.PathFilter;
43  import org.apache.hadoop.hbase.HConstants;
44  import org.apache.hadoop.hbase.HTableDescriptor;
45  import org.apache.hadoop.hbase.TableDescriptor;
46  import org.apache.hadoop.hbase.TableDescriptors;
47  import org.apache.hadoop.hbase.TableInfoMissingException;
48  import org.apache.hadoop.hbase.TableName;
49  import org.apache.hadoop.hbase.classification.InterfaceAudience;
50  import org.apache.hadoop.hbase.exceptions.DeserializationException;
51  import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
52  
53  /**
54   * Implementation of {@link TableDescriptors} that reads descriptors from the
55   * passed filesystem.  It expects descriptors to be in a file in the
 * {@link #TABLEINFO_DIR} subdir of the table's directory in FS.  Can be read-only,
 * i.e. it never modifies the filesystem, or read-write.
58   *
59   * <p>Also has utility for keeping up the table descriptors tableinfo file.
60   * The table schema file is kept in the {@link #TABLEINFO_DIR} subdir
61   * of the table directory in the filesystem.
62   * It has a {@link #TABLEINFO_FILE_PREFIX} and then a suffix that is the
63   * edit sequenceid: e.g. <code>.tableinfo.0000000003</code>.  This sequenceid
64   * is always increasing.  It starts at zero.  The table schema file with the
 * highest sequenceid has the most recent schema edit. Usually there is only one
 * file, the most recent, but there may be short periods where there is more
 * than one file. Old files are eventually cleaned.  Presumption is that there
68   * will not be lots of concurrent clients making table schema edits.  If so,
69   * the below needs a bit of a reworking and perhaps some supporting api in hdfs.
70   */
71  @InterfaceAudience.Private
72  public class FSTableDescriptors implements TableDescriptors {
  private static final Log LOG = LogFactory.getLog(FSTableDescriptors.class);
  /** Filesystem the table directories and tableinfo files are read from and written to. */
  private final FileSystem fs;
  /** HBase root directory; table directories are resolved relative to it. */
  private final Path rootdir;
  /** When true, every operation that would modify the filesystem throws instead. */
  private final boolean fsreadonly;
  /** Whether descriptors read from the filesystem are kept in, and served from, {@link #cache}. */
  private volatile boolean usecache;
  /**
   * Set by {@link #getAllDescriptors()} after scanning the table directories; while it and
   * {@link #usecache} are both true, {@link #getAllDescriptors()} answers from {@link #cache}.
   */
  private volatile boolean fsvisited;

  // Plain (non-atomic) counters exposed for tests: lookups answered without reading the
  // filesystem, and total getDescriptor() calls.
  @VisibleForTesting long cachehits = 0;
  @VisibleForTesting long invocations = 0;

  /** The file name prefix used to store HTD in HDFS  */
  static final String TABLEINFO_FILE_PREFIX = ".tableinfo";
  /** Name of the subdirectory of a table directory that holds the tableinfo files. */
  static final String TABLEINFO_DIR = ".tabledesc";
  /** Name of the subdirectory of a table directory where new tableinfo files are staged. */
  static final String TMP_DIR = ".tmp";

  // This cache does not age out the old stuff.  Thinking is that the amount
  // of data we keep up in here is so small, no need to do occasional purge.
  // TODO.
  private final Map<TableName, TableDescriptor> cache =
    new ConcurrentHashMap<TableName, TableDescriptor>();

  /**
   * Table descriptor for <code>hbase:meta</code> catalog table.  Built once in the
   * constructor and never stored in {@link #cache}.
   */
  private final HTableDescriptor metaTableDescritor;
98  
  /**
   * Construct a FSTableDescriptors instance using the hbase root dir of the given
   * conf and the filesystem where that root dir lives.
   * This instance can do write operations (is not read only).
   *
   * @param conf configuration from which the filesystem and root dir are resolved
   * @throws IOException if resolving the filesystem or root dir, or the delegated
   *   constructor, fails
   */
  public FSTableDescriptors(final Configuration conf) throws IOException {
    this(conf, FSUtils.getCurrentFileSystem(conf), FSUtils.getRootDir(conf));
  }
107 
  /**
   * Construct a read-write, caching FSTableDescriptors instance over the given
   * filesystem and root dir (delegates with fsreadonly=false and usecache=true).
   *
   * @param conf configuration handed on to the delegated constructor
   * @param fs filesystem holding the table directories
   * @param rootdir hbase root dir on <code>fs</code>
   * @throws IOException if the delegated constructor fails
   */
  public FSTableDescriptors(final Configuration conf, final FileSystem fs, final Path rootdir)
  throws IOException {
    this(conf, fs, rootdir, false, true);
  }
112 
113   /**
114    * @param fsreadonly True if we are read-only when it comes to filesystem
115    * operations; i.e. on remove, we do not do delete in fs.
116    */
117   public FSTableDescriptors(final Configuration conf, final FileSystem fs,
118     final Path rootdir, final boolean fsreadonly, final boolean usecache) throws IOException {
119     super();
120     this.fs = fs;
121     this.rootdir = rootdir;
122     this.fsreadonly = fsreadonly;
123     this.usecache = usecache;
124 
125     this.metaTableDescritor = TableDescriptor.metaTableDescriptor(conf);
126   }
127 
128   public void setCacheOn() throws IOException {
129     this.cache.clear();
130     this.usecache = true;
131   }
132 
  /**
   * Disables the descriptor cache and discards everything cached so far.
   *
   * @throws IOException declared by the signature; the body itself performs no I/O
   */
  public void setCacheOff() throws IOException {
    // Stop consulting the cache before emptying it.
    this.usecache = false;
    this.cache.clear();
  }
137 
138   @VisibleForTesting
139   public boolean isUsecache() {
140     return this.usecache;
141   }
142 
143   /**
144    * Get the current table descriptor for the given table, or null if none exists.
145    *
146    * Uses a local cache of the descriptor but still checks the filesystem on each call
147    * to see if a newer file has been created since the cached one was read.
148    */
149   @Override
150   @Nullable
151   public TableDescriptor getDescriptor(final TableName tablename)
152   throws IOException {
153     invocations++;
154     if (TableName.META_TABLE_NAME.equals(tablename)) {
155       cachehits++;
156       return new TableDescriptor(metaTableDescritor);
157     }
158     // hbase:meta is already handled. If some one tries to get the descriptor for
159     // .logs, .oldlogs or .corrupt throw an exception.
160     if (HConstants.HBASE_NON_USER_TABLE_DIRS.contains(tablename.getNameAsString())) {
161        throw new IOException("No descriptor found for non table = " + tablename);
162     }
163 
164     if (usecache) {
165       // Look in cache of descriptors.
166       TableDescriptor cachedtdm = this.cache.get(tablename);
167       if (cachedtdm != null) {
168         cachehits++;
169         return cachedtdm;
170       }
171     }
172     TableDescriptor tdmt = null;
173     try {
174       tdmt = getTableDescriptorFromFs(fs, rootdir, tablename, !fsreadonly);
175     } catch (NullPointerException e) {
176       LOG.debug("Exception during readTableDecriptor. Current table name = "
177           + tablename, e);
178     } catch (IOException ioe) {
179       LOG.debug("Exception during readTableDecriptor. Current table name = "
180           + tablename, ioe);
181     }
182     // last HTD written wins
183     if (usecache && tdmt != null) {
184       this.cache.put(tablename, tdmt);
185     }
186 
187     return tdmt;
188   }
189 
190   /**
191    * Get the current table descriptor for the given table, or null if none exists.
192    *
193    * Uses a local cache of the descriptor but still checks the filesystem on each call
194    * to see if a newer file has been created since the cached one was read.
195    */
196   @Override
197   public HTableDescriptor get(TableName tableName) throws IOException {
198     if (TableName.META_TABLE_NAME.equals(tableName)) {
199       cachehits++;
200       return metaTableDescritor;
201     }
202     TableDescriptor descriptor = getDescriptor(tableName);
203     return descriptor == null ? null : descriptor.getHTableDescriptor();
204   }
205 
206   /**
207    * Returns a map from table name to table descriptor for all tables.
208    */
209   @Override
210   public Map<String, TableDescriptor> getAllDescriptors()
211   throws IOException {
212     Map<String, TableDescriptor> tds = new TreeMap<String, TableDescriptor>();
213 
214     if (fsvisited && usecache) {
215       for (Map.Entry<TableName, TableDescriptor> entry: this.cache.entrySet()) {
216         tds.put(entry.getKey().toString(), entry.getValue());
217       }
218       // add hbase:meta to the response
219       tds.put(this.metaTableDescritor.getNameAsString(),
220           new TableDescriptor(metaTableDescritor));
221     } else {
222       LOG.debug("Fetching table descriptors from the filesystem.");
223       boolean allvisited = true;
224       for (Path d : FSUtils.getTableDirs(fs, rootdir)) {
225         TableDescriptor htd = null;
226         try {
227           htd = getDescriptor(FSUtils.getTableName(d));
228         } catch (FileNotFoundException fnfe) {
229           // inability of retrieving one HTD shouldn't stop getting the remaining
230           LOG.warn("Trouble retrieving htd", fnfe);
231         }
232         if (htd == null) {
233           allvisited = false;
234           continue;
235         } else {
236           tds.put(htd.getHTableDescriptor().getTableName().getNameAsString(), htd);
237         }
238         fsvisited = allvisited;
239       }
240     }
241     return tds;
242   }
243 
244   /**
245    * Returns a map from table name to table descriptor for all tables.
246    */
247   @Override
248   public Map<String, HTableDescriptor> getAll() throws IOException {
249     Map<String, HTableDescriptor> htds = new TreeMap<String, HTableDescriptor>();
250     Map<String, TableDescriptor> allDescriptors = getAllDescriptors();
251     for (Map.Entry<String, TableDescriptor> entry : allDescriptors
252         .entrySet()) {
253       htds.put(entry.getKey(), entry.getValue().getHTableDescriptor());
254     }
255     return htds;
256   }
257 
258   /**
259     * Find descriptors by namespace.
260     * @see #get(org.apache.hadoop.hbase.TableName)
261     */
262   @Override
263   public Map<String, HTableDescriptor> getByNamespace(String name)
264   throws IOException {
265     Map<String, HTableDescriptor> htds = new TreeMap<String, HTableDescriptor>();
266     List<Path> tableDirs =
267         FSUtils.getLocalTableDirs(fs, FSUtils.getNamespaceDir(rootdir, name));
268     for (Path d: tableDirs) {
269       HTableDescriptor htd = null;
270       try {
271         htd = get(FSUtils.getTableName(d));
272       } catch (FileNotFoundException fnfe) {
273         // inability of retrieving one HTD shouldn't stop getting the remaining
274         LOG.warn("Trouble retrieving htd", fnfe);
275       }
276       if (htd == null) continue;
277       htds.put(FSUtils.getTableName(d).getNameAsString(), htd);
278     }
279     return htds;
280   }
281 
282   /**
283    * Adds (or updates) the table descriptor to the FileSystem
284    * and updates the local cache with it.
285    */
286   @Override
287   public void add(TableDescriptor htd) throws IOException {
288     if (fsreadonly) {
289       throw new NotImplementedException("Cannot add a table descriptor - in read only mode");
290     }
291     TableName tableName = htd.getHTableDescriptor().getTableName();
292     if (TableName.META_TABLE_NAME.equals(tableName)) {
293       throw new NotImplementedException();
294     }
295     if (HConstants.HBASE_NON_USER_TABLE_DIRS.contains(tableName.getNameAsString())) {
296       throw new NotImplementedException(
297         "Cannot add a table descriptor for a reserved subdirectory name: "
298             + htd.getHTableDescriptor().getNameAsString());
299     }
300     updateTableDescriptor(htd);
301   }
302 
303   /**
304    * Adds (or updates) the table descriptor to the FileSystem
305    * and updates the local cache with it.
306    */
307   @Override
308   public void add(HTableDescriptor htd) throws IOException {
309     if (fsreadonly) {
310       throw new NotImplementedException("Cannot add a table descriptor - in read only mode");
311     }
312     TableName tableName = htd.getTableName();
313     if (TableName.META_TABLE_NAME.equals(tableName)) {
314       throw new NotImplementedException();
315     }
316     if (HConstants.HBASE_NON_USER_TABLE_DIRS.contains(tableName.getNameAsString())) {
317       throw new NotImplementedException(
318           "Cannot add a table descriptor for a reserved subdirectory name: "
319               + htd.getNameAsString());
320     }
321     TableDescriptor descriptor = getDescriptor(htd.getTableName());
322     if (descriptor == null)
323       descriptor = new TableDescriptor(htd);
324     else
325       descriptor.setHTableDescriptor(htd);
326     updateTableDescriptor(descriptor);
327   }
328 
329   /**
330    * Removes the table descriptor from the local cache and returns it.
331    * If not in read only mode, it also deletes the entire table directory(!)
332    * from the FileSystem.
333    */
334   @Override
335   public HTableDescriptor remove(final TableName tablename)
336   throws IOException {
337     if (fsreadonly) {
338       throw new NotImplementedException("Cannot remove a table descriptor - in read only mode");
339     }
340     Path tabledir = getTableDir(tablename);
341     if (this.fs.exists(tabledir)) {
342       if (!this.fs.delete(tabledir, true)) {
343         throw new IOException("Failed delete of " + tabledir.toString());
344       }
345     }
346     TableDescriptor descriptor = this.cache.remove(tablename);
347     if (descriptor == null) {
348       return null;
349     } else {
350       return descriptor.getHTableDescriptor();
351     }
352   }
353 
354   /**
355    * Checks if a current table info file exists for the given table
356    *
357    * @param tableName name of table
358    * @return true if exists
359    * @throws IOException
360    */
361   public boolean isTableInfoExists(TableName tableName) throws IOException {
362     return getTableInfoPath(tableName) != null;
363   }
364 
365   /**
366    * Find the most current table info file for the given table in the hbase root directory.
367    * @return The file status of the current table info file or null if it does not exist
368    */
369   private FileStatus getTableInfoPath(final TableName tableName) throws IOException {
370     Path tableDir = getTableDir(tableName);
371     return getTableInfoPath(tableDir);
372   }
373 
374   private FileStatus getTableInfoPath(Path tableDir)
375   throws IOException {
376     return getTableInfoPath(fs, tableDir, !fsreadonly);
377   }
378 
379   /**
380    * Find the most current table info file for the table located in the given table directory.
381    *
382    * Looks within the {@link #TABLEINFO_DIR} subdirectory of the given directory for any table info
383    * files and takes the 'current' one - meaning the one with the highest sequence number if present
384    * or no sequence number at all if none exist (for backward compatibility from before there
385    * were sequence numbers).
386    *
387    * @return The file status of the current table info file or null if it does not exist
388    * @throws IOException
389    */
390   public static FileStatus getTableInfoPath(FileSystem fs, Path tableDir)
391   throws IOException {
392     return getTableInfoPath(fs, tableDir, false);
393   }
394 
395   /**
396    * Find the most current table info file for the table in the given table directory.
397    *
398    * Looks within the {@link #TABLEINFO_DIR} subdirectory of the given directory for any table info
399    * files and takes the 'current' one - meaning the one with the highest sequence number if
400    * present or no sequence number at all if none exist (for backward compatibility from before
401    * there were sequence numbers).
402    * If there are multiple table info files found and removeOldFiles is true it also deletes the
403    * older files.
404    *
405    * @return The file status of the current table info file or null if none exist
406    * @throws IOException
407    */
408   private static FileStatus getTableInfoPath(FileSystem fs, Path tableDir, boolean removeOldFiles)
409   throws IOException {
410     Path tableInfoDir = new Path(tableDir, TABLEINFO_DIR);
411     return getCurrentTableInfoStatus(fs, tableInfoDir, removeOldFiles);
412   }
413 
414   /**
415    * Find the most current table info file in the given directory
416    *
417    * Looks within the given directory for any table info files
418    * and takes the 'current' one - meaning the one with the highest sequence number if present
419    * or no sequence number at all if none exist (for backward compatibility from before there
420    * were sequence numbers).
421    * If there are multiple possible files found
422    * and the we're not in read only mode it also deletes the older files.
423    *
424    * @return The file status of the current table info file or null if it does not exist
425    * @throws IOException
426    */
427   // only visible for FSTableDescriptorMigrationToSubdir, can be removed with that
428   static FileStatus getCurrentTableInfoStatus(FileSystem fs, Path dir, boolean removeOldFiles)
429   throws IOException {
430     FileStatus [] status = FSUtils.listStatus(fs, dir, TABLEINFO_PATHFILTER);
431     if (status == null || status.length < 1) return null;
432     FileStatus mostCurrent = null;
433     for (FileStatus file : status) {
434       if (mostCurrent == null || TABLEINFO_FILESTATUS_COMPARATOR.compare(file, mostCurrent) < 0) {
435         mostCurrent = file;
436       }
437     }
438     if (removeOldFiles && status.length > 1) {
439       // Clean away old versions
440       for (FileStatus file : status) {
441         Path path = file.getPath();
442         if (file != mostCurrent) {
443           if (!fs.delete(file.getPath(), false)) {
444             LOG.warn("Failed cleanup of " + path);
445           } else {
446             LOG.debug("Cleaned up old tableinfo file " + path);
447           }
448         }
449       }
450     }
451     return mostCurrent;
452   }
453 
  /**
   * Orders {@link FileStatus} instances in the reverse of their own
   * <code>compareTo</code> order, so the instance that is greatest under
   * <code>FileStatus.compareTo</code> sorts first.
   * NOTE(review): FileStatus.compareTo presumably compares by path, which would put the
   * highest zero-padded sequence id first -- confirm against the Hadoop version in use.
   */
  @VisibleForTesting
  static final Comparator<FileStatus> TABLEINFO_FILESTATUS_COMPARATOR =
  new Comparator<FileStatus>() {
    @Override
    public int compare(FileStatus left, FileStatus right) {
      // Arguments are swapped on purpose to reverse the order.
      return right.compareTo(left);
    }};
465 
466   /**
467    * Return the table directory in HDFS
468    */
469   @VisibleForTesting Path getTableDir(final TableName tableName) {
470     return FSUtils.getTableDir(rootdir, tableName);
471   }
472 
  /** Accepts paths whose last component starts with {@link #TABLEINFO_FILE_PREFIX}. */
  private static final PathFilter TABLEINFO_PATHFILTER = new PathFilter() {
    @Override
    public boolean accept(Path p) {
      // Accept any file whose name starts with TABLEINFO_FILE_PREFIX
      return p.getName().startsWith(TABLEINFO_FILE_PREFIX);
    }};
479 
  /**
   * Width of the sequenceid that is a suffix on a tableinfo file: the number of decimal
   * digits the id is zero-padded to when written and expected to have when parsed.
   */
  @VisibleForTesting static final int WIDTH_OF_SEQUENCE_ID = 10;
484 
485   /*
486    * @param number Number to use as suffix.
487    * @return Returns zero-prefixed decimal version of passed
488    * number (Does absolute in case number is negative).
489    */
490   private static String formatTableInfoSequenceId(final int number) {
491     byte [] b = new byte[WIDTH_OF_SEQUENCE_ID];
492     int d = Math.abs(number);
493     for (int i = b.length - 1; i >= 0; i--) {
494       b[i] = (byte)((d % 10) + '0');
495       d /= 10;
496     }
497     return Bytes.toString(b);
498   }
499 
500   /**
501    * Regex to eat up sequenceid suffix on a .tableinfo file.
502    * Use regex because may encounter oldstyle .tableinfos where there is no
503    * sequenceid on the end.
504    */
505   private static final Pattern TABLEINFO_FILE_REGEX =
506     Pattern.compile(TABLEINFO_FILE_PREFIX + "(\\.([0-9]{" + WIDTH_OF_SEQUENCE_ID + "}))?$");
507 
508   /**
509    * @param p Path to a <code>.tableinfo</code> file.
510    * @return The current editid or 0 if none found.
511    */
512   @VisibleForTesting static int getTableInfoSequenceId(final Path p) {
513     if (p == null) return 0;
514     Matcher m = TABLEINFO_FILE_REGEX.matcher(p.getName());
515     if (!m.matches()) throw new IllegalArgumentException(p.toString());
516     String suffix = m.group(2);
517     if (suffix == null || suffix.length() <= 0) return 0;
518     return Integer.parseInt(m.group(2));
519   }
520 
521   /**
522    * @param sequenceid
523    * @return Name of tableinfo file.
524    */
525   @VisibleForTesting static String getTableInfoFileName(final int sequenceid) {
526     return TABLEINFO_FILE_PREFIX + "." + formatTableInfoSequenceId(sequenceid);
527   }
528 
529   /**
530    * Returns the latest table descriptor for the given table directly from the file system
531    * if it exists, bypassing the local cache.
532    * Returns null if it's not found.
533    */
534   public static TableDescriptor getTableDescriptorFromFs(FileSystem fs,
535       Path hbaseRootDir, TableName tableName) throws IOException {
536     Path tableDir = FSUtils.getTableDir(hbaseRootDir, tableName);
537     return getTableDescriptorFromFs(fs, tableDir);
538   }
539 
540   /**
541    * Returns the latest table descriptor for the given table directly from the file system
542    * if it exists, bypassing the local cache.
543    * Returns null if it's not found.
544    */
545   public static TableDescriptor getTableDescriptorFromFs(FileSystem fs,
546    Path hbaseRootDir, TableName tableName, boolean rewritePb) throws IOException {
547     Path tableDir = FSUtils.getTableDir(hbaseRootDir, tableName);
548     return getTableDescriptorFromFs(fs, tableDir, rewritePb);
549   }
550   /**
551    * Returns the latest table descriptor for the table located at the given directory
552    * directly from the file system if it exists.
553    * @throws TableInfoMissingException if there is no descriptor
554    */
555   public static TableDescriptor getTableDescriptorFromFs(FileSystem fs, Path tableDir)
556     throws IOException {
557     return getTableDescriptorFromFs(fs, tableDir, false);
558   }
559 
560   /**
561    * Returns the latest table descriptor for the table located at the given directory
562    * directly from the file system if it exists.
563    * @throws TableInfoMissingException if there is no descriptor
564    */
565   public static TableDescriptor getTableDescriptorFromFs(FileSystem fs, Path tableDir,
566     boolean rewritePb)
567   throws IOException {
568     FileStatus status = getTableInfoPath(fs, tableDir, false);
569     if (status == null) {
570       throw new TableInfoMissingException("No table descriptor file under " + tableDir);
571     }
572     return readTableDescriptor(fs, status, rewritePb);
573   }
574 
575   private static TableDescriptor readTableDescriptor(FileSystem fs, FileStatus status,
576       boolean rewritePb) throws IOException {
577     int len = Ints.checkedCast(status.getLen());
578     byte [] content = new byte[len];
579     FSDataInputStream fsDataInputStream = fs.open(status.getPath());
580     try {
581       fsDataInputStream.readFully(content);
582     } finally {
583       fsDataInputStream.close();
584     }
585     TableDescriptor td = null;
586     try {
587       td = TableDescriptor.parseFrom(content);
588     } catch (DeserializationException e) {
589       // we have old HTableDescriptor here
590       try {
591         HTableDescriptor htd = HTableDescriptor.parseFrom(content);
592         LOG.warn("Found old table descriptor, converting to new format for table " +
593             htd.getTableName() + "; NOTE table will be in ENABLED state!");
594         td = new TableDescriptor(htd);
595         if (rewritePb) rewriteTableDescriptor(fs, status, td);
596       } catch (DeserializationException e1) {
597         throw new IOException("content=" + Bytes.toShort(content), e);
598       }
599     }
600     if (rewritePb && !ProtobufUtil.isPBMagicPrefix(content)) {
601       // Convert the file over to be pb before leaving here.
602       rewriteTableDescriptor(fs, status, td);
603     }
604     return td;
605   }
606 
607   private static void rewriteTableDescriptor(final FileSystem fs, final FileStatus status,
608       final TableDescriptor td)
609   throws IOException {
610     Path tableInfoDir = status.getPath().getParent();
611     Path tableDir = tableInfoDir.getParent();
612     writeTableDescriptor(fs, td, tableDir, status);
613   }
614 
615   /**
616    * Update table descriptor on the file system
617    * @throws IOException Thrown if failed update.
618    * @throws NotImplementedException if in read only mode
619    */
620   @VisibleForTesting Path updateTableDescriptor(TableDescriptor td)
621   throws IOException {
622     if (fsreadonly) {
623       throw new NotImplementedException("Cannot update a table descriptor - in read only mode");
624     }
625     TableName tableName = td.getHTableDescriptor().getTableName();
626     Path tableDir = getTableDir(tableName);
627     Path p = writeTableDescriptor(fs, td, tableDir, getTableInfoPath(tableDir));
628     if (p == null) throw new IOException("Failed update");
629     LOG.info("Updated tableinfo=" + p);
630     if (usecache) {
631       this.cache.put(td.getHTableDescriptor().getTableName(), td);
632     }
633     return p;
634   }
635 
636   /**
637    * Deletes all the table descriptor files from the file system.
638    * Used in unit tests only.
639    * @throws NotImplementedException if in read only mode
640    */
641   public void deleteTableDescriptorIfExists(TableName tableName) throws IOException {
642     if (fsreadonly) {
643       throw new NotImplementedException("Cannot delete a table descriptor - in read only mode");
644     }
645 
646     Path tableDir = getTableDir(tableName);
647     Path tableInfoDir = new Path(tableDir, TABLEINFO_DIR);
648     deleteTableDescriptorFiles(fs, tableInfoDir, Integer.MAX_VALUE);
649   }
650 
651   /**
652    * Deletes files matching the table info file pattern within the given directory
653    * whose sequenceId is at most the given max sequenceId.
654    */
655   private static void deleteTableDescriptorFiles(FileSystem fs, Path dir, int maxSequenceId)
656   throws IOException {
657     FileStatus [] status = FSUtils.listStatus(fs, dir, TABLEINFO_PATHFILTER);
658     for (FileStatus file : status) {
659       Path path = file.getPath();
660       int sequenceId = getTableInfoSequenceId(path);
661       if (sequenceId <= maxSequenceId) {
662         boolean success = FSUtils.delete(fs, path, false);
663         if (success) {
664           LOG.debug("Deleted table descriptor at " + path);
665         } else {
666           LOG.error("Failed to delete descriptor at " + path);
667         }
668       }
669     }
670   }
671 
  /**
   * Attempts to write a new table descriptor to the given table's directory.
   * It first writes it to the .tmp dir then uses an atomic rename to move it into place.
   * It begins at the currentSequenceId + 1 and tries 10 times to find a new sequence number
   * not already in use.
   * On success, removes every table info file with a lower sequence id than the one written.
   *
   * @param fs filesystem to write to
   * @param htd descriptor to write
   * @param tableDir directory of the table
   * @param currentDescriptorFile current table info file, used only to derive the starting
   *   sequence id; null means start from sequence id 0
   * @return Descriptor file or null if we failed write.
   * @throws IOException if checking for the temporary file, cleaning up after a failed attempt,
   *   or removing old files fails
   */
  private static Path writeTableDescriptor(final FileSystem fs,
    final TableDescriptor htd, final Path tableDir,
    final FileStatus currentDescriptorFile)
  throws IOException {
    // Get temporary dir into which we'll first write a file to avoid half-written file phenomenon.
    // This directory is never removed to avoid removing it out from under a concurrent writer.
    Path tmpTableDir = new Path(tableDir, TMP_DIR);
    Path tableInfoDir = new Path(tableDir, TABLEINFO_DIR);

    // What is current sequenceid?  We read the current sequenceid from
    // the current file.  After we read it, another thread could come in and
    // compete with us writing out next version of file.  The below retries
    // should help in this case some but its hard to do guarantees in face of
    // concurrent schema edits.
    int currentSequenceId = currentDescriptorFile == null ? 0 :
      getTableInfoSequenceId(currentDescriptorFile.getPath());
    int newSequenceId = currentSequenceId;

    // Put arbitrary upperbound on how often we retry
    int retries = 10;
    int retrymax = currentSequenceId + retries;
    // Stays null unless an attempt succeeds; a failed attempt resets it to null.
    Path tableInfoDirPath = null;
    do {
      newSequenceId += 1;
      String filename = getTableInfoFileName(newSequenceId);
      Path tempPath = new Path(tmpTableDir, filename);
      if (fs.exists(tempPath)) {
        // Someone else is (or was) writing this sequence id; move on to the next one.
        LOG.debug(tempPath + " exists; retrying up to " + retries + " times");
        continue;
      }
      tableInfoDirPath = new Path(tableInfoDir, filename);
      try {
        writeTD(fs, tempPath, htd);
        fs.mkdirs(tableInfoDirPath.getParent());
        if (!fs.rename(tempPath, tableInfoDirPath)) {
          throw new IOException("Failed rename of " + tempPath + " to " + tableInfoDirPath);
        }
        LOG.debug("Wrote descriptor into: " + tableInfoDirPath);
      } catch (IOException ioe) {
        // Presume clash of names or something; go around again.
        LOG.debug("Failed write and/or rename; retrying", ioe);
        if (!FSUtils.deleteDirectory(fs, tempPath)) {
          LOG.warn("Failed cleanup of " + tempPath);
        }
        tableInfoDirPath = null;
        continue;
      }
      // Written and renamed into place; stop retrying.
      break;
    } while (newSequenceId < retrymax);
    if (tableInfoDirPath != null) {
      // if we succeeded, remove old table info files.
      deleteTableDescriptorFiles(fs, tableInfoDir, newSequenceId - 1);
    }
    return tableInfoDirPath;
  }
736 
737   private static void writeTD(final FileSystem fs, final Path p, final TableDescriptor htd)
738   throws IOException {
739     FSDataOutputStream out = fs.create(p, false);
740     try {
741       // We used to write this file out as a serialized HTD Writable followed by two '\n's and then
742       // the toString version of HTD.  Now we just write out the pb serialization.
743       out.write(htd.toByteArray());
744     } finally {
745       out.close();
746     }
747   }
748 
749   /**
750    * Create new HTableDescriptor in HDFS. Happens when we are creating table.
751    * Used by tests.
752    * @return True if we successfully created file.
753    */
754   public boolean createTableDescriptor(TableDescriptor htd) throws IOException {
755     return createTableDescriptor(htd, false);
756   }
757 
758   /**
759    * Create new HTableDescriptor in HDFS. Happens when we are creating table.
760    * Used by tests.
761    * @return True if we successfully created file.
762    */
763   public boolean createTableDescriptor(HTableDescriptor htd) throws IOException {
764     return createTableDescriptor(new TableDescriptor(htd), false);
765   }
766 
767   /**
768    * Create new HTableDescriptor in HDFS. Happens when we are creating table. If
769    * forceCreation is true then even if previous table descriptor is present it
770    * will be overwritten
771    *
772    * @return True if we successfully created file.
773    */
774   public boolean createTableDescriptor(TableDescriptor htd, boolean forceCreation)
775   throws IOException {
776     Path tableDir = getTableDir(htd.getHTableDescriptor().getTableName());
777     return createTableDescriptorForTableDirectory(tableDir, htd, forceCreation);
778   }
779 
780   /**
781    * Create tables descriptor for given HTableDescriptor. Default TableDescriptor state
782    * will be used (typically ENABLED).
783    */
784   public boolean createTableDescriptor(HTableDescriptor htd, boolean forceCreation)
785       throws IOException {
786     return createTableDescriptor(new TableDescriptor(htd), forceCreation);
787   }
788 
789   /**
790    * Create a new HTableDescriptor in HDFS in the specified table directory. Happens when we create
791    * a new table or snapshot a table.
792    * @param tableDir table directory under which we should write the file
793    * @param htd description of the table to write
794    * @param forceCreation if <tt>true</tt>,then even if previous table descriptor is present it will
795    *          be overwritten
796    * @return <tt>true</tt> if the we successfully created the file, <tt>false</tt> if the file
797    *         already exists and we weren't forcing the descriptor creation.
798    * @throws IOException if a filesystem error occurs
799    */
800   public boolean createTableDescriptorForTableDirectory(Path tableDir,
801       TableDescriptor htd, boolean forceCreation) throws IOException {
802     if (fsreadonly) {
803       throw new NotImplementedException("Cannot create a table descriptor - in read only mode");
804     }
805     FileStatus status = getTableInfoPath(fs, tableDir);
806     if (status != null) {
807       LOG.debug("Current tableInfoPath = " + status.getPath());
808       if (!forceCreation) {
809         if (fs.exists(status.getPath()) && status.getLen() > 0) {
810           if (readTableDescriptor(fs, status, false).equals(htd)) {
811             LOG.debug("TableInfo already exists.. Skipping creation");
812             return false;
813           }
814         }
815       }
816     }
817     Path p = writeTableDescriptor(fs, htd, tableDir, status);
818     return p != null;
819   }
820 
821 }
822