1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.apache.hadoop.hbase.master;
20
21 import java.io.IOException;
22 import java.io.InterruptedIOException;
23 import java.util.ArrayList;
24 import java.util.HashSet;
25 import java.util.List;
26 import java.util.Set;
27 import java.util.concurrent.locks.Lock;
28 import java.util.concurrent.locks.ReentrantLock;
29
30 import org.apache.commons.logging.Log;
31 import org.apache.commons.logging.LogFactory;
32 import org.apache.hadoop.hbase.classification.InterfaceAudience;
33 import org.apache.hadoop.conf.Configuration;
34 import org.apache.hadoop.fs.FileStatus;
35 import org.apache.hadoop.fs.FileSystem;
36 import org.apache.hadoop.fs.Path;
37 import org.apache.hadoop.fs.PathFilter;
38 import org.apache.hadoop.fs.permission.FsPermission;
39 import org.apache.hadoop.hbase.ClusterId;
40 import org.apache.hadoop.hbase.HColumnDescriptor;
41 import org.apache.hadoop.hbase.HConstants;
42 import org.apache.hadoop.hbase.HRegionInfo;
43 import org.apache.hadoop.hbase.HTableDescriptor;
44 import org.apache.hadoop.hbase.Server;
45 import org.apache.hadoop.hbase.ServerName;
46 import org.apache.hadoop.hbase.TableDescriptor;
47 import org.apache.hadoop.hbase.TableName;
48 import org.apache.hadoop.hbase.backup.HFileArchiver;
49 import org.apache.hadoop.hbase.exceptions.DeserializationException;
50 import org.apache.hadoop.hbase.fs.HFileSystem;
51 import org.apache.hadoop.hbase.mob.MobConstants;
52 import org.apache.hadoop.hbase.mob.MobUtils;
53 import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.SplitLogTask.RecoveryMode;
54 import org.apache.hadoop.hbase.regionserver.HRegion;
55 import org.apache.hadoop.hbase.wal.DefaultWALProvider;
56 import org.apache.hadoop.hbase.wal.WALSplitter;
57 import org.apache.hadoop.hbase.util.Bytes;
58 import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
59 import org.apache.hadoop.hbase.util.FSTableDescriptors;
60 import org.apache.hadoop.hbase.util.FSUtils;
61 import org.apache.hadoop.ipc.RemoteException;
62
63 import com.google.common.annotations.VisibleForTesting;
64
65
66
67
68
69
70 @InterfaceAudience.Private
71 public class MasterFileSystem {
72 private static final Log LOG = LogFactory.getLog(MasterFileSystem.class.getName());
73
74 Configuration conf;
75
76 Server master;
77
78 private final MetricsMasterFileSystem metricsMasterFilesystem = new MetricsMasterFileSystem();
79
80 private ClusterId clusterId;
81
82 private final FileSystem fs;
83
84 private volatile boolean fsOk = true;
85
86 private final Path oldLogDir;
87
88 private final Path rootdir;
89
90 private final Path tempdir;
91
92 final Lock splitLogLock = new ReentrantLock();
93 final boolean distributedLogReplay;
94 final SplitLogManager splitLogManager;
95 private final MasterServices services;
96
97 final static PathFilter META_FILTER = new PathFilter() {
98 @Override
99 public boolean accept(Path p) {
100 return DefaultWALProvider.isMetaFile(p);
101 }
102 };
103
104 final static PathFilter NON_META_FILTER = new PathFilter() {
105 @Override
106 public boolean accept(Path p) {
107 return !DefaultWALProvider.isMetaFile(p);
108 }
109 };
110
111 public MasterFileSystem(Server master, MasterServices services)
112 throws IOException {
113 this.conf = master.getConfiguration();
114 this.master = master;
115 this.services = services;
116
117
118
119
120 this.rootdir = FSUtils.getRootDir(conf);
121 this.tempdir = new Path(this.rootdir, HConstants.HBASE_TEMP_DIRECTORY);
122
123
124 this.fs = this.rootdir.getFileSystem(conf);
125 FSUtils.setFsDefault(conf, new Path(this.fs.getUri()));
126
127 fs.setConf(conf);
128
129
130 this.oldLogDir = createInitialFileSystemLayout();
131 HFileSystem.addLocationsOrderInterceptor(conf);
132 this.splitLogManager =
133 new SplitLogManager(master, master.getConfiguration(), master, services,
134 master.getServerName());
135 this.distributedLogReplay = this.splitLogManager.isLogReplaying();
136 }
137
138 @VisibleForTesting
139 SplitLogManager getSplitLogManager() {
140 return this.splitLogManager;
141 }
142
143
144
145
146
147
148
149
150
151
152
153 private Path createInitialFileSystemLayout() throws IOException {
154
155 checkRootDir(this.rootdir, conf, this.fs);
156
157
158 checkTempDir(this.tempdir, conf, this.fs);
159
160 Path oldLogDir = new Path(this.rootdir, HConstants.HREGION_OLDLOGDIR_NAME);
161
162
163 if(!this.fs.exists(oldLogDir)) {
164 this.fs.mkdirs(oldLogDir);
165 }
166
167 return oldLogDir;
168 }
169
170 public FileSystem getFileSystem() {
171 return this.fs;
172 }
173
174
175
176
177
178 public Path getOldLogDir() {
179 return this.oldLogDir;
180 }
181
182
183
184
185
186
187 public boolean checkFileSystem() {
188 if (this.fsOk) {
189 try {
190 FSUtils.checkFileSystemAvailable(this.fs);
191 FSUtils.checkDfsSafeMode(this.conf);
192 } catch (IOException e) {
193 master.abort("Shutting down HBase cluster: file system not available", e);
194 this.fsOk = false;
195 }
196 }
197 return this.fsOk;
198 }
199
200
201
202
203 public Path getRootDir() {
204 return this.rootdir;
205 }
206
207
208
209
210 public Path getTempDir() {
211 return this.tempdir;
212 }
213
214
215
216
217 public ClusterId getClusterId() {
218 return clusterId;
219 }
220
221
222
223
224
225 Set<ServerName> getFailedServersFromLogFolders() {
226 boolean retrySplitting = !conf.getBoolean("hbase.hlog.split.skip.errors",
227 WALSplitter.SPLIT_SKIP_ERRORS_DEFAULT);
228
229 Set<ServerName> serverNames = new HashSet<ServerName>();
230 Path logsDirPath = new Path(this.rootdir, HConstants.HREGION_LOGDIR_NAME);
231
232 do {
233 if (master.isStopped()) {
234 LOG.warn("Master stopped while trying to get failed servers.");
235 break;
236 }
237 try {
238 if (!this.fs.exists(logsDirPath)) return serverNames;
239 FileStatus[] logFolders = FSUtils.listStatus(this.fs, logsDirPath, null);
240
241
242 Set<ServerName> onlineServers = ((HMaster) master).getServerManager().getOnlineServers()
243 .keySet();
244
245 if (logFolders == null || logFolders.length == 0) {
246 LOG.debug("No log files to split, proceeding...");
247 return serverNames;
248 }
249 for (FileStatus status : logFolders) {
250 FileStatus[] curLogFiles = FSUtils.listStatus(this.fs, status.getPath(), null);
251 if (curLogFiles == null || curLogFiles.length == 0) {
252
253 continue;
254 }
255 final ServerName serverName = DefaultWALProvider.getServerNameFromWALDirectoryName(
256 status.getPath());
257 if (null == serverName) {
258 LOG.warn("Log folder " + status.getPath() + " doesn't look like its name includes a " +
259 "region server name; leaving in place. If you see later errors about missing " +
260 "write ahead logs they may be saved in this location.");
261 } else if (!onlineServers.contains(serverName)) {
262 LOG.info("Log folder " + status.getPath() + " doesn't belong "
263 + "to a known region server, splitting");
264 serverNames.add(serverName);
265 } else {
266 LOG.info("Log folder " + status.getPath() + " belongs to an existing region server");
267 }
268 }
269 retrySplitting = false;
270 } catch (IOException ioe) {
271 LOG.warn("Failed getting failed servers to be recovered.", ioe);
272 if (!checkFileSystem()) {
273 LOG.warn("Bad Filesystem, exiting");
274 Runtime.getRuntime().halt(1);
275 }
276 try {
277 if (retrySplitting) {
278 Thread.sleep(conf.getInt("hbase.hlog.split.failure.retry.interval", 30 * 1000));
279 }
280 } catch (InterruptedException e) {
281 LOG.warn("Interrupted, aborting since cannot return w/o splitting");
282 Thread.currentThread().interrupt();
283 retrySplitting = false;
284 Runtime.getRuntime().halt(1);
285 }
286 }
287 } while (retrySplitting);
288
289 return serverNames;
290 }
291
292 public void splitLog(final ServerName serverName) throws IOException {
293 Set<ServerName> serverNames = new HashSet<ServerName>();
294 serverNames.add(serverName);
295 splitLog(serverNames);
296 }
297
298
299
300
301
302
303 public void splitMetaLog(final ServerName serverName) throws IOException {
304 Set<ServerName> serverNames = new HashSet<ServerName>();
305 serverNames.add(serverName);
306 splitMetaLog(serverNames);
307 }
308
309
310
311
312
313
314 public void splitMetaLog(final Set<ServerName> serverNames) throws IOException {
315 splitLog(serverNames, META_FILTER);
316 }
317
318 @edu.umd.cs.findbugs.annotations.SuppressWarnings(value="UL_UNRELEASED_LOCK", justification=
319 "We only release this lock when we set it. Updates to code that uses it should verify use " +
320 "of the guard boolean.")
321 private List<Path> getLogDirs(final Set<ServerName> serverNames) throws IOException {
322 List<Path> logDirs = new ArrayList<Path>();
323 boolean needReleaseLock = false;
324 if (!this.services.isInitialized()) {
325
326 this.splitLogLock.lock();
327 needReleaseLock = true;
328 }
329 try {
330 for (ServerName serverName : serverNames) {
331 Path logDir = new Path(this.rootdir,
332 DefaultWALProvider.getWALDirectoryName(serverName.toString()));
333 Path splitDir = logDir.suffix(DefaultWALProvider.SPLITTING_EXT);
334
335 if (fs.exists(logDir)) {
336 if (!this.fs.rename(logDir, splitDir)) {
337 throw new IOException("Failed fs.rename for log split: " + logDir);
338 }
339 logDir = splitDir;
340 LOG.debug("Renamed region directory: " + splitDir);
341 } else if (!fs.exists(splitDir)) {
342 LOG.info("Log dir for server " + serverName + " does not exist");
343 continue;
344 }
345 logDirs.add(splitDir);
346 }
347 } finally {
348 if (needReleaseLock) {
349 this.splitLogLock.unlock();
350 }
351 }
352 return logDirs;
353 }
354
355
356
357
358
359
360
361 public void prepareLogReplay(ServerName serverName, Set<HRegionInfo> regions) throws IOException {
362 if (!this.distributedLogReplay) {
363 return;
364 }
365
366 if (regions == null || regions.isEmpty()) {
367 return;
368 }
369 this.splitLogManager.markRegionsRecovering(serverName, regions);
370 }
371
372 public void splitLog(final Set<ServerName> serverNames) throws IOException {
373 splitLog(serverNames, NON_META_FILTER);
374 }
375
376
377
378
379
380
381 void removeStaleRecoveringRegionsFromZK(final Set<ServerName> failedServers)
382 throws IOException, InterruptedIOException {
383 this.splitLogManager.removeStaleRecoveringRegions(failedServers);
384 }
385
386
387
388
389
390
391
392
393
394 public void splitLog(final Set<ServerName> serverNames, PathFilter filter) throws IOException {
395 long splitTime = 0, splitLogSize = 0;
396 List<Path> logDirs = getLogDirs(serverNames);
397
398 splitLogManager.handleDeadWorkers(serverNames);
399 splitTime = EnvironmentEdgeManager.currentTime();
400 splitLogSize = splitLogManager.splitLogDistributed(serverNames, logDirs, filter);
401 splitTime = EnvironmentEdgeManager.currentTime() - splitTime;
402
403 if (this.metricsMasterFilesystem != null) {
404 if (filter == META_FILTER) {
405 this.metricsMasterFilesystem.addMetaWALSplit(splitTime, splitLogSize);
406 } else {
407 this.metricsMasterFilesystem.addSplit(splitTime, splitLogSize);
408 }
409 }
410 }
411
412
413
414
415
416
417
418
419
420
421 private Path checkRootDir(final Path rd, final Configuration c,
422 final FileSystem fs)
423 throws IOException {
424
425 FSUtils.waitOnSafeMode(c, c.getInt(HConstants.THREAD_WAKE_FREQUENCY, 10 * 1000));
426
427 boolean isSecurityEnabled = "kerberos".equalsIgnoreCase(c.get("hbase.security.authentication"));
428 FsPermission rootDirPerms = new FsPermission(c.get("hbase.rootdir.perms", "700"));
429
430
431 try {
432 if (!fs.exists(rd)) {
433 if (isSecurityEnabled) {
434 fs.mkdirs(rd, rootDirPerms);
435 } else {
436 fs.mkdirs(rd);
437 }
438
439
440
441
442
443
444
445 FSUtils.setVersion(fs, rd, c.getInt(HConstants.THREAD_WAKE_FREQUENCY,
446 10 * 1000), c.getInt(HConstants.VERSION_FILE_WRITE_ATTEMPTS,
447 HConstants.DEFAULT_VERSION_FILE_WRITE_ATTEMPTS));
448 } else {
449 if (!fs.isDirectory(rd)) {
450 throw new IllegalArgumentException(rd.toString() + " is not a directory");
451 }
452 if (isSecurityEnabled && !rootDirPerms.equals(fs.getFileStatus(rd).getPermission())) {
453
454 LOG.warn("Found rootdir permissions NOT matching expected \"hbase.rootdir.perms\" for "
455 + "rootdir=" + rd.toString() + " permissions=" + fs.getFileStatus(rd).getPermission()
456 + " and \"hbase.rootdir.perms\" configured as "
457 + c.get("hbase.rootdir.perms", "700") + ". Automatically setting the permissions. You"
458 + " can change the permissions by setting \"hbase.rootdir.perms\" in hbase-site.xml "
459 + "and restarting the master");
460 fs.setPermission(rd, rootDirPerms);
461 }
462
463 FSUtils.checkVersion(fs, rd, true, c.getInt(HConstants.THREAD_WAKE_FREQUENCY,
464 10 * 1000), c.getInt(HConstants.VERSION_FILE_WRITE_ATTEMPTS,
465 HConstants.DEFAULT_VERSION_FILE_WRITE_ATTEMPTS));
466 }
467 } catch (DeserializationException de) {
468 LOG.fatal("Please fix invalid configuration for " + HConstants.HBASE_DIR, de);
469 IOException ioe = new IOException();
470 ioe.initCause(de);
471 throw ioe;
472 } catch (IllegalArgumentException iae) {
473 LOG.fatal("Please fix invalid configuration for "
474 + HConstants.HBASE_DIR + " " + rd.toString(), iae);
475 throw iae;
476 }
477
478 if (!FSUtils.checkClusterIdExists(fs, rd, c.getInt(
479 HConstants.THREAD_WAKE_FREQUENCY, 10 * 1000))) {
480 FSUtils.setClusterId(fs, rd, new ClusterId(), c.getInt(HConstants.THREAD_WAKE_FREQUENCY, 10 * 1000));
481 }
482 clusterId = FSUtils.getClusterId(fs, rd);
483
484
485 if (!FSUtils.metaRegionExists(fs, rd)) {
486 bootstrap(rd, c);
487 }
488
489
490
491
492
493 FSTableDescriptors fsd = new FSTableDescriptors(c, fs, rd);
494 fsd.createTableDescriptor(
495 new TableDescriptor(fsd.get(TableName.META_TABLE_NAME)));
496
497 return rd;
498 }
499
500
501
502
503
504 private void checkTempDir(final Path tmpdir, final Configuration c, final FileSystem fs)
505 throws IOException {
506
507 if (fs.exists(tmpdir)) {
508
509
510 for (Path tabledir: FSUtils.getTableDirs(fs, tmpdir)) {
511 for (Path regiondir: FSUtils.getRegionDirs(fs, tabledir)) {
512 HFileArchiver.archiveRegion(fs, this.rootdir, tabledir, regiondir);
513 }
514 }
515 if (!fs.delete(tmpdir, true)) {
516 throw new IOException("Unable to clean the temp directory: " + tmpdir);
517 }
518 }
519
520
521 if (!fs.mkdirs(tmpdir)) {
522 throw new IOException("HBase temp directory '" + tmpdir + "' creation failure.");
523 }
524 }
525
526 private static void bootstrap(final Path rd, final Configuration c)
527 throws IOException {
528 LOG.info("BOOTSTRAP: creating hbase:meta region");
529 try {
530
531
532
533
534 HRegionInfo metaHRI = new HRegionInfo(HRegionInfo.FIRST_META_REGIONINFO);
535 HTableDescriptor metaDescriptor = new FSTableDescriptors(c).get(TableName.META_TABLE_NAME);
536 setInfoFamilyCachingForMeta(metaDescriptor, false);
537 HRegion meta = HRegion.createHRegion(metaHRI, rd, c, metaDescriptor, null);
538 setInfoFamilyCachingForMeta(metaDescriptor, true);
539 meta.close();
540 } catch (IOException e) {
541 e = e instanceof RemoteException ?
542 ((RemoteException)e).unwrapRemoteException() : e;
543 LOG.error("bootstrap", e);
544 throw e;
545 }
546 }
547
548
549
550
551 public static void setInfoFamilyCachingForMeta(HTableDescriptor metaDescriptor, final boolean b) {
552 for (HColumnDescriptor hcd: metaDescriptor.getColumnFamilies()) {
553 if (Bytes.equals(hcd.getName(), HConstants.CATALOG_FAMILY)) {
554 hcd.setBlockCacheEnabled(b);
555 hcd.setInMemory(b);
556 }
557 }
558 }
559
560 public void deleteFamilyFromFS(HRegionInfo region, byte[] familyName, boolean hasMob)
561 throws IOException {
562
563 Path tableDir = FSUtils.getTableDir(rootdir, region.getTable());
564 HFileArchiver.archiveFamily(fs, conf, region, tableDir, familyName);
565
566
567 Path familyDir = new Path(tableDir,
568 new Path(region.getEncodedName(), Bytes.toString(familyName)));
569 if (fs.delete(familyDir, true) == false) {
570 if (fs.exists(familyDir)) {
571 throw new IOException("Could not delete family "
572 + Bytes.toString(familyName) + " from FileSystem for region "
573 + region.getRegionNameAsString() + "(" + region.getEncodedName()
574 + ")");
575 }
576 }
577
578
579 if (hasMob) {
580 Path mobTableDir =
581 FSUtils.getTableDir(new Path(getRootDir(), MobConstants.MOB_DIR_NAME), region.getTable());
582 HRegionInfo mobRegionInfo = MobUtils.getMobRegionInfo(region.getTable());
583 Path mobFamilyDir =
584 new Path(mobTableDir,
585 new Path(mobRegionInfo.getEncodedName(), Bytes.toString(familyName)));
586
587 MobUtils.archiveMobStoreFiles(conf, fs, mobRegionInfo, mobFamilyDir, familyName);
588
589 if (!fs.delete(mobFamilyDir, true)) {
590 throw new IOException("Could not delete mob store files for family "
591 + Bytes.toString(familyName) + " from FileSystem region "
592 + mobRegionInfo.getRegionNameAsString() + "(" + mobRegionInfo.getEncodedName() + ")");
593 }
594 }
595 }
596
597 public void stop() {
598 if (splitLogManager != null) {
599 this.splitLogManager.stop();
600 }
601 }
602
603
604
605
606
607
608 public void setLogRecoveryMode() throws IOException {
609 this.splitLogManager.setRecoveryMode(false);
610 }
611
612 public RecoveryMode getLogRecoveryMode() {
613 return this.splitLogManager.getRecoveryMode();
614 }
615
616 public void logFileSystemState(Log log) throws IOException {
617 FSUtils.logFileSystemState(fs, rootdir, log);
618 }
619 }