1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.hadoop.hbase.zookeeper;
19
20 import java.io.EOFException;
21 import java.io.IOException;
22 import java.net.ConnectException;
23 import java.net.NoRouteToHostException;
24 import java.net.SocketException;
25 import java.net.SocketTimeoutException;
26 import java.rmi.UnknownHostException;
27 import java.util.ArrayList;
28 import java.util.List;
29
30
31 import org.apache.commons.logging.Log;
32 import org.apache.commons.logging.LogFactory;
33 import org.apache.hadoop.conf.Configuration;
34 import org.apache.hadoop.hbase.HConstants;
35 import org.apache.hadoop.hbase.HRegionInfo;
36 import org.apache.hadoop.hbase.NotAllMetaRegionsOnlineException;
37 import org.apache.hadoop.hbase.ServerName;
38 import org.apache.hadoop.hbase.classification.InterfaceAudience;
39 import org.apache.hadoop.hbase.client.HConnection;
40 import org.apache.hadoop.hbase.client.RegionReplicaUtil;
41 import org.apache.hadoop.hbase.client.RetriesExhaustedException;
42 import org.apache.hadoop.hbase.exceptions.DeserializationException;
43 import org.apache.hadoop.hbase.ipc.FailedServerException;
44 import org.apache.hadoop.hbase.ipc.ServerNotRunningYetException;
45 import org.apache.hadoop.hbase.master.RegionState;
46 import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
47 import org.apache.hadoop.hbase.protobuf.generated.AdminProtos;
48 import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.AdminService;
49 import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos;
50 import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos;
51 import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.MetaRegionServer;
52 import org.apache.hadoop.hbase.regionserver.RegionServerStoppedException;
53 import org.apache.hadoop.hbase.util.Bytes;
54 import org.apache.hadoop.hbase.util.Pair;
55 import org.apache.hadoop.ipc.RemoteException;
56 import org.apache.zookeeper.KeeperException;
57
58 import com.google.common.base.Stopwatch;
59 import com.google.protobuf.InvalidProtocolBufferException;
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76 @InterfaceAudience.Private
77 public class MetaTableLocator {
78 private static final Log LOG = LogFactory.getLog(MetaTableLocator.class);
79
80
81 private volatile boolean stopped = false;
82
83
84
85
86
87 public boolean isLocationAvailable(ZooKeeperWatcher zkw) {
88 return getMetaRegionLocation(zkw) != null;
89 }
90
91
92
93
94
95 public List<Pair<HRegionInfo, ServerName>> getMetaRegionsAndLocations(ZooKeeperWatcher zkw) {
96 return getMetaRegionsAndLocations(zkw, HRegionInfo.DEFAULT_REPLICA_ID);
97 }
98
99
100
101
102
103
104
105 public List<Pair<HRegionInfo, ServerName>> getMetaRegionsAndLocations(ZooKeeperWatcher zkw,
106 int replicaId) {
107 ServerName serverName = getMetaRegionLocation(zkw, replicaId);
108 List<Pair<HRegionInfo, ServerName>> list = new ArrayList<Pair<HRegionInfo, ServerName>>();
109 list.add(new Pair<HRegionInfo, ServerName>(RegionReplicaUtil.getRegionInfoForReplica(
110 HRegionInfo.FIRST_META_REGIONINFO, replicaId), serverName));
111 return list;
112 }
113
114
115
116
117
118 public List<HRegionInfo> getMetaRegions(ZooKeeperWatcher zkw) {
119 return getMetaRegions(zkw, HRegionInfo.DEFAULT_REPLICA_ID);
120 }
121
122
123
124
125
126
127
128 public List<HRegionInfo> getMetaRegions(ZooKeeperWatcher zkw, int replicaId) {
129 List<Pair<HRegionInfo, ServerName>> result;
130 result = getMetaRegionsAndLocations(zkw, replicaId);
131 return getListOfHRegionInfos(result);
132 }
133
134 private List<HRegionInfo> getListOfHRegionInfos(
135 final List<Pair<HRegionInfo, ServerName>> pairs) {
136 if (pairs == null || pairs.isEmpty()) return null;
137 List<HRegionInfo> result = new ArrayList<HRegionInfo>(pairs.size());
138 for (Pair<HRegionInfo, ServerName> pair: pairs) {
139 result.add(pair.getFirst());
140 }
141 return result;
142 }
143
144
145
146
147
148
149 public ServerName getMetaRegionLocation(final ZooKeeperWatcher zkw) {
150 try {
151 RegionState state = getMetaRegionState(zkw);
152 return state.isOpened() ? state.getServerName() : null;
153 } catch (KeeperException ke) {
154 return null;
155 }
156 }
157
158
159
160
161
162
163
164 public ServerName getMetaRegionLocation(final ZooKeeperWatcher zkw, int replicaId) {
165 try {
166 RegionState state = getMetaRegionState(zkw, replicaId);
167 return state.isOpened() ? state.getServerName() : null;
168 } catch (KeeperException ke) {
169 return null;
170 }
171 }
172
173
174
175
176
177
178
179
180
181
182
183
184
185 public ServerName waitMetaRegionLocation(ZooKeeperWatcher zkw, long timeout)
186 throws InterruptedException, NotAllMetaRegionsOnlineException {
187 return waitMetaRegionLocation(zkw, HRegionInfo.DEFAULT_REPLICA_ID, timeout);
188 }
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203 public ServerName waitMetaRegionLocation(ZooKeeperWatcher zkw, int replicaId, long timeout)
204 throws InterruptedException, NotAllMetaRegionsOnlineException {
205 try {
206 if (ZKUtil.checkExists(zkw, zkw.baseZNode) == -1) {
207 String errorMsg = "Check the value configured in 'zookeeper.znode.parent'. "
208 + "There could be a mismatch with the one configured in the master.";
209 LOG.error(errorMsg);
210 throw new IllegalArgumentException(errorMsg);
211 }
212 } catch (KeeperException e) {
213 throw new IllegalStateException("KeeperException while trying to check baseZNode:", e);
214 }
215 ServerName sn = blockUntilAvailable(zkw, replicaId, timeout);
216
217 if (sn == null) {
218 throw new NotAllMetaRegionsOnlineException("Timed out; " + timeout + "ms");
219 }
220
221 return sn;
222 }
223
224
225
226
227
228
229
230
231 public void waitMetaRegionLocation(ZooKeeperWatcher zkw) throws InterruptedException {
232 Stopwatch stopwatch = new Stopwatch().start();
233 while (!stopped) {
234 try {
235 if (waitMetaRegionLocation(zkw, 100) != null) break;
236 long sleepTime = stopwatch.elapsedMillis();
237
238 if ((sleepTime + 1) % 10000 == 0) {
239 LOG.warn("Have been waiting for meta to be assigned for " + sleepTime + "ms");
240 }
241 } catch (NotAllMetaRegionsOnlineException e) {
242 if (LOG.isTraceEnabled()) {
243 LOG.trace("hbase:meta still not available, sleeping and retrying." +
244 " Reason: " + e.getMessage());
245 }
246 }
247 }
248 }
249
250
251
252
253
254
255
256
257
258
259
260 public boolean verifyMetaRegionLocation(HConnection hConnection,
261 ZooKeeperWatcher zkw, final long timeout)
262 throws InterruptedException, IOException {
263 return verifyMetaRegionLocation(hConnection, zkw, timeout, HRegionInfo.DEFAULT_REPLICA_ID);
264 }
265
266
267
268
269
270
271
272
273
274
275
276 public boolean verifyMetaRegionLocation(HConnection hConnection,
277 ZooKeeperWatcher zkw, final long timeout, int replicaId)
278 throws InterruptedException, IOException {
279 AdminProtos.AdminService.BlockingInterface service = null;
280 try {
281 service = getMetaServerConnection(hConnection, zkw, timeout, replicaId);
282 } catch (NotAllMetaRegionsOnlineException e) {
283
284 } catch (ServerNotRunningYetException e) {
285
286 } catch (UnknownHostException e) {
287
288 } catch (RegionServerStoppedException e) {
289
290 }
291 return (service != null) && verifyRegionLocation(service,
292 getMetaRegionLocation(zkw, replicaId), RegionReplicaUtil.getRegionInfoForReplica(
293 HRegionInfo.FIRST_META_REGIONINFO, replicaId).getRegionName());
294 }
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311 private boolean verifyRegionLocation(AdminService.BlockingInterface hostingServer,
312 final ServerName address, final byte [] regionName)
313 throws IOException {
314 if (hostingServer == null) {
315 LOG.info("Passed hostingServer is null");
316 return false;
317 }
318 Throwable t;
319 try {
320
321 return ProtobufUtil.getRegionInfo(hostingServer, regionName) != null;
322 } catch (ConnectException e) {
323 t = e;
324 } catch (RetriesExhaustedException e) {
325 t = e;
326 } catch (RemoteException e) {
327 IOException ioe = e.unwrapRemoteException();
328 t = ioe;
329 } catch (IOException e) {
330 Throwable cause = e.getCause();
331 if (cause != null && cause instanceof EOFException) {
332 t = cause;
333 } else if (cause != null && cause.getMessage() != null
334 && cause.getMessage().contains("Connection reset")) {
335 t = cause;
336 } else {
337 t = e;
338 }
339 }
340 LOG.info("Failed verification of " + Bytes.toStringBinary(regionName) +
341 " at address=" + address + ", exception=" + t.getMessage());
342 return false;
343 }
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358 private AdminService.BlockingInterface getMetaServerConnection(HConnection hConnection,
359 ZooKeeperWatcher zkw, long timeout, int replicaId)
360 throws InterruptedException, NotAllMetaRegionsOnlineException, IOException {
361 return getCachedConnection(hConnection, waitMetaRegionLocation(zkw, replicaId, timeout));
362 }
363
364
365
366
367
368
369
370
371 @SuppressWarnings("deprecation")
372 private static AdminService.BlockingInterface getCachedConnection(HConnection hConnection,
373 ServerName sn)
374 throws IOException {
375 if (sn == null) {
376 return null;
377 }
378 AdminService.BlockingInterface service = null;
379 try {
380 service = hConnection.getAdmin(sn);
381 } catch (RetriesExhaustedException e) {
382 if (e.getCause() != null && e.getCause() instanceof ConnectException) {
383
384 } else {
385 throw e;
386 }
387 } catch (SocketTimeoutException e) {
388 LOG.debug("Timed out connecting to " + sn);
389 } catch (NoRouteToHostException e) {
390 LOG.debug("Connecting to " + sn, e);
391 } catch (SocketException e) {
392 LOG.debug("Exception connecting to " + sn);
393 } catch (UnknownHostException e) {
394 LOG.debug("Unknown host exception connecting to " + sn);
395 } catch (FailedServerException e) {
396 if (LOG.isDebugEnabled()) {
397 LOG.debug("Server " + sn + " is in failed server list.");
398 }
399 } catch (IOException ioe) {
400 Throwable cause = ioe.getCause();
401 if (ioe instanceof ConnectException) {
402
403 } else if (cause != null && cause instanceof EOFException) {
404
405 } else if (cause != null && cause.getMessage() != null &&
406 cause.getMessage().toLowerCase().contains("connection reset")) {
407
408 } else {
409 throw ioe;
410 }
411
412 }
413 return service;
414 }
415
416
417
418
419
420
421
422
423
424 public static void setMetaLocation(ZooKeeperWatcher zookeeper,
425 ServerName serverName, RegionState.State state) throws KeeperException {
426 setMetaLocation(zookeeper, serverName, HRegionInfo.DEFAULT_REPLICA_ID, state);
427 }
428
429
430
431
432
433
434
435
436
437
438 public static void setMetaLocation(ZooKeeperWatcher zookeeper,
439 ServerName serverName, int replicaId, RegionState.State state) throws KeeperException {
440 LOG.info("Setting hbase:meta region location in ZooKeeper as " + serverName);
441
442
443 MetaRegionServer pbrsr = MetaRegionServer.newBuilder()
444 .setServer(ProtobufUtil.toServerName(serverName))
445 .setRpcVersion(HConstants.RPC_CURRENT_VERSION)
446 .setState(state.convert()).build();
447 byte[] data = ProtobufUtil.prependPBMagic(pbrsr.toByteArray());
448 try {
449 ZKUtil.setData(zookeeper, zookeeper.getZNodeForReplica(replicaId), data);
450 } catch(KeeperException.NoNodeException nne) {
451 if (replicaId == HRegionInfo.DEFAULT_REPLICA_ID) {
452 LOG.debug("META region location doesn't exist, create it");
453 } else {
454 LOG.debug("META region location doesn't exist for replicaId " + replicaId +
455 ", create it");
456 }
457 ZKUtil.createAndWatch(zookeeper, zookeeper.getZNodeForReplica(replicaId), data);
458 }
459 }
460
461
462
463
464 public static RegionState getMetaRegionState(ZooKeeperWatcher zkw) throws KeeperException {
465 return getMetaRegionState(zkw, HRegionInfo.DEFAULT_REPLICA_ID);
466 }
467
468
469
470
471
472
473
474
475 public static RegionState getMetaRegionState(ZooKeeperWatcher zkw, int replicaId)
476 throws KeeperException {
477 RegionState.State state = RegionState.State.OPEN;
478 ServerName serverName = null;
479 try {
480 byte[] data = ZKUtil.getData(zkw, zkw.getZNodeForReplica(replicaId));
481 if (data != null && data.length > 0 && ProtobufUtil.isPBMagicPrefix(data)) {
482 try {
483 int prefixLen = ProtobufUtil.lengthOfPBMagic();
484 ZooKeeperProtos.MetaRegionServer rl =
485 ZooKeeperProtos.MetaRegionServer.PARSER.parseFrom
486 (data, prefixLen, data.length - prefixLen);
487 if (rl.hasState()) {
488 state = RegionState.State.convert(rl.getState());
489 }
490 HBaseProtos.ServerName sn = rl.getServer();
491 serverName = ServerName.valueOf(
492 sn.getHostName(), sn.getPort(), sn.getStartCode());
493 } catch (InvalidProtocolBufferException e) {
494 throw new DeserializationException("Unable to parse meta region location");
495 }
496 } else {
497
498 serverName = ServerName.parseFrom(data);
499 }
500 } catch (DeserializationException e) {
501 throw ZKUtil.convert(e);
502 } catch (InterruptedException e) {
503 Thread.currentThread().interrupt();
504 }
505 if (serverName == null) {
506 state = RegionState.State.OFFLINE;
507 }
508 return new RegionState(
509 RegionReplicaUtil.getRegionInfoForReplica(HRegionInfo.FIRST_META_REGIONINFO, replicaId),
510 state, serverName);
511 }
512
513
514
515
516
517
518 public void deleteMetaLocation(ZooKeeperWatcher zookeeper)
519 throws KeeperException {
520 deleteMetaLocation(zookeeper, HRegionInfo.DEFAULT_REPLICA_ID);
521 }
522
523 public void deleteMetaLocation(ZooKeeperWatcher zookeeper, int replicaId)
524 throws KeeperException {
525 if (replicaId == HRegionInfo.DEFAULT_REPLICA_ID) {
526 LOG.info("Deleting hbase:meta region location in ZooKeeper");
527 } else {
528 LOG.info("Deleting hbase:meta for " + replicaId + " region location in ZooKeeper");
529 }
530 try {
531
532 ZKUtil.deleteNode(zookeeper, zookeeper.getZNodeForReplica(replicaId));
533 } catch(KeeperException.NoNodeException nne) {
534
535 }
536 }
537
538
539
540
541
542
543
544
545
546 public List<ServerName> blockUntilAvailable(final ZooKeeperWatcher zkw,
547 final long timeout, Configuration conf)
548 throws InterruptedException {
549 int numReplicasConfigured = 1;
550 try {
551 List<String> metaReplicaNodes = zkw.getMetaReplicaNodes();
552 numReplicasConfigured = metaReplicaNodes.size();
553 } catch (KeeperException e) {
554 LOG.warn("Got ZK exception " + e);
555 }
556 List<ServerName> servers = new ArrayList<ServerName>(numReplicasConfigured);
557 ServerName server = blockUntilAvailable(zkw, timeout);
558 if (server == null) return null;
559 servers.add(server);
560
561 for (int replicaId = 1; replicaId < numReplicasConfigured; replicaId++) {
562
563 servers.add(getMetaRegionLocation(zkw, replicaId));
564 }
565 return servers;
566 }
567
568
569
570
571
572
573
574
575 public ServerName blockUntilAvailable(final ZooKeeperWatcher zkw,
576 final long timeout)
577 throws InterruptedException {
578 return blockUntilAvailable(zkw, HRegionInfo.DEFAULT_REPLICA_ID, timeout);
579 }
580
581
582
583
584
585
586
587
588
589 public ServerName blockUntilAvailable(final ZooKeeperWatcher zkw, int replicaId,
590 final long timeout)
591 throws InterruptedException {
592 if (timeout < 0) throw new IllegalArgumentException();
593 if (zkw == null) throw new IllegalArgumentException();
594 Stopwatch sw = new Stopwatch().start();
595 ServerName sn = null;
596 try {
597 while (true) {
598 sn = getMetaRegionLocation(zkw, replicaId);
599 if (sn != null || sw.elapsedMillis()
600 > timeout - HConstants.SOCKET_RETRY_WAIT_MS) {
601 break;
602 }
603 Thread.sleep(HConstants.SOCKET_RETRY_WAIT_MS);
604 }
605 } finally {
606 sw.stop();
607 }
608 return sn;
609 }
610
611
612
613
614
615 public void stop() {
616 if (!stopped) {
617 LOG.debug("Stopping MetaTableLocator");
618 stopped = true;
619 }
620 }
621 }