001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hdfs.server.namenode;
019
020import com.google.common.annotations.VisibleForTesting;
021import com.google.common.base.Joiner;
022import com.google.common.base.Preconditions;
023import com.google.common.collect.Lists;
024
025import org.apache.commons.logging.Log;
026import org.apache.commons.logging.LogFactory;
027import org.apache.hadoop.HadoopIllegalArgumentException;
028import org.apache.hadoop.classification.InterfaceAudience;
029import org.apache.hadoop.conf.Configuration;
030import org.apache.hadoop.fs.FileSystem;
031import org.apache.hadoop.fs.Trash;
032import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState;
033import org.apache.hadoop.ha.HAServiceProtocol.StateChangeRequestInfo;
034import org.apache.hadoop.ha.HAServiceStatus;
035import org.apache.hadoop.ha.HealthCheckFailedException;
036import org.apache.hadoop.ha.ServiceFailedException;
037import org.apache.hadoop.hdfs.DFSConfigKeys;
038import org.apache.hadoop.hdfs.DFSUtil;
039import org.apache.hadoop.hdfs.HAUtil;
040import org.apache.hadoop.hdfs.HdfsConfiguration;
041import org.apache.hadoop.hdfs.protocol.ClientProtocol;
042import org.apache.hadoop.hdfs.protocol.HdfsConstants;
043import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.NamenodeRole;
044import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.RollingUpgradeStartupOption;
045import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption;
046import org.apache.hadoop.hdfs.server.namenode.ha.*;
047import org.apache.hadoop.hdfs.server.namenode.metrics.NameNodeMetrics;
048import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress;
049import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgressMetrics;
050import org.apache.hadoop.hdfs.server.protocol.*;
051import org.apache.hadoop.ipc.Server;
052import org.apache.hadoop.ipc.StandbyException;
053import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
054import org.apache.hadoop.metrics2.util.MBeans;
055import org.apache.hadoop.net.NetUtils;
056import org.apache.hadoop.security.AccessControlException;
057import org.apache.hadoop.security.RefreshUserMappingsProtocol;
058import org.apache.hadoop.security.SecurityUtil;
059import org.apache.hadoop.security.UserGroupInformation;
060import org.apache.hadoop.security.authorize.RefreshAuthorizationPolicyProtocol;
061import org.apache.hadoop.ipc.RefreshCallQueueProtocol;
062import org.apache.hadoop.tools.GetUserMappingsProtocol;
063import org.apache.hadoop.tracing.SpanReceiverHost;
064import org.apache.hadoop.tracing.TraceAdminProtocol;
065import org.apache.hadoop.util.ExitUtil.ExitException;
066import org.apache.hadoop.util.GenericOptionsParser;
067import org.apache.hadoop.util.JvmPauseMonitor;
068import org.apache.hadoop.util.ServicePlugin;
069import org.apache.hadoop.util.StringUtils;
070
071import javax.management.ObjectName;
072
073import java.io.IOException;
074import java.io.PrintStream;
075import java.net.InetSocketAddress;
076import java.net.URI;
077import java.security.PrivilegedExceptionAction;
078import java.util.ArrayList;
079import java.util.Arrays;
080import java.util.Collection;
081import java.util.List;
082import java.util.concurrent.atomic.AtomicBoolean;
083
084import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_DEFAULT_NAME_KEY;
085import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_TRASH_INTERVAL_DEFAULT;
086import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_TRASH_INTERVAL_KEY;
087import static org.apache.hadoop.hdfs.DFSConfigKeys.*;
088import static org.apache.hadoop.util.ExitUtil.terminate;
089import static org.apache.hadoop.util.ToolRunner.confirmPrompt;
090
091/**********************************************************
092 * NameNode serves as both directory namespace manager and
093 * "inode table" for the Hadoop DFS.  There is a single NameNode
094 * running in any DFS deployment.  (Well, except when there
095 * is a second backup/failover NameNode, or when using federated NameNodes.)
096 *
097 * The NameNode controls two critical tables:
098 *   1)  filename->blocksequence (namespace)
099 *   2)  block->machinelist ("inodes")
100 *
101 * The first table is stored on disk and is very precious.
102 * The second table is rebuilt every time the NameNode comes up.
103 *
104 * 'NameNode' refers to both this class as well as the 'NameNode server'.
105 * The 'FSNamesystem' class actually performs most of the filesystem
106 * management.  The majority of the 'NameNode' class itself is concerned
107 * with exposing the IPC interface and the HTTP server to the outside world,
108 * plus some configuration management.
109 *
110 * NameNode implements the
111 * {@link org.apache.hadoop.hdfs.protocol.ClientProtocol} interface, which
112 * allows clients to ask for DFS services.
113 * {@link org.apache.hadoop.hdfs.protocol.ClientProtocol} is not designed for
114 * direct use by authors of DFS client code.  End-users should instead use the
115 * {@link org.apache.hadoop.fs.FileSystem} class.
116 *
117 * NameNode also implements the
118 * {@link org.apache.hadoop.hdfs.server.protocol.DatanodeProtocol} interface,
119 * used by DataNodes that actually store DFS data blocks.  These
120 * methods are invoked repeatedly and automatically by all the
121 * DataNodes in a DFS deployment.
122 *
123 * NameNode also implements the
124 * {@link org.apache.hadoop.hdfs.server.protocol.NamenodeProtocol} interface,
125 * used by secondary namenodes or rebalancing processes to get partial
126 * NameNode state, for example partial blocksMap etc.
127 **********************************************************/
128@InterfaceAudience.Private
129public class NameNode implements NameNodeStatusMXBean {
  static{
    // Run HdfsConfiguration's static initialization before any NameNode code
    // executes, so HDFS-specific configuration defaults are registered.
    HdfsConfiguration.init();
  }
133
134  /**
135   * Categories of operations supported by the namenode.
136   */
137  public static enum OperationCategory {
138    /** Operations that are state agnostic */
139    UNCHECKED,
140    /** Read operation that does not change the namespace state */
141    READ,
142    /** Write operation that changes the namespace state */
143    WRITE,
144    /** Operations related to checkpointing */
145    CHECKPOINT,
146    /** Operations related to {@link JournalProtocol} */
147    JOURNAL
148  }
149  
150  /**
151   * HDFS configuration can have three types of parameters:
152   * <ol>
153   * <li>Parameters that are common for all the name services in the cluster.</li>
154   * <li>Parameters that are specific to a name service. These keys are suffixed
155   * with nameserviceId in the configuration. For example,
156   * "dfs.namenode.rpc-address.nameservice1".</li>
157   * <li>Parameters that are specific to a single name node. These keys are suffixed
158   * with nameserviceId and namenodeId in the configuration. for example,
159   * "dfs.namenode.rpc-address.nameservice1.namenode1"</li>
160   * </ol>
161   * 
162   * In the latter cases, operators may specify the configuration without
163   * any suffix, with a nameservice suffix, or with a nameservice and namenode
164   * suffix. The more specific suffix will take precedence.
165   * 
166   * These keys are specific to a given namenode, and thus may be configured
167   * globally, for a nameservice, or for a specific namenode within a nameservice.
168   */
169  public static final String[] NAMENODE_SPECIFIC_KEYS = {
170    DFS_NAMENODE_RPC_ADDRESS_KEY,
171    DFS_NAMENODE_RPC_BIND_HOST_KEY,
172    DFS_NAMENODE_NAME_DIR_KEY,
173    DFS_NAMENODE_EDITS_DIR_KEY,
174    DFS_NAMENODE_SHARED_EDITS_DIR_KEY,
175    DFS_NAMENODE_CHECKPOINT_DIR_KEY,
176    DFS_NAMENODE_CHECKPOINT_EDITS_DIR_KEY,
177    DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY,
178    DFS_NAMENODE_SERVICE_RPC_BIND_HOST_KEY,
179    DFS_NAMENODE_HTTP_ADDRESS_KEY,
180    DFS_NAMENODE_HTTPS_ADDRESS_KEY,
181    DFS_NAMENODE_HTTP_BIND_HOST_KEY,
182    DFS_NAMENODE_HTTPS_BIND_HOST_KEY,
183    DFS_NAMENODE_KEYTAB_FILE_KEY,
184    DFS_NAMENODE_SECONDARY_HTTP_ADDRESS_KEY,
185    DFS_NAMENODE_SECONDARY_HTTPS_ADDRESS_KEY,
186    DFS_SECONDARY_NAMENODE_KEYTAB_FILE_KEY,
187    DFS_NAMENODE_BACKUP_ADDRESS_KEY,
188    DFS_NAMENODE_BACKUP_HTTP_ADDRESS_KEY,
189    DFS_NAMENODE_BACKUP_SERVICE_RPC_ADDRESS_KEY,
190    DFS_NAMENODE_KERBEROS_PRINCIPAL_KEY,
191    DFS_NAMENODE_KERBEROS_INTERNAL_SPNEGO_PRINCIPAL_KEY,
192    DFS_HA_FENCE_METHODS_KEY,
193    DFS_HA_ZKFC_PORT_KEY,
194    DFS_HA_FENCE_METHODS_KEY
195  };
196  
197  /**
198   * @see #NAMENODE_SPECIFIC_KEYS
199   * These keys are specific to a nameservice, but may not be overridden
200   * for a specific namenode.
201   */
202  public static final String[] NAMESERVICE_SPECIFIC_KEYS = {
203    DFS_HA_AUTO_FAILOVER_ENABLED_KEY
204  };
205  
206  private static final String USAGE = "Usage: java NameNode ["
207      + StartupOption.BACKUP.getName() + "] | \n\t["
208      + StartupOption.CHECKPOINT.getName() + "] | \n\t["
209      + StartupOption.FORMAT.getName() + " ["
210      + StartupOption.CLUSTERID.getName() + " cid ] ["
211      + StartupOption.FORCE.getName() + "] ["
212      + StartupOption.NONINTERACTIVE.getName() + "] ] | \n\t["
213      + StartupOption.UPGRADE.getName() + 
214        " [" + StartupOption.CLUSTERID.getName() + " cid]" +
215        " [" + StartupOption.RENAMERESERVED.getName() + "<k-v pairs>] ] | \n\t["
216      + StartupOption.UPGRADEONLY.getName() + 
217        " [" + StartupOption.CLUSTERID.getName() + " cid]" +
218        " [" + StartupOption.RENAMERESERVED.getName() + "<k-v pairs>] ] | \n\t["
219      + StartupOption.ROLLBACK.getName() + "] | \n\t["
220      + StartupOption.ROLLINGUPGRADE.getName() + " "
221      + RollingUpgradeStartupOption.getAllOptionString() + " ] | \n\t["
222      + StartupOption.FINALIZE.getName() + "] | \n\t["
223      + StartupOption.IMPORT.getName() + "] | \n\t["
224      + StartupOption.INITIALIZESHAREDEDITS.getName() + "] | \n\t["
225      + StartupOption.BOOTSTRAPSTANDBY.getName() + "] | \n\t["
226      + StartupOption.RECOVER.getName() + " [ "
227      + StartupOption.FORCE.getName() + "] ] | \n\t["
228      + StartupOption.METADATAVERSION.getName() + " ] "
229      + " ]";
230
231  
232  public long getProtocolVersion(String protocol, 
233                                 long clientVersion) throws IOException {
234    if (protocol.equals(ClientProtocol.class.getName())) {
235      return ClientProtocol.versionID; 
236    } else if (protocol.equals(DatanodeProtocol.class.getName())){
237      return DatanodeProtocol.versionID;
238    } else if (protocol.equals(NamenodeProtocol.class.getName())){
239      return NamenodeProtocol.versionID;
240    } else if (protocol.equals(RefreshAuthorizationPolicyProtocol.class.getName())){
241      return RefreshAuthorizationPolicyProtocol.versionID;
242    } else if (protocol.equals(RefreshUserMappingsProtocol.class.getName())){
243      return RefreshUserMappingsProtocol.versionID;
244    } else if (protocol.equals(RefreshCallQueueProtocol.class.getName())) {
245      return RefreshCallQueueProtocol.versionID;
246    } else if (protocol.equals(GetUserMappingsProtocol.class.getName())){
247      return GetUserMappingsProtocol.versionID;
248    } else if (protocol.equals(TraceAdminProtocol.class.getName())){
249      return TraceAdminProtocol.versionID;
250    } else {
251      throw new IOException("Unknown protocol to name node: " + protocol);
252    }
253  }
254    
  /** Default NameNode RPC port, used when an address string omits the port. */
  public static final int DEFAULT_PORT = 8020;
  public static final Log LOG = LogFactory.getLog(NameNode.class.getName());
  /** Dedicated logger for namespace state-change events. */
  public static final Log stateChangeLog = LogFactory.getLog("org.apache.hadoop.hdfs.StateChange");
  /** Dedicated logger for block state-change events. */
  public static final Log blockStateChangeLog = LogFactory.getLog("BlockStateChange");
  /** Shared singleton instances of the two HA states. */
  public static final HAState ACTIVE_STATE = new ActiveState();
  public static final HAState STANDBY_STATE = new StandbyState();
261  
  /** Filesystem state; loaded from disk in {@code loadNamesystem}. */
  protected FSNamesystem namesystem; 
  protected final Configuration conf;
  /** Role this node plays (namenode, backup, checkpoint, ...). */
  protected final NamenodeRole role;
  /** Current HA state; volatile so readers see transitions promptly. */
  private volatile HAState state;
  private final boolean haEnabled;
  private final HAContext haContext;
  protected final boolean allowStaleStandbyReads;
  /** Set to true once construction completes successfully. */
  private AtomicBoolean started = new AtomicBoolean(false); 

  
  /** httpServer */
  protected NameNodeHttpServer httpServer;
  /** Background trash-emptier thread; null when trash is disabled/stopped. */
  private Thread emptier;
  /** only used for testing purposes  */
  protected boolean stopRequested = false;
  /** Registration information of this name-node  */
  protected NamenodeRegistration nodeRegistration;
  /** Activated plug-ins. */
  private List<ServicePlugin> plugins;
  
  /** RPC server; created in {@code initialize}. */
  private NameNodeRpcServer rpcServer;

  /** Monitors JVM pauses (e.g. long GCs) for the metrics system. */
  private JvmPauseMonitor pauseMonitor;
  /** JMX registration handle for the NameNodeStatus MXBean. */
  private ObjectName nameNodeStatusBeanName;
  SpanReceiverHost spanReceiverHost;
  /**
   * The namenode address that clients will use to access this namenode
   * or the name service. For HA configurations using logical URI, it
   * will be the logical address.
   */
  private String clientNamenodeAddress;
293  
  /** Format a new filesystem.  Destroys any filesystem that may already
   * exist at this location.  **/
  public static void format(Configuration conf) throws IOException {
    // Delegates to the three-argument overload with both flags true.
    // NOTE(review): the flag meanings are defined by the overload, which is
    // outside this view -- confirm against its implementation.
    format(conf, true, true);
  }
299
  /** Shared metrics instance; assigned by {@link #initMetrics}. */
  static NameNodeMetrics metrics;
  /**
   * Startup progress tracker; exposed via {@link #getStartupProgress} and
   * registered with StartupProgressMetrics during initialization.
   */
  private static final StartupProgress startupProgress = new StartupProgress();
  /** Return the {@link FSNamesystem} object.
   * @return {@link FSNamesystem} object, or null before initialization.
   */
  public FSNamesystem getNamesystem() {
    return namesystem;
  }
308
  /** @return the RPC server, exposed through its protocol facade. */
  public NamenodeProtocols getRpcServer() {
    return rpcServer;
  }
312  
  /** Create and install the static metrics instance for the given role. */
  static void initMetrics(Configuration conf, NamenodeRole role) {
    metrics = NameNodeMetrics.create(conf, role);
  }
316
  /** @return the shared metrics instance, or null before initMetrics(). */
  public static NameNodeMetrics getNameNodeMetrics() {
    return metrics;
  }
320
321  /**
322   * Returns object used for reporting namenode startup progress.
323   * 
324   * @return StartupProgress for reporting namenode startup progress
325   */
326  public static StartupProgress getStartupProgress() {
327    return startupProgress;
328  }
329
330  /**
331   * Return the service name of the issued delegation token.
332   *
333   * @return The name service id in HA-mode, or the rpc address in non-HA mode
334   */
335  public String getTokenServiceName() {
336    return getClientNamenodeAddress();
337  }
338
339  /**
340   * Set the namenode address that will be used by clients to access this
341   * namenode or name service. This needs to be called before the config
342   * is overriden.
343   */
344  public void setClientNamenodeAddress(Configuration conf) {
345    String nnAddr = conf.get(FS_DEFAULT_NAME_KEY);
346    if (nnAddr == null) {
347      // default fs is not set.
348      clientNamenodeAddress = null;
349      return;
350    }
351
352    LOG.info(FS_DEFAULT_NAME_KEY + " is " + nnAddr);
353    URI nnUri = URI.create(nnAddr);
354
355    String nnHost = nnUri.getHost();
356    if (nnHost == null) {
357      clientNamenodeAddress = null;
358      return;
359    }
360
361    if (DFSUtil.getNameServiceIds(conf).contains(nnHost)) {
362      // host name is logical
363      clientNamenodeAddress = nnHost;
364    } else if (nnUri.getPort() > 0) {
365      // physical address with a valid port
366      clientNamenodeAddress = nnUri.getAuthority();
367    } else {
368      // the port is missing or 0. Figure out real bind address later.
369      clientNamenodeAddress = null;
370      return;
371    }
372    LOG.info("Clients are to use " + clientNamenodeAddress + " to access"
373        + " this namenode/service.");
374  }
375
376  /**
377   * Get the namenode address to be used by clients.
378   * @return nn address
379   */
380  public String getClientNamenodeAddress() {
381    return clientNamenodeAddress;
382  }
383
  /** Parse "host[:port]" into a socket address, defaulting to DEFAULT_PORT. */
  public static InetSocketAddress getAddress(String address) {
    return NetUtils.createSocketAddr(address, DEFAULT_PORT);
  }
387  
388  /**
389   * Set the configuration property for the service rpc address
390   * to address
391   */
392  public static void setServiceAddress(Configuration conf,
393                                           String address) {
394    LOG.info("Setting ADDRESS " + address);
395    conf.set(DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY, address);
396  }
397  
398  /**
399   * Fetches the address for services to use when connecting to namenode
400   * based on the value of fallback returns null if the special
401   * address is not specified or returns the default namenode address
402   * to be used by both clients and services.
403   * Services here are datanodes, backup node, any non client connection
404   */
405  public static InetSocketAddress getServiceAddress(Configuration conf,
406                                                        boolean fallback) {
407    String addr = conf.get(DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY);
408    if (addr == null || addr.isEmpty()) {
409      return fallback ? getAddress(conf) : null;
410    }
411    return getAddress(addr);
412  }
413
  /** Derive the NameNode address from the configured default filesystem URI. */
  public static InetSocketAddress getAddress(Configuration conf) {
    URI filesystemURI = FileSystem.getDefaultUri(conf);
    return getAddress(filesystemURI);
  }
418
419
420  /**
421   * @return address of file system
422   */
423  public static InetSocketAddress getAddress(URI filesystemURI) {
424    String authority = filesystemURI.getAuthority();
425    if (authority == null) {
426      throw new IllegalArgumentException(String.format(
427          "Invalid URI for NameNode address (check %s): %s has no authority.",
428          FileSystem.FS_DEFAULT_NAME_KEY, filesystemURI.toString()));
429    }
430    if (!HdfsConstants.HDFS_URI_SCHEME.equalsIgnoreCase(
431        filesystemURI.getScheme())) {
432      throw new IllegalArgumentException(String.format(
433          "Invalid URI for NameNode address (check %s): %s is not of scheme '%s'.",
434          FileSystem.FS_DEFAULT_NAME_KEY, filesystemURI.toString(),
435          HdfsConstants.HDFS_URI_SCHEME));
436    }
437    return getAddress(authority);
438  }
439
440  public static URI getUri(InetSocketAddress namenode) {
441    int port = namenode.getPort();
442    String portString = port == DEFAULT_PORT ? "" : (":"+port);
443    return URI.create(HdfsConstants.HDFS_URI_SCHEME + "://" 
444        + namenode.getHostName()+portString);
445  }
446
447  //
448  // Common NameNode methods implementation for the active name-node role.
449  //
450  public NamenodeRole getRole() {
451    return role;
452  }
453
  /** @return true if this node plays the given role. */
  boolean isRole(NamenodeRole that) {
    return role.equals(that);
  }
457
458  /**
459   * Given a configuration get the address of the service rpc server
460   * If the service rpc is not configured returns null
461   */
462  protected InetSocketAddress getServiceRpcServerAddress(Configuration conf) {
463    return NameNode.getServiceAddress(conf, false);
464  }
465
  /** @return the client RPC address derived from the default filesystem URI. */
  protected InetSocketAddress getRpcServerAddress(Configuration conf) {
    return getAddress(conf);
  }
469  
470  /** Given a configuration get the bind host of the service rpc server
471   *  If the bind host is not configured returns null.
472   */
473  protected String getServiceRpcServerBindHost(Configuration conf) {
474    String addr = conf.getTrimmed(DFS_NAMENODE_SERVICE_RPC_BIND_HOST_KEY);
475    if (addr == null || addr.isEmpty()) {
476      return null;
477    }
478    return addr;
479  }
480
481  /** Given a configuration get the bind host of the client rpc server
482   *  If the bind host is not configured returns null.
483   */
484  protected String getRpcServerBindHost(Configuration conf) {
485    String addr = conf.getTrimmed(DFS_NAMENODE_RPC_BIND_HOST_KEY);
486    if (addr == null || addr.isEmpty()) {
487      return null;
488    }
489    return addr;
490  }
491   
492  /**
493   * Modifies the configuration passed to contain the service rpc address setting
494   */
495  protected void setRpcServiceServerAddress(Configuration conf,
496      InetSocketAddress serviceRPCAddress) {
497    setServiceAddress(conf, NetUtils.getHostPortString(serviceRPCAddress));
498  }
499
  /** Record the actual client RPC address back into the default FS URI. */
  protected void setRpcServerAddress(Configuration conf,
      InetSocketAddress rpcAddress) {
    FileSystem.setDefaultUri(conf, getUri(rpcAddress));
  }
504
  /** @return the configured HTTP address (overridable by subclasses). */
  protected InetSocketAddress getHttpServerAddress(Configuration conf) {
    return getHttpAddress(conf);
  }
508
509  /**
510   * HTTP server address for binding the endpoint. This method is
511   * for use by the NameNode and its derivatives. It may return
512   * a different address than the one that should be used by clients to
513   * connect to the NameNode. See
514   * {@link DFSConfigKeys#DFS_NAMENODE_HTTP_BIND_HOST_KEY}
515   *
516   * @param conf
517   * @return
518   */
519  protected InetSocketAddress getHttpServerBindAddress(Configuration conf) {
520    InetSocketAddress bindAddress = getHttpServerAddress(conf);
521
522    // If DFS_NAMENODE_HTTP_BIND_HOST_KEY exists then it overrides the
523    // host name portion of DFS_NAMENODE_HTTP_ADDRESS_KEY.
524    final String bindHost = conf.getTrimmed(DFS_NAMENODE_HTTP_BIND_HOST_KEY);
525    if (bindHost != null && !bindHost.isEmpty()) {
526      bindAddress = new InetSocketAddress(bindHost, bindAddress.getPort());
527    }
528
529    return bindAddress;
530  }
531
  /** @return the NameNode HTTP address, from config or its default. */
  public static InetSocketAddress getHttpAddress(Configuration conf) {
    return  NetUtils.createSocketAddr(
        conf.get(DFS_NAMENODE_HTTP_ADDRESS_KEY, DFS_NAMENODE_HTTP_ADDRESS_DEFAULT));
  }
537
  /** Load the namesystem (image and edits) from disk; overridable hook. */
  protected void loadNamesystem(Configuration conf) throws IOException {
    this.namesystem = FSNamesystem.loadFromDisk(conf);
  }
541
  /** @return this node's registration info; null until setRegistration(). */
  NamenodeRegistration getRegistration() {
    return nodeRegistration;
  }
545
  /**
   * Build and cache this node's registration record from the live RPC/HTTP
   * addresses, the storage info, and the role.
   */
  NamenodeRegistration setRegistration() {
    nodeRegistration = new NamenodeRegistration(
        NetUtils.getHostPortString(rpcServer.getRpcAddress()),
        NetUtils.getHostPortString(getHttpAddress()),
        getFSImage().getStorage(), getRole());
    return nodeRegistration;
  }
553
  /**
   * Get the UGI of the caller: the RPC remote user when inside an RPC
   * handler, otherwise the current process user.
   * Optimizes ugi lookup for RPC operations to avoid a trip through
   * UGI.getCurrentUser, which is synchronized.
   */
  public static UserGroupInformation getRemoteUser() throws IOException {
    UserGroupInformation ugi = Server.getRemoteUser();
    return (ugi != null) ? ugi : UserGroupInformation.getCurrentUser();
  }
561
562
563  /**
564   * Login as the configured user for the NameNode.
565   */
566  void loginAsNameNodeUser(Configuration conf) throws IOException {
567    InetSocketAddress socAddr = getRpcServerAddress(conf);
568    SecurityUtil.login(conf, DFS_NAMENODE_KEYTAB_FILE_KEY,
569        DFS_NAMENODE_KERBEROS_PRINCIPAL_KEY, socAddr.getHostName());
570  }
571  
572  /**
573   * Initialize name-node.
574   * 
575   * @param conf the configuration
576   */
577  protected void initialize(Configuration conf) throws IOException {
578    if (conf.get(HADOOP_USER_GROUP_METRICS_PERCENTILES_INTERVALS) == null) {
579      String intervals = conf.get(DFS_METRICS_PERCENTILES_INTERVALS_KEY);
580      if (intervals != null) {
581        conf.set(HADOOP_USER_GROUP_METRICS_PERCENTILES_INTERVALS,
582          intervals);
583      }
584    }
585
586    UserGroupInformation.setConfiguration(conf);
587    loginAsNameNodeUser(conf);
588
589    NameNode.initMetrics(conf, this.getRole());
590    StartupProgressMetrics.register(startupProgress);
591
592    if (NamenodeRole.NAMENODE == role) {
593      startHttpServer(conf);
594    }
595
596    this.spanReceiverHost = SpanReceiverHost.getInstance(conf);
597
598    loadNamesystem(conf);
599
600    rpcServer = createRpcServer(conf);
601    if (clientNamenodeAddress == null) {
602      // This is expected for MiniDFSCluster. Set it now using 
603      // the RPC server's bind address.
604      clientNamenodeAddress = 
605          NetUtils.getHostPortString(rpcServer.getRpcAddress());
606      LOG.info("Clients are to use " + clientNamenodeAddress + " to access"
607          + " this namenode/service.");
608    }
609    if (NamenodeRole.NAMENODE == role) {
610      httpServer.setNameNodeAddress(getNameNodeAddress());
611      httpServer.setFSImage(getFSImage());
612    }
613    
614    pauseMonitor = new JvmPauseMonitor(conf);
615    pauseMonitor.start();
616    metrics.getJvmMetrics().setPauseMonitor(pauseMonitor);
617    
618    startCommonServices(conf);
619  }
620  
621  /**
622   * Create the RPC server implementation. Used as an extension point for the
623   * BackupNode.
624   */
625  protected NameNodeRpcServer createRpcServer(Configuration conf)
626      throws IOException {
627    return new NameNodeRpcServer(conf, this);
628  }
629
  /** Start the services common to active and standby states */
  private void startCommonServices(Configuration conf) throws IOException {
    namesystem.startCommonServices(conf, haContext);
    registerNNSMXBean();
    // Non-regular roles start their HTTP server here; the regular NameNode
    // already started it in initialize().
    if (NamenodeRole.NAMENODE != role) {
      startHttpServer(conf);
      httpServer.setNameNodeAddress(getNameNodeAddress());
      httpServer.setFSImage(getFSImage());
    }
    rpcServer.start();
    // Instantiate and start operator-configured service plugins; a failing
    // plugin is logged but does not abort startup.
    plugins = conf.getInstances(DFS_NAMENODE_PLUGINS_KEY,
        ServicePlugin.class);
    for (ServicePlugin p: plugins) {
      try {
        p.start(this);
      } catch (Throwable t) {
        LOG.warn("ServicePlugin " + p + " could not be started", t);
      }
    }
    LOG.info(getRole() + " RPC up at: " + rpcServer.getRpcAddress());
    if (rpcServer.getServiceRpcAddress() != null) {
      LOG.info(getRole() + " service RPC up at: "
          + rpcServer.getServiceRpcAddress());
    }
  }
655  
  /** Stop the services started by {@code startCommonServices}, RPC first. */
  private void stopCommonServices() {
    if(rpcServer != null) rpcServer.stop();
    if(namesystem != null) namesystem.close();
    if (pauseMonitor != null) pauseMonitor.stop();
    if (plugins != null) {
      for (ServicePlugin p : plugins) {
        try {
          p.stop();
        } catch (Throwable t) {
          // Best-effort: a misbehaving plugin must not block shutdown.
          LOG.warn("ServicePlugin " + p + " could not be stopped", t);
        }
      }
    }   
    stopHttpServer();
  }
671  
  /**
   * Start the daemon thread that periodically purges expired trash, if
   * the trash interval is configured to a positive value.
   *
   * @throws IOException if the configured interval is negative, or the
   *         FileSystem cannot be created
   */
  private void startTrashEmptier(final Configuration conf) throws IOException {
    long trashInterval =
        conf.getLong(FS_TRASH_INTERVAL_KEY, FS_TRASH_INTERVAL_DEFAULT);
    if (trashInterval == 0) {
      // Trash is disabled.
      return;
    } else if (trashInterval < 0) {
      throw new IOException("Cannot start trash emptier with negative interval."
          + " Set " + FS_TRASH_INTERVAL_KEY + " to a positive value.");
    }
    
    // This may be called from the transitionToActive code path, in which
    // case the current user is the administrator, not the NN. The trash
    // emptier needs to run as the NN. See HDFS-3972.
    FileSystem fs = SecurityUtil.doAsLoginUser(
        new PrivilegedExceptionAction<FileSystem>() {
          @Override
          public FileSystem run() throws IOException {
            return FileSystem.get(conf);
          }
        });
    this.emptier = new Thread(new Trash(fs, conf).getEmptier(), "Trash Emptier");
    // Daemon thread so it never keeps the JVM alive during shutdown.
    this.emptier.setDaemon(true);
    this.emptier.start();
  }
696  
697  private void stopTrashEmptier() {
698    if (this.emptier != null) {
699      emptier.interrupt();
700      emptier = null;
701    }
702  }
703  
  /** Create and start the HTTP server, wiring in the startup-progress view. */
  private void startHttpServer(final Configuration conf) throws IOException {
    httpServer = new NameNodeHttpServer(conf, this, getHttpServerBindAddress(conf));
    httpServer.start();
    httpServer.setStartupProgress(startupProgress);
  }
709  
  /** Stop the HTTP server; failures are logged, never propagated. */
  private void stopHttpServer() {
    try {
      if (httpServer != null) httpServer.stop();
    } catch (Exception e) {
      LOG.error("Exception while stopping httpserver", e);
    }
  }
717
718  /**
719   * Start NameNode.
720   * <p>
721   * The name-node can be started with one of the following startup options:
722   * <ul> 
723   * <li>{@link StartupOption#REGULAR REGULAR} - normal name node startup</li>
724   * <li>{@link StartupOption#FORMAT FORMAT} - format name node</li>
725   * <li>{@link StartupOption#BACKUP BACKUP} - start backup node</li>
726   * <li>{@link StartupOption#CHECKPOINT CHECKPOINT} - start checkpoint node</li>
727   * <li>{@link StartupOption#UPGRADE UPGRADE} - start the cluster  
728   * <li>{@link StartupOption#UPGRADEONLY UPGRADEONLY} - upgrade the cluster  
729   * upgrade and create a snapshot of the current file system state</li> 
730   * <li>{@link StartupOption#RECOVER RECOVERY} - recover name node
731   * metadata</li>
732   * <li>{@link StartupOption#ROLLBACK ROLLBACK} - roll the  
733   *            cluster back to the previous state</li>
734   * <li>{@link StartupOption#FINALIZE FINALIZE} - finalize 
735   *            previous upgrade</li>
736   * <li>{@link StartupOption#IMPORT IMPORT} - import checkpoint</li>
737   * </ul>
738   * The option is passed via configuration field: 
739   * <tt>dfs.namenode.startup</tt>
740   * 
741   * The conf will be modified to reflect the actual ports on which 
742   * the NameNode is up and running if the user passes the port as
743   * <code>zero</code> in the conf.
744   * 
745   * @param conf  confirguration
746   * @throws IOException
747   */
748  public NameNode(Configuration conf) throws IOException {
749    this(conf, NamenodeRole.NAMENODE);
750  }
751
  /**
   * Create a NameNode with the given role: resolve generic configuration
   * keys, initialize all services, and enter the initial HA state.
   *
   * @param conf configuration; may be modified during initialization
   * @param role the role this node plays
   * @throws IOException if initialization fails; the node is stopped first
   */
  protected NameNode(Configuration conf, NamenodeRole role) 
      throws IOException { 
    this.conf = conf;
    this.role = role;
    // Capture the client-facing address before generic keys rewrite the
    // config (see setClientNamenodeAddress javadoc).
    setClientNamenodeAddress(conf);
    String nsId = getNameServiceId(conf);
    String namenodeId = HAUtil.getNameNodeId(conf, nsId);
    this.haEnabled = HAUtil.isHAEnabled(conf, nsId);
    // Non-HA (or upgrade) deployments start active; otherwise standby.
    state = createHAState(getStartupOption(conf));
    this.allowStaleStandbyReads = HAUtil.shouldAllowStandbyReads(conf);
    this.haContext = createHAContext();
    try {
      initializeGenericKeys(conf, nsId, namenodeId);
      initialize(conf);
      try {
        // Enter the initial HA state under the HA write lock.
        haContext.writeLock();
        state.prepareToEnterState(haContext);
        state.enterState(haContext);
      } finally {
        haContext.writeUnlock();
      }
    } catch (IOException e) {
      // Release any partially-initialized resources before rethrowing.
      this.stop();
      throw e;
    } catch (HadoopIllegalArgumentException e) {
      this.stop();
      throw e;
    }
    this.started.set(true);
  }
782
783  protected HAState createHAState(StartupOption startOpt) {
784    if (!haEnabled || startOpt == StartupOption.UPGRADE 
785        || startOpt == StartupOption.UPGRADEONLY) {
786      return ACTIVE_STATE;
787    } else {
788      return STANDBY_STATE;
789    }
790  }
791
  /** Create the HA context; extension point for subclasses. */
  protected HAContext createHAContext() {
    return new NameNodeHAContext();
  }
795
796  /**
797   * Wait for service to finish.
798   * (Normally, it runs forever.)
799   */
800  public void join() {
801    try {
802      rpcServer.join();
803    } catch (InterruptedException ie) {
804      LOG.info("Caught interrupted exception ", ie);
805    }
806  }
807
  /**
   * Stop all NameNode threads and wait for all to finish.
   * Idempotent: only the first call performs the shutdown.
   */
  public void stop() {
    // Guard so concurrent or repeated stop() calls shut down only once.
    synchronized(this) {
      if (stopRequested)
        return;
      stopRequested = true;
    }
    try {
      if (state != null) {
        state.exitState(haContext);
      }
    } catch (ServiceFailedException e) {
      LOG.warn("Encountered exception while exiting state ", e);
    } finally {
      // Tear down services, metrics, the namesystem, the status MBean and
      // trace receivers even if exiting the HA state failed.
      stopCommonServices();
      if (metrics != null) {
        metrics.shutdown();
      }
      if (namesystem != null) {
        namesystem.shutdown();
      }
      if (nameNodeStatusBeanName != null) {
        MBeans.unregister(nameNodeStatusBeanName);
        nameNodeStatusBeanName = null;
      }
      if (this.spanReceiverHost != null) {
        this.spanReceiverHost.closeReceivers();
      }
    }
  }
840
  /** @return true once {@link #stop()} has been requested. */
  synchronized boolean isStopRequested() {
    return stopRequested;
  }
844
  /**
   * Is the cluster currently in safe mode?
   * Delegates to the namesystem.
   */
  public boolean isInSafeMode() {
    return namesystem.isInSafeMode();
  }
851    
  /** @return the namesystem's FSImage; exposed for tests. */
  @VisibleForTesting
  public FSImage getFSImage() {
    return namesystem.getFSImage();
  }
857
  /**
   * @return NameNode RPC address (the address the client RPC server is
   *    listening on)
   */
  public InetSocketAddress getNameNodeAddress() {
    return rpcServer.getRpcAddress();
  }
864
  /**
   * @return NameNode RPC address in "host:port" string form
   */
  public String getNameNodeAddressHostPortString() {
    return NetUtils.getHostPortString(rpcServer.getRpcAddress());
  }
871
872  /**
873   * @return NameNode service RPC address if configured, the
874   *    NameNode RPC address otherwise
875   */
876  public InetSocketAddress getServiceRpcAddress() {
877    final InetSocketAddress serviceAddr = rpcServer.getServiceRpcAddress();
878    return serviceAddr == null ? rpcServer.getRpcAddress() : serviceAddr;
879  }
880
  /**
   * @return NameNode HTTP address, used by the Web UI, image transfer,
   *    and HTTP-based file system clients like Hftp and WebHDFS
   */
  public InetSocketAddress getHttpAddress() {
    return httpServer.getHttpAddress();
  }
888
  /**
   * @return NameNode HTTPS address, used by the Web UI, image transfer,
   *    and HTTP-based file system clients like Hftp and WebHDFS
   */
  public InetSocketAddress getHttpsAddress() {
    return httpServer.getHttpsAddress();
  }
896
897  /**
898   * Verify that configured directories exist, then
   * interactively confirm that formatting is desired 
900   * for each existing directory and format them.
901   * 
902   * @param conf configuration to use
903   * @param force if true, format regardless of whether dirs exist
904   * @return true if formatting was aborted, false otherwise
905   * @throws IOException
906   */
907  private static boolean format(Configuration conf, boolean force,
908      boolean isInteractive) throws IOException {
909    String nsId = DFSUtil.getNamenodeNameServiceId(conf);
910    String namenodeId = HAUtil.getNameNodeId(conf, nsId);
911    initializeGenericKeys(conf, nsId, namenodeId);
912    checkAllowFormat(conf);
913
914    if (UserGroupInformation.isSecurityEnabled()) {
915      InetSocketAddress socAddr = getAddress(conf);
916      SecurityUtil.login(conf, DFS_NAMENODE_KEYTAB_FILE_KEY,
917          DFS_NAMENODE_KERBEROS_PRINCIPAL_KEY, socAddr.getHostName());
918    }
919    
920    Collection<URI> nameDirsToFormat = FSNamesystem.getNamespaceDirs(conf);
921    List<URI> sharedDirs = FSNamesystem.getSharedEditsDirs(conf);
922    List<URI> dirsToPrompt = new ArrayList<URI>();
923    dirsToPrompt.addAll(nameDirsToFormat);
924    dirsToPrompt.addAll(sharedDirs);
925    List<URI> editDirsToFormat = 
926                 FSNamesystem.getNamespaceEditsDirs(conf);
927
928    // if clusterID is not provided - see if you can find the current one
929    String clusterId = StartupOption.FORMAT.getClusterId();
930    if(clusterId == null || clusterId.equals("")) {
931      //Generate a new cluster id
932      clusterId = NNStorage.newClusterID();
933    }
934    System.out.println("Formatting using clusterid: " + clusterId);
935    
936    FSImage fsImage = new FSImage(conf, nameDirsToFormat, editDirsToFormat);
937    try {
938      FSNamesystem fsn = new FSNamesystem(conf, fsImage);
939      fsImage.getEditLog().initJournalsForWrite();
940
941      if (!fsImage.confirmFormat(force, isInteractive)) {
942        return true; // aborted
943      }
944
945      fsImage.format(fsn, clusterId);
946    } catch (IOException ioe) {
947      LOG.warn("Encountered exception during format: ", ioe);
948      fsImage.close();
949      throw ioe;
950    }
951    return false;
952  }
953
954  public static void checkAllowFormat(Configuration conf) throws IOException {
955    if (!conf.getBoolean(DFS_NAMENODE_SUPPORT_ALLOW_FORMAT_KEY, 
956        DFS_NAMENODE_SUPPORT_ALLOW_FORMAT_DEFAULT)) {
957      throw new IOException("The option " + DFS_NAMENODE_SUPPORT_ALLOW_FORMAT_KEY
958                + " is set to false for this filesystem, so it "
959                + "cannot be formatted. You will need to set "
960                + DFS_NAMENODE_SUPPORT_ALLOW_FORMAT_KEY +" parameter "
961                + "to true in order to format this filesystem");
962    }
963  }
964  
  /** Initialize a new shared edits dir, forcing the format (no prompt). */
  @VisibleForTesting
  public static boolean initializeSharedEdits(Configuration conf) throws IOException {
    return initializeSharedEdits(conf, true);
  }
969  
  /** Initialize a new shared edits dir non-interactively. */
  @VisibleForTesting
  public static boolean initializeSharedEdits(Configuration conf,
      boolean force) throws IOException {
    return initializeSharedEdits(conf, force, false);
  }
975
976  /**
977   * Clone the supplied configuration but remove the shared edits dirs.
978   *
979   * @param conf Supplies the original configuration.
980   * @return Cloned configuration without the shared edit dirs.
981   * @throws IOException on failure to generate the configuration.
982   */
983  private static Configuration getConfigurationWithoutSharedEdits(
984      Configuration conf)
985      throws IOException {
986    List<URI> editsDirs = FSNamesystem.getNamespaceEditsDirs(conf, false);
987    String editsDirsString = Joiner.on(",").join(editsDirs);
988
989    Configuration confWithoutShared = new Configuration(conf);
990    confWithoutShared.unset(DFSConfigKeys.DFS_NAMENODE_SHARED_EDITS_DIR_KEY);
991    confWithoutShared.setStrings(DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_KEY,
992        editsDirsString);
993    return confWithoutShared;
994  }
995
996  /**
997   * Format a new shared edits dir and copy in enough edit log segments so that
998   * the standby NN can start up.
999   * 
1000   * @param conf configuration
1001   * @param force format regardless of whether or not the shared edits dir exists
1002   * @param interactive prompt the user when a dir exists
1003   * @return true if the command aborts, false otherwise
1004   */
  private static boolean initializeSharedEdits(Configuration conf,
      boolean force, boolean interactive) throws IOException {
    String nsId = DFSUtil.getNamenodeNameServiceId(conf);
    String namenodeId = HAUtil.getNameNodeId(conf, nsId);
    initializeGenericKeys(conf, nsId, namenodeId);
    
    if (conf.get(DFSConfigKeys.DFS_NAMENODE_SHARED_EDITS_DIR_KEY) == null) {
      LOG.fatal("No shared edits directory configured for namespace " +
          nsId + " namenode " + namenodeId);
      return false;
    }

    if (UserGroupInformation.isSecurityEnabled()) {
      InetSocketAddress socAddr = getAddress(conf);
      SecurityUtil.login(conf, DFS_NAMENODE_KEYTAB_FILE_KEY,
          DFS_NAMENODE_KERBEROS_PRINCIPAL_KEY, socAddr.getHostName());
    }

    NNStorage existingStorage = null;
    FSImage sharedEditsImage = null;
    try {
      // Load the local namespace with the shared dirs stripped out, so the
      // shared location can be formatted independently below.
      FSNamesystem fsns =
          FSNamesystem.loadFromDisk(getConfigurationWithoutSharedEdits(conf));
      
      existingStorage = fsns.getFSImage().getStorage();
      NamespaceInfo nsInfo = existingStorage.getNamespaceInfo();
      
      List<URI> sharedEditsDirs = FSNamesystem.getSharedEditsDirs(conf);
      
      // An FSImage over the shared edits dirs only (no image dirs).
      sharedEditsImage = new FSImage(conf,
          Lists.<URI>newArrayList(),
          sharedEditsDirs);
      sharedEditsImage.getEditLog().initJournalsForWrite();
      
      if (!sharedEditsImage.confirmFormat(force, interactive)) {
        return true; // abort
      }
      
      NNStorage newSharedStorage = sharedEditsImage.getStorage();
      // Call Storage.format instead of FSImage.format here, since we don't
      // actually want to save a checkpoint - just prime the dirs with
      // the existing namespace info
      newSharedStorage.format(nsInfo);
      sharedEditsImage.getEditLog().formatNonFileJournals(nsInfo);

      // Need to make sure the edit log segments are in good shape to initialize
      // the shared edits dir.
      fsns.getFSImage().getEditLog().close();
      fsns.getFSImage().getEditLog().initJournalsForWrite();
      fsns.getFSImage().getEditLog().recoverUnclosedStreams();

      copyEditLogSegmentsToSharedDir(fsns, sharedEditsDirs, newSharedStorage,
          conf);
    } catch (IOException ioe) {
      LOG.error("Could not initialize shared edits dir", ioe);
      return true; // aborted
    } finally {
      if (sharedEditsImage != null) {
        try {
          sharedEditsImage.close();
        }  catch (IOException ioe) {
          LOG.warn("Could not close sharedEditsImage", ioe);
        }
      }
      // Have to unlock storage explicitly for the case when we're running in a
      // unit test, which runs in the same JVM as NNs.
      if (existingStorage != null) {
        try {
          existingStorage.unlockAll();
        } catch (IOException ioe) {
          LOG.warn("Could not unlock storage directories", ioe);
          return true; // aborted
        }
      }
    }
    return false; // did not abort
  }
1082
  /**
   * Copy all edit log segments after the most recent checkpoint from the
   * local edit dirs into the freshly formatted shared edits dir, re-creating
   * segment boundaries as END_LOG_SEGMENT ops are encountered.
   */
  private static void copyEditLogSegmentsToSharedDir(FSNamesystem fsns,
      Collection<URI> sharedEditsDirs, NNStorage newSharedStorage,
      Configuration conf) throws IOException {
    Preconditions.checkArgument(!sharedEditsDirs.isEmpty(),
        "No shared edits specified");
    // Copy edit log segments into the new shared edits dir.
    List<URI> sharedEditsUris = new ArrayList<URI>(sharedEditsDirs);
    FSEditLog newSharedEditLog = new FSEditLog(conf, newSharedStorage,
        sharedEditsUris);
    newSharedEditLog.initJournalsForWrite();
    newSharedEditLog.recoverUnclosedStreams();
    
    FSEditLog sourceEditLog = fsns.getFSImage().editLog;
    
    long fromTxId = fsns.getFSImage().getMostRecentCheckpointTxId();
    
    Collection<EditLogInputStream> streams = null;
    try {
      streams = sourceEditLog.selectInputStreams(fromTxId + 1, 0);

      // Set the nextTxid to the CheckpointTxId+1
      newSharedEditLog.setNextTxId(fromTxId + 1);

      // Copy all edits after last CheckpointTxId to shared edits dir
      for (EditLogInputStream stream : streams) {
        LOG.debug("Beginning to copy stream " + stream + " to shared edits");
        FSEditLogOp op;
        boolean segmentOpen = false;
        while ((op = stream.readOp()) != null) {
          if (LOG.isTraceEnabled()) {
            LOG.trace("copying op: " + op);
          }
          // Open a new segment in the target log at the first op copied.
          if (!segmentOpen) {
            newSharedEditLog.startLogSegment(op.txid, false);
            segmentOpen = true;
          }

          newSharedEditLog.logEdit(op);

          if (op.opCode == FSEditLogOpCodes.OP_END_LOG_SEGMENT) {
            newSharedEditLog.logSync();
            newSharedEditLog.endCurrentLogSegment(false);
            LOG.debug("ending log segment because of END_LOG_SEGMENT op in "
                + stream);
            segmentOpen = false;
          }
        }

        // Close any segment left open when the source stream ran out without
        // an explicit END_LOG_SEGMENT op.
        if (segmentOpen) {
          LOG.debug("ending log segment because of end of stream in " + stream);
          newSharedEditLog.logSync();
          newSharedEditLog.endCurrentLogSegment(false);
          segmentOpen = false;
        }
      }
    } finally {
      if (streams != null) {
        FSEditLog.closeAllStreams(streams);
      }
    }
  }
1144  
1145  @VisibleForTesting
1146  public static boolean doRollback(Configuration conf,
1147      boolean isConfirmationNeeded) throws IOException {
1148    String nsId = DFSUtil.getNamenodeNameServiceId(conf);
1149    String namenodeId = HAUtil.getNameNodeId(conf, nsId);
1150    initializeGenericKeys(conf, nsId, namenodeId);
1151
1152    FSNamesystem nsys = new FSNamesystem(conf, new FSImage(conf));
1153    System.err.print(
1154        "\"rollBack\" will remove the current state of the file system,\n"
1155        + "returning you to the state prior to initiating your recent.\n"
1156        + "upgrade. This action is permanent and cannot be undone. If you\n"
1157        + "are performing a rollback in an HA environment, you should be\n"
1158        + "certain that no NameNode process is running on any host.");
1159    if (isConfirmationNeeded) {
1160      if (!confirmPrompt("Roll back file system state?")) {
1161        System.err.println("Rollback aborted.");
1162        return true;
1163      }
1164    }
1165    nsys.getFSImage().doRollback(nsys);
1166    return false;
1167  }
1168
  /** Print the namenode usage string, followed by a blank line. */
  private static void printUsage(PrintStream out) {
    out.println(USAGE + "\n");
  }
1172
  /**
   * Parse namenode-specific command line arguments into a StartupOption.
   *
   * @param args remaining arguments after generic option parsing; may be null
   * @return the parsed startup option (REGULAR if no args), or null when the
   *         arguments are invalid -- callers print usage and exit on null
   */
  @VisibleForTesting
  static StartupOption parseArguments(String args[]) {
    int argsLen = (args == null) ? 0 : args.length;
    StartupOption startOpt = StartupOption.REGULAR;
    for(int i=0; i < argsLen; i++) {
      String cmd = args[i];
      if (StartupOption.FORMAT.getName().equalsIgnoreCase(cmd)) {
        startOpt = StartupOption.FORMAT;
        // Consume the remaining args as format flags
        // (-clusterid <id>, -force, -nonInteractive).
        for (i = i + 1; i < argsLen; i++) {
          if (args[i].equalsIgnoreCase(StartupOption.CLUSTERID.getName())) {
            i++;
            if (i >= argsLen) {
              // if no cluster id specified, return null
              LOG.fatal("Must specify a valid cluster ID after the "
                  + StartupOption.CLUSTERID.getName() + " flag");
              return null;
            }
            String clusterId = args[i];
            // Make sure an id is specified and not another flag
            if (clusterId.isEmpty() ||
                clusterId.equalsIgnoreCase(StartupOption.FORCE.getName()) ||
                clusterId.equalsIgnoreCase(
                    StartupOption.NONINTERACTIVE.getName())) {
              LOG.fatal("Must specify a valid cluster ID after the "
                  + StartupOption.CLUSTERID.getName() + " flag");
              return null;
            }
            startOpt.setClusterId(clusterId);
          }

          if (args[i].equalsIgnoreCase(StartupOption.FORCE.getName())) {
            startOpt.setForceFormat(true);
          }

          if (args[i].equalsIgnoreCase(StartupOption.NONINTERACTIVE.getName())) {
            startOpt.setInteractiveFormat(false);
          }
        }
      } else if (StartupOption.GENCLUSTERID.getName().equalsIgnoreCase(cmd)) {
        startOpt = StartupOption.GENCLUSTERID;
      } else if (StartupOption.REGULAR.getName().equalsIgnoreCase(cmd)) {
        startOpt = StartupOption.REGULAR;
      } else if (StartupOption.BACKUP.getName().equalsIgnoreCase(cmd)) {
        startOpt = StartupOption.BACKUP;
      } else if (StartupOption.CHECKPOINT.getName().equalsIgnoreCase(cmd)) {
        startOpt = StartupOption.CHECKPOINT;
      } else if (StartupOption.UPGRADE.getName().equalsIgnoreCase(cmd)
          || StartupOption.UPGRADEONLY.getName().equalsIgnoreCase(cmd)) {
        startOpt = StartupOption.UPGRADE.getName().equalsIgnoreCase(cmd) ? 
            StartupOption.UPGRADE : StartupOption.UPGRADEONLY;
        /* Can be followed by CLUSTERID with a required parameter or
         * RENAMERESERVED with an optional parameter
         */
        while (i + 1 < argsLen) {
          String flag = args[i + 1];
          if (flag.equalsIgnoreCase(StartupOption.CLUSTERID.getName())) {
            if (i + 2 < argsLen) {
              i += 2;
              startOpt.setClusterId(args[i]);
            } else {
              LOG.fatal("Must specify a valid cluster ID after the "
                  + StartupOption.CLUSTERID.getName() + " flag");
              return null;
            }
          } else if (flag.equalsIgnoreCase(StartupOption.RENAMERESERVED
              .getName())) {
            if (i + 2 < argsLen) {
              FSImageFormat.setRenameReservedPairs(args[i + 2]);
              i += 2;
            } else {
              // -renameReserved with no value: use the default pairs.
              FSImageFormat.useDefaultRenameReservedPairs();
              i += 1;
            }
          } else {
            LOG.fatal("Unknown upgrade flag " + flag);
            return null;
          }
        }
      } else if (StartupOption.ROLLINGUPGRADE.getName().equalsIgnoreCase(cmd)) {
        startOpt = StartupOption.ROLLINGUPGRADE;
        ++i;
        // A rolling upgrade sub-option is mandatory.
        if (i >= argsLen) {
          LOG.fatal("Must specify a rolling upgrade startup option "
              + RollingUpgradeStartupOption.getAllOptionString());
          return null;
        }
        startOpt.setRollingUpgradeStartupOption(args[i]);
      } else if (StartupOption.ROLLBACK.getName().equalsIgnoreCase(cmd)) {
        startOpt = StartupOption.ROLLBACK;
      } else if (StartupOption.FINALIZE.getName().equalsIgnoreCase(cmd)) {
        startOpt = StartupOption.FINALIZE;
      } else if (StartupOption.IMPORT.getName().equalsIgnoreCase(cmd)) {
        startOpt = StartupOption.IMPORT;
      } else if (StartupOption.BOOTSTRAPSTANDBY.getName().equalsIgnoreCase(cmd)) {
        startOpt = StartupOption.BOOTSTRAPSTANDBY;
        // Remaining args belong to the bootstrapStandby tool; stop parsing.
        return startOpt;
      } else if (StartupOption.INITIALIZESHAREDEDITS.getName().equalsIgnoreCase(cmd)) {
        startOpt = StartupOption.INITIALIZESHAREDEDITS;
        for (i = i + 1 ; i < argsLen; i++) {
          if (StartupOption.NONINTERACTIVE.getName().equals(args[i])) {
            startOpt.setInteractiveFormat(false);
          } else if (StartupOption.FORCE.getName().equals(args[i])) {
            startOpt.setForceFormat(true);
          } else {
            LOG.fatal("Invalid argument: " + args[i]);
            return null;
          }
        }
        return startOpt;
      } else if (StartupOption.RECOVER.getName().equalsIgnoreCase(cmd)) {
        // -recover must be the only startup option on the command line.
        if (startOpt != StartupOption.REGULAR) {
          throw new RuntimeException("Can't combine -recover with " +
              "other startup options.");
        }
        startOpt = StartupOption.RECOVER;
        while (++i < argsLen) {
          if (args[i].equalsIgnoreCase(
                StartupOption.FORCE.getName())) {
            startOpt.setForce(MetaRecoveryContext.FORCE_FIRST_CHOICE);
          } else {
            throw new RuntimeException("Error parsing recovery options: " + 
              "can't understand option \"" + args[i] + "\"");
          }
        }
      } else if (StartupOption.METADATAVERSION.getName().equalsIgnoreCase(cmd)) {
        startOpt = StartupOption.METADATAVERSION;
      } else {
        return null;
      }
    }
    return startOpt;
  }
1305
  /** Record the startup option in the conf under the startup key. */
  private static void setStartupOption(Configuration conf, StartupOption opt) {
    conf.set(DFS_NAMENODE_STARTUP_KEY, opt.name());
  }
1309
  /** Read the startup option back from the conf; defaults to REGULAR. */
  static StartupOption getStartupOption(Configuration conf) {
    return StartupOption.valueOf(conf.get(DFS_NAMENODE_STARTUP_KEY,
                                          StartupOption.REGULAR.toString()));
  }
1314
  /**
   * Run metadata recovery: after warning the user, load whatever namespace
   * can be read from disk and save it back out.
   */
  private static void doRecovery(StartupOption startOpt, Configuration conf)
      throws IOException {
    String nsId = DFSUtil.getNamenodeNameServiceId(conf);
    String namenodeId = HAUtil.getNameNodeId(conf, nsId);
    initializeGenericKeys(conf, nsId, namenodeId);
    // Prompt unless the user pre-forced all recovery choices on the CLI.
    if (startOpt.getForce() < MetaRecoveryContext.FORCE_ALL) {
      if (!confirmPrompt("You have selected Metadata Recovery mode.  " +
          "This mode is intended to recover lost metadata on a corrupt " +
          "filesystem.  Metadata recovery mode often permanently deletes " +
          "data from your HDFS filesystem.  Please back up your edit log " +
          "and fsimage before trying this!\n\n" +
          "Are you ready to proceed? (Y/N)\n")) {
        System.err.println("Recovery aborted at user request.\n");
        return;
      }
    }
    MetaRecoveryContext.LOG.info("starting recovery...");
    UserGroupInformation.setConfiguration(conf);
    NameNode.initMetrics(conf, startOpt.toNodeRole());
    FSNamesystem fsn = null;
    try {
      fsn = FSNamesystem.loadFromDisk(conf);
      fsn.getFSImage().saveNamespace(fsn);
      MetaRecoveryContext.LOG.info("RECOVERY COMPLETE");
    } catch (IOException e) {
      MetaRecoveryContext.LOG.info("RECOVERY FAILED: caught exception", e);
      throw e;
    } catch (RuntimeException e) {
      MetaRecoveryContext.LOG.info("RECOVERY FAILED: caught exception", e);
      throw e;
    } finally {
      if (fsn != null)
        fsn.close();
    }
  }
1350
1351  /**
1352   * Verify that configured directories exist, then print the metadata versions
1353   * of the software and the image.
1354   *
1355   * @param conf configuration to use
1356   * @throws IOException
1357   */
  private static boolean printMetadataVersion(Configuration conf)
    throws IOException {
    final String nsId = DFSUtil.getNamenodeNameServiceId(conf);
    final String namenodeId = HAUtil.getNameNodeId(conf, nsId);
    NameNode.initializeGenericKeys(conf, nsId, namenodeId);
    final FSImage fsImage = new FSImage(conf);
    // Namesystem is constructed without loading the image (third arg false);
    // recoverTransitionRead is invoked only to report the metadata versions.
    final FSNamesystem fs = new FSNamesystem(conf, fsImage, false);
    return fsImage.recoverTransitionRead(
      StartupOption.METADATAVERSION, fs, null);
  }
1368
  /**
   * Parse the command line and either start a NameNode/BackupNode or carry
   * out a one-shot administrative action (format, rollback, bootstrap, etc.).
   * Most administrative branches call terminate() and do not return normally.
   *
   * @param argv command line arguments
   * @param conf configuration, or null to create a fresh HdfsConfiguration
   * @return the created NameNode, or null when the action completed in-line
   * @throws IOException on failure
   */
  public static NameNode createNameNode(String argv[], Configuration conf)
      throws IOException {
    LOG.info("createNameNode " + Arrays.asList(argv));
    if (conf == null)
      conf = new HdfsConfiguration();
    // Parse out some generic args into Configuration.
    GenericOptionsParser hParser = new GenericOptionsParser(conf, argv);
    argv = hParser.getRemainingArgs();
    // Parse the rest, NN specific args.
    StartupOption startOpt = parseArguments(argv);
    if (startOpt == null) {
      printUsage(System.err);
      return null;
    }
    setStartupOption(conf, startOpt);

    switch (startOpt) {
      case FORMAT: {
        boolean aborted = format(conf, startOpt.getForceFormat(),
            startOpt.getInteractiveFormat());
        terminate(aborted ? 1 : 0);
        return null; // avoid javac warning
      }
      case GENCLUSTERID: {
        // The id goes to stdout so it can be captured by scripts.
        System.err.println("Generating new cluster id:");
        System.out.println(NNStorage.newClusterID());
        terminate(0);
        return null;
      }
      case FINALIZE: {
        System.err.println("Use of the argument '" + StartupOption.FINALIZE +
            "' is no longer supported. To finalize an upgrade, start the NN " +
            " and then run `hdfs dfsadmin -finalizeUpgrade'");
        terminate(1);
        return null; // avoid javac warning
      }
      case ROLLBACK: {
        boolean aborted = doRollback(conf, true);
        terminate(aborted ? 1 : 0);
        return null; // avoid warning
      }
      case BOOTSTRAPSTANDBY: {
        // Pass everything after the option itself on to the tool.
        String toolArgs[] = Arrays.copyOfRange(argv, 1, argv.length);
        int rc = BootstrapStandby.run(toolArgs, conf);
        terminate(rc);
        return null; // avoid warning
      }
      case INITIALIZESHAREDEDITS: {
        boolean aborted = initializeSharedEdits(conf,
            startOpt.getForceFormat(),
            startOpt.getInteractiveFormat());
        terminate(aborted ? 1 : 0);
        return null; // avoid warning
      }
      case BACKUP:
      case CHECKPOINT: {
        NamenodeRole role = startOpt.toNodeRole();
        DefaultMetricsSystem.initialize(role.toString().replace(" ", ""));
        return new BackupNode(conf, role);
      }
      case RECOVER: {
        NameNode.doRecovery(startOpt, conf);
        return null;
      }
      case METADATAVERSION: {
        printMetadataVersion(conf);
        terminate(0);
        return null; // avoid javac warning
      }
      case UPGRADEONLY: {
        // Run the upgrade, then exit without serving.
        DefaultMetricsSystem.initialize("NameNode");
        new NameNode(conf);
        terminate(0);
        return null;
      }
      default: {
        DefaultMetricsSystem.initialize("NameNode");
        return new NameNode(conf);
      }
    }
  }
1450
1451  /**
   * In a federated setup, configuration is set for a group of namenodes and
   * secondary namenode/backup/checkpointer nodes, which are
   * grouped under a logical nameservice ID. The configuration keys specific 
   * to them have a suffix set to the configured nameserviceId.
1456   * 
1457   * This method copies the value from specific key of format key.nameserviceId
1458   * to key, to set up the generic configuration. Once this is done, only
1459   * generic version of the configuration is read in rest of the code, for
1460   * backward compatibility and simpler code changes.
1461   * 
1462   * @param conf
1463   *          Configuration object to lookup specific key and to set the value
1464   *          to the key passed. Note the conf object is modified
1465   * @param nameserviceId name service Id (to distinguish federated NNs)
1466   * @param namenodeId the namenode ID (to distinguish HA NNs)
1467   * @see DFSUtil#setGenericConf(Configuration, String, String, String...)
1468   */
  public static void initializeGenericKeys(Configuration conf,
      String nameserviceId, String namenodeId) {
    if ((nameserviceId != null && !nameserviceId.isEmpty()) || 
        (namenodeId != null && !namenodeId.isEmpty())) {
      if (nameserviceId != null) {
        conf.set(DFS_NAMESERVICE_ID, nameserviceId);
      }
      if (namenodeId != null) {
        conf.set(DFS_HA_NAMENODE_ID_KEY, namenodeId);
      }
      
      // Copy the suffixed (per-nameservice / per-namenode) values onto the
      // generic key names so the rest of the code can read only the generic
      // versions.
      DFSUtil.setGenericConf(conf, nameserviceId, namenodeId,
          NAMENODE_SPECIFIC_KEYS);
      DFSUtil.setGenericConf(conf, nameserviceId, null,
          NAMESERVICE_SPECIFIC_KEYS);
    }
    
    // If the RPC address is set use it to (re-)configure the default FS
    if (conf.get(DFS_NAMENODE_RPC_ADDRESS_KEY) != null) {
      URI defaultUri = URI.create(HdfsConstants.HDFS_URI_SCHEME + "://"
          + conf.get(DFS_NAMENODE_RPC_ADDRESS_KEY));
      conf.set(FS_DEFAULT_NAME_KEY, defaultUri.toString());
      LOG.debug("Setting " + FS_DEFAULT_NAME_KEY + " to " + defaultUri.toString());
    }
  }
1494    
  /** 
   * Get the name service Id for the node
   * @param conf configuration to read the nameservice id from
   * @return name service Id or null if federation is not configured
   */
  protected String getNameServiceId(Configuration conf) {
    return DFSUtil.getNamenodeNameServiceId(conf);
  }
1502  
  /**
   * Command-line entry point: create the appropriate NameNode (or run a
   * requested admin action) and wait for it to finish.
   */
  public static void main(String argv[]) throws Exception {
    if (DFSUtil.parseHelpArgument(argv, NameNode.USAGE, System.out, true)) {
      System.exit(0);
    }

    try {
      StringUtils.startupShutdownMessage(NameNode.class, argv, LOG);
      NameNode namenode = createNameNode(argv, null);
      if (namenode != null) {
        // Block until the namenode's RPC server stops.
        namenode.join();
      }
    } catch (Throwable e) {
      LOG.fatal("Failed to start namenode.", e);
      terminate(1, e);
    }
  }
1521
  /**
   * HAServiceProtocol health check: verify the namenode has disk resources
   * available. No-op when HA is disabled.
   *
   * @throws HealthCheckFailedException if no resources are available
   * @throws AccessControlException if the caller is not a superuser
   */
  synchronized void monitorHealth() 
      throws HealthCheckFailedException, AccessControlException {
    namesystem.checkSuperuserPrivilege();
    if (!haEnabled) {
      return; // no-op, if HA is not enabled
    }
    getNamesystem().checkAvailableResources();
    if (!getNamesystem().nameNodeHasResourcesAvailable()) {
      throw new HealthCheckFailedException(
          "The NameNode has no resources available");
    }
  }
1534  
  /**
   * HAServiceProtocol: transition this namenode to the active state.
   *
   * @throws ServiceFailedException if HA is not enabled or the transition fails
   * @throws AccessControlException if the caller is not a superuser
   */
  synchronized void transitionToActive() 
      throws ServiceFailedException, AccessControlException {
    namesystem.checkSuperuserPrivilege();
    if (!haEnabled) {
      throw new ServiceFailedException("HA for namenode is not enabled");
    }
    state.setState(haContext, ACTIVE_STATE);
  }
1543  
  /**
   * HAServiceProtocol: transition this namenode to the standby state.
   *
   * @throws ServiceFailedException if HA is not enabled or the transition fails
   * @throws AccessControlException if the caller is not a superuser
   */
  synchronized void transitionToStandby() 
      throws ServiceFailedException, AccessControlException {
    namesystem.checkSuperuserPrivilege();
    if (!haEnabled) {
      throw new ServiceFailedException("HA for namenode is not enabled");
    }
    state.setState(haContext, STANDBY_STATE);
  }
1552
  /**
   * HAServiceProtocol status query. A standby that is still in safemode is
   * reported as not ready to become active, with the safemode tip as reason.
   *
   * @throws ServiceFailedException if HA is not enabled
   * @throws AccessControlException if the caller is not a superuser
   */
  synchronized HAServiceStatus getServiceStatus()
      throws ServiceFailedException, AccessControlException {
    namesystem.checkSuperuserPrivilege();
    if (!haEnabled) {
      throw new ServiceFailedException("HA for namenode is not enabled");
    }
    if (state == null) {
      return new HAServiceStatus(HAServiceState.INITIALIZING);
    }
    HAServiceState retState = state.getServiceState();
    HAServiceStatus ret = new HAServiceStatus(retState);
    if (retState == HAServiceState.STANDBY) {
      String safemodeTip = namesystem.getSafeModeTip();
      if (!safemodeTip.isEmpty()) {
        ret.setNotReadyToBecomeActive(
            "The NameNode is in safemode. " +
            safemodeTip);
      } else {
        ret.setReadyToBecomeActive();
      }
    } else if (retState == HAServiceState.ACTIVE) {
      ret.setReadyToBecomeActive();
    } else {
      // Any other state (e.g. initializing) cannot become active.
      ret.setNotReadyToBecomeActive("State is " + state);
    }
    return ret;
  }
1580
1581  synchronized HAServiceState getServiceState() {
1582    if (state == null) {
1583      return HAServiceState.INITIALIZING;
1584    }
1585    return state.getServiceState();
1586  }
1587
  /**
   * Register NameNodeStatusMXBean, remembering the bean name so it can be
   * unregistered during {@link #stop()}.
   */
  private void registerNNSMXBean() {
    nameNodeStatusBeanName = MBeans.register("NameNode", "NameNodeStatus", this);
  }
1594
1595  @Override // NameNodeStatusMXBean
1596  public String getNNRole() {
1597    String roleStr = "";
1598    NamenodeRole role = getRole();
1599    if (null != role) {
1600      roleStr = role.toString();
1601    }
1602    return roleStr;
1603  }
1604
1605  @Override // NameNodeStatusMXBean
1606  public String getState() {
1607    String servStateStr = "";
1608    HAServiceState servState = getServiceState();
1609    if (null != servState) {
1610      servStateStr = servState.toString();
1611    }
1612    return servStateStr;
1613  }
1614
  /**
   * MXBean attribute: delegates to the NameNode's RPC address accessor;
   * from the accessor's name, the result is a "host:port" string.
   */
  @Override // NameNodeStatusMXBean
  public String getHostAndPort() {
    return getNameNodeAddressHostPortString();
  }
1619
  /**
   * MXBean attribute: whether Hadoop security (e.g. Kerberos) is enabled,
   * as reported by {@link UserGroupInformation#isSecurityEnabled()}.
   */
  @Override // NameNodeStatusMXBean
  public boolean isSecurityEnabled() {
    return UserGroupInformation.isSecurityEnabled();
  }
1624
1625  /**
1626   * Shutdown the NN immediately in an ungraceful way. Used when it would be
1627   * unsafe for the NN to continue operating, e.g. during a failed HA state
1628   * transition.
1629   * 
1630   * @param t exception which warrants the shutdown. Printed to the NN log
1631   *          before exit.
1632   * @throws ExitException thrown only for testing.
1633   */
1634  protected synchronized void doImmediateShutdown(Throwable t)
1635      throws ExitException {
1636    String message = "Error encountered requiring NN shutdown. " +
1637        "Shutting down immediately.";
1638    try {
1639      LOG.fatal(message, t);
1640    } catch (Throwable ignored) {
1641      // This is unlikely to happen, but there's nothing we can do if it does.
1642    }
1643    terminate(1, t);
1644  }
1645  
1646  /**
1647   * Class used to expose {@link NameNode} as context to {@link HAState}
1648   */
1649  protected class NameNodeHAContext implements HAContext {
1650    @Override
1651    public void setState(HAState s) {
1652      state = s;
1653    }
1654
1655    @Override
1656    public HAState getState() {
1657      return state;
1658    }
1659
1660    @Override
1661    public void startActiveServices() throws IOException {
1662      try {
1663        namesystem.startActiveServices();
1664        startTrashEmptier(conf);
1665      } catch (Throwable t) {
1666        doImmediateShutdown(t);
1667      }
1668    }
1669
1670    @Override
1671    public void stopActiveServices() throws IOException {
1672      try {
1673        if (namesystem != null) {
1674          namesystem.stopActiveServices();
1675        }
1676        stopTrashEmptier();
1677      } catch (Throwable t) {
1678        doImmediateShutdown(t);
1679      }
1680    }
1681
1682    @Override
1683    public void startStandbyServices() throws IOException {
1684      try {
1685        namesystem.startStandbyServices(conf);
1686      } catch (Throwable t) {
1687        doImmediateShutdown(t);
1688      }
1689    }
1690
1691    @Override
1692    public void prepareToStopStandbyServices() throws ServiceFailedException {
1693      try {
1694        namesystem.prepareToStopStandbyServices();
1695      } catch (Throwable t) {
1696        doImmediateShutdown(t);
1697      }
1698    }
1699    
1700    @Override
1701    public void stopStandbyServices() throws IOException {
1702      try {
1703        if (namesystem != null) {
1704          namesystem.stopStandbyServices();
1705        }
1706      } catch (Throwable t) {
1707        doImmediateShutdown(t);
1708      }
1709    }
1710    
1711    @Override
1712    public void writeLock() {
1713      namesystem.writeLock();
1714      namesystem.lockRetryCache();
1715    }
1716    
1717    @Override
1718    public void writeUnlock() {
1719      namesystem.unlockRetryCache();
1720      namesystem.writeUnlock();
1721    }
1722    
1723    /** Check if an operation of given category is allowed */
1724    @Override
1725    public void checkOperation(final OperationCategory op)
1726        throws StandbyException {
1727      state.checkOperation(haContext, op);
1728    }
1729    
1730    @Override
1731    public boolean allowStaleReads() {
1732      return allowStaleStandbyReads;
1733    }
1734
1735  }
1736  
1737  public boolean isStandbyState() {
1738    return (state.equals(STANDBY_STATE));
1739  }
1740  
1741  public boolean isActiveState() {
1742    return (state.equals(ACTIVE_STATE));
1743  }
1744
1745  /**
1746   * Returns whether the NameNode is completely started
1747   */
1748  boolean isStarted() {
1749    return this.started.get();
1750  }
1751
1752  /**
1753   * Check that a request to change this node's HA state is valid.
1754   * In particular, verifies that, if auto failover is enabled, non-forced
1755   * requests from the HAAdmin CLI are rejected, and vice versa.
1756   *
1757   * @param req the request to check
1758   * @throws AccessControlException if the request is disallowed
1759   */
1760  void checkHaStateChange(StateChangeRequestInfo req)
1761      throws AccessControlException {
1762    boolean autoHaEnabled = conf.getBoolean(DFS_HA_AUTO_FAILOVER_ENABLED_KEY,
1763        DFS_HA_AUTO_FAILOVER_ENABLED_DEFAULT);
1764    switch (req.getSource()) {
1765    case REQUEST_BY_USER:
1766      if (autoHaEnabled) {
1767        throw new AccessControlException(
1768            "Manual HA control for this NameNode is disallowed, because " +
1769            "automatic HA is enabled.");
1770      }
1771      break;
1772    case REQUEST_BY_USER_FORCED:
1773      if (autoHaEnabled) {
1774        LOG.warn("Allowing manual HA control from " +
1775            Server.getRemoteAddress() +
1776            " even though automatic HA is enabled, because the user " +
1777            "specified the force flag");
1778      }
1779      break;
1780    case REQUEST_BY_ZKFC:
1781      if (!autoHaEnabled) {
1782        throw new AccessControlException(
1783            "Request from ZK failover controller at " +
1784            Server.getRemoteAddress() + " denied since automatic HA " +
1785            "is not enabled"); 
1786      }
1787      break;
1788    }
1789  }
1790}