001/** 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hdfs.server.namenode; 019 020import static org.apache.hadoop.crypto.key.KeyProviderCryptoExtension.EncryptedKeyVersion; 021import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_TRASH_INTERVAL_DEFAULT; 022import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_TRASH_INTERVAL_KEY; 023import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.IO_FILE_BUFFER_SIZE_DEFAULT; 024import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.IO_FILE_BUFFER_SIZE_KEY; 025import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BLOCK_SIZE_DEFAULT; 026import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BLOCK_SIZE_KEY; 027import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BYTES_PER_CHECKSUM_DEFAULT; 028import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BYTES_PER_CHECKSUM_KEY; 029import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CHECKSUM_TYPE_DEFAULT; 030import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CHECKSUM_TYPE_KEY; 031import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_WRITE_PACKET_SIZE_DEFAULT; 032import static 
org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_WRITE_PACKET_SIZE_KEY; 033import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_ENCRYPT_DATA_TRANSFER_DEFAULT; 034import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_ENCRYPT_DATA_TRANSFER_KEY; 035import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HA_STANDBY_CHECKPOINTS_DEFAULT; 036import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HA_STANDBY_CHECKPOINTS_KEY; 037import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_ACCESSTIME_PRECISION_DEFAULT; 038import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_ACCESSTIME_PRECISION_KEY; 039import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_AUDIT_LOGGERS_KEY; 040import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_AUDIT_LOG_ASYNC_DEFAULT; 041import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_AUDIT_LOG_ASYNC_KEY; 042import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_AUDIT_LOG_TOKEN_TRACKING_ID_DEFAULT; 043import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_AUDIT_LOG_TOKEN_TRACKING_ID_KEY; 044import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_TXNS_DEFAULT; 045import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_TXNS_KEY; 046import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DEFAULT_AUDIT_LOGGER_NAME; 047import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_KEY_UPDATE_INTERVAL_DEFAULT; 048import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_KEY_UPDATE_INTERVAL_KEY; 049import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_ALWAYS_USE_DEFAULT; 050import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_ALWAYS_USE_KEY; 051import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_MAX_LIFETIME_DEFAULT; 052import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_MAX_LIFETIME_KEY; 053import static 
org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_RENEW_INTERVAL_DEFAULT; 054import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_RENEW_INTERVAL_KEY; 055import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_KEY; 056import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_REQUIRED_KEY; 057import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_EDIT_LOG_AUTOROLL_CHECK_INTERVAL_MS; 058import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_EDIT_LOG_AUTOROLL_CHECK_INTERVAL_MS_DEFAULT; 059import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_EDIT_LOG_AUTOROLL_MULTIPLIER_THRESHOLD; 060import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_EDIT_LOG_AUTOROLL_MULTIPLIER_THRESHOLD_DEFAULT; 061import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_ENABLE_RETRY_CACHE_DEFAULT; 062import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_ENABLE_RETRY_CACHE_KEY; 063import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_LAZY_PERSIST_FILE_SCRUB_INTERVAL_SEC; 064import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_LAZY_PERSIST_FILE_SCRUB_INTERVAL_SEC_DEFAULT; 065import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_MAX_OBJECTS_DEFAULT; 066import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_MAX_OBJECTS_KEY; 067import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_NAME_DIR_KEY; 068import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_REPLICATION_MIN_DEFAULT; 069import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_REPLICATION_MIN_KEY; 070import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_REPL_QUEUE_THRESHOLD_PCT_KEY; 071import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RESOURCE_CHECK_INTERVAL_DEFAULT; 072import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RESOURCE_CHECK_INTERVAL_KEY; 073import static 
org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RETRY_CACHE_EXPIRYTIME_MILLIS_DEFAULT; 074import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RETRY_CACHE_EXPIRYTIME_MILLIS_KEY; 075import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RETRY_CACHE_HEAP_PERCENT_DEFAULT; 076import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RETRY_CACHE_HEAP_PERCENT_KEY; 077import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SAFEMODE_EXTENSION_KEY; 078import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SAFEMODE_MIN_DATANODES_DEFAULT; 079import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SAFEMODE_MIN_DATANODES_KEY; 080import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SAFEMODE_THRESHOLD_PCT_DEFAULT; 081import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SAFEMODE_THRESHOLD_PCT_KEY; 082import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SHARED_EDITS_DIR_KEY; 083import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_PERMISSIONS_ENABLED_DEFAULT; 084import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_PERMISSIONS_ENABLED_KEY; 085import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_PERMISSIONS_SUPERUSERGROUP_DEFAULT; 086import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_PERMISSIONS_SUPERUSERGROUP_KEY; 087import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_REPLICATION_DEFAULT; 088import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_REPLICATION_KEY; 089import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_STORAGE_POLICY_ENABLED_KEY; 090import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_STORAGE_POLICY_ENABLED_DEFAULT; 091import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_SUPPORT_APPEND_DEFAULT; 092import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_SUPPORT_APPEND_KEY; 093import static org.apache.hadoop.hdfs.server.common.HdfsServerConstants.SECURITY_XATTR_UNREADABLE_BY_SUPERUSER; 094import static org.apache.hadoop.util.Time.now; 095 096import 
java.io.BufferedWriter; 097import java.io.ByteArrayInputStream; 098import java.io.DataInput; 099import java.io.DataInputStream; 100import java.io.DataOutputStream; 101import java.io.File; 102import java.io.FileNotFoundException; 103import java.io.FileOutputStream; 104import java.io.IOException; 105import java.io.OutputStreamWriter; 106import java.io.PrintWriter; 107import java.io.StringWriter; 108import java.lang.management.ManagementFactory; 109import java.net.InetAddress; 110import java.net.URI; 111import java.security.GeneralSecurityException; 112import java.security.NoSuchAlgorithmException; 113import java.util.ArrayList; 114import java.util.Arrays; 115import java.util.Collection; 116import java.util.Collections; 117import java.util.Date; 118import java.util.EnumSet; 119import java.util.HashMap; 120import java.util.HashSet; 121import java.util.Iterator; 122import java.util.LinkedHashSet; 123import java.util.List; 124import java.util.Map; 125import java.util.Set; 126import java.util.UUID; 127import java.util.concurrent.TimeUnit; 128import java.util.concurrent.locks.Condition; 129import java.util.concurrent.locks.ReentrantLock; 130import java.util.concurrent.locks.ReentrantReadWriteLock; 131 132import javax.management.NotCompliantMBeanException; 133import javax.management.ObjectName; 134import javax.management.StandardMBean; 135 136import org.apache.commons.logging.Log; 137import org.apache.commons.logging.LogFactory; 138import org.apache.commons.logging.impl.Log4JLogger; 139import org.apache.hadoop.HadoopIllegalArgumentException; 140import org.apache.hadoop.classification.InterfaceAudience; 141import org.apache.hadoop.conf.Configuration; 142import org.apache.hadoop.crypto.CipherSuite; 143import org.apache.hadoop.crypto.CryptoProtocolVersion; 144import org.apache.hadoop.crypto.key.KeyProvider; 145import org.apache.hadoop.crypto.CryptoCodec; 146import org.apache.hadoop.crypto.key.KeyProviderCryptoExtension; 147import 
org.apache.hadoop.fs.BatchedRemoteIterator.BatchedListEntries; 148import org.apache.hadoop.fs.CacheFlag; 149import org.apache.hadoop.fs.ContentSummary; 150import org.apache.hadoop.fs.CreateFlag; 151import org.apache.hadoop.fs.DirectoryListingStartAfterNotFoundException; 152import org.apache.hadoop.fs.FileAlreadyExistsException; 153import org.apache.hadoop.fs.FileEncryptionInfo; 154import org.apache.hadoop.fs.FileStatus; 155import org.apache.hadoop.fs.FileSystem; 156import org.apache.hadoop.fs.FsServerDefaults; 157import org.apache.hadoop.fs.InvalidPathException; 158import org.apache.hadoop.fs.Options; 159import org.apache.hadoop.fs.Options.Rename; 160import org.apache.hadoop.fs.ParentNotDirectoryException; 161import org.apache.hadoop.fs.Path; 162import org.apache.hadoop.fs.PathIsNotEmptyDirectoryException; 163import org.apache.hadoop.fs.UnresolvedLinkException; 164import org.apache.hadoop.fs.XAttr; 165import org.apache.hadoop.fs.XAttrSetFlag; 166import org.apache.hadoop.fs.permission.AclEntry; 167import org.apache.hadoop.fs.permission.AclStatus; 168import org.apache.hadoop.fs.permission.FsAction; 169import org.apache.hadoop.fs.permission.FsPermission; 170import org.apache.hadoop.fs.permission.PermissionStatus; 171import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState; 172import org.apache.hadoop.ha.ServiceFailedException; 173import org.apache.hadoop.hdfs.protocol.BlockStoragePolicy; 174import org.apache.hadoop.hdfs.DFSConfigKeys; 175import org.apache.hadoop.hdfs.DFSUtil; 176import org.apache.hadoop.hdfs.HAUtil; 177import org.apache.hadoop.hdfs.HdfsConfiguration; 178import org.apache.hadoop.hdfs.UnknownCryptoProtocolVersionException; 179import org.apache.hadoop.hdfs.XAttrHelper; 180import org.apache.hadoop.hdfs.protocol.AclException; 181import org.apache.hadoop.hdfs.protocol.AlreadyBeingCreatedException; 182import org.apache.hadoop.hdfs.protocol.Block; 183import org.apache.hadoop.hdfs.protocol.CacheDirectiveEntry; 184import 
org.apache.hadoop.hdfs.protocol.CacheDirectiveInfo; 185import org.apache.hadoop.hdfs.protocol.CachePoolEntry; 186import org.apache.hadoop.hdfs.protocol.CachePoolInfo; 187import org.apache.hadoop.hdfs.protocol.ClientProtocol; 188import org.apache.hadoop.hdfs.protocol.DatanodeID; 189import org.apache.hadoop.hdfs.protocol.DatanodeInfo; 190import org.apache.hadoop.hdfs.protocol.DirectoryListing; 191import org.apache.hadoop.hdfs.protocol.EncryptionZone; 192import org.apache.hadoop.hdfs.protocol.ExtendedBlock; 193import org.apache.hadoop.hdfs.protocol.HdfsConstants; 194import org.apache.hadoop.hdfs.protocol.HdfsConstants.DatanodeReportType; 195import org.apache.hadoop.hdfs.protocol.HdfsConstants.SafeModeAction; 196import org.apache.hadoop.hdfs.protocol.HdfsFileStatus; 197import org.apache.hadoop.hdfs.protocol.LocatedBlock; 198import org.apache.hadoop.hdfs.protocol.LocatedBlocks; 199import org.apache.hadoop.hdfs.protocol.QuotaExceededException; 200import org.apache.hadoop.hdfs.protocol.RecoveryInProgressException; 201import org.apache.hadoop.hdfs.protocol.RollingUpgradeException; 202import org.apache.hadoop.hdfs.protocol.RollingUpgradeInfo; 203import org.apache.hadoop.hdfs.protocol.SnapshotAccessControlException; 204import org.apache.hadoop.hdfs.protocol.SnapshotDiffReport; 205import org.apache.hadoop.hdfs.protocol.SnapshottableDirectoryStatus; 206import org.apache.hadoop.hdfs.protocol.datatransfer.ReplaceDatanodeOnFailure; 207import org.apache.hadoop.hdfs.security.token.block.BlockTokenSecretManager; 208import org.apache.hadoop.hdfs.security.token.block.BlockTokenSecretManager.AccessMode; 209import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier; 210import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenSecretManager; 211import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenSecretManager.SecretManagerState; 212import org.apache.hadoop.hdfs.server.blockmanagement.BlockCollection; 213import 
org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo; 214import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfoUnderConstruction; 215import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager; 216import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor; 217import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeManager; 218import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeStatistics; 219import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeStorageInfo; 220import org.apache.hadoop.hdfs.server.blockmanagement.OutOfV1GenerationStampsException; 221import org.apache.hadoop.hdfs.server.common.GenerationStamp; 222import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.BlockUCState; 223import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.NamenodeRole; 224import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.RollingUpgradeStartupOption; 225import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption; 226import org.apache.hadoop.hdfs.server.common.Storage; 227import org.apache.hadoop.hdfs.server.common.Storage.StorageDirType; 228import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory; 229import org.apache.hadoop.hdfs.server.common.Util; 230import org.apache.hadoop.hdfs.server.namenode.FsImageProto.SecretManagerSection; 231import org.apache.hadoop.hdfs.server.namenode.INode.BlocksMapUpdateInfo; 232import org.apache.hadoop.hdfs.server.namenode.JournalSet.JournalAndStream; 233import org.apache.hadoop.hdfs.server.namenode.LeaseManager.Lease; 234import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeFile; 235import org.apache.hadoop.hdfs.server.namenode.NameNode.OperationCategory; 236import org.apache.hadoop.hdfs.server.namenode.ha.EditLogTailer; 237import org.apache.hadoop.hdfs.server.namenode.ha.HAContext; 238import org.apache.hadoop.hdfs.server.namenode.ha.StandbyCheckpointer; 239import org.apache.hadoop.hdfs.server.namenode.metrics.FSNamesystemMBean; 
240import org.apache.hadoop.hdfs.server.namenode.metrics.NameNodeMetrics; 241import org.apache.hadoop.hdfs.server.namenode.snapshot.DirectoryWithSnapshotFeature; 242import org.apache.hadoop.hdfs.server.namenode.snapshot.Snapshot; 243import org.apache.hadoop.hdfs.server.namenode.snapshot.SnapshotManager; 244import org.apache.hadoop.hdfs.server.namenode.startupprogress.Phase; 245import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress; 246import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress.Counter; 247import org.apache.hadoop.hdfs.server.namenode.startupprogress.Status; 248import org.apache.hadoop.hdfs.server.namenode.startupprogress.Step; 249import org.apache.hadoop.hdfs.server.namenode.startupprogress.StepType; 250import org.apache.hadoop.hdfs.server.namenode.web.resources.NamenodeWebHdfsMethods; 251import org.apache.hadoop.hdfs.server.protocol.DatanodeCommand; 252import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration; 253import org.apache.hadoop.hdfs.server.protocol.DatanodeStorageReport; 254import org.apache.hadoop.hdfs.server.protocol.HeartbeatResponse; 255import org.apache.hadoop.hdfs.server.protocol.NNHAStatusHeartbeat; 256import org.apache.hadoop.hdfs.server.protocol.NamenodeCommand; 257import org.apache.hadoop.hdfs.server.protocol.NamenodeRegistration; 258import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo; 259import org.apache.hadoop.hdfs.server.protocol.StorageReceivedDeletedBlocks; 260import org.apache.hadoop.hdfs.server.protocol.StorageReport; 261import org.apache.hadoop.hdfs.util.ChunkedArrayList; 262import org.apache.hadoop.io.IOUtils; 263import org.apache.hadoop.io.Text; 264import org.apache.hadoop.ipc.RetriableException; 265import org.apache.hadoop.ipc.RetryCache; 266import org.apache.hadoop.ipc.RetryCache.CacheEntry; 267import org.apache.hadoop.ipc.RetryCache.CacheEntryWithPayload; 268import org.apache.hadoop.ipc.Server; 269import org.apache.hadoop.ipc.StandbyException; 270import 
org.apache.hadoop.metrics2.annotation.Metric; 271import org.apache.hadoop.metrics2.annotation.Metrics; 272import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; 273import org.apache.hadoop.metrics2.util.MBeans; 274import org.apache.hadoop.net.NetworkTopology; 275import org.apache.hadoop.net.Node; 276import org.apache.hadoop.net.NodeBase; 277import org.apache.hadoop.security.AccessControlException; 278import org.apache.hadoop.security.UserGroupInformation; 279import org.apache.hadoop.security.UserGroupInformation.AuthenticationMethod; 280import org.apache.hadoop.security.token.SecretManager.InvalidToken; 281import org.apache.hadoop.security.token.Token; 282import org.apache.hadoop.security.token.TokenIdentifier; 283import org.apache.hadoop.security.token.delegation.DelegationKey; 284import org.apache.hadoop.util.Daemon; 285import org.apache.hadoop.util.DataChecksum; 286import org.apache.hadoop.util.StringUtils; 287import org.apache.hadoop.util.Time; 288import org.apache.hadoop.util.VersionInfo; 289import org.apache.log4j.Appender; 290import org.apache.log4j.AsyncAppender; 291import org.apache.log4j.Logger; 292import org.mortbay.util.ajax.JSON; 293 294import com.google.common.annotations.VisibleForTesting; 295import com.google.common.base.Charsets; 296import com.google.common.base.Preconditions; 297import com.google.common.collect.ImmutableMap; 298import com.google.common.collect.Lists; 299 300/*************************************************** 301 * FSNamesystem does the actual bookkeeping work for the 302 * DataNode. 303 * 304 * It tracks several important tables. 
 *
 * 1) valid fsname --> blocklist (kept on disk, logged)
 * 2) Set of all valid blocks (inverted #1)
 * 3) block --> machinelist (kept in memory, rebuilt dynamically from reports)
 * 4) machine --> blocklist (inverted #2)
 * 5) LRU cache of updated-heartbeat machines
 ***************************************************/
@InterfaceAudience.Private
@Metrics(context="dfs")
public class FSNamesystem implements Namesystem, FSClusterStats,
    FSNamesystemMBean, NameNodeMXBean {
  public static final Log LOG = LogFactory.getLog(FSNamesystem.class);

  // Per-thread scratch StringBuilder. NOTE(review): no use of this buffer is
  // visible in this chunk -- presumably it backs audit-message formatting;
  // confirm against the rest of the file.
  private static final ThreadLocal<StringBuilder> auditBuffer =
      new ThreadLocal<StringBuilder>() {
        @Override
        protected StringBuilder initialValue() {
          return new StringBuilder();
        }
      };

  /**
   * Whether audit events should be emitted at all: true unless the default
   * audit logger is the only one configured and the underlying audit log has
   * INFO disabled.
   */
  @VisibleForTesting
  public boolean isAuditEnabled() {
    return !isDefaultAuditLogger || auditLog.isInfoEnabled();
  }

  /**
   * Look up the file status of {@code path} for inclusion in an audit event.
   * Returns null when auditing is disabled or the call is not an external
   * (client) invocation, so callers can skip the lookup cost.
   */
  private HdfsFileStatus getAuditFileInfo(String path, boolean resolveSymlink)
      throws IOException {
    return (isAuditEnabled() && isExternalInvocation())
        ? dir.getFileInfo(path, resolveSymlink, false, false) : null;
  }

  /** Convenience overload: audit event with no destination path or status. */
  private void logAuditEvent(boolean succeeded, String cmd, String src)
      throws IOException {
    logAuditEvent(succeeded, cmd, src, null, null);
  }

  /**
   * Log an audit event on behalf of the remote caller, but only when auditing
   * is enabled and this is an external invocation.
   */
  private void logAuditEvent(boolean succeeded, String cmd, String src,
      String dst, HdfsFileStatus stat) throws IOException {
    if (isAuditEnabled() && isExternalInvocation()) {
      logAuditEvent(succeeded, getRemoteUser(), getRemoteIp(),
          cmd, src, dst, stat);
    }
  }

  /**
   * Fan an audit event out to every configured {@link AuditLogger}.
   * {@code HdfsAuditLogger} implementations additionally receive the caller's
   * UGI and the delegation token secret manager.
   */
  private void logAuditEvent(boolean succeeded,
      UserGroupInformation ugi, InetAddress addr, String cmd, String src,
      String dst, HdfsFileStatus stat) {
    // Convert the HdfsFileStatus (if any) into a FileStatus for the loggers;
    // the destination path is preferred over the source when both are given.
    FileStatus status = null;
    if (stat != null) {
      Path symlink = stat.isSymlink() ? new Path(stat.getSymlink()) : null;
      Path path = dst != null ? new Path(dst) : new Path(src);
      status = new FileStatus(stat.getLen(), stat.isDir(),
          stat.getReplication(), stat.getBlockSize(), stat.getModificationTime(),
          stat.getAccessTime(), stat.getPermission(), stat.getOwner(),
          stat.getGroup(), symlink, path);
    }
    for (AuditLogger logger : auditLoggers) {
      if (logger instanceof HdfsAuditLogger) {
        HdfsAuditLogger hdfsLogger = (HdfsAuditLogger) logger;
        hdfsLogger.logAuditEvent(succeeded, ugi.toString(), addr, cmd, src, dst,
            status, ugi, dtSecretManager);
      } else {
        logger.logAuditEvent(succeeded, ugi.toString(), addr,
            cmd, src, dst, status);
      }
    }
  }

  /**
   * Logger for audit events, noting successful FSNamesystem operations. Emits
   * to FSNamesystem.audit at INFO. Each event causes a set of tab-separated
   * <code>key=value</code> pairs to be written for the following properties:
   * <code>
   * ugi=<ugi in RPC>
   * ip=<remote IP>
   * cmd=<command>
   * src=<src path>
   * dst=<dst path (optional)>
   * perm=<permissions (optional)>
   * </code>
   */
  public static final Log auditLog = LogFactory.getLog(
      FSNamesystem.class.getName() + ".audit");

  static final int DEFAULT_MAX_CORRUPT_FILEBLOCKS_RETURNED = 100;
  // Non-final on purpose. NOTE(review): no writer is visible in this chunk;
  // presumably tests tune the deletion batch size -- confirm before making
  // this final.
  static int BLOCK_DELETION_INCREMENT = 1000;
  private final boolean isPermissionEnabled;
  private final UserGroupInformation fsOwner;
  private final String fsOwnerShortUserName;
  private final String supergroup;
  private final boolean standbyShouldCheckpoint;

  // Scan interval is not configurable.
  private static final long DELEGATION_TOKEN_REMOVER_SCAN_INTERVAL =
      TimeUnit.MILLISECONDS.convert(1, TimeUnit.HOURS);
  /** Secret manager for delegation tokens; also passed to HDFS audit loggers. */
  final DelegationTokenSecretManager dtSecretManager;
  // NOTE(review): name suggests delegation tokens are always issued for test
  // setups; the assignment is not visible in this chunk -- confirm semantics.
  private final boolean alwaysUseDelegationTokensForTests;

  // Startup-progress step recorded while block reports are awaited.
  private static final Step STEP_AWAITING_REPORTED_BLOCKS =
      new Step(StepType.AWAITING_REPORTED_BLOCKS);

  // Tracks whether the default audit logger is the only configured audit
  // logger; this allows isAuditEnabled() to return false in case the
  // underlying logger is disabled, and avoid some unnecessary work.
  private final boolean isDefaultAuditLogger;
  private final List<AuditLogger> auditLoggers;

  /** The namespace tree. */
  FSDirectory dir;
  private final BlockManager blockManager;
  private final SnapshotManager snapshotManager;
  private final CacheManager cacheManager;
  private final DatanodeStatistics datanodeStatistics;

  // whether setStoragePolicy is allowed.
  private final boolean isStoragePolicyEnabled;

  private String nameserviceId;

  private volatile RollingUpgradeInfo rollingUpgradeInfo = null;
  /**
   * A flag that indicates whether the checkpointer should checkpoint a rollback
   * fsimage. The edit log tailer sets this flag. The checkpoint will create a
   * rollback fsimage if the flag is true, and then change the flag to false.
   */
  private volatile boolean needRollbackFsImage;

  // Block pool ID used by this namenode
  private String blockPoolId;

  final LeaseManager leaseManager = new LeaseManager(this);

  volatile Daemon smmthread = null;  // SafeModeMonitor thread

  Daemon nnrmthread = null; // NamenodeResourceMonitor thread

  Daemon nnEditLogRoller = null; // NameNodeEditLogRoller thread

  // A daemon to periodically clean up corrupt lazyPersist files
  // from the name space.
  Daemon lazyPersistFileScrubber = null;
  /**
   * When an active namenode will roll its own edit log, in # edits
   */
  private final long editLogRollerThreshold;
  /**
   * Check interval of an active namenode's edit log roller thread
   */
  private final int editLogRollerInterval;

  /**
   * How frequently we scan and unlink corrupt lazyPersist files.
   * (In seconds)
   */
  private final int lazyPersistFileScrubIntervalSec;

  private volatile boolean hasResourcesAvailable = false;
  private volatile boolean fsRunning = true;

  /** The start time of the namesystem. */
  private final long startTime = now();

  /** The interval of namenode checking for the disk space availability */
  private final long resourceRecheckInterval;

  // The actual resource checker instance.
  NameNodeResourceChecker nnResourceChecker;

  private final FsServerDefaults serverDefaults;
  private final boolean supportAppends;
  private final ReplaceDatanodeOnFailure dtpReplaceDatanodeOnFailure;

  private volatile SafeModeInfo safeMode;  // safe mode information

  private final long maxFsObjects;          // maximum number of fs objects

  private final long minBlockSize;         // minimum block size
  private final long maxBlocksPerFile;     // maximum # of blocks per file

  /**
   * The global generation stamp for legacy blocks with randomly
   * generated block IDs.
   */
  private final GenerationStamp generationStampV1 = new GenerationStamp();

  /**
   * The global generation stamp for this file system.
   */
  private final GenerationStamp generationStampV2 = new GenerationStamp();

  /**
   * The value of the generation stamp when the first switch to sequential
   * block IDs was made. Blocks with generation stamps below this value
   * have randomly allocated block IDs. Blocks with generation stamps above
   * this value had sequentially allocated block IDs. Read from the fsImage
   * (or initialized as an offset from the V1 (legacy) generation stamp on
   * upgrade).
   */
  private long generationStampV1Limit =
      GenerationStamp.GRANDFATHER_GENERATION_STAMP;

  /**
   * The global block ID space for this file system.
   */
  @VisibleForTesting
  private final SequentialBlockIdGenerator blockIdGenerator;

  // precision of access times.
  private final long accessTimePrecision;

  /** Lock to protect FSNamesystem. */
  private final FSNamesystemLock fsLock;

  /**
   * Used when this NN is in standby state to read from the shared edit log.
   */
  private EditLogTailer editLogTailer = null;

  /**
   * Used when this NN is in standby state to perform checkpoints.
   */
  private StandbyCheckpointer standbyCheckpointer;

  /**
   * Reference to the NN's HAContext object. This is only set once
   * {@link #startCommonServices(Configuration, HAContext)} is called.
   */
  private HAContext haContext;

  private final boolean haEnabled;

  /** flag indicating whether replication queues have been initialized */
  boolean initializedReplQueues = false;

  /**
   * Whether the namenode is in the middle of starting the active service
   */
  private volatile boolean startingActiveService = false;

  // Generator for inode IDs; see allocateNewInodeId() / resetLastInodeId().
  private INodeId inodeId;

  private final RetryCache retryCache;

  private final NNConf nnConf;

  // Key provider for encryption; null when no provider is configured.
  private KeyProviderCryptoExtension provider = null;
  private KeyProvider.Options providerOptions = null;

  private final CryptoCodec codec;

  // True once the FSImage has been loaded; volatile so readers can take the
  // lock-free fast path in waitForLoadingFSImage()/setImageLoaded().
  private volatile boolean imageLoaded = false;
  // Signalled (under the write lock) when the image finishes loading; see
  // setImageLoaded() and waitForLoadingFSImage().
  private final Condition cond;

  private final FSImage fsImage;

  /**
   * Notify that loading of this FSDirectory is complete, and
   * it is imageLoaded for use
   */
  void imageLoadComplete() {
    // Loading twice is a programming error, not a recoverable condition.
    Preconditions.checkState(!imageLoaded, "FSDirectory already loaded");
    setImageLoaded();
  }

  /**
   * Mark the FSImage as loaded: under the write lock, set the flag, mark the
   * directory's name cache initialized, and wake every thread blocked in
   * {@link #waitForLoadingFSImage()}.
   */
  void setImageLoaded() {
    if(imageLoaded) return;  // cheap volatile fast path; idempotent
    writeLock();
    try {
      setImageLoaded(true);
      dir.markNameCacheInitialized();
      cond.signalAll();
    } finally {
      writeUnlock();
    }
  }

  //This is for testing purposes only
  @VisibleForTesting
  boolean isImageLoaded() {
    return imageLoaded;
  }

  // exposed for unit tests
  protected void setImageLoaded(boolean flag) {
    imageLoaded = flag;
  }

  /**
   * Block until the object is imageLoaded to be used.
   *
   * The volatile read outside the lock is a fast path; otherwise the thread
   * waits on {@code cond} under the write lock, re-checking the flag on each
   * wakeup (the 5-second bound makes the flag be re-checked periodically).
   * InterruptedException is deliberately swallowed so the wait always lasts
   * until the image is loaded. NOTE(review): the thread's interrupt status is
   * not restored here -- confirm callers do not rely on interruption.
   */
  void waitForLoadingFSImage() {
    if (!imageLoaded) {
      writeLock();
      try {
        while (!imageLoaded) {
          try {
            cond.await(5000, TimeUnit.MILLISECONDS);
          } catch (InterruptedException ignored) {
          }
        }
      } finally {
        writeUnlock();
      }
    }
  }

  /**
   * Set the last allocated inode id when fsimage or editlog is loaded.
   *
   * @param newValue the value to advance the inode ID generator to
   * @throws IOException if {@code inodeId.skipTo} rejects the value
   *         (the IllegalStateException is wrapped)
   */
  public void resetLastInodeId(long newValue) throws IOException {
    try {
      inodeId.skipTo(newValue);
    } catch(IllegalStateException ise) {
      throw new IOException(ise);
    }
  }

  /** Should only be used for tests to reset to any value */
  void resetLastInodeIdWithoutChecking(long newValue) {
    inodeId.setCurrentValue(newValue);
  }

  /** @return the last inode ID. */
  public long getLastInodeId() {
    return inodeId.getCurrentValue();
  }

  /** Allocate a new inode ID.
*/ 634 public long allocateNewInodeId() { 635 return inodeId.nextValue(); 636 } 637 638 /** 639 * Clear all loaded data 640 */ 641 void clear() { 642 dir.reset(); 643 dtSecretManager.reset(); 644 generationStampV1.setCurrentValue(GenerationStamp.LAST_RESERVED_STAMP); 645 generationStampV2.setCurrentValue(GenerationStamp.LAST_RESERVED_STAMP); 646 blockIdGenerator.setCurrentValue( 647 SequentialBlockIdGenerator.LAST_RESERVED_BLOCK_ID); 648 generationStampV1Limit = GenerationStamp.GRANDFATHER_GENERATION_STAMP; 649 leaseManager.removeAllLeases(); 650 inodeId.setCurrentValue(INodeId.LAST_RESERVED_ID); 651 snapshotManager.clearSnapshottableDirs(); 652 cacheManager.clear(); 653 setImageLoaded(false); 654 blockManager.clear(); 655 } 656 657 @VisibleForTesting 658 LeaseManager getLeaseManager() { 659 return leaseManager; 660 } 661 662 boolean isHaEnabled() { 663 return haEnabled; 664 } 665 666 /** 667 * Check the supplied configuration for correctness. 668 * @param conf Supplies the configuration to validate. 669 * @throws IOException if the configuration could not be queried. 670 * @throws IllegalArgumentException if the configuration is invalid. 671 */ 672 private static void checkConfiguration(Configuration conf) 673 throws IOException { 674 675 final Collection<URI> namespaceDirs = 676 FSNamesystem.getNamespaceDirs(conf); 677 final Collection<URI> editsDirs = 678 FSNamesystem.getNamespaceEditsDirs(conf); 679 final Collection<URI> requiredEditsDirs = 680 FSNamesystem.getRequiredNamespaceEditsDirs(conf); 681 final Collection<URI> sharedEditsDirs = 682 FSNamesystem.getSharedEditsDirs(conf); 683 684 for (URI u : requiredEditsDirs) { 685 if (u.toString().compareTo( 686 DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_DEFAULT) == 0) { 687 continue; 688 } 689 690 // Each required directory must also be in editsDirs or in 691 // sharedEditsDirs. 
692 if (!editsDirs.contains(u) && 693 !sharedEditsDirs.contains(u)) { 694 throw new IllegalArgumentException( 695 "Required edits directory " + u.toString() + " not present in " + 696 DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_KEY + ". " + 697 DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_KEY + "=" + 698 editsDirs.toString() + "; " + 699 DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_REQUIRED_KEY + "=" + 700 requiredEditsDirs.toString() + ". " + 701 DFSConfigKeys.DFS_NAMENODE_SHARED_EDITS_DIR_KEY + "=" + 702 sharedEditsDirs.toString() + "."); 703 } 704 } 705 706 if (namespaceDirs.size() == 1) { 707 LOG.warn("Only one image storage directory (" 708 + DFS_NAMENODE_NAME_DIR_KEY + ") configured. Beware of data loss" 709 + " due to lack of redundant storage directories!"); 710 } 711 if (editsDirs.size() == 1) { 712 LOG.warn("Only one namespace edits storage directory (" 713 + DFS_NAMENODE_EDITS_DIR_KEY + ") configured. Beware of data loss" 714 + " due to lack of redundant storage directories!"); 715 } 716 } 717 718 /** 719 * Instantiates an FSNamesystem loaded from the image and edits 720 * directories specified in the passed Configuration. 
* @param conf the Configuration which specifies the storage directories
 *             from which to load
 * @return an FSNamesystem which contains the loaded namespace
 * @throws IOException if loading fails
 */
static FSNamesystem loadFromDisk(Configuration conf) throws IOException {

  // Validate dirs before touching any storage.
  checkConfiguration(conf);
  FSImage fsImage = new FSImage(conf,
      FSNamesystem.getNamespaceDirs(conf),
      FSNamesystem.getNamespaceEditsDirs(conf));
  FSNamesystem namesystem = new FSNamesystem(conf, fsImage, false);
  StartupOption startOpt = NameNode.getStartupOption(conf);
  if (startOpt == StartupOption.RECOVER) {
    // Recovery mode keeps the NN in safe mode while metadata is repaired.
    namesystem.setSafeMode(SafeModeAction.SAFEMODE_ENTER);
  }

  long loadStart = now();
  try {
    namesystem.loadFSImage(startOpt);
  } catch (IOException ioe) {
    // Close the image to release storage locks before propagating.
    LOG.warn("Encountered exception loading fsimage", ioe);
    fsImage.close();
    throw ioe;
  }
  long timeTakenToLoadFSImage = now() - loadStart;
  LOG.info("Finished loading FSImage in " + timeTakenToLoadFSImage + " msecs");
  NameNodeMetrics nnMetrics = NameNode.getNameNodeMetrics();
  if (nnMetrics != null) {
    nnMetrics.setFsImageLoadTime((int) timeTakenToLoadFSImage);
  }
  return namesystem;
}

/**
 * Convenience constructor; delegates with {@code ignoreRetryCache=false}.
 * @throws IOException on bad configuration
 */
FSNamesystem(Configuration conf, FSImage fsImage) throws IOException {
  this(conf, fsImage, false);
}

/**
 * Create an FSNamesystem associated with the specified image.
 *
 * Note that this does not load any data off of disk -- if you would
 * like that behavior, use {@link #loadFromDisk(Configuration)}
 *
 * @param conf configuration
 * @param fsImage The FSImage to associate with
 * @param ignoreRetryCache Whether or not should ignore the retry cache setup
 *                         step. For Secondary NN this should be set to true.
 * @throws IOException on bad configuration
 */
FSNamesystem(Configuration conf, FSImage fsImage, boolean ignoreRetryCache)
    throws IOException {
  // Encryption-at-rest key provider is optional; absence just disables EZ ops.
  provider = DFSUtil.createKeyProviderCryptoExtension(conf);
  if (provider == null) {
    LOG.info("No KeyProvider found.");
  } else {
    LOG.info("Found KeyProvider: " + provider.toString());
  }
  providerOptions = KeyProvider.options(conf);
  this.codec = CryptoCodec.getInstance(conf);
  if (conf.getBoolean(DFS_NAMENODE_AUDIT_LOG_ASYNC_KEY,
      DFS_NAMENODE_AUDIT_LOG_ASYNC_DEFAULT)) {
    LOG.info("Enabling async auditlog");
    enableAsyncAuditLog();
  }
  // Fair lock trades raw throughput for starvation-freedom.
  boolean fair = conf.getBoolean("dfs.namenode.fslock.fair", true);
  LOG.info("fsLock is fair:" + fair);
  fsLock = new FSNamesystemLock(fair);
  // Condition used by waitForLoadingFSImage()/setImageLoaded().
  cond = fsLock.writeLock().newCondition();
  this.fsImage = fsImage;
  try {
    resourceRecheckInterval = conf.getLong(
        DFS_NAMENODE_RESOURCE_CHECK_INTERVAL_KEY,
        DFS_NAMENODE_RESOURCE_CHECK_INTERVAL_DEFAULT);

    this.blockManager = new BlockManager(this, this, conf);
    this.datanodeStatistics = blockManager.getDatanodeManager().getDatanodeStatistics();
    this.blockIdGenerator = new SequentialBlockIdGenerator(this.blockManager);

    this.isStoragePolicyEnabled =
        conf.getBoolean(DFS_STORAGE_POLICY_ENABLED_KEY,
                        DFS_STORAGE_POLICY_ENABLED_DEFAULT);

    this.fsOwner = UserGroupInformation.getCurrentUser();
    this.fsOwnerShortUserName = fsOwner.getShortUserName();
    this.supergroup = conf.get(DFS_PERMISSIONS_SUPERUSERGROUP_KEY,
                               DFS_PERMISSIONS_SUPERUSERGROUP_DEFAULT);
    this.isPermissionEnabled = conf.getBoolean(DFS_PERMISSIONS_ENABLED_KEY,
                                               DFS_PERMISSIONS_ENABLED_DEFAULT);
    LOG.info("fsOwner = " + fsOwner);
    LOG.info("supergroup = " + supergroup);
    LOG.info("isPermissionEnabled = " + isPermissionEnabled);

    // block allocation has to be persisted in HA using a shared edits directory
    // so that the standby has up-to-date namespace information
    nameserviceId = DFSUtil.getNamenodeNameServiceId(conf);
    this.haEnabled = HAUtil.isHAEnabled(conf, nameserviceId);

    // Sanity check the HA-related config.
    if (nameserviceId != null) {
      LOG.info("Determined nameservice ID: " + nameserviceId);
    }
    LOG.info("HA Enabled: " + haEnabled);
    if (!haEnabled && HAUtil.usesSharedEditsDir(conf)) {
      LOG.warn("Configured NNs:\n" + DFSUtil.nnAddressesAsString(conf));
      throw new IOException("Invalid configuration: a shared edits dir " +
          "must not be specified if HA is not enabled.");
    }

    // Get the checksum type from config
    String checksumTypeStr = conf.get(DFS_CHECKSUM_TYPE_KEY, DFS_CHECKSUM_TYPE_DEFAULT);
    DataChecksum.Type checksumType;
    try {
      checksumType = DataChecksum.Type.valueOf(checksumTypeStr);
    } catch (IllegalArgumentException iae) {
      throw new IOException("Invalid checksum type in "
          + DFS_CHECKSUM_TYPE_KEY + ": " + checksumTypeStr);
    }

    // Defaults handed to clients at connect time.
    this.serverDefaults = new FsServerDefaults(
        conf.getLongBytes(DFS_BLOCK_SIZE_KEY, DFS_BLOCK_SIZE_DEFAULT),
        conf.getInt(DFS_BYTES_PER_CHECKSUM_KEY, DFS_BYTES_PER_CHECKSUM_DEFAULT),
        conf.getInt(DFS_CLIENT_WRITE_PACKET_SIZE_KEY, DFS_CLIENT_WRITE_PACKET_SIZE_DEFAULT),
        (short) conf.getInt(DFS_REPLICATION_KEY, DFS_REPLICATION_DEFAULT),
        conf.getInt(IO_FILE_BUFFER_SIZE_KEY, IO_FILE_BUFFER_SIZE_DEFAULT),
        conf.getBoolean(DFS_ENCRYPT_DATA_TRANSFER_KEY, DFS_ENCRYPT_DATA_TRANSFER_DEFAULT),
        conf.getLong(FS_TRASH_INTERVAL_KEY, FS_TRASH_INTERVAL_DEFAULT),
        checksumType);

    this.maxFsObjects = conf.getLong(DFS_NAMENODE_MAX_OBJECTS_KEY,
                                     DFS_NAMENODE_MAX_OBJECTS_DEFAULT);

    this.minBlockSize = conf.getLong(DFSConfigKeys.DFS_NAMENODE_MIN_BLOCK_SIZE_KEY,
        DFSConfigKeys.DFS_NAMENODE_MIN_BLOCK_SIZE_DEFAULT);
    this.maxBlocksPerFile = conf.getLong(DFSConfigKeys.DFS_NAMENODE_MAX_BLOCKS_PER_FILE_KEY,
        DFSConfigKeys.DFS_NAMENODE_MAX_BLOCKS_PER_FILE_DEFAULT);
    this.accessTimePrecision = conf.getLong(DFS_NAMENODE_ACCESSTIME_PRECISION_KEY,
        DFS_NAMENODE_ACCESSTIME_PRECISION_DEFAULT);
    this.supportAppends = conf.getBoolean(DFS_SUPPORT_APPEND_KEY, DFS_SUPPORT_APPEND_DEFAULT);
    LOG.info("Append Enabled: " + supportAppends);

    this.dtpReplaceDatanodeOnFailure = ReplaceDatanodeOnFailure.get(conf);

    this.standbyShouldCheckpoint = conf.getBoolean(
        DFS_HA_STANDBY_CHECKPOINTS_KEY, DFS_HA_STANDBY_CHECKPOINTS_DEFAULT);
    // edit autoroll threshold is a multiple of the checkpoint threshold
    this.editLogRollerThreshold = (long)
        (conf.getFloat(
            DFS_NAMENODE_EDIT_LOG_AUTOROLL_MULTIPLIER_THRESHOLD,
            DFS_NAMENODE_EDIT_LOG_AUTOROLL_MULTIPLIER_THRESHOLD_DEFAULT) *
         conf.getLong(
            DFS_NAMENODE_CHECKPOINT_TXNS_KEY,
            DFS_NAMENODE_CHECKPOINT_TXNS_DEFAULT));
    this.editLogRollerInterval = conf.getInt(
        DFS_NAMENODE_EDIT_LOG_AUTOROLL_CHECK_INTERVAL_MS,
        DFS_NAMENODE_EDIT_LOG_AUTOROLL_CHECK_INTERVAL_MS_DEFAULT);
    this.inodeId = new INodeId();

    this.lazyPersistFileScrubIntervalSec = conf.getInt(
        DFS_NAMENODE_LAZY_PERSIST_FILE_SCRUB_INTERVAL_SEC,
        DFS_NAMENODE_LAZY_PERSIST_FILE_SCRUB_INTERVAL_SEC_DEFAULT);

    if (this.lazyPersistFileScrubIntervalSec == 0) {
      throw new IllegalArgumentException(
          DFS_NAMENODE_LAZY_PERSIST_FILE_SCRUB_INTERVAL_SEC + " must be non-zero.");
    }

    // For testing purposes, allow the DT secret manager to be started regardless
    // of whether security is enabled.
    alwaysUseDelegationTokensForTests = conf.getBoolean(
        DFS_NAMENODE_DELEGATION_TOKEN_ALWAYS_USE_KEY,
        DFS_NAMENODE_DELEGATION_TOKEN_ALWAYS_USE_DEFAULT);

    this.dtSecretManager = createDelegationTokenSecretManager(conf);
    this.dir = new FSDirectory(this, conf);
    this.snapshotManager = new SnapshotManager(dir);
    this.cacheManager = new CacheManager(this, conf, blockManager);
    this.safeMode = new SafeModeInfo(conf);
    this.auditLoggers = initAuditLoggers(conf);
    this.isDefaultAuditLogger = auditLoggers.size() == 1 &&
        auditLoggers.get(0) instanceof DefaultAuditLogger;
    this.retryCache = ignoreRetryCache ? null : initRetryCache(conf);
    this.nnConf = new NNConf(conf);
  } catch(IOException e) {
    // Release any partially-acquired resources (storage locks etc.)
    // before propagating the failure.
    LOG.error(getClass().getSimpleName() + " initialization failed.", e);
    close();
    throw e;
  } catch (RuntimeException re) {
    LOG.error(getClass().getSimpleName() + " initialization failed.", re);
    close();
    throw re;
  }
}

@VisibleForTesting
public RetryCache getRetryCache() {
  return retryCache;
}

/** Lock the retry cache if it is enabled; no-op otherwise. */
void lockRetryCache() {
  if (retryCache != null) {
    retryCache.lock();
  }
}

/** Unlock the retry cache if it is enabled; no-op otherwise. */
void unlockRetryCache() {
  if (retryCache != null) {
    retryCache.unlock();
  }
}

/** Whether or not retry cache is enabled */
boolean hasRetryCache() {
  return retryCache != null;
}

/** Record a completed call (with its result payload) in the retry cache. */
void addCacheEntryWithPayload(byte[] clientId, int callId, Object payload) {
  if (retryCache != null) {
    retryCache.addCacheEntryWithPayload(clientId, callId, payload);
  }
}

/** Record a completed call in the retry cache. */
void addCacheEntry(byte[] clientId, int callId) {
  if (retryCache != null) {
    retryCache.addCacheEntry(clientId, callId);
  }
}

@VisibleForTesting
public KeyProviderCryptoExtension getProvider() {
  return provider;
}

/**
 * Build the RPC retry cache from configuration.
 * @return the configured cache, or null when the cache is disabled.
 */
@VisibleForTesting
static RetryCache initRetryCache(Configuration conf) {
  boolean enable = conf.getBoolean(DFS_NAMENODE_ENABLE_RETRY_CACHE_KEY,
                                   DFS_NAMENODE_ENABLE_RETRY_CACHE_DEFAULT);
  LOG.info("Retry cache on namenode is " + (enable ? "enabled" : "disabled"));
  if (enable) {
    float heapPercent = conf.getFloat(
        DFS_NAMENODE_RETRY_CACHE_HEAP_PERCENT_KEY,
        DFS_NAMENODE_RETRY_CACHE_HEAP_PERCENT_DEFAULT);
    long entryExpiryMillis = conf.getLong(
        DFS_NAMENODE_RETRY_CACHE_EXPIRYTIME_MILLIS_KEY,
        DFS_NAMENODE_RETRY_CACHE_EXPIRYTIME_MILLIS_DEFAULT);
    LOG.info("Retry cache will use " + heapPercent
        + " of total heap and retry cache entry expiry time is "
        + entryExpiryMillis + " millis");
    // RetryCache takes nanoseconds.
    long entryExpiryNanos = entryExpiryMillis * 1000 * 1000;
    return new RetryCache("NameNodeRetryCache", heapPercent,
        entryExpiryNanos);
  }
  return null;
}

/**
 * Instantiate the audit loggers named in configuration, falling back to a
 * single DefaultAuditLogger when none are configured.
 * @return an unmodifiable list with at least one logger.
 */
private List<AuditLogger> initAuditLoggers(Configuration conf) {
  // Initialize the custom access loggers if configured.
  Collection<String> alClasses = conf.getStringCollection(DFS_NAMENODE_AUDIT_LOGGERS_KEY);
  List<AuditLogger> auditLoggers = Lists.newArrayList();
  if (alClasses != null && !alClasses.isEmpty()) {
    for (String className : alClasses) {
      try {
        AuditLogger logger;
        if (DFS_NAMENODE_DEFAULT_AUDIT_LOGGER_NAME.equals(className)) {
          logger = new DefaultAuditLogger();
        } else {
          // Reflectively load a user-supplied logger class.
          logger = (AuditLogger) Class.forName(className).newInstance();
        }
        logger.initialize(conf);
        auditLoggers.add(logger);
      } catch (RuntimeException re) {
        throw re;
      } catch (Exception e) {
        // A misconfigured audit logger is fatal to NN startup.
        throw new RuntimeException(e);
      }
    }
  }

  // Make sure there is at least one logger installed.
  if (auditLoggers.isEmpty()) {
    auditLoggers.add(new DefaultAuditLogger());
  }
  return Collections.unmodifiableList(auditLoggers);
}

/**
 * Load the FSImage and edits per the given startup option, saving a new
 * image when required and opening the edit log for write where this node
 * is (or will become) the writer.
 */
private void loadFSImage(StartupOption startOpt) throws IOException {
  final FSImage fsImage = getFSImage();

  // format before starting up if requested
  if (startOpt == StartupOption.FORMAT) {

    fsImage.format(this, fsImage.getStorage().determineClusterId());// reuse current id

    startOpt = StartupOption.REGULAR;
  }
  boolean success = false;
  writeLock();
  try {
    // We shouldn't be calling saveNamespace if we've come up in standby state.
    MetaRecoveryContext recovery = startOpt.createRecoveryContext();
    final boolean staleImage
        = fsImage.recoverTransitionRead(startOpt, this, recovery);
    if (RollingUpgradeStartupOption.ROLLBACK.matches(startOpt) ||
        RollingUpgradeStartupOption.DOWNGRADE.matches(startOpt)) {
      rollingUpgradeInfo = null;
    }
    final boolean needToSave = staleImage && !haEnabled && !isRollingUpgrade();
    LOG.info("Need to save fs image? " + needToSave
        + " (staleImage=" + staleImage + ", haEnabled=" + haEnabled
        + ", isRollingUpgrade=" + isRollingUpgrade() + ")");
    if (needToSave) {
      fsImage.saveNamespace(this);
    } else {
      updateStorageVersionForRollingUpgrade(fsImage.getLayoutVersion(),
          startOpt);
      // No need to save, so mark the phase done.
      StartupProgress prog = NameNode.getStartupProgress();
      prog.beginPhase(Phase.SAVING_CHECKPOINT);
      prog.endPhase(Phase.SAVING_CHECKPOINT);
    }
    // This will start a new log segment and write to the seen_txid file, so
    // we shouldn't do it when coming up in standby state
    if (!haEnabled || (haEnabled && startOpt == StartupOption.UPGRADE)
        || (haEnabled && startOpt == StartupOption.UPGRADEONLY)) {
      fsImage.openEditLogForWrite();
    }
    success = true;
  } finally {
    if (!success) {
      // Release storage locks on failure.
      fsImage.close();
    }
    writeUnlock();
  }
  imageLoadComplete();
}

/**
 * Persist the new storage layout version when a rolling upgrade is being
 * started or rolled back.
 */
private void updateStorageVersionForRollingUpgrade(final long layoutVersion,
    StartupOption startOpt) throws IOException {
  boolean rollingStarted = RollingUpgradeStartupOption.STARTED
      .matches(startOpt) && layoutVersion > HdfsConstants
      .NAMENODE_LAYOUT_VERSION;
  boolean rollingRollback = RollingUpgradeStartupOption.ROLLBACK
      .matches(startOpt);
  if (rollingRollback || rollingStarted) {
    fsImage.updateStorageVersion();
  }
}

/** Start the delegation-token secret manager threads, if one is configured. */
private void startSecretManager() {
  if (dtSecretManager != null) {
    try {
      dtSecretManager.startThreads();
    } catch (IOException e) {
      // Inability to start secret manager
      // can't be recovered from.
      throw new RuntimeException(e);
    }
  }
}

/**
 * Start the secret manager only when tokens are in use, the NN is out of
 * safe mode, the edit log is writable, and it is not already running.
 */
private void startSecretManagerIfNecessary() {
  boolean shouldRun = shouldUseDelegationTokens() &&
      !isInSafeMode() && getEditLog().isOpenForWrite();
  boolean running = dtSecretManager.isRunning();
  if (shouldRun && !running) {
    startSecretManager();
  }
}

/** Stop the secret manager threads, if one is configured. */
private void stopSecretManager() {
  if (dtSecretManager != null) {
    dtSecretManager.stopThreads();
  }
}

/**
 * Start services common to both active and standby states
 */
void startCommonServices(Configuration conf, HAContext haContext) throws IOException {
  this.registerMBean(); // register the MBean for the FSNamesystemState
  writeLock();
  this.haContext = haContext;
  try {
    nnResourceChecker = new NameNodeResourceChecker(conf);
    checkAvailableResources();
    assert safeMode != null && !isPopulatingReplQueues();
    StartupProgress prog = NameNode.getStartupProgress();
    prog.beginPhase(Phase.SAFEMODE);
    prog.setTotal(Phase.SAFEMODE, STEP_AWAITING_REPORTED_BLOCKS,
        getCompleteBlocksTotal());
    setBlockTotal();
    blockManager.activate(conf);
  } finally {
    writeUnlock();
  }

  registerMXBean();
  DefaultMetricsSystem.instance().register(this);
  snapshotManager.registerMXBean();
}

/**
 * Stop services common to both active and standby states
 */
void stopCommonServices() {
  writeLock();
  try {
    if (blockManager != null) blockManager.close();
  } finally {
    writeUnlock();
  }
  RetryCache.clear(retryCache);
}

/**
 * Start services required in active state
 * @throws IOException
 */
void startActiveServices() throws IOException {
  startingActiveService = true;
  LOG.info("Starting services required for active state");
  writeLock();
  try {
    FSEditLog editLog = getFSImage().getEditLog();

    if (!editLog.isOpenForWrite()) {
      // During startup, we're already open for write during initialization.
      editLog.initJournalsForWrite();
      // May need to recover
      editLog.recoverUnclosedStreams();

      LOG.info("Catching up to latest edits from old active before " +
          "taking over writer role in edits logs");
      editLogTailer.catchupDuringFailover();

      blockManager.setPostponeBlocksFromFuture(false);
      blockManager.getDatanodeManager().markAllDatanodesStale();
      blockManager.clearQueues();
      blockManager.processAllPendingDNMessages();

      // Only need to re-process the queue, If not in SafeMode.
      if (!isInSafeMode()) {
        LOG.info("Reprocessing replication and invalidation queues");
        initializeReplQueues();
      }

      if (LOG.isDebugEnabled()) {
        LOG.debug("NameNode metadata after re-processing " +
            "replication and invalidation queues during failover:\n" +
            metaSaveAsString());
      }

      long nextTxId = getFSImage().getLastAppliedTxId() + 1;
      LOG.info("Will take over writing edit logs at txnid " +
          nextTxId);
      editLog.setNextTxId(nextTxId);

      getFSImage().editLog.openForWrite();
    }

    // Enable quota checks.
    dir.enableQuotaChecks();
    if (haEnabled) {
      // Renew all of the leases before becoming active.
      // This is because, while we were in standby mode,
      // the leases weren't getting renewed on this NN.
      // Give them all a fresh start here.
      leaseManager.renewAllLeases();
    }
    leaseManager.startMonitor();
    startSecretManagerIfNecessary();

    //ResourceMonitor required only at ActiveNN. See HDFS-2914
    this.nnrmthread = new Daemon(new NameNodeResourceMonitor());
    nnrmthread.start();

    nnEditLogRoller = new Daemon(new NameNodeEditLogRoller(
        editLogRollerThreshold, editLogRollerInterval));
    nnEditLogRoller.start();

    if (lazyPersistFileScrubIntervalSec > 0) {
      lazyPersistFileScrubber = new Daemon(new LazyPersistFileScrubber(
          lazyPersistFileScrubIntervalSec));
      lazyPersistFileScrubber.start();
    }

    cacheManager.startMonitorThread();
    blockManager.getDatanodeManager().setShouldSendCachingCommands(true);
  } finally {
    startingActiveService = false;
    checkSafeMode();
    writeUnlock();
  }
}

/**
 * Initialize replication queues.
 */
private void initializeReplQueues() {
  LOG.info("initializing replication queues");
  blockManager.processMisReplicatedBlocks();
  initializedReplQueues = true;
}

/** @return true when an HA context exists and reports the ACTIVE state. */
private boolean inActiveState() {
  return haContext != null &&
      haContext.getState().getServiceState() == HAServiceState.ACTIVE;
}

/**
 * @return Whether the namenode is transitioning to active state and is in the
 *         middle of the {@link #startActiveServices()}
 */
public boolean inTransitionToActive() {
  return haEnabled && inActiveState() && startingActiveService;
}

/** Tokens are needed in secure clusters, or when forced on for tests. */
private boolean shouldUseDelegationTokens() {
  return UserGroupInformation.isSecurityEnabled() ||
      alwaysUseDelegationTokensForTests;
}

/**
 * Stop services required in active state
 */
void stopActiveServices() {
  LOG.info("Stopping services started for active state");
  writeLock();
  try {
    stopSecretManager();
    leaseManager.stopMonitor();
    if (nnrmthread != null) {
      ((NameNodeResourceMonitor) nnrmthread.getRunnable()).stopMonitor();
      nnrmthread.interrupt();
    }
    if (nnEditLogRoller != null) {
      ((NameNodeEditLogRoller)nnEditLogRoller.getRunnable()).stop();
      nnEditLogRoller.interrupt();
    }
    if (lazyPersistFileScrubber != null) {
      ((LazyPersistFileScrubber) lazyPersistFileScrubber.getRunnable()).stop();
      lazyPersistFileScrubber.interrupt();
    }
    if (dir != null && getFSImage() != null) {
      if (getFSImage().editLog != null) {
        getFSImage().editLog.close();
      }
      // Update the fsimage with the last txid that we wrote
      // so that the tailer starts from the right spot.
      getFSImage().updateLastAppliedTxIdFromWritten();
    }
    if (cacheManager != null) {
      cacheManager.stopMonitorThread();
      cacheManager.clearDirectiveStats();
    }
    blockManager.getDatanodeManager().clearPendingCachingCommands();
    blockManager.getDatanodeManager().setShouldSendCachingCommands(false);
    // Don't want to keep replication queues when not in Active.
    blockManager.clearQueues();
    initializedReplQueues = false;
  } finally {
    writeUnlock();
  }
}

/**
 * Start services required in standby state
 *
 * @throws IOException
 */
void startStandbyServices(final Configuration conf) throws IOException {
  LOG.info("Starting services required for standby state");
  if (!getFSImage().editLog.isOpenForRead()) {
    // During startup, we're already open for read.
    getFSImage().editLog.initSharedJournalsForRead();
  }

  blockManager.setPostponeBlocksFromFuture(true);

  // Disable quota checks while in standby.
  dir.disableQuotaChecks();
  editLogTailer = new EditLogTailer(this, conf);
  editLogTailer.start();
  if (standbyShouldCheckpoint) {
    standbyCheckpointer = new StandbyCheckpointer(conf, this);
    standbyCheckpointer.start();
  }
}

/**
 * Called when the NN is in Standby state and the editlog tailer tails the
 * OP_ROLLING_UPGRADE_START.
 */
void triggerRollbackCheckpoint() {
  setNeedRollbackFsImage(true);
  if (standbyCheckpointer != null) {
    standbyCheckpointer.triggerRollbackCheckpoint();
  }
}

/**
 * Called while the NN is in Standby state, but just about to be
 * asked to enter Active state. This cancels any checkpoints
 * currently being taken.
 */
void prepareToStopStandbyServices() throws ServiceFailedException {
  if (standbyCheckpointer != null) {
    standbyCheckpointer.cancelAndPreventCheckpoints(
        "About to leave standby state");
  }
}

/** Stop services required in standby state */
void stopStandbyServices() throws IOException {
  LOG.info("Stopping services started for standby state");
  // Order: stop the checkpointer first, then the tailer, then close the
  // edit log so no component is left reading a closed log.
  if (standbyCheckpointer != null) {
    standbyCheckpointer.stop();
  }
  if (editLogTailer != null) {
    editLogTailer.stop();
  }
  if (dir != null && getFSImage() != null && getFSImage().editLog != null) {
    getFSImage().editLog.close();
  }
}

@Override
public void checkOperation(OperationCategory op) throws StandbyException {
  if (haContext != null) {
    // null in some unit tests
    haContext.checkOperation(op);
  }
}

/**
 * @throws RetriableException
 *           If 1) The NameNode is in SafeMode, 2) HA is enabled, and 3)
 *           NameNode is in active state
 * @throws SafeModeException
 *           Otherwise if NameNode is in SafeMode.
1362 */ 1363 private void checkNameNodeSafeMode(String errorMsg) 1364 throws RetriableException, SafeModeException { 1365 if (isInSafeMode()) { 1366 SafeModeException se = new SafeModeException(errorMsg, safeMode); 1367 if (haEnabled && haContext != null 1368 && haContext.getState().getServiceState() == HAServiceState.ACTIVE 1369 && shouldRetrySafeMode(this.safeMode)) { 1370 throw new RetriableException(se); 1371 } else { 1372 throw se; 1373 } 1374 } 1375 } 1376 1377 /** 1378 * We already know that the safemode is on. We will throw a RetriableException 1379 * if the safemode is not manual or caused by low resource. 1380 */ 1381 private boolean shouldRetrySafeMode(SafeModeInfo safeMode) { 1382 if (safeMode == null) { 1383 return false; 1384 } else { 1385 return !safeMode.isManual() && !safeMode.areResourcesLow(); 1386 } 1387 } 1388 1389 public static Collection<URI> getNamespaceDirs(Configuration conf) { 1390 return getStorageDirs(conf, DFS_NAMENODE_NAME_DIR_KEY); 1391 } 1392 1393 /** 1394 * Get all edits dirs which are required. If any shared edits dirs are 1395 * configured, these are also included in the set of required dirs. 1396 * 1397 * @param conf the HDFS configuration. 1398 * @return all required dirs. 
 */
public static Collection<URI> getRequiredNamespaceEditsDirs(Configuration conf) {
  Set<URI> ret = new HashSet<URI>();
  ret.addAll(getStorageDirs(conf, DFS_NAMENODE_EDITS_DIR_REQUIRED_KEY));
  ret.addAll(getSharedEditsDirs(conf));
  return ret;
}

/**
 * Read the directories named by {@code propertyName} as URIs, applying the
 * special IMPORT-startup filtering and the default-edits-dir fallback.
 */
private static Collection<URI> getStorageDirs(Configuration conf,
                                              String propertyName) {
  Collection<String> dirNames = conf.getTrimmedStringCollection(propertyName);
  StartupOption startOpt = NameNode.getStartupOption(conf);
  if(startOpt == StartupOption.IMPORT) {
    // In case of IMPORT this will get rid of default directories
    // but will retain directories specified in hdfs-site.xml
    // When importing image from a checkpoint, the name-node can
    // start with empty set of storage directories.
    Configuration cE = new HdfsConfiguration(false);
    cE.addResource("core-default.xml");
    cE.addResource("core-site.xml");
    cE.addResource("hdfs-default.xml");
    Collection<String> dirNames2 = cE.getTrimmedStringCollection(propertyName);
    // Subtract the *-default/*-site values, leaving only hdfs-site entries.
    dirNames.removeAll(dirNames2);
    if(dirNames.isEmpty())
      LOG.warn("!!! WARNING !!!" +
        "\n\tThe NameNode currently runs without persistent storage." +
        "\n\tAny changes to the file system meta-data may be lost." +
        "\n\tRecommended actions:" +
        "\n\t\t- shutdown and restart NameNode with configured \""
        + propertyName + "\" in hdfs-site.xml;" +
        "\n\t\t- use Backup Node as a persistent and up-to-date storage " +
        "of the file system meta-data.");
  } else if (dirNames.isEmpty()) {
    // Nothing configured at all: fall back to the built-in default dir.
    dirNames = Collections.singletonList(
        DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_DEFAULT);
  }
  return Util.stringCollectionAsURIs(dirNames);
}

/**
 * Return an ordered list of edits directories to write to.
 * The list is ordered such that all shared edits directories
 * are ordered before non-shared directories, and any duplicates
 * are removed.
The order they are specified in the configuration 1443 * is retained. 1444 * @return Collection of shared edits directories. 1445 * @throws IOException if multiple shared edits directories are configured 1446 */ 1447 public static List<URI> getNamespaceEditsDirs(Configuration conf) 1448 throws IOException { 1449 return getNamespaceEditsDirs(conf, true); 1450 } 1451 1452 public static List<URI> getNamespaceEditsDirs(Configuration conf, 1453 boolean includeShared) 1454 throws IOException { 1455 // Use a LinkedHashSet so that order is maintained while we de-dup 1456 // the entries. 1457 LinkedHashSet<URI> editsDirs = new LinkedHashSet<URI>(); 1458 1459 if (includeShared) { 1460 List<URI> sharedDirs = getSharedEditsDirs(conf); 1461 1462 // Fail until multiple shared edits directories are supported (HDFS-2782) 1463 if (sharedDirs.size() > 1) { 1464 throw new IOException( 1465 "Multiple shared edits directories are not yet supported"); 1466 } 1467 1468 // First add the shared edits dirs. It's critical that the shared dirs 1469 // are added first, since JournalSet syncs them in the order they are listed, 1470 // and we need to make sure all edits are in place in the shared storage 1471 // before they are replicated locally. See HDFS-2874. 1472 for (URI dir : sharedDirs) { 1473 if (!editsDirs.add(dir)) { 1474 LOG.warn("Edits URI " + dir + " listed multiple times in " + 1475 DFS_NAMENODE_SHARED_EDITS_DIR_KEY + ". Ignoring duplicates."); 1476 } 1477 } 1478 } 1479 // Now add the non-shared dirs. 1480 for (URI dir : getStorageDirs(conf, DFS_NAMENODE_EDITS_DIR_KEY)) { 1481 if (!editsDirs.add(dir)) { 1482 LOG.warn("Edits URI " + dir + " listed multiple times in " + 1483 DFS_NAMENODE_SHARED_EDITS_DIR_KEY + " and " + 1484 DFS_NAMENODE_EDITS_DIR_KEY + ". Ignoring duplicates."); 1485 } 1486 } 1487 1488 if (editsDirs.isEmpty()) { 1489 // If this is the case, no edit dirs have been explicitly configured. 1490 // Image dirs are to be used for edits too. 
1491 return Lists.newArrayList(getNamespaceDirs(conf)); 1492 } else { 1493 return Lists.newArrayList(editsDirs); 1494 } 1495 } 1496 1497 /** 1498 * Returns edit directories that are shared between primary and secondary. 1499 * @param conf configuration 1500 * @return collection of edit directories from {@code conf} 1501 */ 1502 public static List<URI> getSharedEditsDirs(Configuration conf) { 1503 // don't use getStorageDirs here, because we want an empty default 1504 // rather than the dir in /tmp 1505 Collection<String> dirNames = conf.getTrimmedStringCollection( 1506 DFS_NAMENODE_SHARED_EDITS_DIR_KEY); 1507 return Util.stringCollectionAsURIs(dirNames); 1508 } 1509 1510 @Override 1511 public void readLock() { 1512 this.fsLock.readLock().lock(); 1513 } 1514 @Override 1515 public void longReadLockInterruptibly() throws InterruptedException { 1516 this.fsLock.longReadLock().lockInterruptibly(); 1517 try { 1518 this.fsLock.readLock().lockInterruptibly(); 1519 } catch (InterruptedException ie) { 1520 // In the event we're interrupted while getting the normal FSNS read lock, 1521 // release the long read lock. 1522 this.fsLock.longReadLock().unlock(); 1523 throw ie; 1524 } 1525 } 1526 @Override 1527 public void longReadUnlock() { 1528 this.fsLock.readLock().unlock(); 1529 this.fsLock.longReadLock().unlock(); 1530 } 1531 @Override 1532 public void readUnlock() { 1533 this.fsLock.readLock().unlock(); 1534 } 1535 @Override 1536 public void writeLock() { 1537 this.fsLock.longReadLock().lock(); 1538 this.fsLock.writeLock().lock(); 1539 } 1540 @Override 1541 public void writeLockInterruptibly() throws InterruptedException { 1542 this.fsLock.longReadLock().lockInterruptibly(); 1543 try { 1544 this.fsLock.writeLock().lockInterruptibly(); 1545 } catch (InterruptedException ie) { 1546 // In the event we're interrupted while getting the normal FSNS write 1547 // lock, release the long read lock. 
1548 this.fsLock.longReadLock().unlock(); 1549 throw ie; 1550 } 1551 } 1552 @Override 1553 public void writeUnlock() { 1554 this.fsLock.writeLock().unlock(); 1555 this.fsLock.longReadLock().unlock(); 1556 } 1557 @Override 1558 public boolean hasWriteLock() { 1559 return this.fsLock.isWriteLockedByCurrentThread(); 1560 } 1561 @Override 1562 public boolean hasReadLock() { 1563 return this.fsLock.getReadHoldCount() > 0 || hasWriteLock(); 1564 } 1565 1566 public int getReadHoldCount() { 1567 return this.fsLock.getReadHoldCount(); 1568 } 1569 1570 public int getWriteHoldCount() { 1571 return this.fsLock.getWriteHoldCount(); 1572 } 1573 1574 NamespaceInfo getNamespaceInfo() { 1575 readLock(); 1576 try { 1577 return unprotectedGetNamespaceInfo(); 1578 } finally { 1579 readUnlock(); 1580 } 1581 } 1582 1583 /** 1584 * Version of @see #getNamespaceInfo() that is not protected by a lock. 1585 */ 1586 NamespaceInfo unprotectedGetNamespaceInfo() { 1587 return new NamespaceInfo(getFSImage().getStorage().getNamespaceID(), 1588 getClusterId(), getBlockPoolId(), 1589 getFSImage().getStorage().getCTime()); 1590 } 1591 1592 /** 1593 * Close down this file system manager. 1594 * Causes heartbeat and lease daemons to stop; waits briefly for 1595 * them to finish, but a short timeout returns control back to caller. 1596 */ 1597 void close() { 1598 fsRunning = false; 1599 try { 1600 stopCommonServices(); 1601 if (smmthread != null) smmthread.interrupt(); 1602 } finally { 1603 // using finally to ensure we also wait for lease daemon 1604 try { 1605 stopActiveServices(); 1606 stopStandbyServices(); 1607 } catch (IOException ie) { 1608 } finally { 1609 IOUtils.cleanup(LOG, dir); 1610 IOUtils.cleanup(LOG, fsImage); 1611 } 1612 } 1613 } 1614 1615 @Override 1616 public boolean isRunning() { 1617 return fsRunning; 1618 } 1619 1620 @Override 1621 public boolean isInStandbyState() { 1622 if (haContext == null || haContext.getState() == null) { 1623 // We're still starting up. 
In this case, if HA is 1624 // on for the cluster, we always start in standby. Otherwise 1625 // start in active. 1626 return haEnabled; 1627 } 1628 1629 return HAServiceState.STANDBY == haContext.getState().getServiceState(); 1630 } 1631 1632 /** 1633 * Dump all metadata into specified file 1634 */ 1635 void metaSave(String filename) throws IOException { 1636 checkSuperuserPrivilege(); 1637 checkOperation(OperationCategory.UNCHECKED); 1638 writeLock(); 1639 try { 1640 checkOperation(OperationCategory.UNCHECKED); 1641 File file = new File(System.getProperty("hadoop.log.dir"), filename); 1642 PrintWriter out = new PrintWriter(new BufferedWriter( 1643 new OutputStreamWriter(new FileOutputStream(file), Charsets.UTF_8))); 1644 metaSave(out); 1645 out.flush(); 1646 out.close(); 1647 } finally { 1648 writeUnlock(); 1649 } 1650 } 1651 1652 private void metaSave(PrintWriter out) { 1653 assert hasWriteLock(); 1654 long totalInodes = this.dir.totalInodes(); 1655 long totalBlocks = this.getBlocksTotal(); 1656 out.println(totalInodes + " files and directories, " + totalBlocks 1657 + " blocks = " + (totalInodes + totalBlocks) + " total"); 1658 1659 blockManager.metaSave(out); 1660 } 1661 1662 private String metaSaveAsString() { 1663 StringWriter sw = new StringWriter(); 1664 PrintWriter pw = new PrintWriter(sw); 1665 metaSave(pw); 1666 pw.flush(); 1667 return sw.toString(); 1668 } 1669 1670 1671 long getDefaultBlockSize() { 1672 return serverDefaults.getBlockSize(); 1673 } 1674 1675 FsServerDefaults getServerDefaults() throws StandbyException { 1676 checkOperation(OperationCategory.READ); 1677 return serverDefaults; 1678 } 1679 1680 long getAccessTimePrecision() { 1681 return accessTimePrecision; 1682 } 1683 1684 private boolean isAccessTimeSupported() { 1685 return accessTimePrecision > 0; 1686 } 1687 1688 ///////////////////////////////////////////////////////// 1689 // 1690 // These methods are called by HadoopFS clients 1691 // 1692 
///////////////////////////////////////////////////////// 1693 /** 1694 * Set permissions for an existing file. 1695 * @throws IOException 1696 */ 1697 void setPermission(String src, FsPermission permission) 1698 throws AccessControlException, FileNotFoundException, SafeModeException, 1699 UnresolvedLinkException, IOException { 1700 try { 1701 setPermissionInt(src, permission); 1702 } catch (AccessControlException e) { 1703 logAuditEvent(false, "setPermission", src); 1704 throw e; 1705 } 1706 } 1707 1708 private void setPermissionInt(final String srcArg, FsPermission permission) 1709 throws AccessControlException, FileNotFoundException, SafeModeException, 1710 UnresolvedLinkException, IOException { 1711 String src = srcArg; 1712 HdfsFileStatus resultingStat = null; 1713 FSPermissionChecker pc = getPermissionChecker(); 1714 checkOperation(OperationCategory.WRITE); 1715 byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src); 1716 writeLock(); 1717 try { 1718 checkOperation(OperationCategory.WRITE); 1719 checkNameNodeSafeMode("Cannot set permission for " + src); 1720 src = resolvePath(src, pathComponents); 1721 checkOwner(pc, src); 1722 dir.setPermission(src, permission); 1723 getEditLog().logSetPermissions(src, permission); 1724 resultingStat = getAuditFileInfo(src, false); 1725 } finally { 1726 writeUnlock(); 1727 } 1728 getEditLog().logSync(); 1729 logAuditEvent(true, "setPermission", srcArg, null, resultingStat); 1730 } 1731 1732 /** 1733 * Set owner for an existing file. 
   * @throws IOException
   */
  void setOwner(String src, String username, String group)
      throws AccessControlException, FileNotFoundException, SafeModeException,
      UnresolvedLinkException, IOException {
    try {
      setOwnerInt(src, username, group);
    } catch (AccessControlException e) {
      logAuditEvent(false, "setOwner", src);
      throw e;
    }
  }

  private void setOwnerInt(final String srcArg, String username, String group)
      throws AccessControlException, FileNotFoundException, SafeModeException,
      UnresolvedLinkException, IOException {
    String src = srcArg;
    HdfsFileStatus resultingStat = null;
    FSPermissionChecker pc = getPermissionChecker();
    checkOperation(OperationCategory.WRITE);
    byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
    writeLock();
    try {
      checkOperation(OperationCategory.WRITE);
      checkNameNodeSafeMode("Cannot set owner for " + src);
      src = resolvePath(src, pathComponents);
      checkOwner(pc, src);
      if (!pc.isSuperUser()) {
        // Non-superusers may only set the owner to themselves, and only to a
        // group they are a member of.
        if (username != null && !pc.getUser().equals(username)) {
          throw new AccessControlException("Non-super user cannot change owner");
        }
        if (group != null && !pc.containsGroup(group)) {
          throw new AccessControlException("User does not belong to " + group);
        }
      }
      dir.setOwner(src, username, group);
      getEditLog().logSetOwner(src, username, group);
      resultingStat = getAuditFileInfo(src, false);
    } finally {
      writeUnlock();
    }
    getEditLog().logSync();
    logAuditEvent(true, "setOwner", srcArg, null, resultingStat);
  }

  /**
   * Get block locations within the specified range.
   * @see ClientProtocol#getBlockLocations(String, long, long)
   */
  LocatedBlocks getBlockLocations(String clientMachine, String src,
      long offset, long length) throws AccessControlException,
      FileNotFoundException, UnresolvedLinkException, IOException {
    LocatedBlocks blocks = getBlockLocations(src, offset, length, true, true,
        true);
    if (blocks != null) {
      // Sort replicas relative to the requesting client.
      blockManager.getDatanodeManager().sortLocatedBlocks(clientMachine,
          blocks.getLocatedBlocks());

      // lastBlock is not part of getLocatedBlocks(), might need to sort it too
      LocatedBlock lastBlock = blocks.getLastLocatedBlock();
      if (lastBlock != null) {
        ArrayList<LocatedBlock> lastBlockList =
            Lists.newArrayListWithCapacity(1);
        lastBlockList.add(lastBlock);
        blockManager.getDatanodeManager().sortLocatedBlocks(clientMachine,
            lastBlockList);
      }
    }
    return blocks;
  }

  /**
   * Get block locations within the specified range.
   * @see ClientProtocol#getBlockLocations(String, long, long)
   * @throws FileNotFoundException if {@code src} does not exist
   * @throws UnresolvedLinkException if a symlink must be resolved
   * @throws IOException on other errors
   */
  LocatedBlocks getBlockLocations(String src, long offset, long length,
      boolean doAccessTime, boolean needBlockToken, boolean checkSafeMode)
      throws FileNotFoundException, UnresolvedLinkException, IOException {
    try {
      return getBlockLocationsInt(src, offset, length, doAccessTime,
          needBlockToken, checkSafeMode);
    } catch (AccessControlException e) {
      logAuditEvent(false, "open", src);
      throw e;
    }
  }

  private LocatedBlocks getBlockLocationsInt(String src, long offset,
      long length, boolean doAccessTime, boolean needBlockToken,
      boolean checkSafeMode)
      throws FileNotFoundException, UnresolvedLinkException, IOException {
    if (offset < 0) {
      throw new HadoopIllegalArgumentException(
          "Negative offset is not supported. File: " + src);
    }
    if (length < 0) {
      throw new HadoopIllegalArgumentException(
          "Negative length is not supported. File: " + src);
    }
    final LocatedBlocks ret = getBlockLocationsUpdateTimes(src,
        offset, length, doAccessTime, needBlockToken);
    logAuditEvent(true, "open", src);
    if (checkSafeMode && isInSafeMode()) {
      for (LocatedBlock b : ret.getLocatedBlocks()) {
        // if safemode & no block locations yet then throw safemodeException
        if ((b.getLocations() == null) || (b.getLocations().length == 0)) {
          SafeModeException se = new SafeModeException(
              "Zero blocklocations for " + src, safeMode);
          if (haEnabled && haContext != null &&
              haContext.getState().getServiceState() == HAServiceState.ACTIVE) {
            // On an active HA NameNode let the client retry: locations can
            // appear as block reports arrive.
            throw new RetriableException(se);
          } else {
            throw se;
          }
        }
      }
    }
    return ret;
  }

  /*
   * Get block locations within the specified range, updating the
   * access times if necessary.
   */
  private LocatedBlocks getBlockLocationsUpdateTimes(final String srcArg,
      long offset, long length, boolean doAccessTime, boolean needBlockToken)
      throws FileNotFoundException,
      UnresolvedLinkException, IOException {
    String src = srcArg;
    FSPermissionChecker pc = getPermissionChecker();
    byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
    // Two-attempt scheme: try first under the cheaper read lock; if the
    // access time must be updated, restart the whole operation under the
    // write lock (see the `continue` below).
    for (int attempt = 0; attempt < 2; attempt++) {
      boolean isReadOp = (attempt == 0);
      if (isReadOp) { // first attempt is with readlock
        checkOperation(OperationCategory.READ);
        readLock();
      } else { // second attempt is with write lock
        checkOperation(OperationCategory.WRITE);
        writeLock(); // writelock is needed to set accesstime
      }
      try {
        src = resolvePath(src, pathComponents);
        if (isReadOp) {
          checkOperation(OperationCategory.READ);
        } else {
          checkOperation(OperationCategory.WRITE);
        }
        if (isPermissionEnabled) {
          checkPathAccess(pc, src, FsAction.READ);
        }

        // if the namenode is in safemode, then do not update access time
        if (isInSafeMode()) {
          doAccessTime = false;
        }

        final INodesInPath iip = dir.getINodesInPath(src, true);
        final INode[] inodes = iip.getINodes();
        final INodeFile inode = INodeFile.valueOf(
            inodes[inodes.length - 1], src);
        if (isPermissionEnabled) {
          checkUnreadableBySuperuser(pc, inode, iip.getPathSnapshotId());
        }
        if (!iip.isSnapshot() //snapshots are readonly, so don't update atime.
            && doAccessTime && isAccessTimeSupported()) {
          final long now = now();
          // Only update atime when it is stale by more than the precision.
          if (now > inode.getAccessTime() + getAccessTimePrecision()) {
            // if we have to set access time but we only have the readlock, then
            // restart this entire operation with the writeLock.
            if (isReadOp) {
              continue;
            }
            boolean changed = dir.setTimes(inode, -1, now, false,
                iip.getLatestSnapshotId());
            if (changed) {
              getEditLog().logTimes(src, -1, now);
            }
          }
        }
        final long fileSize = iip.isSnapshot() ?
            inode.computeFileSize(iip.getPathSnapshotId())
            : inode.computeFileSizeNotIncludingLastUcBlock();
        boolean isUc = inode.isUnderConstruction();
        if (iip.isSnapshot()) {
          // if src indicates a snapshot file, we need to make sure the returned
          // blocks do not exceed the size of the snapshot file.
          length = Math.min(length, fileSize - offset);
          isUc = false;
        }

        // Reserved ".raw" paths get no FileEncryptionInfo attached.
        final FileEncryptionInfo feInfo =
            FSDirectory.isReservedRawName(srcArg) ?
            null : dir.getFileEncryptionInfo(inode, iip.getPathSnapshotId(),
                iip);

        final LocatedBlocks blocks =
            blockManager.createLocatedBlocks(inode.getBlocks(), fileSize,
                isUc, offset, length, needBlockToken, iip.isSnapshot(), feInfo);
        // Set caching information for the located blocks.
        for (LocatedBlock lb: blocks.getLocatedBlocks()) {
          cacheManager.setCachedLocations(lb);
        }
        return blocks;
      } finally {
        if (isReadOp) {
          readUnlock();
        } else {
          writeUnlock();
        }
      }
    }
    return null; // can never reach here
  }

  /**
   * Moves all the blocks from {@code srcs} and appends them to {@code target}
   * To avoid rollbacks we will verify validity of ALL of the args
   * before we start actual move.
   *
   * This does not support ".inodes" relative path
   * @param target target to concat into
   * @param srcs file that will be concatenated
   * @throws IOException on error
   */
  void concat(String target, String [] srcs)
      throws IOException, UnresolvedLinkException {
    // Retry cache: a retried RPC whose original attempt succeeded returns the
    // previous response instead of re-executing.
    CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache);
    if (cacheEntry != null && cacheEntry.isSuccess()) {
      return; // Return previous response
    }

    // Either there is no previous request in progress or it has failed
    if(FSNamesystem.LOG.isDebugEnabled()) {
      FSNamesystem.LOG.debug("concat " + Arrays.toString(srcs) +
          " to " + target);
    }

    boolean success = false;
    try {
      concatInt(target, srcs, cacheEntry != null);
      success = true;
    } catch (AccessControlException e) {
      logAuditEvent(false, "concat", Arrays.toString(srcs), target, null);
      throw e;
    } finally {
      RetryCache.setState(cacheEntry, success);
    }
  }

  private void concatInt(String target, String [] srcs,
      boolean logRetryCache) throws IOException, UnresolvedLinkException {
    // verify args
    if(target.isEmpty()) {
      throw new IllegalArgumentException("Target file name is empty");
    }
    if(srcs == null || srcs.length == 0) {
      throw new IllegalArgumentException("No sources given");
    }

    // We require all files be in the same directory
    String trgParent =
        target.substring(0, target.lastIndexOf(Path.SEPARATOR_CHAR));
    for (String s : srcs) {
      String srcParent = s.substring(0, s.lastIndexOf(Path.SEPARATOR_CHAR));
      if (!srcParent.equals(trgParent)) {
        throw new IllegalArgumentException(
            "Sources and target are not in the same directory");
      }
    }

    HdfsFileStatus resultingStat = null;
    FSPermissionChecker pc = getPermissionChecker();
    waitForLoadingFSImage();
    writeLock();
    try {
      checkOperation(OperationCategory.WRITE);
      checkNameNodeSafeMode("Cannot concat " + target);
      concatInternal(pc, target, srcs, logRetryCache);
      resultingStat = getAuditFileInfo(target, false);
    } finally {
      writeUnlock();
    }
    getEditLog().logSync();
    logAuditEvent(true, "concat", Arrays.toString(srcs), target, resultingStat);
  }

  /** See {@link #concat(String, String[])} */
  private void concatInternal(FSPermissionChecker pc, String target,
      String[] srcs, boolean logRetryCache) throws IOException,
      UnresolvedLinkException {
    assert hasWriteLock();

    // write permission for the target
    if (isPermissionEnabled) {
      checkPathAccess(pc, target, FsAction.WRITE);

      // and srcs
      for(String aSrc: srcs) {
        checkPathAccess(pc, aSrc, FsAction.READ); // read the file
        checkParentAccess(pc, aSrc, FsAction.WRITE); // for delete
      }
    }

    // to make sure no two files are the same
    Set<INode> si = new HashSet<INode>();

    // we put the following prerequisite for the operation
    // replication and blocks sizes should be the same for ALL the blocks

    // check the target
    final INodesInPath trgIip = dir.getINodesInPath4Write(target);
    if (dir.getEZForPath(trgIip) != null) {
      throw new HadoopIllegalArgumentException(
          "concat can not be called for files in an encryption zone.");
    }
    final INodeFile trgInode = INodeFile.valueOf(trgIip.getLastINode(),
        target);
    if(trgInode.isUnderConstruction()) {
      throw new HadoopIllegalArgumentException("concat: target file "
          + target + " is under construction");
    }
    // per design target shouldn't be empty and all the blocks same size
    if(trgInode.numBlocks() == 0) {
      throw new HadoopIllegalArgumentException("concat: target file "
          + target + " is empty");
    }
    if (trgInode.isWithSnapshot()) {
      throw new HadoopIllegalArgumentException("concat: target file "
          + target + " is in a snapshot");
    }

    long blockSize = trgInode.getPreferredBlockSize();

    // check the end block to be full
    final BlockInfo last = trgInode.getLastBlock();
    if(blockSize != last.getNumBytes()) {
      throw new HadoopIllegalArgumentException("The last block in " + target
          + " is not full; last block size = " + last.getNumBytes()
          + " but file block size = " + blockSize);
    }

    si.add(trgInode);
    final short repl = trgInode.getFileReplication();

    // now check the srcs
    boolean endSrc = false; // final src file doesn't have to have full end block
    for(int i=0; i<srcs.length; i++) {
      String src = srcs[i];
      if(i==srcs.length-1)
        endSrc=true;

      final INodeFile srcInode = INodeFile.valueOf(dir.getINode4Write(src), src);
      if(src.isEmpty()
          || srcInode.isUnderConstruction()
          || srcInode.numBlocks() == 0) {
        throw new HadoopIllegalArgumentException("concat: source file " + src
            + " is invalid or empty or underConstruction");
      }

      // check replication and blocks size
      if(repl != srcInode.getBlockReplication()) {
        throw new HadoopIllegalArgumentException("concat: the source file "
            + src + " and the target file " + target
            + " should have the same replication: source replication is "
            + srcInode.getBlockReplication()
            + " but target replication is " + repl);
      }

      //boolean endBlock=false;
      // verify that all the blocks are of the same length as target
      // should be enough to check the end blocks
      final BlockInfo[] srcBlocks = srcInode.getBlocks();
      int idx = srcBlocks.length-1;
      if(endSrc)
        idx = srcBlocks.length-2; // end block of endSrc is OK not to be full
      if(idx >= 0 && srcBlocks[idx].getNumBytes() != blockSize) {
        throw new HadoopIllegalArgumentException("concat: the source file "
            + src + " and the target file " + target
            + " should have the same blocks sizes: target block size is "
            + blockSize + " but the size of source block " + idx + " is "
            + srcBlocks[idx].getNumBytes());
      }

      si.add(srcInode);
    }

    // make sure no two files are the same
    if(si.size() < srcs.length+1) { // trg + srcs
      // it means at least two files are the same
      throw new HadoopIllegalArgumentException(
          "concat: at least two of the source files are the same");
    }

    if(NameNode.stateChangeLog.isDebugEnabled()) {
      NameNode.stateChangeLog.debug("DIR* NameSystem.concat: " +
          Arrays.toString(srcs) + " to " + target);
    }

    long timestamp = now();
    dir.concat(target, srcs, timestamp);
    getEditLog().logConcat(target, srcs, timestamp, logRetryCache);
  }

  /**
   * stores the modification and access time for this inode.
   * The access time is precise up to an hour. The transaction, if needed, is
   * written to the edits log but is not flushed.
   */
  void setTimes(String src, long mtime, long atime)
      throws IOException, UnresolvedLinkException {
    if (!isAccessTimeSupported() && atime != -1) {
      throw new IOException("Access time for hdfs is not configured. " +
          " Please set " + DFS_NAMENODE_ACCESSTIME_PRECISION_KEY + " configuration parameter.");
    }
    try {
      setTimesInt(src, mtime, atime);
    } catch (AccessControlException e) {
      logAuditEvent(false, "setTimes", src);
      throw e;
    }
  }

  private void setTimesInt(final String srcArg, long mtime, long atime)
      throws IOException, UnresolvedLinkException {
    String src = srcArg;
    HdfsFileStatus resultingStat = null;
    FSPermissionChecker pc = getPermissionChecker();
    checkOperation(OperationCategory.WRITE);
    byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
    writeLock();
    try {
      checkOperation(OperationCategory.WRITE);
      checkNameNodeSafeMode("Cannot set times " + src);
      src = resolvePath(src, pathComponents);

      // Write access is required to set access and modification times
      if (isPermissionEnabled) {
        checkPathAccess(pc, src, FsAction.WRITE);
      }
      final INodesInPath iip = dir.getINodesInPath4Write(src);
      final INode inode = iip.getLastINode();
      if (inode != null) {
        boolean changed = dir.setTimes(inode, mtime, atime, true,
            iip.getLatestSnapshotId());
        if (changed) {
          getEditLog().logTimes(src, mtime, atime);
        }
        resultingStat = getAuditFileInfo(src, false);
      } else {
        throw new FileNotFoundException("File/Directory " + src + " does not exist.");
      }
    } finally {
      writeUnlock();
    }
    // Deliberately no logSync() here: per the setTimes() javadoc, the time
    // transaction is written to the edit log but not flushed.
    logAuditEvent(true, "setTimes", srcArg, null, resultingStat);
  }

  /**
   * Create a symbolic link.
   */
  @SuppressWarnings("deprecation")
  void createSymlink(String target, String link,
      PermissionStatus dirPerms, boolean createParent)
      throws IOException, UnresolvedLinkException {
    if (!FileSystem.areSymlinksEnabled()) {
      throw new UnsupportedOperationException("Symlinks not supported");
    }
    if (!DFSUtil.isValidName(link)) {
      throw new InvalidPathException("Invalid link name: " + link);
    }
    if (FSDirectory.isReservedName(target)) {
      throw new InvalidPathException("Invalid target name: " + target);
    }
    // Retry cache: a retried RPC whose original attempt succeeded is a no-op.
    CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache);
    if (cacheEntry != null && cacheEntry.isSuccess()) {
      return; // Return previous response
    }
    boolean success = false;
    try {
      createSymlinkInt(target, link, dirPerms, createParent, cacheEntry != null);
      success = true;
    } catch (AccessControlException e) {
      logAuditEvent(false, "createSymlink", link, target, null);
      throw e;
    } finally {
      RetryCache.setState(cacheEntry, success);
    }
  }

  private void createSymlinkInt(String target, final String linkArg,
      PermissionStatus dirPerms, boolean createParent, boolean logRetryCache)
      throws IOException, UnresolvedLinkException {
    String link = linkArg;
    if (NameNode.stateChangeLog.isDebugEnabled()) {
      NameNode.stateChangeLog.debug("DIR* NameSystem.createSymlink: target="
          + target + " link=" + link);
    }
    HdfsFileStatus resultingStat = null;
    FSPermissionChecker pc = getPermissionChecker();
    checkOperation(OperationCategory.WRITE);
    byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(link);
    writeLock();
    try {
      checkOperation(OperationCategory.WRITE);
      checkNameNodeSafeMode("Cannot create symlink " + link);
      link = resolvePath(link, pathComponents);
      if (!createParent) {
        verifyParentDir(link);
      }
      if (!dir.isValidToCreate(link)) {
        throw new IOException("failed to create link " + link
            +" either because the filename is invalid or the file exists");
      }
      if (isPermissionEnabled) {
        checkAncestorAccess(pc, link, FsAction.WRITE);
      }
      // validate that we have enough inodes.
      checkFsObjectLimit();

      // add symbolic link to namespace
      addSymlink(link, target, dirPerms, createParent, logRetryCache);
      resultingStat = getAuditFileInfo(link, false);
    } finally {
      writeUnlock();
    }
    getEditLog().logSync();
    logAuditEvent(true, "createSymlink", linkArg, target, resultingStat);
  }

  /**
   * Set replication for an existing file.
   *
   * The NameNode sets new replication and schedules either replication of
   * under-replicated data blocks or removal of the excessive block copies
   * if the blocks are over-replicated.
   *
   * @see ClientProtocol#setReplication(String, short)
   * @param src file name
   * @param replication new replication
   * @return true if successful;
   *         false if file does not exist or is a directory
   */
  boolean setReplication(final String src, final short replication)
      throws IOException {
    try {
      return setReplicationInt(src, replication);
    } catch (AccessControlException e) {
      logAuditEvent(false, "setReplication", src);
      throw e;
    }
  }

  private boolean setReplicationInt(final String srcArg,
      final short replication) throws IOException {
    String src = srcArg;
    // Validate the requested replication factor before taking any lock.
    blockManager.verifyReplication(src, replication, null);
    final boolean isFile;
    FSPermissionChecker pc = getPermissionChecker();
    checkOperation(OperationCategory.WRITE);
    byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
    waitForLoadingFSImage();
    writeLock();
    try {
      checkOperation(OperationCategory.WRITE);
      checkNameNodeSafeMode("Cannot set replication for " + src);
      src = resolvePath(src,
          pathComponents);
      if (isPermissionEnabled) {
        checkPathAccess(pc, src, FsAction.WRITE);
      }

      final short[] blockRepls = new short[2]; // 0: old, 1: new
      final Block[] blocks = dir.setReplication(src, replication, blockRepls);
      // A null block array means src was not an existing file.
      isFile = blocks != null;
      if (isFile) {
        getEditLog().logSetReplication(src, replication);
        blockManager.setReplication(blockRepls[0], blockRepls[1], src, blocks);
      }
    } finally {
      writeUnlock();
    }

    getEditLog().logSync();
    if (isFile) {
      logAuditEvent(true, "setReplication", srcArg);
    }
    return isFile;
  }

  /**
   * Set the storage policy for a file or a directory.
   *
   * @param src file/directory path
   * @param policyName storage policy name
   */
  void setStoragePolicy(String src, final String policyName)
      throws IOException {
    try {
      setStoragePolicyInt(src, policyName);
    } catch (AccessControlException e) {
      logAuditEvent(false, "setStoragePolicy", src);
      throw e;
    }
  }

  private void setStoragePolicyInt(String src, final String policyName)
      throws IOException, UnresolvedLinkException, AccessControlException {

    if (!isStoragePolicyEnabled) {
      throw new IOException("Failed to set storage policy since "
          + DFS_STORAGE_POLICY_ENABLED_KEY + " is set to false.");
    }
    FSPermissionChecker pc = null;
    if (isPermissionEnabled) {
      pc = getPermissionChecker();
    }

    checkOperation(OperationCategory.WRITE);
    byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
    waitForLoadingFSImage();
    HdfsFileStatus fileStat;
    writeLock();
    try {
      checkOperation(OperationCategory.WRITE);
      checkNameNodeSafeMode("Cannot set storage policy for " + src);

      if (pc != null) {
        checkPermission(pc, src, false, null, null, FsAction.WRITE, null,
            false, true);
      }

      src = FSDirectory.resolvePath(src, pathComponents, dir);

      // get the corresponding policy and make sure the policy name is valid
      BlockStoragePolicy policy = blockManager.getStoragePolicy(policyName);
      if (policy == null) {
        throw new HadoopIllegalArgumentException(
            "Cannot find a block policy with the name " + policyName);
      }
      dir.setStoragePolicy(src, policy.getId());
      getEditLog().logSetStoragePolicy(src, policy.getId());
      fileStat = getAuditFileInfo(src, false);
    } finally {
      writeUnlock();
    }

    getEditLog().logSync();
    logAuditEvent(true, "setStoragePolicy", src, null, fileStat);
  }

  /**
   * @return All the existing block storage policies
   */
  BlockStoragePolicy[] getStoragePolicies() throws IOException {
    checkOperation(OperationCategory.READ);
    waitForLoadingFSImage();
    readLock();
    try {
      checkOperation(OperationCategory.READ);
      return blockManager.getStoragePolicies();
    } finally {
      readUnlock();
    }
  }

  long getPreferredBlockSize(String filename)
      throws IOException, UnresolvedLinkException {
    FSPermissionChecker pc = getPermissionChecker();
    checkOperation(OperationCategory.READ);
    byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(filename);
    readLock();
    try {
      checkOperation(OperationCategory.READ);
      filename = resolvePath(filename, pathComponents);
      if (isPermissionEnabled) {
        checkTraverse(pc, filename);
      }
      return dir.getPreferredBlockSize(filename);
    } finally {
      readUnlock();
    }
  }

  /**
   * Verify that parent directory of src exists.
   */
  private void verifyParentDir(String src) throws FileNotFoundException,
      ParentNotDirectoryException, UnresolvedLinkException {
    assert hasReadLock();
    Path parent = new Path(src).getParent();
    if (parent != null) {
      final INode parentNode = dir.getINode(parent.toString());
      if (parentNode == null) {
        throw new FileNotFoundException("Parent directory doesn't exist: "
            + parent);
      } else if (!parentNode.isDirectory() && !parentNode.isSymlink()) {
        throw new ParentNotDirectoryException("Parent path is not a directory: "
            + parent);
      }
    }
  }

  /**
   * If the file is within an encryption zone, select the appropriate
   * CryptoProtocolVersion from the list provided by the client. Since the
   * client may be newer, we need to handle unknown versions.
   *
   * @param zone EncryptionZone of the file
   * @param supportedVersions List of supported protocol versions
   * @return chosen protocol version
   * @throws UnknownCryptoProtocolVersionException if no client-provided
   *         version matches the zone's required version
   */
  private CryptoProtocolVersion chooseProtocolVersion(EncryptionZone zone,
      CryptoProtocolVersion[] supportedVersions)
      throws UnknownCryptoProtocolVersionException, UnresolvedLinkException,
        SnapshotAccessControlException {
    Preconditions.checkNotNull(zone);
    Preconditions.checkNotNull(supportedVersions);
    // Right now, we only support a single protocol version,
    // so simply look for it in the list of provided options
    final CryptoProtocolVersion required = zone.getVersion();

    for (CryptoProtocolVersion c : supportedVersions) {
      if (c.equals(CryptoProtocolVersion.UNKNOWN)) {
        // Skip versions this NameNode does not recognize (newer client).
        if (LOG.isDebugEnabled()) {
          LOG.debug("Ignoring unknown CryptoProtocolVersion provided by " +
              "client: " + c.getUnknownValue());
        }
        continue;
      }
      if (c.equals(required)) {
        return c;
      }
    }
    throw new UnknownCryptoProtocolVersionException(
        "No crypto protocol versions provided by the client are supported."
            + " Client provided: " + Arrays.toString(supportedVersions)
            + " NameNode supports: " + Arrays.toString(CryptoProtocolVersion
            .values()));
  }

  /**
   * Invoke KeyProvider APIs to generate an encrypted data encryption key for an
   * encryption zone. Should not be called with any locks held.
   *
   * @param ezKeyName key name of an encryption zone
   * @return New EDEK, or null if ezKeyName is null
   * @throws IOException if key generation fails
   */
  private EncryptedKeyVersion generateEncryptedDataEncryptionKey(String
      ezKeyName) throws IOException {
    if (ezKeyName == null) {
      return null;
    }
    EncryptedKeyVersion edek = null;
    try {
      edek = provider.generateEncryptedKey(ezKeyName);
    } catch (GeneralSecurityException e) {
      // Wrap so callers only need to handle IOException.
      throw new IOException(e);
    }
    Preconditions.checkNotNull(edek);
    return edek;
  }

  /**
   * Create a new file entry in the namespace.
   *
   * For description of parameters and exceptions thrown see
   * {@link ClientProtocol#create}, except it returns valid file status upon
   * success
   */
  HdfsFileStatus startFile(String src, PermissionStatus permissions,
      String holder, String clientMachine, EnumSet<CreateFlag> flag,
      boolean createParent, short replication, long blockSize,
      CryptoProtocolVersion[] supportedVersions)
      throws AccessControlException, SafeModeException,
      FileAlreadyExistsException, UnresolvedLinkException,
      FileNotFoundException, ParentNotDirectoryException, IOException {
    HdfsFileStatus status = null;
    // Retry cache: return the previously computed status for a retried RPC.
    CacheEntryWithPayload cacheEntry = RetryCache.waitForCompletion(retryCache,
        null);
    if (cacheEntry != null && cacheEntry.isSuccess()) {
      return (HdfsFileStatus) cacheEntry.getPayload();
    }

    try {
      status = startFileInt(src, permissions, holder, clientMachine, flag,
          createParent, replication, blockSize, supportedVersions,
          cacheEntry
          != null);
    } catch (AccessControlException e) {
      logAuditEvent(false, "create", src);
      throw e;
    } finally {
      RetryCache.setState(cacheEntry, status != null, status);
    }
    return status;
  }

  private HdfsFileStatus startFileInt(final String srcArg,
      PermissionStatus permissions, String holder, String clientMachine,
      EnumSet<CreateFlag> flag, boolean createParent, short replication,
      long blockSize, CryptoProtocolVersion[] supportedVersions,
      boolean logRetryCache)
      throws AccessControlException, SafeModeException,
      FileAlreadyExistsException, UnresolvedLinkException,
      FileNotFoundException, ParentNotDirectoryException, IOException {
    String src = srcArg;
    if (NameNode.stateChangeLog.isDebugEnabled()) {
      StringBuilder builder = new StringBuilder();
      builder.append("DIR* NameSystem.startFile: src=" + src
          + ", holder=" + holder
          + ", clientMachine=" + clientMachine
          + ", createParent=" + createParent
          + ", replication=" + replication
          + ", createFlag=" + flag.toString()
          + ", blockSize=" + blockSize);
      builder.append(", supportedVersions=");
      if (supportedVersions != null) {
        builder.append(Arrays.toString(supportedVersions));
      } else {
        builder.append("null");
      }
      NameNode.stateChangeLog.debug(builder.toString());
    }
    if (!DFSUtil.isValidName(src)) {
      throw new InvalidPathException(src);
    }
    blockManager.verifyReplication(src, replication, clientMachine);

    boolean skipSync = false;
    HdfsFileStatus stat = null;
    FSPermissionChecker pc = getPermissionChecker();
    if (blockSize < minBlockSize) {
      throw new IOException("Specified block size is less than configured" +
          " minimum value (" + DFSConfigKeys.DFS_NAMENODE_MIN_BLOCK_SIZE_KEY
          + "): " + blockSize + " < " + minBlockSize);
    }
    byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
    boolean create = flag.contains(CreateFlag.CREATE);
    boolean overwrite = flag.contains(CreateFlag.OVERWRITE);
    boolean isLazyPersist = flag.contains(CreateFlag.LAZY_PERSIST);

    waitForLoadingFSImage();

    /**
     * If the file is in an encryption zone, we optimistically create an
     * EDEK for the file by calling out to the configured KeyProvider.
     * Since this typically involves doing an RPC, we take the readLock
     * initially, then drop it to do the RPC.
     *
     * Since the path can flip-flop between being in an encryption zone and not
     * in the meantime, we need to recheck the preconditions when we retake the
     * lock to do the create. If the preconditions are not met, we throw a
     * special RetryStartFileException to ask the DFSClient to try the create
     * again later.
     */
    CryptoProtocolVersion protocolVersion = null;
    CipherSuite suite = null;
    String ezKeyName = null;
    readLock();
    try {
      src = resolvePath(src, pathComponents);
      INodesInPath iip = dir.getINodesInPath4Write(src);
      // Nothing to do if the path is not within an EZ
      if (dir.isInAnEZ(iip)) {
        EncryptionZone zone = dir.getEZForPath(iip);
        protocolVersion = chooseProtocolVersion(zone, supportedVersions);
        suite = zone.getSuite();
        ezKeyName = dir.getKeyName(iip);

        Preconditions.checkNotNull(protocolVersion);
        Preconditions.checkNotNull(suite);
        Preconditions.checkArgument(!suite.equals(CipherSuite.UNKNOWN),
            "Chose an UNKNOWN CipherSuite!");
        Preconditions.checkNotNull(ezKeyName);
      }
    } finally {
      readUnlock();
    }

    Preconditions.checkState(
        (suite == null && ezKeyName == null) ||
        (suite != null && ezKeyName != null),
        "Both suite and ezKeyName should both be null or not null");

    // Generate EDEK if necessary while not holding the lock
    EncryptedKeyVersion edek =
        generateEncryptedDataEncryptionKey(ezKeyName);
    EncryptionFaultInjector.getInstance().startFileAfterGenerateKey();

    // Proceed with the create, using the computed cipher suite and
    // generated EDEK
    BlocksMapUpdateInfo toRemoveBlocks = null;
    writeLock();
    try {
      checkOperation(OperationCategory.WRITE);
      // NOTE(review): message is missing a space before the path
      // ("Cannot create file/foo"); kept byte-identical here.
      checkNameNodeSafeMode("Cannot create file" + src);
      src = resolvePath(src, pathComponents);
      toRemoveBlocks = startFileInternal(pc, src, permissions, holder,
          clientMachine, create, overwrite, createParent, replication,
          blockSize, isLazyPersist, suite, protocolVersion, edek, logRetryCache);
      stat = dir.getFileInfo(src, false,
          FSDirectory.isReservedRawName(srcArg), true);
    } catch (StandbyException se) {
      skipSync = true;
      throw se;
    } finally {
      writeUnlock();
      // There might be transactions logged while trying to recover the lease.
      // They need to be sync'ed even when an exception was thrown.
      if (!skipSync) {
        getEditLog().logSync();
        if (toRemoveBlocks != null) {
          removeBlocks(toRemoveBlocks);
          toRemoveBlocks.clear();
        }
      }
    }

    logAuditEvent(true, "create", srcArg, null, stat);
    return stat;
  }

  /**
   * Create a new file or overwrite an existing file<br>
   *
   * Once the file is create the client then allocates a new block with the next
   * call using {@link ClientProtocol#addBlock}.
   * <p>
   * For description of parameters and exceptions thrown see
   * {@link ClientProtocol#create}
   */
  private BlocksMapUpdateInfo startFileInternal(FSPermissionChecker pc,
      String src, PermissionStatus permissions, String holder,
      String clientMachine, boolean create, boolean overwrite,
      boolean createParent, short replication, long blockSize,
      boolean isLazyPersist, CipherSuite suite, CryptoProtocolVersion version,
      EncryptedKeyVersion edek, boolean logRetryEntry)
      throws FileAlreadyExistsException, AccessControlException,
      UnresolvedLinkException, FileNotFoundException,
      ParentNotDirectoryException, RetryStartFileException, IOException {
    assert hasWriteLock();
    // Verify that the destination does not exist as a directory already.
    final INodesInPath iip = dir.getINodesInPath4Write(src);
    final INode inode = iip.getLastINode();
    if (inode != null && inode.isDirectory()) {
      throw new FileAlreadyExistsException(src +
          " already exists as a directory");
    }

    FileEncryptionInfo feInfo = null;
    if (dir.isInAnEZ(iip)) {
      // The path is now within an EZ, but we're missing encryption parameters.
      // The caller pre-generated these outside the lock; ask it to retry.
      if (suite == null || edek == null) {
        throw new RetryStartFileException();
      }
      // Path is within an EZ and we have provided encryption parameters.
      // Make sure that the generated EDEK matches the settings of the EZ
      // (the zone key may have changed while the lock was dropped).
      String ezKeyName = dir.getKeyName(iip);
      if (!ezKeyName.equals(edek.getEncryptionKeyName())) {
        throw new RetryStartFileException();
      }
      feInfo = new FileEncryptionInfo(suite, version,
          edek.getEncryptedKeyVersion().getMaterial(),
          edek.getEncryptedKeyIv(),
          ezKeyName, edek.getEncryptionKeyVersionName());
      Preconditions.checkNotNull(feInfo);
    }

    final INodeFile myFile = INodeFile.valueOf(inode, src, true);
    if (isPermissionEnabled) {
      if (overwrite && myFile != null) {
        checkPathAccess(pc, src, FsAction.WRITE);
      }
      /*
       * To overwrite existing file, need to check 'w' permission
       * of parent (equals to ancestor in this case)
       */
      checkAncestorAccess(pc, src, FsAction.WRITE);
    }

    if (!createParent) {
      verifyParentDir(src);
    }

    try {
      BlocksMapUpdateInfo toRemoveBlocks = null;
      if (myFile == null) {
        if (!create) {
          // File absent and CREATE not requested: only OVERWRITE was asked for.
          throw new FileNotFoundException("Can't overwrite non-existent " +
              src + " for client " + clientMachine);
        }
      } else {
        if (overwrite) {
          // Delete the existing file; its blocks are returned to the caller
          // so they can be removed after the edit log is sync'ed.
          toRemoveBlocks = new BlocksMapUpdateInfo();
          List<INode> toRemoveINodes = new ChunkedArrayList<INode>();
          long ret = dir.delete(src, toRemoveBlocks, toRemoveINodes, now());
          if (ret >= 0) {
            incrDeletedFileCount(ret);
            removePathAndBlocks(src, null, toRemoveINodes, true);
          }
        } else {
          // If lease soft limit time is expired, recover the lease
          recoverLeaseInternal(myFile, src, holder, clientMachine, false);
          throw new FileAlreadyExistsException(src + " for client " +
              clientMachine + " already exists");
        }
      }

      checkFsObjectLimit();
      INodeFile newNode = null;

      // Always do an implicit mkdirs for parent directory tree.
      Path parent = new Path(src).getParent();
      if (parent != null && mkdirsRecursively(parent.toString(),
          permissions, true, now())) {
        newNode = dir.addFile(src, permissions, replication, blockSize,
            holder, clientMachine);
      }

      if (newNode == null) {
        throw new IOException("Unable to add " + src + " to namespace");
      }
      leaseManager.addLease(newNode.getFileUnderConstructionFeature()
          .getClientName(), src);

      // Set encryption attributes if necessary
      if (feInfo != null) {
        dir.setFileEncryptionInfo(src, feInfo);
        // Re-fetch: setting the xattr replaces the inode's feature state.
        newNode = dir.getInode(newNode.getId()).asFile();
      }

      setNewINodeStoragePolicy(newNode, iip, isLazyPersist);

      // record file record in log, record new generation stamp
      getEditLog().logOpenFile(src, newNode, overwrite, logRetryEntry);
      if (NameNode.stateChangeLog.isDebugEnabled()) {
        NameNode.stateChangeLog.debug("DIR* NameSystem.startFile: added " +
            src + " inode " + newNode.getId() + " " + holder);
      }
      return toRemoveBlocks;
    } catch (IOException ie) {
      NameNode.stateChangeLog.warn("DIR* NameSystem.startFile: " + src + " " +
          ie.getMessage());
      throw ie;
    }
  }

  /**
   * Choose the storage policy for a newly created file: LAZY_PERSIST when the
   * flag was passed, otherwise inherit a copy-on-create policy from the
   * ancestor directory if one is in effect.
   */
  private void setNewINodeStoragePolicy(INodeFile inode,
                                        INodesInPath iip,
                                        boolean isLazyPersist)
      throws IOException {

    if (isLazyPersist) {
      BlockStoragePolicy lpPolicy =
          blockManager.getStoragePolicy("LAZY_PERSIST");

      // Set LAZY_PERSIST storage policy if the flag was passed to
      // CreateFile.
      if (lpPolicy == null) {
        throw new HadoopIllegalArgumentException(
            "The LAZY_PERSIST storage policy has been disabled " +
            "by the administrator.");
      }
      inode.setStoragePolicyID(lpPolicy.getId(),
                               iip.getLatestSnapshotId());
    } else {
      BlockStoragePolicy effectivePolicy =
          blockManager.getStoragePolicy(inode.getStoragePolicyID());

      if (effectivePolicy != null &&
          effectivePolicy.isCopyOnCreateFile()) {
        // Copy effective policy from ancestor directory to current file.
        inode.setStoragePolicyID(effectivePolicy.getId(),
                                 iip.getLatestSnapshotId());
      }
    }
  }

  /**
   * Prepare an existing file for append.
   * <p>
   *
   * The method returns the last block of the file if this is a partial block,
   * which can still be used for writing more data. The client uses the returned
   * block locations to form the data pipeline for this block.<br>
   * The method returns null if the last block is full. The client then
   * allocates a new block with the next call using
   * {@link ClientProtocol#addBlock}.
   * <p>
   *
   * For description of parameters and exceptions thrown see
   * {@link ClientProtocol#append(String, String)}
   *
   * @return the last block locations if the block is partial or null otherwise
   */
  private LocatedBlock appendFileInternal(FSPermissionChecker pc, String src,
      String holder, String clientMachine, boolean logRetryCache)
      throws AccessControlException, UnresolvedLinkException,
      FileNotFoundException, IOException {
    assert hasWriteLock();
    // Verify that the destination does not exist as a directory already.
    final INodesInPath iip = dir.getINodesInPath4Write(src);
    final INode inode = iip.getLastINode();
    if (inode != null && inode.isDirectory()) {
      throw new FileAlreadyExistsException("Cannot append to directory " + src
          + "; already exists as a directory.");
    }
    if (isPermissionEnabled) {
      checkPathAccess(pc, src, FsAction.WRITE);
    }

    try {
      if (inode == null) {
        throw new FileNotFoundException("failed to append to non-existent file "
            + src + " for client " + clientMachine);
      }
      INodeFile myFile = INodeFile.valueOf(inode, src, true);
      final BlockStoragePolicy lpPolicy =
          blockManager.getStoragePolicy("LAZY_PERSIST");

      // Appending to lazy-persist (memory-backed) files is not supported.
      if (lpPolicy != null &&
          lpPolicy.getId() == myFile.getStoragePolicyID()) {
        throw new UnsupportedOperationException(
            "Cannot append to lazy persist file " + src);
      }
      // Opening an existing file for write - may need to recover lease.
      recoverLeaseInternal(myFile, src, holder, clientMachine, false);

      // recoverLeaseInternal may create a new InodeFile via
      // finalizeINodeFileUnderConstruction so we need to refresh
      // the referenced file.
      myFile = INodeFile.valueOf(dir.getINode(src), src, true);
      final BlockInfo lastBlock = myFile.getLastBlock();
      // Check that the block has at least minimum replication.
      if (lastBlock != null && lastBlock.isComplete() &&
          !getBlockManager().isSufficientlyReplicated(lastBlock)) {
        throw new IOException("append: lastBlock=" + lastBlock +
            " of src=" + src + " is not sufficiently replicated yet.");
      }
      return prepareFileForWrite(src, iip, holder, clientMachine, true,
          logRetryCache);
    } catch (IOException ie) {
      NameNode.stateChangeLog.warn("DIR* NameSystem.append: " +ie.getMessage());
      throw ie;
    }
  }

  /**
   * Replace current node with a INodeUnderConstruction.
   * Recreate in-memory lease record.
2885 * 2886 * @param src path to the file 2887 * @param file existing file object 2888 * @param leaseHolder identifier of the lease holder on this file 2889 * @param clientMachine identifier of the client machine 2890 * @param writeToEditLog whether to persist this change to the edit log 2891 * @param logRetryCache whether to record RPC ids in editlog for retry cache 2892 * rebuilding 2893 * @return the last block locations if the block is partial or null otherwise 2894 * @throws UnresolvedLinkException 2895 * @throws IOException 2896 */ 2897 LocatedBlock prepareFileForWrite(String src, INodesInPath iip, 2898 String leaseHolder, String clientMachine, boolean writeToEditLog, 2899 boolean logRetryCache) throws IOException { 2900 final INodeFile file = iip.getLastINode().asFile(); 2901 final Quota.Counts delta = verifyQuotaForUCBlock(file, iip); 2902 2903 file.recordModification(iip.getLatestSnapshotId()); 2904 file.toUnderConstruction(leaseHolder, clientMachine); 2905 2906 leaseManager.addLease( 2907 file.getFileUnderConstructionFeature().getClientName(), src); 2908 2909 LocatedBlock ret = blockManager.convertLastBlockToUnderConstruction(file); 2910 if (ret != null && delta != null) { 2911 Preconditions.checkState(delta.get(Quota.DISKSPACE) >= 0, 2912 "appending to a block with size larger than the preferred block size"); 2913 dir.writeLock(); 2914 try { 2915 dir.updateCountNoQuotaCheck(iip, iip.length() - 1, 2916 delta.get(Quota.NAMESPACE), delta.get(Quota.DISKSPACE)); 2917 } finally { 2918 dir.writeUnlock(); 2919 } 2920 } 2921 2922 if (writeToEditLog) { 2923 getEditLog().logOpenFile(src, file, false, logRetryCache); 2924 } 2925 return ret; 2926 } 2927 2928 /** 2929 * Verify quota when using the preferred block size for UC block. This is 2930 * usually used by append and truncate 2931 * @throws QuotaExceededException when violating the storage quota 2932 * @return expected quota usage update. 
null means no change or no need to 2933 * update quota usage later 2934 */ 2935 private Quota.Counts verifyQuotaForUCBlock(INodeFile file, INodesInPath iip) 2936 throws QuotaExceededException { 2937 if (!isImageLoaded() || dir.shouldSkipQuotaChecks()) { 2938 // Do not check quota if editlog is still being processed 2939 return null; 2940 } 2941 if (file.getLastBlock() != null) { 2942 final Quota.Counts delta = computeQuotaDeltaForUCBlock(file); 2943 dir.readLock(); 2944 try { 2945 FSDirectory.verifyQuota(iip.getINodes(), iip.length() - 1, 2946 delta.get(Quota.NAMESPACE), delta.get(Quota.DISKSPACE), null); 2947 return delta; 2948 } finally { 2949 dir.readUnlock(); 2950 } 2951 } 2952 return null; 2953 } 2954 2955 /** Compute quota change for converting a complete block to a UC block */ 2956 private Quota.Counts computeQuotaDeltaForUCBlock(INodeFile file) { 2957 final BlockInfo lastBlock = file.getLastBlock(); 2958 if (lastBlock != null) { 2959 final long diff = file.getPreferredBlockSize() - lastBlock.getNumBytes(); 2960 final short repl = file.getBlockReplication(); 2961 return Quota.Counts.newInstance(0, diff * repl); 2962 } else { 2963 return Quota.Counts.newInstance(); 2964 } 2965 } 2966 2967 /** 2968 * Recover lease; 2969 * Immediately revoke the lease of the current lease holder and start lease 2970 * recovery so that the file can be forced to be closed. 
   *
   * @param src the path of the file to start lease recovery
   * @param holder the lease holder's name
   * @param clientMachine the client machine's name
   * @return true if the file is already closed
   * @throws IOException
   */
  boolean recoverLease(String src, String holder, String clientMachine)
      throws IOException {
    if (!DFSUtil.isValidName(src)) {
      throw new IOException("Invalid file name: " + src);
    }

    boolean skipSync = false;
    FSPermissionChecker pc = getPermissionChecker();
    checkOperation(OperationCategory.WRITE);
    byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
    writeLock();
    try {
      checkOperation(OperationCategory.WRITE);
      checkNameNodeSafeMode("Cannot recover the lease of " + src);
      src = resolvePath(src, pathComponents);
      final INodeFile inode = INodeFile.valueOf(dir.getINode(src), src);
      if (!inode.isUnderConstruction()) {
        // Not under construction => nothing to recover; already closed.
        return true;
      }
      if (isPermissionEnabled) {
        checkPathAccess(pc, src, FsAction.WRITE);
      }

      // force=true: revoke immediately, do not wait for soft-limit expiry.
      recoverLeaseInternal(inode, src, holder, clientMachine, true);
    } catch (StandbyException se) {
      skipSync = true;
      throw se;
    } finally {
      writeUnlock();
      // There might be transactions logged while trying to recover the lease.
      // They need to be sync'ed even when an exception was thrown.
      if (!skipSync) {
        getEditLog().logSync();
      }
    }
    return false;
  }

  /**
   * Shared lease-recovery logic used by create, append and recoverLease.
   * No-op unless the file is currently under construction.
   *
   * @param fileInode the file whose lease may need recovery
   * @param src path to the file
   * @param holder client requesting the operation
   * @param clientMachine identifier of the requesting client machine
   * @param force when true, release the current lease immediately; when
   *              false, only start recovery if the soft limit has expired,
   *              otherwise throw to make the caller back off
   */
  private void recoverLeaseInternal(INodeFile fileInode,
      String src, String holder, String clientMachine, boolean force)
      throws IOException {
    assert hasWriteLock();
    if (fileInode != null && fileInode.isUnderConstruction()) {
      //
      // If the file is under construction , then it must be in our
      // leases. Find the appropriate lease record.
      //
      Lease lease = leaseManager.getLease(holder);
      //
      // We found the lease for this file. And surprisingly the original
      // holder is trying to recreate this file. This should never occur.
      //

      if (!force && lease != null) {
        Lease leaseFile = leaseManager.getLeaseByPath(src);
        if (leaseFile != null && leaseFile.equals(lease)) {
          throw new AlreadyBeingCreatedException(
              "failed to create file " + src + " for " + holder +
              " for client " + clientMachine +
              " because current leaseholder is trying to recreate file.");
        }
      }
      //
      // Find the original holder.
      //
      FileUnderConstructionFeature uc = fileInode.getFileUnderConstructionFeature();
      String clientName = uc.getClientName();
      lease = leaseManager.getLease(clientName);
      if (lease == null) {
        throw new AlreadyBeingCreatedException(
            "failed to create file " + src + " for " + holder +
            " for client " + clientMachine +
            " because pendingCreates is non-null but no leases found.");
      }
      if (force) {
        // close now: no need to wait for soft lease expiration and
        // close only the file src
        LOG.info("recoverLease: " + lease + ", src=" + src +
            " from client " + clientName);
        internalReleaseLease(lease, src, holder);
      } else {
        assert lease.getHolder().equals(clientName) :
            "Current lease holder " + lease.getHolder() +
            " does not match file creator " + clientName;
        //
        // If the original holder has not renewed in the last SOFTLIMIT
        // period, then start lease recovery.
        //
        if (lease.expiredSoftLimit()) {
          LOG.info("startFile: recover " + lease + ", src=" + src + " client "
              + clientName);
          boolean isClosed = internalReleaseLease(lease, src, null);
          if (!isClosed)
            throw new RecoveryInProgressException(
                "Failed to close file " + src +
                ". Lease recovery is in progress. Try again later.");
        } else {
          final BlockInfo lastBlock = fileInode.getLastBlock();
          if (lastBlock != null
              && lastBlock.getBlockUCState() == BlockUCState.UNDER_RECOVERY) {
            throw new RecoveryInProgressException("Recovery in progress, file ["
                + src + "], " + "lease owner [" + lease.getHolder() + "]");
          } else {
            throw new AlreadyBeingCreatedException("Failed to create file ["
                + src + "] for [" + holder + "] for client [" + clientMachine
                + "], because this file is already being created by ["
                + clientName + "] on ["
                + uc.getClientMachine() + "]");
          }
        }
      }
    }
  }

  /**
   * Append to an existing file in the namespace.
   */
  LocatedBlock appendFile(String src, String holder, String clientMachine)
      throws AccessControlException, SafeModeException,
      FileAlreadyExistsException, FileNotFoundException,
      ParentNotDirectoryException, IOException {
    LocatedBlock lb = null;
    // Consult the retry cache first: a retried RPC returns the cached block.
    CacheEntryWithPayload cacheEntry = RetryCache.waitForCompletion(retryCache,
        null);
    if (cacheEntry != null && cacheEntry.isSuccess()) {
      return (LocatedBlock) cacheEntry.getPayload();
    }

    boolean success = false;
    try {
      lb = appendFileInt(src, holder, clientMachine, cacheEntry != null);
      success = true;
      return lb;
    } catch (AccessControlException e) {
      logAuditEvent(false, "append", src);
      throw e;
    } finally {
      RetryCache.setState(cacheEntry, success, lb);
    }
  }

  /**
   * Internal implementation of {@link ClientProtocol#append(String, String)}:
   * performs the append under the write lock after checking that append
   * support is enabled.
   */
  private LocatedBlock appendFileInt(final String srcArg, String holder,
      String clientMachine, boolean logRetryCache)
      throws AccessControlException, SafeModeException,
      FileAlreadyExistsException, FileNotFoundException,
      ParentNotDirectoryException, IOException {
    String src = srcArg;
    if (NameNode.stateChangeLog.isDebugEnabled()) {
      NameNode.stateChangeLog.debug("DIR* NameSystem.appendFile: src=" + src
          + ", holder=" + holder
3128 + ", clientMachine=" + clientMachine); 3129 } 3130 boolean skipSync = false; 3131 if (!supportAppends) { 3132 throw new UnsupportedOperationException( 3133 "Append is not enabled on this NameNode. Use the " + 3134 DFS_SUPPORT_APPEND_KEY + " configuration option to enable it."); 3135 } 3136 3137 LocatedBlock lb = null; 3138 FSPermissionChecker pc = getPermissionChecker(); 3139 byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src); 3140 writeLock(); 3141 try { 3142 checkOperation(OperationCategory.WRITE); 3143 checkNameNodeSafeMode("Cannot append to file" + src); 3144 src = resolvePath(src, pathComponents); 3145 lb = appendFileInternal(pc, src, holder, clientMachine, logRetryCache); 3146 } catch (StandbyException se) { 3147 skipSync = true; 3148 throw se; 3149 } finally { 3150 writeUnlock(); 3151 // There might be transactions logged while trying to recover the lease. 3152 // They need to be sync'ed even when an exception was thrown. 3153 if (!skipSync) { 3154 getEditLog().logSync(); 3155 } 3156 } 3157 if (lb != null) { 3158 if (NameNode.stateChangeLog.isDebugEnabled()) { 3159 NameNode.stateChangeLog.debug("DIR* NameSystem.appendFile: file " 3160 +src+" for "+holder+" at "+clientMachine 3161 +" block " + lb.getBlock() 3162 +" block size " + lb.getBlock().getNumBytes()); 3163 } 3164 } 3165 logAuditEvent(true, "append", srcArg); 3166 return lb; 3167 } 3168 3169 ExtendedBlock getExtendedBlock(Block blk) { 3170 return new ExtendedBlock(blockPoolId, blk); 3171 } 3172 3173 void setBlockPoolId(String bpid) { 3174 blockPoolId = bpid; 3175 blockManager.setBlockPoolId(blockPoolId); 3176 } 3177 3178 /** 3179 * The client would like to obtain an additional block for the indicated 3180 * filename (which is being written-to). Return an array that consists 3181 * of the block, plus a set of machines. The first on this list should 3182 * be where the client writes data. 
   * Subsequent items in the list must
   * be provided in the connection to the first datanode.
   *
   * Make sure the previous blocks have been reported by datanodes and
   * are replicated.  Will return an empty 2-elt array if we want the
   * client to "try again later".
   */
  LocatedBlock getAdditionalBlock(String src, long fileId, String clientName,
      ExtendedBlock previous, Set<Node> excludedNodes,
      List<String> favoredNodes)
      throws LeaseExpiredException, NotReplicatedYetException,
      QuotaExceededException, SafeModeException, UnresolvedLinkException,
      IOException {
    final long blockSize;
    final int replication;
    final byte storagePolicyID;
    Node clientNode = null;
    String clientMachine = null;

    if(NameNode.stateChangeLog.isDebugEnabled()) {
      NameNode.stateChangeLog.debug("BLOCK* NameSystem.getAdditionalBlock: "
          + src + " inodeId " + fileId + " for " + clientName);
    }

    // Part I. Analyze the state of the file with respect to the input data.
    // Done under the read lock so target selection (an expensive operation)
    // can happen without holding the write lock.
    checkOperation(OperationCategory.READ);
    byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
    readLock();
    try {
      checkOperation(OperationCategory.READ);
      src = resolvePath(src, pathComponents);
      LocatedBlock[] onRetryBlock = new LocatedBlock[1];
      FileState fileState = analyzeFileState(
          src, fileId, clientName, previous, onRetryBlock);
      final INodeFile pendingFile = fileState.inode;
      src = fileState.path;

      if (onRetryBlock[0] != null && onRetryBlock[0].getLocations().length > 0) {
        // This is a retry. Just return the last block if having locations.
        return onRetryBlock[0];
      }
      if (pendingFile.getBlocks().length >= maxBlocksPerFile) {
        throw new IOException("File has reached the limit on maximum number of"
            + " blocks (" + DFSConfigKeys.DFS_NAMENODE_MAX_BLOCKS_PER_FILE_KEY
            + "): " + pendingFile.getBlocks().length + " >= "
            + maxBlocksPerFile);
      }
      blockSize = pendingFile.getPreferredBlockSize();
      clientMachine = pendingFile.getFileUnderConstructionFeature()
          .getClientMachine();
      clientNode = blockManager.getDatanodeManager().getDatanodeByHost(
          clientMachine);
      replication = pendingFile.getFileReplication();
      storagePolicyID = pendingFile.getStoragePolicyID();
    } finally {
      readUnlock();
    }

    if (clientNode == null) {
      clientNode = getClientNode(clientMachine);
    }

    // choose targets for the new block to be allocated.
    // NOTE: done without holding the namesystem lock.
    final DatanodeStorageInfo targets[] = getBlockManager().chooseTarget4NewBlock(
        src, replication, clientNode, excludedNodes, blockSize, favoredNodes,
        storagePolicyID);

    // Part II.
    // Allocate a new block, add it to the INode and the BlocksMap.
    Block newBlock = null;
    long offset;
    checkOperation(OperationCategory.WRITE);
    waitForLoadingFSImage();
    writeLock();
    try {
      checkOperation(OperationCategory.WRITE);
      // Run the full analysis again, since things could have changed
      // while chooseTarget() was executing.
      LocatedBlock[] onRetryBlock = new LocatedBlock[1];
      FileState fileState =
          analyzeFileState(src, fileId, clientName, previous, onRetryBlock);
      final INodeFile pendingFile = fileState.inode;
      src = fileState.path;

      if (onRetryBlock[0] != null) {
        if (onRetryBlock[0].getLocations().length > 0) {
          // This is a retry. Just return the last block if having locations.
          return onRetryBlock[0];
        } else {
          // add new chosen targets to already allocated block and return
          BlockInfo lastBlockInFile = pendingFile.getLastBlock();
          ((BlockInfoUnderConstruction) lastBlockInFile)
              .setExpectedLocations(targets);
          offset = pendingFile.computeFileSize();
          return makeLocatedBlock(lastBlockInFile, targets, offset);
        }
      }

      // commit the last block and complete it if it has minimum replicas
      commitOrCompleteLastBlock(pendingFile,
          ExtendedBlock.getLocalBlock(previous));

      // allocate new block, record block locations in INode.
      newBlock = createNewBlock();
      INodesInPath inodesInPath = INodesInPath.fromINode(pendingFile);
      saveAllocatedBlock(src, inodesInPath, newBlock, targets);

      persistNewBlock(src, pendingFile);
      offset = pendingFile.computeFileSize();
    } finally {
      writeUnlock();
    }
    getEditLog().logSync();

    // Return located block
    return makeLocatedBlock(newBlock, targets, offset);
  }

  /*
   * Resolve clientmachine address to get a network location path
   */
  private Node getClientNode(String clientMachine) {
    List<String> hosts = new ArrayList<String>(1);
    hosts.add(clientMachine);
    List<String> rName = getBlockManager().getDatanodeManager()
        .resolveNetworkLocation(hosts);
    Node clientNode = null;
    if (rName != null) {
      // Able to resolve clientMachine mapping.
      // Create a temp node to find out the rack-local nodes
      clientNode = new NodeBase(rName.get(0) + NodeBase.PATH_SEPARATOR_STR
          + clientMachine);
    }
    return clientNode;
  }

  /** Immutable pair of a file-under-construction inode and its resolved path. */
  static class FileState {
    public final INodeFile inode;
    public final String path;

    public FileState(INodeFile inode, String fullPath) {
      this.inode = inode;
      this.path = fullPath;
    }
  }

  /**
   * Validate a getAdditionalBlock request against the current state of the
   * file: lease ownership, safe mode, fs object limits, and whether the
   * client's notion of the last block matches ours (retry detection).
   *
   * @param onRetryBlock single-element out-param; set to the previously
   *                     allocated block when the request is recognized as a
   *                     retry (case 2 below), otherwise left null
   */
  FileState analyzeFileState(String src,
                             long fileId,
                             String clientName,
                             ExtendedBlock previous,
                             LocatedBlock[] onRetryBlock)
          throws IOException {
    assert hasReadLock();

    checkBlock(previous);
    onRetryBlock[0] = null;
    checkOperation(OperationCategory.WRITE);
    checkNameNodeSafeMode("Cannot add block to " + src);

    // have we exceeded the configured limit of fs objects.
    checkFsObjectLimit();

    Block previousBlock = ExtendedBlock.getLocalBlock(previous);
    INode inode;
    if (fileId == INodeId.GRANDFATHER_INODE_ID) {
      // Older clients may not have given us an inode ID to work with.
      // In this case, we have to try to resolve the path and hope it
      // hasn't changed or been deleted since the file was opened for write.
      final INodesInPath iip = dir.getINodesInPath4Write(src);
      inode = iip.getLastINode();
    } else {
      // Newer clients pass the inode ID, so we can just get the inode
      // directly.
      inode = dir.getInode(fileId);
      if (inode != null) src = inode.getFullPathName();
    }
    final INodeFile pendingFile = checkLease(src, clientName, inode, fileId);
    BlockInfo lastBlockInFile = pendingFile.getLastBlock();
    if (!Block.matchingIdAndGenStamp(previousBlock, lastBlockInFile)) {
      // The block that the client claims is the current last block
      // doesn't match up with what we think is the last block. There are
      // four possibilities:
      // 1) This is the first block allocation of an append() pipeline
      //    which started appending exactly at or exceeding the block boundary.
      //    In this case, the client isn't passed the previous block,
      //    so it makes the allocateBlock() call with previous=null.
      //    We can distinguish this since the last block of the file
      //    will be exactly a full block.
      // 2) This is a retry from a client that missed the response of a
      //    prior getAdditionalBlock() call, perhaps because of a network
      //    timeout, or because of an HA failover. In that case, we know
      //    by the fact that the client is re-issuing the RPC that it
      //    never began to write to the old block. Hence it is safe to
      //    return the existing block.
      // 3) This is an entirely bogus request/bug -- we should error out
      //    rather than potentially appending a new block with an empty
      //    one in the middle, etc
      // 4) This is a retry from a client that timed out while
      //    the prior getAdditionalBlock() is still being processed,
      //    currently working on chooseTarget().
      //    There are no means to distinguish between the first and
      //    the second attempts in Part I, because the first one hasn't
      //    changed the namesystem state yet.
      //    We run this analysis again in Part II where case 4 is impossible.

      BlockInfo penultimateBlock = pendingFile.getPenultimateBlock();
      if (previous == null &&
          lastBlockInFile != null &&
          lastBlockInFile.getNumBytes() >= pendingFile.getPreferredBlockSize() &&
          lastBlockInFile.isComplete()) {
        // Case 1
        if (NameNode.stateChangeLog.isDebugEnabled()) {
          NameNode.stateChangeLog.debug(
              "BLOCK* NameSystem.allocateBlock: handling block allocation" +
              " writing to a file with a complete previous block: src=" +
              src + " lastBlock=" + lastBlockInFile);
        }
      } else if (Block.matchingIdAndGenStamp(penultimateBlock, previousBlock)) {
        if (lastBlockInFile.getNumBytes() != 0) {
          throw new IOException(
              "Request looked like a retry to allocate block " +
              lastBlockInFile + " but it already contains " +
              lastBlockInFile.getNumBytes() + " bytes");
        }

        // Case 2
        // Return the last block.
        NameNode.stateChangeLog.info("BLOCK* allocateBlock: " +
            "caught retry for allocation of a new block in " +
            src + ". Returning previously allocated block " + lastBlockInFile);
        long offset = pendingFile.computeFileSize();
        onRetryBlock[0] = makeLocatedBlock(lastBlockInFile,
            ((BlockInfoUnderConstruction)lastBlockInFile).getExpectedStorageLocations(),
            offset);
        return new FileState(pendingFile, src);
      } else {
        // Case 3
        throw new IOException("Cannot allocate block in " + src + ": " +
            "passed 'previous' block " + previous + " does not match actual " +
            "last block in file " + lastBlockInFile);
      }
    }

    // Check if the penultimate block is minimally replicated
    if (!checkFileProgress(pendingFile, false)) {
      throw new NotReplicatedYetException("Not replicated yet: " + src);
    }
    return new FileState(pendingFile, src);
  }

  /** Build a LocatedBlock (with a WRITE block token) for the given block,
   *  storages and file offset. */
  LocatedBlock makeLocatedBlock(Block blk, DatanodeStorageInfo[] locs,
      long offset) throws IOException {
    LocatedBlock lBlk = new LocatedBlock(
        getExtendedBlock(blk), locs, offset, false);
    getBlockManager().setBlockToken(
        lBlk, BlockTokenSecretManager.AccessMode.WRITE);
    return lBlk;
  }

  /** @see ClientProtocol#getAdditionalDatanode */
  LocatedBlock getAdditionalDatanode(String src, long fileId,
      final ExtendedBlock blk, final DatanodeInfo[] existings,
      final String[] storageIDs,
      final Set<Node> excludes,
      final int numAdditionalNodes, final String clientName
      ) throws IOException {
    //check if the feature is enabled
    dtpReplaceDatanodeOnFailure.checkEnabled();

    Node clientnode = null;
    String clientMachine;
    final long preferredblocksize;
    final byte storagePolicyID;
    final List<DatanodeStorageInfo> chosen;
    checkOperation(OperationCategory.READ);
    byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
    readLock();
    try {
      checkOperation(OperationCategory.READ);
      //check safe mode
      checkNameNodeSafeMode("Cannot add datanode; src=" +
src + ", blk=" + blk); 3463 src = resolvePath(src, pathComponents); 3464 3465 //check lease 3466 final INode inode; 3467 if (fileId == INodeId.GRANDFATHER_INODE_ID) { 3468 // Older clients may not have given us an inode ID to work with. 3469 // In this case, we have to try to resolve the path and hope it 3470 // hasn't changed or been deleted since the file was opened for write. 3471 inode = dir.getINode(src); 3472 } else { 3473 inode = dir.getInode(fileId); 3474 if (inode != null) src = inode.getFullPathName(); 3475 } 3476 final INodeFile file = checkLease(src, clientName, inode, fileId); 3477 clientMachine = file.getFileUnderConstructionFeature().getClientMachine(); 3478 clientnode = blockManager.getDatanodeManager().getDatanodeByHost(clientMachine); 3479 preferredblocksize = file.getPreferredBlockSize(); 3480 storagePolicyID = file.getStoragePolicyID(); 3481 3482 //find datanode storages 3483 final DatanodeManager dm = blockManager.getDatanodeManager(); 3484 chosen = Arrays.asList(dm.getDatanodeStorageInfos(existings, storageIDs)); 3485 } finally { 3486 readUnlock(); 3487 } 3488 3489 if (clientnode == null) { 3490 clientnode = getClientNode(clientMachine); 3491 } 3492 3493 // choose new datanodes. 
3494 final DatanodeStorageInfo[] targets = blockManager.chooseTarget4AdditionalDatanode( 3495 src, numAdditionalNodes, clientnode, chosen, 3496 excludes, preferredblocksize, storagePolicyID); 3497 final LocatedBlock lb = new LocatedBlock(blk, targets); 3498 blockManager.setBlockToken(lb, AccessMode.COPY); 3499 return lb; 3500 } 3501 3502 /** 3503 * The client would like to let go of the given block 3504 */ 3505 boolean abandonBlock(ExtendedBlock b, long fileId, String src, String holder) 3506 throws LeaseExpiredException, FileNotFoundException, 3507 UnresolvedLinkException, IOException { 3508 if(NameNode.stateChangeLog.isDebugEnabled()) { 3509 NameNode.stateChangeLog.debug("BLOCK* NameSystem.abandonBlock: " + b 3510 + "of file " + src); 3511 } 3512 checkOperation(OperationCategory.WRITE); 3513 byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src); 3514 waitForLoadingFSImage(); 3515 writeLock(); 3516 try { 3517 checkOperation(OperationCategory.WRITE); 3518 checkNameNodeSafeMode("Cannot abandon block " + b + " for file" + src); 3519 src = resolvePath(src, pathComponents); 3520 3521 final INode inode; 3522 if (fileId == INodeId.GRANDFATHER_INODE_ID) { 3523 // Older clients may not have given us an inode ID to work with. 3524 // In this case, we have to try to resolve the path and hope it 3525 // hasn't changed or been deleted since the file was opened for write. 
3526 inode = dir.getINode(src); 3527 } else { 3528 inode = dir.getInode(fileId); 3529 if (inode != null) src = inode.getFullPathName(); 3530 } 3531 final INodeFile file = checkLease(src, holder, inode, fileId); 3532 3533 // 3534 // Remove the block from the pending creates list 3535 // 3536 boolean removed = dir.removeBlock(src, file, 3537 ExtendedBlock.getLocalBlock(b)); 3538 if (!removed) { 3539 return true; 3540 } 3541 if(NameNode.stateChangeLog.isDebugEnabled()) { 3542 NameNode.stateChangeLog.debug("BLOCK* NameSystem.abandonBlock: " 3543 + b + " is removed from pendingCreates"); 3544 } 3545 persistBlocks(src, file, false); 3546 } finally { 3547 writeUnlock(); 3548 } 3549 getEditLog().logSync(); 3550 3551 return true; 3552 } 3553 3554 private INodeFile checkLease(String src, String holder, INode inode, 3555 long fileId) 3556 throws LeaseExpiredException, FileNotFoundException { 3557 assert hasReadLock(); 3558 final String ident = src + " (inode " + fileId + ")"; 3559 if (inode == null) { 3560 Lease lease = leaseManager.getLease(holder); 3561 throw new LeaseExpiredException( 3562 "No lease on " + ident + ": File does not exist. " 3563 + (lease != null ? lease.toString() 3564 : "Holder " + holder + " does not have any open files.")); 3565 } 3566 if (!inode.isFile()) { 3567 Lease lease = leaseManager.getLease(holder); 3568 throw new LeaseExpiredException( 3569 "No lease on " + ident + ": INode is not a regular file. " 3570 + (lease != null ? lease.toString() 3571 : "Holder " + holder + " does not have any open files.")); 3572 } 3573 final INodeFile file = inode.asFile(); 3574 if (!file.isUnderConstruction()) { 3575 Lease lease = leaseManager.getLease(holder); 3576 throw new LeaseExpiredException( 3577 "No lease on " + ident + ": File is not open for writing. " 3578 + (lease != null ? lease.toString() 3579 : "Holder " + holder + " does not have any open files.")); 3580 } 3581 // No further modification is allowed on a deleted file. 
3582 // A file is considered deleted, if it is not in the inodeMap or is marked 3583 // as deleted in the snapshot feature. 3584 if (isFileDeleted(file)) { 3585 throw new FileNotFoundException(src); 3586 } 3587 String clientName = file.getFileUnderConstructionFeature().getClientName(); 3588 if (holder != null && !clientName.equals(holder)) { 3589 throw new LeaseExpiredException("Lease mismatch on " + ident + 3590 " owned by " + clientName + " but is accessed by " + holder); 3591 } 3592 return file; 3593 } 3594 3595 /** 3596 * Complete in-progress write to the given file. 3597 * @return true if successful, false if the client should continue to retry 3598 * (e.g if not all blocks have reached minimum replication yet) 3599 * @throws IOException on error (eg lease mismatch, file not open, file deleted) 3600 */ 3601 boolean completeFile(final String srcArg, String holder, 3602 ExtendedBlock last, long fileId) 3603 throws SafeModeException, UnresolvedLinkException, IOException { 3604 String src = srcArg; 3605 if (NameNode.stateChangeLog.isDebugEnabled()) { 3606 NameNode.stateChangeLog.debug("DIR* NameSystem.completeFile: " + 3607 src + " for " + holder); 3608 } 3609 checkBlock(last); 3610 boolean success = false; 3611 checkOperation(OperationCategory.WRITE); 3612 byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src); 3613 waitForLoadingFSImage(); 3614 writeLock(); 3615 try { 3616 checkOperation(OperationCategory.WRITE); 3617 checkNameNodeSafeMode("Cannot complete file " + src); 3618 src = resolvePath(src, pathComponents); 3619 success = completeFileInternal(src, holder, 3620 ExtendedBlock.getLocalBlock(last), fileId); 3621 } finally { 3622 writeUnlock(); 3623 } 3624 getEditLog().logSync(); 3625 if (success) { 3626 NameNode.stateChangeLog.info("DIR* completeFile: " + srcArg 3627 + " is closed by " + holder); 3628 } 3629 return success; 3630 } 3631 3632 private boolean completeFileInternal(String src, 3633 String holder, Block last, long fileId) 
throws SafeModeException, 3634 UnresolvedLinkException, IOException { 3635 assert hasWriteLock(); 3636 final INodeFile pendingFile; 3637 try { 3638 final INode inode; 3639 if (fileId == INodeId.GRANDFATHER_INODE_ID) { 3640 // Older clients may not have given us an inode ID to work with. 3641 // In this case, we have to try to resolve the path and hope it 3642 // hasn't changed or been deleted since the file was opened for write. 3643 final INodesInPath iip = dir.getLastINodeInPath(src); 3644 inode = iip.getINode(0); 3645 } else { 3646 inode = dir.getInode(fileId); 3647 if (inode != null) src = inode.getFullPathName(); 3648 } 3649 pendingFile = checkLease(src, holder, inode, fileId); 3650 } catch (LeaseExpiredException lee) { 3651 final INode inode = dir.getINode(src); 3652 if (inode != null 3653 && inode.isFile() 3654 && !inode.asFile().isUnderConstruction()) { 3655 // This could be a retry RPC - i.e the client tried to close 3656 // the file, but missed the RPC response. Thus, it is trying 3657 // again to close the file. If the file still exists and 3658 // the client's view of the last block matches the actual 3659 // last block, then we'll treat it as a successful close. 3660 // See HDFS-3031. 3661 final Block realLastBlock = inode.asFile().getLastBlock(); 3662 if (Block.matchingIdAndGenStamp(last, realLastBlock)) { 3663 NameNode.stateChangeLog.info("DIR* completeFile: " + 3664 "request from " + holder + " to complete inode " + fileId + 3665 "(" + src + ") which is already closed. But, it appears to be " + 3666 "an RPC retry. Returning success"); 3667 return true; 3668 } 3669 } 3670 throw lee; 3671 } 3672 // Check the state of the penultimate block. It should be completed 3673 // before attempting to complete the last one. 
3674 if (!checkFileProgress(pendingFile, false)) { 3675 return false; 3676 } 3677 3678 // commit the last block and complete it if it has minimum replicas 3679 commitOrCompleteLastBlock(pendingFile, last); 3680 3681 if (!checkFileProgress(pendingFile, true)) { 3682 return false; 3683 } 3684 3685 finalizeINodeFileUnderConstruction(src, pendingFile, 3686 Snapshot.CURRENT_STATE_ID); 3687 return true; 3688 } 3689 3690 /** 3691 * Save allocated block at the given pending filename 3692 * 3693 * @param src path to the file 3694 * @param inodesInPath representing each of the components of src. 3695 * The last INode is the INode for {@code src} file. 3696 * @param newBlock newly allocated block to be save 3697 * @param targets target datanodes where replicas of the new block is placed 3698 * @throws QuotaExceededException If addition of block exceeds space quota 3699 */ 3700 BlockInfo saveAllocatedBlock(String src, INodesInPath inodes, 3701 Block newBlock, DatanodeStorageInfo[] targets) 3702 throws IOException { 3703 assert hasWriteLock(); 3704 BlockInfo b = dir.addBlock(src, inodes, newBlock, targets); 3705 NameNode.stateChangeLog.info("BLOCK* allocateBlock: " + src + ". " 3706 + getBlockPoolId() + " " + b); 3707 DatanodeStorageInfo.incrementBlocksScheduled(targets); 3708 return b; 3709 } 3710 3711 /** 3712 * Create new block with a unique block id and a new generation stamp. 3713 */ 3714 Block createNewBlock() throws IOException { 3715 assert hasWriteLock(); 3716 Block b = new Block(nextBlockId(), 0, 0); 3717 // Increment the generation stamp for every new block. 3718 b.setGenerationStamp(nextGenerationStamp(false)); 3719 return b; 3720 } 3721 3722 /** 3723 * Check that the indicated file's blocks are present and 3724 * replicated. If not, return false. If checkall is true, then check 3725 * all blocks, otherwise check only penultimate block. 
3726 */ 3727 boolean checkFileProgress(INodeFile v, boolean checkall) { 3728 readLock(); 3729 try { 3730 if (checkall) { 3731 // 3732 // check all blocks of the file. 3733 // 3734 for (BlockInfo block: v.getBlocks()) { 3735 if (!block.isComplete()) { 3736 LOG.info("BLOCK* checkFileProgress: " + block 3737 + " has not reached minimal replication " 3738 + blockManager.minReplication); 3739 return false; 3740 } 3741 } 3742 } else { 3743 // 3744 // check the penultimate block of this file 3745 // 3746 BlockInfo b = v.getPenultimateBlock(); 3747 if (b != null && !b.isComplete()) { 3748 LOG.warn("BLOCK* checkFileProgress: " + b 3749 + " has not reached minimal replication " 3750 + blockManager.minReplication); 3751 return false; 3752 } 3753 } 3754 return true; 3755 } finally { 3756 readUnlock(); 3757 } 3758 } 3759 3760 //////////////////////////////////////////////////////////////// 3761 // Here's how to handle block-copy failure during client write: 3762 // -- As usual, the client's write should result in a streaming 3763 // backup write to a k-machine sequence. 3764 // -- If one of the backup machines fails, no worries. Fail silently. 3765 // -- Before client is allowed to close and finalize file, make sure 3766 // that the blocks are backed up. Namenode may have to issue specific backup 3767 // commands to make up for earlier datanode failures. Once all copies 3768 // are made, edit namespace and return to client. 3769 //////////////////////////////////////////////////////////////// 3770 3771 /** 3772 * Change the indicated filename. 3773 * @deprecated Use {@link #renameTo(String, String, Options.Rename...)} instead. 
3774 */ 3775 @Deprecated 3776 boolean renameTo(String src, String dst) 3777 throws IOException, UnresolvedLinkException { 3778 CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache); 3779 if (cacheEntry != null && cacheEntry.isSuccess()) { 3780 return true; // Return previous response 3781 } 3782 boolean ret = false; 3783 try { 3784 ret = renameToInt(src, dst, cacheEntry != null); 3785 } catch (AccessControlException e) { 3786 logAuditEvent(false, "rename", src, dst, null); 3787 throw e; 3788 } finally { 3789 RetryCache.setState(cacheEntry, ret); 3790 } 3791 return ret; 3792 } 3793 3794 private boolean renameToInt(final String srcArg, final String dstArg, 3795 boolean logRetryCache) 3796 throws IOException, UnresolvedLinkException { 3797 String src = srcArg; 3798 String dst = dstArg; 3799 if (NameNode.stateChangeLog.isDebugEnabled()) { 3800 NameNode.stateChangeLog.debug("DIR* NameSystem.renameTo: " + src + 3801 " to " + dst); 3802 } 3803 if (!DFSUtil.isValidName(dst)) { 3804 throw new IOException("Invalid name: " + dst); 3805 } 3806 FSPermissionChecker pc = getPermissionChecker(); 3807 byte[][] srcComponents = FSDirectory.getPathComponentsForReservedPath(src); 3808 byte[][] dstComponents = FSDirectory.getPathComponentsForReservedPath(dst); 3809 boolean status = false; 3810 HdfsFileStatus resultingStat = null; 3811 writeLock(); 3812 try { 3813 checkOperation(OperationCategory.WRITE); 3814 checkNameNodeSafeMode("Cannot rename " + src); 3815 waitForLoadingFSImage(); 3816 src = resolvePath(src, srcComponents); 3817 dst = resolvePath(dst, dstComponents); 3818 checkOperation(OperationCategory.WRITE); 3819 status = renameToInternal(pc, src, dst, logRetryCache); 3820 if (status) { 3821 resultingStat = getAuditFileInfo(dst, false); 3822 } 3823 } finally { 3824 writeUnlock(); 3825 } 3826 getEditLog().logSync(); 3827 if (status) { 3828 logAuditEvent(true, "rename", srcArg, dstArg, resultingStat); 3829 } 3830 return status; 3831 } 3832 3833 /** @deprecated See {@link 
#renameTo(String, String)} */ 3834 @Deprecated 3835 private boolean renameToInternal(FSPermissionChecker pc, String src, 3836 String dst, boolean logRetryCache) throws IOException, 3837 UnresolvedLinkException { 3838 assert hasWriteLock(); 3839 if (isPermissionEnabled) { 3840 //We should not be doing this. This is move() not renameTo(). 3841 //but for now, 3842 //NOTE: yes, this is bad! it's assuming much lower level behavior 3843 // of rewriting the dst 3844 String actualdst = dir.isDir(dst)? 3845 dst + Path.SEPARATOR + new Path(src).getName(): dst; 3846 // Rename does not operates on link targets 3847 // Do not resolveLink when checking permissions of src and dst 3848 // Check write access to parent of src 3849 checkPermission(pc, src, false, null, FsAction.WRITE, null, null, 3850 false, false); 3851 // Check write access to ancestor of dst 3852 checkPermission(pc, actualdst, false, FsAction.WRITE, null, null, null, 3853 false, false); 3854 } 3855 3856 long mtime = now(); 3857 if (dir.renameTo(src, dst, mtime)) { 3858 getEditLog().logRename(src, dst, mtime, logRetryCache); 3859 return true; 3860 } 3861 return false; 3862 } 3863 3864 3865 /** Rename src to dst */ 3866 void renameTo(final String srcArg, final String dstArg, 3867 Options.Rename... 
options) throws IOException, UnresolvedLinkException { 3868 String src = srcArg; 3869 String dst = dstArg; 3870 if (NameNode.stateChangeLog.isDebugEnabled()) { 3871 NameNode.stateChangeLog.debug("DIR* NameSystem.renameTo: with options - " 3872 + src + " to " + dst); 3873 } 3874 if (!DFSUtil.isValidName(dst)) { 3875 throw new InvalidPathException("Invalid name: " + dst); 3876 } 3877 final FSPermissionChecker pc = getPermissionChecker(); 3878 3879 CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache); 3880 if (cacheEntry != null && cacheEntry.isSuccess()) { 3881 return; // Return previous response 3882 } 3883 byte[][] srcComponents = FSDirectory.getPathComponentsForReservedPath(src); 3884 byte[][] dstComponents = FSDirectory.getPathComponentsForReservedPath(dst); 3885 HdfsFileStatus resultingStat = null; 3886 boolean success = false; 3887 writeLock(); 3888 BlocksMapUpdateInfo collectedBlocks = new BlocksMapUpdateInfo(); 3889 try { 3890 checkOperation(OperationCategory.WRITE); 3891 checkNameNodeSafeMode("Cannot rename " + src); 3892 src = resolvePath(src, srcComponents); 3893 dst = resolvePath(dst, dstComponents); 3894 renameToInternal(pc, src, dst, cacheEntry != null, 3895 collectedBlocks, options); 3896 resultingStat = getAuditFileInfo(dst, false); 3897 success = true; 3898 } finally { 3899 writeUnlock(); 3900 RetryCache.setState(cacheEntry, success); 3901 } 3902 getEditLog().logSync(); 3903 if (!collectedBlocks.getToDeleteList().isEmpty()) { 3904 removeBlocks(collectedBlocks); 3905 collectedBlocks.clear(); 3906 } 3907 if (resultingStat != null) { 3908 StringBuilder cmd = new StringBuilder("rename options="); 3909 for (Rename option : options) { 3910 cmd.append(option.value()).append(" "); 3911 } 3912 logAuditEvent(true, cmd.toString(), srcArg, dstArg, resultingStat); 3913 } 3914 } 3915 3916 private void renameToInternal(FSPermissionChecker pc, String src, 3917 String dst, boolean logRetryCache, BlocksMapUpdateInfo collectedBlocks, 3918 Options.Rename... 
options) throws IOException { 3919 assert hasWriteLock(); 3920 if (isPermissionEnabled) { 3921 // Rename does not operates on link targets 3922 // Do not resolveLink when checking permissions of src and dst 3923 // Check write access to parent of src 3924 checkPermission(pc, src, false, null, FsAction.WRITE, null, null, false, 3925 false); 3926 // Check write access to ancestor of dst 3927 checkPermission(pc, dst, false, FsAction.WRITE, null, null, null, false, 3928 false); 3929 } 3930 3931 waitForLoadingFSImage(); 3932 long mtime = now(); 3933 dir.renameTo(src, dst, mtime, collectedBlocks, options); 3934 getEditLog().logRename(src, dst, mtime, logRetryCache, options); 3935 } 3936 3937 /** 3938 * Remove the indicated file from namespace. 3939 * 3940 * @see ClientProtocol#delete(String, boolean) for detailed description and 3941 * description of exceptions 3942 */ 3943 boolean delete(String src, boolean recursive) 3944 throws AccessControlException, SafeModeException, 3945 UnresolvedLinkException, IOException { 3946 CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache); 3947 if (cacheEntry != null && cacheEntry.isSuccess()) { 3948 return true; // Return previous response 3949 } 3950 boolean ret = false; 3951 try { 3952 ret = deleteInt(src, recursive, cacheEntry != null); 3953 } catch (AccessControlException e) { 3954 logAuditEvent(false, "delete", src); 3955 throw e; 3956 } finally { 3957 RetryCache.setState(cacheEntry, ret); 3958 } 3959 return ret; 3960 } 3961 3962 private boolean deleteInt(String src, boolean recursive, boolean logRetryCache) 3963 throws AccessControlException, SafeModeException, 3964 UnresolvedLinkException, IOException { 3965 if (NameNode.stateChangeLog.isDebugEnabled()) { 3966 NameNode.stateChangeLog.debug("DIR* NameSystem.delete: " + src); 3967 } 3968 boolean status = deleteInternal(src, recursive, true, logRetryCache); 3969 if (status) { 3970 logAuditEvent(true, "delete", src); 3971 } 3972 return status; 3973 } 3974 3975 private 
FSPermissionChecker getPermissionChecker() 3976 throws AccessControlException { 3977 try { 3978 return new FSPermissionChecker(fsOwnerShortUserName, supergroup, getRemoteUser()); 3979 } catch (IOException ioe) { 3980 throw new AccessControlException(ioe); 3981 } 3982 } 3983 3984 /** 3985 * Remove a file/directory from the namespace. 3986 * <p> 3987 * For large directories, deletion is incremental. The blocks under 3988 * the directory are collected and deleted a small number at a time holding 3989 * the {@link FSNamesystem} lock. 3990 * <p> 3991 * For small directory or file the deletion is done in one shot. 3992 * 3993 * @see ClientProtocol#delete(String, boolean) for description of exceptions 3994 */ 3995 private boolean deleteInternal(String src, boolean recursive, 3996 boolean enforcePermission, boolean logRetryCache) 3997 throws AccessControlException, SafeModeException, UnresolvedLinkException, 3998 IOException { 3999 BlocksMapUpdateInfo collectedBlocks = new BlocksMapUpdateInfo(); 4000 List<INode> removedINodes = new ChunkedArrayList<INode>(); 4001 FSPermissionChecker pc = getPermissionChecker(); 4002 byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src); 4003 boolean ret = false; 4004 4005 waitForLoadingFSImage(); 4006 writeLock(); 4007 try { 4008 checkOperation(OperationCategory.WRITE); 4009 checkNameNodeSafeMode("Cannot delete " + src); 4010 src = resolvePath(src, pathComponents); 4011 if (!recursive && dir.isNonEmptyDirectory(src)) { 4012 throw new PathIsNotEmptyDirectoryException(src + " is non empty"); 4013 } 4014 if (enforcePermission && isPermissionEnabled) { 4015 checkPermission(pc, src, false, null, FsAction.WRITE, null, 4016 FsAction.ALL, true, false); 4017 } 4018 4019 long mtime = now(); 4020 // Unlink the target directory from directory tree 4021 long filesRemoved = dir.delete(src, collectedBlocks, removedINodes, 4022 mtime); 4023 if (filesRemoved < 0) { 4024 return false; 4025 } 4026 getEditLog().logDelete(src, mtime, 
logRetryCache); 4027 incrDeletedFileCount(filesRemoved); 4028 // Blocks/INodes will be handled later 4029 removePathAndBlocks(src, null, removedINodes, true); 4030 ret = true; 4031 } finally { 4032 writeUnlock(); 4033 } 4034 getEditLog().logSync(); 4035 removeBlocks(collectedBlocks); // Incremental deletion of blocks 4036 collectedBlocks.clear(); 4037 4038 if (NameNode.stateChangeLog.isDebugEnabled()) { 4039 NameNode.stateChangeLog.debug("DIR* Namesystem.delete: " 4040 + src +" is removed"); 4041 } 4042 return ret; 4043 } 4044 4045 /** 4046 * From the given list, incrementally remove the blocks from blockManager 4047 * Writelock is dropped and reacquired every BLOCK_DELETION_INCREMENT to 4048 * ensure that other waiters on the lock can get in. See HDFS-2938 4049 * 4050 * @param blocks 4051 * An instance of {@link BlocksMapUpdateInfo} which contains a list 4052 * of blocks that need to be removed from blocksMap 4053 */ 4054 void removeBlocks(BlocksMapUpdateInfo blocks) { 4055 List<Block> toDeleteList = blocks.getToDeleteList(); 4056 Iterator<Block> iter = toDeleteList.iterator(); 4057 while (iter.hasNext()) { 4058 writeLock(); 4059 try { 4060 for (int i = 0; i < BLOCK_DELETION_INCREMENT && iter.hasNext(); i++) { 4061 blockManager.removeBlock(iter.next()); 4062 } 4063 } finally { 4064 writeUnlock(); 4065 } 4066 } 4067 } 4068 4069 /** 4070 * Remove leases, inodes and blocks related to a given path 4071 * @param src The given path 4072 * @param blocks Containing the list of blocks to be deleted from blocksMap 4073 * @param removedINodes Containing the list of inodes to be removed from 4074 * inodesMap 4075 * @param acquireINodeMapLock Whether to acquire the lock for inode removal 4076 */ 4077 void removePathAndBlocks(String src, BlocksMapUpdateInfo blocks, 4078 List<INode> removedINodes, final boolean acquireINodeMapLock) { 4079 assert hasWriteLock(); 4080 leaseManager.removeLeaseWithPrefixPath(src); 4081 // remove inodes from inodesMap 4082 if (removedINodes != null) 
{ 4083 if (acquireINodeMapLock) { 4084 dir.writeLock(); 4085 } 4086 try { 4087 dir.removeFromInodeMap(removedINodes); 4088 } finally { 4089 if (acquireINodeMapLock) { 4090 dir.writeUnlock(); 4091 } 4092 } 4093 removedINodes.clear(); 4094 } 4095 if (blocks == null) { 4096 return; 4097 } 4098 4099 removeBlocksAndUpdateSafemodeTotal(blocks); 4100 } 4101 4102 /** 4103 * Removes the blocks from blocksmap and updates the safemode blocks total 4104 * 4105 * @param blocks 4106 * An instance of {@link BlocksMapUpdateInfo} which contains a list 4107 * of blocks that need to be removed from blocksMap 4108 */ 4109 void removeBlocksAndUpdateSafemodeTotal(BlocksMapUpdateInfo blocks) { 4110 assert hasWriteLock(); 4111 // In the case that we are a Standby tailing edits from the 4112 // active while in safe-mode, we need to track the total number 4113 // of blocks and safe blocks in the system. 4114 boolean trackBlockCounts = isSafeModeTrackingBlocks(); 4115 int numRemovedComplete = 0, numRemovedSafe = 0; 4116 4117 for (Block b : blocks.getToDeleteList()) { 4118 if (trackBlockCounts) { 4119 BlockInfo bi = getStoredBlock(b); 4120 if (bi.isComplete()) { 4121 numRemovedComplete++; 4122 if (bi.numNodes() >= blockManager.minReplication) { 4123 numRemovedSafe++; 4124 } 4125 } 4126 } 4127 blockManager.removeBlock(b); 4128 } 4129 if (trackBlockCounts) { 4130 if (LOG.isDebugEnabled()) { 4131 LOG.debug("Adjusting safe-mode totals for deletion." 4132 + "decreasing safeBlocks by " + numRemovedSafe 4133 + ", totalBlocks by " + numRemovedComplete); 4134 } 4135 adjustSafeModeBlockTotals(-numRemovedSafe, -numRemovedComplete); 4136 } 4137 } 4138 4139 /** 4140 * @see SafeModeInfo#shouldIncrementallyTrackBlocks 4141 */ 4142 private boolean isSafeModeTrackingBlocks() { 4143 if (!haEnabled) { 4144 // Never track blocks incrementally in non-HA code. 
4145 return false; 4146 } 4147 SafeModeInfo sm = this.safeMode; 4148 return sm != null && sm.shouldIncrementallyTrackBlocks(); 4149 } 4150 4151 /** 4152 * Get the file info for a specific file. 4153 * 4154 * @param srcArg The string representation of the path to the file 4155 * @param resolveLink whether to throw UnresolvedLinkException 4156 * if src refers to a symlink 4157 * 4158 * @throws AccessControlException if access is denied 4159 * @throws UnresolvedLinkException if a symlink is encountered. 4160 * 4161 * @return object containing information regarding the file 4162 * or null if file not found 4163 * @throws StandbyException 4164 */ 4165 HdfsFileStatus getFileInfo(final String srcArg, boolean resolveLink) 4166 throws AccessControlException, UnresolvedLinkException, 4167 StandbyException, IOException { 4168 String src = srcArg; 4169 if (!DFSUtil.isValidName(src)) { 4170 throw new InvalidPathException("Invalid file name: " + src); 4171 } 4172 HdfsFileStatus stat = null; 4173 FSPermissionChecker pc = getPermissionChecker(); 4174 checkOperation(OperationCategory.READ); 4175 byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src); 4176 readLock(); 4177 try { 4178 checkOperation(OperationCategory.READ); 4179 src = resolvePath(src, pathComponents); 4180 boolean isSuperUser = true; 4181 if (isPermissionEnabled) { 4182 checkPermission(pc, src, false, null, null, null, null, false, 4183 resolveLink); 4184 isSuperUser = pc.isSuperUser(); 4185 } 4186 stat = dir.getFileInfo(src, resolveLink, 4187 FSDirectory.isReservedRawName(srcArg), isSuperUser); 4188 } catch (AccessControlException e) { 4189 logAuditEvent(false, "getfileinfo", srcArg); 4190 throw e; 4191 } finally { 4192 readUnlock(); 4193 } 4194 logAuditEvent(true, "getfileinfo", srcArg); 4195 return stat; 4196 } 4197 4198 /** 4199 * Returns true if the file is closed 4200 */ 4201 boolean isFileClosed(final String srcArg) 4202 throws AccessControlException, UnresolvedLinkException, 4203 
StandbyException, IOException { 4204 String src = srcArg; 4205 FSPermissionChecker pc = getPermissionChecker(); 4206 checkOperation(OperationCategory.READ); 4207 byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src); 4208 readLock(); 4209 try { 4210 src = resolvePath(src, pathComponents); 4211 checkOperation(OperationCategory.READ); 4212 if (isPermissionEnabled) { 4213 checkTraverse(pc, src); 4214 } 4215 return !INodeFile.valueOf(dir.getINode(src), src).isUnderConstruction(); 4216 } catch (AccessControlException e) { 4217 if (isAuditEnabled() && isExternalInvocation()) { 4218 logAuditEvent(false, "isFileClosed", srcArg); 4219 } 4220 throw e; 4221 } finally { 4222 readUnlock(); 4223 } 4224 } 4225 4226 /** 4227 * Create all the necessary directories 4228 */ 4229 boolean mkdirs(String src, PermissionStatus permissions, 4230 boolean createParent) throws IOException, UnresolvedLinkException { 4231 boolean ret = false; 4232 try { 4233 ret = mkdirsInt(src, permissions, createParent); 4234 } catch (AccessControlException e) { 4235 logAuditEvent(false, "mkdirs", src); 4236 throw e; 4237 } 4238 return ret; 4239 } 4240 4241 private boolean mkdirsInt(final String srcArg, PermissionStatus permissions, 4242 boolean createParent) throws IOException, UnresolvedLinkException { 4243 String src = srcArg; 4244 if(NameNode.stateChangeLog.isDebugEnabled()) { 4245 NameNode.stateChangeLog.debug("DIR* NameSystem.mkdirs: " + src); 4246 } 4247 if (!DFSUtil.isValidName(src)) { 4248 throw new InvalidPathException(src); 4249 } 4250 FSPermissionChecker pc = getPermissionChecker(); 4251 checkOperation(OperationCategory.WRITE); 4252 byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src); 4253 HdfsFileStatus resultingStat = null; 4254 boolean status = false; 4255 writeLock(); 4256 try { 4257 checkOperation(OperationCategory.WRITE); 4258 checkNameNodeSafeMode("Cannot create directory " + src); 4259 src = resolvePath(src, pathComponents); 4260 status = 
mkdirsInternal(pc, src, permissions, createParent); 4261 if (status) { 4262 resultingStat = getAuditFileInfo(src, false); 4263 } 4264 } finally { 4265 writeUnlock(); 4266 } 4267 getEditLog().logSync(); 4268 if (status) { 4269 logAuditEvent(true, "mkdirs", srcArg, null, resultingStat); 4270 } 4271 return status; 4272 } 4273 4274 /** 4275 * Create all the necessary directories 4276 */ 4277 private boolean mkdirsInternal(FSPermissionChecker pc, String src, 4278 PermissionStatus permissions, boolean createParent) 4279 throws IOException, UnresolvedLinkException { 4280 assert hasWriteLock(); 4281 if (isPermissionEnabled) { 4282 checkTraverse(pc, src); 4283 } 4284 if (dir.isDirMutable(src)) { 4285 // all the users of mkdirs() are used to expect 'true' even if 4286 // a new directory is not created. 4287 return true; 4288 } 4289 if (isPermissionEnabled) { 4290 checkAncestorAccess(pc, src, FsAction.WRITE); 4291 } 4292 if (!createParent) { 4293 verifyParentDir(src); 4294 } 4295 4296 // validate that we have enough inodes. This is, at best, a 4297 // heuristic because the mkdirs() operation might need to 4298 // create multiple inodes. 4299 checkFsObjectLimit(); 4300 4301 if (!mkdirsRecursively(src, permissions, false, now())) { 4302 throw new IOException("Failed to create directory: " + src); 4303 } 4304 return true; 4305 } 4306 4307 /** 4308 * Create a directory 4309 * If ancestor directories do not exist, automatically create them. 4310 4311 * @param src string representation of the path to the directory 4312 * @param permissions the permission of the directory 4313 * @param inheritPermission if the permission of the directory should inherit 4314 * from its parent or not. 
   *                          u+wx is implicitly added to
   *                          the automatically created directories, and to the
   *                          given directory if inheritPermission is true
   * @param now creation time
   * @return true if the operation succeeds false otherwise
   * @throws QuotaExceededException if directory creation violates
   *                                any quota limit
   * @throws UnresolvedLinkException if a symlink is encountered in src.
   * @throws SnapshotAccessControlException if path is in RO snapshot
   */
  private boolean mkdirsRecursively(String src, PermissionStatus permissions,
      boolean inheritPermission, long now)
      throws FileAlreadyExistsException, QuotaExceededException,
             UnresolvedLinkException, SnapshotAccessControlException,
             AclException {
    src = FSDirectory.normalizePath(src);
    byte[][] components = INode.getPathComponents(src);
    final int lastInodeIndex = components.length - 1;

    dir.writeLock();
    try {
      INodesInPath iip = dir.getExistingPathINodes(components);
      if (iip.isSnapshot()) {
        throw new SnapshotAccessControlException(
            "Modification on RO snapshot is disallowed");
      }
      INode[] inodes = iip.getINodes();

      // find the index of the first null in inodes[]
      StringBuilder pathbuilder = new StringBuilder();
      int i = 1;
      for(; i < inodes.length && inodes[i] != null; i++) {
        pathbuilder.append(Path.SEPARATOR).
            append(DFSUtil.bytes2String(components[i]));
        // Every existing ancestor on the path must be a directory.
        if (!inodes[i].isDirectory()) {
          throw new FileAlreadyExistsException(
              "Parent path is not a directory: "
              + pathbuilder + " "+inodes[i].getLocalName());
        }
      }

      // default to creating parent dirs with the given perms
      PermissionStatus parentPermissions = permissions;

      // if not inheriting and it's the last inode, there's no use in
      // computing perms that won't be used
      if (inheritPermission || (i < lastInodeIndex)) {
        // if inheriting (ie. creating a file or symlink), use the parent dir,
        // else the supplied permissions
        // NOTE: the permissions of the auto-created directories violate posix
        FsPermission parentFsPerm = inheritPermission
            ? inodes[i-1].getFsPermission() : permissions.getPermission();

        // ensure that the permissions allow user write+execute
        if (!parentFsPerm.getUserAction().implies(FsAction.WRITE_EXECUTE)) {
          parentFsPerm = new FsPermission(
              parentFsPerm.getUserAction().or(FsAction.WRITE_EXECUTE),
              parentFsPerm.getGroupAction(),
              parentFsPerm.getOtherAction()
          );
        }

        if (!parentPermissions.getPermission().equals(parentFsPerm)) {
          parentPermissions = new PermissionStatus(
              parentPermissions.getUserName(),
              parentPermissions.getGroupName(),
              parentFsPerm
          );
          // when inheriting, use same perms for entire path
          if (inheritPermission) permissions = parentPermissions;
        }
      }

      // create directories beginning from the first null index
      for(; i < inodes.length; i++) {
        pathbuilder.append(Path.SEPARATOR).
            append(DFSUtil.bytes2String(components[i]));
        dir.unprotectedMkdir(allocateNewInodeId(), iip, i, components[i],
            (i < lastInodeIndex) ? parentPermissions : permissions, null,
            now);
        // unprotectedMkdir fills inodes[i] on success; a null here means
        // the directory could not be created.
        if (inodes[i] == null) {
          return false;
        }
        // Directory creation also count towards FilesCreated
        // to match count of FilesDeleted metric.
        NameNode.getNameNodeMetrics().incrFilesCreated();

        final String cur = pathbuilder.toString();
        // Log each created directory so a partial mkdirs is replayable.
        getEditLog().logMkDir(cur, inodes[i]);
        if(NameNode.stateChangeLog.isDebugEnabled()) {
          NameNode.stateChangeLog.debug(
              "mkdirs: created directory " + cur);
        }
      }
    } finally {
      dir.writeUnlock();
    }
    return true;
  }

  /**
   * Get the content summary for a specific file/dir.
   *
   * @param srcArg The string representation of the path to the file
   *
   * @throws AccessControlException if access is denied
   * @throws UnresolvedLinkException if a symlink is encountered.
   * @throws FileNotFoundException if no file exists
   * @throws StandbyException
   * @throws IOException for issues with writing to the audit log
   *
   * @return object containing information regarding the file
   *         or null if file not found
   */
  ContentSummary getContentSummary(final String srcArg) throws IOException {
    String src = srcArg;
    FSPermissionChecker pc = getPermissionChecker();
    checkOperation(OperationCategory.READ);
    byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
    readLock();
    // Assume success; flipped to false only on an access-control failure so
    // the audit event in the finally block reports the right outcome.
    boolean success = true;
    try {
      // Re-validate the operation category after taking the read lock.
      checkOperation(OperationCategory.READ);
      src = resolvePath(src, pathComponents);
      if (isPermissionEnabled) {
        checkPermission(pc, src, false, null, null, null, FsAction.READ_EXECUTE);
      }
      return dir.getContentSummary(src);

    } catch (AccessControlException ace) {
      success = false;
      throw ace;
    } finally {
      readUnlock();
      // Audited on every exit path, with the caller-supplied path.
      logAuditEvent(success, "contentSummary", srcArg);
    }
  }

  /**
   * Set the namespace quota and diskspace quota for a directory.
   * See {@link ClientProtocol#setQuota(String, long, long)} for the
   * contract.
   *
   * Note: This does not support ".inodes" relative path.
   */
  void setQuota(String path, long nsQuota, long dsQuota)
      throws IOException, UnresolvedLinkException {
    // Quota changes are superuser-only.
    checkSuperuserPrivilege();
    checkOperation(OperationCategory.WRITE);
    writeLock();
    try {
      // Re-validate the operation category after taking the write lock.
      checkOperation(OperationCategory.WRITE);
      checkNameNodeSafeMode("Cannot set quota on " + path);
      INodeDirectory changed = dir.setQuota(path, nsQuota, dsQuota);
      // dir.setQuota returns null when nothing changed; only log a real change.
      if (changed != null) {
        final Quota.Counts q = changed.getQuotaCounts();
        getEditLog().logSetQuota(path,
            q.get(Quota.NAMESPACE), q.get(Quota.DISKSPACE));
      }
    } finally {
      writeUnlock();
    }
    // Sync the edit log outside the write lock.
    getEditLog().logSync();
  }

  /** Persist all metadata about this file.
   * @param src The string representation of the path
   * @param fileId The inode ID that we're fsyncing.  Older clients will pass
   *               INodeId.GRANDFATHER_INODE_ID here.
   * @param clientName The string representation of the client
   * @param lastBlockLength The length of the last block
   *                        under construction reported from client.
   * @throws IOException if path does not exist
   */
  void fsync(String src, long fileId, String clientName, long lastBlockLength)
      throws IOException, UnresolvedLinkException {
    NameNode.stateChangeLog.info("BLOCK* fsync: " + src + " for " + clientName);
    checkOperation(OperationCategory.WRITE);
    byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);

    waitForLoadingFSImage();
    writeLock();
    try {
      // Re-validate the operation category after taking the write lock.
      checkOperation(OperationCategory.WRITE);
      checkNameNodeSafeMode("Cannot fsync file " + src);
      src = resolvePath(src, pathComponents);
      final INode inode;
      if (fileId == INodeId.GRANDFATHER_INODE_ID) {
        // Older clients may not have given us an inode ID to work with.
        // In this case, we have to try to resolve the path and hope it
        // hasn't changed or been deleted since the file was opened for write.
        inode = dir.getINode(src);
      } else {
        // Prefer the inode ID: it is stable across renames of the open file.
        inode = dir.getInode(fileId);
        if (inode != null) src = inode.getFullPathName();
      }
      final INodeFile pendingFile = checkLease(src, clientName, inode, fileId);
      if (lastBlockLength > 0) {
        pendingFile.getFileUnderConstructionFeature().updateLengthOfLastBlock(
            pendingFile, lastBlockLength);
      }
      persistBlocks(src, pendingFile, false);
    } finally {
      writeUnlock();
    }
    // Sync the edit log outside the write lock.
    getEditLog().logSync();
  }

  /**
   * Move a file that is being written to be immutable.
   * @param src The filename
   * @param lease The lease for the client creating the file
   * @param recoveryLeaseHolder reassign lease to this holder if the last block
   *        needs recovery; keep current holder if null.
   * @throws AlreadyBeingCreatedException if file is waiting to achieve minimal
   *         replication;<br>
   *         RecoveryInProgressException if lease recovery is in progress.<br>
   *         IOException in case of an error.
   * @return true  if file has been successfully finalized and closed or
   *         false if block recovery has been initiated. Since the lease owner
   *         has been changed and logged, caller should call logSync().
   */
  boolean internalReleaseLease(Lease lease, String src,
      String recoveryLeaseHolder) throws AlreadyBeingCreatedException,
      IOException, UnresolvedLinkException {
    LOG.info("Recovering " + lease + ", src=" + src);
    assert !isInSafeMode();
    assert hasWriteLock();

    final INodesInPath iip = dir.getLastINodeInPath(src);
    final INodeFile pendingFile = iip.getINode(0).asFile();
    int nrBlocks = pendingFile.numBlocks();
    BlockInfo[] blocks = pendingFile.getBlocks();

    // Count the leading run of COMPLETE blocks; curBlock is left pointing at
    // the first incomplete block (if any).
    int nrCompleteBlocks;
    BlockInfo curBlock = null;
    for(nrCompleteBlocks = 0; nrCompleteBlocks < nrBlocks; nrCompleteBlocks++) {
      curBlock = blocks[nrCompleteBlocks];
      if(!curBlock.isComplete())
        break;
      assert blockManager.checkMinReplication(curBlock) :
        "A COMPLETE block is not minimally replicated in " + src;
    }

    // If there are no incomplete blocks associated with this file,
    // then reap lease immediately and close the file.
    if(nrCompleteBlocks == nrBlocks) {
      finalizeINodeFileUnderConstruction(src, pendingFile,
          iip.getLatestSnapshotId());
      NameNode.stateChangeLog.warn("BLOCK*"
        + " internalReleaseLease: All existing blocks are COMPLETE,"
        + " lease removed, file closed.");
      return true;  // closed!
    }

    // Only the last and the penultimate blocks may be in non COMPLETE state.
    // If the penultimate block is not COMPLETE, then it must be COMMITTED.
    // NOTE: '&&' binds tighter than '||' here, so this reads as
    // (complete < n-2) || (complete == n-2 && curBlock != null && !COMMITTED).
    if(nrCompleteBlocks < nrBlocks - 2 ||
       nrCompleteBlocks == nrBlocks - 2 &&
         curBlock != null &&
         curBlock.getBlockUCState() != BlockUCState.COMMITTED) {
      final String message = "DIR* NameSystem.internalReleaseLease: "
        + "attempt to release a create lock on "
        + src + " but file is already closed.";
      NameNode.stateChangeLog.warn(message);
      throw new IOException(message);
    }

    // The last block is not COMPLETE, and
    // that the penultimate block if exists is either COMPLETE or COMMITTED
    final BlockInfo lastBlock = pendingFile.getLastBlock();
    BlockUCState lastBlockState = lastBlock.getBlockUCState();
    BlockInfo penultimateBlock = pendingFile.getPenultimateBlock();

    // If penultimate block doesn't exist then its minReplication is met
    boolean penultimateBlockMinReplication = penultimateBlock == null ? true :
        blockManager.checkMinReplication(penultimateBlock);

    switch(lastBlockState) {
    case COMPLETE:
      assert false : "Already checked that the last block is incomplete";
      break;
    case COMMITTED:
      // Close file if committed blocks are minimally replicated
      if(penultimateBlockMinReplication &&
          blockManager.checkMinReplication(lastBlock)) {
        finalizeINodeFileUnderConstruction(src, pendingFile,
            iip.getLatestSnapshotId());
        NameNode.stateChangeLog.warn("BLOCK*"
          + " internalReleaseLease: Committed blocks are minimally replicated,"
          + " lease removed, file closed.");
        return true;  // closed!
      }
      // Cannot close file right now, since some blocks
      // are not yet minimally replicated.
      // This may potentially cause infinite loop in lease recovery
      // if there are no valid replicas on data-nodes.
      String message = "DIR* NameSystem.internalReleaseLease: " +
          "Failed to release lease for file " + src +
          ". Committed blocks are waiting to be minimally replicated." +
          " Try again later.";
      NameNode.stateChangeLog.warn(message);
      throw new AlreadyBeingCreatedException(message);
    case UNDER_CONSTRUCTION:
    case UNDER_RECOVERY:
      final BlockInfoUnderConstruction uc = (BlockInfoUnderConstruction)lastBlock;
      // setup the last block locations from the blockManager if not known
      if (uc.getNumExpectedLocations() == 0) {
        uc.setExpectedLocations(blockManager.getStorages(lastBlock));
      }

      if (uc.getNumExpectedLocations() == 0 && uc.getNumBytes() == 0) {
        // There is no datanode reported to this block.
        // may be client have crashed before writing data to pipeline.
        // This blocks doesn't need any recovery.
        // We can remove this block and close the file.
        pendingFile.removeLastBlock(lastBlock);
        finalizeINodeFileUnderConstruction(src, pendingFile,
            iip.getLatestSnapshotId());
        NameNode.stateChangeLog.warn("BLOCK* internalReleaseLease: "
            + "Removed empty last block and closed file.");
        return true;
      }
      // start recovery of the last block for this file
      long blockRecoveryId = nextGenerationStamp(isLegacyBlock(uc));
      lease = reassignLease(lease, src, recoveryLeaseHolder, pendingFile);
      uc.initializeBlockRecovery(blockRecoveryId);
      leaseManager.renewLease(lease);
      // Cannot close file right now, since the last block requires recovery.
      // This may potentially cause infinite loop in lease recovery
      // if there are no valid replicas on data-nodes.
      NameNode.stateChangeLog.warn(
                "DIR* NameSystem.internalReleaseLease: " +
                "File " + src + " has not been closed." +
               " Lease recovery is in progress. " +
                "RecoveryId = " + blockRecoveryId + " for block " + lastBlock);
      break;
    }
    return false;
  }

  /**
   * Reassign the lease to {@code newHolder}, logging the reassignment to the
   * edit log first. A null holder means keep the current one.
   */
  private Lease reassignLease(Lease lease, String src, String newHolder,
      INodeFile pendingFile) {
    assert hasWriteLock();
    if(newHolder == null)
      return lease;
    // The following transaction is not synced. Make sure it's sync'ed later.
    logReassignLease(lease.getHolder(), src, newHolder);
    return reassignLeaseInternal(lease, src, newHolder, pendingFile);
  }

  /**
   * Update the under-construction file's client name and the lease manager's
   * record to the new holder. No edit-log entry is written here.
   */
  Lease reassignLeaseInternal(Lease lease, String src, String newHolder,
      INodeFile pendingFile) {
    assert hasWriteLock();
    pendingFile.getFileUnderConstructionFeature().setClientName(newHolder);
    return leaseManager.reassignLease(lease, src, newHolder);
  }

  /**
   * Commit (or complete) the last block of an under-construction file, and
   * give back any over-reserved disk space if the block ended up shorter
   * than the preferred block size.
   */
  private void commitOrCompleteLastBlock(final INodeFile fileINode,
      final Block commitBlock) throws IOException {
    assert hasWriteLock();
    Preconditions.checkArgument(fileINode.isUnderConstruction());
    if (!blockManager.commitOrCompleteLastBlock(fileINode, commitBlock)) {
      return;
    }

    // Adjust disk space consumption if required
    final long diff = fileINode.getPreferredBlockSize() - commitBlock.getNumBytes();
    if (diff > 0) {
      try {
        String path = fileINode.getFullPathName();
        dir.updateSpaceConsumed(path, 0, -diff*fileINode.getFileReplication());
      } catch (IOException e) {
        // Best-effort accounting fix-up; do not fail the commit over it.
        LOG.warn("Unexpected exception while updating disk space.", e);
      }
    }
  }

  /**
   * Turn an under-construction file into a complete, immutable file:
   * remove the UC feature, release the lease, and log a CloseOp.
   */
  private void finalizeINodeFileUnderConstruction(String src,
      INodeFile pendingFile, int latestSnapshot) throws IOException,
      UnresolvedLinkException {
    assert hasWriteLock();

    FileUnderConstructionFeature uc = pendingFile.getFileUnderConstructionFeature();
    if (uc == null) {
      throw new IOException("Cannot finalize file " + src
          + " because it is not under construction");
    }

    pendingFile.recordModification(latestSnapshot);

    // The file is no longer pending.
    // Create permanent INode, update blocks. No need to replace the inode here
    // since we just remove the uc feature from pendingFile
    final INodeFile newFile = pendingFile.toCompleteFile(now());

    leaseManager.removeLease(uc.getClientName(), src);

    waitForLoadingFSImage();
    // close file and persist block allocations for this file
    closeFile(src, newFile);

    blockManager.checkReplication(newFile);
  }

  /** Look up the stored {@link BlockInfo} for the given block. */
  @VisibleForTesting
  BlockInfo getStoredBlock(Block block) {
    return blockManager.getStoredBlock(block);
  }

  @Override
  public boolean isInSnapshot(BlockInfoUnderConstruction blockUC) {
    assert hasReadLock();
    final BlockCollection bc = blockUC.getBlockCollection();
    if (bc == null || !(bc instanceof INodeFile)
        || !bc.isUnderConstruction()) {
      return false;
    }

    INodeFile inodeUC = (INodeFile) bc;
    String fullName = inodeUC.getName();
    try {
      if (fullName != null && fullName.startsWith(Path.SEPARATOR)
          && dir.getINode(fullName) == inodeUC) {
        // If file exists in normal path then no need to look in snapshot
        return false;
      }
    } catch (UnresolvedLinkException e) {
      LOG.error("Error while resolving the link : " + fullName, e);
      return false;
    }
    /*
     * 1. if bc is an instance of INodeFileUnderConstructionWithSnapshot, and
     * bc is not in the current fsdirectory tree, bc must represent a snapshot
     * file.
     * 2. if fullName is not an absolute path, bc cannot be existent in the
     * current fsdirectory tree.
     * 3. if bc is not the current node associated with fullName, bc must be a
     * snapshot inode.
     */
    return true;
  }

  /**
   * Apply the result of a block recovery reported by the primary datanode:
   * update (or delete) the last block of the recovering file, optionally
   * close the file, and record new replica locations.
   */
  void commitBlockSynchronization(ExtendedBlock lastblock,
      long newgenerationstamp, long newlength,
      boolean closeFile, boolean deleteblock, DatanodeID[] newtargets,
      String[] newtargetstorages)
      throws IOException, UnresolvedLinkException {
    LOG.info("commitBlockSynchronization(lastblock=" + lastblock
             + ", newgenerationstamp=" + newgenerationstamp
             + ", newlength=" + newlength
             + ", newtargets=" + Arrays.asList(newtargets)
             + ", closeFile=" + closeFile
             + ", deleteBlock=" + deleteblock
             + ")");
    checkOperation(OperationCategory.WRITE);
    String src = "";
    waitForLoadingFSImage();
    writeLock();
    try {
      checkOperation(OperationCategory.WRITE);
      // If a DN tries to commit to the standby, the recovery will
      // fail, and the next retry will succeed on the new NN.

      checkNameNodeSafeMode(
          "Cannot commitBlockSynchronization while in safe mode");
      final BlockInfo storedBlock = getStoredBlock(
          ExtendedBlock.getLocalBlock(lastblock));
      if (storedBlock == null) {
        if (deleteblock) {
          // This may be a retry attempt so ignore the failure
          // to locate the block.
          if (LOG.isDebugEnabled()) {
            LOG.debug("Block (=" + lastblock + ") not found");
          }
          return;
        } else {
          throw new IOException("Block (=" + lastblock + ") not found");
        }
      }
      final long oldGenerationStamp = storedBlock.getGenerationStamp();
      final long oldNumBytes = storedBlock.getNumBytes();
      //
      // The implementation of delete operation (see @deleteInternal method)
      // first removes the file paths from namespace, and delays the removal
      // of blocks to later time for better performance. When
      // commitBlockSynchronization (this method) is called in between, the
      // blockCollection of storedBlock could have been assigned to null by
      // the delete operation, throw IOException here instead of NPE; if the
      // file path is already removed from namespace by the delete operation,
      // throw FileNotFoundException here, so not to proceed to the end of
      // this method to add a CloseOp to the edit log for an already deleted
      // file (See HDFS-6825).
      //
      BlockCollection blockCollection = storedBlock.getBlockCollection();
      if (blockCollection == null) {
        throw new IOException("The blockCollection of " + storedBlock
            + " is null, likely because the file owning this block was"
            + " deleted and the block removal is delayed");
      }
      INodeFile iFile = ((INode)blockCollection).asFile();
      if (isFileDeleted(iFile)) {
        throw new FileNotFoundException("File not found: "
            + iFile.getFullPathName() + ", likely due to delayed block"
            + " removal");
      }
      if (!iFile.isUnderConstruction() || storedBlock.isComplete()) {
        // Likely a stale/duplicate report; nothing to do.
        if (LOG.isDebugEnabled()) {
          LOG.debug("Unexpected block (=" + lastblock
                    + ") since the file (=" + iFile.getLocalName()
                    + ") is not under construction");
        }
        return;
      }

      // The reported generation stamp must match the recovery id that this
      // NN issued when it initiated the block recovery.
      long recoveryId =
          ((BlockInfoUnderConstruction)storedBlock).getBlockRecoveryId();
      if(recoveryId != newgenerationstamp) {
        throw new IOException("The recovery id " + newgenerationstamp
                              + " does not match current recovery id "
                              + recoveryId + " for block " + lastblock);
      }

      if (deleteblock) {
        Block blockToDel = ExtendedBlock.getLocalBlock(lastblock);
        boolean remove = iFile.removeLastBlock(blockToDel);
        if (remove) {
          blockManager.removeBlockFromMap(storedBlock);
        }
      }
      else {
        // update last block
        storedBlock.setGenerationStamp(newgenerationstamp);
        storedBlock.setNumBytes(newlength);

        // find the DatanodeDescriptor objects
        ArrayList<DatanodeDescriptor> trimmedTargets =
            new ArrayList<DatanodeDescriptor>(newtargets.length);
        ArrayList<String> trimmedStorages =
            new ArrayList<String>(newtargets.length);
        if (newtargets.length > 0) {
          for (int i = 0; i < newtargets.length; ++i) {
            // try to get targetNode; drop targets that are no longer
            // registered, keeping the target/storage lists in step.
            DatanodeDescriptor targetNode =
                blockManager.getDatanodeManager().getDatanode(newtargets[i]);
            if (targetNode != null) {
              trimmedTargets.add(targetNode);
              trimmedStorages.add(newtargetstorages[i]);
            } else if (LOG.isDebugEnabled()) {
              LOG.debug("DatanodeDescriptor (=" + newtargets[i] + ") not found");
            }
          }
        }
        if ((closeFile) && !trimmedTargets.isEmpty()) {
          // the file is getting closed. Insert block locations into blockManager.
          // Otherwise fsck will report these blocks as MISSING, especially if the
          // blocksReceived from Datanodes take a long time to arrive.
          for (int i = 0; i < trimmedTargets.size(); i++) {
            DatanodeStorageInfo storageInfo =
                trimmedTargets.get(i).getStorageInfo(trimmedStorages.get(i));
            if (storageInfo != null) {
              storageInfo.addBlock(storedBlock);
            }
          }
        }

        // add pipeline locations into the INodeUnderConstruction
        DatanodeStorageInfo[] trimmedStorageInfos =
            blockManager.getDatanodeManager().getDatanodeStorageInfos(
                trimmedTargets.toArray(new DatanodeID[trimmedTargets.size()]),
                trimmedStorages.toArray(new String[trimmedStorages.size()]));
        iFile.setLastBlock(storedBlock, trimmedStorageInfos);
        if (closeFile) {
          // Replicas still carrying the old generation stamp are now stale.
          blockManager.markBlockReplicasAsCorrupt(storedBlock,
              oldGenerationStamp, oldNumBytes, trimmedStorageInfos);
        }
      }

      if (closeFile) {
        src = closeFileCommitBlocks(iFile, storedBlock);
      } else {
        // If this commit does not want to close the file, persist blocks
        src = iFile.getFullPathName();
        persistBlocks(src, iFile, false);
      }
    } finally {
      writeUnlock();
    }
    // Sync the edit log outside the write lock.
    getEditLog().logSync();
    if (closeFile) {
      LOG.info("commitBlockSynchronization(newblock=" + lastblock
          + ", file=" + src
          + ", newgenerationstamp=" + newgenerationstamp
          + ", newlength=" + newlength
          + ", newtargets=" + Arrays.asList(newtargets) + ") successful");
    } else {
      LOG.info("commitBlockSynchronization(" + lastblock + ") successful");
    }
  }

  /**
   * @param pendingFile open file that needs to be closed
   * @param storedBlock last block
   * @return Path of the file that was closed.
   * @throws IOException on error
   */
  @VisibleForTesting
  String closeFileCommitBlocks(INodeFile pendingFile, BlockInfo storedBlock)
      throws IOException {
    String src = pendingFile.getFullPathName();

    // commit the last block and complete it if it has minimum replicas
    commitOrCompleteLastBlock(pendingFile, storedBlock);

    //remove lease, close file
    finalizeINodeFileUnderConstruction(src, pendingFile,
        Snapshot.findLatestSnapshot(pendingFile, Snapshot.CURRENT_STATE_ID));

    return src;
  }

  /**
   * Renew the lease(s) held by the given client.
   * NOTE(review): this checks the WRITE operation category but takes only the
   * read lock before delegating to the lease manager — confirm this asymmetry
   * is intentional (lease renewal is not edit-logged here).
   */
  void renewLease(String holder) throws IOException {
    checkOperation(OperationCategory.WRITE);
    readLock();
    try {
      checkOperation(OperationCategory.WRITE);
      checkNameNodeSafeMode("Cannot renew lease for " + holder);
      leaseManager.renewLease(holder);
    } finally {
      readUnlock();
    }
  }

  /**
   * Get a partial listing of the indicated directory
   *
   * @param src the directory name
   * @param startAfter the name to start after
   * @param needLocation if blockLocations need to be returned
   * @return a partial listing starting after startAfter
   *
   * @throws AccessControlException if access is denied
   * @throws UnresolvedLinkException if symbolic link is encountered
   * @throws IOException if other I/O error occurred
   */
  DirectoryListing getListing(String src, byte[] startAfter,
      boolean needLocation)
      throws AccessControlException, UnresolvedLinkException, IOException {
    try {
      return getListingInt(src, startAfter, needLocation);
    } catch (AccessControlException e) {
      // Record the denied attempt before propagating it to the caller.
      logAuditEvent(false, "listStatus", src);
      throw e;
    }
  }

  /**
   * Produce one page of a directory listing under the read lock, translating
   * a reserved-name (".reserved"/inode-path) startAfter into a regular
   * component name before listing.
   */
  private DirectoryListing getListingInt(final String srcArg, byte[] startAfter,
      boolean needLocation)
      throws AccessControlException, UnresolvedLinkException, IOException {
    String src = srcArg;
    DirectoryListing dl;
    FSPermissionChecker pc = getPermissionChecker();
    checkOperation(OperationCategory.READ);
    byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
    // NOTE(review): decodes startAfter with the platform default charset;
    // presumably path bytes are UTF-8 — confirm callers encode consistently.
    String startAfterString = new String(startAfter);
    readLock();
    try {
      // Re-validate the operation category after taking the read lock.
      checkOperation(OperationCategory.READ);
      src = resolvePath(src, pathComponents);

      // Get file name when startAfter is an INodePath
      if (FSDirectory.isReservedName(startAfterString)) {
        byte[][] startAfterComponents = FSDirectory
            .getPathComponentsForReservedPath(startAfterString);
        try {
          String tmp = FSDirectory.resolvePath(src, startAfterComponents, dir);
          byte[][] regularPath = INode.getPathComponents(tmp);
          startAfter = regularPath[regularPath.length - 1];
        } catch (IOException e) {
          // Possibly the inode is deleted
          throw new DirectoryListingStartAfterNotFoundException(
              "Can't find startAfter " + startAfterString);
        }
      }

      boolean isSuperUser = true;
      if (isPermissionEnabled) {
        // Listing a directory needs r-x on it; listing a single file only
        // needs traverse access along the path.
        if (dir.isDir(src)) {
          checkPathAccess(pc, src, FsAction.READ_EXECUTE);
        } else {
          checkTraverse(pc, src);
        }
        isSuperUser = pc.isSuperUser();
      }
      // Audit with the caller-supplied path, not the resolved one.
      logAuditEvent(true, "listStatus", srcArg);
      dl = dir.getListing(src, startAfter, needLocation, isSuperUser);
    } finally {
      readUnlock();
    }
    return dl;
  }

  /////////////////////////////////////////////////////////
  //
  // These methods are called by datanodes
  //
  /////////////////////////////////////////////////////////
  /**
   * Register Datanode.
   * <p>
   * The purpose of registration is to identify whether the new datanode
   * serves a new data storage, and will report new data block copies,
   * which the namenode was not aware of; or the datanode is a replacement
   * node for the data storage that was previously served by a different
   * or the same (in terms of host:port) datanode.
   * The data storages are distinguished by their storageIDs. When a new
   * data storage is reported the namenode issues a new unique storageID.
   * <p>
   * Finally, the namenode returns its namespaceID as the registrationID
   * for the datanodes.
   * namespaceID is a persistent attribute of the name space.
   * The registrationID is checked every time the datanode is communicating
   * with the namenode.
   * Datanodes with inappropriate registrationID are rejected.
   * If the namenode stops, and then restarts it can restore its
   * namespaceID and will continue serving the datanodes that has previously
   * registered with the namenode without restarting the whole cluster.
   *
   * @see org.apache.hadoop.hdfs.server.datanode.DataNode
   */
  void registerDatanode(DatanodeRegistration nodeReg) throws IOException {
    writeLock();
    try {
      getBlockManager().getDatanodeManager().registerDatanode(nodeReg);
      // Registration may change the live-node count, which can satisfy
      // (or re-trigger) safe mode conditions.
      checkSafeMode();
    } finally {
      writeUnlock();
    }
  }

  /**
   * Get registrationID for datanodes based on the namespaceID.
   *
   * @see #registerDatanode(DatanodeRegistration)
   * @return registration ID
   */
  String getRegistrationID() {
    return Storage.getRegistrationID(getFSImage().getStorage());
  }

  /**
   * The given node has reported in.  This method should:
   * 1) Record the heartbeat, so the datanode isn't timed out
   * 2) Adjust usage stats for future block allocation
   *
   * If a substantial amount of time passed since the last datanode
   * heartbeat then request an immediate block report.
   *
   * @return an array of datanode commands
   * @throws IOException
   */
  HeartbeatResponse handleHeartbeat(DatanodeRegistration nodeReg,
      StorageReport[] reports, long cacheCapacity, long cacheUsed,
      int xceiverCount, int xmitsInProgress, int failedVolumes)
        throws IOException {
    readLock();
    try {
      //get datanode commands
      // Cap new transfer commands by the replication streams still available
      // on this datanode.
      final int maxTransfer = blockManager.getMaxReplicationStreams()
          - xmitsInProgress;
      DatanodeCommand[] cmds = blockManager.getDatanodeManager().handleHeartbeat(
          nodeReg, reports, blockPoolId, cacheCapacity, cacheUsed,
          xceiverCount, maxTransfer, failedVolumes);

      //create ha status
      final NNHAStatusHeartbeat haState = new NNHAStatusHeartbeat(
          haContext.getState().getServiceState(),
          getFSImage().getLastAppliedOrWrittenTxId());

      return new HeartbeatResponse(cmds, haState, rollingUpgradeInfo);
    } finally {
      readUnlock();
    }
  }

  /**
   * Returns whether or not there were available resources at the last check of
   * resources.
   *
   * @return true if there were sufficient resources available, false otherwise.
   */
  boolean nameNodeHasResourcesAvailable() {
    return hasResourcesAvailable;
  }

  /**
   * Perform resource checks and cache the results.
   */
  void checkAvailableResources() {
    Preconditions.checkState(nnResourceChecker != null,
        "nnResourceChecker not initialized");
    hasResourcesAvailable = nnResourceChecker.hasAvailableDiskSpace();
  }

  /**
   * Persist the block list for the inode.
   * @param path
   * @param file
   * @param logRetryCache
   */
  private void persistBlocks(String path, INodeFile file,
                             boolean logRetryCache) {
    assert hasWriteLock();
    Preconditions.checkArgument(file.isUnderConstruction());
    getEditLog().logUpdateBlocks(path, file, logRetryCache);
    if(NameNode.stateChangeLog.isDebugEnabled()) {
      NameNode.stateChangeLog.debug("persistBlocks: " + path
              + " with " + file.getBlocks().length + " blocks is persisted to" +
              " the file system");
    }
  }

  /** Bump the FilesDeleted metric by {@code count}. */
  void incrDeletedFileCount(long count) {
    NameNode.getNameNodeMetrics().incrFilesDeleted(count);
  }

  /**
   * Close file.
   * @param path
   * @param file
   */
  private void closeFile(String path, INodeFile file) {
    assert hasWriteLock();
    waitForLoadingFSImage();
    // file is closed
    getEditLog().logCloseFile(path, file);
    if (NameNode.stateChangeLog.isDebugEnabled()) {
      NameNode.stateChangeLog.debug("closeFile: "
              +path+" with "+ file.getBlocks().length
              +" blocks is persisted to the file system");
    }
  }

  /**
   * Add the given symbolic link to the fs. Record it in the edits log.
5162 */ 5163 private INodeSymlink addSymlink(String path, String target, 5164 PermissionStatus dirPerms, 5165 boolean createParent, boolean logRetryCache) 5166 throws UnresolvedLinkException, FileAlreadyExistsException, 5167 QuotaExceededException, SnapshotAccessControlException, AclException { 5168 waitForLoadingFSImage(); 5169 5170 final long modTime = now(); 5171 if (createParent) { 5172 final String parent = new Path(path).getParent().toString(); 5173 if (!mkdirsRecursively(parent, dirPerms, true, modTime)) { 5174 return null; 5175 } 5176 } 5177 final String userName = dirPerms.getUserName(); 5178 long id = allocateNewInodeId(); 5179 INodeSymlink newNode = dir.addSymlink(id, path, target, modTime, modTime, 5180 new PermissionStatus(userName, null, FsPermission.getDefault())); 5181 if (newNode == null) { 5182 NameNode.stateChangeLog.info("addSymlink: failed to add " + path); 5183 return null; 5184 } 5185 getEditLog().logSymlink(path, target, modTime, modTime, newNode, 5186 logRetryCache); 5187 5188 if(NameNode.stateChangeLog.isDebugEnabled()) { 5189 NameNode.stateChangeLog.debug("addSymlink: " + path + " is added"); 5190 } 5191 return newNode; 5192 } 5193 5194 /** 5195 * Periodically calls hasAvailableResources of NameNodeResourceChecker, and if 5196 * there are found to be insufficient resources available, causes the NN to 5197 * enter safe mode. If resources are later found to have returned to 5198 * acceptable levels, this daemon will cause the NN to exit safe mode. 5199 */ 5200 class NameNodeResourceMonitor implements Runnable { 5201 boolean shouldNNRmRun = true; 5202 @Override 5203 public void run () { 5204 try { 5205 while (fsRunning && shouldNNRmRun) { 5206 checkAvailableResources(); 5207 if(!nameNodeHasResourcesAvailable()) { 5208 String lowResourcesMsg = "NameNode low on available disk space. 
"; 5209 if (!isInSafeMode()) { 5210 FSNamesystem.LOG.warn(lowResourcesMsg + "Entering safe mode."); 5211 } else { 5212 FSNamesystem.LOG.warn(lowResourcesMsg + "Already in safe mode."); 5213 } 5214 enterSafeMode(true); 5215 } 5216 try { 5217 Thread.sleep(resourceRecheckInterval); 5218 } catch (InterruptedException ie) { 5219 // Deliberately ignore 5220 } 5221 } 5222 } catch (Exception e) { 5223 FSNamesystem.LOG.error("Exception in NameNodeResourceMonitor: ", e); 5224 } 5225 } 5226 5227 public void stopMonitor() { 5228 shouldNNRmRun = false; 5229 } 5230 } 5231 5232 class NameNodeEditLogRoller implements Runnable { 5233 5234 private boolean shouldRun = true; 5235 private final long rollThreshold; 5236 private final long sleepIntervalMs; 5237 5238 public NameNodeEditLogRoller(long rollThreshold, int sleepIntervalMs) { 5239 this.rollThreshold = rollThreshold; 5240 this.sleepIntervalMs = sleepIntervalMs; 5241 } 5242 5243 @Override 5244 public void run() { 5245 while (fsRunning && shouldRun) { 5246 try { 5247 FSEditLog editLog = getFSImage().getEditLog(); 5248 long numEdits = 5249 editLog.getLastWrittenTxId() - editLog.getCurSegmentTxId(); 5250 if (numEdits > rollThreshold) { 5251 FSNamesystem.LOG.info("NameNode rolling its own edit log because" 5252 + " number of edits in open segment exceeds threshold of " 5253 + rollThreshold); 5254 rollEditLog(); 5255 } 5256 } catch (Exception e) { 5257 FSNamesystem.LOG.error("Swallowing exception in " 5258 + NameNodeEditLogRoller.class.getSimpleName() + ":", e); 5259 } 5260 try { 5261 Thread.sleep(sleepIntervalMs); 5262 } catch (InterruptedException e) { 5263 FSNamesystem.LOG.info(NameNodeEditLogRoller.class.getSimpleName() 5264 + " was interrupted, exiting"); 5265 break; 5266 } 5267 } 5268 } 5269 5270 public void stop() { 5271 shouldRun = false; 5272 } 5273 } 5274 5275 /** 5276 * Daemon to periodically scan the namespace for lazyPersist files 5277 * with missing blocks and unlink them. 
5278 */ 5279 class LazyPersistFileScrubber implements Runnable { 5280 private volatile boolean shouldRun = true; 5281 final int scrubIntervalSec; 5282 public LazyPersistFileScrubber(final int scrubIntervalSec) { 5283 this.scrubIntervalSec = scrubIntervalSec; 5284 } 5285 5286 /** 5287 * Periodically go over the list of lazyPersist files with missing 5288 * blocks and unlink them from the namespace. 5289 */ 5290 private void clearCorruptLazyPersistFiles() 5291 throws SafeModeException, AccessControlException, 5292 UnresolvedLinkException, IOException { 5293 5294 BlockStoragePolicy lpPolicy = blockManager.getStoragePolicy("LAZY_PERSIST"); 5295 5296 List<BlockCollection> filesToDelete = new ArrayList<BlockCollection>(); 5297 5298 writeLock(); 5299 5300 try { 5301 final Iterator<Block> it = blockManager.getCorruptReplicaBlockIterator(); 5302 5303 while (it.hasNext()) { 5304 Block b = it.next(); 5305 BlockInfo blockInfo = blockManager.getStoredBlock(b); 5306 if (blockInfo.getBlockCollection().getStoragePolicyID() == lpPolicy.getId()) { 5307 filesToDelete.add(blockInfo.getBlockCollection()); 5308 } 5309 } 5310 5311 for (BlockCollection bc : filesToDelete) { 5312 LOG.warn("Removing lazyPersist file " + bc.getName() + " with no replicas."); 5313 deleteInternal(bc.getName(), false, false, false); 5314 } 5315 } finally { 5316 writeUnlock(); 5317 } 5318 } 5319 5320 @Override 5321 public void run() { 5322 while (fsRunning && shouldRun) { 5323 try { 5324 clearCorruptLazyPersistFiles(); 5325 Thread.sleep(scrubIntervalSec * 1000); 5326 } catch (InterruptedException e) { 5327 FSNamesystem.LOG.info( 5328 "LazyPersistFileScrubber was interrupted, exiting"); 5329 break; 5330 } catch (Exception e) { 5331 FSNamesystem.LOG.error( 5332 "Ignoring exception in LazyPersistFileScrubber:", e); 5333 } 5334 } 5335 } 5336 5337 public void stop() { 5338 shouldRun = false; 5339 } 5340 } 5341 5342 public FSImage getFSImage() { 5343 return fsImage; 5344 } 5345 5346 public FSEditLog getEditLog() { 
5347 return getFSImage().getEditLog(); 5348 } 5349 5350 private void checkBlock(ExtendedBlock block) throws IOException { 5351 if (block != null && !this.blockPoolId.equals(block.getBlockPoolId())) { 5352 throw new IOException("Unexpected BlockPoolId " + block.getBlockPoolId() 5353 + " - expected " + blockPoolId); 5354 } 5355 } 5356 5357 @Metric({"MissingBlocks", "Number of missing blocks"}) 5358 public long getMissingBlocksCount() { 5359 // not locking 5360 return blockManager.getMissingBlocksCount(); 5361 } 5362 5363 @Metric({"ExpiredHeartbeats", "Number of expired heartbeats"}) 5364 public int getExpiredHeartbeats() { 5365 return datanodeStatistics.getExpiredHeartbeats(); 5366 } 5367 5368 @Metric({"TransactionsSinceLastCheckpoint", 5369 "Number of transactions since last checkpoint"}) 5370 public long getTransactionsSinceLastCheckpoint() { 5371 return getEditLog().getLastWrittenTxId() - 5372 getFSImage().getStorage().getMostRecentCheckpointTxId(); 5373 } 5374 5375 @Metric({"TransactionsSinceLastLogRoll", 5376 "Number of transactions since last edit log roll"}) 5377 public long getTransactionsSinceLastLogRoll() { 5378 if (isInStandbyState() || !getEditLog().isSegmentOpen()) { 5379 return 0; 5380 } else { 5381 return getEditLog().getLastWrittenTxId() - 5382 getEditLog().getCurSegmentTxId() + 1; 5383 } 5384 } 5385 5386 @Metric({"LastWrittenTransactionId", "Transaction ID written to the edit log"}) 5387 public long getLastWrittenTransactionId() { 5388 return getEditLog().getLastWrittenTxId(); 5389 } 5390 5391 @Metric({"LastCheckpointTime", 5392 "Time in milliseconds since the epoch of the last checkpoint"}) 5393 public long getLastCheckpointTime() { 5394 return getFSImage().getStorage().getMostRecentCheckpointTime(); 5395 } 5396 5397 /** @see ClientProtocol#getStats() */ 5398 long[] getStats() { 5399 final long[] stats = datanodeStatistics.getStats(); 5400 stats[ClientProtocol.GET_STATS_UNDER_REPLICATED_IDX] = getUnderReplicatedBlocks(); 5401 
stats[ClientProtocol.GET_STATS_CORRUPT_BLOCKS_IDX] = getCorruptReplicaBlocks(); 5402 stats[ClientProtocol.GET_STATS_MISSING_BLOCKS_IDX] = getMissingBlocksCount(); 5403 return stats; 5404 } 5405 5406 @Override // FSNamesystemMBean 5407 @Metric({"CapacityTotal", 5408 "Total raw capacity of data nodes in bytes"}) 5409 public long getCapacityTotal() { 5410 return datanodeStatistics.getCapacityTotal(); 5411 } 5412 5413 @Metric({"CapacityTotalGB", 5414 "Total raw capacity of data nodes in GB"}) 5415 public float getCapacityTotalGB() { 5416 return DFSUtil.roundBytesToGB(getCapacityTotal()); 5417 } 5418 5419 @Override // FSNamesystemMBean 5420 @Metric({"CapacityUsed", 5421 "Total used capacity across all data nodes in bytes"}) 5422 public long getCapacityUsed() { 5423 return datanodeStatistics.getCapacityUsed(); 5424 } 5425 5426 @Metric({"CapacityUsedGB", 5427 "Total used capacity across all data nodes in GB"}) 5428 public float getCapacityUsedGB() { 5429 return DFSUtil.roundBytesToGB(getCapacityUsed()); 5430 } 5431 5432 @Override // FSNamesystemMBean 5433 @Metric({"CapacityRemaining", "Remaining capacity in bytes"}) 5434 public long getCapacityRemaining() { 5435 return datanodeStatistics.getCapacityRemaining(); 5436 } 5437 5438 @Metric({"CapacityRemainingGB", "Remaining capacity in GB"}) 5439 public float getCapacityRemainingGB() { 5440 return DFSUtil.roundBytesToGB(getCapacityRemaining()); 5441 } 5442 5443 @Metric({"CapacityUsedNonDFS", 5444 "Total space used by data nodes for non DFS purposes in bytes"}) 5445 public long getCapacityUsedNonDFS() { 5446 return datanodeStatistics.getCapacityUsedNonDFS(); 5447 } 5448 5449 /** 5450 * Total number of connections. 
  /**
   * Total number of connections (active xceivers) across all datanodes.
   */
  @Override // FSNamesystemMBean
  @Metric
  public int getTotalLoad() {
    return datanodeStatistics.getXceiverCount();
  }

  @Metric({ "SnapshottableDirectories", "Number of snapshottable directories" })
  public int getNumSnapshottableDirs() {
    return this.snapshotManager.getNumSnapshottableDirs();
  }

  @Metric({ "Snapshots", "The number of snapshots" })
  public int getNumSnapshots() {
    return this.snapshotManager.getNumSnapshots();
  }

  /** @return snapshot statistics serialized as a JSON string. */
  @Override
  public String getSnapshotStats() {
    Map<String, Object> info = new HashMap<String, Object>();
    info.put("SnapshottableDirectories", this.getNumSnapshottableDirs());
    info.put("Snapshots", this.getNumSnapshots());
    return JSON.toString(info);
  }

  /** @return the number of datanodes matching the given report type. */
  int getNumberOfDatanodes(DatanodeReportType type) {
    readLock();
    try {
      return getBlockManager().getDatanodeManager().getDatanodeListForReport(
          type).size();
    } finally {
      readUnlock();
    }
  }

  /**
   * Report on the datanodes matching the given type. Superuser only.
   */
  DatanodeInfo[] datanodeReport(final DatanodeReportType type
      ) throws AccessControlException, StandbyException {
    checkSuperuserPrivilege();
    checkOperation(OperationCategory.UNCHECKED);
    readLock();
    try {
      // Re-check under the lock: HA state may have changed since the
      // unlocked check above.
      checkOperation(OperationCategory.UNCHECKED);
      final DatanodeManager dm = getBlockManager().getDatanodeManager();
      final List<DatanodeDescriptor> results = dm.getDatanodeListForReport(type);

      // Copy into DatanodeInfo so internal descriptors are not exposed.
      DatanodeInfo[] arr = new DatanodeInfo[results.size()];
      for (int i=0; i<arr.length; i++) {
        arr[i] = new DatanodeInfo(results.get(i));
      }
      return arr;
    } finally {
      readUnlock();
    }
  }

  /**
   * Per-storage reports for datanodes matching the given type.
   * Superuser only.
   */
  DatanodeStorageReport[] getDatanodeStorageReport(final DatanodeReportType type
      ) throws AccessControlException, StandbyException {
    checkSuperuserPrivilege();
    checkOperation(OperationCategory.UNCHECKED);
    readLock();
    try {
      // Re-check under the lock: HA state may have changed since the
      // unlocked check above.
      checkOperation(OperationCategory.UNCHECKED);
      final DatanodeManager dm = getBlockManager().getDatanodeManager();
      final List<DatanodeDescriptor> datanodes = dm.getDatanodeListForReport(type);

      DatanodeStorageReport[] reports = new DatanodeStorageReport[datanodes.size()];
      for (int i = 0; i < reports.length; i++) {
        final DatanodeDescriptor d = datanodes.get(i);
        reports[i] = new DatanodeStorageReport(new DatanodeInfo(d),
            d.getStorageReports());
      }
      return reports;
    } finally {
      readUnlock();
    }
  }

  /**
   * Save namespace image.
   * This will save current namespace into fsimage file and empty edits file.
   * Requires superuser privilege and safe mode.
   *
   * @throws AccessControlException if superuser privilege is violated.
   * @throws IOException if the namespace is not in safe mode, or if saving
   *         the image fails.
   */
  void saveNamespace() throws AccessControlException, IOException {
    checkOperation(OperationCategory.UNCHECKED);
    checkSuperuserPrivilege();

    // Retry-cache lookup makes the operation idempotent across client
    // retries of the same rpc.
    CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache);
    if (cacheEntry != null && cacheEntry.isSuccess()) {
      return; // Return previous response
    }
    boolean success = false;
    readLock();
    try {
      checkOperation(OperationCategory.UNCHECKED);

      if (!isInSafeMode()) {
        throw new IOException("Safe mode should be turned ON "
            + "in order to create namespace image.");
      }
      getFSImage().saveNamespace(this);
      success = true;
    } finally {
      readUnlock();
      // Record the outcome so retried rpcs see the same result.
      RetryCache.setState(cacheEntry, success);
    }
    LOG.info("New namespace image has been created");
  }
5567 */ 5568 boolean restoreFailedStorage(String arg) throws AccessControlException, 5569 StandbyException { 5570 checkSuperuserPrivilege(); 5571 checkOperation(OperationCategory.UNCHECKED); 5572 writeLock(); 5573 try { 5574 checkOperation(OperationCategory.UNCHECKED); 5575 5576 // if it is disabled - enable it and vice versa. 5577 if(arg.equals("check")) 5578 return getFSImage().getStorage().getRestoreFailedStorage(); 5579 5580 boolean val = arg.equals("true"); // false if not 5581 getFSImage().getStorage().setRestoreFailedStorage(val); 5582 5583 return val; 5584 } finally { 5585 writeUnlock(); 5586 } 5587 } 5588 5589 Date getStartTime() { 5590 return new Date(startTime); 5591 } 5592 5593 void finalizeUpgrade() throws IOException { 5594 checkSuperuserPrivilege(); 5595 checkOperation(OperationCategory.UNCHECKED); 5596 writeLock(); 5597 try { 5598 checkOperation(OperationCategory.UNCHECKED); 5599 getFSImage().finalizeUpgrade(this.isHaEnabled() && inActiveState()); 5600 } finally { 5601 writeUnlock(); 5602 } 5603 } 5604 5605 void refreshNodes() throws IOException { 5606 checkOperation(OperationCategory.UNCHECKED); 5607 checkSuperuserPrivilege(); 5608 getBlockManager().getDatanodeManager().refreshNodes(new HdfsConfiguration()); 5609 } 5610 5611 void setBalancerBandwidth(long bandwidth) throws IOException { 5612 checkOperation(OperationCategory.UNCHECKED); 5613 checkSuperuserPrivilege(); 5614 getBlockManager().getDatanodeManager().setBalancerBandwidth(bandwidth); 5615 } 5616 5617 /** 5618 * Persist the new block (the last block of the given file). 
  /**
   * Persist the new block (the last block of the given file).
   * @param path path of the file
   * @param file the under-construction file that gained the block
   */
  private void persistNewBlock(String path, INodeFile file) {
    Preconditions.checkArgument(file.isUnderConstruction());
    getEditLog().logAddBlock(path, file);
    if (NameNode.stateChangeLog.isDebugEnabled()) {
      NameNode.stateChangeLog.debug("persistNewBlock: "
          + path + " with new block " + file.getLastBlock().toString()
          + ", current total block count is " + file.getBlocks().length);
    }
  }

  /**
   * SafeModeInfo contains information related to the safe mode.
   * <p>
   * An instance of {@link SafeModeInfo} is created when the name node
   * enters safe mode.
   * <p>
   * During name node startup {@link SafeModeInfo} counts the number of
   * <em>safe blocks</em>, those that have at least the minimal number of
   * replicas, and calculates the ratio of safe blocks to the total number
   * of blocks in the system, which is the size of blocks in
   * {@link FSNamesystem#blockManager}. When the ratio reaches the
   * {@link #threshold} it starts the SafeModeMonitor daemon in order
   * to monitor whether the safe mode {@link #extension} is passed.
   * Then it leaves safe mode and destroys itself.
   * <p>
   * If safe mode is turned on manually then the number of safe blocks is
   * not tracked because the name node is not intended to leave safe mode
   * automatically in the case.
   *
   * @see ClientProtocol#setSafeMode(HdfsConstants.SafeModeAction, boolean)
   */
  public class SafeModeInfo {
    // configuration fields
    /** Safe mode threshold condition %.*/
    private final double threshold;
    /** Safe mode minimum number of datanodes alive */
    private final int datanodeThreshold;
    /**
     * Safe mode extension after the threshold.
     * Make it volatile so that getSafeModeTip can read the latest value
     * without taking a lock.
     */
    private volatile int extension;
    /** Min replication required by safe mode. */
    private final int safeReplication;
    /** threshold for populating needed replication queues */
    private final double replQueueThreshold;
    // internal fields
    /** Time when threshold was reached.
     * <br> -1 safe mode is off
     * <br> 0 safe mode is on, and threshold is not reached yet
     * <br> >0 safe mode is on, but we are in extension period
     */
    private long reached = -1;
    /** Total number of blocks. */
    int blockTotal;
    /** Number of safe blocks. */
    int blockSafe;
    /** Number of blocks needed to satisfy safe mode threshold condition */
    private int blockThreshold;
    /** Number of blocks needed before populating replication queues */
    private int blockReplQueueThreshold;
    /** time of the last status printout */
    private long lastStatusReport = 0;
    /**
     * Was safemode entered automatically because available resources were low.
     * Make it volatile so that getSafeModeTip can read the latest value
     * without taking a lock.
     */
    private volatile boolean resourcesLow = false;
    /** Should safemode adjust its block totals as blocks come in */
    private boolean shouldIncrementallyTrackBlocks = false;
    /** counter for tracking startup progress of reported blocks */
    private Counter awaitingReportedBlocksCounter;

    /**
     * Creates SafeModeInfo when the name node enters
     * automatic safe mode at startup.
     *
     * @param conf configuration
     */
    private SafeModeInfo(Configuration conf) {
      this.threshold = conf.getFloat(DFS_NAMENODE_SAFEMODE_THRESHOLD_PCT_KEY,
          DFS_NAMENODE_SAFEMODE_THRESHOLD_PCT_DEFAULT);
      if(threshold > 1.0) {
        LOG.warn("The threshold value should't be greater than 1, threshold: " + threshold);
      }
      this.datanodeThreshold = conf.getInt(
          DFS_NAMENODE_SAFEMODE_MIN_DATANODES_KEY,
          DFS_NAMENODE_SAFEMODE_MIN_DATANODES_DEFAULT);
      this.extension = conf.getInt(DFS_NAMENODE_SAFEMODE_EXTENSION_KEY, 0);
      this.safeReplication = conf.getInt(DFS_NAMENODE_REPLICATION_MIN_KEY,
          DFS_NAMENODE_REPLICATION_MIN_DEFAULT);

      LOG.info(DFS_NAMENODE_SAFEMODE_THRESHOLD_PCT_KEY + " = " + threshold);
      LOG.info(DFS_NAMENODE_SAFEMODE_MIN_DATANODES_KEY + " = " + datanodeThreshold);
      LOG.info(DFS_NAMENODE_SAFEMODE_EXTENSION_KEY + " = " + extension);

      // default to safe mode threshold (i.e., don't populate queues before
      // leaving safe mode)
      this.replQueueThreshold =
          conf.getFloat(DFS_NAMENODE_REPL_QUEUE_THRESHOLD_PCT_KEY,
              (float) threshold);
      this.blockTotal = 0;
      this.blockSafe = 0;
    }

    /**
     * In the HA case, the StandbyNode can be in safemode while the namespace
     * is modified by the edit log tailer. In this case, the number of total
     * blocks changes as edits are processed (eg blocks are added and deleted).
     * However, we don't want to do the incremental tracking during the
     * startup-time loading process -- only once the initial total has been
     * set after the image has been loaded.
     */
    private boolean shouldIncrementallyTrackBlocks() {
      return shouldIncrementallyTrackBlocks;
    }

    /**
     * Creates SafeModeInfo when safe mode is entered manually, or because
     * available resources are low.
     *
     * The {@link #threshold} is set to 1.5 so that it could never be reached.
     * {@link #blockTotal} is set to -1 to indicate that safe mode is manual.
     *
     * @see SafeModeInfo
     */
    private SafeModeInfo(boolean resourcesLow) {
      this.threshold = 1.5f; // this threshold can never be reached
      this.datanodeThreshold = Integer.MAX_VALUE;
      this.extension = Integer.MAX_VALUE;
      this.safeReplication = Short.MAX_VALUE + 1; // more than maxReplication
      this.replQueueThreshold = 1.5f; // can never be reached
      this.blockTotal = -1;
      this.blockSafe = -1;
      this.resourcesLow = resourcesLow;
      enter();
      reportStatus("STATE* Safe mode is ON.", true);
    }

    /**
     * Check if safe mode is on.
     * @return true if in safe mode
     */
    private synchronized boolean isOn() {
      doConsistencyCheck();
      return this.reached >= 0;
    }

    /**
     * Enter safe mode.
     */
    private void enter() {
      this.reached = 0;
    }

    /**
     * Leave safe mode.
     * <p>
     * Check for invalid, under- & over-replicated blocks in the end of startup.
     */
    private synchronized void leave() {
      // if not done yet, initialize replication queues.
      // In the standby, do not populate repl queues
      if (!isPopulatingReplQueues() && shouldPopulateReplQueues()) {
        initializeReplQueues();
      }
      long timeInSafemode = now() - startTime;
      NameNode.stateChangeLog.info("STATE* Leaving safe mode after "
          + timeInSafemode/1000 + " secs");
      NameNode.getNameNodeMetrics().setSafeModeTime((int) timeInSafemode);

      //Log the following only once (when transitioning from ON -> OFF)
      if (reached >= 0) {
        NameNode.stateChangeLog.info("STATE* Safe mode is OFF");
      }
      reached = -1;
      safeMode = null;
      final NetworkTopology nt = blockManager.getDatanodeManager().getNetworkTopology();
      NameNode.stateChangeLog.info("STATE* Network topology has "
          + nt.getNumOfRacks() + " racks and "
          + nt.getNumOfLeaves() + " datanodes");
      NameNode.stateChangeLog.info("STATE* UnderReplicatedBlocks has "
          + blockManager.numOfUnderReplicatedBlocks() + " blocks");

      startSecretManagerIfNecessary();

      // If startup has not yet completed, end safemode phase.
      StartupProgress prog = NameNode.getStartupProgress();
      if (prog.getStatus(Phase.SAFEMODE) != Status.COMPLETE) {
        prog.endStep(Phase.SAFEMODE, STEP_AWAITING_REPORTED_BLOCKS);
        prog.endPhase(Phase.SAFEMODE);
      }
    }

    /**
     * Check whether we have reached the threshold for
     * initializing replication queues.
     */
    private synchronized boolean canInitializeReplQueues() {
      return shouldPopulateReplQueues()
          && blockSafe >= blockReplQueueThreshold;
    }

    /**
     * Safe mode can be turned off iff
     * the threshold is reached and
     * the extension time have passed.
     * @return true if can leave or false otherwise.
     */
    private synchronized boolean canLeave() {
      // reached == 0 means the block threshold itself is not yet satisfied.
      if (reached == 0) {
        return false;
      }

      if (now() - reached < extension) {
        reportStatus("STATE* Safe mode ON, in safe mode extension.", false);
        return false;
      }

      // Thresholds may regress (e.g. datanodes dying) during the extension.
      if (needEnter()) {
        reportStatus("STATE* Safe mode ON, thresholds not met.", false);
        return false;
      }

      return true;
    }

    /**
     * There is no need to enter safe mode
     * if DFS is empty or {@link #threshold} == 0
     */
    private boolean needEnter() {
      return (threshold != 0 && blockSafe < blockThreshold) ||
          (datanodeThreshold != 0 && getNumLiveDataNodes() < datanodeThreshold) ||
          (!nameNodeHasResourcesAvailable());
    }

    /**
     * Check and trigger safe mode if needed.
     */
    private void checkMode() {
      // Have to have write-lock since leaving safemode initializes
      // repl queues, which requires write lock
      assert hasWriteLock();
      if (inTransitionToActive()) {
        return;
      }
      // if smmthread is already running, the block threshold must have been
      // reached before, there is no need to enter the safe mode again
      if (smmthread == null && needEnter()) {
        enter();
        // check if we are ready to initialize replication queues
        if (canInitializeReplQueues() && !isPopulatingReplQueues()
            && !haEnabled) {
          initializeReplQueues();
        }
        reportStatus("STATE* Safe mode ON.", false);
        return;
      }
      // the threshold is reached or was reached before
      if (!isOn() ||                          // safe mode is off
          extension <= 0 || threshold <= 0) { // don't need to wait
        this.leave(); // leave safe mode
        return;
      }
      if (reached > 0) { // threshold has already been reached before
        reportStatus("STATE* Safe mode ON.", false);
        return;
      }
      // start monitor
      reached = now();
      if (smmthread == null) {
        smmthread = new Daemon(new SafeModeMonitor());
        smmthread.start();
        reportStatus("STATE* Safe mode extension entered.", true);
      }

      // check if we are ready to initialize replication queues
      if (canInitializeReplQueues() && !isPopulatingReplQueues() && !haEnabled) {
        initializeReplQueues();
      }
    }

    /**
     * Set total number of blocks.
     */
    private synchronized void setBlockTotal(int total) {
      this.blockTotal = total;
      this.blockThreshold = (int) (blockTotal * threshold);
      this.blockReplQueueThreshold =
          (int) (blockTotal * replQueueThreshold);
      if (haEnabled) {
        // After we initialize the block count, any further namespace
        // modifications done while in safe mode need to keep track
        // of the number of total blocks in the system.
        this.shouldIncrementallyTrackBlocks = true;
      }
      if(blockSafe < 0)
        this.blockSafe = 0;
      checkMode();
    }

    /**
     * Increment number of safe blocks if current block has
     * reached minimal replication.
     * @param replication current replication
     */
    private synchronized void incrementSafeBlockCount(short replication) {
      if (replication == safeReplication) {
        this.blockSafe++;

        // Report startup progress only if we haven't completed startup yet.
        StartupProgress prog = NameNode.getStartupProgress();
        if (prog.getStatus(Phase.SAFEMODE) != Status.COMPLETE) {
          if (this.awaitingReportedBlocksCounter == null) {
            this.awaitingReportedBlocksCounter = prog.getCounter(Phase.SAFEMODE,
                STEP_AWAITING_REPORTED_BLOCKS);
          }
          this.awaitingReportedBlocksCounter.increment();
        }

        checkMode();
      }
    }
5951 * @param replication current replication 5952 */ 5953 private synchronized void decrementSafeBlockCount(short replication) { 5954 if (replication == safeReplication-1) { 5955 this.blockSafe--; 5956 //blockSafe is set to -1 in manual / low resources safemode 5957 assert blockSafe >= 0 || isManual() || areResourcesLow(); 5958 checkMode(); 5959 } 5960 } 5961 5962 /** 5963 * Check if safe mode was entered manually 5964 */ 5965 private boolean isManual() { 5966 return extension == Integer.MAX_VALUE; 5967 } 5968 5969 /** 5970 * Set manual safe mode. 5971 */ 5972 private synchronized void setManual() { 5973 extension = Integer.MAX_VALUE; 5974 } 5975 5976 /** 5977 * Check if safe mode was entered due to resources being low. 5978 */ 5979 private boolean areResourcesLow() { 5980 return resourcesLow; 5981 } 5982 5983 /** 5984 * Set that resources are low for this instance of safe mode. 5985 */ 5986 private void setResourcesLow() { 5987 resourcesLow = true; 5988 } 5989 5990 /** 5991 * A tip on how safe mode is to be turned off: manually or automatically. 5992 */ 5993 String getTurnOffTip() { 5994 if(!isOn()) { 5995 return "Safe mode is OFF."; 5996 } 5997 5998 //Manual OR low-resource safemode. (Admin intervention required) 5999 String adminMsg = "It was turned on manually. "; 6000 if (areResourcesLow()) { 6001 adminMsg = "Resources are low on NN. Please add or free up more " 6002 + "resources then turn off safe mode manually. NOTE: If you turn off" 6003 + " safe mode before adding resources, " 6004 + "the NN will immediately return to safe mode. 
"; 6005 } 6006 if (isManual() || areResourcesLow()) { 6007 return adminMsg 6008 + "Use \"hdfs dfsadmin -safemode leave\" to turn safe mode off."; 6009 } 6010 6011 boolean thresholdsMet = true; 6012 int numLive = getNumLiveDataNodes(); 6013 String msg = ""; 6014 if (blockSafe < blockThreshold) { 6015 msg += String.format( 6016 "The reported blocks %d needs additional %d" 6017 + " blocks to reach the threshold %.4f of total blocks %d.%n", 6018 blockSafe, (blockThreshold - blockSafe) + 1, threshold, blockTotal); 6019 thresholdsMet = false; 6020 } else { 6021 msg += String.format("The reported blocks %d has reached the threshold" 6022 + " %.4f of total blocks %d. ", blockSafe, threshold, blockTotal); 6023 } 6024 if (numLive < datanodeThreshold) { 6025 msg += String.format( 6026 "The number of live datanodes %d needs an additional %d live " 6027 + "datanodes to reach the minimum number %d.%n", 6028 numLive, (datanodeThreshold - numLive), datanodeThreshold); 6029 thresholdsMet = false; 6030 } else { 6031 msg += String.format("The number of live datanodes %d has reached " 6032 + "the minimum number %d. ", 6033 numLive, datanodeThreshold); 6034 } 6035 msg += (reached > 0) ? "In safe mode extension. " : ""; 6036 msg += "Safe mode will be turned off automatically "; 6037 6038 if (!thresholdsMet) { 6039 msg += "once the thresholds have been reached."; 6040 } else if (reached + extension - now() > 0) { 6041 msg += ("in " + (reached + extension - now()) / 1000 + " seconds."); 6042 } else { 6043 msg += "soon."; 6044 } 6045 6046 return msg; 6047 } 6048 6049 /** 6050 * Print status every 20 seconds. 
     */
    private void reportStatus(String msg, boolean rightNow) {
      long curTime = now();
      // Unless forced, emit at most one status line per 20 seconds.
      if(!rightNow && (curTime - lastStatusReport < 20 * 1000))
        return;
      NameNode.stateChangeLog.info(msg + " \n" + getTurnOffTip());
      lastStatusReport = curTime;
    }

    /** Human-readable snapshot of the safe-mode counters. */
    @Override
    public String toString() {
      String resText = "Current safe blocks = "
        + blockSafe
        + ". Target blocks = " + blockThreshold + " for threshold = %" + threshold
        + ". Minimal replication = " + safeReplication + ".";
      if (reached > 0)
        resText += " Threshold was reached " + new Date(reached) + ".";
      return resText;
    }

    /**
     * Checks consistency of the class state.
     * This is costly so only runs if asserts are enabled.
     */
    private void doConsistencyCheck() {
      // Detect whether -ea is on: the assignment only executes when
      // assertions are enabled.
      boolean assertsOn = false;
      assert assertsOn = true; // set to true if asserts are on
      if (!assertsOn) return;

      if (blockTotal == -1 && blockSafe == -1) {
        return; // manual safe mode
      }
      int activeBlocks = blockManager.getActiveBlockCount();
      if ((blockTotal != activeBlocks) &&
          !(blockSafe >= 0 && blockSafe <= blockTotal)) {
        throw new AssertionError(
            " SafeMode: Inconsistent filesystem state: "
            + "SafeMode data: blockTotal=" + blockTotal
            + " blockSafe=" + blockSafe + "; "
            + "BlockManager data: active=" + activeBlocks);
      }
    }

    /**
     * Apply deltas to the safe/total block counters.
     * Only effective when incremental tracking is enabled (HA setups);
     * otherwise the totals are recomputed wholesale and deltas are ignored.
     */
    private synchronized void adjustBlockTotals(int deltaSafe, int deltaTotal) {
      if (!shouldIncrementallyTrackBlocks) {
        return;
      }
      assert haEnabled;

      if (LOG.isDebugEnabled()) {
        LOG.debug("Adjusting block totals from " +
            blockSafe + "/" + blockTotal + " to " +
            (blockSafe + deltaSafe) + "/" + (blockTotal + deltaTotal));
      }
      assert blockSafe + deltaSafe >= 0 : "Can't reduce blockSafe " +
        blockSafe + " by " + deltaSafe + ": would be negative";
      assert blockTotal + deltaTotal >= 0 : "Can't reduce blockTotal " +
        blockTotal + " by " + deltaTotal + ": would be negative";

      blockSafe += deltaSafe;
      setBlockTotal(blockTotal + deltaTotal);
    }
  }

  /**
   * Periodically check whether it is time to leave safe mode.
   * This thread starts when the threshold level is reached.
   */
  class SafeModeMonitor implements Runnable {
    /** interval in msec for checking safe mode: {@value} */
    private static final long recheckInterval = 1000;

    /** Poll under the write lock until safe mode can be left. */
    @Override
    public void run() {
      while (fsRunning) {
        writeLock();
        try {
          if (safeMode == null) { // Not in safe mode.
            break;
          }
          if (safeMode.canLeave()) {
            // Leave safe mode.
            safeMode.leave();
            smmthread = null;
            break;
          }
        } finally {
          writeUnlock();
        }

        try {
          Thread.sleep(recheckInterval);
        } catch (InterruptedException ie) {
          // Ignored; the loop re-checks fsRunning and safe-mode state.
        }
      }
      if (!fsRunning) {
        LOG.info("NameNode is being shutdown, exit SafeModeMonitor thread");
      }
    }
  }

  /**
   * Handle a dfsadmin -safemode request.
   * GET only reports; ENTER/LEAVE require superuser privilege.
   * @return whether the namesystem is in safe mode after the action
   */
  boolean setSafeMode(SafeModeAction action) throws IOException {
    if (action != SafeModeAction.SAFEMODE_GET) {
      checkSuperuserPrivilege();
      switch(action) {
      case SAFEMODE_LEAVE: // leave safe mode
        leaveSafeMode();
        break;
      case SAFEMODE_ENTER: // enter safe mode
        enterSafeMode(false);
        break;
      default:
        LOG.error("Unexpected safe mode action");
      }
    }
    return isInSafeMode();
  }

  @Override
  public void checkSafeMode() {
    // safeMode is volatile, and may be set to null at any time
    SafeModeInfo safeMode = this.safeMode;
    if (safeMode != null) {
      safeMode.checkMode();
    }
  }

  @Override
  public boolean isInSafeMode() {
    // safeMode is volatile, and may be set to null at any time
    SafeModeInfo safeMode = this.safeMode;
    if (safeMode == null)
      return false;
    return
safeMode.isOn(); 6189 } 6190 6191 @Override 6192 public boolean isInStartupSafeMode() { 6193 // safeMode is volatile, and may be set to null at any time 6194 SafeModeInfo safeMode = this.safeMode; 6195 if (safeMode == null) 6196 return false; 6197 // If the NN is in safemode, and not due to manual / low resources, we 6198 // assume it must be because of startup. If the NN had low resources during 6199 // startup, we assume it came out of startup safemode and it is now in low 6200 // resources safemode 6201 return !safeMode.isManual() && !safeMode.areResourcesLow() 6202 && safeMode.isOn(); 6203 } 6204 6205 /** 6206 * Check if replication queues are to be populated 6207 * @return true when node is HAState.Active and not in the very first safemode 6208 */ 6209 @Override 6210 public boolean isPopulatingReplQueues() { 6211 if (!shouldPopulateReplQueues()) { 6212 return false; 6213 } 6214 return initializedReplQueues; 6215 } 6216 6217 private boolean shouldPopulateReplQueues() { 6218 if(haContext == null || haContext.getState() == null) 6219 return false; 6220 return haContext.getState().shouldPopulateReplQueues(); 6221 } 6222 6223 @Override 6224 public void incrementSafeBlockCount(int replication) { 6225 // safeMode is volatile, and may be set to null at any time 6226 SafeModeInfo safeMode = this.safeMode; 6227 if (safeMode == null) 6228 return; 6229 safeMode.incrementSafeBlockCount((short)replication); 6230 } 6231 6232 @Override 6233 public void decrementSafeBlockCount(Block b) { 6234 // safeMode is volatile, and may be set to null at any time 6235 SafeModeInfo safeMode = this.safeMode; 6236 if (safeMode == null) // mostly true 6237 return; 6238 BlockInfo storedBlock = getStoredBlock(b); 6239 if (storedBlock.isComplete()) { 6240 safeMode.decrementSafeBlockCount((short)blockManager.countNodes(b).liveReplicas()); 6241 } 6242 } 6243 6244 /** 6245 * Adjust the total number of blocks safe and expected during safe mode. 
6246 * If safe mode is not currently on, this is a no-op. 6247 * @param deltaSafe the change in number of safe blocks 6248 * @param deltaTotal the change i nnumber of total blocks expected 6249 */ 6250 @Override 6251 public void adjustSafeModeBlockTotals(int deltaSafe, int deltaTotal) { 6252 // safeMode is volatile, and may be set to null at any time 6253 SafeModeInfo safeMode = this.safeMode; 6254 if (safeMode == null) 6255 return; 6256 safeMode.adjustBlockTotals(deltaSafe, deltaTotal); 6257 } 6258 6259 /** 6260 * Set the total number of blocks in the system. 6261 */ 6262 public void setBlockTotal() { 6263 // safeMode is volatile, and may be set to null at any time 6264 SafeModeInfo safeMode = this.safeMode; 6265 if (safeMode == null) 6266 return; 6267 safeMode.setBlockTotal((int)getCompleteBlocksTotal()); 6268 } 6269 6270 /** 6271 * Get the total number of blocks in the system. 6272 */ 6273 @Override // FSNamesystemMBean 6274 @Metric 6275 public long getBlocksTotal() { 6276 return blockManager.getTotalBlocks(); 6277 } 6278 6279 /** 6280 * Get the total number of COMPLETE blocks in the system. 6281 * For safe mode only complete blocks are counted. 6282 */ 6283 private long getCompleteBlocksTotal() { 6284 // Calculate number of blocks under construction 6285 long numUCBlocks = 0; 6286 readLock(); 6287 numUCBlocks = leaseManager.getNumUnderConstructionBlocks(); 6288 try { 6289 return getBlocksTotal() - numUCBlocks; 6290 } finally { 6291 readUnlock(); 6292 } 6293 } 6294 6295 /** 6296 * Enter safe mode. If resourcesLow is false, then we assume it is manual 6297 * @throws IOException 6298 */ 6299 void enterSafeMode(boolean resourcesLow) throws IOException { 6300 writeLock(); 6301 try { 6302 // Stop the secret manager, since rolling the master key would 6303 // try to write to the edit log 6304 stopSecretManager(); 6305 6306 // Ensure that any concurrent operations have been fully synced 6307 // before entering safe mode. 
      // This ensures that the FSImage
      // is entirely stable on disk as soon as we're in safe mode.
      boolean isEditlogOpenForWrite = getEditLog().isOpenForWrite();
      // Before Editlog is in OpenForWrite mode, editLogStream will be null. So,
      // logSyncAll call can be called only when Editlog is in OpenForWrite mode
      if (isEditlogOpenForWrite) {
        getEditLog().logSyncAll();
      }
      // Not yet in safe mode: create the tracking state and return.
      if (!isInSafeMode()) {
        safeMode = new SafeModeInfo(resourcesLow);
        return;
      }
      // Already in safe mode: record the reason (low resources vs. manual)
      // and re-sync the edit log before announcing the state.
      if (resourcesLow) {
        safeMode.setResourcesLow();
      } else {
        safeMode.setManual();
      }
      if (isEditlogOpenForWrite) {
        getEditLog().logSyncAll();
      }
      NameNode.stateChangeLog.info("STATE* Safe mode is ON"
          + safeMode.getTurnOffTip());
    } finally {
      writeUnlock();
    }
  }

  /**
   * Leave safe mode.
   */
  void leaveSafeMode() {
    writeLock();
    try {
      if (!isInSafeMode()) {
        NameNode.stateChangeLog.info("STATE* Safe mode is already OFF");
        return;
      }
      safeMode.leave();
    } finally {
      writeUnlock();
    }
  }

  /** @return the safe-mode tip, or "" when not in safe mode. */
  String getSafeModeTip() {
    // There is no need to take readLock.
    // Don't use isInSafeMode as this.safeMode might be set to null
    // after isInSafeMode returns.
    boolean inSafeMode;
    SafeModeInfo safeMode = this.safeMode;
    if (safeMode == null) {
      inSafeMode = false;
    } else {
      inSafeMode = safeMode.isOn();
    }

    if (!inSafeMode) {
      return "";
    } else {
      return safeMode.getTurnOffTip();
    }
  }

  CheckpointSignature rollEditLog() throws IOException {
    checkSuperuserPrivilege();
    checkOperation(OperationCategory.JOURNAL);
    writeLock();
    try {
      // Re-check under the write lock: state may have changed since the
      // unlocked check above.
      checkOperation(OperationCategory.JOURNAL);
      checkNameNodeSafeMode("Log not rolled");
      if (Server.isRpcInvocation()) {
        LOG.info("Roll Edit Log from " + Server.getRemoteAddress());
      }
      return getFSImage().rollEditLog();
    } finally {
      writeUnlock();
    }
  }

  NamenodeCommand startCheckpoint(NamenodeRegistration backupNode,
      NamenodeRegistration activeNamenode) throws IOException {
    checkOperation(OperationCategory.CHECKPOINT);
    // Retry cache: if a previous attempt of this RPC succeeded, replay its
    // recorded payload instead of starting another checkpoint.
    CacheEntryWithPayload cacheEntry = RetryCache.waitForCompletion(retryCache,
        null);
    if (cacheEntry != null && cacheEntry.isSuccess()) {
      return (NamenodeCommand) cacheEntry.getPayload();
    }
    writeLock();
    NamenodeCommand cmd = null;
    try {
      checkOperation(OperationCategory.CHECKPOINT);
      checkNameNodeSafeMode("Checkpoint not started");

      LOG.info("Start checkpoint for " + backupNode.getAddress());
      cmd = getFSImage().startCheckpoint(backupNode, activeNamenode);
      getEditLog().logSync();
      return cmd;
    } finally {
      writeUnlock();
      // cmd != null marks success for subsequent retries of this RPC.
      RetryCache.setState(cacheEntry, cmd != null, cmd);
    }
  }

  public void processIncrementalBlockReport(final DatanodeID nodeID,
      final StorageReceivedDeletedBlocks srdb)
      throws IOException {
    writeLock();
    try {
      blockManager.processIncrementalBlockReport(nodeID, srdb);
    } finally {
      writeUnlock();
    }
  }

  void endCheckpoint(NamenodeRegistration registration,
      CheckpointSignature sig) throws IOException {
    checkOperation(OperationCategory.CHECKPOINT);
    // Retry cache: a replayed RPC that already succeeded returns at once.
    CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache);
    if (cacheEntry != null && cacheEntry.isSuccess()) {
      return; // Return previous response
    }
    boolean success = false;
    readLock();
    try {
      checkOperation(OperationCategory.CHECKPOINT);

      checkNameNodeSafeMode("Checkpoint not ended");
      LOG.info("End checkpoint for " + registration.getAddress());
      getFSImage().endCheckpoint(sig);
      success = true;
    } finally {
      readUnlock();
      RetryCache.setState(cacheEntry, success);
    }
  }

  /** Build a PermissionStatus stamped with the NN's owner and supergroup. */
  PermissionStatus createFsOwnerPermissions(FsPermission permission) {
    return new PermissionStatus(fsOwner.getShortUserName(), supergroup, permission);
  }

  /** Require that the caller owns {@code path}. */
  private void checkOwner(FSPermissionChecker pc, String path)
      throws AccessControlException, UnresolvedLinkException {
    checkPermission(pc, path, true, null, null, null, null);
  }

  /** Require {@code access} on the path itself. */
  private void checkPathAccess(FSPermissionChecker pc,
      String path, FsAction access) throws AccessControlException,
      UnresolvedLinkException {
    checkPermission(pc, path, false, null, null, access, null);
  }

  /**
   * Reject the superuser when the inode carries the
   * "unreadable by superuser" xattr.
   */
  private void checkUnreadableBySuperuser(FSPermissionChecker pc,
      INode inode, int snapshotId)
      throws IOException {
    for (XAttr xattr : dir.getXAttrs(inode, snapshotId)) {
      if (XAttrHelper.getPrefixName(xattr).
          equals(SECURITY_XATTR_UNREADABLE_BY_SUPERUSER)) {
        if (pc.isSuperUser()) {
          throw new AccessControlException("Access is denied for " +
              pc.getUser() + " since the superuser is not allowed to " +
              "perform this operation.");
        }
      }
    }
  }

  /** Require {@code access} on the parent directory of the path. */
  private void checkParentAccess(FSPermissionChecker pc,
      String path, FsAction access) throws AccessControlException,
      UnresolvedLinkException {
    checkPermission(pc, path, false, null, access, null, null);
  }

  /** Require {@code access} on the ancestor of the path. */
  private void checkAncestorAccess(FSPermissionChecker pc,
      String path, FsAction access) throws AccessControlException,
      UnresolvedLinkException {
    checkPermission(pc, path, false, access, null, null, null);
  }

  /** Traverse-only check: no owner or specific access bits requested. */
  private void checkTraverse(FSPermissionChecker pc, String path)
      throws AccessControlException, UnresolvedLinkException {
    checkPermission(pc, path, false, null, null, null, null);
  }

  /**
   * This is a wrapper for FSDirectory.resolvePath(). If the path passed
   * is prefixed with /.reserved/raw, then it checks to ensure that the caller
   * has super user privs.
   *
   * @param path The path to resolve.
   * @param pathComponents path components corresponding to the path
   * @return if the path indicates an inode, return path after replacing up to
   *         <inodeid> with the corresponding path of the inode, else the path
   *         in {@code src} as is. If the path refers to a path in the "raw"
   *         directory, return the non-raw pathname.
   * @throws FileNotFoundException
   * @throws AccessControlException
   */
  private String resolvePath(String path, byte[][] pathComponents)
      throws FileNotFoundException, AccessControlException {
    if (FSDirectory.isReservedRawName(path)) {
      checkSuperuserPrivilege();
    }
    return FSDirectory.resolvePath(path, pathComponents, dir);
  }

  @Override
  public void checkSuperuserPrivilege()
      throws AccessControlException {
    // No-op when permission checking is disabled.
    if (isPermissionEnabled) {
      FSPermissionChecker pc = getPermissionChecker();
      pc.checkSuperuserPrivilege();
    }
  }

  /**
   * Check whether current user have permissions to access the path. For more
   * details of the parameters, see
   * {@link FSPermissionChecker#checkPermission}.
   */
  private void checkPermission(FSPermissionChecker pc,
      String path, boolean doCheckOwner, FsAction ancestorAccess,
      FsAction parentAccess, FsAction access, FsAction subAccess)
      throws AccessControlException, UnresolvedLinkException {
    // Delegates with ignoreEmptyDir=false and resolveLink=true.
    checkPermission(pc, path, doCheckOwner, ancestorAccess,
        parentAccess, access, subAccess, false, true);
  }

  /**
   * Check whether current user have permissions to access the path. For more
   * details of the parameters, see
   * {@link FSPermissionChecker#checkPermission}.
   */
  private void checkPermission(FSPermissionChecker pc,
      String path, boolean doCheckOwner, FsAction ancestorAccess,
      FsAction parentAccess, FsAction access, FsAction subAccess,
      boolean ignoreEmptyDir, boolean resolveLink)
      throws AccessControlException, UnresolvedLinkException {
    // The superuser bypasses all permission checks.
    if (!pc.isSuperUser()) {
      waitForLoadingFSImage();
      readLock();
      try {
        pc.checkPermission(path, dir, doCheckOwner, ancestorAccess,
            parentAccess, access, subAccess, ignoreEmptyDir, resolveLink);
      } finally {
        readUnlock();
      }
    }
  }

  /**
   * Check to see if we have exceeded the limit on the number
   * of inodes.
   */
  void checkFsObjectLimit() throws IOException {
    // maxFsObjects == 0 means "no limit configured".
    if (maxFsObjects != 0 &&
        maxFsObjects <= dir.totalInodes() + getBlocksTotal()) {
      throw new IOException("Exceeded the configured number of objects " +
          maxFsObjects + " in the filesystem.");
    }
  }

  /**
   * Get the total number of objects in the system.
   */
  @Override // FSNamesystemMBean
  public long getMaxObjects() {
    return maxFsObjects;
  }

  @Override // FSNamesystemMBean
  @Metric
  public long getFilesTotal() {
    // There is no need to take fSNamesystem's lock as
    // FSDirectory has its own lock.
    return this.dir.totalInodes();
  }

  @Override // FSNamesystemMBean
  @Metric
  public long getPendingReplicationBlocks() {
    return blockManager.getPendingReplicationBlocksCount();
  }

  @Override // FSNamesystemMBean
  @Metric
  public long getUnderReplicatedBlocks() {
    return blockManager.getUnderReplicatedBlocksCount();
  }

  /** Returns number of blocks with corrupt replicas */
  @Metric({"CorruptBlocks", "Number of blocks with corrupt replicas"})
  public long getCorruptReplicaBlocks() {
    return blockManager.getCorruptReplicaBlocksCount();
  }

  @Override // FSNamesystemMBean
  @Metric
  public long getScheduledReplicationBlocks() {
    return blockManager.getScheduledReplicationBlocksCount();
  }

  @Override
  @Metric
  public long getPendingDeletionBlocks() {
    return blockManager.getPendingDeletionBlocksCount();
  }

  @Override
  public long getBlockDeletionStartTime() {
    return startTime + blockManager.getStartupDelayBlockDeletionInMs();
  }

  @Metric
  public long getExcessBlocks() {
    return blockManager.getExcessBlocksCount();
  }

  // HA-only metric
  @Metric
  public long getPostponedMisreplicatedBlocks() {
    return blockManager.getPostponedMisreplicatedBlocksCount();
  }

  // HA-only metric
  @Metric
  public int getPendingDataNodeMessageCount() {
    return blockManager.getPendingDataNodeMessageCount();
  }

  // HA-only metric
  @Metric
  public String getHAState() {
    return haContext.getState().toString();
  }

  // HA-only metric
  @Metric
  public long getMillisSinceLastLoadedEdits() {
    // Only meaningful on a standby with a running tailer; otherwise 0.
    if (isInStandbyState() && editLogTailer != null) {
      return now() - editLogTailer.getLastLoadTimestamp();
    } else {
      return 0;
    }
  }

  @Metric
  public int getBlockCapacity() {
    return blockManager.getCapacity();
  }

  @Override // FSNamesystemMBean
  public String getFSState() {
    return isInSafeMode() ? "safeMode" : "Operational";
  }

  private ObjectName mbeanName;
  private ObjectName mxbeanName;

  /**
   * Register the FSNamesystem MBean using the name
   * "hadoop:service=NameNode,name=FSNamesystemState"
   */
  private void registerMBean() {
    // We can only implement one MXBean interface, so we keep the old one.
    try {
      StandardMBean bean = new StandardMBean(this, FSNamesystemMBean.class);
      mbeanName = MBeans.register("NameNode", "FSNamesystemState", bean);
    } catch (NotCompliantMBeanException e) {
      throw new RuntimeException("Bad MBean setup", e);
    }

    LOG.info("Registered FSNamesystemState MBean");
  }

  /**
   * shutdown FSNamesystem
   */
  void shutdown() {
    if (snapshotManager != null) {
      snapshotManager.shutdown();
    }
    if (mbeanName != null) {
      MBeans.unregister(mbeanName);
      mbeanName = null;
    }
    if (mxbeanName != null) {
      MBeans.unregister(mxbeanName);
      mxbeanName = null;
    }
    if (dir != null) {
      dir.shutdown();
    }
    if (blockManager != null) {
      blockManager.shutdown();
    }
  }

  @Override // FSNamesystemMBean
  public int getNumLiveDataNodes() {
    return getBlockManager().getDatanodeManager().getNumLiveDataNodes();
  }

  @Override // FSNamesystemMBean
  public int getNumDeadDataNodes() {
    return getBlockManager().getDatanodeManager().getNumDeadDataNodes();
  }

  @Override // FSNamesystemMBean
  public int getNumDecomLiveDataNodes() {
    // Count decommissioned nodes among the live set.
    final List<DatanodeDescriptor> live = new ArrayList<DatanodeDescriptor>();
    getBlockManager().getDatanodeManager().fetchDatanodes(live, null, true);
    int liveDecommissioned = 0;
    for (DatanodeDescriptor node : live) {
      liveDecommissioned += node.isDecommissioned() ?
          1 : 0;
    }
    return liveDecommissioned;
  }

  @Override // FSNamesystemMBean
  public int getNumDecomDeadDataNodes() {
    // Count decommissioned nodes among the dead set.
    final List<DatanodeDescriptor> dead = new ArrayList<DatanodeDescriptor>();
    getBlockManager().getDatanodeManager().fetchDatanodes(null, dead, true);
    int deadDecommissioned = 0;
    for (DatanodeDescriptor node : dead) {
      deadDecommissioned += node.isDecommissioned() ? 1 : 0;
    }
    return deadDecommissioned;
  }

  @Override // FSNamesystemMBean
  public int getNumDecommissioningDataNodes() {
    return getBlockManager().getDatanodeManager().getDecommissioningNodes()
        .size();
  }

  @Override // FSNamesystemMBean
  @Metric({"StaleDataNodes",
    "Number of datanodes marked stale due to delayed heartbeat"})
  public int getNumStaleDataNodes() {
    return getBlockManager().getDatanodeManager().getNumStaleNodes();
  }

  /**
   * Storages are marked as "content stale" after NN restart or fails over and
   * before NN receives the first Heartbeat followed by the first Blockreport.
   */
  @Override // FSNamesystemMBean
  public int getNumStaleStorages() {
    return getBlockManager().getDatanodeManager().getNumStaleStorages();
  }

  /**
   * Sets the current generation stamp for legacy blocks
   */
  void setGenerationStampV1(long stamp) {
    generationStampV1.setCurrentValue(stamp);
  }

  /**
   * Gets the current generation stamp for legacy blocks
   */
  long getGenerationStampV1() {
    return generationStampV1.getCurrentValue();
  }

  /**
   * Sets the current generation stamp for this filesystem
   */
  void setGenerationStampV2(long stamp) {
    generationStampV2.setCurrentValue(stamp);
  }

  /**
   * Gets the current generation stamp for this filesystem
   */
  long getGenerationStampV2() {
    return generationStampV2.getCurrentValue();
  }

  /**
   * Upgrades the generation stamp for the filesystem
   * by reserving a sufficient range for all existing blocks.
   * Should be invoked only during the first upgrade to
   * sequential block IDs.
   */
  long upgradeGenerationStampToV2() {
    Preconditions.checkState(generationStampV2.getCurrentValue() ==
        GenerationStamp.LAST_RESERVED_STAMP);

    generationStampV2.skipTo(
        generationStampV1.getCurrentValue() +
        HdfsConstants.RESERVED_GENERATION_STAMPS_V1);

    // No legacy (v1) stamps may be issued at or beyond this limit.
    generationStampV1Limit = generationStampV2.getCurrentValue();
    return generationStampV2.getCurrentValue();
  }

  /**
   * Sets the generation stamp that delineates random and sequentially
   * allocated block IDs.
   * @param stamp set generation stamp limit to this value
   */
  void setGenerationStampV1Limit(long stamp) {
    // May only be set once, while still at the grandfather sentinel.
    Preconditions.checkState(generationStampV1Limit ==
        GenerationStamp.GRANDFATHER_GENERATION_STAMP);
    generationStampV1Limit = stamp;
  }

  /**
   * Gets the value of the generation stamp that delineates sequential
   * and random block IDs.
   */
  long getGenerationStampAtblockIdSwitch() {
    return generationStampV1Limit;
  }

  @VisibleForTesting
  SequentialBlockIdGenerator getBlockIdGenerator() {
    return blockIdGenerator;
  }

  /**
   * Sets the maximum allocated block ID for this filesystem. This is
   * the basis for allocating new block IDs.
   */
  void setLastAllocatedBlockId(long blockId) {
    blockIdGenerator.skipTo(blockId);
  }

  /**
   * Gets the maximum sequentially allocated block ID for this filesystem
   */
  long getLastAllocatedBlockId() {
    return blockIdGenerator.getCurrentValue();
  }

  /**
   * Increments, logs and then returns the stamp
   */
  long nextGenerationStamp(boolean legacyBlock)
      throws IOException, SafeModeException {
    assert hasWriteLock();
    checkNameNodeSafeMode("Cannot get next generation stamp");

    long gs;
    if (legacyBlock) {
      gs = getNextGenerationStampV1();
      getEditLog().logGenerationStampV1(gs);
    } else {
      gs = getNextGenerationStampV2();
      getEditLog().logGenerationStampV2(gs);
    }

    // NB: callers sync the log
    return gs;
  }

  @VisibleForTesting
  long getNextGenerationStampV1() throws IOException {
    long genStampV1 = generationStampV1.nextValue();

    if (genStampV1 >= generationStampV1Limit) {
      // We ran out of generation stamps for legacy blocks. In practice, it
      // is extremely unlikely as we reserved 1T v1 generation stamps. The
      // result is that we can no longer append to the legacy blocks that
      // were created before the upgrade to sequential block IDs.
      throw new OutOfV1GenerationStampsException();
    }

    return genStampV1;
  }

  @VisibleForTesting
  long getNextGenerationStampV2() {
    return generationStampV2.nextValue();
  }

  long getGenerationStampV1Limit() {
    return generationStampV1Limit;
  }

  /**
   * Determine whether the block ID was randomly generated (legacy) or
   * sequentially generated. The generation stamp value is used to
   * make the distinction.
   * @return true if the block ID was randomly generated, false otherwise.
   */
  boolean isLegacyBlock(Block block) {
    return block.getGenerationStamp() < getGenerationStampV1Limit();
  }

  /**
   * Increments, logs and then returns the block ID
   */
  private long nextBlockId() throws IOException {
    assert hasWriteLock();
    checkNameNodeSafeMode("Cannot get next block ID");
    final long blockId = blockIdGenerator.nextValue();
    getEditLog().logAllocateBlockId(blockId);
    // NB: callers sync the log
    return blockId;
  }

  private boolean isFileDeleted(INodeFile file) {
    // Not in the inodeMap or in the snapshot but marked deleted.
6911 if (dir.getInode(file.getId()) == null) { 6912 return true; 6913 } 6914 6915 // look at the path hierarchy to see if one parent is deleted by recursive 6916 // deletion 6917 INode tmpChild = file; 6918 INodeDirectory tmpParent = file.getParent(); 6919 while (true) { 6920 if (tmpParent == null) { 6921 return true; 6922 } 6923 6924 INode childINode = tmpParent.getChild(tmpChild.getLocalNameBytes(), 6925 Snapshot.CURRENT_STATE_ID); 6926 if (childINode == null || !childINode.equals(tmpChild)) { 6927 // a newly created INode with the same name as an already deleted one 6928 // would be a different INode than the deleted one 6929 return true; 6930 } 6931 6932 if (tmpParent.isRoot()) { 6933 break; 6934 } 6935 6936 tmpChild = tmpParent; 6937 tmpParent = tmpParent.getParent(); 6938 } 6939 6940 if (file.isWithSnapshot() && 6941 file.getFileWithSnapshotFeature().isCurrentFileDeleted()) { 6942 return true; 6943 } 6944 return false; 6945 } 6946 6947 private INodeFile checkUCBlock(ExtendedBlock block, 6948 String clientName) throws IOException { 6949 assert hasWriteLock(); 6950 checkNameNodeSafeMode("Cannot get a new generation stamp and an " 6951 + "access token for block " + block); 6952 6953 // check stored block state 6954 BlockInfo storedBlock = getStoredBlock(ExtendedBlock.getLocalBlock(block)); 6955 if (storedBlock == null || 6956 storedBlock.getBlockUCState() != BlockUCState.UNDER_CONSTRUCTION) { 6957 throw new IOException(block + 6958 " does not exist or is not under Construction" + storedBlock); 6959 } 6960 6961 // check file inode 6962 final INodeFile file = ((INode)storedBlock.getBlockCollection()).asFile(); 6963 if (file == null || !file.isUnderConstruction() || isFileDeleted(file)) { 6964 throw new IOException("The file " + storedBlock + 6965 " belonged to does not exist or it is not under construction."); 6966 } 6967 6968 // check lease 6969 if (clientName == null 6970 || !clientName.equals(file.getFileUnderConstructionFeature() 6971 .getClientName())) { 6972 
throw new LeaseExpiredException("Lease mismatch: " + block + 6973 " is accessed by a non lease holder " + clientName); 6974 } 6975 6976 return file; 6977 } 6978 6979 /** 6980 * Client is reporting some bad block locations. 6981 */ 6982 void reportBadBlocks(LocatedBlock[] blocks) throws IOException { 6983 checkOperation(OperationCategory.WRITE); 6984 NameNode.stateChangeLog.info("*DIR* reportBadBlocks"); 6985 writeLock(); 6986 try { 6987 checkOperation(OperationCategory.WRITE); 6988 for (int i = 0; i < blocks.length; i++) { 6989 ExtendedBlock blk = blocks[i].getBlock(); 6990 DatanodeInfo[] nodes = blocks[i].getLocations(); 6991 String[] storageIDs = blocks[i].getStorageIDs(); 6992 for (int j = 0; j < nodes.length; j++) { 6993 blockManager.findAndMarkBlockAsCorrupt(blk, nodes[j], 6994 storageIDs == null ? null: storageIDs[j], 6995 "client machine reported it"); 6996 } 6997 } 6998 } finally { 6999 writeUnlock(); 7000 } 7001 } 7002 7003 /** 7004 * Get a new generation stamp together with an access token for 7005 * a block under construction 7006 * 7007 * This method is called for recovering a failed pipeline or setting up 7008 * a pipeline to append to a block. 
   *
   * @param block a block
   * @param clientName the name of a client
   * @return a located block with a new generation stamp and an access token
   * @throws IOException if any error occurs
   */
  LocatedBlock updateBlockForPipeline(ExtendedBlock block,
      String clientName) throws IOException {
    LocatedBlock locatedBlock;
    checkOperation(OperationCategory.WRITE);
    writeLock();
    try {
      checkOperation(OperationCategory.WRITE);

      // check validity of parameters
      checkUCBlock(block, clientName);

      // get a new generation stamp and an access token
      block.setGenerationStamp(
          nextGenerationStamp(isLegacyBlock(block.getLocalBlock())));
      locatedBlock = new LocatedBlock(block, new DatanodeInfo[0]);
      blockManager.setBlockToken(locatedBlock, AccessMode.WRITE);
    } finally {
      writeUnlock();
    }
    // Ensure we record the new generation stamp
    getEditLog().logSync();
    return locatedBlock;
  }

  /**
   * Update a pipeline for a block under construction
   *
   * @param clientName the name of the client
   * @param oldBlock and old block
   * @param newBlock a new block with a new generation stamp and length
   * @param newNodes datanodes in the pipeline
   * @throws IOException if any error occurs
   */
  void updatePipeline(String clientName, ExtendedBlock oldBlock,
      ExtendedBlock newBlock, DatanodeID[] newNodes, String[] newStorageIDs)
      throws IOException {
    // Retry cache: a replayed RPC that already succeeded returns at once.
    CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache);
    if (cacheEntry != null && cacheEntry.isSuccess()) {
      return; // Return previous response
    }
    LOG.info("updatePipeline(block=" + oldBlock
             + ", newGenerationStamp=" + newBlock.getGenerationStamp()
             + ", newLength=" + newBlock.getNumBytes()
             + ", newNodes=" + Arrays.asList(newNodes)
             + ", clientName=" + clientName
             + ")");
    waitForLoadingFSImage();
    writeLock();
    boolean success = false;
    try {
      checkOperation(OperationCategory.WRITE);
      checkNameNodeSafeMode("Pipeline not updated");
      assert newBlock.getBlockId()==oldBlock.getBlockId() : newBlock + " and "
        + oldBlock + " has different block identifier";
      updatePipelineInternal(clientName, oldBlock, newBlock, newNodes,
          newStorageIDs, cacheEntry != null);
      success = true;
    } finally {
      writeUnlock();
      RetryCache.setState(cacheEntry, success);
    }
    getEditLog().logSync();
    LOG.info("updatePipeline(" + oldBlock + ") successfully to " + newBlock);
  }

  /**
   * @see #updatePipeline(String, ExtendedBlock, ExtendedBlock, DatanodeID[], String[])
   */
  private void updatePipelineInternal(String clientName, ExtendedBlock oldBlock,
      ExtendedBlock newBlock, DatanodeID[] newNodes, String[] newStorageIDs,
      boolean logRetryCache)
      throws IOException {
    assert hasWriteLock();
    // check the validity of the block and lease holder name
    final INodeFile pendingFile = checkUCBlock(oldBlock, clientName);
    final BlockInfoUnderConstruction blockinfo
        = (BlockInfoUnderConstruction)pendingFile.getLastBlock();

    // check new GS & length: this is not expected
    if (newBlock.getGenerationStamp() <= blockinfo.getGenerationStamp() ||
        newBlock.getNumBytes() < blockinfo.getNumBytes()) {
      String msg = "Update " + oldBlock + " (len = " +
        blockinfo.getNumBytes() + ") to an older state: " + newBlock +
        " (len = " + newBlock.getNumBytes() +")";
      LOG.warn(msg);
      throw new IOException(msg);
    }

    // Update old block with the new generation stamp and new length
    blockinfo.setNumBytes(newBlock.getNumBytes());
    blockinfo.setGenerationStampAndVerifyReplicas(newBlock.getGenerationStamp());

    // find the DatanodeDescriptor objects
    final DatanodeStorageInfo[] storages = blockManager.getDatanodeManager()
        .getDatanodeStorageInfos(newNodes,
newStorageIDs); 7110 blockinfo.setExpectedLocations(storages); 7111 7112 String src = pendingFile.getFullPathName(); 7113 persistBlocks(src, pendingFile, logRetryCache); 7114 } 7115 7116 // rename was successful. If any part of the renamed subtree had 7117 // files that were being written to, update with new filename. 7118 void unprotectedChangeLease(String src, String dst) { 7119 assert hasWriteLock(); 7120 leaseManager.changeLease(src, dst); 7121 } 7122 7123 /** 7124 * Serializes leases. 7125 */ 7126 void saveFilesUnderConstruction(DataOutputStream out, 7127 Map<Long, INodeFile> snapshotUCMap) throws IOException { 7128 // This is run by an inferior thread of saveNamespace, which holds a read 7129 // lock on our behalf. If we took the read lock here, we could block 7130 // for fairness if a writer is waiting on the lock. 7131 synchronized (leaseManager) { 7132 Map<String, INodeFile> nodes = leaseManager.getINodesUnderConstruction(); 7133 for (Map.Entry<String, INodeFile> entry : nodes.entrySet()) { 7134 // TODO: for HDFS-5428, because of rename operations, some 7135 // under-construction files that are 7136 // in the current fs directory can also be captured in the 7137 // snapshotUCMap. We should remove them from the snapshotUCMap. 
7138 snapshotUCMap.remove(entry.getValue().getId()); 7139 } 7140 7141 out.writeInt(nodes.size() + snapshotUCMap.size()); // write the size 7142 for (Map.Entry<String, INodeFile> entry : nodes.entrySet()) { 7143 FSImageSerialization.writeINodeUnderConstruction( 7144 out, entry.getValue(), entry.getKey()); 7145 } 7146 for (Map.Entry<Long, INodeFile> entry : snapshotUCMap.entrySet()) { 7147 // for those snapshot INodeFileUC, we use "/.reserved/.inodes/<inodeid>" 7148 // as their paths 7149 StringBuilder b = new StringBuilder(); 7150 b.append(FSDirectory.DOT_RESERVED_PATH_PREFIX) 7151 .append(Path.SEPARATOR).append(FSDirectory.DOT_INODES_STRING) 7152 .append(Path.SEPARATOR).append(entry.getValue().getId()); 7153 FSImageSerialization.writeINodeUnderConstruction( 7154 out, entry.getValue(), b.toString()); 7155 } 7156 } 7157 } 7158 7159 /** 7160 * @return all the under-construction files in the lease map 7161 */ 7162 Map<String, INodeFile> getFilesUnderConstruction() { 7163 synchronized (leaseManager) { 7164 return leaseManager.getINodesUnderConstruction(); 7165 } 7166 } 7167 7168 /** 7169 * Register a Backup name-node, verifying that it belongs 7170 * to the correct namespace, and adding it to the set of 7171 * active journals if necessary. 
  /**
   * Register a Backup name-node, verifying that it belongs
   * to the correct namespace, and adding it to the set of
   * active journals if necessary.
   *
   * @param bnReg registration of the new BackupNode
   * @param nnReg registration of this NameNode
   * @throws IOException if the namespace IDs do not match
   */
  void registerBackupNode(NamenodeRegistration bnReg,
      NamenodeRegistration nnReg) throws IOException {
    writeLock();
    try {
      if(getFSImage().getStorage().getNamespaceID() 
         != bnReg.getNamespaceID())
        throw new IOException("Incompatible namespaceIDs: "
            + " Namenode namespaceID = "
            + getFSImage().getStorage().getNamespaceID() + "; "
            + bnReg.getRole() +
            " node namespaceID = " + bnReg.getNamespaceID());
      if (bnReg.getRole() == NamenodeRole.BACKUP) {
        // only a node in the BACKUP role gets an edit-log stream registered
        getFSImage().getEditLog().registerBackupNode(
            bnReg, nnReg);
      }
    } finally {
      writeUnlock();
    }
  }

  /**
   * Release (unregister) backup node.
   * <p>
   * Find and remove the backup stream corresponding to the node.
   * @throws IOException if the namespace IDs do not match
   */
  void releaseBackupNode(NamenodeRegistration registration)
    throws IOException {
    checkOperation(OperationCategory.WRITE);
    writeLock();
    try {
      checkOperation(OperationCategory.WRITE);
      if(getFSImage().getStorage().getNamespaceID()
         != registration.getNamespaceID())
        throw new IOException("Incompatible namespaceIDs: "
            + " Namenode namespaceID = "
            + getFSImage().getStorage().getNamespaceID() + "; "
            + registration.getRole() +
            " node namespaceID = " + registration.getNamespaceID());
      getEditLog().releaseBackupStream(registration);
    } finally {
      writeUnlock();
    }
  }

  /** Immutable (path, block) pair describing one corrupt file block. */
  static class CorruptFileBlockInfo {
    final String path;
    final Block block;

    public CorruptFileBlockInfo(String p, Block b) {
      path = p;
      block = b;
    }

    @Override
    public String toString() {
      // tab-separated "blockName<TAB>path" — consumed by fsck-style reports
      return block.getBlockName() + "\t" + path;
    }
  }
  /**
   * List a limited number of corrupt file blocks, with continuation support.
   *
   * @param path Restrict corrupt files to this portion of namespace.
   * @param cookieTab Support for continuation; cookieTab tells where
   *                  to start from.
   * @return a list in which each entry describes a corrupt file/block
   * @throws IOException if replication queues are not yet initialized
   */
  Collection<CorruptFileBlockInfo> listCorruptFileBlocks(String path,
	String[] cookieTab) throws IOException {
    checkSuperuserPrivilege();
    checkOperation(OperationCategory.READ);

    int count = 0;
    ArrayList<CorruptFileBlockInfo> corruptFiles = 
        new ArrayList<CorruptFileBlockInfo>();
    if (cookieTab == null) {
      cookieTab = new String[] { null };
    }

    // Do a quick check if there are any corrupt files without taking the lock
    if (blockManager.getMissingBlocksCount() == 0) {
      if (cookieTab[0] == null) {
        // normalize the cookie so the caller can continue from position 0
        cookieTab[0] = String.valueOf(getIntCookie(cookieTab[0]));
      }
      LOG.info("there are no corrupt file blocks.");
      return corruptFiles;
    }

    readLock();
    try {
      checkOperation(OperationCategory.READ);
      if (!isPopulatingReplQueues()) {
        throw new IOException("Cannot run listCorruptFileBlocks because " +
                              "replication queues have not been initialized.");
      }
      // print a limited # of corrupt files per call

      final Iterator<Block> blkIterator = blockManager.getCorruptReplicaBlockIterator();

      // fast-forward past the blocks already returned in earlier calls
      int skip = getIntCookie(cookieTab[0]);
      for (int i = 0; i < skip && blkIterator.hasNext(); i++) {
        blkIterator.next();
      }

      while (blkIterator.hasNext()) {
        Block blk = blkIterator.next();
        final INode inode = (INode)blockManager.getBlockCollection(blk);
        skip++;
        // only report blocks with no live replicas whose file still exists
        if (inode != null && blockManager.countNodes(blk).liveReplicas() == 0) {
          String src = FSDirectory.getFullPathName(inode);
          if (src.startsWith(path)){
            corruptFiles.add(new CorruptFileBlockInfo(src, blk));
            count++;
            if (count >= DEFAULT_MAX_CORRUPT_FILEBLOCKS_RETURNED)
              break;
          }
        }
      }
      // hand the new continuation position back to the caller
      cookieTab[0] = String.valueOf(skip);
      LOG.info("list corrupt file blocks returned: " + count);
      return corruptFiles;
    } finally {
      readUnlock();
    }
  }

  /**
   * Convert string cookie to integer. A null, unparseable, or negative
   * cookie is treated as 0 (start from the beginning).
   */
  private static int getIntCookie(String cookie){
    int c;
    if(cookie == null){
      c = 0;
    } else {
      try{
        c = Integer.parseInt(cookie);
      }catch (NumberFormatException e) {
        c = 0;
      }
    }
    c = Math.max(0, c);
    return c;
  }

  /**
   * Create delegation token secret manager, configured from key-update
   * interval, token max lifetime / renew interval, and audit tracking-id
   * settings.
   */
  private DelegationTokenSecretManager createDelegationTokenSecretManager(
      Configuration conf) {
    return new DelegationTokenSecretManager(conf.getLong(
        DFS_NAMENODE_DELEGATION_KEY_UPDATE_INTERVAL_KEY,
        DFS_NAMENODE_DELEGATION_KEY_UPDATE_INTERVAL_DEFAULT),
        conf.getLong(DFS_NAMENODE_DELEGATION_TOKEN_MAX_LIFETIME_KEY,
            DFS_NAMENODE_DELEGATION_TOKEN_MAX_LIFETIME_DEFAULT),
        conf.getLong(DFS_NAMENODE_DELEGATION_TOKEN_RENEW_INTERVAL_KEY,
            DFS_NAMENODE_DELEGATION_TOKEN_RENEW_INTERVAL_DEFAULT),
        DELEGATION_TOKEN_REMOVER_SCAN_INTERVAL,
        conf.getBoolean(DFS_NAMENODE_AUDIT_LOG_TOKEN_TRACKING_ID_KEY,
            DFS_NAMENODE_AUDIT_LOG_TOKEN_TRACKING_ID_DEFAULT),
        this);
  }
  /**
   * Returns the DelegationTokenSecretManager instance in the namesystem.
   * @return delegation token secret manager object
   */
  DelegationTokenSecretManager getDelegationTokenSecretManager() {
    return dtSecretManager;
  }

  /**
   * Issue a new delegation token for the current remote user.
   *
   * @param renewer Renewer information
   * @return delegation token, or null if the secret manager is not running
   * @throws IOException on error
   */
  Token<DelegationTokenIdentifier> getDelegationToken(Text renewer)
      throws IOException {
    Token<DelegationTokenIdentifier> token;
    checkOperation(OperationCategory.WRITE);
    writeLock();
    try {
      checkOperation(OperationCategory.WRITE);
      checkNameNodeSafeMode("Cannot issue delegation token");
      if (!isAllowedDelegationTokenOp()) {
        throw new IOException(
          "Delegation Token can be issued only with kerberos or web authentication");
      }
      if (dtSecretManager == null || !dtSecretManager.isRunning()) {
        LOG.warn("trying to get DT with no secret manager running");
        return null;
      }

      UserGroupInformation ugi = getRemoteUser();
      String user = ugi.getUserName();
      Text owner = new Text(user);
      Text realUser = null;
      if (ugi.getRealUser() != null) {
        // proxy-user case: record the real (authenticated) user as well
        realUser = new Text(ugi.getRealUser().getUserName());
      }
      DelegationTokenIdentifier dtId = new DelegationTokenIdentifier(owner,
        renewer, realUser);
      token = new Token<DelegationTokenIdentifier>(
        dtId, dtSecretManager);
      long expiryTime = dtSecretManager.getTokenExpiryTime(dtId);
      getEditLog().logGetDelegationToken(dtId, expiryTime);
    } finally {
      writeUnlock();
    }
    getEditLog().logSync();
    return token;
  }

  /**
   * Renew an existing delegation token.
   *
   * @param token token to renew
   * @return new expiryTime of the token
   * @throws InvalidToken if {@code token} is invalid
   * @throws IOException on other errors
   */
  long renewDelegationToken(Token<DelegationTokenIdentifier> token)
      throws InvalidToken, IOException {
    long expiryTime;
    checkOperation(OperationCategory.WRITE);
    writeLock();
    try {
      checkOperation(OperationCategory.WRITE);

      checkNameNodeSafeMode("Cannot renew delegation token");
      if (!isAllowedDelegationTokenOp()) {
        throw new IOException(
            "Delegation Token can be renewed only with kerberos or web authentication");
      }
      String renewer = getRemoteUser().getShortUserName();
      expiryTime = dtSecretManager.renewToken(token, renewer);
      // decode the identifier so the renewal can be recorded in the edit log
      DelegationTokenIdentifier id = new DelegationTokenIdentifier();
      ByteArrayInputStream buf = new ByteArrayInputStream(token.getIdentifier());
      DataInputStream in = new DataInputStream(buf);
      id.readFields(in);
      getEditLog().logRenewDelegationToken(id, expiryTime);
    } finally {
      writeUnlock();
    }
    getEditLog().logSync();
    return expiryTime;
  }

  /**
   * Cancel an existing delegation token.
   *
   * @param token token to cancel
   * @throws IOException on error
   */
  void cancelDelegationToken(Token<DelegationTokenIdentifier> token)
      throws IOException {
    checkOperation(OperationCategory.WRITE);
    writeLock();
    try {
      checkOperation(OperationCategory.WRITE);

      checkNameNodeSafeMode("Cannot cancel delegation token");
      String canceller = getRemoteUser().getUserName();
      DelegationTokenIdentifier id = dtSecretManager
        .cancelToken(token, canceller);
      getEditLog().logCancelDelegationToken(id);
    } finally {
      writeUnlock();
    }
    getEditLog().logSync();
  }

  /**
   * @param out save state of the secret manager
   * @param sdPath String storage directory path
   */
  void saveSecretManagerStateCompat(DataOutputStream out, String sdPath)
      throws IOException {
    dtSecretManager.saveSecretManagerStateCompat(out, sdPath);
  }

  /** @return a snapshot of the secret manager state for fsimage saving */
  SecretManagerState saveSecretManagerState() {
    return dtSecretManager.saveSecretManagerState();
  }

  /**
   * @param in load the state of secret manager from input stream
   */
  void loadSecretManagerStateCompat(DataInput in) throws IOException {
    dtSecretManager.loadSecretManagerStateCompat(in);
  }

  /** Load secret manager state from protobuf-based fsimage sections. */
  void loadSecretManagerState(SecretManagerSection s,
      List<SecretManagerSection.DelegationKey> keys,
      List<SecretManagerSection.PersistToken> tokens) throws IOException {
    dtSecretManager.loadSecretManagerState(new SecretManagerState(s, keys, tokens));
  }

  /**
   * Log the updateMasterKey operation to edit logs
   * 
   * @param key new delegation key.
   */
  public void logUpdateMasterKey(DelegationKey key) {
    
    assert !isInSafeMode() :
      "this should never be called while in safemode, since we stop " +
      "the DT manager before entering safemode!";
    // No need to hold FSN lock since we don't access any internal
    // structures, and this is stopped before the FSN shuts itself
    // down, etc.
    getEditLog().logUpdateMasterKey(key);
    getEditLog().logSync();
  }
  
  /**
   * Log the cancellation of expired tokens to edit logs
   * 
   * @param id token identifier to cancel
   */
  public void logExpireDelegationToken(DelegationTokenIdentifier id) {
    assert !isInSafeMode() :
      "this should never be called while in safemode, since we stop " +
      "the DT manager before entering safemode!";
    // No need to hold FSN lock since we don't access any internal
    // structures, and this is stopped before the FSN shuts itself
    // down, etc.
    getEditLog().logCancelDelegationToken(id);
  }  
  
  /** Record a lease reassignment in the edit log (caller holds write lock). */
  private void logReassignLease(String leaseHolder, String src,
      String newHolder) {
    assert hasWriteLock();
    getEditLog().logReassignLease(leaseHolder, src, newHolder);
  }
  
  /**
   * Delegation token ops are only allowed over strongly-authenticated
   * connections (or when security is off).
   *
   * @return true if delegation token operation is allowed
   */
  private boolean isAllowedDelegationTokenOp() throws IOException {
    AuthenticationMethod authMethod = getConnectionAuthenticationMethod();
    if (UserGroupInformation.isSecurityEnabled()
        && (authMethod != AuthenticationMethod.KERBEROS)
        && (authMethod != AuthenticationMethod.KERBEROS_SSL)
        && (authMethod != AuthenticationMethod.CERTIFICATE)) {
      return false;
    }
    return true;
  }
  
  /**
   * Returns authentication method used to establish the connection.
   * For proxy users, the real user's authentication method is returned.
   * @return AuthenticationMethod used to establish connection
   * @throws IOException
   */
  private AuthenticationMethod getConnectionAuthenticationMethod()
      throws IOException {
    UserGroupInformation ugi = getRemoteUser();
    AuthenticationMethod authMethod = ugi.getAuthenticationMethod();
    if (authMethod == AuthenticationMethod.PROXY) {
      authMethod = ugi.getRealUser().getAuthenticationMethod();
    }
    return authMethod;
  }
  /**
   * Client invoked methods are invoked over RPC and will be in 
   * RPC call context even if the client exits.
   */
  private boolean isExternalInvocation() {
    return Server.isRpcInvocation() || NamenodeWebHdfsMethods.isWebHdfsInvocation();
  }

  /** @return remote peer address, from RPC context or WebHDFS context. */
  private static InetAddress getRemoteIp() {
    InetAddress ip = Server.getRemoteIp();
    if (ip != null) {
      return ip;
    }
    return NamenodeWebHdfsMethods.getRemoteIp();
  }
  
  // optimize ugi lookup for RPC operations to avoid a trip through
  // UGI.getCurrentUser which is synch'ed
  private static UserGroupInformation getRemoteUser() throws IOException {
    return NameNode.getRemoteUser();
  }
  
  /**
   * Log fsck event in the audit log 
   */
  void logFsckEvent(String src, InetAddress remoteAddress) throws IOException {
    if (isAuditEnabled()) {
      logAuditEvent(true, getRemoteUser(),
                    remoteAddress,
                    "fsck", src, null, null);
    }
  }

  /**
   * Register NameNodeMXBean
   */
  private void registerMXBean() {
    mxbeanName = MBeans.register("NameNode", "NameNodeInfo", this);
  }

  /**
   * Class representing Namenode information for JMX interfaces
   */
  @Override // NameNodeMXBean
  public String getVersion() {
    return VersionInfo.getVersion() + ", r" + VersionInfo.getRevision();
  }

  @Override // NameNodeMXBean
  public long getUsed() {
    return this.getCapacityUsed();
  }

  @Override // NameNodeMXBean
  public long getFree() {
    return this.getCapacityRemaining();
  }

  @Override // NameNodeMXBean
  public long getTotal() {
    return this.getCapacityTotal();
  }

  @Override // NameNodeMXBean
  public String getSafemode() {
    // empty string means safe mode is OFF
    if (!this.isInSafeMode())
      return "";
    return "Safe mode is ON. " + this.getSafeModeTip();
  }

  @Override // NameNodeMXBean
  public boolean isUpgradeFinalized() {
    return this.getFSImage().isUpgradeFinalized();
  }

  @Override // NameNodeMXBean
  public long getNonDfsUsedSpace() {
    return datanodeStatistics.getCapacityUsedNonDFS();
  }

  @Override // NameNodeMXBean
  public float getPercentUsed() {
    return datanodeStatistics.getCapacityUsedPercent();
  }

  @Override // NameNodeMXBean
  public long getBlockPoolUsedSpace() {
    return datanodeStatistics.getBlockPoolUsed();
  }

  @Override // NameNodeMXBean
  public float getPercentBlockPoolUsed() {
    return datanodeStatistics.getPercentBlockPoolUsed();
  }

  @Override // NameNodeMXBean
  public float getPercentRemaining() {
    return datanodeStatistics.getCapacityRemainingPercent();
  }

  @Override // NameNodeMXBean
  public long getCacheCapacity() {
    return datanodeStatistics.getCacheCapacity();
  }

  @Override // NameNodeMXBean
  public long getCacheUsed() {
    return datanodeStatistics.getCacheUsed();
  }

  @Override // NameNodeMXBean
  public long getTotalBlocks() {
    return getBlocksTotal();
  }

  @Override // NameNodeMXBean
  @Metric
  public long getTotalFiles() {
    return getFilesTotal();
  }

  @Override // NameNodeMXBean
  public long getNumberOfMissingBlocks() {
    return getMissingBlocksCount();
  }
  
  @Override // NameNodeMXBean
  public int getThreads() {
    return ManagementFactory.getThreadMXBean().getThreadCount();
  }

  /**
   * Returned information is a JSON representation of map with host name as the
   * key and value is a map of live node attribute keys to its values
   */
  @Override // NameNodeMXBean
  public String getLiveNodes() {
    final Map<String, Map<String,Object>> info = 
      new HashMap<String, Map<String,Object>>();
    final List<DatanodeDescriptor> live = new ArrayList<DatanodeDescriptor>();
    blockManager.getDatanodeManager().fetchDatanodes(live, null, true);
    for (DatanodeDescriptor node : live) {
      // NOTE: these key strings are part of the JMX/JSON contract — do not
      // rename them without coordinating with UI/monitoring consumers.
      Map<String, Object> innerinfo = ImmutableMap.<String, Object>builder()
          .put("infoAddr", node.getInfoAddr())
          .put("infoSecureAddr", node.getInfoSecureAddr())
          .put("xferaddr", node.getXferAddr())
          .put("lastContact", getLastContact(node))
          .put("usedSpace", getDfsUsed(node))
          .put("adminState", node.getAdminState().toString())
          .put("nonDfsUsedSpace", node.getNonDfsUsed())
          .put("capacity", node.getCapacity())
          .put("numBlocks", node.numBlocks())
          .put("version", node.getSoftwareVersion())
          .put("used", node.getDfsUsed())
          .put("remaining", node.getRemaining())
          .put("blockScheduled", node.getBlocksScheduled())
          .put("blockPoolUsed", node.getBlockPoolUsed())
          .put("blockPoolUsedPercent", node.getBlockPoolUsedPercent())
          .put("volfails", node.getVolumeFailures())
          .build();

      info.put(node.getHostName(), innerinfo);
    }
    return JSON.toString(info);
  }

  /**
   * Returned information is a JSON representation of map with host name as the
   * key and value is a map of dead node attribute keys to its values
   */
  @Override // NameNodeMXBean
  public String getDeadNodes() {
    final Map<String, Map<String, Object>> info = 
      new HashMap<String, Map<String, Object>>();
    final List<DatanodeDescriptor> dead = new ArrayList<DatanodeDescriptor>();
    blockManager.getDatanodeManager().fetchDatanodes(null, dead, true);
    for (DatanodeDescriptor node : dead) {
      Map<String, Object> innerinfo = ImmutableMap.<String, Object>builder()
          .put("lastContact", getLastContact(node))
          .put("decommissioned", node.isDecommissioned())
          .put("xferaddr", node.getXferAddr())
          .build();
      info.put(node.getHostName(), innerinfo);
    }
    return JSON.toString(info);
  }

  /**
   * Returned information is a JSON representation of map with host name as the
   * key and value is a map of decommissioning node attribute keys to its
   * values
   */
  @Override // NameNodeMXBean
  public String getDecomNodes() {
    final Map<String, Map<String, Object>> info = 
      new HashMap<String, Map<String, Object>>();
    final List<DatanodeDescriptor> decomNodeList = blockManager.getDatanodeManager(
        ).getDecommissioningNodes();
    for (DatanodeDescriptor node : decomNodeList) {
      // NOTE(review): the key "underReplicateInOpenFiles" is inconsistent with
      // the getter name (getUnderReplicatedInOpenFiles) but is an established
      // external JSON key — do not change it.
      Map<String, Object> innerinfo = ImmutableMap
          .<String, Object> builder()
          .put("xferaddr", node.getXferAddr())
          .put("underReplicatedBlocks",
              node.decommissioningStatus.getUnderReplicatedBlocks())
          .put("decommissionOnlyReplicas",
              node.decommissioningStatus.getDecommissionOnlyReplicas())
          .put("underReplicateInOpenFiles",
              node.decommissioningStatus.getUnderReplicatedInOpenFiles())
          .build();
      info.put(node.getHostName(), innerinfo);
    }
    return JSON.toString(info);
  }

  /** @return seconds since the node's last heartbeat */
  private long getLastContact(DatanodeDescriptor alivenode) {
    return (Time.now() - alivenode.getLastUpdate())/1000;
  }

  private long getDfsUsed(DatanodeDescriptor alivenode) {
    return alivenode.getDfsUsed();
  }

  @Override // NameNodeMXBean
  public String getClusterId() {
    return getFSImage().getStorage().getClusterID();
  }
  
  @Override // NameNodeMXBean
  public String getBlockPoolId() {
    return blockPoolId;
  }
  
  @Override // NameNodeMXBean
  public String getNameDirStatuses() {
    Map<String, Map<File, StorageDirType>> statusMap =
      new HashMap<String, Map<File, StorageDirType>>();
    
    Map<File, StorageDirType> activeDirs = new HashMap<File, StorageDirType>();
    for (Iterator<StorageDirectory> it
        = getFSImage().getStorage().dirIterator(); it.hasNext();) {
      StorageDirectory st = it.next();
      activeDirs.put(st.getRoot(), st.getStorageDirType());
    }
    statusMap.put("active", activeDirs);
    
    List<Storage.StorageDirectory> removedStorageDirs
        = getFSImage().getStorage().getRemovedStorageDirs();
    Map<File, StorageDirType> failedDirs = new HashMap<File, StorageDirType>();
    for (StorageDirectory st : removedStorageDirs) {
      failedDirs.put(st.getRoot(), st.getStorageDirType());
    }
    statusMap.put("failed", failedDirs);
    
    return JSON.toString(statusMap);
  }

  @Override // NameNodeMXBean
  public String getNodeUsage() {
    float median = 0;
    float max = 0;
    float min = 0;
    float dev = 0;

    final Map<String, Map<String,Object>> info =
        new HashMap<String, Map<String,Object>>();
    final List<DatanodeDescriptor> live = new ArrayList<DatanodeDescriptor>();
    blockManager.getDatanodeManager().fetchDatanodes(live, null, true);

    if (live.size() > 0) {
      float totalDfsUsed = 0;
      float[] usages = new float[live.size()];
      int i = 0;
      for (DatanodeDescriptor dn : live) {
        usages[i++] = dn.getDfsUsedPercent();
        totalDfsUsed += dn.getDfsUsedPercent();
      }
      // totalDfsUsed now holds the mean usage percentage across live nodes
      totalDfsUsed /= live.size();
      Arrays.sort(usages);
      median = usages[usages.length / 2];
      max = usages[usages.length - 1];
      min = usages[0];

      for (i = 0; i < usages.length; i++) {
        dev += (usages[i] - totalDfsUsed) * (usages[i] - totalDfsUsed);
      }
      dev = (float) Math.sqrt(dev / usages.length);
    }

    final Map<String, Object> innerInfo = new HashMap<String, Object>();
    innerInfo.put("min", StringUtils.format("%.2f%%", min));
    innerInfo.put("median", StringUtils.format("%.2f%%", median));
    innerInfo.put("max", StringUtils.format("%.2f%%", max));
    innerInfo.put("stdDev", StringUtils.format("%.2f%%", dev));
    info.put("nodeUsage", innerInfo);

    return JSON.toString(info);
  }

  @Override // NameNodeMXBean
  public String getNameJournalStatus() {
    List<Map<String, String>> jasList = new ArrayList<Map<String, String>>();
    FSEditLog log = getFSImage().getEditLog();
    if (log != null) {
      boolean openForWrite = log.isOpenForWrite();
      for (JournalAndStream jas : log.getJournals()) {
        final Map<String, String> jasMap = new HashMap<String, String>();
        String manager = jas.getManager().toString();

        jasMap.put("required", String.valueOf(jas.isRequired()));
        jasMap.put("disabled", String.valueOf(jas.isDisabled()));
        jasMap.put("manager", manager);

        if (jas.isDisabled()) {
          jasMap.put("stream", "Failed");
        } else if (openForWrite) {
          EditLogOutputStream elos = jas.getCurrentStream();
          if (elos != null) {
            jasMap.put("stream", elos.generateReport());
          } else {
            jasMap.put("stream", "not currently writing");
          }
        } else {
          jasMap.put("stream", "open for read");
        }
        jasList.add(jasMap);
      }
    }
    return JSON.toString(jasList);
  }
  
  @Override // NameNodeMXBean
  public String getJournalTransactionInfo() {
    Map<String, String> txnIdMap = new HashMap<String, String>();
    txnIdMap.put("LastAppliedOrWrittenTxId",
        Long.toString(this.getFSImage().getLastAppliedOrWrittenTxId()));
    txnIdMap.put("MostRecentCheckpointTxId",
        Long.toString(this.getFSImage().getMostRecentCheckpointTxId()));
    return JSON.toString(txnIdMap);
  }
  
  @Override // NameNodeMXBean
  public String getNNStarted() {
    return getStartTime().toString();
  }

  @Override // NameNodeMXBean
  public String getCompileInfo() {
    return VersionInfo.getDate() + " by " + VersionInfo.getUser() +
        " from " + VersionInfo.getBranch();
  }

  /** @return the block manager. */
  public BlockManager getBlockManager() {
    return blockManager;
  }
  /** @return the FSDirectory. */
  public FSDirectory getFSDirectory() {
    return dir;
  }

  /** Set the FSDirectory. */
  @VisibleForTesting
  public void setFSDirectory(FSDirectory dir) {
    this.dir = dir;
  }

  /** @return the cache manager. */
  public CacheManager getCacheManager() {
    return cacheManager;
  }

  @Override  // NameNodeMXBean
  public String getCorruptFiles() {
    List<String> list = new ArrayList<String>();
    Collection<FSNamesystem.CorruptFileBlockInfo> corruptFileBlocks;
    try {
      corruptFileBlocks = listCorruptFileBlocks("/", null);
      int corruptFileCount = corruptFileBlocks.size();
      if (corruptFileCount != 0) {
        for (FSNamesystem.CorruptFileBlockInfo c : corruptFileBlocks) {
          list.add(c.toString());
        }
      }
    } catch (IOException e) {
      // best-effort for JMX: report the error in the log and return what we have
      LOG.warn("Get corrupt file blocks returned error: " + e.getMessage());
    }
    return JSON.toString(list);
  }

  @Override  //NameNodeMXBean
  public int getDistinctVersionCount() {
    return blockManager.getDatanodeManager().getDatanodesSoftwareVersions()
      .size();
  }

  @Override  //NameNodeMXBean
  public Map<String, Integer> getDistinctVersions() {
    return blockManager.getDatanodeManager().getDatanodesSoftwareVersions();
  }

  @Override  //NameNodeMXBean
  public String getSoftwareVersion() {
    return VersionInfo.getVersion();
  }

  /**
   * Verifies that the given identifier and password are valid and match.
   * @param identifier Token identifier.
   * @param password Password in the token.
   */
  public synchronized void verifyToken(DelegationTokenIdentifier identifier,
      byte[] password) throws InvalidToken, RetriableException {
    try {
      getDelegationTokenSecretManager().verifyToken(identifier, password);
    } catch (InvalidToken it) {
      // during HA transition the secret manager may not be caught up yet,
      // so let the client retry rather than fail outright
      if (inTransitionToActive()) {
        throw new RetriableException(it);
      }
      throw it;
    }
  }
  
  @Override
  public boolean isGenStampInFuture(Block block) {
    if (isLegacyBlock(block)) {
      return block.getGenerationStamp() > getGenerationStampV1();
    } else {
      return block.getGenerationStamp() > getGenerationStampV2();
    }
  }

  @VisibleForTesting
  public EditLogTailer getEditLogTailer() {
    return editLogTailer;
  }
  
  @VisibleForTesting
  public void setEditLogTailerForTests(EditLogTailer tailer) {
    this.editLogTailer = tailer;
  }
  
  @VisibleForTesting
  void setFsLockForTests(ReentrantReadWriteLock lock) {
    this.fsLock.coarseLock = lock;
  }
  
  @VisibleForTesting
  public ReentrantReadWriteLock getFsLockForTests() {
    return fsLock.coarseLock;
  }
  
  @VisibleForTesting
  public ReentrantLock getLongReadLockForTests() {
    return fsLock.longReadLock;
  }

  @VisibleForTesting
  public SafeModeInfo getSafeModeInfoForTests() {
    return safeMode;
  }
  
  @VisibleForTesting
  public void setNNResourceChecker(NameNodeResourceChecker nnResourceChecker) {
    this.nnResourceChecker = nnResourceChecker;
  }

  @Override
  public boolean isAvoidingStaleDataNodesForWrite() {
    return this.blockManager.getDatanodeManager()
        .shouldAvoidStaleDataNodesForWrite();
  }

  @Override // FSClusterStats
  public int getNumDatanodesInService() {
    return datanodeStatistics.getNumDatanodesInService();
  }
  
  @Override // for block placement strategy
  public double getInServiceXceiverAverage() {
    double avgLoad = 0;
    final int nodes = getNumDatanodesInService();
    if (nodes != 0) {
      // average transceiver (active connection) count per in-service datanode
      final int xceivers = datanodeStatistics.getInServiceXceiverCount();
      avgLoad = (double)xceivers/nodes;
    }
    return avgLoad;
  }

  public SnapshotManager getSnapshotManager() {
    return snapshotManager;
  }
  
  /** Allow snapshot on a directory. */
  void allowSnapshot(String path) throws SafeModeException, IOException {
    checkOperation(OperationCategory.WRITE);
    writeLock();
    try {
      checkOperation(OperationCategory.WRITE);
      checkNameNodeSafeMode("Cannot allow snapshot for " + path);
      checkSuperuserPrivilege();

      dir.writeLock();
      try {
        snapshotManager.setSnapshottable(path, true);
      } finally {
        dir.writeUnlock();
      }
      getEditLog().logAllowSnapshot(path);
    } finally {
      writeUnlock();
    }
    getEditLog().logSync();
    
    if (auditLog.isInfoEnabled() && isExternalInvocation()) {
      logAuditEvent(true, "allowSnapshot", path, null, null);
    }
  }
*/ 8055 void disallowSnapshot(String path) throws SafeModeException, IOException { 8056 checkOperation(OperationCategory.WRITE); 8057 writeLock(); 8058 try { 8059 checkOperation(OperationCategory.WRITE); 8060 checkNameNodeSafeMode("Cannot disallow snapshot for " + path); 8061 checkSuperuserPrivilege(); 8062 8063 dir.writeLock(); 8064 try { 8065 snapshotManager.resetSnapshottable(path); 8066 } finally { 8067 dir.writeUnlock(); 8068 } 8069 getEditLog().logDisallowSnapshot(path); 8070 } finally { 8071 writeUnlock(); 8072 } 8073 getEditLog().logSync(); 8074 8075 if (auditLog.isInfoEnabled() && isExternalInvocation()) { 8076 logAuditEvent(true, "disallowSnapshot", path, null, null); 8077 } 8078 } 8079 8080 /** 8081 * Create a snapshot 8082 * @param snapshotRoot The directory path where the snapshot is taken 8083 * @param snapshotName The name of the snapshot 8084 */ 8085 String createSnapshot(String snapshotRoot, String snapshotName) 8086 throws SafeModeException, IOException { 8087 checkOperation(OperationCategory.WRITE); 8088 final FSPermissionChecker pc = getPermissionChecker(); 8089 CacheEntryWithPayload cacheEntry = RetryCache.waitForCompletion(retryCache, 8090 null); 8091 if (cacheEntry != null && cacheEntry.isSuccess()) { 8092 return (String) cacheEntry.getPayload(); 8093 } 8094 String snapshotPath = null; 8095 writeLock(); 8096 try { 8097 checkOperation(OperationCategory.WRITE); 8098 checkNameNodeSafeMode("Cannot create snapshot for " + snapshotRoot); 8099 if (isPermissionEnabled) { 8100 checkOwner(pc, snapshotRoot); 8101 } 8102 8103 if (snapshotName == null || snapshotName.isEmpty()) { 8104 snapshotName = Snapshot.generateDefaultSnapshotName(); 8105 } 8106 if(snapshotName != null){ 8107 if (!DFSUtil.isValidNameForComponent(snapshotName)) { 8108 throw new InvalidPathException("Invalid snapshot name: " 8109 + snapshotName); 8110 } 8111 } 8112 dir.verifySnapshotName(snapshotName, snapshotRoot); 8113 dir.writeLock(); 8114 try { 8115 snapshotPath = 
snapshotManager.createSnapshot(snapshotRoot, snapshotName); 8116 } finally { 8117 dir.writeUnlock(); 8118 } 8119 getEditLog().logCreateSnapshot(snapshotRoot, snapshotName, 8120 cacheEntry != null); 8121 } finally { 8122 writeUnlock(); 8123 RetryCache.setState(cacheEntry, snapshotPath != null, snapshotPath); 8124 } 8125 getEditLog().logSync(); 8126 8127 if (auditLog.isInfoEnabled() && isExternalInvocation()) { 8128 logAuditEvent(true, "createSnapshot", snapshotRoot, snapshotPath, null); 8129 } 8130 return snapshotPath; 8131 } 8132 8133 /** 8134 * Rename a snapshot 8135 * @param path The directory path where the snapshot was taken 8136 * @param snapshotOldName Old snapshot name 8137 * @param snapshotNewName New snapshot name 8138 * @throws SafeModeException 8139 * @throws IOException 8140 */ 8141 void renameSnapshot(String path, String snapshotOldName, 8142 String snapshotNewName) throws SafeModeException, IOException { 8143 final FSPermissionChecker pc = getPermissionChecker(); 8144 CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache); 8145 if (cacheEntry != null && cacheEntry.isSuccess()) { 8146 return; // Return previous response 8147 } 8148 writeLock(); 8149 boolean success = false; 8150 try { 8151 checkOperation(OperationCategory.WRITE); 8152 checkNameNodeSafeMode("Cannot rename snapshot for " + path); 8153 if (isPermissionEnabled) { 8154 checkOwner(pc, path); 8155 } 8156 dir.verifySnapshotName(snapshotNewName, path); 8157 8158 snapshotManager.renameSnapshot(path, snapshotOldName, snapshotNewName); 8159 getEditLog().logRenameSnapshot(path, snapshotOldName, snapshotNewName, 8160 cacheEntry != null); 8161 success = true; 8162 } finally { 8163 writeUnlock(); 8164 RetryCache.setState(cacheEntry, success); 8165 } 8166 getEditLog().logSync(); 8167 8168 if (auditLog.isInfoEnabled() && isExternalInvocation()) { 8169 String oldSnapshotRoot = Snapshot.getSnapshotPath(path, snapshotOldName); 8170 String newSnapshotRoot = Snapshot.getSnapshotPath(path, 
snapshotNewName); 8171 logAuditEvent(true, "renameSnapshot", oldSnapshotRoot, newSnapshotRoot, null); 8172 } 8173 } 8174 8175 /** 8176 * Get the list of snapshottable directories that are owned 8177 * by the current user. Return all the snapshottable directories if the 8178 * current user is a super user. 8179 * @return The list of all the current snapshottable directories 8180 * @throws IOException 8181 */ 8182 public SnapshottableDirectoryStatus[] getSnapshottableDirListing() 8183 throws IOException { 8184 SnapshottableDirectoryStatus[] status = null; 8185 checkOperation(OperationCategory.READ); 8186 final FSPermissionChecker checker = getPermissionChecker(); 8187 readLock(); 8188 try { 8189 checkOperation(OperationCategory.READ); 8190 final String user = checker.isSuperUser()? null : checker.getUser(); 8191 status = snapshotManager.getSnapshottableDirListing(user); 8192 } finally { 8193 readUnlock(); 8194 } 8195 if (auditLog.isInfoEnabled() && isExternalInvocation()) { 8196 logAuditEvent(true, "listSnapshottableDirectory", null, null, null); 8197 } 8198 return status; 8199 } 8200 8201 /** 8202 * Get the difference between two snapshots (or between a snapshot and the 8203 * current status) of a snapshottable directory. 8204 * 8205 * @param path The full path of the snapshottable directory. 8206 * @param fromSnapshot Name of the snapshot to calculate the diff from. Null 8207 * or empty string indicates the current tree. 8208 * @param toSnapshot Name of the snapshot to calculated the diff to. Null or 8209 * empty string indicates the current tree. 8210 * @return A report about the difference between {@code fromSnapshot} and 8211 * {@code toSnapshot}. Modified/deleted/created/renamed files and 8212 * directories belonging to the snapshottable directories are listed 8213 * and labeled as M/-/+/R respectively. 
8214 * @throws IOException 8215 */ 8216 SnapshotDiffReport getSnapshotDiffReport(String path, 8217 String fromSnapshot, String toSnapshot) throws IOException { 8218 SnapshotDiffReport diffs; 8219 checkOperation(OperationCategory.READ); 8220 final FSPermissionChecker pc = getPermissionChecker(); 8221 readLock(); 8222 try { 8223 checkOperation(OperationCategory.READ); 8224 if (isPermissionEnabled) { 8225 checkSubtreeReadPermission(pc, path, fromSnapshot); 8226 checkSubtreeReadPermission(pc, path, toSnapshot); 8227 } 8228 diffs = snapshotManager.diff(path, fromSnapshot, toSnapshot); 8229 } finally { 8230 readUnlock(); 8231 } 8232 8233 if (auditLog.isInfoEnabled() && isExternalInvocation()) { 8234 logAuditEvent(true, "computeSnapshotDiff", null, null, null); 8235 } 8236 return diffs; 8237 } 8238 8239 private void checkSubtreeReadPermission(final FSPermissionChecker pc, 8240 final String snapshottablePath, final String snapshot) 8241 throws AccessControlException, UnresolvedLinkException { 8242 final String fromPath = snapshot == null? 
8243 snapshottablePath: Snapshot.getSnapshotPath(snapshottablePath, snapshot); 8244 checkPermission(pc, fromPath, false, null, null, FsAction.READ, FsAction.READ); 8245 } 8246 8247 /** 8248 * Delete a snapshot of a snapshottable directory 8249 * @param snapshotRoot The snapshottable directory 8250 * @param snapshotName The name of the to-be-deleted snapshot 8251 * @throws SafeModeException 8252 * @throws IOException 8253 */ 8254 void deleteSnapshot(String snapshotRoot, String snapshotName) 8255 throws SafeModeException, IOException { 8256 final FSPermissionChecker pc = getPermissionChecker(); 8257 8258 CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache); 8259 if (cacheEntry != null && cacheEntry.isSuccess()) { 8260 return; // Return previous response 8261 } 8262 boolean success = false; 8263 BlocksMapUpdateInfo collectedBlocks = new BlocksMapUpdateInfo(); 8264 writeLock(); 8265 try { 8266 checkOperation(OperationCategory.WRITE); 8267 checkNameNodeSafeMode("Cannot delete snapshot for " + snapshotRoot); 8268 if (isPermissionEnabled) { 8269 checkOwner(pc, snapshotRoot); 8270 } 8271 8272 List<INode> removedINodes = new ChunkedArrayList<INode>(); 8273 dir.writeLock(); 8274 try { 8275 snapshotManager.deleteSnapshot(snapshotRoot, snapshotName, 8276 collectedBlocks, removedINodes); 8277 dir.removeFromInodeMap(removedINodes); 8278 } finally { 8279 dir.writeUnlock(); 8280 } 8281 removedINodes.clear(); 8282 getEditLog().logDeleteSnapshot(snapshotRoot, snapshotName, 8283 cacheEntry != null); 8284 success = true; 8285 } finally { 8286 writeUnlock(); 8287 RetryCache.setState(cacheEntry, success); 8288 } 8289 getEditLog().logSync(); 8290 8291 removeBlocks(collectedBlocks); 8292 collectedBlocks.clear(); 8293 8294 if (auditLog.isInfoEnabled() && isExternalInvocation()) { 8295 String rootPath = Snapshot.getSnapshotPath(snapshotRoot, snapshotName); 8296 logAuditEvent(true, "deleteSnapshot", rootPath, null, null); 8297 } 8298 } 8299 8300 /** 8301 * Remove a list of 
INodeDirectorySnapshottable from the SnapshotManager 8302 * @param toRemove the list of INodeDirectorySnapshottable to be removed 8303 */ 8304 void removeSnapshottableDirs(List<INodeDirectory> toRemove) { 8305 if (snapshotManager != null) { 8306 snapshotManager.removeSnapshottable(toRemove); 8307 } 8308 } 8309 8310 RollingUpgradeInfo queryRollingUpgrade() throws IOException { 8311 checkSuperuserPrivilege(); 8312 checkOperation(OperationCategory.READ); 8313 readLock(); 8314 try { 8315 if (rollingUpgradeInfo != null) { 8316 boolean hasRollbackImage = this.getFSImage().hasRollbackFSImage(); 8317 rollingUpgradeInfo.setCreatedRollbackImages(hasRollbackImage); 8318 } 8319 return rollingUpgradeInfo; 8320 } finally { 8321 readUnlock(); 8322 } 8323 } 8324 8325 RollingUpgradeInfo startRollingUpgrade() throws IOException { 8326 checkSuperuserPrivilege(); 8327 checkOperation(OperationCategory.WRITE); 8328 writeLock(); 8329 try { 8330 checkOperation(OperationCategory.WRITE); 8331 if (isRollingUpgrade()) { 8332 return rollingUpgradeInfo; 8333 } 8334 long startTime = now(); 8335 if (!haEnabled) { // for non-HA, we require NN to be in safemode 8336 startRollingUpgradeInternalForNonHA(startTime); 8337 } else { // for HA, NN cannot be in safemode 8338 checkNameNodeSafeMode("Failed to start rolling upgrade"); 8339 startRollingUpgradeInternal(startTime); 8340 } 8341 8342 getEditLog().logStartRollingUpgrade(rollingUpgradeInfo.getStartTime()); 8343 if (haEnabled) { 8344 // roll the edit log to make sure the standby NameNode can tail 8345 getFSImage().rollEditLog(); 8346 } 8347 } finally { 8348 writeUnlock(); 8349 } 8350 8351 getEditLog().logSync(); 8352 if (auditLog.isInfoEnabled() && isExternalInvocation()) { 8353 logAuditEvent(true, "startRollingUpgrade", null, null, null); 8354 } 8355 return rollingUpgradeInfo; 8356 } 8357 8358 /** 8359 * Update internal state to indicate that a rolling upgrade is in progress. 
8360 * @param startTime rolling upgrade start time 8361 */ 8362 void startRollingUpgradeInternal(long startTime) 8363 throws IOException { 8364 checkRollingUpgrade("start rolling upgrade"); 8365 getFSImage().checkUpgrade(); 8366 setRollingUpgradeInfo(false, startTime); 8367 } 8368 8369 /** 8370 * Update internal state to indicate that a rolling upgrade is in progress for 8371 * non-HA setup. This requires the namesystem is in SafeMode and after doing a 8372 * checkpoint for rollback the namesystem will quit the safemode automatically 8373 */ 8374 private void startRollingUpgradeInternalForNonHA(long startTime) 8375 throws IOException { 8376 Preconditions.checkState(!haEnabled); 8377 if (!isInSafeMode()) { 8378 throw new IOException("Safe mode should be turned ON " 8379 + "in order to create namespace image."); 8380 } 8381 checkRollingUpgrade("start rolling upgrade"); 8382 getFSImage().checkUpgrade(); 8383 // in non-HA setup, we do an extra checkpoint to generate a rollback image 8384 getFSImage().saveNamespace(this, NameNodeFile.IMAGE_ROLLBACK, null); 8385 LOG.info("Successfully saved namespace for preparing rolling upgrade."); 8386 8387 // leave SafeMode automatically 8388 setSafeMode(SafeModeAction.SAFEMODE_LEAVE); 8389 setRollingUpgradeInfo(true, startTime); 8390 } 8391 8392 void setRollingUpgradeInfo(boolean createdRollbackImages, long startTime) { 8393 rollingUpgradeInfo = new RollingUpgradeInfo(blockPoolId, 8394 createdRollbackImages, startTime, 0L); 8395 } 8396 8397 public void setCreatedRollbackImages(boolean created) { 8398 if (rollingUpgradeInfo != null) { 8399 rollingUpgradeInfo.setCreatedRollbackImages(created); 8400 } 8401 } 8402 8403 public RollingUpgradeInfo getRollingUpgradeInfo() { 8404 return rollingUpgradeInfo; 8405 } 8406 8407 public boolean isNeedRollbackFsImage() { 8408 return needRollbackFsImage; 8409 } 8410 8411 public void setNeedRollbackFsImage(boolean needRollbackFsImage) { 8412 this.needRollbackFsImage = needRollbackFsImage; 8413 } 8414 
8415 @Override // NameNodeMXBean 8416 public RollingUpgradeInfo.Bean getRollingUpgradeStatus() { 8417 if (!isRollingUpgrade()) { 8418 return null; 8419 } 8420 RollingUpgradeInfo upgradeInfo = getRollingUpgradeInfo(); 8421 if (upgradeInfo.createdRollbackImages()) { 8422 return new RollingUpgradeInfo.Bean(upgradeInfo); 8423 } 8424 readLock(); 8425 try { 8426 // check again after acquiring the read lock. 8427 upgradeInfo = getRollingUpgradeInfo(); 8428 if (upgradeInfo == null) { 8429 return null; 8430 } 8431 if (!upgradeInfo.createdRollbackImages()) { 8432 boolean hasRollbackImage = this.getFSImage().hasRollbackFSImage(); 8433 upgradeInfo.setCreatedRollbackImages(hasRollbackImage); 8434 } 8435 } catch (IOException ioe) { 8436 LOG.warn("Encountered exception setting Rollback Image", ioe); 8437 } finally { 8438 readUnlock(); 8439 } 8440 return new RollingUpgradeInfo.Bean(upgradeInfo); 8441 } 8442 8443 /** Is rolling upgrade in progress? */ 8444 public boolean isRollingUpgrade() { 8445 return rollingUpgradeInfo != null; 8446 } 8447 8448 void checkRollingUpgrade(String action) throws RollingUpgradeException { 8449 if (isRollingUpgrade()) { 8450 throw new RollingUpgradeException("Failed to " + action 8451 + " since a rolling upgrade is already in progress." 
8452 + " Existing rolling upgrade info:\n" + rollingUpgradeInfo); 8453 } 8454 } 8455 8456 void finalizeRollingUpgrade() throws IOException { 8457 checkSuperuserPrivilege(); 8458 checkOperation(OperationCategory.WRITE); 8459 writeLock(); 8460 final RollingUpgradeInfo returnInfo; 8461 try { 8462 checkOperation(OperationCategory.WRITE); 8463 if (!isRollingUpgrade()) { 8464 return; 8465 } 8466 checkNameNodeSafeMode("Failed to finalize rolling upgrade"); 8467 8468 returnInfo = finalizeRollingUpgradeInternal(now()); 8469 getEditLog().logFinalizeRollingUpgrade(returnInfo.getFinalizeTime()); 8470 if (haEnabled) { 8471 // roll the edit log to make sure the standby NameNode can tail 8472 getFSImage().rollEditLog(); 8473 } 8474 getFSImage().updateStorageVersion(); 8475 getFSImage().renameCheckpoint(NameNodeFile.IMAGE_ROLLBACK, 8476 NameNodeFile.IMAGE); 8477 } finally { 8478 writeUnlock(); 8479 } 8480 8481 if (!haEnabled) { 8482 // Sync not needed for ha since the edit was rolled after logging. 8483 getEditLog().logSync(); 8484 } 8485 8486 if (auditLog.isInfoEnabled() && isExternalInvocation()) { 8487 logAuditEvent(true, "finalizeRollingUpgrade", null, null, null); 8488 } 8489 return; 8490 } 8491 8492 RollingUpgradeInfo finalizeRollingUpgradeInternal(long finalizeTime) 8493 throws RollingUpgradeException { 8494 final long startTime = rollingUpgradeInfo.getStartTime(); 8495 rollingUpgradeInfo = null; 8496 return new RollingUpgradeInfo(blockPoolId, false, startTime, finalizeTime); 8497 } 8498 8499 long addCacheDirective(CacheDirectiveInfo directive, EnumSet<CacheFlag> flags) 8500 throws IOException { 8501 final FSPermissionChecker pc = isPermissionEnabled ? 
8502 getPermissionChecker() : null; 8503 CacheEntryWithPayload cacheEntry = 8504 RetryCache.waitForCompletion(retryCache, null); 8505 if (cacheEntry != null && cacheEntry.isSuccess()) { 8506 return (Long) cacheEntry.getPayload(); 8507 } 8508 boolean success = false; 8509 if (!flags.contains(CacheFlag.FORCE)) { 8510 cacheManager.waitForRescanIfNeeded(); 8511 } 8512 writeLock(); 8513 String effectiveDirectiveStr = null; 8514 Long result = null; 8515 try { 8516 checkOperation(OperationCategory.WRITE); 8517 if (isInSafeMode()) { 8518 throw new SafeModeException( 8519 "Cannot add cache directive", safeMode); 8520 } 8521 if (directive.getId() != null) { 8522 throw new IOException("addDirective: you cannot specify an ID " + 8523 "for this operation."); 8524 } 8525 CacheDirectiveInfo effectiveDirective = 8526 cacheManager.addDirective(directive, pc, flags); 8527 getEditLog().logAddCacheDirectiveInfo(effectiveDirective, 8528 cacheEntry != null); 8529 result = effectiveDirective.getId(); 8530 effectiveDirectiveStr = effectiveDirective.toString(); 8531 success = true; 8532 } finally { 8533 writeUnlock(); 8534 if (success) { 8535 getEditLog().logSync(); 8536 } 8537 if (isAuditEnabled() && isExternalInvocation()) { 8538 logAuditEvent(success, "addCacheDirective", effectiveDirectiveStr, null, null); 8539 } 8540 RetryCache.setState(cacheEntry, success, result); 8541 } 8542 return result; 8543 } 8544 8545 void modifyCacheDirective(CacheDirectiveInfo directive, 8546 EnumSet<CacheFlag> flags) throws IOException { 8547 final FSPermissionChecker pc = isPermissionEnabled ? 
8548 getPermissionChecker() : null; 8549 boolean success = false; 8550 CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache); 8551 if (cacheEntry != null && cacheEntry.isSuccess()) { 8552 return; 8553 } 8554 if (!flags.contains(CacheFlag.FORCE)) { 8555 cacheManager.waitForRescanIfNeeded(); 8556 } 8557 writeLock(); 8558 try { 8559 checkOperation(OperationCategory.WRITE); 8560 if (isInSafeMode()) { 8561 throw new SafeModeException( 8562 "Cannot add cache directive", safeMode); 8563 } 8564 cacheManager.modifyDirective(directive, pc, flags); 8565 getEditLog().logModifyCacheDirectiveInfo(directive, 8566 cacheEntry != null); 8567 success = true; 8568 } finally { 8569 writeUnlock(); 8570 if (success) { 8571 getEditLog().logSync(); 8572 } 8573 if (isAuditEnabled() && isExternalInvocation()) { 8574 String idStr = "{id: " + directive.getId().toString() + "}"; 8575 logAuditEvent(success, "modifyCacheDirective", idStr, directive.toString(), null); 8576 } 8577 RetryCache.setState(cacheEntry, success); 8578 } 8579 } 8580 8581 void removeCacheDirective(Long id) throws IOException { 8582 final FSPermissionChecker pc = isPermissionEnabled ? 
8583 getPermissionChecker() : null; 8584 CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache); 8585 if (cacheEntry != null && cacheEntry.isSuccess()) { 8586 return; 8587 } 8588 boolean success = false; 8589 writeLock(); 8590 try { 8591 checkOperation(OperationCategory.WRITE); 8592 if (isInSafeMode()) { 8593 throw new SafeModeException( 8594 "Cannot remove cache directives", safeMode); 8595 } 8596 cacheManager.removeDirective(id, pc); 8597 getEditLog().logRemoveCacheDirectiveInfo(id, cacheEntry != null); 8598 success = true; 8599 } finally { 8600 writeUnlock(); 8601 if (isAuditEnabled() && isExternalInvocation()) { 8602 String idStr = "{id: " + id.toString() + "}"; 8603 logAuditEvent(success, "removeCacheDirective", idStr, null, 8604 null); 8605 } 8606 RetryCache.setState(cacheEntry, success); 8607 } 8608 getEditLog().logSync(); 8609 } 8610 8611 BatchedListEntries<CacheDirectiveEntry> listCacheDirectives( 8612 long startId, CacheDirectiveInfo filter) throws IOException { 8613 checkOperation(OperationCategory.READ); 8614 final FSPermissionChecker pc = isPermissionEnabled ? 8615 getPermissionChecker() : null; 8616 BatchedListEntries<CacheDirectiveEntry> results; 8617 cacheManager.waitForRescanIfNeeded(); 8618 readLock(); 8619 boolean success = false; 8620 try { 8621 checkOperation(OperationCategory.READ); 8622 results = 8623 cacheManager.listCacheDirectives(startId, filter, pc); 8624 success = true; 8625 } finally { 8626 readUnlock(); 8627 if (isAuditEnabled() && isExternalInvocation()) { 8628 logAuditEvent(success, "listCacheDirectives", filter.toString(), null, 8629 null); 8630 } 8631 } 8632 return results; 8633 } 8634 8635 public void addCachePool(CachePoolInfo req) throws IOException { 8636 final FSPermissionChecker pc = isPermissionEnabled ? 
8637 getPermissionChecker() : null; 8638 CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache); 8639 if (cacheEntry != null && cacheEntry.isSuccess()) { 8640 return; // Return previous response 8641 } 8642 writeLock(); 8643 boolean success = false; 8644 String poolInfoStr = null; 8645 try { 8646 checkOperation(OperationCategory.WRITE); 8647 if (isInSafeMode()) { 8648 throw new SafeModeException( 8649 "Cannot add cache pool " + req.getPoolName(), safeMode); 8650 } 8651 if (pc != null) { 8652 pc.checkSuperuserPrivilege(); 8653 } 8654 CachePoolInfo info = cacheManager.addCachePool(req); 8655 poolInfoStr = info.toString(); 8656 getEditLog().logAddCachePool(info, cacheEntry != null); 8657 success = true; 8658 } finally { 8659 writeUnlock(); 8660 if (isAuditEnabled() && isExternalInvocation()) { 8661 logAuditEvent(success, "addCachePool", poolInfoStr, null, null); 8662 } 8663 RetryCache.setState(cacheEntry, success); 8664 } 8665 8666 getEditLog().logSync(); 8667 } 8668 8669 public void modifyCachePool(CachePoolInfo req) throws IOException { 8670 final FSPermissionChecker pc = 8671 isPermissionEnabled ? 
getPermissionChecker() : null; 8672 CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache); 8673 if (cacheEntry != null && cacheEntry.isSuccess()) { 8674 return; // Return previous response 8675 } 8676 writeLock(); 8677 boolean success = false; 8678 try { 8679 checkOperation(OperationCategory.WRITE); 8680 if (isInSafeMode()) { 8681 throw new SafeModeException( 8682 "Cannot modify cache pool " + req.getPoolName(), safeMode); 8683 } 8684 if (pc != null) { 8685 pc.checkSuperuserPrivilege(); 8686 } 8687 cacheManager.modifyCachePool(req); 8688 getEditLog().logModifyCachePool(req, cacheEntry != null); 8689 success = true; 8690 } finally { 8691 writeUnlock(); 8692 if (isAuditEnabled() && isExternalInvocation()) { 8693 String poolNameStr = "{poolName: " + req.getPoolName() + "}"; 8694 logAuditEvent(success, "modifyCachePool", poolNameStr, req.toString(), null); 8695 } 8696 RetryCache.setState(cacheEntry, success); 8697 } 8698 8699 getEditLog().logSync(); 8700 } 8701 8702 public void removeCachePool(String cachePoolName) throws IOException { 8703 final FSPermissionChecker pc = 8704 isPermissionEnabled ? 
getPermissionChecker() : null; 8705 CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache); 8706 if (cacheEntry != null && cacheEntry.isSuccess()) { 8707 return; // Return previous response 8708 } 8709 writeLock(); 8710 boolean success = false; 8711 try { 8712 checkOperation(OperationCategory.WRITE); 8713 if (isInSafeMode()) { 8714 throw new SafeModeException( 8715 "Cannot remove cache pool " + cachePoolName, safeMode); 8716 } 8717 if (pc != null) { 8718 pc.checkSuperuserPrivilege(); 8719 } 8720 cacheManager.removeCachePool(cachePoolName); 8721 getEditLog().logRemoveCachePool(cachePoolName, cacheEntry != null); 8722 success = true; 8723 } finally { 8724 writeUnlock(); 8725 if (isAuditEnabled() && isExternalInvocation()) { 8726 String poolNameStr = "{poolName: " + cachePoolName + "}"; 8727 logAuditEvent(success, "removeCachePool", poolNameStr, null, null); 8728 } 8729 RetryCache.setState(cacheEntry, success); 8730 } 8731 8732 getEditLog().logSync(); 8733 } 8734 8735 public BatchedListEntries<CachePoolEntry> listCachePools(String prevKey) 8736 throws IOException { 8737 final FSPermissionChecker pc = 8738 isPermissionEnabled ? 
getPermissionChecker() : null; 8739 BatchedListEntries<CachePoolEntry> results; 8740 checkOperation(OperationCategory.READ); 8741 boolean success = false; 8742 cacheManager.waitForRescanIfNeeded(); 8743 readLock(); 8744 try { 8745 checkOperation(OperationCategory.READ); 8746 results = cacheManager.listCachePools(pc, prevKey); 8747 success = true; 8748 } finally { 8749 readUnlock(); 8750 if (isAuditEnabled() && isExternalInvocation()) { 8751 logAuditEvent(success, "listCachePools", null, null, null); 8752 } 8753 } 8754 return results; 8755 } 8756 8757 void modifyAclEntries(final String srcArg, List<AclEntry> aclSpec) 8758 throws IOException { 8759 String src = srcArg; 8760 nnConf.checkAclsConfigFlag(); 8761 HdfsFileStatus resultingStat = null; 8762 FSPermissionChecker pc = getPermissionChecker(); 8763 checkOperation(OperationCategory.WRITE); 8764 byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src); 8765 writeLock(); 8766 try { 8767 checkOperation(OperationCategory.WRITE); 8768 checkNameNodeSafeMode("Cannot modify ACL entries on " + src); 8769 src = resolvePath(src, pathComponents); 8770 checkOwner(pc, src); 8771 List<AclEntry> newAcl = dir.modifyAclEntries(src, aclSpec); 8772 getEditLog().logSetAcl(src, newAcl); 8773 resultingStat = getAuditFileInfo(src, false); 8774 } catch (AccessControlException e) { 8775 logAuditEvent(false, "modifyAclEntries", srcArg); 8776 throw e; 8777 } finally { 8778 writeUnlock(); 8779 } 8780 getEditLog().logSync(); 8781 logAuditEvent(true, "modifyAclEntries", srcArg, null, resultingStat); 8782 } 8783 8784 void removeAclEntries(final String srcArg, List<AclEntry> aclSpec) 8785 throws IOException { 8786 String src = srcArg; 8787 nnConf.checkAclsConfigFlag(); 8788 HdfsFileStatus resultingStat = null; 8789 FSPermissionChecker pc = getPermissionChecker(); 8790 checkOperation(OperationCategory.WRITE); 8791 byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src); 8792 writeLock(); 8793 try { 8794 
checkOperation(OperationCategory.WRITE); 8795 checkNameNodeSafeMode("Cannot remove ACL entries on " + src); 8796 src = resolvePath(src, pathComponents); 8797 checkOwner(pc, src); 8798 List<AclEntry> newAcl = dir.removeAclEntries(src, aclSpec); 8799 getEditLog().logSetAcl(src, newAcl); 8800 resultingStat = getAuditFileInfo(src, false); 8801 } catch (AccessControlException e) { 8802 logAuditEvent(false, "removeAclEntries", srcArg); 8803 throw e; 8804 } finally { 8805 writeUnlock(); 8806 } 8807 getEditLog().logSync(); 8808 logAuditEvent(true, "removeAclEntries", srcArg, null, resultingStat); 8809 } 8810 8811 void removeDefaultAcl(final String srcArg) throws IOException { 8812 String src = srcArg; 8813 nnConf.checkAclsConfigFlag(); 8814 HdfsFileStatus resultingStat = null; 8815 FSPermissionChecker pc = getPermissionChecker(); 8816 checkOperation(OperationCategory.WRITE); 8817 byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src); 8818 writeLock(); 8819 try { 8820 checkOperation(OperationCategory.WRITE); 8821 checkNameNodeSafeMode("Cannot remove default ACL entries on " + src); 8822 src = resolvePath(src, pathComponents); 8823 checkOwner(pc, src); 8824 List<AclEntry> newAcl = dir.removeDefaultAcl(src); 8825 getEditLog().logSetAcl(src, newAcl); 8826 resultingStat = getAuditFileInfo(src, false); 8827 } catch (AccessControlException e) { 8828 logAuditEvent(false, "removeDefaultAcl", srcArg); 8829 throw e; 8830 } finally { 8831 writeUnlock(); 8832 } 8833 getEditLog().logSync(); 8834 logAuditEvent(true, "removeDefaultAcl", srcArg, null, resultingStat); 8835 } 8836 8837 void removeAcl(final String srcArg) throws IOException { 8838 String src = srcArg; 8839 nnConf.checkAclsConfigFlag(); 8840 HdfsFileStatus resultingStat = null; 8841 FSPermissionChecker pc = getPermissionChecker(); 8842 checkOperation(OperationCategory.WRITE); 8843 byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src); 8844 writeLock(); 8845 try { 8846 
checkOperation(OperationCategory.WRITE); 8847 checkNameNodeSafeMode("Cannot remove ACL on " + src); 8848 src = resolvePath(src, pathComponents); 8849 checkOwner(pc, src); 8850 dir.removeAcl(src); 8851 getEditLog().logSetAcl(src, AclFeature.EMPTY_ENTRY_LIST); 8852 resultingStat = getAuditFileInfo(src, false); 8853 } catch (AccessControlException e) { 8854 logAuditEvent(false, "removeAcl", srcArg); 8855 throw e; 8856 } finally { 8857 writeUnlock(); 8858 } 8859 getEditLog().logSync(); 8860 logAuditEvent(true, "removeAcl", srcArg, null, resultingStat); 8861 } 8862 8863 void setAcl(final String srcArg, List<AclEntry> aclSpec) throws IOException { 8864 String src = srcArg; 8865 nnConf.checkAclsConfigFlag(); 8866 HdfsFileStatus resultingStat = null; 8867 FSPermissionChecker pc = getPermissionChecker(); 8868 checkOperation(OperationCategory.WRITE); 8869 byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src); 8870 writeLock(); 8871 try { 8872 checkOperation(OperationCategory.WRITE); 8873 checkNameNodeSafeMode("Cannot set ACL on " + src); 8874 src = resolvePath(src, pathComponents); 8875 checkOwner(pc, src); 8876 List<AclEntry> newAcl = dir.setAcl(src, aclSpec); 8877 getEditLog().logSetAcl(src, newAcl); 8878 resultingStat = getAuditFileInfo(src, false); 8879 } catch (AccessControlException e) { 8880 logAuditEvent(false, "setAcl", srcArg); 8881 throw e; 8882 } finally { 8883 writeUnlock(); 8884 } 8885 getEditLog().logSync(); 8886 logAuditEvent(true, "setAcl", srcArg, null, resultingStat); 8887 } 8888 8889 AclStatus getAclStatus(String src) throws IOException { 8890 nnConf.checkAclsConfigFlag(); 8891 FSPermissionChecker pc = getPermissionChecker(); 8892 checkOperation(OperationCategory.READ); 8893 byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src); 8894 boolean success = false; 8895 readLock(); 8896 try { 8897 checkOperation(OperationCategory.READ); 8898 src = resolvePath(src, pathComponents); 8899 if (isPermissionEnabled) { 8900 
checkPermission(pc, src, false, null, null, null, null); 8901 } 8902 final AclStatus ret = dir.getAclStatus(src); 8903 success = true; 8904 return ret; 8905 } finally { 8906 readUnlock(); 8907 logAuditEvent(success, "getAclStatus", src); 8908 } 8909 } 8910 8911 /** 8912 * Create an encryption zone on directory src using the specified key. 8913 * 8914 * @param src the path of a directory which will be the root of the 8915 * encryption zone. The directory must be empty. 8916 * @param keyName name of a key which must be present in the configured 8917 * KeyProvider. 8918 * @throws AccessControlException if the caller is not the superuser. 8919 * @throws UnresolvedLinkException if the path can't be resolved. 8920 * @throws SafeModeException if the Namenode is in safe mode. 8921 */ 8922 void createEncryptionZone(final String src, final String keyName) 8923 throws IOException, UnresolvedLinkException, 8924 SafeModeException, AccessControlException { 8925 final CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache); 8926 if (cacheEntry != null && cacheEntry.isSuccess()) { 8927 return; // Return previous response 8928 } 8929 8930 boolean success = false; 8931 try { 8932 if (provider == null) { 8933 throw new IOException( 8934 "Can't create an encryption zone for " + src + 8935 " since no key provider is available."); 8936 } 8937 if (keyName == null || keyName.isEmpty()) { 8938 throw new IOException("Must specify a key name when creating an " + 8939 "encryption zone"); 8940 } 8941 KeyProvider.Metadata metadata = provider.getMetadata(keyName); 8942 if (metadata == null) { 8943 /* 8944 * It would be nice if we threw something more specific than 8945 * IOException when the key is not found, but the KeyProvider API 8946 * doesn't provide for that. If that API is ever changed to throw 8947 * something more specific (e.g. UnknownKeyException) then we can 8948 * update this to match it, or better yet, just rethrow the 8949 * KeyProvider's exception. 
8950 */ 8951 throw new IOException("Key " + keyName + " doesn't exist."); 8952 } 8953 createEncryptionZoneInt(src, metadata.getCipher(), 8954 keyName, cacheEntry != null); 8955 success = true; 8956 } catch (AccessControlException e) { 8957 logAuditEvent(false, "createEncryptionZone", src); 8958 throw e; 8959 } finally { 8960 RetryCache.setState(cacheEntry, success); 8961 } 8962 } 8963 8964 private void createEncryptionZoneInt(final String srcArg, String cipher, 8965 String keyName, final boolean logRetryCache) throws IOException { 8966 String src = srcArg; 8967 HdfsFileStatus resultingStat = null; 8968 checkSuperuserPrivilege(); 8969 final byte[][] pathComponents = 8970 FSDirectory.getPathComponentsForReservedPath(src); 8971 writeLock(); 8972 try { 8973 checkSuperuserPrivilege(); 8974 checkOperation(OperationCategory.WRITE); 8975 checkNameNodeSafeMode("Cannot create encryption zone on " + src); 8976 src = resolvePath(src, pathComponents); 8977 8978 final CipherSuite suite = CipherSuite.convert(cipher); 8979 // For now this is hardcoded, as we only support one method. 8980 final CryptoProtocolVersion version = 8981 CryptoProtocolVersion.ENCRYPTION_ZONES; 8982 final XAttr ezXAttr = dir.createEncryptionZone(src, suite, 8983 version, keyName); 8984 List<XAttr> xAttrs = Lists.newArrayListWithCapacity(1); 8985 xAttrs.add(ezXAttr); 8986 getEditLog().logSetXAttrs(src, xAttrs, logRetryCache); 8987 resultingStat = getAuditFileInfo(src, false); 8988 } finally { 8989 writeUnlock(); 8990 } 8991 getEditLog().logSync(); 8992 logAuditEvent(true, "createEncryptionZone", srcArg, null, resultingStat); 8993 } 8994 8995 /** 8996 * Get the encryption zone for the specified path. 8997 * 8998 * @param srcArg the path of a file or directory to get the EZ for. 8999 * @return the EZ of the of the path or null if none. 9000 * @throws AccessControlException if the caller is not the superuser. 9001 * @throws UnresolvedLinkException if the path can't be resolved. 
9002 */ 9003 EncryptionZone getEZForPath(final String srcArg) 9004 throws AccessControlException, UnresolvedLinkException, IOException { 9005 String src = srcArg; 9006 HdfsFileStatus resultingStat = null; 9007 final byte[][] pathComponents = 9008 FSDirectory.getPathComponentsForReservedPath(src); 9009 boolean success = false; 9010 final FSPermissionChecker pc = getPermissionChecker(); 9011 checkOperation(OperationCategory.READ); 9012 readLock(); 9013 try { 9014 if (isPermissionEnabled) { 9015 checkPathAccess(pc, src, FsAction.READ); 9016 } 9017 checkOperation(OperationCategory.READ); 9018 src = resolvePath(src, pathComponents); 9019 final INodesInPath iip = dir.getINodesInPath(src, true); 9020 final EncryptionZone ret = dir.getEZForPath(iip); 9021 resultingStat = getAuditFileInfo(src, false); 9022 success = true; 9023 return ret; 9024 } finally { 9025 readUnlock(); 9026 logAuditEvent(success, "getEZForPath", srcArg, null, resultingStat); 9027 } 9028 } 9029 9030 BatchedListEntries<EncryptionZone> listEncryptionZones(long prevId) 9031 throws IOException { 9032 boolean success = false; 9033 checkSuperuserPrivilege(); 9034 checkOperation(OperationCategory.READ); 9035 readLock(); 9036 try { 9037 checkSuperuserPrivilege(); 9038 checkOperation(OperationCategory.READ); 9039 final BatchedListEntries<EncryptionZone> ret = 9040 dir.listEncryptionZones(prevId); 9041 success = true; 9042 return ret; 9043 } finally { 9044 readUnlock(); 9045 logAuditEvent(success, "listEncryptionZones", null); 9046 } 9047 } 9048 9049 /** 9050 * Set xattr for a file or directory. 
9051 * 9052 * @param src 9053 * - path on which it sets the xattr 9054 * @param xAttr 9055 * - xAttr details to set 9056 * @param flag 9057 * - xAttrs flags 9058 * @throws AccessControlException 9059 * @throws SafeModeException 9060 * @throws UnresolvedLinkException 9061 * @throws IOException 9062 */ 9063 void setXAttr(String src, XAttr xAttr, EnumSet<XAttrSetFlag> flag) 9064 throws AccessControlException, SafeModeException, 9065 UnresolvedLinkException, IOException { 9066 CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache); 9067 if (cacheEntry != null && cacheEntry.isSuccess()) { 9068 return; // Return previous response 9069 } 9070 boolean success = false; 9071 try { 9072 setXAttrInt(src, xAttr, flag, cacheEntry != null); 9073 success = true; 9074 } catch (AccessControlException e) { 9075 logAuditEvent(false, "setXAttr", src); 9076 throw e; 9077 } finally { 9078 RetryCache.setState(cacheEntry, success); 9079 } 9080 } 9081 9082 private void setXAttrInt(final String srcArg, XAttr xAttr, 9083 EnumSet<XAttrSetFlag> flag, boolean logRetryCache) throws IOException { 9084 String src = srcArg; 9085 nnConf.checkXAttrsConfigFlag(); 9086 checkXAttrSize(xAttr); 9087 HdfsFileStatus resultingStat = null; 9088 FSPermissionChecker pc = getPermissionChecker(); 9089 XAttrPermissionFilter.checkPermissionForApi(pc, xAttr, 9090 FSDirectory.isReservedRawName(src)); 9091 byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src); 9092 writeLock(); 9093 try { 9094 checkOperation(OperationCategory.WRITE); 9095 checkNameNodeSafeMode("Cannot set XAttr on " + src); 9096 src = resolvePath(src, pathComponents); 9097 checkXAttrChangeAccess(src, xAttr, pc); 9098 List<XAttr> xAttrs = Lists.newArrayListWithCapacity(1); 9099 xAttrs.add(xAttr); 9100 dir.setXAttrs(src, xAttrs, flag); 9101 getEditLog().logSetXAttrs(src, xAttrs, logRetryCache); 9102 resultingStat = getAuditFileInfo(src, false); 9103 } finally { 9104 writeUnlock(); 9105 } 9106 getEditLog().logSync(); 9107 
logAuditEvent(true, "setXAttr", srcArg, null, resultingStat); 9108 } 9109 9110 /** 9111 * Verifies that the combined size of the name and value of an xattr is within 9112 * the configured limit. Setting a limit of zero disables this check. 9113 */ 9114 private void checkXAttrSize(XAttr xAttr) { 9115 if (nnConf.xattrMaxSize == 0) { 9116 return; 9117 } 9118 int size = xAttr.getName().getBytes(Charsets.UTF_8).length; 9119 if (xAttr.getValue() != null) { 9120 size += xAttr.getValue().length; 9121 } 9122 if (size > nnConf.xattrMaxSize) { 9123 throw new HadoopIllegalArgumentException( 9124 "The XAttr is too big. The maximum combined size of the" 9125 + " name and value is " + nnConf.xattrMaxSize 9126 + ", but the total size is " + size); 9127 } 9128 } 9129 9130 List<XAttr> getXAttrs(final String srcArg, List<XAttr> xAttrs) 9131 throws IOException { 9132 String src = srcArg; 9133 nnConf.checkXAttrsConfigFlag(); 9134 FSPermissionChecker pc = getPermissionChecker(); 9135 final boolean isRawPath = FSDirectory.isReservedRawName(src); 9136 boolean getAll = xAttrs == null || xAttrs.isEmpty(); 9137 if (!getAll) { 9138 try { 9139 XAttrPermissionFilter.checkPermissionForApi(pc, xAttrs, isRawPath); 9140 } catch (AccessControlException e) { 9141 logAuditEvent(false, "getXAttrs", srcArg); 9142 throw e; 9143 } 9144 } 9145 checkOperation(OperationCategory.READ); 9146 byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src); 9147 readLock(); 9148 try { 9149 src = resolvePath(src, pathComponents); 9150 checkOperation(OperationCategory.READ); 9151 if (isPermissionEnabled) { 9152 checkPathAccess(pc, src, FsAction.READ); 9153 } 9154 List<XAttr> all = dir.getXAttrs(src); 9155 List<XAttr> filteredAll = XAttrPermissionFilter. 
9156 filterXAttrsForApi(pc, all, isRawPath); 9157 if (getAll) { 9158 return filteredAll; 9159 } else { 9160 if (filteredAll == null || filteredAll.isEmpty()) { 9161 return null; 9162 } 9163 List<XAttr> toGet = Lists.newArrayListWithCapacity(xAttrs.size()); 9164 for (XAttr xAttr : xAttrs) { 9165 boolean foundIt = false; 9166 for (XAttr a : filteredAll) { 9167 if (xAttr.getNameSpace() == a.getNameSpace() 9168 && xAttr.getName().equals(a.getName())) { 9169 toGet.add(a); 9170 foundIt = true; 9171 break; 9172 } 9173 } 9174 if (!foundIt) { 9175 throw new IOException( 9176 "At least one of the attributes provided was not found."); 9177 } 9178 } 9179 return toGet; 9180 } 9181 } catch (AccessControlException e) { 9182 logAuditEvent(false, "getXAttrs", srcArg); 9183 throw e; 9184 } finally { 9185 readUnlock(); 9186 } 9187 } 9188 9189 List<XAttr> listXAttrs(String src) throws IOException { 9190 nnConf.checkXAttrsConfigFlag(); 9191 final FSPermissionChecker pc = getPermissionChecker(); 9192 final boolean isRawPath = FSDirectory.isReservedRawName(src); 9193 checkOperation(OperationCategory.READ); 9194 byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src); 9195 readLock(); 9196 try { 9197 src = resolvePath(src, pathComponents); 9198 checkOperation(OperationCategory.READ); 9199 if (isPermissionEnabled) { 9200 /* To access xattr names, you need EXECUTE in the owning directory. */ 9201 checkParentAccess(pc, src, FsAction.EXECUTE); 9202 } 9203 final List<XAttr> all = dir.getXAttrs(src); 9204 final List<XAttr> filteredAll = XAttrPermissionFilter. 9205 filterXAttrsForApi(pc, all, isRawPath); 9206 return filteredAll; 9207 } catch (AccessControlException e) { 9208 logAuditEvent(false, "listXAttrs", src); 9209 throw e; 9210 } finally { 9211 readUnlock(); 9212 } 9213 } 9214 9215 /** 9216 * Remove an xattr for a file or directory. 
9217 * 9218 * @param src 9219 * - path to remove the xattr from 9220 * @param xAttr 9221 * - xAttr to remove 9222 * @throws AccessControlException 9223 * @throws SafeModeException 9224 * @throws UnresolvedLinkException 9225 * @throws IOException 9226 */ 9227 void removeXAttr(String src, XAttr xAttr) throws IOException { 9228 CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache); 9229 if (cacheEntry != null && cacheEntry.isSuccess()) { 9230 return; // Return previous response 9231 } 9232 boolean success = false; 9233 try { 9234 removeXAttrInt(src, xAttr, cacheEntry != null); 9235 success = true; 9236 } catch (AccessControlException e) { 9237 logAuditEvent(false, "removeXAttr", src); 9238 throw e; 9239 } finally { 9240 RetryCache.setState(cacheEntry, success); 9241 } 9242 } 9243 9244 void removeXAttrInt(final String srcArg, XAttr xAttr, boolean logRetryCache) 9245 throws IOException { 9246 String src = srcArg; 9247 nnConf.checkXAttrsConfigFlag(); 9248 HdfsFileStatus resultingStat = null; 9249 FSPermissionChecker pc = getPermissionChecker(); 9250 XAttrPermissionFilter.checkPermissionForApi(pc, xAttr, 9251 FSDirectory.isReservedRawName(src)); 9252 byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src); 9253 writeLock(); 9254 try { 9255 checkOperation(OperationCategory.WRITE); 9256 checkNameNodeSafeMode("Cannot remove XAttr entry on " + src); 9257 src = resolvePath(src, pathComponents); 9258 checkXAttrChangeAccess(src, xAttr, pc); 9259 9260 List<XAttr> xAttrs = Lists.newArrayListWithCapacity(1); 9261 xAttrs.add(xAttr); 9262 List<XAttr> removedXAttrs = dir.removeXAttrs(src, xAttrs); 9263 if (removedXAttrs != null && !removedXAttrs.isEmpty()) { 9264 getEditLog().logRemoveXAttrs(src, removedXAttrs, logRetryCache); 9265 } else { 9266 throw new IOException( 9267 "No matching attributes found for remove operation"); 9268 } 9269 resultingStat = getAuditFileInfo(src, false); 9270 } finally { 9271 writeUnlock(); 9272 } 9273 getEditLog().logSync(); 
9274 logAuditEvent(true, "removeXAttr", srcArg, null, resultingStat); 9275 } 9276 9277 private void checkXAttrChangeAccess(String src, XAttr xAttr, 9278 FSPermissionChecker pc) throws UnresolvedLinkException, 9279 AccessControlException { 9280 if (isPermissionEnabled && xAttr.getNameSpace() == XAttr.NameSpace.USER) { 9281 final INode inode = dir.getINode(src); 9282 if (inode != null && 9283 inode.isDirectory() && 9284 inode.getFsPermission().getStickyBit()) { 9285 if (!pc.isSuperUser()) { 9286 checkOwner(pc, src); 9287 } 9288 } else { 9289 checkPathAccess(pc, src, FsAction.WRITE); 9290 } 9291 } 9292 } 9293 9294 void checkAccess(String src, FsAction mode) throws AccessControlException, 9295 FileNotFoundException, UnresolvedLinkException, IOException { 9296 checkOperation(OperationCategory.READ); 9297 byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src); 9298 readLock(); 9299 try { 9300 checkOperation(OperationCategory.READ); 9301 src = FSDirectory.resolvePath(src, pathComponents, dir); 9302 if (dir.getINode(src) == null) { 9303 throw new FileNotFoundException("Path not found"); 9304 } 9305 if (isPermissionEnabled) { 9306 FSPermissionChecker pc = getPermissionChecker(); 9307 checkPathAccess(pc, src, mode); 9308 } 9309 } catch (AccessControlException e) { 9310 logAuditEvent(false, "checkAccess", src); 9311 throw e; 9312 } finally { 9313 readUnlock(); 9314 } 9315 } 9316 9317 /** 9318 * Default AuditLogger implementation; used when no access logger is 9319 * defined in the config file. It can also be explicitly listed in the 9320 * config file. 
9321 */ 9322 private static class DefaultAuditLogger extends HdfsAuditLogger { 9323 9324 private boolean logTokenTrackingId; 9325 9326 @Override 9327 public void initialize(Configuration conf) { 9328 logTokenTrackingId = conf.getBoolean( 9329 DFSConfigKeys.DFS_NAMENODE_AUDIT_LOG_TOKEN_TRACKING_ID_KEY, 9330 DFSConfigKeys.DFS_NAMENODE_AUDIT_LOG_TOKEN_TRACKING_ID_DEFAULT); 9331 } 9332 9333 @Override 9334 public void logAuditEvent(boolean succeeded, String userName, 9335 InetAddress addr, String cmd, String src, String dst, 9336 FileStatus status, UserGroupInformation ugi, 9337 DelegationTokenSecretManager dtSecretManager) { 9338 if (auditLog.isInfoEnabled()) { 9339 final StringBuilder sb = auditBuffer.get(); 9340 sb.setLength(0); 9341 sb.append("allowed=").append(succeeded).append("\t"); 9342 sb.append("ugi=").append(userName).append("\t"); 9343 sb.append("ip=").append(addr).append("\t"); 9344 sb.append("cmd=").append(cmd).append("\t"); 9345 sb.append("src=").append(src).append("\t"); 9346 sb.append("dst=").append(dst).append("\t"); 9347 if (null == status) { 9348 sb.append("perm=null"); 9349 } else { 9350 sb.append("perm="); 9351 sb.append(status.getOwner()).append(":"); 9352 sb.append(status.getGroup()).append(":"); 9353 sb.append(status.getPermission()); 9354 } 9355 if (logTokenTrackingId) { 9356 sb.append("\t").append("trackingId="); 9357 String trackingId = null; 9358 if (ugi != null && dtSecretManager != null 9359 && ugi.getAuthenticationMethod() == AuthenticationMethod.TOKEN) { 9360 for (TokenIdentifier tid: ugi.getTokenIdentifiers()) { 9361 if (tid instanceof DelegationTokenIdentifier) { 9362 DelegationTokenIdentifier dtid = 9363 (DelegationTokenIdentifier)tid; 9364 trackingId = dtSecretManager.getTokenTrackingId(dtid); 9365 break; 9366 } 9367 } 9368 } 9369 sb.append(trackingId); 9370 } 9371 sb.append("\t").append("proto="); 9372 sb.append(NamenodeWebHdfsMethods.isWebHdfsInvocation() ? 
"webhdfs" : "rpc"); 9373 logAuditMessage(sb.toString()); 9374 } 9375 } 9376 9377 public void logAuditMessage(String message) { 9378 auditLog.info(message); 9379 } 9380 } 9381 9382 private static void enableAsyncAuditLog() { 9383 if (!(auditLog instanceof Log4JLogger)) { 9384 LOG.warn("Log4j is required to enable async auditlog"); 9385 return; 9386 } 9387 Logger logger = ((Log4JLogger)auditLog).getLogger(); 9388 @SuppressWarnings("unchecked") 9389 List<Appender> appenders = Collections.list(logger.getAllAppenders()); 9390 // failsafe against trying to async it more than once 9391 if (!appenders.isEmpty() && !(appenders.get(0) instanceof AsyncAppender)) { 9392 AsyncAppender asyncAppender = new AsyncAppender(); 9393 // change logger to have an async appender containing all the 9394 // previously configured appenders 9395 for (Appender appender : appenders) { 9396 logger.removeAppender(appender); 9397 asyncAppender.addAppender(appender); 9398 } 9399 logger.addAppender(asyncAppender); 9400 } 9401 } 9402} 9403