/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hdfs.server.datanode.fsdataset;


import java.io.EOFException;
import java.io.File;
import java.io.FileDescriptor;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.util.Collection;
import java.util.List;
import java.util.Map;

import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.StorageType;
import org.apache.hadoop.hdfs.protocol.Block;
import org.apache.hadoop.hdfs.protocol.BlockListAsLongs;
import org.apache.hadoop.hdfs.protocol.BlockLocalPathInfo;
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
import org.apache.hadoop.hdfs.protocol.HdfsBlocksMetadata;
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.ReplicaState;
import org.apache.hadoop.hdfs.server.datanode.DataNode;
import org.apache.hadoop.hdfs.server.datanode.DataStorage;
import org.apache.hadoop.hdfs.server.datanode.FinalizedReplica;
import org.apache.hadoop.hdfs.server.datanode.Replica;
import org.apache.hadoop.hdfs.server.datanode.ReplicaInPipelineInterface;
import org.apache.hadoop.hdfs.server.datanode.ReplicaNotFoundException;
import org.apache.hadoop.hdfs.server.datanode.StorageLocation;
import org.apache.hadoop.hdfs.server.datanode.UnexpectedReplicaStateException;
import org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.FsDatasetFactory;
import org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.FsVolumeImpl;
import org.apache.hadoop.hdfs.server.datanode.metrics.FSDatasetMBean;
import org.apache.hadoop.hdfs.server.protocol.BlockRecoveryCommand.RecoveringBlock;
import org.apache.hadoop.hdfs.server.protocol.DatanodeStorage;
import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo;
import org.apache.hadoop.hdfs.server.protocol.ReplicaRecoveryInfo;
import org.apache.hadoop.hdfs.server.protocol.StorageReport;
import org.apache.hadoop.util.DiskChecker.DiskErrorException;
import org.apache.hadoop.util.ReflectionUtils;

/**
 * This is a service provider interface for the underlying storage that
 * stores replicas for a data node.
 * The default implementation stores replicas on local drives.
 *
 * @param <V> the concrete volume type managed by this dataset
 */
@InterfaceAudience.Private
public interface FsDatasetSpi<V extends FsVolumeSpi> extends FSDatasetMBean {
  /**
   * A factory for creating {@link FsDatasetSpi} objects.
   *
   * @param <D> the concrete dataset type this factory produces
   */
  public static abstract class Factory<D extends FsDatasetSpi<?>> {
    /**
     * Look up the factory implementation class from
     * {@link DFSConfigKeys#DFS_DATANODE_FSDATASET_FACTORY_KEY}
     * (defaulting to {@link FsDatasetFactory}) and instantiate it
     * reflectively.
     *
     * @param conf configuration to read the factory class name from
     * @return the configured factory.
     */
    public static Factory<?> getFactory(Configuration conf) {
      @SuppressWarnings("rawtypes")
      final Class<? extends Factory> clazz = conf.getClass(
          DFSConfigKeys.DFS_DATANODE_FSDATASET_FACTORY_KEY,
          FsDatasetFactory.class,
          Factory.class);
      return ReflectionUtils.newInstance(clazz, conf);
    }

    /**
     * Create a new dataset object.
     *
     * @param datanode the owning datanode
     * @param storage the storage the dataset is backed by
     * @param conf configuration for the new dataset
     * @return a new dataset instance
     * @throws IOException if the dataset cannot be created
     */
    public abstract D newInstance(DataNode datanode, DataStorage storage,
        Configuration conf) throws IOException;

    /** Does the factory create simulated objects? */
    public boolean isSimulated() {
      return false;
    }
  }

  /**
   * Create rolling logs.
   *
   * @param bpid the block pool id the logs belong to
   * @param prefix the prefix of the log names.
   * @return rolling logs
   * @throws IOException if the logs cannot be created
   */
  public RollingLogs createRollingLogs(String bpid, String prefix
      ) throws IOException;

  /** @return a list of volumes. */
  public List<V> getVolumes();

  /**
   * Add a StorageLocation to FsDataset.
   *
   * @param location the storage location to add
   * @param nsInfos namespace information for the block pools to add on the
   *        new volume
   * @throws IOException if the volume cannot be added
   */
  public void addVolume(
      final StorageLocation location,
      final List<NamespaceInfo> nsInfos) throws IOException;

  /**
   * Removes a collection of volumes from FsDataset.
   *
   * @param volumes the locations of the volumes to remove
   */
  public void removeVolumes(Collection<StorageLocation> volumes);

  /** @return a storage with the given storage ID */
  public DatanodeStorage getStorage(final String storageUuid);

  /**
   * @param bpid Block Pool Id
   * @return one or more storage reports for attached volumes.
   * @throws IOException on error collecting the reports
   */
  public StorageReport[] getStorageReports(String bpid)
      throws IOException;

  /** @return the volume that contains a replica of the block. */
  public V getVolume(ExtendedBlock b);

  /** @return a volume information map (name =&gt; info). */
  public Map<String, Object> getVolumeInfoMap();

  /** @return a list of finalized blocks for the given block pool. */
  public List<FinalizedReplica> getFinalizedBlocks(String bpid);

  /**
   * @return a list of finalized blocks on persistent (non-transient) storage
   *         for the given block pool.
   */
  public List<FinalizedReplica> getFinalizedBlocksOnPersistentStorage(String bpid);

  /**
   * Check whether the in-memory block record matches the block on the disk,
   * and, in case that they are not matched, update the record or mark it
   * as corrupted.
   *
   * @param bpid Block Pool Id
   * @param blockId id of the block being checked
   * @param diskFile the block file found on disk
   * @param diskMetaFile the metadata file found on disk
   * @param vol the volume the files were found on
   * @throws IOException on error reconciling the record
   */
  public void checkAndUpdate(String bpid, long blockId, File diskFile,
      File diskMetaFile, FsVolumeSpi vol) throws IOException;

  /**
   * @param b - the block
   * @return a stream if the meta-data of the block exists;
   *         otherwise, return null.
   * @throws IOException on error opening the stream
   */
  public LengthInputStream getMetaDataInputStream(ExtendedBlock b
      ) throws IOException;

  /**
   * Returns the specified block's on-disk length (excluding metadata)
   * @return the specified block's on-disk length (excluding metadata)
   * @throws IOException on error
   */
  public long getLength(ExtendedBlock b) throws IOException;

  /**
   * Get reference to the replica meta info in the replicasMap.
   * To be called from methods that are synchronized on {@link FSDataset}
   * @return replica from the replicas map
   */
  @Deprecated
  public Replica getReplica(String bpid, long blockId);

  /**
   * @return replica meta information
   */
  public String getReplicaString(String bpid, long blockId);

  /**
   * @return the stored block, carrying the generation stamp stored with the
   *         block.
   * @throws IOException on error looking up the block
   */
  public Block getStoredBlock(String bpid, long blkid) throws IOException;

  /**
   * Returns an input stream at specified offset of the specified block
   * @param b block
   * @param seekOffset offset with in the block to seek to
   * @return an input stream to read the contents of the specified block,
   *         starting at the offset
   * @throws IOException on error opening the stream
   */
  public InputStream getBlockInputStream(ExtendedBlock b, long seekOffset)
      throws IOException;

  /**
   * Returns an input stream at specified offset of the specified block
   * The block is still in the tmp directory and is not finalized
   * @param b block
   * @param blkoff offset within the block data to seek to
   * @param ckoff offset within the checksum data to seek to
   * @return an input stream to read the contents of the specified block,
   *         starting at the offset
   * @throws IOException on error opening the streams
   */
  public ReplicaInputStreams getTmpInputStreams(ExtendedBlock b, long blkoff,
      long ckoff) throws IOException;

  /**
   * Creates a temporary replica and returns the meta information of the replica
   *
   * @param storageType the type of storage to place the replica on
   * @param b block
   * @return the meta info of the replica which is being written to
   * @throws IOException if an error occurs
   */
  public ReplicaInPipelineInterface createTemporary(StorageType storageType,
      ExtendedBlock b) throws IOException;

  /**
   * Creates a RBW replica and returns the meta info of the replica
   *
   * @param storageType the type of storage to place the replica on
   * @param b block
   * @param allowLazyPersist whether the replica may be lazily persisted
   * @return the meta info of the replica which is being written to
   * @throws IOException if an error occurs
   */
  public ReplicaInPipelineInterface createRbw(StorageType storageType,
      ExtendedBlock b, boolean allowLazyPersist) throws IOException;

  /**
   * Recovers a RBW replica and returns the meta info of the replica
   *
   * @param b block
   * @param newGS the new generation stamp for the replica
   * @param minBytesRcvd the minimum number of bytes that the replica could have
   * @param maxBytesRcvd the maximum number of bytes that the replica could have
   * @return the meta info of the replica which is being written to
   * @throws IOException if an error occurs
   */
  public ReplicaInPipelineInterface recoverRbw(ExtendedBlock b,
      long newGS, long minBytesRcvd, long maxBytesRcvd) throws IOException;

  /**
   * Convert a temporary replica to a RBW.
   * @param temporary the temporary replica being converted
   * @return the result RBW
   * @throws IOException if the conversion fails
   */
  public ReplicaInPipelineInterface convertTemporaryToRbw(
      ExtendedBlock temporary) throws IOException;

  /**
   * Append to a finalized replica and returns the meta info of the replica
   *
   * @param b block
   * @param newGS the new generation stamp for the replica
   * @param expectedBlockLen the number of bytes the replica is expected to have
   * @return the meta info of the replica which is being written to
   * @throws IOException if the append fails
   */
  public ReplicaInPipelineInterface append(ExtendedBlock b, long newGS,
      long expectedBlockLen) throws IOException;

  /**
   * Recover a failed append to a finalized replica
   * and returns the meta info of the replica
   *
   * @param b block
   * @param newGS the new generation stamp for the replica
   * @param expectedBlockLen the number of bytes the replica is expected to have
   * @return the meta info of the replica which is being written to
   * @throws IOException if the recovery fails
   */
  public ReplicaInPipelineInterface recoverAppend(ExtendedBlock b, long newGS,
      long expectedBlockLen) throws IOException;

  /**
   * Recover a failed pipeline close
   * It bumps the replica's generation stamp and finalize it if RBW replica
   *
   * @param b block
   * @param newGS the new generation stamp for the replica
   * @param expectedBlockLen the number of bytes the replica is expected to have
   * @return the storage uuid of the replica.
   * @throws IOException if the recovery fails
   */
  public String recoverClose(ExtendedBlock b, long newGS, long expectedBlockLen
      ) throws IOException;

  /**
   * Finalizes the block previously opened for writing using writeToBlock.
   * The block size is what is in the parameter b and it must match the amount
   * of data written
   * @throws IOException if finalization fails
   */
  public void finalizeBlock(ExtendedBlock b) throws IOException;

  /**
   * Unfinalizes the block previously opened for writing using writeToBlock.
   * The temporary file associated with this block is deleted.
   * @throws IOException if the block cannot be unfinalized
   */
  public void unfinalizeBlock(ExtendedBlock b) throws IOException;

  /**
   * Returns one block report per volume.
   * @param bpid Block Pool Id
   * @return - a map of DatanodeStorage to block report for the volume.
   */
  public Map<DatanodeStorage, BlockListAsLongs> getBlockReports(String bpid);

  /**
   * Returns the cache report - the full list of cached block IDs of a
   * block pool.
   * @param bpid Block Pool Id
   * @return the cache report - the full list of cached block IDs.
   */
  public List<Long> getCacheReport(String bpid);

  /** Does the dataset contain the block? */
  public boolean contains(ExtendedBlock block);

  /**
   * Check if a block is valid.
   *
   * @param b The block to check.
   * @param minLength The minimum length that the block must have. May be 0.
   * @param state If this is null, it is ignored. If it is non-null, we
   *        will check that the replica has this state.
   *
   * @throws ReplicaNotFoundException If the replica is not found
   *
   * @throws UnexpectedReplicaStateException If the replica is not in the
   *           expected state.
   * @throws FileNotFoundException If the block file is not found or there
   *           was an error locating it.
   * @throws EOFException If the replica length is too short.
   *
   * @throws IOException May be thrown from the methods called.
   */
  public void checkBlock(ExtendedBlock b, long minLength, ReplicaState state)
      throws ReplicaNotFoundException, UnexpectedReplicaStateException,
      FileNotFoundException, EOFException, IOException;


  /**
   * Is the block valid?
   * @return - true if the specified block is valid
   */
  public boolean isValidBlock(ExtendedBlock b);

  /**
   * Is the block a valid RBW?
   * @return - true if the specified block is a valid RBW
   */
  public boolean isValidRbw(ExtendedBlock b);

  /**
   * Invalidates the specified blocks
   * @param bpid Block pool Id
   * @param invalidBlks - the blocks to be invalidated
   * @throws IOException if invalidation fails
   */
  public void invalidate(String bpid, Block invalidBlks[]) throws IOException;

  /**
   * Caches the specified blocks
   * @param bpid Block pool id
   * @param blockIds - block ids to cache
   */
  public void cache(String bpid, long[] blockIds);

  /**
   * Uncaches the specified blocks
   * @param bpid Block pool id
   * @param blockIds - blocks ids to uncache
   */
  public void uncache(String bpid, long[] blockIds);

  /**
   * Determine if the specified block is cached.
   * @param bpid Block pool id
   * @param blockId - block id
   * @return true if the block is cached
   */
  public boolean isCached(String bpid, long blockId);

  /**
   * Check if all the data directories are healthy
   * @throws DiskErrorException if a data directory has failed
   */
  public void checkDataDir() throws DiskErrorException;

  /**
   * Shutdown the FSDataset
   */
  public void shutdown();

  /**
   * Sets the file pointer of the checksum stream so that the last checksum
   * will be overwritten
   * @param b block
   * @param outs The streams for the data file and checksum file
   * @param checksumSize number of bytes each checksum has
   * @throws IOException if the position cannot be adjusted
   */
  public void adjustCrcChannelPosition(ExtendedBlock b,
      ReplicaOutputStreams outs, int checksumSize) throws IOException;

  /**
   * Checks how many valid storage volumes there are in the DataNode.
   * @return true if more than the minimum number of valid volumes are left
   *         in the FSDataSet.
   */
  public boolean hasEnoughResource();

  /**
   * Get visible length of the specified replica.
   * @throws IOException if the replica cannot be found
   */
  long getReplicaVisibleLength(final ExtendedBlock block) throws IOException;

  /**
   * Initialize a replica recovery.
   * @return actual state of the replica on this data-node or
   *         null if data-node does not have the replica.
   * @throws IOException if recovery initialization fails
   */
  public ReplicaRecoveryInfo initReplicaRecovery(RecoveringBlock rBlock
      ) throws IOException;

  /**
   * Update replica's generation stamp and length and finalize it.
   * @param oldBlock the block being recovered
   * @param recoveryId the recovery id, used as the new generation stamp
   * @param newLength the new length of the replica
   * @return the ID of storage that stores the block
   * @throws IOException if the update fails
   */
  public String updateReplicaUnderRecovery(ExtendedBlock oldBlock,
      long recoveryId, long newLength) throws IOException;

  /**
   * add new block pool ID
   * @param bpid Block pool Id
   * @param conf Configuration
   * @throws IOException if the block pool cannot be added
   */
  public void addBlockPool(String bpid, Configuration conf) throws IOException;

  /**
   * Shutdown and remove the block pool from underlying storage.
   * @param bpid Block pool Id to be removed
   */
  public void shutdownBlockPool(String bpid) ;

  /**
   * Deletes the block pool directories. If force is false, directories are
   * deleted only if no block files exist for the block pool. If force
   * is true entire directory for the blockpool is deleted along with its
   * contents.
   * @param bpid BlockPool Id to be deleted.
   * @param force If force is false, directories are deleted only if no
   *        block files exist for the block pool, otherwise entire
   *        directory for the blockpool is deleted along with its contents.
   * @throws IOException if deletion fails
   */
  public void deleteBlockPool(String bpid, boolean force) throws IOException;

  /**
   * Get {@link BlockLocalPathInfo} for the given block.
   * @throws IOException if the path info cannot be obtained
   */
  public BlockLocalPathInfo getBlockLocalPathInfo(ExtendedBlock b
      ) throws IOException;

  /**
   * Get a {@link HdfsBlocksMetadata} corresponding to the list of blocks in
   * <code>blocks</code>.
   *
   * @param bpid pool to query
   * @param blockIds List of block ids for which to return metadata
   * @return metadata Metadata for the list of blocks
   * @throws IOException on error gathering the metadata
   */
  public HdfsBlocksMetadata getHdfsBlocksMetadata(String bpid,
      long[] blockIds) throws IOException;

  /**
   * Enable 'trash' for the given dataset. When trash is enabled, files are
   * moved to a separate trash directory instead of being deleted immediately.
   * This can be useful for example during rolling upgrades.
   * @param bpid Block pool Id
   */
  public void enableTrash(String bpid);

  /**
   * Restore trash
   * @param bpid Block pool Id
   */
  public void restoreTrash(String bpid);

  /**
   * @return true when trash is enabled
   */
  public boolean trashEnabled(String bpid);

  /**
   * Create a marker file indicating that a rolling upgrade is in progress.
   * @throws IOException if the marker cannot be created
   */
  public void setRollingUpgradeMarker(String bpid) throws IOException;

  /**
   * Delete the rolling upgrade marker file if it exists.
   * @param bpid Block pool Id
   * @throws IOException if the marker cannot be deleted
   */
  public void clearRollingUpgradeMarker(String bpid) throws IOException;

  /**
   * submit a sync_file_range request to AsyncDiskService
   * @param block the block being synced
   * @param fd descriptor of the open block file
   * @param offset offset of the range to sync
   * @param nbytes number of bytes to sync
   * @param flags sync_file_range flags
   */
  public void submitBackgroundSyncFileRangeRequest(final ExtendedBlock block,
      final FileDescriptor fd, final long offset, final long nbytes,
      final int flags);

  /**
   * Callback from RamDiskAsyncLazyPersistService upon async lazy persist task end
   * @param bpId Block pool Id
   * @param blockId id of the persisted block
   * @param creationTime creation time of the replica
   * @param savedFiles the files the replica was persisted to
   * @param targetVolume the volume the replica was persisted on
   */
  public void onCompleteLazyPersist(String bpId, long blockId,
      long creationTime, File[] savedFiles, FsVolumeImpl targetVolume);

  /**
   * Callback from RamDiskAsyncLazyPersistService upon async lazy persist task fail
   * @param bpId Block pool Id
   * @param blockId id of the block that failed to persist
   */
  public void onFailLazyPersist(String bpId, long blockId);
}