001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hdfs.server.common;
019
020import java.io.File;
021import java.io.FileOutputStream;
022import java.io.FileNotFoundException;
023import java.io.IOException;
024import java.io.RandomAccessFile;
025import java.lang.management.ManagementFactory;
026import java.nio.channels.FileLock;
027import java.nio.channels.OverlappingFileLockException;
028import java.util.ArrayList;
029import java.util.Iterator;
030import java.util.List;
031import java.util.Properties;
032
033import org.apache.commons.logging.Log;
034import org.apache.commons.logging.LogFactory;
035import org.apache.hadoop.classification.InterfaceAudience;
036import org.apache.hadoop.fs.FileUtil;
037import org.apache.hadoop.fs.Path;
038import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.NodeType;
039import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption;
040import org.apache.hadoop.io.nativeio.NativeIO;
041import org.apache.hadoop.io.nativeio.NativeIOException;
042import org.apache.hadoop.util.ToolRunner;
043import org.apache.hadoop.util.VersionInfo;
044
045import com.google.common.base.Charsets;
046import com.google.common.base.Preconditions;
047
048
049
050/**
051 * Storage information file.
052 * <p>
053 * Local storage information is stored in a separate file VERSION.
054 * It contains type of the node, 
055 * the storage layout version, the namespace id, and 
056 * the fs state creation time.
057 * <p>
058 * Local storage can reside in multiple directories. 
059 * Each directory should contain the same VERSION file as the others.
060 * During startup Hadoop servers (name-node and data-nodes) read their local 
061 * storage information from them.
062 * <p>
 * The servers hold a lock for each storage directory while they run so that
 * other nodes are not able to start up sharing the same storage.
065 * The locks are released when the servers stop (normally or abnormally).
066 * 
067 */
068@InterfaceAudience.Private
069public abstract class Storage extends StorageInfo {
070  public static final Log LOG = LogFactory.getLog(Storage.class.getName());
071
072  // last layout version that did not support upgrades
073  public static final int LAST_PRE_UPGRADE_LAYOUT_VERSION = -3;
074  
075  // this corresponds to Hadoop-0.18
076  public static final int LAST_UPGRADABLE_LAYOUT_VERSION = -16;
077  protected static final String LAST_UPGRADABLE_HADOOP_VERSION = "Hadoop-0.18";
078  
079  /** Layout versions of 0.20.203 release */
080  public static final int[] LAYOUT_VERSIONS_203 = {-19, -31};
081
082  public    static final String STORAGE_FILE_LOCK     = "in_use.lock";
083  public    static final String STORAGE_DIR_CURRENT   = "current";
084  public    static final String STORAGE_DIR_PREVIOUS  = "previous";
085  public    static final String STORAGE_TMP_REMOVED   = "removed.tmp";
086  public    static final String STORAGE_TMP_PREVIOUS  = "previous.tmp";
087  public    static final String STORAGE_TMP_FINALIZED = "finalized.tmp";
088  public    static final String STORAGE_TMP_LAST_CKPT = "lastcheckpoint.tmp";
089  public    static final String STORAGE_PREVIOUS_CKPT = "previous.checkpoint";
090  
091  /**
092   * The blocksBeingWritten directory which was used in some 1.x and earlier
093   * releases.
094   */
095  public static final String STORAGE_1_BBW = "blocksBeingWritten";
096  
097  public enum StorageState {
098    NON_EXISTENT,
099    NOT_FORMATTED,
100    COMPLETE_UPGRADE,
101    RECOVER_UPGRADE,
102    COMPLETE_FINALIZE,
103    COMPLETE_ROLLBACK,
104    RECOVER_ROLLBACK,
105    COMPLETE_CHECKPOINT,
106    RECOVER_CHECKPOINT,
107    NORMAL;
108  }
109  
110  /**
111   * An interface to denote storage directory type
112   * Implementations can define a type for storage directory by implementing
113   * this interface.
114   */
115  @InterfaceAudience.Private
116  public interface StorageDirType {
117    public StorageDirType getStorageDirType();
118    public boolean isOfType(StorageDirType type);
119  }
120  
121  protected List<StorageDirectory> storageDirs = new ArrayList<StorageDirectory>();
122  
123  private class DirIterator implements Iterator<StorageDirectory> {
124    final StorageDirType dirType;
125    final boolean includeShared;
126    int prevIndex; // for remove()
127    int nextIndex; // for next()
128    
129    DirIterator(StorageDirType dirType, boolean includeShared) {
130      this.dirType = dirType;
131      this.nextIndex = 0;
132      this.prevIndex = 0;
133      this.includeShared = includeShared;
134    }
135    
136    @Override
137    public boolean hasNext() {
138      if (storageDirs.isEmpty() || nextIndex >= storageDirs.size())
139        return false;
140      if (dirType != null || !includeShared) {
141        while (nextIndex < storageDirs.size()) {
142          if (shouldReturnNextDir())
143            break;
144          nextIndex++;
145        }
146        if (nextIndex >= storageDirs.size())
147         return false;
148      }
149      return true;
150    }
151    
152    @Override
153    public StorageDirectory next() {
154      StorageDirectory sd = getStorageDir(nextIndex);
155      prevIndex = nextIndex;
156      nextIndex++;
157      if (dirType != null || !includeShared) {
158        while (nextIndex < storageDirs.size()) {
159          if (shouldReturnNextDir())
160            break;
161          nextIndex++;
162        }
163      }
164      return sd;
165    }
166    
167    @Override
168    public void remove() {
169      nextIndex = prevIndex; // restore previous state
170      storageDirs.remove(prevIndex); // remove last returned element
171      hasNext(); // reset nextIndex to correct place
172    }
173    
174    private boolean shouldReturnNextDir() {
175      StorageDirectory sd = getStorageDir(nextIndex);
176      return (dirType == null || sd.getStorageDirType().isOfType(dirType)) &&
177          (includeShared || !sd.isShared());
178    }
179  }
180  
181  /**
182   * @return A list of the given File in every available storage directory,
183   * regardless of whether it might exist.
184   */
185  public List<File> getFiles(StorageDirType dirType, String fileName) {
186    ArrayList<File> list = new ArrayList<File>();
187    Iterator<StorageDirectory> it =
188      (dirType == null) ? dirIterator() : dirIterator(dirType);
189    for ( ;it.hasNext(); ) {
190      list.add(new File(it.next().getCurrentDir(), fileName));
191    }
192    return list;
193  }
194
195
196  /**
197   * Return default iterator
198   * This iterator returns all entries in storageDirs
199   */
200  public Iterator<StorageDirectory> dirIterator() {
201    return dirIterator(null);
202  }
203  
204  /**
205   * Return iterator based on Storage Directory Type
206   * This iterator selects entries in storageDirs of type dirType and returns
207   * them via the Iterator
208   */
209  public Iterator<StorageDirectory> dirIterator(StorageDirType dirType) {
210    return dirIterator(dirType, true);
211  }
212  
213  /**
214   * Return all entries in storageDirs, potentially excluding shared dirs.
215   * @param includeShared whether or not to include shared dirs.
216   * @return an iterator over the configured storage dirs.
217   */
218  public Iterator<StorageDirectory> dirIterator(boolean includeShared) {
219    return dirIterator(null, includeShared);
220  }
221  
222  /**
223   * @param dirType all entries will be of this type of dir
224   * @param includeShared true to include any shared directories,
225   *        false otherwise
226   * @return an iterator over the configured storage dirs.
227   */
228  public Iterator<StorageDirectory> dirIterator(StorageDirType dirType,
229      boolean includeShared) {
230    return new DirIterator(dirType, includeShared);
231  }
232  
233  public Iterable<StorageDirectory> dirIterable(final StorageDirType dirType) {
234    return new Iterable<StorageDirectory>() {
235      @Override
236      public Iterator<StorageDirectory> iterator() {
237        return dirIterator(dirType);
238      }
239    };
240  }
241  
242  
243  /**
244   * generate storage list (debug line)
245   */
246  public String listStorageDirectories() {
247    StringBuilder buf = new StringBuilder();
248    for (StorageDirectory sd : storageDirs) {
249      buf.append(sd.getRoot() + "(" + sd.getStorageDirType() + ");");
250    }
251    return buf.toString();
252  }
253  
254  /**
255   * One of the storage directories.
256   */
257  @InterfaceAudience.Private
258  public static class StorageDirectory implements FormatConfirmable {
259    final File root;              // root directory
260    // whether or not this dir is shared between two separate NNs for HA, or
261    // between multiple block pools in the case of federation.
262    final boolean isShared;
263    final StorageDirType dirType; // storage dir type
264    FileLock lock;                // storage lock
265
266    private String storageUuid = null;      // Storage directory identifier.
267    
268    public StorageDirectory(File dir) {
269      // default dirType is null
270      this(dir, null, false);
271    }
272    
273    public StorageDirectory(File dir, StorageDirType dirType) {
274      this(dir, dirType, false);
275    }
276    
277    public void setStorageUuid(String storageUuid) {
278      this.storageUuid = storageUuid;
279    }
280
281    public String getStorageUuid() {
282      return storageUuid;
283    }
284
285    /**
286     * Constructor
287     * @param dir directory corresponding to the storage
288     * @param dirType storage directory type
289     * @param isShared whether or not this dir is shared between two NNs. true
290     *          disables locking on the storage directory, false enables locking
291     */
292    public StorageDirectory(File dir, StorageDirType dirType, boolean isShared) {
293      this.root = dir;
294      this.lock = null;
295      this.dirType = dirType;
296      this.isShared = isShared;
297    }
298    
299    /**
300     * Get root directory of this storage
301     */
302    public File getRoot() {
303      return root;
304    }
305
306    /**
307     * Get storage directory type
308     */
309    public StorageDirType getStorageDirType() {
310      return dirType;
311    }    
312
313    public void read(File from, Storage storage) throws IOException {
314      Properties props = readPropertiesFile(from);
315      storage.setFieldsFromProperties(props, this);
316    }
317
318    /**
319     * Clear and re-create storage directory.
320     * <p>
321     * Removes contents of the current directory and creates an empty directory.
322     * 
323     * This does not fully format storage directory. 
324     * It cannot write the version file since it should be written last after  
325     * all other storage type dependent files are written.
326     * Derived storage is responsible for setting specific storage values and
327     * writing the version file to disk.
328     * 
329     * @throws IOException
330     */
331    public void clearDirectory() throws IOException {
332      File curDir = this.getCurrentDir();
333      if (curDir.exists())
334        if (!(FileUtil.fullyDelete(curDir)))
335          throw new IOException("Cannot remove current directory: " + curDir);
336      if (!curDir.mkdirs())
337        throw new IOException("Cannot create directory " + curDir);
338    }
339
340    /**
341     * Directory {@code current} contains latest files defining
342     * the file system meta-data.
343     * 
344     * @return the directory path
345     */
346    public File getCurrentDir() {
347      return new File(root, STORAGE_DIR_CURRENT);
348    }
349
350    /**
351     * File {@code VERSION} contains the following fields:
352     * <ol>
353     * <li>node type</li>
354     * <li>layout version</li>
355     * <li>namespaceID</li>
356     * <li>fs state creation time</li>
357     * <li>other fields specific for this node type</li>
358     * </ol>
359     * The version file is always written last during storage directory updates.
360     * The existence of the version file indicates that all other files have
361     * been successfully written in the storage directory, the storage is valid
362     * and does not need to be recovered.
363     * 
364     * @return the version file path
365     */
366    public File getVersionFile() {
367      return new File(new File(root, STORAGE_DIR_CURRENT), STORAGE_FILE_VERSION);
368    }
369
370    /**
371     * File {@code VERSION} from the {@code previous} directory.
372     * 
373     * @return the previous version file path
374     */
375    public File getPreviousVersionFile() {
376      return new File(new File(root, STORAGE_DIR_PREVIOUS), STORAGE_FILE_VERSION);
377    }
378
379    /**
380     * Directory {@code previous} contains the previous file system state,
381     * which the system can be rolled back to.
382     * 
383     * @return the directory path
384     */
385    public File getPreviousDir() {
386      return new File(root, STORAGE_DIR_PREVIOUS);
387    }
388
389    /**
390     * {@code previous.tmp} is a transient directory, which holds
391     * current file system state while the new state is saved into the new
392     * {@code current} during upgrade.
393     * If the saving succeeds {@code previous.tmp} will be moved to
394     * {@code previous}, otherwise it will be renamed back to 
395     * {@code current} by the recovery procedure during startup.
396     * 
397     * @return the directory path
398     */
399    public File getPreviousTmp() {
400      return new File(root, STORAGE_TMP_PREVIOUS);
401    }
402
403    /**
404     * {@code removed.tmp} is a transient directory, which holds
405     * current file system state while the previous state is moved into
406     * {@code current} during rollback.
407     * If the moving succeeds {@code removed.tmp} will be removed,
408     * otherwise it will be renamed back to 
409     * {@code current} by the recovery procedure during startup.
410     * 
411     * @return the directory path
412     */
413    public File getRemovedTmp() {
414      return new File(root, STORAGE_TMP_REMOVED);
415    }
416
417    /**
418     * {@code finalized.tmp} is a transient directory, which holds
419     * the {@code previous} file system state while it is being removed
420     * in response to the finalize request.
421     * Finalize operation will remove {@code finalized.tmp} when completed,
422     * otherwise the removal will resume upon the system startup.
423     * 
424     * @return the directory path
425     */
426    public File getFinalizedTmp() {
427      return new File(root, STORAGE_TMP_FINALIZED);
428    }
429
430    /**
431     * {@code lastcheckpoint.tmp} is a transient directory, which holds
432     * current file system state while the new state is saved into the new
433     * {@code current} during regular namespace updates.
434     * If the saving succeeds {@code lastcheckpoint.tmp} will be moved to
435     * {@code previous.checkpoint}, otherwise it will be renamed back to 
436     * {@code current} by the recovery procedure during startup.
437     * 
438     * @return the directory path
439     */
440    public File getLastCheckpointTmp() {
441      return new File(root, STORAGE_TMP_LAST_CKPT);
442    }
443
444    /**
445     * {@code previous.checkpoint} is a directory, which holds the previous
446     * (before the last save) state of the storage directory.
447     * The directory is created as a reference only, it does not play role
448     * in state recovery procedures, and is recycled automatically, 
449     * but it may be useful for manual recovery of a stale state of the system.
450     * 
451     * @return the directory path
452     */
453    public File getPreviousCheckpoint() {
454      return new File(root, STORAGE_PREVIOUS_CKPT);
455    }
456
457    /**
458     * Check consistency of the storage directory
459     * 
460     * @param startOpt a startup option.
461     *  
462     * @return state {@link StorageState} of the storage directory 
463     * @throws InconsistentFSStateException if directory state is not 
464     * consistent and cannot be recovered.
465     * @throws IOException
466     */
467    public StorageState analyzeStorage(StartupOption startOpt, Storage storage)
468        throws IOException {
469      assert root != null : "root is null";
470      boolean hadMkdirs = false;
471      String rootPath = root.getCanonicalPath();
472      try { // check that storage exists
473        if (!root.exists()) {
474          // storage directory does not exist
475          if (startOpt != StartupOption.FORMAT &&
476              startOpt != StartupOption.HOTSWAP) {
477            LOG.warn("Storage directory " + rootPath + " does not exist");
478            return StorageState.NON_EXISTENT;
479          }
480          LOG.info(rootPath + " does not exist. Creating ...");
481          if (!root.mkdirs())
482            throw new IOException("Cannot create directory " + rootPath);
483          hadMkdirs = true;
484        }
485        // or is inaccessible
486        if (!root.isDirectory()) {
487          LOG.warn(rootPath + "is not a directory");
488          return StorageState.NON_EXISTENT;
489        }
490        if (!FileUtil.canWrite(root)) {
491          LOG.warn("Cannot access storage directory " + rootPath);
492          return StorageState.NON_EXISTENT;
493        }
494      } catch(SecurityException ex) {
495        LOG.warn("Cannot access storage directory " + rootPath, ex);
496        return StorageState.NON_EXISTENT;
497      }
498
499      this.lock(); // lock storage if it exists
500
501      // If startOpt is HOTSWAP, it returns NOT_FORMATTED for empty directory,
502      // while it also checks the layout version.
503      if (startOpt == HdfsServerConstants.StartupOption.FORMAT ||
504          (startOpt == StartupOption.HOTSWAP && hadMkdirs))
505        return StorageState.NOT_FORMATTED;
506
507      if (startOpt != HdfsServerConstants.StartupOption.IMPORT) {
508        storage.checkOldLayoutStorage(this);
509      }
510
511      // check whether current directory is valid
512      File versionFile = getVersionFile();
513      boolean hasCurrent = versionFile.exists();
514
515      // check which directories exist
516      boolean hasPrevious = getPreviousDir().exists();
517      boolean hasPreviousTmp = getPreviousTmp().exists();
518      boolean hasRemovedTmp = getRemovedTmp().exists();
519      boolean hasFinalizedTmp = getFinalizedTmp().exists();
520      boolean hasCheckpointTmp = getLastCheckpointTmp().exists();
521
522      if (!(hasPreviousTmp || hasRemovedTmp
523          || hasFinalizedTmp || hasCheckpointTmp)) {
524        // no temp dirs - no recovery
525        if (hasCurrent)
526          return StorageState.NORMAL;
527        if (hasPrevious)
528          throw new InconsistentFSStateException(root,
529                              "version file in current directory is missing.");
530        return StorageState.NOT_FORMATTED;
531      }
532
533      if ((hasPreviousTmp?1:0) + (hasRemovedTmp?1:0)
534          + (hasFinalizedTmp?1:0) + (hasCheckpointTmp?1:0) > 1)
535        // more than one temp dirs
536        throw new InconsistentFSStateException(root,
537                                               "too many temporary directories.");
538
539      // # of temp dirs == 1 should either recover or complete a transition
540      if (hasCheckpointTmp) {
541        return hasCurrent ? StorageState.COMPLETE_CHECKPOINT
542                          : StorageState.RECOVER_CHECKPOINT;
543      }
544
545      if (hasFinalizedTmp) {
546        if (hasPrevious)
547          throw new InconsistentFSStateException(root,
548                                                 STORAGE_DIR_PREVIOUS + " and " + STORAGE_TMP_FINALIZED
549                                                 + "cannot exist together.");
550        return StorageState.COMPLETE_FINALIZE;
551      }
552
553      if (hasPreviousTmp) {
554        if (hasPrevious)
555          throw new InconsistentFSStateException(root,
556                                                 STORAGE_DIR_PREVIOUS + " and " + STORAGE_TMP_PREVIOUS
557                                                 + " cannot exist together.");
558        if (hasCurrent)
559          return StorageState.COMPLETE_UPGRADE;
560        return StorageState.RECOVER_UPGRADE;
561      }
562      
563      assert hasRemovedTmp : "hasRemovedTmp must be true";
564      if (!(hasCurrent ^ hasPrevious))
565        throw new InconsistentFSStateException(root,
566                                               "one and only one directory " + STORAGE_DIR_CURRENT 
567                                               + " or " + STORAGE_DIR_PREVIOUS 
568                                               + " must be present when " + STORAGE_TMP_REMOVED
569                                               + " exists.");
570      if (hasCurrent)
571        return StorageState.COMPLETE_ROLLBACK;
572      return StorageState.RECOVER_ROLLBACK;
573    }
574
575    /**
576     * Complete or recover storage state from previously failed transition.
577     * 
578     * @param curState specifies what/how the state should be recovered
579     * @throws IOException
580     */
581    public void doRecover(StorageState curState) throws IOException {
582      File curDir = getCurrentDir();
583      String rootPath = root.getCanonicalPath();
584      switch(curState) {
585      case COMPLETE_UPGRADE:  // mv previous.tmp -> previous
586        LOG.info("Completing previous upgrade for storage directory " 
587                 + rootPath);
588        rename(getPreviousTmp(), getPreviousDir());
589        return;
590      case RECOVER_UPGRADE:   // mv previous.tmp -> current
591        LOG.info("Recovering storage directory " + rootPath
592                 + " from previous upgrade");
593        if (curDir.exists())
594          deleteDir(curDir);
595        rename(getPreviousTmp(), curDir);
596        return;
597      case COMPLETE_ROLLBACK: // rm removed.tmp
598        LOG.info("Completing previous rollback for storage directory "
599                 + rootPath);
600        deleteDir(getRemovedTmp());
601        return;
602      case RECOVER_ROLLBACK:  // mv removed.tmp -> current
603        LOG.info("Recovering storage directory " + rootPath
604                 + " from previous rollback");
605        rename(getRemovedTmp(), curDir);
606        return;
607      case COMPLETE_FINALIZE: // rm finalized.tmp
608        LOG.info("Completing previous finalize for storage directory "
609                 + rootPath);
610        deleteDir(getFinalizedTmp());
611        return;
612      case COMPLETE_CHECKPOINT: // mv lastcheckpoint.tmp -> previous.checkpoint
613        LOG.info("Completing previous checkpoint for storage directory " 
614                 + rootPath);
615        File prevCkptDir = getPreviousCheckpoint();
616        if (prevCkptDir.exists())
617          deleteDir(prevCkptDir);
618        rename(getLastCheckpointTmp(), prevCkptDir);
619        return;
620      case RECOVER_CHECKPOINT:  // mv lastcheckpoint.tmp -> current
621        LOG.info("Recovering storage directory " + rootPath
622                 + " from failed checkpoint");
623        if (curDir.exists())
624          deleteDir(curDir);
625        rename(getLastCheckpointTmp(), curDir);
626        return;
627      default:
628        throw new IOException("Unexpected FS state: " + curState);
629      }
630    }
631    
632    /**
633     * @return true if the storage directory should prompt the user prior
634     * to formatting (i.e if the directory appears to contain some data)
635     * @throws IOException if the SD cannot be accessed due to an IO error
636     */
637    @Override
638    public boolean hasSomeData() throws IOException {
639      // Its alright for a dir not to exist, or to exist (properly accessible)
640      // and be completely empty.
641      if (!root.exists()) return false;
642      
643      if (!root.isDirectory()) {
644        // a file where you expect a directory should not cause silent
645        // formatting
646        return true;
647      }
648      
649      if (FileUtil.listFiles(root).length == 0) {
650        // Empty dir can format without prompt.
651        return false;
652      }
653      
654      return true;
655    }
656    
657    public boolean isShared() {
658      return isShared;
659    }
660
661
662    /**
663     * Lock storage to provide exclusive access.
664     * 
665     * <p> Locking is not supported by all file systems.
666     * E.g., NFS does not consistently support exclusive locks.
667     * 
668     * <p> If locking is supported we guarantee exclusive access to the
669     * storage directory. Otherwise, no guarantee is given.
670     * 
671     * @throws IOException if locking fails
672     */
673    public void lock() throws IOException {
674      if (isShared()) {
675        LOG.info("Locking is disabled for " + this.root);
676        return;
677      }
678      FileLock newLock = tryLock();
679      if (newLock == null) {
680        String msg = "Cannot lock storage " + this.root 
681          + ". The directory is already locked";
682        LOG.info(msg);
683        throw new IOException(msg);
684      }
685      // Don't overwrite lock until success - this way if we accidentally
686      // call lock twice, the internal state won't be cleared by the second
687      // (failed) lock attempt
688      lock = newLock;
689    }
690
691    /**
692     * Attempts to acquire an exclusive lock on the storage.
693     * 
694     * @return A lock object representing the newly-acquired lock or
695     * <code>null</code> if storage is already locked.
696     * @throws IOException if locking fails.
697     */
698    @SuppressWarnings("resource")
699    FileLock tryLock() throws IOException {
700      boolean deletionHookAdded = false;
701      File lockF = new File(root, STORAGE_FILE_LOCK);
702      if (!lockF.exists()) {
703        lockF.deleteOnExit();
704        deletionHookAdded = true;
705      }
706      RandomAccessFile file = new RandomAccessFile(lockF, "rws");
707      String jvmName = ManagementFactory.getRuntimeMXBean().getName();
708      FileLock res = null;
709      try {
710        res = file.getChannel().tryLock();
711        if (null == res) {
712          throw new OverlappingFileLockException();
713        }
714        file.write(jvmName.getBytes(Charsets.UTF_8));
715        LOG.info("Lock on " + lockF + " acquired by nodename " + jvmName);
716      } catch(OverlappingFileLockException oe) {
717        // Cannot read from the locked file on Windows.
718        String lockingJvmName = Path.WINDOWS ? "" : (" " + file.readLine());
719        LOG.error("It appears that another namenode" + lockingJvmName
720            + " has already locked the storage directory");
721        file.close();
722        return null;
723      } catch(IOException e) {
724        LOG.error("Failed to acquire lock on " + lockF + ". If this storage directory is mounted via NFS, " 
725            + "ensure that the appropriate nfs lock services are running.", e);
726        file.close();
727        throw e;
728      }
729      if (res != null && !deletionHookAdded) {
730        // If the file existed prior to our startup, we didn't
731        // call deleteOnExit above. But since we successfully locked
732        // the dir, we can take care of cleaning it up.
733        lockF.deleteOnExit();
734      }
735      return res;
736    }
737
738    /**
739     * Unlock storage.
740     * 
741     * @throws IOException
742     */
743    public void unlock() throws IOException {
744      if (this.lock == null)
745        return;
746      this.lock.release();
747      lock.channel().close();
748      lock = null;
749    }
750    
751    @Override
752    public String toString() {
753      return "Storage Directory " + this.root;
754    }
755
756    /**
757     * Check whether underlying file system supports file locking.
758     * 
759     * @return <code>true</code> if exclusive locks are supported or
760     *         <code>false</code> otherwise.
761     * @throws IOException
762     * @see StorageDirectory#lock()
763     */
764    public boolean isLockSupported() throws IOException {
765      FileLock firstLock = null;
766      FileLock secondLock = null;
767      try {
768        firstLock = lock;
769        if(firstLock == null) {
770          firstLock = tryLock();
771          if(firstLock == null)
772            return true;
773        }
774        secondLock = tryLock();
775        if(secondLock == null)
776          return true;
777      } finally {
778        if(firstLock != null && firstLock != lock) {
779          firstLock.release();
780          firstLock.channel().close();
781        }
782        if(secondLock != null) {
783          secondLock.release();
784          secondLock.channel().close();
785        }
786      }
787      return false;
788    }
789  }
790
791  /**
792   * Create empty storage info of the specified type
793   */
794  protected Storage(NodeType type) {
795    super(type);
796  }
797  
798  protected Storage(StorageInfo storageInfo) {
799    super(storageInfo);
800  }
801  
802  public int getNumStorageDirs() {
803    return storageDirs.size();
804  }
805  
806  public StorageDirectory getStorageDir(int idx) {
807    return storageDirs.get(idx);
808  }
809  
810  /**
811   * @return the storage directory, with the precondition that this storage
812   * has exactly one storage directory
813   */
814  public StorageDirectory getSingularStorageDir() {
815    Preconditions.checkState(storageDirs.size() == 1);
816    return storageDirs.get(0);
817  }
818  
819  protected void addStorageDir(StorageDirectory sd) {
820    storageDirs.add(sd);
821  }
822
823  /**
824   * Returns true if the storage directory on the given directory is already
825   * loaded.
826   * @param root the root directory of a {@link StorageDirectory}
827   * @throws IOException if failed to get canonical path.
828   */
829  protected boolean containsStorageDir(File root) throws IOException {
830    for (StorageDirectory sd : storageDirs) {
831      if (sd.getRoot().getCanonicalPath().equals(root.getCanonicalPath())) {
832        return true;
833      }
834    }
835    return false;
836  }
837
838  /**
839   * Return true if the layout of the given storage directory is from a version
840   * of Hadoop prior to the introduction of the "current" and "previous"
841   * directories which allow upgrade and rollback.
842   */
843  public abstract boolean isPreUpgradableLayout(StorageDirectory sd)
844  throws IOException;
845
846  /**
847   * Check if the given storage directory comes from a version of Hadoop
848   * prior to when the directory layout changed (ie 0.13). If this is
849   * the case, this method throws an IOException.
850   */
851  private void checkOldLayoutStorage(StorageDirectory sd) throws IOException {
852    if (isPreUpgradableLayout(sd)) {
853      checkVersionUpgradable(0);
854    }
855  }
856
857  /**
858   * Checks if the upgrade from {@code oldVersion} is supported.
859   * @param oldVersion the version of the metadata to check with the current
860   *                   version
861   * @throws IOException if upgrade is not supported
862   */
863  public static void checkVersionUpgradable(int oldVersion) 
864                                     throws IOException {
865    if (oldVersion > LAST_UPGRADABLE_LAYOUT_VERSION) {
866      String msg = "*********** Upgrade is not supported from this " +
867                   " older version " + oldVersion + 
868                   " of storage to the current version." + 
869                   " Please upgrade to " + LAST_UPGRADABLE_HADOOP_VERSION +
870                   " or a later version and then upgrade to current" +
871                   " version. Old layout version is " + 
872                   (oldVersion == 0 ? "'too old'" : (""+oldVersion)) +
873                   " and latest layout version this software version can" +
874                   " upgrade from is " + LAST_UPGRADABLE_LAYOUT_VERSION +
875                   ". ************";
876      LOG.error(msg);
877      throw new IOException(msg); 
878    }
879    
880  }
881  
882  /**
883   * Iterate over each of the {@link FormatConfirmable} objects,
884   * potentially checking with the user whether it should be formatted.
885   * 
886   * If running in interactive mode, will prompt the user for each
887   * directory to allow them to format anyway. Otherwise, returns
888   * false, unless 'force' is specified.
889   * 
890   * @param force format regardless of whether dirs exist
891   * @param interactive prompt the user when a dir exists
892   * @return true if formatting should proceed
893   * @throws IOException if some storage cannot be accessed
894   */
895  public static boolean confirmFormat(
896      Iterable<? extends FormatConfirmable> items,
897      boolean force, boolean interactive) throws IOException {
898    for (FormatConfirmable item : items) {
899      if (!item.hasSomeData())
900        continue;
901      if (force) { // Don't confirm, always format.
902        System.err.println(
903            "Data exists in " + item + ". Formatting anyway.");
904        continue;
905      }
906      if (!interactive) { // Don't ask - always don't format
907        System.err.println(
908            "Running in non-interactive mode, and data appears to exist in " +
909            item + ". Not formatting.");
910        return false;
911      }
912      if (!ToolRunner.confirmPrompt("Re-format filesystem in " + item + " ?")) {
913        System.err.println("Format aborted in " + item);
914        return false;
915      }
916    }
917    
918    return true;
919  }
920  
921  /**
922   * Interface for classes which need to have the user confirm their
923   * formatting during NameNode -format and other similar operations.
924   * 
925   * This is currently a storage directory or journal manager.
926   */
927  @InterfaceAudience.Private
928  public interface FormatConfirmable {
929    /**
930     * @return true if the storage seems to have some valid data in it,
931     * and the user should be required to confirm the format. Otherwise,
932     * false.
933     * @throws IOException if the storage cannot be accessed at all.
934     */
935    public boolean hasSomeData() throws IOException;
936    
937    /**
938     * @return a string representation of the formattable item, suitable
939     * for display to the user inside a prompt
940     */
941    public String toString();
942  }
943  
944  /**
945   * Set common storage fields into the given properties object.
946   * Should be overloaded if additional fields need to be set.
947   * 
948   * @param props the Properties object to write into
949   */
950  protected void setPropertiesFromFields(Properties props, 
951                                         StorageDirectory sd)
952      throws IOException {
953    props.setProperty("layoutVersion", String.valueOf(layoutVersion));
954    props.setProperty("storageType", storageType.toString());
955    props.setProperty("namespaceID", String.valueOf(namespaceID));
956    // Set clusterID in version with federation support
957    if (versionSupportsFederation(getServiceLayoutFeatureMap())) {
958      props.setProperty("clusterID", clusterID);
959    }
960    props.setProperty("cTime", String.valueOf(cTime));
961  }
962
963  /**
964   * Write properties to the VERSION file in the given storage directory.
965   */
966  public void writeProperties(StorageDirectory sd) throws IOException {
967    writeProperties(sd.getVersionFile(), sd);
968  }
969  
970  public void writeProperties(File to, StorageDirectory sd) throws IOException {
971    Properties props = new Properties();
972    setPropertiesFromFields(props, sd);
973    writeProperties(to, sd, props);
974  }
975
976  public static void writeProperties(File to, StorageDirectory sd,
977      Properties props) throws IOException {
978    RandomAccessFile file = new RandomAccessFile(to, "rws");
979    FileOutputStream out = null;
980    try {
981      file.seek(0);
982      out = new FileOutputStream(file.getFD());
983      /*
984       * If server is interrupted before this line, 
985       * the version file will remain unchanged.
986       */
987      props.store(out, null);
988      /*
989       * Now the new fields are flushed to the head of the file, but file 
990       * length can still be larger then required and therefore the file can 
991       * contain whole or corrupted fields from its old contents in the end.
992       * If server is interrupted here and restarted later these extra fields
993       * either should not effect server behavior or should be handled
994       * by the server correctly.
995       */
996      file.setLength(out.getChannel().position());
997    } finally {
998      if (out != null) {
999        out.close();
1000      }
1001      file.close();
1002    }
1003  }
1004
1005  public static void rename(File from, File to) throws IOException {
1006    try {
1007      NativeIO.renameTo(from, to);
1008    } catch (NativeIOException e) {
1009      throw new IOException("Failed to rename " + from.getCanonicalPath()
1010        + " to " + to.getCanonicalPath() + " due to failure in native rename. "
1011        + e.toString());
1012    }
1013  }
1014
1015  /**
1016   * Copies a file (usually large) to a new location using native unbuffered IO.
1017   * <p>
1018   * This method copies the contents of the specified source file
1019   * to the specified destination file using OS specific unbuffered IO.
1020   * The goal is to avoid churning the file system buffer cache when copying
1021   * large files.
1022   *
1023   * We can't use FileUtils#copyFile from apache-commons-io because it
1024   * is a buffered IO based on FileChannel#transferFrom, which uses MmapByteBuffer
1025   * internally.
1026   *
1027   * The directory holding the destination file is created if it does not exist.
1028   * If the destination file exists, then this method will delete it first.
1029   * <p>
1030   * <strong>Note:</strong> Setting <code>preserveFileDate</code> to
1031   * {@code true} tries to preserve the file's last modified
1032   * date/times using {@link File#setLastModified(long)}, however it is
1033   * not guaranteed that the operation will succeed.
1034   * If the modification operation fails, no indication is provided.
1035   *
1036   * @param srcFile  an existing file to copy, must not be {@code null}
1037   * @param destFile  the new file, must not be {@code null}
1038   * @param preserveFileDate  true if the file date of the copy
1039   *  should be the same as the original
1040   *
1041   * @throws NullPointerException if source or destination is {@code null}
1042   * @throws IOException if source or destination is invalid
1043   * @throws IOException if an IO error occurs during copying
1044   */
1045  public static void nativeCopyFileUnbuffered(File srcFile, File destFile,
1046      boolean preserveFileDate) throws IOException {
1047    if (srcFile == null) {
1048      throw new NullPointerException("Source must not be null");
1049    }
1050    if (destFile == null) {
1051      throw new NullPointerException("Destination must not be null");
1052    }
1053    if (srcFile.exists() == false) {
1054      throw new FileNotFoundException("Source '" + srcFile + "' does not exist");
1055    }
1056    if (srcFile.isDirectory()) {
1057      throw new IOException("Source '" + srcFile + "' exists but is a directory");
1058    }
1059    if (srcFile.getCanonicalPath().equals(destFile.getCanonicalPath())) {
1060      throw new IOException("Source '" + srcFile + "' and destination '" +
1061          destFile + "' are the same");
1062    }
1063    File parentFile = destFile.getParentFile();
1064    if (parentFile != null) {
1065      if (!parentFile.mkdirs() && !parentFile.isDirectory()) {
1066        throw new IOException("Destination '" + parentFile
1067            + "' directory cannot be created");
1068      }
1069    }
1070    if (destFile.exists()) {
1071      if (FileUtil.canWrite(destFile) == false) {
1072        throw new IOException("Destination '" + destFile
1073            + "' exists but is read-only");
1074      } else {
1075        if (destFile.delete() == false) {
1076          throw new IOException("Destination '" + destFile
1077              + "' exists but cannot be deleted");
1078        }
1079      }
1080    }
1081    try {
1082      NativeIO.copyFileUnbuffered(srcFile, destFile);
1083    } catch (NativeIOException e) {
1084      throw new IOException("Failed to copy " + srcFile.getCanonicalPath()
1085          + " to " + destFile.getCanonicalPath()
1086          + " due to failure in NativeIO#copyFileUnbuffered(). "
1087          + e.toString());
1088    }
1089    if (srcFile.length() != destFile.length()) {
1090      throw new IOException("Failed to copy full contents from '" + srcFile
1091          + "' to '" + destFile + "'");
1092    }
1093    if (preserveFileDate) {
1094      if (destFile.setLastModified(srcFile.lastModified()) == false) {
1095        if (LOG.isDebugEnabled()) {
1096          LOG.debug("Failed to preserve last modified date from'" + srcFile
1097            + "' to '" + destFile + "'");
1098        }
1099      }
1100    }
1101  }
1102
1103  /**
1104   * Recursively delete all the content of the directory first and then 
1105   * the directory itself from the local filesystem.
1106   * @param dir The directory to delete
1107   * @throws IOException
1108   */
1109  public static void deleteDir(File dir) throws IOException {
1110    if (!FileUtil.fullyDelete(dir))
1111      throw new IOException("Failed to delete " + dir.getCanonicalPath());
1112  }
1113  
1114  /**
1115   * Write all data storage files.
1116   * @throws IOException
1117   */
1118  public void writeAll() throws IOException {
1119    this.layoutVersion = getServiceLayoutVersion();
1120    for (Iterator<StorageDirectory> it = storageDirs.iterator(); it.hasNext();) {
1121      writeProperties(it.next());
1122    }
1123  }
1124
1125  /**
1126   * Unlock all storage directories.
1127   * @throws IOException
1128   */
1129  public void unlockAll() throws IOException {
1130    for (Iterator<StorageDirectory> it = storageDirs.iterator(); it.hasNext();) {
1131      it.next().unlock();
1132    }
1133  }
1134
1135  public static String getBuildVersion() {
1136    return VersionInfo.getRevision();
1137  }
1138
1139  public static String getRegistrationID(StorageInfo storage) {
1140    return "NS-" + Integer.toString(storage.getNamespaceID())
1141      + "-" + storage.getClusterID()
1142      + "-" + Long.toString(storage.getCTime());
1143  }
1144  
1145  public static boolean is203LayoutVersion(int layoutVersion) {
1146    for (int lv203 : LAYOUT_VERSIONS_203) {
1147      if (lv203 == layoutVersion) {
1148        return true;
1149      }
1150    }
1151    return false;
1152  }
1153}