分享

打开hdfs回收站功能,防止误删除Hadoop数据

 地势坤行者无疆 2018-07-19

回收站这个功能在windows系统中经常能见到,同样在hdfs上也有回收站功能,对应的目录是 .Trash。

相信不少同学曾经在Linux上为 rm -rf 这个命令痛不欲生过。在linux上可以通过alias改造 rm -rf 命令,或者安装trash-cli替换rm命令,将文件移到回收站,避免潇洒按下Enter键导致的悲剧。

但是在Hadoop中,trash默认是关闭的。这种情况下,rm和rmr命令会直接删除数据,而不会保留到回收站。

Hadoop中的回收站trash默认是关闭的,需要在core-site.xml里增加如下配置来开启:

<property>
  <name>fs.trash.interval</name>
  <!-- 7 days expressed in minutes (60 * 24 * 7 = 10080). The value must be a
       plain number; arithmetic expressions are not evaluated. -->
  <value>10080</value>
  <description>
      Number of minutes between trash checkpoints. If zero, the trash feature is disabled
  </description>
</property>

value的值单位是分钟,以上配置了保留7天


--测试删除数据

# ./hadoop fs -ls /user/root/test
Found 1 items
-rw-r--r--   3 root supergroup         17 2018-07-18 15:35 /user/root/test/test.txt
# ./hadoop fs -rm -r -f /user/root/test
18/07/18 15:36:02 INFO fs.TrashPolicyDefault: Namenode trash configuration: Deletion interval = 1440 minutes, Emptier interval = 0 minutes.
Moved: 'hdfs://bfdhadoopcool/user/root/test' to trash at: hdfs://bfdhadoopcool/user/root/.Trash/Current

注:在HDFS上的回收站数据在 /user/$USER/.Trash/Current/user/$USER/目录下


--恢复回收站数据

# ./hadoop fs -ls /user/root/.Trash/Current/user/root/test
Found 1 items
-rw-r--r--   3 root supergroup         17 2018-07-18 15:35 /user/root/.Trash/Current/user/root/test/test.txt
# ./hadoop fs -mv /user/root/.Trash/Current/user/root/test/ /user/root/
# ./hadoop fs -ls /user/root/test
Found 1 items
-rw-r--r--   3 root supergroup         17 2018-07-18 15:35 /user/root/test/test.txt

--清空回收站
清空回收站命令:hdfs dfs -expunge

# ./hadoop fs -rm -r /user/root/test
18/07/18 15:44:52 INFO fs.TrashPolicyDefault: Namenode trash configuration: Deletion interval = 1440 minutes, Emptier interval = 0 minutes.
Moved: 'hdfs://bfdhadoopcool/user/root/test' to trash at: hdfs://bfdhadoopcool/user/root/.Trash/Current

# ./hadoop dfs -expunge
18/07/18 15:45:13 INFO fs.TrashPolicyDefault: Namenode trash configuration: Deletion interval = 1440 minutes, Emptier interval = 0 minutes.
18/07/18 15:45:13 INFO fs.TrashPolicyDefault: Created trash checkpoint: /user/root/.Trash/180718154513

另外,在使用hadoop直接管理文件时,请不要使用超级用户hadoop,以防止误操作导致数据被删除。如果确实想直接删除数据,可以在hadoop中使用 rm -r -skipTrash(或 rmr -skipTrash),表示跳过回收站直接删除。请慎用!

附上源码

class Delete {
  public static void registerCommands(CommandFactory factory) {
    factory.addClass(Rm.class, "-rm");
    factory.addClass(Rmdir.class, "-rmdir");
    factory.addClass(Rmr.class, "-rmr");
    factory.addClass(Expunge.class, "-expunge");
  }

  /** remove non-directory paths */
  public static class Rm extends FsCommand {
    public static final String NAME = "rm";
    public static final String USAGE = "[-f] [-r|-R] [-skipTrash] <src> ...";
    public static final String DESCRIPTION =
      "Delete all files that match the specified file pattern. " +
      "Equivalent to the Unix command \"rm <src>\"\n" +
      "-skipTrash: option bypasses trash, if enabled, and immediately " +
      "deletes <src>\n" +
      "-f: If the file does not exist, do not display a diagnostic " +
      "message or modify the exit status to reflect an error.\n" +
      "-[rR]:  Recursively deletes directories";

    private boolean skipTrash = false;
    private boolean deleteDirs = false;
    private boolean ignoreFNF = false;
    
    @Override
    protected void processOptions(LinkedList<String> args) throws IOException {
      CommandFormat cf = new CommandFormat(
          1, Integer.MAX_VALUE, "f", "r", "R", "skipTrash");
      cf.parse(args);
      ignoreFNF = cf.getOpt("f");
      deleteDirs = cf.getOpt("r") || cf.getOpt("R");
      skipTrash = cf.getOpt("skipTrash");
    }

    @Override
    protected List<PathData> expandArgument(String arg) throws IOException {
      try {
        return super.expandArgument(arg);
      } catch (PathNotFoundException e) {
        if (!ignoreFNF) {
          throw e;
        }
        // prevent -f on a non-existent glob from failing
        return new LinkedList<PathData>();
      }
    }

    @Override
    protected void processNonexistentPath(PathData item) throws IOException {
      if (!ignoreFNF) super.processNonexistentPath(item);
    }

    @Override
    protected void processPath(PathData item) throws IOException {
      if (item.stat.isDirectory() && !deleteDirs) {
        throw new PathIsDirectoryException(item.toString());
      }

      // TODO: if the user wants the trash to be used but there is any
      // problem (ie. creating the trash dir, moving the item to be deleted,
      // etc), then the path will just be deleted because moveToTrash returns
      // false and it falls thru to fs.delete.  this doesn't seem right
      if (moveToTrash(item)) {
        return;
      }
      if (!item.fs.delete(item.path, deleteDirs)) {
        throw new PathIOException(item.toString());
      }
      out.println("Deleted " + item);
    }

    private boolean moveToTrash(PathData item) throws IOException {
      boolean success = false;
      if (!skipTrash) {
        try {
          success = Trash.moveToAppropriateTrash(item.fs, item.path, getConf());
        } catch(FileNotFoundException fnfe) {
          throw fnfe;
        } catch (IOException ioe) {
          String msg = ioe.getMessage();
          if (ioe.getCause() != null) {
            msg += ": " + ioe.getCause().getMessage();
  }
          throw new IOException(msg + ". Consider using -skipTrash option", ioe);
        }
      }
      return success;
    }
  }
  
  /** remove any path */
  static class Rmr extends Rm {
    public static final String NAME = "rmr";
    
    @Override
    protected void processOptions(LinkedList<String> args) throws IOException {
      args.addFirst("-r");
      super.processOptions(args);
    }

    @Override
    public String getReplacementCommand() {
      return "rm -r";
    }
  }

  /** remove only empty directories */
  static class Rmdir extends FsCommand {
    public static final String NAME = "rmdir";
    public static final String USAGE =
      "[--ignore-fail-on-non-empty] <dir> ...";
    public static final String DESCRIPTION =
      "Removes the directory entry specified by each directory argument, " +
      "provided it is empty.\n"; 
    
    private boolean ignoreNonEmpty = false;
    
    @Override
    protected void processOptions(LinkedList<String> args) throws IOException {
      CommandFormat cf = new CommandFormat(
          1, Integer.MAX_VALUE, "-ignore-fail-on-non-empty");
      cf.parse(args);
      ignoreNonEmpty = cf.getOpt("-ignore-fail-on-non-empty");
    }

    @Override
    protected void processPath(PathData item) throws IOException {
      if (!item.stat.isDirectory()) {
        throw new PathIsNotDirectoryException(item.toString());
      }      
      if (item.fs.listStatus(item.path).length == 0) {
        if (!item.fs.delete(item.path, false)) {
          throw new PathIOException(item.toString());
        }
      } else if (!ignoreNonEmpty) {
        throw new PathIsNotEmptyDirectoryException(item.toString());
      }
    }
  }

  /** empty the trash */
  static class Expunge extends FsCommand {
    public static final String NAME = "expunge";
    public static final String USAGE = "";
    public static final String DESCRIPTION = "Empty the Trash";

    // TODO: should probably allow path arguments for the filesystems
    @Override
    protected void processOptions(LinkedList<String> args) throws IOException {
      CommandFormat cf = new CommandFormat(0, 0);
      cf.parse(args);
    }

    @Override
    protected void processArguments(LinkedList<PathData> args)
    throws IOException {
      Trash trash = new Trash(getConf());
      trash.expunge();
      trash.checkpoint();    
    }
  }
}

    本站是提供个人知识管理的网络存储空间,所有内容均由用户发布,不代表本站观点。请注意甄别内容中的联系方式、诱导购买等信息,谨防诈骗。如发现有害或侵权内容,请点击一键举报。
    转藏 分享 献花(0)

    0条评论

    发表

    请遵守用户 评论公约

    类似文章 更多