TestDebugAdmin.java

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hdfs.tools;

import org.apache.commons.io.FileUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.hdfs.DFSTestUtil;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.protocol.ErasureCodingPolicy;
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
import org.apache.hadoop.hdfs.protocol.LocatedBlock;
import org.apache.hadoop.hdfs.protocol.LocatedStripedBlock;
import org.apache.hadoop.hdfs.protocol.SystemErasureCodingPolicies;
import org.apache.hadoop.hdfs.server.datanode.DataNode;
import org.apache.hadoop.hdfs.server.datanode.fsdataset.FsDatasetSpi;
import org.apache.hadoop.hdfs.util.StripedBlockUtil;
import org.apache.hadoop.io.IOUtils;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.Timeout;

import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.PrintStream;
import java.util.List;
import java.util.Random;

import static org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.FsDatasetTestUtil.*;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertTrue;

public class TestDebugAdmin {

  /**
   * Absolute path of a scratch directory named after this class, located
   * under the "test.build.data" system property (or /tmp when it is unset).
   */
  private static final String TEST_ROOT_DIR = new File(
      System.getProperty("test.build.data", "/tmp"),
      TestDebugAdmin.class.getSimpleName()).getAbsolutePath();

  /** Configuration shared by the mini cluster and the DebugAdmin tool. */
  private final Configuration conf = new Configuration();

  /** Cluster started by an individual test; shut down in tearDown(). */
  private MiniDFSCluster cluster;

  /** Tool under test; re-created before every test. */
  private DebugAdmin admin;

  /**
   * Prepares an empty scratch directory and a fresh DebugAdmin instance
   * before each test.
   *
   * @throws Exception declared for parity with the other lifecycle methods
   */
  @BeforeEach
  public void setUp() throws Exception {
    final File testRoot = new File(TEST_ROOT_DIR);
    // File.delete() cannot remove a non-empty directory, so files left over
    // from a previous run would survive. deleteQuietly removes the directory
    // recursively and never throws, which also covers the "does not exist"
    // case.
    FileUtils.deleteQuietly(testRoot);
    testRoot.mkdirs();
    admin = new DebugAdmin(conf);
  }

  /**
   * Shuts down the mini cluster, if a test started one, and clears the
   * reference so the next test starts without a cluster.
   */
  @AfterEach
  public void tearDown() throws Exception {
    if (cluster == null) {
      // Nothing was started by this test.
      return;
    }
    cluster.shutdown();
    cluster = null;
  }

  /**
   * Runs the DebugAdmin tool with the given arguments while capturing
   * everything written to System.out and System.err.
   *
   * @param cmd command-line arguments handed to {@code admin.run}
   * @return "ret: &lt;exit code&gt;, " followed by the captured output with
   *         all line separators removed
   * @throws Exception if the tool itself throws
   */
  private String runCmd(String[] cmd) throws Exception {
    final PrintStream savedOut = System.out;
    final PrintStream savedErr = System.err;
    final ByteArrayOutputStream captured = new ByteArrayOutputStream();
    final PrintStream sink = new PrintStream(captured);
    // Both standard streams are redirected into the same buffer so the
    // returned text interleaves them in the order they were written.
    System.setErr(sink);
    System.setOut(sink);
    int exitCode;
    try {
      exitCode = admin.run(cmd);
    } finally {
      // Always restore the real streams, even when the tool throws.
      System.setErr(savedErr);
      System.setOut(savedOut);
      IOUtils.closeStream(sink);
    }
    final String flattened =
        captured.toString().replaceAll(System.lineSeparator(), "");
    return "ret: " + exitCode + ", " + flattened;
  }

  /**
   * Checks that recoverLease rejects a missing -path argument and succeeds
   * on a file that has been written and closed.
   */
  @Test
  @Timeout(value = 60)
  public void testRecoverLease() throws Exception {
    cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).build();
    cluster.waitActive();

    // Without -path the command must fail with exit code 1.
    final String[] missingPathArgs = {"recoverLease", "-retries", "1"};
    assertEquals("ret: 1, You must supply a -path argument to recoverLease.",
        runCmd(missingPathArgs));

    // Create /foo, write a single byte and close it.
    final DistributedFileSystem dfs = cluster.getFileSystem();
    final FSDataOutputStream stream = dfs.create(new Path("/foo"));
    stream.write(123);
    stream.close();

    final String[] recoverArgs = {"recoverLease", "-path", "/foo"};
    assertEquals("ret: 0, recoverLease SUCCEEDED on /foo",
        runCmd(recoverArgs));
  }

  /**
   * Checks the verifyMeta command: a missing -meta argument is rejected,
   * -meta alone prints the checksum type, and -meta together with -block
   * additionally reports a successful verification of the block file.
   */
  @Test
  @Timeout(value = 60)
  public void testVerifyMetaCommand() throws Exception {
    cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).build();
    cluster.waitActive();
    final DistributedFileSystem dfs = cluster.getFileSystem();
    final DataNode dn = cluster.getDataNodes().get(0);
    final Path barPath = new Path("/bar");
    DFSTestUtil.createFile(dfs, barPath, 1234, (short) 1, 0xdeadbeef);
    final FsDatasetSpi<?> dataset = dn.getFSDataset();
    final ExtendedBlock firstBlock = DFSTestUtil.getFirstBlock(dfs, barPath);

    // Locate the on-disk block file of the first block of /bar.
    final File blockFile = getBlockFile(dataset,
        firstBlock.getBlockPoolId(), firstBlock.getLocalBlock());
    final String blockPath = blockFile.getAbsolutePath();

    // -block without -meta is an error.
    assertEquals("ret: 1, You must specify a meta file with -meta",
        runCmd(new String[] {"verifyMeta", "-block", blockPath}));

    final File metaFile = getMetaFile(dataset,
        firstBlock.getBlockPoolId(), firstBlock.getLocalBlock());
    final String metaPath = metaFile.getAbsolutePath();

    // -meta alone only prints the checksum description.
    final String checksumOnly = "ret: 0, Checksum type: " +
        "DataChecksum(type=CRC32C, chunkSize=512)";
    assertEquals(checksumOnly,
        runCmd(new String[]{"verifyMeta", "-meta", metaPath}));

    // -meta plus -block also verifies the block against the meta file.
    final String checksumAndVerify = "ret: 0, Checksum type: " +
        "DataChecksum(type=CRC32C, chunkSize=512)" +
        "Checksum verification succeeded on block file " +
        blockPath;
    assertEquals(checksumAndVerify,
        runCmd(new String[]{"verifyMeta", "-meta", metaPath,
            "-block", blockPath}));
  }

  /**
   * Checks the computeMeta command: each argument error yields its own exit
   * code and message (1 through 5), and a valid invocation writes a
   * non-empty meta file to the requested output path.
   */
  @Test
  @Timeout(value = 60)
  public void testComputeMetaCommand() throws Exception {
    cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).build();
    cluster.waitActive();
    final DistributedFileSystem dfs = cluster.getFileSystem();
    final DataNode dn = cluster.getDataNodes().get(0);
    final Path barPath = new Path("/bar");
    DFSTestUtil.createFile(dfs, barPath, 1234, (short) 1, 0xdeadbeef);
    final FsDatasetSpi<?> dataset = dn.getFSDataset();
    final ExtendedBlock firstBlock = DFSTestUtil.getFirstBlock(dfs, barPath);
    final File blockFile = getBlockFile(dataset,
        firstBlock.getBlockPoolId(), firstBlock.getLocalBlock());
    final String blockPath = blockFile.getAbsolutePath();

    // No arguments at all: the usage text is printed with exit code 1.
    final String usage = "ret: 1, computeMeta -block <block-file> -out "
        + "<output-metadata-file>  Compute HDFS metadata from the specified"
        + " block file, and save it to  the specified output metadata file."
        + "**NOTE: Use at your own risk! If the block file is corrupt"
        + " and you overwrite it's meta file,  it will show up"
        + " as good in HDFS, but you can't read the data."
        + " Only use as a last measure, and when you are 100% certain"
        + " the block file is good.";
    assertEquals(usage, runCmd(new String[] {"computeMeta"}));

    // Unknown option, so no -block was supplied.
    assertEquals("ret: 2, You must specify a block file with -block",
        runCmd(new String[] {"computeMeta", "-whatever"}));

    // -block points at a path that is not an existing file.
    assertEquals("ret: 3, Block file <bla> does not exist or is not a file",
        runCmd(new String[] {"computeMeta", "-block", "bla"}));

    // Valid -block but no -out.
    assertEquals("ret: 4, You must specify a output file with -out",
        runCmd(new String[] {"computeMeta", "-block", blockPath}));

    // -out names a file that already exists (the block file itself).
    assertEquals("ret: 5, output file already exists!",
        runCmd(new String[] {"computeMeta", "-block", blockPath,
            "-out", blockPath}));

    // Happy path: make sure the target is absent, then generate it.
    final File outFile = new File(TEST_ROOT_DIR, "out.meta");
    outFile.delete();
    final String outPath = outFile.getAbsolutePath();
    final String success =
        "ret: 0, Checksum calculation succeeded on block file " +
        blockPath + " saved metadata to meta file " + outPath;
    assertEquals(success, runCmd(new String[] {"computeMeta", "-block",
        blockPath, "-out", outPath}));

    assertTrue(outFile.exists());
    assertTrue(outFile.length() > 0);
  }

  /**
   * Checks that recoverLease on /foo, which this test never creates, gives
   * up after the first try even though two retries were requested.
   */
  @Test
  @Timeout(value = 60)
  public void testRecoverLeaseforFileNotFound() throws Exception {
    cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).build();
    cluster.waitActive();
    final String[] args = {"recoverLease", "-path", "/foo", "-retries", "2"};
    final String output = runCmd(args);
    assertTrue(output.contains(
        "Giving up on recoverLease for /foo after 1 try"));
  }

  /**
   * Exercises the verifyEC command end to end on a cluster using the
   * RS-3-2 erasure coding policy:
   * <ul>
   *   <li>argument and file-state errors (no arguments, missing file,
   *       non-EC file, directory, file still open);</li>
   *   <li>successful verification of EC files of several sizes;</li>
   *   <li>detection of a corrupted internal block;</li>
   *   <li>the -blockId option, restricting the check to one block group;</li>
   *   <li>the -skipFailureBlocks option, which keeps verifying the remaining
   *       block groups after a failure.</li>
   * </ul>
   */
  @Test
  @Timeout(value = 60)
  public void testVerifyECCommand() throws Exception {
    final ErasureCodingPolicy ecPolicy = SystemErasureCodingPolicies.getByID(
        SystemErasureCodingPolicies.RS_3_2_POLICY_ID);
    cluster = DFSTestUtil.setupCluster(conf, 6, 5, 0);
    cluster.waitActive();
    DistributedFileSystem fs = cluster.getFileSystem();

    // No arguments: the usage text is printed with exit code 1.
    assertEquals("ret: 1, verifyEC -file <file> [-blockId <blk_Id>] " +
        "[-skipFailureBlocks]  -file Verify HDFS erasure coding on all block groups of the file." +
        "  -skipFailureBlocks specify will skip any block group failures during verify," +
        "  and continues verify all block groups of the file," +
        "  the default is not to skip failure blocks." +
        "  -blockId specify blk_Id to verify for a specific one block group.",
        runCmd(new String[]{"verifyEC"}));

    // The file does not exist yet.
    assertEquals("ret: 1, File /bar does not exist.",
        runCmd(new String[]{"verifyEC", "-file", "/bar"}));

    // The file exists but lives outside any erasure-coded directory.
    fs.create(new Path("/bar")).close();
    assertEquals("ret: 1, File /bar is not erasure coded.",
        runCmd(new String[]{"verifyEC", "-file", "/bar"}));

    // Create /ec and put it under the RS-3-2 policy.
    final Path ecDir = new Path("/ec");
    fs.mkdir(ecDir, FsPermission.getDirDefault());
    fs.enableErasureCodingPolicy(ecPolicy.getName());
    fs.setErasureCodingPolicy(ecDir, ecPolicy.getName());

    // A directory is not a regular file.
    assertEquals("ret: 1, File /ec is not a regular file.",
        runCmd(new String[]{"verifyEC", "-file", "/ec"}));

    // The file must still be open while verifyEC runs; the stream is closed
    // afterwards so it is not leaked for the remainder of the test.
    try (FSDataOutputStream openFile = fs.create(new Path(ecDir, "foo"))) {
      assertEquals("ret: 1, File /ec/foo is not closed.",
          runCmd(new String[]{"verifyEC", "-file", "/ec/foo"}));
    }

    // Healthy EC files of increasing size must all verify cleanly.
    final short repl = 1;
    final long k = 1024;
    final long m = k * k;
    final long seed = 0x1234567L;
    DFSTestUtil.createFile(fs, new Path(ecDir, "foo_65535"), 65535, repl, seed);
    assertTrue(runCmd(new String[]{"verifyEC", "-file", "/ec/foo_65535"})
        .contains("All EC block group status: OK"));
    DFSTestUtil.createFile(fs, new Path(ecDir, "foo_256k"), 256 * k, repl, seed);
    assertTrue(runCmd(new String[]{"verifyEC", "-file", "/ec/foo_256k"})
        .contains("All EC block group status: OK"));
    DFSTestUtil.createFile(fs, new Path(ecDir, "foo_1m"), m, repl, seed);
    assertTrue(runCmd(new String[]{"verifyEC", "-file", "/ec/foo_1m"})
        .contains("All EC block group status: OK"));
    DFSTestUtil.createFile(fs, new Path(ecDir, "foo_2m"), 2 * m, repl, seed);
    assertTrue(runCmd(new String[]{"verifyEC", "-file", "/ec/foo_2m"})
        .contains("All EC block group status: OK"));
    DFSTestUtil.createFile(fs, new Path(ecDir, "foo_3m"), 3 * m, repl, seed);
    assertTrue(runCmd(new String[]{"verifyEC", "-file", "/ec/foo_3m"})
        .contains("All EC block group status: OK"));
    DFSTestUtil.createFile(fs, new Path(ecDir, "foo_5m"), 5 * m, repl, seed);
    assertTrue(runCmd(new String[]{"verifyEC", "-file", "/ec/foo_5m"})
        .contains("All EC block group status: OK"));
    // This file is expected to produce two block groups; the block ids are
    // normalised to "blk_x;" so the output can be compared exactly.
    DFSTestUtil.createFile(fs, new Path(ecDir, "foo_6m"), (int) k, 6 * m, m, repl, seed);
    assertEquals("ret: 0, Checking EC block group: blk_x;Status: OK" +
            "Checking EC block group: blk_x;Status: OK" +
            "All EC block group status: OK",
        runCmd(new String[]{"verifyEC", "-file", "/ec/foo_6m"})
            .replaceAll("blk_-[0-9]+", "blk_x;"));

    // Corrupt internal block 0 of a single-block-group file and expect
    // verifyEC to report the mismatch.
    Path corruptFile = new Path(ecDir, "foo_corrupt");
    DFSTestUtil.createFile(fs, corruptFile, 5841961, repl, seed);
    List<LocatedBlock> blocks = DFSTestUtil.getAllBlocks(fs, corruptFile);
    assertEquals(1, blocks.size());
    corruptFirstInternalBlock((LocatedStripedBlock) blocks.get(0), ecPolicy,
        2097152, seed);
    assertTrue(runCmd(new String[]{"verifyEC", "-file", "/ec/foo_corrupt"})
        .contains("Status: ERROR, message: EC compute result not match."));

    // Specify -blockId: only the named block group is checked.
    Path newFile = new Path(ecDir, "foo_new");
    DFSTestUtil.createFile(fs, newFile, (int) k, 6 * m, m, repl, seed);
    blocks = DFSTestUtil.getAllBlocks(fs, newFile);
    assertEquals(2, blocks.size());
    LocatedStripedBlock firstGroup = (LocatedStripedBlock) blocks.get(0);
    LocatedStripedBlock secondGroup = (LocatedStripedBlock) blocks.get(1);
    String blockName = firstGroup.getBlock().getBlockName();
    assertTrue(runCmd(new String[]{"verifyEC", "-file", "/ec/foo_new", "-blockId", blockName})
        .contains("ret: 0, Checking EC block group: " + blockName + "Status: OK"));

    // Specify -skipFailureBlocks: corrupt the first block group and check
    // that the second one is still verified after the first one fails.
    corruptFirstInternalBlock(firstGroup, ecPolicy, 1048576, 0x12345678L);
    assertTrue(runCmd(new String[]{"verifyEC", "-file", "/ec/foo_new", "-skipFailureBlocks"})
        .contains("ret: 1, Checking EC block group: " + blockName +
            "Status: ERROR, message: EC compute result not match." +
            "Checking EC block group: " + secondGroup.getBlock().getBlockName() +
            "Status: OK"));
  }

  /**
   * Corrupts internal block 0 of the given block group: its on-disk block
   * file is overwritten with pseudo-random bytes and its meta file is
   * regenerated with computeMeta so that it matches the new content.
   *
   * @param blockGroup striped block group whose first internal block is hit
   * @param ecPolicy   policy used to split the group into internal blocks
   * @param numBytes   number of pseudo-random bytes written to the block file
   * @param seed       seed of the generator producing those bytes
   * @throws Exception if the block file cannot be written or the command throws
   */
  private void corruptFirstInternalBlock(LocatedStripedBlock blockGroup,
      ErasureCodingPolicy ecPolicy, int numBytes, long seed) throws Exception {
    LocatedBlock[] indexedBlocks = StripedBlockUtil.parseStripedBlockGroup(blockGroup,
        ecPolicy.getCellSize(), ecPolicy.getNumDataUnits(), ecPolicy.getNumParityUnits());
    LocatedBlock target = indexedBlocks[0];
    ExtendedBlock targetBlock = target.getBlock();
    // Find the datanode that stores the block via the IPC port of its
    // first reported location.
    DataNode datanode = cluster.getDataNode(target.getLocations()[0].getIpcPort());
    File blockFile = getBlockFile(datanode.getFSDataset(),
        targetBlock.getBlockPoolId(), targetBlock.getLocalBlock());
    File metaFile = getMetaFile(datanode.getFSDataset(),
        targetBlock.getBlockPoolId(), targetBlock.getLocalBlock());
    // Write error bytes to block file and re-generate meta checksum.
    byte[] errorBytes = new byte[numBytes];
    new Random(seed).nextBytes(errorBytes);
    FileUtils.writeByteArrayToFile(blockFile, errorBytes);
    // computeMeta refuses to overwrite an existing output file, so the old
    // meta file has to be removed first.
    metaFile.delete();
    runCmd(new String[]{"computeMeta", "-block", blockFile.getAbsolutePath(),
        "-out", metaFile.getAbsolutePath()});
  }

}