EmptiedSnapshotRecoveryTest.java

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.zookeeper.test;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.junit.jupiter.api.Assertions.fail;
import java.io.File;
import java.io.IOException;
import java.io.PrintWriter;
import java.nio.file.Files;
import java.util.List;
import org.apache.zookeeper.CreateMode;
import org.apache.zookeeper.PortAssignment;
import org.apache.zookeeper.WatchedEvent;
import org.apache.zookeeper.Watcher;
import org.apache.zookeeper.ZKTestCase;
import org.apache.zookeeper.ZooDefs.Ids;
import org.apache.zookeeper.ZooKeeper;
import org.apache.zookeeper.server.ServerCnxnFactory;
import org.apache.zookeeper.server.SyncRequestProcessor;
import org.apache.zookeeper.server.ZooKeeperServer;
import org.apache.zookeeper.server.persistence.FileTxnSnapLog;
import org.junit.jupiter.api.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * If snapshots are corrupted to the empty file or deleted, ZooKeeper should
 * not proceed to read its transaction log files.
 *
 * <p>These tests expect loading the database to fail with an {@link IOException}
 * once every snapshot has been emptied or deleted, unless
 * {@link FileTxnSnapLog#ZOOKEEPER_SNAPSHOT_TRUST_EMPTY} is set to {@code "true"},
 * in which case the load is expected to succeed and reach the same zxid as
 * before the snapshots were removed.
 */
public class EmptiedSnapshotRecoveryTest extends ZKTestCase implements Watcher {

    private static final Logger LOG = LoggerFactory.getLogger(EmptiedSnapshotRecoveryTest.class);
    private static final String HOSTPORT = "127.0.0.1:" + PortAssignment.unique();
    private static final int CONNECTION_TIMEOUT = 3000;
    private static final int N_TRANSACTIONS = 150;
    private static final int SNAP_COUNT = 100;

    /**
     * Populates a standalone server, damages all of its snapshots and then
     * checks how the server behaves when it loads its database again.
     *
     * @param leaveEmptyFile if {@code true} every snapshot is truncated to an
     *                       empty file, otherwise every snapshot is deleted
     * @param trustEmptySnap if {@code true} the
     *                       {@link FileTxnSnapLog#ZOOKEEPER_SNAPSHOT_TRUST_EMPTY}
     *                       system property is set for the final restart and the
     *                       load is expected to succeed; otherwise the load is
     *                       expected to throw an {@link IOException}
     * @throws Exception if the servers or the client fail unexpectedly
     */
    public void runTest(boolean leaveEmptyFile, boolean trustEmptySnap) throws Exception {
        File tmpSnapDir = ClientBase.createTmpDir();
        File tmpLogDir = ClientBase.createTmpDir();
        ClientBase.setupTestEnv();
        ZooKeeperServer zks = new ZooKeeperServer(tmpSnapDir, tmpLogDir, 3000);
        SyncRequestProcessor.setSnapCount(SNAP_COUNT);
        final int port = Integer.parseInt(HOSTPORT.split(":")[1]);
        ServerCnxnFactory f = ServerCnxnFactory.createFactory(port, -1);
        try {
            f.startup(zks);
            assertTrue(ClientBase.waitForServerUp(HOSTPORT, CONNECTION_TIMEOUT), "waiting for server being up ");
            // N_TRANSACTIONS exceeds SNAP_COUNT so that there is data in the
            // transaction log in addition to the snapshot(s) on disk.
            try (ZooKeeper zk = new ZooKeeper(HOSTPORT, CONNECTION_TIMEOUT, this)) {
                for (int i = 0; i < N_TRANSACTIONS; i++) {
                    zk.create("/node-" + i, new byte[0], Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT);
                }
            }
        } finally {
            // Always release the port and server, even when an assertion above fails.
            f.shutdown();
            zks.shutdown();
        }
        assertTrue(ClientBase.waitForServerDown(HOSTPORT, CONNECTION_TIMEOUT), "waiting for server to shutdown");

        // start server again with intact database
        zks = new ZooKeeperServer(tmpSnapDir, tmpLogDir, 3000);
        final long zxid;
        try {
            zks.startdata();
            zxid = zks.getZKDatabase().getDataTreeLastProcessedZxid();
            LOG.info("After clean restart, zxid = {}", zxid);
            assertTrue(zxid > 0, "zxid > 0");
        } finally {
            zks.shutdown();
        }

        // Make all snapshots empty (or remove them altogether)
        FileTxnSnapLog txnLogFactory = zks.getTxnLogFactory();
        List<File> snapshots = txnLogFactory.findNRecentSnapshots(10);
        assertTrue(snapshots.size() > 0, "We have a snapshot to corrupt");
        for (File file : snapshots) {
            if (leaveEmptyFile) {
                // Opening a PrintWriter on an existing file truncates it to zero length.
                new PrintWriter(file).close();
            } else {
                // Files.delete throws if the snapshot cannot be removed, so the
                // test never silently continues with an intact snapshot.
                Files.delete(file.toPath());
            }
        }

        // The property has to be in place before the server is constructed.
        if (trustEmptySnap) {
            System.setProperty(FileTxnSnapLog.ZOOKEEPER_SNAPSHOT_TRUST_EMPTY, "true");
        }
        try {
            // start server again with corrupted database
            zks = new ZooKeeperServer(tmpSnapDir, tmpLogDir, 3000);
            try {
                zks.startdata();
                long currentZxid = zks.getZKDatabase().getDataTreeLastProcessedZxid();
                if (!trustEmptySnap) {
                    fail("Should have gotten exception for corrupted database");
                }
                assertEquals(zxid, currentZxid, "zxid mismatch after restoring database");
            } catch (IOException e) {
                // expected behavior unless empty snapshots are trusted
                if (trustEmptySnap) {
                    fail("Should not get exception for empty database");
                }
            } finally {
                zks.shutdown();
            }
        } finally {
            if (trustEmptySnap) {
                System.clearProperty(FileTxnSnapLog.ZOOKEEPER_SNAPSHOT_TRUST_EMPTY);
            }
        }
    }

    /**
     * Test resilience to empty Snapshots
     * @throws Exception an exception might be thrown here
     */
    @Test
    public void testRestoreWithEmptySnapFiles() throws Exception {
        runTest(true, false);
    }

    /**
     * Test resilience to deletion of Snapshots
     * @throws Exception an exception might be thrown here
     */
    @Test
    public void testRestoreWithNoSnapFiles() throws Exception {
        runTest(false, false);
    }

    /**
     * Test that the database loads after all snapshots were deleted when the
     * trust-empty-snapshot property is set.
     * @throws Exception an exception might be thrown here
     */
    @Test
    public void testRestoreWithTrustedEmptySnapFiles() throws Exception {
        runTest(false, true);
    }

    /**
     * Test that followers whose snapshots were deleted save a new snapshot
     * after being restarted with the trust-empty-snapshot property set, and
     * that the previously created nodes are still present.
     * @throws Exception an exception might be thrown here
     */
    @Test
    public void testRestoreWithTrustedEmptySnapFilesWhenFollowing() throws Exception {
        QuorumUtil qu = new QuorumUtil(1);
        try {
            qu.startAll();
            String connString = qu.getConnectionStringForServer(1);
            try (ZooKeeper zk = new ZooKeeper(connString, CONNECTION_TIMEOUT, this)) {
                for (int i = 0; i < N_TRANSACTIONS; i++) {
                    zk.create("/node-" + i, new byte[0], Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT);
                }
            }
            int leaderIndex = qu.getLeaderServer();
            //Shut down the cluster and delete the snapshots from the followers
            for (int i = 1; i <= qu.ALL; i++) {
                qu.shutdown(i);
                if (i != leaderIndex) {
                    FileTxnSnapLog txnLogFactory = qu.getPeer(i).peer.getTxnFactory();
                    List<File> snapshots = txnLogFactory.findNRecentSnapshots(10);
                    assertTrue(snapshots.size() > 0, "We have a snapshot to corrupt");
                    for (File file : snapshots) {
                        Files.delete(file.toPath());
                    }
                    assertEquals(0, txnLogFactory.findNRecentSnapshots(10).size());
                }
            }
            //Start while trusting empty snapshots, verify that the followers save snapshots
            System.setProperty(FileTxnSnapLog.ZOOKEEPER_SNAPSHOT_TRUST_EMPTY, "true");
            qu.start(leaderIndex);
            for (int i = 1; i <= qu.ALL; i++) {
                if (i != leaderIndex) {
                    qu.restart(i);
                    FileTxnSnapLog txnLogFactory = qu.getPeer(i).peer.getTxnFactory();
                    List<File> snapshots = txnLogFactory.findNRecentSnapshots(10);
                    assertTrue(snapshots.size() > 0, "A snapshot should have been created on follower " + i);
                }
            }
            //Check that the created nodes are still there
            try (ZooKeeper zk = new ZooKeeper(connString, CONNECTION_TIMEOUT, this)) {
                for (int i = 0; i < N_TRANSACTIONS; i++) {
                    assertNotNull(zk.exists("/node-" + i, false));
                }
            }
        } finally {
            System.clearProperty(FileTxnSnapLog.ZOOKEEPER_SNAPSHOT_TRUST_EMPTY);
            qu.tearDown();
        }
    }

    /**
     * Watcher callback required by the client constructor; events are ignored.
     *
     * @param event the event delivered by the client, unused
     */
    @Override
    public void process(WatchedEvent event) {
        // do nothing
    }

}