QuorumRestartTest.java

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.zookeeper.test;

import static org.apache.zookeeper.client.ZKClientConfig.ZOOKEEPER_CLIENT_CNXN_SOCKET;
import static org.junit.jupiter.api.Assertions.assertTrue;
import org.apache.zookeeper.ZKTestCase;
import org.apache.zookeeper.server.ServerCnxnFactory;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Restart scenarios for a three-server ensemble running on the Netty client and
 * server connection implementations.
 *
 * <p>Every test follows the same pattern: shut a server down, wait until its client
 * port reports it as down, restart it, and then wait until {@code QuorumUtil}
 * reports all peers as connected again.
 */
public class QuorumRestartTest extends ZKTestCase {

    private static final Logger LOG = LoggerFactory.getLogger(QuorumRestartTest.class);

    /** Number of servers in the ensemble; server ids used by the tests run from 1 to this value. */
    private static final int SERVER_COUNT = 3;

    /** Timeout handed to {@code waitFor}; presumably seconds -- confirm against ZKTestCase. */
    private static final int WAIT_TIMEOUT = 30;

    private QuorumUtil qu;

    /**
     * Switches client and server to the Netty connection implementations and starts the ensemble.
     *
     * @throws Exception if the ensemble cannot be created or started
     */
    @BeforeEach
    public void setUp() throws Exception {
        System.setProperty(ZOOKEEPER_CLIENT_CNXN_SOCKET, "org.apache.zookeeper.ClientCnxnSocketNetty");
        System.setProperty(ServerCnxnFactory.ZOOKEEPER_SERVER_CNXN_FACTORY, "org.apache.zookeeper.server.NettyServerCnxnFactory");

        // starting a 3 node ensemble without observers
        qu = new QuorumUtil(1, 2);
        qu.startAll();
    }

    /**
     * A basic test for rolling restart. We are restarting the ZooKeeper servers one by one,
     * starting from the first server. We always make sure that all the nodes joined to the
     * Quorum before moving forward.
     *
     * @throws Exception if a server cannot be shut down or restarted, or a wait is interrupted
     */
    @Test
    public void testRollingRestart() throws Exception {
        for (int serverToRestart = 1; serverToRestart <= SERVER_COUNT; serverToRestart++) {
            rollingRestart(serverToRestart);
        }
    }

    /**
     * Testing one of the errors reported in ZOOKEEPER-2164, when some servers can not
     * rejoin to the Quorum after restarting the servers backwards.
     *
     * @throws Exception if a server cannot be shut down or restarted, or a wait is interrupted
     */
    @Test
    public void testRollingRestartBackwards() throws Exception {
        for (int serverToRestart = SERVER_COUNT; serverToRestart >= 1; serverToRestart--) {
            rollingRestart(serverToRestart);
        }
    }

    /**
     * Testing one of the errors reported in ZOOKEEPER-2164, when some servers can not
     * rejoin to the Quorum after restarting the current leader multiple times.
     *
     * @throws Exception if a server cannot be shut down or restarted, or a wait is interrupted
     */
    @Test
    public void testRestartingLeaderMultipleTimes() throws Exception {
        for (int restartCount = 1; restartCount <= 3; restartCount++) {
            final int leaderId = qu.getLeaderServer();
            LOG.info("***** new leader: {}", leaderId);

            shutdownAndAwaitDown(leaderId, "Timeout during waiting for current leader to go down");

            // The old leader must stay down until a different server has taken over,
            // otherwise the restart below would not exercise a leadership change.
            waitFor("No new leader was elected",
                    () -> qu.leaderExists() && qu.getLeaderServer() != leaderId,
                    WAIT_TIMEOUT);

            restartAndAwaitQuorum(leaderId,
                    "Not all the quorum members are connected after restarting the old leader");

            LOG.info("***** Leader Restart {} succeeded", restartCount);
        }
    }

    /**
     * Shuts the ensemble down and removes the system properties set in {@link #setUp()}.
     *
     * <p>The properties are cleared in a {@code finally} block, and the ensemble is only
     * shut down if it was actually created, so a failure in {@code setUp} or during
     * shutdown cannot leave the Netty settings behind for tests that run later in the
     * same JVM.
     *
     * @throws Exception if shutting down the ensemble fails
     */
    @AfterEach
    public void tearDown() throws Exception {
        try {
            // qu is still null when the QuorumUtil constructor failed in setUp.
            if (qu != null) {
                qu.shutdownAll();
            }
        } finally {
            System.clearProperty(ZOOKEEPER_CLIENT_CNXN_SOCKET);
            System.clearProperty(ServerCnxnFactory.ZOOKEEPER_SERVER_CNXN_FACTORY);
        }
    }

    /**
     * Performs one step of a rolling restart: takes the server down, brings it back and
     * waits until all peers are connected again.
     *
     * @param serverId id of the server to restart
     * @throws Exception if the server cannot be shut down or restarted, or a wait is interrupted
     */
    private void rollingRestart(int serverId) throws Exception {
        LOG.info("***** restarting: {}", serverId);

        shutdownAndAwaitDown(serverId,
                String.format("Timeout during waiting for server %d to go down", serverId));
        restartAndAwaitQuorum(serverId,
                "Not all the quorum members are connected after restarting server " + serverId);

        LOG.info("***** Restart {} succeeded", serverId);
    }

    /**
     * Shuts a server down and asserts that its client port is reported as down within
     * {@code ClientBase.CONNECTION_TIMEOUT}.
     *
     * @param serverId id of the server to shut down
     * @param failureMessage assertion message used when the server does not go down in time
     * @throws Exception if shutting the server down fails
     */
    private void shutdownAndAwaitDown(int serverId, String failureMessage) throws Exception {
        qu.shutdown(serverId);

        final String hostPort = "127.0.0.1:" + qu.getPeer(serverId).clientPort;
        assertTrue(ClientBase.waitForServerDown(hostPort, ClientBase.CONNECTION_TIMEOUT), failureMessage);
    }

    /**
     * Restarts a server and waits until {@code QuorumUtil} reports all peers as connected.
     *
     * @param serverId id of the server to restart
     * @param failureMessage message passed to {@code waitFor} for the case the peers do not reconnect
     * @throws Exception if restarting the server fails or the wait is interrupted
     */
    private void restartAndAwaitQuorum(int serverId, String failureMessage) throws Exception {
        qu.restart(serverId);
        waitFor(failureMessage, () -> qu.allPeersAreConnected(), WAIT_TIMEOUT);
    }

}