QuorumCnxManagerSocketConnectionTimeoutTest.java

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 * <p>
 * http://www.apache.org/licenses/LICENSE-2.0
 * <p>
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.zookeeper.server.quorum;

import static org.junit.jupiter.api.Assertions.assertTrue;
import java.io.IOException;
import java.net.InetSocketAddress;
import java.net.Socket;
import java.net.SocketAddress;
import java.net.SocketTimeoutException;
import org.apache.zookeeper.ZKTestCase;
import org.apache.zookeeper.test.ClientBase;
import org.apache.zookeeper.test.QuorumUtil;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class QuorumCnxManagerSocketConnectionTimeoutTest extends ZKTestCase {

    private static final Logger LOG = LoggerFactory.getLogger(QuorumCnxManagerSocketConnectionTimeoutTest.class);
    private QuorumUtil qu;

    @BeforeEach
    public void setUp() throws Exception {
        // starting a 3 node ensemble without observers
        qu = new QuorumUtil(1, 2);
        qu.startAll();
    }

    /**
     * Testing an error case reported in ZOOKEEPER-3756:
     *
     * When a new leader election happens after a ZooKeeper server restarted, in Kubernetes
     * the rest of the servers can not initiate connection to the restarted one. But they
     * get SocketTimeoutException instead of immediate IOException. The Leader Election was
     * time-outing quicker than the socket.connect call, so we ended up with cycles of broken
     * leader elections.
     *
     * The fix was to make the connection initiation asynchronous, so one 'broken' connection
     * doesn't make the whole leader election to be blocked, even in case of SocketTimeoutException.
     *
     * @throws Exception
     */
    @Test
    public void testSocketConnectionTimeoutDuringConnectingToElectionAddress() throws Exception {

        int leaderId = qu.getLeaderServer();

        // use a custom socket factory that will cause timeout instead of connecting to the
        // leader election port of the current leader
        final InetSocketAddress leaderElectionAddress =
            qu.getLeaderQuorumPeer().getElectionAddress().getOne();
        QuorumCnxManager.setSocketFactory(() -> new SocketStub(leaderElectionAddress));

        qu.shutdown(leaderId);

        assertTrue(ClientBase.waitForServerDown("127.0.0.1:" + qu.getPeer(leaderId).clientPort, ClientBase.CONNECTION_TIMEOUT),
                "Timeout during waiting for current leader to go down");

        String errorMessage = "No new leader was elected";
        waitFor(errorMessage, () -> qu.leaderExists() && qu.getLeaderServer() != leaderId, 15);
    }

    final class SocketStub extends Socket {

        private final InetSocketAddress addressToTimeout;

        SocketStub(InetSocketAddress addressToTimeout) {
            this.addressToTimeout = addressToTimeout;
        }

        @Override
        public void connect(SocketAddress endpoint, int timeout) throws IOException {
            if (addressToTimeout.equals(endpoint)) {
                try {
                    Thread.sleep(timeout);
                } catch (InterruptedException e) {
                    LOG.warn("interrupted SocketStub.connect", e);
                }
                throw new SocketTimeoutException("timeout reached in SocketStub.connect()");
            }

            super.connect(endpoint, timeout);
        }
    }

    @AfterEach
    public void tearDown() throws Exception {
        qu.shutdownAll();
        QuorumCnxManager.setSocketFactory(QuorumCnxManager.DEFAULT_SOCKET_FACTORY);
    }

}