NonRecoverableErrorTest.java
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.zookeeper.test;
import static org.apache.zookeeper.test.ClientBase.CONNECTION_TIMEOUT;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.junit.jupiter.api.Assertions.fail;
import java.io.IOException;
import java.util.UUID;
import org.apache.zookeeper.CreateMode;
import org.apache.zookeeper.PortAssignment;
import org.apache.zookeeper.ZooDefs.Ids;
import org.apache.zookeeper.ZooKeeper;
import org.apache.zookeeper.server.ZKDatabase;
import org.apache.zookeeper.server.persistence.FileTxnSnapLog;
import org.apache.zookeeper.server.quorum.QuorumPeer;
import org.apache.zookeeper.server.quorum.QuorumPeer.ServerState;
import org.apache.zookeeper.server.quorum.QuorumPeerTestBase;
import org.apache.zookeeper.test.ClientBase.CountdownWatcher;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.Timeout;
/**
* This class tests the non-recoverable error behavior of quorum server.
*/
public class NonRecoverableErrorTest extends QuorumPeerTestBase {
private static final String NODE_PATH = "/noLeaderIssue";
/**
* Test case for https://issues.apache.org/jira/browse/ZOOKEEPER-2247.
* Test to verify that even after non recoverable error (error while
* writing transaction log), ZooKeeper is still available.
*/
@Test
@Timeout(value = 30)
public void testZooKeeperServiceAvailableOnLeader() throws Exception {
int SERVER_COUNT = 3;
final int[] clientPorts = new int[SERVER_COUNT];
StringBuilder sb = new StringBuilder();
String server;
for (int i = 0; i < SERVER_COUNT; i++) {
clientPorts[i] = PortAssignment.unique();
server = "server." + i + "=127.0.0.1:" + PortAssignment.unique() + ":" + PortAssignment.unique()
+ ":participant;127.0.0.1:" + clientPorts[i];
sb.append(server + "\n");
}
String currentQuorumCfgSection = sb.toString();
MainThread[] mt = new MainThread[SERVER_COUNT];
for (int i = 0; i < SERVER_COUNT; i++) {
mt[i] = new MainThread(i, clientPorts[i], currentQuorumCfgSection, false);
mt[i].start();
}
// ensure server started
for (int i = 0; i < SERVER_COUNT; i++) {
assertTrue(
ClientBase.waitForServerUp("127.0.0.1:" + clientPorts[i], CONNECTION_TIMEOUT),
"waiting for server " + i + " being up");
}
CountdownWatcher watcher = new CountdownWatcher();
ZooKeeper zk = new ZooKeeper("127.0.0.1:" + clientPorts[0], ClientBase.CONNECTION_TIMEOUT, watcher);
watcher.waitForConnected(ClientBase.CONNECTION_TIMEOUT);
String data = "originalData";
zk.create(NODE_PATH, data.getBytes(), Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT);
// get information of current leader
QuorumPeer leader = getLeaderQuorumPeer(mt);
assertNotNull(leader, "Leader must have been elected by now");
// inject problem in leader
FileTxnSnapLog snapLog = leader.getActiveServer().getTxnLogFactory();
FileTxnSnapLog fileTxnSnapLogWithError = new FileTxnSnapLog(snapLog.getDataLogDir(), snapLog.getSnapDir()) {
@Override
public void commit() throws IOException {
throw new IOException("Input/output error");
}
};
ZKDatabase originalZKDatabase = leader.getActiveServer().getZKDatabase();
long leaderCurrentEpoch = leader.getCurrentEpoch();
ZKDatabase newDB = new ZKDatabase(fileTxnSnapLogWithError);
leader.getActiveServer().setZKDatabase(newDB);
try {
// do create operation, so that injected IOException is thrown
zk.create(uniqueZnode(), data.getBytes(), Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT);
fail("IOException is expected due to error injected to transaction log commit");
} catch (Exception e) {
// do nothing
}
// resetting watcher so that this watcher can be again used to ensure
// that the zkClient is able to re-establish connection with the
// newly elected zookeeper quorum.
watcher.reset();
waitForNewLeaderElection(leader, leaderCurrentEpoch);
// ensure server started, give enough time, so that new leader election
// takes place
for (int i = 0; i < SERVER_COUNT; i++) {
assertTrue(
ClientBase.waitForServerUp("127.0.0.1:" + clientPorts[i], CONNECTION_TIMEOUT),
"waiting for server " + i + " being up");
}
// revert back the error
leader.getActiveServer().setZKDatabase(originalZKDatabase);
// verify that now ZooKeeper service is up and running
leader = getLeaderQuorumPeer(mt);
assertNotNull(leader, "New leader must have been elected by now");
String uniqueNode = uniqueZnode();
watcher.waitForConnected(ClientBase.CONNECTION_TIMEOUT);
String createNode = zk.create(uniqueNode, data.getBytes(), Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT);
// if node is created successfully then it means that ZooKeeper service
// is available
assertEquals(uniqueNode, createNode, "Failed to create znode");
zk.close();
// stop all severs
for (int i = 0; i < SERVER_COUNT; i++) {
mt[i].shutdown();
}
}
private void waitForNewLeaderElection(QuorumPeer peer, long leaderCurrentEpoch) throws IOException, InterruptedException {
LOG.info("Waiting for new LE cycle..");
int count = 100; // giving a grace period of 10seconds
while (count > 0) {
if (leaderCurrentEpoch == peer.getCurrentEpoch()) {
Thread.sleep(100);
}
count--;
}
assertNotEquals(leaderCurrentEpoch, peer.getCurrentEpoch(), "New LE cycle must have triggered");
}
private QuorumPeer getLeaderQuorumPeer(MainThread[] mt) {
for (int i = mt.length - 1; i >= 0; i--) {
QuorumPeer quorumPeer = mt[i].getQuorumPeer();
if (null != quorumPeer && ServerState.LEADING == quorumPeer.getPeerState()) {
return quorumPeer;
}
}
return null;
}
private String uniqueZnode() {
UUID randomUUID = UUID.randomUUID();
String node = NODE_PATH + "/" + randomUUID.toString();
return node;
}
}