TestQueueState.java
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.junit.jupiter.api.Assertions.fail;
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.Mockito.doReturn;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;
import java.io.IOException;
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.Priority;
import org.apache.hadoop.yarn.api.records.QueueState;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.nodelabels.CommonNodeLabelsManager;
import org.apache.hadoop.yarn.server.resourcemanager.MockNM;
import org.apache.hadoop.yarn.server.resourcemanager.MockRM;
import org.apache.hadoop.yarn.server.resourcemanager.MockRMAppSubmissionData;
import org.apache.hadoop.yarn.server.resourcemanager.MockRMAppSubmitter;
import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
import org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppState;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp;
import org.apache.hadoop.yarn.util.resource.Resources;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.Timeout;
/**
 * Tests queue state handling in the {@link CapacityScheduler}: the
 * RUNNING / STOPPED / DRAINING states observed after the scheduler is
 * reinitialized with a changed configuration, and the DRAINING state
 * observed again after a ResourceManager restart.
 */
public class TestQueueState {

  private static final String Q1 = "q1";
  private static final String Q2 = "q2";
  private static final String Q3 = "q3";

  private static final QueuePath ROOT_PATH =
      new QueuePath(CapacitySchedulerConfiguration.ROOT);
  private static final QueuePath Q1_PATH =
      new QueuePath(CapacitySchedulerConfiguration.ROOT + "." + Q1);
  private static final QueuePath Q2_PATH =
      new QueuePath(Q1_PATH + "." + Q2);
  private static final QueuePath Q3_PATH =
      new QueuePath(Q1_PATH + "." + Q3);

  /** Scheduler under test; created by {@link #initScheduler}. */
  private CapacityScheduler cs;

  /** Configuration most recently handed to the scheduler. */
  private YarnConfiguration conf;

  /**
   * Verifies the states of a parent queue (q1) and its only child (q2) after
   * successive reinitializations, and that stopping the parent while the
   * child is configured as RUNNING is rejected.
   *
   * @throws IOException if an unexpected scheduler (re)initialization fails
   */
  @Test
  @Timeout(value = 15)
  public void testQueueState() throws IOException {
    CapacitySchedulerConfiguration csConf =
        new CapacitySchedulerConfiguration();
    csConf.setQueues(ROOT_PATH, new String[] {Q1});
    csConf.setQueues(Q1_PATH, new String[] {Q2});
    csConf.setCapacity(Q1_PATH, 100);
    csConf.setCapacity(Q2_PATH, 100);
    RMContext rmContext = initScheduler(csConf);

    // By default, the state of both queues should be RUNNING.
    assertQueueState(QueueState.RUNNING, Q1);
    assertQueueState(QueueState.RUNNING, Q2);

    // Change the state of Q1 to STOPPED, and re-initialize the CS.
    csConf.setState(Q1_PATH, QueueState.STOPPED);
    reinitScheduler(csConf, rmContext);
    // The state of Q1 and its child Q2 should be STOPPED.
    assertQueueState(QueueState.STOPPED, Q1);
    assertQueueState(QueueState.STOPPED, Q2);

    // Change the state of Q1 to RUNNING, and the state of Q2 to STOPPED.
    csConf.setState(Q1_PATH, QueueState.RUNNING);
    csConf.setState(Q2_PATH, QueueState.STOPPED);
    // Re-initialize the CS; the operation should be successful.
    reinitScheduler(csConf, rmContext);
    assertQueueState(QueueState.RUNNING, Q1);
    assertQueueState(QueueState.STOPPED, Q2);

    // Change the state of Q1 to STOPPED, and the state of Q2 to RUNNING.
    csConf.setState(Q1_PATH, QueueState.STOPPED);
    csConf.setState(Q2_PATH, QueueState.RUNNING);
    conf = new YarnConfiguration(csConf);
    // Re-initialize the CS; the operation should fail.
    try {
      cs.reinitialize(conf, rmContext);
      fail("Should throw an Exception.");
    } catch (Exception ex) {
      // The interesting message lives on the cause. Check that it exists
      // first so a missing cause is reported as a readable assertion failure
      // instead of a NullPointerException.
      Throwable cause = ex.getCause();
      assertTrue(cause != null && cause.getMessage() != null,
          "Expected the reinitialize failure to carry a cause with a"
              + " message, but got: " + ex);
      assertTrue(cause.getMessage().contains(
          "The parent queue:root.q1 cannot be STOPPED as the child" +
          " queue:root.q1.q2 is in RUNNING state."));
    }
  }

  /**
   * Verifies queue state transitions while an application is active in q2:
   * stopping a queue with an active application yields DRAINING, re-enabling
   * it yields RUNNING, and finishing the application lets the draining queues
   * reach STOPPED.
   *
   * @throws Exception if an unexpected scheduler operation fails
   */
  @Test
  @Timeout(value = 15)
  public void testQueueStateTransit() throws Exception {
    CapacitySchedulerConfiguration csConf =
        new CapacitySchedulerConfiguration();
    csConf.setQueues(ROOT_PATH, new String[] {Q1});
    csConf.setQueues(Q1_PATH, new String[] {Q2, Q3});
    csConf.setCapacity(Q1_PATH, 100);
    csConf.setCapacity(Q2_PATH, 50);
    csConf.setCapacity(Q3_PATH, 50);
    RMContext rmContext = initScheduler(csConf);

    // By default, the state of ALL queues should be RUNNING.
    assertQueueState(QueueState.RUNNING, Q1);
    assertQueueState(QueueState.RUNNING, Q2);
    assertQueueState(QueueState.RUNNING, Q3);

    // Submit an application (and one attempt) to Q2.
    ApplicationId appId = ApplicationId.newInstance(
        System.currentTimeMillis(), 1);
    String userName = "testUser";
    cs.getQueue(Q2).submitApplication(appId, userName, Q2);
    FiCaSchedulerApp app = getMockApplication(appId, userName,
        Resources.createResource(4, 0));
    cs.getQueue(Q2).submitApplicationAttempt(app, userName);

    // Set Q2 state to STOPPED and reinitialize.
    csConf.setState(Q2_PATH, QueueState.STOPPED);
    reinitScheduler(csConf, rmContext);
    assertQueueState(QueueState.RUNNING, Q1);
    assertQueueState(QueueState.DRAINING, Q2);
    assertQueueState(QueueState.RUNNING, Q3);

    // Set Q2 state to RUNNING and reinitialize.
    // Q2 should transit from DRAINING to RUNNING.
    csConf.setState(Q2_PATH, QueueState.RUNNING);
    reinitScheduler(csConf, rmContext);
    assertQueueState(QueueState.RUNNING, Q1);
    assertQueueState(QueueState.RUNNING, Q2);
    assertQueueState(QueueState.RUNNING, Q3);

    // Set Q2 state to STOPPED and reinitialize.
    csConf.setState(Q2_PATH, QueueState.STOPPED);
    reinitScheduler(csConf, rmContext);
    assertQueueState(QueueState.RUNNING, Q1);
    assertQueueState(QueueState.DRAINING, Q2);
    assertQueueState(QueueState.RUNNING, Q3);

    // Set Q1 state to STOPPED and reinitialize.
    csConf.setState(Q1_PATH, QueueState.STOPPED);
    reinitScheduler(csConf, rmContext);
    assertQueueState(QueueState.DRAINING, Q1);
    assertQueueState(QueueState.DRAINING, Q2);
    assertQueueState(QueueState.STOPPED, Q3);

    // Activate Q3; this should fail.
    csConf.setState(Q3_PATH, QueueState.RUNNING);
    conf = new YarnConfiguration(csConf);
    try {
      cs.reinitialize(conf, rmContext);
      fail("Should throw an Exception.");
    } catch (Exception ignored) {
      // Expected: Q3 is configured RUNNING while its parent Q1 is configured
      // STOPPED. Only the fact that reinitialize fails is checked here; the
      // exception details are intentionally not inspected.
    }

    // Stop the app running in Q2.
    cs.getQueue(Q2).finishApplicationAttempt(app, Q2);
    cs.getQueue(Q2).finishApplication(appId, userName);
    assertQueueState(QueueState.STOPPED, Q1);
    assertQueueState(QueueState.STOPPED, Q2);
    assertQueueState(QueueState.STOPPED, Q3);
  }

  /**
   * Creates a fresh {@link CapacityScheduler} from the given configuration,
   * wires it to a mock {@link RMContext} and initializes it. The scheduler
   * and the derived {@link YarnConfiguration} are stored in {@link #cs} and
   * {@link #conf}.
   *
   * @param csConf capacity scheduler configuration to start from
   * @return the mock RM context the scheduler was initialized with
   * @throws IOException if creating the mock context or scheduler fails
   */
  private RMContext initScheduler(CapacitySchedulerConfiguration csConf)
      throws IOException {
    conf = new YarnConfiguration(csConf);
    cs = new CapacityScheduler();
    RMContext rmContext = TestUtils.getMockRMContext();
    cs.setConf(conf);
    cs.setRMContext(rmContext);
    cs.init(conf);
    return rmContext;
  }

  /**
   * Snapshots {@code csConf} into a new {@link YarnConfiguration} (stored in
   * {@link #conf}) and reinitializes the scheduler with it.
   *
   * @param csConf    the (possibly modified) capacity scheduler configuration
   * @param rmContext the RM context to reinitialize with
   * @throws IOException if the scheduler rejects the new configuration
   */
  private void reinitScheduler(CapacitySchedulerConfiguration csConf,
      RMContext rmContext) throws IOException {
    conf = new YarnConfiguration(csConf);
    cs.reinitialize(conf, rmContext);
  }

  /**
   * Asserts that the named queue of the scheduler under test is in the
   * expected state.
   *
   * @param expected  the expected queue state
   * @param queueName short name of the queue to look up
   */
  private void assertQueueState(QueueState expected, String queueName) {
    assertEquals(expected, cs.getQueue(queueName).getState());
  }

  /**
   * Builds a Mockito mock of {@link FiCaSchedulerApp} with attempt id 0 of
   * the given application, the given user and AM resource, priority 0, the
   * default (no-label) AM partition, and {@code isRunnable()} returning
   * {@code true}. {@code compareInputOrderTo} delegates to the real method.
   *
   * @param appId      application id the mock reports
   * @param user       user name the mock reports
   * @param amResource AM resource the mock reports, with or without a label
   * @return the configured mock application
   */
  private FiCaSchedulerApp getMockApplication(ApplicationId appId, String user,
      Resource amResource) {
    FiCaSchedulerApp application = mock(FiCaSchedulerApp.class);
    ApplicationAttemptId applicationAttemptId =
        ApplicationAttemptId.newInstance(appId, 0);
    doReturn(applicationAttemptId.getApplicationId()).
        when(application).getApplicationId();
    doReturn(applicationAttemptId).when(application).getApplicationAttemptId();
    doReturn(user).when(application).getUser();
    doReturn(amResource).when(application).getAMResource();
    doReturn(Priority.newInstance(0)).when(application).getPriority();
    doReturn(CommonNodeLabelsManager.NO_LABEL).when(application)
        .getAppAMNodePartitionName();
    doReturn(amResource).when(application).getAMResource(
        CommonNodeLabelsManager.NO_LABEL);
    when(application.compareInputOrderTo(any(FiCaSchedulerApp.class)))
        .thenCallRealMethod();
    when(application.isRunnable()).thenReturn(true);
    return application;
  }

  /**
   * Verifies that queues which are DRAINING because of an active application
   * are DRAINING again after a second RM is started on the same state store
   * and the application has been recovered.
   *
   * @throws Exception if any RM operation fails unexpectedly
   */
  @Test
  @Timeout(value = 30)
  public void testRecoverDrainingStateAfterRMRestart() throws Exception {
    // Init conf.
    CapacitySchedulerConfiguration newConf =
        new CapacitySchedulerConfiguration();
    newConf.setBoolean(YarnConfiguration.RECOVERY_ENABLED, true);
    newConf.setBoolean(YarnConfiguration.RM_WORK_PRESERVING_RECOVERY_ENABLED,
        false);
    newConf.set(YarnConfiguration.RM_STORE, MemoryRMStateStore.class.getName());
    newConf.setInt(YarnConfiguration.RM_MAX_COMPLETED_APPLICATIONS, 1);
    newConf.setQueues(ROOT_PATH, new String[]{Q1});
    newConf.setQueues(Q1_PATH, new String[]{Q2});
    newConf.setCapacity(Q1_PATH, 100);
    newConf.setCapacity(Q2_PATH, 100);

    // Init state store.
    MemoryRMStateStore newMemStore = new MemoryRMStateStore();
    newMemStore.init(newConf);

    // Init RM & NM. Both RMs are closed in finally blocks so that neither
    // leaks when an assertion fails. The first RM is deliberately kept open
    // until the end so the restart scenario itself is unchanged.
    MockRM rm = new MockRM(newConf, newMemStore);
    MockRM restartedRm = null;
    try {
      rm.start();
      MockNM nm = rm.registerNode("h1:1234", 204800);

      // Submit an app and launch its AM on nm.
      MockRMAppSubmissionData data =
          MockRMAppSubmissionData.Builder.createWithMemory(1024, rm)
              .withAppName("appname")
              .withUser("appuser")
              .withAcls(null)
              .withQueue(Q2)
              .withUnmanagedAM(false)
              .build();
      RMApp app = MockRMAppSubmitter.submit(rm, data);
      MockRM.launchAM(app, rm, nm);
      rm.waitForState(app.getApplicationId(), RMAppState.ACCEPTED);

      // Update queue state to STOPPED.
      newConf.setState(Q1_PATH, QueueState.STOPPED);
      CapacityScheduler capacityScheduler =
          (CapacityScheduler) rm.getRMContext().getScheduler();
      capacityScheduler.reinitialize(newConf, rm.getRMContext());

      // Current queue state should be DRAINING.
      assertEquals(QueueState.DRAINING,
          capacityScheduler.getQueue(Q2).getState());
      assertEquals(QueueState.DRAINING,
          capacityScheduler.getQueue(Q1).getState());

      // RM restart: a second RM on the same configuration and state store.
      restartedRm = new MockRM(newConf, newMemStore);
      restartedRm.start();
      restartedRm.registerNode("h1:1234", 204800);

      // Queue state should be DRAINING after the app is recovered.
      restartedRm.waitForState(app.getApplicationId(), RMAppState.ACCEPTED);
      capacityScheduler =
          (CapacityScheduler) restartedRm.getRMContext().getScheduler();
      assertEquals(QueueState.DRAINING,
          capacityScheduler.getQueue(Q2).getState());
      assertEquals(QueueState.DRAINING,
          capacityScheduler.getQueue(Q1).getState());
    } finally {
      // Close the restarted RM first, then the original one. The nested
      // finally makes sure the original is closed even if the first close
      // throws.
      try {
        if (restartedRm != null) {
          restartedRm.close();
        }
      } finally {
        rm.close();
      }
    }
  }
}