XCTraceNormProfiler.java
/*
* Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Oracle designates this
* particular file as subject to the "Classpath" exception as provided
* by Oracle in the LICENSE file that accompanied this code.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package org.openjdk.jmh.profile;
import joptsimple.OptionParser;
import joptsimple.OptionSet;
import joptsimple.OptionSpec;
import org.openjdk.jmh.infra.BenchmarkParams;
import org.openjdk.jmh.results.*;
import org.openjdk.jmh.util.FileUtils;
import org.openjdk.jmh.util.TempFile;
import java.io.*;
import java.nio.file.Path;
import java.util.*;
/**
* macOS perfnorm profiler based on xctrace utility shipped with Xcode Instruments.
* <p>
* The profiling process consists of several steps:
* 1) launching a program that needs to be profiled using `xctrace record` command; in case of success,
* the output of this step is a "trace-file", which is in fact a directory containing multiple files
* representing the recorded trace, the trace may contain multiple resulting tables, depending on the template;
* 2) inspecting a recorded trace to check if it contains a table supported by the profiler; this information
* could be obtained from the trace's table of contents (`xctrace export --toc`);
* 3) extracting the table with profiling results from the trace file using xctrace export and parsing it
* to extract individual samples.
* <p>
* `xctrace export` command extracts data only in XML format, thus both the table of contents and the table
* with profiling results need to be parsed as XML documents.
* <p>
* This profiler currently supports only one table type: counters-profile.
* Such tables generated by the CPU Counters instrument performing sampling either by
* timer interrupts, or interrupts generated by a PMU counter, depending on particular configuration.
* <p>
* Tracing configuration, or template, is required to perform a recording.
* It is a file that could be configured and saved using Instruments application.
* <p>
* CPU Counters template has no default parameters and could only be configured in the Instruments app UI.
* <p>
* To provide a default behavior (that does not require a user-configured template) and make profilers use a bit more
* convenient, XCTraceNormProfiler uses a preconfigured Instruments template ({@code /default.instruments.template.xml})
* that is configured to sample some PMU counters on timer interrupts.
* <p>
*/
public class XCTraceNormProfiler implements ExternalProfiler {
// https://developer.apple.com/documentation/xcode-release-notes/xcode-13-release-notes#Instruments
// Older versions support CPU Counters in some way, but are lacking handy "counters-profile" table.
private static final int XCTRACE_VERSION_WITH_COUNTERS_PROFILE_TABLE = 13;
// Currently, only counters-profile table is supported
private static final XCTraceTableHandler.ProfilingTableType SUPPORTED_TABLE_TYPE =
XCTraceTableHandler.ProfilingTableType.COUNTERS_PROFILE;
private static final String[] CYCLES_EVENT_NAMES = new String[]{
"CORE_ACTIVE_CYCLE", "Cycles", "FIXED_CYCLES", "CPU_CLK_UNHALTED.THREAD", "CPU_CLK_UNHALTED.THREAD_P"
};
private static final String[] INSTRUCTIONS_EVENT_NAMES = new String[]{
"INST_ALL", "Instructions", "FIXED_INSTRUCTIONS", "INST_RETIRED.ANY", "INST_RETIRED.ANY_P"
};
private static final String[] BRANCH_EVENT_NAMES = new String[]{
"INST_BRANCH", "BR_INST_RETIRED.ALL_BRANCHES", "BR_INST_RETIRED.ALL_BRANCHES_PEBS"
};
private static final String[] BRANCH_MISS_EVENT_NAMES = new String[]{
"BRANCH_MISPRED_NONSPEC", "BR_MISP_RETIRED.ALL_BRANCHES", "BR_MISP_RETIRED.ALL_BRANCHES_PS"
};
private final String xctracePath;
private final String tracingTemplate;
private final Path temporaryDirectory;
private final TempFile outputFile;
private final long delayMs;
private final long lengthMs;
private final boolean shouldFixStartTime;
public XCTraceNormProfiler(String initLine) throws ProfilerException {
OptionParser parser = new OptionParser();
parser.formatHelpWith(new ProfilerOptionFormatter(XCTraceNormProfiler.class.getName()));
OptionSpec<String> templateOpt = parser.accepts("template", "Name of or path to Instruments template. " +
"Use `xctrace list templates` to view available templates. " +
"Only templates with \"CPU Counters\" instrument are supported at the moment.")
.withOptionalArg().ofType(String.class);
OptionSpec<Integer> optDelay = parser.accepts("delay",
"Delay collection for a given time, in milliseconds; -1 to detect automatically.")
.withRequiredArg().ofType(Integer.class).describedAs("ms").defaultsTo(-1);
OptionSpec<Integer> optLength = parser.accepts("length",
"Do the collection for a given time, in milliseconds; -1 to detect automatically.")
.withRequiredArg().ofType(Integer.class).describedAs("ms").defaultsTo(-1);
OptionSpec<Boolean> correctOpt = parser.accepts("fixStartTime",
"Fix the start time by the time it took to launch.")
.withRequiredArg().ofType(Boolean.class).defaultsTo(true);
OptionSet options = ProfilerUtils.parseInitLine(initLine, parser);
delayMs = options.valueOf(optDelay);
lengthMs = options.valueOf(optLength);
shouldFixStartTime = options.valueOf(correctOpt);
xctracePath = XCTraceSupport.getXCTracePath(XCTRACE_VERSION_WITH_COUNTERS_PROFILE_TABLE);
if (options.hasArgument(templateOpt)) {
tracingTemplate = options.valueOf(templateOpt);
} else {
tracingTemplate = unpackDefaultTemplate().getAbsolutePath();
}
temporaryDirectory = XCTraceSupport.createTemporaryDirectoryName();
try {
outputFile = FileUtils.weakTempFile("xctrace-out.xml");
} catch (IOException e) {
throw new ProfilerException(e);
}
}
private static File unpackDefaultTemplate() throws ProfilerException {
try {
return FileUtils.extractFromResource("/default.instruments.template.xml");
} catch (IOException e) {
throw new ProfilerException(e);
}
}
private static XCTraceTableHandler.XCTraceTableDesc findTableDescription(XCTraceTableOfContentsHandler tocHandler) {
XCTraceTableHandler.XCTraceTableDesc tableDesc = tocHandler.getSupportedTables()
.stream()
.filter(t -> t.getTableType() == SUPPORTED_TABLE_TYPE)
.findFirst()
.orElseThrow(() -> new IllegalStateException("Table \"" + SUPPORTED_TABLE_TYPE.tableName +
"\" was not found in the trace results."));
if (tableDesc.getPmcEvents().isEmpty() && tableDesc.getTriggerType() == XCTraceTableHandler.TriggerType.TIME) {
throw new IllegalStateException("Results does not contain any events.");
}
return tableDesc;
}
@Override
public Collection<String> addJVMInvokeOptions(BenchmarkParams params) {
return XCTraceSupport.recordCommandPrefix(xctracePath, temporaryDirectory.toAbsolutePath().toString(),
tracingTemplate);
}
@Override
public Collection<String> addJVMOptions(BenchmarkParams params) {
return Collections.emptyList();
}
@Override
public void beforeTrial(BenchmarkParams benchmarkParams) {
if (!temporaryDirectory.toFile().isDirectory() && !temporaryDirectory.toFile().mkdirs()) {
throw new IllegalStateException();
}
}
@Override
public Collection<? extends Result> afterTrial(BenchmarkResult br, long pid, File stdOut, File stdErr) {
try {
return processResults(br);
} finally {
XCTraceSupport.removeDirectory(temporaryDirectory);
}
}
private Collection<? extends Result> processResults(BenchmarkResult br) {
BenchmarkResultMetaData md = br.getMetadata();
if (md == null) {
return Collections.emptyList();
}
long measurementsDurationMs = md.getStopTime() - md.getMeasurementTime();
if (measurementsDurationMs == 0L) {
return Collections.emptyList();
}
double opsThroughput = md.getMeasurementOps() / (double) measurementsDurationMs;
// Find the resulting file and extract metadata from it
Path traceFile = XCTraceSupport.findTraceFile(temporaryDirectory);
XCTraceSupport.exportTableOfContents(xctracePath, traceFile.toAbsolutePath().toString(),
outputFile.getAbsolutePath());
XCTraceTableOfContentsHandler tocHandler = new XCTraceTableOfContentsHandler();
tocHandler.parse(outputFile.file());
// Get info about a table with profiling results
XCTraceTableHandler.XCTraceTableDesc tableDesc = findTableDescription(tocHandler);
// Extract profiling results
XCTraceSupport.exportTable(xctracePath, traceFile.toAbsolutePath().toString(), outputFile.getAbsolutePath(),
SUPPORTED_TABLE_TYPE);
// Please refer to XCTraceAsmProfiler::readEvents for detailed explanation,
// but briefly, ProfilerUtils::measurementDelayMs uses the time when a fork was started,
// and it's not the actual start time.
// The actual start time is the time xctrace was launched (tocHandler.getRecordStartMs),
// and we're correcting measurement delay by the difference between these two timestamps.
long timeCorrectionMs = 0;
if (shouldFixStartTime) {
timeCorrectionMs = tocHandler.getRecordStartMs() - md.getStartTime();
}
long skipMs = delayMs;
if (skipMs == -1L) {
skipMs = ProfilerUtils.measurementDelayMs(br);
}
skipMs -= timeCorrectionMs;
long durationMs = lengthMs;
if (durationMs == -1L) {
durationMs = ProfilerUtils.measuredTimeMs(br);
}
long skipNs = skipMs * 1_000_000;
long durationNs = durationMs * 1_000_000;
AggregatedEvents aggregator = new AggregatedEvents(tableDesc);
new XCTraceTableProfileHandler(SUPPORTED_TABLE_TYPE, sample -> {
if (sample.getTimeFromStartNs() <= skipNs || sample.getTimeFromStartNs() > skipNs + durationNs) {
return;
}
aggregator.add(sample);
}).parse(outputFile.file());
if (aggregator.getEventsCount() == 0) {
return Collections.emptyList();
}
Collection<Result<?>> results = new ArrayList<>();
computeAggregates(results, aggregator);
aggregator.normalizeByThroughput(opsThroughput);
for (int i = 0; i < tableDesc.getPmcEvents().size(); i++) {
String event = tableDesc.getPmcEvents().get(i);
results.add(new ScalarResult(event, aggregator.getEventValues()[i],
"#/op", AggregationPolicy.AVG));
}
if (tableDesc.getTriggerType() == XCTraceTableHandler.TriggerType.PMI) {
results.add(new ScalarResult(tableDesc.triggerEvent(),
aggregator.getEventValues()[aggregator.getEventValues().length - 1],
"#/op", AggregationPolicy.AVG));
}
return results;
}
private void computeAggregates(Collection<Result<?>> results, AggregatedEvents aggregator) {
computeCommonMetrics(results, aggregator);
if (System.getProperty("os.arch").equals("aarch64")) {
computeAppleSiliconArm64InstDensityMetrics(results, aggregator);
}
}
private static void computeCommonMetrics(Collection<Result<?>> results, AggregatedEvents aggregator) {
CounterValue cycles = aggregator.getAnyOfOrNull(CYCLES_EVENT_NAMES);
CounterValue insts = aggregator.getAnyOfOrNull(INSTRUCTIONS_EVENT_NAMES);
if (cycles != null && cycles.getValue() != 0D && insts != null && insts.getValue() != 0D) {
results.add(new ScalarResult("CPI", cycles.getValue() / insts.getValue(),
"clks/insn", AggregationPolicy.AVG));
results.add(new ScalarResult("IPC", insts.getValue() / cycles.getValue(),
"insns/clk", AggregationPolicy.AVG));
}
CounterValue branches = aggregator.getAnyOfOrNull(BRANCH_EVENT_NAMES);
CounterValue missedBranches = aggregator.getAnyOfOrNull(BRANCH_MISS_EVENT_NAMES);
if (branches != null && branches.getValue() != 0D && missedBranches != null) {
results.add(new ScalarResult("Branch miss ratio", missedBranches.getValue() / branches.getValue(),
"miss/brs", AggregationPolicy.AVG));
}
}
// Compute instructions density metrics (defined in Apple Silicon CPU Optimization Guide,
// https://developer.apple.com/documentation/apple-silicon/cpu-optimization-guide).
private static void computeAppleSiliconArm64InstDensityMetrics(Collection<Result<?>> results,
AggregatedEvents aggregator) {
CounterValue insts = aggregator.getAnyOfOrNull(INSTRUCTIONS_EVENT_NAMES);
if (insts == null) {
return;
}
for (String eventName : aggregator.getEventNames()) {
if (!eventName.startsWith("INST_") || eventName.equals("INST_ALL")) {
continue;
}
Double value = aggregator.getCountOrNull(eventName);
if (value == null || value == 0D) {
continue;
}
results.add(new ScalarResult(eventName + " density (of insns)", value / insts.getValue(),
"#/insn", AggregationPolicy.AVG));
}
}
@Override
public boolean allowPrintOut() {
return true;
}
@Override
public boolean allowPrintErr() {
return false;
}
@Override
public String getDescription() {
return "macOS xctrace (Instruments) PMU counter statistics, normalized by operation count";
}
private static class AggregatedEvents {
private final List<String> eventNames;
private final double[] eventValues;
private long eventsCount = 0;
private long minTimestampMs = Long.MAX_VALUE;
private long maxTimestampMs = Long.MIN_VALUE;
AggregatedEvents(XCTraceTableHandler.XCTraceTableDesc tableDesc) {
List<String> names = new ArrayList<>(tableDesc.getPmcEvents());
names.add(tableDesc.triggerEvent());
eventNames = Collections.unmodifiableList(names);
eventValues = new double[getEventNames().size()];
}
void add(XCTraceTableProfileHandler.XCTraceSample sample) {
long[] counters = sample.getPmcValues();
for (int i = 0; i < counters.length; i++) {
eventValues[i] += counters[i];
}
eventValues[eventValues.length - 1] = sample.getWeight();
minTimestampMs = Math.min(minTimestampMs, sample.getTimeFromStartNs());
maxTimestampMs = Math.max(maxTimestampMs, sample.getTimeFromStartNs());
eventsCount += 1;
}
void normalizeByThroughput(double throughput) {
if (maxTimestampMs == minTimestampMs) {
throw new IllegalStateException("Min and max timestamps are the same.");
}
double timeSpanMs = (maxTimestampMs - minTimestampMs) / 1e6;
for (int i = 0; i < getEventValues().length; i++) {
eventValues[i] = eventValues[i] / timeSpanMs / throughput;
}
}
CounterValue getAnyOfOrNull(String... keys) {
for (String key : keys) {
Double value = getCountOrNull(key);
if (value != null) {
return new CounterValue(key, value);
}
}
return null;
}
Double getCountOrNull(String event) {
int idx = eventNames.indexOf(event);
if (idx == -1) return null;
return eventValues[idx];
}
List<String> getEventNames() {
return eventNames;
}
double[] getEventValues() {
return eventValues;
}
long getEventsCount() {
return eventsCount;
}
}
private static class CounterValue {
private final String name;
private final double value;
CounterValue(String name, double value) {
this.name = name;
this.value = value;
}
double getValue() {
return value;
}
String getName() {
return name;
}
}
}