AoclDiagnosticOutputParser.java
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
* <p>
* http://www.apache.org/licenses/LICENSE-2.0
* <p>
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.fpga;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.fpga.IntelFpgaOpenclPlugin.InnerShellExecutor;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
final class AoclDiagnosticOutputParser {
private AoclDiagnosticOutputParser() {
// no instances
}
private static final Logger LOG = LoggerFactory.getLogger(
AoclDiagnosticOutputParser.class);
/**
* One real sample output of Intel FPGA SDK 17.0's "aocl diagnose" is as below:
* "
* aocl diagnose: Running diagnose from /home/fpga/intelFPGA_pro/17.0/hld/board/nalla_pcie/linux64/libexec
*
* ------------------------- acl0 -------------------------
* Vendor: Nallatech ltd
*
* Phys Dev Name Status Information
*
* aclnalla_pcie0Passed nalla_pcie (aclnalla_pcie0)
* PCIe dev_id = 2494, bus:slot.func = 02:00.00, Gen3 x8
* FPGA temperature = 54.4 degrees C.
* Total Card Power Usage = 31.7 Watts.
* Device Power Usage = 0.0 Watts.
*
* DIAGNOSTIC_PASSED
* ---------------------------------------------------------
* "
*
* While per Intel's guide, the output(should be outdated or prior SDK version's) is as below:
*
* "
* aocl diagnose: Running diagnostic from ALTERAOCLSDKROOT/board/<board_name>/
* <platform>/libexec
* Verified that the kernel mode driver is installed on the host machine.
* Using board package from vendor: <board_vendor_name>
* Querying information for all supported devices that are installed on the host
* machine ...
*
* device_name Status Information
*
* acl0 Passed <descriptive_board_name>
* PCIe dev_id = <device_ID>, bus:slot.func = 02:00.00,
* at Gen 2 with 8 lanes.
* FPGA temperature=43.0 degrees C.
* acl1 Passed <descriptive_board_name>
* PCIe dev_id = <device_ID>, bus:slot.func = 03:00.00,
* at Gen 2 with 8 lanes.
* FPGA temperature = 35.0 degrees C.
*
* Found 2 active device(s) installed on the host machine, to perform a full
* diagnostic on a specific device, please run aocl diagnose <device_name>
*
* DIAGNOSTIC_PASSED
* "
* But this method only support the first output
* */
public static List<FpgaDevice> parseDiagnosticOutput(
String output, InnerShellExecutor shellExecutor, String fpgaType) {
if (output.contains("DIAGNOSTIC_PASSED")) {
List<FpgaDevice> devices = new ArrayList<>();
Matcher headerStartMatcher = Pattern.compile("acl[0-31]")
.matcher(output);
Matcher headerEndMatcher = Pattern.compile("(?i)DIAGNOSTIC_PASSED")
.matcher(output);
int sectionStartIndex;
int sectionEndIndex;
String aliasName;
while (headerStartMatcher.find()) {
sectionStartIndex = headerStartMatcher.end();
String section = null;
aliasName = headerStartMatcher.group();
while (headerEndMatcher.find(sectionStartIndex)) {
sectionEndIndex = headerEndMatcher.start();
section = output.substring(sectionStartIndex, sectionEndIndex);
break;
}
if (section == null) {
LOG.warn("Unsupported diagnose output");
LOG.warn("aocl output is: " + output);
return Collections.emptyList();
}
// devName, \(.*\)
// busNum, bus:slot.func\s=\s.*,
// FPGA temperature\s=\s.*
// Total\sCard\sPower\sUsage\s=\s.*
String[] fieldRegexes = new String[]{"\\(.*\\)\n",
"(?i)bus:slot.func\\s=\\s.*,",
"(?i)FPGA temperature\\s=\\s.*",
"(?i)Total\\sCard\\sPower\\sUsage\\s=\\s.*"};
String[] fields = new String[4];
String tempFieldValue;
for (int i = 0; i < fieldRegexes.length; i++) {
Matcher fieldMatcher = Pattern.compile(fieldRegexes[i])
.matcher(section);
if (!fieldMatcher.find()) {
LOG.warn("Couldn't find " + fieldRegexes[i] + " pattern");
fields[i] = "";
continue;
}
tempFieldValue = fieldMatcher.group().trim();
if (i == 0) {
// special case for Device name
fields[i] = tempFieldValue.substring(1,
tempFieldValue.length() - 1);
} else {
String ss = tempFieldValue.split("=")[1].trim();
fields[i] = ss.substring(0, ss.length() - 1);
}
}
String majorMinorNumber = shellExecutor
.getMajorAndMinorNumber(fields[0]);
if (null != majorMinorNumber) {
String[] mmn = majorMinorNumber.split(":");
devices.add(new FpgaDevice(fpgaType,
Integer.parseInt(mmn[0]),
Integer.parseInt(mmn[1]),
aliasName));
} else {
LOG.warn("Failed to retrieve major/minor number for device");
}
}
return devices;
} else {
LOG.warn("The diagnostic has failed");
LOG.warn("Output of aocl is: " + output);
return Collections.emptyList();
}
}
}