// PomMemoryAnalyzer.java
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.maven.model.pom;
import java.lang.reflect.Field;
import java.lang.reflect.Modifier;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.IdentityHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;

import org.apache.maven.api.model.Model;
import org.apache.maven.model.v4.MavenStaxReader;
/**
 * A utility class that analyzes Maven POM models to identify duplicate strings and estimate how much
 * memory string deduplication could save.
 *
 * <p>Each analyzed model is walked reflectively. Every string value that is found is recorded under a
 * slash-separated path built from the names of the fields that lead to it (for example
 * {@code /project/dependencies/dependency/groupId}). Memory figures are estimates computed as two bytes
 * per character of each recorded string; object headers and other overhead are not included.</p>
 *
 * <p>Usage example:</p>
 * <pre>
 * PomMemoryAnalyzer analyzer = new PomMemoryAnalyzer();
 * try (InputStream in = Files.newInputStream(pomPath)) {
 *     analyzer.analyzePom(new MavenStaxReader().read(in));
 * }
 * analyzer.printAnalysis();
 * </pre>
 *
 * <p>The analysis output includes:</p>
 * <ul>
 * <li>Estimated total memory usage per POM path</li>
 * <li>Potential memory savings through string deduplication</li>
 * <li>Most frequent string values and their occurrence counts</li>
 * <li>Statistics grouped by the final component of the path</li>
 * </ul>
 *
 * <p>Instances accumulate results in plain, unsynchronized collections, so a single instance should
 * not be shared between threads without external synchronization.</p>
 */
public class PomMemoryAnalyzer {
    /** Estimated number of bytes used per character of a string. */
    private static final long BYTES_PER_CHAR = 2L;

    /** Number of most frequent values reported per path and per group. */
    private static final int TOP_VALUES_LIMIT = 5;

    /** Per path: every distinct string value seen at that path together with its occurrence count. */
    private final Map<String, Map<String, StringStats>> pathStats = new HashMap<>();

    /** Occurrence count of every string value regardless of path; collected but not printed by this class. */
    private final Map<String, Integer> globalStringFrequency = new HashMap<>();

    /** Number of models passed to {@link #analyzePom(Model)}. */
    private int totalPoms = 0;

    /**
     * Command line entry point: analyzes every {@code pom.xml} found below the given directory, skipping
     * those located under a {@code src} or {@code target} directory inside it, then prints the analysis.
     *
     * @param args {@code args[0]} is the directory to scan
     * @throws Exception if the directory tree cannot be walked
     */
    public static void main(String[] args) throws Exception {
        if (args.length < 1) {
            System.out.println("Usage: PomMemoryAnalyzer <directory-with-poms>");
            System.exit(1);
        }
        Path rootDir = Paths.get(args[0]);
        PomMemoryAnalyzer analyzer = new PomMemoryAnalyzer();
        MavenStaxReader reader = new MavenStaxReader();
        // Files.walk keeps directory handles open until the stream is closed.
        try (var candidates = Files.walk(rootDir)) {
            candidates
                    .filter(PomMemoryAnalyzer::isPomFile)
                    // Only look at directories below the scan root, so that a root which itself lives
                    // under a directory named "src" or "target" does not exclude every POM.
                    .filter(path -> !containsSrcOrTarget(rootDir.relativize(path)))
                    .forEach(pomPath -> analyzeFile(analyzer, reader, pomPath));
        }
        analyzer.printAnalysis();
    }

    /** Returns whether the last name element of the path is exactly {@code pom.xml}. */
    private static boolean isPomFile(Path path) {
        // getFileName() is null for a path without name elements, e.g. a filesystem root.
        Path fileName = path.getFileName();
        return fileName != null && fileName.toString().equals("pom.xml");
    }

    /** Reads one POM file and feeds it to the analyzer; failures are reported on stderr and skipped. */
    private static void analyzeFile(PomMemoryAnalyzer analyzer, MavenStaxReader reader, Path pomPath) {
        try (var in = Files.newInputStream(pomPath)) {
            Model model = reader.read(in);
            analyzer.analyzePom(model);
        } catch (Exception e) {
            // One unreadable POM must not abort the whole scan.
            System.err.println("Error processing " + pomPath + ": " + e.getMessage());
        }
    }

    /**
     * Returns whether any directory in the parent chain of the given path is named {@code src} or
     * {@code target}.
     *
     * @param pomPath path whose parent directories are inspected
     * @return {@code true} if such a directory is found
     */
    private static boolean containsSrcOrTarget(Path pomPath) {
        for (Path dir = pomPath.getParent(); dir != null && dir.getFileName() != null; dir = dir.getParent()) {
            String dirName = dir.getFileName().toString();
            if (dirName.equals("src") || dirName.equals("target")) {
                return true;
            }
        }
        return false;
    }

    /**
     * Walks the given model and records every string value found in it.
     *
     * @param model the model to analyze; a {@code null} model is counted but contributes no strings
     */
    public void analyzePom(Model model) {
        totalPoms++;
        // Identity semantics: distinct objects that happen to be equal() are separate copies in memory
        // and must each be walked, and the walk must not depend on the nodes' equals()/hashCode().
        Set<Object> visited = Collections.newSetFromMap(new IdentityHashMap<>());
        processModelNode(model, "/project", "project", visited);
    }

    /**
     * Reflectively visits all non-static, non-synthetic fields of the node, including inherited ones,
     * and processes every non-null field value.
     *
     * @param node the object to inspect; {@code null} and already visited objects are skipped
     * @param currentPath the path under which the node's fields are recorded
     * @param elementName the name of the element the node was reached through (not used for recording)
     * @param visited the objects already walked for the current model
     */
    private void processModelNode(Object node, String currentPath, String elementName, Set<Object> visited) {
        if (node == null || !visited.add(node)) {
            return;
        }
        for (Class<?> clazz = node.getClass();
                clazz != null && !clazz.equals(Object.class);
                clazz = clazz.getSuperclass()) {
            for (Field field : clazz.getDeclaredFields()) {
                if (Modifier.isStatic(field.getModifiers()) || field.isSynthetic()) {
                    continue;
                }
                try {
                    field.setAccessible(true);
                    Object value = field.get(node);
                    if (value != null) {
                        processValue(value, currentPath + "/" + field.getName(), field.getName(), visited);
                    }
                } catch (IllegalAccessException | RuntimeException ignored) {
                    // Deliberate best effort: a field that cannot be opened or read (or whose content
                    // fails while being walked) is skipped so the remaining fields are still analyzed.
                }
            }
        }
    }

    /**
     * Processes a single non-null value: strings are recorded, list items and map values are processed
     * recursively, and objects of non-{@code java.*} classes are walked as nested nodes. Any other value
     * (boxed primitives and other JDK types) is ignored.
     *
     * @param value the non-null value to process
     * @param path the path under which the value is recorded
     * @param elementName the field name or map key the value was reached through
     * @param visited the objects already walked for the current model
     */
    private void processValue(Object value, String path, String elementName, Set<Object> visited) {
        if (value instanceof String strValue) {
            recordString(path, strValue);
            globalStringFrequency.merge(strValue, 1, Integer::sum);
        } else if (value instanceof List<?> list) {
            String itemName = getSingular(elementName);
            String itemPath = path + "/" + itemName;
            for (Object item : list) {
                if (item != null) {
                    processValue(item, itemPath, itemName, visited);
                }
            }
        } else if (value instanceof Map<?, ?> map) {
            for (Map.Entry<?, ?> entry : map.entrySet()) {
                if (entry.getValue() != null) {
                    // String.valueOf tolerates a null key instead of aborting the rest of the map.
                    String key = String.valueOf(entry.getKey());
                    processValue(entry.getValue(), path + "/" + key, key, visited);
                }
            }
        } else if (!value.getClass().getName().startsWith("java.")) {
            processModelNode(value, path, elementName, visited);
        }
    }

    /**
     * Derives a singular element name from a plural one using two simple suffix rules:
     * a trailing {@code ies} becomes {@code y}, otherwise a trailing {@code s} is removed.
     *
     * @param plural the plural name
     * @return the derived singular name, or the input unchanged if neither rule applies
     */
    private static String getSingular(String plural) {
        if (plural.endsWith("ies")) {
            return plural.substring(0, plural.length() - 3) + "y";
        }
        if (plural.endsWith("s")) {
            return plural.substring(0, plural.length() - 1);
        }
        return plural;
    }

    /** Counts one occurrence of the value at the given path. */
    private void recordString(String path, String value) {
        pathStats
                .computeIfAbsent(path, k -> new HashMap<>())
                .computeIfAbsent(value, k -> new StringStats())
                .recordOccurrence(value);
    }

    /**
     * Builds the per-path statistics for everything recorded so far.
     *
     * @return one entry per path, sorted by potential savings in descending order
     */
    List<PathAnalysis> getPathAnalysisSorted() {
        List<PathAnalysis> analysis = new ArrayList<>(pathStats.size());
        for (Map.Entry<String, Map<String, StringStats>> entry : pathStats.entrySet()) {
            analysis.add(analyzePath(entry.getKey(), entry.getValue()));
        }
        analysis.sort((a, b) -> Long.compare(b.potentialSavings(), a.potentialSavings()));
        return analysis;
    }

    /** Computes the statistics of a single path from the values recorded for it. */
    private PathAnalysis analyzePath(String path, Map<String, StringStats> stats) {
        long uniqueStrings = stats.size();
        long totalOccurrences = 0;
        long totalMemory = 0;
        long potentialSavings = 0;
        for (Map.Entry<String, StringStats> entry : stats.entrySet()) {
            long occurrences = entry.getValue().getOccurrences();
            long bytesPerCopy = entry.getKey().length() * BYTES_PER_CHAR;
            totalOccurrences += occurrences;
            totalMemory += bytesPerCopy * occurrences;
            // Deduplication keeps one copy and saves all the others.
            potentialSavings += bytesPerCopy * (occurrences - 1);
        }
        double duplicationRatio = uniqueStrings == 0 ? 0.0 : (double) totalOccurrences / uniqueStrings;
        return new PathAnalysis(
                path,
                uniqueStrings,
                totalOccurrences,
                totalMemory,
                potentialSavings,
                duplicationRatio,
                getMostFrequentValues(stats, TOP_VALUES_LIMIT));
    }

    /** Returns at most {@code limit} values of the given stats, most frequent first. */
    private List<ValueFrequency> getMostFrequentValues(Map<String, StringStats> stats, int limit) {
        return stats.entrySet().stream()
                .map(e -> new ValueFrequency(e.getKey(), e.getValue().getOccurrences()))
                .sorted((a, b) -> Long.compare(b.frequency(), a.frequency()))
                .limit(limit)
                .toList();
    }

    /**
     * Prints the analysis to standard output: the number of analyzed POMs followed by one section per
     * group of paths sharing the same final path component, ordered by potential savings.
     */
    public void printAnalysis() {
        System.out.printf("Analyzed %d POMs%n%n", totalPoms);
        for (GroupAnalysis group : getGroupAnalysisSorted()) {
            printGroup(group);
        }
    }

    /** Groups the per-path analyses by final path component, sorted by total savings descending. */
    private List<GroupAnalysis> getGroupAnalysisSorted() {
        // groupingBy keeps encounter order inside each group, so the paths of a group stay sorted
        // by potential savings as delivered by getPathAnalysisSorted().
        Map<String, List<PathAnalysis>> groupedPaths = getPathAnalysisSorted().stream()
                .collect(Collectors.groupingBy(p -> p.path().substring(p.path().lastIndexOf('/') + 1)));
        return groupedPaths.entrySet().stream()
                .map(entry -> analyzeGroup(entry.getKey(), entry.getValue()))
                .sorted((a, b) -> Long.compare(b.totalSavings(), a.totalSavings()))
                .toList();
    }

    /** Aggregates the statistics of all paths belonging to one group. */
    private GroupAnalysis analyzeGroup(String groupName, List<PathAnalysis> paths) {
        // Aggregate from the complete per-path statistics rather than from each path's top values,
        // otherwise unique/occurrence totals are undercounted for paths with many distinct values.
        Map<String, Long> valueFreqs = new HashMap<>();
        long totalSavings = 0;
        long totalMemory = 0;
        for (PathAnalysis path : paths) {
            totalSavings += path.potentialSavings();
            totalMemory += path.totalMemory();
            Map<String, StringStats> stats = pathStats.get(path.path());
            if (stats != null) {
                stats.forEach((value, stat) -> valueFreqs.merge(value, stat.getOccurrences(), Long::sum));
            }
        }
        long totalOccurrences =
                valueFreqs.values().stream().mapToLong(Long::longValue).sum();
        List<ValueFrequency> topValues = valueFreqs.entrySet().stream()
                .map(e -> new ValueFrequency(e.getKey(), e.getValue()))
                .sorted((a, b) -> Long.compare(b.frequency(), a.frequency()))
                .limit(TOP_VALUES_LIMIT)
                .toList();
        return new GroupAnalysis(
                groupName, paths, valueFreqs.size(), totalOccurrences, totalMemory, totalSavings, topValues);
    }

    /** Prints the section of a single group. */
    private static void printGroup(GroupAnalysis group) {
        double duplicationRatio =
                group.totalUnique() == 0 ? 0.0 : (double) group.totalOccurrences() / group.totalUnique();
        System.out.printf("%nPaths ending with '%s':%n", group.name());
        System.out.printf("Total potential savings: %dKB%n", group.totalSavings() / 1024);
        System.out.printf("Total memory: %dKB%n", group.totalMemory() / 1024);
        System.out.printf("Total unique values: %d%n", group.totalUnique());
        System.out.printf("Total occurrences: %d%n", group.totalOccurrences());
        System.out.printf("Duplication ratio: %.2f%n", duplicationRatio);
        System.out.println("\nMost frequent values across all paths:");
        for (ValueFrequency v : group.mostFrequentValues()) {
            System.out.printf("  %-70s %d times%n", v.value(), v.frequency());
        }
        System.out.println("\nIndividual paths:");
        System.out.println("----------------------------------------");
        for (PathAnalysis path : group.paths()) {
            System.out.printf(
                    "%-90s %6dKB %6dKB%n", path.path(), path.totalMemory() / 1024, path.potentialSavings() / 1024);
        }
        System.out.println();
    }

    /** Mutable occurrence counter for one string value at one path. */
    static class StringStats {
        private long occurrences = 0;

        /**
         * Counts one more occurrence.
         *
         * @param value the value that occurred (not used by the counter itself)
         */
        public void recordOccurrence(String value) {
            occurrences++;
        }

        /** Returns the number of occurrences recorded so far. */
        public long getOccurrences() {
            return occurrences;
        }
    }

    /** Aggregated statistics of all paths that share the same final path component. */
    record GroupAnalysis(
            String name,
            List<PathAnalysis> paths,
            long totalUnique,
            long totalOccurrences,
            long totalMemory,
            long totalSavings,
            List<ValueFrequency> mostFrequentValues) {}

    /** Statistics of the string values recorded at a single path; memory figures are estimated bytes. */
    record PathAnalysis(
            String path,
            long uniqueStrings,
            long totalOccurrences,
            long totalMemory,
            long potentialSavings,
            double duplicationRatio,
            List<ValueFrequency> mostFrequentValues) {}

    /** A string value together with the number of times it occurred. */
    record ValueFrequency(String value, long frequency) {}
}