// JavadocParser.java
/*
* Copyright (C) 2007-2010 Júlio Vilmar Gesser.
* Copyright (C) 2011, 2013-2024 The JavaParser Team.
*
* This file is part of JavaParser.
*
* JavaParser can be used either under the terms of
* a) the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* b) the terms of the Apache License
*
* You should have received a copy of both licenses in LICENCE.LGPL and
* LICENCE.APACHE. Please refer to those files for details.
*
* JavaParser is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*/
package com.github.javaparser;
import static com.github.javaparser.utils.Utils.*;
import com.github.javaparser.ast.comments.JavadocComment;
import com.github.javaparser.javadoc.Javadoc;
import com.github.javaparser.javadoc.JavadocBlockTag;
import com.github.javaparser.javadoc.description.JavadocDescription;
import com.github.javaparser.utils.LineSeparator;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
/**
 * Parses the textual content of Javadoc comments and produces {@link Javadoc} documents.
 * <p>
 * The content is split into lines, the leading comment decoration of every line (an asterisk, or the
 * {@code ///} of a markdown comment) is stripped, and the remaining text is divided into a description
 * and a sequence of block tags.
 * <a href="https://docs.oracle.com/javase/1.5.0/docs/tooldocs/windows/javadoc.html">The Javadoc specification.</a>
 */
class JavadocParser {

    /** The character sequence that introduces a block tag at the beginning of a line. */
    private static final String BLOCK_TAG_PREFIX = "@";

    /**
     * Matches the beginning of a block tag: start of a line, optional whitespace, then the tag prefix.
     * Compiled once because it is reused by every call to {@link #parse(String, boolean)}.
     */
    private static final Pattern BLOCK_PATTERN =
            Pattern.compile("^\\s*" + BLOCK_TAG_PREFIX, Pattern.MULTILINE);

    /**
     * Parses the content of the given comment.
     *
     * @param comment the comment whose content is parsed
     * @return the parsed Javadoc document
     */
    public static Javadoc parse(JavadocComment comment) {
        return parse(comment.getContent(), comment.isMarkdownComment());
    }

    /**
     * Parses the given comment content as a non-markdown comment.
     *
     * @param commentContent the text of the comment
     * @return the parsed Javadoc document
     */
    public static Javadoc parse(String commentContent) {
        return parse(commentContent, false);
    }

    /**
     * Parses the given comment content.
     * <p>
     * Everything before the first line that starts with a block tag becomes the description; the
     * remaining lines are grouped into block tags, so a tag may span several lines.
     *
     * @param commentContent the text of the comment
     * @param isMarkdownComment passed on unchanged to the resulting {@link Javadoc}
     * @return the parsed Javadoc document
     */
    public static Javadoc parse(String commentContent, boolean isMarkdownComment) {
        String eol = LineSeparator.SYSTEM.asRawString();
        List<String> lines =
                cleanLines(normalizeEolInTextBlock(commentContent, LineSeparator.SYSTEM), isMarkdownComment);
        int indexOfFirstBlockTag = indexOfFirstBlockLine(lines);

        List<String> blockLines;
        String descriptionText;
        if (indexOfFirstBlockTag == -1) {
            descriptionText = trimRight(String.join(eol, lines));
            blockLines = Collections.emptyList();
        } else {
            descriptionText = trimRight(String.join(eol, lines.subList(0, indexOfFirstBlockTag)));
            // Combine the cleaned lines from the first block tag till the end. In this combined string
            // it is easier to handle multiple lines which actually belong to the same tag.
            String tagBlock = String.join(eol, lines.subList(indexOfFirstBlockTag, lines.size()));
            // Split the combined text at each line that starts with the tag prefix. Splitting consumes
            // the prefix, so it is added back for the block tag parser. The text before the first
            // match is an empty string and is dropped.
            blockLines = BLOCK_PATTERN
                    .splitAsStream(tagBlock)
                    .filter(part -> !part.isEmpty())
                    .map(part -> BLOCK_TAG_PREFIX + part)
                    .collect(Collectors.toList());
        }

        Javadoc document = new Javadoc(JavadocDescription.parseText(descriptionText), isMarkdownComment);
        for (String blockLine : blockLines) {
            document.addBlockTag(parseBlockTag(blockLine));
        }
        return document;
    }

    /**
     * Returns the index of the first line that starts a block tag, or -1 when there is none.
     */
    private static int indexOfFirstBlockLine(List<String> lines) {
        for (int i = 0; i < lines.size(); i++) {
            if (isABlockLine(lines.get(i))) {
                return i;
            }
        }
        return -1;
    }

    /**
     * Builds a block tag from text that starts (after trimming) with the tag prefix.
     * The tag name is whatever {@code nextWord} returns for the text following the prefix;
     * the trimmed remainder becomes the tag content.
     */
    private static JavadocBlockTag parseBlockTag(String line) {
        // drop surrounding whitespace and the single-character tag prefix
        String withoutPrefix = line.trim().substring(1);
        String tagName = nextWord(withoutPrefix);
        String rest = withoutPrefix.substring(tagName.length()).trim();
        return new JavadocBlockTag(tagName, rest);
    }

    /**
     * Tells whether the line, ignoring surrounding whitespace, starts with the block tag prefix.
     */
    private static boolean isABlockLine(String line) {
        return line.trim().startsWith(BLOCK_TAG_PREFIX);
    }

    /**
     * Removes trailing whitespace (as defined by {@link Character#isWhitespace(char)}) from the string.
     */
    private static String trimRight(String string) {
        // find the cut point first so that only one substring is created
        int end = string.length();
        while (end > 0 && Character.isWhitespace(string.charAt(end - 1))) {
            end--;
        }
        return string.substring(0, end);
    }

    /**
     * Splits the content on the system line separator and removes the comment decoration.
     * <p>
     * For every line the leading asterisk or markdown slashes are stripped together with one following
     * space or tab, lines containing only whitespace become empty, a single leading space or tab is
     * removed from the first line, and empty lines at the beginning and at the end are dropped.
     *
     * @param content the comment text, expected to use the system line separator
     * @param isMarkdownComment currently not consulted: both prefix styles are recognised on every line
     * @return the cleaned lines, possibly empty
     */
    private static List<String> cleanLines(String content, boolean isMarkdownComment) {
        String[] lines = content.split(LineSeparator.SYSTEM.asRawString());
        if (lines.length == 0) {
            return Collections.emptyList();
        }

        List<String> cleanedLines = Arrays.stream(lines)
                .map(JavadocParser::stripLinePrefix)
                // lines containing only whitespace are normalized to empty lines
                .map(l -> l.trim().isEmpty() ? "" : l)
                .collect(Collectors.toList());

        // if the first line starts with a space or tab, remove it
        String firstLine = cleanedLines.get(0);
        if (!firstLine.isEmpty() && (firstLine.charAt(0) == ' ' || firstLine.charAt(0) == '\t')) {
            cleanedLines.set(0, firstLine.substring(1));
        }

        // drop empty lines at the beginning and at the end, using a single view over the list
        int start = 0;
        while (start < cleanedLines.size() && cleanedLines.get(start).trim().isEmpty()) {
            start++;
        }
        int end = cleanedLines.size();
        while (end > start && cleanedLines.get(end - 1).trim().isEmpty()) {
            end--;
        }
        return cleanedLines.subList(start, end);
    }

    /**
     * Removes the leading comment decoration from a single line: everything up to and including the
     * marker found by {@link #startsWithAsteriskOrMdSlash(String)}, plus one space or tab directly after
     * it. Lines without such a marker are returned unchanged.
     */
    private static String stripLinePrefix(String line) {
        int markerIndex = startsWithAsteriskOrMdSlash(line);
        if (markerIndex == -1) {
            return line;
        }
        int contentStart = markerIndex + 1;
        if (contentStart < line.length()) {
            char next = line.charAt(contentStart);
            if (next == ' ' || next == '\t') {
                contentStart++;
            }
        }
        return line.substring(contentStart);
    }

    /**
     * Given a line in a block or markdown comment, this method finds the index of the * or / at the start of the line.
     * For markdown comments where lines start with ///, this would be the index of the third /. This is used to strip
     * the relevant prefix string when cleaning lines as part of the Javadoc parsing process.
     * <p>
     * Only spaces and tabs may precede the marker (they are also tolerated between the slashes). An
     * asterisk is only accepted when no slash has been seen before it. In every other case, including
     * lines with fewer than three slashes, -1 is returned.
     * It is made visible for testing
     *
     * @param line the line to inspect
     * @return the index of the asterisk or of the third slash, or -1 if the line has no such prefix
     */
    static int startsWithAsteriskOrMdSlash(String line) {
        int mdSlashCount = 0;
        for (int i = 0; i < line.length(); i++) {
            char currentChar = line.charAt(i);
            if (currentChar == '/') {
                if (mdSlashCount == 2) {
                    return i;
                }
                mdSlashCount++;
            } else if (currentChar == '*' && mdSlashCount == 0) {
                return i;
            } else if (currentChar != ' ' && currentChar != '\t') {
                return -1;
            }
        }
        return -1;
    }
}