SummaryJavadocCheck.java
///////////////////////////////////////////////////////////////////////////////////////////////
// checkstyle: Checks Java source code and other text files for adherence to a set of rules.
// Copyright (C) 2001-2024 the original author or authors.
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
///////////////////////////////////////////////////////////////////////////////////////////////
package com.puppycrawl.tools.checkstyle.checks.javadoc;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.BitSet;
import java.util.List;
import java.util.Optional;
import java.util.regex.Pattern;
import java.util.stream.Stream;
import javax.annotation.Nullable;
import com.puppycrawl.tools.checkstyle.StatelessCheck;
import com.puppycrawl.tools.checkstyle.api.DetailNode;
import com.puppycrawl.tools.checkstyle.api.JavadocTokenTypes;
import com.puppycrawl.tools.checkstyle.utils.CommonUtil;
import com.puppycrawl.tools.checkstyle.utils.JavadocUtil;
import com.puppycrawl.tools.checkstyle.utils.TokenUtil;
/**
* <p>
* Checks that
* <a href="https://www.oracle.com/technical-resources/articles/java/javadoc-tool.html#firstsentence">
* Javadoc summary sentence</a> does not contain phrases that are not recommended to use.
* Summaries that contain only the {@code {@inheritDoc}} tag are skipped.
* Summaries that contain a non-empty {@code {@return}} are allowed.
* Check also violate Javadoc that does not contain first sentence, though with {@code {@return}} a
* period is not required as the Javadoc tool adds it.
* </p>
* <ul>
* <li>
* Property {@code forbiddenSummaryFragments} - Specify the regexp for forbidden summary fragments.
* Type is {@code java.util.regex.Pattern}.
* Default value is {@code "^$"}.
* </li>
* <li>
* Property {@code period} - Specify the period symbol. Used to check the first sentence ends with a
* period. Periods that are not followed by a whitespace character are ignored (eg. the period in
* v1.0). Because some periods include whitespace built into the character, if this is set to a
* non-default value any period will end the sentence, whether it is followed by whitespace or not.
* Type is {@code java.lang.String}.
* Default value is {@code "."}.
* </li>
* <li>
* Property {@code violateExecutionOnNonTightHtml} - Control when to print violations
* if the Javadoc being examined by this check violates the tight html rules defined at
* <a href="https://checkstyle.org/writingjavadocchecks.html#Tight-HTML_rules">Tight-HTML Rules</a>.
* Type is {@code boolean}.
* Default value is {@code false}.
* </li>
* </ul>
* <p>
* Parent is {@code com.puppycrawl.tools.checkstyle.TreeWalker}
* </p>
* <p>
* Violation Message Keys:
* </p>
* <ul>
* <li>
* {@code javadoc.missed.html.close}
* </li>
* <li>
* {@code javadoc.parse.rule.error}
* </li>
* <li>
* {@code javadoc.unclosedHtml}
* </li>
* <li>
* {@code javadoc.wrong.singleton.html.tag}
* </li>
* <li>
* {@code summary.first.sentence}
* </li>
* <li>
* {@code summary.javaDoc}
* </li>
* <li>
* {@code summary.javaDoc.missing}
* </li>
* <li>
* {@code summary.javaDoc.missing.period}
* </li>
* </ul>
*
* @since 6.0
*/
@StatelessCheck
public class SummaryJavadocCheck extends AbstractJavadocCheck {
/**
* A key is pointing to the warning message text in "messages.properties"
* file.
*/
public static final String MSG_SUMMARY_FIRST_SENTENCE = "summary.first.sentence";
/**
* A key is pointing to the warning message text in "messages.properties"
* file.
*/
public static final String MSG_SUMMARY_JAVADOC = "summary.javaDoc";
/**
* A key is pointing to the warning message text in "messages.properties"
* file.
*/
public static final String MSG_SUMMARY_JAVADOC_MISSING = "summary.javaDoc.missing";
/**
* A key is pointing to the warning message text in "messages.properties" file.
*/
public static final String MSG_SUMMARY_MISSING_PERIOD = "summary.javaDoc.missing.period";
/**
* This regexp is used to convert multiline javadoc to single-line without stars.
*/
private static final Pattern JAVADOC_MULTILINE_TO_SINGLELINE_PATTERN =
Pattern.compile("\n +(\\*)|^ +(\\*)");
/**
* This regexp is used to remove html tags, whitespace, and asterisks from a string.
*/
private static final Pattern HTML_ELEMENTS =
Pattern.compile("<[^>]*>");
/** Default period literal. */
private static final String DEFAULT_PERIOD = ".";
/** Summary tag text. */
private static final String SUMMARY_TEXT = "@summary";
/** Return tag text. */
private static final String RETURN_TEXT = "@return";
/** Set of allowed Tokens tags in summary java doc. */
private static final BitSet ALLOWED_TYPES = TokenUtil.asBitSet(
JavadocTokenTypes.WS,
JavadocTokenTypes.DESCRIPTION,
JavadocTokenTypes.TEXT);
/**
* Specify the regexp for forbidden summary fragments.
*/
private Pattern forbiddenSummaryFragments = CommonUtil.createPattern("^$");
/**
* Specify the period symbol. Used to check the first sentence ends with a period. Periods that
* are not followed by a whitespace character are ignored (eg. the period in v1.0). Because some
* periods include whitespace built into the character, if this is set to a non-default value
* any period will end the sentence, whether it is followed by whitespace or not.
*/
private String period = DEFAULT_PERIOD;
/**
* Setter to specify the regexp for forbidden summary fragments.
*
* @param pattern a pattern.
* @since 6.0
*/
public void setForbiddenSummaryFragments(Pattern pattern) {
forbiddenSummaryFragments = pattern;
}
/**
* Setter to specify the period symbol. Used to check the first sentence ends with a period.
* Periods that are not followed by a whitespace character are ignored (eg. the period in v1.0).
* Because some periods include whitespace built into the character, if this is set to a
* non-default value any period will end the sentence, whether it is followed by whitespace or
* not.
*
* @param period period's value.
* @since 6.2
*/
public void setPeriod(String period) {
this.period = period;
}
@Override
public int[] getDefaultJavadocTokens() {
return new int[] {
JavadocTokenTypes.JAVADOC,
};
}
@Override
public int[] getRequiredJavadocTokens() {
return getAcceptableJavadocTokens();
}
@Override
public void visitJavadocToken(DetailNode ast) {
final Optional<DetailNode> inlineTag = getInlineTagNode(ast);
final DetailNode inlineTagNode = inlineTag.orElse(null);
if (inlineTag.isPresent()
&& isSummaryTag(inlineTagNode)
&& isDefinedFirst(inlineTagNode)) {
validateSummaryTag(inlineTagNode);
}
else if (inlineTag.isPresent() && isInlineReturnTag(inlineTagNode)) {
validateInlineReturnTag(inlineTagNode);
}
else if (!startsWithInheritDoc(ast)) {
validateUntaggedSummary(ast);
}
}
/**
* Checks the javadoc text for {@code period} at end and forbidden fragments.
*
* @param ast the javadoc text node
*/
private void validateUntaggedSummary(DetailNode ast) {
final String summaryDoc = getSummarySentence(ast);
if (summaryDoc.isEmpty()) {
log(ast.getLineNumber(), MSG_SUMMARY_JAVADOC_MISSING);
}
else if (!period.isEmpty()) {
if (summaryDoc.contains(period)) {
final String firstSentence = getFirstSentenceOrNull(ast, period);
if (firstSentence == null) {
log(ast.getLineNumber(), MSG_SUMMARY_FIRST_SENTENCE);
}
else if (containsForbiddenFragment(firstSentence)) {
log(ast.getLineNumber(), MSG_SUMMARY_JAVADOC);
}
}
else {
log(ast.getLineNumber(), MSG_SUMMARY_FIRST_SENTENCE);
}
}
}
/**
* Gets the node for the inline tag if present.
*
* @param javadoc javadoc root node.
* @return the node for the inline tag if present.
*/
private static Optional<DetailNode> getInlineTagNode(DetailNode javadoc) {
return Arrays.stream(javadoc.getChildren())
.filter(SummaryJavadocCheck::isInlineTagPresent)
.findFirst()
.map(SummaryJavadocCheck::getInlineTagNodeForAst);
}
/**
* Whether the {@code {@summary}} tag is defined first in the javadoc.
*
* @param inlineSummaryTag node of type {@link JavadocTokenTypes#JAVADOC_INLINE_TAG}
* @return {@code true} if the {@code {@summary}} tag is defined first in the javadoc
*/
private static boolean isDefinedFirst(DetailNode inlineSummaryTag) {
boolean isDefinedFirst = true;
DetailNode currentAst = inlineSummaryTag;
while (currentAst != null && isDefinedFirst) {
switch (currentAst.getType()) {
case JavadocTokenTypes.TEXT:
isDefinedFirst = currentAst.getText().isBlank();
break;
case JavadocTokenTypes.HTML_ELEMENT:
isDefinedFirst = !isTextPresentInsideHtmlTag(currentAst);
break;
default:
break;
}
currentAst = JavadocUtil.getPreviousSibling(currentAst);
}
return isDefinedFirst;
}
/**
* Whether some text is present inside the HTML element or tag.
*
* @param node DetailNode of type {@link JavadocTokenTypes#HTML_TAG}
* or {@link JavadocTokenTypes#HTML_ELEMENT}
* @return {@code true} if some text is present inside the HTML element or tag
*/
public static boolean isTextPresentInsideHtmlTag(DetailNode node) {
DetailNode nestedChild = JavadocUtil.getFirstChild(node);
if (node.getType() == JavadocTokenTypes.HTML_ELEMENT) {
nestedChild = JavadocUtil.getFirstChild(nestedChild);
}
boolean isTextPresentInsideHtmlTag = false;
while (nestedChild != null && !isTextPresentInsideHtmlTag) {
switch (nestedChild.getType()) {
case JavadocTokenTypes.TEXT:
isTextPresentInsideHtmlTag = !nestedChild.getText().isBlank();
break;
case JavadocTokenTypes.HTML_TAG:
case JavadocTokenTypes.HTML_ELEMENT:
isTextPresentInsideHtmlTag = isTextPresentInsideHtmlTag(nestedChild);
break;
default:
break;
}
nestedChild = JavadocUtil.getNextSibling(nestedChild);
}
return isTextPresentInsideHtmlTag;
}
/**
* Checks if the inline tag node is present.
*
* @param ast ast node to check.
* @return true, if the inline tag node is present.
*/
private static boolean isInlineTagPresent(DetailNode ast) {
return getInlineTagNodeForAst(ast) != null;
}
/**
* Returns an inline javadoc tag node that is within a html tag.
*
* @param ast html tag node.
* @return inline summary javadoc tag node or null if no node is found.
*/
private static DetailNode getInlineTagNodeForAst(DetailNode ast) {
DetailNode node = ast;
DetailNode result = null;
// node can never be null as this method is called when there is a HTML_ELEMENT
if (node.getType() == JavadocTokenTypes.JAVADOC_INLINE_TAG) {
result = node;
}
else if (node.getType() == JavadocTokenTypes.HTML_TAG) {
// HTML_TAG always has more than 2 children.
node = node.getChildren()[1];
result = getInlineTagNodeForAst(node);
}
else if (node.getType() == JavadocTokenTypes.HTML_ELEMENT
// Condition for SINGLETON html element which cannot contain summary node
&& node.getChildren()[0].getChildren().length > 1) {
// Html elements have one tested tag before actual content inside it
node = node.getChildren()[0].getChildren()[1];
result = getInlineTagNodeForAst(node);
}
return result;
}
/**
* Checks if the javadoc inline tag is {@code {@summary}} tag.
*
* @param javadocInlineTag node of type {@link JavadocTokenTypes#JAVADOC_INLINE_TAG}
* @return {@code true} if inline tag is summary tag.
*/
private static boolean isSummaryTag(DetailNode javadocInlineTag) {
return isInlineTagWithName(javadocInlineTag, SUMMARY_TEXT);
}
/**
* Checks if the first tag inside ast is {@code {@return}} tag.
*
* @param javadocInlineTag node of type {@link JavadocTokenTypes#JAVADOC_INLINE_TAG}
* @return {@code true} if first tag is return tag.
*/
private static boolean isInlineReturnTag(DetailNode javadocInlineTag) {
return isInlineTagWithName(javadocInlineTag, RETURN_TEXT);
}
/**
* Checks if the first tag inside ast is a tag with the given name.
*
* @param javadocInlineTag node of type {@link JavadocTokenTypes#JAVADOC_INLINE_TAG}
* @param name name of inline tag.
*
* @return {@code true} if first tag is a tag with the given name.
*/
private static boolean isInlineTagWithName(DetailNode javadocInlineTag, String name) {
final DetailNode[] child = javadocInlineTag.getChildren();
// Checking size of ast is not required, since ast contains
// children of Inline Tag, as at least 2 children will be present which are
// RCURLY and LCURLY.
return name.equals(child[1].getText());
}
/**
* Checks the inline summary (if present) for {@code period} at end and forbidden fragments.
*
* @param inlineSummaryTag node of type {@link JavadocTokenTypes#JAVADOC_INLINE_TAG}
*/
private void validateSummaryTag(DetailNode inlineSummaryTag) {
final String inlineSummary = getContentOfInlineCustomTag(inlineSummaryTag);
final String summaryVisible = getVisibleContent(inlineSummary);
if (summaryVisible.isEmpty()) {
log(inlineSummaryTag.getLineNumber(), MSG_SUMMARY_JAVADOC_MISSING);
}
else if (!period.isEmpty()) {
final boolean isPeriodNotAtEnd =
summaryVisible.lastIndexOf(period) != summaryVisible.length() - 1;
if (isPeriodNotAtEnd) {
log(inlineSummaryTag.getLineNumber(), MSG_SUMMARY_MISSING_PERIOD);
}
else if (containsForbiddenFragment(inlineSummary)) {
log(inlineSummaryTag.getLineNumber(), MSG_SUMMARY_JAVADOC);
}
}
}
/**
* Checks the inline return for forbidden fragments.
*
* @param inlineReturnTag node of type {@link JavadocTokenTypes#JAVADOC_INLINE_TAG}
*/
private void validateInlineReturnTag(DetailNode inlineReturnTag) {
final String inlineReturn = getContentOfInlineCustomTag(inlineReturnTag);
final String returnVisible = getVisibleContent(inlineReturn);
if (returnVisible.isEmpty()) {
log(inlineReturnTag.getLineNumber(), MSG_SUMMARY_JAVADOC_MISSING);
}
else if (containsForbiddenFragment(inlineReturn)) {
log(inlineReturnTag.getLineNumber(), MSG_SUMMARY_JAVADOC);
}
}
/**
* Gets the content of inline custom tag.
*
* @param inlineTag inline tag node.
* @return String consisting of the content of inline custom tag.
*/
public static String getContentOfInlineCustomTag(DetailNode inlineTag) {
final DetailNode[] childrenOfInlineTag = inlineTag.getChildren();
final StringBuilder customTagContent = new StringBuilder(256);
final int indexOfContentOfSummaryTag = 3;
if (childrenOfInlineTag.length != indexOfContentOfSummaryTag) {
DetailNode currentNode = childrenOfInlineTag[indexOfContentOfSummaryTag];
while (currentNode.getType() != JavadocTokenTypes.JAVADOC_INLINE_TAG_END) {
extractInlineTagContent(currentNode, customTagContent);
currentNode = JavadocUtil.getNextSibling(currentNode);
}
}
return customTagContent.toString();
}
/**
* Extracts the content of inline custom tag recursively.
*
* @param node DetailNode
* @param customTagContent content of custom tag
*/
private static void extractInlineTagContent(DetailNode node,
StringBuilder customTagContent) {
final DetailNode[] children = node.getChildren();
if (children.length == 0) {
customTagContent.append(node.getText());
}
else {
for (DetailNode child : children) {
if (child.getType() != JavadocTokenTypes.LEADING_ASTERISK) {
extractInlineTagContent(child, customTagContent);
}
}
}
}
/**
* Gets the string that is visible to user in javadoc.
*
* @param summary entire content of summary javadoc.
* @return string that is visible to user in javadoc.
*/
private static String getVisibleContent(String summary) {
final String visibleSummary = HTML_ELEMENTS.matcher(summary).replaceAll("");
return visibleSummary.trim();
}
/**
* Tests if first sentence contains forbidden summary fragment.
*
* @param firstSentence string with first sentence.
* @return {@code true} if first sentence contains forbidden summary fragment.
*/
private boolean containsForbiddenFragment(String firstSentence) {
final String javadocText = JAVADOC_MULTILINE_TO_SINGLELINE_PATTERN
.matcher(firstSentence).replaceAll(" ");
return forbiddenSummaryFragments.matcher(trimExcessWhitespaces(javadocText)).find();
}
/**
* Trims the given {@code text} of duplicate whitespaces.
*
* @param text the text to transform.
* @return the finalized form of the text.
*/
private static String trimExcessWhitespaces(String text) {
final StringBuilder result = new StringBuilder(256);
boolean previousWhitespace = true;
for (char letter : text.toCharArray()) {
final char print;
if (Character.isWhitespace(letter)) {
if (previousWhitespace) {
continue;
}
previousWhitespace = true;
print = ' ';
}
else {
previousWhitespace = false;
print = letter;
}
result.append(print);
}
return result.toString();
}
/**
* Checks if the node starts with an {@inheritDoc}.
*
* @param root the root node to examine.
* @return {@code true} if the javadoc starts with an {@inheritDoc}.
*/
private static boolean startsWithInheritDoc(DetailNode root) {
boolean found = false;
for (DetailNode child : root.getChildren()) {
if (child.getType() == JavadocTokenTypes.JAVADOC_INLINE_TAG
&& child.getChildren()[1].getType() == JavadocTokenTypes.INHERIT_DOC_LITERAL) {
found = true;
}
if ((child.getType() == JavadocTokenTypes.TEXT
|| child.getType() == JavadocTokenTypes.HTML_ELEMENT)
&& !CommonUtil.isBlank(child.getText())) {
break;
}
}
return found;
}
/**
* Finds and returns summary sentence.
*
* @param ast javadoc root node.
* @return violation string.
*/
private static String getSummarySentence(DetailNode ast) {
final StringBuilder result = new StringBuilder(256);
for (DetailNode child : ast.getChildren()) {
if (child.getType() != JavadocTokenTypes.EOF
&& ALLOWED_TYPES.get(child.getType())) {
result.append(child.getText());
}
else {
final String summary = result.toString();
if (child.getType() == JavadocTokenTypes.HTML_ELEMENT
&& CommonUtil.isBlank(summary)) {
result.append(getStringInsideTag(summary,
child.getChildren()[0].getChildren()[0]));
}
}
}
return result.toString().trim();
}
/**
* Get concatenated string within text of html tags.
*
* @param result javadoc string
* @param detailNode javadoc tag node
* @return java doc tag content appended in result
*/
private static String getStringInsideTag(String result, DetailNode detailNode) {
final StringBuilder contents = new StringBuilder(result);
DetailNode tempNode = detailNode;
while (tempNode != null) {
if (tempNode.getType() == JavadocTokenTypes.TEXT) {
contents.append(tempNode.getText());
}
tempNode = JavadocUtil.getNextSibling(tempNode);
}
return contents.toString();
}
/**
* Finds and returns the first sentence.
*
* @param ast The Javadoc root node.
* @param period The configured period symbol.
* @return The first sentence up to and excluding the period, or null if no ending was found.
*/
@Nullable
private static String getFirstSentenceOrNull(DetailNode ast, String period) {
final List<String> sentenceParts = new ArrayList<>();
String sentence = null;
for (String text : (Iterable<String>) streamTextParts(ast)::iterator) {
final String sentenceEnding = findSentenceEndingOrNull(text, period);
if (sentenceEnding != null) {
sentenceParts.add(sentenceEnding);
sentence = String.join("", sentenceParts);
break;
}
else {
sentenceParts.add(text);
}
}
return sentence;
}
/**
* Streams through all the text under the given node.
*
* @param node The Javadoc node to examine.
* @return All the text in all nodes that have no child nodes.
*/
private static Stream<String> streamTextParts(DetailNode node) {
final Stream<String> stream;
if (node.getChildren().length == 0) {
stream = Stream.of(node.getText());
}
else {
stream = Stream.of(node.getChildren())
.flatMap(SummaryJavadocCheck::streamTextParts);
}
return stream;
}
/**
* Finds the end of a sentence. If a sentence ending period was found, returns the whole string
* up to and excluding that period. The end of sentence detection here could be replaced in the
* future by Java's built-in BreakIterator class.
*
* @param text The string to search.
* @param period The period character to find.
* @return The string up to and excluding the period, or null if no ending was found.
*/
@Nullable
private static String findSentenceEndingOrNull(String text, String period) {
int periodIndex = text.indexOf(period);
String sentenceEnding = null;
while (periodIndex >= 0) {
final int afterPeriodIndex = periodIndex + period.length();
// Handle western period separately as it is only the end of a sentence if followed
// by whitespace. Other period characters often include whitespace in the character.
if (!DEFAULT_PERIOD.equals(period)
|| afterPeriodIndex >= text.length()
|| Character.isWhitespace(text.charAt(afterPeriodIndex))) {
sentenceEnding = text.substring(0, periodIndex);
break;
}
else {
periodIndex = text.indexOf(period, afterPeriodIndex);
}
}
return sentenceEnding;
}
}