RegexpCheck.java
///////////////////////////////////////////////////////////////////////////////////////////////
// checkstyle: Checks Java source code and other text files for adherence to a set of rules.
// Copyright (C) 2001-2024 the original author or authors.
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
///////////////////////////////////////////////////////////////////////////////////////////////
package com.puppycrawl.tools.checkstyle.checks.regexp;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import com.puppycrawl.tools.checkstyle.FileStatefulCheck;
import com.puppycrawl.tools.checkstyle.api.AbstractCheck;
import com.puppycrawl.tools.checkstyle.api.DetailAST;
import com.puppycrawl.tools.checkstyle.api.FileContents;
import com.puppycrawl.tools.checkstyle.api.FileText;
import com.puppycrawl.tools.checkstyle.api.LineColumn;
import com.puppycrawl.tools.checkstyle.utils.CommonUtil;
/**
* <p>
* Checks that a specified pattern exists, exists less than
* a set number of times, or does not exist in the file.
* </p>
* <p>
* This check combines all the functionality provided by
* <a href="https://checkstyle.org/checks/header/regexpheader.html#RegexpHeader">RegexpHeader</a>
* except supplying the regular expression from a file.
* </p>
* <p>
* It differs from them in that it works in multiline mode. Its regular expression
* can span multiple lines and it checks this against the whole file at once.
* The others work in single-line mode. Their single or multiple regular expressions
* can only span one line. They check each of these against each line in the file in turn.
* </p>
* <p>
* <b>Note:</b> Because of the different mode of operation there may be some
* changes in the regular expressions used to achieve a particular end.
* </p>
* <p>
* In multiline mode...
* </p>
* <ul>
* <li>
* {@code ^} means the beginning of a line, as opposed to beginning of the input.
* </li>
* <li>
* For beginning of the input use {@code \A}.
* </li>
* <li>
* {@code $} means the end of a line, as opposed to the end of the input.
* </li>
* <li>
* For end of input use {@code \Z}.
* </li>
* <li>
* Each line in the file is terminated with a line feed character.
* </li>
* </ul>
* <p>
* <b>Note:</b> Not all regular expression engines are created equal.
* Some provide extra functions that others do not and some elements
* of the syntax may vary. This check makes use of the
* <a href="https://docs.oracle.com/en/java/javase/11/docs/api/java.base/java/util/regex/package-summary.html">
* java.util.regex package</a>; please check its documentation for details
* of how to construct a regular expression to achieve a particular goal.
* </p>
* <p>
* <b>Note:</b> When entering a regular expression as a parameter in
* the XML config file you must also take into account the XML rules. e.g.
* if you want to match a &lt; symbol you need to enter &amp;lt;.
* The regular expression should be entered on one line.
* </p>
* <ul>
* <li>
* Property {@code duplicateLimit} - Control whether to check for duplicates
* of a required pattern, any negative value means no checking for duplicates,
* any positive value is used as the maximum number of allowed duplicates,
* if the limit is exceeded violations will be logged.
* Type is {@code int}.
* Default value is {@code 0}.
* </li>
* <li>
* Property {@code errorLimit} - Specify the maximum number of violations before
* the check will abort.
* Type is {@code int}.
* Default value is {@code 100}.
* </li>
* <li>
* Property {@code format} - Specify the pattern to match against.
* Type is {@code java.util.regex.Pattern}.
* Default value is {@code "^$"}.
* </li>
* <li>
* Property {@code ignoreComments} - Control whether to ignore matches found within comments.
* Type is {@code boolean}.
* Default value is {@code false}.
* </li>
* <li>
* Property {@code illegalPattern} - Control whether the pattern is required or illegal.
* Type is {@code boolean}.
* Default value is {@code false}.
* </li>
* <li>
* Property {@code message} - Specify message which is used to notify about
* violations, if empty then the default (hard-coded) message is used.
* Type is {@code java.lang.String}.
* Default value is {@code null}.
* </li>
* </ul>
* <p>
* Parent is {@code com.puppycrawl.tools.checkstyle.TreeWalker}
* </p>
* <p>
* Violation Message Keys:
* </p>
* <ul>
* <li>
* {@code duplicate.regexp}
* </li>
* <li>
* {@code illegal.regexp}
* </li>
* <li>
* {@code required.regexp}
* </li>
* </ul>
*
* @since 4.0
*/
@FileStatefulCheck
public class RegexpCheck extends AbstractCheck {

    /**
     * A key is pointing to the warning message text in "messages.properties"
     * file. Logged for every reported match when the pattern is illegal.
     */
    public static final String MSG_ILLEGAL_REGEXP = "illegal.regexp";

    /**
     * A key is pointing to the warning message text in "messages.properties"
     * file. Logged on line 1 when a required pattern has no counted match.
     */
    public static final String MSG_REQUIRED_REGEXP = "required.regexp";

    /**
     * A key is pointing to the warning message text in "messages.properties"
     * file. Logged when a required pattern exceeds the duplicate limit.
     */
    public static final String MSG_DUPLICATE_REGEXP = "duplicate.regexp";

    /** Default duplicate limit. */
    private static final int DEFAULT_DUPLICATE_LIMIT = -1;

    /** Default error report limit. */
    private static final int DEFAULT_ERROR_LIMIT = 100;

    /** Error count exceeded message. */
    private static final String ERROR_LIMIT_EXCEEDED_MESSAGE =
        "The error limit has been exceeded, "
        + "the check is aborting, there may be more unreported errors.";

    /**
     * Specify message which is used to notify about violations,
     * if empty then the default (hard-coded) message is used.
     */
    private String message;

    /** Control whether to ignore matches found within comments. */
    private boolean ignoreComments;

    /** Control whether the pattern is required or illegal. */
    private boolean illegalPattern;

    /** Specify the maximum number of violations before the check will abort. */
    private int errorLimit = DEFAULT_ERROR_LIMIT;

    /**
     * Control whether to check for duplicates of a required pattern,
     * any negative value means no checking for duplicates,
     * any positive value is used as the maximum number of allowed duplicates,
     * if the limit is exceeded violations will be logged.
     */
    private int duplicateLimit;

    /**
     * Boolean to say if we should check for duplicates.
     * It is only assigned in {@link #setDuplicateLimit(int)}, so it stays
     * {@code false} until that setter is called.
     */
    private boolean checkForDuplicates;

    /** Tracks number of matches made. */
    private int matchCount;

    /** Tracks number of errors. */
    private int errorCount;

    /** Specify the pattern to match against. */
    private Pattern format = Pattern.compile("^$", Pattern.MULTILINE);

    /** The matcher. */
    private Matcher matcher;

    /**
     * Setter to specify message which is used to notify about violations,
     * if empty then the default (hard-coded) message is used.
     *
     * @param message custom message which should be used in report.
     * @since 4.0
     */
    public void setMessage(String message) {
        this.message = message;
    }

    /**
     * Setter to control whether to ignore matches found within comments.
     *
     * @param ignoreComments True if comments should be ignored.
     * @since 4.0
     */
    public void setIgnoreComments(boolean ignoreComments) {
        this.ignoreComments = ignoreComments;
    }

    /**
     * Setter to control whether the pattern is required or illegal.
     *
     * @param illegalPattern True if pattern is not allowed.
     * @since 4.0
     */
    public void setIllegalPattern(boolean illegalPattern) {
        this.illegalPattern = illegalPattern;
    }

    /**
     * Setter to specify the maximum number of violations before the check will abort.
     *
     * @param errorLimit the number of errors to report.
     * @since 4.0
     */
    public void setErrorLimit(int errorLimit) {
        this.errorLimit = errorLimit;
    }

    /**
     * Setter to control whether to check for duplicates of a required pattern,
     * any negative value means no checking for duplicates,
     * any positive value is used as the maximum number of allowed duplicates,
     * if the limit is exceeded violations will be logged.
     *
     * @param duplicateLimit negative values mean no duplicate checking,
     *     any positive value is used as the limit.
     * @since 4.0
     */
    public void setDuplicateLimit(int duplicateLimit) {
        this.duplicateLimit = duplicateLimit;
        // Any value above the -1 sentinel (that is, zero or more) enables duplicate checking.
        checkForDuplicates = duplicateLimit > DEFAULT_DUPLICATE_LIMIT;
    }

    /**
     * Setter to specify the pattern to match against.
     * The pattern is recompiled from its source text with the
     * {@link Pattern#MULTILINE} flag.
     *
     * @param pattern the new pattern
     * @since 4.0
     */
    public final void setFormat(Pattern pattern) {
        format = CommonUtil.createPattern(pattern.pattern(), Pattern.MULTILINE);
    }

    @Override
    public int[] getDefaultTokens() {
        return getRequiredTokens();
    }

    @Override
    public int[] getAcceptableTokens() {
        return getRequiredTokens();
    }

    @Override
    public int[] getRequiredTokens() {
        return CommonUtil.EMPTY_INT_ARRAY;
    }

    /**
     * Resets the per-file counters, creates a matcher over the full text of
     * the current file and runs the search.
     *
     * @param rootAST the root of the tree (not used by this check)
     */
    // suppress deprecation until https://github.com/checkstyle/checkstyle/issues/11166
    @SuppressWarnings("deprecation")
    @Override
    public void beginTree(DetailAST rootAST) {
        matcher = format.matcher(getFileContents().getText().getFullText());
        matchCount = 0;
        errorCount = 0;
        findMatch();
    }

    /**
     * Finds the matches of {@code format} in the file and logs violations.
     * Implemented as a loop rather than recursion so that the call stack
     * does not grow with the number of matches in the file.
     */
    // suppress deprecation until https://github.com/checkstyle/checkstyle/issues/11166
    @SuppressWarnings("deprecation")
    private void findMatch() {
        boolean keepSearching = true;
        while (keepSearching) {
            if (matcher.find()) {
                final FileText text = getFileContents().getText();
                final LineColumn start = text.lineColumn(matcher.start());
                final int startLine = start.getLine();
                final boolean ignore = isIgnore(startLine, text, start);
                if (!ignore) {
                    matchCount++;
                    // matchCount - 1 is the number of duplicates seen so far.
                    if (illegalPattern || checkForDuplicates
                            && matchCount - 1 > duplicateLimit) {
                        errorCount++;
                        logMessage(startLine);
                    }
                }
                keepSearching = canContinueValidation(ignore);
            }
            else {
                // Input exhausted: a required pattern that never produced
                // a counted match is reported against the first line.
                if (!illegalPattern && matchCount == 0) {
                    final String msg = getMessage();
                    log(1, MSG_REQUIRED_REGEXP, msg);
                }
                keepSearching = false;
            }
        }
    }

    /**
     * Check if we can continue validation.
     * Searching goes on only while the error limit has not been reached and
     * either the last match was ignored or every match matters (illegal
     * pattern or duplicate checking).
     *
     * @param ignore whether the last match was ignored
     * @return true if we can continue
     */
    private boolean canContinueValidation(boolean ignore) {
        // Compared directly instead of via "errorLimit - 1" to avoid int overflow.
        return errorCount < errorLimit
                && (ignore || illegalPattern || checkForDuplicates);
    }

    /**
     * Detect ignore situation.
     * A match is ignored only when {@code ignoreComments} is set and the
     * matched region intersects a comment.
     *
     * @param startLine line on which the match starts
     * @param text file text
     * @param start line and column at which the match starts
     * @return true if the match needs to be ignored
     */
    // suppress deprecation until https://github.com/checkstyle/checkstyle/issues/11166
    @SuppressWarnings("deprecation")
    private boolean isIgnore(int startLine, FileText text, LineColumn start) {
        boolean ignore = false;
        if (ignoreComments) {
            // Offset of the last matched character; an empty match at the very
            // beginning of the input has none, so offset 0 is used instead.
            final int endOffset = Math.max(matcher.end() - 1, 0);
            final LineColumn end = text.lineColumn(endOffset);
            final FileContents theFileContents = getFileContents();
            ignore = theFileContents.hasIntersectionWithComment(startLine,
                    start.getColumn(), end.getLine(), end.getColumn());
        }
        return ignore;
    }

    /**
     * Displays the right message: the illegal-pattern violation when the
     * pattern is illegal, the duplicate violation otherwise.
     *
     * @param lineNumber the line number the message relates to.
     */
    private void logMessage(int lineNumber) {
        final String msg = getMessage();
        if (illegalPattern) {
            log(lineNumber, MSG_ILLEGAL_REGEXP, msg);
        }
        else {
            log(lineNumber, MSG_DUPLICATE_REGEXP, msg);
        }
    }

    /**
     * Provide right message.
     * Uses the custom message when one is set and non-empty, otherwise the
     * pattern text; the error-limit notice is prepended once the error count
     * has reached the error limit.
     *
     * @return message for violation.
     */
    private String getMessage() {
        final String base;
        if (message == null || message.isEmpty()) {
            base = format.pattern();
        }
        else {
            base = message;
        }
        final String result;
        if (errorCount >= errorLimit) {
            result = ERROR_LIMIT_EXCEEDED_MESSAGE + base;
        }
        else {
            result = base;
        }
        return result;
    }
}