Regex.java
package org.jsoup.helper;
import org.jsoup.internal.SharedConstants;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
/**
A regular expression abstraction. Allows jsoup to optionally use the re2j regular expression engine (linear time)
instead of the JDK's backtracking regex implementation.
<p>If the {@code com.google.re2j} library is found on the classpath, by default it will be used. You can override this
by setting {@code -Djsoup.useRe2j=false} to explicitly disable, and use the JDK regex engine.</p>
<p>(Currently this a simplified implementation for jsoup's specific use; can extend as required.)</p>
*/
public class Regex {
private static final boolean hasRe2j = hasRe2j();
private final Pattern jdkPattern;
Regex(Pattern jdkPattern) {
this.jdkPattern = jdkPattern;
}
/**
Compile a regex, using re2j if enabled and available; otherwise JDK regex.
@param regex the regex to compile
@return the compiled regex
@throws ValidationException if the regex is invalid
*/
public static Regex compile(String regex) {
if (usingRe2j()) {
return Re2jRegex.compile(regex);
}
try {
return new Regex(Pattern.compile(regex));
} catch (PatternSyntaxException e) {
throw new ValidationException("Pattern syntax error: " + e.getMessage());
}
}
/** Wraps an existing JDK Pattern (for API compat); doesn't switch */
public static Regex fromPattern(Pattern pattern) {
return new Regex(pattern);
}
/**
Checks if re2j is available (on classpath) and enabled (via system property).
@return true if re2j is available and enabled
*/
public static boolean usingRe2j() {
return hasRe2j && wantsRe2j();
}
static boolean wantsRe2j() {
return Boolean.parseBoolean(System.getProperty(SharedConstants.UseRe2j, "true"));
}
static void wantsRe2j(boolean use) {
System.setProperty(SharedConstants.UseRe2j, Boolean.toString(use));
}
static boolean hasRe2j() {
try {
Class<?> re2 = Class.forName("com.google.re2j.Pattern", false, Regex.class.getClassLoader()); // check if re2j is in classpath
try {
// if it is, and we are on JVM9+, we need to dork around with modules, because re2j doesn't publish a module name.
// done via reflection so we can still run on JVM 8.
// todo remove if re2j publishes as a module
Class<?> moduleCls = Class.forName("java.lang.Module");
Method getModule = Class.class.getMethod("getModule");
Object jsoupMod = getModule.invoke(Regex.class);
Object re2Mod = getModule.invoke(re2);
boolean reads = (boolean) moduleCls.getMethod("canRead", moduleCls).invoke(jsoupMod, re2Mod);
if (!reads) moduleCls.getMethod("addReads", moduleCls).invoke(jsoupMod, re2Mod);
} catch (ClassNotFoundException ignore) {
// jvm8 - no Module class; so we can use as-is
}
return true;
} catch (ClassNotFoundException e) {
return false; // no re2j
} catch (ReflectiveOperationException e) {
// unexpectedly couldn���t wire modules on 9+; return false to avoid IllegalAccessError later
System.err.println("Warning: (bug? please report) couldn't access re2j from jsoup due to modules: " + e);
return false;
}
}
public Matcher matcher(CharSequence input) {
return new JdkMatcher(jdkPattern.matcher(input));
}
@Override
public String toString() {
return jdkPattern.toString();
}
public interface Matcher {
boolean find();
}
private static final class JdkMatcher implements Matcher {
private final java.util.regex.Matcher delegate;
JdkMatcher(java.util.regex.Matcher delegate) {
this.delegate = delegate;
}
@Override
public boolean find() {
return delegate.find();
}
}
}