SparqlShrinker.java
/*******************************************************************************
* Copyright (c) 2025 Eclipse RDF4J contributors.
*
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Distribution License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/org/documents/edl-v10.php.
*
* SPDX-License-Identifier: BSD-3-Clause
******************************************************************************/
package org.eclipse.rdf4j.queryrender;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Objects;
import java.util.function.Predicate;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
/**
* SPARQL query shrinker / delta debugger (Java 11, no dependencies).
*
* Design: - Phase A: Greedy, structure-aware reducers (OPTIONAL/UNION/FILTER/BIND/VALUES/ORDER BY/etc.). Each reducer
* proposes safe, syntactically-plausible deletions or flattenings. If the FailureOracle still reports failure (and
* ValidityOracle OK if provided), accept and repeat. - Phase B: Token-level ddmin (Zeller) over the remaining token
* list for extra minimization.
*
* You control "what is a failure?" with FailureOracle (e.g., "assertRoundTrip fails"). Optionally enforce "query must
* remain valid" with ValidityOracle (e.g., a reference parser).
*/
public final class SparqlShrinker {
private SparqlShrinker() {
}
// ===========================
// Oracles & Config
// ===========================
/** Return true iff the query still exhibits the bug (e.g., parser throws, or round-trip mismatch). */
@FunctionalInterface
public interface FailureOracle {
boolean fails(String query);
}
/** Return true iff the query is valid enough to consider (optional). */
@FunctionalInterface
public interface ValidityOracle {
boolean isValid(String query);
}
/** Shrinker configuration. */
public static final class Config {
/** Max passes of greedy reductions before ddmin. */
public final int maxGreedyIterations = 30;
/** Enable token-level ddmin after greedy reductions. */
public final boolean enableDdmin = true;
/** Enforce validity using validityOracle when set. */
public boolean enforceValidity = false;
/** Hard cap on total candidate evaluations (guards endless oracles). */
public final int maxChecks = 10_000;
/** Insert spaces around operators when rejoining tokens (safer for validity). */
public final boolean spaceyJoin = true;
/** When removing UNION branches, try removing RIGHT first (often shrinks faster). */
public final boolean unionPreferRight = true;
/** When removing VALUES rows, target batch factor (n, then n*2...) for bisection-like shrink. */
public final int valuesBatchStart = 8;
public Config enforceValidity(ValidityOracle v) {
this.enforceValidity = (v != null);
return this;
}
}
/** Shrink result. */
public static final class Result {
public final String minimized;
public final int attempts;
public final int accepted;
public final List<String> log;
Result(String minimized, int attempts, int accepted, List<String> log) {
this.minimized = minimized;
this.attempts = attempts;
this.accepted = accepted;
this.log = Collections.unmodifiableList(new ArrayList<>(log));
}
@Override
public String toString() {
return "SparqlShrinker.Result{len=" + minimized.length() +
", attempts=" + attempts + ", accepted=" + accepted +
", steps=" + log.size() + "}";
}
}
// ===========================
// Public API
// ===========================
/** Shrink a failing SPARQL query to a smaller counterexample. Validity oracle is optional. */
public static Result shrink(String original,
FailureOracle failureOracle,
ValidityOracle validityOracle,
Config cfg) throws Exception {
Objects.requireNonNull(original, "original");
Objects.requireNonNull(failureOracle, "failureOracle");
if (cfg == null) {
cfg = new Config();
}
// Initial check: if it doesn't fail, nothing to do.
Guard g = new Guard(failureOracle, validityOracle, cfg);
if (!g.fails(original)) {
return new Result(original, g.attempts, g.accepted,
Collections.singletonList("Original did not fail; no shrink."));
}
String q = original;
List<String> log = new ArrayList<>();
// Phase A: Greedy structure-aware reductions until fixpoint or limits reached
boolean progress;
int greedyRounds = 0;
do {
progress = false;
greedyRounds++;
// 1) Remove ORDER BY, LIMIT, OFFSET, DISTINCT/REDUCED
String r1 = removeOrderByLimitOffsetDistinct(q, g, log);
if (!r1.equals(q)) {
q = r1;
progress = true;
continue;
}
// 2) Remove dataset clauses (FROM / FROM NAMED)
String r2 = removeDatasetClauses(q, g, log);
if (!r2.equals(q)) {
q = r2;
progress = true;
continue;
}
// 3) Flatten SERVICE and GRAPH blocks (strip wrappers)
String r3 = flattenServiceGraph(q, g, log);
if (!r3.equals(q)) {
q = r3;
progress = true;
continue;
}
// 4) Remove FILTERs (whole) and then simplify EXISTS/NOT EXISTS (flatten inner group)
String r4 = removeOrSimplifyFilters(q, g, log);
if (!r4.equals(q)) {
q = r4;
progress = true;
continue;
}
// 5) Remove BIND clauses
String r5 = removeBindClauses(q, g, log);
if (!r5.equals(q)) {
q = r5;
progress = true;
continue;
}
// 6) VALUES shrink: reduce rows, or remove entirely
String r6 = shrinkValues(q, g, cfg, log);
if (!r6.equals(q)) {
q = r6;
progress = true;
continue;
}
// 7) UNION branch removal (keep left-only or right-only)
String r7 = shrinkUnionBranches(q, g, cfg.unionPreferRight, log);
if (!r7.equals(q)) {
q = r7;
progress = true;
continue;
}
// 8) OPTIONAL removal / flatten
String r8 = shrinkOptionalBlocks(q, g, log);
if (!r8.equals(q)) {
q = r8;
progress = true;
continue;
}
// 9) GROUP BY / HAVING removal
String r9 = removeGroupByHaving(q, g, log);
if (!r9.equals(q)) {
q = r9;
progress = true;
continue;
}
// 10) SELECT projection simplification (to SELECT *), keep query form
String r10 = simplifySelectProjection(q, g, log);
if (!r10.equals(q)) {
q = r10;
progress = true;
continue;
}
// 11) CONSTRUCT template shrinking (drop extra template triples)
String r11 = shrinkConstructTemplate(q, g, log);
if (!r11.equals(q)) {
q = r11;
progress = true;
continue;
}
// 12) Trim extra triples/statements inside WHERE: drop dot-separated statements one by one
String r12 = dropWhereStatements(q, g, log);
if (!r12.equals(q)) {
q = r12;
progress = true;
}
} while (progress && greedyRounds < cfg.maxGreedyIterations && g.withinBudget());
// Phase B: ddmin over tokens
if (cfg.enableDdmin && g.withinBudget()) {
String dd = ddminTokens(q, g, cfg.spaceyJoin, log);
q = dd;
}
return new Result(q, g.attempts, g.accepted, log);
}
public static Result shrink(String original, FailureOracle failureOracle) throws Exception {
return shrink(original, failureOracle, null, new Config());
}
// ===========================
// Greedy reductions (structure-aware)
// ===========================
private static String removeOrderByLimitOffsetDistinct(String q, Guard g, List<String> log) throws Exception {
String qq = q;
// DISTINCT / REDUCED (keep SELECT form)
String qq1 = replaceIf(q, "(?i)\\bSELECT\\s+DISTINCT\\b", "SELECT ");
if (!qq1.equals(q) && g.accept(qq1)) {
log.add("Removed DISTINCT");
q = qq1;
}
qq1 = replaceIf(q, "(?i)\\bSELECT\\s+REDUCED\\b", "SELECT ");
if (!qq1.equals(q) && g.accept(qq1)) {
log.add("Removed REDUCED");
q = qq1;
}
// LIMIT / OFFSET (standalone or with ORDER BY)
while (true) {
String next = stripTailClause(q, "(?i)\\bLIMIT\\s+\\d+");
if (!next.equals(q) && g.accept(next)) {
log.add("Removed LIMIT");
q = next;
continue;
}
next = stripTailClause(q, "(?i)\\bOFFSET\\s+\\d+");
if (!next.equals(q) && g.accept(next)) {
log.add("Removed OFFSET");
q = next;
continue;
}
break;
}
// ORDER BY: from "ORDER BY" to before LIMIT/OFFSET or end
int idx = indexOfKeyword(q, "ORDER", "BY");
if (idx >= 0) {
int end = endOfOrderBy(q, idx);
String cand = q.substring(0, idx) + q.substring(end);
if (g.accept(cand)) {
log.add("Removed ORDER BY");
q = cand;
} else {
// If whole removal fails, try reducing to just first key
String reduced = keepFirstOrderKey(q, idx, end);
if (!reduced.equals(q) && g.accept(reduced)) {
log.add("Reduced ORDER BY to one key");
q = reduced;
}
}
}
return q.equals(qq) ? qq : q;
}
private static String removeDatasetClauses(String q, Guard g, List<String> log) throws Exception {
String out = q;
// Remove standalone lines of FROM / FROM NAMED with an IRI.
// Do repeated passes as long as we can delete one.
while (true) {
int idx = indexOfRegex(out, "(?i)\\bFROM\\s+(?:NAMED\\s+)?<[^>]+>");
if (idx < 0) {
break;
}
int end = endOfLineOrClause(out, idx);
String cand = out.substring(0, idx) + out.substring(end);
if (g.accept(cand)) {
log.add("Removed FROM/FROM NAMED");
out = cand;
} else {
break;
}
}
return out;
}
private static String flattenServiceGraph(String q, Guard g, List<String> log) throws Exception {
// Flatten SERVICE and GRAPH blocks: SERVICE [SILENT]? (IRI|?var) { P } -> P
String out = q;
while (true) {
Match svc = findServiceLike(out);
if (svc == null) {
break;
}
String cand = out.substring(0, svc.start) + svc.inner + out.substring(svc.end);
if (g.accept(cand)) {
log.add("Flattened " + svc.kind + " block");
out = cand;
} else {
break; // stop trying this pattern
}
}
return out;
}
private static String removeOrSimplifyFilters(String q, Guard g, List<String> log) throws Exception {
String out = q;
while (true) {
Match f = findFilter(out);
if (f == null) {
break;
}
// Try removing entire FILTER
String cand = out.substring(0, f.start) + out.substring(f.end);
if (g.accept(cand)) {
log.add("Removed FILTER");
out = cand;
continue;
}
// If it's FILTER EXISTS { P } or FILTER NOT EXISTS { P }, try keeping just inner P
if (f.inner != null && !f.inner.isEmpty()) {
String cand2 = out.substring(0, f.start) + f.inner + out.substring(f.end);
if (g.accept(cand2)) {
log.add("Flattened FILTER EXISTS/NOT EXISTS");
out = cand2;
continue;
}
}
break;
}
return out;
}
private static String removeBindClauses(String q, Guard g, List<String> log) throws Exception {
String out = q;
while (true) {
Match b = findBind(out);
if (b == null) {
break;
}
String cand = out.substring(0, b.start) + out.substring(b.end);
if (g.accept(cand)) {
log.add("Removed BIND");
out = cand;
continue;
}
break;
}
return out;
}
private static String shrinkValues(String q, Guard g, Config cfg, List<String> log) throws Exception {
String out = q;
while (true) {
ValuesBlock vb = findValues(out);
if (vb == null) {
break;
}
// Strategy: try removing entire VALUES; if not acceptable, reduce rows by halving batches.
String remove = out.substring(0, vb.start) + out.substring(vb.end);
if (g.accept(remove)) {
log.add("Removed VALUES block");
out = remove;
continue;
}
if (vb.rows.size() <= 1) {
break; // can't shrink rows further
}
int n = Math.max(cfg.valuesBatchStart, 2);
List<List<String>> rows = new ArrayList<>(vb.rows);
boolean did = false;
while (rows.size() >= 2) {
int chunk = Math.min(n, rows.size() / 2 + (rows.size() % 2));
// build candidate with first chunk only
List<List<String>> kept = rows.subList(0, chunk);
String cand = out.substring(0, vb.start) +
vb.renderWithRows(kept) +
out.substring(vb.end);
if (g.accept(cand)) {
log.add("Reduced VALUES rows: " + rows.size() + " ��� " + kept.size());
out = cand;
did = true;
break;
} else {
n = Math.min(rows.size(), n * 2);
}
}
if (!did) {
break;
}
}
return out;
}
private static String shrinkUnionBranches(String q, Guard g, boolean preferRight, List<String> log)
throws Exception {
String out = q;
while (true) {
UnionMatch u = findUnion(out);
if (u == null) {
break;
}
// Try keeping left only (remove UNION + right)
String keepLeft = out.substring(0, u.unionIdx) + out.substring(u.rightEnd + 1);
// Try keeping right only (remove left + UNION)
String keepRight = out.substring(0, u.leftStart) + out.substring(u.unionIdx + u.unionLen);
if (preferRight) {
if (g.accept(keepRight)) {
log.add("Removed UNION left-branch");
out = keepRight;
continue;
}
if (g.accept(keepLeft)) {
log.add("Removed UNION right-branch");
out = keepLeft;
continue;
}
} else {
if (g.accept(keepLeft)) {
log.add("Removed UNION right-branch");
out = keepLeft;
continue;
}
if (g.accept(keepRight)) {
log.add("Removed UNION left-branch");
out = keepRight;
continue;
}
}
break;
}
return out;
}
private static String shrinkOptionalBlocks(String q, Guard g, List<String> log) throws Exception {
String out = q;
while (true) {
Match m = findKeywordBlock(out, "OPTIONAL");
if (m == null) {
break;
}
// Option A: remove entire OPTIONAL { ... }
String remove = out.substring(0, m.start) + out.substring(m.end);
if (g.accept(remove)) {
log.add("Removed OPTIONAL block");
out = remove;
continue;
}
// Option B: flatten OPTIONAL { P } -> P
String flat = out.substring(0, m.start) + m.inner + out.substring(m.end);
if (g.accept(flat)) {
log.add("Flattened OPTIONAL block");
out = flat;
continue;
}
break;
}
return out;
}
private static String removeGroupByHaving(String q, Guard g, List<String> log) throws Exception {
String out = q;
// HAVING: from HAVING ( ... ) possibly multiple, remove whole clause
int hIdx = indexOfKeyword(out, "HAVING");
if (hIdx >= 0) {
int hend = endOfHaving(out, hIdx);
String cand = out.substring(0, hIdx) + out.substring(hend);
if (g.accept(cand)) {
log.add("Removed HAVING");
out = cand;
}
}
// GROUP BY: remove entire clause
int gIdx = indexOfKeyword(out, "GROUP", "BY");
if (gIdx >= 0) {
int gend = endOfGroupBy(out, gIdx);
String cand = out.substring(0, gIdx) + out.substring(gend);
if (g.accept(cand)) {
log.add("Removed GROUP BY");
out = cand;
}
}
return out;
}
private static String simplifySelectProjection(String q, Guard g, List<String> log) throws Exception {
// Try converting SELECT ... WHERE to SELECT * WHERE (preserve DISTINCT/REDUCED already removed earlier)
int sIdx = indexOfKeyword(q, "SELECT");
int wIdx = indexOfKeyword(q, "WHERE");
if (sIdx >= 0 && wIdx > sIdx) {
String head = q.substring(0, sIdx);
String between = q.substring(sIdx, wIdx);
String tail = q.substring(wIdx);
// If already SELECT *, nothing to do
if (between.matches("(?s).*\\b\\*\\b.*")) {
return q;
}
String selStar = between.replaceAll("(?is)SELECT\\s+.+", "SELECT * ");
String cand = head + selStar + tail;
if (g.accept(cand)) {
log.add("Simplified projection to SELECT *");
return cand;
}
}
return q;
}
private static String shrinkConstructTemplate(String q, Guard g, List<String> log) throws Exception {
// For explicit CONSTRUCT { template } WHERE { ... } ��� drop extra template triples.
// Strategy: inside the first top-level template block after CONSTRUCT, split by '.' and drop trailing parts.
int cIdx = indexOfKeyword(q, "CONSTRUCT");
if (cIdx < 0) {
return q;
}
int tplOpen = nextChar(q, '{', cIdx);
if (tplOpen < 0) {
return q;
}
int tplClose = matchBrace(q, tplOpen);
if (tplClose < 0) {
return q;
}
String templateBody = q.substring(tplOpen + 1, tplClose);
List<int[]> dotSegs = splitByDot(templateBody);
// Try removing segments from the end
for (int i = dotSegs.size() - 1; i >= 1; i--) { // keep at least one segment
int[] seg = dotSegs.get(i);
String newBody = templateBody.substring(0, seg[0]).trim();
if (!newBody.endsWith(".")) {
newBody = newBody + " .";
}
String cand = q.substring(0, tplOpen + 1) + "\n" + newBody + "\n" + q.substring(tplClose);
if (g.accept(cand)) {
log.add("Reduced CONSTRUCT template triples");
return cand;
}
}
return q;
}
private static String dropWhereStatements(String q, Guard g, List<String> log) throws Exception {
// Find first WHERE { ... } and drop dot-separated top-level statements
int wIdx = indexOfKeyword(q, "WHERE");
if (wIdx < 0) {
return q;
}
int open = nextChar(q, '{', wIdx);
if (open < 0) {
return q;
}
int close = matchBrace(q, open);
if (close < 0) {
return q;
}
String body = q.substring(open + 1, close);
List<int[]> segs = splitByDot(body);
if (segs.size() <= 1) {
return q;
}
for (int i = segs.size() - 1; i >= 0; i--) {
int[] seg = segs.get(i);
String newBody = (body.substring(0, seg[0]) + body.substring(seg[1])).trim();
if (!newBody.endsWith(".")) {
newBody = newBody + " .";
}
String cand = q.substring(0, open + 1) + "\n" + newBody + "\n" + q.substring(close);
if (g.accept(cand)) {
log.add("Dropped WHERE statement segment");
return cand;
}
}
return q;
}
// ===========================
// Token-level ddmin
// ===========================
private static String ddminTokens(String q, Guard g, boolean spaceyJoin, List<String> log) throws Exception {
List<Token> toks = Tokenizer.lex(q);
if (toks.isEmpty()) {
return q;
}
// ddmin over tokens
List<Token> minimized = ddmin(toks, cand -> {
try {
return g.accept(Tokenizer.join(cand, spaceyJoin));
} catch (Exception e) {
throw new RuntimeException(e);
}
});
String res = Tokenizer.join(minimized, spaceyJoin);
if (!res.equals(q)) {
log.add("ddmin reduced tokens: " + toks.size() + " ��� " + minimized.size());
}
return res;
}
private static <T> List<T> ddmin(List<T> items, Predicate<List<T>> test) {
// Classic ddmin (Andreas Zeller)
List<T> c = new ArrayList<>(items);
int n = 2;
while (c.size() >= 2) {
boolean reduced = false;
int chunkSize = (int) Math.ceil(c.size() / (double) n);
for (int i = 0; i < c.size(); i += chunkSize) {
int to = Math.min(c.size(), i + chunkSize);
List<T> subset = c.subList(i, to);
List<T> complement = new ArrayList<>(c.size() - subset.size());
if (i > 0) {
complement.addAll(c.subList(0, i));
}
if (to < c.size()) {
complement.addAll(c.subList(to, c.size()));
}
if (test.test(complement)) {
c = complement;
n = Math.max(2, n - 1);
reduced = true;
break;
}
}
if (!reduced) {
if (n >= c.size()) {
break;
}
n = Math.min(c.size(), n * 2);
}
}
return c;
}
// ===========================
// Low-level helpers & scanning
// ===========================
private static final class Guard {
final FailureOracle failure;
final ValidityOracle validity;
final Config cfg;
int attempts = 0;
int accepted = 0;
Guard(FailureOracle f, ValidityOracle v, Config cfg) {
this.failure = f;
this.validity = v;
this.cfg = cfg;
}
boolean withinBudget() {
return attempts < cfg.maxChecks;
}
boolean fails(String q) throws Exception {
attempts++;
return failure.fails(q);
}
boolean accept(String q) throws Exception {
attempts++;
boolean ok = failure.fails(q) && (!cfg.enforceValidity || (validity != null && validity.isValid(q)));
if (ok) {
accepted++;
}
return ok;
}
}
// --- Minimal string search helpers (regex guarded) ---
private static String replaceIf(String src, String regex, String repl) {
return src.replaceAll(regex, repl);
}
private static int indexOfRegex(String src, String regex) {
Matcher m = Pattern.compile(regex).matcher(src);
return m.find() ? m.start() : -1;
}
private static int indexOfKeyword(String src, String... words) {
int idx = 0;
for (int i = 0; i < words.length; i++) {
int j = indexOfWord(src, words[i], idx);
if (j < 0) {
return -1;
}
idx = j + words[i].length();
}
return idx - words[words.length - 1].length();
}
private static int indexOfWord(String src, String word, int fromIdx) {
String re = "(?i)\\b" + Pattern.quote(word) + "\\b";
Matcher m = Pattern.compile(re).matcher(src);
return m.find(fromIdx) ? m.start() : -1;
}
private static int endOfLineOrClause(String src, int from) {
int n = src.length();
for (int i = from; i < n; i++) {
char c = src.charAt(i);
if (c == '\n' || c == '\r') {
return i;
}
}
return n;
}
private static int endOfOrderBy(String q, int orderIdx) {
// Stop before LIMIT/OFFSET or end
int end = q.length();
for (String stop : new String[] { "LIMIT", "OFFSET", "GROUP", "HAVING" }) {
int s = indexOfWord(q, stop, orderIdx + 1);
if (s >= 0) {
end = Math.min(end, s);
}
}
return end;
}
private static String keepFirstOrderKey(String q, int start, int end) {
String head = q.substring(0, start);
String body = q.substring(start, end);
String tail = q.substring(end);
// Keep "ORDER BY <first key>"
String first = body.replaceFirst(
"(?is)ORDER\\s+BY\\s+(.+?)(,|\\)|\\s+ASC\\(|\\s+DESC\\(|\\s+LIMIT|\\s+OFFSET|$).*", "ORDER BY $1");
if (!first.equals(body)) {
return head + first + tail;
}
// last resort: remove everything after "ORDER BY" until next space
int ob = indexOfWord(body, "BY", 0);
if (ob >= 0) {
int ks = ob + 2;
int ke = body.indexOf(' ', ks + 1);
if (ke > 0) {
return head + body.substring(0, ke) + tail;
}
}
return q;
}
private static int endOfHaving(String q, int havingIdx) {
// Simple: from HAVING to next clause keyword or end
int end = q.length();
for (String stop : new String[] { "GROUP", "ORDER", "LIMIT", "OFFSET" }) {
int s = indexOfWord(q, stop, havingIdx + 1);
if (s >= 0) {
end = Math.min(end, s);
}
}
return end;
}
private static int endOfGroupBy(String q, int start) {
int end = q.length();
for (String stop : new String[] { "HAVING", "ORDER", "LIMIT", "OFFSET" }) {
int s = indexOfWord(q, stop, start + 1);
if (s >= 0) {
end = Math.min(end, s);
}
}
return end;
}
private static int nextChar(String s, char ch, int from) {
int i = s.indexOf(ch, from);
return i;
}
private static int matchBrace(String s, int openIdx) {
char open = s.charAt(openIdx);
char close = (open == '{') ? '}' : (open == '(') ? ')' : (open == '[' ? ']' : '\0');
if (close == '\0') {
return -1;
}
int depth = 0;
boolean inStr = false;
char strQ = 0;
for (int i = openIdx; i < s.length(); i++) {
char c = s.charAt(i);
if (!inStr && (c == '"' || c == '\'')) {
inStr = true;
strQ = c;
continue;
}
if (inStr) {
if (c == strQ && s.charAt(i - 1) != '\\') {
inStr = false;
}
continue;
}
if (c == open) {
depth++;
} else if (c == close) {
depth--;
if (depth == 0) {
return i;
}
}
}
return -1;
}
private static List<int[]> splitByDot(String body) {
List<int[]> segs = new ArrayList<>();
int depth = 0;
boolean inStr = false;
char strQ = 0;
int segStart = 0;
for (int i = 0; i < body.length(); i++) {
char c = body.charAt(i);
if (!inStr && (c == '"' || c == '\'')) {
inStr = true;
strQ = c;
continue;
}
if (inStr) {
if (c == strQ && body.charAt(i - 1) != '\\') {
inStr = false;
}
continue;
}
if (c == '{' || c == '(' || c == '[') {
depth++;
} else if (c == '}' || c == ')' || c == ']') {
depth--;
} else if (c == '.' && depth == 0) {
segs.add(new int[] { segStart, i + 1 }); // include dot
segStart = i + 1;
}
}
if (segStart < body.length()) {
segs.add(new int[] { segStart, body.length() });
}
return segs;
}
// --- Pattern matchers for blocks ---
private static final class Match {
final int start, end; // span to replace
final String inner; // inner block (for flattening)
final String kind;
Match(int s, int e, String inner, String kind) {
this.start = s;
this.end = e;
this.inner = inner;
this.kind = kind;
}
}
private static final class UnionMatch {
final int leftStart, unionIdx, unionLen, rightEnd;
UnionMatch(int ls, int ui, int ul, int re) {
this.leftStart = ls;
this.unionIdx = ui;
this.unionLen = ul;
this.rightEnd = re;
}
}
private static final class ValuesBlock {
final int start, end; // positions in source
final boolean rowForm; // true if VALUES (vars) { rows }
final List<List<String>> rows; // textual rows (already captured)
final String header; // "VALUES ?v {" or "VALUES (?x ?y) {"
ValuesBlock(int start, int end, boolean rowForm, List<List<String>> rows, String header) {
this.start = start;
this.end = end;
this.rowForm = rowForm;
this.rows = rows;
this.header = header;
}
String renderWithRows(List<List<String>> keep) {
StringBuilder sb = new StringBuilder();
sb.append(header).append(' ');
if (rowForm) {
for (List<String> r : keep) {
sb.append('(');
for (int i = 0; i < r.size(); i++) {
if (i > 0) {
sb.append(' ');
}
sb.append(r.get(i));
}
sb.append(") ");
}
} else {
// 1-col: header already "VALUES ?v {" form; keep rows as single terms
for (List<String> r : keep) {
if (!r.isEmpty()) {
sb.append(r.get(0)).append(' ');
}
}
}
sb.append('}');
return sb.toString();
}
}
private static Match findServiceLike(String q) {
// SERVICE [SILENT]? (IRI|?var) { P } or GRAPH (IRI|?var) { P }
for (String kw : new String[] { "SERVICE", "GRAPH" }) {
int idx = indexOfWord(q, kw, 0);
while (idx >= 0) {
int i = idx + kw.length();
// Skip "SILENT" for SERVICE
if (kw.equals("SERVICE")) {
int s = indexOfWord(q, "SILENT", i);
if (s == i || s == i + 1) {
i = s + "SILENT".length();
}
}
// Skip ws, then token (IRI or var)
while (i < q.length() && Character.isWhitespace(q.charAt(i))) {
i++;
}
if (i >= q.length()) {
break;
}
// Accept <...> or ?var/$var or prefixed name token; we just skip one token charwise.
if (q.charAt(i) == '<') {
int gt = q.indexOf('>', i + 1);
if (gt < 0) {
break;
}
i = gt + 1;
} else if (q.charAt(i) == '?' || q.charAt(i) == '$') {
int j = i + 1;
while (j < q.length() && isNameChar(q.charAt(j))) {
j++;
}
i = j;
} else {
// prefixed name
int j = i;
while (j < q.length() && isNameCharOrColon(q.charAt(j))) {
j++;
}
i = j;
}
// Now expect '{'
while (i < q.length() && Character.isWhitespace(q.charAt(i))) {
i++;
}
if (i >= q.length() || q.charAt(i) != '{') {
idx = indexOfWord(q, kw, idx + 1);
continue;
}
int close = matchBrace(q, i);
if (close < 0) {
idx = indexOfWord(q, kw, idx + 1);
continue;
}
String inner = q.substring(i + 1, close);
return new Match(idx, close + 1, inner, kw);
}
}
return null;
}
private static Match findKeywordBlock(String q, String kw) {
int idx = indexOfWord(q, kw, 0);
while (idx >= 0) {
int i = idx + kw.length();
while (i < q.length() && Character.isWhitespace(q.charAt(i))) {
i++;
}
if (i < q.length() && q.charAt(i) == '{') {
int close = matchBrace(q, i);
if (close > i) {
String inner = q.substring(i + 1, close);
return new Match(idx, close + 1, inner, kw);
}
}
idx = indexOfWord(q, kw, idx + 1);
}
return null;
}
private static Match findFilter(String q) {
int idx = indexOfWord(q, "FILTER", 0);
while (idx >= 0) {
int i = idx + "FILTER".length();
while (i < q.length() && Character.isWhitespace(q.charAt(i))) {
i++;
}
// FILTER EXISTS { ... } or NOT EXISTS { ... }
int tmp = i;
if (matchWord(q, tmp, "NOT")) {
tmp = skipWord(q, tmp, "NOT");
while (tmp < q.length() && Character.isWhitespace(q.charAt(tmp))) {
tmp++;
}
}
if (matchWord(q, tmp, "EXISTS")) {
tmp = skipWord(q, tmp, "EXISTS");
while (tmp < q.length() && Character.isWhitespace(q.charAt(tmp))) {
tmp++;
}
if (tmp < q.length() && q.charAt(tmp) == '{') {
int close = matchBrace(q, tmp);
if (close > tmp) {
String inner = q.substring(tmp + 1, close);
return new Match(idx, close + 1, inner, "FILTER");
}
}
}
// Otherwise assume FILTER <parenthesized expression>, remove up to matching ')'
if (i < q.length() && q.charAt(i) == '(') {
int close = matchBrace(q, i);
if (close > i) {
return new Match(idx, close + 1, null, "FILTER");
}
}
idx = indexOfWord(q, "FILTER", idx + 1);
}
return null;
}
private static Match findBind(String q) {
int idx = indexOfWord(q, "BIND", 0);
while (idx >= 0) {
int i = idx + "BIND".length();
while (i < q.length() && Character.isWhitespace(q.charAt(i))) {
i++;
}
if (i < q.length() && q.charAt(i) == '(') {
int close = matchBrace(q, i);
if (close > i) {
return new Match(idx, close + 1, null, "BIND");
}
}
idx = indexOfWord(q, "BIND", idx + 1);
}
return null;
}
private static ValuesBlock findValues(String q) {
int idx = indexOfWord(q, "VALUES", 0);
while (idx >= 0) {
int i = idx + "VALUES".length();
while (i < q.length() && Character.isWhitespace(q.charAt(i))) {
i++;
}
if (i >= q.length()) {
break;
}
if (q.charAt(i) == '(') {
// Row form: VALUES (?x ?y) { (..).. }
int varClose = matchBrace(q, i);
if (varClose < 0) {
break;
}
int braceOpen = nextNonWs(q, varClose + 1);
if (braceOpen < 0 || q.charAt(braceOpen) != '{') {
break;
}
int braceClose = matchBrace(q, braceOpen);
if (braceClose < 0) {
break;
}
String header = q.substring(idx, braceOpen).trim() + " {";
String rowsTxt = q.substring(braceOpen + 1, braceClose).trim();
List<List<String>> rows = parseValuesRows(rowsTxt, true);
return new ValuesBlock(idx, braceClose + 1, true, rows, header);
} else if (q.charAt(i) == '?' || q.charAt(i) == '$') {
// 1-col form: VALUES ?x { a b UNDEF }
int afterVar = i + 1;
while (afterVar < q.length() && isNameChar(q.charAt(afterVar))) {
afterVar++;
}
int braceOpen = nextNonWs(q, afterVar);
if (braceOpen < 0 || q.charAt(braceOpen) != '{') {
break;
}
int braceClose = matchBrace(q, braceOpen);
if (braceClose < 0) {
break;
}
String header = q.substring(idx, braceOpen).trim() + " {";
String rowsTxt = q.substring(braceOpen + 1, braceClose).trim();
List<List<String>> rows = parseValuesRows(rowsTxt, false);
return new ValuesBlock(idx, braceClose + 1, false, rows, header);
} else {
// Unknown VALUES form; skip
}
idx = indexOfWord(q, "VALUES", idx + 1);
}
return null;
}
private static List<List<String>> parseValuesRows(String txt, boolean rowForm) {
List<List<String>> rows = new ArrayList<>();
if (rowForm) {
// Rows like: (ex:s1 1) (ex:s2 UNDEF) ...
int i = 0;
while (true) {
i = skipWs(txt, i);
if (i >= txt.length()) {
break;
}
if (txt.charAt(i) != '(') {
break;
}
int close = matchBrace(txt, i);
if (close < 0) {
break;
}
String row = txt.substring(i + 1, close).trim();
if (!row.isEmpty()) {
rows.add(Arrays.stream(row.split("\\s+")).collect(Collectors.toList()));
}
i = close + 1;
}
} else {
// 1-col: tokens separated by whitespace
String[] parts = txt.split("\\s+");
for (String p : parts) {
if (!p.isEmpty()) {
rows.add(Collections.singletonList(p));
}
}
}
if (rows.isEmpty()) {
rows.add(Collections.singletonList("UNDEF")); // guard, though not used if caller checks accept()
}
return rows;
}
private static UnionMatch findUnion(String q) {
// Look for pattern: '}' UNION '{' at same nesting level
int depth = 0;
boolean inStr = false;
char qch = 0;
for (int i = 0; i < q.length(); i++) {
char c = q.charAt(i);
if (!inStr && (c == '"' || c == '\'')) {
inStr = true;
qch = c;
continue;
}
if (inStr) {
if (c == qch && q.charAt(i - 1) != '\\') {
inStr = false;
}
continue;
}
if (c == '{') {
depth++;
} else if (c == '}') {
depth--;
} else if ((c == 'U' || c == 'u') && depth >= 1) {
// Try match "UNION"
if (matchWord(q, i, "UNION")) {
// Nearest preceding '}' at same depth+1
int leftClose = prevChar(q, '}', i - 1);
if (leftClose < 0) {
continue;
}
// Find its matching '{'
int leftOpen = backwardsMatchBrace(q, leftClose);
if (leftOpen < 0) {
continue;
}
// Next '{' after UNION
int rightOpen = nextChar(q, '{', i + "UNION".length());
if (rightOpen < 0) {
continue;
}
int rightClose = matchBrace(q, rightOpen);
if (rightClose < 0) {
continue;
}
return new UnionMatch(leftOpen, i, "UNION".length(), rightClose);
}
}
}
return null;
}
private static int prevChar(String s, char ch, int from) {
for (int i = from; i >= 0; i--) {
if (s.charAt(i) == ch) {
return i;
}
}
return -1;
}
private static int backwardsMatchBrace(String s, int closeIdx) {
char close = s.charAt(closeIdx);
char open = (close == '}') ? '{' : (close == ')') ? '(' : (close == ']') ? '[' : '\0';
if (open == '\0') {
return -1;
}
int depth = 0;
boolean inStr = false;
char qch = 0;
for (int i = closeIdx; i >= 0; i--) {
char c = s.charAt(i);
if (!inStr && (c == '"' || c == '\'')) {
inStr = true;
qch = c;
continue;
}
if (inStr) {
if (c == qch && (i == 0 || s.charAt(i - 1) != '\\')) {
inStr = false;
}
continue;
}
if (c == close) {
depth++;
} else if (c == open) {
depth--;
if (depth == 0) {
return i;
}
}
}
return -1;
}
private static boolean matchWord(String s, int pos, String word) {
if (pos < 0 || pos + word.length() > s.length()) {
return false;
}
String sub = s.substring(pos, pos + word.length());
boolean b = sub.equalsIgnoreCase(word);
if (!b) {
return false;
}
// Word boundary checks
boolean leftOk = (pos == 0) || !Character.isLetterOrDigit(s.charAt(pos - 1));
int end = pos + word.length();
boolean rightOk = (end == s.length()) || !Character.isLetterOrDigit(s.charAt(end));
return leftOk && rightOk;
}
private static int skipWord(String s, int pos, String word) {
return pos + word.length();
}
private static int nextNonWs(String s, int pos) {
int i = pos;
while (i < s.length() && Character.isWhitespace(s.charAt(i))) {
i++;
}
return i < s.length() ? i : -1;
}
private static boolean isNameChar(char c) {
return Character.isLetterOrDigit(c) || c == '_' || c == '-';
}
private static boolean isNameCharOrColon(char c) {
return isNameChar(c) || c == ':' || c == '.';
}
// ===========================
// Tokenizer & Joiner
// ===========================
private enum TKind {
WORD,
VAR,
IRI,
STRING,
PUNCT
}
private static final class Token {
final String text;
final TKind kind;
Token(String t, TKind k) {
this.text = t;
this.kind = k;
}
@Override
public String toString() {
return text;
}
}
private static final class Tokenizer {
static List<Token> lex(String s) {
List<Token> out = new ArrayList<>();
int n = s.length();
int i = 0;
while (i < n) {
char c = s.charAt(i);
// Whitespace
if (Character.isWhitespace(c)) {
i++;
continue;
}
// Comments: # ... EOL
if (c == '#') {
while (i < n && s.charAt(i) != '\n' && s.charAt(i) != '\r') {
i++;
}
continue;
}
// IRI
if (c == '<') {
int j = s.indexOf('>', i + 1);
if (j < 0) {
out.add(new Token("<", TKind.PUNCT));
i++;
continue;
}
out.add(new Token(s.substring(i, j + 1), TKind.IRI));
i = j + 1;
continue;
}
// String (single or double)
if (c == '"' || c == '\'') {
int j = i + 1;
while (j < n) {
char d = s.charAt(j);
if (d == c && s.charAt(j - 1) != '\\') {
j++;
break;
}
j++;
}
if (j > n) {
j = n;
}
out.add(new Token(s.substring(i, j), TKind.STRING));
i = j;
continue;
}
// Variable
if (c == '?' || c == '$') {
int j = i + 1;
while (j < n && isNameChar(s.charAt(j))) {
j++;
}
out.add(new Token(s.substring(i, j), TKind.VAR));
i = j;
continue;
}
// Punctuation single chars we care about
if ("{}[]().,;|/^*!+=<>?-".indexOf(c) >= 0) {
out.add(new Token(String.valueOf(c), TKind.PUNCT));
i++;
continue;
}
// Word / prefixed name token (include colon and dot parts)
if (Character.isLetter(c) || c == '_') {
int j = i + 1;
while (j < n && isNameCharOrColon(s.charAt(j))) {
j++;
}
out.add(new Token(s.substring(i, j), TKind.WORD));
i = j;
continue;
}
// Numbers
if (Character.isDigit(c)) {
int j = i + 1;
while (j < n && (Character.isDigit(s.charAt(j)) || s.charAt(j) == '.' || s.charAt(j) == 'e'
|| s.charAt(j) == 'E' || s.charAt(j) == '+' || s.charAt(j) == '-')) {
j++;
}
out.add(new Token(s.substring(i, j), TKind.WORD));
i = j;
continue;
}
// Fallback: single char as punct
out.add(new Token(String.valueOf(c), TKind.PUNCT));
i++;
}
return out;
}
static String join(List<Token> toks, boolean spacey) {
if (toks.isEmpty()) {
return "";
}
StringBuilder sb = new StringBuilder(toks.size() * 4);
Token prev = null;
for (Token t : toks) {
if (prev != null && spaceNeeded(prev, t, spacey)) {
sb.append(' ');
}
sb.append(t.text);
prev = t;
}
return sb.toString().trim();
}
private static boolean spaceNeeded(Token a, Token b, boolean spacey) {
if (!spacey) {
return false;
}
// Separate word-ish tokens
if ((a.kind == TKind.WORD || a.kind == TKind.VAR || a.kind == TKind.STRING || a.kind == TKind.IRI)
&& (b.kind == TKind.WORD || b.kind == TKind.VAR || b.kind == TKind.STRING || b.kind == TKind.IRI)) {
return true;
}
// Around punctuation we can usually omit, but keep for safety around operators
String bt = b.text;
if ("|/^*!+=<>?".contains(bt)) {
return true;
}
// Opening punctuation
if ("({[".contains(bt)) {
return true;
}
// Closing punctuation doesn't need leading space
if (")}]".contains(bt)) {
return false;
}
// Dots/semis/commas: ensure separation from words
if (".,;".contains(bt) && (a.kind == TKind.WORD || a.kind == TKind.VAR)) {
return false;
}
return false;
}
}
// Remove the last matching tail clause (e.g., LIMIT 10, OFFSET 20) from the query text.
private static String stripTailClause(String src, String regex) {
Matcher m = Pattern.compile(regex).matcher(src);
int lastStart = -1, lastEnd = -1;
while (m.find()) {
lastStart = m.start();
lastEnd = m.end();
}
if (lastStart >= 0) {
return src.substring(0, lastStart) + src.substring(lastEnd);
}
return src;
}
// Skip ASCII whitespace starting at pos; returns first non-ws index (or src.length()).
private static int skipWs(String s, int pos) {
int i = pos;
while (i < s.length() && Character.isWhitespace(s.charAt(i))) {
i++;
}
return i;
}
}