CanonicalizeGroupedTailStepTransform.java
/*******************************************************************************
* Copyright (c) 2025 Eclipse RDF4J contributors.
*
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Distribution License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/org/documents/edl-v10.php.
*
* SPDX-License-Identifier: BSD-3-Clause
*******************************************************************************/
package org.eclipse.rdf4j.queryrender.sparql.ir.util.transform;
import java.util.ArrayList;
import java.util.List;
import org.eclipse.rdf4j.queryrender.sparql.TupleExprIRRenderer;
import org.eclipse.rdf4j.queryrender.sparql.ir.IrBGP;
import org.eclipse.rdf4j.queryrender.sparql.ir.IrNode;
import org.eclipse.rdf4j.queryrender.sparql.ir.IrPathTriple;
import org.eclipse.rdf4j.queryrender.sparql.ir.IrSubSelect;
/**
* Normalize grouping of a final tail step like "/foaf:name" so that it appears outside the top-level grouped PT/PT
* fusion instead of inside the right-hand side group. This rewrites patterns of the form:
*
* (?LEFT)/((?RIGHT/tail)) -> ((?LEFT)/(?RIGHT))/tail
*
* It is a best-effort string-level fix applied late in the pipeline to match expected canonical output.
*/
public final class CanonicalizeGroupedTailStepTransform extends BaseTransform {
private CanonicalizeGroupedTailStepTransform() {
}
public static IrBGP apply(IrBGP bgp, TupleExprIRRenderer r) {
if (bgp == null) {
return null;
}
final List<IrNode> out = new ArrayList<>();
for (IrNode n : bgp.getLines()) {
IrNode m = n;
if (n instanceof IrPathTriple) {
IrPathTriple pt = (IrPathTriple) n;
String ptxt = pt.getPathText();
// First: move a final tail step out of the right-hand group when safe:
// (LEFT)/((RIGHT/tail)) -> ((LEFT)/(RIGHT))/tail
String afterTail = rewriteGroupedTail(ptxt);
// Second: normalize split-middle grouping like ((L)/(M))/((R)) -> ((L)/(M/(R)))
String rew = rewriteFuseSplitMiddle(afterTail);
if (!rew.equals(ptxt)) {
IrPathTriple np = new IrPathTriple(pt.getSubject(), rew, pt.getObject(), pt.isNewScope(),
pt.getPathVars());
m = np;
}
} else if (n instanceof IrSubSelect) {
// keep as-is
} else {
// Generic recursion into containers
m = BaseTransform.rewriteContainers(n, child -> apply(child, r));
}
out.add(m);
}
return BaseTransform.bgpWithLines(bgp, out);
}
/**
* Rewrite a path text of the form "((LEFT)/(MID))/((RIGHT))" into "((LEFT)/(MID/(RIGHT)))". MID is assumed to be a
* simple step or small group like "^ex:d".
*/
static String rewriteFuseSplitMiddle(String path) {
if (path == null) {
return null;
}
String s = path.trim();
if (!s.startsWith("((")) {
return s;
}
int first = s.indexOf(")/(");
if (first <= 0) {
return s;
}
// After first delim, expect MID then ")/(" then RIGHT then ')'
String afterFirst = s.substring(first + 3);
int second = afterFirst.indexOf(")/(");
if (second <= 0) {
return s;
}
String left = s.substring(2, first); // drop initial "(("
String mid = afterFirst.substring(0, second);
String rightWithParens = afterFirst.substring(second + 2); // starts with '('
if (rightWithParens.length() < 3 || rightWithParens.charAt(0) != '('
|| rightWithParens.charAt(rightWithParens.length() - 1) != ')') {
return s;
}
String right = rightWithParens.substring(1, rightWithParens.length() - 1);
// Safety: only rewrite when MID is a simple step/group without quantifier. Rewriting
// a quantified middle part like "(!(a|^b)? )" is error-prone and can lead to
// mismatched parentheses or semantics changes in rare shapes.
if (mid.indexOf('?') >= 0 || mid.indexOf('*') >= 0 || mid.indexOf('+') >= 0) {
return s;
}
// Build fused: ((LEFT)/(MID/(RIGHT)))
return "((" + left + ")/(" + mid + "/(" + right + ")))";
}
/**
* Rewrite a path text of the form "(LEFT)/((RIGHT/tail))" into "((LEFT)/(RIGHT))/tail". Returns the original text
* when no safe rewrite is detected.
*/
static String rewriteGroupedTail(String path) {
if (path == null) {
return null;
}
String s = path.trim();
// Require pattern starting with '(' and containing ")/(" and ending with ')'
int sep = s.indexOf(")/(");
if (sep <= 0 || s.charAt(0) != '(' || s.charAt(s.length() - 1) != ')') {
return s;
}
String left = s.substring(1, sep); // drop leading '('
String rightWithParens = s.substring(sep + 2); // starts with "("
if (rightWithParens.length() < 3 || rightWithParens.charAt(0) != '('
|| rightWithParens.charAt(rightWithParens.length() - 1) != ')') {
return s;
}
String right = rightWithParens.substring(1, rightWithParens.length() - 1);
int lastSlash = right.lastIndexOf('/');
if (lastSlash < 0) {
return s; // nothing to peel off
}
String base = right.substring(0, lastSlash);
String tail = right.substring(lastSlash + 1);
// Tail must look like a simple step (IRI or ^IRI) without inner alternation or quantifier
if (tail.isEmpty() || tail.contains("|") || tail.contains("(") || tail.contains(")") ||
tail.endsWith("?") || tail.endsWith("*") || tail.endsWith("+")) {
return s;
}
// Rebuild: ((LEFT)/(BASE))/TAIL
return "((" + left + ")/(" + base + "))/" + tail;
}
}