BaseTransform.java
/*******************************************************************************
* Copyright (c) 2025 Eclipse RDF4J contributors.
*
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Distribution License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/org/documents/edl-v10.php.
*
* SPDX-License-Identifier: BSD-3-Clause
******************************************************************************/
package org.eclipse.rdf4j.queryrender.sparql.ir.util.transform;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.function.Function;
import org.eclipse.rdf4j.model.IRI;
import org.eclipse.rdf4j.query.algebra.Var;
import org.eclipse.rdf4j.queryrender.sparql.TupleExprIRRenderer;
import org.eclipse.rdf4j.queryrender.sparql.ir.IrBGP;
import org.eclipse.rdf4j.queryrender.sparql.ir.IrGraph;
import org.eclipse.rdf4j.queryrender.sparql.ir.IrMinus;
import org.eclipse.rdf4j.queryrender.sparql.ir.IrNode;
import org.eclipse.rdf4j.queryrender.sparql.ir.IrOptional;
import org.eclipse.rdf4j.queryrender.sparql.ir.IrPathTriple;
import org.eclipse.rdf4j.queryrender.sparql.ir.IrService;
import org.eclipse.rdf4j.queryrender.sparql.ir.IrStatementPattern;
import org.eclipse.rdf4j.queryrender.sparql.ir.IrSubSelect;
import org.eclipse.rdf4j.queryrender.sparql.ir.IrUnion;
import org.eclipse.rdf4j.queryrender.sparql.util.VarUtils;
/**
* Shared helpers and small utilities for IR transform passes.
*
* Conventions and invariants: - Transforms are functional: they do not mutate input nodes; instead they build new IR
* blocks as needed. - Path/chain fusions are conservative and only cross intermediate variables that the parser created
* for property paths (variable names prefixed with {@code _anon_path_}). This prevents accidental elimination or
* inversion of user-defined variables. - Text helpers respect property path precedence and add parentheses only when
* required for correctness. - Container nodes (GRAPH/OPTIONAL/MINUS/UNION/SERVICE) are preserved, and recursion uses
* {@code transformChildren} to keep transform code small and predictable.
*/
public class BaseTransform {
/*
* =============================== ===== Union Merge Policy ====== ===============================
*
* Several transforms can merge a UNION of two branches into a single path expression (an alternation) or a single
* negated property set (NPS). This is valuable for readability and streaming-friendly output, but it must be done
* conservatively to never change query semantics nor collapse user-visible variables.
*
* Parser-provided hints: the RDF4J parser introduces anonymous bridge variables when decoding property paths. These
* variables use a reserved prefix: - _anon_path_* (forward-oriented bridge) - _anon_path_inverse_*
* (inverse-oriented bridge)
*
* We use these names as a safety signal that fusing across the bridge does not remove a user variable.
*
* High-level rules applied by union-fusing transforms: 1) No new scope (i.e., the UNION node is not marked as
* introducing a new scope): - The UNION may be merged only if EACH branch contains at least one anonymous path
* bridge variable (either prefix). See unionBranchesAllHaveAnonPathBridge().
*
* 2) New scope (i.e., the UNION node carries explicit variable-scope change): - By default, do NOT merge such a
* UNION. - Special exception: if both branches share at least one COMMON variable name that starts with the
* _anon_path_ prefix (either orientation), the UNION may still be merged. This indicates the new-scope originated
* from path decoding and is safe to compact. See unionBranchesShareCommonAnonPathVarName().
*
* Additional per-transform constraints remain in place (e.g., fusing only bare NPS, or simple single-step triples,
* identical endpoints, identical GRAPH reference), and transforms preserve explicit grouping braces when the input
* UNION marked a new scope (by wrapping the fused result in a grouped IrBGP as needed).
*/
// Local copy of parser's _anon_path_ naming hint for safe path fusions
public static final String ANON_PATH_PREFIX = "_anon_path_";
// Additional hint used by the parser for inverse-oriented anonymous path variables.
public static final String ANON_PATH_INVERSE_PREFIX = "_anon_path_inverse_";
// --------------- Path text helpers: add parens only when needed ---------------
/** Convenience: true iff SP has a constant-IRI predicate. */
public static boolean isConstantIriPredicate(IrStatementPattern sp) {
if (sp == null) {
return false;
}
Var p = sp.getPredicate();
return p != null && p.hasValue() && p.getValue() instanceof IRI;
}
/** Convenience: render a constant-IRI predicate Var to text. Returns null if not a constant IRI. */
public static String iri(Var pred, TupleExprIRRenderer r) {
if (pred == null || !pred.hasValue() || !(pred.getValue() instanceof IRI)) {
return null;
}
return r.convertIRIToString((IRI) pred.getValue());
}
/**
* Normalize compact negated-property-set forms into the canonical parenthesized variant. Examples: "!ex:p" ->
* "!(ex:p)", "!^ex:p" -> "!(^ex:p)". Leaves already-canonical and non-NPS text unchanged.
*/
public static String normalizeCompactNps(String path) {
if (path == null) {
return null;
}
String t = path.trim();
if (t.isEmpty()) {
return t;
}
if (t.startsWith("!(") && t.endsWith(")")) {
return t;
}
if (t.startsWith("!^")) {
return "!(" + t.substring(1) + ")"; // !^ex:p -> !(^ex:p)
}
if (t.startsWith("!") && (t.length() == 1 || t.charAt(1) != '(')) {
return "!(" + t.substring(1) + ")"; // !ex:p -> !(ex:p)
}
return t;
}
/** Merge NPS members of two canonical strings '!(...)', returning '!(a|b)'. Falls back to 'a' when malformed. */
public static String mergeNpsMembers(String a, String b) {
if (a == null || b == null) {
return a;
}
int a1 = a.indexOf('('), a2 = a.lastIndexOf(')');
int b1 = b.indexOf('('), b2 = b.lastIndexOf(')');
if (a1 < 0 || a2 < 0 || b1 < 0 || b2 < 0) {
return a;
}
String ia = a.substring(a1 + 1, a2).trim();
String ib = b.substring(b1 + 1, b2).trim();
if (ia.isEmpty()) {
return b;
}
if (ib.isEmpty()) {
return a;
}
return "!(" + ia + "|" + ib + ")";
}
/**
* Universal safeguard for explicit user UNIONs: true iff the UNION is marked as new scope and all its branches are
* also marked as new scope. Such a UNION should never be fused into a single path expression.
*/
public static boolean unionIsExplicitAndAllBranchesScoped(final IrUnion u) {
if (u == null || !u.isNewScope()) {
return false;
}
if (u.getBranches() == null || u.getBranches().isEmpty()) {
return false;
}
for (IrBGP b : u.getBranches()) {
if (!b.isNewScope()) {
if (b.getLines().size() != 1 || !b.getLines().get(0).isNewScope()) {
return false;
}
}
}
return true;
}
/**
* Utility: rewrite container nodes by applying a given function to their inner IrBGP children. Non-container nodes
* are returned unchanged. This abstracts common recursion boilerplate across many transforms and ensures newScope
* and other flags are preserved consistently for containers.
*
* Containers handled: IrGraph, IrOptional, IrMinus, IrService, IrUnion. Nested IrBGP lines that appear directly
* inside a parent IrBGP (explicit grouping) are intentionally left unchanged here ��� transforms should decide if and
* how to recurse into such explicit groups.
*/
public static IrNode rewriteContainers(IrNode n, Function<IrBGP, IrBGP> f) {
if (n == null) {
return null;
}
if (n instanceof IrGraph) {
IrGraph g = (IrGraph) n;
return new IrGraph(g.getGraph(), f.apply(g.getWhere()), g.isNewScope());
}
if (n instanceof IrOptional) {
IrOptional o = (IrOptional) n;
return new IrOptional(f.apply(o.getWhere()), o.isNewScope());
}
if (n instanceof IrMinus) {
IrMinus m = (IrMinus) n;
return new IrMinus(f.apply(m.getWhere()), m.isNewScope());
}
if (n instanceof IrService) {
IrService s = (IrService) n;
return new IrService(s.getServiceRefText(), s.isSilent(), f.apply(s.getWhere()), s.isNewScope());
}
if (n instanceof IrUnion) {
IrUnion u = (IrUnion) n;
IrUnion u2 = new IrUnion(u.isNewScope());
for (IrBGP b : u.getBranches()) {
u2.addBranch(f.apply(b));
}
u2.setNewScope(u.isNewScope());
return u2;
}
// Do not auto-descend into IrBGP explicit groups here; caller decides.
return n;
}
// NOTE: Depth-aware path helpers moved to PathTextUtils; call it directly at use sites.
/** Build a new IrBGP with the same scope flag and the provided lines. */
public static IrBGP bgpWithLines(IrBGP original, List<IrNode> lines) {
IrBGP res = new IrBGP(original.isNewScope());
if (lines != null) {
for (IrNode n : lines) {
res.add(n);
}
}
res.setNewScope(original.isNewScope());
return res;
}
public static void copyAllExcept(IrBGP from, IrBGP to, IrNode except) {
if (from == null) {
return;
}
for (IrNode ln : from.getLines()) {
if (ln == except) {
continue;
}
to.add(ln);
}
}
/** Fuse adjacent IrPathTriple nodes when the first's object equals the second's subject. */
public static IrBGP fuseAdjacentPtThenPt(IrBGP bgp) {
if (bgp == null) {
return null;
}
List<IrNode> in = bgp.getLines();
List<IrNode> out = new ArrayList<>();
for (int i = 0; i < in.size(); i++) {
IrNode n = in.get(i);
if (n instanceof IrPathTriple && i + 1 < in.size() && in.get(i + 1) instanceof IrPathTriple) {
IrPathTriple a = (IrPathTriple) n;
IrPathTriple b = (IrPathTriple) in.get(i + 1);
Var bridge = a.getObject();
if (sameVar(bridge, b.getSubject()) && isAnonPathVar(bridge)) {
// Merge a and b: s -(a.path/b.path)-> o. Keep explicit grouping to enable later canonicalization.
String fusedPath = "(" + a.getPathText() + ")/(" + b.getPathText() + ")";
out.add(new IrPathTriple(a.getSubject(), a.getSubjectOverride(), fusedPath, b.getObject(),
b.getObjectOverride(), IrPathTriple.mergePathVars(a, b), false));
i += 1; // consume b
} else if (sameVar(bridge, b.getObject()) && isAnonPathVar(bridge)) {
// Merge a and b with inverse join on b. Keep explicit grouping.
String fusedPath = "(" + a.getPathText() + ")/^(" + b.getPathText() + ")";
out.add(new IrPathTriple(a.getSubject(), a.getSubjectOverride(), fusedPath, b.getSubject(),
b.getSubjectOverride(), IrPathTriple.mergePathVars(a, b), false));
i += 1; // consume b
} else {
// Additional cases: the bridge variable occurs as the subject of the first path triple.
Var aSubj = a.getSubject();
if (isAnonPathVar(aSubj)) {
// Avoid inverting NPS members: if 'a' is a bare negated property set, do not
// attempt subject-shared composition which requires inverting 'a'. Leave to other
// fusers that do not alter the NPS text.
String aPath = a.getPathText();
boolean aIsNps = aPath != null && aPath.trim().startsWith("!(");
if (aIsNps) {
out.add(n);
continue;
}
// Case: a.subject == b.subject -> compose by inverting 'a' and chaining forward with 'b'
if (sameVar(aSubj, b.getSubject())) {
String left = invertNegatedPropertySet(aPath);
if (left == null) {
left = PathTextUtils.wrapForInverse(aPath);
}
String fusedPath = left + "/" + PathTextUtils.wrapForSequence(b.getPathText());
out.add(new IrPathTriple(a.getObject(), a.getObjectOverride(), fusedPath, b.getObject(),
b.getObjectOverride(), IrPathTriple.mergePathVars(a, b), false));
i += 1; // consume b
continue;
}
// Case: a.subject == b.object -> compose by inverting both 'a' and 'b'
if (sameVar(aSubj, b.getObject())) {
String left = invertNegatedPropertySet(aPath);
if (left == null) {
left = PathTextUtils.wrapForInverse(aPath);
}
String right = PathTextUtils.wrapForInverse(b.getPathText());
String fusedPath = left + "/" + right;
out.add(new IrPathTriple(a.getObject(), a.getObjectOverride(), fusedPath, b.getSubject(),
b.getSubjectOverride(), IrPathTriple.mergePathVars(a, b), false));
i += 1; // consume b
continue;
}
}
out.add(n);
}
} else {
out.add(n);
}
}
IrBGP res = new IrBGP(bgp.isNewScope());
out.forEach(res::add);
res.setNewScope(bgp.isNewScope());
return res;
}
/**
* Fuse a three-line sequence: IrPathTriple (A), IrStatementPattern (B), IrPathTriple (C) into A then ( ^B.p / C ).
*
* Pattern constraints: - A.object equals B.object (inverse join candidate) and A.object is an _anon_path_* var. -
* B.subject equals C.subject and both B.subject and B.object are _anon_path_* vars.
*/
public static IrBGP fusePtSpPtSequence(IrBGP bgp, TupleExprIRRenderer r) {
if (bgp == null) {
return null;
}
List<IrNode> in = bgp.getLines();
List<IrNode> out = new ArrayList<>();
for (int i = 0; i < in.size(); i++) {
IrNode a = in.get(i);
if (a instanceof IrPathTriple && i + 2 < in.size() && in.get(i + 1) instanceof IrStatementPattern
&& in.get(i + 2) instanceof IrPathTriple) {
IrPathTriple ptA = (IrPathTriple) a;
IrStatementPattern spB = (IrStatementPattern) in.get(i + 1);
IrPathTriple ptC = (IrPathTriple) in.get(i + 2);
Var bPred = spB.getPredicate();
if (isConstantIriPredicate(spB)) {
if (sameVar(ptA.getObject(), spB.getObject()) && isAnonPathVar(ptA.getObject())
&& sameVar(spB.getSubject(), ptC.getSubject()) && isAnonPathVar(spB.getSubject())
&& isAnonPathVar(spB.getObject())) {
String fusedPath = "^" + iri(bPred, r) + "/" + ptC.getPathText();
IrPathTriple d = new IrPathTriple(spB.getObject(), spB.getObjectOverride(), fusedPath,
ptC.getObject(), ptC.getObjectOverride(), IrPathTriple.mergePathVars(ptC), false);
// Keep A; then D replaces B and C
out.add(ptA);
out.add(d);
i += 2; // consume B and C
continue;
}
}
}
out.add(a);
}
IrBGP res = new IrBGP(bgp.isNewScope());
out.forEach(res::add);
res.setNewScope(bgp.isNewScope());
return res;
}
/**
* Re-orient a bare negated property set path "!(...)" so that its object matches the subject of the immediately
* following triple when possible, enabling chaining: prefer s !(...) ?x when the next line starts with ?x ...
*/
public static IrBGP orientBareNpsForNext(IrBGP bgp) {
if (bgp == null) {
return null;
}
List<IrNode> in = bgp.getLines();
List<IrNode> out = new ArrayList<>();
for (int i = 0; i < in.size(); i++) {
IrNode n = in.get(i);
if (n instanceof IrPathTriple) {
IrPathTriple pt = (IrPathTriple) n;
// Do not attach head/tail when the path contains an alternation anywhere.
// Some branches may require different tails, and lifting a tail outside
// would alter grouping expected by renderer tests.
String ptxtGlobal = pt.getPathText();
if (ptxtGlobal != null && ptxtGlobal.indexOf('|') >= 0) {
out.add(pt);
continue;
}
String ptxt = pt.getPathText();
if (ptxt != null) {
String s = ptxt.trim();
if (s.startsWith("!(") && s.endsWith(")")) {
// Do not re-orient bare NPS here. Flipping NPS to chain with the following
// triple inverts individual members (ex:g <-> ^ex:g), which breaks
// idempotence on round-trips. Other fusion passes can still chain without
// altering the NPS semantics.
}
}
out.add(pt);
continue;
}
// Recurse
if (n instanceof IrGraph) {
IrGraph g = (IrGraph) n;
out.add(new IrGraph(g.getGraph(), orientBareNpsForNext(g.getWhere()), g.isNewScope()));
continue;
}
if (n instanceof IrOptional) {
IrOptional o = (IrOptional) n;
IrOptional no = new IrOptional(orientBareNpsForNext(o.getWhere()), o.isNewScope());
no.setNewScope(o.isNewScope());
out.add(no);
continue;
}
if (n instanceof IrMinus) {
IrMinus m = (IrMinus) n;
out.add(new IrMinus(orientBareNpsForNext(m.getWhere()), m.isNewScope()));
continue;
}
if (n instanceof IrUnion) {
IrUnion u = (IrUnion) n;
IrUnion u2 = new IrUnion(u.isNewScope());
for (IrBGP b : u.getBranches()) {
u2.addBranch(orientBareNpsForNext(b));
}
out.add(u2);
continue;
}
if (n instanceof IrService) {
IrService s = (IrService) n;
out.add(new IrService(s.getServiceRefText(), s.isSilent(), orientBareNpsForNext(s.getWhere()),
s.isNewScope()));
continue;
}
out.add(n);
}
IrBGP res = new IrBGP(bgp.isNewScope());
out.forEach(res::add);
res.setNewScope(bgp.isNewScope());
return res;
}
public static IrBGP fuseAdjacentSpThenPt(IrBGP bgp, TupleExprIRRenderer r) {
if (bgp == null) {
return null;
}
List<IrNode> in = bgp.getLines();
List<IrNode> out = new ArrayList<>();
for (int i = 0; i < in.size(); i++) {
IrNode n = in.get(i);
if (i + 1 < in.size() && n instanceof IrStatementPattern && in.get(i + 1) instanceof IrPathTriple) {
IrStatementPattern sp = (IrStatementPattern) n;
Var p = sp.getPredicate();
if (isConstantIriPredicate(sp)) {
IrPathTriple pt = (IrPathTriple) in.get(i + 1);
if (sameVar(sp.getObject(), pt.getSubject()) && isAnonPathVar(pt.getSubject())) {
String fused = iri(p, r) + "/" + pt.getPathText();
out.add(new IrPathTriple(sp.getSubject(), sp.getSubjectOverride(), fused, pt.getObject(),
pt.getObjectOverride(), IrPathTriple.mergePathVars(pt), false));
i += 1;
continue;
} else if (sameVar(sp.getSubject(), pt.getObject()) && isAnonPathVar(pt.getObject())) {
String fused = pt.getPathText() + "/^" + iri(p, r);
out.add(new IrPathTriple(pt.getSubject(), pt.getSubjectOverride(), fused, sp.getObject(),
sp.getObjectOverride(), IrPathTriple.mergePathVars(pt), false));
i += 1;
continue;
}
}
}
out.add(n);
}
IrBGP res = new IrBGP(bgp.isNewScope());
out.forEach(res::add);
res.setNewScope(bgp.isNewScope());
return res;
}
public static IrBGP joinPathWithLaterSp(IrBGP bgp, TupleExprIRRenderer r) {
if (bgp == null) {
return null;
}
List<IrNode> in = new ArrayList<>(bgp.getLines());
List<IrNode> out = new ArrayList<>();
Set<IrNode> removed = new HashSet<>();
for (int i = 0; i < in.size(); i++) {
IrNode n = in.get(i);
if (removed.contains(n)) {
continue;
}
if (n instanceof IrPathTriple) {
IrPathTriple pt = (IrPathTriple) n;
Var objVar = pt.getObject();
if (isAnonPathVar(objVar)) {
IrStatementPattern join = null;
boolean inverse = false;
for (int j = i + 1; j < in.size(); j++) {
IrNode m = in.get(j);
if (!(m instanceof IrStatementPattern)) {
continue;
}
IrStatementPattern sp = (IrStatementPattern) m;
if (!isConstantIriPredicate(sp)) {
continue;
}
// If this SP is immediately followed by a PathTriple that shares SP.subject as its subject,
// prefer the later SP+PT fusion instead of attaching the SP here. This preserves canonical
// grouping like ...*/(^ex:d/(...)).
if (j + 1 < in.size() && in.get(j + 1) instanceof IrPathTriple) {
IrPathTriple nextPt = (IrPathTriple) in.get(j + 1);
if (sameVar(sp.getSubject(), nextPt.getSubject())
|| sameVar(sp.getObject(), nextPt.getSubject())) {
continue; // skip this SP; allow SP+PT rule to handle
}
}
if (sameVar(objVar, sp.getSubject()) && isAnonPathVar(sp.getObject())) {
join = sp;
inverse = false;
break;
}
if (sameVar(objVar, sp.getObject()) && isAnonPathVar(sp.getSubject())) {
join = sp;
inverse = true;
break;
}
}
if (join != null) {
String step = iri(join.getPredicate(), r);
String newPath = pt.getPathText() + "/" + (inverse ? "^" : "") + step;
Var newEnd = inverse ? join.getSubject() : join.getObject();
IrNode newEndOverride = inverse ? join.getSubjectOverride() : join.getObjectOverride();
pt = new IrPathTriple(pt.getSubject(), pt.getSubjectOverride(), newPath, newEnd, newEndOverride,
pt.getPathVars(), pt.isNewScope());
removed.add(join);
}
}
out.add(pt);
continue;
}
// Recurse within nested BGPs
if (n instanceof IrGraph) {
IrGraph g = (IrGraph) n;
IrBGP inner = g.getWhere();
inner = joinPathWithLaterSp(inner, r);
inner = fuseAltInverseTailBGP(inner, r);
out.add(new IrGraph(g.getGraph(), inner, g.isNewScope()));
continue;
}
if (n instanceof IrOptional) {
IrOptional o = (IrOptional) n;
IrOptional no = new IrOptional(joinPathWithLaterSp(o.getWhere(), r), o.isNewScope());
out.add(no);
continue;
}
if (n instanceof IrMinus) {
IrMinus m = (IrMinus) n;
out.add(new IrMinus(joinPathWithLaterSp(m.getWhere(), r), m.isNewScope()));
continue;
}
if (n instanceof IrUnion) {
IrUnion u = (IrUnion) n;
IrUnion u2 = new IrUnion(u.isNewScope());
for (IrBGP b : u.getBranches()) {
u2.addBranch(joinPathWithLaterSp(b, r));
}
out.add(u2);
continue;
}
if (n instanceof IrService) {
IrService s = (IrService) n;
out.add(new IrService(s.getServiceRefText(), s.isSilent(), joinPathWithLaterSp(s.getWhere(), r),
s.isNewScope()));
continue;
}
if (n instanceof IrSubSelect) {
out.add(n); // keep raw subselects
continue;
}
out.add(n);
}
IrBGP res = new IrBGP(bgp.isNewScope());
for (IrNode n2 : out) {
if (!removed.contains(n2)) {
res.add(n2);
}
}
return res;
}
public static boolean sameVar(Var a, Var b) {
return VarUtils.sameVar(a, b);
}
/**
* True when both variables denote the same term: compares names if both are variables without value, or compares
* values if both are constants. Returns false when one has a value and the other does not.
*/
public static boolean sameVarOrValue(Var a, Var b) {
return VarUtils.sameVarOrValue(a, b);
}
public static boolean isAnonPathVar(Var v) {
return VarUtils.isAnonPathVar(v);
}
/** True when the anonymous path var explicitly encodes inverse orientation. */
public static boolean isAnonPathInverseVar(Var v) {
return VarUtils.isAnonPathInverseVar(v);
}
/**
* True if the given branch contains at least one variable with the parser-generated _anon_path_ (or inverse
* variant) prefix anywhere in its simple triple-like structures. Used as a safety valve to allow certain fusions
* across UNION branches that were marked as introducing a new scope in the algebra: if every branch contains an
* anonymous path bridge var, the fusion is considered safe and preserves user-visible bindings.
*/
public static boolean branchHasAnonPathBridge(IrBGP branch) {
if (branch == null) {
return false;
}
for (IrNode ln : branch.getLines()) {
if (ln instanceof IrStatementPattern) {
IrStatementPattern sp = (IrStatementPattern) ln;
Var s = sp.getSubject();
Var o = sp.getObject();
Var p = sp.getPredicate();
if (isAnonPathVar(s) || isAnonPathInverseVar(s) || isAnonPathVar(o) || isAnonPathInverseVar(o)
|| isAnonPathVar(p) || isAnonPathInverseVar(p)) {
return true;
}
} else if (ln instanceof IrPathTriple) {
IrPathTriple pt = (IrPathTriple) ln;
if (isAnonPathVar(pt.getSubject()) || isAnonPathInverseVar(pt.getSubject())
|| isAnonPathVar(pt.getObject())
|| isAnonPathInverseVar(pt.getObject())) {
return true;
}
} else if (ln instanceof IrGraph) {
IrGraph g = (IrGraph) ln;
if (branchHasAnonPathBridge(g.getWhere())) {
return true;
}
} else if (ln instanceof IrOptional) {
IrOptional o = (IrOptional) ln;
if (branchHasAnonPathBridge(o.getWhere())) {
return true;
}
} else if (ln instanceof IrMinus) {
IrMinus m = (IrMinus) ln;
if (branchHasAnonPathBridge(m.getWhere())) {
return true;
}
} else if (ln instanceof IrBGP) {
if (branchHasAnonPathBridge((IrBGP) ln)) {
return true;
}
}
}
return false;
}
/** True if all UNION branches contain at least one _anon_path_* variable (or inverse variant). */
/**
* True if all UNION branches contain at least one _anon_path_* variable (or inverse variant).
*
* Rationale: when there is no explicit UNION scope, this safety gate ensures branch bodies are derived from
* path-decoding internals rather than user variables, so fusing to an alternation/NPS preserves semantics.
*/
public static boolean unionBranchesAllHaveAnonPathBridge(IrUnion u) {
if (unionIsExplicitAndAllBranchesScoped(u)) {
return false;
}
if (u == null || u.getBranches().isEmpty()) {
return false;
}
for (IrBGP b : u.getBranches()) {
if (!branchHasAnonPathBridge(b)) {
return false;
}
}
return true;
}
/**
* True if all UNION branches share at least one common variable name that starts with the _anon_path_ prefix. The
* check descends into simple triple-like structures and container blocks.
*/
/**
* True if all UNION branches share at least one common variable name that starts with the _anon_path_ prefix. The
* check descends into simple triple-like structures and container blocks.
*
* Rationale: used for the special-case where a UNION is marked as a new variable scope but still eligible for
* merging ��� only when we can prove the scope originates from a shared parser-generated bridge variable rather than
* a user variable. This keeps merges conservative and avoids collapsing distinct user bindings.
*/
public static boolean unionBranchesShareCommonAnonPathVarName(IrUnion u) {
if (unionIsExplicitAndAllBranchesScoped(u)) {
return false;
}
if (u == null || u.getBranches().isEmpty()) {
return false;
}
Set<String> common = null;
for (IrBGP b : u.getBranches()) {
Set<String> names = new HashSet<>();
collectAnonPathVarNames(b, names);
if (names.isEmpty()) {
return false; // a branch without anon-path vars cannot share a common one
}
if (common == null) {
common = new HashSet<>(names);
} else {
common.retainAll(names);
if (common.isEmpty()) {
return false;
}
}
}
return common != null && !common.isEmpty();
}
/**
* New-scope UNION safety: true iff the two UNION branches share at least one _anon_path_* variable name.
*
* Implementation uses the IR getVars() API to collect all Vars from each branch (including nested nodes) and then
* checks for intersection on names that start with the parser bridge prefixes. This captures subject/object,
* predicate vars, as well as IrPathTriple.pathVars contributed during path rewrites.
*/
public static boolean unionBranchesShareAnonPathVarWithAllowedRoleMapping(IrUnion u) {
if (unionIsExplicitAndAllBranchesScoped(u)) {
return false;
}
if (u == null || u.getBranches().size() != 2) {
return false;
}
Set<Var> aVars = u.getBranches().get(0).getVars();
Set<Var> bVars = u.getBranches().get(1).getVars();
if (aVars == null || bVars == null || aVars.isEmpty() || bVars.isEmpty()) {
return false;
}
Set<String> aNames = new HashSet<>();
Set<String> bNames = new HashSet<>();
for (Var v : aVars) {
if (isAnonPathVar(v) || isAnonPathInverseVar(v)) {
aNames.add(v.getName());
}
}
for (Var v : bVars) {
if (isAnonPathVar(v) || isAnonPathInverseVar(v)) {
bNames.add(v.getName());
}
}
return !aNames.isEmpty() && !bNames.isEmpty() && intersects(aNames, bNames);
}
private static boolean intersects(Set<String> a, Set<String> b) {
if (a == null || b == null) {
return false;
}
for (String x : a) {
if (b.contains(x)) {
return true;
}
}
return false;
}
private static void collectAnonPathVarNames(IrBGP b, Set<String> out) {
if (b == null) {
return;
}
for (IrNode ln : b.getLines()) {
if (ln instanceof IrStatementPattern) {
IrStatementPattern sp = (IrStatementPattern) ln;
Var s = sp.getSubject();
Var o = sp.getObject();
Var p = sp.getPredicate();
if (isAnonPathVar(s) || isAnonPathInverseVar(s)) {
out.add(s.getName());
}
if (isAnonPathVar(o) || isAnonPathInverseVar(o)) {
out.add(o.getName());
}
if (isAnonPathVar(p) || isAnonPathInverseVar(p)) {
out.add(p.getName());
}
} else if (ln instanceof IrPathTriple) {
IrPathTriple pt = (IrPathTriple) ln;
Var s = pt.getSubject();
Var o = pt.getObject();
if (isAnonPathVar(s) || isAnonPathInverseVar(s)) {
out.add(s.getName());
}
if (isAnonPathVar(o) || isAnonPathInverseVar(o)) {
out.add(o.getName());
}
} else if (ln instanceof IrGraph) {
collectAnonPathVarNames(((IrGraph) ln).getWhere(), out);
} else if (ln instanceof IrOptional) {
collectAnonPathVarNames(((IrOptional) ln).getWhere(), out);
} else if (ln instanceof IrMinus) {
collectAnonPathVarNames(((IrMinus) ln).getWhere(), out);
} else if (ln instanceof IrUnion) {
for (IrBGP br : ((IrUnion) ln).getBranches()) {
collectAnonPathVarNames(br, out);
}
} else if (ln instanceof IrBGP) {
collectAnonPathVarNames((IrBGP) ln, out);
}
}
}
/**
* If the given path text is a negated property set of the form !(a|b|...), return a version where each member is
* inverted by toggling the leading '^' (i.e., a -> ^a, ^a -> a). Returns null when the input is not a simple NPS.
*/
public static String invertNegatedPropertySet(String npsText) {
if (npsText == null) {
return null;
}
String s = npsText.trim();
if (!s.startsWith("!(") || !s.endsWith(")")) {
return null;
}
String inner = s.substring(2, s.length() - 1);
if (inner.isEmpty()) {
return s;
}
String[] toks = inner.split("\\|");
List<String> out = new ArrayList<>(toks.length);
for (String tok : toks) {
String t = tok.trim();
if (t.isEmpty()) {
continue;
}
if (t.startsWith("^")) {
out.add(t.substring(1));
} else {
out.add("^" + t);
}
}
if (out.isEmpty()) {
return s; // fallback: unchanged
}
return "!(" + String.join("|", out) + ")";
}
/**
* Fuse a path triple whose object is a bridge var with a constant-IRI tail triple that also uses the bridge var,
* producing a new path with an added '/^p' or '/p' segment. This version indexes join candidates and works inside
* GRAPH bodies as well. It is conservative: only constant predicate tails are fused and containers are preserved.
*/
public static IrBGP fuseAltInverseTailBGP(IrBGP bgp, TupleExprIRRenderer r) {
if (bgp == null) {
return null;
}
final List<IrNode> in = bgp.getLines();
final List<IrNode> out = new ArrayList<>();
final Set<IrNode> removed = new HashSet<>();
// Build index of potential tail-join SPs keyed by the bridge var text ("?name"). We store both
// subject-joins and object-joins, and prefer object-join (inverse tail) to match expectations.
final Map<String, List<IrStatementPattern>> bySubject = new HashMap<>();
final Map<String, List<IrStatementPattern>> byObject = new HashMap<>();
for (IrNode n : in) {
if (!(n instanceof IrStatementPattern)) {
continue;
}
final IrStatementPattern sp = (IrStatementPattern) n;
final Var pv = sp.getPredicate();
if (pv == null || !pv.hasValue() || !(pv.getValue() instanceof IRI)) {
continue;
}
// Only index when the non-bridge end is not an anon_path_* var (safety)
final String sTxt = varOrValue(sp.getSubject(), r);
final String oTxt = varOrValue(sp.getObject(), r);
if (sp.getObject() != null && !isAnonPathVar(sp.getSubject()) && oTxt != null && oTxt.startsWith("?")) {
byObject.computeIfAbsent(oTxt, k -> new ArrayList<>()).add(sp);
}
if (sp.getSubject() != null && !isAnonPathVar(sp.getObject()) && sTxt != null && sTxt.startsWith("?")) {
bySubject.computeIfAbsent(sTxt, k -> new ArrayList<>()).add(sp);
}
}
for (IrNode n : in) {
if (removed.contains(n)) {
continue;
}
if (n instanceof IrPathTriple) {
IrPathTriple pt = (IrPathTriple) n;
// HEAD fusion: if a SP shares the subject with pt and uses a constant IRI predicate, prefix ^p/ or p/
final String headBridge = varOrValue(pt.getSubject(), r);
if (headBridge != null && headBridge.startsWith("?") && isAnonPathVar(pt.getSubject())) {
IrStatementPattern head = null;
boolean headInverse = true; // (?mid p ?x) => ^p/
final List<IrStatementPattern> hs = bySubject.get(headBridge);
if (hs != null) {
for (IrStatementPattern sp : hs) {
if (removed.contains(sp)) {
continue;
}
if (sp.getPredicate() == null || !sp.getPredicate().hasValue()
|| !(sp.getPredicate().getValue() instanceof IRI)) {
continue;
}
head = sp;
headInverse = true;
break;
}
}
if (head == null) {
final List<IrStatementPattern> ho = byObject.get(headBridge);
if (ho != null) {
for (IrStatementPattern sp : ho) {
if (removed.contains(sp)) {
continue;
}
if (sp.getPredicate() == null || !sp.getPredicate().hasValue()
|| !(sp.getPredicate().getValue() instanceof IRI)) {
continue;
}
head = sp;
headInverse = false; // (?x p ?mid) => p/
break;
}
}
}
if (head != null) {
final String ptxt = iri(head.getPredicate(), r);
final String prefix = (headInverse ? "^" : "") + ptxt + "/";
final Var newStart = headInverse ? head.getObject() : head.getSubject();
final IrNode newStartOverride = headInverse ? head.getObjectOverride()
: head.getSubjectOverride();
pt = new IrPathTriple(newStart, newStartOverride, prefix + pt.getPathText(), pt.getObject(),
pt.getObjectOverride(), pt.getPathVars(), pt.isNewScope());
removed.add(head);
}
}
// TAIL fusion: attach a constant predicate SP that shares the object
final String bridge = varOrValue(pt.getObject(), r);
if (bridge != null && bridge.startsWith("?")) {
// Only join when the bridge var is an _anon_path_* variable, to avoid eliminating user vars
if (!isAnonPathVar(pt.getObject())) {
out.add(pt);
continue;
}
IrStatementPattern join = null;
boolean inverse = true; // prefer inverse tail (?y p ?mid) => '^p'
final List<IrStatementPattern> byObj = byObject.get(bridge);
if (byObj != null) {
for (IrStatementPattern sp : byObj) {
if (!removed.contains(sp)) {
join = sp;
inverse = true;
break;
}
}
}
if (join == null) {
final List<IrStatementPattern> bySub = bySubject.get(bridge);
if (bySub != null) {
for (IrStatementPattern sp : bySub) {
if (!removed.contains(sp)) {
join = sp;
inverse = false;
break;
}
}
}
}
if (join != null) {
final String step = iri(join.getPredicate(), r);
final String newPath = pt.getPathText() + "/" + (inverse ? "^" : "") + step;
final Var newEnd = inverse ? join.getSubject() : join.getObject();
final IrNode newEndOverride = inverse ? join.getSubjectOverride() : join.getObjectOverride();
pt = new IrPathTriple(pt.getSubject(), pt.getSubjectOverride(), newPath, newEnd, newEndOverride,
pt.getPathVars(), pt.isNewScope());
removed.add(join);
}
}
out.add(pt);
continue;
}
// Recurse into containers
if (n instanceof IrGraph) {
final IrGraph g = (IrGraph) n;
out.add(new IrGraph(g.getGraph(), fuseAltInverseTailBGP(g.getWhere(), r), g.isNewScope()));
continue;
}
if (n instanceof IrOptional) {
final IrOptional o = (IrOptional) n;
IrOptional no = new IrOptional(fuseAltInverseTailBGP(o.getWhere(), r), o.isNewScope());
no.setNewScope(o.isNewScope());
out.add(no);
continue;
}
if (n instanceof IrMinus) {
final IrMinus m = (IrMinus) n;
out.add(new IrMinus(fuseAltInverseTailBGP(m.getWhere(), r), m.isNewScope()));
continue;
}
if (n instanceof IrUnion) {
final IrUnion u = (IrUnion) n;
final IrUnion u2 = new IrUnion(u.isNewScope());
for (IrBGP b : u.getBranches()) {
u2.addBranch(fuseAltInverseTailBGP(b, r));
}
out.add(u2);
continue;
}
if (n instanceof IrService) {
final IrService s = (IrService) n;
out.add(new IrService(s.getServiceRefText(), s.isSilent(), fuseAltInverseTailBGP(s.getWhere(), r),
s.isNewScope()));
continue;
}
// Subselects: keep as-is
out.add(n);
}
final IrBGP res = new IrBGP(bgp.isNewScope());
for (IrNode n2 : out) {
if (!removed.contains(n2)) {
res.add(n2);
}
}
res.setNewScope(bgp.isNewScope());
return res;
}
public static String varOrValue(Var v, TupleExprIRRenderer r) {
if (v == null) {
return "?_";
}
if (v.hasValue()) {
return r.convertValueToString(v.getValue());
}
return "?" + v.getName();
}
}