// NodeVisitor.java
package org.jsoup.select;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.Node;
/**
Node visitor interface, used to walk the DOM and visit each node. Execute via {@link #traverse(Node)} or
{@link Node#traverse(NodeVisitor)}. The traversal is depth-first.
<p>
This interface provides two methods, {@link #head} and {@link #tail}. The head method is called when a node is first
seen, and the tail method when all that node's children have been visited. As an example, {@code head} can be used to
emit a start tag for a node, and {@code tail} to emit the end tag. The {@code tail} method defaults to a no-op, so
this interface can be used as a {@link FunctionalInterface}, with {@code head} as its single abstract method.
</p>
<p><b>Example:</b></p>
<pre>{@code
doc.body().traverse((node, depth) -> {
    switch (node) {
        case Element el -> print(el.tag() + ": " + el.ownText());
        case DataNode data -> print("Data: " + data.getWholeData());
        default -> print(node.nodeName() + " at depth " + depth);
    }
});
}</pre>
*/
@FunctionalInterface
public interface NodeVisitor {
    /**
     Invoked when traversal first reaches a node.
     <p>The callback may modify the node (for example by setting an attribute via
     {@link Node#attr(String, String)}), remove it with {@link Node#remove()}, or replace it with
     {@link Node#replaceWith(Node)}. When the node is an {@link Element}, it may be cast to reach the
     element-specific methods.</p>
     <p>Traversal uses a forward cursor. Once {@code head()} has completed:</p>
     <ul>
     <li>Current node still attached: traversal continues into the children it currently has, and afterwards on to
     its following siblings. Nodes that were inserted before the current node are not visited.</li>
     <li>Current node detached, and another node now occupies its former sibling position: the node now at that
     position is not passed to {@code head()} again. Traversal continues from it: its children are visited, it is
     then passed to {@link #tail(Node, int)}, and later siblings follow.</li>
     <li>Current node detached, and no node occupies its former sibling position: the current node is not passed
     to {@code tail()}, and traversal resumes at the node that originally followed it.</li>
     </ul>
     <p>Traversal never advances outside the subtree of the original root. If the traversal root is detached
     during {@code head()}, traversal stops at the original root boundary.</p>
     @param node the node being visited.
     @param depth how deep the node sits relative to the root node: the root node itself is at depth 0, a child of
     the root is at depth 1, and so on.
     */
    void head(Node node, int depth);

    /**
     Invoked when a node is visited for the last time, once all of its descendants have been visited.
     <p>The default implementation does nothing.</p>
     <p>The node handed to {@code tail()} is the node at the current traversal position when the subtree completes.
     If {@code head()} replaced the original node, this may be the replacement node instead.</p>
     <p>Structural changes to the current node are not supported during {@code tail()}.</p>
     @param node the node being visited.
     @param depth how deep the node sits relative to the root node: the root node itself is at depth 0, a child of
     the root is at depth 1, and so on.
     */
    default void tail(Node node, int depth) {
        // intentionally empty: implementations (including lambdas) only have to supply head()
    }

    /**
     Performs a depth-first traversal of the root and all of its descendants, by handing this visitor and the root
     to {@code NodeTraversor}.
     @param root the node at which the traversal starts.
     @since 1.21.1
     */
    default void traverse(Node root) {
        NodeTraversor.traverse(this, root);
    }
}