NodeTraversor.java

package org.jsoup.select;

import org.jsoup.helper.Validate;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.Node;
import org.jsoup.select.NodeFilter.FilterResult;

/**
 A depth-first node traversor. Use to walk through all nodes under and including the specified root node, in document
 order. The {@link NodeVisitor#head(Node, int)} and {@link NodeVisitor#tail(Node, int)} methods will be called for
 each node.
 <p>During the <code>head()</code> visit, DOM structural changes around the node currently being visited are
 supported, including {@link Node#replaceWith(Node)} and {@link Node#remove()}. See
 {@link NodeVisitor#head(Node, int) head()} for the traversal contract after mutation. Other non-structural node
 changes are also supported.</p>
 <p>DOM structural changes to the current node are not supported during the <code>tail()</code> visit.</p>
 */
public class NodeTraversor {
    // cursor state
    private static final byte VisitHead = 0;
    private static final byte AfterHead = 1;
    private static final byte VisitTail = 2;

    /**
     Run a depth-first traverse of the root and all of its descendants.
     @param visitor Node visitor.
     @param root the initial node point to traverse.
     @see NodeVisitor#traverse(Node root)
     */
    public static void traverse(NodeVisitor visitor, Node root) {
        Validate.notNull(visitor);
        Validate.notNull(root);
        Node node = root;
        final Node rootNext = root.nextSibling(); // don't traverse siblings beyond the original root
        int depth = 0;
        byte state = VisitHead;

        while (true) {
            if (state == VisitHead) {
                // snapshot the current cursor position so we can recover if head() structurally changes it:
                Node parent   = node.parentNode();
                Node next     = node.nextSibling();
                int  sibIndex = parent != null ? node.siblingIndex() : 0;

                visitor.head(node, depth);

                // any structural changes?
                if (parent != null && node.parentNode() != parent) { // removed / replaced / moved
                    Node occupant = sibIndex < parent.childNodeSize() ? parent.childNode(sibIndex) : null;
                    // ^^ the node now at this node's former position
                    Node boundary = depth == 0 ? rootNext : next;   // don't advance beyond this node when resuming
                    if (occupant != null && occupant != boundary) {
                        node = occupant;
                        state = AfterHead;                          // continue from that slot without re-heading it
                    } else if (depth == 0) {                        // root detached or replaced
                        break;
                    } else if (next != null && next.parentNode() == parent) {
                        node = next;                                // old slot is empty or shifted to the original next, visit
                    } else {                                        // removed last child; tail the parent next
                        node = parent;
                        depth--;
                        state = VisitTail;
                    }
                } else {
                    state = AfterHead;
                }
                continue;                                           // next loop handles the updated node/state
            }

            if (state == AfterHead && node.childNodeSize() > 0) { // descend into current children
                node = node.childNode(0);
                depth++;
                state = VisitHead;
                continue;
            }

            visitor.tail(node, depth);

            Node next = node.nextSibling();
            if (depth == 0) {
                if (next == null || next == rootNext) break; // done with the original root range
                node = next;
                state = VisitHead;
            } else if (next != null) { // traverse siblings
                node = next;
                state = VisitHead;
            } else {                // no siblings left, ascend
                node = node.parentNode();
                depth--;
                state = VisitTail;
            }
        }
    }

    /**
     Run a depth-first traversal of each Element.
     @param visitor Node visitor.
     @param elements Elements to traverse.
     */
    public static void traverse(NodeVisitor visitor, Elements elements) {
        Validate.notNull(visitor);
        Validate.notNull(elements);
        for (Element el : elements)
            traverse(visitor, el);
    }

    /**
     Run a depth-first controllable traversal of the root and all of its descendants.
     @param filter NodeFilter visitor.
     @param root the root node point to traverse.
     @return The filter result of the root node, or {@link FilterResult#STOP}.

     @see NodeFilter
     */
    public static FilterResult filter(NodeFilter filter, Node root) {
        Node node = root;
        int depth = 0;

        while (node != null) {
            FilterResult result = filter.head(node, depth);
            if (result == FilterResult.STOP)
                return result;
            // Descend into child nodes:
            if (result == FilterResult.CONTINUE && node.childNodeSize() > 0) {
                node = node.childNode(0);
                ++depth;
                continue;
            }
            // No siblings, move upwards:
            while (true) {
                assert node != null; // depth > 0, so has parent
                if (!(node.nextSibling() == null && depth > 0)) break;
                // 'tail' current node:
                if (result == FilterResult.CONTINUE || result == FilterResult.SKIP_CHILDREN) {
                    result = filter.tail(node, depth);
                    if (result == FilterResult.STOP)
                        return result;
                }
                Node prev = node; // In case we need to remove it below.
                node = node.parentNode();
                depth--;
                if (result == FilterResult.REMOVE)
                    prev.remove(); // Remove AFTER finding parent.
                result = FilterResult.CONTINUE; // Parent was not pruned.
            }
            // 'tail' current node, then proceed with siblings:
            if (result == FilterResult.CONTINUE || result == FilterResult.SKIP_CHILDREN) {
                result = filter.tail(node, depth);
                if (result == FilterResult.STOP)
                    return result;
            }
            if (node == root)
                return result;
            Node prev = node; // In case we need to remove it below.
            node = node.nextSibling();
            if (result == FilterResult.REMOVE)
                prev.remove(); // Remove AFTER finding sibling.
        }
        // root == null?
        return FilterResult.CONTINUE;
    }

    /**
     Run a depth-first controllable traversal of each Element.
     @param filter NodeFilter visitor.
     @see NodeFilter
     */
    public static void filter(NodeFilter filter, Elements elements) {
        Validate.notNull(filter);
        Validate.notNull(elements);
        for (Element el : elements)
            if (filter(filter, el) == FilterResult.STOP)
                break;
    }
}