XPathParser.java

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.tika.sax.xpath;

import java.util.HashMap;
import java.util.Map;

/**
 * Parser for a very simple XPath subset. Only the following XPath constructs
 * (with namespaces) are supported:
 * <ul>
 *   <li><code>.../node()</code></li>
 *   <li><code>.../text()</code></li>
 *   <li><code>.../@*</code></li>
 *   <li><code>.../@name</code></li>
 *   <li><code>.../*...</code></li>
 *   <li><code>.../name...</code></li>
 *   <li><code>...//*...</code></li>
 *   <li><code>...//name...</code></li>
 * </ul>
 * <p>
 * In addition the non-abbreviated <code>.../descendant::node()</code>
 * construct can be used for cases where the descendant-or-self axis
 * used by the <code>...//node()</code> construct is not appropriate.
 */
public class XPathParser {

    /**
     * Prefix-to-namespace bindings consulted while parsing. Names written
     * without a prefix are looked up under the <code>null</code> key.
     */
    private final Map<String, String> prefixes = new HashMap<>();

    /**
     * Creates a parser that starts out with no prefix bindings.
     */
    public XPathParser() {
    }

    /**
     * Creates a parser that starts out with a single prefix binding.
     *
     * @param prefix namespace prefix as it appears in expressions
     * @param namespace namespace to associate with the prefix
     */
    public XPathParser(String prefix, String namespace) {
        addPrefix(prefix, namespace);
    }

    /**
     * Registers a prefix binding, replacing any earlier binding of the
     * same prefix.
     *
     * @param prefix namespace prefix as it appears in expressions
     * @param namespace namespace to associate with the prefix
     */
    public void addPrefix(String prefix, String namespace) {
        prefixes.put(prefix, namespace);
    }

    /**
     * Turns a simple XPath expression into a matcher rooted at the document
     * node. The expression is consumed one leading step at a time and the
     * remainder is parsed recursively. Expressions that are not understood,
     * or that use a prefix with no registered binding, do not raise an
     * error; they produce {@link Matcher#FAIL} (possibly nested inside
     * matchers built for the preceding steps).
     *
     * @param xpath simple XPath expression
     * @return XPath evaluation state
     * @throws NullPointerException if <code>xpath</code> is <code>null</code>
     */
    public Matcher parse(String xpath) {
        // Expressions that are recognized only as a complete string.
        switch (xpath) {
            case "/text()":
                return TextMatcher.INSTANCE;
            case "/node()":
                return NodeMatcher.INSTANCE;
            case "/descendant::node()":
            case "/descendant:node()": // for compatibility
                return new CompositeMatcher(TextMatcher.INSTANCE,
                        new ChildMatcher(new SubtreeMatcher(NodeMatcher.INSTANCE)));
            case "/@*":
                return AttributeMatcher.INSTANCE;
            case "":
                // Nothing left to consume: the path ends at an element.
                return ElementMatcher.INSTANCE;
            default:
                break;
        }

        // Every remaining supported form begins with a slash.
        if (!xpath.startsWith("/")) {
            return Matcher.FAIL;
        }

        // Named attribute: everything after "/@" is the (qualified) name.
        if (xpath.startsWith("/@")) {
            String qname = xpath.substring(2);
            String attrPrefix = prefixOf(qname);
            if (!prefixes.containsKey(attrPrefix)) {
                return Matcher.FAIL;
            }
            return new NamedAttributeMatcher(
                    prefixes.get(attrPrefix), localNameOf(qname));
        }

        // Wildcard child step.
        if (xpath.startsWith("/*")) {
            return new ChildMatcher(parse(xpath.substring(2)));
        }

        // Double slash: drop one slash and wrap the rest in a subtree
        // matcher. Three or more consecutive slashes are rejected.
        if (xpath.startsWith("//")) {
            if (xpath.startsWith("///")) {
                return Matcher.FAIL;
            }
            return new SubtreeMatcher(parse(xpath.substring(1)));
        }

        // Named element step: the name runs up to the next slash, or to the
        // end of the expression when there is none.
        int next = xpath.indexOf('/', 1);
        int stepEnd = (next == -1) ? xpath.length() : next;
        String qname = xpath.substring(1, stepEnd);
        String elementPrefix = prefixOf(qname);
        if (!prefixes.containsKey(elementPrefix)) {
            return Matcher.FAIL;
        }
        return new NamedElementMatcher(
                prefixes.get(elementPrefix), localNameOf(qname),
                parse(xpath.substring(stepEnd)));
    }

    /**
     * Returns the part of a qualified name before its first colon, or
     * <code>null</code> when the name contains no colon.
     */
    private static String prefixOf(String qname) {
        int colon = qname.indexOf(':');
        return (colon == -1) ? null : qname.substring(0, colon);
    }

    /**
     * Returns the part of a qualified name after its first colon, or the
     * whole name when it contains no colon.
     */
    private static String localNameOf(String qname) {
        // indexOf yields -1 when there is no colon, so the substring then
        // starts at 0 and covers the entire name.
        return qname.substring(qname.indexOf(':') + 1);
    }

}