EmbeddedLimits.java

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.tika.config;

import java.io.Serializable;

import org.apache.tika.parser.ParseContext;

/**
 * Configuration for limits on embedded document processing.
 * <p>
 * This controls how deep and how many embedded documents are processed:
 * <ul>
 *   <li>{@code maxDepth} - maximum nesting depth for embedded documents (-1 = unlimited)</li>
 *   <li>{@code throwOnMaxDepth} - whether to throw an exception when maxDepth is reached</li>
 *   <li>{@code maxCount} - maximum number of embedded documents to process (-1 = unlimited)</li>
 *   <li>{@code throwOnMaxCount} - whether to throw an exception when maxCount is reached</li>
 * </ul>
 * <p>
 * <b>maxDepth behavior:</b> When the depth limit is reached, recursion stops but siblings at the
 * current level continue to be processed. For example, with maxDepth=1:
 * <pre>
 * container.zip (depth 0)
 * ��������� doc1.docx (depth 1) ��� PARSED
 * ���   ��������� image1.png (depth 2) ��� NOT PARSED (exceeds maxDepth)
 * ���   ��������� embed.xlsx (depth 2) ��� NOT PARSED (exceeds maxDepth)
 * ��������� doc2.pdf (depth 1) ��� PARSED (sibling at same level)
 * ��������� doc3.txt (depth 1) ��� PARSED (sibling at same level)
 * </pre>
 * <p>
 * <b>maxCount behavior:</b> When the count limit is reached, processing stops immediately.
 * No more embedded documents are processed, including siblings.
 * <p>
 * When a limit is hit and throwing is disabled:
 * <ul>
 *   <li>{@code X-TIKA-maxDepthReached=true} is set when maxDepth is hit</li>
 *   <li>{@code X-TIKA-maxEmbeddedCountReached=true} is set when maxCount is hit</li>
 * </ul>
 * <p>
 * Example configuration:
 * <pre>
 * {
 *   "parse-context": {
 *     "embedded-limits": {
 *       "maxDepth": 10,
 *       "throwOnMaxDepth": false,
 *       "maxCount": 1000,
 *       "throwOnMaxCount": false
 *     }
 *   }
 * }
 * </pre>
 *
 * @since Apache Tika 4.0
 */
@TikaComponent(name = "embedded-limits", spi = false)
public class EmbeddedLimits implements Serializable {

    private static final long serialVersionUID = 1L;

    public static final int UNLIMITED = -1;

    private int maxDepth = UNLIMITED;
    private boolean throwOnMaxDepth = false;
    private int maxCount = UNLIMITED;
    private boolean throwOnMaxCount = false;

    /**
     * No-arg constructor for Jackson deserialization.
     */
    public EmbeddedLimits() {
    }

    /**
     * Constructor with all parameters.
     *
     * @param maxDepth maximum nesting depth (-1 = unlimited)
     * @param throwOnMaxDepth whether to throw when depth limit is reached
     * @param maxCount maximum number of embedded documents (-1 = unlimited)
     * @param throwOnMaxCount whether to throw when count limit is reached
     */
    public EmbeddedLimits(int maxDepth, boolean throwOnMaxDepth, int maxCount, boolean throwOnMaxCount) {
        this.maxDepth = maxDepth;
        this.throwOnMaxDepth = throwOnMaxDepth;
        this.maxCount = maxCount;
        this.throwOnMaxCount = throwOnMaxCount;
    }

    /**
     * Gets the maximum nesting depth for embedded documents.
     *
     * @return maximum depth, or -1 for unlimited
     */
    public int getMaxDepth() {
        return maxDepth;
    }

    /**
     * Sets the maximum nesting depth for embedded documents.
     *
     * @param maxDepth maximum depth, or -1 for unlimited
     */
    public void setMaxDepth(int maxDepth) {
        this.maxDepth = maxDepth;
    }

    /**
     * Gets whether to throw an exception when maxDepth is reached.
     *
     * @return true if an exception should be thrown
     */
    public boolean isThrowOnMaxDepth() {
        return throwOnMaxDepth;
    }

    /**
     * Sets whether to throw an exception when maxDepth is reached.
     *
     * @param throwOnMaxDepth true to throw an exception
     */
    public void setThrowOnMaxDepth(boolean throwOnMaxDepth) {
        this.throwOnMaxDepth = throwOnMaxDepth;
    }

    /**
     * Gets the maximum number of embedded documents to process.
     *
     * @return maximum count, or -1 for unlimited
     */
    public int getMaxCount() {
        return maxCount;
    }

    /**
     * Sets the maximum number of embedded documents to process.
     *
     * @param maxCount maximum count, or -1 for unlimited
     */
    public void setMaxCount(int maxCount) {
        this.maxCount = maxCount;
    }

    /**
     * Gets whether to throw an exception when maxCount is reached.
     *
     * @return true if an exception should be thrown
     */
    public boolean isThrowOnMaxCount() {
        return throwOnMaxCount;
    }

    /**
     * Sets whether to throw an exception when maxCount is reached.
     *
     * @param throwOnMaxCount true to throw an exception
     */
    public void setThrowOnMaxCount(boolean throwOnMaxCount) {
        this.throwOnMaxCount = throwOnMaxCount;
    }

    /**
     * Helper method to get EmbeddedLimits from ParseContext with defaults.
     *
     * @param context the ParseContext (may be null)
     * @return the EmbeddedLimits from context, or a new instance with defaults if not found
     */
    public static EmbeddedLimits get(ParseContext context) {
        if (context == null) {
            return new EmbeddedLimits();
        }
        EmbeddedLimits limits = context.get(EmbeddedLimits.class);
        return limits != null ? limits : new EmbeddedLimits();
    }

    @Override
    public String toString() {
        return "EmbeddedLimits{" +
                "maxDepth=" + maxDepth +
                ", throwOnMaxDepth=" + throwOnMaxDepth +
                ", maxCount=" + maxCount +
                ", throwOnMaxCount=" + throwOnMaxCount +
                '}';
    }

    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if (o == null || getClass() != o.getClass()) {
            return false;
        }
        EmbeddedLimits that = (EmbeddedLimits) o;
        return maxDepth == that.maxDepth &&
                throwOnMaxDepth == that.throwOnMaxDepth &&
                maxCount == that.maxCount &&
                throwOnMaxCount == that.throwOnMaxCount;
    }

    @Override
    public int hashCode() {
        int result = maxDepth;
        result = 31 * result + (throwOnMaxDepth ? 1 : 0);
        result = 31 * result + maxCount;
        result = 31 * result + (throwOnMaxCount ? 1 : 0);
        return result;
    }
}