// TimeoutLimits.java

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.tika.config;

import java.io.Serializable;
import java.util.Objects;

import org.apache.tika.parser.ParseContext;

/**
 * Configuration for the two-tier task timeout system.
 * <p>
 * <ul>
 *   <li>{@code totalTaskTimeoutMillis} &mdash; bounds entire task wall-clock time
 *       (default: 3,600,000 ms = 1 hour)</li>
 *   <li>{@code progressTimeoutMillis} &mdash; bounds time since the last progress update;
 *       catches infinite loops and hung processes (default: 60,000 ms = 1 minute)</li>
 * </ul>
 * <p>
 * Parsers that never call {@link TikaProgressTracker#update()} effectively get
 * {@code progressTimeoutMillis} as their total timeout (same as the old single-timeout
 * behavior). Parsers that <em>do</em> update progress can run up to
 * {@code totalTaskTimeoutMillis}.
 * <p>
 * Example configuration:
 * <pre>
 * {
 *   "parse-context": {
 *     "timeout-limits": {
 *       "totalTaskTimeoutMillis": 3600000,
 *       "progressTimeoutMillis": 60000
 *     }
 *   }
 * }
 * </pre>
 *
 * @since Apache Tika 4.0
 */
@TikaComponent(spi = false)
public class TimeoutLimits implements Serializable {

    private static final long serialVersionUID = 2L;

    /** Default cap on a task's total wall-clock time: 3,600,000 ms. */
    public static final long DEFAULT_TOTAL_TASK_TIMEOUT_MILLIS = 3_600_000L;

    /** Default cap on the time allowed between progress updates: 60,000 ms. */
    public static final long DEFAULT_PROGRESS_TIMEOUT_MILLIS = 60_000L;

    private long totalTaskTimeoutMillis;
    private long progressTimeoutMillis;

    /**
     * Creates limits populated with {@link #DEFAULT_TOTAL_TASK_TIMEOUT_MILLIS} and
     * {@link #DEFAULT_PROGRESS_TIMEOUT_MILLIS}. This no-arg form is the one used for
     * Jackson deserialization.
     */
    public TimeoutLimits() {
        this(DEFAULT_TOTAL_TASK_TIMEOUT_MILLIS, DEFAULT_PROGRESS_TIMEOUT_MILLIS);
    }

    /**
     * Creates limits with explicit values for both tiers. The values are stored
     * as given; no range checking is performed.
     *
     * @param totalTaskTimeoutMillis upper bound on a task's wall-clock time, in milliseconds
     * @param progressTimeoutMillis  upper bound on the gap between progress updates,
     *                               in milliseconds
     */
    public TimeoutLimits(long totalTaskTimeoutMillis, long progressTimeoutMillis) {
        this.totalTaskTimeoutMillis = totalTaskTimeoutMillis;
        this.progressTimeoutMillis = progressTimeoutMillis;
    }

    /**
     * Returns the wall-clock budget for a whole parse task.
     *
     * @return total task timeout in milliseconds
     */
    public long getTotalTaskTimeoutMillis() {
        return totalTaskTimeoutMillis;
    }

    /**
     * Replaces the wall-clock budget for a whole parse task.
     *
     * @param totalTaskTimeoutMillis total task timeout in milliseconds
     */
    public void setTotalTaskTimeoutMillis(long totalTaskTimeoutMillis) {
        this.totalTaskTimeoutMillis = totalTaskTimeoutMillis;
    }

    /**
     * Returns how long a task may go without a progress update before it is
     * treated as stalled.
     *
     * @return progress timeout in milliseconds
     */
    public long getProgressTimeoutMillis() {
        return progressTimeoutMillis;
    }

    /**
     * Replaces how long a task may go without a progress update before it is
     * treated as stalled.
     *
     * @param progressTimeoutMillis progress timeout in milliseconds
     */
    public void setProgressTimeoutMillis(long progressTimeoutMillis) {
        this.progressTimeoutMillis = progressTimeoutMillis;
    }

    /**
     * Resolves the limits to apply for a parse, never returning {@code null}.
     *
     * @param context the ParseContext to consult (may be null)
     * @return the instance registered in {@code context}, or a freshly created
     *         instance carrying the defaults when the context is null or holds none
     */
    public static TimeoutLimits get(ParseContext context) {
        TimeoutLimits configured = lookup(context);
        if (configured == null) {
            return new TimeoutLimits();
        }
        return configured;
    }

    /**
     * Computes the timeout to hand to an external process.
     * <p>
     * When the context carries a {@link TimeoutLimits}, the result is
     * {@code max(0, progressTimeoutMillis - 100)}; the 100 ms reduction is meant to
     * leave the monitoring loop a small window around the progress timeout.
     * Otherwise {@code defaultMs} is returned untouched.
     *
     * @param context   the ParseContext to consult (may be null)
     * @param defaultMs value returned when no TimeoutLimits is available
     * @return timeout in milliseconds for external process execution
     */
    public static long getProcessTimeoutMillis(ParseContext context, long defaultMs) {
        TimeoutLimits configured = lookup(context);
        if (configured == null) {
            return defaultMs;
        }
        // Read the field directly (not the getter) and clamp at zero so the
        // result is never negative.
        long reducedMillis = configured.progressTimeoutMillis - 100;
        return reducedMillis > 0 ? reducedMillis : 0L;
    }

    /**
     * Fetches the TimeoutLimits registered in the given context, if any.
     *
     * @param context the ParseContext to consult (may be null)
     * @return the registered instance, or {@code null} when the context is null
     *         or has no TimeoutLimits
     */
    private static TimeoutLimits lookup(ParseContext context) {
        if (context == null) {
            return null;
        }
        return context.get(TimeoutLimits.class);
    }

    /**
     * Renders both timeout values for logging and debugging.
     *
     * @return a string of the form
     *         {@code TimeoutLimits{totalTaskTimeoutMillis=..., progressTimeoutMillis=...}}
     */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("TimeoutLimits{");
        text.append("totalTaskTimeoutMillis=").append(totalTaskTimeoutMillis);
        text.append(", progressTimeoutMillis=").append(progressTimeoutMillis);
        text.append('}');
        return text.toString();
    }

    /**
     * Two instances are equal when they are of exactly the same class and both
     * timeout values match.
     *
     * @param o the object to compare against (may be null)
     * @return {@code true} if {@code o} is an equal TimeoutLimits
     */
    @Override
    public boolean equals(Object o) {
        if (o == this) {
            return true;
        }
        if (o == null) {
            return false;
        }
        // Exact class match rather than instanceof: subclasses never compare equal.
        if (o.getClass() != getClass()) {
            return false;
        }
        TimeoutLimits other = (TimeoutLimits) o;
        if (totalTaskTimeoutMillis != other.totalTaskTimeoutMillis) {
            return false;
        }
        return progressTimeoutMillis == other.progressTimeoutMillis;
    }

    /**
     * Hash derived from the same two fields that {@link #equals(Object)} compares.
     *
     * @return hash code of both timeout values
     */
    @Override
    public int hashCode() {
        return Objects.hash(totalTaskTimeoutMillis, progressTimeoutMillis);
    }
}