// OutputLimits.java
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.tika.config;
import java.io.Serializable;
import org.apache.tika.parser.ParseContext;
/**
* Configuration for output and security limits.
* <p>
* This controls output size and various security thresholds:
* <ul>
* <li>{@code writeLimit} - maximum characters to write (-1 = unlimited)</li>
* <li>{@code throwOnWriteLimit} - whether to throw an exception when writeLimit is reached</li>
* <li>{@code maxXmlDepth} - maximum XML element nesting depth (default: 100)</li>
* <li>{@code maxPackageEntryDepth} - maximum package entry nesting depth (default: 10)</li>
* <li>{@code zipBombThreshold} - characters before zip bomb check activates (default: 1,000,000)</li>
* <li>{@code zipBombRatio} - maximum output:input ratio for zip bomb detection (default: 100)</li>
* </ul>
* <p>
* <b>writeLimit behavior:</b> The writeLimit is the TOTAL characters across all documents
* (container + embedded). When the limit is reached:
* <ul>
* <li>If {@code throwOnWriteLimit=false}: Output is truncated, {@code X-TIKA-writeLimitReached=true} is set</li>
* <li>If {@code throwOnWriteLimit=true}: {@code WriteLimitReachedException} is thrown</li>
* </ul>
* <p>
* <b>Security limits:</b> maxXmlDepth, maxPackageEntryDepth, and zipBomb
* limits always throw exceptions when exceeded (no silent truncation option).
* <p>
* Example configuration:
* <pre>
* {
* "parse-context": {
* "output-limits": {
* "writeLimit": 100000,
* "throwOnWriteLimit": false,
* "maxXmlDepth": 100,
* "maxPackageEntryDepth": 10,
* "zipBombThreshold": 1000000,
* "zipBombRatio": 100
* }
* }
* }
* </pre>
*
* @since Apache Tika 4.0
*/
@TikaComponent(spi = false)
public class OutputLimits implements Serializable {

    private static final long serialVersionUID = 1L;

    /** Value of {@code writeLimit} that stands for "no limit". */
    public static final int UNLIMITED = -1;

    // Values used by the no-arg constructor.
    private static final boolean DEFAULT_THROW_ON_WRITE_LIMIT = false;
    private static final int DEFAULT_MAX_XML_DEPTH = 100;
    private static final int DEFAULT_MAX_PACKAGE_ENTRY_DEPTH = 10;
    private static final long DEFAULT_ZIP_BOMB_THRESHOLD = 1_000_000;
    private static final long DEFAULT_ZIP_BOMB_RATIO = 100;

    // Output limits
    private int writeLimit;
    private boolean throwOnWriteLimit;

    // XML/Security limits
    private int maxXmlDepth;
    private int maxPackageEntryDepth;
    private long zipBombThreshold;
    private long zipBombRatio;

    /**
     * Creates an instance populated with the default value of every limit
     * (unlimited write limit, no exception on write limit, XML depth 100,
     * package entry depth 10, zip bomb threshold 1,000,000, zip bomb ratio 100).
     * <p>
     * A no-arg constructor is kept for Jackson deserialization.
     */
    public OutputLimits() {
        this(UNLIMITED, DEFAULT_THROW_ON_WRITE_LIMIT,
                DEFAULT_MAX_XML_DEPTH, DEFAULT_MAX_PACKAGE_ENTRY_DEPTH,
                DEFAULT_ZIP_BOMB_THRESHOLD, DEFAULT_ZIP_BOMB_RATIO);
    }

    /**
     * Creates an instance with every limit given explicitly. The values are
     * stored as passed; no range checking is performed.
     *
     * @param writeLimit           maximum characters to write (-1 = unlimited)
     * @param throwOnWriteLimit    whether to throw when the write limit is reached
     * @param maxXmlDepth          maximum XML element nesting depth
     * @param maxPackageEntryDepth maximum package entry nesting depth
     * @param zipBombThreshold     characters before the zip bomb check activates
     * @param zipBombRatio         maximum output:input ratio
     */
    public OutputLimits(int writeLimit, boolean throwOnWriteLimit,
                        int maxXmlDepth, int maxPackageEntryDepth,
                        long zipBombThreshold, long zipBombRatio) {
        this.writeLimit = writeLimit;
        this.throwOnWriteLimit = throwOnWriteLimit;
        this.maxXmlDepth = maxXmlDepth;
        this.maxPackageEntryDepth = maxPackageEntryDepth;
        this.zipBombThreshold = zipBombThreshold;
        this.zipBombRatio = zipBombRatio;
    }

    /**
     * Looks up the {@code OutputLimits} registered in a {@link ParseContext}.
     *
     * @param context the ParseContext to query (may be null)
     * @return the instance stored in the context; a fresh default instance when
     *         the context is null or holds no {@code OutputLimits}
     */
    public static OutputLimits get(ParseContext context) {
        OutputLimits configured = (context == null) ? null : context.get(OutputLimits.class);
        if (configured == null) {
            return new OutputLimits();
        }
        return configured;
    }

    /**
     * Returns the write limit.
     *
     * @return maximum characters to write, or -1 for unlimited
     */
    public int getWriteLimit() {
        return this.writeLimit;
    }

    /**
     * Replaces the write limit.
     *
     * @param writeLimit maximum characters to write, or -1 for unlimited
     */
    public void setWriteLimit(int writeLimit) {
        this.writeLimit = writeLimit;
    }

    /**
     * Tells whether reaching the write limit should raise an exception.
     *
     * @return true if an exception should be thrown
     */
    public boolean isThrowOnWriteLimit() {
        return this.throwOnWriteLimit;
    }

    /**
     * Chooses whether reaching the write limit should raise an exception.
     *
     * @param throwOnWriteLimit true to throw an exception
     */
    public void setThrowOnWriteLimit(boolean throwOnWriteLimit) {
        this.throwOnWriteLimit = throwOnWriteLimit;
    }

    /**
     * Returns the XML element nesting limit.
     *
     * @return maximum XML depth
     */
    public int getMaxXmlDepth() {
        return this.maxXmlDepth;
    }

    /**
     * Replaces the XML element nesting limit.
     *
     * @param maxXmlDepth maximum XML depth
     */
    public void setMaxXmlDepth(int maxXmlDepth) {
        this.maxXmlDepth = maxXmlDepth;
    }

    /**
     * Returns the package entry nesting limit.
     *
     * @return maximum package entry depth
     */
    public int getMaxPackageEntryDepth() {
        return this.maxPackageEntryDepth;
    }

    /**
     * Replaces the package entry nesting limit.
     *
     * @param maxPackageEntryDepth maximum package entry depth
     */
    public void setMaxPackageEntryDepth(int maxPackageEntryDepth) {
        this.maxPackageEntryDepth = maxPackageEntryDepth;
    }

    /**
     * Returns the zip bomb threshold (characters before the check activates).
     *
     * @return zip bomb threshold
     */
    public long getZipBombThreshold() {
        return this.zipBombThreshold;
    }

    /**
     * Replaces the zip bomb threshold (characters before the check activates).
     *
     * @param zipBombThreshold zip bomb threshold
     */
    public void setZipBombThreshold(long zipBombThreshold) {
        this.zipBombThreshold = zipBombThreshold;
    }

    /**
     * Returns the zip bomb ratio (maximum output:input ratio).
     *
     * @return zip bomb ratio
     */
    public long getZipBombRatio() {
        return this.zipBombRatio;
    }

    /**
     * Replaces the zip bomb ratio (maximum output:input ratio).
     *
     * @param zipBombRatio zip bomb ratio
     */
    public void setZipBombRatio(long zipBombRatio) {
        this.zipBombRatio = zipBombRatio;
    }

    /**
     * Compares all six limits; only another instance of exactly the same
     * runtime class can be equal.
     */
    @Override
    public boolean equals(Object o) {
        if (o == this) {
            return true;
        }
        if (o == null || o.getClass() != getClass()) {
            return false;
        }
        OutputLimits other = (OutputLimits) o;
        boolean sameWriteSettings = writeLimit == other.writeLimit
                && throwOnWriteLimit == other.throwOnWriteLimit;
        boolean sameDepthSettings = maxXmlDepth == other.maxXmlDepth
                && maxPackageEntryDepth == other.maxPackageEntryDepth;
        boolean sameZipBombSettings = zipBombThreshold == other.zipBombThreshold
                && zipBombRatio == other.zipBombRatio;
        return sameWriteSettings && sameDepthSettings && sameZipBombSettings;
    }

    /**
     * Combines the same six fields that {@link #equals(Object)} compares,
     * using the conventional multiply-by-31 accumulation.
     */
    @Override
    public int hashCode() {
        int hash = writeLimit;
        hash = 31 * hash + (throwOnWriteLimit ? 1 : 0);
        hash = 31 * hash + maxXmlDepth;
        hash = 31 * hash + maxPackageEntryDepth;
        // Long.hashCode XORs the upper and lower 32 bits of the value.
        hash = 31 * hash + Long.hashCode(zipBombThreshold);
        hash = 31 * hash + Long.hashCode(zipBombRatio);
        return hash;
    }

    /**
     * Renders every limit as {@code name=value} pairs inside
     * {@code OutputLimits{...}}.
     */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("OutputLimits{");
        text.append("writeLimit=").append(writeLimit);
        text.append(", throwOnWriteLimit=").append(throwOnWriteLimit);
        text.append(", maxXmlDepth=").append(maxXmlDepth);
        text.append(", maxPackageEntryDepth=").append(maxPackageEntryDepth);
        text.append(", zipBombThreshold=").append(zipBombThreshold);
        text.append(", zipBombRatio=").append(zipBombRatio);
        text.append('}');
        return text.toString();
    }
}