StringsConfig.java

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.tika.parser.strings;

import java.io.File;
import java.io.Serializable;

import org.apache.tika.exception.TikaConfigException;
import org.apache.tika.utils.StringUtils;

/**
 * Settings used when running the "strings" (or strings-alternative) command.
 * <p>
 * Holds the installation folder of "strings", the path to the "file" command,
 * the minimum sequence length, the character encoding to look for and the
 * process timeout. The nested {@link RuntimeConfig} variant rejects changes
 * to the two path settings.
 */
public class StringsConfig implements Serializable {
    /**
     * Serial version UID
     */
    private static final long serialVersionUID = -1465227101645003594L;

    // Installation folder of the "strings" command; empty by default
    private String stringsPath = "";

    // Path to the "file" command; empty by default
    private String filePath = "";

    // Shortest character sequence that is reported; defaults to 4
    private int minLength = 4;

    // Encoding of the strings to search for; defaults to 7-bit single-byte
    private StringsEncoding encoding = StringsEncoding.SINGLE_7_BIT;

    // Upper bound, in seconds, on how long to wait for the process; defaults to 120
    private int timeoutSeconds = 120;

    /**
     * Gets the folder in which "strings" is installed.
     *
     * @return the "strings" installation folder, as stored by
     * {@link #setStringsPath(String)}.
     */
    public String getStringsPath() {
        return this.stringsPath;
    }

    /**
     * Stores the folder in which "strings" is installed.
     * <p>
     * A value that is neither {@code null} nor empty and does not already end
     * with {@link File#separator} gets the platform separator appended.
     * {@code null} and the empty string are stored unchanged.
     *
     * @param stringsPath the "strings" installation folder.
     * @throws TikaConfigException never thrown by this implementation; declared
     *                             so that subclasses may reject the change.
     */
    public void setStringsPath(String stringsPath) throws TikaConfigException {
        String folder = stringsPath;
        boolean needsSeparator = folder != null
                && !folder.isEmpty()
                && !folder.endsWith(File.separator);
        if (needsSeparator) {
            folder = folder + File.separatorChar;
        }
        this.stringsPath = folder;
    }

    /**
     * Gets the path to the "file" command.
     *
     * @return the path to the "file" command.
     */
    public String getFilePath() {
        return this.filePath;
    }

    /**
     * Stores the path to the "file" command exactly as given.
     *
     * @param filePath the path to the "file" command.
     * @throws TikaConfigException never thrown by this implementation; declared
     *                             so that subclasses may reject the change.
     */
    public void setFilePath(String filePath) throws TikaConfigException {
        this.filePath = filePath;
    }

    /**
     * Gets the minimum sequence length (characters) to print.
     *
     * @return the minimum sequence length (characters) to print.
     */
    public int getMinLength() {
        return minLength;
    }

    /**
     * Stores the minimum sequence length (characters) to print.
     *
     * @param minLength the minimum sequence length (characters) to print;
     *                  must be at least 1.
     * @throws IllegalArgumentException if {@code minLength} is smaller than 1.
     */
    public void setMinLength(int minLength) {
        if (minLength <= 0) {
            throw new IllegalArgumentException("Invalid minimum length");
        }
        this.minLength = minLength;
    }

    /**
     * Gets the character encoding of the strings that are to be found.
     *
     * @return the {@link StringsEncoding} constant describing the character
     * encoding of the strings that are to be found.
     */
    public StringsEncoding getEncoding() {
        return encoding;
    }

    /**
     * Stores the character encoding of the strings that are to be found.
     * The value is stored as given, without validation.
     *
     * @param encoding the {@link StringsEncoding} constant describing the
     *                 character encoding of the strings that are to be found.
     */
    public void setEncoding(StringsEncoding encoding) {
        this.encoding = encoding;
    }

    /**
     * Gets the longest time (in seconds) to wait for the "strings" command
     * to terminate.
     *
     * @return the timeout in seconds.
     */
    public int getTimeoutSeconds() {
        return timeoutSeconds;
    }

    /**
     * Stores the longest time (in seconds) to wait for the "strings" command
     * to terminate.
     *
     * @param timeoutSeconds the timeout in seconds; must be at least 1.
     * @throws IllegalArgumentException if {@code timeoutSeconds} is smaller than 1.
     */
    public void setTimeoutSeconds(int timeoutSeconds) {
        if (timeoutSeconds <= 0) {
            throw new IllegalArgumentException("Invalid timeout");
        }
        this.timeoutSeconds = timeoutSeconds;
    }

    /**
     * Variant of {@link StringsConfig} whose security-sensitive path settings
     * cannot be changed at runtime.
     * <p>
     * A config obtained from ParseContext (i.e. supplied by the user at parse
     * time) should be deserialized as a RuntimeConfig to prevent path injection.
     * <p>
     * ConfigDeserializer (in tika-serialization) deserializes this class with
     * Jackson, which populates fields through the setters. If the JSON carries
     * a path field, the overridden setter throws TikaConfigException.
     */
    public static class RuntimeConfig extends StringsConfig {

        public RuntimeConfig() {
            // Relies on the implicit call to the StringsConfig constructor.
        }

        /**
         * Refuses any attempt to set the "strings" installation folder.
         * A value that {@code StringUtils.isBlank} reports as blank is ignored
         * and the stored path is left as it is.
         *
         * @param stringsPath the requested "strings" installation folder.
         * @throws TikaConfigException if {@code stringsPath} is not blank.
         */
        @Override
        public void setStringsPath(String stringsPath) throws TikaConfigException {
            if (StringUtils.isBlank(stringsPath)) {
                // Nothing is being set, so there is nothing to reject.
                return;
            }
            throw new TikaConfigException(
                    "Cannot modify stringsPath at runtime. " +
                            "Paths must be configured at parser initialization time.");
        }

        /**
         * Refuses any attempt to set the path to the "file" command.
         * A value that {@code StringUtils.isBlank} reports as blank is ignored
         * and the stored path is left as it is.
         *
         * @param filePath the requested path to the "file" command.
         * @throws TikaConfigException if {@code filePath} is not blank.
         */
        @Override
        public void setFilePath(String filePath) throws TikaConfigException {
            if (StringUtils.isBlank(filePath)) {
                // Nothing is being set, so there is nothing to reject.
                return;
            }
            throw new TikaConfigException(
                    "Cannot modify filePath at runtime. " +
                            "Paths must be configured at parser initialization time.");
        }
    }
}