ThreadSafeUnzipper.java

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.tika.plugins;

import java.io.IOException;
import java.nio.file.AtomicMoveNotSupportedException;
import java.nio.file.DirectoryNotEmptyException;
import java.nio.file.FileAlreadyExistsException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardCopyOption;
import java.util.Comparator;
import java.util.UUID;
import java.util.stream.Stream;

import org.pf4j.util.Unzip;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Thread-safe and process-safe plugin unzipper using atomic rename.
 * <p>
 * This avoids file locking issues on Windows by using a simple strategy:
 * <ol>
 *   <li>Check if destination directory exists with completion marker - if yes, already extracted</li>
 *   <li>Extract to a temporary directory with a unique name</li>
 *   <li>Create a completion marker file in the temp directory</li>
 *   <li>Atomically rename temp dir to final destination</li>
 *   <li>If rename fails (another process won), clean up temp dir</li>
 * </ol>
 * <p>
 * The completion marker ensures that even if atomic move is not supported,
 * other processes won't attempt to load a partially-moved directory.
 */
public class ThreadSafeUnzipper {
    private static final Logger LOG = LoggerFactory.getLogger(TikaPluginManager.class);
    private static final String COMPLETE_MARKER = ".tika-extraction-complete";

    /**
     * Unzips a plugin zip file to a directory with the same name (minus .zip extension).
     * Safe for concurrent calls from multiple threads or processes. See
     * documentation at the head of this class for how it works.
     *
     * @param source path to the .zip file
     * @throws IOException if extraction fails
     */
    public static void unzipPlugin(Path source) throws IOException {
        if (!source.getFileName().toString().endsWith(".zip")) {
            throw new IllegalArgumentException("source file name must end in '.zip'");
        }

        Path destination = getDestination(source);

        // Already extracted - check for both directory AND completion marker
        if (isExtractionComplete(destination)) {
            LOG.debug("{} is already extracted", source);
            return;
        }

        // Extract to a unique temp directory
        Path tempDir = destination.resolveSibling(
                destination.getFileName() + ".tmp." + UUID.randomUUID());

        try {
            LOG.debug("extracting {} to temp dir {}", source, tempDir);
            new Unzip(source.toFile(), tempDir.toFile()).extract();

            // Create completion marker in temp dir before moving
            Files.createFile(tempDir.resolve(COMPLETE_MARKER));

            // Atomically rename to final destination
            try {
                Files.move(tempDir, destination, StandardCopyOption.ATOMIC_MOVE);
                LOG.debug("successfully extracted {}", destination);
            } catch (FileAlreadyExistsException | DirectoryNotEmptyException e) {
                // Another process extracted it first - wait for completion marker
                LOG.debug("plugin already extracted by another process: {}", destination);
                waitForExtractionComplete(destination);
            } catch (AtomicMoveNotSupportedException e) {
                // Filesystem doesn't support atomic move, try regular move
                try {
                    Files.move(tempDir, destination);
                    LOG.debug("successfully extracted {} (non-atomic)", destination);
                } catch (FileAlreadyExistsException | DirectoryNotEmptyException e2) {
                    // Another process extracted it first - wait for completion marker
                    LOG.debug("plugin already extracted by another process: {}", destination);
                    waitForExtractionComplete(destination);
                }
            }
        } finally {
            // Clean up temp dir if it still exists (we lost the race or there was an error)
            if (Files.exists(tempDir)) {
                deleteRecursively(tempDir);
            }
        }
    }

    /**
     * Checks if extraction is complete by verifying both directory exists and completion marker is present.
     */
    private static boolean isExtractionComplete(Path destination) {
        return Files.isDirectory(destination) && Files.exists(destination.resolve(COMPLETE_MARKER));
    }

    /**
     * Waits for extraction to complete by polling for the completion marker.
     * This is called when we detect another process is extracting.
     */
    private static void waitForExtractionComplete(Path destination) throws IOException {
        long maxWaitMs = 60000; // 1 minute max wait
        long pollIntervalMs = 100;
        long waited = 0;

        while (waited < maxWaitMs) {
            if (isExtractionComplete(destination)) {
                LOG.debug("extraction completed by another process: {}", destination);
                return;
            }
            try {
                Thread.sleep(pollIntervalMs);
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt();
                throw new IOException("interrupted while waiting for extraction to complete", e);
            }
            waited += pollIntervalMs;
        }

        throw new IOException("timed out waiting for extraction to complete: " + destination);
    }

    private static Path getDestination(Path source) {
        String fName = source.getFileName().toString();
        fName = fName.substring(0, fName.length() - 4);
        return source.toAbsolutePath().getParent().resolve(fName);
    }

    private static void deleteRecursively(Path path) {
        try (Stream<Path> walk = Files.walk(path)) {
            walk.sorted(Comparator.reverseOrder())
                    .forEach(p -> {
                        try {
                            Files.delete(p);
                        } catch (IOException e) {
                            LOG.warn("failed to delete temp file: {}", p, e);
                        }
                    });
        } catch (IOException e) {
            LOG.warn("failed to clean up temp directory: {}", path, e);
        }
    }
}