FileSystemEmitterRuntimeConfigTest.java
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.tika.pipes.emitter.fs;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.io.TempDir;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.plugins.ExtensionConfig;
/**
* Tests runtime configuration of FileSystemEmitter via ParseContext's jsonConfigs.
*/
public class FileSystemEmitterRuntimeConfigTest {
@Test
public void testRuntimeConfigCannotOverrideBasePath(@TempDir Path tempDir) throws Exception {
// Create two output directories
Path dir1 = tempDir.resolve("output1");
Path dir2 = tempDir.resolve("output2");
Files.createDirectories(dir1);
Files.createDirectories(dir2);
// Create emitter with dir1 as default basePath
String defaultConfig = String.format(Locale.ROOT,
"{\"basePath\":\"%s\", \"onExists\":\"REPLACE\"}",
dir1.toString().replace("\\", "\\\\"));
ExtensionConfig pluginConfig = new ExtensionConfig("test-emitter", "test", defaultConfig);
FileSystemEmitter emitter = FileSystemEmitter.build(pluginConfig);
// Emit with default config
List<Metadata> metadataList1 = new ArrayList<>();
Metadata m1 = new Metadata();
m1.set(TikaCoreProperties.TIKA_CONTENT, "content1");
metadataList1.add(m1);
ParseContext context1 = new ParseContext();
emitter.emit("test1.json", metadataList1, context1);
Path output1 = dir1.resolve("test1.json");
assertTrue(Files.exists(output1), "File should be created in dir1");
// Try to override basePath at runtime to point to dir2
// This should throw an exception for security reasons
String runtimeConfig = String.format(Locale.ROOT,
"{\"basePath\":\"%s\", \"onExists\":\"REPLACE\"}",
dir2.toString().replace("\\", "\\\\"));
ParseContext context2 = new ParseContext();
context2.setJsonConfig("test-emitter", runtimeConfig);
// Emit with runtime config - should throw exception
List<Metadata> metadataList2 = new ArrayList<>();
Metadata m2 = new Metadata();
m2.set(TikaCoreProperties.TIKA_CONTENT, "content2");
metadataList2.add(m2);
IOException exception = assertThrows(IOException.class, () -> {
emitter.emit("test2.json", metadataList2, context2);
});
assertTrue(exception.getCause() != null &&
exception.getCause().getMessage().contains("Cannot change 'basePath' at runtime"),
"Should throw exception when attempting to change basePath at runtime");
}
@Test
public void testRuntimeConfigFileExtension(@TempDir Path tempDir) throws Exception {
// Create emitter with no file extension
String defaultConfig = String.format(Locale.ROOT,
"{\"basePath\":\"%s\", \"onExists\":\"REPLACE\"}",
tempDir.toString().replace("\\", "\\\\"));
ExtensionConfig pluginConfig = new ExtensionConfig("test-emitter", "test", defaultConfig);
FileSystemEmitter emitter = FileSystemEmitter.build(pluginConfig);
// Emit with default config - no extension added
List<Metadata> metadataList1 = new ArrayList<>();
Metadata m1 = new Metadata();
m1.set(TikaCoreProperties.TIKA_CONTENT, "content1");
metadataList1.add(m1);
ParseContext context1 = new ParseContext();
emitter.emit("test1", metadataList1, context1);
assertTrue(Files.exists(tempDir.resolve("test1")),
"File without extension should exist");
// Override at runtime to add .json extension
// Note: basePath is NOT included for security reasons
String runtimeConfig = "{\"fileExtension\":\"json\", \"onExists\":\"REPLACE\"}";
ParseContext context2 = new ParseContext();
context2.setJsonConfig("test-emitter", runtimeConfig);
// Emit with runtime config
List<Metadata> metadataList2 = new ArrayList<>();
Metadata m2 = new Metadata();
m2.set(TikaCoreProperties.TIKA_CONTENT, "content2");
metadataList2.add(m2);
emitter.emit("test2", metadataList2, context2);
assertTrue(Files.exists(tempDir.resolve("test2.json")),
"File with .json extension should exist");
}
@Test
public void testRuntimeConfigOnExists(@TempDir Path tempDir) throws Exception {
// Create emitter with REPLACE as default
String defaultConfig = String.format(Locale.ROOT,
"{\"basePath\":\"%s\", \"onExists\":\"REPLACE\"}",
tempDir.toString().replace("\\", "\\\\"));
ExtensionConfig pluginConfig = new ExtensionConfig("test-emitter", "test", defaultConfig);
FileSystemEmitter emitter = FileSystemEmitter.build(pluginConfig);
// Create a test file using stream emit
Path testFile = tempDir.resolve("test.txt");
Files.writeString(testFile, "original content");
// Emit with default config (REPLACE) - should succeed
InputStream inputStream1 = new ByteArrayInputStream("replaced content".getBytes(StandardCharsets.UTF_8));
Metadata metadata1 = new Metadata();
ParseContext context1 = new ParseContext();
emitter.emit("test.txt", inputStream1, metadata1, context1);
assertEquals("replaced content", Files.readString(testFile),
"Content should be replaced");
// Override at runtime to use SKIP
// Note: basePath is NOT included for security reasons
String runtimeConfig = "{\"onExists\":\"SKIP\"}";
ParseContext context2 = new ParseContext();
context2.setJsonConfig("test-emitter", runtimeConfig);
// Emit with runtime config (SKIP) - should not replace existing file
InputStream inputStream2 = new ByteArrayInputStream("new content".getBytes(StandardCharsets.UTF_8));
Metadata metadata2 = new Metadata();
emitter.emit("test.txt", inputStream2, metadata2, context2);
assertEquals("replaced content", Files.readString(testFile),
"Content should not change with SKIP");
}
@Test
public void testJsonConfigNotPresent(@TempDir Path tempDir) throws Exception {
// Create emitter with default config
String defaultConfig = String.format(Locale.ROOT,
"{\"basePath\":\"%s\", \"onExists\":\"REPLACE\"}",
tempDir.toString().replace("\\", "\\\\"));
ExtensionConfig pluginConfig = new ExtensionConfig("test-emitter", "test", defaultConfig);
FileSystemEmitter emitter = FileSystemEmitter.build(pluginConfig);
// Emit with ParseContext that has no jsonConfigs - should use default config
List<Metadata> metadataList = new ArrayList<>();
Metadata m = new Metadata();
m.set(TikaCoreProperties.TIKA_CONTENT, "test content");
metadataList.add(m);
ParseContext context = new ParseContext();
// Don't set jsonConfigs in context
emitter.emit("test.json", metadataList, context);
Path output = tempDir.resolve("test.json");
assertTrue(Files.exists(output), "File should be created with default config");
}
@Test
public void testJsonConfigWithDifferentId(@TempDir Path tempDir) throws Exception {
// Create emitter with default config
String defaultConfig = String.format(Locale.ROOT,
"{\"basePath\":\"%s\", \"onExists\":\"REPLACE\"}",
tempDir.toString().replace("\\", "\\\\"));
ExtensionConfig pluginConfig = new ExtensionConfig("test-emitter", "test", defaultConfig);
FileSystemEmitter emitter = FileSystemEmitter.build(pluginConfig);
// Create jsonConfigs with config for a different emitter ID
Path otherDir = tempDir.resolve("other");
Files.createDirectories(otherDir);
String runtimeConfig = String.format(Locale.ROOT,
"{\"basePath\":\"%s\", \"onExists\":\"REPLACE\"}",
otherDir.toString().replace("\\", "\\\\"));
ParseContext context = new ParseContext();
context.setJsonConfig("different-emitter", runtimeConfig);
// Emit - should use default config since runtime config is for different ID
List<Metadata> metadataList = new ArrayList<>();
Metadata m = new Metadata();
m.set(TikaCoreProperties.TIKA_CONTENT, "test content");
metadataList.add(m);
emitter.emit("test.json", metadataList, context);
assertTrue(Files.exists(tempDir.resolve("test.json")),
"File should be created in default basePath");
assertFalse(Files.exists(otherDir.resolve("test.json")),
"File should not be created in other directory");
}
}