// RoundTripSerializationTest.java
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.tika.serialization;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertTrue;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.module.SimpleModule;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.apache.tika.config.TimeoutLimits;
import org.apache.tika.config.loader.TikaObjectMapperFactory;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.serialization.serdes.ParseContextDeserializer;
import org.apache.tika.serialization.serdes.ParseContextSerializer;
/**
 * Round-trip serialization tests for {@link ParseContext}.
 * <p>
 * Each test stores one or more named JSON configs on a {@code ParseContext},
 * writes the context to JSON with an {@link ObjectMapper} that has
 * {@link ParseContextSerializer} and {@link ParseContextDeserializer} registered,
 * reads it back, and checks that the stored values are still present.
 */
public class RoundTripSerializationTest {

    /** Mapper under test; re-initialised before every test by {@link #setUp()}. */
    private ObjectMapper mapper;

    /**
     * Obtains the mapper from {@link TikaObjectMapperFactory} and registers the
     * ParseContext serializer/deserializer pair on it.
     */
    @BeforeEach
    void setUp() {
        // NOTE(review): if getMapper() hands out a shared instance, a fresh module is
        // registered on it before every test -- confirm against TikaObjectMapperFactory.
        mapper = TikaObjectMapperFactory.getMapper();
        SimpleModule module = new SimpleModule();
        module.addDeserializer(ParseContext.class, new ParseContextDeserializer());
        module.addSerializer(ParseContext.class, new ParseContextSerializer());
        mapper.registerModule(module);
    }

    // ==================== Helpers ====================

    /**
     * Serializes the given context to a JSON string and deserializes that string
     * into a new {@link ParseContext}.
     *
     * @param context the context to serialize
     * @return the context read back from the serialized JSON
     * @throws Exception if serialization or deserialization fails
     */
    private ParseContext roundTrip(ParseContext context) throws Exception {
        String json = mapper.writeValueAsString(context);
        return mapper.readValue(json, ParseContext.class);
    }

    /**
     * Parses the JSON stored under {@code name} in the given context into a tree.
     *
     * @param context the context holding the config
     * @param name    the config key
     * @return the parsed JSON tree of that config
     * @throws Exception if the stored JSON cannot be parsed
     */
    private JsonNode configTree(ParseContext context, String name) throws Exception {
        return mapper.readTree(context.getJsonConfig(name).json());
    }

    // ==================== Basic Round-Trip Tests ====================

    /** A context without configs must come back without configs. */
    @Test
    void testEmptyContextRoundTrip() throws Exception {
        ParseContext reloaded = roundTrip(new ParseContext());

        assertTrue(reloaded.getJsonConfigs().isEmpty());
    }

    /** A single config keeps both its key and its content. */
    @Test
    void testSingleConfigRoundTrip() throws Exception {
        ParseContext original = new ParseContext();
        original.setJsonConfig("pdf-parser", "{\"ocrStrategy\":\"AUTO\",\"extractInlineImages\":true}");

        ParseContext reloaded = roundTrip(original);

        assertTrue(reloaded.hasJsonConfig("pdf-parser"));
        String reloadedConfig = reloaded.getJsonConfig("pdf-parser").json();
        assertTrue(reloadedConfig.contains("AUTO"));
        assertTrue(reloadedConfig.contains("extractInlineImages"));
    }

    /** Several configs stored side by side are all present after the round-trip. */
    @Test
    void testMultipleConfigsRoundTrip() throws Exception {
        ParseContext original = new ParseContext();
        original.setJsonConfig("pdf-parser", "{\"ocrStrategy\":\"AUTO\"}");
        original.setJsonConfig("html-parser", "{\"extractScripts\":false}");
        original.setJsonConfig("timeout-limits",
                "{\"progressTimeoutMillis\":30000,\"totalTaskTimeoutMillis\":120000}");

        ParseContext reloaded = roundTrip(original);

        assertEquals(3, reloaded.getJsonConfigs().size());
        assertTrue(reloaded.hasJsonConfig("pdf-parser"));
        assertTrue(reloaded.hasJsonConfig("html-parser"));
        assertTrue(reloaded.hasJsonConfig("timeout-limits"));
    }

    /** Nested objects and arrays inside a config keep their structure. */
    @Test
    void testNestedJsonRoundTrip() throws Exception {
        ParseContext original = new ParseContext();
        String nestedJson = """
                {"level1":{"level2":{"array":[1,2,3],"nested":{"deep":"value"}}}}
                """.trim();
        original.setJsonConfig("complex-config", nestedJson);

        ParseContext reloaded = roundTrip(original);

        JsonNode level2 = configTree(reloaded, "complex-config").get("level1").get("level2");
        assertEquals("value", level2.get("nested").get("deep").asText());
        assertEquals(3, level2.get("array").size());
    }

    // ==================== Multiple Round-Trips Stability ====================

    /** Feeding the output back in repeatedly must not lose or duplicate configs. */
    @Test
    void testMultipleRoundTripsStability() throws Exception {
        ParseContext context = new ParseContext();
        context.setJsonConfig("pdf-parser", "{\"ocrStrategy\":\"NO_OCR\"}");
        context.setJsonConfig("timeout-limits",
                "{\"progressTimeoutMillis\":45000,\"totalTaskTimeoutMillis\":180000}");

        // Perform 5 round-trips, each starting from the previous result
        for (int i = 0; i < 5; i++) {
            context = roundTrip(context);
        }

        // Verify values preserved after multiple round-trips
        assertEquals(2, context.getJsonConfigs().size());
        assertTrue(context.getJsonConfig("pdf-parser").json().contains("NO_OCR"));
        assertTrue(context.getJsonConfig("timeout-limits").json().contains("45000"));
    }

    // ==================== TimeoutLimits Round-Trip ====================

    /** A "timeout-limits" config resolves to a {@link TimeoutLimits} with the same values. */
    @Test
    void testTimeoutLimitsRoundTrip() throws Exception {
        ParseContext original = new ParseContext();
        original.setJsonConfig("timeout-limits",
                "{\"progressTimeoutMillis\":60000,\"totalTaskTimeoutMillis\":3600000}");

        ParseContext reloaded = roundTrip(original);

        // Resolve and verify
        ParseContextUtils.resolveAll(reloaded, Thread.currentThread().getContextClassLoader());
        TimeoutLimits limits = reloaded.get(TimeoutLimits.class);
        assertNotNull(limits);
        assertEquals(60000, limits.getProgressTimeoutMillis());
        assertEquals(3600000, limits.getTotalTaskTimeoutMillis());
    }

    /** Repeats the TimeoutLimits round-trip for a range of values, checking both fields. */
    @Test
    void testTimeoutLimitsDifferentValues() throws Exception {
        long[] testValues = {1000, 5000, 30000, 120000, 600000};
        for (long value : testValues) {
            long totalValue = value * 10;
            ParseContext original = new ParseContext();
            original.setJsonConfig("timeout-limits",
                    "{\"progressTimeoutMillis\":" + value +
                            ",\"totalTaskTimeoutMillis\":" + totalValue + "}");

            ParseContext reloaded = roundTrip(original);
            ParseContextUtils.resolveAll(reloaded, Thread.currentThread().getContextClassLoader());
            TimeoutLimits limits = reloaded.get(TimeoutLimits.class);

            // Fail with a clear message instead of a NullPointerException below
            assertNotNull(limits,
                    "TimeoutLimits should be resolved for progressTimeoutMillis " + value);
            assertEquals(value, limits.getProgressTimeoutMillis(),
                    "progressTimeoutMillis " + value + " should survive round-trip");
            assertEquals(totalValue, limits.getTotalTaskTimeoutMillis(),
                    "totalTaskTimeoutMillis " + totalValue + " should survive round-trip");
        }
    }

    // ==================== JSON Structure Preservation ====================

    /** String and numeric arrays keep their length, order and element values. */
    @Test
    void testArrayValuesPreserved() throws Exception {
        ParseContext original = new ParseContext();
        original.setJsonConfig("test-config", "{\"items\":[\"a\",\"b\",\"c\"],\"numbers\":[1,2,3]}");

        JsonNode node = configTree(roundTrip(original), "test-config");

        assertEquals(3, node.get("items").size());
        assertEquals("b", node.get("items").get(1).asText());
        assertEquals(2, node.get("numbers").get(1).asInt());
    }

    /** Boolean {@code true} and {@code false} both survive. */
    @Test
    void testBooleanValuesPreserved() throws Exception {
        ParseContext original = new ParseContext();
        original.setJsonConfig("test-config", "{\"enabled\":true,\"disabled\":false}");

        JsonNode node = configTree(roundTrip(original), "test-config");

        assertTrue(node.get("enabled").asBoolean());
        assertFalse(node.get("disabled").asBoolean());
    }

    /** An explicit JSON {@code null} is kept as a null node rather than dropped. */
    @Test
    void testNullValuesPreserved() throws Exception {
        ParseContext original = new ParseContext();
        original.setJsonConfig("test-config", "{\"value\":null,\"other\":\"text\"}");

        JsonNode node = configTree(roundTrip(original), "test-config");

        assertTrue(node.get("value").isNull());
        assertEquals("text", node.get("other").asText());
    }

    // ==================== parse-context Wrapper Format ====================

    /** Input wrapped in a top-level "parse-context" object can be deserialized. */
    @Test
    void testParseContextWrapperFormat() throws Exception {
        // Test that we can deserialize from wrapped format
        String wrappedJson = """
                {
                  "parse-context": {
                    "pdf-parser": {"ocrStrategy": "AUTO"}
                  }
                }
                """;

        ParseContext reloaded = mapper.readValue(wrappedJson, ParseContext.class);

        assertTrue(reloaded.hasJsonConfig("pdf-parser"));
        assertTrue(reloaded.getJsonConfig("pdf-parser").json().contains("AUTO"));
    }

    /** Serialized output uses the flat format, i.e. contains no "parse-context" wrapper. */
    @Test
    void testFlatFormatPreferred() throws Exception {
        // Verify serialization uses flat format (no wrapper)
        ParseContext original = new ParseContext();
        original.setJsonConfig("pdf-parser", "{\"ocrStrategy\":\"AUTO\"}");

        String json = mapper.writeValueAsString(original);

        assertFalse(json.contains("parse-context"), "Should serialize in flat format");
        assertTrue(json.contains("pdf-parser"));
    }

    // ==================== Edge Cases ====================

    /** An empty JSON object is a valid config and comes back as exactly {@code {}}. */
    @Test
    void testEmptyObjectConfig() throws Exception {
        ParseContext original = new ParseContext();
        original.setJsonConfig("empty-config", "{}");

        ParseContext reloaded = roundTrip(original);

        assertTrue(reloaded.hasJsonConfig("empty-config"));
        assertEquals("{}", reloaded.getJsonConfig("empty-config").json());
    }

    /** Slashes and an escaped backslash inside string values are preserved. */
    @Test
    void testSpecialCharactersInValues() throws Exception {
        ParseContext original = new ParseContext();
        original.setJsonConfig("test-config", "{\"path\":\"/path/to/file\",\"regex\":\"\\\\d+\"}");

        JsonNode node = configTree(roundTrip(original), "test-config");

        assertEquals("/path/to/file", node.get("path").asText());
        // The JSON text holds "\\d+", which decodes to a single backslash followed by d+
        assertEquals("\\d+", node.get("regex").asText());
    }

    /** Non-ASCII text, including a character outside the BMP, is preserved. */
    @Test
    void testUnicodeInValues() throws Exception {
        // Written as Unicode escapes so the literals do not depend on the source-file encoding:
        // two CJK characters (U+4E16 U+754C) and one supplementary-plane emoji (U+1F389).
        String cjkMessage = "Hello \u4e16\u754c";
        String emoji = "\uD83C\uDF89";
        ParseContext original = new ParseContext();
        original.setJsonConfig("test-config",
                "{\"message\":\"" + cjkMessage + "\",\"emoji\":\"" + emoji + "\"}");

        JsonNode node = configTree(roundTrip(original), "test-config");

        assertEquals(cjkMessage, node.get("message").asText());
        assertEquals(emoji, node.get("emoji").asText());
    }
}