ParseContextSerializer.java
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.tika.serialization.serdes;
import java.io.IOException;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import com.fasterxml.jackson.core.JsonGenerator;
import com.fasterxml.jackson.databind.JsonSerializer;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.SerializerProvider;
import org.apache.tika.config.JsonConfig;
import org.apache.tika.config.loader.TikaObjectMapperFactory;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.serialization.ComponentNameResolver;
/**
 * Serializes a {@link ParseContext} to JSON.
 * <p>
 * Non-null objects from the context map are serialized under a "typed" key,
 * each keyed by the friendly component name resolved from the value's class.
 * JSON configs are serialized at the top level, except for those whose name
 * was already written under "typed".
 * <p>
 * Example output:
 * <pre>
 * {
 *   "typed": {
 *     "handler-config": {"type": "XML", "parseMode": "RMETA"}
 *   },
 *   "metadata-filters": ["mock-upper-case-filter"]
 * }
 * </pre>
 */
public class ParseContextSerializer extends JsonSerializer<ParseContext> {

    /** Public name constant for a parse context; not referenced by this serializer itself. */
    public static final String PARSE_CONTEXT = "parse-context";

    /** Field name under which the typed context objects are written. */
    public static final String TYPED = "typed";

    /**
     * Returns the mapper supplied by {@link TikaObjectMapperFactory#getPlainMapper()}.
     * It is used to convert values and JSON strings into trees before writing.
     */
    private static ObjectMapper plainMapper() {
        return TikaObjectMapperFactory.getPlainMapper();
    }

    /**
     * Writes the given context as a single JSON object: first the typed objects
     * (under {@link #TYPED}, only if there is at least one), then the JSON configs
     * at the top level.
     *
     * @param parseContext the context to serialize
     * @param gen          the generator the JSON is written to
     * @param serializers  the serializer provider (not used by this implementation)
     * @throws IOException if a context value's class has no registered friendly name,
     *                     or if writing to the generator or parsing a JSON config fails
     */
    @Override
    public void serialize(ParseContext parseContext, JsonGenerator gen,
                          SerializerProvider serializers) throws IOException {
        // Look the mapper up once per call instead of once per written entry.
        ObjectMapper mapper = plainMapper();

        gen.writeStartObject();
        Set<String> typedNames = writeTypedObjects(parseContext.getContextMap(), mapper, gen);
        writeJsonConfigs(parseContext.getJsonConfigs(), typedNames, mapper, gen);
        gen.writeEndObject();
    }

    /**
     * Writes every non-null context value under the "typed" object and returns the
     * friendly names that were written. The "typed" object is only opened (and closed)
     * when at least one value is written.
     */
    private static Set<String> writeTypedObjects(Map<String, Object> contextMap,
                                                 ObjectMapper mapper,
                                                 JsonGenerator gen) throws IOException {
        // Friendly names written under "typed"; the caller uses them to skip
        // JSON configs of the same name (avoids duplicates).
        Set<String> writtenNames = new HashSet<>();

        // Only the values matter here: the output key is derived from the value's class.
        for (Object value : contextMap.values()) {
            if (value == null) {
                continue;
            }
            // Find the friendly component name - all serializable components must be registered
            String friendlyName = ComponentNameResolver.getFriendlyName(value.getClass());
            if (friendlyName == null) {
                throw new IOException(
                        "Cannot serialize ParseContext entry: " + value.getClass().getName() +
                                " is not registered. Components must be registered via " +
                                "@TikaComponent annotation or .idx file to be serializable.");
            }
            if (writtenNames.isEmpty()) {
                // First typed value: open the enclosing "typed" object lazily.
                gen.writeFieldName(TYPED);
                gen.writeStartObject();
            }
            gen.writeFieldName(friendlyName);
            // Use writeTree instead of writeRawValue for binary format support (e.g., Smile)
            // and stricter validation (fails early if value can't be serialized)
            gen.writeTree(mapper.valueToTree(value));
            writtenNames.add(friendlyName);
        }

        if (!writtenNames.isEmpty()) {
            gen.writeEndObject();
        }
        return writtenNames;
    }

    /**
     * Writes the JSON configs as top-level fields. Entries whose name is in
     * {@code typedNames} are skipped because they were already written under "typed".
     * Null configs are skipped as well, mirroring the null handling of typed values.
     */
    private static void writeJsonConfigs(Map<String, JsonConfig> jsonConfigs,
                                         Set<String> typedNames,
                                         ObjectMapper mapper,
                                         JsonGenerator gen) throws IOException {
        for (Map.Entry<String, JsonConfig> entry : jsonConfigs.entrySet()) {
            String name = entry.getKey();
            JsonConfig config = entry.getValue();
            if (config == null || typedNames.contains(name)) {
                continue;
            }
            gen.writeFieldName(name);
            // Parse the JSON string into a tree for binary format support
            gen.writeTree(mapper.readTree(config.json()));
        }
    }
}