// JsonPipesIpc.java
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.tika.pipes.core.serialization;
import java.io.IOException;
import com.fasterxml.jackson.core.StreamReadConstraints;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.module.SimpleModule;
import com.fasterxml.jackson.dataformat.smile.SmileFactory;
import org.apache.tika.config.loader.TikaObjectMapperFactory;
import org.apache.tika.pipes.api.FetchEmitTuple;
import org.apache.tika.pipes.api.PipesResult;
import org.apache.tika.pipes.api.emitter.EmitData;
import org.apache.tika.pipes.core.emitter.EmitDataImpl;
/**
 * Binary IPC codec shared by PipesClient and PipesServer.
 * <p>
 * Payloads are encoded with Jackson's Smile format, a binary flavour of JSON that is
 * smaller on the wire and cheaper to parse than textual JSON while remaining fully usable
 * through the regular Jackson data-binding API.
 */
public class JsonPipesIpc {

    /** The one mapper instance used for every IPC message; built once at class load. */
    private static final ObjectMapper OBJECT_MAPPER = buildMapper();

    /**
     * Assembles the Smile-backed mapper used by {@link #toBytes(Object)} and
     * {@link #fromBytes(byte[], Class)}.
     *
     * @return the mapper produced by {@link TikaObjectMapperFactory} with the
     *         pipes-specific module registered on it
     */
    private static ObjectMapper buildMapper() {
        // Smile (binary) instead of textual JSON: more compact and faster to parse.
        SmileFactory factory = new SmileFactory();

        // Lift the string-length cap. Jackson's default limit (20MB) is too small when
        // the content of a large document (e.g. 30MB+) has to cross the IPC boundary.
        factory.setStreamReadConstraints(
                StreamReadConstraints.builder()
                        .maxStringLength(Integer.MAX_VALUE)
                        .build());

        // The Tika factory provides the base mapper, which registers the TikaModule
        // serializers for Metadata/ParseContext.
        ObjectMapper mapper = TikaObjectMapperFactory.createMapper(factory);

        // Layer the pipes-specific serializers on top.
        mapper.registerModule(newPipesModule());
        return mapper;
    }

    /**
     * Builds the Jackson module holding the pipes-specific serializers and deserializers.
     *
     * @return a new module with handlers for {@link FetchEmitTuple}, {@link EmitData} /
     *         {@link EmitDataImpl} and {@link PipesResult}
     */
    private static SimpleModule newPipesModule() {
        return new SimpleModule()
                .addSerializer(FetchEmitTuple.class, new FetchEmitTupleSerializer())
                .addDeserializer(FetchEmitTuple.class, new FetchEmitTupleDeserializer())
                .addSerializer(EmitData.class, new EmitDataSerializer())
                .addDeserializer(EmitDataImpl.class, new EmitDataDeserializer())
                .addSerializer(PipesResult.class, new PipesResultSerializer())
                .addDeserializer(PipesResult.class, new PipesResultDeserializer());
    }

    /**
     * Encodes the given object as Smile binary data.
     *
     * @param obj the value to serialize
     * @return the Smile-encoded bytes
     * @throws IOException if the mapper fails to serialize {@code obj}
     */
    public static byte[] toBytes(Object obj) throws IOException {
        final byte[] encoded = OBJECT_MAPPER.writeValueAsBytes(obj);
        return encoded;
    }

    /**
     * Decodes Smile binary data into an instance of the requested type.
     *
     * @param bytes the Smile-encoded payload
     * @param clazz the type to bind the payload to
     * @param <T>   the result type
     * @return the deserialized value
     * @throws IOException if the mapper fails to read or bind {@code bytes}
     */
    public static <T> T fromBytes(byte[] bytes, Class<T> clazz) throws IOException {
        final T decoded = OBJECT_MAPPER.readValue(bytes, clazz);
        return decoded;
    }

    /**
     * Exposes the configured mapper for callers that need to use it directly.
     *
     * @return the same mapper instance used by {@link #toBytes(Object)} and
     *         {@link #fromBytes(byte[], Class)}
     */
    public static ObjectMapper getMapper() {
        return OBJECT_MAPPER;
    }
}