JsonFetchEmitTupleTest.java
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.tika.pipes.core.serialization;
import static org.junit.jupiter.api.Assertions.assertEquals;
import java.io.Reader;
import java.io.StringReader;
import java.io.StringWriter;
import org.junit.jupiter.api.Test;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.pipes.api.FetchEmitTuple;
import org.apache.tika.pipes.api.ParseMode;
import org.apache.tika.pipes.api.emitter.EmitKey;
import org.apache.tika.pipes.api.fetcher.FetchKey;
import org.apache.tika.pipes.core.extractor.UnpackConfig;
import org.apache.tika.sax.BasicContentHandlerFactory;
import org.apache.tika.sax.ContentHandlerFactory;
public class JsonFetchEmitTupleTest {
@Test
public void testBasic() throws Exception {
Metadata m = new Metadata();
m.add("m1", "v1");
m.add("m1", "v1");
m.add("m2", "v2");
m.add("m2", "v3");
m.add("m3", "v4");
ParseContext parseContext = new ParseContext();
// Set ContentHandlerFactory and ParseMode in ParseContext
ContentHandlerFactory factory = new BasicContentHandlerFactory(
BasicContentHandlerFactory.HANDLER_TYPE.XML, 10000);
parseContext.set(ContentHandlerFactory.class, factory);
parseContext.set(ParseMode.class, ParseMode.CONCATENATE);
FetchEmitTuple t = new FetchEmitTuple("my_id", new FetchKey("my_fetcher", "fetchKey1"), new EmitKey("my_emitter", "emitKey1"), m, parseContext,
FetchEmitTuple.ON_PARSE_EXCEPTION.SKIP);
StringWriter writer = new StringWriter();
JsonFetchEmitTuple.toJson(t, writer);
Reader reader = new StringReader(writer.toString());
FetchEmitTuple deserialized = JsonFetchEmitTuple.fromJson(reader);
assertEquals(t, deserialized);
}
@Test
public void testFetchRange() throws Exception {
Metadata m = new Metadata();
m.add("m1", "v1");
m.add("m1", "v1");
m.add("m2", "v2");
m.add("m2", "v3");
m.add("m3", "v4");
// TODO -- add this to the ParseContext:
// parseContext.set(ContentHandlerFactory.class, new BasicContentHandlerFactory(
// BasicContentHandlerFactory.HANDLER_TYPE.XML, 10000));
// parseContext.set(ParseMode.class, ParseMode.CONCATENATE);
FetchEmitTuple t = new FetchEmitTuple("my_id", new FetchKey("my_fetcher", "fetchKey1", 10, 1000), new EmitKey("my_emitter", "emitKey1"), m, new ParseContext(),
FetchEmitTuple.ON_PARSE_EXCEPTION.SKIP);
StringWriter writer = new StringWriter();
JsonFetchEmitTuple.toJson(t, writer);
Reader reader = new StringReader(writer.toString());
FetchEmitTuple deserialized = JsonFetchEmitTuple.fromJson(reader);
assertEquals(t, deserialized);
}
@Test
public void testBytes() throws Exception {
// TODO -- add these to the ParseContext:
// UnpackConfig bytesConfig = new UnpackConfig(true);
// bytesConfig.setEmitter("emitter");
// parseContext.set(ContentHandlerFactory.class, new BasicContentHandlerFactory(
// BasicContentHandlerFactory.HANDLER_TYPE.XML, 10000));
// parseContext.set(ParseMode.class, ParseMode.CONCATENATE);
FetchEmitTuple t = new FetchEmitTuple("my_id", new FetchKey("my_fetcher", "fetchKey1", 10, 1000), new EmitKey("my_emitter", "emitKey1"), new Metadata(), new ParseContext(),
FetchEmitTuple.ON_PARSE_EXCEPTION.SKIP);
StringWriter writer = new StringWriter();
JsonFetchEmitTuple.toJson(t, writer);
Reader reader = new StringReader(writer.toString());
FetchEmitTuple deserialized = JsonFetchEmitTuple.fromJson(reader);
assertEquals(t, deserialized);
}
@Test
public void testUnpackConfigSerialization() throws Exception {
ParseContext parseContext = new ParseContext();
parseContext.set(ParseMode.class, ParseMode.UNPACK);
// Create UnpackConfig with specific settings
UnpackConfig unpackConfig = new UnpackConfig();
unpackConfig.setZipEmbeddedFiles(true);
unpackConfig.setIncludeMetadataInZip(true);
unpackConfig.setEmitter("test-emitter");
unpackConfig.setSuffixStrategy(UnpackConfig.SUFFIX_STRATEGY.DETECTED);
parseContext.set(UnpackConfig.class, unpackConfig);
FetchEmitTuple t = new FetchEmitTuple("test-id",
new FetchKey("my_fetcher", "fetchKey1"),
new EmitKey("my_emitter", "emitKey1"),
new Metadata(), parseContext,
FetchEmitTuple.ON_PARSE_EXCEPTION.EMIT);
StringWriter writer = new StringWriter();
JsonFetchEmitTuple.toJson(t, writer);
String json = writer.toString();
System.out.println("Serialized JSON: " + json);
Reader reader = new StringReader(json);
FetchEmitTuple deserialized = JsonFetchEmitTuple.fromJson(reader);
// Verify ParseMode is preserved
assertEquals(ParseMode.UNPACK, deserialized.getParseContext().get(ParseMode.class));
// Verify UnpackConfig is preserved
UnpackConfig deserializedConfig = deserialized.getParseContext().get(UnpackConfig.class);
assertEquals(unpackConfig.isZipEmbeddedFiles(), deserializedConfig.isZipEmbeddedFiles(),
"zipEmbeddedFiles should be preserved");
assertEquals(unpackConfig.isIncludeMetadataInZip(), deserializedConfig.isIncludeMetadataInZip(),
"includeMetadataInZip should be preserved");
assertEquals(unpackConfig.getEmitter(), deserializedConfig.getEmitter(),
"emitter should be preserved");
assertEquals(unpackConfig.getSuffixStrategy(), deserializedConfig.getSuffixStrategy(),
"suffixStrategy should be preserved");
}
}