// CorruptLiteralTest.java

/*******************************************************************************
 * Copyright (c) 2025 Eclipse RDF4J contributors.
 *
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the Eclipse Distribution License v1.0
 * which accompanies this distribution, and is available at
 * http://www.eclipse.org/org/documents/edl-v10.php.
 *
 * SPDX-License-Identifier: BSD-3-Clause
 *******************************************************************************/
package org.eclipse.rdf4j.sail.nativerdf.model;

import static org.junit.jupiter.api.Assertions.assertTrue;

import java.nio.charset.StandardCharsets;

import org.apache.commons.codec.binary.Hex;
import org.junit.jupiter.api.Test;

/**
 * Tests for the diagnostic label produced by {@link CorruptLiteral#getLabel()} when the literal wraps raw bytes that
 * could not be read as a regular value.
 * <p>
 * Every literal under test is created with ID {@code 789}; the expected label prefix in
 * {@link #recoversUtf8OrAscii()} depends on that value.
 */
public class CorruptLiteralTest {

	/**
	 * Builds the literal under test from raw bytes.
	 *
	 * @param data the bytes handed to the {@link CorruptLiteral} constructor
	 * @return a {@code CorruptLiteral} constructed with a {@code null} first argument, ID {@code 789} and {@code data}
	 */
	private static CorruptLiteral litWithData(byte[] data) {
		return new CorruptLiteral(null, 789, data);
	}

	/**
	 * Joins the given byte arrays, in order, into one newly allocated array.
	 *
	 * @param parts the segments to join; none may be {@code null}
	 * @return a new array holding all bytes of {@code parts} back to back
	 */
	private static byte[] concat(byte[]... parts) {
		int total = 0;
		for (byte[] part : parts) {
			total += part.length;
		}

		byte[] joined = new byte[total];
		int offset = 0;
		for (byte[] part : parts) {
			System.arraycopy(part, 0, joined, offset, part.length);
			offset += part.length;
		}
		return joined;
	}

	/**
	 * Readable text surrounded by invalid UTF-8 must still show up in the label, and the label must start with the
	 * expected prefix naming ID 789.
	 */
	@Test
	public void recoversUtf8OrAscii() {
		// 0xC3 announces a two-byte UTF-8 sequence, but 0x28 is not a continuation byte.
		byte[] invalid1 = new byte[] { (byte) 0xC3, (byte) 0x28 };
		byte[] valid = "validlong".getBytes(StandardCharsets.UTF_8);
		// 0xC0 0xAF is an overlong encoding, which is not valid UTF-8.
		byte[] invalid2 = new byte[] { (byte) 0xC0, (byte) 0xAF };
		byte[] tail = "abc".getBytes(StandardCharsets.UTF_8);

		byte[] data = concat(invalid1, valid, invalid2, tail);

		CorruptLiteral lit = litWithData(data);
		String label = lit.getLabel();

		assertTrue(label.startsWith("CorruptLiteral with ID 789 with possible data: "));
		assertTrue(label.contains("validlong"), "Should recover core decodable region");
	}

	/**
	 * When the data contains no readable text at all, the label must contain the hex encoding of the bytes.
	 */
	@Test
	public void fallsBackToHexWhenNoDecodable() {
		// 0x80/0x81 are bare continuation bytes; 0xFE/0xFF never occur in valid UTF-8.
		byte[] body = new byte[] { (byte) 0x80, (byte) 0x81, (byte) 0xFE, (byte) 0xFF };
		CorruptLiteral lit = litWithData(body);
		String label = lit.getLabel();
		assertTrue(label.contains(Hex.encodeHexString(body)), "Should include hex fallback");
	}

	/**
	 * Three consecutive zero bytes act as a terminator: text before them is reported in the label, text after them is
	 * not.
	 */
	@Test
	public void stopsAtTripleZeroSentinel() {
		byte[] head = "xyz".getBytes(StandardCharsets.UTF_8);
		byte[] sentinel = new byte[] { 0, 0, 0 };
		byte[] tail = "end".getBytes(StandardCharsets.UTF_8);

		byte[] data = concat(head, sentinel, tail);

		CorruptLiteral lit = litWithData(data);
		String label = lit.getLabel();
		assertTrue(label.contains("xyz"), "Should include data before sentinel");
		assertTrue(!label.contains("end"), "Should not include data after sentinel");
	}
}