// NativeSailStoreCorruptionTest.java

/*******************************************************************************
 * Copyright (c) 2024 Eclipse RDF4J contributors.
 *
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the Eclipse Distribution License v1.0
 * which accompanies this distribution, and is available at
 * http://www.eclipse.org/org/documents/edl-v10.php.
 *
 * SPDX-License-Identifier: BSD-3-Clause
 ******************************************************************************/

package org.eclipse.rdf4j.sail.nativerdf;

import static org.junit.jupiter.api.Assertions.assertEquals;

import java.io.File;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.io.StringWriter;
import java.nio.file.Files;
import java.nio.file.StandardCopyOption;
import java.util.ArrayList;
import java.util.List;

import org.eclipse.rdf4j.model.IRI;
import org.eclipse.rdf4j.model.Statement;
import org.eclipse.rdf4j.model.ValueFactory;
import org.eclipse.rdf4j.model.impl.SimpleValueFactory;
import org.eclipse.rdf4j.model.util.Values;
import org.eclipse.rdf4j.model.vocabulary.RDF;
import org.eclipse.rdf4j.model.vocabulary.RDFS;
import org.eclipse.rdf4j.repository.Repository;
import org.eclipse.rdf4j.repository.RepositoryConnection;
import org.eclipse.rdf4j.repository.RepositoryResult;
import org.eclipse.rdf4j.repository.sail.SailRepository;
import org.eclipse.rdf4j.rio.RDFFormat;
import org.eclipse.rdf4j.rio.RDFWriter;
import org.eclipse.rdf4j.rio.Rio;
import org.jetbrains.annotations.NotNull;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.io.TempDir;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Tests how the NativeStore handles corruption in the data files.
 */
public class NativeSailStoreCorruptionTest {

	/** Logger used for debug output while data files are corrupted and statements are read back. */
	private static final Logger logger = LoggerFactory.getLogger(NativeSailStoreCorruptionTest.class);

	/** Temporary directory injected by JUnit; the store's data directory is created inside it. */
	@TempDir
	File tempFolder;

	/** Repository under test; it wraps a NativeStore that lives in {@link #dataDir}. */
	protected Repository repo;

	/** Value factory used to build the fixture IRIs, literals and statements. */
	protected final ValueFactory F = SimpleValueFactory.getInstance();

	/** Directory holding the NativeStore data files that the tests overwrite. */
	private File dataDir;

	/**
	 * Creates a NativeStore with the {@code spoc} and {@code posc} indexes, loads the fixture statements, snapshots
	 * every data file as {@code <name>.bak} and finally switches on
	 * {@link NativeStore#SOFT_FAIL_ON_CORRUPT_DATA_AND_REPAIR_INDEXES}.
	 *
	 * @throws IOException if the data directory cannot be created or one of the data files cannot be backed up
	 */
	@BeforeEach
	public void before() throws IOException {
		this.dataDir = new File(tempFolder, "dbmodel");
		// Fail with a descriptive IOException rather than silently ignoring an unsuccessful mkdir().
		Files.createDirectories(dataDir.toPath());
		repo = new SailRepository(new NativeStore(dataDir, "spoc,posc"));
		repo.init();

		IRI contextOne = F.createIRI("urn:one");
		IRI contextTwo = F.createIRI("urn:two");
		IRI subjectC2 = F.createIRI("http://example.org/c2");

		Statement zero = F.createStatement(F.createIRI("http://example.org/a0"), RDFS.LABEL, F.createLiteral("zero"));
		Statement one = F.createStatement(F.createIRI("http://example.org/b1"), RDFS.LABEL, F.createLiteral("one"));
		Statement two = F.createStatement(subjectC2, RDFS.LABEL, F.createLiteral("two"));
		Statement blankNodes = F.createStatement(Values.bnode(), RDF.TYPE, Values.bnode());
		Statement twoEnglish = F.createStatement(subjectC2, RDFS.LABEL, F.createLiteral("two", "en"));
		Statement decimal = F.createStatement(subjectC2, RDFS.LABEL, F.createLiteral(1.2));

		try (RepositoryConnection conn = repo.getConnection()) {
			conn.add(zero);
			conn.add(one, contextOne);
			// NOTE(review): "two" is added twice while the tests expect six statements in total, so the duplicate
			// is presumably meant to be ignored by the store - confirm that this is intentional.
			conn.add(two, contextTwo);
			conn.add(two, contextTwo);
			conn.add(blankNodes, contextTwo);
			conn.add(twoEnglish, contextTwo);
			conn.add(decimal, contextTwo);
		}

		// Snapshot the untouched files so that tests can restore them between corruption rounds.
		String[] dataFiles = { "values.dat", "values.id", "values.hash", "namespaces.dat", "contexts.dat",
				"triples-posc.alloc", "triples-posc.dat", "triples-spoc.alloc", "triples-spoc.dat" };
		for (String dataFile : dataFiles) {
			backupFile(dataDir, dataFile);
		}

		NativeStore.SOFT_FAIL_ON_CORRUPT_DATA_AND_REPAIR_INDEXES = true;

	}

	/**
	 * Overwrites a single byte of an existing file in place, leaving the file length unchanged.
	 *
	 * @param valuesFile the file to modify
	 * @param pos        zero-based offset of the byte to overwrite; must lie inside the file
	 * @param newVal     the replacement value; only its eight low-order bits are written
	 * @throws IOException if {@code pos} is negative or not smaller than the file length, or if the file cannot be
	 *                     opened or written
	 */
	public static void overwriteByteInFile(File valuesFile, long pos, int newVal) throws IOException {

		// "rw" mode allows in-place modification without truncating the file.
		try (RandomAccessFile raf = new RandomAccessFile(valuesFile, "rw")) {
			long fileLength = raf.length();

			// Reject negative offsets explicitly instead of relying on the error raised by seek().
			if (pos < 0) {
				throw new IOException("Attempt to write at a negative file position: " + pos);
			}

			// Writing at or past the end would grow the file instead of corrupting existing content.
			if (pos >= fileLength) {
				throw new IOException(
						"Attempt to write outside the existing file bounds: " + pos + " >= " + fileLength);
			}

			// Position the file pointer on the requested byte and replace it.
			raf.seek(pos);
			raf.writeByte(newVal);
		}
	}

	/**
	 * Copies the file named {@code s} to {@code s + ".bak"} in the same directory, replacing any earlier backup.
	 *
	 * @param dataDir directory that contains the file
	 * @param s       name of the file to back up
	 * @throws IOException if the file does not exist or the copy fails
	 */
	public static void backupFile(File dataDir, String s) throws IOException {
		File original = new File(dataDir, s);

		if (original.exists()) {
			File copy = new File(dataDir, s + ".bak");
			Files.copy(original.toPath(), copy.toPath(), StandardCopyOption.REPLACE_EXISTING);
			return;
		}

		throw new IOException(s + " does not exist and cannot be backed up.");
	}

	/**
	 * Replaces the file named {@code s} with the content of its {@code s + ".bak"} backup in the same directory.
	 *
	 * @param dataDir directory that contains the file and its backup
	 * @param s       name of the file to restore
	 * @throws IOException if the backup does not exist or the copy fails
	 */
	public static void restoreFile(File dataDir, String s) throws IOException {
		String backupName = s + ".bak";
		File snapshot = new File(dataDir, backupName);

		if (snapshot.exists()) {
			File target = new File(dataDir, s);
			Files.copy(snapshot.toPath(), target.toPath(), StandardCopyOption.REPLACE_EXISTING);
			return;
		}

		throw new IOException("Backup file " + s + ".bak does not exist.");
	}

	/**
	 * Zeroes the byte at offset 12 of {@code values.dat} while the repository is shut down and expects six
	 * statements to be readable after it has been initialised again.
	 */
	@Test
	public void testCorruptValuesDatFileNamespace() throws IOException {
		repo.shutDown();

		File valuesDat = new File(dataDir, "values.dat");
		overwriteByteInFile(valuesDat, 12, 0x0);

		repo.init();

		assertEquals(6, getStatements().size());
	}

	/**
	 * Zeroes the byte at offset 96 of {@code values.dat} while the repository is shut down and expects six
	 * statements to be readable after it has been initialised again.
	 */
	@Test
	public void testCorruptValuesDatFileNamespaceDatatype() throws IOException {
		final long corruptedOffset = 96;

		repo.shutDown();
		overwriteByteInFile(new File(dataDir, "values.dat"), corruptedOffset, 0x0);
		repo.init();

		List<Statement> readBack = getStatements();
		assertEquals(6, readBack.size());
	}

	/**
	 * Zeroes the byte at offset 173 of {@code values.dat} while the repository is shut down and expects six
	 * statements to be readable after it has been initialised again.
	 */
	@Test
	public void testCorruptValuesDatFileEmptyDataArrayError() throws IOException {
		File target = new File(dataDir, "values.dat");

		repo.shutDown();
		overwriteByteInFile(target, 173, 0x0);
		repo.init();

		int statementCount = getStatements().size();
		assertEquals(6, statementCount);
	}

	/**
	 * Zeroes the byte at offset 174 of {@code values.dat} while the repository is shut down and expects six
	 * statements to be readable after it has been initialised again.
	 */
	@Test
	public void testCorruptValuesDatFileInvalidTypeError() throws IOException {
		// The data file is only touched while the repository is shut down.
		repo.shutDown();
		File valuesData = new File(dataDir, "values.dat");
		overwriteByteInFile(valuesData, 174, 0x0);

		// Bring the repository back up on top of the damaged file and read everything back.
		repo.init();
		List<Statement> recovered = getStatements();

		assertEquals(6, recovered.size());
	}

	/**
	 * Zeroes the bytes of {@code values.dat} at offsets 4 to 436, one byte per round, restoring the pristine backup
	 * before each round, and expects six statements to be readable every time.
	 */
	@Test
	public void testCorruptValuesDatFileEntireValuesDatFile() throws IOException {
		File valuesFile = new File(dataDir, "values.dat");

		// NOTE(review): the upper bound 437 is hard-coded, presumably the size of values.dat for this fixture; the
		// other loop tests in this class use the actual file length instead - confirm before changing it.
		for (int i = 4; i < 437; i++) {
			// Parameterized logging avoids building the message when debug output is disabled.
			logger.debug("Corrupting byte at position {}", i);
			repo.shutDown();
			restoreFile(dataDir, "values.dat");

			overwriteByteInFile(valuesFile, i, 0x0);

			repo.init();

			List<Statement> list = getStatements();
			// Report the offending offset, otherwise a failure inside this loop cannot be located.
			assertEquals(6, list.size(), "Failed at byte position " + i);
		}
	}

	/**
	 * Zeroes the very last byte of {@code values.dat} while the repository is shut down and expects six statements
	 * to be readable after it has been initialised again.
	 */
	@Test
	public void testCorruptLastByteOfValuesDatFile() throws IOException {
		repo.shutDown();

		File valuesDat = new File(dataDir, "values.dat");
		long lastBytePosition = valuesDat.length() - 1;
		overwriteByteInFile(valuesDat, lastBytePosition, 0x0);

		repo.init();

		assertEquals(6, getStatements().size());
	}

	/**
	 * Zeroes every byte of {@code values.id} from offset 4 up to the end of the file, one byte per round. The
	 * pristine backup is restored before each round and six statements are expected every time.
	 */
	@Test
	public void testCorruptValuesIdFile() throws IOException {
		repo.shutDown();
		File idFile = new File(dataDir, "values.id");
		long length = idFile.length();

		long position = 4;
		while (position < length) {
			restoreFile(dataDir, "values.id");
			overwriteByteInFile(idFile, position, 0x0);

			repo.init();
			assertEquals(6, getStatements().size(), "Failed at byte position " + position);
			repo.shutDown();

			position++;
		}
	}

	/**
	 * Zeroes every byte of {@code values.hash} from offset 4 up to the end of the file, one byte per round. The
	 * pristine backup is restored before each round and six statements are expected every time.
	 */
	@Test
	public void testCorruptValuesHashFile() throws IOException {
		final String fileName = "values.hash";

		repo.shutDown();
		File hashFile = new File(dataDir, fileName);
		long end = hashFile.length();

		for (long offset = 4; offset < end; offset++) {
			// Start every round from the untouched file so that exactly one byte differs.
			restoreFile(dataDir, fileName);
			overwriteByteInFile(hashFile, offset, 0x0);

			repo.init();
			List<Statement> readBack = getStatements();
			assertEquals(6, readBack.size(), "Failed at byte position " + offset);
			repo.shutDown();
		}
	}

	/**
	 * Zeroes every byte of {@code namespaces.dat} from offset 4 up to the end of the file, one byte per round. The
	 * pristine backup is restored before each round and six statements are expected every time.
	 */
	@Test
	public void testCorruptValuesNamespacesFile() throws IOException {
		repo.shutDown();

		String name = "namespaces.dat";
		File namespacesFile = new File(dataDir, name);
		long size = namespacesFile.length();

		for (long pos = 4; pos < size; pos++) {
			restoreFile(dataDir, name);
			overwriteByteInFile(namespacesFile, pos, 0x0);
			repo.init();
			int count = getStatements().size();
			assertEquals(6, count, "Failed at byte position " + pos);
			repo.shutDown();
		}
	}

	/**
	 * Zeroes every byte of {@code contexts.dat} from offset 4 up to the end of the file, one byte per round. The
	 * pristine backup is restored before each round and six statements are expected every time.
	 */
	@Test
	public void testCorruptValuesContextsFile() throws IOException {
		repo.shutDown();
		final String contextsName = "contexts.dat";
		File contextsFile = new File(dataDir, contextsName);
		long contextsLength = contextsFile.length();

		long cursor = 4;
		while (cursor < contextsLength) {
			restoreFile(dataDir, contextsName);
			overwriteByteInFile(contextsFile, cursor, 0x0);
			repo.init();
			List<Statement> statements = getStatements();
			assertEquals(6, statements.size(), "Failed at byte position " + cursor);
			repo.shutDown();
			cursor++;
		}
	}

	/**
	 * Zeroes every byte of {@code triples-posc.alloc} from offset 4 up to the end of the file, one byte per round.
	 * The pristine backup is restored before each round and six statements are expected every time.
	 */
	@Test
	public void testCorruptValuesPoscAllocFile() throws IOException {
		repo.shutDown();

		String allocName = "triples-posc.alloc";
		File allocFile = new File(dataDir, allocName);
		long allocSize = allocFile.length();

		for (long bytePos = 4; bytePos < allocSize; bytePos++) {
			restoreFile(dataDir, allocName);
			overwriteByteInFile(allocFile, bytePos, 0x0);

			repo.init();
			assertEquals(6, getStatements().size(), "Failed at byte position " + bytePos);
			repo.shutDown();
		}
	}

	/**
	 * Zeroes every byte of {@code triples-posc.dat} from offset 4 up to the end of the file, one byte per round.
	 * The pristine backup is restored before each round and six statements are expected every time.
	 */
	@Test
	public void testCorruptValuesPoscDataFile() throws IOException {
		repo.shutDown();
		final String indexName = "triples-posc.dat";
		File indexFile = new File(dataDir, indexName);
		long indexLength = indexFile.length();

		long offset = 4;
		while (offset < indexLength) {
			// The soft-fail flag is (re-)enabled at the start of every round, before the store is initialised.
			NativeStore.SOFT_FAIL_ON_CORRUPT_DATA_AND_REPAIR_INDEXES = true;
			restoreFile(dataDir, indexName);
			overwriteByteInFile(indexFile, offset, 0x0);
			repo.init();
			List<Statement> readBack = getStatements();
			assertEquals(6, readBack.size(), "Failed at byte position " + offset);
			repo.shutDown();
			offset++;
		}
	}

	/**
	 * Zeroes every byte of {@code triples-spoc.alloc} from offset 4 up to the end of the file, one byte per round.
	 * The pristine backup is restored before each round and six statements are expected every time.
	 */
	@Test
	public void testCorruptValuesSpocAllocFile() throws IOException {
		final String spocAlloc = "triples-spoc.alloc";

		repo.shutDown();
		File spocAllocFile = new File(dataDir, spocAlloc);
		long total = spocAllocFile.length();

		for (long index = 4; index < total; index++) {
			restoreFile(dataDir, spocAlloc);
			overwriteByteInFile(spocAllocFile, index, 0x0);
			repo.init();

			int found = getStatements().size();
			assertEquals(6, found, "Failed at byte position " + index);

			repo.shutDown();
		}
	}

	/**
	 * Zeroes every byte of {@code triples-spoc.dat} from offset 4 up to the end of the file, one byte per round,
	 * restoring the pristine backup before each round.
	 * <p>
	 * A round passes when six statements can be read directly, or, if that first attempt fails in any way, when six
	 * statements can be read after the corrupted file has been deleted and the repository initialised again.
	 */
	@Test
	public void testCorruptValuesSpocDataFile() throws IOException {
		repo.shutDown();
		String file = "triples-spoc.dat";
		File nativeStoreFile = new File(dataDir, file);
		long fileSize = nativeStoreFile.length();

		for (long i = 4; i < fileSize; i++) {
			restoreFile(dataDir, file);
			overwriteByteInFile(nativeStoreFile, i, 0x0);
			repo.init();
			try {
				List<Statement> list = getStatements();
				assertEquals(6, list.size(), "Failed at byte position " + i);
			} catch (Throwable firstFailure) {
				// Any failure of the first attempt (including assertion errors) triggers the fallback: drop the
				// corrupted index file and let the store come up without it.
				try {
					repo.shutDown();
					nativeStoreFile.delete();
					repo.init();
					List<Statement> list = getStatements();
					assertEquals(6, list.size(), "Failed at byte position " + i);
				} catch (Throwable retryFailure) {
					// Keep the original problem visible in the report instead of silently discarding it.
					if (retryFailure != firstFailure) {
						retryFailure.addSuppressed(firstFailure);
					}
					throw retryFailure;
				}
			}

			repo.shutDown();
		}
	}

	/**
	 * Reads the complete content of the repository through a fresh connection.
	 * <p>
	 * The content is first exported as N-Quads and logged at debug level, then every statement (inferred ones
	 * excluded) is collected and logged individually.
	 *
	 * @return all statements returned by the repository, in iteration order
	 */
	@NotNull
	private List<Statement> getStatements() {
		try (RepositoryConnection connection = repo.getConnection()) {
			// Export the whole store first; the serialised form is only used for the debug log.
			StringWriter nquads = new StringWriter();
			connection.export(Rio.createWriter(RDFFormat.NQUADS, nquads));
			logger.debug(nquads.toString());

			List<Statement> collected = new ArrayList<>();
			try (RepositoryResult<Statement> result = connection.getStatements(null, null, null, false)) {
				while (result.hasNext()) {
					Statement statement = result.next();
					collected.add(statement);
					logger.debug(statement.toString());
				}
			}
			return collected;
		}
	}

	/**
	 * Switches the global soft-fail flag back off and shuts the repository down.
	 *
	 * @throws IOException declared for symmetry with the set-up method; nothing in the body throws it
	 */
	@AfterEach
	public void after() throws IOException {
		// Reset the static flag first so that it cannot leak into other tests even if the shutdown fails.
		NativeStore.SOFT_FAIL_ON_CORRUPT_DATA_AND_REPAIR_INDEXES = false;
		// repo is still null when the set-up failed before assigning it; avoid an NPE on top of that failure.
		if (repo != null) {
			repo.shutDown();
		}
	}
}