RDFXMLParserTest.java

/*******************************************************************************
 * Copyright (c) 2015 Eclipse RDF4J contributors, Aduna, and others.
 *
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the Eclipse Distribution License v1.0
 * which accompanies this distribution, and is available at
 * http://www.eclipse.org/org/documents/edl-v10.php.
 *
 * SPDX-License-Identifier: BSD-3-Clause
 *******************************************************************************/
package org.eclipse.rdf4j.rio.rdfxml;

import static org.assertj.core.api.Assertions.assertThat;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.junit.jupiter.api.Assertions.fail;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.util.Collection;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.Locale;
import java.util.Set;

import org.eclipse.rdf4j.model.IRI;
import org.eclipse.rdf4j.model.Statement;
import org.eclipse.rdf4j.model.ValueFactory;
import org.eclipse.rdf4j.model.impl.SimpleValueFactory;
import org.eclipse.rdf4j.model.vocabulary.DC;
import org.eclipse.rdf4j.model.vocabulary.RDFS;
import org.eclipse.rdf4j.rio.RDFParseException;
import org.eclipse.rdf4j.rio.RDFParser;
import org.eclipse.rdf4j.rio.helpers.ParseErrorCollector;
import org.eclipse.rdf4j.rio.helpers.StatementCollector;
import org.eclipse.rdf4j.rio.helpers.XMLParserSettings;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;

public class RDFXMLParserTest {

	private ValueFactory vf;

	private RDFParser parser;

	private StatementCollector sc;

	private ParseErrorCollector el;

	private Locale platformLocale;

	@BeforeEach
	public void setUp() {
		platformLocale = Locale.getDefault();

		Locale.setDefault(Locale.ENGLISH);
		vf = SimpleValueFactory.getInstance();
		parser = new RDFXMLParser();
		sc = new StatementCollector();
		parser.setRDFHandler(sc);
		el = new ParseErrorCollector();
		parser.setParseErrorListener(el);

	}

	@AfterEach
	public void tearDown() {
		Locale.setDefault(platformLocale);
	}

	@Test
	public void rdfXmlLoadedFromInsideAJarResolvesRelativeUris() throws Exception {
		URL zipfileUrl = this.getClass().getResource("/org/eclipse/rdf4j/rio/rdfxml/sample-with-rdfxml-data.zip");

		assertNotNull(zipfileUrl, "The sample-data.zip file must be present for this test");

		String url = "jar:" + zipfileUrl + "!/index.rdf";

		try (final InputStream in = new URL(url).openStream()) {
			parser.parse(in, url);
		}

		Collection<Statement> stmts = sc.getStatements();

		assertThat(stmts).hasSize(3);

		Iterator<Statement> iter = stmts.iterator();

		Statement stmt1 = iter.next();
		Statement stmt2 = iter.next();

		assertEquals(vf.createIRI("http://www.example.com/#"), stmt1.getSubject());
		assertEquals(vf.createIRI("http://www.example.com/ns/#document-about"), stmt1.getPredicate());
		assertTrue(stmt1.getObject() instanceof IRI);

		IRI res = (IRI) stmt1.getObject();

		String resourceUrl = res.stringValue();

		assertThat(resourceUrl).startsWith("jar:" + zipfileUrl + "!");

		URL javaUrl = new URL(resourceUrl);
		assertEquals("jar", javaUrl.getProtocol());

		try (InputStream uc = javaUrl.openStream()) {
			assertEquals(-1, uc.read(), "The resource stream should be empty");
		}

		assertEquals(res, stmt2.getSubject());
		assertEquals(DC.TITLE, stmt2.getPredicate());
		assertEquals(vf.createLiteral("Empty File"), stmt2.getObject());
	}

	@Test
	public void testIgnoreExternalGeneralEntity() throws Exception {
		try (final InputStream in = this.getClass()
				.getResourceAsStream("/org/eclipse/rdf4j/rio/rdfxml/rdfxml-external-general-entity.rdf")) {
			parser.parse(in, "");
		} catch (FileNotFoundException e) {
			fail("parser tried to read external file from external general entity");
		}
		assertEquals(0, el.getWarnings().size());
		assertEquals(0, el.getErrors().size());
		assertEquals(0, el.getFatalErrors().size());

		assertThat(sc.getStatements().size()).isEqualTo(1);

		Statement st = sc.getStatements().iterator().next();

		// literal value should be empty string as it should not have processed the
		// external entity
		assertThat(st.getObject().stringValue()).isEqualTo("");
	}

	@Test
	public void testFatalErrorDoctypeDecl() throws Exception {
		// configure parser to disallow doctype declarations
		parser.getParserConfig().set(XMLParserSettings.DISALLOW_DOCTYPE_DECL, true);

		try (final InputStream in = this.getClass()
				.getResourceAsStream("/org/eclipse/rdf4j/rio/rdfxml/rdfxml-external-param-entity.rdf")) {
			parser.parse(in, "");
		} catch (RDFParseException e) {
			assertEquals(
					"DOCTYPE is disallowed when the feature \"http://apache.org/xml/features/disallow-doctype-decl\" set to true. [line 2, column 10]",
					e.getMessage());
		}
		assertEquals(0, el.getWarnings().size());
		assertEquals(0, el.getErrors().size());
		assertEquals(1, el.getFatalErrors().size());
		assertEquals(
				"[Rio fatal] DOCTYPE is disallowed when the feature \"http://apache.org/xml/features/disallow-doctype-decl\" set to true. (2, 10)",
				el.getFatalErrors().get(0));
	}

	@Test
	public void testIgnoreExternalParamEntity() throws Exception {
		// configure parser to allow doctype declarations
		parser.getParserConfig().set(XMLParserSettings.DISALLOW_DOCTYPE_DECL, false);

		try (final InputStream in = this.getClass()
				.getResourceAsStream("/org/eclipse/rdf4j/rio/rdfxml/rdfxml-external-param-entity.rdf")) {
			parser.parse(in, "");
		} catch (FileNotFoundException e) {
			fail("parser tried to read external file from external parameter entity");
		}
		assertEquals(0, el.getWarnings().size());
		assertEquals(0, el.getErrors().size());
		assertEquals(0, el.getFatalErrors().size());
	}

	@Test
	public void testRDFXMLWhitespace() throws Exception {
		try (final InputStream in = this.getClass()
				.getResourceAsStream("/org/eclipse/rdf4j/rio/rdfxml/rdfxml-whitespace-literal.rdf")) {
			parser.parse(in, "");
		}
		Statement stmt1 = sc.getStatements().iterator().next();
		assertEquals(1, sc.getStatements().size());
		assertEquals(RDFS.LABEL, stmt1.getPredicate());
		assertEquals(vf.createLiteral("  Literal with whitespace  "), stmt1.getObject());
	}

	@Test
	public void testFatalErrorPrologContent() throws Exception {
		try (final InputStream in = this.getClass()
				.getResourceAsStream("/org/eclipse/rdf4j/rio/rdfxml/not-an-rdfxml-file.rdf")) {
			parser.parse(in, "");
		} catch (RDFParseException e) {
			assertEquals("Content is not allowed in prolog. [line 1, column 1]", e.getMessage());
		}
		assertEquals(0, el.getWarnings().size());
		assertEquals(0, el.getErrors().size());
		assertEquals(1, el.getFatalErrors().size());
		assertEquals("[Rio fatal] Content is not allowed in prolog. (1, 1)", el.getFatalErrors().get(0));
	}

	@Test
	public void testInsertUsedContextPrefixes() {

		Set<Statement> inputCollection = new LinkedHashSet<>();
		StatementCollector inputCollector = new StatementCollector(inputCollection);
		parser.setRDFHandler(inputCollector);

		try (final InputStream in = this.getClass()
				.getResourceAsStream("/org/eclipse/rdf4j/rio/rdfxml/rdfxml-namespace-addition.rdf")) {
			parser.parse(in, "");
		} catch (RDFParseException | IOException e) {
			assertEquals("Content is not allowed in prolog. [line 1, column 1]", e.getMessage());
		}

		String[] expectedLiteral = new String[2];

		String s1 = "(http://mycorp.example.com/papers/NobelPaper1, http://test.org/testing/somedata, \"<test:datapart xmlns:test=\"http://test.org/testing/\">Literal</test:datapart>\n"
				+
				"  <test:datapart xmlns:test=\"http://test.org/testing/\">0</test:datapart>\n" +
				"  <test:datapart xmlns:test=\"http://test.org/testing/\">0</test:datapart>\n" +
				"  \"^^<http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral>) [null]";

		String s2 = "(http://mycorp.example.com/papers/NobelPaper1, http://purl.org/metadata/dublin_core#Creator, \"David Hume\") [null]";
		expectedLiteral[0] = s1;
		expectedLiteral[1] = s2;

		int ind = 0;
		for (Statement s : inputCollection) {
			assertEquals(s.toString(), expectedLiteral[ind]);
			ind += 1;
		}

	}
}