Verify.java
/*******************************************************************************
* Copyright (c) 2015 Eclipse RDF4J contributors, Aduna, and others.
*
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Distribution License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/org/documents/edl-v10.php.
*
* SPDX-License-Identifier: BSD-3-Clause
*******************************************************************************/
package org.eclipse.rdf4j.console.command;
import java.io.IOException;
import java.io.InputStream;
import java.io.Writer;
import java.net.MalformedURLException;
import java.net.URL;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Map;
import org.eclipse.rdf4j.common.exception.ValidationException;
import org.eclipse.rdf4j.common.transaction.IsolationLevels;
import org.eclipse.rdf4j.console.ConsoleIO;
import org.eclipse.rdf4j.console.Util;
import org.eclipse.rdf4j.console.VerificationListener;
import org.eclipse.rdf4j.console.setting.ConsoleSetting;
import org.eclipse.rdf4j.console.setting.WorkDir;
import org.eclipse.rdf4j.model.Model;
import org.eclipse.rdf4j.model.vocabulary.RDF4J;
import org.eclipse.rdf4j.repository.RepositoryException;
import org.eclipse.rdf4j.repository.sail.SailRepository;
import org.eclipse.rdf4j.repository.sail.SailRepositoryConnection;
import org.eclipse.rdf4j.rio.RDFFormat;
import org.eclipse.rdf4j.rio.RDFHandlerException;
import org.eclipse.rdf4j.rio.RDFParseException;
import org.eclipse.rdf4j.rio.RDFParser;
import org.eclipse.rdf4j.rio.Rio;
import org.eclipse.rdf4j.rio.UnsupportedRDFormatException;
import org.eclipse.rdf4j.rio.WriterConfig;
import org.eclipse.rdf4j.rio.helpers.BasicParserSettings;
import org.eclipse.rdf4j.rio.helpers.BasicWriterSettings;
import org.eclipse.rdf4j.sail.memory.MemoryStore;
import org.eclipse.rdf4j.sail.shacl.ShaclSail;
/**
* Verify command
*
* @author Dale Visser
* @author Bart Hanssens
*/
public class Verify extends ConsoleCommand {
@Override
public String getName() {
return "verify";
}
@Override
public String getHelpShort() {
return "Verifies the syntax of an RDF data file, takes a file path or URL as argument";
}
@Override
public String getHelpLong() {
return PrintHelp.USAGE + "verify <location> [<shacl-location> <report.ttl>]\n"
+ " <location> The file path or URL identifying the data file\n"
+ " <location> <shacl-location> <report.ttl> Validate using shacl file and create a report\n"
+ "Verifies the validity of the specified data file\n";
}
@Override
public String[] usesSettings() {
return new String[] { WorkDir.NAME };
}
/**
* Constructor
*
* @param consoleIO
* @param settings
*/
public Verify(ConsoleIO consoleIO, Map<String, ConsoleSetting> settings) {
super(consoleIO, null, settings);
}
@Override
public void execute(String... tokens) {
if (tokens.length != 2 && tokens.length != 4) {
writeln(getHelpLong());
return;
}
String dataPath = parseDataPath(tokens[1]);
verify(dataPath);
if (tokens.length == 4) {
String shaclPath = parseDataPath(tokens[2]);
String reportFile = tokens[3];
shacl(dataPath, shaclPath, reportFile);
}
}
/**
* Get working dir setting.
*
* @return path of working dir
*/
private Path getWorkDir() {
return ((WorkDir) settings.get(WorkDir.NAME)).get();
}
/**
* Verify an RDF file, either a local file or URL.
*
* @param tokens parameters
*/
private void verify(String dataPath) {
try {
URL dataURL = new URL(dataPath);
RDFFormat format = Rio.getParserFormatForFileName(dataPath).orElseThrow(Rio.unsupportedFormat(dataPath));
RDFParser parser = Rio.createParser(format);
writeln("RDF Format is " + parser.getRDFFormat().getName());
VerificationListener listener = new VerificationListener(consoleIO);
parser.set(BasicParserSettings.VERIFY_DATATYPE_VALUES, true);
parser.set(BasicParserSettings.FAIL_ON_UNKNOWN_DATATYPES, true);
parser.set(BasicParserSettings.VERIFY_LANGUAGE_TAGS, true);
parser.set(BasicParserSettings.FAIL_ON_UNKNOWN_LANGUAGES, true);
parser.set(BasicParserSettings.VERIFY_RELATIVE_URIS, true);
parser.set(BasicParserSettings.VERIFY_URI_SYNTAX, true);
parser.setParseErrorListener(listener);
parser.setRDFHandler(listener);
writeln("Verifying data...");
try (InputStream dataStream = dataURL.openStream()) {
parser.parse(dataStream, "urn://openrdf.org/RioVerifier/");
}
int warnings = listener.getWarnings();
int errors = listener.getErrors();
if (warnings + errors > 0) {
writeError("Found " + warnings + " warnings and " + errors + " errors");
} else {
writeln("Data verified, no errors were found");
}
if (errors == 0) {
writeln("File contains " + listener.getStatements() + " statements");
}
} catch (MalformedURLException e) {
writeError("Malformed URL: " + dataPath);
} catch (IOException e) {
writeError("Failed to load data", e);
} catch (UnsupportedRDFormatException e) {
writeError("No parser available for this RDF format", e);
} catch (RDFParseException e) {
writeError("Unexpected RDFParseException", e);
} catch (RDFHandlerException e) {
writeError("Unable to verify", e);
}
}
/**
* Validate an RDF data source using a SHACL file or URL, writing the report to a file.
*
* @param dataPath file or URL of the data to be validated
* @param shaclPath file or URL of the SHACL
* @param reportFile file to write validation report to
*/
private void shacl(String dataPath, String shaclPath, String reportFile) {
SailRepository repo = new SailRepository(new ShaclSail(new MemoryStore()));
repo.init();
try {
// load shapes first from a file or URL, defaults to turtle, so one can use .shacl as file extension
boolean loaded = false;
try {
writeln("Loading shapes from " + shaclPath);
URL shaclURL = new URL(shaclPath);
RDFFormat format = Rio.getParserFormatForFileName(reportFile).orElse(RDFFormat.TURTLE);
try (SailRepositoryConnection conn = repo.getConnection()) {
conn.begin(IsolationLevels.NONE, ShaclSail.TransactionSettings.ValidationApproach.Disabled);
conn.add(shaclURL, "", format, RDF4J.SHACL_SHAPE_GRAPH);
conn.commit();
}
loaded = true;
} catch (MalformedURLException e) {
writeError("Malformed URL: " + shaclPath, e);
} catch (IOException e) {
writeError("Failed to load shacl shapes", e);
}
if (!loaded) {
writeError("No shapes found");
return;
}
try {
URL dataURL = new URL(dataPath);
RDFFormat format = Rio.getParserFormatForFileName(dataPath)
.orElseThrow(Rio.unsupportedFormat(dataPath));
try (SailRepositoryConnection conn = repo.getConnection()) {
conn.begin(IsolationLevels.NONE, ShaclSail.TransactionSettings.ValidationApproach.Disabled);
conn.add(dataURL, "", format);
conn.commit();
}
} catch (MalformedURLException e) {
writeError("Malformed URL: " + dataPath);
} catch (IOException e) {
writeError("Failed to load data", e);
} catch (RepositoryException e) {
Throwable cause = e.getCause();
}
try {
try (SailRepositoryConnection conn = repo.getConnection()) {
// Bulk validation forces a full revalidation!
conn.begin(IsolationLevels.NONE, ShaclSail.TransactionSettings.ValidationApproach.Bulk);
conn.commit();
}
writeln("SHACL validation OK");
} catch (RepositoryException e) {
Throwable cause = e.getCause();
if (cause instanceof ValidationException) {
writeError("SHACL validation failed, writing report to " + reportFile);
ValidationException sv = (ValidationException) cause;
writeReport(sv.validationReportAsModel(), reportFile);
}
}
} finally {
repo.shutDown();
}
}
/**
* Parse URL or path to local file. Files will be prefixed with "file:" scheme
*
* @param str
* @return URL path as string
*/
private String parseDataPath(String str) {
String path = str;
try {
new URL(str);
// dataPath is a URI
} catch (MalformedURLException e) {
// File path specified, convert to URL
path = "file:" + Util.getNormalizedPath(getWorkDir(), str);
}
return path;
}
/**
* Write SHACL validation report to a file. File extension is used to select the serialization format, TTL is used
* as default.
*
* @param model report
* @param reportFile file name
*/
private void writeReport(Model model, String reportFile) {
WriterConfig cfg = new WriterConfig();
cfg.set(BasicWriterSettings.PRETTY_PRINT, true);
cfg.set(BasicWriterSettings.INLINE_BLANK_NODES, true);
RDFFormat format = Rio.getParserFormatForFileName(reportFile).orElse(RDFFormat.TURTLE);
try (Writer w = Files.newBufferedWriter(Paths.get(reportFile))) {
Rio.write(model, w, format, cfg);
} catch (IOException ex) {
writeError("Could not write report to " + reportFile, ex);
}
}
}