SolrIndex.java
/*******************************************************************************
* Copyright (c) 2015 Eclipse RDF4J contributors, Aduna, and others.
*
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Distribution License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/org/documents/edl-v10.php.
*
* SPDX-License-Identifier: BSD-3-Clause
*******************************************************************************/
package org.eclipse.rdf4j.sail.solr;
import java.io.IOException;
import java.text.ParseException;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Properties;
import java.util.Set;
import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.SpatialParams;
import org.eclipse.rdf4j.model.IRI;
import org.eclipse.rdf4j.model.Resource;
import org.eclipse.rdf4j.model.vocabulary.GEOF;
import org.eclipse.rdf4j.query.MalformedQueryException;
import org.eclipse.rdf4j.query.algebra.Var;
import org.eclipse.rdf4j.sail.SailException;
import org.eclipse.rdf4j.sail.lucene.AbstractSearchIndex;
import org.eclipse.rdf4j.sail.lucene.BulkUpdater;
import org.eclipse.rdf4j.sail.lucene.DocumentDistance;
import org.eclipse.rdf4j.sail.lucene.DocumentResult;
import org.eclipse.rdf4j.sail.lucene.DocumentScore;
import org.eclipse.rdf4j.sail.lucene.LuceneSail;
import org.eclipse.rdf4j.sail.lucene.QuerySpec;
import org.eclipse.rdf4j.sail.lucene.SearchDocument;
import org.eclipse.rdf4j.sail.lucene.SearchFields;
import org.eclipse.rdf4j.sail.lucene.util.GeoUnits;
import org.locationtech.spatial4j.context.SpatialContext;
import org.locationtech.spatial4j.context.SpatialContextFactory;
import org.locationtech.spatial4j.shape.Point;
import org.locationtech.spatial4j.shape.Rectangle;
import org.locationtech.spatial4j.shape.Shape;
import org.locationtech.spatial4j.shape.SpatialRelation;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.common.base.Function;
import com.google.common.base.Functions;
import com.google.common.collect.Iterables;
/**
* @see LuceneSail
*/
public class SolrIndex extends AbstractSearchIndex {
public static final String SERVER_KEY = "server";
public static final String DISTANCE_FIELD = "_dist";
private final Logger logger = LoggerFactory.getLogger(getClass());
private SolrClient client;
private Function<? super String, ? extends SpatialContext> geoContextMapper;
@Override
public void initialize(Properties parameters) throws Exception {
super.initialize(parameters);
// slightly hacky cast to cope with the fact that Properties is
// Map<Object,Object>
// even though it is effectively Map<String,String>
this.geoContextMapper = createSpatialContextMapper((Map<String, String>) (Map<?, ?>) parameters);
String server = parameters.getProperty(SERVER_KEY);
if (server == null) {
throw new SailException("Missing " + SERVER_KEY + " parameter");
}
int pos = server.indexOf(':');
if (pos == -1) {
throw new SailException("Missing scheme in " + SERVER_KEY + " parameter: " + server);
}
String scheme = server.substring(0, pos);
Class<?> clientFactoryCls = Class.forName("org.eclipse.rdf4j.sail.solr.client." + scheme + ".Factory");
SolrClientFactory clientFactory = (SolrClientFactory) clientFactoryCls.newInstance();
client = clientFactory.create(server);
}
protected Function<? super String, ? extends SpatialContext> createSpatialContextMapper(
Map<String, String> parameters) {
// this should really be based on the schema
ClassLoader classLoader = Thread.currentThread().getContextClassLoader();
SpatialContext geoContext = SpatialContextFactory.makeSpatialContext(parameters, classLoader);
return Functions.constant(geoContext);
}
public SolrClient getClient() {
return client;
}
@Override
protected SpatialContext getSpatialContext(String property) {
return geoContextMapper.apply(property);
}
@Override
public void shutDown() throws IOException {
if (client != null) {
client.close();
client = null;
}
}
// //////////////////////////////// Methods for updating the index
/**
* Returns a Document representing the specified document ID (combination of resource and context), or null when no
* such Document exists yet.
*
* @param id
* @return search document
* @throws IOException
*/
@Override
protected SearchDocument getDocument(String id) throws IOException {
SolrDocument doc;
try {
doc = (SolrDocument) client
.query(new SolrQuery().setRequestHandler("/get").set(SearchFields.ID_FIELD_NAME, id))
.getResponse()
.get("doc");
} catch (SolrServerException e) {
throw new IOException(e);
}
return (doc != null) ? new SolrSearchDocument(doc) : null;
}
@Override
protected Iterable<? extends SearchDocument> getDocuments(String resourceId) throws IOException {
SolrQuery query = new SolrQuery(termQuery(SearchFields.URI_FIELD_NAME, resourceId));
SolrDocumentList docs;
try {
docs = getDocuments(query);
} catch (SolrServerException e) {
throw new IOException(e);
}
return Iterables.transform(docs, SolrSearchDocument::new);
}
@Override
protected SearchDocument newDocument(String id, String resourceId, String context) {
return new SolrSearchDocument(id, resourceId, context);
}
@Override
protected SearchDocument copyDocument(SearchDocument doc) {
SolrDocument document = ((SolrSearchDocument) doc).getDocument();
SolrDocument newDocument = new SolrDocument();
newDocument.putAll(document);
return new SolrSearchDocument(newDocument);
}
@Override
protected void addDocument(SearchDocument doc) throws IOException {
SolrDocument document = ((SolrSearchDocument) doc).getDocument();
try {
client.add(SolrUtil.toSolrInputDocument(document));
} catch (SolrServerException e) {
throw new IOException(e);
}
}
@Override
protected void updateDocument(SearchDocument doc) throws IOException {
addDocument(doc);
}
@Override
protected void deleteDocument(SearchDocument doc) throws IOException {
try {
client.deleteById(doc.getId());
} catch (SolrServerException e) {
throw new IOException(e);
}
}
@Override
protected BulkUpdater newBulkUpdate() {
return new SolrBulkUpdater(client);
}
static String termQuery(String field, String value) {
return field + ":\"" + value + "\"";
}
/**
* Returns a list of Documents representing the specified Resource (empty when no such Document exists yet). Each
* document represent a set of statements with the specified Resource as a subject, which are stored in a specific
* context
*/
private SolrDocumentList getDocuments(SolrQuery query) throws SolrServerException, IOException {
return search(query).getResults();
}
/**
* Returns a Document representing the specified Resource and Context combination, or null when no such Document
* exists yet.
*
* @param subject
* @param context
* @return search document
* @throws IOException
*/
public SearchDocument getDocument(Resource subject, Resource context) throws IOException {
// fetch the Document representing this Resource
String resourceId = SearchFields.getResourceID(subject);
String contextId = SearchFields.getContextID(context);
return getDocument(SearchFields.formIdString(resourceId, contextId));
}
/**
* Returns a list of Documents representing the specified Resource (empty when no such Document exists yet).Each
* document represent a set of statements with the specified Resource as a subject, which are stored in a specific
* context
*
* @param subject
* @return list of documents
* @throws IOException
*/
public Iterable<? extends SearchDocument> getDocuments(Resource subject) throws IOException {
String resourceId = SearchFields.getResourceID(subject);
return getDocuments(resourceId);
}
/**
* Filters the given list of fields, retaining all property fields.
*
* @param fields
* @return set of fields
*/
public static Set<String> getPropertyFields(Set<String> fields) {
Set<String> result = new HashSet<>(fields.size());
for (String field : fields) {
if (SearchFields.isPropertyField(field)) {
result.add(field);
}
}
return result;
}
@Override
public void begin() throws IOException {
}
@Override
public void commit() throws IOException {
try {
client.commit();
} catch (SolrServerException e) {
throw new IOException(e);
}
}
@Override
public void rollback() throws IOException {
try {
client.rollback();
} catch (SolrServerException e) {
throw new IOException(e);
}
}
// //////////////////////////////// Methods for querying the index
/**
* Parse the passed query.
*
* @param subject
* @param spec query to process
* @return the parsed query
* @throws MalformedQueryException
* @throws IOException
* @throws IllegalArgumentException if the spec contains a multi-param query
*/
@Override
protected Iterable<? extends DocumentScore> query(Resource subject, QuerySpec spec)
throws MalformedQueryException, IOException {
if (spec.getQueryPatterns().size() != 1) {
throw new IllegalArgumentException("Multi-param query not implemented!");
}
QuerySpec.QueryParam param = spec.getQueryPatterns().iterator().next();
IRI propertyURI = param.getProperty();
boolean highlight = param.isHighlight();
String query = param.getQuery();
SolrQuery q = prepareQuery(propertyURI, new SolrQuery(query));
if (highlight) {
q.set("hl.method", "unified");
q.setHighlight(true);
String field = (propertyURI != null) ? SearchFields.getPropertyField(propertyURI) : "*";
q.addHighlightField(field);
q.setHighlightSimplePre(SearchFields.HIGHLIGHTER_PRE_TAG);
q.setHighlightSimplePost(SearchFields.HIGHLIGHTER_POST_TAG);
q.setHighlightSnippets(2);
}
QueryResponse response;
if (q.getHighlight()) {
q.addField("*");
} else {
q.addField(SearchFields.URI_FIELD_NAME);
}
q.addField("score");
int numDocs = Objects.requireNonNullElse(spec.getNumDocs(), -1);
try {
if (subject != null) {
response = search(subject, q, numDocs);
} else {
response = search(q, numDocs);
}
} catch (SolrServerException e) {
throw new IOException(e);
}
SolrDocumentList results = response.getResults();
final Map<String, Map<String, List<String>>> highlighting = response.getHighlighting();
return Iterables.transform(results, (SolrDocument document) -> {
SolrSearchDocument doc = new SolrSearchDocument(document);
Map<String, List<String>> docHighlighting = (highlighting != null) ? highlighting.get(doc.getId())
: null;
return new SolrDocumentScore(doc, docHighlighting);
});
}
/**
* Evaluates the given query only for the given resource.
*
* @param resource
* @param query
* @return response
* @throws SolrServerException
* @throws IOException
*/
public QueryResponse search(Resource resource, SolrQuery query) throws SolrServerException, IOException {
return search(resource, query, -1);
}
/**
* Evaluates the given query only for the given resource.
*
* @param resource
* @param query
* @param numDocs
* @return response
* @throws SolrServerException
* @throws IOException
*/
public QueryResponse search(Resource resource, SolrQuery query, int numDocs)
throws SolrServerException, IOException {
// rewrite the query
String idQuery = termQuery(SearchFields.URI_FIELD_NAME, SearchFields.getResourceID(resource));
query.setQuery(query.getQuery() + " AND " + idQuery);
return search(query, numDocs);
}
@Override
protected Iterable<? extends DocumentDistance> geoQuery(IRI geoProperty, Point p, final IRI units, double distance,
String distanceVar, Var contextVar) throws MalformedQueryException, IOException {
double kms = GeoUnits.toKilometres(distance, units);
String qstr = "{!geofilt score=recipDistance}";
if (contextVar != null) {
Resource ctx = (Resource) contextVar.getValue();
String tq = termQuery(SearchFields.CONTEXT_FIELD_NAME, SearchFields.getContextID(ctx));
if (ctx != null) {
qstr = tq + " AND " + qstr;
} else {
qstr = "-" + tq + " AND " + qstr;
}
}
SolrQuery q = new SolrQuery(qstr);
q.set(SpatialParams.FIELD, SearchFields.getPropertyField(geoProperty));
q.set(SpatialParams.POINT, p.getY() + "," + p.getX());
q.set(SpatialParams.DISTANCE, Double.toString(kms));
q.addField(SearchFields.URI_FIELD_NAME);
// ':' is part of the fl parameter syntax so we can't use the full
// property field name
// instead we use wildcard + local part of the property URI
q.addField("*" + geoProperty.getLocalName());
// always include the distance - needed for sanity checking
q.addField(DISTANCE_FIELD + ":geodist()");
boolean requireContext = (contextVar != null && !contextVar.hasValue());
if (requireContext) {
q.addField(SearchFields.CONTEXT_FIELD_NAME);
}
QueryResponse response;
try {
response = search(q);
} catch (SolrServerException e) {
throw new IOException(e);
}
SolrDocumentList results = response.getResults();
return Iterables.transform(results, (SolrDocument document) -> {
SolrSearchDocument doc = new SolrSearchDocument(document);
return new SolrDocumentDistance(doc, units);
});
}
@Override
protected Iterable<? extends DocumentResult> geoRelationQuery(String relation, IRI geoProperty, String wkt,
Var contextVar) throws MalformedQueryException, IOException {
String spatialOp = toSpatialOp(relation);
if (spatialOp == null) {
return null;
}
String qstr = "\"" + spatialOp + "(" + wkt + ")\"";
if (contextVar != null) {
Resource ctx = (Resource) contextVar.getValue();
String tq = termQuery(SearchFields.CONTEXT_FIELD_NAME, SearchFields.getContextID(ctx));
if (ctx != null) {
qstr = tq + " AND " + qstr;
} else {
qstr = "-" + tq + " AND " + qstr;
}
}
SolrQuery q = new SolrQuery(qstr);
q.set(CommonParams.DF, SearchFields.getPropertyField(geoProperty));
q.addField(SearchFields.URI_FIELD_NAME);
// ':' is part of the fl parameter syntax so we can't use the full
// property field name
// instead we use wildcard + local part of the property URI
q.addField("*" + geoProperty.getLocalName());
boolean requireContext = (contextVar != null && !contextVar.hasValue());
if (requireContext) {
q.addField(SearchFields.CONTEXT_FIELD_NAME);
}
QueryResponse response;
try {
response = search(q);
} catch (SolrServerException e) {
throw new IOException(e);
}
SolrDocumentList results = response.getResults();
return Iterables.transform(results, (SolrDocument document) -> {
SolrSearchDocument doc = new SolrSearchDocument(document);
return new SolrDocumentResult(doc);
});
}
private String toSpatialOp(String relation) {
if (GEOF.SF_INTERSECTS.stringValue().equals(relation)) {
return "Intersects";
}
if (GEOF.SF_DISJOINT.stringValue().equals(relation)) {
return "IsDisjointTo";
}
if (GEOF.EH_COVERED_BY.stringValue().equals(relation)) {
return "IsWithin";
}
return null;
}
@Override
protected Shape parseQueryShape(String property, String value) throws ParseException {
Shape s = super.parseQueryShape(property, value);
// workaround to preserve WKT string
return (s instanceof Point) ? new WktPoint((Point) s, value) : new WktShape<>(s, value);
}
protected String toWkt(Shape s) {
return ((WktShape<?>) s).wkt;
}
private static class WktShape<S extends Shape> implements Shape {
final S s;
final String wkt;
WktShape(S s, String wkt) {
this.s = s;
this.wkt = wkt;
}
@Override
public SpatialRelation relate(Shape other) {
return s.relate(other);
}
@Override
public Rectangle getBoundingBox() {
return s.getBoundingBox();
}
@Override
public boolean hasArea() {
return s.hasArea();
}
@Override
public double getArea(SpatialContext ctx) {
return s.getArea(ctx);
}
@Override
public Point getCenter() {
return s.getCenter();
}
@Override
public Shape getBuffered(double distance, SpatialContext ctx) {
return s.getBuffered(distance, ctx);
}
@Override
public boolean isEmpty() {
return s.isEmpty();
}
@Override
public boolean equals(Object other) {
return s.equals(other);
}
@Override
public SpatialContext getContext() {
return s.getContext();
}
}
private static class WktPoint extends WktShape<Point> implements Point {
WktPoint(Point p, String wkt) {
super(p, wkt);
}
@Override
public void reset(double x, double y) {
s.reset(x, y);
}
@Override
public double getX() {
return s.getX();
}
@Override
public double getY() {
return s.getY();
}
}
/**
* Evaluates the given query and returns the results as a TopDocs instance.
*
* @param query
* @return query response
* @throws SolrServerException
* @throws IOException
*/
public QueryResponse search(SolrQuery query) throws SolrServerException, IOException {
return search(query, -1);
}
/**
* Evaluates the given query and returns the results as a TopDocs instance.
*
* @param query
* @param numDocs
* @return query response
* @throws SolrServerException
* @throws IOException
*/
public QueryResponse search(SolrQuery query, int numDocs) throws SolrServerException, IOException {
if (numDocs < -1) {
throw new IllegalArgumentException("numDocs should be 0 or greater if defined by the user");
}
int size = defaultNumDocs;
if (numDocs >= 0) {
// If the user has set numDocs we will use that. If it is 0 then the implementation may end up throwing an
// exception.
size = Math.min(maxDocs, numDocs);
}
if (size < 0) {
long docCount = client.query(query.setRows(0)).getResults().getNumFound();
size = Math.max((int) Math.min(docCount, maxDocs), 1);
}
return client.query(query.setRows(size));
}
private SolrQuery prepareQuery(IRI propertyURI, SolrQuery query) {
// check out which query parser to use, based on the given property URI
if (propertyURI == null)
// if we have no property given, we create a default query parser which
// has the TEXT_FIELD_NAME as the default field
{
query.set(CommonParams.DF, SearchFields.TEXT_FIELD_NAME);
} else
// otherwise we create a query parser that has the given property as
// the default field
{
query.set(CommonParams.DF, SearchFields.getPropertyField(propertyURI));
}
return query;
}
/**
* @param contexts
* @throws IOException
*/
@Override
public synchronized void clearContexts(Resource... contexts) throws IOException {
logger.debug("deleting contexts: {}", Arrays.toString(contexts));
// these resources have to be read from the underlying rdf store
// and their triples have to be added to the luceneindex after deletion of
// documents
try {
// remove all contexts passed
for (Resource context : contexts) {
// attention: context can be NULL!
String contextString = SearchFields.getContextID(context);
client.deleteByQuery(termQuery(SearchFields.CONTEXT_FIELD_NAME, contextString));
}
} catch (SolrServerException e) {
throw new IOException(e);
}
}
/**
*
*/
@Override
public synchronized void clear() throws IOException {
try {
client.deleteByQuery("*:*");
} catch (SolrServerException e) {
throw new IOException(e);
}
}
}