LuceneDocument.java
/*******************************************************************************
* Copyright (c) 2015 Eclipse RDF4J contributors, Aduna, and others.
*
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Distribution License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/org/documents/edl-v10.php.
*
* SPDX-License-Identifier: BSD-3-Clause
*******************************************************************************/
package org.eclipse.rdf4j.sail.lucene.impl;
import java.io.IOException;
import java.text.ParseException;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.LatLonBoundingBox;
import org.apache.lucene.document.LatLonPoint;
import org.apache.lucene.document.LatLonShape;
import org.apache.lucene.geo.Line;
import org.apache.lucene.geo.Polygon;
import org.apache.lucene.geo.Rectangle;
import org.apache.lucene.geo.SimpleWKTShapeParser;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.spatial.SpatialStrategy;
import org.eclipse.rdf4j.sail.lucene.LuceneSail;
import org.eclipse.rdf4j.sail.lucene.SearchDocument;
import org.eclipse.rdf4j.sail.lucene.SearchFields;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.common.base.Function;
/**
 * {@link SearchDocument} implementation backed by a Lucene {@link Document}.
 *
 * <p>
 * Textual properties are delegated to the static helpers of {@link LuceneIndex}; geographic properties are parsed from
 * WKT and added to the wrapped document as Lucene spatial fields.
 */
public class LuceneDocument implements SearchDocument {

    // Shared by all documents: avoids one logger lookup per created document.
    private static final Logger logger = LoggerFactory.getLogger(LuceneDocument.class);

    /** Prefix of the field name under which a WKT point is additionally indexed as a {@link LatLonPoint}. */
    private static final String POINT_FIELD_PREFIX = "_pt_";

    /** Prefix of the field name under which shape fields are indexed. */
    private static final String GEO_FIELD_PREFIX = "_geo_";

    /** The wrapped Lucene document; every mutation of this class ends up here. */
    private final Document doc;

    /** Kept for constructor compatibility; it is stored but not read anywhere in this class. */
    private final Function<? super String, ? extends SpatialStrategy> geoStrategyMapper;

    /**
     * Creates a wrapper around a new, empty Lucene document.
     *
     * @param geoStrategyMapper mapper from field name to spatial strategy (only stored by this class)
     */
    public LuceneDocument(Function<? super String, ? extends SpatialStrategy> geoStrategyMapper) {
        this(new Document(), geoStrategyMapper);
    }

    /**
     * Creates a wrapper around an existing Lucene document.
     *
     * @param doc               the document to wrap; it is used as-is, not copied
     * @param geoStrategyMapper mapper from field name to spatial strategy (only stored by this class)
     */
    public LuceneDocument(Document doc, Function<? super String, ? extends SpatialStrategy> geoStrategyMapper) {
        this.doc = doc;
        this.geoStrategyMapper = geoStrategyMapper;
    }

    /**
     * Creates a wrapper around a new document and registers its id, resource and context through
     * {@link LuceneIndex}.
     *
     * @param id                value passed to {@code LuceneIndex.addIDField}
     * @param resourceId        value passed to {@code LuceneIndex.addResourceField}
     * @param context           value passed to {@code LuceneIndex.addContextField}
     * @param geoStrategyMapper mapper from field name to spatial strategy (only stored by this class)
     */
    public LuceneDocument(String id, String resourceId, String context,
            Function<? super String, ? extends SpatialStrategy> geoStrategyMapper) {
        this(geoStrategyMapper);
        setId(id);
        setResource(resourceId);
        setContext(context);
    }

    private void setId(String id) {
        LuceneIndex.addIDField(id, doc);
    }

    private void setContext(String context) {
        LuceneIndex.addContextField(context, doc);
    }

    private void setResource(String resourceId) {
        LuceneIndex.addResourceField(resourceId, doc);
    }

    /**
     * @return the wrapped Lucene document (the live instance, not a copy)
     */
    public Document getDocument() {
        return doc;
    }

    @Override
    public String getId() {
        return doc.get(SearchFields.ID_FIELD_NAME);
    }

    @Override
    public String getResource() {
        return doc.get(SearchFields.URI_FIELD_NAME);
    }

    @Override
    public String getContext() {
        return doc.get(SearchFields.CONTEXT_FIELD_NAME);
    }

    /**
     * Collects the distinct names of all fields of the document that {@link SearchFields#isPropertyField(String)}
     * accepts.
     *
     * @return a new, mutable set of property field names
     */
    @Override
    public Set<String> getPropertyNames() {
        Set<String> names = new HashSet<>();
        for (IndexableField field : doc.getFields()) {
            String name = field.name();
            if (SearchFields.isPropertyField(name)) {
                names.add(name);
            }
        }
        return names;
    }

    /**
     * Intentionally a no-op: nothing has to be registered for a property name alone.
     */
    @Override
    public void addProperty(String name) {
        // don't need to do anything
    }

    /**
     * Stores and indexes a property in a Document. We don't have to recalculate the concatenated text: just add
     * another TEXT field and Lucene will take care of this. Additional advantage: Lucene may be able to handle the
     * individual strings in a way that may affect e.g. phrase and proximity searches (concatenation basically means
     * loss of information). NOTE: The TEXT_FIELD_NAME has to be stored, see in LuceneSail
     *
     * @param name the property (predicate) field name
     * @param text the property value
     * @see LuceneSail
     */
    @Override
    public void addProperty(String name, String text) {
        LuceneIndex.addPredicateField(name, text, doc);
        LuceneIndex.addTextField(text, doc);
    }

    /**
     * Checks whether a field occurs with a specified value in a Document.
     *
     * @param fieldName the name of the field to inspect
     * @param value     the value to look for; must not be {@code null}
     * @return {@code true} if one of the field's values equals {@code value}
     */
    @Override
    public boolean hasProperty(String fieldName, String value) {
        String[] fields = doc.getValues(fieldName);
        if (fields != null) {
            for (String field : fields) {
                if (value.equals(field)) {
                    return true;
                }
            }
        }
        return false;
    }

    /**
     * @param name the name of the field to read
     * @return a fixed-size list view over the values the document holds for {@code name}
     */
    @Override
    public List<String> getProperty(String name) {
        return Arrays.asList(doc.getValues(name));
    }

    /**
     * Adds the Lucene spatial fields for a parsed WKT shape to the document.
     *
     * <p>
     * Arrays of shapes (GEOMETRYCOLLECTION, and the array results produced for MULTI* geometries) are indexed element
     * by element. A {@code null} shape - which is what the WKT parser is expected to produce for an EMPTY geometry -
     * has nothing to index and is skipped instead of failing with a {@link NullPointerException}.
     *
     * @param shape the parsed shape, an array of shapes, or {@code null}
     * @param field the property field name; the geo/point prefixes are prepended here
     * @throws IllegalArgumentException if the shape is of a type this method does not handle
     */
    private void indexShape(Object shape, String field) {
        if (shape == null) {
            // EMPTY geometry (possibly nested inside a collection): nothing to index
            return;
        }
        if (shape instanceof Object[]) { // GEOMETRYCOLLECTION and other array-valued results
            for (Object geometry : (Object[]) shape) {
                indexShape(geometry, field);
            }
            return;
        }

        final String geoField = GEO_FIELD_PREFIX + field;
        if (shape instanceof Polygon) { // WKT:POLYGON
            addFields(LatLonShape.createIndexableFields(geoField, (Polygon) shape));
        } else if (shape instanceof Line) { // WKT:LINESTRING
            addFields(LatLonShape.createIndexableFields(geoField, (Line) shape));
        } else if (shape instanceof double[]) { // WKT:POINT
            // WKT orders coordinates as x (longitude), y (latitude); the Lucene APIs take (lat, lon)
            double[] point = (double[]) shape;
            addFields(LatLonShape.createIndexableFields(geoField, point[1], point[0]));
            doc.add(new LatLonPoint(POINT_FIELD_PREFIX + field, point[1], point[0]));
        } else if (shape instanceof Rectangle) { // WKT:ENVELOPE / RECTANGLE
            // NOTE(review): this uses the same field name as the LatLonShape fields above - confirm that mixing
            // both field types under one name is intended.
            Rectangle box = (Rectangle) shape;
            doc.add(new LatLonBoundingBox(geoField, box.minLat, box.minLon, box.maxLat, box.maxLon));
        } else {
            throw new IllegalArgumentException("Geometry for shape " + shape + " is not supported");
        }
    }

    /** Adds every given field to the wrapped document, in order. */
    private void addFields(Field[] fields) {
        for (Field f : fields) {
            doc.add(f);
        }
    }

    /**
     * Stores the raw WKT value for the field and, if it can be parsed, indexes the resulting shape.
     *
     * <p>
     * Parse failures are logged as warnings and otherwise ignored, so the stored-only field is kept even when the
     * geometry could not be indexed. An {@link IllegalArgumentException} raised while indexing (e.g. for an
     * unsupported shape type) is not caught here.
     *
     * @param field the property field name
     * @param value the WKT representation of the geometry
     */
    @Override
    public void addGeoProperty(String field, String value) {
        LuceneIndex.addStoredOnlyPredicateField(field, value, doc);
        try {
            indexShape(SimpleWKTShapeParser.parse(value), field);
        } catch (ParseException e) {
            logger.warn("error while processing geo property", e);
        } catch (IOException e) {
            logger.warn("error while parsing wkt geometry", e);
        }
    }
}