Skip to content
This repository has been archived by the owner on Jan 3, 2019. It is now read-only.

Commit

Permalink
Enable an external index (Solr) to receive a minimal index-update
Browse files Browse the repository at this point in the history
document (Solr document) based on messaging emitted from an F4
repository

Implement customizable search index design (https://wiki.duraspace.org/display/FF/Design+-+Customizable+Search+Index)

Also, allow asynchronous or synchronous indexing patterns for both Solr
and SPARQL indexing.
  • Loading branch information
ajs6f authored and cbeer committed Dec 12, 2013
1 parent 5e7c81f commit a2e80dd
Show file tree
Hide file tree
Showing 43 changed files with 1,808 additions and 568 deletions.
1 change: 1 addition & 0 deletions .gitignore
Expand Up @@ -8,6 +8,7 @@ FedoraRepository/
indexes/
ObjectStore/
*/.cache
**/data
.cache
.DS_Store
*~
Expand Down
65 changes: 51 additions & 14 deletions fcrepo-jms-indexer-core/pom.xml
Expand Up @@ -59,10 +59,6 @@
<artifactId>spring-test</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.abdera</groupId>
<artifactId>abdera-core</artifactId>
</dependency>
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpclient</artifactId>
Expand All @@ -71,10 +67,6 @@
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
</dependency>
<dependency>
<groupId>org.apache.abdera</groupId>
<artifactId>abdera-parser</artifactId>
</dependency>

<dependency>
<groupId>ch.qos.logback</groupId>
Expand All @@ -84,16 +76,34 @@
<dependency>
<groupId>org.fcrepo</groupId>
<artifactId>fcrepo-http-commons</artifactId>
<exclusions>
<exclusion>
<artifactId>lucene-core</artifactId>
<groupId>org.apache.lucene</groupId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.fcrepo</groupId>
<artifactId>fcrepo-http-api</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.fcrepo</groupId>
<artifactId>fcrepo-transform</artifactId>
<scope>test</scope>
<exclusions>
<exclusion>
<artifactId>log4j-over-slf4j</artifactId>
<groupId>org.slf4j</groupId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.fcrepo</groupId>
<artifactId>fcrepo-kernel</artifactId>
</dependency>


<dependency>
<groupId>org.fcrepo</groupId>
Expand All @@ -111,20 +121,28 @@
<groupId>org.apache.jena</groupId>
<artifactId>jena-fuseki</artifactId>
<scope>test</scope>
<!-- <exclusions>
<exclusion>
<artifactId>log4j</artifactId>
<groupId>log4j</groupId>
</exclusion>
</exclusions> -->
</dependency>
<!-- Start of Solr Indexer libs -->
<dependency>
<groupId>org.apache.solr</groupId>
<artifactId>solr-solrj</artifactId>
</dependency>
<dependency>
<groupId>org.apache.solr</groupId>
<artifactId>solr-core</artifactId>
<exclusions>
<exclusion>
<artifactId>log4j</artifactId>
<groupId>log4j</groupId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<!-- <dependency>
<groupId>org.apache.solr</groupId>
<artifactId>solr-test-framework</artifactId>
</dependency>
</dependency> -->
<!-- HttpClient is used to create a standalone SolrIndexer server client.
These libraries seem to be included with Solr 3.6.2?
Version 4.2.5 aims to match the version included with Jena. -->
Expand All @@ -133,6 +151,11 @@
<artifactId>httpmime</artifactId>
</dependency>
<!-- End of Solr Indexer libs -->
<dependency>
<groupId>com.google.code.gson</groupId>
<artifactId>gson</artifactId>
<version>2.2.4</version>
</dependency>
</dependencies>

<build>
Expand Down Expand Up @@ -242,4 +265,18 @@
</plugins>
</build>

<dependencyManagement>
<dependencies>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-core</artifactId>
<version>${solr.version}</version>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-facet</artifactId>
<version>${solr.version}</version>
</dependency>
</dependencies>
</dependencyManagement>
</project>
@@ -0,0 +1,41 @@
/**
* Copyright 2013 DuraSpace, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.fcrepo.indexer;

/**
* Indicates that a resource was not designated for named-field indexing.
* Typically this would be because there has been assigned no appropriate
* transformation.
*
* @author ajs6f
* @date Dec 4, 2013
*/
public class AbsentTransformPropertyException extends Exception {

    /** Serialization version marker for this exception type. */
    private static final long serialVersionUID = 1L;

    /**
     * Creates the exception carrying the supplied detail message.
     *
     * @param msg detail message, passed unchanged to the {@link Exception}
     *        constructor
     */
    public AbsentTransformPropertyException(final String msg) {
        super(msg);
    }

}
@@ -0,0 +1,95 @@
/**
* Copyright 2013 DuraSpace, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.fcrepo.indexer;

import static org.slf4j.LoggerFactory.getLogger;

import java.io.IOException;
import java.io.Reader;

import org.slf4j.Logger;

import com.google.common.util.concurrent.ListenableFuture;
import com.google.common.util.concurrent.ListenableFutureTask;
import com.google.common.util.concurrent.ListeningExecutorService;

/**
* An {@link Indexer} that executes its operation asynchronously.
*
* @author ajs6f
* @date Dec 8, 2013
* @param <T> the type of response to expect from an operation
*/
public abstract class AsynchIndexer<T> implements Indexer {

    private static final Logger LOGGER = getLogger(AsynchIndexer.class);

    /**
     * Supplies the executor to which tasks are submitted and on which their
     * completion listeners are run.
     *
     * @return The {@link ListeningExecutorService} to use for operation.
     */
    public abstract ListeningExecutorService executorService();

    /**
     * Obtains an update task from {@link #updateSynch(String, Reader)} and
     * submits it to {@link #executorService()}.
     *
     * @param identifier identifier of the resource being updated
     * @param content content handed to {@link #updateSynch(String, Reader)}
     * @return the submitted task, as a future for its result
     * @throws IOException declared by this signature; nothing in this method
     *         throws it directly
     */
    @Override
    public ListenableFuture<T> update(final String identifier,
            final Reader content) throws IOException {
        LOGGER.debug("Received update for identifier: {}", identifier);
        return schedule(updateSynch(identifier, content));
    }

    /**
     * Obtains a removal task from {@link #removeSynch(String)} and submits it
     * to {@link #executorService()}.
     *
     * @param identifier identifier of the resource being removed
     * @return the submitted task, as a future for its result
     * @throws IOException declared by this signature; nothing in this method
     *         throws it directly
     */
    @Override
    public ListenableFuture<T> remove(final String identifier)
            throws IOException {
        LOGGER.debug("Received remove for identifier: {}", identifier);
        return schedule(removeSynch(identifier));
    }

    /**
     * Registers a completion listener on the task, then submits the task.
     * {@link #executorService()} is consulted once for the listener and once
     * for the submission.
     *
     * @param task the task to run
     * @return the same task that was passed in
     */
    private ListenableFuture<T> schedule(final ListenableFutureTask<T> task) {
        final Runnable completionListener = new Runnable() {

            @Override
            public void run() {
                // NOTE(review): "this" is the anonymous listener itself, so
                // the notification goes to the listener's own monitor rather
                // than the indexer's - confirm that this is intended.
                synchronized (this) {
                    notifyAll();
                }
            }
        };
        task.addListener(completionListener, executorService());
        executorService().submit(task);
        return task;
    }

    /**
     * Creates, without submitting it, the task that performs a removal.
     *
     * @param identifier identifier of the resource being removed
     * @return the task that {@link #remove(String)} will submit
     */
    public abstract ListenableFutureTask<T> removeSynch(final String identifier);

    /**
     * Creates, without submitting it, the task that performs an update.
     *
     * @param identifier identifier of the resource being updated
     * @param content content for the update
     * @return the task that {@link #update(String, Reader)} will submit
     */
    public abstract ListenableFutureTask<T> updateSynch(final String identifier,
            final Reader content);

}
@@ -0,0 +1,73 @@
/**
* Copyright 2013 DuraSpace, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
*
*/
package org.fcrepo.indexer;

import static org.slf4j.LoggerFactory.getLogger;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;

import org.apache.http.HttpException;
import org.apache.http.HttpResponse;
import org.apache.http.client.ClientProtocolException;
import org.slf4j.Logger;


/**
* {@link IndexableContentRetriever} that caches its results.
*
* @author ajs6f
* @date Dec 7, 2013
*/
public abstract class CachingRetriever implements IndexableContentRetriever {

    private static final Logger LOGGER = getLogger(CachingRetriever.class);

    /** Set once {@link #cache} has been filled by a completed retrieval. */
    private boolean cached;

    /** Bytes of the retrieved entity; populated on the first retrieval. */
    private byte[] cache;

    /**
     * Returns the retrieved content, fetching it on first use only.
     * <p>
     * The first call writes the entity of {@link #retrieveHttpResponse()}
     * into memory and remembers the bytes. Later calls return a fresh stream
     * over those same bytes without calling {@link #retrieveHttpResponse()}
     * again. If retrieval throws, nothing is remembered and the next call
     * tries again. The cache fields are accessed without synchronization.
     *
     * @return a new stream over the retrieved bytes
     * @throws ClientProtocolException propagated from retrieval
     * @throws IOException propagated from retrieval or from writing out the
     *         entity
     * @throws AbsentTransformPropertyException propagated from retrieval
     * @throws HttpException propagated from retrieval
     */
    @Override
    public InputStream call() throws ClientProtocolException, IOException,
            AbsentTransformPropertyException, HttpException {
        if (cached) {
            LOGGER.debug("Returning cached content...");
            return new ByteArrayInputStream(cache);
        }
        LOGGER.debug("Retrieving uncached content...");
        try (ByteArrayOutputStream out = new ByteArrayOutputStream()) {
            retrieveHttpResponse().getEntity().writeTo(out);
            cache = out.toByteArray();
        }
        cached = true;
        // Decoding the whole payload is only worth doing when the message
        // will actually be emitted.
        if (LOGGER.isDebugEnabled()) {
            // Diagnostic output only: decoded with the platform default
            // charset, which may differ from the entity's encoding.
            LOGGER.debug("Retrieved cache-able content:\n{}", new String(cache));
        }
        return new ByteArrayInputStream(cache);
    }

    /**
     * Performs the actual retrieval whose entity is cached by {@link #call()}.
     *
     * @return the response whose entity holds the content to cache
     * @throws AbsentTransformPropertyException declared for implementations
     * @throws ClientProtocolException declared for implementations
     * @throws IOException declared for implementations
     * @throws HttpException declared for implementations
     */
    protected abstract HttpResponse retrieveHttpResponse()
            throws AbsentTransformPropertyException,
            ClientProtocolException, IOException, HttpException;

}

0 comments on commit a2e80dd

Please sign in to comment.