Skip to content
This repository has been archived by the owner on Jan 3, 2019. It is now read-only.

Commit

Permalink
Bumped Any23 version, better streaming internally
Browse files: browse the repository at this point in the history
  • Loading branch information
ajs6f committed Jul 11, 2013
1 parent 9ed9f5d commit 4d12582
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 24 deletions.
6 changes: 5 additions & 1 deletion pom.xml
Expand Up @@ -10,6 +10,10 @@

<name>Fedora TEI triple generator.</name>
<description>Generates RDF triples from TEI documents.</description>

<properties>
<any23.version>0.9.0-SNAPSHOT</any23.version>
</properties>

<dependencies>
<dependency>
Expand All @@ -20,7 +24,7 @@
<dependency>
<groupId>org.apache.any23</groupId>
<artifactId>apache-any23-core</artifactId>
<version>0.8.1-SNAPSHOT</version>
<version>${any23.version}</version>
<exclusions>
<exclusion>
<groupId>org.slf4j</groupId>
Expand Down
Expand Up @@ -24,11 +24,9 @@
import static javax.xml.transform.TransformerFactory.newInstance;
import static org.slf4j.LoggerFactory.getLogger;

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.StringWriter;
import java.io.Writer;
import java.net.URL;

import javax.jcr.RepositoryException;
Expand All @@ -54,6 +52,7 @@
import org.fcrepo.triplegenerators.tei.xslt.LoggingErrorListener;
import org.slf4j.Logger;

import com.google.common.io.FileBackedOutputStream;
import com.hp.hpl.jena.graph.Graph;
import com.hp.hpl.jena.graph.Node;
import com.hp.hpl.jena.graph.Triple;
Expand Down Expand Up @@ -140,22 +139,23 @@ public Dataset getTriples(final javax.jcr.Node uri, final GraphSubjects gs,
TransformerException, TripleHandlerException,
RepositoryException {
final String baseUri = gs.getGraphSubject(uri).asNode().getURI();
final String rdfXml = createRDFXML(resource);
// TODO when Any23 supports it, use a streaming transfer here
final byte[] rdfXml = createRDFXML(resource);
// TODO when Any23 supports it, use a streaming transfer between
// these two steps
return extractTriples(rdfXml, baseUri);
}

protected Dataset extractTriples(final String rdfXml, final String baseUri)
protected Dataset extractTriples(final byte[] rdfXml, final String baseUri)
throws TripleHandlerException, IOException {

final DocumentSource source =
new ByteArrayDocumentSource(rdfXml.getBytes(), baseUri,
new ByteArrayDocumentSource(rdfXml, baseUri,
"application/rdf+xml");
final Graph problems = new GraphMem();
try (final ModelTripleHandler handler = new ModelTripleHandler()) {
try {
final ExtractionReport report = any23.extract(source, handler);
final Dataset results = new DatasetImpl(handler.getModel());
final Graph problems = new GraphMem();
for (final Extractor<?> extractor : report.getMatchingExtractors()) {
for (final Issue issue : report.getExtractorIssues(extractor
.getDescription().getExtractorName())) {
Expand All @@ -170,12 +170,11 @@ protected Dataset extractTriples(final String rdfXml, final String baseUri)
}
return results;
} catch (final ExtractionException e) {
final Dataset results = new DatasetImpl(createDefaultModel());
final Graph problems = new GraphMem();
final Dataset sadResults = new DatasetImpl(createDefaultModel());
problems.add(new Triple(createURI(baseUri), PROBLEM_PREDICATE,
createLiteral(e.getMessage())));
results.addNamedModel("problems", createModelForGraph(problems));
return results;
sadResults.addNamedModel("problems", createModelForGraph(problems));
return sadResults;
}
}
}
Expand All @@ -187,27 +186,27 @@ protected Dataset extractTriples(final String rdfXml, final String baseUri)
* @throws TransformerConfigurationException
* @throws TransformerException
*/
private String createRDFXML(final InputStream resource)
private byte[] createRDFXML(final InputStream resource)
throws IOException, TransformerConfigurationException,
TransformerException {
final Source resourceSource = new StreamSource(resource);
try (final Writer addIdsResultWriter = new StringWriter()) {
final Result addIdsResult = new StreamResult(addIdsResultWriter);
try (
final FileBackedOutputStream addIdsResultStream =
new FileBackedOutputStream(1024 * 1024)) {
final Result addIdsResult = new StreamResult(addIdsResultStream);
addIdsXform.transform(resourceSource, addIdsResult);
final String teiWithIds = addIdsResultWriter.toString();
LOGGER.debug("Added XML IDs to TEI: \n{}", teiWithIds);
// TODO stream the results into the new source
LOGGER.debug("Added XML IDs to TEI.");
try (
final InputStream tei2RdfSourceStream =
new ByteArrayInputStream(teiWithIds.getBytes())) {
addIdsResultStream.getSupplier().getInput()) {
final Source tei2RdfSource =
new StreamSource(tei2RdfSourceStream);
final StreamResult tei2RdfResult =
new StreamResult(new StringWriter());
tei2RdfXform.transform(tei2RdfSource, tei2RdfResult);
LOGGER.debug("Created RDF/XML from TEI: \n{}", tei2RdfResult
.getWriter().toString());
return tei2RdfResult.getWriter().toString();
return tei2RdfResult.getWriter().toString().getBytes();
}
}
}
Expand Down
Expand Up @@ -16,7 +16,7 @@

package org.fcrepo.triplegenerators.tei;

import static com.google.common.base.Charsets.UTF_8;
import static com.google.common.io.Files.toByteArray;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import static org.mockito.Mockito.when;
Expand All @@ -38,7 +38,6 @@
import org.mockito.Mock;
import org.slf4j.Logger;

import com.google.common.io.Files;
import com.hp.hpl.jena.query.Dataset;
import com.hp.hpl.jena.rdf.model.Model;
import com.hp.hpl.jena.rdf.model.Property;
Expand Down Expand Up @@ -103,8 +102,8 @@ public void testExtraction() throws Exception {
@Test
public void testExtractionWithBadRdfXml() throws Exception {

final String rdfXml =
Files.toString(new File("target/test-classes/bad-rdf.xml"), UTF_8);
final byte[] rdfXml =
toByteArray(new File("target/test-classes/bad-rdf.xml"));
final Dataset results = extractTriples(rdfXml, "http://fedora");
for (final StmtIterator i = results.getDefaultModel().listStatements(); i
.hasNext();) {
Expand Down

0 comments on commit 4d12582

Please sign in to comment.