Skip to content

Commit

Permalink
Added fedora 3 object and datastream properties.
Browse files Browse the repository at this point in the history
  • Loading branch information
Michael Durbin authored and Andrew Woods committed Oct 4, 2013
1 parent 3b5bc0c commit 2d51af6
Show file tree
Hide file tree
Showing 20 changed files with 1,060 additions and 336 deletions.
10 changes: 0 additions & 10 deletions README.md
Expand Up @@ -45,13 +45,6 @@ In fcrepo4/fcrepo-webapp/pom.xml add
</dependency>
</dependency>

In fcrepo4/fcrepo-kernel/src/main/resources/fedora-node-types.cnd add

/*
* A federated fedora 3 repository
*/
[fedora:repository]

In the json file referenced in fcrepo4/fcrepo-webapp/src/main/resources/spring/repo.xml,
(which at the time of this writing is fcrepo4/fcrepo-jcr/src/main/resources/config/rest-sessions/repository.json) add

Expand Down Expand Up @@ -87,8 +80,5 @@ You can see the federation over your fedora 3 content at [http://localhost:8080/
## Caveats

* right now, the number of objects in the repository that are exposed is reduced to 21 to simplify testing
* datastream content doesn't yet behave properly
* versions are not presented
* a great deal of fedora 3 attributes and metadata aren't yet made available
* no integration tests

Expand Up @@ -28,6 +28,7 @@
import org.modeshape.jcr.federation.spi.ReadOnlyConnector;
import org.modeshape.jcr.value.BinaryKey;
import org.modeshape.jcr.value.BinaryValue;
import org.modeshape.jcr.value.DateTimeFactory;
import org.modeshape.jcr.value.binary.ExternalBinaryValue;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
Expand All @@ -38,6 +39,8 @@
import java.io.InputStream;
import java.util.Collection;
import java.util.Collections;
import java.util.Date;
import java.util.List;

/**
* A ReadOnly connector to a fedora 3 repository.
Expand All @@ -50,7 +53,32 @@ public class Fedora3FederationConnector extends ReadOnlyConnector
private static final Logger LOGGER
= LoggerFactory.getLogger(Fedora3FederationConnector.class);

public static final String NT_F3_REPOSITORY = "fedora:repository";
// Node type names used for federated Fedora 3 content. The initialize
// method registers node types from "fedora3-node-types.cnd"; these names
// are presumably declared there -- confirm against that file.
private static final String NT_F3_REPOSITORY = "f3:repository";
private static final String NT_F3_OBJECT = "f3:object";
private static final String NT_F3_DATASTREAM = "f3:datastream";

// Property names written on object nodes by addObjectProperties.
private static final String F3_PID = "f3:pid";
private static final String F3_OBJ_STATE = "f3:objState";
// NOTE(review): "objlabel" is not camel-cased like its siblings
// ("objState", "objOwnerId") -- confirm this matches the CND definition.
private static final String F3_OBJ_LABEL = "f3:objlabel";
private static final String F3_OBJ_OWNER_ID = "f3:objOwnerId";
private static final String F3_OBJ_CREATED_DATE = "f3:objCreatedDate";
private static final String F3_OBJ_LAST_MODIFIED_DATE
= "f3:objLastModifiedDate";

// Property names written on datastream nodes by addDatastreamProperties.
private static final String F3_DSID = "f3:dsid";
// NOTE(review): "dsCongtrolGroup" looks like a typo for "dsControlGroup".
// Do not change it without making the same change in the CND file.
private static final String F3_DS_CONTROL_GROUP = "f3:dsCongtrolGroup";
private static final String F3_DS_STATE = "f3:dsState";
// NOTE(review): unlike the other datastream properties this name has no
// "ds" prefix -- confirm it is intentional.
private static final String F3_DS_VERSIONABLE = "f3:versionable";
private static final String F3_DS_VERSION_ID = "f3:dsVersionId";
private static final String F3_DS_LABEL = "f3:dsLabel";
private static final String F3_DS_CREATED = "f3:dsCreated";
private static final String F3_DS_MIME_TYPE = "f3:dsMimeType";
private static final String F3_DS_FORMAT_URI = "f3:dsFormatURI";
private static final String F3_DS_ALT_IDS = "f3:dsAltIds";
private static final String F3_DS_SIZE = "f3:dsSize";
private static final String F3_DS_CONTENT_DIGEST_TYPE
= "f3:dsContentDigestType";
// NOTE(review): "dsCongtentDigest" looks like a typo for "dsContentDigest".
// Do not change it without making the same change in the CND file.
private static final String F3_DS_CONTENT_DIGEST = "f3:dsCongtentDigest";

protected Fedora3DataInterface f3;

Expand Down Expand Up @@ -93,6 +121,17 @@ public void initialize(NamespaceRegistry registry,
super.initialize(registry, nodeTypeManager);

LOGGER.trace("Initializing");

if (nodeTypeManager != null) { // only null for unit tests
nodeTypeManager.registerNodeTypes(
getClass().getClassLoader()
.getResourceAsStream("fedora3-node-types.cnd"),
true);
LOGGER.debug("Loaded node types from {}.",
getClass().getClassLoader()
.getResource("fedora3-node-types.cnd"));
}

try {
if (fedoraUrl != null && username != null && password != null) {
f3 = new RESTFedora3DataImpl(fedoraUrl, username,
Expand Down Expand Up @@ -128,36 +167,16 @@ public Document getDocumentById(String idStr) {
FedoraObjectRecord o = f3.getObjectByPid(id.getPid());
writer.setPrimaryType(JcrConstants.NT_FOLDER);
writer.setParent(ID.ROOT_ID.getId());
writer.addMixinType(FEDORA_OBJECT);
if (o.getModificationDate() != null) {
writer.addProperty(JCR_LASTMODIFIED,
factories().getDateFactory().create(
o.getModificationDate()));
}
if (o.getCreatedDate() != null) {
writer.addProperty(JCR_CREATED,
factories().getDateFactory().create(
o.getCreatedDate()));
}
addObjectProperties(writer, o);
addObjectChildren(writer, o);
return writer.document();
} else if (id.isDatastreamID()) {
// return a datastream node
writer.setPrimaryType(JcrConstants.NT_FILE);
writer.setParent(id.getParentId());
writer.addMixinType(FEDORA_DATASTREAM);
FedoraDatastreamRecord ds
= f3.getDatastream(id.getPid(), id.getDSID());
if (ds.getModificationDate() != null) {
writer.addProperty(JCR_LASTMODIFIED,
factories().getDateFactory().create(
ds.getModificationDate()));
}
if (ds.getCreatedDate() != null) {
writer.addProperty(JCR_CREATED,
factories().getDateFactory().create(
ds.getCreatedDate()));
}
addDatastreamProperties(writer, ds);
ID contentId = ID.contentID(id.getPid(), id.getDSID());
writer.addChild(contentId.getId(), contentId.getName());
return writer.document();
Expand All @@ -167,19 +186,7 @@ public Document getDocumentById(String idStr) {
id.getDSID());
writer.setPrimaryType(JcrConstants.NT_RESOURCE);
writer.setParent(id.getParentId());
writer.addMixinType(FEDORA_BINARY);
try {
BinaryValue binary = new Fedora3DatastreamBinaryValue(ds);
writer.addProperty(JcrConstants.JCR_DATA, binary);
LOGGER.trace("{} size: {}", ds.getId(), binary.getSize());
LOGGER.trace("{} hash: {}", ds.getId(), binary.getHexHash());
writer.addProperty(CONTENT_DIGEST, ContentDigest.
asURI("SHA-1", binary.getHexHash()));
writer.addProperty(CONTENT_SIZE, binary.getSize());
} catch (Exception ex) {
throw new RuntimeException(ex);
}
writer.addProperty(JcrConstants.JCR_MIME_TYPE, ds.getMimeType());
addDatastreamContentProperties(writer, ds);
return writer.document();
} else {
return null;
Expand All @@ -204,6 +211,114 @@ private void addRepositoryChildren(DocumentWriter writer, String idStr,
//}
}

/**
 * Populates the document for a Fedora 3 object: adds the Fedora 3 object
 * mixin type, the Fedora 3 object properties, and the JCR created and
 * last-modified properties derived from the same two dates.
 *
 * Fedora 3 versioning is not exposed here; only the values reported by the
 * supplied record are written.
 *
 * @param writer the document writer being populated
 * @param o the Fedora 3 object record that supplies the values
 */
private void addObjectProperties(DocumentWriter writer,
        FedoraObjectRecord o) {
    // Fedora 3 view of the object; the pid is always written, every other
    // value is skipped by addOptionalProperty when it is null.
    writer.addMixinType(NT_F3_OBJECT);
    writer.addProperty(F3_PID, o.getPid());
    addOptionalProperty(writer, F3_OBJ_STATE, o.getState());
    addOptionalProperty(writer, F3_OBJ_LABEL, o.getLabel());
    addOptionalProperty(writer, F3_OBJ_OWNER_ID, o.getOwnerIds());

    final Date created = o.getCreatedDate();
    final Date lastModified = o.getModificationDate();
    addOptionalProperty(writer, F3_OBJ_CREATED_DATE, created);
    addOptionalProperty(writer, F3_OBJ_LAST_MODIFIED_DATE, lastModified);

    // The same dates are mirrored onto the standard JCR properties.
    addOptionalProperty(writer, JCR_CREATED, created);
    addOptionalProperty(writer, JCR_LASTMODIFIED, lastModified);
}

/**
 * Adds the Fedora 3 datastream mixin type and Fedora 3 datastream
 * properties for the given datastream, then adds the Fedora 4 "datastream"
 * mixin together with the JCR created and last-modified dates.
 *
 * Version history is not exposed: every property except JCR_CREATED is
 * taken from the current version. JCR_LASTMODIFIED is the created date of
 * the current version and JCR_CREATED is the created date of the last
 * entry in the history list (documented on FedoraDatastreamRecord as the
 * oldest version).
 *
 * @param writer the document writer being populated
 * @param ds the Fedora 3 datastream record that supplies the values
 */
private void addDatastreamProperties(DocumentWriter writer,
        FedoraDatastreamRecord ds) {
    // Fedora 3 Datastream Properties
    writer.addMixinType(NT_F3_DATASTREAM);
    writer.addProperty(F3_DSID, ds.getId());
    writer.addProperty(F3_DS_CONTROL_GROUP, ds.getControlGroup());
    writer.addProperty(F3_DS_STATE, ds.getState());
    writer.addProperty(F3_DS_VERSIONABLE, ds.getVersionable());
    FedoraDatastreamVersionRecord dsVer = ds.getCurrentVersion();
    writer.addProperty(F3_DS_VERSION_ID, dsVer.getVersionId());
    addOptionalProperty(writer, F3_DS_CREATED, dsVer.getCreatedDate());
    addOptionalProperty(writer, F3_DS_LABEL, dsVer.getLabel());
    writer.addProperty(F3_DS_MIME_TYPE, dsVer.getMimeType());
    addOptionalProperty(writer, F3_DS_FORMAT_URI, dsVer.getFormatURI());
    addOptionalProperty(writer, F3_DS_ALT_IDS, dsVer.getAltIDs());
    writer.addProperty(F3_DS_SIZE, dsVer.getContentLength());
    addOptionalProperty(writer, F3_DS_CONTENT_DIGEST_TYPE,
            dsVer.getContentDigestType());
    addOptionalProperty(writer, F3_DS_CONTENT_DIGEST,
            dsVer.getContentDigest());

    // Fedora 4 Datastream Properties
    writer.addMixinType(FEDORA_DATASTREAM);
    if (getContext() != null) { //only null for unit tests
        // The created date is treated as optional above, so it goes
        // through the same null-tolerant helper here instead of being
        // handed straight to the date factory.
        addOptionalProperty(writer, JCR_LASTMODIFIED,
                dsVer.getCreatedDate());
        List<FedoraDatastreamVersionRecord> history = ds.getHistory();
        FedoraDatastreamVersionRecord firstVer
                = history.get(history.size() - 1);
        addOptionalProperty(writer, JCR_CREATED,
                firstVer.getCreatedDate());
    }
}

/**
 * Populates the document for the content node of a datastream: adds the
 * Fedora binary mixin, exposes the datastream content as an external
 * binary value under jcr:data, and records its SHA-1 digest URI, its
 * size and the MIME type of the current version.
 *
 * @param writer the document writer being populated
 * @param ds the Fedora 3 datastream record whose content is exposed
 * @throws RuntimeException wrapping any exception raised while building
 *         the binary value or writing its properties
 */
private void addDatastreamContentProperties(DocumentWriter writer,
        FedoraDatastreamRecord ds) {
    writer.addMixinType(FEDORA_BINARY);
    try {
        BinaryValue binary = new Fedora3DatastreamBinaryValue(ds);
        writer.addProperty(JcrConstants.JCR_DATA, binary);
        // Read size and hash once; both are logged and written.
        final long size = binary.getSize();
        final String hexHash = binary.getHexHash();
        LOGGER.trace("{} size: {}", ds.getId(), size);
        LOGGER.trace("{} hash: {}", ds.getId(), hexHash);
        writer.addProperty(CONTENT_DIGEST,
                ContentDigest.asURI("SHA-1", hexHash));
        writer.addProperty(CONTENT_SIZE, size);
    } catch (Exception ex) {
        // Keep the cause and say which datastream failed.
        throw new RuntimeException("Unable to expose content of datastream "
                + ds.getPid() + "/" + ds.getId(), ex);
    }
    writer.addProperty(JcrConstants.JCR_MIME_TYPE,
            ds.getCurrentVersion().getMimeType());
}

/**
 * Adds a property to the document only when a value is available.
 * <ul>
 *   <li>A {@code null} value adds nothing.</li>
 *   <li>A {@link List} is written as an array of its elements.</li>
 *   <li>A {@link Date} is converted with the connector's date factory.
 *       When the connector has no context (unit tests only) the date is
 *       silently skipped, because no factory is available.</li>
 *   <li>Any other value is written as-is.</li>
 * </ul>
 *
 * @param writer the document writer being populated
 * @param name the property name
 * @param value the value to write; may be {@code null}
 */
private void addOptionalProperty(DocumentWriter writer, String name,
        Object value) {
    if (value == null) {
        return;
    }
    if (value instanceof List) {
        // Wildcard instead of a raw type; toArray() still yields Object[].
        writer.addProperty(name, ((List<?>) value).toArray());
    } else if (value instanceof Date) {
        if (getContext() != null) { //only null for unit tests
            DateTimeFactory f = factories().getDateFactory();
            writer.addProperty(name, f.create((Date) value));
        }
    } else {
        writer.addProperty(name, value);
    }
}


private void addObjectChildren(DocumentWriter writer,
FedoraObjectRecord object) {
for (String dsidStr : object.listDatastreamIds()) {
Expand All @@ -226,8 +341,9 @@ public Collection<String> getDocumentPathsById(String id) {

/**
* Checks if a document with the given id exists.
* @param idStr a {@code non-null} string representing the identifier within
* the system whose existence is being queried in this federation.
* @param idStr a {@code non-null} string representing the identifier
* within the system whose existence is being queried in this
* federation.
*/
public boolean hasDocument(String idStr) {
LOGGER.info("hasDocument {}", idStr);
Expand Down Expand Up @@ -275,11 +391,12 @@ public class Fedora3DatastreamBinaryValue extends ExternalBinaryValue {

private FedoraDatastreamRecord ds;

Fedora3DatastreamBinaryValue(FedoraDatastreamRecord ds) throws Exception {
super(new BinaryKey(ds.getSha1()),
Fedora3DatastreamBinaryValue(FedoraDatastreamRecord ds)
throws Exception {
super(new BinaryKey(ds.getCurrentVersion().getSha1()),
Fedora3FederationConnector.this.getSourceName(),
ID.contentID(ds.getPid(), ds.getId()).getId(),
ds.getContentLength(), null, null);
ds.getCurrentVersion().getContentLength(), null, null);
this.ds = ds;
}

Expand All @@ -288,7 +405,7 @@ public class Fedora3DatastreamBinaryValue extends ExternalBinaryValue {
*/
public InputStream getStream() throws RepositoryException {
try {
return ds.getStream();
return ds.getCurrentVersion().getStream();
} catch (Exception e) {
throw new RepositoryException(e);
}
Expand All @@ -299,7 +416,7 @@ public InputStream getStream() throws RepositoryException {
* fedora 3 datastream whose content is exposed by this BinaryValue.
*/
public String getMimeType() {
return ds.getMimeType();
return ds.getCurrentVersion().getMimeType();
}
}
}
Expand Up @@ -16,56 +16,62 @@

package org.fcrepo.connector.fedora3;

import java.io.InputStream;
import java.util.Date;
import java.util.List;

/**
* An interface to expose enough information about a Fedora 3 datastream to
* import it into fedora 4.
* import it into fedora 4. All metadata is represented here, even those fields
* that are meaningless in the Fedora 4 architecture.
*
* @author Michael Durbin
*/
public interface FedoraDatastreamRecord {

/**
* Gets the pid of the object whose datastream is represented.
* Gets the pid of the object whose datastream is described by this record.
*/
public String getPid();

/**
* Gets the DSID.
* Gets the DSID for the datastream described by this record.
*/
public String getId();

/**
* Gets the MIME type.
* Get the control group for the datastream described by this record. This
* value, while significant in Fedora 3, is only present as an indicator of
* historic status, since storage management is handled through another
* mechanism in Fedora 4.
*/
public String getMimeType();
public String getControlGroup();

/**
* Gets the modification date for the datastream described by this record.
* Gets the state for the datastream described by this record.
*/
public Date getModificationDate();
public String getState();

/**
* Gets the creation date for the datastream described by this record.
* Gets the 'versionable' value for the datastream described by this
* record. This value does not imply anything about whether or how many
* versions of this datastream exist (there is always at least one) but
* instead was an indicator of whether an update to the described
* datastream in Fedora 3 would overwrite the current version or create a
* new one. In the context of fedora 4, this value is just to preserve
* the historical record.
*/
public Date getCreatedDate();
public boolean getVersionable();

/**
* Gets a new InputStream to access the content of the datastream.
* Returns a non-null, non-empty list of FedoraDatastreamVersionRecord
* objects representing the ordered history of this datastream from most
* recent to oldest.
*/
public InputStream getStream() throws Exception;
public List<FedoraDatastreamVersionRecord> getHistory();

/**
* Gets the length in bytes of the content of the datastream.
* Gets the most recent version of the datastream. This method is just a
* shortcut for getHistory().get(0).
*/
public long getContentLength();
public FedoraDatastreamVersionRecord getCurrentVersion();

/**
* Gets (or computes) a SHA-1 hash of the content of the datastream.
*/
public byte[] getSha1() throws Exception;

// TODO: expose other properties
}

0 comments on commit 2d51af6

Please sign in to comment.