Skip to content

Commit

Permalink
Report a version/param description for fingerprints.
Browse files Browse the repository at this point in the history
  • Loading branch information
johnmay committed Apr 30, 2017
1 parent 05d316d commit 5995e07
Show file tree
Hide file tree
Showing 9 changed files with 123 additions and 15 deletions.
Expand Up @@ -39,6 +39,24 @@
*/
public interface IFingerprinter {

/**
* Generate a fingerprint type version description in chemfp's FPS format. We
* report the library version rather than an individual version per fingerprint.
* Although this is awkward, as many fingerprints don't/won't change between
* releases, and we cannot keep compatibility, we guarantee that we document how
* the fingerprint was encoded.
*
* <br>
* Examples:
* <pre>
* #type=CDK-Fingerprinter/2.0 searchDepth=7 pathLimit=2000 hashPseudoAtoms=true
* #type=CDK-CircularFingerprinter/2.0 classType=ECFP4
* </pre>
*
* NOTE(review): the examples show a leading "#type=", but the implementation in
* AbstractFingerprinter starts the returned text at "CDK-" - confirm whether
* the caller is expected to add the "#type=" prefix.
*
* @return version description.
*/
String getVersionDescription();

/**
* Generate a binary fingerprint as a bit. This method will usually delegate to
* {@link #getBitFingerprint(IAtomContainer)} and invoke
Expand Down
Expand Up @@ -23,13 +23,40 @@

package org.openscience.cdk.fingerprint;

import org.openscience.cdk.CDK;
import org.openscience.cdk.exception.CDKException;
import org.openscience.cdk.interfaces.IAtomContainer;

import java.util.BitSet;
import java.util.Collections;
import java.util.List;
import java.util.Map;

public abstract class AbstractFingerprinter implements IFingerprinter {

/**
 * Reports the parameters this fingerprinter is configured with. The default
 * implementation reports no parameters; subclasses should override this
 * method to list their own settings.
 *
 * @return the key=value pairs of configured parameters, empty by default
 */
protected List<Map.Entry<String,String>> getParameters() {
    final List<Map.Entry<String, String>> noParameters = Collections.emptyList();
    return noParameters;
}

/**
 * Builds the description as {@code CDK-<simple class name>/<CDK version>}
 * followed by one space-separated {@code key=value} pair for every entry
 * returned by {@link #getParameters()}, in the order they are returned.
 *
 * @return the version description of this fingerprinter
 */
@Override
public final String getVersionDescription() {
    // The library version is reported; fingerprints could be versioned separately.
    final String typeAndVersion = "CDK-" + getClass().getSimpleName() + "/" + CDK.getVersion();
    final StringBuilder description = new StringBuilder(typeAndVersion);
    for (Map.Entry<String, String> entry : getParameters()) {
        description.append(' ');
        description.append(entry.getKey());
        description.append('=');
        description.append(entry.getValue());
    }
    return description.toString();
}

/** {@inheritDoc} */
@Override
public BitSet getFingerprint(IAtomContainer mol) throws CDKException {
Expand Down
Expand Up @@ -35,7 +35,10 @@
import org.openscience.cdk.tools.LoggingToolFactory;
import org.openscience.cdk.tools.manipulator.AtomContainerManipulator;

import java.util.AbstractMap;
import java.util.AbstractMap.SimpleImmutableEntry;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.BitSet;
import java.util.Collections;
import java.util.HashMap;
Expand Down Expand Up @@ -142,6 +145,15 @@ public Fingerprinter(int size, int searchDepth) {

}

/**
 * Reports the search depth, path limit and pseudo-atom hashing flag this
 * fingerprinter is configured with, in that order.
 *
 * @return the key=value pairs "searchDepth", "pathLimit" and "hashPseudoAtoms"
 */
@Override
protected List<Map.Entry<String, String>> getParameters() {
    final Map.Entry<String, String> depthParam =
            new SimpleImmutableEntry<>("searchDepth", Integer.toString(searchDepth));
    final Map.Entry<String, String> limitParam =
            new SimpleImmutableEntry<>("pathLimit", Integer.toString(pathLimit));
    final Map.Entry<String, String> pseudoParam =
            new SimpleImmutableEntry<>("hashPseudoAtoms", Boolean.toString(hashPseudoAtoms));
    return Arrays.asList(depthParam, limitParam, pseudoParam);
}

/**
* Generates a fingerprint of the default size for the given AtomContainer.
*
Expand Down
Expand Up @@ -23,23 +23,9 @@
*/
package org.openscience.cdk.fingerprint;

import java.util.ArrayList;
import java.util.BitSet;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.Set;

import org.openscience.cdk.exception.CDKException;
import org.openscience.cdk.graph.PathTools;
import org.openscience.cdk.interfaces.IAtom;
import org.openscience.cdk.interfaces.IAtomContainer;
import org.openscience.cdk.interfaces.IAtomType.Hybridization;
import org.openscience.cdk.interfaces.IBond;
import org.openscience.cdk.interfaces.IPseudoAtom;
import org.openscience.cdk.tools.manipulator.AtomContainerManipulator;
import org.openscience.cdk.tools.periodictable.PeriodicTable;

/**
Expand Down
Expand Up @@ -26,16 +26,17 @@
import java.math.BigInteger;
import java.util.BitSet;

import org.hamcrest.CoreMatchers;
import org.junit.Assert;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.openscience.cdk.Atom;
import org.openscience.cdk.CDK;
import org.openscience.cdk.ChemFile;
import org.openscience.cdk.DefaultChemObjectBuilder;
import org.openscience.cdk.Reaction;
import org.openscience.cdk.SlowTest;
import org.openscience.cdk.exception.CDKException;
import org.openscience.cdk.exception.InvalidSmilesException;
import org.openscience.cdk.graph.AtomContainerAtomPermutor;
import org.openscience.cdk.graph.AtomContainerBondPermutor;
import org.openscience.cdk.interfaces.IAtomContainer;
Expand All @@ -53,6 +54,7 @@
import org.openscience.cdk.tools.manipulator.ChemFileManipulator;

import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertThat;
import static org.junit.Assert.assertTrue;

/**
Expand Down Expand Up @@ -407,4 +409,13 @@ public static void main(String[] args) throws Exception {
assertFalse(FingerprinterTool.isSubset(fp4, fp3));
assertFalse(FingerprinterTool.isSubset(fp3, fp4));
}

/**
 * A fingerprinter built with search depth 7, path limit 2000 and pseudo-atom
 * hashing enabled must report exactly those settings after the CDK version.
 */
@Test
public void testVersion() {
    final Fingerprinter fingerprinter = new Fingerprinter(1024, 7);
    fingerprinter.setPathLimit(2000);
    fingerprinter.setHashPseudoAtoms(true);
    final String prefix = "CDK-Fingerprinter/" + CDK.getVersion();
    final String expected = prefix + " searchDepth=7 pathLimit=2000 hashPseudoAtoms=true";
    final String actual = fingerprinter.getVersionDescription();
    assertThat(actual, CoreMatchers.is(expected));
}
}
Expand Up @@ -28,8 +28,11 @@

package org.openscience.cdk.fingerprint;

import java.util.AbstractMap;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.BitSet;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
Expand Down Expand Up @@ -188,6 +191,24 @@ public CircularFingerprinter(int classType, int len) {
this.length = len;
}

/**
 * Reports the configured class type under the key "classType", using the
 * ECFP/FCFP name that matches the {@code CLASS_*} constant.
 *
 * @return a single key=value pair; the value is {@code null} when the class
 *         type matches none of the known constants
 */
@Override
protected List<Map.Entry<String, String>> getParameters() {
    final String label;
    switch (classType) {
        case CLASS_ECFP0:
            label = "ECFP0";
            break;
        case CLASS_ECFP2:
            label = "ECFP2";
            break;
        case CLASS_ECFP4:
            label = "ECFP4";
            break;
        case CLASS_ECFP6:
            label = "ECFP6";
            break;
        case CLASS_FCFP0:
            label = "FCFP0";
            break;
        case CLASS_FCFP2:
            label = "FCFP2";
            break;
        case CLASS_FCFP4:
            label = "FCFP4";
            break;
        case CLASS_FCFP6:
            label = "FCFP6";
            break;
        default:
            // Unrecognised class types are reported with a null value.
            label = null;
            break;
    }
    final Map.Entry<String, String> classTypeParam =
            new AbstractMap.SimpleImmutableEntry<>("classType", label);
    return Collections.singletonList(classTypeParam);
}

/**
* Calculates the fingerprints for the given {@link IAtomContainer}, and stores them for subsequent retrieval.
*
Expand Down
Expand Up @@ -42,7 +42,10 @@
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;

import org.hamcrest.CoreMatchers;
import org.junit.Assert;
import org.openscience.cdk.AtomContainer;
import org.openscience.cdk.CDK;
import org.openscience.cdk.CDKTestCase;
import org.openscience.cdk.SlowTest;
import org.openscience.cdk.exception.CDKException;
Expand Down Expand Up @@ -336,4 +339,11 @@ static IAtom atom(String symbol, int h, double x, double y) {
return a;
}

/**
 * An ECFP4 circular fingerprinter must report its class type after the
 * CDK version in the version description.
 */
@Test
public void testVersion() {
    final IFingerprinter fingerprinter =
            new CircularFingerprinter(CircularFingerprinter.CLASS_ECFP4);
    final String prefix = "CDK-CircularFingerprinter/" + CDK.getVersion();
    final String expected = prefix + " classType=ECFP4";
    final String actual = fingerprinter.getVersionDescription();
    Assert.assertThat(actual, CoreMatchers.is(expected));
}

}
Expand Up @@ -22,7 +22,11 @@
*/
package org.openscience.cdk.fingerprint;

import java.util.AbstractMap;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.openscience.cdk.exception.CDKException;
Expand Down Expand Up @@ -57,6 +61,14 @@ public SignatureFingerprinter(int depth) {
this.signatureDepth = depth;
}


/**
 * Reports the configured signature depth under the key "signatureDepth".
 *
 * @return a single key=value pair holding the signature depth
 */
@Override
protected List<Map.Entry<String, String>> getParameters() {
    final String depthValue = Integer.toString(signatureDepth);
    final Map.Entry<String, String> depthParam =
            new AbstractMap.SimpleImmutableEntry<>("signatureDepth", depthValue);
    return Collections.singletonList(depthParam);
}

@Override
public IBitFingerprint getBitFingerprint(IAtomContainer atomContainer) throws CDKException {
return new IntArrayFingerprint(getRawFingerprint(atomContainer));
Expand Down
Expand Up @@ -30,7 +30,11 @@
import org.openscience.cdk.interfaces.IAtomContainer;
import org.openscience.cdk.smiles.SmilesGenerator;

import java.util.AbstractMap;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;

Expand Down Expand Up @@ -74,6 +78,13 @@ public LingoFingerprinter(int n) {
this.n = n;
}

/**
 * Reports the configured value of {@code n} under the key "ngramLength".
 *
 * @return a single key=value pair holding the n-gram length
 */
@Override
protected List<Map.Entry<String, String>> getParameters() {
    final String lengthValue = Integer.toString(n);
    final Map.Entry<String, String> lengthParam =
            new AbstractMap.SimpleImmutableEntry<>("ngramLength", lengthValue);
    return Collections.singletonList(lengthParam);
}

@Override
public IBitFingerprint getBitFingerprint(IAtomContainer iAtomContainer) throws CDKException {
return FingerprinterTool.makeBitFingerprint(getRawFingerprint(iAtomContainer));
Expand Down

0 comments on commit 5995e07

Please sign in to comment.