Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
Allows circular fingerprint to encode stereochemistry from SMILES.
  • Loading branch information
johnmay committed Oct 12, 2017
1 parent 21c9308 commit ef55233
Show file tree
Hide file tree
Showing 2 changed files with 104 additions and 8 deletions.
Expand Up @@ -30,6 +30,7 @@

import java.util.AbstractMap;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.BitSet;
import java.util.Collections;
import java.util.List;
Expand All @@ -44,7 +45,7 @@
import org.openscience.cdk.interfaces.IAtom;
import org.openscience.cdk.interfaces.IAtomContainer;
import org.openscience.cdk.interfaces.IBond;

import org.openscience.cdk.interfaces.IStereoElement;


/**
Expand Down Expand Up @@ -124,7 +125,6 @@ public FP(int hashCode, int iteration, int[] atoms) {
private final int ATOMCLASS_ECFP = 1;
private final int ATOMCLASS_FCFP = 2;

private int classType, atomClass;
private IAtomContainer mol;
private final int length;

Expand Down Expand Up @@ -153,6 +153,10 @@ public FP(int hashCode, int iteration, int[] atoms) {
private boolean[] lonePair; // true if the atom is N,O,S with octet valence and at least one lone pair
private boolean[] tetrazole; // special flag for being in a tetrazole (C1=NN=NN1) ring

// ------------ options -------------------
// classType: the fingerprint class requested by the caller (a constructor of this class
// takes a 'classType' argument). atomClass: presumably one of the ATOMCLASS_* constants
// derived from classType -- TODO confirm against the code that assigns it.
private int classType, atomClass;
// Selects the source of tetrahedral stereo: when true the per-atom rubricTetrahedral(n)
// perception is run; when false (the default) rubricTetrahedralsCdk() reads the
// molecule's stored stereo elements instead. Changed through setPerceiveStereo(boolean).
private boolean optPerceiveStereo = false;

// ------------ public methods ------------

/**
Expand Down Expand Up @@ -188,6 +192,17 @@ public CircularFingerprinter(int classType, int len) {
this.length = len;
}

/**
 * Chooses the source of stereochemistry used when fingerprinting. When
 * enabled, stereochemistry is re-perceived from the 2D/3D coordinates of the
 * molecule; when disabled (the default) the stereochemistry already stored
 * on the molecule as {@link IStereoElement}s is used.
 *
 * @param val {@code true} to re-perceive stereochemistry from coordinates,
 *            {@code false} to use the stored stereo elements
 */
public void setPerceiveStereo(boolean val) {
    optPerceiveStereo = val;
}

@Override
protected List<Map.Entry<String, String>> getParameters() {
String type = null;
Expand All @@ -201,8 +216,10 @@ protected List<Map.Entry<String, String>> getParameters() {
case CLASS_FCFP4: type = "FCFP4"; break;
case CLASS_FCFP6: type = "FCFP6"; break;
}
return Collections.<Map.Entry<String,String>>singletonList(
new AbstractMap.SimpleImmutableEntry<>("classType", type)
return Arrays.<Map.Entry<String, String>>asList(
new AbstractMap.SimpleImmutableEntry<>("classType", type),
new AbstractMap.SimpleImmutableEntry<>("perceiveStereochemistry",
Boolean.toString(optPerceiveStereo))
);
}

Expand Down Expand Up @@ -617,8 +634,12 @@ else if (bond.getOrder() == IBond.Order.TRIPLE)
detectStrictAromaticity();

tetra = new int[na][];
for (int n = 0; n < na; n++)
tetra[n] = rubricTetrahedral(n);
if (optPerceiveStereo) {
for (int n = 0; n < na; n++)
tetra[n] = rubricTetrahedral(n);
} else {
rubricTetrahedralsCdk();
}
}

// assign a ring block ID to each atom (0=not in ring)
Expand Down Expand Up @@ -832,6 +853,39 @@ private void detectStrictAromaticity() {
}
}

// Tetrahedral 'rubric' sourced from the CDK object model rather than from coordinates:
// every tetrahedral stereo element stored on the molecule is turned into a 4-entry
// neighbour array and written to tetra[] at the index of its focus atom.
//  - a carrier equal to the focus stands for an implicit hydrogen and is recorded as -1
//  - every other carrier is recorded as its atom index in the molecule
//  - LEFT has its last two entries exchanged, RIGHT is stored as listed (presumably so
//    both end up in one winding convention); any other configuration is ignored
private void rubricTetrahedralsCdk() {
    for (IStereoElement element : mol.stereoElements()) {
        // only tetrahedral centres are of interest here
        if (element.getConfigClass() != IStereoElement.Tetrahedral) {
            continue;
        }

        @SuppressWarnings("unchecked")
        final IStereoElement<IAtom, IAtom> centre = (IStereoElement<IAtom, IAtom>) element;
        final IAtom focus = centre.getFocus();
        final List<IAtom> carriers = centre.getCarriers();

        final int[] neighbours = new int[4];
        for (int k = 0; k < neighbours.length; k++) {
            final IAtom carrier = carriers.get(k);
            neighbours[k] = focus.equals(carrier) ? -1 : mol.indexOf(carrier);
        }

        final int config = centre.getConfig();
        if (config == IStereoElement.LEFT) {
            final int held = neighbours[2];
            neighbours[2] = neighbours[3];
            neighbours[3] = held;
        } else if (config != IStereoElement.RIGHT) {
            // neither LEFT nor RIGHT: leave tetra[] untouched for this centre
            continue;
        }
        tetra[mol.indexOf(focus)] = neighbours;
    }
}

// tetrahedral 'rubric': for any sp3 atom that has enough neighbours and appropriate wedge bond/3D geometry information,
// build up a list of neighbours in a certain permutation order; the resulting array of size 4 can have a total of
// 24 permutations; there are two groups of 12 that can be mapped onto each other by tetrahedral rotations, hence this
Expand Down
Expand Up @@ -28,6 +28,7 @@
package org.openscience.cdk.fingerprint;

import static org.hamcrest.CoreMatchers.is;
import static org.hamcrest.CoreMatchers.not;
import static org.hamcrest.MatcherAssert.assertThat;
import static org.junit.Assert.assertNotNull;

Expand All @@ -36,6 +37,7 @@
import java.io.ByteArrayOutputStream;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.BitSet;
import java.util.HashMap;
Expand All @@ -53,6 +55,7 @@
import org.openscience.cdk.interfaces.IAtom;
import org.openscience.cdk.interfaces.IAtomContainer;
import org.openscience.cdk.interfaces.IBond;
import org.openscience.cdk.interfaces.IChemObjectBuilder;
import org.openscience.cdk.io.MDLV2000Reader;
import org.openscience.cdk.io.MDLV2000Writer;
import org.openscience.cdk.silent.Atom;
Expand Down Expand Up @@ -96,6 +99,44 @@ public void testFingerprints() throws Exception {
logger.info("CircularFingerprinter test: completed without any problems");
}

// Compares fingerprints of one stereocentre written as two SMILES strings and as a
// molfile, with stereo re-perception switched on and then off.
@Test
public void testUseStereoElements() throws CDKException {
    // NOTE(review): the whitespace inside this molfile looks collapsed; confirm the
    // literal against the fixed-width V2000 layout before relying on it.
    final String molfile = "\n"
        + " CDK 10121722462D \n"
        + "\n"
        + " 5 4 0 0 0 0 999 V2000\n"
        + " -4.1837 2.6984 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n"
        + " -3.4692 3.1109 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n"
        + " -2.7547 2.6984 0.0000 C 0 0 1 0 0 0 0 0 0 0 0 0\n"
        + " -2.0403 3.1109 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n"
        + " -2.7547 1.8734 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n"
        + " 1 2 1 0 0 0 0\n"
        + " 2 3 1 0 0 0 0\n"
        + " 3 4 1 0 0 0 0\n"
        + " 3 5 1 1 0 0 0\n"
        + "M END\n";
    final String smilesA = "CC[C@@H](C)O";
    final String smilesB = "CC[C@H](O)C";

    final IChemObjectBuilder builder = SilentChemObjectBuilder.getInstance();
    final SmilesParser parser = new SmilesParser(builder);
    final MDLV2000Reader reader = new MDLV2000Reader(new StringReader(molfile));

    final IAtomContainer fromSmilesA = parser.parseSmiles(smilesA);
    final IAtomContainer fromSmilesB = parser.parseSmiles(smilesB);
    final IAtomContainer fromMolfile = reader.read(builder.newAtomContainer());

    final CircularFingerprinter fingerprinter = new CircularFingerprinter();

    // Perceiving from coordinates: the SMILES-derived molecules carry no coordinates,
    // so they agree with each other but differ from the molfile-derived molecule.
    fingerprinter.setPerceiveStereo(true);
    assertThat(fingerprinter.getFingerprint(fromSmilesA),
               is(fingerprinter.getFingerprint(fromSmilesB)));
    assertThat(fingerprinter.getFingerprint(fromSmilesB),
               is(not(fingerprinter.getFingerprint(fromMolfile))));

    // Using the stored stereo elements: all three inputs give the same fingerprint.
    fingerprinter.setPerceiveStereo(false);
    assertThat(fingerprinter.getFingerprint(fromSmilesA),
               is(fingerprinter.getFingerprint(fromSmilesB)));
    assertThat(fingerprinter.getFingerprint(fromSmilesB),
               is(fingerprinter.getFingerprint(fromMolfile)));
}

@Test
public void testGetBitFingerprint() throws Exception {
assert (trivialMol != null);
Expand Down Expand Up @@ -340,8 +381,9 @@ static IAtom atom(String symbol, int h, double x, double y) {
}

// Checks that the version description lists every fingerprinter parameter: the class
// type and the stereo-perception option (which is false on a freshly built instance).
// Only one declaration of 'fpr' and of 'expected' is kept; the method previously
// declared each of them twice and so could not compile.
@Test
public void testVersion() {
    CircularFingerprinter fpr = new CircularFingerprinter(CircularFingerprinter.CLASS_ECFP4);
    String expected = "CDK-CircularFingerprinter/" + CDK.getVersion() +
                      " classType=ECFP4 perceiveStereochemistry=false";
    Assert.assertThat(fpr.getVersionDescription(),
                      CoreMatchers.is(expected));
}
Expand Down

0 comments on commit ef55233

Please sign in to comment.