Skip to content

Commit

Permalink
Merge pull request #291 from cdk/patch/maccs166opt
Browse files Browse the repository at this point in the history
Patch/maccs166opt
  • Loading branch information
rajarshi committed Apr 1, 2017
2 parents fd684bd + ef43de6 commit a9bc279
Show file tree
Hide file tree
Showing 7 changed files with 251 additions and 100 deletions.
Expand Up @@ -29,6 +29,7 @@
import org.openscience.cdk.isomorphism.matchers.IQueryAtomContainer;

import java.util.Iterator;
import java.util.concurrent.TimeUnit;

import static org.openscience.cdk.graph.GraphUtil.EdgeToBondMap;

Expand Down Expand Up @@ -121,8 +122,19 @@ public int[] match(IAtomContainer target) {
/**{@inheritDoc} */
@Override
public Mappings matchAll(final IAtomContainer target) {
EdgeToBondMap bonds2 = EdgeToBondMap.withSpaceFor(target);
int[][] g2 = GraphUtil.toAdjList(target, bonds2);

final EdgeToBondMap bonds2;
final int[][] g2;

AdjListCache cached = target.getProperty(AdjListCache.class.getName());
if (cached == null || !cached.validate(target)) {
cached = new AdjListCache(target);
target.setProperty(AdjListCache.class.getName(), cached);
}

bonds2 = cached.bmap;
g2 = cached.g;

Iterable<int[]> iterable = new VFIterable(query, target, g1, g2, bonds1, bonds2, atomMatcher, bondMatcher,
subgraph);
return new Mappings(query, target, iterable);
Expand Down Expand Up @@ -240,4 +252,29 @@ public Iterator<int[]> iterator() {
new VFState(container1, container2, g1, g2, bonds1, bonds2, atomMatcher, bondMatcher));
}
}

private static final class AdjListCache {

// 100 ms max age
private static final long MAX_AGE = TimeUnit.MILLISECONDS.toNanos(100);

private final int[][] g;
private final EdgeToBondMap bmap;
private final int numAtoms, numBonds;
private final long tInit;

private AdjListCache(IAtomContainer mol) {
this.bmap = EdgeToBondMap.withSpaceFor(mol);
this.g = GraphUtil.toAdjList(mol, bmap);
this.numAtoms = mol.getAtomCount();
this.numBonds = mol.getBondCount();
this.tInit = System.nanoTime();
}

private boolean validate(IAtomContainer mol) {
return mol.getAtomCount() == numAtoms &&
mol.getBondCount() == numBonds &&
(System.nanoTime() - tInit) < MAX_AGE;
}
}
}
Expand Up @@ -22,30 +22,29 @@
*/
package org.openscience.cdk.fingerprint;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.BitSet;
import java.util.List;
import java.util.Map;

import org.openscience.cdk.CDKConstants;
import org.openscience.cdk.exception.CDKException;
import org.openscience.cdk.graph.ConnectedComponents;
import org.openscience.cdk.graph.AllCycles;
import org.openscience.cdk.graph.GraphUtil;
import org.openscience.cdk.interfaces.IAtom;
import org.openscience.cdk.interfaces.IAtomContainer;
import org.openscience.cdk.interfaces.IBond;
import org.openscience.cdk.interfaces.IChemObjectBuilder;
import org.openscience.cdk.interfaces.IRingSet;
import org.openscience.cdk.isomorphism.Pattern;
import org.openscience.cdk.isomorphism.Ullmann;
import org.openscience.cdk.isomorphism.VentoFoggia;
import org.openscience.cdk.isomorphism.matchers.smarts.SmartsMatchers;
import org.openscience.cdk.ringsearch.AllRingsFinder;
import org.openscience.cdk.smiles.smarts.parser.SMARTSParser;
import org.openscience.cdk.tools.ILoggingTool;
import org.openscience.cdk.tools.LoggingToolFactory;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.BitSet;
import java.util.List;
import java.util.Map;

/**
* This fingerprinter generates 166 bit MACCS keys.
*
Expand Down Expand Up @@ -103,47 +102,130 @@ public IBitFingerprint getBitFingerprint(IAtomContainer container) throws CDKExc
// init SMARTS invariants (connectivity, degree, etc)
SmartsMatchers.prepare(container, false);

final int numAtoms = container.getAtomCount();


final GraphUtil.EdgeToBondMap bmap = GraphUtil.EdgeToBondMap.withSpaceFor(container);
final int[][] adjlist = GraphUtil.toAdjList(container, bmap);

for (int i = 0; i < keys.length; i++) {
Pattern pattern = keys[i].pattern;
if (pattern == null) continue;
final MaccsKey key = keys[i];
final Pattern pattern = key.pattern;

switch (key.smarts) {
case "[!*]":
break;
case "[!0]":
for (IAtom atom : container.atoms()) {
if (atom.getMassNumber() != null) {
fp.set(i);
break;
}
}
break;

// check if there are at least 'count' unique hits, key.count = 0
// means find at least one match hence we add 1 to out limit
if (pattern.matchAll(container).uniqueAtoms().atLeast(keys[i].count + 1)) fp.set(i);
// ring bits
case "[R]1@*@*@1": // 3M RING bit22
case "[R]1@*@*@*@1": // 4M RING bit11
case "[R]1@*@*@*@*@1": // 5M RING bit96
case "[R]1@*@*@*@*@*@1": // 6M RING bit163, x2=bit145
case "[R]1@*@*@*@*@*@*@1": // 7M RING, bit19
case "[R]1@*@*@*@*@*@*@*@1": // 8M RING, bit101
// handled separately
break;

case "(*).(*)":
// bit 166 (*).(*) we can match this in SMARTS but it's faster to just
// count the number of components or in this case try to traverse the
// component, iff there are some atoms not visited we have more than
// one component
boolean[] visit = new boolean[numAtoms];
if (numAtoms > 1 && visitPart(visit, adjlist, 0, -1) < numAtoms)
fp.set(165);
break;

default:
if (key.count == 0) {
if (pattern.matches(container))
fp.set(i);
} else {
// check if there are at least 'count' unique hits, key.count = 0
// means find at least one match hence we add 1 to out limit
if (pattern.matchAll(container).uniqueAtoms().atLeast(key.count + 1))
fp.set(i);
}
break;
}
}

// at this point we have skipped the entries whose pattern is "?"
// (bits 1,44,125,166) so let try and do those features by hand

// bit 125 aromatic ring count > 1
// bit 101 a ring with more than 8 members
AllRingsFinder ringFinder = new AllRingsFinder();
IRingSet rings = ringFinder.findAllRings(container);
int ringCount = 0;
for (int i = 0; i < rings.getAtomContainerCount(); i++) {
IAtomContainer ring = rings.getAtomContainer(i);
boolean allAromatic = true;
if (ringCount < 2) { // already found enough aromatic rings
for (IBond bond : ring.bonds()) {
if (!bond.getFlag(CDKConstants.ISAROMATIC)) {
allAromatic = false;
// Ring Bits

// threshold=126, see AllRingsFinder.Threshold.PubChem_97
if (numAtoms > 2) {
AllCycles allcycles = new AllCycles(adjlist,
Math.min(8, numAtoms),
126);
int numArom = 0;
for (int[] path : allcycles.paths()) {
// length is +1 as we repeat the closure vertex
switch (path.length) {
case 4: // 3M bit22
fp.set(21);
break;
case 5: // 4M bit11
fp.set(10);
break;
case 6: // 5M bit96
fp.set(95);
break;
case 7: // 6M bit163->bit145, bit124 numArom > 1

if (numArom < 2) {
if (isAromPath(path, bmap)) {
numArom++;
if (numArom == 2)
fp.set(124);
}
}

if (fp.get(162)) {
fp.set(144); // >0
} else {
fp.set(162); // >1
}
break;
case 8: // 7M bit19
fp.set(18);
break;
case 9: // 8M bit101
fp.set(100);
break;
}
}
}
if (allAromatic) ringCount++;
if (ringCount > 1) fp.set(124);
if (ring.getAtomCount() >= 8) fp.set(100);
}

// bit 166 (*).(*) we can match this in SMARTS but it's faster to just
// count the number of component
ConnectedComponents cc = new ConnectedComponents(GraphUtil.toAdjList(container));
if (cc.nComponents() > 1) fp.set(165);

return new BitSetFingerprint(fp);
}

private static int visitPart(boolean[] visit, int[][] g, int beg, int prev) {
visit[beg] = true;
int visited = 1;
for (int end : g[beg]) {
if (end != prev && !visit[end])
visited += visitPart(visit, g, end, beg);
}
return visited;
}

private static boolean isAromPath(int[] path, GraphUtil.EdgeToBondMap bmap) {
int end = path.length - 1;
for (int i = 0; i < end; i++) {
if (!bmap.get(path[i], path[i+1]).isAromatic())
return false;
}
return true;
}

/** {@inheritDoc} */
@Override
public Map<String, Integer> getRawFingerprint(IAtomContainer iAtomContainer) throws CDKException {
Expand All @@ -153,10 +235,7 @@ public Map<String, Integer> getRawFingerprint(IAtomContainer iAtomContainer) thr
/** {@inheritDoc} */
@Override
public int getSize() {
if (keys != null)
return keys.length;
else
return 0; // throw exception when keys aren't loaded?
return 166;
}

private MaccsKey[] readKeyDef(final IChemObjectBuilder builder) throws IOException, CDKException {
Expand Down Expand Up @@ -238,7 +317,7 @@ private MaccsKey[] keys(final IChemObjectBuilder builder) throws CDKException {
* @return the pattern to match
*/
private Pattern createPattern(String smarts, IChemObjectBuilder builder) {
if (smarts.equals("?")) return null;
return Ullmann.findSubstructure(SMARTSParser.parse(smarts, builder));
if (smarts.equals("[!0]")) return null; // FIXME can't be parsed by our SMARTS Grammar ATM
return VentoFoggia.findSubstructure(SMARTSParser.parse(smarts, builder));
}
}

0 comments on commit a9bc279

Please sign in to comment.