Skip to content

Commit

Permalink
Work around limitation of absolute SMILES with unknown elements.
Browse files Browse the repository at this point in the history
We encode the unknown element as Rf (Rutherfordium, the highest element supported by InChI v1.3, the version currently used by JNI InChI). We can still encode structures with Rf provided there are no unknown atoms.
  • Loading branch information
johnmay committed Mar 6, 2017
1 parent 31ae0dc commit cc4da2b
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 0 deletions.
Expand Up @@ -27,6 +27,7 @@
import com.google.common.base.Joiner;
import org.junit.Assert;
import org.junit.Test;
import org.openscience.cdk.exception.CDKException;
import org.openscience.cdk.interfaces.IAtomContainer;
import org.openscience.cdk.silent.SilentChemObjectBuilder;

Expand Down Expand Up @@ -112,6 +113,21 @@ public void dbStereoCanonGeneration() throws Exception {
cansmi.create(smipar.parseSmiles(cansmi.create(mol))));
}

/**
 * Feeds two SMILES that place the unknown ('*') atom at opposite ends of an
 * ethyl chain to the shared {@code test(String...)} helper.
 *
 * @throws Exception propagated from the {@code test(String...)} helper
 */
@Test
public void smilesWithUnknownElem() throws Exception {
    final String wildcardFirst = "*CC";
    final String wildcardLast = "CC*";
    test(wildcardFirst, wildcardLast);
}

/**
 * Runs the shared {@code test(String...)} helper on a lone Rutherfordium
 * atom, i.e. a structure that contains Rf but no unknown ('*') atom.
 *
 * @throws Exception propagated from the {@code test(String...)} helper
 */
@Test
public void rfElement() throws Exception {
    final String[] inputs = {"[Rf]"};
    test(inputs);
}

/**
 * A structure holding both an unknown ('*') atom and a Rutherfordium atom is
 * expected to be rejected with a {@link CDKException}.
 *
 * @throws Exception the expected {@link CDKException}, or anything else
 *                   propagated from the {@code test(String...)} helper
 */
@Test(expected = CDKException.class)
public void problematic() throws Exception {
    final String unknownBondedToRf = "*[Rf]";
    test(unknownBondedToRf);
}

static void test(String... inputs) throws Exception {

SmilesParser sp = new SmilesParser(SilentChemObjectBuilder.getInstance());
Expand Down
Expand Up @@ -23,6 +23,7 @@
package org.openscience.cdk.smiles;

import org.openscience.cdk.CDKConstants;
import org.openscience.cdk.config.Elements;
import org.openscience.cdk.exception.CDKException;
import org.openscience.cdk.graph.ConnectedComponents;
import org.openscience.cdk.graph.GraphUtil;
Expand All @@ -47,6 +48,7 @@
import java.lang.reflect.Method;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.IdentityHashMap;
Expand Down Expand Up @@ -690,6 +692,13 @@ private static long[] inchiNumbers(IAtomContainer container) throws CDKException
// class each time
String cname = "org.openscience.cdk.graph.invariant.InChINumbersTools";
String mname = "getUSmilesNumbers";

List<IAtom> rgrps = getRgrps(container, Elements.Rutherfordium);
for (IAtom rgrp : rgrps) {
rgrp.setAtomicNumber(Elements.Rutherfordium.number());
rgrp.setSymbol(Elements.Rutherfordium.symbol());
}

try {
Class<?> c = Class.forName(cname);
Method method = c.getDeclaredMethod("getUSmilesNumbers", IAtomContainer.class);
Expand All @@ -703,7 +712,24 @@ private static long[] inchiNumbers(IAtomContainer container) throws CDKException
throw new CDKException("An InChI could not be generated and used to canonise SMILES: " + e.getMessage(), e);
} catch (IllegalAccessException e) {
throw new CDKException("Could not access method to obtain InChI numbers.");
} finally {
for (IAtom rgrp : rgrps) {
rgrp.setAtomicNumber(Elements.Unknown.number());
rgrp.setSymbol("*");
}
}
}

/**
 * Collects the atoms of {@code container} whose atomic number is 0 (the
 * unknown / '*' atoms) so that a caller can temporarily relabel them as a
 * stand-in element.
 *
 * <p>If any atom already carries the atomic number of the stand-in element
 * the scan stops and an empty list is returned: relabelling the unknown atoms
 * would then make them indistinguishable from the genuine ones.
 *
 * <p>Atoms whose atomic number is unset ({@code null}) are skipped. The
 * comparison previously auto-unboxed the value and so failed with a
 * {@link NullPointerException} on such atoms. They are deliberately not
 * reported as unknown atoms, because a caller that relabels the returned
 * atoms would otherwise overwrite their state.
 *
 * @param container the structure to scan
 * @param reversed  the element reserved as a stand-in for unknown atoms
 *                  (the name presumably means "reserved")
 * @return a new, mutable list of the atoms with atomic number 0, in iteration
 *         order; or an immutable empty list if the stand-in element is
 *         already present in the container
 */
private static List<IAtom> getRgrps(IAtomContainer container, Elements reversed) {
    List<IAtom> res = new ArrayList<>();
    for (IAtom atom : container.atoms()) {
        // read once into a boxed local so an unset value can be detected
        // instead of being auto-unboxed by the comparisons below
        final Integer atomicNumber = atom.getAtomicNumber();
        if (atomicNumber == null) {
            continue;
        }
        if (atomicNumber == 0) {
            res.add(atom);
        } else if (atomicNumber == reversed.number()) {
            // stand-in element is really present, relabelling is unsafe
            return Collections.emptyList();
        }
    }
    return res;
}

// utility safety check to guard against invalid state
Expand Down

0 comments on commit cc4da2b

Please sign in to comment.