Skip to content

Commit

Permalink
Merge pull request #311 from cdk/patch/beamconvspeedup
Browse files Browse the repository at this point in the history
Patch/beamconvspeedup
  • Loading branch information
egonw committed Apr 30, 2017
2 parents 10a0417 + 6e60cdd commit 3d566b8
Show file tree
Hide file tree
Showing 3 changed files with 99 additions and 154 deletions.
Expand Up @@ -26,6 +26,7 @@

import com.google.common.collect.ImmutableSet;
import org.openscience.cdk.CDKConstants;
import org.openscience.cdk.config.Elements;
import org.openscience.cdk.config.IsotopeFactory;
import org.openscience.cdk.config.Isotopes;
import org.openscience.cdk.exception.CDKException;
Expand Down Expand Up @@ -1280,9 +1281,11 @@ static int length(final String str) {
* @throws CDKException the symbol is not allowed
*/
private IAtom createAtom(String symbol, IChemObjectBuilder builder, int lineNum) throws CDKException {
if (isPeriodicElement(symbol)) {
final Elements elem = Elements.ofString(symbol);
if (elem != Elements.Unknown) {
IAtom atom = builder.newAtom();
atom.setSymbol(symbol);
atom.setSymbol(elem.symbol());
atom.setAtomicNumber(elem.number());
return atom;
}
if (symbol.equals("D") && interpretHydrogenIsotopes.isSet()) {
Expand Down Expand Up @@ -1315,17 +1318,6 @@ private IAtom createAtom(String symbol, IChemObjectBuilder builder, int lineNum)
return atom;
}

/**
* Is the symbol a periodic element.
*
* @param symbol a symbol from the input
* @return the symbol is a pseudo atom
*/
private static boolean isPeriodicElement(final String symbol) {
// XXX: PeriodicTable is slow - switch without file IO would be optimal
Integer elem = PeriodicTable.getAtomicNumber(symbol);
return elem != null && elem > 0;
}

/**
* Is the atom symbol a non-periodic element (i.e. pseudo). Valid pseudo
Expand Down
202 changes: 94 additions & 108 deletions storage/smiles/src/main/java/org/openscience/cdk/smiles/BeamToCDK.java
Expand Up @@ -41,6 +41,7 @@
import uk.ac.ebi.beam.Element;

import java.util.Arrays;
import java.util.List;

import static org.openscience.cdk.CDKConstants.ATOM_ATOM_MAPPING;
import static org.openscience.cdk.CDKConstants.ISAROMATIC;
Expand Down Expand Up @@ -117,38 +118,108 @@ final class BeamToCDK {
*/
IAtomContainer toAtomContainer(Graph g, boolean kekule) {

IAtomContainer ac = emptyContainer();
IAtom[] atoms = new IAtom[g.order()];
IBond[] bonds = new IBond[g.size()];
IAtomContainer ac = emptyContainer();
int numAtoms = g.order();
IAtom[] atoms = new IAtom[numAtoms];
IBond[] bonds = new IBond[g.size()];

int j = 0; // bond index

for (int i = 0; i < g.order(); i++)
boolean checkAtomStereo = false;
boolean checkBondStereo = false;

for (int i = 0; i < g.order(); i++) {
checkAtomStereo = checkAtomStereo || g.configurationOf(i).type() != Configuration.Type.None;
atoms[i] = toCDKAtom(g.atom(i), g.implHCount(i));
for (Edge e : g.edges())
bonds[j++] = toCDKBond(e, atoms, kekule);
}
ac.setAtoms(atoms);
for (Edge edge : g.edges()) {

final int u = edge.either();
final int v = edge.other(u);
IBond bond = builder.newBond();
bond.setAtoms(new IAtom[]{atoms[u], atoms[v]});
bonds[j++] = bond;

switch (edge.bond()) {
case SINGLE:
bond.setOrder(IBond.Order.SINGLE);
break;
case UP:
case DOWN:
checkBondStereo = true;
bond.setOrder(IBond.Order.SINGLE);
break;
case IMPLICIT:
bond.setOrder(IBond.Order.SINGLE);
if (!kekule && atoms[u].isAromatic() && atoms[v].isAromatic()) {
bond.setIsAromatic(true);
bond.setOrder(IBond.Order.UNSET);
atoms[u].setIsAromatic(true);
atoms[v].setIsAromatic(true);
}
break;
case IMPLICIT_AROMATIC:
case AROMATIC:
bond.setOrder(IBond.Order.SINGLE);
bond.setIsAromatic(true);
atoms[u].setIsAromatic(true);
atoms[v].setIsAromatic(true);
break;
case DOUBLE:
bond.setOrder(IBond.Order.DOUBLE);
break;
case DOUBLE_AROMATIC:
bond.setOrder(IBond.Order.DOUBLE);
bond.setIsAromatic(true);
atoms[u].setIsAromatic(true);
atoms[v].setIsAromatic(true);
break;
case TRIPLE:
bond.setOrder(IBond.Order.TRIPLE);
break;
case QUADRUPLE:
bond.setOrder(IBond.Order.QUADRUPLE);
break;
default:
throw new IllegalArgumentException("Edge label " + edge.bond()
+ "cannot be converted to a CDK bond order");
}
}

// atom-centric stereo-specification (only tetrahedral ATM)
for (int u = 0; u < g.order(); u++) {
if (checkAtomStereo) {
for (int u = 0; u < g.order(); u++) {

Configuration c = g.configurationOf(u);
if (c.type() == Tetrahedral) {
Configuration c = g.configurationOf(u);
switch (c.type()) {
case Tetrahedral: {

IStereoElement se = newTetrahedral(u, g.neighbors(u), atoms, c);
IStereoElement se = newTetrahedral(u, g.neighbors(u), atoms, c);

if (se != null) ac.addStereoElement(se);
} else if (c.type() == ExtendedTetrahedral) {
IStereoElement se = newExtendedTetrahedral(u, g, atoms);
if (se != null) ac.addStereoElement(se);
break;
}
case ExtendedTetrahedral: {
IStereoElement se = newExtendedTetrahedral(u, g, atoms);

if (se != null) ac.addStereoElement(se);
if (se != null) ac.addStereoElement(se);
break;
}
case DoubleBond: {
checkBondStereo = true;
break;
}
}
}
}

ac.setAtoms(atoms);
ac.setBonds(bonds);

// use directional bonds to assign bond-based stereo-specification
addDoubleBondStereochemistry(g, ac);
if (checkBondStereo) {
addDoubleBondStereochemistry(g, ac);
}

// title suffix
ac.setProperty(CDKConstants.TITLE, g.getTitle());
Expand All @@ -173,12 +244,12 @@ private void addDoubleBondStereochemistry(Graph g, IAtomContainer ac) {
int v = e.other(u);

// find a directional bond for either end
Edge first = findDirectionalEdge(g, u);
Edge second = findDirectionalEdge(g, v);
Edge first = null;
Edge second = null;

// if either atom is not incident to a directional label there
// is no configuration
if (first != null && second != null) {
if ((first = findDirectionalEdge(g, u)) != null && (second = findDirectionalEdge(g, v)) != null) {

// if the directions (relative to the double bond) are the
// same then they are on the same side - otherwise they
Expand All @@ -202,7 +273,6 @@ private void addDoubleBondStereochemistry(Graph g, IAtomContainer ac) {
if (uConf.type() == Configuration.Type.DoubleBond &&
vConf.type() == Configuration.Type.DoubleBond) {


int[] nbrs = new int[6];
int[] uNbrs = g.neighbors(u);
int[] vNbrs = g.neighbors(v);
Expand Down Expand Up @@ -288,7 +358,10 @@ private void addDoubleBondStereochemistry(Graph g, IAtomContainer ac) {
* @return first directional edge (or null if none)
*/
private Edge findDirectionalEdge(Graph g, int u) {
for (Edge e : g.edges(u)) {
List<Edge> edges = g.edges(u);
if (edges.size() == 1)
return null;
for (Edge e : edges) {
Bond b = e.bond();
if (b == Bond.UP || b == Bond.DOWN) return e;
}
Expand Down Expand Up @@ -415,75 +488,6 @@ IAtom newCDKAtom(Atom atom) {
return createAtom(element);
}

/**
* Convert an edge from the Beam Graph to a CDK bond. Note -
* currently aromatic bonds are set to SINGLE and then.
*
* @param edge the Beam edge to convert
* @param atoms the already converted atoms
* @return new bond instance
*/
IBond toCDKBond(Edge edge, IAtom[] atoms, boolean kekule) {

int u = edge.either();
int v = edge.other(u);

IBond bond = createBond(atoms[u], atoms[v], toCDKBondOrder(edge));

// switch on the edge label to set aromatic flags
switch (edge.bond()) {
case AROMATIC:
case IMPLICIT_AROMATIC:
case DOUBLE_AROMATIC:
bond.setIsAromatic(true);
atoms[u].setIsAromatic(true);
atoms[v].setIsAromatic(true);
break;
case IMPLICIT:
if (!kekule && atoms[u].isAromatic() && atoms[v].isAromatic()) {
bond.setIsAromatic(true);
bond.setOrder(IBond.Order.UNSET);
atoms[u].setIsAromatic(true);
atoms[v].setIsAromatic(true);
}
break;
}

return bond;
}

/**
* Convert bond label on the edge to a CDK bond order - there is no aromatic
* bond order and as such this is currently set 'SINGLE' with the aromatic
* flag to be set.
*
* @param edge beam edge
* @return CDK bond order for the edge type
* @throws IllegalArgumentException the bond was a 'DOT' - should not be
* loaded but the exception is there
* in-case
*/
private IBond.Order toCDKBondOrder(Edge edge) {
switch (edge.bond()) {
case SINGLE:
case UP:
case DOWN:
case IMPLICIT: // single/aromatic - aromatic ~ single atm.
case IMPLICIT_AROMATIC:
case AROMATIC: // we will also set the flag
return IBond.Order.SINGLE;
case DOUBLE:
case DOUBLE_AROMATIC:
return IBond.Order.DOUBLE;
case TRIPLE:
return IBond.Order.TRIPLE;
case QUADRUPLE:
return IBond.Order.QUADRUPLE;
default:
throw new IllegalArgumentException("Edge label " + edge.bond()
+ "cannot be converted to a CDK bond order");
}
}

/**
* Create a new empty atom container instance.
Expand All @@ -507,22 +511,4 @@ private IAtom createAtom(Element element) {
atom.setAtomicNumber(element.atomicNumber());
return atom;
}

/**
* Create a new bond for the provided symbol. The bond is created by cloning
* an existing 'template'. Unfortunately IChemObjectBuilders really show a
* slow down when SMILES processing.
*
* @param either an atom of the bond
* @param other another atom of the bond
* @param order the order of the bond
*
* @return new bond instance
*/
private IBond createBond(IAtom either, IAtom other, IBond.Order order) {
IBond bond = builder.newBond();
bond.setAtoms(new IAtom[]{either, other});
bond.setOrder(order);
return bond;
}
}
Expand Up @@ -152,39 +152,6 @@ public void aromatic() {
assertTrue(a.getFlag(CDKConstants.ISAROMATIC));
}

@Test
public void singleBondEdge() {
IAtom[] atoms = new IAtom[]{mock(IAtom.class), mock(IAtom.class), mock(IAtom.class), mock(IAtom.class),
mock(IAtom.class), mock(IAtom.class)};
IBond b = g2c.toCDKBond(Bond.SINGLE.edge(0, 5), atoms, true);
assertThat(b.getOrder(), is(IBond.Order.SINGLE));
assertFalse(b.getFlag(CDKConstants.ISAROMATIC));
assertThat(b.getAtom(0), is(atoms[0]));
assertThat(b.getAtom(1), is(atoms[5]));
}

@Test
public void aromaticBondEdge() {
IAtom[] atoms = new IAtom[]{mock(IAtom.class), mock(IAtom.class), mock(IAtom.class), mock(IAtom.class),
mock(IAtom.class), mock(IAtom.class)};
IBond b = g2c.toCDKBond(Bond.AROMATIC.edge(0, 5), atoms, true);
assertThat(b.getOrder(), is(IBond.Order.SINGLE));
assertTrue(b.getFlag(CDKConstants.ISAROMATIC));
assertThat(b.getAtom(0), is(atoms[0]));
assertThat(b.getAtom(1), is(atoms[5]));
}

@Test
public void doubleBondEdge() {
IAtom[] atoms = new IAtom[]{mock(IAtom.class), mock(IAtom.class), mock(IAtom.class), mock(IAtom.class),
mock(IAtom.class), mock(IAtom.class)};
IBond b = g2c.toCDKBond(Bond.DOUBLE.edge(0, 5), atoms, true);
assertThat(b.getOrder(), is(IBond.Order.DOUBLE));
assertFalse(b.getFlag(CDKConstants.ISAROMATIC));
assertThat(b.getAtom(0), is(atoms[0]));
assertThat(b.getAtom(1), is(atoms[5]));
}

@Test
public void benzene() throws IOException {
IAtomContainer ac = convert("c1ccccc1");
Expand Down

0 comments on commit 3d566b8

Please sign in to comment.