Skip to content

Commit

Permalink
Merge pull request #222 from cdk/patch/mdlrad
Browse files Browse the repository at this point in the history
Looks good, and great to see wider support for radicals :)
  • Loading branch information
egonw committed Aug 16, 2016
2 parents 6582393 + 04966f8 commit 6a170cd
Show file tree
Hide file tree
Showing 6 changed files with 190 additions and 84 deletions.
Expand Up @@ -41,8 +41,6 @@
import org.openscience.cdk.interfaces.IIsotope;
import org.openscience.cdk.interfaces.IPseudoAtom;
import org.openscience.cdk.interfaces.ISingleElectron;
import org.openscience.cdk.interfaces.IStereoElement;
import org.openscience.cdk.interfaces.ITetrahedralChirality;
import org.openscience.cdk.interfaces.ITetrahedralChirality.Stereo;
import org.openscience.cdk.io.formats.IResourceFormat;
import org.openscience.cdk.io.formats.MDLV2000Format;
Expand Down Expand Up @@ -506,7 +504,7 @@ private IAtomContainer readAtomContainer(IAtomContainer molecule) throws CDKExce
hasQueryBonds = true; // also counts aromatic bond as query
} else {
int unpaired = outputContainer.getConnectedSingleElectronsCount(outputContainer.getAtom(i));
applyMDLValenceModel(outputContainer.getAtom(i), valence + unpaired);
applyMDLValenceModel(outputContainer.getAtom(i), valence + unpaired, unpaired);
}
}

Expand Down Expand Up @@ -544,13 +542,14 @@ private IAtomContainer readAtomContainer(IAtomContainer molecule) throws CDKExce
* 0 - this is the case when a query bond was read for an atom.
*
* @param atom the atom to apply the model to
* @param unpaired unpaired electron count
* @param explicitValence the explicit valence (bond order sum)
*/
private void applyMDLValenceModel(IAtom atom, int explicitValence) {
private void applyMDLValenceModel(IAtom atom, int explicitValence, int unpaired) {

if (atom.getValency() != null) {
if (atom.getValency() >= explicitValence)
atom.setImplicitHydrogenCount(atom.getValency() - explicitValence);
atom.setImplicitHydrogenCount(atom.getValency() - (explicitValence - unpaired));
else
atom.setImplicitHydrogenCount(0);
} else {
Expand Down Expand Up @@ -1886,24 +1885,11 @@ private void readPropertiesSlow(BufferedReader input, IAtomContainer container,
StringTokenizer st = new StringTokenizer(line.substring(9));
for (int i = 1; i <= infoCount; i++) {
int atomNumber = Integer.parseInt(st.nextToken().trim());
int spinMultiplicity = Integer.parseInt(st.nextToken().trim());
MDLV2000Writer.SPIN_MULTIPLICITY spin = MDLV2000Writer.SPIN_MULTIPLICITY.NONE;
if (spinMultiplicity > 0) {
int rad = Integer.parseInt(st.nextToken().trim());
MDLV2000Writer.SPIN_MULTIPLICITY spin = MDLV2000Writer.SPIN_MULTIPLICITY.None;
if (rad > 0) {
IAtom radical = container.getAtom(atomNumber - 1);
switch (spinMultiplicity) {
case 1:
spin = MDLV2000Writer.SPIN_MULTIPLICITY.DOUBLET;
break;
case 2:
spin = MDLV2000Writer.SPIN_MULTIPLICITY.SINGLET;
break;
case 3:
spin = MDLV2000Writer.SPIN_MULTIPLICITY.TRIPLET;
break;
default:
logger.debug("Invalid spin multiplicity found: " + spinMultiplicity);
break;
}
spin = MDLV2000Writer.SPIN_MULTIPLICITY.ofValue(rad);
for (int j = 0; j < spin.getSingleElectrons(); j++) {
container.addSingleElectron(container.getBuilder().newInstance(ISingleElectron.class,
radical));
Expand Down
66 changes: 33 additions & 33 deletions storage/io/src/main/java/org/openscience/cdk/io/MDLV2000Writer.java
Expand Up @@ -24,35 +24,12 @@
*/
package org.openscience.cdk.io;

import java.io.BufferedWriter;
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.StringWriter;
import java.io.Writer;
import java.nio.charset.StandardCharsets;
import java.text.NumberFormat;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.openscience.cdk.CDKConstants;
import org.openscience.cdk.config.Isotopes;
import org.openscience.cdk.exception.CDKException;
import org.openscience.cdk.interfaces.IAtom;
import org.openscience.cdk.interfaces.IAtomContainer;
import org.openscience.cdk.interfaces.IBond;
import org.openscience.cdk.interfaces.IBond.Order;
import org.openscience.cdk.interfaces.IChemFile;
import org.openscience.cdk.interfaces.IChemModel;
import org.openscience.cdk.interfaces.IChemObject;
Expand All @@ -74,6 +51,28 @@
import org.openscience.cdk.tools.manipulator.AtomContainerManipulator;
import org.openscience.cdk.tools.manipulator.ChemFileManipulator;

import java.io.BufferedWriter;
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.StringWriter;
import java.io.Writer;
import java.nio.charset.StandardCharsets;
import java.text.NumberFormat;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
* Writes MDL molfiles, which contains a single molecule (see {@cdk.cite DAL92}).
* For writing a MDL molfile you can this code:
Expand Down Expand Up @@ -118,7 +117,10 @@ public class MDLV2000Writer extends DefaultChemObjectWriter {
*/
public enum SPIN_MULTIPLICITY {

NONE(0, 0), SINGLET(2, 1), DOUBLET(1, 2), TRIPLET(3, 2);
None(0, 0),
Monovalent(2, 1),
DivalentSinglet(1, 2),
DivalentTriplet(3, 2);

// the radical SDF value
private final int value;
Expand Down Expand Up @@ -158,13 +160,13 @@ public int getSingleElectrons() {
public static SPIN_MULTIPLICITY ofValue(int value) throws CDKException {
switch (value) {
case 0:
return NONE;
return None;
case 1:
return DOUBLET;
return DivalentSinglet;
case 2:
return SINGLET;
return Monovalent;
case 3:
return TRIPLET;
return DivalentTriplet;
default:
throw new CDKException("unknown spin multiplicity: " + value);
}
Expand Down Expand Up @@ -712,13 +714,11 @@ else if (valence > 0 && valence < 15)
case 0:
continue;
case 1:
atomIndexSpinMap.put(i, SPIN_MULTIPLICITY.SINGLET);
atomIndexSpinMap.put(i, SPIN_MULTIPLICITY.Monovalent);
break;
case 2:
atomIndexSpinMap.put(i, SPIN_MULTIPLICITY.DOUBLET);
break;
case 3:
atomIndexSpinMap.put(i, SPIN_MULTIPLICITY.TRIPLET);
// information loss, divalent but singlet or triplet?
atomIndexSpinMap.put(i, SPIN_MULTIPLICITY.DivalentSinglet);
break;
default:
logger.debug("Invalid number of radicals found: " + eCount);
Expand Down
148 changes: 120 additions & 28 deletions storage/io/src/main/java/org/openscience/cdk/io/MDLV3000Reader.java
Expand Up @@ -47,8 +47,10 @@
import org.openscience.cdk.interfaces.IChemObject;
import org.openscience.cdk.interfaces.IChemObjectBuilder;
import org.openscience.cdk.interfaces.IPseudoAtom;
import org.openscience.cdk.interfaces.ISingleElectron;
import org.openscience.cdk.io.formats.IResourceFormat;
import org.openscience.cdk.io.formats.MDLV3000Format;
import org.openscience.cdk.isomorphism.matchers.IQueryBond;
import org.openscience.cdk.sgroup.Sgroup;
import org.openscience.cdk.sgroup.SgroupType;
import org.openscience.cdk.tools.ILoggingTool;
Expand Down Expand Up @@ -151,6 +153,8 @@ public IAtomContainer readMolecule(IChemObjectBuilder builder) throws CDKExcepti
}

public IAtomContainer readConnectionTable(IChemObjectBuilder builder) throws CDKException {


logger.info("Reading CTAB block");
IAtomContainer readData = builder.newInstance(IAtomContainer.class);
boolean foundEND = false;
Expand All @@ -175,6 +179,27 @@ public IAtomContainer readConnectionTable(IChemObjectBuilder builder) throws CDK
}
lastLine = readLine();
}

for (IAtom atom : readData.atoms()) {
// XXX: slow method is slow
int valence = 0;
for (IBond bond : readData.getConnectedBondsList(atom)) {
if (bond instanceof IQueryBond || bond.getOrder() == IBond.Order.UNSET) {
valence = -1;
break;
}
else {
valence += bond.getOrder().numeric();
}
}
if (valence < 0) {
logger.warn("Cannot set valence for atom with query bonds"); // also counts aromatic bond as query
} else {
final int unpaired = readData.getConnectedSingleElectronsCount(atom);
applyMDLValenceModel(atom, valence + unpaired, unpaired);
}
}

return readData;
}

Expand Down Expand Up @@ -310,13 +335,41 @@ public void readAtomBlock(IAtomContainer readData) throws CDKException {
String key = keys.next();
String value = options.get(key);
try {
if (key.equals("CHG")) {
int charge = Integer.parseInt(value);
if (charge != 0) { // zero is no charge specified
atom.setFormalCharge(charge);
}
} else {
logger.warn("Not parsing key: " + key);
switch (key) {
case "CHG":
int charge = Integer.parseInt(value);
if (charge != 0) { // zero is no charge specified
atom.setFormalCharge(charge);
}
break;
case "RAD":
int numElectons = MDLV2000Writer.SPIN_MULTIPLICITY.ofValue(Integer.parseInt(value))
.getSingleElectrons();
while (numElectons-- > 0) {
readData.addSingleElectron(readData.getBuilder().newInstance(ISingleElectron.class, atom));
}
break;
case "VAL":
if (!(atom instanceof IPseudoAtom)) {
try {
int valence = Integer.parseInt(value);
if (valence != 0) {
//15 is defined as 0 in mol files
if (valence == 15)
atom.setValency(0);
else
atom.setValency(valence);
}
} catch (Exception exception) {
handleError("Could not parse valence information field", lineNumber, 0, 0, exception);
}
} else {
logger.error("Cannot set valence information for a non-element!");
}
break;
default:
logger.warn("Not parsing key: " + key);
break;
}
} catch (Exception exception) {
String error = "Error while parsing key/value " + key + "=" + value + ": "
Expand Down Expand Up @@ -408,27 +461,32 @@ public void readBondBlock(IAtomContainer readData) throws CDKException {
for (String key : options.keySet()) {
String value = options.get(key);
try {
if (key.equals("CFG")) {
int configuration = Integer.parseInt(value);
if (configuration == 0) {
bond.setStereo(IBond.Stereo.NONE);
} else if (configuration == 1) {
bond.setStereo(IBond.Stereo.UP);
} else if (configuration == 2) {
bond.setStereo((IBond.Stereo) CDKConstants.UNSET);
} else if (configuration == 3) {
bond.setStereo(IBond.Stereo.DOWN);
}
} else if (key.equals("ENDPTS")) {
String[] endptStr = value.split(" ");
// skip first value that is count
for (int i = 1; i < endptStr.length; i++) {
endpts.add(readData.getAtom(Integer.parseInt(endptStr[i]) - 1));
}
} else if (key.equals("ATTACH")) {
attach = value;
} else {
logger.warn("Not parsing key: " + key);
switch (key) {
case "CFG":
int configuration = Integer.parseInt(value);
if (configuration == 0) {
bond.setStereo(IBond.Stereo.NONE);
} else if (configuration == 1) {
bond.setStereo(IBond.Stereo.UP);
} else if (configuration == 2) {
bond.setStereo((IBond.Stereo) CDKConstants.UNSET);
} else if (configuration == 3) {
bond.setStereo(IBond.Stereo.DOWN);
}
break;
case "ENDPTS":
String[] endptStr = value.split(" ");
// skip first value that is count
for (int i = 1; i < endptStr.length; i++) {
endpts.add(readData.getAtom(Integer.parseInt(endptStr[i]) - 1));
}
break;
case "ATTACH":
attach = value;
break;
default:
logger.warn("Not parsing key: " + key);
break;
}
} catch (Exception exception) {
String error = "Error while parsing key/value " + key + "=" + value + ": "
Expand Down Expand Up @@ -626,4 +684,38 @@ public void close() throws IOException {

private void initIOSettings() {}

/**
* Applies the MDL valence model to atoms using the explicit valence (bond
* order sum) and charge to determine the correct number of implicit
* hydrogens. The model is not applied if the explicit valence is less than
* 0 - this is the case when a query bond was read for an atom.
*
* @param atom the atom to apply the model to
* @param explicitValence the explicit valence (bond order sum)
*/
private void applyMDLValenceModel(IAtom atom, int explicitValence, int unpaired) {

if (atom.getValency() != null) {
if (atom.getValency() >= explicitValence)
atom.setImplicitHydrogenCount(atom.getValency() - (explicitValence - unpaired));
else
atom.setImplicitHydrogenCount(0);
} else {
Integer element = atom.getAtomicNumber();
if (element == null) element = 0;

Integer charge = atom.getFormalCharge();
if (charge == null) charge = 0;

int implicitValence = MDLValence.implicitValence(element, charge, explicitValence);
if (implicitValence < explicitValence) {
atom.setValency(explicitValence);
atom.setImplicitHydrogenCount(0);
} else {
atom.setValency(implicitValence);
atom.setImplicitHydrogenCount(implicitValence - explicitValence);
}
}
}

}
Expand Up @@ -221,7 +221,16 @@ private void writeAtomBlock(IAtomContainer mol, IAtom[] atoms, Map<IChemObject,
final int chg = nullAsZero(atom.getFormalCharge());
final int mass = nullAsZero(atom.getMassNumber());
final int hcnt = nullAsZero(atom.getImplicitHydrogenCount());
final int rad = mol.getConnectedSingleElectronsCount(atom);
final int elec = mol.getConnectedSingleElectronsCount(atom);
int rad = 0;
switch (elec) {
case 1: // 2
rad = MDLV2000Writer.SPIN_MULTIPLICITY.Monovalent.getValue();
break;
case 2: // 1 or 3? Information loss as to which
rad = MDLV2000Writer.SPIN_MULTIPLICITY.DivalentSinglet.getValue();
break;
}

int expVal = 0;
for (IBond bond : mol.getConnectedBondsList(atom)) {
Expand Down
Expand Up @@ -137,4 +137,12 @@ public void testPseudoAtomLabels() throws Exception {
assertThat(sgroups.get(0).getType(), is(SgroupType.ExtMulticenter));
}
}

@Test public void radicalsInCH3() throws Exception {
try (MDLV3000Reader reader = new MDLV3000Reader(getClass().getResourceAsStream("CH3.mol"))) {
IAtomContainer container = reader.read(new org.openscience.cdk.AtomContainer(0, 0, 0, 0));
assertThat(container.getSingleElectronCount(), is(1));
assertThat(container.getAtom(0).getImplicitHydrogenCount(), is(3));
}
}
}

0 comments on commit 6a170cd

Please sign in to comment.