Skip to content

Commit

Permalink
Updated bcut descriptor to check for undefined values before getting …
Browse files Browse the repository at this point in the history
…eigenvalues. Added test file and test case. Addresses bug 3489559

Change-Id: Ic3fd58f387f8b8fb060d07accb790f27c2d8a473
Signed-off-by: Egon Willighagen <egonw@users.sourceforge.net>
  • Loading branch information
rajarshi authored and egonw committed Feb 20, 2012
1 parent 7c7248a commit 7d698ef
Show file tree
Hide file tree
Showing 3 changed files with 158 additions and 13 deletions.
Expand Up @@ -128,7 +128,7 @@
@TestClass("org.openscience.cdk.qsar.descriptors.molecular.BCUTDescriptorTest")
public class BCUTDescriptor implements IMolecularDescriptor {
private static ILoggingTool logger =
LoggingToolFactory.createLoggingTool(BCUTDescriptor.class);
LoggingToolFactory.createLoggingTool(BCUTDescriptor.class);

// the number of negative & positive eigenvalues
// to return for each class of BCUT descriptor
Expand Down Expand Up @@ -201,7 +201,7 @@ public Object[] getParameters() {
return (params);
}

@TestMethod(value="testNamesConsistency")
@TestMethod("testNamesConsistency")
public String[] getDescriptorNames() {
String[] names;
String[] suffix = {"w", "c", "p"};
Expand Down Expand Up @@ -250,12 +250,20 @@ public Object getParameterType(String name) {
return (object);
}

/**
 * Checks whether a matrix contains any element that is not a finite number.
 *
 * Each row is scanned over its own length, so rows of differing lengths are
 * handled; a matrix with no rows yields <code>false</code>.
 *
 * @param m the matrix to inspect
 * @return <code>true</code> if at least one element is NaN or infinite,
 *         <code>false</code> otherwise
 */
private boolean hasUndefined(double[][] m) {
    for (double[] row : m) {
        for (double value : row) {
            if (Double.isNaN(value) || Double.isInfinite(value)) return true;
        }
    }
    return false;
}

static private class BurdenMatrix {

static double[][] evalMatrix(IAtomContainer atomContainer, double[] vsd) {
IAtomContainer local = AtomContainerManipulator.removeHydrogens(atomContainer);

int natom = local.getAtomCount();
double[][] matrix = new double[natom][natom];
for (int i = 0; i < natom; i++) {
Expand Down Expand Up @@ -303,7 +311,7 @@ static double[][] evalMatrix(IAtomContainer atomContainer, double[] vsd) {
* @return An ArrayList containing the descriptors. The default is to return
* all calculated eigenvalues of the Burden matrices in the order described
* above. If a parameter list was supplied, then only the specified number
* of highest and lowest eigenvalues (for each class of BCUT) will be returned.
* of highest and lowest eigenvalues (for each class of BCUT) will be returned.
*/
@TestMethod("testCalculate_IAtomContainer")
public DescriptorValue calculate(IAtomContainer container) {
Expand All @@ -318,7 +326,7 @@ public DescriptorValue calculate(IAtomContainer container) {

// add H's in case they're not present
try {
AtomContainerManipulator.percieveAtomTypesAndConfigureAtoms(molecule);
AtomContainerManipulator.percieveAtomTypesAndConfigureAtoms(molecule);
CDKHydrogenAdder hAdder = CDKHydrogenAdder.getInstance(molecule.getBuilder());
hAdder.addImplicitHydrogens(molecule);
AtomContainerManipulator.convertImplicitToExplicitHydrogens(molecule);
Expand All @@ -336,7 +344,7 @@ public DescriptorValue calculate(IAtomContainer container) {
try {
CDKHueckelAromaticityDetector.detectAromaticity(molecule);
} catch (CDKException e) {
return getDummyDescriptorValue(new CDKException("Error in aromaticity perception: "+e.getMessage()));
return getDummyDescriptorValue(new CDKException("Error in aromaticity perception: " + e.getMessage()));
}
}

Expand Down Expand Up @@ -364,10 +372,12 @@ public DescriptorValue calculate(IAtomContainer container) {
}

double[][] burdenMatrix = BurdenMatrix.evalMatrix(molecule, diagvalue);
// Return the dummy (error) descriptor value when the Burden matrix contains
// NaN or infinite entries, rather than handing it to the eigenvalue decomposition.
if (hasUndefined(burdenMatrix)) {
    return getDummyDescriptorValue(new CDKException("Burden matrix has undefined values"));
}
Matrix matrix = new Matrix(burdenMatrix);
EigenvalueDecomposition eigenDecomposition = new EigenvalueDecomposition(matrix);
double[] eval1 = eigenDecomposition.getRealEigenvalues();

// get charge weighted BCUT
LonePairElectronChecker lpcheck = new LonePairElectronChecker();
GasteigerPEPEPartialCharges pepe;
Expand All @@ -393,7 +403,9 @@ public DescriptorValue calculate(IAtomContainer container) {
diagvalue[counter] = molecule.getAtom(i).getCharge();
counter++;
}
burdenMatrix = BurdenMatrix.evalMatrix(molecule, diagvalue);
burdenMatrix = BurdenMatrix.evalMatrix(molecule, diagvalue);
// Return the dummy (error) descriptor value when the charge-weighted Burden matrix
// contains NaN or infinite entries, rather than decomposing it.
if (hasUndefined(burdenMatrix)) {
    return getDummyDescriptorValue(new CDKException("Burden matrix has undefined values"));
}
matrix = new Matrix(burdenMatrix);
eigenDecomposition = new EigenvalueDecomposition(matrix);
double[] eval2 = eigenDecomposition.getRealEigenvalues();
Expand All @@ -405,11 +417,13 @@ public DescriptorValue calculate(IAtomContainer container) {
Polarizability pol = new Polarizability();
counter = 0;
for (int i = 0; i < molecule.getAtomCount(); i++) {
if (molecule.getAtom(i).getSymbol().equals("H")) continue;
if (molecule.getAtom(i).getSymbol().equals("H")) continue;
diagvalue[counter] = pol.calculateGHEffectiveAtomPolarizability(molecule, molecule.getAtom(i), false, topoDistance);
counter++;
}
burdenMatrix = BurdenMatrix.evalMatrix(molecule, diagvalue);
// Return the dummy (error) descriptor value when the polarizability-weighted Burden
// matrix contains NaN or infinite entries, rather than decomposing it.
if (hasUndefined(burdenMatrix)) {
    return getDummyDescriptorValue(new CDKException("Burden matrix has undefined values"));
}
matrix = new Matrix(burdenMatrix);
eigenDecomposition = new EigenvalueDecomposition(matrix);
double[] eval3 = eigenDecomposition.getRealEigenvalues();
Expand All @@ -435,7 +449,7 @@ public DescriptorValue calculate(IAtomContainer container) {
enhigh = 0;
}

DoubleArrayResult retval = new DoubleArrayResult( (lnlow+enlow+lnhigh+enhigh) * 3);
DoubleArrayResult retval = new DoubleArrayResult((lnlow + enlow + lnhigh + enhigh) * 3);

for (int i = 0; i < lnlow; i++) retval.add(eval1[i]);
for (int i = 0; i < enlow; i++) retval.add(Double.NaN);
Expand Down
109 changes: 109 additions & 0 deletions src/test/data/mdl/burden_undefined.sdf
@@ -0,0 +1,109 @@

OpenBabel02181210582D

10 12 0 0 0 0 0 0 0 0999 V2000
1.7872 -1.2465 0.0000 As 0 0 0 0 0
1.7549 -2.3765 0.0000 O 0 0 0 0 0
2.8039 -2.4283 0.0000 As 0 0 0 0 0
1.4020 -3.2378 0.0000 O 0 0 0 0 0
0.0000 -2.4283 0.0000 As 0 0 0 0 0
0.7414 -1.8034 0.0000 O 0 0 0 0 0
0.0000 -0.8094 0.0000 O 0 0 0 0 0
1.4020 0.0000 0.0000 As 0 0 0 0 0
0.9260 -0.8256 0.0000 O 0 0 0 0 0
2.8039 -0.8094 0.0000 O 0 0 0 0 0
1 2 1 0 0 0
1 6 1 0 0 0
1 9 1 0 0 0
2 3 1 0 0 0
3 4 1 0 0 0
3 10 1 0 0 0
4 5 1 0 0 0
5 6 1 0 0 0
5 7 1 0 0 0
7 8 1 0 0 0
8 9 1 0 0 0
8 10 1 0 0 0
M END
> <DSSTox_RID>
20103

> <DSSTox_CID>
103

> <DSSTox_Generic_SID>
20103

> <DSSTox_FileID>
106_CPDBAS_v5d

> <STRUCTURE_Formula>
As4O6

> <STRUCTURE_MolecularWeight>
395.6828

> <STRUCTURE_ChemicalType>
inorganic

> <STRUCTURE_Shown>
tested chemical

> <TestSubstance_ChemicalName>
Arsenious oxide

> <TestSubstance_CASRN>
1327-53-3

> <TestSubstance_Description>
single chemical compound

> <STRUCTURE_ChemicalName_IUPAC>
tricyclo[3.3.1.1~3,7~]tetraarsoxane

> <STRUCTURE_SMILES>
[As]21O[As]3O[As](O1)O[As](O2)O3

> <STRUCTURE_InChI>
InChI=1/As4O6/c5-1-6-3-8-2(5)9-4(7-1)10-3

> <STRUCTURE_InChIKey>
KTTMEOWBIWLMSE-UHFFFAOYAT

> <StudyType>
Carcinogenicity

> <Endpoint>
TD50; Tumor Target Sites

> <Species>
mouse

> <ActivityScore_CPDBAS_Mouse>
0

> <TD50_Mouse_Note>
no positive results

> <TargetSites_Mouse_Male>
no positive results

> <TargetSites_Mouse_Female>
no positive results

> <ActivityOutcome_CPDBAS_Mouse>
inactive

> <ActivityOutcome_CPDBAS_SingleCellCall>
inactive

> <ActivityOutcome_CPDBAS_MultiCellCall>
inactive

> <ActivityOutcome_CPDBAS_MultiCellCall_Details>
multisex inactive

> <ChemicalPage_URL>
http://potency.berkeley.edu/chempages/ARSENIOUS%20OXIDE.html

$$$$
Expand Up @@ -19,9 +19,6 @@
*/
package org.openscience.cdk.qsar.descriptors.molecular;

import java.io.InputStream;
import java.util.List;

import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
Expand All @@ -33,11 +30,15 @@
import org.openscience.cdk.interfaces.IMolecule;
import org.openscience.cdk.io.HINReader;
import org.openscience.cdk.io.ISimpleChemObjectReader;
import org.openscience.cdk.io.MDLV2000Reader;
import org.openscience.cdk.qsar.DescriptorValue;
import org.openscience.cdk.qsar.result.DoubleArrayResult;
import org.openscience.cdk.smiles.SmilesParser;
import org.openscience.cdk.tools.manipulator.ChemFileManipulator;

import java.io.InputStream;
import java.util.List;


/**
* TestSuite that runs all QSAR tests.
Expand Down Expand Up @@ -155,5 +156,26 @@ public void testBCUT() throws Exception {
DoubleArrayResult result1 = (DoubleArrayResult) descriptor.calculate(mol).getValue();
for (int i = 0; i < result1.length(); i++) Assert.assertTrue( result1.get(i) != Double.NaN);
}

/**
 * Reads the structure stored in <code>data/mdl/burden_undefined.sdf</code> and
 * expects the descriptor to report the "Burden matrix has undefined values"
 * exception for it.
 *
 * @cdk.bug 3489559
 */
@Test
public void testUndefinedValues() throws Exception {
    String filename = "data/mdl/burden_undefined.sdf";
    InputStream ins = this.getClass().getClassLoader().getResourceAsStream(filename);
    // fail with a clear message if the test resource is missing from the classpath
    Assert.assertNotNull("Test resource not found: " + filename, ins);

    ISimpleChemObjectReader reader = new MDLV2000Reader(ins);
    ChemFile content;
    try {
        content = reader.read(new ChemFile());
    } finally {
        // close the reader even when parsing fails
        reader.close();
    }

    List<IAtomContainer> cList = ChemFileManipulator.getAllAtomContainers(content);
    IAtomContainer ac = cList.get(0);

    Assert.assertNotNull(ac);
    addExplicitHydrogens(ac);
    CDKHueckelAromaticityDetector.detectAromaticity(ac);

    Exception e = descriptor.calculate(ac).getException();
    Assert.assertNotNull(e);
    Assert.assertEquals("Burden matrix has undefined values", e.getMessage());
}
}

0 comments on commit 7d698ef

Please sign in to comment.