Skip to content

Commit

Permalink
Updates MDL readers to set symbol of pseudoatoms to label. Ensures tha…
Browse files Browse the repository at this point in the history
…t writing such a molecule does not force the SDF writer to see all pseudoatoms as R groups. Added a unit test and test cases for the V2000 and V3000 readers as well as the V2000 writer.

Signed-off-by: Egon Willighagen <egonw@users.sourceforge.net>
Signed-off-by: Rajarshi  Guha <rajarshi.guha@gmail.com>
  • Loading branch information
rajarshi committed Jan 22, 2012
1 parent aac73e7 commit e59d204
Show file tree
Hide file tree
Showing 7 changed files with 184 additions and 29 deletions.
13 changes: 7 additions & 6 deletions src/main/org/openscience/cdk/io/MDLV2000Reader.java
Expand Up @@ -64,7 +64,7 @@

/**
* Reads content from MDL molfiles and SD files.
* It can read a {@link IMolecule} or {@link IChemModel} from an MDL molfile, and
* It can read a {@link IAtomContainer} or {@link IChemModel} from an MDL molfile, and
* a {@link IChemFile} from a SD file, with a {@link IChemSequence} of
* {@link IChemModel}'s, where each IChemModel will contain one IMolecule.
*
Expand Down Expand Up @@ -478,15 +478,16 @@ private IMolecule readMolecule(IMolecule molecule) throws CDKException {
}
}
else {
atom = molecule.getBuilder().newInstance(IPseudoAtom.class,"R");
atom = molecule.getBuilder().newInstance(IPseudoAtom.class,element);
}
} else {
handleError(
"Invalid element type. Must be an existing " +
"element, or one in: A, Q, L, LP, *.",
linecount, 32, 35
"Invalid element type. Must be an existing " +
"element, or one in: A, Q, L, LP, *.",
linecount, 32, 35
);
atom = molecule.getBuilder().newInstance(IPseudoAtom.class,element);
atom = molecule.getBuilder().newInstance(IPseudoAtom.class, element);
atom.setSymbol(element);
}

// store as 3D for now, convert to 2D (if totalZ == 0.0) later
Expand Down
32 changes: 16 additions & 16 deletions src/main/org/openscience/cdk/io/MDLV3000Reader.java
Expand Up @@ -20,22 +20,6 @@
*/
package org.openscience.cdk.io;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.StringReader;
import java.util.Hashtable;
import java.util.Iterator;
import java.util.Map;
import java.util.StringTokenizer;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import javax.vecmath.Point2d;
import javax.vecmath.Point3d;

import org.openscience.cdk.CDKConstants;
import org.openscience.cdk.annotations.TestClass;
import org.openscience.cdk.annotations.TestMethod;
Expand All @@ -55,6 +39,21 @@
import org.openscience.cdk.tools.LoggingToolFactory;
import org.openscience.cdk.tools.manipulator.BondManipulator;

import javax.vecmath.Point2d;
import javax.vecmath.Point3d;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.StringReader;
import java.util.Hashtable;
import java.util.Iterator;
import java.util.Map;
import java.util.StringTokenizer;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
* Class that implements the MDL mol V3000 format. This reader reads the
* element symbol and 2D or 3D coordinates from the ATOM block.
Expand Down Expand Up @@ -272,6 +271,7 @@ public void readAtomBlock(IAtomContainer readData) throws CDKException {
throw new CDKException("Invalid element type. Must be an existing element, or one in: A, Q, L, LP, *.");
}
atom = readData.getBuilder().newInstance(IPseudoAtom.class,element);
atom.setSymbol(element);
}

// parse atom coordinates (in Angstrom)
Expand Down
78 changes: 78 additions & 0 deletions src/test/data/mdl/pseudoatoms.sdf
@@ -0,0 +1,78 @@

-OEChem-01171208162D

31 32 0 1 0 0 0 0 0999 V2000
2.2700 -0.5000 0.0000 C 0 0 3 0 0 0 0 0 0 0 0 0
-1.2200 -0.1300 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
2.9600 -0.7000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
-0.6200 1.4900 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0
-0.1200 -1.4500 0.0000 Gln 0 0 0 0 0 0 0 0 0 0 0 0
-0.8400 -0.9400 0.0000 Ile 0 0 0 0 0 0 0 0 0 0 0 0
0.7600 -1.5400 0.0000 Asn 0 0 0 0 0 0 0 0 0 0 0 0
-1.1200 0.7400 0.0000 C 0 0 3 0 0 0 0 0 0 0 0 0
1.7600 -1.2400 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0
0.2000 1.8500 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
3.1800 -1.5400 0.0000 Pro 0 0 0 0 0 0 0 0 0 0 0 0
4.0200 -1.7800 0.0000 Leu 0 0 0 0 0 0 0 0 0 0 0 0
4.2600 -2.6500 0.0000 Gly 0 0 0 0 0 0 0 0 0 0 0 0
-2.1000 -0.2500 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
3.5800 -0.0700 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
-1.8400 1.2800 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
0.0400 2.7500 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
5.1000 -2.8900 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0
-2.6800 0.9200 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
2.5400 -1.2400 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
2.4300 0.5500 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
2.0800 -1.9900 0.0000 S 0 0 0 0 0 0 0 0 0 0 0 0
-4.3000 0.1700 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
-3.4000 1.4300 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
-2.7800 0.0200 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
-4.2200 1.0700 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
-3.5800 -0.3400 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
-5.1200 -0.1900 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
1.0900 1.8600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
1.9800 1.4100 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
-5.2200 -1.0600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
2 6 1 0 0 0 0
1 3 1 0 0 0 0
4 10 1 0 0 0 0
5 7 1 0 0 0 0
5 6 1 0 0 0 0
7 9 1 0 0 0 0
4 8 1 0 0 0 0
1 9 1 0 0 0 0
10 29 1 0 0 0 0
3 11 1 0 0 0 0
11 12 1 0 0 0 0
12 13 1 0 0 0 0
2 14 2 0 0 0 0
3 15 2 0 0 0 0
8 16 1 0 0 0 0
10 17 2 0 0 0 0
13 18 1 0 0 0 0
16 19 1 0 0 0 0
1 20 1 0 0 0 0
1 21 1 0 0 0 0
20 22 1 0 0 0 0
23 26 2 0 0 0 0
19 24 2 0 0 0 0
19 25 1 0 0 0 0
24 26 1 0 0 0 0
25 27 2 0 0 0 0
23 28 1 0 0 0 0
29 30 1 0 0 0 0
21 30 1 0 0 0 0
28 31 1 0 0 0 0
2 8 1 0 0 0 0
23 27 1 0 0 0 0
M END
> <Index_Number>
00819

> <Name>
Carbetocin

> <Entry_Date>
6/19/1990

$$$$
32 changes: 32 additions & 0 deletions src/test/data/mdl/pseudoatomsv3000.mol
@@ -0,0 +1,32 @@

Marvin 01211213222D

0 0 0 0 0 999 V3000
M V30 BEGIN CTAB
M V30 COUNTS 10 10 0 0 0
M V30 BEGIN ATOM
M V30 1 C -1.925 0.935 0 0
M V30 2 C -3.2587 0.165 0 0
M V30 3 C -3.2587 -1.375 0 0
M V30 4 C -1.925 -2.145 0 0
M V30 5 C -0.5913 -1.375 0 0
M V30 6 C -0.5913 0.165 0 0
M V30 7 C 0.8962 0.5636 0 0
M V30 8 C 2.2299 -0.2064 0 0
M V30 9 C 3.7174 0.1922 0 0
M V30 10 Leu 0.8962 2.1036 0 0
M V30 END ATOM
M V30 BEGIN BOND
M V30 1 1 1 2
M V30 2 2 1 6
M V30 3 2 2 3
M V30 4 1 3 4
M V30 5 2 4 5
M V30 6 1 5 6
M V30 7 1 6 7
M V30 8 1 7 8
M V30 9 1 8 9
M V30 10 1 7 10
M V30 END BOND
M V30 END CTAB
M END
14 changes: 14 additions & 0 deletions src/test/org/openscience/cdk/io/MDLV2000ReaderTest.java
Expand Up @@ -56,6 +56,7 @@
import java.util.Properties;
import java.util.Set;


/**
* TestCase for the reading MDL mol files using one test file.
* A test case for SDF files is available as separate Class.
Expand Down Expand Up @@ -905,4 +906,17 @@ public void testAliasAtomNaming() throws Exception {
}
Assert.assertEquals(2, r1Count);
}

/**
 * Checks that the V2000 reader keeps the atom-block text of a non-element
 * entry as both the symbol and the label of the resulting pseudo atom.
 * The fifth atom (index 4) of <code>data/mdl/pseudoatoms.sdf</code> is
 * written as "Gln" in the atom block.
 *
 * @throws Exception if the test file cannot be read or parsed
 */
@Test
public void testPseudoAtomLabels() throws Exception {
    InputStream in = ClassLoader.getSystemResourceAsStream("data/mdl/pseudoatoms.sdf");
    MDLV2000Reader reader = new MDLV2000Reader(in);
    try {
        IAtomContainer molecule = DefaultChemObjectBuilder.getInstance().newInstance(IAtomContainer.class);
        molecule = reader.read(molecule);

        // Verify the type before casting so a wrong atom type shows up as
        // an assertion failure instead of a ClassCastException.
        Assert.assertTrue(
            "atom 5 should have been read as a pseudo atom",
            molecule.getAtom(4) instanceof IPseudoAtom
        );
        IPseudoAtom pa = (IPseudoAtom) molecule.getAtom(4);
        Assert.assertEquals("Gln", pa.getSymbol());
        Assert.assertEquals("Gln", pa.getLabel());
    } finally {
        // Release the reader (and the stream it wraps) even when an assertion fails.
        reader.close();
    }
}

}
30 changes: 23 additions & 7 deletions src/test/org/openscience/cdk/io/MDLV2000WriterTest.java
Expand Up @@ -23,12 +23,6 @@
*/
package org.openscience.cdk.io;

import java.io.StringWriter;

import java.util.Properties;

import javax.vecmath.Point2d;
import javax.vecmath.Point3d;

import org.junit.Assert;
import org.junit.BeforeClass;
Expand Down Expand Up @@ -57,6 +51,12 @@
import org.openscience.cdk.smiles.SmilesParser;
import org.openscience.cdk.templates.MoleculeFactory;

import javax.vecmath.Point2d;
import javax.vecmath.Point3d;
import java.io.StringWriter;
import java.io.InputStream;
import java.util.Properties;


/**
* TestCase for the writer MDL mol files using one test file.
Expand Down Expand Up @@ -358,5 +358,21 @@ public void testUnsupportedBondOrder() throws Exception {
mdlWriter.write(benzene);
Assert.assertTrue(writer.toString().indexOf("2 1 4 0 0 0 0") != -1);
}


/**
 * Round-trips <code>data/mdl/pseudoatoms.sdf</code> through the V2000
 * reader and writer and checks that the pseudo atom labels "Gln" and
 * "Leu" from the input appear in the written output.
 *
 * @throws Exception if the test file cannot be read or the molecule
 *                   cannot be written
 */
@Test
public void testWritePseudoAtoms() throws Exception {
    InputStream in = ClassLoader.getSystemResourceAsStream("data/mdl/pseudoatoms.sdf");
    MDLV2000Reader reader = new MDLV2000Reader(in);
    StringWriter writer = new StringWriter();
    String output;
    try {
        IAtomContainer molecule = DefaultChemObjectBuilder.getInstance().newInstance(IAtomContainer.class);
        molecule = reader.read(molecule);

        MDLV2000Writer mwriter = new MDLV2000Writer(writer);
        try {
            mwriter.write(molecule);
        } finally {
            // Closing a StringWriter has no effect, so its content stays readable afterwards.
            mwriter.close();
        }
        output = writer.toString();
    } finally {
        // Release the reader (and the stream it wraps) even when reading or writing fails.
        reader.close();
    }

    Assert.assertTrue("written output should contain the label Gln", output.contains("Gln"));
    Assert.assertTrue("written output should contain the label Leu", output.contains("Leu"));
}

}
14 changes: 14 additions & 0 deletions src/test/org/openscience/cdk/io/MDLV3000ReaderTest.java
Expand Up @@ -35,8 +35,10 @@
import org.openscience.cdk.interfaces.IAtom;
import org.openscience.cdk.interfaces.IMolecule;
import org.openscience.cdk.nonotify.NNMolecule;
import org.openscience.cdk.interfaces.IPseudoAtom;
import org.openscience.cdk.tools.ILoggingTool;
import org.openscience.cdk.tools.LoggingToolFactory;
import org.openscience.cdk.DefaultChemObjectBuilder;

/**
* TestCase for the reading MDL V3000 mol files using one test file.
Expand Down Expand Up @@ -91,4 +93,16 @@ public class MDLV3000ReaderTest extends SimpleChemObjectReaderTest {
}
}

/**
 * Checks that the V3000 reader keeps the atom-block text of a non-element
 * entry as both the symbol and the label of the resulting pseudo atom.
 * The tenth atom (index 9) of <code>data/mdl/pseudoatomsv3000.mol</code>
 * is written as "Leu" in the atom block.
 *
 * @throws Exception if the test file cannot be read or parsed
 */
@Test
public void testPseudoAtomLabels() throws Exception {
    InputStream in = ClassLoader.getSystemResourceAsStream("data/mdl/pseudoatomsv3000.mol");
    MDLV3000Reader reader = new MDLV3000Reader(in);
    try {
        IAtomContainer molecule = DefaultChemObjectBuilder.getInstance().newInstance(IAtomContainer.class);
        molecule = reader.read(molecule);

        // Verify the type before casting so a wrong atom type shows up as
        // an assertion failure instead of a ClassCastException.
        Assert.assertTrue(
            "atom 10 should have been read as a pseudo atom",
            molecule.getAtom(9) instanceof IPseudoAtom
        );
        IPseudoAtom pa = (IPseudoAtom) molecule.getAtom(9);
        Assert.assertEquals("Leu", pa.getSymbol());
        Assert.assertEquals("Leu", pa.getLabel());
    } finally {
        // Release the reader (and the stream it wraps) even when an assertion fails.
        reader.close();
    }
}

}

0 comments on commit e59d204

Please sign in to comment.