Skip to content

Commit

Permalink
Added SMILES support to createMoleculeIterator
Browse files Browse the repository at this point in the history
  • Loading branch information
jonalv committed Jun 19, 2012
1 parent b9499c9 commit 243845c
Show file tree
Hide file tree
Showing 4 changed files with 218 additions and 8 deletions.
3 changes: 2 additions & 1 deletion plugins/net.bioclipse.cdk.business.test/build.properties
Expand Up @@ -9,5 +9,6 @@
source.. = src/
output.. = bin/
bin.includes = META-INF/,\
.
.,\
bin/testFiles/
source.. = src/
Expand Up @@ -581,6 +581,29 @@ public void testCreatingMoleculeIterator()

assertEquals( 2, molecules.size() );
}

/**
 * Reads the bundled SMILES fixture through
 * <code>createMoleculeIterator</code> and checks both the number of
 * molecules produced and the number of properties on the first one.
 */
@Test
public void testCreatingMoleculeIteratorSMILES()
    throws CoreException,
           URISyntaxException, MalformedURLException, IOException,
           BioclipseException {

    // Resolve the classpath resource to a plain file-system path
    URL fixture = FileLocator.toFileURL(
        getClass().getResource("/testFiles/testsmi2sdf.smi")
                  .toURI()
                  .toURL() );
    String path = fixture.getFile();

    List<ICDKMolecule> molecules = new ArrayList<ICDKMolecule>();

    // Drain the iterator so every molecule in the file gets parsed
    Iterator<net.bioclipse.cdk.domain.ICDKMolecule> it
        = cdk.createMoleculeIterator(path);
    while ( it.hasNext() ) {
        molecules.add( it.next() );
    }

    assertEquals( 8, molecules.size() );

    // The first molecule is expected to carry ten properties
    int firstMoleculePropertyCount
        = molecules.get(0).getAtomContainer().getProperties().size();
    assertEquals( 10, firstMoleculePropertyCount );
}

@Test
public void testFingerPrintMatch() throws BioclipseException {
Expand Down
3 changes: 2 additions & 1 deletion plugins/net.bioclipse.cdk.business/META-INF/MANIFEST.MF
Expand Up @@ -40,7 +40,8 @@ Require-Bundle: org.openscience.cdk.io,
org.openscience.cdk.fingerprint,
net.bioclipse.inchi,
net.bioclipse.ui.business,
net.sf.cglib
net.sf.cglib,
org.openscience.cdk.silent;bundle-version="1.4.10"
Bundle-ActivationPolicy: lazy
Export-Package: net.bioclipse.cdk.business,
net.bioclipse.cdk.domain,
Expand Down
Expand Up @@ -28,6 +28,7 @@
import java.io.StringWriter;
import java.lang.reflect.InvocationTargetException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.BitSet;
import java.util.Collection;
import java.util.Collections;
Expand All @@ -36,6 +37,7 @@
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.Properties;
import java.util.Random;
import java.util.regex.Matcher;
Expand Down Expand Up @@ -146,6 +148,7 @@
import org.openscience.cdk.nonotify.NNMolecule;
import org.openscience.cdk.nonotify.NNMoleculeSet;
import org.openscience.cdk.nonotify.NoNotificationChemObjectBuilder;
import org.openscience.cdk.silent.SilentChemObjectBuilder;
import org.openscience.cdk.similarity.Tanimoto;
import org.openscience.cdk.smiles.DeduceBondSystemTool;
import org.openscience.cdk.smiles.SmilesGenerator;
Expand All @@ -160,13 +163,18 @@
import org.openscience.cdk.tools.manipulator.MolecularFormulaManipulator;
import org.xmlcml.cml.element.CMLAtomType;

import sun.tools.tree.ThisExpression;

/**
* The manager class for CDK. Contains CDK related methods.
*
* @author olas, jonalv
*/
public class CDKManager implements IBioclipseManager {




private static final Logger logger = Logger.getLogger(CDKManager.class);

// ReaderFactory used to instantiate IChemObjectReaders
Expand Down Expand Up @@ -1050,26 +1058,203 @@ public List<ICDKMolecule> moleculesFromString( String s )
);
}

/**
 * Heuristically decides whether the given file is a SMILES file.
 * <p>
 * The file must have one of the name extensions registered for
 * {@link SMILESFormat}. Its first line may be either a header or a
 * SMILES row, so the second line is the one that gets parsed; when the
 * file only has a single line, that line is parsed instead. For
 * separated files only the first column is parsed.
 *
 * @param file the file to inspect
 * @return <code>true</code> if the probed line could be parsed as
 *         SMILES, <code>false</code> if the extension does not match,
 *         the file is empty, the probed line is not SMILES, or the
 *         file could not be read
 */
private boolean isSMILESFile( IFile file ) {

    if ( !Arrays.asList( SMILESFormat.getInstance().getNameExtensions() )
               .contains( file.getFileExtension() ) ) {
        return false;
    }

    BufferedReader buf = null;
    try {
        buf = new BufferedReader(
                  new InputStreamReader(
                      new BufferedInputStream( file.getContents() ) ) );

        String firstLine = buf.readLine();
        if ( firstLine == null ) {
            // Empty file: there is nothing that could be SMILES
            return false;
        }
        String separator = determineSeparator( firstLine );

        /*
         * First line is either header or SMILES, anything is ok. Check
         * the next line, or the first one if it is the only line.
         */
        String secondLine = buf.readLine();
        String candidate = secondLine != null ? secondLine : firstLine;

        if ( separator != null ) {
            String[] columns = candidate.split( separator );
            if ( columns.length == 0 ) {
                // The line consisted of separators only
                return false;
            }
            candidate = columns[0];
        }

        try {
            fromSMILES( candidate );
        }
        catch ( BioclipseException e ) {
            // It was not SMILES so it can't be a SMILES file
            return false;
        }
        /*
         * We have managed to read a SMILES where we expect a SMILES so
         * let's assume this is a SMILES file.
         */
        return true;
    } catch ( CoreException e ) {
        LogUtils.debugTrace( logger, e );
    } catch ( IOException e ) {
        LogUtils.debugTrace( logger, e );
    } finally {
        // Always release the stream opened by file.getContents()
        if ( buf != null ) {
            try {
                buf.close();
            } catch ( IOException e ) {
                LogUtils.debugTrace( logger, e );
            }
        }
    }

    return false;
}

/**
 * Creates an iterator that reads the molecules of a file one at a time.
 * <p>
 * Files recognized by <code>isSMILESFile</code> are read with an
 * {@link IteratingBioclipseSMILESReader}; every other file whose format
 * could be determined is read with an
 * {@link IteratingBioclipseMDLReader}.
 *
 * @param file    the file to read molecules from
 * @param monitor progress monitor handed on to the reader
 * @return an iterator over the molecules in the file
 * @throws CoreException      declared for the file access calls
 * @throws IOException        declared for the file access calls
 * @throws BioclipseException if the file is not a SMILES file and its
 *                            format could not be determined
 */
public Iterator<ICDKMolecule>
    createMoleculeIterator( IFile file, IProgressMonitor monitor)
    throws CoreException, IOException, BioclipseException {

    IChemFormat format = determineIChemFormat(file);

    if (isSMILESFile(file)) {
        return new IteratingBioclipseSMILESReader(
                       file.getContents(),
                       SilentChemObjectBuilder.getInstance(),
                       monitor );
    }
    else if (format == null) {
        throw new BioclipseException("Unsupported format for file: " + file.getName());
    }

    return new IteratingBioclipseMDLReader(
                   file.getContents(),
                   SilentChemObjectBuilder.getInstance(),
                   monitor );
}


/**
 * Iterator that reads a SMILES file lazily, one molecule per line.
 * <p>
 * The first line is inspected on construction. If it (or its first
 * column) parses as SMILES the file is treated as header-less: the
 * columns get the synthetic names "smiles", "identifier", "p2", "p3",
 * ... and the first line becomes the first molecule. Otherwise the
 * first line supplies the property names for the remaining columns of
 * every following row.
 * <p>
 * The builder and progress monitor passed to the constructor are stored
 * but not used by the code in this class.
 */
public class IteratingBioclipseSMILESReader
             implements Iterator<ICDKMolecule> {

    InputStream contents;
    IChemObjectBuilder moleculeBuilder;
    IProgressMonitor monitor;
    // Molecule read ahead by the constructor or hasNext(), if any
    ICDKMolecule cache = null;
    BufferedReader reader;
    // Column separator, or null when each row is a bare SMILES string
    String separator;
    // Property name for each column
    String[] headers;
    // Set once the end of the input was reached and the reader closed
    private boolean exhausted = false;

    /**
     * Opens the stream and reads the first line to work out the
     * separator and the column names.
     *
     * @param contents stream with the SMILES file content
     * @param instance chem object builder (stored only)
     * @param monitor  progress monitor (stored only)
     * @throws RuntimeException if the first line can not be read
     */
    public IteratingBioclipseSMILESReader( InputStream contents,
                                           IChemObjectBuilder instance,
                                           IProgressMonitor monitor) {

        this.contents = contents;
        this.moleculeBuilder = instance;
        this.monitor = monitor;
        this.reader = new BufferedReader(
                          new InputStreamReader(
                              new BufferedInputStream(contents) ) );
        String firstLine = null;
        try {
            firstLine = reader.readLine();
        }
        catch ( IOException e ) {
            throw new RuntimeException("Could not read file", e);
        }

        if ( firstLine == null ) {
            // Empty input: nothing to inspect, the iterator yields nothing
            separator = null;
            headers = new String[] {"smiles"};
            return;
        }
        separator = determineSeparator(firstLine);

        // Assume first line is header
        if ( separator == null) {
            // no separator so assuming only a SMILES string on each row
            headers = new String[] {"smiles"};
        }
        else {
            headers = firstLine.split(separator);
        }

        // Confirm first line seems to be headers
        try {
            ICDKMolecule mol
                = fromSMILES( headers.length <= 1 ? firstLine
                                                  : headers[0] );
            // first line not headers. Now handle any properties.
            // Copy the values before the header names overwrite them;
            // aliasing the array would store each name as its own value.
            String[] values = headers.clone();
            for ( int i = 0 ; i < headers.length ; i++ ) {
                headers[i] = defaultHeader( i );
            }
            for ( int i = 1 ; i < values.length ; i++ ) {
                mol.setProperty( headers[i], values[i] );
            }
            cache = mol;
        }
        catch (BioclipseException e) {
            // Well it's not a SMILES so assume it is headers
        }
    }

    /**
     * Gives the synthetic property name used for a column that has no
     * header of its own.
     */
    private String defaultHeader( int column ) {
        if ( column == 0 ) { return "smiles"; }
        if ( column == 1 ) { return "identifier"; }
        return "p" + column;
    }

    /**
     * Closes the underlying reader once the input is used up.
     */
    private void closeReader() {
        exhausted = true;
        try {
            reader.close();
        }
        catch ( IOException e ) {
            LogUtils.debugTrace( logger, e );
        }
    }

    /**
     * Reads and parses the next row.
     *
     * @return the next molecule, or null at end of input or when the
     *         row could not be read
     * @throws RuntimeException if the row is not valid SMILES
     */
    private ICDKMolecule getNext() {
        if ( exhausted ) { return null; }
        ICDKMolecule mol = null;
        try {
            String line = reader.readLine();
            if ( line == null ) {
                closeReader();
                return null;
            }
            if (separator != null) {
                String[] cols = line.split( separator );
                mol = fromSMILES( cols[0] );
                for ( int i = 1 ; i<cols.length ; i++ ) {
                    // A row may be wider than the header line
                    String name = i < headers.length ? headers[i]
                                                     : defaultHeader( i );
                    mol.setProperty( name, cols[i] );
                }
            }
            else {
                mol = fromSMILES( line );
            }
        }
        catch (BioclipseException e) {
            throw new RuntimeException(e.getMessage(), e);
        }
        catch ( IOException e ) {
            // Read failures are only logged; the row is reported as absent
            LogUtils.debugTrace( logger, e );
        }
        return mol;
    }

    @Override
    public boolean hasNext() {
        if ( cache != null ) {
            return true;
        }
        cache = getNext();
        return cache != null;
    }

    @Override
    public ICDKMolecule next() {
        if ( !hasNext() ) {
            throw new NoSuchElementException();
        }
        ICDKMolecule result = cache;
        cache = null;
        return result;
    }

    @Override
    public void remove() {

        throw new UnsupportedOperationException("Remove is not supported");
    }

}

static class IteratingBioclipseMDLReader
static class IteratingBioclipseMDLReader
implements Iterator<ICDKMolecule> {

IteratingMDLReader reader;
Expand Down

0 comments on commit 243845c

Please sign in to comment.