Skip to content

Commit

Permalink
a faster ring search to identify the atoms and bonds which are member…
Browse files Browse the repository at this point in the history
…s of rings.

Conflicts:
	src/test/org/openscience/cdk/modulesuites/McoreTests.java

Change-Id: I044a11b41ad170dac31db0d541a2a9da63616f26
Signed-off-by: Egon Willighagen <egonw@users.sourceforge.net>
  • Loading branch information
johnmay authored and egonw committed Feb 8, 2013
1 parent 6b8d8ca commit 7f96eea
Show file tree
Hide file tree
Showing 18 changed files with 3,408 additions and 2 deletions.
71 changes: 71 additions & 0 deletions src/main/org/openscience/cdk/ringsearch/CyclicVertexSearch.java
@@ -0,0 +1,71 @@
/*
* Copyright (C) 2012 John May <jwmay@users.sf.net>
*
* Contact: cdk-devel@lists.sourceforge.net
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published by the
* Free Software Foundation; either version 2.1 of the License, or (at your
* option) any later version. All we ask is that proper credit is given for our
* work, which includes - but is not limited to - adding the above copyright
* notice to the beginning of your source code files, and to any copyright
* notice that you may distribute with programs based on this work.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
* for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*/
package org.openscience.cdk.ringsearch;

/**
* Describes a search to identify vertices which belong to elementary cycles and
* if those cycles are isolated or are part of a fused system. We define a cycle
* as isolated if it edge disjoint with all other cycles. This corresponds to
* the isolated and spiro rings of a chemical structures.
*
* @author John May
* @cdk.module core
*/
public interface CyclicVertexSearch {

/**
* Returns true if the vertex <i>v</i> is in a cycle.
*
* @param v a vertex identifier by index
* @return whether the vertex is in a cycle
*/
boolean cyclic(int v);

/**
* The set of cyclic vertices.
*
* @return the cyclic vertices of the molecule.
*/
int[] cyclic();

/**
* Construct the sets of vertices which belong to isolated cycles. Each row
* in the array describes a set of cyclic vertices which is edge disjoint
* with all other elementary cycles.
*
* @return vertices belonging to the isolated rings
*/
int[][] isolated();

/**
* Construct the sets of vertices which belong to fused cycle systems (share
* at least one edge). Each row in the array describes a set of vertices in
* a separate fused system. Each fused system is edge disjoint with every
* other fused system.
*
* @return vertices belonging to the fused cycles
*/
int[][] fused();

}

326 changes: 326 additions & 0 deletions src/main/org/openscience/cdk/ringsearch/JumboCyclicVertexSearch.java
@@ -0,0 +1,326 @@
/*
* Copyright (C) 2012 John May <jwmay@users.sf.net>
*
* Contact: cdk-devel@lists.sourceforge.net
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published by the
* Free Software Foundation; either version 2.1 of the License, or (at your
* option) any later version. All we ask is that proper credit is given for our
* work, which includes - but is not limited to - adding the above copyright
* notice to the beginning of your source code files, and to any copyright
* notice that you may distribute with programs based on this work.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
* for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*/
package org.openscience.cdk.ringsearch;

import org.openscience.cdk.annotations.TestClass;
import org.openscience.cdk.annotations.TestMethod;

import java.util.ArrayList;
import java.util.BitSet;
import java.util.List;

/**
* CyclicVertexSearch for graphs with more then 64 vertices.
*
* @author John May
* @cdk.module core
*/
@TestClass("org.openscience.cdk.ringsearch.JumboCyclicVertexSearchTest")
class JumboCyclicVertexSearch implements CyclicVertexSearch {

/* graph representation */
private final int[][] g;

/* set of known cyclic vertices */
private final BitSet cyclic;

/* cycle systems as they are discovered */
private List<BitSet> cycles = new ArrayList<BitSet>(1);

/* indicates if the 'cycle' at 'i' in 'cycles' is fused */
private List<Boolean> fused = new ArrayList<Boolean>(1);

/* set of visited vertices */
private BitSet visited;

/* the vertices in our path at a given vertex index */
private BitSet[] state;

/**
* Create a new cyclic vertex search for the provided graph.
*
* @param graph adjacency list representation of a graph
*/
@TestMethod("testEmpty") JumboCyclicVertexSearch(int[][] graph) {
this.g = graph;
final int n = graph.length;

cyclic = new BitSet(n);

if (n == 0) return;

state = new BitSet[n];
visited = new BitSet(n);

BitSet empty = new BitSet(n);

// start from vertex 0
search(0, copy(empty), copy(empty));

// if g is a fragment we will not have visited everything
int v = 0;
while (visited.cardinality() != n) {
v++;
// each search expands to the whole fragment, as we
// may have fragments we need to visit 0 and then
// check every other vertex
if (!visited.get(v)) {
search(v, copy(empty), copy(empty));
}
}

// allow the states to be collected
state = null;
visited = null;

}

/**
* Perform a depth first search from the vertex <i>v</i>.
*
* @param v vertex to search from
* @param prev the state before we vistaed our parent (previous state)
* @param curr the current state (including our parent)
*/
private void search(int v, BitSet prev, BitSet curr) {

state[v] = curr; // set the state before we visit v
curr = copy(curr); // include v in our current state (state[v] is unmodified)
curr.set(v);
visited.or(curr); // mark v as visited (or being visited)

// for each neighbor w of v
for (int w : g[v]) {

// if w is in our prev state we have a cycle of size >3.
// we don't check out current state as this will always
// include w - they are adjacent
if (prev.get(w)) {
// we have a cycle, xor the state when we last visited 'w'
// with our current state. this set is all the vertices
// we visited since then
add(xor(state[w], curr));
}

// check w hasn't been visited or isn't being visited further up the stack.
// this mainly stops us re-visiting the vertex we came from
else if (!visited.get(w)) {
// recursively call for the neighbor 'w'
search(w, state[v], curr);
}
}

}

/**
* @inheritDoc
*/
@TestMethod("testCyclic_Int")
@Override
public boolean cyclic(int v) {
return cyclic.get(v);
}

/**
* @inheritDoc
*/
@TestMethod("testCyclic")
@Override public int[] cyclic() {
return toArray(cyclic);
}

/**
* @inheritDoc
*/
@TestMethod("testIsolated,testIsolated_NonCyclic,testIsolated_Empty," +
"testIsolated_Spiro,testIsolated_SpiroMedium," +
"testIsolated_Biphenyl,testIsolated_BenzylBenzene," +
"testIsolatedFragments")
@Override public int[][] isolated() {
List<int[]> isolated = new ArrayList<int[]>(cycles.size());
for (int i = 0; i < cycles.size(); i++) {
if (!fused.get(i))
isolated.add(toArray(cycles.get(i)));
}
return isolated.toArray(new int[isolated.size()][]);
}

/**
* @inheritDoc
*/
@TestMethod("testFused,testFused_BiocycloEdgeLinked," +
"testFused_BiocycloVertexLinked,testFused_Orthofused," +
"testFused_Biorthofused,testFused_Cylclophane," +
"testFused_Fullerene")
@Override public int[][] fused() {
List<int[]> fused = new ArrayList<int[]>(cycles.size());
for (int i = 0; i < cycles.size(); i++) {
if (this.fused.get(i))
fused.add(toArray(cycles.get(i)));
}
return fused.toArray(new int[fused.size()][]);
}

/**
* Add the cycle vertices to our discovered cycles. The cycle is first
* checked to see if it is isolated (shares at most one vertex) or
* <i>potentially</i> fused.
*
* @param cycle newly discovered cyclic vertex set
*/
private void add(BitSet cycle) {

BitSet intersect = and(cycle, cyclic);

if (intersect.cardinality() > 1) {
addFused(cycle);
} else {
addIsolated(cycle);
}

cyclic.or(cycle);

}

/**
* Add an a new isolated cycle which is currently edge disjoint with all
* other cycles.
*
* @param cycle newly discovered cyclic vertices
*/
private void addIsolated(BitSet cycle) {
cycles.add(cycle);
fused.add(false);
}

/**
* Adds a <i>potentially</i> fused cycle. If the cycle is discovered not be
* fused it will still be added as isolated.
*
* @param cycle vertex set of a potentially fused cycle, indicated by the
* set bits
*/
private void addFused(BitSet cycle) {

int i = indexOfFused(0, cycle);

if (i != -1) {
// add new cycle and mark as fused
cycles.get(i).or(cycle);
fused.set(i, true);
int j = i;

// merge other cycles we could be fused with into 'i'
while ((j = indexOfFused(j + 1, cycle)) != -1) {
cycles.get(i).or(cycles.remove(j));
fused.remove(j);
j--;
}
} else {
addIsolated(cycle);
}

}

/**
* Find the next index that the <i>cycle</i> intersects with by at least two
* vertices. If the intersect of a vertex set with another contains more
* then two vertices it cannot be edge disjoint.
*
* @param start start searching from here
* @param cycle test whether any current cycles are fused with this one
* @return the index of the first fused after 'start', -1 if none
*/
private int indexOfFused(int start, BitSet cycle) {
for (int i = start; i < cycles.size(); i++) {
if (and(cycles.get(i), cycle).cardinality() > 1) {
return i;
}
}
return -1;
}

/**
* Convert the set bits of a BitSet to an int[].
*
* @param set input with 0 or more set bits
* @return the bits which are set in the input
*/
@TestMethod("testToArray,testToArray_Empty")
static int[] toArray(BitSet set) {
int[] vertices = new int[set.cardinality()];
int i = 0;

// fill the cyclic vertices with the bits that have been set
for (int v = 0; i < vertices.length; v++) {
if (set.get(v)) {
vertices[i++] = v;
}
}

return vertices;
}

/**
* XOR the to bit sets together and return the result. Neither input is
* modified.
*
* @param x first bit set
* @param y second bit set
* @return the XOR of the two bit sets
*/
@TestMethod("testXor")
static BitSet xor(BitSet x, BitSet y) {
BitSet z = copy(x);
z.xor(y);
return z;
}

/**
* AND the to bit sets together and return the result. Neither input is
* modified.
*
* @param x first bit set
* @param y second bit set
* @return the AND of the two bit sets
*/
@TestMethod("testAnd") static BitSet and(BitSet x, BitSet y) {
BitSet z =
copy(x);
z.and(y);
return z;
}

/**
* Copy the original bit set.
*
* @param org input bit set
* @return copy of the input
*/
@TestMethod("testCopy")
static BitSet copy(BitSet org) {
BitSet cpy = (BitSet) org.clone();
return cpy;
}

}

0 comments on commit 7f96eea

Please sign in to comment.