Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
Names and synonyms are now generated using Groovy / Open RDF
  • Loading branch information
egonw committed Apr 13, 2012
1 parent 273f792 commit 667bd40
Show file tree
Hide file tree
Showing 9 changed files with 55 additions and 16 deletions.
2 changes: 1 addition & 1 deletion .gitignore
@@ -1,3 +1,3 @@
vars.php
vars.properties
*~
mysql*.jar
2 changes: 1 addition & 1 deletion README.markdown
Expand Up @@ -59,7 +59,7 @@ These scripts were tested against version 13 of ChEMBL, as downloaded from:

# Requirements

ChEMBL 13 and the MySQL JDBC plugin.
ChEMBL 13, OpenRDF (aka Sesame), SLF4J, and the MySQL JDBC plugin.

# Installation

Expand Down
45 changes: 45 additions & 0 deletions compounds.groovy
@@ -0,0 +1,45 @@
import groovy.sql.Sql
import org.openrdf.repository.Repository
import org.openrdf.repository.sail.SailRepository
import org.openrdf.sail.memory.MemoryStore
import org.openrdf.model.vocabulary.RDFS
import org.openrdf.model.vocabulary.RDF
import org.openrdf.rio.ntriples.NTriplesWriter

// export CLASSPATH=$(JARS=(*.jar); IFS=:; echo "${JARS[*]}")

// Exports the names and synonyms of ChEMBL compounds as rdfs:label triples,
// written to standard output in N-Triples format.
//
// Settings are read from vars.properties in the working directory:
//   dbprefix + version -> MySQL database name on localhost
//   user, pwd          -> database credentials
//   limit              -> optional raw SQL suffix for the molregno query (e.g. "LIMIT 1")
def props = new Properties()
new File("vars.properties").withInputStream { stream -> props.load(stream) }

def url = "jdbc:mysql://localhost/" + props.dbprefix + props.version
def sql = Sql.newInstance(url, props.user, props.pwd, "com.mysql.jdbc.Driver")

// Default to an empty suffix so that a missing 'limit' entry does not append
// the literal text "null" to the statement.
def allMolregno = "SELECT DISTINCT molregno FROM compound_records " + props.getProperty("limit", "")

// Adds one rdfs:label triple to 'con' for every non-null value of 'column'
// returned by 'query'; 'query' takes the molregno as its single '?' parameter.
def addLabels = { con, factory, subject, query, column, molregno ->
    sql.eachRow(query, [molregno]) { nameRow ->
        def label = nameRow[column]
        if (label != null) {
            con.add(subject, RDFS.LABEL, factory.createLiteral(label))
        }
    }
}

try {
    sql.eachRow(allMolregno) { row ->
        def molregno = row.molregno

        // A fresh in-memory store per molecule: only the triples of the current
        // molecule are held in memory and exported at a time.
        def repos = new SailRepository(new MemoryStore())
        repos.initialize()
        try {
            def con = repos.getConnection()
            try {
                def factory = repos.getValueFactory()
                def molURI = factory.createURI("http://data.kasabi.com/dataset/chembl-rdf/molecule/m" + molregno)

                // the names
                addLabels(con, factory, molURI,
                    "SELECT DISTINCT compound_name FROM compound_records WHERE molregno = ?",
                    "compound_name", molregno)

                // the synonyms
                addLabels(con, factory, molURI,
                    "SELECT DISTINCT synonyms FROM molecule_synonyms WHERE molregno = ?",
                    "synonyms", molregno)

                con.export(new NTriplesWriter(System.out))
            } finally {
                // Release the connection even when a query or the export fails.
                con.close()
            }
        } finally {
            // Without this every processed molecule leaves an initialised store behind.
            repos.shutDown()
        }
    }
} finally {
    // The JDBC connection was previously never closed.
    sql.close()
}
14 changes: 0 additions & 14 deletions compounds.php
Expand Up @@ -20,13 +20,6 @@
$molregno = $row['molregno'];
$molecule = $MOL . "m" . $molregno;

# get the compound names
$names = mysql_query("SELECT DISTINCT compound_name FROM compound_records WHERE molregno = $molregno");
while ($nameRow = mysql_fetch_assoc($names)) {
if ($nameRow['compound_name'])
echo data_triple( $molecule, $RDFS . "label", str_replace("\"", "\\\"", $nameRow['compound_name']) );
}

# get the literature references
$refs = mysql_query("SELECT DISTINCT doc_id FROM compound_records WHERE molregno = $molregno");
while ($refRow = mysql_fetch_assoc($refs)) {
Expand Down Expand Up @@ -95,13 +88,6 @@
}
}

# get the synonyms
$names = mysql_query("SELECT DISTINCT synonyms FROM molecule_synonyms WHERE molregno = $molregno");
while ($name = mysql_fetch_assoc($names)) {
if ($name['synonyms'])
echo data_triple( $molecule, $RDFS . "label", str_replace("\"", "\\\"", $name['synonyms']) );
}

# get parent/child information
$hierarchies = mysql_query("SELECT DISTINCT * FROM molecule_hierarchy WHERE molregno = $molregno");
while ($hierarchy = mysql_fetch_assoc($hierarchies)) {
Expand Down
Binary file added mysql.jar
Binary file not shown.
Binary file added openrdf-sesame-2.6.5-onejar.jar
Binary file not shown.
Binary file added slf4j-api.jar
Binary file not shown.
Binary file added slf4j-simple.jar
Binary file not shown.
8 changes: 8 additions & 0 deletions vars.properties.example
@@ -0,0 +1,8 @@
version=13
rooturi=http://data.kasabi.com/dataset/chembl-rdf/

dbprefix=chembl_
user=user
pwd=secret

limit=LIMIT 1

0 comments on commit 667bd40

Please sign in to comment.