Skip to content

Commit

Permalink
Merge pull request #241 from cdk/patch/abbr_rendering
Browse files Browse the repository at this point in the history
Patch/abbr rendering
  • Loading branch information
egonw committed Oct 7, 2016
2 parents aee807e + 2b83fa4 commit a79da5d
Show file tree
Hide file tree
Showing 6 changed files with 327 additions and 32 deletions.
118 changes: 92 additions & 26 deletions app/depict/src/main/java/org/openscience/cdk/depict/Abbreviations.java
Expand Up @@ -24,6 +24,7 @@
package org.openscience.cdk.depict;

import com.google.common.collect.ArrayListMultimap;
import com.google.common.collect.FluentIterable;
import com.google.common.collect.Multimap;
import org.openscience.cdk.CDKConstants;
import org.openscience.cdk.config.Elements;
Expand Down Expand Up @@ -54,6 +55,7 @@
import org.openscience.cdk.smiles.SmilesGenerator;
import org.openscience.cdk.smiles.SmilesParser;
import org.openscience.cdk.tools.manipulator.AtomContainerManipulator;
import uk.ac.ebi.beam.Element;

import java.io.BufferedReader;
import java.io.File;
Expand All @@ -64,6 +66,7 @@
import java.nio.charset.StandardCharsets;
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
Expand Down Expand Up @@ -124,6 +127,11 @@ public class Abbreviations implements Iterable<String> {

private static final int MAX_FRAG = 50;

/**
* Symbol for joining disconnected fragments.
*/
private static final String INTERPUNCT = "·";

private final Map<String, String> connectedAbbreviations = new LinkedHashMap<>();
private final Map<String, String> disconnectedAbbreviations = new LinkedHashMap<>();
private final Set<String> labels = new LinkedHashSet<>();
Expand Down Expand Up @@ -349,38 +357,54 @@ public List<Sgroup> generate(final IAtomContainer mol) {
sgroup.addAtom(atom);
return Collections.singletonList(sgroup);
} else if (cansmi.contains(".")) {
List<Sgroup> newSgroups = new ArrayList<>();
List<Sgroup> complexAbbr = new ArrayList<>(4); // e.g. NEt3
List<Sgroup> simpleAbbr = new ArrayList<>(4); // e.g. HCl
for (IAtomContainer part : ConnectivityChecker.partitionIntoMolecules(mol).atomContainers()) {
cansmi = usmigen.create(part);
label = disconnectedAbbreviations.get(cansmi);
if (label != null && !disabled.contains(label)) {
Sgroup sgroup = new Sgroup();
sgroup.setType(SgroupType.CtabAbbreviation);
sgroup.setSubscript(label);
for (IAtom atom : part.atoms())
if (part.getAtomCount() == 1) {
IAtom atom = part.getAtom(0);
label = getBasicElementSymbol(atom);
if (label != null) {
Sgroup sgroup = new Sgroup();
sgroup.setType(SgroupType.CtabAbbreviation);
sgroup.setSubscript(label);
sgroup.addAtom(atom);
newSgroups.add(sgroup);
simpleAbbr.add(sgroup);
}
} else {
cansmi = usmigen.create(part);
label = disconnectedAbbreviations.get(cansmi);
if (label != null && !disabled.contains(label)) {
Sgroup sgroup = new Sgroup();
sgroup.setType(SgroupType.CtabAbbreviation);
sgroup.setSubscript(label);
for (IAtom atom : part.atoms())
sgroup.addAtom(atom);
complexAbbr.add(sgroup);
}
}
}
if (!newSgroups.isEmpty()) {
// merge together
if (newSgroups.size() > 1) {
if (!complexAbbr.isEmpty()) {
// merge together the abbreviations, iff there is at least
// one complex abbr
if (complexAbbr.size() > 0 &&
complexAbbr.size() + simpleAbbr.size() > 1) {
Sgroup combined = new Sgroup();
label = null;
for (Sgroup sgroup : newSgroups) {
complexAbbr.addAll(simpleAbbr);
for (Sgroup sgroup : complexAbbr) {
if (label == null)
label = sgroup.getSubscript();
else
label += "/" + sgroup.getSubscript();
label += INTERPUNCT + sgroup.getSubscript();
for (IAtom atom : sgroup.getAtoms())
combined.addAtom(atom);
}
combined.setSubscript(label);
combined.setType(SgroupType.CtabAbbreviation);
newSgroups.clear();
newSgroups.add(combined);
complexAbbr.clear();
complexAbbr.add(combined);
}
return newSgroups;
return complexAbbr;
}
}

Expand Down Expand Up @@ -471,6 +495,7 @@ else if (attachBond.getAtom(1) == atom)
nbrSymbols.add(sgroup.getSubscript());
todelete.add(sgroup);
}
int numSGrpNbrs = nbrSymbols.size();
for (IBond bond : mol.getConnectedBondsList(attach)) {
if (!xbonds.contains(bond)) {
IAtom nbr = bond.getConnectedAtom(attach);
Expand All @@ -483,7 +508,7 @@ else if (attachBond.getAtom(1) == atom)
hcount++;
xatoms.add(nbr);
} else if (nbr.getAtomicNumber() > 0){
nbrSymbols.add(newSymbol(nbr.getAtomicNumber(), nbr.getImplicitHydrogenCount()));
nbrSymbols.add(newSymbol(nbr.getAtomicNumber(), nbr.getImplicitHydrogenCount(), false));
xatoms.add(nbr);
}
} else {
Expand All @@ -492,12 +517,17 @@ else if (attachBond.getAtom(1) == atom)
}
}

if (newbonds.size() < 1 || newbonds.size() > 3 || nbrSymbols.isEmpty())
// reject if no symbols
// reject if no bonds (<1), except if all symbols are identical... (HashSet.size==1)
// reject if more than 2 bonds
if (nbrSymbols.isEmpty() ||
newbonds.size() < 1 && (new HashSet<>(nbrSymbols).size() != 1) ||
newbonds.size() > 2)
continue;

// create the symbol
StringBuilder sb = new StringBuilder();
sb.append(newSymbol(attach.getAtomicNumber(), hcount));
sb.append(newSymbol(attach.getAtomicNumber(), hcount, newbonds.size() == 0));
String prev = null;
int count = 0;
Collections.sort(nbrSymbols, new Comparator<String>() {
Expand Down Expand Up @@ -567,16 +597,25 @@ private boolean digitAtEnd(String str) {
return Character.isDigit(str.charAt(str.length()-1));
}

/**
 * Builds the label text for one atom: its element symbol combined with an
 * 'H' / 'Hn' hydrogen part, or the shorthand "Me" for a carbon carrying
 * three hydrogens.
 *
 * NOTE(review): this span is a diff rendering. The two-argument signature and
 * the unconditional "symbol then hydrogens" statements look like the
 * pre-change lines shown next to their three-argument replacement - confirm
 * against the checked-in file before relying on either form.
 *
 * @param atomnum atomic number, resolved to an element via Elements.ofNumber
 * @param hcount  hydrogen count; no hydrogen part is written when it is not positive
 * @param prefix  when true the hydrogen part is written before the element
 *                symbol, otherwise after it
 * @return the label text for the atom
 */
private String newSymbol(int atomnum, int hcount) {
private String newSymbol(int atomnum, int hcount, boolean prefix) {
StringBuilder sb = new StringBuilder();
Elements elem = Elements.ofNumber(atomnum);
// a carbon with exactly three hydrogens is always written as "Me",
// regardless of the requested hydrogen placement
if (elem == Elements.Carbon && hcount == 3)
return "Me";
sb.append(elem.symbol());
if (hcount > 0) {
sb.append('H');
if (hcount > 1)
sb.append(hcount);
if (prefix) {
// hydrogens first, then the element symbol
if (hcount > 0) {
sb.append('H');
// the count digit is only written for two or more hydrogens
if (hcount > 1)
sb.append(hcount);
}
sb.append(elem.symbol());
} else {
// element symbol first, then the hydrogens
sb.append(elem.symbol());
if (hcount > 0) {
sb.append('H');
// the count digit is only written for two or more hydrogens
if (hcount > 1)
sb.append(hcount);
}
}
return sb.toString();
}
Expand Down Expand Up @@ -783,6 +822,33 @@ private static String getSmilesSuffix(String line) {
return "";
}

/**
 * Produces a plain text label for a single atom, made of its element symbol
 * and an 'H' / 'Hn' part for its implicit hydrogens.
 *
 * @param atom the atom to label
 * @return the label, or null when the atom has a non-zero formal charge, a
 *         non-zero mass number, no atomic number (or one below 1), or no
 *         implicit hydrogen count set
 */
private static String getBasicElementSymbol(IAtom atom) {
    final Integer charge = atom.getFormalCharge();
    if (charge != null && charge != 0)
        return null;

    final Integer massNumber = atom.getMassNumber();
    if (massNumber != null && massNumber != 0)
        return null;

    final Integer atomicNumber = atom.getAtomicNumber();
    if (atomicNumber == null || atomicNumber < 1)
        return null;

    final Integer implicitH = atom.getImplicitHydrogenCount();
    if (implicitH == null)
        return null;

    final Elements element = Elements.ofNumber(atomicNumber);

    // "", "H" or "H<n>" depending on how many implicit hydrogens there are
    final String hydrogens;
    if (implicitH <= 0)
        hydrogens = "";
    else if (implicitH == 1)
        hydrogens = "H";
    else
        hydrogens = "H" + implicitH;

    // elements listed here get the hydrogens written in front of the symbol;
    // the original comment points at HydrogenPosition as the canonical list
    final boolean hydrogensFirst;
    switch (element) {
        case Oxygen:
        case Sulfur:
        case Selenium:
        case Tellurium:
        case Fluorine:
        case Chlorine:
        case Bromine:
        case Iodine:
            hydrogensFirst = true;
            break;
        default:
            hydrogensFirst = false;
            break;
    }

    final String symbol = element.symbol();
    if (hydrogensFirst)
        return hydrogens + symbol;
    return symbol + hydrogens;
}

private int loadSmiles(final InputStream in) throws IOException {
int count = 0;
try (BufferedReader brdr = new BufferedReader(new InputStreamReader(in, StandardCharsets.UTF_8))) {
Expand Down
Expand Up @@ -199,7 +199,7 @@ public void dontOverwriteExistingSgroups() throws Exception {
IAtomContainer mol = smi("c1ccccc1N(Cl)C(=O)OC(C)(C)C");
List<Sgroup> sgroups = factory.generate(mol);
assertThat(sgroups.size(), is(1));
assertThat(sgroups.get(0).getSubscript(), is("N(Cl)Boc"));
assertThat(sgroups.get(0).getSubscript(), is("NClBoc"));
assertThat(sgroups.get(0).getBonds().size(), is(1));
assertThat(sgroups.get(0).getAtoms().size(), is(9));
}
Expand Down Expand Up @@ -247,6 +247,31 @@ public void dontOverwriteExistingSgroups() throws Exception {
assertThat(sgroups.get(1).getSubscript(), is("SO3-"));
}

/**
 * With EDCI registered as an abbreviation, its SMILES plus a separate "Cl"
 * fragment is expected to yield one Sgroup whose label joins both parts with
 * an interpunct.
 */
@Test
public void hclSaltOfEdci() throws Exception {
    final Abbreviations abbreviations = new Abbreviations();
    abbreviations.add("CCN=C=NCCCN(C)C EDCI");
    final IAtomContainer salt = smi("CCN=C=NCCCN(C)C.Cl");
    final List<Sgroup> generated = abbreviations.generate(salt);
    assertThat(generated.size(), is(1));
    final Sgroup only = generated.get(0);
    assertThat(only.getSubscript(), is("EDCI·HCl"));
}

/**
 * "Cl[Sn]Cl" is expected to be contracted into a single Sgroup labelled
 * "SnCl2" without any abbreviation being registered first.
 */
@Test
public void SnCl2() throws Exception {
    final Abbreviations abbreviations = new Abbreviations();
    final IAtomContainer tinChloride = smi("Cl[Sn]Cl");
    final List<Sgroup> generated = abbreviations.generate(tinChloride);
    assertThat(generated.size(), is(1));
    final Sgroup only = generated.get(0);
    assertThat(only.getSubscript(), is("SnCl2"));
}

/**
 * "OO" is expected to be contracted into a single Sgroup labelled "HOOH"
 * without any abbreviation being registered first.
 */
@Test
public void HOOH() throws Exception {
    final Abbreviations abbreviations = new Abbreviations();
    final IAtomContainer peroxide = smi("OO");
    final List<Sgroup> generated = abbreviations.generate(peroxide);
    assertThat(generated.size(), is(1));
    final Sgroup only = generated.get(0);
    assertThat(only.getSubscript(), is("HOOH"));
}

@Test
public void loadFromFile() throws Exception {
Abbreviations factory = new Abbreviations();
Expand Down
Expand Up @@ -104,7 +104,7 @@ final class AbbreviationLabel {
"Oct", "Octyl",
"PAB", "Pentyl", "Ph", "Phenyl", "Pivaloyl", "PMB", "Pro", "Propargyl", "Propyl", "Pv",
"R", "SEM",
"T", "TBDMS", "TBDPS", "TES", "Tf", "THP", "THPO", "TIPS", "TMS", "Tos", "Tol", "Tosyl", "Tr", "Troc",
"T", "TBDMS", "Trt", "TBDPS", "TES", "Tf", "THP", "THPO", "TIPS", "TMS", "Tos", "Tol", "Tosyl", "Tr", "Troc",
"Vinyl", "Voc", "Z"};

private static Trie PREFIX_TRIE = new Trie();
Expand Down Expand Up @@ -191,7 +191,7 @@ static boolean parse(String label, List<String> tokens) {
continue;
}

if (c == '/') {
if (c == '/' || c == '·') {
tokens.add(Character.toString(c));
i++;
continue;
Expand Down
Expand Up @@ -173,6 +173,27 @@ public void formatTBu() {
assertThat(texts.get(1).style, is(AbbreviationLabel.STYLE_NORMAL));
}

/**
 * Checks that a label containing an interpunct is tokenised with the
 * interpunct as its own token, and that after formatting and reducing it
 * comes out as three runs: "NEt" (normal), "3" (subscript), "·HCl" (normal).
 */
@Test
public void NEt3DotHCl() {
    final List<String> parsed = new ArrayList<>();
    assertTrue(AbbreviationLabel.parse("NEt3·HCl", parsed));

    // tokenisation: the interpunct is a token of its own
    final String[] expectedTokens = {"N", "Et3", "·", "H", "Cl"};
    assertThat(parsed.size(), is(5));
    for (int i = 0; i < expectedTokens.length; i++)
        assertThat(parsed.get(i), is(expectedTokens[i]));

    // formatting followed by an in-place reduce over the whole list
    final List<AbbreviationLabel.FormattedText> runs = AbbreviationLabel.format(parsed);
    AbbreviationLabel.reduce(runs, 0, runs.size());
    assertThat(runs.size(), is(3));

    final AbbreviationLabel.FormattedText head = runs.get(0);
    assertThat(head.text, is("NEt"));
    assertThat(head.style, is(AbbreviationLabel.STYLE_NORMAL));

    final AbbreviationLabel.FormattedText count = runs.get(1);
    assertThat(count.text, is("3"));
    assertThat(count.style, is(AbbreviationLabel.STYLE_SUBSCRIPT));

    final AbbreviationLabel.FormattedText tail = runs.get(2);
    assertThat(tail.text, is("·HCl"));
    assertThat(tail.style, is(AbbreviationLabel.STYLE_NORMAL));
}

@Test
public void formatOPO3H2() {
List<String> tokens = Arrays.asList("O", "P", "O3", "H2");
Expand Down

0 comments on commit a79da5d

Please sign in to comment.