Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Browse files
Browse the repository at this point in the history
Add a manager for jsoup
An example: htmlFilename = "/ECHA/substance.html"; dossierFolder = "http://apps.echa.europa.eu/registered/data/dossiers/"; testURL = dossierFolder + "DISS-9d8ad2a1-0d51-13f7-e044-00144f67d249/AGGR-7e14e77e-8a7d-4f8d-ab80-d0b7df6bb939_DISS-9d8ad2a1-0d51-13f7-e044-00144f67d249.html"; htmlFile = bioclipse.downloadAsFile(testURL, "text/html", htmlFilename); doc = jsoup.parse(htmlFilename); id = jsoup.select(doc, "#body .structuralFormula img").attr("src").split("_")[0]; name = jsoup.select(doc, "#body .name .value").text(); ecNumber = jsoup.select(doc, "#body .ecNumber .value").text(); casNumber = jsoup.select(doc, "#body .casNumber .value").text(); molecularFormula = jsoup.select(doc, "#body .molecularFormula .value").first().text(); iupacName = jsoup.select(doc, "#body .iupacName .value").text(); smiles = jsoup.select(doc, "#body .smilesNotation .value span").text(); inchi = jsoup.select(doc, "#body .inchi .value span").text();
- Loading branch information
Showing
14 changed files
with
452 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
<?xml version="1.0" encoding="UTF-8"?> | ||
<classpath> | ||
<classpathentry exported="true" kind="lib" path="lib/jsoup-1.8.3.jar"/> | ||
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-1.7"/> | ||
<classpathentry kind="con" path="org.eclipse.pde.core.requiredPlugins"/> | ||
<classpathentry kind="src" path="src"/> | ||
<classpathentry kind="output" path="bin"/> | ||
</classpath> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
<?xml version="1.0" encoding="UTF-8"?> | ||
<projectDescription> | ||
<name>net.bioclipse.jsoup.business</name> | ||
<comment></comment> | ||
<projects> | ||
</projects> | ||
<buildSpec> | ||
<buildCommand> | ||
<name>org.eclipse.jdt.core.javabuilder</name> | ||
<arguments> | ||
</arguments> | ||
</buildCommand> | ||
<buildCommand> | ||
<name>org.eclipse.pde.ManifestBuilder</name> | ||
<arguments> | ||
</arguments> | ||
</buildCommand> | ||
<buildCommand> | ||
<name>org.eclipse.pde.SchemaBuilder</name> | ||
<arguments> | ||
</arguments> | ||
</buildCommand> | ||
</buildSpec> | ||
<natures> | ||
<nature>org.eclipse.pde.PluginNature</nature> | ||
<nature>org.eclipse.jdt.core.javanature</nature> | ||
</natures> | ||
</projectDescription> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
Manifest-Version: 1.0 | ||
Bundle-ManifestVersion: 2 | ||
Bundle-Name: Manager for the JSoup API | ||
Bundle-SymbolicName: net.bioclipse.jsoup.business;singleton:=true | ||
Bundle-Version: 2.6.2.qualifier | ||
Bundle-Activator: net.bioclipse.jsoup.business.Activator | ||
Bundle-Vendor: The Bioclipse Team | ||
Export-Package: net.bioclipse.jsoup.business, net.bioclipse.jsoup.business.business | ||
Require-Bundle: org.eclipse.ui, | ||
org.eclipse.core.runtime, | ||
net.bioclipse.core, | ||
net.bioclipse.scripting, | ||
org.springframework.bundle.spring.aop, | ||
net.sf.cglib, | ||
org.springframework.osgi.aopalliance.osgi, | ||
org.eclipse.core.resources, | ||
net.bioclipse.business, | ||
net.bioclipse.ui.business | ||
Bundle-RequiredExecutionEnvironment: JavaSE-1.7 | ||
Import-Package: org.apache.log4j | ||
Bundle-ActivationPolicy: lazy | ||
Bundle-ClassPath: ., | ||
lib/jsoup-1.8.3.jar |
68 changes: 68 additions & 0 deletions
68
plugins/net.bioclipse.jsoup.business/META-INF/spring/context.xml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
<?xml version="1.0" encoding="UTF-8"?> | ||
|
||
<beans xmlns="http://www.springframework.org/schema/beans" | ||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" | ||
xmlns:osgi="http://www.springframework.org/schema/osgi" | ||
xsi:schemaLocation="http://www.springframework.org/schema/beans http://www.springframework.org/schema/beans/spring-beans.xsd | ||
http://www.springframework.org/schema/osgi http://www.springframework.org/schema/osgi/spring-osgi.xsd"> | ||
|
||
<osgi:service id="javaJSoupManagerOSGI" | ||
ref="javaJSoupManager" | ||
interface="net.bioclipse.jsoup.business.business.IJavaJSoupManager" /> | ||
|
||
<osgi:service id="javaScriptJSoupManagerOSGI" | ||
ref="javaScriptJSoupManager" | ||
interface="net.bioclipse.jsoup.business.business.IJavaScriptJSoupManager" /> | ||
|
||
<osgi:reference id="recordingAdvice" | ||
interface="net.bioclipse.recording.IRecordingAdvice" /> | ||
|
||
<osgi:reference id="javaManagerDispatcherAdvisor" | ||
interface="net.bioclipse.managers.business.IJavaManagerDispatcherAdvisor" /> | ||
|
||
<osgi:reference id="javaScriptManagerDispatcherAdvisor" | ||
interface="net.bioclipse.managers.business.IJavaScriptManagerDispatcherAdvisor" /> | ||
|
||
<osgi:reference id="wrapInProxyAdvice" | ||
interface="net.bioclipse.recording.IWrapInProxyAdvice" /> | ||
|
||
<bean id="recordingAdvisor" | ||
class="org.springframework.aop.support.RegexpMethodPointcutAdvisor"> | ||
<property name="advice" ref="recordingAdvice" /> | ||
<property name="pattern" value=".*" /> <!-- See also class implementation --> | ||
</bean> | ||
|
||
<bean id="JSoupManagerTarget" | ||
class="net.bioclipse.jsoup.business.business.JSoupManager"> | ||
</bean> | ||
|
||
<bean id="javaScriptJSoupManager" | ||
class="org.springframework.aop.framework.ProxyFactoryBean"> | ||
<property name="target" | ||
ref="JSoupManagerTarget" /> | ||
<property name="proxyInterfaces" | ||
value="net.bioclipse.jsoup.business.business.IJavaScriptJSoupManager" /> | ||
<property name="interceptorNames" > | ||
<list> | ||
<value>recordingAdvisor</value> | ||
<value>wrapInProxyAdvice</value> | ||
<value>javaScriptManagerDispatcherAdvisor</value> | ||
</list> | ||
</property> | ||
</bean> | ||
|
||
<bean id="javaJSoupManager" | ||
class="org.springframework.aop.framework.ProxyFactoryBean"> | ||
<property name="target" | ||
ref="JSoupManagerTarget" /> | ||
<property name="proxyInterfaces" | ||
value="net.bioclipse.jsoup.business.business.IJavaJSoupManager" /> | ||
<property name="interceptorNames" > | ||
<list> | ||
<value>recordingAdvisor</value> | ||
<value>wrapInProxyAdvice</value> | ||
<value>javaManagerDispatcherAdvisor</value> | ||
</list> | ||
</property> | ||
</bean> | ||
</beans> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
# PDE build configuration: compile src/ into bin/ and ship the plug-in
# descriptor, the manifest folder, the compiled classes and the bundled
# jsoup library in the binary build.
source.. = src/
output.. = bin/
bin.includes = plugin.xml,\
               META-INF/,\
               .,\
               lib/jsoup-1.8.3.jar
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
<?xml version="1.0" encoding="UTF-8"?> | ||
<?eclipse version="3.4"?> | ||
<plugin> | ||
|
||
<extension | ||
point="net.bioclipse.scripting.contribution"> | ||
<scriptContribution | ||
service="net.bioclipse.jsoup.business.business.JSoupManagerFactory" | ||
id="net.bioclipse.jsoup.business.business.JSoupManager"> | ||
</scriptContribution> | ||
</extension> | ||
|
||
</plugin> |
105 changes: 105 additions & 0 deletions
105
plugins/net.bioclipse.jsoup.business/src/net/bioclipse/jsoup/business/Activator.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,105 @@ | ||
/******************************************************************************* | ||
* Copyright (c) 2015 Egon Willighagen <egon.willighagen@gmail.com> | ||
* | ||
* All rights reserved. This program and the accompanying materials | ||
* are made available under the terms of the Eclipse Public License v1.0 | ||
* which accompanies this distribution, and is available at | ||
* http://www.eclipse.org/legal/epl-v10.html | ||
* | ||
* Contact: http://www.bioclipse.net/ | ||
******************************************************************************/ | ||
package net.bioclipse.jsoup.business; | ||
|
||
import net.bioclipse.jsoup.business.business.IJSoupManager; | ||
import net.bioclipse.jsoup.business.business.IJavaJSoupManager; | ||
import net.bioclipse.jsoup.business.business.IJavaScriptJSoupManager; | ||
|
||
import org.apache.log4j.Logger; | ||
import org.eclipse.ui.plugin.AbstractUIPlugin; | ||
import org.osgi.framework.BundleContext; | ||
import org.osgi.util.tracker.ServiceTracker; | ||
|
||
/** | ||
* The Activator class controls the plug-in life cycle | ||
*/ | ||
public class Activator extends AbstractUIPlugin { | ||
|
||
private static final Logger logger = Logger.getLogger(Activator.class); | ||
|
||
// The shared instance | ||
private static Activator plugin; | ||
|
||
// Trackers for getting the managers | ||
private ServiceTracker javaFinderTracker; | ||
private ServiceTracker jsFinderTracker; | ||
|
||
public Activator() { | ||
} | ||
|
||
public void start(BundleContext context) throws Exception { | ||
super.start(context); | ||
plugin = this; | ||
javaFinderTracker | ||
= new ServiceTracker( context, | ||
IJavaJSoupManager.class.getName(), | ||
null ); | ||
|
||
javaFinderTracker.open(); | ||
jsFinderTracker | ||
= new ServiceTracker( context, | ||
IJavaScriptJSoupManager.class.getName(), | ||
null ); | ||
|
||
jsFinderTracker.open(); | ||
} | ||
|
||
public void stop(BundleContext context) throws Exception { | ||
plugin = null; | ||
super.stop(context); | ||
} | ||
|
||
/** | ||
* Returns the shared instance | ||
* | ||
* @return the shared instance | ||
*/ | ||
public static Activator getDefault() { | ||
return plugin; | ||
} | ||
|
||
public IJSoupManager getJavaJSoupManager() { | ||
IJSoupManager manager = null; | ||
try { | ||
manager = (IJSoupManager) | ||
javaFinderTracker.waitForService(1000*10); | ||
} | ||
catch (InterruptedException e) { | ||
throw new IllegalStateException( | ||
"Could not get the Java JSoupManager", | ||
e ); | ||
} | ||
if (manager == null) { | ||
throw new IllegalStateException( | ||
"Could not get the Java JSoupManager"); | ||
} | ||
return manager; | ||
} | ||
|
||
public IJavaScriptJSoupManager getJavaScriptJSoupManager() { | ||
IJavaScriptJSoupManager manager = null; | ||
try { | ||
manager = (IJavaScriptJSoupManager) | ||
jsFinderTracker.waitForService(1000*10); | ||
} | ||
catch (InterruptedException e) { | ||
throw new IllegalStateException( | ||
"Could not get the JavaScript JSoupManager", | ||
e ); | ||
} | ||
if (manager == null) { | ||
throw new IllegalStateException( | ||
"Could not get the JavaScript JSoupManager"); | ||
} | ||
return manager; | ||
} | ||
} |
49 changes: 49 additions & 0 deletions
49
...net.bioclipse.jsoup.business/src/net/bioclipse/jsoup/business/business/IJSoupManager.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
/******************************************************************************* | ||
* Copyright (c) 2015 Egon Willighagen <egon.willighagen@gmail.com> | ||
* | ||
* All rights reserved. This program and the accompanying materials | ||
* are made available under the terms of the Eclipse Public License v1.0 | ||
* which accompanies this distribution, and is available at | ||
* http://www.eclipse.org/legal/epl-v10.html | ||
* | ||
* Contact: http://www.bioclipse.net/ | ||
******************************************************************************/ | ||
package net.bioclipse.jsoup.business.business; | ||
|
||
import org.jsoup.nodes.Document; | ||
import org.jsoup.nodes.Element; | ||
import org.jsoup.select.Elements; | ||
|
||
import net.bioclipse.core.PublishedClass; | ||
import net.bioclipse.core.PublishedMethod; | ||
import net.bioclipse.core.Recorded; | ||
import net.bioclipse.core.business.BioclipseException; | ||
import net.bioclipse.managers.business.IBioclipseManager; | ||
|
||
@PublishedClass( | ||
value="Manager to process HTML files with JSoup (MIT license)." | ||
) | ||
public interface IJSoupManager extends IBioclipseManager { | ||
|
||
@Recorded | ||
@PublishedMethod( | ||
params="String htmlString", | ||
methodSummary="Parses a HTML String into a JSoup document" | ||
) | ||
public Document parseString(String htmlString); | ||
|
||
@Recorded | ||
@PublishedMethod( | ||
params="File htmlFile", | ||
methodSummary="Parses a HTML file into a JSoup document" | ||
) | ||
public Document parse(String htmlFile) throws BioclipseException; | ||
|
||
@Recorded | ||
@PublishedMethod( | ||
params="Document doc, String cssSelector", | ||
methodSummary="Extracts elements from the JSoup Element (e.g. a Document) using the CSS seletor." | ||
) | ||
public Elements select(Element doc, String cssSelector); | ||
|
||
} |
15 changes: 15 additions & 0 deletions
15
...bioclipse.jsoup.business/src/net/bioclipse/jsoup/business/business/IJavaJSoupManager.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
/******************************************************************************* | ||
* Copyright (c) 2015 Egon Willighagen <egon.willighagen@gmail.com> | ||
* | ||
* All rights reserved. This program and the accompanying materials | ||
* are made available under the terms of the Eclipse Public License v1.0 | ||
* which accompanies this distribution, and is available at | ||
* http://www.eclipse.org/legal/epl-v10.html | ||
* | ||
* Contact: Bioclipse Project <http://www.bioclipse.net> | ||
******************************************************************************/ | ||
package net.bioclipse.jsoup.business.business; | ||
|
||
public interface IJavaJSoupManager extends IJSoupManager { | ||
|
||
} |
18 changes: 18 additions & 0 deletions
18
...pse.jsoup.business/src/net/bioclipse/jsoup/business/business/IJavaScriptJSoupManager.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
/******************************************************************************* | ||
* Copyright (c) 2015 Egon Willighagen <egon.willighagen@gmail.com> | ||
* | ||
* All rights reserved. This program and the accompanying materials | ||
* are made available under the terms of the Eclipse Public License v1.0 | ||
* which accompanies this distribution, and is available at | ||
* http://www.eclipse.org/legal/epl-v10.html | ||
* | ||
* Contact: Bioclipse Project <http://www.bioclipse.net> | ||
******************************************************************************/ | ||
package net.bioclipse.jsoup.business.business; | ||
|
||
import net.bioclipse.managers.business.IBioclipseJSManager; | ||
|
||
public interface IJavaScriptJSoupManager | ||
extends IJSoupManager, IBioclipseJSManager { | ||
|
||
} |
Oops, something went wrong.