Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
This is a first version of a downloader module for papermonk, meant to serve as an example for implementing other modules. At the moment, the actual paper downloading code is not implemented because it would be silly to stuff everything into the "download" function; this concept should probably be split up into multiple methods instead. Version bump to: 0.0.1 (initial commit)
- Loading branch information
0 parents
commit bd4774a
Showing
7 changed files
with
163 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
# don't track other node modules | ||
node_modules/ | ||
|
||
# also ignore npm errors | ||
npm-debug.log | ||
|
||
# ignore temporary files leftover from vim | ||
.*.sw* |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
Bryan Bishop <kanzure@gmail.com> (http://heybryan.org/) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
BSD |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
# papermonk-downloader-plosone | ||
|
||
This is a [papermonk](https://github.com/kanzure/papermonk) downloader that | ||
scrapes abstracts, papers, pdfs, supplementary documents, and other data from | ||
[PLOS ONE](http://www.plosone.org/). | ||
|
||
* [More about PLOS ONE](http://www.plosone.org/static/information) | ||
* [More about Public Library of Science](http://www.plos.org/about/what-is-plos/) | ||
|
||
## installing | ||
|
||
See [papermonk](https://github.com/kanzure/papermonk) for the main module. This | ||
module is meant to be used as a plugin. However, it should also be possible to | ||
use this module independently and in isolation from other papermonk modules. | ||
|
||
## testing | ||
|
||
``` | ||
node tests.js | ||
``` | ||
|
||
## license | ||
|
||
BSD |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
urlparser = require("url-parser"); | ||
|
||
/**
 * Decide whether this downloader is responsible for the given URL.
 *
 * The URL is handed to the "url-parser" module, and the result is accepted
 * when either its `hostname` or its `href` is exactly "plosone.org" or
 * "www.plosone.org".
 *
 * @param {string} url - URL to check.
 * @returns {boolean} true when the URL points at PLOS ONE, false otherwise.
 */
module.exports.test = function test(url) {
    // Keep the parse result local so that calls neither leak nor clobber a
    // global `parsedurl` (a bare assignment also throws in strict mode).
    var parsedurl = urlparser.parse(url);

    if (parsedurl.hostname === "plosone.org" ||
        parsedurl.hostname === "www.plosone.org") {
        return true;
    }

    // NOTE(review): presumably meant for scheme-less input, where the parser
    // may leave the bare host in `href` -- confirm against url-parser.
    if (parsedurl.href === "plosone.org" ||
        parsedurl.href === "www.plosone.org") {
        return true;
    }

    return false;
};
|
||
// TODO: this should be split into multiple methods
/**
 * Placeholder for the PLOS ONE download routine.
 *
 * None of the arguments are used yet; every call throws synchronously.
 *
 * @param {*} url - currently unused.
 * @param {*} options - currently unused.
 * @param {*} callback - currently unused; it is never invoked.
 * @throws {Error} always, with the message "not implemented".
 */
module.exports.download = function download(url, options, callback) {
    // TODO: this needs to be implemented
    var notImplemented = new Error("not implemented");
    throw notImplemented;
};
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
{ | ||
"name": "papermonk-downloader-plosone", | ||
"description": "plosone.org scraper", | ||
"version": "0.0.1", | ||
"readmeFilename": "README.md", | ||
"homepage": "https://github.com/kanzure/papermonk-downloader-plosone", | ||
"author": { | ||
"name": "Bryan Bishop", | ||
"email": "kanzure@gmail.com", | ||
"url": "http://heybryan.org/" | ||
}, | ||
"repository": { | ||
"type": "git", | ||
"url": "https://github.com/kanzure/papermonk-downloader-plosone" | ||
}, | ||
"bugs": { | ||
"url": "https://github.com/kanzure/papermonk-downloader-plosone/issues", | ||
"email": "kanzure@gmail.com" | ||
}, | ||
"license": "BSD", | ||
"keywords": [ | ||
"papermonk", | ||
"plos", | ||
"plos one", | ||
"plosone.org", | ||
"public library of science", | ||
"papers", | ||
"pdf", | ||
"pdfs", | ||
"academic articles", | ||
"academic papers", | ||
"scholarly articles", | ||
"scholarly papers", | ||
"journals", | ||
"scraping", | ||
"spidering", | ||
"crawling" | ||
], | ||
"engines": { | ||
"node": ">0" | ||
}, | ||
"devDependencies": { | ||
}, | ||
"dependencies": { | ||
"url-parser": "*", | ||
"tape": ">0" | ||
}, | ||
"optionalDependencies": { | ||
}, | ||
"main": "./index.js" | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
var test = require("tape"); | ||
|
||
// Smoke test: requiring the package root must yield a truthy export.
test("require against the module", function(t) {
    var plugin = require("./");
    t.ok(plugin, "must not be undefined");
    t.end();
});
|
||
// The module must expose a callable `test` export.
test("has a method called test", function(t) {
    var downloader = require("./");

    // A truthiness check would also accept a non-function value such as a
    // string or an object, so assert on the type instead.
    t.equal(typeof downloader.test, "function", "has a method called test");

    t.end();
});
|
||
// Every scheme/host/port/trailing-slash combination below must be recognised
// by downloader.test().
test("matches for a url with plosone.org", function(t) {
    var downloader = require("./");

    var schemes = ["http", "https"];

    // Listed in the order in which the assertions are issued per scheme.
    var locations = [
        "plosone.org/",
        "plosone.org:80/",
        "plosone.org",
        "plosone.org:80",
        "www.plosone.org",
        "www.plosone.org:80",
        "www.plosone.org/",
        "www.plosone.org:80/"
    ];

    schemes.forEach(function(scheme) {
        locations.forEach(function(location) {
            t.ok(downloader.test(scheme + "://" + location));
        });
    });

    /*
    // TODO: url-parser doesn't support these, maybe there's a better module?
    // Scheme-less inputs that should eventually be accepted as well:
    //   "www.plosone.org", "www.plosone.org/",
    //   "www.plosone.org:80", "www.plosone.org:80/",
    //   "plosone.org", "plosone.org/",
    //   "plosone.org:80", "plosone.org:80/"
    */

    t.end();
});