Skip to content
Permalink

Comparing changes

Choose two branches to see what’s changed or to start a new pull request. If you need to, you can also compare across forks or learn more about diff comparisons.

Open a pull request

Create a new pull request by comparing changes across two branches. If you need to, you can also compare across forks. Learn more about diff comparisons here.
base repository: getnikola/plugins
Failed to load repositories. Confirm that selected base ref is valid, then try again.
Loading
base: 561fcf7c49d7
Choose a base ref
...
head repository: getnikola/plugins
Failed to load repositories. Confirm that selected head ref is valid, then try again.
Loading
compare: 32d87020a2bc
Choose a head ref
  • 2 commits
  • 2 files changed
  • 1 contributor

Commits on May 23, 2017

  1. missing dep

    Roberto Alsina committed May 23, 2017
    Copy the full SHA
    ed41e64 View commit details
  2. dep handling (broken magictimeline)

    Roberto Alsina committed May 23, 2017

    Verified

    This commit was created on GitHub.com and signed with GitHub’s verified signature. The key has expired.
    Copy the full SHA
    32d8702 View commit details
Showing with 11 additions and 2 deletions.
  1. +1 −0 v7/similarity/requirements.txt
  2. +10 −2 v7/similarity/similarity.py
1 change: 1 addition & 0 deletions v7/similarity/requirements.txt
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
stop-words
gensim
12 changes: 10 additions & 2 deletions v7/similarity/similarity.py
Original file line number Diff line number Diff line change
@@ -33,6 +33,7 @@
from stop_words import get_stop_words

from nikola.plugin_categories import Task
from nikola import utils


class Similarity(Task):
@@ -46,6 +47,12 @@ def gen_tasks(self):
"""Build similarity data for each post."""
self.site.scan_posts()

kw = {
"translations": self.site.translations,
"output_folder": self.site.config["OUTPUT_FOLDER"],
}


stopwords = {}
for l in self.site.translations:
stopwords[l] = get_stop_words(l)
@@ -54,7 +61,6 @@ def split_text(text, lang="en"):
words = text.lower().split()
return [w for w in words if w not in stopwords[lang]]

# FIXME langs!!!!
texts = []

yield self.group_task()
@@ -97,7 +103,7 @@ def title_similarity(p1, p2):
index = gensim.similarities.MatrixSimilarity(lsi[corpus])
for i, post in enumerate(self.site.timeline):
# FIXME config output
out_name = os.path.join('output', post.destination_path(lang=lang)) + '.related.json'
out_name = os.path.join(kw['output_folder'], post.destination_path(lang=lang)) + '.related.json'
doc = texts[i]
vec_bow = dictionary.doc2bow(doc)
vec_lsi = lsi[vec_bow]
@@ -112,5 +118,7 @@ def title_similarity(p1, p2):
'name': out_name,
'targets': [out_name],
'actions': [(write_similar, (out_name, related))],
# 'file_dep': ['####MAGIC####TIMELINE'],
'uptodate': [utils.config_changed({1: kw}, 'similarity')],
}
yield task