Skip to content

Commit 0516994

Browse files
author
Roberto Alsina
committed May 22, 2017
lint
1 parent e7fb38e commit 0516994

File tree

1 file changed

+6
-7
lines changed

1 file changed

+6
-7
lines changed
 

‎v7/similarity/similarity.py

+6-7
Original file line number | Diff line number | Diff line change
@@ -30,33 +30,32 @@
3030

3131
from nikola.plugin_categories import Task
3232

33+
3334
class Similarity(Task):
    """Task plugin that ranks the site's posts by textual similarity."""

    name = "similarity"

    def set_site(self, site):
        """Remember the site object this plugin works on."""
        self.site = site

    def gen_tasks(self):
        """Rank every post against all posts and print the top matches.

        Each post's text is stripped of HTML, lowercased, and split on
        whitespace.  A gensim dictionary and bag-of-words corpus are built
        from those token lists, a 2-topic LSI model is fitted, and a
        similarity index is created over the LSI-projected corpus.  For each
        post, its ten highest-scoring ``(timeline_index, score)`` pairs are
        printed.

        Nothing is yielded or returned by this method.
        """
        self.site.scan_posts()

        # One token list per post, in timeline order.
        tokenized = [
            entry.text(strip_html=True).lower().split()
            for entry in self.site.timeline
        ]

        vocabulary = gensim.corpora.Dictionary(tokenized)
        bags = [vocabulary.doc2bow(tokens) for tokens in tokenized]
        model = gensim.models.LsiModel(bags, id2word=vocabulary, num_topics=2)
        matrix = gensim.similarities.MatrixSimilarity(model[bags])

        for position, _post in enumerate(self.site.timeline):
            # Project this post into LSI space and score it against the index.
            projected = model[vocabulary.doc2bow(tokenized[position])]
            scores = matrix[projected]
            # Highest score first; ties keep their timeline order.
            ranked = sorted(
                enumerate(scores), key=lambda pair: pair[1], reverse=True
            )
            print(position, ranked[:10])
61-
62-

0 commit comments

Comments
 (0)
Please sign in to comment.