@@ -71,7 +71,6 @@ def write_similar(path, related):
71
71
with open (path , 'w+' ) as outf :
72
72
json .dump (data , outf )
73
73
74
-
75
74
def tags_similarity (p1 , p2 ):
76
75
t1 = set (p1 .tags )
77
76
t2 = set (p2 .tags )
@@ -98,16 +97,16 @@ def title_similarity(p1, p2):
98
97
index = gensim .similarities .MatrixSimilarity (lsi [corpus ])
99
98
for i , post in enumerate (self .site .timeline ):
100
99
# FIXME config output
101
- out_name = os .path .join ('output' , post .destination_path (lang = lang ))+ '.related.json'
100
+ out_name = os .path .join ('output' , post .destination_path (lang = lang )) + '.related.json'
102
101
doc = texts [i ]
103
102
vec_bow = dictionary .doc2bow (doc )
104
103
vec_lsi = lsi [vec_bow ]
105
104
body_sims = index [vec_lsi ]
106
105
tag_sims = [tags_similarity (post , p ) for p in self .site .timeline ]
107
106
title_sims = [title_similarity (post , p ) for p in self .site .timeline ]
108
- full_sims = [tag_sims [i ] + title_sims [i ] + body_sims [i ] * 2 for i in range (len (self .site .timeline ))]
107
+ full_sims = [tag_sims [i ] + title_sims [i ] + body_sims [i ] * 1.5 for i in range (len (self .site .timeline ))]
109
108
full_sims = sorted (enumerate (full_sims ), key = lambda item : - item [1 ])
110
- related = [(self .site .timeline [s [0 ]], s [1 ], tag_sims [s [0 ]], title_sims [s [0 ]], body_sims [s [0 ]]) for s in full_sims [:11 ] if s [0 ] != i ]
109
+ related = [(self .site .timeline [s [0 ]], s [1 ], tag_sims [s [0 ]], title_sims [s [0 ]], body_sims [s [0 ]]) for s in full_sims [:11 ] if s [0 ] != i ]
111
110
task = {
112
111
'basename' : self .name ,
113
112
'name' : out_name ,
0 commit comments