Skip to content

Commit a80a2d7

Browse files
committed Nov 2, 2017
fix reddit threadurl crawler
1 parent 79aacae commit a80a2d7

File tree

3 files changed

+8
-9
lines changed

3 files changed

+8
-9
lines changed
 

‎configs/sfw.ini

+3-4
Original file line number | Diff line number | Diff line change
@@ -2,14 +2,13 @@
22
Port: 5000
33

44
[Logging]
5-
Destination: syslog
6-
Verbosity: warning
5+
Destination: File
6+
File: /tmp/foo.log
7+
Verbosity: debug
78

89
[Sites]
910
SoupIO: false
1011
Pr0gramm: false
1112
Reddit: gifs,pics,aww_gifs,reactiongifs,Oddlysatisfying,Spaceflightporn,Therewasanattempt,Unexpected,Whatcouldgowrong,Weird,photoshopbattles,NichtDerPostillon,Nonononoyes,earthporn*0.5,Bettereveryloop,machineporn,Shittyfoodporn,Instant_regret,Mildlyinfuriating,Crappydesign,Accidentalwesanderson,engineeringporn,tinder,hmmm*2.0,mechanical_gifs,cableporn,shittylifehacks*3.0,perfecttiming,AnimalPorn,CatGifs,ChildrenFallingOver,funny,geek,IdiotsInCars,interestingasfuck,itsaunixsystem*5.0,MadeMeSmile,PeopleFuckingDying,spaceflightporn,woahdude,spaceporn*2.0,chargeyourphone,japanpics,neckbeardnests,techsupportgore
12-
NineGag: wtf,girl*0.1,hot,trending
1313
Instagram: false
1414
Fourchan: false
15-
Giphy: wtf*0.1

‎crawler/__init__.py

+4-4
Original file line number | Diff line number | Diff line change
@@ -226,12 +226,11 @@ def __images_clear(cls):
226226
cls.__images = {} # alternative: cls.__images[:] = [] # be aware: list.clear() is not available in py2
227227

228228
@classmethod
229-
def __add_image(cls, uri, crawler, site, threadurl=None):
229+
def __add_image(cls, uri, crawler, site, threadurl):
230230
"""
231231
:type uri: str
232232
:type crawler: str
233233
:type site: str
234-
:type threadurl: str
235234
:return: bool
236235
"""
237236
if not cls._is_image(uri):
@@ -374,13 +373,14 @@ def crawl(self):
374373
e = sys.exc_info()[0]
375374
self.__class__._log("exception", "unexpected crawler error: %s" % (repr(e)))
376375

377-
def _add_image(self, uri, site):
376+
def _add_image(self, uri, site, threadurl=None):
378377
"""
379378
:type uri: str
380379
:type site: str
380+
:type threadurl: str
381381
:rtype: bool
382382
"""
383-
return self.__class__.__add_image(uri, self.__class__.__name__, site)
383+
return self.__class__.__add_image(uri, self.__class__.__name__, site, threadurl)
384384

385385
## abstract functions
386386

‎crawler/reddit.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -46,7 +46,7 @@ def _crawl(self):
4646
for child in data['data']['children']:
4747
image = child['data']['url']
4848
if image:
49-
threadurl = 'https://reddit.com/' + child['data']['permalink']
49+
threadurl = 'https://reddit.com' + child['data']['permalink']
5050
self.__class__._log("debug", threadurl)
5151
if self._add_image(image, self.__site, threadurl):
5252
images_added += 1

0 commit comments

Comments
 (0)
Please sign in to comment.