Fix Reddit crawler thread URLs: drop the extra slash before the permalink and pass threadurl through _add_image

noqqe · noqqe · commit a80a2d7f2bf0 · 2017-11-02T22:38:40.000+01:00
diff --git a/configs/sfw.ini b/configs/sfw.ini
@@ -2,14 +2,13 @@
 Port: 5000
 
 [Logging]
-Destination: syslog
-Verbosity: warning
+Destination: File
+File: /tmp/foo.log
+Verbosity: debug
 
 [Sites]
 SoupIO: false
 Pr0gramm: false
 Reddit: gifs,pics,aww_gifs,reactiongifs,Oddlysatisfying,Spaceflightporn,Therewasanattempt,Unexpected,Whatcouldgowrong,Weird,photoshopbattles,NichtDerPostillon,Nonononoyes,earthporn*0.5,Bettereveryloop,machineporn,Shittyfoodporn,Instant_regret,Mildlyinfuriating,Crappydesign,Accidentalwesanderson,engineeringporn,tinder,hmmm*2.0,mechanical_gifs,cableporn,shittylifehacks*3.0,perfecttiming,AnimalPorn,CatGifs,ChildrenFallingOver,funny,geek,IdiotsInCars,interestingasfuck,itsaunixsystem*5.0,MadeMeSmile,PeopleFuckingDying,spaceflightporn,woahdude,spaceporn*2.0,chargeyourphone,japanpics,neckbeardnests,techsupportgore
-NineGag: wtf,girl*0.1,hot,trending
 Instagram: false
 Fourchan: false
-Giphy: wtf*0.1
diff --git a/crawler/__init__.py b/crawler/__init__.py
@@ -226,12 +226,11 @@ def __images_clear(cls):
         cls.__images = {}  # alternative: cls.__images[:] = [] # be aware: list.clean() is not available in py2
 
     @classmethod
-    def __add_image(cls, uri, crawler, site, threadurl=None):
+    def __add_image(cls, uri, crawler, site, threadurl):
         """
         :type uri: str
         :type crawler: str
         :type site: str
-        :type threadurl: str
         :return: bool
         """
         if not cls._is_image(uri):
@@ -374,13 +373,14 @@ def crawl(self):
             e = sys.exc_info()[0]
             self.__class__._log("exception", "unexpected crawler error: %s" % (repr(e)))
 
-    def _add_image(self, uri, site):
+    def _add_image(self, uri, site, threadurl=None):
         """
         :type uri: str
         :type site: str
+        :type threadurl: str
         :rtype: bool
         """
-        return self.__class__.__add_image(uri, self.__class__.__name__, site)
+        return self.__class__.__add_image(uri, self.__class__.__name__, site, threadurl)
 
     ## abstract functions
 
diff --git a/crawler/reddit.py b/crawler/reddit.py
@@ -46,7 +46,7 @@ def _crawl(self):
         for child in data['data']['children']:
             image = child['data']['url']
             if image:
-                threadurl = 'https://reddit.com/' + child['data']['permalink']
+                threadurl = 'https://reddit.com' + child['data']['permalink']
                 self.__class__._log("debug", threadurl)
                 if self._add_image(image, self.__site, threadurl):
                     images_added += 1