diff options
-rw-r--r-- | Plugins/FourChan.py | 14 | +++++++++++---
-rw-r--r-- | captcha/ShareonlineBiz.py | 9 | +++++++--
2 files changed, 18 insertions, 5 deletions
diff --git a/Plugins/FourChan.py b/Plugins/FourChan.py index 020270e9b..840274457 100644 --- a/Plugins/FourChan.py +++ b/Plugins/FourChan.py @@ -12,14 +12,13 @@ class FourChan(Plugin): props = {} props['name'] = "FourChan" props['type'] = "container" - props['pattern'] = r"http://(www\.)?(img\.)?(zip\.)?4chan.org/\w+/res/" + props['pattern'] = r"http://(www\.)?(img\.)?(zip\.)?4chan.org/\w+/(res/|imgboard\.html)" props['version'] = "0.1" props['description'] = """4chan.org Thread Download Plugin""" props['author_name'] = ("Spoob") props['author_mail'] = ("Spoob@pyload.org") self.props = props self.parent = parent - print True self.html = None def file_exists(self): @@ -30,4 +29,13 @@ class FourChan(Plugin): def proceed(self, url, location): url = self.parent.url html = self.req.load(url) - self.links = re.findall('File : <a href="(http://(?:img\.)?(?:zip\.)?4chan\.org/\w{,3}/src/\d*\..{3})"', html) + link_pattern = "" + temp_links = [] + if "imagebord.html" in url: + link_pattern = '[<a href="(res/\d*\.html)">Reply</a>]' + temp_links = re.findall(link_pattern, html) + for link in re.findall(link_pattern, html): + temp_links.append(link) + else: + temp_links = re.findall('File : <a href="(http://(?:img\.)?(?:zip\.)?4chan\.org/\w{,3}/src/\d*\..{3})"', html) + self.links = temp_links diff --git a/captcha/ShareonlineBiz.py b/captcha/ShareonlineBiz.py index 1e3fc6214..5c8e682f4 100644 --- a/captcha/ShareonlineBiz.py +++ b/captcha/ShareonlineBiz.py @@ -6,23 +6,28 @@ class ShareonlineBiz(OCR): OCR.__init__(self) def get_captcha(self, image): + urllib.urlretrieve("http://www.share-online.biz/captcha.php", "captcha.jpeg") self.load_image(image) self.to_greyscale() self.image = self.image.resize((160, 50)) self.pixels = self.image.load() self.threshold(1.85) self.eval_black_white(240) + self.derotate_by_average() letters = self.split_captcha_letters() final = "" + i = 0 for letter in letters: self.image = letter - self.run_tesser() + self.image.save(str(i) + ".jpeg") + 
self.run_gocr() final += self.result_captcha + i += 1 return final if __name__ == '__main__': ocr = ShareonlineBiz() - print ocr.get_captcha('captcha.php3.jpeg') + print ocr.get_captcha('captcha.jpeg') |