summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- Plugins/FourChan.py 14
-rw-r--r-- captcha/ShareonlineBiz.py 9
2 files changed, 18 insertions, 5 deletions
diff --git a/Plugins/FourChan.py b/Plugins/FourChan.py
index 020270e9b..840274457 100644
--- a/Plugins/FourChan.py
+++ b/Plugins/FourChan.py
@@ -12,14 +12,13 @@ class FourChan(Plugin):
props = {}
props['name'] = "FourChan"
props['type'] = "container"
- props['pattern'] = r"http://(www\.)?(img\.)?(zip\.)?4chan.org/\w+/res/"
+ props['pattern'] = r"http://(www\.)?(img\.)?(zip\.)?4chan.org/\w+/(res/|imgboard\.html)"
props['version'] = "0.1"
props['description'] = """4chan.org Thread Download Plugin"""
props['author_name'] = ("Spoob")
props['author_mail'] = ("Spoob@pyload.org")
self.props = props
self.parent = parent
- print True
self.html = None
def file_exists(self):
@@ -30,4 +29,13 @@ class FourChan(Plugin):
def proceed(self, url, location):
url = self.parent.url
html = self.req.load(url)
- self.links = re.findall('File : <a href="(http://(?:img\.)?(?:zip\.)?4chan\.org/\w{,3}/src/\d*\..{3})"', html)
+ link_pattern = ""
+ temp_links = []
+ if "imgboard.html" in url:
+ link_pattern = r'\[<a href="(res/\d*\.html)">Reply</a>\]'
+ temp_links = re.findall(link_pattern, html)
+ for link in re.findall(link_pattern, html):
+ temp_links.append(link)
+ else:
+ temp_links = re.findall('File : <a href="(http://(?:img\.)?(?:zip\.)?4chan\.org/\w{,3}/src/\d*\..{3})"', html)
+ self.links = temp_links
diff --git a/captcha/ShareonlineBiz.py b/captcha/ShareonlineBiz.py
index 1e3fc6214..5c8e682f4 100644
--- a/captcha/ShareonlineBiz.py
+++ b/captcha/ShareonlineBiz.py
@@ -6,23 +6,28 @@ class ShareonlineBiz(OCR):
OCR.__init__(self)
def get_captcha(self, image):
+ urllib.urlretrieve("http://www.share-online.biz/captcha.php", "captcha.jpeg")
self.load_image(image)
self.to_greyscale()
self.image = self.image.resize((160, 50))
self.pixels = self.image.load()
self.threshold(1.85)
self.eval_black_white(240)
+ self.derotate_by_average()
letters = self.split_captcha_letters()
final = ""
+ i = 0
for letter in letters:
self.image = letter
- self.run_tesser()
+ self.image.save(str(i) + ".jpeg")
+ self.run_gocr()
final += self.result_captcha
+ i += 1
return final
if __name__ == '__main__':
ocr = ShareonlineBiz()
- print ocr.get_captcha('captcha.php3.jpeg')
+ print ocr.get_captcha('captcha.jpeg')