diff options
author | RaNaN <Mast3rRaNaN@hotmail.de> | 2009-06-18 13:17:18 +0200 |
---|---|---|
committer | RaNaN <Mast3rRaNaN@hotmail.de> | 2009-06-18 13:17:18 +0200 |
commit | 0a705696089fdc28463d45598fde2ceb78220790 (patch) | |
tree | 17b8fa4dbd2c04075891e1cec44556e239484e96 | |
parent | fixed disconnecting (diff) | |
download | pyload-0a705696089fdc28463d45598fde2ceb78220790.tar.xz |
plugin for gigasize
-rw-r--r-- | Plugins/GigasizeCom.py | 74 | ||||
-rw-r--r-- | Plugins/Plugin.py | 20 | ||||
-rw-r--r-- | captcha/captcha.py | 81 | ||||
-rw-r--r-- | module/download_thread.py | 11 |
4 files changed, 153 insertions, 33 deletions
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Hoster plugin for gigasize.com downloads (Plugins/GigasizeCom.py)."""

import os
import re
import shutil
import tempfile

from Plugin import Plugin


class GigasizeCom(Plugin):
    """Download plugin for gigasize.com links of the form ``get.php?d=...``.

    The plugin loads the landing page, solves the captcha image through the
    OCR helper installed by ``init_ocr()`` and posts the result; the page
    returned by that post contains the form whose action is the file link.

    ``self.html`` holds two pages: index 0 is the landing page, index 1 is
    the page returned after submitting the captcha.
    """

    CAPTCHA_URL = "http://www.gigasize.com/randomImage.php"
    # NOTE(review): the original code posted to an undefined name
    # (`file_server_url`), which raised NameError on every call. The endpoint
    # below is assumed to be the captcha form target -- confirm against the
    # form markup of the landing page.
    FORM_URL = "http://www.gigasize.com/formdownload.php"
    BASE_URL = "http://gigasize.com"

    FILE_URL_PATTERN = re.compile(
        r'<form action="(/getcgi\.php\?t=[^"]*)" method="post" id="formDownload">')
    FILE_NAME_PATTERN = re.compile(r"<p><strong>Name</strong>: <b>(.*?)</b></p>")
    NOT_FOUND_PATTERN = re.compile(r"HTTP Status 404")

    def __init__(self, parent):
        """Set up plugin metadata and state for the given parent object.

        `parent` must expose a `url` attribute (read by the other methods).
        """
        # Plugin is declared as an old-style class, so call it explicitly
        # instead of going through super().
        Plugin.__init__(self, parent)
        props = {}
        props['name'] = "GigasizeCom"
        props['type'] = "hoster"
        # Dots are escaped so that they only match literal dots.
        props['pattern'] = r"(?:http://)?(?:www\.)?gigasize\.com/get\.php\?d="
        props['version'] = "0.1"
        props['description'] = """Gigasize.com Download Plugin"""
        props['author_name'] = ("spoob")
        props['author_mail'] = ("spoob@pyload.org")
        self.props = props
        self.parent = parent
        self.html = [None, None]
        self.want_reconnect = False
        # init_ocr() looks the OCR class up by props['name'], so it has to
        # run after the properties are in place.
        self.init_ocr()
        self.multi_dl = False

    def download_html(self):
        """Fetch the landing page, solve the captcha and submit it.

        Fills ``self.html[0]`` (landing page) and ``self.html[1]`` (page
        returned after posting the recognised captcha text).
        """
        url = self.parent.url
        self.html[0] = self.req.load(url, cookies=True)

        # Download the captcha into a private temporary directory. The image
        # path does not exist beforehand (as in the original code) and the
        # directory is removed even if the download or the OCR step fails.
        tmp_dir = tempfile.mkdtemp()
        try:
            captcha_image = os.path.join(tmp_dir, "captcha.jpg")
            self.req.download(self.CAPTCHA_URL, captcha_image, cookies=True)
            captcha = self.ocr.get_captcha(captcha_image)
        finally:
            shutil.rmtree(tmp_dir, ignore_errors=True)

        # Debug aid kept from the original implementation.
        print(captcha)

        self.html[1] = self.req.load(self.FORM_URL, None,
                                     {"txtNumber": captcha}, cookies=True)

    def get_file_url(self):
        """ returns the absolute downloadable filepath

        Returns False when a reconnect is wanted or when the download form
        is not present in the post-captcha page (e.g. the captcha was
        rejected).
        """
        if self.html[0] is None:
            self.download_html()
        if self.want_reconnect:
            return False

        match = self.FILE_URL_PATTERN.search(self.html[1] or "")
        if match is None:
            return False
        return self.BASE_URL + match.group(1)

    def get_file_name(self):
        """Return the file name shown on the landing page.

        Falls back to the original link when a reconnect is wanted or the
        name cannot be found in the page.
        """
        if self.html[0] is None:
            self.download_html()
        if self.want_reconnect:
            return self.parent.url

        match = self.FILE_NAME_PATTERN.search(self.html[0] or "")
        if match is None:
            return self.parent.url
        return match.group(1)

    def file_exists(self):
        """ returns True or False
        """
        if self.html[0] is None:
            self.download_html()
        # Only the landing page (index 0) is inspected; self.html itself is a
        # list and cannot be handed to re.search.
        return self.NOT_FOUND_PATTERN.search(self.html[0] or "") is None

    def proceed(self, url, location):
        """Download `url` to `location`, sending the session cookies."""
        self.req.download(url, location, cookies=True)
\ No newline at end of file diff --git a/Plugins/Plugin.py b/Plugins/Plugin.py index f43172b55..ba3ee29b3 100644 --- a/Plugins/Plugin.py +++ b/Plugins/Plugin.py @@ -1,6 +1,22 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- - +# +#Copyright (C) 2009 kingzero, RaNaN +# +#This program is free software; you can redistribute it and/or modify +#it under the terms of the GNU General Public License as published by +#the Free Software Foundation; either version 3 of the License, +#or (at your option) any later version. +# +#This program is distributed in the hope that it will be useful, +#but WITHOUT ANY WARRANTY; without even the implied warranty of +#MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +#See the GNU General Public License for more details. +# +#You should have received a copy of the GNU General Public License +# along with this program; if not, see <http://www.gnu.org/licenses/>. +# +### import ConfigParser import re @@ -76,7 +92,7 @@ class Plugin(): self.config[option] = False if self.config[option].lower() == 'false' else self.config[option] def init_ocr(self): - modul = __import__(self.props['name'], fromlist=['plugins']) + modul = __import__("captcha."+self.props['name'], fromlist=['captcha']) captchaClass = getattr(modul, self.props['name']) self.ocr = captchaClass() diff --git a/captcha/captcha.py b/captcha/captcha.py index de3e61cf0..a76a7aa25 100644 --- a/captcha/captcha.py +++ b/captcha/captcha.py @@ -1,6 +1,27 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +#Copyright (C) 2009 kingzero, RaNaN +# +#This program is free software; you can redistribute it and/or modify +#it under the terms of the GNU General Public License as published by +#the Free Software Foundation; either version 3 of the License, +#or (at your option) any later version. +# +#This program is distributed in the hope that it will be useful, +#but WITHOUT ANY WARRANTY; without even the implied warranty of +#MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+#See the GNU General Public License for more details. +# +#You should have received a copy of the GNU General Public License +# along with this program; if not, see <http://www.gnu.org/licenses/>. +# +### +import subprocess +import tempfile + import Image import ImageOps -import subprocess class OCR(object): def __init__(self): @@ -9,28 +30,37 @@ class OCR(object): def load_image(self, image): self.image = Image.open(image) self.pixels = self.image.load() - self.image_name = 'captcha_clean.png' self.result_captcha = '' - def unload(): + def unload(self): """delete all tmp images""" pass def threshold(self, value): - self.image = self.image.point(lambda a: a * value +10) + self.image = self.image.point(lambda a: a * value + 10) + + def run(self, command, inputdata=None): + """Run a command and return standard output""" + pipe = subprocess.PIPE + popen = subprocess.Popen(command, stdout=pipe, stderr=pipe) + outputdata, errdata = popen.communicate(inputdata) + assert (popen.returncode == 0), \ + "Error running: %s\n\n%s" % (command, errdata) + return outputdata def run_gocr(self): - self.image.save(self.image_name) - cmd = ['gocr', self.image_name] - self.result_captcha = subprocess.Popen(cmd, stdout=subprocess.PIPE).communicate()[0].replace('\n','') + tmp = tempfile.NamedTemporaryFile(suffix=".jpg") + self.image.save(tmp) + self.result_captcha = self.run(['gocr', tmp.name]).replace("\n", "") def run_tesser(self): - self.image.save('captcha.tif', 'TIFF') - cmd = ['tesseract', 'captcha.tif', '0'] - self.result_captcha = subprocess.Popen(cmd) - self.result_captcha.wait() - cmd = ['cat', '0.txt'] - self.result_captcha = subprocess.Popen(cmd, stdout=subprocess.PIPE).communicate()[0].replace('\n','') + tmp = tempfile.NamedTemporaryFile(suffix=".tif") + tmpTxt = tempfile.NamedTemporaryFile(suffix=".txt") + + self.image.save(tmp.name, 'TIFF') + self.run(['tesseract', tmp.name, tmpTxt.name.replace(".txt", "")]) + + self.result_captcha = self.run(['cat', tmpTxt.name]) def 
get_captcha(self): raise NotImplementedError @@ -49,29 +79,28 @@ class OCR(object): for x in xrange(w): for y in xrange(h): - # no point in processing white pixels since we only want to remove black pixels if pixels[x, y] == 255: continue - + # no point in processing white pixels since we only want to remove black pixel count = 0 try: if pixels[x-1, y-1] != 255: count += 1 - if pixels[x-1, y ] != 255: count += 1 - if pixels[x-1, y+1] != 255: count += 1 - if pixels[x, y+1 ] != 255: count += 1 - if pixels[x+1, y+1] != 255: count += 1 - if pixels[x+1, y ] != 255: count += 1 - if pixels[x+1, y-1] != 255: count += 1 - if pixels[x, y-1 ] != 255: count += 1 + if pixels[x-1, y] != 255: count += 1 + if pixels[x-1, y + 1] != 255: count += 1 + if pixels[x, y + 1] != 255: count += 1 + if pixels[x + 1, y + 1] != 255: count += 1 + if pixels[x + 1, y] != 255: count += 1 + if pixels[x + 1, y-1] != 255: count += 1 + if pixels[x, y-1] != 255: count += 1 except: pass - # not enough neighbors are dark pixels so mark this pixel - # to be changed to white + # not enough neighbors are dark pixels so mark this pixel + # to be changed to white if count < allowed: pixels[x, y] = 1 - - # second pass: this time set all 1's to 255 (white) + + # second pass: this time set all 1's to 255 (white) for x in xrange(w): for y in xrange(h): if pixels[x, y] == 1: pixels[x, y] = 255 diff --git a/module/download_thread.py b/module/download_thread.py index a290bc4f7..f68cfedd7 100644 --- a/module/download_thread.py +++ b/module/download_thread.py @@ -17,9 +17,10 @@ # along with this program; if not, see <http://www.gnu.org/licenses/>. 
# ### - import threading -from time import time, sleep +import traceback +from time import sleep +from time import time class Status(object): @@ -45,7 +46,7 @@ class Status(object): return self.pyfile.plugin.req.dl_size / 1024 def percent(self): if not self.kB_left() == 0 and not self.size() == 0: - return ((self.size()-self.kB_left())*100)/self.size() + return ((self.size()-self.kB_left()) * 100) / self.size() return 0 class Download_Thread(threading.Thread): @@ -65,7 +66,7 @@ class Download_Thread(threading.Thread): try: self.download(self.loadedPyFile) except Exception, e: - print "Error:", e #catch up all error here + traceback.print_exc() self.loadedPyFile.status.type = "failed" finally: self.parent.job_finished(self.loadedPyFile) @@ -79,7 +80,7 @@ class Download_Thread(threading.Thread): pyfile.prepareDownload() if not status.exists: - raise "FileDontExists" #i know its deprecated, who cares^^ + raise "FileDontExists", "The file was not found on the server." #i know its deprecated, who cares^^ status.type = "waiting" |