diff options
Diffstat (limited to 'module/plugins')
-rw-r--r-- | module/plugins/Base.py | 118 | ||||
-rw-r--r-- | module/plugins/Hoster.py | 96 | ||||
-rw-r--r-- | module/plugins/captcha/GigasizeCom.py | 19 | ||||
-rw-r--r-- | module/plugins/captcha/LinksaveIn.py | 147 | ||||
-rw-r--r-- | module/plugins/captcha/MegauploadCom.py | 14 | ||||
-rw-r--r-- | module/plugins/captcha/__init__.py | 0 | ||||
-rw-r--r-- | module/plugins/internal/NetloadInOCR.py (renamed from module/plugins/captcha/NetloadIn.py) | 11 | ||||
-rw-r--r-- | module/plugins/internal/OCR.py (renamed from module/plugins/captcha/captcha.py) | 3 | ||||
-rw-r--r-- | module/plugins/internal/ShareonlineBizOCR.py (renamed from module/plugins/captcha/ShareonlineBiz.py) | 8 |
9 files changed, 125 insertions, 291 deletions
diff --git a/module/plugins/Base.py b/module/plugins/Base.py index b846bbd60..61fa211f4 100644 --- a/module/plugins/Base.py +++ b/module/plugins/Base.py @@ -18,8 +18,11 @@ """ import sys +from time import time, sleep +from random import randint + from module.utils import decode -from module.utils.fs import exists, makedirs, join +from module.utils.fs import exists, makedirs, join, remove # TODO # more attributes if needed @@ -32,6 +35,9 @@ class Fail(Exception): class Retry(Exception): """ raised when start again from beginning """ +class Abort(Exception): + """ raised when aborted """ + class Base(object): """ The Base plugin class with all shared methods and every possible attribute for plugin definition. @@ -83,6 +89,9 @@ class Base(object): #: :class:`InteractionManager` self.im = core.interactionManager + #: last interaction task + self.task = None + def logInfo(self, *args, **kwargs): """ Print args to log at specific level @@ -118,11 +127,7 @@ class Base(object): getattr(self.log, level)("%s: %s" % (self.__name__, sep.join(strings))) def setConfig(self, option, value): - """ Set config value for current plugin - - :param option: - :param value: - """ + """ Set config value for current plugin """ self.core.config.set(self.__name__, option, value) def getConf(self, option): @@ -130,11 +135,7 @@ class Base(object): return self.core.config.get(self.__name__, option) def getConfig(self, option): - """ Returns config value for current plugin - - :param option: - :return: - """ + """ Returns config value for current plugin """ return self.getConf(option) def setStorage(self, key, value): @@ -167,6 +168,14 @@ class Base(object): sys.stdout = sys._stdout embed() + def abort(self): + """ Check if plugin is in an abort state, is overwritten by subtypes""" + return False + + def checkAbort(self): + """ Will be overwriten to determine if control flow should be aborted """ + if self.abort: raise Abort() + def load(self, url, get={}, post={}, ref=True, cookies=True, just_header=False, decode=False): """Load content at url and returns it @@ -180,6 +189,7 @@ class Base(object): :return: Loaded content """ if not hasattr(self, "req"): raise Exception("Plugin type does not have Request attribute.") + self.checkAbort() res = self.req.load(url, get, post, ref, cookies, just_header, decode=decode) @@ -225,6 +235,92 @@ class Base(object): return res + def invalidTask(self): + if self.task: + self.task.invalid() + + def invalidCaptcha(self): + self.logDebug("Deprecated method .invalidCaptcha, use .invalidTask") + self.invalidTask() + + def correctTask(self): + if self.task: + self.task.correct() + + def correctCaptcha(self): + self.logDebug("Deprecated method .correctCaptcha, use .correctTask") + self.correctTask() + + def decryptCaptcha(self, url, get={}, post={}, cookies=False, forceUser=False, imgtype='jpg', + result_type='textual'): + """ Loads a captcha and decrypts it with ocr, plugin, user input + + :param url: url of captcha image + :param get: get part for request + :param post: post part for request + :param cookies: True if cookies should be enabled + :param forceUser: if True, ocr is not used + :param imgtype: Type of the Image + :param result_type: 'textual' if text is written on the captcha\ + or 'positional' for captcha where the user have to click\ + on a specific region on the captcha + + :return: result of decrypting + """ + + img = self.load(url, get=get, post=post, cookies=cookies) + + id = ("%.2f" % time())[-6:].replace(".", "") + temp_file = open(join("tmp", "tmpCaptcha_%s_%s.%s" % (self.__name__, id, imgtype)), "wb") + temp_file.write(img) + temp_file.close() + + name = "%sOCR" % self.__name__ + has_plugin = name in self.core.pluginManager.getPlugins("internal") + + if self.core.captcha: + OCR = self.core.pluginManager.loadClass("internal", name) + else: + OCR = None + + if OCR and not forceUser: + sleep(randint(3000, 5000) / 1000.0) + self.checkAbort() + + ocr = OCR() + result = ocr.get_captcha(temp_file.name) + else: + task = self.im.newCaptchaTask(img, imgtype, temp_file.name, result_type) + self.task = task + self.im.handleTask(task) + + while task.isWaiting(): + if self.abort(): + self.im.removeTask(task) + raise Abort() + sleep(1) + + #TODO + self.im.removeTask(task) + + if task.error and has_plugin: #ignore default error message since the user could use OCR + self.fail(_("Pil and tesseract not installed and no Client connected for captcha decrypting")) + elif task.error: + self.fail(task.error) + elif not task.result: + self.fail(_("No captcha result obtained in appropiate time by any of the plugins.")) + + result = task.result + self.log.debug("Received captcha result: %s" % str(result)) + + if not self.core.debug: + try: + remove(temp_file.name) + except: + pass + + return result + def fail(self, reason): """ fail and give reason """ raise Fail(reason)
\ No newline at end of file diff --git a/module/plugins/Hoster.py b/module/plugins/Hoster.py index 32c587aa5..b330743e6 100644 --- a/module/plugins/Hoster.py +++ b/module/plugins/Hoster.py @@ -17,10 +17,8 @@ @author: RaNaN, spoob, mkaay """ -from time import time, sleep -from random import randint - import os +from time import time if os.name != "nt": from module.utils.fs import chown @@ -35,9 +33,6 @@ from module.utils.fs import save_join, save_filename, fs_encode, fs_decode,\ # Import for Hoster Plugins chunks = _chunks -class Abort(Exception): - """ raised when aborted """ - class Reconnect(Exception): """ raised when reconnected """ @@ -170,6 +165,9 @@ class Hoster(Base): """the 'main' method of every plugin, you **have to** overwrite it""" raise NotImplementedError + def abort(self): + return self.pyfile.abort + def resetAccount(self): """ dont use account and retry download """ self.account = None @@ -208,7 +206,7 @@ class Hoster(Base): while self.pyfile.waitUntil > time(): self.thread.m.reconnecting.wait(2) - if self.pyfile.abort: raise Abort + self.checkAbort() if self.thread.m.reconnecting.isSet(): self.waiting = False self.wantReconnect = False @@ -243,88 +241,6 @@ class Hoster(Base): self.retries += 1 raise Retry(reason) - def invalidCaptcha(self): - if self.cTask: - self.cTask.invalid() - - def correctCaptcha(self): - if self.cTask: - self.cTask.correct() - - def decryptCaptcha(self, url, get={}, post={}, cookies=False, forceUser=False, imgtype='jpg', - result_type='textual'): - """ Loads a captcha and decrypts it with ocr, plugin, user input - - :param url: url of captcha image - :param get: get part for request - :param post: post part for request - :param cookies: True if cookies should be enabled - :param forceUser: if True, ocr is not used - :param imgtype: Type of the Image - :param result_type: 'textual' if text is written on the captcha\ - or 'positional' for captcha where the user have to click\ - on a specific region on the captcha - - :return: result of decrypting - """ - - img = self.load(url, get=get, post=post, cookies=cookies) - - id = ("%.2f" % time())[-6:].replace(".", "") - temp_file = open(join("tmp", "tmpCaptcha_%s_%s.%s" % (self.__name__, id, imgtype)), "wb") - temp_file.write(img) - temp_file.close() - - has_plugin = self.__name__ in self.core.pluginManager.getPlugins("captcha") - - if self.core.captcha: - Ocr = self.core.pluginManager.loadClass("captcha", self.__name__) - else: - Ocr = None - - if Ocr and not forceUser: - sleep(randint(3000, 5000) / 1000.0) - if self.pyfile.abort: raise Abort - - ocr = Ocr() - result = ocr.get_captcha(temp_file.name) - else: - captchaManager = self.core.captchaManager - task = captchaManager.newTask(img, imgtype, temp_file.name, result_type) - self.cTask = task - captchaManager.handleCaptcha(task) - - while task.isWaiting(): - if self.pyfile.abort: - captchaManager.removeTask(task) - raise Abort - sleep(1) - - captchaManager.removeTask(task) - - if task.error and has_plugin: #ignore default error message since the user could use OCR - self.fail(_("Pil and tesseract not installed and no Client connected for captcha decrypting")) - elif task.error: - self.fail(task.error) - elif not task.result: - self.fail(_("No captcha result obtained in appropiate time by any of the plugins.")) - - result = task.result - self.log.debug("Received captcha result: %s" % str(result)) - - if not self.core.debug: - try: - remove(temp_file.name) - except: - pass - - return result - - - def load(self, *args, **kwargs): - """ See 'Base' load method for more info """ - if self.pyfile.abort: raise Abort - return Base.load(self, *args, **kwargs) def download(self, url, get={}, post={}, ref=True, cookies=True, disposition=False): """Downloads the content at url to download folder @@ -338,8 +254,8 @@ class Hoster(Base): the filename will be changed if needed :return: The location where the file was saved """ - self.checkForSameFiles() + self.checkAbort() self.pyfile.setStatus("downloading") diff --git a/module/plugins/captcha/GigasizeCom.py b/module/plugins/captcha/GigasizeCom.py deleted file mode 100644 index d31742eb5..000000000 --- a/module/plugins/captcha/GigasizeCom.py +++ /dev/null @@ -1,19 +0,0 @@ -# -*- coding: utf-8 -*- -from captcha import OCR - -class GigasizeCom(OCR): - def __init__(self): - OCR.__init__(self) - - def get_captcha(self, image): - self.load_image(image) - self.threshold(2.8) - self.run_tesser(True, False, False, True) - return self.result_captcha - -if __name__ == '__main__': - ocr = GigasizeCom() - import urllib - urllib.urlretrieve('http://www.gigasize.com/randomImage.php', "gigasize_tmp.jpg") - - print ocr.get_captcha('gigasize_tmp.jpg') diff --git a/module/plugins/captcha/LinksaveIn.py b/module/plugins/captcha/LinksaveIn.py deleted file mode 100644 index 3ad7b265a..000000000 --- a/module/plugins/captcha/LinksaveIn.py +++ /dev/null @@ -1,147 +0,0 @@ -from captcha import OCR -import Image -from os import sep -from os.path import dirname -from os.path import abspath -from glob import glob - - -class LinksaveIn(OCR): - __name__ = "LinksaveIn" - def __init__(self): - OCR.__init__(self) - self.data_dir = dirname(abspath(__file__)) + sep + "LinksaveIn" + sep - - def load_image(self, image): - im = Image.open(image) - frame_nr = 0 - - lut = im.resize((256, 1)) - lut.putdata(range(256)) - lut = list(lut.convert("RGB").getdata()) - - new = Image.new("RGB", im.size) - npix = new.load() - while True: - try: - im.seek(frame_nr) - except EOFError: - break - frame = im.copy() - pix = frame.load() - for x in range(frame.size[0]): - for y in range(frame.size[1]): - if lut[pix[x, y]] != (0,0,0): - npix[x, y] = lut[pix[x, y]] - frame_nr += 1 - new.save(self.data_dir+"unblacked.png") - self.image = new.copy() - self.pixels = self.image.load() - self.result_captcha = '' - - def get_bg(self): - stat = {} - cstat = {} - img = self.image.convert("P") - for bgpath in glob(self.data_dir+"bg/*.gif"): - stat[bgpath] = 0 - bg = Image.open(bgpath) - - bglut = bg.resize((256, 1)) - bglut.putdata(range(256)) - bglut = list(bglut.convert("RGB").getdata()) - - lut = img.resize((256, 1)) - lut.putdata(range(256)) - lut = list(lut.convert("RGB").getdata()) - - bgpix = bg.load() - pix = img.load() - for x in range(bg.size[0]): - for y in range(bg.size[1]): - rgb_bg = bglut[bgpix[x, y]] - rgb_c = lut[pix[x, y]] - try: - cstat[rgb_c] += 1 - except: - cstat[rgb_c] = 1 - if rgb_bg == rgb_c: - stat[bgpath] += 1 - max_p = 0 - bg = "" - for bgpath, value in stat.items(): - if max_p < value: - bg = bgpath - max_p = value - return bg - - def substract_bg(self, bgpath): - bg = Image.open(bgpath) - img = self.image.convert("P") - - bglut = bg.resize((256, 1)) - bglut.putdata(range(256)) - bglut = list(bglut.convert("RGB").getdata()) - - lut = img.resize((256, 1)) - lut.putdata(range(256)) - lut = list(lut.convert("RGB").getdata()) - - bgpix = bg.load() - pix = img.load() - orgpix = self.image.load() - for x in range(bg.size[0]): - for y in range(bg.size[1]): - rgb_bg = bglut[bgpix[x, y]] - rgb_c = lut[pix[x, y]] - if rgb_c == rgb_bg: - orgpix[x, y] = (255,255,255) - - def eval_black_white(self): - new = Image.new("RGB", (140, 75)) - pix = new.load() - orgpix = self.image.load() - thresh = 4 - for x in range(new.size[0]): - for y in range(new.size[1]): - rgb = orgpix[x, y] - r, g, b = rgb - pix[x, y] = (255,255,255) - if r > max(b, g)+thresh: - pix[x, y] = (0,0,0) - if g < min(r, b): - pix[x, y] = (0,0,0) - if g > max(r, b)+thresh: - pix[x, y] = (0,0,0) - if b > max(r, g)+thresh: - pix[x, y] = (0,0,0) - self.image = new - self.pixels = self.image.load() - - def get_captcha(self, image): - self.load_image(image) - bg = self.get_bg() - self.substract_bg(bg) - self.eval_black_white() - self.to_greyscale() - self.image.save(self.data_dir+"cleaned_pass1.png") - self.clean(4) - self.clean(4) - self.image.save(self.data_dir+"cleaned_pass2.png") - letters = self.split_captcha_letters() - final = "" - for n, letter in enumerate(letters): - self.image = letter - self.image.save(ocr.data_dir+"letter%d.png" % n) - self.run_tesser(True, True, False, False) - final += self.result_captcha - - return final - -if __name__ == '__main__': - import urllib - ocr = LinksaveIn() - testurl = "http://linksave.in/captcha/cap.php?hsh=2229185&code=ZzHdhl3UffV3lXTH5U4b7nShXj%2Bwma1vyoNBcbc6lcc%3D" - urllib.urlretrieve(testurl, ocr.data_dir+"captcha.gif") - - print ocr.get_captcha(ocr.data_dir+'captcha.gif') diff --git a/module/plugins/captcha/MegauploadCom.py b/module/plugins/captcha/MegauploadCom.py deleted file mode 100644 index 469ee4094..000000000 --- a/module/plugins/captcha/MegauploadCom.py +++ /dev/null @@ -1,14 +0,0 @@ -from captcha import OCR - -class MegauploadCom(OCR): - __name__ = "MegauploadCom" - def __init__(self): - OCR.__init__(self) - - def get_captcha(self, image): - self.load_image(image) - self.run_tesser(True, True, False, True) - return self.result_captcha - -if __name__ == '__main__': - ocr = MegauploadCom() diff --git a/module/plugins/captcha/__init__.py b/module/plugins/captcha/__init__.py deleted file mode 100644 index e69de29bb..000000000 --- a/module/plugins/captcha/__init__.py +++ /dev/null diff --git a/module/plugins/captcha/NetloadIn.py b/module/plugins/internal/NetloadInOCR.py index 7f2e6a8d1..e50978701 100644 --- a/module/plugins/captcha/NetloadIn.py +++ b/module/plugins/internal/NetloadInOCR.py @@ -1,7 +1,10 @@ -from captcha import OCR +# -*- coding: utf-8 -*- + +from OCR import OCR + +class NetloadInOCR(OCR): + __version__ = 0.1 -class NetloadIn(OCR): - __name__ = "NetloadIn" def __init__(self): OCR.__init__(self) @@ -18,7 +21,7 @@ class NetloadIn(OCR): if __name__ == '__main__': import urllib - ocr = NetloadIn() + ocr = NetloadInOCR() urllib.urlretrieve("http://netload.in/share/includes/captcha.php", "captcha.png") print ocr.get_captcha('captcha.png') diff --git a/module/plugins/captcha/captcha.py b/module/plugins/internal/OCR.py index 4cbb736c1..9f8b7ef8c 100644 --- a/module/plugins/captcha/captcha.py +++ b/module/plugins/internal/OCR.py @@ -33,8 +33,7 @@ import JpegImagePlugin class OCR(object): - - __name__ = "OCR" + __version__ = 0.1 def __init__(self): self.logger = logging.getLogger("log") diff --git a/module/plugins/captcha/ShareonlineBiz.py b/module/plugins/internal/ShareonlineBizOCR.py index b07fb9b0f..c5c2e92e8 100644 --- a/module/plugins/captcha/ShareonlineBiz.py +++ b/module/plugins/internal/ShareonlineBizOCR.py @@ -17,10 +17,10 @@ # along with this program; if not, see <http://www.gnu.org/licenses/>. # ### -from captcha import OCR +from OCR import OCR -class ShareonlineBiz(OCR): - __name__ = "ShareonlineBiz" +class ShareonlineBizOCR(OCR): + __version__ = 0.1 def __init__(self): OCR.__init__(self) @@ -48,6 +48,6 @@ class ShareonlineBiz(OCR): if __name__ == '__main__': import urllib - ocr = ShareonlineBiz() + ocr = ShareonlineBizOCR() urllib.urlretrieve("http://www.share-online.biz/captcha.php", "captcha.jpeg") print ocr.get_captcha('captcha.jpeg') |