diff options
Diffstat (limited to 'module/plugins')
-rw-r--r-- | module/plugins/captcha/LinksaveIn.py | 16 | ||||
-rw-r--r-- | module/plugins/captcha/MegauploadCom.py | 2 | ||||
-rw-r--r-- | module/plugins/captcha/NetloadIn.py | 6 | ||||
-rw-r--r-- | module/plugins/captcha/ShareonlineBiz.py | 12 | ||||
-rw-r--r-- | module/plugins/captcha/captcha.py | 20 | ||||
-rw-r--r-- | module/plugins/hoster/NetloadIn.py | 10 | ||||
-rw-r--r-- | module/plugins/hoster/ShareonlineBiz.py | 1 |
7 files changed, 30 insertions, 37 deletions
diff --git a/module/plugins/captcha/LinksaveIn.py b/module/plugins/captcha/LinksaveIn.py index d6f61e362..22b801273 100644 --- a/module/plugins/captcha/LinksaveIn.py +++ b/module/plugins/captcha/LinksaveIn.py @@ -118,20 +118,6 @@ class LinksaveIn(OCR): self.image = new self.pixels = self.image.load() - def run_tesser(self): - self.logger.debug("create tmp tif") - tmp = tempfile.NamedTemporaryFile(suffix=".tif") - self.logger.debug("create tmp txt") - tmpTxt = tempfile.NamedTemporaryFile(suffix=".txt") - self.logger.debug("save tiff") - self.image.save(tmp.name, 'TIFF') - self.logger.debug("run tesseract") - self.run(['tesseract', tmp.name, tmpTxt.name.replace(".txt", ""), "nobatch", self.data_dir+"tesser_conf"]) - self.logger.debug("read txt") - - with open(tmpTxt.name, 'r') as f: - self.result_captcha = f.read().replace("\n", "") - def get_captcha(self, image): self.load_image(image) bg = self.get_bg() @@ -147,7 +133,7 @@ class LinksaveIn(OCR): for n, letter in enumerate(letters): self.image = letter self.image.save(ocr.data_dir+"letter%d.png" % n) - self.run_tesser() + self.run_tesser(True, True, False, False) final += self.result_captcha return final diff --git a/module/plugins/captcha/MegauploadCom.py b/module/plugins/captcha/MegauploadCom.py index 374bcd678..da8ab2cb9 100644 --- a/module/plugins/captcha/MegauploadCom.py +++ b/module/plugins/captcha/MegauploadCom.py @@ -6,7 +6,7 @@ class MegauploadCom(OCR): def get_captcha(self, image): self.load_image(image) - self.run_tesser() + self.run_tesser(True, True, False, True) return self.result_captcha if __name__ == '__main__': diff --git a/module/plugins/captcha/NetloadIn.py b/module/plugins/captcha/NetloadIn.py index 9799a6a2b..c99a0744c 100644 --- a/module/plugins/captcha/NetloadIn.py +++ b/module/plugins/captcha/NetloadIn.py @@ -9,11 +9,7 @@ class NetloadIn(OCR): self.to_greyscale() self.clean(3) self.clean(3) - self.run_tesser() - - self.correct({ - ("$", "g"): "5", - }) + self.run_tesser(True, True, False, False) return self.result_captcha diff --git a/module/plugins/captcha/ShareonlineBiz.py b/module/plugins/captcha/ShareonlineBiz.py index 91124f181..7bd5d7960 100644 --- a/module/plugins/captcha/ShareonlineBiz.py +++ b/module/plugins/captcha/ShareonlineBiz.py @@ -37,19 +37,9 @@ class ShareonlineBiz(OCR): final = "" for letter in letters: self.image = letter - self.run_tesser() + self.run_tesser(True, True, False, False) final += self.result_captcha - #replace common errors - final = self.correct({ - "A": "4", - "‘5": "3", - ("‘1", "T"): "7", - ("‘L", "B", "'L"): "2", - "b": "6", - ("I", "X"): "1" - }, final) - return final #tesseract at 60% diff --git a/module/plugins/captcha/captcha.py b/module/plugins/captcha/captcha.py index 283b171e0..452952533 100644 --- a/module/plugins/captcha/captcha.py +++ b/module/plugins/captcha/captcha.py @@ -82,15 +82,31 @@ class OCR(object): self.image.save(tmp) self.result_captcha = self.run(['gocr', tmp.name]).replace("\n", "") - def run_tesser(self): + def run_tesser(self, subset=False, digits=True, lowercase=True, uppercase=True ): self.logger.debug("create tmp tif") tmp = tempfile.NamedTemporaryFile(suffix=".tif") self.logger.debug("create tmp txt") tmpTxt = tempfile.NamedTemporaryFile(suffix=".txt") self.logger.debug("save tiff") self.image.save(tmp.name, 'TIFF') + + tessparams = ['tesseract', tmp.name, tmpTxt.name.replace(".txt", "") + + if subset and (digits or lowercase or uppercase): + self.logger.debug("create temp subset config") + tmpSub = tempfile.NamedTemporaryFile(suffix=".subset") + tmpSub.write("tessedit_char_whitelist ") + if digits: + tmpSub.write("0123456789") + if lowercase: + tmpSub.write("abcdefghijklmnopqrstuvwxyz") + if uppercase: + tmpSub.write("ABCDEFGHIJKLMNOPQRSTUVWXYZ") + tessparams.append("nobatch") + tessparams.append(tmpSub.name) + self.logger.debug("run tesseract") - self.run(['tesseract', tmp.name, tmpTxt.name.replace(".txt", "")]) + self.run(tessparams) self.logger.debug("read txt") with open(tmpTxt.name, 'r') as f: diff --git a/module/plugins/hoster/NetloadIn.py b/module/plugins/hoster/NetloadIn.py index 0b7bcd27f..9891828a2 100644 --- a/module/plugins/hoster/NetloadIn.py +++ b/module/plugins/hoster/NetloadIn.py @@ -57,6 +57,7 @@ class NetloadIn(Plugin): thread.wait(self.parent) pyfile.status.url = self.get_file_url() + return True else: return False @@ -69,7 +70,10 @@ class NetloadIn(Plugin): apiurl = "http://netload.in/share/fileinfos2.php" src = self.req.load(apiurl, cookies=False, get={"file_id": match.group(1)}) self.api_data = {} - if not src == "unknown file_data": + if src == "unknown_server_data": + self.api_data = False + self.html[0] = self.req.load(self.parent.url, cookies=False) + elif not src == "unknown file_data": lines = src.split(";") self.api_data["exists"] = True self.api_data["fileid"] = lines[0] @@ -124,7 +128,7 @@ class NetloadIn(Plugin): self.time_plus_wait = time() + wait_seconds def get_file_name(self): - if self.api_data["filename"]: + if self.api_data and self.api_data["filename"]: return self.api_data["filename"] elif self.html[0]: file_name_pattern = '\t\t\t(.+)<span style="color: #8d8d8d;">' @@ -134,7 +138,7 @@ class NetloadIn(Plugin): return self.parent.url def file_exists(self): - if self.api_data["exists"]: + if self.api_data and self.api_data["exists"]: return self.api_data["exists"] elif self.html[0] and re.search(r"The file has been deleted", self.html[0]) == None: return True diff --git a/module/plugins/hoster/ShareonlineBiz.py b/module/plugins/hoster/ShareonlineBiz.py index ede810bbd..b76e83568 100644 --- a/module/plugins/hoster/ShareonlineBiz.py +++ b/module/plugins/hoster/ShareonlineBiz.py @@ -46,6 +46,7 @@ class ShareonlineBiz(Plugin): pyfile.status.waituntil = self.time_plus_wait pyfile.status.url = self.get_file_url() pyfile.status.want_reconnect = self.want_reconnect + return True else: return False |