From 654f816389e2a91f82d9569b4a12fb5f7c356545 Mon Sep 17 00:00:00 2001 From: Walter Purcaro Date: Sat, 5 Jul 2014 18:05:16 +0200 Subject: Use "is" instead "==" for bool condition check --- module/plugins/captcha/captcha.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'module/plugins/captcha/captcha.py') diff --git a/module/plugins/captcha/captcha.py b/module/plugins/captcha/captcha.py index 7e4dec697..74b3205b4 100644 --- a/module/plugins/captcha/captcha.py +++ b/module/plugins/captcha/captcha.py @@ -268,7 +268,7 @@ class OCR(object): black_pixel_in_col = True - if black_pixel_in_col == False and started == True: + if black_pixel_in_col is False and started is True: rect = (firstX, topY, lastX, bottomY) new_captcha = captcha.crop(rect) -- cgit v1.2.3 From 48c0c42fd6faffc56432d5f037cd575979f180cc Mon Sep 17 00:00:00 2001 From: Walter Purcaro Date: Mon, 14 Jul 2014 02:23:37 +0200 Subject: Removed all @author flags + key attributes cleanup for internal & hooks plugins --- module/plugins/captcha/captcha.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'module/plugins/captcha/captcha.py') diff --git a/module/plugins/captcha/captcha.py b/module/plugins/captcha/captcha.py index 74b3205b4..aa84a1a75 100644 --- a/module/plugins/captcha/captcha.py +++ b/module/plugins/captcha/captcha.py @@ -33,8 +33,14 @@ import JpegImagePlugin class OCR(object): - __name__ = "OCR" + __type__ = "ocr" + __version__ = "0.1" + + __description__ = """OCR base plugin""" + __author_name__ = "pyLoad Team" + __author_mail__ = "admin@pyload.org" + def __init__(self): self.logger = logging.getLogger("log") -- cgit v1.2.3 From 5060e4c6374a5116d0d8b02528f910f8c5f8bcf9 Mon Sep 17 00:00:00 2001 From: Walter Purcaro Date: Tue, 15 Jul 2014 16:25:41 +0200 Subject: Fix code indentation, some bad whitespaces and missing authors + use 'not' instead 'is None' + replace __pattern__'s r" with r' + other minor cosmetics --- module/plugins/captcha/captcha.py | 41 ++++++++++++++++++++++++++------------- 1 file changed, 27 insertions(+), 14 deletions(-) (limited to 'module/plugins/captcha/captcha.py') diff --git a/module/plugins/captcha/captcha.py b/module/plugins/captcha/captcha.py index aa84a1a75..48f4a4217 100644 --- a/module/plugins/captcha/captcha.py +++ b/module/plugins/captcha/captcha.py @@ -149,19 +149,28 @@ class OCR(object): for x in xrange(w): for y in xrange(h): - if pixels[x, y] == 255: continue - # no point in processing white pixels since we only want to remove black pixel + if pixels[x, y] == 255: + continue + # No point in processing white pixels since we only want to remove black pixel count = 0 try: - if pixels[x-1, y-1] != 255: count += 1 - if pixels[x-1, y] != 255: count += 1 - if pixels[x-1, y + 1] != 255: count += 1 - if pixels[x, y + 1] != 255: count += 1 - if pixels[x + 1, y + 1] != 255: count += 1 - if pixels[x + 1, y] != 255: count += 1 - if pixels[x + 1, y-1] != 255: count += 1 - if pixels[x, y-1] != 255: count += 1 + if pixels[x-1, y-1] != 255: + count += 1 + if pixels[x-1, y] != 255: + count += 1 + if pixels[x-1, y + 1] != 255: + count += 1 + if pixels[x, y + 1] != 255: + count += 1 + if pixels[x + 1, y + 1] != 255: + count += 1 + if pixels[x + 1, y] != 255: + count += 1 + if pixels[x + 1, y-1] != 255: + count += 1 + if pixels[x, y-1] != 255: + count += 1 except: pass @@ -173,7 +182,8 @@ class OCR(object): # second pass: this time set all 1's to 255 (white) for x in xrange(w): for y in xrange(h): - if pixels[x, y] == 1: pixels[x, y] = 255 + if pixels[x, y] == 1: + pixels[x, y] = 255 self.pixels = pixels @@ -268,9 +278,12 @@ class OCR(object): firstX = x lastX = x - if y > bottomY: bottomY = y - if y < topY: topY = y - if x > lastX: lastX = x + if y > bottomY: + bottomY = y + if y < topY: + topY = y + if x > lastX: + lastX = x black_pixel_in_col = True -- cgit v1.2.3 From 7b8c458cca7d21a029620f98e453f746fce69cd1 Mon Sep 17 00:00:00 2001 From: Walter Purcaro Date: Mon, 14 Jul 2014 16:10:01 +0200 Subject: Prefer single quote for dict key name --- module/plugins/captcha/captcha.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'module/plugins/captcha/captcha.py') diff --git a/module/plugins/captcha/captcha.py b/module/plugins/captcha/captcha.py index 48f4a4217..a4667b4ed 100644 --- a/module/plugins/captcha/captcha.py +++ b/module/plugins/captcha/captcha.py @@ -84,7 +84,7 @@ class OCR(object): if os.name == "nt": tessparams = [join(pypath,"tesseract","tesseract.exe")] else: - tessparams = ['tesseract'] + tessparams = ["tesseract"] tessparams.extend( [abspath(tmp.name), abspath(tmpTxt.name).replace(".txt", "")] ) -- cgit v1.2.3 From 8e47b0de30a25d0fd5dfb518bfe4e1e7beff93fd Mon Sep 17 00:00:00 2001 From: Walter Purcaro Date: Tue, 15 Jul 2014 16:27:44 +0200 Subject: Key attributes cleanup for account, container and crypter plugins --- module/plugins/captcha/captcha.py | 1 - 1 file changed, 1 deletion(-) (limited to 'module/plugins/captcha/captcha.py') diff --git a/module/plugins/captcha/captcha.py b/module/plugins/captcha/captcha.py index a4667b4ed..061228c99 100644 --- a/module/plugins/captcha/captcha.py +++ b/module/plugins/captcha/captcha.py @@ -301,7 +301,6 @@ class OCR(object): return letters def correct(self, values, var=None): - if var: result = var else: -- cgit v1.2.3 From ba916633f2bedb04c7358000b91aed69f52e8e43 Mon Sep 17 00:00:00 2001 From: Walter Purcaro Date: Fri, 1 Aug 2014 19:35:59 +0200 Subject: Remove trailing whitespaces + remove license headers + import urllib methods directly + sort and fix key attributes + use save_join instead join + sort some import declarations + other minor code cosmetics --- module/plugins/captcha/captcha.py | 46 ++++++++------------------------------- 1 file changed, 9 insertions(+), 37 deletions(-) (limited to 'module/plugins/captcha/captcha.py') diff --git a/module/plugins/captcha/captcha.py b/module/plugins/captcha/captcha.py index 061228c99..cc07f50cf 100644 --- a/module/plugins/captcha/captcha.py +++ b/module/plugins/captcha/captcha.py @@ -1,35 +1,18 @@ # -*- coding: utf-8 -*- -# -#Copyright (C) 2009 kingzero, RaNaN -# -#This program is free software; you can redistribute it and/or modify -#it under the terms of the GNU General Public License as published by -#the Free Software Foundation; either version 3 of the License, -#or (at your option) any later version. -# -#This program is distributed in the hope that it will be useful, -#but WITHOUT ANY WARRANTY; without even the implied warranty of -#MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. -#See the GNU General Public License for more details. -# -#You should have received a copy of the GNU General Public License -# along with this program; if not, see . -# -### from __future__ import with_statement -import os -from os.path import join -from os.path import abspath + +import GifImagePlugin +import Image +import JpegImagePlugin +import PngImagePlugin +import TiffImagePlugin import logging +import os import subprocess #import tempfile -import Image -import TiffImagePlugin -import PngImagePlugin -import GifImagePlugin -import JpegImagePlugin +from os.path import abspath, join class OCR(object): @@ -82,7 +65,7 @@ class OCR(object): self.image.save(tmp.name, 'TIFF') if os.name == "nt": - tessparams = [join(pypath,"tesseract","tesseract.exe")] + tessparams = [join(pypath, "tesseract", "tesseract.exe")] else: tessparams = ["tesseract"] @@ -318,14 +301,3 @@ class OCR(object): return result else: self.result_captcha = result - - -if __name__ == '__main__': - ocr = OCR() - ocr.load_image("B.jpg") - ocr.to_greyscale() - ocr.eval_black_white(140) - ocr.derotate_by_average() - ocr.run_tesser() - print "Tesseract", ocr.result_captcha - ocr.image.save("derotated.jpg") -- cgit v1.2.3 From 2a5ff9ce8b025336cccdd7dde1260a7255efc683 Mon Sep 17 00:00:00 2001 From: Walter Purcaro Date: Sun, 5 Oct 2014 17:06:22 +0200 Subject: Fix pillow import header --- module/plugins/captcha/captcha.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'module/plugins/captcha/captcha.py') diff --git a/module/plugins/captcha/captcha.py b/module/plugins/captcha/captcha.py index cc07f50cf..7d6ff264f 100644 --- a/module/plugins/captcha/captcha.py +++ b/module/plugins/captcha/captcha.py @@ -2,11 +2,11 @@ from __future__ import with_statement -import GifImagePlugin -import Image -import JpegImagePlugin -import PngImagePlugin -import TiffImagePlugin +try: + from PIL import Image, GifImagePlugin, JpegImagePlugin, PngImagePlugin, TiffImagePlugin +except ImportError: + import Image, GifImagePlugin, JpegImagePlugin, PngImagePlugin, TiffImagePlugin + import logging import os import subprocess -- cgit v1.2.3 From b0868ae6446078bacf1635dde5e4ab316b4a94cb Mon Sep 17 00:00:00 2001 From: Walter Purcaro Date: Tue, 7 Oct 2014 18:57:59 +0200 Subject: New __authors__ key replaces __author_name__ and __author_mail__ + Whitespaces and EOF fixup --- module/plugins/captcha/captcha.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'module/plugins/captcha/captcha.py') diff --git a/module/plugins/captcha/captcha.py b/module/plugins/captcha/captcha.py index 7d6ff264f..17ba0f12f 100644 --- a/module/plugins/captcha/captcha.py +++ b/module/plugins/captcha/captcha.py @@ -21,8 +21,7 @@ class OCR(object): __version__ = "0.1" __description__ = """OCR base plugin""" - __author_name__ = "pyLoad Team" - __author_mail__ = "admin@pyload.org" + __authors__ = [("pyLoad Team", "admin@pyload.org")] def __init__(self): -- cgit v1.2.3 From ae7a7e66981456e5bbe2b54006d79b6f907be7a4 Mon Sep 17 00:00:00 2001 From: Walter Purcaro Date: Wed, 8 Oct 2014 20:18:13 +0200 Subject: Add __license__ key attribute to plugins --- module/plugins/captcha/captcha.py | 1 + 1 file changed, 1 insertion(+) (limited to 'module/plugins/captcha/captcha.py') diff --git a/module/plugins/captcha/captcha.py b/module/plugins/captcha/captcha.py index 17ba0f12f..b3d4bd57e 100644 --- a/module/plugins/captcha/captcha.py +++ b/module/plugins/captcha/captcha.py @@ -21,6 +21,7 @@ class OCR(object): __version__ = "0.1" __description__ = """OCR base plugin""" + __license__ = "GPLv3" __authors__ = [("pyLoad Team", "admin@pyload.org")] -- cgit v1.2.3 From 18836967d39d0b6e6f2aeea4e6aece605246a2bf Mon Sep 17 00:00:00 2001 From: Walter Purcaro Date: Fri, 17 Oct 2014 20:55:00 +0200 Subject: Spare code cosmetics --- module/plugins/captcha/captcha.py | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) (limited to 'module/plugins/captcha/captcha.py') diff --git a/module/plugins/captcha/captcha.py b/module/plugins/captcha/captcha.py index b3d4bd57e..fb85d8996 100644 --- a/module/plugins/captcha/captcha.py +++ b/module/plugins/captcha/captcha.py @@ -51,28 +51,25 @@ class OCR(object): self.logger.debug("Tesseract ReturnCode %s Output: %s" % (popen.returncode, output)) def run_tesser(self, subset=False, digits=True, lowercase=True, uppercase=True): - #self.logger.debug("create tmp tif") + #tmpTif = tempfile.NamedTemporaryFile(suffix=".tif") + tmpTif = open(join("tmp", "tmpTif_%s.tif" % self.__name__), "wb") + tmpTif.close() - #tmp = tempfile.NamedTemporaryFile(suffix=".tif") - tmp = open(join("tmp", "tmpTif_%s.tif" % self.__name__), "wb") - tmp.close() - #self.logger.debug("create tmp txt") #tmpTxt = tempfile.NamedTemporaryFile(suffix=".txt") tmpTxt = open(join("tmp", "tmpTxt_%s.txt" % self.__name__), "wb") tmpTxt.close() self.logger.debug("save tiff") - self.image.save(tmp.name, 'TIFF') + self.image.save(tmpTif.name, 'TIFF') if os.name == "nt": tessparams = [join(pypath, "tesseract", "tesseract.exe")] else: tessparams = ["tesseract"] - tessparams.extend( [abspath(tmp.name), abspath(tmpTxt.name).replace(".txt", "")] ) + tessparams.extend( [abspath(tmpTif.name), abspath(tmpTxt.name).replace(".txt", "")] ) if subset and (digits or lowercase or uppercase): - #self.logger.debug("create temp subset config") #tmpSub = tempfile.NamedTemporaryFile(suffix=".subset") tmpSub = open(join("tmp", "tmpSub_%s.subset" % self.__name__), "wb") tmpSub.write("tessedit_char_whitelist ") @@ -99,7 +96,7 @@ class OCR(object): self.logger.debug(self.result_captcha) try: - os.remove(tmp.name) + os.remove(tmpTif.name) os.remove(tmpTxt.name) if subset and (digits or lowercase or uppercase): os.remove(tmpSub.name) -- cgit v1.2.3 From 0eb6e7ec4a1144dcca824d8add049787d3da1762 Mon Sep 17 00:00:00 2001 From: Walter Purcaro Date: Wed, 22 Oct 2014 19:44:59 +0200 Subject: Two space before function declaration --- module/plugins/captcha/captcha.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'module/plugins/captcha/captcha.py') diff --git a/module/plugins/captcha/captcha.py b/module/plugins/captcha/captcha.py index fb85d8996..dda6fed02 100644 --- a/module/plugins/captcha/captcha.py +++ b/module/plugins/captcha/captcha.py @@ -28,18 +28,22 @@ class OCR(object): def __init__(self): self.logger = logging.getLogger("log") + def load_image(self, image): self.image = Image.open(image) self.pixels = self.image.load() self.result_captcha = '' + def unload(self): """delete all tmp images""" pass + def threshold(self, value): self.image = self.image.point(lambda a: a * value + 10) + def run(self, command): """Run a command""" @@ -50,6 +54,7 @@ class OCR(object): popen.stderr.close() self.logger.debug("Tesseract ReturnCode %s Output: %s" % (popen.returncode, output)) + def run_tesser(self, subset=False, digits=True, lowercase=True, uppercase=True): #tmpTif = tempfile.NamedTemporaryFile(suffix=".tif") tmpTif = open(join("tmp", "tmpTif_%s.tif" % self.__name__), "wb") @@ -103,15 +108,18 @@ class OCR(object): except: pass + def get_captcha(self, name): raise NotImplementedError + def to_greyscale(self): if self.image.mode != 'L': self.image = self.image.convert('L') self.pixels = self.image.load() + def eval_black_white(self, limit): self.pixels = self.image.load() w, h = self.image.size @@ -122,6 +130,7 @@ class OCR(object): else: self.pixels[x, y] = 0 + def clean(self, allowed): pixels = self.pixels @@ -167,6 +176,7 @@ class OCR(object): self.pixels = pixels + def derotate_by_average(self): """rotate by checking each angle and guess most suitable""" @@ -241,6 +251,7 @@ class OCR(object): self.pixels = pixels + def split_captcha_letters(self): captcha = self.image started = False @@ -280,6 +291,7 @@ class OCR(object): return letters + def correct(self, values, var=None): if var: result = var -- cgit v1.2.3 From 34984dae733c3f3d47b41a0acfba3724d53c65a1 Mon Sep 17 00:00:00 2001 From: Walter Purcaro Date: Tue, 28 Oct 2014 16:52:10 +0100 Subject: Code cosmetics: plugin class attributes --- module/plugins/captcha/captcha.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'module/plugins/captcha/captcha.py') diff --git a/module/plugins/captcha/captcha.py b/module/plugins/captcha/captcha.py index dda6fed02..93c8164c6 100644 --- a/module/plugins/captcha/captcha.py +++ b/module/plugins/captcha/captcha.py @@ -16,13 +16,13 @@ from os.path import abspath, join class OCR(object): - __name__ = "OCR" - __type__ = "ocr" + __name__ = "OCR" + __type__ = "ocr" __version__ = "0.1" __description__ = """OCR base plugin""" - __license__ = "GPLv3" - __authors__ = [("pyLoad Team", "admin@pyload.org")] + __license__ = "GPLv3" + __authors__ = [("pyLoad Team", "admin@pyload.org")] def __init__(self): -- cgit v1.2.3 From bd8259220ab4d56ab419b7b32045b08cc9b0a7c8 Mon Sep 17 00:00:00 2001 From: Walter Purcaro Date: Sun, 9 Nov 2014 03:08:19 +0100 Subject: Use with statement instead open method when accessing fod + handle i/o error --- module/plugins/captcha/captcha.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) (limited to 'module/plugins/captcha/captcha.py') diff --git a/module/plugins/captcha/captcha.py b/module/plugins/captcha/captcha.py index 93c8164c6..e0cd7d31c 100644 --- a/module/plugins/captcha/captcha.py +++ b/module/plugins/captcha/captcha.py @@ -57,12 +57,17 @@ class OCR(object): def run_tesser(self, subset=False, digits=True, lowercase=True, uppercase=True): #tmpTif = tempfile.NamedTemporaryFile(suffix=".tif") - tmpTif = open(join("tmp", "tmpTif_%s.tif" % self.__name__), "wb") - tmpTif.close() - - #tmpTxt = tempfile.NamedTemporaryFile(suffix=".txt") - tmpTxt = open(join("tmp", "tmpTxt_%s.txt" % self.__name__), "wb") - tmpTxt.close() + try: + tmpTif = open(join("tmp", "tmpTif_%s.tif" % self.__name__), "wb") + tmpTif.close() + + #tmpTxt = tempfile.NamedTemporaryFile(suffix=".txt") + tmpTxt = open(join("tmp", "tmpTxt_%s.txt" % self.__name__), "wb") + tmpTxt.close() + + except IOError, e: + self.logError(str(e)) + return self.logger.debug("save tiff") self.image.save(tmpTif.name, 'TIFF') -- cgit v1.2.3 From 59f72bfc5ed721c80c821bd0ca1bc8daf0d49880 Mon Sep 17 00:00:00 2001 From: Walter Purcaro Date: Sun, 9 Nov 2014 03:12:41 +0100 Subject: Code cosmetics --- module/plugins/captcha/captcha.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'module/plugins/captcha/captcha.py') diff --git a/module/plugins/captcha/captcha.py b/module/plugins/captcha/captcha.py index e0cd7d31c..418c11ffe 100644 --- a/module/plugins/captcha/captcha.py +++ b/module/plugins/captcha/captcha.py @@ -64,7 +64,7 @@ class OCR(object): #tmpTxt = tempfile.NamedTemporaryFile(suffix=".txt") tmpTxt = open(join("tmp", "tmpTxt_%s.txt" % self.__name__), "wb") tmpTxt.close() - + except IOError, e: self.logError(str(e)) return -- cgit v1.2.3 From c9e31d875d32de31e54959b82bc35eff2b3e0f3f Mon Sep 17 00:00:00 2001 From: Walter Purcaro Date: Mon, 10 Nov 2014 00:19:51 +0100 Subject: Code cosmetics --- module/plugins/captcha/captcha.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'module/plugins/captcha/captcha.py') diff --git a/module/plugins/captcha/captcha.py b/module/plugins/captcha/captcha.py index 418c11ffe..b67ce9b9e 100644 --- a/module/plugins/captcha/captcha.py +++ b/module/plugins/captcha/captcha.py @@ -66,7 +66,7 @@ class OCR(object): tmpTxt.close() except IOError, e: - self.logError(str(e)) + self.logError(e) return self.logger.debug("save tiff") -- cgit v1.2.3 From 6151e81fa0b325dffda3da4228d5821e73db3ef3 Mon Sep 17 00:00:00 2001 From: Walter Purcaro Date: Tue, 9 Dec 2014 01:19:46 +0100 Subject: Fix __version__ format in some plugins --- module/plugins/captcha/captcha.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'module/plugins/captcha/captcha.py') diff --git a/module/plugins/captcha/captcha.py b/module/plugins/captcha/captcha.py index b67ce9b9e..5dcde33fb 100644 --- a/module/plugins/captcha/captcha.py +++ b/module/plugins/captcha/captcha.py @@ -18,7 +18,7 @@ from os.path import abspath, join class OCR(object): __name__ = "OCR" __type__ = "ocr" - __version__ = "0.1" + __version__ = "0.10" __description__ = """OCR base plugin""" __license__ = "GPLv3" -- cgit v1.2.3 From 4d578cb15f3d6edd036e438e504739b97660f93e Mon Sep 17 00:00:00 2001 From: Walter Purcaro Date: Tue, 9 Dec 2014 16:58:35 +0100 Subject: Spare code cosmetics --- module/plugins/captcha/captcha.py | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) (limited to 'module/plugins/captcha/captcha.py') diff --git a/module/plugins/captcha/captcha.py b/module/plugins/captcha/captcha.py index 5dcde33fb..0f233ec00 100644 --- a/module/plugins/captcha/captcha.py +++ b/module/plugins/captcha/captcha.py @@ -81,18 +81,19 @@ class OCR(object): if subset and (digits or lowercase or uppercase): #tmpSub = tempfile.NamedTemporaryFile(suffix=".subset") - tmpSub = open(join("tmp", "tmpSub_%s.subset" % self.__name__), "wb") - tmpSub.write("tessedit_char_whitelist ") - if digits: - tmpSub.write("0123456789") - if lowercase: - tmpSub.write("abcdefghijklmnopqrstuvwxyz") - if uppercase: - tmpSub.write("ABCDEFGHIJKLMNOPQRSTUVWXYZ") - tmpSub.write("\n") - tessparams.append("nobatch") - tessparams.append(abspath(tmpSub.name)) - tmpSub.close() + with open(join("tmp", "tmpSub_%s.subset" % self.__name__), "wb") as tmpSub: + tmpSub.write("tessedit_char_whitelist ") + + if digits: + tmpSub.write("0123456789") + if lowercase: + tmpSub.write("abcdefghijklmnopqrstuvwxyz") + if uppercase: + tmpSub.write("ABCDEFGHIJKLMNOPQRSTUVWXYZ") + + tmpSub.write("\n") + tessparams.append("nobatch") + tessparams.append(abspath(tmpSub.name)) self.logger.debug("run tesseract") self.run(tessparams) -- cgit v1.2.3