summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Plugins/GigasizeCom.py74
-rw-r--r--Plugins/Plugin.py20
-rw-r--r--captcha/captcha.py81
-rw-r--r--module/download_thread.py11
4 files changed, 153 insertions, 33 deletions
diff --git a/Plugins/GigasizeCom.py b/Plugins/GigasizeCom.py
new file mode 100644
index 000000000..ef39cc3cc
--- /dev/null
+++ b/Plugins/GigasizeCom.py
@@ -0,0 +1,74 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import os
+import re
+import tempfile
+
+from Plugin import Plugin
+
+class GigasizeCom(Plugin):
+    """Hoster plugin for gigasize.com download links."""
+
+    def __init__(self, parent):
+        Plugin.__init__(self, parent)
+        props = {}
+        props['name'] = "GigasizeCom"
+        props['type'] = "hoster"
+        props['pattern'] = r"(?:http://)?(?:www\.)?gigasize\.com/get\.php\?d="
+        props['version'] = "0.1"
+        props['description'] = """Gigasize.com Download Plugin"""
+        props['author_name'] = ("spoob")
+        props['author_mail'] = ("spoob@pyload.org")
+        self.props = props
+        self.parent = parent
+        # html[0]: page loaded from parent.url, html[1]: response to the captcha post
+        self.html = [None, None]
+        self.want_reconnect = False
+        self.init_ocr()
+        self.multi_dl = False
+
+    def download_html(self):
+        """Load the link page, read the captcha via OCR and post the answer."""
+        url = self.parent.url
+        self.html[0] = self.req.load(url, cookies=True)
+
+        captcha_image = tempfile.NamedTemporaryFile().name + ".jpg"
+        self.req.download("http://www.gigasize.com/randomImage.php", captcha_image, cookies=True)
+        captcha = self.ocr.get_captcha(captcha_image)
+
+        os.remove(captcha_image)
+        print captcha
+
+        # NOTE(review): file_server_url is never defined in this file (NameError) - supply the captcha post URL
+        self.html[1] = self.req.load(file_server_url, None, {"txtNumber": captcha}, cookies=True)
+
+    def get_file_url(self):
+        """ returns the absolute downloadable filepath, or False if a reconnect is wanted """
+        if self.html[0] is None:
+            self.download_html()
+        if self.want_reconnect:
+            return False
+        file_url_pattern = r"<form action=\"(/getcgi.php\?t=.*)\" method=\"post\" id=\"formDownload\">"
+        return "http://gigasize.com" + re.search(file_url_pattern, self.html[1]).group(1)
+
+    def get_file_name(self):
+        """ returns the file name, or the link url if a reconnect is wanted """
+        if self.html[0] is None:
+            self.download_html()
+        if self.want_reconnect:
+            return self.parent.url
+        file_name_pattern = "<p><strong>Name</strong>: <b>(.*)</b></p>"
+        return re.search(file_name_pattern, self.html[0]).group(1)
+
+    def file_exists(self):
+        """ returns True or False
+        """
+        if self.html[0] is None:
+            self.download_html()
+        # self.html is a list, so search html[0], the page this method makes sure is loaded
+        return re.search(r"HTTP Status 404", self.html[0]) is None
+
+    def proceed(self, url, location):
+        """Download url to location with cookies enabled."""
+        self.req.download(url, location, cookies=True)
\ No newline at end of file
diff --git a/Plugins/Plugin.py b/Plugins/Plugin.py
index f43172b55..ba3ee29b3 100644
--- a/Plugins/Plugin.py
+++ b/Plugins/Plugin.py
@@ -1,6 +1,22 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
-
+#
+#Copyright (C) 2009 kingzero, RaNaN
+#
+#This program is free software; you can redistribute it and/or modify
+#it under the terms of the GNU General Public License as published by
+#the Free Software Foundation; either version 3 of the License,
+#or (at your option) any later version.
+#
+#This program is distributed in the hope that it will be useful,
+#but WITHOUT ANY WARRANTY; without even the implied warranty of
+#MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#See the GNU General Public License for more details.
+#
+#You should have received a copy of the GNU General Public License
+# along with this program; if not, see <http://www.gnu.org/licenses/>.
+#
+###
import ConfigParser
import re
@@ -76,7 +92,7 @@ class Plugin():
self.config[option] = False if self.config[option].lower() == 'false' else self.config[option]
def init_ocr(self):
- modul = __import__(self.props['name'], fromlist=['plugins'])
+ modul = __import__("captcha."+self.props['name'], fromlist=['captcha'])
captchaClass = getattr(modul, self.props['name'])
self.ocr = captchaClass()
diff --git a/captcha/captcha.py b/captcha/captcha.py
index de3e61cf0..a76a7aa25 100644
--- a/captcha/captcha.py
+++ b/captcha/captcha.py
@@ -1,6 +1,27 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+#Copyright (C) 2009 kingzero, RaNaN
+#
+#This program is free software; you can redistribute it and/or modify
+#it under the terms of the GNU General Public License as published by
+#the Free Software Foundation; either version 3 of the License,
+#or (at your option) any later version.
+#
+#This program is distributed in the hope that it will be useful,
+#but WITHOUT ANY WARRANTY; without even the implied warranty of
+#MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#See the GNU General Public License for more details.
+#
+#You should have received a copy of the GNU General Public License
+# along with this program; if not, see <http://www.gnu.org/licenses/>.
+#
+###
+import subprocess
+import tempfile
+
import Image
import ImageOps
-import subprocess
class OCR(object):
def __init__(self):
@@ -9,28 +30,37 @@ class OCR(object):
def load_image(self, image):
self.image = Image.open(image)
self.pixels = self.image.load()
- self.image_name = 'captcha_clean.png'
self.result_captcha = ''
- def unload():
+ def unload(self):
"""delete all tmp images"""
pass
def threshold(self, value):
- self.image = self.image.point(lambda a: a * value +10)
+ self.image = self.image.point(lambda a: a * value + 10)
+
+ def run(self, command, inputdata=None):
+ """Run a command and return standard output"""
+ pipe = subprocess.PIPE
+ popen = subprocess.Popen(command, stdout=pipe, stderr=pipe)
+ outputdata, errdata = popen.communicate(inputdata)
+ assert (popen.returncode == 0), \
+ "Error running: %s\n\n%s" % (command, errdata)
+ return outputdata
def run_gocr(self):
- self.image.save(self.image_name)
- cmd = ['gocr', self.image_name]
- self.result_captcha = subprocess.Popen(cmd, stdout=subprocess.PIPE).communicate()[0].replace('\n','')
+ tmp = tempfile.NamedTemporaryFile(suffix=".jpg")
+ self.image.save(tmp)
+ self.result_captcha = self.run(['gocr', tmp.name]).replace("\n", "")
def run_tesser(self):
- self.image.save('captcha.tif', 'TIFF')
- cmd = ['tesseract', 'captcha.tif', '0']
- self.result_captcha = subprocess.Popen(cmd)
- self.result_captcha.wait()
- cmd = ['cat', '0.txt']
- self.result_captcha = subprocess.Popen(cmd, stdout=subprocess.PIPE).communicate()[0].replace('\n','')
+ tmp = tempfile.NamedTemporaryFile(suffix=".tif")
+ tmpTxt = tempfile.NamedTemporaryFile(suffix=".txt")
+
+ self.image.save(tmp.name, 'TIFF')
+ self.run(['tesseract', tmp.name, tmpTxt.name.replace(".txt", "")])
+
+ self.result_captcha = self.run(['cat', tmpTxt.name])
def get_captcha(self):
raise NotImplementedError
@@ -49,29 +79,28 @@ class OCR(object):
for x in xrange(w):
for y in xrange(h):
- # no point in processing white pixels since we only want to remove black pixels
if pixels[x, y] == 255: continue
-
+ # no point in processing white pixels since we only want to remove black pixels
count = 0
try:
if pixels[x-1, y-1] != 255: count += 1
- if pixels[x-1, y ] != 255: count += 1
- if pixels[x-1, y+1] != 255: count += 1
- if pixels[x, y+1 ] != 255: count += 1
- if pixels[x+1, y+1] != 255: count += 1
- if pixels[x+1, y ] != 255: count += 1
- if pixels[x+1, y-1] != 255: count += 1
- if pixels[x, y-1 ] != 255: count += 1
+ if pixels[x-1, y] != 255: count += 1
+ if pixels[x-1, y + 1] != 255: count += 1
+ if pixels[x, y + 1] != 255: count += 1
+ if pixels[x + 1, y + 1] != 255: count += 1
+ if pixels[x + 1, y] != 255: count += 1
+ if pixels[x + 1, y-1] != 255: count += 1
+ if pixels[x, y-1] != 255: count += 1
except:
pass
- # not enough neighbors are dark pixels so mark this pixel
- # to be changed to white
+ # not enough neighbors are dark pixels so mark this pixel
+ # to be changed to white
if count < allowed:
pixels[x, y] = 1
-
- # second pass: this time set all 1's to 255 (white)
+
+ # second pass: this time set all 1's to 255 (white)
for x in xrange(w):
for y in xrange(h):
if pixels[x, y] == 1: pixels[x, y] = 255
diff --git a/module/download_thread.py b/module/download_thread.py
index a290bc4f7..f68cfedd7 100644
--- a/module/download_thread.py
+++ b/module/download_thread.py
@@ -17,9 +17,10 @@
# along with this program; if not, see <http://www.gnu.org/licenses/>.
#
###
-
import threading
-from time import time, sleep
+import traceback
+from time import sleep
+from time import time
class Status(object):
@@ -45,7 +46,7 @@ class Status(object):
return self.pyfile.plugin.req.dl_size / 1024
def percent(self):
if not self.kB_left() == 0 and not self.size() == 0:
- return ((self.size()-self.kB_left())*100)/self.size()
+ return ((self.size()-self.kB_left()) * 100) / self.size()
return 0
class Download_Thread(threading.Thread):
@@ -65,7 +66,7 @@ class Download_Thread(threading.Thread):
try:
self.download(self.loadedPyFile)
except Exception, e:
- print "Error:", e #catch up all error here
+ traceback.print_exc()
self.loadedPyFile.status.type = "failed"
finally:
self.parent.job_finished(self.loadedPyFile)
@@ -79,7 +80,7 @@ class Download_Thread(threading.Thread):
pyfile.prepareDownload()
if not status.exists:
- raise "FileDontExists" #i know its deprecated, who cares^^
+ raise "FileDontExists", "The file was not found on the server." #i know its deprecated, who cares^^
status.type = "waiting"