diff options
author | fragonib <devnull@localhost> | 2011-04-24 17:40:11 +0200 |
---|---|---|
committer | fragonib <devnull@localhost> | 2011-04-24 17:40:11 +0200 |
commit | 25ab9093d84cbeb86f7b626d7251f4a0180b32bf (patch) | |
tree | 0a064a60a8a85a3bc4c09c5f6031a303537fb6f4 /module/plugins | |
parent | BitshareCom: Fix i18n issues (diff) | |
download | pyload-25ab9093d84cbeb86f7b626d7251f4a0180b32bf.tar.xz |
UploadStation & BitshareCom: Improved debugging
Diffstat (limited to 'module/plugins')
-rw-r--r-- | module/plugins/hoster/BitshareCom.py | 103 | ||||
-rw-r--r-- | module/plugins/hoster/UploadStationCom.py | 117 |
2 files changed, 127 insertions, 93 deletions
diff --git a/module/plugins/hoster/BitshareCom.py b/module/plugins/hoster/BitshareCom.py
index f44a89e23..496071fa3 100644
--- a/module/plugins/hoster/BitshareCom.py
+++ b/module/plugins/hoster/BitshareCom.py
@@ -10,6 +10,7 @@ from module.plugins.Hoster import Hoster
from module.plugins.ReCaptcha import ReCaptcha
from module.network.RequestFactory import getURL
+from wx.lib.analogclock.helpers import Hand
def unicode2str(unitext):
return unicodedata.normalize('NFKD', unitext).encode('ascii', 'ignore')
@@ -19,20 +20,22 @@ def getInfo(urls):
for url in urls:
- # Get html
+ # Get file info html
+ # TODO: Force responses in english language
html = getURL(url)
- if re.search(BitshareCom.OFFLINE_PATTERN, html):
+ if re.search(BitshareCom.FILE_OFFLINE_PATTERN, html):
result.append((url, 0, 1, url))
# Name
name1 = re.search(BitshareCom.__pattern__, url).group('name')
m = re.search(BitshareCom.FILE_INFO_PATTERN, html)
name2 = m.group('name')
- name = unicode2str(max(name1, name2))
+ name = unicode2str(max(name1, name2)) # Unicode BUG workaround
# Size
value = float(m.group('size'))
- pow = {'KB' : 1, 'MB' : 2, 'GB' : 3}[m.group('units')]
+ units = m.group('units')
+ pow = {'KB' : 1, 'MB' : 2, 'GB' : 3}[units]
size = int(value*1024**pow)
# Return info
@@ -44,12 +47,14 @@ class BitshareCom(Hoster):
__name__ = "BitshareCom"
__type__ = "hoster"
__pattern__ = r"http://(www\.)?bitshare\.com/(files/(?P<id1>[a-zA-Z0-9]+)(/(?P<name>.*?)\.html)?|\?f=(?P<id2>[a-zA-Z0-9]+))"
- __version__ = "0.2"
+ __version__ = "0.3"
__description__ = """Bitshare.Com File Download Hoster"""
- __author_name__ = ("paul", "king")
+ __author_name__ = ("paul", "king", "fragonib")
- OFFLINE_PATTERN = r'''(>We are sorry, but the requested file was not found in our database|>Error - File not available<|The file was deleted either by the uploader, inactivity or due to copyright claim)'''
+ FILE_OFFLINE_PATTERN = r'''(>We are sorry, but the requested file was not found in our database|>Error - File not available<|The file was deleted either by the uploader, inactivity or due to copyright claim)'''
FILE_INFO_PATTERN = r'<h1>.*\s(?P<name>.+?)\s-\s(?P<size>\d+)\s(?P<units>..)yte</h1>'
+ FILE_AJAXID_PATTERN = r'var ajaxdl = "(.*?)";'
+ CAPTCHA_KEY_PATTERN = r"http://api\.recaptcha\.net/challenge\?k=(.*?) "
def setup(self):
self.multiDL = False
@@ -57,74 +62,92 @@ class BitshareCom(Hoster):
def process(self, pyfile):
self.pyfile = pyfile
+
+ # Force responses language
self.req.cj.setCookie("bitshare.com", "language_selection", "EN")
# File id
m = re.match(self.__pattern__, self.pyfile.url)
self.file_id = max(m.group('id1'), m.group('id2'))
-
- # File url
- self.log.debug("%s: File_id is %s" % (self.__name__, self.file_id))
+ self.log.debug("%s: File id is [%s]" % (self.__name__, self.file_id))
# Load main page
self.html = self.load(self.pyfile.url, ref=False, utf8=True, cookies=True)
# Check offline
- if re.search(self.OFFLINE_PATTERN, self.html) is not None:
+ if re.search(self.FILE_OFFLINE_PATTERN, self.html) is not None:
self.offline()
# File name
name1 = re.search(BitshareCom.__pattern__, self.pyfile.url).group('name')
name2 = re.search(BitshareCom.FILE_INFO_PATTERN, self.html).group('name')
- self.pyfile.name = unicode2str(max(name1, name2))
+ self.pyfile.name = unicode2str(max(name1, name2)) # Unicode BUG workaround
- self.ajaxid = re.search("var ajaxdl = \"(.*?)\";",self.html).group(1)
-
- self.log.debug("%s: AjaxId %s" % (self.__name__, self.ajaxid))
+ # Ajax file id
+ self.ajaxid = re.search(BitshareCom.FILE_AJAXID_PATTERN, self.html).group(1)
+ self.log.debug("%s: File ajax id is [%s]" % (self.__name__, self.ajaxid))
+ # Handle free downloading
self.handleFree()
def handleFree(self):
- action = self.load("http://bitshare.com/files-ajax/" + self.file_id + "/request.html",
+ # Get download info
+ self.log.debug("%s: Getting download info" % (self.__name__))
+ response = self.load("http://bitshare.com/files-ajax/" + self.file_id + "/request.html",
post={"request" : "generateID", "ajaxid" : self.ajaxid})
- self.log.debug("%s: Result of generateID %s" % (self.__name__, action))
- parts = action.split(":")
-
- if parts[0] == "ERROR":
- self.fail(parts[1])
-
+ self.handleErrors(response, ':')
+ parts = response.split(":")
filetype = parts[0]
wait = int(parts[1])
captcha = int(parts[2])
+ self.log.debug("%s: Download info [type: '%s', waiting: %d, captcha: %d]" %
+ (self.__name__, filetype, wait, captcha))
+ # Waiting
if wait > 0:
- self.log.info("%s: Waiting %d seconds." % (self.__name__, wait))
+ self.log.debug("%s: Waiting %d seconds." % (self.__name__, wait))
self.setWait(wait, True)
self.wait()
+ # Resolve captcha
if captcha == 1:
- id = re.search(r"http://api\.recaptcha\.net/challenge\?k=(.*?) ", self.html).group(1)
- self.log.debug("%s: ReCaptcha key %s" % (self.__name__, id))
- for i in range(3): # Try upto 3 times
+ self.log.debug("%s: File is captcha protected" % (self.__name__))
+ id = re.search(BitshareCom.CAPTCHA_KEY_PATTERN, self.html).group(1)
+ # Try up to 3 times
+ for i in range(3):
+ self.log.debug("%s: Resolving ReCaptcha with key [%s], round %d" % (self.__name__, id, i+1))
recaptcha = ReCaptcha(self)
challenge, code = recaptcha.challenge(id)
- action = self.load("http://bitshare.com/files-ajax/" + self.file_id + "/request.html",
+ response = self.load("http://bitshare.com/files-ajax/" + self.file_id + "/request.html",
post={"request" : "validateCaptcha", "ajaxid" : self.ajaxid, "recaptcha_challenge_field" : challenge, "recaptcha_response_field" : code})
- parts = action.split(":")
- if parts[0] != "SUCCESS":
- self.invalidCaptcha()
- else:
+ if self.handleCaptchaErrors(response):
break
- action = self.load("http://bitshare.com/files-ajax/" + self.file_id + "/request.html",
- post={"request" : "getDownloadURL", "ajaxid" : self.ajaxid})
- parts = action.split("#")
-
- if parts[0] == "ERROR":
- self.fail(parts[1])
+ # Get download URL
+ self.log.debug("%s: Getting download url" % (self.__name__))
+ response = self.load("http://bitshare.com/files-ajax/" + self.file_id + "/request.html",
+ post={"request" : "getDownloadURL", "ajaxid" : self.ajaxid})
+ self.handleErrors(response, '#')
+ url = response.split("#")[-1]
- # this may either download our file or forward us to an error page
- self.log.debug("%s: Download url %s" % (self.__name__, parts[1]))
- dl = self.download(parts[1])
+ # Request download URL
+ # This may either download our file or forward us to an error page
+ self.log.debug("%s: Downloading file with url [%s]" % (self.__name__, url))
+ dl = self.download(url)
+
+ def handleErrors(self, response, separator):
+ self.log.debug("%s: Checking response [%s]" % (self.__name__, response))
+ if "ERROR" in response:
+ msg = response.split(separator)[-1]
+ self.fail(msg)
+
+ def handleCaptchaErrors(self, response):
+ self.log.debug("%s: Result of captcha resolving [%s]" % (self.__name__, response))
+ if "SUCCESS" in response:
+ return True
+
+ self.log.debug("%s: Wrong captcha" % (self.__name__))
+ self.invalidCaptcha()
+
diff --git a/module/plugins/hoster/UploadStationCom.py b/module/plugins/hoster/UploadStationCom.py
index 2723ae2ef..19c2d078d 100644
--- a/module/plugins/hoster/UploadStationCom.py
+++ b/module/plugins/hoster/UploadStationCom.py
@@ -12,29 +12,28 @@ from module.plugins.ReCaptcha import ReCaptcha
from module.network.RequestFactory import getURL
def unicode2str(unitext):
- return unicodedata.normalize('NFKD', unitext).encode('ascii', 'ignore')
-
+ return unicodedata.normalize('NFKD', unitext).encode('ascii', 'ignore')
+
def getInfo(urls):
result = []
for url in urls:
- # Get html
- html = getURL(url)
- pattern = r'''<h1>File not available</h1>|<b>The file could not be found\. Please check the download link'''
- if re.search(pattern, html):
+ # Get file info html
+ html = getURL(url)
+ if re.search(UploadStationCom.FILE_OFFLINE_PATTERN, html):
result.append((url, 0, 1, url))
continue
# Name
- pattern = r'''<div class=\"download_item\">(.*?)</div>'''
- name = re.search(pattern, html).group(1)
+ name = re.search(UploadStationCom.FILE_TITLE_PATTERN, html).group(1)
+ name = unicode2str(name) # Unicode BUG workaround
# Size
- pattern = r'''<div><span>File size: <b>(.*?) (KB|MB|GB)</b>'''
- m = re.search(pattern, html)
+ m = re.search(UploadStationCom.FILE_SIZE_PATTERN, html)
value = float(m.group(1))
- pow = {'KB' : 1, 'MB' : 2, 'GB' : 3}[m.group(2)]
+ units = m.group(2)
+ pow = {'KB' : 1, 'MB' : 2, 'GB' : 3}[units]
size = int(value*1024**pow)
# Return info
@@ -46,16 +45,27 @@ def getInfo(urls):
class UploadStationCom(Hoster):
__name__ = "UploadStationCom"
__type__ = "hoster"
- __pattern__ = r"http://(www\.)?uploadstation\.com/file/[A-Za-z0-9]+"
- __version__ = "0.2"
+ __pattern__ = r"http://(www\.)?uploadstation\.com/file/(?P<id>[A-Za-z0-9]+)"
+ __version__ = "0.3"
__description__ = """UploadStation.Com File Download Hoster"""
__author_name__ = ("fragonib")
__author_mail__ = ("fragonib[AT]yahoo[DOT]es")
+
+ FILE_OFFLINE_PATTERN = r'''<h1>File not available</h1>|<b>The file could not be found\. Please check the download link'''
+ FILE_TITLE_PATTERN = r'''<div class=\"download_item\">(.*?)</div>'''
+ FILE_SIZE_PATTERN = r'''<div><span>File size: <b>(.*?) (KB|MB|GB)</b>'''
+ CAPTCHA_PRESENT_TOKEN = '<div class="speedBox" id="showCaptcha" style="display:none;">'
+ CAPTCHA_KEY_PATTERN = r"var reCAPTCHA_publickey='(.*?)';"
+ CAPTCHA_WRONG_TOKEN = 'incorrect-captcha-sol'
+ WAITING_PATTERN = r".*?(\d+).*?"
+ TIME_LIMIT_TOKEN = '"fail":"timeLimit"'
+ TIME_LIMIT_WAIT_PATTERN = r"You need to wait (\d+) seconds to download next file."
+ DOWNLOAD_RESTRICTION_TOKEN = '"To remove download restriction, please choose your suitable plan as below</h1>"'
def setup(self):
self.multiDL = False
- self.fileId = re.search(r"uploadstation\.com/file/([a-zA-Z0-9]+)(http:.*)?", self.pyfile.url).group(1)
- self.pyfile.url = "http://www.uploadstation.com/file/" + self.fileId
+ self.fileId = ''
+ self.html = ''
def process(self, pyfile):
@@ -63,15 +73,14 @@ class UploadStationCom(Hoster):
self.html = self.load(self.pyfile.url, ref=False, cookies=True, utf8=True)
# Is offline?
- pattern = r'''<h1>File not available</h1>|<b>The file could not be found\. Please check the download link'''
- m = re.search(pattern, self.html)
+ m = re.search(UploadStationCom.FILE_OFFLINE_PATTERN, self.html)
if m is not None:
self.offline()
- # Title
- pattern = r'''<div class=\"download_item\">(.*?)</div>'''
- title = re.search(pattern, self.html).group(1)
- self.pyfile.name = unicode2str(title)
+ # Id & Title
+ self.fileId = re.search(self.__pattern__, self.pyfile.url).group('id')
+ title = re.search(UploadStationCom.FILE_TITLE_PATTERN, self.html).group(1)
+ self.pyfile.name = unicode2str(title) # Unicode BUG workaround
# Free account
self.handleFree()
@@ -79,81 +88,83 @@ class UploadStationCom(Hoster):
def handleFree(self):
# Not needed yet
- #pattern = r'''\"(/landing/.*?/download_captcha\.js)\"'''
- #jsPage = re.search(pattern, self.html).group(1)
- #self.jsPage = self.load("http://uploadstation.com" + jsPage)
+ # pattern = r'''\"(/landing/.*?/download_captcha\.js)\"'''
+ # jsPage = re.search(pattern, self.html).group(1)
+ # self.jsPage = self.load("http://uploadstation.com" + jsPage)
# Check download
response = self.load(self.pyfile.url, post={"checkDownload" : "check"})
- if not '"success":"showCaptcha"' in response:
- self.handleErrors(response)
+ self.log.debug("%s: Checking download, response [%s]" % (self.__name__, response))
+ self.handleErrors(response)
# We got a captcha?
- if '<div class="speedBox" id="showCaptcha" style="display:none;">' in self.html:
- id = re.search(r"var reCAPTCHA_publickey='(.*?)';", self.html).group(1)
+ if UploadStationCom.CAPTCHA_PRESENT_TOKEN in self.html:
+ id = re.search(UploadStationCom.CAPTCHA_KEY_PATTERN, self.html).group(1)
+ self.log.debug("%s: Resolving ReCaptcha with key [%s]" % (self.__name__, id))
recaptcha = ReCaptcha(self)
challenge, code = recaptcha.challenge(id)
response = self.load('http://www.uploadstation.com/checkReCaptcha.php',
post={'recaptcha_challenge_field' : challenge,
'recaptcha_response_field' : code,
'recaptcha_shortencode_field' : self.fileId})
- if r'incorrect-captcha-sol' in response:
- self.handleCaptchaErrors(response)
+ self.log.debug("%s: Result of captcha resolving [%s]" % (self.__name__, response))
+ self.handleCaptchaErrors(response)
# Process waiting
- response = self.load(self.pyfile.url, post={"downloadLink":"wait"})
- m = re.search(r".*?(\d+).*?", response)
+ response = self.load(self.pyfile.url, post={"downloadLink" : "wait"})
+ m = re.search(UploadStationCom.WAITING_PATTERN, response)
if m is not None:
- wait = m.group(1)
- if wait == "404":
+ wait = int(m.group(1))
+ if wait == 404:
self.log.debug("No wait time returned")
self.fail("No wait time returned")
- else:
- self.setWait(int(wait))
+ self.log.debug("%s: Waiting %d seconds." % (self.__name__, wait))
+ self.setWait(wait + 3)
self.wait()
# Show download link
- self.load(self.pyfile.url, post={"downloadLink":"show"})
+ self.load(self.pyfile.url, post={"downloadLink" : "show"})
# This may either download our file or forward us to an error page
- dl = self.download(self.pyfile.url, post={"download":"normal"})
+ self.log.debug("%s: Downloading file." % (self.__name__))
+ dl = self.download(self.pyfile.url, post={"download" : "normal"})
self.handleDownloadedFile()
def handleErrors(self, response):
- text = '"fail":"timeLimit"'
- if text in response:
+ if UploadStationCom.TIME_LIMIT_TOKEN in response:
wait = 300
html = self.load(self.pyfile.url, post={"checkDownload" : "showError", "errorType" : "timeLimit"})
- m = re.search(r"You need to wait (\d+) seconds to download next file.", html)
+ m = re.search(UploadStationCom.TIME_LIMIT_WAIT_PATTERN, html)
if m is not None:
wait = int(m.group(1))
+ self.log.info("%s: Time limit reached, waiting %d seconds." % (self.__name__, wait))
self.setWait(wait, True)
self.wait()
self.retry()
- text = '"To remove download restriction, please choose your suitable plan as below</h1>"'
- if text in response:
+ if UploadStationCom.DOWNLOAD_RESTRICTION_TOKEN in response:
wait = 720
+ self.log.info("%s: Free account time limit reached, waiting %d seconds." % (self.__name__, wait))
self.setWait(wait, True)
self.wait()
self.retry()
def handleCaptchaErrors(self, response):
- self.invalidCaptcha()
- self.retry()
+ if UploadStationCom.CAPTCHA_WRONG_TOKEN in response:
+ self.log.info("%s: Invalid captcha response, retrying." % (self.__name__))
+ self.invalidCaptcha()
+ self.retry()
def handleDownloadedFile(self):
- check = self.checkDownload({"wait": re.compile(r'You need to wait (\d+) seconds to download next file.')})
-
+ check = self.checkDownload({"wait": re.compile(UploadStationCom.TIME_LIMIT_WAIT_PATTERN)})
if check == "wait":
- wait_time = 720
+ wait = 720
if self.lastCheck is not None:
- wait_time = int(self.lastCheck.group(1))
- self.setWait(wait_time+3)
- self.log.debug("%s: You need to wait %d seconds for another download." % (self.__name__, wait_time))
- self.wantReconnect = True
+ wait = int(self.lastCheck.group(1))
+ self.log.debug("%s: Failed, you need to wait %d seconds for another download." % (self.__name__, wait))
+ self.setWait(wait + 3, True)
self.wait()
- self.retry()
\ No newline at end of file
+ self.retry()
\ No newline at end of file