diff options
-rw-r--r-- | module/Api.py | 9 | ||||
-rw-r--r-- | module/PluginThread.py | 17 | ||||
-rw-r--r-- | module/ThreadManager.py | 35 | ||||
-rw-r--r-- | module/common/packagetools.py | 46 | ||||
-rw-r--r-- | module/plugins/hoster/FileserveCom.py | 30 | ||||
-rwxr-xr-x | pyLoadCli.py | 33 |
6 files changed, 121 insertions, 49 deletions
diff --git a/module/Api.py b/module/Api.py index dbdd2b04c..1d8d4d77e 100644 --- a/module/Api.py +++ b/module/Api.py @@ -285,7 +285,14 @@ class Api(Iface): return self.core.threadManager.createResultThread(data) def pollResults(self, rid): - pass + """ Polls the result available for ResultID + :param rid: + :return: + """ + self.core.threadManager.timestamp = time() + 5 * 60 + + return self.core.threadManager.getInfoResult(rid) + def generatePackages(self, links): """ Parses links, generates packages names only from urls diff --git a/module/PluginThread.py b/module/PluginThread.py index a44981c52..5492f3ec4 100644 --- a/module/PluginThread.py +++ b/module/PluginThread.py @@ -407,7 +407,6 @@ class HookThread(PluginThread): class InfoThread(PluginThread): - #---------------------------------------------------------------------- def __init__(self, manager, data, pid=-1, rid=-1): """Constructor""" PluginThread.__init__(self, manager) @@ -422,7 +421,6 @@ class InfoThread(PluginThread): self.start() - #---------------------------------------------------------------------- def run(self): """run method""" @@ -457,9 +455,16 @@ class InfoThread(PluginThread): else: #generate default result - pass + tmp = [(url, (url, OnlineStatus(url, pluginname, 3, 0))) for url in urls] + result = parseNames(tmp) + for k in result.iterkeys(): + result[k] = dict(result[k]) + + self.m.setInfoResults(self.rid, result) + + self.m.infoResults[self.rid]["ALL_INFO_FETCHED"] = {} - self.m.infoResults[self.rid]["ALL_INFO_FETCHED"] = [] + self.m.timestamp = time() + 5 * 60 def updateDB(self, plugin, result): @@ -471,7 +476,7 @@ class InfoThread(PluginThread): self.cache.extend(result) - if len(self.cache) > 20 or force: + if len(self.cache) >= 20 or force: #used for package generating tmp = [(name, (url, OnlineStatus(name, plugin, status, int(size)))) for name, size, status, url in self.cache] @@ -480,7 +485,7 @@ class InfoThread(PluginThread): for k in result.iterkeys(): result[k] = dict(result[k]) - print result + self.m.setInfoResults(self.rid, result) self.cache = [] diff --git a/module/ThreadManager.py b/module/ThreadManager.py index 0ee59b427..ba75764c5 100644 --- a/module/ThreadManager.py +++ b/module/ThreadManager.py @@ -21,8 +21,8 @@ from os.path import exists, join import re from subprocess import Popen -from threading import Event -from time import sleep +from threading import Event, Lock +from time import sleep, time from traceback import print_exc from random import choice @@ -31,7 +31,7 @@ import pycurl import PluginThread from module.PyFile import PyFile from module.network.RequestFactory import getURL -from module.utils import freeSpace +from module.utils import freeSpace, lock class ThreadManager: @@ -52,6 +52,8 @@ class ThreadManager: self.reconnecting.clear() self.downloaded = 0 #number of files downloaded since last cleanup + self.lock = Lock() + # some operations require to fetch url info from hoster, so we caching them so it wont be done twice # contains a timestamp and will be purged after timeout self.infoCache = {} @@ -76,17 +78,19 @@ class ThreadManager: thread = PluginThread.DownloadThread(self) self.threads.append(thread) - def createInfoThread(self, data, pid): """ start a thread whichs fetches online status and other infos data = [ .. () .. ] """ + self.timestamp = time() + 5 * 60 PluginThread.InfoThread(self, data, pid) + @lock def createResultThread(self, data): """ creates a thread to fetch online status, returns result id """ + self.timestamp = time() + 5 * 60 rid = self.resultIDs self.resultIDs += 1 @@ -96,6 +100,24 @@ class ThreadManager: return rid + @lock + def getInfoResult(self, rid): + """returns result and clears it""" + if rid in self.infoResults: + data = self.infoResults[rid] + self.infoResults[rid] = {} + return data + else: + return {} + + @lock + def setInfoResults(self, rid, result): + for k, v in result.iteritems(): + if k in self.infoResults[rid]: + self.infoResults[rid][k].update(v) + else: + self.infoResults[rid][k] = v + def downloadingIds(self): """get a list of the currently downloading pyfile's ids""" return [x.active.id for x in self.threads if x.active and isinstance(x.active, PyFile)] @@ -129,6 +151,11 @@ class ThreadManager: self.assignJob() #it may be failed non critical so we try it again + if (self.infoCache or self.infoResults) and self.timestamp < time(): + self.log.debug("Cleared Result cache") + self.infoCache.clear() + self.infoResults.clear() + #---------------------------------------------------------------------- def tryReconnect(self): """checks if reconnect needed""" diff --git a/module/common/packagetools.py b/module/common/packagetools.py index 4682b0dc1..6b37c0198 100644 --- a/module/common/packagetools.py +++ b/module/common/packagetools.py @@ -50,8 +50,9 @@ def parseNames(files): pat3 = re.compile("(.+)[\\.\\-_]+$") pat4 = re.compile("(.+)\\.\\d+\\.xtm$") - for file, url in files: + patternMatch = False + # remove trailing / name = file.rstrip('/') @@ -71,22 +72,26 @@ def parseNames(files): # if found: continue # unrar pattern, 7zip/zip and hjmerge pattern, isz pattern, FFSJ pattern + before = name name = matchFirst(name, rarPats, zipPats, iszPats, ffsjPats) + if before != name: + patternMatch = True # xtremsplit pattern r = pat4.search(name) if r is not None: name = r.group(1) - # remove part and cd pattern r = pat1.search(name) if r is not None: name = name.replace(r.group(0), "") + patternMatch = True r = pat2.search(name) if r is not None: name = name.replace(r.group(0), "") + patternMatch = True # remove extension index = name.rfind(".") @@ -108,28 +113,34 @@ def parseNames(files): name = name.strip() - # checks if name could be a hash - if file.find("file/" + name) >= 0: - name = "" + # special checks if no extension pattern matched + if patternMatch is False: + # checks if name could be a hash + if file.find("file/" + name) >= 0: + name = "" - if file.find("files/" + name) >= 0: - name = "" + if file.find("files/" + name) >= 0: + name = "" - r = re.search("^[0-9]+$", name, re.I) - if r is not None: - name = "" + r = re.search("^[0-9]+$", name, re.I) + if r is not None: + name = "" - r = re.search("^[0-9a-z]+$", name, re.I) - if r is not None: - r1 = re.search("[0-9]+.+[0-9]", name) - r2 = re.search("[a-z]+.+[a-z]+", name, re.I) - if r1 is not None and r2 is not None: + r = re.search("^[0-9a-z]+$", name, re.I) + if r is not None: + r1 = re.search("[0-9]+.+[0-9]", name) + r2 = re.search("[a-z]+.+[a-z]+", name, re.I) + if r1 is not None and r2 is not None: + name = "" + + path = urlparse(file).path + if path == "/" + name or path == "/" + name + ".htm": name = "" # fallback: package by hoster if not name: name = urlparse(file).hostname - if name: name = name.replace("ww.", "") + if name: name = name.replace("www.", "") # fallback : default name if not name: @@ -145,7 +156,6 @@ def parseNames(files): if __name__ == "__main__": - from os.path import join from pprint import pprint @@ -159,4 +169,4 @@ if __name__ == "__main__": pprint(packs) - print "Got %d urls." % sum([len(x) for x in packs.itervalues()])
\ No newline at end of file + print "Got %d urls." % sum([len(x) for x in packs.itervalues()]) diff --git a/module/plugins/hoster/FileserveCom.py b/module/plugins/hoster/FileserveCom.py index ba319ee2b..6cd842941 100644 --- a/module/plugins/hoster/FileserveCom.py +++ b/module/plugins/hoster/FileserveCom.py @@ -7,6 +7,7 @@ from module.plugins.Hoster import Hoster from module.plugins.ReCaptcha import ReCaptcha
from module.network.RequestFactory import getURL
+from module.utils import parseFileSize
try:
from json import loads as json_loads
@@ -14,26 +15,17 @@ except ImportError: # pragma: no cover from module.lib.simplejson import loads as json_loads
def getInfo(urls):
- result = []
-
- for url in urls:
- # Get html
- html = getURL(url)
- if re.search(r'<h1>File not available</h1>', html):
- result.append((url, 0, 1, url))
- continue
+ reg = r"<td>(http://(?:www\.)?fileserve\.com/file/.+(?:[\r\n\t]+)?)</td>[\r\n\t ]+<td>(.*?)</td>[\r\n\t ]+<td>(.*?)</td>[\r\n\t ]+<td>(Available|Not available)(?:\ )?(?:<img|</td>)"
+ url = "http://fileserve.com/link-checker.php"
- # Name
- name = re.search('<h1>(.*?)<br/></h1>', html).group(1)
+ #get all at once, shows strange behavior otherwise
+ html = getURL(url, post={"submit": "Check Urls", "urls": "\n".join(urls)}, decode=True)
- # Size
- m = re.search(r"<strong>(.*?) (KB|MB|GB)</strong>", html)
- units = float(m.group(1))
- pow = {'KB': 1, 'MB': 2, 'GB': 3}[m.group(2)]
- size = int(units * 1024 ** pow)
+ match = re.findall(reg, html, re.IGNORECASE + re.MULTILINE)
- # Return info
- result.append((name, size, 2, url))
+ result = []
+ for url, name, size, status in match:
+ result.append((name, parseFileSize(size), 1 if status == "Not available" else 2, url))
yield result
@@ -42,13 +34,13 @@ class FileserveCom(Hoster): __name__ = "FileserveCom"
__type__ = "hoster"
__pattern__ = r"http://(www\.)?fileserve\.com/file/[a-zA-Z0-9]+"
- __version__ = "0.4"
+ __version__ = "0.41"
__description__ = """Fileserve.Com File Download Hoster"""
__author_name__ = ("jeix", "mkaay", "paul king")
__author_mail__ = ("jeix@hasnomail.de", "mkaay@mkaay.de", "")
FILE_ID_KEY = r"fileserve\.com/file/(?P<id>\w+)"
- FILE_CHECK_KEY = r"<td>http://www.fileserve.com/file/(?P<id>\w+)</td>.*?<td>(?P<name>.*?)</td>.*?<td>(?P<units>.*?) (?P<scale>.B)</td>.*?<td>(?P<online>.*?)</td>"
+ FILE_CHECK_KEY = r"<td>http://www.fileserve\.com/file/(?P<id>\w+)</td>.*?<td>(?P<name>.*?)</td>.*?<td>(?P<units>.*?) (?P<scale>.B)</td>.*?<td>(?P<online>.*?)</td>"
CAPTCHA_KEY_PATTERN = r"var reCAPTCHA_publickey='(?P<key>.*?)';"
LONG_WAIT_PATTERN = r"You need to wait (\d+) seconds to start another download"
diff --git a/pyLoadCli.py b/pyLoadCli.py index db5952104..a59832c73 100755 --- a/pyLoadCli.py +++ b/pyLoadCli.py @@ -23,7 +23,7 @@ from getopt import GetoptError, getopt import gettext import os from os import _exit -from os.path import join, exists +from os.path import join, exists, abspath import sys from sys import exit from threading import Thread, Lock @@ -41,6 +41,10 @@ else: sys.stdout = getwriter(enc)(sys.stdout, errors = "replace") +#original working dir +OWD = abspath("") + + from module import InitHomeDir from module.cli.printer import * from module.cli import AddPackage, ManageFiles @@ -311,6 +315,32 @@ class Cli: pack = self.client.getPackageInfo(int(pid)) self.client.movePackage((pack.dest + 1) % 2, pack.pid) + elif command == "check": + links = [] + for url in args: + if exists(join(OWD, url)): + f = open(join(OWD, url), "rb") + links.extend([x.strip() for x in f.readlines() if x.strip()]) + else: + links.append(url) + + print _("Checking %d links:") % len(links) + print + rid = client.checkOnlineStatus(links) + while True: + sleep(1) + result = client.pollResults(rid) + for pack in result.itervalues(): + for url, status in pack.iteritems(): + if status.status == 2: check = "Online" + elif status.status == 1: check = "Offline" + else: check = "Unknown" + + print "%-30s: %-30s %-8s\t %s" % (url, status.name, formatSize(status.size), check) + + if "ALL_INFO_FETCHED" in result: break + + elif command == "pause": self.client.pause() @@ -416,6 +446,7 @@ def print_commands(): ("move <pid> <pid2>...", _("Move Packages from Queue to Collector or vice versa")), ("restart_file <fid> <fid2>...", _("Restart files")), ("restart_package <pid> <pid2>...", _("Restart packages")), + ("check <linklist|url> ...", _("Check online status")), ("pause", _("Pause the server")), ("unpause", _("continue downloads")), ("toggle", _("Toggle pause/unpause")), |