diff options
30 files changed, 2159 insertions, 3734 deletions
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 3 of the License,
    or (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
    See the GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, see <http://www.gnu.org/licenses/>.

    @author: RaNaN
"""


class AccountManager():
    """Central registry for the accounts used by plugins.

    The manager keeps a reference to the core object it was created with
    and an ``accounts`` mapping whose keys are ``(plugin, accountname)``
    tuples. Loading and saving are placeholders at this stage.
    """

    def __init__(self, core):
        """Remember *core*, start with an empty table and load the accounts."""
        # key = (plugin, accountname)
        self.accounts = {}
        self.core = core

        self.loadAccounts()

    def getAccount(self, plugin):
        """Return the account instance for *plugin*, or None if anonymous.

        The lookup is not implemented yet, so every plugin is currently
        reported as anonymous.
        """
        # @TODO: look the plugin up in self.accounts
        return None

    def loadAccounts(self):
        """Load all available accounts (placeholder, does nothing yet)."""

    def saveAccount(self):
        """Persist all account information (placeholder, does nothing yet)."""
\ No newline at end of file diff --git a/module/BeautifulSoup.py b/module/BeautifulSoup.py deleted file mode 100644 index 34204e740..000000000 --- a/module/BeautifulSoup.py +++ /dev/null @@ -1,2000 +0,0 @@ -"""Beautiful Soup -Elixir and Tonic -"The Screen-Scraper's Friend" -http://www.crummy.com/software/BeautifulSoup/ - -Beautiful Soup parses a (possibly invalid) XML or HTML document into a -tree representation. It provides methods and Pythonic idioms that make -it easy to navigate, search, and modify the tree. - -A well-formed XML/HTML document yields a well-formed data -structure. An ill-formed XML/HTML document yields a correspondingly -ill-formed data structure. If your document is only locally -well-formed, you can use this library to find and process the -well-formed part of it. - -Beautiful Soup works with Python 2.2 and up. It has no external -dependencies, but you'll have more success at converting data to UTF-8 -if you also install these three packages: - -* chardet, for auto-detecting character encodings - http://chardet.feedparser.org/ -* cjkcodecs and iconv_codec, which add more encodings to the ones supported - by stock Python. - http://cjkpython.i18n.org/ - -Beautiful Soup defines classes for two main parsing strategies: - - * BeautifulStoneSoup, for parsing XML, SGML, or your domain-specific - language that kind of looks like XML. - - * BeautifulSoup, for parsing run-of-the-mill HTML code, be it valid - or invalid. This class has web browser-like heuristics for - obtaining a sensible parse tree in the face of common HTML errors. - -Beautiful Soup also defines a class (UnicodeDammit) for autodetecting -the encoding of an HTML or XML document, and converting it to -Unicode. Much of this code is taken from Mark Pilgrim's Universal Feed Parser. 
- -For more than you ever wanted to know about Beautiful Soup, see the -documentation: -http://www.crummy.com/software/BeautifulSoup/documentation.html - -Here, have some legalese: - -Copyright (c) 2004-2009, Leonard Richardson - -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - * Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following - disclaimer in the documentation and/or other materials provided - with the distribution. - - * Neither the name of the the Beautiful Soup Consortium and All - Night Kosher Bakery nor the names of its contributors may be - used to endorse or promote products derived from this software - without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE, DAMMIT. 
- -""" -from __future__ import generators - -__author__ = "Leonard Richardson (leonardr@segfault.org)" -__version__ = "3.1.0.1" -__copyright__ = "Copyright (c) 2004-2009 Leonard Richardson" -__license__ = "New-style BSD" - -import codecs -import markupbase -import types -import re -from HTMLParser import HTMLParser, HTMLParseError -try: - from htmlentitydefs import name2codepoint -except ImportError: - name2codepoint = {} -try: - set -except NameError: - from sets import Set as set - -#These hacks make Beautiful Soup able to parse XML with namespaces -markupbase._declname_match = re.compile(r'[a-zA-Z][-_.:a-zA-Z0-9]*\s*').match - -DEFAULT_OUTPUT_ENCODING = "utf-8" - -# First, the classes that represent markup elements. - -def sob(unicode, encoding): - """Returns either the given Unicode string or its encoding.""" - if encoding is None: - return unicode - else: - return unicode.encode(encoding) - -class PageElement: - """Contains the navigational information for some part of the page - (either a tag or a piece of text)""" - - def setup(self, parent=None, previous=None): - """Sets up the initial relations between this element and - other elements.""" - self.parent = parent - self.previous = previous - self.next = None - self.previousSibling = None - self.nextSibling = None - if self.parent and self.parent.contents: - self.previousSibling = self.parent.contents[-1] - self.previousSibling.nextSibling = self - - def replaceWith(self, replaceWith): - oldParent = self.parent - myIndex = self.parent.contents.index(self) - if hasattr(replaceWith, 'parent') and replaceWith.parent == self.parent: - # We're replacing this element with one of its siblings. - index = self.parent.contents.index(replaceWith) - if index and index < myIndex: - # Furthermore, it comes before this element. That - # means that when we extract it, the index of this - # element will change. 
- myIndex = myIndex - 1 - self.extract() - oldParent.insert(myIndex, replaceWith) - - def extract(self): - """Destructively rips this element out of the tree.""" - if self.parent: - try: - self.parent.contents.remove(self) - except ValueError: - pass - - #Find the two elements that would be next to each other if - #this element (and any children) hadn't been parsed. Connect - #the two. - lastChild = self._lastRecursiveChild() - nextElement = lastChild.next - - if self.previous: - self.previous.next = nextElement - if nextElement: - nextElement.previous = self.previous - self.previous = None - lastChild.next = None - - self.parent = None - if self.previousSibling: - self.previousSibling.nextSibling = self.nextSibling - if self.nextSibling: - self.nextSibling.previousSibling = self.previousSibling - self.previousSibling = self.nextSibling = None - return self - - def _lastRecursiveChild(self): - "Finds the last element beneath this object to be parsed." - lastChild = self - while hasattr(lastChild, 'contents') and lastChild.contents: - lastChild = lastChild.contents[-1] - return lastChild - - def insert(self, position, newChild): - if (isinstance(newChild, basestring) - or isinstance(newChild, unicode)) \ - and not isinstance(newChild, NavigableString): - newChild = NavigableString(newChild) - - position = min(position, len(self.contents)) - if hasattr(newChild, 'parent') and newChild.parent != None: - # We're 'inserting' an element that's already one - # of this object's children. - if newChild.parent == self: - index = self.find(newChild) - if index and index < position: - # Furthermore we're moving it further down the - # list of this object's children. That means that - # when we extract this element, our target index - # will jump down one. 
- position = position - 1 - newChild.extract() - - newChild.parent = self - previousChild = None - if position == 0: - newChild.previousSibling = None - newChild.previous = self - else: - previousChild = self.contents[position-1] - newChild.previousSibling = previousChild - newChild.previousSibling.nextSibling = newChild - newChild.previous = previousChild._lastRecursiveChild() - if newChild.previous: - newChild.previous.next = newChild - - newChildsLastElement = newChild._lastRecursiveChild() - - if position >= len(self.contents): - newChild.nextSibling = None - - parent = self - parentsNextSibling = None - while not parentsNextSibling: - parentsNextSibling = parent.nextSibling - parent = parent.parent - if not parent: # This is the last element in the document. - break - if parentsNextSibling: - newChildsLastElement.next = parentsNextSibling - else: - newChildsLastElement.next = None - else: - nextChild = self.contents[position] - newChild.nextSibling = nextChild - if newChild.nextSibling: - newChild.nextSibling.previousSibling = newChild - newChildsLastElement.next = nextChild - - if newChildsLastElement.next: - newChildsLastElement.next.previous = newChildsLastElement - self.contents.insert(position, newChild) - - def append(self, tag): - """Appends the given tag to the contents of this tag.""" - self.insert(len(self.contents), tag) - - def findNext(self, name=None, attrs={}, text=None, **kwargs): - """Returns the first item that matches the given criteria and - appears after this Tag in the document.""" - return self._findOne(self.findAllNext, name, attrs, text, **kwargs) - - def findAllNext(self, name=None, attrs={}, text=None, limit=None, - **kwargs): - """Returns all items that match the given criteria and appear - after this Tag in the document.""" - return self._findAll(name, attrs, text, limit, self.nextGenerator, - **kwargs) - - def findNextSibling(self, name=None, attrs={}, text=None, **kwargs): - """Returns the closest sibling to this Tag that matches 
the - given criteria and appears after this Tag in the document.""" - return self._findOne(self.findNextSiblings, name, attrs, text, - **kwargs) - - def findNextSiblings(self, name=None, attrs={}, text=None, limit=None, - **kwargs): - """Returns the siblings of this Tag that match the given - criteria and appear after this Tag in the document.""" - return self._findAll(name, attrs, text, limit, - self.nextSiblingGenerator, **kwargs) - fetchNextSiblings = findNextSiblings # Compatibility with pre-3.x - - def findPrevious(self, name=None, attrs={}, text=None, **kwargs): - """Returns the first item that matches the given criteria and - appears before this Tag in the document.""" - return self._findOne(self.findAllPrevious, name, attrs, text, **kwargs) - - def findAllPrevious(self, name=None, attrs={}, text=None, limit=None, - **kwargs): - """Returns all items that match the given criteria and appear - before this Tag in the document.""" - return self._findAll(name, attrs, text, limit, self.previousGenerator, - **kwargs) - fetchPrevious = findAllPrevious # Compatibility with pre-3.x - - def findPreviousSibling(self, name=None, attrs={}, text=None, **kwargs): - """Returns the closest sibling to this Tag that matches the - given criteria and appears before this Tag in the document.""" - return self._findOne(self.findPreviousSiblings, name, attrs, text, - **kwargs) - - def findPreviousSiblings(self, name=None, attrs={}, text=None, - limit=None, **kwargs): - """Returns the siblings of this Tag that match the given - criteria and appear before this Tag in the document.""" - return self._findAll(name, attrs, text, limit, - self.previousSiblingGenerator, **kwargs) - fetchPreviousSiblings = findPreviousSiblings # Compatibility with pre-3.x - - def findParent(self, name=None, attrs={}, **kwargs): - """Returns the closest parent of this Tag that matches the given - criteria.""" - # NOTE: We can't use _findOne because findParents takes a different - # set of arguments. 
- r = None - l = self.findParents(name, attrs, 1) - if l: - r = l[0] - return r - - def findParents(self, name=None, attrs={}, limit=None, **kwargs): - """Returns the parents of this Tag that match the given - criteria.""" - - return self._findAll(name, attrs, None, limit, self.parentGenerator, - **kwargs) - fetchParents = findParents # Compatibility with pre-3.x - - #These methods do the real heavy lifting. - - def _findOne(self, method, name, attrs, text, **kwargs): - r = None - l = method(name, attrs, text, 1, **kwargs) - if l: - r = l[0] - return r - - def _findAll(self, name, attrs, text, limit, generator, **kwargs): - "Iterates over a generator looking for things that match." - - if isinstance(name, SoupStrainer): - strainer = name - else: - # Build a SoupStrainer - strainer = SoupStrainer(name, attrs, text, **kwargs) - results = ResultSet(strainer) - g = generator() - while True: - try: - i = g.next() - except StopIteration: - break - if i: - found = strainer.search(i) - if found: - results.append(found) - if limit and len(results) >= limit: - break - return results - - #These Generators can be used to navigate starting from both - #NavigableStrings and Tags. - def nextGenerator(self): - i = self - while i: - i = i.next - yield i - - def nextSiblingGenerator(self): - i = self - while i: - i = i.nextSibling - yield i - - def previousGenerator(self): - i = self - while i: - i = i.previous - yield i - - def previousSiblingGenerator(self): - i = self - while i: - i = i.previousSibling - yield i - - def parentGenerator(self): - i = self - while i: - i = i.parent - yield i - - # Utility methods - def substituteEncoding(self, str, encoding=None): - encoding = encoding or "utf-8" - return str.replace("%SOUP-ENCODING%", encoding) - - def toEncoding(self, s, encoding=None): - """Encodes an object to a string in some encoding, or to Unicode. 
- .""" - if isinstance(s, unicode): - if encoding: - s = s.encode(encoding) - elif isinstance(s, str): - if encoding: - s = s.encode(encoding) - else: - s = unicode(s) - else: - if encoding: - s = self.toEncoding(str(s), encoding) - else: - s = unicode(s) - return s - -class NavigableString(unicode, PageElement): - - def __new__(cls, value): - """Create a new NavigableString. - - When unpickling a NavigableString, this method is called with - the string in DEFAULT_OUTPUT_ENCODING. That encoding needs to be - passed in to the superclass's __new__ or the superclass won't know - how to handle non-ASCII characters. - """ - if isinstance(value, unicode): - return unicode.__new__(cls, value) - return unicode.__new__(cls, value, DEFAULT_OUTPUT_ENCODING) - - def __getnewargs__(self): - return (unicode(self),) - - def __getattr__(self, attr): - """text.string gives you text. This is for backwards - compatibility for Navigable*String, but for CData* it lets you - get the string without the CData wrapper.""" - if attr == 'string': - return self - else: - raise AttributeError, "'%s' object has no attribute '%s'" % (self.__class__.__name__, attr) - - def encode(self, encoding=DEFAULT_OUTPUT_ENCODING): - return self.decode().encode(encoding) - - def decodeGivenEventualEncoding(self, eventualEncoding): - return self - -class CData(NavigableString): - - def decodeGivenEventualEncoding(self, eventualEncoding): - return u'<![CDATA[' + self + u']]>' - -class ProcessingInstruction(NavigableString): - - def decodeGivenEventualEncoding(self, eventualEncoding): - output = self - if u'%SOUP-ENCODING%' in output: - output = self.substituteEncoding(output, eventualEncoding) - return u'<?' + output + u'?>' - -class Comment(NavigableString): - def decodeGivenEventualEncoding(self, eventualEncoding): - return u'<!--' + self + u'-->' - -class Declaration(NavigableString): - def decodeGivenEventualEncoding(self, eventualEncoding): - return u'<!' 
+ self + u'>' - -class Tag(PageElement): - - """Represents a found HTML tag with its attributes and contents.""" - - def _invert(h): - "Cheap function to invert a hash." - i = {} - for k,v in h.items(): - i[v] = k - return i - - XML_ENTITIES_TO_SPECIAL_CHARS = { "apos" : "'", - "quot" : '"', - "amp" : "&", - "lt" : "<", - "gt" : ">" } - - XML_SPECIAL_CHARS_TO_ENTITIES = _invert(XML_ENTITIES_TO_SPECIAL_CHARS) - - def _convertEntities(self, match): - """Used in a call to re.sub to replace HTML, XML, and numeric - entities with the appropriate Unicode characters. If HTML - entities are being converted, any unrecognized entities are - escaped.""" - x = match.group(1) - if self.convertHTMLEntities and x in name2codepoint: - return unichr(name2codepoint[x]) - elif x in self.XML_ENTITIES_TO_SPECIAL_CHARS: - if self.convertXMLEntities: - return self.XML_ENTITIES_TO_SPECIAL_CHARS[x] - else: - return u'&%s;' % x - elif len(x) > 0 and x[0] == '#': - # Handle numeric entities - if len(x) > 1 and x[1] == 'x': - return unichr(int(x[2:], 16)) - else: - return unichr(int(x[1:])) - - elif self.escapeUnrecognizedEntities: - return u'&%s;' % x - else: - return u'&%s;' % x - - def __init__(self, parser, name, attrs=None, parent=None, - previous=None): - "Basic constructor." - - # We don't actually store the parser object: that lets extracted - # chunks be garbage-collected - self.parserClass = parser.__class__ - self.isSelfClosing = parser.isSelfClosingTag(name) - self.name = name - if attrs == None: - attrs = [] - self.attrs = attrs - self.contents = [] - self.setup(parent, previous) - self.hidden = False - self.containsSubstitutions = False - self.convertHTMLEntities = parser.convertHTMLEntities - self.convertXMLEntities = parser.convertXMLEntities - self.escapeUnrecognizedEntities = parser.escapeUnrecognizedEntities - - def convert(kval): - "Converts HTML, XML and numeric entities in the attribute value." 
- k, val = kval - if val is None: - return kval - return (k, re.sub("&(#\d+|#x[0-9a-fA-F]+|\w+);", - self._convertEntities, val)) - self.attrs = map(convert, self.attrs) - - def get(self, key, default=None): - """Returns the value of the 'key' attribute for the tag, or - the value given for 'default' if it doesn't have that - attribute.""" - return self._getAttrMap().get(key, default) - - def has_key(self, key): - return self._getAttrMap().has_key(key) - - def __getitem__(self, key): - """tag[key] returns the value of the 'key' attribute for the tag, - and throws an exception if it's not there.""" - return self._getAttrMap()[key] - - def __iter__(self): - "Iterating over a tag iterates over its contents." - return iter(self.contents) - - def __len__(self): - "The length of a tag is the length of its list of contents." - return len(self.contents) - - def __contains__(self, x): - return x in self.contents - - def __nonzero__(self): - "A tag is non-None even if it has no contents." - return True - - def __setitem__(self, key, value): - """Setting tag[key] sets the value of the 'key' attribute for the - tag.""" - self._getAttrMap() - self.attrMap[key] = value - found = False - for i in range(0, len(self.attrs)): - if self.attrs[i][0] == key: - self.attrs[i] = (key, value) - found = True - if not found: - self.attrs.append((key, value)) - self._getAttrMap()[key] = value - - def __delitem__(self, key): - "Deleting tag[key] deletes all 'key' attributes for the tag." - for item in self.attrs: - if item[0] == key: - self.attrs.remove(item) - #We don't break because bad HTML can define the same - #attribute multiple times. - self._getAttrMap() - if self.attrMap.has_key(key): - del self.attrMap[key] - - def __call__(self, *args, **kwargs): - """Calling a tag like a function is the same as calling its - findAll() method. Eg. 
tag('a') returns a list of all the A tags - found within this tag.""" - return apply(self.findAll, args, kwargs) - - def __getattr__(self, tag): - #print "Getattr %s.%s" % (self.__class__, tag) - if len(tag) > 3 and tag.rfind('Tag') == len(tag)-3: - return self.find(tag[:-3]) - elif tag.find('__') != 0: - return self.find(tag) - raise AttributeError, "'%s' object has no attribute '%s'" % (self.__class__, tag) - - def __eq__(self, other): - """Returns true iff this tag has the same name, the same attributes, - and the same contents (recursively) as the given tag. - - NOTE: right now this will return false if two tags have the - same attributes in a different order. Should this be fixed?""" - if not hasattr(other, 'name') or not hasattr(other, 'attrs') or not hasattr(other, 'contents') or self.name != other.name or self.attrs != other.attrs or len(self) != len(other): - return False - for i in range(0, len(self.contents)): - if self.contents[i] != other.contents[i]: - return False - return True - - def __ne__(self, other): - """Returns true iff this tag is not identical to the other tag, - as defined in __eq__.""" - return not self == other - - def __repr__(self, encoding=DEFAULT_OUTPUT_ENCODING): - """Renders this tag as a string.""" - return self.decode(eventualEncoding=encoding) - - BARE_AMPERSAND_OR_BRACKET = re.compile("([<>]|" - + "&(?!#\d+;|#x[0-9a-fA-F]+;|\w+;)" - + ")") - - def _sub_entity(self, x): - """Used with a regular expression to substitute the - appropriate XML entity for an XML special character.""" - return "&" + self.XML_SPECIAL_CHARS_TO_ENTITIES[x.group(0)[0]] + ";" - - def __unicode__(self): - return self.decode() - - def __str__(self): - return self.encode() - - def encode(self, encoding=DEFAULT_OUTPUT_ENCODING, - prettyPrint=False, indentLevel=0): - return self.decode(prettyPrint, indentLevel, encoding).encode(encoding) - - def decode(self, prettyPrint=False, indentLevel=0, - eventualEncoding=DEFAULT_OUTPUT_ENCODING): - """Returns a string or 
Unicode representation of this tag and - its contents. To get Unicode, pass None for encoding.""" - - attrs = [] - if self.attrs: - for key, val in self.attrs: - fmt = '%s="%s"' - if isString(val): - if (self.containsSubstitutions - and eventualEncoding is not None - and '%SOUP-ENCODING%' in val): - val = self.substituteEncoding(val, eventualEncoding) - - # The attribute value either: - # - # * Contains no embedded double quotes or single quotes. - # No problem: we enclose it in double quotes. - # * Contains embedded single quotes. No problem: - # double quotes work here too. - # * Contains embedded double quotes. No problem: - # we enclose it in single quotes. - # * Embeds both single _and_ double quotes. This - # can't happen naturally, but it can happen if - # you modify an attribute value after parsing - # the document. Now we have a bit of a - # problem. We solve it by enclosing the - # attribute in single quotes, and escaping any - # embedded single quotes to XML entities. - if '"' in val: - fmt = "%s='%s'" - if "'" in val: - # TODO: replace with apos when - # appropriate. - val = val.replace("'", "&squot;") - - # Now we're okay w/r/t quotes. But the attribute - # value might also contain angle brackets, or - # ampersands that aren't part of entities. We need - # to escape those to XML entities too. - val = self.BARE_AMPERSAND_OR_BRACKET.sub(self._sub_entity, val) - if val is None: - # Handle boolean attributes. 
- decoded = key - else: - decoded = fmt % (key, val) - attrs.append(decoded) - close = '' - closeTag = '' - if self.isSelfClosing: - close = ' /' - else: - closeTag = '</%s>' % self.name - - indentTag, indentContents = 0, 0 - if prettyPrint: - indentTag = indentLevel - space = (' ' * (indentTag-1)) - indentContents = indentTag + 1 - contents = self.decodeContents(prettyPrint, indentContents, - eventualEncoding) - if self.hidden: - s = contents - else: - s = [] - attributeString = '' - if attrs: - attributeString = ' ' + ' '.join(attrs) - if prettyPrint: - s.append(space) - s.append('<%s%s%s>' % (self.name, attributeString, close)) - if prettyPrint: - s.append("\n") - s.append(contents) - if prettyPrint and contents and contents[-1] != "\n": - s.append("\n") - if prettyPrint and closeTag: - s.append(space) - s.append(closeTag) - if prettyPrint and closeTag and self.nextSibling: - s.append("\n") - s = ''.join(s) - return s - - def decompose(self): - """Recursively destroys the contents of this tree.""" - contents = [i for i in self.contents] - for i in contents: - if isinstance(i, Tag): - i.decompose() - else: - i.extract() - self.extract() - - def prettify(self, encoding=DEFAULT_OUTPUT_ENCODING): - return self.encode(encoding, True) - - def encodeContents(self, encoding=DEFAULT_OUTPUT_ENCODING, - prettyPrint=False, indentLevel=0): - return self.decodeContents(prettyPrint, indentLevel).encode(encoding) - - def decodeContents(self, prettyPrint=False, indentLevel=0, - eventualEncoding=DEFAULT_OUTPUT_ENCODING): - """Renders the contents of this tag as a string in the given - encoding. 
If encoding is None, returns a Unicode string..""" - s=[] - for c in self: - text = None - if isinstance(c, NavigableString): - text = c.decodeGivenEventualEncoding(eventualEncoding) - elif isinstance(c, Tag): - s.append(c.decode(prettyPrint, indentLevel, eventualEncoding)) - if text and prettyPrint: - text = text.strip() - if text: - if prettyPrint: - s.append(" " * (indentLevel-1)) - s.append(text) - if prettyPrint: - s.append("\n") - return ''.join(s) - - #Soup methods - - def find(self, name=None, attrs={}, recursive=True, text=None, - **kwargs): - """Return only the first child of this Tag matching the given - criteria.""" - r = None - l = self.findAll(name, attrs, recursive, text, 1, **kwargs) - if l: - r = l[0] - return r - findChild = find - - def findAll(self, name=None, attrs={}, recursive=True, text=None, - limit=None, **kwargs): - """Extracts a list of Tag objects that match the given - criteria. You can specify the name of the Tag and any - attributes you want the Tag to have. - - The value of a key-value pair in the 'attrs' map can be a - string, a list of strings, a regular expression object, or a - callable that takes a string and returns whether or not the - string matches for some custom definition of 'matches'. The - same is true of the tag name.""" - generator = self.recursiveChildGenerator - if not recursive: - generator = self.childGenerator - return self._findAll(name, attrs, text, limit, generator, **kwargs) - findChildren = findAll - - # Pre-3.x compatibility methods. Will go away in 4.0. - first = find - fetch = findAll - - def fetchText(self, text=None, recursive=True, limit=None): - return self.findAll(text=text, recursive=recursive, limit=limit) - - def firstText(self, text=None, recursive=True): - return self.find(text=text, recursive=recursive) - - # 3.x compatibility methods. Will go away in 4.0. 
# Methods of class Tag: the legacy rendering wrapper, the attribute-map
# cache and the child iterators.

def renderContents(self, encoding=DEFAULT_OUTPUT_ENCODING,
                   prettyPrint=False, indentLevel=0):
    """Render the contents of this tag (3.x compatibility wrapper).

    With ``encoding=None`` the result of ``decodeContents`` is returned;
    otherwise the work is delegated to ``encodeContents`` with the given
    encoding.
    """
    if encoding is None:
        return self.decodeContents(prettyPrint, indentLevel, encoding)
    return self.encodeContents(encoding, prettyPrint, indentLevel)


#Private methods

def _getAttrMap(self):
    """Initializes a map representation of this tag's attributes,
    if not already initialized, and returns it."""
    # getattr() without a default is deliberate: Tag.__getattr__ falls back
    # to self.find(name) instead of raising, so a missing 'attrMap' reads as
    # falsy. An empty map is also falsy and is therefore rebuilt each call.
    if not getattr(self, 'attrMap'):
        self.attrMap = {}
        for (key, value) in self.attrs:
            self.attrMap[key] = value
    return self.attrMap


#Generator methods
def recursiveChildGenerator(self):
    """Yield every element beneath this tag, in document order.

    Iteration follows the ``next`` links starting at the first child and
    stops at the element that follows this tag's last recursive child.
    """
    if not self.contents:
        # A plain return ends the generator; 'raise StopIteration' inside a
        # generator is turned into RuntimeError by PEP 479.
        return
    stopNode = self._lastRecursiveChild().next
    current = self.contents[0]
    while current is not stopNode:
        yield current
        current = current.next


def childGenerator(self):
    """Yield the direct children of this tag by following ``nextSibling``."""
    if not self.contents:
        return
    current = self.contents[0]
    # Compare against None explicitly so a falsy child (for example an
    # empty string) does not cut the iteration short.
    while current is not None:
        yield current
        current = current.nextSibling

# Next, a couple classes to represent queries and their results.
class SoupStrainer:
    """Encapsulates a number of ways of matching a markup element (tag or
    text)."""

    def __init__(self, name=None, attrs=None, text=None, **kwargs):
        """Store the matching criteria.

        name   -- criterion for the tag name.
        attrs  -- map of attribute criteria; a plain string is taken as a
                  criterion for the 'class' attribute.
        text   -- criterion for text nodes.
        kwargs -- further attribute criteria, merged over a copy of attrs.
        """
        if attrs is None:
            # A fresh dict per instance: a shared mutable default would end
            # up stored as self.attrs on every strainer built without attrs.
            attrs = {}
        self.name = name
        if isString(attrs):
            kwargs['class'] = attrs
            attrs = None
        if kwargs:
            if attrs:
                attrs = attrs.copy()
                attrs.update(kwargs)
            else:
                attrs = kwargs
        self.attrs = attrs
        self.text = text

    def __str__(self):
        if self.text:
            return self.text
        else:
            return "%s|%s" % (self.name, self.attrs)

    def searchTag(self, markupName=None, markupAttrs=None):
        """Return the matched tag (or tag name) if it satisfies the name
        and attribute criteria, otherwise None.

        markupName may be a Tag, in which case the tag itself also serves
        as the attribute source; otherwise markupAttrs is either an object
        with a ``get`` method or an iterable of (key, value) pairs.
        """
        if markupAttrs is None:
            markupAttrs = {}
        found = None
        markup = None
        if isinstance(markupName, Tag):
            markup = markupName
            markupAttrs = markup
        callFunctionWithTagData = callable(self.name) \
                                  and not isinstance(markupName, Tag)

        if (not self.name) \
               or callFunctionWithTagData \
               or (markup and self._matches(markup, self.name)) \
               or (not markup and self._matches(markupName, self.name)):
            if callFunctionWithTagData:
                match = self.name(markupName, markupAttrs)
            else:
                match = True
                markupAttrMap = None
                for attr, matchAgainst in self.attrs.items():
                    if not markupAttrMap:
                        # Resolve the attribute lookup lazily, only when
                        # there is at least one attribute criterion.
                        if hasattr(markupAttrs, 'get'):
                            markupAttrMap = markupAttrs
                        else:
                            markupAttrMap = {}
                            for k, v in markupAttrs:
                                markupAttrMap[k] = v
                    attrValue = markupAttrMap.get(attr)
                    if not self._matches(attrValue, matchAgainst):
                        match = False
                        break
            if match:
                if markup:
                    found = markup
                else:
                    found = markupName
        return found

    def search(self, markup):
        """Return the part of markup that matches this strainer, or None.

        Raises Exception when markup is not a list, a Tag or a string.
        """
        found = None
        # If given a list of items, scan it for a text element that
        # matches.
        if isList(markup) and not isinstance(markup, Tag):
            for element in markup:
                if isinstance(element, NavigableString) \
                       and self.search(element):
                    found = element
                    break
        # If it's a Tag, make sure its name or attributes match.
        # Don't bother with Tags if we're searching for text.
        elif isinstance(markup, Tag):
            if not self.text:
                found = self.searchTag(markup)
        # If it's text, make sure the text matches.
        elif isinstance(markup, NavigableString) or \
                 isString(markup):
            if self._matches(markup, self.text):
                found = markup
        else:
            # Call form of raise: valid on Python 2 and Python 3 alike.
            raise Exception("I don't know how to match against a %s"
                            % markup.__class__)
        return found

    def _matches(self, markup, matchAgainst):
        """Return a truthy value if markup satisfies matchAgainst.

        matchAgainst may be True (anything but None matches), a callable,
        a regexp-like object, a list, or a value compared for equality.
        """
        result = False
        if matchAgainst is True:
            result = markup != None
        elif callable(matchAgainst):
            result = matchAgainst(markup)
        else:
            #Custom match methods take the tag as an argument, but all
            #other ways of matching match the tag name as a string.
            if isinstance(markup, Tag):
                markup = markup.name
            if markup is not None and not isString(markup):
                markup = unicode(markup)
            #Now we know that markup is either a string, or None.
            if hasattr(matchAgainst, 'match'):
                # It's a regexp object.
                result = markup and matchAgainst.search(markup)
            elif (isList(matchAgainst)
                  and (markup is not None or not isString(matchAgainst))):
                result = markup in matchAgainst
            elif hasattr(matchAgainst, 'items'):
                # NOTE(review): operands look inverted (markup is a string
                # or None here); left unchanged -- confirm intent.
                result = markup.has_key(matchAgainst)
            elif matchAgainst and isString(markup):
                if isinstance(markup, unicode):
                    matchAgainst = unicode(matchAgainst)
                else:
                    matchAgainst = str(matchAgainst)

            if not result:
                result = matchAgainst == markup
        return result


class ResultSet(list):
    """A ResultSet is just a list that keeps track of the SoupStrainer
    that created it."""

    def __init__(self, source):
        # Initialise this list itself, not a throwaway empty list.
        list.__init__(self)
        self.source = source

# Now, some helper functions.
- -def isList(l): - """Convenience method that works with all 2.x versions of Python - to determine whether or not something is listlike.""" - return ((hasattr(l, '__iter__') and not isString(l)) - or (type(l) in (types.ListType, types.TupleType))) - -def isString(s): - """Convenience method that works with all 2.x versions of Python - to determine whether or not something is stringlike.""" - try: - return isinstance(s, unicode) or isinstance(s, basestring) - except NameError: - return isinstance(s, str) - -def buildTagMap(default, *args): - """Turns a list of maps, lists, or scalars into a single map. - Used to build the SELF_CLOSING_TAGS, NESTABLE_TAGS, and - NESTING_RESET_TAGS maps out of lists and partial maps.""" - built = {} - for portion in args: - if hasattr(portion, 'items'): - #It's a map. Merge it. - for k,v in portion.items(): - built[k] = v - elif isList(portion) and not isString(portion): - #It's a list. Map each item to the default. - for k in portion: - built[k] = default - else: - #It's a scalar. Map it to the default. - built[portion] = default - return built - -# Now, the parser classes. - -class HTMLParserBuilder(HTMLParser): - - def __init__(self, soup): - HTMLParser.__init__(self) - self.soup = soup - - # We inherit feed() and reset(). 
- - def handle_starttag(self, name, attrs): - if name == 'meta': - self.soup.extractCharsetFromMeta(attrs) - else: - self.soup.unknown_starttag(name, attrs) - - def handle_endtag(self, name): - self.soup.unknown_endtag(name) - - def handle_data(self, content): - self.soup.handle_data(content) - - def _toStringSubclass(self, text, subclass): - """Adds a certain piece of text to the tree as a NavigableString - subclass.""" - self.soup.endData() - self.handle_data(text) - self.soup.endData(subclass) - - def handle_pi(self, text): - """Handle a processing instruction as a ProcessingInstruction - object, possibly one with a %SOUP-ENCODING% slot into which an - encoding will be plugged later.""" - if text[:3] == "xml": - text = u"xml version='1.0' encoding='%SOUP-ENCODING%'" - self._toStringSubclass(text, ProcessingInstruction) - - def handle_comment(self, text): - "Handle comments as Comment objects." - self._toStringSubclass(text, Comment) - - def handle_charref(self, ref): - "Handle character references as data." - if self.soup.convertEntities: - data = unichr(int(ref)) - else: - data = '&#%s;' % ref - self.handle_data(data) - - def handle_entityref(self, ref): - """Handle entity references as data, possibly converting known - HTML and/or XML entity references to the corresponding Unicode - characters.""" - data = None - if self.soup.convertHTMLEntities: - try: - data = unichr(name2codepoint[ref]) - except KeyError: - pass - - if not data and self.soup.convertXMLEntities: - data = self.soup.XML_ENTITIES_TO_SPECIAL_CHARS.get(ref) - - if not data and self.soup.convertHTMLEntities and \ - not self.soup.XML_ENTITIES_TO_SPECIAL_CHARS.get(ref): - # TODO: We've got a problem here. We're told this is - # an entity reference, but it's not an XML entity - # reference or an HTML entity reference. Nonetheless, - # the logical thing to do is to pass it through as an - # unrecognized entity reference. 
- # - # Except: when the input is "&carol;" this function - # will be called with input "carol". When the input is - # "AT&T", this function will be called with input - # "T". We have no way of knowing whether a semicolon - # was present originally, so we don't know whether - # this is an unknown entity or just a misplaced - # ampersand. - # - # The more common case is a misplaced ampersand, so I - # escape the ampersand and omit the trailing semicolon. - data = "&%s" % ref - if not data: - # This case is different from the one above, because we - # haven't already gone through a supposedly comprehensive - # mapping of entities to Unicode characters. We might not - # have gone through any mapping at all. So the chances are - # very high that this is a real entity, and not a - # misplaced ampersand. - data = "&%s;" % ref - self.handle_data(data) - - def handle_decl(self, data): - "Handle DOCTYPEs and the like as Declaration objects." - self._toStringSubclass(data, Declaration) - - def parse_declaration(self, i): - """Treat a bogus SGML declaration as raw data. Treat a CDATA - declaration as a CData object.""" - j = None - if self.rawdata[i:i+9] == '<![CDATA[': - k = self.rawdata.find(']]>', i) - if k == -1: - k = len(self.rawdata) - data = self.rawdata[i+9:k] - j = k+3 - self._toStringSubclass(data, CData) - else: - try: - j = HTMLParser.parse_declaration(self, i) - except HTMLParseError: - toHandle = self.rawdata[i:] - self.handle_data(toHandle) - j = i + len(toHandle) - return j - - -class BeautifulStoneSoup(Tag): - - """This class contains the basic parser and search code. It defines - a parser that knows nothing about tag behavior except for the - following: - - You can't close a tag without closing all the tags it encloses. - That is, "<foo><bar></foo>" actually means - "<foo><bar></bar></foo>". - - [Another possible explanation is "<foo><bar /></foo>", but since - this class defines no SELF_CLOSING_TAGS, it will never use that - explanation.] 
- - This class is useful for parsing XML or made-up markup languages, - or when BeautifulSoup makes an assumption counter to what you were - expecting.""" - - SELF_CLOSING_TAGS = {} - NESTABLE_TAGS = {} - RESET_NESTING_TAGS = {} - QUOTE_TAGS = {} - PRESERVE_WHITESPACE_TAGS = [] - - MARKUP_MASSAGE = [(re.compile('(<[^<>]*)/>'), - lambda x: x.group(1) + ' />'), - (re.compile('<!\s+([^<>]*)>'), - lambda x: '<!' + x.group(1) + '>') - ] - - ROOT_TAG_NAME = u'[document]' - - HTML_ENTITIES = "html" - XML_ENTITIES = "xml" - XHTML_ENTITIES = "xhtml" - # TODO: This only exists for backwards-compatibility - ALL_ENTITIES = XHTML_ENTITIES - - # Used when determining whether a text node is all whitespace and - # can be replaced with a single space. A text node that contains - # fancy Unicode spaces (usually non-breaking) should be left - # alone. - STRIP_ASCII_SPACES = { 9: None, 10: None, 12: None, 13: None, 32: None, } - - def __init__(self, markup="", parseOnlyThese=None, fromEncoding=None, - markupMassage=True, smartQuotesTo=XML_ENTITIES, - convertEntities=None, selfClosingTags=None, isHTML=False, - builder=HTMLParserBuilder): - """The Soup object is initialized as the 'root tag', and the - provided markup (which can be a string or a file-like object) - is fed into the underlying parser. - - HTMLParser will process most bad HTML, and the BeautifulSoup - class has some tricks for dealing with some HTML that kills - HTMLParser, but Beautiful Soup can nonetheless choke or lose data - if your data uses self-closing tags or declarations - incorrectly. - - By default, Beautiful Soup uses regexes to sanitize input, - avoiding the vast majority of these problems. If the problems - don't apply to you, pass in False for markupMassage, and - you'll get better performance. - - The default parser massage techniques fix the two most common - instances of invalid HTML that choke HTMLParser: - - <br/> (No space between name of closing tag and tag close) - <! 
--Comment--> (Extraneous whitespace in declaration) - - You can pass in a custom list of (RE object, replace method) - tuples to get Beautiful Soup to scrub your input the way you - want.""" - - self.parseOnlyThese = parseOnlyThese - self.fromEncoding = fromEncoding - self.smartQuotesTo = smartQuotesTo - self.convertEntities = convertEntities - # Set the rules for how we'll deal with the entities we - # encounter - if self.convertEntities: - # It doesn't make sense to convert encoded characters to - # entities even while you're converting entities to Unicode. - # Just convert it all to Unicode. - self.smartQuotesTo = None - if convertEntities == self.HTML_ENTITIES: - self.convertXMLEntities = False - self.convertHTMLEntities = True - self.escapeUnrecognizedEntities = True - elif convertEntities == self.XHTML_ENTITIES: - self.convertXMLEntities = True - self.convertHTMLEntities = True - self.escapeUnrecognizedEntities = False - elif convertEntities == self.XML_ENTITIES: - self.convertXMLEntities = True - self.convertHTMLEntities = False - self.escapeUnrecognizedEntities = False - else: - self.convertXMLEntities = False - self.convertHTMLEntities = False - self.escapeUnrecognizedEntities = False - - self.instanceSelfClosingTags = buildTagMap(None, selfClosingTags) - self.builder = builder(self) - self.reset() - - if hasattr(markup, 'read'): # It's a file-type object. - markup = markup.read() - self.markup = markup - self.markupMassage = markupMassage - try: - self._feed(isHTML=isHTML) - except StopParsing: - pass - self.markup = None # The markup can now be GCed. - self.builder = None # So can the builder. - - def _feed(self, inDocumentEncoding=None, isHTML=False): - # Convert the document to Unicode. 
- markup = self.markup - if isinstance(markup, unicode): - if not hasattr(self, 'originalEncoding'): - self.originalEncoding = None - else: - dammit = UnicodeDammit\ - (markup, [self.fromEncoding, inDocumentEncoding], - smartQuotesTo=self.smartQuotesTo, isHTML=isHTML) - markup = dammit.unicode - self.originalEncoding = dammit.originalEncoding - self.declaredHTMLEncoding = dammit.declaredHTMLEncoding - if markup: - if self.markupMassage: - if not isList(self.markupMassage): - self.markupMassage = self.MARKUP_MASSAGE - for fix, m in self.markupMassage: - markup = fix.sub(m, markup) - # TODO: We get rid of markupMassage so that the - # soup object can be deepcopied later on. Some - # Python installations can't copy regexes. If anyone - # was relying on the existence of markupMassage, this - # might cause problems. - del(self.markupMassage) - self.builder.reset() - - self.builder.feed(markup) - # Close out any unfinished strings and close all the open tags. - self.endData() - while self.currentTag.name != self.ROOT_TAG_NAME: - self.popTag() - - def isSelfClosingTag(self, name): - """Returns true iff the given string is the name of a - self-closing tag according to this parser.""" - return self.SELF_CLOSING_TAGS.has_key(name) \ - or self.instanceSelfClosingTags.has_key(name) - - def reset(self): - Tag.__init__(self, self, self.ROOT_TAG_NAME) - self.hidden = 1 - self.builder.reset() - self.currentData = [] - self.currentTag = None - self.tagStack = [] - self.quoteStack = [] - self.pushTag(self) - - def popTag(self): - tag = self.tagStack.pop() - # Tags with just one string-owning child get the child as a - # 'string' property, so that soup.tag.string is shorthand for - # soup.tag.contents[0] - if len(self.currentTag.contents) == 1 and \ - isinstance(self.currentTag.contents[0], NavigableString): - self.currentTag.string = self.currentTag.contents[0] - - #print "Pop", tag.name - if self.tagStack: - self.currentTag = self.tagStack[-1] - return self.currentTag - - def 
pushTag(self, tag): - #print "Push", tag.name - if self.currentTag: - self.currentTag.contents.append(tag) - self.tagStack.append(tag) - self.currentTag = self.tagStack[-1] - - def endData(self, containerClass=NavigableString): - if self.currentData: - currentData = u''.join(self.currentData) - if (currentData.translate(self.STRIP_ASCII_SPACES) == '' and - not set([tag.name for tag in self.tagStack]).intersection( - self.PRESERVE_WHITESPACE_TAGS)): - if '\n' in currentData: - currentData = '\n' - else: - currentData = ' ' - self.currentData = [] - if self.parseOnlyThese and len(self.tagStack) <= 1 and \ - (not self.parseOnlyThese.text or \ - not self.parseOnlyThese.search(currentData)): - return - o = containerClass(currentData) - o.setup(self.currentTag, self.previous) - if self.previous: - self.previous.next = o - self.previous = o - self.currentTag.contents.append(o) - - - def _popToTag(self, name, inclusivePop=True): - """Pops the tag stack up to and including the most recent - instance of the given tag. If inclusivePop is false, pops the tag - stack up to but *not* including the most recent instqance of - the given tag.""" - #print "Popping to %s" % name - if name == self.ROOT_TAG_NAME: - return - - numPops = 0 - mostRecentTag = None - for i in range(len(self.tagStack)-1, 0, -1): - if name == self.tagStack[i].name: - numPops = len(self.tagStack)-i - break - if not inclusivePop: - numPops = numPops - 1 - - for i in range(0, numPops): - mostRecentTag = self.popTag() - return mostRecentTag - - def _smartPop(self, name): - - """We need to pop up to the previous tag of this type, unless - one of this tag's nesting reset triggers comes between this - tag and the previous tag of this type, OR unless this tag is a - generic nesting trigger and another generic nesting trigger - comes between this tag and the previous tag of this type. - - Examples: - <p>Foo<b>Bar *<p>* should pop to 'p', not 'b'. - <p>Foo<table>Bar *<p>* should pop to 'table', not 'p'. 
- <p>Foo<table><tr>Bar *<p>* should pop to 'tr', not 'p'. - - <li><ul><li> *<li>* should pop to 'ul', not the first 'li'. - <tr><table><tr> *<tr>* should pop to 'table', not the first 'tr' - <td><tr><td> *<td>* should pop to 'tr', not the first 'td' - """ - - nestingResetTriggers = self.NESTABLE_TAGS.get(name) - isNestable = nestingResetTriggers != None - isResetNesting = self.RESET_NESTING_TAGS.has_key(name) - popTo = None - inclusive = True - for i in range(len(self.tagStack)-1, 0, -1): - p = self.tagStack[i] - if (not p or p.name == name) and not isNestable: - #Non-nestable tags get popped to the top or to their - #last occurance. - popTo = name - break - if (nestingResetTriggers != None - and p.name in nestingResetTriggers) \ - or (nestingResetTriggers == None and isResetNesting - and self.RESET_NESTING_TAGS.has_key(p.name)): - - #If we encounter one of the nesting reset triggers - #peculiar to this tag, or we encounter another tag - #that causes nesting to reset, pop up to but not - #including that tag. - popTo = p.name - inclusive = False - break - p = p.parent - if popTo: - self._popToTag(popTo, inclusive) - - def unknown_starttag(self, name, attrs, selfClosing=0): - #print "Start tag %s: %s" % (name, attrs) - if self.quoteStack: - #This is not a real tag. - #print "<%s> is not real!" 
% name - attrs = ''.join(map(lambda(x, y): ' %s="%s"' % (x, y), attrs)) - self.handle_data('<%s%s>' % (name, attrs)) - return - self.endData() - - if not self.isSelfClosingTag(name) and not selfClosing: - self._smartPop(name) - - if self.parseOnlyThese and len(self.tagStack) <= 1 \ - and (self.parseOnlyThese.text or not self.parseOnlyThese.searchTag(name, attrs)): - return - - tag = Tag(self, name, attrs, self.currentTag, self.previous) - if self.previous: - self.previous.next = tag - self.previous = tag - self.pushTag(tag) - if selfClosing or self.isSelfClosingTag(name): - self.popTag() - if name in self.QUOTE_TAGS: - #print "Beginning quote (%s)" % name - self.quoteStack.append(name) - self.literal = 1 - return tag - - def unknown_endtag(self, name): - #print "End tag %s" % name - if self.quoteStack and self.quoteStack[-1] != name: - #This is not a real end tag. - #print "</%s> is not real!" % name - self.handle_data('</%s>' % name) - return - self.endData() - self._popToTag(name) - if self.quoteStack and self.quoteStack[-1] == name: - self.quoteStack.pop() - self.literal = (len(self.quoteStack) > 0) - - def handle_data(self, data): - self.currentData.append(data) - - def extractCharsetFromMeta(self, attrs): - self.unknown_starttag('meta', attrs) - - -class BeautifulSoup(BeautifulStoneSoup): - - """This parser knows the following facts about HTML: - - * Some tags have no closing tag and should be interpreted as being - closed as soon as they are encountered. - - * The text inside some tags (ie. 'script') may contain tags which - are not really part of the document and which should be parsed - as text, not tags. If you want to parse the text as tags, you can - always fetch it and parse it explicitly. - - * Tag nesting rules: - - Most tags can't be nested at all. For instance, the occurance of - a <p> tag should implicitly close the previous <p> tag. - - <p>Para1<p>Para2 - should be transformed into: - <p>Para1</p><p>Para2 - - Some tags can be nested arbitrarily. 
For instance, the occurance - of a <blockquote> tag should _not_ implicitly close the previous - <blockquote> tag. - - Alice said: <blockquote>Bob said: <blockquote>Blah - should NOT be transformed into: - Alice said: <blockquote>Bob said: </blockquote><blockquote>Blah - - Some tags can be nested, but the nesting is reset by the - interposition of other tags. For instance, a <tr> tag should - implicitly close the previous <tr> tag within the same <table>, - but not close a <tr> tag in another table. - - <table><tr>Blah<tr>Blah - should be transformed into: - <table><tr>Blah</tr><tr>Blah - but, - <tr>Blah<table><tr>Blah - should NOT be transformed into - <tr>Blah<table></tr><tr>Blah - - Differing assumptions about tag nesting rules are a major source - of problems with the BeautifulSoup class. If BeautifulSoup is not - treating as nestable a tag your page author treats as nestable, - try ICantBelieveItsBeautifulSoup, MinimalSoup, or - BeautifulStoneSoup before writing your own subclass.""" - - def __init__(self, *args, **kwargs): - if not kwargs.has_key('smartQuotesTo'): - kwargs['smartQuotesTo'] = self.HTML_ENTITIES - kwargs['isHTML'] = True - BeautifulStoneSoup.__init__(self, *args, **kwargs) - - SELF_CLOSING_TAGS = buildTagMap(None, - ['br' , 'hr', 'input', 'img', 'meta', - 'spacer', 'link', 'frame', 'base']) - - PRESERVE_WHITESPACE_TAGS = set(['pre', 'textarea']) - - QUOTE_TAGS = {'script' : None, 'textarea' : None} - - #According to the HTML standard, each of these inline tags can - #contain another tag of the same type. Furthermore, it's common - #to actually use these tags this way. - NESTABLE_INLINE_TAGS = ['span', 'font', 'q', 'object', 'bdo', 'sub', 'sup', - 'center'] - - #According to the HTML standard, these block tags can contain - #another tag of the same type. Furthermore, it's common - #to actually use these tags this way. 
- NESTABLE_BLOCK_TAGS = ['blockquote', 'div', 'fieldset', 'ins', 'del'] - - #Lists can contain other lists, but there are restrictions. - NESTABLE_LIST_TAGS = { 'ol' : [], - 'ul' : [], - 'li' : ['ul', 'ol'], - 'dl' : [], - 'dd' : ['dl'], - 'dt' : ['dl'] } - - #Tables can contain other tables, but there are restrictions. - NESTABLE_TABLE_TAGS = {'table' : [], - 'tr' : ['table', 'tbody', 'tfoot', 'thead'], - 'td' : ['tr'], - 'th' : ['tr'], - 'thead' : ['table'], - 'tbody' : ['table'], - 'tfoot' : ['table'], - } - - NON_NESTABLE_BLOCK_TAGS = ['address', 'form', 'p', 'pre'] - - #If one of these tags is encountered, all tags up to the next tag of - #this type are popped. - RESET_NESTING_TAGS = buildTagMap(None, NESTABLE_BLOCK_TAGS, 'noscript', - NON_NESTABLE_BLOCK_TAGS, - NESTABLE_LIST_TAGS, - NESTABLE_TABLE_TAGS) - - NESTABLE_TAGS = buildTagMap([], NESTABLE_INLINE_TAGS, NESTABLE_BLOCK_TAGS, - NESTABLE_LIST_TAGS, NESTABLE_TABLE_TAGS) - - # Used to detect the charset in a META tag; see start_meta - CHARSET_RE = re.compile("((^|;)\s*charset=)([^;]*)", re.M) - - def extractCharsetFromMeta(self, attrs): - """Beautiful Soup can detect a charset included in a META tag, - try to convert the document to that charset, and re-parse the - document from the beginning.""" - httpEquiv = None - contentType = None - contentTypeIndex = None - tagNeedsEncodingSubstitution = False - - for i in range(0, len(attrs)): - key, value = attrs[i] - key = key.lower() - if key == 'http-equiv': - httpEquiv = value - elif key == 'content': - contentType = value - contentTypeIndex = i - - if httpEquiv and contentType: # It's an interesting meta tag. 
- match = self.CHARSET_RE.search(contentType) - if match: - if (self.declaredHTMLEncoding is not None or - self.originalEncoding == self.fromEncoding): - # An HTML encoding was sniffed while converting - # the document to Unicode, or an HTML encoding was - # sniffed during a previous pass through the - # document, or an encoding was specified - # explicitly and it worked. Rewrite the meta tag. - def rewrite(match): - return match.group(1) + "%SOUP-ENCODING%" - newAttr = self.CHARSET_RE.sub(rewrite, contentType) - attrs[contentTypeIndex] = (attrs[contentTypeIndex][0], - newAttr) - tagNeedsEncodingSubstitution = True - else: - # This is our first pass through the document. - # Go through it again with the encoding information. - newCharset = match.group(3) - if newCharset and newCharset != self.originalEncoding: - self.declaredHTMLEncoding = newCharset - self._feed(self.declaredHTMLEncoding) - raise StopParsing - pass - tag = self.unknown_starttag("meta", attrs) - if tag and tagNeedsEncodingSubstitution: - tag.containsSubstitutions = True - - -class StopParsing(Exception): - pass - -class ICantBelieveItsBeautifulSoup(BeautifulSoup): - - """The BeautifulSoup class is oriented towards skipping over - common HTML errors like unclosed tags. However, sometimes it makes - errors of its own. For instance, consider this fragment: - - <b>Foo<b>Bar</b></b> - - This is perfectly valid (if bizarre) HTML. However, the - BeautifulSoup class will implicitly close the first b tag when it - encounters the second 'b'. It will think the author wrote - "<b>Foo<b>Bar", and didn't close the first 'b' tag, because - there's no real-world reason to bold something that's already - bold. When it encounters '</b></b>' it will close two more 'b' - tags, for a grand total of three tags closed instead of two. This - can throw off the rest of your document structure. The same is - true of a number of other tags, listed below. 
- - It's much more common for someone to forget to close a 'b' tag - than to actually use nested 'b' tags, and the BeautifulSoup class - handles the common case. This class handles the not-co-common - case: where you can't believe someone wrote what they did, but - it's valid HTML and BeautifulSoup screwed up by assuming it - wouldn't be.""" - - I_CANT_BELIEVE_THEYRE_NESTABLE_INLINE_TAGS = \ - ['em', 'big', 'i', 'small', 'tt', 'abbr', 'acronym', 'strong', - 'cite', 'code', 'dfn', 'kbd', 'samp', 'strong', 'var', 'b', - 'big'] - - I_CANT_BELIEVE_THEYRE_NESTABLE_BLOCK_TAGS = ['noscript'] - - NESTABLE_TAGS = buildTagMap([], BeautifulSoup.NESTABLE_TAGS, - I_CANT_BELIEVE_THEYRE_NESTABLE_BLOCK_TAGS, - I_CANT_BELIEVE_THEYRE_NESTABLE_INLINE_TAGS) - -class MinimalSoup(BeautifulSoup): - """The MinimalSoup class is for parsing HTML that contains - pathologically bad markup. It makes no assumptions about tag - nesting, but it does know which tags are self-closing, that - <script> tags contain Javascript and should not be parsed, that - META tags may contain encoding information, and so on. - - This also makes it better for subclassing than BeautifulStoneSoup - or BeautifulSoup.""" - - RESET_NESTING_TAGS = buildTagMap('noscript') - NESTABLE_TAGS = {} - -class BeautifulSOAP(BeautifulStoneSoup): - """This class will push a tag with only a single string child into - the tag's parent as an attribute. The attribute's name is the tag - name, and the value is the string child. An example should give - the flavor of the change: - - <foo><bar>baz</bar></foo> - => - <foo bar="baz"><bar>baz</bar></foo> - - You can then access fooTag['bar'] instead of fooTag.barTag.string. - - This is, of course, useful for scraping structures that tend to - use subelements instead of attributes, such as SOAP messages. Note - that it modifies its input, so don't print the modified version - out. - - I'm not sure how many people really want to use this class; let me - know if you do. 
Mainly I like the name.""" - - def popTag(self): - if len(self.tagStack) > 1: - tag = self.tagStack[-1] - parent = self.tagStack[-2] - parent._getAttrMap() - if (isinstance(tag, Tag) and len(tag.contents) == 1 and - isinstance(tag.contents[0], NavigableString) and - not parent.attrMap.has_key(tag.name)): - parent[tag.name] = tag.contents[0] - BeautifulStoneSoup.popTag(self) - -#Enterprise class names! It has come to our attention that some people -#think the names of the Beautiful Soup parser classes are too silly -#and "unprofessional" for use in enterprise screen-scraping. We feel -#your pain! For such-minded folk, the Beautiful Soup Consortium And -#All-Night Kosher Bakery recommends renaming this file to -#"RobustParser.py" (or, in cases of extreme enterprisiness, -#"RobustParserBeanInterface.class") and using the following -#enterprise-friendly class aliases: -class RobustXMLParser(BeautifulStoneSoup): - pass -class RobustHTMLParser(BeautifulSoup): - pass -class RobustWackAssHTMLParser(ICantBelieveItsBeautifulSoup): - pass -class RobustInsanelyWackAssHTMLParser(MinimalSoup): - pass -class SimplifyingSOAPParser(BeautifulSOAP): - pass - -###################################################### -# -# Bonus library: Unicode, Dammit -# -# This class forces XML data into a standard format (usually to UTF-8 -# or Unicode). It is heavily based on code from Mark Pilgrim's -# Universal Feed Parser. It does not rewrite the XML or HTML to -# reflect a new encoding: that happens in BeautifulStoneSoup.handle_pi -# (XML) and BeautifulSoup.start_meta (HTML). - -# Autodetects character encodings. -# Download from http://chardet.feedparser.org/ -try: - import chardet -# import chardet.constants -# chardet.constants._debug = 1 -except ImportError: - chardet = None - -# cjkcodecs and iconv_codec make Python know about more character encodings. -# Both are available from http://cjkpython.i18n.org/ -# They're built in if you use Python 2.4. 
-try: - import cjkcodecs.aliases -except ImportError: - pass -try: - import iconv_codec -except ImportError: - pass - -class UnicodeDammit: - """A class for detecting the encoding of a *ML document and - converting it to a Unicode string. If the source encoding is - windows-1252, can replace MS smart quotes with their HTML or XML - equivalents.""" - - # This dictionary maps commonly seen values for "charset" in HTML - # meta tags to the corresponding Python codec names. It only covers - # values that aren't in Python's aliases and can't be determined - # by the heuristics in find_codec. - CHARSET_ALIASES = { "macintosh" : "mac-roman", - "x-sjis" : "shift-jis" } - - def __init__(self, markup, overrideEncodings=[], - smartQuotesTo='xml', isHTML=False): - self.declaredHTMLEncoding = None - self.markup, documentEncoding, sniffedEncoding = \ - self._detectEncoding(markup, isHTML) - self.smartQuotesTo = smartQuotesTo - self.triedEncodings = [] - if markup == '' or isinstance(markup, unicode): - self.originalEncoding = None - self.unicode = unicode(markup) - return - - u = None - for proposedEncoding in overrideEncodings: - u = self._convertFrom(proposedEncoding) - if u: break - if not u: - for proposedEncoding in (documentEncoding, sniffedEncoding): - u = self._convertFrom(proposedEncoding) - if u: break - - # If no luck and we have auto-detection library, try that: - if not u and chardet and not isinstance(self.markup, unicode): - u = self._convertFrom(chardet.detect(self.markup)['encoding']) - - # As a last resort, try utf-8 and windows-1252: - if not u: - for proposed_encoding in ("utf-8", "windows-1252"): - u = self._convertFrom(proposed_encoding) - if u: break - - self.unicode = u - if not u: self.originalEncoding = None - - def _subMSChar(self, match): - """Changes a MS smart quote character to an XML or HTML - entity.""" - orig = match.group(1) - sub = self.MS_CHARS.get(orig) - if type(sub) == types.TupleType: - if self.smartQuotesTo == 'xml': - sub = 
'&#x'.encode() + sub[1].encode() + ';'.encode() - else: - sub = '&'.encode() + sub[0].encode() + ';'.encode() - else: - sub = sub.encode() - return sub - - def _convertFrom(self, proposed): - proposed = self.find_codec(proposed) - if not proposed or proposed in self.triedEncodings: - return None - self.triedEncodings.append(proposed) - markup = self.markup - - # Convert smart quotes to HTML if coming from an encoding - # that might have them. - if self.smartQuotesTo and proposed.lower() in("windows-1252", - "iso-8859-1", - "iso-8859-2"): - smart_quotes_re = "([\x80-\x9f])" - smart_quotes_compiled = re.compile(smart_quotes_re) - markup = smart_quotes_compiled.sub(self._subMSChar, markup) - - try: - # print "Trying to convert document to %s" % proposed - u = self._toUnicode(markup, proposed) - self.markup = u - self.originalEncoding = proposed - except Exception, e: - # print "That didn't work!" - # print e - return None - #print "Correct encoding: %s" % proposed - return self.markup - - def _toUnicode(self, data, encoding): - '''Given a string and its encoding, decodes the string into Unicode. 
- %encoding is a string recognized by encodings.aliases''' - - # strip Byte Order Mark (if present) - if (len(data) >= 4) and (data[:2] == '\xfe\xff') \ - and (data[2:4] != '\x00\x00'): - encoding = 'utf-16be' - data = data[2:] - elif (len(data) >= 4) and (data[:2] == '\xff\xfe') \ - and (data[2:4] != '\x00\x00'): - encoding = 'utf-16le' - data = data[2:] - elif data[:3] == '\xef\xbb\xbf': - encoding = 'utf-8' - data = data[3:] - elif data[:4] == '\x00\x00\xfe\xff': - encoding = 'utf-32be' - data = data[4:] - elif data[:4] == '\xff\xfe\x00\x00': - encoding = 'utf-32le' - data = data[4:] - newdata = unicode(data, encoding) - return newdata - - def _detectEncoding(self, xml_data, isHTML=False): - """Given a document, tries to detect its XML encoding.""" - xml_encoding = sniffed_xml_encoding = None - try: - if xml_data[:4] == '\x4c\x6f\xa7\x94': - # EBCDIC - xml_data = self._ebcdic_to_ascii(xml_data) - elif xml_data[:4] == '\x00\x3c\x00\x3f': - # UTF-16BE - sniffed_xml_encoding = 'utf-16be' - xml_data = unicode(xml_data, 'utf-16be').encode('utf-8') - elif (len(xml_data) >= 4) and (xml_data[:2] == '\xfe\xff') \ - and (xml_data[2:4] != '\x00\x00'): - # UTF-16BE with BOM - sniffed_xml_encoding = 'utf-16be' - xml_data = unicode(xml_data[2:], 'utf-16be').encode('utf-8') - elif xml_data[:4] == '\x3c\x00\x3f\x00': - # UTF-16LE - sniffed_xml_encoding = 'utf-16le' - xml_data = unicode(xml_data, 'utf-16le').encode('utf-8') - elif (len(xml_data) >= 4) and (xml_data[:2] == '\xff\xfe') and \ - (xml_data[2:4] != '\x00\x00'): - # UTF-16LE with BOM - sniffed_xml_encoding = 'utf-16le' - xml_data = unicode(xml_data[2:], 'utf-16le').encode('utf-8') - elif xml_data[:4] == '\x00\x00\x00\x3c': - # UTF-32BE - sniffed_xml_encoding = 'utf-32be' - xml_data = unicode(xml_data, 'utf-32be').encode('utf-8') - elif xml_data[:4] == '\x3c\x00\x00\x00': - # UTF-32LE - sniffed_xml_encoding = 'utf-32le' - xml_data = unicode(xml_data, 'utf-32le').encode('utf-8') - elif xml_data[:4] == '\x00\x00\xfe\xff': 
- # UTF-32BE with BOM - sniffed_xml_encoding = 'utf-32be' - xml_data = unicode(xml_data[4:], 'utf-32be').encode('utf-8') - elif xml_data[:4] == '\xff\xfe\x00\x00': - # UTF-32LE with BOM - sniffed_xml_encoding = 'utf-32le' - xml_data = unicode(xml_data[4:], 'utf-32le').encode('utf-8') - elif xml_data[:3] == '\xef\xbb\xbf': - # UTF-8 with BOM - sniffed_xml_encoding = 'utf-8' - xml_data = unicode(xml_data[3:], 'utf-8').encode('utf-8') - else: - sniffed_xml_encoding = 'ascii' - pass - except: - xml_encoding_match = None - xml_encoding_re = '^<\?.*encoding=[\'"](.*?)[\'"].*\?>'.encode() - xml_encoding_match = re.compile(xml_encoding_re).match(xml_data) - if not xml_encoding_match and isHTML: - meta_re = '<\s*meta[^>]+charset=([^>]*?)[;\'">]'.encode() - regexp = re.compile(meta_re, re.I) - xml_encoding_match = regexp.search(xml_data) - if xml_encoding_match is not None: - xml_encoding = xml_encoding_match.groups()[0].decode( - 'ascii').lower() - if isHTML: - self.declaredHTMLEncoding = xml_encoding - if sniffed_xml_encoding and \ - (xml_encoding in ('iso-10646-ucs-2', 'ucs-2', 'csunicode', - 'iso-10646-ucs-4', 'ucs-4', 'csucs4', - 'utf-16', 'utf-32', 'utf_16', 'utf_32', - 'utf16', 'u16')): - xml_encoding = sniffed_xml_encoding - return xml_data, xml_encoding, sniffed_xml_encoding - - - def find_codec(self, charset): - return self._codec(self.CHARSET_ALIASES.get(charset, charset)) \ - or (charset and self._codec(charset.replace("-", ""))) \ - or (charset and self._codec(charset.replace("-", "_"))) \ - or charset - - def _codec(self, charset): - if not charset: return charset - codec = None - try: - codecs.lookup(charset) - codec = charset - except (LookupError, ValueError): - pass - return codec - - EBCDIC_TO_ASCII_MAP = None - def _ebcdic_to_ascii(self, s): - c = self.__class__ - if not c.EBCDIC_TO_ASCII_MAP: - emap = (0,1,2,3,156,9,134,127,151,141,142,11,12,13,14,15, - 16,17,18,19,157,133,8,135,24,25,146,143,28,29,30,31, - 
128,129,130,131,132,10,23,27,136,137,138,139,140,5,6,7, - 144,145,22,147,148,149,150,4,152,153,154,155,20,21,158,26, - 32,160,161,162,163,164,165,166,167,168,91,46,60,40,43,33, - 38,169,170,171,172,173,174,175,176,177,93,36,42,41,59,94, - 45,47,178,179,180,181,182,183,184,185,124,44,37,95,62,63, - 186,187,188,189,190,191,192,193,194,96,58,35,64,39,61,34, - 195,97,98,99,100,101,102,103,104,105,196,197,198,199,200, - 201,202,106,107,108,109,110,111,112,113,114,203,204,205, - 206,207,208,209,126,115,116,117,118,119,120,121,122,210, - 211,212,213,214,215,216,217,218,219,220,221,222,223,224, - 225,226,227,228,229,230,231,123,65,66,67,68,69,70,71,72, - 73,232,233,234,235,236,237,125,74,75,76,77,78,79,80,81, - 82,238,239,240,241,242,243,92,159,83,84,85,86,87,88,89, - 90,244,245,246,247,248,249,48,49,50,51,52,53,54,55,56,57, - 250,251,252,253,254,255) - import string - c.EBCDIC_TO_ASCII_MAP = string.maketrans( \ - ''.join(map(chr, range(256))), ''.join(map(chr, emap))) - return s.translate(c.EBCDIC_TO_ASCII_MAP) - - MS_CHARS = { '\x80' : ('euro', '20AC'), - '\x81' : ' ', - '\x82' : ('sbquo', '201A'), - '\x83' : ('fnof', '192'), - '\x84' : ('bdquo', '201E'), - '\x85' : ('hellip', '2026'), - '\x86' : ('dagger', '2020'), - '\x87' : ('Dagger', '2021'), - '\x88' : ('circ', '2C6'), - '\x89' : ('permil', '2030'), - '\x8A' : ('Scaron', '160'), - '\x8B' : ('lsaquo', '2039'), - '\x8C' : ('OElig', '152'), - '\x8D' : '?', - '\x8E' : ('#x17D', '17D'), - '\x8F' : '?', - '\x90' : '?', - '\x91' : ('lsquo', '2018'), - '\x92' : ('rsquo', '2019'), - '\x93' : ('ldquo', '201C'), - '\x94' : ('rdquo', '201D'), - '\x95' : ('bull', '2022'), - '\x96' : ('ndash', '2013'), - '\x97' : ('mdash', '2014'), - '\x98' : ('tilde', '2DC'), - '\x99' : ('trade', '2122'), - '\x9a' : ('scaron', '161'), - '\x9b' : ('rsaquo', '203A'), - '\x9c' : ('oelig', '153'), - '\x9d' : '?', - '\x9e' : ('#x17E', '17E'), - '\x9f' : ('Yuml', ''),} - -####################################################################### - - -#By 
default, act as an HTML pretty-printer. -if __name__ == '__main__': - import sys - soup = BeautifulSoup(sys.stdin) - print soup.prettify() diff --git a/module/ConfigParser.py b/module/ConfigParser.py new file mode 100644 index 000000000..974986093 --- /dev/null +++ b/module/ConfigParser.py @@ -0,0 +1,293 @@ +# -*- coding: utf-8 -*- + +from __future__ import with_statement +from pprint import pprint +from os.path import exists +from os.path import join +from shutil import copy + + +######################################################################## +class ConfigParser: + """ + holds and manage the configuration + + current dict layout: + + { + + section : { + option : { + value: + type: + desc: + } + desc: + + } + + + """ + + #---------------------------------------------------------------------- + def __init__(self): + """Constructor""" + self.config = {} # the config values + self.plugin = {} # the config for plugins + + self.username = "" + self.password = "" + #stored outside and may not modified + + #@TODO start setup.. 
+ + self.checkVersion() + + self.readConfig() + + #---------------------------------------------------------------------- + def checkVersion(self): + """determines if config need to be copied""" + + if not exists("pyload.config"): + copy(join(pypath,"module", "config", "default.config"), "pyload.config") + + if not exists("plugin.config"): + f = open("plugin.config", "wb") + f.close() + + #@TODO: testing conf file version + + #---------------------------------------------------------------------- + def readConfig(self): + """reads the config file""" + + self.config = self.parseConfig(join(pypath,"module", "config", "default.config")) + self.plugin = self.parseConfig("plugin.config") + + try: + homeconf = self.parseConfig("pyload.config") + self.updateValues(homeconf, self.config) + + except Exception, e: + print e + + + self.username = self.config["remote"]["username"]["value"] + del self.config["remote"]["username"] + + self.password = self.config["remote"]["password"]["value"] + del self.config["remote"]["password"] + + + #---------------------------------------------------------------------- + def parseConfig(self, config): + """parses a given configfile""" + + f = open(config) + + config = f.read() + + config = config.split("\n") + + conf = {} + + section, option, value, typ, desc = "","","","","" + + listmode = False + + for line in config: + + line = line.rpartition("#") # removes comments + + if line[1]: + line = line[0] + else: + line = line[2] + + line = line.strip() + + try: + + if line == "": + continue + elif line.endswith(":"): + section, none, desc = line[:-1].partition('-') + section = section.strip() + desc = desc.replace('"', "").strip() + conf[section] = { "desc" : desc } + else: + if listmode: + + if line.endswith("]"): + listmode = False + line = line.replace("]","") + + value += [self.cast(typ, x.strip()) for x in line.split(",") if x] + + if not listmode: + conf[section][option] = { "desc" : desc, + "typ" : typ, + "value" : value} + + + else: 
+ content, none, value = line.partition("=") + + content, none, desc = content.partition(":") + + desc = desc.replace('"', "").strip() + + typ, option = content.split() + + value = value.strip() + + if value.startswith("["): + if value.endswith("]"): + listmode = False + value = value[:-1] + else: + listmode = True + + value = [self.cast(typ, x.strip()) for x in value[1:].split(",") if x] + else: + value = self.cast(typ, value) + + if not listmode: + conf[section][option] = { "desc" : desc, + "typ" : typ, + "value" : value} + + except: + pass + + + f.close() + return conf + + + + #---------------------------------------------------------------------- + def updateValues(self, config, dest): + """sets the config values from a parsed config file to values in destination""" + + for section in config.iterkeys(): + + if dest.has_key(section): + + for option in config[section].iterkeys(): + + if option == "desc": continue + + if dest[section].has_key(option): + dest[section][option]["value"] = config[section][option]["value"] + + else: + dest[section][option] = config[section][option] + + + else: + dest[section] = config[section] + + #---------------------------------------------------------------------- + def saveConfig(self, config, filename): + """saves config to filename""" + #@TODO save username and config + with open(filename, "wb") as f: + + for section in config.iterkeys(): + f.write('%s - "%s":\n' % (section, config[section]["desc"])) + + for option, data in config[section].iteritems(): + + if option == "desc": continue + + if isinstance(data["value"], list): + value = "[ \n" + for x in data["value"]: + value += "\t\t" + x + ",\n" + value += "\t\t]\n" + else: + value = data["value"] + "\n" + + f.write('\t%s %s : "%s" = %s' % (data["typ"], option, data["desc"], value) ) + #---------------------------------------------------------------------- + def cast(self, typ, value): + """cast value to given format""" + if typ == "int": + return int(value) + elif typ == 
"bool": + return True if value.lower() in ("true", "on", "an","yes") else False + else: + return value + + #---------------------------------------------------------------------- + def save(self): + """saves the configs to disk""" + self.saveConfig(self.config, "pyload.config") + self.saveConfig(self.plugin, "plugin.config") + + #---------------------------------------------------------------------- + def __getitem__(self, section): + """provides dictonary like access: c['section']['option']""" + return Section(self, section) + + #---------------------------------------------------------------------- + def get(self, section, option): + """get value""" + return self.config[section][option]["value"] + + #---------------------------------------------------------------------- + def set(self, section, option, value): + """set value""" + self.config[section][option]["value"] = value + + #---------------------------------------------------------------------- + def getPlugin(self, plugin, option): + """gets a value for a plugin""" + return self.plugin[plugin][option]["value"] + + #---------------------------------------------------------------------- + def setPlugin(self, plugin, option, value): + """sets a value for a plugin""" + self.plugin[plugin][option]["value"] = value + + +######################################################################## +class Section: + """provides dictionary like access for configparser""" + + #---------------------------------------------------------------------- + def __init__(self, parser, section): + """Constructor""" + self.parser = parser + self.section = section + + #---------------------------------------------------------------------- + def __getitem__(self, item): + """getitem""" + return self.parser.get(self.section, item) + + #---------------------------------------------------------------------- + def __setitem__(self, item, value): + """setitem""" + self.parser.set(self.section, item, value) + + + +if __name__ == "__main__": 
+ pypath = "" + + from time import time,sleep + + a = time() + + c = ConfigParser() + + b = time() + + print "sec", b-a + + pprint(c.config) + + c.saveConfig(c.config, "user.conf")
\ No newline at end of file diff --git a/module/DownloadThread.py b/module/DownloadThread.py deleted file mode 100644 index 2178bd21e..000000000 --- a/module/DownloadThread.py +++ /dev/null @@ -1,203 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -""" - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - See the GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, see <http://www.gnu.org/licenses/>. - - @author: mkaay - @author: spoob - @author: sebnapi - @version: v0.3.2 -""" - -from threading import Thread -import traceback -from time import sleep, time - -from module.network.Request import AbortDownload -from module.PullEvents import UpdateEvent - -class Status(object): - """ Saves all status information - """ - def __init__(self, pyfile): - self.pyfile = pyfile - self.type = None - self.status_queue = None - self.filename = None - self.url = None - self.exists = False - self.waituntil = 0 - self.plugin = pyfile.plugin.__name__ - self.want_reconnect = False - self.error = "" - - def get_ETA(self): - return self.pyfile.plugin.req.get_ETA() - def get_speed(self): - return self.pyfile.plugin.req.get_speed() - def kB_left(self): - return self.pyfile.plugin.req.kB_left() - def size(self): - return self.pyfile.plugin.req.dl_size / 1024 - def percent(self): - if not self.kB_left() == 0 and not self.size() == 0: - percent = ((self.size()-self.kB_left()) * 100) / self.size() - return percent if percent < 101 else 0 - return 0 - -class Reconnect(Exception): - pass - -class 
Checksum(Exception): - def __init__(self, code, local_file): - self.code = code - self.file = local_file - - def getCode(self): - return self.code - - def getFile(self): - return self.file - -class CaptchaError(Exception): - pass - -class DownloadThread(Thread): - def __init__(self, parent, job): - Thread.__init__(self) - self.parent = parent - self.setDaemon(True) - self.loadedPyFile = job - - def run(self): - try: - self.download(self.loadedPyFile) - except AbortDownload: - self.loadedPyFile.plugin.req.abort = False - self.loadedPyFile.status.type = "aborted" - except Reconnect: - pass - except Checksum, e: - self.loadedPyFile.status.type = "failed" - self.loadedPyFile.status.error = "Checksum error: %d" % e.getCode() - f = open("%s.info" % e.getFile(), "w") - f.write("Checksum not matched!") - f.close() - except CaptchaError: - self.loadedPyFile.status.type = "failed" - self.loadedPyFile.status.error = "Can't solve captcha" - except Exception, e: - try: - if self.parent.parent.config['general']['debug_mode']: - traceback.print_exc() - code, msg = e - if code == 7: - sleep(60) - self.parent.parent.logger.info(_("Hoster unvailable, wait 60 seconds")) - except Exception, f: - self.parent.parent.logger.debug(_("Error getting error code: %s") % f) - if self.parent.parent.config['general']['debug_mode']: - traceback.print_exc() - self.loadedPyFile.status.type = "failed" - self.loadedPyFile.status.error = str(e) - finally: - self.parent.jobFinished(self.loadedPyFile) - self.parent.parent.pullManager.addEvent(UpdateEvent("file", self.loadedPyFile.id, "queue")) - sleep(0.8) - self.parent.removeThread(self) - - def handleNewInterface(self, pyfile): - status = pyfile.status - plugin = pyfile.plugin - - if plugin.__type__ == "container" or plugin.__type__ == "crypter": - status.type = "decrypting" - else: #hoster - status.type = "starting" - self.parent.parent.pullManager.addEvent(UpdateEvent("file", pyfile.id, "queue")) - - if plugin.__type__ == "container": - 
plugin.decrypt(pyfile.url) - else: - plugin.preparePlugin(self) - - plugin.prepareDownload() - - plugin.startDownload() - status.type = "finished" - - def download(self, pyfile): - if hasattr(pyfile.plugin, "__interface__") and pyfile.plugin.__interface__ >= 2: - self.handleNewInterface(pyfile) - return - status = pyfile.status - status.type = "starting" - self.parent.parent.pullManager.addEvent(UpdateEvent("file", pyfile.id, "queue")) - - pyfile.init_download() - - if not pyfile.plugin.prepare(self): - raise Exception, _("File not found") - - pyfile.plugin.req.set_timeout(self.parent.parent.config['general']['max_download_time']) - - if pyfile.plugin.__type__ == "container" or pyfile.plugin.__type__ == "crypter": - status.type = "decrypting" - else: - status.type = "downloading" - self.parent.parent.pullManager.addEvent(UpdateEvent("file", pyfile.id, "queue")) - - - #~ free_file_name = self.get_free_name(status.filename) - #~ location = join(pyfile.folder, status.filename) - pyfile.plugin.proceed(status.url, status.filename) - - if self.parent.parent.xmlconfig.get("general", "checksum", True): - status.type = "checking" - check, code = pyfile.plugin.check_file(status.filename) - """ - return codes: - 0 - checksum ok - 1 - checksum wrong - 5 - can't get checksum - 10 - not implemented - 20 - unknown error - """ - if code == 0: - self.parent.parent.logger.info(_("Checksum ok ('%s')") % status.filename) - elif code == 1: - self.parent.parent.logger.info(_("Checksum not matched! 
('%s')") % status.filename) - elif code == 5: - self.parent.parent.logger.debug(_("Can't get checksum for %s") % status.filename) - elif code == 10: - self.parent.parent.logger.debug(_("Checksum not implemented for %s") % status.filename) - if not check: - raise Checksum(code, status.filename) - - status.type = "finished" - - def wait(self, pyfile): - pyfile.status.type = "waiting" - self.parent.parent.pullManager.addEvent(UpdateEvent("file", pyfile.id, "queue")) - while (time() < pyfile.status.waituntil): - if self.parent.initReconnect() or self.parent.reconnecting: - pyfile.status.type = "reconnected" - pyfile.status.want_reconnect = False - raise Reconnect - if pyfile.plugin.req.abort: - raise AbortDownload - sleep(1) - pyfile.status.want_reconnect = False - return True diff --git a/module/FileDatabase.py b/module/FileDatabase.py new file mode 100644 index 000000000..843121492 --- /dev/null +++ b/module/FileDatabase.py @@ -0,0 +1,684 @@ +#!/usr/bin/env python +""" + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + See the GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. 
+ + @author: RaNaN +""" +from Queue import Queue +import sqlite3 +from threading import Thread +from threading import RLock +from time import sleep +from time import time +from os import path +import traceback + +statusMap = { + "finished": 0, + "offline": 1, + "online": 2, + "queued": 3, + "checking": 4, + "waiting": 5, + "reconnected": 6, + "starting": 7, + "failed": 8, + "aborted": 9, + "decrypting": 10, + "custom": 11, + "downloading": 12, + "processing": 13 +} + +######################################################################## +class FileHandler: + """Handles all request made to obtain information, + modify status or other request for links or packages""" + + + #---------------------------------------------------------------------- + def __init__(self, core): + """Constructor""" + self.core = core + + # translations + self.statusMsg = [_("finished"), _("offline"), _("online"), _("queued"), _("checking"), _("waiting"), _("reconnected"), _("starting"),_("failed"), _("aborted"), _("decrypting"), _("custom"),_("downloading"), _("processing")] + + self.cache = {} #holds instances for files + self.packageCache = {} # same for packages + #@TODO: purge the cache + + self.jobCache = {} + + self.lock = RLock() + + self.filecount = -1 # if an invalid value is set get current value from db + self.unchanged = False #determines if any changes was made since last call + + self.db = FileDatabaseBackend(self) # the backend + + + def change(func): + def new(*args): + args[0].unchanged = False + args[0].filecount = -1 + args[0].jobCache = {} + return func(*args) + return new + + #---------------------------------------------------------------------- + def save(self): + """saves all data to backend""" + self.db.commit() + + #---------------------------------------------------------------------- + def getCompleteData(self, queue=1): + """gets a complete data representation""" + + data = self.db.getAllLinks(queue) + packs = self.db.getAllPackages(queue) + + print data + 
print packs + + data.update( [ (x.id, x.toDbDict()[x.id]) for x in self.cache.itervalues() ] ) + packs.update( [ (x.id, x.toDict()[x.id]) for x in self.packageCache.itervalues() if x.queue == queue] ) + + for key, value in data.iteritems(): + if packs.has_key(value["package"]): + packs[value["package"]]["links"][key] = value + + return packs + + #---------------------------------------------------------------------- + @change + def addLinks(self, urls, package): + """adds links""" + + # tuple of (url, name, plugin, package) + links = [ (x[0], x[0], x[1], package) for x in self.core.pluginManager.parseUrls(urls) ] + + self.db.addLinks(links, package) + + + #---------------------------------------------------------------------- + @change + def addPackage(self, name, folder, queue=0): + """adds a package, default to link collector""" + return self.db.addPackage(name, folder, queue) + + #---------------------------------------------------------------------- + @change + def deletePackage(self, id): + """delete package and all contained links""" + + self.lock.acquire() + + if self.packageCache.has_key(id): + del self.packageCache[id] + + toDelete = [] + + for pyfile in self.cache.itervalues(): + if pyfile.packageid == id: + pyfile.abort() + toDelete.append(pyfile.id) + + for pid in toDelete: + del self.cache[pid] + + self.db.deletePackage(id) + + + self.lock.release() + + #---------------------------------------------------------------------- + @change + def deleteLink(self, id): + """deletes links""" + + self.lock.acquire() + + if self.cache.has_key(id): + self.cache[id].abort() + del self.cache[id] + + self.lock.release() + + self.db.deleteLink(id) + + #---------------------------------------------------------------------- + def releaseLink(self, id): + """removes pyfile from cache""" + if self.cache.has_key(id): + del self.cache[id] + + #---------------------------------------------------------------------- + def releasePackage(self, id): + """removes package from 
cache""" + if self.packageCache.has_key(id): + del self.packageCache[id] + + #---------------------------------------------------------------------- + def updateLink(self, pyfile): + """updates link""" + self.db.updateLink(pyfile) + + #---------------------------------------------------------------------- + def updatePackage(self, pypack): + """updates a package""" + self.db.updatePackage(pypack) + + #---------------------------------------------------------------------- + def getPackage(self, id): + """return package instance""" + + if self.packageCache.has_key(id): + return self.packageCache[id] + else: + return self.db.getPackage(id) + + #---------------------------------------------------------------------- + def getFile(self, id): + """returns pyfile instance""" + if self.cache.has_key(id): + return self.cache[id] + else: + return self.db.getFile(id) + + #---------------------------------------------------------------------- + def getJob(self, occ): + """get suitable job""" + + self.lock.acquire() + + if self.jobCache.has_key(occ): + pass + else: + self.jobCache = {} #better not caching to much + jobs = self.db.getJob(occ) + jobs.reverse() + self.jobCache[occ] = jobs + + #@TODO: maybe the new job has to be approved... 
+ + if not self.jobCache[occ]: + pyfile = None + else: + pyfile = self.getFile(self.jobCache[occ].pop()) + + self.lock.release() + return pyfile + + + #---------------------------------------------------------------------- + def getFileCount(self): + """returns number of files""" + + if self.filecount == -1: + self.filecount = self.db.filecount(1) + + return self.filecount + + + +######################################################################### +class FileDatabaseBackend(Thread): + """underlying backend for the filehandler to save the data""" + + def __init__(self, manager): + Thread.__init__(self) + + self.setDaemon(True) + + self.manager = manager + + self.jobs = Queue() # queues for jobs + self.res = Queue() + + self.start() + + + def queue(func): + """use as decorator when fuction directly executes sql commands""" + def new(*args): + args[0].jobs.put((func, args, 0)) + return args[0].res.get() + return new + + def async(func): + """use as decorator when function does not return anything and asynchron execution is wanted""" + def new(*args): + args[0].jobs.put((func, args, 1)) + return True + return new + + def run(self): + """main loop, which executes commands""" + + self.conn = sqlite3.connect("files.db") + self.c = self.conn.cursor() + #self.c.execute("PRAGMA synchronous = OFF") + self._createTables() + + while True: + try: + f, args, async = self.jobs.get() + if f == "quit": return True + res = f(*args) + if not async: self.res.put(res) + except Exception, e: + #@TODO log etc + print "Database Error @", f.__name__, args[1:], e + traceback.print_exc() + if not async: self.res.put(None) + + def shutdown(self): + self.save() + self.jobs.put(("quit", "", 0)) + + def _createTables(self): + """create tables for database""" + + self.c.execute('CREATE TABLE IF NOT EXISTS "packages" ("id" INTEGER PRIMARY KEY AUTOINCREMENT, "name" TEXT NOT NULL, "folder" TEXT, "password" TEXT, "site" TEXT, "queue" INTEGER DEFAULT 0 NOT NULL)') + self.c.execute('CREATE TABLE IF 
NOT EXISTS "links" ("id" INTEGER PRIMARY KEY AUTOINCREMENT, "url" TEXT NOT NULL, "name" TEXT, "size" INTEGER DEFAULT 0 NOT NULL, "status" INTEGER DEFAULT 3 NOT NULL, "plugin" TEXT DEFAULT "BasePlugin" NOT NULL, "error" TEXT, "package" INTEGER DEFAULT 0 NOT NULL, FOREIGN KEY(package) REFERENCES packages(id))') + self.c.execute('CREATE INDEX IF NOT EXISTS "pIdIndex" ON links(package)') + self.c.execute('VACUUM') + + #---------------------------------------------------------------------- + @queue + def filecount(self, queue): + """returns number of files in queue""" + self.c.execute("SELECT l.id FROM links as l INNER JOIN packages as p ON l.package=p.id WHERE p.queue=? ORDER BY l.id", (queue,)) + r = self.c.fetchall() + return len(r) + + @queue + def addLink(self, url, name, plugin, package): + self.c.execute('INSERT INTO links(url, name, plugin, package) VALUES(?,?,?,?)', (url, name, plugin, package)) + return self.c.lastrowid + + @queue + def addLinks(self, links, package): + """ links is a list of tupels (url,name,plugin)""" + self.c.executemany('INSERT INTO links(url, name, plugin, package) VALUES(?,?,?,?)', links) + + @queue + def addPackage(self, name, folder, queue): + + self.c.execute('INSERT INTO packages(name, folder, queue) VALUES(?,?,?)', (name, folder, queue)) + return self.c.lastrowid + + @queue + def deletePackage(self, id): + + self.c.execute('DELETE FROM links WHERE package=?', (str(id), )) + self.c.execute('DELETE FROM packages WHERE id=?', (str(id), )) + + @queue + def deleteLink(self, id): + + self.c.execute('DELETE FROM links WHERE id=?', (str(id), )) + + + @queue + def getAllLinks(self, q): + """return information about all links in queue q + + q0 queue + q1 collector + + format: + + { + id: {'name': name, ... 'package': id }, ... + } + + """ + self.c.execute('SELECT l.id,l.url,l.name,l.size,l.status,l.error,l.plugin,l.package FROM links as l INNER JOIN packages as p ON l.package=p.id WHERE p.queue=? 
ORDER BY l.id', (q, )) + data = {} + for r in self.c: + data[int(r[0])] = { + 'url': r[1], + 'name': r[2], + 'size': r[3], + 'status': r[4], + 'statusmsg': self.manager.statusMsg[r[4]], + 'error': r[5], + 'plugin': r[6], + 'package': r[7] + } + + return data + + @queue + def getAllPackages(self, q): + """return information about packages in queue q + (only useful in get all data) + + q0 queue + q1 collector + + format: + + { + id: {'name': name ... 'links': {} }, ... + } + """ + self.c.execute('SELECT id,name,folder,site,password,queue FROM packages WHERE queue=? ORDER BY id', str(q)) + + data = {} + for r in self.c: + data[int(r[0])] = { + 'name': r[1], + 'folder': r[2], + 'site': r[3], + 'password': r[4], + 'queue': r[5], + 'links': {} + } + + return data + + + def getLinkData(self, id): + """get link information""" + pass + + def getPackageData(self, id): + """get package data _with_ link data""" + pass + + + @async + def updateLink(self, f): + self.c.execute('UPDATE links SET url=?,name=?,size=?,status=?,error=?,package=? WHERE id=?', (f.name, f.url, f.size, f.status, f.error, str(f.packageid), str(f.id))) + + @async + def updatePackage(self, p): + self.c.execute('UPDATE packages SET name=?,folder=?,site=?,password=?,queue=? 
WHERE id=?', (p.name, p.folder, p.site, p.password, p.queue, str(p.id))) + + @async + def commit(self): + self.conn.commit() + + @queue + def getPackage(self, id): + """return package instance from id""" + self.c.execute("SELECT name,folder,site,password,queue FROM packages WHERE id=?", (str(id),)) + r = self.c.fetchone() + if not r: return None + return PyPackage(self.manager, id, *r) + + #---------------------------------------------------------------------- + @queue + def getFile(self, id): + """return link instance from id""" + self.c.execute("SELECT url, name, size, status, error, plugin, package FROM links WHERE id=?", (str(id),)) + r = self.c.fetchone() + if not r: return None + return PyFile(self.manager, id, *r) + + + @queue + def getJob(self, occ): + """return pyfile instance, which is suitable for download and dont use a occupied plugin""" + self.c.execute("SELECT l.id FROM links as l INNER JOIN packages as p ON l.package=p.id WHERE p.queue=1 AND l.plugin NOT IN ('else','some','else') AND l.status IN (2,3,6) LIMIT 5") + + return [x[0] for x in self.c ] + +class PyFile(): + def __init__(self, manager, id, url, name, size, status, error, pluginname, package): + self.m = manager + self.m.cache[int(id)] = self + + self.id = int(id) + self.url = url + self.name = name + self.size = size + self.status = status + self.pluginname = pluginname + self.packageid = package #should not be used, use package() instead + self.error = error + # database information ends here + + self.waitUntil = 0 # time() + time to wait + + # status attributes + self.active = False #obsolete? 
+ self.abort = False + self.reconnected = False + + + def __repr__(self): + return "PyFile %s: %s@%s" % (self.id, self.name, self.pluginname) + + def initPlugin(self): + """ inits plugin instance """ + self.pluginmodule = self.m.core.pluginManager.getPlugin(self.pluginname) + self.pluginclass = getattr(self.pluginmodule, self.pluginname) + self.plugin = self.pluginclass(self) + + + def package(self): + """ return package instance""" + return self.m.getPackage(self.packageid) + + def setStatus(self, status): + self.status = statusMap[status] + self.sync() #@TODO needed aslong no better job approving exists + + def hasStatus(self, status): + return statusMap[status] == self.status + + def sync(self): + """sync PyFile instance with database""" + self.m.updateLink(self) + + def release(self): + """sync and remove from cache""" + self.sync() + self.m.releaseLink(self.id) + + def delete(self): + """delete pyfile from database""" + self.m.deleteLink(self.id) + + def toDict(self): + """return dict with all information for interface""" + return self.toDbDict() + + def toDbDict(self): + """return data as dict for databse + + format: + + { + id: {'url': url, 'name': name ... 
} + } + + """ + return { + self.id: { + 'url': self.url, + 'name': self.name, + 'plugin' : self.pluginname, + 'size': self.size, + 'status': self.status, + 'statusmsg': self.m.statusMsg[self.status], + 'package': self.packageid, + 'error': self.error + } + } + + def abort(self): + """abort pyfile if possible""" + + while self.id in self.m.core.ThreadManager.processingIds(): + self.abort = True + sleep(0.025) + + abort = False + + def finishIfDone(self): + """set status to finish and release file if every thread is finished with it""" + + if self.id in self.m.core.threadManager.processingIds(): + return False + + self.setStatus("finished") + self.release() + return True + + def formatWait(self): + """ formats and return wait time in humanreadable format """ + return self.waitUntil - time() + + + +class PyPackage(): + def __init__(self, manager, id, name, folder, site, password, queue): + self.m = manager + self.m.packageCache[int(id)] = self + + self.id = int(id) + self.name = name + self.folder = folder + self.site = site + self.password = password + self.queue = queue + + def toDict(self): + """return data as dict + + format: + + { + id: {'name': name ... 'links': {} } } + } + + """ + return { + self.id: { + 'name': self.name, + 'folder': self.folder, + 'site': self.site, + 'password': self.password, + 'queue': self.queue, + 'links': {} + } + } + + def getChildren(self): + """get information about contained links""" + raise NotImplementedError + + def sync(self): + """sync with db""" + self.m.updatePackage(self) + + def release(self): + """sync and delete from cache""" + self.sync() + self.m.releasePackage(self.id) + + def delete(self): + self.m.deletePackage(self.id) + + +if __name__ == "__main__": + + pypath = "." 
+ + db = FileHandler(None) + + #p = PyFile(db, 5) + #sleep(0.1) + + a = time() + + #print db.addPackage("package", "folder" , 1) + + #print db.addPackage("package", "folder", 1) + + #db.addLinks([x for x in range(0,200)], 5) + + db.save() + + b = time() + print "adding 200 links, single sql execs, no commit", b-a + + + res = db.getCompleteData(1) + #print res + r = [ len(x["links"]) for x in res.itervalues() ] + print r + c = time() + print "get all links", c-b + + #i = 2 + #db.updateLink(i, "urlupdate%s" % i, "nameupdate%s" % i, i, i, i,i) + + d = time() + print "update one link", d-c + + #p.sync() + #p.remove() + + e = time() + print "sync and remove link", e-d + + db.save() + + db.deletePackage(1) + #db.commit() + + f = time() + print "commit, remove package/links, commit", f-e + + #db.commit() + sleep(0.5) + + g = time() + print "empty commit", g-f -0.5 + + + job = db.getJob("") + print job + + h = time() + #print db.getFileCount() + + print "get job", h-g + + print db.getFileCount() + + i = time() + + print "file count", i-h + + + print db.getJob("") + + j = time() + + + print "get job 2", j-i + + for i in db.cache.itervalues(): + i.sync() + + sleep(1) + +
\ No newline at end of file diff --git a/module/HookManager.py b/module/HookManager.py index bb08fff4a..2d81d87b3 100644 --- a/module/HookManager.py +++ b/module/HookManager.py @@ -13,30 +13,36 @@ You should have received a copy of the GNU General Public License along with this program; if not, see <http://www.gnu.org/licenses/>. - + @author: mkaay @interface-version: 0.1 """ import logging import traceback -from threading import Lock +from threading import RLock -from module.XMLConfigParser import XMLConfigParser class HookManager(): def __init__(self, core): self.core = core - self.configParser = self.core.parser_plugins - self.configParser.loadData() - self.config = self.configParser.getConfig() - self.logger = logging.getLogger("log") + + self.config = self.core.config + + self.log = self.core.log self.plugins = [] - self.lock = Lock() + self.lock = RLock() self.createIndex() - + + def lock(func): + def new(*args): + args[0].lock.acquire() + res = func(*args) + args[0].lock.release() + return res + return new + def createIndex(self): - self.lock.acquire() plugins = [] for pluginClass in self.core.pluginManager.getHookPlugins(): @@ -45,51 +51,46 @@ class HookManager(): plugin.readConfig() plugins.append(plugin) except: - self.logger.warning(_("Failed activating %(name)s") % {"name":plugin.__name__}) - if self.core.config['general']['debug_mode']: + #self.log.warning(_("Failed activating %(name)s") % {"name":plugin.__name__}) + if self.core.debug: traceback.print_exc() - + self.plugins = plugins - self.lock.release() + + + def periodical(self): + pass def coreReady(self): - self.lock.acquire() - for plugin in self.plugins: plugin.coreReady() - self.lock.release() - + + @lock def downloadStarts(self, pyfile): - self.lock.acquire() for plugin in self.plugins: plugin.downloadStarts(pyfile) - self.lock.release() - + + @lock def downloadFinished(self, pyfile): - self.lock.acquire() for plugin in self.plugins: plugin.downloadFinished(pyfile) - self.lock.release() - + + 
@lock def packageFinished(self, package): - self.lock.acquire() for plugin in self.plugins: plugin.packageFinished(package) - self.lock.release() - + + @lock def beforeReconnecting(self, ip): - self.lock.acquire() for plugin in self.plugins: plugin.beforeReconnecting(ip) - self.lock.release() + @lock def afterReconnecting(self, ip): - self.lock.acquire() for plugin in self.plugins: plugin.afterReconnecting(ip) - self.lock.release() diff --git a/module/InitHomeDir.py b/module/InitHomeDir.py new file mode 100644 index 000000000..aa94f698c --- /dev/null +++ b/module/InitHomeDir.py @@ -0,0 +1,81 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +""" + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + See the GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. 
+ + @author: RaNaN + + This modules inits working directories and global variables, pydir and homedir +""" + + +from os import mkdir +from os import path +from os import chdir +from sys import platform +from sys import argv + +import __builtin__ +__builtin__.pypath = path.abspath(path.join(__file__,"..","..")) + + +homedir = "" +try: + from win32com.shell import shellcon, shell + homedir = shell.SHGetFolderPath(0, shellcon.CSIDL_APPDATA, 0, 0) +except ImportError: # quick semi-nasty fallback for non-windows/win32com case + if platform == 'nt': + import ctypes + from ctypes import wintypes, windll + CSIDL_APPDATA = 26 + _SHGetFolderPath = ctypes.windll.shell32.SHGetFolderPathW + _SHGetFolderPath.argtypes = [ctypes.wintypes.HWND, + ctypes.c_int, + ctypes.wintypes.HANDLE, + ctypes.wintypes.DWORD, ctypes.wintypes.LPCWSTR] + + path_buf = ctypes.wintypes.create_unicode_buffer(ctypes.wintypes.MAX_PATH) + result = _SHGetFolderPath(0, CSIDL_APPDATA, 0, 0, path_buf) + homedir = path_buf.value + else: + homedir = path.expanduser("~") + +__builtin__.homedir = homedir + + +args = " ".join(argv[1:]) + +# dirty method to set configdir from commandline arguments + +if "--configdir=" in args: + pos = args.find("--configdir=") + end = args.find("-", pos+12) + + if end == -1: + configdir = args[pos+12:].strip() + else: + configdir = args[pos+12:end].strip() +else: + if platform in ("posix","linux2"): + configdir = path.join(homedir, ".pyload") + else: + configdir = path.join(homedir, "pyload") + +if not path.exists(configdir): + mkdir(configdir, 0700) + +__builtin__.configdir = configdir +chdir(configdir) + +#print "Using %s as working directory." % configdir diff --git a/module/PluginManager.py b/module/PluginManager.py index ad01eba1b..b56626d0f 100644 --- a/module/PluginManager.py +++ b/module/PluginManager.py @@ -14,99 +14,251 @@ You should have received a copy of the GNU General Public License along with this program; if not, see <http://www.gnu.org/licenses/>. 
- @author: mkaay - @interface-version: 0.1 + @author: mkaay, RaNaN """ -import logging import re from threading import Lock -from module.XMLConfigParser import XMLConfigParser -from module.plugins.Hoster import Hoster - +from os import listdir +from os.path import isfile +from os.path import join from sys import version_info +from itertools import chain + import traceback class PluginManager(): def __init__(self, core): self.core = core - self.configParser = self.core.xmlconfig - self.configParser.loadData() - self.config = self.configParser.getConfig() - self.logger = logging.getLogger("log") - self.crypterPlugins = [] - self.containerPlugins = [] - self.hosterPlugins = [] - self.captchaPlugins = [] - self.accountPlugins = [] - self.hookPlugins = [] - self.lock = Lock() + + #self.config = self.core.config + self.log = core.log + + self.crypterPlugins = {} + self.containerPlugins = {} + self.hosterPlugins = {} + self.captchaPlugins = {} + self.accountPlugins = {} + self.hookPlugins = {} + + self.createHomeDirs() + self.createIndex() + + #@TODO plugin updater + #---------------------------------------------------------------------- + def createHomeDirs(self): + """create homedirectories containing plugins""" + pass def createIndex(self): - self.lock.acquire() + """create information for all plugins available""" + self.rePattern = re.compile(r'__pattern__.*=.*r("|\')([^"\']+)') + self.reVersion = re.compile(r'__version__.*=.*("|\')([0-9.]+)') + self.reConfig = re.compile(r'__config__.*=.*\[([^\]]+)', re.MULTILINE) - self.crypterPlugins = self.parse(self.core.config["plugins"]["load_crypter_plugins"], _("Crypter")) - self.containerPlugins = self.parse(self.core.config["plugins"]["load_container_plugins"], _("Container")) - self.hosterPlugins = self.parse(self.core.config["plugins"]["load_hoster_plugins"], _("Hoster")) - self.captchaPlugins = self.parse(self.core.config["plugins"]["load_captcha_plugins"], _("Captcha")) - self.accountPlugins = 
self.parse(self.core.config["plugins"]["load_account_plugins"], _("Account"), create=True) - self.hookPlugins = self.parse(self.core.config["plugins"]["load_hook_plugins"], _("Hook")) + self.crypterPlugins = self.parse(_("Crypter"), "crypter", pattern=True) + self.containerPlugins = self.parse(_("Container"), "container", pattern=True) + self.hosterPlugins = self.parse(_("Hoster") ,"hoster", pattern=True) - self.lock.release() - self.logger.info(_("created index of plugins")) + self.captchaPlugins = self.parse(_("Captcha"), "captcha") + self.accountPlugins = self.parse(_("Account"), "accounts", create=True) + self.hookPlugins = self.parse(_("Hook"), "hooks") + + self.log.info(_("created index of plugins")) - def parse(self, pluginStr, ptype, create=False): - plugins = [] - for pluginModule in pluginStr.split(","): - pluginModule = pluginModule.strip() - if not pluginModule: - continue - pluginName = pluginModule.split(".")[-1] - if pluginName.endswith("_25") and not version_info[0:2] == (2, 5): - continue - elif pluginName.endswith("_26") and not version_info[0:2] == (2, 6): - continue - try: - module = __import__(pluginModule, globals(), locals(), [pluginName], -1) - pluginClass = getattr(module, pluginName) - self.logger.debug(_("%(type)s: %(name)s added") % {"name":pluginName, "type":ptype}) - if create: - pluginClass = pluginClass(self) - plugins.append(pluginClass) - except: - self.logger.warning(_("Failed activating %(name)s") % {"name":pluginName}) - if self.core.config['general']['debug_mode']: - traceback.print_exc() - return plugins + def parse(self, typ, folder, create=False, pattern=False): + """ + returns dict with information + + { + name : {path, version, config, (pattern, re), (plugin, class)} + } + + """ + plugins = {} + pfolder = join(pypath, "module", "plugins", folder) + + for f in listdir(pfolder): + if (isfile(join(pfolder, f)) and f.endswith(".py") or f.endswith("_25.pyc") or f.endswith("_26.pyc") or f.endswith("_27.pyc")) and not 
f.startswith("_"): + data = open(join(pfolder, f)) + content = data.read() + data.close() + + if f.endswith("_25.pyc") and not version_info[0:2] == (2, 5): + continue + elif f.endswith("_26.pyc") and not version_info[0:2] == (2, 6): + continue + elif f.endswith("_27.pyc") and not version_info[0:2] == (2, 7): + continue + + name = f[:-3] + if name[-1] == "." : name = name[:-4] + + plugins[name] = {} + + module = f.replace(".pyc","").replace(".py","") + path = "module.plugins.%s.%s" % (folder, module) + + plugins[name]["name"] = module + plugins[name]["path"] = path + + + if pattern: + pattern = self.rePattern.findall(content) + + if pattern: + pattern = pattern[0][1] + else: + pattern = "unmachtable" + + plugins[name]["pattern"] = pattern + + try: + plugins[name]["re"] = re.compile(pattern) + except: + self.log.error(_("%s has invalid pattern.") % name) + + version = self.reVersion.findall(content) + if version: + version = float(version[0][1]) + else: + version = 0 + plugins[name]["v"] = version + + config = self.reConfig.findall(content) + + if config: + config = [ [y.strip() for y in x.replace("'","").replace('"',"").replace(")","").split(",") if y.strip()] for x in config[0].split("(") if x.strip()] + + #@TODO: create config - def getPluginFromPattern(self, urlPattern): - plugins = [] - plugins.extend(self.crypterPlugins) - plugins.extend(self.containerPlugins) - plugins.extend(self.hosterPlugins) - for plugin in plugins: - if not plugin.__pattern__: + #@TODO replace with plugins in homedir + + return plugins + + #---------------------------------------------------------------------- + def parseUrls(self, urls): + """parse plugins for given list of urls""" + + last = None + res = [] # tupels of (url, plugin) + + for url in urls: + + found = False + + if last and last[1]["re"].match(url): + res.append((url, last[0])) continue - if re.match(plugin.__pattern__, urlPattern): - return plugin - return Hoster + + for name, value in 
chain(self.containerPlugins.iteritems(), self.crypterPlugins.iteritems(), self.hosterPlugins.iteritems() ): + if value["re"].match(url): + res.append((url, name)) + last = (name, value) + found = True + break + + if not found: + res.append((url, "BasePlugin")) + + return res + #---------------------------------------------------------------------- + def getPlugin(self, name): + """return plugin module from hoster|decrypter|container""" + plugin = None + + if self.containerPlugins.has_key(name): + plugin = self.containerPlugins[name] + if self.crypterPlugins.has_key(name): + plugin = self.crypterPlugins[name] + if self.hosterPlugins.has_key(name): + plugin = self.hosterPlugins[name] + + if not plugin: + plugin = __import__("module.plugins.hoster.BasePlugin", fromlist=[]) + return plugin + + if plugin.has_key("module"): + return plugin["module"] + + plugin["module"] = __import__(plugin["path"], globals(), locals(), [plugin["name"]] , -1) + + return plugin["module"] + + + #---------------------------------------------------------------------- def getCaptchaPlugin(self, name): - for plugin in self.captchaPlugins: - if plugin.__name__ == name: - return plugin + """return captcha modul if existent""" + if self.captchaPlugins.has_key(name): + plugin = self.captchaPlugins[name] + if plugin.has_key("module"): + return plugin["module"] + + plugin["module"] = __import__(plugin["path"], globals(), locals(), [plugin["name"]] , -1) + + return plugin["module"] + return None - + #---------------------------------------------------------------------- def getAccountPlugin(self, name): - for plugin in self.accountPlugins: - if plugin.__name__ == name: - return plugin + """return account class if existent""" + if self.accountPlugins.has_key(name): + plugin = self.accountPlugins[name] + if plugin.has_key("inst"): + return plugin["inst"] + + module = __import__(plugin["path"], globals(), locals(), [plugin["name"]] , -1) + pclass = getattr(module, plugin["name"]) + plugin["inst"] = 
pclass(self) + + + return plugin["inst"] + return None - + + #---------------------------------------------------------------------- def getAccountPlugins(self): - return self.accountPlugins - + """return list of account modules""" + + #---------------------------------------------------------------------- def getHookPlugins(self): - return self.hookPlugins + """return list of hook classes""" + + classes = [] + + for name, value in self.hookPlugins.iteritems(): + if value.has_key("class"): + classes.append(value["class"]) + continue + + module = __import__(value["path"], globals(), locals(), [value["name"]] , -1) + + pluginClass = getattr(module, name) + + value["class"] = pluginClass + + classes.append(pluginClass) + + return classes + + +if __name__ == "__main__": + _ = lambda x : x + pypath = "/home/christian/Projekte/pyload-0.4/module/plugins" + + from time import time + + p = PluginManager(None) + + a = time() + + test = [ "http://www.youtube.com/watch?v=%s" % x for x in range(0,100) ] + print p.parseUrls(test) + + b = time() + + print b-a ,"s" +
\ No newline at end of file diff --git a/module/PluginThread.py b/module/PluginThread.py new file mode 100644 index 000000000..75b643408 --- /dev/null +++ b/module/PluginThread.py @@ -0,0 +1,182 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +""" + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + See the GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. + + @author: RaNaN +""" + +from threading import Thread +from Queue import Queue + +from time import sleep +from traceback import print_exc + +from pycurl import error +from module.plugins.Plugin import Abort, Reconnect, Retry, Fail + +######################################################################## +class PluginThread(Thread): + """abstract base class for thread types""" + + #---------------------------------------------------------------------- + def __init__(self, manager): + """Constructor""" + Thread.__init__(self) + self.setDaemon(True) + self.m = manager + + +######################################################################## +class DownloadThread(PluginThread): + """thread for downloading files from 'real' hoster plugins""" + + #---------------------------------------------------------------------- + def __init__(self, manager): + """Constructor""" + PluginThread.__init__(self, manager) + + self.queue = Queue() # job queue + self.active = False + + self.start() + + #---------------------------------------------------------------------- + def run(self): + """run method""" + + 
while True: + self.active = self.queue.get() + pyfile = self.active + + if self.active == "quit": + return True + + print pyfile + + try: + pyfile.plugin.preprocessing(self) + + except NotImplementedError: + + self.m.log.error(_("Plugin %s is missing a function.") % pyfile.pluginname) + continue + + except Abort: + self.m.log.info(_("%s aborted") % pyfile.name) + pyfile.setStatus("aborted") + self.active = False + pyfile.release() + continue + + except Reconnect: + self.queue.put(pyfile) + pyfile.req.clearCookies() + + while self.m.reconnecting.isSet(): + sleep(0.5) + + continue + + except Retry: + + self.m.log.info(_("restarting %s") % pyfile.name) + self.queue.put(pyfile) + continue + + except Fail,e : + + msg = e.args[0] + + if msg == "offline": + pyfile.setStatus("offline") + self.m.log.warning(_("%s is offline.") % pyfile.name) + else: + pyfile.setStatus("failed") + self.m.log.warning(_("%s failed with message: %s") % (pyfile.name, msg)) + pyfile.error = msg + + continue + + except error, e: + code, msg = e + print "pycurl error", code, msg + continue + + except Exception,e : + pyfile.setStatus("failed") + self.m.log.error(_("%s failed with message: .") % (pyfile.name, str(e))) + + if self.m.core.debug: + print_exc() + + continue + + + finally: + print "saved" + self.m.core.files.save() + + print "finished successfully" + + #@TODO hooks, packagaefinished etc + + + self.active = False + pyfile.finishIfDone() + + #---------------------------------------------------------------------- + def put(self, job): + """assing job to thread""" + self.queue.put(job) + + #---------------------------------------------------------------------- + def stop(self): + """stops the thread""" + self.put("quit") + + + +######################################################################## +class DecrypterThread(PluginThread): + """thread for decrypting""" + + #---------------------------------------------------------------------- + def __init__(self, pyfile): + """constructor""" + 
Thread.__init__(self) + + self.pyfile = pyfile + + self.start() + + #---------------------------------------------------------------------- + def run(self): + """run method""" + self.pyfile + #@TODO: decrypt it + + +######################################################################## +class HookThread(PluginThread): + """thread for hooks""" + + #---------------------------------------------------------------------- + def __init__(self): + """Constructor""" + + + +
\ No newline at end of file diff --git a/module/RequestFactory.py b/module/RequestFactory.py index 975356254..373eeb312 100644 --- a/module/RequestFactory.py +++ b/module/RequestFactory.py @@ -36,8 +36,11 @@ class RequestFactory(): if type == "HTTP": iface = self.core.config["general"]["download_interface"] req = Request(interface=str(iface)) - cj = self.getCookieJar(pluginName, account) - req.setCookieJar(cj) + if account: + cj = self.getCookieJar(pluginName, account) + req.setCookieJar(cj) + else: + req.setCookieJar(CookieJar(pluginName)) elif type == "XDCC": req = XdccRequest() diff --git a/module/ThreadManager.py b/module/ThreadManager.py index 4e2beaf49..67ea0d8d8 100644 --- a/module/ThreadManager.py +++ b/module/ThreadManager.py @@ -14,261 +14,101 @@ You should have received a copy of the GNU General Public License along with this program; if not, see <http://www.gnu.org/licenses/>. - - @author: mkaay - @author: spoob - @author: sebnapi + @author: RaNaN - @version: v0.3.2 """ -from __future__ import with_statement -from os.path import exists -import re -import subprocess -from threading import RLock, Thread -from time import sleep -from module.network.Request import getURL -from module.DownloadThread import DownloadThread -from module.SpeedManager import SpeedManager - -class ThreadManager(Thread): - def __init__(self, parent): - Thread.__init__(self) - self.parent = parent - self.list = parent.file_list #file list - self.threads = [] - self.lock = RLock() - self.py_downloading = [] # files downloading - self.occ_plugins = [] #occupied plugins - self.pause = True - self.reconnecting = False - - self.speedManager = SpeedManager(self) - - def run(self): - while True: - if (len(self.threads) < int(self.parent.config['general']['max_downloads']) or self.isDecryptWaiting()) and not self.pause: - job = self.getJob() - if job: - thread = self.createThread(job) - thread.start() - sleep(1) - - def createThread(self, job): - """ creates thread for Py_Load_File and 
append thread to self.threads - """ - thread = DownloadThread(self, job) - self.threads.append(thread) - return thread - - def removeThread(self, thread): - self.threads.remove(thread) - - def getJob(self): - """return job if suitable, otherwise send thread idle""" - - if not self.parent.server_methods.is_time_download() or self.pause or self.reconnecting or self.list.queueEmpty(): #conditions when threads dont download - return None - - if self.parent.freeSpace() < self.parent.config["general"]["min_free_space"]: - self.parent.logger.debug(_("minimal free space exceeded")) - return None - - self.initReconnect() - - self.lock.acquire() - - pyfile = None - pyfiles = self.list.getDownloadList(self.occ_plugins) - - if pyfiles: - pyfile = pyfiles[0] - self.py_downloading.append(pyfile) - self.parent.hookManager.downloadStarts(pyfile) - if not pyfile.plugin.multi_dl: - self.occ_plugins.append(pyfile.plugin.__name__) - pyfile.active = True - if pyfile.plugin.__type__ == "container": - self.parent.logger.info(_("Get links from: %s") % pyfile.url) - else: - self.parent.logger.info(_("Download starts: %s") % pyfile.url) - - self.lock.release() - return pyfile - - def isDecryptWaiting(self): - pyfiles = self.list.getDownloadList(self.occ_plugins) - for pyfile in pyfiles: - if pyfile.plugin.__type__ == "container": - return True - return False - - def handleNewInterface(self, pyfile): - plugin = pyfile.plugin - if plugin.__type__ == "container": - if plugin.createNewPackage(): - packages = plugin.getPackages() - if len(packages) == 1: - self.parent.logger.info(_("1 new package from %s") % (pyfile.status.filename,)) - else: - self.parent.logger.info(_("%i new packages from %s") % (len(packages), pyfile.status.filename)) - for name, links in packages: - if not name: - name = pyfile.status.filename - pid = self.list.packager.addNewPackage(name) - for link in links: - newFile = self.list.collector.addLink(link) - self.list.packager.addFileToPackage(pid, 
self.list.collector.popFile(newFile)) - if len(links) == 1: - self.parent.logger.info(_("1 link in %s") % (name,)) - else: - self.parent.logger.info(_("%i links in %s") % (len(links), name)) - else: - pass - self.list.packager.removeFileFromPackage(pyfile.id, pyfile.package.data["id"]) - - def jobFinished(self, pyfile): - """manage completing download""" - self.lock.acquire() - - if not pyfile.plugin.multi_dl: - self.occ_plugins.remove(pyfile.plugin.__name__) - - pyfile.active = False - - if not pyfile.status == "reconnected": - try: - pyfile.plugin.req.pycurl.close() - except: - pass - - self.py_downloading.remove(pyfile) - - if pyfile.status.type == "finished": - if hasattr(pyfile.plugin, "__interface__") and pyfile.plugin.__interface__ >= 2: - self.handleNewInterface(pyfile) - elif pyfile.plugin.__type__ == "container": - newLinks = 0 - if pyfile.plugin.links: - if isinstance(pyfile.plugin.links, dict): - packmap = {} - for packname in pyfile.plugin.links.keys(): - packmap[packname] = self.list.packager.addNewPackage(packname) - for packname, links in pyfile.plugin.links.items(): - pid = packmap[packname] - for link in links: - newFile = self.list.collector.addLink(link) - self.list.packager.addFileToPackage(pid, self.list.collector.popFile(newFile)) - self.list.packager.pushPackage2Queue(pid) - newLinks += 1 - else: - for link in pyfile.plugin.links: - newFile = self.list.collector.addLink(link) - pid = pyfile.package.data["id"] - self.list.packager.addFileToPackage(pyfile.package.data["id"], self.list.collector.popFile(newFile)) - newLinks += 1 - self.list.packager.pushPackage2Queue(pid) - - self.list.packager.removeFileFromPackage(pyfile.id, pyfile.package.data["id"]) - - if newLinks: - self.parent.logger.info(_("Parsed links from %s: %i") % (pyfile.status.filename, newLinks)) - else: - self.parent.logger.info(_("No links in %s") % pyfile.status.filename) - #~ self.list.packager.removeFileFromPackage(pyfile.id, pyfile.package.id) - #~ for link in 
pyfile.plugin.links: - #~ id = self.list.collector.addLink(link) - #~ pyfile.packager.pullOutPackage(pyfile.package.id) - #~ pyfile.packager.addFileToPackage(pyfile.package.id, pyfile.collector.popFile(id)) - else: - packFinished = True - for packfile in pyfile.package.files: - if packfile.status.type != "finished": - packFinished = False - break - - self.parent.logger.info(_("Download finished: %s") % pyfile.url) - if packFinished: - self.parent.logger.info(_("Package finished: %s") % pyfile.package.data['package_name']) - self.parent.hookManager.packageFinished(pyfile.package) - - elif pyfile.status.type == "reconnected": - pyfile.plugin.req.init_curl() - - elif pyfile.status.type == "failed": - self.parent.logger.warning(_("Download failed: %s | %s") % (pyfile.url, pyfile.status.error)) - with open(self.parent.config['general']['failed_file'], 'a') as f: - f.write(pyfile.url + "\n") - - elif pyfile.status.type == "aborted": - self.parent.logger.info(_("Download aborted: %s") % pyfile.url) - - self.list.save() - - self.parent.hookManager.downloadFinished(pyfile) - - self.lock.release() - return True - - def initReconnect(self): - """initialise a reonnect""" - if not self.parent.config['reconnect']['activated'] or self.reconnecting or not self.parent.server_methods.is_time_reconnect(): - return False - - if not exists(self.parent.config['reconnect']['method']): - self.parent.logger.info(self.parent.config['reconnect']['method'] + " not found") - self.parent.config['reconnect']['activated'] = False - return False - - self.lock.acquire() - - if self.checkReconnect(): - self.reconnecting = True - self.reconnect() - sleep(1.1) - - self.reconnecting = False - self.lock.release() - return True - - self.lock.release() - return False - - def checkReconnect(self): - """checks if all files want reconnect""" - - if not self.py_downloading: - return False - - i = 0 - for obj in self.py_downloading: - if obj.status.want_reconnect: - i += 1 - - if len(self.py_downloading) == i: 
- return True - else: - return False - - def reconnect(self): - self.parent.logger.info(_("Starting reconnect")) - ip = re.match(".*Current IP Address: (.*)</body>.*", getURL("http://checkip.dyndns.org/")).group(1) - self.parent.hookManager.beforeReconnecting(ip) - reconn = subprocess.Popen(self.parent.config['reconnect']['method'])#, stdout=subprocess.PIPE) - reconn.wait() - sleep(1) - ip = "" - while ip == "": - try: - ip = re.match(".*Current IP Address: (.*)</body>.*", getURL("http://checkip.dyndns.org/")).group(1) #versuchen neue ip aus zu lesen - except: - ip = "" - sleep(1) - self.parent.hookManager.afterReconnecting(ip) - self.parent.logger.info(_("Reconnected, new IP: %s") % ip) +from threading import Event +import PluginThread + +######################################################################## +class ThreadManager: + """manages the download threads, assign jobs, reconnect etc""" + + #---------------------------------------------------------------------- + def __init__(self, core): + """Constructor""" + self.core = core + self.log = core.log + + self.threads = [] # thread list + self.localThreads = [] #hook+decrypter threads + + self.pause = True + + self.reconnecting = Event() + self.reconnecting.clear() + + for i in range(0, self.core.config.get("general","max_downloads") ): + self.createThread() + + + + #---------------------------------------------------------------------- + def createThread(self): + """create a download thread""" + + thread = PluginThread.DownloadThread(self) + self.threads.append(thread) + + #---------------------------------------------------------------------- + def downloadingIds(self): + """get a list of the currently downloading pyfile's ids""" + return [x.active.id for x in self.threads if x.active] + + #---------------------------------------------------------------------- + def processingIds(self): + """get a id list of all pyfiles processed""" + return [x.active.id for x in self.threads+self.localThreads if x.active] 
+ + + #---------------------------------------------------------------------- + def work(self): + """run all task which have to be done (this is for repetivive call by core)""" + + self.checkReconnect() + self.checkThreadCount() + self.assingJob() + + #---------------------------------------------------------------------- + def checkReconnect(self): + """checks if reconnect needed""" + pass + + #---------------------------------------------------------------------- + def checkThreadCount(self): + """checks if there are need for increasing or reducing thread count""" + + if len(self.threads) == self.core.config.get("general", "max_downloads"): + return True + elif len(self.threads) < self.core.config.get("general", "max_downloads"): + self.createThread() + else: + #@TODO: close thread + pass + + + #---------------------------------------------------------------------- + def assingJob(self): + """assing a job to a thread if possible""" + + if self.pause: return + + free = [x for x in self.threads if not x.active] + + if free: + thread = free[0] + + occ = [x.active.pluginname for x in self.threads if x.active and not x.active.plugin.multiDL ] + occ.sort() + occ = set(occ) + job = self.core.files.getJob(tuple(occ)) + if job: + job.initPlugin() + thread.put(job) + + + + - def stopAllDownloads(self): - self.pause = True - for pyfile in self.py_downloading: - pyfile.plugin.req.abort = True +
\ No newline at end of file diff --git a/module/XMLConfigParser.py b/module/XMLConfigParser.py deleted file mode 100644 index b691ecb8e..000000000 --- a/module/XMLConfigParser.py +++ /dev/null @@ -1,270 +0,0 @@ -# -*- coding: utf-8 -*- -""" - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - See the GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, see <http://www.gnu.org/licenses/>. - - @author: mkaay, spoob -""" -from __future__ import with_statement - -from os.path import exists - -from xml.dom.minidom import parse -import re -from shutil import copy, move - -class XMLConfigParser(): - def __init__(self, data, forceDefault=False, defaultFile=None): - self.xml = None - self.version = "0.1" - self.file = data - if defaultFile: - self.file_default = defaultFile - else: - self.file_default = self.file.replace(".xml", "_default.xml") - self.forceDefault = forceDefault - self.config = {} - self.data = {} - self.types = {} - self.loadData() - self.root = self.xml.documentElement - if not forceDefault: - self.defaultParser = XMLConfigParser(data, True, defaultFile=defaultFile) - - def loadData(self): - file = self.file - if self.forceDefault: - file = self.file_default - if not exists(self.file): - self._copyConfig() - with open(file, 'r') as fh: - self.xml = parse(fh) - if not self.xml.documentElement.getAttribute("version") == self.version: - self._copyConfig() - with open(file, 'r') as fh: - self.xml = parse(fh) - if not self.xml.documentElement.getAttribute("version") == self.version: - 
print _("Cant Update %s, your config version is outdated") % self.file - i = raw_input(_("backup old file and copy new one? [%s]/%s") % (_("yes")[0], _("no")[0])) - if i == "" or i == _("yes")[0]: - move(self.file, self.file.replace(".xml", "_backup.xml")) - self.loadData(self) - return - self.root = self.xml.documentElement - self._read_config() - - def _copyConfig(self): - try: - copy(self.file_default, self.file) - except: - print _("%s not found") % self.file_default - exit() #ok? - - def saveData(self): - if self.forceDefault: - return - with open(self.file, 'w') as fh: - self.xml.writexml(fh) - - def _read_config(self): - def format(val, t="str"): - if val.lower() == "true": - return True - elif val.lower() == "false": - return False - elif t == "int": - return int(val) - else: - return val - root = self.xml.documentElement - self.root = root - config = {} - data = {} - for node in root.childNodes: - if node.nodeType == node.ELEMENT_NODE: - section = node.tagName - config[section] = {} - data[section] = {} - data[section]["options"] = {} - data[section]["name"] = node.getAttribute("name") - for opt in node.childNodes: - if opt.nodeType == opt.ELEMENT_NODE: - data[section]["options"][opt.tagName] = {} - try: - config[section][opt.tagName] = format(opt.firstChild.data, opt.getAttribute("type")) - data[section]["options"][opt.tagName]["value"] = format(opt.firstChild.data, opt.getAttribute("type")) - except: - config[section][opt.tagName] = "" - data[section]["options"][opt.tagName]["name"] = opt.getAttribute("name") - data[section]["options"][opt.tagName]["type"] = opt.getAttribute("type") - data[section]["options"][opt.tagName]["input"] = opt.getAttribute("input") - self.config = config - self.data = data - - def get(self, section, option, default=None): - try: - return self.config[section][option] - except: - if self.forceDefault: - return default - return self.defaultParser.get(section, option, default) - - def getConfig(self): - return Config(self) - - def 
getConfigDict(self): - return self.config - - def getDataDict(self): - return self.data - - def set(self, section, data, value): - root = self.root - replace = False - sectionNode = False - if type(data) == str: - data = {"option": data} - if not self.checkInput(section, data["option"], value): - raise Exception("invalid input") - for node in root.childNodes: - if node.nodeType == node.ELEMENT_NODE: - if section == node.tagName: - sectionNode = node - for opt in node.childNodes: - if opt.nodeType == opt.ELEMENT_NODE: - if data["option"] == opt.tagName: - replace = opt - text = self.xml.createTextNode(str(value)) - if replace: - replace.replaceChild(text, replace.firstChild) - else: - newNode = self.xml.createElement(data["option"]) - newNode.appendChild(text) - if sectionNode: - sectionNode.appendChild(newNode) - else: - newSection = self.xml.createElement(section) - newSection.appendChild(newNode) - root.appendChild(newSection) - self._setAttributes(section, data) - self.saveData() - self.loadData() - - def remove(self, section, option): - root = self.root - for node in root.childNodes: - if node.nodeType == node.ELEMENT_NODE: - if section == node.tagName: - for opt in node.childNodes: - if opt.nodeType == opt.ELEMENT_NODE: - if option == opt.tagName: - node.removeChild(opt) - self.saveData() - return - - - def _setAttributes(self, node, data): - option = self.root.getElementsByTagName(node)[0].getElementsByTagName(data["option"])[0] - try: - option.setAttribute("name", data["name"]) - except: - pass - try: - option.setAttribute("type", data["type"]) - except: - pass - try: - option.setAttribute("input", data["input"]) - except: - pass - - def getType(self, section, option): - try: - return self.data[section]["options"][option]["type"] - except: - return "str" - - def getInputValues(self, section, option): - try: - if not self.data[section]["options"][option]["input"]: - return [] - return self.data[section]["options"][option]["input"].split(";") - except: - 
return [] - - def getDisplayName(self, section, option=None): - try: - if option: - return self.data[section]["options"][option]["name"] - else: - return self.data[section]["name"] - except: - if option: - return option - else: - return section - - def isValidSection(self, section): - try: - self.config[section] - return True - except: - if self.forceDefault: - return False - return self.defaultParser.isValidSection(section) - - def checkInput(self, section, option, value): - oinput = self.getInputValues(section, option) - if oinput: - for i in oinput: - if i == value: - return True - return False - otype = self.getType(section, option) - if not otype: - return True - if otype == "int" and (type(value) == int or re.match("^\d+$", value)): - return True - elif otype == "bool" and (type(value) == bool or re.match("^(true|false|True|False)$", value)): - return True - elif otype == "time" and re.match("^[0-2]{0,1}\d:[0-5]{0,1}\d$", value): - return True - elif otype == "ip" and re.match("^(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$", value): - return True - elif otype == "str": - return True - else: - return False - -class Config(object): - def __init__(self, parser): - self.parser = parser - - def __getitem__(self, key): - if self.parser.isValidSection(key): - return Section(self.parser, key) - raise Exception(_("invalid section")) - - def keys(self): - return self.parser.config.keys() - -class Section(object): - def __init__(self, parser, section): - self.parser = parser - self.section = section - - def __getitem__(self, key): - return self.parser.get(self.section, key) - - def __setitem__(self, key, value): - self.parser.set(self.section, key, value) diff --git a/module/config/core_default.xml b/module/config/core_default.xml deleted file mode 100644 index 4d2e59aad..000000000 --- a/module/config/core_default.xml +++ /dev/null @@ -1,133 +0,0 @@ -<?xml version="1.0" ?> -<config name="Configuration" version="0.1"> - 
<remote name="Remote"> - <port type="int" name="Port">7227</port> - <listenaddr type="ip" name="Adress">0.0.0.0</listenaddr> - <username type="str" name="Username">admin</username> - <password type="str" name="Password">pwhere</password> - </remote> - <ssl name="SSL"> - <activated type="bool" name="Activated">False</activated> - <cert type="str" name="SSL Certificate">ssl.crt</cert> - <key type="str" name="SSL Key">ssl.key</key> - </ssl> - <webinterface name="Webinterface"> - <activated type="bool" name="Activated">True</activated> - <server type="str" input="builtin;lighttpd;nginx;fastcgi" name="Server">builtin</server> - <https type="bool" name="Use HTTPS">False</https> - <host type="ip" name="IP">0.0.0.0</host> - <port type="int" name="Port">8001</port> - <template type="str" name="Template">default</template> - </webinterface> - <xdcc> - <nick name="Nickname" type="str">pyload</nick> - <ident name="Ident" type="str">pyloadident</ident> - <realname name="Realname" type="str">pyloadreal</realname> - </xdcc> - <log name="Log"> - <file_log type="bool" name="File Log">True</file_log> - <log_folder type="str" name="Folder">Logs</log_folder> - <log_count type="int" name="Count">5</log_count> - </log> - <general name="General"> - <language type="str" input="en;de;fr;nl;pl" name="Language">de</language> - <download_folder type="str" name="Download Folder">Downloads</download_folder> - <max_downloads type="int" name="Max Parallel Downloads">3</max_downloads> - <link_file type="str" name="File For Links">links.txt</link_file> - <failed_file type="str" name="File For Failed Links">failed_links.txt</failed_file> - <debug_mode type="bool" name="Debug Mode">False</debug_mode> - <max_download_time type="int" name="Max Download Time">5</max_download_time> - <download_speed_limit type="int" name="Download Speed Limit">0</download_speed_limit> - <checksum type="bool" name="Use Checksum">True</checksum> - <min_free_space type="int" name="Min Free Space (MB)">200</min_free_space> - 
<folder_per_package type="bool" name="Create folder for each package">False</folder_per_package> - <download_interface type="ip" name="Outgoing IP address for downloads"></download_interface> - </general> - <updates name="Updates"> - <search_updates type="bool" name="Search">True</search_updates> - <install_updates type="bool" name="Install">False</install_updates> - </updates> - <reconnect name="Reconnect"> - <activated type="bool" name="Use Reconnect">False</activated> - <method type="str" name="Method">None</method> - <startTime type="time" name="Start">0:00</startTime> - <endTime type="time" name="End">0:00</endTime> - </reconnect> - <downloadTime name="Download Time"> - <start type="time" name="Start">0:00</start> - <end type="time" name="End">0:00</end> - </downloadTime> - <proxy name="Proxy"> - <activated type="bool" name="Activated">False</activated> - <adress type="str" name="Adress">http://localhost:8080</adress> - <protocol type="str" name="Protocol">http</protocol> - </proxy> - <plugins> <!-- python import style, separated with comma --> - <load_hook_plugins> - module.plugins.hooks.ClickAndLoad, - module.plugins.hooks.ContainerDownload, - module.plugins.hooks.ExternalScripts, - </load_hook_plugins> - <load_captcha_plugins> - module.plugins.captcha.GigasizeCom, - module.plugins.captcha.LinksaveIn, - module.plugins.captcha.MegauploadCom, - module.plugins.captcha.NetloadIn, - module.plugins.captcha.ShareonlineBiz, - </load_captcha_plugins> - <load_container_plugins> - module.plugins.container.CCF, - module.plugins.container.DLC_25, - module.plugins.container.DLC_26, - module.plugins.container.RSDF, - module.plugins.container.LinkList, - </load_container_plugins> - <load_crypter_plugins> - module.plugins.crypter.DDLMusicOrg, - module.plugins.crypter.FourChanOrg, - module.plugins.crypter.HoerbuchIn, - module.plugins.crypter.LixIn, - module.plugins.crypter.OneKhDe, - module.plugins.crypter.RelinkUs, - module.plugins.crypter.RSLayerCom, - 
module.plugins.crypter.SecuredIn, - module.plugins.crypter.SerienjunkiesOrg, - module.plugins.crypter.StealthTo, - module.plugins.crypter.YoutubeChannel, - </load_crypter_plugins> - <load_hoster_plugins> - module.plugins.hoster.DepositfilesCom, - module.plugins.hoster.DuckloadCom, - module.plugins.hoster.FilefactoryCom, - module.plugins.hoster.FilesmonsterCom, - module.plugins.hoster.FreakshareNet, - module.plugins.hoster.GigasizeCom, - module.plugins.hoster.HotfileCom, - module.plugins.hoster.MegauploadCom, - module.plugins.hoster.MegavideoCom, - module.plugins.hoster.MyvideoDe, - module.plugins.hoster.NetloadIn, - module.plugins.hoster.PornhostCom, - module.plugins.hoster.PornhubCom, - module.plugins.hoster.RapidshareCom, - module.plugins.hoster.RedtubeCom, - module.plugins.hoster.ShareonlineBiz, - module.plugins.hoster.ShragleCom, - module.plugins.hoster.StorageTo, - module.plugins.hoster.UploadedTo, - module.plugins.hoster.XupIn, - module.plugins.hoster.YoupornCom, - module.plugins.hoster.YoutubeCom, - module.plugins.hoster.ZippyshareCom, - module.plugins.hoster.ZshareNet, - module.plugins.hoster.SharenowNet, - module.plugins.hoster.YourfilesTo, - module.plugins.hoster.Xdcc, - module.plugins.hoster.Ftp, - </load_hoster_plugins> - <load_account_plugins> - module.plugins.accounts.RapidshareCom, - module.plugins.accounts.UploadedTo, - </load_account_plugins> - </plugins> -</config> diff --git a/module/config/default.config b/module/config/default.config new file mode 100644 index 000000000..7a9b7a93c --- /dev/null +++ b/module/config/default.config @@ -0,0 +1,52 @@ +remote - "Remote":
+ int port : "Port" = 7227
+ ip listenaddr : "Adress" = 0.0.0.0
+ str username : "Username" = admin
+ str password : "Password" = pwhere
+ssl - "SSL":
+ bool activated : "Activated"= False
+ str cert : "SSL Certificate" = ssl.crt
+ str key : "SSL Key" = ssl.key
+webinterface - "Webinterface":
+ bool activated : "Activated" = True
+ str server : "Server" = builtin #builtin;lighttpd;nginx;fastcgi
+ bool https : "Use HTTPS" = False
+ ip host : "IP" = 0.0.0.0
+ int port : "Port" = 8001
+ str template : "Template" = default
+xdcc - "xdcc":
+ str nick : "Nickname" = pyload
+ str ident : "Ident" = pyloadident
+ str realname : "Realname" = pyloadreal
+log - "Log":
+ bool file_log : "File Log" = True
+ str log_folder : "Folder" = Logs
+ int log_count : "Count" = 5
+general - "General":
+ str language : "Language" = de #input="en;de;fr;nl;pl"
+ str download_folder : "Download Folder" = Downloads
+ int max_downloads : "Max Parallel Downloads" = 3
+ str link_file : "File For Links" = links.txt
+ str failed_file : "File For Failed Links" = failed_links.txt
+ bool debug_mode : "Debug Mode" = False
+ int max_download_time : "Max Download Time" = 5
+ int download_speed_limit : "Download Speed Limit" = 0
+ bool checksum : "Use Checksum" = True
+ int min_free_space : "Min Free Space (MB)" = 200
+ bool folder_per_package : "Create folder for each package" = False
+ ip download_interface : "Outgoing IP address for downloads" = None
+updates - "Updates":
+ bool search_updates : "Search" = True
+ bool install_updates : "Install" = False
+reconnect - "Reconnect":
+ bool activated : "Use Reconnect" = False
+ str method : "Method" = None
+ time startTime : "Start" = 0:00
+ time endTime : "End" = 0:00
+downloadTime - "Download Time":
+ time start : "Start" = 0:00
+ time end : "End" = 0:00
+proxy - "Proxy":
+ bool activated : "Activated" = False
+ str adress : "Adress" = http://localhost:8080
+ str protocol : "Protocol" = http
\ No newline at end of file diff --git a/module/config/plugin_default.config b/module/config/plugin_default.config new file mode 100644 index 000000000..a10b03587 --- /dev/null +++ b/module/config/plugin_default.config @@ -0,0 +1,14 @@ +remote - "Remote": + int port : "Port" = 7227 + ip listenaddr : "Adress" = 0.0.0.0 + str username : "Username" = admin + str password : "Password" = pwhere +SSL - "SSL": + bool activated : "Activated" = False + file key : "SSL - Cert" = /home/martin/pyload/ssl.crt1 + file cert : "SSL - Key" = /home/martin/pyload/ssl.key + int list : "A List" = [ 1 , 2 , 3 , + 4, + 5, + 6, + ]
\ No newline at end of file diff --git a/module/config/plugin_default.xml b/module/config/plugin_default.xml deleted file mode 100644 index d9105d5d4..000000000 --- a/module/config/plugin_default.xml +++ /dev/null @@ -1,40 +0,0 @@ -<?xml version="1.0" ?> -<config name="Configuration" version="0.1"> - <RapidshareCom> - <server input=";Cogent;Deutsche Telekom;Level(3);Level(3) #2;GlobalCrossing;Level(3) #3;Teleglobe;GlobalCrossing #2;TeliaSonera #2;Teleglobe #2;TeliaSonera #3;TeliaSonera"></server> - <premium>False</premium> - <username></username> - <password></password> - </RapidshareCom> - <NetloadIn> - <premium>False</premium> - <username></username> - <password></password> - </NetloadIn> - <UploadedTo> - <premium>False</premium> - <username></username> - <password></password> - </UploadedTo> - <ShareonlineBiz> - <premium>False</premium> - <username></username> - <password></password> - </ShareonlineBiz> - <HotfileCom> - <premium>False</premium> - <username></username> - <password></password> - </HotfileCom> - <YoutubeCom> - <quality>hd</quality><!-- hd: high definition, hq: high quality, sd: standard definition --> - </YoutubeCom> - <YoutubeChannel> - <!-- False for no limitation --> - <max_videos>False</max_videos> - <video_groups name="Video Groups" type="str" input="uploads;favorites">uploads,favorites</video_groups> - </YoutubeChannel> - <SerienjunkiesOrg> - <preferredHoster>RapidshareCom,UploadedTo,NetloadIn,FilefactoryCom</preferredHoster> - </SerienjunkiesOrg> -</config> diff --git a/module/network/Request.py b/module/network/Request.py index 4c7de2b22..4649c712a 100755 --- a/module/network/Request.py +++ b/module/network/Request.py @@ -23,7 +23,8 @@ import base64 import time from os import sep, rename, stat -from os.path import exists +from os.path import exists, join +from shutil import move import urllib from cStringIO import StringIO import pycurl @@ -92,7 +93,7 @@ class Request: self.pycurl.setopt(pycurl.SSL_VERIFYPEER, 0) if self.debug: 
self.pycurl.setopt(pycurl.VERBOSE, 1) - if self.interface: + if self.interface and self.interface.lower() != "none": self.pycurl.setopt(pycurl.INTERFACE, self.interface) @@ -186,13 +187,7 @@ class Request: self.pycurl.setopt(pycurl.USERPWD, upwstr) self.pycurl.setopt(pycurl.HTTPAUTH, pycurl.HTTPAUTH_ANY) - def add_cookies(self, req): - cookie_head = "" - for cookie in self.cookies: - cookie_head += cookie.name + "=" + cookie.value + "; " - req.add_header("Cookie", cookie_head) - - def clear_cookies(self): + def clearCookies(self): self.pycurl.setopt(pycurl.COOKIELIST, "") def add_proxy(self, protocol, adress): @@ -200,7 +195,7 @@ class Request: self.pycurl.setopt(pycurl.PROXY, adress.split(":")[0]) self.pycurl.setopt(pycurl.PROXYPORT, adress.split(":")[1]) - def download(self, url, file_name, get={}, post={}, ref=True, cookies=True, no_post_encode=False): + def download(self, url, file_name, folder, get={}, post={}, ref=True, cookies=True, no_post_encode=False): url = str(url) @@ -216,11 +211,10 @@ class Request: else: get = "" - file_temp = self.get_free_name(file_name) + ".part" - if not self.canContinue: - self.fp = open(file_temp, 'wb') - else: - self.fp = open(file_temp, 'ab') + file_temp = self.get_free_name(folder,file_name) + ".part" + + self.fp = open(file_temp, 'wb' if not self.canContinue else 'ab') + partSize = self.fp.tell() self.init_curl() @@ -291,20 +285,20 @@ class Request: code, msg = e if not code == 23: raise Exception, e + finally: + self.dl = False + self.dl_finished = time.time() self.addCookies() self.fp.close() - if self.abort: - raise AbortDownload - free_name = self.get_free_name(file_name) - rename(file_temp, free_name) + free_name = self.get_free_name(folder, file_name) + move(file_temp, free_name) - self.dl = False - self.dl_finished = time.time() - - return free_name + #@TODO content disposition + + #return free_name def updateCurrentSpeed(self, speed): self.dl_speed = speed @@ -358,8 +352,9 @@ class Request: self.dl_arrived = 
int(dl_d) self.dl_size = int(dl_t) - def get_free_name(self, file_name): + def get_free_name(self, folder, file_name): file_count = 0 + file_name = join(folder, file_name) while exists(file_name): file_count += 1 if "." in file_name: diff --git a/module/plugins/Plugin.py b/module/plugins/Plugin.py index 0e04a86e6..51cd78f2d 100644 --- a/module/plugins/Plugin.py +++ b/module/plugins/Plugin.py @@ -22,15 +22,35 @@ import re from os.path import exists from os.path import join +from time import time from time import sleep + import sys from os.path import exists from os import makedirs -from module.DownloadThread import CaptchaError -class Plugin(): +def dec(func): + def new(*args): + if args[0].pyfile.abort: + raise Abort + return func(*args) + return new + +class Abort(Exception): + """ raised when aborted """ + +class Fail(Exception): + """ raised when failed """ + +class Reconnect(Exception): + """ raised when reconnected """ + +class Retry(Exception): + """ raised when start again from beginning """ + +class Plugin(object): __name__ = "Plugin" __version__ = "0.4" __pattern__ = None @@ -39,107 +59,55 @@ class Plugin(): __author_name__ = ("RaNaN", "spoob", "mkaay") __author_mail__ = ("RaNaN@pyload.org", "spoob@pyload.org", "mkaay@mkaay.de") - def __init__(self, parent): - self.configparser = parent.core.parser_plugins - self.config = {} - self.parent = parent - self.req = parent.core.requestFactory.getRequest(self.__name__) - self.html = 0 - self.time_plus_wait = 0 #time() + wait in seconds - self.want_reconnect = False - self.multi_dl = True - self.ocr = None #captcha reader instance - self.logger = logging.getLogger("log") - self.decryptNow = True - self.pyfile = self.parent - - def prepare(self, thread): - self.want_reconnect = False - self.pyfile.status.exists = self.file_exists() - - if not self.pyfile.status.exists: - return False - - self.pyfile.status.filename = self.get_file_name() - self.pyfile.status.waituntil = self.time_plus_wait - self.pyfile.status.url 
= self.get_file_url() - self.pyfile.status.want_reconnect = self.want_reconnect - thread.wait(self.parent) - - return True - - def set_parent_status(self): - """ sets all available Statusinfos about a File in self.parent.status - """ - pass - - def download_html(self): - """ gets the url from self.parent.url saves html in self.html and parses - """ - self.html = "" - - def file_exists(self): - """ returns True or False - """ - if re.search(r"(?!http://).*\.(dlc|ccf|rsdf|txt)", self.parent.url): - return exists(self.parent.url) - header = self.load(self.parent.url, just_header=True) - try: - if re.search(r"HTTP/1.1 404 Not Found", header): - return False - except: - pass - return True - - def get_file_url(self): - """ returns the absolute downloadable filepath - """ - return self.parent.url - - def get_file_name(self): - try: - return re.findall("([^\/=]+)", self.parent.url)[-1] - except: - return self.parent.url[:20] - - def wait_until(self): - if self.html != None: - self.download_html() - return self.time_plus_wait - - def proceed(self, url, location): - self.download(url, location) - - def set_config(self): - for k, v in self.config.items(): - self.configparser.set(self.__name__, {"option": k}, v) - - def remove_config(self, option): - self.configparser.remove(self.__name__, option) - - def get_config(self, value, default=None): - self.configparser.loadData() - return self.configparser.get(self.__name__, value, default=default) - - def read_config(self): - self.configparser.loadData() - try: - self.verify_config() - self.config = self.configparser.getConfig()[self.__name__] - except: - pass - - def verify_config(self): - pass - - def init_ocr(self): - captchaClass = self.parent.core.pluginManager.getCaptchaPlugin(self.__name__) - self.ocr = captchaClass() + def __new__(cls, *args, **kws): + for f in dir(cls): + if not f.startswith("_") and f not in ("checksum"): + setattr(cls, f, dec(getattr(cls, f)) ) + + o = super(cls.__class__, cls).__new__(cls) + #wrap 
decorator around every method + return o + + def __init__(self, pyfile): + self.config = pyfile.m.core.config + + self.req = pyfile.m.core.requestFactory.getRequest(self.__name__) + + self.wantReconnect = False + self.multiDL = True + + self.waitUntil = 0 # time() + wait in seconds + self.premium = False + + self.ocr = None # captcha reader instance + self.account = pyfile.m.core.accountManager.getAccount(self.__name__) # account handler instance + self.req = pyfile.m.core.requestFactory.getRequest(self.__name__, self.account) + + self.log = logging.getLogger("log") + + self.pyfile = pyfile + self.thread = None # holds thread in future + def __call__(self): + return self.__name__ + + def preprocessing(self, thread): + """ handles important things to do before starting """ + self.thread = thread + + if not self.account: + self.req.clearCookies() + + return self.process(self.pyfile) + + #---------------------------------------------------------------------- + def process(self, pyfile): + """the 'main' method of every plugin""" raise NotImplementedError - def check_file(self, local_file): + + def checksum(self, local_file=None): """ return codes: 0 - checksum ok @@ -148,8 +116,48 @@ class Plugin(): 10 - not implemented 20 - unknown error """ + #@TODO checksum check hook + return (True, 10) + + def setConf(self, option, value): + """ sets a config value """ + self.config.setPlugin(self.__name__, option, value) + + def removeConf(self, option): + """ removes a config value """ + raise NotImplementedError + + def getConf(self, option): + """ gets a config value """ + return self.config.getPlugin(self.__name__, option) + + + def setWait(self, seconds): + """ set the wait time to specified seconds """ + self.waitUntil = time() + int(seconds) + + def wait(): + """ waits the time previously set """ + pass + + def fail(self, reason): + """ fail and give reason """ + raise Fail(reason) + + def offline(self): + """ fail and indicate file is offline """ + raise Fail("offline") + 
+ def retry(self): + """ begin again from the beginning """ + raise Retry + + def askCaptcha(self, url): + """ loads the catpcha and decrypt it or ask the user for input """ + pass + def waitForCaptcha(self, captchaData, imgType): captchaManager = self.parent.core.captchaManager task = captchaManager.newTask(self) @@ -165,16 +173,19 @@ class Plugin(): return result def load(self, url, get={}, post={}, ref=True, cookies=True, just_header=False): + """ returns the content loaded """ return self.req.load(url, get, post, ref, cookies, just_header) - def download(self, url, file_name, get={}, post={}, ref=True, cookies=True): - download_folder = self.parent.core.config['general']['download_folder'] - if self.pyfile.package.data["package_name"] != (self.parent.core.config['general']['link_file']) and self.parent.core.xmlconfig.get("general", "folder_per_package", False): - self.pyfile.folder = self.pyfile.package.data["package_name"] - location = join(download_folder, self.pyfile.folder.decode(sys.getfilesystemencoding())) - if not exists(location): makedirs(location) - file_path = join(location.decode(sys.getfilesystemencoding()), self.pyfile.status.filename.decode(sys.getfilesystemencoding())) - else: - file_path = join(download_folder, self.pyfile.status.filename.decode(sys.getfilesystemencoding())) + def download(self, url, get={}, post={}, ref=True, cookies=True): + """ downloads the url content to disk """ + download_folder = self.config['general']['download_folder'] + + location = join(download_folder, self.pyfile.package().folder.decode(sys.getfilesystemencoding())) + + if not exists(location): + makedirs(location) + + newname = self.req.download(url, self.pyfile.name, location, get, post, ref, cookies) - self.pyfile.status.filename = self.req.download(url, file_path, get, post, ref, cookies) + if newname: + self.pyfile.name = newname diff --git a/module/plugins/hooks/ContainerDownload.py b/module/plugins/hooks/ContainerDownload.py deleted file mode 100644 index 
673931391..000000000 --- a/module/plugins/hooks/ContainerDownload.py +++ /dev/null @@ -1,40 +0,0 @@ -# -*- coding: utf-8 -*- - -""" - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - See the GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, see <http://www.gnu.org/licenses/>. - - @author: mkaay - @interface-version: 0.1 -""" - -from module.plugins.Hook import Hook - -from os.path import join, abspath - -class ContainerDownload(Hook): - __name__ = "ContainerDownload" - __version__ = "0.1" - __description__ = """add the downloaded container to current package""" - __author_name__ = ("mkaay") - __author_mail__ = ("mkaay@mkaay.de") - - def downloadFinished(self, pyfile): - filename = pyfile.status.filename - if not pyfile.url.startswith("http"): - return - if filename.endswith(".dlc") or filename.endswith(".ccf") or filename.endswith(".rsdf"): - self.logger.info("ContainerDownload: adding container file") - location = abspath(join(pyfile.folder, filename)) - newFile = self.core.file_list.collector.addLink(location) - self.core.file_list.packager.addFileToPackage(pyfile.package.data["id"], self.core.file_list.collector.popFile(newFile)) diff --git a/module/plugins/hooks/LinuxFileEvents.py b/module/plugins/hooks/LinuxFileEvents.py deleted file mode 100644 index f4fe12de4..000000000 --- a/module/plugins/hooks/LinuxFileEvents.py +++ /dev/null @@ -1,75 +0,0 @@ -# -*- coding: utf-8 -*- - -""" - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU 
General Public License as published by - the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - See the GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, see <http://www.gnu.org/licenses/>. - - @author: mkaay - @interface-version: 0.1 -""" - -from module.plugins.Hook import Hook -import os - -class LinuxFileEvents(Hook): - __name__ = "LinuxFileEvents" - __version__ = "0.1" - __description__ = """monitors files and directories for changes""" - __author_name__ = ("mkaay") - __author_mail__ = ("mkaay@mkaay.de") - - def __init__(self, core): - Hook.__init__(self, core) - - return #@TODO remove when working correctly - - if not os.name == "posix": - return - - self.core.check_file(self.core.make_path("container"), _("folder for container"), True) - self.core.check_install("pyinotify", _("pyinotify for LinuxFileEvents")) - - try: - import pyinotify - except: - return - wm = pyinotify.WatchManager() - - class FileChangeHandler(pyinotify.ProcessEvent): - def __init__(self, hook): - self.hook = hook - - def process_default(self, event): - self.hook.fileChangeEvent(event.path) - - notifier = pyinotify.ThreadedNotifier(wm, FileChangeHandler(self)) - notifier.start() - mask = pyinotify.IN_MODIFY | pyinotify.IN_CLOSE_WRITE | pyinotify.IN_MOVED_TO - wm.add_watch(os.path.join(self.core.path, "links.txt"), mask) - wm.add_watch(os.path.join(self.core.path, "container"), mask, rec=True, auto_add=True) - - def fileChangeEvent(self, path): - path = os.path.abspath(path) - if self.isValidContainer(path): - self.addNewFile(path) - - def isValidContainer(self, path): - ext = [".txt", ".dlc", ".ccf", ".rsdf"] - for e in ext: - if path.endswith(e): - return 
True - return False - - def addNewFile(self, path): - self.core.server_methods.add_package("Container", [path]) - diff --git a/module/plugins/hoster/BasePlugin.py b/module/plugins/hoster/BasePlugin.py new file mode 100644 index 000000000..09545d493 --- /dev/null +++ b/module/plugins/hoster/BasePlugin.py @@ -0,0 +1,25 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import re +from module.plugins.Hoster import Hoster + +class BasePlugin(Hoster): + __name__ = "BasePlugin" + __type__ = "hoster" + __pattern__ = r"^unmatchable$" + __version__ = "0.1" + __description__ = """Base Plugin when any other didnt fit""" + __author_name__ = ("RaNaN") + __author_mail__ = ("RaNaN@pyload.org") + + def process(self, pyfile): + """main function""" + + if pyfile.url.startswith("http://"): + + pyfile.name = re.findall("([^\/=]+)", pyfile.url)[-1] + self.download(pyfile.url) + + else: + self.fail("No Plugin matched and not a downloadable url.")
\ No newline at end of file diff --git a/module/plugins/hoster/YoutubeCom.py b/module/plugins/hoster/YoutubeCom.py index 6c952e2ba..978d89a37 100644 --- a/module/plugins/hoster/YoutubeCom.py +++ b/module/plugins/hoster/YoutubeCom.py @@ -9,6 +9,8 @@ class YoutubeCom(Hoster): __type__ = "hoster" __pattern__ = r"http://(www\.)?(de\.)?\youtube\.com/watch\?v=.*" __version__ = "0.2" + __config__ = [ ("int", "quality" , "Quality Setting", "hd;lq"), + ("int", "config", "Config Settings" , "default" ) ] __description__ = """Youtube.com Video Download Hoster""" __author_name__ = ("spoob") __author_mail__ = ("spoob@pyload.org") diff --git a/module/web/ServerThread.py b/module/web/ServerThread.py index 4ced2e1e4..f7bf11b3c 100644 --- a/module/web/ServerThread.py +++ b/module/web/ServerThread.py @@ -12,13 +12,13 @@ import sys import logging core = None -logger = logging.getLogger("log") +log = logging.getLogger("log") class WebServer(threading.Thread): def __init__(self, pycore): global core threading.Thread.__init__(self) - self.pycore = pycore + self.core = pycore core = pycore self.running = True self.server = pycore.config['webinterface']['server'] @@ -26,23 +26,25 @@ class WebServer(threading.Thread): self.setDaemon(True) def run(self): - sys.path.append(join(self.pycore.path, "module", "web")) + sys.path.append(join(pypath, "module", "web")) avail = ["builtin"] - host = self.pycore.config['webinterface']['host'] - port = self.pycore.config['webinterface']['port'] - path = join(self.pycore.path, "module", "web") + host = self.core.config['webinterface']['host'] + port = self.core.config['webinterface']['port'] + path = join(pypath, "module", "web") out = StringIO() - if not exists(join(self.pycore.path, "module", "web", "pyload.db")): + #@TODO rewrite, maybe as hook + + if exists(join("module", "web", "pyload.db")): #print "########## IMPORTANT ###########" #print "### Database for Webinterface does not exitst, it will not be available." 
#print "### Please run: python %s syncdb" % join(self.pycore.path, "module", "web", "manage.py") #print "### You have to add at least one User, to gain access to webinterface: python %s createsuperuser" % join(self.pycore.path, "module", "web", "manage.py") #print "### Dont forget to restart pyLoad if you are done." - logger.warning(_("Database for Webinterface does not exitst, it will not be available.")) - logger.warning(_("Please run: python %s syncdb") % join(self.pycore.path, "module", "web", "manage.py")) - logger.warning(_("You have to add at least one User, to gain access to webinterface: python %s createsuperuser") % join(self.pycore.path, "module", "web", "manage.py")) - logger.warning(_("Dont forget to restart pyLoad if you are done.")) + log.warning(_("Database for Webinterface does not exitst, it will not be available.")) + log.warning(_("Please run: python %s syncdb") % join(pypath, "module", "web", "manage.py")) + log.warning(_("You have to add at least one User, to gain access to webinterface: python %s createsuperuser") % join(configdir, "module", "web", "manage.py")) + log.warning(_("Dont forget to restart pyLoad if you are done.")) return None try: @@ -68,10 +70,10 @@ class WebServer(threading.Thread): try: - if exists(self.pycore.config["ssl"]["cert"]) and exists(self.pycore.config["ssl"]["key"]): + if exists(self.core.config["ssl"]["cert"]) and exists(self.core.config["ssl"]["key"]): if not exists("ssl.pem"): - key = file(self.pycore.config["ssl"]["key"], "rb") - cert = file(self.pycore.config["ssl"]["cert"], "rb") + key = file(self.core.config["ssl"]["key"], "rb") + cert = file(self.core.config["ssl"]["cert"], "rb") pem = file("ssl.pem", "wb") pem.writelines(key.readlines()) @@ -94,7 +96,7 @@ class WebServer(threading.Thread): if self.server == "nginx": - self.pycore.logger.info(_("Starting nginx Webserver: %s:%s") % (host, port)) + self.core.logger.info(_("Starting nginx Webserver: %s:%s") % (host, port)) config = file(join(path, "servers", 
"nginx_default.conf"), "rb") content = config.readlines() config.close() @@ -111,7 +113,7 @@ class WebServer(threading.Thread): ssl on; ssl_certificate %s; ssl_certificate_key %s; - """ % (self.pycore.config["ssl"]["cert"], self.pycore.config["ssl"]["key"])) + """ % (self.core.config["ssl"]["cert"], self.core.config["ssl"]["key"])) else: content = content.replace("%(ssl)", "") @@ -127,7 +129,7 @@ class WebServer(threading.Thread): elif self.server == "lighttpd": - self.pycore.logger.info(_("Starting lighttpd Webserver: %s:%s") % (host, port)) + self.core.logger.info(_("Starting lighttpd Webserver: %s:%s") % (host, port)) config = file(join(path, "servers", "lighttpd_default.conf"), "rb") content = config.readlines() config.close() @@ -144,7 +146,7 @@ class WebServer(threading.Thread): ssl.engine = "enable" ssl.pemfile = "%s" ssl.ca-file = "%s" - """ % (join(self.pycore.path, "ssl.pem"), self.pycore.config["ssl"]["cert"])) + """ % (join(selcorere.path, "ssl.pem"), self.core.config["ssl"]["cert"])) else: content = content.replace("%(ssl)", "") new_config = file(join(path, "servers", "lighttpd.conf"), "wb") @@ -158,17 +160,14 @@ class WebServer(threading.Thread): run_fcgi.handle("daemonize=false", "method=threaded", "host=127.0.0.1", "port=9295") - elif self.server == "builtin": - self.pycore.logger.info(_("Starting django builtin Webserver: %s:%s") % (host, port)) - - import run_server - run_server.handle(host, port) - #command = ['python', join(self.pycore.path, "module", "web", "run_server.py"), "%s:%s" % (host, port)] - #self.p = Popen(command, stderr=Output(out), stdin=Output(out), stdout=Output(out)) - else: + elif self.server == "fastcgi": #run fastcgi on port import run_fcgi run_fcgi.handle("daemonize=false", "method=threaded", "host=127.0.0.1", "port=%s" % str(port)) + else: + self.core.log.info(_("Starting django builtin Webserver: %s:%s") % (host, port)) + import run_server + run_server.handle(host, port) def quit(self): diff --git 
a/module/web/ajax/views.py b/module/web/ajax/views.py index c52be119f..5e4d4710b 100644 --- a/module/web/ajax/views.py +++ b/module/web/ajax/views.py @@ -75,6 +75,7 @@ def add_package(request): links = map(lambda x: x.strip(), links) links = filter(lambda x: x != "", links) + settings.PYLOAD.add_package(name, links, queue) return JsonResponse("success") @@ -199,7 +200,7 @@ def remove_package(request, id): try: settings.PYLOAD.del_packages([int(id)]) return JsonResponse("sucess") - except: + except Exception, e: return HttpResponseServerError() @permission('pyload.can_add_dl') diff --git a/module/web/pyload/views.py b/module/web/pyload/views.py index 47ef0a864..52b58cd03 100644 --- a/module/web/pyload/views.py +++ b/module/web/pyload/views.py @@ -74,7 +74,7 @@ def home(request): @permission('pyload.can_see_dl') @check_server def queue(request): - queue = settings.PYLOAD.get_full_queue() + queue = settings.PYLOAD.get_queue() return render_to_response(join(settings.TEMPLATE, 'queue.html'), RequestContext(request, {'content': queue}, [status_proc])) @@ -159,10 +159,8 @@ def logs(request, page=0): @permission('pyload.can_add_dl') @check_server def collector(request): - coll = settings.PYLOAD.get_collector_packages() - for pack in coll: - pack["children"] = map(settings.PYLOAD.get_file_info, settings.PYLOAD.get_package_files(pack["id"])) - return render_to_response(join(settings.TEMPLATE, 'collector.html'), RequestContext(request, {'content': coll}, [status_proc])) + queue = settings.PYLOAD.get_collector() + return render_to_response(join(settings.TEMPLATE, 'collector.html'), RequestContext(request, {'content': queue}, [status_proc])) @login_required diff --git a/module/web/settings.py b/module/web/settings.py index 8f9425198..7380541e6 100644 --- a/module/web/settings.py +++ b/module/web/settings.py @@ -15,9 +15,17 @@ PROJECT_DIR = os.path.dirname(__file__) PYLOAD_DIR = os.path.join(PROJECT_DIR,"..","..")
+sys.path.append(PYLOAD_DIR)
+
+
sys.path.append(os.path.join(PYLOAD_DIR, "module"))
-from XMLConfigParser import XMLConfigParser
-config = XMLConfigParser(os.path.join(PYLOAD_DIR,"module","config","core.xml"))
+
+import InitHomeDir
+sys.path.append(pypath)
+
+
+from module.ConfigParser import ConfigParser
+config = ConfigParser()
#DEBUG = config.get("general","debug")
@@ -35,8 +43,8 @@ except: server_url = "http%s://%s:%s@%s:%s/" % (
ssl,
- config.get("remote", "username"),
- config.get("remote", "password"),
+ config.username,
+ config.password,
config.get("remote", "listenaddr"),
config.get("remote", "port")
)
diff --git a/module/web/templates/default/collector.html b/module/web/templates/default/collector.html index 73625136c..b0d5b1839 100644 --- a/module/web/templates/default/collector.html +++ b/module/web/templates/default/collector.html @@ -106,10 +106,10 @@ document.addEvent("domready", function(){ </li>{% endblock %}
{% block content %}
-{% for package in content %}
-<div id="package_{{package.id}}" class="package">
+{% for id,package in content.iteritems %}
+<div id="package_{{id}}" class="package">
<div class="packagename" style="cursor: pointer;">
- {{ package.package_name }}
+ {{ package.name }}
<img title="{% trans "Delete Package" %}" width="12px" height="12px" src="{{ MEDIA_URL }}img/delete.png" />
@@ -117,15 +117,15 @@ document.addEvent("domready", function(){
<img title="{% trans "Push Package to Queue" %}" style="margin-left: -10px" height="12px" src="{{ MEDIA_URL }}img/package_go.png" />
</div>
- <div id="children_{{package.id}}" style="display: none;" class="children">
- {% for child in package.children %}
- <div class="child" id="file_{{child.id}}">
+ <div id="children_{{id}}" style="display: none;" class="children">
+ {% for lid, child in package.links.iteritems %}
+ <div class="child" id="file_{{lid}}">
<span class="child_status">
<img src="/media/default/img/status_{{child.status_type}}.png" style="width: 12px; height:12px;"/>
</span>
- <span style="font-size: 15px">{{ child.filename }}</span><br />
+ <span style="font-size: 15px">{{ child.name }}</span><br />
<div class="child_secrow">
- <span class="child_status">{{ child.status_type }}</span>{{child.status_error}}
+ <span class="child_status">{{ child.status }}</span>{{child.error}}
<span class="child_status">{{ child.size }} KB</span>
<span class="child_status">{{ child.plugin }}</span>
<span class="child_status">{% trans "Folder:" %} {{child.folder}}</span>
diff --git a/module/web/templates/default/queue.html b/module/web/templates/default/queue.html index d33a11032..d149853d5 100644 --- a/module/web/templates/default/queue.html +++ b/module/web/templates/default/queue.html @@ -96,27 +96,27 @@ document.addEvent("domready", function(){ </li>{% endblock %}
{% block content %}
-{% for package in content %}
-<div id="package_{{package.data.id}}" class="package">
+{% for id, package in content.iteritems %}
+<div id="package_{{id}}" class="package">
<div class="packagename" style="cursor: pointer;">
- {{ package.data.package_name }}
+ {{ package.name }}
<img title="{% trans "Delete Package" %}" width="12px" height="12px" src="{{ MEDIA_URL }}img/delete.png" />
<img title="{% trans "Restart Package" %}" style="margin-left: -10px" height="12px" src="{{ MEDIA_URL }}img/arrow_refresh.png" />
</div>
- <div id="children_{{package.data.id}}" style="display: none;" class="children">
- {% for child in package.children %}
- <div class="child" id="file_{{child.id}}">
+ <div id="children_{{id}}" style="display: none;" class="children">
+ {% for lid, child in package.links.iteritems %}
+ <div class="child" id="file_{{lid}}">
<span class="child_status">
- <img src="/media/default/img/status_{{child.status_type}}.png" style="width: 12px; height:12px;"/>
+ <img src="/media/default/img/status_{{child.status}}.png" style="width: 12px; height:12px;"/>
</span>
- <span style="font-size: 15px">{{ child.filename }}</span><br />
+ <span style="font-size: 15px">{{ child.name }}</span><br />
<div class="child_secrow">
- <span class="child_status">{{ child.status_type }}</span>{{child.status_error}}
+ <span class="child_status">{{ child.status }}</span>{{child.error}}
<span class="child_status">{{ child.size }} KB</span>
<span class="child_status">{{ child.plugin }}</span>
- <span class="child_status">{% trans "Folder:" %} {{child.folder}}</span>
+ <span class="child_status">{% trans "Folder:" %} {{package.folder}}</span>
<img title="{% trans "Delete Link" %}" style="cursor: pointer;" width="10px" height="10px" src="{{ MEDIA_URL }}img/delete.png" />
diff --git a/pyLoadCli.py b/pyLoadCli.py index c5996eb29..bbe126a3d 100755 --- a/pyLoadCli.py +++ b/pyLoadCli.py @@ -34,8 +34,11 @@ import threading import time from time import sleep import xmlrpclib +from itertools import islice -from module.XMLConfigParser import XMLConfigParser +from module import InitHomeDir + +from module.ConfigParser import ConfigParser if sys.stdout.encoding.lower().startswith("utf"): conv = unicode @@ -63,7 +66,7 @@ class pyLoadCli: self.core.add_package(add, [add]) print _("Linklist added") exit() - + self.links_added = 0 os.system("clear") @@ -103,11 +106,11 @@ class pyLoadCli: def format_time(self, seconds): seconds = int(seconds) - + hours, seconds = divmod(seconds, 3600) minutes, seconds = divmod(seconds, 60) return "%.2i:%.2i:%.2i" % (hours, minutes, seconds) - + def format_size(self, size): return conv(size / 1024) + " MiB" @@ -156,7 +159,7 @@ class pyLoadCli: self.println(line, "") line += 1 self.menuline = line - + self.build_menu() # self.file_list = data @@ -182,7 +185,7 @@ class pyLoadCli: line += 1 self.println(line, "") elif self.pos[0] == 1:#add links - + if self.pos[1] == 0: self.println(line, "") line += 1 @@ -199,7 +202,7 @@ class pyLoadCli: self.println(line, mag("0.") + _(" back to main menu")) line += 1 self.println(line, "") - + else: self.println(line, _("Package: %s") % self.new_package['name']) line += 1 @@ -222,7 +225,7 @@ class pyLoadCli: self.println(line, _("Type d(number of package) to delete a package, r to restart, or w/o d,r to look into it.")) line += 1 i = 0 - for id in range(self.pos[2], self.pos[2] + 5): + for id, value in islice(pack.iteritems(), start=self.pos[2], end=self.pos[2] + 5 ): try: self.println(line, mag(conv(pack[id]['id'])) + ": " + pack[id]['package_name']) line += 1 @@ -232,7 +235,7 @@ class pyLoadCli: for x in range(5-i): self.println(line, "") line += 1 - + else: links = self.core.get_package_files(self.pos[1]) self.println(line, _("Type d(number) of the link you want to delete or 
r(number) to restart.")) @@ -241,24 +244,24 @@ class pyLoadCli: for id in range(self.pos[2], self.pos[2] + 5): try: link = self.core.get_file_info(links[id]) - - if not link['status_filename']: - self.println(line, mag(conv(link['id'])) + ": " + link['url']) - else: - self.println(line, mag(conv(link['id'])) + ": %s | %s | %s" % (link['filename'], link['status_type'], link['plugin'])) - line += 1 + + if not link['status_filename']: + self.println(line, mag(conv(link['id'])) + ": " + link['url']) + else: + self.println(line, mag(conv(link['id'])) + ": %s | %s | %s" % (link['filename'], link['status_type'], link['plugin'])) + line += 1 i += 1 - + except Exception, e: pass for x in range(5-i): self.println(line, "") line += 1 - + self.println(line, mag("p") + _(" - previous") + " | " + mag("n") + _(" - next")) line += 1 self.println(line, mag("0.") + _(" back to main menu")) - + self.inputline = line + 1 self.print_input() @@ -284,7 +287,7 @@ class pyLoadCli: elif inp == "5": os.system('clear') sys.exit() - + elif self.pos[0] == 1: #add links if self.pos[1] == 0: self.new_package['name'] = inp @@ -292,13 +295,13 @@ class pyLoadCli: self.pos[1] = 1 else: if inp == "END": - self.core.add_package(self.new_package['name'], self.new_package['links']) # add package + self.core.add_package(self.new_package['name'], self.new_package['links'], 1) # add package self.pos = [0, 0, 0] self.links_added = 0 else: #@TODO validation self.new_package['links'].append(inp) self.links_added += 1 - + elif self.pos[0] == 2: #remove links if self.pos[1] == 0: if inp.startswith("d"): @@ -333,7 +336,7 @@ class RefreshThread(threading.Thread): threading.Thread.__init__(self) self.setDaemon(True) self.cli = cli - + def run(self): while True: sleep(1) @@ -343,8 +346,8 @@ class RefreshThread(threading.Thread): self.cli.println(2, red(conv(e))) self.cli.pos[1] = 0 self.cli.pos[2] = 0 - - + + @@ -374,7 +377,7 @@ class _GetchUnix: import sys import tty import termios - + fd = sys.stdin.fileno() 
old_settings = termios.tcgetattr(fd) try: @@ -467,40 +470,12 @@ def print_help(): print " -h, --help", " " * 7, "Display this help screen" print "" - -def get_config_path(): - try: - from win32com.shell import shellcon, shell - homedir = shell.SHGetFolderPath(0, shellcon.CSIDL_APPDATA, 0, 0) - except ImportError: # quick semi-nasty fallback for non-windows/win32com case - if platform == 'nt': - import ctypes - from ctypes import wintypes, windll - CSIDL_APPDATA = 26 - _SHGetFolderPath = ctypes.windll.shell32.SHGetFolderPathW - _SHGetFolderPath.argtypes = [ctypes.wintypes.HWND, - ctypes.c_int, - ctypes.wintypes.HANDLE, - ctypes.wintypes.DWORD, ctypes.wintypes.LPCWSTR] - - path_buf = ctypes.wintypes.create_unicode_buffer(ctypes.wintypes.MAX_PATH) - result = _SHGetFolderPath(0, CSIDL_APPDATA, 0, 0, path_buf) - homedir = path_buf.value - else: - homedir = expanduser("~") - - if platform == "posix": - configdir = join(homedir, ".config", "pyload") - else: - configdir = join(homedir, "pyLoad") - - return join(configdir, "core.xml") -if __name__ == "__main__": - xmlconfig = XMLConfigParser(get_config_path()) - config = xmlconfig.getConfig() - translation = gettext.translation("pyLoadCli", join(abspath(dirname(__file__)), "locale"), languages=[config['general']['language']]) +if __name__ == "__main__": + config = ConfigParser() + + translation = gettext.translation("pyLoadCli", join(pypath, "locale"), languages=[config['general']['language']]) translation.install(unicode=(True if sys.stdout.encoding.lower().startswith("utf") else False)) server_url = "" @@ -530,8 +505,8 @@ if __name__ == "__main__": if config['ssl']['activated']: ssl = "s" - username = config['remote']['username'] - password = config['remote']['password'] + username = config.username + password = config.password addr = config['remote']['listenaddr'] port = config['remote']['port'] elif option in ("-u", "--username"): @@ -574,7 +549,7 @@ if __name__ == "__main__": server_url = "http%s://%s:%s@%s:%s/" % 
(ssl, username, password, addr, port) - #print server_url + print server_url if add: cli = pyLoadCli(server_url, add) else: diff --git a/pyLoadCore.py b/pyLoadCore.py index 3b7cbd60c..5dbef139a 100755 --- a/pyLoadCore.py +++ b/pyLoadCore.py @@ -13,14 +13,14 @@ You should have received a copy of the GNU General Public License along with this program; if not, see <http://www.gnu.org/licenses/>. - + @author: spoob @author: sebnapi @author: RaNaN @author: mkaay - @version: v0.3.2 + @version: v0.4.0 """ -CURRENT_VERSION = '0.3.2' +CURRENT_VERSION = '0.4.0b' from copy import deepcopy from getopt import GetoptError @@ -33,6 +33,7 @@ import logging.handlers from operator import attrgetter from os import _exit from os import chdir +from os import getcwd from os import execv from os import makedirs from os import name as platform @@ -60,8 +61,11 @@ import thread import time from time import sleep from xmlrpclib import Binary +import __builtin__ + +from module import InitHomeDir -from module.XMLConfigParser import XMLConfigParser +from module.ConfigParser import ConfigParser from module.network.Request import getURL import module.remote.SecureXMLRPCServer as Server from module.web.ServerThread import WebServer @@ -71,8 +75,9 @@ from module.CaptchaManager import CaptchaManager from module.HookManager import HookManager from module.PullEvents import PullManager from module.PluginManager import PluginManager -from module.FileList import FileList +from module.FileDatabase import FileHandler from module.RequestFactory import RequestFactory +from module.AccountManager import AccountManager class Core(object): """ pyLoad Core """ @@ -80,30 +85,19 @@ class Core(object): def __init__(self): self.doDebug = False self.arg_links = [] - self.path = abspath(dirname(__file__)) - chdir(self.path) - self.homedir = self.getHomeDir() + if len(argv) > 1: try: options, args = getopt(argv[1:], 'vca:hdusC:', ["version", "clear", "add=", "help", "debug", "user", "setup", "configdir="]) - 
customConfig = None - for option, argument in options: - if option in ("-C", "--configdir"): - customConfig = argument - if customConfig: - self.configdir = self.make_path(customConfig) - else: - self.defaultConfig() - - self.initConfig() - + for option, argument in options: if option in ("-v", "--version"): print "pyLoad", CURRENT_VERSION exit() elif option in ("-c", "--clear"): try: + #@TODO rewrite remove(join(self.configdir, "module", "links.pkl")) print "Removed Linklist" except: @@ -117,6 +111,7 @@ class Core(object): elif option in ("-d", "--debug"): self.doDebug = True elif option in ("-u", "--user"): + #@TODO rewrite from module.setup import Setup self.xmlconfig = XMLConfigParser(self.make_path(self.configdir, "core.xml"), defaultFile=join(self.path, "module", "config", "core_default.xml")) self.config = self.xmlconfig.getConfig() @@ -125,6 +120,7 @@ class Core(object): exit() elif option in ("-s", "--setup"): from module.setup import Setup + #@TODO rewrite self.xmlconfig = XMLConfigParser(self.make_path(self.configdir, "core.xml"), defaultFile=join(self.path, "module", "config", "core_default.xml")) self.config = self.xmlconfig.getConfig() s = Setup(self.path, self.config) @@ -134,38 +130,6 @@ class Core(object): print 'Unknown Argument(s) "%s"' % " ".join(argv[1:]) self.print_help() exit() - else: - self.defaultConfig() - self.initConfig() - - def defaultConfig(self): - if platform == "posix": - self.configdir = self.make_path(self.homedir, ".config", "pyload") - else: - self.configdir = self.make_path(self.homedir, "pyload") - self.check_file(self.configdir, "folder for config files", True, quiet=True) - - def initConfig(self): - #check if no config exists, assume its first run - if not exists(join(self.configdir, "core.xml")): - print "No configuration file found." 
- print "Startig Configuration Assistent" - print "" - - from module.setup import Setup - self.xmlconfig = XMLConfigParser(self.make_path(self.configdir, "core.xml"), defaultFile=join(self.path, "module", "config", "core_default.xml")) - self.config = self.xmlconfig.getConfig() - - s = Setup(self.path, self.config) - try: - result = s.start() - if not result: - remove(join(self.configdir, "core.xml")) - except Exception, e: - print e - remove(join(self.configdir, "core.xml")) - - exit() def print_help(self): print "" @@ -180,7 +144,7 @@ class Core(object): print " -u, --user", " " * 13, "Set new User and password" print " -d, --debug", " " * 12, "Enable debug mode" print " -s, --setup", " " * 12, "Run Setup Assistent" - print " -C, --configdir=<path>", " " * 1, "Custom config dir (includes db)" + print " --configdir=<path>", " " * 5, "Custom config dir, (see config folder for permanent change)" print " -h, --help", " " * 13, "Display this help screen" print "" @@ -194,144 +158,117 @@ class Core(object): def quit(self, a, b): self.shutdown() - self.logger.info(_("Received Quit signal")) + self.log.info(_("Received Quit signal")) _exit(1) def start(self): - """ starts the machine""" + """ starts the fun :D """ try: signal.signal(signal.SIGQUIT, self.quit) except: pass - - self.config = {} - self.plugins_avaible = {} - - self.plugin_folder = self.make_path("module", "plugins") - self.xmlconfig = XMLConfigParser(self.make_path(self.configdir, "core.xml"), defaultFile=self.make_path(self.path, "module", "config", "core_default.xml")) - self.config = self.xmlconfig.getConfig() - if self.doDebug == True: - self.config['general']['debug_mode'] = True - self.parser_plugins = XMLConfigParser(self.make_path(self.configdir, "plugin.xml"), defaultFile=self.make_path(self.path, "module", "config", "plugin_default.xml")) - self.config['ssl']['cert'] = self.make_path(self.config['ssl']['cert']) - self.config['ssl']['key'] = self.make_path(self.config['ssl']['key']) + self.config = 
ConfigParser() + + self.debug = self.doDebug or self.config['general']['debug_mode'] + + if self.debug: + self.init_logger(logging.DEBUG) # logging level + else: + self.init_logger(logging.INFO) # logging level + self.do_kill = False self.do_restart = False - translation = gettext.translation("pyLoad", self.make_path("locale"), languages=[self.config['general']['language']]) + + translation = gettext.translation("pyLoad", self.path("locale"), languages=["en", self.config['general']['language']]) translation.install(unicode=(True if sys.getfilesystemencoding().lower().startswith("utf") else False)) + self.log.info(_("Using home directory: %s") % getcwd() ) + + #@TODO refractor + self.check_install("Crypto", _("pycrypto to decode container files")) self.check_install("Image", _("Python Image Libary (PIL) for captha reading")) self.check_install("pycurl", _("pycurl to download any files"), True, True) self.check_install("django", _("Django for webinterface")) self.check_install("tesseract", _("tesseract for captcha reading"), False) self.check_install("gocr", _("gocr for captcha reading"), False) - - self.check_file(self.make_path(self.config['log']['log_folder']), _("folder for logs"), True) - self.check_file(self.make_path(self.config['general']['download_folder']), _("folder for downloads"), True) - self.check_file(self.make_path(self.config['general']['link_file']), _("file for links")) - self.check_file(self.make_path(self.config['general']['failed_file']), _("file for failed links")) - + + self.check_file(self.config['log']['log_folder'], _("folder for logs"), True) + self.check_file(self.config['general']['download_folder'], _("folder for downloads"), True) + if self.config['ssl']['activated']: self.check_install("OpenSSL", _("OpenSSL for secure connection"), True) - self.check_file(self.make_path(self.config['ssl']['cert']), _("ssl certificate"), False, True) - self.check_file(self.make_path(self.config['ssl']['key']), _("ssl key"), False, True) - - 
self.downloadSpeedLimit = int(self.xmlconfig.get("general", "download_speed_limit", 0)) - if self.config['general']['debug_mode']: - self.init_logger(logging.DEBUG) # logging level - else: - self.init_logger(logging.INFO) # logging level - + + self.downloadSpeedLimit = int(self.config.get("general", "download_speed_limit")) + self.requestFactory = RequestFactory(self) - self.create_plugin_index() - self.init_hooks() - path.append(self.plugin_folder) - - self.lastGuiConnected = 0 - + + #path.append(self.plugin_folder) + + self.lastClientConnected = 0 + self.server_methods = ServerMethods(self) - self.file_list = FileList(self) + + #hell yeah, so many important managers :D + self.files = FileHandler(self) + self.pluginManager = PluginManager(self) self.pullManager = PullManager(self) - self.thread_list = ThreadManager(self) + self.accountManager = AccountManager(self) + self.threadManager = ThreadManager(self) self.captchaManager = CaptchaManager(self) - - self.last_update_check = 0 - self.update_check_interval = 6 * 60 * 60 - self.update_available = self.check_update() - self.logger.info(_("Downloadtime: %s") % self.server_methods.is_time_download()) + self.hookManager = HookManager(self) + + self.log.info(_("Downloadtime: %s") % self.server_methods.is_time_download()) self.init_server() self.init_webserver() linkFile = self.config['general']['link_file'] - - packs = self.server_methods.get_queue() - found = False - for data in packs: - if data["package_name"] == linkFile: - found = data["id"] - break - if found == False: - pid = self.file_list.packager.addNewPackage(package_name=linkFile) - else: - pid = found - lid = self.file_list.collector.addLink(linkFile) - try: - self.file_list.packager.addFileToPackage(pid, self.file_list.collector.popFile(lid)) - if self.arg_links: - for link in self.arg_links: - lid = self.file_list.collector.addLink(link) - self.file_list.packager.addFileToPackage(pid, self.file_list.collector.popFile(lid)) - - 
self.file_list.packager.pushPackage2Queue(pid) - self.file_list.continueAborted() - except: - pass freeSpace = self.freeSpace() if freeSpace > 5 * 1024: - self.logger.info(_("Free space: %sGB") % (freeSpace / 1024)) + self.log.info(_("Free space: %sGB") % (freeSpace / 1024)) else: - self.logger.info(_("Free space: %sMB") % freeSpace) + self.log.info(_("Free space: %sMB") % freeSpace) + + self.threadManager.pause = False + #self.threadManager.start() - self.thread_list.pause = False - self.thread_list.start() - self.hookManager.coreReady() while True: sleep(2) if self.do_restart: - self.logger.info(_("restarting pyLoad")) + self.log.info(_("restarting pyLoad")) self.restart() if self.do_kill: self.shutdown() - self.logger.info(_("pyLoad quits")) + self.log.info(_("pyLoad quits")) exit() - if self.last_update_check + self.update_check_interval <= time.time(): - self.update_available = self.check_update() + + self.threadManager.work() + self.hookManager.periodical() def init_server(self): try: server_addr = (self.config['remote']['listenaddr'], int(self.config['remote']['port'])) - usermap = {self.config['remote']['username']: self.config['remote']['password']} + usermap = {self.config.username: self.config.password} if self.config['ssl']['activated']: self.server = Server.SecureXMLRPCServer(server_addr, self.config['ssl']['cert'], self.config['ssl']['key'], usermap) - self.logger.info(_("Secure XMLRPC Server Started")) + self.log.info(_("Secure XMLRPC Server Started")) else: self.server = Server.AuthXMLRPCServer(server_addr, usermap) - self.logger.info(_("Auth XMLRPC Server Started")) + self.log.info(_("Auth XMLRPC Server Started")) self.server.register_instance(self.server_methods) thread.start_new_thread(self.server.serve_forever, ()) except Exception, e: - self.logger.error(_("Failed starting XMLRPC server CLI and GUI will not be available: %s") % str(e)) - if self.config['general']['debug_mode']: + self.log.error(_("Failed starting XMLRPC server CLI and GUI will 
not be available: %s") % str(e)) + if self.debug: import traceback traceback.print_exc() @@ -339,23 +276,20 @@ class Core(object): if self.config['webinterface']['activated']: self.webserver = WebServer(self) self.webserver.start() - + def init_logger(self, level): console = logging.StreamHandler(stdout) - frm = logging.Formatter("%(asctime)s: %(levelname)-8s %(message)s", "%d.%m.%Y %H:%M:%S") + frm = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s", "%d.%m.%Y %H:%M:%S") console.setFormatter(frm) - self.logger = logging.getLogger("log") # settable in config + self.log = logging.getLogger("log") # settable in config if self.config['log']['file_log']: - file_handler = logging.handlers.RotatingFileHandler(join(self.path, self.config['log']['log_folder'], 'log.txt'), maxBytes=102400, backupCount=int(self.config['log']['log_count'])) #100 kib each + file_handler = logging.handlers.RotatingFileHandler(join(self.config['log']['log_folder'], 'log.txt'), maxBytes=102400, backupCount=int(self.config['log']['log_count'])) #100 kib each file_handler.setFormatter(frm) - self.logger.addHandler(file_handler) + self.log.addHandler(file_handler) - self.logger.addHandler(console) #if console logging - self.logger.setLevel(level) - - def init_hooks(self): - self.hookManager = HookManager(self) + self.log.addHandler(console) #if console logging + self.log.setLevel(level) def check_install(self, check_name, legend, python=True, essential=False): """check wether needed tools are installed""" @@ -366,7 +300,7 @@ class Core(object): pipe = subprocess.PIPE subprocess.Popen(check_name, stdout=pipe, stderr=pipe) except: - print _("Install %s") % legend + self.log.info( _("Install %s") % legend ) if essential: exit() def check_file(self, check_names, description="", folder=False, empty=True, essential=False, quiet=False): @@ -394,30 +328,27 @@ class Core(object): file_created = False if not file_exists and not quiet: if file_created: - print _("%s created") % description + 
self.log.info( _("%s created") % description ) else: if not empty: - print _("could not find %s: %s") % (description, tmp_name) + self.log.warning( _("could not find %s: %s") % (description, tmp_name) ) else: - print _("could not create %s: %s") % (description, tmp_name) + self.log.warning( _("could not create %s: %s") % (description, tmp_name) ) if essential: exit() - - def isGUIConnected(self): - return self.lastGuiConnected + 10 > time.time() - + + def isClientConnected(self): + return self.lastClientConnected + 30 > time.time() + def restart(self): self.shutdown() execv(executable, [executable, "pyLoadCore.py"]) - def create_plugin_index(self): - self.pluginManager = PluginManager(self) - def compare_time(self, start, end): - + start = map(int, start) end = map(int, end) - + if start == end: return True now = list(time.localtime()[3:5]) @@ -425,12 +356,12 @@ class Core(object): elif start > end and (now > start or now < end): return True elif start < now and end < now and start > end: return True else: return False - + def getMaxSpeed(self): return self.downloadSpeedLimit - + def shutdown(self): - self.logger.info(_("shutting down...")) + self.log.info(_("shutting down...")) try: if self.config['webinterface']['activated']: self.webserver.quit() @@ -443,61 +374,13 @@ class Core(object): self.file_list.save() self.requestFactory.clean() except: - self.logger.info(_("error while shutting down")) + self.log.info(_("error while shutting down")) - def check_update(self): - try: - if self.config['updates']['search_updates']: - version_check = getURL("http://get.pyload.org/check/%s/" % (CURRENT_VERSION,)) - if version_check == "": - self.logger.info(_("No Updates for pyLoad")) - return False - else: - self.logger.info(_("New pyLoad Version %s available") % version_check) - return True - else: - return False - except: - self.logger.error(_("Not able to connect server")) - finally: - self.last_update_check = time.time() - - def install_update(self): - try: - if 
self.config['updates']['search_updates']: - if self.core.config['updates']['install_updates']: - version_check = getURL("http://get.pyload.org/get/update/%s/" % (CURRENT_VERSION,)) - else: - version_check = getURL("http://get.pyload.org/check/%s/" % (CURRENT_VERSION,)) - if version_check == "": - return False - else: - if self.config['updates']['install_updates']: - try: - tmp_zip_name = __import__("tempfile").NamedTemporaryFile(suffix=".zip").name - tmp_zip = open(tmp_zip_name, 'wb') - tmp_zip.write(version_check) - tmp_zip.close() - __import__("module.Unzip", globals(), locals(), "Unzip", -1).Unzip().extract(tmp_zip_name, "Test/") - return True - except: - self.logger.info(_("Auto install Failed")) - return False - else: - return False - else: - return False - finally: - return False + def path(self, *args): + return join(pypath, *args) - def make_path(self, *args): - if isabs(args[0]): - return join(*args) - else: - return join(self.path, *args) - def freeSpace(self): - folder = self.make_path(self.config['general']['download_folder']) + folder = self.config['general']['download_folder'] if platform == 'nt': import ctypes free_bytes = ctypes.c_ulonglong(0) @@ -507,39 +390,21 @@ class Core(object): from os import statvfs s = statvfs(folder) return s.f_bsize * s.f_bavail / 1024 / 1024 #megabyte - - def getHomeDir(self): - try: - from win32com.shell import shellcon, shell - return shell.SHGetFolderPath(0, shellcon.CSIDL_APPDATA, 0, 0) - except ImportError: # quick semi-nasty fallback for non-windows/win32com case - if platform == 'nt': - import ctypes - from ctypes import wintypes, windll - CSIDL_APPDATA = 26 - _SHGetFolderPath = ctypes.windll.shell32.SHGetFolderPathW - _SHGetFolderPath.argtypes = [ctypes.wintypes.HWND, - ctypes.c_int, - ctypes.wintypes.HANDLE, - ctypes.wintypes.DWORD, ctypes.wintypes.LPCWSTR] - - path_buf = ctypes.wintypes.create_unicode_buffer(ctypes.wintypes.MAX_PATH) - result = _SHGetFolderPath(0, CSIDL_APPDATA, 0, 0, path_buf) - return 
path_buf.value - else: - return expanduser("~") - + + #################################### ########## XMLRPC Methods ########## #################################### class ServerMethods(): + """ methods that can be used by clients with xmlrpc connection""" def __init__(self, core): self.core = core def status_downloads(self): + """ gives status about all files currently processed """ downloads = [] - for pyfile in self.core.thread_list.py_downloading: + for pyfile in [x.active for x in self.core.threadManager.threads + self.core.threadManager.localThreads if x.active]: download = {} download['id'] = pyfile.id download['name'] = pyfile.status.filename @@ -553,145 +418,118 @@ class ServerMethods(): download['package'] = pyfile.package.data["package_name"] downloads.append(download) return downloads - + def get_conf_val(self, cat, var): - if var != "username" and var != "password": - return self.core.config[cat][var] - else: - raise Exception("not allowed!") + """ get config value """ + return self.config[cat][var] def set_conf_val(self, cat, opt, val): - if opt not in ("username", "password"): - self.core.config[str(cat)][str(opt)] = val - else: - raise Exception("not allowed!") - - def get_config(self): - d = deepcopy(self.core.xmlconfig.getConfigDict()) - del d["remote"]["username"] - del d["remote"]["password"] - return d - - def get_config_data(self): - d = deepcopy(self.core.xmlconfig.getDataDict()) - del d["remote"]["options"]["username"] - del d["remote"]["options"]["password"] - return d + """ set config value """ + self.core.config[str(cat)][str(opt)] = val + def get_config(self): + """ gets complete config """ + return self.core.config.config + + def get_plugin_config(self): + """ gets complete plugin config """ + return self.core.config.plugin + def pause_server(self): - self.core.thread_list.pause = True - + self.core.threadManager.pause = True + def unpause_server(self): - self.core.thread_list.pause = False - + self.core.threadManager.pause = False + def 
toggle_pause(self): - if self.core.thread_list.pause: - self.core.thread_list.pause = False + if self.core.threadManager.pause: + self.core.threadManager.pause = False else: - self.core.thread_list.pause = True - return self.core.thread_list.pause - + self.core.threadManager.pause = True + return self.core.threadManager.pause + def status_server(self): + """ dict with current server status """ status = {} - status['pause'] = self.core.thread_list.pause - status['activ'] = len(self.core.thread_list.py_downloading) - status['queue'] = self.core.file_list.countDownloads() - status['total'] = len(self.core.file_list.data['queue']) + status['pause'] = self.core.threadManager.pause + status['activ'] = len([x.active for x in self.core.threadManager.threads if x.active]) + status['queue'] = self.core.files.getFileCount() + status['total'] = self.core.files.getFileCount() status['speed'] = 0 - for pyfile in self.core.thread_list.py_downloading: - status['speed'] += pyfile.status.get_speed() + for pyfile in [x.active for x in self.core.threadManager.threads if x.active]: + status['speed'] += pyfile.status.getSpeed() - status['download'] = not self.core.thread_list.pause and self.is_time_download() + status['download'] = not self.core.threadManager.pause and self.is_time_download() status['reconnect'] = self.core.config['reconnect']['activated'] and self.is_time_reconnect() return status - - def file_exists(self, path): #@XXX: security?! 
- return exists(path) - + def get_server_version(self): return CURRENT_VERSION - - def add_urls(self, links): - for link in links: - link = link.strip() - if link.startswith("http") or exists(link): - self.core.file_list.collector.addLink(link) - self.core.file_list.save() - - def add_package(self, name, links, queue=True): - pid = self.new_package(name) - fids = map(self.core.file_list.collector.addLink, links) - map(lambda fid: self.move_file_2_package(fid, pid), fids) + def add_package(self, name, links, queue=0): + #0 is collector + pid = self.core.files.addPackage(name, name, queue) + + self.core.files.addLinks(links, pid) - if queue: - self.core.file_list.packager.pushPackage2Queue(pid) + self.core.log.info(_("Added package %s containing %s links") % (name, len(links) ) ) - self.core.file_list.save() - - def new_package(self, name): - id = self.core.file_list.packager.addNewPackage(name) - self.core.file_list.save() - return id - + self.core.files.save() + + def get_package_data(self, id): return self.core.file_list.packager.getPackageData(id) - - def get_package_files(self, id): - return self.core.file_list.packager.getPackageFiles(id) - + def get_file_info(self, id): return self.core.file_list.getFileInfo(id) - + def del_links(self, ids): for id in ids: - try: - self.core.file_list.collector.removeFile(id) - except: - self.core.file_list.packager.removeFile(id) - self.core.file_list.save() - + #@TODO rewrite + pass + + self.core.files.save() + def del_packages(self, ids): - map(self.core.file_list.packager.removePackage, ids) - self.core.file_list.save() + for id in ids: + self.core.files.deletePackage(id) + + self.core.files.save() + def kill(self): self.core.do_kill = True return True - + def restart(self): self.core.do_restart = True - + def get_queue(self): - return map(attrgetter("data"), self.core.file_list.data["queue"]) + return self.core.files.getCompleteData(1) - def get_collector_packages(self): - return map(attrgetter("data"), 
self.core.file_list.data["packages"]) + def get_collector(self): + return self.core.files.getCompleteData(0) + + def add_files_to_package(self, pid, urls): + #@TODO implement + pass + + def push_package_to_queue(self, id): + #@TODO implement + pass - def get_collector_files(self): - return map(attrgetter("id"), self.core.file_list.data["collector"]) - - def move_file_2_package(self, fid, pid): - try: - pyfile = self.core.file_list.collector.getFile(fid) - self.core.file_list.packager.addFileToPackage(pid, pyfile) - except: - return - else: - self.core.file_list.collector.removeFile(fid) - - def push_package_2_queue(self, id): - self.core.file_list.packager.pushPackage2Queue(id) - def restart_package(self, packid): - map(self.core.file_list.packager.resetFileStatus, self.core.file_list.packager.getPackageFiles(packid)) - + #@TODO package resett + pass + def restart_file(self, fileid): - self.core.file_list.packager.resetFileStatus(fileid) - + #@TODO file resett + pass + def upload_container(self, filename, type, content): + #@TODO py2.5 unproofed th = NamedTemporaryFile(mode="w", suffix="." 
+ type, delete=False) th.write(str(content)) path = th.name @@ -700,7 +538,7 @@ class ServerMethods(): cid = self.core.file_list.collector.addLink(path) self.move_file_2_package(cid, pid) self.core.file_list.save() - + def get_log(self, offset=0): filename = self.core.config['log']['log_folder'] + sep + 'log.txt' fh = open(filename, "r") @@ -710,10 +548,10 @@ class ServerMethods(): if offset >= len(lines): return None return lines[offset:] - + def stop_downloads(self): self.core.thread_list.stopAllDownloads() - + def stop_download(self, type, id): if type == "pack": ids = self.core.file_list.getPackageFiles(id) @@ -721,21 +559,21 @@ class ServerMethods(): self.core.file_list.packager.abortFile(fid) else: self.core.file_list.packager.abortFile(id) - - def update_available(self): - return self.core.update_available - + + def set_package_name(self, pid, name): self.core.file_list.packager.setPackageData(pid, package_name=name) - + def pull_out_package(self, pid): - self.core.file_list.packager.pullOutPackage(pid) - + """put package back to collector""" + #@TODO implement + pass + def is_captcha_waiting(self): self.core.lastGuiConnected = time.time() task = self.core.captchaManager.getTask() return not task == None - + def get_captcha_task(self, exclusive=False): self.core.lastGuiConnected = time.time() task = self.core.captchaManager.getTask() @@ -745,11 +583,11 @@ class ServerMethods(): return str(task.getID()), Binary(c[0]), str(c[1]) else: return None, None, None - + def get_task_status(self, tid): self.core.lastGuiConnected = time.time() return self.core.captchaManager.getTaskFromID(tid).getStatus() - + def set_captcha_result(self, tid, result): self.core.lastGuiConnected = time.time() task = self.core.captchaManager.getTaskFromID(tid) @@ -759,38 +597,19 @@ class ServerMethods(): return True else: return False - + def get_events(self, uuid): return self.core.pullManager.getEvents(uuid) - - def get_full_queue(self): - data = [] - for pack in 
self.core.file_list.data["queue"]: - p = {"data":pack.data, "children":[]} - for child in pack.files: - info = self.core.file_list.getFileInfo(child.id) - info["downloading"] = None - p["children"].append(info) - data.append(p) - return data - + def get_premium_accounts(self): + #@TODO implement plugins = self.core.pluginManager.getAccountPlugins() data = [] for p in plugins: data.extend(p.getAllAccounts()) return data - - #def move_urls_up(self, ids): - # for id in ids: - # self.core.file_list.move(id) - # self.core.file_list.save() - - #def move_urls_down(self, ids): - # for id in ids: - # self.core.file_list.move(id, 1) - # self.core.file_list.save() + def is_time_download(self): start = self.core.config['downloadTime']['start'].split(":") end = self.core.config['downloadTime']['end'].split(":") @@ -808,5 +627,5 @@ if __name__ == "__main__": pyload_core.start() except KeyboardInterrupt: pyload_core.shutdown() - pyload_core.logger.info(_("killed pyLoad from Terminal")) + pyload_core.log.info(_("killed pyLoad from Terminal")) _exit(1) |