diff options
Diffstat (limited to 'module/FileDatabase.py')
-rw-r--r-- | module/FileDatabase.py | 684 |
1 file changed, 684 insertions, 0 deletions
diff --git a/module/FileDatabase.py b/module/FileDatabase.py new file mode 100644 index 000000000..843121492 --- /dev/null +++ b/module/FileDatabase.py @@ -0,0 +1,684 @@ +#!/usr/bin/env python +""" + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + See the GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. + + @author: RaNaN +""" +from Queue import Queue +import sqlite3 +from threading import Thread +from threading import RLock +from time import sleep +from time import time +from os import path +import traceback + +statusMap = { + "finished": 0, + "offline": 1, + "online": 2, + "queued": 3, + "checking": 4, + "waiting": 5, + "reconnected": 6, + "starting": 7, + "failed": 8, + "aborted": 9, + "decrypting": 10, + "custom": 11, + "downloading": 12, + "processing": 13 +} + +######################################################################## +class FileHandler: + """Handles all request made to obtain information, + modify status or other request for links or packages""" + + + #---------------------------------------------------------------------- + def __init__(self, core): + """Constructor""" + self.core = core + + # translations + self.statusMsg = [_("finished"), _("offline"), _("online"), _("queued"), _("checking"), _("waiting"), _("reconnected"), _("starting"),_("failed"), _("aborted"), _("decrypting"), _("custom"),_("downloading"), _("processing")] + + self.cache = {} #holds instances for files + self.packageCache = {} # same for packages + #@TODO: 
purge the cache + + self.jobCache = {} + + self.lock = RLock() + + self.filecount = -1 # if an invalid value is set get current value from db + self.unchanged = False #determines if any changes was made since last call + + self.db = FileDatabaseBackend(self) # the backend + + + def change(func): + def new(*args): + args[0].unchanged = False + args[0].filecount = -1 + args[0].jobCache = {} + return func(*args) + return new + + #---------------------------------------------------------------------- + def save(self): + """saves all data to backend""" + self.db.commit() + + #---------------------------------------------------------------------- + def getCompleteData(self, queue=1): + """gets a complete data representation""" + + data = self.db.getAllLinks(queue) + packs = self.db.getAllPackages(queue) + + print data + print packs + + data.update( [ (x.id, x.toDbDict()[x.id]) for x in self.cache.itervalues() ] ) + packs.update( [ (x.id, x.toDict()[x.id]) for x in self.packageCache.itervalues() if x.queue == queue] ) + + for key, value in data.iteritems(): + if packs.has_key(value["package"]): + packs[value["package"]]["links"][key] = value + + return packs + + #---------------------------------------------------------------------- + @change + def addLinks(self, urls, package): + """adds links""" + + # tuple of (url, name, plugin, package) + links = [ (x[0], x[0], x[1], package) for x in self.core.pluginManager.parseUrls(urls) ] + + self.db.addLinks(links, package) + + + #---------------------------------------------------------------------- + @change + def addPackage(self, name, folder, queue=0): + """adds a package, default to link collector""" + return self.db.addPackage(name, folder, queue) + + #---------------------------------------------------------------------- + @change + def deletePackage(self, id): + """delete package and all contained links""" + + self.lock.acquire() + + if self.packageCache.has_key(id): + del self.packageCache[id] + + toDelete = [] + + for 
pyfile in self.cache.itervalues(): + if pyfile.packageid == id: + pyfile.abort() + toDelete.append(pyfile.id) + + for pid in toDelete: + del self.cache[pid] + + self.db.deletePackage(id) + + + self.lock.release() + + #---------------------------------------------------------------------- + @change + def deleteLink(self, id): + """deletes links""" + + self.lock.acquire() + + if self.cache.has_key(id): + self.cache[id].abort() + del self.cache[id] + + self.lock.release() + + self.db.deleteLink(id) + + #---------------------------------------------------------------------- + def releaseLink(self, id): + """removes pyfile from cache""" + if self.cache.has_key(id): + del self.cache[id] + + #---------------------------------------------------------------------- + def releasePackage(self, id): + """removes package from cache""" + if self.packageCache.has_key(id): + del self.packageCache[id] + + #---------------------------------------------------------------------- + def updateLink(self, pyfile): + """updates link""" + self.db.updateLink(pyfile) + + #---------------------------------------------------------------------- + def updatePackage(self, pypack): + """updates a package""" + self.db.updatePackage(pypack) + + #---------------------------------------------------------------------- + def getPackage(self, id): + """return package instance""" + + if self.packageCache.has_key(id): + return self.packageCache[id] + else: + return self.db.getPackage(id) + + #---------------------------------------------------------------------- + def getFile(self, id): + """returns pyfile instance""" + if self.cache.has_key(id): + return self.cache[id] + else: + return self.db.getFile(id) + + #---------------------------------------------------------------------- + def getJob(self, occ): + """get suitable job""" + + self.lock.acquire() + + if self.jobCache.has_key(occ): + pass + else: + self.jobCache = {} #better not caching to much + jobs = self.db.getJob(occ) + jobs.reverse() + 
self.jobCache[occ] = jobs + + #@TODO: maybe the new job has to be approved... + + if not self.jobCache[occ]: + pyfile = None + else: + pyfile = self.getFile(self.jobCache[occ].pop()) + + self.lock.release() + return pyfile + + + #---------------------------------------------------------------------- + def getFileCount(self): + """returns number of files""" + + if self.filecount == -1: + self.filecount = self.db.filecount(1) + + return self.filecount + + + +######################################################################### +class FileDatabaseBackend(Thread): + """underlying backend for the filehandler to save the data""" + + def __init__(self, manager): + Thread.__init__(self) + + self.setDaemon(True) + + self.manager = manager + + self.jobs = Queue() # queues for jobs + self.res = Queue() + + self.start() + + + def queue(func): + """use as decorator when fuction directly executes sql commands""" + def new(*args): + args[0].jobs.put((func, args, 0)) + return args[0].res.get() + return new + + def async(func): + """use as decorator when function does not return anything and asynchron execution is wanted""" + def new(*args): + args[0].jobs.put((func, args, 1)) + return True + return new + + def run(self): + """main loop, which executes commands""" + + self.conn = sqlite3.connect("files.db") + self.c = self.conn.cursor() + #self.c.execute("PRAGMA synchronous = OFF") + self._createTables() + + while True: + try: + f, args, async = self.jobs.get() + if f == "quit": return True + res = f(*args) + if not async: self.res.put(res) + except Exception, e: + #@TODO log etc + print "Database Error @", f.__name__, args[1:], e + traceback.print_exc() + if not async: self.res.put(None) + + def shutdown(self): + self.save() + self.jobs.put(("quit", "", 0)) + + def _createTables(self): + """create tables for database""" + + self.c.execute('CREATE TABLE IF NOT EXISTS "packages" ("id" INTEGER PRIMARY KEY AUTOINCREMENT, "name" TEXT NOT NULL, "folder" TEXT, "password" TEXT, "site" 
TEXT, "queue" INTEGER DEFAULT 0 NOT NULL)') + self.c.execute('CREATE TABLE IF NOT EXISTS "links" ("id" INTEGER PRIMARY KEY AUTOINCREMENT, "url" TEXT NOT NULL, "name" TEXT, "size" INTEGER DEFAULT 0 NOT NULL, "status" INTEGER DEFAULT 3 NOT NULL, "plugin" TEXT DEFAULT "BasePlugin" NOT NULL, "error" TEXT, "package" INTEGER DEFAULT 0 NOT NULL, FOREIGN KEY(package) REFERENCES packages(id))') + self.c.execute('CREATE INDEX IF NOT EXISTS "pIdIndex" ON links(package)') + self.c.execute('VACUUM') + + #---------------------------------------------------------------------- + @queue + def filecount(self, queue): + """returns number of files in queue""" + self.c.execute("SELECT l.id FROM links as l INNER JOIN packages as p ON l.package=p.id WHERE p.queue=? ORDER BY l.id", (queue,)) + r = self.c.fetchall() + return len(r) + + @queue + def addLink(self, url, name, plugin, package): + self.c.execute('INSERT INTO links(url, name, plugin, package) VALUES(?,?,?,?)', (url, name, plugin, package)) + return self.c.lastrowid + + @queue + def addLinks(self, links, package): + """ links is a list of tupels (url,name,plugin)""" + self.c.executemany('INSERT INTO links(url, name, plugin, package) VALUES(?,?,?,?)', links) + + @queue + def addPackage(self, name, folder, queue): + + self.c.execute('INSERT INTO packages(name, folder, queue) VALUES(?,?,?)', (name, folder, queue)) + return self.c.lastrowid + + @queue + def deletePackage(self, id): + + self.c.execute('DELETE FROM links WHERE package=?', (str(id), )) + self.c.execute('DELETE FROM packages WHERE id=?', (str(id), )) + + @queue + def deleteLink(self, id): + + self.c.execute('DELETE FROM links WHERE id=?', (str(id), )) + + + @queue + def getAllLinks(self, q): + """return information about all links in queue q + + q0 queue + q1 collector + + format: + + { + id: {'name': name, ... 'package': id }, ... 
+ } + + """ + self.c.execute('SELECT l.id,l.url,l.name,l.size,l.status,l.error,l.plugin,l.package FROM links as l INNER JOIN packages as p ON l.package=p.id WHERE p.queue=? ORDER BY l.id', (q, )) + data = {} + for r in self.c: + data[int(r[0])] = { + 'url': r[1], + 'name': r[2], + 'size': r[3], + 'status': r[4], + 'statusmsg': self.manager.statusMsg[r[4]], + 'error': r[5], + 'plugin': r[6], + 'package': r[7] + } + + return data + + @queue + def getAllPackages(self, q): + """return information about packages in queue q + (only useful in get all data) + + q0 queue + q1 collector + + format: + + { + id: {'name': name ... 'links': {} }, ... + } + """ + self.c.execute('SELECT id,name,folder,site,password,queue FROM packages WHERE queue=? ORDER BY id', str(q)) + + data = {} + for r in self.c: + data[int(r[0])] = { + 'name': r[1], + 'folder': r[2], + 'site': r[3], + 'password': r[4], + 'queue': r[5], + 'links': {} + } + + return data + + + def getLinkData(self, id): + """get link information""" + pass + + def getPackageData(self, id): + """get package data _with_ link data""" + pass + + + @async + def updateLink(self, f): + self.c.execute('UPDATE links SET url=?,name=?,size=?,status=?,error=?,package=? WHERE id=?', (f.name, f.url, f.size, f.status, f.error, str(f.packageid), str(f.id))) + + @async + def updatePackage(self, p): + self.c.execute('UPDATE packages SET name=?,folder=?,site=?,password=?,queue=? 
WHERE id=?', (p.name, p.folder, p.site, p.password, p.queue, str(p.id))) + + @async + def commit(self): + self.conn.commit() + + @queue + def getPackage(self, id): + """return package instance from id""" + self.c.execute("SELECT name,folder,site,password,queue FROM packages WHERE id=?", (str(id),)) + r = self.c.fetchone() + if not r: return None + return PyPackage(self.manager, id, *r) + + #---------------------------------------------------------------------- + @queue + def getFile(self, id): + """return link instance from id""" + self.c.execute("SELECT url, name, size, status, error, plugin, package FROM links WHERE id=?", (str(id),)) + r = self.c.fetchone() + if not r: return None + return PyFile(self.manager, id, *r) + + + @queue + def getJob(self, occ): + """return pyfile instance, which is suitable for download and dont use a occupied plugin""" + self.c.execute("SELECT l.id FROM links as l INNER JOIN packages as p ON l.package=p.id WHERE p.queue=1 AND l.plugin NOT IN ('else','some','else') AND l.status IN (2,3,6) LIMIT 5") + + return [x[0] for x in self.c ] + +class PyFile(): + def __init__(self, manager, id, url, name, size, status, error, pluginname, package): + self.m = manager + self.m.cache[int(id)] = self + + self.id = int(id) + self.url = url + self.name = name + self.size = size + self.status = status + self.pluginname = pluginname + self.packageid = package #should not be used, use package() instead + self.error = error + # database information ends here + + self.waitUntil = 0 # time() + time to wait + + # status attributes + self.active = False #obsolete? 
+ self.abort = False + self.reconnected = False + + + def __repr__(self): + return "PyFile %s: %s@%s" % (self.id, self.name, self.pluginname) + + def initPlugin(self): + """ inits plugin instance """ + self.pluginmodule = self.m.core.pluginManager.getPlugin(self.pluginname) + self.pluginclass = getattr(self.pluginmodule, self.pluginname) + self.plugin = self.pluginclass(self) + + + def package(self): + """ return package instance""" + return self.m.getPackage(self.packageid) + + def setStatus(self, status): + self.status = statusMap[status] + self.sync() #@TODO needed aslong no better job approving exists + + def hasStatus(self, status): + return statusMap[status] == self.status + + def sync(self): + """sync PyFile instance with database""" + self.m.updateLink(self) + + def release(self): + """sync and remove from cache""" + self.sync() + self.m.releaseLink(self.id) + + def delete(self): + """delete pyfile from database""" + self.m.deleteLink(self.id) + + def toDict(self): + """return dict with all information for interface""" + return self.toDbDict() + + def toDbDict(self): + """return data as dict for databse + + format: + + { + id: {'url': url, 'name': name ... 
} + } + + """ + return { + self.id: { + 'url': self.url, + 'name': self.name, + 'plugin' : self.pluginname, + 'size': self.size, + 'status': self.status, + 'statusmsg': self.m.statusMsg[self.status], + 'package': self.packageid, + 'error': self.error + } + } + + def abort(self): + """abort pyfile if possible""" + + while self.id in self.m.core.ThreadManager.processingIds(): + self.abort = True + sleep(0.025) + + abort = False + + def finishIfDone(self): + """set status to finish and release file if every thread is finished with it""" + + if self.id in self.m.core.threadManager.processingIds(): + return False + + self.setStatus("finished") + self.release() + return True + + def formatWait(self): + """ formats and return wait time in humanreadable format """ + return self.waitUntil - time() + + + +class PyPackage(): + def __init__(self, manager, id, name, folder, site, password, queue): + self.m = manager + self.m.packageCache[int(id)] = self + + self.id = int(id) + self.name = name + self.folder = folder + self.site = site + self.password = password + self.queue = queue + + def toDict(self): + """return data as dict + + format: + + { + id: {'name': name ... 'links': {} } } + } + + """ + return { + self.id: { + 'name': self.name, + 'folder': self.folder, + 'site': self.site, + 'password': self.password, + 'queue': self.queue, + 'links': {} + } + } + + def getChildren(self): + """get information about contained links""" + raise NotImplementedError + + def sync(self): + """sync with db""" + self.m.updatePackage(self) + + def release(self): + """sync and delete from cache""" + self.sync() + self.m.releasePackage(self.id) + + def delete(self): + self.m.deletePackage(self.id) + + +if __name__ == "__main__": + + pypath = "." 
+ + db = FileHandler(None) + + #p = PyFile(db, 5) + #sleep(0.1) + + a = time() + + #print db.addPackage("package", "folder" , 1) + + #print db.addPackage("package", "folder", 1) + + #db.addLinks([x for x in range(0,200)], 5) + + db.save() + + b = time() + print "adding 200 links, single sql execs, no commit", b-a + + + res = db.getCompleteData(1) + #print res + r = [ len(x["links"]) for x in res.itervalues() ] + print r + c = time() + print "get all links", c-b + + #i = 2 + #db.updateLink(i, "urlupdate%s" % i, "nameupdate%s" % i, i, i, i,i) + + d = time() + print "update one link", d-c + + #p.sync() + #p.remove() + + e = time() + print "sync and remove link", e-d + + db.save() + + db.deletePackage(1) + #db.commit() + + f = time() + print "commit, remove package/links, commit", f-e + + #db.commit() + sleep(0.5) + + g = time() + print "empty commit", g-f -0.5 + + + job = db.getJob("") + print job + + h = time() + #print db.getFileCount() + + print "get job", h-g + + print db.getFileCount() + + i = time() + + print "file count", i-h + + + print db.getJob("") + + j = time() + + + print "get job 2", j-i + + for i in db.cache.itervalues(): + i.sync() + + sleep(1) + +
\ No newline at end of file |