diff options
Diffstat (limited to 'pyload/utils/packagetools.py')
-rw-r--r-- | pyload/utils/packagetools.py | 113 |
1 files changed, 66 insertions, 47 deletions
diff --git a/pyload/utils/packagetools.py b/pyload/utils/packagetools.py index d5ab4d182..d930157e1 100644 --- a/pyload/utils/packagetools.py +++ b/pyload/utils/packagetools.py @@ -1,15 +1,57 @@ -# JDownloader/src/jd/controlling/LinkGrabberPackager.java +# -*- coding: utf-8 -*- import re + from urlparse import urlparse + +endings = ("jdeatme", "3gp", "7zip", "7z", "abr", "ac3", "aiff", "aifc", "aif", "ai", + "au", "avi", "apk", "bin", "bmp", "bat", "bz2", "cbr", "cbz", "ccf", "chm", + "cr2", "cso", "cue", "cvd", "dta", "deb", "divx", "djvu", "dlc", "dmg", "doc", + "docx", "dot", "eps", "epub", "exe", "ff", "flv", "flac", "f4v", "gsd", "gif", + "gpg", "gz", "iwd", "idx", "iso", "ipa", "ipsw", "java", "jar", "jpe?g", "load", + "m2ts", "m4v", "m4a", "md5", "mkv", "mp2", "mp3", "mp4", "mobi", "mov", "movie", + "mpeg", "mpe", "mpg", "mpq", "msi", "msu", "msp", "mv", "mws", "nfo", "npk", "oga", + "ogg", "ogv", "otrkey", "par2", "pkg", "png", "pdf", "pptx?", "ppsx?", "ppz", "pot", + "psd", "qt", "rmvb", "rm", "rar", "ram", "ra", "rev", "rnd", "rpm", "run", "rsdf", + "reg", "rtf", "shnf", "sh(?!tml)", "ssa", "smi", "sub", "srt", "snd", "sfv", "sfx", + "swf", "swc", "tar\.(gz|bz2|xz)", "tar", "tgz", "tiff?", "ts", "txt", "viv", "vivo", + "vob", "vtt", "webm", "wav", "wmv", "wma", "xla", "xls", "xpi", "zeno", "zip", + "[r-z]\d{2}", "_[_a-z]{2}", "\d{3,4}(?=\?|$|\"|\r|\n)") + +rarPats = [re.compile(r'(.*)(\.|_|-)pa?r?t?\.?\d+.(rar|exe)$', re.I), + re.compile(r'(.*)(\.|_|-)part\.?[0]*[1].(rar|exe)$', re.I), + re.compile(r'(.*)\.rar$', re.I), + re.compile(r'(.*)\.r\d+$', re.I), + re.compile(r'(.*)(\.|_|-)\d+$', re.I)] + +zipPats = [re.compile(r'(.*)\.zip$', re.I), + re.compile(r'(.*)\.z\d+$', re.I), + re.compile(r'(?is).*\.7z\.[\d]+$', re.I), + re.compile(r'(.*)\.a.$', re.I)] + +ffsjPats = [re.compile(r'(.*)\._((_[a-z])|([a-z]{2}))(\.|$)'), + re.compile(r'(.*)(\.|_|-)[\d]+(\.(' + '|'.join(endings) + ')$)', re.I)] + +iszPats = [re.compile(r'(.*)\.isz$', re.I), + re.compile(r'(.*)\.i\d{2}$', re.I)] + +pat0 = re.compile(r'www\d*\.', re.I) + +pat1 = re.compile(r'(\.?CD\d+)', re.I) +pat2 = re.compile(r'(\.?part\d+)', re.I) + +pat3 = re.compile(r'(.+)[\.\-_]+$') +pat4 = re.compile(r'(.+)\.\d+\.xtm$') + + def matchFirst(string, *args): - """ matches against list of regexp and returns first match""" + """ matches against list of regexp and returns first match """ for patternlist in args: for pattern in patternlist: - r = pattern.search(string) - if r is not None: - name = r.group(1) + m = pattern.search(string) + if m is not None: + name = m.group(1) return name return string @@ -19,35 +61,10 @@ def parseNames(files): """ Generates packages names from name, data lists :param files: list of (name, data) - :return: packagenames mapt to data lists (eg. urls) + :return: packagenames mapped to data lists (eg. urls) """ packs = {} - endings = "\\.(3gp|7zip|7z|abr|ac3|aiff|aifc|aif|ai|au|avi|bin|bz2|cbr|cbz|ccf|cue|cvd|chm|dta|deb|divx|djvu|dlc|dmg|doc|docx|dot|eps|exe|ff|flv|f4v|gsd|gif|gz|iwd|iso|ipsw|java|jar|jpg|jpeg|jdeatme|load|mws|mw|m4v|m4a|mkv|mp2|mp3|mp4|mov|movie|mpeg|mpe|mpg|msi|msu|msp|nfo|npk|oga|ogg|ogv|otrkey|pkg|png|pdf|pptx|ppt|pps|ppz|pot|psd|qt|rmvb|rm|rar|ram|ra|rev|rnd|r\\d+|rpm|run|rsdf|rtf|sh(!?tml)|srt|snd|sfv|swf|tar|tif|tiff|ts|txt|viv|vivo|vob|wav|wmv|xla|xls|xpi|zeno|zip|z\\d+|_[_a-z]{2}|\\d+$)" - - rarPats = [re.compile("(.*)(\\.|_|-)pa?r?t?\\.?[0-9]+.(rar|exe)$", re.I), - re.compile("(.*)(\\.|_|-)part\\.?[0]*[1].(rar|exe)$", re.I), - re.compile("(.*)\\.rar$", re.I), - re.compile("(.*)\\.r\\d+$", re.I), - re.compile("(.*)(\\.|_|-)\\d+$", re.I)] - - zipPats = [re.compile("(.*)\\.zip$", re.I), - re.compile("(.*)\\.z\\d+$", re.I), - re.compile("(?is).*\\.7z\\.[\\d]+$", re.I), - re.compile("(.*)\\.a.$", re.I)] - - ffsjPats = [re.compile("(.*)\\._((_[a-z])|([a-z]{2}))(\\.|$)"), - re.compile("(.*)(\\.|_|-)[\\d]+(" + endings + "$)", re.I)] - - iszPats = [re.compile("(.*)\\.isz$", re.I), - re.compile("(.*)\\.i\\d{2}$", re.I)] - - pat1 = re.compile("(\\.?CD\\d+)", re.I) - pat2 = re.compile("(\\.?part\\d+)", re.I) - - pat3 = re.compile("(.+)[\\.\\-_]+$") - pat4 = re.compile("(.+)\\.\\d+\\.xtm$") - for file, url in files: patternMatch = False @@ -62,7 +79,7 @@ def parseNames(files): if len(split) > 1: name = split.pop(1) - #check if a already existing package may be ok for this file + #check if an already existing package may be ok for this file # found = False # for pack in packs: # if pack in file: @@ -70,7 +87,8 @@ def parseNames(files): # found = True # break # - # if found: continue + # if found: + # continue # unrar pattern, 7zip/zip and hjmerge pattern, isz pattern, FFSJ pattern before = name @@ -79,19 +97,19 @@ def parseNames(files): patternMatch = True # xtremsplit pattern - r = pat4.search(name) - if r is not None: - name = r.group(1) + m = pat4.search(name) + if m is not None: + name = m.group(1) # remove part and cd pattern - r = pat1.search(name) - if r is not None: - name = name.replace(r.group(0), "") + m = pat1.search(name) + if m is not None: + name = name.replace(m.group(0), "") patternMatch = True - r = pat2.search(name) - if r is not None: - name = name.replace(r.group(0), "") + m = pat2.search(name) + if m is not None: + name = name.replace(m.group(0), "") patternMatch = True # additional checks if extension pattern matched @@ -106,9 +124,9 @@ def parseNames(files): name = name[:-length] # remove endings like . _ - - r = pat3.search(name) - if r is not None: - name = r.group(1) + m = pat3.search(name) + if m is not None: + name = m.group(1) # replace . and _ with space name = name.replace(".", " ") @@ -121,11 +139,12 @@ def parseNames(files): # fallback: package by hoster if not name: name = urlparse(file).hostname - if name: name = name.replace("www.", "") + if name: + name = pat0.sub("", name) # fallback : default name if not name: - name = "unknown" + name = _("Unnamed package") # build mapping if name in packs: |