import httplib
import re
import StringIO
import sys
import traceback
import urllib
import urllib2
from bs4 import BeautifulSoup as Soup
from datetime import datetime
from module.plugins.internal.Addon import Addon
from pytz import timezone
# Timezone-aware datetime for 1970-01-01 00:00 UTC. getUnixTimestamp() subtracts it
# from a UTC-converted datetime to obtain the number of seconds since the Unix epoch.
UNIX_EPOCH = timezone('UTC').localize(datetime(1970, 1, 1))
def notifyPushover(**kwargs):
    """Send a notification through the Pushover message API.

    All keyword arguments are form-encoded and POSTed to
    ``/1/messages.json`` on ``api.pushover.net`` over HTTPS. Callers in this
    file pass ``token``, ``user``, ``title``, ``message``, ``sound``, ``html``
    and optionally ``url``/``url_title``.

    The HTTP status of the reply is not evaluated; only transport errors
    raised by ``httplib`` propagate to the caller. Returns ``None``.
    """
    Connection = httplib.HTTPSConnection('api.pushover.net:443')
    try:
        Connection.request('POST', '/1/messages.json', urllib.urlencode(kwargs),
                           {'Content-type': 'application/x-www-form-urlencoded'})
        # Drain the reply so the exchange is complete before the socket is closed.
        Connection.getresponse().read()
    finally:
        # Always release the socket, even when the request itself failed.
        Connection.close()
def replaceUmlauts(title):
    """Transliterate German special characters in *title* to ASCII digraphs.

    The umlauts and the sharp s are rewritten as ``ae``/``Ae``, ``ue``/``Ue``,
    ``oe``/``Oe`` and ``ss``. An HTML-escaped ampersand (``&amp;``) is decoded
    to a plain ``&``.

    NOTE(review): the previous last step replaced ``'&'`` with ``'&'``, which
    does nothing; decoding ``&amp;`` is the presumed intent - confirm.

    Returns the converted string; the argument itself is not modified.
    """
    replacements = (
        (u'\xe4', 'ae'), (u'\xc4', 'Ae'),   # a-umlaut
        (u'\xfc', 'ue'), (u'\xdc', 'Ue'),   # u-umlaut
        (u'\xf6', 'oe'), (u'\xd6', 'Oe'),   # o-umlaut
        (u'\xdf', 'ss'),                    # sharp s
        (u'&amp;', '&'),
    )
    for needle, substitute in replacements:
        title = title.replace(needle, substitute)
    return title
def getUnixTimestamp(String):
    """Convert a German date/time text into seconds since the Unix epoch.

    *String* must contain a date immediately followed by a time in the form
    ``DD.MM.YYYYH:MM`` or ``DD.MM.YYYYHH:MM``; any text around it on the same
    line is ignored. The value is interpreted as Europe/Berlin local time.

    Returns the timestamp as a float.

    Raises ValueError when no such date/time is present or when the matched
    numbers do not form a valid calendar date or time.
    """
    Match = re.search(r'^.*(\d{2})\.(\d{2})\.(\d{4})(\d{1,2}):(\d{2}).*$', String)
    if not Match:
        raise ValueError('No date of the form DD.MM.YYYYH:MM found in {0!r}'.format(String))
    # Build the datetime from integers. Concatenating the digits for strptime()
    # is ambiguous for one-digit hours: "2" + "05" would be parsed as 20:05.
    Day, Month, Year, Hour, Minute = [int(Part) for Part in Match.groups()]
    LocalTime = timezone('Europe/Berlin').localize(datetime(Year, Month, Day, Hour, Minute))
    return (LocalTime.astimezone(timezone('UTC')) - UNIX_EPOCH).total_seconds()
class WarezWorld(Addon):
    """pyLoad addon that adds new movie releases from Warez-World.org.

    Each periodical run downloads the movie category page, skips releases
    older than the timestamp stored by the previous run, filters the rest by
    release name, hoster, IMDb year, rating and genres, and hands accepted
    releases to pyLoad (queue or link collector, depending on the rating).
    Additions and errors are reported through Pushover.
    """

    __name__ = 'WarezWorld'
    __type__ = 'hook'
    __status__ = 'testing'
    __author_name__ = ('Arno-Nymous')
    __author_mail__ = ('Arno-Nymous@users.noreply.github.com')
    __version__ = '1.2'
    __description__ = 'Get new movies from Warez-World.org'
    __config__ = [
        ('activated', 'bool', 'Active', 'False'),
        ('interval', 'int', 'Waiting time until next run in minutes', '60'),
        ('minYear', 'long', 'No movies older than year', '1970'),
        ('pushoverAppToken', 'str', 'Pushover app token', ''),
        ('pushoverUserToken', 'str', 'Pushover user token', ''),
        ('preferredHosters', 'str', 'Preferred hosters (seperated by;)','Share-online.biz'),
        ('quality', '720p;1080p', 'Video quality', '720p'),
        ('ratingCollector', 'float', 'Send releases to link collector with an IMDb rating of (or higher)', '6.5'),
        ('ratingQueue', 'float', 'Send releases to queue with an IMDb rating of (or higher)', '8.0'),
        ('rejectGenres', 'str', 'Reject movies of an of the following genres (seperated by ;)', 'Anime;Documentary;Family'),
        ('rejectReleaseTokens', 'str', 'Reject releases containing any of the following tokens (seperated by ;)', '.ts.;.hdts.'),
        ('soundError', ';none;alien;bike;bugle;cashregister;classical;climb;cosmic;echo;falling;gamelan;incoming;intermission;magic;mechanical;persistent;pianobar;pushover;siren;spacealarm;tugboat;updown', 'Use this sound for errors pushed via Pushover (empty for default)', ''),
        ('soundNotification', ';none;alien;bike;bugle;cashregister;classical;climb;cosmic;echo;falling;gamelan;incoming;intermission;magic;mechanical;persistent;pianobar;pushover;siren;spacealarm;tugboat;updown', 'Use this sound for notifications pushed via Pushover (empty for default)', '')
    ]

    UrlOpener = urllib2.build_opener()
    RejectGenres = []
    PreferredHosters = []
    RejectReleaseTokens = []
    LastReleaseTimestamp = None
    # All counters are created up front so they can be updated with "+=" later on
    # without checking for missing keys.
    Statistics = {'Total': 0, 'Added': 0, 'Skipped': 0, 'AlreadyProcessed': 0}

    def __init__(self, *args, **kwargs):
        super(WarezWorld, self).__init__(*args, **kwargs)
        # NOTE(review): the configured interval is in minutes and periodical()
        # multiplies it by 60, but it is passed on unscaled here - confirm the
        # unit start_periodical() expects.
        self.start_periodical(self.get_config('interval'))

    def _fetchPage(self, Url):
        """Download *Url* with a browser-like User-Agent and return the parsed page.

        The request is a plain GET and the HTTP response is always closed.
        """
        Request = urllib2.Request(Url)
        Request.add_header('User-Agent', 'Mozilla/5.0')
        Response = self.UrlOpener.open(Request)
        try:
            Html = Response.read()
        finally:
            Response.close()
        # NOTE(review): the original code passed 'html5lib' as the POST body of
        # the request; it was presumably meant as the parser name. The parser
        # is left at BeautifulSoup's default so no extra package is required.
        return Soup(Html)

    def _parseListItem(self, Item):
        """Build the release dictionary for one ``li.main-single`` list entry."""
        ReleaseSpan = Item.find('span', class_='main-rls')
        return {
            'MovieName': ReleaseSpan.text,
            # NOTE(review): assumes the release name follows a <br> tag inside
            # the span - verify against the live page markup.
            'ReleaseName': re.search(r'<br\s*/?>(.*)', unicode(ReleaseSpan)).group(1),
            'ReleaseLink': unicode(ReleaseSpan.a['href']),
            'ReleaseDate': getUnixTimestamp(unicode(Item.find(class_='main-date').text))
        }

    def _reportError(self, Release):
        """Push the traceback of the exception being handled via Pushover.

        Must be called from inside an ``except`` block. *Release* is the
        release that was being processed, or None. A failure to deliver the
        notification is logged and swallowed so that it cannot replace the
        original exception.
        """
        Message = 'Stacktrace\n{0}'.format(traceback.format_exc())
        if Release is not None:
            Message += '\nRelease\n{0}\n\nDate\n{1}'.format(
                Release['ReleaseName'].encode('utf-8'), Release['ReleaseDate']
            )
        try:
            notifyPushover(
                token=self.get_config('pushoverAppToken'),
                user=self.get_config('pushoverUserToken'),
                title='Error in script "WarezWorld.py"',
                message=Message,
                sound=self.get_config('soundError'),
                html=1
            )
        except Exception:
            self.log_info(u'Could not send error notification via Pushover')

    def periodical(self):
        """Run one scan of the movie category page.

        Reloads the filter settings from the configuration, processes every
        release that is not older than the stored timestamp (oldest first)
        and stores the timestamp of the first listed release afterwards.
        Any exception is reported via Pushover and then re-raised.
        """
        self.log_info(u'Start periodical run...')
        self.interval = self.get_config('interval') * 60
        self.RejectGenres = self.get_config('rejectGenres').split(';')
        self.PreferredHosters = [
            Hoster.strip() for Hoster in self.get_config('preferredHosters').lower().split(';')
        ]
        self.RejectReleaseTokens = self.get_config('rejectReleaseTokens').lower().split(';')
        self.LastReleaseTimestamp = float(self.retrieve('LastReleaseTimestamp', 0) or 0)
        # Reset the counters in place instead of creating a new dictionary on every run.
        for Key in self.Statistics:
            self.Statistics[Key] = 0

        Release = None  # release currently being processed, for the error report
        try:
            Page = self._fetchPage('http://warez-world.org/kategorie/filme')
            Releases = [self._parseListItem(Item) for Item in Page.findAll('li', class_='main-single')]
            self.log_info(u'{0} releases found'.format(len(Releases)))

            # The stored timestamp is taken from the first entry, so walk the
            # list backwards to handle the remaining entries before it.
            for Release in reversed(Releases):
                if Release['ReleaseDate'] < self.LastReleaseTimestamp:
                    self.log_debug(u'Release already processed "{0}"'.format(Release['ReleaseName']))
                    self.Statistics['AlreadyProcessed'] += 1
                    continue
                self.log_debug(u'Processing release "{0}"'.format(Release['ReleaseName']))
                # Defaults used when no IMDb data can be obtained.
                Release['MovieYear'] = 1900
                Release['MovieRating'] = 0.0
                Release['MovieGenres'] = []
                if self.parseRelease(Release):
                    self.downloadRelease(Release)

            # An empty listing leaves the stored timestamp untouched.
            if Releases:
                self.store('LastReleaseTimestamp', Releases[0]['ReleaseDate'])
                self.log_debug(u'Last parsed release timestamp is {0}'.format(Releases[0]['ReleaseDate']))

            # 'Total' was reset to 0 above, so the sum covers the other counters only.
            self.Statistics['Total'] = sum(self.Statistics.itervalues())
            self.log_info(u'Periodical run finished. Statistics: {0} total, {1} added, {2} skipped, {3} already processed'.format(
                self.Statistics['Total'],
                self.Statistics['Added'],
                self.Statistics['Skipped'],
                self.Statistics['AlreadyProcessed']
            ))
        except Exception:
            self._reportError(Release)
            raise

    def parseRelease(self, Release):
        """Decide whether *Release* should be handed to pyLoad.

        Checks the release name against the reject tokens and the configured
        quality, looks for a download link of a preferred hoster, fetches
        IMDb data when a link to IMDb can be found and finally applies the
        year, rating and genre filters. On success ``Release['DownloadLink']``
        is set (and the IMDb fields are filled in when available).

        Returns True when the release passes all filters, False otherwise.
        Every rejected release is counted as skipped.
        """
        ReleaseName = Release['ReleaseName'].lower()
        NameTokens = set(re.split(r'[\. ]', ReleaseName))
        # Configured tokens such as ".ts." carry the separators the name is
        # split on; strip them so they can match the individual name parts.
        RejectTokens = set(Token.lower().strip('. ') for Token in self.RejectReleaseTokens)
        RejectTokens.discard('')
        if NameTokens & RejectTokens or self.get_config('quality').lower() not in ReleaseName:
            self.log_debug(u'...Skip release ({0})'.format("Release name contains unwanted tokens or quality mismatch"))
            self.Statistics['Skipped'] += 1
            return False

        ReleasePage = self._fetchPage(Release['ReleaseLink'])
        for Container in ReleasePage.findAll('div', id='download-links'):
            Anchor = Container.a
            if Anchor and Anchor.string and Anchor.string.strip().lower() in self.PreferredHosters:
                Release['DownloadLink'] = Anchor['href']
                break
        if 'DownloadLink' not in Release:
            self.log_debug('...No download link of preferred hoster found')
            self.Statistics['Skipped'] += 1
            return False

        ReleaseNfo = ReleasePage.find('div', class_='spoiler')
        ImdbUrl = re.search(r'(http://)?.*(imdb\.com/title/tt\d+)\D', unicode(ReleaseNfo))
        if ImdbUrl:
            Release['ImdbUrl'] = 'http://www.' + ImdbUrl.group(2)
            self.addImdbData(Release)
        else:
            # No direct title link: follow the page's "IMDb-Seite" link and use
            # the first hit of the result list found there.
            for Div in ReleasePage.findAll('div', class_='ui2'):
                if Div.a and Div.a.string == 'IMDb-Seite':
                    SearchPage = self._fetchPage(urllib.quote_plus(Div.a['href'].encode('utf-8'), '/:?='))
                    FirstHit = None
                    if SearchPage.find('table', class_='findList'):
                        FirstHit = SearchPage.find('td', class_='result_text')
                    if FirstHit is not None and FirstHit.a:
                        Release['ImdbUrl'] = 'http://www.imdb.com' + FirstHit.a['href']
                        self.addImdbData(Release)
                    else:
                        # The release keeps its default year and rating and is
                        # judged by the filters below like any other release.
                        # NOTE(review): the old message announced "Send to link
                        # collector" although nothing was sent - confirm intent.
                        self.log_debug(u'...Could not obtain IMDb data for release')
                    break

        # Compare numbers with numbers: on Python 2 a string always compares
        # greater than a number, which would let every release pass.
        if (Release['MovieYear'] >= int(self.get_config('minYear'))
                and Release['MovieRating'] >= float(self.get_config('ratingCollector'))
                and not set(Release['MovieGenres']) & set(self.RejectGenres)):
            return True
        self.log_debug(u'...Skip release ({0})'.format('Movie too old, poor IMDb rating or unwanted genres'))
        self.Statistics['Skipped'] += 1
        return False

    def addImdbData(self, Release):
        """Fill *Release* with name, year, rating and genres from its IMDb page.

        Reads ``Release['ImdbUrl']`` and sets ``MovieYear`` (int), ``MovieRating``
        (float) and ``MovieGenres`` (list of strings). ``MovieName`` is replaced
        only when the page provides one. Values that cannot be parsed fall
        back to 0, 0.0 and whatever genres could be read.
        """
        self.log_debug(u'...Fetching IMDb data for release ({0})'.format(Release['ImdbUrl']))
        ImdbPage = self._fetchPage(Release['ImdbUrl'])

        NameTag = ImdbPage.find('span', {'itemprop': 'name'})
        if NameTag is not None and NameTag.string:
            Release['MovieName'] = unicode(NameTag.string)

        # The year is sometimes rendered as a link and sometimes as plain text,
        # so search the header for any text node containing four digits.
        try:
            YearText = ImdbPage.find('h1', class_='header').find('span', class_='nobr').find(
                text=re.compile(r'\d{4}')
            )
            MovieYear = int(re.search(r'\d{4}', YearText).group())
        except (AttributeError, TypeError, ValueError):
            MovieYear = 0
            self.log_debug(u'...Could not parse movie year ({0})'.format(Release['ImdbUrl']))

        try:
            MovieRating = float(
                ImdbPage.find('span', {'itemprop': 'ratingValue'}).string.replace(',', '.')
            )
        except (AttributeError, TypeError, ValueError):
            MovieRating = 0.0
            self.log_debug(u'...Could not parse movie rating ({0})'.format(Release['ImdbUrl']))

        MovieGenres = []
        try:
            for Genre in ImdbPage.find('div', {'itemprop': 'genre'}).findAll('a'):
                MovieGenres.append(Genre.string.strip())
        except AttributeError:
            self.log_debug(u'...Could not parse movie genres ({0})'.format(Release['ImdbUrl']))

        Release['MovieYear'] = MovieYear
        Release['MovieRating'] = MovieRating
        Release['MovieGenres'] = MovieGenres

    def downloadRelease(self, Release):
        """Add *Release* to pyLoad unless the movie was already added before.

        The movie is identified by "name (year)". Releases rated at least
        ``ratingQueue`` go to the queue, all others to the link collector.
        The movie is remembered as soon as the package has been added, then a
        Pushover notification is sent.
        """
        Storage = u'{0} ({1})'.format(Release['MovieName'], Release['MovieYear'])
        if self.retrieve(Storage) == '1':
            self.log_debug(u'Skip release ({0})'.format('already downloaded'))
            self.Statistics['Skipped'] += 1
            return

        if Release['MovieRating'] >= float(self.get_config('ratingQueue')):
            Destination = 1
            PushoverTitle = 'New movie added to queue'
        else:
            Destination = 0
            PushoverTitle = 'New movie added to link collector'
        self.pyload.api.addPackage(
            u'{0} IMDb: {1}'.format(Storage, Release['MovieRating']),
            [Release['DownloadLink']],
            Destination
        )
        self.log_info(u'{0} ({1})'.format(PushoverTitle, Storage))
        self.Statistics['Added'] += 1
        # Remember the movie before notifying, so that a failed notification
        # cannot cause the same package to be added again on the next run.
        self.store(Storage, '1')

        notifyPushover(
            token=self.get_config('pushoverAppToken'),
            user=self.get_config('pushoverUserToken'),
            title=PushoverTitle,
            message=u'{0} ({1})\nRating: {2}\nGenres: {3}\n\n{4}'.format(
                Release['MovieName'],
                Release['MovieYear'],
                Release['MovieRating'],
                u', '.join(Release['MovieGenres']),
                Release['ReleaseName']
            ).encode('utf-8'),
            sound=self.get_config('soundNotification'),
            url=(Release['ImdbUrl'].encode('utf-8') if 'ImdbUrl' in Release else ''),
            url_title='View on IMDb',
            html=1
        )