summary | refs | log | tree | commit | diff | stats
path: root/module/lib/feedparser.py
diff options
context:
space:
mode:
author Walter Purcaro <vuolter@gmail.com> 2014-03-28 22:32:14 +0100
committer Walter Purcaro <vuolter@gmail.com> 2014-06-28 02:47:08 +0200
commit b1fffc3a1b2dbbb807213b85f538e59251b9bf35 (patch)
tree c373d3234dcb474bb424371a3d89341bed8a9e07 /module/lib/feedparser.py
parent Plugins licensing doc (diff)
download pyload-b1fffc3a1b2dbbb807213b85f538e59251b9bf35.tar.xz
Remove bad whitespaces
Merged vuolter/pyload@00288e6
Diffstat (limited to 'module/lib/feedparser.py')
-rw-r--r-- module/lib/feedparser.py 192
1 files changed, 96 insertions, 96 deletions
diff --git a/module/lib/feedparser.py b/module/lib/feedparser.py
index a746ed8f5..32f9d2dd7 100644
--- a/module/lib/feedparser.py
+++ b/module/lib/feedparser.py
@@ -89,7 +89,7 @@ try:
except (NameError, AttributeError):
import string
_maketrans = string.maketrans
-
+
# base64 support for Atom feeds that contain embedded binary data
try:
import base64, binascii
@@ -334,7 +334,7 @@ class FeedParserDict(UserDict):
if not self.has_key(key):
self[key] = value
return self[key]
-
+
def has_key(self, key):
try:
return hasattr(self, key) or UserDict.__contains__(self, key)
@@ -343,7 +343,7 @@ class FeedParserDict(UserDict):
# This alias prevents the 2to3 tool from changing the semantics of the
# __contains__ function below and exhausting the maximum recursion depth
__has_key = has_key
-
+
def __getattr__(self, key):
try:
return self.__dict__[key]
@@ -451,7 +451,7 @@ class _FeedParserMixin:
'http://purl.org/atom/ns#': '',
'http://www.w3.org/2005/Atom': '',
'http://purl.org/rss/1.0/modules/rss091#': '',
-
+
'http://webns.net/mvcb/': 'admin',
'http://purl.org/rss/1.0/modules/aggregation/': 'ag',
'http://purl.org/rss/1.0/modules/annotate/': 'annotate',
@@ -508,7 +508,7 @@ class _FeedParserMixin:
can_contain_relative_uris = ['content', 'title', 'summary', 'info', 'tagline', 'subtitle', 'copyright', 'rights', 'description']
can_contain_dangerous_markup = ['content', 'title', 'summary', 'info', 'tagline', 'subtitle', 'copyright', 'rights', 'description']
html_types = ['text/html', 'application/xhtml+xml']
-
+
def __init__(self, baseuri=None, baselang=None, encoding='utf-8'):
if _debug: sys.stderr.write('initializing FeedParser\n')
if not self._matchnamespaces:
@@ -554,7 +554,7 @@ class _FeedParserMixin:
# strict xml parsers do -- account for this difference
if isinstance(self, _LooseFeedParser):
attrs = [(k, v.replace('&amp;', '&')) for k, v in attrs]
-
+
# track xml:base and xml:lang
attrsD = dict(attrs)
baseuri = attrsD.get('xml:base', attrsD.get('base')) or self.baseuri
@@ -582,7 +582,7 @@ class _FeedParserMixin:
self.lang = lang
self.basestack.append(self.baseuri)
self.langstack.append(lang)
-
+
# track namespaces
for prefix, uri in attrs:
if prefix.startswith('xmlns:'):
@@ -620,7 +620,7 @@ class _FeedParserMixin:
self.intextinput = 0
if (not prefix) and tag not in ('title', 'link', 'description', 'url', 'href', 'width', 'height'):
self.inimage = 0
-
+
# call special handler (if defined) or default handler
methodname = '_start_' + prefix + suffix
try:
@@ -754,7 +754,7 @@ class _FeedParserMixin:
elif contentType == 'xhtml':
contentType = 'application/xhtml+xml'
return contentType
-
+
def trackNamespace(self, prefix, uri):
loweruri = uri.lower()
if (prefix, loweruri) == (None, 'http://my.netscape.com/rdf/simple/0.9/') and not self.version:
@@ -775,7 +775,7 @@ class _FeedParserMixin:
def resolveURI(self, uri):
return _urljoin(self.baseuri or '', uri)
-
+
def decodeEntities(self, element, data):
return data
@@ -788,7 +788,7 @@ class _FeedParserMixin:
def pop(self, element, stripWhitespace=1):
if not self.elementstack: return
if self.elementstack[-1][0] != element: return
-
+
element, expectingText, pieces = self.elementstack.pop()
if self.version == 'atom10' and self.contentparams.get('type','text') == 'application/xhtml+xml':
@@ -833,11 +833,11 @@ class _FeedParserMixin:
# In Python 3, base64 takes and outputs bytes, not str
# This may not be the most correct way to accomplish this
output = _base64decode(output.encode('utf-8')).decode('utf-8')
-
+
# resolve relative URIs
if (element in self.can_be_relative_uri) and output:
output = self.resolveURI(output)
-
+
# decode entities within embedded markup
if not self.contentparams.get('base64', 0):
output = self.decodeEntities(element, output)
@@ -860,7 +860,7 @@ class _FeedParserMixin:
if is_htmlish and RESOLVE_RELATIVE_URIS:
if element in self.can_contain_relative_uris:
output = _resolveRelativeURIs(output, self.baseuri, self.encoding, self.contentparams.get('type', 'text/html'))
-
+
# parse microformats
# (must do this before sanitizing because some microformats
# rely on elements that we sanitize)
@@ -876,7 +876,7 @@ class _FeedParserMixin:
vcard = mfresults.get('vcard')
if vcard:
self._getContext()['vcard'] = vcard
-
+
# sanitize embedded markup
if is_htmlish and SANITIZE_HTML:
if element in self.can_contain_dangerous_markup:
@@ -906,7 +906,7 @@ class _FeedParserMixin:
if element == 'title' and self.hasTitle:
return output
-
+
# store output in appropriate place(s)
if self.inentry and not self.insource:
if element == 'content':
@@ -962,7 +962,7 @@ class _FeedParserMixin:
self.incontent -= 1
self.contentparams.clear()
return value
-
+
# a number of elements in a number of RSS variants are nominally plain
# text, but this is routinely ignored. This is an attempt to detect
# the most common cases. As false positives often result in silent
@@ -993,7 +993,7 @@ class _FeedParserMixin:
prefix = self.namespacemap.get(prefix, prefix)
name = prefix + ':' + suffix
return name
-
+
def _getAttribute(self, attrsD, name):
return attrsD.get(self._mapToStandardPrefix(name))
@@ -1021,7 +1021,7 @@ class _FeedParserMixin:
pass
attrsD['href'] = href
return attrsD
-
+
def _save(self, key, value, overwrite=False):
context = self._getContext()
if overwrite:
@@ -1046,7 +1046,7 @@ class _FeedParserMixin:
self.version = 'rss20'
else:
self.version = 'rss'
-
+
def _start_dlhottitles(self, attrsD):
self.version = 'hotrss'
@@ -1064,7 +1064,7 @@ class _FeedParserMixin:
self._start_link({})
self.elementstack[-1][-1] = attrsD['href']
self._end_link()
-
+
def _start_feed(self, attrsD):
self.infeed = 1
versionmap = {'0.1': 'atom01',
@@ -1081,7 +1081,7 @@ class _FeedParserMixin:
def _end_channel(self):
self.infeed = 0
_end_feed = _end_channel
-
+
def _start_image(self, attrsD):
context = self._getContext()
if not self.inentry:
@@ -1089,7 +1089,7 @@ class _FeedParserMixin:
self.inimage = 1
self.hasTitle = 0
self.push('image', 0)
-
+
def _end_image(self):
self.pop('image')
self.inimage = 0
@@ -1101,7 +1101,7 @@ class _FeedParserMixin:
self.hasTitle = 0
self.push('textinput', 0)
_start_textInput = _start_textinput
-
+
def _end_textinput(self):
self.pop('textinput')
self.intextinput = 0
@@ -1301,7 +1301,7 @@ class _FeedParserMixin:
self.popContent('subtitle')
_end_tagline = _end_subtitle
_end_itunes_subtitle = _end_subtitle
-
+
def _start_rights(self, attrsD):
self.pushContent('rights', attrsD, 'text/plain', 1)
_start_dc_rights = _start_rights
@@ -1399,7 +1399,7 @@ class _FeedParserMixin:
attrsD['rel']='license'
if value: attrsD['href']=value
context.setdefault('links', []).append(attrsD)
-
+
def _start_creativecommons_license(self, attrsD):
self.push('license', 1)
_start_creativeCommons_license = _start_creativecommons_license
@@ -1420,7 +1420,7 @@ class _FeedParserMixin:
value = FeedParserDict({'relationships': relationships, 'href': href, 'name': name})
if value not in xfn:
xfn.append(value)
-
+
def _addTag(self, term, scheme, label):
context = self._getContext()
tags = context.setdefault('tags', [])
@@ -1438,7 +1438,7 @@ class _FeedParserMixin:
self.push('category', 1)
_start_dc_subject = _start_category
_start_keywords = _start_category
-
+
def _start_media_category(self, attrsD):
attrsD.setdefault('scheme', 'http://search.yahoo.com/mrss/category_schema')
self._start_category(attrsD)
@@ -1446,11 +1446,11 @@ class _FeedParserMixin:
def _end_itunes_keywords(self):
for term in self.pop('itunes_keywords').split():
self._addTag(term, 'http://www.itunes.com/', None)
-
+
def _start_itunes_category(self, attrsD):
self._addTag(attrsD.get('text'), 'http://www.itunes.com/', None)
self.push('category', 1)
-
+
def _end_category(self):
value = self.pop('category')
if not value: return
@@ -1467,7 +1467,7 @@ class _FeedParserMixin:
def _start_cloud(self, attrsD):
self._getContext()['cloud'] = FeedParserDict(attrsD)
-
+
def _start_link(self, attrsD):
attrsD.setdefault('rel', 'alternate')
if attrsD['rel'] == 'self':
@@ -1568,7 +1568,7 @@ class _FeedParserMixin:
context = self._getContext()
if context.has_key('generator_detail'):
context['generator_detail']['name'] = value
-
+
def _start_admin_generatoragent(self, attrsD):
self.push('generator', 1)
value = self._getAttribute(attrsD, 'rdf:resource')
@@ -1583,7 +1583,7 @@ class _FeedParserMixin:
if value:
self.elementstack[-1][2].append(value)
self.pop('errorreportsto')
-
+
def _start_summary(self, attrsD):
context = self._getContext()
if context.has_key('summary'):
@@ -1601,13 +1601,13 @@ class _FeedParserMixin:
self.popContent(self._summaryKey or 'summary')
self._summaryKey = None
_end_itunes_summary = _end_summary
-
+
def _start_enclosure(self, attrsD):
attrsD = self._itsAnHrefDamnIt(attrsD)
context = self._getContext()
attrsD['rel']='enclosure'
context.setdefault('links', []).append(FeedParserDict(attrsD))
-
+
def _start_source(self, attrsD):
if 'url' in attrsD:
# This means that we're processing a source element from an RSS 2.0 feed
@@ -1659,7 +1659,7 @@ class _FeedParserMixin:
if attrsD.get('href'):
self._getContext()['image'] = FeedParserDict({'href': attrsD.get('href')})
_start_itunes_link = _start_itunes_image
-
+
def _end_itunes_block(self):
value = self.pop('itunes_block', 0)
self._getContext()['itunes_block'] = (value == 'yes') and 1 or 0
@@ -1718,12 +1718,12 @@ if _XML_AVAILABLE:
self.bozo = 0
self.exc = None
self.decls = {}
-
+
def startPrefixMapping(self, prefix, uri):
self.trackNamespace(prefix, uri)
if uri == 'http://www.w3.org/1999/xlink':
self.decls['xmlns:'+prefix] = uri
-
+
def startElementNS(self, name, qname, attrs):
namespace, localname = name
lowernamespace = str(namespace or '').lower()
@@ -1910,7 +1910,7 @@ class _BaseHTMLProcessor(sgmllib.SGMLParser):
self.pieces.append('&#%s;' % hex(ord(_cp1252[value]))[1:])
else:
self.pieces.append('&#%(ref)s;' % locals())
-
+
def handle_entityref(self, ref):
# called for each entity reference, e.g. for '&copy;', ref will be 'copy'
# Reconstruct the original entity reference.
@@ -1925,12 +1925,12 @@ class _BaseHTMLProcessor(sgmllib.SGMLParser):
# Store the original text verbatim.
if _debug: sys.stderr.write('_BaseHTMLProcessor, handle_data, text=%s\n' % text)
self.pieces.append(text)
-
+
def handle_comment(self, text):
# called for each HTML comment, e.g. <!-- insert Javascript code here -->
# Reconstruct the original comment.
self.pieces.append('<!--%(text)s-->' % locals())
-
+
def handle_pi(self, text):
# called for each processing instruction, e.g. <?instruction>
# Reconstruct original processing instruction.
@@ -1942,7 +1942,7 @@ class _BaseHTMLProcessor(sgmllib.SGMLParser):
# "http://www.w3.org/TR/html4/loose.dtd">
# Reconstruct original DOCTYPE
self.pieces.append('<!%(text)s>' % locals())
-
+
_new_declname_match = re.compile(r'[a-zA-Z][-_.a-zA-Z0-9:]*\s*').match
def _scan_name(self, i, declstartpos):
rawdata = self.rawdata
@@ -1998,7 +1998,7 @@ class _LooseFeedParser(_FeedParserMixin, _BaseHTMLProcessor):
data = data.replace('&quot;', '"')
data = data.replace('&apos;', "'")
return data
-
+
def strattrs(self, attrs):
return ''.join([' %s="%s"' % (n,v.replace('"','&quot;')) for n,v in attrs])
@@ -2022,12 +2022,12 @@ class _MicroformatsParser:
self.enclosures = []
self.xfn = []
self.vcard = None
-
+
def vcardEscape(self, s):
if type(s) in (type(''), type(u'')):
s = s.replace(',', '\\,').replace(';', '\\;').replace('\n', '\\n')
return s
-
+
def vcardFold(self, s):
s = re.sub(';+$', '', s)
sFolded = ''
@@ -2043,14 +2043,14 @@ class _MicroformatsParser:
def normalize(self, s):
return re.sub(r'\s+', ' ', s).strip()
-
+
def unique(self, aList):
results = []
for element in aList:
if element not in results:
results.append(element)
return results
-
+
def toISO8601(self, dt):
return time.strftime('%Y-%m-%dT%H:%M:%SZ', dt)
@@ -2140,21 +2140,21 @@ class _MicroformatsParser:
def findVCards(self, elmRoot, bAgentParsing=0):
sVCards = ''
-
+
if not bAgentParsing:
arCards = self.getPropertyValue(elmRoot, 'vcard', bAllowMultiple=1)
else:
arCards = [elmRoot]
-
+
for elmCard in arCards:
arLines = []
-
+
def processSingleString(sProperty):
sValue = self.getPropertyValue(elmCard, sProperty, self.STRING, bAutoEscape=1).decode(self.encoding)
if sValue:
arLines.append(self.vcardFold(sProperty.upper() + ':' + sValue))
return sValue or u''
-
+
def processSingleURI(sProperty):
sValue = self.getPropertyValue(elmCard, sProperty, self.URI)
if sValue:
@@ -2177,7 +2177,7 @@ class _MicroformatsParser:
if sContentType:
sContentType = ';TYPE=' + sContentType.upper()
arLines.append(self.vcardFold(sProperty.upper() + sEncoding + sContentType + sValueKey + ':' + sValue))
-
+
def processTypeValue(sProperty, arDefaultType, arForceType=None):
arResults = self.getPropertyValue(elmCard, sProperty, bAllowMultiple=1)
for elmResult in arResults:
@@ -2189,7 +2189,7 @@ class _MicroformatsParser:
sValue = self.getPropertyValue(elmResult, 'value', self.EMAIL, 0)
if sValue:
arLines.append(self.vcardFold(sProperty.upper() + ';TYPE=' + ','.join(arType) + ':' + sValue))
-
+
# AGENT
# must do this before all other properties because it is destructive
# (removes nested class="vcard" nodes so they don't interfere with
@@ -2208,10 +2208,10 @@ class _MicroformatsParser:
sAgentValue = self.getPropertyValue(elmAgent, 'value', self.URI, bAutoEscape=1);
if sAgentValue:
arLines.append(self.vcardFold('AGENT;VALUE=uri:' + sAgentValue))
-
+
# FN (full name)
sFN = processSingleString('fn')
-
+
# N (name)
elmName = self.getPropertyValue(elmCard, 'n')
if elmName:
@@ -2237,25 +2237,25 @@ class _MicroformatsParser:
arLines.append(self.vcardFold('N:' + arNames[0] + ';' + arNames[1]))
else:
arLines.append(self.vcardFold('N:' + arNames[1] + ';' + arNames[0]))
-
+
# SORT-STRING
sSortString = self.getPropertyValue(elmCard, 'sort-string', self.STRING, bAutoEscape=1)
if sSortString:
arLines.append(self.vcardFold('SORT-STRING:' + sSortString))
-
+
# NICKNAME
arNickname = self.getPropertyValue(elmCard, 'nickname', self.STRING, 1, 1)
if arNickname:
arLines.append(self.vcardFold('NICKNAME:' + ','.join(arNickname)))
-
+
# PHOTO
processSingleURI('photo')
-
+
# BDAY
dtBday = self.getPropertyValue(elmCard, 'bday', self.DATE)
if dtBday:
arLines.append(self.vcardFold('BDAY:' + self.toISO8601(dtBday)))
-
+
# ADR (address)
arAdr = self.getPropertyValue(elmCard, 'adr', bAllowMultiple=1)
for elmAdr in arAdr:
@@ -2277,38 +2277,38 @@ class _MicroformatsParser:
sRegion + ';' +
sPostalCode + ';' +
sCountryName))
-
+
# LABEL
processTypeValue('label', ['intl','postal','parcel','work'])
-
+
# TEL (phone number)
processTypeValue('tel', ['voice'])
-
+
# EMAIL
processTypeValue('email', ['internet'], ['internet'])
-
+
# MAILER
processSingleString('mailer')
-
+
# TZ (timezone)
processSingleString('tz')
-
+
# GEO (geographical information)
elmGeo = self.getPropertyValue(elmCard, 'geo')
if elmGeo:
sLatitude = self.getPropertyValue(elmGeo, 'latitude', self.STRING, 0, 1)
sLongitude = self.getPropertyValue(elmGeo, 'longitude', self.STRING, 0, 1)
arLines.append(self.vcardFold('GEO:' + sLatitude + ';' + sLongitude))
-
+
# TITLE
processSingleString('title')
-
+
# ROLE
processSingleString('role')
# LOGO
processSingleURI('logo')
-
+
# ORG (organization)
elmOrg = self.getPropertyValue(elmCard, 'org')
if elmOrg:
@@ -2322,39 +2322,39 @@ class _MicroformatsParser:
else:
arOrganizationUnit = self.getPropertyValue(elmOrg, 'organization-unit', self.STRING, 1, 1)
arLines.append(self.vcardFold('ORG:' + sOrganizationName + ';' + ';'.join(arOrganizationUnit)))
-
+
# CATEGORY
arCategory = self.getPropertyValue(elmCard, 'category', self.STRING, 1, 1) + self.getPropertyValue(elmCard, 'categories', self.STRING, 1, 1)
if arCategory:
arLines.append(self.vcardFold('CATEGORIES:' + ','.join(arCategory)))
-
+
# NOTE
processSingleString('note')
-
+
# REV
processSingleString('rev')
-
+
# SOUND
processSingleURI('sound')
-
+
# UID
processSingleString('uid')
-
+
# URL
processSingleURI('url')
-
+
# CLASS
processSingleString('class')
-
+
# KEY
processSingleURI('key')
-
+
if arLines:
arLines = [u'BEGIN:vCard',u'VERSION:3.0'] + arLines + [u'END:vCard']
sVCards += u'\n'.join(arLines) + u'\n'
-
+
return sVCards.strip()
-
+
def isProbablyDownloadable(self, elm):
attrsD = elm.attrMap
if not attrsD.has_key('href'): return 0
@@ -2453,7 +2453,7 @@ class _RelativeURIResolver(_BaseHTMLProcessor):
def resolveURI(self, uri):
return _makeSafeAbsoluteURI(_urljoin(self.baseuri, uri.strip()))
-
+
def unknown_starttag(self, tag, attrs):
if _debug:
sys.stderr.write('tag: [%s] with attributes: [%s]\n' % (tag, str(attrs)))
@@ -2612,7 +2612,7 @@ class _HTMLSanitizer(_BaseHTMLProcessor):
self.unacceptablestack = 0
self.mathmlOK = 0
self.svgOK = 0
-
+
def unknown_starttag(self, tag, attrs):
acceptable_attributes = self.acceptable_attributes
keymap = {}
@@ -2671,7 +2671,7 @@ class _HTMLSanitizer(_BaseHTMLProcessor):
clean_value = self.sanitize_style(value)
if clean_value: clean_attrs.append((key,clean_value))
_BaseHTMLProcessor.unknown_starttag(self, tag, clean_attrs)
-
+
def unknown_endtag(self, tag):
if not tag in self.acceptable_elements:
if tag in self.unacceptable_elements_with_end_tag:
@@ -2791,7 +2791,7 @@ class _FeedURLHandler(urllib2.HTTPDigestAuthHandler, urllib2.HTTPRedirectHandler
http_error_300 = http_error_302
http_error_303 = http_error_302
http_error_307 = http_error_302
-
+
def http_error_401(self, req, fp, code, msg, headers):
# Check if
# - server requires digest auth, AND
@@ -2890,7 +2890,7 @@ def _open_resource(url_file_stream_or_string, etag, modified, agent, referrer, h
return opener.open(request)
finally:
opener.close() # JohnD
-
+
# try to open with native open function (if url_file_stream_or_string is a filename)
try:
return open(url_file_stream_or_string, 'rb')
@@ -2942,7 +2942,7 @@ _date_handlers = []
def registerDateHandler(func):
'''Register a date handler function (takes string, returns 9-tuple date in GMT)'''
_date_handlers.insert(0, func)
-
+
# ISO-8601 date parsing routines written by Fazal Majid.
# The ISO 8601 standard is very convoluted and irregular - a full ISO 8601
# parser is beyond the scope of feedparser and would be a worthwhile addition
@@ -3055,7 +3055,7 @@ def _parse_date_iso8601(dateString):
# Many implementations have bugs, but we'll pretend they don't.
return time.localtime(time.mktime(tuple(tm)))
registerDateHandler(_parse_date_iso8601)
-
+
# 8-bit date handling routines written by ytrewq1.
_korean_year = u'\ub144' # b3e2 in euc-kr
_korean_month = u'\uc6d4' # bff9 in euc-kr
@@ -3374,7 +3374,7 @@ def _getCharacterEncoding(http_headers, xml_data):
http_headers is a dictionary
xml_data is a raw string (not Unicode)
-
+
This is so much trickier than it sounds, it's not even funny.
According to RFC 3023 ('XML Media Types'), if the HTTP Content-Type
is application/xml, application/*+xml,
@@ -3393,12 +3393,12 @@ def _getCharacterEncoding(http_headers, xml_data):
served with a Content-Type of text/* and no charset parameter
must be treated as us-ascii. (We now do this.) And also that it
must always be flagged as non-well-formed. (We now do this too.)
-
+
If Content-Type is unspecified (input was local file or non-HTTP source)
or unrecognized (server just got it totally wrong), then go by the
encoding given in the XML prefix of the document and default to
'iso-8859-1' as per the HTTP specification (RFC 2616).
-
+
Then, assuming we didn't find a character encoding in the HTTP headers
(and the HTTP Content-type allowed us to look in the body), we need
to sniff the first few bytes of the XML data and try to determine
@@ -3508,7 +3508,7 @@ def _getCharacterEncoding(http_headers, xml_data):
if true_encoding.lower() == 'gb2312':
true_encoding = 'gb18030'
return true_encoding, http_encoding, xml_encoding, sniffed_xml_encoding, acceptable_content_type
-
+
def _toUTF8(data, encoding):
'''Changes an XML data stream on the fly to specify a new encoding
@@ -3571,7 +3571,7 @@ def _stripDoctype(data):
start = re.search(_s2bytes('<\w'), data)
start = start and start.start() or -1
head,data = data[:start+1], data[start+1:]
-
+
entity_pattern = re.compile(_s2bytes(r'^\s*<!ENTITY([^>]*?)>'), re.MULTILINE)
entity_results=entity_pattern.findall(head)
head = entity_pattern.sub(_s2bytes(''), head)
@@ -3593,10 +3593,10 @@ def _stripDoctype(data):
data = doctype_pattern.sub(replacement, head) + data
return version, data, dict(replacement and [(k.decode('utf-8'), v.decode('utf-8')) for k, v in safe_pattern.findall(replacement)])
-
+
def parse(url_file_stream_or_string, etag=None, modified=None, agent=None, referrer=None, handlers=[], request_headers={}, response_headers={}):
'''Parse a feed from a URL, file, stream, or string.
-
+
request_headers, if given, is a dict from http header name to value to add
to the request; this overrides internally generated values.
'''
@@ -3837,7 +3837,7 @@ class TextSerializer(Serializer):
stream.write('\n')
except:
pass
-
+
class PprintSerializer(Serializer):
def write(self, stream=sys.stdout):
if self.results.has_key('href'):
@@ -3845,7 +3845,7 @@ class PprintSerializer(Serializer):
from pprint import pprint
pprint(self.results, stream)
stream.write('\n')
-
+
if __name__ == '__main__':
try:
from optparse import OptionParser