Fixed an annoying Unicode/ASCII encoding issue caused by a non-breaking space character (U+00A0) getting into the feed.

This commit is contained in:
Thomas Gideon 2010-09-29 21:31:37 -04:00
parent 7477e62159
commit c278f7556c

View file

@@ -65,6 +65,7 @@ def __append(feed, suffix, append_fn):
logging.info('%s is up to date.' % suffix) logging.info('%s is up to date.' % suffix)
return return
entry.title.encode('ascii')
base_url = 'http://www.archive.org/download/%s' % __archive_slug(entry.title) base_url = 'http://www.archive.org/download/%s' % __archive_slug(entry.title)
filename = 'cmdln_%s.xml' % suffix filename = 'cmdln_%s.xml' % suffix
today = datetime.date.today() today = datetime.date.today()
@@ -183,7 +184,7 @@ def __description(content):
bare link is added to the last paragraph for the benefit of aggregators bare link is added to the last paragraph for the benefit of aggregators
that may strip out HTML. that may strip out HTML.
""" """
description = content[0].value description = re.sub(u'\xa0', ' ', content[0].value)
description = re.sub('<p></p>\n', '', description) description = re.sub('<p></p>\n', '', description)
description = re.sub(re.compile('License</a>.</p>.*$', re.M | re.S), 'License</a>.</p>', description) description = re.sub(re.compile('License</a>.</p>.*$', re.M | re.S), 'License</a>.</p>', description)
description = re.sub('</p>\n', '</p>\n\n', description) description = re.sub('</p>\n', '</p>\n\n', description)