From d740c98b52bc5e88425b5f121b49285840c69ee9 Mon Sep 17 00:00:00 2001 From: cmdln Date: Sat, 12 Mar 2011 11:55:06 -0500 Subject: [PATCH] Fix en dash handling as well as m4a mimetype. --- append.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/append.py b/append.py index c279cbd..3852ce1 100755 --- a/append.py +++ b/append.py @@ -66,7 +66,6 @@ def __append(config, feed, suffix, append_fn): logging.info('%s is up to date.' % suffix) return - entry.title.encode('ascii') base_url = 'http://www.archive.org/download/%s' % __archive_slug(entry.title) filename = '%s%s.xml' % (config['file_prefix'], suffix) today = datetime.date.today() @@ -106,7 +105,7 @@ def __append_non_itunes(config, entry, output, suffix, base_url): %(permalink)s -""" % { 'title': entry.title, +""" % { 'title': __title(entry.title), 'link': entry.link, 'description': __description(config, entry.content), 'pubDate' : entry.date, @@ -145,7 +144,7 @@ def __append_itunes(config, entry, output, suffix, base_url): no %(duration)s -""" % { 'title': entry.title, +""" % { 'title': __title(entry.title), 'link': entry.link, 'description': description, 'pubDate' : entry.date, @@ -178,6 +177,12 @@ def __permalink(title): return permalink +def __title(title): + fixed = title + fixed = re.sub(u'\u2013', '-', fixed) + return fixed + + def __description(config, content): """ This function strips out parts of the description used in the main site @@ -207,6 +212,8 @@ def __enclosure(config, enclosures, base_url, suffix): # returned by archive.org for Ogg Vorbis files if 'ogg' == suffix: mime_type = 'audio/ogg' + elif 'm4a' == suffix: + mime_type = 'audio/mp4' else: mime_type = usock.info().type size = usock.info().get('Content-Length') @@ -222,6 +229,7 @@ def __archive_slug(title): on their description and empirical data from dozens of uploads. """ slug = re.sub('\([^0-9]\)-\([^0-9]\)', '\1\2', title) + slug = re.sub(u'\u2013', '-', slug) slug = re.sub('[^A-Za-z0-9\-\.]', ' ', slug) slug = re.sub(' {2,}', ' ', slug) tokens = slug.split(' ')