Added comments.
This commit is contained in:
parent
ec0bc0569f
commit
5f7d718d68
1 changed files with 84 additions and 7 deletions
89
append.py
89
append.py
|
@ -1,4 +1,34 @@
|
||||||
#!/usr/bin/python
|
#!/usr/bin/python
|
||||||
|
#
|
||||||
|
# append.py - Compares the podcast category feed for the podcast's web site
|
||||||
|
# with format specific feeds, adding the newest episode if missing.
|
||||||
|
#
|
||||||
|
# All project files are made available under the following, BSD-new license.
|
||||||
|
#
|
||||||
|
# Copyright (c) 2010, Thomas Gideon
|
||||||
|
# All rights reserved.
|
||||||
|
#
|
||||||
|
# Redistribution and use in source and binary forms, with or without
|
||||||
|
# modification, are permitted provided that the following conditions are met:
|
||||||
|
# * Redistributions of source code must retain the above copyright
|
||||||
|
# notice, this list of conditions and the following disclaimer.
|
||||||
|
# * Redistributions in binary form must reproduce the above copyright
|
||||||
|
# notice, this list of conditions and the following disclaimer in the
|
||||||
|
# documentation and/or other materials provided with the distribution.
|
||||||
|
# * Neither the name of Thomas Gideon nor the
|
||||||
|
# names of additional contributors may be used to endorse or promote products
|
||||||
|
# derived from this software without specific prior written permission.
|
||||||
|
#
|
||||||
|
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||||
|
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||||
|
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||||
|
# DISCLAIMED. IN NO EVENT SHALL THOMAS GIDEON BE LIABLE FOR ANY
|
||||||
|
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||||
|
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||||
|
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||||
|
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
import sys
|
import sys
|
||||||
import feedparser
|
import feedparser
|
||||||
import urllib2
|
import urllib2
|
||||||
|
@ -13,6 +43,7 @@ import datetime
|
||||||
|
|
||||||
|
|
||||||
def __fetch_feed(url):
|
def __fetch_feed(url):
|
||||||
|
""" Pull the feed and parse it, logging any errors. """
|
||||||
try:
|
try:
|
||||||
return feedparser.parse(url)
|
return feedparser.parse(url)
|
||||||
except HTTPError, e:
|
except HTTPError, e:
|
||||||
|
@ -25,6 +56,11 @@ def __fetch_feed(url):
|
||||||
|
|
||||||
|
|
||||||
def __append(feed, suffix, append_fn):
|
def __append(feed, suffix, append_fn):
|
||||||
|
"""
|
||||||
|
For the given main site feed, load the appropriate media specific feed
|
||||||
|
and compare. If the latest episode isn't in the media specific feed,
|
||||||
|
insert it making the necessary adjustments to the new episode's entry.
|
||||||
|
"""
|
||||||
latest = __fetch_feed('cmdln_%s.xml' % suffix).entries[0]
|
latest = __fetch_feed('cmdln_%s.xml' % suffix).entries[0]
|
||||||
entry = feed.entries[0]
|
entry = feed.entries[0]
|
||||||
if latest.title.find(entry.title) != -1:
|
if latest.title.find(entry.title) != -1:
|
||||||
|
@ -56,11 +92,12 @@ def __append(feed, suffix, append_fn):
|
||||||
|
|
||||||
|
|
||||||
def __append_non_itunes(entry, output, suffix, base_url):
|
def __append_non_itunes(entry, output, suffix, base_url):
|
||||||
|
"""
|
||||||
|
For most of the feeds, new episodes are simple stanzas and the
|
||||||
|
adjustments consist mostly of copying what is in the main site feed's
|
||||||
|
entry and just re-writing the enclosure to the appropriate media file.
|
||||||
|
"""
|
||||||
(url, mime_type, size) = __enclosure(entry.enclosures, base_url, suffix)
|
(url, mime_type, size) = __enclosure(entry.enclosures, base_url, suffix)
|
||||||
# Google listen won't play 'application/ogg' and that mime type is currently
|
|
||||||
# returned by archive.org for Ogg Vorbis files
|
|
||||||
if 'ogg' == suffix:
|
|
||||||
mime_type = 'audio/ogg'
|
|
||||||
output.write(""" <item>
|
output.write(""" <item>
|
||||||
<title>%(title)s (Comment Line 240-949-2638)</title>
|
<title>%(title)s (Comment Line 240-949-2638)</title>
|
||||||
<link>%(link)s</link>
|
<link>%(link)s</link>
|
||||||
|
@ -81,6 +118,13 @@ def __append_non_itunes(entry, output, suffix, base_url):
|
||||||
|
|
||||||
|
|
||||||
def __append_itunes(entry, output, suffix, base_url):
|
def __append_itunes(entry, output, suffix, base_url):
|
||||||
|
"""
|
||||||
|
For the iTunes/AAC feed, there are some additional elements that make
|
||||||
|
use of the Apple extensions to RSS. Some of these, like the duration,
|
||||||
|
author and subtitle, can be copied as is. The description and summary
|
||||||
|
produced by PodPress is less than desirable so those get munged to
|
||||||
|
something more suitable before writing into the iTunes feed.
|
||||||
|
"""
|
||||||
description = __description(entry.content)
|
description = __description(entry.content)
|
||||||
soup = BeautifulSoup(description)
|
soup = BeautifulSoup(description)
|
||||||
summary = '\n\n'.join([''.join(p.findAll(text=True)) for p in soup.findAll('p')])
|
summary = '\n\n'.join([''.join(p.findAll(text=True)) for p in soup.findAll('p')])
|
||||||
|
@ -115,6 +159,13 @@ def __append_itunes(entry, output, suffix, base_url):
|
||||||
|
|
||||||
|
|
||||||
def __permalink(title):
|
def __permalink(title):
|
||||||
|
"""
|
||||||
|
PodPress uses the opaque permalink from WordPress, basically just a
|
||||||
|
base url with a minimal query string containing the post's internal ID.
|
||||||
|
The OS X app used to maintain these feeds previously, Feeder, munged
|
||||||
|
the title into a nice, readable slug. This function reproduces what
|
||||||
|
Feeder does to populate the permalink element in the feed entry.
|
||||||
|
"""
|
||||||
permalink = title.lower()
|
permalink = title.lower()
|
||||||
permalink = re.sub('-', '', permalink)
|
permalink = re.sub('-', '', permalink)
|
||||||
permalink = re.sub('[^a-z0-9]', '-', permalink)
|
permalink = re.sub('[^a-z0-9]', '-', permalink)
|
||||||
|
@ -125,6 +176,15 @@ def __permalink(title):
|
||||||
|
|
||||||
|
|
||||||
def __description(content):
|
def __description(content):
|
||||||
|
"""
|
||||||
|
This function strips out parts of the description used in the main site
|
||||||
|
feed that are less appropriate for the media specific feeds. PodPress
|
||||||
|
leaves a blank paragraph where its Flash player renders. The main
|
||||||
|
site's episodes have some extra verbiage after the license image and
|
||||||
|
links, namely the sharing and related posts plugin output. A simple,
|
||||||
|
bare link is added to the last paragraph for the benefit of aggregators
|
||||||
|
that may strip out HTML.
|
||||||
|
"""
|
||||||
description = content[0].value
|
description = content[0].value
|
||||||
description = re.sub('<p></p>\n', '', description)
|
description = re.sub('<p></p>\n', '', description)
|
||||||
description = re.sub(re.compile('License</a>.</p>.*$', re.M | re.S), 'License</a>.</p>', description)
|
description = re.sub(re.compile('License</a>.</p>.*$', re.M | re.S), 'License</a>.</p>', description)
|
||||||
|
@ -133,9 +193,18 @@ def __description(content):
|
||||||
|
|
||||||
|
|
||||||
def __enclosure(enclosures, base_url, suffix):
|
def __enclosure(enclosures, base_url, suffix):
|
||||||
|
"""
|
||||||
|
Uses the file name from the main site's enclosure plus the base_url to
|
||||||
|
pull together values to re-write the attributes for the correct media.
|
||||||
|
"""
|
||||||
m = re.search('cmdln.net_[0-9]{4}-[0-9]{2}-[0-9]{2}', enclosures[0].href)
|
m = re.search('cmdln.net_[0-9]{4}-[0-9]{2}-[0-9]{2}', enclosures[0].href)
|
||||||
url = '%s/%s.%s' % (base_url, m.group(), suffix)
|
url = '%s/%s.%s' % (base_url, m.group(), suffix)
|
||||||
usock = urllib2.urlopen(url)
|
usock = urllib2.urlopen(url)
|
||||||
|
# Google listen won't play 'application/ogg' and that mime type is currently
|
||||||
|
# returned by archive.org for Ogg Vorbis files
|
||||||
|
if 'ogg' == suffix:
|
||||||
|
mime_type = 'audio/ogg'
|
||||||
|
else:
|
||||||
mime_type = usock.info().type
|
mime_type = usock.info().type
|
||||||
size = usock.info().get('Content-Length')
|
size = usock.info().get('Content-Length')
|
||||||
if size is None:
|
if size is None:
|
||||||
|
@ -144,6 +213,11 @@ def __enclosure(enclosures, base_url, suffix):
|
||||||
|
|
||||||
|
|
||||||
def __archive_slug(title):
|
def __archive_slug(title):
|
||||||
|
"""
|
||||||
|
The Internet Archive transforms the title field of new entries in a
|
||||||
|
specific way. This is my reverse engineering of their algorithm based
|
||||||
|
on their description and empirical data from dozens of uploads.
|
||||||
|
"""
|
||||||
slug = re.sub('\([^0-9]\)-\([^0-9]\)', '\1\2', title)
|
slug = re.sub('\([^0-9]\)-\([^0-9]\)', '\1\2', title)
|
||||||
slug = re.sub('[^A-Za-z0-9\-]', ' ', slug)
|
slug = re.sub('[^A-Za-z0-9\-]', ' ', slug)
|
||||||
slug = re.sub(' {2,}', ' ', slug)
|
slug = re.sub(' {2,}', ' ', slug)
|
||||||
|
@ -153,9 +227,11 @@ def __archive_slug(title):
|
||||||
return slug
|
return slug
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def __main():
|
||||||
logging.basicConfig(level=logging.INFO,
|
logging.basicConfig(level=logging.INFO,
|
||||||
format='%(message)s')
|
format='%(message)s')
|
||||||
|
# pulls the category feed from the web site which will have just the most recent episodes
|
||||||
|
# along with all the iTunes jiggery-pokery PodPress performs
|
||||||
feed = __fetch_feed('http://thecommandline.net/category/podcast/feed/')
|
feed = __fetch_feed('http://thecommandline.net/category/podcast/feed/')
|
||||||
if feed is None:
|
if feed is None:
|
||||||
logging.error('Failed to fetch feed.')
|
logging.error('Failed to fetch feed.')
|
||||||
|
@ -164,8 +240,9 @@ def main():
|
||||||
__append(feed, 'mp3', __append_non_itunes)
|
__append(feed, 'mp3', __append_non_itunes)
|
||||||
__append(feed, 'ogg', __append_non_itunes)
|
__append(feed, 'ogg', __append_non_itunes)
|
||||||
__append(feed, 'm4a', __append_itunes)
|
__append(feed, 'm4a', __append_itunes)
|
||||||
|
# TODO add flac
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|
||||||
main()
|
__main()
|
||||||
|
|
Loading…
Reference in a new issue