Added comments.

2010-09-29 12:32:16 -04:00 · 2010-09-29 12:32:16 -04:00 · 5f7d718d68
commit 5f7d718d68
parent ec0bc0569f
1 changed files with 84 additions and 7 deletions
--- a/append.py
+++ b/append.py
@ -1,4 +1,34 @@
 #!/usr/bin/python
 #
 # append.py - Compares the podcast category feed for the podcast's web site
 # with format specific feeds, adding the newest episode if missing.
 #
 # All project files are made available under the following, BSD-new license.
 # 
 # Copyright (c) 2010, Thomas Gideon
 # All rights reserved.
 # 
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions are met:
 #     * Redistributions of source code must retain the above copyright
 #       notice, this list of conditions and the following disclaimer.
 #     * Redistributions in binary form must reproduce the above copyright
 #       notice, this list of conditions and the following disclaimer in the
 #       documentation and/or other materials provided with the distribution.
 #     * Neither the name of Thomas Gideon nor the
 #       names of additional contributors may be used to endorse or promote products
 #       derived from this software without specific prior written permission.
 # 
 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 # DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
 # DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 # ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 import sys
 import feedparser
 import urllib2
@ -13,6 +43,7 @@ import datetime
 def __fetch_feed(url):
    """ Pull the feed and parse it, logging any errors. """
    try:
        return feedparser.parse(url)
    except HTTPError, e:
@ -25,6 +56,11 @@ def __fetch_feed(url):
 def __append(feed, suffix, append_fn):
    """
        For the given main site feed, load the appropriate media specific feed
        and compare.  If the latest episode isn't in the media specific feed,
        insert it making the necessary adjustments to the new episode's entry.
    """
    latest = __fetch_feed('cmdln_%s.xml' % suffix).entries[0]
    entry = feed.entries[0]
    if latest.title.find(entry.title) != -1:
@ -56,11 +92,12 @@ def __append(feed, suffix, append_fn):
 def __append_non_itunes(entry, output, suffix, base_url):
    """ 
        For most of the feeds, new episodes are simple stanzas and the
        adjustments consist mostly of copying what is in the main site feed's
        entry and just re-writing the enclosure to the appropriate media file.
    """
    (url, mime_type, size) = __enclosure(entry.enclosures, base_url, suffix)
    # Google Listen won't play 'application/ogg' and that mime type is currently
    # returned by archive.org for Ogg Vorbis files
    if 'ogg' == suffix:
        mime_type = 'audio/ogg'
    output.write("""        <item>
            <title>%(title)s (Comment Line 240-949-2638)</title>
            <link>%(link)s</link>
@ -81,6 +118,13 @@ def __append_non_itunes(entry, output, suffix, base_url):
 def __append_itunes(entry, output, suffix, base_url):
    """
        For the iTunes/AAC feed, there are some additional elements that make
        use of the Apple extensions to RSS.  Some of these, like the duration,
        author and subtitle, can be copied as is.  The description and summary
        produced by PodPress is less than desirable so those get munged to
        something more suitable before writing into the iTunes feed.
    """
    description = __description(entry.content)
    soup = BeautifulSoup(description)
    summary = '\n\n'.join([''.join(p.findAll(text=True)) for p in soup.findAll('p')])
@ -115,6 +159,13 @@ def __append_itunes(entry, output, suffix, base_url):
 def __permalink(title):
    """ 
        PodPress uses the opaque permalink from WordPress, basically just a
        base url with a minimal query string containing the post's internal ID.
        The OS X app used to maintain these feeds previously, Feeder, munged
        the title into a nice, readable slug.  This function reproduces what
        Feeder does to populate the permalink element in the feed entry.
    """
    permalink = title.lower()
    permalink = re.sub('-', '', permalink)
    permalink = re.sub('[^a-z0-9]', '-', permalink)
@ -125,6 +176,15 @@ def __permalink(title):
 def __description(content):
    """ 
        This function strips out parts of the description used in the main site
        feed that are less appropriate for the media specific feeds.  PodPress
        leaves a blank paragraph where its Flash player renders.  The main
        site's episodes have some extra verbiage after the license image and
        links, namely the sharing and related posts plugin output.  A simple,
        bare link is added to the last paragraph for the benefit of aggregators
        that may strip out HTML.
    """
    description = content[0].value
    description = re.sub('<p></p>\n', '', description)
    description = re.sub(re.compile('License</a>.</p>.*$', re.M | re.S), 'License</a>.</p>', description)
@ -133,9 +193,18 @@ def __description(content):
 def __enclosure(enclosures, base_url, suffix):
    """ 
        Uses the file name from the main site's enclosure plus the base_url to
        pull together values to re-write the attributes for the correct media.
    """
    m = re.search('cmdln.net_[0-9]{4}-[0-9]{2}-[0-9]{2}', enclosures[0].href)
    url = '%s/%s.%s' % (base_url, m.group(), suffix)
    usock = urllib2.urlopen(url)
    # Google Listen won't play 'application/ogg' and that mime type is currently
    # returned by archive.org for Ogg Vorbis files
    if 'ogg' == suffix:
        mime_type = 'audio/ogg'
    else:
        mime_type = usock.info().type
    size =  usock.info().get('Content-Length')
    if size is None:
@ -144,6 +213,11 @@ def __enclosure(enclosures, base_url, suffix):
 def __archive_slug(title):
    """ 
        The Internet Archive transforms the title field of new entries in a
        specific way.  This is my reverse engineering of their algorithm based
        on their description and empirical data from dozens of uploads.
    """
    slug = re.sub('\([^0-9]\)-\([^0-9]\)', '\1\2', title)
    slug = re.sub('[^A-Za-z0-9\-]', ' ', slug)
    slug = re.sub(' {2,}', ' ', slug)
@ -153,9 +227,11 @@ def __archive_slug(title):
    return slug
-def main():
+def __main():
    logging.basicConfig(level=logging.INFO,
            format='%(message)s')
    # pulls the category feed from the web site which will have just the most recent episodes
    # along with all the iTunes jiggery-pokery PodPress performs
    feed = __fetch_feed('http://thecommandline.net/category/podcast/feed/')
    if feed is None:
        logging.error('Failed to fetch feed.')
@ -164,8 +240,9 @@ def main():
    __append(feed, 'mp3', __append_non_itunes)
    __append(feed, 'ogg', __append_non_itunes)
    __append(feed, 'm4a', __append_itunes)
    # TODO add flac
 if __name__ == "__main__":
-    main()
+    __main()