From 5f7d718d688f89c0b66eea5f34f94fa78ab737d4 Mon Sep 17 00:00:00 2001
From: Thomas Gideon <cmdln@thecommandline.net>
Date: Wed, 29 Sep 2010 12:32:16 -0400
Subject: [PATCH] Added comments.

---
 append.py | 91 ++++++++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 84 insertions(+), 7 deletions(-)
diff --git a/append.py b/append.py
index 3c0a09c..7895203 100755
--- a/append.py
+++ b/append.py
@@ -1,4 +1,34 @@
 #!/usr/bin/python
+#
+# append.py - Compares the podcast category feed for the podcast's web site
+# with format specific feeds, adding the newest episode if missing.
+#
+# All project files are made available under the following, BSD-new license.
+# 
+# Copyright (c) 2010, Thomas Gideon
+# All rights reserved.
+# 
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice, this list of conditions and the following disclaimer in the
+#       documentation and/or other materials provided with the distribution.
+#     * Neither the name of Thomas Gideon nor the
+#       names of additional contributors may be used to endorse or promote products
+#       derived from this software without specific prior written permission.
+# 
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THOMAS GIDEON BE LIABLE FOR ANY
+# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 import sys
 import feedparser
 import urllib2
@@ -13,6 +43,7 @@ import datetime
 
 
 def __fetch_feed(url):
+    """ Pull the feed and parse it, logging any errors. """
     try:
         return feedparser.parse(url)
     except HTTPError, e:
@@ -25,6 +56,11 @@ def __fetch_feed(url):
 
 
 def __append(feed, suffix, append_fn):
+    """
+        For the given main site feed, load the appropriate media specific feed
+        and compare.  If the latest episode isn't in the media specific feed,
+        insert it making the necessary adjustments to the new episode's entry.
+    """
     latest = __fetch_feed('cmdln_%s.xml' % suffix).entries[0]
     entry = feed.entries[0]
     if latest.title.find(entry.title) != -1:
@@ -56,11 +92,12 @@ def __append(feed, suffix, append_fn):
 
 
 def __append_non_itunes(entry, output, suffix, base_url):
+    """ 
+        For most of the feeds, new episodes are simple stanzas and the
+        adjustments consist mostly of copying what is in the main site feed's
+        entry and just re-writing the enclosure to the appropriate media file.
+    """
     (url, mime_type, size) = __enclosure(entry.enclosures, base_url, suffix)
-    # Google listen won't play 'application/ogg' and that mime type is currently
-    # returned by archive.org for Ogg Vorbis files
-    if 'ogg' == suffix:
-        mime_type = 'audio/ogg'
     output.write("""        <item>
             <title>%(title)s (Comment Line 240-949-2638)</title>
             <link>%(link)s</link>
@@ -81,6 +118,13 @@ def __append_non_itunes(entry, output, suffix, base_url):
 
 
 def __append_itunes(entry, output, suffix, base_url):
+    """
+        For the iTunes/AAC feed, there are some additional elements that make
+        use of the Apple extensions to RSS.  Some of these, like the duration,
+        author and subtitle, can be copied as is.  The description and summary
+        produced by PodPress is less than desirable so those get munged to
+        something more suitable before writing into the iTunes feed.
+    """
     description = __description(entry.content)
     soup = BeautifulSoup(description)
     summary = '\n\n'.join([''.join(p.findAll(text=True)) for p in soup.findAll('p')])
@@ -115,6 +159,13 @@ def __append_itunes(entry, output, suffix, base_url):
 
 
 def __permalink(title):
+    """ 
+        PodPress uses the opaque permalink from WordPress, basically just a
+        base url with a minimal query string containing the post's internal ID.
+        The OS X app used to maintain these feeds previously, Feeder, munged
+        the title into a nice, readable slug.  This function reproduces what
+        Feed does to populate the permalink element in the feed entry.
+    """
     permalink = title.lower()
     permalink = re.sub('-', '', permalink)
     permalink = re.sub('[^a-z0-9]', '-', permalink)
@@ -125,6 +176,15 @@ def __permalink(title):
 
 
 def __description(content):
+    """ 
+        This function strips out parts of the description used in the main site
+        feed that are less appropriate for the media specific feeds.  PodPress
+        leaves a blank paragraph where its Flash player renders.  The main
+        site's episodes have some extra verbiage after the license image and
+        links, namely the sharing and related posts plugin output.  A simple,
+        bare link is added to the last paragraph for the benefit of aggregators
+        that may strip out HTML.
+    """
     description = content[0].value
     description = re.sub('<p></p>\n', '', description)
     description = re.sub(re.compile('License</a>.</p>.*$', re.M | re.S), 'License</a>.</p>', description)
@@ -133,10 +193,19 @@ def __description(content):
 
 
 def __enclosure(enclosures, base_url, suffix):
+    """ 
+        Uses the file name from the main site's enclosure plus the base_url to
+        pull together values to re-write the attributes for the correct media.
+    """
     m = re.search('cmdln.net_[0-9]{4}-[0-9]{2}-[0-9]{2}', enclosures[0].href)
     url = '%s/%s.%s' % (base_url, m.group(), suffix)
     usock = urllib2.urlopen(url)
-    mime_type = usock.info().type
+    # Google listen won't play 'application/ogg' and that mime type is currently
+    # returned by archive.org for Ogg Vorbis files
+    if 'ogg' == suffix:
+        mime_type = 'audio/ogg'
+    else:
+        mime_type = usock.info().type
     size =  usock.info().get('Content-Length')
     if size is None:
         size = 0
@@ -144,6 +213,11 @@ def __enclosure(enclosures, base_url, suffix):
 
 
 def __archive_slug(title):
+    """ 
+        The Internet Archive transforms the title field of new entries in a
+        specific way.  This is my reverse engineering of their algorithm based
+        on their description and empirical data from dozens of uploads.
+    """
     slug = re.sub('\([^0-9]\)-\([^0-9]\)', '\1\2', title)
     slug = re.sub('[^A-Za-z0-9\-]', ' ', slug)
     slug = re.sub(' {2,}', ' ', slug)
@@ -153,9 +227,11 @@ def __archive_slug(title):
     return slug
 
 
-def main():
+def __main():
     logging.basicConfig(level=logging.INFO,
             format='%(message)s')
+    # pulls the category feed from the web site which will have just the most recent episodes
+    # along with all the iTunes jiggery-pokery PodPress performs
     feed = __fetch_feed('http://thecommandline.net/category/podcast/feed/')
     if feed is None:
         logging.error('Failed to fetch feed.')
@@ -164,8 +240,9 @@ def main():
     __append(feed, 'mp3', __append_non_itunes)
     __append(feed, 'ogg', __append_non_itunes)
     __append(feed, 'm4a', __append_itunes)
+    # TODO add flac
 
 
 if __name__ == "__main__":
     
-    main()
+    __main()