import sys import feedparser import urllib2 from urllib2 import HTTPError, URLError import logging import re from BeautifulSoup import BeautifulSoup def __fetch_feed(url): try: feed = feedparser.parse(url) return feed.entries[0] except HTTPError, e: logging.error('Failed with HTTP status code %d' % e.code) return None except URLError, e: logging.error('Failed to connect with network.') logging.debug('Network failure reason, %s.' % e.reason) return None def __append(entry, suffix, append_fn, args=None): latest = __fetch_feed('cmdln_%s.xml' % suffix) if entry.title.find(latest.title) != -1: logging.info('Up to date.') return f = open('cmdln_%s.xml' % suffix) o = open('cmdln_%s_out.xml' % suffix, 'w') first = False try: for line in f: if line.find('') != -1 and not first: append_fn(entry, o, suffix, args) first = True o.write(line) finally: f.close() def __append_non_itunes(entry, output, suffix, args): (url, mime_type, size) = __enclosure(entry.enclosures, 'http://cmdln.evenflow.nl/mp3', suffix) output.write(""" %(title)s (Comment Line 240-949-2638) %(link)s %(pubDate)s %(permalink)s """ % { 'title': entry.title, 'link': entry.link, 'description': __description(entry.content), 'pubDate' : entry.date, 'permalink' : __permalink(entry.title), 'url' : url, 'mime_type' : mime_type, 'size' : size }) logging.info('Inserted new %s item.' % suffix) def __append_itunes(entry, output, suffix, args): description = __description(entry.content) soup = BeautifulSoup(description) summary = '\n\n'.join([''.join(p.findAll(text=True)) for p in soup.findAll('p')]) (url, mime_type, size) = __enclosure(entry.enclosures, 'http://traffic.libsyn.com/cmdln', suffix) output.write(""" %(title)s (Comment Line 240-949-2638) %(link)s %(pubDate)s %(permalink)s Thomas Gideon %(subtitle)s %(summary)s no %(duration)s """ % { 'title': entry.title, 'link': entry.link, 'description': description, 'pubDate' : entry.date, 'permalink' : __permalink(entry.title), 'url' : url, 'mime_type' : mime_type, 'size' : size, 'subtitle' : ''.join(soup.contents[0].findAll(text = True)), 'summary' : summary, 'duration' : args[1] }) logging.info('Inserted new %s item.' % suffix) def __permalink(title): permalink = title.lower() permalink = re.sub('-', '', permalink) permalink = re.sub('[^a-z0-9]', '-', permalink) permalink = re.sub('-{2,}', '-', permalink) if len(permalink) > 48: permalink = permalink[:48] return permalink def __description(content): description = content[0].value description = re.sub('

\n', '', description) description = re.sub(re.compile('License.

.*$', re.M | re.S), 'License.

', description) description = re.sub('

\n', '

\n\n', description) return re.sub('

View the More news, commentary, and alternate feeds available at http://thecommandline.net/. View the