#!/usr/bin/python
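"""Keep the hand-managed cmdln_*.xml podcast feeds for The Command Line
(http://thecommandline.net/) up to date: fetch the source feed, prepend its
newest entry to each local feed file, and refresh the channel dates."""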
import sys
import feedparser
import urllib2
from urllib2 import HTTPError, URLError
import logging
import re
from BeautifulSoup import BeautifulSoup
import shutil
import time
import datetime

def __fetch_feed(url):
    try:
        return feedparser.parse(url)
    except HTTPError as e:
        logging.error('Failed with HTTP status code %d' % e.code)
        return None
    except URLError as e:
        logging.error('Failed to connect with network.')
        logging.debug('Network failure reason, %s.' % e.reason)
        return None

def __append(feed, suffix, append_fn):
    latest = __fetch_feed('cmdln_%s.xml' % suffix).entries[0]
    entry = feed.entries[0]
    if latest.title.find(entry.title) != -1:
        logging.info('%s is up to date.' % suffix)
        return
    base_url = 'http://www.archive.org/download/%s' % __archive_slug(entry.title)
    filename = 'cmdln_%s.xml' % suffix
    today = datetime.date.today()
    backup = '%s.%s' % (filename, today.strftime('%Y-%m-%d'))
    shutil.copy(filename, backup)
    f = open(backup)
    o = open(filename, 'w')
    firstItem = False
    try:
        # the channel's updated time, as a struct_time, in RFC 822 form
        updated = time.strftime('%a, %d %b %Y %X +0000', feed.feed.updated_parsed)
        for line in f:
            # prepend the new entry ahead of the first existing item
            if line.find('<item>') != -1 and not firstItem:
                append_fn(entry, o, suffix, base_url)
                firstItem = True
            # refresh the channel-level dates; the exact tag names and
            # indentation are assumed, since the source listing lost its
            # XML markup
            if line.startswith('        <pubDate>'):
                line = '        <pubDate>%s</pubDate>\n' % updated
            if line.startswith('        <lastBuildDate>'):
                line = '        <lastBuildDate>%s</lastBuildDate>\n' % updated
            o.write(line)
    finally:
        f.close()
        o.close()

def __append_non_itunes(entry, output, suffix, base_url):
    (url, mime_type, size) = __enclosure(entry.enclosures, base_url, suffix)
    # the element markup below is reconstructed from the substitution keys;
    # the source listing lost its XML tags
    output.write("""        <item>
            <title>%(title)s (Comment Line 240-949-2638)</title>
            <link>%(link)s</link>
            <description><![CDATA[%(description)s]]></description>
            <pubDate>%(pubDate)s</pubDate>
            <guid isPermaLink="false">%(permalink)s</guid>
            <enclosure url="%(url)s" type="%(mime_type)s" length="%(size)s"/>
        </item>
""" % { 'title': entry.title,
        'link': entry.link,
        'description': __description(entry.content),
        'pubDate': entry.date,
        'permalink': __permalink(entry.title),
        'url': url,
        'mime_type': mime_type,
        'size': size })
    logging.info('Inserted new %s item.' % suffix)

def __append_itunes(entry, output, suffix, base_url):
    description = __description(entry.content)
    soup = BeautifulSoup(description)
    summary = '\n\n'.join([''.join(p.findAll(text=True)) for p in soup.findAll('p')])
    (url, mime_type, size) = __enclosure(entry.enclosures, base_url, suffix)
    if size == 0:
        raise Exception('Could not find media, %s.' % base_url)
    # as above, the element markup is reconstructed from the substitution
    # keys; the source listing lost its XML tags
    output.write("""        <item>
            <title>%(title)s (Comment Line 240-949-2638)</title>
            <link>%(link)s</link>
            <description><![CDATA[%(description)s]]></description>
            <pubDate>%(pubDate)s</pubDate>
            <guid isPermaLink="false">%(permalink)s</guid>
            <enclosure url="%(url)s" type="%(mime_type)s" length="%(size)s"/>
            <itunes:author>Thomas Gideon</itunes:author>
            <itunes:subtitle>%(subtitle)s</itunes:subtitle>
            <itunes:summary>%(summary)s</itunes:summary>
            <itunes:explicit>no</itunes:explicit>
            <itunes:duration>%(duration)s</itunes:duration>
        </item>
""" % { 'title': entry.title,
        'link': entry.link,
        'description': description,
        'pubDate': entry.date,
        'permalink': __permalink(entry.title),
        'url': url,
        'mime_type': mime_type,
        'size': size,
        'subtitle': ''.join(soup.contents[0].findAll(text=True)),
        'summary': summary,
        'duration': entry.itunes_duration })
    logging.info('Inserted new %s item.' % suffix)

def __permalink(title):
    permalink = title.lower()
    permalink = re.sub('-', '', permalink)
    permalink = re.sub('[^a-z0-9]', '-', permalink)
    permalink = re.sub('-{2,}', '-', permalink)
    if len(permalink) > 48:
        permalink = permalink[:48]
    return permalink
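# For a hypothetical title like 'Security Alerts, Take Two' this yields
# 'security-alerts-take-two': embedded hyphens fold into the word first
# ('Co-Host' becomes 'cohost'), runs of other punctuation collapse to a
# single '-', and slugs cap at 48 characters.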

def __description(content):
    description = content[0].value
    # strip the raw newlines the blog software wraps into the markup
    description = re.sub('\n', '', description)
    # truncate everything after the license link, keeping the link itself;
    # the closing </a> in pattern and replacement is assumed, since the
    # source listing lost its inline markup
    description = re.sub(re.compile(r'License</a>\..*$', re.M | re.S),
                         'License</a>.', description)
    # reintroduce blank lines between paragraphs
    description = re.sub('</p><p>', '</p>\n\n<p>', description)
    # swap the per-post footer for the standing feed blurb; the original
    # pattern was truncated in the source, so the text matched here is a
    # best guess
    return re.sub('<p>View the.*$',
                  '<p>More news, commentary, and alternate feeds available '
                  'at http://thecommandline.net/.</p>', description)
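
# __archive_slug and __enclosure are used above but defined beyond this
# excerpt. The sketches below are assumptions, not the original code: they
# presume the archive.org item identifier is slugged from the episode
# title, that the feed suffixes ('mp3', 'ogg', ...) double as media file
# extensions, and that the media size is probed over HTTP so that a size
# of 0 signals media that has not finished uploading.
def __archive_slug(title):
    # assumed: reuse the permalink slug as the archive.org identifier
    return __permalink(title)

def __enclosure(enclosures, base_url, suffix):
    # re-root the enclosure named in the source feed at the archive.org
    # download URL, then probe it for its size
    for enclosure in enclosures:
        filename = enclosure.href.split('/')[-1]
        if not filename.endswith('.%s' % suffix):
            continue
        url = '%s/%s' % (base_url, filename)
        try:
            size = int(urllib2.urlopen(url).info().getheader('Content-Length', '0'))
        except (HTTPError, URLError):
            size = 0
        return (url, enclosure.type, size)
    return ('', '', 0)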