Changing feed append code to use an external configuration file.
This commit is contained in:
parent
cf706d7ece
commit
6fde85012d
4 changed files with 50 additions and 24 deletions
1
.gitignore
vendored
1
.gitignore
vendored
|
@ -12,3 +12,4 @@
|
||||||
*.m4a
|
*.m4a
|
||||||
*.chapters.txt
|
*.chapters.txt
|
||||||
*.conf
|
*.conf
|
||||||
|
*.feed
|
||||||
|
|
1
README
1
README
|
@ -9,3 +9,4 @@ with_offset.xsl - Transform that handles the recursive structure of OmniOutliner
|
||||||
without_offset.xsl - Transform that handles the recursive structure of OmniOutliner files better than Beautiful Soup does, works with segment notes that do not have time offsets.
|
without_offset.xsl - Transform that handles the recursive structure of OmniOutliner files better than Beautiful Soup does, works with segment notes that do not have time offsets.
|
||||||
outline.bash - Drives the XSLT operation and subsequent scripting tasks that cannot be handled in XSL.
|
outline.bash - Drives the XSLT operation and subsequent scripting tasks that cannot be handled in XSL.
|
||||||
cleanup.bash - Remove output files produced from encode.bash
|
cleanup.bash - Remove output files produced from encode.bash
|
||||||
|
restore.bash - Restore today's backups of the feed files, replacing the current feed files.
|
||||||
|
|
60
append.py
60
append.py
|
@ -53,13 +53,14 @@ def __fetch_feed(url):
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
def __append(feed, suffix, append_fn):
|
def __append(config, feed, suffix, append_fn):
|
||||||
"""
|
"""
|
||||||
For the given main site feed, load the appropriate media specific feed
|
For the given main site feed, load the appropriate media specific feed
|
||||||
and compare. If the latest episode isn't in the media specific feed,
|
and compare. If the latest episode isn't in the media specific feed,
|
||||||
insert it making the necessary adjustments to the new episode's entry.
|
insert it making the necessary adjustments to the new episode's entry.
|
||||||
"""
|
"""
|
||||||
latest = __fetch_feed('cmdln_%s.xml' % suffix).entries[0]
|
local_file = '%s%s.xml' % (config['file_prefix'], suffix)
|
||||||
|
latest = __fetch_feed(local_file).entries[0]
|
||||||
entry = feed.entries[0]
|
entry = feed.entries[0]
|
||||||
if latest.title.find(entry.title) != -1:
|
if latest.title.find(entry.title) != -1:
|
||||||
logging.info('%s is up to date.' % suffix)
|
logging.info('%s is up to date.' % suffix)
|
||||||
|
@ -67,7 +68,7 @@ def __append(feed, suffix, append_fn):
|
||||||
|
|
||||||
entry.title.encode('ascii')
|
entry.title.encode('ascii')
|
||||||
base_url = 'http://www.archive.org/download/%s' % __archive_slug(entry.title)
|
base_url = 'http://www.archive.org/download/%s' % __archive_slug(entry.title)
|
||||||
filename = 'cmdln_%s.xml' % suffix
|
filename = '%s%s.xml' % (config['file_prefix'], suffix)
|
||||||
today = datetime.date.today()
|
today = datetime.date.today()
|
||||||
backup = '%s.%s' % (filename, today.strftime('%Y-%m-%d'))
|
backup = '%s.%s' % (filename, today.strftime('%Y-%m-%d'))
|
||||||
shutil.copy(filename, backup)
|
shutil.copy(filename, backup)
|
||||||
|
@ -78,7 +79,7 @@ def __append(feed, suffix, append_fn):
|
||||||
updated = time.strftime('%a, %d %b %Y %X +0000', feed.updated)
|
updated = time.strftime('%a, %d %b %Y %X +0000', feed.updated)
|
||||||
for line in f:
|
for line in f:
|
||||||
if line.find('<item>') != -1 and not firstItem:
|
if line.find('<item>') != -1 and not firstItem:
|
||||||
append_fn(entry, o, suffix, base_url)
|
append_fn(config, entry, o, suffix, base_url)
|
||||||
firstItem = True
|
firstItem = True
|
||||||
if line.startswith(' <pubDate>'):
|
if line.startswith(' <pubDate>'):
|
||||||
line = ' <pubDate>%s</pubDate>\n' % updated
|
line = ' <pubDate>%s</pubDate>\n' % updated
|
||||||
|
@ -90,15 +91,15 @@ def __append(feed, suffix, append_fn):
|
||||||
o.close()
|
o.close()
|
||||||
|
|
||||||
|
|
||||||
def __append_non_itunes(entry, output, suffix, base_url):
|
def __append_non_itunes(config, entry, output, suffix, base_url):
|
||||||
"""
|
"""
|
||||||
For most of the feeds, new episodes are simple stanzas and the
|
For most of the feeds, new episodes are simple stanzas and the
|
||||||
adjustments consist mostly of copying what is in the main site feed's
|
adjustments consist mostly of copying what is in the main site feed's
|
||||||
entry and just re-writing the enclosure to the appropriate media file.
|
entry and just re-writing the enclosure to the appropriate media file.
|
||||||
"""
|
"""
|
||||||
(url, mime_type, size) = __enclosure(entry.enclosures, base_url, suffix)
|
(url, mime_type, size) = __enclosure(config, entry.enclosures, base_url, suffix)
|
||||||
output.write(""" <item>
|
output.write(""" <item>
|
||||||
<title>%(title)s (Comment Line 240-949-2638)</title>
|
<title>%(title)s%(title_suffix)s</title>
|
||||||
<link>%(link)s</link>
|
<link>%(link)s</link>
|
||||||
<description><![CDATA[%(description)s]]></description>
|
<description><![CDATA[%(description)s]]></description>
|
||||||
<pubDate>%(pubDate)s</pubDate>
|
<pubDate>%(pubDate)s</pubDate>
|
||||||
|
@ -107,16 +108,17 @@ def __append_non_itunes(entry, output, suffix, base_url):
|
||||||
</item>
|
</item>
|
||||||
""" % { 'title': entry.title,
|
""" % { 'title': entry.title,
|
||||||
'link': entry.link,
|
'link': entry.link,
|
||||||
'description': __description(entry.content),
|
'description': __description(config, entry.content),
|
||||||
'pubDate' : entry.date,
|
'pubDate' : entry.date,
|
||||||
'permalink' : __permalink(entry.title),
|
'permalink' : __permalink(entry.title),
|
||||||
'url' : url,
|
'url' : url,
|
||||||
'mime_type' : mime_type,
|
'mime_type' : mime_type,
|
||||||
'size' : size })
|
'size' : size,
|
||||||
|
'title_suffix': config['title_suffix'] })
|
||||||
logging.info('Inserted new %s item.' % suffix)
|
logging.info('Inserted new %s item.' % suffix)
|
||||||
|
|
||||||
|
|
||||||
def __append_itunes(entry, output, suffix, base_url):
|
def __append_itunes(config, entry, output, suffix, base_url):
|
||||||
"""
|
"""
|
||||||
For the iTunes/AAC feed, there are some additional elements that make
|
For the iTunes/AAC feed, there are some additional elements that make
|
||||||
use of the Apple extensions to RSS. Some of these, like the duration,
|
use of the Apple extensions to RSS. Some of these, like the duration,
|
||||||
|
@ -124,20 +126,20 @@ def __append_itunes(entry, output, suffix, base_url):
|
||||||
produced by PodPress is less than desirable so those get munged to
|
produced by PodPress is less than desirable so those get munged to
|
||||||
something more suitable before writing into the iTunes feed.
|
something more suitable before writing into the iTunes feed.
|
||||||
"""
|
"""
|
||||||
description = __description(entry.content)
|
description = __description(config, entry.content)
|
||||||
soup = BeautifulSoup(description)
|
soup = BeautifulSoup(description)
|
||||||
summary = '\n\n'.join([''.join(p.findAll(text=True)) for p in soup.findAll('p')])
|
summary = '\n\n'.join([''.join(p.findAll(text=True)) for p in soup.findAll('p')])
|
||||||
(url, mime_type, size) = __enclosure(entry.enclosures, base_url, suffix)
|
(url, mime_type, size) = __enclosure(config, entry.enclosures, base_url, suffix)
|
||||||
if size == 0:
|
if size == 0:
|
||||||
raise Exception('Couldn not find media, %s.' % base_url)
|
raise Exception('Couldn not find media, %s.' % base_url)
|
||||||
output.write(""" <item>
|
output.write(""" <item>
|
||||||
<title>%(title)s (Comment Line 240-949-2638)</title>
|
<title>%(title)s%(title_suffix)s</title>
|
||||||
<link>%(link)s</link>
|
<link>%(link)s</link>
|
||||||
<description><![CDATA[%(description)s]]></description>
|
<description><![CDATA[%(description)s]]></description>
|
||||||
<pubDate>%(pubDate)s</pubDate>
|
<pubDate>%(pubDate)s</pubDate>
|
||||||
<enclosure url="%(url)s" length="%(size)s" type="%(mime_type)s"/>
|
<enclosure url="%(url)s" length="%(size)s" type="%(mime_type)s"/>
|
||||||
<guid isPermaLink="false">%(permalink)s</guid>
|
<guid isPermaLink="false">%(permalink)s</guid>
|
||||||
<itunes:author>Thomas Gideon</itunes:author>
|
<itunes:author>%(author)s</itunes:author>
|
||||||
<itunes:subtitle>%(subtitle)s</itunes:subtitle>
|
<itunes:subtitle>%(subtitle)s</itunes:subtitle>
|
||||||
<itunes:summary>%(summary)s</itunes:summary>
|
<itunes:summary>%(summary)s</itunes:summary>
|
||||||
<itunes:explicit>no</itunes:explicit>
|
<itunes:explicit>no</itunes:explicit>
|
||||||
|
@ -153,7 +155,9 @@ def __append_itunes(entry, output, suffix, base_url):
|
||||||
'size' : size,
|
'size' : size,
|
||||||
'subtitle' : ''.join(soup.contents[0].findAll(text = True)),
|
'subtitle' : ''.join(soup.contents[0].findAll(text = True)),
|
||||||
'summary' : summary,
|
'summary' : summary,
|
||||||
'duration' : entry.itunes_duration })
|
'duration' : entry.itunes_duration,
|
||||||
|
'title_suffix': config['title_suffix'],
|
||||||
|
'author': config['author'] })
|
||||||
logging.info('Inserted new %s item.' % suffix)
|
logging.info('Inserted new %s item.' % suffix)
|
||||||
|
|
||||||
|
|
||||||
|
@ -174,7 +178,7 @@ def __permalink(title):
|
||||||
return permalink
|
return permalink
|
||||||
|
|
||||||
|
|
||||||
def __description(content):
|
def __description(config, content):
|
||||||
"""
|
"""
|
||||||
This function strips out parts of the description used in the main site
|
This function strips out parts of the description used in the main site
|
||||||
feed that are less appropriate for the media specific feeds. PodPress
|
feed that are less appropriate for the media specific feeds. PodPress
|
||||||
|
@ -191,12 +195,12 @@ def __description(content):
|
||||||
return re.sub('<p>View the <a', '<p>More news, commentary, and alternate feeds available at http://thecommandline.net/. View the <a', description)
|
return re.sub('<p>View the <a', '<p>More news, commentary, and alternate feeds available at http://thecommandline.net/. View the <a', description)
|
||||||
|
|
||||||
|
|
||||||
def __enclosure(enclosures, base_url, suffix):
|
def __enclosure(config, enclosures, base_url, suffix):
|
||||||
"""
|
"""
|
||||||
Uses the file name from the main site's enclosure plus the base_url to
|
Uses the file name from the main site's enclosure plus the base_url to
|
||||||
pull together values to re-write the attributes for the correct media.
|
pull together values to re-write the attributes for the correct media.
|
||||||
"""
|
"""
|
||||||
m = re.search('cmdln.net_[0-9]{4}-[0-9]{2}-[0-9]{2}', enclosures[0].href)
|
m = re.search('%s[0-9]{4}-[0-9]{2}-[0-9]{2}' % config['enclosure_prefix'], enclosures[0].href)
|
||||||
url = '%s/%s.%s' % (base_url, m.group(), suffix)
|
url = '%s/%s.%s' % (base_url, m.group(), suffix)
|
||||||
usock = urllib2.urlopen(url)
|
usock = urllib2.urlopen(url)
|
||||||
# Google listen won't play 'application/ogg' and that mime type is currently
|
# Google listen won't play 'application/ogg' and that mime type is currently
|
||||||
|
@ -226,22 +230,30 @@ def __archive_slug(title):
|
||||||
return slug
|
return slug
|
||||||
|
|
||||||
|
|
||||||
def __main():
|
def __main(feed_file):
|
||||||
logging.basicConfig(level=logging.INFO,
|
logging.basicConfig(level=logging.INFO,
|
||||||
format='%(message)s')
|
format='%(message)s')
|
||||||
|
f = open(feed_file)
|
||||||
|
config = dict()
|
||||||
|
try:
|
||||||
|
for line in f:
|
||||||
|
(name, value) = line.split('=')
|
||||||
|
config[name] = value.rstrip()
|
||||||
|
finally:
|
||||||
|
f.close()
|
||||||
# pulls the category feed from the web site which will have just the most recent episodes
|
# pulls the category feed from the web site which will have just the most recent episodes
|
||||||
# along with all the iTunes jiggery-pokery PodPress performs
|
# along with all the iTunes jiggery-pokery PodPress performs
|
||||||
feed = __fetch_feed('http://thecommandline.net/category/podcast/feed/')
|
feed = __fetch_feed(config['url'])
|
||||||
if feed is None:
|
if feed is None:
|
||||||
logging.error('Failed to fetch feed.')
|
logging.error('Failed to fetch feed.')
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
__append(feed, 'mp3', __append_non_itunes)
|
__append(config, feed, 'mp3', __append_non_itunes)
|
||||||
__append(feed, 'ogg', __append_non_itunes)
|
__append(config, feed, 'ogg', __append_non_itunes)
|
||||||
__append(feed, 'm4a', __append_itunes)
|
__append(config, feed, 'm4a', __append_itunes)
|
||||||
# TODO add flac
|
# TODO add flac
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|
||||||
__main()
|
__main(sys.argv[1])
|
||||||
|
|
12
restore.bash
Executable file
12
restore.bash
Executable file
|
@ -0,0 +1,12 @@
|
||||||
|
#!/bin/bash
|
||||||
|
function restore {
|
||||||
|
backup=${1}.$(date +%Y-%m-%d)
|
||||||
|
if [ -f "$backup" ]
|
||||||
|
then
|
||||||
|
echo "Restoring $backup"
|
||||||
|
mv ${1}.$(date +%Y-%m-%d) ${1}
|
||||||
|
fi
|
||||||
|
}
|
||||||
|
restore cmdln_mp3.xml
|
||||||
|
restore cmdln_m4a.xml
|
||||||
|
restore cmdln_ogg.xml
|
Loading…
Reference in a new issue