autocast/relink.py

71 lines
2.1 KiB
Python
Executable file

#!/usr/bin/python
from BeautifulSoup import BeautifulStoneSoup
import re
import urllib2
from urllib2 import HTTPError, URLError
import os.path
import logging
import shutil
import datetime
def __repoint():
logging.basicConfig(level=logging.INFO,
format='%(message)s')
today = datetime.date.today()
filename = 'cmdln_m4a.xml'
backup = '%s.%s' % (filename, today.strftime('%Y-%m-%d'))
shutil.copy(filename, backup)
f = open(backup)
o = open(filename, 'w')
try:
soup = BeautifulStoneSoup(f)
enclosures = soup.findAll('enclosure')
for enclosure in enclosures:
url = enclosure['url']
if url.find('archive.org') != -1:
continue
title = enclosure.findPreviousSibling('title')
rewritten = 'http://www.archive.org/download/%s/%s' % (__archive_slug(title.string),
os.path.basename(url))
(mime_type, length) = __url_info(rewritten)
if mime_type is None:
print 'Could not find media, %s.' % rewritten
break
enclosure['url'] = rewritten
enclosure['type'] = mime_type
enclosure['length'] = length
o.write(str(soup))
finally:
f.close()
o.close()
def __archive_slug(title):
paren = title.find('(')
slug = title[:paren -1]
slug = re.sub('\([^0-9]\)-\([^0-9]\)', '\1\2', slug)
slug = re.sub('[^A-Za-z0-9\-]', ' ', slug)
slug = re.sub(' {2,}', ' ', slug)
tokens = slug.split(' ')
tokens = [t.capitalize() for t in tokens]
slug = ''.join(tokens)
return slug
def __url_info(url):
try:
usock = urllib2.urlopen(url)
if usock.info() is None:
return (None, None)
return (usock.info().type, usock.info().get('Content-Length'))
except HTTPError, e:
logging.error('Failed with HTTP status code %d' % e.code)
return (None, None)
except URLError, e:
logging.error('Failed to connect with network.')
logging.debug('Network failure reason, %s.' % e.reason)
return (None, None)
if __name__ == "__main__":
__repoint()