#!/usr/bin/python
# Reconstructed from git commit 9cd50d1bdff631d03e295587c118ca593ce7f7f5
# (author: cmdln, Sat, 25 Sep 2010 17:34:05 -0400), which added this file as
# relink.py (mode 100755) with the subject:
#   "New script to find and re-link enclosures to files hosted at the Archive."
"""Re-point feed enclosures at copies of the media hosted on archive.org.

The script parses an XML feed, and for every <enclosure> whose url does not
already mention archive.org it builds a download URL of the form
http://www.archive.org/download/<slug>/<original file name>, where <slug> is
derived from the preceding <title> element.  The new URL is fetched to learn
its MIME type and Content-Length, the enclosure attributes are updated, and
the resulting document is printed to standard output.

This is Python 2 code: it depends on urllib2 and on BeautifulSoup 3.
"""
from BeautifulSoup import BeautifulStoneSoup
import re
import urllib2
from urllib2 import HTTPError, URLError
import os.path
import logging


def __repoint(feed_path="cmdln_m4a.xml"):
    """Rewrite the enclosures of the feed at *feed_path* and print the result.

    Enclosures whose url already contains 'archive.org' are left untouched.
    Processing stops at the first rewritten URL that cannot be fetched; the
    document, with whatever was rewritten up to that point, is still printed.
    """
    logging.basicConfig(level=logging.INFO,
                        format='%(message)s')
    with open(feed_path) as f:
        soup = BeautifulStoneSoup(f)
        for enclosure in soup.findAll('enclosure'):
            url = enclosure['url']
            if 'archive.org' in url:
                continue
            title = enclosure.findPreviousSibling('title')
            if title is None or title.string is None:
                # Without a title there is nothing to build the slug from;
                # skip this enclosure instead of crashing on None.
                logging.error('No title found for enclosure, %s.', url)
                continue
            rewritten = 'http://www.archive.org/download/%s/%s' % (
                __archive_slug(title.string), os.path.basename(url))
            (mime_type, length) = __url_info(rewritten)
            if mime_type is None:
                print('Could not find media, %s.' % rewritten)
                break
            enclosure['url'] = rewritten
            enclosure['type'] = mime_type
            enclosure['length'] = length
        print(soup)


def __archive_slug(title):
    """Turn an episode title into the slug used in the download URL.

    Everything from the first '(' onwards is discarded, hyphens that sit
    between two non-digit characters are removed, every remaining character
    other than letters, digits and '-' becomes a word break, and the words
    are capitalized and concatenated.
    """
    # str.find returns -1 when there is no '('; in that case keep the whole
    # title rather than slicing characters off its end.
    paren = title.find('(')
    slug = title if paren == -1 else title[:paren]
    # Drop hyphens between non-digits ("re-link" -> "relink") but keep the
    # ones next to digits (e.g. in dates).  Lookarounds are used so that
    # consecutive hyphenated parts ("a-b-c") are all handled.
    slug = re.sub(r'(?<=[^0-9])-(?=[^0-9])', '', slug)
    slug = re.sub(r'[^A-Za-z0-9\-]', ' ', slug)
    slug = re.sub(r' {2,}', ' ', slug)
    tokens = slug.split(' ')
    # Leading/trailing spaces yield empty tokens, which vanish in the join.
    return ''.join(t.capitalize() for t in tokens)


def __url_info(url):
    """Open *url* and return a (mime_type, content_length) tuple.

    content_length is the raw 'Content-Length' header value, or None when the
    header is absent.  (None, None) is returned when the request fails with
    an HTTP error, when the network cannot be reached, or when no header
    information is available.
    """
    try:
        usock = urllib2.urlopen(url)
    except HTTPError as e:
        logging.error('Failed with HTTP status code %d', e.code)
        return (None, None)
    except URLError as e:
        logging.error('Failed to connect with network.')
        logging.debug('Network failure reason, %s.', e.reason)
        return (None, None)
    try:
        headers = usock.info()
        if headers is None:
            return (None, None)
        return (headers.type, headers.get('Content-Length'))
    finally:
        # Only the headers are needed; release the connection right away.
        usock.close()


if __name__ == "__main__":
    __repoint()