#!/usr/bin/env python
import os
import optparse
import urllib2
import urlparse
from urllib2 import HTTPError, URLError
from BeautifulSoup import BeautifulSoup

__usage__ = '''
tormon.py -O ~/test/tormon -u "http://rss.feed"
'''


class Main(object):
    '''
    tormon checks an rss feed for new torrents. When it finds a new
    .torrent, it downloads it to a specified output directory, where
    (presumably) a monitoring torrent program will download the
    corresponding file.
    '''

    def parse_options(self):
        usage = 'usage: %prog [options]' + __usage__
        parser = optparse.OptionParser(usage=usage)
        parser.add_option(
            '-O', '--output_dir',
            dest='output_dir',
            help='directory into which new torrents are saved',
            metavar='DIR')
        parser.add_option(
            '-f', '--filetype',
            dest='filetype',
            action='append',
            default=[],
            help='admissible file types',
            metavar='TYPE')
        parser.add_option(
            '-d', '--downloaded_torrents',
            dest='downloaded_torrents',
            default=os.path.expanduser('~/.downloaded_torrents'),
            help='log of already downloaded torrents',
            metavar='FILE')
        parser.add_option(
            '-e', '--error_log',
            dest='error_log',
            help='log of torrents tormon failed to download',
            metavar='FILE')
        parser.add_option(
            '-b', '--batch',
            dest='batch',
            help='file containing list of rss feed urls',
            metavar='FILE')
        parser.add_option(
            '-u', '--url',
            dest='url',
            action='append',
            default=[],
            help='url of the rss feed',
            metavar='URL')
        parser.add_option(
            '-m', '--mark_all_downloaded',
            dest='mark_all_downloaded',
            action='store_true',
            default=False,
            help='mark all torrents as already downloaded')
        parser.add_option(
            '-M', '--match_by_filename',
            dest='match_by_filename',
            action='store_true',
            default=False,
            help='recognize downloaded files by filename, not URL. '
                 'Matching by URL is the default.')
        (self.opt, args) = parser.parse_args()
        if self.opt.batch:
            # One feed url per line; blank lines and '#' comments are skipped.
            for line in open(self.opt.batch, 'r'):
                line = line.strip()
                if line and not line.startswith('#'):
                    self.opt.url.append(line)
        if not self.opt.output_dir:
            self.opt.output_dir = os.path.expanduser('~/Desktop')
        if not self.opt.filetype:
            self.opt.filetype = ['.torrent']
        if not self.opt.error_log:
            self.opt.error_log = self.opt.downloaded_torrents + '.errors'
        try:
            os.makedirs(self.opt.output_dir)
        except OSError:
            if not os.path.exists(self.opt.output_dir):
                print('tormon failed to create directory %s' % self.opt.output_dir)
                exit(1)

    def load_list_of_already_downloaded_torrents(self):
        try:
            self.downloaded = open(self.opt.downloaded_torrents, 'r').read().split()
        except IOError:
            self.downloaded = []
        try:
            self.errors = open(self.opt.error_log, 'r').read().split()
        except IOError:
            self.errors = []

    def update_downloaded(self, url):
        self.downloaded.append(url)
        try:
            self.errors.remove(url)
        except ValueError:
            pass

    def download_torrent(self, url):
        try:
            sock = urllib2.urlopen(url)
        except (HTTPError, URLError):
            # print('tormon failed to download %s' % url)
            if url not in self.errors:
                self.errors.append(url)
        else:
            filename = self.url2filename(url)
            target_file = os.path.join(self.opt.output_dir, filename)
            print('Downloading %s' % target_file)
            content = sock.read()
            sock.close()
            # Write in binary mode so .torrent payloads are not mangled on Windows.
            fh = open(target_file, 'wb')
            fh.write(content)
            fh.close()
            self.update_downloaded(url)

    def url2filename(self, url):
        # Filename is the last component of the URL's path.
        return os.path.basename(urlparse.urlparse(url)[2])

    def has_been_downloaded(self, url):
        if self.opt.match_by_filename:
            filename = self.url2filename(url)
            return (filename in
                    [self.url2filename(link) for link in self.downloaded])
        else:
            return (url in self.downloaded)

    def parse_rss_feed(self):
        for url in self.opt.url:
            print('RSS feed: %s' % url)
            try:
                sock = urllib2.urlopen(url)
            except (HTTPError, URLError):
                print('tormon failed to download %s' % url)
            else:
                content = sock.read()
                sock.close()
                soup = BeautifulSoup(content)
                # Candidate links can appear as <link>, <a href=...> or
                # <media:content url=...> elements in the feed.
                links = ([link.nextSibling for link in soup.findAll('link')] +
                         [link['href'] for link in soup.findAll('a')] +
                         [link['url'] for link in soup.findAll('media:content')])
                for link in links:
                    if (any([link.lower().endswith(ending)
                             for ending in self.opt.filetype])
                            and not self.has_been_downloaded(link)):
                        if self.opt.mark_all_downloaded:
                            print('Marking %s as downloaded' % link)
                            self.update_downloaded(link)
                        else:
                            self.download_torrent(link)

    def save_list_of_already_downloaded_torrents(self):
        fh = open(self.opt.downloaded_torrents, 'w')
        fh.write('\n'.join(self.downloaded))
        fh.close()
        fh = open(self.opt.error_log, 'w')
        fh.write('\n'.join(self.errors))
        fh.close()

    def __init__(self):
        self.parse_options()
        self.load_list_of_already_downloaded_torrents()
        try:
            self.parse_rss_feed()
        except KeyboardInterrupt:
            pass
        finally:
            self.save_list_of_already_downloaded_torrents()


if __name__ == '__main__':
    Main()
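
# Usage sketch (the paths, URLs, and feeds.txt file below are hypothetical
# examples, not part of the script): watch a single feed directly, or a batch
# of feeds listed in a file, saving new .torrent files into a watch directory.
#
#   python tormon.py -O ~/torrents/watch -u "http://example.com/feed.rss"
#   python tormon.py -O ~/torrents/watch -b feeds.txt
#
# feeds.txt holds one rss feed url per line; blank lines and lines starting
# with '#' are ignored, e.g.:
#
#   # personal feeds
#   http://example.com/feed1.rss
#   http://example.org/feed2.rss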