X-Git-Url: https://git.rmz.io/dotfiles.git/blobdiff_plain/4b06eb26103a8f0840915532fdf411ef82e9ad24..89cf694280e02442fb435d5fe516ac09783d776a:/bin/tormon

diff --git a/bin/tormon b/bin/tormon
new file mode 100755
index 0000000..775bfb5
--- /dev/null
+++ b/bin/tormon
@@ -0,0 +1,159 @@
+#!/usr/bin/env python
+
+import urllib2,urlparse
+from urllib2 import HTTPError,URLError
+from BeautifulSoup import BeautifulSoup
+import os
+import optparse
+
+__usage__='''
+tormon.py -O ~/test/tormon -u "http://rss.feed"
+'''
+
+class Main(object):
+    '''
+    tormon checks an rss feed for new torrents. When it finds a new .torrent, it
+    downloads it to a specified output directory, where (presumably) a monitoring
+    torrent program will download the corresponding file.
+    '''
+    def parse_options(self):
+        usage = 'usage: %prog [options]'+__usage__
+        parser = optparse.OptionParser(usage=usage)
+        parser.add_option(
+            '-O', '--output_dir', dest='output_dir',
+            help='directory into which new torrents are saved',
+            metavar='DIR')
+        parser.add_option(
+            '-f', '--filetype', dest='filetype',
+            action='append',
+            default=[],
+            help='admissible file types',
+            metavar='TYPE')
+        parser.add_option(
+            '-d', '--downloaded_torrents', dest='downloaded_torrents',
+            default=os.path.expanduser('~/.downloaded_torrents'),
+            help='log of already downloaded torrents',
+            metavar='FILE')
+        parser.add_option(
+            '-e', '--error_log', dest='error_log',
+            help='log of torrents tormon failed to download',
+            metavar='FILE')
+        parser.add_option(
+            '-b', '--batch', dest='batch',
+            help='file containing list of rss feed urls',
+            metavar='FILE')
+        parser.add_option(
+            '-u', '--url', dest='url',
+            action='append',
+            default=[],
+            help='url of the rss feed',
+            metavar='URL')
+        parser.add_option(
+            '-m','--mark_all_downloaded', dest='mark_all_downloaded',
+            action='store_true',
+            default=False,
+            help="mark all torrents as already downloaded")
+        parser.add_option(
+            '-M','--match_by_filename', dest='match_by_filename',
+            action='store_true',
+            default=False,
+            help="recognize downloaded files by filename, not URL. Matching by URL is the default.")
+        (self.opt, args) = parser.parse_args()
+        if self.opt.batch:
+            for line in open(self.opt.batch,'r'):
+                line=line.strip()
+                if line and not line.startswith('#'):
+                    self.opt.url.append(line)
+        if not self.opt.output_dir:
+            self.opt.output_dir=os.path.expanduser('~/Desktop')
+        if not self.opt.filetype:
+            self.opt.filetype=['.torrent']
+        if not self.opt.error_log:
+            self.opt.error_log=self.opt.downloaded_torrents+'.errors'
+        try:
+            os.makedirs(self.opt.output_dir)
+        except OSError:
+            if not os.path.exists(self.opt.output_dir):
+                print('tormon failed to create directory %s'%self.opt.output_dir)
+                exit(1)
+    def load_list_of_already_downloaded_torrents(self):
+        try:
+            self.downloaded=open(self.opt.downloaded_torrents,'r').read().split()
+        except IOError:
+            self.downloaded=[]
+        try:
+            self.errors=open(self.opt.error_log,'r').read().split()
+        except IOError:
+            self.errors=[]
+    def update_downloaded(self,url):
+        self.downloaded.append(url)
+        try:
+            self.errors.remove(url)
+        except ValueError:
+            pass
+    def download_torrent(self,url):
+        try:
+            sock=urllib2.urlopen(url)
+        except (HTTPError, URLError):
+            # print('tormon failed to download %s'%url)
+            if url not in self.errors:
+                self.errors.append(url)
+        else:
+            filename=self.url2filename(url)
+            target_file=os.path.join(self.opt.output_dir,filename)
+            print('Downloading %s'%target_file)
+            content=sock.read()
+            sock.close()
+            fh=open(target_file,'wb')
+            fh.write(content)
+            fh.close()
+            self.update_downloaded(url)
+    def url2filename(self,url):
+        return os.path.basename(urlparse.urlparse(url)[2])
+    def has_been_downloaded(self,url):
+        if self.opt.match_by_filename:
+            filename=self.url2filename(url)
+            return (filename in [self.url2filename(link) for link in self.downloaded])
+        else:
+            return (url in self.downloaded)
+    def parse_rss_feed(self):
+        for url in self.opt.url:
+            print('RSS feed: %s'%url)
+            try:
+                sock=urllib2.urlopen(url)
+            except (HTTPError, URLError):
+                print('tormon failed to download %s'%url)
+            else:
+                content=sock.read()
+                sock.close()
+                soup=BeautifulSoup(content)
+                links=([link.nextSibling for link in soup.findAll('link')]+
+                       [link['href'] for link in soup.findAll('a')]+
+                       [link['url'] for link in soup.findAll('media:content')])
+                for link in links:
+                    if (any([link.lower().endswith(ending)
+                             for ending in self.opt.filetype])
+                        and not self.has_been_downloaded(link)):
+                        if self.opt.mark_all_downloaded:
+                            print('Marking %s as downloaded'%link)
+                            self.update_downloaded(link)
+                        else:
+                            self.download_torrent(link)
+    def save_list_of_already_downloaded_torrents(self):
+        fh=open(self.opt.downloaded_torrents, 'w')
+        fh.write('\n'.join(self.downloaded))
+        fh.close()
+        fh=open(self.opt.error_log, 'w')
+        fh.write('\n'.join(self.errors))
+        fh.close()
+    def __init__(self):
+        self.parse_options()
+        self.load_list_of_already_downloaded_torrents()
+        try:
+            self.parse_rss_feed()
+        except KeyboardInterrupt:
+            pass
+        finally:
+            self.save_list_of_already_downloaded_torrents()
+if __name__=='__main__':
+    Main()
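
A note on the link extraction in parse_rss_feed: BeautifulSoup's HTML parser treats <link> as a self-closing tag, so the URL text that follows a <link> element in the feed ends up as the tag's nextSibling rather than as its string, which is why the script reads link.nextSibling. A minimal sketch of that behaviour, assuming BeautifulSoup 3 under Python 2; the feed snippet and URL below are made up for illustration:

    # Illustration only: hypothetical feed markup, not taken from a real feed.
    from BeautifulSoup import BeautifulSoup

    rss = '<item><link>http://example.com/file.torrent</link></item>'
    soup = BeautifulSoup(rss)
    tag = soup.find('link')
    print tag              # the tag is closed immediately: <link />
    print tag.nextSibling  # the URL text lands here, not in tag.string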