add tardis bin

[dotfiles.git] / bin / tormon
diff --git a/bin/tormon b/bin/tormon

new file mode 100755 (executable)

index 0000000..775bfb5
--- /dev/null
+++ b/bin/tormon
@@ -0,0 +1,159 @@
+#!/usr/bin/env python
+
+import urllib2,urlparse
+from urllib2 import HTTPError,URLError
+from BeautifulSoup import BeautifulSoup
+import os
+import optparse
+
+# Example invocation; parse_options() appends it to the optparse usage text.
+__usage__ = '''
+tormon.py -O ~/test/tormon -u "http://rss.feed"
+'''
+
+class Main(object):
+    '''
+    tormon checks an rss feed for new torrents. When it finds a new .torrent, it
+    downloads it to a specified output directory, where (presumably) a monitoring
+    torrent program will download the corresponding file.
+
+    Instantiating the class runs the whole program: the command line is
+    parsed, the logs of downloaded and failed urls are loaded, every feed is
+    scanned, and both logs are written back (also after Ctrl-C).
+    '''
+    def parse_options(self):
+        '''
+        Parse the command line into self.opt and fill in the defaults.
+
+        Urls listed in the --batch file (one per line; blank lines and lines
+        starting with '#' are skipped) are appended to those given with -u.
+        The output directory is created if it does not exist; the program
+        exits with status 1 when that is not possible.
+        '''
+        usage = 'usage: %prog [options]'+__usage__
+        parser = optparse.OptionParser(usage=usage)
+        parser.add_option(
+            '-O', '--output_dir', dest='output_dir',
+            help='directory into which new torrents are saved',
+            metavar='DIR')
+        parser.add_option(
+            '-f', '--filetype', dest='filetype',
+            action='append',
+            default=[],
+            help='admissible file types',
+            metavar='TYPE')
+        parser.add_option(
+            '-d', '--downloaded_torrents', dest='downloaded_torrents',
+            default=os.path.expanduser('~/.downloaded_torrents'),
+            help='log of already downloaded torrents',
+            metavar='FILE')
+        parser.add_option(
+            '-e', '--error_log', dest='error_log',
+            help='log of torrents tormon failed to download',
+            metavar='FILE')
+        parser.add_option(
+            '-b', '--batch', dest='batch',
+            help='file containing list of rss feed urls',
+            metavar='FILE')
+        parser.add_option(
+            '-u', '--url', dest='url',
+            action='append',
+            default=[],
+            help='url of the rss feed',
+            metavar='URL')
+        parser.add_option(
+            '-m','--mark_all_downloaded', dest='mark_all_downloaded',
+            action='store_true',
+            default=False,
+            help="mark all torrents as already downloaded")
+        parser.add_option(
+            '-M','--match_by_filename', dest='match_by_filename',
+            action='store_true',
+            default=False,
+            help="recognize downloaded files by filename, not URL. Matching by URL is the default.")
+        (self.opt, args) = parser.parse_args()
+        if self.opt.batch:
+            # 'with' closes the batch file as soon as it has been read
+            with open(self.opt.batch,'r') as batch_file:
+                for line in batch_file:
+                    line=line.strip()
+                    if line and not line.startswith('#'):
+                        self.opt.url.append(line)
+        if not self.opt.output_dir:
+            self.opt.output_dir=os.path.expanduser('~/Desktop')
+        if not self.opt.filetype:
+            self.opt.filetype=['.torrent']
+        if not self.opt.error_log:
+            self.opt.error_log=self.opt.downloaded_torrents+'.errors'
+        try:
+            os.makedirs(self.opt.output_dir)
+        except OSError:
+            # makedirs also raises when the directory already exists, which
+            # is fine; anything else (including a plain file at that path)
+            # leaves nowhere to save torrents
+            if not os.path.isdir(self.opt.output_dir):
+                print('tormon failed to create directory %s'%self.opt.output_dir)
+                # SystemExit rather than exit(): exit() is only injected by
+                # the site module
+                raise SystemExit(1)
+    def _read_url_list(self,path):
+        '''Return the whitespace-separated entries of path, or [] if it cannot be read.'''
+        try:
+            with open(path,'r') as fh:
+                return fh.read().split()
+        except IOError:
+            return []
+    def _write_url_list(self,path,urls):
+        '''Write urls to path, one per line, replacing the previous content.'''
+        with open(path,'w') as fh:
+            fh.write('\n'.join(urls))
+    def load_list_of_already_downloaded_torrents(self):
+        '''Load self.downloaded and self.errors from their log files (missing file -> empty list).'''
+        self.downloaded=self._read_url_list(self.opt.downloaded_torrents)
+        self.errors=self._read_url_list(self.opt.error_log)
+    def update_downloaded(self,url):
+        '''Record url as downloaded and drop it from the list of failed urls.'''
+        self.downloaded.append(url)
+        try:
+            self.errors.remove(url)
+        except ValueError:
+            # url had not failed before; nothing to remove
+            pass
+    def download_torrent(self,url):
+        '''
+        Fetch url and save it in the output directory under its basename.
+
+        A url that cannot be opened is added to self.errors (once) instead of
+        aborting the run; a successful download is recorded via
+        update_downloaded().
+        '''
+        try:
+            sock=urllib2.urlopen(url)
+        except (HTTPError, URLError, ValueError):
+            # ValueError: urlopen rejects urls without a scheme, e.g. relative
+            # links found in the feed
+            if url not in self.errors:
+                self.errors.append(url)
+            return
+        filename=self.url2filename(url)
+        target_file=os.path.join(self.opt.output_dir,filename)
+        print('Downloading %s'%target_file)
+        try:
+            content=sock.read()
+        finally:
+            sock.close()
+        # torrent files are binary: text mode would mangle them on platforms
+        # that translate line endings
+        with open(target_file,'wb') as fh:
+            fh.write(content)
+        self.update_downloaded(url)
+    def url2filename(self,url):
+        '''Return the last component of the path part of url.'''
+        return os.path.basename(urlparse.urlparse(url)[2])
+    def has_been_downloaded(self,url):
+        '''
+        Tell whether url is already in self.downloaded.
+
+        With --match_by_filename only the file names are compared, otherwise
+        the complete urls.
+        '''
+        if not self.opt.match_by_filename:
+            return url in self.downloaded
+        filename=self.url2filename(url)
+        return any(self.url2filename(link)==filename for link in self.downloaded)
+    def _extract_links(self,soup):
+        '''Return the candidate urls found in a parsed feed, stripped of surrounding whitespace.'''
+        # For <link> the url is taken from the node following the tag
+        # (presumably because BeautifulSoup treats <link> as self-closing --
+        # TODO confirm); <a> and <media:content> carry it in an attribute.
+        candidates=([tag.nextSibling for tag in soup.findAll('link')]+
+                    [tag.get('href') for tag in soup.findAll('a')]+
+                    [tag.get('url') for tag in soup.findAll('media:content')])
+        # nextSibling may be None or another tag, and get() yields None for a
+        # missing attribute; only real text can be a url
+        return [link.strip() for link in candidates if isinstance(link,basestring)]
+    def _has_admissible_filetype(self,link):
+        '''Tell whether link ends, ignoring case, with one of the --filetype endings.'''
+        endings=tuple(ending.lower() for ending in self.opt.filetype)
+        return link.lower().endswith(endings)
+    def parse_rss_feed(self):
+        '''
+        Scan every feed in self.opt.url for new links of an admissible type.
+
+        Each new link is downloaded, or only marked as downloaded when
+        --mark_all_downloaded is given. A feed that cannot be fetched is
+        reported and skipped.
+        '''
+        for url in self.opt.url:
+            print('RSS feed: %s'%url)
+            try:
+                sock=urllib2.urlopen(url)
+            except (HTTPError, URLError, ValueError):
+                # ValueError: malformed feed url; do not let it stop the
+                # remaining feeds
+                print('tormon failed to download %s'%url)
+                continue
+            try:
+                content=sock.read()
+            finally:
+                sock.close()
+            soup=BeautifulSoup(content)
+            for link in self._extract_links(soup):
+                if (not self._has_admissible_filetype(link)
+                    or self.has_been_downloaded(link)):
+                    continue
+                if self.opt.mark_all_downloaded:
+                    print('Marking %s as downloaded'%link)
+                    self.update_downloaded(link)
+                else:
+                    self.download_torrent(link)
+    def save_list_of_already_downloaded_torrents(self):
+        '''Write self.downloaded and self.errors back to their log files.'''
+        self._write_url_list(self.opt.downloaded_torrents,self.downloaded)
+        self._write_url_list(self.opt.error_log,self.errors)
+    def __init__(self):
+        '''Run tormon: parse options, load the logs, scan the feeds, save the logs.'''
+        self.parse_options()
+        self.load_list_of_already_downloaded_torrents()
+        try:
+            self.parse_rss_feed()
+        except KeyboardInterrupt:
+            # Ctrl-C only stops the scan; progress made so far is still saved
+            pass
+        finally:
+            self.save_list_of_already_downloaded_torrents()
+# Run tormon only when executed as a script; constructing Main does all the work.
+if __name__ == '__main__':
+    Main()