]> git.rmz.io Git - dotfiles.git/blob - bin/tormon
775bfb5634823cb956df599751f4f7a48a691a14
[dotfiles.git] / bin / tormon
1 #!/usr/bin/env python
2
3 import urllib2,urlparse
4 from urllib2 import HTTPError,URLError
5 from BeautifulSoup import BeautifulSoup
6 import os
7 import optparse
8
# Example invocation; appended verbatim to optparse's usage/help text
# (see Main.parse_options).
__usage__='''
tormon.py -O ~/test/tormon -u "http://rss.feed"
'''
12
class Main(object):
    '''
    tormon checks one or more RSS feeds for new torrents.  When it finds a
    new .torrent link it downloads the file into a specified output
    directory, where (presumably) a monitoring torrent client will pick it
    up and fetch the corresponding content.

    State kept on the instance:
      self.opt        -- optparse Values with the parsed command line
      self.downloaded -- list of URLs already fetched (persisted to disk)
      self.errors     -- list of URLs that failed to download (persisted)
    '''
    def parse_options(self):
        '''Parse the command line into self.opt and apply defaults.

        Side effects: reads the batch file (-b) into self.opt.url and
        creates the output directory if it does not already exist.
        Raises SystemExit(1) if the output directory cannot be created.
        '''
        usage = 'usage: %prog [options]'+__usage__
        parser = optparse.OptionParser(usage=usage)
        parser.add_option(
            '-O', '--output_dir', dest='output_dir',
            help='directory into which new torrents are saved',
            metavar='DIR')
        parser.add_option(
            '-f', '--filetype', dest='filetype',
            action='append',
            default=[],
            help='admissible file types',
            metavar='TYPE')
        parser.add_option(
            '-d', '--downloaded_torrents', dest='downloaded_torrents',
            default=os.path.expanduser('~/.downloaded_torrents'),
            help='log of already downloaded torrents',
            metavar='FILE')
        parser.add_option(
            '-e', '--error_log', dest='error_log',
            help='log of torrents tormon failed to download',
            metavar='FILE')
        parser.add_option(
            '-b', '--batch', dest='batch',
            help='file containing list of rss feed urls',
            metavar='FILE')
        parser.add_option(
            '-u', '--url', dest='url',
            action='append',
            default=[],
            help='url of the rss feed',
            metavar='URL')
        parser.add_option(
            '-m','--mark_all_downloaded', dest='mark_all_downloaded',
            action='store_true',
            default=False,
            help="mark all torrents as already downloaded")
        parser.add_option(
            '-M','--match_by_filename', dest='match_by_filename',
            action='store_true',
            default=False,
            help="recognize downloaded files by filename, not URL. Matching by URL is the default.")
        (self.opt, args) = parser.parse_args()
        if self.opt.batch:
            # Each non-blank, non-comment line of the batch file is a feed URL.
            batch_fh = open(self.opt.batch, 'r')
            try:
                for line in batch_fh:
                    line = line.strip()
                    if line and not line.startswith('#'):
                        self.opt.url.append(line)
            finally:
                batch_fh.close()
        if not self.opt.output_dir:
            self.opt.output_dir = os.path.expanduser('~/Desktop')
        if not self.opt.filetype:
            self.opt.filetype = ['.torrent']
        if not self.opt.error_log:
            self.opt.error_log = self.opt.downloaded_torrents+'.errors'
        try:
            os.makedirs(self.opt.output_dir)
        except OSError:
            # makedirs raises OSError when the directory already exists;
            # that case is fine.  Anything else is fatal.
            if not os.path.exists(self.opt.output_dir):
                print('tormon failed to create directory %s'%self.opt.output_dir)
                # 'exit' is only injected by the site module; SystemExit is
                # the reliable way to terminate with a status code.
                raise SystemExit(1)
    def load_list_of_already_downloaded_torrents(self):
        '''Populate self.downloaded / self.errors from their log files.

        Missing or unreadable log files simply yield empty lists.
        '''
        try:
            fh = open(self.opt.downloaded_torrents, 'r')
            try:
                self.downloaded = fh.read().split()
            finally:
                fh.close()
        except IOError:
            self.downloaded = []
        try:
            fh = open(self.opt.error_log, 'r')
            try:
                self.errors = fh.read().split()
            finally:
                fh.close()
        except IOError:
            self.errors = []
    def update_downloaded(self, url):
        '''Record url as successfully handled and clear any error mark.'''
        self.downloaded.append(url)
        try:
            self.errors.remove(url)
        except ValueError:
            # url was never in the error list; nothing to clear.
            pass
    def download_torrent(self, url):
        '''Fetch one torrent URL into the output directory.

        On failure the url is appended to self.errors (once); on success
        it is recorded via update_downloaded().
        '''
        try:
            sock = urllib2.urlopen(url)
        except (HTTPError, URLError):
            # print('tormon failed to download %s'%url)
            if url not in self.errors:
                self.errors.append(url)
        else:
            filename = self.url2filename(url)
            target_file = os.path.join(self.opt.output_dir, filename)
            print('Downloading %s'%target_file)
            content = sock.read()
            sock.close()
            # BUG FIX: .torrent files are bencoded binary data -- write in
            # binary mode ('wb', was 'w') so newline translation on Windows
            # cannot corrupt the payload.
            fh = open(target_file, 'wb')
            try:
                fh.write(content)
            finally:
                fh.close()
            self.update_downloaded(url)
    def url2filename(self, url):
        '''Return the basename of the URL's path component.'''
        return os.path.basename(urlparse.urlparse(url)[2])
    def has_been_downloaded(self, url):
        '''True if url (or, with -M, its filename) was already fetched.'''
        if self.opt.match_by_filename:
            filename = self.url2filename(url)
            return (filename in [self.url2filename(link) for link in self.downloaded])
        else:
            return (url in self.downloaded)
    def parse_rss_feed(self):
        '''Scan every feed URL for admissible links and download new ones.

        With --mark_all_downloaded, links are only recorded, not fetched.
        '''
        for url in self.opt.url:
            print('RSS feed: %s'%url)
            try:
                sock = urllib2.urlopen(url)
            except (HTTPError, URLError):
                print('tormon failed to download %s'%url)
            else:
                content = sock.read()
                sock.close()
                soup = BeautifulSoup(content)
                # Candidate links come from <link> text, <a href=...> and
                # <media:content url=...>.  The href=True/url=True filters
                # skip tags lacking the attribute (was a KeyError crash).
                links = ([link.nextSibling for link in soup.findAll('link')]+
                         [link['href'] for link in soup.findAll('a', href=True)]+
                         [link['url'] for link in soup.findAll('media:content', url=True)])
                for link in links:
                    # <link> elements with no text sibling yield None; skip.
                    if not link:
                        continue
                    if (any(link.lower().endswith(ending)
                            for ending in self.opt.filetype)
                            and not self.has_been_downloaded(link)):
                        if self.opt.mark_all_downloaded:
                            print('Marking %s as downloaded'%link)
                            self.update_downloaded(link)
                        else:
                            self.download_torrent(link)
    def save_list_of_already_downloaded_torrents(self):
        '''Persist self.downloaded and self.errors back to their log files.'''
        fh = open(self.opt.downloaded_torrents, 'w')
        try:
            fh.write('\n'.join(self.downloaded))
        finally:
            fh.close()
        fh = open(self.opt.error_log, 'w')
        try:
            fh.write('\n'.join(self.errors))
        finally:
            fh.close()
    def __init__(self):
        '''Run one full cycle: options, load state, scan feeds, save state.'''
        self.parse_options()
        self.load_list_of_already_downloaded_torrents()
        try:
            self.parse_rss_feed()
        except KeyboardInterrupt:
            pass
        finally:
            # Always persist state, even after Ctrl-C mid-scan.
            self.save_list_of_already_downloaded_torrents()
if __name__ == '__main__':
    # Constructing Main runs the entire fetch cycle in its __init__.
    Main()