+++ /dev/null
-#!/usr/bin/env python
-
-import urllib2,urlparse
-from urllib2 import HTTPError,URLError
-from BeautifulSoup import BeautifulSoup
-import os
-import optparse
-
# Example invocation; appended to the optparse usage banner shown by --help.
__usage__ = '\ntormon.py -O ~/test/tormon -u "http://rss.feed"\n'
-
class Main(object):
    '''
    tormon checks rss feeds for new torrents. When it finds a new .torrent, it
    downloads it to a specified output directory, where (presumably) a monitoring
    torrent program will download the corresponding file.

    Instantiating the class runs the whole job: parse the command line, load
    the logs of already handled URLs, scan every feed, and write the logs back
    (even when the scan is interrupted with Ctrl-C).
    '''

    def __init__(self, args=None):
        '''
        Run tormon once.

        args -- optional list of command-line arguments; None (the default)
                makes optparse read sys.argv[1:].
        '''
        self.parse_options(args)
        self.load_list_of_already_downloaded_torrents()
        try:
            self.parse_rss_feed()
        except KeyboardInterrupt:
            # Ctrl-C just stops the scan; progress so far is still saved below.
            pass
        finally:
            self.save_list_of_already_downloaded_torrents()

    def parse_options(self, args=None):
        '''
        Parse the command line into self.opt, fill in defaults and make sure
        the output directory exists.

        args -- optional list of arguments; None means sys.argv[1:].
        Raises SystemExit(1) when the output directory cannot be created.
        '''
        usage = 'usage: %prog [options]' + __usage__
        parser = optparse.OptionParser(usage=usage)
        parser.add_option(
            '-O', '--output_dir', dest='output_dir',
            help='directory into which new torrents are saved',
            metavar='DIR')
        parser.add_option(
            '-f', '--filetype', dest='filetype',
            action='append',
            default=[],
            help='admissible file types',
            metavar='TYPE')
        parser.add_option(
            '-d', '--downloaded_torrents', dest='downloaded_torrents',
            default=os.path.expanduser('~/.downloaded_torrents'),
            help='log of already downloaded torrents',
            metavar='FILE')
        parser.add_option(
            '-e', '--error_log', dest='error_log',
            help='log of torrents tormon failed to download',
            metavar='FILE')
        parser.add_option(
            '-b', '--batch', dest='batch',
            help='file containing list of rss feed urls',
            metavar='FILE')
        parser.add_option(
            '-u', '--url', dest='url',
            action='append',
            default=[],
            help='url of the rss feed',
            metavar='URL')
        parser.add_option(
            '-m', '--mark_all_downloaded', dest='mark_all_downloaded',
            action='store_true',
            default=False,
            help="mark all torrents as already downloaded")
        parser.add_option(
            '-M', '--match_by_filename', dest='match_by_filename',
            action='store_true',
            default=False,
            help="recognize downloaded files by filename, not URL. Matching by URL is the default.")
        (self.opt, _) = parser.parse_args(args)

        if self.opt.batch:
            # One feed URL per line; blank lines and '#' comments are ignored.
            with open(self.opt.batch, 'r') as batch_file:
                for line in batch_file:
                    line = line.strip()
                    if line and not line.startswith('#'):
                        self.opt.url.append(line)

        if not self.opt.output_dir:
            self.opt.output_dir = os.path.expanduser('~/Desktop')
        if not self.opt.filetype:
            self.opt.filetype = ['.torrent']
        if not self.opt.error_log:
            self.opt.error_log = self.opt.downloaded_torrents + '.errors'

        try:
            os.makedirs(self.opt.output_dir)
        except OSError:
            # makedirs also fails when the directory already exists, which is
            # fine; anything else (including a plain file in the way) is fatal.
            if not os.path.isdir(self.opt.output_dir):
                print('tormon failed to create directory %s' % self.opt.output_dir)
                raise SystemExit(1)

    @staticmethod
    def _read_url_log(path):
        '''Return the whitespace-separated entries of path, or [] if unreadable.'''
        try:
            with open(path, 'r') as log_file:
                return log_file.read().split()
        except IOError:
            return []

    @staticmethod
    def _write_url_log(path, urls):
        '''Overwrite path with urls, one per line.'''
        with open(path, 'w') as log_file:
            log_file.write('\n'.join(urls))

    def load_list_of_already_downloaded_torrents(self):
        '''
        Populate self.downloaded and self.errors from the two log files named
        in self.opt; a missing or unreadable log yields an empty list.
        '''
        self.downloaded = self._read_url_log(self.opt.downloaded_torrents)
        self.errors = self._read_url_log(self.opt.error_log)

    def save_list_of_already_downloaded_torrents(self):
        '''Write self.downloaded and self.errors back to their log files.'''
        self._write_url_log(self.opt.downloaded_torrents, self.downloaded)
        self._write_url_log(self.opt.error_log, self.errors)

    def update_downloaded(self, url):
        '''Record url as downloaded and drop it from the error list if present.'''
        self.downloaded.append(url)
        try:
            self.errors.remove(url)
        except ValueError:
            # url never failed before; nothing to clear.
            pass

    def download_torrent(self, url):
        '''
        Fetch url and save it in the output directory under the basename of
        its path. A url that cannot be opened is added (once) to self.errors
        instead; a successful download is recorded via update_downloaded().
        '''
        try:
            sock = urllib2.urlopen(url)
        except (HTTPError, URLError, ValueError):
            # ValueError is what urlopen raises for a link without a scheme.
            if url not in self.errors:
                self.errors.append(url)
            return

        target_file = os.path.join(self.opt.output_dir, self.url2filename(url))
        print('Downloading %s' % target_file)
        try:
            content = sock.read()
        finally:
            sock.close()
        # Torrent files are binary: text mode would mangle them on Windows.
        with open(target_file, 'wb') as out_file:
            out_file.write(content)
        self.update_downloaded(url)

    def url2filename(self, url):
        '''Return the last component of the path part of url.'''
        return os.path.basename(urlparse.urlparse(url)[2])

    def has_been_downloaded(self, url):
        '''
        Tell whether url is already in self.downloaded, comparing either whole
        URLs (default) or only filenames (when --match_by_filename is set).
        '''
        if self.opt.match_by_filename:
            filename = self.url2filename(url)
            return any(self.url2filename(link) == filename
                       for link in self.downloaded)
        return url in self.downloaded

    def _extract_links(self, soup, feed_url):
        '''
        Collect candidate links from a parsed feed: the text following each
        <link> tag, the href of each <a> and the url of each <media:content>.
        Non-text candidates are skipped, surrounding whitespace is stripped and
        scheme-less links are resolved against feed_url.
        '''
        candidates = [tag.nextSibling for tag in soup.findAll('link')]
        candidates += [tag.get('href') for tag in soup.findAll('a')]
        candidates += [tag.get('url') for tag in soup.findAll('media:content')]

        text_types = (str, type(u''))
        links = []
        for candidate in candidates:
            # A missing attribute gives None and nextSibling may be another
            # tag; neither can be treated as a URL.
            if not isinstance(candidate, text_types):
                continue
            link = candidate.strip()
            if not link:
                continue
            if not urlparse.urlparse(link)[0]:
                link = urlparse.urljoin(feed_url, link)
            links.append(link)
        return links

    def _has_wanted_filetype(self, link):
        '''Tell whether link ends (case-insensitively) with an admissible type.'''
        endings = tuple(ending.lower() for ending in self.opt.filetype)
        return link.lower().endswith(endings)

    def parse_rss_feed(self):
        '''
        Scan every feed in self.opt.url and, for each not yet handled link of
        an admissible file type, either download it or (with
        --mark_all_downloaded) just record it as downloaded. A feed that cannot
        be fetched is reported and skipped.
        '''
        for url in self.opt.url:
            print('RSS feed: %s' % url)
            try:
                sock = urllib2.urlopen(url)
            except (HTTPError, URLError, ValueError):
                print('tormon failed to download %s' % url)
                continue
            try:
                content = sock.read()
            finally:
                sock.close()

            for link in self._extract_links(BeautifulSoup(content), url):
                if not self._has_wanted_filetype(link):
                    continue
                if self.has_been_downloaded(link):
                    continue
                if self.opt.mark_all_downloaded:
                    print('Marking %s as downloaded' % link)
                    self.update_downloaded(link)
                else:
                    self.download_torrent(link)
# Script entry point: constructing Main performs the complete run.
if '__main__' == __name__:
    Main()