]> git.rmz.io Git - dotfiles.git/blob - bin/tormon
775bfb5634823cb956df599751f4f7a48a691a14
[dotfiles.git] / bin / tormon
1 #!/usr/bin/env python
2
3 import urllib2,urlparse
4 from urllib2 import HTTPError,URLError
5 from BeautifulSoup import BeautifulSoup
6 import os
7 import optparse
8
# Example invocation; appended verbatim to optparse's usage/help text
# (see Main.parse_options).
__usage__='''
tormon.py -O ~/test/tormon -u "http://rss.feed"
'''
12
class Main(object):
    '''
    tormon checks one or more RSS feeds for new torrents.  When it finds a
    new .torrent link it downloads the file into a specified output
    directory, where (presumably) a monitoring torrent client will pick it
    up and fetch the corresponding content.

    State kept on the instance:
      self.opt        -- optparse Values with the parsed command line
      self.downloaded -- list of URLs already fetched (persisted to disk)
      self.errors     -- list of URLs that failed to download (persisted)
    '''
    def parse_options(self):
        '''Parse the command line into self.opt and apply defaults.

        Side effects: reads the batch file (-b) into self.opt.url and
        creates the output directory if it does not already exist.
        Raises SystemExit(1) if the output directory cannot be created.
        '''
        usage = 'usage: %prog [options]'+__usage__
        parser = optparse.OptionParser(usage=usage)
        parser.add_option(
            '-O', '--output_dir', dest='output_dir',
            help='directory into which new torrents are saved',
            metavar='DIR')
        parser.add_option(
            '-f', '--filetype', dest='filetype',
            action='append',
            default=[],
            help='admissible file types',
            metavar='TYPE')
        parser.add_option(
            '-d', '--downloaded_torrents', dest='downloaded_torrents',
            default=os.path.expanduser('~/.downloaded_torrents'),
            help='log of already downloaded torrents',
            metavar='FILE')
        parser.add_option(
            '-e', '--error_log', dest='error_log',
            help='log of torrents tormon failed to download',
            metavar='FILE')
        parser.add_option(
            '-b', '--batch', dest='batch',
            help='file containing list of rss feed urls',
            metavar='FILE')
        parser.add_option(
            '-u', '--url', dest='url',
            action='append',
            default=[],
            help='url of the rss feed',
            metavar='URL')
        parser.add_option(
            '-m','--mark_all_downloaded', dest='mark_all_downloaded',
            action='store_true',
            default=False,
            help="mark all torrents as already downloaded")
        parser.add_option(
            '-M','--match_by_filename', dest='match_by_filename',
            action='store_true',
            default=False,
            help="recognize downloaded files by filename, not URL. Matching by URL is the default.")
        (self.opt, args) = parser.parse_args()
        if self.opt.batch:
            # Each non-blank, non-comment line of the batch file is a feed URL.
            batch_fh = open(self.opt.batch, 'r')
            try:
                for line in batch_fh:
                    line = line.strip()
                    if line and not line.startswith('#'):
                        self.opt.url.append(line)
            finally:
                batch_fh.close()
        if not self.opt.output_dir:
            self.opt.output_dir = os.path.expanduser('~/Desktop')
        if not self.opt.filetype:
            self.opt.filetype = ['.torrent']
        if not self.opt.error_log:
            self.opt.error_log = self.opt.downloaded_torrents+'.errors'
        try:
            os.makedirs(self.opt.output_dir)
        except OSError:
            # makedirs raises OSError when the directory already exists;
            # that case is fine.  Anything else is fatal.
            if not os.path.exists(self.opt.output_dir):
                print('tormon failed to create directory %s'%self.opt.output_dir)
                # 'exit' is only injected by the site module; SystemExit is
                # the reliable way to terminate with a status code.
                raise SystemExit(1)
    def load_list_of_already_downloaded_torrents(self):
        '''Populate self.downloaded / self.errors from their log files.

        Missing or unreadable log files simply yield empty lists.
        '''
        try:
            fh = open(self.opt.downloaded_torrents, 'r')
            try:
                self.downloaded = fh.read().split()
            finally:
                fh.close()
        except IOError:
            self.downloaded = []
        try:
            fh = open(self.opt.error_log, 'r')
            try:
                self.errors = fh.read().split()
            finally:
                fh.close()
        except IOError:
            self.errors = []
    def update_downloaded(self, url):
        '''Record url as successfully handled and clear any error mark.'''
        self.downloaded.append(url)
        try:
            self.errors.remove(url)
        except ValueError:
            # url was never in the error list; nothing to clear.
            pass
    def download_torrent(self, url):
        '''Fetch one torrent URL into the output directory.

        On failure the url is appended to self.errors (once); on success
        it is recorded via update_downloaded().
        '''
        try:
            sock = urllib2.urlopen(url)
        except (HTTPError, URLError):
            # print('tormon failed to download %s'%url)
            if url not in self.errors:
                self.errors.append(url)
        else:
            filename = self.url2filename(url)
            target_file = os.path.join(self.opt.output_dir, filename)
            print('Downloading %s'%target_file)
            content = sock.read()
            sock.close()
            # BUG FIX: .torrent files are bencoded binary data -- write in
            # binary mode ('wb', was 'w') so newline translation on Windows
            # cannot corrupt the payload.
            fh = open(target_file, 'wb')
            try:
                fh.write(content)
            finally:
                fh.close()
            self.update_downloaded(url)
    def url2filename(self, url):
        '''Return the basename of the URL's path component.'''
        return os.path.basename(urlparse.urlparse(url)[2])
    def has_been_downloaded(self, url):
        '''True if url (or, with -M, its filename) was already fetched.'''
        if self.opt.match_by_filename:
            filename = self.url2filename(url)
            return (filename in [self.url2filename(link) for link in self.downloaded])
        else:
            return (url in self.downloaded)
    def parse_rss_feed(self):
        '''Scan every feed URL for admissible links and download new ones.

        With --mark_all_downloaded, links are only recorded, not fetched.
        '''
        for url in self.opt.url:
            print('RSS feed: %s'%url)
            try:
                sock = urllib2.urlopen(url)
            except (HTTPError, URLError):
                print('tormon failed to download %s'%url)
            else:
                content = sock.read()
                sock.close()
                soup = BeautifulSoup(content)
                # Candidate links come from <link> text, <a href=...> and
                # <media:content url=...>.  The href=True/url=True filters
                # skip tags lacking the attribute (was a KeyError crash).
                links = ([link.nextSibling for link in soup.findAll('link')]+
                         [link['href'] for link in soup.findAll('a', href=True)]+
                         [link['url'] for link in soup.findAll('media:content', url=True)])
                for link in links:
                    # <link> elements with no text sibling yield None; skip.
                    if not link:
                        continue
                    if (any(link.lower().endswith(ending)
                            for ending in self.opt.filetype)
                            and not self.has_been_downloaded(link)):
                        if self.opt.mark_all_downloaded:
                            print('Marking %s as downloaded'%link)
                            self.update_downloaded(link)
                        else:
                            self.download_torrent(link)
    def save_list_of_already_downloaded_torrents(self):
        '''Persist self.downloaded and self.errors back to their log files.'''
        fh = open(self.opt.downloaded_torrents, 'w')
        try:
            fh.write('\n'.join(self.downloaded))
        finally:
            fh.close()
        fh = open(self.opt.error_log, 'w')
        try:
            fh.write('\n'.join(self.errors))
        finally:
            fh.close()
    def __init__(self):
        '''Run one full cycle: options, load state, scan feeds, save state.'''
        self.parse_options()
        self.load_list_of_already_downloaded_torrents()
        try:
            self.parse_rss_feed()
        except KeyboardInterrupt:
            pass
        finally:
            # Always persist state, even after Ctrl-C mid-scan.
            self.save_list_of_already_downloaded_torrents()
if __name__ == '__main__':
    # Constructing Main runs the entire fetch cycle in its __init__.
    Main()