]>
git.rmz.io Git - dotfiles.git/blob - bin/tormon
775bfb5634823cb956df599751f4f7a48a691a14
3 import urllib2
,urlparse
4 from urllib2
import HTTPError
,URLError
5 from BeautifulSoup
import BeautifulSoup
10 tormon.py -O ~/test/tormon -u "http://rss.feed"
15 tormon checks an rss feed for new torrents. When it finds a new .torrent, it
16 downloads it to a specified output directory, where (presumably) a monitoring
17 torrent program will download the corresponding file.
# parse_options: build an optparse.OptionParser, parse the command line into
# self.opt, then fill in defaults and prepare the output directory.
#
# Options visible in this block:
#   -O/--output_dir            directory into which new torrents are saved
#   -f/--filetype              admissible file types
#   -d/--downloaded_torrents   log of already downloaded torrents
#                              (default: expanded '~/.downloaded_torrents')
#   -e/--error_log             log of torrents tormon failed to download
#   -b/--batch                 file containing a list of rss feed urls
#   -u/--url                   url of the rss feed
#   -m/--mark_all_downloaded   mark all torrents as already downloaded
#   -M/--match_by_filename     recognize downloads by filename instead of URL
#
# After parsing:
#   * lines of the batch file that are non-empty and do not start with '#'
#     are appended to self.opt.url;
#   * output_dir falls back to '~/Desktop' (expanded), filetype to
#     ['.torrent'], and error_log to downloaded_torrents + '.errors';
#   * os.makedirs() is called on output_dir, and a failure message is printed
#     when the directory still does not exist.
#
# NOTE(review): the embedded numbering skips (22, 25-26, 28-29, 62, 64, 73,
# 75, 78, ...), so some lines are absent from this extract -- presumably the
# `parser.add_option(` openers, the action/default/metavar arguments, and the
# guards around the batch loop and os.makedirs(). Confirm against the
# original file before changing behavior here.
19 def parse_options(self
):
20 usage
= 'usage: %prog [options]'+__usage__
21 parser
= optparse
.OptionParser(usage
=usage
)
23 '-O', '--output_dir', dest
='output_dir',
24 help='directory into which new torrents are saved',
27 '-f', '--filetype', dest
='filetype',
30 help='admissible file types',
33 '-d', '--downloaded_torrents', dest
='downloaded_torrents',
34 default
=os
.path
.expanduser('~/.downloaded_torrents'),
35 help='log of already downloaded torrents',
38 '-e', '--error_log', dest
='error_log',
39 help='log of torrents tormon failed to download',
42 '-b', '--batch', dest
='batch',
43 help='file containing list of rss feed urls',
46 '-u', '--url', dest
='url',
49 help='url of the rss feed',
52 '-m','--mark_all_downloaded', dest
='mark_all_downloaded',
55 help="mark all torrents as already downloaded")
57 '-M','--match_by_filename', dest
='match_by_filename',
60 help="recognize downloaded files by filename, not URL. Matching by URL is the default.")
61 (self
.opt
, args
) = parser
.parse_args()
63 for line
in open(self
.opt
.batch
,'r'):
65 if line
and not line
.startswith('#'):
66 self
.opt
.url
.append(line
)
67 if not self
.opt
.output_dir
:
68 self
.opt
.output_dir
=os
.path
.expanduser('~/Desktop')
69 if not self
.opt
.filetype
:
70 self
.opt
.filetype
=['.torrent']
71 if not self
.opt
.error_log
:
72 self
.opt
.error_log
=self
.opt
.downloaded_torrents
+'.errors'
74 os
.makedirs(self
.opt
.output_dir
)
76 if not os
.path
.exists(self
.opt
.output_dir
):
77 print('tormon failed to create directory %s'%self
.opt
.output_dir
)
def load_list_of_already_downloaded_torrents(self):
    """Load the download log and the error log into memory.

    Sets ``self.downloaded`` from the file named by
    ``self.opt.downloaded_torrents`` and ``self.errors`` from the file named
    by ``self.opt.error_log``. Each file is read as whitespace-separated
    entries. A log that cannot be opened (for instance on the first run,
    before anything has been saved) yields an empty list instead of an error.
    """
    def read_entries(path):
        # The context manager closes the handle even if read() fails.
        try:
            with open(path, 'r') as log:
                return log.read().split()
        except IOError:
            return []

    self.downloaded = read_entries(self.opt.downloaded_torrents)
    self.errors = read_entries(self.opt.error_log)
def update_downloaded(self, url):
    """Record *url* as downloaded and clear it from the error list.

    Appends *url* to ``self.downloaded``. If the URL had previously been
    recorded in ``self.errors`` its first occurrence is removed; a URL that
    never failed is left alone rather than raising ``ValueError``.
    """
    self.downloaded.append(url)
    if url in self.errors:
        self.errors.remove(url)
# download_torrent: fetch one torrent URL and record the outcome.
#
# Visible behavior:
#   * opens `url` with urllib2.urlopen();
#   * on HTTPError/URLError the url is added to self.errors unless it is
#     already listed there (the failure message is commented out);
#   * the target path is self.opt.output_dir joined with
#     self.url2filename(url); 'Downloading <path>' is printed and the target
#     is opened with mode 'w';
#   * self.update_downloaded(url) is called afterwards.
#
# NOTE(review): the lines numbered 95, 101, 105-106 and 108-109 are absent
# from this extract -- presumably the `try:`/`else:` keywords and the code
# that reads `sock`, writes the payload to `fh` and closes both. Confirm
# against the original file.
# NOTE(review): the target is opened in text mode ('w'); torrent payloads are
# binary, so 'wb' may be required on some platforms -- confirm.
94 def download_torrent(self
,url
):
96 sock
=urllib2
.urlopen(url
)
97 except (HTTPError
, URLError
):
98 # print('tormon failed to download %s'%url)
99 if url
not in self
.errors
:
100 self
.errors
.append(url
)
102 filename
=self
.url2filename(url
)
103 target_file
=os
.path
.join(self
.opt
.output_dir
,filename
)
104 print('Downloading %s'%target_file
)
107 fh
=open(target_file
,'w')
110 self
.update_downloaded(url
)
def url2filename(self, url):
    """Return the last path component of *url*.

    Only the path part of the URL is considered, so query strings and
    fragments never leak into the file name. A URL whose path ends in '/'
    (or has no path at all) yields an empty string.
    """
    # Imported locally so the method works both where the parser lives in
    # the `urlparse` module and where it has moved to `urllib.parse`.
    try:
        from urllib.parse import urlparse as split_url
    except ImportError:
        from urlparse import urlparse as split_url
    return os.path.basename(split_url(url).path)
def has_been_downloaded(self, url):
    """Tell whether *url* is already present in ``self.downloaded``.

    With ``self.opt.match_by_filename`` set, two URLs count as the same
    download when ``self.url2filename`` gives them the same file name;
    otherwise the URL itself must appear in ``self.downloaded``.

    Returns a bool.
    """
    if not self.opt.match_by_filename:
        return url in self.downloaded
    wanted = self.url2filename(url)
    # Generator form stops at the first match instead of building a list of
    # every known file name on each call.
    return any(self.url2filename(seen) == wanted for seen in self.downloaded)
# parse_rss_feed: scan every feed in self.opt.url for new torrent links.
#
# Visible behavior, per feed url:
#   * prints 'RSS feed: <url>' and opens the feed with urllib2.urlopen();
#     on HTTPError/URLError prints 'tormon failed to download <url>';
#   * parses `content` with BeautifulSoup;
#   * candidate links are the nextSibling of every <link> element, the
#     'href' of every <a> element and the 'url' of every <media:content>
#     element;
#   * a link is acted on when its lower-cased form ends with one of
#     self.opt.filetype and self.has_been_downloaded(link) is false;
#   * with self.opt.mark_all_downloaded set, the link is only recorded via
#     self.update_downloaded(); self.download_torrent(link) is the other
#     action visible here.
#
# NOTE(review): the lines numbered 122, 126-128, 133 and 140 are absent from
# this extract -- presumably the `try:` keyword, the code that skips a failed
# feed and assigns `content` from `sock`, the loop header over `links`, and
# the `else:` before download_torrent(). Confirm against the original file.
119 def parse_rss_feed(self
):
120 for url
in self
.opt
.url
:
121 print('RSS feed: %s'%url
)
123 sock
=urllib2
.urlopen(url
)
124 except (HTTPError
, URLError
):
125 print('tormon failed to download %s'%url
)
129 soup
=BeautifulSoup(content
)
130 links
=([link
.nextSibling
for link
in soup
.findAll('link')]+
131 [link
['href'] for link
in soup
.findAll('a')]+
132 [link
['url'] for link
in soup
.findAll('media:content')])
134 if (any([link
.lower().endswith(ending
)
135 for ending
in self
.opt
.filetype
])
136 and not self
.has_been_downloaded(link
)):
137 if self
.opt
.mark_all_downloaded
:
138 print('Marking %s as downloaded'%link
)
139 self
.update_downloaded(link
)
141 self
.download_torrent(link
)
def save_list_of_already_downloaded_torrents(self):
    """Write the in-memory logs back to disk.

    ``self.downloaded`` is written to the file named by
    ``self.opt.downloaded_torrents`` and ``self.errors`` to the file named
    by ``self.opt.error_log``, one entry per line with no trailing newline.
    Both files are overwritten.
    """
    logs = (
        (self.opt.downloaded_torrents, self.downloaded),
        (self.opt.error_log, self.errors),
    )
    for path, entries in logs:
        # The context manager flushes and closes each file even when the
        # write fails part-way through.
        with open(path, 'w') as log:
            log.write('\n'.join(entries))
151 self
.load_list_of_already_downloaded_torrents()
153 self
.parse_rss_feed()
154 except KeyboardInterrupt:
157 self
.save_list_of_already_downloaded_torrents()
158 if __name__
=='__main__':