]> git.rmz.io Git - dotfiles.git/blob - weechat/python/grep.py
flexget: add blacklist redemption
[dotfiles.git] / weechat / python / grep.py
1 # -*- coding: utf-8 -*-
2 ###
3 # Copyright (c) 2009-2011 by Elián Hanisch <lambdae2@gmail.com>
4 #
5 # This program is free software; you can redistribute it and/or modify
6 # it under the terms of the GNU General Public License as published by
7 # the Free Software Foundation; either version 3 of the License, or
8 # (at your option) any later version.
9 #
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details.
14 #
15 # You should have received a copy of the GNU General Public License
16 # along with this program. If not, see <http://www.gnu.org/licenses/>.
17 ###
18
19 ###
20 # Search in Weechat buffers and logs (for Weechat 0.3.*)
21 #
22 # Inspired by xt's grep.py
23 # Originally I just wanted to add some fixes in grep.py, but then
24 # I got carried away and rewrote everything, so new script.
25 #
26 # Commands:
27 # * /grep
28 # Search in logs or buffers, see /help grep
29 # * /logs:
30 # Lists logs in ~/.weechat/logs, see /help logs
31 #
32 # Settings:
33 # * plugins.var.python.grep.clear_buffer:
34 # Clear the results buffer before each search. Valid values: on, off
35 #
36 # * plugins.var.python.grep.go_to_buffer:
37 # Automatically go to grep buffer when search is over. Valid values: on, off
38 #
39 # * plugins.var.python.grep.log_filter:
40 # Comma separated list of patterns that grep will use to exclude logs, e.g.
41 # if you use '*server/*' any log in the 'server' folder will be excluded
42 # when using the command '/grep log'
43 #
44 # * plugins.var.python.grep.show_summary:
45 # Shows summary for each log. Valid values: on, off
46 #
47 # * plugins.var.python.grep.max_lines:
48 # Grep will only print the last matched lines that don't surpass the value defined here.
49 #
50 # * plugins.var.python.grep.size_limit:
51 # Size limit in KiB, used to decide whether grepping should run in the background or not. If
52 # the logs to grep have a total size bigger than this value then grep runs as a new process.
53 # It can be used to force or disable the background process: using '0' forces grep to always
54 # run in the background, while using '' (empty string) will disable it.
55 #
56 # * plugins.var.python.grep.default_tail_head:
57 # Config option to define the default number of lines returned when using --head or --tail options.
58 # Can be overridden in the command with --number option.
59 #
60 #
61 # TODO:
62 # * try to figure out why hook_process chokes in long outputs (using a tempfile as a
63 # workaround now)
64 # * possibly add option for defining time intervals
65 #
66 #
67 # History:
68 #
69 # 2016-06-23, mickael9
70 # version 0.7.7: fix get_home function
71 #
72 # 2015-11-26
73 # version 0.7.6: fix a typo
74 #
75 # 2015-01-31, Nicd-
76 # version 0.7.5:
77 # '~' is now expaned to the home directory in the log file path so
78 # paths like '~/logs/' should work.
79 #
80 # 2015-01-14, nils_2
81 # version 0.7.4: make q work to quit grep buffer (requested by: gb)
82 #
83 # 2014-03-29, Felix Eckhofer <felix@tribut.de>
84 # version 0.7.3: fix typo
85 #
86 # 2011-01-09
87 # version 0.7.2: bug fixes
88 #
89 # 2010-11-15
90 # version 0.7.1:
91 # * use TempFile so temporal files are guaranteed to be deleted.
92 # * enable Archlinux workaround.
93 #
94 # 2010-10-26
95 # version 0.7:
96 # * added templates.
97 # * using --only-match shows only unique strings.
98 # * fixed bug that inverted -B -A switches when used with -t
99 #
100 # 2010-10-14
101 # version 0.6.8: by xt <xt@bash.no>
102 # * supress highlights when printing in grep buffer
103 #
104 # 2010-10-06
105 # version 0.6.7: by xt <xt@bash.no>
106 # * better temporary file:
107 # use tempfile.mkstemp. to create a temp file in log dir,
108 # makes it safer with regards to write permission and multi user
109 #
110 # 2010-04-08
111 # version 0.6.6: bug fixes
112 # * use WEECHAT_LIST_POS_END in log file completion, makes completion faster
113 # * disable bytecode if using python 2.6
114 # * use single quotes in command string
115 # * fix bug that could change buffer's title when using /grep stop
116 #
117 # 2010-01-24
118 # version 0.6.5: disable bytecode is a 2.6 feature, instead, resort to delete the bytecode manually
119 #
120 # 2010-01-19
121 # version 0.6.4: bug fix
122 # version 0.6.3: added options --invert --only-match (replaces --exact, which is still available
123 # but removed from help)
124 # * use new 'irc_nick_color' info
125 # * don't generate bytecode when spawning a new process
126 # * show active options in buffer title
127 #
128 # 2010-01-17
129 # version 0.6.2: removed 2.6-ish code
130 # version 0.6.1: fixed bug when grepping in grep's buffer
131 #
132 # 2010-01-14
133 # version 0.6.0: implemented grep in background
134 # * improved context lines presentation.
135 # * grepping for big (or many) log files runs in a weechat_process.
136 # * added /grep stop.
137 # * added 'size_limit' option
138 # * fixed a infolist leak when grepping buffers
139 # * added 'default_tail_head' option
140 # * results are sort by line count
141 # * don't die if log is corrupted (has NULL chars in it)
142 # * changed presentation of /logs
143 # * log path completion doesn't suck anymore
144 # * removed all tabs, because I learned how to configure Vim so that spaces aren't annoying
145 # anymore. This was the script's original policy.
146 #
147 # 2010-01-05
148 # version 0.5.5: rename script to 'grep.py' (FlashCode <flashcode@flashtux.org>).
149 #
150 # 2010-01-04
151 # version 0.5.4.1: fix index error when using --after/before-context options.
152 #
153 # 2010-01-03
154 # version 0.5.4: new features
155 # * added --after-context and --before-context options.
156 # * added --context as a shortcut for using both -A -B options.
157 #
158 # 2009-11-06
159 # version 0.5.3: improvements for long grep output
160 # * grep buffer input accepts the same flags as /grep for repeat a search with different
161 # options.
162 # * tweaks in grep's output.
163 # * max_lines option added for limit grep's output.
164 # * code in update_buffer() optimized.
165 # * time stats in buffer title.
166 # * added go_to_buffer config option.
167 # * added --buffer for search only in buffers.
168 # * refactoring.
169 #
170 # 2009-10-12, omero
171 # version 0.5.2: made it python-2.4.x compliant
172 #
173 # 2009-08-17
174 # version 0.5.1: some refactoring, show_summary option added.
175 #
176 # 2009-08-13
177 # version 0.5: rewritten from xt's grep.py
178 # * fixed searching in non weechat logs, for cases like, if you're
179 # switching from irssi and rename and copy your irssi logs to %h/logs
180 # * fixed "timestamp rainbow" when you /grep in grep's buffer
181 # * allow to search in other buffers other than current or in logs
182 # of currently closed buffers with cmd 'buffer'
183 # * allow to search in any log file in %h/logs with cmd 'log'
184 # * added --count for return the number of matched lines
185 # * added --matchcase for case sensible search
186 # * added --hilight for color matches
187 # * added --head and --tail options, and --number
188 # * added command /logs for list files in %h/logs
189 # * added config option for clear the buffer before a search
190 # * added config option for filter logs we don't want to grep
191 # * added the posibility to repeat last search with another regexp by writing
192 # it in grep's buffer
193 # * changed spaces for tabs in the code, which is my preference
194 #
195 ###
196
197 from os import path
198 import sys, getopt, time, os, re, tempfile
199
200 try:
201 import weechat
202 from weechat import WEECHAT_RC_OK, prnt, prnt_date_tags
203 import_ok = True
204 except ImportError:
205 import_ok = False
206
# Script metadata. SCRIPT_NAME doubles as the name of the results buffer and as
# the prefix of the temporary file used by background searches.
SCRIPT_NAME = "grep"
SCRIPT_AUTHOR = "Elián Hanisch <lambdae2@gmail.com>"
SCRIPT_VERSION = "0.7.7"
SCRIPT_LICENSE = "GPL3"
SCRIPT_DESC = "Search in buffers and logs"
SCRIPT_COMMAND = "grep"

### Default Settings ###
# Every value is a string; get_config_boolean() and get_config_int() fall back
# to these defaults when the stored plugin option is invalid.
settings = {
        # clear the results buffer before each search ('on'/'off')
        'clear_buffer' : 'off',
        # comma separated fnmatch patterns of logs to exclude
        'log_filter' : '',
        # switch to the grep buffer when the search is over ('on'/'off')
        'go_to_buffer' : 'on',
        # maximum number of matched lines printed
        'max_lines' : '4000',
        # show a summary for each log ('on'/'off')
        'show_summary' : 'on',
        # total log size in KiB above which grep runs as a background process;
        # '0' always runs in background, '' never does
        'size_limit' : '2048',
        # default line count for --head / --tail
        'default_tail_head' : '10',
        }
224
225 ### Class definitions ###
class linesDict(dict):
    """
    Mapping of buffer or log name to its list of matched lines.

    Assigning to a name that is already stored extends the existing list
    instead of replacing it, so results for one name accumulate:
        linesDict[buffer_name] = matched_lines_list
    """
    def __setitem__(self, key, value):
        assert isinstance(value, list)
        if key in self:
            dict.__getitem__(self, key).extend(value)
        else:
            dict.__setitem__(self, key, value)

    def get_matches_count(self):
        """Return the sum of the matches_count of every stored list."""
        if not dict.__len__(self):
            return 0
        return sum([lines.matches_count for lines in self.values()])

    def __len__(self):
        """Return the total number of lines stored (not the number of keys)."""
        if not dict.__len__(self):
            return 0
        return sum([len(lines) for lines in self.values()])

    def __str__(self):
        """Return the only stored name, '<n> logs' for several, or '' if empty."""
        names = list(self.keys())
        how_many = len(names)
        if how_many == 1:
            return names[0]
        if how_many > 1:
            return '%s logs' %how_many
        return ''

    def items(self):
        """Return a list of (name, lines) pairs sorted by line count."""
        return sorted(dict.items(self), key=lambda pair: len(pair[1]))

    def items_count(self):
        """Return a list of (name, lines) pairs sorted by match count."""
        return sorted(dict.items(self), key=lambda pair: pair[1].matches_count)

    def strip_separator(self):
        """Call strip_separator() on every stored list."""
        for lines in self.values():
            lines.strip_separator()

    def get_last_lines(self, n):
        """Trim the stored lists in place so that at most 'n' lines remain.

        Lists are visited from the longest to the shortest; each keeps its last
        lines while the budget lasts and is emptied once it is used up. The
        number of removed lines is recorded in each list's 'stripped_lines'.
        """
        if n >= len(self):
            # everything already fits, nothing to do
            return
        for _name, lines in reversed(self.items()):
            length = len(lines)
            if n <= 0:
                # budget exhausted, drop the whole list
                del lines[:]
                lines.stripped_lines = length
                continue
            if length > n:
                del lines[:length - n]
                lines.stripped_lines = length - n
            n -= length
294
class linesList(list):
    """List of output lines with an independent match counter.

    Not every stored line is a match (context lines and separators are stored
    too), so the number of matches is tracked in 'matches_count' instead of
    being derived from len(). 'stripped_lines' records how many lines were
    removed when the output was trimmed.
    """
    _sep = '...'
    def __init__(self, *args):
        list.__init__(self, *args)
        self.matches_count = 0
        self.stripped_lines = 0

    def append(self, item):
        """Append lines, can be a string or a list with strings."""
        if isinstance(item, str):
            list.append(self, item)
        else:
            self.extend(item)

    def append_separator(self):
        """Add a separator at the end, unless the last item already is one."""
        if not self or self[-1] != self._sep:
            self.append(self._sep)

    def onlyUniq(self):
        """Remove duplicated items in place (the original order is not kept)."""
        uniq = set(self)
        del self[:]
        self.extend(uniq)

    def count_match(self, item=None):
        """Increase the match counter: by one for None or a string, by len(item) otherwise."""
        if item is None or isinstance(item, str):
            self.matches_count += 1
        else:
            self.matches_count += len(item)

    def strip_separator(self):
        """Remove the separator if it is the first and/or the last item."""
        s = self._sep
        if self and self[0] == s:
            del self[0]
        # re-check emptiness: the list may have contained only the separator
        if self and self[-1] == s:
            del self[-1]
336
337 ### Misc functions ###
# short alias used for timing the searches
now = time.time

def get_size(f):
    """Return the size in bytes of file 'f', or 0 if it can't be stat'ed."""
    try:
        size = os.stat(f).st_size
    except OSError:
        size = 0
    return size
344
# unit name for each power of 1024
sizeDict = {0:'b', 1:'KiB', 2:'MiB', 3:'GiB', 4:'TiB'}
def human_readable_size(size):
    """Return 'size' (a number of bytes) formatted as '<value> <unit>'.

    The value is scaled down by 1024 until it is below 1024 or the largest
    unit in sizeDict is reached, and printed with two decimals.
    """
    power = 0
    largest = max(sizeDict)
    # '>=' so that exactly 1024 b becomes 1.00 KiB; stop at the largest known
    # unit so the result always carries a unit name
    while size >= 1024 and power < largest:
        power += 1
        size /= 1024.0
    return '%.2f %s' %(size, sizeDict[power])
352
def color_nick(nick):
    """Returns coloured nick, with coloured mode if any.

    'nick' is the nick text as it appears in a line prefix: it may be wrapped
    in the configured irc.look.nick_prefix / irc.look.nick_suffix strings and
    may start with one of the mode characters '@!+%'. An empty nick returns ''.
    """
    if not nick:
        return ''
    wcolor = weechat.color
    config_string = lambda s : weechat.config_string(weechat.config_get(s))
    config_int = lambda s : weechat.config_integer(weechat.config_get(s))
    # prefix and suffix
    prefix = config_string('irc.look.nick_prefix')
    suffix = config_string('irc.look.nick_suffix')
    prefix_c = suffix_c = wcolor(config_string('weechat.color.chat_delimiters'))
    if prefix and nick.startswith(prefix):
        nick = nick[len(prefix):]
    else:
        prefix = prefix_c = ''
    # suffix_c already holds the delimiter colour, so the suffix itself is
    # left untouched here and coloured when the parts are joined below
    if suffix and nick.endswith(suffix):
        nick = nick[:-len(suffix)]
    else:
        suffix = suffix_c = ''
    # nick mode
    modes = '@!+%'
    # 'nick' can be empty at this point if it only consisted of prefix/suffix
    if nick and nick[0] in modes:
        mode, nick = nick[0], nick[1:]
        mode_color = wcolor(config_string('weechat.color.nicklist_prefix%d' \
                %(modes.find(mode) + 1)))
    else:
        mode = mode_color = ''
    # nick color
    nick_color = weechat.info_get('irc_nick_color', nick)
    if not nick_color:
        # probably we're in WeeChat 0.3.0
        color_nicks_number = config_int('weechat.look.color_nicks_number')
        idx = (sum(map(ord, nick))%color_nicks_number) + 1
        nick_color = wcolor(config_string('weechat.color.chat_nick_color%02d' %idx))
    return ''.join((prefix_c, prefix, mode_color, mode, nick_color, nick, suffix_c, suffix))
389
390 ### Config and value validation ###
# accepted spellings for boolean plugin options
boolDict = {'on':True, 'off':False}
def get_config_boolean(config):
    """Return plugin option 'config' as a bool.

    If the stored value isn't 'on' or 'off', an error is printed and the
    default from 'settings' is used instead.
    """
    value = weechat.config_get_plugin(config)
    if value in boolDict:
        return boolDict[value]
    default = settings[config]
    error("Error while fetching config '%s'. Using default value '%s'." %(config, default))
    error("'%s' is invalid, allowed: 'on', 'off'" %value)
    return boolDict[default]
401
def get_config_int(config, allow_empty_string=False):
    """Return plugin option 'config' as an int.

    With allow_empty_string an empty value is returned as '' untouched. Any
    other non numeric value prints an error and yields the default from
    'settings'.
    """
    value = weechat.config_get_plugin(config)
    try:
        number = int(value)
    except ValueError:
        if allow_empty_string and value == '':
            return value
        default = settings[config]
        error("Error while fetching config '%s'. Using default value '%s'." %(config, default))
        error("'%s' is not a number." %value)
        number = int(default)
    return number
413
def get_config_log_filter():
    """Return the 'log_filter' plugin option split on commas, or [] if it is empty."""
    patterns = weechat.config_get_plugin('log_filter')
    if not patterns:
        return []
    return patterns.split(',')
420
def get_home():
    """Return the absolute path of the logger directory.

    Reads 'logger.file.path', replaces '%h' with WeeChat's directory and
    expands a leading '~'.
    """
    log_path = weechat.config_string(weechat.config_get('logger.file.path'))
    weechat_dir = weechat.info_get('weechat_dir', '')
    expanded = path.expanduser(log_path.replace('%h', weechat_dir))
    return path.abspath(expanded)
426
def strip_home(s, dir=''):
    """Return path 's' without the leading 'dir', which makes log names shorter.

    When 'dir' is empty the global 'home_dir' is used. Paths that don't start
    with the directory are returned unchanged.
    """
    prefix = dir or home_dir
    if s.startswith(prefix):
        return s[len(prefix):]
    return s
436
437 ### Messages ###
# name shown in front of the script's messages
script_nick = SCRIPT_NAME
def error(s, buffer=''):
    """Print error message 's' in 'buffer' (the default '' is passed to prnt as is).

    If the 'debug' plugin option is set and an exception is being handled,
    its traceback is printed too (always with '' as buffer).
    """
    prnt(buffer, '%s%s %s' %(weechat.prefix('error'), script_nick, s))
    if weechat.config_get_plugin('debug'):
        import traceback
        # sys.exc_info() replaces the long deprecated sys.exc_type
        if sys.exc_info()[0] is not None:
            trace = traceback.format_exc()
            prnt('', trace)
447
def say(s, buffer=''):
    """Print 's' in 'buffer' with the script name as prefix, tagged 'no_highlight'."""
    text = '%s\t%s' %(script_nick, s)
    prnt_date_tags(buffer, 0, 'no_highlight', text)
451
452
453
454 ### Log files and buffers ###
cache_dir = {} # note: don't remove, needed for completion if the script was loaded recently
def dir_list(dir, filter_list=(), filter_excludes=True, include_dir=False):
    """Returns a list of files in 'dir' and its subdirs.

    filter_list:     fnmatch patterns tested against each path with the leading
                     'dir' removed; when empty, the 'log_filter' option is used.
    filter_excludes: if true, matching files are left out; if false, *only*
                     matching files are returned.
    include_dir:     only takes part in the cache key.

    Results are cached in 'cache_dir'. The filter arguments are part of the
    cache key, so a call with different filters never gets another call's list.
    """
    global cache_dir
    from os import walk
    from fnmatch import fnmatch

    filter_list = filter_list or get_config_log_filter()
    key = (dir, include_dir, tuple(filter_list), filter_excludes)
    try:
        return cache_dir[key]
    except KeyError:
        pass

    dir_len = len(dir)
    def skip(file):
        """Tell whether 'file' must be left out of the result."""
        relative = file[dir_len:] # pattern shouldn't match home dir
        for pattern in filter_list:
            if fnmatch(relative, pattern):
                return filter_excludes
        return not filter_excludes

    file_list = []
    for basedir, subdirs, files in walk(dir):
        for name in files:
            file_path = path.join(basedir, name)
            if not skip(file_path):
                file_list.append(file_path)

    cache_dir[key] = file_list
    return file_list
496
def get_file_by_pattern(pattern, all=False):
    """Return a list with the log files designated by 'pattern'.

    'pattern' is tried, in order, as a path to an existing file (after '~' and
    environment variable expansion), as a path relative to 'home_dir', and
    finally as an fnmatch pattern against every log path with the leading
    'home_dir' removed. In the last case only the match that sorts last is
    returned, unless 'all' is true, then every match is returned.
    An empty pattern returns [].
    """
    if not pattern:
        return []
    # do envvar expansion and check file
    candidate = path.expandvars(path.expanduser(pattern))
    if path.isfile(candidate):
        return [candidate]
    # lets see if there's a matching log
    candidate = path.join(home_dir, pattern)
    if path.isfile(candidate):
        return [candidate]
    from fnmatch import fnmatch
    prefix_len = len(home_dir)
    matches = [ log for log in dir_list(home_dir) if fnmatch(log[prefix_len:], pattern) ]
    if matches and not all:
        matches.sort()
        return [ matches[-1] ]
    return matches
526
def get_file_by_buffer(buffer):
    """Given a buffer pointer, return the path of its log, or None.

    Only entries of the 'logger_buffer' infolist with logging enabled are
    taken into account.
    """
    infolist = weechat.infolist_get('logger_buffer', '', '')
    if not infolist:
        return
    try:
        while weechat.infolist_next(infolist):
            if weechat.infolist_pointer(infolist, 'buffer') != buffer:
                continue
            log_path = weechat.infolist_string(infolist, 'log_filename')
            if weechat.infolist_integer(infolist, 'log_enabled'):
                return log_path
    finally:
        # the infolist gets freed on every exit path
        weechat.infolist_free(infolist)
545
def get_file_by_name(buffer_name):
    """Given a buffer name, return the result of get_file_by_pattern() for its
    log, or None. buffer_name should be in 'server.#channel' or '#channel' format.

    There's no buffer pointer to ask the logger about, so the log path is
    guessed from the logger masks: $name, $channel and $server are replaced
    with what can be taken from 'buffer_name', every other variable (and every
    %-style time code) becomes '*', and the result is used as a pattern.
    """
    from string import letters
    # common mask options
    for config in ('logger.mask.irc', 'logger.file.mask'):
        mask = weechat.config_string(weechat.config_get(config))
        if '$name' in mask:
            mask = mask.replace('$name', buffer_name)
        elif '$channel' in mask or '$server' in mask:
            dot = buffer_name.find('.')
            # a dot after a '#' is taken as part of the channel name
            # (this assumes channels start with '#')
            if dot != -1 and '#' not in buffer_name[:dot]:
                server, channel = buffer_name.split('.', 1)
            else:
                server, channel = '*', buffer_name
            mask = mask.replace('$channel', channel).replace('$server', server)
        # change the unreplaced vars by '*'; time formatting codes are handled
        # like variables
        mask = mask.replace('%', '$')
        if '$' in mask:
            pieces = [ piece.lstrip(letters) for piece in mask.split('$') ]
            mask = '*'.join(pieces)
            if not mask.startswith('*'):
                mask = '*' + mask
        found = get_file_by_pattern(mask)
        if found:
            return found
    return None
588
def get_buffer_by_name(buffer_name):
    """Given a buffer name, return its buffer pointer, or an empty value if not found.

    A direct search is tried first; if it fails every buffer is checked for a
    matching name or short name.
    """
    pointer = weechat.buffer_search('', buffer_name)
    if pointer:
        return pointer
    try:
        infolist = weechat.infolist_get('buffer', '', '')
        while weechat.infolist_next(infolist):
            short_name = weechat.infolist_string(infolist, 'short_name')
            name = weechat.infolist_string(infolist, 'name')
            if buffer_name == short_name or buffer_name == name:
                return weechat.buffer_search('', name)
    finally:
        weechat.infolist_free(infolist)
    return pointer
607
def get_all_buffers():
    """Return a list with the pointers of all open buffers, except grep's own buffer."""
    pointers = []
    infolist = weechat.infolist_get('buffer', '', '')
    while weechat.infolist_next(infolist):
        pointers.append(weechat.infolist_pointer(infolist, 'pointer'))
    weechat.infolist_free(infolist)
    own_buffer = weechat.buffer_search('python', SCRIPT_NAME)
    if own_buffer and own_buffer in pointers:
        # the results buffer isn't searched
        pointers.remove(own_buffer)
    return pointers
620
621 ### Grep ###
def make_regexp(pattern, matchcase=False):
    """Returns a compiled regexp, or None if 'pattern' matches every line.

    The regexp ignores case unless 'matchcase' is true. A leading or trailing
    '.*' is dropped because it only slows matching down.
    Raises Exception('Bad pattern, ...') if the pattern doesn't compile.
    """
    if pattern in ('.', '.*', '.?', '.+'):
        # because I don't need to use a regexp if we're going to match all lines
        return None
    # matching takes a lot more time if pattern starts or ends with .* and it isn't needed.
    # Keep a leading '.*' that carries a quantifier suffix ('.*?', '.*+'),
    # removing it would leave a dangling quantifier.
    if pattern[:2] == '.*' and pattern[2:3] not in ('?', '+'):
        pattern = pattern[2:]
    if pattern[-2:] == '.*':
        head = pattern[:-2]
        # an odd number of backslashes before the dot means it is an escaped,
        # literal dot ('foo\.*'): stripping would leave a dangling backslash
        backslashes = len(head) - len(head.rstrip('\\'))
        if backslashes % 2 == 0:
            pattern = head
    try:
        if not matchcase:
            regexp = re.compile(pattern, re.IGNORECASE)
        else:
            regexp = re.compile(pattern)
    except Exception as e:
        raise Exception('Bad pattern, %s' %e)
    return regexp
640
def check_string(s, regexp, hilight='', exact=False):
    """Checks 's' with a regexp and returns it if is a match.

    regexp:  compiled regexp, a false value makes every string match.
    hilight: '<start>,<end>' pair of strings put around each matched text.
    exact:   return the list of matched texts instead of the whole string
             (groups of a same match are joined with spaces); it takes
             precedence over 'hilight'.

    Returns None when there's no match.
    """
    if not regexp:
        return s

    if exact:
        matchlist = regexp.findall(s)
        if not matchlist:
            return None
        if isinstance(matchlist[0], tuple):
            # join tuples (when there's more than one match group in regexp)
            return [ ' '.join(t) for t in matchlist ]
        return matchlist

    if hilight:
        matchlist = regexp.findall(s)
        if not matchlist:
            return None
        if isinstance(matchlist[0], tuple):
            # flatten matchlist
            matchlist = [ item for L in matchlist for item in L ]
        # remove duplicates and empty matches (highlighting '' would put the
        # colors between every character)
        matches = set([ m for m in matchlist if m ])
        if matches:
            color_hilight, color_reset = hilight.split(',', 1)
            # Highlight every matched text in a single pass, longest first, so
            # that a later replacement can never alter the color codes or the
            # text inserted by a previous one.
            alternatives = sorted(matches, key=len, reverse=True)
            literal = re.compile('|'.join(map(re.escape, alternatives)))
            s = literal.sub(lambda m: '%s%s%s' % (color_hilight, m.group(0), color_reset), s)
        return s

    # no need for findall() here
    if regexp.search(s):
        return s
    return None
670
def grep_file(file, head, tail, after_context, before_context, count, regexp, hilight, exact, invert):
    """Return a list of lines that match 'regexp' in 'file', if no regexp returns all lines.

    file:           path of the file, an unreadable file gives an empty list.
    head, tail:     stop after this many matches, counting from the start or
                    from the end of the file respectively.
    after_context, before_context:
                    number of non matching lines stored after/before a match.
    count:          only count matches, no line is stored.
    hilight:        '<start>,<end>' strings for marking the matched text.
    exact:          store only the matched text instead of the whole line.
    invert:         select the lines that do *not* match.

    'count' disables every other option except invert, 'exact' disables
    context and highlighting, 'invert' disables highlighting.
    Returns a linesList.
    """
    if count:
        tail = head = after_context = before_context = False
        hilight = ''
    elif exact:
        before_context = after_context = False
        hilight = ''
    elif invert:
        hilight = ''

    lines = linesList()
    # define these locally as it makes the loop run slightly faster
    append = lines.append
    count_match = lines.count_match
    separator = lines.append_separator
    if invert:
        def check(s):
            if check_string(s, regexp, hilight, exact):
                return None
            else:
                return s
    else:
        check = lambda s: check_string(s, regexp, hilight, exact)

    try:
        file_object = open(file, 'r')
    except IOError:
        # file doesn't exist
        return lines

    try:
        if tail or before_context:
            # for these options, I need to seek in the file, but is slower and uses a good deal of
            # memory if the log is too big, so we do this *only* for these options.
            file_lines = file_object.readlines()

            if tail:
                # instead of searching in the whole file and later pick the last few lines, we
                # reverse the log, search until count reached and reverse it again, that way is
                # a lot faster
                file_lines.reverse()
                # the list is reversed, so "before" and "after" swap their meaning
                before_context, after_context = after_context, before_context

            if before_context:
                # offsets from the farthest context line to the nearest one
                before_context_range = range(before_context, 0, -1)

            limit = tail or head

            line_idx = 0
            while line_idx < len(file_lines):
                line = check(file_lines[line_idx])
                if line:
                    if before_context:
                        separator()
                        trimmed = False
                        for n in before_context_range:
                            context_idx = line_idx - n
                            if context_idx < 0:
                                # there's no such line; a negative index would
                                # silently wrap around to the end of the file
                                continue
                            context_line = file_lines[context_idx]
                            if check(context_line):
                                # match in before context, that means we appended these same
                                # lines in a previous match, so we delete them merging both
                                # paragraphs
                                if not trimmed:
                                    del lines[n - before_context - 1:]
                                    trimmed = True
                            else:
                                append(context_line)
                    append(line)
                    count_match(line)
                    if after_context:
                        n, offset = 0, 0
                        # every match found in the context extends it
                        while n < after_context + offset:
                            n += 1
                            try:
                                context_line = file_lines[line_idx + n]
                                _context_line = check(context_line)
                                if _context_line:
                                    offset = n
                                    context_line = _context_line # so match is hilighted with --hilight
                                    count_match()
                                append(context_line)
                            except IndexError:
                                pass
                        separator()
                        # the context lines were already checked, skip them
                        line_idx += n
                    if limit and lines.matches_count >= limit:
                        break
                line_idx += 1

            if tail:
                lines.reverse()
        else:
            # do a normal grep
            limit = head

            for line in file_object:
                line = check(line)
                if line:
                    if not count:
                        append(line)
                    count_match(line)
                    if after_context:
                        n, offset = 0, 0
                        while n < after_context + offset:
                            n += 1
                            try:
                                # consumes lines from the same iterator as the outer loop
                                context_line = next(file_object)
                                _context_line = check(context_line)
                                if _context_line:
                                    offset = n
                                    context_line = _context_line
                                    count_match()
                                if not count:
                                    append(context_line)
                            except StopIteration:
                                pass
                        separator()
                    if limit and lines.matches_count >= limit:
                        break
    finally:
        # close the file even if matching raised an exception
        file_object.close()
    return lines
795
def grep_buffer(buffer, head, tail, after_context, before_context, count, regexp, hilight, exact,
        invert):
    """Return a list of lines that match 'regexp' in 'buffer', if no regexp returns all lines.

    'buffer' is a buffer pointer, the remaining arguments have the same
    meaning as in grep_file(). Lines are read from the 'buffer_lines' infolist
    and stored as '<date>\\t<prefix>\\t<message>' without colors, except for
    grep's own buffer where only the message of prefix-less lines is used.
    Returns a linesList.
    """
    lines = linesList()
    if count:
        tail = head = after_context = before_context = False
        hilight = ''
    elif exact:
        before_context = after_context = False

    # Using /grep in grep's buffer can lead to some funny effects
    # We should take measures if that's the case
    def make_get_line_funcion():
        """Returns a function for get lines from the infolist, depending if the buffer is grep's or
        not."""
        string_remove_color = weechat.string_remove_color
        infolist_string = weechat.infolist_string
        grep_buffer = weechat.buffer_search('python', SCRIPT_NAME)
        if grep_buffer and buffer == grep_buffer:
            def function(infolist):
                prefix = infolist_string(infolist, 'prefix')
                message = infolist_string(infolist, 'message')
                if prefix: # only our messages have prefix, ignore it
                    return None
                return message
        else:
            infolist_time = weechat.infolist_time
            def function(infolist):
                prefix = string_remove_color(infolist_string(infolist, 'prefix'), '')
                message = string_remove_color(infolist_string(infolist, 'message'), '')
                date = infolist_time(infolist, 'date')
                return '%s\t%s\t%s' %(date, prefix, message)
        return function
    get_line = make_get_line_funcion()

    infolist = weechat.infolist_get('buffer_lines', buffer, '')
    if not infolist:
        # no lines to look at
        return lines
    if tail:
        # like with grep_file() if we need the last few matching lines, we move the cursor to
        # the end and search backwards
        infolist_next = weechat.infolist_prev
        infolist_prev = weechat.infolist_next
    else:
        infolist_next = weechat.infolist_next
        infolist_prev = weechat.infolist_prev
    limit = head or tail

    # define these locally as it makes the loop run slightly faster
    append = lines.append
    count_match = lines.count_match
    separator = lines.append_separator
    if invert:
        def check(s):
            if check_string(s, regexp, hilight, exact):
                return None
            else:
                return s
    else:
        check = lambda s: check_string(s, regexp, hilight, exact)

    if before_context:
        # offsets from the farthest context line to the nearest one
        before_context_range = range(before_context, 0, -1)

    try:
        while infolist_next(infolist):
            line = get_line(infolist)
            if line is None: continue
            line = check(line)
            if line:
                if before_context:
                    separator()
                    trimmed = False
                    # move the cursor back to the first context line
                    for n in before_context_range:
                        if not infolist_prev(infolist):
                            trimmed = True
                    # and walk forward again up to the matched line
                    for n in before_context_range:
                        context_line = get_line(infolist)
                        # get_line() returns None for lines that must be ignored
                        # (our own messages in grep's buffer); they can't be
                        # checked nor stored
                        if context_line is not None:
                            if check(context_line):
                                if not trimmed:
                                    del lines[n - before_context - 1:]
                                    trimmed = True
                            else:
                                append(context_line)
                        infolist_next(infolist)
                if not count:
                    append(line)
                count_match(line)
                if after_context:
                    n, offset = 0, 0
                    while n < after_context + offset:
                        n += 1
                        if infolist_next(infolist):
                            context_line = get_line(infolist)
                            if context_line is None:
                                # ignored line, see above
                                continue
                            _context_line = check(context_line)
                            if _context_line:
                                context_line = _context_line
                                offset = n
                                count_match()
                            append(context_line)
                        else:
                            # in the main loop infolist_next will start again an cause an infinite
                            # loop, this will avoid it
                            infolist_next = lambda x: 0
                    separator()
                if limit and lines.matches_count >= limit:
                    break
    finally:
        # free the infolist even if matching raised an exception
        weechat.infolist_free(infolist)

    if tail:
        lines.reverse()
    return lines
906
907 ### this is our main grep function
# hook of the background grep process, if any
hook_file_grep = None
def show_matching_lines():
    """
    Greps buffers in search_in_buffers or files in search_in_files and updates grep buffer with the
    result.

    Everything is read from the search globals. Buffers are always searched
    right away. Logs are searched right away too, unless their total size is
    over the 'size_limit' option; in that case a python process is hooked for
    doing the search in background, which stores the result in a temporary
    file, and 'grep_file_callback' is registered as its callback.
    """
    global pattern, matchcase, number, count, exact, hilight, invert
    global tail, head, after_context, before_context
    global search_in_files, search_in_buffers, matched_lines, home_dir
    global time_start
    global hook_file_grep, script_path, bytecode, tmpFile, pattern_tmpl
    matched_lines = linesDict()
    time_start = now()

    # buffers
    if search_in_buffers:
        regexp = make_regexp(pattern, matchcase)
        for buffer in search_in_buffers:
            buffer_name = weechat.buffer_get_string(buffer, 'name')
            matched_lines[buffer_name] = grep_buffer(buffer, head, tail, after_context,
                    before_context, count, regexp, hilight, exact, invert)

    # logs
    if search_in_files:
        size_limit = get_config_int('size_limit', allow_empty_string=True)
        # an empty size_limit ('') disables the background process
        background = False
        if size_limit or size_limit == 0:
            size = sum(map(get_size, search_in_files))
            if size > size_limit * 1024:
                background = True

        if not background:
            # run grep normally
            regexp = make_regexp(pattern, matchcase)
            for log in search_in_files:
                log_name = strip_home(log)
                matched_lines[log_name] = grep_file(log, head, tail, after_context, before_context,
                        count, regexp, hilight, exact, invert)
            buffer_update()
        else:
            # we hook a process so grepping runs in background.
            timeout = 1000*60*5 # 5 min

            def embed(text):
                """Escape 'text' for the command template, where it ends inside a double
                quoted Python string that is itself inside a single quoted argument."""
                # Backslashes are doubled so the regexp reaches the child
                # verbatim ('\\b' must not become a backspace); quotes are
                # written as hex escapes so they can't close either quoting.
                return text.replace('\\', '\\\\').replace('"', '\\x22').replace("'", '\\x27')

            quotify = lambda s: '"%s"' %embed(s)
            files_string = ', '.join(map(quotify, search_in_files))

            # we keep the file descriptor as a global var so it isn't deleted until next grep
            tmpFile = tempfile.NamedTemporaryFile(prefix=SCRIPT_NAME,
                    dir=weechat.info_get('weechat_dir', ''))
            cmd = grep_process_cmd %dict(logs=files_string, head=head, pattern=embed(pattern),
                    tail=tail, hilight=embed(hilight), after_context=after_context,
                    before_context=before_context, exact=exact, matchcase=matchcase,
                    home_dir=embed(home_dir), script_path=embed(script_path), count=count,
                    invert=invert, bytecode=bytecode, filename=embed(tmpFile.name),
                    python=weechat.info_get('python2_bin', '') or 'python')

            hook_file_grep = weechat.hook_process(cmd, timeout, 'grep_file_callback', tmpFile.name)
            if hook_file_grep:
                buffer_create("Searching for '%s' in %s worth of data..." %(pattern_tmpl,
                    human_readable_size(size)))
    else:
        buffer_update()
976
# Shell command template for grepping in a child python process. It is filled in with the
# % operator from a dict and handed to weechat.hook_process(). The child imports this script
# as a module, greps every log and pickles the resulting {log_name: lines} dict into
# %(filename)s; grep_file_callback() loads that file back when the process ends.
# Any exception in the child is printed to stderr, which the callback reports as an error.
# NOTE(review): %(pattern)s, %(home_dir)s and the log paths are interpolated unescaped inside
# double quotes, and the whole program sits inside a single-quoted shell argument; a quote
# or backslash in any of them presumably breaks the command -- confirm and escape if needed.
grep_process_cmd = """%(python)s -%(bytecode)sc '
import sys, cPickle, os
sys.path.append("%(script_path)s") # add WeeChat script dir so we can import grep
from grep import make_regexp, grep_file, strip_home
logs = (%(logs)s, )
try:
    regexp = make_regexp("%(pattern)s", %(matchcase)s)
    d = {}
    for log in logs:
        log_name = strip_home(log, "%(home_dir)s")
        lines = grep_file(log, %(head)s, %(tail)s, %(after_context)s, %(before_context)s,
        %(count)s, regexp, "%(hilight)s", %(exact)s, %(invert)s)
        d[log_name] = lines
    fd = open("%(filename)s", "wb")
    cPickle.dump(d, fd, -1)
    fd.close()
except Exception, e:
    print >> sys.stderr, e'
"""

# stdout/stderr of the hooked process, accumulated across grep_file_callback() calls
grep_stdout = grep_stderr = ''
999 def grep_file_callback(filename, command, rc, stdout, stderr):
1000 global hook_file_grep, grep_stderr, grep_stdout
1001 global matched_lines
1002 #debug("rc: %s\nstderr: %s\nstdout: %s" %(rc, repr(stderr), repr(stdout)))
1003 if stdout:
1004 grep_stdout += stdout
1005 if stderr:
1006 grep_stderr += stderr
1007 if int(rc) >= 0:
1008
1009 def set_buffer_error():
1010 grep_buffer = buffer_create()
1011 title = weechat.buffer_get_string(grep_buffer, 'title')
1012 title = title + ' %serror' %color_title
1013 weechat.buffer_set(grep_buffer, 'title', title)
1014
1015 try:
1016 if grep_stderr:
1017 error(grep_stderr)
1018 set_buffer_error()
1019 #elif grep_stdout:
1020 #debug(grep_stdout)
1021 elif path.exists(filename):
1022 import cPickle
1023 try:
1024 #debug(file)
1025 fd = open(filename, 'rb')
1026 d = cPickle.load(fd)
1027 matched_lines.update(d)
1028 fd.close()
1029 except Exception, e:
1030 error(e)
1031 set_buffer_error()
1032 else:
1033 buffer_update()
1034 global tmpFile
1035 tmpFile = None
1036 finally:
1037 grep_stdout = grep_stderr = ''
1038 hook_file_grep = None
1039 return WEECHAT_RC_OK
1040
def get_grep_file_status():
    """Returns a status message for the search that is currently running: what is being
    searched (one log, or the number of logs, with their size) and for how long."""
    global search_in_files, matched_lines, time_start
    running_for = now() - time_start
    if len(search_in_files) != 1:
        total_size = sum(map(get_size, search_in_files))
        target = '%s log files (%s)' %(len(search_in_files), human_readable_size(total_size))
    else:
        only_log = search_in_files[0]
        target = '%s (%s)' %(strip_home(only_log), human_readable_size(get_size(only_log)))
    return 'Searching in %s, running for %.4f seconds. Interrupt it with "/grep stop" or "stop"' \
            ' in grep buffer.' %(target, running_for)
1052
1053 ### Grep buffer ###
def buffer_update():
    """Updates our buffer with new lines.

    Prints the content of the global 'matched_lines' in the grep buffer: a header line, the
    matched lines of each log/buffer (or only the summaries when counting), and finally
    sets the buffer title with totals and timing. 'matched_lines' is deleted at the end.
    """
    global pattern_tmpl, matched_lines, pattern, count, hilight, invert, exact
    # the search itself is over at this point, what follows is only printing
    time_grep = now()

    buffer = buffer_create()
    if get_config_boolean('clear_buffer'):
        weechat.buffer_clear(buffer)
    matched_lines.strip_separator() # remove first and last separators of each list
    len_total_lines = len(matched_lines)
    max_lines = get_config_int('max_lines')
    # when not everything is going to be shown the buffer is cleared, whatever
    # 'clear_buffer' says
    if not count and len_total_lines > max_lines:
        weechat.buffer_clear(buffer)

    def _make_summary(log, lines, note):
        # one summary line per log: number of matches, pattern, log name and a note
        return '%s matches "%s%s%s"%s in %s%s%s%s' \
                %(lines.matches_count, color_summary, pattern_tmpl, color_info,
                  invert and ' (inverted)' or '',
                  color_summary, log, color_reset, note)

    if count:
        # with --count lines are never printed
        make_summary = lambda log, lines : _make_summary(log, lines, ' (not shown)')
    else:
        def make_summary(log, lines):
            # the note is only needed if lines were stripped from this log
            if lines.stripped_lines:
                if lines:
                    note = ' (last %s lines shown)' %len(lines)
                else:
                    note = ' (not shown)'
            else:
                note = ''
            return _make_summary(log, lines, note)

    global weechat_format
    if hilight:
        # we don't want colors if there's match highlighting
        format_line = lambda s : '%s %s %s' %split_line(s)
    else:
        def format_line(s):
            global nick_dict, weechat_format
            date, nick, msg = split_line(s)
            # split_line() turns weechat_format off when the line isn't tab separated
            if weechat_format:
                try:
                    nick = nick_dict[nick]
                except KeyError:
                    # cache nick
                    nick_c = color_nick(nick)
                    nick_dict[nick] = nick_c
                    nick = nick_c
                return '%s%s %s%s %s' %(color_date, date, nick, color_reset, msg)
            else:
                #no formatting
                return msg

    prnt(buffer, '\n')
    print_line('Search for "%s%s%s"%s in %s%s%s.' %(color_summary, pattern_tmpl, color_info,
        invert and ' (inverted)' or '', color_summary, matched_lines, color_reset),
        buffer)
    # print last <max_lines> lines
    if matched_lines.get_matches_count():
        if count:
            # with count we sort by matches lines instead of just lines.
            matched_lines_items = matched_lines.items_count()
        else:
            matched_lines_items = matched_lines.items()

        matched_lines.get_last_lines(max_lines)
        for log, lines in matched_lines_items:
            if lines.matches_count:
                # matched lines
                if not count:
                    # print lines
                    # every log starts assuming WeeChat's format, see split_line()
                    weechat_format = True
                    if exact:
                        lines.onlyUniq()
                    for line in lines:
                        #debug(repr(line))
                        if line == linesList._sep:
                            # separator
                            prnt(buffer, context_sep)
                        else:
                            if '\x00' in line:
                                # log was corrupted
                                error("Found garbage in log '%s', maybe it's corrupted" %log)
                                line = line.replace('\x00', '')
                            prnt_date_tags(buffer, 0, 'no_highlight', format_line(line))

                # summary
                if count or get_config_boolean('show_summary'):
                    summary = make_summary(log, lines)
                    print_line(summary, buffer)

                # separator
                if not count and lines:
                    prnt(buffer, '\n')
    else:
        print_line('No matches found.', buffer)

    # set title
    global time_start
    time_end = now()
    # total time
    time_total = time_end - time_start
    # percent of the total time used for grepping
    # NOTE(review): this would raise ZeroDivisionError if now() returned the same value as
    # time_start -- presumably never the case in practice, confirm.
    time_grep_pct = (time_grep - time_start)/time_total*100
    #debug('time: %.4f seconds (%.2f%%)' %(time_total, time_grep_pct))
    if not count and len_total_lines > max_lines:
        note = ' (last %s lines shown)' %len(matched_lines)
    else:
        note = ''
    title = "'q': close buffer | Search in %s%s%s %s matches%s | pattern \"%s%s%s\"%s %s | %.4f seconds (%.2f%%)" \
            %(color_title, matched_lines, color_reset, matched_lines.get_matches_count(), note,
              color_title, pattern_tmpl, color_reset, invert and ' (inverted)' or '', format_options(),
              time_total, time_grep_pct)
    weechat.buffer_set(buffer, 'title', title)

    if get_config_boolean('go_to_buffer'):
        weechat.buffer_set(buffer, 'display', '1')

    # free matched_lines so it can be removed from memory
    # (it is declared global above, so this removes the module level name)
    del matched_lines
1175
def split_line(s):
    """Returns the tuple (date, nick, msg) for the log line 's'.

    A line with less than two tabs isn't in WeeChat's log format: date and nick are left
    empty and the global 'weechat_format' is switched off, so the lines that follow aren't
    split either. Tabs left in the message are replaced by spaces."""
    global weechat_format
    if not weechat_format or s.count('\t') < 2:
        # looks like log isn't in weechat's format, don't format incoming lines
        weechat_format = False
        date = nick = ''
        msg = s
    else:
        date, nick, msg = s.split('\t', 2)
    return date, nick, msg.replace('\t', ' ')
1189
def print_line(s, buffer=None, display=False):
    """Prints 's' with say(), prefixed by the info color, in 'buffer' (the script's buffer
    when None). Used for search summaries. With 'display' the buffer is switched to,
    provided the 'go_to_buffer' option is enabled."""
    target = buffer
    if target is None:
        target = buffer_create()
    say('%s%s' %(color_info, s), target)
    if not display:
        return
    if get_config_boolean('go_to_buffer'):
        weechat.buffer_set(target, 'display', '1')
1197
def format_options():
    """Returns the options of the current search as a command line string (something like
    '-cv -C3'), or an empty string if no option is set. Used for the buffer's title."""
    global matchcase, number, count, exact, hilight, invert
    global tail, head, after_context, before_context
    parts = []

    # single letter flags, always in this order
    for letter, enabled in zip('cHmov', (count, hilight, matchcase, exact, invert)):
        if enabled:
            parts.append(letter)

    if head or tail:
        default_amount = get_config_int('default_tail_head')
        if head:
            letter, amount = 'h', head
        else:
            letter, amount = 't', tail
        parts.append(letter)
        if amount != default_amount:
            # non default amount, h/t starts a new group followed by -n: ' -hn<amount>'
            parts.insert(-1, ' -')
            parts.append('n')
            parts.append(amount)

    same_context = before_context and after_context and (before_context == after_context)
    if same_context:
        parts.extend((' -C', before_context))
    else:
        if before_context:
            parts.extend((' -B', before_context))
        if after_context:
            parts.extend((' -A', after_context))

    text = ''.join(map(str, parts)).strip()
    if text and not text.startswith('-'):
        text = '-' + text
    return text
1239
def buffer_create(title=None):
    """Returns the pointer of the script's buffer. The buffer is created and configured if
    it doesn't exist yet; if it does, only its title is updated (when 'title' is given)."""
    buffer = weechat.buffer_search('python', SCRIPT_NAME)
    if buffer:
        if title:
            weechat.buffer_set(buffer, 'title', title)
        return buffer

    buffer = weechat.buffer_new(SCRIPT_NAME, 'buffer_input', '', '', '')
    properties = (
            ('time_for_each_line', '0'),
            ('nicklist', '0'),
            ('title', title or 'grep output buffer'),
            ('localvar_set_no_log', '1'),
            )
    for name, value in properties:
        weechat.buffer_set(buffer, name, value)
    return buffer
1252
1253 def buffer_input(data, buffer, input_data):
1254 """Repeats last search with 'input_data' as regexp."""
1255 try:
1256 cmd_grep_stop(buffer, input_data)
1257 except:
1258 return WEECHAT_RC_OK
1259 if input_data in ('q', 'Q'):
1260 weechat.buffer_close(buffer)
1261 return weechat.WEECHAT_RC_OK
1262
1263 global search_in_buffers, search_in_files
1264 global pattern
1265 try:
1266 if pattern and (search_in_files or search_in_buffers):
1267 # check if the buffer pointers are still valid
1268 for pointer in search_in_buffers:
1269 infolist = weechat.infolist_get('buffer', pointer, '')
1270 if not infolist:
1271 del search_in_buffers[search_in_buffers.index(pointer)]
1272 weechat.infolist_free(infolist)
1273 try:
1274 cmd_grep_parsing(input_data)
1275 except Exception, e:
1276 error('Argument error, %s' %e, buffer=buffer)
1277 return WEECHAT_RC_OK
1278 try:
1279 show_matching_lines()
1280 except Exception, e:
1281 error(e)
1282 except NameError:
1283 error("There isn't any previous search to repeat.", buffer=buffer)
1284 return WEECHAT_RC_OK
1285
1286 ### Commands ###
def cmd_init():
    """Sets the global vars used by the commands back to their initial values."""
    global home_dir, cache_dir, nick_dict
    global pattern_tmpl, pattern, matchcase, number, count, exact, hilight, invert
    global tail, head, after_context, before_context
    # search pattern and options
    pattern_tmpl = pattern = number = None
    matchcase = count = exact = False
    head = tail = after_context = before_context = invert = False
    hilight = ''
    home_dir = get_home()
    # caches
    nick_dict = {} # nick colors, so they aren't calculated for every line
    cache_dir = {} # so the dir tree isn't walked more than once per command
1299
def cmd_grep_parsing(args):
    """Parses args for /grep and grep input buffer.

    The result is stored in the module's globals (pattern, pattern_tmpl, log_name,
    buffer_name and the option flags). Boolean options toggle the current value, so when
    repeating a search from the grep buffer they switch the previous setting.

    Raises getopt.GetoptError for unknown options, and Exception if there's no pattern or a
    numeric argument isn't valid."""
    global pattern_tmpl, pattern, matchcase, number, count, exact, hilight, invert
    global tail, head, after_context, before_context
    global log_name, buffer_name, only_buffers, all
    opts, args = getopt.gnu_getopt(args.split(), 'cmHeahtivn:bA:B:C:o', ['count', 'matchcase', 'hilight',
        'exact', 'all', 'head', 'tail', 'number=', 'buffer', 'after-context=', 'before-context=',
        'context=', 'invert', 'only-match'])
    #debug(opts, 'opts: '); debug(args, 'args: ')
    # 'log <file>' or 'buffer <name>' before the pattern
    if len(args) >= 2:
        if args[0] == 'log':
            del args[0]
            log_name = args.pop(0)
        elif args[0] == 'buffer':
            del args[0]
            buffer_name = args.pop(0)

    def tmplReplacer(match):
        """This function will replace templates with regexps"""
        whole = match.group(0)
        tmpl_key, _, tmpl_args = match.group(1).partition(' ')
        try:
            template = templates[tmpl_key]
        except KeyError:
            # not a template we know about, leave the text as it is
            return whole
        if not callable(template):
            return template
        try:
            r = template(tmpl_args)
        except Exception:
            # template failed, leave the text as it is
            return whole
        if not r:
            error("Template %s returned empty string "\
                  "(WeeChat doesn't have enough data)." %tmpl_key)
        return r

    args = ' '.join(args) # join pattern for keep spaces
    if args:
        pattern_tmpl = args
        pattern = _tmplRe.sub(tmplReplacer, args)
        debug('Using regexp: %s', pattern)
    if not pattern:
        raise Exception('No pattern for grep the logs.')

    def positive_number(opt, val):
        # returns 'val' as an integer, raises Exception if it isn't a number or is negative
        try:
            number = int(val)
            if number < 0:
                raise ValueError
            return number
        except ValueError:
            if len(opt) == 1:
                opt = '-' + opt
            else:
                opt = '--' + opt
            raise Exception("argument for %s must be a positive integer." %opt)

    for opt, val in opts:
        opt = opt.strip('-')
        if opt in ('c', 'count'):
            count = not count
        elif opt in ('m', 'matchcase'):
            matchcase = not matchcase
        elif opt in ('H', 'hilight'):
            # hilight must be always a string!
            if hilight:
                hilight = ''
            else:
                hilight = '%s,%s' %(color_hilight, color_reset)
            # we pass the colors in the variable itself because check_string() must not use
            # weechat's module when applying the colors (this is for grep in a hooked process)
        elif opt in ('e', 'exact', 'o', 'only-match'):
            # exact and invert exclude each other
            exact = not exact
            invert = False
        elif opt in ('a', 'all'):
            all = not all
        elif opt in ('h', 'head'):
            # head and tail exclude each other
            head = not head
            tail = False
        elif opt in ('t', 'tail'):
            tail = not tail
            head = False
        elif opt in ('b', 'buffer'):
            only_buffers = True
        elif opt in ('n', 'number'):
            number = positive_number(opt, val)
        elif opt in ('C', 'context'):
            n = positive_number(opt, val)
            after_context = n
            before_context = n
        elif opt in ('A', 'after-context'):
            after_context = positive_number(opt, val)
        elif opt in ('B', 'before-context'):
            before_context = positive_number(opt, val)
        elif opt in ('i', 'v', 'invert'):
            invert = not invert
            exact = False
    # number check
    # head/tail stop being booleans here, they hold the number of lines to show
    if number is not None:
        if number == 0:
            head = tail = False
            number = None
        elif head:
            head = number
        elif tail:
            tail = number
    else:
        n = get_config_int('default_tail_head')
        if head:
            head = n
        elif tail:
            tail = n
1411
def cmd_grep_stop(buffer, args):
    """Handles the input received while a search runs in background.

    Does nothing if there isn't any. Otherwise, 'stop' interrupts the search and any other
    input prints its status in 'buffer'; in both cases Exception is raised afterwards, so
    the caller knows it mustn't continue."""
    global hook_file_grep, pattern, matched_lines, tmpFile
    if not hook_file_grep:
        return

    if args != 'stop':
        say(get_grep_file_status(), buffer)
    else:
        weechat.unhook(hook_file_grep)
        hook_file_grep = None
        notice = 'Search for \'%s\' stopped.' %pattern
        say(notice, buffer)
        results_buffer = weechat.buffer_search('python', SCRIPT_NAME)
        if results_buffer:
            weechat.buffer_set(results_buffer, 'title', notice)
        del matched_lines
        tmpFile = None
    raise Exception
1428
def cmd_grep(data, buffer, args):
    """Search in buffers and logs."""
    # Callback for /grep. The docstring above is shown as the command's description (it is
    # given to weechat.hook_command), keep it short.
    # 'buffer' is where the command was issued, it's the one searched when no log or buffer
    # name is given. Always returns WEECHAT_RC_OK, errors are reported with error().
    global pattern, matchcase, head, tail, number, count, exact, hilight
    try:
        # raises Exception if there's a search running in background ('stop' and the
        # status message are handled there), nothing else to do in that case
        cmd_grep_stop(buffer, args)
    except:
        return WEECHAT_RC_OK

    if not args:
        weechat.command('', '/help %s' %SCRIPT_COMMAND)
        return WEECHAT_RC_OK

    # a new search starts with every option reset
    cmd_init()
    global log_name, buffer_name, only_buffers, all
    log_name = buffer_name = ''
    only_buffers = all = False

    # parse
    try:
        cmd_grep_parsing(args)
    except Exception, e:
        error('Argument error, %s' %e)
        return WEECHAT_RC_OK

    # find logs
    log_file = search_buffer = None
    if log_name:
        log_file = get_file_by_pattern(log_name, all)
        if not log_file:
            error("Couldn't find any log for %s. Try /logs" %log_name)
            return WEECHAT_RC_OK
    elif all:
        search_buffer = get_all_buffers()
    elif buffer_name:
        search_buffer = get_buffer_by_name(buffer_name)
        if not search_buffer:
            # there's no buffer, try in the logs
            log_file = get_file_by_name(buffer_name)
            if not log_file:
                error("Logs or buffer for '%s' not found." %buffer_name)
                return WEECHAT_RC_OK
        else:
            search_buffer = [search_buffer]
    else:
        search_buffer = [buffer]

    # make the log list
    global search_in_files, search_in_buffers
    search_in_files = []
    search_in_buffers = []
    if log_file:
        search_in_files = log_file
    elif not only_buffers:
        # search in the buffer's log file if there's one, in the buffer itself otherwise
        #debug(search_buffer)
        for pointer in search_buffer:
            log = get_file_by_buffer(pointer)
            #debug('buffer %s log %s' %(pointer, log))
            if log:
                search_in_files.append(log)
            else:
                search_in_buffers.append(pointer)
    else:
        search_in_buffers = search_buffer

    # grepping
    try:
        show_matching_lines()
    except Exception, e:
        error(e)
    return WEECHAT_RC_OK
1499
1500 def cmd_logs(data, buffer, args):
1501 """List files in Weechat's log dir."""
1502 cmd_init()
1503 global home_dir
1504 sort_by_size = False
1505 filter = []
1506
1507 try:
1508 opts, args = getopt.gnu_getopt(args.split(), 's', ['size'])
1509 if args:
1510 filter = args
1511 for opt, var in opts:
1512 opt = opt.strip('-')
1513 if opt in ('size', 's'):
1514 sort_by_size = True
1515 except Exception, e:
1516 error('Argument error, %s' %e)
1517 return WEECHAT_RC_OK
1518
1519 # is there's a filter, filter_excludes should be False
1520 file_list = dir_list(home_dir, filter, filter_excludes=not filter)
1521 if sort_by_size:
1522 file_list.sort(key=get_size)
1523 else:
1524 file_list.sort()
1525
1526 file_sizes = map(lambda x: human_readable_size(get_size(x)), file_list)
1527 # calculate column lenght
1528 if file_list:
1529 L = file_list[:]
1530 L.sort(key=len)
1531 bigest = L[-1]
1532 column_len = len(bigest) + 3
1533 else:
1534 column_len = ''
1535
1536 buffer = buffer_create()
1537 if get_config_boolean('clear_buffer'):
1538 weechat.buffer_clear(buffer)
1539 file_list = zip(file_list, file_sizes)
1540 msg = 'Found %s logs.' %len(file_list)
1541
1542 print_line(msg, buffer, display=True)
1543 for file, size in file_list:
1544 separator = column_len and '.'*(column_len - len(file))
1545 prnt(buffer, '%s %s %s' %(strip_home(file), separator, size))
1546 if file_list:
1547 print_line(msg, buffer)
1548 return WEECHAT_RC_OK
1549
1550
1551 ### Completion ###
def completion_log_files(data, completion_item, buffer, completion):
    """Completion callback, adds every log found by dir_list() in 'home_dir', with the
    'home_dir' prefix removed from its path."""
    #debug('completion: %s' %', '.join((data, completion_item, buffer, completion)))
    global home_dir
    prefix_len = len(home_dir)
    add = weechat.hook_completion_list_add
    position = weechat.WEECHAT_LIST_POS_END
    for log_path in dir_list(home_dir):
        add(completion, log_path[prefix_len:], 0, position)
    return WEECHAT_RC_OK
1561
def completion_grep_args(data, completion_item, buffer, completion):
    """Completion callback, adds /grep's long options ('--option') and the start of every
    template ('%{name')."""
    add = weechat.hook_completion_list_add
    position = weechat.WEECHAT_LIST_POS_SORT
    long_options = ('count', 'all', 'matchcase', 'hilight', 'exact', 'head', 'tail', 'number',
            'buffer', 'after-context', 'before-context', 'context', 'invert', 'only-match')
    words = ['--' + name for name in long_options]
    words.extend(['%{' + name for name in templates])
    for word in words:
        add(completion, word, 0, position)
    return WEECHAT_RC_OK
1569
1570
1571 ### Templates ###
# template placeholder: '%{' followed by the template text (which must start with a word
# character) up to the next '}' or, if there's none, the end of the string. Group 1 is the
# text between the braces.
_tmplRe = re.compile(r'%\{(\w+.*?)(?:\}|$)')
# four groups of 1 to 3 digits separated by dots
# will match 999.999.999.999 but I don't care
ipAddress = r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}'
# one or more dot separated labels (word characters or '-', 2 or more each) followed by a
# last part of 2 or more lowercase letters
domain = r'[\w-]{2,}(?:\.[\w-]{2,})*\.[a-z]{2,}'
# scheme://, a domain or ip, optional :port and an optional path that stops at ']', ')', '>'
# or whitespace
url = r'\w+://(?:%s|%s)(?::\d+)?(?:/[^\])>\s]*)?' % (domain, ipAddress)
1578
def make_url_regexp(args):
    """Returns the regexp for the 'url' template. Without 'args' it's the generic url
    regexp, otherwise one for urls that contain any of the words in 'args' (separated by
    whitespace, taken literally)."""
    #debug('make url: %s', args)
    if not args:
        return url
    escaped = [ re.escape(word) for word in args.split() ]
    words = r'(?:%s)' %'|'.join(escaped)
    return r'(?:\w+://|www\.)[^\s]*%s[^\s]*(?:/[^\])>\s]*)?' %words
1586
def make_simple_regexp(pattern):
    """Converts a pattern with wildcards in a regexp: '*' becomes '.*', '?' becomes '.' and
    any other character is escaped."""
    wildcards = {'*': '.*', '?': '.'}
    pieces = []
    for char in pattern:
        if char in wildcards:
            pieces.append(wildcards[char])
        else:
            pieces.append(re.escape(char))
    return ''.join(pieces)
1597
# Templates available in patterns as %{name [args]}. A value is either the regexp itself,
# or a callable that gets the text after the template's name (an empty string if there's
# none) and returns the regexp.
templates = {
        'ip': ipAddress,
        'url': make_url_regexp,
        'escape': lambda s: re.escape(s),
        'simple': make_simple_regexp,
        'domain': domain,
        }
1605
1606 ### Main ###
def delete_bytecode():
    """Removes the compiled file (SCRIPT_NAME + '.pyc') from the script's dir, if there's
    one. Called when the script is loaded, and given to weechat.register as well."""
    global script_path
    compiled = path.join(script_path, SCRIPT_NAME + '.pyc')
    if not path.isfile(compiled):
        return WEECHAT_RC_OK
    os.remove(compiled)
    return WEECHAT_RC_OK
1613
# Script registration. Nothing below runs when this file is imported as a module, which is
# what the hooked grep process does ('from grep import ...' in grep_process_cmd).
if __name__ == '__main__' and import_ok and \
        weechat.register(SCRIPT_NAME, SCRIPT_AUTHOR, SCRIPT_VERSION, SCRIPT_LICENSE, \
        SCRIPT_DESC, 'delete_bytecode', ''):
    home_dir = get_home()

    # for import ourselves
    # (the 'global' statements in this block have no effect at module level)
    global script_path
    script_path = path.dirname(__file__)
    sys.path.append(script_path)
    delete_bytecode()

    # check python version
    import sys
    global bytecode
    # 'bytecode' is inserted in grep_process_cmd as an extra flag for the python command
    # ('-Bc' instead of '-c')
    if sys.version_info > (2, 6):
        bytecode = 'B'
    else:
        bytecode = ''


    weechat.hook_command(SCRIPT_COMMAND, cmd_grep.__doc__,
            "[log <file> | buffer <name> | stop] [-a|--all] [-b|--buffer] [-c|--count] [-m|--matchcase] "
            "[-H|--hilight] [-o|--only-match] [-i|-v|--invert] [(-h|--head)|(-t|--tail) [-n|--number <n>]] "
            "[-A|--after-context <n>] [-B|--before-context <n>] [-C|--context <n> ] <expression>",
            # help
            """
log <file>: Search in one log that matches <file> in the logger path.
Use '*' and '?' as wildcards.
buffer <name>: Search in buffer <name>, if there's no buffer with <name> it will
try to search for a log file.
stop: Stops a currently running search.
-a --all: Search in all open buffers.
If used with 'log <file>' search in all logs that matches <file>.
-b --buffer: Search only in buffers, not in file logs.
-c --count: Just count the number of matched lines instead of showing them.
-m --matchcase: Don't do case insensitive search.
-H --hilight: Colour exact matches in output buffer.
-o --only-match: Print only the matching part of the line (unique matches).
-v -i --invert: Print lines that don't match the regular expression.
-t --tail: Print the last 10 matching lines.
-h --head: Print the first 10 matching lines.
-n --number <n>: Overrides default number of lines for --tail or --head.
-A --after-context <n>: Shows <n> lines of trailing context after matching lines.
-B --before-context <n>: Shows <n> lines of leading context before matching lines.
-C --context <n>: Same as using both --after-context and --before-context simultaneously.
<expression>: Expression to search.

Grep buffer:
Input line accepts most arguments of /grep, it'll repeat last search using the new
arguments provided. You can't search in different logs from the buffer's input.
Boolean arguments like --count, --tail, --head, --hilight, ... are toggleable

Python regular expression syntax:
See http://docs.python.org/lib/re-syntax.html

Grep Templates:
%{url [text]}: Matches anything like an url, or an url with text.
%{ip}: Matches anything that looks like an ip.
%{domain}: Matches anything like a domain.
%{escape text}: Escapes text in pattern.
%{simple pattern}: Converts a pattern with '*' and '?' wildcards into a regexp.

Examples:
Search for urls with the word 'weechat' said by 'nick'
/grep nick\\t.*%{url weechat}
Search for '*.*' string
/grep %{escape *.*}
""",
            # completion template
            "buffer %(buffers_names) %(grep_arguments)|%*"
            "||log %(grep_log_files) %(grep_arguments)|%*"
            "||stop"
            "||%(grep_arguments)|%*",
            'cmd_grep' ,'')
    weechat.hook_command('logs', cmd_logs.__doc__, "[-s|--size] [<filter>]",
            "-s --size: Sort logs by size.\n"
            " <filter>: Only show logs that match <filter>. Use '*' and '?' as wildcards.", '--size', 'cmd_logs', '')

    # completions used in the template of /grep above
    weechat.hook_completion('grep_log_files', "list of log files",
            'completion_log_files', '')
    weechat.hook_completion('grep_arguments', "list of arguments",
            'completion_grep_args', '')

    # settings
    # set the default value of every option that isn't set yet
    for opt, val in settings.iteritems():
        if not weechat.config_is_set_plugin(opt):
            weechat.config_set_plugin(opt, val)

    # colors
    color_date = weechat.color('brown')
    color_info = weechat.color('cyan')
    color_hilight = weechat.color('lightred')
    color_reset = weechat.color('reset')
    color_title = weechat.color('yellow')
    color_summary = weechat.color('lightcyan')
    color_delimiter = weechat.color('chat_delimiters')
    color_script_nick = weechat.color('chat_nick')

    # pretty [grep]
    script_nick = '%s[%s%s%s]%s' %(color_delimiter, color_script_nick, SCRIPT_NAME, color_delimiter,
            color_reset)
    script_nick_nocolor = '[%s]' %SCRIPT_NAME
    # paragraph separator when using context options
    context_sep = '%s\t%s--' %(script_nick, color_info)

    # -------------------------------------------------------------------------
    # Debug

    # debug() prints only if the 'debug' plugin option is set, otherwise it does nothing
    if weechat.config_get_plugin('debug'):
        try:
            # custom debug module I use, allows me to inspect script's objects.
            import pybuffer
            debug = pybuffer.debugBuffer(globals(), '%s_debug' % SCRIPT_NAME)
        except:
            # pybuffer isn't available, print in the core buffer
            def debug(s, *args):
                if not isinstance(s, basestring):
                    s = str(s)
                if args:
                    s = s %args
                prnt('', '%s\t%s' %(script_nick, s))
    else:
        def debug(*args):
            pass
1737
1738 # vim:set shiftwidth=4 tabstop=4 softtabstop=4 expandtab textwidth=100: