weechat/python/grep.py

   1 # -*- coding: utf-8 -*-
   2 ###
   3 # Copyright (c) 2009-2011 by Elián Hanisch <lambdae2@gmail.com>
   4 #
   5 # This program is free software; you can redistribute it and/or modify
   6 # it under the terms of the GNU General Public License as published by
   7 # the Free Software Foundation; either version 3 of the License, or
   8 # (at your option) any later version.
   9 #
  10 # This program is distributed in the hope that it will be useful,
  11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13 # GNU General Public License for more details.
  14 #
  15 # You should have received a copy of the GNU General Public License
  16 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
  17 ###
  18
  19 ###
  20 # Search in Weechat buffers and logs (for Weechat 0.3.*)
  21 #
  22 #   Inspired by xt's grep.py
  23 #   Originally I just wanted to add some fixes in grep.py, but then
  24 #   I got carried away and rewrote everything, so new script.
  25 #
  26 #   Commands:
  27 #   * /grep
  28 #     Search in logs or buffers, see /help grep
  29 #   * /logs:
  30 #     Lists logs in ~/.weechat/logs, see /help logs
  31 #
  32 #   Settings:
  33 #   * plugins.var.python.grep.clear_buffer:
  34 #     Clear the results buffer before each search. Valid values: on, off
  35 #
  36 #   * plugins.var.python.grep.go_to_buffer:
  37 #     Automatically go to grep buffer when search is over. Valid values: on, off
  38 #
  39 #   * plugins.var.python.grep.log_filter:
  40 #     Comma separated list of patterns that grep will use to exclude logs, e.g.
  41 #     if you use '*server/*' any log in the 'server' folder will be excluded
  42 #     when using the command '/grep log'
  43 #
  44 #   * plugins.var.python.grep.show_summary:
  45 #     Shows summary for each log. Valid values: on, off
  46 #
  47 #   * plugins.var.python.grep.max_lines:
  48 #     Grep will only print the last matched lines that don't surpass the value defined here.
  49 #
  50 #   * plugins.var.python.grep.size_limit:
  51 #     Size limit in KiB, used to decide whether grepping should run in the background or not. If
  52 #     the logs to grep have a total size bigger than this value then grep runs as a new process.
  53 #     It can be used to force or disable the background process: using '0' forces grep to always
  54 #     run in background, while using '' (empty string) will disable it.
  55 #
  56 #   * plugins.var.python.grep.timeout_secs:
  57 #     Timeout (in seconds) for background grepping.
  58 #
  59 #   * plugins.var.python.grep.default_tail_head:
  60 #     Config option to define the default number of lines returned when using --head or --tail
  61 #     options. Can be overridden in the command with the --number option.
  62 #
  63 #
  64 #   TODO:
  65 #   * try to figure out why hook_process chokes in long outputs (using a tempfile as a
  66 #   workaround now)
  67 #   * possibly add option for defining time intervals
  68 #
  69 #
  70 #   History:
  71 #
  72 #   2018-04-10, Sébastien Helleu <flashcode@flashtux.org>
  73 #   version 0.8.1: fix infolist_time for WeeChat >= 2.2 (WeeChat returns a long
  74 #                  integer instead of a string)
  75 #
  76 #   2017-09-20, mickael9
  77 #   version 0.8:
  78 #   * use weechat 1.5+ api for background processing (old method was unsafe and buggy)
  79 #   * add timeout_secs setting (was previously hardcoded to 5 mins)
  80 #
  81 #   2017-07-23, Sébastien Helleu <flashcode@flashtux.org>
  82 #   version 0.7.8: fix modulo by zero when nick is empty string
  83 #
  84 #   2016-06-23, mickael9
  85 #   version 0.7.7: fix get_home function
  86 #
  87 #   2015-11-26
  88 #   version 0.7.6: fix a typo
  89 #
  90 #   2015-01-31, Nicd-
  91 #   version 0.7.5:
  92 #   '~' is now expaned to the home directory in the log file path so
  93 #   paths like '~/logs/' should work.
  94 #
  95 #   2015-01-14, nils_2
  96 #   version 0.7.4: make q work to quit grep buffer (requested by: gb)
  97 #
  98 #   2014-03-29, Felix Eckhofer <felix@tribut.de>
  99 #   version 0.7.3: fix typo
 100 #
 101 #   2011-01-09
 102 #   version 0.7.2: bug fixes
 103 #
 104 #   2010-11-15
 105 #   version 0.7.1:
 106 #   * use TempFile so temporal files are guaranteed to be deleted.
 107 #   * enable Archlinux workaround.
 108 #
 109 #   2010-10-26
 110 #   version 0.7:
 111 #   * added templates.
 112 #   * using --only-match shows only unique strings.
 113 #   * fixed bug that inverted -B -A switches when used with -t
 114 #
 115 #   2010-10-14
 116 #   version 0.6.8: by xt <xt@bash.no>
 117 #   * supress highlights when printing in grep buffer
 118 #
 119 #   2010-10-06
 120 #   version 0.6.7: by xt <xt@bash.no>
 121 #   * better temporary file:
 122 #    use tempfile.mkstemp. to create a temp file in log dir,
 123 #    makes it safer with regards to write permission and multi user
 124 #
 125 #   2010-04-08
 126 #   version 0.6.6: bug fixes
 127 #   * use WEECHAT_LIST_POS_END in log file completion, makes completion faster
 128 #   * disable bytecode if using python 2.6
 129 #   * use single quotes in command string
 130 #   * fix bug that could change buffer's title when using /grep stop
 131 #
 132 #   2010-01-24
 133 #   version 0.6.5: disable bytecode is a 2.6 feature, instead, resort to delete the bytecode manually
 134 #
 135 #   2010-01-19
 136 #   version 0.6.4: bug fix
 137 #   version 0.6.3: added options --invert --only-match (replaces --exact, which is still available
 138 #   but removed from help)
 139 #   * use new 'irc_nick_color' info
 140 #   * don't generate bytecode when spawning a new process
 141 #   * show active options in buffer title
 142 #
 143 #   2010-01-17
 144 #   version 0.6.2: removed 2.6-ish code
 145 #   version 0.6.1: fixed bug when grepping in grep's buffer
 146 #
 147 #   2010-01-14
 148 #   version 0.6.0: implemented grep in background
 149 #   * improved context lines presentation.
 150 #   * grepping for big (or many) log files runs in a weechat_process.
 151 #   * added /grep stop.
 152 #   * added 'size_limit' option
 153 #   * fixed a infolist leak when grepping buffers
 154 #   * added 'default_tail_head' option
 155 #   * results are sort by line count
 156 #   * don't die if log is corrupted (has NULL chars in it)
 157 #   * changed presentation of /logs
 158 #   * log path completion doesn't suck anymore
 159 #   * removed all tabs, because I learned how to configure Vim so that spaces aren't annoying
 160 #   anymore. This was the script's original policy.
 161 #
 162 #   2010-01-05
 163 #   version 0.5.5: rename script to 'grep.py' (FlashCode <flashcode@flashtux.org>).
 164 #
 165 #   2010-01-04
 166 #   version 0.5.4.1: fix index error when using --after/before-context options.
 167 #
 168 #   2010-01-03
 169 #   version 0.5.4: new features
 170 #   * added --after-context and --before-context options.
 171 #   * added --context as a shortcut for using both -A -B options.
 172 #
 173 #   2009-11-06
 174 #   version 0.5.3: improvements for long grep output
 175 #   * grep buffer input accepts the same flags as /grep for repeat a search with different
 176 #     options.
 177 #   * tweaks in grep's output.
 178 #   * max_lines option added for limit grep's output.
 179 #   * code in update_buffer() optimized.
 180 #   * time stats in buffer title.
 181 #   * added go_to_buffer config option.
 182 #   * added --buffer for search only in buffers.
 183 #   * refactoring.
 184 #
 185 #   2009-10-12, omero
 186 #   version 0.5.2: made it python-2.4.x compliant
 187 #
 188 #   2009-08-17
 189 #   version 0.5.1: some refactoring, show_summary option added.
 190 #
 191 #   2009-08-13
 192 #   version 0.5: rewritten from xt's grep.py
 193 #   * fixed searching in non weechat logs, for cases like, if you're
 194 #     switching from irssi and rename and copy your irssi logs to %h/logs
 195 #   * fixed "timestamp rainbow" when you /grep in grep's buffer
 196 #   * allow to search in other buffers other than current or in logs
 197 #     of currently closed buffers with cmd 'buffer'
 198 #   * allow to search in any log file in %h/logs with cmd 'log'
 199 #   * added --count for return the number of matched lines
 200 #   * added --matchcase for case sensible search
 201 #   * added --hilight for color matches
 202 #   * added --head and --tail options, and --number
 203 #   * added command /logs for list files in %h/logs
 204 #   * added config option for clear the buffer before a search
 205 #   * added config option for filter logs we don't want to grep
 206 #   * added the posibility to repeat last search with another regexp by writing
 207 #     it in grep's buffer
 208 #   * changed spaces for tabs in the code, which is my preference
 209 #
 210 ###
 211
 212 from os import path
 213 import sys, getopt, time, os, re
 214
 215 try:
 216     import cPickle as pickle
 217 except ImportError:
 218     import pickle
 219
 220 try:
 221     import weechat
 222     from weechat import WEECHAT_RC_OK, prnt, prnt_date_tags
 223     import_ok = True
 224 except ImportError:
 225     import_ok = False
 226
 227 SCRIPT_NAME    = "grep"
 228 SCRIPT_AUTHOR  = "Elián Hanisch <lambdae2@gmail.com>"
 229 SCRIPT_VERSION = "0.8.1"
 230 SCRIPT_LICENSE = "GPL3"
 231 SCRIPT_DESC    = "Search in buffers and logs"
 232 SCRIPT_COMMAND = "grep"
 233
 234 ### Default Settings ###
 235 settings = {
 236     'clear_buffer'      : 'off',
 237     'log_filter'        : '',
 238     'go_to_buffer'      : 'on',
 239     'max_lines'         : '4000',
 240     'show_summary'      : 'on',
 241     'size_limit'        : '2048',
 242     'default_tail_head' : '10',
 243     'timeout_secs'      : '300',
 244 }
 245
 246 ### Class definitions ###
 247 class linesDict(dict):
 248     """
 249     Class for handling matched lines in more than one buffer.
 250     linesDict[buffer_name] = matched_lines_list
 251     """
 252     def __setitem__(self, key, value):
 253         assert isinstance(value, list)
 254         if key not in self:
 255             dict.__setitem__(self, key, value)
 256         else:
 257             dict.__getitem__(self, key).extend(value)
 258
 259     def get_matches_count(self):
 260         """Return the sum of total matches stored."""
 261         if dict.__len__(self):
 262             return sum(map(lambda L: L.matches_count, self.itervalues()))
 263         else:
 264             return 0
 265
 266     def __len__(self):
 267         """Return the sum of total lines stored."""
 268         if dict.__len__(self):
 269             return sum(map(len, self.itervalues()))
 270         else:
 271             return 0
 272
 273     def __str__(self):
 274         """Returns buffer count or buffer name if there's just one stored."""
 275         n = len(self.keys())
 276         if n == 1:
 277             return self.keys()[0]
 278         elif n > 1:
 279             return '%s logs' %n
 280         else:
 281             return ''
 282
 283     def items(self):
 284         """Returns a list of items sorted by line count."""
 285         items = dict.items(self)
 286         items.sort(key=lambda i: len(i[1]))
 287         return items
 288
 289     def items_count(self):
 290         """Returns a list of items sorted by match count."""
 291         items = dict.items(self)
 292         items.sort(key=lambda i: i[1].matches_count)
 293         return items
 294
 295     def strip_separator(self):
 296         for L in self.itervalues():
 297             L.strip_separator()
 298
 299     def get_last_lines(self, n):
 300         total_lines = len(self)
 301         #debug('total: %s n: %s' %(total_lines, n))
 302         if n >= total_lines:
 303             # nothing to do
 304             return
 305         for k, v in reversed(self.items()):
 306             l = len(v)
 307             if n > 0:
 308                 if l > n:
 309                     del v[:l-n]
 310                     v.stripped_lines = l-n
 311                 n -= l
 312             else:
 313                 del v[:]
 314                 v.stripped_lines = l
 315
 316 class linesList(list):
 317     """Class for list of matches, since sometimes I need to add lines that aren't matches, I need an
 318     independent counter."""
 319     _sep = '...'
 320     def __init__(self, *args):
 321         list.__init__(self, *args)
 322         self.matches_count = 0
 323         self.stripped_lines = 0
 324
 325     def append(self, item):
 326         """Append lines, can be a string or a list with strings."""
 327         if isinstance(item, str):
 328             list.append(self, item)
 329         else:
 330             self.extend(item)
 331
 332     def append_separator(self):
 333         """adds a separator into the list, makes sure it doen't add two together."""
 334         s = self._sep
 335         if (self and self[-1] != s) or not self:
 336             self.append(s)
 337
 338     def onlyUniq(self):
 339         s = set(self)
 340         del self[:]
 341         self.extend(s)
 342
 343     def count_match(self, item=None):
 344         if item is None or isinstance(item, str):
 345             self.matches_count += 1
 346         else:
 347             self.matches_count += len(item)
 348
 349     def strip_separator(self):
 350         """removes separators if there are first or/and last in the list."""
 351         if self:
 352             s = self._sep
 353             if self[0] == s:
 354                 del self[0]
 355             if self[-1] == s:
 356                 del self[-1]
 357
 358 ### Misc functions ###
 359 now = time.time
 360 def get_size(f):
 361     try:
 362         return os.stat(f).st_size
 363     except OSError:
 364         return 0
 365
 366 sizeDict = {0:'b', 1:'KiB', 2:'MiB', 3:'GiB', 4:'TiB'}
 367 def human_readable_size(size):
 368     power = 0
 369     while size > 1024:
 370         power += 1
 371         size /= 1024.0
 372     return '%.2f %s' %(size, sizeDict.get(power, ''))
 373
 374 def color_nick(nick):
 375     """Returns coloured nick, with coloured mode if any."""
 376     if not nick: return ''
 377     wcolor = weechat.color
 378     config_string = lambda s : weechat.config_string(weechat.config_get(s))
 379     config_int = lambda s : weechat.config_integer(weechat.config_get(s))
 380     # prefix and suffix
 381     prefix = config_string('irc.look.nick_prefix')
 382     suffix = config_string('irc.look.nick_suffix')
 383     prefix_c = suffix_c = wcolor(config_string('weechat.color.chat_delimiters'))
 384     if nick[0] == prefix:
 385         nick = nick[1:]
 386     else:
 387         prefix = prefix_c = ''
 388     if nick[-1] == suffix:
 389         nick = nick[:-1]
 390         suffix = wcolor(color_delimiter) + suffix
 391     else:
 392         suffix = suffix_c = ''
 393     # nick mode
 394     modes = '@!+%'
 395     if nick[0] in modes:
 396         mode, nick = nick[0], nick[1:]
 397         mode_color = wcolor(config_string('weechat.color.nicklist_prefix%d' \
 398             %(modes.find(mode) + 1)))
 399     else:
 400         mode = mode_color = ''
 401     # nick color
 402     nick_color = ''
 403     if nick:
 404         nick_color = weechat.info_get('irc_nick_color', nick)
 405         if not nick_color:
 406             # probably we're in WeeChat 0.3.0
 407             #debug('no irc_nick_color')
 408             color_nicks_number = config_int('weechat.look.color_nicks_number')
 409             idx = (sum(map(ord, nick))%color_nicks_number) + 1
 410             nick_color = wcolor(config_string('weechat.color.chat_nick_color%02d' %idx))
 411     return ''.join((prefix_c, prefix, mode_color, mode, nick_color, nick, suffix_c, suffix))
 412
 413 ### Config and value validation ###
 414 boolDict = {'on':True, 'off':False}
 415 def get_config_boolean(config):
 416     value = weechat.config_get_plugin(config)
 417     try:
 418         return boolDict[value]
 419     except KeyError:
 420         default = settings[config]
 421         error("Error while fetching config '%s'. Using default value '%s'." %(config, default))
 422         error("'%s' is invalid, allowed: 'on', 'off'" %value)
 423         return boolDict[default]
 424
 425 def get_config_int(config, allow_empty_string=False):
 426     value = weechat.config_get_plugin(config)
 427     try:
 428         return int(value)
 429     except ValueError:
 430         if value == '' and allow_empty_string:
 431             return value
 432         default = settings[config]
 433         error("Error while fetching config '%s'. Using default value '%s'." %(config, default))
 434         error("'%s' is not a number." %value)
 435         return int(default)
 436
 437 def get_config_log_filter():
 438     filter = weechat.config_get_plugin('log_filter')
 439     if filter:
 440         return filter.split(',')
 441     else:
 442         return []
 443
 444 def get_home():
 445     home = weechat.config_string(weechat.config_get('logger.file.path'))
 446     home = home.replace('%h', weechat.info_get('weechat_dir', ''))
 447     home = path.abspath(path.expanduser(home))
 448     return home
 449
 450 def strip_home(s, dir=''):
 451     """Strips home dir from the begging of the log path, this makes them sorter."""
 452     if not dir:
 453         global home_dir
 454         dir = home_dir
 455     l = len(dir)
 456     if s[:l] == dir:
 457         return s[l:]
 458     return s
 459
 460 ### Messages ###
 461 script_nick = SCRIPT_NAME
 462 def error(s, buffer=''):
 463     """Error msg"""
 464     prnt(buffer, '%s%s %s' %(weechat.prefix('error'), script_nick, s))
 465     if weechat.config_get_plugin('debug'):
 466         import traceback
 467         if traceback.sys.exc_type:
 468             trace = traceback.format_exc()
 469             prnt('', trace)
 470
 471 def say(s, buffer=''):
 472     """normal msg"""
 473     prnt_date_tags(buffer, 0, 'no_highlight', '%s\t%s' %(script_nick, s))
 474
 475
 476
 477 ### Log files and buffers ###
 478 cache_dir = {} # note: don't remove, needed for completion if the script was loaded recently
 479 def dir_list(dir, filter_list=(), filter_excludes=True, include_dir=False):
 480     """Returns a list of files in 'dir' and its subdirs."""
 481     global cache_dir
 482     from os import walk
 483     from fnmatch import fnmatch
 484     #debug('dir_list: listing in %s' %dir)
 485     key = (dir, include_dir)
 486     try:
 487         return cache_dir[key]
 488     except KeyError:
 489         pass
 490
 491     filter_list = filter_list or get_config_log_filter()
 492     dir_len = len(dir)
 493     if filter_list:
 494         def filter(file):
 495             file = file[dir_len:] # pattern shouldn't match home dir
 496             for pattern in filter_list:
 497                 if fnmatch(file, pattern):
 498                     return filter_excludes
 499             return not filter_excludes
 500     else:
 501         filter = lambda f : not filter_excludes
 502
 503     file_list = []
 504     extend = file_list.extend
 505     join = path.join
 506     def walk_path():
 507         for basedir, subdirs, files in walk(dir):
 508             #if include_dir:
 509             #    subdirs = map(lambda s : join(s, ''), subdirs)
 510             #    files.extend(subdirs)
 511             files_path = map(lambda f : join(basedir, f), files)
 512             files_path = [ file for file in files_path if not filter(file) ]
 513             extend(files_path)
 514
 515     walk_path()
 516     cache_dir[key] = file_list
 517     #debug('dir_list: got %s' %str(file_list))
 518     return file_list
 519
 520 def get_file_by_pattern(pattern, all=False):
 521     """Returns the first log whose path matches 'pattern',
 522     if all is True returns all logs that matches."""
 523     if not pattern: return []
 524     #debug('get_file_by_filename: searching for %s.' %pattern)
 525     # do envvar expandsion and check file
 526     file = path.expanduser(pattern)
 527     file = path.expandvars(file)
 528     if path.isfile(file):
 529         return [file]
 530     # lets see if there's a matching log
 531     global home_dir
 532     file = path.join(home_dir, pattern)
 533     if path.isfile(file):
 534         return [file]
 535     else:
 536         from fnmatch import fnmatch
 537         file = []
 538         file_list = dir_list(home_dir)
 539         n = len(home_dir)
 540         for log in file_list:
 541             basename = log[n:]
 542             if fnmatch(basename, pattern):
 543                 file.append(log)
 544         #debug('get_file_by_filename: got %s.' %file)
 545         if not all and file:
 546             file.sort()
 547             return [ file[-1] ]
 548         return file
 549
 550 def get_file_by_buffer(buffer):
 551     """Given buffer pointer, finds log's path or returns None."""
 552     #debug('get_file_by_buffer: searching for %s' %buffer)
 553     infolist = weechat.infolist_get('logger_buffer', '', '')
 554     if not infolist: return
 555     try:
 556         while weechat.infolist_next(infolist):
 557             pointer = weechat.infolist_pointer(infolist, 'buffer')
 558             if pointer == buffer:
 559                 file = weechat.infolist_string(infolist, 'log_filename')
 560                 if weechat.infolist_integer(infolist, 'log_enabled'):
 561                     #debug('get_file_by_buffer: got %s' %file)
 562                     return file
 563                 #else:
 564                 #    debug('get_file_by_buffer: got %s but log not enabled' %file)
 565     finally:
 566         #debug('infolist gets freed')
 567         weechat.infolist_free(infolist)
 568
 569 def get_file_by_name(buffer_name):
 570     """Given a buffer name, returns its log path or None. buffer_name should be in 'server.#channel'
 571     or '#channel' format."""
 572     #debug('get_file_by_name: searching for %s' %buffer_name)
 573     # common mask options
 574     config_masks = ('logger.mask.irc', 'logger.file.mask')
 575     # since there's no buffer pointer, we try to replace some local vars in mask, like $channel and
 576     # $server, then replace the local vars left with '*', and use it as a mask for get the path with
 577     # get_file_by_pattern
 578     for config in config_masks:
 579         mask = weechat.config_string(weechat.config_get(config))
 580         #debug('get_file_by_name: mask: %s' %mask)
 581         if '$name' in mask:
 582             mask = mask.replace('$name', buffer_name)
 583         elif '$channel' in mask or '$server' in mask:
 584             if '.' in buffer_name and \
 585                     '#' not in buffer_name[:buffer_name.find('.')]: # the dot isn't part of the channel name
 586                 #    ^ I'm asuming channel starts with #, i'm lazy.
 587                 server, channel = buffer_name.split('.', 1)
 588             else:
 589                 server, channel = '*', buffer_name
 590             if '$channel' in mask:
 591                 mask = mask.replace('$channel', channel)
 592             if '$server' in mask:
 593                 mask = mask.replace('$server', server)
 594         # change the unreplaced vars by '*'
 595         from string import letters
 596         if '%' in mask:
 597             # vars for time formatting
 598             mask = mask.replace('%', '$')
 599         if '$' in mask:
 600             masks = mask.split('$')
 601             masks = map(lambda s: s.lstrip(letters), masks)
 602             mask = '*'.join(masks)
 603             if mask[0] != '*':
 604                 mask = '*' + mask
 605         #debug('get_file_by_name: using mask %s' %mask)
 606         file = get_file_by_pattern(mask)
 607         #debug('get_file_by_name: got file %s' %file)
 608         if file:
 609             return file
 610     return None
 611
 612 def get_buffer_by_name(buffer_name):
 613     """Given a buffer name returns its buffer pointer or None."""
 614     #debug('get_buffer_by_name: searching for %s' %buffer_name)
 615     pointer = weechat.buffer_search('', buffer_name)
 616     if not pointer:
 617         try:
 618             infolist = weechat.infolist_get('buffer', '', '')
 619             while weechat.infolist_next(infolist):
 620                 short_name = weechat.infolist_string(infolist, 'short_name')
 621                 name = weechat.infolist_string(infolist, 'name')
 622                 if buffer_name in (short_name, name):
 623                     #debug('get_buffer_by_name: found %s' %name)
 624                     pointer = weechat.buffer_search('', name)
 625                     return pointer
 626         finally:
 627             weechat.infolist_free(infolist)
 628     #debug('get_buffer_by_name: got %s' %pointer)
 629     return pointer
 630
 631 def get_all_buffers():
 632     """Returns list with pointers of all open buffers."""
 633     buffers = []
 634     infolist = weechat.infolist_get('buffer', '', '')
 635     while weechat.infolist_next(infolist):
 636         buffers.append(weechat.infolist_pointer(infolist, 'pointer'))
 637     weechat.infolist_free(infolist)
 638     grep_buffer = weechat.buffer_search('python', SCRIPT_NAME)
 639     if grep_buffer and grep_buffer in buffers:
 640         # remove it from list
 641         del buffers[buffers.index(grep_buffer)]
 642     return buffers
 643
 644 ### Grep ###
 645 def make_regexp(pattern, matchcase=False):
 646     """Returns a compiled regexp."""
 647     if pattern in ('.', '.*', '.?', '.+'):
 648         # because I don't need to use a regexp if we're going to match all lines
 649         return None
 650     # matching takes a lot more time if pattern starts or ends with .* and it isn't needed.
 651     if pattern[:2] == '.*':
 652         pattern = pattern[2:]
 653     if pattern[-2:] == '.*':
 654         pattern = pattern[:-2]
 655     try:
 656         if not matchcase:
 657             regexp = re.compile(pattern, re.IGNORECASE)
 658         else:
 659             regexp = re.compile(pattern)
 660     except Exception, e:
 661         raise Exception, 'Bad pattern, %s' %e
 662     return regexp
 663
 664 def check_string(s, regexp, hilight='', exact=False):
 665     """Checks 's' with a regexp and returns it if is a match."""
 666     if not regexp:
 667         return s
 668
 669     elif exact:
 670         matchlist = regexp.findall(s)
 671         if matchlist:
 672             if isinstance(matchlist[0], tuple):
 673                 # join tuples (when there's more than one match group in regexp)
 674                 return [ ' '.join(t) for t in matchlist ]
 675             return matchlist
 676
 677     elif hilight:
 678         matchlist = regexp.findall(s)
 679         if matchlist:
 680             if isinstance(matchlist[0], tuple):
 681                 # flatten matchlist
 682                 matchlist = [ item for L in matchlist for item in L if item ]
 683             matchlist = list(set(matchlist)) # remove duplicates if any
 684             # apply hilight
 685             color_hilight, color_reset = hilight.split(',', 1)
 686             for m in matchlist:
 687                 s = s.replace(m, '%s%s%s' % (color_hilight, m, color_reset))
 688             return s
 689
 690     # no need for findall() here
 691     elif regexp.search(s):
 692         return s
 693
 694 def grep_file(file, head, tail, after_context, before_context, count, regexp, hilight, exact, invert):
 695     """Return a list of lines that match 'regexp' in 'file', if no regexp returns all lines."""
 696     if count:
 697         tail = head = after_context = before_context = False
 698         hilight = ''
 699     elif exact:
 700         before_context = after_context = False
 701         hilight = ''
 702     elif invert:
 703         hilight = ''
 704     #debug(' '.join(map(str, (file, head, tail, after_context, before_context))))
 705
 706     lines = linesList()
 707     # define these locally as it makes the loop run slightly faster
 708     append = lines.append
 709     count_match = lines.count_match
 710     separator = lines.append_separator
 711     if invert:
 712         def check(s):
 713             if check_string(s, regexp, hilight, exact):
 714                 return None
 715             else:
 716                 return s
 717     else:
 718         check = lambda s: check_string(s, regexp, hilight, exact)
 719
 720     try:
 721         file_object = open(file, 'r')
 722     except IOError:
 723         # file doesn't exist
 724         return lines
 725     if tail or before_context:
 726         # for these options, I need to seek in the file, but is slower and uses a good deal of
 727         # memory if the log is too big, so we do this *only* for these options.
 728         file_lines = file_object.readlines()
 729
 730         if tail:
 731             # instead of searching in the whole file and later pick the last few lines, we
 732             # reverse the log, search until count reached and reverse it again, that way is a lot
 733             # faster
 734             file_lines.reverse()
 735             # don't invert context switches
 736             before_context, after_context = after_context, before_context
 737
 738         if before_context:
 739             before_context_range = range(1, before_context + 1)
 740             before_context_range.reverse()
 741
 742         limit = tail or head
 743
 744         line_idx = 0
 745         while line_idx < len(file_lines):
 746             line = file_lines[line_idx]
 747             line = check(line)
 748             if line:
 749                 if before_context:
 750                     separator()
 751                     trimmed = False
 752                     for id in before_context_range:
 753                         try:
 754                             context_line = file_lines[line_idx - id]
 755                             if check(context_line):
 756                                 # match in before context, that means we appended these same lines in a
 757                                 # previous match, so we delete them merging both paragraphs
 758                                 if not trimmed:
 759                                     del lines[id - before_context - 1:]
 760                                     trimmed = True
 761                             else:
 762                                 append(context_line)
 763                         except IndexError:
 764                             pass
 765                 append(line)
 766                 count_match(line)
 767                 if after_context:
 768                     id, offset = 0, 0
 769                     while id < after_context + offset:
 770                         id += 1
 771                         try:
 772                             context_line = file_lines[line_idx + id]
 773                             _context_line = check(context_line)
 774                             if _context_line:
 775                                 offset = id
 776                                 context_line = _context_line # so match is hilighted with --hilight
 777                                 count_match()
 778                             append(context_line)
 779                         except IndexError:
 780                             pass
 781                     separator()
 782                     line_idx += id
 783                 if limit and lines.matches_count >= limit:
 784                     break
 785             line_idx += 1
 786
 787         if tail:
 788             lines.reverse()
 789     else:
 790         # do a normal grep
 791         limit = head
 792
 793         for line in file_object:
 794             line = check(line)
 795             if line:
 796                 count or append(line)
 797                 count_match(line)
 798                 if after_context:
 799                     id, offset = 0, 0
 800                     while id < after_context + offset:
 801                         id += 1
 802                         try:
 803                             context_line = file_object.next()
 804                             _context_line = check(context_line)
 805                             if _context_line:
 806                                 offset = id
 807                                 context_line = _context_line
 808                                 count_match()
 809                             count or append(context_line)
 810                         except StopIteration:
 811                             pass
 812                     separator()
 813                 if limit and lines.matches_count >= limit:
 814                     break
 815
 816     file_object.close()
 817     return lines
 818
def grep_buffer(buffer, head, tail, after_context, before_context, count, regexp, hilight, exact,
        invert):
    """Return the lines of 'buffer' accepted by check_string() for 'regexp', as a linesList.

    buffer: pointer of the WeeChat buffer whose lines are read ('buffer_lines' infolist).
    head, tail: max number of matches wanted from the start / from the end of the buffer, a
        false value means no limit. With 'tail' the buffer is walked backwards and the result
        is reversed before returning.
    after_context, before_context: number of lines after / before each match that are added
        to the result too.
    count: only count matches, no line is stored (head, tail, context and hilight are
        disabled).
    regexp, hilight, exact: passed unchanged to check_string().
    exact: also disables context lines.
    invert: keep the lines for which check_string() returns a false value.
    """
    lines = linesList()
    if count:
        # counting doesn't need limits, context nor highlighting
        tail = head = after_context = before_context = False
        hilight = ''
    elif exact:
        before_context = after_context = False
    #debug(' '.join(map(str, (tail, head, after_context, before_context, count, exact, hilight))))

    # Using /grep in grep's buffer can lead to some funny effects
    # We should take measures if that's the case
    def make_get_line_funcion():
        """Returns a function for get lines from the infolist, depending if the buffer is grep's or
        not."""
        string_remove_color = weechat.string_remove_color
        infolist_string = weechat.infolist_string
        grep_buffer = weechat.buffer_search('python', SCRIPT_NAME)
        if grep_buffer and buffer == grep_buffer:
            # we are searching in our own buffer: return the bare message, and None for any
            # line that has a prefix so the caller can skip it
            def function(infolist):
                prefix = infolist_string(infolist, 'prefix')
                message = infolist_string(infolist, 'message')
                if prefix: # only our messages have prefix, ignore it
                    return None
                return message
        else:
            # any other buffer: build a 'date<TAB>prefix<TAB>message' string with colors removed
            infolist_time = weechat.infolist_time
            def function(infolist):
                prefix = string_remove_color(infolist_string(infolist, 'prefix'), '')
                message = string_remove_color(infolist_string(infolist, 'message'), '')
                date = infolist_time(infolist, 'date')
                # since WeeChat 2.2, infolist_time returns a long integer
                # instead of a string
                if not isinstance(date, str):
                    date = time.strftime('%F %T', time.localtime(int(date)))
                return '%s\t%s\t%s' %(date, prefix, message)
        return function
    get_line = make_get_line_funcion()

    infolist = weechat.infolist_get('buffer_lines', buffer, '')
    if tail:
        # like with grep_file() if we need the last few matching lines, we move the cursor to
        # the end and search backwards
        infolist_next = weechat.infolist_prev
        infolist_prev = weechat.infolist_next
    else:
        infolist_next = weechat.infolist_next
        infolist_prev = weechat.infolist_prev
    # stop after this many matches, a false value means no limit
    limit = head or tail

    # define these locally as it makes the loop run slightly faster
    append = lines.append
    count_match = lines.count_match
    separator = lines.append_separator
    # check(s) returns a true value when 's' must be kept
    # NOTE(review): presumably check_string() returns the (possibly highlighted) line or a
    # false value -- confirm in check_string()
    if invert:
        def check(s):
            if check_string(s, regexp, hilight, exact):
                return None
            else:
                return s
    else:
        check = lambda s: check_string(s, regexp, hilight, exact)

    if before_context:
        # before_context, ..., 2, 1 (needs range() to return a list, i.e. Python 2)
        before_context_range = range(1, before_context + 1)
        before_context_range.reverse()

    while infolist_next(infolist):
        line = get_line(infolist)
        if line is None: continue
        line = check(line)
        if line:
            if before_context:
                separator()
                trimmed = False
                # move the cursor back 'before_context' lines, if we can't there's nothing to
                # merge with a previous paragraph
                for id in before_context_range:
                    if not infolist_prev(infolist):
                        trimmed = True
                # walk forward again up to the matching line, collecting the context
                # NOTE(review): get_line() may return None in grep's own buffer and that value
                # is handed to check() here -- confirm check_string() copes with it
                for id in before_context_range:
                    context_line = get_line(infolist)
                    if check(context_line):
                        # match in before context, these lines were already added by a
                        # previous match, so we delete them (once) merging both paragraphs
                        if not trimmed:
                            del lines[id - before_context - 1:]
                            trimmed = True
                    else:
                        append(context_line)
                    infolist_next(infolist)
            # with 'count' the line itself isn't stored, only counted
            count or append(line)
            count_match(line)
            if after_context:
                # 'offset' extends the window each time a context line matches as well
                id, offset = 0, 0
                while id < after_context + offset:
                    id += 1
                    if infolist_next(infolist):
                        context_line = get_line(infolist)
                        _context_line = check(context_line)
                        if _context_line:
                            context_line = _context_line
                            offset = id
                            count_match()
                        append(context_line)
                    else:
                        # in the main loop infolist_next will start again and cause an infinite
                        # loop, this will avoid it
                        infolist_next = lambda x: 0
                separator()
            if limit and lines.matches_count >= limit:
                break
    weechat.infolist_free(infolist)

    if tail:
        # we searched backwards, put the lines in buffer order again
        lines.reverse()
    return lines
 933
# Hook of the background grep (what weechat.hook_process returns in show_matching_lines), None
# while there isn't any background search running.
hook_file_grep = None

### this is our main grep function
 936 def show_matching_lines():
 937     """
 938     Greps buffers in search_in_buffers or files in search_in_files and updates grep buffer with the
 939     result.
 940     """
 941     global pattern, matchcase, number, count, exact, hilight, invert
 942     global tail, head, after_context, before_context
 943     global search_in_files, search_in_buffers, matched_lines, home_dir
 944     global time_start
 945     matched_lines = linesDict()
 946     #debug('buffers:%s \nlogs:%s' %(search_in_buffers, search_in_files))
 947     time_start = now()
 948
 949     # buffers
 950     if search_in_buffers:
 951         regexp = make_regexp(pattern, matchcase)
 952         for buffer in search_in_buffers:
 953             buffer_name = weechat.buffer_get_string(buffer, 'name')
 954             matched_lines[buffer_name] = grep_buffer(buffer, head, tail, after_context,
 955                     before_context, count, regexp, hilight, exact, invert)
 956
 957     # logs
 958     if search_in_files:
 959         size_limit = get_config_int('size_limit', allow_empty_string=True)
 960         background = False
 961         if size_limit or size_limit == 0:
 962             size = sum(map(get_size, search_in_files))
 963             if size > size_limit * 1024:
 964                 background = True
 965         elif size_limit == '':
 966             background = False
 967
 968         regexp = make_regexp(pattern, matchcase)
 969
 970         global grep_options, log_pairs
 971         grep_options = (head, tail, after_context, before_context,
 972                         count, regexp, hilight, exact, invert)
 973
 974         log_pairs = [(strip_home(log), log) for log in search_in_files]
 975
 976         if not background:
 977             # run grep normally
 978             for log_name, log in log_pairs:
 979                 matched_lines[log_name] = grep_file(log, *grep_options)
 980             buffer_update()
 981         else:
 982             global hook_file_grep, grep_stdout, grep_stderr, pattern_tmpl
 983             grep_stdout = grep_stderr = ''
 984             hook_file_grep = weechat.hook_process(
 985                 'func:grep_process',
 986                 get_config_int('timeout_secs') * 1000,
 987                 'grep_process_cb',
 988                 ''
 989             )
 990             if hook_file_grep:
 991                 buffer_create("Searching for '%s' in %s worth of data..." % (
 992                     pattern_tmpl,
 993                     human_readable_size(size)
 994                 ))
 995     else:
 996         buffer_update()
 997
 998
 999 def grep_process(*args):
1000     result = {}
1001     try:
1002         global grep_options, log_pairs
1003         for log_name, log in log_pairs:
1004             result[log_name] = grep_file(log, *grep_options)
1005     except Exception, e:
1006         result = e
1007
1008     return pickle.dumps(result)
1009
# What the background grep process has written so far, grep_process_cb() appends each chunk
# it receives to these.
grep_stdout = grep_stderr = ''
1011
def grep_process_cb(data, command, return_code, out, err):
    """
    Callback for the process hooked in show_matching_lines().

    'out' and 'err' are appended to the globals grep_stdout and grep_stderr. When 'return_code'
    is WEECHAT_HOOK_PROCESS_ERROR, or when it's >= 0 (the process ended), the hook is forgotten
    and either an error is reported or the unpickled result is merged in matched_lines and
    shown with buffer_update(). Any other return code only accumulates output.
    Always returns WEECHAT_RC_OK.
    """
    global grep_stdout, grep_stderr, matched_lines, hook_file_grep

    grep_stdout += out
    grep_stderr += err

    def set_buffer_error(message):
        """Reports 'message' and marks grep's buffer title with 'error'."""
        error(message)
        grep_buffer = buffer_create()
        title = weechat.buffer_get_string(grep_buffer, 'title')
        title = title + ' %serror' % color_title
        weechat.buffer_set(grep_buffer, 'title', title)

    if return_code == weechat.WEECHAT_HOOK_PROCESS_ERROR:
        set_buffer_error("Background grep timed out")
        hook_file_grep = None
        return WEECHAT_RC_OK

    elif return_code >= 0:
        hook_file_grep = None
        if grep_stderr:
            set_buffer_error(grep_stderr)
            return WEECHAT_RC_OK

        try:
            # NOTE: pickle is only acceptable here because the data comes from our own
            # grep_process(), never unpickle anything from an untrusted source.
            result = pickle.loads(grep_stdout)
            if isinstance(result, Exception):
                # grep_process() failed and sent us the exception, report it below
                raise result
            matched_lines.update(result)
        except Exception as e:
            set_buffer_error(repr(e))
            return WEECHAT_RC_OK
        else:
            buffer_update()

    return WEECHAT_RC_OK
1048
def get_grep_file_status():
    """Returns a line that tells what logs are being searched and for how long, together with
    a hint about how to interrupt the search."""
    global search_in_files, matched_lines, time_start
    elapsed = now() - time_start
    logs = search_in_files
    if len(logs) == 1:
        # a single log, show its name
        only_log = logs[0]
        target = '%s (%s)' % (strip_home(only_log), human_readable_size(get_size(only_log)))
    else:
        total = sum(get_size(log) for log in logs)
        target = '%s log files (%s)' % (len(logs), human_readable_size(total))
    template = ('Searching in %s, running for %.4f seconds. Interrupt it with "/grep stop" or "stop"'
                ' in grep buffer.')
    return template % (target, elapsed)
1060
1061 ### Grep buffer ###
def buffer_update():
    """Prints the content of the global 'matched_lines' in grep's buffer and updates its title.

    For every log or buffer with matches the lines are printed (unless 'count' is set) followed
    by a summary (always with 'count', otherwise only if the 'show_summary' setting is on).
    At most 'max_lines' (setting) lines are shown. When done 'matched_lines' is deleted.
    """
    global pattern_tmpl, matched_lines, pattern, count, hilight, invert, exact
    # the search itself ended here, what follows is only printing
    time_grep = now()

    buffer = buffer_create()
    if get_config_boolean('clear_buffer'):
        weechat.buffer_clear(buffer)
    matched_lines.strip_separator() # remove first and last separators of each list
    len_total_lines = len(matched_lines)
    max_lines = get_config_int('max_lines')
    if not count and len_total_lines > max_lines:
        # more lines than we can show, clear the buffer regardless of 'clear_buffer'
        weechat.buffer_clear(buffer)

    def _make_summary(log, lines, note):
        """Returns the summary line for 'log', 'note' is appended at the end."""
        return '%s matches "%s%s%s"%s in %s%s%s%s' \
                %(lines.matches_count, color_summary, pattern_tmpl, color_info,
                  invert and ' (inverted)' or '',
                  color_summary, log, color_reset, note)

    # make_summary(log, lines) picks the right note for the summary
    if count:
        make_summary = lambda log, lines : _make_summary(log, lines, ' (not shown)')
    else:
        def make_summary(log, lines):
            if lines.stripped_lines:
                # some lines were left out for this log
                if lines:
                    note = ' (last %s lines shown)' %len(lines)
                else:
                    note = ' (not shown)'
            else:
                note = ''
            return _make_summary(log, lines, note)

    # format_line(s) turns a matched line into what is printed in the buffer
    global weechat_format
    if hilight:
        # we don't want colors if there's match highlighting
        format_line = lambda s : '%s %s %s' %split_line(s)
    else:
        def format_line(s):
            global nick_dict, weechat_format
            date, nick, msg = split_line(s)
            if weechat_format:
                try:
                    nick = nick_dict[nick]
                except KeyError:
                    # cache nick
                    nick_c = color_nick(nick)
                    nick_dict[nick] = nick_c
                    nick = nick_c
                return '%s%s %s%s %s' %(color_date, date, nick, color_reset, msg)
            else:
                #no formatting
                return msg

    prnt(buffer, '\n')
    print_line('Search for "%s%s%s"%s in %s%s%s.' %(color_summary, pattern_tmpl, color_info,
        invert and ' (inverted)' or '', color_summary, matched_lines, color_reset),
            buffer)
    # print last <max_lines> lines
    if matched_lines.get_matches_count():
        if count:
            # with count we sort by matches lines instead of just lines.
            matched_lines_items = matched_lines.items_count()
        else:
            matched_lines_items = matched_lines.items()

        # NOTE(review): presumably trims the stored lists so only the last 'max_lines' lines
        # remain -- confirm in linesDict.get_last_lines()
        matched_lines.get_last_lines(max_lines)
        for log, lines in matched_lines_items:
            if lines.matches_count:
                # matched lines
                if not count:
                    # print lines
                    # assume WeeChat's log format for each log, split_line() clears this flag
                    # at the first line that doesn't look like it
                    weechat_format = True
                    if exact:
                        lines.onlyUniq()
                    for line in lines:
                        #debug(repr(line))
                        if line == linesList._sep:
                            # separator
                            prnt(buffer, context_sep)
                        else:
                            if '\x00' in line:
                                # log was corrupted
                                error("Found garbage in log '%s', maybe it's corrupted" %log)
                                line = line.replace('\x00', '')
                            prnt_date_tags(buffer, 0, 'no_highlight', format_line(line))

                # summary
                if count or get_config_boolean('show_summary'):
                    summary = make_summary(log, lines)
                    print_line(summary, buffer)

            # separator
            if not count and lines:
                prnt(buffer, '\n')
    else:
        print_line('No matches found.', buffer)

    # set title
    global time_start
    time_end = now()
    # total time
    time_total = time_end - time_start
    # percent of the total time used for grepping
    # NOTE(review): this raises ZeroDivisionError if both now() calls return the same value
    time_grep_pct = (time_grep - time_start)/time_total*100
    #debug('time: %.4f seconds (%.2f%%)' %(time_total, time_grep_pct))
    if not count and len_total_lines > max_lines:
        note = ' (last %s lines shown)' %len(matched_lines)
    else:
        note = ''
    title = "'q': close buffer | Search in %s%s%s %s matches%s | pattern \"%s%s%s\"%s %s | %.4f seconds (%.2f%%)" \
            %(color_title, matched_lines, color_reset, matched_lines.get_matches_count(), note,
              color_title, pattern_tmpl, color_reset, invert and ' (inverted)' or '', format_options(),
              time_total, time_grep_pct)
    weechat.buffer_set(buffer, 'title', title)

    if get_config_boolean('go_to_buffer'):
        weechat.buffer_set(buffer, 'display', '1')

    # free matched_lines so it can be removed from memory
    # (this removes the global name, it must be assigned again before the next use)
    del matched_lines
1183
def split_line(s):
    """Returns the tuple (date, nick, msg) for the line 's'.

    While the global 'weechat_format' is true, 's' is cut at its first two tabs. A line with
    less than two tabs turns 'weechat_format' off and is returned whole as msg, with date and
    nick empty, and so are the lines that come after it. Tabs left in msg are replaced by
    four spaces."""
    global weechat_format
    fields = ()
    if weechat_format:
        fields = s.split('\t', 2) # date, nick, message
    if len(fields) == 3:
        date, nick, msg = fields
    else:
        # looks like log isn't in weechat's format
        weechat_format = False # incoming lines won't be formatted
        date, nick, msg = '', '', s
    return date, nick, msg.replace('\t', '    ')
1197
def print_line(s, buffer=None, display=False):
    """Sends 's', colored with color_info, to 'buffer' through say(). Without 'buffer' grep's
    own buffer is used. With 'display' the buffer is also shown, but only if the
    'go_to_buffer' setting is on."""
    target = buffer
    if target is None:
        target = buffer_create()
    say('%s%s' % (color_info, s), target)
    if not display:
        return
    if get_config_boolean('go_to_buffer'):
        weechat.buffer_set(target, 'display', '1')
1205
def format_options():
    """Returns a string with the options of the current search written like command line
    switches, or an empty string if there aren't any."""
    global matchcase, number, count, exact, hilight, invert
    global tail, head, after_context, before_context

    # one letter for each switch that is enabled
    switches = zip('cHmov', (count, hilight, matchcase, exact, invert))
    pieces = [letter for letter, enabled in switches if enabled]

    if head or tail:
        default = get_config_int('default_tail_head')
        if head:
            letter, amount = 'h', head
        else:
            letter, amount = 't', tail
        if amount != default:
            # the amount isn't the default one, show it in a group of its own
            pieces.extend((' -', letter, 'n', str(amount)))
        else:
            pieces.append(letter)

    same_context = before_context and after_context and before_context == after_context
    if same_context:
        pieces.append(' -C' + str(before_context))
    else:
        if before_context:
            pieces.append(' -B' + str(before_context))
        if after_context:
            pieces.append(' -A' + str(after_context))

    s = ''.join(pieces).strip()
    if s and not s.startswith('-'):
        s = '-' + s
    return s
1247
def buffer_create(title=None):
    """Returns the pointer of grep's buffer, the buffer is created if it doesn't exist yet.
    If 'title' is given it becomes the buffer's title, a new buffer without 'title' gets a
    default one."""
    found = weechat.buffer_search('python', SCRIPT_NAME)
    if found:
        if title:
            weechat.buffer_set(found, 'title', title)
        return found

    created = weechat.buffer_new(SCRIPT_NAME, 'buffer_input', '', '', '')
    properties = (
        ('time_for_each_line', '0'),
        ('nicklist', '0'),
        ('title', title or 'grep output buffer'),
        ('localvar_set_no_log', '1'),
    )
    for option, value in properties:
        weechat.buffer_set(created, option, value)
    return created
1260
def buffer_input(data, buffer, input_data):
    """Input callback of grep's buffer.

    While a background search is running the input is handled by cmd_grep_stop() ('stop'
    interrupts it, anything else shows its status). Otherwise 'q' or 'Q' closes the buffer and
    any other input repeats the last search, with 'input_data' parsed by cmd_grep_parsing()
    for the new pattern and options. Always returns WEECHAT_RC_OK."""
    global search_in_buffers, search_in_files
    global pattern
    try:
        cmd_grep_stop(buffer, input_data)
    except Exception:
        # cmd_grep_stop() raises when a background search is running, there's nothing else
        # to do in that case
        return WEECHAT_RC_OK
    if input_data in ('q', 'Q'):
        weechat.buffer_close(buffer)
        return WEECHAT_RC_OK

    try:
        if pattern and (search_in_files or search_in_buffers):
            # check if the buffer pointers are still valid
            # the valid ones are collected in a new list, deleting items from the list we are
            # looping over would skip the pointer that follows every deleted one
            valid_pointers = []
            for pointer in search_in_buffers:
                infolist = weechat.infolist_get('buffer', pointer, '')
                if infolist:
                    weechat.infolist_free(infolist)
                    valid_pointers.append(pointer)
            search_in_buffers[:] = valid_pointers
            try:
                cmd_grep_parsing(input_data)
            except Exception as e:
                error('Argument error, %s' %e, buffer=buffer)
                return WEECHAT_RC_OK
            try:
                show_matching_lines()
            except Exception as e:
                error(e)
    except NameError:
        # the globals don't exist until the first /grep
        error("There isn't any previous search to repeat.", buffer=buffer)
    return WEECHAT_RC_OK
1293
1294 ### Commands ###
def cmd_init():
    """Puts the globals used by the commands back in their initial state: no pattern, every
    option disabled, empty caches and the home dir read again."""
    global home_dir, cache_dir, nick_dict
    global pattern_tmpl, pattern, matchcase, number, count, exact, hilight, invert
    global tail, head, after_context, before_context
    # there's nothing to search for yet
    pattern = pattern_tmpl = None
    number = None
    # every switch starts disabled
    matchcase, count, exact, invert = False, False, False, False
    head, tail = False, False
    after_context, before_context = False, False
    hilight = '' # this one must always be a string
    home_dir = get_home()
    cache_dir = {} # for avoid walking the dir tree more than once per command
    nick_dict = {} # nick cache for don't calculate nick color every time
1307
def cmd_grep_parsing(args):
    """Parses args for /grep and grep input buffer.

    The result is stored in globals: the options toggle or set matchcase, count, exact,
    hilight, invert, all, only_buffers, head, tail, number, after_context and before_context;
    a leading 'log <name>' or 'buffer <name>' sets log_name or buffer_name, and the remaining
    words joined by spaces are the pattern (pattern_tmpl as typed, pattern with the %{...}
    templates replaced). Without words the previous pattern is kept.

    Raises getopt.GetoptError for unknown options and Exception if there isn't any pattern or
    if an option that takes a number gets something else."""
    global pattern_tmpl, pattern, matchcase, number, count, exact, hilight, invert
    global tail, head, after_context, before_context
    global log_name, buffer_name, only_buffers, all
    opts, args = getopt.gnu_getopt(args.split(), 'cmHeahtivn:bA:B:C:o', ['count', 'matchcase', 'hilight',
        'exact', 'all', 'head', 'tail', 'number=', 'buffer', 'after-context=', 'before-context=',
        'context=', 'invert', 'only-match'])
    #debug(opts, 'opts: '); debug(args, 'args: ')
    if len(args) >= 2:
        if args[0] == 'log':
            del args[0]
            log_name = args.pop(0)
        elif args[0] == 'buffer':
            del args[0]
            buffer_name = args.pop(0)

    def tmplReplacer(match):
        """This function will replace templates with regexps"""
        # the template exactly as it was typed, it's what we return if we can't replace it
        typed = match.group(0)
        s = match.groups()[0]
        tmpl_key, _, tmpl_args = s.partition(' ')
        try:
            template = templates[tmpl_key]
            if callable(template):
                r = template(tmpl_args)
                if not r:
                    error("Template %s returned empty string "\
                          "(WeeChat doesn't have enough data)." %typed)
                return r
            else:
                return template
        except Exception:
            # unknown template or the template failed, leave the text untouched
            return typed

    args = ' '.join(args) # join pattern for keep spaces
    if args:
        pattern_tmpl = args
        pattern = _tmplRe.sub(tmplReplacer, args)
        debug('Using regexp: %s', pattern)
    if not pattern:
        raise Exception('No pattern for grep the logs.')

    def positive_number(opt, val):
        """Returns 'val' as an integer, raises Exception if it isn't one or if it's negative."""
        try:
            value = int(val)
            if value < 0:
                raise ValueError
            return value
        except ValueError:
            # show the option like the user wrote it
            if len(opt) == 1:
                opt = '-' + opt
            else:
                opt = '--' + opt
            raise Exception("argument for %s must be a positive integer." %opt)

    for opt, val in opts:
        opt = opt.strip('-')
        if opt in ('c', 'count'):
            count = not count
        elif opt in ('m', 'matchcase'):
            matchcase = not matchcase
        elif opt in ('H', 'hilight'):
            # hilight must be always a string!
            if hilight:
                hilight = ''
            else:
                hilight = '%s,%s' %(color_hilight, color_reset)
            # we pass the colors in the variable itself because check_string() must not use
            # weechat's module when applying the colors (this is for grep in a hooked process)
        elif opt in ('e', 'exact', 'o', 'only-match'):
            # exact and invert exclude each other
            exact = not exact
            invert = False
        elif opt in ('a', 'all'):
            all = not all
        elif opt in ('h', 'head'):
            # head and tail exclude each other
            head = not head
            tail = False
        elif opt in ('t', 'tail'):
            tail = not tail
            head = False
        elif opt in ('b', 'buffer'):
            only_buffers = True
        elif opt in ('n', 'number'):
            number = positive_number(opt, val)
        elif opt in ('C', 'context'):
            n = positive_number(opt, val)
            after_context = n
            before_context = n
        elif opt in ('A', 'after-context'):
            after_context = positive_number(opt, val)
        elif opt in ('B', 'before-context'):
            before_context = positive_number(opt, val)
        elif opt in ('i', 'v', 'invert'):
            invert = not invert
            exact = False
    # number check
    # head and tail are flags up to this point, from here they hold the number of lines
    if number is not None:
        if number == 0:
            # zero lines means no limit at all
            head = tail = False
            number = None
        elif head:
            head = number
        elif tail:
            tail = number
    else:
        n = get_config_int('default_tail_head')
        if head:
            head = n
        elif tail:
            tail = n
1419
def cmd_grep_stop(buffer, args):
    """Handles user input while a background search is running.

    Without a running search it does nothing and returns None. Otherwise, if 'args' is 'stop'
    the search is unhooked and its results dropped, any other 'args' prints the search
    status in 'buffer'; in both cases Exception is raised so the caller knows that the input
    was already taken care of."""
    global hook_file_grep, pattern, matched_lines
    if not hook_file_grep:
        return

    if args != 'stop':
        say(get_grep_file_status(), buffer)
        raise Exception

    weechat.unhook(hook_file_grep)
    hook_file_grep = None

    notice = 'Search for \'%s\' stopped.' % pattern
    say(notice, buffer)
    grep_buffer = weechat.buffer_search('python', SCRIPT_NAME)
    if grep_buffer:
        weechat.buffer_set(grep_buffer, 'title', notice)
    matched_lines = {}
    raise Exception
1436
def cmd_grep(data, buffer, args):
    """Search in buffers and logs."""
    # NOTE: the docstring is used as the description of the command (see hook_command), don't
    # use it for documenting the code.
    #
    # Callback of /grep. While a background search is running 'args' is only used for
    # stopping it or showing its status. Otherwise 'args' is parsed, the logs and/or buffers
    # to search are chosen (search_in_files, search_in_buffers) and the search is run.
    # Always returns WEECHAT_RC_OK, errors are reported with error().
    global pattern, matchcase, head, tail, number, count, exact, hilight
    global log_name, buffer_name, only_buffers, all
    global search_in_files, search_in_buffers
    try:
        cmd_grep_stop(buffer, args)
    except Exception:
        # cmd_grep_stop() raises when there's a background search running
        return WEECHAT_RC_OK

    if not args:
        weechat.command('', '/help %s' %SCRIPT_COMMAND)
        return WEECHAT_RC_OK

    cmd_init()
    log_name = buffer_name = ''
    only_buffers = all = False

    # parse
    try:
        cmd_grep_parsing(args)
    except Exception as e:
        error('Argument error, %s' %e)
        return WEECHAT_RC_OK

    # find logs
    log_file = search_buffer = None
    if log_name:
        log_file = get_file_by_pattern(log_name, all)
        if not log_file:
            error("Couldn't find any log for %s. Try /logs" %log_name)
            return WEECHAT_RC_OK
    elif all:
        search_buffer = get_all_buffers()
    elif buffer_name:
        search_buffer = get_buffer_by_name(buffer_name)
        if not search_buffer:
            # there's no buffer, try in the logs
            log_file = get_file_by_name(buffer_name)
            if not log_file:
                error("Logs or buffer for '%s' not found." %buffer_name)
                return WEECHAT_RC_OK
        else:
            search_buffer = [search_buffer]
    else:
        # nothing was asked for, search where the command was run
        search_buffer = [buffer]

    # make the log list
    search_in_files = []
    search_in_buffers = []
    if log_file:
        search_in_files = log_file
    elif not only_buffers:
        # search in the log of each buffer, or in the buffer itself if it doesn't have one
        for pointer in search_buffer:
            log = get_file_by_buffer(pointer)
            #debug('buffer %s log %s' %(pointer, log))
            if log:
                search_in_files.append(log)
            else:
                search_in_buffers.append(pointer)
    else:
        search_in_buffers = search_buffer

    # grepping
    try:
        show_matching_lines()
    except Exception as e:
        error(e)
    return WEECHAT_RC_OK
1507
def cmd_logs(data, buffer, args):
    """List files in Weechat's log dir."""
    # NOTE: the docstring is kept short on purpose, it may be used as the description of the
    # command like cmd_grep's one.
    #
    # Callback of /logs. 'args' can have -s or --size for sorting by size instead of by name,
    # any other word is passed to dir_list() as filter. The list is printed in grep's buffer,
    # one log per line with its size. Always returns WEECHAT_RC_OK.
    global home_dir
    cmd_init()
    sort_by_size = False
    log_filter = []

    try:
        opts, args = getopt.gnu_getopt(args.split(), 's', ['size'])
        if args:
            log_filter = args
        for opt, var in opts:
            opt = opt.strip('-')
            if opt in ('size', 's'):
                sort_by_size = True
    except Exception as e:
        error('Argument error, %s' %e)
        return WEECHAT_RC_OK

    # if there's a filter, filter_excludes should be False
    file_list = dir_list(home_dir, log_filter, filter_excludes=not log_filter)
    if sort_by_size:
        file_list.sort(key=get_size)
    else:
        file_list.sort()

    file_sizes = [human_readable_size(get_size(log)) for log in file_list]
    # calculate column length: the longest path plus some room for the dots
    if file_list:
        column_len = max(len(log) for log in file_list) + 3
    else:
        column_len = 0

    buffer = buffer_create()
    if get_config_boolean('clear_buffer'):
        weechat.buffer_clear(buffer)
    # a real list is needed, what zip() returns in Python 3 doesn't have len() and is always
    # true
    entries = list(zip(file_list, file_sizes))
    msg = 'Found %s logs.' %len(entries)

    print_line(msg, buffer, display=True)
    for log, size in entries:
        dots = '.'*(column_len - len(log))
        prnt(buffer, '%s %s %s' %(strip_home(log), dots, size))
    if entries:
        # repeat the message below the list, where it's easier to see
        print_line(msg, buffer)
    return WEECHAT_RC_OK
1557
1558
1559 ### Completion ###
def completion_log_files(data, completion_item, buffer, completion):
    """Completion callback, adds every path returned by dir_list(home_dir) to 'completion'
    with the home_dir part cut off."""
    global home_dir
    skip = len(home_dir)
    add_word = weechat.hook_completion_list_add
    at_end = weechat.WEECHAT_LIST_POS_END
    for log in dir_list(home_dir):
        relative = log[skip:]
        add_word(completion, relative, 0, at_end)
    return WEECHAT_RC_OK
1569
def completion_grep_args(data, completion_item, buffer, completion):
    """Completion callback, adds the long options of /grep ('--option') and the start of
    each template ('%{name') to 'completion'."""
    long_options = (
        'count', 'all', 'matchcase', 'hilight', 'exact', 'head', 'tail', 'number', 'buffer',
        'after-context', 'before-context', 'context', 'invert', 'only-match',
    )

    def offer(word):
        weechat.hook_completion_list_add(completion, word, 0, weechat.WEECHAT_LIST_POS_SORT)

    for option in long_options:
        offer('--' + option)
    for name in templates:
        offer('%{' + name)
    return WEECHAT_RC_OK
1577
1578
1579 ### Templates ###
# Regexp building blocks used by the templates below.

# Four dot-separated groups of 1-3 digits. Groups are not range-checked, so
# something like 999.999.999.999 matches as well; that is accepted on purpose.
ipAddress = r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}'

# One or more dot-separated labels of at least two word/hyphen characters,
# followed by a dot and at least two characters in the a-z range.
domain = r'[\w-]{2,}(?:\.[\w-]{2,})*\.[a-z]{2,}'

# scheme://<domain or ip>, then an optional :port and an optional /path that
# stops at whitespace or at one of the characters ']', ')' and '>'.
url = r'\w+://(?:%s|%s)(?::\d+)?(?:/[^\])>\s]*)?' % (domain, ipAddress)

# Template placeholder: '%{' followed by text that starts with a word
# character, captured up to the closing '}' or, if there is none, up to the
# end of the string.
_tmplRe = re.compile(r'%\{(\w+.*?)(?:\}|$)')
1586
def make_url_regexp(args):
    """Return a regexp string for the 'url' template.

    args: text given after the template name; it is split on whitespace and
          each word is escaped with re.escape.

    With no (or empty) args the generic ``url`` pattern is returned. Otherwise
    the result matches something starting with '<scheme>://' or 'www.' that
    contains any one of the given words.
    """
    if not args:
        return url
    escaped_words = [re.escape(word) for word in args.split()]
    any_word = r'(?:%s)' % '|'.join(escaped_words)
    return r'(?:\w+://|www\.)[^\s]*%s[^\s]*(?:/[^\])>\s]*)?' % any_word
1594
def make_simple_regexp(pattern):
    """Convert a wildcard pattern into a regexp string.

    '*' becomes '.*', '?' becomes '.', and every other character is passed
    through re.escape so it is matched literally.
    """
    wildcards = {'*': '.*', '?': '.'}
    return ''.join(
        wildcards[char] if char in wildcards else re.escape(char)
        for char in pattern
    )
1605
# Template name -> expansion. A value is either a ready-made regexp string
# ('ip', 'domain') or a callable that receives the template's text and returns
# a regexp string ('url', 'escape', 'simple').
templates = dict(
    ip=ipAddress,
    url=make_url_regexp,
    escape=lambda text: re.escape(text),
    simple=make_simple_regexp,
    domain=domain,
)
1613
1614 ### Main ###
def delete_bytecode():
    """Delete '<SCRIPT_NAME>.pyc' from ``script_path`` when that file exists.

    Called once at start-up and also registered by name as the script's
    shutdown function in weechat.register().

    Returns WEECHAT_RC_OK whether or not a file was removed.
    """
    compiled_file = path.join(script_path, SCRIPT_NAME + '.pyc')
    if not path.isfile(compiled_file):
        return WEECHAT_RC_OK
    os.remove(compiled_file)
    return WEECHAT_RC_OK
1621
# Script start-up. Runs only when executed as the main module, the weechat
# import succeeded (import_ok) and weechat.register() accepted the script.
# It sets the module-level globals (home_dir, script_path, bytecode, colors,
# script_nick, context_sep, debug), hooks the /grep and /logs commands and
# their completions, and writes default values for unset options.
if __name__ == '__main__' and import_ok and \
        weechat.register(SCRIPT_NAME, SCRIPT_AUTHOR, SCRIPT_VERSION, SCRIPT_LICENSE, \
        SCRIPT_DESC, 'delete_bytecode', ''):
    # Imported before its first use below (sys.path / sys.version_info).
    import sys

    home_dir = get_home()

    # make the script's own directory importable, so the script can import itself
    script_path = path.dirname(__file__)
    sys.path.append(script_path)
    delete_bytecode()

    # check python version: 'B' on Python 2.6 or later, empty otherwise
    # NOTE(review): 'bytecode' is not used in this part of the file; presumably it is
    # passed elsewhere as the interpreter's -B switch -- confirm against its users.
    if sys.version_info >= (2, 6):
        bytecode = 'B'
    else:
        bytecode = ''


    weechat.hook_command(SCRIPT_COMMAND, cmd_grep.__doc__,
            "[log <file> | buffer <name> | stop] [-a|--all] [-b|--buffer] [-c|--count] [-m|--matchcase] "
            "[-H|--hilight] [-o|--only-match] [-i|-v|--invert] [(-h|--head)|(-t|--tail) [-n|--number <n>]] "
            "[-A|--after-context <n>] [-B|--before-context <n>] [-C|--context <n> ] <expression>",
# help
"""
     log <file>: Search in one log that matches <file> in the logger path.
                 Use '*' and '?' as wildcards.
  buffer <name>: Search in buffer <name>, if there's no buffer with <name> it will
                 try to search for a log file.
           stop: Stops a currently running search.
       -a --all: Search in all open buffers.
                 If used with 'log <file>' search in all logs that matches <file>.
    -b --buffer: Search only in buffers, not in file logs.
     -c --count: Just count the number of matched lines instead of showing them.
 -m --matchcase: Don't do case insensitive search.
   -H --hilight: Colour exact matches in output buffer.
-o --only-match: Print only the matching part of the line (unique matches).
 -v -i --invert: Print lines that don't match the regular expression.
      -t --tail: Print the last 10 matching lines.
      -h --head: Print the first 10 matching lines.
-n --number <n>: Overrides default number of lines for --tail or --head.
-A --after-context <n>: Shows <n> lines of trailing context after matching lines.
-B --before-context <n>: Shows <n> lines of leading context before matching lines.
-C --context <n>: Same as using both --after-context and --before-context simultaneously.
  <expression>: Expression to search.

Grep buffer:
  Input line accepts most arguments of /grep, it'll repeat last search using the new
  arguments provided. You can't search in different logs from the buffer's input.
  Boolean arguments like --count, --tail, --head, --hilight, ... are toggleable

Python regular expression syntax:
  See http://docs.python.org/lib/re-syntax.html

Grep Templates:
     %{url [text]}: Matches anything like an url, or an url with text.
             %{ip}: Matches anything that looks like an ip.
         %{domain}: Matches anything like a domain.
    %{escape text}: Escapes text in pattern.
 %{simple pattern}: Converts a pattern with '*' and '?' wildcards into a regexp.

Examples:
  Search for urls with the word 'weechat' said by 'nick'
    /grep nick\\t.*%{url weechat}
  Search for '*.*' string
    /grep %{escape *.*}
""",
            # completion template
            "buffer %(buffers_names) %(grep_arguments)|%*"
            "||log %(grep_log_files) %(grep_arguments)|%*"
            "||stop"
            "||%(grep_arguments)|%*",
            'cmd_grep' ,'')
    weechat.hook_command('logs', cmd_logs.__doc__, "[-s|--size] [<filter>]",
            "-s --size: Sort logs by size.\n"
            " <filter>: Only show logs that match <filter>. Use '*' and '?' as wildcards.", '--size', 'cmd_logs', '')

    # completions referenced by the /grep completion template above
    weechat.hook_completion('grep_log_files', "list of log files",
            'completion_log_files', '')
    weechat.hook_completion('grep_arguments', "list of arguments",
            'completion_grep_args', '')

    # settings: write the default for every option that is not set yet
    for opt, val in settings.iteritems():
        if not weechat.config_is_set_plugin(opt):
            weechat.config_set_plugin(opt, val)

    # colors
    color_date        = weechat.color('brown')
    color_info        = weechat.color('cyan')
    color_hilight     = weechat.color('lightred')
    color_reset       = weechat.color('reset')
    color_title       = weechat.color('yellow')
    color_summary     = weechat.color('lightcyan')
    color_delimiter   = weechat.color('chat_delimiters')
    color_script_nick = weechat.color('chat_nick')

    # pretty [grep]
    script_nick = '%s[%s%s%s]%s' %(color_delimiter, color_script_nick, SCRIPT_NAME, color_delimiter,
            color_reset)
    script_nick_nocolor = '[%s]' %SCRIPT_NAME
    # paragraph separator when using context options
    context_sep = '%s\t%s--' %(script_nick, color_info)

    # -------------------------------------------------------------------------
    # Debug

    # debug() is a real printer only when the 'debug' plugin option is non-empty,
    # otherwise it is a no-op so callers never need to check.
    if weechat.config_get_plugin('debug'):
        try:
            # custom debug module I use, allows me to inspect script's objects.
            import pybuffer
            debug = pybuffer.debugBuffer(globals(), '%s_debug' % SCRIPT_NAME)
        except Exception:
            # pybuffer is optional: fall back to printing in the core buffer.
            # Exception (not a bare except) so SystemExit/KeyboardInterrupt
            # are not swallowed here.
            def debug(s, *args):
                if not isinstance(s, basestring):
                    s = str(s)
                if args:
                    s = s %args
                prnt('', '%s\t%s' %(script_nick, s))
    else:
        def debug(*args):
            pass
1745
1746 # vim:set shiftwidth=4 tabstop=4 softtabstop=4 expandtab textwidth=100: