weechat/python/grep.py

   1 # -*- coding: utf-8 -*-
   2 ###
   3 # Copyright (c) 2009-2011 by Elián Hanisch <lambdae2@gmail.com>
   4 #
   5 # This program is free software; you can redistribute it and/or modify
   6 # it under the terms of the GNU General Public License as published by
   7 # the Free Software Foundation; either version 3 of the License, or
   8 # (at your option) any later version.
   9 #
  10 # This program is distributed in the hope that it will be useful,
  11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13 # GNU General Public License for more details.
  14 #
  15 # You should have received a copy of the GNU General Public License
  16 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
  17 ###
  18
  19 ###
  20 # Search in Weechat buffers and logs (for Weechat 0.3.*)
  21 #
  22 #   Inspired by xt's grep.py
  23 #   Originally I just wanted to add some fixes in grep.py, but then
  24 #   I got carried away and rewrote everything, so new script.
  25 #
  26 #   Commands:
  27 #   * /grep
  28 #     Search in logs or buffers, see /help grep
  29 #   * /logs:
  30 #     Lists logs in ~/.weechat/logs, see /help logs
  31 #
  32 #   Settings:
  33 #   * plugins.var.python.grep.clear_buffer:
  34 #     Clear the results buffer before each search. Valid values: on, off
  35 #
  36 #   * plugins.var.python.grep.go_to_buffer:
  37 #     Automatically go to grep buffer when search is over. Valid values: on, off
  38 #
  39 #   * plugins.var.python.grep.log_filter:
  40 #     Comma separated list of patterns that grep will use to exclude logs, e.g.
  41 #     if you use '*server/*' any log in the 'server' folder will be excluded
  42 #     when using the command '/grep log'
  43 #
  44 #   * plugins.var.python.grep.show_summary:
  45 #     Shows summary for each log. Valid values: on, off
  46 #
  47 #   * plugins.var.python.grep.max_lines:
  48 #     Grep will only print the last matched lines that don't surpass the value defined here.
  49 #
  50 #   * plugins.var.python.grep.size_limit:
  51 #     Size limit in KiB, used to decide whether grepping should run in the background or not. If
  52 #     the logs to grep have a total size bigger than this value then grep runs as a new process.
  53 #     It can be used to force or disable the background process: using '0' forces grep to always
  54 #     run in the background, while using '' (empty string) disables it.
  55 #
  56 #   * plugins.var.python.grep.default_tail_head:
  57 #     Config option that defines the default number of lines returned when using --head or --tail options.
  58 #     Can be overridden in the command with the --number option.
  59 #
  60 #
  61 #   TODO:
  62 #   * try to figure out why hook_process chokes in long outputs (using a tempfile as a
  63 #   workaround now)
  64 #   * possibly add option for defining time intervals
  65 #
  66 #
  67 #   History:
  68 #
  69 #   2017-07-23, Sébastien Helleu <flashcode@flashtux.org>
  70 #   version 0.7.8: fix modulo by zero when nick is empty string
  71 #
  72 #   2016-06-23, mickael9
  73 #   version 0.7.7: fix get_home function
  74 #
  75 #   2015-11-26
  76 #   version 0.7.6: fix a typo
  77 #
  78 #   2015-01-31, Nicd-
  79 #   version 0.7.5:
  80 #   '~' is now expaned to the home directory in the log file path so
  81 #   paths like '~/logs/' should work.
  82 #
  83 #   2015-01-14, nils_2
  84 #   version 0.7.4: make q work to quit grep buffer (requested by: gb)
  85 #
  86 #   2014-03-29, Felix Eckhofer <felix@tribut.de>
  87 #   version 0.7.3: fix typo
  88 #
  89 #   2011-01-09
  90 #   version 0.7.2: bug fixes
  91 #
  92 #   2010-11-15
  93 #   version 0.7.1:
  94 #   * use TempFile so temporal files are guaranteed to be deleted.
  95 #   * enable Archlinux workaround.
  96 #
  97 #   2010-10-26
  98 #   version 0.7:
  99 #   * added templates.
 100 #   * using --only-match shows only unique strings.
 101 #   * fixed bug that inverted -B -A switches when used with -t
 102 #
 103 #   2010-10-14
 104 #   version 0.6.8: by xt <xt@bash.no>
 105 #   * supress highlights when printing in grep buffer
 106 #
 107 #   2010-10-06
 108 #   version 0.6.7: by xt <xt@bash.no>
 109 #   * better temporary file:
 110 #    use tempfile.mkstemp. to create a temp file in log dir,
 111 #    makes it safer with regards to write permission and multi user
 112 #
 113 #   2010-04-08
 114 #   version 0.6.6: bug fixes
 115 #   * use WEECHAT_LIST_POS_END in log file completion, makes completion faster
 116 #   * disable bytecode if using python 2.6
 117 #   * use single quotes in command string
 118 #   * fix bug that could change buffer's title when using /grep stop
 119 #
 120 #   2010-01-24
 121 #   version 0.6.5: disable bytecode is a 2.6 feature, instead, resort to delete the bytecode manually
 122 #
 123 #   2010-01-19
 124 #   version 0.6.4: bug fix
 125 #   version 0.6.3: added options --invert --only-match (replaces --exact, which is still available
 126 #   but removed from help)
 127 #   * use new 'irc_nick_color' info
 128 #   * don't generate bytecode when spawning a new process
 129 #   * show active options in buffer title
 130 #
 131 #   2010-01-17
 132 #   version 0.6.2: removed 2.6-ish code
 133 #   version 0.6.1: fixed bug when grepping in grep's buffer
 134 #
 135 #   2010-01-14
 136 #   version 0.6.0: implemented grep in background
 137 #   * improved context lines presentation.
 138 #   * grepping for big (or many) log files runs in a weechat_process.
 139 #   * added /grep stop.
 140 #   * added 'size_limit' option
 141 #   * fixed a infolist leak when grepping buffers
 142 #   * added 'default_tail_head' option
 143 #   * results are sort by line count
 144 #   * don't die if log is corrupted (has NULL chars in it)
 145 #   * changed presentation of /logs
 146 #   * log path completion doesn't suck anymore
 147 #   * removed all tabs, because I learned how to configure Vim so that spaces aren't annoying
 148 #   anymore. This was the script's original policy.
 149 #
 150 #   2010-01-05
 151 #   version 0.5.5: rename script to 'grep.py' (FlashCode <flashcode@flashtux.org>).
 152 #
 153 #   2010-01-04
 154 #   version 0.5.4.1: fix index error when using --after/before-context options.
 155 #
 156 #   2010-01-03
 157 #   version 0.5.4: new features
 158 #   * added --after-context and --before-context options.
 159 #   * added --context as a shortcut for using both -A -B options.
 160 #
 161 #   2009-11-06
 162 #   version 0.5.3: improvements for long grep output
 163 #   * grep buffer input accepts the same flags as /grep for repeat a search with different
 164 #     options.
 165 #   * tweaks in grep's output.
 166 #   * max_lines option added for limit grep's output.
 167 #   * code in update_buffer() optimized.
 168 #   * time stats in buffer title.
 169 #   * added go_to_buffer config option.
 170 #   * added --buffer for search only in buffers.
 171 #   * refactoring.
 172 #
 173 #   2009-10-12, omero
 174 #   version 0.5.2: made it python-2.4.x compliant
 175 #
 176 #   2009-08-17
 177 #   version 0.5.1: some refactoring, show_summary option added.
 178 #
 179 #   2009-08-13
 180 #   version 0.5: rewritten from xt's grep.py
 181 #   * fixed searching in non weechat logs, for cases like, if you're
 182 #     switching from irssi and rename and copy your irssi logs to %h/logs
 183 #   * fixed "timestamp rainbow" when you /grep in grep's buffer
 184 #   * allow to search in other buffers other than current or in logs
 185 #     of currently closed buffers with cmd 'buffer'
 186 #   * allow to search in any log file in %h/logs with cmd 'log'
 187 #   * added --count for return the number of matched lines
 188 #   * added --matchcase for case sensible search
 189 #   * added --hilight for color matches
 190 #   * added --head and --tail options, and --number
 191 #   * added command /logs for list files in %h/logs
 192 #   * added config option for clear the buffer before a search
 193 #   * added config option for filter logs we don't want to grep
 194 #   * added the posibility to repeat last search with another regexp by writing
 195 #     it in grep's buffer
 196 #   * changed spaces for tabs in the code, which is my preference
 197 #
 198 ###
 199
 200 from os import path
 201 import sys, getopt, time, os, re, tempfile
 202
 203 try:
 204     import weechat
 205     from weechat import WEECHAT_RC_OK, prnt, prnt_date_tags
 206     import_ok = True
 207 except ImportError:
 208     import_ok = False
 209
 210 SCRIPT_NAME    = "grep"
 211 SCRIPT_AUTHOR  = "Elián Hanisch <lambdae2@gmail.com>"
 212 SCRIPT_VERSION = "0.7.8"
 213 SCRIPT_LICENSE = "GPL3"
 214 SCRIPT_DESC    = "Search in buffers and logs"
 215 SCRIPT_COMMAND = "grep"
 216
 217 ### Default Settings ###
 218 settings = {
 219 'clear_buffer'      : 'off',
 220 'log_filter'        : '',
 221 'go_to_buffer'      : 'on',
 222 'max_lines'         : '4000',
 223 'show_summary'      : 'on',
 224 'size_limit'        : '2048',
 225 'default_tail_head' : '10',
 226 }
 227
 228 ### Class definitions ###
 229 class linesDict(dict):
 230     """
 231     Class for handling matched lines in more than one buffer.
 232     linesDict[buffer_name] = matched_lines_list
 233     """
 234     def __setitem__(self, key, value):
 235         assert isinstance(value, list)
 236         if key not in self:
 237             dict.__setitem__(self, key, value)
 238         else:
 239             dict.__getitem__(self, key).extend(value)
 240
 241     def get_matches_count(self):
 242         """Return the sum of total matches stored."""
 243         if dict.__len__(self):
 244             return sum(map(lambda L: L.matches_count, self.itervalues()))
 245         else:
 246             return 0
 247
 248     def __len__(self):
 249         """Return the sum of total lines stored."""
 250         if dict.__len__(self):
 251             return sum(map(len, self.itervalues()))
 252         else:
 253             return 0
 254
 255     def __str__(self):
 256         """Returns buffer count or buffer name if there's just one stored."""
 257         n = len(self.keys())
 258         if n == 1:
 259             return self.keys()[0]
 260         elif n > 1:
 261             return '%s logs' %n
 262         else:
 263             return ''
 264
 265     def items(self):
 266         """Returns a list of items sorted by line count."""
 267         items = dict.items(self)
 268         items.sort(key=lambda i: len(i[1]))
 269         return items
 270
 271     def items_count(self):
 272         """Returns a list of items sorted by match count."""
 273         items = dict.items(self)
 274         items.sort(key=lambda i: i[1].matches_count)
 275         return items
 276
 277     def strip_separator(self):
 278         for L in self.itervalues():
 279             L.strip_separator()
 280
 281     def get_last_lines(self, n):
 282         total_lines = len(self)
 283         #debug('total: %s n: %s' %(total_lines, n))
 284         if n >= total_lines:
 285             # nothing to do
 286             return
 287         for k, v in reversed(self.items()):
 288             l = len(v)
 289             if n > 0:
 290                 if l > n:
 291                     del v[:l-n]
 292                     v.stripped_lines = l-n
 293                 n -= l
 294             else:
 295                 del v[:]
 296                 v.stripped_lines = l
 297
 298 class linesList(list):
 299     """Class for list of matches, since sometimes I need to add lines that aren't matches, I need an
 300     independent counter."""
 301     _sep = '...'
 302     def __init__(self, *args):
 303         list.__init__(self, *args)
 304         self.matches_count = 0
 305         self.stripped_lines = 0
 306
 307     def append(self, item):
 308         """Append lines, can be a string or a list with strings."""
 309         if isinstance(item, str):
 310             list.append(self, item)
 311         else:
 312             self.extend(item)
 313
 314     def append_separator(self):
 315         """adds a separator into the list, makes sure it doen't add two together."""
 316         s = self._sep
 317         if (self and self[-1] != s) or not self:
 318             self.append(s)
 319
 320     def onlyUniq(self):
 321         s = set(self)
 322         del self[:]
 323         self.extend(s)
 324
 325     def count_match(self, item=None):
 326         if item is None or isinstance(item, str):
 327             self.matches_count += 1
 328         else:
 329             self.matches_count += len(item)
 330
 331     def strip_separator(self):
 332         """removes separators if there are first or/and last in the list."""
 333         if self:
 334             s = self._sep
 335             if self[0] == s:
 336                 del self[0]
 337             if self[-1] == s:
 338                 del self[-1]
 339
 340 ### Misc functions ###
 341 now = time.time
 342 def get_size(f):
 343     try:
 344         return os.stat(f).st_size
 345     except OSError:
 346         return 0
 347
 348 sizeDict = {0:'b', 1:'KiB', 2:'MiB', 3:'GiB', 4:'TiB'}
 349 def human_readable_size(size):
 350     power = 0
 351     while size > 1024:
 352         power += 1
 353         size /= 1024.0
 354     return '%.2f %s' %(size, sizeDict.get(power, ''))
 355
 356 def color_nick(nick):
 357     """Returns coloured nick, with coloured mode if any."""
 358     if not nick: return ''
 359     wcolor = weechat.color
 360     config_string = lambda s : weechat.config_string(weechat.config_get(s))
 361     config_int = lambda s : weechat.config_integer(weechat.config_get(s))
 362     # prefix and suffix
 363     prefix = config_string('irc.look.nick_prefix')
 364     suffix = config_string('irc.look.nick_suffix')
 365     prefix_c = suffix_c = wcolor(config_string('weechat.color.chat_delimiters'))
 366     if nick[0] == prefix:
 367         nick = nick[1:]
 368     else:
 369         prefix = prefix_c = ''
 370     if nick[-1] == suffix:
 371         nick = nick[:-1]
 372         suffix = wcolor(color_delimiter) + suffix
 373     else:
 374         suffix = suffix_c = ''
 375     # nick mode
 376     modes = '@!+%'
 377     if nick[0] in modes:
 378         mode, nick = nick[0], nick[1:]
 379         mode_color = wcolor(config_string('weechat.color.nicklist_prefix%d' \
 380             %(modes.find(mode) + 1)))
 381     else:
 382         mode = mode_color = ''
 383     # nick color
 384     nick_color = ''
 385     if nick:
 386         nick_color = weechat.info_get('irc_nick_color', nick)
 387         if not nick_color:
 388             # probably we're in WeeChat 0.3.0
 389             #debug('no irc_nick_color')
 390             color_nicks_number = config_int('weechat.look.color_nicks_number')
 391             idx = (sum(map(ord, nick))%color_nicks_number) + 1
 392             nick_color = wcolor(config_string('weechat.color.chat_nick_color%02d' %idx))
 393     return ''.join((prefix_c, prefix, mode_color, mode, nick_color, nick, suffix_c, suffix))
 394
 395 ### Config and value validation ###
 396 boolDict = {'on':True, 'off':False}
 397 def get_config_boolean(config):
 398     value = weechat.config_get_plugin(config)
 399     try:
 400         return boolDict[value]
 401     except KeyError:
 402         default = settings[config]
 403         error("Error while fetching config '%s'. Using default value '%s'." %(config, default))
 404         error("'%s' is invalid, allowed: 'on', 'off'" %value)
 405         return boolDict[default]
 406
 407 def get_config_int(config, allow_empty_string=False):
 408     value = weechat.config_get_plugin(config)
 409     try:
 410         return int(value)
 411     except ValueError:
 412         if value == '' and allow_empty_string:
 413             return value
 414         default = settings[config]
 415         error("Error while fetching config '%s'. Using default value '%s'." %(config, default))
 416         error("'%s' is not a number." %value)
 417         return int(default)
 418
 419 def get_config_log_filter():
 420     filter = weechat.config_get_plugin('log_filter')
 421     if filter:
 422         return filter.split(',')
 423     else:
 424         return []
 425
 426 def get_home():
 427     home = weechat.config_string(weechat.config_get('logger.file.path'))
 428     home = home.replace('%h', weechat.info_get('weechat_dir', ''))
 429     home = path.abspath(path.expanduser(home))
 430     return home
 431
 432 def strip_home(s, dir=''):
 433     """Strips home dir from the begging of the log path, this makes them sorter."""
 434     if not dir:
 435         global home_dir
 436         dir = home_dir
 437     l = len(dir)
 438     if s[:l] == dir:
 439         return s[l:]
 440     return s
 441
 442 ### Messages ###
 443 script_nick = SCRIPT_NAME
 444 def error(s, buffer=''):
 445     """Error msg"""
 446     prnt(buffer, '%s%s %s' %(weechat.prefix('error'), script_nick, s))
 447     if weechat.config_get_plugin('debug'):
 448         import traceback
 449         if traceback.sys.exc_type:
 450             trace = traceback.format_exc()
 451             prnt('', trace)
 452
 453 def say(s, buffer=''):
 454     """normal msg"""
 455     prnt_date_tags(buffer, 0, 'no_highlight', '%s\t%s' %(script_nick, s))
 456
 457
 458
 459 ### Log files and buffers ###
 460 cache_dir = {} # note: don't remove, needed for completion if the script was loaded recently
 461 def dir_list(dir, filter_list=(), filter_excludes=True, include_dir=False):
 462     """Returns a list of files in 'dir' and its subdirs."""
 463     global cache_dir
 464     from os import walk
 465     from fnmatch import fnmatch
 466     #debug('dir_list: listing in %s' %dir)
 467     key = (dir, include_dir)
 468     try:
 469         return cache_dir[key]
 470     except KeyError:
 471         pass
 472
 473     filter_list = filter_list or get_config_log_filter()
 474     dir_len = len(dir)
 475     if filter_list:
 476         def filter(file):
 477             file = file[dir_len:] # pattern shouldn't match home dir
 478             for pattern in filter_list:
 479                 if fnmatch(file, pattern):
 480                     return filter_excludes
 481             return not filter_excludes
 482     else:
 483         filter = lambda f : not filter_excludes
 484
 485     file_list = []
 486     extend = file_list.extend
 487     join = path.join
 488     def walk_path():
 489         for basedir, subdirs, files in walk(dir):
 490             #if include_dir:
 491             #    subdirs = map(lambda s : join(s, ''), subdirs)
 492             #    files.extend(subdirs)
 493             files_path = map(lambda f : join(basedir, f), files)
 494             files_path = [ file for file in files_path if not filter(file) ]
 495             extend(files_path)
 496
 497     walk_path()
 498     cache_dir[key] = file_list
 499     #debug('dir_list: got %s' %str(file_list))
 500     return file_list
 501
 502 def get_file_by_pattern(pattern, all=False):
 503     """Returns the first log whose path matches 'pattern',
 504     if all is True returns all logs that matches."""
 505     if not pattern: return []
 506     #debug('get_file_by_filename: searching for %s.' %pattern)
 507     # do envvar expandsion and check file
 508     file = path.expanduser(pattern)
 509     file = path.expandvars(file)
 510     if path.isfile(file):
 511         return [file]
 512     # lets see if there's a matching log
 513     global home_dir
 514     file = path.join(home_dir, pattern)
 515     if path.isfile(file):
 516         return [file]
 517     else:
 518         from fnmatch import fnmatch
 519         file = []
 520         file_list = dir_list(home_dir)
 521         n = len(home_dir)
 522         for log in file_list:
 523             basename = log[n:]
 524             if fnmatch(basename, pattern):
 525                 file.append(log)
 526         #debug('get_file_by_filename: got %s.' %file)
 527         if not all and file:
 528             file.sort()
 529             return [ file[-1] ]
 530         return file
 531
 532 def get_file_by_buffer(buffer):
 533     """Given buffer pointer, finds log's path or returns None."""
 534     #debug('get_file_by_buffer: searching for %s' %buffer)
 535     infolist = weechat.infolist_get('logger_buffer', '', '')
 536     if not infolist: return
 537     try:
 538         while weechat.infolist_next(infolist):
 539             pointer = weechat.infolist_pointer(infolist, 'buffer')
 540             if pointer == buffer:
 541                 file = weechat.infolist_string(infolist, 'log_filename')
 542                 if weechat.infolist_integer(infolist, 'log_enabled'):
 543                     #debug('get_file_by_buffer: got %s' %file)
 544                     return file
 545                 #else:
 546                 #    debug('get_file_by_buffer: got %s but log not enabled' %file)
 547     finally:
 548         #debug('infolist gets freed')
 549         weechat.infolist_free(infolist)
 550
 551 def get_file_by_name(buffer_name):
 552     """Given a buffer name, returns its log path or None. buffer_name should be in 'server.#channel'
 553     or '#channel' format."""
 554     #debug('get_file_by_name: searching for %s' %buffer_name)
 555     # common mask options
 556     config_masks = ('logger.mask.irc', 'logger.file.mask')
 557     # since there's no buffer pointer, we try to replace some local vars in mask, like $channel and
 558     # $server, then replace the local vars left with '*', and use it as a mask for get the path with
 559     # get_file_by_pattern
 560     for config in config_masks:
 561         mask = weechat.config_string(weechat.config_get(config))
 562         #debug('get_file_by_name: mask: %s' %mask)
 563         if '$name' in mask:
 564             mask = mask.replace('$name', buffer_name)
 565         elif '$channel' in mask or '$server' in mask:
 566             if '.' in buffer_name and \
 567                     '#' not in buffer_name[:buffer_name.find('.')]: # the dot isn't part of the channel name
 568                 #    ^ I'm asuming channel starts with #, i'm lazy.
 569                 server, channel = buffer_name.split('.', 1)
 570             else:
 571                 server, channel = '*', buffer_name
 572             if '$channel' in mask:
 573                 mask = mask.replace('$channel', channel)
 574             if '$server' in mask:
 575                 mask = mask.replace('$server', server)
 576         # change the unreplaced vars by '*'
 577         from string import letters
 578         if '%' in mask:
 579             # vars for time formatting
 580             mask = mask.replace('%', '$')
 581         if '$' in mask:
 582             masks = mask.split('$')
 583             masks = map(lambda s: s.lstrip(letters), masks)
 584             mask = '*'.join(masks)
 585             if mask[0] != '*':
 586                 mask = '*' + mask
 587         #debug('get_file_by_name: using mask %s' %mask)
 588         file = get_file_by_pattern(mask)
 589         #debug('get_file_by_name: got file %s' %file)
 590         if file:
 591             return file
 592     return None
 593
 594 def get_buffer_by_name(buffer_name):
 595     """Given a buffer name returns its buffer pointer or None."""
 596     #debug('get_buffer_by_name: searching for %s' %buffer_name)
 597     pointer = weechat.buffer_search('', buffer_name)
 598     if not pointer:
 599         try:
 600             infolist = weechat.infolist_get('buffer', '', '')
 601             while weechat.infolist_next(infolist):
 602                 short_name = weechat.infolist_string(infolist, 'short_name')
 603                 name = weechat.infolist_string(infolist, 'name')
 604                 if buffer_name in (short_name, name):
 605                     #debug('get_buffer_by_name: found %s' %name)
 606                     pointer = weechat.buffer_search('', name)
 607                     return pointer
 608         finally:
 609             weechat.infolist_free(infolist)
 610     #debug('get_buffer_by_name: got %s' %pointer)
 611     return pointer
 612
 613 def get_all_buffers():
 614     """Returns list with pointers of all open buffers."""
 615     buffers = []
 616     infolist = weechat.infolist_get('buffer', '', '')
 617     while weechat.infolist_next(infolist):
 618         buffers.append(weechat.infolist_pointer(infolist, 'pointer'))
 619     weechat.infolist_free(infolist)
 620     grep_buffer = weechat.buffer_search('python', SCRIPT_NAME)
 621     if grep_buffer and grep_buffer in buffers:
 622         # remove it from list
 623         del buffers[buffers.index(grep_buffer)]
 624     return buffers
 625
 626 ### Grep ###
 627 def make_regexp(pattern, matchcase=False):
 628     """Returns a compiled regexp."""
 629     if pattern in ('.', '.*', '.?', '.+'):
 630         # because I don't need to use a regexp if we're going to match all lines
 631         return None
 632     # matching takes a lot more time if pattern starts or ends with .* and it isn't needed.
 633     if pattern[:2] == '.*':
 634         pattern = pattern[2:]
 635     if pattern[-2:] == '.*':
 636         pattern = pattern[:-2]
 637     try:
 638         if not matchcase:
 639             regexp = re.compile(pattern, re.IGNORECASE)
 640         else:
 641             regexp = re.compile(pattern)
 642     except Exception, e:
 643         raise Exception, 'Bad pattern, %s' %e
 644     return regexp
 645
 646 def check_string(s, regexp, hilight='', exact=False):
 647     """Checks 's' with a regexp and returns it if is a match."""
 648     if not regexp:
 649         return s
 650
 651     elif exact:
 652         matchlist = regexp.findall(s)
 653         if matchlist:
 654             if isinstance(matchlist[0], tuple):
 655                 # join tuples (when there's more than one match group in regexp)
 656                 return [ ' '.join(t) for t in matchlist ]
 657             return matchlist
 658
 659     elif hilight:
 660         matchlist = regexp.findall(s)
 661         if matchlist:
 662             if isinstance(matchlist[0], tuple):
 663                 # flatten matchlist
 664                 matchlist = [ item for L in matchlist for item in L if item ]
 665             matchlist = list(set(matchlist)) # remove duplicates if any
 666             # apply hilight
 667             color_hilight, color_reset = hilight.split(',', 1)
 668             for m in matchlist:
 669                 s = s.replace(m, '%s%s%s' % (color_hilight, m, color_reset))
 670             return s
 671
 672     # no need for findall() here
 673     elif regexp.search(s):
 674         return s
 675
 676 def grep_file(file, head, tail, after_context, before_context, count, regexp, hilight, exact, invert):
 677     """Return a list of lines that match 'regexp' in 'file', if no regexp returns all lines."""
 678     if count:
 679         tail = head = after_context = before_context = False
 680         hilight = ''
 681     elif exact:
 682         before_context = after_context = False
 683         hilight = ''
 684     elif invert:
 685         hilight = ''
 686     #debug(' '.join(map(str, (file, head, tail, after_context, before_context))))
 687
 688     lines = linesList()
 689     # define these locally as it makes the loop run slightly faster
 690     append = lines.append
 691     count_match = lines.count_match
 692     separator = lines.append_separator
 693     if invert:
 694         def check(s):
 695             if check_string(s, regexp, hilight, exact):
 696                 return None
 697             else:
 698                 return s
 699     else:
 700         check = lambda s: check_string(s, regexp, hilight, exact)
 701
 702     try:
 703         file_object = open(file, 'r')
 704     except IOError:
 705         # file doesn't exist
 706         return lines
 707     if tail or before_context:
 708         # for these options, I need to seek in the file, but is slower and uses a good deal of
 709         # memory if the log is too big, so we do this *only* for these options.
 710         file_lines = file_object.readlines()
 711
 712         if tail:
 713             # instead of searching in the whole file and later pick the last few lines, we
 714             # reverse the log, search until count reached and reverse it again, that way is a lot
 715             # faster
 716             file_lines.reverse()
 717             # don't invert context switches
 718             before_context, after_context = after_context, before_context
 719
 720         if before_context:
 721             before_context_range = range(1, before_context + 1)
 722             before_context_range.reverse()
 723
 724         limit = tail or head
 725
 726         line_idx = 0
 727         while line_idx < len(file_lines):
 728             line = file_lines[line_idx]
 729             line = check(line)
 730             if line:
 731                 if before_context:
 732                     separator()
 733                     trimmed = False
 734                     for id in before_context_range:
 735                         try:
 736                             context_line = file_lines[line_idx - id]
 737                             if check(context_line):
 738                                 # match in before context, that means we appended these same lines in a
 739                                 # previous match, so we delete them merging both paragraphs
 740                                 if not trimmed:
 741                                     del lines[id - before_context - 1:]
 742                                     trimmed = True
 743                             else:
 744                                 append(context_line)
 745                         except IndexError:
 746                             pass
 747                 append(line)
 748                 count_match(line)
 749                 if after_context:
 750                     id, offset = 0, 0
 751                     while id < after_context + offset:
 752                         id += 1
 753                         try:
 754                             context_line = file_lines[line_idx + id]
 755                             _context_line = check(context_line)
 756                             if _context_line:
 757                                 offset = id
 758                                 context_line = _context_line # so match is hilighted with --hilight
 759                                 count_match()
 760                             append(context_line)
 761                         except IndexError:
 762                             pass
 763                     separator()
 764                     line_idx += id
 765                 if limit and lines.matches_count >= limit:
 766                     break
 767             line_idx += 1
 768
 769         if tail:
 770             lines.reverse()
 771     else:
 772         # do a normal grep
 773         limit = head
 774
 775         for line in file_object:
 776             line = check(line)
 777             if line:
 778                 count or append(line)
 779                 count_match(line)
 780                 if after_context:
 781                     id, offset = 0, 0
 782                     while id < after_context + offset:
 783                         id += 1
 784                         try:
 785                             context_line = file_object.next()
 786                             _context_line = check(context_line)
 787                             if _context_line:
 788                                 offset = id
 789                                 context_line = _context_line
 790                                 count_match()
 791                             count or append(context_line)
 792                         except StopIteration:
 793                             pass
 794                     separator()
 795                 if limit and lines.matches_count >= limit:
 796                     break
 797
 798     file_object.close()
 799     return lines
 800
 801 def grep_buffer(buffer, head, tail, after_context, before_context, count, regexp, hilight, exact,
 802         invert):
 803     """Return a list of lines that match 'regexp' in 'buffer', if no regexp returns all lines."""
 804     lines = linesList()
 805     if count:
 806         tail = head = after_context = before_context = False
 807         hilight = ''
 808     elif exact:
 809         before_context = after_context = False
 810     #debug(' '.join(map(str, (tail, head, after_context, before_context, count, exact, hilight))))
 811
 812     # Using /grep in grep's buffer can lead to some funny effects
 813     # We should take measures if that's the case
 814     def make_get_line_funcion():
 815         """Returns a function for get lines from the infolist, depending if the buffer is grep's or
 816         not."""
 817         string_remove_color = weechat.string_remove_color
 818         infolist_string = weechat.infolist_string
 819         grep_buffer = weechat.buffer_search('python', SCRIPT_NAME)
 820         if grep_buffer and buffer == grep_buffer:
 821             def function(infolist):
 822                 prefix = infolist_string(infolist, 'prefix')
 823                 message = infolist_string(infolist, 'message')
 824                 if prefix: # only our messages have prefix, ignore it
 825                     return None
 826                 return message
 827         else:
 828             infolist_time = weechat.infolist_time
 829             def function(infolist):
 830                 prefix = string_remove_color(infolist_string(infolist, 'prefix'), '')
 831                 message = string_remove_color(infolist_string(infolist, 'message'), '')
 832                 date = infolist_time(infolist, 'date')
 833                 return '%s\t%s\t%s' %(date, prefix, message)
 834         return function
 835     get_line = make_get_line_funcion()
 836
 837     infolist = weechat.infolist_get('buffer_lines', buffer, '')
 838     if tail:
 839         # like with grep_file() if we need the last few matching lines, we move the cursor to
 840         # the end and search backwards
 841         infolist_next = weechat.infolist_prev
 842         infolist_prev = weechat.infolist_next
 843     else:
 844         infolist_next = weechat.infolist_next
 845         infolist_prev = weechat.infolist_prev
 846     limit = head or tail
 847
 848     # define these locally as it makes the loop run slightly faster
 849     append = lines.append
 850     count_match = lines.count_match
 851     separator = lines.append_separator
 852     if invert:
 853         def check(s):
 854             if check_string(s, regexp, hilight, exact):
 855                 return None
 856             else:
 857                 return s
 858     else:
 859         check = lambda s: check_string(s, regexp, hilight, exact)
 860
 861     if before_context:
 862         before_context_range = range(1, before_context + 1)
 863         before_context_range.reverse()
 864
 865     while infolist_next(infolist):
 866         line = get_line(infolist)
 867         if line is None: continue
 868         line = check(line)
 869         if line:
 870             if before_context:
 871                 separator()
 872                 trimmed = False
 873                 for id in before_context_range:
 874                     if not infolist_prev(infolist):
 875                         trimmed = True
 876                 for id in before_context_range:
 877                     context_line = get_line(infolist)
 878                     if check(context_line):
 879                         if not trimmed:
 880                             del lines[id - before_context - 1:]
 881                             trimmed = True
 882                     else:
 883                         append(context_line)
 884                     infolist_next(infolist)
 885             count or append(line)
 886             count_match(line)
 887             if after_context:
 888                 id, offset = 0, 0
 889                 while id < after_context + offset:
 890                     id += 1
 891                     if infolist_next(infolist):
 892                         context_line = get_line(infolist)
 893                         _context_line = check(context_line)
 894                         if _context_line:
 895                             context_line = _context_line
 896                             offset = id
 897                             count_match()
 898                         append(context_line)
 899                     else:
 900                         # in the main loop infolist_next will start again an cause an infinite loop
 901                         # this will avoid it
 902                         infolist_next = lambda x: 0
 903                 separator()
 904             if limit and lines.matches_count >= limit:
 905                 break
 906     weechat.infolist_free(infolist)
 907
 908     if tail:
 909         lines.reverse()
 910     return lines
 911
 912 ### this is our main grep function
 913 hook_file_grep = None
 914 def show_matching_lines():
 915     """
 916     Greps buffers in search_in_buffers or files in search_in_files and updates grep buffer with the
 917     result.
 918     """
 919     global pattern, matchcase, number, count, exact, hilight, invert
 920     global tail, head, after_context, before_context
 921     global search_in_files, search_in_buffers, matched_lines, home_dir
 922     global time_start
 923     matched_lines = linesDict()
 924     #debug('buffers:%s \nlogs:%s' %(search_in_buffers, search_in_files))
 925     time_start = now()
 926
 927     # buffers
 928     if search_in_buffers:
 929         regexp = make_regexp(pattern, matchcase)
 930         for buffer in search_in_buffers:
 931             buffer_name = weechat.buffer_get_string(buffer, 'name')
 932             matched_lines[buffer_name] = grep_buffer(buffer, head, tail, after_context,
 933                     before_context, count, regexp, hilight, exact, invert)
 934
 935     # logs
 936     if search_in_files:
 937         size_limit = get_config_int('size_limit', allow_empty_string=True)
 938         background = False
 939         if size_limit or size_limit == 0:
 940             size = sum(map(get_size, search_in_files))
 941             if size > size_limit * 1024:
 942                 background = True
 943         elif size_limit == '':
 944             background = False
 945
 946         if not background:
 947             # run grep normally
 948             regexp = make_regexp(pattern, matchcase)
 949             for log in search_in_files:
 950                 log_name = strip_home(log)
 951                 matched_lines[log_name] = grep_file(log, head, tail, after_context, before_context,
 952                         count, regexp, hilight, exact, invert)
 953             buffer_update()
 954         else:
 955             # we hook a process so grepping runs in background.
 956             #debug('on background')
 957             global hook_file_grep, script_path, bytecode
 958             timeout = 1000*60*5 # 5 min
 959
 960             quotify = lambda s: '"%s"' %s
 961             files_string = ', '.join(map(quotify, search_in_files))
 962
 963             global tmpFile
 964             # we keep the file descriptor as a global var so it isn't deleted until next grep
 965             tmpFile = tempfile.NamedTemporaryFile(prefix=SCRIPT_NAME,
 966                     dir=weechat.info_get('weechat_dir', ''))
 967             cmd = grep_process_cmd %dict(logs=files_string, head=head, pattern=pattern, tail=tail,
 968                     hilight=hilight, after_context=after_context, before_context=before_context,
 969                     exact=exact, matchcase=matchcase, home_dir=home_dir, script_path=script_path,
 970                     count=count, invert=invert, bytecode=bytecode, filename=tmpFile.name,
 971                     python=weechat.info_get('python2_bin', '') or 'python')
 972
 973             #debug(cmd)
 974             hook_file_grep = weechat.hook_process(cmd, timeout, 'grep_file_callback', tmpFile.name)
 975             global pattern_tmpl
 976             if hook_file_grep:
 977                 buffer_create("Searching for '%s' in %s worth of data..." %(pattern_tmpl,
 978                     human_readable_size(size)))
 979     else:
 980         buffer_update()
 981
# Command line template for grepping logs in a background process, defined here for convenience.
# show_matching_lines() fills the %(...)s fields and runs the result with weechat.hook_process().
# The python code between the single quotes imports this script as a module, greps every log with
# grep_file() and pickles the resulting {log name: lines} dict into %(filename)s; any exception is
# printed to stderr.
# NOTE: %(pattern)s, %(home_dir)s, %(script_path)s, %(hilight)s, %(filename)s and the log names end
# up inside double quoted python strings, and the whole code inside single quotes.
grep_process_cmd = """%(python)s -%(bytecode)sc '
import sys, cPickle, os
sys.path.append("%(script_path)s") # add WeeChat script dir so we can import grep
from grep import make_regexp, grep_file, strip_home
logs = (%(logs)s, )
try:
    regexp = make_regexp("%(pattern)s", %(matchcase)s)
    d = {}
    for log in logs:
        log_name = strip_home(log, "%(home_dir)s")
        lines = grep_file(log, %(head)s, %(tail)s, %(after_context)s, %(before_context)s,
        %(count)s, regexp, "%(hilight)s", %(exact)s, %(invert)s)
        d[log_name] = lines
    fd = open("%(filename)s", "wb")
    cPickle.dump(d, fd, -1)
    fd.close()
except Exception, e:
    print >> sys.stderr, e'
"""
1002
1003 grep_stdout = grep_stderr = ''
1004 def grep_file_callback(filename, command, rc, stdout, stderr):
1005     global hook_file_grep, grep_stderr,  grep_stdout
1006     global matched_lines
1007     #debug("rc: %s\nstderr: %s\nstdout: %s" %(rc, repr(stderr), repr(stdout)))
1008     if stdout:
1009         grep_stdout += stdout
1010     if stderr:
1011         grep_stderr += stderr
1012     if int(rc) >= 0:
1013
1014         def set_buffer_error():
1015             grep_buffer = buffer_create()
1016             title = weechat.buffer_get_string(grep_buffer, 'title')
1017             title = title + ' %serror' %color_title
1018             weechat.buffer_set(grep_buffer, 'title', title)
1019
1020         try:
1021             if grep_stderr:
1022                 error(grep_stderr)
1023                 set_buffer_error()
1024             #elif grep_stdout:
1025                 #debug(grep_stdout)
1026             elif path.exists(filename):
1027                 import cPickle
1028                 try:
1029                     #debug(file)
1030                     fd = open(filename, 'rb')
1031                     d = cPickle.load(fd)
1032                     matched_lines.update(d)
1033                     fd.close()
1034                 except Exception, e:
1035                     error(e)
1036                     set_buffer_error()
1037                 else:
1038                     buffer_update()
1039             global tmpFile
1040             tmpFile = None
1041         finally:
1042             grep_stdout = grep_stderr = ''
1043             hook_file_grep = None
1044     return WEECHAT_RC_OK
1045
def get_grep_file_status():
    """Returns a message telling which logs are being searched, their size and for how long the
    search has been running."""
    global search_in_files, matched_lines, time_start
    running_for = now() - time_start
    if len(search_in_files) != 1:
        total = sum(map(get_size, search_in_files))
        target = '%s log files (%s)' %(len(search_in_files), human_readable_size(total))
    else:
        only_log = search_in_files[0]
        target = '%s (%s)' %(strip_home(only_log), human_readable_size(get_size(only_log)))
    return 'Searching in %s, running for %.4f seconds. Interrupt it with "/grep stop" or "stop"' \
        ' in grep buffer.' %(target, running_for)
1057
1058 ### Grep buffer ###
def buffer_update():
    """Updates our buffer with new lines.

    Prints in grep's buffer the lines stored in the global 'matched_lines' (or only the summaries
    if 'count' is set), sets the buffer's title with the totals and the time spent, and finally
    deletes 'matched_lines'.
    """
    global pattern_tmpl, matched_lines, pattern, count, hilight, invert, exact
    # grepping is over at this point, anything after this is time spent printing
    time_grep = now()

    buffer = buffer_create()
    if get_config_boolean('clear_buffer'):
        weechat.buffer_clear(buffer)
    matched_lines.strip_separator() # remove first and last separators of each list
    len_total_lines = len(matched_lines)
    max_lines = get_config_int('max_lines')
    if not count and len_total_lines > max_lines:
        # there are more lines than the allowed maximum, the buffer is cleared regardless of the
        # 'clear_buffer' setting
        weechat.buffer_clear(buffer)

    def _make_summary(log, lines, note):
        """Returns the summary line for 'log', 'note' is appended at the end."""
        return '%s matches "%s%s%s"%s in %s%s%s%s' \
                %(lines.matches_count, color_summary, pattern_tmpl, color_info,
                  invert and ' (inverted)' or '',
                  color_summary, log, color_reset, note)

    if count:
        make_summary = lambda log, lines : _make_summary(log, lines, ' (not shown)')
    else:
        def make_summary(log, lines):
            # tell the user if some (or all) lines of this log were left out
            if lines.stripped_lines:
                if lines:
                    note = ' (last %s lines shown)' %len(lines)
                else:
                    note = ' (not shown)'
            else:
                note = ''
            return _make_summary(log, lines, note)

    global weechat_format
    if hilight:
        # we don't want colors if there's match highlighting
        format_line = lambda s : '%s %s %s' %split_line(s)
    else:
        def format_line(s):
            """Returns 's' with date and nick colored, or just the message if split_line()
            turned 'weechat_format' off."""
            global nick_dict, weechat_format
            date, nick, msg = split_line(s)
            if weechat_format:
                try:
                    nick = nick_dict[nick]
                except KeyError:
                    # cache nick
                    nick_c = color_nick(nick)
                    nick_dict[nick] = nick_c
                    nick = nick_c
                return '%s%s %s%s %s' %(color_date, date, nick, color_reset, msg)
            else:
                #no formatting
                return msg

    prnt(buffer, '\n')
    print_line('Search for "%s%s%s"%s in %s%s%s.' %(color_summary, pattern_tmpl, color_info,
        invert and ' (inverted)' or '', color_summary, matched_lines, color_reset),
            buffer)
    # print last <max_lines> lines
    if matched_lines.get_matches_count():
        if count:
            # with count we sort by matches lines instead of just lines.
            matched_lines_items = matched_lines.items_count()
        else:
            matched_lines_items = matched_lines.items()

        matched_lines.get_last_lines(max_lines)
        for log, lines in matched_lines_items:
            if lines.matches_count:
                # matched lines
                if not count:
                    # print lines
                    # every log starts formatted, split_line() turns this off if it finds a
                    # line that isn't in WeeChat's format
                    weechat_format = True
                    if exact:
                        lines.onlyUniq()
                    for line in lines:
                        #debug(repr(line))
                        if line == linesList._sep:
                            # separator
                            prnt(buffer, context_sep)
                        else:
                            if '\x00' in line:
                                # log was corrupted
                                error("Found garbage in log '%s', maybe it's corrupted" %log)
                                line = line.replace('\x00', '')
                            prnt_date_tags(buffer, 0, 'no_highlight', format_line(line))

                # summary
                if count or get_config_boolean('show_summary'):
                    summary = make_summary(log, lines)
                    print_line(summary, buffer)

            # separator
            if not count and lines:
                prnt(buffer, '\n')
    else:
        print_line('No matches found.', buffer)

    # set title
    global time_start
    time_end = now()
    # total time
    time_total = time_end - time_start
    # percent of the total time used for grepping
    time_grep_pct = (time_grep - time_start)/time_total*100
    #debug('time: %.4f seconds (%.2f%%)' %(time_total, time_grep_pct))
    if not count and len_total_lines > max_lines:
        note = ' (last %s lines shown)' %len(matched_lines)
    else:
        note = ''
    title = "'q': close buffer | Search in %s%s%s %s matches%s | pattern \"%s%s%s\"%s %s | %.4f seconds (%.2f%%)" \
            %(color_title, matched_lines, color_reset, matched_lines.get_matches_count(), note,
              color_title, pattern_tmpl, color_reset, invert and ' (inverted)' or '', format_options(),
              time_total, time_grep_pct)
    weechat.buffer_set(buffer, 'title', title)

    if get_config_boolean('go_to_buffer'):
        weechat.buffer_set(buffer, 'display', '1')

    # free matched_lines so it can be removed from memory
    del matched_lines
1180
def split_line(s):
    """Returns the tuple (date, nick, msg) for the log line 's'.

    If 's' doesn't have the three tab separated fields, or the global 'weechat_format' is already
    off, the whole line is returned as msg and 'weechat_format' is turned off. Tabs left in msg
    are replaced by four spaces.
    """
    global weechat_format
    fields = []
    if weechat_format:
        fields = s.split('\t', 2)
    if len(fields) == 3:
        date, nick, msg = fields
    else:
        # looks like log isn't in weechat's format
        weechat_format = False # incoming lines won't be formatted
        date = nick = ''
        msg = s
    return date, nick, msg.replace('\t', '    ')
1194
def print_line(s, buffer=None, display=False):
    """Prints 's' with say() using the info color, for displaying search summaries.

    If no buffer is given the script's buffer is used. With 'display' the buffer is also shown,
    provided that the setting 'go_to_buffer' is enabled.
    """
    target = buffer
    if target is None:
        target = buffer_create()
    say('%s%s' %(color_info, s), target)
    if not display:
        return
    if get_config_boolean('go_to_buffer'):
        weechat.buffer_set(target, 'display', '1')
1202
def format_options():
    """Returns the options of the current search as a string that looks like /grep's arguments,
    for example '-cm -C2'. It's empty if no option is set."""
    global matchcase, number, count, exact, hilight, invert
    global tail, head, after_context, before_context
    # one letter per enabled flag
    flags = zip('cHmov', (count, hilight, matchcase, exact, invert))
    options = [char for char, enabled in flags if enabled]

    if head or tail:
        n = get_config_int('default_tail_head')
        if head:
            letter, amount = 'h', head
        else:
            letter, amount = 't', tail
        if amount != n:
            # not the default amount, show it as a separate "-hnN" or "-tnN" group
            options.extend([' -', letter, 'n', amount])
        else:
            options.append(letter)

    same_context = before_context and after_context and (before_context == after_context)
    if same_context:
        options.extend([' -C', before_context])
    else:
        if before_context:
            options.extend([' -B', before_context])
        if after_context:
            options.extend([' -A', after_context])

    s = ''.join(map(str, options)).strip()
    if s and not s.startswith('-'):
        s = '-' + s
    return s
1244
def buffer_create(title=None):
    """Returns the pointer of the script's buffer, the buffer is created if it doesn't exist.

    If 'title' is given it's set as the buffer's title, a new buffer without title gets a default
    one.
    """
    buffer = weechat.buffer_search('python', SCRIPT_NAME)
    if buffer:
        if title:
            weechat.buffer_set(buffer, 'title', title)
        return buffer

    buffer = weechat.buffer_new(SCRIPT_NAME, 'buffer_input', '', '', '')
    properties = (
            ('time_for_each_line', '0'),
            ('nicklist', '0'),
            ('title', title or 'grep output buffer'),
            ('localvar_set_no_log', '1'),
            )
    for name, value in properties:
        weechat.buffer_set(buffer, name, value)
    return buffer
1257
1258 def buffer_input(data, buffer, input_data):
1259     """Repeats last search with 'input_data' as regexp."""
1260     try:
1261         cmd_grep_stop(buffer, input_data)
1262     except:
1263         return WEECHAT_RC_OK
1264     if input_data in ('q', 'Q'):
1265         weechat.buffer_close(buffer)
1266         return weechat.WEECHAT_RC_OK
1267
1268     global search_in_buffers, search_in_files
1269     global pattern
1270     try:
1271         if pattern and (search_in_files or search_in_buffers):
1272             # check if the buffer pointers are still valid
1273             for pointer in search_in_buffers:
1274                 infolist = weechat.infolist_get('buffer', pointer, '')
1275                 if not infolist:
1276                     del search_in_buffers[search_in_buffers.index(pointer)]
1277                 weechat.infolist_free(infolist)
1278             try:
1279                 cmd_grep_parsing(input_data)
1280             except Exception, e:
1281                 error('Argument error, %s' %e, buffer=buffer)
1282                 return WEECHAT_RC_OK
1283             try:
1284                 show_matching_lines()
1285             except Exception, e:
1286                 error(e)
1287     except NameError:
1288         error("There isn't any previous search to repeat.", buffer=buffer)
1289     return WEECHAT_RC_OK
1290
1291 ### Commands ###
def cmd_init():
    """Sets the globals that hold the search options, the pattern and the caches back to their
    initial values, and refreshes 'home_dir' with get_home()."""
    global home_dir, cache_dir, nick_dict
    global pattern_tmpl, pattern, matchcase, number, count, exact, hilight, invert
    global tail, head, after_context, before_context
    # options
    hilight = ''
    head = tail = False
    after_context = before_context = False
    invert = matchcase = count = exact = False
    # pattern
    pattern_tmpl = pattern = None
    number = None
    # paths and caches
    home_dir = get_home()
    cache_dir = {} # for avoid walking the dir tree more than once per command
    nick_dict = {} # nick cache for don't calculate nick color every time
1304
def cmd_grep_parsing(args):
    """Parses args for /grep and grep input buffer.

    'args' is the argument string as typed by the user. The result is stored in the globals that
    hold the search state: flag options toggle their current value, a leading 'log <name>' or
    'buffer <name>' sets 'log_name' or 'buffer_name', and the remaining words become the pattern
    after replacing any %{template} with its regexp. If no new pattern is given the previous one
    is kept.

    Raises Exception if there isn't a pattern or a numeric option has an invalid value, and
    getopt.GetoptError for unknown options.
    """
    global pattern_tmpl, pattern, matchcase, number, count, exact, hilight, invert
    global tail, head, after_context, before_context
    global log_name, buffer_name, only_buffers, all
    opts, args = getopt.gnu_getopt(args.split(), 'cmHeahtivn:bA:B:C:o', ['count', 'matchcase', 'hilight',
        'exact', 'all', 'head', 'tail', 'number=', 'buffer', 'after-context=', 'before-context=',
        'context=', 'invert', 'only-match'])
    if len(args) >= 2:
        if args[0] == 'log':
            del args[0]
            log_name = args.pop(0)
        elif args[0] == 'buffer':
            del args[0]
            buffer_name = args.pop(0)

    def tmplReplacer(match):
        """This function will replace templates with regexps"""
        # text of the whole template, it's left as is if the template can't be used
        t = match.group()
        tmpl_key, _, tmpl_args = match.group(1).partition(' ')
        try:
            template = templates[tmpl_key]
            if callable(template):
                r = template(tmpl_args)
                if not r:
                    error("Template %s returned empty string "\
                          "(WeeChat doesn't have enough data)." %t)
                return r
            else:
                return template
        except Exception:
            # unknown template or the template failed
            return t

    args = ' '.join(args) # join pattern for keep spaces
    if args:
        pattern_tmpl = args
        pattern = _tmplRe.sub(tmplReplacer, args)
        debug('Using regexp: %s', pattern)
    if not pattern:
        raise Exception('No pattern for grep the logs.')

    def positive_number(opt, val):
        """Returns 'val' as an integer, raises Exception if it isn't a number or it's negative."""
        try:
            value = int(val)
            if value < 0:
                raise ValueError
            return value
        except ValueError:
            if len(opt) == 1:
                opt = '-' + opt
            else:
                opt = '--' + opt
            raise Exception("argument for %s must be a positive integer." %opt)

    for opt, val in opts:
        opt = opt.strip('-')
        if opt in ('c', 'count'):
            count = not count
        elif opt in ('m', 'matchcase'):
            matchcase = not matchcase
        elif opt in ('H', 'hilight'):
            # hilight must be always a string!
            if hilight:
                hilight = ''
            else:
                hilight = '%s,%s' %(color_hilight, color_reset)
            # we pass the colors in the variable itself because check_string() must not use
            # weechat's module when applying the colors (this is for grep in a hooked process)
        elif opt in ('e', 'exact', 'o', 'only-match'):
            exact = not exact
            invert = False
        elif opt in ('a', 'all'):
            all = not all
        elif opt in ('h', 'head'):
            head = not head
            tail = False
        elif opt in ('t', 'tail'):
            tail = not tail
            head = False
        elif opt in ('b', 'buffer'):
            only_buffers = True
        elif opt in ('n', 'number'):
            number = positive_number(opt, val)
        elif opt in ('C', 'context'):
            n = positive_number(opt, val)
            after_context = n
            before_context = n
        elif opt in ('A', 'after-context'):
            after_context = positive_number(opt, val)
        elif opt in ('B', 'before-context'):
            before_context = positive_number(opt, val)
        elif opt in ('i', 'v', 'invert'):
            invert = not invert
            exact = False
    # number check
    if number is not None:
        if number == 0:
            # zero disables head and tail
            head = tail = False
            number = None
        elif head:
            head = number
        elif tail:
            tail = number
    else:
        n = get_config_int('default_tail_head')
        if head:
            head = n
        elif tail:
            tail = n
1416
def cmd_grep_stop(buffer, args):
    """Takes care of the input while there's a search running in background.

    Does nothing if there isn't any. Otherwise 'stop' cancels the search and any other input
    prints its status in 'buffer'; in both cases an Exception is raised so the caller knows that
    it shouldn't go on with the command.
    """
    global hook_file_grep, pattern, matched_lines, tmpFile
    if not hook_file_grep:
        return
    if args != 'stop':
        say(get_grep_file_status(), buffer)
        raise Exception

    weechat.unhook(hook_file_grep)
    hook_file_grep = None
    s = 'Search for \'%s\' stopped.' %pattern
    say(s, buffer)
    grep_buffer = weechat.buffer_search('python', SCRIPT_NAME)
    if grep_buffer:
        weechat.buffer_set(grep_buffer, 'title', s)
    del matched_lines
    tmpFile = None
    raise Exception
1433
1434 def cmd_grep(data, buffer, args):
1435     """Search in buffers and logs."""
1436     global pattern, matchcase, head, tail, number, count, exact, hilight
1437     try:
1438         cmd_grep_stop(buffer, args)
1439     except:
1440         return WEECHAT_RC_OK
1441
1442     if not args:
1443         weechat.command('', '/help %s' %SCRIPT_COMMAND)
1444         return WEECHAT_RC_OK
1445
1446     cmd_init()
1447     global log_name, buffer_name, only_buffers, all
1448     log_name = buffer_name = ''
1449     only_buffers = all = False
1450
1451     # parse
1452     try:
1453         cmd_grep_parsing(args)
1454     except Exception, e:
1455         error('Argument error, %s' %e)
1456         return WEECHAT_RC_OK
1457
1458     # find logs
1459     log_file = search_buffer = None
1460     if log_name:
1461         log_file = get_file_by_pattern(log_name, all)
1462         if not log_file:
1463             error("Couldn't find any log for %s. Try /logs" %log_name)
1464             return WEECHAT_RC_OK
1465     elif all:
1466         search_buffer = get_all_buffers()
1467     elif buffer_name:
1468         search_buffer = get_buffer_by_name(buffer_name)
1469         if not search_buffer:
1470             # there's no buffer, try in the logs
1471             log_file = get_file_by_name(buffer_name)
1472             if not log_file:
1473                 error("Logs or buffer for '%s' not found." %buffer_name)
1474                 return WEECHAT_RC_OK
1475         else:
1476             search_buffer = [search_buffer]
1477     else:
1478         search_buffer = [buffer]
1479
1480     # make the log list
1481     global search_in_files, search_in_buffers
1482     search_in_files = []
1483     search_in_buffers = []
1484     if log_file:
1485         search_in_files = log_file
1486     elif not only_buffers:
1487         #debug(search_buffer)
1488         for pointer in search_buffer:
1489             log = get_file_by_buffer(pointer)
1490             #debug('buffer %s log %s' %(pointer, log))
1491             if log:
1492                 search_in_files.append(log)
1493             else:
1494                 search_in_buffers.append(pointer)
1495     else:
1496         search_in_buffers = search_buffer
1497
1498     # grepping
1499     try:
1500         show_matching_lines()
1501     except Exception, e:
1502         error(e)
1503     return WEECHAT_RC_OK
1504
1505 def cmd_logs(data, buffer, args):
1506     """List files in Weechat's log dir."""
1507     cmd_init()
1508     global home_dir
1509     sort_by_size = False
1510     filter = []
1511
1512     try:
1513         opts, args = getopt.gnu_getopt(args.split(), 's', ['size'])
1514         if args:
1515             filter = args
1516         for opt, var in opts:
1517             opt = opt.strip('-')
1518             if opt in ('size', 's'):
1519                 sort_by_size = True
1520     except Exception, e:
1521         error('Argument error, %s' %e)
1522         return WEECHAT_RC_OK
1523
1524     # is there's a filter, filter_excludes should be False
1525     file_list = dir_list(home_dir, filter, filter_excludes=not filter)
1526     if sort_by_size:
1527         file_list.sort(key=get_size)
1528     else:
1529         file_list.sort()
1530
1531     file_sizes = map(lambda x: human_readable_size(get_size(x)), file_list)
1532     # calculate column lenght
1533     if file_list:
1534         L = file_list[:]
1535         L.sort(key=len)
1536         bigest = L[-1]
1537         column_len = len(bigest) + 3
1538     else:
1539         column_len = ''
1540
1541     buffer = buffer_create()
1542     if get_config_boolean('clear_buffer'):
1543         weechat.buffer_clear(buffer)
1544     file_list = zip(file_list, file_sizes)
1545     msg = 'Found %s logs.' %len(file_list)
1546
1547     print_line(msg, buffer, display=True)
1548     for file, size in file_list:
1549         separator = column_len and '.'*(column_len - len(file))
1550         prnt(buffer, '%s %s %s' %(strip_home(file), separator, size))
1551     if file_list:
1552         print_line(msg, buffer)
1553     return WEECHAT_RC_OK
1554
1555
1556 ### Completion ###
def completion_log_files(data, completion_item, buffer, completion):
    """Completion callback, adds every file given by dir_list(home_dir) to 'completion', without
    the home_dir part of its path."""
    global home_dir
    add_word = weechat.hook_completion_list_add
    at_end = weechat.WEECHAT_LIST_POS_END
    skip = len(home_dir)
    for log in dir_list(home_dir):
        add_word(completion, log[skip:], 0, at_end)
    return WEECHAT_RC_OK
1566
def completion_grep_args(data, completion_item, buffer, completion):
    """Completion callback: offer /grep's long options and template names.

    Adds '--<option>' for every known long option and '%{<name>' for every key
    of the module-level `templates` mapping, all with sorted positioning.
    `data`, `completion_item` and `buffer` are accepted but not used.

    Always returns WEECHAT_RC_OK.
    """
    add_item = weechat.hook_completion_list_add
    sorted_pos = weechat.WEECHAT_LIST_POS_SORT
    long_options = (
        'count', 'all', 'matchcase', 'hilight', 'exact', 'head', 'tail', 'number', 'buffer',
        'after-context', 'before-context', 'context', 'invert', 'only-match',
    )
    for option in long_options:
        add_item(completion, '--' + option, 0, sorted_pos)
    for template_name in templates:
        # opening part of a template placeholder; the user types the rest
        add_item(completion, '%{' + template_name, 0, sorted_pos)
    return WEECHAT_RC_OK
1574
1575
1576 ### Templates ###
# Template placeholder, e.g. "%{url weechat}": group 1 captures the template
# name together with whatever follows it, up to the closing brace (or up to the
# end of the string when the brace is missing).
_tmplRe = re.compile(r'%\{(\w+.*?)(?:\}|$)')
# Four dot-separated groups of one to three digits. The numeric range is not
# checked, so 999.999.999.999 matches as well; that is accepted on purpose.
ipAddress = r'\.'.join([r'\d{1,3}'] * 4)
# One or more dot-separated labels followed by a lowercase alphabetic suffix.
domain = r'[\w-]{2,}(?:\.[\w-]{2,})*\.[a-z]{2,}'
# scheme://(domain|ip), with an optional :port and an optional /path that ends
# at whitespace or at a closing ']', ')' or '>'.
url = r'\w+://(?:' + domain + '|' + ipAddress + r')(?::\d+)?(?:/[^\])>\s]*)?'
1583
def make_url_regexp(args):
    """Return a regexp source string that matches urls.

    With empty `args` the generic module-level `url` pattern is returned.
    Otherwise `args` is split on whitespace, every word is escaped, and the
    result matches anything starting with a scheme ("xxx://") or "www." that
    contains at least one of those words before the next whitespace.
    """
    if not args:
        return url
    escaped_words = [re.escape(word) for word in args.split()]
    alternation = r'(?:%s)' % '|'.join(escaped_words)
    return r'(?:\w+://|www\.)[^\s]*%s[^\s]*(?:/[^\])>\s]*)?' % alternation
1591
def make_simple_regexp(pattern):
    """Convert a wildcard pattern into a regexp source string.

    '*' becomes '.*', '?' becomes '.', and every other character is passed
    through re.escape so it is matched literally.
    """
    wildcards = {'*': '.*', '?': '.'}
    pieces = []
    for char in pattern:
        if char in wildcards:
            pieces.append(wildcards[char])
        else:
            pieces.append(re.escape(char))
    return ''.join(pieces)
1602
# Known template names, as used in "%{name ...}" placeholders. A value is either
# a ready-made regexp source string or a callable that takes one string
# argument and returns a regexp source string.
templates = dict(
    ip=ipAddress,
    url=make_url_regexp,
    escape=lambda s: re.escape(s),
    simple=make_simple_regexp,
    domain=domain,
)
1610
1611 ### Main ###
def delete_bytecode():
    """Remove the script's compiled '.pyc' file from script_path, if present.

    The function is called directly during start-up and its name is also
    passed to weechat.register(). Always returns WEECHAT_RC_OK.
    """
    global script_path
    compiled_file = path.join(script_path, SCRIPT_NAME + '.pyc')
    if not path.isfile(compiled_file):
        # nothing to clean up
        return WEECHAT_RC_OK
    os.remove(compiled_file)
    return WEECHAT_RC_OK
1618
# Script entry point. Everything below runs only when this file is executed as
# the main script, `import_ok` is true, and weechat.register() returns a true
# value. 'delete_bytecode' is handed to register() by name, in the argument slot
# that the WeeChat scripting API documents as the shutdown function.
# The statements are order-dependent: they define module globals (home_dir,
# script_path, bytecode, color_*, script_nick, context_sep, debug) that are
# presumably read by functions defined elsewhere in this file.
if __name__ == '__main__' and import_ok and \
        weechat.register(SCRIPT_NAME, SCRIPT_AUTHOR, SCRIPT_VERSION, SCRIPT_LICENSE, \
        SCRIPT_DESC, 'delete_bytecode', ''):
    home_dir = get_home()

    # Make the directory containing this script importable so the script can
    # import itself as a module, then remove any compiled '.pyc' copy of it.
    # (`global` at module level has no effect; the name is a module global
    # either way.)
    global script_path
    script_path = path.dirname(__file__)
    sys.path.append(script_path)
    delete_bytecode()

    # check python version
    # NOTE(review): `sys` is already used a few statements above, so it must be
    # imported at the top of the file too; this import looks redundant - confirm.
    import sys
    global bytecode
    # version_info is a longer tuple than (2, 6), so this comparison is already
    # true for 2.6.0. 'B' is presumably an interpreter flag consumed elsewhere
    # in the file - not visible from here.
    if sys.version_info > (2, 6):
        bytecode = 'B'
    else:
        bytecode = ''


    # Register the main command. Arguments, in order: command name, description
    # (taken from cmd_grep's docstring), argument synopsis, detailed help text,
    # completion template, callback name, callback data.
    weechat.hook_command(SCRIPT_COMMAND, cmd_grep.__doc__,
            "[log <file> | buffer <name> | stop] [-a|--all] [-b|--buffer] [-c|--count] [-m|--matchcase] "
            "[-H|--hilight] [-o|--only-match] [-i|-v|--invert] [(-h|--head)|(-t|--tail) [-n|--number <n>]] "
            "[-A|--after-context <n>] [-B|--before-context <n>] [-C|--context <n> ] <expression>",
# help
"""
     log <file>: Search in one log that matches <file> in the logger path.
                 Use '*' and '?' as wildcards.
  buffer <name>: Search in buffer <name>, if there's no buffer with <name> it will
                 try to search for a log file.
           stop: Stops a currently running search.
       -a --all: Search in all open buffers.
                 If used with 'log <file>' search in all logs that matches <file>.
    -b --buffer: Search only in buffers, not in file logs.
     -c --count: Just count the number of matched lines instead of showing them.
 -m --matchcase: Don't do case insensitive search.
   -H --hilight: Colour exact matches in output buffer.
-o --only-match: Print only the matching part of the line (unique matches).
 -v -i --invert: Print lines that don't match the regular expression.
      -t --tail: Print the last 10 matching lines.
      -h --head: Print the first 10 matching lines.
-n --number <n>: Overrides default number of lines for --tail or --head.
-A --after-context <n>: Shows <n> lines of trailing context after matching lines.
-B --before-context <n>: Shows <n> lines of leading context before matching lines.
-C --context <n>: Same as using both --after-context and --before-context simultaneously.
  <expression>: Expression to search.

Grep buffer:
  Input line accepts most arguments of /grep, it'll repeat last search using the new
  arguments provided. You can't search in different logs from the buffer's input.
  Boolean arguments like --count, --tail, --head, --hilight, ... are toggleable

Python regular expression syntax:
  See http://docs.python.org/lib/re-syntax.html

Grep Templates:
     %{url [text]}: Matches anything like an url, or an url with text.
             %{ip}: Matches anything that looks like an ip.
         %{domain}: Matches anything like a domain.
    %{escape text}: Escapes text in pattern.
 %{simple pattern}: Converts a pattern with '*' and '?' wildcards into a regexp.

Examples:
  Search for urls with the word 'weechat' said by 'nick'
    /grep nick\\t.*%{url weechat}
  Search for '*.*' string
    /grep %{escape *.*}
""",
            # completion template: alternatives are separated by '||'; the
            # grep_log_files and grep_arguments items are the completions
            # hooked further below.
            "buffer %(buffers_names) %(grep_arguments)|%*"
            "||log %(grep_log_files) %(grep_arguments)|%*"
            "||stop"
            "||%(grep_arguments)|%*",
            'cmd_grep' ,'')
    # Register the 'logs' command, implemented by cmd_logs; '--size' is its
    # only completion.
    weechat.hook_command('logs', cmd_logs.__doc__, "[-s|--size] [<filter>]",
            "-s --size: Sort logs by size.\n"
            " <filter>: Only show logs that match <filter>. Use '*' and '?' as wildcards.", '--size', 'cmd_logs', '')

    # Custom completion items referenced by the /grep completion template above.
    weechat.hook_completion('grep_log_files', "list of log files",
            'completion_log_files', '')
    weechat.hook_completion('grep_arguments', "list of arguments",
            'completion_grep_args', '')

    # settings: write the default from `settings` for every option that is not
    # set yet; options that are already set are left untouched.
    for opt, val in settings.iteritems():
        if not weechat.config_is_set_plugin(opt):
            weechat.config_set_plugin(opt, val)

    # colors: color strings looked up once here and kept as module globals
    color_date        = weechat.color('brown')
    color_info        = weechat.color('cyan')
    color_hilight     = weechat.color('lightred')
    color_reset       = weechat.color('reset')
    color_title       = weechat.color('yellow')
    color_summary     = weechat.color('lightcyan')
    color_delimiter   = weechat.color('chat_delimiters')
    color_script_nick = weechat.color('chat_nick')

    # pretty [grep]: the script name in brackets, with and without colors
    script_nick = '%s[%s%s%s]%s' %(color_delimiter, color_script_nick, SCRIPT_NAME, color_delimiter,
            color_reset)
    script_nick_nocolor = '[%s]' %SCRIPT_NAME
    # paragraph separator when using context options
    context_sep = '%s\t%s--' %(script_nick, color_info)

    # -------------------------------------------------------------------------
    # Debug
    # Defines the module-level debug() callable. It is a no-op unless the
    # 'debug' plugin option holds a non-empty value.
    # NOTE(review): the option value is tested for truthiness only, so any
    # non-empty string would enable debugging - confirm this is intended.

    if weechat.config_get_plugin('debug'):
        try:
            # custom debug module I use, allows me to inspect script's objects.
            import pybuffer
            debug = pybuffer.debugBuffer(globals(), '%s_debug' % SCRIPT_NAME)
        except:
            # Fallback when pybuffer cannot be used: print to the buffer
            # addressed by '', prefixed with script_nick.
            # NOTE(review): the bare except also hides errors other than a
            # missing module.
            def debug(s, *args):
                if not isinstance(s, basestring):
                    s = str(s)
                if args:
                    # extra arguments are %-interpolated into the message
                    s = s %args
                prnt('', '%s\t%s' %(script_nick, s))
    else:
        def debug(*args):
            pass
1742
1743 # vim:set shiftwidth=4 tabstop=4 softtabstop=4 expandtab textwidth=100: