From 83b091486668aac9fdf80eff1bd15ce0ac4273c4 Mon Sep 17 00:00:00 2001 From: Wolfgang Müller Date: Thu, 22 Apr 2021 18:54:05 +0200 Subject: Initial prototype --- .gitignore | 1 + quarg/__init__.py | 0 quarg/database/__init__.py | 0 quarg/database/filters.py | 48 +++++++++++ quarg/database/tables.py | 68 ++++++++++++++++ quarg/main.py | 193 +++++++++++++++++++++++++++++++++++++++++++++ quarg/quassel/__init__.py | 0 quarg/quassel/formatter.py | 139 ++++++++++++++++++++++++++++++++ quarg/quassel/types.py | 51 ++++++++++++ setup.py | 22 ++++++ 10 files changed, 522 insertions(+) create mode 100644 .gitignore create mode 100644 quarg/__init__.py create mode 100644 quarg/database/__init__.py create mode 100644 quarg/database/filters.py create mode 100644 quarg/database/tables.py create mode 100644 quarg/main.py create mode 100644 quarg/quassel/__init__.py create mode 100644 quarg/quassel/formatter.py create mode 100644 quarg/quassel/types.py create mode 100644 setup.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..2b192ef --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +quarg.egg-info/ diff --git a/quarg/__init__.py b/quarg/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/quarg/database/__init__.py b/quarg/database/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/quarg/database/filters.py b/quarg/database/filters.py new file mode 100644 index 0000000..ee2b3fb --- /dev/null +++ b/quarg/database/filters.py @@ -0,0 +1,48 @@ +from sqlalchemy.sql.expression import or_, between +from quarg.database.tables import Backlog, Buffer, Network, QuasselUser, Sender + +def msg_like(query): + return Backlog.message.like(query, escape='\\') + +def msg_contains(query): + return Backlog.message.contains(query, autoescape=True) + +def buffer(name): + return Buffer.buffername == name + +def nick(nickname): + return or_(Sender.sender.like(nickname + "!%"), Sender.sender == nickname) + +def prefix(pref): + return Backlog.senderprefixes.contains(pref) + +def buftype(btype): + return Buffer.buffertype == btype.value + +def msgflag(flag): + return Backlog.flags == flag.value + +def msgtype(mtype): + return Backlog.type == mtype.value + +def user(name): + return QuasselUser.username == name + +def network(name): + return Network.networkname == name + +def time_around(datetuple): + start, end = datetuple + return between(Backlog.time, start, end) + +def time_from(start): + return Backlog.time >= start + +def time_to(end): + return Backlog.time <= end + +def joined(boolean): + return Buffer.joined == boolean + +def any_filter(fun, items): + return or_(*[fun(item) for item in items]) diff --git a/quarg/database/tables.py b/quarg/database/tables.py new file mode 100644 index 0000000..ea82f52 --- /dev/null +++ b/quarg/database/tables.py @@ -0,0 +1,68 @@ +from sqlalchemy.schema import Column, ForeignKey +from sqlalchemy.types import BigInteger, Boolean, DateTime, Integer, Text +from sqlalchemy.ext.declarative import declarative_base +from sqlalchemy.orm import relationship + +# pylint: disable=too-few-public-methods + +Base = declarative_base() + +# Note: To keep SQLAlchemy from selecting columns that we will never end up +# using, we have commented out unused ones + +class Backlog(Base): + __tablename__ = 'backlog' + messageid = Column(BigInteger, primary_key=True) + time = Column(DateTime) + bufferid = Column(Integer, ForeignKey('buffer.bufferid')) + type = Column(Integer) + flags = Column(Integer) + senderid = Column(BigInteger, ForeignKey('sender.senderid')) + senderprefixes = Column(Text) + message = Column(Text) + + buffer = relationship('Buffer') + sender = relationship('Sender') + +class Sender(Base): + __tablename__ = 'sender' + senderid = Column(BigInteger, primary_key=True) + sender = Column(Text) + # realname = Column(Text) + # avatarurl = Column(Text) + +class Buffer(Base): + __tablename__ = 'buffer' + bufferid = Column(Integer, primary_key=True) + userid = Column(Integer, ForeignKey('user.userid')) + groupid = Column(Integer) + networkid = Column(Integer, ForeignKey('network.networkid')) + buffername = Column(Text) + buffercname = Column(Text) + buffertype = Column(Integer) + # lastmsgid = Column(BigInteger) + # lastseenmsgid = Column(BigInteger) + # markerlinemsgid = Column(BigInteger) + # bufferactivity = Column(Integer) + # highlightcount = Column(Integer) + # key = Column(Text) + joined = Column(Boolean) + # cipher = Column(Text) + + network = relationship('Network') + +class QuasselUser(Base): + __tablename__ = 'quasseluser' + userid = Column(Integer, primary_key=True) + username = Column(Text) + # password = Column(Text) + # hashversion = Column(Integer) + # authenticator = Column(Text) + +class Network(Base): + __tablename__ = 'network' + networkid = Column(Integer, primary_key=True) + userid = Column(Integer, ForeignKey('quasseluser.userid')) + networkname = Column(Text) + + user = relationship('QuasselUser') diff --git a/quarg/main.py b/quarg/main.py new file mode 100644 index 0000000..eeaf943 --- /dev/null +++ b/quarg/main.py @@ -0,0 +1,193 @@ +import argparse +import configparser +import os +import sys +from abc import ABCMeta, abstractmethod +from timeit import default_timer as timer + +import dateutil.parser +import dateutil.relativedelta +from sqlalchemy import create_engine +from sqlalchemy.orm import sessionmaker + +import quarg.database.filters as filters +from quarg.database.tables import Backlog, Buffer, Network, QuasselUser, Sender +from quarg.quassel.formatter import format_from +from quarg.quassel.types import BufferType, MessageFlag, MessageType + +def errx(msg): + sys.exit(f'quarg: {msg}') + +# TODO Find out what to do about passing INVALID or GROUP as BufferType +# TODO Find out what to do about passing various message flags + +class ParseEnum(argparse.Action, metaclass=ABCMeta): + def __call__(self, parser, namespace, value, option_string=None): + key = value.upper() + if key not in self.enumclass.__members__: + errx(f'Not a valid {self.enumclass.describe()}: {value}') + + saved = getattr(namespace, self.dest) or [] + saved.append(self.enumclass[key]) + setattr(namespace, self.dest, saved) + + @property + @abstractmethod + def enumclass(self): + pass + +class ParseMessageType(ParseEnum): + @property + def enumclass(self): + return MessageType + +class ParseMessageFlag(ParseEnum): + @property + def enumclass(self): + return MessageFlag + +class ParseBufferType(ParseEnum): + @property + def enumclass(self): + return BufferType + +def parse_isodate(date): + try: + parsed = dateutil.parser.isoparse(date) + except ValueError as err: + errx(f'isoparse: invalid date format \'{date}\', {err}') + except OverflowError as err: + errx(f'isoparse: date overflows: \'{date}\'') + + return parsed + +# FIXME make sure pylint disables are actually still needed everywhere + +class ParseDate(argparse.Action): + # pylint: disable=too-few-public-methods, unsupported-membership-test + def __call__(self, parser, namespace, datespec, option_string=None): + setattr(namespace, self.dest, parse_isodate(datespec)) + +class ParseAround(argparse.Action): + # pylint: disable=too-few-public-methods, unsupported-membership-test + def __call__(self, parser, namespace, aroundspec, option_string=None): + if '/' in aroundspec: + # FIXME / fine here? + datespec, rangespec = aroundspec.split('/', 1) + try: + hour_range = int(rangespec) + except ValueError as err: + errx(err) + else: + datespec, hour_range = (aroundspec, 12) + + date = parse_isodate(datespec) + + offset = dateutil.relativedelta.relativedelta(hours=hour_range) + setattr(namespace, self.dest, (date - offset, date + offset)) + +# TODO Make --after/--before and --around mutually exclusive +# FIXME why need default=None for --joined/--no-joined? + +# pylint: disable=line-too-long +cli = argparse.ArgumentParser() +cli.add_argument('query', nargs='*', help='match messages containing this query') +cli.add_argument('-d', action='store_true', dest='debug', help='print SQL query information') +cli.add_argument('-e', action='store_true', dest='expr', help='interpret query as LIKE expression') +cli.add_argument('-b', action='append', dest='buffer', help='match messages sent to this buffer') +cli.add_argument('-B', action=ParseBufferType, dest='buftype', help='match messages sent to buffers of this type') +cli.add_argument('-n', action='append', dest='nick', help='match messages sent by this nickname') +cli.add_argument('-N', action='append', dest='network', help='match messages sent to this network') +cli.add_argument('-u', action='append', dest='user', help='match messages received by this quassel user') +cli.add_argument('-t', action=ParseMessageType, dest='msgtype', help='match messages of this message type') +cli.add_argument('-f', action=ParseMessageFlag, dest='msgflag', help='match messages with this flag') +cli.add_argument('-p', action='append', dest='prefix', help='match nicks with this prefix') +cli.add_argument('--joined', default=None, action='store_true', dest='joined', help='match messages sent to channels which are currently joined') +cli.add_argument('--no-joined', default=None, action='store_false', dest='joined', help='match messages sent to channels which are not currently joined') +cli.add_argument('--after', action=ParseDate, metavar='DATE', help='match messages sent after this date') +cli.add_argument('--before', action=ParseDate, metavar='DATE', help='match messages sent before this date') +cli.add_argument('--around', action=ParseAround, metavar='DATE', help='match messages sent within 12 hours of this date') +# pylint: enable=line-too-long + +Session = sessionmaker() + +def get_config(): + xdg_config_home = os.path.expanduser('~/.config/') + if 'XDG_CONFIG_HOME' in os.environ: + xdg_config_home = os.environ['XDG_CONFIG_HOME'] + + path = os.path.join(xdg_config_home, 'quarg', 'config') + + config = configparser.ConfigParser() + config.read(path) + + return config + +def collect_predicates(args): + funs = { + 'query': filters.msg_like if args.expr else filters.msg_contains, + 'buffer': filters.buffer, + 'nick': filters.nick, + 'after': filters.time_from, + 'before': filters.time_to, + 'around': filters.time_around, + 'user': filters.user, + 'network': filters.network, + 'msgflag': filters.msgflag, + 'msgtype': filters.msgtype, + 'buftype': filters.buftype, + 'prefix': filters.prefix, + 'joined': filters.joined, + } + + for key, value in vars(args).items(): + # FIXME sadly the 'joined' namespace will contain a falsy value, so + # check against None or [] + if key in funs and value not in [None, []]: + fun = funs[key] + if args.debug: + print(f'{key}: {value}', file=sys.stderr) + if isinstance(value, list): + yield filters.any_filter(fun, value) + else: + yield fun(value) + +def run_query(session, predicates): + start = timer() + + query = session.query(Backlog).join(Sender).join(Buffer).join(Network).join(QuasselUser) + + for predicate in predicates: + query = query.filter(predicate) + + rows = query.order_by(Backlog.time).all() + + end = timer() + + return (rows, end - start) + +def main(): + config = get_config() + + if not config.has_option('Database', 'url'): + errx('No database URL set in config file.') + + args = cli.parse_args() + + engine = create_engine(config.get('Database', 'url'), echo=args.debug) + session = Session(bind=engine) + + predicates = list(collect_predicates(args)) + + if not predicates: + errx('Nothing to match.') + + rows, time = run_query(session, predicates) + + for row in rows: + print(format_from(row)) + + print(f'Query returned {len(rows)} lines in {time:.4f} seconds.', file=sys.stderr) + +if __name__ == "__main__": + main() diff --git a/quarg/quassel/__init__.py b/quarg/quassel/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/quarg/quassel/formatter.py b/quarg/quassel/formatter.py new file mode 100644 index 0000000..1297288 --- /dev/null +++ b/quarg/quassel/formatter.py @@ -0,0 +1,139 @@ +import datetime + +# from functools import partial +from typing import NamedTuple +from quarg.quassel.types import MessageType + +class User(NamedTuple): + nick: str + host: str + prefix: str + + @classmethod + def from_sender(cls, sender, prefixes=''): + nick, *host = sender.split('!', 1) + prefix = prefixes[0] if prefixes else '' + return cls(nick, host[0] if host else '', prefix) + + def __repr__(self): + return f'{self.prefix}{self.nick}' + +class Message(NamedTuple): + type: MessageType + time: datetime.datetime + buffer: str + user: User + message: str + + @classmethod + def from_backlog(cls, backlog): + return cls(MessageType(backlog.type), backlog.time, backlog.buffer.buffername, + User.from_sender(backlog.sender.sender, backlog.senderprefixes), backlog.message) + +def format_from(backlog_row): + message = Message.from_backlog(backlog_row) + formatter = FORMATTERS[message.type] + timestamp = message.time.isoformat(sep=' ', timespec='seconds') + + return f'{timestamp}\t{message.buffer}\t{formatter(message)}' + +def format_privmsg(msg): + return f'<{msg.user}> {msg.message}' + +def format_notice(msg): + return f'[{msg.user}] {msg.message}' + +def format_action(msg): + return f'-*- {msg.user} {msg.message}' + +def format_nick(msg): + return f'<-> {msg.user} is now known as {msg.message}' + +def format_mode(msg): + return f'*** Mode {msg.message} by {msg.user}' + +def format_join(msg): + return f'--> {msg.user} ({msg.user.host}) has joined {msg.buffer}' + +def format_part(msg): + if msg.message: + return f'<-- {msg.user} has left {msg.buffer} ({msg.message})' + + return f'<-- {msg.user} has left {msg.buffer}' + +def format_quit(msg): + if msg.message: + return f'<-- {msg.user} has quit ({msg.message})' + + return f'<-- {msg.user} has quit' + +def format_kick(msg): + target, *kickmsg = msg.message.split(' ', 1) + + if kickmsg: + return f'<-* {msg.user} has kicked {target} from {msg.buffer} ({kickmsg[0]})' + + return f'<-* {msg.user} has kicked {target} from {msg.buffer}' + +def format_kill(msg): + # pylint: disable=line-too-long + + # As of 2021-04-24 not even Quassel implements printing this message [1]. + # They do have a symbol [2] for it, however, so use that along with the message + # [1] https://github.com/quassel/quassel/blob/285215315e6f2420724532323a4b1bccae156cb1/src/uisupport/uistyle.cpp#L950 + # [2] https://github.com/quassel/quassel/blob/285215315e6f2420724532323a4b1bccae156cb1/src/uisupport/uistyle.cpp#L1079-L1080 + return f'<-x {msg.message}' + +def format_generic(msg): + return f'* {msg.message}' + +def parse_netsplit(splitmsg): + # splitmsg contains user!host separated by #:# ... + elements = splitmsg.split('#:#') + + # ... however, the last element contains the split servers instead + servers = elements.pop().split(' ', 1) + + # TODO: This takes ages if the netsplit was large + users = [User.from_sender(e) for e in elements] + + return users, servers + +def format_netsplit_join(msg): + users, (srv_left, srv_right) = parse_netsplit(msg.message) + have_joined = ', '.join(user.nick for user in users) + return f'=> Netsplit between {srv_left} and {srv_right} ended. Users joined: {have_joined}' + +def format_netsplit_quit(msg): + users, (srv_left, srv_right) = parse_netsplit(msg.message) + have_quit = ', '.join(user.nick for user in users) + return f'<= Netsplit between {srv_left} and {srv_right}. Users quit: {have_quit}' + +# TODO inline the format strings here and have a wrapper function that gives msg? +# TODO also .format(**msg) + +def format_from_string(string, msg): + return string.format(**msg._asdict()) + +# MessageType.PART: partial(format_from_string, '<-- {user} has left {buffer}'), + +FORMATTERS = { + MessageType.PRIVMSG: format_privmsg, + MessageType.NOTICE: format_notice, + MessageType.ACTION: format_action, + MessageType.NICK: format_nick, + MessageType.MODE: format_mode, + MessageType.JOIN: format_join, + MessageType.PART: format_part, + MessageType.QUIT: format_quit, + MessageType.KICK: format_kick, + MessageType.KILL: format_kill, + MessageType.SERVER: format_generic, + MessageType.INFO: format_generic, + MessageType.ERROR: format_generic, + MessageType.DAYCHANGE: format_generic, + MessageType.TOPIC: format_generic, + MessageType.NETSPLIT_JOIN: format_netsplit_join, + MessageType.NETSPLIT_QUIT: format_netsplit_quit, + MessageType.INVITE: format_generic, +} diff --git a/quarg/quassel/types.py b/quarg/quassel/types.py new file mode 100644 index 0000000..36f385a --- /dev/null +++ b/quarg/quassel/types.py @@ -0,0 +1,51 @@ +from enum import Enum + +class BufferType(Enum): + INVALID = 0x0 + STATUS = 0x1 + CHANNEL = 0x2 + QUERY = 0x4 + GROUP = 0x8 + + @classmethod + def describe(cls): + return 'buffer type' + +class MessageFlag(Enum): + NONE = 0x0 + SELF = 0x1 + HIGHLIGHT = 0x2 + REDIRECTED = 0x4 + SERVERMSG = 0x8 + STATUSMSG = 0x10 + IGNORED = 0x20 + BACKLOG = 0x80 + + @classmethod + def describe(cls): + return 'message flag' + +class MessageType(Enum): + PRIVMSG = 0x1 + NOTICE = 0x2 + ACTION = 0x4 + NICK = 0x8 + MODE = 0x10 + JOIN = 0x20 + PART = 0x40 + QUIT = 0x80 + KICK = 0x100 + KILL = 0x200 + SERVER = 0x400 + INFO = 0x800 + ERROR = 0x1000 + # as far as I can see DAYCHANGE does not show up in the database + DAYCHANGE = 0x2000 + TOPIC = 0x4000 + NETSPLIT_JOIN = 0x8000 + NETSPLIT_QUIT = 0x10000 + INVITE = 0x20000 + + @classmethod + def describe(cls): + return 'message type' diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..b6149d0 --- /dev/null +++ b/setup.py @@ -0,0 +1,22 @@ +import setuptools + +setuptools.setup( + name="quarg", + version="0.0.1", + author="Wolfgang Müller", + author_email="wolf@oriole.systems", + packages=setuptools.find_packages(), + python_requires=">=3.8", + + entry_points = { + 'console_scripts': [ + 'quarg = quarg.main:main', + ] + }, + + install_requires=[ + 'python-dateutil', + 'psycopg2', + 'sqlalchemy>=1.4', + ] +) -- cgit v1.2.3-2-gb3c3