diff --git a/archivist.py b/archivist.py index 69d35fb..051a413 100644 --- a/archivist.py +++ b/archivist.py @@ -7,8 +7,8 @@ from generator import Generator class Archivist(object): def __init__(self, logger, chatdir=None, chatext=None, admin=0, - period_inc=5, save_count=15, max_period=100000, - read_only=False + period_inc=5, save_count=15, min_period=1, + max_period=100000, read_only=False ): if chatdir is None or len(chatdir) == 0: chatdir = "./" @@ -19,16 +19,20 @@ class Archivist(object): self.chatext = chatext self.period_inc = period_inc self.save_count = save_count + self.min_period = min_period self.max_period = max_period self.read_only = read_only + # Formats and returns a chat folder path def chat_folder(self, *formatting, **key_format): return (self.chatdir + "/chat_{tag}").format(*formatting, **key_format) + # Formats and returns a chat file path def chat_file(self, *formatting, **key_format): return (self.chatdir + "/chat_{tag}/{file}{ext}").format(*formatting, **key_format) - def store(self, tag, data, vocab_dumper): + # Stores a Reader/Generator file pair + def store(self, tag, data, vocab): chat_folder = self.chat_folder(tag=tag) chat_card = self.chat_file(tag=tag, file="card", ext=".txt") @@ -45,17 +49,18 @@ class Archivist(object): file.write(data) file.close() - if vocab_dumper is not None: + if vocab is not None: chat_record = self.chat_file(tag=tag, file="record", ext=self.chatext) file = open(chat_record, 'w', encoding="utf-16") - vocab_dumper(file) + file.write(vocab) file.close() + # Loads a Generator's vocabulary file dump def load_vocab(self, tag): filepath = self.chat_file(tag=tag, file="record", ext=self.chatext) try: file = open(filepath, 'r', encoding="utf-16") - record = Generator.load(file) + record = file.read() file.close() return record except Exception as e: @@ -63,6 +68,7 @@ class Archivist(object): self.logger.exception(e) return None + # Loads a Generator's vocabulary file dump in the old UTF-8 encoding def 
load_vocab_old(self, tag): filepath = self.chat_file(tag=tag, file="record", ext=self.chatext) try: @@ -75,7 +81,8 @@ class Archivist(object): self.logger.exception(e) return None - def load_reader(self, tag): + # Loads a Metadata card file dump + def load_card(self, tag): filepath = self.chat_file(tag=tag, file="card", ext=".txt") try: reader_file = open(filepath, 'r') @@ -86,16 +93,21 @@ class Archivist(object): self.logger.error("Metadata file {} not found.".format(filepath)) return None + # Returns a Reader for a given ID with an already working vocabulary - be it + # new or loaded from file def get_reader(self, tag): - reader = self.load_reader(tag) - if reader: - vocab = self.load_vocab(tag) - if not vocab: + card = self.load_card(tag) + if card: + vocab_dump = self.load_vocab(tag) + if vocab_dump: + vocab = Generator.loads(vocab_dump) + else: vocab = Generator() - return Reader.FromCard(reader, vocab, self.max_period, self.logger) + return Reader.FromCard(card, vocab, self.max_period, self.logger) else: return None + # Count the stored chats def chat_count(self): count = 0 directory = os.fsencode(self.chatdir) @@ -105,6 +117,7 @@ class Archivist(object): count += 1 return count + # Crawl through all the stored Readers def readers_pass(self): directory = os.fsencode(self.chatdir) for subdir in os.scandir(directory): @@ -124,6 +137,7 @@ class Archivist(object): self.logger.exception(e) raise e + # Load and immediately store every Reader def update(self): for reader in self.readers_pass(): if reader.vocab is None: diff --git a/generator.py b/generator.py index a004a70..52b2fef 100644 --- a/generator.py +++ b/generator.py @@ -4,12 +4,12 @@ import random import json +# This splits strings into lists of words delimited by space. 
+# Other whitespaces are appended space characters so they are included +# as their own Markov chain element, so as not to pollute with +# "different" words that would only differ in having a whitespace +# attached or not def rewrite(text): - # This splits strings into lists of words delimited by space. - # Other whitespaces are appended space characters so they are included - # as their own Markov chain element, so as not to pollude with - # "different" words that would only differ in having a whitespace - # attached or not words = text.replace('\n', '\n ').split(' ') i = 0 while i < len(words): @@ -23,24 +23,24 @@ def rewrite(text): return words +# This gives a dictionary key from 2 words, ignoring case def getkey(w1, w2): - # This gives a dictionary key from 2 words, ignoring case key = (w1.strip().casefold(), w2.strip().casefold()) return str(key) +# This turns a dictionary key back into 2 separate words def getwords(key): - # This turns a dictionary key back into 2 separate words words = key.strip('()').split(', ') for i in range(len(words)): words[i].strip('\'') return words +# Generates triplets of words from the given data string. So if our string +# were "What a lovely day", we'd generate (What, a, lovely) and then +# (a, lovely, day). def triplets(wordlist): - # Generates triplets of words from the given data string. So if our string - # were "What a lovely day", we'd generate (What, a, lovely) and then - # (a, lovely, day). 
if len(wordlist) < 3: return @@ -49,24 +49,25 @@ def triplets(wordlist): class Generator(object): + # Marks when we want to create a Generator object from a given JSON MODE_JSON = "MODE_JSON" - # This is to mark when we want to create a Generator object from a given JSON + # Marks when we want to create a Generator object from a given list of words MODE_LIST = "MODE_LIST" - # This is to mark when we want to create a Generator object from a given list of words + # Marks when we want to create a Generator object from a given dictionary MODE_DICT = "MODE_DICT" - # This is to mark when we want to create a Generator object from a given dictionary - MODE_CHAT_DATA = "MODE_CHAT_DATA" - # This is to mark when we want to create a Generator object from Chat data (WIP) + # Marks when we want to create a Generator object from a whole Chat history (WIP) + MODE_HIST = "MODE_HIST" + # Marks the beginning of a message HEAD = "\n^MESSAGE_SEPARATOR^" + # Marks the end of a message TAIL = " ^MESSAGE_SEPARATOR^" def __init__(self, load=None, mode=None): if mode is not None: - # We ain't creating a new Generator from scratch if mode == Generator.MODE_JSON: self.cache = json.loads(load) elif mode == Generator.MODE_LIST: @@ -74,45 +75,44 @@ class Generator(object): self.load_list(load) elif mode == Generator.MODE_DICT: self.cache = load + # TODO: Chat History mode else: self.cache = {} - # The cache is where we store our words + # Loads a text divided into a list of lines def load_list(self, many): - # Takes a list of strings and adds them to the cache one by one for one in many: self.add(one) + # Dumps the cache dictionary into a JSON-formatted string def dumps(self): - # Dumps the cache dictionary into a JSON-formatted string return json.dumps(self.cache, ensure_ascii=False) + # Dumps the cache dictionary into a file, formatted as JSON def dump(self, f): - json.dump(self.cache, f, ensure_ascii=False, indent='') + json.dump(self.cache, f, ensure_ascii=False) + # Loads the cache 
dictionary from a JSON-formatted string def loads(dump): - # Loads the cache dictionary from a JSON-formatted string if len(dump) == 0: # faulty dump gives default Generator return Generator() # otherwise return Generator(load=dump, mode=Generator.MODE_JSON) + # Loads the cache dictionary from a file, formatted as JSON def load(f): return Generator(load=json.load(f), mode=Generator.MODE_DICT) def add(self, text): - # This takes a string and stores it in the cache, preceding it - # with the HEAD that marks the beginning of a new message and - # following it with the TAIL that marks the end words = [Generator.HEAD] text = rewrite(text + Generator.TAIL) words.extend(text) self.database(words) + # This takes a list of words and stores it in the cache, adding + # a special entry for the first word (the HEAD marker) def database(self, words): - # This takes a list of words and stores it in the cache, adding - # a special entry for the first word (the HEAD marker) for w1, w2, w3 in triplets(words): if w1 == Generator.HEAD: if w1 in self.cache: @@ -128,50 +128,50 @@ class Generator(object): # the new end of chain self.cache[key] = [w3] + # This generates the Markov text/word chain + # silence=True disables Telegram user mentions def generate(self, size=50, silence=False): - # This generates the Markov text/word chain - # silence tells if mentions should be silenced if len(self.cache) == 0: # If there is nothing in the cache we cannot generate anything return "" + # Start with a message HEAD and a random message starting word w1 = random.choice(self.cache[Generator.HEAD]) w2 = random.choice(self.cache[getkey(Generator.HEAD, w1)]) - # Start with a message HEAD and a random message starting word gen_words = [] + # As long as we don't go over the max. message length (in n. of words)... for i in range(size): - # As long as we don't go over the size value (max. message length)... 
if silence and w1.startswith("@") and len(w1) > 1: + # ...append word 1, disabling any possible Telegram mention gen_words.append(w1.replace("@", "(@)")) - # ...append the first word, silencing any possible username mention else: + # ..append word 1 gen_words.append(w1) - # ..append the first word if w2 == Generator.TAIL or not getkey(w1, w2) in self.cache: # When there's no key from the last 2 words to follow the chain, # or we reached a separation between messages, stop break else: + # Get a random third word that follows the chain of words 1 + # and 2, then make words 2 and 3 to be the new words 1 and 2 w1, w2 = w2, random.choice(self.cache[getkey(w1, w2)]) - # Make the second word to be the new first word, and - # make a new random word that follows the chain to be - # the new second word return ' '.join(gen_words) + # Cross a second Generator into this one def cross(self, gen): - # cross 2 Generators into this one for key in gen.cache: if key in self.cache: self.cache[key].extend(gen.cache[key]) else: self.cache[key] = list(gen.cache[key]) + # Count again the number of messages + # (for whenever the count number is unreliable) def new_count(self): - # Count again the number of messages if the current number is unreliable count = 0 for key in self.cache: for word in self.cache[key]: if word == Generator.TAIL: + # ...by just counting message separators count += 1 - # by just counting message separators return count diff --git a/memorylist.py b/memorylist.py index f62c05f..3e12da0 100644 --- a/memorylist.py +++ b/memorylist.py @@ -1,11 +1,19 @@ #!/usr/bin/env python3 -from collections.abc import MutableSequence +from collections.abc import Sequence -class MemoryList(MutableSequence): +class MemoryList(Sequence): + """Special "memory list" class that: + - Whenever an item is added that was already in the list, + it gets moved to the back instead + - Whenever an item is looked for, it gets moved to the + back + - If a new item is added that goes over a given 
capacity + limit, the item at the front (oldest accessed item) + is removed (and returned)""" + def __init__(self, capacity, data=None): - """Initialize the class""" super(MemoryList, self).__init__() self._capacity = capacity if (data is not None): @@ -16,37 +24,25 @@ class MemoryList(MutableSequence): def __repr__(self): return "<{0} {1}, capacity {2}>".format(self.__class__.__name__, self._list, self._capacity) + def __str__(self): + return "{0}, {1}/{2}".format(self._list, len(self._list), self._capacity) + def __len__(self): - """List length""" return len(self._list) def capacity(self): return self._capacity def __getitem__(self, ii): - """Get a list item""" return self._list[ii] - def __delitem__(self, ii): - """Delete an item""" - del self._list[ii] - - def __setitem__(self, ii, val): - self._list[ii] = val - - def __str__(self): - return str(self._list) - def __contains__(self, val): return val in self._list def __iter__(self): return self._list.__iter__() - def insert(self, ii, val): - self._list.insert(ii, val) - - def append(self, val): + def add(self, val): if val in self._list: self._list.remove(val) @@ -58,8 +54,8 @@ class MemoryList(MutableSequence): else: return None - def get_next(self, cond): - val = next((v for v in self._list if cond(v)), None) + def search(self, cond, *args, **kwargs): + val = next((v for v in self._list if cond(v)), *args, **kwargs) if val is not None: self._list.remove(val) self._list.append(val) diff --git a/metadata.py b/metadata.py index 2a89ed7..f54e28d 100644 --- a/metadata.py +++ b/metadata.py @@ -1,8 +1,8 @@ #!/usr/bin/env python3 +# This reads a line in the format 'VARIABLE=value' and gives me the value. +# See Metadata.loadl(...) for more details def parse_card_line(line): - # This reads a line in the format 'VARIABLE=value' and gives me the value. - # See Metadata.loadl(...) 
for more details s = line.split('=', 1) if len(s) < 2: return "" @@ -10,35 +10,37 @@ def parse_card_line(line): return s[1] +# This is a chat's Metadata, holding different configuration values for +# Velasco and other miscellaneous information about the chat class Metadata(object): - # This is a chat's Metadata, holding different configuration values for - # Velasco and other miscellaneous information about the chat - def __init__(self, cid, ctype, title, count=0, period=None, answer=0.5, restricted=False, silenced=False): - self.id = str(cid) # The Telegram chat's ID - self.type = ctype + self.id = str(cid) # The type of chat - self.title = title + self.type = ctype # The title of the chat + self.title = title if period is None: if "group" in ctype: - period = 10 # Default period for groups and supergroups + period = 10 else: - period = 2 # Default period for private or channel chats + period = 2 + # The number of messages read in a chat self.count = count - # The number of messages read - self.period = period # This chat's configured period - self.answer = answer + self.period = period # This chat's configured answer probability - self.restricted = restricted + self.answer = answer # Wether some interactions are restricted to admins only - self.silenced = silenced + self.restricted = restricted # Wether messages should silence user mentions + self.silenced = silenced + # Sets the period for a chat + # It has to be at least 1 + # Returns the new value def set_period(self, period): if period < 1: raise ValueError('Tried to set period a value less than 1.') @@ -46,6 +48,9 @@ class Metadata(object): self.period = period return self.period + # Sets the answer probability + # It's a percentage represented as a decimal between 0 and 1 + # Returns the new value def set_answer(self, prob): if prob > 1: raise ValueError('Tried to set answer probability higher than 1.') @@ -55,6 +60,8 @@ class Metadata(object): self.answer = prob return self.answer + # Dumps the metadata 
into a list of lines, then joined together in a string, + # ready to be written into a file def dumps(self): lines = ["CARD=v5"] lines.append("CHAT_ID=" + self.id) @@ -68,10 +75,12 @@ class Metadata(object): # lines.append("WORD_DICT=") return ('\n'.join(lines)) + '\n' + # Creates a Metadata object from a previous text dump def loads(text): lines = text.splitlines() return Metadata.loadl(lines) + # Creates a Metadata object from a list of metadata lines def loadl(lines): # In a perfect world, I would get both the variable name and its corresponding value # from each side of the lines, but I know the order in which the lines are writen in @@ -90,6 +99,14 @@ class Metadata(object): silenced=(parse_card_line(lines[8]) == 'True') ) elif version == "v3": + # Deprecated: this elif block will be removed in a new version + print("Warning! This Card format ({}) is deprecated. Update all".format(version), + "your files in case that there are still some left in old formats before", + "downloading the next update.") + + # This is kept for retrocompatibility purposes, in case someone did a fork + # of this repo and still has some chat files that haven't been updated in + # a long while -- but I already converted all my files to v5 return Metadata(cid=parse_card_line(lines[1]), ctype=parse_card_line(lines[2]), title=parse_card_line(lines[3]), @@ -99,6 +116,12 @@ class Metadata(object): restricted=(parse_card_line(lines[6]) == 'True') ) elif version == "v2": + # Deprecated: this elif block will be removed in a new version + print("Warning! This Card format ({}) is deprecated. 
Update all".format(version), + "your files in case that there are still some left in old formats before", + "downloading the next update.") + + # Also kept for backward-compatibility purposes return Metadata(cid=parse_card_line(lines[1]), ctype=parse_card_line(lines[2]), title=parse_card_line(lines[3]), @@ -107,6 +130,12 @@ class Metadata(object): answer=float(parse_card_line(lines[5])) ) elif version == "dict:": + # Deprecated: this elif block will be removed in a new version + print("Warning! This Card format ('dict') is deprecated. Update all", + "your files in case that there are still some left in old formats before", + "downloading the next update.") + + # Also kept for backward-compatibility purposes # At some point I decided to number the versions of each dictionary format, # but this was not always the case. This is what you get if you try to read # whatever there is in very old files where the version should be @@ -117,7 +146,13 @@ class Metadata(object): period=int(lines[3]) ) else: - # This is for the oldest of files + # Deprecated: this else block will be removed in a new version + print("Warning! This ancient Card format is deprecated. 
Update all", + "your files in case that there are still some left in old formats before", + "downloading the next update.") + + # Also kept for retrocompatibility purposes + # This is for the oldest of file formats return Metadata(cid=lines[0], ctype=lines[1], title=lines[2], diff --git a/reader.py b/reader.py index e4ce04b..4c41ca6 100644 --- a/reader.py +++ b/reader.py @@ -5,9 +5,9 @@ from metadata import Metadata, parse_card_line from generator import Generator +# This gives me the chat title, or the first and maybe last +# name of the user as fallback if it's a private chat def get_chat_title(chat): - # This gives me the chat title, or the first and maybe last - # name of the user as fallback if it's a private chat if chat.title is not None: return chat.title elif chat.first_name is not None: @@ -25,40 +25,52 @@ class Memory(object): self.content = content +# This is a chat Reader object, in charge of managing the parsing of messages +# for a specific chat, and holding said chat's metadata class Reader(object): - # This is a chat Reader object, in charge of managing the parsing of messages - # for a specific chat, and holding said chat's metadata - + # Media tagging variables TAG_PREFIX = "^IS_" STICKER_TAG = "^IS_STICKER^" ANIM_TAG = "^IS_ANIMATION^" VIDEO_TAG = "^IS_VIDEO^" - def __init__(self, metadata, vocab, max_period, logger): + def __init__(self, metadata, vocab, max_period, logger, names=[]): + # The Metadata object holding a chat's specific bot parameters self.meta = metadata + # The Generator object holding the vocabulary learned so far self.vocab = vocab + # The maximum period allowed for this bot self.max_period = max_period + # The short term memory, for recently read messages (see below) self.short_term_mem = [] + # The countdown until the period ends and it's time to talk self.countdown = self.meta.period + # The logger object shared program-wide self.logger = logger + # The bot's nicknames + username + self.names = names + # Create a new Reader 
from a Chat object def FromChat(chat, max_period, logger): - # Create a new Reader from a Chat object meta = Metadata(chat.id, chat.type, get_chat_title(chat)) vocab = Generator() return Reader(meta, vocab, max_period, logger) + # TODO: Create a new Reader from a whole Chat history def FromHistory(history, vocab, max_period, logger): - # Create a new Reader from a whole Chat history (WIP) return None + # Create a new Reader from a meta's file dump def FromCard(meta, vocab, max_period, logger): - # Create a new Reader from a meta's file dump metadata = Metadata.loads(meta) return Reader(metadata, vocab, max_period, logger) + # Deprecated: this method will be removed in a new version def FromFile(text, max_period, logger, vocab=None): - # Load a Reader from a file's text string (obsolete) + print("Warning! This method of loading a Reader from file (Reader.FromFile(...))", + "is deprecated, and will be removed from the next update. Use FromCard instead.") + + # Load a Reader from a file's text string lines = text.splitlines() version = parse_card_line(lines[0]).strip() version = version if len(version.strip()) > 1 else lines[4] @@ -86,27 +98,33 @@ class Reader(object): r = Reader(meta, vocab, max_period, logger) return r + # Returns a nice little tuple package for the archivist to save to file. + # Also commits to long term memory any pending short term memories def archive(self): - # Returns a nice lice little tuple package for the archivist to save to file. - # Also commits to long term memory any pending short term memories self.commit_memory() - return (self.meta.id, self.meta.dumps(), self.vocab.dump) + return (self.meta.id, self.meta.dumps(), self.vocab.dumps()) + # Checks type. Returns "True" for "group" even if it's a supergroup def check_type(self, t): - # Checks type. 
Returns "True" for "group" even if it's supergroup return t in self.meta.type + # Hard check def exactly_type(self, t): - # Hard check return t == self.meta.type def set_title(self, title): self.meta.title = title + # Sets a new period in the Metadata def set_period(self, period): - if period < self.countdown: - self.countdown = max(period, 1) - return self.meta.set_period(min(period, self.max_period)) + # The period cannot exceed max_period; otherwise, truncate to max_period + new_period = min(period, self.max_period) + set_period = self.meta.set_period(new_period) + if new_period == set_period and new_period < self.countdown: + # If successfully changed and the new period is less than the current + # remaining countdown, reduce the countdown to the new period + self.countdown = new_period + return new_period def set_answer(self, prob): return self.meta.set_answer(prob) @@ -141,6 +159,8 @@ class Reader(object): def toggle_silence(self): self.meta.silenced = (not self.meta.silenced) + # Rolls the chance for answering in this specific chat, + # according to the answer probability def is_answering(self): rand = random.random() chance = self.answer() @@ -150,10 +170,13 @@ class Reader(object): return False return rand <= chance + # Adds a new message to the short term memory def add_memory(self, mid, content): mem = Memory(mid, content) self.short_term_mem.append(mem) + # Returns a random message ID from the short memory, + # when answering to a random comment def random_memory(self): if len(self.short_term_mem) == 0: return None @@ -163,6 +186,10 @@ class Reader(object): def reset_countdown(self): self.countdown = self.meta.period + # Reads a message + # This process will determine which kind of message it is (Sticker, Anim, + # Video, or actual text) and pre-process it accordingly for the Generator, + # then store it in the short term memory def read(self, message): mid = str(message.message_id) @@ -174,16 +201,25 @@ class Reader(object): self.learn_drawing(mid, 
Reader.ANIM_TAG, message.animation.file_id) elif message.video is not None: self.learn_drawing(mid, Reader.VIDEO_TAG, message.video.file_id) + self.meta.count += 1 + # Stores a multimedia message in the short term memory as a text with + # TAG + the media file ID def learn_drawing(self, mid, tag, drawing): self.learn(mid, tag + " " + drawing) + # Stores a text message in the short term memory def learn(self, mid, text): - if "velasco" in text.casefold() and len(text.split()) <= 3: - return + for name in self.names: + if name.casefold() in text.casefold() and len(text.split()) <= 3: + # If it's 3 words or fewer and one of the bot's names is in + # the message, ignore it as it's most probably just a summon + return self.add_memory(mid, text) + # Commits the short term memory messages into the "long term memory" + # aka the vocabulary Generator's cache def commit_memory(self): for mem in self.short_term_mem: self.vocab.add(mem.content) diff --git a/speaker.py b/speaker.py index 3d44df3..434800c 100644 --- a/speaker.py +++ b/speaker.py @@ -8,19 +8,24 @@ from reader import Reader, get_chat_title from telegram.error import NetworkError +# Auxiliary function to print to stderr (alongside logger messages) def eprint(*args, **kwargs): print(*args, end=' ', file=stderr, **kwargs) +# Auxiliary function to send a text to a chat through a bot def send(bot, cid, text, replying=None, formatting=None, logger=None, **kwargs): - kwargs["parse_mode"] = formatting - kwargs["reply_to_message_id"] = replying + # Markdown or HTML formatting (both argument names are valid) + kwargs["parse_mode"] = formatting or kwargs.get("parse_mode") + # ID of the message it's replying to (both argument names are valid) + kwargs["reply_to_message_id"] = replying or kwargs.get("reply_to_message_id") + # Reminder that dict.get(key) defaults to None if the key isn't found if text.startswith(Reader.TAG_PREFIX): + # We're sending a media file ID words = text.split(maxsplit=1) if logger: logger.info('Sending {} 
"{}" to {}'.format(words[0][4:-1], words[1], cid)) - # eprint('[]') # Logs something like 'Sending VIDEO "VIDEO_ID" to CHAT_ID' if words[0] == Reader.STICKER_TAG: @@ -30,16 +35,18 @@ def send(bot, cid, text, replying=None, formatting=None, logger=None, **kwargs): elif words[0] == Reader.VIDEO_TAG: return bot.send_video(cid, words[1], **kwargs) else: - text + # It's text if logger: - mtype = "reply" if replying else "message" + mtype = "reply" if (kwargs.get("reply_to_message_id")) else "message" logger.info("Sending a {} to {}: '{}'".format(mtype, cid, text)) # eprint('.') return bot.send_message(cid, text, **kwargs) class Speaker(object): + # Marks if the period is a fixed time when to send a new message ModeFixed = "FIXED_MODE" + # Marks if the "periodic" messages have a weighted random chance to be sent, depending on the period ModeChance = "CHANCE_MODE" def __init__(self, username, archivist, logger, admin=0, nicknames=[], @@ -47,35 +54,55 @@ class Speaker(object): memory=20, mute_time=60, save_time=3600, bypass=False, cid_whitelist=None, max_len=50 ): + # List of nicknames other than the username that the bot can be called as self.names = nicknames + # Mute time for Telegram network errors self.mute_time = mute_time + # Last mute timestamp self.mute_timer = None + # The bot's username, "@" included self.username = username - + # The maximum chat period for this bot self.max_period = archivist.max_period + + # The Archivist functions to load and save from and to files self.get_reader_file = archivist.get_reader self.store_file = archivist.store + + # Archivist function to crawl all stored Readers self.readers_pass = archivist.readers_pass + # Legacy load logging messages logger.info("----") logger.info("Finished loading.") logger.info("Loaded {} chats.".format(archivist.chat_count())) logger.info("----") + # Wakeup flag that determines if it should send a wakeup message to stored groupchats self.wakeup = wakeup + # The logger shared program-wide self.logger = 
logger + # Chance of sending messages as replies self.reply = reply + # Chance of sending 2 messages in a row self.repeat = repeat + # If not empty, whitelist of chat IDs to only respond to self.cid_whitelist = cid_whitelist + # Memory list/cache for the last accessed chats self.memory = MemoryList(memory) + # Minimum time to wait between memory saves (triggered at the next message from any chat) self.save_time = save_time + # Last save timestamp self.memory_timer = int(time.perf_counter()) + # Admin user ID self.admin = admin + # For testing purposes self.bypass = bypass + # Max word length for a message self.max_len = max_len + # Sends an announcement to all chats that pass the check def announce(self, bot, announcement, check=(lambda _: True)): - # Sends an announcement to all chats that pass the check for reader in self.readers_pass(): try: if check(reader): @@ -84,9 +111,9 @@ class Speaker(object): except Exception: pass + # If wakeup flag is set, sends a wake-up message as announcement to all chats that + # are groups. Also, always sends a wakeup message to the 'bot admin' def wake(self, bot, wake): - # If wakeup flag is set, sends a wake-up message as announcement to all chats that - # are groups. Also, always sends a wakeup message to the 'bot admin' send(bot, self.admin, wake) if self.wakeup: @@ -94,9 +121,13 @@ class Speaker(object): return reader.check_type("group") self.announce(bot, wake, group_check) + # Looks up a reader in the memory list def get_reader(self, cid): - return self.memory.get_next(lambda r: r.cid() == cid) + return self.memory.search(lambda r: r.cid() == cid, None) + # Looks up and returns a reader if it's in memory, or loads up a reader from + # file, adds it to memory, and returns it. 
Any other reader pushed out of + # memory is saved to file def load_reader(self, chat): cid = str(chat.id) reader = self.get_reader(cid) @@ -107,19 +138,24 @@ class Speaker(object): if not reader: reader = Reader.FromChat(chat, self.max_period, self.logger) - old_reader = self.memory.append(reader) + old_reader = self.memory.add(reader) if old_reader is not None: old_reader.commit_memory() self.store(old_reader) return reader + # Returns a reader if it's in memory, or loads it up from a file and returns + # it otherwise. Does NOT add the Reader to memory + # This is useful for command prompts that do not require the Reader to be cached def access_reader(self, cid): reader = self.get_reader(cid) if reader is None: return self.get_reader_file(cid) return reader + # Returns True if the bot's username is called, or if one of the nicknames is + # mentioned and they're not another user's username def mentioned(self, text): if self.username in text: return True @@ -128,20 +164,28 @@ class Speaker(object): return True return False + # Returns True if not enough time has passed since the last mute timestamp def is_mute(self): current_time = int(time.perf_counter()) return self.mute_timer is not None and (current_time - self.mute_timer) < self.mute_time + # Series of checks to determine if the bot should reply to a specific message, aside + # from the usual periodic messages def should_reply(self, message, reader): if self.is_mute(): + # Not if mute time hasn't finished return False if not self.bypass and reader.is_restricted(): + # If we're not in testing mode and the chat is restricted user = message.chat.get_member(message.from_user.id) if not self.user_is_admin(user): - # update.message.reply_text("You do not have permissions to do that.") + # ...And the user has no permissions, should not reply return False + + # otherwise (testing mode, or the chat is unrestricted, or the user has permissions) replied = message.reply_to_message text = message.text.casefold() if 
message.text else "" + # Only if it's a reply to a message of ours or the bot is mentioned in the message return (((replied is not None) and (replied.from_user.name == self.username)) or (self.mentioned(text))) @@ -151,12 +195,14 @@ class Speaker(object): else: self.store_file(*reader.archive()) + # Check if enough time for saving memory has passed def should_save(self): current_time = int(time.perf_counter()) elapsed = (current_time - self.memory_timer) self.logger.debug("Save check: {}".format(elapsed)) return elapsed >= self.save_time + # Save all Readers in memory to files if it's save time def save(self): if self.should_save(): self.logger.info("Saving chats in memory...") @@ -165,29 +211,38 @@ class Speaker(object): self.memory_timer = time.perf_counter() self.logger.info("Chats saved.") + # Reads a non-command message def read(self, update, context): + # Check for save time self.save() + # Ignore non-message updates if update.message is None: return + chat = update.message.chat reader = self.load_reader(chat) reader.read(update.message) + # Check if it's a "replyable" message & roll the chance to do so if self.should_reply(update.message, reader) and reader.is_answering(): self.say(context.bot, reader, replying=update.message.message_id) return + # Update the Reader's title if it has changed since the last message read title = get_chat_title(update.message.chat) if title != reader.title(): reader.set_title(title) + # Decrease the countdown for the chat, and send a message if it reached 0 reader.countdown -= 1 if reader.countdown < 0: reader.reset_countdown() + # Random chance to reply to a recent message rid = reader.random_memory() if random.random() <= self.reply else None self.say(context.bot, reader, replying=rid) + # Handles /speak command def speak(self, update, context): chat = (update.message.chat) reader = self.load_reader(chat) @@ -200,12 +255,14 @@ class Speaker(object): mid = str(update.message.message_id) replied = 
update.message.reply_to_message + # Reply to the message that the command replies to, otherwise to the command itself rid = replied.message_id if replied else mid words = update.message.text.split() if len(words) > 1: reader.read(' '.join(words[1:])) self.say(context.bot, reader, replying=rid) + # Checks user permissions. Bot admin is always considered as having full permissions def user_is_admin(self, member): self.logger.info("user {} ({}) requesting a restricted action".format(str(member.user.id), member.user.name)) # eprint('!') @@ -214,23 +271,30 @@ class Speaker(object): or (member.status == 'administrator') or (member.user.id == self.admin)) + # Generate speech (message) def speech(self, reader): return reader.generate_message(self.max_len) + # Say a newly generated message def say(self, bot, reader, replying=None, **kwargs): cid = reader.cid() if self.cid_whitelist is not None and cid not in self.cid_whitelist: + # Don't, if there's a whitelist and this chat is not in it return if self.is_mute(): + # Don't, if mute time isn't over return try: send(bot, cid, self.speech(reader), replying, logger=self.logger, **kwargs) if self.bypass: + # Testing mode, force a reasonable period (to not have the bot spam one specific chat with a low period) max_period = self.max_period reader.set_period(random.randint(max_period // 4, max_period)) if random.random() <= self.repeat: send(bot, cid, self.speech(reader), logger=self.logger, **kwargs) + # Consider any Network Error as a Telegram temporary ban, as I couldn't find + # out in the documentation how error 429 is handled by python-telegram-bot except NetworkError as e: self.logger.error("Sending a message caused network error:") self.logger.exception(e) @@ -240,21 +304,25 @@ class Speaker(object): self.logger.error("Sending a message caused exception:") self.logger.exception(e) + # Handling /count command def get_count(self, update, context): cid = str(update.message.chat.id) - reader = self.access_reader(cid) + reader = 
self.load_reader(cid) num = str(reader.count()) if reader else "no" update.message.reply_text("I remember {} messages.".format(num)) + # Handling /get_chats command (exclusive for bot admin) def get_chats(self, update, context): lines = ["[{}]: {}".format(reader.cid(), reader.title()) for reader in self.readers_pass()] chat_list = "\n".join(lines) update.message.reply_text("I have the following chats:\n\n" + chat_list) + # Handling /period command + # Print the current period or set a new one if one is given def period(self, update, context): chat = update.message.chat - reader = self.access_reader(str(chat.id)) + reader = self.load_reader(str(chat.id)) words = update.message.text.split() if len(words) <= 1: @@ -270,13 +338,14 @@ class Speaker(object): period = int(words[1]) period = reader.set_period(period) update.message.reply_text("Period of speaking set to {}.".format(period)) - self.store_file(*reader.archive()) except Exception: update.message.reply_text("Format was confusing; period unchanged from {}.".format(reader.period())) + # Handling /answer command + # Print the current answer probability or set a new one if one is given def answer(self, update, context): chat = update.message.chat - reader = self.access_reader(str(chat.id)) + reader = self.load_reader(str(chat.id)) words = update.message.text.split() if len(words) <= 1: @@ -292,17 +361,18 @@ class Speaker(object): answer = float(words[1]) answer = reader.set_answer(answer) update.message.reply_text("Answer probability set to {}.".format(answer)) - self.store_file(*reader.archive()) except Exception: update.message.reply_text("Format was confusing; answer probability unchanged from {}.".format(reader.answer())) + # Handling /restrict command + # Toggle the restriction value if it's a group chat and the user has permissions to do so def restrict(self, update, context): if "group" not in update.message.chat.type: update.message.reply_text("That only works in groups.") return chat = update.message.chat 
user = chat.get_member(update.message.from_user.id) - reader = self.access_reader(str(chat.id)) + reader = self.load_reader(str(chat.id)) if reader.is_restricted(): if not self.user_is_admin(user): @@ -311,15 +381,16 @@ class Speaker(object): reader.toggle_restrict() allowed = "let only admins" if reader.is_restricted() else "let everyone" update.message.reply_text("I will {} configure me now.".format(allowed)) - self.store_file(*reader.archive()) + # Handling /silence command + # Toggle the silence value if it's a group chat and the user has permissions to do so def silence(self, update, context): if "group" not in update.message.chat.type: update.message.reply_text("That only works in groups.") return chat = update.message.chat user = chat.get_member(update.message.from_user.id) - reader = self.access_reader(str(chat.id)) + reader = self.load_reader(str(chat.id)) if reader.is_restricted(): if not self.user_is_admin(user): @@ -328,8 +399,8 @@ class Speaker(object): reader.toggle_silence() allowed = "avoid mentioning" if reader.is_silenced() else "mention" update.message.reply_text("I will {} people now.".format(allowed)) - self.store_file(*reader.archive()) + # Handling /who command def who(self, update, context): msg = update.message usr = msg.from_user @@ -346,6 +417,7 @@ class Speaker(object): msg.reply_markdown(answer) + # Handling /where command def where(self, update, context): msg = update.message chat = msg.chat diff --git a/velasco.py b/velasco.py index 8670544..5a135fd 100644 --- a/velasco.py +++ b/velasco.py @@ -90,7 +90,11 @@ def main(): parser.add_argument('-m', '--mute_time', metavar='T', type=int, default=60, help='The time (in s) for the muting period when Telegram limits the bot. (default: 60).') parser.add_argument('-s', '--save_time', metavar='T', type=int, default=3600, - help='The time (in s) for periodic saves (default: 3600).') + help='The time (in s) for periodic saves. 
(default: 3600)') + parser.add_argument('-p', '--min_period', metavar='MIN_P', type=int, default=1, + help='The minimum value for a chat\'s period. (default: 1)') + parser.add_argument('-P', '--max_period', metavar='MAX_P', type=int, default=100000, + help='The maximum value for a chat\'s period. (default: 100000)') args = parser.parse_args() @@ -104,6 +108,8 @@ def main(): archivist = Archivist(logger, chatdir=args.directory, chatext=".vls", + min_period=args.min_period, + max_period=args.max_period, read_only=False )