From 328bd6adbf21d3fcd4aa19a725587cce7355e1da Mon Sep 17 00:00:00 2001 From: vylion Date: Wed, 7 Oct 2020 23:32:10 +0200 Subject: [PATCH 01/22] Overhaul 2 WIP - Generator (Markov) :heavy_check_mark: - ChatCard (Chatlog) :heavy_check_mark: - ChatReader (Scribe) :construction: - Speaker :construction: - - Speaker->get_reader()... :construction: --- .gitignore | 1 + archivist.py | 92 ++++++++++++------------ brain.py | 5 ++ chatcard.py | 122 +++++++++++++++++++++++++++++++ chatlog.py | 106 --------------------------- chatreader.py | 190 ++++++++++++++++++++++++++++++++++++++++++++++++ generator.py | 166 ++++++++++++++++++++++++++++++++++++++++++ markov.py | 105 --------------------------- scribe.py | 194 -------------------------------------------------- speaker.py | 32 +++------ velasco.py | 10 ++- 11 files changed, 548 insertions(+), 475 deletions(-) create mode 100644 brain.py create mode 100644 chatcard.py delete mode 100644 chatlog.py create mode 100644 chatreader.py create mode 100644 generator.py delete mode 100644 markov.py delete mode 100644 scribe.py diff --git a/.gitignore b/.gitignore index 3bc950a..4ccade3 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ chatlogs/* __pycache__/* misc/* +test/* diff --git a/archivist.py b/archivist.py index 65dc5cb..21637a1 100644 --- a/archivist.py +++ b/archivist.py @@ -1,14 +1,15 @@ import os, errno, random, pickle -from scribe import Scribe -from markov import Markov +from chatreader import ChatReader as Reader +from generator import Generator + class Archivist(object): def __init__(self, logger, chatdir=None, chatext=None, admin=0, - freqIncrement=5, saveCount=15, maxFreq=100000, maxLen=50, - readOnly=False, filterCids=None, bypass=False - ): + freq_increment=5, save_count=15, max_period=100000, max_len=50, + read_only=False, filter_cids=None, bypass=False + ): if chatdir is None or len(chatdir) == 0: raise ValueError("Chatlog directory name is empty") elif chatext is None: # Can be len(chatext) == 0 @@ -17,43 
+18,46 @@ class Archivist(object): self.chatdir = chatdir self.chatext = chatext self.admin = admin - self.freqIncrement = freqIncrement - self.saveCount = saveCount - self.maxFreq = maxFreq - self.maxLen = maxLen - self.readOnly = readOnly - self.filterCids = filterCids + self.freq_increment = freq_increment + self.save_count = save_count + self.max_period = max_period + self.max_len = max_len + self.read_only = read_only + self.filter_cids = filter_cids self.bypass = bypass - self.scribeFolder = chatdir + "chat_{tag}" - self.scribePath = chatdir + "chat_{tag}/{file}{ext}" + + def chat_folder(self, *formatting, **key_format): + return (self.chatdir + "chat_{tag}").format(*formatting, **key_format) + + def chat_file(self, *formatting, **key_format): + return (self.chatdir + "chat_{tag}/{file}{ext}").format(*formatting, **key_format) def store(self, tag, log, gen): - scribefolder = self.scribeFolder.format(tag=tag) - cardfile = self.scribePath.format(tag=tag, file="card", ext=".txt") - if self.readOnly: + chat_folder = self.chat_folder(tag=tag) + chat_card = self.chat_file(tag=tag, file="card", ext=".txt") + if self.read_only: return try: - if not os.path.exists(scribefolder): - os.makedirs(scribefolder, exist_ok=True) - self.logger.info("Storing a new chat. Folder {} created.".format(scribefolder)) + if not os.path.exists(chat_folder): + os.makedirs(chat_folder, exist_ok=True) + self.logger.info("Storing a new chat. 
Folder {} created.".format(chat_folder)) except: - self.logger.error("Failed creating {} folder.".format(scribefolder)) + self.logger.error("Failed creating {} folder.".format(chat_folder)) return - file = open(cardfile, 'w') + file = open(chat_card, 'w') file.write(log) file.close() if gen is not None: - recordfile = self.scribePath.format(tag=tag, file="record", ext=self.chatext) - file = open(recordfile, 'w') + chat_record = self.chat_file(tag=tag, file="record", ext=self.chatext) + file = open(chat_record, 'w') file.write(gen) file.close() - def recall(self, filename): - #print("Loading chat: " + path) + def get_reader(self, filename): file = open(self.chatdir + filename, 'rb') scribe = None try: - scribe = Scribe.Recall(pickle.load(file), self) + reader, vocab = Reader.FromFile(pickle.load(file), self) self.logger.info("Unpickled {}{}".format(self.chatdir, filename)) except pickle.UnpicklingError: file.close() @@ -68,27 +72,24 @@ class Archivist(object): file.close() return scribe - def wakeScribe(self, filepath): + def load_reader(self, filepath): file = open(filepath.format(filename="card", ext=".txt"), 'r') card = file.read() file.close() - return Scribe.FromFile(card, self) + return Reader.FromCard(card, self) def wakeParrot(self, tag): - filepath = self.scribePath.format(tag=tag, file="record", ext=self.chatext) + filepath = self.chat_file(tag=tag, file="record", ext=self.chatext) try: file = open(filepath, 'r') - #print("\nOPening " + filepath + "\n") record = file.read() file.close() - return Markov.loads(record) + return Generator.loads(record) except: - self.logger.error("Parrot file {} not found.".format(filepath)) + self.logger.error("Record file {} not found.".format(filepath)) return None - def wakeScriptorium(self): - scriptorium = {} - + def readers_pass(self): directory = os.fsencode(self.chatdir) for subdir in os.scandir(directory): dirname = subdir.name.decode("utf-8") @@ -96,17 +97,16 @@ class Archivist(object): cid = dirname[5:] try: 
filepath = self.chatdir + dirname + "/{filename}{ext}" - scriptorium[cid] = self.wakeScribe(filepath) - self.logger.info("Chat {} contents:\n".format(cid) + scriptorium[cid].chat.dumps()) + reader = self.load_reader(filepath) + self.logger.info("Chat {} contents:\n".format(cid) + reader.card.dumps()) if self.bypass: - scriptorium[cid].setFreq(random.randint(self.maxFreq//2, self.maxFreq)) - elif scriptorium[cid].freq() > self.maxFreq: - scriptorium[cid].setFreq(self.maxFreq) + reader.set_period(random.randint(self.max_period//2, self.max_period)) + elif scriptorium[cid].freq() > self.max_period: + scriptorium[cid].setFreq(self.max_period) except Exception as e: self.logger.error("Failed reading {}".format(dirname)) self.logger.exception(e) raise e - return scriptorium """ def wake_old(self): @@ -117,17 +117,17 @@ class Archivist(object): filename = os.fsdecode(file) if filename.endswith(self.chatext): cid = filename[:-(len(self.chatext))] - if self.filterCids is not None: + if self.filter_cids is not None: #self.logger.info("CID " + cid) - if not cid in self.filterCids: + if not cid in self.filter_cids: continue scriptorium[cid] = self.recall(filename) scribe = scriptorium[cid] if scribe is not None: if self.bypass: - scribe.setFreq(random.randint(self.maxFreq//2, self.maxFreq)) - elif scribe.freq() > self.maxFreq: - scribe.setFreq(self.maxFreq) + scribe.setFreq(random.randint(self.max_period//2, self.max_period)) + elif scribe.freq() > self.max_period: + scribe.setFreq(self.max_period) self.logger.info("Loaded chat " + scribe.title() + " [" + scribe.cid() + "]" "\n" + "\n".join(scribe.chat.dumps())) else: diff --git a/brain.py b/brain.py new file mode 100644 index 0000000..fb55324 --- /dev/null +++ b/brain.py @@ -0,0 +1,5 @@ +#!/usr/bin/env python3 + +import random +from chatreader import ChatReader as Reader + diff --git a/chatcard.py b/chatcard.py new file mode 100644 index 0000000..4af559f --- /dev/null +++ b/chatcard.py @@ -0,0 +1,122 @@ +#!/usr/bin/env 
python3 + +def parse_card_line(line): + # This reads a line in the format 'VARIABLE=value' and gives me the value. + # See ChatCard.loadl(...) for more details + s = line.split('=', 1) + if len(s) < 2: + return "" + else: + return s[1] + + +class ChatCard(object): + def __init__(self, cid, ctype, title, count=0, period=None, answer=0.5, restricted=False, silenced=False): + self.id = str(cid) + # The Telegram chat's ID + self.type = ctype + # The type of chat + self.title = title + # The title of the chat + if period is None: + if "group" in ctype: + period = 10 + # Default period for groups and supergroups + else: + period = 2 + # Default period for private or channel chats + self.count = count + # The number of messages read + self.period = period + # This chat's configured period + self.answer = answer + # This chat's configured answer probability + self.restricted = restricted + # Wether some interactions are restricted to admins only + self.silenced = silenced + # Wether messages should silence user mentions + + def set_period(self, period): + if period < 1: + raise ValueError('Tried to set period a value less than 1.') + else: + self.period = period + return self.period + + def set_answer(self, prob): + if prob > 1: + raise ValueError('Tried to set answer probability higher than 1.') + elif prob < 0: + raise ValueError('Tried to set answer probability lower than 0.') + else: + self.answer = prob + return self.answer + + def dumps(self): + lines = ["CARD=v5"] + lines.append("CHAT_ID=" + self.id) + lines.append("CHAT_TYPE=" + self.type) + lines.append("CHAT_NAME=" + self.title) + lines.append("WORD_COUNT=" + str(self.count)) + lines.append("MESSAGE_PERIOD=" + str(self.period)) + lines.append("ANSWER_PROB=" + str(self.answer)) + lines.append("RESTRICTED=" + str(self.restricted)) + lines.append("SILENCED=" + str(self.silenced)) + # lines.append("WORD_DICT=") + return ('\n'.join(lines)) + '\n' + + def loads(text): + lines = text.splitlines() + return 
ChatCard.loadl(lines) + + def loadl(lines): + # In a perfect world, I would get both the variable name and its corresponding value + # from each side of the lines, but I know the order in which the lines are writen in + # the file, I hardcoded it. So I can afford also hardcoding reading it back in the + # same order, and nobody can stop me + version = parse_card_line(lines[0]).strip() + version = version if len(version.strip()) > 1 else (lines[4] if len(lines) > 4 else "LOG_ZERO") + if version == "v4" or version == "v5": + return ChatCard(cid=parse_card_line(lines[1]), + ctype=parse_card_line(lines[2]), + title=parse_card_line(lines[3]), + count=int(parse_card_line(lines[4])), + period=int(parse_card_line(lines[5])), + answer=float(parse_card_line(lines[6])), + restricted=(parse_card_line(lines[7]) == 'True'), + silenced=(parse_card_line(lines[8]) == 'True') + ) + elif version == "v3": + return ChatCard(cid=parse_card_line(lines[1]), + ctype=parse_card_line(lines[2]), + title=parse_card_line(lines[3]), + count=int(parse_card_line(lines[7])), + period=int(parse_card_line(lines[4])), + answer=float(parse_card_line(lines[5])), + restricted=(parse_card_line(lines[6]) == 'True') + ) + elif version == "v2": + return ChatCard(cid=parse_card_line(lines[1]), + ctype=parse_card_line(lines[2]), + title=parse_card_line(lines[3]), + count=int(parse_card_line(lines[6])), + period=int(parse_card_line(lines[4])), + answer=float(parse_card_line(lines[5])) + ) + elif version == "dict:": + # At some point I decided to number the versions of each dictionary format, + # but this was not always the case. 
This is what you get if you try to read + # whatever there is in very old files where the version should be + return ChatCard(cid=lines[0], + ctype=lines[1], + title=lines[2], + count=int(lines[5]), + period=int(lines[3]) + ) + else: + # This is for the oldest of files + return ChatCard(cid=lines[0], + ctype=lines[1], + title=lines[2], + period=int(lines[3]) + ) diff --git a/chatlog.py b/chatlog.py deleted file mode 100644 index b398c12..0000000 --- a/chatlog.py +++ /dev/null @@ -1,106 +0,0 @@ -#!/usr/bin/env python3 - -def parse(l): - s = l.split('=', 1) - if len(s) < 2: - return "" - else: - return s[1] - -class Chatlog(object): - def __init__(self, cid, ctype, title, count=0, freq=None, answer=0.5, restricted=False, silenced=False): - self.id = str(cid) - self.type = ctype - self.title = title - if freq is None: - if "group" in ctype: - freq = 10 - #elif ctype is "private": - else: - freq = 2 - self.count = count - self.freq = freq - self.answer = answer - self.restricted = restricted - self.silenced = silenced - - def add_msg(self, message): - self.gen.add_text(message) - self.count += 1 - - def set_freq(self, freq): - if freq < 1: - raise ValueError('Tried to set freq a value less than 1.') - else: - self.freq = freq - return self.freq - - def set_answer(self, afreq): - if afreq > 1: - raise ValueError('Tried to set answer probability higher than 1.') - elif afreq < 0: - raise ValueError('Tried to set answer probability lower than 0.') - else: - self.answer = afreq - return self.answer - - def dumps(self): - lines = ["LOG=v4"] - lines.append("CHAT_ID=" + self.id) - lines.append("CHAT_TYPE=" + self.type) - lines.append("CHAT_NAME=" + self.title) - lines.append("WORD_COUNT=" + str(self.count)) - lines.append("MESSAGE_FREQ=" + str(self.freq)) - lines.append("ANSWER_FREQ=" + str(self.answer)) - lines.append("RESTRICTED=" + str(self.restricted)) - lines.append("SILENCED=" + str(self.silenced)) - #lines.append("WORD_DICT=") - return '\n'.join(lines) - - def 
loads(text): - lines = text.splitlines() - return Chatlog.loadl(lines) - - def loadl(lines): - version = parse(lines[0]).strip() - version = version if len(version.strip()) > 1 else (lines[4] if len(lines) > 4 else "LOG_ZERO") - if version == "v4": - return Chatlog(cid=parse(lines[1]), - ctype=parse(lines[2]), - title=parse(lines[3]), - count=int(parse(lines[4])), - freq=int(parse(lines[5])), - answer=float(parse(lines[6])), - restricted=(parse(lines[7]) == 'True'), - silenced=(parse(lines[8]) == 'True') - ) - elif version == "v3": - return Chatlog(cid=parse(lines[1]), - ctype=parse(lines[2]), - title=parse(lines[3]), - count=int(parse(lines[7])), - freq=int(parse(lines[4])), - answer=float(parse(lines[5])), - restricted=(parse(lines[6]) == 'True') - ) - elif version == "v2": - return Chatlog(cid=parse(lines[1]), - ctype=parse(lines[2]), - title=parse(lines[3]), - count=int(parse(lines[6])), - freq=int(parse(lines[4])), - answer=float(parse(lines[5])) - ) - elif version == "dict:": - return Chatlog(cid=lines[0], - ctype=lines[1], - title=lines[2], - count=int(lines[5]), - freq=int(lines[3]) - ) - else: - return Chatlog(cid=lines[0], - ctype=lines[1], - title=lines[2], - freq=int(lines[3]) - ) diff --git a/chatreader.py b/chatreader.py new file mode 100644 index 0000000..beb486c --- /dev/null +++ b/chatreader.py @@ -0,0 +1,190 @@ +#!/usr/bin/env python3 + +import random +from chatcard import ChatCard, parse_card_line +from generator import Generator + + +def get_chat_title(chat): + # This gives me the chat title, or the first and maybe last + # name of the user as fallback if it's a private chat + if chat.title is not None: + return chat.title + elif chat.first_name is not None: + if chat.last_name is not None: + return chat.first_name + " " + chat.last_name + else: + return chat.first_name + else: + return "" + + +class Memory(object): + def __init__(self, mid, content): + self.id = mid + self.content = content + + +class ChatReader(object): + TAG_PREFIX = "^IS_" + 
STICKER_TAG = "^IS_STICKER^" + ANIM_TAG = "^IS_ANIMATION^" + VIDEO_TAG = "^IS_VIDEO^" + + def __init__(self, chatcard, max_period, logger): + self.card = chatcard + self.max_period = max_period + self.short_term_mem = [] + self.countdown = self.card.period + self.logger = logger + + def FromChat(chat, max_period, logger, newchat=False): + # Create a new ChatReader from a Chat object + card = ChatCard(chat.id, chat.type, get_chat_title(chat)) + return ChatReader(card, max_period, logger) + + def FromData(data, max_period, logger): + # Create a new ChatReader from a whole Chat history (WIP) + return None + + def FromCard(card, max_period, logger): + # Create a new ChatReader from a card's file dump + chatcard = ChatCard.loads(card) + return ChatReader(chatcard, max_period, logger) + + def FromFile(text, max_period, logger): + # Load a ChatReader from a file's text string + lines = text.splitlines() + version = parse_card_line(lines[0]).strip() + version = version if len(version.strip()) > 1 else lines[4] + logger.info("Dictionary version: {} ({} lines)".format(version, len(lines))) + vocab = None + if version == "v4" or version == "v5": + return ChatReader.FromCard(text, max_period, logger) + # I stopped saving the chat metadata and the cache together + elif version == "v3": + card = ChatCard.loadl(lines[0:8]) + cache = '\n'.join(lines[9:]) + vocab = Generator.loads(cache) + elif version == "v2": + card = ChatCard.loadl(lines[0:7]) + cache = '\n'.join(lines[8:]) + vocab = Generator.loads(cache) + elif version == "dict:": + card = ChatCard.loadl(lines[0:6]) + cache = '\n'.join(lines[6:]) + vocab = Generator.loads(cache) + else: + card = ChatCard.loadl(lines[0:4]) + cache = lines[4:] + vocab = Generator(load=cache, mode=Generator.MODE_LIST) + # raise SyntaxError("ChatReader: ChatCard format unrecognized.") + s = ChatReader(card, max_period, logger) + return (s, vocab) + + def archive(self, vocab): + # Returns a nice lice little tuple package for the archivist to save 
to file. + # Also commits to long term memory any pending short term memories + self.commit_long_term(vocab) + return (self.card.id, self.card.dumps(), vocab) + + def check_type(self, t): + # Checks type. Returns "True" for "group" even if it's supergroup + return t in self.card.type + + def exactly_type(self, t): + # Hard check + return t == self.card.type + + def set_title(self, title): + self.card.title = title + + def set_period(self, period): + if period < self.countdown: + self.countdown = max(period, 1) + return self.card.set_period(min(period, self.max_period)) + + def set_answer(self, prob): + return self.card.set_answer(prob) + + def cid(self): + return str(self.card.id) + + def count(self): + return self.card.count + + def period(self): + return self.card.period + + def title(self): + return self.card.title + + def answer(self): + return self.card.answer + + def ctype(self): + return self.card.type + + def is_restricted(self): + return self.card.restricted + + def toggle_restrict(self): + self.card.restricted = (not self.card.restricted) + + def is_silenced(self): + return self.card.silenced + + def toggle_silence(self): + self.card.silenced = (not self.card.silenced) + + def is_answering(self): + rand = random.random() + chance = self.answer() + if chance == 1: + return True + elif chance == 0: + return False + return rand <= chance + + def add_memory(self, mid, content): + mem = Memory(mid, content) + self.short_term_mem.append(mem) + + def random_memory(self): + mem = random.choice(self.short_term_mem) + return mem.id + + def reset_countdown(self): + self.countdown = self.card.period + + def read(self, message): + mid = str(message.message_id) + + if message.text is not None: + self.read(mid, message.text) + elif message.sticker is not None: + self.learn_drawing(mid, ChatReader.STICKER_TAG, message.sticker.file_id) + elif message.animation is not None: + self.learn_drawing(mid, ChatReader.ANIM_TAG, message.animation.file_id) + elif message.video is 
not None: + self.learn_drawing(mid, ChatReader.VIDEO_TAG, message.video.file_id) + self.card.count += 1 + + def learn_drawing(self, mid, tag, drawing): + self.learn(mid, tag + " " + drawing) + + def learn(self, mid, text): + if "velasco" in text.casefold() and len(text.split()) <= 3: + return + self.add_memory(mid, text) + + def commit_long_term(self, vocab): + for mem in self.short_term_mem: + vocab.add(mem.content) + self.short_term_mem = [] + + """ + def learnFrom(self, scribe): + self.card.count += scribe.chat.count + self.vocab.cross(scribe.vocab) + """ diff --git a/generator.py b/generator.py new file mode 100644 index 0000000..17e5d45 --- /dev/null +++ b/generator.py @@ -0,0 +1,166 @@ +#!/usr/bin/env python3 + +import random +import json + + +def rewrite(text): + # This splits strings into lists of words delimited by space. + # Other whitespaces are appended space characters so they are included + # as their own Markov chain element, so as not to pollude with + # "different" words that would only differ in having a whitespace + # attached or not + words = text.replace('\n', '\n ').split(' ') + i = 0 + while i < len(words): + w = words[i].strip(' \t') + if len(w) > 0: + words[i] = w + else: + del words[i] + i -= 1 + i += 1 + return words + + +def getkey(w1, w2): + # This gives a dictionary key from 2 words, ignoring case + key = (w1.strip().casefold(), w2.strip().casefold()) + return str(key) + + +def getwords(key): + # This turns a dictionary key back into 2 separate words + words = key.strip('()').split(', ') + for i in range(len(words)): + words[i].strip('\'') + return words + + +def triplets(wordlist): + # Generates triplets of words from the given data string. So if our string + # were "What a lovely day", we'd generate (What, a, lovely) and then + # (a, lovely, day). 
+ if len(wordlist) < 3: + return + + for i in range(len(wordlist) - 2): + yield (wordlist[i], wordlist[i+1], wordlist[i+2]) + + +class Generator(object): + MODE_JSON = "MODE_JSON" + # This is to mark when we want to create a Generator object from a given JSON + + MODE_LIST = "MODE_LIST" + # This is to mark when we want to create a Generator object from a given list of words + + MODE_CHAT_DATA = "MODE_CHAT_DATA" + # This is to mark when we want to create a Generator object from Chat data (WIP) + + HEAD = "\n^MESSAGE_SEPARATOR^" + TAIL = "^MESSAGE_SEPARATOR^" + + def __init__(self, load=None, mode=None): + if mode is not None: + # We ain't creating a new Generator from scratch + if mode == Generator.MODE_JSON: + self.cache = json.loads(load) + elif mode == Generator.MODE_LIST: + self.cache = {} + self.load_list(load) + else: + self.cache = {} + # The cache is where we store our words + + def load_list(self, many): + # Takes a list of strings and adds them to the cache one by one + for one in many: + self.add(one) + + def dumps(self): + # Dumps the cache dictionary into a JSON-formatted string + return json.dumps(self.cache) + + def loads(dump): + # Loads the cache dictionary from a JSON-formatted string + if len(dump) == 0: + # faulty dump gives default Generator + return Generator() + # otherwise + return Generator(load=dump, mode=Generator.MODE_JSON) + + def add(self, text): + # This takes a string and stores it in the cache, preceding it + # with the HEAD that marks the beginning of a new message and + # following it with the TAIL that marks the end + words = [Generator.HEAD] + text = text + " " + Generator.TAIL + words.extend(text.split()) + self.database(rewrite(text)) + + def database(self, words): + # This takes a list of words and stores it in the cache, adding + # a special entry for the first word (the HEAD marker) + for w1, w2, w3 in triplets(words): + if w1 == Generator.HEAD: + if w1 in self.cache: + self.cache[Generator.HEAD].append(w2) + else: + 
self.cache[Generator.HEAD] = [w2] + key = getkey(w1, w2) + if key in self.cache: + # if the key exists, add the new word to the end of the chain + self.cache[key].append(w3) + else: + # otherwise, create a new entry for the new key starting with + # the new end of chain + self.cache[key] = [w3] + + def generate(self, size=50, silence=False): + # This generates the Markov text/word chain + # silence tells if mentions should be silenced + if len(self.cache) == 0: + # If there is nothing in the cache we cannot generate anything + return "" + + w1 = random.choice(self.cache[Generator.HEAD]) + w2 = random.choice(self.cache[getkey(Generator.HEAD, w1)]) + # Start with a message HEAD and a random message starting word + gen_words = [] + for i in range(size): + # As long as we don't go over the size value (max. message length)... + if silence and w1.startswith("@") and len(w1) > 1: + gen_words.append(w1.replace("@", "(@)")) + # ...append the first word, silencing any possible username mention + else: + gen_words.append(w1) + # ..append the first word + if w2 == Generator.TAIL or not getkey(w1, w2) in self.cache: + # When there's no key from the last 2 words to follow the chain, + # or we reached a separation between messages, stop + break + else: + w1, w2 = w2, random.choice(self.cache[getkey(w1, w2)]) + # Make the second word to be the new first word, and + # make a new random word that follows the chain to be + # the new second word + return ' '.join(gen_words) + + def cross(self, gen): + # cross 2 Generators into this one + for key in gen.cache: + if key in self.cache: + self.cache[key].extend(gen.cache[key]) + else: + self.cache[key] = list(gen.cache[key]) + + def new_count(self): + # Count again the number of messages if the current number is unreliable + count = 0 + for key in self.cache: + for word in self.cache[key]: + if word == Generator.TAIL: + count += 1 + # by just counting message separators + return count diff --git a/markov.py b/markov.py deleted file mode 
100644 index bf1c3ce..0000000 --- a/markov.py +++ /dev/null @@ -1,105 +0,0 @@ -#!/usr/bin/env python3 - -import random -import json - -def getkey(w1, w2): - key = (w1.strip().casefold(), w2.strip().casefold()) - return str(key) - -def getwords(key): - words = key.strip('()').split(', ') - for i in range(len(words)): - words[i].strip('\'') - return words - -def triples(wordlist): - # Generates triples from the given data string. So if our string were - # "What a lovely day", we'd generate (What, a, lovely) and then - # (a, lovely, day). - if len(wordlist) < 3: - return - - for i in range(len(wordlist) - 2): - yield (wordlist[i], wordlist[i+1], wordlist[i+2]) - -class Markov(object): - ModeJson = "MODE_JSON" - ModeList = "MODE_LIST" - ModeChatData = "MODE_CHAT_DATA" - - Head = "\n^MESSAGE_SEPARATOR^" - Tail = "^MESSAGE_SEPARATOR^" - - def __init__(self, load=None, mode=None): - if mode is not None: - if mode == Markov.ModeJson: - self.cache = json.loads(load) - elif mode == Markov.ModeList: - self.cache = {} - self.loadList(load) - else: - self.cache = {} - - def loadList(self, lines): - for line in lines: - words = [Markov.Head] - words.extend(line.split()) - self.learn_words(words) - - def dumps(self): - return json.dumps(self.cache) - - def loads(dump): - if len(dump) == 0: - return Markov() - return Markov(load=dump, mode=Markov.ModeJson) - - def learn_words(self, words): - self.database(words) - - def database(self, wordlist): - for w1, w2, w3 in triples(wordlist): - if w1 == Markov.Head: - if w1 in self.cache: - self.cache[Markov.Head].append(w2) - else: - self.cache[Markov.Head] = [w2] - key = getkey(w1, w2) - if key in self.cache: - self.cache[key].append(w3) - else: - self.cache[key] = [w3] - - def generate_markov_text(self, size=50, silence=False): - if len(self.cache) == 0: - return "" - w1 = random.choice(self.cache[Markov.Head]) - w2 = random.choice(self.cache[getkey(Markov.Head, w1)]) - gen_words = [] - for i in range(size): - if silence and 
w1.startswith("@") and len(w1) > 1: - gen_words.append(w1.replace("@", "(@)")) - else: - gen_words.append(w1) - if w2 == Markov.Tail or not getkey(w1, w2) in self.cache: - # print("Generated text") - break - else: - w1, w2 = w2, random.choice(self.cache[getkey(w1, w2)]) - return ' '.join(gen_words) - - def cross(self, gen): - for key in gen.cache: - if key in self.cache: - self.cache[key].extend(d[key]) - else: - self.cache[key] = list(d[key]) - - def new_count(self): - count = 0 - for key in self.cache: - for word in self.cache[key]: - if word == Markov.Tail: - count += 1 - return count diff --git a/scribe.py b/scribe.py deleted file mode 100644 index 7bcedb4..0000000 --- a/scribe.py +++ /dev/null @@ -1,194 +0,0 @@ -#!/usr/bin/env python3 - -import random -from chatlog import * -from markov import Markov - -def getTitle(chat): - if chat.title is not None: - return chat.title - elif chat.first_name is not None: - if chat.last_name is not None: - return chat.first_name + " " + chat.last_name - else: - return chat.first_name - else: - return "" - -def rewrite(text): - words = text.replace('\n', '\n ').split(' ') - i = 0 - while i < len(words): - w = words[i].strip(' \t') - if len(w) > 0: - words[i] = w - else: - del words[i] - i -= 1 - i += 1 - return words - -class Page(object): - def __init__(self, mid, content): - self.id = mid - self.content = content - -class Scribe(object): - TagPrefix = "^IS_" - StickerTag = "^IS_STICKER^" - AnimTag = "^IS_ANIMATION^" - VideoTag = "^IS_VIDEO^" - - def __init__(self, chatlog, archivist): - self.chat = chatlog - self.archivist = archivist - self.pages = [] - self.countdown = self.chat.freq - self.logger = self.archivist.logger - - def FromChat(chat, archivist, newchat=False): - chatlog = Chatlog(chat.id, chat.type, getTitle(chat)) - scribe = Scribe(chatlog, archivist) - return scribe - - def FromData(data, archivist): - return None - - def FromFile(log, archivist): - chatlog = Chatlog.loads(log) - return Scribe(chatlog, 
archivist) - - def Recall(text, archivist): - lines = text.splitlines() - version = parse(lines[0]).strip() - version = version if len(version.strip()) > 1 else lines[4] - archivist.logger.info( "Dictionary version: {} ({} lines)".format(version, len(lines)) ) - if version == "v4": - chatlog = Chatlog.loadl(lines[0:9]) - cache = '\n'.join(lines[10:]) - parrot = Markov.loads(cache) - elif version == "v3": - chatlog = Chatlog.loadl(lines[0:8]) - cache = '\n'.join(lines[9:]) - parrot = Markov.loads(cache) - elif version == "v2": - chatlog = Chatlog.loadl(lines[0:7]) - cache = '\n'.join(lines[8:]) - parrot = Markov.loads(cache) - elif version == "dict:": - chatlog = Chatlog.loadl(lines[0:6]) - cache = '\n'.join(lines[6:]) - parrot = Markov.loads(cache) - else: - chatlog = Chatlog.loadl(lines[0:4]) - cache = lines[4:] - parrot = Markov(load=cache, mode=Markov.ModeList) - #raise SyntaxError("Scribe: Chatlog format unrecognized.") - s = Scribe(chatlog, archivist) - s.parrot = parrot - return s - - def store(self, parrot): - self.archivist.store(self.chat.id, self.chat.dumps(), parrot) - - def checkType(self, t): - return t in self.chat.type - - def compareType(self, t): - return t == self.chat.type - - def setTitle(self, title): - self.chat.title = title - - def setFreq(self, freq): - if freq < self.countdown: - self.countdown = max(freq, 1) - return self.chat.set_freq(min(freq, self.archivist.maxFreq)) - - def setAnswer(self, afreq): - return self.chat.set_answer(afreq) - - def cid(self): - return str(self.chat.id) - - def count(self): - return self.chat.count - - def freq(self): - return self.chat.freq - - def title(self): - return self.chat.title - - def answer(self): - return self.chat.answer - - def type(self): - return self.chat.type - - def isRestricted(self): - return self.chat.restricted - - def restrict(self): - self.chat.restricted = (not self.chat.restricted) - - def isSilenced(self): - return self.chat.silenced - - def silence(self): - self.chat.silenced = 
(not self.chat.silenced) - - def isAnswering(self): - rand = random.random() - chance = self.answer() - if chance == 1: - return True - elif chance == 0: - return False - return rand <= chance - - def addPage(self, mid, content): - page = Page(mid, content) - self.pages.append(page) - - def getReference(self): - page = random.choice(self.pages) - return page.id - - def resetCountdown(self): - self.countdown = self.chat.freq - - def learn(self, message): - mid = str(message.message_id) - - if message.text is not None: - self.read(mid, message.text) - elif message.sticker is not None: - self.learnDrawing(mid, Scribe.StickerTag, message.sticker.file_id) - elif message.animation is not None: - self.learnDrawing(mid, Scribe.AnimTag, message.animation.file_id) - elif message.video is not None: - self.learnDrawing(mid, Scribe.VideoTag, message.video.file_id) - self.chat.count += 1 - - def learnDrawing(self, mid, tag, drawing): - self.read(mid, tag + " " + drawing) - - def read(self, mid, text): - if "velasco" in text.casefold() and len(text.split()) <= 3: - return - words = [Markov.Head] - text = text + " " + Markov.Tail - words.extend(rewrite(text)) - self.addPage(mid, words) - - def teachParrot(self, parrot): - for page in self.pages: - parrot.learn_words(page.content) - self.pages = [] - -""" - def learnFrom(self, scribe): - self.chat.count += scribe.chat.count - self.parrot.cross(scribe.parrot) -""" diff --git a/speaker.py b/speaker.py index 465d50e..f07788c 100644 --- a/speaker.py +++ b/speaker.py @@ -1,24 +1,25 @@ #!/usr/bin/env python3 import random -from scribe import Scribe -from markov import Markov +from chatreader import ChatReader as Reader from telegram.error import * -def send(bot, cid, text, replying=None, format=None, logger=None, **kwargs): - kwargs["parse_mode"] = format + +def send(bot, cid, text, replying=None, formatting=None, logger=None, **kwargs): + kwargs["parse_mode"] = formatting kwargs["reply_to_message_id"] = replying - if 
text.startswith(Scribe.TagPrefix): + if text.startswith(Reader.TAG_PREFIX): words = text.split(maxsplit=1) if logger: logger.info('Sending {} "{}" to {}'.format(words[0][4:-1], words[1], cid)) + # Logs something like 'Sending VIDEO "VIDEO_ID" to CHAT_ID' - if words[0] == Scribe.StickerTag: + if words[0] == Reader.STICKER_TAG: return bot.send_sticker(cid, words[1], **kwargs) - elif words[0] == Scribe.AnimTag: + elif words[0] == Reader.ANIM_TAG: return bot.send_animation(cid, words[1], **kwargs) - elif words[0] == Scribe.VideoTag: + elif words[0] == Reader.VIDEO_TAG: return bot.send_video(cid, words[1], **kwargs) else: text @@ -27,17 +28,6 @@ def send(bot, cid, text, replying=None, format=None, logger=None, **kwargs): logger.info("Sending a {} to {}: '{}'".format(mtype, cid, text)) return bot.send_message(cid, text, **kwargs) -def getTitle(chat): - if chat.title: - return chat.title - else: - last = chat.last_name if chat.last_name else "" - first = chat.first_name if chat.first_name else "" - name = " ".join([first, last]).strip() - if len(name) == 0: - return "Unknown" - else: - return name class Speaker(object): ModeFixed = "FIXED_MODE" @@ -59,7 +49,7 @@ class Speaker(object): self.reply = reply self.repeat = repeat self.filterCids = archivist.filterCids - self.bypass=archivist.bypass + self.bypass = archivist.bypass def announce(self, announcement, check=(lambda _: True)): for scribe in self.scriptorium: @@ -79,7 +69,7 @@ class Speaker(object): def getScribe(self, chat): cid = str(chat.id) if not cid in self.scriptorium: - scribe = Scribe.FromChat(chat, self.archivist, newchat=True) + scribe = Reader.FromChat(chat, self.archivist, newchat=True) self.scriptorium[cid] = scribe return scribe else: diff --git a/velasco.py b/velasco.py index e512dab..162b748 100644 --- a/velasco.py +++ b/velasco.py @@ -18,7 +18,7 @@ speakerbot = None logger = logging.getLogger(__name__) # Enable logging -log_format="[{}][%(asctime)s]%(name)s::%(levelname)s: 
%(message)s".format(username.upper()) +log_format = "[{}][%(asctime)s]%(name)s::%(levelname)s: %(message)s".format(username.upper()) if coloredlogsError: logging.basicConfig(format=log_format, level=logging.INFO) @@ -49,20 +49,24 @@ about_msg = "I am yet another Markov Bot experiment. I read everything you type explanation = "I decompose every message I read in groups of 3 consecutive words, so for each consecutive pair I save the word that can follow them. I then use this to make my own messages. At first I will only repeat your messages because for each 2 words I will have very few possible following words.\n\nI also separate my vocabulary by chats, so anything I learn in one chat I will only say in that chat. For privacy, you know. Also, I save my vocabulary in the form of a json dictionary, so no logs are kept.\n\nMy default frequency in private chats is one message of mine from each 2 messages received, and in group chats it\'s 10 messages I read for each message I send." + def static_reply(text, format=None): def reply(bot, update): update.message.reply_text(text, parse_mode=format) return reply + def error(bot, update, error): logger.warning('Update "{}" caused error "{}"'.format(update, error)) + def stop(bot, update): scribe = speakerbot.getScribe(update.message.chat.id) #del chatlogs[chatlog.id] #os.remove(LOG_DIR + chatlog.id + LOG_EXT) logger.warning("I got blocked by user {} [{}]".format(scribe.title(), scribe.cid())) + def main(): global speakerbot parser = argparse.ArgumentParser(description='A Telegram markov bot.') @@ -76,7 +80,7 @@ def main(): updater = Updater(args.token) #filterCids=["-1001036575277", "-1001040087584", str(args.admin_id)] - filterCids=None + filterCids = None archivist = Archivist(logger, chatdir="chatlogs/", @@ -84,7 +88,7 @@ def main(): admin=args.admin_id, filterCids=filterCids, readOnly=False - ) + ) speakerbot = Speaker("velasco", "@" + username, archivist, logger, wakeup=args.wakeup) From 
724a49f8be7d409d59075e9dc54da481c8213059 Mon Sep 17 00:00:00 2001 From: vylion Date: Sun, 25 Oct 2020 22:56:02 +0100 Subject: [PATCH 02/22] Overhaul 2 Finished Markov -> Generator :heavy_check_mark: Chatlog -> Metadata :heavy_check_mark: Scribe -> Reader :heavy_check_mark: Speaker updated :heavy_check_mark: Also: - Updated to python-telegram-bot v. 12 callbacks - Renamed a lot of variables and rewritten a lot of code to follow Python conventions - Added a new argument for filter chat IDs - Changed obtention of username from hardcoded to a library-provided function - Added a new argument for bot nicknames that it can respond to --- .gitignore | 1 + archivist.py | 132 ++++++++--------- generator.py | 8 +- log.txt | 1 + chatcard.py => metadata.py | 19 +-- chatreader.py => reader.py | 117 +++++++-------- speaker.py | 283 ++++++++++++++++++++----------------- velasco.py | 57 ++++---- 8 files changed, 326 insertions(+), 292 deletions(-) create mode 100644 log.txt rename chatcard.py => metadata.py (90%) rename chatreader.py => reader.py (55%) diff --git a/.gitignore b/.gitignore index 4ccade3..1ae7c90 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ chatlogs/* __pycache__/* misc/* +bkp/* test/* diff --git a/archivist.py b/archivist.py index 21637a1..9ff340e 100644 --- a/archivist.py +++ b/archivist.py @@ -1,61 +1,97 @@ -import os, errno, random, pickle -from chatreader import ChatReader as Reader +import os, random, pickle +from reader import Reader from generator import Generator class Archivist(object): def __init__(self, logger, chatdir=None, chatext=None, admin=0, - freq_increment=5, save_count=15, max_period=100000, max_len=50, + period_inc=5, save_count=15, max_period=100000, max_len=50, read_only=False, filter_cids=None, bypass=False ): if chatdir is None or len(chatdir) == 0: - raise ValueError("Chatlog directory name is empty") - elif chatext is None: # Can be len(chatext) == 0 + chatdir = "./" + elif chatext is None: # Can be len(chatext) == 0 raise 
ValueError("Chatlog file extension is invalid") self.logger = logger self.chatdir = chatdir self.chatext = chatext self.admin = admin - self.freq_increment = freq_increment + self.period_inc = period_inc self.save_count = save_count self.max_period = max_period self.max_len = max_len self.read_only = read_only self.filter_cids = filter_cids self.bypass = bypass - + def chat_folder(self, *formatting, **key_format): - return (self.chatdir + "chat_{tag}").format(*formatting, **key_format) + return ("./" + self.chatdir + "chat_{tag}").format(*formatting, **key_format) def chat_file(self, *formatting, **key_format): - return (self.chatdir + "chat_{tag}/{file}{ext}").format(*formatting, **key_format) + return ("./" + self.chatdir + "chat_{tag}/{file}{ext}").format(*formatting, **key_format) - def store(self, tag, log, gen): + def store(self, tag, data, gen): chat_folder = self.chat_folder(tag=tag) chat_card = self.chat_file(tag=tag, file="card", ext=".txt") + if self.read_only: return try: if not os.path.exists(chat_folder): os.makedirs(chat_folder, exist_ok=True) self.logger.info("Storing a new chat. 
Folder {} created.".format(chat_folder)) - except: + except Exception: self.logger.error("Failed creating {} folder.".format(chat_folder)) return file = open(chat_card, 'w') - file.write(log) + file.write(data) file.close() + if gen is not None: chat_record = self.chat_file(tag=tag, file="record", ext=self.chatext) file = open(chat_record, 'w') file.write(gen) file.close() - def get_reader(self, filename): + def load_vocab(self, tag): + filepath = self.chat_file(tag=tag, file="record", ext=self.chatext) + try: + file = open(filepath, 'r') + record = file.read() + file.close() + return record + except Exception: + self.logger.error("Vocabulary file {} not found.".format(filepath)) + return None + + def load_reader(self, tag): + filepath = self.chat_file(tag=tag, file="card", ext=".txt") + try: + reader_file = open(filepath, 'r') + reader = reader_file.read() + reader_file.close() + return reader + except OSError: + self.logger.error("Metadata file {} not found.".format(filepath)) + return None + + def get_reader(self, tag): + reader = self.load_reader(tag) + if reader: + vocab_dump = self.load_vocab(tag) + if vocab_dump: + vocab = Generator.loads(vocab_dump) + else: + vocab = Generator() + return Reader.FromCard(reader, vocab, self.max_period, self.logger) + else: + return None + + def load_reader_old(self, filename): file = open(self.chatdir + filename, 'rb') - scribe = None + reader = None try: reader, vocab = Reader.FromFile(pickle.load(file), self) self.logger.info("Unpickled {}{}".format(self.chatdir, filename)) @@ -63,7 +99,7 @@ class Archivist(object): file.close() file = open(self.chatdir + filename, 'r') try: - scribe = Scribe.Recall(file.read(), self) + scribe = Reader.FromFile(file.read(), self) self.logger.info("Read {}{} text file".format(self.chatdir, filename)) except Exception as e: self.logger.error("Failed reading {}{}".format(self.chatdir, filename)) @@ -72,22 +108,14 @@ class Archivist(object): file.close() return scribe - def load_reader(self, 
filepath): - file = open(filepath.format(filename="card", ext=".txt"), 'r') - card = file.read() - file.close() - return Reader.FromCard(card, self) - - def wakeParrot(self, tag): - filepath = self.chat_file(tag=tag, file="record", ext=self.chatext) - try: - file = open(filepath, 'r') - record = file.read() - file.close() - return Generator.loads(record) - except: - self.logger.error("Record file {} not found.".format(filepath)) - return None + def chat_count(self): + count = 0 + directory = os.fsencode(self.chatdir) + for subdir in os.scandir(directory): + dirname = subdir.name.decode("utf-8") + if dirname.startswith("chat_"): + count += 1 + return count def readers_pass(self): directory = os.fsencode(self.chatdir) @@ -96,45 +124,19 @@ class Archivist(object): if dirname.startswith("chat_"): cid = dirname[5:] try: - filepath = self.chatdir + dirname + "/{filename}{ext}" - reader = self.load_reader(filepath) - self.logger.info("Chat {} contents:\n".format(cid) + reader.card.dumps()) - if self.bypass: - reader.set_period(random.randint(self.max_period//2, self.max_period)) - elif scriptorium[cid].freq() > self.max_period: - scriptorium[cid].setFreq(self.max_period) + reader = self.load_reader(cid) + # self.logger.info("Chat {} contents:\n{}".format(cid, reader.card.dumps())) + self.logger.info("Successfully read {} ({}) chat.\n".format(cid, reader.title())) + if self.bypass: # I forgot what I made this for + reader.set_period(random.randint(self.max_period // 2, self.max_period)) + elif reader.period() > self.max_period: + reader.set_period(self.max_period) + yield reader except Exception as e: self.logger.error("Failed reading {}".format(dirname)) self.logger.exception(e) raise e - """ - def wake_old(self): - scriptorium = {} - - directory = os.fsencode(self.chatdir) - for file in os.listdir(directory): - filename = os.fsdecode(file) - if filename.endswith(self.chatext): - cid = filename[:-(len(self.chatext))] - if self.filter_cids is not None: - 
#self.logger.info("CID " + cid) - if not cid in self.filter_cids: - continue - scriptorium[cid] = self.recall(filename) - scribe = scriptorium[cid] - if scribe is not None: - if self.bypass: - scribe.setFreq(random.randint(self.max_period//2, self.max_period)) - elif scribe.freq() > self.max_period: - scribe.setFreq(self.max_period) - self.logger.info("Loaded chat " + scribe.title() + " [" + scribe.cid() + "]" - "\n" + "\n".join(scribe.chat.dumps())) - else: - continue - return scriptorium - """ - def update(self, oldext=None): failed = [] remove = False diff --git a/generator.py b/generator.py index 17e5d45..f9deaa1 100644 --- a/generator.py +++ b/generator.py @@ -59,7 +59,7 @@ class Generator(object): # This is to mark when we want to create a Generator object from Chat data (WIP) HEAD = "\n^MESSAGE_SEPARATOR^" - TAIL = "^MESSAGE_SEPARATOR^" + TAIL = " ^MESSAGE_SEPARATOR^" def __init__(self, load=None, mode=None): if mode is not None: @@ -95,9 +95,9 @@ class Generator(object): # with the HEAD that marks the beginning of a new message and # following it with the TAIL that marks the end words = [Generator.HEAD] - text = text + " " + Generator.TAIL - words.extend(text.split()) - self.database(rewrite(text)) + text = rewrite(text + Generator.TAIL) + words.extend(text) + self.database(words) def database(self, words): # This takes a list of words and stores it in the cache, adding diff --git a/log.txt b/log.txt new file mode 100644 index 0000000..9947608 --- /dev/null +++ b/log.txt @@ -0,0 +1 @@ +Velascobot turning on. diff --git a/chatcard.py b/metadata.py similarity index 90% rename from chatcard.py rename to metadata.py index 4af559f..2a89ed7 100644 --- a/chatcard.py +++ b/metadata.py @@ -2,7 +2,7 @@ def parse_card_line(line): # This reads a line in the format 'VARIABLE=value' and gives me the value. - # See ChatCard.loadl(...) for more details + # See Metadata.loadl(...) 
for more details s = line.split('=', 1) if len(s) < 2: return "" @@ -10,7 +10,10 @@ def parse_card_line(line): return s[1] -class ChatCard(object): +class Metadata(object): + # This is a chat's Metadata, holding different configuration values for + # Velasco and other miscellaneous information about the chat + def __init__(self, cid, ctype, title, count=0, period=None, answer=0.5, restricted=False, silenced=False): self.id = str(cid) # The Telegram chat's ID @@ -67,7 +70,7 @@ class ChatCard(object): def loads(text): lines = text.splitlines() - return ChatCard.loadl(lines) + return Metadata.loadl(lines) def loadl(lines): # In a perfect world, I would get both the variable name and its corresponding value @@ -77,7 +80,7 @@ class ChatCard(object): version = parse_card_line(lines[0]).strip() version = version if len(version.strip()) > 1 else (lines[4] if len(lines) > 4 else "LOG_ZERO") if version == "v4" or version == "v5": - return ChatCard(cid=parse_card_line(lines[1]), + return Metadata(cid=parse_card_line(lines[1]), ctype=parse_card_line(lines[2]), title=parse_card_line(lines[3]), count=int(parse_card_line(lines[4])), @@ -87,7 +90,7 @@ class ChatCard(object): silenced=(parse_card_line(lines[8]) == 'True') ) elif version == "v3": - return ChatCard(cid=parse_card_line(lines[1]), + return Metadata(cid=parse_card_line(lines[1]), ctype=parse_card_line(lines[2]), title=parse_card_line(lines[3]), count=int(parse_card_line(lines[7])), @@ -96,7 +99,7 @@ class ChatCard(object): restricted=(parse_card_line(lines[6]) == 'True') ) elif version == "v2": - return ChatCard(cid=parse_card_line(lines[1]), + return Metadata(cid=parse_card_line(lines[1]), ctype=parse_card_line(lines[2]), title=parse_card_line(lines[3]), count=int(parse_card_line(lines[6])), @@ -107,7 +110,7 @@ class ChatCard(object): # At some point I decided to number the versions of each dictionary format, # but this was not always the case. 
This is what you get if you try to read # whatever there is in very old files where the version should be - return ChatCard(cid=lines[0], + return Metadata(cid=lines[0], ctype=lines[1], title=lines[2], count=int(lines[5]), @@ -115,7 +118,7 @@ class ChatCard(object): ) else: # This is for the oldest of files - return ChatCard(cid=lines[0], + return Metadata(cid=lines[0], ctype=lines[1], title=lines[2], period=int(lines[3]) diff --git a/chatreader.py b/reader.py similarity index 55% rename from chatreader.py rename to reader.py index beb486c..4189fa9 100644 --- a/chatreader.py +++ b/reader.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 import random -from chatcard import ChatCard, parse_card_line +from metadata import Metadata, parse_card_line from generator import Generator @@ -25,117 +25,121 @@ class Memory(object): self.content = content -class ChatReader(object): +class Reader(object): + # This is a chat Reader object, in charge of managing the parsing of messages + # for a specific chat, and holding said chat's metadata + TAG_PREFIX = "^IS_" STICKER_TAG = "^IS_STICKER^" ANIM_TAG = "^IS_ANIMATION^" VIDEO_TAG = "^IS_VIDEO^" - def __init__(self, chatcard, max_period, logger): - self.card = chatcard + def __init__(self, metadata, vocab, max_period, logger): + self.meta = metadata + self.vocab = vocab self.max_period = max_period self.short_term_mem = [] - self.countdown = self.card.period + self.countdown = self.meta.period self.logger = logger - def FromChat(chat, max_period, logger, newchat=False): - # Create a new ChatReader from a Chat object - card = ChatCard(chat.id, chat.type, get_chat_title(chat)) - return ChatReader(card, max_period, logger) + def FromChat(chat, max_period, logger): + # Create a new Reader from a Chat object + meta = Metadata(chat.id, chat.type, get_chat_title(chat)) + vocab = Generator() + return Reader(meta, vocab, max_period, logger) - def FromData(data, max_period, logger): - # Create a new ChatReader from a whole Chat history (WIP) + def 
FromHistory(history, vocab, max_period, logger): + # Create a new Reader from a whole Chat history (WIP) return None - def FromCard(card, max_period, logger): - # Create a new ChatReader from a card's file dump - chatcard = ChatCard.loads(card) - return ChatReader(chatcard, max_period, logger) + def FromCard(meta, vocab, max_period, logger): + # Create a new Reader from a meta's file dump + metadata = Metadata.loads(meta) + return Reader(metadata, vocab, max_period, logger) - def FromFile(text, max_period, logger): - # Load a ChatReader from a file's text string + def FromFile(text, max_period, logger, vocab=None): + # Load a Reader from a file's text string (obsolete) lines = text.splitlines() version = parse_card_line(lines[0]).strip() version = version if len(version.strip()) > 1 else lines[4] logger.info("Dictionary version: {} ({} lines)".format(version, len(lines))) - vocab = None if version == "v4" or version == "v5": - return ChatReader.FromCard(text, max_period, logger) + return Reader.FromCard(text, vocab, max_period, logger) # I stopped saving the chat metadata and the cache together elif version == "v3": - card = ChatCard.loadl(lines[0:8]) + meta = Metadata.loadl(lines[0:8]) cache = '\n'.join(lines[9:]) vocab = Generator.loads(cache) elif version == "v2": - card = ChatCard.loadl(lines[0:7]) + meta = Metadata.loadl(lines[0:7]) cache = '\n'.join(lines[8:]) vocab = Generator.loads(cache) elif version == "dict:": - card = ChatCard.loadl(lines[0:6]) + meta = Metadata.loadl(lines[0:6]) cache = '\n'.join(lines[6:]) vocab = Generator.loads(cache) else: - card = ChatCard.loadl(lines[0:4]) + meta = Metadata.loadl(lines[0:4]) cache = lines[4:] vocab = Generator(load=cache, mode=Generator.MODE_LIST) - # raise SyntaxError("ChatReader: ChatCard format unrecognized.") - s = ChatReader(card, max_period, logger) - return (s, vocab) + # raise SyntaxError("Reader: Metadata format unrecognized.") + r = Reader(meta, vocab, max_period, logger) + return r - def archive(self, 
vocab): + def archive(self): # Returns a nice lice little tuple package for the archivist to save to file. # Also commits to long term memory any pending short term memories - self.commit_long_term(vocab) - return (self.card.id, self.card.dumps(), vocab) + self.commit_memory() + return (self.meta.id, self.meta.dumps(), self.vocab.dumps()) def check_type(self, t): # Checks type. Returns "True" for "group" even if it's supergroup - return t in self.card.type + return t in self.meta.type def exactly_type(self, t): # Hard check - return t == self.card.type + return t == self.meta.type def set_title(self, title): - self.card.title = title + self.meta.title = title def set_period(self, period): if period < self.countdown: self.countdown = max(period, 1) - return self.card.set_period(min(period, self.max_period)) + return self.meta.set_period(min(period, self.max_period)) def set_answer(self, prob): - return self.card.set_answer(prob) + return self.meta.set_answer(prob) def cid(self): - return str(self.card.id) + return str(self.meta.id) def count(self): - return self.card.count + return self.meta.count def period(self): - return self.card.period + return self.meta.period def title(self): - return self.card.title + return self.meta.title def answer(self): - return self.card.answer + return self.meta.answer def ctype(self): - return self.card.type + return self.meta.type def is_restricted(self): - return self.card.restricted + return self.meta.restricted def toggle_restrict(self): - self.card.restricted = (not self.card.restricted) + self.meta.restricted = (not self.meta.restricted) def is_silenced(self): - return self.card.silenced + return self.meta.silenced def toggle_silence(self): - self.card.silenced = (not self.card.silenced) + self.meta.silenced = (not self.meta.silenced) def is_answering(self): rand = random.random() @@ -151,24 +155,26 @@ class ChatReader(object): self.short_term_mem.append(mem) def random_memory(self): + if len(self.short_term_mem) == 0: + return 
None mem = random.choice(self.short_term_mem) return mem.id def reset_countdown(self): - self.countdown = self.card.period + self.countdown = self.meta.period def read(self, message): mid = str(message.message_id) if message.text is not None: - self.read(mid, message.text) + self.learn(mid, message.text) elif message.sticker is not None: - self.learn_drawing(mid, ChatReader.STICKER_TAG, message.sticker.file_id) + self.learn_drawing(mid, Reader.STICKER_TAG, message.sticker.file_id) elif message.animation is not None: - self.learn_drawing(mid, ChatReader.ANIM_TAG, message.animation.file_id) + self.learn_drawing(mid, Reader.ANIM_TAG, message.animation.file_id) elif message.video is not None: - self.learn_drawing(mid, ChatReader.VIDEO_TAG, message.video.file_id) - self.card.count += 1 + self.learn_drawing(mid, Reader.VIDEO_TAG, message.video.file_id) + self.meta.count += 1 def learn_drawing(self, mid, tag, drawing): self.learn(mid, tag + " " + drawing) @@ -178,13 +184,10 @@ class ChatReader(object): return self.add_memory(mid, text) - def commit_long_term(self, vocab): + def commit_memory(self): for mem in self.short_term_mem: - vocab.add(mem.content) + self.vocab.add(mem.content) self.short_term_mem = [] - """ - def learnFrom(self, scribe): - self.card.count += scribe.chat.count - self.vocab.cross(scribe.vocab) - """ + def generate_message(self, max_len): + return self.vocab.generate(size=max_len, silence=self.is_silenced()) diff --git a/speaker.py b/speaker.py index f07788c..d7e4391 100644 --- a/speaker.py +++ b/speaker.py @@ -1,8 +1,8 @@ #!/usr/bin/env python3 import random -from chatreader import ChatReader as Reader -from telegram.error import * +from reader import Reader, get_chat_title +from telegram.error import TimedOut def send(bot, cid, text, replying=None, formatting=None, logger=None, **kwargs): @@ -33,105 +33,117 @@ class Speaker(object): ModeFixed = "FIXED_MODE" ModeChance = "MODE_CHANCE" - def __init__(self, name, username, archivist, logger, + def 
__init__(self, username, archivist, logger, nicknames=[], reply=0.1, repeat=0.05, wakeup=False, mode=ModeFixed ): - self.name = name + self.names = nicknames self.username = username self.archivist = archivist - self.scriptorium = archivist.wakeScriptorium() logger.info("----") logger.info("Finished loading.") - logger.info("Loaded {} chats.".format(len(self.scriptorium))) + logger.info("Loaded {} chats.".format(archivist.chat_count())) logger.info("----") self.wakeup = wakeup self.logger = logger self.reply = reply self.repeat = repeat - self.filterCids = archivist.filterCids + self.filter_cids = archivist.filter_cids self.bypass = archivist.bypass + self.current_reader = None - def announce(self, announcement, check=(lambda _: True)): - for scribe in self.scriptorium: + def announce(self, bot, announcement, check=(lambda _: True)): + # Sends an announcement to all chats that pass the check + for reader in self.archivist.readers_pass(): try: - if check(scribe): - send(bot, scribe.cid(), announcement) - logger.info("Waking up on chat {}".format(scribe.cid())) - except: + if check(reader): + send(bot, reader.cid(), announcement) + self.logger.info("Sending announcement to chat {}".format(reader.cid())) + except Exception: pass def wake(self, bot, wake): + # Sends a wake-up message as announcement to all chats that + # are groups if self.wakeup: - def check(scribe): - return scribe.checkType("group") - self.announce(wake, check) + def group_check(reader): + return reader.check_type("group") + self.announce(bot, wake, group_check) - def getScribe(self, chat): + def load_reader(self, chat): cid = str(chat.id) - if not cid in self.scriptorium: - scribe = Reader.FromChat(chat, self.archivist, newchat=True) - self.scriptorium[cid] = scribe - return scribe - else: - return self.scriptorium[cid] + if self.current_reader is not None and cid == self.current_reader.cid(): + return - def shouldReply(self, message, scribe): - if not self.bypass and scribe.isRestricted(): + if 
self.current_reader is not None: + self.current_reader.commit_memory() + self.save() + + reader = self.archivist.get_reader(cid) + if not reader: + reader = Reader.FromChat(chat, self.archivist.max_period, self.logger) + self.current_reader = reader + + def get_reader(self, cid): + if self.current_reader is None or cid != self.current_reader.cid(): + return self.archivist.get_reader(cid) + + return self.current_reader + + def mentioned(self, text): + if self.username in text: + return True + for name in self.names: + if name in text and "@{}".format(name) not in text: + return True + return False + + def should_reply(self, message): + if not self.bypass and self.current_reader.is_restricted(): user = message.chat.get_member(message.from_user.id) - if not self.userIsAdmin(user): + if not self.user_is_admin(user): # update.message.reply_text("You do not have permissions to do that.") return False replied = message.reply_to_message text = message.text.casefold() if message.text else "" - return ( ((replied is not None) and (replied.from_user.name == self.username)) or - (self.username in text) or - (self.name in text and "@{}".format(self.name) not in text) - ) + return (((replied is not None) and (replied.from_user.name == self.username)) + or (self.mentioned(text))) - def store(self, scribe): - if self.parrot is None: - raise ValueError("Tried to store a Parrot that is None.") + def save(self): + if self.current_reader is None: + raise ValueError("Tried to store a None Reader.") else: - scribe.store(self.parrot.dumps()) + self.archivist.store(*self.current_reader.archive()) - def loadParrot(self, scribe): - newParrot = False - self.parrot = self.archivist.wakeParrot(scribe.cid()) - if self.parrot is None: - newParrot = True - self.parrot = Markov() - scribe.teachParrot(self.parrot) - self.store(scribe) - return newParrot - - def read(self, bot, update): + def read(self, update, context): + if update.message is None: + return chat = update.message.chat - scribe = 
self.getScribe(chat) - scribe.learn(update.message) + self.load_reader(chat) + self.current_reader.read(update.message) - if self.shouldReply(update.message, scribe) and scribe.isAnswering(): - self.say(bot, scribe, replying=update.message.message_id) + if self.should_reply(update.message) and self.current_reader.is_answering(): + self.say(context.bot, replying=update.message.message_id) return - title = getTitle(update.message.chat) - if title != scribe.title(): - scribe.setTitle(title) + title = get_chat_title(update.message.chat) + if title != self.current_reader.title(): + self.current_reader.set_title(title) - scribe.countdown -= 1 - if scribe.countdown < 0: - scribe.resetCountdown() - rid = scribe.getReference() if random.random() <= self.reply else None - self.say(bot, scribe, replying=rid) - elif (scribe.freq() - scribe.countdown) % self.archivist.saveCount == 0: - self.loadParrot(scribe) + self.current_reader.countdown -= 1 + if self.current_reader.countdown < 0: + self.current_reader.reset_countdown() + rid = self.current_reader.random_memory() if random.random() <= self.reply else None + self.say(context.bot, replying=rid) + elif (self.current_reader.period() - self.current_reader.countdown) % self.archivist.save_count == 0: + self.save() - def speak(self, bot, update): + def speak(self, update, context): chat = (update.message.chat) - scribe = self.getScribe(chat) + self.load_reader(chat) - if not self.bypass and scribe.isRestricted(): + if not self.bypass and self.current_reader.is_restricted(): user = update.message.chat.get_member(update.message.from_user.id) - if not self.userIsAdmin(user): + if not self.user_is_admin(user): # update.message.reply_text("You do not have permissions to do that.") return @@ -140,148 +152,153 @@ class Speaker(object): rid = replied.message_id if replied else mid words = update.message.text.split() if len(words) > 1: - scribe.learn(' '.join(words[1:])) - self.say(bot, scribe, replying=rid) + self.current_reader.read(' 
'.join(words[1:])) + self.say(context.bot, replying=rid) - def userIsAdmin(self, member): + def user_is_admin(self, member): self.logger.info("user {} ({}) requesting a restricted action".format(str(member.user.id), member.user.name)) # self.logger.info("Bot Creator ID is {}".format(str(self.archivist.admin))) - return ((member.status == 'creator') or - (member.status == 'administrator') or - (member.user.id == self.archivist.admin)) + return ((member.status == 'creator') + or (member.status == 'administrator') + or (member.user.id == self.archivist.admin)) - def speech(self, scribe): - return self.parrot.generate_markov_text(size=self.archivist.maxLen, silence=scribe.isSilenced()) + def speech(self): + return self.current_reader.generate_message(self.archivist.max_len) - def say(self, bot, scribe, replying=None, **kwargs): - if self.filterCids is not None and not scribe.cid() in self.filterCids: + def say(self, bot, replying=None, **kwargs): + cid = self.current_reader.cid() + if self.filter_cids is not None and cid not in self.filter_cids: return - self.loadParrot(scribe) try: - send(bot, scribe.cid(), self.speech(scribe), replying, logger=self.logger, **kwargs) + send(bot, cid, self.speech(), replying, logger=self.logger, **kwargs) if self.bypass: - maxFreq = self.archivist.maxFreq - scribe.setFreq(random.randint(maxFreq//4, maxFreq)) + max_period = self.archivist.max_period + self.current_reader.set_period(random.randint(max_period // 4, max_period)) if random.random() <= self.repeat: - send(bot, scribe.cid(), self.speech(scribe), logger=self.logger, **kwargs) + send(bot, cid, self.speech(), logger=self.logger, **kwargs) except TimedOut: - scribe.setFreq(scribe.freq() + self.archivist.freqIncrement) - self.logger.warning("Increased period for chat {} [{}]".format(scribe.title(), scribe.cid())) + self.current_reader.set_period(self.current_reader.period() + self.archivist.period_inc) + self.logger.warning("Increased period for chat {} 
[{}]".format(self.current_reader.title(), cid)) except Exception as e: self.logger.error("Sending a message caused error:") - self.logger.error(e) + raise e - def getCount(self, bot, update): + def get_count(self, update, context): cid = str(update.message.chat.id) - scribe = self.scriptorium[cid] - num = str(scribe.count()) if self.scriptorium[cid] else "no" + reader = self.get_reader(cid) + + num = str(reader.count()) if reader else "no" update.message.reply_text("I remember {} messages.".format(num)) - def getChats(self, bot, update): - lines = ["[{}]: {}".format(cid, self.scriptorium[cid].title()) for cid in self.scriptorium] - list = "\n".join(lines) - update.message.reply_text( "\n\n".join(["I have the following chats:", list]) ) + def get_chats(self, update, context): + lines = ["[{}]: {}".format(reader.cid(), reader.title()) for reader in self.archivist.readers_pass] + chat_list = "\n".join(lines) + update.message.reply_text("I have the following chats:\n\n" + chat_list) - def freq(self, bot, update): + def period(self, update, context): chat = update.message.chat - scribe = self.getScribe(chat) + reader = self.get_reader(str(chat.id)) words = update.message.text.split() if len(words) <= 1: - update.message.reply_text("The current speech period is {}".format(scribe.freq())) + update.message.reply_text("The current speech period is {}".format(reader.period())) return - if scribe.isRestricted(): + if reader.is_restricted(): user = update.message.chat.get_member(update.message.from_user.id) - if not self.userIsAdmin(user): + if not self.user_is_admin(user): update.message.reply_text("You do not have permissions to do that.") return try: - freq = int(words[1]) - freq = scribe.setFreq(freq) - update.message.reply_text("Period of speaking set to {}.".format(freq)) - scribe.store(None) - except: - update.message.reply_text("Format was confusing; period unchanged from {}.".format(scribe.freq())) + period = int(words[1]) + period = reader.set_period(period) + 
update.message.reply_text("Period of speaking set to {}.".format(period)) + self.archivist.store(*reader.archive()) + except Exception: + update.message.reply_text("Format was confusing; period unchanged from {}.".format(reader.period())) - def answer(self, bot, update): + def answer(self, update, context): chat = update.message.chat - scribe = self.getScribe(chat) + reader = self.get_reader(str(chat.id)) words = update.message.text.split() if len(words) <= 1: - update.message.reply_text("The current answer probability is {}".format(scribe.answer())) + update.message.reply_text("The current answer probability is {}".format(reader.answer())) return - if scribe.isRestricted(): + if reader.is_restricted(): user = update.message.chat.get_member(update.message.from_user.id) - if not self.userIsAdmin(user): + if not self.user_is_admin(user): update.message.reply_text("You do not have permissions to do that.") return try: - answ = float(words[1]) - answ = scribe.setAnswer(answ) - update.message.reply_text("Answer probability set to {}.".format(answ)) - scribe.store(None) - except: - update.message.reply_text("Format was confusing; answer probability unchanged from {}.".format(scribe.answer())) + answer = float(words[1]) + answer = reader.set_answer(answer) + update.message.reply_text("Answer probability set to {}.".format(answer)) + self.archivist.store(*reader.archive()) + except Exception: + update.message.reply_text("Format was confusing; answer probability unchanged from {}.".format(reader.answer())) - def restrict(self, bot, update): + def restrict(self, update, context): if "group" not in update.message.chat.type: update.message.reply_text("That only works in groups.") return chat = update.message.chat user = chat.get_member(update.message.from_user.id) - scribe = self.getScribe(chat) - if scribe.isRestricted(): - if not self.userIsAdmin(user): + reader = self.get_reader(str(chat.id)) + + if reader.is_restricted(): + if not self.user_is_admin(user): 
update.message.reply_text("You do not have permissions to do that.") return - scribe.restrict() - allowed = "let only admins" if scribe.isRestricted() else "let everyone" + reader.toggle_restrict() + allowed = "let only admins" if reader.is_restricted() else "let everyone" update.message.reply_text("I will {} configure me now.".format(allowed)) + self.archivist.store(*reader.archive()) - def silence(self, bot, update): + def silence(self, update, context): if "group" not in update.message.chat.type: update.message.reply_text("That only works in groups.") return chat = update.message.chat user = chat.get_member(update.message.from_user.id) - scribe = self.getScribe(chat) - if scribe.isRestricted(): - if not self.userIsAdmin(user): + reader = self.get_reader(str(chat.id)) + + if reader.is_restricted(): + if not self.user_is_admin(user): update.message.reply_text("You do not have permissions to do that.") return - scribe.silence() - allowed = "avoid mentioning" if scribe.isSilenced() else "mention" + reader.toggle_silence() + allowed = "avoid mentioning" if reader.is_silenced() else "mention" update.message.reply_text("I will {} people now.".format(allowed)) + self.archivist.store(*reader.archive()) - def who(self, bot, update): + def who(self, update, context): msg = update.message usr = msg.from_user cht = msg.chat chtname = cht.title if cht.title else cht.first_name + rdr = self.get_reader(str(cht.id)) answer = ("You're **{name}**, with username `{username}`, and " "id `{uid}`.\nYou're messaging in the chat named __{cname}__," " of type {ctype}, with id `{cid}`, and timestamp `{tstamp}`." 
).format(name=usr.full_name, username=usr.username, uid=usr.id, cname=chtname, cid=cht.id, - ctype=scribe.type(), tstamp=str(msg.date)) + ctype=rdr.ctype(), tstamp=str(msg.date)) msg.reply_markdown(answer) - def where(self, bot, update): - print("THEY'RE ASKING WHERE") + def where(self, update, context): msg = update.message chat = msg.chat - scribe = self.getScribe(chat) - if scribe.isRestricted() and scribe.isSilenced(): + reader = self.get_reader(str(chat.id)) + if reader.is_restricted() and reader.is_silenced(): permissions = "restricted and silenced" - elif scribe.isRestricted(): + elif reader.is_restricted(): permissions = "restricted but not silenced" - elif scribe.isSilenced(): + elif reader.is_silenced(): permissions = "not restricted but silenced" else: permissions = "neither restricted nor silenced" @@ -289,8 +306,8 @@ class Speaker(object): answer = ("You're messaging in the chat of saved title __{cname}__," " with id `{cid}`, message count {c}, period {p}, and answer " "probability {a}.\n\nThis chat is {perm}." - ).format(cname=scribe.title(), cid=scribe.cid(), - c=scribe.count(), p=scribe.freq(), a=scribe.answer(), - perm=permissions) + ).format(cname=reader.title(), cid=reader.cid(), + c=reader.count(), p=reader.period(), + a=reader.answer(), perm=permissions) msg.reply_markdown(answer) diff --git a/velasco.py b/velasco.py index 162b748..e854d28 100644 --- a/velasco.py +++ b/velasco.py @@ -1,8 +1,9 @@ #!/usr/bin/env python3 -import logging, argparse +import logging +import argparse from telegram.ext import Updater, CommandHandler, MessageHandler, Filters -from telegram.error import * +# from telegram.error import * from archivist import Archivist from speaker import Speaker @@ -38,7 +39,7 @@ help_msg = """I answer to the following commands: /explain - I explain how I work. /help - I send this message. /count - I tell you how many messages from this chat I remember. -/freq - Change the frequency of my messages. 
(Maximum of 100000) +/period - Change the period of my messages. (Maximum of 100000) /speak - Forces me to speak. /answer - Change the probability to answer to a reply. (Decimal between 0 and 1). /restrict - Toggle restriction of configuration commands to admins only. @@ -47,7 +48,7 @@ help_msg = """I answer to the following commands: about_msg = "I am yet another Markov Bot experiment. I read everything you type to me and then spit back nonsensical messages that look like yours.\n\nYou can send /explain if you want further explanation." -explanation = "I decompose every message I read in groups of 3 consecutive words, so for each consecutive pair I save the word that can follow them. I then use this to make my own messages. At first I will only repeat your messages because for each 2 words I will have very few possible following words.\n\nI also separate my vocabulary by chats, so anything I learn in one chat I will only say in that chat. For privacy, you know. Also, I save my vocabulary in the form of a json dictionary, so no logs are kept.\n\nMy default frequency in private chats is one message of mine from each 2 messages received, and in group chats it\'s 10 messages I read for each message I send." +explanation = "I decompose every message I read in groups of 3 consecutive words, so for each consecutive pair I save the word that can follow them. I then use this to make my own messages. At first I will only repeat your messages because for each 2 words I will have very few possible following words.\n\nI also separate my vocabulary by chats, so anything I learn in one chat I will only say in that chat. For privacy, you know. Also, I save my vocabulary in the form of a json dictionary, so no logs are kept.\n\nMy default period in private chats is one message of mine from each 2 messages received, and in group chats it\'s 10 messages I read for each message I send." 
def static_reply(text, format=None): @@ -56,15 +57,16 @@ def static_reply(text, format=None): return reply -def error(bot, update, error): - logger.warning('Update "{}" caused error "{}"'.format(update, error)) +def error(update, context): + logger.warning('The following update:\n"{}"\n\nCaused the following error:\n"{}"'.format(update, context.error)) + # raise error def stop(bot, update): - scribe = speakerbot.getScribe(update.message.chat.id) - #del chatlogs[chatlog.id] - #os.remove(LOG_DIR + chatlog.id + LOG_EXT) - logger.warning("I got blocked by user {} [{}]".format(scribe.title(), scribe.cid())) + reader = speakerbot.get_reader(str(update.message.chat.id)) + # del chatlogs[chatlog.id] + # os.remove(LOG_DIR + chatlog.id + LOG_EXT) + logger.warning("I got blocked by user {} [{}]".format(reader.title(), reader.cid())) def main(): @@ -73,38 +75,42 @@ def main(): parser.add_argument('token', metavar='TOKEN', help='The Bot Token to work with the Telegram Bot API') parser.add_argument('admin_id', metavar='ADMIN_ID', type=int, help='The ID of the Telegram user that manages this bot') parser.add_argument('-w', '--wakeup', action='store_true', help='Flag that makes the bot send a first message to all chats during wake up.') + parser.add_argument('-f', '--filter', nargs='*', metavar='FILTER_CID', help='Zero or more chat IDs to add in a filter whitelist (default is empty, all chats allowed)') + parser.add_argument('-n', '--nicknames', nargs='*', metavar='NICKNAME', help='Any possible nicknames that the bot could answer to.') args = parser.parse_args() # Create the EventHandler and pass it your bot's token. 
- updater = Updater(args.token) + updater = Updater(args.token, use_context=True) - #filterCids=["-1001036575277", "-1001040087584", str(args.admin_id)] - filterCids = None + filter_cids = args.filter + if filter_cids: + filter_cids.append(str(args.admin_id)) archivist = Archivist(logger, chatdir="chatlogs/", chatext=".vls", admin=args.admin_id, - filterCids=filterCids, - readOnly=False + filter_cids=filter_cids, + read_only=False ) - speakerbot = Speaker("velasco", "@" + username, archivist, logger, wakeup=args.wakeup) + username = updater.bot.get_me().username + speakerbot = Speaker("@" + username, archivist, logger, nicknames=args.nicknames, wakeup=args.wakeup) # Get the dispatcher to register handlers dp = updater.dispatcher # on different commands - answer in Telegram - dp.add_handler(CommandHandler("start", static_reply(start_msg) )) - dp.add_handler(CommandHandler("about", static_reply(about_msg) )) - dp.add_handler(CommandHandler("explain", static_reply(explanation) )) - dp.add_handler(CommandHandler("help", static_reply(help_msg) )) - dp.add_handler(CommandHandler("count", speakerbot.getCount)) - dp.add_handler(CommandHandler("period", speakerbot.freq)) - dp.add_handler(CommandHandler("list", speakerbot.getChats, Filters.chat(chat_id=archivist.admin))) - #dp.add_handler(CommandHandler("user", get_name, Filters.chat(chat_id=archivist.admin))) - #dp.add_handler(CommandHandler("id", get_id)) + dp.add_handler(CommandHandler("start", static_reply(start_msg))) + dp.add_handler(CommandHandler("about", static_reply(about_msg))) + dp.add_handler(CommandHandler("explain", static_reply(explanation))) + dp.add_handler(CommandHandler("help", static_reply(help_msg))) + dp.add_handler(CommandHandler("count", speakerbot.get_count)) + dp.add_handler(CommandHandler("period", speakerbot.period)) + dp.add_handler(CommandHandler("list", speakerbot.get_chats, filters=Filters.chat(chat_id=archivist.admin))) + # dp.add_handler(CommandHandler("user", get_name, 
Filters.chat(chat_id=archivist.admin))) + # dp.add_handler(CommandHandler("id", get_id)) dp.add_handler(CommandHandler("stop", stop)) dp.add_handler(CommandHandler("speak", speakerbot.speak)) dp.add_handler(CommandHandler("answer", speakerbot.answer)) @@ -130,5 +136,6 @@ def main(): # start_polling() is non-blocking and will stop the bot gracefully. updater.idle() + if __name__ == '__main__': main() From ffaf0c102287c62fe9313e7aefe821044fcd658c Mon Sep 17 00:00:00 2001 From: vylion Date: Sun, 25 Oct 2020 22:58:22 +0100 Subject: [PATCH 03/22] removed experimental brain.py --- brain.py | 5 ----- 1 file changed, 5 deletions(-) delete mode 100644 brain.py diff --git a/brain.py b/brain.py deleted file mode 100644 index fb55324..0000000 --- a/brain.py +++ /dev/null @@ -1,5 +0,0 @@ -#!/usr/bin/env python3 - -import random -from chatreader import ChatReader as Reader - From 9413821025fd21b199fce5ea71c86b261d2700de Mon Sep 17 00:00:00 2001 From: vylion Date: Sun, 25 Oct 2020 22:59:38 +0100 Subject: [PATCH 04/22] removed testing log file --- log.txt | 1 - 1 file changed, 1 deletion(-) delete mode 100644 log.txt diff --git a/log.txt b/log.txt deleted file mode 100644 index 9947608..0000000 --- a/log.txt +++ /dev/null @@ -1 +0,0 @@ -Velascobot turning on. From ca0dc26ef4a00a199e82c7e136cd1ed0b92411d8 Mon Sep 17 00:00:00 2001 From: vylion Date: Mon, 26 Oct 2020 09:32:09 +0100 Subject: [PATCH 05/22] Fixed archivist.readers_pass(...) 
--- archivist.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/archivist.py b/archivist.py index 9ff340e..388b78a 100644 --- a/archivist.py +++ b/archivist.py @@ -124,16 +124,17 @@ class Archivist(object): if dirname.startswith("chat_"): cid = dirname[5:] try: - reader = self.load_reader(cid) + reader = self.get_reader(cid) # self.logger.info("Chat {} contents:\n{}".format(cid, reader.card.dumps())) - self.logger.info("Successfully read {} ({}) chat.\n".format(cid, reader.title())) + self.logger.info("Successfully passed through {} ({}) chat.\n".format(cid, reader.title())) if self.bypass: # I forgot what I made this for reader.set_period(random.randint(self.max_period // 2, self.max_period)) elif reader.period() > self.max_period: reader.set_period(self.max_period) + self.store(*reader.archive()) yield reader except Exception as e: - self.logger.error("Failed reading {}".format(dirname)) + self.logger.error("Failed passing through {}".format(dirname)) self.logger.exception(e) raise e From f6d0305868dfad68321b47898df875f420f48521 Mon Sep 17 00:00:00 2001 From: vylion Date: Mon, 26 Oct 2020 17:01:00 +0100 Subject: [PATCH 06/22] Added forced wakeup message to bot admin (admin id set by a flag) --- speaker.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/speaker.py b/speaker.py index d7e4391..8078f83 100644 --- a/speaker.py +++ b/speaker.py @@ -62,8 +62,10 @@ class Speaker(object): pass def wake(self, bot, wake): - # Sends a wake-up message as announcement to all chats that - # are groups + # If wakeup flag is set, sends a wake-up message as announcement to all chats that + # are groups. 
Also, always sends a wakeup message to the 'bot admin' + send(bot, self.archivist.admin, wake) + if self.wakeup: def group_check(reader): return reader.check_type("group") From 66f328074187884c760523227796153ab86110aa Mon Sep 17 00:00:00 2001 From: vylion Date: Mon, 26 Oct 2020 18:44:29 +0100 Subject: [PATCH 07/22] Added simple TooManyRequests error handling --- speaker.py | 27 ++++++++++++++++++++------- velasco.py | 3 ++- 2 files changed, 22 insertions(+), 8 deletions(-) diff --git a/speaker.py b/speaker.py index 8078f83..683f5e7 100644 --- a/speaker.py +++ b/speaker.py @@ -1,8 +1,9 @@ #!/usr/bin/env python3 import random +import time from reader import Reader, get_chat_title -from telegram.error import TimedOut +from telegram.error import TimedOut, NetworkError def send(bot, cid, text, replying=None, formatting=None, logger=None, **kwargs): @@ -33,10 +34,11 @@ class Speaker(object): ModeFixed = "FIXED_MODE" ModeChance = "MODE_CHANCE" - def __init__(self, username, archivist, logger, nicknames=[], + def __init__(self, username, archivist, logger, nicknames=[], mute_time=60, reply=0.1, repeat=0.05, wakeup=False, mode=ModeFixed ): self.names = nicknames + self.mute_time = mute_time self.username = username self.archivist = archivist logger.info("----") @@ -50,6 +52,7 @@ class Speaker(object): self.filter_cids = archivist.filter_cids self.bypass = archivist.bypass self.current_reader = None + self.time_counter = None def announce(self, bot, announcement, check=(lambda _: True)): # Sends an announcement to all chats that pass the check @@ -100,6 +103,9 @@ class Speaker(object): return False def should_reply(self, message): + current_time = int(time.perf_counter()) + if self.time_counter is not None and (current_time - self.time_counter) < self.mute_time: + return False if not self.bypass and self.current_reader.is_restricted(): user = message.chat.get_member(message.from_user.id) if not self.user_is_admin(user): @@ -179,12 +185,19 @@ class Speaker(object): 
self.current_reader.set_period(random.randint(max_period // 4, max_period)) if random.random() <= self.repeat: send(bot, cid, self.speech(), logger=self.logger, **kwargs) - except TimedOut: - self.current_reader.set_period(self.current_reader.period() + self.archivist.period_inc) - self.logger.warning("Increased period for chat {} [{}]".format(self.current_reader.title(), cid)) + except TimedOut as e: + self.logger.error("Telegram timed out.") + self.logger.exception(e) + except NetworkError as e: + if '429' in e.message: + self.logger.error("Error: TooManyRequests. Going mute for {} seconds.".format(self.mute_time)) + self.time_counter = int(time.perf_counter()) + else: + self.logger.error("Sending a message caused network error:") + self.logger.exception(e) except Exception as e: - self.logger.error("Sending a message caused error:") - raise e + self.logger.error("Sending a message caused exception:") + self.logger.exception(e) def get_count(self, update, context): cid = str(update.message.chat.id) diff --git a/velasco.py b/velasco.py index e854d28..0c6dfa0 100644 --- a/velasco.py +++ b/velasco.py @@ -58,7 +58,8 @@ def static_reply(text, format=None): def error(update, context): - logger.warning('The following update:\n"{}"\n\nCaused the following error:\n"{}"'.format(update, context.error)) + logger.warning('The following update:\n"{}"\n\nCaused the following error:\n'.format(update)) + logger.exception(context.error) # raise error From 11058f47a3e767e37c21941a37de54d9d87d5a60 Mon Sep 17 00:00:00 2001 From: vylion Date: Mon, 26 Oct 2020 19:54:08 +0100 Subject: [PATCH 08/22] Changed file encoding (part 1) --- archivist.py | 40 +++++++++++----------------------------- generator.py | 2 +- 2 files changed, 12 insertions(+), 30 deletions(-) diff --git a/archivist.py b/archivist.py index 388b78a..489caab 100644 --- a/archivist.py +++ b/archivist.py @@ -51,7 +51,7 @@ class Archivist(object): if gen is not None: chat_record = self.chat_file(tag=tag, file="record", 
ext=self.chatext) - file = open(chat_record, 'w') + file = open(chat_record, 'w', encoding="utf-16") file.write(gen) file.close() @@ -59,11 +59,12 @@ class Archivist(object): filepath = self.chat_file(tag=tag, file="record", ext=self.chatext) try: file = open(filepath, 'r') - record = file.read() + record = file.read().encode().decode('utf-8') file.close() return record - except Exception: + except Exception as e: self.logger.error("Vocabulary file {} not found.".format(filepath)) + self.logger.exception(e) return None def load_reader(self, tag): @@ -139,29 +140,10 @@ class Archivist(object): raise e def update(self, oldext=None): - failed = [] - remove = False - if not oldext: - oldext = self.chatext - remove = True - - directory = os.fsencode(self.chatdir) - for file in os.listdir(directory): - filename = os.fsdecode(file) - if filename.endswith(oldext): - try: - self.logger.info("Updating chat " + filename) - scribe = self.recall(filename) - if scribe is not None: - scribe.store(scribe.parrot.dumps()) - self.wakeParrot(scribe.cid()) - self.logger.info("--- Update done: " + scribe.title()) - if remove: - os.remove(filename) - except Exception as e: - failed.append(filename) - self.logger.error("Found the following error when trying to update:") - self.logger.exception(e) - else: - continue - return failed + for reader in self.readers_pass(): + try: + self.store(*reader.archive()) + except Exception as e: + e.message = e.message[:1000] + self.logger.exception(e) + yield reader.cid() diff --git a/generator.py b/generator.py index f9deaa1..bd1bb50 100644 --- a/generator.py +++ b/generator.py @@ -80,7 +80,7 @@ class Generator(object): def dumps(self): # Dumps the cache dictionary into a JSON-formatted string - return json.dumps(self.cache) + return json.dumps(self.cache, ensure_ascii=False) def loads(dump): # Loads the cache dictionary from a JSON-formatted string From 64c117258d497b1e1aabf0b0bad19fec223b6ca7 Mon Sep 17 00:00:00 2001 From: vylion Date: Mon, 26 Oct 
2020 22:59:24 +0100 Subject: [PATCH 09/22] Changed file encoding (part 2) --- .gitignore | 2 ++ archivist.py | 45 ++++++++++++++++++++------------------------- 2 files changed, 22 insertions(+), 25 deletions(-) diff --git a/.gitignore b/.gitignore index 1ae7c90..28c693d 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,5 @@ __pycache__/* misc/* bkp/* test/* +*log* + diff --git a/archivist.py b/archivist.py index 489caab..f09fb25 100644 --- a/archivist.py +++ b/archivist.py @@ -56,6 +56,18 @@ class Archivist(object): file.close() def load_vocab(self, tag): + filepath = self.chat_file(tag=tag, file="record", ext=self.chatext) + try: + file = open(filepath, 'r', encoding="utf-16") + record = file.read() + file.close() + return record + except Exception as e: + self.logger.error("Vocabulary file {} not found.".format(filepath)) + self.logger.exception(e) + return None + + def load_vocab_old(self, tag): filepath = self.chat_file(tag=tag, file="record", ext=self.chatext) try: file = open(filepath, 'r') @@ -90,25 +102,6 @@ class Archivist(object): else: return None - def load_reader_old(self, filename): - file = open(self.chatdir + filename, 'rb') - reader = None - try: - reader, vocab = Reader.FromFile(pickle.load(file), self) - self.logger.info("Unpickled {}{}".format(self.chatdir, filename)) - except pickle.UnpicklingError: - file.close() - file = open(self.chatdir + filename, 'r') - try: - scribe = Reader.FromFile(file.read(), self) - self.logger.info("Read {}{} text file".format(self.chatdir, filename)) - except Exception as e: - self.logger.error("Failed reading {}{}".format(self.chatdir, filename)) - self.logger.exception(e) - raise e - file.close() - return scribe - def chat_count(self): count = 0 directory = os.fsencode(self.chatdir) @@ -139,11 +132,13 @@ class Archivist(object): self.logger.exception(e) raise e - def update(self, oldext=None): + def update(self): for reader in self.readers_pass(): - try: - self.store(*reader.archive()) - except Exception as e: 
- e.message = e.message[:1000] - self.logger.exception(e) + if reader.vocab is None: yield reader.cid() + else: + try: + self.store(*reader.archive()) + except Exception as e: + self.logger.exception(e) + yield reader.cid() From 3402aa68998a7f5a5dac479264373827ec3ee874 Mon Sep 17 00:00:00 2001 From: vylion Date: Mon, 26 Oct 2020 23:16:31 +0100 Subject: [PATCH 10/22] Changed to clean startup --- velasco.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/velasco.py b/velasco.py index 0c6dfa0..0891be4 100644 --- a/velasco.py +++ b/velasco.py @@ -130,7 +130,7 @@ def main(): speakerbot.wake(updater.bot, wake_msg) # Start the Bot - updater.start_polling() + updater.start_polling(clean=True) # Run the bot until you press Ctrl-C or the process receives SIGINT, # SIGTERM or SIGABRT. This should be used most of the time, since From 06c9412b4a4defd9c314e4eb496ba4a283f581d8 Mon Sep 17 00:00:00 2001 From: vylion Date: Mon, 26 Oct 2020 23:19:37 +0100 Subject: [PATCH 11/22] Avoid rewriting every file at startup --- archivist.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/archivist.py b/archivist.py index f09fb25..9285471 100644 --- a/archivist.py +++ b/archivist.py @@ -123,9 +123,10 @@ class Archivist(object): self.logger.info("Successfully passed through {} ({}) chat.\n".format(cid, reader.title())) if self.bypass: # I forgot what I made this for reader.set_period(random.randint(self.max_period // 2, self.max_period)) + self.store(*reader.archive()) elif reader.period() > self.max_period: reader.set_period(self.max_period) - self.store(*reader.archive()) + self.store(*reader.archive()) yield reader except Exception as e: self.logger.error("Failed passing through {}".format(dirname)) From bb00efe1d15f0bd88884b63154a9f12279cbffd5 Mon Sep 17 00:00:00 2001 From: vylion Date: Mon, 26 Oct 2020 23:48:55 +0100 Subject: [PATCH 12/22] Forgot some pytho-telegram-bot v12 callback changes --- archivist.py | 2 +- velasco.py | 4 ++-- 2 files changed, 3 
insertions(+), 3 deletions(-) diff --git a/archivist.py b/archivist.py index 9285471..2392a1f 100644 --- a/archivist.py +++ b/archivist.py @@ -1,5 +1,5 @@ -import os, random, pickle +import os, random from reader import Reader from generator import Generator diff --git a/velasco.py b/velasco.py index 0891be4..5170bcf 100644 --- a/velasco.py +++ b/velasco.py @@ -52,7 +52,7 @@ explanation = "I decompose every message I read in groups of 3 consecutive words def static_reply(text, format=None): - def reply(bot, update): + def reply(update, context): update.message.reply_text(text, parse_mode=format) return reply @@ -63,7 +63,7 @@ def error(update, context): # raise error -def stop(bot, update): +def stop(update, context): reader = speakerbot.get_reader(str(update.message.chat.id)) # del chatlogs[chatlog.id] # os.remove(LOG_DIR + chatlog.id + LOG_EXT) From 895e1cd843a9182859c6dff67f03bc28f5625ef3 Mon Sep 17 00:00:00 2001 From: vylion Date: Tue, 27 Oct 2020 14:42:58 +0100 Subject: [PATCH 13/22] Reverted back to non-clean startup Added a memory of last C updated chats (default: 20) --- archivist.py | 4 +- memorylist.py | 66 +++++++++++++++++++++++++++ speaker.py | 120 ++++++++++++++++++++++++++++---------------------- velasco.py | 4 +- 4 files changed, 138 insertions(+), 56 deletions(-) create mode 100644 memorylist.py diff --git a/archivist.py b/archivist.py index 2392a1f..80cd873 100644 --- a/archivist.py +++ b/archivist.py @@ -27,10 +27,10 @@ class Archivist(object): self.bypass = bypass def chat_folder(self, *formatting, **key_format): - return ("./" + self.chatdir + "chat_{tag}").format(*formatting, **key_format) + return (self.chatdir + "chat_{tag}").format(*formatting, **key_format) def chat_file(self, *formatting, **key_format): - return ("./" + self.chatdir + "chat_{tag}/{file}{ext}").format(*formatting, **key_format) + return (self.chatdir + "chat_{tag}/{file}{ext}").format(*formatting, **key_format) def store(self, tag, data, gen): chat_folder = 
self.chat_folder(tag=tag) diff --git a/memorylist.py b/memorylist.py new file mode 100644 index 0000000..cd1afa4 --- /dev/null +++ b/memorylist.py @@ -0,0 +1,66 @@ +#!/usr/bin/env python3 + +from collections.abc import MutableSequence + + +class MemoryList(MutableSequence): + def __init__(self, capacity, data=None): + """Initialize the class""" + super(MemoryList, self).__init__() + self._capacity = capacity + if (data is not None): + self._list = list(data) + else: + self._list = list() + + def __repr__(self): + return "<{0} {1}, capacity {2}>".format(self.__class__.__name__, self._list, self._capacity) + + def __len__(self): + """List length""" + return len(self._list) + + def capacity(self): + return self._capacity + + def __getitem__(self, ii): + """Get a list item""" + return self._list[ii] + + def __delitem__(self, ii): + """Delete an item""" + del self._list[ii] + + def __setitem__(self, ii, val): + self._list[ii] = val + + def __str__(self): + return str(self._list) + + def __contains__(self, val): + return val in self._list + + def __iter__(self): + return self._list.__iter__() + + def insert(self, ii, val): + self._list.insert(ii, val) + + def append(self, val): + if val in self._list: + self._list.remove(val) + + self._list.append(val) + if len(self._list) >= self._capacity: + x = self._list[0] + del self._list[0] + return x + else: + return None + + def get_next(self, cond): + val = next((v for v in self._list if cond(v)), None) + if val is not None: + self._list.remove(val) + self._list.append(val) + return val diff --git a/speaker.py b/speaker.py index 683f5e7..5810e7c 100644 --- a/speaker.py +++ b/speaker.py @@ -2,10 +2,15 @@ import random import time +from memorylist import MemoryList from reader import Reader, get_chat_title from telegram.error import TimedOut, NetworkError +def eprint(*args, **kwargs): + print(*args, file=sys.stderr, **kwargs) + + def send(bot, cid, text, replying=None, formatting=None, logger=None, **kwargs): kwargs["parse_mode"] 
= formatting kwargs["reply_to_message_id"] = replying @@ -13,7 +18,8 @@ def send(bot, cid, text, replying=None, formatting=None, logger=None, **kwargs): if text.startswith(Reader.TAG_PREFIX): words = text.split(maxsplit=1) if logger: - logger.info('Sending {} "{}" to {}'.format(words[0][4:-1], words[1], cid)) + # logger.info('Sending {} "{}" to {}'.format(words[0][4:-1], words[1], cid)) + eprint('.') # Logs something like 'Sending VIDEO "VIDEO_ID" to CHAT_ID' if words[0] == Reader.STICKER_TAG: @@ -35,7 +41,8 @@ class Speaker(object): ModeChance = "MODE_CHANCE" def __init__(self, username, archivist, logger, nicknames=[], mute_time=60, - reply=0.1, repeat=0.05, wakeup=False, mode=ModeFixed + reply=0.1, repeat=0.05, wakeup=False, mode=ModeFixed, + memory=20 ): self.names = nicknames self.mute_time = mute_time @@ -51,8 +58,8 @@ class Speaker(object): self.repeat = repeat self.filter_cids = archivist.filter_cids self.bypass = archivist.bypass - self.current_reader = None self.time_counter = None + self.memory = MemoryList(memory) def announce(self, bot, announcement, check=(lambda _: True)): # Sends an announcement to all chats that pass the check @@ -74,25 +81,31 @@ class Speaker(object): return reader.check_type("group") self.announce(bot, wake, group_check) + def get_reader(self, cid): + return self.memory.get_next(lambda r: r.cid() == cid) + def load_reader(self, chat): cid = str(chat.id) - if self.current_reader is not None and cid == self.current_reader.cid(): - return - - if self.current_reader is not None: - self.current_reader.commit_memory() - self.save() + reader = self.get_reader(cid) + if reader is not None: + return reader reader = self.archivist.get_reader(cid) if not reader: reader = Reader.FromChat(chat, self.archivist.max_period, self.logger) - self.current_reader = reader - def get_reader(self, cid): - if self.current_reader is None or cid != self.current_reader.cid(): + old_reader = self.memory.append(reader) + if old_reader is not None: + 
old_reader.commit_memory() + self.store(old_reader) + + return reader + + def access_reader(self, cid): + reader = self.get_reader(cid) + if reader is None: return self.archivist.get_reader(cid) - - return self.current_reader + return reader def mentioned(self, text): if self.username in text: @@ -102,11 +115,14 @@ class Speaker(object): return True return False - def should_reply(self, message): + def is_mute(self): current_time = int(time.perf_counter()) - if self.time_counter is not None and (current_time - self.time_counter) < self.mute_time: + return self.time_counter is not None and (current_time - self.time_counter) < self.mute_time + + def should_reply(self, message, reader): + if self.is_mute(): return False - if not self.bypass and self.current_reader.is_restricted(): + if not self.bypass and reader.is_restricted(): user = message.chat.get_member(message.from_user.id) if not self.user_is_admin(user): # update.message.reply_text("You do not have permissions to do that.") @@ -116,40 +132,38 @@ class Speaker(object): return (((replied is not None) and (replied.from_user.name == self.username)) or (self.mentioned(text))) - def save(self): - if self.current_reader is None: + def store(self, reader): + if reader is None: raise ValueError("Tried to store a None Reader.") else: - self.archivist.store(*self.current_reader.archive()) + self.archivist.store(*reader.archive()) def read(self, update, context): if update.message is None: return chat = update.message.chat - self.load_reader(chat) - self.current_reader.read(update.message) + reader = self.load_reader(chat) + reader.read(update.message) - if self.should_reply(update.message) and self.current_reader.is_answering(): - self.say(context.bot, replying=update.message.message_id) + if self.should_reply(update.message, reader) and reader.is_answering(): + self.say(context.bot, reader, replying=update.message.message_id) return title = get_chat_title(update.message.chat) - if title != self.current_reader.title(): 
- self.current_reader.set_title(title) + if title != reader.title(): + reader.set_title(title) - self.current_reader.countdown -= 1 - if self.current_reader.countdown < 0: - self.current_reader.reset_countdown() - rid = self.current_reader.random_memory() if random.random() <= self.reply else None - self.say(context.bot, replying=rid) - elif (self.current_reader.period() - self.current_reader.countdown) % self.archivist.save_count == 0: - self.save() + reader.countdown -= 1 + if reader.countdown < 0: + reader.reset_countdown() + rid = reader.random_memory() if random.random() <= self.reply else None + self.say(context.bot, reader, replying=rid) def speak(self, update, context): chat = (update.message.chat) - self.load_reader(chat) + reader = self.load_reader(chat) - if not self.bypass and self.current_reader.is_restricted(): + if not self.bypass and reader.is_restricted(): user = update.message.chat.get_member(update.message.from_user.id) if not self.user_is_admin(user): # update.message.reply_text("You do not have permissions to do that.") @@ -160,31 +174,33 @@ class Speaker(object): rid = replied.message_id if replied else mid words = update.message.text.split() if len(words) > 1: - self.current_reader.read(' '.join(words[1:])) - self.say(context.bot, replying=rid) + reader.read(' '.join(words[1:])) + self.say(context.bot, reader, replying=rid) def user_is_admin(self, member): - self.logger.info("user {} ({}) requesting a restricted action".format(str(member.user.id), member.user.name)) + # self.logger.info("user {} ({}) requesting a restricted action".format(str(member.user.id), member.user.name)) # self.logger.info("Bot Creator ID is {}".format(str(self.archivist.admin))) return ((member.status == 'creator') or (member.status == 'administrator') or (member.user.id == self.archivist.admin)) - def speech(self): - return self.current_reader.generate_message(self.archivist.max_len) + def speech(self, reader): + return reader.generate_message(self.archivist.max_len) 
- def say(self, bot, replying=None, **kwargs): - cid = self.current_reader.cid() + def say(self, bot, reader, replying=None, **kwargs): + cid = reader.cid() if self.filter_cids is not None and cid not in self.filter_cids: return + if self.is_mute(): + return try: - send(bot, cid, self.speech(), replying, logger=self.logger, **kwargs) + send(bot, cid, self.speech(reader), replying, logger=self.logger, **kwargs) if self.bypass: max_period = self.archivist.max_period - self.current_reader.set_period(random.randint(max_period // 4, max_period)) + reader.set_period(random.randint(max_period // 4, max_period)) if random.random() <= self.repeat: - send(bot, cid, self.speech(), logger=self.logger, **kwargs) + send(bot, cid, self.speech(reader), logger=self.logger, **kwargs) except TimedOut as e: self.logger.error("Telegram timed out.") self.logger.exception(e) @@ -201,7 +217,7 @@ class Speaker(object): def get_count(self, update, context): cid = str(update.message.chat.id) - reader = self.get_reader(cid) + reader = self.access_reader(cid) num = str(reader.count()) if reader else "no" update.message.reply_text("I remember {} messages.".format(num)) @@ -213,7 +229,7 @@ class Speaker(object): def period(self, update, context): chat = update.message.chat - reader = self.get_reader(str(chat.id)) + reader = self.access_reader(str(chat.id)) words = update.message.text.split() if len(words) <= 1: @@ -235,7 +251,7 @@ class Speaker(object): def answer(self, update, context): chat = update.message.chat - reader = self.get_reader(str(chat.id)) + reader = self.access_reader(str(chat.id)) words = update.message.text.split() if len(words) <= 1: @@ -261,7 +277,7 @@ class Speaker(object): return chat = update.message.chat user = chat.get_member(update.message.from_user.id) - reader = self.get_reader(str(chat.id)) + reader = self.access_reader(str(chat.id)) if reader.is_restricted(): if not self.user_is_admin(user): @@ -278,7 +294,7 @@ class Speaker(object): return chat = 
update.message.chat user = chat.get_member(update.message.from_user.id) - reader = self.get_reader(str(chat.id)) + reader = self.access_reader(str(chat.id)) if reader.is_restricted(): if not self.user_is_admin(user): @@ -294,7 +310,7 @@ class Speaker(object): usr = msg.from_user cht = msg.chat chtname = cht.title if cht.title else cht.first_name - rdr = self.get_reader(str(cht.id)) + rdr = self.access_reader(str(cht.id)) answer = ("You're **{name}**, with username `{username}`, and " "id `{uid}`.\nYou're messaging in the chat named __{cname}__," @@ -308,7 +324,7 @@ class Speaker(object): def where(self, update, context): msg = update.message chat = msg.chat - reader = self.get_reader(str(chat.id)) + reader = self.access_reader(str(chat.id)) if reader.is_restricted() and reader.is_silenced(): permissions = "restricted and silenced" elif reader.is_restricted(): diff --git a/velasco.py b/velasco.py index 5170bcf..b9d1953 100644 --- a/velasco.py +++ b/velasco.py @@ -89,7 +89,7 @@ def main(): filter_cids.append(str(args.admin_id)) archivist = Archivist(logger, - chatdir="chatlogs/", + chatdir="./chatlogs/", chatext=".vls", admin=args.admin_id, filter_cids=filter_cids, @@ -130,7 +130,7 @@ def main(): speakerbot.wake(updater.bot, wake_msg) # Start the Bot - updater.start_polling(clean=True) + updater.start_polling() # Run the bot until you press Ctrl-C or the process receives SIGINT, # SIGTERM or SIGABRT. 
This should be used most of the time, since From 0974ec52e74b42d5b5cd429f6908d4f16248a378 Mon Sep 17 00:00:00 2001 From: vylion Date: Tue, 27 Oct 2020 17:10:41 +0100 Subject: [PATCH 14/22] Moved constructor args from Archivist to Speaker when it made sense Added a periodic save time for chats in memory Added flags for mute time, save time, chats folder --- archivist.py | 17 +++------- memorylist.py | 3 ++ speaker.py | 92 +++++++++++++++++++++++++++++++++------------------ velasco.py | 37 +++++++++++++++------ 4 files changed, 94 insertions(+), 55 deletions(-) diff --git a/archivist.py b/archivist.py index 80cd873..29d72db 100644 --- a/archivist.py +++ b/archivist.py @@ -7,8 +7,8 @@ from generator import Generator class Archivist(object): def __init__(self, logger, chatdir=None, chatext=None, admin=0, - period_inc=5, save_count=15, max_period=100000, max_len=50, - read_only=False, filter_cids=None, bypass=False + period_inc=5, save_count=15, max_period=100000, + read_only=False ): if chatdir is None or len(chatdir) == 0: chatdir = "./" @@ -17,20 +17,16 @@ class Archivist(object): self.logger = logger self.chatdir = chatdir self.chatext = chatext - self.admin = admin self.period_inc = period_inc self.save_count = save_count self.max_period = max_period - self.max_len = max_len self.read_only = read_only - self.filter_cids = filter_cids - self.bypass = bypass def chat_folder(self, *formatting, **key_format): - return (self.chatdir + "chat_{tag}").format(*formatting, **key_format) + return (self.chatdir + "/chat_{tag}").format(*formatting, **key_format) def chat_file(self, *formatting, **key_format): - return (self.chatdir + "chat_{tag}/{file}{ext}").format(*formatting, **key_format) + return (self.chatdir + "/chat_{tag}/{file}{ext}").format(*formatting, **key_format) def store(self, tag, data, gen): chat_folder = self.chat_folder(tag=tag) @@ -121,10 +117,7 @@ class Archivist(object): reader = self.get_reader(cid) # self.logger.info("Chat {} contents:\n{}".format(cid, 
reader.card.dumps())) self.logger.info("Successfully passed through {} ({}) chat.\n".format(cid, reader.title())) - if self.bypass: # I forgot what I made this for - reader.set_period(random.randint(self.max_period // 2, self.max_period)) - self.store(*reader.archive()) - elif reader.period() > self.max_period: + if reader.period() > self.max_period: reader.set_period(self.max_period) self.store(*reader.archive()) yield reader diff --git a/memorylist.py b/memorylist.py index cd1afa4..f62c05f 100644 --- a/memorylist.py +++ b/memorylist.py @@ -64,3 +64,6 @@ class MemoryList(MutableSequence): self._list.remove(val) self._list.append(val) return val + + def remove(self, val): + self._list.remove(val) diff --git a/speaker.py b/speaker.py index 5810e7c..dc3b20f 100644 --- a/speaker.py +++ b/speaker.py @@ -2,13 +2,14 @@ import random import time +from sys import stderr from memorylist import MemoryList from reader import Reader, get_chat_title -from telegram.error import TimedOut, NetworkError +from telegram.error import NetworkError def eprint(*args, **kwargs): - print(*args, file=sys.stderr, **kwargs) + print(*args, end=' ', file=stderr, **kwargs) def send(bot, cid, text, replying=None, formatting=None, logger=None, **kwargs): @@ -18,8 +19,8 @@ def send(bot, cid, text, replying=None, formatting=None, logger=None, **kwargs): if text.startswith(Reader.TAG_PREFIX): words = text.split(maxsplit=1) if logger: - # logger.info('Sending {} "{}" to {}'.format(words[0][4:-1], words[1], cid)) - eprint('.') + logger.info('Sending {} "{}" to {}'.format(words[0][4:-1], words[1], cid)) + # eprint('[]') # Logs something like 'Sending VIDEO "VIDEO_ID" to CHAT_ID' if words[0] == Reader.STICKER_TAG: @@ -33,37 +34,48 @@ def send(bot, cid, text, replying=None, formatting=None, logger=None, **kwargs): if logger: mtype = "reply" if replying else "message" logger.info("Sending a {} to {}: '{}'".format(mtype, cid, text)) + # eprint('.') return bot.send_message(cid, text, **kwargs) class 
Speaker(object): ModeFixed = "FIXED_MODE" - ModeChance = "MODE_CHANCE" + ModeChance = "CHANCE_MODE" - def __init__(self, username, archivist, logger, nicknames=[], mute_time=60, + def __init__(self, username, archivist, logger, admin=0, nicknames=[], reply=0.1, repeat=0.05, wakeup=False, mode=ModeFixed, - memory=20 + memory=20, mute_time=60, save_time=3600, bypass=False, + filter_cids=[], max_len=50 ): self.names = nicknames self.mute_time = mute_time + self.mute_timer = None self.username = username - self.archivist = archivist + + self.max_period = archivist.max_period + self.get_reader_file = archivist.get_reader + self.store_file = archivist.store + self.readers_pass = archivist.readers_pass + logger.info("----") logger.info("Finished loading.") logger.info("Loaded {} chats.".format(archivist.chat_count())) logger.info("----") + self.wakeup = wakeup self.logger = logger self.reply = reply self.repeat = repeat - self.filter_cids = archivist.filter_cids - self.bypass = archivist.bypass - self.time_counter = None + self.filter_cids = filter_cids self.memory = MemoryList(memory) + self.memory_timer = time.perf_counter() + self.admin = admin + self.bypass = bypass + self.max_len = max_len def announce(self, bot, announcement, check=(lambda _: True)): # Sends an announcement to all chats that pass the check - for reader in self.archivist.readers_pass(): + for reader in self.readers_pass(): try: if check(reader): send(bot, reader.cid(), announcement) @@ -74,7 +86,7 @@ class Speaker(object): def wake(self, bot, wake): # If wakeup flag is set, sends a wake-up message as announcement to all chats that # are groups. 
Also, always sends a wakeup message to the 'bot admin' - send(bot, self.archivist.admin, wake) + send(bot, self.admin, wake) if self.wakeup: def group_check(reader): @@ -90,9 +102,9 @@ class Speaker(object): if reader is not None: return reader - reader = self.archivist.get_reader(cid) + reader = self.get_reader_file(cid) if not reader: - reader = Reader.FromChat(chat, self.archivist.max_period, self.logger) + reader = Reader.FromChat(chat, self.max_period, self.logger) old_reader = self.memory.append(reader) if old_reader is not None: @@ -104,7 +116,7 @@ class Speaker(object): def access_reader(self, cid): reader = self.get_reader(cid) if reader is None: - return self.archivist.get_reader(cid) + return self.get_reader_file(cid) return reader def mentioned(self, text): @@ -117,7 +129,7 @@ class Speaker(object): def is_mute(self): current_time = int(time.perf_counter()) - return self.time_counter is not None and (current_time - self.time_counter) < self.mute_time + return self.mute_timer is not None and (current_time - self.mute_timer) < self.mute_time def should_reply(self, message, reader): if self.is_mute(): @@ -136,9 +148,25 @@ class Speaker(object): if reader is None: raise ValueError("Tried to store a None Reader.") else: - self.archivist.store(*reader.archive()) + self.store_file(*reader.archive()) + + def should_save(self): + current_time = int(time.perf_counter()) + elapsed = (current_time - self.memory_timer) + self.logger.debug("Save check: {}".format(elapsed)) + return elapsed < self.save_time + + def save(self): + if self.should_save(): + self.logger.info("Saving chats in memory...") + for reader in self.memory: + self.store(reader) + self.memory_timer = time.perf_counter() + self.logger.info("Chats saved.") def read(self, update, context): + self.save() + if update.message is None: return chat = update.message.chat @@ -178,18 +206,19 @@ class Speaker(object): self.say(context.bot, reader, replying=rid) def user_is_admin(self, member): - # 
self.logger.info("user {} ({}) requesting a restricted action".format(str(member.user.id), member.user.name)) - # self.logger.info("Bot Creator ID is {}".format(str(self.archivist.admin))) + self.logger.info("user {} ({}) requesting a restricted action".format(str(member.user.id), member.user.name)) + # eprint('!') + # self.logger.info("Bot Creator ID is {}".format(str(self.admin))) return ((member.status == 'creator') or (member.status == 'administrator') - or (member.user.id == self.archivist.admin)) + or (member.user.id == self.admin)) def speech(self, reader): - return reader.generate_message(self.archivist.max_len) + return reader.generate_message(self.max_len) def say(self, bot, reader, replying=None, **kwargs): cid = reader.cid() - if self.filter_cids is not None and cid not in self.filter_cids: + if cid not in self.filter_cids: return if self.is_mute(): return @@ -197,17 +226,14 @@ class Speaker(object): try: send(bot, cid, self.speech(reader), replying, logger=self.logger, **kwargs) if self.bypass: - max_period = self.archivist.max_period + max_period = self.max_period reader.set_period(random.randint(max_period // 4, max_period)) if random.random() <= self.repeat: send(bot, cid, self.speech(reader), logger=self.logger, **kwargs) - except TimedOut as e: - self.logger.error("Telegram timed out.") - self.logger.exception(e) except NetworkError as e: if '429' in e.message: self.logger.error("Error: TooManyRequests. 
Going mute for {} seconds.".format(self.mute_time)) - self.time_counter = int(time.perf_counter()) + self.mute_timer = int(time.perf_counter()) else: self.logger.error("Sending a message caused network error:") self.logger.exception(e) @@ -223,7 +249,7 @@ class Speaker(object): update.message.reply_text("I remember {} messages.".format(num)) def get_chats(self, update, context): - lines = ["[{}]: {}".format(reader.cid(), reader.title()) for reader in self.archivist.readers_pass] + lines = ["[{}]: {}".format(reader.cid(), reader.title()) for reader in self.readers_pass()] chat_list = "\n".join(lines) update.message.reply_text("I have the following chats:\n\n" + chat_list) @@ -245,7 +271,7 @@ class Speaker(object): period = int(words[1]) period = reader.set_period(period) update.message.reply_text("Period of speaking set to {}.".format(period)) - self.archivist.store(*reader.archive()) + self.store_file(*reader.archive()) except Exception: update.message.reply_text("Format was confusing; period unchanged from {}.".format(reader.period())) @@ -267,7 +293,7 @@ class Speaker(object): answer = float(words[1]) answer = reader.set_answer(answer) update.message.reply_text("Answer probability set to {}.".format(answer)) - self.archivist.store(*reader.archive()) + self.store_file(*reader.archive()) except Exception: update.message.reply_text("Format was confusing; answer probability unchanged from {}.".format(reader.answer())) @@ -286,7 +312,7 @@ class Speaker(object): reader.toggle_restrict() allowed = "let only admins" if reader.is_restricted() else "let everyone" update.message.reply_text("I will {} configure me now.".format(allowed)) - self.archivist.store(*reader.archive()) + self.store_file(*reader.archive()) def silence(self, update, context): if "group" not in update.message.chat.type: @@ -303,7 +329,7 @@ class Speaker(object): reader.toggle_silence() allowed = "avoid mentioning" if reader.is_silenced() else "mention" update.message.reply_text("I will {} people 
now.".format(allowed)) - self.archivist.store(*reader.archive()) + self.store_file(*reader.archive()) def who(self, update, context): msg = update.message diff --git a/velasco.py b/velasco.py index b9d1953..48341d2 100644 --- a/velasco.py +++ b/velasco.py @@ -73,11 +73,22 @@ def stop(update, context): def main(): global speakerbot parser = argparse.ArgumentParser(description='A Telegram markov bot.') - parser.add_argument('token', metavar='TOKEN', help='The Bot Token to work with the Telegram Bot API') - parser.add_argument('admin_id', metavar='ADMIN_ID', type=int, help='The ID of the Telegram user that manages this bot') - parser.add_argument('-w', '--wakeup', action='store_true', help='Flag that makes the bot send a first message to all chats during wake up.') - parser.add_argument('-f', '--filter', nargs='*', metavar='FILTER_CID', help='Zero or more chat IDs to add in a filter whitelist (default is empty, all chats allowed)') - parser.add_argument('-n', '--nicknames', nargs='*', metavar='NICKNAME', help='Any possible nicknames that the bot could answer to.') + parser.add_argument('token', metavar='TOKEN', + help='The Bot Token to work with the Telegram Bot API') + parser.add_argument('admin_id', metavar='ADMIN_ID', type=int, default=0, + help='The ID of the Telegram user that manages this bot') + parser.add_argument('-w', '--wakeup', action='store_true', + help='Flag that makes the bot send a first message to all chats during wake up.') + parser.add_argument('-f', '--filter', nargs='*', default=[], metavar='cid', + help='Zero or more chat IDs to add in a filter whitelist (default is empty, all chats allowed)') + parser.add_argument('-n', '--nicknames', nargs='*', default=[], metavar='name', + help='Any possible nicknames that the bot could answer to.') + parser.add_argument('-d', '--directory', metavar='CHATLOG_DIR', default='./chatlogs', + help='The chat logs directory path (default: "./chatlogs").') + parser.add_argument('-m', '--mute_time', metavar='T', 
type=int, default=60, + help='The time (in s) for the muting period when Telegram limits the bot. (default: 60).') + parser.add_argument('-s', '--save_time', metavar='T', type=int, default=3600, + help='The time (in s) for periodic saves (default: 3600).') args = parser.parse_args() @@ -89,15 +100,21 @@ def main(): filter_cids.append(str(args.admin_id)) archivist = Archivist(logger, - chatdir="./chatlogs/", + chatdir=args.directory, chatext=".vls", - admin=args.admin_id, - filter_cids=filter_cids, read_only=False ) username = updater.bot.get_me().username - speakerbot = Speaker("@" + username, archivist, logger, nicknames=args.nicknames, wakeup=args.wakeup) + speakerbot = Speaker("@" + username, + archivist, + logger, + admin=args.admin_id, + filter_cids=filter_cids, + nicknames=args.nicknames, + wakeup=args.wakeup, + mute_time=args.mute_time, + save_time=args.save_time) # Get the dispatcher to register handlers dp = updater.dispatcher @@ -109,7 +126,7 @@ def main(): dp.add_handler(CommandHandler("help", static_reply(help_msg))) dp.add_handler(CommandHandler("count", speakerbot.get_count)) dp.add_handler(CommandHandler("period", speakerbot.period)) - dp.add_handler(CommandHandler("list", speakerbot.get_chats, filters=Filters.chat(chat_id=archivist.admin))) + dp.add_handler(CommandHandler("list", speakerbot.get_chats, filters=Filters.chat(chat_id=speakerbot.admin))) # dp.add_handler(CommandHandler("user", get_name, Filters.chat(chat_id=archivist.admin))) # dp.add_handler(CommandHandler("id", get_id)) dp.add_handler(CommandHandler("stop", stop)) From d7ad39dfe0b12f97611ed3168a0ec8e254ca3385 Mon Sep 17 00:00:00 2001 From: vylion Date: Tue, 27 Oct 2020 17:24:30 +0100 Subject: [PATCH 15/22] Fixed periodic chat save mechanism --- speaker.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/speaker.py b/speaker.py index dc3b20f..e96257f 100644 --- a/speaker.py +++ b/speaker.py @@ -68,7 +68,8 @@ class Speaker(object): self.repeat = repeat 
self.filter_cids = filter_cids self.memory = MemoryList(memory) - self.memory_timer = time.perf_counter() + self.save_time = save_time + self.memory_timer = int(time.perf_counter()) self.admin = admin self.bypass = bypass self.max_len = max_len @@ -154,7 +155,7 @@ class Speaker(object): current_time = int(time.perf_counter()) elapsed = (current_time - self.memory_timer) self.logger.debug("Save check: {}".format(elapsed)) - return elapsed < self.save_time + return elapsed >= self.save_time def save(self): if self.should_save(): From 457868259c4496a87be137b9e2c3c605c6054147 Mon Sep 17 00:00:00 2001 From: vylion Date: Tue, 27 Oct 2020 19:29:38 +0100 Subject: [PATCH 16/22] Testing json streaming directly into/from files instead of using middleman string variable --- archivist.py | 16 +++++++--------- generator.py | 11 +++++++++++ reader.py | 2 +- speaker.py | 10 ++++------ velasco.py | 3 +++ 5 files changed, 26 insertions(+), 16 deletions(-) diff --git a/archivist.py b/archivist.py index 29d72db..69d35fb 100644 --- a/archivist.py +++ b/archivist.py @@ -1,5 +1,5 @@ -import os, random +import os from reader import Reader from generator import Generator @@ -28,7 +28,7 @@ class Archivist(object): def chat_file(self, *formatting, **key_format): return (self.chatdir + "/chat_{tag}/{file}{ext}").format(*formatting, **key_format) - def store(self, tag, data, gen): + def store(self, tag, data, vocab_dumper): chat_folder = self.chat_folder(tag=tag) chat_card = self.chat_file(tag=tag, file="card", ext=".txt") @@ -45,17 +45,17 @@ class Archivist(object): file.write(data) file.close() - if gen is not None: + if vocab_dumper is not None: chat_record = self.chat_file(tag=tag, file="record", ext=self.chatext) file = open(chat_record, 'w', encoding="utf-16") - file.write(gen) + vocab_dumper(file) file.close() def load_vocab(self, tag): filepath = self.chat_file(tag=tag, file="record", ext=self.chatext) try: file = open(filepath, 'r', encoding="utf-16") - record = file.read() + record = 
Generator.load(file) file.close() return record except Exception as e: @@ -89,10 +89,8 @@ class Archivist(object): def get_reader(self, tag): reader = self.load_reader(tag) if reader: - vocab_dump = self.load_vocab(tag) - if vocab_dump: - vocab = Generator.loads(vocab_dump) - else: + vocab = self.load_vocab(tag) + if not vocab: vocab = Generator() return Reader.FromCard(reader, vocab, self.max_period, self.logger) else: diff --git a/generator.py b/generator.py index bd1bb50..590eead 100644 --- a/generator.py +++ b/generator.py @@ -55,6 +55,9 @@ class Generator(object): MODE_LIST = "MODE_LIST" # This is to mark when we want to create a Generator object from a given list of words + MODE_DICT = "MODE_DICT" + # This is to mark when we want to create a Generator object from a given dictionary + MODE_CHAT_DATA = "MODE_CHAT_DATA" # This is to mark when we want to create a Generator object from Chat data (WIP) @@ -69,6 +72,8 @@ class Generator(object): elif mode == Generator.MODE_LIST: self.cache = {} self.load_list(load) + elif mode == Generator.MODE_DICT: + self.cache = load else: self.cache = {} # The cache is where we store our words @@ -82,6 +87,9 @@ class Generator(object): # Dumps the cache dictionary into a JSON-formatted string return json.dumps(self.cache, ensure_ascii=False) + def dump(self, f): + json.dump(self.cache, f, ensure_ascii=False, indent='') + def loads(dump): # Loads the cache dictionary from a JSON-formatted string if len(dump) == 0: @@ -90,6 +98,9 @@ class Generator(object): # otherwise return Generator(load=dump, mode=Generator.MODE_JSON) + def load(self, f): + return Generator(load=json.load(f), mode=Generator.MODE_DICT) + def add(self, text): # This takes a string and stores it in the cache, preceding it # with the HEAD that marks the beginning of a new message and diff --git a/reader.py b/reader.py index 4189fa9..e4ce04b 100644 --- a/reader.py +++ b/reader.py @@ -90,7 +90,7 @@ class Reader(object): # Returns a nice lice little tuple package for 
the archivist to save to file. # Also commits to long term memory any pending short term memories self.commit_memory() - return (self.meta.id, self.meta.dumps(), self.vocab.dumps()) + return (self.meta.id, self.meta.dumps(), self.vocab.dump) def check_type(self, t): # Checks type. Returns "True" for "group" even if it's supergroup diff --git a/speaker.py b/speaker.py index e96257f..4f07748 100644 --- a/speaker.py +++ b/speaker.py @@ -232,12 +232,10 @@ class Speaker(object): if random.random() <= self.repeat: send(bot, cid, self.speech(reader), logger=self.logger, **kwargs) except NetworkError as e: - if '429' in e.message: - self.logger.error("Error: TooManyRequests. Going mute for {} seconds.".format(self.mute_time)) - self.mute_timer = int(time.perf_counter()) - else: - self.logger.error("Sending a message caused network error:") - self.logger.exception(e) + self.logger.error("Sending a message caused network error:") + self.logger.exception(e) + self.logger.error("Going mute for {} seconds.".format(self.mute_time)) + self.mute_timer = int(time.perf_counter()) except Exception as e: self.logger.error("Sending a message caused exception:") self.logger.exception(e) diff --git a/velasco.py b/velasco.py index 48341d2..a5b8380 100644 --- a/velasco.py +++ b/velasco.py @@ -85,6 +85,8 @@ def main(): help='Any possible nicknames that the bot could answer to.') parser.add_argument('-d', '--directory', metavar='CHATLOG_DIR', default='./chatlogs', help='The chat logs directory path (default: "./chatlogs").') + parser.add_argument('-c', '--capacity', metavar='C', type=int, default=20, + help='The memory capacity for the last C updated chats. (default: 20).') parser.add_argument('-m', '--mute_time', metavar='T', type=int, default=60, help='The time (in s) for the muting period when Telegram limits the bot. 
(default: 60).') parser.add_argument('-s', '--save_time', metavar='T', type=int, default=3600, @@ -113,6 +115,7 @@ def main(): filter_cids=filter_cids, nicknames=args.nicknames, wakeup=args.wakeup, + memory=args.capacity, mute_time=args.mute_time, save_time=args.save_time) From c91ceda24b668f08bf9636930a627ef0a2286942 Mon Sep 17 00:00:00 2001 From: vylion Date: Tue, 27 Oct 2020 19:31:36 +0100 Subject: [PATCH 17/22] woops --- generator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/generator.py b/generator.py index 590eead..a004a70 100644 --- a/generator.py +++ b/generator.py @@ -98,7 +98,7 @@ class Generator(object): # otherwise return Generator(load=dump, mode=Generator.MODE_JSON) - def load(self, f): + def load(f): return Generator(load=json.load(f), mode=Generator.MODE_DICT) def add(self, text): From a13bdd51c7e95028134c71fce5df901b4433fd90 Mon Sep 17 00:00:00 2001 From: vylion Date: Tue, 27 Oct 2020 19:49:46 +0100 Subject: [PATCH 18/22] Undoing CID whitelist hypercorrection (and changing name to a more self-explanatory one) --- speaker.py | 6 +++--- velasco.py | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/speaker.py b/speaker.py index 4f07748..3d44df3 100644 --- a/speaker.py +++ b/speaker.py @@ -45,7 +45,7 @@ class Speaker(object): def __init__(self, username, archivist, logger, admin=0, nicknames=[], reply=0.1, repeat=0.05, wakeup=False, mode=ModeFixed, memory=20, mute_time=60, save_time=3600, bypass=False, - filter_cids=[], max_len=50 + cid_whitelist=None, max_len=50 ): self.names = nicknames self.mute_time = mute_time @@ -66,7 +66,7 @@ class Speaker(object): self.logger = logger self.reply = reply self.repeat = repeat - self.filter_cids = filter_cids + self.cid_whitelist = cid_whitelist self.memory = MemoryList(memory) self.save_time = save_time self.memory_timer = int(time.perf_counter()) @@ -219,7 +219,7 @@ class Speaker(object): def say(self, bot, reader, replying=None, **kwargs): cid = reader.cid() - if cid 
not in self.filter_cids: + if self.cid_whitelist is not None and cid not in self.cid_whitelist: return if self.is_mute(): return diff --git a/velasco.py b/velasco.py index a5b8380..8670544 100644 --- a/velasco.py +++ b/velasco.py @@ -79,7 +79,7 @@ def main(): help='The ID of the Telegram user that manages this bot') parser.add_argument('-w', '--wakeup', action='store_true', help='Flag that makes the bot send a first message to all chats during wake up.') - parser.add_argument('-f', '--filter', nargs='*', default=[], metavar='cid', + parser.add_argument('-f', '--filter', nargs='*', default=None, metavar='cid', help='Zero or more chat IDs to add in a filter whitelist (default is empty, all chats allowed)') parser.add_argument('-n', '--nicknames', nargs='*', default=[], metavar='name', help='Any possible nicknames that the bot could answer to.') @@ -112,7 +112,7 @@ def main(): archivist, logger, admin=args.admin_id, - filter_cids=filter_cids, + cid_whitelist=filter_cids, nicknames=args.nicknames, wakeup=args.wakeup, memory=args.capacity, From 09cf241f181dc9092824765f4ffcf27acba1cd5a Mon Sep 17 00:00:00 2001 From: vylion Date: Thu, 29 Oct 2020 08:58:21 +0100 Subject: [PATCH 19/22] Added documenting comments accross all the files Added minimum and maximum period values as argument flags --- archivist.py | 38 +++++++++++------ generator.py | 76 ++++++++++++++++----------------- memorylist.py | 38 ++++++++--------- metadata.py | 67 ++++++++++++++++++++++------- reader.py | 76 ++++++++++++++++++++++++--------- speaker.py | 114 ++++++++++++++++++++++++++++++++++++++++---------- velasco.py | 8 +++- 7 files changed, 288 insertions(+), 129 deletions(-) diff --git a/archivist.py b/archivist.py index 69d35fb..051a413 100644 --- a/archivist.py +++ b/archivist.py @@ -7,8 +7,8 @@ from generator import Generator class Archivist(object): def __init__(self, logger, chatdir=None, chatext=None, admin=0, - period_inc=5, save_count=15, max_period=100000, - read_only=False + period_inc=5, 
save_count=15, min_period=1, + max_period=100000, read_only=False ): if chatdir is None or len(chatdir) == 0: chatdir = "./" @@ -19,16 +19,20 @@ class Archivist(object): self.chatext = chatext self.period_inc = period_inc self.save_count = save_count + self.min_period = min_period self.max_period = max_period self.read_only = read_only + # Formats and returns a chat folder path def chat_folder(self, *formatting, **key_format): return (self.chatdir + "/chat_{tag}").format(*formatting, **key_format) + # Formats and returns a chat file path def chat_file(self, *formatting, **key_format): return (self.chatdir + "/chat_{tag}/{file}{ext}").format(*formatting, **key_format) - def store(self, tag, data, vocab_dumper): + # Stores a Reader/Generator file pair + def store(self, tag, data, vocab): chat_folder = self.chat_folder(tag=tag) chat_card = self.chat_file(tag=tag, file="card", ext=".txt") @@ -45,17 +49,18 @@ class Archivist(object): file.write(data) file.close() - if vocab_dumper is not None: + if vocab is not None: chat_record = self.chat_file(tag=tag, file="record", ext=self.chatext) file = open(chat_record, 'w', encoding="utf-16") - vocab_dumper(file) + file.write(vocab) file.close() + # Loads a Generator's vocabulary file dump def load_vocab(self, tag): filepath = self.chat_file(tag=tag, file="record", ext=self.chatext) try: file = open(filepath, 'r', encoding="utf-16") - record = Generator.load(file) + record = file.read() file.close() return record except Exception as e: @@ -63,6 +68,7 @@ class Archivist(object): self.logger.exception(e) return None + # Loads a Generator's vocabulary file dump in the old UTF-8 encoding def load_vocab_old(self, tag): filepath = self.chat_file(tag=tag, file="record", ext=self.chatext) try: @@ -75,7 +81,8 @@ class Archivist(object): self.logger.exception(e) return None - def load_reader(self, tag): + # Loads a Metadata card file dump + def load_card(self, tag): filepath = self.chat_file(tag=tag, file="card", ext=".txt") try: 
reader_file = open(filepath, 'r') @@ -86,16 +93,21 @@ class Archivist(object): self.logger.error("Metadata file {} not found.".format(filepath)) return None + # Returns a Reader for a given ID with an already working vocabulary - be it + # new or loaded from file def get_reader(self, tag): - reader = self.load_reader(tag) - if reader: - vocab = self.load_vocab(tag) - if not vocab: + card = self.load_card(tag) + if card: + vocab_dump = self.load_vocab(tag) + if vocab_dump: + vocab = Generator.loads(vocab_dump) + else: vocab = Generator() - return Reader.FromCard(reader, vocab, self.max_period, self.logger) + return Reader.FromCard(card, vocab, self.max_period, self.logger) else: return None + # Count the stored chats def chat_count(self): count = 0 directory = os.fsencode(self.chatdir) @@ -105,6 +117,7 @@ class Archivist(object): count += 1 return count + # Crawl through all the stored Readers def readers_pass(self): directory = os.fsencode(self.chatdir) for subdir in os.scandir(directory): @@ -124,6 +137,7 @@ class Archivist(object): self.logger.exception(e) raise e + # Load and immediately store every Reader def update(self): for reader in self.readers_pass(): if reader.vocab is None: diff --git a/generator.py b/generator.py index a004a70..52b2fef 100644 --- a/generator.py +++ b/generator.py @@ -4,12 +4,12 @@ import random import json +# This splits strings into lists of words delimited by space. +# Other whitespaces are appended space characters so they are included +# as their own Markov chain element, so as not to pollude with +# "different" words that would only differ in having a whitespace +# attached or not def rewrite(text): - # This splits strings into lists of words delimited by space. 
- # Other whitespaces are appended space characters so they are included - # as their own Markov chain element, so as not to pollude with - # "different" words that would only differ in having a whitespace - # attached or not words = text.replace('\n', '\n ').split(' ') i = 0 while i < len(words): @@ -23,24 +23,24 @@ def rewrite(text): return words +# This gives a dictionary key from 2 words, ignoring case def getkey(w1, w2): - # This gives a dictionary key from 2 words, ignoring case key = (w1.strip().casefold(), w2.strip().casefold()) return str(key) +# This turns a dictionary key back into 2 separate words def getwords(key): - # This turns a dictionary key back into 2 separate words words = key.strip('()').split(', ') for i in range(len(words)): words[i].strip('\'') return words +# Generates triplets of words from the given data string. So if our string +# were "What a lovely day", we'd generate (What, a, lovely) and then +# (a, lovely, day). def triplets(wordlist): - # Generates triplets of words from the given data string. So if our string - # were "What a lovely day", we'd generate (What, a, lovely) and then - # (a, lovely, day). 
if len(wordlist) < 3: return @@ -49,24 +49,25 @@ def triplets(wordlist): class Generator(object): + # Marks when we want to create a Generator object from a given JSON MODE_JSON = "MODE_JSON" - # This is to mark when we want to create a Generator object from a given JSON + # Marks when we want to create a Generator object from a given list of words MODE_LIST = "MODE_LIST" - # This is to mark when we want to create a Generator object from a given list of words + # Marks when we want to create a Generator object from a given dictionary MODE_DICT = "MODE_DICT" - # This is to mark when we want to create a Generator object from a given dictionary - MODE_CHAT_DATA = "MODE_CHAT_DATA" - # This is to mark when we want to create a Generator object from Chat data (WIP) + # Marks when we want to create a Generator object from a whole Chat history (WIP) + MODE_HIST = "MODE_HIST" + # Marks the beginning of a message HEAD = "\n^MESSAGE_SEPARATOR^" + # Marks the end of a message TAIL = " ^MESSAGE_SEPARATOR^" def __init__(self, load=None, mode=None): if mode is not None: - # We ain't creating a new Generator from scratch if mode == Generator.MODE_JSON: self.cache = json.loads(load) elif mode == Generator.MODE_LIST: @@ -74,45 +75,44 @@ class Generator(object): self.load_list(load) elif mode == Generator.MODE_DICT: self.cache = load + # TODO: Chat History mode else: self.cache = {} - # The cache is where we store our words + # Loads a text divided into a list of lines def load_list(self, many): - # Takes a list of strings and adds them to the cache one by one for one in many: self.add(one) + # Dumps the cache dictionary into a JSON-formatted string def dumps(self): - # Dumps the cache dictionary into a JSON-formatted string return json.dumps(self.cache, ensure_ascii=False) + # Dumps the cache dictionary into a file, formatted as JSON def dump(self, f): - json.dump(self.cache, f, ensure_ascii=False, indent='') + json.dump(self.cache, f, ensure_ascii=False) + # Loads the cache 
dictionary from a JSON-formatted string def loads(dump): - # Loads the cache dictionary from a JSON-formatted string if len(dump) == 0: # faulty dump gives default Generator return Generator() # otherwise return Generator(load=dump, mode=Generator.MODE_JSON) + # Loads the cache dictionary from a file, formatted as JSON def load(f): return Generator(load=json.load(f), mode=Generator.MODE_DICT) def add(self, text): - # This takes a string and stores it in the cache, preceding it - # with the HEAD that marks the beginning of a new message and - # following it with the TAIL that marks the end words = [Generator.HEAD] text = rewrite(text + Generator.TAIL) words.extend(text) self.database(words) + # This takes a list of words and stores it in the cache, adding + # a special entry for the first word (the HEAD marker) def database(self, words): - # This takes a list of words and stores it in the cache, adding - # a special entry for the first word (the HEAD marker) for w1, w2, w3 in triplets(words): if w1 == Generator.HEAD: if w1 in self.cache: @@ -128,50 +128,50 @@ class Generator(object): # the new end of chain self.cache[key] = [w3] + # This generates the Markov text/word chain + # silence=True disables Telegram user mentions def generate(self, size=50, silence=False): - # This generates the Markov text/word chain - # silence tells if mentions should be silenced if len(self.cache) == 0: # If there is nothing in the cache we cannot generate anything return "" + # Start with a message HEAD and a random message starting word w1 = random.choice(self.cache[Generator.HEAD]) w2 = random.choice(self.cache[getkey(Generator.HEAD, w1)]) - # Start with a message HEAD and a random message starting word gen_words = [] + # As long as we don't go over the max. message length (in n. of words)... for i in range(size): - # As long as we don't go over the size value (max. message length)... 
if silence and w1.startswith("@") and len(w1) > 1: + # ...append word 1, disabling any possible Telegram mention gen_words.append(w1.replace("@", "(@)")) - # ...append the first word, silencing any possible username mention else: + # ..append word 1 gen_words.append(w1) - # ..append the first word if w2 == Generator.TAIL or not getkey(w1, w2) in self.cache: # When there's no key from the last 2 words to follow the chain, # or we reached a separation between messages, stop break else: + # Get a random third word that follows the chain of words 1 + # and 2, then make words 2 and 3 to be the new words 1 and 2 w1, w2 = w2, random.choice(self.cache[getkey(w1, w2)]) - # Make the second word to be the new first word, and - # make a new random word that follows the chain to be - # the new second word return ' '.join(gen_words) + # Cross a second Generator into this one def cross(self, gen): - # cross 2 Generators into this one for key in gen.cache: if key in self.cache: self.cache[key].extend(gen.cache[key]) else: self.cache[key] = list(gen.cache[key]) + # Count again the number of messages + # (for whenever the count number is unreliable) def new_count(self): - # Count again the number of messages if the current number is unreliable count = 0 for key in self.cache: for word in self.cache[key]: if word == Generator.TAIL: + # ...by just counting message separators count += 1 - # by just counting message separators return count diff --git a/memorylist.py b/memorylist.py index f62c05f..3e12da0 100644 --- a/memorylist.py +++ b/memorylist.py @@ -1,11 +1,19 @@ #!/usr/bin/env python3 -from collections.abc import MutableSequence +from collections.abc import Sequence -class MemoryList(MutableSequence): +class MemoryList(Sequence): + """Special "memory list" class that: + - Whenever an item is added that was already in the list, + it gets moved to the back instead + - Whenever an item is looked for, it gets moved to the + back + - If a new item is added that goes over a given 
capacity + limit, the item at the front (oldest accessed item) + is removed (and returned)""" + def __init__(self, capacity, data=None): - """Initialize the class""" super(MemoryList, self).__init__() self._capacity = capacity if (data is not None): @@ -16,37 +24,25 @@ class MemoryList(MutableSequence): def __repr__(self): return "<{0} {1}, capacity {2}>".format(self.__class__.__name__, self._list, self._capacity) + def __str__(self): + return "{0}, {1}/{2}".format(self._list, len(self._list), self._capacity) + def __len__(self): - """List length""" return len(self._list) def capacity(self): return self._capacity def __getitem__(self, ii): - """Get a list item""" return self._list[ii] - def __delitem__(self, ii): - """Delete an item""" - del self._list[ii] - - def __setitem__(self, ii, val): - self._list[ii] = val - - def __str__(self): - return str(self._list) - def __contains__(self, val): return val in self._list def __iter__(self): return self._list.__iter__() - def insert(self, ii, val): - self._list.insert(ii, val) - - def append(self, val): + def add(self, val): if val in self._list: self._list.remove(val) @@ -58,8 +54,8 @@ class MemoryList(MutableSequence): else: return None - def get_next(self, cond): - val = next((v for v in self._list if cond(v)), None) + def search(self, cond, *args, **kwargs): + val = next((v for v in self._list if cond(v)), *args, **kwargs) if val is not None: self._list.remove(val) self._list.append(val) diff --git a/metadata.py b/metadata.py index 2a89ed7..f54e28d 100644 --- a/metadata.py +++ b/metadata.py @@ -1,8 +1,8 @@ #!/usr/bin/env python3 +# This reads a line in the format 'VARIABLE=value' and gives me the value. +# See Metadata.loadl(...) for more details def parse_card_line(line): - # This reads a line in the format 'VARIABLE=value' and gives me the value. - # See Metadata.loadl(...) 
for more details s = line.split('=', 1) if len(s) < 2: return "" @@ -10,35 +10,37 @@ def parse_card_line(line): return s[1] +# This is a chat's Metadata, holding different configuration values for +# Velasco and other miscellaneous information about the chat class Metadata(object): - # This is a chat's Metadata, holding different configuration values for - # Velasco and other miscellaneous information about the chat - def __init__(self, cid, ctype, title, count=0, period=None, answer=0.5, restricted=False, silenced=False): - self.id = str(cid) # The Telegram chat's ID - self.type = ctype + self.id = str(cid) # The type of chat - self.title = title + self.type = ctype # The title of the chat + self.title = title if period is None: if "group" in ctype: - period = 10 # Default period for groups and supergroups + period = 10 else: - period = 2 # Default period for private or channel chats + period = 2 + # The number of messages read in a chat self.count = count - # The number of messages read - self.period = period # This chat's configured period - self.answer = answer + self.period = period # This chat's configured answer probability - self.restricted = restricted + self.answer = answer # Wether some interactions are restricted to admins only - self.silenced = silenced + self.restricted = restricted # Wether messages should silence user mentions + self.silenced = silenced + # Sets the period for a chat + # It has to be higher than 1 + # Returns the new value def set_period(self, period): if period < 1: raise ValueError('Tried to set period a value less than 1.') @@ -46,6 +48,9 @@ class Metadata(object): self.period = period return self.period + # Sets the answer probability + # It's a percentage represented as a decimal between 0 and 1 + # Returns the new value def set_answer(self, prob): if prob > 1: raise ValueError('Tried to set answer probability higher than 1.') @@ -55,6 +60,8 @@ class Metadata(object): self.answer = prob return self.answer + # Dumps the metadata 
into a list of lines, then joined together in a string, + # ready to be written into a file def dumps(self): lines = ["CARD=v5"] lines.append("CHAT_ID=" + self.id) @@ -68,10 +75,12 @@ class Metadata(object): # lines.append("WORD_DICT=") return ('\n'.join(lines)) + '\n' + # Creates a Metadata object from a previous text dump def loads(text): lines = text.splitlines() return Metadata.loadl(lines) + # Creates a Metadata object from a list of metadata lines def loadl(lines): # In a perfect world, I would get both the variable name and its corresponding value # from each side of the lines, but I know the order in which the lines are writen in @@ -90,6 +99,14 @@ class Metadata(object): silenced=(parse_card_line(lines[8]) == 'True') ) elif version == "v3": + # Deprecated: this elif block will be removed in a new version + print("Warning! This Card format ({}) is deprecated. Update all".format(version), + "your files in case that there are still some left in old formats before", + "downloading the next update.") + + # This is kept for retrocompatibility purposes, in case someone did a fork + # of this repo and still has some chat files that haven't been updated in + # a long while -- but I already converted all my files to v5 return Metadata(cid=parse_card_line(lines[1]), ctype=parse_card_line(lines[2]), title=parse_card_line(lines[3]), @@ -99,6 +116,12 @@ class Metadata(object): restricted=(parse_card_line(lines[6]) == 'True') ) elif version == "v2": + # Deprecated: this elif block will be removed in a new version + print("Warning! This Card format ({}) is deprecated. 
Update all".format(version), + "your files in case that there are still some left in old formats before", + "downloading the next update.") + + # Also kept for retrocompatibility purposes return Metadata(cid=parse_card_line(lines[1]), ctype=parse_card_line(lines[2]), title=parse_card_line(lines[3]), @@ -107,6 +130,12 @@ class Metadata(object): answer=float(parse_card_line(lines[5])) ) elif version == "dict:": + # Deprecated: this elif block will be removed in a new version + print("Warning! This Card format ('dict') is deprecated. Update all", + "your files in case that there are still some left in old formats before", + "downloading the next update.") + + # Also kept for retrocompatibility purposes # At some point I decided to number the versions of each dictionary format, # but this was not always the case. This is what you get if you try to read # whatever there is in very old files where the version should be @@ -117,7 +146,13 @@ class Metadata(object): period=int(lines[3]) ) else: - # This is for the oldest of files + # Deprecated: this elif block will be removed in a new version + print("Warning! This ancient Card format is deprecated. 
Update all", + "your files in case that there are still some left in old formats before", + "downloading the next update.") + + # Also kept for retrocompatibility purposes + # This is for the oldest of file formats return Metadata(cid=lines[0], ctype=lines[1], title=lines[2], diff --git a/reader.py b/reader.py index e4ce04b..4c41ca6 100644 --- a/reader.py +++ b/reader.py @@ -5,9 +5,9 @@ from metadata import Metadata, parse_card_line from generator import Generator +# This gives me the chat title, or the first and maybe last +# name of the user as fallback if it's a private chat def get_chat_title(chat): - # This gives me the chat title, or the first and maybe last - # name of the user as fallback if it's a private chat if chat.title is not None: return chat.title elif chat.first_name is not None: @@ -25,40 +25,52 @@ class Memory(object): self.content = content +# This is a chat Reader object, in charge of managing the parsing of messages +# for a specific chat, and holding said chat's metadata class Reader(object): - # This is a chat Reader object, in charge of managing the parsing of messages - # for a specific chat, and holding said chat's metadata - + # Media tagging variables TAG_PREFIX = "^IS_" STICKER_TAG = "^IS_STICKER^" ANIM_TAG = "^IS_ANIMATION^" VIDEO_TAG = "^IS_VIDEO^" - def __init__(self, metadata, vocab, max_period, logger): + def __init__(self, metadata, vocab, max_period, logger, names=[]): + # The Metadata object holding a chat's specific bot parameters self.meta = metadata + # The Generator object holding the vocabulary learned so far self.vocab = vocab + # The maximum period allowed for this bot self.max_period = max_period + # The short term memory, for recently read messages (see below) self.short_term_mem = [] + # The countdown until the period ends and it's time to talk self.countdown = self.meta.period + # The logger object shared program-wide self.logger = logger + # The bot's nicknames + username + self.names = names + # Create a new Reader 
from a Chat object def FromChat(chat, max_period, logger): - # Create a new Reader from a Chat object meta = Metadata(chat.id, chat.type, get_chat_title(chat)) vocab = Generator() return Reader(meta, vocab, max_period, logger) + # TODO: Create a new Reader from a whole Chat history def FromHistory(history, vocab, max_period, logger): - # Create a new Reader from a whole Chat history (WIP) return None + # Create a new Reader from a meta's file dump def FromCard(meta, vocab, max_period, logger): - # Create a new Reader from a meta's file dump metadata = Metadata.loads(meta) return Reader(metadata, vocab, max_period, logger) + # Deprecated: this method will be removed in a new version def FromFile(text, max_period, logger, vocab=None): - # Load a Reader from a file's text string (obsolete) + print("Warning! This method of loading a Reader from file (Reader.FromFile(...))", + "is deprecated, and will be removed from the next update. Use FromCard instead.") + + # Load a Reader from a file's text string lines = text.splitlines() version = parse_card_line(lines[0]).strip() version = version if len(version.strip()) > 1 else lines[4] @@ -86,27 +98,33 @@ class Reader(object): r = Reader(meta, vocab, max_period, logger) return r + # Returns a nice lice little tuple package for the archivist to save to file. + # Also commits to long term memory any pending short term memories def archive(self): - # Returns a nice lice little tuple package for the archivist to save to file. - # Also commits to long term memory any pending short term memories self.commit_memory() - return (self.meta.id, self.meta.dumps(), self.vocab.dump) + return (self.meta.id, self.meta.dumps(), self.vocab.dumps()) + # Checks type. Returns "True" for "group" even if it's supergroupA def check_type(self, t): - # Checks type. 
Returns "True" for "group" even if it's supergroup return t in self.meta.type + # Hard check def exactly_type(self, t): - # Hard check return t == self.meta.type def set_title(self, title): self.meta.title = title + # Sets a new period in the Metadata def set_period(self, period): - if period < self.countdown: - self.countdown = max(period, 1) - return self.meta.set_period(min(period, self.max_period)) + # The period has to be under max_period; otherwise, truncate to max_period + new_period = min(period, self.max_period) + set_period = self.meta.set_period(new_period) + if new_period == set_period and new_period < self.countdown: + # If succesfully changed and the new period is less than the current + # remaining countdown, reduce the countdown to the new period + self.countdown = new_period + return new_period def set_answer(self, prob): return self.meta.set_answer(prob) @@ -141,6 +159,8 @@ class Reader(object): def toggle_silence(self): self.meta.silenced = (not self.meta.silenced) + # Rolls the chance for answering in this specific chat, + # according to the answer probability def is_answering(self): rand = random.random() chance = self.answer() @@ -150,10 +170,13 @@ class Reader(object): return False return rand <= chance + # Adds a new message to the short term memory def add_memory(self, mid, content): mem = Memory(mid, content) self.short_term_mem.append(mem) + # Returns a random message ID from the short memory, + # when answering to a random comment def random_memory(self): if len(self.short_term_mem) == 0: return None @@ -163,6 +186,10 @@ class Reader(object): def reset_countdown(self): self.countdown = self.meta.period + # Reads a message + # This process will determine which kind of message it is (Sticker, Anim, + # Video, or actual text) and pre-process it accordingly for the Generator, + # then store it in the short term memory def read(self, message): mid = str(message.message_id) @@ -174,16 +201,25 @@ class Reader(object): self.learn_drawing(mid, 
Reader.ANIM_TAG, message.animation.file_id) elif message.video is not None: self.learn_drawing(mid, Reader.VIDEO_TAG, message.video.file_id) + self.meta.count += 1 + # Stores a multimedia message in the short term memory as a text with + # TAG + the media file ID def learn_drawing(self, mid, tag, drawing): self.learn(mid, tag + " " + drawing) + # Stores a text message in the short term memory def learn(self, mid, text): - if "velasco" in text.casefold() and len(text.split()) <= 3: - return + for name in self.names: + if name.casefold() in text.casefold() and len(text.split()) <= 3: + # If it's less than 3 words and one of the bot's names is in + # the message, ignore it as it's most probably just a summon + return self.add_memory(mid, text) + # Commits the short term memory messages into the "long term memory" + # aka the vocabulary Generator's cache def commit_memory(self): for mem in self.short_term_mem: self.vocab.add(mem.content) diff --git a/speaker.py b/speaker.py index 3d44df3..434800c 100644 --- a/speaker.py +++ b/speaker.py @@ -8,19 +8,24 @@ from reader import Reader, get_chat_title from telegram.error import NetworkError +# Auxiliar print to stderr function (alongside logger messages) def eprint(*args, **kwargs): print(*args, end=' ', file=stderr, **kwargs) +# Auxiliar message to send a text to a chat through a bot def send(bot, cid, text, replying=None, formatting=None, logger=None, **kwargs): - kwargs["parse_mode"] = formatting - kwargs["reply_to_message_id"] = replying + # Markdown or HTML formatting (both argument names are valid) + kwargs["parse_mode"] = formatting or kwargs.get("parse_mode") + # ID of the message it's replying to (both argument names are valid) + kwargs["reply_to_message_id"] = replying or kwargs.get("reply_to_message_id") + # Reminder that dict.get(key) defaults to None if the key isn't found if text.startswith(Reader.TAG_PREFIX): + # We're sending a media file ID words = text.split(maxsplit=1) if logger: logger.info('Sending {} 
"{}" to {}'.format(words[0][4:-1], words[1], cid)) - # eprint('[]') # Logs something like 'Sending VIDEO "VIDEO_ID" to CHAT_ID' if words[0] == Reader.STICKER_TAG: @@ -30,16 +35,18 @@ def send(bot, cid, text, replying=None, formatting=None, logger=None, **kwargs): elif words[0] == Reader.VIDEO_TAG: return bot.send_video(cid, words[1], **kwargs) else: - text + # It's text if logger: - mtype = "reply" if replying else "message" + mtype = "reply" if (kwargs.get("reply_to_message_id")) else "message" logger.info("Sending a {} to {}: '{}'".format(mtype, cid, text)) # eprint('.') return bot.send_message(cid, text, **kwargs) class Speaker(object): + # Marks if the period is a fixed time when to send a new message ModeFixed = "FIXED_MODE" + # Marks if the "periodic" messages have a weighted random chance to be sent, depending on the period ModeChance = "CHANCE_MODE" def __init__(self, username, archivist, logger, admin=0, nicknames=[], @@ -47,35 +54,55 @@ class Speaker(object): memory=20, mute_time=60, save_time=3600, bypass=False, cid_whitelist=None, max_len=50 ): + # List of nicknames other than the username that the bot can be called as self.names = nicknames + # Mute time for Telegram network errors self.mute_time = mute_time + # Last mute timestamp self.mute_timer = None + # The bot's username, "@" included self.username = username - + # The maximum chat period for this bot self.max_period = archivist.max_period + + # The Archivist functions to load and save from and to files self.get_reader_file = archivist.get_reader self.store_file = archivist.store + + # Archivist function to crawl all stored Readers self.readers_pass = archivist.readers_pass + # Legacy load logging emssages logger.info("----") logger.info("Finished loading.") logger.info("Loaded {} chats.".format(archivist.chat_count())) logger.info("----") + # Wakeup flag that determines if it should send a wakeup message to stored groupchats self.wakeup = wakeup + # The logger shared program-wide self.logger = 
logger + # Chance of sending messages as replies self.reply = reply + # Chance of sending 2 messages in a row self.repeat = repeat + # If not empty, whitelist of chat IDs to only respond to self.cid_whitelist = cid_whitelist + # Memory list/cache for the last accessed chats self.memory = MemoryList(memory) + # Minimum time to wait between memory saves (triggered at the next message from any chat) self.save_time = save_time + # Last save timestamp self.memory_timer = int(time.perf_counter()) + # Admin user ID self.admin = admin + # For testing purposes self.bypass = bypass + # Max word length for a message self.max_len = max_len + # Sends an announcement to all chats that pass the check def announce(self, bot, announcement, check=(lambda _: True)): - # Sends an announcement to all chats that pass the check for reader in self.readers_pass(): try: if check(reader): @@ -84,9 +111,9 @@ class Speaker(object): except Exception: pass + # If wakeup flag is set, sends a wake-up message as announcement to all chats that + # are groups. Also, always sends a wakeup message to the 'bot admin' def wake(self, bot, wake): - # If wakeup flag is set, sends a wake-up message as announcement to all chats that - # are groups. Also, always sends a wakeup message to the 'bot admin' send(bot, self.admin, wake) if self.wakeup: @@ -94,9 +121,13 @@ class Speaker(object): return reader.check_type("group") self.announce(bot, wake, group_check) + # Looks up a reader in the memory list def get_reader(self, cid): - return self.memory.get_next(lambda r: r.cid() == cid) + return self.memory.search(lambda r: r.cid() == cid, None) + # Looks up and returns a reader if it's in memory, or loads up a reader from + # file, adds it to memory, and returns it. 
Any other reader pushed out of + # memory is saved to file def load_reader(self, chat): cid = str(chat.id) reader = self.get_reader(cid) @@ -107,19 +138,24 @@ class Speaker(object): if not reader: reader = Reader.FromChat(chat, self.max_period, self.logger) - old_reader = self.memory.append(reader) + old_reader = self.memory.add(reader) if old_reader is not None: old_reader.commit_memory() self.store(old_reader) return reader + # Returns a reader if it's in memory, or loads it up from a file and returns + # it otherwise. Does NOT add the Reader to memory + # This is useful for command prompts that do not require the Reader to be cached def access_reader(self, cid): reader = self.get_reader(cid) if reader is None: return self.get_reader_file(cid) return reader + # Returns True if the bot's username is called, or if one of the nicknames is + # mentioned and they're not another user's username def mentioned(self, text): if self.username in text: return True @@ -128,20 +164,28 @@ class Speaker(object): return True return False + # Returns True if not enough time has passed since the last mute timestamp def is_mute(self): current_time = int(time.perf_counter()) return self.mute_timer is not None and (current_time - self.mute_timer) < self.mute_time + # Series of checks to determine if the bot should reply to a specific message, aside + # from the usual periodic messages def should_reply(self, message, reader): if self.is_mute(): + # Not if mute time hasn't finished return False if not self.bypass and reader.is_restricted(): + # If we're not in testing mode and the chat is restricted user = message.chat.get_member(message.from_user.id) if not self.user_is_admin(user): - # update.message.reply_text("You do not have permissions to do that.") + # ...And the user has no permissions, should not reply return False + + # otherwise (testing mode, or the chat is unrestricted, or the user has permissions) replied = message.reply_to_message text = message.text.casefold() if 
message.text else "" + # Only if it's a reply to a message of ours or the bot is mentioned in the message return (((replied is not None) and (replied.from_user.name == self.username)) or (self.mentioned(text))) @@ -151,12 +195,14 @@ class Speaker(object): else: self.store_file(*reader.archive()) + # Check if enough time for saving memory has passed def should_save(self): current_time = int(time.perf_counter()) elapsed = (current_time - self.memory_timer) self.logger.debug("Save check: {}".format(elapsed)) return elapsed >= self.save_time + # Save all Readers in memory to files if it's save time def save(self): if self.should_save(): self.logger.info("Saving chats in memory...") @@ -165,29 +211,38 @@ class Speaker(object): self.memory_timer = time.perf_counter() self.logger.info("Chats saved.") + # Reads a non-command message def read(self, update, context): + # Check for save time self.save() + # Ignore non-message updates if update.message is None: return + chat = update.message.chat reader = self.load_reader(chat) reader.read(update.message) + # Check if it's a "replyable" message & roll the chance to do so if self.should_reply(update.message, reader) and reader.is_answering(): self.say(context.bot, reader, replying=update.message.message_id) return + # Update the Reader's title if it has changed since the last message read title = get_chat_title(update.message.chat) if title != reader.title(): reader.set_title(title) + # Decrease the countdown for the chat, and send a message if it reached 0 reader.countdown -= 1 if reader.countdown < 0: reader.reset_countdown() + # Random chance to reply to a recent message rid = reader.random_memory() if random.random() <= self.reply else None self.say(context.bot, reader, replying=rid) + # Handles /speak command def speak(self, update, context): chat = (update.message.chat) reader = self.load_reader(chat) @@ -200,12 +255,14 @@ class Speaker(object): mid = str(update.message.message_id) replied = 
update.message.reply_to_message + # Reply to the message that the command replies to, otherwise to the command itself rid = replied.message_id if replied else mid words = update.message.text.split() if len(words) > 1: reader.read(' '.join(words[1:])) self.say(context.bot, reader, replying=rid) + # Checks user permissions. Bot admin is always considered as having full permissions def user_is_admin(self, member): self.logger.info("user {} ({}) requesting a restricted action".format(str(member.user.id), member.user.name)) # eprint('!') @@ -214,23 +271,30 @@ class Speaker(object): or (member.status == 'administrator') or (member.user.id == self.admin)) + # Generate speech (message) def speech(self, reader): return reader.generate_message(self.max_len) + # Say a newly generated message def say(self, bot, reader, replying=None, **kwargs): cid = reader.cid() if self.cid_whitelist is not None and cid not in self.cid_whitelist: + # Don't, if there's a whitelist and this chat is not in it return if self.is_mute(): + # Don't, if mute time isn't over return try: send(bot, cid, self.speech(reader), replying, logger=self.logger, **kwargs) if self.bypass: + # Testing mode, force a reasonable period (to not have the bot spam one specific chat with a low period) max_period = self.max_period reader.set_period(random.randint(max_period // 4, max_period)) if random.random() <= self.repeat: send(bot, cid, self.speech(reader), logger=self.logger, **kwargs) + # Consider any Network Error as a Telegram temporary ban, as I couldn't find + # out in the documentation how error 429 is handled by python-telegram-bot except NetworkError as e: self.logger.error("Sending a message caused network error:") self.logger.exception(e) @@ -240,21 +304,25 @@ class Speaker(object): self.logger.error("Sending a message caused exception:") self.logger.exception(e) + # Handling /count command def get_count(self, update, context): cid = str(update.message.chat.id) - reader = self.access_reader(cid) + reader = 
self.load_reader(cid) num = str(reader.count()) if reader else "no" update.message.reply_text("I remember {} messages.".format(num)) + # Handling /get_chats command (exclusive for bot admin) def get_chats(self, update, context): lines = ["[{}]: {}".format(reader.cid(), reader.title()) for reader in self.readers_pass()] chat_list = "\n".join(lines) update.message.reply_text("I have the following chats:\n\n" + chat_list) + # Handling /period command + # Print the current period or set a new one if one is given def period(self, update, context): chat = update.message.chat - reader = self.access_reader(str(chat.id)) + reader = self.load_reader(str(chat.id)) words = update.message.text.split() if len(words) <= 1: @@ -270,13 +338,14 @@ class Speaker(object): period = int(words[1]) period = reader.set_period(period) update.message.reply_text("Period of speaking set to {}.".format(period)) - self.store_file(*reader.archive()) except Exception: update.message.reply_text("Format was confusing; period unchanged from {}.".format(reader.period())) + # Handling /answer command + # Print the current answer probability or set a new one if one is given def answer(self, update, context): chat = update.message.chat - reader = self.access_reader(str(chat.id)) + reader = self.load_reader(str(chat.id)) words = update.message.text.split() if len(words) <= 1: @@ -292,17 +361,18 @@ class Speaker(object): answer = float(words[1]) answer = reader.set_answer(answer) update.message.reply_text("Answer probability set to {}.".format(answer)) - self.store_file(*reader.archive()) except Exception: update.message.reply_text("Format was confusing; answer probability unchanged from {}.".format(reader.answer())) + # Handling /restrict command + # Toggle the restriction value if it's a group chat and the user has permissions to do so def restrict(self, update, context): if "group" not in update.message.chat.type: update.message.reply_text("That only works in groups.") return chat = update.message.chat 
user = chat.get_member(update.message.from_user.id) - reader = self.access_reader(str(chat.id)) + reader = self.load_reader(str(chat.id)) if reader.is_restricted(): if not self.user_is_admin(user): @@ -311,15 +381,16 @@ class Speaker(object): reader.toggle_restrict() allowed = "let only admins" if reader.is_restricted() else "let everyone" update.message.reply_text("I will {} configure me now.".format(allowed)) - self.store_file(*reader.archive()) + # Handling /silence command + # Toggle the silence value if it's a group chat and the user has permissions to do so def silence(self, update, context): if "group" not in update.message.chat.type: update.message.reply_text("That only works in groups.") return chat = update.message.chat user = chat.get_member(update.message.from_user.id) - reader = self.access_reader(str(chat.id)) + reader = self.load_reader(str(chat.id)) if reader.is_restricted(): if not self.user_is_admin(user): @@ -328,8 +399,8 @@ class Speaker(object): reader.toggle_silence() allowed = "avoid mentioning" if reader.is_silenced() else "mention" update.message.reply_text("I will {} people now.".format(allowed)) - self.store_file(*reader.archive()) + # Handling /who command def who(self, update, context): msg = update.message usr = msg.from_user @@ -346,6 +417,7 @@ class Speaker(object): msg.reply_markdown(answer) + # Handling /where command def where(self, update, context): msg = update.message chat = msg.chat diff --git a/velasco.py b/velasco.py index 8670544..5a135fd 100644 --- a/velasco.py +++ b/velasco.py @@ -90,7 +90,11 @@ def main(): parser.add_argument('-m', '--mute_time', metavar='T', type=int, default=60, help='The time (in s) for the muting period when Telegram limits the bot. (default: 60).') parser.add_argument('-s', '--save_time', metavar='T', type=int, default=3600, - help='The time (in s) for periodic saves (default: 3600).') + help='The time (in s) for periodic saves. 
(default: 3600)') + parser.add_argument('-p', '--min_period', metavar='MIN_P', type=int, default=1, + help='The minimum value for a chat\'s period. (default: 1)') + parser.add_argument('-P', '--max_period', metavar='MAX_P', type=int, default=100000, + help='The maximum value for a chat\'s period. (default: 100000)') args = parser.parse_args() @@ -104,6 +108,8 @@ def main(): archivist = Archivist(logger, chatdir=args.directory, chatext=".vls", + min_period=args.min_period, + max_period=args.max_period, read_only=False ) From 175c006229f903ad7f9286f1fc86e95b94f8f1e3 Mon Sep 17 00:00:00 2001 From: vylion Date: Thu, 29 Oct 2020 10:05:53 +0100 Subject: [PATCH 20/22] Documentation, baby~ --- MANUAL.md | 28 ++++++++++++++++++++++++++++ README.md | 49 ++++++++++++++++++++++++++----------------------- velasco.py | 2 ++ 3 files changed, 56 insertions(+), 23 deletions(-) create mode 100644 MANUAL.md diff --git a/MANUAL.md b/MANUAL.md new file mode 100644 index 0000000..ecd9be5 --- /dev/null +++ b/MANUAL.md @@ -0,0 +1,28 @@ +# Velascobot: Manual + +**OUTDATED: REVISION PENDING** + +## Markov chains + +This bot uses Markov chains of 3 words for message generation. For each 3 consecutive words read, it will store the 3rd one as the word that follows the first 2 combined. This way, whenever it is generating a new sentence, it will always pick at random one of the stored words that follow the last 2 words of the message generated so far, combined. + +## Storing + +The actual messages aren't stored. After they're processed and all the words have been assigned to lists under combinations of 2 words, the message is discarded, and only the dictionary with the lists of "following words" is stored. The words said in a chat may be visible, but from a certain point onwards its impossible to recreate with accuracy the exact messages said in a chat. + +The storing action is made sometimes when a configuration value is changed, and whenever the bot sends a message. 
If the bot crashes, all the words processed from the messages since the last one from Velascobot will be lost. For high `period` values, this could be a considerable amount, but for small ones this is negligible. Still, the bot is not expected to crash often. + +## File hierarchy + +For those who are interested in cloning or forking: + +- `velasco.py` is the file in charge of starting up the telegram bot itself +- `speaker.py` is the file with all the functions for the commands that Velasco has +- A *Speaker* is then the entity that receives the messages, and has 1 *Parrot* and 1 *Scriptorium* +- The *Scriptorium* is a collection of *Scribes*. Each *Scribe* contains the metadata of a chat (title, ID number, the `period`, etc) and the Markov dictionary associated to it +- *Scribes* are defined in `scribe.py` +- A *Parrot* is an entity that contains a Markov dictionary, and the *Speaker's Parrot* corresponds to the last chat that prompted a Velasco message. Whenever that happens, the *Parrot* for that chat is loaded, the corresponding *Scribe* teaches the *Parrot* the latest messages, and then the *Scribe* is stored along with the updated dictionary +- A Markov dictionary is defined in `markov.py` +- The *Archivist* (defined in `archivist.py`) is in charge of doing all file saves and loads + +**Warning:** This hierarchy is pending an overhaul. \ No newline at end of file diff --git a/README.md b/README.md index f3b067c..f68fbc0 100644 --- a/README.md +++ b/README.md @@ -1,48 +1,51 @@ # Velascobot -This is yet another Markov chain-based chatbot, based on the Twitterbot fad consisting of creating a bot account that would try to generate new random tweets, using your own as a template. However, instead of reading the messages from a Twitter account, this bot is made to read the messages in a group chat, and try to blend in by generating new messages that fit the patterns seen in that specific group chat. 
At the beginning that will mean a lot of parroting, but eventually the bot starts coming up with sentences of itself. +This is yet another Markov chain-based chatbot, based on the Twitterbot fad consisting of creating a bot account that would try to generate new random tweets (usually having `_ebooks` or `.txt` to indicate that an account was one of such, or just a plain `bot` suffix), using your own as a template. However, instead of reading the messages from a Twitter account, this bot is made to read the messages in a group chat, and try to blend in by generating new messages that fit the patterns seen in that specific group chat. At the beginning that will mean a lot of parroting, but eventually the bot starts coming up with sentences of itself. This bot also works on private chats between a user and itself, but of course the training is much lower and it will feel like talking to a parrot for a longer time, unless you feed it a lot of messages quickly. -## Markov chains +## How to use it -This bot uses Markov chains of 3 words for message generation. For each 3 consecutive words read, it will store the 3rd one as the word that follows the first 2 combined. This way, whenever it is generating a new sentence, it will always pick at random one of the stored words that follow the last 2 words of the message generated so far, combined. +You have to add the bot to a chat group, letting it rad and send messages. Maybe set some configuration commands too. -## Storing +If you want to clone or fork this repo and host your own instance of Velasco, see MANUAL.md. -The actual messages aren't stored. After they're processed and all the words have been assigned to lists under combinations of 2 words, the message is discarded, and only the dictionary with the lists of "following words" is stored. The words said in a chat may be visible, but from a certain point onwards its impossible to recreate with accuracy the exact messages said in a chat. 
+## Commands & ussage -The storing action is made sometimes when a configuration value is changed, and whenever the bot sends a message. If the bot crashes, all the words processed from the messages since the last one from Velascobot will be lost. For high `freq` values, this could be a considerable amount, but for small ones this is negligible. Still, the bot is not expected to crash often. +### Help, About and Explain -## Configuration commands +The `/help` command lists the most useful available commands for the bot. The `/about` command has a short explanation on the purpose of this bot, and the `/explain` command goes a little further in detail. + +### Speak + +This will make the bot send a message, aside from the periodic messages. If the command message is a reply to a different message M, the bot's message will be a reply to M as well; otherwise, the bot will reply to the message with the command. + +### Summon + +This isn't a command per se, but mentioning the username (in this case, '@velascobot') or any of the configured nicknames (like 'velasco') will prompt a chance for the bot to answer. + +A summon of 3 or less words will not be processed, so you can call Velasco's name to your heart's content without having to worry for the bot learning to repeat a lot of short 'Velasco!' messages. ### Count -This is the amount of messages that the bot remembers, this is, the amount of messages processed. The messages themselves aren't stored but there is a counter that increases each time a message is processed. +This tells you the amount of messages that the bot has read so far. The messages themselves aren't stored, but there is a counter that increases each time a message is processed. -### Freq +### Period -It comes from "frequency", and at the beginning it was, but now it's actually the opposite, the "period". This is the amount of messages that the bot waits for before sending a message of its own. 
Increase it to make it talk less often, and decrease it to make it talk more often. +This is the amount of messages that the bot waits for before sending a message of its own. Increase it to make it talk less often, and decrease it to make it talk more often. -Sending the command on its own tells you the current value. Sending a positive number with the command will set that as the new value. +Sending the command on its own (e.g. `/period`) tells you the current value. Sending a positive number with the command (e.g. `/period 85`) will set that as the new value. ### Answer This value is the chance of the bot to answer to a message that is in turn a reply to one of its own messages, or (to be implemented:) to a message that mentions it. The default value is 0.5 (50% chance). The maximum is 1 (100% chance) and to disable it you must set it to 0 (0% chance). -Sending the command on its own tells you the current value. Sending a positive decimal number between 0 and 1 inclusive will set it as the new value. +Sending the command on its own (e.g. `/answer`) tells you the current value. Sending a positive decimal number between 0 and 1 inclusive (e.g. `/answer 0.95`) will set it as the new value. -## File hierarchy +### Restricted -For those who are interested in cloning or forking: +This toggles the chat's *restriction* (off by default). Having the chat *restricted* means that only the administrators of a chat can send configuration commands, like `/period n` or `/answer n`, only they can force the bot to speak with the `/speak` command, and only they can summon the bot. The bot will still read all users' messages and will still send periodic messages for all to enjoy. 
-- `velasco.py` is the file in charge of starting up the telegram bot itself -- `speaker.py` is the file with all the functions for the commands that Velasco has -- A *Speaker* is then the entity that receives the messages, and has 1 *Parrot* and 1 *Scriptorium* -- The *Scriptorium* is a collection of *Scribes*. Each *Scribe* contains the metadata of a chat (title, ID number, the `freq`, etc) and the Markov dictionary associated to it -- *Scribes* are defined in `scribe.py` -- A *Parrot* is an entity that contains a Markov dictionary, and the *Speaker's Parrot* corresponds to the last chat that prompted a Velasco message. Whenever that happens, the *Parrot* for that chat is loaded, the corresponding *Scribe* teaches the *Parrot* the latest messages, and then the *Scribe* is stored along with the updated dictionary -- A Markov dictionary is defined in `markov.py` -- The *Archivist* (defined in `archivist.py`) is in charge of doing all file saves and loads +### Silenced -**Warning:** This hierarchy is pending an overhaul. \ No newline at end of file +This toggles the chat's *silence* (off by default). Having the chat *silenced* means that possible user mentions that may appear in randomly generated messages, will be disabled by enveloping the '@' between parentheses. This will avoid Telegram mention notifications, specially useful for those who have the group chat muted. diff --git a/velasco.py b/velasco.py index 5a135fd..fc1da66 100644 --- a/velasco.py +++ b/velasco.py @@ -44,6 +44,8 @@ help_msg = """I answer to the following commands: /answer - Change the probability to answer to a reply. (Decimal between 0 and 1). /restrict - Toggle restriction of configuration commands to admins only. /silence - Toggle restriction on mentions by the bot. +/who - Tell general information about you and your message. For debugging purposes. +/where - Tell my configuration for this chat. """ about_msg = "I am yet another Markov Bot experiment. 
I read everything you type to me and then spit back nonsensical messages that look like yours.\n\nYou can send /explain if you want further explanation." From ec14abcaff95d2c87644ff5906cd53258f3a564d Mon Sep 17 00:00:00 2001 From: vylion Date: Thu, 29 Oct 2020 10:38:19 +0100 Subject: [PATCH 21/22] Touched up the README.md some more Added actual minimum period checks accross all the code --- README.md | 10 +++++----- archivist.py | 5 ++++- reader.py | 24 ++++++++++++------------ speaker.py | 11 +++++++---- velasco.py | 2 ++ 5 files changed, 30 insertions(+), 22 deletions(-) diff --git a/README.md b/README.md index f68fbc0..01c05ee 100644 --- a/README.md +++ b/README.md @@ -1,14 +1,14 @@ # Velascobot -This is yet another Markov chain-based chatbot, based on the Twitterbot fad consisting of creating a bot account that would try to generate new random tweets (usually having `_ebooks` or `.txt` to indicate that an account was one of such, or just a plain `bot` suffix), using your own as a template. However, instead of reading the messages from a Twitter account, this bot is made to read the messages in a group chat, and try to blend in by generating new messages that fit the patterns seen in that specific group chat. At the beginning that will mean a lot of parroting, but eventually the bot starts coming up with sentences of itself. +This is yet another Markov chain-based chatbot, based on the Twitterbot fad consisting of creating a bot account that would try to generate new random tweets (usually having `_ebooks` or `.txt` in their names to indicate that an account was one of such, or just a plain `bot` suffix), using your own as a template. However, instead of reading the messages from a Twitter account, this bot is made to read the messages in a group chat, and try to blend in by generating new messages that fit the patterns seen in that specific group chat. 
At the beginning that will mean a lot of parroting, but eventually the bot starts coming up with sentences of itself. This bot also works on private chats between a user and itself, but of course the training is much lower and it will feel like talking to a parrot for a longer time, unless you feed it a lot of messages quickly. ## How to use it -You have to add the bot to a chat group, letting it rad and send messages. Maybe set some configuration commands too. +You have to add the bot to a chat group, or speak to it privately, letting it read and send messages. Maybe set some configuration commands too. -If you want to clone or fork this repo and host your own instance of Velasco, see MANUAL.md. +If you want to clone or fork this repo and host your own instance of Velasco, see [MANUAL.md](MANUAL.md). ## Commands & ussage @@ -38,9 +38,9 @@ Sending the command on its own (e.g. `/period`) tells you the current value. Sen ### Answer -This value is the chance of the bot to answer to a message that is in turn a reply to one of its own messages, or (to be implemented:) to a message that mentions it. The default value is 0.5 (50% chance). The maximum is 1 (100% chance) and to disable it you must set it to 0 (0% chance). +This value is the chance of the bot to answer to a message that is in turn a reply to one of its own messages, or to a message that mentions the bot (see above: [Summon](###Summon)). The default value is `0.5` (50% chance). The maximum is `1` (100% chance) and to disable it you must set it to 0 (0% chance). -Sending the command on its own (e.g. `/answer`) tells you the current value. Sending a positive decimal number between 0 and 1 inclusive (e.g. `/answer 0.95`) will set it as the new value. +Sending the command on its own (e.g. `/answer`) tells you the current value. Sending a positive decimal number between `0` and `1` inclusive (e.g. `/answer 0.95`) will set it as the new value. 
### Restricted diff --git a/archivist.py b/archivist.py index 051a413..9b4ad0d 100644 --- a/archivist.py +++ b/archivist.py @@ -103,7 +103,7 @@ class Archivist(object): vocab = Generator.loads(vocab_dump) else: vocab = Generator() - return Reader.FromCard(card, vocab, self.max_period, self.logger) + return Reader.FromCard(card, vocab, self.min_period, self.max_period, self.logger) else: return None @@ -131,6 +131,9 @@ class Archivist(object): if reader.period() > self.max_period: reader.set_period(self.max_period) self.store(*reader.archive()) + elif reader.period() < self.min_period: + reader.set_period(self.min_period) + self.store(*reader.archive()) yield reader except Exception as e: self.logger.error("Failed passing through {}".format(dirname)) diff --git a/reader.py b/reader.py index 4c41ca6..7f3aa16 100644 --- a/reader.py +++ b/reader.py @@ -34,7 +34,7 @@ class Reader(object): ANIM_TAG = "^IS_ANIMATION^" VIDEO_TAG = "^IS_VIDEO^" - def __init__(self, metadata, vocab, max_period, logger, names=[]): + def __init__(self, metadata, vocab, min_period, max_period, logger, names=[]): # The Metadata object holding a chat's specific bot parameters self.meta = metadata # The Generator object holding the vocabulary learned so far @@ -51,22 +51,22 @@ class Reader(object): self.names = names # Create a new Reader from a Chat object - def FromChat(chat, max_period, logger): + def FromChat(chat, min_period, max_period, logger): meta = Metadata(chat.id, chat.type, get_chat_title(chat)) vocab = Generator() - return Reader(meta, vocab, max_period, logger) + return Reader(meta, vocab, min_period, max_period, logger) # TODO: Create a new Reader from a whole Chat history - def FromHistory(history, vocab, max_period, logger): + def FromHistory(history, vocab, min_period, max_period, logger): return None # Create a new Reader from a meta's file dump - def FromCard(meta, vocab, max_period, logger): - metadata = Metadata.loads(meta) - return Reader(metadata, vocab, max_period, 
logger) + def FromCard(card, vocab, min_period, max_period, logger): + meta = Metadata.loads(card) + return Reader(meta, vocab, min_period, max_period, logger) # Deprecated: this method will be removed in a new version - def FromFile(text, max_period, logger, vocab=None): + def FromFile(text, min_period, max_period, logger, vocab=None): print("Warning! This method of loading a Reader from file (Reader.FromFile(...))", "is deprecated, and will be removed from the next update. Use FromCard instead.") @@ -76,7 +76,7 @@ class Reader(object): version = version if len(version.strip()) > 1 else lines[4] logger.info("Dictionary version: {} ({} lines)".format(version, len(lines))) if version == "v4" or version == "v5": - return Reader.FromCard(text, vocab, max_period, logger) + return Reader.FromCard(text, vocab, min_period, max_period, logger) # I stopped saving the chat metadata and the cache together elif version == "v3": meta = Metadata.loadl(lines[0:8]) @@ -95,7 +95,7 @@ class Reader(object): cache = lines[4:] vocab = Generator(load=cache, mode=Generator.MODE_LIST) # raise SyntaxError("Reader: Metadata format unrecognized.") - r = Reader(meta, vocab, max_period, logger) + r = Reader(meta, vocab, min_period, max_period, logger) return r # Returns a nice lice little tuple package for the archivist to save to file. 
@@ -117,8 +117,8 @@ class Reader(object): # Sets a new period in the Metadata def set_period(self, period): - # The period has to be under max_period; otherwise, truncate to max_period - new_period = min(period, self.max_period) + # The period has to be in the range [min..max_period]; otherwise, clamp to said range + new_period = max(self.min_period, min(period, self.max_period)) set_period = self.meta.set_period(new_period) if new_period == set_period and new_period < self.countdown: # If succesfully changed and the new period is less than the current diff --git a/speaker.py b/speaker.py index 434800c..71bd0bb 100644 --- a/speaker.py +++ b/speaker.py @@ -62,7 +62,8 @@ class Speaker(object): self.mute_timer = None # The bot's username, "@" included self.username = username - # The maximum chat period for this bot + # The minimum and maximum chat period for this bot + self.min_period = archivist.min_period self.max_period = archivist.max_period # The Archivist functions to load and save from and to files @@ -136,7 +137,7 @@ class Speaker(object): reader = self.get_reader_file(cid) if not reader: - reader = Reader.FromChat(chat, self.max_period, self.logger) + reader = Reader.FromChat(chat, self.min_period, self.max_period, self.logger) old_reader = self.memory.add(reader) if old_reader is not None: @@ -289,8 +290,10 @@ class Speaker(object): send(bot, cid, self.speech(reader), replying, logger=self.logger, **kwargs) if self.bypass: # Testing mode, force a reasonable period (to not have the bot spam one specific chat with a low period) - max_period = self.max_period - reader.set_period(random.randint(max_period // 4, max_period)) + minp = self.min_period + maxp = self.max_period + rangep = maxp - minp + reader.set_period(random.randint(rangep // 4, rangep) + minp) if random.random() <= self.repeat: send(bot, cid, self.speech(reader), logger=self.logger, **kwargs) # Consider any Network Error as a Telegram temporary ban, as I couldn't find diff --git a/velasco.py 
b/velasco.py index fc1da66..c1bc703 100644 --- a/velasco.py +++ b/velasco.py @@ -100,6 +100,8 @@ def main(): args = parser.parse_args() + assert args.max_period >= args.min_period + # Create the EventHandler and pass it your bot's token. updater = Updater(args.token, use_context=True) From df73401a86e04b4d187d83693276834e8b800da5 Mon Sep 17 00:00:00 2001 From: vylion Date: Thu, 29 Oct 2020 13:41:52 +0100 Subject: [PATCH 22/22] MANUAL.md update --- MANUAL.md | 52 +++++++++++++++++++++++++++++++++++++++++----------- README.md | 19 ++++++++++++++++++- 2 files changed, 59 insertions(+), 12 deletions(-) diff --git a/MANUAL.md b/MANUAL.md index ecd9be5..de3ebb3 100644 --- a/MANUAL.md +++ b/MANUAL.md @@ -1,6 +1,24 @@ # Velascobot: Manual -**OUTDATED: REVISION PENDING** +Some notes: + +- Scriptorium version: Velasco v4.X (from the "Big Overhaul Update" on 27 Mar, 2019 until the 2nd Overhaul) + - Recognizable because Readers are Scribes and stored in a big dictionary called the Scriptorium, among others +- Overhaul 2 version: starting with Velasco v5.0 + +# Updating to Overhaul 2 + +If you have a Velasco clone or fork from the Scriptorium version, you should follow these steps: + +1. First of all, update all your chat files to CARD=v4 format. You can do this by making a script that imports the Archivist, and then loading and saving all files. +2. Then, pull the update. +3. To convert files to the new unescaped UTF-16 encoding (previously the default, escaped UTF-8, was used), edit the `get_reader(...)` function in the Archivist so it uses `load_reader_old(...)` instead of `load_reader(...)`. +4. Make a script that imports the Archivist and calls the `update(...)` function (it loads and saves all files). +5. Revert the `get_reader(...)` edit. + +And voilà! You're up to date. Unless you want to switch to the `mongodb` branch (WIP). + +# Mechanisms ## Markov chains @@ -12,17 +30,29 @@ The actual messages aren't stored.
After they're processed and all the words hav The storing action is made sometimes when a configuration value is changed, and whenever the bot sends a message. If the bot crashes, all the words processed from the messages since the last one from Velascobot will be lost. For high `period` values, this could be a considerable amount, but for small ones this is negligible. Still, the bot is not expected to crash often. +## Speaker's Memory + +The memory of a `Speaker` is a small cache of the `C` most recently modified `Readers` (where `C` is set through a flag; default is `20`). A modified `Reader` is one where the metadata was changed through a command, or a new message has been read. When a newly modified `Reader` takes the memory over its limit, the oldest modified `Reader` is pushed out and saved into its file. + +## Reader's Short Term and Long Term Memory + +When a message is read, it gets stored in a temporary cache. It will only be processed into the vocabulary `Generator` when the `Reader` is asked to generate a new message, or whenever the `Reader` gets saved into a file. This allows the bot to answer to other recent messages, and not just the last one, when the periodic message is a reply. + ## File hierarchy -For those who are interested in cloning or forking: +- `Generator` is the object class that holds a vocabulary dictionary and can generate new messages +- `Metadata` is the object class that holds one chat's configuration flags and other miscellaneous information. + - Sometimes the file where the metadata is saved is called a `card`. +- `Reader` is an object class that holds a `Metadata` instance and a `Generator` instance, and is associated with a specific chat. +- `Archivist` is the object class that handles persistence: saving to and loading from files.
+- `Speaker` is the object class that handles all (or most of) the functions for the commands that Velasco has + - Holds a limited set of `Readers` that it loads and saves through some `Archivist` functions (borrowed during `Speaker` initialization). +- `velasco.py` is the main file, in charge of starting up the Telegram bot itself. -- `velasco.py` is the file in charge of starting up the telegram bot itself -- `speaker.py` is the file with all the functions for the commands that Velasco has -- A *Speaker* is then the entity that receives the messages, and has 1 *Parrot* and 1 *Scriptorium* -- The *Scriptorium* is a collection of *Scribes*. Each *Scribe* contains the metadata of a chat (title, ID number, the `period`, etc) and the Markov dictionary associated to it -- *Scribes* are defined in `scribe.py` -- A *Parrot* is an entity that contains a Markov dictionary, and the *Speaker's Parrot* corresponds to the last chat that prompted a Velasco message. Whenever that happens, the *Parrot* for that chat is loaded, the corresponding *Scribe* teaches the *Parrot* the latest messages, and then the *Scribe* is stored along with the updated dictionary -- A Markov dictionary is defined in `markov.py` -- The *Archivist* (defined in `archivist.py`) is in charge of doing all file saves and loads +### TODO -**Warning:** This hierarchy is pending an overhaul. \ No newline at end of file +After managing to get Velasco back to being somewhat usable, I've already stated in the [News channel](https://t.me/velascobotnews) that I will focus on rewriting the code into a different language. Thus, I will add no improvements to the Python version from that point onwards. If you're interested in picking this project up and continuing development for Python, here are a few suggestions: + +- The `speaker.py` file is too big.
It would be useful to separate it into 2 files, one that has surface command handling, and another one that does all the speech handling (doing checks for `restricted` and `silenced` flags, the `period`, the random chances, ...). +- For a while now, Telegram allows to download a full chat history in a compressed file. Being able to send the compressed file, making sure that it *is* a Telegram chat history compressed file, and then unpacking and loading it into the chat's `Generator` would be cool. +- The most active chats have files that are too massive to keep in the process' memory. I will probably add a local database in MongoDB to solve that, but it will be a simple local one. Expanding it could be a good idea. diff --git a/README.md b/README.md index 01c05ee..62f5b38 100644 --- a/README.md +++ b/README.md @@ -38,7 +38,7 @@ Sending the command on its own (e.g. `/period`) tells you the current value. Sen ### Answer -This value is the chance of the bot to answer to a message that is in turn a reply to one of its own messages, or to a message that mentions the bot (see above: [Summon](###Summon)). The default value is `0.5` (50% chance). The maximum is `1` (100% chance) and to disable it you must set it to 0 (0% chance). +This value is the chance of the bot to answer to a message that is in turn a reply to one of its own messages, or to a message that mentions the bot (see above: [Summon](#summon)). The default value is `0.5` (50% chance). The maximum is `1` (100% chance) and to disable it you must set it to 0 (0% chance). Sending the command on its own (e.g. `/answer`) tells you the current value. Sending a positive decimal number between `0` and `1` inclusive (e.g. `/answer 0.95`) will set it as the new value. @@ -49,3 +49,20 @@ This toggles the chat's *restriction* (off by default). Having the chat *restric ### Silenced This toggles the chat's *silence* (off by default). 
Having the chat *silenced* means that possible user mentions that may appear in randomly generated messages, will be disabled by enveloping the '@' between parentheses. This will avoid Telegram mention notifications, specially useful for those who have the group chat muted. + +## When does the bot send a message? + +The bot will send a message, guaranteed: + +- If someone sends the `/speak` command and has permission to do so. +- If `period` messages have been read by the bot since the last time it sent a message. + +In addition, the bot will have a random chance to: + +- Reply to a message that mentions it (be it the username, like "@velascobot", or a name from a list of given nicknames, like "Velasco"). + - The chance of this is the answer probability configured with the `/answer` command. + - This does not affect the `period` countdown. +- Send a guaranteed message as a reply to a random recently read message (see [the manual](MANUAL.md#readers-short-term-and-long-term-memory)) instead of sending it normally. + - The chance of this is the `reply` variable in `Speaker`, and the default is `1`. +- Send a second message just after sending one (never a third one). + - The chance of this is the `repeat` variable in `Speaker`, and the default is `0.05`.