From d075624263a008dc6d906043b2c1e3db50b287d3 Mon Sep 17 00:00:00 2001 From: vylion Date: Wed, 27 Mar 2019 13:34:22 +0100 Subject: [PATCH] Velasco Big Overhaul Update MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 📝 Changed the whole script files hierarchy: - velasco.py starts up the telegram bot - speaker.py has all the bot behavior methods - A Parrot is what stores a Markov object - A Scribe stores a single chat's data - A Speaker has a Scriptorium, with all active Scribes - A Speaker has a single Parrot, the one associated with the last Scribe that had to send a message - An Archivist is in charge of loading the Scriptorium at startup, as well as storing Scribes and Parrots in files 🐞 Fixed a bug that prevented new Parrots from being saved, because their not-yet-existing file could not be loaded into the Speaker's Parrot --- .gitignore | 3 +- archivist.py | 161 +++++++++++++++++++++ chatlog.py | 188 ++++++++++-------------- markov.py | 95 ++++++------- scribe.py | 193 +++++++++++++++++++++++++ speaker.py | 299 ++++++++++++++++++++++++++++++++++++++ velasco.py | 394 +++++++++------------------------------------------ 7 files changed, 839 insertions(+), 494 deletions(-) create mode 100644 archivist.py create mode 100644 scribe.py create mode 100644 speaker.py mode change 100755 => 100644 velasco.py diff --git a/.gitignore b/.gitignore index afb39e2..3bc950a 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ chatlogs/* -__pycache__/* \ No newline at end of file +__pycache__/* +misc/* diff --git a/archivist.py b/archivist.py new file mode 100644 index 0000000..b9ec921 --- /dev/null +++ b/archivist.py @@ -0,0 +1,161 @@ + +import os, errno, random, pickle +from scribe import Scribe +from markov import Markov + +class Archivist(object): + + def __init__(self, logger, chatdir=None, chatext=None, admin=0, + freqIncrement=5, saveCount=15, maxFreq=100000, maxLen=50, + readOnly=False, filterCids=None, bypass=False + ): 
+ if chatdir is None or len(chatdir) == 0: + raise ValueError("Chatlog directory name is empty") + elif chatext is None: # Can be len(chatext) == 0 + raise ValueError("Chatlog file extension is invalid") + self.logger = logger + self.chatdir = chatdir + self.chatext = chatext + self.admin = admin + self.freqIncrement = freqIncrement + self.saveCount = saveCount + self.maxFreq = maxFreq + self.maxLen = maxLen + self.readOnly = readOnly + self.filterCids = filterCids + self.bypass = bypass + self.scribePath = chatdir + "chat_{tag}/{file}{ext}" + + def openfile(self, filename, mode): + if not os.path.exists(os.path.dirname(filename)): + try: + os.makedirs(os.path.dirname(filename)) + except OSError as e: + if e.errno != errno.EEXIST: + raise + return open(filename, mode) + + def store(self, tag, log, gen): + if self.readOnly: + return + file = self.openfile(self.scribePath.format(tag=tag, file="card", ext=".txt"), 'w') + file.write(log) + file.close() + file = self.openfile(self.scribePath.format(tag=tag, file="record", ext=self.chatext), 'w') + file.write(gen) + file.close() + + def recall(self, filename): + #print("Loading chat: " + path) + file = open(self.chatdir + filename, 'rb') + scribe = None + try: + scribe = Scribe.Recall(pickle.load(file), self) + self.logger.info("Unpickled {}{}".format(self.chatdir, filename)) + except pickle.UnpicklingError: + file.close() + file = open(self.chatdir + filename, 'r') + try: + scribe = Scribe.Recall(file.read(), self) + self.logger.info("Read {}{} text file".format(self.chatdir, filename)) + except Exception as e: + self.logger.error("Failed reading {}{}".format(self.chatdir, filename)) + self.logger.exception(e) + raise e + file.close() + return scribe + + def wakeScribe(self, filepath): + file = open(filepath.format(filename="card", ext=".txt"), 'r') + card = file.read() + file.close() + return Scribe.FromFile(card, self) + + def wakeParrot(self, tag): + filepath = self.scribePath.format(tag=tag, file="record", 
ext=self.chatext) + try: + file = open(filepath, 'r') + #print("\nOPening " + filepath + "\n") + record = file.read() + file.close() + return Markov.loads(record) + except: + self.logger.error("Parrot file {} not found. Assuming first time parrot.".format(filepath)) + return Markov() + + def wakeScriptorium(self): + scriptorium = {} + + directory = os.fsencode(self.chatdir) + for subdir in os.scandir(directory): + dirname = subdir.name.decode("utf-8") + if dirname.startswith("chat_"): + cid = dirname[5:] + try: + filepath = self.chatdir + dirname + "/{filename}{ext}" + scriptorium[cid] = self.wakeScribe(filepath) + self.logger.info("Chat {} contents:\n".format(cid) + scriptorium[cid].chat.dumps()) + if self.bypass: + scriptorium[cid].setFreq(random.randint(self.maxFreq//2, self.maxFreq)) + elif scriptorium[cid].freq() > self.maxFreq: + scriptorium[cid].setFreq(self.maxFreq) + except Exception as e: + self.logger.error("Failed reading {}".format(dirname)) + self.logger.exception(e) + raise e + return scriptorium + + """ + def wake_old(self): + scriptorium = {} + + directory = os.fsencode(self.chatdir) + for file in os.listdir(directory): + filename = os.fsdecode(file) + if filename.endswith(self.chatext): + cid = filename[:-(len(self.chatext))] + if self.filterCids is not None: + #self.logger.info("CID " + cid) + if not cid in self.filterCids: + continue + scriptorium[cid] = self.recall(filename) + scribe = scriptorium[cid] + if scribe is not None: + if self.bypass: + scribe.setFreq(random.randint(self.maxFreq//2, self.maxFreq)) + elif scribe.freq() > self.maxFreq: + scribe.setFreq(self.maxFreq) + self.logger.info("Loaded chat " + scribe.title() + " [" + scribe.cid() + "]" + "\n" + "\n".join(scribe.chat.dumps())) + else: + continue + return scriptorium + """ + + def update(self, oldext=None): + failed = [] + remove = False + if not oldext: + oldext = self.chatext + remove = True + + directory = os.fsencode(self.chatdir) + for file in os.listdir(directory): + 
filename = os.fsdecode(file) + if filename.endswith(oldext): + try: + self.logger.info("Updating chat " + filename) + scribe = self.recall(filename) + if scribe is not None: + scribe.store(scribe.parrot.dumps()) + self.wakeParrot(scribe.cid()) + self.logger.info("--- Update done: " + scribe.title()) + if remove: + os.remove(filename) + except Exception as e: + failed.append(filename) + self.logger.error("Found the following error when trying to update:") + self.logger.exception(e) + else: + continue + return failed diff --git a/chatlog.py b/chatlog.py index 03b03e0..b398c12 100644 --- a/chatlog.py +++ b/chatlog.py @@ -1,146 +1,106 @@ #!/usr/bin/env python3 -import random -from markov import * - -def parse_line(l): - s = l.split('=') +def parse(l): + s = l.split('=', 1) if len(s) < 2: return "" else: return s[1] class Chatlog(object): - def __init__(self, ident, chattype, title, text=None, freq=None, answer=0.5, restricted=False): - self.id = str(ident) - self.type = chattype + def __init__(self, cid, ctype, title, count=0, freq=None, answer=0.5, restricted=False, silenced=False): + self.id = str(cid) + self.type = ctype self.title = title if freq is None: - if "group" in chattype: + if "group" in ctype: freq = 10 - #elif chattype is "private": + #elif ctype is "private": else: freq = 2 + self.count = count self.freq = freq - if text is not None: - self.count = len(text) - else: - self.count = 0 - self.replyables = [] self.answer = answer self.restricted = restricted - self.gen = Markov(text) - - def set_title(self, title): - self.title = title - - def set_freq(self, freq): - if not freq > 0: - raise ValueError('Tried to set 0 or negative freq value.') - elif freq > 100000: - freq = 100000 - self.freq = freq - return self.freq - - def set_answer_freq(self, freq): - if freq > 1: - self.answer = 1 - elif freq < 0: - self.answer = 0 - else: - self.answer = freq - return self.answer + self.silenced = silenced def add_msg(self, message): - self.gen.add_text(message + ' ' 
+ TAIL) + self.gen.add_text(message) self.count += 1 - def add_sticker(self, file_id): - self.gen.add_text(STICKER_TAG + ' ' + file_id + ' ' + TAIL) - self.count += 1 - - def add_video(self, file_id): - self.gen.add_text(VIDEO_TAG + ' ' + file_id + ' ' + TAIL) - self.count += 1 - - def add_animation(self, file_id): - self.gen.add_text(ANIM_TAG + ' ' + file_id + ' ' + TAIL) - self.count += 1 - - def speak(self): - return self.gen.generate_markov_text() - - def get_count(self): - return self.count - - def answering(self, rand): - if self.answer == 1: - return True - elif self.answer == 0: - return False - return rand <= self.answer - - def add_replyable(self, msg_id): - self.replyables.append(msg_id) - - def restart_replyables(self, msg_id): - if msg_id is not None: - self.replyables = [msg_id] + def set_freq(self, freq): + if freq < 1: + raise ValueError('Tried to set freq a value less than 1.') else: - self.replyables = [] + self.freq = freq + return self.freq - def get_replyable(self): - random.choice(self.replyables) - def toggle_restrict(self): - self.restricted = (not self.restricted) + def set_answer(self, afreq): + if afreq > 1: + raise ValueError('Tried to set answer probability higher than 1.') + elif afreq < 0: + raise ValueError('Tried to set answer probability lower than 0.') + else: + self.answer = afreq + return self.answer - def is_restricted(self): - return self.restricted - - def to_txt(self): - lines = ["DICT=v3"] + def dumps(self): + lines = ["LOG=v4"] lines.append("CHAT_ID=" + self.id) lines.append("CHAT_TYPE=" + self.type) lines.append("CHAT_NAME=" + self.title) + lines.append("WORD_COUNT=" + str(self.count)) lines.append("MESSAGE_FREQ=" + str(self.freq)) lines.append("ANSWER_FREQ=" + str(self.answer)) lines.append("RESTRICTED=" + str(self.restricted)) - lines.append("WORD_COUNT=" + str(self.count)) - lines.append("WORD_DICT=") - txt = '\n'.join(lines) - return txt + '\n' + self.gen.to_json() + lines.append("SILENCED=" + str(self.silenced)) + 
#lines.append("WORD_DICT=") + return '\n'.join(lines) - def from_txt(text): + def loads(text): lines = text.splitlines() - #print("Line 4=" + lines[4]) - print("-- Loaded " + parse_line(lines[0]) + ".") - if(parse_line(lines[0]) == "v3"): - new_log = Chatlog(parse_line(lines[1]), parse_line(lines[2]), parse_line(lines[3]), None, int(parse_line(lines[4])), float(parse_line(lines[5])), (parse_line(lines[6]) == 'True')) - new_log.count = int(parse_line(lines[7])) - cache = '\n'.join(lines[9:]) - new_log.gen = Markov.from_json(cache) - if new_log.count < 0: - new_log.count = new_log.gen.new_count() - return new_log - elif(parse_line(lines[0]) == "v2"): - new_log = Chatlog(parse_line(lines[1]), parse_line(lines[2]), parse_line(lines[3]), None, int(parse_line(lines[4])), float(parse_line(lines[5]))) - new_log.count = int(parse_line(lines[6])) - cache = '\n'.join(lines[8:]) - new_log.gen = Markov.from_json(cache) - if new_log.count < 0: - new_log.count = new_log.gen.new_count() - return new_log - elif(lines[4] == "dict:"): - new_log = Chatlog(lines[0], lines[1], lines[2], None, int(lines[3])) - new_log.count = int(lines[5]) - cache = '\n'.join(lines[6:]) - new_log.gen = Markov.from_json(cache) - if new_log.count < 0: - new_log.count = new_log.gen.new_count() - return new_log - else: - return Chatlog(lines[0], lines[1], lines[2], lines[4:], int(lines[3])) + return Chatlog.loadl(lines) - def fuse_with(chatlog): - self.count += chatlog.count - self.gen.fuse_with(chatlog.gen) + def loadl(lines): + version = parse(lines[0]).strip() + version = version if len(version.strip()) > 1 else (lines[4] if len(lines) > 4 else "LOG_ZERO") + if version == "v4": + return Chatlog(cid=parse(lines[1]), + ctype=parse(lines[2]), + title=parse(lines[3]), + count=int(parse(lines[4])), + freq=int(parse(lines[5])), + answer=float(parse(lines[6])), + restricted=(parse(lines[7]) == 'True'), + silenced=(parse(lines[8]) == 'True') + ) + elif version == "v3": + return Chatlog(cid=parse(lines[1]), + 
ctype=parse(lines[2]), + title=parse(lines[3]), + count=int(parse(lines[7])), + freq=int(parse(lines[4])), + answer=float(parse(lines[5])), + restricted=(parse(lines[6]) == 'True') + ) + elif version == "v2": + return Chatlog(cid=parse(lines[1]), + ctype=parse(lines[2]), + title=parse(lines[3]), + count=int(parse(lines[6])), + freq=int(parse(lines[4])), + answer=float(parse(lines[5])) + ) + elif version == "dict:": + return Chatlog(cid=lines[0], + ctype=lines[1], + title=lines[2], + count=int(lines[5]), + freq=int(lines[3]) + ) + else: + return Chatlog(cid=lines[0], + ctype=lines[1], + title=lines[2], + freq=int(lines[3]) + ) diff --git a/markov.py b/markov.py index 1a37d31..bf1c3ce 100644 --- a/markov.py +++ b/markov.py @@ -3,25 +3,6 @@ import random import json -HEAD = "\n^MESSAGE_SEPARATOR^" -TAIL = "^MESSAGE_SEPARATOR^" -STICKER_TAG = "^IS_STICKER^" -ANIM_TAG = "^IS_ANIMATION^" -VIDEO_TAG = "^IS_VIDEO^" - -def trim_and_split(text): - words = text.replace('\n', '\n ').split(' ') - i = 0 - while i < len(words): - w = words[i].strip(' \t') - if len(w) > 0: - words[i] = w - else: - del words[i] - i -= 1 - i += 1 - return words - def getkey(w1, w2): key = (w1.strip().casefold(), w2.strip().casefold()) return str(key) @@ -33,11 +14,9 @@ def getwords(key): return words def triples(wordlist): - """ Generates triples from the given data string. So if our string were - "What a lovely day", we'd generate (What, a, lovely) and then - (a, lovely, day). - """ - + # Generates triples from the given data string. So if our string were + # "What a lovely day", we'd generate (What, a, lovely) and then + # (a, lovely, day). 
if len(wordlist) < 3: return @@ -45,54 +24,72 @@ def triples(wordlist): yield (wordlist[i], wordlist[i+1], wordlist[i+2]) class Markov(object): - def __init__(self, text=None, from_json=False): - self.cache = {} - if not from_json: - if text is not None: - for line in text: - self.add_text(line) - else: - self.cache = json.loads(text) + ModeJson = "MODE_JSON" + ModeList = "MODE_LIST" + ModeChatData = "MODE_CHAT_DATA" - def to_json(self): + Head = "\n^MESSAGE_SEPARATOR^" + Tail = "^MESSAGE_SEPARATOR^" + + def __init__(self, load=None, mode=None): + if mode is not None: + if mode == Markov.ModeJson: + self.cache = json.loads(load) + elif mode == Markov.ModeList: + self.cache = {} + self.loadList(load) + else: + self.cache = {} + + def loadList(self, lines): + for line in lines: + words = [Markov.Head] + words.extend(line.split()) + self.learn_words(words) + + def dumps(self): return json.dumps(self.cache) - def from_json(string): - return Markov(string, True) + def loads(dump): + if len(dump) == 0: + return Markov() + return Markov(load=dump, mode=Markov.ModeJson) - def add_text(self, text): - words = [HEAD] - words.extend(trim_and_split(text)) + def learn_words(self, words): self.database(words) def database(self, wordlist): for w1, w2, w3 in triples(wordlist): - if w1 == HEAD: + if w1 == Markov.Head: if w1 in self.cache: - self.cache[HEAD].append(w2) + self.cache[Markov.Head].append(w2) else: - self.cache[HEAD] = [w2] + self.cache[Markov.Head] = [w2] key = getkey(w1, w2) if key in self.cache: self.cache[key].append(w3) else: self.cache[key] = [w3] - def generate_markov_text(self, size=50): - w1 = random.choice(self.cache[HEAD]) - w2 = random.choice(self.cache[getkey(HEAD, w1)]) + def generate_markov_text(self, size=50, silence=False): + if len(self.cache) == 0: + return "" + w1 = random.choice(self.cache[Markov.Head]) + w2 = random.choice(self.cache[getkey(Markov.Head, w1)]) gen_words = [] for i in range(size): - gen_words.append(w1) - if w2 == TAIL or not 
getkey(w1, w2) in self.cache: + if silence and w1.startswith("@") and len(w1) > 1: + gen_words.append(w1.replace("@", "(@)")) + else: + gen_words.append(w1) + if w2 == Markov.Tail or not getkey(w1, w2) in self.cache: # print("Generated text") break else: w1, w2 = w2, random.choice(self.cache[getkey(w1, w2)]) return ' '.join(gen_words) - def fuse_with(self, gen): - d = gen.cache + def cross(self, gen): for key in gen.cache: if key in self.cache: self.cache[key].extend(d[key]) @@ -103,6 +100,6 @@ class Markov(object): count = 0 for key in self.cache: for word in self.cache[key]: - if word == MESSAGE_SEPARATOR: + if word == Markov.Tail: count += 1 return count diff --git a/scribe.py b/scribe.py new file mode 100644 index 0000000..aa4b540 --- /dev/null +++ b/scribe.py @@ -0,0 +1,193 @@ +#!/usr/bin/env python3 + +import random +from chatlog import * +from markov import Markov + +def getTitle(chat): + if chat.title is not None: + return chat.title + elif chat.first_name is not None: + if chat.last_name is not None: + return chat.first_name + " " + chat.last_name + else: + return chat.first_name + else: + return "" + +def rewrite(text): + words = text.replace('\n', '\n ').split(' ') + i = 0 + while i < len(words): + w = words[i].strip(' \t') + if len(w) > 0: + words[i] = w + else: + del words[i] + i -= 1 + i += 1 + return words + +class Page(object): + def __init__(self, mid, content): + self.id = mid + self.content = content + +class Scribe(object): + TagPrefix = "^IS_" + StickerTag = "^IS_STICKER^" + AnimTag = "^IS_ANIMATION^" + VideoTag = "^IS_VIDEO^" + + def __init__(self, chatlog, archivist): + self.chat = chatlog + self.archivist = archivist + self.pages = [] + self.countdown = self.chat.freq + self.logger = self.archivist.logger + + def FromChat(chat, archivist): + chatlog = Chatlog(chat.id, chat.type, getTitle(chat)) + return Scribe(chatlog, archivist) + + def FromData(data, archivist): + return None + + def FromFile(log, archivist): + chatlog = Chatlog.loads(log) 
+ return Scribe(chatlog, archivist) + + def Recall(text, archivist): + lines = text.splitlines() + version = parse(lines[0]).strip() + version = version if len(version.strip()) > 1 else lines[4] + archivist.logger.info( "Dictionary version: {} ({} lines)".format(version, len(lines)) ) + if version == "v4": + chatlog = Chatlog.loadl(lines[0:9]) + cache = '\n'.join(lines[10:]) + parrot = Markov.loads(cache) + elif version == "v3": + chatlog = Chatlog.loadl(lines[0:8]) + cache = '\n'.join(lines[9:]) + parrot = Markov.loads(cache) + elif version == "v2": + chatlog = Chatlog.loadl(lines[0:7]) + cache = '\n'.join(lines[8:]) + parrot = Markov.loads(cache) + elif version == "dict:": + chatlog = Chatlog.loadl(lines[0:6]) + cache = '\n'.join(lines[6:]) + parrot = Markov.loads(cache) + else: + chatlog = Chatlog.loadl(lines[0:4]) + cache = lines[4:] + parrot = Markov(load=cache, mode=Markov.ModeList) + #raise SyntaxError("Scribe: Chatlog format unrecognized.") + s = Scribe(chatlog, archivist) + s.parrot = parrot + return s + + def store(self, parrot): + self.archivist.store(self.chat.id, self.chat.dumps(), parrot) + + def checkType(self, t): + return t in self.chat.type + + def compareType(self, t): + return t == self.chat.type + + def setTitle(self, title): + self.chat.title = title + + def setFreq(self, freq): + if freq < self.countdown: + self.countdown = max(freq, 1) + return self.chat.set_freq(min(freq, self.archivist.maxFreq)) + + def setAnswer(self, afreq): + return self.chat.set_answer(afreq) + + def cid(self): + return str(self.chat.id) + + def count(self): + return self.chat.count + + def freq(self): + return self.chat.freq + + def title(self): + return self.chat.title + + def answer(self): + return self.chat.answer + + def type(self): + return self.chat.type + + def isRestricted(self): + return self.chat.restricted + + def restrict(self): + self.chat.restricted = (not self.chat.restricted) + + def isSilenced(self): + return self.chat.silenced + + def silence(self): 
+ self.chat.silenced = (not self.chat.silenced) + + def isAnswering(self): + rand = random.random() + chance = self.answer() + if chance == 1: + return True + elif chance == 0: + return False + return rand <= chance + + def addPage(self, mid, content): + page = Page(mid, content) + self.pages.append(page) + + def getReference(self): + page = random.choice(self.pages) + return page.id + + def resetCountdown(self): + self.countdown = self.chat.freq + + def learn(self, message): + mid = str(message.message_id) + + if message.text is not None: + self.read(mid, message.text) + elif message.sticker is not None: + self.learnDrawing(mid, Scribe.StickerTag, message.sticker.file_id) + elif message.animation is not None: + self.learnDrawing(mid, Scribe.AnimTag, message.animation.file_id) + elif message.video is not None: + self.learnDrawing(mid, Scribe.VideoTag, message.video.file_id) + self.chat.count += 1 + + def learnDrawing(self, mid, tag, drawing): + self.read(mid, tag + " " + drawing) + + def read(self, mid, text): + if "velasco" in text.casefold() and len(text.split()) <= 3: + return + words = [Markov.Head] + text = text + " " + Markov.Tail + words.extend(rewrite(text)) + self.addPage(mid, words) + + def teachParrot(self, parrot): + for page in self.pages: + parrot.learn_words(page.content) + self.pages = [] + +""" + def learnFrom(self, scribe): + self.chat.count += scribe.chat.count + self.parrot.cross(scribe.parrot) +""" diff --git a/speaker.py b/speaker.py new file mode 100644 index 0000000..253038e --- /dev/null +++ b/speaker.py @@ -0,0 +1,299 @@ +#!/usr/bin/env python3 + +import random +from scribe import Scribe +from telegram.error import * + +def send(bot, cid, text, replying=None, format=None, logger=None, **kwargs): + kwargs["parse_mode"] = format + kwargs["reply_to_message_id"] = replying + + if text.startswith(Scribe.TagPrefix): + words = text.split(maxsplit=1) + if logger: + logger.info('Sending {} "{}" to {}'.format(words[0][4:-1], words[1], cid)) + + if 
words[0] == Scribe.StickerTag: + return bot.send_sticker(cid, words[1], **kwargs) + elif words[0] == Scribe.AnimTag: + return bot.send_animation(cid, words[1], **kwargs) + elif words[0] == Scribe.VideoTag: + return bot.send_video(cid, words[1], **kwargs) + else: + text + if logger: + mtype = "reply" if replying else "message" + logger.info("Sending a {} to {}: '{}'".format(mtype, cid, text)) + return bot.send_message(cid, text, **kwargs) + +def getTitle(chat): + if chat.title: + return chat.title + else: + last = chat.last_name if chat.last_name else "" + first = chat.first_name if chat.first_name else "" + name = " ".join([first, last]).strip() + if len(name) == 0: + return "Unknown" + else: + return name + +class Speaker(object): + ModeFixed = "FIXED_MODE" + ModeChance = "MODE_CHANCE" + + def __init__(self, name, username, archivist, logger, + reply=0.1, repeat=0.05, wakeup=False, mode=ModeFixed + ): + self.name = name + self.username = username + self.archivist = archivist + self.scriptorium = archivist.wakeScriptorium() + logger.info("----") + logger.info("Finished loading.") + logger.info("Loaded {} chats.".format(len(self.scriptorium))) + logger.info("----") + self.wakeup = wakeup + self.logger = logger + self.reply = reply + self.repeat = repeat + self.filterCids = archivist.filterCids + self.bypass=archivist.bypass + + def announce(self, announcement, check=(lambda _: True)): + for scribe in self.scriptorium: + try: + if check(scribe): + send(bot, scribe.cid(), announcement) + logger.info("Waking up on chat {}".format(scribe.cid())) + except: + pass + + def wake(self, bot, wake): + if self.wakeup: + def check(scribe): + return scribe.checkType("group") + self.announce(wake, check) + + def getScribe(self, chat): + cid = str(chat.id) + if not cid in self.scriptorium: + scribe = Scribe.FromChat(chat, self.archivist) + self.scriptorium[cid] = scribe + return scribe + else: + return self.scriptorium[cid] + + def shouldReply(self, message, scribe): + if not 
self.bypass and scribe.isRestricted(): + user = message.chat.get_member(message.from_user.id) + if not self.userIsAdmin(user): + # update.message.reply_text("You do not have permissions to do that.") + return False + replied = message.reply_to_message + text = message.text.casefold() if message.text else "" + return ( ((replied is not None) and (replied.from_user.name == self.username)) or + (self.username in text) or + (self.name in text and "@{}".format(self.name) not in text) + ) + + def store(self, scribe): + if self.parrot is None: + raise ValueError("Tried to store a Parrot that is None.") + else: + scribe.store(self.parrot.dumps()) + + def read(self, bot, update): + chat = update.message.chat + scribe = self.getScribe(chat) + scribe.learn(update.message) + + if self.shouldReply(update.message, scribe) and scribe.isAnswering(): + self.say(bot, scribe, replying=update.message.message_id) + return + + title = getTitle(update.message.chat) + if title != scribe.title(): + scribe.setTitle(title) + + scribe.countdown -= 1 + if scribe.countdown < 0: + scribe.resetCountdown() + rid = scribe.getReference() if random.random() <= self.reply else None + self.say(bot, scribe, replying=rid) + elif (scribe.freq() - scribe.countdown) % self.archivist.saveCount == 0: + self.parrot = self.archivist.wakeParrot(scribe.cid()) + scribe.teachParrot(self.parrot) + self.store(scribe) + + def speak(self, bot, update): + chat = (update.message.chat) + scribe = self.getScribe(chat) + + if not self.bypass and scribe.isRestricted(): + user = update.message.chat.get_member(update.message.from_user.id) + if not self.userIsAdmin(user): + # update.message.reply_text("You do not have permissions to do that.") + return + + mid = str(update.message.message_id) + replied = update.message.reply_to_message + rid = replied.message_id if replied else mid + words = update.message.text.split() + if len(words) > 1: + scribe.learn(' '.join(words[1:])) + self.say(bot, scribe, replying=rid) + + def 
userIsAdmin(self, member): + self.logger.info("user {} ({}) requesting a restricted action".format(str(member.user.id), member.user.name)) + # self.logger.info("Bot Creator ID is {}".format(str(self.archivist.admin))) + return ((member.status == 'creator') or + (member.status == 'administrator') or + (member.user.id == self.archivist.admin)) + + def speech(self, scribe): + return self.parrot.generate_markov_text(size=self.archivist.maxLen, silence=scribe.isSilenced()) + + def say(self, bot, scribe, replying=None, **kwargs): + if self.filterCids is not None and not scribe.cid() in self.filterCids: + return + + self.parrot = self.archivist.wakeParrot(scribe.cid()) + scribe.teachParrot(self.parrot) + scribe.store(self.parrot) + try: + send(bot, scribe.cid(), self.speech(scribe), replying, logger=self.logger, **kwargs) + if self.bypass: + maxFreq = self.archivist.maxFreq + scribe.setFreq(random.randint(maxFreq//4, maxFreq)) + if random.random() <= self.repeat: + send(bot, scribe.cid(), self.speech(scribe), logger=self.logger, **kwargs) + except TimedOut: + scribe.setFreq(scribe.freq() + self.archivist.freqIncrement) + self.logger.warning("Increased period for chat {} [{}]".format(scribe.title(), scribe.cid())) + except Exception as e: + self.logger.error("Sending a message caused error:") + self.logger.error(e) + + def getCount(self, bot, update): + cid = str(update.message.chat.id) + scribe = self.scriptorium[cid] + num = str(scribe.count()) if self.scriptorium[cid] else "no" + update.message.reply_text("I remember {} messages.".format(num)) + + def getChats(self, bot, update): + lines = ["[{}]: {}".format(cid, self.scriptorium[cid].title()) for cid in self.scriptorium] + list = "\n".join(lines) + update.message.reply_text( "\n\n".join(["I have the following chats:", list]) ) + + def freq(self, bot, update): + chat = update.message.chat + scribe = self.getScribe(chat) + + words = update.message.text.split() + if len(words) <= 1: + update.message.reply_text("The 
current speech period is {}".format(scribe.freq())) + return + + if scribe.isRestricted(): + user = update.message.chat.get_member(update.message.from_user.id) + if not self.userIsAdmin(user): + update.message.reply_text("You do not have permissions to do that.") + return + try: + freq = int(words[1]) + freq = scribe.setFreq(freq) + update.message.reply_text("Period of speaking set to {}.".format(freq)) + scribe.store() + except: + update.message.reply_text("Format was confusing; period unchanged from {}.".format(scribe.freq())) + + def answer(self, bot, update): + chat = update.message.chat + scribe = self.getScribe(chat) + + words = update.message.text.split() + if len(words) <= 1: + update.message.reply_text("The current answer probability is {}".format(scribe.answer())) + return + + if scribe.isRestricted(): + user = update.message.chat.get_member(update.message.from_user.id) + if not self.userIsAdmin(user): + update.message.reply_text("You do not have permissions to do that.") + return + try: + afreq = int(words[1]) + afreq = scribe.setAnswer(afreq) + update.message.reply_text("Answer probability set to {}.".format(afreq)) + scribe.store() + except: + update.message.reply_text("Format was confusing; answer probability unchanged from {}.".format(scribe.answer())) + + def restrict(self, bot, update): + if "group" not in update.message.chat.type: + update.message.reply_text("That only works in groups.") + return + chat = update.message.chat + user = chat.get_member(update.message.from_user.id) + scribe = self.getScribe(chat) + if scribe.isRestricted(): + if not self.userIsAdmin(user): + update.message.reply_text("You do not have permissions to do that.") + return + scribe.restrict() + allowed = "let only admins" if scribe.isRestricted() else "let everyone" + update.message.reply_text("I will {} configure me now.".format(allowed)) + + def silence(self, bot, update): + if "group" not in update.message.chat.type: + update.message.reply_text("That only works in 
groups.") + return + chat = update.message.chat + user = chat.get_member(update.message.from_user.id) + scribe = self.getScribe(chat) + if scribe.isRestricted(): + if not self.userIsAdmin(user): + update.message.reply_text("You do not have permissions to do that.") + return + scribe.silence() + allowed = "avoid mentioning" if scribe.isSilenced() else "mention" + update.message.reply_text("I will {} people now.".format(allowed)) + + def who(self, bot, update): + msg = update.message + usr = msg.from_user + cht = msg.chat + chtname = cht.title if cht.title else cht.first_name + + answer = ("You're **{name}**, with username `{username}`, and " + "id `{uid}`.\nYou're messaging in the chat named __{cname}__," + " of type {ctype}, with id `{cid}`, and timestamp `{tstamp}`." + ).format(name=usr.full_name, username=usr.username, + uid=usr.id, cname=chtname, cid=cht.id, + ctype=scribe.type(), tstamp=str(msg.date)) + + msg.reply_markdown(answer) + + def where(self, bot, update): + print("THEY'RE ASKING WHERE") + msg = update.message + chat = msg.chat + scribe = self.getScribe(chat) + if scribe.isRestricted() and scribe.isSilenced(): + permissions = "restricted and silenced" + elif scribe.isRestricted(): + permissions = "restricted but not silenced" + elif scribe.isSilenced(): + permissions = "not restricted but silenced" + else: + permissions = "neither restricted nor silenced" + + answer = ("You're messaging in the chat of saved title __{cname}__," + " with id `{cid}`, message count {c}, period {p}, and answer " + "probability {a}.\n\nThis chat is {perm}." 
+ ).format(cname=scribe.title(), cid=scribe.cid(), + c=scribe.count(), p=scribe.freq(), a=scribe.answer(), + perm=permissions) + + msg.reply_markdown(answer) diff --git a/velasco.py b/velasco.py old mode 100755 new mode 100644 index a111336..3e2ac06 --- a/velasco.py +++ b/velasco.py @@ -1,74 +1,37 @@ #!/usr/bin/env python3 -import sys, os +import logging, argparse from telegram.ext import Updater, CommandHandler, MessageHandler, Filters from telegram.error import * -from chatlog import * -import logging -import argparse -import random +from archivist import Archivist +from speaker import Speaker -# Enable logging -logging.basicConfig(format='%(asctime)s - %(name)s - %(levelname)s - %(message)s - velascobot', - level=logging.INFO) +coloredlogsError = None +try: + import coloredlogs +except ImportError as e: + coloredlogsError = e + +username = "velascobot" +speakerbot = None logger = logging.getLogger(__name__) -chatlogs = {} +# Enable logging +log_format="[{}][%(asctime)s]%(name)s::%(levelname)s: %(message)s".format(username.upper()) -ADMIN_ID = 0 -WAKEUP = False -CHAT_INC = 5 -CHAT_SAVE = 15 -LOG_DIR = "chatlogs/" -LOG_EXT = ".txt" -REPL_CHANCE = 10/100 -REPT_CHANCE = 5/100 +if coloredlogsError: + logging.basicConfig(format=log_format, level=logging.INFO) + logger.warning("Unable to load coloredlogs:") + logger.warning(coloredlogsError) +else: + coloredlogs.install(level=logging.INFO, fmt=log_format) -def wake(bot): - directory = os.fsencode(LOG_DIR) +start_msg = "Hello there! Ask me for /help to see an overview of the available commands." - for file in os.listdir(directory): - filename = os.fsdecode(file) - if filename.endswith(LOG_EXT): - chat = loadchat(LOG_DIR + filename) - if chat is not None: - chatlogs[chat.id] = chat - print("loaded chat " + chat.title + " [" + chat.id + "]") - continue - else: - continue +wake_msg = "Good morning. 
I just woke up" - for c in chatlogs: - try: - print("Waking up on chat {}.".format(c)) - if WAKEUP and "group" in chatlogs[c].type: - bot.sendMessage(c, "Good morning. I just woke up") - except: - pass - #del chatlogs[c] - -def start(bot, update): - update.message.reply_text("Hello there! Ask me for /help to see an overview of the available commands.") - -def savechat(chatlog): - open_file = open(LOG_DIR + chatlog.id + LOG_EXT, 'w') - open_file.write(chatlog.to_txt()) - open_file.close() - -def loadchat(path): - #print("Loading chat: " + path) - open_file = open(path, 'r') - chat = None - try: - chat = Chatlog.from_txt(open_file.read()) - except: - pass - open_file.close() - return chat - -def help(bot, update): - update.message.reply_text("""I answer to the following commands: +help_msg = """I answer to the following commands: /start - I say hi. /about - What I'm about. @@ -79,267 +42,29 @@ def help(bot, update): /speak - Forces me to speak. /answer - Change the probability to answer to a reply. (Decimal between 0 and 1). /restrict - Toggle restriction of configuration commands to admins only. - """) +/silence - Toggle restriction on mentions by the bot. +""" -def about(bot, update): - update.message.reply_text('I am yet another Markov Bot experiment. I read everything you type to me and then spit back nonsensical messages that look like yours\n\nYou can send /explain if you want further explanation') +about_msg = "I am yet another Markov Bot experiment. I read everything you type to me and then spit back nonsensical messages that look like yours.\n\nYou can send /explain if you want further explanation." -def explain(bot, update): - update.message.reply_text('I decompose every message I read in groups of 3 consecutive words, so for each consecutive pair I save the word that can follow them. I then use this to make my own messages. 
At first I will only repeat your messages because for each 2 words I will have very few possible following words.\n\nI also separate my vocabulary by chats, so anything I learn in one chat I will only say in that chat. For privacy, you know. Also, I save my vocabulary in the form of a json dictionary, so no logs are kept.\n\nMy default frequency in private chats is one message of mine from each 2 messages received, and in group chats it\'s 10 messages I read for each message I send.') +explanation = "I decompose every message I read in groups of 3 consecutive words, so for each consecutive pair I save the word that can follow them. I then use this to make my own messages. At first I will only repeat your messages because for each 2 words I will have very few possible following words.\n\nI also separate my vocabulary by chats, so anything I learn in one chat I will only say in that chat. For privacy, you know. Also, I save my vocabulary in the form of a json dictionary, so no logs are kept.\n\nMy default frequency in private chats is one message of mine from each 2 messages received, and in group chats it\'s 10 messages I read for each message I send." 
-def echo(bot, update): - text = update.message.text.split(None, maxsplit=1) - if len(text) > 1: - text = text[1] - chatlog.add_msg(text) - update.message.reply_text(text) +def static_reply(text, format=None): + def reply(bot, update): + update.message.reply_text(text, parse_mode=format) + return reply def error(bot, update, error): - logger.warning('Update "%s" caused error "%s"' % (update, error)) - -def get_chatname(chat): - if chat.title is not None: - return chat.title - elif chat.first_name is not None: - if chat.last_name is not None: - return chat.first_name + " " + chat.last_name - else: - return chat.first_name - else: - return "" - -def read(bot, update): - global chatlogs - chat = update.message.chat - ident = str(chat.id) - if not ident in chatlogs: - title = get_chatname(chat) - chatlog = Chatlog(chat.id, chat.type, title) - else: - chatlog = chatlogs[ident] - - if update.message.text is not None: - chatlog.add_msg(update.message.text) - elif update.message.sticker is not None: - #print("I received a sticker") - chatlog.add_sticker(update.message.sticker.file_id) - elif update.message.animation is not None: - #print("I received an animation") - chatlog.add_animation(update.message.animation.file_id) - elif update.message.video is not None: - #print("I received a video") - chatlog.add_video(update.message.video.file_id) - # print("Read a message of id "update.message.message_id) - if chatlog.get_count()%chatlog.freq == 1: - chatlog.restart_replyables(update.message.message_id) - else: - chatlog.add_replyable(update.message.message_id) - - replied = update.message.reply_to_message - reply_text = update.message.text.casefold() if update.message.text else "" - to_reply = ((replied is not None) and (replied.from_user.name == "@velascobot")) or ("@velascobot" in reply_text) or ("velasco" in reply_text and "@velasco" not in reply_text) - - if to_reply and chatlog.answering(random.random()): - print("They're talking to me, I'm answering back") - msg = 
chatlog.speak() - send_message(bot, update, msg, update.message.message_id) - - if random.random() <= REPT_CHANCE: - msg = chatlog.speak() - send_message(bot, update, msg) - - elif chatlog.get_count()%chatlog.freq == 0: - msg = chatlog.speak() - try: - if random.random() <= REPL_CHANCE: - print("I made a reply") - send_message(bot, update, msg, chatlog.get_replyable()) - else: - print("I sent a message") - send_message(bot, update, msg) - if random.random() <= REPT_CHANCE: - print("And a followup") - msg = chatlog.speak() - send_message(bot, update, msg) - except TimedOut: - chatlog.set_freq(chatlog.freq + CHAT_INC) - print("Increased freq for chat " + chatlog.title + " [" + chatlog.id + "]") - if get_chatname(chat) != chatlog.title: - chatlog.set_title(get_chatname(chat)) - savechat(chatlog) - elif chatlog.freq > CHAT_SAVE and chatlog.get_count()%CHAT_SAVE == 0: - savechat(chatlog) - chatlogs[chatlog.id] = chatlog - -def speak(bot, update): - global chatlogs - ident = str(update.message.chat.id) - if not ident in chatlogs: - chat = update.message.chat - title = get_chatname(chat) - chatlog = Chatlog(chat.id, chat.type, title) - else: - chatlog = chatlogs[ident] - - if chatlogs[ident].is_restricted(): - user = update.message.chat.get_member(update.message.from_user.id) - if not user_is_admin(user): - return - - reply_to = update.message.reply_to_message.message_id if update.message.reply_to_message else update.message.message_id - text = update.message.text.split() - if len(text) > 1: - chatlog.add_msg(' '.join(text[1:])) - msg = chatlog.speak() - send_message(bot, update, msg, reply_to) - savechat(chatlog) - chatlogs[chatlog.id] = chatlog - -def send_message(bot, update, msg, reply_id=None): - words = msg.split(maxsplit=1) - if words[0] == STICKER_TAG: - if reply_id is not None: - update.message.reply_sticker(words[1]) - else: - bot.sendSticker(update.message.chat_id, words[1]) - - elif words[0] == ANIM_TAG: - if reply_id is not None: - 
update.message.reply_animation(words[1]) - else: - bot.sendAnimation(update.message.chat_id, words[1]) - - elif words[0] == VIDEO_TAG: - if reply_id is not None: - try: - update.message.reply_animation(words[1]) - except: - update.message.reply_video(words[1]) - else: - try: - bot.sendAnimation(update.message.chat_id, words[1]) - except: - bot.sendVideo(update.message.chat_id, words[1]) - - else: - if reply_id is not None: - bot.sendMessage(update.message.chat.id, msg, reply_to_message_id=reply_id) - else: - bot.sendMessage(update.message.chat.id, msg) - -def get_chatlogs(bot, update): - m = "I have these chatlogs:" - for c in chatlogs: - m += "\n" + chatlogs[c].id + " " + chatlogs[c].title - send_message(bot, update, msg, update.message.message_id) - -def get_id(bot, update): - update.message.reply_text("This chat's id is: " + str(update.message.chat.id)) - -def get_name(bot, update): - update.message.reply_text("Your name is: " + update.message.from_user.name) - -def get_count(bot, update): - ident = str(update.message.chat.id) - reply = "I remember " - if ident in chatlogs: - reply += str(chatlogs[ident].get_count()) - else: - reply += "no" - reply += " messages." 
- update.message.reply_text(reply) - -def user_is_admin(member): - global ADMIN_ID - print("user {} requesting a restricted action".format(str(member.user.id))) - print("Creator ID is {}".format(str(ADMIN_ID))) - return (member.status == 'creator') or (member.status == 'administrator') or (member.user.id == ADMIN_ID) - -def set_freq(bot, update): - ident = str(update.message.chat.id) - if not ident in chatlogs: - chat = update.message.chat - title = get_chatname(chat) - chatlog = Chatlog(chat.id, chat.type, title) - chatlogs[chatlog.id] = chatlog - - if not len(update.message.text.split()) > 1: - reply = "Current frequency is " + str(chatlogs[ident].freq) - else: - if chatlogs[ident].is_restricted(): - user = update.message.chat.get_member(update.message.from_user.id) - if not user_is_admin(user): - reply = "You do not have permissions to do that." - update.message.reply_text(reply) - return - try: - value = update.message.text.split()[1] - value = int(value) - value = chatlogs[ident].set_freq(value) - reply = "Frequency of speaking set to " + str(value) - savechat(chatlogs[ident]) - except: - reply = "Format was confusing; frequency not changed from " + str(chatlogs[ident].freq) - update.message.reply_text(reply) - -def set_answer_freq(bot, update): - ident = str(update.message.chat.id) - if not ident in chatlogs: - chat = update.message.chat - title = get_chatname(chat) - chatlog = Chatlog(chat.id, chat.type, title) - chatlogs[chatlog.id] = chatlog - - if not len(update.message.text.split()) > 1: - reply = "Current answer probability is " + str(chatlogs[ident].answer) - else: - if chatlogs[ident].is_restricted(): - user = chat.get_member(update.message.from_user.id) - if not user_is_admin(user): - reply = "You do not have permissions to do that." 
- update.message.reply_text(reply) - return - try: - value = update.message.text.split()[1] - value = float(value) - value = chatlogs[ident].set_answer_freq(value) - reply = "Probability of answering set to " + str(value) - savechat(chatlogs[ident]) - except: - reply = "Format was confusing; answer probability not changed from " + str(chatlogs[ident].answer) - update.message.reply_text(reply) - -def restrict(bot, update): - if "group" not in update.message.chat.type: - update.message.reply_text("That only works in groups.") - return - ident = str(update.message.chat.id) - if not ident in chatlogs: - chat = update.message.chat - title = get_chatname(chat) - chatlog = Chatlog(chat.id, chat.type, title) - chatlogs[chatlog.id] = chatlog - else: - chatlog = chatlogs[ident] - if chatlog.is_restricted(): - user = update.message.chat.get_member(update.message.from_user.id) - if not user_is_admin(user): - reply = "You do not have permissions to do that." - update.message.reply_text(reply) - return - chatlogs[ident].toggle_restrict() - reply = (chatlogs[ident].is_restricted() and "I will only let admins " or "I will let everyone ") + "configure me now." 
- update.message.reply_text(reply) + logger.warning('Update "{}" caused error "{}"'.format(update, error)) def stop(bot, update): - global ADMIN_ID - chatlog = chatlogs[update.message.chat.id] + scribe = speakerbot.getScribe(update.message.chat.id) #del chatlogs[chatlog.id] #os.remove(LOG_DIR + chatlog.id + LOG_EXT) - print("I got blocked by user " + chatlog.id) + logger.warning("I got blocked by user {} [{}]".format(scribe.title(), scribe.cid())) def main(): - global ADMIN_ID, WAKEUP + global speakerbot parser = argparse.ArgumentParser(description='A Telegram markov bot.') parser.add_argument('token', metavar='TOKEN', help='The Bot Token to work with the Telegram Bot API') parser.add_argument('admin_id', metavar='ADMIN_ID', type=int, help='The ID of the Telegram user that manages this bot') @@ -349,40 +74,49 @@ def main(): # Create the EventHandler and pass it your bot's token. updater = Updater(args.token) - ADMIN_ID = args.admin_id - if args.wakeup: - WAKEUP = True + + #filterCids=["-1001036575277", "-1001040087584", str(args.admin_id)] + filterCids=None + + archivist = Archivist(logger, + chatdir="chatlogs/", + chatext=".vls", + admin=args.admin_id, + filterCids=filterCids, + readOnly=True + ) + + speakerbot = Speaker("velasco", "@" + username, archivist, logger, wakeup=args.wakeup) # Get the dispatcher to register handlers dp = updater.dispatcher # on different commands - answer in Telegram - dp.add_handler(CommandHandler("start", start)) - dp.add_handler(CommandHandler("about", about)) - dp.add_handler(CommandHandler("explain", explain)) - dp.add_handler(CommandHandler("help", help)) - dp.add_handler(CommandHandler("count", get_count)) - dp.add_handler(CommandHandler("freq", set_freq)) - dp.add_handler(CommandHandler("list", get_chatlogs, Filters.chat(args.admin_id))) - dp.add_handler(CommandHandler("user", get_name, Filters.chat(args.admin_id))) - dp.add_handler(CommandHandler("id", get_id)) + dp.add_handler(CommandHandler("start", static_reply(start_msg) )) 
+ dp.add_handler(CommandHandler("about", static_reply(about_msg) )) + dp.add_handler(CommandHandler("explain", static_reply(explanation) )) + dp.add_handler(CommandHandler("help", static_reply(help_msg) )) + dp.add_handler(CommandHandler("count", speakerbot.getCount)) + dp.add_handler(CommandHandler("period", speakerbot.freq)) + dp.add_handler(CommandHandler("list", speakerbot.getChats, Filters.chat(chat_id=archivist.admin))) + #dp.add_handler(CommandHandler("user", get_name, Filters.chat(chat_id=archivist.admin))) + #dp.add_handler(CommandHandler("id", get_id)) dp.add_handler(CommandHandler("stop", stop)) - dp.add_handler(CommandHandler("speak", speak)) - dp.add_handler(CommandHandler("answer", set_answer_freq)) - dp.add_handler(CommandHandler("restrict", restrict)) + dp.add_handler(CommandHandler("speak", speakerbot.speak)) + dp.add_handler(CommandHandler("answer", speakerbot.answer)) + dp.add_handler(CommandHandler("restrict", speakerbot.restrict)) + dp.add_handler(CommandHandler("silence", speakerbot.silence)) + dp.add_handler(CommandHandler("who", speakerbot.who)) + dp.add_handler(CommandHandler("where", speakerbot.where)) # on noncommand i.e message - echo the message on Telegram # dp.add_handler(MessageHandler(Filters.text, echo)) - dp.add_handler(MessageHandler((Filters.text | Filters.sticker | Filters.animation), read)) + dp.add_handler(MessageHandler((Filters.text | Filters.sticker | Filters.animation), speakerbot.read)) # log all errors dp.add_error_handler(error) - wake(updater.bot) - - print("-----") - print("Finished loading.") - print("-----") + speakerbot.wake(updater.bot, wake_msg) # Start the Bot updater.start_polling()