diff --git a/.gitignore b/.gitignore index 3bc950a..4ccade3 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ chatlogs/* __pycache__/* misc/* +test/* diff --git a/archivist.py b/archivist.py index 65dc5cb..21637a1 100644 --- a/archivist.py +++ b/archivist.py @@ -1,14 +1,15 @@ import os, errno, random, pickle -from scribe import Scribe -from markov import Markov +from chatreader import ChatReader as Reader +from generator import Generator + class Archivist(object): def __init__(self, logger, chatdir=None, chatext=None, admin=0, - freqIncrement=5, saveCount=15, maxFreq=100000, maxLen=50, - readOnly=False, filterCids=None, bypass=False - ): + freq_increment=5, save_count=15, max_period=100000, max_len=50, + read_only=False, filter_cids=None, bypass=False + ): if chatdir is None or len(chatdir) == 0: raise ValueError("Chatlog directory name is empty") elif chatext is None: # Can be len(chatext) == 0 @@ -17,43 +18,46 @@ class Archivist(object): self.chatdir = chatdir self.chatext = chatext self.admin = admin - self.freqIncrement = freqIncrement - self.saveCount = saveCount - self.maxFreq = maxFreq - self.maxLen = maxLen - self.readOnly = readOnly - self.filterCids = filterCids + self.freq_increment = freq_increment + self.save_count = save_count + self.max_period = max_period + self.max_len = max_len + self.read_only = read_only + self.filter_cids = filter_cids self.bypass = bypass - self.scribeFolder = chatdir + "chat_{tag}" - self.scribePath = chatdir + "chat_{tag}/{file}{ext}" + + def chat_folder(self, *formatting, **key_format): + return (self.chatdir + "chat_{tag}").format(*formatting, **key_format) + + def chat_file(self, *formatting, **key_format): + return (self.chatdir + "chat_{tag}/{file}{ext}").format(*formatting, **key_format) def store(self, tag, log, gen): - scribefolder = self.scribeFolder.format(tag=tag) - cardfile = self.scribePath.format(tag=tag, file="card", ext=".txt") - if self.readOnly: + chat_folder = self.chat_folder(tag=tag) + chat_card = 
self.chat_file(tag=tag, file="card", ext=".txt") + if self.read_only: return try: - if not os.path.exists(scribefolder): - os.makedirs(scribefolder, exist_ok=True) - self.logger.info("Storing a new chat. Folder {} created.".format(scribefolder)) + if not os.path.exists(chat_folder): + os.makedirs(chat_folder, exist_ok=True) + self.logger.info("Storing a new chat. Folder {} created.".format(chat_folder)) except: - self.logger.error("Failed creating {} folder.".format(scribefolder)) + self.logger.error("Failed creating {} folder.".format(chat_folder)) return - file = open(cardfile, 'w') + file = open(chat_card, 'w') file.write(log) file.close() if gen is not None: - recordfile = self.scribePath.format(tag=tag, file="record", ext=self.chatext) - file = open(recordfile, 'w') + chat_record = self.chat_file(tag=tag, file="record", ext=self.chatext) + file = open(chat_record, 'w') file.write(gen) file.close() - def recall(self, filename): - #print("Loading chat: " + path) + def get_reader(self, filename): file = open(self.chatdir + filename, 'rb') scribe = None try: - scribe = Scribe.Recall(pickle.load(file), self) + reader, vocab = Reader.FromFile(pickle.load(file), self.max_period, self.logger) self.logger.info("Unpickled {}{}".format(self.chatdir, filename)) except pickle.UnpicklingError: file.close() @@ -68,27 +72,24 @@ class Archivist(object): file.close() return scribe - def wakeScribe(self, filepath): + def load_reader(self, filepath): file = open(filepath.format(filename="card", ext=".txt"), 'r') card = file.read() file.close() - return Scribe.FromFile(card, self) + return Reader.FromCard(card, self.max_period, self.logger) def wakeParrot(self, tag): - filepath = self.scribePath.format(tag=tag, file="record", ext=self.chatext) + filepath = self.chat_file(tag=tag, file="record", ext=self.chatext) try: file = open(filepath, 'r') - #print("\nOPening " + filepath + "\n") record = file.read() file.close() - return Markov.loads(record) + return Generator.loads(record) except: - self.logger.error("Parrot file {} not 
found.".format(filepath)) + self.logger.error("Record file {} not found.".format(filepath)) return None - def wakeScriptorium(self): - scriptorium = {} - + def readers_pass(self): directory = os.fsencode(self.chatdir) for subdir in os.scandir(directory): dirname = subdir.name.decode("utf-8") @@ -96,17 +97,16 @@ class Archivist(object): cid = dirname[5:] try: filepath = self.chatdir + dirname + "/{filename}{ext}" - scriptorium[cid] = self.wakeScribe(filepath) - self.logger.info("Chat {} contents:\n".format(cid) + scriptorium[cid].chat.dumps()) + reader = self.load_reader(filepath) + self.logger.info("Chat {} contents:\n".format(cid) + reader.card.dumps()) if self.bypass: - scriptorium[cid].setFreq(random.randint(self.maxFreq//2, self.maxFreq)) - elif scriptorium[cid].freq() > self.maxFreq: - scriptorium[cid].setFreq(self.maxFreq) + reader.set_period(random.randint(self.max_period//2, self.max_period)) + elif reader.period() > self.max_period: + reader.set_period(self.max_period) except Exception as e: self.logger.error("Failed reading {}".format(dirname)) self.logger.exception(e) raise e - return scriptorium """ def wake_old(self): @@ -117,17 +117,17 @@ class Archivist(object): filename = os.fsdecode(file) if filename.endswith(self.chatext): cid = filename[:-(len(self.chatext))] - if self.filterCids is not None: + if self.filter_cids is not None: #self.logger.info("CID " + cid) - if not cid in self.filterCids: + if not cid in self.filter_cids: continue scriptorium[cid] = self.recall(filename) scribe = scriptorium[cid] if scribe is not None: if self.bypass: - scribe.setFreq(random.randint(self.maxFreq//2, self.maxFreq)) - elif scribe.freq() > self.maxFreq: - scribe.setFreq(self.maxFreq) + scribe.setFreq(random.randint(self.max_period//2, self.max_period)) + elif scribe.freq() > self.max_period: + scribe.setFreq(self.max_period) self.logger.info("Loaded chat " + scribe.title() + " [" + scribe.cid() + "]" "\n" + "\n".join(scribe.chat.dumps())) else: diff 
--git a/brain.py b/brain.py new file mode 100644 index 0000000..fb55324 --- /dev/null +++ b/brain.py @@ -0,0 +1,5 @@ +#!/usr/bin/env python3 + +import random +from chatreader import ChatReader as Reader + diff --git a/chatcard.py b/chatcard.py new file mode 100644 index 0000000..4af559f --- /dev/null +++ b/chatcard.py @@ -0,0 +1,122 @@ +#!/usr/bin/env python3 + +def parse_card_line(line): + # This reads a line in the format 'VARIABLE=value' and gives me the value. + # See ChatCard.loadl(...) for more details + s = line.split('=', 1) + if len(s) < 2: + return "" + else: + return s[1] + + +class ChatCard(object): + def __init__(self, cid, ctype, title, count=0, period=None, answer=0.5, restricted=False, silenced=False): + self.id = str(cid) + # The Telegram chat's ID + self.type = ctype + # The type of chat + self.title = title + # The title of the chat + if period is None: + if "group" in ctype: + period = 10 + # Default period for groups and supergroups + else: + period = 2 + # Default period for private or channel chats + self.count = count + # The number of messages read + self.period = period + # This chat's configured period + self.answer = answer + # This chat's configured answer probability + self.restricted = restricted + # Whether some interactions are restricted to admins only + self.silenced = silenced + # Whether messages should silence user mentions + + def set_period(self, period): + if period < 1: + raise ValueError('Tried to set period a value less than 1.') + else: + self.period = period + return self.period + + def set_answer(self, prob): + if prob > 1: + raise ValueError('Tried to set answer probability higher than 1.') + elif prob < 0: + raise ValueError('Tried to set answer probability lower than 0.') + else: + self.answer = prob + return self.answer + + def dumps(self): + lines = ["CARD=v5"] + lines.append("CHAT_ID=" + self.id) + lines.append("CHAT_TYPE=" + self.type) + lines.append("CHAT_NAME=" + self.title) + lines.append("WORD_COUNT=" + 
str(self.count)) + lines.append("MESSAGE_PERIOD=" + str(self.period)) + lines.append("ANSWER_PROB=" + str(self.answer)) + lines.append("RESTRICTED=" + str(self.restricted)) + lines.append("SILENCED=" + str(self.silenced)) + # lines.append("WORD_DICT=") + return ('\n'.join(lines)) + '\n' + + def loads(text): + lines = text.splitlines() + return ChatCard.loadl(lines) + + def loadl(lines): + # In a perfect world, I would get both the variable name and its corresponding value + # from each side of the lines, but I know the order in which the lines are written in + # the file, I hardcoded it. So I can afford also hardcoding reading it back in the + # same order, and nobody can stop me + version = parse_card_line(lines[0]).strip() + version = version if len(version.strip()) > 1 else (lines[4] if len(lines) > 4 else "LOG_ZERO") + if version == "v4" or version == "v5": + return ChatCard(cid=parse_card_line(lines[1]), + ctype=parse_card_line(lines[2]), + title=parse_card_line(lines[3]), + count=int(parse_card_line(lines[4])), + period=int(parse_card_line(lines[5])), + answer=float(parse_card_line(lines[6])), + restricted=(parse_card_line(lines[7]) == 'True'), + silenced=(parse_card_line(lines[8]) == 'True') + ) + elif version == "v3": + return ChatCard(cid=parse_card_line(lines[1]), + ctype=parse_card_line(lines[2]), + title=parse_card_line(lines[3]), + count=int(parse_card_line(lines[7])), + period=int(parse_card_line(lines[4])), + answer=float(parse_card_line(lines[5])), + restricted=(parse_card_line(lines[6]) == 'True') + ) + elif version == "v2": + return ChatCard(cid=parse_card_line(lines[1]), + ctype=parse_card_line(lines[2]), + title=parse_card_line(lines[3]), + count=int(parse_card_line(lines[6])), + period=int(parse_card_line(lines[4])), + answer=float(parse_card_line(lines[5])) + ) + elif version == "dict:": + # At some point I decided to number the versions of each dictionary format, + # but this was not always the case. 
This is what you get if you try to read + # whatever there is in very old files where the version should be + return ChatCard(cid=lines[0], + ctype=lines[1], + title=lines[2], + count=int(lines[5]), + period=int(lines[3]) + ) + else: + # This is for the oldest of files + return ChatCard(cid=lines[0], + ctype=lines[1], + title=lines[2], + period=int(lines[3]) + ) diff --git a/chatlog.py b/chatlog.py deleted file mode 100644 index b398c12..0000000 --- a/chatlog.py +++ /dev/null @@ -1,106 +0,0 @@ -#!/usr/bin/env python3 - -def parse(l): - s = l.split('=', 1) - if len(s) < 2: - return "" - else: - return s[1] - -class Chatlog(object): - def __init__(self, cid, ctype, title, count=0, freq=None, answer=0.5, restricted=False, silenced=False): - self.id = str(cid) - self.type = ctype - self.title = title - if freq is None: - if "group" in ctype: - freq = 10 - #elif ctype is "private": - else: - freq = 2 - self.count = count - self.freq = freq - self.answer = answer - self.restricted = restricted - self.silenced = silenced - - def add_msg(self, message): - self.gen.add_text(message) - self.count += 1 - - def set_freq(self, freq): - if freq < 1: - raise ValueError('Tried to set freq a value less than 1.') - else: - self.freq = freq - return self.freq - - def set_answer(self, afreq): - if afreq > 1: - raise ValueError('Tried to set answer probability higher than 1.') - elif afreq < 0: - raise ValueError('Tried to set answer probability lower than 0.') - else: - self.answer = afreq - return self.answer - - def dumps(self): - lines = ["LOG=v4"] - lines.append("CHAT_ID=" + self.id) - lines.append("CHAT_TYPE=" + self.type) - lines.append("CHAT_NAME=" + self.title) - lines.append("WORD_COUNT=" + str(self.count)) - lines.append("MESSAGE_FREQ=" + str(self.freq)) - lines.append("ANSWER_FREQ=" + str(self.answer)) - lines.append("RESTRICTED=" + str(self.restricted)) - lines.append("SILENCED=" + str(self.silenced)) - #lines.append("WORD_DICT=") - return '\n'.join(lines) - - def 
loads(text): - lines = text.splitlines() - return Chatlog.loadl(lines) - - def loadl(lines): - version = parse(lines[0]).strip() - version = version if len(version.strip()) > 1 else (lines[4] if len(lines) > 4 else "LOG_ZERO") - if version == "v4": - return Chatlog(cid=parse(lines[1]), - ctype=parse(lines[2]), - title=parse(lines[3]), - count=int(parse(lines[4])), - freq=int(parse(lines[5])), - answer=float(parse(lines[6])), - restricted=(parse(lines[7]) == 'True'), - silenced=(parse(lines[8]) == 'True') - ) - elif version == "v3": - return Chatlog(cid=parse(lines[1]), - ctype=parse(lines[2]), - title=parse(lines[3]), - count=int(parse(lines[7])), - freq=int(parse(lines[4])), - answer=float(parse(lines[5])), - restricted=(parse(lines[6]) == 'True') - ) - elif version == "v2": - return Chatlog(cid=parse(lines[1]), - ctype=parse(lines[2]), - title=parse(lines[3]), - count=int(parse(lines[6])), - freq=int(parse(lines[4])), - answer=float(parse(lines[5])) - ) - elif version == "dict:": - return Chatlog(cid=lines[0], - ctype=lines[1], - title=lines[2], - count=int(lines[5]), - freq=int(lines[3]) - ) - else: - return Chatlog(cid=lines[0], - ctype=lines[1], - title=lines[2], - freq=int(lines[3]) - ) diff --git a/chatreader.py b/chatreader.py new file mode 100644 index 0000000..beb486c --- /dev/null +++ b/chatreader.py @@ -0,0 +1,190 @@ +#!/usr/bin/env python3 + +import random +from chatcard import ChatCard, parse_card_line +from generator import Generator + + +def get_chat_title(chat): + # This gives me the chat title, or the first and maybe last + # name of the user as fallback if it's a private chat + if chat.title is not None: + return chat.title + elif chat.first_name is not None: + if chat.last_name is not None: + return chat.first_name + " " + chat.last_name + else: + return chat.first_name + else: + return "" + + +class Memory(object): + def __init__(self, mid, content): + self.id = mid + self.content = content + + +class ChatReader(object): + TAG_PREFIX = "^IS_" + 
STICKER_TAG = "^IS_STICKER^" + ANIM_TAG = "^IS_ANIMATION^" + VIDEO_TAG = "^IS_VIDEO^" + + def __init__(self, chatcard, max_period, logger): + self.card = chatcard + self.max_period = max_period + self.short_term_mem = [] + self.countdown = self.card.period + self.logger = logger + + def FromChat(chat, max_period, logger, newchat=False): + # Create a new ChatReader from a Chat object + card = ChatCard(chat.id, chat.type, get_chat_title(chat)) + return ChatReader(card, max_period, logger) + + def FromData(data, max_period, logger): + # Create a new ChatReader from a whole Chat history (WIP) + return None + + def FromCard(card, max_period, logger): + # Create a new ChatReader from a card's file dump + chatcard = ChatCard.loads(card) + return ChatReader(chatcard, max_period, logger) + + def FromFile(text, max_period, logger): + # Load a ChatReader from a file's text string + lines = text.splitlines() + version = parse_card_line(lines[0]).strip() + version = version if len(version.strip()) > 1 else lines[4] + logger.info("Dictionary version: {} ({} lines)".format(version, len(lines))) + vocab = None + if version == "v4" or version == "v5": + return (ChatReader.FromCard(text, max_period, logger), vocab) + # I stopped saving the chat metadata and the cache together + elif version == "v3": + card = ChatCard.loadl(lines[0:8]) + cache = '\n'.join(lines[9:]) + vocab = Generator.loads(cache) + elif version == "v2": + card = ChatCard.loadl(lines[0:7]) + cache = '\n'.join(lines[8:]) + vocab = Generator.loads(cache) + elif version == "dict:": + card = ChatCard.loadl(lines[0:6]) + cache = '\n'.join(lines[6:]) + vocab = Generator.loads(cache) + else: + card = ChatCard.loadl(lines[0:4]) + cache = lines[4:] + vocab = Generator(load=cache, mode=Generator.MODE_LIST) + # raise SyntaxError("ChatReader: ChatCard format unrecognized.") + s = ChatReader(card, max_period, logger) + return (s, vocab) + + def archive(self, vocab): + # Returns a nice little tuple package for the archivist to save 
to file. + # Also commits to long term memory any pending short term memories + self.commit_long_term(vocab) + return (self.card.id, self.card.dumps(), vocab) + + def check_type(self, t): + # Checks type. Returns "True" for "group" even if it's supergroup + return t in self.card.type + + def exactly_type(self, t): + # Hard check + return t == self.card.type + + def set_title(self, title): + self.card.title = title + + def set_period(self, period): + if period < self.countdown: + self.countdown = max(period, 1) + return self.card.set_period(min(period, self.max_period)) + + def set_answer(self, prob): + return self.card.set_answer(prob) + + def cid(self): + return str(self.card.id) + + def count(self): + return self.card.count + + def period(self): + return self.card.period + + def title(self): + return self.card.title + + def answer(self): + return self.card.answer + + def ctype(self): + return self.card.type + + def is_restricted(self): + return self.card.restricted + + def toggle_restrict(self): + self.card.restricted = (not self.card.restricted) + + def is_silenced(self): + return self.card.silenced + + def toggle_silence(self): + self.card.silenced = (not self.card.silenced) + + def is_answering(self): + rand = random.random() + chance = self.answer() + if chance == 1: + return True + elif chance == 0: + return False + return rand <= chance + + def add_memory(self, mid, content): + mem = Memory(mid, content) + self.short_term_mem.append(mem) + + def random_memory(self): + mem = random.choice(self.short_term_mem) + return mem.id + + def reset_countdown(self): + self.countdown = self.card.period + + def read(self, message): + mid = str(message.message_id) + + if message.text is not None: + self.learn(mid, message.text) + elif message.sticker is not None: + self.learn_drawing(mid, ChatReader.STICKER_TAG, message.sticker.file_id) + elif message.animation is not None: + self.learn_drawing(mid, ChatReader.ANIM_TAG, message.animation.file_id) + elif message.video is 
not None: + self.learn_drawing(mid, ChatReader.VIDEO_TAG, message.video.file_id) + self.card.count += 1 + + def learn_drawing(self, mid, tag, drawing): + self.learn(mid, tag + " " + drawing) + + def learn(self, mid, text): + if "velasco" in text.casefold() and len(text.split()) <= 3: + return + self.add_memory(mid, text) + + def commit_long_term(self, vocab): + for mem in self.short_term_mem: + vocab.add(mem.content) + self.short_term_mem = [] + + """ + def learnFrom(self, scribe): + self.card.count += scribe.chat.count + self.vocab.cross(scribe.vocab) + """ diff --git a/generator.py b/generator.py new file mode 100644 index 0000000..17e5d45 --- /dev/null +++ b/generator.py @@ -0,0 +1,166 @@ +#!/usr/bin/env python3 + +import random +import json + + +def rewrite(text): + # This splits strings into lists of words delimited by space. + # Other whitespaces are appended space characters so they are included + # as their own Markov chain element, so as not to pollute with + # "different" words that would only differ in having a whitespace + # attached or not + words = text.replace('\n', '\n ').split(' ') + i = 0 + while i < len(words): + w = words[i].strip(' \t') + if len(w) > 0: + words[i] = w + else: + del words[i] + i -= 1 + i += 1 + return words + + +def getkey(w1, w2): + # This gives a dictionary key from 2 words, ignoring case + key = (w1.strip().casefold(), w2.strip().casefold()) + return str(key) + + +def getwords(key): + # This turns a dictionary key back into 2 separate words + words = key.strip('()').split(', ') + for i in range(len(words)): + words[i] = words[i].strip('\'') + return words + + +def triplets(wordlist): + # Generates triplets of words from the given data string. So if our string + # were "What a lovely day", we'd generate (What, a, lovely) and then + # (a, lovely, day). 
+ if len(wordlist) < 3: + return + + for i in range(len(wordlist) - 2): + yield (wordlist[i], wordlist[i+1], wordlist[i+2]) + + +class Generator(object): + MODE_JSON = "MODE_JSON" + # This is to mark when we want to create a Generator object from a given JSON + + MODE_LIST = "MODE_LIST" + # This is to mark when we want to create a Generator object from a given list of words + + MODE_CHAT_DATA = "MODE_CHAT_DATA" + # This is to mark when we want to create a Generator object from Chat data (WIP) + + HEAD = "\n^MESSAGE_SEPARATOR^" + TAIL = "^MESSAGE_SEPARATOR^" + + def __init__(self, load=None, mode=None): + if mode is not None: + # We ain't creating a new Generator from scratch + if mode == Generator.MODE_JSON: + self.cache = json.loads(load) + elif mode == Generator.MODE_LIST: + self.cache = {} + self.load_list(load) + else: + self.cache = {} + # The cache is where we store our words + + def load_list(self, many): + # Takes a list of strings and adds them to the cache one by one + for one in many: + self.add(one) + + def dumps(self): + # Dumps the cache dictionary into a JSON-formatted string + return json.dumps(self.cache) + + def loads(dump): + # Loads the cache dictionary from a JSON-formatted string + if len(dump) == 0: + # faulty dump gives default Generator + return Generator() + # otherwise + return Generator(load=dump, mode=Generator.MODE_JSON) + + def add(self, text): + # This takes a string and stores it in the cache, preceding it + # with the HEAD that marks the beginning of a new message and + # following it with the TAIL that marks the end + words = [Generator.HEAD] + text = text + " " + Generator.TAIL + words.extend(rewrite(text)) + self.database(words) + + def database(self, words): + # This takes a list of words and stores it in the cache, adding + # a special entry for the first word (the HEAD marker) + for w1, w2, w3 in triplets(words): + if w1 == Generator.HEAD: + if w1 in self.cache: + self.cache[Generator.HEAD].append(w2) + else: + 
self.cache[Generator.HEAD] = [w2] + key = getkey(w1, w2) + if key in self.cache: + # if the key exists, add the new word to the end of the chain + self.cache[key].append(w3) + else: + # otherwise, create a new entry for the new key starting with + # the new end of chain + self.cache[key] = [w3] + + def generate(self, size=50, silence=False): + # This generates the Markov text/word chain + # silence tells if mentions should be silenced + if len(self.cache) == 0: + # If there is nothing in the cache we cannot generate anything + return "" + + w1 = random.choice(self.cache[Generator.HEAD]) + w2 = random.choice(self.cache[getkey(Generator.HEAD, w1)]) + # Start with a message HEAD and a random message starting word + gen_words = [] + for i in range(size): + # As long as we don't go over the size value (max. message length)... + if silence and w1.startswith("@") and len(w1) > 1: + gen_words.append(w1.replace("@", "(@)")) + # ...append the first word, silencing any possible username mention + else: + gen_words.append(w1) + # ..append the first word + if w2 == Generator.TAIL or not getkey(w1, w2) in self.cache: + # When there's no key from the last 2 words to follow the chain, + # or we reached a separation between messages, stop + break + else: + w1, w2 = w2, random.choice(self.cache[getkey(w1, w2)]) + # Make the second word to be the new first word, and + # make a new random word that follows the chain to be + # the new second word + return ' '.join(gen_words) + + def cross(self, gen): + # cross 2 Generators into this one + for key in gen.cache: + if key in self.cache: + self.cache[key].extend(gen.cache[key]) + else: + self.cache[key] = list(gen.cache[key]) + + def new_count(self): + # Count again the number of messages if the current number is unreliable + count = 0 + for key in self.cache: + for word in self.cache[key]: + if word == Generator.TAIL: + count += 1 + # by just counting message separators + return count diff --git a/markov.py b/markov.py deleted file mode 
100644 index bf1c3ce..0000000 --- a/markov.py +++ /dev/null @@ -1,105 +0,0 @@ -#!/usr/bin/env python3 - -import random -import json - -def getkey(w1, w2): - key = (w1.strip().casefold(), w2.strip().casefold()) - return str(key) - -def getwords(key): - words = key.strip('()').split(', ') - for i in range(len(words)): - words[i].strip('\'') - return words - -def triples(wordlist): - # Generates triples from the given data string. So if our string were - # "What a lovely day", we'd generate (What, a, lovely) and then - # (a, lovely, day). - if len(wordlist) < 3: - return - - for i in range(len(wordlist) - 2): - yield (wordlist[i], wordlist[i+1], wordlist[i+2]) - -class Markov(object): - ModeJson = "MODE_JSON" - ModeList = "MODE_LIST" - ModeChatData = "MODE_CHAT_DATA" - - Head = "\n^MESSAGE_SEPARATOR^" - Tail = "^MESSAGE_SEPARATOR^" - - def __init__(self, load=None, mode=None): - if mode is not None: - if mode == Markov.ModeJson: - self.cache = json.loads(load) - elif mode == Markov.ModeList: - self.cache = {} - self.loadList(load) - else: - self.cache = {} - - def loadList(self, lines): - for line in lines: - words = [Markov.Head] - words.extend(line.split()) - self.learn_words(words) - - def dumps(self): - return json.dumps(self.cache) - - def loads(dump): - if len(dump) == 0: - return Markov() - return Markov(load=dump, mode=Markov.ModeJson) - - def learn_words(self, words): - self.database(words) - - def database(self, wordlist): - for w1, w2, w3 in triples(wordlist): - if w1 == Markov.Head: - if w1 in self.cache: - self.cache[Markov.Head].append(w2) - else: - self.cache[Markov.Head] = [w2] - key = getkey(w1, w2) - if key in self.cache: - self.cache[key].append(w3) - else: - self.cache[key] = [w3] - - def generate_markov_text(self, size=50, silence=False): - if len(self.cache) == 0: - return "" - w1 = random.choice(self.cache[Markov.Head]) - w2 = random.choice(self.cache[getkey(Markov.Head, w1)]) - gen_words = [] - for i in range(size): - if silence and 
w1.startswith("@") and len(w1) > 1: - gen_words.append(w1.replace("@", "(@)")) - else: - gen_words.append(w1) - if w2 == Markov.Tail or not getkey(w1, w2) in self.cache: - # print("Generated text") - break - else: - w1, w2 = w2, random.choice(self.cache[getkey(w1, w2)]) - return ' '.join(gen_words) - - def cross(self, gen): - for key in gen.cache: - if key in self.cache: - self.cache[key].extend(d[key]) - else: - self.cache[key] = list(d[key]) - - def new_count(self): - count = 0 - for key in self.cache: - for word in self.cache[key]: - if word == Markov.Tail: - count += 1 - return count diff --git a/scribe.py b/scribe.py deleted file mode 100644 index 7bcedb4..0000000 --- a/scribe.py +++ /dev/null @@ -1,194 +0,0 @@ -#!/usr/bin/env python3 - -import random -from chatlog import * -from markov import Markov - -def getTitle(chat): - if chat.title is not None: - return chat.title - elif chat.first_name is not None: - if chat.last_name is not None: - return chat.first_name + " " + chat.last_name - else: - return chat.first_name - else: - return "" - -def rewrite(text): - words = text.replace('\n', '\n ').split(' ') - i = 0 - while i < len(words): - w = words[i].strip(' \t') - if len(w) > 0: - words[i] = w - else: - del words[i] - i -= 1 - i += 1 - return words - -class Page(object): - def __init__(self, mid, content): - self.id = mid - self.content = content - -class Scribe(object): - TagPrefix = "^IS_" - StickerTag = "^IS_STICKER^" - AnimTag = "^IS_ANIMATION^" - VideoTag = "^IS_VIDEO^" - - def __init__(self, chatlog, archivist): - self.chat = chatlog - self.archivist = archivist - self.pages = [] - self.countdown = self.chat.freq - self.logger = self.archivist.logger - - def FromChat(chat, archivist, newchat=False): - chatlog = Chatlog(chat.id, chat.type, getTitle(chat)) - scribe = Scribe(chatlog, archivist) - return scribe - - def FromData(data, archivist): - return None - - def FromFile(log, archivist): - chatlog = Chatlog.loads(log) - return Scribe(chatlog, 
archivist) - - def Recall(text, archivist): - lines = text.splitlines() - version = parse(lines[0]).strip() - version = version if len(version.strip()) > 1 else lines[4] - archivist.logger.info( "Dictionary version: {} ({} lines)".format(version, len(lines)) ) - if version == "v4": - chatlog = Chatlog.loadl(lines[0:9]) - cache = '\n'.join(lines[10:]) - parrot = Markov.loads(cache) - elif version == "v3": - chatlog = Chatlog.loadl(lines[0:8]) - cache = '\n'.join(lines[9:]) - parrot = Markov.loads(cache) - elif version == "v2": - chatlog = Chatlog.loadl(lines[0:7]) - cache = '\n'.join(lines[8:]) - parrot = Markov.loads(cache) - elif version == "dict:": - chatlog = Chatlog.loadl(lines[0:6]) - cache = '\n'.join(lines[6:]) - parrot = Markov.loads(cache) - else: - chatlog = Chatlog.loadl(lines[0:4]) - cache = lines[4:] - parrot = Markov(load=cache, mode=Markov.ModeList) - #raise SyntaxError("Scribe: Chatlog format unrecognized.") - s = Scribe(chatlog, archivist) - s.parrot = parrot - return s - - def store(self, parrot): - self.archivist.store(self.chat.id, self.chat.dumps(), parrot) - - def checkType(self, t): - return t in self.chat.type - - def compareType(self, t): - return t == self.chat.type - - def setTitle(self, title): - self.chat.title = title - - def setFreq(self, freq): - if freq < self.countdown: - self.countdown = max(freq, 1) - return self.chat.set_freq(min(freq, self.archivist.maxFreq)) - - def setAnswer(self, afreq): - return self.chat.set_answer(afreq) - - def cid(self): - return str(self.chat.id) - - def count(self): - return self.chat.count - - def freq(self): - return self.chat.freq - - def title(self): - return self.chat.title - - def answer(self): - return self.chat.answer - - def type(self): - return self.chat.type - - def isRestricted(self): - return self.chat.restricted - - def restrict(self): - self.chat.restricted = (not self.chat.restricted) - - def isSilenced(self): - return self.chat.silenced - - def silence(self): - self.chat.silenced = 
(not self.chat.silenced) - - def isAnswering(self): - rand = random.random() - chance = self.answer() - if chance == 1: - return True - elif chance == 0: - return False - return rand <= chance - - def addPage(self, mid, content): - page = Page(mid, content) - self.pages.append(page) - - def getReference(self): - page = random.choice(self.pages) - return page.id - - def resetCountdown(self): - self.countdown = self.chat.freq - - def learn(self, message): - mid = str(message.message_id) - - if message.text is not None: - self.read(mid, message.text) - elif message.sticker is not None: - self.learnDrawing(mid, Scribe.StickerTag, message.sticker.file_id) - elif message.animation is not None: - self.learnDrawing(mid, Scribe.AnimTag, message.animation.file_id) - elif message.video is not None: - self.learnDrawing(mid, Scribe.VideoTag, message.video.file_id) - self.chat.count += 1 - - def learnDrawing(self, mid, tag, drawing): - self.read(mid, tag + " " + drawing) - - def read(self, mid, text): - if "velasco" in text.casefold() and len(text.split()) <= 3: - return - words = [Markov.Head] - text = text + " " + Markov.Tail - words.extend(rewrite(text)) - self.addPage(mid, words) - - def teachParrot(self, parrot): - for page in self.pages: - parrot.learn_words(page.content) - self.pages = [] - -""" - def learnFrom(self, scribe): - self.chat.count += scribe.chat.count - self.parrot.cross(scribe.parrot) -""" diff --git a/speaker.py b/speaker.py index 465d50e..f07788c 100644 --- a/speaker.py +++ b/speaker.py @@ -1,24 +1,25 @@ #!/usr/bin/env python3 import random -from scribe import Scribe -from markov import Markov +from chatreader import ChatReader as Reader from telegram.error import * -def send(bot, cid, text, replying=None, format=None, logger=None, **kwargs): - kwargs["parse_mode"] = format + +def send(bot, cid, text, replying=None, formatting=None, logger=None, **kwargs): + kwargs["parse_mode"] = formatting kwargs["reply_to_message_id"] = replying - if 
text.startswith(Scribe.TagPrefix): + if text.startswith(Reader.TAG_PREFIX): words = text.split(maxsplit=1) if logger: logger.info('Sending {} "{}" to {}'.format(words[0][4:-1], words[1], cid)) + # Logs something like 'Sending VIDEO "VIDEO_ID" to CHAT_ID' - if words[0] == Scribe.StickerTag: + if words[0] == Reader.STICKER_TAG: return bot.send_sticker(cid, words[1], **kwargs) - elif words[0] == Scribe.AnimTag: + elif words[0] == Reader.ANIM_TAG: return bot.send_animation(cid, words[1], **kwargs) - elif words[0] == Scribe.VideoTag: + elif words[0] == Reader.VIDEO_TAG: return bot.send_video(cid, words[1], **kwargs) else: text @@ -27,17 +28,6 @@ def send(bot, cid, text, replying=None, format=None, logger=None, **kwargs): logger.info("Sending a {} to {}: '{}'".format(mtype, cid, text)) return bot.send_message(cid, text, **kwargs) -def getTitle(chat): - if chat.title: - return chat.title - else: - last = chat.last_name if chat.last_name else "" - first = chat.first_name if chat.first_name else "" - name = " ".join([first, last]).strip() - if len(name) == 0: - return "Unknown" - else: - return name class Speaker(object): ModeFixed = "FIXED_MODE" @@ -59,7 +49,7 @@ class Speaker(object): self.reply = reply self.repeat = repeat self.filterCids = archivist.filterCids - self.bypass=archivist.bypass + self.bypass = archivist.bypass def announce(self, announcement, check=(lambda _: True)): for scribe in self.scriptorium: @@ -79,7 +69,7 @@ class Speaker(object): def getScribe(self, chat): cid = str(chat.id) if not cid in self.scriptorium: - scribe = Scribe.FromChat(chat, self.archivist, newchat=True) + scribe = Reader.FromChat(chat, self.archivist.max_period, self.archivist.logger, newchat=True) self.scriptorium[cid] = scribe return scribe else: diff --git a/velasco.py b/velasco.py index e512dab..162b748 100644 --- a/velasco.py +++ b/velasco.py @@ -18,7 +18,7 @@ speakerbot = None logger = logging.getLogger(__name__) # Enable logging -log_format="[{}][%(asctime)s]%(name)s::%(levelname)s: 
%(message)s".format(username.upper()) +log_format = "[{}][%(asctime)s]%(name)s::%(levelname)s: %(message)s".format(username.upper()) if coloredlogsError: logging.basicConfig(format=log_format, level=logging.INFO) @@ -49,20 +49,24 @@ about_msg = "I am yet another Markov Bot experiment. I read everything you type explanation = "I decompose every message I read in groups of 3 consecutive words, so for each consecutive pair I save the word that can follow them. I then use this to make my own messages. At first I will only repeat your messages because for each 2 words I will have very few possible following words.\n\nI also separate my vocabulary by chats, so anything I learn in one chat I will only say in that chat. For privacy, you know. Also, I save my vocabulary in the form of a json dictionary, so no logs are kept.\n\nMy default frequency in private chats is one message of mine from each 2 messages received, and in group chats it\'s 10 messages I read for each message I send." + def static_reply(text, format=None): def reply(bot, update): update.message.reply_text(text, parse_mode=format) return reply + def error(bot, update, error): logger.warning('Update "{}" caused error "{}"'.format(update, error)) + def stop(bot, update): scribe = speakerbot.getScribe(update.message.chat.id) #del chatlogs[chatlog.id] #os.remove(LOG_DIR + chatlog.id + LOG_EXT) logger.warning("I got blocked by user {} [{}]".format(scribe.title(), scribe.cid())) + def main(): global speakerbot parser = argparse.ArgumentParser(description='A Telegram markov bot.') @@ -76,7 +80,7 @@ def main(): updater = Updater(args.token) #filterCids=["-1001036575277", "-1001040087584", str(args.admin_id)] - filterCids=None + filterCids = None archivist = Archivist(logger, chatdir="chatlogs/", @@ -84,7 +88,7 @@ def main(): admin=args.admin_id, filterCids=filterCids, readOnly=False - ) + ) speakerbot = Speaker("velasco", "@" + username, archivist, logger, wakeup=args.wakeup)