Overhaul 2 WIP

- Generator (Markov) ✔️ - ChatCard (Chatlog) ✔️ - ChatReader (Scribe) 🚧 - Speaker 🚧 - - Speaker->get_reader()... 🚧
2025-07-02 01:04:38 +02:00 · 2020-10-07 23:32:10 +02:00 · 2020-10-07 23:32:10 +02:00 · 328bd6adbf
commit 328bd6adbf
parent 950bbfbabd
11 changed files with 548 additions and 475 deletions
--- a/.gitignore
+++ b/.gitignore
@ -1,3 +1,4 @@
 chatlogs/*
 __pycache__/*
 misc/*
 test/*
--- a/archivist.py
+++ b/archivist.py
@ -1,14 +1,15 @@
 import os, errno, random, pickle
-from scribe import Scribe
+from chatreader import ChatReader as Reader
-from markov import Markov
+from generator import Generator
 class Archivist(object):
    def __init__(self, logger, chatdir=None, chatext=None, admin=0,
-            freqIncrement=5, saveCount=15, maxFreq=100000, maxLen=50,
+                 freq_increment=5, save_count=15, max_period=100000, max_len=50,
-            readOnly=False, filterCids=None, bypass=False
+                 read_only=False, filter_cids=None, bypass=False
-        ):
+                 ):
        if chatdir is None or len(chatdir) == 0:
            raise ValueError("Chatlog directory name is empty")
        elif chatext is None: # Can be len(chatext) == 0
@ -17,43 +18,46 @@ class Archivist(object):
        self.chatdir = chatdir
        self.chatext = chatext
        self.admin = admin
-        self.freqIncrement = freqIncrement
+        self.freq_increment = freq_increment
-        self.saveCount = saveCount
+        self.save_count = save_count
-        self.maxFreq = maxFreq
+        self.max_period = max_period
-        self.maxLen = maxLen
+        self.max_len = max_len
-        self.readOnly = readOnly
+        self.read_only = read_only
-        self.filterCids = filterCids
+        self.filter_cids = filter_cids
        self.bypass = bypass
-        self.scribeFolder = chatdir + "chat_{tag}"
+    
-        self.scribePath = chatdir + "chat_{tag}/{file}{ext}"
+    def chat_folder(self, *formatting, **key_format):
        return (self.chatdir + "chat_{tag}").format(*formatting, **key_format)
    def chat_file(self, *formatting, **key_format):
        return (self.chatdir + "chat_{tag}/{file}{ext}").format(*formatting, **key_format)
    def store(self, tag, log, gen):
-        scribefolder = self.scribeFolder.format(tag=tag)
+        chat_folder = self.chat_folder(tag=tag)
-        cardfile = self.scribePath.format(tag=tag, file="card", ext=".txt")
+        chat_card = self.chat_file(tag=tag, file="card", ext=".txt")
-        if self.readOnly:
+        if self.read_only:
            return
        try:
-            if not os.path.exists(scribefolder):
+            if not os.path.exists(chat_folder):
-                os.makedirs(scribefolder, exist_ok=True)
+                os.makedirs(chat_folder, exist_ok=True)
-                self.logger.info("Storing a new chat. Folder {} created.".format(scribefolder))
+                self.logger.info("Storing a new chat. Folder {} created.".format(chat_folder))
        except:
-            self.logger.error("Failed creating {} folder.".format(scribefolder))
+            self.logger.error("Failed creating {} folder.".format(chat_folder))
            return
-        file = open(cardfile, 'w')
+        file = open(chat_card, 'w')
        file.write(log)
        file.close()
        if gen is not None:
-            recordfile = self.scribePath.format(tag=tag, file="record", ext=self.chatext)
+            chat_record = self.chat_file(tag=tag, file="record", ext=self.chatext)
-            file = open(recordfile, 'w')
+            file = open(chat_record, 'w')
            file.write(gen)
            file.close()
-    def recall(self, filename):
+    def get_reader(self, filename):
        #print("Loading chat: " + path)
        file = open(self.chatdir + filename, 'rb')
        scribe = None
        try:
-            scribe = Scribe.Recall(pickle.load(file), self)
+            reader, vocab = Reader.FromFile(pickle.load(file), self)
            self.logger.info("Unpickled {}{}".format(self.chatdir, filename))
        except pickle.UnpicklingError:
            file.close()
@ -68,27 +72,24 @@ class Archivist(object):
        file.close()
        return scribe
-    def wakeScribe(self, filepath):
+    def load_reader(self, filepath):
        file = open(filepath.format(filename="card", ext=".txt"), 'r')
        card = file.read()
        file.close()
-        return Scribe.FromFile(card, self)
+        return Reader.FromCard(card, self)
    def wakeParrot(self, tag):
-        filepath = self.scribePath.format(tag=tag, file="record", ext=self.chatext)
+        filepath = self.chat_file(tag=tag, file="record", ext=self.chatext)
        try:
            file = open(filepath, 'r')
            #print("\nOPening " + filepath + "\n")
            record = file.read()
            file.close()
-            return Markov.loads(record)
+            return Generator.loads(record)
        except:
-            self.logger.error("Parrot file {} not found.".format(filepath))
+            self.logger.error("Record file {} not found.".format(filepath))
            return None
-    def wakeScriptorium(self):
+    def readers_pass(self):
        scriptorium = {}
        directory = os.fsencode(self.chatdir)
        for subdir in os.scandir(directory):
            dirname = subdir.name.decode("utf-8")
@ -96,17 +97,16 @@ class Archivist(object):
                cid = dirname[5:]
                try:
                    filepath = self.chatdir + dirname + "/{filename}{ext}"
-                    scriptorium[cid] = self.wakeScribe(filepath)
+                    reader = self.load_reader(filepath)
-                    self.logger.info("Chat {} contents:\n".format(cid) + scriptorium[cid].chat.dumps())
+                    self.logger.info("Chat {} contents:\n".format(cid) + reader.card.dumps())
                    if self.bypass:
-                        scriptorium[cid].setFreq(random.randint(self.maxFreq//2, self.maxFreq))
+                        reader.set_period(random.randint(self.max_period//2, self.max_period))
-                    elif scriptorium[cid].freq() > self.maxFreq:
+                    elif scriptorium[cid].freq() > self.max_period:
-                        scriptorium[cid].setFreq(self.maxFreq)
+                        scriptorium[cid].setFreq(self.max_period)
                except Exception as e:
                    self.logger.error("Failed reading {}".format(dirname))
                    self.logger.exception(e)
                    raise e
        return scriptorium
    """
    def wake_old(self):
@ -117,17 +117,17 @@ class Archivist(object):
            filename = os.fsdecode(file)
            if filename.endswith(self.chatext):
                cid = filename[:-(len(self.chatext))]
-                if self.filterCids is not None:
+                if self.filter_cids is not None:
                    #self.logger.info("CID " + cid)
-                    if not cid in self.filterCids:
+                    if not cid in self.filter_cids:
                        continue
                scriptorium[cid] = self.recall(filename)
                scribe = scriptorium[cid]
                if scribe is not None:
                    if self.bypass:
-                        scribe.setFreq(random.randint(self.maxFreq//2, self.maxFreq))
+                        scribe.setFreq(random.randint(self.max_period//2, self.max_period))
-                    elif scribe.freq() > self.maxFreq:
+                    elif scribe.freq() > self.max_period:
-                        scribe.setFreq(self.maxFreq)
+                        scribe.setFreq(self.max_period)
                    self.logger.info("Loaded chat " + scribe.title() + " [" + scribe.cid() + "]"
                                     "\n" + "\n".join(scribe.chat.dumps()))
            else:
--- a/brain.py
+++ b/brain.py
@ -0,0 +1,5 @@
 #!/usr/bin/env python3
 import random
 from chatreader import ChatReader as Reader
--- a/chatcard.py
+++ b/chatcard.py
@ -0,0 +1,122 @@
 #!/usr/bin/env python3
 def parse_card_line(line):
    # Reads a card line in the format 'VARIABLE=value' and returns the value.
    # Only the first '=' separates both sides, so the value itself may contain
    # more '=' characters. A line without any '=' gives an empty string.
    # See ChatCard.loadl(...) for more details
    _name, _separator, value = line.partition('=')
    return value
 class ChatCard(object):
    # The "card" of a chat: its identity (id, type, title) plus its settings
    # (count, period, answer probability, restricted, silenced), together with
    # dumps()/loads()/loadl() to store it as plain 'VARIABLE=value' text lines

    def __init__(self, cid, ctype, title, count=0, period=None, answer=0.5, restricted=False, silenced=False):
        self.id = str(cid)
        # The Telegram chat's ID (always kept as a string)
        self.type = ctype
        # The type of chat
        self.title = title
        # The title of the chat
        if period is None:
            if "group" in ctype:
                period = 10
                # Default period for groups and supergroups
            else:
                period = 2
                # Default period for private or channel chats
        self.count = count
        # The number of messages read
        self.period = period
        # This chat's configured period
        self.answer = answer
        # This chat's configured answer probability
        self.restricted = restricted
        # Whether some interactions are restricted to admins only
        self.silenced = silenced
        # Whether messages should silence user mentions

    def set_period(self, period):
        # Sets the period and returns the stored value.
        # Raises ValueError if the period is less than 1
        if period < 1:
            raise ValueError('Tried to set period a value less than 1.')
        else:
            self.period = period
        return self.period

    def set_answer(self, prob):
        # Sets the answer probability and returns the stored value.
        # Raises ValueError if the probability is outside of [0, 1]
        if prob > 1:
            raise ValueError('Tried to set answer probability higher than 1.')
        elif prob < 0:
            raise ValueError('Tried to set answer probability lower than 0.')
        else:
            self.answer = prob
        return self.answer

    def dumps(self):
        # Dumps the card into its text format (version "v5"), one
        # 'VARIABLE=value' line per field, each line ending in a newline.
        # The order of the lines matters: loadl(...) reads them by position
        lines = ["CARD=v5"]
        lines.append("CHAT_ID=" + self.id)
        lines.append("CHAT_TYPE=" + self.type)
        lines.append("CHAT_NAME=" + self.title)
        lines.append("WORD_COUNT=" + str(self.count))
        lines.append("MESSAGE_PERIOD=" + str(self.period))
        lines.append("ANSWER_PROB=" + str(self.answer))
        lines.append("RESTRICTED=" + str(self.restricted))
        lines.append("SILENCED=" + str(self.silenced))
        # lines.append("WORD_DICT=")
        return ('\n'.join(lines)) + '\n'

    @staticmethod
    def loads(text):
        # Loads a ChatCard from a text dump (see dumps). It is a static method
        # so it works the same when called on the class or on an instance
        lines = text.splitlines()
        return ChatCard.loadl(lines)

    @staticmethod
    def loadl(lines):
        # Loads a ChatCard from the list of lines of a text dump.
        # In a perfect world, I would get both the variable name and its corresponding value
        # from each side of the lines, but I know the order in which the lines are written in
        # the file, I hardcoded it. So I can afford also hardcoding reading it back in the
        # same order, and nobody can stop me
        version = parse_card_line(lines[0]).strip()
        # Files without a version in their first line are told apart by their 5th line,
        # or marked as "LOG_ZERO" (the oldest format) when they are too short to have one
        version = version if len(version.strip()) > 1 else (lines[4] if len(lines) > 4 else "LOG_ZERO")
        if version == "v4" or version == "v5":
            return ChatCard(cid=parse_card_line(lines[1]),
                            ctype=parse_card_line(lines[2]),
                            title=parse_card_line(lines[3]),
                            count=int(parse_card_line(lines[4])),
                            period=int(parse_card_line(lines[5])),
                            answer=float(parse_card_line(lines[6])),
                            restricted=(parse_card_line(lines[7]) == 'True'),
                            silenced=(parse_card_line(lines[8]) == 'True')
                            )
        elif version == "v3":
            return ChatCard(cid=parse_card_line(lines[1]),
                            ctype=parse_card_line(lines[2]),
                            title=parse_card_line(lines[3]),
                            count=int(parse_card_line(lines[7])),
                            period=int(parse_card_line(lines[4])),
                            answer=float(parse_card_line(lines[5])),
                            restricted=(parse_card_line(lines[6]) == 'True')
                            )
        elif version == "v2":
            return ChatCard(cid=parse_card_line(lines[1]),
                            ctype=parse_card_line(lines[2]),
                            title=parse_card_line(lines[3]),
                            count=int(parse_card_line(lines[6])),
                            period=int(parse_card_line(lines[4])),
                            answer=float(parse_card_line(lines[5]))
                            )
        elif version == "dict:":
            # At some point I decided to number the versions of each dictionary format,
            # but this was not always the case. This is what you get if you try to read
            # whatever there is in very old files where the version should be
            return ChatCard(cid=lines[0],
                            ctype=lines[1],
                            title=lines[2],
                            count=int(lines[5]),
                            period=int(lines[3])
                            )
        else:
            # This is for the oldest of files
            return ChatCard(cid=lines[0],
                            ctype=lines[1],
                            title=lines[2],
                            period=int(lines[3])
                            )
--- a/chatlog.py
+++ b/chatlog.py
@ -1,106 +0,0 @@
 #!/usr/bin/env python3
 def parse(l):
    # Gives the value of a 'VARIABLE=value' line, splitting on the first '='
    # only. Lines without any '=' give an empty string
    pieces = l.split('=', 1)
    return pieces[1] if len(pieces) == 2 else ""
 class Chatlog(object):
    # The metadata of a chat (id, type, title) plus its settings (count, freq,
    # answer probability, restricted, silenced), together with
    # dumps()/loads()/loadl() to store it as plain 'VARIABLE=value' text lines

    def __init__(self, cid, ctype, title, count=0, freq=None, answer=0.5, restricted=False, silenced=False):
        self.id = str(cid)
        self.type = ctype
        self.title = title
        if freq is None:
            # Default freq: 10 for any kind of group, 2 for everything else
            if "group" in ctype:
                freq = 10
            #elif ctype is "private":
            else:
                freq = 2
        self.count = count
        self.freq = freq
        self.answer = answer
        self.restricted = restricted
        self.silenced = silenced

    def add_msg(self, message):
        # NOTE(review): nothing in this class ever sets self.gen, so this raises
        # AttributeError unless a caller attaches a generator first -- confirm
        self.gen.add_text(message)
        self.count += 1

    def set_freq(self, freq):
        # Sets the freq and returns the stored value.
        # Raises ValueError if the freq is less than 1
        if freq < 1:
            raise ValueError('Tried to set freq a value less than 1.')
        else:
            self.freq = freq
        return self.freq

    def set_answer(self, afreq):
        # Sets the answer probability and returns the stored value.
        # Raises ValueError if the probability is outside of [0, 1]
        if afreq > 1:
            raise ValueError('Tried to set answer probability higher than 1.')
        elif afreq < 0:
            raise ValueError('Tried to set answer probability lower than 0.')
        else:
            self.answer = afreq
        return self.answer

    def dumps(self):
        # Dumps the log into its text format (version "v4"), one
        # 'VARIABLE=value' line per field. The order of the lines matters:
        # loadl(...) reads them back by position
        lines = ["LOG=v4"]
        lines.append("CHAT_ID=" + self.id)
        lines.append("CHAT_TYPE=" + self.type)
        lines.append("CHAT_NAME=" + self.title)
        lines.append("WORD_COUNT=" + str(self.count))
        lines.append("MESSAGE_FREQ=" + str(self.freq))
        lines.append("ANSWER_FREQ=" + str(self.answer))
        lines.append("RESTRICTED=" + str(self.restricted))
        lines.append("SILENCED=" + str(self.silenced))
        #lines.append("WORD_DICT=")
        return '\n'.join(lines)

    @staticmethod
    def loads(text):
        # Loads a Chatlog from a text dump (see dumps). It is a static method
        # so it works the same when called on the class or on an instance
        lines = text.splitlines()
        return Chatlog.loadl(lines)

    @staticmethod
    def loadl(lines):
        # Loads a Chatlog from the list of lines of a text dump, reading each
        # field from the fixed position it has in that version of the format
        version = parse(lines[0]).strip()
        # Files without a version in their first line are told apart by their 5th line,
        # or marked as "LOG_ZERO" when they are too short to have one
        version = version if len(version.strip()) > 1 else (lines[4] if len(lines) > 4 else "LOG_ZERO")
        if version == "v4":
            return Chatlog(cid=parse(lines[1]),
                           ctype=parse(lines[2]),
                           title=parse(lines[3]),
                           count=int(parse(lines[4])),
                           freq=int(parse(lines[5])),
                           answer=float(parse(lines[6])),
                           restricted=(parse(lines[7]) == 'True'),
                           silenced=(parse(lines[8]) == 'True')
                           )
        elif version == "v3":
            return Chatlog(cid=parse(lines[1]),
                           ctype=parse(lines[2]),
                           title=parse(lines[3]),
                           count=int(parse(lines[7])),
                           freq=int(parse(lines[4])),
                           answer=float(parse(lines[5])),
                           restricted=(parse(lines[6]) == 'True')
                           )
        elif version == "v2":
            return Chatlog(cid=parse(lines[1]),
                           ctype=parse(lines[2]),
                           title=parse(lines[3]),
                           count=int(parse(lines[6])),
                           freq=int(parse(lines[4])),
                           answer=float(parse(lines[5]))
                           )
        elif version == "dict:":
            # Unversioned files that carry a dictionary marker in their 5th line
            return Chatlog(cid=lines[0],
                           ctype=lines[1],
                           title=lines[2],
                           count=int(lines[5]),
                           freq=int(lines[3])
                           )
        else:
            # Anything else is read as 4 bare lines: id, type, title and freq
            return Chatlog(cid=lines[0],
                           ctype=lines[1],
                           title=lines[2],
                           freq=int(lines[3])
                           )
--- a/chatreader.py
+++ b/chatreader.py
@ -0,0 +1,190 @@
 #!/usr/bin/env python3
 import random
 from chatcard import ChatCard, parse_card_line
 from generator import Generator
 def get_chat_title(chat):
    # Picks a display title for the chat: its own title when it has one,
    # otherwise the first name (followed by the last name when present),
    # otherwise an empty string
    if chat.title is not None:
        return chat.title
    if chat.first_name is None:
        return ""
    if chat.last_name is None:
        return chat.first_name
    return chat.first_name + " " + chat.last_name
 class Memory(object):
    # A single remembered item: an id paired with its content
    def __init__(self, mid, content):
        self.id, self.content = mid, content
 class ChatReader(object):
    # Wraps the ChatCard of one chat and collects what is read in that chat as
    # short term memories, until they are committed to a vocabulary (any object
    # with an add(text) method, such as a Generator)
    TAG_PREFIX = "^IS_"
    STICKER_TAG = "^IS_STICKER^"
    ANIM_TAG = "^IS_ANIMATION^"
    VIDEO_TAG = "^IS_VIDEO^"

    def __init__(self, chatcard, max_period, logger):
        self.card = chatcard
        self.max_period = max_period
        # Upper bound applied by set_period
        self.short_term_mem = []
        # Memories waiting to be committed by commit_long_term
        self.countdown = self.card.period
        self.logger = logger

    @staticmethod
    def FromChat(chat, max_period, logger, newchat=False):
        # Create a new ChatReader from a Chat object
        card = ChatCard(chat.id, chat.type, get_chat_title(chat))
        return ChatReader(card, max_period, logger)

    @staticmethod
    def FromData(data, max_period, logger):
        # Create a new ChatReader from a whole Chat history (WIP)
        return None

    @staticmethod
    def FromCard(card, max_period, logger):
        # Create a new ChatReader from a card's file dump
        chatcard = ChatCard.loads(card)
        return ChatReader(chatcard, max_period, logger)

    @staticmethod
    def FromFile(text, max_period, logger):
        # Load a ChatReader from a file's text string.
        # NOTE(review): the "v4"/"v5" branch returns a bare ChatReader while
        # every other branch returns a (ChatReader, vocab) tuple; callers that
        # unpack the result will fail on v4/v5 files -- confirm intended shape
        lines = text.splitlines()
        version = parse_card_line(lines[0]).strip()
        # Same fallback as ChatCard.loadl: files too short to have a 5th line
        # are treated as the oldest format instead of raising IndexError
        version = version if len(version.strip()) > 1 else (lines[4] if len(lines) > 4 else "LOG_ZERO")
        logger.info("Dictionary version: {} ({} lines)".format(version, len(lines)))
        vocab = None
        if version == "v4" or version == "v5":
            return ChatReader.FromCard(text, max_period, logger)
            # I stopped saving the chat metadata and the cache together
        elif version == "v3":
            card = ChatCard.loadl(lines[0:8])
            cache = '\n'.join(lines[9:])
            vocab = Generator.loads(cache)
        elif version == "v2":
            card = ChatCard.loadl(lines[0:7])
            cache = '\n'.join(lines[8:])
            vocab = Generator.loads(cache)
        elif version == "dict:":
            card = ChatCard.loadl(lines[0:6])
            cache = '\n'.join(lines[6:])
            vocab = Generator.loads(cache)
        else:
            card = ChatCard.loadl(lines[0:4])
            cache = lines[4:]
            vocab = Generator(load=cache, mode=Generator.MODE_LIST)
            # raise SyntaxError("ChatReader: ChatCard format unrecognized.")
        s = ChatReader(card, max_period, logger)
        return (s, vocab)

    def archive(self, vocab):
        # Returns a nice little tuple package for the archivist to save to file.
        # Also commits to long term memory any pending short term memories
        self.commit_long_term(vocab)
        return (self.card.id, self.card.dumps(), vocab)

    def check_type(self, t):
        # Checks type. Returns "True" for "group" even if it's supergroup
        return t in self.card.type

    def exactly_type(self, t):
        # Hard check
        return t == self.card.type

    def set_title(self, title):
        self.card.title = title

    def set_period(self, period):
        # Stores the period (capped at max_period) and returns the stored value.
        # The card validates first, so an invalid period raises ValueError
        # before the countdown is touched
        stored = self.card.set_period(min(period, self.max_period))
        if period < self.countdown:
            self.countdown = max(period, 1)
        return stored

    def set_answer(self, prob):
        return self.card.set_answer(prob)

    def cid(self):
        return str(self.card.id)

    def count(self):
        return self.card.count

    def period(self):
        return self.card.period

    def title(self):
        return self.card.title

    def answer(self):
        return self.card.answer

    def ctype(self):
        return self.card.type

    def is_restricted(self):
        return self.card.restricted

    def toggle_restrict(self):
        self.card.restricted = (not self.card.restricted)

    def is_silenced(self):
        return self.card.silenced

    def toggle_silence(self):
        self.card.silenced = (not self.card.silenced)

    def is_answering(self):
        # Rolls against the answer probability. The roll is always made first,
        # then 1 and 0 are settled exactly so they never depend on it
        rand = random.random()
        chance = self.answer()
        if chance == 1:
            return True
        elif chance == 0:
            return False
        return rand <= chance

    def add_memory(self, mid, content):
        mem = Memory(mid, content)
        self.short_term_mem.append(mem)

    def random_memory(self):
        # Gives the id of a random short term memory, or None when there is
        # nothing in short term memory to choose from
        if len(self.short_term_mem) == 0:
            return None
        mem = random.choice(self.short_term_mem)
        return mem.id

    def reset_countdown(self):
        self.countdown = self.card.period

    def read(self, message):
        # Learns the text, sticker, animation or video of a message (whichever
        # comes first in that order) and counts the message either way
        mid = str(message.message_id)
        if message.text is not None:
            self.learn(mid, message.text)
        elif message.sticker is not None:
            self.learn_drawing(mid, ChatReader.STICKER_TAG, message.sticker.file_id)
        elif message.animation is not None:
            self.learn_drawing(mid, ChatReader.ANIM_TAG, message.animation.file_id)
        elif message.video is not None:
            self.learn_drawing(mid, ChatReader.VIDEO_TAG, message.video.file_id)
        self.card.count += 1

    def learn_drawing(self, mid, tag, drawing):
        # Media is remembered as text: its tag followed by the file id
        self.learn(mid, tag + " " + drawing)

    def learn(self, mid, text):
        # Messages of 3 words or less that mention "velasco" are not remembered
        if "velasco" in text.casefold() and len(text.split()) <= 3:
            return
        self.add_memory(mid, text)

    def commit_long_term(self, vocab):
        # Adds every pending memory to the vocabulary and empties the short term memory
        for mem in self.short_term_mem:
            vocab.add(mem.content)
        self.short_term_mem = []
    """
    def learnFrom(self, scribe):
        self.card.count += scribe.chat.count
        self.vocab.cross(scribe.vocab)
    """
--- a/generator.py
+++ b/generator.py
@ -0,0 +1,166 @@
 #!/usr/bin/env python3
 import random
 import json
 def rewrite(text):
    # This splits strings into lists of words delimited by space.
    # Newlines get a space appended after them, so they stay attached to the
    # end of the word they follow (or stand alone) instead of gluing two
    # words together, and so as not to pollute the chain with "different"
    # words that would only differ in having a whitespace attached or not.
    # Spaces and tabs around each word are dropped, and so are empty words.
    # Done in a single pass instead of deleting from the list while indexing it
    pieces = (piece.strip(' \t') for piece in text.replace('\n', '\n ').split(' '))
    return [piece for piece in pieces if piece]
 def getkey(w1, w2):
    # Builds the dictionary key for a pair of words: the text form of the
    # tuple of both words, each one stripped and casefolded to ignore case
    pair = tuple(word.strip().casefold() for word in (w1, w2))
    return str(pair)
 def getwords(key):
    # This turns a dictionary key (as built by getkey) back into a list with
    # its 2 separate words, without the quotes that surround them in the key
    import ast
    try:
        # The key is the text form of a tuple of strings, so reading it back
        # as a literal also copes with words that contain quotes or ", "
        words = ast.literal_eval(key)
    except (ValueError, SyntaxError):
        words = None
    if isinstance(words, tuple) and all(isinstance(word, str) for word in words):
        return list(words)
    # Not a well formed key: fall back to plain splitting. Keep the result of
    # strip(), since strings are immutable and it does nothing in place
    return [word.strip('\'') for word in key.strip('()').split(', ')]
 def triplets(wordlist):
    # Yields every run of 3 consecutive words of the given list. So for
    # ["What", "a", "lovely", "day"] it yields (What, a, lovely) and then
    # (a, lovely, day). Lists shorter than 3 words yield nothing
    last_start = len(wordlist) - 2
    start = 0
    while start < last_start:
        yield (wordlist[start], wordlist[start + 1], wordlist[start + 2])
        start += 1
 class Generator(object):
    # A Markov chain text generator. Its cache maps the key of each pair of
    # consecutive words (see getkey) to the list of words seen after them,
    # plus a special HEAD entry listing the words that start a message
    MODE_JSON = "MODE_JSON"
    # This is to mark when we want to create a Generator object from a given JSON
    MODE_LIST = "MODE_LIST"
    # This is to mark when we want to create a Generator object from a given list of words
    MODE_CHAT_DATA = "MODE_CHAT_DATA"
    # This is to mark when we want to create a Generator object from Chat data (WIP)
    HEAD = "\n^MESSAGE_SEPARATOR^"
    TAIL = "^MESSAGE_SEPARATOR^"

    def __init__(self, load=None, mode=None):
        self.cache = {}
        # The cache is where we store our words. It always exists, even for
        # modes that are not implemented yet (like MODE_CHAT_DATA)
        if mode == Generator.MODE_JSON:
            # We ain't creating a new Generator from scratch
            self.cache = json.loads(load)
        elif mode == Generator.MODE_LIST:
            self.load_list(load)

    def load_list(self, many):
        # Takes a list of strings and adds them to the cache one by one
        for one in many:
            self.add(one)

    def dumps(self):
        # Dumps the cache dictionary into a JSON-formatted string
        return json.dumps(self.cache)

    @staticmethod
    def loads(dump):
        # Loads the cache dictionary from a JSON-formatted string
        if len(dump) == 0:
            # faulty dump gives default Generator
            return Generator()
        # otherwise
        return Generator(load=dump, mode=Generator.MODE_JSON)

    def add(self, text):
        # This takes a string and stores it in the cache, preceding it
        # with the HEAD that marks the beginning of a new message and
        # following it with the TAIL that marks the end.
        # The list that gets stored has to be the one that starts with the
        # HEAD: without it no starting word is ever recorded and generate()
        # has nothing to begin a message with
        words = [Generator.HEAD]
        words.extend(rewrite(text + " " + Generator.TAIL))
        self.database(words)

    def database(self, words):
        # This takes a list of words and stores it in the cache, adding
        # a special entry for the first word (the HEAD marker)
        for w1, w2, w3 in triplets(words):
            if w1 == Generator.HEAD:
                self.cache.setdefault(Generator.HEAD, []).append(w2)
            # Add the new word to the end of the chain of its key, creating
            # the entry first when the key is new
            self.cache.setdefault(getkey(w1, w2), []).append(w3)

    def generate(self, size=50, silence=False):
        # This generates the Markov text/word chain
        # silence tells if mentions should be silenced
        if len(self.cache) == 0:
            # If there is nothing in the cache we cannot generate anything
            return ""
        starters = self.cache.get(Generator.HEAD)
        if not starters:
            # Same if the cache has no record of how messages start
            return ""
        w1 = random.choice(starters)
        followers = self.cache.get(getkey(Generator.HEAD, w1))
        w2 = random.choice(followers) if followers else Generator.TAIL
        # Start with a message HEAD and a random message starting word. A
        # starting word with nothing recorded after it makes a 1 word message
        gen_words = []
        for i in range(size):
            # As long as we don't go over the size value (max. message length)...
            if silence and w1.startswith("@") and len(w1) > 1:
                gen_words.append(w1.replace("@", "(@)"))
                # ...append the first word, silencing any possible username mention
            else:
                gen_words.append(w1)
                # ..append the first word
            if w2 == Generator.TAIL or not getkey(w1, w2) in self.cache:
                # When there's no key from the last 2 words to follow the chain,
                # or we reached a separation between messages, stop
                break
            else:
                w1, w2 = w2, random.choice(self.cache[getkey(w1, w2)])
                # Make the second word to be the new first word, and
                # make a new random word that follows the chain to be
                # the new second word
        return ' '.join(gen_words)

    def cross(self, gen):
        # cross 2 Generators into this one
        for key in gen.cache:
            if key in self.cache:
                self.cache[key].extend(gen.cache[key])
            else:
                self.cache[key] = list(gen.cache[key])

    def new_count(self):
        # Count again the number of messages if the current number is unreliable
        count = 0
        for key in self.cache:
            for word in self.cache[key]:
                if word == Generator.TAIL:
                    count += 1
                    # by just counting message separators
        return count
--- a/markov.py
+++ b/markov.py
@ -1,105 +0,0 @@
 #!/usr/bin/env python3
 import random
 import json
 def getkey(w1, w2):
    # Builds the dictionary key for a pair of words, ignoring their case and
    # the whitespace around them
    first = w1.strip().casefold()
    second = w2.strip().casefold()
    return str((first, second))
 def getwords(key):
    # Turns a dictionary key (as built by getkey) back into a list with its
    # 2 separate words, without the quotes that surround them in the key
    import ast
    try:
        # The key is the text form of a tuple of strings, so reading it back
        # as a literal also copes with words that contain quotes or ", "
        words = ast.literal_eval(key)
    except (ValueError, SyntaxError):
        words = None
    if isinstance(words, tuple) and all(isinstance(word, str) for word in words):
        return list(words)
    # Not a well formed key: fall back to plain splitting. Keep the result of
    # strip(), since strings are immutable and it does nothing in place
    return [word.strip('\'') for word in key.strip('()').split(', ')]
 def triples(wordlist):
    # Yields each group of 3 consecutive words of the given list. So for
    # ["What", "a", "lovely", "day"] we get (What, a, lovely) and then
    # (a, lovely, day). Nothing is yielded for fewer than 3 words
    total = len(wordlist)
    if total >= 3:
        for first in range(total - 2):
            yield tuple(wordlist[first + offset] for offset in (0, 1, 2))
 class Markov(object):
    # A Markov chain text generator. Its cache maps the key of each pair of
    # consecutive words (see getkey) to the list of words seen after them,
    # plus a special Head entry listing the words that start a message
    ModeJson = "MODE_JSON"
    ModeList = "MODE_LIST"
    ModeChatData = "MODE_CHAT_DATA"
    Head = "\n^MESSAGE_SEPARATOR^"
    Tail = "^MESSAGE_SEPARATOR^"

    def __init__(self, load=None, mode=None):
        # The cache always exists, even for modes that load nothing
        # (like ModeChatData)
        self.cache = {}
        if mode == Markov.ModeJson:
            self.cache = json.loads(load)
        elif mode == Markov.ModeList:
            self.loadList(load)

    def loadList(self, lines):
        # Learns each line as one message, marking its beginning with the Head
        for line in lines:
            words = [Markov.Head]
            words.extend(line.split())
            self.learn_words(words)

    def dumps(self):
        # Dumps the cache dictionary into a JSON-formatted string
        return json.dumps(self.cache)

    @staticmethod
    def loads(dump):
        # Loads the cache dictionary from a JSON-formatted string. An empty
        # dump gives an empty Markov
        if len(dump) == 0:
            return Markov()
        return Markov(load=dump, mode=Markov.ModeJson)

    def learn_words(self, words):
        self.database(words)

    def database(self, wordlist):
        # Stores every triple of consecutive words: the first 2 make the key,
        # the 3rd is appended to that key's list. Words that follow the Head
        # are also listed under the Head entry, as message starters
        for w1, w2, w3 in triples(wordlist):
            if w1 == Markov.Head:
                if w1 in self.cache:
                    self.cache[Markov.Head].append(w2)
                else:
                    self.cache[Markov.Head] = [w2]
            key = getkey(w1, w2)
            if key in self.cache:
                self.cache[key].append(w3)
            else:
                self.cache[key] = [w3]

    def generate_markov_text(self, size=50, silence=False):
        # Generates a message of at most size words by walking the chain from
        # a random starter. silence tells if "@" mentions should be silenced
        if len(self.cache) == 0:
            return ""
        w1 = random.choice(self.cache[Markov.Head])
        w2 = random.choice(self.cache[getkey(Markov.Head, w1)])
        gen_words = []
        for i in range(size):
            if silence and w1.startswith("@") and len(w1) > 1:
                gen_words.append(w1.replace("@", "(@)"))
            else:
                gen_words.append(w1)
            if w2 == Markov.Tail or not getkey(w1, w2) in self.cache:
                # Reached the end of a message, or nothing follows these 2 words
                # print("Generated text")
                break
            else:
                w1, w2 = w2, random.choice(self.cache[getkey(w1, w2)])
        return ' '.join(gen_words)

    def cross(self, gen):
        # Merges the cache of another Markov into this one. The lists are
        # read from gen.cache and copied, so both objects stay independent
        for key in gen.cache:
            if key in self.cache:
                self.cache[key].extend(gen.cache[key])
            else:
                self.cache[key] = list(gen.cache[key])

    def new_count(self):
        # Counts the messages in the cache by counting the Tail markers
        count = 0
        for key in self.cache:
            for word in self.cache[key]:
                if word == Markov.Tail:
                    count += 1
        return count
--- a/scribe.py
+++ b/scribe.py
@ -1,194 +0,0 @@
 #!/usr/bin/env python3
 import random
 from chatlog import *
 from markov import Markov
 def getTitle(chat):
    """Return a display title for a chat.

    The chat's own title wins when it is set; otherwise the first name
    (followed by the last name when available) is used. A chat with neither
    a title nor a first name yields an empty string.
    """
    if chat.title is not None:
        return chat.title
    if chat.first_name is None:
        return ""
    if chat.last_name is None:
        return chat.first_name
    return chat.first_name + " " + chat.last_name
 def rewrite(text):
    """Split a message into words while keeping line breaks attached.

    A space is inserted after every newline so that the newline stays glued
    to the end of the preceding word. The text is then split on single
    spaces, each piece is stripped of spaces and tabs, and empty pieces are
    dropped.
    """
    pieces = text.replace('\n', '\n ').split(' ')
    trimmed = (piece.strip(' \t') for piece in pieces)
    return [word for word in trimmed if len(word) > 0]
 class Page(object):
    """A stored message: its message id paired with its content."""

    def __init__(self, mid, content):
        # `id` holds the message id, `content` whatever was stored for it.
        self.id, self.content = mid, content
 class Scribe(object):
    """Per-chat reader that buffers incoming messages as pages.

    A Scribe wraps a chatlog (chat metadata and settings) and an archivist
    (persistence and global limits). Messages are turned into word lists and
    kept in ``pages`` until they are handed to a Markov generator through
    ``teachParrot``.
    """

    # Marker tokens that precede a file id when the learned "message" is a
    # sticker, an animation or a video rather than plain text.
    TagPrefix = "^IS_"
    StickerTag = "^IS_STICKER^"
    AnimTag = "^IS_ANIMATION^"
    VideoTag = "^IS_VIDEO^"

    def __init__(self, chatlog, archivist):
        """Bind the scribe to a chatlog and the archivist that stores it."""
        self.chat = chatlog
        self.archivist = archivist
        self.pages = []
        # Starts at the chat's configured frequency; see resetCountdown().
        self.countdown = self.chat.freq
        self.logger = self.archivist.logger

    # The alternate constructors below take no instance, so they are declared
    # static; calls through the class keep working exactly as before.

    @staticmethod
    def FromChat(chat, archivist, newchat=False):
        """Create a Scribe for a chat object (``newchat`` is unused here)."""
        chatlog = Chatlog(chat.id, chat.type, getTitle(chat))
        scribe = Scribe(chatlog, archivist)
        return scribe

    @staticmethod
    def FromData(data, archivist):
        """Placeholder constructor; always returns ``None``."""
        return None

    @staticmethod
    def FromFile(log, archivist):
        """Create a Scribe from a serialized chatlog."""
        chatlog = Chatlog.loads(log)
        return Scribe(chatlog, archivist)

    @staticmethod
    def Recall(text, archivist):
        """Rebuild a Scribe and its generator from a legacy combined dump.

        The format version is taken from the first line (falling back to the
        fifth line when that yields at most one character). It decides how
        many leading lines form the chatlog header and where the generator
        cache begins. Unrecognized versions are read as the oldest layout: a
        four-line header followed by plain lines.

        Returns:
            A Scribe whose ``parrot`` attribute holds the loaded generator.
        """
        lines = text.splitlines()
        version = parse(lines[0]).strip()
        version = version if len(version.strip()) > 1 else lines[4]
        archivist.logger.info( "Dictionary version: {} ({} lines)".format(version, len(lines)) )
        # version -> (number of header lines, index of the first cache line)
        layouts = {
            "v4": (9, 10),
            "v3": (8, 9),
            "v2": (7, 8),
            "dict:": (6, 6),
        }
        if version in layouts:
            header_end, cache_start = layouts[version]
            chatlog = Chatlog.loadl(lines[0:header_end])
            cache = '\n'.join(lines[cache_start:])
            parrot = Markov.loads(cache)
        else:
            chatlog = Chatlog.loadl(lines[0:4])
            cache = lines[4:]
            parrot = Markov(load=cache, mode=Markov.ModeList)
            #raise SyntaxError("Scribe: Chatlog format unrecognized.")
        s = Scribe(chatlog, archivist)
        s.parrot = parrot
        return s

    def store(self, parrot):
        """Persist the chatlog and the given generator via the archivist."""
        self.archivist.store(self.chat.id, self.chat.dumps(), parrot)

    def checkType(self, t):
        """Return whether ``t`` is contained in the chat type."""
        return t in self.chat.type

    def compareType(self, t):
        """Return whether ``t`` equals the chat type."""
        return t == self.chat.type

    def setTitle(self, title):
        """Replace the chat title."""
        self.chat.title = title

    def setFreq(self, freq):
        """Set the chat frequency, capped at the archivist's ``maxFreq``.

        A frequency below the running countdown also shortens the countdown
        (never below 1). Returns whatever the chatlog's ``set_freq`` returns.
        """
        if freq < self.countdown:
            self.countdown = max(freq, 1)
        return self.chat.set_freq(min(freq, self.archivist.maxFreq))

    def setAnswer(self, afreq):
        """Set the answer chance; returns the chatlog's ``set_answer`` result."""
        return self.chat.set_answer(afreq)

    def cid(self):
        """Return the chat id as a string."""
        return str(self.chat.id)

    def count(self):
        """Return the chat's message count."""
        return self.chat.count

    def freq(self):
        """Return the chat's configured frequency."""
        return self.chat.freq

    def title(self):
        """Return the chat title."""
        return self.chat.title

    def answer(self):
        """Return the chat's answer chance."""
        return self.chat.answer

    def type(self):
        """Return the chat type."""
        return self.chat.type

    def isRestricted(self):
        """Return the chat's ``restricted`` flag."""
        return self.chat.restricted

    def restrict(self):
        """Toggle the chat's ``restricted`` flag."""
        self.chat.restricted = (not self.chat.restricted)

    def isSilenced(self):
        """Return the chat's ``silenced`` flag."""
        return self.chat.silenced

    def silence(self):
        """Toggle the chat's ``silenced`` flag."""
        self.chat.silenced = (not self.chat.silenced)

    def isAnswering(self):
        """Decide at random whether to answer, using the chat's answer chance.

        A chance of exactly 1 always answers and exactly 0 never does; any
        other value is compared against a uniform random draw.
        """
        # The draw happens first so the random stream advances on every call.
        rand = random.random()
        chance = self.answer()
        if chance == 1:
            return True
        elif chance == 0:
            return False
        return rand <= chance

    def addPage(self, mid, content):
        """Buffer ``content`` under message id ``mid``."""
        page = Page(mid, content)
        self.pages.append(page)

    def getReference(self):
        """Return the message id of a randomly chosen buffered page.

        Raises:
            IndexError: If no pages are buffered.
        """
        page = random.choice(self.pages)
        return page.id

    def resetCountdown(self):
        """Reset the countdown to the chat's configured frequency."""
        self.countdown = self.chat.freq

    def learn(self, message):
        """Buffer a message and bump the chat's message count.

        Text is read as is; stickers, animations and videos are stored as
        their tag followed by the file id. The count is incremented even when
        the message carries none of these.
        """
        mid = str(message.message_id)
        if message.text is not None:
            self.read(mid, message.text)
        elif message.sticker is not None:
            self.learnDrawing(mid, Scribe.StickerTag, message.sticker.file_id)
        elif message.animation is not None:
            self.learnDrawing(mid, Scribe.AnimTag, message.animation.file_id)
        elif message.video is not None:
            self.learnDrawing(mid, Scribe.VideoTag, message.video.file_id)
        self.chat.count += 1

    def learnDrawing(self, mid, tag, drawing):
        """Buffer a media message as ``"<tag> <file id>"``."""
        self.read(mid, tag + " " + drawing)

    def read(self, mid, text):
        """Tokenize ``text`` and buffer it as a page for message ``mid``.

        Messages of at most three words that mention "velasco" are ignored.
        The stored word list starts with ``Markov.Head`` and ends with
        ``Markov.Tail``.
        """
        if "velasco" in text.casefold() and len(text.split()) <= 3:
            return
        words = [Markov.Head]
        text = text + " " + Markov.Tail
        words.extend(rewrite(text))
        self.addPage(mid, words)

    def teachParrot(self, parrot):
        """Feed every buffered page to ``parrot`` and clear the buffer."""
        for page in self.pages:
            parrot.learn_words(page.content)
        self.pages = []
 """
    def learnFrom(self, scribe):
        self.chat.count += scribe.chat.count
        self.parrot.cross(scribe.parrot)
 """
--- a/speaker.py
+++ b/speaker.py
@ -1,24 +1,25 @@
 #!/usr/bin/env python3
 import random
-from scribe import Scribe
+from chatreader import ChatReader as Reader
 from markov import Markov
 from telegram.error import *
-def send(bot, cid, text, replying=None, format=None, logger=None, **kwargs):
+
-    kwargs["parse_mode"] = format
+def send(bot, cid, text, replying=None, formatting=None, logger=None, **kwargs):
    kwargs["parse_mode"] = formatting
    kwargs["reply_to_message_id"] = replying
-    if text.startswith(Scribe.TagPrefix):
+    if text.startswith(Reader.TAG_PREFIX):
        words = text.split(maxsplit=1)
        if logger:
            logger.info('Sending {} "{}" to {}'.format(words[0][4:-1], words[1], cid))
            # Logs something like 'Sending VIDEO "VIDEO_ID" to CHAT_ID'
-        if words[0] == Scribe.StickerTag:
+        if words[0] == Reader.STICKER_TAG:
            return bot.send_sticker(cid, words[1], **kwargs)
-        elif words[0] == Scribe.AnimTag:
+        elif words[0] == Reader.ANIM_TAG:
            return bot.send_animation(cid, words[1], **kwargs)
-        elif words[0] == Scribe.VideoTag:
+        elif words[0] == Reader.VIDEO_TAG:
            return bot.send_video(cid, words[1], **kwargs)
    else:
        text
@ -27,17 +28,6 @@ def send(bot, cid, text, replying=None, format=None, logger=None, **kwargs):
            logger.info("Sending a {} to {}: '{}'".format(mtype, cid, text))
        return bot.send_message(cid, text, **kwargs)
 def getTitle(chat):
    """Return a display name for a chat.

    A truthy chat title is returned as is. Otherwise the first and last
    names are joined with a space (missing parts are skipped), and
    "Unknown" is returned when neither name is available.
    """
    if chat.title:
        return chat.title
    last = chat.last_name or ""
    first = chat.first_name or ""
    name = " ".join((first, last)).strip()
    return name if len(name) > 0 else "Unknown"
 class Speaker(object):
    ModeFixed = "FIXED_MODE"
@ -59,7 +49,7 @@ class Speaker(object):
        self.reply = reply
        self.repeat = repeat
        self.filterCids = archivist.filterCids
-        self.bypass=archivist.bypass
+        self.bypass = archivist.bypass
    def announce(self, announcement, check=(lambda _: True)):
        for scribe in self.scriptorium:
@ -79,7 +69,7 @@ class Speaker(object):
    def getScribe(self, chat):
        cid = str(chat.id)
        if not cid in self.scriptorium:
-            scribe = Scribe.FromChat(chat, self.archivist, newchat=True)
+            scribe = Reader.FromChat(chat, self.archivist, newchat=True)
            self.scriptorium[cid] = scribe
            return scribe
        else:
--- a/velasco.py
+++ b/velasco.py
@ -18,7 +18,7 @@ speakerbot = None
 logger = logging.getLogger(__name__)
 # Enable logging
-log_format="[{}][%(asctime)s]%(name)s::%(levelname)s: %(message)s".format(username.upper())
+log_format = "[{}][%(asctime)s]%(name)s::%(levelname)s: %(message)s".format(username.upper())
 if coloredlogsError:
    logging.basicConfig(format=log_format, level=logging.INFO)
@ -49,20 +49,24 @@ about_msg = "I am yet another Markov Bot experiment. I read everything you type
 explanation = "I decompose every message I read in groups of 3 consecutive words, so for each consecutive pair I save the word that can follow them. I then use this to make my own messages. At first I will only repeat your messages because for each 2 words I will have very few possible following words.\n\nI also separate my vocabulary by chats, so anything I learn in one chat I will only say in that chat. For privacy, you know. Also, I save my vocabulary in the form of a json dictionary, so no logs are kept.\n\nMy default frequency in private chats is one message of mine from each 2 messages received, and in group chats it\'s 10 messages I read for each message I send."
 def static_reply(text, format=None):
    """Build a handler that always replies with the same text.

    Args:
        text: The reply to send.
        format: Passed through as ``parse_mode`` to ``reply_text``.

    Returns:
        A ``reply(bot, update)`` callback that answers the update's message.
    """
    def reply(bot, update):
        # `bot` is accepted but not used by this callback.
        incoming = update.message
        incoming.reply_text(text, parse_mode=format)
    return reply
 def error(bot, update, error):
    """Log a warning naming the update and the error it caused."""
    details = 'Update "{}" caused error "{}"'.format(update, error)
    logger.warning(details)
 def stop(bot, update):
    """Log that the bot was blocked in the chat the update came from.

    Args:
        bot: Unused by this handler.
        update: The incoming update; its message's chat identifies who
            blocked the bot.
    """
    # getScribe() reads `chat.id` itself, so it must receive the chat object;
    # passing the bare id made the lookup fail on the missing attribute.
    scribe = speakerbot.getScribe(update.message.chat)
    #del chatlogs[chatlog.id]
    #os.remove(LOG_DIR + chatlog.id + LOG_EXT)
    logger.warning("I got blocked by user {} [{}]".format(scribe.title(), scribe.cid()))
 def main():
    global speakerbot
    parser = argparse.ArgumentParser(description='A Telegram markov bot.')
@ -76,7 +80,7 @@ def main():
    updater = Updater(args.token)
    #filterCids=["-1001036575277", "-1001040087584", str(args.admin_id)]
-    filterCids=None
+    filterCids = None
    archivist = Archivist(logger,
                          chatdir="chatlogs/",
@ -84,7 +88,7 @@ def main():
                          admin=args.admin_id,
                          filterCids=filterCids,
                          readOnly=False
-                         )
+                          )
    speakerbot = Speaker("velasco", "@" + username, archivist, logger, wakeup=args.wakeup)