Added documenting comments across all the files

Added minimum and maximum period values as argument flags
2025-07-01 00:34:38 +02:00 · 2020-10-29 08:58:21 +01:00 · 2020-10-29 08:58:21 +01:00 · 09cf241f18
commit 09cf241f18
parent a13bdd51c7
7 changed files with 288 additions and 129 deletions
--- a/archivist.py
+++ b/archivist.py
@ -7,8 +7,8 @@ from generator import Generator
 class Archivist(object):

    def __init__(self, logger, chatdir=None, chatext=None, admin=0,
-                 period_inc=5, save_count=15, max_period=100000,
-                 read_only=False
+                 period_inc=5, save_count=15, min_period=1,
+                 max_period=100000, read_only=False
                 ):
        if chatdir is None or len(chatdir) == 0:
            chatdir = "./"
@ -19,16 +19,20 @@ class Archivist(object):
        self.chatext = chatext
        self.period_inc = period_inc
        self.save_count = save_count
+        self.min_period = min_period
        self.max_period = max_period
        self.read_only = read_only

+    # Formats and returns a chat folder path
    def chat_folder(self, *formatting, **key_format):
        return (self.chatdir + "/chat_{tag}").format(*formatting, **key_format)

+    # Formats and returns a chat file path
    def chat_file(self, *formatting, **key_format):
        return (self.chatdir + "/chat_{tag}/{file}{ext}").format(*formatting, **key_format)

-    def store(self, tag, data, vocab_dumper):
+    # Stores a Reader/Generator file pair
+    def store(self, tag, data, vocab):
        chat_folder = self.chat_folder(tag=tag)
        chat_card = self.chat_file(tag=tag, file="card", ext=".txt")

@ -45,17 +49,18 @@ class Archivist(object):
        file.write(data)
        file.close()

-        if vocab_dumper is not None:
+        if vocab is not None:
            chat_record = self.chat_file(tag=tag, file="record", ext=self.chatext)
            file = open(chat_record, 'w', encoding="utf-16")
-            vocab_dumper(file)
+            file.write(vocab)
            file.close()

+    # Loads a Generator's vocabulary file dump
    def load_vocab(self, tag):
        filepath = self.chat_file(tag=tag, file="record", ext=self.chatext)
        try:
            file = open(filepath, 'r', encoding="utf-16")
-            record = Generator.load(file)
+            record = file.read()
            file.close()
            return record
        except Exception as e:
@ -63,6 +68,7 @@ class Archivist(object):
            self.logger.exception(e)
            return None

+    # Loads a Generator's vocabulary file dump in the old UTF-8 encoding
    def load_vocab_old(self, tag):
        filepath = self.chat_file(tag=tag, file="record", ext=self.chatext)
        try:
@ -75,7 +81,8 @@ class Archivist(object):
            self.logger.exception(e)
            return None

-    def load_reader(self, tag):
+    # Loads a Metadata card file dump
+    def load_card(self, tag):
        filepath = self.chat_file(tag=tag, file="card", ext=".txt")
        try:
            reader_file = open(filepath, 'r')
@ -86,16 +93,21 @@ class Archivist(object):
            self.logger.error("Metadata file {} not found.".format(filepath))
            return None

+    # Returns a Reader for a given ID with an already working vocabulary - be it
+    # new or loaded from file
    def get_reader(self, tag):
-        reader = self.load_reader(tag)
-        if reader:
-            vocab = self.load_vocab(tag)
-            if not vocab:
+        card = self.load_card(tag)
+        if card:
+            vocab_dump = self.load_vocab(tag)
+            if vocab_dump:
+                vocab = Generator.loads(vocab_dump)
+            else:
                vocab = Generator()
-            return Reader.FromCard(reader, vocab, self.max_period, self.logger)
+            return Reader.FromCard(card, vocab, self.max_period, self.logger)
        else:
            return None

+    # Count the stored chats
    def chat_count(self):
        count = 0
        directory = os.fsencode(self.chatdir)
@ -105,6 +117,7 @@ class Archivist(object):
                count += 1
        return count

+    # Crawl through all the stored Readers
    def readers_pass(self):
        directory = os.fsencode(self.chatdir)
        for subdir in os.scandir(directory):
@ -124,6 +137,7 @@ class Archivist(object):
                    self.logger.exception(e)
                    raise e

+    # Load and immediately store every Reader
    def update(self):
        for reader in self.readers_pass():
            if reader.vocab is None:
--- a/generator.py
+++ b/generator.py
@ -4,12 +4,12 @@ import random
 import json


+# This splits strings into lists of words delimited by space.
+# Other whitespace characters get a space appended so they are included
+# as their own Markov chain element, so as not to pollute with
+# "different" words that would only differ in having a whitespace
+# attached or not
 def rewrite(text):
-    # This splits strings into lists of words delimited by space.
-    # Other whitespaces are appended space characters so they are included
-    # as their own Markov chain element, so as not to pollude with
-    # "different" words that would only differ in having a whitespace
-    # attached or not
    words = text.replace('\n', '\n ').split(' ')
    i = 0
    while i < len(words):
@ -23,24 +23,24 @@ def rewrite(text):
    return words


+# This gives a dictionary key from 2 words, ignoring case
 def getkey(w1, w2):
-    # This gives a dictionary key from 2 words, ignoring case
    key = (w1.strip().casefold(), w2.strip().casefold())
    return str(key)


+# This turns a dictionary key back into 2 separate words
 def getwords(key):
-    # This turns a dictionary key back into 2 separate words
    words = key.strip('()').split(', ')
    for i in range(len(words)):
        words[i].strip('\'')
    return words


+# Generates triplets of words from the given data string. So if our string
+# were "What a lovely day", we'd generate (What, a, lovely) and then
+# (a, lovely, day).
 def triplets(wordlist):
-    # Generates triplets of words from the given data string. So if our string
-    # were "What a lovely day", we'd generate (What, a, lovely) and then
-    # (a, lovely, day).
    if len(wordlist) < 3:
        return

@ -49,24 +49,25 @@ def triplets(wordlist):


 class Generator(object):
+    # Marks when we want to create a Generator object from a given JSON
    MODE_JSON = "MODE_JSON"
-    # This is to mark when we want to create a Generator object from a given JSON

+    # Marks when we want to create a Generator object from a given list of words
    MODE_LIST = "MODE_LIST"
-    # This is to mark when we want to create a Generator object from a given list of words

+    # Marks when we want to create a Generator object from a given dictionary
    MODE_DICT = "MODE_DICT"
-    # This is to mark when we want to create a Generator object from a given dictionary

-    MODE_CHAT_DATA = "MODE_CHAT_DATA"
-    # This is to mark when we want to create a Generator object from Chat data (WIP)
+    # Marks when we want to create a Generator object from a whole Chat history (WIP)
+    MODE_HIST = "MODE_HIST"

+    # Marks the beginning of a message
    HEAD = "\n^MESSAGE_SEPARATOR^"
+    # Marks the end of a message
    TAIL = " ^MESSAGE_SEPARATOR^"

    def __init__(self, load=None, mode=None):
        if mode is not None:
-            # We ain't creating a new Generator from scratch
            if mode == Generator.MODE_JSON:
                self.cache = json.loads(load)
            elif mode == Generator.MODE_LIST:
@ -74,45 +75,44 @@ class Generator(object):
                self.load_list(load)
            elif mode == Generator.MODE_DICT:
                self.cache = load
+            # TODO: Chat History mode
        else:
            self.cache = {}
-            # The cache is where we store our words

+    # Loads a text divided into a list of lines
    def load_list(self, many):
-        # Takes a list of strings and adds them to the cache one by one
        for one in many:
            self.add(one)

-    def dumps(self):
    # Dumps the cache dictionary into a JSON-formatted string
+    def dumps(self):
        return json.dumps(self.cache, ensure_ascii=False)

+    # Dumps the cache dictionary into a file, formatted as JSON
    def dump(self, f):
-        json.dump(self.cache, f, ensure_ascii=False, indent='')
+        json.dump(self.cache, f, ensure_ascii=False)

-    def loads(dump):
    # Loads the cache dictionary from a JSON-formatted string
+    def loads(dump):
        if len(dump) == 0:
            # faulty dump gives default Generator
            return Generator()
        # otherwise
        return Generator(load=dump, mode=Generator.MODE_JSON)

+    # Loads the cache dictionary from a file, formatted as JSON
    def load(f):
        return Generator(load=json.load(f), mode=Generator.MODE_DICT)

    def add(self, text):
-        # This takes a string and stores it in the cache, preceding it
-        # with the HEAD that marks the beginning of a new message and
-        # following it with the TAIL that marks the end
        words = [Generator.HEAD]
        text = rewrite(text + Generator.TAIL)
        words.extend(text)
        self.database(words)

-    def database(self, words):
    # This takes a list of words and stores it in the cache, adding
    # a special entry for the first word (the HEAD marker)
+    def database(self, words):
        for w1, w2, w3 in triplets(words):
            if w1 == Generator.HEAD:
                if w1 in self.cache:
@ -128,50 +128,50 @@ class Generator(object):
                # the new end of chain
                self.cache[key] = [w3]

-    def generate(self, size=50, silence=False):
    # This generates the Markov text/word chain
-        # silence tells if mentions should be silenced
+    # silence=True disables Telegram user mentions
+    def generate(self, size=50, silence=False):
        if len(self.cache) == 0:
            # If there is nothing in the cache we cannot generate anything
            return ""

+        # Start with a message HEAD and a random message starting word
        w1 = random.choice(self.cache[Generator.HEAD])
        w2 = random.choice(self.cache[getkey(Generator.HEAD, w1)])
-        # Start with a message HEAD and a random message starting word
        gen_words = []
+        # As long as we don't go over the max. message length (in n. of words)...
        for i in range(size):
-            # As long as we don't go over the size value (max. message length)...
            if silence and w1.startswith("@") and len(w1) > 1:
+                # ...append word 1, disabling any possible Telegram mention
                gen_words.append(w1.replace("@", "(@)"))
-                # ...append the first word, silencing any possible username mention
            else:
+                # ...append word 1
                gen_words.append(w1)
-                # ..append the first word
            if w2 == Generator.TAIL or not getkey(w1, w2) in self.cache:
                # When there's no key from the last 2 words to follow the chain,
                # or we reached a separation between messages, stop
                break
            else:
+                # Get a random third word that follows the chain of words 1
+                # and 2, then make words 2 and 3 to be the new words 1 and 2
                w1, w2 = w2, random.choice(self.cache[getkey(w1, w2)])
-                # Make the second word to be the new first word, and
-                # make a new random word that follows the chain to be
-                # the new second word
        return ' '.join(gen_words)

+    # Cross a second Generator into this one
    def cross(self, gen):
-        # cross 2 Generators into this one
        for key in gen.cache:
            if key in self.cache:
                self.cache[key].extend(gen.cache[key])
            else:
                self.cache[key] = list(gen.cache[key])

+    # Count again the number of messages
+    # (for whenever the count number is unreliable)
    def new_count(self):
-        # Count again the number of messages if the current number is unreliable
        count = 0
        for key in self.cache:
            for word in self.cache[key]:
                if word == Generator.TAIL:
+                    # ...by just counting message separators
                    count += 1
-                    # by just counting message separators
        return count
--- a/memorylist.py
+++ b/memorylist.py
@ -1,11 +1,19 @@
 #!/usr/bin/env python3

-from collections.abc import MutableSequence
+from collections.abc import Sequence


-class MemoryList(MutableSequence):
+class MemoryList(Sequence):
+    """Special "memory list" class that:
+       - Whenever an item is added that was already in the list,
+         it gets moved to the back instead
+       - Whenever an item is looked for, it gets moved to the
+         back
+       - If a new item is added that goes over a given capacity
+         limit, the item at the front (oldest accessed item)
+         is removed (and returned)"""
+
    def __init__(self, capacity, data=None):
-        """Initialize the class"""
        super(MemoryList, self).__init__()
        self._capacity = capacity
        if (data is not None):
@ -16,37 +24,25 @@ class MemoryList(MutableSequence):
    def __repr__(self):
        return "<{0} {1}, capacity {2}>".format(self.__class__.__name__, self._list, self._capacity)

+    def __str__(self):
+        return "{0}, {1}/{2}".format(self._list, len(self._list), self._capacity)
+
    def __len__(self):
-        """List length"""
        return len(self._list)

    def capacity(self):
        return self._capacity

    def __getitem__(self, ii):
-        """Get a list item"""
        return self._list[ii]

-    def __delitem__(self, ii):
-        """Delete an item"""
-        del self._list[ii]
-
-    def __setitem__(self, ii, val):
-        self._list[ii] = val
-
-    def __str__(self):
-        return str(self._list)
-
    def __contains__(self, val):
        return val in self._list

    def __iter__(self):
        return self._list.__iter__()

-    def insert(self, ii, val):
-        self._list.insert(ii, val)
-
-    def append(self, val):
+    def add(self, val):
        if val in self._list:
            self._list.remove(val)

@ -58,8 +54,8 @@ class MemoryList(MutableSequence):
        else:
            return None

-    def get_next(self, cond):
-        val = next((v for v in self._list if cond(v)), None)
+    def search(self, cond, *args, **kwargs):
+        val = next((v for v in self._list if cond(v)), *args, **kwargs)
        if val is not None:
            self._list.remove(val)
            self._list.append(val)
--- a/metadata.py
+++ b/metadata.py
@ -1,8 +1,8 @@
 #!/usr/bin/env python3

+# This reads a line in the format 'VARIABLE=value' and gives me the value.
+# See Metadata.loadl(...) for more details
 def parse_card_line(line):
-    # This reads a line in the format 'VARIABLE=value' and gives me the value.
-    # See Metadata.loadl(...) for more details
    s = line.split('=', 1)
    if len(s) < 2:
        return ""
@ -10,35 +10,37 @@ def parse_card_line(line):
        return s[1]


+# This is a chat's Metadata, holding different configuration values for
+# Velasco and other miscellaneous information about the chat
 class Metadata(object):
-    # This is a chat's Metadata, holding different configuration values for
-    # Velasco and other miscellaneous information about the chat
-
    def __init__(self, cid, ctype, title, count=0, period=None, answer=0.5, restricted=False, silenced=False):
-        self.id = str(cid)
        # The Telegram chat's ID
-        self.type = ctype
+        self.id = str(cid)
        # The type of chat
-        self.title = title
+        self.type = ctype
        # The title of the chat
+        self.title = title
        if period is None:
            if "group" in ctype:
-                period = 10
                # Default period for groups and supergroups
+                period = 10
            else:
-                period = 2
                # Default period for private or channel chats
+                period = 2
+        # The number of messages read in a chat
        self.count = count
-        # The number of messages read
-        self.period = period
        # This chat's configured period
-        self.answer = answer
+        self.period = period
        # This chat's configured answer probability
-        self.restricted = restricted
+        self.answer = answer
        # Wether some interactions are restricted to admins only
-        self.silenced = silenced
+        self.restricted = restricted
        # Wether messages should silence user mentions
+        self.silenced = silenced

+    # Sets the period for a chat
+    # It has to be at least 1
+    # Returns the new value
    def set_period(self, period):
        if period < 1:
            raise ValueError('Tried to set period a value less than 1.')
@ -46,6 +48,9 @@ class Metadata(object):
            self.period = period
        return self.period

+    # Sets the answer probability
+    # It's a percentage represented as a decimal between 0 and 1
+    # Returns the new value
    def set_answer(self, prob):
        if prob > 1:
            raise ValueError('Tried to set answer probability higher than 1.')
@ -55,6 +60,8 @@ class Metadata(object):
            self.answer = prob
        return self.answer

+    # Dumps the metadata into a list of lines, then joined together in a string,
+    # ready to be written into a file
    def dumps(self):
        lines = ["CARD=v5"]
        lines.append("CHAT_ID=" + self.id)
@ -68,10 +75,12 @@ class Metadata(object):
        # lines.append("WORD_DICT=")
        return ('\n'.join(lines)) + '\n'

+    # Creates a Metadata object from a previous text dump
    def loads(text):
        lines = text.splitlines()
        return Metadata.loadl(lines)

+    # Creates a Metadata object from a list of metadata lines
    def loadl(lines):
        # In a perfect world, I would get both the variable name and its corresponding value
        # from each side of the lines, but I know the order in which the lines are writen in
@ -90,6 +99,14 @@ class Metadata(object):
                            silenced=(parse_card_line(lines[8]) == 'True')
                            )
        elif version == "v3":
+            # Deprecated: this elif block will be removed in a new version
+            print("Warning! This Card format ({}) is deprecated. Update all".format(version),
+                  "your files in case that there are still some left in old formats before",
+                  "downloading the next update.")
+
+            # This is kept for retrocompatibility purposes, in case someone did a fork
+            # of this repo and still has some chat files that haven't been updated in
+            # a long while -- but I already converted all my files to v5
            return Metadata(cid=parse_card_line(lines[1]),
                            ctype=parse_card_line(lines[2]),
                            title=parse_card_line(lines[3]),
@ -99,6 +116,12 @@ class Metadata(object):
                            restricted=(parse_card_line(lines[6]) == 'True')
                            )
        elif version == "v2":
+            # Deprecated: this elif block will be removed in a new version
+            print("Warning! This Card format ({}) is deprecated. Update all".format(version),
+                  "your files in case that there are still some left in old formats before",
+                  "downloading the next update.")
+
+            # Also kept for retrocompatibility purposes
            return Metadata(cid=parse_card_line(lines[1]),
                            ctype=parse_card_line(lines[2]),
                            title=parse_card_line(lines[3]),
@ -107,6 +130,12 @@ class Metadata(object):
                            answer=float(parse_card_line(lines[5]))
                            )
        elif version == "dict:":
+            # Deprecated: this elif block will be removed in a new version
+            print("Warning! This Card format ('dict') is deprecated. Update all",
+                  "your files in case that there are still some left in old formats before",
+                  "downloading the next update.")
+
+            # Also kept for retrocompatibility purposes
            # At some point I decided to number the versions of each dictionary format,
            # but this was not always the case. This is what you get if you try to read
            # whatever there is in very old files where the version should be
@ -117,7 +146,13 @@ class Metadata(object):
                            period=int(lines[3])
                            )
        else:
-            # This is for the oldest of files
+            # Deprecated: this else block will be removed in a new version
+            print("Warning! This ancient Card format is deprecated. Update all",
+                  "your files in case that there are still some left in old formats before",
+                  "downloading the next update.")
+
+            # Also kept for retrocompatibility purposes
+            # This is for the oldest of file formats
            return Metadata(cid=lines[0],
                            ctype=lines[1],
                            title=lines[2],
--- a/reader.py
+++ b/reader.py
@ -5,9 +5,9 @@ from metadata import Metadata, parse_card_line
 from generator import Generator


+# This gives me the chat title, or the first and maybe last
+# name of the user as fallback if it's a private chat
 def get_chat_title(chat):
-    # This gives me the chat title, or the first and maybe last
-    # name of the user as fallback if it's a private chat
    if chat.title is not None:
        return chat.title
    elif chat.first_name is not None:
@ -25,40 +25,52 @@ class Memory(object):
        self.content = content


+# This is a chat Reader object, in charge of managing the parsing of messages
+# for a specific chat, and holding said chat's metadata
 class Reader(object):
-    # This is a chat Reader object, in charge of managing the parsing of messages
-    # for a specific chat, and holding said chat's metadata
-
+    # Media tagging variables
    TAG_PREFIX = "^IS_"
    STICKER_TAG = "^IS_STICKER^"
    ANIM_TAG = "^IS_ANIMATION^"
    VIDEO_TAG = "^IS_VIDEO^"

-    def __init__(self, metadata, vocab, max_period, logger):
+    def __init__(self, metadata, vocab, max_period, logger, names=[]):
+        # The Metadata object holding a chat's specific bot parameters
        self.meta = metadata
+        # The Generator object holding the vocabulary learned so far
        self.vocab = vocab
+        # The maximum period allowed for this bot
        self.max_period = max_period
+        # The short term memory, for recently read messages (see below)
        self.short_term_mem = []
+        # The countdown until the period ends and it's time to talk
        self.countdown = self.meta.period
+        # The logger object shared program-wide
        self.logger = logger
+        # The bot's nicknames + username
+        self.names = names

-    def FromChat(chat, max_period, logger):
    # Create a new Reader from a Chat object
+    def FromChat(chat, max_period, logger):
        meta = Metadata(chat.id, chat.type, get_chat_title(chat))
        vocab = Generator()
        return Reader(meta, vocab, max_period, logger)

+    # TODO: Create a new Reader from a whole Chat history
    def FromHistory(history, vocab, max_period, logger):
-        # Create a new Reader from a whole Chat history (WIP)
        return None

-    def FromCard(meta, vocab, max_period, logger):
    # Create a new Reader from a meta's file dump
+    def FromCard(meta, vocab, max_period, logger):
        metadata = Metadata.loads(meta)
        return Reader(metadata, vocab, max_period, logger)

+    # Deprecated: this method will be removed in a new version
    def FromFile(text, max_period, logger, vocab=None):
-        # Load a Reader from a file's text string (obsolete)
+        print("Warning! This method of loading a Reader from file (Reader.FromFile(...))",
+              "is deprecated, and will be removed from the next update. Use FromCard instead.")
+
+        # Load a Reader from a file's text string
        lines = text.splitlines()
        version = parse_card_line(lines[0]).strip()
        version = version if len(version.strip()) > 1 else lines[4]
@ -86,27 +98,33 @@ class Reader(object):
        r = Reader(meta, vocab, max_period, logger)
        return r

-    def archive(self):
    # Returns a nice lice little tuple package for the archivist to save to file.
    # Also commits to long term memory any pending short term memories
+    def archive(self):
        self.commit_memory()
-        return (self.meta.id, self.meta.dumps(), self.vocab.dump)
+        return (self.meta.id, self.meta.dumps(), self.vocab.dumps())

+    # Checks type. Returns "True" for "group" even if it's supergroup
    def check_type(self, t):
-        # Checks type. Returns "True" for "group" even if it's supergroup
        return t in self.meta.type

-    def exactly_type(self, t):
    # Hard check
+    def exactly_type(self, t):
        return t == self.meta.type

    def set_title(self, title):
        self.meta.title = title

+    # Sets a new period in the Metadata
    def set_period(self, period):
-        if period < self.countdown:
-            self.countdown = max(period, 1)
-        return self.meta.set_period(min(period, self.max_period))
+        # The period has to be under max_period; otherwise, truncate to max_period
+        new_period = min(period, self.max_period)
+        set_period = self.meta.set_period(new_period)
+        if new_period == set_period and new_period < self.countdown:
+            # If successfully changed and the new period is less than the current
+            # remaining countdown, reduce the countdown to the new period
+            self.countdown = new_period
+        return new_period

    def set_answer(self, prob):
        return self.meta.set_answer(prob)
@ -141,6 +159,8 @@ class Reader(object):
    def toggle_silence(self):
        self.meta.silenced = (not self.meta.silenced)

+    # Rolls the chance for answering in this specific chat,
+    # according to the answer probability
    def is_answering(self):
        rand = random.random()
        chance = self.answer()
@ -150,10 +170,13 @@ class Reader(object):
            return False
        return rand <= chance

+    # Adds a new message to the short term memory
    def add_memory(self, mid, content):
        mem = Memory(mid, content)
        self.short_term_mem.append(mem)

+    # Returns a random message ID from the short memory,
+    # when answering to a random comment
    def random_memory(self):
        if len(self.short_term_mem) == 0:
            return None
@ -163,6 +186,10 @@ class Reader(object):
    def reset_countdown(self):
        self.countdown = self.meta.period

+    # Reads a message
+    # This process will determine which kind of message it is (Sticker, Anim,
+    # Video, or actual text) and pre-process it accordingly for the Generator,
+    # then store it in the short term memory
    def read(self, message):
        mid = str(message.message_id)

@ -174,16 +201,25 @@ class Reader(object):
            self.learn_drawing(mid, Reader.ANIM_TAG, message.animation.file_id)
        elif message.video is not None:
            self.learn_drawing(mid, Reader.VIDEO_TAG, message.video.file_id)
+
        self.meta.count += 1

+    # Stores a multimedia message in the short term memory as a text with
+    # TAG + the media file ID
    def learn_drawing(self, mid, tag, drawing):
        self.learn(mid, tag + " " + drawing)

+    # Stores a text message in the short term memory
    def learn(self, mid, text):
-        if "velasco" in text.casefold() and len(text.split()) <= 3:
+        for name in self.names:
+            if name.casefold() in text.casefold() and len(text.split()) <= 3:
+                # If it's 3 words or fewer and one of the bot's names is in
+                # the message, ignore it as it's most probably just a summon
                return
        self.add_memory(mid, text)

+    # Commits the short term memory messages into the "long term memory"
+    # aka the vocabulary Generator's cache
    def commit_memory(self):
        for mem in self.short_term_mem:
            self.vocab.add(mem.content)
--- a/speaker.py
+++ b/speaker.py
@ -8,19 +8,24 @@ from reader import Reader, get_chat_title
 from telegram.error import NetworkError


+# Auxiliary function to print to stderr (alongside logger messages)
 def eprint(*args, **kwargs):
    print(*args, end=' ', file=stderr, **kwargs)


+# Auxiliary function to send a text to a chat through a bot
 def send(bot, cid, text, replying=None, formatting=None, logger=None, **kwargs):
-    kwargs["parse_mode"] = formatting
-    kwargs["reply_to_message_id"] = replying
+    # Markdown or HTML formatting (both argument names are valid)
+    kwargs["parse_mode"] = formatting or kwargs.get("parse_mode")
+    # ID of the message it's replying to (both argument names are valid)
+    kwargs["reply_to_message_id"] = replying or kwargs.get("reply_to_message_id")
+    # Reminder that dict.get(key) defaults to None if the key isn't found

    if text.startswith(Reader.TAG_PREFIX):
+        # We're sending a media file ID
        words = text.split(maxsplit=1)
        if logger:
            logger.info('Sending {} "{}" to {}'.format(words[0][4:-1], words[1], cid))
-            # eprint('[]')
            # Logs something like 'Sending VIDEO "VIDEO_ID" to CHAT_ID'

        if words[0] == Reader.STICKER_TAG:
@ -30,16 +35,18 @@ def send(bot, cid, text, replying=None, formatting=None, logger=None, **kwargs):
        elif words[0] == Reader.VIDEO_TAG:
            return bot.send_video(cid, words[1], **kwargs)
    else:
-        text
+        # It's text
        if logger:
-            mtype = "reply" if replying else "message"
+            mtype = "reply" if (kwargs.get("reply_to_message_id")) else "message"
            logger.info("Sending a {} to {}: '{}'".format(mtype, cid, text))
            # eprint('.')
        return bot.send_message(cid, text, **kwargs)


 class Speaker(object):
+    # Marks if the period is a fixed time when to send a new message
    ModeFixed = "FIXED_MODE"
+    # Marks if the "periodic" messages have a weighted random chance to be sent, depending on the period
    ModeChance = "CHANCE_MODE"

    def __init__(self, username, archivist, logger, admin=0, nicknames=[],
@ -47,35 +54,55 @@ class Speaker(object):
                 memory=20, mute_time=60, save_time=3600, bypass=False,
                 cid_whitelist=None, max_len=50
                 ):
+        # List of nicknames other than the username that the bot can be called as
        self.names = nicknames
+        # Mute time for Telegram network errors
        self.mute_time = mute_time
+        # Last mute timestamp
        self.mute_timer = None
+        # The bot's username, "@" included
        self.username = username
-
+        # The maximum chat period for this bot
        self.max_period = archivist.max_period
+
+        # The Archivist functions to load and save from and to files
        self.get_reader_file = archivist.get_reader
        self.store_file = archivist.store
+
+        # Archivist function to crawl all stored Readers
        self.readers_pass = archivist.readers_pass

+        # Legacy load logging messages
        logger.info("----")
        logger.info("Finished loading.")
        logger.info("Loaded {} chats.".format(archivist.chat_count()))
        logger.info("----")

+        # Wakeup flag that determines if it should send a wakeup message to stored groupchats
        self.wakeup = wakeup
+        # The logger shared program-wide
        self.logger = logger
+        # Chance of sending messages as replies
        self.reply = reply
+        # Chance of sending 2 messages in a row
        self.repeat = repeat
+        # If not empty, whitelist of chat IDs to only respond to
        self.cid_whitelist = cid_whitelist
+        # Memory list/cache for the last accessed chats
        self.memory = MemoryList(memory)
+        # Minimum time to wait between memory saves (triggered at the next message from any chat)
        self.save_time = save_time
+        # Last save timestamp
        self.memory_timer = int(time.perf_counter())
+        # Admin user ID
        self.admin = admin
+        # For testing purposes
        self.bypass = bypass
+        # Max word length for a message
        self.max_len = max_len

-    def announce(self, bot, announcement, check=(lambda _: True)):
    # Sends an announcement to all chats that pass the check
+    def announce(self, bot, announcement, check=(lambda _: True)):
        for reader in self.readers_pass():
            try:
                if check(reader):
@ -84,9 +111,9 @@ class Speaker(object):
            except Exception:
                pass

-    def wake(self, bot, wake):
    # If wakeup flag is set, sends a wake-up message as announcement to all chats that
    # are groups. Also, always sends a wakeup message to the 'bot admin'
+    def wake(self, bot, wake):
        send(bot, self.admin, wake)

        if self.wakeup:
@ -94,9 +121,13 @@ class Speaker(object):
                return reader.check_type("group")
            self.announce(bot, wake, group_check)

+    # Looks up a reader in the memory list
    def get_reader(self, cid):
-        return self.memory.get_next(lambda r: r.cid() == cid)
+        return self.memory.search(lambda r: r.cid() == cid, None)

+    # Looks up and returns a reader if it's in memory, or loads up a reader from
+    # file, adds it to memory, and returns it. Any other reader pushed out of
+    # memory is saved to file
    def load_reader(self, chat):
        cid = str(chat.id)
        reader = self.get_reader(cid)
@ -107,19 +138,24 @@ class Speaker(object):
        if not reader:
            reader = Reader.FromChat(chat, self.max_period, self.logger)

-        old_reader = self.memory.append(reader)
+        old_reader = self.memory.add(reader)
        if old_reader is not None:
            old_reader.commit_memory()
            self.store(old_reader)

        return reader

+    # Returns a reader if it's in memory, or loads it up from a file and returns
+    # it otherwise. Does NOT add the Reader to memory
+    # This is useful for command prompts that do not require the Reader to be cached
    def access_reader(self, cid):
        reader = self.get_reader(cid)
        if reader is None:
            return self.get_reader_file(cid)
        return reader

+    # Returns True if the bot's username is called, or if one of the nicknames is
+    # mentioned and they're not another user's username
    def mentioned(self, text):
        if self.username in text:
            return True
@ -128,20 +164,28 @@ class Speaker(object):
                return True
        return False

+    # Returns True if not enough time has passed since the last mute timestamp
    def is_mute(self):
        current_time = int(time.perf_counter())
        return self.mute_timer is not None and (current_time - self.mute_timer) < self.mute_time

+    # Series of checks to determine if the bot should reply to a specific message, aside
+    # from the usual periodic messages
    def should_reply(self, message, reader):
        if self.is_mute():
+            # Not if mute time hasn't finished
            return False
        if not self.bypass and reader.is_restricted():
+            # If we're not in testing mode and the chat is restricted
            user = message.chat.get_member(message.from_user.id)
            if not self.user_is_admin(user):
-                # update.message.reply_text("You do not have permissions to do that.")
+                # ...And the user has no permissions, should not reply
                return False
+
+        # otherwise (testing mode, or the chat is unrestricted, or the user has permissions)
        replied = message.reply_to_message
        text = message.text.casefold() if message.text else ""
+        # Only if it's a reply to a message of ours or the bot is mentioned in the message
        return (((replied is not None) and (replied.from_user.name == self.username))
                or (self.mentioned(text)))

@ -151,12 +195,14 @@ class Speaker(object):
        else:
            self.store_file(*reader.archive())

+    # Check if enough time for saving memory has passed
    def should_save(self):
        current_time = int(time.perf_counter())
        elapsed = (current_time - self.memory_timer)
        self.logger.debug("Save check: {}".format(elapsed))
        return elapsed >= self.save_time

+    # Save all Readers in memory to files if it's save time
    def save(self):
        if self.should_save():
            self.logger.info("Saving chats in memory...")
@ -165,29 +211,38 @@ class Speaker(object):
            self.memory_timer = time.perf_counter()
            self.logger.info("Chats saved.")

+    # Reads a non-command message
    def read(self, update, context):
+        # Check for save time
        self.save()

+        # Ignore non-message updates
        if update.message is None:
            return
+
        chat = update.message.chat
        reader = self.load_reader(chat)
        reader.read(update.message)

+        # Check if it's a "replyable" message & roll the chance to do so
        if self.should_reply(update.message, reader) and reader.is_answering():
            self.say(context.bot, reader, replying=update.message.message_id)
            return

+        # Update the Reader's title if it has changed since the last message read
        title = get_chat_title(update.message.chat)
        if title != reader.title():
            reader.set_title(title)

+        # Decrease the countdown for the chat, and send a message if it reached 0
        reader.countdown -= 1
        if reader.countdown < 0:
            reader.reset_countdown()
+            # Random chance to reply to a recent message
            rid = reader.random_memory() if random.random() <= self.reply else None
            self.say(context.bot, reader, replying=rid)

+    # Handles /speak command
    def speak(self, update, context):
        chat = (update.message.chat)
        reader = self.load_reader(chat)
@ -200,12 +255,14 @@ class Speaker(object):

        mid = str(update.message.message_id)
        replied = update.message.reply_to_message
+        # Reply to the message that the command replies to, otherwise to the command itself
        rid = replied.message_id if replied else mid
        words = update.message.text.split()
        if len(words) > 1:
            reader.read(' '.join(words[1:]))
        self.say(context.bot, reader, replying=rid)

+    # Checks user permissions. Bot admin is always considered as having full permissions
    def user_is_admin(self, member):
        self.logger.info("user {} ({}) requesting a restricted action".format(str(member.user.id), member.user.name))
        # eprint('!')
@ -214,23 +271,30 @@ class Speaker(object):
                or (member.status == 'administrator')
                or (member.user.id == self.admin))

+    # Generate speech (message)
    def speech(self, reader):
        return reader.generate_message(self.max_len)

+    # Say a newly generated message
    def say(self, bot, reader, replying=None, **kwargs):
        cid = reader.cid()
        if self.cid_whitelist is not None and cid not in self.cid_whitelist:
+            # Don't, if there's a whitelist and this chat is not in it
            return
        if self.is_mute():
+            # Don't, if mute time isn't over
            return

        try:
            send(bot, cid, self.speech(reader), replying, logger=self.logger, **kwargs)
            if self.bypass:
+                # Testing mode, force a reasonable period (to not have the bot spam one specific chat with a low period)
                max_period = self.max_period
                reader.set_period(random.randint(max_period // 4, max_period))
            if random.random() <= self.repeat:
                send(bot, cid, self.speech(reader), logger=self.logger, **kwargs)
+        # Consider any Network Error as a Telegram temporary ban, as I couldn't find
+        # out in the documentation how error 429 is handled by python-telegram-bot
        except NetworkError as e:
            self.logger.error("Sending a message caused network error:")
            self.logger.exception(e)
@ -240,21 +304,25 @@ class Speaker(object):
            self.logger.error("Sending a message caused exception:")
            self.logger.exception(e)

+    # Handling /count command
    def get_count(self, update, context):
        cid = str(update.message.chat.id)
-        reader = self.access_reader(cid)
+        reader = self.load_reader(cid)

        num = str(reader.count()) if reader else "no"
        update.message.reply_text("I remember {} messages.".format(num))

+    # Handling /get_chats command (exclusive for bot admin)
    def get_chats(self, update, context):
        lines = ["[{}]: {}".format(reader.cid(), reader.title()) for reader in self.readers_pass()]
        chat_list = "\n".join(lines)
        update.message.reply_text("I have the following chats:\n\n" + chat_list)

+    # Handling /period command
+    # Print the current period or set a new one if one is given
    def period(self, update, context):
        chat = update.message.chat
-        reader = self.access_reader(str(chat.id))
+        reader = self.load_reader(str(chat.id))

        words = update.message.text.split()
        if len(words) <= 1:
@ -270,13 +338,14 @@ class Speaker(object):
            period = int(words[1])
            period = reader.set_period(period)
            update.message.reply_text("Period of speaking set to {}.".format(period))
-            self.store_file(*reader.archive())
        except Exception:
            update.message.reply_text("Format was confusing; period unchanged from {}.".format(reader.period()))

+    # Handling /answer command
+    # Print the current answer probability or set a new one if one is given
    def answer(self, update, context):
        chat = update.message.chat
-        reader = self.access_reader(str(chat.id))
+        reader = self.load_reader(str(chat.id))

        words = update.message.text.split()
        if len(words) <= 1:
@ -292,17 +361,18 @@ class Speaker(object):
            answer = float(words[1])
            answer = reader.set_answer(answer)
            update.message.reply_text("Answer probability set to {}.".format(answer))
-            self.store_file(*reader.archive())
        except Exception:
            update.message.reply_text("Format was confusing; answer probability unchanged from {}.".format(reader.answer()))

+    # Handling /restrict command
+    # Toggle the restriction value if it's a group chat and the user has permissions to do so
    def restrict(self, update, context):
        if "group" not in update.message.chat.type:
            update.message.reply_text("That only works in groups.")
            return
        chat = update.message.chat
        user = chat.get_member(update.message.from_user.id)
-        reader = self.access_reader(str(chat.id))
+        reader = self.load_reader(str(chat.id))

        if reader.is_restricted():
            if not self.user_is_admin(user):
@ -311,15 +381,16 @@ class Speaker(object):
        reader.toggle_restrict()
        allowed = "let only admins" if reader.is_restricted() else "let everyone"
        update.message.reply_text("I will {} configure me now.".format(allowed))
-        self.store_file(*reader.archive())

+    # Handling /silence command
+    # Toggle the silence value if it's a group chat and the user has permissions to do so
    def silence(self, update, context):
        if "group" not in update.message.chat.type:
            update.message.reply_text("That only works in groups.")
            return
        chat = update.message.chat
        user = chat.get_member(update.message.from_user.id)
-        reader = self.access_reader(str(chat.id))
+        reader = self.load_reader(str(chat.id))

        if reader.is_restricted():
            if not self.user_is_admin(user):
@ -328,8 +399,8 @@ class Speaker(object):
        reader.toggle_silence()
        allowed = "avoid mentioning" if reader.is_silenced() else "mention"
        update.message.reply_text("I will {} people now.".format(allowed))
-        self.store_file(*reader.archive())

+    # Handling /who command
    def who(self, update, context):
        msg = update.message
        usr = msg.from_user
@ -346,6 +417,7 @@ class Speaker(object):

        msg.reply_markdown(answer)

+    # Handling /where command
    def where(self, update, context):
        msg = update.message
        chat = msg.chat
--- a/velasco.py
+++ b/velasco.py
@ -90,7 +90,11 @@ def main():
    parser.add_argument('-m', '--mute_time', metavar='T', type=int, default=60,
                        help='The time (in s) for the muting period when Telegram limits the bot. (default: 60).')
    parser.add_argument('-s', '--save_time', metavar='T', type=int, default=3600,
-                        help='The time (in s) for periodic saves (default: 3600).')
+                        help='The time (in s) for periodic saves. (default: 3600)')
+    parser.add_argument('-p', '--min_period', metavar='MIN_P', type=int, default=1,
+                        help='The minimum value for a chat\'s period. (default: 1)')
+    parser.add_argument('-P', '--max_period', metavar='MAX_P', type=int, default=100000,
+                        help='The maximum value for a chat\'s period. (default: 100000)')

    args = parser.parse_args()

@ -104,6 +108,8 @@ def main():
    archivist = Archivist(logger,
                          chatdir=args.directory,
                          chatext=".vls",
+                          min_period=args.min_period,
+                          max_period=args.max_period,
                          read_only=False
                          )