Added documenting comments across all the files

Added minimum and maximum period values as argument flags
This commit is contained in:
vylion 2020-10-29 08:58:21 +01:00
parent a13bdd51c7
commit 09cf241f18
7 changed files with 288 additions and 129 deletions

View file

@ -7,8 +7,8 @@ from generator import Generator
class Archivist(object):
def __init__(self, logger, chatdir=None, chatext=None, admin=0,
period_inc=5, save_count=15, max_period=100000,
read_only=False
period_inc=5, save_count=15, min_period=1,
max_period=100000, read_only=False
):
if chatdir is None or len(chatdir) == 0:
chatdir = "./"
@ -19,16 +19,20 @@ class Archivist(object):
self.chatext = chatext
self.period_inc = period_inc
self.save_count = save_count
self.min_period = min_period
self.max_period = max_period
self.read_only = read_only
# Formats and returns a chat folder path
def chat_folder(self, *formatting, **key_format):
return (self.chatdir + "/chat_{tag}").format(*formatting, **key_format)
# Formats and returns a chat file path
def chat_file(self, *formatting, **key_format):
return (self.chatdir + "/chat_{tag}/{file}{ext}").format(*formatting, **key_format)
def store(self, tag, data, vocab_dumper):
# Stores a Reader/Generator file pair
def store(self, tag, data, vocab):
chat_folder = self.chat_folder(tag=tag)
chat_card = self.chat_file(tag=tag, file="card", ext=".txt")
@ -45,17 +49,18 @@ class Archivist(object):
file.write(data)
file.close()
if vocab_dumper is not None:
if vocab is not None:
chat_record = self.chat_file(tag=tag, file="record", ext=self.chatext)
file = open(chat_record, 'w', encoding="utf-16")
vocab_dumper(file)
file.write(vocab)
file.close()
# Loads a Generator's vocabulary file dump
def load_vocab(self, tag):
filepath = self.chat_file(tag=tag, file="record", ext=self.chatext)
try:
file = open(filepath, 'r', encoding="utf-16")
record = Generator.load(file)
record = file.read()
file.close()
return record
except Exception as e:
@ -63,6 +68,7 @@ class Archivist(object):
self.logger.exception(e)
return None
# Loads a Generator's vocabulary file dump in the old UTF-8 encoding
def load_vocab_old(self, tag):
filepath = self.chat_file(tag=tag, file="record", ext=self.chatext)
try:
@ -75,7 +81,8 @@ class Archivist(object):
self.logger.exception(e)
return None
def load_reader(self, tag):
# Loads a Metadata card file dump
def load_card(self, tag):
filepath = self.chat_file(tag=tag, file="card", ext=".txt")
try:
reader_file = open(filepath, 'r')
@ -86,16 +93,21 @@ class Archivist(object):
self.logger.error("Metadata file {} not found.".format(filepath))
return None
# Returns a Reader for a given ID with an already working vocabulary - be it
# new or loaded from file
def get_reader(self, tag):
reader = self.load_reader(tag)
if reader:
vocab = self.load_vocab(tag)
if not vocab:
card = self.load_card(tag)
if card:
vocab_dump = self.load_vocab(tag)
if vocab_dump:
vocab = Generator.loads(vocab_dump)
else:
vocab = Generator()
return Reader.FromCard(reader, vocab, self.max_period, self.logger)
return Reader.FromCard(card, vocab, self.max_period, self.logger)
else:
return None
# Count the stored chats
def chat_count(self):
count = 0
directory = os.fsencode(self.chatdir)
@ -105,6 +117,7 @@ class Archivist(object):
count += 1
return count
# Crawl through all the stored Readers
def readers_pass(self):
directory = os.fsencode(self.chatdir)
for subdir in os.scandir(directory):
@ -124,6 +137,7 @@ class Archivist(object):
self.logger.exception(e)
raise e
# Load and immediately store every Reader
def update(self):
for reader in self.readers_pass():
if reader.vocab is None:

View file

@ -4,12 +4,12 @@ import random
import json
# This splits strings into lists of words delimited by space.
# Other whitespaces are appended space characters so they are included
# as their own Markov chain element, so as not to pollute with
# "different" words that would only differ in having a whitespace
# attached or not
def rewrite(text):
# This splits strings into lists of words delimited by space.
# Other whitespaces are appended space characters so they are included
# as their own Markov chain element, so as not to pollute with
# "different" words that would only differ in having a whitespace
# attached or not
words = text.replace('\n', '\n ').split(' ')
i = 0
while i < len(words):
@ -23,24 +23,24 @@ def rewrite(text):
return words
# This gives a dictionary key from 2 words, ignoring case
def getkey(w1, w2):
# This gives a dictionary key from 2 words, ignoring case
key = (w1.strip().casefold(), w2.strip().casefold())
return str(key)
# This turns a dictionary key back into 2 separate words
def getwords(key):
    """Turn a dictionary key built by getkey() back into its separate words.

    getkey() returns the str() of a tuple of words, e.g. "('hello', 'world')".
    This strips the surrounding parentheses, splits on ', ' and removes the
    single quotes around each word.

    Args:
        key: the string key to split.

    Returns:
        A list with the words contained in the key.
    """
    # str.strip returns a new string, so the stripped words have to be
    # collected; stripping in place without assigning left the quotes on
    return [word.strip('\'') for word in key.strip('()').split(', ')]
# Generates triplets of words from the given data string. So if our string
# were "What a lovely day", we'd generate (What, a, lovely) and then
# (a, lovely, day).
def triplets(wordlist):
# Generates triplets of words from the given data string. So if our string
# were "What a lovely day", we'd generate (What, a, lovely) and then
# (a, lovely, day).
if len(wordlist) < 3:
return
@ -49,24 +49,25 @@ def triplets(wordlist):
class Generator(object):
# Marks when we want to create a Generator object from a given JSON
MODE_JSON = "MODE_JSON"
# This is to mark when we want to create a Generator object from a given JSON
# Marks when we want to create a Generator object from a given list of words
MODE_LIST = "MODE_LIST"
# This is to mark when we want to create a Generator object from a given list of words
# Marks when we want to create a Generator object from a given dictionary
MODE_DICT = "MODE_DICT"
# This is to mark when we want to create a Generator object from a given dictionary
MODE_CHAT_DATA = "MODE_CHAT_DATA"
# This is to mark when we want to create a Generator object from Chat data (WIP)
# Marks when we want to create a Generator object from a whole Chat history (WIP)
MODE_HIST = "MODE_HIST"
# Marks the beginning of a message
HEAD = "\n^MESSAGE_SEPARATOR^"
# Marks the end of a message
TAIL = " ^MESSAGE_SEPARATOR^"
def __init__(self, load=None, mode=None):
if mode is not None:
# We ain't creating a new Generator from scratch
if mode == Generator.MODE_JSON:
self.cache = json.loads(load)
elif mode == Generator.MODE_LIST:
@ -74,45 +75,44 @@ class Generator(object):
self.load_list(load)
elif mode == Generator.MODE_DICT:
self.cache = load
# TODO: Chat History mode
else:
self.cache = {}
# The cache is where we store our words
# Loads a text divided into a list of lines
def load_list(self, many):
# Takes a list of strings and adds them to the cache one by one
for one in many:
self.add(one)
def dumps(self):
# Dumps the cache dictionary into a JSON-formatted string
def dumps(self):
return json.dumps(self.cache, ensure_ascii=False)
# Dumps the cache dictionary into a file, formatted as JSON
def dump(self, f):
json.dump(self.cache, f, ensure_ascii=False, indent='')
json.dump(self.cache, f, ensure_ascii=False)
def loads(dump):
# Loads the cache dictionary from a JSON-formatted string
def loads(dump):
if len(dump) == 0:
# faulty dump gives default Generator
return Generator()
# otherwise
return Generator(load=dump, mode=Generator.MODE_JSON)
# Loads the cache dictionary from a file, formatted as JSON
def load(f):
return Generator(load=json.load(f), mode=Generator.MODE_DICT)
def add(self, text):
# This takes a string and stores it in the cache, preceding it
# with the HEAD that marks the beginning of a new message and
# following it with the TAIL that marks the end
words = [Generator.HEAD]
text = rewrite(text + Generator.TAIL)
words.extend(text)
self.database(words)
def database(self, words):
# This takes a list of words and stores it in the cache, adding
# a special entry for the first word (the HEAD marker)
def database(self, words):
for w1, w2, w3 in triplets(words):
if w1 == Generator.HEAD:
if w1 in self.cache:
@ -128,50 +128,50 @@ class Generator(object):
# the new end of chain
self.cache[key] = [w3]
def generate(self, size=50, silence=False):
# This generates the Markov text/word chain
# silence tells if mentions should be silenced
# silence=True disables Telegram user mentions
def generate(self, size=50, silence=False):
if len(self.cache) == 0:
# If there is nothing in the cache we cannot generate anything
return ""
# Start with a message HEAD and a random message starting word
w1 = random.choice(self.cache[Generator.HEAD])
w2 = random.choice(self.cache[getkey(Generator.HEAD, w1)])
# Start with a message HEAD and a random message starting word
gen_words = []
# As long as we don't go over the max. message length (in n. of words)...
for i in range(size):
# As long as we don't go over the size value (max. message length)...
if silence and w1.startswith("@") and len(w1) > 1:
# ...append word 1, disabling any possible Telegram mention
gen_words.append(w1.replace("@", "(@)"))
# ...append the first word, silencing any possible username mention
else:
# ..append word 1
gen_words.append(w1)
# ..append the first word
if w2 == Generator.TAIL or not getkey(w1, w2) in self.cache:
# When there's no key from the last 2 words to follow the chain,
# or we reached a separation between messages, stop
break
else:
# Get a random third word that follows the chain of words 1
# and 2, then make words 2 and 3 to be the new words 1 and 2
w1, w2 = w2, random.choice(self.cache[getkey(w1, w2)])
# Make the second word to be the new first word, and
# make a new random word that follows the chain to be
# the new second word
return ' '.join(gen_words)
# Cross a second Generator into this one
def cross(self, gen):
# cross 2 Generators into this one
for key in gen.cache:
if key in self.cache:
self.cache[key].extend(gen.cache[key])
else:
self.cache[key] = list(gen.cache[key])
# Count again the number of messages
# (for whenever the count number is unreliable)
def new_count(self):
# Count again the number of messages if the current number is unreliable
count = 0
for key in self.cache:
for word in self.cache[key]:
if word == Generator.TAIL:
# ...by just counting message separators
count += 1
# by just counting message separators
return count

View file

@ -1,11 +1,19 @@
#!/usr/bin/env python3
from collections.abc import MutableSequence
from collections.abc import Sequence
class MemoryList(MutableSequence):
class MemoryList(Sequence):
"""Special "memory list" class that:
- Whenever an item is added that was already in the list,
it gets moved to the back instead
- Whenever an item is looked for, it gets moved to the
back
- If a new item is added that goes over a given capacity
limit, the item at the front (oldest accessed item)
is removed (and returned)"""
def __init__(self, capacity, data=None):
"""Initialize the class"""
super(MemoryList, self).__init__()
self._capacity = capacity
if (data is not None):
@ -16,37 +24,25 @@ class MemoryList(MutableSequence):
def __repr__(self):
return "<{0} {1}, capacity {2}>".format(self.__class__.__name__, self._list, self._capacity)
def __str__(self):
return "{0}, {1}/{2}".format(self._list, len(self._list), self._capacity)
def __len__(self):
"""List length"""
return len(self._list)
def capacity(self):
return self._capacity
def __getitem__(self, ii):
"""Get a list item"""
return self._list[ii]
def __delitem__(self, ii):
"""Delete an item"""
del self._list[ii]
def __setitem__(self, ii, val):
self._list[ii] = val
def __str__(self):
return str(self._list)
def __contains__(self, val):
return val in self._list
def __iter__(self):
return self._list.__iter__()
def insert(self, ii, val):
self._list.insert(ii, val)
def append(self, val):
def add(self, val):
if val in self._list:
self._list.remove(val)
@ -58,8 +54,8 @@ class MemoryList(MutableSequence):
else:
return None
def get_next(self, cond):
val = next((v for v in self._list if cond(v)), None)
def search(self, cond, *args, **kwargs):
val = next((v for v in self._list if cond(v)), *args, **kwargs)
if val is not None:
self._list.remove(val)
self._list.append(val)

View file

@ -1,8 +1,8 @@
#!/usr/bin/env python3
# This reads a line in the format 'VARIABLE=value' and gives me the value.
# See Metadata.loadl(...) for more details
def parse_card_line(line):
# This reads a line in the format 'VARIABLE=value' and gives me the value.
# See Metadata.loadl(...) for more details
s = line.split('=', 1)
if len(s) < 2:
return ""
@ -10,35 +10,37 @@ def parse_card_line(line):
return s[1]
# This is a chat's Metadata, holding different configuration values for
# Velasco and other miscellaneous information about the chat
class Metadata(object):
# This is a chat's Metadata, holding different configuration values for
# Velasco and other miscellaneous information about the chat
def __init__(self, cid, ctype, title, count=0, period=None, answer=0.5, restricted=False, silenced=False):
self.id = str(cid)
# The Telegram chat's ID
self.type = ctype
self.id = str(cid)
# The type of chat
self.title = title
self.type = ctype
# The title of the chat
self.title = title
if period is None:
if "group" in ctype:
period = 10
# Default period for groups and supergroups
period = 10
else:
period = 2
# Default period for private or channel chats
period = 2
# The number of messages read in a chat
self.count = count
# The number of messages read
self.period = period
# This chat's configured period
self.answer = answer
self.period = period
# This chat's configured answer probability
self.restricted = restricted
self.answer = answer
# Whether some interactions are restricted to admins only
self.silenced = silenced
self.restricted = restricted
# Whether messages should silence user mentions
self.silenced = silenced
# Sets the period for a chat
# It has to be at least 1
# Returns the new value
def set_period(self, period):
if period < 1:
raise ValueError('Tried to set period a value less than 1.')
@ -46,6 +48,9 @@ class Metadata(object):
self.period = period
return self.period
# Sets the answer probability
# It's a percentage represented as a decimal between 0 and 1
# Returns the new value
def set_answer(self, prob):
if prob > 1:
raise ValueError('Tried to set answer probability higher than 1.')
@ -55,6 +60,8 @@ class Metadata(object):
self.answer = prob
return self.answer
# Dumps the metadata into a list of lines, then joined together in a string,
# ready to be written into a file
def dumps(self):
lines = ["CARD=v5"]
lines.append("CHAT_ID=" + self.id)
@ -68,10 +75,12 @@ class Metadata(object):
# lines.append("WORD_DICT=")
return ('\n'.join(lines)) + '\n'
# Creates a Metadata object from a previous text dump
def loads(text):
lines = text.splitlines()
return Metadata.loadl(lines)
# Creates a Metadata object from a list of metadata lines
def loadl(lines):
# In a perfect world, I would get both the variable name and its corresponding value
# from each side of the lines, but I know the order in which the lines are written in
@ -90,6 +99,14 @@ class Metadata(object):
silenced=(parse_card_line(lines[8]) == 'True')
)
elif version == "v3":
# Deprecated: this elif block will be removed in a new version
print("Warning! This Card format ({}) is deprecated. Update all".format(version),
"your files in case that there are still some left in old formats before",
"downloading the next update.")
# This is kept for retrocompatibility purposes, in case someone did a fork
# of this repo and still has some chat files that haven't been updated in
# a long while -- but I already converted all my files to v5
return Metadata(cid=parse_card_line(lines[1]),
ctype=parse_card_line(lines[2]),
title=parse_card_line(lines[3]),
@ -99,6 +116,12 @@ class Metadata(object):
restricted=(parse_card_line(lines[6]) == 'True')
)
elif version == "v2":
# Deprecated: this elif block will be removed in a new version
print("Warning! This Card format ({}) is deprecated. Update all".format(version),
"your files in case that there are still some left in old formats before",
"downloading the next update.")
# Also kept for retrocompatibility purposes
return Metadata(cid=parse_card_line(lines[1]),
ctype=parse_card_line(lines[2]),
title=parse_card_line(lines[3]),
@ -107,6 +130,12 @@ class Metadata(object):
answer=float(parse_card_line(lines[5]))
)
elif version == "dict:":
# Deprecated: this elif block will be removed in a new version
print("Warning! This Card format ('dict') is deprecated. Update all",
"your files in case that there are still some left in old formats before",
"downloading the next update.")
# Also kept for retrocompatibility purposes
# At some point I decided to number the versions of each dictionary format,
# but this was not always the case. This is what you get if you try to read
# whatever there is in very old files where the version should be
@ -117,7 +146,13 @@ class Metadata(object):
period=int(lines[3])
)
else:
# This is for the oldest of files
# Deprecated: this else block will be removed in a new version
print("Warning! This ancient Card format is deprecated. Update all",
"your files in case that there are still some left in old formats before",
"downloading the next update.")
# Also kept for retrocompatibility purposes
# This is for the oldest of file formats
return Metadata(cid=lines[0],
ctype=lines[1],
title=lines[2],

View file

@ -5,9 +5,9 @@ from metadata import Metadata, parse_card_line
from generator import Generator
# This gives me the chat title, or the first and maybe last
# name of the user as fallback if it's a private chat
def get_chat_title(chat):
# This gives me the chat title, or the first and maybe last
# name of the user as fallback if it's a private chat
if chat.title is not None:
return chat.title
elif chat.first_name is not None:
@ -25,40 +25,52 @@ class Memory(object):
self.content = content
# This is a chat Reader object, in charge of managing the parsing of messages
# for a specific chat, and holding said chat's metadata
class Reader(object):
# This is a chat Reader object, in charge of managing the parsing of messages
# for a specific chat, and holding said chat's metadata
# Media tagging variables
TAG_PREFIX = "^IS_"
STICKER_TAG = "^IS_STICKER^"
ANIM_TAG = "^IS_ANIMATION^"
VIDEO_TAG = "^IS_VIDEO^"
def __init__(self, metadata, vocab, max_period, logger):
def __init__(self, metadata, vocab, max_period, logger, names=[]):
# The Metadata object holding a chat's specific bot parameters
self.meta = metadata
# The Generator object holding the vocabulary learned so far
self.vocab = vocab
# The maximum period allowed for this bot
self.max_period = max_period
# The short term memory, for recently read messages (see below)
self.short_term_mem = []
# The countdown until the period ends and it's time to talk
self.countdown = self.meta.period
# The logger object shared program-wide
self.logger = logger
# The bot's nicknames + username
self.names = names
def FromChat(chat, max_period, logger):
# Create a new Reader from a Chat object
def FromChat(chat, max_period, logger):
meta = Metadata(chat.id, chat.type, get_chat_title(chat))
vocab = Generator()
return Reader(meta, vocab, max_period, logger)
# TODO: Create a new Reader from a whole Chat history
def FromHistory(history, vocab, max_period, logger):
# Create a new Reader from a whole Chat history (WIP)
return None
def FromCard(meta, vocab, max_period, logger):
# Create a new Reader from a meta's file dump
def FromCard(meta, vocab, max_period, logger):
metadata = Metadata.loads(meta)
return Reader(metadata, vocab, max_period, logger)
# Deprecated: this method will be removed in a new version
def FromFile(text, max_period, logger, vocab=None):
# Load a Reader from a file's text string (obsolete)
print("Warning! This method of loading a Reader from file (Reader.FromFile(...))",
"is deprecated, and will be removed from the next update. Use FromCard instead.")
# Load a Reader from a file's text string
lines = text.splitlines()
version = parse_card_line(lines[0]).strip()
version = version if len(version.strip()) > 1 else lines[4]
@ -86,27 +98,33 @@ class Reader(object):
r = Reader(meta, vocab, max_period, logger)
return r
def archive(self):
# Returns a nice little tuple package for the archivist to save to file.
# Also commits to long term memory any pending short term memories
def archive(self):
self.commit_memory()
return (self.meta.id, self.meta.dumps(), self.vocab.dump)
return (self.meta.id, self.meta.dumps(), self.vocab.dumps())
# Checks type. Returns "True" for "group" even if it's supergroup
def check_type(self, t):
# Checks type. Returns "True" for "group" even if it's supergroup
return t in self.meta.type
def exactly_type(self, t):
# Hard check
def exactly_type(self, t):
return t == self.meta.type
def set_title(self, title):
self.meta.title = title
# Sets a new period in the Metadata
def set_period(self, period):
if period < self.countdown:
self.countdown = max(period, 1)
return self.meta.set_period(min(period, self.max_period))
# The period has to be under max_period; otherwise, truncate to max_period
new_period = min(period, self.max_period)
set_period = self.meta.set_period(new_period)
if new_period == set_period and new_period < self.countdown:
# If successfully changed and the new period is less than the current
# remaining countdown, reduce the countdown to the new period
self.countdown = new_period
return new_period
def set_answer(self, prob):
return self.meta.set_answer(prob)
@ -141,6 +159,8 @@ class Reader(object):
def toggle_silence(self):
self.meta.silenced = (not self.meta.silenced)
# Rolls the chance for answering in this specific chat,
# according to the answer probability
def is_answering(self):
rand = random.random()
chance = self.answer()
@ -150,10 +170,13 @@ class Reader(object):
return False
return rand <= chance
# Adds a new message to the short term memory
def add_memory(self, mid, content):
mem = Memory(mid, content)
self.short_term_mem.append(mem)
# Returns a random message ID from the short memory,
# when answering to a random comment
def random_memory(self):
if len(self.short_term_mem) == 0:
return None
@ -163,6 +186,10 @@ class Reader(object):
def reset_countdown(self):
self.countdown = self.meta.period
# Reads a message
# This process will determine which kind of message it is (Sticker, Anim,
# Video, or actual text) and pre-process it accordingly for the Generator,
# then store it in the short term memory
def read(self, message):
mid = str(message.message_id)
@ -174,16 +201,25 @@ class Reader(object):
self.learn_drawing(mid, Reader.ANIM_TAG, message.animation.file_id)
elif message.video is not None:
self.learn_drawing(mid, Reader.VIDEO_TAG, message.video.file_id)
self.meta.count += 1
# Stores a multimedia message in the short term memory as a text with
# TAG + the media file ID
def learn_drawing(self, mid, tag, drawing):
self.learn(mid, tag + " " + drawing)
# Stores a text message in the short term memory
def learn(self, mid, text):
if "velasco" in text.casefold() and len(text.split()) <= 3:
for name in self.names:
if name.casefold() in text.casefold() and len(text.split()) <= 3:
# If it's 3 words or fewer and one of the bot's names is in
# the message, ignore it as it's most probably just a summon
return
self.add_memory(mid, text)
# Commits the short term memory messages into the "long term memory"
# aka the vocabulary Generator's cache
def commit_memory(self):
for mem in self.short_term_mem:
self.vocab.add(mem.content)

View file

@ -8,19 +8,24 @@ from reader import Reader, get_chat_title
from telegram.error import NetworkError
# Auxiliary print to stderr function (alongside logger messages)
def eprint(*args, **kwargs):
print(*args, end=' ', file=stderr, **kwargs)
# Auxiliary function to send a text to a chat through a bot
def send(bot, cid, text, replying=None, formatting=None, logger=None, **kwargs):
kwargs["parse_mode"] = formatting
kwargs["reply_to_message_id"] = replying
# Markdown or HTML formatting (both argument names are valid)
kwargs["parse_mode"] = formatting or kwargs.get("parse_mode")
# ID of the message it's replying to (both argument names are valid)
kwargs["reply_to_message_id"] = replying or kwargs.get("reply_to_message_id")
# Reminder that dict.get(key) defaults to None if the key isn't found
if text.startswith(Reader.TAG_PREFIX):
# We're sending a media file ID
words = text.split(maxsplit=1)
if logger:
logger.info('Sending {} "{}" to {}'.format(words[0][4:-1], words[1], cid))
# eprint('[]')
# Logs something like 'Sending VIDEO "VIDEO_ID" to CHAT_ID'
if words[0] == Reader.STICKER_TAG:
@ -30,16 +35,18 @@ def send(bot, cid, text, replying=None, formatting=None, logger=None, **kwargs):
elif words[0] == Reader.VIDEO_TAG:
return bot.send_video(cid, words[1], **kwargs)
else:
text
# It's text
if logger:
mtype = "reply" if replying else "message"
mtype = "reply" if (kwargs.get("reply_to_message_id")) else "message"
logger.info("Sending a {} to {}: '{}'".format(mtype, cid, text))
# eprint('.')
return bot.send_message(cid, text, **kwargs)
class Speaker(object):
# Marks if the period is a fixed time when to send a new message
ModeFixed = "FIXED_MODE"
# Marks if the "periodic" messages have a weighted random chance to be sent, depending on the period
ModeChance = "CHANCE_MODE"
def __init__(self, username, archivist, logger, admin=0, nicknames=[],
@ -47,35 +54,55 @@ class Speaker(object):
memory=20, mute_time=60, save_time=3600, bypass=False,
cid_whitelist=None, max_len=50
):
# List of nicknames other than the username that the bot can be called as
self.names = nicknames
# Mute time for Telegram network errors
self.mute_time = mute_time
# Last mute timestamp
self.mute_timer = None
# The bot's username, "@" included
self.username = username
# The maximum chat period for this bot
self.max_period = archivist.max_period
# The Archivist functions to load and save from and to files
self.get_reader_file = archivist.get_reader
self.store_file = archivist.store
# Archivist function to crawl all stored Readers
self.readers_pass = archivist.readers_pass
# Legacy load logging messages
logger.info("----")
logger.info("Finished loading.")
logger.info("Loaded {} chats.".format(archivist.chat_count()))
logger.info("----")
# Wakeup flag that determines if it should send a wakeup message to stored groupchats
self.wakeup = wakeup
# The logger shared program-wide
self.logger = logger
# Chance of sending messages as replies
self.reply = reply
# Chance of sending 2 messages in a row
self.repeat = repeat
# If not empty, whitelist of chat IDs to only respond to
self.cid_whitelist = cid_whitelist
# Memory list/cache for the last accessed chats
self.memory = MemoryList(memory)
# Minimum time to wait between memory saves (triggered at the next message from any chat)
self.save_time = save_time
# Last save timestamp
self.memory_timer = int(time.perf_counter())
# Admin user ID
self.admin = admin
# For testing purposes
self.bypass = bypass
# Max word length for a message
self.max_len = max_len
def announce(self, bot, announcement, check=(lambda _: True)):
# Sends an announcement to all chats that pass the check
def announce(self, bot, announcement, check=(lambda _: True)):
for reader in self.readers_pass():
try:
if check(reader):
@ -84,9 +111,9 @@ class Speaker(object):
except Exception:
pass
def wake(self, bot, wake):
# If wakeup flag is set, sends a wake-up message as announcement to all chats that
# are groups. Also, always sends a wakeup message to the 'bot admin'
def wake(self, bot, wake):
send(bot, self.admin, wake)
if self.wakeup:
@ -94,9 +121,13 @@ class Speaker(object):
return reader.check_type("group")
self.announce(bot, wake, group_check)
# Looks up a reader in the memory list
def get_reader(self, cid):
return self.memory.get_next(lambda r: r.cid() == cid)
return self.memory.search(lambda r: r.cid() == cid, None)
# Looks up and returns a reader if it's in memory, or loads up a reader from
# file, adds it to memory, and returns it. Any other reader pushed out of
# memory is saved to file
def load_reader(self, chat):
cid = str(chat.id)
reader = self.get_reader(cid)
@ -107,19 +138,24 @@ class Speaker(object):
if not reader:
reader = Reader.FromChat(chat, self.max_period, self.logger)
old_reader = self.memory.append(reader)
old_reader = self.memory.add(reader)
if old_reader is not None:
old_reader.commit_memory()
self.store(old_reader)
return reader
# Returns a reader if it's in memory, or loads it up from a file and returns
# it otherwise. Does NOT add the Reader to memory
# This is useful for command prompts that do not require the Reader to be cached
def access_reader(self, cid):
reader = self.get_reader(cid)
if reader is None:
return self.get_reader_file(cid)
return reader
# Returns True if the bot's username is called, or if one of the nicknames is
# mentioned and they're not another user's username
def mentioned(self, text):
if self.username in text:
return True
@ -128,20 +164,28 @@ class Speaker(object):
return True
return False
# Returns True if not enough time has passed since the last mute timestamp
def is_mute(self):
current_time = int(time.perf_counter())
return self.mute_timer is not None and (current_time - self.mute_timer) < self.mute_time
# Series of checks to determine if the bot should reply to a specific message, aside
# from the usual periodic messages
def should_reply(self, message, reader):
if self.is_mute():
# Not if mute time hasn't finished
return False
if not self.bypass and reader.is_restricted():
# If we're not in testing mode and the chat is restricted
user = message.chat.get_member(message.from_user.id)
if not self.user_is_admin(user):
# update.message.reply_text("You do not have permissions to do that.")
# ...And the user has no permissions, should not reply
return False
# otherwise (testing mode, or the chat is unrestricted, or the user has permissions)
replied = message.reply_to_message
text = message.text.casefold() if message.text else ""
# Only if it's a reply to a message of ours or the bot is mentioned in the message
return (((replied is not None) and (replied.from_user.name == self.username))
or (self.mentioned(text)))
@ -151,12 +195,14 @@ class Speaker(object):
else:
self.store_file(*reader.archive())
# Check if enough time for saving memory has passed
def should_save(self):
current_time = int(time.perf_counter())
elapsed = (current_time - self.memory_timer)
self.logger.debug("Save check: {}".format(elapsed))
return elapsed >= self.save_time
# Save all Readers in memory to files if it's save time
def save(self):
if self.should_save():
self.logger.info("Saving chats in memory...")
@ -165,29 +211,38 @@ class Speaker(object):
self.memory_timer = time.perf_counter()
self.logger.info("Chats saved.")
# Reads a non-command message
def read(self, update, context):
# Check for save time
self.save()
# Ignore non-message updates
if update.message is None:
return
chat = update.message.chat
reader = self.load_reader(chat)
reader.read(update.message)
# Check if it's a "replyable" message & roll the chance to do so
if self.should_reply(update.message, reader) and reader.is_answering():
self.say(context.bot, reader, replying=update.message.message_id)
return
# Update the Reader's title if it has changed since the last message read
title = get_chat_title(update.message.chat)
if title != reader.title():
reader.set_title(title)
# Decrease the countdown for the chat, and send a message if it dropped below 0
reader.countdown -= 1
if reader.countdown < 0:
reader.reset_countdown()
# Random chance to reply to a recent message
rid = reader.random_memory() if random.random() <= self.reply else None
self.say(context.bot, reader, replying=rid)
# Handles /speak command
def speak(self, update, context):
chat = (update.message.chat)
reader = self.load_reader(chat)
@ -200,12 +255,14 @@ class Speaker(object):
mid = str(update.message.message_id)
replied = update.message.reply_to_message
# Reply to the message that the command replies to, otherwise to the command itself
rid = replied.message_id if replied else mid
words = update.message.text.split()
if len(words) > 1:
reader.read(' '.join(words[1:]))
self.say(context.bot, reader, replying=rid)
# Checks user permissions. Bot admin is always considered as having full permissions
def user_is_admin(self, member):
self.logger.info("user {} ({}) requesting a restricted action".format(str(member.user.id), member.user.name))
# eprint('!')
@ -214,23 +271,30 @@ class Speaker(object):
or (member.status == 'administrator')
or (member.user.id == self.admin))
# Generate speech (message)
def speech(self, reader):
    """Produce a new message from the given reader.

    Calls ``reader.generate_message`` with ``self.max_len`` as its only
    argument and returns whatever that call returns.
    """
    generate = reader.generate_message
    return generate(self.max_len)
# Say a newly generated message
def say(self, bot, reader, replying=None, **kwargs):
cid = reader.cid()
if self.cid_whitelist is not None and cid not in self.cid_whitelist:
# Don't, if there's a whitelist and this chat is not in it
return
if self.is_mute():
# Don't, if mute time isn't over
return
try:
send(bot, cid, self.speech(reader), replying, logger=self.logger, **kwargs)
if self.bypass:
# Testing mode, force a reasonable period (to not have the bot spam one specific chat with a low period)
max_period = self.max_period
reader.set_period(random.randint(max_period // 4, max_period))
if random.random() <= self.repeat:
send(bot, cid, self.speech(reader), logger=self.logger, **kwargs)
# Consider any Network Error as a Telegram temporary ban, as I couldn't find
# out in the documentation how error 429 is handled by python-telegram-bot
except NetworkError as e:
self.logger.error("Sending a message caused network error:")
self.logger.exception(e)
@ -240,21 +304,25 @@ class Speaker(object):
self.logger.error("Sending a message caused exception:")
self.logger.exception(e)
# Handling /count command
def get_count(self, update, context):
# Replies to the command message with the number of messages the chat's
# reader reports via count(). `context` is not used here.
# The chat id is turned into a string before it is used for the lookup
cid = str(update.message.chat.id)
# NOTE(review): `reader` is bound twice in a row, so the access_reader()
# result is overwritten by load_reader() right away. This looks like a
# removed/added line pair left over from a diff - confirm which call is
# meant to stay.
reader = self.access_reader(cid)
reader = self.load_reader(cid)
# A falsy reader makes the reply read "I remember no messages."
num = str(reader.count()) if reader else "no"
update.message.reply_text("I remember {} messages.".format(num))
# Handling /get_chats command (exclusive for bot admin)
def get_chats(self, update, context):
    """Reply with a listing of the readers yielded by ``self.readers_pass()``.

    Each reader contributes one line of the form ``[<cid>]: <title>``; the
    lines are joined with newlines and sent after a fixed header through
    ``update.message.reply_text``. ``context`` is not used.
    """
    entries = []
    for reader in self.readers_pass():
        entries.append("[{}]: {}".format(reader.cid(), reader.title()))
    listing = "\n".join(entries)
    update.message.reply_text("I have the following chats:\n\n" + listing)
# Handling /period command
# Print the current period or set a new one if one is given
def period(self, update, context):
chat = update.message.chat
reader = self.access_reader(str(chat.id))
reader = self.load_reader(str(chat.id))
words = update.message.text.split()
if len(words) <= 1:
@ -270,13 +338,14 @@ class Speaker(object):
period = int(words[1])
period = reader.set_period(period)
update.message.reply_text("Period of speaking set to {}.".format(period))
self.store_file(*reader.archive())
except Exception:
update.message.reply_text("Format was confusing; period unchanged from {}.".format(reader.period()))
# Handling /answer command
# Print the current answer probability or set a new one if one is given
def answer(self, update, context):
chat = update.message.chat
reader = self.access_reader(str(chat.id))
reader = self.load_reader(str(chat.id))
words = update.message.text.split()
if len(words) <= 1:
@ -292,17 +361,18 @@ class Speaker(object):
answer = float(words[1])
answer = reader.set_answer(answer)
update.message.reply_text("Answer probability set to {}.".format(answer))
self.store_file(*reader.archive())
except Exception:
update.message.reply_text("Format was confusing; answer probability unchanged from {}.".format(reader.answer()))
# Handling /restrict command
# Toggle the restriction value if it's a group chat and the user has permissions to do so
def restrict(self, update, context):
if "group" not in update.message.chat.type:
update.message.reply_text("That only works in groups.")
return
chat = update.message.chat
user = chat.get_member(update.message.from_user.id)
reader = self.access_reader(str(chat.id))
reader = self.load_reader(str(chat.id))
if reader.is_restricted():
if not self.user_is_admin(user):
@ -311,15 +381,16 @@ class Speaker(object):
reader.toggle_restrict()
allowed = "let only admins" if reader.is_restricted() else "let everyone"
update.message.reply_text("I will {} configure me now.".format(allowed))
self.store_file(*reader.archive())
# Handling /silence command
# Toggle the silence value if it's a group chat and the user has permissions to do so
def silence(self, update, context):
if "group" not in update.message.chat.type:
update.message.reply_text("That only works in groups.")
return
chat = update.message.chat
user = chat.get_member(update.message.from_user.id)
reader = self.access_reader(str(chat.id))
reader = self.load_reader(str(chat.id))
if reader.is_restricted():
if not self.user_is_admin(user):
@ -328,8 +399,8 @@ class Speaker(object):
reader.toggle_silence()
allowed = "avoid mentioning" if reader.is_silenced() else "mention"
update.message.reply_text("I will {} people now.".format(allowed))
self.store_file(*reader.archive())
# Handling /who command
def who(self, update, context):
msg = update.message
usr = msg.from_user
@ -346,6 +417,7 @@ class Speaker(object):
msg.reply_markdown(answer)
# Handling /where command
def where(self, update, context):
msg = update.message
chat = msg.chat

View file

@ -90,7 +90,11 @@ def main():
parser.add_argument('-m', '--mute_time', metavar='T', type=int, default=60,
help='The time (in s) for the muting period when Telegram limits the bot. (default: 60).')
parser.add_argument('-s', '--save_time', metavar='T', type=int, default=3600,
help='The time (in s) for periodic saves (default: 3600).')
help='The time (in s) for periodic saves. (default: 3600)')
parser.add_argument('-p', '--min_period', metavar='MIN_P', type=int, default=1,
help='The minimum value for a chat\'s period. (default: 1)')
parser.add_argument('-P', '--max_period', metavar='MAX_P', type=int, default=100000,
help='The maximum value for a chat\'s period. (default: 100000)')
args = parser.parse_args()
@ -104,6 +108,8 @@ def main():
archivist = Archivist(logger,
chatdir=args.directory,
chatext=".vls",
min_period=args.min_period,
max_period=args.max_period,
read_only=False
)