Overhaul 2 WIP

- Generator (Markov) ✔️
- ChatCard (Chatlog) ✔️
- ChatReader (Scribe) 🚧
- Speaker 🚧
- - Speaker->get_reader()... 🚧
This commit is contained in:
vylion 2020-10-07 23:32:10 +02:00
parent 950bbfbabd
commit 328bd6adbf
11 changed files with 548 additions and 475 deletions

1
.gitignore vendored
View file

@ -1,3 +1,4 @@
chatlogs/* chatlogs/*
__pycache__/* __pycache__/*
misc/* misc/*
test/*

View file

@ -1,13 +1,14 @@
import os, errno, random, pickle import os, errno, random, pickle
from scribe import Scribe from chatreader import ChatReader as Reader
from markov import Markov from generator import Generator
class Archivist(object): class Archivist(object):
def __init__(self, logger, chatdir=None, chatext=None, admin=0, def __init__(self, logger, chatdir=None, chatext=None, admin=0,
freqIncrement=5, saveCount=15, maxFreq=100000, maxLen=50, freq_increment=5, save_count=15, max_period=100000, max_len=50,
readOnly=False, filterCids=None, bypass=False read_only=False, filter_cids=None, bypass=False
): ):
if chatdir is None or len(chatdir) == 0: if chatdir is None or len(chatdir) == 0:
raise ValueError("Chatlog directory name is empty") raise ValueError("Chatlog directory name is empty")
@ -17,43 +18,46 @@ class Archivist(object):
self.chatdir = chatdir self.chatdir = chatdir
self.chatext = chatext self.chatext = chatext
self.admin = admin self.admin = admin
self.freqIncrement = freqIncrement self.freq_increment = freq_increment
self.saveCount = saveCount self.save_count = save_count
self.maxFreq = maxFreq self.max_period = max_period
self.maxLen = maxLen self.max_len = max_len
self.readOnly = readOnly self.read_only = read_only
self.filterCids = filterCids self.filter_cids = filter_cids
self.bypass = bypass self.bypass = bypass
self.scribeFolder = chatdir + "chat_{tag}"
self.scribePath = chatdir + "chat_{tag}/{file}{ext}" def chat_folder(self, *formatting, **key_format):
return (self.chatdir + "chat_{tag}").format(*formatting, **key_format)
def chat_file(self, *formatting, **key_format):
return (self.chatdir + "chat_{tag}/{file}{ext}").format(*formatting, **key_format)
def store(self, tag, log, gen): def store(self, tag, log, gen):
scribefolder = self.scribeFolder.format(tag=tag) chat_folder = self.chat_folder(tag=tag)
cardfile = self.scribePath.format(tag=tag, file="card", ext=".txt") chat_card = self.chat_file(tag=tag, file="card", ext=".txt")
if self.readOnly: if self.read_only:
return return
try: try:
if not os.path.exists(scribefolder): if not os.path.exists(chat_folder):
os.makedirs(scribefolder, exist_ok=True) os.makedirs(chat_folder, exist_ok=True)
self.logger.info("Storing a new chat. Folder {} created.".format(scribefolder)) self.logger.info("Storing a new chat. Folder {} created.".format(chat_folder))
except: except:
self.logger.error("Failed creating {} folder.".format(scribefolder)) self.logger.error("Failed creating {} folder.".format(chat_folder))
return return
file = open(cardfile, 'w') file = open(chat_card, 'w')
file.write(log) file.write(log)
file.close() file.close()
if gen is not None: if gen is not None:
recordfile = self.scribePath.format(tag=tag, file="record", ext=self.chatext) chat_record = self.chat_file(tag=tag, file="record", ext=self.chatext)
file = open(recordfile, 'w') file = open(chat_record, 'w')
file.write(gen) file.write(gen)
file.close() file.close()
def recall(self, filename): def get_reader(self, filename):
#print("Loading chat: " + path)
file = open(self.chatdir + filename, 'rb') file = open(self.chatdir + filename, 'rb')
scribe = None scribe = None
try: try:
scribe = Scribe.Recall(pickle.load(file), self) reader, vocab = Reader.FromFile(pickle.load(file), self)
self.logger.info("Unpickled {}{}".format(self.chatdir, filename)) self.logger.info("Unpickled {}{}".format(self.chatdir, filename))
except pickle.UnpicklingError: except pickle.UnpicklingError:
file.close() file.close()
@ -68,27 +72,24 @@ class Archivist(object):
file.close() file.close()
return scribe return scribe
def wakeScribe(self, filepath): def load_reader(self, filepath):
file = open(filepath.format(filename="card", ext=".txt"), 'r') file = open(filepath.format(filename="card", ext=".txt"), 'r')
card = file.read() card = file.read()
file.close() file.close()
return Scribe.FromFile(card, self) return Reader.FromCard(card, self)
def wakeParrot(self, tag): def wakeParrot(self, tag):
filepath = self.scribePath.format(tag=tag, file="record", ext=self.chatext) filepath = self.chat_file(tag=tag, file="record", ext=self.chatext)
try: try:
file = open(filepath, 'r') file = open(filepath, 'r')
#print("\nOPening " + filepath + "\n")
record = file.read() record = file.read()
file.close() file.close()
return Markov.loads(record) return Generator.loads(record)
except: except:
self.logger.error("Parrot file {} not found.".format(filepath)) self.logger.error("Record file {} not found.".format(filepath))
return None return None
def wakeScriptorium(self): def readers_pass(self):
scriptorium = {}
directory = os.fsencode(self.chatdir) directory = os.fsencode(self.chatdir)
for subdir in os.scandir(directory): for subdir in os.scandir(directory):
dirname = subdir.name.decode("utf-8") dirname = subdir.name.decode("utf-8")
@ -96,17 +97,16 @@ class Archivist(object):
cid = dirname[5:] cid = dirname[5:]
try: try:
filepath = self.chatdir + dirname + "/{filename}{ext}" filepath = self.chatdir + dirname + "/{filename}{ext}"
scriptorium[cid] = self.wakeScribe(filepath) reader = self.load_reader(filepath)
self.logger.info("Chat {} contents:\n".format(cid) + scriptorium[cid].chat.dumps()) self.logger.info("Chat {} contents:\n".format(cid) + reader.card.dumps())
if self.bypass: if self.bypass:
scriptorium[cid].setFreq(random.randint(self.maxFreq//2, self.maxFreq)) reader.set_period(random.randint(self.max_period//2, self.max_period))
elif scriptorium[cid].freq() > self.maxFreq: elif scriptorium[cid].freq() > self.max_period:
scriptorium[cid].setFreq(self.maxFreq) scriptorium[cid].setFreq(self.max_period)
except Exception as e: except Exception as e:
self.logger.error("Failed reading {}".format(dirname)) self.logger.error("Failed reading {}".format(dirname))
self.logger.exception(e) self.logger.exception(e)
raise e raise e
return scriptorium
""" """
def wake_old(self): def wake_old(self):
@ -117,17 +117,17 @@ class Archivist(object):
filename = os.fsdecode(file) filename = os.fsdecode(file)
if filename.endswith(self.chatext): if filename.endswith(self.chatext):
cid = filename[:-(len(self.chatext))] cid = filename[:-(len(self.chatext))]
if self.filterCids is not None: if self.filter_cids is not None:
#self.logger.info("CID " + cid) #self.logger.info("CID " + cid)
if not cid in self.filterCids: if not cid in self.filter_cids:
continue continue
scriptorium[cid] = self.recall(filename) scriptorium[cid] = self.recall(filename)
scribe = scriptorium[cid] scribe = scriptorium[cid]
if scribe is not None: if scribe is not None:
if self.bypass: if self.bypass:
scribe.setFreq(random.randint(self.maxFreq//2, self.maxFreq)) scribe.setFreq(random.randint(self.max_period//2, self.max_period))
elif scribe.freq() > self.maxFreq: elif scribe.freq() > self.max_period:
scribe.setFreq(self.maxFreq) scribe.setFreq(self.max_period)
self.logger.info("Loaded chat " + scribe.title() + " [" + scribe.cid() + "]" self.logger.info("Loaded chat " + scribe.title() + " [" + scribe.cid() + "]"
"\n" + "\n".join(scribe.chat.dumps())) "\n" + "\n".join(scribe.chat.dumps()))
else: else:

5
brain.py Normal file
View file

@ -0,0 +1,5 @@
#!/usr/bin/env python3
import random
from chatreader import ChatReader as Reader

122
chatcard.py Normal file
View file

@ -0,0 +1,122 @@
#!/usr/bin/env python3
def parse_card_line(line):
    """Return the value half of a 'VARIABLE=value' card line.

    Only the first '=' separates the name from the value, so the value may
    itself contain '='. A line without any '=' yields an empty string.
    See ChatCard.loadl(...) for how the values are consumed.
    """
    _name, separator, value = line.partition('=')
    return value if separator else ""
class ChatCard(object):
    """Settings and counters for one chat, serializable as 'NAME=value' lines.

    dumps() writes the current ("v5") layout; loads()/loadl() read that layout
    as well as the older layouts handled in loadl().
    """

    def __init__(self, cid, ctype, title, count=0, period=None, answer=0.5, restricted=False, silenced=False):
        """Build a card.

        cid is stored as a string. When period is None it defaults to 10 if
        the chat type contains "group" (groups and supergroups) and to 2
        otherwise (private or channel chats).
        """
        self.id = str(cid)
        # The Telegram chat's ID
        self.type = ctype
        # The type of chat
        self.title = title
        # The title of the chat
        if period is None:
            if "group" in ctype:
                period = 10
                # Default period for groups and supergroups
            else:
                period = 2
                # Default period for private or channel chats
        self.count = count
        # The number of messages read
        self.period = period
        # This chat's configured period
        self.answer = answer
        # This chat's configured answer probability
        self.restricted = restricted
        # Whether some interactions are restricted to admins only
        self.silenced = silenced
        # Whether messages should silence user mentions

    def set_period(self, period):
        """Set and return the period. Raises ValueError if it is below 1."""
        if period < 1:
            raise ValueError('Tried to set period a value less than 1.')
        self.period = period
        return self.period

    def set_answer(self, prob):
        """Set and return the answer probability.

        Raises ValueError if prob is outside the range [0, 1].
        """
        if prob > 1:
            raise ValueError('Tried to set answer probability higher than 1.')
        elif prob < 0:
            raise ValueError('Tried to set answer probability lower than 0.')
        self.answer = prob
        return self.answer

    def dumps(self):
        """Serialize the card as newline-terminated 'NAME=value' lines (v5)."""
        lines = ["CARD=v5"]
        lines.append("CHAT_ID=" + self.id)
        lines.append("CHAT_TYPE=" + self.type)
        lines.append("CHAT_NAME=" + self.title)
        lines.append("WORD_COUNT=" + str(self.count))
        lines.append("MESSAGE_PERIOD=" + str(self.period))
        lines.append("ANSWER_PROB=" + str(self.answer))
        lines.append("RESTRICTED=" + str(self.restricted))
        lines.append("SILENCED=" + str(self.silenced))
        # lines.append("WORD_DICT=")
        return ('\n'.join(lines)) + '\n'

    @staticmethod
    def loads(text):
        """Build a ChatCard from the text of a card dump."""
        lines = text.splitlines()
        return ChatCard.loadl(lines)

    @staticmethod
    def loadl(lines):
        """Build a ChatCard from the lines of a card dump.

        The values are read by position, not by variable name: the order in
        which dumps() (and its older versions) wrote the lines is hardcoded
        here. Raises IndexError if the lines run out before the fields of the
        detected version do, and ValueError if a numeric field does not parse.
        """
        version = parse_card_line(lines[0]).strip()
        # Files without a usable version on the first line are told apart by
        # their fifth line ("dict:" in one old layout); files too short to
        # have one fall through to the oldest layout via "LOG_ZERO"
        version = version if len(version.strip()) > 1 else (lines[4] if len(lines) > 4 else "LOG_ZERO")
        if version == "v4" or version == "v5":
            return ChatCard(cid=parse_card_line(lines[1]),
                            ctype=parse_card_line(lines[2]),
                            title=parse_card_line(lines[3]),
                            count=int(parse_card_line(lines[4])),
                            period=int(parse_card_line(lines[5])),
                            answer=float(parse_card_line(lines[6])),
                            restricted=(parse_card_line(lines[7]) == 'True'),
                            silenced=(parse_card_line(lines[8]) == 'True')
                            )
        elif version == "v3":
            return ChatCard(cid=parse_card_line(lines[1]),
                            ctype=parse_card_line(lines[2]),
                            title=parse_card_line(lines[3]),
                            count=int(parse_card_line(lines[7])),
                            period=int(parse_card_line(lines[4])),
                            answer=float(parse_card_line(lines[5])),
                            restricted=(parse_card_line(lines[6]) == 'True')
                            )
        elif version == "v2":
            return ChatCard(cid=parse_card_line(lines[1]),
                            ctype=parse_card_line(lines[2]),
                            title=parse_card_line(lines[3]),
                            count=int(parse_card_line(lines[6])),
                            period=int(parse_card_line(lines[4])),
                            answer=float(parse_card_line(lines[5]))
                            )
        elif version == "dict:":
            # Versions were not always numbered. This is what very old files
            # hold in the place where the version should be; their lines
            # carry bare values instead of 'NAME=value' pairs
            return ChatCard(cid=lines[0],
                            ctype=lines[1],
                            title=lines[2],
                            count=int(lines[5]),
                            period=int(lines[3])
                            )
        else:
            # This is for the oldest of files
            return ChatCard(cid=lines[0],
                            ctype=lines[1],
                            title=lines[2],
                            period=int(lines[3])
                            )

View file

@ -1,106 +0,0 @@
#!/usr/bin/env python3
def parse(l):
s = l.split('=', 1)
if len(s) < 2:
return ""
else:
return s[1]
class Chatlog(object):
def __init__(self, cid, ctype, title, count=0, freq=None, answer=0.5, restricted=False, silenced=False):
self.id = str(cid)
self.type = ctype
self.title = title
if freq is None:
if "group" in ctype:
freq = 10
#elif ctype is "private":
else:
freq = 2
self.count = count
self.freq = freq
self.answer = answer
self.restricted = restricted
self.silenced = silenced
def add_msg(self, message):
self.gen.add_text(message)
self.count += 1
def set_freq(self, freq):
if freq < 1:
raise ValueError('Tried to set freq a value less than 1.')
else:
self.freq = freq
return self.freq
def set_answer(self, afreq):
if afreq > 1:
raise ValueError('Tried to set answer probability higher than 1.')
elif afreq < 0:
raise ValueError('Tried to set answer probability lower than 0.')
else:
self.answer = afreq
return self.answer
def dumps(self):
lines = ["LOG=v4"]
lines.append("CHAT_ID=" + self.id)
lines.append("CHAT_TYPE=" + self.type)
lines.append("CHAT_NAME=" + self.title)
lines.append("WORD_COUNT=" + str(self.count))
lines.append("MESSAGE_FREQ=" + str(self.freq))
lines.append("ANSWER_FREQ=" + str(self.answer))
lines.append("RESTRICTED=" + str(self.restricted))
lines.append("SILENCED=" + str(self.silenced))
#lines.append("WORD_DICT=")
return '\n'.join(lines)
def loads(text):
lines = text.splitlines()
return Chatlog.loadl(lines)
def loadl(lines):
version = parse(lines[0]).strip()
version = version if len(version.strip()) > 1 else (lines[4] if len(lines) > 4 else "LOG_ZERO")
if version == "v4":
return Chatlog(cid=parse(lines[1]),
ctype=parse(lines[2]),
title=parse(lines[3]),
count=int(parse(lines[4])),
freq=int(parse(lines[5])),
answer=float(parse(lines[6])),
restricted=(parse(lines[7]) == 'True'),
silenced=(parse(lines[8]) == 'True')
)
elif version == "v3":
return Chatlog(cid=parse(lines[1]),
ctype=parse(lines[2]),
title=parse(lines[3]),
count=int(parse(lines[7])),
freq=int(parse(lines[4])),
answer=float(parse(lines[5])),
restricted=(parse(lines[6]) == 'True')
)
elif version == "v2":
return Chatlog(cid=parse(lines[1]),
ctype=parse(lines[2]),
title=parse(lines[3]),
count=int(parse(lines[6])),
freq=int(parse(lines[4])),
answer=float(parse(lines[5]))
)
elif version == "dict:":
return Chatlog(cid=lines[0],
ctype=lines[1],
title=lines[2],
count=int(lines[5]),
freq=int(lines[3])
)
else:
return Chatlog(cid=lines[0],
ctype=lines[1],
title=lines[2],
freq=int(lines[3])
)

190
chatreader.py Normal file
View file

@ -0,0 +1,190 @@
#!/usr/bin/env python3
import random
from chatcard import ChatCard, parse_card_line
from generator import Generator
def get_chat_title(chat):
    """Return a display title for a chat.

    Uses the chat title when there is one; otherwise falls back to the
    first name, followed by the last name when that is set too (the
    private-chat case). Returns an empty string when neither a title nor a
    first name is available.
    """
    if chat.title is not None:
        return chat.title
    if chat.first_name is None:
        return ""
    if chat.last_name is None:
        return chat.first_name
    return chat.first_name + " " + chat.last_name
class Memory(object):
    """Pairs an ID (mid) with the content stored under it."""

    def __init__(self, mid, content):
        self.id, self.content = mid, content
class ChatReader(object):
    """Reads the messages of one chat and keeps them until they are committed.

    Wraps a ChatCard (the chat's settings and counters) and a list of
    short-term memories that commit_long_term() hands over to a vocabulary
    object.
    """

    TAG_PREFIX = "^IS_"
    STICKER_TAG = "^IS_STICKER^"
    ANIM_TAG = "^IS_ANIMATION^"
    VIDEO_TAG = "^IS_VIDEO^"

    def __init__(self, chatcard, max_period, logger):
        self.card = chatcard
        self.max_period = max_period
        # Upper bound applied by set_period()
        self.short_term_mem = []
        # Memory objects not yet committed to a vocabulary
        self.countdown = self.card.period
        self.logger = logger

    @staticmethod
    def FromChat(chat, max_period, logger, newchat=False):
        """Create a new ChatReader from a Chat object (newchat is unused)."""
        card = ChatCard(chat.id, chat.type, get_chat_title(chat))
        return ChatReader(card, max_period, logger)

    @staticmethod
    def FromData(data, max_period, logger):
        """Create a new ChatReader from a whole Chat history (WIP)."""
        return None

    @staticmethod
    def FromCard(card, max_period, logger):
        """Create a new ChatReader from a card's file dump."""
        chatcard = ChatCard.loads(card)
        return ChatReader(chatcard, max_period, logger)

    @staticmethod
    def FromFile(text, max_period, logger):
        """Load a ChatReader from a file's text string.

        Returns a (reader, vocab) tuple for every format. vocab is None for
        v4/v5 files, which hold only the card because the chat metadata and
        the cache are no longer saved together; for older formats it is the
        Generator built from the cache that follows the card lines.
        """
        lines = text.splitlines()
        version = parse_card_line(lines[0]).strip()
        if len(version) <= 1:
            # Same fallback ChatCard.loadl uses, so files too short to have
            # a fifth line are treated as the oldest format, not a crash
            version = lines[4] if len(lines) > 4 else "LOG_ZERO"
        logger.info("Dictionary version: {} ({} lines)".format(version, len(lines)))
        vocab = None
        if version == "v4" or version == "v5":
            card = ChatCard.loads(text)
        elif version == "v3":
            card = ChatCard.loadl(lines[0:8])
            cache = '\n'.join(lines[9:])
            vocab = Generator.loads(cache)
        elif version == "v2":
            card = ChatCard.loadl(lines[0:7])
            cache = '\n'.join(lines[8:])
            vocab = Generator.loads(cache)
        elif version == "dict:":
            card = ChatCard.loadl(lines[0:6])
            cache = '\n'.join(lines[6:])
            vocab = Generator.loads(cache)
        else:
            card = ChatCard.loadl(lines[0:4])
            cache = lines[4:]
            vocab = Generator(load=cache, mode=Generator.MODE_LIST)
            # raise SyntaxError("ChatReader: ChatCard format unrecognized.")
        s = ChatReader(card, max_period, logger)
        return (s, vocab)

    def archive(self, vocab):
        """Return an (id, card dump, vocab) tuple for the archivist to save.

        Also commits any pending short term memories to vocab first.
        """
        self.commit_long_term(vocab)
        return (self.card.id, self.card.dumps(), vocab)

    def check_type(self, t):
        """Substring check: "group" also matches "supergroup"."""
        return t in self.card.type

    def exactly_type(self, t):
        """Exact check of the chat type."""
        return t == self.card.type

    def set_title(self, title):
        self.card.title = title

    def set_period(self, period):
        """Set the card's period, capped at max_period, and return it.

        A running countdown that is longer than the new period is shortened
        to it (but never below 1).
        """
        if period < self.countdown:
            self.countdown = max(period, 1)
        return self.card.set_period(min(period, self.max_period))

    def set_answer(self, prob):
        return self.card.set_answer(prob)

    def cid(self):
        return str(self.card.id)

    def count(self):
        return self.card.count

    def period(self):
        return self.card.period

    def title(self):
        return self.card.title

    def answer(self):
        return self.card.answer

    def ctype(self):
        return self.card.type

    def is_restricted(self):
        return self.card.restricted

    def toggle_restrict(self):
        self.card.restricted = (not self.card.restricted)

    def is_silenced(self):
        return self.card.silenced

    def toggle_silence(self):
        self.card.silenced = (not self.card.silenced)

    def is_answering(self):
        """Randomly decide whether to answer, per the card's probability."""
        rand = random.random()
        chance = self.answer()
        if chance == 1:
            return True
        elif chance == 0:
            return False
        return rand <= chance

    def add_memory(self, mid, content):
        mem = Memory(mid, content)
        self.short_term_mem.append(mem)

    def random_memory(self):
        """Return the ID of a random short term memory.

        Raises IndexError when there are no short term memories.
        """
        mem = random.choice(self.short_term_mem)
        return mem.id

    def reset_countdown(self):
        self.countdown = self.card.period

    def read(self, message):
        """Learn from a message and count it.

        Text is learned as is; a sticker, animation or video is learned as
        its tag followed by its file ID. The card's count goes up by one
        even when the message has none of those.
        """
        mid = str(message.message_id)
        if message.text is not None:
            self.learn(mid, message.text)
        elif message.sticker is not None:
            self.learn_drawing(mid, ChatReader.STICKER_TAG, message.sticker.file_id)
        elif message.animation is not None:
            self.learn_drawing(mid, ChatReader.ANIM_TAG, message.animation.file_id)
        elif message.video is not None:
            self.learn_drawing(mid, ChatReader.VIDEO_TAG, message.video.file_id)
        self.card.count += 1

    def learn_drawing(self, mid, tag, drawing):
        self.learn(mid, tag + " " + drawing)

    def learn(self, mid, text):
        """Store text as a short term memory.

        Texts of three words or fewer that contain "velasco" (any case) are
        skipped.
        """
        if "velasco" in text.casefold() and len(text.split()) <= 3:
            return
        self.add_memory(mid, text)

    def commit_long_term(self, vocab):
        """Add every short term memory's content to vocab, then forget them."""
        for mem in self.short_term_mem:
            vocab.add(mem.content)
        self.short_term_mem = []

    """
    def learnFrom(self, scribe):
    self.card.count += scribe.chat.count
    self.vocab.cross(scribe.vocab)
    """

166
generator.py Normal file
View file

@ -0,0 +1,166 @@
#!/usr/bin/env python3
import random
import json
def rewrite(text):
    """Split text into a list of words delimited by spaces.

    A space is appended after every newline so that the newline stays
    attached to the word before it instead of gluing two words together.
    Spaces and tabs around each word are trimmed, and pieces that end up
    empty are dropped.
    """
    pieces = text.replace('\n', '\n ').split(' ')
    trimmed = (piece.strip(' \t') for piece in pieces)
    return [piece for piece in trimmed if len(piece) > 0]
def getkey(w1, w2):
    """Return the dictionary key for a pair of words.

    Each word is stripped of surrounding whitespace and casefolded, so keys
    ignore case; the key is the string form of the resulting 2-tuple.
    """
    normalized = tuple(word.strip().casefold() for word in (w1, w2))
    return str(normalized)
def getwords(key):
    """Turn a dictionary key made by getkey() back into its list of words.

    The key is the string form of a tuple of strings, so it is parsed as a
    Python literal; that also handles words containing quotes or
    parentheses. A key that does not parse that way is split by hand on
    ', ', with the surrounding parentheses and single quotes removed.
    """
    import ast

    try:
        parsed = ast.literal_eval(key)
    except (ValueError, SyntaxError):
        parsed = None
    if isinstance(parsed, tuple) and all(isinstance(word, str) for word in parsed):
        return list(parsed)
    return [word.strip('\'') for word in key.strip('()').split(', ')]
def triplets(wordlist):
    """Yield every run of three consecutive words in wordlist.

    For ["What", "a", "lovely", "day"] this yields (What, a, lovely) and
    then (a, lovely, day). Lists shorter than three words yield nothing.
    """
    yield from zip(wordlist, wordlist[1:], wordlist[2:])
class Generator(object):
    """Markov chain text generator backed by a dictionary cache.

    The cache maps the key of two consecutive words (see getkey) to the
    list of words seen following them, plus a special HEAD entry listing
    the words that started a message.
    """

    MODE_JSON = "MODE_JSON"
    # This is to mark when we want to create a Generator object from a given JSON
    MODE_LIST = "MODE_LIST"
    # This is to mark when we want to create a Generator object from a given list of words
    MODE_CHAT_DATA = "MODE_CHAT_DATA"
    # This is to mark when we want to create a Generator object from Chat data (WIP)
    HEAD = "\n^MESSAGE_SEPARATOR^"
    TAIL = "^MESSAGE_SEPARATOR^"

    def __init__(self, load=None, mode=None):
        """Create a Generator, empty unless a load and a known mode are given.

        MODE_JSON takes a JSON string of the cache; MODE_LIST takes a list
        of message strings. Any other mode leaves the cache empty.
        """
        self.cache = {}
        # The cache is where we store our words
        if mode == Generator.MODE_JSON:
            self.cache = json.loads(load)
        elif mode == Generator.MODE_LIST:
            self.load_list(load)

    def load_list(self, many):
        """Take a list of strings and add them to the cache one by one."""
        for one in many:
            self.add(one)

    def dumps(self):
        """Dump the cache dictionary into a JSON-formatted string."""
        return json.dumps(self.cache)

    @staticmethod
    def loads(dump):
        """Load a Generator from a JSON-formatted string of its cache.

        An empty dump gives a default (empty) Generator.
        """
        if len(dump) == 0:
            return Generator()
        return Generator(load=dump, mode=Generator.MODE_JSON)

    def add(self, text):
        """Store a message in the cache.

        The words of the message are preceded by the HEAD that marks the
        beginning of a new message and followed by the TAIL that marks its
        end. HEAD is prepended after splitting so it reaches database()
        intact, where it is compared against exactly.
        """
        words = [Generator.HEAD]
        words.extend(rewrite(text + " " + Generator.TAIL))
        self.database(words)

    def database(self, words):
        """Store a list of words in the cache.

        Every run of three words adds the third word under the key of the
        first two. A run starting with HEAD also records its second word
        under the HEAD entry, as a possible message start.
        """
        for w1, w2, w3 in triplets(words):
            if w1 == Generator.HEAD:
                self.cache.setdefault(Generator.HEAD, []).append(w2)
            # Append to the chain for this key, creating it if needed
            self.cache.setdefault(getkey(w1, w2), []).append(w3)

    def generate(self, size=50, silence=False):
        """Generate a Markov text of at most size words.

        silence tells if mentions should be silenced: words starting with
        "@" then get their "@" replaced by "(@)". Returns an empty string
        when the cache holds no message start to begin from.
        """
        starters = self.cache.get(Generator.HEAD)
        if not starters:
            # Nothing in the cache, or nothing that can start a message
            return ""
        # Start with a message HEAD and a random message starting word
        w1 = random.choice(starters)
        w2 = random.choice(self.cache[getkey(Generator.HEAD, w1)])
        gen_words = []
        for _ in range(size):
            if silence and w1.startswith("@") and len(w1) > 1:
                gen_words.append(w1.replace("@", "(@)"))
            else:
                gen_words.append(w1)
            key = getkey(w1, w2)
            if w2 == Generator.TAIL or key not in self.cache:
                # We reached a separation between messages, or there is no
                # key from the last 2 words to follow the chain: stop
                break
            # The second word becomes the new first word, and a random
            # word that follows the chain becomes the new second word
            w1, w2 = w2, random.choice(self.cache[key])
        return ' '.join(gen_words)

    def cross(self, gen):
        """Merge the cache of another Generator into this one."""
        for key in gen.cache:
            if key in self.cache:
                self.cache[key].extend(gen.cache[key])
            else:
                self.cache[key] = list(gen.cache[key])

    def new_count(self):
        """Recount the messages by counting the TAIL markers in the cache.

        For when the current number of messages is unreliable.
        """
        count = 0
        for key in self.cache:
            for word in self.cache[key]:
                if word == Generator.TAIL:
                    count += 1
        return count

105
markov.py
View file

@ -1,105 +0,0 @@
#!/usr/bin/env python3
import random
import json
def getkey(w1, w2):
key = (w1.strip().casefold(), w2.strip().casefold())
return str(key)
def getwords(key):
words = key.strip('()').split(', ')
for i in range(len(words)):
words[i].strip('\'')
return words
def triples(wordlist):
# Generates triples from the given data string. So if our string were
# "What a lovely day", we'd generate (What, a, lovely) and then
# (a, lovely, day).
if len(wordlist) < 3:
return
for i in range(len(wordlist) - 2):
yield (wordlist[i], wordlist[i+1], wordlist[i+2])
class Markov(object):
ModeJson = "MODE_JSON"
ModeList = "MODE_LIST"
ModeChatData = "MODE_CHAT_DATA"
Head = "\n^MESSAGE_SEPARATOR^"
Tail = "^MESSAGE_SEPARATOR^"
def __init__(self, load=None, mode=None):
if mode is not None:
if mode == Markov.ModeJson:
self.cache = json.loads(load)
elif mode == Markov.ModeList:
self.cache = {}
self.loadList(load)
else:
self.cache = {}
def loadList(self, lines):
for line in lines:
words = [Markov.Head]
words.extend(line.split())
self.learn_words(words)
def dumps(self):
return json.dumps(self.cache)
def loads(dump):
if len(dump) == 0:
return Markov()
return Markov(load=dump, mode=Markov.ModeJson)
def learn_words(self, words):
self.database(words)
def database(self, wordlist):
for w1, w2, w3 in triples(wordlist):
if w1 == Markov.Head:
if w1 in self.cache:
self.cache[Markov.Head].append(w2)
else:
self.cache[Markov.Head] = [w2]
key = getkey(w1, w2)
if key in self.cache:
self.cache[key].append(w3)
else:
self.cache[key] = [w3]
def generate_markov_text(self, size=50, silence=False):
if len(self.cache) == 0:
return ""
w1 = random.choice(self.cache[Markov.Head])
w2 = random.choice(self.cache[getkey(Markov.Head, w1)])
gen_words = []
for i in range(size):
if silence and w1.startswith("@") and len(w1) > 1:
gen_words.append(w1.replace("@", "(@)"))
else:
gen_words.append(w1)
if w2 == Markov.Tail or not getkey(w1, w2) in self.cache:
# print("Generated text")
break
else:
w1, w2 = w2, random.choice(self.cache[getkey(w1, w2)])
return ' '.join(gen_words)
def cross(self, gen):
for key in gen.cache:
if key in self.cache:
self.cache[key].extend(d[key])
else:
self.cache[key] = list(d[key])
def new_count(self):
count = 0
for key in self.cache:
for word in self.cache[key]:
if word == Markov.Tail:
count += 1
return count

194
scribe.py
View file

@ -1,194 +0,0 @@
#!/usr/bin/env python3
import random
from chatlog import *
from markov import Markov
def getTitle(chat):
if chat.title is not None:
return chat.title
elif chat.first_name is not None:
if chat.last_name is not None:
return chat.first_name + " " + chat.last_name
else:
return chat.first_name
else:
return ""
def rewrite(text):
words = text.replace('\n', '\n ').split(' ')
i = 0
while i < len(words):
w = words[i].strip(' \t')
if len(w) > 0:
words[i] = w
else:
del words[i]
i -= 1
i += 1
return words
class Page(object):
def __init__(self, mid, content):
self.id = mid
self.content = content
class Scribe(object):
TagPrefix = "^IS_"
StickerTag = "^IS_STICKER^"
AnimTag = "^IS_ANIMATION^"
VideoTag = "^IS_VIDEO^"
def __init__(self, chatlog, archivist):
self.chat = chatlog
self.archivist = archivist
self.pages = []
self.countdown = self.chat.freq
self.logger = self.archivist.logger
def FromChat(chat, archivist, newchat=False):
chatlog = Chatlog(chat.id, chat.type, getTitle(chat))
scribe = Scribe(chatlog, archivist)
return scribe
def FromData(data, archivist):
return None
def FromFile(log, archivist):
chatlog = Chatlog.loads(log)
return Scribe(chatlog, archivist)
def Recall(text, archivist):
lines = text.splitlines()
version = parse(lines[0]).strip()
version = version if len(version.strip()) > 1 else lines[4]
archivist.logger.info( "Dictionary version: {} ({} lines)".format(version, len(lines)) )
if version == "v4":
chatlog = Chatlog.loadl(lines[0:9])
cache = '\n'.join(lines[10:])
parrot = Markov.loads(cache)
elif version == "v3":
chatlog = Chatlog.loadl(lines[0:8])
cache = '\n'.join(lines[9:])
parrot = Markov.loads(cache)
elif version == "v2":
chatlog = Chatlog.loadl(lines[0:7])
cache = '\n'.join(lines[8:])
parrot = Markov.loads(cache)
elif version == "dict:":
chatlog = Chatlog.loadl(lines[0:6])
cache = '\n'.join(lines[6:])
parrot = Markov.loads(cache)
else:
chatlog = Chatlog.loadl(lines[0:4])
cache = lines[4:]
parrot = Markov(load=cache, mode=Markov.ModeList)
#raise SyntaxError("Scribe: Chatlog format unrecognized.")
s = Scribe(chatlog, archivist)
s.parrot = parrot
return s
def store(self, parrot):
self.archivist.store(self.chat.id, self.chat.dumps(), parrot)
def checkType(self, t):
return t in self.chat.type
def compareType(self, t):
return t == self.chat.type
def setTitle(self, title):
self.chat.title = title
def setFreq(self, freq):
if freq < self.countdown:
self.countdown = max(freq, 1)
return self.chat.set_freq(min(freq, self.archivist.maxFreq))
def setAnswer(self, afreq):
return self.chat.set_answer(afreq)
def cid(self):
return str(self.chat.id)
def count(self):
return self.chat.count
def freq(self):
return self.chat.freq
def title(self):
return self.chat.title
def answer(self):
return self.chat.answer
def type(self):
return self.chat.type
def isRestricted(self):
return self.chat.restricted
def restrict(self):
self.chat.restricted = (not self.chat.restricted)
def isSilenced(self):
return self.chat.silenced
def silence(self):
self.chat.silenced = (not self.chat.silenced)
def isAnswering(self):
rand = random.random()
chance = self.answer()
if chance == 1:
return True
elif chance == 0:
return False
return rand <= chance
def addPage(self, mid, content):
page = Page(mid, content)
self.pages.append(page)
def getReference(self):
page = random.choice(self.pages)
return page.id
def resetCountdown(self):
self.countdown = self.chat.freq
def learn(self, message):
mid = str(message.message_id)
if message.text is not None:
self.read(mid, message.text)
elif message.sticker is not None:
self.learnDrawing(mid, Scribe.StickerTag, message.sticker.file_id)
elif message.animation is not None:
self.learnDrawing(mid, Scribe.AnimTag, message.animation.file_id)
elif message.video is not None:
self.learnDrawing(mid, Scribe.VideoTag, message.video.file_id)
self.chat.count += 1
def learnDrawing(self, mid, tag, drawing):
self.read(mid, tag + " " + drawing)
def read(self, mid, text):
if "velasco" in text.casefold() and len(text.split()) <= 3:
return
words = [Markov.Head]
text = text + " " + Markov.Tail
words.extend(rewrite(text))
self.addPage(mid, words)
def teachParrot(self, parrot):
for page in self.pages:
parrot.learn_words(page.content)
self.pages = []
"""
def learnFrom(self, scribe):
self.chat.count += scribe.chat.count
self.parrot.cross(scribe.parrot)
"""

View file

@ -1,24 +1,25 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
import random import random
from scribe import Scribe from chatreader import ChatReader as Reader
from markov import Markov
from telegram.error import * from telegram.error import *
def send(bot, cid, text, replying=None, format=None, logger=None, **kwargs):
kwargs["parse_mode"] = format def send(bot, cid, text, replying=None, formatting=None, logger=None, **kwargs):
kwargs["parse_mode"] = formatting
kwargs["reply_to_message_id"] = replying kwargs["reply_to_message_id"] = replying
if text.startswith(Scribe.TagPrefix): if text.startswith(Reader.TAG_PREFIX):
words = text.split(maxsplit=1) words = text.split(maxsplit=1)
if logger: if logger:
logger.info('Sending {} "{}" to {}'.format(words[0][4:-1], words[1], cid)) logger.info('Sending {} "{}" to {}'.format(words[0][4:-1], words[1], cid))
# Logs something like 'Sending VIDEO "VIDEO_ID" to CHAT_ID'
if words[0] == Scribe.StickerTag: if words[0] == Reader.STICKER_TAG:
return bot.send_sticker(cid, words[1], **kwargs) return bot.send_sticker(cid, words[1], **kwargs)
elif words[0] == Scribe.AnimTag: elif words[0] == Reader.ANIM_TAG:
return bot.send_animation(cid, words[1], **kwargs) return bot.send_animation(cid, words[1], **kwargs)
elif words[0] == Scribe.VideoTag: elif words[0] == Reader.VIDEO_TAG:
return bot.send_video(cid, words[1], **kwargs) return bot.send_video(cid, words[1], **kwargs)
else: else:
text text
@ -27,17 +28,6 @@ def send(bot, cid, text, replying=None, format=None, logger=None, **kwargs):
logger.info("Sending a {} to {}: '{}'".format(mtype, cid, text)) logger.info("Sending a {} to {}: '{}'".format(mtype, cid, text))
return bot.send_message(cid, text, **kwargs) return bot.send_message(cid, text, **kwargs)
def getTitle(chat):
    """Return a display name for a chat.

    Uses ``chat.title`` when it is set. Otherwise joins ``chat.first_name``
    and ``chat.last_name`` with a space, and falls back to ``"Unknown"``
    when neither yields any text.
    """
    if chat.title:
        return chat.title
    # Missing name parts become empty strings; strip() drops the stray
    # separator left behind when only one of them is present.
    name = " ".join([chat.first_name or "", chat.last_name or ""]).strip()
    return name or "Unknown"
class Speaker(object): class Speaker(object):
ModeFixed = "FIXED_MODE" ModeFixed = "FIXED_MODE"
@ -59,7 +49,7 @@ class Speaker(object):
self.reply = reply self.reply = reply
self.repeat = repeat self.repeat = repeat
self.filterCids = archivist.filterCids self.filterCids = archivist.filterCids
self.bypass=archivist.bypass self.bypass = archivist.bypass
def announce(self, announcement, check=(lambda _: True)): def announce(self, announcement, check=(lambda _: True)):
for scribe in self.scriptorium: for scribe in self.scriptorium:
@ -79,7 +69,7 @@ class Speaker(object):
def getScribe(self, chat): def getScribe(self, chat):
cid = str(chat.id) cid = str(chat.id)
if not cid in self.scriptorium: if not cid in self.scriptorium:
scribe = Scribe.FromChat(chat, self.archivist, newchat=True) scribe = Reader.FromChat(chat, self.archivist, newchat=True)
self.scriptorium[cid] = scribe self.scriptorium[cid] = scribe
return scribe return scribe
else: else:

View file

@ -18,7 +18,7 @@ speakerbot = None
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
# Enable logging # Enable logging
log_format="[{}][%(asctime)s]%(name)s::%(levelname)s: %(message)s".format(username.upper()) log_format = "[{}][%(asctime)s]%(name)s::%(levelname)s: %(message)s".format(username.upper())
if coloredlogsError: if coloredlogsError:
logging.basicConfig(format=log_format, level=logging.INFO) logging.basicConfig(format=log_format, level=logging.INFO)
@ -49,20 +49,24 @@ about_msg = "I am yet another Markov Bot experiment. I read everything you type
explanation = "I decompose every message I read in groups of 3 consecutive words, so for each consecutive pair I save the word that can follow them. I then use this to make my own messages. At first I will only repeat your messages because for each 2 words I will have very few possible following words.\n\nI also separate my vocabulary by chats, so anything I learn in one chat I will only say in that chat. For privacy, you know. Also, I save my vocabulary in the form of a json dictionary, so no logs are kept.\n\nMy default frequency in private chats is one message of mine from each 2 messages received, and in group chats it\'s 10 messages I read for each message I send." explanation = "I decompose every message I read in groups of 3 consecutive words, so for each consecutive pair I save the word that can follow them. I then use this to make my own messages. At first I will only repeat your messages because for each 2 words I will have very few possible following words.\n\nI also separate my vocabulary by chats, so anything I learn in one chat I will only say in that chat. For privacy, you know. Also, I save my vocabulary in the form of a json dictionary, so no logs are kept.\n\nMy default frequency in private chats is one message of mine from each 2 messages received, and in group chats it\'s 10 messages I read for each message I send."
def static_reply(text, format=None):
    """Build a handler that always answers with the same text.

    ``text`` is the reply body. ``format`` is forwarded unchanged as the
    ``parse_mode`` keyword of ``update.message.reply_text``.

    Returns a ``reply(bot, update)`` callable; its ``bot`` argument is unused.
    """
    # NOTE: ``format`` shadows the builtin, but the name is kept so that
    # existing keyword callers keep working.
    def reply(bot, update):
        update.message.reply_text(text, parse_mode=format)

    return reply
def error(bot, update, error):
    """Log a warning naming the update and the error it caused.

    ``bot`` is unused.
    """
    # Pass the values as logger arguments so the message is only formatted
    # when the record is actually emitted.
    logger.warning('Update "%s" caused error "%s"', update, error)
def stop(bot, update):
    """Log that the chat behind ``update`` blocked the bot.

    ``bot`` is unused.
    """
    # getScribe() reads ``chat.id`` itself, so it must receive the chat
    # object; handing it the bare id would fail on the attribute lookup.
    scribe = speakerbot.getScribe(update.message.chat)
    # Removal of the stored chat data is currently disabled:
    # del chatlogs[chatlog.id]
    # os.remove(LOG_DIR + chatlog.id + LOG_EXT)
    logger.warning("I got blocked by user {} [{}]".format(scribe.title(), scribe.cid()))
def main(): def main():
global speakerbot global speakerbot
parser = argparse.ArgumentParser(description='A Telegram markov bot.') parser = argparse.ArgumentParser(description='A Telegram markov bot.')
@ -76,7 +80,7 @@ def main():
updater = Updater(args.token) updater = Updater(args.token)
#filterCids=["-1001036575277", "-1001040087584", str(args.admin_id)] #filterCids=["-1001036575277", "-1001040087584", str(args.admin_id)]
filterCids=None filterCids = None
archivist = Archivist(logger, archivist = Archivist(logger,
chatdir="chatlogs/", chatdir="chatlogs/",