mirror of
https://gitlab.com/vylion/velascobot.git
synced 2025-04-19 21:46:35 +02:00
Overhaul 2 WIP
- Generator (Markov) ✔️ - ChatCard (Chatlog) ✔️ - ChatReader (Scribe) 🚧 - Speaker 🚧 - - Speaker->get_reader()... 🚧
This commit is contained in:
parent
950bbfbabd
commit
328bd6adbf
11 changed files with 548 additions and 475 deletions
1
.gitignore
vendored
1
.gitignore
vendored
|
@ -1,3 +1,4 @@
|
|||
chatlogs/*
|
||||
__pycache__/*
|
||||
misc/*
|
||||
test/*
|
||||
|
|
90
archivist.py
90
archivist.py
|
@ -1,13 +1,14 @@
|
|||
|
||||
import os, errno, random, pickle
|
||||
from scribe import Scribe
|
||||
from markov import Markov
|
||||
from chatreader import ChatReader as Reader
|
||||
from generator import Generator
|
||||
|
||||
|
||||
class Archivist(object):
|
||||
|
||||
def __init__(self, logger, chatdir=None, chatext=None, admin=0,
|
||||
freqIncrement=5, saveCount=15, maxFreq=100000, maxLen=50,
|
||||
readOnly=False, filterCids=None, bypass=False
|
||||
freq_increment=5, save_count=15, max_period=100000, max_len=50,
|
||||
read_only=False, filter_cids=None, bypass=False
|
||||
):
|
||||
if chatdir is None or len(chatdir) == 0:
|
||||
raise ValueError("Chatlog directory name is empty")
|
||||
|
@ -17,43 +18,46 @@ class Archivist(object):
|
|||
self.chatdir = chatdir
|
||||
self.chatext = chatext
|
||||
self.admin = admin
|
||||
self.freqIncrement = freqIncrement
|
||||
self.saveCount = saveCount
|
||||
self.maxFreq = maxFreq
|
||||
self.maxLen = maxLen
|
||||
self.readOnly = readOnly
|
||||
self.filterCids = filterCids
|
||||
self.freq_increment = freq_increment
|
||||
self.save_count = save_count
|
||||
self.max_period = max_period
|
||||
self.max_len = max_len
|
||||
self.read_only = read_only
|
||||
self.filter_cids = filter_cids
|
||||
self.bypass = bypass
|
||||
self.scribeFolder = chatdir + "chat_{tag}"
|
||||
self.scribePath = chatdir + "chat_{tag}/{file}{ext}"
|
||||
|
||||
def chat_folder(self, *formatting, **key_format):
|
||||
return (self.chatdir + "chat_{tag}").format(*formatting, **key_format)
|
||||
|
||||
def chat_file(self, *formatting, **key_format):
|
||||
return (self.chatdir + "chat_{tag}/{file}{ext}").format(*formatting, **key_format)
|
||||
|
||||
def store(self, tag, log, gen):
|
||||
scribefolder = self.scribeFolder.format(tag=tag)
|
||||
cardfile = self.scribePath.format(tag=tag, file="card", ext=".txt")
|
||||
if self.readOnly:
|
||||
chat_folder = self.chat_folder(tag=tag)
|
||||
chat_card = self.chat_file(tag=tag, file="card", ext=".txt")
|
||||
if self.read_only:
|
||||
return
|
||||
try:
|
||||
if not os.path.exists(scribefolder):
|
||||
os.makedirs(scribefolder, exist_ok=True)
|
||||
self.logger.info("Storing a new chat. Folder {} created.".format(scribefolder))
|
||||
if not os.path.exists(chat_folder):
|
||||
os.makedirs(chat_folder, exist_ok=True)
|
||||
self.logger.info("Storing a new chat. Folder {} created.".format(chat_folder))
|
||||
except:
|
||||
self.logger.error("Failed creating {} folder.".format(scribefolder))
|
||||
self.logger.error("Failed creating {} folder.".format(chat_folder))
|
||||
return
|
||||
file = open(cardfile, 'w')
|
||||
file = open(chat_card, 'w')
|
||||
file.write(log)
|
||||
file.close()
|
||||
if gen is not None:
|
||||
recordfile = self.scribePath.format(tag=tag, file="record", ext=self.chatext)
|
||||
file = open(recordfile, 'w')
|
||||
chat_record = self.chat_file(tag=tag, file="record", ext=self.chatext)
|
||||
file = open(chat_record, 'w')
|
||||
file.write(gen)
|
||||
file.close()
|
||||
|
||||
def recall(self, filename):
|
||||
#print("Loading chat: " + path)
|
||||
def get_reader(self, filename):
|
||||
file = open(self.chatdir + filename, 'rb')
|
||||
scribe = None
|
||||
try:
|
||||
scribe = Scribe.Recall(pickle.load(file), self)
|
||||
reader, vocab = Reader.FromFile(pickle.load(file), self)
|
||||
self.logger.info("Unpickled {}{}".format(self.chatdir, filename))
|
||||
except pickle.UnpicklingError:
|
||||
file.close()
|
||||
|
@ -68,27 +72,24 @@ class Archivist(object):
|
|||
file.close()
|
||||
return scribe
|
||||
|
||||
def wakeScribe(self, filepath):
|
||||
def load_reader(self, filepath):
|
||||
file = open(filepath.format(filename="card", ext=".txt"), 'r')
|
||||
card = file.read()
|
||||
file.close()
|
||||
return Scribe.FromFile(card, self)
|
||||
return Reader.FromCard(card, self)
|
||||
|
||||
def wakeParrot(self, tag):
|
||||
filepath = self.scribePath.format(tag=tag, file="record", ext=self.chatext)
|
||||
filepath = self.chat_file(tag=tag, file="record", ext=self.chatext)
|
||||
try:
|
||||
file = open(filepath, 'r')
|
||||
#print("\nOPening " + filepath + "\n")
|
||||
record = file.read()
|
||||
file.close()
|
||||
return Markov.loads(record)
|
||||
return Generator.loads(record)
|
||||
except:
|
||||
self.logger.error("Parrot file {} not found.".format(filepath))
|
||||
self.logger.error("Record file {} not found.".format(filepath))
|
||||
return None
|
||||
|
||||
def wakeScriptorium(self):
|
||||
scriptorium = {}
|
||||
|
||||
def readers_pass(self):
|
||||
directory = os.fsencode(self.chatdir)
|
||||
for subdir in os.scandir(directory):
|
||||
dirname = subdir.name.decode("utf-8")
|
||||
|
@ -96,17 +97,16 @@ class Archivist(object):
|
|||
cid = dirname[5:]
|
||||
try:
|
||||
filepath = self.chatdir + dirname + "/{filename}{ext}"
|
||||
scriptorium[cid] = self.wakeScribe(filepath)
|
||||
self.logger.info("Chat {} contents:\n".format(cid) + scriptorium[cid].chat.dumps())
|
||||
reader = self.load_reader(filepath)
|
||||
self.logger.info("Chat {} contents:\n".format(cid) + reader.card.dumps())
|
||||
if self.bypass:
|
||||
scriptorium[cid].setFreq(random.randint(self.maxFreq//2, self.maxFreq))
|
||||
elif scriptorium[cid].freq() > self.maxFreq:
|
||||
scriptorium[cid].setFreq(self.maxFreq)
|
||||
reader.set_period(random.randint(self.max_period//2, self.max_period))
|
||||
elif scriptorium[cid].freq() > self.max_period:
|
||||
scriptorium[cid].setFreq(self.max_period)
|
||||
except Exception as e:
|
||||
self.logger.error("Failed reading {}".format(dirname))
|
||||
self.logger.exception(e)
|
||||
raise e
|
||||
return scriptorium
|
||||
|
||||
"""
|
||||
def wake_old(self):
|
||||
|
@ -117,17 +117,17 @@ class Archivist(object):
|
|||
filename = os.fsdecode(file)
|
||||
if filename.endswith(self.chatext):
|
||||
cid = filename[:-(len(self.chatext))]
|
||||
if self.filterCids is not None:
|
||||
if self.filter_cids is not None:
|
||||
#self.logger.info("CID " + cid)
|
||||
if not cid in self.filterCids:
|
||||
if not cid in self.filter_cids:
|
||||
continue
|
||||
scriptorium[cid] = self.recall(filename)
|
||||
scribe = scriptorium[cid]
|
||||
if scribe is not None:
|
||||
if self.bypass:
|
||||
scribe.setFreq(random.randint(self.maxFreq//2, self.maxFreq))
|
||||
elif scribe.freq() > self.maxFreq:
|
||||
scribe.setFreq(self.maxFreq)
|
||||
scribe.setFreq(random.randint(self.max_period//2, self.max_period))
|
||||
elif scribe.freq() > self.max_period:
|
||||
scribe.setFreq(self.max_period)
|
||||
self.logger.info("Loaded chat " + scribe.title() + " [" + scribe.cid() + "]"
|
||||
"\n" + "\n".join(scribe.chat.dumps()))
|
||||
else:
|
||||
|
|
5
brain.py
Normal file
5
brain.py
Normal file
|
@ -0,0 +1,5 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
import random
|
||||
from chatreader import ChatReader as Reader
|
||||
|
122
chatcard.py
Normal file
122
chatcard.py
Normal file
|
@ -0,0 +1,122 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
def parse_card_line(line):
    """Return the value part of a 'VARIABLE=value' card line.

    Everything after the first '=' is returned verbatim (later '=' signs
    are kept). A line without any '=' yields an empty string.
    See ChatCard.loadl(...) for how the values are consumed.
    """
    _name, _sep, value = line.partition('=')
    return value
|
||||
|
||||
|
||||
class ChatCard(object):
    """Metadata card of a single chat, serialisable as 'KEY=value' lines.

    Attributes:
        id: the Telegram chat's ID, always stored as a string.
        type: the type of chat (e.g. contains "group" for groups/supergroups).
        title: the title of the chat.
        count: the number of messages read.
        period: this chat's configured period.
        answer: this chat's configured answer probability.
        restricted: whether some interactions are restricted to admins only.
        silenced: whether messages should silence user mentions.
    """

    def __init__(self, cid, ctype, title, count=0, period=None, answer=0.5, restricted=False, silenced=False):
        self.id = str(cid)
        self.type = ctype
        self.title = title
        if period is None:
            # Default period: 10 for groups and supergroups,
            # 2 for private or channel chats
            period = 10 if "group" in ctype else 2
        self.count = count
        self.period = period
        self.answer = answer
        self.restricted = restricted
        self.silenced = silenced

    def set_period(self, period):
        """Set the period and return it; raise ValueError if it is below 1."""
        if period < 1:
            raise ValueError('Tried to set period a value less than 1.')
        self.period = period
        return self.period

    def set_answer(self, prob):
        """Set the answer probability and return it.

        Raises ValueError when prob lies outside the [0, 1] range.
        """
        if prob > 1:
            raise ValueError('Tried to set answer probability higher than 1.')
        if prob < 0:
            raise ValueError('Tried to set answer probability lower than 0.')
        self.answer = prob
        return self.answer

    def dumps(self):
        """Serialise the card as newline-terminated 'KEY=value' lines (format v5)."""
        lines = [
            "CARD=v5",
            "CHAT_ID=" + self.id,
            "CHAT_TYPE=" + self.type,
            "CHAT_NAME=" + self.title,
            "WORD_COUNT=" + str(self.count),
            "MESSAGE_PERIOD=" + str(self.period),
            "ANSWER_PROB=" + str(self.answer),
            "RESTRICTED=" + str(self.restricted),
            "SILENCED=" + str(self.silenced),
        ]
        return ('\n'.join(lines)) + '\n'

    @staticmethod
    def loads(text):
        """Build a ChatCard from the text of a card dump."""
        return ChatCard.loadl(text.splitlines())

    @staticmethod
    def loadl(lines):
        """Build a ChatCard from the lines of a card dump, in any known format.

        The values are read positionally: the order in which each format
        version wrote its lines is hardcoded here, the variable names on the
        left of '=' are ignored.

        Declared as a static method so it can be called on the class and on
        instances alike.
        """
        version = parse_card_line(lines[0]).strip()
        if len(version.strip()) <= 1:
            # Very old files have no version header; their 5th line tells
            # the format apart (or there is no 5th line at all)
            version = lines[4] if len(lines) > 4 else "LOG_ZERO"

        if version == "v4" or version == "v5":
            return ChatCard(cid=parse_card_line(lines[1]),
                            ctype=parse_card_line(lines[2]),
                            title=parse_card_line(lines[3]),
                            count=int(parse_card_line(lines[4])),
                            period=int(parse_card_line(lines[5])),
                            answer=float(parse_card_line(lines[6])),
                            restricted=(parse_card_line(lines[7]) == 'True'),
                            silenced=(parse_card_line(lines[8]) == 'True')
                            )
        elif version == "v3":
            return ChatCard(cid=parse_card_line(lines[1]),
                            ctype=parse_card_line(lines[2]),
                            title=parse_card_line(lines[3]),
                            count=int(parse_card_line(lines[7])),
                            period=int(parse_card_line(lines[4])),
                            answer=float(parse_card_line(lines[5])),
                            restricted=(parse_card_line(lines[6]) == 'True')
                            )
        elif version == "v2":
            return ChatCard(cid=parse_card_line(lines[1]),
                            ctype=parse_card_line(lines[2]),
                            title=parse_card_line(lines[3]),
                            count=int(parse_card_line(lines[6])),
                            period=int(parse_card_line(lines[4])),
                            answer=float(parse_card_line(lines[5]))
                            )
        elif version == "dict:":
            # Versions were not always numbered. This is what very old files
            # look like in the place where the version should be
            return ChatCard(cid=lines[0],
                            ctype=lines[1],
                            title=lines[2],
                            count=int(lines[5]),
                            period=int(lines[3])
                            )
        else:
            # This is for the oldest of files
            return ChatCard(cid=lines[0],
                            ctype=lines[1],
                            title=lines[2],
                            period=int(lines[3])
                            )
|
106
chatlog.py
106
chatlog.py
|
@ -1,106 +0,0 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
def parse(l):
    """Return whatever follows the first '=' in l, or "" if there is none."""
    pieces = l.split('=', 1)
    return pieces[1] if len(pieces) >= 2 else ""
|
||||
|
||||
class Chatlog(object):
    """Metadata of a single chat, serialisable as 'KEY=value' lines (format v4).

    Attributes:
        id: the chat's ID, always stored as a string.
        type: the type of chat.
        title: the title of the chat.
        count: message counter.
        freq: the chat's configured message frequency.
        answer: the chat's configured answer probability.
        restricted: restriction flag.
        silenced: silence flag.
    """

    def __init__(self, cid, ctype, title, count=0, freq=None, answer=0.5, restricted=False, silenced=False):
        self.id = str(cid)
        self.type = ctype
        self.title = title
        if freq is None:
            # Default: 10 when the type contains "group", 2 otherwise
            freq = 10 if "group" in ctype else 2
        self.count = count
        self.freq = freq
        self.answer = answer
        self.restricted = restricted
        self.silenced = silenced

    def add_msg(self, message):
        """Feed a message to the attached generator and bump the counter.

        NOTE(review): nothing in this class assigns self.gen, so this raises
        AttributeError unless a caller attaches a `gen` attribute first.
        Confirm whether the method is still used.
        """
        self.gen.add_text(message)
        self.count += 1

    def set_freq(self, freq):
        """Set the frequency and return it; raise ValueError if it is below 1."""
        if freq < 1:
            raise ValueError('Tried to set freq a value less than 1.')
        self.freq = freq
        return self.freq

    def set_answer(self, afreq):
        """Set the answer probability and return it.

        Raises ValueError when afreq lies outside the [0, 1] range.
        """
        if afreq > 1:
            raise ValueError('Tried to set answer probability higher than 1.')
        if afreq < 0:
            raise ValueError('Tried to set answer probability lower than 0.')
        self.answer = afreq
        return self.answer

    def dumps(self):
        """Serialise the chat metadata as 'KEY=value' lines (no trailing newline)."""
        lines = [
            "LOG=v4",
            "CHAT_ID=" + self.id,
            "CHAT_TYPE=" + self.type,
            "CHAT_NAME=" + self.title,
            "WORD_COUNT=" + str(self.count),
            "MESSAGE_FREQ=" + str(self.freq),
            "ANSWER_FREQ=" + str(self.answer),
            "RESTRICTED=" + str(self.restricted),
            "SILENCED=" + str(self.silenced),
        ]
        return '\n'.join(lines)

    @staticmethod
    def loads(text):
        """Build a Chatlog from the text of a dump."""
        return Chatlog.loadl(text.splitlines())

    @staticmethod
    def loadl(lines):
        """Build a Chatlog from the lines of a dump, in any known format.

        Values are read positionally per format version. Declared as a static
        method so it can be called on the class and on instances alike.
        """
        version = parse(lines[0]).strip()
        if len(version.strip()) <= 1:
            # No version header: tell old formats apart by their 5th line
            version = lines[4] if len(lines) > 4 else "LOG_ZERO"

        if version == "v4":
            return Chatlog(cid=parse(lines[1]),
                           ctype=parse(lines[2]),
                           title=parse(lines[3]),
                           count=int(parse(lines[4])),
                           freq=int(parse(lines[5])),
                           answer=float(parse(lines[6])),
                           restricted=(parse(lines[7]) == 'True'),
                           silenced=(parse(lines[8]) == 'True')
                           )
        elif version == "v3":
            return Chatlog(cid=parse(lines[1]),
                           ctype=parse(lines[2]),
                           title=parse(lines[3]),
                           count=int(parse(lines[7])),
                           freq=int(parse(lines[4])),
                           answer=float(parse(lines[5])),
                           restricted=(parse(lines[6]) == 'True')
                           )
        elif version == "v2":
            return Chatlog(cid=parse(lines[1]),
                           ctype=parse(lines[2]),
                           title=parse(lines[3]),
                           count=int(parse(lines[6])),
                           freq=int(parse(lines[4])),
                           answer=float(parse(lines[5]))
                           )
        elif version == "dict:":
            return Chatlog(cid=lines[0],
                           ctype=lines[1],
                           title=lines[2],
                           count=int(lines[5]),
                           freq=int(lines[3])
                           )
        else:
            return Chatlog(cid=lines[0],
                           ctype=lines[1],
                           title=lines[2],
                           freq=int(lines[3])
                           )
|
190
chatreader.py
Normal file
190
chatreader.py
Normal file
|
@ -0,0 +1,190 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
import random
|
||||
from chatcard import ChatCard, parse_card_line
|
||||
from generator import Generator
|
||||
|
||||
|
||||
def get_chat_title(chat):
    """Return a display title for a chat.

    Uses the chat title when there is one; otherwise falls back to the
    first name (followed by the last name when present), and finally to
    an empty string.
    """
    if chat.title is not None:
        return chat.title
    if chat.first_name is None:
        return ""
    if chat.last_name is None:
        return chat.first_name
    return chat.first_name + " " + chat.last_name
|
||||
|
||||
|
||||
class Memory(object):
    """A single remembered message: its ID paired with its content."""

    def __init__(self, mid, content):
        self.id, self.content = mid, content
|
||||
|
||||
|
||||
class ChatReader(object):
    """Reading state of one chat.

    Wraps the chat's ChatCard, buffers incoming messages as short term
    memories until they are committed to a vocabulary, and keeps a countdown
    that starts at the card's period.
    """

    TAG_PREFIX = "^IS_"
    STICKER_TAG = "^IS_STICKER^"
    ANIM_TAG = "^IS_ANIMATION^"
    VIDEO_TAG = "^IS_VIDEO^"

    def __init__(self, chatcard, max_period, logger):
        self.card = chatcard
        self.max_period = max_period
        self.short_term_mem = []
        self.countdown = self.card.period
        self.logger = logger

    @staticmethod
    def FromChat(chat, max_period, logger, newchat=False):
        """Create a new ChatReader from a Chat object (newchat is unused here)."""
        card = ChatCard(chat.id, chat.type, get_chat_title(chat))
        return ChatReader(card, max_period, logger)

    @staticmethod
    def FromData(data, max_period, logger):
        """Create a new ChatReader from a whole Chat history (WIP: returns None)."""
        return None

    @staticmethod
    def FromCard(card, max_period, logger):
        """Create a new ChatReader from a card's file dump."""
        chatcard = ChatCard.loads(card)
        return ChatReader(chatcard, max_period, logger)

    @staticmethod
    def FromFile(text, max_period, logger):
        """Load a ChatReader from a file's text string.

        Returns:
            A (reader, vocab) tuple for every format version. vocab is the
            Generator stored alongside the card in old formats, or None for
            v4/v5 files, where the chat metadata and the cache are no longer
            saved together.
        """
        lines = text.splitlines()
        version = parse_card_line(lines[0]).strip()
        if len(version.strip()) <= 1:
            # Same fallback as ChatCard.loadl: unversioned files are told
            # apart by their 5th line, if they even have one
            version = lines[4] if len(lines) > 4 else "LOG_ZERO"
        logger.info("Dictionary version: {} ({} lines)".format(version, len(lines)))
        vocab = None
        if version == "v4" or version == "v5":
            # The file only holds the card; there is no cache to load
            card = ChatCard.loadl(lines)
        elif version == "v3":
            card = ChatCard.loadl(lines[0:8])
            cache = '\n'.join(lines[9:])
            vocab = Generator.loads(cache)
        elif version == "v2":
            card = ChatCard.loadl(lines[0:7])
            cache = '\n'.join(lines[8:])
            vocab = Generator.loads(cache)
        elif version == "dict:":
            card = ChatCard.loadl(lines[0:6])
            cache = '\n'.join(lines[6:])
            vocab = Generator.loads(cache)
        else:
            card = ChatCard.loadl(lines[0:4])
            cache = lines[4:]
            vocab = Generator(load=cache, mode=Generator.MODE_LIST)
            # raise SyntaxError("ChatReader: ChatCard format unrecognized.")
        s = ChatReader(card, max_period, logger)
        return (s, vocab)

    def archive(self, vocab):
        """Return a (chat id, card dump, vocab) tuple for the archivist to save.

        Also commits to long term memory any pending short term memories.
        """
        self.commit_long_term(vocab)
        return (self.card.id, self.card.dumps(), vocab)

    def check_type(self, t):
        """Loose type check: substring match, so "group" matches "supergroup"."""
        return t in self.card.type

    def exactly_type(self, t):
        """Hard type check: exact equality with the card's type."""
        return t == self.card.type

    def set_title(self, title):
        self.card.title = title

    def set_period(self, period):
        """Set the card's period, capped at max_period, and return it.

        The running countdown is lowered as well when the new period is
        shorter than what is left of it (never below 1).
        """
        if period < self.countdown:
            self.countdown = max(period, 1)
        return self.card.set_period(min(period, self.max_period))

    def set_answer(self, prob):
        return self.card.set_answer(prob)

    def cid(self):
        return str(self.card.id)

    def count(self):
        return self.card.count

    def period(self):
        return self.card.period

    def title(self):
        return self.card.title

    def answer(self):
        return self.card.answer

    def ctype(self):
        return self.card.type

    def is_restricted(self):
        return self.card.restricted

    def toggle_restrict(self):
        self.card.restricted = (not self.card.restricted)

    def is_silenced(self):
        return self.card.silenced

    def toggle_silence(self):
        self.card.silenced = (not self.card.silenced)

    def is_answering(self):
        """Roll against the card's answer probability.

        A probability of exactly 1 always answers and exactly 0 never does.
        """
        rand = random.random()
        chance = self.answer()
        if chance == 1:
            return True
        elif chance == 0:
            return False
        return rand <= chance

    def add_memory(self, mid, content):
        mem = Memory(mid, content)
        self.short_term_mem.append(mem)

    def random_memory(self):
        """Return the ID of a random short term memory.

        random.choice raises IndexError when there are no memories.
        """
        mem = random.choice(self.short_term_mem)
        return mem.id

    def reset_countdown(self):
        self.countdown = self.card.period

    def read(self, message):
        """Learn from a message (text, sticker, animation or video) and count it."""
        mid = str(message.message_id)

        if message.text is not None:
            # Text goes straight to learn(); calling read() again here would
            # fail, since read() only takes the message itself
            self.learn(mid, message.text)
        elif message.sticker is not None:
            self.learn_drawing(mid, ChatReader.STICKER_TAG, message.sticker.file_id)
        elif message.animation is not None:
            self.learn_drawing(mid, ChatReader.ANIM_TAG, message.animation.file_id)
        elif message.video is not None:
            self.learn_drawing(mid, ChatReader.VIDEO_TAG, message.video.file_id)
        self.card.count += 1

    def learn_drawing(self, mid, tag, drawing):
        """Learn a non-text message as '<tag> <file id>'."""
        self.learn(mid, tag + " " + drawing)

    def learn(self, mid, text):
        """Store text as a short term memory.

        Texts of 3 words or fewer that mention "velasco" (any case) are
        skipped.
        """
        if "velasco" in text.casefold() and len(text.split()) <= 3:
            return
        self.add_memory(mid, text)

    def commit_long_term(self, vocab):
        """Add every short term memory to vocab, then clear the buffer."""
        for mem in self.short_term_mem:
            vocab.add(mem.content)
        self.short_term_mem = []

    """
    def learnFrom(self, scribe):
        self.card.count += scribe.chat.count
        self.vocab.cross(scribe.vocab)
    """
|
166
generator.py
Normal file
166
generator.py
Normal file
|
@ -0,0 +1,166 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
import random
|
||||
import json
|
||||
|
||||
|
||||
def rewrite(text):
    """Split text into a list of words delimited by space.

    A space is appended after every newline before splitting, so a newline
    stays attached to the end of the word preceding it and never to the start
    of the next one. Spaces and tabs around each piece are trimmed and empty
    pieces are dropped, so as not to pollute the chain with "different" words
    that only differ in having whitespace attached or not.
    """
    pieces = (chunk.strip(' \t') for chunk in text.replace('\n', '\n ').split(' '))
    return [piece for piece in pieces if len(piece) > 0]
|
||||
|
||||
|
||||
def getkey(w1, w2):
    """Return the dictionary key for a pair of words, ignoring case.

    The key is the string form of the tuple of both words, each one
    stripped of surrounding whitespace and casefolded.
    """
    normalized = tuple(word.strip().casefold() for word in (w1, w2))
    return str(normalized)
|
||||
|
||||
|
||||
def getwords(key):
    """Turn a dictionary key back into its separate words.

    Keys are the string form of a tuple of words, so they are parsed back as
    a Python literal first; this handles quoting and escaping exactly. Keys
    that are not such a literal fall back to a plain split on ', ' with the
    surrounding parentheses and quotes removed.

    Returns:
        The list of words held by the key.
    """
    import ast

    try:
        parsed = ast.literal_eval(key)
    except (ValueError, TypeError, SyntaxError):
        parsed = None
    if isinstance(parsed, tuple) and all(isinstance(word, str) for word in parsed):
        return list(parsed)
    # The stripped word has to be kept: str.strip returns a new string
    return [word.strip('\'"') for word in key.strip('()').split(', ')]
|
||||
|
||||
|
||||
def triplets(wordlist):
    """Yield every run of three consecutive words of wordlist.

    So if our words were "What a lovely day", we'd generate
    (What, a, lovely) and then (a, lovely, day). Nothing is yielded
    for fewer than three words.
    """
    yield from zip(wordlist, wordlist[1:], wordlist[2:])
|
||||
|
||||
|
||||
class Generator(object):
    """Markov chain text generator.

    The cache maps the key of two consecutive words (see getkey) to the list
    of words seen right after them. The HEAD entry lists the words that have
    started a message.
    """

    MODE_JSON = "MODE_JSON"
    # This is to mark when we want to create a Generator object from a given JSON

    MODE_LIST = "MODE_LIST"
    # This is to mark when we want to create a Generator object from a given list of words

    MODE_CHAT_DATA = "MODE_CHAT_DATA"
    # This is to mark when we want to create a Generator object from Chat data (WIP)

    HEAD = "\n^MESSAGE_SEPARATOR^"
    TAIL = "^MESSAGE_SEPARATOR^"

    def __init__(self, load=None, mode=None):
        """Create a Generator, empty or loaded from `load` according to `mode`.

        Any mode other than MODE_JSON or MODE_LIST (including the WIP
        MODE_CHAT_DATA) gives an empty cache.
        """
        # The cache is where we store our words
        self.cache = {}
        if mode == Generator.MODE_JSON:
            self.cache = json.loads(load)
        elif mode == Generator.MODE_LIST:
            self.load_list(load)

    def load_list(self, many):
        """Take a list of strings and add them to the cache one by one."""
        for one in many:
            self.add(one)

    def dumps(self):
        """Dump the cache dictionary into a JSON-formatted string."""
        return json.dumps(self.cache)

    @staticmethod
    def loads(dump):
        """Load a Generator from a JSON-formatted string.

        An empty dump gives a default (empty) Generator.
        """
        if len(dump) == 0:
            return Generator()
        return Generator(load=dump, mode=Generator.MODE_JSON)

    def add(self, text):
        """Store a message in the cache.

        The words of the message are preceded by the HEAD that marks the
        beginning of a new message and followed by the TAIL that marks the
        end.
        """
        words = [Generator.HEAD]
        words.extend(rewrite(text + " " + Generator.TAIL))
        # The HEAD-prefixed list is the one that must reach the database,
        # otherwise no message start is ever recorded
        self.database(words)

    def database(self, words):
        """Store a list of words in the cache.

        Every triplet adds its third word to the chain of its first two; a
        triplet starting with HEAD also records its second word as a message
        starter.
        """
        for w1, w2, w3 in triplets(words):
            if w1 == Generator.HEAD:
                self.cache.setdefault(Generator.HEAD, []).append(w2)
            # Add the new word to the end of the chain of this key,
            # creating the chain when the key is new
            self.cache.setdefault(getkey(w1, w2), []).append(w3)

    def generate(self, size=50, silence=False):
        """Generate a Markov text/word chain of at most `size` words.

        Args:
            size: maximum number of words of the message.
            silence: whether username mentions should be silenced
                ("@" is replaced by "(@)").

        Returns:
            The generated text, or "" when the cache holds no message start
            to begin from.
        """
        starters = self.cache.get(Generator.HEAD)
        if not starters:
            # Nothing (usable) in the cache: we cannot generate anything
            return ""

        # Start with a message HEAD and a random message starting word
        w1 = random.choice(starters)
        followers = self.cache.get(getkey(Generator.HEAD, w1))
        # A starter without a chain ends the message right after itself
        w2 = random.choice(followers) if followers else Generator.TAIL
        gen_words = []
        for _ in range(size):
            if silence and w1.startswith("@") and len(w1) > 1:
                # Append the first word, silencing a possible username mention
                gen_words.append(w1.replace("@", "(@)"))
            else:
                gen_words.append(w1)
            key = getkey(w1, w2)
            if w2 == Generator.TAIL or key not in self.cache:
                # Either we reached a separation between messages or there
                # is no key from the last 2 words to follow the chain
                break
            # The second word becomes the new first word, and a random word
            # that follows the chain becomes the new second word
            w1, w2 = w2, random.choice(self.cache[key])
        return ' '.join(gen_words)

    def cross(self, gen):
        """Merge the cache of another Generator into this one."""
        for key in gen.cache:
            if key in self.cache:
                self.cache[key].extend(gen.cache[key])
            else:
                self.cache[key] = list(gen.cache[key])

    def new_count(self):
        """Recount the messages by counting message separators in the chains.

        Useful when the current number is unreliable.
        """
        count = 0
        for chain in self.cache.values():
            for word in chain:
                if word == Generator.TAIL:
                    count += 1
        return count
|
105
markov.py
105
markov.py
|
@ -1,105 +0,0 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
import random
|
||||
import json
|
||||
|
||||
def getkey(w1, w2):
    """Return the cache key of two words: the string form of the tuple of
    both words, stripped of surrounding whitespace and casefolded."""
    first = w1.strip().casefold()
    second = w2.strip().casefold()
    return str((first, second))
|
||||
|
||||
def getwords(key):
    """Turn a cache key back into its separate words.

    Keys are the string form of a tuple of words, so they are parsed back as
    a Python literal first. Keys that are not such a literal fall back to a
    plain split on ', ' with the surrounding parentheses and quotes removed.

    Returns:
        The list of words held by the key.
    """
    import ast

    try:
        parsed = ast.literal_eval(key)
    except (ValueError, TypeError, SyntaxError):
        parsed = None
    if isinstance(parsed, tuple) and all(isinstance(word, str) for word in parsed):
        return list(parsed)
    # The stripped word has to be kept: str.strip returns a new string
    return [word.strip('\'"') for word in key.strip('()').split(', ')]
|
||||
|
||||
def triples(wordlist):
    """Yield the triples of consecutive words of wordlist.

    So if our words were "What a lovely day", we'd generate
    (What, a, lovely) and then (a, lovely, day). Fewer than three
    words yield nothing.
    """
    last_start = len(wordlist) - 2
    start = 0
    while start < last_start:
        yield (wordlist[start], wordlist[start + 1], wordlist[start + 2])
        start += 1
|
||||
|
||||
class Markov(object):
    """Markov chain text generator.

    The cache maps the key of two consecutive words (see getkey) to the list
    of words seen right after them. The Head entry lists the words that have
    started a message.
    """

    ModeJson = "MODE_JSON"
    ModeList = "MODE_LIST"
    ModeChatData = "MODE_CHAT_DATA"

    Head = "\n^MESSAGE_SEPARATOR^"
    Tail = "^MESSAGE_SEPARATOR^"

    def __init__(self, load=None, mode=None):
        """Create a Markov, empty or loaded from `load` according to `mode`.

        Any mode other than ModeJson or ModeList gives an empty cache.
        """
        self.cache = {}
        if mode == Markov.ModeJson:
            self.cache = json.loads(load)
        elif mode == Markov.ModeList:
            self.loadList(load)

    def loadList(self, lines):
        """Learn every line of a list, each one preceded by the Head marker."""
        for line in lines:
            words = [Markov.Head]
            words.extend(line.split())
            self.learn_words(words)

    def dumps(self):
        """Dump the cache dictionary into a JSON-formatted string."""
        return json.dumps(self.cache)

    @staticmethod
    def loads(dump):
        """Load a Markov from a JSON-formatted string; "" gives an empty one."""
        if len(dump) == 0:
            return Markov()
        return Markov(load=dump, mode=Markov.ModeJson)

    def learn_words(self, words):
        self.database(words)

    def database(self, wordlist):
        """Store a list of words in the cache, triple by triple."""
        for w1, w2, w3 in triples(wordlist):
            if w1 == Markov.Head:
                # Record the word as a message starter
                self.cache.setdefault(Markov.Head, []).append(w2)
            self.cache.setdefault(getkey(w1, w2), []).append(w3)

    def generate_markov_text(self, size=50, silence=False):
        """Generate a text of at most `size` words.

        When silence is set, "@" is replaced by "(@)" in words that start a
        mention. Returns "" when the cache holds no message start.
        """
        starters = self.cache.get(Markov.Head)
        if not starters:
            return ""
        w1 = random.choice(starters)
        followers = self.cache.get(getkey(Markov.Head, w1))
        # A starter without a chain ends the message right after itself
        w2 = random.choice(followers) if followers else Markov.Tail
        gen_words = []
        for _ in range(size):
            if silence and w1.startswith("@") and len(w1) > 1:
                gen_words.append(w1.replace("@", "(@)"))
            else:
                gen_words.append(w1)
            key = getkey(w1, w2)
            if w2 == Markov.Tail or key not in self.cache:
                break
            w1, w2 = w2, random.choice(self.cache[key])
        return ' '.join(gen_words)

    def cross(self, gen):
        """Merge the cache of another Markov into this one."""
        for key in gen.cache:
            # Read from the other generator's cache
            if key in self.cache:
                self.cache[key].extend(gen.cache[key])
            else:
                self.cache[key] = list(gen.cache[key])

    def new_count(self):
        """Recount the messages by counting Tail markers in the chains."""
        count = 0
        for chain in self.cache.values():
            for word in chain:
                if word == Markov.Tail:
                    count += 1
        return count
|
194
scribe.py
194
scribe.py
|
@ -1,194 +0,0 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
import random
|
||||
from chatlog import *
|
||||
from markov import Markov
|
||||
|
||||
def getTitle(chat):
    """Return the chat title, falling back to the first (and, when present,
    last) name, or to an empty string when there is neither."""
    if chat.title is not None:
        return chat.title
    if chat.first_name is None:
        return ""
    if chat.last_name is None:
        return chat.first_name
    return chat.first_name + " " + chat.last_name
|
||||
|
||||
def rewrite(text):
    """Split text on spaces into a list of non-empty words.

    A space is appended after every newline first, so a newline stays
    attached to the end of the word before it. Spaces and tabs around each
    piece are trimmed; pieces left empty are dropped.
    """
    cleaned = []
    for chunk in text.replace('\n', '\n ').split(' '):
        token = chunk.strip(' \t')
        if len(token) > 0:
            cleaned.append(token)
    return cleaned
|
||||
|
||||
class Page(object):
    """A message ID paired with the message content."""

    def __init__(self, mid, content):
        self.id, self.content = mid, content
|
||||
|
||||
class Scribe(object):
    """Per-chat reader that buffers incoming messages as pages.

    A Scribe wraps a chatlog object (settings and counters of one chat) and
    an archivist (used for storing and for its logger). Messages passed to
    ``learn`` are turned into word lists and kept in ``self.pages`` until
    ``teachParrot`` feeds them to a parrot and empties the buffer.

    The ``From*`` and ``Recall`` factories are static methods, so they can
    be called on the class or on an instance alike.
    """

    # Media messages are stored as "<tag> <file id>"; all tags share a prefix.
    TagPrefix = "^IS_"
    StickerTag = "^IS_STICKER^"
    AnimTag = "^IS_ANIMATION^"
    VideoTag = "^IS_VIDEO^"

    def __init__(self, chatlog, archivist):
        """Bind the scribe to *chatlog* and *archivist* with no pages yet."""
        self.chat = chatlog
        self.archivist = archivist
        self.pages = []
        self.countdown = self.chat.freq
        self.logger = self.archivist.logger

    @staticmethod
    def FromChat(chat, archivist, newchat=False):
        """Create a Scribe with a new Chatlog built from *chat*.

        *newchat* is accepted for interface compatibility and is not used.
        """
        chatlog = Chatlog(chat.id, chat.type, getTitle(chat))
        return Scribe(chatlog, archivist)

    @staticmethod
    def FromData(data, archivist):
        """Placeholder factory; always returns None."""
        return None

    @staticmethod
    def FromFile(log, archivist):
        """Create a Scribe from a chatlog serialized in *log*."""
        return Scribe(Chatlog.loads(log), archivist)

    @staticmethod
    def Recall(text, archivist):
        """Rebuild a Scribe and its parrot from a legacy text dump.

        The dump starts with chatlog header lines followed by the Markov
        cache. The number of header lines depends on the version read from
        the dump. The rebuilt parrot is attached as ``parrot`` on the
        returned Scribe.

        Indexing the first (and possibly fifth) line raises IndexError when
        *text* has too few lines.
        """
        lines = text.splitlines()
        version = parse(lines[0]).strip()
        if len(version.strip()) <= 1:
            # The first line carries no usable version; use the fifth line.
            version = lines[4]
        archivist.logger.info( "Dictionary version: {} ({} lines)".format(version, len(lines)) )

        # version -> (end of the chatlog header, start of the cache lines).
        # Only the "dict:" layout has no line skipped between the two parts.
        layouts = {
            "v4": (9, 10),
            "v3": (8, 9),
            "v2": (7, 8),
            "dict:": (6, 6),
        }
        if version in layouts:
            header_end, cache_start = layouts[version]
            chatlog = Chatlog.loadl(lines[0:header_end])
            parrot = Markov.loads('\n'.join(lines[cache_start:]))
        else:
            # Any other version is read as a four-line header followed by
            # the cache given as a plain list of lines.
            chatlog = Chatlog.loadl(lines[0:4])
            parrot = Markov(load=lines[4:], mode=Markov.ModeList)

        s = Scribe(chatlog, archivist)
        s.parrot = parrot
        return s

    def store(self, parrot):
        """Hand this chat's id, dumped chatlog and *parrot* to the archivist."""
        self.archivist.store(self.chat.id, self.chat.dumps(), parrot)

    def checkType(self, t):
        """Return whether *t* is contained in the chat type."""
        return t in self.chat.type

    def compareType(self, t):
        """Return whether *t* equals the chat type."""
        return t == self.chat.type

    def setTitle(self, title):
        """Replace the chat title."""
        self.chat.title = title

    def setFreq(self, freq):
        """Set the chat frequency, capped at the archivist's ``maxFreq``.

        When *freq* is below the current countdown, the countdown is lowered
        to *freq* but never below 1. Returns whatever ``chat.set_freq``
        returns.
        """
        if freq < self.countdown:
            self.countdown = max(freq, 1)
        return self.chat.set_freq(min(freq, self.archivist.maxFreq))

    def setAnswer(self, afreq):
        """Forward *afreq* to ``chat.set_answer`` and return its result."""
        return self.chat.set_answer(afreq)

    def cid(self):
        """Return the chat id as a string."""
        return str(self.chat.id)

    def count(self):
        """Return the chat's message count."""
        return self.chat.count

    def freq(self):
        """Return the chat's frequency setting."""
        return self.chat.freq

    def title(self):
        """Return the chat title."""
        return self.chat.title

    def answer(self):
        """Return the chat's answer setting."""
        return self.chat.answer

    def type(self):
        """Return the chat type."""
        return self.chat.type

    def isRestricted(self):
        """Return the chat's ``restricted`` flag."""
        return self.chat.restricted

    def restrict(self):
        """Toggle the chat's ``restricted`` flag."""
        self.chat.restricted = (not self.chat.restricted)

    def isSilenced(self):
        """Return the chat's ``silenced`` flag."""
        return self.chat.silenced

    def silence(self):
        """Toggle the chat's ``silenced`` flag."""
        self.chat.silenced = (not self.chat.silenced)

    def isAnswering(self):
        """Decide at random whether to answer, using the answer setting.

        A setting of exactly 1 always answers and exactly 0 never does;
        anything else is compared against a random draw from [0, 1).
        """
        chance = self.answer()
        if chance == 1:
            return True
        if chance == 0:
            # Explicit, because a draw of exactly 0.0 would pass "<= 0".
            return False
        return random.random() <= chance

    def addPage(self, mid, content):
        """Append a new Page holding *content* under message id *mid*."""
        self.pages.append(Page(mid, content))

    def getReference(self):
        """Return the id of a randomly chosen buffered page.

        Raises IndexError when no pages are buffered.
        """
        return random.choice(self.pages).id

    def resetCountdown(self):
        """Reset the countdown to the chat's frequency."""
        self.countdown = self.chat.freq

    def learn(self, message):
        """Buffer *message* and increment the chat's message count.

        Text is read as is; stickers, animations and videos are read as
        their tag followed by the file id. Messages with none of these
        are not buffered, but the count is incremented all the same.
        """
        mid = str(message.message_id)

        if message.text is not None:
            self.read(mid, message.text)
        elif message.sticker is not None:
            self.learnDrawing(mid, Scribe.StickerTag, message.sticker.file_id)
        elif message.animation is not None:
            self.learnDrawing(mid, Scribe.AnimTag, message.animation.file_id)
        elif message.video is not None:
            self.learnDrawing(mid, Scribe.VideoTag, message.video.file_id)
        self.chat.count += 1

    def learnDrawing(self, mid, tag, drawing):
        """Read a media message as the text "<tag> <drawing>"."""
        self.read(mid, tag + " " + drawing)

    def read(self, mid, text):
        """Turn *text* into a page of words wrapped in head/tail markers.

        Texts of at most three words that mention "velasco" (case-folded)
        are ignored.
        """
        if "velasco" in text.casefold() and len(text.split()) <= 3:
            return
        words = [Markov.Head]
        words.extend(rewrite(text + " " + Markov.Tail))
        self.addPage(mid, words)

    def teachParrot(self, parrot):
        """Feed every buffered page to *parrot*, then empty the buffer."""
        for page in self.pages:
            parrot.learn_words(page.content)
        self.pages = []
|
||||
|
||||
"""
|
||||
def learnFrom(self, scribe):
|
||||
self.chat.count += scribe.chat.count
|
||||
self.parrot.cross(scribe.parrot)
|
||||
"""
|
30
speaker.py
30
speaker.py
|
@ -1,24 +1,25 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
import random
|
||||
from scribe import Scribe
|
||||
from markov import Markov
|
||||
from chatreader import ChatReader as Reader
|
||||
from telegram.error import *
|
||||
|
||||
def send(bot, cid, text, replying=None, format=None, logger=None, **kwargs):
|
||||
kwargs["parse_mode"] = format
|
||||
|
||||
def send(bot, cid, text, replying=None, formatting=None, logger=None, **kwargs):
|
||||
kwargs["parse_mode"] = formatting
|
||||
kwargs["reply_to_message_id"] = replying
|
||||
|
||||
if text.startswith(Scribe.TagPrefix):
|
||||
if text.startswith(Reader.TAG_PREFIX):
|
||||
words = text.split(maxsplit=1)
|
||||
if logger:
|
||||
logger.info('Sending {} "{}" to {}'.format(words[0][4:-1], words[1], cid))
|
||||
# Logs something like 'Sending VIDEO "VIDEO_ID" to CHAT_ID'
|
||||
|
||||
if words[0] == Scribe.StickerTag:
|
||||
if words[0] == Reader.STICKER_TAG:
|
||||
return bot.send_sticker(cid, words[1], **kwargs)
|
||||
elif words[0] == Scribe.AnimTag:
|
||||
elif words[0] == Reader.ANIM_TAG:
|
||||
return bot.send_animation(cid, words[1], **kwargs)
|
||||
elif words[0] == Scribe.VideoTag:
|
||||
elif words[0] == Reader.VIDEO_TAG:
|
||||
return bot.send_video(cid, words[1], **kwargs)
|
||||
else:
|
||||
text
|
||||
|
@ -27,17 +28,6 @@ def send(bot, cid, text, replying=None, format=None, logger=None, **kwargs):
|
|||
logger.info("Sending a {} to {}: '{}'".format(mtype, cid, text))
|
||||
return bot.send_message(cid, text, **kwargs)
|
||||
|
||||
def getTitle(chat):
    """Return a display name for *chat*.

    A truthy ``title`` is returned unchanged. Otherwise the first and last
    names (each treated as empty when falsy) are joined with a space and
    trimmed; if nothing remains, "Unknown" is returned.
    """
    if chat.title:
        return chat.title
    last = chat.last_name or ""
    first = chat.first_name or ""
    full_name = (first + " " + last).strip()
    return full_name if full_name else "Unknown"
|
||||
|
||||
class Speaker(object):
|
||||
ModeFixed = "FIXED_MODE"
|
||||
|
@ -79,7 +69,7 @@ class Speaker(object):
|
|||
def getScribe(self, chat):
|
||||
cid = str(chat.id)
|
||||
if not cid in self.scriptorium:
|
||||
scribe = Scribe.FromChat(chat, self.archivist, newchat=True)
|
||||
scribe = Reader.FromChat(chat, self.archivist, newchat=True)
|
||||
self.scriptorium[cid] = scribe
|
||||
return scribe
|
||||
else:
|
||||
|
|
|
@ -49,20 +49,24 @@ about_msg = "I am yet another Markov Bot experiment. I read everything you type
|
|||
|
||||
explanation = "I decompose every message I read in groups of 3 consecutive words, so for each consecutive pair I save the word that can follow them. I then use this to make my own messages. At first I will only repeat your messages because for each 2 words I will have very few possible following words.\n\nI also separate my vocabulary by chats, so anything I learn in one chat I will only say in that chat. For privacy, you know. Also, I save my vocabulary in the form of a json dictionary, so no logs are kept.\n\nMy default frequency in private chats is one message of mine from each 2 messages received, and in group chats it\'s 10 messages I read for each message I send."
|
||||
|
||||
|
||||
def static_reply(text, format=None):
    """Build a handler that always answers with the same *text*.

    The returned callable takes ``(bot, update)`` and calls
    ``update.message.reply_text`` with *text*, passing *format* as
    ``parse_mode``.
    """
    def reply(bot, update):
        incoming = update.message
        incoming.reply_text(text, parse_mode=format)

    return reply
|
||||
|
||||
|
||||
def error(bot, update, error):
    """Log, at warning level, the update and the error it caused."""
    report = 'Update "{}" caused error "{}"'.format(update, error)
    logger.warning(report)
|
||||
|
||||
|
||||
def stop(bot, update):
    """Log that the bot was blocked in the chat this update came from.

    The chat's scribe is looked up only to report its title and id. Nothing
    is deleted here; the chat's stored data is left untouched.
    """
    # getScribe reads ``chat.id`` itself, so it needs the chat object;
    # passing the bare id would fail with AttributeError.
    scribe = speakerbot.getScribe(update.message.chat)
    logger.warning("I got blocked by user {} [{}]".format(scribe.title(), scribe.cid()))
|
||||
|
||||
|
||||
def main():
|
||||
global speakerbot
|
||||
parser = argparse.ArgumentParser(description='A Telegram markov bot.')
|
||||
|
|
Loading…
Reference in a new issue