mirror of
https://gitlab.com/vylion/velascobot.git
synced 2025-04-19 21:46:35 +02:00
📝 Changed the whole script file hierarchy: - velasco.py starts up the Telegram bot - speaker.py has all the bot behavior methods - A Parrot is what stores a Markov object - A Scribe stores a single chat's data - A Speaker has a Scriptorium, with all active Scribes - A Speaker has a single Parrot, the one associated with the last Scribe that had to send a message - An Archivist is in charge of loading the Scriptorium at startup, as well as storing Scribes and Parrots in files 🐞 Fixed a bug that stopped new Parrots from being saved, because their non-existent file could not be loaded into the Speaker's Parrot
105 lines
3 KiB
Python
105 lines
3 KiB
Python
#!/usr/bin/env python3
|
|
|
|
import random
|
|
import json
|
|
|
|
def getkey(w1, w2):
    """Return the string key for the word pair ``(w1, w2)``.

    Each word is stripped of surrounding whitespace and case-folded, and the
    resulting 2-tuple is rendered with ``str`` so the key is a plain string.
    """
    normalized = tuple(word.strip().casefold() for word in (w1, w2))
    return str(normalized)
|
|
|
|
def getwords(key):
    """Recover the list of words from a key string.

    The key is expected to look like the ``str()`` of a tuple of strings,
    e.g. ``"('hello', 'world')"``. Such a key is parsed as a Python literal,
    which also handles words rendered with double quotes or escape sequences.

    Anything that does not parse to a tuple of strings goes through the
    legacy path: drop the surrounding parentheses, split on ``', '`` and
    strip single quotes from each piece.

    :param key: key string to decode.
    :return: list of the words contained in the key.
    """
    import ast  # local import: only this function needs it

    try:
        parsed = ast.literal_eval(key)
    except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError):
        parsed = None

    if isinstance(parsed, tuple) and all(isinstance(w, str) for w in parsed):
        return list(parsed)

    # Legacy path. The original code called strip() here but threw the
    # result away, so the quotes were never actually removed.
    return [word.strip("'") for word in key.strip('()').split(', ')]
|
|
|
|
def triples(wordlist):
    """Yield every run of three consecutive items of ``wordlist``.

    For the words of "What a lovely day" this yields
    ("What", "a", "lovely") and then ("a", "lovely", "day").
    Inputs with fewer than three items yield nothing.
    """
    # range() is empty when there are fewer than three items, so no
    # separate length guard is needed.
    for start in range(len(wordlist) - 2):
        first = wordlist[start]
        second = wordlist[start + 1]
        third = wordlist[start + 2]
        yield (first, second, third)
|
|
|
|
class Markov(object):
    """Word-level Markov chain keyed by pairs of consecutive words.

    ``cache`` maps a key to the list of words seen right after it. Two kinds
    of keys are stored: ``Markov.Head`` itself, whose list holds the words
    that followed a ``Head`` marker, and ``getkey(w1, w2)`` strings for word
    pairs. The lists keep duplicates, and generation picks from them with
    ``random.choice``.
    """

    # Values accepted by the ``mode`` argument of ``__init__``.
    ModeJson = "MODE_JSON"
    ModeList = "MODE_LIST"
    # Not handled specially anywhere in this class; it yields an empty cache.
    ModeChatData = "MODE_CHAT_DATA"

    # Sentinel words marking the start and the end of a message.
    Head = "\n^MESSAGE_SEPARATOR^"
    Tail = "^MESSAGE_SEPARATOR^"

    def __init__(self, load=None, mode=None):
        """Create a chain, optionally pre-loaded from ``load``.

        :param load: JSON text when ``mode`` is ``ModeJson``; an iterable of
            text lines when ``mode`` is ``ModeList``; ignored otherwise.
        :param mode: one of the ``Mode*`` constants, or ``None`` for an
            empty chain.
        """
        # Always start with a cache, so that an unhandled mode (for example
        # ModeChatData) does not leave the instance without the attribute.
        self.cache = {}
        if mode == Markov.ModeJson:
            self.cache = json.loads(load)
        elif mode == Markov.ModeList:
            self.loadList(load)

    def loadList(self, lines):
        """Learn every line of ``lines``, each prefixed with ``Head``."""
        for line in lines:
            self.learn_words([Markov.Head] + line.split())

    def dumps(self):
        """Return the cache serialized as a JSON string."""
        return json.dumps(self.cache)

    @staticmethod
    def loads(dump):
        """Build a ``Markov`` from a string produced by ``dumps``.

        An empty (or missing) dump gives an empty chain. Declared as a
        static method so that it works both as ``Markov.loads(text)`` and
        when called on an instance.
        """
        if not dump:
            return Markov()
        return Markov(load=dump, mode=Markov.ModeJson)

    def learn_words(self, words):
        """Add the word sequence ``words`` to the chain."""
        self.database(words)

    def database(self, wordlist):
        """Record every consecutive word triple of ``wordlist`` in the cache.

        For a triple ``(w1, w2, w3)``, ``w3`` is appended under
        ``getkey(w1, w2)``. When ``w1`` is ``Head``, ``w2`` is additionally
        appended under the ``Head`` key, which is where generation starts.
        """
        for w1, w2, w3 in triples(wordlist):
            if w1 == Markov.Head:
                self.cache.setdefault(Markov.Head, []).append(w2)
            self.cache.setdefault(getkey(w1, w2), []).append(w3)

    def generate_markov_text(self, size=50, silence=False):
        """Generate a random message of at most ``size`` words.

        :param size: maximum number of words to emit.
        :param silence: when true, ``@`` is rewritten to ``(@)`` in words
            that start with ``@`` and are longer than one character.
        :return: the generated words joined by single spaces, or ``""`` when
            the chain has no recorded message openers.
        """
        openers = self.cache.get(Markov.Head)
        if not openers:
            # Covers both an empty cache and one without a Head entry
            # (e.g. loaded from foreign JSON); the latter used to raise
            # KeyError.
            return ""

        w1 = random.choice(openers)
        followers = self.cache.get(getkey(Markov.Head, w1))
        # With no recorded follower, treat the opener as a complete message.
        w2 = random.choice(followers) if followers else Markov.Tail

        gen_words = []
        for _ in range(size):
            if silence and w1.startswith("@") and len(w1) > 1:
                gen_words.append(w1.replace("@", "(@)"))
            else:
                gen_words.append(w1)

            if w2 == Markov.Tail:
                break
            followers = self.cache.get(getkey(w1, w2))
            if not followers:
                # Dead end: nothing was ever seen after (w1, w2).
                # NOTE(review): w2 is not emitted on this path, same as in
                # the original code - confirm that is intended.
                break
            w1, w2 = w2, random.choice(followers)
        return ' '.join(gen_words)

    def cross(self, gen):
        """Merge the cache of another chain ``gen`` into this one.

        Follower lists of shared keys are concatenated; keys only present in
        ``gen`` are copied, so the two chains never share list objects.
        """
        for key, followers in gen.cache.items():
            self.cache.setdefault(key, []).extend(followers)

    def new_count(self):
        """Return how many ``Tail`` markers are stored across the cache."""
        return sum(
            1
            for followers in self.cache.values()
            for word in followers
            if word == Markov.Tail
        )
|