velascobot/markov.py
vylion d075624263 Velasco Big Overhaul Update
📝 Changed the whole script files hierarchy:
- velasco.py starts up the telegram bot
- speaker.py has all the bot behavior methods
- A Parrot is what stores a Markov object
- A Scribe stores a single chat's data
- A Speaker has a Scriptorium, with all active Scribes
- A Speaker has a single Parrot, the one associated with the last
Scribe that had to send a message
- An Archivist is in charge of loading the Scriptorium at startup, as
well as storing Scribes and Parrots in files

🐞 Fixed a bug that stopped new Parrots from being saved: their
not-yet-existing file could not be loaded into the Speaker's
Parrot
2019-03-27 13:34:22 +01:00

105 lines
3 KiB
Python

#!/usr/bin/env python3
import random
import json
def getkey(w1, w2):
    """Return the cache key for the word pair (w1, w2).

    Each word is stripped of surrounding whitespace and casefolded, and the
    resulting 2-tuple is rendered with ``str()`` so it can be used as a key
    in a JSON-serialisable dict.
    """
    normalized = tuple(word.strip().casefold() for word in (w1, w2))
    return str(normalized)
def getwords(key):
    """Recover the list of words from a key string built by ``getkey``.

    ``getkey`` renders a tuple of strings with ``str()``, so the key is a
    Python tuple literal such as ``"('hello', 'world')"``. It is parsed with
    ``ast.literal_eval`` so that quoting and escapes (e.g. words containing
    apostrophes) are undone correctly.

    Strings that are not a tuple-of-strings literal fall back to splitting
    on ``', '`` after removing the surrounding parentheses, with surrounding
    quote characters stripped from each piece.

    Returns a list of words.
    """
    import ast  # local import: only this helper needs it

    try:
        parsed = ast.literal_eval(key)
    except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError):
        parsed = None

    if isinstance(parsed, tuple) and all(isinstance(w, str) for w in parsed):
        return list(parsed)

    # The original code called ``strip`` here but discarded the result,
    # leaving the quotes in place; keep the stripped value.
    return [word.strip("'\"") for word in key.strip('()').split(', ')]
def triples(wordlist):
    """Yield every run of three consecutive items from ``wordlist``.

    For the words of "What a lovely day" this yields
    ("What", "a", "lovely") and then ("a", "lovely", "day").
    Inputs with fewer than three items yield nothing.
    """
    window_count = len(wordlist) - 2
    # range() of a non-positive count is empty, which covers short inputs.
    yield from (
        (wordlist[pos], wordlist[pos + 1], wordlist[pos + 2])
        for pos in range(window_count)
    )
class Markov(object):
    """Second-order Markov chain over words, stored in a JSON-friendly dict.

    ``self.cache`` maps:

    * ``Markov.Head`` -> list of words seen directly after the Head marker
      (i.e. message openers);
    * ``getkey(w1, w2)`` -> list of words seen directly after the pair
      ``w1 w2``.

    Duplicates are kept in the lists, so ``random.choice`` picks followers
    in proportion to how often they were seen.
    """

    # Loading modes accepted by __init__.
    ModeJson = "MODE_JSON"
    ModeList = "MODE_LIST"
    # Declared but not handled by __init__ in this file; yields an empty cache.
    ModeChatData = "MODE_CHAT_DATA"

    # Marker prepended to each learned line by loadList.
    Head = "\n^MESSAGE_SEPARATOR^"
    # Marker that ends generation and is counted by new_count.
    Tail = "^MESSAGE_SEPARATOR^"

    def __init__(self, load=None, mode=None):
        """Create a chain, optionally pre-loaded.

        load -- JSON text when ``mode`` is ModeJson, or an iterable of text
                lines when ``mode`` is ModeList; ignored otherwise.
        mode -- one of the Mode* constants, or None for an empty chain.
        """
        # Always start with a cache so an unhandled mode cannot leave the
        # instance without one.
        self.cache = {}
        if mode == Markov.ModeJson:
            self.cache = json.loads(load)
        elif mode == Markov.ModeList:
            self.loadList(load)

    def loadList(self, lines):
        """Learn every line in ``lines``, each prefixed with the Head marker."""
        for line in lines:
            self.learn_words([Markov.Head, *line.split()])

    def dumps(self):
        """Return the cache serialised as JSON text."""
        return json.dumps(self.cache)

    @staticmethod
    def loads(dump):
        """Build a Markov from JSON text produced by ``dumps``.

        An empty (or None) ``dump`` yields an empty chain.
        """
        if not dump:
            return Markov()
        return Markov(load=dump, mode=Markov.ModeJson)

    def learn_words(self, words):
        """Add a list of words to the chain (delegates to ``database``)."""
        self.database(words)

    def database(self, wordlist):
        """Record every consecutive word triple of ``wordlist`` in the cache."""
        for w1, w2, w3 in triples(wordlist):
            if w1 == Markov.Head:
                # Remember which words can open a message.
                self.cache.setdefault(Markov.Head, []).append(w2)
            self.cache.setdefault(getkey(w1, w2), []).append(w3)

    def generate_markov_text(self, size=50, silence=False):
        """Generate up to ``size`` words by walking the chain from Head.

        size    -- maximum number of words to emit.
        silence -- when true, words longer than one character that start
                   with "@" have every "@" replaced by "(@)".

        Returns the words joined by single spaces, or "" when the cache has
        no usable starting point.
        """
        openers = self.cache.get(Markov.Head)
        if not openers:
            return ""
        w1 = random.choice(openers)
        seconds = self.cache.get(getkey(Markov.Head, w1))
        if not seconds:
            # Inconsistent cache: an opener without a recorded follower.
            return ""
        w2 = random.choice(seconds)

        gen_words = []
        for _ in range(size):
            if silence and w1.startswith("@") and len(w1) > 1:
                gen_words.append(w1.replace("@", "(@)"))
            else:
                gen_words.append(w1)
            if w2 == Markov.Tail:
                break
            followers = self.cache.get(getkey(w1, w2))
            if not followers:
                # NOTE(review): on a dead end where w2 is not Tail, w2 itself
                # is never emitted -- confirm this is intended.
                break
            w1, w2 = w2, random.choice(followers)
        return ' '.join(gen_words)

    def cross(self, gen):
        """Merge the cache of another Markov ``gen`` into this one."""
        for key, followers in gen.cache.items():
            if key in self.cache:
                self.cache[key].extend(followers)
            else:
                # Copy so the two chains do not share a list object.
                self.cache[key] = list(followers)

    def new_count(self):
        """Return how many Tail markers are stored across all cache entries."""
        return sum(
            1
            for followers in self.cache.values()
            for word in followers
            if word == Markov.Tail
        )