Uploading Velasco v1.4

- It now saves the vocabulary dictionary directly into the file, so there
is no need to deal with the full list of messages.
- This also means that the number of elements kept in memory has been
reduced, since there is no need to keep a list with all the words one
after another, apart from the dictionary.
- Modularized some constants, like the frequency of saves when the
frequency of speaking is too large, or the stop words that mark the
start and end of a message.
This commit is contained in:
vylion 2017-09-21 15:39:53 +02:00
parent 810e517757
commit 1d1bd6034e
5 changed files with 86 additions and 62 deletions

Binary file not shown.

Binary file not shown.

View file

@ -3,21 +3,22 @@
from markov import * from markov import *
class Chatlog(object): class Chatlog(object):
def __init__(self, ident, chattype, title, msgs=None, freq=None): def __init__(self, ident, chattype, title, text=None, freq=None):
if msgs is not None:
self.msgs = msgs
else:
self.msgs = []
self.id = str(ident) self.id = str(ident)
self.type = chattype self.type = chattype
self.title = title self.title = title
if freq is None: if freq is None:
if "group" in chattype: if "group" in chattype:
freq = 20 freq = 15
#elif chattype is "private": #elif chattype is "private":
else: else:
freq = 5 freq = 2
self.freq = freq self.freq = freq
if text is not None:
self.count = len(text)
else:
self.count = 0
self.gen = Markov(text)
def set_title(self, title): def set_title(self, title):
self.title = title self.title = title
@ -31,36 +32,30 @@ class Chatlog(object):
return self.freq return self.freq
def add_msg(self, message): def add_msg(self, message):
msg = message.split() self.gen.add_text(message + " !kvl")
msg.append("!kvl") self.count += 1
self.msgs.append(msg)
def get_markov_gen(self):
msgs = []
for m in self.msgs:
msgs.append(' '.join(m))
text = ' '.join(msgs)
self.gen = Markov(text)
def speak(self): def speak(self):
self.get_markov_gen()
return self.gen.generate_markov_text() return self.gen.generate_markov_text()
def get_count(self): def get_count(self):
return len(self.msgs) return self.count
def to_txt(self): def to_txt(self):
lines = [self.id] lines = [self.id]
lines.append(self.type) lines.append(self.type)
lines.append(self.title) lines.append(self.title)
lines.append(str(self.freq)) lines.append(str(self.freq))
for m in self.msgs: lines.append("dict:")
lines.append(' '.join(m)) txt = '\n'.join(lines)
return '\n'.join(lines) return txt + '\n' + self.gen.to_json()
def from_txt(text): def from_txt(text):
lines = text.splitlines() lines = text.splitlines()
msgs = [] if(lines[4] == "dict:"):
for m in lines[4:]: new_log = Chatlog(lines[0], lines[1], lines[2], None, int(lines[3]))
msgs.append(m.split()) cache = '\n'.join(lines[5:])
return Chatlog(lines[0], lines[1], lines[2], msgs, int(lines[3])) new_log.gen = Markov.from_json(cache)
return new_log
else:
return Chatlog(lines[0], lines[1], lines[2], lines[4:], int(lines[3]))

View file

@ -1,50 +1,75 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
import random import random
import json
# Start-of-message sentinel: Markov.add_text prepends it to every message, and
# generation picks its first word from the entries stored under this token.
HEAD = "\n!kvl"
# End-of-message sentinel: generation stops as soon as this token is drawn.
TAIL = "!kvl"
def trim_and_split(text):
    """Split *text* on single spaces and trim spaces/tabs from each piece.

    Consecutive spaces yield empty-string entries, and newlines are left
    untouched because only ' ' and tab characters are stripped.
    """
    return [piece.strip(' \t') for piece in text.split(' ')]
def getkey(w1, w2):
    """Return the string form of the normalized (w1, w2) pair.

    Each word is stripped of surrounding whitespace and case-folded; the
    resulting tuple is rendered with str() to produce the key.
    """
    first = w1.strip().casefold()
    second = w2.strip().casefold()
    return str((first, second))
def triples(wordlist):
    """Yield every run of three consecutive items from *wordlist*.

    For the words of "What a lovely day" this produces
    (What, a, lovely) followed by (a, lovely, day). Inputs with fewer
    than three items produce nothing.
    """
    # Zipping the sequence against itself shifted by one and two positions
    # stops at the shortest slice, so short inputs naturally yield nothing.
    yield from zip(wordlist, wordlist[1:], wordlist[2:])
class Markov(object): class Markov(object):
def __init__(self, text=None): def __init__(self, text=None, from_json=False):
self.cache = {} if not from_json:
self.words = [] self.cache = {}
if text is None: if text is not None:
text = "" for line in text:
self.words = ("!kvl\n"+text).split() self.add_text(line)
self.word_size = len(self.words) else:
self.database() self.cache = json.loads(text)
def triples(self): def to_json(self):
""" Generates triples from the given data string. So if our string were return json.dumps(self.cache)
"What a lovely day", we'd generate (What, a, lovely) and then
(a, lovely, day).
"""
if len(self.words) < 3: def from_json(string):
return return Markov(string, True)
for i in range(len(self.words) - 2): def add_text(self, text):
yield (self.words[i], self.words[i+1], self.words[i+2]) words = trim_and_split(HEAD + " " + text)
self.database(words)
def database(self): def database(self, wordlist):
for w1, w2, w3 in self.triples(): for w1, w2, w3 in triples(wordlist):
key = (w1.casefold(), w2.casefold()) if w1 == HEAD:
if w1 in self.cache:
self.cache[HEAD].append(w2)
else:
self.cache[HEAD] = [w2]
key = getkey(w1, w2)
if key in self.cache: if key in self.cache:
self.cache[key].append(w3) self.cache[key].append(w3)
else: else:
self.cache[key] = [w3] self.cache[key] = [w3]
def generate_markov_text(self, size=50): def generate_markov_text(self, size=50):
seed = random.randint(0, self.word_size-4) w1 = random.choice(self.cache[HEAD])
seed_word, next_word, next_word2 = self.words[seed], self.words[seed+1], self.words[seed+2] w2 = random.choice(self.cache[getkey(HEAD, w1)])
while not "!kvl" in seed_word:
seed = random.randint(0, self.word_size-4)
seed_word, next_word, next_word2 = self.words[seed], self.words[seed+1], self.words[seed+2]
w1, w2 = next_word, next_word2
gen_words = [] gen_words = []
for i in range(size): for i in range(size):
gen_words.append(w1) gen_words.append(w1)
if "!kvl" in w2 or not (w1.casefold(), w2.casefold()) in self.cache: if w2 == TAIL or not getkey(w1, w2) in self.cache:
print("Generated text") print("Generated text")
break break
else: else:
w1, w2 = w2, random.choice(self.cache[(w1.casefold(), w2.casefold())]) w1, w2 = w2, random.choice(self.cache[getkey(w1, w2)])
return ' '.join(gen_words) return ' '.join(gen_words)

View file

@ -2,6 +2,7 @@
import sys, os import sys, os
from telegram.ext import Updater, CommandHandler, MessageHandler, Filters from telegram.ext import Updater, CommandHandler, MessageHandler, Filters
from telegram.error import *
from chatlog import * from chatlog import *
import logging import logging
import argparse import argparse
@ -15,7 +16,9 @@ logger = logging.getLogger(__name__)
chatlogs = {} chatlogs = {}
disabled = {} disabled = {}
GUILLERMO_ID = 8379173 GUILLERMO_ID = "8379173"
CHAT_INC = 5
CHAT_SAVE = 15
def wake(bot): def wake(bot):
directory = os.fsencode("chatlogs/") directory = os.fsencode("chatlogs/")
@ -25,7 +28,7 @@ def wake(bot):
if filename.endswith(".txt"): if filename.endswith(".txt"):
chat = loadchat("chatlogs/" + filename) chat = loadchat("chatlogs/" + filename)
chatlogs[chat.id] = chat chatlogs[chat.id] = chat
print("loaded chat " + chat.id) print("loaded chat " + chat.title + " [" + chat.id + "]")
continue continue
else: else:
continue continue
@ -98,12 +101,14 @@ def read(bot, update):
# TO DO: añadir % de que haga reply en vez de send # TO DO: añadir % de que haga reply en vez de send
try: try:
bot.sendMessage(chatlog.id, msg) bot.sendMessage(chatlog.id, msg)
except TelegramError: except TimedOut:
chatlog.set_freq(chatlog.freq + 20) chatlog.set_freq(chatlog.freq + CHAT_INC)
print("Increased freq for chat " + chatlog.title + " [" + chatlog.id + "]")
if get_chatname(chat) != chatlog.title: if get_chatname(chat) != chatlog.title:
chatlog.set_title(get_chatname(chat)) chatlog.set_title(get_chatname(chat))
savechat(chatlog) savechat(chatlog)
elif chatlog.freq > CHAT_SAVE and chatlog.get_count()%CHAT_SAVE == 0:
savechat(chatlog)
chatlogs[chatlog.id] = chatlog chatlogs[chatlog.id] = chatlog
def speak(bot, update): def speak(bot, update):
@ -121,12 +126,10 @@ def speak(bot, update):
msg = chatlog.speak() msg = chatlog.speak()
update.message.reply_text(msg) update.message.reply_text(msg)
savechat(chatlog) savechat(chatlog)
chatlogs[chatlog.id] = chatlog chatlogs[chatlog.id] = chatlog
def get_chatlogs(bot, update): def get_chatlogs(bot, update):
global GUILLERMO_ID if str(update.message.chat.id) == GUILLERMO_ID:
if update.message.chat.id is GUILLERMO_ID:
m = "I have these chatlogs:" m = "I have these chatlogs:"
for c in chatlogs: for c in chatlogs:
m += "\n" + chatlogs[c].id + " " + chatlogs[c].title m += "\n" + chatlogs[c].id + " " + chatlogs[c].title
@ -157,6 +160,7 @@ def set_freq(bot, update):
value = int(value) value = int(value)
value = chatlogs[ident].set_freq(value) value = chatlogs[ident].set_freq(value)
reply = "Frequency of speaking set to " + str(value) reply = "Frequency of speaking set to " + str(value)
savechat(chatlogs[ident])
except: except:
reply = "Format was confusing; frequency not changed from " + str(chatlogs[ident].freq) reply = "Format was confusing; frequency not changed from " + str(chatlogs[ident].freq)
update.message.reply_text(reply) update.message.reply_text(reply)