mirror of
https://gitlab.com/vylion/velascobot.git
synced 2025-04-19 21:46:35 +02:00
Uploading Velasco v1.4
- It now saves the dictionary of vocabulary directly into the file. No need to deal with the full list of messages. - This also means that the amount of elements kept in memory has been reduced, since there is no need to keep a list with all the words one after another, apart from the dictionary. - Modularized some constants, like the frequency of saves if the frequency of speaking is too large, or the stop words that mark the start and end of a message.
This commit is contained in:
parent
810e517757
commit
1d1bd6034e
5 changed files with 86 additions and 62 deletions
Binary file not shown.
Binary file not shown.
47
chatlog.py
47
chatlog.py
|
@ -3,21 +3,22 @@
|
||||||
from markov import *
|
from markov import *
|
||||||
|
|
||||||
class Chatlog(object):
|
class Chatlog(object):
|
||||||
def __init__(self, ident, chattype, title, msgs=None, freq=None):
|
def __init__(self, ident, chattype, title, text=None, freq=None):
|
||||||
if msgs is not None:
|
|
||||||
self.msgs = msgs
|
|
||||||
else:
|
|
||||||
self.msgs = []
|
|
||||||
self.id = str(ident)
|
self.id = str(ident)
|
||||||
self.type = chattype
|
self.type = chattype
|
||||||
self.title = title
|
self.title = title
|
||||||
if freq is None:
|
if freq is None:
|
||||||
if "group" in chattype:
|
if "group" in chattype:
|
||||||
freq = 20
|
freq = 15
|
||||||
#elif chattype is "private":
|
#elif chattype is "private":
|
||||||
else:
|
else:
|
||||||
freq = 5
|
freq = 2
|
||||||
self.freq = freq
|
self.freq = freq
|
||||||
|
if text is not None:
|
||||||
|
self.count = len(text)
|
||||||
|
else:
|
||||||
|
self.count = 0
|
||||||
|
self.gen = Markov(text)
|
||||||
|
|
||||||
def set_title(self, title):
|
def set_title(self, title):
|
||||||
self.title = title
|
self.title = title
|
||||||
|
@ -31,36 +32,30 @@ class Chatlog(object):
|
||||||
return self.freq
|
return self.freq
|
||||||
|
|
||||||
def add_msg(self, message):
|
def add_msg(self, message):
|
||||||
msg = message.split()
|
self.gen.add_text(message + " !kvl")
|
||||||
msg.append("!kvl")
|
self.count += 1
|
||||||
self.msgs.append(msg)
|
|
||||||
|
|
||||||
def get_markov_gen(self):
|
|
||||||
msgs = []
|
|
||||||
for m in self.msgs:
|
|
||||||
msgs.append(' '.join(m))
|
|
||||||
text = ' '.join(msgs)
|
|
||||||
self.gen = Markov(text)
|
|
||||||
|
|
||||||
def speak(self):
|
def speak(self):
|
||||||
self.get_markov_gen()
|
|
||||||
return self.gen.generate_markov_text()
|
return self.gen.generate_markov_text()
|
||||||
|
|
||||||
def get_count(self):
|
def get_count(self):
|
||||||
return len(self.msgs)
|
return self.count
|
||||||
|
|
||||||
def to_txt(self):
|
def to_txt(self):
|
||||||
lines = [self.id]
|
lines = [self.id]
|
||||||
lines.append(self.type)
|
lines.append(self.type)
|
||||||
lines.append(self.title)
|
lines.append(self.title)
|
||||||
lines.append(str(self.freq))
|
lines.append(str(self.freq))
|
||||||
for m in self.msgs:
|
lines.append("dict:")
|
||||||
lines.append(' '.join(m))
|
txt = '\n'.join(lines)
|
||||||
return '\n'.join(lines)
|
return txt + '\n' + self.gen.to_json()
|
||||||
|
|
||||||
def from_txt(text):
|
def from_txt(text):
|
||||||
lines = text.splitlines()
|
lines = text.splitlines()
|
||||||
msgs = []
|
if(lines[4] == "dict:"):
|
||||||
for m in lines[4:]:
|
new_log = Chatlog(lines[0], lines[1], lines[2], None, int(lines[3]))
|
||||||
msgs.append(m.split())
|
cache = '\n'.join(lines[5:])
|
||||||
return Chatlog(lines[0], lines[1], lines[2], msgs, int(lines[3]))
|
new_log.gen = Markov.from_json(cache)
|
||||||
|
return new_log
|
||||||
|
else:
|
||||||
|
return Chatlog(lines[0], lines[1], lines[2], lines[4:], int(lines[3]))
|
||||||
|
|
81
markov.py
81
markov.py
|
@ -1,50 +1,75 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
import random
|
import random
|
||||||
|
import json
|
||||||
|
|
||||||
|
HEAD = "\n!kvl"
|
||||||
|
TAIL = "!kvl"
|
||||||
|
|
||||||
|
def trim_and_split(text):
|
||||||
|
words = text.split(' ')
|
||||||
|
for i in range(len(words)):
|
||||||
|
words[i] = words[i].strip(' \t')
|
||||||
|
return words
|
||||||
|
|
||||||
|
def getkey(w1, w2):
|
||||||
|
key = (w1.strip().casefold(), w2.strip().casefold())
|
||||||
|
return str(key)
|
||||||
|
|
||||||
|
def triples(wordlist):
|
||||||
|
""" Generates triples from the given data string. So if our string were
|
||||||
|
"What a lovely day", we'd generate (What, a, lovely) and then
|
||||||
|
(a, lovely, day).
|
||||||
|
"""
|
||||||
|
|
||||||
|
if len(wordlist) < 3:
|
||||||
|
return
|
||||||
|
|
||||||
|
for i in range(len(wordlist) - 2):
|
||||||
|
yield (wordlist[i], wordlist[i+1], wordlist[i+2])
|
||||||
|
|
||||||
class Markov(object):
|
class Markov(object):
|
||||||
def __init__(self, text=None):
|
def __init__(self, text=None, from_json=False):
|
||||||
self.cache = {}
|
if not from_json:
|
||||||
self.words = []
|
self.cache = {}
|
||||||
if text is None:
|
if text is not None:
|
||||||
text = ""
|
for line in text:
|
||||||
self.words = ("!kvl\n"+text).split()
|
self.add_text(line)
|
||||||
self.word_size = len(self.words)
|
else:
|
||||||
self.database()
|
self.cache = json.loads(text)
|
||||||
|
|
||||||
def triples(self):
|
def to_json(self):
|
||||||
""" Generates triples from the given data string. So if our string were
|
return json.dumps(self.cache)
|
||||||
"What a lovely day", we'd generate (What, a, lovely) and then
|
|
||||||
(a, lovely, day).
|
|
||||||
"""
|
|
||||||
|
|
||||||
if len(self.words) < 3:
|
def from_json(string):
|
||||||
return
|
return Markov(string, True)
|
||||||
|
|
||||||
for i in range(len(self.words) - 2):
|
def add_text(self, text):
|
||||||
yield (self.words[i], self.words[i+1], self.words[i+2])
|
words = trim_and_split(HEAD + " " + text)
|
||||||
|
self.database(words)
|
||||||
|
|
||||||
def database(self):
|
def database(self, wordlist):
|
||||||
for w1, w2, w3 in self.triples():
|
for w1, w2, w3 in triples(wordlist):
|
||||||
key = (w1.casefold(), w2.casefold())
|
if w1 == HEAD:
|
||||||
|
if w1 in self.cache:
|
||||||
|
self.cache[HEAD].append(w2)
|
||||||
|
else:
|
||||||
|
self.cache[HEAD] = [w2]
|
||||||
|
key = getkey(w1, w2)
|
||||||
if key in self.cache:
|
if key in self.cache:
|
||||||
self.cache[key].append(w3)
|
self.cache[key].append(w3)
|
||||||
else:
|
else:
|
||||||
self.cache[key] = [w3]
|
self.cache[key] = [w3]
|
||||||
|
|
||||||
def generate_markov_text(self, size=50):
|
def generate_markov_text(self, size=50):
|
||||||
seed = random.randint(0, self.word_size-4)
|
w1 = random.choice(self.cache[HEAD])
|
||||||
seed_word, next_word, next_word2 = self.words[seed], self.words[seed+1], self.words[seed+2]
|
w2 = random.choice(self.cache[getkey(HEAD, w1)])
|
||||||
while not "!kvl" in seed_word:
|
|
||||||
seed = random.randint(0, self.word_size-4)
|
|
||||||
seed_word, next_word, next_word2 = self.words[seed], self.words[seed+1], self.words[seed+2]
|
|
||||||
w1, w2 = next_word, next_word2
|
|
||||||
gen_words = []
|
gen_words = []
|
||||||
for i in range(size):
|
for i in range(size):
|
||||||
gen_words.append(w1)
|
gen_words.append(w1)
|
||||||
if "!kvl" in w2 or not (w1.casefold(), w2.casefold()) in self.cache:
|
if w2 == TAIL or not getkey(w1, w2) in self.cache:
|
||||||
print("Generated text")
|
print("Generated text")
|
||||||
break
|
break
|
||||||
else:
|
else:
|
||||||
w1, w2 = w2, random.choice(self.cache[(w1.casefold(), w2.casefold())])
|
w1, w2 = w2, random.choice(self.cache[getkey(w1, w2)])
|
||||||
return ' '.join(gen_words)
|
return ' '.join(gen_words)
|
||||||
|
|
20
velasco.py
20
velasco.py
|
@ -2,6 +2,7 @@
|
||||||
|
|
||||||
import sys, os
|
import sys, os
|
||||||
from telegram.ext import Updater, CommandHandler, MessageHandler, Filters
|
from telegram.ext import Updater, CommandHandler, MessageHandler, Filters
|
||||||
|
from telegram.error import *
|
||||||
from chatlog import *
|
from chatlog import *
|
||||||
import logging
|
import logging
|
||||||
import argparse
|
import argparse
|
||||||
|
@ -15,7 +16,9 @@ logger = logging.getLogger(__name__)
|
||||||
chatlogs = {}
|
chatlogs = {}
|
||||||
disabled = {}
|
disabled = {}
|
||||||
|
|
||||||
GUILLERMO_ID = 8379173
|
GUILLERMO_ID = "8379173"
|
||||||
|
CHAT_INC = 5
|
||||||
|
CHAT_SAVE = 15
|
||||||
|
|
||||||
def wake(bot):
|
def wake(bot):
|
||||||
directory = os.fsencode("chatlogs/")
|
directory = os.fsencode("chatlogs/")
|
||||||
|
@ -25,7 +28,7 @@ def wake(bot):
|
||||||
if filename.endswith(".txt"):
|
if filename.endswith(".txt"):
|
||||||
chat = loadchat("chatlogs/" + filename)
|
chat = loadchat("chatlogs/" + filename)
|
||||||
chatlogs[chat.id] = chat
|
chatlogs[chat.id] = chat
|
||||||
print("loaded chat " + chat.id)
|
print("loaded chat " + chat.title + " [" + chat.id + "]")
|
||||||
continue
|
continue
|
||||||
else:
|
else:
|
||||||
continue
|
continue
|
||||||
|
@ -98,12 +101,14 @@ def read(bot, update):
|
||||||
# TO DO: añadir % de que haga reply en vez de send
|
# TO DO: añadir % de que haga reply en vez de send
|
||||||
try:
|
try:
|
||||||
bot.sendMessage(chatlog.id, msg)
|
bot.sendMessage(chatlog.id, msg)
|
||||||
except TelegramError:
|
except TimedOut:
|
||||||
chatlog.set_freq(chatlog.freq + 20)
|
chatlog.set_freq(chatlog.freq + CHAT_INC)
|
||||||
|
print("Increased freq for chat " + chatlog.title + " [" + chatlog.id + "]")
|
||||||
if get_chatname(chat) != chatlog.title:
|
if get_chatname(chat) != chatlog.title:
|
||||||
chatlog.set_title(get_chatname(chat))
|
chatlog.set_title(get_chatname(chat))
|
||||||
savechat(chatlog)
|
savechat(chatlog)
|
||||||
|
elif chatlog.freq > CHAT_SAVE and chatlog.get_count()%CHAT_SAVE == 0:
|
||||||
|
savechat(chatlog)
|
||||||
chatlogs[chatlog.id] = chatlog
|
chatlogs[chatlog.id] = chatlog
|
||||||
|
|
||||||
def speak(bot, update):
|
def speak(bot, update):
|
||||||
|
@ -121,12 +126,10 @@ def speak(bot, update):
|
||||||
msg = chatlog.speak()
|
msg = chatlog.speak()
|
||||||
update.message.reply_text(msg)
|
update.message.reply_text(msg)
|
||||||
savechat(chatlog)
|
savechat(chatlog)
|
||||||
|
|
||||||
chatlogs[chatlog.id] = chatlog
|
chatlogs[chatlog.id] = chatlog
|
||||||
|
|
||||||
def get_chatlogs(bot, update):
|
def get_chatlogs(bot, update):
|
||||||
global GUILLERMO_ID
|
if str(update.message.chat.id) == GUILLERMO_ID:
|
||||||
if update.message.chat.id is GUILLERMO_ID:
|
|
||||||
m = "I have these chatlogs:"
|
m = "I have these chatlogs:"
|
||||||
for c in chatlogs:
|
for c in chatlogs:
|
||||||
m += "\n" + chatlogs[c].id + " " + chatlogs[c].title
|
m += "\n" + chatlogs[c].id + " " + chatlogs[c].title
|
||||||
|
@ -157,6 +160,7 @@ def set_freq(bot, update):
|
||||||
value = int(value)
|
value = int(value)
|
||||||
value = chatlogs[ident].set_freq(value)
|
value = chatlogs[ident].set_freq(value)
|
||||||
reply = "Frequency of speaking set to " + str(value)
|
reply = "Frequency of speaking set to " + str(value)
|
||||||
|
savechat(chatlogs[ident])
|
||||||
except:
|
except:
|
||||||
reply = "Format was confusing; frequency not changed from " + str(chatlogs[ident].freq)
|
reply = "Format was confusing; frequency not changed from " + str(chatlogs[ident].freq)
|
||||||
update.message.reply_text(reply)
|
update.message.reply_text(reply)
|
||||||
|
|
Loading…
Reference in a new issue