Uploading Velasco v1.4

- It now saves the vocabulary dictionary directly into the file, so there is no need to deal with the full list of messages. - This also means that the number of elements kept in memory has been reduced, since there is no need to keep a list with all the words one after another, apart from the dictionary. - Modularized some constants, like the frequency of saves when the frequency of speaking is too large, or the stop words that mark the start and end of a message.
2025-06-06 20:44:38 +02:00 · 2017-09-21 15:39:53 +02:00 · 2017-09-21 15:39:53 +02:00 · 1d1bd6034e
commit 1d1bd6034e
parent 810e517757
5 changed files with 86 additions and 62 deletions
--- a/pycache/chatlog.cpython-36.pyc
+++ b/pycache/chatlog.cpython-36.pyc
--- a/pycache/markov.cpython-36.pyc
+++ b/pycache/markov.cpython-36.pyc
--- a/chatlog.py
+++ b/chatlog.py
@ -3,21 +3,22 @@
 from markov import *

 class Chatlog(object):
-    def __init__(self, ident, chattype, title, msgs=None, freq=None):
-        if msgs is not None:
-            self.msgs = msgs
-        else:
-            self.msgs = []
+    def __init__(self, ident, chattype, title, text=None, freq=None):
        self.id = str(ident)
        self.type = chattype
        self.title = title
        if freq is None:
            if "group" in chattype:
-                freq = 20
+                freq = 15
            #elif chattype is "private":
            else:
-                freq = 5
+                freq = 2
        self.freq = freq
+        if text is not None:
+            self.count = len(text)
+        else:
+            self.count = 0
+        self.gen = Markov(text)

    def set_title(self, title):
        self.title = title
@ -31,36 +32,30 @@ class Chatlog(object):
        return self.freq

    def add_msg(self, message):
-        msg = message.split()
-        msg.append("!kvl")
-        self.msgs.append(msg)
-
-    def get_markov_gen(self):
-        msgs = []
-        for m in self.msgs:
-            msgs.append(' '.join(m))
-        text = ' '.join(msgs)
-        self.gen = Markov(text)
+        self.gen.add_text(message + " !kvl")
+        self.count += 1

    def speak(self):
-        self.get_markov_gen()
        return self.gen.generate_markov_text()

    def get_count(self):
-        return len(self.msgs)
+        return self.count

    def to_txt(self):
        lines = [self.id]
        lines.append(self.type)
        lines.append(self.title)
        lines.append(str(self.freq))
-        for m in self.msgs:
-            lines.append(' '.join(m))
-        return '\n'.join(lines)
+        lines.append("dict:")
+        txt = '\n'.join(lines)
+        return txt + '\n' + self.gen.to_json()

    def from_txt(text):
        lines = text.splitlines()
-        msgs = []
-        for m in lines[4:]:
-            msgs.append(m.split())
-        return Chatlog(lines[0], lines[1], lines[2], msgs, int(lines[3]))
+        if(lines[4] == "dict:"):
+            new_log = Chatlog(lines[0], lines[1], lines[2], None, int(lines[3]))
+            cache = '\n'.join(lines[5:])
+            new_log.gen = Markov.from_json(cache)
+            return new_log
+        else:
+            return Chatlog(lines[0], lines[1], lines[2], lines[4:], int(lines[3]))
--- a/markov.py
+++ b/markov.py
@ -1,50 +1,75 @@
 #!/usr/bin/env python3

 import random
+import json
+
+HEAD = "\n!kvl"
+TAIL = "!kvl"
+
+def trim_and_split(text):
+    words = text.split(' ')
+    for i in range(len(words)):
+        words[i] = words[i].strip(' \t')
+    return words
+
+def getkey(w1, w2):
+    key = (w1.strip().casefold(), w2.strip().casefold())
+    return str(key)
+
+def triples(wordlist):
+    """ Generates triples from the given data string. So if our string were
+            "What a lovely day", we'd generate (What, a, lovely) and then
+            (a, lovely, day).
+    """
+
+    if len(wordlist) < 3:
+        return
+
+    for i in range(len(wordlist) - 2):
+        yield (wordlist[i], wordlist[i+1], wordlist[i+2])

 class Markov(object):
-    def __init__(self, text=None):
-        self.cache = {}
-        self.words = []
-        if text is None:
-            text = ""
-        self.words = ("!kvl\n"+text).split()
-        self.word_size = len(self.words)
-        self.database()
+    def __init__(self, text=None, from_json=False):
+        if not from_json:
+            self.cache = {}
+            if text is not None:
+                for line in text:
+                    self.add_text(line)
+        else:
+            self.cache = json.loads(text)

-    def triples(self):
-        """ Generates triples from the given data string. So if our string were
-                "What a lovely day", we'd generate (What, a, lovely) and then
-                (a, lovely, day).
-        """
+    def to_json(self):
+        return json.dumps(self.cache)

-        if len(self.words) < 3:
-            return
+    def from_json(string):
+        return Markov(string, True)

-        for i in range(len(self.words) - 2):
-            yield (self.words[i], self.words[i+1], self.words[i+2])
+    def add_text(self, text):
+        words = trim_and_split(HEAD + " " + text)
+        self.database(words)

-    def database(self):
-        for w1, w2, w3 in self.triples():
-            key = (w1.casefold(), w2.casefold())
+    def database(self, wordlist):
+        for w1, w2, w3 in triples(wordlist):
+            if w1 == HEAD:
+                if w1 in self.cache:
+                    self.cache[HEAD].append(w2)
+                else:
+                    self.cache[HEAD] = [w2]
+            key = getkey(w1, w2)
            if key in self.cache:
                self.cache[key].append(w3)
            else:
                self.cache[key] = [w3]

    def generate_markov_text(self, size=50):
-        seed = random.randint(0, self.word_size-4)
-        seed_word, next_word, next_word2 = self.words[seed], self.words[seed+1], self.words[seed+2]
-        while not "!kvl" in seed_word:
-            seed = random.randint(0, self.word_size-4)
-            seed_word, next_word, next_word2 = self.words[seed], self.words[seed+1], self.words[seed+2]
-        w1, w2 = next_word, next_word2
+        w1 = random.choice(self.cache[HEAD])
+        w2 = random.choice(self.cache[getkey(HEAD, w1)])
        gen_words = []
        for i in range(size):
            gen_words.append(w1)
-            if "!kvl" in w2 or not (w1.casefold(), w2.casefold()) in self.cache:
+            if w2 == TAIL or not getkey(w1, w2) in self.cache:
                print("Generated text")
                break
            else:
-                w1, w2 = w2, random.choice(self.cache[(w1.casefold(), w2.casefold())])
+                w1, w2 = w2, random.choice(self.cache[getkey(w1, w2)])
        return ' '.join(gen_words)
--- a/velasco.py
+++ b/velasco.py
@ -2,6 +2,7 @@

 import sys, os
 from telegram.ext import Updater, CommandHandler, MessageHandler, Filters
+from telegram.error import *
 from chatlog import *
 import logging
 import argparse
@ -15,7 +16,9 @@ logger = logging.getLogger(__name__)
 chatlogs = {}
 disabled = {}

-GUILLERMO_ID = 8379173
+GUILLERMO_ID = "8379173"
+CHAT_INC = 5
+CHAT_SAVE = 15

 def wake(bot):
    directory = os.fsencode("chatlogs/")
@ -25,7 +28,7 @@ def wake(bot):
        if filename.endswith(".txt"):
            chat = loadchat("chatlogs/" + filename)
            chatlogs[chat.id] = chat
-            print("loaded chat " + chat.id)
+            print("loaded chat " + chat.title + " [" + chat.id + "]")
            continue
        else:
            continue
@ -98,12 +101,14 @@ def read(bot, update):
        # TO DO: añadir % de que haga reply en vez de send
        try:
            bot.sendMessage(chatlog.id, msg)
-        except TelegramError:
-            chatlog.set_freq(chatlog.freq + 20)
+        except TimedOut:
+            chatlog.set_freq(chatlog.freq + CHAT_INC)
+            print("Increased freq for chat " + chatlog.title + " [" + chatlog.id + "]")
        if get_chatname(chat) != chatlog.title:
            chatlog.set_title(get_chatname(chat))
        savechat(chatlog)
-
+    elif chatlog.freq > CHAT_SAVE and chatlog.get_count()%CHAT_SAVE == 0:
+        savechat(chatlog)
    chatlogs[chatlog.id] = chatlog

 def speak(bot, update):
@ -121,12 +126,10 @@ def speak(bot, update):
    msg = chatlog.speak()
    update.message.reply_text(msg)
    savechat(chatlog)
-
    chatlogs[chatlog.id] = chatlog

 def get_chatlogs(bot, update):
-    global GUILLERMO_ID
-    if update.message.chat.id is GUILLERMO_ID:
+    if str(update.message.chat.id) == GUILLERMO_ID:
        m = "I have these chatlogs:"
        for c in chatlogs:
            m += "\n" + chatlogs[c].id + " " + chatlogs[c].title
@ -157,6 +160,7 @@ def set_freq(bot, update):
            value = int(value)
            value = chatlogs[ident].set_freq(value)
            reply = "Frequency of speaking set to " + str(value)
+            savechat(chatlogs[ident])
        except:
            reply = "Format was confusing; frequency not changed from " + str(chatlogs[ident].freq)
    update.message.reply_text(reply)