nlp stuff
parent fe187ba59e
commit faa542aca4
7 changed files with 186 additions and 43 deletions
5  .gitignore  (vendored)

@@ -1,3 +1,2 @@
-.sekrit/
-__pycache__/
-*/__pycache__/
+.sekrit
+__pycache__
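A note on the tightened patterns: a .gitignore entry with no internal slash, like `__pycache__`, matches at any depth and matches both files and directories, so it already covers everything `*/__pycache__/` did (which, because of its slash, only matched one level down). The trailing-slash forms matched directories only; dropping the slash just widens each pattern to plain files of the same name as well.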
16  lulzbot.py

@@ -23,6 +23,7 @@ import numpy as np
 from fortune import fortune
 from src.twitter import get_tweet
 from src.cartman import cartman_speak
+from src.flan import flan_speak

 chuck_quotes = open('data/chuck_quotes').read().split('\n%\n')
 ligma_list = open('data/ligma_list').read().split('\n')
@@ -50,7 +51,7 @@ def chuck():
 def ac():
     return np.random.choice(aclist)

-message_handler = {'lulzbot': show_menu, # these need to be functions
+triggers = {'lulzbot': show_menu, # these need to be functions
             'musk': musk,
             'deez': ligma,
             'ligma': ligma,
@@ -61,12 +62,12 @@ message_handler = {'lulzbot': show_menu, # these need to be functions
             'prost!': prost,
             'fortune': fortune,
             'chuck': chuck,
-            'ac':ac,
+            'ac': ac,
             }

 TOKEN = open('.sekrit/discord_token').read()
 intents = discord.Intents.default()
-intents.message_content = True
+# intents.message_content = True
 client = discord.Client(activity=discord.Game(name='with myself'), intents=intents)

 @client.event
@@ -85,13 +86,16 @@ async def on_message(message):
         return

     elif message.channel.name == 'cartman':
-        async with message.channel.typing():
-            await message.channel.send(cartman_speak(user_message))
+        await message.channel.send(cartman_speak(user_message))
         #await message.channel.send("I'm broken, come back later.")

+    elif message.channel.name == 'flan':
+        await message.channel.send(flan_speak(user_message))
+        # await message.channel.send('GPU is busy, come back later')
+
     elif message.channel.name == 'shitposting':
-        if user_message.lower() in message_handler:
-            await message.channel.send(message_handler[user_message.lower()]())
+        if user_message.lower() in triggers:
+            await message.channel.send(triggers[user_message.lower()]())
             return

 client.run(TOKEN)
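One behavioral note on the cartman and flan branches: `cartman_speak` and `flan_speak` are synchronous (an HTTP round-trip and a GPU generate call, respectively), so calling them directly inside the async `on_message` handler stalls Discord's event loop until they return. A minimal sketch of one way to keep the bot responsive, assuming Python 3.9+ for `asyncio.to_thread` and an `import asyncio` at the top of lulzbot.py:

    elif message.channel.name == 'cartman':
        # run the blocking HTTP call in a worker thread instead of
        # directly on the event loop
        reply = await asyncio.to_thread(cartman_speak, user_message)
        await message.channel.send(reply)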
src/cartman.py

@@ -1,25 +1,33 @@
-from transformers.models.auto.tokenization_auto import AutoTokenizer
-from transformers.models.auto.modeling_auto import AutoModelForCausalLM
-import torch
+import requests
+import json

-#tokenizer = AutoTokenizer.from_pretrained('microsoft/DialoGPT-large')
-#model = AutoModelForCausalLM.from_pretrained('../southpark/output-medium')
-
-tokenizer = AutoTokenizer.from_pretrained('google/flan-t5-xxl')
-model = AutoModelForCausalLM.from_pretrained('google/flan-t5-xxl')
+url = 'https://doordesk.net/chat'

 def cartman_speak(user_message):
-    new_user_input_ids = tokenizer.encode(user_message + tokenizer.eos_token, return_tensors='pt')
-    bot_output = new_user_input_ids
-    bot_input_ids = torch.cat([new_user_input_ids, bot_output])
-    bot_output = model.generate(
-        bot_input_ids, max_length=200,
-        pad_token_id=tokenizer.eos_token_id,
-        no_repeat_ngram_size=3,
-        do_sample=True,
-        top_k=100,
-        top_p=0.7,
-        temperature=.8
-    )
-
-    return '{}'.format(tokenizer.decode(bot_output[:,bot_input_ids.shape[-1]:][0], skip_special_tokens=True))
+    message = {'Message': user_message}
+    response = requests.post(url, json.dumps(message))
+    return response.json().get('Cartman')
+
+
+# from transformers.models.auto.tokenization_auto import AutoTokenizer
+# from transformers.models.auto.modeling_auto import AutoModelForCausalLM
+# import torch
+#
+# tokenizer = AutoTokenizer.from_pretrained('microsoft/DialoGPT-large')
+# model = AutoModelForCausalLM.from_pretrained('../southpark/output-medium')
+#
+# def cartman_speak(user_message):
+#     new_user_input_ids = tokenizer.encode(user_message + tokenizer.eos_token, return_tensors='pt')
+#     bot_output = new_user_input_ids
+#     bot_input_ids = torch.cat([new_user_input_ids, bot_output])
+#     bot_output = model.generate(
+#         bot_input_ids, max_length=200,
+#         pad_token_id=tokenizer.eos_token_id,
+#         no_repeat_ngram_size=3,
+#         do_sample=True,
+#         top_k=100,
+#         top_p=0.7,
+#         temperature=.8
+#     )
+#
+#     return '{}'.format(tokenizer.decode(bot_output[:,bot_input_ids.shape[-1]:][0], skip_special_tokens=True))
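Small detail in the new `cartman_speak`: passing `json.dumps(message)` positionally sends the JSON string as a raw request body with no `Content-Type` header. requests can do the encoding and set the header in one step; whether the doordesk.net endpoint actually requires the header is untested here:

    response = requests.post(url, json=message)  # encodes and sets Content-Type: application/json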
38  src/flan.py  (new file)

@@ -0,0 +1,38 @@
+import torch
+from transformers.models.t5.tokenization_t5_fast import T5TokenizerFast
+from transformers.models.t5.modeling_t5 import T5ForConditionalGeneration
+
+tokenizer = T5TokenizerFast.from_pretrained("google/flan-t5-xl")
+device = torch.device('cuda')
+model = T5ForConditionalGeneration.from_pretrained("google/flan-t5-xl")
+model = model.to(device)
+
+def flan_speak(user_message):
+    input_ids = tokenizer(user_message, return_tensors='pt').input_ids.to('cuda')
+    user_input_word_count = len(user_message.split(' '))
+
+    if user_input_word_count * user_input_word_count > 100:
+        min_tokens = 100
+    else:
+        min_tokens = user_input_word_count * 2
+
+    bot_output = model.generate(
+        input_ids,
+        # min_length = min_tokens,
+        max_new_tokens = 350,
+        num_beams = 16,
+        num_beam_groups = 8,
+        no_repeat_ngram_size = 3,
+        length_penalty = 1.4,
+        diversity_penalty = 0.0,
+        repetition_penalty = 2.1,
+        early_stopping = True,
+
+        # do_sample = True,
+        # top_k = 256,
+        # top_p = 0.92,
+        # temperature = 0.4,
+    )
+
+    output = tokenizer.batch_decode(bot_output, skip_special_tokens=True)[0]
+    return output[:2000]
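Two things worth flagging in `flan_speak`: `min_tokens` is computed but only feeds the commented-out `min_length`, so it is dead code for now; and with `diversity_penalty = 0.0`, group beam search (`num_beam_groups = 8`) has nothing pushing the groups apart, so it behaves like plain 16-beam search. If distinct groups are the goal, a nonzero penalty is what creates them; a hedged sketch, with 0.5 as an assumed starting value rather than anything tuned:

    bot_output = model.generate(
        input_ids,
        num_beams = 16,
        num_beam_groups = 8,
        diversity_penalty = 0.5,  # assumption: a common starting value, not tuned
        no_repeat_ngram_size = 3,
        max_new_tokens = 350,
    )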
61  src/prince.py  (new file)

@@ -0,0 +1,61 @@
+
+    elif user_message.lower().count('lulzbot tell me about yourself') > 0:
+        await message.channel.send(\
+'In west Philadelphia born and raised\n\
+On the playground was where I spent most of my days')
+        time.sleep(4.6)
+        await message.channel.send('\
+Chillin\' out maxin\' relaxin\' all cool\n\
+And all shooting some b-ball outside of the school')
+        time.sleep(4.6)
+        await message.channel.send('\
+When a couple of guys who were up to no good\n\
+Started making trouble in my neighborhood')
+        time.sleep(4.6)
+        await message.channel.send('\
+I got in one little fight and my mom got scared\n\
+She said, "You\'re movin\' with your auntie and uncle in Bel-Air"')
+        time.sleep(5)
+        await message.channel.send('\
+I begged and pleaded with her day after day\n\
+But she packed my suitcase and sent me on my way')
+        time.sleep(4.6)
+        await message.channel.send('\
+She gave me a kiss and then she gave me my ticket\n\
+I put my Walkman on and said\n\
+"I might as well kick it"')
+        time.sleep(4.5)
+        await message.channel.send('\
+First class, yo, this is bad\n\
+Drinking orange juice out of a champagne glass')
+        time.sleep(4.5)
+        await message.channel.send('\
+Is this what the people of Bel-Air living like?\n\
+Hmm, this might be alright')
+        time.sleep(4.5)
+        await message.channel.send('\
+I whistled for a cab and when it came near\n\
+The license plate said "Fresh" and it had dice in the mirror')
+        time.sleep(4.5)
+        await message.channel.send('\
+If anything I could say that this cab was rare\n\
+But I thought, "Nah, forget it"\n\
+– "Yo, homes to Bel-Air"')
+        time.sleep(4.5)
+        await message.channel.send('\
+I')
+        time.sleep(.5)
+        await message.channel.send('\
+pulled')
+        time.sleep(.5)
+        await message.channel.send('\
+up to the house about 7 or 8\n\
+And I yelled to the cabbie\n\
+"Yo homes smell ya later"')
+        time.sleep(4.5)
+        await message.channel.send('\
+I looked at my kingdom\n\
+I was finally there\n\
+To sit on my throne as the Prince of Bel-Air')
+        return
+
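As committed, src/prince.py is a stashed snippet rather than an importable module: the bare `elif` only parses inside lulzbot.py's `on_message` handler. Also, `time.sleep` inside an async handler freezes the whole bot between verses; the non-blocking form (with `import asyncio`) would be:

        await asyncio.sleep(4.6)  # yields to the event loop instead of blocking it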
15  test/cartmantest.py  (new file)

@@ -0,0 +1,15 @@
+import requests
+import json
+
+url = 'https://doordesk.net/chat'
+
+active = True
+
+while active:
+    user_input = input('>> ')
+    if user_input == 'q':
+        active = False
+        break
+    message = {'Message': user_input}
+    response = requests.post(url, json.dumps(message))
+    print(response.json().get('Cartman'))
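A quick interactive run looks like the following (the reply is whatever the hosted model returns; shape inferred from the code, not from any API docs):

    $ python test/cartmantest.py
    >> hey cartman
    <Cartman model reply>
    >> q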
18  test/flantest.py  (new file)

@@ -0,0 +1,18 @@
+import torch
+from transformers.models.t5.tokenization_t5 import T5Tokenizer
+from transformers.models.t5.modeling_t5 import T5ForConditionalGeneration
+
+device = torch.device("cuda")
+tokenizer = T5Tokenizer.from_pretrained("google/flan-t5-xl")
+model = T5ForConditionalGeneration.from_pretrained("google/flan-t5-xxl").cuda()
+
+run = True
+while run:
+    input_text = input('>> ')
+    if input_text == 'q':
+        run = False
+        break
+    input_ids = tokenizer(input_text, return_tensors="pt").input_ids.to("cuda")
+
+    outputs = model.generate(input_ids)
+    print(tokenizer.decode(outputs[0]))
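Two quirks here: the tokenizer loads flan-t5-xl while the model is flan-t5-xxl (likely harmless, since the FLAN-T5 sizes share a vocabulary, but matching the names is safer), and a bare `model.generate(input_ids)` falls back to the library's short default generation length (about 20 tokens), so replies get cut off; `decode` without `skip_special_tokens=True` also leaves `<pad>`/`</s>` markers in the output. A sketch of the longer-form call:

    outputs = model.generate(input_ids, max_new_tokens=200)
    print(tokenizer.decode(outputs[0], skip_special_tokens=True))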