nlp stuff

This commit is contained in:
Adam 2022-12-21 15:56:25 -05:00
parent fe187ba59e
commit faa542aca4
7 changed files with 186 additions and 43 deletions

5
.gitignore vendored
View file

@@ -1,3 +1,2 @@
.sekrit/
__pycache__/
*/__pycache__/
.sekrit
__pycache__

View file

@@ -23,6 +23,7 @@ import numpy as np
from fortune import fortune
from src.twitter import get_tweet
from src.cartman import cartman_speak
from src.flan import flan_speak
chuck_quotes = open('data/chuck_quotes').read().split('\n%\n')
ligma_list = open('data/ligma_list').read().split('\n')
@@ -50,23 +51,23 @@ def chuck():
def ac():
    """Return one randomly chosen entry from the module-level ``aclist``."""
    pick = np.random.choice(aclist)
    return pick
# Dispatch table: lowercased trigger word -> zero-argument callable whose
# return value is sent back to the channel (values must be functions).
message_handler = {
    'lulzbot': show_menu,
    'musk': musk,
    'deez': ligma,
    'ligma': ligma,
    'bofa': ligma,
    'bopha': ligma,
    'limerick': limerick,
    'limrick': limerick,  # common misspelling, same handler
    'prost!': prost,
    'fortune': fortune,
    'chuck': chuck,
    'ac': ac,
}
# Dispatch table keyed by lowercased trigger word; each value is a
# zero-argument callable whose result is posted as the reply.
# NOTE(review): appears to duplicate `message_handler` above (diff overlay?).
triggers = {
    'lulzbot': show_menu,
    'musk': musk,
    'deez': ligma,
    'ligma': ligma,
    'bofa': ligma,
    'bopha': ligma,
    'limerick': limerick,
    'limrick': limerick,  # misspelling alias
    'prost!': prost,
    'fortune': fortune,
    'chuck': chuck,
    'ac': ac,
}
# Discord credentials and client setup.
# Fix: the original `open(...).read()` leaked the file handle; the context
# manager closes it deterministically.
# NOTE(review): `.strip()` added — a trailing newline in the token file would
# otherwise be sent as part of the token; confirm the file has no
# intentionally significant whitespace.
with open('.sekrit/discord_token') as token_file:
    TOKEN = token_file.read().strip()

intents = discord.Intents.default()
intents.message_content = True  # privileged intent: needed to read message text
client = discord.Client(activity=discord.Game(name='with myself'), intents=intents)
@client.event
@@ -85,13 +86,16 @@ async def on_message(message):
# NOTE(review): these lines are the interior of `async def on_message` (the
# def itself is outside this view) and the original indentation was lost in
# extraction — re-indent before use.  Consecutive duplicated lines below are
# an old/new diff overlay, not intentional double-sends.
return
# Route by channel name: 'cartman' -> remote Cartman model, 'flan' -> local
# FLAN-T5, 'shitposting' -> keyword dispatch table.
elif message.channel.name == 'cartman':
async with message.channel.typing():
await message.channel.send(cartman_speak(user_message))
#await message.channel.send("I'm broken, come back later.")
await message.channel.send(cartman_speak(user_message))
#await message.channel.send("I'm broken, come back later.")
elif message.channel.name == 'flan':
await message.channel.send(flan_speak(user_message))
# await message.channel.send('GPU is busy, come back later')
elif message.channel.name == 'shitposting':
# Lowercased whole-message lookup: the entire message must equal a trigger.
if user_message.lower() in message_handler:
await message.channel.send(message_handler[user_message.lower()]())
if user_message.lower() in triggers:
await message.channel.send(triggers[user_message.lower()]())
return
# Blocking call: starts the bot's event loop with the token read above.
client.run(TOKEN)

View file

@@ -1,25 +1,33 @@
# Cartman chat backend: relays messages to a remote HTTP service (see
# cartman_speak below).
from transformers.models.auto.tokenization_auto import AutoTokenizer
from transformers.models.auto.modeling_auto import AutoModelForCausalLM
import torch
import requests
import json
#tokenizer = AutoTokenizer.from_pretrained('microsoft/DialoGPT-large')
#model = AutoModelForCausalLM.from_pretrained('../southpark/output-medium')
# NOTE(review): flan-t5 is an encoder-decoder model; loading it via
# AutoModelForCausalLM is expected to fail (no causal-LM mapping for T5).
# These two lines look like dead diff residue — confirm and remove.
tokenizer = AutoTokenizer.from_pretrained('google/flan-t5-xxl')
model = AutoModelForCausalLM.from_pretrained('google/flan-t5-xxl')
# Endpoint of the remote Cartman generation service.
url = 'https://doordesk.net/chat'
def cartman_speak(user_message):
    """Relay *user_message* to the remote Cartman chat service.

    Args:
        user_message: raw text of the user's Discord message.

    Returns:
        The 'Cartman' field of the service's JSON reply, or None if absent.
    """
    # Fix: removed the dead local-generation path — the original tokenized
    # and ran model.generate(), then discarded that output in favor of the
    # HTTP call, and had an unreachable second `return` after this one.
    message = {'Message': user_message}
    # NOTE(review): no timeout or error handling — a hung service blocks the
    # caller (the bot's event loop); confirm whether timeout= is acceptable.
    response = requests.post(url, json.dumps(message))
    return response.json().get('Cartman')
# from transformers.models.auto.tokenization_auto import AutoTokenizer
# from transformers.models.auto.modeling_auto import AutoModelForCausalLM
# import torch
#
# tokenizer = AutoTokenizer.from_pretrained('microsoft/DialoGPT-large')
# model = AutoModelForCausalLM.from_pretrained('../southpark/output-medium')
#
# def cartman_speak(user_message):
# new_user_input_ids = tokenizer.encode(user_message + tokenizer.eos_token, return_tensors='pt')
# bot_output = new_user_input_ids
# bot_input_ids = torch.cat([new_user_input_ids, bot_output])
# bot_output = model.generate(
# bot_input_ids, max_length= 200,
# pad_token_id=tokenizer.eos_token_id,
# no_repeat_ngram_size=3,
# do_sample=True,
# top_k=100,
# top_p=0.7,
# temperature=.8
# )
#
# return '{}'.format(tokenizer.decode(bot_output[:,bot_input_ids.shape[-1]:][0], skip_special_tokens=True))

38
src/flan.py Normal file
View file

@@ -0,0 +1,38 @@
import torch
from transformers.models.t5.tokenization_t5_fast import T5TokenizerFast
from transformers.models.t5.modeling_t5 import T5ForConditionalGeneration

# FLAN-T5 XL chat backend: fast tokenizer plus the model moved onto the GPU.
device = torch.device('cuda')
tokenizer = T5TokenizerFast.from_pretrained("google/flan-t5-xl")
model = T5ForConditionalGeneration.from_pretrained("google/flan-t5-xl").to(device)
def flan_speak(user_message):
    """Generate a FLAN-T5 reply to *user_message*.

    Args:
        user_message: raw text of the user's Discord message.

    Returns:
        The decoded reply, truncated to 2000 characters — presumably the
        Discord message-length limit (confirm).
    """
    # Use the module-level `device` for consistency with setup above
    # (was a hardcoded 'cuda' string).
    input_ids = tokenizer(user_message, return_tensors='pt').input_ids.to(device)
    # Fix: removed the unused `min_tokens` computation — it only fed the
    # commented-out `min_length=` argument below and was otherwise dead.
    bot_output = model.generate(
        input_ids,
        # min_length was disabled here; restore via a min_tokens heuristic if needed.
        max_new_tokens=350,
        num_beams=16,
        num_beam_groups=8,
        no_repeat_ngram_size=3,
        length_penalty=1.4,
        # NOTE(review): diversity_penalty=0.0 makes the 8 beam groups behave
        # like plain beam search — confirm this is intentional.
        diversity_penalty=0.0,
        repetition_penalty=2.1,
        early_stopping=True,
        # do_sample / top_k / top_p / temperature intentionally disabled in
        # favor of deterministic grouped beam search.
    )
    output = tokenizer.batch_decode(bot_output, skip_special_tokens=True)[0]
    return output[:2000]

61
src/prince.py Normal file
View file

@@ -0,0 +1,61 @@
# NOTE(review): this file is a paste-ready fragment for bot.py's on_message
# handler — it starts with `elif`, so it is not importable or runnable on its
# own.  It expects `message`, `user_message`, and an imported `time` from the
# surrounding scope, and its original indentation was lost in extraction.
# It streams the "Fresh Prince of Bel-Air" intro verse-by-verse, sleeping
# between sends to pace the lyrics.
elif user_message.lower().count('lulzbot tell me about yourself') > 0:
await message.channel.send(\
'In west Philadelphia born and raised\n\
On the playground was where I spent most of my days')
# NOTE(review): time.sleep blocks the asyncio event loop for the whole bot;
# asyncio.sleep would pause only this handler — confirm before changing.
time.sleep(4.6)
await message.channel.send('\
Chillin\' out maxin\' relaxin\' all cool\n\
And all shooting some b-ball outside of the school')
time.sleep(4.6)
await message.channel.send('\
When a couple of guys who were up to no good\n\
Started making trouble in my neighborhood')
time.sleep(4.6)
await message.channel.send('\
I got in one little fight and my mom got scared\n\
She said, "You\'re movin\' with your auntie and uncle in Bel-Air"')
time.sleep(5)
await message.channel.send('\
I begged and pleaded with her day after day\n\
But she packed my suitcase and sent me on my way')
time.sleep(4.6)
await message.channel.send('\
She gave me a kiss and then she gave me my ticket\n\
I put my Walkman on and said\n\
"I might as well kick it"')
time.sleep(4.5)
await message.channel.send('\
First class, yo, this is bad\n\
Drinking orange juice out of a champagne glass')
time.sleep(4.5)
await message.channel.send('\
Is this what the people of Bel-Air living like?\n\
Hmm, this might be alright')
time.sleep(4.5)
await message.channel.send('\
I whistled for a cab and when it came near\n\
The license plate said "Fresh" and it had dice in the mirror')
time.sleep(4.5)
await message.channel.send('\
If anything I could say that this cab was rare\n\
But I thought, "Nah, forget it"\n\
"Yo, homes to Bel-Air"')
time.sleep(4.5)
# Dramatic pause: "I ... pulled ... up" split across three short sends.
await message.channel.send('\
I')
time.sleep(.5)
await message.channel.send('\
pulled')
time.sleep(.5)
await message.channel.send('\
up to the house about 7 or 8\n\
And I yelled to the cabbie\n\
"Yo homes smell ya later"')
time.sleep(4.5)
await message.channel.send('\
I looked at my kingdom\n\
I was finally there\n\
To sit on my throne as the Prince of Bel-Air')
return

15
test/cartmantest.py Normal file
View file

@@ -0,0 +1,15 @@
import requests
import json

# Endpoint of the remote Cartman generation service.
url = 'https://doordesk.net/chat'

# Interactive smoke test for the remote Cartman endpoint; type 'q' to quit.
while True:
    user_input = input('>> ')
    # Fix: was `if user_input in 'q'` — a substring test, so an empty line
    # ('' in 'q' is True) also quit.  The redundant `active` flag is gone too.
    if user_input == 'q':
        break
    message = {'Message': user_input}
    response = requests.post(url, json.dumps(message))
    print(response.json().get('Cartman'))

18
test/flantest.py Normal file
View file

@@ -0,0 +1,18 @@
import torch
from transformers.models.t5.tokenization_t5 import T5Tokenizer
from transformers.models.t5.modeling_t5 import T5ForConditionalGeneration

device = torch.device("cuda")
# NOTE(review): tokenizer is flan-t5-xl but the model is flan-t5-xxl — the
# T5 vocab is shared so this works, but confirm the mismatch is intentional.
tokenizer = T5Tokenizer.from_pretrained("google/flan-t5-xl")
model = T5ForConditionalGeneration.from_pretrained("google/flan-t5-xxl").to(device)

# Interactive REPL smoke test; type 'q' to quit.
while True:
    input_text = input('>> ')
    # Fix: was `if input_text in 'q'` — a substring test, so an empty line
    # ('' in 'q' is True) also quit.  The redundant `run` flag is gone too.
    if input_text == 'q':
        break
    input_ids = tokenizer(input_text, return_tensors="pt").input_ids.to(device)
    outputs = model.generate(input_ids)
    print(tokenizer.decode(outputs[0]))