From faa542aca49f3b4926d69a49d6525ff821bd3be6 Mon Sep 17 00:00:00 2001 From: Adam <24621027+WhiteDopeOnPunk@users.noreply.github.com> Date: Wed, 21 Dec 2022 15:56:25 -0500 Subject: [PATCH] nlp stuff --- .gitignore | 5 ++-- lulzbot.py | 42 +++++++++++++++++-------------- src/cartman.py | 50 +++++++++++++++++++++---------------- src/flan.py | 38 ++++++++++++++++++++++++++++ src/prince.py | 61 +++++++++++++++++++++++++++++++++++++++++++++ test/cartmantest.py | 15 +++++++++++ test/flantest.py | 18 +++++++++++++ 7 files changed, 186 insertions(+), 43 deletions(-) create mode 100644 src/flan.py create mode 100644 src/prince.py create mode 100644 test/cartmantest.py create mode 100644 test/flantest.py diff --git a/.gitignore b/.gitignore index 10b41cd..f64629e 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,2 @@ -.sekrit/ -__pycache__/ -*/__pycache__/ +.sekrit +__pycache__ diff --git a/lulzbot.py b/lulzbot.py index 3b04e98..df4a8ec 100644 --- a/lulzbot.py +++ b/lulzbot.py @@ -23,6 +23,7 @@ import numpy as np from fortune import fortune from src.twitter import get_tweet from src.cartman import cartman_speak +from src.flan import flan_speak chuck_quotes = open('data/chuck_quotes').read().split('\n%\n') ligma_list = open('data/ligma_list').read().split('\n') @@ -50,23 +51,23 @@ def chuck(): def ac(): return np.random.choice(aclist) -message_handler = {'lulzbot': show_menu, # these need to be functions - 'musk': musk, - 'deez': ligma, - 'ligma': ligma, - 'bofa': ligma, - 'bopha': ligma, - 'limerick': limerick, - 'limrick': limerick, - 'prost!': prost, - 'fortune': fortune, - 'chuck': chuck, - 'ac':ac, - } +triggers = {'lulzbot': show_menu, # these need to be functions + 'musk': musk, + 'deez': ligma, + 'ligma': ligma, + 'bofa': ligma, + 'bopha': ligma, + 'limerick': limerick, + 'limrick': limerick, + 'prost!': prost, + 'fortune': fortune, + 'chuck': chuck, + 'ac': ac, + } TOKEN = open('.sekrit/discord_token').read() intents = discord.Intents.default() -intents.message_content = True +# intents.message_content = True client = discord.Client(activity=discord.Game(name='with myself'), intents=intents) @client.event @@ -85,13 +86,16 @@ async def on_message(message): return elif message.channel.name == 'cartman': - async with message.channel.typing(): - await message.channel.send(cartman_speak(user_message)) - #await message.channel.send("I'm broken, come back later.") + await message.channel.send(cartman_speak(user_message)) + #await message.channel.send("I'm broken, come back later.") + + elif message.channel.name == 'flan': + await message.channel.send(flan_speak(user_message)) + # await message.channel.send('GPU is busy, come back later') elif message.channel.name == 'shitposting': - if user_message.lower() in message_handler: - await message.channel.send(message_handler[user_message.lower()]()) + if user_message.lower() in triggers: + await message.channel.send(triggers[user_message.lower()]()) return client.run(TOKEN) diff --git a/src/cartman.py b/src/cartman.py index 45035c3..05bc996 100644 --- a/src/cartman.py +++ b/src/cartman.py @@ -1,25 +1,33 @@ -from transformers.models.auto.tokenization_auto import AutoTokenizer -from transformers.models.auto.modeling_auto import AutoModelForCausalLM -import torch +import requests +import json -#tokenizer = AutoTokenizer.from_pretrained('microsoft/DialoGPT-large') -#model = AutoModelForCausalLM.from_pretrained('../southpark/output-medium') - -tokenizer = AutoTokenizer.from_pretrained('google/flan-t5-xxl') -model = AutoModelForCausalLM.from_pretrained('google/flan-t5-xxl') +url = 'https://doordesk.net/chat' def cartman_speak(user_message): - new_user_input_ids = tokenizer.encode(user_message + tokenizer.eos_token, return_tensors='pt') - bot_output = new_user_input_ids - bot_input_ids = torch.cat([new_user_input_ids, bot_output]) - bot_output = model.generate( - bot_input_ids, max_length= 200, - pad_token_id=tokenizer.eos_token_id, - no_repeat_ngram_size=3, - do_sample=True, - top_k=100, - top_p=0.7, - temperature=.8 - ) + message = {'Message': user_message} + response = requests.post(url,json.dumps(message)) + return response.json().get('Cartman') - return '{}'.format(tokenizer.decode(bot_output[:,bot_input_ids.shape[-1]:][0], skip_special_tokens=True)) + +# from transformers.models.auto.tokenization_auto import AutoTokenizer +# from transformers.models.auto.modeling_auto import AutoModelForCausalLM +# import torch +# +# tokenizer = AutoTokenizer.from_pretrained('microsoft/DialoGPT-large') +# model = AutoModelForCausalLM.from_pretrained('../southpark/output-medium') +# +# def cartman_speak(user_message): +# new_user_input_ids = tokenizer.encode(user_message + tokenizer.eos_token, return_tensors='pt') +# bot_output = new_user_input_ids +# bot_input_ids = torch.cat([new_user_input_ids, bot_output]) +# bot_output = model.generate( +# bot_input_ids, max_length= 200, +# pad_token_id=tokenizer.eos_token_id, +# no_repeat_ngram_size=3, +# do_sample=True, +# top_k=100, +# top_p=0.7, +# temperature=.8 +# ) +# +# return '{}'.format(tokenizer.decode(bot_output[:,bot_input_ids.shape[-1]:][0], skip_special_tokens=True)) diff --git a/src/flan.py b/src/flan.py new file mode 100644 index 0000000..b67cd2e --- /dev/null +++ b/src/flan.py @@ -0,0 +1,38 @@ +import torch +from transformers.models.t5.tokenization_t5_fast import T5TokenizerFast +from transformers.models.t5.modeling_t5 import T5ForConditionalGeneration + +tokenizer = T5TokenizerFast.from_pretrained("google/flan-t5-xl") +device = torch.device('cuda') +model = T5ForConditionalGeneration.from_pretrained("google/flan-t5-xl") +model = model.to(device) + +def flan_speak(user_message): + input_ids = tokenizer(user_message, return_tensors='pt').input_ids.to('cuda') + user_input_word_count = len(user_message.split(' ')) + + if user_input_word_count * user_input_word_count > 100: + min_tokens = 100 + else: + min_tokens = user_input_word_count * 2 + + bot_output = model.generate( + input_ids, + # min_length = min_tokens, + max_new_tokens = 350, + num_beams = 16, + num_beam_groups = 8, + no_repeat_ngram_size = 3, + length_penalty = 1.4, + diversity_penalty = 0.0, + repetition_penalty = 2.1, + early_stopping = True, + + # do_sample = True, + # top_k = 256, + # top_p = 0.92, + # temperature = 0.4, + ) + + output = tokenizer.batch_decode(bot_output, skip_special_tokens=True)[0] + return output[:2000] diff --git a/src/prince.py b/src/prince.py new file mode 100644 index 0000000..b9840a6 --- /dev/null +++ b/src/prince.py @@ -0,0 +1,61 @@ + + elif user_message.lower().count('lulzbot tell me about yourself') > 0: + await message.channel.send(\ +'In west Philadelphia born and raised\n\ +On the playground was where I spent most of my days') + time.sleep(4.6) + await message.channel.send('\ +Chillin\' out maxin\' relaxin\' all cool\n\ +And all shooting some b-ball outside of the school') + time.sleep(4.6) + await message.channel.send('\ +When a couple of guys who were up to no good\n\ +Started making trouble in my neighborhood') + time.sleep(4.6) + await message.channel.send('\ +I got in one little fight and my mom got scared\n\ +She said, "You\'re movin\' with your auntie and uncle in Bel-Air"') + time.sleep(5) + await message.channel.send('\ +I begged and pleaded with her day after day\n\ +But she packed my suitcase and sent me on my way') + time.sleep(4.6) + await message.channel.send('\ +She gave me a kiss and then she gave me my ticket\n\ +I put my Walkman on and said\n\ +"I might as well kick it"') + time.sleep(4.5) + await message.channel.send('\ +First class, yo, this is bad\n\ +Drinking orange juice out of a champagne glass') + time.sleep(4.5) + await message.channel.send('\ +Is this what the people of Bel-Air living like?\n\ +Hmm, this might be alright') + time.sleep(4.5) + await message.channel.send('\ +I whistled for a cab and when it came near\n\ +The license plate said "Fresh" and it had dice in the mirror') + time.sleep(4.5) + await message.channel.send('\ +If anything I could say that this cab was rare\n\ +But I thought, "Nah, forget it"\n\ +– "Yo, homes to Bel-Air"') + time.sleep(4.5) + await message.channel.send('\ +I') + time.sleep(.5) + await message.channel.send('\ +pulled') + time.sleep(.5) + await message.channel.send('\ +up to the house about 7 or 8\n\ +And I yelled to the cabbie\n\ +"Yo homes smell ya later"') + time.sleep(4.5) + await message.channel.send('\ +I looked at my kingdom\n\ +I was finally there\n\ +To sit on my throne as the Prince of Bel-Air') + return + diff --git a/test/cartmantest.py b/test/cartmantest.py new file mode 100644 index 0000000..4653d11 --- /dev/null +++ b/test/cartmantest.py @@ -0,0 +1,15 @@ +import requests +import json + +url = 'https://doordesk.net/chat' + +active = True + +while active: + user_input = input('>> ') + if user_input in 'q': + active = False + break + message = {'Message': user_input} + response = requests.post(url,json.dumps(message)) + print(response.json().get('Cartman')) diff --git a/test/flantest.py b/test/flantest.py new file mode 100644 index 0000000..71d6238 --- /dev/null +++ b/test/flantest.py @@ -0,0 +1,18 @@ +import torch +from transformers.models.t5.tokenization_t5 import T5Tokenizer +from transformers.models.t5.modeling_t5 import T5ForConditionalGeneration + +device = torch.device("cuda") +tokenizer = T5Tokenizer.from_pretrained("google/flan-t5-xl") +model = T5ForConditionalGeneration.from_pretrained("google/flan-t5-xxl").cuda() + +run = True +while run: + input_text = input('>> ') + if input_text in 'q': + run = False + break + input_ids = tokenizer(input_text, return_tensors="pt").input_ids.to("cuda") + + outputs = model.generate(input_ids) + print(tokenizer.decode(outputs[0]))