nlp stuff
parent fe187ba59e
commit faa542aca4
7 changed files with 186 additions and 43 deletions
5  .gitignore  (vendored)

@@ -1,3 +1,2 @@
-.sekrit/
-__pycache__/
-*/__pycache__/
+.sekrit
+__pycache__
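A note on the tightened patterns: a .gitignore entry with no internal slash, like `__pycache__`, matches at any depth and matches both files and directories, so it already covers everything `*/__pycache__/` did (which, because of its slash, only matched one level down). The trailing-slash forms matched directories only; dropping the slash just widens each pattern to plain files of the same name as well.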
16  lulzbot.py

@@ -23,6 +23,7 @@ import numpy as np
 from fortune import fortune
 from src.twitter import get_tweet
 from src.cartman import cartman_speak
+from src.flan import flan_speak

 chuck_quotes = open('data/chuck_quotes').read().split('\n%\n')
 ligma_list = open('data/ligma_list').read().split('\n')
@@ -50,7 +51,7 @@ def chuck():
 def ac():
     return np.random.choice(aclist)

-message_handler = {'lulzbot': show_menu, # these need to be functions
+triggers = {'lulzbot': show_menu, # these need to be functions
             'musk': musk,
             'deez': ligma,
             'ligma': ligma,
@@ -61,12 +62,12 @@ message_handler = {'lulzbot': show_menu, # these need to be functions
             'prost!': prost,
             'fortune': fortune,
             'chuck': chuck,
-            'ac':ac,
+            'ac': ac,
             }

 TOKEN = open('.sekrit/discord_token').read()
 intents = discord.Intents.default()
-intents.message_content = True
+# intents.message_content = True
 client = discord.Client(activity=discord.Game(name='with myself'), intents=intents)

 @client.event
@@ -85,13 +86,16 @@ async def on_message(message):
         return

     elif message.channel.name == 'cartman':
-        async with message.channel.typing():
-            await message.channel.send(cartman_speak(user_message))
+        await message.channel.send(cartman_speak(user_message))
         #await message.channel.send("I'm broken, come back later.")

+    elif message.channel.name == 'flan':
+        await message.channel.send(flan_speak(user_message))
+        # await message.channel.send('GPU is busy, come back later')
+
     elif message.channel.name == 'shitposting':
-        if user_message.lower() in message_handler:
-            await message.channel.send(message_handler[user_message.lower()]())
+        if user_message.lower() in triggers:
+            await message.channel.send(triggers[user_message.lower()]())
             return

 client.run(TOKEN)
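One behavioral note on the cartman and flan branches: `cartman_speak` and `flan_speak` are synchronous (an HTTP round-trip and a GPU generate call, respectively), so calling them directly inside the async `on_message` handler stalls Discord's event loop until they return. A minimal sketch of one way to keep the bot responsive, assuming Python 3.9+ for `asyncio.to_thread` and an `import asyncio` at the top of lulzbot.py:

    elif message.channel.name == 'cartman':
        # run the blocking HTTP call in a worker thread instead of
        # directly on the event loop
        reply = await asyncio.to_thread(cartman_speak, user_message)
        await message.channel.send(reply)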
src/cartman.py

@@ -1,25 +1,33 @@
-from transformers.models.auto.tokenization_auto import AutoTokenizer
-from transformers.models.auto.modeling_auto import AutoModelForCausalLM
-import torch
+import requests
+import json

-#tokenizer = AutoTokenizer.from_pretrained('microsoft/DialoGPT-large')
-#model = AutoModelForCausalLM.from_pretrained('../southpark/output-medium')
-
-tokenizer = AutoTokenizer.from_pretrained('google/flan-t5-xxl')
-model = AutoModelForCausalLM.from_pretrained('google/flan-t5-xxl')
+url = 'https://doordesk.net/chat'

 def cartman_speak(user_message):
-    new_user_input_ids = tokenizer.encode(user_message + tokenizer.eos_token, return_tensors='pt')
-    bot_output = new_user_input_ids
-    bot_input_ids = torch.cat([new_user_input_ids, bot_output])
-    bot_output = model.generate(
-        bot_input_ids, max_length=200,
-        pad_token_id=tokenizer.eos_token_id,
-        no_repeat_ngram_size=3,
-        do_sample=True,
-        top_k=100,
-        top_p=0.7,
-        temperature=.8
-    )
-
-    return '{}'.format(tokenizer.decode(bot_output[:,bot_input_ids.shape[-1]:][0], skip_special_tokens=True))
+    message = {'Message': user_message}
+    response = requests.post(url, json.dumps(message))
+    return response.json().get('Cartman')
+
+
+# from transformers.models.auto.tokenization_auto import AutoTokenizer
+# from transformers.models.auto.modeling_auto import AutoModelForCausalLM
+# import torch
+#
+# tokenizer = AutoTokenizer.from_pretrained('microsoft/DialoGPT-large')
+# model = AutoModelForCausalLM.from_pretrained('../southpark/output-medium')
+#
+# def cartman_speak(user_message):
+#     new_user_input_ids = tokenizer.encode(user_message + tokenizer.eos_token, return_tensors='pt')
+#     bot_output = new_user_input_ids
+#     bot_input_ids = torch.cat([new_user_input_ids, bot_output])
+#     bot_output = model.generate(
+#         bot_input_ids, max_length=200,
+#         pad_token_id=tokenizer.eos_token_id,
+#         no_repeat_ngram_size=3,
+#         do_sample=True,
+#         top_k=100,
+#         top_p=0.7,
+#         temperature=.8
+#     )
+#
+#     return '{}'.format(tokenizer.decode(bot_output[:,bot_input_ids.shape[-1]:][0], skip_special_tokens=True))
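Small detail in the new `cartman_speak`: passing `json.dumps(message)` positionally sends the JSON string as a raw request body with no `Content-Type` header. requests can do the encoding and set the header in one step; whether the doordesk.net endpoint actually requires the header is untested here:

    response = requests.post(url, json=message)  # encodes and sets Content-Type: application/json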
38  src/flan.py  (new file)

@@ -0,0 +1,38 @@
+import torch
+from transformers.models.t5.tokenization_t5_fast import T5TokenizerFast
+from transformers.models.t5.modeling_t5 import T5ForConditionalGeneration
+
+tokenizer = T5TokenizerFast.from_pretrained("google/flan-t5-xl")
+device = torch.device('cuda')
+model = T5ForConditionalGeneration.from_pretrained("google/flan-t5-xl")
+model = model.to(device)
+
+def flan_speak(user_message):
+    input_ids = tokenizer(user_message, return_tensors='pt').input_ids.to('cuda')
+    user_input_word_count = len(user_message.split(' '))
+
+    if user_input_word_count * user_input_word_count > 100:
+        min_tokens = 100
+    else:
+        min_tokens = user_input_word_count * 2
+
+    bot_output = model.generate(
+        input_ids,
+        # min_length = min_tokens,
+        max_new_tokens = 350,
+        num_beams = 16,
+        num_beam_groups = 8,
+        no_repeat_ngram_size = 3,
+        length_penalty = 1.4,
+        diversity_penalty = 0.0,
+        repetition_penalty = 2.1,
+        early_stopping = True,
+
+        # do_sample = True,
+        # top_k = 256,
+        # top_p = 0.92,
+        # temperature = 0.4,
+    )
+
+    output = tokenizer.batch_decode(bot_output, skip_special_tokens=True)[0]
+    return output[:2000]
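Two things worth flagging in `flan_speak`: `min_tokens` is computed but only feeds the commented-out `min_length`, so it is dead code for now; and with `diversity_penalty = 0.0`, group beam search (`num_beam_groups = 8`) has nothing pushing the groups apart, so it behaves like plain 16-beam search. If distinct groups are the goal, a nonzero penalty is what creates them; a hedged sketch, with 0.5 as an assumed starting value rather than anything tuned:

    bot_output = model.generate(
        input_ids,
        num_beams = 16,
        num_beam_groups = 8,
        diversity_penalty = 0.5,  # assumption: a common starting value, not tuned
        no_repeat_ngram_size = 3,
        max_new_tokens = 350,
    )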
61  src/prince.py  (new file)

@@ -0,0 +1,61 @@
+
+    elif user_message.lower().count('lulzbot tell me about yourself') > 0:
+        await message.channel.send(\
+'In west Philadelphia born and raised\n\
+On the playground was where I spent most of my days')
+        time.sleep(4.6)
+        await message.channel.send('\
+Chillin\' out maxin\' relaxin\' all cool\n\
+And all shooting some b-ball outside of the school')
+        time.sleep(4.6)
+        await message.channel.send('\
+When a couple of guys who were up to no good\n\
+Started making trouble in my neighborhood')
+        time.sleep(4.6)
+        await message.channel.send('\
+I got in one little fight and my mom got scared\n\
+She said, "You\'re movin\' with your auntie and uncle in Bel-Air"')
+        time.sleep(5)
+        await message.channel.send('\
+I begged and pleaded with her day after day\n\
+But she packed my suitcase and sent me on my way')
+        time.sleep(4.6)
+        await message.channel.send('\
+She gave me a kiss and then she gave me my ticket\n\
+I put my Walkman on and said\n\
+"I might as well kick it"')
+        time.sleep(4.5)
+        await message.channel.send('\
+First class, yo, this is bad\n\
+Drinking orange juice out of a champagne glass')
+        time.sleep(4.5)
+        await message.channel.send('\
+Is this what the people of Bel-Air living like?\n\
+Hmm, this might be alright')
+        time.sleep(4.5)
+        await message.channel.send('\
+I whistled for a cab and when it came near\n\
+The license plate said "Fresh" and it had dice in the mirror')
+        time.sleep(4.5)
+        await message.channel.send('\
+If anything I could say that this cab was rare\n\
+But I thought, "Nah, forget it"\n\
+– "Yo, homes to Bel-Air"')
+        time.sleep(4.5)
+        await message.channel.send('\
+I')
+        time.sleep(.5)
+        await message.channel.send('\
+pulled')
+        time.sleep(.5)
+        await message.channel.send('\
+up to the house about 7 or 8\n\
+And I yelled to the cabbie\n\
+"Yo homes smell ya later"')
+        time.sleep(4.5)
+        await message.channel.send('\
+I looked at my kingdom\n\
+I was finally there\n\
+To sit on my throne as the Prince of Bel-Air')
+        return
+
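As committed, src/prince.py is a stashed snippet rather than an importable module: the bare `elif` only parses inside lulzbot.py's `on_message` handler. Also, `time.sleep` inside an async handler freezes the whole bot between verses; the non-blocking form (with `import asyncio`) would be:

        await asyncio.sleep(4.6)  # yields to the event loop instead of blocking it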
15  test/cartmantest.py  (new file)

@@ -0,0 +1,15 @@
+import requests
+import json
+
+url = 'https://doordesk.net/chat'
+
+active = True
+
+while active:
+    user_input = input('>> ')
+    if user_input == 'q':
+        active = False
+        break
+    message = {'Message': user_input}
+    response = requests.post(url, json.dumps(message))
+    print(response.json().get('Cartman'))
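A quick interactive run looks like the following (the reply is whatever the hosted model returns; shape inferred from the code, not from any API docs):

    $ python test/cartmantest.py
    >> hey cartman
    <Cartman model reply>
    >> q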
18  test/flantest.py  (new file)

@@ -0,0 +1,18 @@
+import torch
+from transformers.models.t5.tokenization_t5 import T5Tokenizer
+from transformers.models.t5.modeling_t5 import T5ForConditionalGeneration
+
+device = torch.device("cuda")
+tokenizer = T5Tokenizer.from_pretrained("google/flan-t5-xl")
+model = T5ForConditionalGeneration.from_pretrained("google/flan-t5-xxl").cuda()
+
+run = True
+while run:
+    input_text = input('>> ')
+    if input_text == 'q':
+        run = False
+        break
+    input_ids = tokenizer(input_text, return_tensors="pt").input_ids.to("cuda")
+
+    outputs = model.generate(input_ids)
+    print(tokenizer.decode(outputs[0]))
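Two quirks here: the tokenizer loads flan-t5-xl while the model is flan-t5-xxl (likely harmless, since the FLAN-T5 sizes share a vocabulary, but matching the names is safer), and a bare `model.generate(input_ids)` falls back to the library's short default generation length (about 20 tokens), so replies get cut off; `decode` without `skip_special_tokens=True` also leaves `<pad>`/`</s>` markers in the output. A sketch of the longer-form call:

    outputs = model.generate(input_ids, max_new_tokens=200)
    print(tokenizer.decode(outputs[0], skip_special_tokens=True))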