import torch
from transformers import T5Tokenizer, T5ForConditionalGeneration

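# FLAN-T5-XL has ~3B parameters (roughly 12 GB in fp32), so a GPU is strongly
# recommended; the device check below falls back to CPU so the script still runs.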
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
tokenizer = T5Tokenizer.from_pretrained("google/flan-t5-xl")
model = T5ForConditionalGeneration.from_pretrained("google/flan-t5-xl").to(device)

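# Simple REPL: read a prompt, generate a reply; type 'q' to quit.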
while True:
    input_text = input('>> ')
    if input_text.strip() == 'q':
        break
    input_ids = tokenizer.encode(input_text, return_tensors="pt").to(device)

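    # generate() defaults to max_length=20, which truncates longer answers;
    # max_new_tokens lifts that cap (256 here is an arbitrary choice).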
    outputs = model.generate(input_ids, max_new_tokens=256)
    print(tokenizer.decode(outputs[0], skip_special_tokens=True))