diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..335ec95 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +*.tar.gz diff --git a/api/readme.md b/api/readme.md index 5ae6ffd..a14fdfc 100644 --- a/api/readme.md +++ b/api/readme.md @@ -1,9 +1,15 @@ # Chatbots API -FastAPI and PyTorch +[FastAPI](https://fastapi.tiangolo.com/) and [PyTorch](https://pytorch.org/) -To build yourself you'll need to first train a model with ../train +To build one yourself you'll need to first [train a model](../train), +place the entire directory (checkpoints aren't needed) containing pytorch_model.bin in [bots](./src/bots), +then edit or duplicate [cartman.py](./src/bots/cartman.py). -My image compressed is 1.4GB +Cartman Docker images are available for +[x86_64](https://doordesk.net/files/chatbots_api_x86_64.tar.gz) (1.6GB) and +[aarch64](https://doordesk.net/files/chatbots_api_x86_64.tar.gz) (1.4GB) -Scripts in test to talk to it +See [run](./run) and [test](./test) to interact with it + +Live demo [here](https://doordesk.net/cartman) diff --git a/train/.gitignore b/train/.gitignore index 33a1886..2099bbc 100644 --- a/train/.gitignore +++ b/train/.gitignore @@ -1,3 +1,5 @@ __pycache__/ .ipynb_checkpoints/ cartman/ +cached/ +runs/ diff --git a/train/clean.py b/train/clean.py index e5ad18e..b874f84 100644 --- a/train/clean.py +++ b/train/clean.py @@ -1,18 +1,28 @@ import pandas as pd -df = pd.read_csv('./data/All-seasons.csv') -cleanlines = pd.Series( - [cell + +INPUT_FILE_PATH = './data/All-seasons.csv' +OUPUT_FILE_PATH = './data/train_data.csv' + + +df = pd.read_csv(INPUT_FILE_PATH) + +clean_lines = pd.Series( + [filter_lines .replace('\n', '') .replace('(', '') .replace(')', '') .replace(' ', ' ') .strip() - for cell in df.Line + for filter_lines in df.Line ] ) -train = pd.DataFrame(df.Character) -train['line'] = cleanlines -train.columns = ['name', 'line'] +train_data = pd.DataFrame(df.Character) +del df -train.to_csv('./data/train.csv', index=False) 
+train_data['line'] = clean_lines + +train_data.columns = ['name', 'line'] + + +train_data.to_csv(OUPUT_FILE_PATH, index=False)