cartman/train/clean.py
2023-02-12 15:03:47 -05:00

18 lines
401 B
Python

import pandas as pd
# Build ./data/train.csv (columns: name, line) from the raw transcript CSV.
# The two input columns read below are `Character` and `Line`.
df = pd.read_csv('./data/All-seasons.csv')

# Clean every line with pandas' vectorized string methods: remove embedded
# newlines and parentheses, then trim surrounding whitespace. Unlike calling
# str methods on each cell in a Python loop, the `.str` accessor passes
# missing values (NaN) through instead of raising AttributeError on a float.
# `regex=False` keeps '(' and ')' as literal characters, not regex syntax.
cleanlines = (
    df.Line
    .str.replace('\n', '', regex=False)
    .str.replace('(', '', regex=False)
    .str.replace(')', '', regex=False)
    # NOTE(review): as written this replaces a space with a space (a no-op);
    # it may have been meant to collapse double spaces -- confirm intent.
    .str.replace(' ', ' ', regex=False)
    .str.strip()
)

# Both Series share df's index, so rows stay aligned; the dict's insertion
# order fixes the output column order as name, line.
train = pd.DataFrame({'name': df.Character, 'line': cleanlines})
train.to_csv('./data/train.csv', index=False)