cartman/train/clean.py

19 lines
401 B
Python
Raw Normal View History

2023-02-12 15:03:47 -05:00
import pandas as pd
# Build the training CSV: pair every speaker with a cleaned copy of their line.
df = pd.read_csv('./data/All-seasons.csv')

# Clean the dialogue column with vectorized string operations.
# - regex=False keeps '(' and ')' literal; as regex patterns they would be
#   invalid or behave differently depending on the pandas version.
# - The .str accessor passes missing values (empty cells are read as NaN)
#   through untouched, so a blank line no longer aborts the whole run; such
#   rows are written with an empty `line` field.
cleanlines = (
    df['Line']
    .str.replace('\n', '', regex=False)  # drop embedded newlines
    .str.replace('(', '', regex=False)   # drop parentheses but keep the
    .str.replace(')', '', regex=False)   # text that was inside them
    # NOTE(review): as written, both arguments below look like a single plain
    # space, which makes this replacement a no-op. It was presumably meant to
    # collapse a double space or a non-breaking space -- confirm against the
    # original file before changing it.
    .str.replace(' ', ' ', regex=False)
    .str.strip()                         # trim leading/trailing whitespace
)

# Both columns share df's index, so rows stay paired with their speaker.
train = pd.DataFrame({'name': df['Character'], 'line': cleanlines})
train.to_csv('./data/train.csv', index=False)