# cartman/train/clean.py
#
# Repository-viewer header captured with the file (not program text):
# 29 lines
# 526 B
# Python
# Raw Permalink Normal View History
#
# 2023-02-12 15:03:47 -05:00
"""Build the training CSV from the raw all-seasons CSV.

Reads ``INPUT_FILE_PATH``, cleans the text in its ``Line`` column, and writes
a two-column (``name``, ``line``) CSV to ``OUTPUT_FILE_PATH``.  The work runs
at module level, so it happens whenever this file is executed or imported.
"""
import pandas as pd

INPUT_FILE_PATH = './data/All-seasons.csv'
OUTPUT_FILE_PATH = './data/train_data.csv'
# Misspelled original name, kept as an alias for backward compatibility.
OUPUT_FILE_PATH = OUTPUT_FILE_PATH

# Deletion table: newlines and both parentheses are removed in a single pass.
_REMOVED_CHARS = str.maketrans('', '', '\n()')


def _clean_line(text):
    """Return *text* with newlines/parentheses removed and edges trimmed.

    Double spaces are collapsed to a single space in one pass (so a run of
    four spaces becomes two).
    """
    # NOTE(review): the original call was ``.replace(' ', ' ')``, a no-op;
    # collapsing double spaces is presumably what was intended -- confirm.
    return text.translate(_REMOVED_CHARS).replace('  ', ' ').strip()


df = pd.read_csv(INPUT_FILE_PATH)

# na_action='ignore' leaves missing lines as NaN instead of raising
# AttributeError on a float; to_csv then writes them as empty fields.
clean_lines = df['Line'].map(_clean_line, na_action='ignore')

# Output columns: the speaker ('Character' in the input) and the cleaned line.
train_data = pd.DataFrame({'name': df['Character'], 'line': clean_lines})
del df  # the raw frame is no longer needed once train_data is built

train_data.to_csv(OUTPUT_FILE_PATH, index=False)