2023-02-12 15:03:47 -05:00
|
|
|
import pandas as pd
|
2023-02-12 21:40:39 -05:00
|
|
|
|
|
|
|
# Source CSV of raw dialogue; the script reads its `Character` and `Line`
# columns.
INPUT_FILE_PATH = './data/All-seasons.csv'

# Destination CSV for the cleaned table.
OUTPUT_FILE_PATH = './data/train_data.csv'
# Original (misspelled) name, kept as an alias because the write step at the
# end of this script refers to it.
OUPUT_FILE_PATH = OUTPUT_FILE_PATH
|
|
|
|
|
|
|
|
|
|
|
|
# Only `Character` and `Line` are read from this frame before it is deleted,
# so skip parsing (and holding in memory) the remaining columns.
df = pd.read_csv(INPUT_FILE_PATH, usecols=['Character', 'Line'])
|
|
|
|
|
|
|
|
# Clean every dialogue line with vectorised string operations:
#   1. drop newlines and parentheses,
#   2. squeeze runs of spaces down to one (the original
#      `.replace(' ', ' ')` swapped a space for a space, i.e. did nothing --
#      presumably a double space was intended; TODO confirm),
#   3. trim leading/trailing whitespace.
# Missing values stay missing instead of raising AttributeError, which the
# per-element `str.replace` chain would do on a NaN.
# The result keeps `df`'s index, so it aligns row-for-row with `df.Character`.
clean_lines = (
    df.Line
    .str.replace(r'[\n()]', '', regex=True)
    .str.replace(r' {2,}', ' ', regex=True)
    .str.strip()
)
|
|
|
|
|
2023-02-12 21:40:39 -05:00
|
|
|
# Assemble the output table: speaker under `name`, cleaned text under `line`.
# Naming both columns at construction replaces the positional
# `train_data.columns = [...]` rename, which silently mislabels data if the
# column order ever changes.
train_data = pd.DataFrame({'name': df.Character, 'line': clean_lines})

# The raw frame is not used past this point; drop the reference so its
# memory can be reclaimed before the CSV is written.
del df
|
|
|
|
|
2023-02-12 15:03:47 -05:00
|
|
|
|
2023-02-12 21:40:39 -05:00
|
|
|
# Persist the cleaned table as CSV; index=False leaves the row index out of
# the file.
train_data.to_csv(path_or_buf=OUPUT_FILE_PATH, index=False)
|