{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "23a7a47d-40c9-4ce2-8e1d-069690edfed3", "metadata": { "execution": { "iopub.execute_input": "2022-10-18T01:39:49.045197Z", "iopub.status.busy": "2022-10-18T01:39:49.044788Z", "iopub.status.idle": "2022-10-18T01:39:49.325032Z", "shell.execute_reply": "2022-10-18T01:39:49.324364Z", "shell.execute_reply.started": "2022-10-18T01:39:49.045112Z" }, "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(70896, 4)\n" ] } ], "source": [
"import pandas as pd\n",
"\n",
"# Read the raw CSV into `df` and print its (rows, columns) shape.\n",
"df = pd.read_csv('data/All-seasons.csv')\n",
"print(df.shape)"
] },
{ "cell_type": "code", "execution_count": 2, "id": "cd058417-a7b1-408f-a6b8-d02c114b380d", "metadata": { "execution": { "iopub.execute_input": "2022-10-18T01:39:49.326910Z", "iopub.status.busy": "2022-10-18T01:39:49.326699Z", "iopub.status.idle": "2022-10-18T01:39:49.339491Z", "shell.execute_reply": "2022-10-18T01:39:49.338704Z", "shell.execute_reply.started": "2022-10-18T01:39:49.326893Z" }, "tags": [] }, "outputs": [ { "data": { "text/plain": [ "2 6416\n", "3 5798\n", "4 5680\n", "6 5131\n", "5 4414\n", "7 4236\n", "1 4170\n", "8 3601\n", "9 3526\n", "11 3478\n", "10 3471\n", "14 3346\n", "12 3307\n", "13 3257\n", "16 3120\n", "15 3101\n", "18 2522\n", "17 2305\n", "Season 17\n", "Name: Season, dtype: int64" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [
"# Row count per distinct Season value.\n",
"# NOTE(review): the stored output lists a literal 'Season' key (17 rows), which looks like\n",
"# header rows repeated inside the CSV -- confirm against the raw file.\n",
"df['Season'].value_counts()"
] },
{ "cell_type": "code", "execution_count": 3, "id": "c4da2f8d-a577-49b0-a477-3eab09a38ae9", "metadata": { "execution": { "iopub.execute_input": "2022-10-18T01:39:49.340976Z", "iopub.status.busy": "2022-10-18T01:39:49.340661Z", "iopub.status.idle": "2022-10-18T01:39:49.371622Z", "shell.execute_reply": "2022-10-18T01:39:49.371150Z", "shell.execute_reply.started": "2022-10-18T01:39:49.340946Z" }, "tags": [] }, "outputs": [ { "data": { "text/plain": [ "Cartman 9774\n", "Evil Cartman 23\n", "New Cartman 18\n", "Stan, Kyle, Cartman 12\n", "Kyle, Cartman 7\n", "Stan, Cartman 7\n", "Liane and Cartman 6\n", "Cartman Smurf 5\n", "Future Cartman 4\n", "Cartman on Left 3\n", "Stan/Kenny/ Cartman 3\n", "Cartman's Good Side 3\n", "Mrs. Cartman 3\n", "Stan/Kyle/ Cartman 3\n", "Cartman on Right 2\n", "Cartman's voice 2\n", "Both Cartmans 2\n", "Stan, Kyle, Kenny, Cartman 2\n", "Cartman, Stan 2\n", "Cartman, Kyle, Kenny 2\n", "Cartman, Kyle 2\n", "Cartman's Side 2\n", "Cartman, Choir 2\n", "Butters, Cartman 2\n", "Cartman's Bad Side 2\n", "Stan, Kyle, Cartman, Kenny 1\n", "Kenny, Stan, Cartman 1\n", "Stan, Cartman, Kenny 1\n", "Eric Cartman 1\n", "Wendy, Cartman 1\n", "Congressman 1, Cartman 1\n", "Cheesy Poof Cartman 1\n", "Cartmans/Boys 1\n", "Cartman/ Kenny 1\n", "Cartman's Conscience 1\n", "Kyle/Cartman 1\n", "Mrs Cartman 1\n", "Kyle, Stan, Cartman 1\n", "Cartman and Kyle 1\n", "Cartman (Butters) 1\n", "The Boys (except Cartman) and Dr. Phillips 1\n", "Cartman, Butters 1\n", "Cartman and the Gingers 1\n", "Name: Character, dtype: int64" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [
"# Row count per speaker label containing the substring 'artman'.\n",
"df.loc[df.Character.str.contains('artman'), 'Character'].value_counts()"
] },
{ "cell_type": "code", "execution_count": 4, "id": "8c2781f6-e93b-49ec-9cbc-2c357b65f239", "metadata": { "execution": { "iopub.execute_input": "2022-10-18T01:39:49.372448Z", "iopub.status.busy": "2022-10-18T01:39:49.372292Z", "iopub.status.idle": "2022-10-18T01:39:49.381325Z", "shell.execute_reply": "2022-10-18T01:39:49.380648Z", "shell.execute_reply.started": "2022-10-18T01:39:49.372432Z" }, "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
SeasonEpisodeCharacterLine
0101StanYou guys, you guys! Chef is going away. \\n
1101KyleGoing away? For how long?\\n
2101StanForever.\\n
3101ChefI'm sorry boys.\\n
4101StanChef said he's been bored, so he joining a gro...
5101ChefWow!\\n
6101Mrs. GarrisonChef?? What kind of questions do you think adv...
7101ChefWhat's the meaning of life? Why are we here?\\n
8101Mrs. GarrisonI hope you're making the right choice.\\n
9101CartmanI'm gonna miss him. I'm gonna miss Chef and I...
10101StanDude, how are we gonna go on? Chef was our fuh...
11101Mayor McDanielsAnd we will all miss you, Chef, but we know y...
12101JimboBye-bye!\\n
13101GeraldGood-bye!\\n
14101Mr. MackeySo long!\\n
15101A ManSo long, Chef!\\n
16101A Sign-HolderGood-bye, Chef!\\n
17101RandyGood-bye, Chef! Have a great time with the Sup...
18101ChefGood-bye! ..\\n
19101KyleDraw two card, fatass.\\n
20101CartmanReverse to you, Jew. \\n
21101StanI'll get it. \\n
22101ChefHello there, children!\\n
23101StanHe's back!\\n
24101KyleYeah!\\n
\n", "
" ], "text/plain": [ " Season Episode Character \\\n", "0 10 1 Stan \n", "1 10 1 Kyle \n", "2 10 1 Stan \n", "3 10 1 Chef \n", "4 10 1 Stan \n", "5 10 1 Chef \n", "6 10 1 Mrs. Garrison \n", "7 10 1 Chef \n", "8 10 1 Mrs. Garrison \n", "9 10 1 Cartman \n", "10 10 1 Stan \n", "11 10 1 Mayor McDaniels \n", "12 10 1 Jimbo \n", "13 10 1 Gerald \n", "14 10 1 Mr. Mackey \n", "15 10 1 A Man \n", "16 10 1 A Sign-Holder \n", "17 10 1 Randy \n", "18 10 1 Chef \n", "19 10 1 Kyle \n", "20 10 1 Cartman \n", "21 10 1 Stan \n", "22 10 1 Chef \n", "23 10 1 Stan \n", "24 10 1 Kyle \n", "\n", " Line \n", "0 You guys, you guys! Chef is going away. \\n \n", "1 Going away? For how long?\\n \n", "2 Forever.\\n \n", "3 I'm sorry boys.\\n \n", "4 Chef said he's been bored, so he joining a gro... \n", "5 Wow!\\n \n", "6 Chef?? What kind of questions do you think adv... \n", "7 What's the meaning of life? Why are we here?\\n \n", "8 I hope you're making the right choice.\\n \n", "9 I'm gonna miss him. I'm gonna miss Chef and I... \n", "10 Dude, how are we gonna go on? Chef was our fuh... \n", "11 And we will all miss you, Chef, but we know y... \n", "12 Bye-bye!\\n \n", "13 Good-bye!\\n \n", "14 So long!\\n \n", "15 So long, Chef!\\n \n", "16 Good-bye, Chef!\\n \n", "17 Good-bye, Chef! Have a great time with the Sup... \n", "18 Good-bye! ..\\n \n", "19 Draw two card, fatass.\\n \n", "20 Reverse to you, Jew. \\n \n", "21 I'll get it. 
\\n \n", "22 Hello there, children!\\n \n", "23 He's back!\\n \n", "24 Yeah!\\n " ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.head(25)" ] },
{ "cell_type": "code", "execution_count": 5, "id": "2c267ff5-a11a-426b-9034-8ee776b800e7", "metadata": { "execution": { "iopub.execute_input": "2022-10-18T01:39:49.382208Z", "iopub.status.busy": "2022-10-18T01:39:49.382029Z", "iopub.status.idle": "2022-10-18T01:39:49.407971Z", "shell.execute_reply": "2022-10-18T01:39:49.407239Z", "shell.execute_reply.started": "2022-10-18T01:39:49.382191Z" }, "tags": [] }, "outputs": [], "source": [
"# Strip embedded newlines and surrounding whitespace from every Line value using the\n",
"# vectorised .str accessor; a missing value stays NaN instead of raising AttributeError.\n",
"# .tolist() drops the index and name, so the result is an unnamed, freshly indexed Series.\n",
"cleanlines = pd.Series(df.Line.str.replace('\\n', '', regex=False).str.strip().tolist())"
] },
{ "cell_type": "code", "execution_count": 6, "id": "4c22081b-e255-45b5-a2f5-ea4376b26434", "metadata": { "execution": { "iopub.execute_input": "2022-10-18T01:39:49.408983Z", "iopub.status.busy": "2022-10-18T01:39:49.408787Z", "iopub.status.idle": "2022-10-18T01:39:49.416631Z", "shell.execute_reply": "2022-10-18T01:39:49.415993Z", "shell.execute_reply.started": "2022-10-18T01:39:49.408965Z" }, "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
0
0You guys, you guys! Chef is going away.
1Going away? For how long?
2Forever.
3I'm sorry boys.
4Chef said he's been bored, so he joining a gro...
5Wow!
6Chef?? What kind of questions do you think adv...
7What's the meaning of life? Why are we here?
8I hope you're making the right choice.
9I'm gonna miss him. I'm gonna miss Chef and I...
10Dude, how are we gonna go on? Chef was our fuh...
11And we will all miss you, Chef, but we know y...
12Bye-bye!
13Good-bye!
14So long!
15So long, Chef!
16Good-bye, Chef!
17Good-bye, Chef! Have a great time with the Sup...
18Good-bye! ..
19Draw two card, fatass.
20Reverse to you, Jew.
21I'll get it.
22Hello there, children!
23He's back!
24Yeah!
\n", "
" ], "text/plain": [ " 0\n", "0 You guys, you guys! Chef is going away.\n", "1 Going away? For how long?\n", "2 Forever.\n", "3 I'm sorry boys.\n", "4 Chef said he's been bored, so he joining a gro...\n", "5 Wow!\n", "6 Chef?? What kind of questions do you think adv...\n", "7 What's the meaning of life? Why are we here?\n", "8 I hope you're making the right choice.\n", "9 I'm gonna miss him. I'm gonna miss Chef and I...\n", "10 Dude, how are we gonna go on? Chef was our fuh...\n", "11 And we will all miss you, Chef, but we know y...\n", "12 Bye-bye!\n", "13 Good-bye!\n", "14 So long!\n", "15 So long, Chef!\n", "16 Good-bye, Chef!\n", "17 Good-bye, Chef! Have a great time with the Sup...\n", "18 Good-bye! ..\n", "19 Draw two card, fatass.\n", "20 Reverse to you, Jew.\n", "21 I'll get it.\n", "22 Hello there, children!\n", "23 He's back!\n", "24 Yeah!" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pd.DataFrame(cleanlines).head(25)" ] },
{ "cell_type": "code", "execution_count": 7, "id": "b8a4c7ec-e4c5-43c5-89e9-09f661304938", "metadata": { "execution": { "iopub.execute_input": "2022-10-18T01:39:49.419786Z", "iopub.status.busy": "2022-10-18T01:39:49.419347Z", "iopub.status.idle": "2022-10-18T01:39:49.423453Z", "shell.execute_reply": "2022-10-18T01:39:49.422770Z", "shell.execute_reply.started": "2022-10-18T01:39:49.419765Z" }, "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "0\n" ] } ], "source": [ "print(df.shape[0] - cleanlines.shape[0])" ] },
{ "cell_type": "code", "execution_count": 8, "id": "993a4b52-e98b-494f-a48c-d4adbd57f510", "metadata": { "execution": { "iopub.execute_input": "2022-10-18T01:39:49.424465Z", "iopub.status.busy": "2022-10-18T01:39:49.424197Z", "iopub.status.idle": "2022-10-18T01:39:49.430702Z", "shell.execute_reply": "2022-10-18T01:39:49.429900Z", "shell.execute_reply.started": "2022-10-18T01:39:49.424442Z" }, "tags": [] }, "outputs": [], "source": [
"# Pair each speaker label with its cleaned line, under the column names 'name' and 'line'.\n",
"# Rows whose Season field is the literal string 'Season' (17 of them in the stored\n",
"# value_counts output of the Season cell) look like header rows repeated inside the CSV;\n",
"# drop them so they are not exported as dialogue.\n",
"is_header_row = df.Season.astype(str).eq('Season')\n",
"train = (\n",
"    pd.DataFrame({'name': df.Character, 'line': cleanlines})\n",
"    .loc[~is_header_row]\n",
"    .reset_index(drop=True)\n",
")"
] },
{ "cell_type": "code", "execution_count": 9, "id": "148a5ba3-4918-4421-a19a-68a84251cd7b", "metadata": { "execution": { "iopub.execute_input": "2022-10-18T01:39:49.432232Z", "iopub.status.busy": "2022-10-18T01:39:49.431864Z", "iopub.status.idle": "2022-10-18T01:39:49.443756Z", "shell.execute_reply": "2022-10-18T01:39:49.442701Z", "shell.execute_reply.started": "2022-10-18T01:39:49.432200Z" }, "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
nameline
0StanYou guys, you guys! Chef is going away.
1KyleGoing away? For how long?
2StanForever.
3ChefI'm sorry boys.
4StanChef said he's been bored, so he joining a gro...
5ChefWow!
6Mrs. GarrisonChef?? What kind of questions do you think adv...
7ChefWhat's the meaning of life? Why are we here?
8Mrs. GarrisonI hope you're making the right choice.
9CartmanI'm gonna miss him. I'm gonna miss Chef and I...
10StanDude, how are we gonna go on? Chef was our fuh...
11Mayor McDanielsAnd we will all miss you, Chef, but we know y...
12JimboBye-bye!
13GeraldGood-bye!
14Mr. MackeySo long!
15A ManSo long, Chef!
16A Sign-HolderGood-bye, Chef!
17RandyGood-bye, Chef! Have a great time with the Sup...
18ChefGood-bye! ..
19KyleDraw two card, fatass.
20CartmanReverse to you, Jew.
21StanI'll get it.
22ChefHello there, children!
23StanHe's back!
24KyleYeah!
\n", "
" ], "text/plain": [ " name line\n", "0 Stan You guys, you guys! Chef is going away.\n", "1 Kyle Going away? For how long?\n", "2 Stan Forever.\n", "3 Chef I'm sorry boys.\n", "4 Stan Chef said he's been bored, so he joining a gro...\n", "5 Chef Wow!\n", "6 Mrs. Garrison Chef?? What kind of questions do you think adv...\n", "7 Chef What's the meaning of life? Why are we here?\n", "8 Mrs. Garrison I hope you're making the right choice.\n", "9 Cartman I'm gonna miss him. I'm gonna miss Chef and I...\n", "10 Stan Dude, how are we gonna go on? Chef was our fuh...\n", "11 Mayor McDaniels And we will all miss you, Chef, but we know y...\n", "12 Jimbo Bye-bye!\n", "13 Gerald Good-bye!\n", "14 Mr. Mackey So long!\n", "15 A Man So long, Chef!\n", "16 A Sign-Holder Good-bye, Chef!\n", "17 Randy Good-bye, Chef! Have a great time with the Sup...\n", "18 Chef Good-bye! ..\n", "19 Kyle Draw two card, fatass.\n", "20 Cartman Reverse to you, Jew.\n", "21 Stan I'll get it.\n", "22 Chef Hello there, children!\n", "23 Stan He's back!\n", "24 Kyle Yeah!" 
] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [
"# Preview the first 25 rows of the export frame.\n",
"train.head(25)"
] },
{ "cell_type": "code", "execution_count": 10, "id": "7466b2a6-b579-4bac-a515-df4f040a7b27", "metadata": { "execution": { "iopub.execute_input": "2022-10-18T01:39:49.445428Z", "iopub.status.busy": "2022-10-18T01:39:49.445096Z", "iopub.status.idle": "2022-10-18T01:39:49.615700Z", "shell.execute_reply": "2022-10-18T01:39:49.614962Z", "shell.execute_reply.started": "2022-10-18T01:39:49.445397Z" }, "tags": [] }, "outputs": [], "source": [
"# Write the name/line table to CSV without the index column.\n",
"train.to_csv('data/train.csv', index=False)"
] } ],
"metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.8" } }, "nbformat": 4, "nbformat_minor": 5 }