{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "23a7a47d-40c9-4ce2-8e1d-069690edfed3",
"metadata": {
"execution": {
"iopub.execute_input": "2022-10-18T01:39:49.045197Z",
"iopub.status.busy": "2022-10-18T01:39:49.044788Z",
"iopub.status.idle": "2022-10-18T01:39:49.325032Z",
"shell.execute_reply": "2022-10-18T01:39:49.324364Z",
"shell.execute_reply.started": "2022-10-18T01:39:49.045112Z"
},
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(70896, 4)\n"
]
}
],
"source": [
"import pandas as pd\n",
"# Read the All-seasons CSV into `df` and report its (rows, columns) shape.\n",
"transcript_path = 'data/All-seasons.csv'\n",
"df = pd.read_csv(transcript_path)\n",
"print(df.shape)"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "cd058417-a7b1-408f-a6b8-d02c114b380d",
"metadata": {
"execution": {
"iopub.execute_input": "2022-10-18T01:39:49.326910Z",
"iopub.status.busy": "2022-10-18T01:39:49.326699Z",
"iopub.status.idle": "2022-10-18T01:39:49.339491Z",
"shell.execute_reply": "2022-10-18T01:39:49.338704Z",
"shell.execute_reply.started": "2022-10-18T01:39:49.326893Z"
},
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"2 6416\n",
"3 5798\n",
"4 5680\n",
"6 5131\n",
"5 4414\n",
"7 4236\n",
"1 4170\n",
"8 3601\n",
"9 3526\n",
"11 3478\n",
"10 3471\n",
"14 3346\n",
"12 3307\n",
"13 3257\n",
"16 3120\n",
"15 3101\n",
"18 2522\n",
"17 2305\n",
"Season 17\n",
"Name: Season, dtype: int64"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Count how many rows carry each distinct value of the Season column.\n",
"df['Season'].value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "c4da2f8d-a577-49b0-a477-3eab09a38ae9",
"metadata": {
"execution": {
"iopub.execute_input": "2022-10-18T01:39:49.340976Z",
"iopub.status.busy": "2022-10-18T01:39:49.340661Z",
"iopub.status.idle": "2022-10-18T01:39:49.371622Z",
"shell.execute_reply": "2022-10-18T01:39:49.371150Z",
"shell.execute_reply.started": "2022-10-18T01:39:49.340946Z"
},
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"Cartman 9774\n",
"Evil Cartman 23\n",
"New Cartman 18\n",
"Stan, Kyle, Cartman 12\n",
"Kyle, Cartman 7\n",
"Stan, Cartman 7\n",
"Liane and Cartman 6\n",
"Cartman Smurf 5\n",
"Future Cartman 4\n",
"Cartman on Left 3\n",
"Stan/Kenny/ Cartman 3\n",
"Cartman's Good Side 3\n",
"Mrs. Cartman 3\n",
"Stan/Kyle/ Cartman 3\n",
"Cartman on Right 2\n",
"Cartman's voice 2\n",
"Both Cartmans 2\n",
"Stan, Kyle, Kenny, Cartman 2\n",
"Cartman, Stan 2\n",
"Cartman, Kyle, Kenny 2\n",
"Cartman, Kyle 2\n",
"Cartman's Side 2\n",
"Cartman, Choir 2\n",
"Butters, Cartman 2\n",
"Cartman's Bad Side 2\n",
"Stan, Kyle, Cartman, Kenny 1\n",
"Kenny, Stan, Cartman 1\n",
"Stan, Cartman, Kenny 1\n",
"Eric Cartman 1\n",
"Wendy, Cartman 1\n",
"Congressman 1, Cartman 1\n",
"Cheesy Poof Cartman 1\n",
"Cartmans/Boys 1\n",
"Cartman/ Kenny 1\n",
"Cartman's Conscience 1\n",
"Kyle/Cartman 1\n",
"Mrs Cartman 1\n",
"Kyle, Stan, Cartman 1\n",
"Cartman and Kyle 1\n",
"Cartman (Butters) 1\n",
"The Boys (except Cartman) and Dr. Phillips 1\n",
"Cartman, Butters 1\n",
"Cartman and the Gingers 1\n",
"Name: Character, dtype: int64"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Tally every Character label containing the substring 'artman'.\n",
"mentions_cartman = df['Character'].str.contains('artman')\n",
"df.loc[mentions_cartman, 'Character'].value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "8c2781f6-e93b-49ec-9cbc-2c357b65f239",
"metadata": {
"execution": {
"iopub.execute_input": "2022-10-18T01:39:49.372448Z",
"iopub.status.busy": "2022-10-18T01:39:49.372292Z",
"iopub.status.idle": "2022-10-18T01:39:49.381325Z",
"shell.execute_reply": "2022-10-18T01:39:49.380648Z",
"shell.execute_reply.started": "2022-10-18T01:39:49.372432Z"
},
"tags": []
},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Season | \n",
" Episode | \n",
" Character | \n",
" Line | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 10 | \n",
" 1 | \n",
" Stan | \n",
" You guys, you guys! Chef is going away. \\n | \n",
"
\n",
" \n",
" 1 | \n",
" 10 | \n",
" 1 | \n",
" Kyle | \n",
" Going away? For how long?\\n | \n",
"
\n",
" \n",
" 2 | \n",
" 10 | \n",
" 1 | \n",
" Stan | \n",
" Forever.\\n | \n",
"
\n",
" \n",
" 3 | \n",
" 10 | \n",
" 1 | \n",
" Chef | \n",
" I'm sorry boys.\\n | \n",
"
\n",
" \n",
" 4 | \n",
" 10 | \n",
" 1 | \n",
" Stan | \n",
" Chef said he's been bored, so he joining a gro... | \n",
"
\n",
" \n",
" 5 | \n",
" 10 | \n",
" 1 | \n",
" Chef | \n",
" Wow!\\n | \n",
"
\n",
" \n",
" 6 | \n",
" 10 | \n",
" 1 | \n",
" Mrs. Garrison | \n",
" Chef?? What kind of questions do you think adv... | \n",
"
\n",
" \n",
" 7 | \n",
" 10 | \n",
" 1 | \n",
" Chef | \n",
" What's the meaning of life? Why are we here?\\n | \n",
"
\n",
" \n",
" 8 | \n",
" 10 | \n",
" 1 | \n",
" Mrs. Garrison | \n",
" I hope you're making the right choice.\\n | \n",
"
\n",
" \n",
" 9 | \n",
" 10 | \n",
" 1 | \n",
" Cartman | \n",
" I'm gonna miss him. I'm gonna miss Chef and I... | \n",
"
\n",
" \n",
" 10 | \n",
" 10 | \n",
" 1 | \n",
" Stan | \n",
" Dude, how are we gonna go on? Chef was our fuh... | \n",
"
\n",
" \n",
" 11 | \n",
" 10 | \n",
" 1 | \n",
" Mayor McDaniels | \n",
" And we will all miss you, Chef, but we know y... | \n",
"
\n",
" \n",
" 12 | \n",
" 10 | \n",
" 1 | \n",
" Jimbo | \n",
" Bye-bye!\\n | \n",
"
\n",
" \n",
" 13 | \n",
" 10 | \n",
" 1 | \n",
" Gerald | \n",
" Good-bye!\\n | \n",
"
\n",
" \n",
" 14 | \n",
" 10 | \n",
" 1 | \n",
" Mr. Mackey | \n",
" So long!\\n | \n",
"
\n",
" \n",
" 15 | \n",
" 10 | \n",
" 1 | \n",
" A Man | \n",
" So long, Chef!\\n | \n",
"
\n",
" \n",
" 16 | \n",
" 10 | \n",
" 1 | \n",
" A Sign-Holder | \n",
" Good-bye, Chef!\\n | \n",
"
\n",
" \n",
" 17 | \n",
" 10 | \n",
" 1 | \n",
" Randy | \n",
" Good-bye, Chef! Have a great time with the Sup... | \n",
"
\n",
" \n",
" 18 | \n",
" 10 | \n",
" 1 | \n",
" Chef | \n",
" Good-bye! ..\\n | \n",
"
\n",
" \n",
" 19 | \n",
" 10 | \n",
" 1 | \n",
" Kyle | \n",
" Draw two card, fatass.\\n | \n",
"
\n",
" \n",
" 20 | \n",
" 10 | \n",
" 1 | \n",
" Cartman | \n",
" Reverse to you, Jew. \\n | \n",
"
\n",
" \n",
" 21 | \n",
" 10 | \n",
" 1 | \n",
" Stan | \n",
" I'll get it. \\n | \n",
"
\n",
" \n",
" 22 | \n",
" 10 | \n",
" 1 | \n",
" Chef | \n",
" Hello there, children!\\n | \n",
"
\n",
" \n",
" 23 | \n",
" 10 | \n",
" 1 | \n",
" Stan | \n",
" He's back!\\n | \n",
"
\n",
" \n",
" 24 | \n",
" 10 | \n",
" 1 | \n",
" Kyle | \n",
" Yeah!\\n | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Season Episode Character \\\n",
"0 10 1 Stan \n",
"1 10 1 Kyle \n",
"2 10 1 Stan \n",
"3 10 1 Chef \n",
"4 10 1 Stan \n",
"5 10 1 Chef \n",
"6 10 1 Mrs. Garrison \n",
"7 10 1 Chef \n",
"8 10 1 Mrs. Garrison \n",
"9 10 1 Cartman \n",
"10 10 1 Stan \n",
"11 10 1 Mayor McDaniels \n",
"12 10 1 Jimbo \n",
"13 10 1 Gerald \n",
"14 10 1 Mr. Mackey \n",
"15 10 1 A Man \n",
"16 10 1 A Sign-Holder \n",
"17 10 1 Randy \n",
"18 10 1 Chef \n",
"19 10 1 Kyle \n",
"20 10 1 Cartman \n",
"21 10 1 Stan \n",
"22 10 1 Chef \n",
"23 10 1 Stan \n",
"24 10 1 Kyle \n",
"\n",
" Line \n",
"0 You guys, you guys! Chef is going away. \\n \n",
"1 Going away? For how long?\\n \n",
"2 Forever.\\n \n",
"3 I'm sorry boys.\\n \n",
"4 Chef said he's been bored, so he joining a gro... \n",
"5 Wow!\\n \n",
"6 Chef?? What kind of questions do you think adv... \n",
"7 What's the meaning of life? Why are we here?\\n \n",
"8 I hope you're making the right choice.\\n \n",
"9 I'm gonna miss him. I'm gonna miss Chef and I... \n",
"10 Dude, how are we gonna go on? Chef was our fuh... \n",
"11 And we will all miss you, Chef, but we know y... \n",
"12 Bye-bye!\\n \n",
"13 Good-bye!\\n \n",
"14 So long!\\n \n",
"15 So long, Chef!\\n \n",
"16 Good-bye, Chef!\\n \n",
"17 Good-bye, Chef! Have a great time with the Sup... \n",
"18 Good-bye! ..\\n \n",
"19 Draw two card, fatass.\\n \n",
"20 Reverse to you, Jew. \\n \n",
"21 I'll get it. \\n \n",
"22 Hello there, children!\\n \n",
"23 He's back!\\n \n",
"24 Yeah!\\n "
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Preview the first 25 rows of the raw frame.\n",
"df.head(n=25)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "2c267ff5-a11a-426b-9034-8ee776b800e7",
"metadata": {
"execution": {
"iopub.execute_input": "2022-10-18T01:39:49.382208Z",
"iopub.status.busy": "2022-10-18T01:39:49.382029Z",
"iopub.status.idle": "2022-10-18T01:39:49.407971Z",
"shell.execute_reply": "2022-10-18T01:39:49.407239Z",
"shell.execute_reply.started": "2022-10-18T01:39:49.382191Z"
},
"tags": []
},
"outputs": [],
"source": [
"# Remove newline characters and surrounding whitespace from each entry of the Line column.\n",
"# The values are passed through a plain list, so the resulting Series gets a fresh 0..n-1 index.\n",
"cleanlines = pd.Series(df['Line'].map(lambda text: text.replace('\\n', '').strip()).tolist())"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "4c22081b-e255-45b5-a2f5-ea4376b26434",
"metadata": {
"execution": {
"iopub.execute_input": "2022-10-18T01:39:49.408983Z",
"iopub.status.busy": "2022-10-18T01:39:49.408787Z",
"iopub.status.idle": "2022-10-18T01:39:49.416631Z",
"shell.execute_reply": "2022-10-18T01:39:49.415993Z",
"shell.execute_reply.started": "2022-10-18T01:39:49.408965Z"
},
"tags": []
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" 0 | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" You guys, you guys! Chef is going away. | \n",
"
\n",
" \n",
" 1 | \n",
" Going away? For how long? | \n",
"
\n",
" \n",
" 2 | \n",
" Forever. | \n",
"
\n",
" \n",
" 3 | \n",
" I'm sorry boys. | \n",
"
\n",
" \n",
" 4 | \n",
" Chef said he's been bored, so he joining a gro... | \n",
"
\n",
" \n",
" 5 | \n",
" Wow! | \n",
"
\n",
" \n",
" 6 | \n",
" Chef?? What kind of questions do you think adv... | \n",
"
\n",
" \n",
" 7 | \n",
" What's the meaning of life? Why are we here? | \n",
"
\n",
" \n",
" 8 | \n",
" I hope you're making the right choice. | \n",
"
\n",
" \n",
" 9 | \n",
" I'm gonna miss him. I'm gonna miss Chef and I... | \n",
"
\n",
" \n",
" 10 | \n",
" Dude, how are we gonna go on? Chef was our fuh... | \n",
"
\n",
" \n",
" 11 | \n",
" And we will all miss you, Chef, but we know y... | \n",
"
\n",
" \n",
" 12 | \n",
" Bye-bye! | \n",
"
\n",
" \n",
" 13 | \n",
" Good-bye! | \n",
"
\n",
" \n",
" 14 | \n",
" So long! | \n",
"
\n",
" \n",
" 15 | \n",
" So long, Chef! | \n",
"
\n",
" \n",
" 16 | \n",
" Good-bye, Chef! | \n",
"
\n",
" \n",
" 17 | \n",
" Good-bye, Chef! Have a great time with the Sup... | \n",
"
\n",
" \n",
" 18 | \n",
" Good-bye! .. | \n",
"
\n",
" \n",
" 19 | \n",
" Draw two card, fatass. | \n",
"
\n",
" \n",
" 20 | \n",
" Reverse to you, Jew. | \n",
"
\n",
" \n",
" 21 | \n",
" I'll get it. | \n",
"
\n",
" \n",
" 22 | \n",
" Hello there, children! | \n",
"
\n",
" \n",
" 23 | \n",
" He's back! | \n",
"
\n",
" \n",
" 24 | \n",
" Yeah! | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" 0\n",
"0 You guys, you guys! Chef is going away.\n",
"1 Going away? For how long?\n",
"2 Forever.\n",
"3 I'm sorry boys.\n",
"4 Chef said he's been bored, so he joining a gro...\n",
"5 Wow!\n",
"6 Chef?? What kind of questions do you think adv...\n",
"7 What's the meaning of life? Why are we here?\n",
"8 I hope you're making the right choice.\n",
"9 I'm gonna miss him. I'm gonna miss Chef and I...\n",
"10 Dude, how are we gonna go on? Chef was our fuh...\n",
"11 And we will all miss you, Chef, but we know y...\n",
"12 Bye-bye!\n",
"13 Good-bye!\n",
"14 So long!\n",
"15 So long, Chef!\n",
"16 Good-bye, Chef!\n",
"17 Good-bye, Chef! Have a great time with the Sup...\n",
"18 Good-bye! ..\n",
"19 Draw two card, fatass.\n",
"20 Reverse to you, Jew.\n",
"21 I'll get it.\n",
"22 Hello there, children!\n",
"23 He's back!\n",
"24 Yeah!"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Preview the first 25 cleaned lines as a one-column frame.\n",
"cleanlines.to_frame().head(n=25)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "b8a4c7ec-e4c5-43c5-89e9-09f661304938",
"metadata": {
"execution": {
"iopub.execute_input": "2022-10-18T01:39:49.419786Z",
"iopub.status.busy": "2022-10-18T01:39:49.419347Z",
"iopub.status.idle": "2022-10-18T01:39:49.423453Z",
"shell.execute_reply": "2022-10-18T01:39:49.422770Z",
"shell.execute_reply.started": "2022-10-18T01:39:49.419765Z"
},
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0\n"
]
}
],
"source": [
"# Sanity check: cleaning must not add or drop rows, so the difference should print 0.\n",
"print(len(df) - len(cleanlines))"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "993a4b52-e98b-494f-a48c-d4adbd57f510",
"metadata": {
"execution": {
"iopub.execute_input": "2022-10-18T01:39:49.424465Z",
"iopub.status.busy": "2022-10-18T01:39:49.424197Z",
"iopub.status.idle": "2022-10-18T01:39:49.430702Z",
"shell.execute_reply": "2022-10-18T01:39:49.429900Z",
"shell.execute_reply.started": "2022-10-18T01:39:49.424442Z"
},
"tags": []
},
"outputs": [],
"source": [
"# Build the two-column training frame: speaker label ('name') and cleaned text ('line').\n",
"# The Season value_counts above shows the literal string 'Season' in 17 rows; these look like\n",
"# header lines repeated inside the CSV, so they are excluded instead of being exported as samples.\n",
"is_header_row = df['Season'].astype(str).eq('Season').to_numpy()\n",
"# Pair the columns positionally (both hold one entry per row of df) rather than by index label.\n",
"train = pd.DataFrame({'name': df['Character'].to_numpy(), 'line': cleanlines.to_numpy()})\n",
"train = train.loc[~is_header_row].reset_index(drop=True)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "148a5ba3-4918-4421-a19a-68a84251cd7b",
"metadata": {
"execution": {
"iopub.execute_input": "2022-10-18T01:39:49.432232Z",
"iopub.status.busy": "2022-10-18T01:39:49.431864Z",
"iopub.status.idle": "2022-10-18T01:39:49.443756Z",
"shell.execute_reply": "2022-10-18T01:39:49.442701Z",
"shell.execute_reply.started": "2022-10-18T01:39:49.432200Z"
},
"tags": []
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" name | \n",
" line | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" Stan | \n",
" You guys, you guys! Chef is going away. | \n",
"
\n",
" \n",
" 1 | \n",
" Kyle | \n",
" Going away? For how long? | \n",
"
\n",
" \n",
" 2 | \n",
" Stan | \n",
" Forever. | \n",
"
\n",
" \n",
" 3 | \n",
" Chef | \n",
" I'm sorry boys. | \n",
"
\n",
" \n",
" 4 | \n",
" Stan | \n",
" Chef said he's been bored, so he joining a gro... | \n",
"
\n",
" \n",
" 5 | \n",
" Chef | \n",
" Wow! | \n",
"
\n",
" \n",
" 6 | \n",
" Mrs. Garrison | \n",
" Chef?? What kind of questions do you think adv... | \n",
"
\n",
" \n",
" 7 | \n",
" Chef | \n",
" What's the meaning of life? Why are we here? | \n",
"
\n",
" \n",
" 8 | \n",
" Mrs. Garrison | \n",
" I hope you're making the right choice. | \n",
"
\n",
" \n",
" 9 | \n",
" Cartman | \n",
" I'm gonna miss him. I'm gonna miss Chef and I... | \n",
"
\n",
" \n",
" 10 | \n",
" Stan | \n",
" Dude, how are we gonna go on? Chef was our fuh... | \n",
"
\n",
" \n",
" 11 | \n",
" Mayor McDaniels | \n",
" And we will all miss you, Chef, but we know y... | \n",
"
\n",
" \n",
" 12 | \n",
" Jimbo | \n",
" Bye-bye! | \n",
"
\n",
" \n",
" 13 | \n",
" Gerald | \n",
" Good-bye! | \n",
"
\n",
" \n",
" 14 | \n",
" Mr. Mackey | \n",
" So long! | \n",
"
\n",
" \n",
" 15 | \n",
" A Man | \n",
" So long, Chef! | \n",
"
\n",
" \n",
" 16 | \n",
" A Sign-Holder | \n",
" Good-bye, Chef! | \n",
"
\n",
" \n",
" 17 | \n",
" Randy | \n",
" Good-bye, Chef! Have a great time with the Sup... | \n",
"
\n",
" \n",
" 18 | \n",
" Chef | \n",
" Good-bye! .. | \n",
"
\n",
" \n",
" 19 | \n",
" Kyle | \n",
" Draw two card, fatass. | \n",
"
\n",
" \n",
" 20 | \n",
" Cartman | \n",
" Reverse to you, Jew. | \n",
"
\n",
" \n",
" 21 | \n",
" Stan | \n",
" I'll get it. | \n",
"
\n",
" \n",
" 22 | \n",
" Chef | \n",
" Hello there, children! | \n",
"
\n",
" \n",
" 23 | \n",
" Stan | \n",
" He's back! | \n",
"
\n",
" \n",
" 24 | \n",
" Kyle | \n",
" Yeah! | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" name line\n",
"0 Stan You guys, you guys! Chef is going away.\n",
"1 Kyle Going away? For how long?\n",
"2 Stan Forever.\n",
"3 Chef I'm sorry boys.\n",
"4 Stan Chef said he's been bored, so he joining a gro...\n",
"5 Chef Wow!\n",
"6 Mrs. Garrison Chef?? What kind of questions do you think adv...\n",
"7 Chef What's the meaning of life? Why are we here?\n",
"8 Mrs. Garrison I hope you're making the right choice.\n",
"9 Cartman I'm gonna miss him. I'm gonna miss Chef and I...\n",
"10 Stan Dude, how are we gonna go on? Chef was our fuh...\n",
"11 Mayor McDaniels And we will all miss you, Chef, but we know y...\n",
"12 Jimbo Bye-bye!\n",
"13 Gerald Good-bye!\n",
"14 Mr. Mackey So long!\n",
"15 A Man So long, Chef!\n",
"16 A Sign-Holder Good-bye, Chef!\n",
"17 Randy Good-bye, Chef! Have a great time with the Sup...\n",
"18 Chef Good-bye! ..\n",
"19 Kyle Draw two card, fatass.\n",
"20 Cartman Reverse to you, Jew.\n",
"21 Stan I'll get it.\n",
"22 Chef Hello there, children!\n",
"23 Stan He's back!\n",
"24 Kyle Yeah!"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Preview the first 25 rows of the training frame.\n",
"train.head(n=25)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "7466b2a6-b579-4bac-a515-df4f040a7b27",
"metadata": {
"execution": {
"iopub.execute_input": "2022-10-18T01:39:49.445428Z",
"iopub.status.busy": "2022-10-18T01:39:49.445096Z",
"iopub.status.idle": "2022-10-18T01:39:49.615700Z",
"shell.execute_reply": "2022-10-18T01:39:49.614962Z",
"shell.execute_reply.started": "2022-10-18T01:39:49.445397Z"
},
"tags": []
},
"outputs": [],
"source": [
"# Write the training frame to disk without the index column.\n",
"output_path = 'data/train.csv'\n",
"train.to_csv(output_path, index=False)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.8"
}
},
"nbformat": 4,
"nbformat_minor": 5
}