939 lines
32 KiB
Text
939 lines
32 KiB
Text
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 1,
|
|
"id": "23a7a47d-40c9-4ce2-8e1d-069690edfed3",
|
|
"metadata": {
|
|
"execution": {
|
|
"iopub.execute_input": "2022-10-18T01:39:49.045197Z",
|
|
"iopub.status.busy": "2022-10-18T01:39:49.044788Z",
|
|
"iopub.status.idle": "2022-10-18T01:39:49.325032Z",
|
|
"shell.execute_reply": "2022-10-18T01:39:49.324364Z",
|
|
"shell.execute_reply.started": "2022-10-18T01:39:49.045112Z"
|
|
},
|
|
"tags": []
|
|
},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"(70896, 4)\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"import pandas as pd\n",
|
"# Load the full transcript table and report its (rows, columns) shape.\n",
"df = pd.read_csv(filepath_or_buffer='data/All-seasons.csv')\n",
"print(df.shape)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 2,
|
|
"id": "cd058417-a7b1-408f-a6b8-d02c114b380d",
|
|
"metadata": {
|
|
"execution": {
|
|
"iopub.execute_input": "2022-10-18T01:39:49.326910Z",
|
|
"iopub.status.busy": "2022-10-18T01:39:49.326699Z",
|
|
"iopub.status.idle": "2022-10-18T01:39:49.339491Z",
|
|
"shell.execute_reply": "2022-10-18T01:39:49.338704Z",
|
|
"shell.execute_reply.started": "2022-10-18T01:39:49.326893Z"
|
|
},
|
|
"tags": []
|
|
},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"2 6416\n",
|
|
"3 5798\n",
|
|
"4 5680\n",
|
|
"6 5131\n",
|
|
"5 4414\n",
|
|
"7 4236\n",
|
|
"1 4170\n",
|
|
"8 3601\n",
|
|
"9 3526\n",
|
|
"11 3478\n",
|
|
"10 3471\n",
|
|
"14 3346\n",
|
|
"12 3307\n",
|
|
"13 3257\n",
|
|
"16 3120\n",
|
|
"15 3101\n",
|
|
"18 2522\n",
|
|
"17 2305\n",
|
|
"Season 17\n",
|
|
"Name: Season, dtype: int64"
|
|
]
|
|
},
|
|
"execution_count": 2,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
"# Count transcript rows per value of the Season column.\n",
"df['Season'].value_counts()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 3,
|
|
"id": "c4da2f8d-a577-49b0-a477-3eab09a38ae9",
|
|
"metadata": {
|
|
"execution": {
|
|
"iopub.execute_input": "2022-10-18T01:39:49.340976Z",
|
|
"iopub.status.busy": "2022-10-18T01:39:49.340661Z",
|
|
"iopub.status.idle": "2022-10-18T01:39:49.371622Z",
|
|
"shell.execute_reply": "2022-10-18T01:39:49.371150Z",
|
|
"shell.execute_reply.started": "2022-10-18T01:39:49.340946Z"
|
|
},
|
|
"tags": []
|
|
},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"Cartman 9774\n",
|
|
"Evil Cartman 23\n",
|
|
"New Cartman 18\n",
|
|
"Stan, Kyle, Cartman 12\n",
|
|
"Kyle, Cartman 7\n",
|
|
"Stan, Cartman 7\n",
|
|
"Liane and Cartman 6\n",
|
|
"Cartman Smurf 5\n",
|
|
"Future Cartman 4\n",
|
|
"Cartman on Left 3\n",
|
|
"Stan/Kenny/ Cartman 3\n",
|
|
"Cartman's Good Side 3\n",
|
|
"Mrs. Cartman 3\n",
|
|
"Stan/Kyle/ Cartman 3\n",
|
|
"Cartman on Right 2\n",
|
|
"Cartman's voice 2\n",
|
|
"Both Cartmans 2\n",
|
|
"Stan, Kyle, Kenny, Cartman 2\n",
|
|
"Cartman, Stan 2\n",
|
|
"Cartman, Kyle, Kenny 2\n",
|
|
"Cartman, Kyle 2\n",
|
|
"Cartman's Side 2\n",
|
|
"Cartman, Choir 2\n",
|
|
"Butters, Cartman 2\n",
|
|
"Cartman's Bad Side 2\n",
|
|
"Stan, Kyle, Cartman, Kenny 1\n",
|
|
"Kenny, Stan, Cartman 1\n",
|
|
"Stan, Cartman, Kenny 1\n",
|
|
"Eric Cartman 1\n",
|
|
"Wendy, Cartman 1\n",
|
|
"Congressman 1, Cartman 1\n",
|
|
"Cheesy Poof Cartman 1\n",
|
|
"Cartmans/Boys 1\n",
|
|
"Cartman/ Kenny 1\n",
|
|
"Cartman's Conscience 1\n",
|
|
"Kyle/Cartman 1\n",
|
|
"Mrs Cartman 1\n",
|
|
"Kyle, Stan, Cartman 1\n",
|
|
"Cartman and Kyle 1\n",
|
|
"Cartman (Butters) 1\n",
|
|
"The Boys (except Cartman) and Dr. Phillips 1\n",
|
|
"Cartman, Butters 1\n",
|
|
"Cartman and the Gingers 1\n",
|
|
"Name: Character, dtype: int64"
|
|
]
|
|
},
|
|
"execution_count": 3,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
"# List every speaker label that mentions Cartman, with its row count.\n",
"# regex=False matches 'artman' literally; na=False keeps rows with a missing\n",
"# Character out of the mask instead of breaking the boolean row selection.\n",
"df.loc[df.Character.str.contains('artman', regex=False, na=False), 'Character'].value_counts()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 4,
|
|
"id": "8c2781f6-e93b-49ec-9cbc-2c357b65f239",
|
|
"metadata": {
|
|
"execution": {
|
|
"iopub.execute_input": "2022-10-18T01:39:49.372448Z",
|
|
"iopub.status.busy": "2022-10-18T01:39:49.372292Z",
|
|
"iopub.status.idle": "2022-10-18T01:39:49.381325Z",
|
|
"shell.execute_reply": "2022-10-18T01:39:49.380648Z",
|
|
"shell.execute_reply.started": "2022-10-18T01:39:49.372432Z"
|
|
},
|
|
"tags": []
|
|
},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead th {\n",
|
|
" text-align: right;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>Season</th>\n",
|
|
" <th>Episode</th>\n",
|
|
" <th>Character</th>\n",
|
|
" <th>Line</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>0</th>\n",
|
|
" <td>10</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>Stan</td>\n",
|
|
" <td>You guys, you guys! Chef is going away. \\n</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>1</th>\n",
|
|
" <td>10</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>Kyle</td>\n",
|
|
" <td>Going away? For how long?\\n</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>2</th>\n",
|
|
" <td>10</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>Stan</td>\n",
|
|
" <td>Forever.\\n</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>3</th>\n",
|
|
" <td>10</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>Chef</td>\n",
|
|
" <td>I'm sorry boys.\\n</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>4</th>\n",
|
|
" <td>10</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>Stan</td>\n",
|
|
" <td>Chef said he's been bored, so he joining a gro...</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>5</th>\n",
|
|
" <td>10</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>Chef</td>\n",
|
|
" <td>Wow!\\n</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>6</th>\n",
|
|
" <td>10</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>Mrs. Garrison</td>\n",
|
|
" <td>Chef?? What kind of questions do you think adv...</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>7</th>\n",
|
|
" <td>10</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>Chef</td>\n",
|
|
" <td>What's the meaning of life? Why are we here?\\n</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>8</th>\n",
|
|
" <td>10</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>Mrs. Garrison</td>\n",
|
|
" <td>I hope you're making the right choice.\\n</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>9</th>\n",
|
|
" <td>10</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>Cartman</td>\n",
|
|
" <td>I'm gonna miss him. I'm gonna miss Chef and I...</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>10</th>\n",
|
|
" <td>10</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>Stan</td>\n",
|
|
" <td>Dude, how are we gonna go on? Chef was our fuh...</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>11</th>\n",
|
|
" <td>10</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>Mayor McDaniels</td>\n",
|
|
" <td>And we will all miss you, Chef, but we know y...</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>12</th>\n",
|
|
" <td>10</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>Jimbo</td>\n",
|
|
" <td>Bye-bye!\\n</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>13</th>\n",
|
|
" <td>10</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>Gerald</td>\n",
|
|
" <td>Good-bye!\\n</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>14</th>\n",
|
|
" <td>10</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>Mr. Mackey</td>\n",
|
|
" <td>So long!\\n</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>15</th>\n",
|
|
" <td>10</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>A Man</td>\n",
|
|
" <td>So long, Chef!\\n</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>16</th>\n",
|
|
" <td>10</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>A Sign-Holder</td>\n",
|
|
" <td>Good-bye, Chef!\\n</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>17</th>\n",
|
|
" <td>10</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>Randy</td>\n",
|
|
" <td>Good-bye, Chef! Have a great time with the Sup...</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>18</th>\n",
|
|
" <td>10</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>Chef</td>\n",
|
|
" <td>Good-bye! ..\\n</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>19</th>\n",
|
|
" <td>10</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>Kyle</td>\n",
|
|
" <td>Draw two card, fatass.\\n</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>20</th>\n",
|
|
" <td>10</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>Cartman</td>\n",
|
|
" <td>Reverse to you, Jew. \\n</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>21</th>\n",
|
|
" <td>10</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>Stan</td>\n",
|
|
" <td>I'll get it. \\n</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>22</th>\n",
|
|
" <td>10</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>Chef</td>\n",
|
|
" <td>Hello there, children!\\n</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>23</th>\n",
|
|
" <td>10</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>Stan</td>\n",
|
|
" <td>He's back!\\n</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>24</th>\n",
|
|
" <td>10</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>Kyle</td>\n",
|
|
" <td>Yeah!\\n</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" Season Episode Character \\\n",
|
|
"0 10 1 Stan \n",
|
|
"1 10 1 Kyle \n",
|
|
"2 10 1 Stan \n",
|
|
"3 10 1 Chef \n",
|
|
"4 10 1 Stan \n",
|
|
"5 10 1 Chef \n",
|
|
"6 10 1 Mrs. Garrison \n",
|
|
"7 10 1 Chef \n",
|
|
"8 10 1 Mrs. Garrison \n",
|
|
"9 10 1 Cartman \n",
|
|
"10 10 1 Stan \n",
|
|
"11 10 1 Mayor McDaniels \n",
|
|
"12 10 1 Jimbo \n",
|
|
"13 10 1 Gerald \n",
|
|
"14 10 1 Mr. Mackey \n",
|
|
"15 10 1 A Man \n",
|
|
"16 10 1 A Sign-Holder \n",
|
|
"17 10 1 Randy \n",
|
|
"18 10 1 Chef \n",
|
|
"19 10 1 Kyle \n",
|
|
"20 10 1 Cartman \n",
|
|
"21 10 1 Stan \n",
|
|
"22 10 1 Chef \n",
|
|
"23 10 1 Stan \n",
|
|
"24 10 1 Kyle \n",
|
|
"\n",
|
|
" Line \n",
|
|
"0 You guys, you guys! Chef is going away. \\n \n",
|
|
"1 Going away? For how long?\\n \n",
|
|
"2 Forever.\\n \n",
|
|
"3 I'm sorry boys.\\n \n",
|
|
"4 Chef said he's been bored, so he joining a gro... \n",
|
|
"5 Wow!\\n \n",
|
|
"6 Chef?? What kind of questions do you think adv... \n",
|
|
"7 What's the meaning of life? Why are we here?\\n \n",
|
|
"8 I hope you're making the right choice.\\n \n",
|
|
"9 I'm gonna miss him. I'm gonna miss Chef and I... \n",
|
|
"10 Dude, how are we gonna go on? Chef was our fuh... \n",
|
|
"11 And we will all miss you, Chef, but we know y... \n",
|
|
"12 Bye-bye!\\n \n",
|
|
"13 Good-bye!\\n \n",
|
|
"14 So long!\\n \n",
|
|
"15 So long, Chef!\\n \n",
|
|
"16 Good-bye, Chef!\\n \n",
|
|
"17 Good-bye, Chef! Have a great time with the Sup... \n",
|
|
"18 Good-bye! ..\\n \n",
|
|
"19 Draw two card, fatass.\\n \n",
|
|
"20 Reverse to you, Jew. \\n \n",
|
|
"21 I'll get it. \\n \n",
|
|
"22 Hello there, children!\\n \n",
|
|
"23 He's back!\\n \n",
|
|
"24 Yeah!\\n "
|
|
]
|
|
},
|
|
"execution_count": 4,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
"# Preview the first 25 raw rows.\n",
"df.iloc[:25]"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 5,
|
|
"id": "2c267ff5-a11a-426b-9034-8ee776b800e7",
|
|
"metadata": {
|
|
"execution": {
|
|
"iopub.execute_input": "2022-10-18T01:39:49.382208Z",
|
|
"iopub.status.busy": "2022-10-18T01:39:49.382029Z",
|
|
"iopub.status.idle": "2022-10-18T01:39:49.407971Z",
|
|
"shell.execute_reply": "2022-10-18T01:39:49.407239Z",
|
|
"shell.execute_reply.started": "2022-10-18T01:39:49.382191Z"
|
|
},
|
|
"tags": []
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
"# Remove embedded newlines and surrounding whitespace from every line of dialogue.\n",
"# Vectorized .str methods replace the Python-level loop and pass missing values\n",
"# through as NaN; .rename(None) keeps the Series unnamed, as the list-built one was.\n",
"cleanlines = df.Line.str.replace('\\n', '', regex=False).str.strip().rename(None)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 6,
|
|
"id": "4c22081b-e255-45b5-a2f5-ea4376b26434",
|
|
"metadata": {
|
|
"execution": {
|
|
"iopub.execute_input": "2022-10-18T01:39:49.408983Z",
|
|
"iopub.status.busy": "2022-10-18T01:39:49.408787Z",
|
|
"iopub.status.idle": "2022-10-18T01:39:49.416631Z",
|
|
"shell.execute_reply": "2022-10-18T01:39:49.415993Z",
|
|
"shell.execute_reply.started": "2022-10-18T01:39:49.408965Z"
|
|
},
|
|
"tags": []
|
|
},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead th {\n",
|
|
" text-align: right;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>0</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>0</th>\n",
|
|
" <td>You guys, you guys! Chef is going away.</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>1</th>\n",
|
|
" <td>Going away? For how long?</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>2</th>\n",
|
|
" <td>Forever.</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>3</th>\n",
|
|
" <td>I'm sorry boys.</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>4</th>\n",
|
|
" <td>Chef said he's been bored, so he joining a gro...</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>5</th>\n",
|
|
" <td>Wow!</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>6</th>\n",
|
|
" <td>Chef?? What kind of questions do you think adv...</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>7</th>\n",
|
|
" <td>What's the meaning of life? Why are we here?</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>8</th>\n",
|
|
" <td>I hope you're making the right choice.</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>9</th>\n",
|
|
" <td>I'm gonna miss him. I'm gonna miss Chef and I...</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>10</th>\n",
|
|
" <td>Dude, how are we gonna go on? Chef was our fuh...</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>11</th>\n",
|
|
" <td>And we will all miss you, Chef, but we know y...</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>12</th>\n",
|
|
" <td>Bye-bye!</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>13</th>\n",
|
|
" <td>Good-bye!</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>14</th>\n",
|
|
" <td>So long!</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>15</th>\n",
|
|
" <td>So long, Chef!</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>16</th>\n",
|
|
" <td>Good-bye, Chef!</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>17</th>\n",
|
|
" <td>Good-bye, Chef! Have a great time with the Sup...</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>18</th>\n",
|
|
" <td>Good-bye! ..</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>19</th>\n",
|
|
" <td>Draw two card, fatass.</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>20</th>\n",
|
|
" <td>Reverse to you, Jew.</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>21</th>\n",
|
|
" <td>I'll get it.</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>22</th>\n",
|
|
" <td>Hello there, children!</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>23</th>\n",
|
|
" <td>He's back!</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>24</th>\n",
|
|
" <td>Yeah!</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" 0\n",
|
|
"0 You guys, you guys! Chef is going away.\n",
|
|
"1 Going away? For how long?\n",
|
|
"2 Forever.\n",
|
|
"3 I'm sorry boys.\n",
|
|
"4 Chef said he's been bored, so he joining a gro...\n",
|
|
"5 Wow!\n",
|
|
"6 Chef?? What kind of questions do you think adv...\n",
|
|
"7 What's the meaning of life? Why are we here?\n",
|
|
"8 I hope you're making the right choice.\n",
|
|
"9 I'm gonna miss him. I'm gonna miss Chef and I...\n",
|
|
"10 Dude, how are we gonna go on? Chef was our fuh...\n",
|
|
"11 And we will all miss you, Chef, but we know y...\n",
|
|
"12 Bye-bye!\n",
|
|
"13 Good-bye!\n",
|
|
"14 So long!\n",
|
|
"15 So long, Chef!\n",
|
|
"16 Good-bye, Chef!\n",
|
|
"17 Good-bye, Chef! Have a great time with the Sup...\n",
|
|
"18 Good-bye! ..\n",
|
|
"19 Draw two card, fatass.\n",
|
|
"20 Reverse to you, Jew.\n",
|
|
"21 I'll get it.\n",
|
|
"22 Hello there, children!\n",
|
|
"23 He's back!\n",
|
|
"24 Yeah!"
|
|
]
|
|
},
|
|
"execution_count": 6,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
"# Preview the first 25 cleaned lines as a one-column table.\n",
"cleanlines.to_frame().head(25)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 7,
|
|
"id": "b8a4c7ec-e4c5-43c5-89e9-09f661304938",
|
|
"metadata": {
|
|
"execution": {
|
|
"iopub.execute_input": "2022-10-18T01:39:49.419786Z",
|
|
"iopub.status.busy": "2022-10-18T01:39:49.419347Z",
|
|
"iopub.status.idle": "2022-10-18T01:39:49.423453Z",
|
|
"shell.execute_reply": "2022-10-18T01:39:49.422770Z",
|
|
"shell.execute_reply.started": "2022-10-18T01:39:49.419765Z"
|
|
},
|
|
"tags": []
|
|
},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"0\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
"# Sanity check: cleaning must neither add nor drop rows, so this prints 0.\n",
"print(len(df) - len(cleanlines))"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 8,
|
|
"id": "993a4b52-e98b-494f-a48c-d4adbd57f510",
|
|
"metadata": {
|
|
"execution": {
|
|
"iopub.execute_input": "2022-10-18T01:39:49.424465Z",
|
|
"iopub.status.busy": "2022-10-18T01:39:49.424197Z",
|
|
"iopub.status.idle": "2022-10-18T01:39:49.430702Z",
|
|
"shell.execute_reply": "2022-10-18T01:39:49.429900Z",
|
|
"shell.execute_reply.started": "2022-10-18T01:39:49.424442Z"
|
|
},
|
|
"tags": []
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
"# Pair each speaker label with the cleaned text of their line.\n",
"train = df[['Character']].rename(columns={'Character': 'name'}).assign(line=cleanlines)\n",
"# df.Season.value_counts() shows 17 rows whose Season is the literal 'Season',\n",
"# i.e. the CSV header repeated inside the data; drop them so header text is\n",
"# not written out as dialogue.\n",
"train = train.loc[df.Season != 'Season'].reset_index(drop=True)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 9,
|
|
"id": "148a5ba3-4918-4421-a19a-68a84251cd7b",
|
|
"metadata": {
|
|
"execution": {
|
|
"iopub.execute_input": "2022-10-18T01:39:49.432232Z",
|
|
"iopub.status.busy": "2022-10-18T01:39:49.431864Z",
|
|
"iopub.status.idle": "2022-10-18T01:39:49.443756Z",
|
|
"shell.execute_reply": "2022-10-18T01:39:49.442701Z",
|
|
"shell.execute_reply.started": "2022-10-18T01:39:49.432200Z"
|
|
},
|
|
"tags": []
|
|
},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead th {\n",
|
|
" text-align: right;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>name</th>\n",
|
|
" <th>line</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>0</th>\n",
|
|
" <td>Stan</td>\n",
|
|
" <td>You guys, you guys! Chef is going away.</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>1</th>\n",
|
|
" <td>Kyle</td>\n",
|
|
" <td>Going away? For how long?</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>2</th>\n",
|
|
" <td>Stan</td>\n",
|
|
" <td>Forever.</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>3</th>\n",
|
|
" <td>Chef</td>\n",
|
|
" <td>I'm sorry boys.</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>4</th>\n",
|
|
" <td>Stan</td>\n",
|
|
" <td>Chef said he's been bored, so he joining a gro...</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>5</th>\n",
|
|
" <td>Chef</td>\n",
|
|
" <td>Wow!</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>6</th>\n",
|
|
" <td>Mrs. Garrison</td>\n",
|
|
" <td>Chef?? What kind of questions do you think adv...</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>7</th>\n",
|
|
" <td>Chef</td>\n",
|
|
" <td>What's the meaning of life? Why are we here?</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>8</th>\n",
|
|
" <td>Mrs. Garrison</td>\n",
|
|
" <td>I hope you're making the right choice.</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>9</th>\n",
|
|
" <td>Cartman</td>\n",
|
|
" <td>I'm gonna miss him. I'm gonna miss Chef and I...</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>10</th>\n",
|
|
" <td>Stan</td>\n",
|
|
" <td>Dude, how are we gonna go on? Chef was our fuh...</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>11</th>\n",
|
|
" <td>Mayor McDaniels</td>\n",
|
|
" <td>And we will all miss you, Chef, but we know y...</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>12</th>\n",
|
|
" <td>Jimbo</td>\n",
|
|
" <td>Bye-bye!</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>13</th>\n",
|
|
" <td>Gerald</td>\n",
|
|
" <td>Good-bye!</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>14</th>\n",
|
|
" <td>Mr. Mackey</td>\n",
|
|
" <td>So long!</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>15</th>\n",
|
|
" <td>A Man</td>\n",
|
|
" <td>So long, Chef!</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>16</th>\n",
|
|
" <td>A Sign-Holder</td>\n",
|
|
" <td>Good-bye, Chef!</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>17</th>\n",
|
|
" <td>Randy</td>\n",
|
|
" <td>Good-bye, Chef! Have a great time with the Sup...</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>18</th>\n",
|
|
" <td>Chef</td>\n",
|
|
" <td>Good-bye! ..</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>19</th>\n",
|
|
" <td>Kyle</td>\n",
|
|
" <td>Draw two card, fatass.</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>20</th>\n",
|
|
" <td>Cartman</td>\n",
|
|
" <td>Reverse to you, Jew.</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>21</th>\n",
|
|
" <td>Stan</td>\n",
|
|
" <td>I'll get it.</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>22</th>\n",
|
|
" <td>Chef</td>\n",
|
|
" <td>Hello there, children!</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>23</th>\n",
|
|
" <td>Stan</td>\n",
|
|
" <td>He's back!</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>24</th>\n",
|
|
" <td>Kyle</td>\n",
|
|
" <td>Yeah!</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" name line\n",
|
|
"0 Stan You guys, you guys! Chef is going away.\n",
|
|
"1 Kyle Going away? For how long?\n",
|
|
"2 Stan Forever.\n",
|
|
"3 Chef I'm sorry boys.\n",
|
|
"4 Stan Chef said he's been bored, so he joining a gro...\n",
|
|
"5 Chef Wow!\n",
|
|
"6 Mrs. Garrison Chef?? What kind of questions do you think adv...\n",
|
|
"7 Chef What's the meaning of life? Why are we here?\n",
|
|
"8 Mrs. Garrison I hope you're making the right choice.\n",
|
|
"9 Cartman I'm gonna miss him. I'm gonna miss Chef and I...\n",
|
|
"10 Stan Dude, how are we gonna go on? Chef was our fuh...\n",
|
|
"11 Mayor McDaniels And we will all miss you, Chef, but we know y...\n",
|
|
"12 Jimbo Bye-bye!\n",
|
|
"13 Gerald Good-bye!\n",
|
|
"14 Mr. Mackey So long!\n",
|
|
"15 A Man So long, Chef!\n",
|
|
"16 A Sign-Holder Good-bye, Chef!\n",
|
|
"17 Randy Good-bye, Chef! Have a great time with the Sup...\n",
|
|
"18 Chef Good-bye! ..\n",
|
|
"19 Kyle Draw two card, fatass.\n",
|
|
"20 Cartman Reverse to you, Jew.\n",
|
|
"21 Stan I'll get it.\n",
|
|
"22 Chef Hello there, children!\n",
|
|
"23 Stan He's back!\n",
|
|
"24 Kyle Yeah!"
|
|
]
|
|
},
|
|
"execution_count": 9,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
"# Preview the first 25 (name, line) pairs.\n",
"train.iloc[:25]"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 10,
|
|
"id": "7466b2a6-b579-4bac-a515-df4f040a7b27",
|
|
"metadata": {
|
|
"execution": {
|
|
"iopub.execute_input": "2022-10-18T01:39:49.445428Z",
|
|
"iopub.status.busy": "2022-10-18T01:39:49.445096Z",
|
|
"iopub.status.idle": "2022-10-18T01:39:49.615700Z",
|
|
"shell.execute_reply": "2022-10-18T01:39:49.614962Z",
|
|
"shell.execute_reply.started": "2022-10-18T01:39:49.445397Z"
|
|
},
|
|
"tags": []
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
"# Write the (name, line) table to disk without the index column.\n",
"train.to_csv(path_or_buf='data/train.csv', index=False)"
|
|
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3 (ipykernel)",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.10.8"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 5
|
|
}
|