From 72b0dda8a052f008cf0d91455edafa853d255960 Mon Sep 17 00:00:00 2001 From: Adam <24621027+WhiteDopeOnPunk@users.noreply.github.com> Date: Mon, 1 Aug 2022 09:32:07 -0400 Subject: [PATCH] update --- clean.ipynb | 605 +++++++++++++-- eda.ipynb | 2042 +++++++++++++++++++++++---------------------------- 2 files changed, 1457 insertions(+), 1190 deletions(-) diff --git a/clean.ipynb b/clean.ipynb index b1aeae8..b3e315c 100644 --- a/clean.ipynb +++ b/clean.ipynb @@ -13,7 +13,9 @@ "id": "cecbac86-abb3-4f6b-a101-2d9324d96274", "metadata": {}, "source": [ - "# Cleaning" + "# Cleaning\n", + "\n", + "Let's get this to something we can work with" ] }, { @@ -30,17 +32,19 @@ "id": "3c4bfade-d06d-4887-9eb4-ec7f5bc61625", "metadata": { "execution": { - "iopub.execute_input": "2022-07-21T20:29:36.887038Z", - "iopub.status.busy": "2022-07-21T20:29:36.886672Z", - "iopub.status.idle": "2022-07-21T20:29:37.222976Z", - "shell.execute_reply": "2022-07-21T20:29:37.222218Z", - "shell.execute_reply.started": "2022-07-21T20:29:36.886962Z" + "iopub.execute_input": "2022-08-01T00:18:59.316785Z", + "iopub.status.busy": "2022-08-01T00:18:59.315438Z", + "iopub.status.idle": "2022-08-01T00:19:00.307894Z", + "shell.execute_reply": "2022-08-01T00:19:00.307130Z", + "shell.execute_reply.started": "2022-08-01T00:18:59.316695Z" }, "tags": [] }, "outputs": [], "source": [ "import pandas as pd\n", + "import seaborn as sns\n", + "import matplotlib.pyplot as plt\n", "\n", "df = pd.read_csv('data/auto-mpg.data',header=None,delim_whitespace=True)\n", "df.columns = ['mpg','cylinders','displacement','horsepower','weight',\n", @@ -71,11 +75,11 @@ "id": "62bbb6bd-b5b3-4d54-a132-23cd367c4570", "metadata": { "execution": { - "iopub.execute_input": "2022-07-21T20:29:37.225459Z", - "iopub.status.busy": "2022-07-21T20:29:37.224901Z", - "iopub.status.idle": "2022-07-21T20:29:37.237624Z", - "shell.execute_reply": "2022-07-21T20:29:37.236773Z", - "shell.execute_reply.started": "2022-07-21T20:29:37.225432Z" + 
"iopub.execute_input": "2022-08-01T00:19:00.311568Z", + "iopub.status.busy": "2022-08-01T00:19:00.310921Z", + "iopub.status.idle": "2022-08-01T00:19:00.322308Z", + "shell.execute_reply": "2022-08-01T00:19:00.321851Z", + "shell.execute_reply.started": "2022-08-01T00:19:00.311524Z" }, "tags": [] }, @@ -112,7 +116,7 @@ "id": "6a4028ed-eda3-4c50-aed0-d9503d41a8e1", "metadata": {}, "source": [ - "Why is horsepower not a number?" + "No nulls, but why is horsepower not a number?" ] }, { @@ -121,11 +125,11 @@ "id": "58fa2876-4ccb-4ef5-bc16-d25b74efb457", "metadata": { "execution": { - "iopub.execute_input": "2022-07-21T20:29:37.239126Z", - "iopub.status.busy": "2022-07-21T20:29:37.238760Z", - "iopub.status.idle": "2022-07-21T20:29:37.252035Z", - "shell.execute_reply": "2022-07-21T20:29:37.251217Z", - "shell.execute_reply.started": "2022-07-21T20:29:37.239098Z" + "iopub.execute_input": "2022-08-01T00:19:00.323107Z", + "iopub.status.busy": "2022-08-01T00:19:00.322921Z", + "iopub.status.idle": "2022-08-01T00:19:00.333299Z", + "shell.execute_reply": "2022-08-01T00:19:00.332860Z", + "shell.execute_reply.started": "2022-08-01T00:19:00.323092Z" }, "tags": [] }, @@ -164,11 +168,11 @@ "id": "2d99ea58-ca51-4461-a127-c6b389b056a1", "metadata": { "execution": { - "iopub.execute_input": "2022-07-21T20:29:37.253416Z", - "iopub.status.busy": "2022-07-21T20:29:37.253082Z", - "iopub.status.idle": "2022-07-21T20:29:37.271785Z", - "shell.execute_reply": "2022-07-21T20:29:37.271054Z", - "shell.execute_reply.started": "2022-07-21T20:29:37.253389Z" + "iopub.execute_input": "2022-08-01T00:19:00.334093Z", + "iopub.status.busy": "2022-08-01T00:19:00.333926Z", + "iopub.status.idle": "2022-08-01T00:19:00.347963Z", + "shell.execute_reply": "2022-08-01T00:19:00.347305Z", + "shell.execute_reply.started": "2022-08-01T00:19:00.334077Z" }, "tags": [] }, @@ -314,7 +318,7 @@ "id": "498d069d-b95e-43d6-bd3d-4b707fdd9635", "metadata": {}, "source": [ - "I'll fill in what I can find online" + "I'll fill in what 
I can with what I can find online" ] }, { @@ -323,11 +327,11 @@ "id": "e53a2eaf-a8f9-4d7e-bf8b-07a125cf6f06", "metadata": { "execution": { - "iopub.execute_input": "2022-07-21T20:29:37.273324Z", - "iopub.status.busy": "2022-07-21T20:29:37.272853Z", - "iopub.status.idle": "2022-07-21T20:29:37.278574Z", - "shell.execute_reply": "2022-07-21T20:29:37.277496Z", - "shell.execute_reply.started": "2022-07-21T20:29:37.273297Z" + "iopub.execute_input": "2022-08-01T00:19:00.348907Z", + "iopub.status.busy": "2022-08-01T00:19:00.348680Z", + "iopub.status.idle": "2022-08-01T00:19:00.352582Z", + "shell.execute_reply": "2022-08-01T00:19:00.351931Z", + "shell.execute_reply.started": "2022-08-01T00:19:00.348891Z" }, "tags": [] }, @@ -363,11 +367,11 @@ "id": "10400330-e6aa-43e0-910f-f97869c23d0f", "metadata": { "execution": { - "iopub.execute_input": "2022-07-21T20:29:37.280095Z", - "iopub.status.busy": "2022-07-21T20:29:37.279777Z", - "iopub.status.idle": "2022-07-21T20:29:37.286985Z", - "shell.execute_reply": "2022-07-21T20:29:37.286202Z", - "shell.execute_reply.started": "2022-07-21T20:29:37.280060Z" + "iopub.execute_input": "2022-08-01T00:19:00.353597Z", + "iopub.status.busy": "2022-08-01T00:19:00.353430Z", + "iopub.status.idle": "2022-08-01T00:19:00.360958Z", + "shell.execute_reply": "2022-08-01T00:19:00.359990Z", + "shell.execute_reply.started": "2022-08-01T00:19:00.353582Z" }, "tags": [] }, @@ -392,11 +396,11 @@ "id": "e0fd9a7b-6cdf-4346-8c8d-6c5f36e167f6", "metadata": { "execution": { - "iopub.execute_input": "2022-07-21T20:29:37.289881Z", - "iopub.status.busy": "2022-07-21T20:29:37.289472Z", - "iopub.status.idle": "2022-07-21T20:29:37.301335Z", - "shell.execute_reply": "2022-07-21T20:29:37.300537Z", - "shell.execute_reply.started": "2022-07-21T20:29:37.289852Z" + "iopub.execute_input": "2022-08-01T00:19:00.365129Z", + "iopub.status.busy": "2022-08-01T00:19:00.364725Z", + "iopub.status.idle": "2022-08-01T00:19:00.373554Z", + "shell.execute_reply": 
"2022-08-01T00:19:00.372817Z", + "shell.execute_reply.started": "2022-08-01T00:19:00.365100Z" }, "tags": [] }, @@ -450,11 +454,11 @@ "id": "769f33e7-2f2e-46e8-b6dd-8f8fb79d13b7", "metadata": { "execution": { - "iopub.execute_input": "2022-07-21T20:29:37.303380Z", - "iopub.status.busy": "2022-07-21T20:29:37.302680Z", - "iopub.status.idle": "2022-07-21T20:29:37.310508Z", - "shell.execute_reply": "2022-07-21T20:29:37.309738Z", - "shell.execute_reply.started": "2022-07-21T20:29:37.303336Z" + "iopub.execute_input": "2022-08-01T00:19:00.374606Z", + "iopub.status.busy": "2022-08-01T00:19:00.374357Z", + "iopub.status.idle": "2022-08-01T00:19:00.379646Z", + "shell.execute_reply": "2022-08-01T00:19:00.379055Z", + "shell.execute_reply.started": "2022-08-01T00:19:00.374591Z" }, "tags": [] }, @@ -520,11 +524,11 @@ "id": "7bac1a71-53d2-4081-b566-244bccd3a3c6", "metadata": { "execution": { - "iopub.execute_input": "2022-07-21T20:29:37.312020Z", - "iopub.status.busy": "2022-07-21T20:29:37.311631Z", - "iopub.status.idle": "2022-07-21T20:29:37.319881Z", - "shell.execute_reply": "2022-07-21T20:29:37.318953Z", - "shell.execute_reply.started": "2022-07-21T20:29:37.311992Z" + "iopub.execute_input": "2022-08-01T00:19:00.380599Z", + "iopub.status.busy": "2022-08-01T00:19:00.380414Z", + "iopub.status.idle": "2022-08-01T00:19:00.387779Z", + "shell.execute_reply": "2022-08-01T00:19:00.387145Z", + "shell.execute_reply.started": "2022-08-01T00:19:00.380583Z" }, "tags": [] }, @@ -575,11 +579,11 @@ "id": "87715776-3634-4ca7-bbb4-e04633fe4791", "metadata": { "execution": { - "iopub.execute_input": "2022-07-21T20:29:37.321678Z", - "iopub.status.busy": "2022-07-21T20:29:37.321045Z", - "iopub.status.idle": "2022-07-21T20:29:37.354573Z", - "shell.execute_reply": "2022-07-21T20:29:37.353866Z", - "shell.execute_reply.started": "2022-07-21T20:29:37.321651Z" + "iopub.execute_input": "2022-08-01T00:19:00.389256Z", + "iopub.status.busy": "2022-08-01T00:19:00.388803Z", + "iopub.status.idle": 
"2022-08-01T00:19:00.417680Z", + "shell.execute_reply": "2022-08-01T00:19:00.417117Z", + "shell.execute_reply.started": "2022-08-01T00:19:00.389227Z" }, "tags": [] }, @@ -747,6 +751,489 @@ "Everything looks proportional" ] }, + { + "cell_type": "code", + "execution_count": 11, + "id": "3f68b5d6-15c7-4fe0-aa49-a04ab90c4efa", + "metadata": { + "execution": { + "iopub.execute_input": "2022-08-01T00:19:00.418521Z", + "iopub.status.busy": "2022-08-01T00:19:00.418342Z", + "iopub.status.idle": "2022-08-01T00:19:00.645493Z", + "shell.execute_reply": "2022-08-01T00:19:00.644587Z", + "shell.execute_reply.started": "2022-08-01T00:19:00.418505Z" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "sns.displot(x=df.mpg)\n", + "plt.title('Distribution of MPG')\n", + "plt.show();" + ] + }, + { + "cell_type": "markdown", + "id": "506e5033-b3da-4624-bbc5-44f3d159d9e1", + "metadata": {}, + "source": [ + "Most MPG is around 20" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "4c705ec7-9106-4f3c-a65b-5081fe1ded59", + "metadata": { + "execution": { + "iopub.execute_input": "2022-08-01T00:19:00.647104Z", + "iopub.status.busy": "2022-08-01T00:19:00.646709Z", + "iopub.status.idle": "2022-08-01T00:19:00.732581Z", + "shell.execute_reply": "2022-08-01T00:19:00.732044Z", + "shell.execute_reply.started": "2022-08-01T00:19:00.647075Z" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "sns.boxplot(x=df.mpg)\n", + "plt.title('Boxplot of MPG')\n", + "plt.show();" + ] + }, + { + "cell_type": "markdown", + "id": "6ab5a1cd-177f-4dcb-bd79-e9ad395d51d7", + "metadata": {}, + "source": [ + "There's one value considered an outlier:" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "655067f4-4c89-4e65-b65a-c1a1e4158535", + "metadata": { + "execution": { + "iopub.execute_input": "2022-08-01T00:19:00.733588Z", + "iopub.status.busy": "2022-08-01T00:19:00.733352Z", + "iopub.status.idle": "2022-08-01T00:19:00.742775Z", + "shell.execute_reply": "2022-08-01T00:19:00.741517Z", + "shell.execute_reply.started": "2022-08-01T00:19:00.733572Z" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
mpgcylindersdisplacementhorsepowerweightaccelerationmodel_yearorigincar_name
32246.6486.065.02110.017.9803mazda glc
\n", + "
" + ], + "text/plain": [ + " mpg cylinders displacement horsepower weight acceleration \\\n", + "322 46.6 4 86.0 65.0 2110.0 17.9 \n", + "\n", + " model_year origin car_name \n", + "322 80 3 mazda glc " + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[df.mpg > 45]" + ] + }, + { + "cell_type": "markdown", + "id": "f89f5906-7b78-4268-933a-ccf02e151b85", + "metadata": {}, + "source": [ + "I'm going to leave this in because it's a real value. I guess it appears as an outlier because the data set is so small" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "2720ba7e-272d-4cf7-810c-0a7ec7ad2e58", + "metadata": { + "execution": { + "iopub.execute_input": "2022-08-01T00:19:00.744397Z", + "iopub.status.busy": "2022-08-01T00:19:00.744001Z", + "iopub.status.idle": "2022-08-01T00:19:00.899639Z", + "shell.execute_reply": "2022-08-01T00:19:00.899044Z", + "shell.execute_reply.started": "2022-08-01T00:19:00.744367Z" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "sns.displot(x=df.cylinders)\n", + "plt.title('Distribution of Cylinders')\n", + "plt.show();" + ] + }, + { + "cell_type": "markdown", + "id": "7d99f70d-728f-4df3-a178-023617e642af", + "metadata": {}, + "source": [ + "4 Cylinder engines outnumber the others by a lot. It would be nice if we had more info to go off of, particularly if 6 cylinders could be split between inline and V configurations. It's less important for the others cause while inline-8s and V4s exist they're so uncommon in cars of this vintage that we can assume they don't exist. They'll get different fuel economy but not by enough to sway things at the level of accuracy we're at. Inline-6 vs V6 though I think there could be something to see there and it could improve accuracy slightly" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "8fecb4c7-cdda-4922-8018-3e5160f6ecf1", + "metadata": { + "execution": { + "iopub.execute_input": "2022-08-01T00:19:00.900607Z", + "iopub.status.busy": "2022-08-01T00:19:00.900368Z", + "iopub.status.idle": "2022-08-01T00:19:01.039878Z", + "shell.execute_reply": "2022-08-01T00:19:01.039298Z", + "shell.execute_reply.started": "2022-08-01T00:19:00.900591Z" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "sns.displot(x=df.displacement)\n", + "plt.title('Distribution of Displacement')\n", + "plt.show();" + ] + }, + { + "cell_type": "markdown", + "id": "3235dc9c-d7d4-4795-8fd2-1d208be182c7", + "metadata": {}, + "source": [ + "Most engines in the data are smaller since most of our engines are 4 cylinders. The 3 groups seen here are the split between 4, 6, and 8 cylinders because they all come in generally the same sizes for automotive applications" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "b7b4c6b4-c46c-4f61-9e2a-1243ccc39d0a", + "metadata": { + "execution": { + "iopub.execute_input": "2022-08-01T00:19:01.041100Z", + "iopub.status.busy": "2022-08-01T00:19:01.040804Z", + "iopub.status.idle": "2022-08-01T00:19:01.123396Z", + "shell.execute_reply": "2022-08-01T00:19:01.122989Z", + "shell.execute_reply.started": "2022-08-01T00:19:01.041075Z" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "sns.boxplot(x=df.displacement)\n", + "plt.title('Boxplot of Displacement')\n", + "plt.show();" + ] + }, + { + "cell_type": "markdown", + "id": "78eff4eb-407f-4d7b-9c7d-42fde78b4cf4", + "metadata": {}, + "source": [ + "Again most engines are on the smaller side of the spectrum ranging from around 100ci to around 260ci which is representative of the market" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "0235d7ee-8882-490c-b209-8d2d2fbacee5", + "metadata": { + "execution": { + "iopub.execute_input": "2022-08-01T00:19:01.124186Z", + "iopub.status.busy": "2022-08-01T00:19:01.124021Z", + "iopub.status.idle": "2022-08-01T00:19:01.256006Z", + "shell.execute_reply": "2022-08-01T00:19:01.255440Z", + "shell.execute_reply.started": "2022-08-01T00:19:01.124159Z" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "sns.displot(x=df.weight)\n", + "plt.title('Distribution of Weight')\n", + "plt.show();" + ] + }, + { + "cell_type": "markdown", + "id": "b8bbd1af-1c95-4fde-916e-e757f95b2f04", + "metadata": {}, + "source": [ + "Weight is a major player in fuel economy as it takes more energy to move a heavy car. An inefficient engine moving less weight than a highly efficient engine can end up burning more fuel but generally less weight means higher mpg. Most cars here are around 2000lbs, which makes sense because that's about the weight of a typical commuter/economy car from the 70s. They didn't have as much stuff packed into the interior that we have today so they're lighter. That's why some of these MPG numbers may seem high, but they're real" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "762d3940-4f1e-40e1-9fef-67073f33791b", + "metadata": { + "execution": { + "iopub.execute_input": "2022-08-01T00:19:01.256902Z", + "iopub.status.busy": "2022-08-01T00:19:01.256753Z", + "iopub.status.idle": "2022-08-01T00:19:01.325922Z", + "shell.execute_reply": "2022-08-01T00:19:01.325506Z", + "shell.execute_reply.started": "2022-08-01T00:19:01.256887Z" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "sns.boxplot(x=df.weight)\n", + "plt.title('Boxplot of Weight')\n", + "plt.show();" + ] + }, + { + "cell_type": "markdown", + "id": "6cfba8d4-ac12-44bb-8917-22ed65f513b2", + "metadata": {}, + "source": [ + "Nothing out of the ordinary here" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "acd58565-0abe-4245-b505-81fa56a0e106", + "metadata": { + "execution": { + "iopub.execute_input": "2022-08-01T00:19:01.326907Z", + "iopub.status.busy": "2022-08-01T00:19:01.326552Z", + "iopub.status.idle": "2022-08-01T00:19:01.473926Z", + "shell.execute_reply": "2022-08-01T00:19:01.473358Z", + "shell.execute_reply.started": "2022-08-01T00:19:01.326892Z" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "sns.displot(x=df.acceleration)\n", + "plt.title('Distribution of Acceleration')\n", + "plt.show();" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "dd0e9f92-fe24-4de5-ba3c-e0cdb9767e0d", + "metadata": { + "execution": { + "iopub.execute_input": "2022-08-01T00:19:01.474821Z", + "iopub.status.busy": "2022-08-01T00:19:01.474614Z", + "iopub.status.idle": "2022-08-01T00:19:01.558118Z", + "shell.execute_reply": "2022-08-01T00:19:01.557560Z", + "shell.execute_reply.started": "2022-08-01T00:19:01.474806Z" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "sns.boxplot(x=df.acceleration);\n", + "plt.title('Boxplot of Acceleration')\n", + "plt.show();" + ] + }, + { + "cell_type": "markdown", + "id": "1087d45a-dc2f-47f9-be25-f1f28669c2ad", + "metadata": {}, + "source": [ + "I'm not even sure what acceleration is supposed to be. I assume probably it's 0-60mph time in seconds.. While it's interesting I think it contributes nothing to calculating MPG and looks a bit less than ideal anyway. Everything is close to the same value and there are some outliers" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "291e8653-3d29-4fb5-8105-2fea779cc5ed", + "metadata": { + "execution": { + "iopub.execute_input": "2022-08-01T00:19:01.559014Z", + "iopub.status.busy": "2022-08-01T00:19:01.558801Z", + "iopub.status.idle": "2022-08-01T00:19:01.693534Z", + "shell.execute_reply": "2022-08-01T00:19:01.692969Z", + "shell.execute_reply.started": "2022-08-01T00:19:01.558999Z" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "sns.displot(x=df.model_year);\n", + "plt.title('Distribution of Model Year')\n", + "plt.show();" + ] + }, + { + "cell_type": "markdown", + "id": "f7addd67-2fba-4539-88e3-347164d3cfd7", + "metadata": {}, + "source": [ + "Model year interestingly has 3 peaks. I'm not going to use this as a feature, not because of this, but because the data only spans 12 years. Model year could be a great indicator of tech but it won't work in this case because there's just not enough data and no real leaps in technology were had in these years anyway. To make predictions on unseen data if the model year is outside 1970-1982 like in the training set then it'll throw the prediction wildly off" + ] + }, { "cell_type": "markdown", "id": "042416c1-0e56-4269-96c8-6926392e11e7", @@ -757,15 +1244,15 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 22, "id": "b3b42cca-6960-4d06-b7c4-1570f09e9fe0", "metadata": { "execution": { - "iopub.execute_input": "2022-07-21T20:29:37.355994Z", - "iopub.status.busy": "2022-07-21T20:29:37.355617Z", - "iopub.status.idle": "2022-07-21T20:29:37.364909Z", - "shell.execute_reply": "2022-07-21T20:29:37.364122Z", - "shell.execute_reply.started": "2022-07-21T20:29:37.355966Z" + "iopub.execute_input": "2022-08-01T00:19:01.695975Z", + "iopub.status.busy": "2022-08-01T00:19:01.695735Z", + "iopub.status.idle": "2022-08-01T00:19:01.701574Z", + "shell.execute_reply": "2022-08-01T00:19:01.700997Z", + "shell.execute_reply.started": "2022-08-01T00:19:01.695960Z" }, "tags": [] }, diff --git a/eda.ipynb b/eda.ipynb index bc717be..f0c4669 100644 --- a/eda.ipynb +++ b/eda.ipynb @@ -30,11 +30,11 @@ "id": "5ffa8b01-0b17-4ad8-8e85-f2656da50c9e", "metadata": { "execution": { - "iopub.execute_input": "2022-07-21T20:29:49.282341Z", - "iopub.status.busy": "2022-07-21T20:29:49.281981Z", - "iopub.status.idle": "2022-07-21T20:29:50.950452Z", - "shell.execute_reply": 
"2022-07-21T20:29:50.949681Z", - "shell.execute_reply.started": "2022-07-21T20:29:49.282260Z" + "iopub.execute_input": "2022-08-01T04:20:10.964571Z", + "iopub.status.busy": "2022-08-01T04:20:10.964037Z", + "iopub.status.idle": "2022-08-01T04:20:11.911635Z", + "shell.execute_reply": "2022-08-01T04:20:11.911048Z", + "shell.execute_reply.started": "2022-08-01T04:20:10.964486Z" }, "tags": [] }, @@ -76,6 +76,23 @@ " show_plots(filenames)" ] }, + { + "cell_type": "markdown", + "id": "416a9d7e-e2ad-41f0-a674-d13c01f41896", + "metadata": {}, + "source": [ + "## A bit on engines:\n", + "\n", + "* A most basic description of an engine is that it's an air pump\n", + "* Horsepower = (Torque * RPM) / 5252\n", + "* Torque peak is where an engine is operating most efficiently as far as air flow, applied science in action. (Fluid dynamics, resonance)\n", + "* Operating above or below the torque peak reduces efficiency and efficiency == fuel economy\n", + "* Torque peaks normally occur below 5252rpm, and horsepower peaks above that, so long as the engine can actually rev that high. On a dyno sheet (measuring torque and horsepower vs rpm) you'll see the torque/horsepower lines cross at 5252rpm\n", + "* As an engine spins faster, the power output increases until combustion is so inefficient and it produces so little torque that spinning faster produces no more power, if it holds together that long\n", + "\n", + "Basically an engine that makes lots of power at high rpm but relatively little low end torque (mazda rotary), is going to have poor fuel economy because it spends most of its time outside of its efficiency range. In contrast, diesel engines typically turn lower rpms and create all kinds of torque down low. So not only do they start off making more torque but they are less likely to stray very far from torque peak. This is also why horsepower numbers on a diesel appear low, because they can't rev as high. 
There's more to it but this should be enough to provide context" + ] + }, { "cell_type": "markdown", "id": "7af7dcdd-9618-4e81-88c8-d2c2cde0fdc2", @@ -90,11 +107,11 @@ "id": "3e633f5f-8a7f-4776-a855-f22fcb87e88d", "metadata": { "execution": { - "iopub.execute_input": "2022-07-21T20:29:50.952984Z", - "iopub.status.busy": "2022-07-21T20:29:50.952406Z", - "iopub.status.idle": "2022-07-21T20:29:50.967175Z", - "shell.execute_reply": "2022-07-21T20:29:50.966460Z", - "shell.execute_reply.started": "2022-07-21T20:29:50.952957Z" + "iopub.execute_input": "2022-08-01T04:20:11.913047Z", + "iopub.status.busy": "2022-08-01T04:20:11.912844Z", + "iopub.status.idle": "2022-08-01T04:20:11.923117Z", + "shell.execute_reply": "2022-08-01T04:20:11.922526Z", + "shell.execute_reply.started": "2022-08-01T04:20:11.913032Z" }, "tags": [] }, @@ -134,7 +151,7 @@ "id": "b0f65dd4-16b6-4222-8758-71e2ecac473e", "metadata": {}, "source": [ - "As the number of cylinders, displacement, horsepower, or weight increase, MPG goes down." + "As the number of cylinders, displacement, horsepower, or weight increase, MPG goes down. 
There are some outliers; we'll get to those in a minute" ] }, { @@ -142,7 +159,7 @@ "id": "61b1b79e-46c2-4e7b-b565-84d1e2045777", "metadata": {}, "source": [ - "I want to know more:" + "There are some other things I'd like to see:" ] }, { @@ -151,11 +168,11 @@ "id": "7342da99-d04a-4f4f-ad3c-06840144ec48", "metadata": { "execution": { - "iopub.execute_input": "2022-07-21T20:29:50.969118Z", - "iopub.status.busy": "2022-07-21T20:29:50.968685Z", - "iopub.status.idle": "2022-07-21T20:29:50.977202Z", - "shell.execute_reply": "2022-07-21T20:29:50.976474Z", - "shell.execute_reply.started": "2022-07-21T20:29:50.969090Z" + "iopub.execute_input": "2022-08-01T04:20:11.924056Z", + "iopub.status.busy": "2022-08-01T04:20:11.923876Z", + "iopub.status.idle": "2022-08-01T04:20:11.935974Z", + "shell.execute_reply": "2022-08-01T04:20:11.935279Z", + "shell.execute_reply.started": "2022-08-01T04:20:11.924034Z" }, "tags": [] }, @@ -165,90 +182,21 @@ "new_features['efficiency'] = df.horsepower / df.displacement\n", "new_features['load'] = df.displacement / df.weight\n", "new_features['bore_size'] = df.displacement / df.cylinders\n", - "new_features['grunt'] = new_features.bore_size / new_features.efficiency" + "new_features['grunt'] = new_features.bore_size * new_features.efficiency * df.horsepower\n", + "# new_features['grunt'] = (df.horsepower / new_features.bore_size) * new_features.efficiency" ] }, { "cell_type": "code", "execution_count": 4, - "id": "89cea145-4b6e-457b-9970-578144c1c364", - "metadata": { - "execution": { - "iopub.execute_input": "2022-07-21T20:29:50.979078Z", - "iopub.status.busy": "2022-07-21T20:29:50.978361Z", - "iopub.status.idle": "2022-07-21T20:29:50.984305Z", - "shell.execute_reply": "2022-07-21T20:29:50.983414Z", - "shell.execute_reply.started": "2022-07-21T20:29:50.979043Z" - }, - "tags": [] - }, - "outputs": [], - "source": [ - "merged = df.join(new_features)\n", - "del df" - ] - }, - { - "cell_type": "markdown", - "id": 
"0213061d-29c8-4f47-9128-705253bc6320", - "metadata": {}, - "source": [ - "Check Correlation" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "7205bdab-a7df-41b4-9ec0-c1c9e2fe1c03", - "metadata": { - "execution": { - "iopub.execute_input": "2022-07-21T20:29:50.985966Z", - "iopub.status.busy": "2022-07-21T20:29:50.985567Z", - "iopub.status.idle": "2022-07-21T20:29:50.999141Z", - "shell.execute_reply": "2022-07-21T20:29:50.998300Z", - "shell.execute_reply.started": "2022-07-21T20:29:50.985938Z" - }, - "tags": [] - }, - "outputs": [ - { - "data": { - "text/plain": [ - "weight -0.831745\n", - "displacement -0.804456\n", - "horsepower -0.777897\n", - "cylinders -0.776090\n", - "bore_size -0.773403\n", - "load -0.714996\n", - "grunt -0.712074\n", - "acceleration 0.420414\n", - "efficiency 0.509309\n", - "origin 0.563833\n", - "model_year 0.580091\n", - "mpg 1.000000\n", - "dtype: float64" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "merged.corrwith(y).sort_values()" - ] - }, - { - "cell_type": "code", - "execution_count": 6, "id": "9fa0bf3e-d45b-4698-afac-e549db0de148", "metadata": { "execution": { - "iopub.execute_input": "2022-07-21T20:29:51.000637Z", - "iopub.status.busy": "2022-07-21T20:29:51.000343Z", - "iopub.status.idle": "2022-07-21T20:29:51.008598Z", - "shell.execute_reply": "2022-07-21T20:29:51.007461Z", - "shell.execute_reply.started": "2022-07-21T20:29:51.000610Z" + "iopub.execute_input": "2022-08-01T04:20:11.936853Z", + "iopub.status.busy": "2022-08-01T04:20:11.936679Z", + "iopub.status.idle": "2022-08-01T04:20:12.329795Z", + "shell.execute_reply": "2022-08-01T04:20:12.329065Z", + "shell.execute_reply.started": "2022-08-01T04:20:11.936838Z" }, "tags": [] }, @@ -279,8 +227,7 @@ } ], "source": [ - "make_plots(new_features,y)\n", - "del new_features" + "make_plots(new_features,y)" ] }, { @@ -288,73 +235,39 @@ "id": "5cbe16d7-24ef-4ceb-acd1-0dcecfdc96c2", "metadata": {}, 
"source": [ - "* HP per cubic inch is a measure of engine efficiency, as this increases so does MPG\n", - "* Load is a metric of how hard the engine has to work compared to its size. Engines that work hard use more fuel and a small engine working really hard can use more fuel than a big engine not doing much\n", + "* Efficiency (HP per cubic inch) is a rough measure of engine tech/efficiency, as this increases so does MPG\n", + "* Load is a metric of how hard the engine has to work compared to its size. Engines that work hard use more fuel and a small engine working really hard can use more fuel than a big engine that's not doing much\n", "* Bore_size is an attempt to describe cylinder bore diameter which gives insight on torque curve\n", "* Grunt is an attempt to describe the power curve of an engine, or more specifically the presence/absence of low rpm torque output" ] }, { "cell_type": "markdown", - "id": "416a9d7e-e2ad-41f0-a674-d13c01f41896", + "id": "dd05abcd-9ac9-4821-b575-ffbf8544db3c", "metadata": {}, "source": [ - "## A bit on engines:\n", - "\n", - "* A most basic description of an engine is that it's an air pump\n", - "* Horsepower = (Torque * RPM) / 5252\n", - "* Torque peak is where an engine is operating most efficiently as far as air flow, applied science in action. (Fluid dynamics, resonance)\n", - "* Operating above or below the torque peak reduces efficiency and efficiency == fuel economy\n", - "* Torque peaks normally occur below 5252rpm, and horsepower peaks above that, so long as the engine can actually rev that high. 
On a dyno sheet (measuring torque and horsepower vs rpm) you'll see the torque/horsepower lines cross at 5252rpm\n", - "* As an engine spins faster, the power output increases until combustion is so inefficient and it produces so little torque that spinning faster produces no more power, if it holds together that long\n", - "\n", - "Basically an engine that makes lots of power at high rpm but relatively little low end torque (mazda rotary), is going to have poor fuel economy because it spends most of its time outside of its efficiency range. In contrast, diesel engines typically turn lower rpms and create all kinds of torque down low. So not only do they start off making more torque but they are less likely to stray very far from torque peak. This is also why horsepower numbers on a diesel appear low, because they can't rev as high. There's more to it than this but this should be enough to provide context." - ] - }, - { - "cell_type": "markdown", - "id": "15d0d27b-5f92-4648-ad5c-35cc811430b3", - "metadata": {}, - "source": [ - "## Some stats" + "Merge new with the old" ] }, { "cell_type": "code", - "execution_count": 7, - "id": "8710cba8-6b7e-4219-98b9-b7d5a1b4f4b9", + "execution_count": 5, + "id": "89cea145-4b6e-457b-9970-578144c1c364", "metadata": { "execution": { - "iopub.execute_input": "2022-07-21T20:29:51.012153Z", - "iopub.status.busy": "2022-07-21T20:29:51.011763Z", - "iopub.status.idle": "2022-07-21T20:29:51.019164Z", - "shell.execute_reply": "2022-07-21T20:29:51.018305Z", - "shell.execute_reply.started": "2022-07-21T20:29:51.012124Z" + "iopub.execute_input": "2022-08-01T04:20:12.331981Z", + "iopub.status.busy": "2022-08-01T04:20:12.331135Z", + "iopub.status.idle": "2022-08-01T04:20:12.338040Z", + "shell.execute_reply": "2022-08-01T04:20:12.337017Z", + "shell.execute_reply.started": "2022-08-01T04:20:12.331935Z" }, "tags": [] }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Mean MPG: 23.51\n", - "Mean Weight: 2970.59\n", 
- "Mean Horsepower: 104.12\n", - "efficiency mean: 0.61\n", - "load mean: 0.06\n", - "bore_size mean: 33.36\n", - "grunt mean: 62.78\n" - ] - } - ], + "outputs": [], "source": [ - "print(f'''Mean MPG: {y.mean():.2f}\n", - "Mean Weight: {merged.weight.mean():.2f}\n", - "Mean Horsepower: {merged.horsepower.mean():.2f}''')\n", - "\n", - "for col in merged.columns[9:]:\n", - " print(f'{col} mean: {merged[col].mean():.2f}')" + "merged = df.join(new_features)\n", + "del new_features\n", + "del df" ] }, { @@ -362,20 +275,29 @@ "id": "d39b59e4-e596-4fc9-b886-1e6d314f597e", "metadata": {}, "source": [ - "### What's all that on the edges?" + "# What's all that on the edges?\n", + "
" + ] + }, + { + "cell_type": "markdown", + "id": "fe7ee071-8aa4-4a8d-9e8e-480f3b9da9da", + "metadata": {}, + "source": [ + "## Rotaries" ] }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 6, "id": "dbbfdab6-1cca-4329-a2ae-9258678ab0b1", "metadata": { "execution": { - "iopub.execute_input": "2022-07-21T20:29:51.020574Z", - "iopub.status.busy": "2022-07-21T20:29:51.020285Z", - "iopub.status.idle": "2022-07-21T20:29:51.043208Z", - "shell.execute_reply": "2022-07-21T20:29:51.042372Z", - "shell.execute_reply.started": "2022-07-21T20:29:51.020547Z" + "iopub.execute_input": "2022-08-01T04:20:12.339782Z", + "iopub.status.busy": "2022-08-01T04:20:12.339317Z", + "iopub.status.idle": "2022-08-01T04:20:12.367167Z", + "shell.execute_reply": "2022-08-01T04:20:12.365967Z", + "shell.execute_reply.started": "2022-08-01T04:20:12.339751Z" }, "tags": [] }, @@ -431,7 +353,7 @@ " 1.385714\n", " 0.030043\n", " 23.333333\n", - " 16.838488\n", + " 3136.333333\n", " \n", " \n", " 111\n", @@ -447,7 +369,7 @@ " 1.285714\n", " 0.032957\n", " 23.333333\n", - " 18.148148\n", + " 2700.000000\n", " \n", " \n", " 243\n", @@ -463,7 +385,7 @@ " 1.375000\n", " 0.029412\n", " 26.666667\n", - " 19.393939\n", + " 4033.333333\n", " \n", " \n", " 334\n", @@ -479,7 +401,7 @@ " 1.428571\n", " 0.028926\n", " 23.333333\n", - " 16.333333\n", + " 3333.333333\n", " \n", " \n", "\n", @@ -498,20 +420,21 @@ "243 77 3 mazda rx-4 1.375000 0.029412 26.666667 \n", "334 80 3 mazda rx-7 gs 1.428571 0.028926 23.333333 \n", "\n", - " grunt \n", - "71 16.838488 \n", - "111 18.148148 \n", - "243 19.393939 \n", - "334 16.333333 " + " grunt \n", + "71 3136.333333 \n", + "111 2700.000000 \n", + "243 4033.333333 \n", + "334 3333.333333 " ] }, - "execution_count": 8, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "merged[merged.efficiency>1]" + "wankels = merged[merged.efficiency>1]\n", + "wankels" ] }, { @@ -521,20 +444,98 @@ "source": [ "These are the Mazda 
rotaries, otherwise known as [Wankel Engines](https://en.wikipedia.org/wiki/Wankel_engine)\n", "\n", - "Efficient power for their size because they can rev to 7000rpm or so, and that's where they make peak power. Not good for fuel economy. Note the low gruntiness" + "Efficient power for their size because they can rev to 7000rpm or so, and that's where they make peak power" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "5eda8f40-bff6-4715-ba54-b083c74b039d", + "metadata": { + "execution": { + "iopub.execute_input": "2022-08-01T04:20:12.371148Z", + "iopub.status.busy": "2022-08-01T04:20:12.370520Z", + "iopub.status.idle": "2022-08-01T04:20:12.488456Z", + "shell.execute_reply": "2022-08-01T04:20:12.488016Z", + "shell.execute_reply.started": "2022-08-01T04:20:12.371118Z" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "wankels.efficiency.plot(kind='bar')\n", + "plt.xticks(np.arange(4),wankels.car_name)\n", + "pd.Series([merged['efficiency'].mean() for i in range(len(wankels))]).plot(kind='line',color='red')\n", + "plt.title('Mazda Rotary Efficiency (red line is average)');" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "1476617f-8097-4294-8c42-fb86ff96c1d0", + "metadata": { + "execution": { + "iopub.execute_input": "2022-08-01T04:20:12.489303Z", + "iopub.status.busy": "2022-08-01T04:20:12.489077Z", + "iopub.status.idle": "2022-08-01T04:20:12.574526Z", + "shell.execute_reply": "2022-08-01T04:20:12.574107Z", + "shell.execute_reply.started": "2022-08-01T04:20:12.489288Z" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "wankels.mpg.plot(kind='bar')\n", + "plt.xticks(np.arange(4),wankels.car_name)\n", + "pd.Series([merged['mpg'].mean() for i in range(len(wankels))]).plot(kind='line',color='red')\n", + "plt.title('Mazda Rotary MPG (red line is average)');" + ] + }, + { + "cell_type": "markdown", + "id": "4f793604-5c51-44b7-9cb6-301151304400", + "metadata": {}, + "source": [ + "## Diesels" ] }, { "cell_type": "code", "execution_count": 9, - "id": "20eaa490-f70b-408e-a9fa-c4ba05c8a1ac", + "id": "c7aece22-1e78-4a48-b969-f1207ba09aad", "metadata": { "execution": { - "iopub.execute_input": "2022-07-21T20:29:51.044643Z", - "iopub.status.busy": "2022-07-21T20:29:51.044307Z", - "iopub.status.idle": "2022-07-21T20:29:51.062694Z", - "shell.execute_reply": "2022-07-21T20:29:51.061875Z", - "shell.execute_reply.started": "2022-07-21T20:29:51.044616Z" + "iopub.execute_input": "2022-08-01T04:20:12.575478Z", + "iopub.status.busy": "2022-08-01T04:20:12.575129Z", + "iopub.status.idle": "2022-08-01T04:20:12.588438Z", + "shell.execute_reply": "2022-08-01T04:20:12.587822Z", + "shell.execute_reply.started": "2022-08-01T04:20:12.575463Z" }, "tags": [] }, @@ -577,36 +578,52 @@ " \n", " \n", " \n", - " 274\n", - " 20.3\n", - " 5\n", - " 131.0\n", - " 103.0\n", - " 2830.0\n", - " 15.9\n", + " 244\n", + " 43.1\n", + " 4\n", + " 90.0\n", + " 48.0\n", + " 1985.0\n", + " 21.5\n", " 78\n", " 2\n", - " audi 5000\n", - " 0.786260\n", - " 0.046290\n", - " 26.2\n", - " 33.322330\n", + " volkswagen rabbit custom diesel\n", + " 0.533333\n", + " 0.045340\n", + " 22.500000\n", + " 576.000000\n", " \n", " \n", - " 297\n", - " 25.4\n", - " 5\n", - " 183.0\n", - " 77.0\n", - " 3530.0\n", - " 20.1\n", - " 79\n", + " 325\n", + " 44.3\n", + " 4\n", + " 90.0\n", + " 48.0\n", + " 2085.0\n", + " 21.7\n", + " 80\n", " 2\n", - " mercedes benz 300d\n", - " 0.420765\n", - " 0.051841\n", - " 36.6\n", - " 
86.984416\n", + " vw rabbit c (diesel)\n", + " 0.533333\n", + " 0.043165\n", + " 22.500000\n", + " 576.000000\n", + " \n", + " \n", + " 326\n", + " 43.4\n", + " 4\n", + " 90.0\n", + " 48.0\n", + " 2335.0\n", + " 23.7\n", + " 80\n", + " 2\n", + " vw dasher (diesel)\n", + " 0.533333\n", + " 0.038544\n", + " 22.500000\n", + " 576.000000\n", " \n", " \n", " 327\n", @@ -621,8 +638,56 @@ " audi 5000s (diesel)\n", " 0.553719\n", " 0.041017\n", - " 24.2\n", - " 43.704478\n", + " 24.200000\n", + " 897.800000\n", + " \n", + " \n", + " 358\n", + " 28.1\n", + " 4\n", + " 141.0\n", + " 80.0\n", + " 3230.0\n", + " 20.4\n", + " 81\n", + " 2\n", + " peugeot 505s turbo diesel\n", + " 0.567376\n", + " 0.043653\n", + " 35.250000\n", + " 1600.000000\n", + " \n", + " \n", + " 359\n", + " 30.7\n", + " 6\n", + " 145.0\n", + " 76.0\n", + " 3160.0\n", + " 19.6\n", + " 81\n", + " 2\n", + " volvo diesel\n", + " 0.524138\n", + " 0.045886\n", + " 24.166667\n", + " 962.666667\n", + " \n", + " \n", + " 386\n", + " 38.0\n", + " 6\n", + " 262.0\n", + " 85.0\n", + " 3015.0\n", + " 17.0\n", + " 82\n", + " 1\n", + " oldsmobile cutlass ciera (diesel)\n", + " 0.324427\n", + " 0.086899\n", + " 43.666667\n", + " 1204.166667\n", " \n", " \n", "\n", @@ -630,19 +695,31 @@ ], "text/plain": [ " mpg cylinders displacement horsepower weight acceleration \\\n", - "274 20.3 5 131.0 103.0 2830.0 15.9 \n", - "297 25.4 5 183.0 77.0 3530.0 20.1 \n", + "244 43.1 4 90.0 48.0 1985.0 21.5 \n", + "325 44.3 4 90.0 48.0 2085.0 21.7 \n", + "326 43.4 4 90.0 48.0 2335.0 23.7 \n", "327 36.4 5 121.0 67.0 2950.0 19.9 \n", + "358 28.1 4 141.0 80.0 3230.0 20.4 \n", + "359 30.7 6 145.0 76.0 3160.0 19.6 \n", + "386 38.0 6 262.0 85.0 3015.0 17.0 \n", "\n", - " model_year origin car_name efficiency load bore_size \\\n", - "274 78 2 audi 5000 0.786260 0.046290 26.2 \n", - "297 79 2 mercedes benz 300d 0.420765 0.051841 36.6 \n", - "327 80 2 audi 5000s (diesel) 0.553719 0.041017 24.2 \n", + " model_year origin car_name efficiency \\\n", + 
"244 78 2 volkswagen rabbit custom diesel 0.533333 \n", + "325 80 2 vw rabbit c (diesel) 0.533333 \n", + "326 80 2 vw dasher (diesel) 0.533333 \n", + "327 80 2 audi 5000s (diesel) 0.553719 \n", + "358 81 2 peugeot 505s turbo diesel 0.567376 \n", + "359 81 2 volvo diesel 0.524138 \n", + "386 82 1 oldsmobile cutlass ciera (diesel) 0.324427 \n", "\n", - " grunt \n", - "274 33.322330 \n", - "297 86.984416 \n", - "327 43.704478 " + " load bore_size grunt \n", + "244 0.045340 22.500000 576.000000 \n", + "325 0.043165 22.500000 576.000000 \n", + "326 0.038544 22.500000 576.000000 \n", + "327 0.041017 24.200000 897.800000 \n", + "358 0.043653 35.250000 1600.000000 \n", + "359 0.045886 24.166667 962.666667 \n", + "386 0.086899 43.666667 1204.166667 " ] }, "execution_count": 9, @@ -651,28 +728,72 @@ } ], "source": [ - "merged[merged.cylinders==5]" + "diesels = merged[merged.car_name.str.contains('diesel')]\n", + "diesels" ] }, { "cell_type": "markdown", - "id": "33fb0fbd-595d-4129-bfe0-dd4077553f9a", + "id": "79979a1e-de58-4610-8878-6f374f500d1c", "metadata": {}, "source": [ - "Look at the gruntiness and mpg of these diesels! For comparison the first Audi appears to be a gas engine. Consider the displacement and power. The one below as well" + "All of the diesels get higher than average MPG" ] }, { "cell_type": "code", "execution_count": 10, + "id": "92f0bf1a-af7b-4a26-b422-8ca01fdfde1b", + "metadata": { + "execution": { + "iopub.execute_input": "2022-08-01T04:20:12.590154Z", + "iopub.status.busy": "2022-08-01T04:20:12.589539Z", + "iopub.status.idle": "2022-08-01T04:20:12.714518Z", + "shell.execute_reply": "2022-08-01T04:20:12.713877Z", + "shell.execute_reply.started": "2022-08-01T04:20:12.590124Z" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "diesels.mpg.plot(kind='barh')\n", + "plt.yticks(np.arange(len(diesels)),diesels.car_name)\n", + "plt.axvline(merged.mpg.mean(),color='red')\n", + "plt.title('Diesel MPG (red line is average)');" + ] + }, + { + "cell_type": "markdown", + "id": "df9d8d17-46ec-4ce8-950e-aa1a24d98d7f", + "metadata": {}, + "source": [ + "# Interesting" + ] + }, + { + "cell_type": "code", + "execution_count": 11, "id": "0fb1ed64-bba6-463c-9a0f-84af360515b5", "metadata": { "execution": { - "iopub.execute_input": "2022-07-21T20:29:51.064733Z", - "iopub.status.busy": "2022-07-21T20:29:51.064060Z", - "iopub.status.idle": "2022-07-21T20:29:51.081301Z", - "shell.execute_reply": "2022-07-21T20:29:51.080572Z", - "shell.execute_reply.started": "2022-07-21T20:29:51.064704Z" + "iopub.execute_input": "2022-08-01T04:20:12.715450Z", + "iopub.status.busy": "2022-08-01T04:20:12.715284Z", + "iopub.status.idle": "2022-08-01T04:20:12.726019Z", + "shell.execute_reply": "2022-08-01T04:20:12.725441Z", + "shell.execute_reply.started": "2022-08-01T04:20:12.715435Z" }, "tags": [] }, @@ -728,7 +849,7 @@ " 0.324427\n", " 0.086899\n", " 43.666667\n", - " 134.596078\n", + " 1204.166667\n", " \n", " \n", "\n", @@ -741,11 +862,11 @@ " model_year origin car_name efficiency \\\n", "386 82 1 oldsmobile cutlass ciera (diesel) 0.324427 \n", "\n", - " load bore_size grunt \n", - "386 0.086899 43.666667 134.596078 " + " load bore_size grunt \n", + "386 0.086899 43.666667 1204.166667 " ] }, - "execution_count": 10, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } @@ -756,25 +877,39 @@ }, { "cell_type": "markdown", - "id": "0456df70-dc3d-4879-95cb-26e268fea9aa", + "id": "1e1ec508-df30-42c6-a63f-aea36c12d2e8", "metadata": {}, "source": [ - "This is an interesting engine. 
In fact, [these cars are rumored to be the reason why diesel cars are so unpopular in North America](https://www.autotrader.com/car-news/when-diesel-was-dreadful-oldsmobile-diesels-259997). [Here is a more technical write-up](https://www.dieselworldmag.com/diesel-engines/oldsmobile-350-v8)\n", - "\n", - "But that's a bit beside the point, the engines above and below for sake of conversation are basically the same, the V6 being the same as the V8 but with 2 less cylinders. So compare the stats between them as gas and diesel\n" + "This is an interesting engine. In fact, [these cars are rumored to be the reason why diesel cars are so unpopular in North America](https://www.autotrader.com/car-news/when-diesel-was-dreadful-oldsmobile-diesels-259997). [Here is a more technical write-up](https://www.dieselworldmag.com/diesel-engines/oldsmobile-350-v8)" + ] + }, + { + "cell_type": "markdown", + "id": "b9858dee-1de0-46ab-b46d-baa4cafc0efc", + "metadata": {}, + "source": [ + "
" + ] + }, + { + "cell_type": "markdown", + "id": "d8625227-6fca-4e92-ba0c-271bbea53c23", + "metadata": {}, + "source": [ + "Big lazy engines in big heavy cars don't have to have poor MPG!" ] }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 12, "id": "c0c4f183-ef44-42ee-b64c-a75c63450d7b", "metadata": { "execution": { - "iopub.execute_input": "2022-07-21T20:29:51.083196Z", - "iopub.status.busy": "2022-07-21T20:29:51.082422Z", - "iopub.status.idle": "2022-07-21T20:29:51.103099Z", - "shell.execute_reply": "2022-07-21T20:29:51.102309Z", - "shell.execute_reply.started": "2022-07-21T20:29:51.083153Z" + "iopub.execute_input": "2022-08-01T04:20:12.727961Z", + "iopub.status.busy": "2022-08-01T04:20:12.727510Z", + "iopub.status.idle": "2022-08-01T04:20:12.755277Z", + "shell.execute_reply": "2022-08-01T04:20:12.754339Z", + "shell.execute_reply.started": "2022-08-01T04:20:12.727919Z" }, "tags": [] }, @@ -830,7 +965,7 @@ " 0.357143\n", " 0.089744\n", " 43.75\n", - " 122.500000\n", + " 1953.125\n", " \n", " \n", " 363\n", @@ -846,7 +981,7 @@ " 0.300000\n", " 0.093960\n", " 43.75\n", - " 145.833333\n", + " 1378.125\n", " \n", " \n", "\n", @@ -861,12 +996,12 @@ "298 79 1 cadillac eldorado 0.357143 0.089744 \n", "363 81 1 oldsmobile cutlass ls 0.300000 0.093960 \n", "\n", - " bore_size grunt \n", - "298 43.75 122.500000 \n", - "363 43.75 145.833333 " + " bore_size grunt \n", + "298 43.75 1953.125 \n", + "363 43.75 1378.125 " ] }, - "execution_count": 11, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } @@ -877,23 +1012,23 @@ }, { "cell_type": "markdown", - "id": "d8625227-6fca-4e92-ba0c-271bbea53c23", + "id": "2ccec1cb-db88-430c-a118-351da41a23c1", "metadata": {}, "source": [ - "Big lazy engines in big heavy cars don't have to have poor MPG!" 
+ "But some still do" ] }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 13, "id": "8f51f87e-fb76-4c8a-b4bc-05f147fc8efa", "metadata": { "execution": { - "iopub.execute_input": "2022-07-21T20:29:51.104577Z", - "iopub.status.busy": "2022-07-21T20:29:51.104277Z", - "iopub.status.idle": "2022-07-21T20:29:51.120670Z", - "shell.execute_reply": "2022-07-21T20:29:51.119898Z", - "shell.execute_reply.started": "2022-07-21T20:29:51.104550Z" + "iopub.execute_input": "2022-08-01T04:20:12.756201Z", + "iopub.status.busy": "2022-08-01T04:20:12.756022Z", + "iopub.status.idle": "2022-08-01T04:20:12.770660Z", + "shell.execute_reply": "2022-08-01T04:20:12.769833Z", + "shell.execute_reply.started": "2022-08-01T04:20:12.756186Z" }, "tags": [] }, @@ -949,7 +1084,7 @@ " 0.494505\n", " 0.14744\n", " 56.875\n", - " 115.013889\n", + " 6328.125\n", " \n", " \n", "\n", @@ -962,11 +1097,11 @@ " model_year origin car_name efficiency load \\\n", "13 70 1 buick estate wagon (sw) 0.494505 0.14744 \n", "\n", - " bore_size grunt \n", - "13 56.875 115.013889 " + " bore_size grunt \n", + "13 56.875 6328.125 " ] }, - "execution_count": 12, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" } @@ -985,15 +1120,15 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 14, "id": "7d556866-da6d-48dd-b37a-e59c3155085d", "metadata": { "execution": { - "iopub.execute_input": "2022-07-21T20:29:51.122000Z", - "iopub.status.busy": "2022-07-21T20:29:51.121688Z", - "iopub.status.idle": "2022-07-21T20:29:51.127163Z", - "shell.execute_reply": "2022-07-21T20:29:51.126389Z", - "shell.execute_reply.started": "2022-07-21T20:29:51.121973Z" + "iopub.execute_input": "2022-08-01T04:20:12.772414Z", + "iopub.status.busy": "2022-08-01T04:20:12.771774Z", + "iopub.status.idle": "2022-08-01T04:20:12.776947Z", + "shell.execute_reply": "2022-08-01T04:20:12.776094Z", + "shell.execute_reply.started": "2022-08-01T04:20:12.772385Z" }, "tags": [] }, @@ -1005,23 +1140,396 @@ 
}, { "cell_type": "markdown", - "id": "15d5a2c5-cb01-4a54-8ce4-375018ebc79a", + "id": "146d6761-455a-407f-b627-24c13586a88f", "metadata": {}, "source": [ - "What vehicles have the lowest MPG?" + "## What vehicles have the Highest MPG?" ] }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 15, + "id": "558d450a-2649-4005-bbe8-5f8cc509f965", + "metadata": { + "execution": { + "iopub.execute_input": "2022-08-01T04:20:12.778488Z", + "iopub.status.busy": "2022-08-01T04:20:12.778076Z", + "iopub.status.idle": "2022-08-01T04:20:12.930503Z", + "shell.execute_reply": "2022-08-01T04:20:12.929878Z", + "shell.execute_reply.started": "2022-08-01T04:20:12.778459Z" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "top_mpg = merged.sort_values('mpg').tail(10)\n", + "\n", + "fig, ax = plt.subplots(figsize = (6,5))\n", + "ax.barh(top_mpg.car_name,top_mpg.mpg)\n", + "for i in ax.patches:\n", + " plt.text(i.get_width()+0.2, i.get_y()+0.5,\n", + " str(round((i.get_width()), 2)),\n", + " fontsize = 10, fontweight ='bold',\n", + " color ='grey')\n", + "ax.set_title('Top 10 MPG (red line is average)')\n", + "plt.axvline(merged.mpg.mean(),color='red')\n", + "plt.show();" + ] + }, + { + "cell_type": "markdown", + "id": "260484cb-5145-4c0f-8952-8f6ba652c8a5", + "metadata": {}, + "source": [ + "In more detail:" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "38935e91-3877-47a3-96d6-cd54e2704bdb", + "metadata": { + "execution": { + "iopub.execute_input": "2022-08-01T04:20:12.931405Z", + "iopub.status.busy": "2022-08-01T04:20:12.931238Z", + "iopub.status.idle": "2022-08-01T04:20:12.947214Z", + "shell.execute_reply": "2022-08-01T04:20:12.946618Z", + "shell.execute_reply.started": "2022-08-01T04:20:12.931389Z" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
mpgcylindersdisplacementhorsepowerweightaccelerationmodel_yearorigincar_nameefficiencyloadbore_sizegrunt
32246.6486.065.02110.017.9803mazda glc0.7558140.04075821.501056.2500
32944.6491.067.01850.013.8803honda civic 1500 gl0.7362640.04918922.751122.2500
32544.3490.048.02085.021.7802vw rabbit c (diesel)0.5333330.04316522.50576.0000
39344.0497.052.02130.024.6822vw pickup0.5360820.04554024.25676.0000
32643.4490.048.02335.023.7802vw dasher (diesel)0.5333330.03854422.50576.0000
24443.1490.048.01985.021.5782volkswagen rabbit custom diesel0.5333330.04534022.50576.0000
30941.5498.076.02144.014.7802vw rabbit0.7755100.04570924.501444.0000
33040.9485.053.51835.017.3802renault lecar deluxe0.6294120.04632221.25715.5625
32440.8485.065.02110.019.2803datsun 2100.7647060.04028421.251056.2500
24739.4485.070.02070.018.6783datsun b210 gx0.8235290.04106321.251225.0000
\n", + "
" + ], + "text/plain": [ + " mpg cylinders displacement horsepower weight acceleration \\\n", + "322 46.6 4 86.0 65.0 2110.0 17.9 \n", + "329 44.6 4 91.0 67.0 1850.0 13.8 \n", + "325 44.3 4 90.0 48.0 2085.0 21.7 \n", + "393 44.0 4 97.0 52.0 2130.0 24.6 \n", + "326 43.4 4 90.0 48.0 2335.0 23.7 \n", + "244 43.1 4 90.0 48.0 1985.0 21.5 \n", + "309 41.5 4 98.0 76.0 2144.0 14.7 \n", + "330 40.9 4 85.0 53.5 1835.0 17.3 \n", + "324 40.8 4 85.0 65.0 2110.0 19.2 \n", + "247 39.4 4 85.0 70.0 2070.0 18.6 \n", + "\n", + " model_year origin car_name efficiency \\\n", + "322 80 3 mazda glc 0.755814 \n", + "329 80 3 honda civic 1500 gl 0.736264 \n", + "325 80 2 vw rabbit c (diesel) 0.533333 \n", + "393 82 2 vw pickup 0.536082 \n", + "326 80 2 vw dasher (diesel) 0.533333 \n", + "244 78 2 volkswagen rabbit custom diesel 0.533333 \n", + "309 80 2 vw rabbit 0.775510 \n", + "330 80 2 renault lecar deluxe 0.629412 \n", + "324 80 3 datsun 210 0.764706 \n", + "247 78 3 datsun b210 gx 0.823529 \n", + "\n", + " load bore_size grunt \n", + "322 0.040758 21.50 1056.2500 \n", + "329 0.049189 22.75 1122.2500 \n", + "325 0.043165 22.50 576.0000 \n", + "393 0.045540 24.25 676.0000 \n", + "326 0.038544 22.50 576.0000 \n", + "244 0.045340 22.50 576.0000 \n", + "309 0.045709 24.50 1444.0000 \n", + "330 0.046322 21.25 715.5625 \n", + "324 0.040284 21.25 1056.2500 \n", + "247 0.041063 21.25 1225.0000 " + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "merged.sort_values('mpg',ascending=False).head(10)" + ] + }, + { + "cell_type": "markdown", + "id": "15d5a2c5-cb01-4a54-8ce4-375018ebc79a", + "metadata": {}, + "source": [ + "## What vehicles have the lowest MPG?" 
+ ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "65588fe3-762f-42b0-9427-feb64275b792", + "metadata": { + "execution": { + "iopub.execute_input": "2022-08-01T04:20:12.948105Z", + "iopub.status.busy": "2022-08-01T04:20:12.947948Z", + "iopub.status.idle": "2022-08-01T04:20:13.102377Z", + "shell.execute_reply": "2022-08-01T04:20:13.101717Z", + "shell.execute_reply.started": "2022-08-01T04:20:12.948090Z" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "low_mpg = merged.sort_values('mpg', ascending=False).tail(10)\n", + "\n", + "fig, ax = plt.subplots(figsize = (6,5))\n", + "ax.barh(low_mpg.car_name,low_mpg.mpg)\n", + "for i in ax.patches:\n", + " plt.text(i.get_width()+0.2, i.get_y()+0.5,\n", + " str(round((i.get_width()), 2)),\n", + " fontsize = 10, fontweight ='bold',\n", + " color ='grey')\n", + "ax.set_title('Bottom 10 MPG (red line is average)')\n", + "plt.axvline(merged.mpg.mean(),color='red')\n", + "plt.show();" + ] + }, + { + "cell_type": "markdown", + "id": "3d1c5be8-b63f-496e-a2cd-475a47e7a542", + "metadata": {}, + "source": [ + "In more detail:" + ] + }, + { + "cell_type": "code", + "execution_count": 18, "id": "1497a48c-42a3-447e-b1fb-e3a5b78902da", "metadata": { "execution": { - "iopub.execute_input": "2022-07-21T20:29:51.128643Z", - "iopub.status.busy": "2022-07-21T20:29:51.128285Z", - "iopub.status.idle": "2022-07-21T20:29:51.164545Z", - "shell.execute_reply": "2022-07-21T20:29:51.163776Z", - "shell.execute_reply.started": "2022-07-21T20:29:51.128603Z" + "iopub.execute_input": "2022-08-01T04:20:13.103850Z", + "iopub.status.busy": "2022-08-01T04:20:13.103446Z", + "iopub.status.idle": "2022-08-01T04:20:13.131408Z", + "shell.execute_reply": "2022-08-01T04:20:13.130488Z", + "shell.execute_reply.started": "2022-08-01T04:20:13.103820Z" }, "tags": [] }, @@ -1077,7 +1585,7 @@ " 0.634868\n", " 0.064243\n", " 38.000\n", - " 59.854922\n", + " 4656.125\n", " \n", " \n", " 26\n", @@ -1093,7 +1601,7 @@ " 0.651466\n", " 0.070155\n", " 38.375\n", - " 58.905625\n", + " 5000.000\n", " \n", " \n", " 25\n", @@ -1109,7 +1617,7 @@ " 0.597222\n", " 0.078007\n", " 45.000\n", - " 75.348837\n", + " 5778.125\n", " \n", " \n", " 27\n", @@ -1125,7 +1633,7 @@ " 0.660377\n", " 0.072570\n", " 39.750\n", - " 60.192857\n", + " 5512.500\n", " \n", " \n", " 103\n", @@ -1141,7 +1649,7 @@ " 0.375000\n", " 0.080048\n", 
" 50.000\n", - " 133.333333\n", + " 2812.500\n", " \n", " \n", " 67\n", @@ -1157,7 +1665,7 @@ " 0.484848\n", " 0.092597\n", " 53.625\n", - " 110.601562\n", + " 5408.000\n", " \n", " \n", " 124\n", @@ -1173,7 +1681,7 @@ " 0.514286\n", " 0.095524\n", " 43.750\n", - " 85.069444\n", + " 4050.000\n", " \n", " \n", " 42\n", @@ -1189,7 +1697,7 @@ " 0.469974\n", " 0.077296\n", " 47.875\n", - " 101.867361\n", + " 4050.000\n", " \n", " \n", " 95\n", @@ -1205,7 +1713,7 @@ " 0.494505\n", " 0.091901\n", " 56.875\n", - " 115.013889\n", + " 6328.125\n", " \n", " \n", " 90\n", @@ -1221,247 +1729,7 @@ " 0.461538\n", " 0.086632\n", " 53.625\n", - " 116.187500\n", - " \n", - " \n", - " 69\n", - " 12.0\n", - " 8\n", - " 350.0\n", - " 160.0\n", - " 4456.0\n", - " 13.5\n", - " 72\n", - " 1\n", - " oldsmobile delta 88 royale\n", - " 0.457143\n", - " 0.078546\n", - " 43.750\n", - " 95.703125\n", - " \n", - " \n", - " 104\n", - " 12.0\n", - " 8\n", - " 400.0\n", - " 167.0\n", - " 4906.0\n", - " 12.5\n", - " 73\n", - " 1\n", - " ford country\n", - " 0.417500\n", - " 0.081533\n", - " 50.000\n", - " 119.760479\n", - " \n", - " \n", - " 106\n", - " 12.0\n", - " 8\n", - " 350.0\n", - " 180.0\n", - " 4499.0\n", - " 12.5\n", - " 73\n", - " 1\n", - " oldsmobile vista cruiser\n", - " 0.514286\n", - " 0.077795\n", - " 43.750\n", - " 85.069444\n", - " \n", - " \n", - " 87\n", - " 13.0\n", - " 8\n", - " 350.0\n", - " 145.0\n", - " 3988.0\n", - " 13.0\n", - " 73\n", - " 1\n", - " chevrolet malibu\n", - " 0.414286\n", - " 0.087763\n", - " 43.750\n", - " 105.603448\n", - " \n", - " \n", - " 73\n", - " 13.0\n", - " 8\n", - " 307.0\n", - " 130.0\n", - " 4098.0\n", - " 14.0\n", - " 72\n", - " 1\n", - " chevrolet chevelle concours (sw)\n", - " 0.423453\n", - " 0.074915\n", - " 38.375\n", - " 90.624038\n", - " \n", - " \n", - " 74\n", - " 13.0\n", - " 8\n", - " 302.0\n", - " 140.0\n", - " 4294.0\n", - " 16.0\n", - " 72\n", - " 1\n", - " ford gran torino (sw)\n", - " 0.463576\n", - " 0.070331\n", - " 
37.750\n", - " 81.432143\n", - " \n", - " \n", - " 62\n", - " 13.0\n", - " 8\n", - " 350.0\n", - " 165.0\n", - " 4274.0\n", - " 12.0\n", - " 72\n", - " 1\n", - " chevrolet impala\n", - " 0.471429\n", - " 0.081891\n", - " 43.750\n", - " 92.803030\n", - " \n", - " \n", - " 43\n", - " 13.0\n", - " 8\n", - " 400.0\n", - " 170.0\n", - " 4746.0\n", - " 12.0\n", - " 71\n", - " 1\n", - " ford country squire (sw)\n", - " 0.425000\n", - " 0.084282\n", - " 50.000\n", - " 117.647059\n", - " \n", - " \n", - " 96\n", - " 13.0\n", - " 8\n", - " 360.0\n", - " 175.0\n", - " 3821.0\n", - " 11.0\n", - " 73\n", - " 1\n", - " amc ambassador brougham\n", - " 0.486111\n", - " 0.094216\n", - " 45.000\n", - " 92.571429\n", - " \n", - " \n", - " 94\n", - " 13.0\n", - " 8\n", - " 440.0\n", - " 215.0\n", - " 4735.0\n", - " 11.0\n", - " 73\n", - " 1\n", - " chrysler new yorker brougham\n", - " 0.488636\n", - " 0.092925\n", - " 55.000\n", - " 112.558140\n", - " \n", - " \n", - " 92\n", - " 13.0\n", - " 8\n", - " 351.0\n", - " 158.0\n", - " 4363.0\n", - " 13.0\n", - " 73\n", - " 1\n", - " ford ltd\n", - " 0.450142\n", - " 0.080449\n", - " 43.875\n", - " 97.469146\n", - " \n", - " \n", - " 85\n", - " 13.0\n", - " 8\n", - " 350.0\n", - " 175.0\n", - " 4100.0\n", - " 13.0\n", - " 73\n", - " 1\n", - " buick century 350\n", - " 0.500000\n", - " 0.085366\n", - " 43.750\n", - " 87.500000\n", - " \n", - " \n", - " 137\n", - " 13.0\n", - " 8\n", - " 350.0\n", - " 150.0\n", - " 4699.0\n", - " 14.5\n", - " 74\n", - " 1\n", - " buick century luxus (sw)\n", - " 0.428571\n", - " 0.074484\n", - " 43.750\n", - " 102.083333\n", - " \n", - " \n", - " 44\n", - " 13.0\n", - " 8\n", - " 400.0\n", - " 175.0\n", - " 5140.0\n", - " 12.0\n", - " 71\n", - " 1\n", - " pontiac safari (sw)\n", - " 0.437500\n", - " 0.077821\n", - " 50.000\n", - " 114.285714\n", - " \n", - " \n", - " 215\n", - " 13.0\n", - " 8\n", - " 318.0\n", - " 150.0\n", - " 3755.0\n", - " 14.0\n", - " 76\n", - " 1\n", - " dodge d100\n", - " 0.471698\n", 
- " 0.084687\n", - " 39.750\n", - " 84.270000\n", + " 4900.500\n", " \n", " \n", "\n", @@ -1479,640 +1747,146 @@ "42 12.0 8 383.0 180.0 4955.0 11.5 \n", "95 12.0 8 455.0 225.0 4951.0 11.0 \n", "90 12.0 8 429.0 198.0 4952.0 11.5 \n", - "69 12.0 8 350.0 160.0 4456.0 13.5 \n", - "104 12.0 8 400.0 167.0 4906.0 12.5 \n", - "106 12.0 8 350.0 180.0 4499.0 12.5 \n", - "87 13.0 8 350.0 145.0 3988.0 13.0 \n", - "73 13.0 8 307.0 130.0 4098.0 14.0 \n", - "74 13.0 8 302.0 140.0 4294.0 16.0 \n", - "62 13.0 8 350.0 165.0 4274.0 12.0 \n", - "43 13.0 8 400.0 170.0 4746.0 12.0 \n", - "96 13.0 8 360.0 175.0 3821.0 11.0 \n", - "94 13.0 8 440.0 215.0 4735.0 11.0 \n", - "92 13.0 8 351.0 158.0 4363.0 13.0 \n", - "85 13.0 8 350.0 175.0 4100.0 13.0 \n", - "137 13.0 8 350.0 150.0 4699.0 14.5 \n", - "44 13.0 8 400.0 175.0 5140.0 12.0 \n", - "215 13.0 8 318.0 150.0 3755.0 14.0 \n", "\n", - " model_year origin car_name efficiency \\\n", - "28 70 1 hi 1200d 0.634868 \n", - "26 70 1 chevy c20 0.651466 \n", - "25 70 1 ford f250 0.597222 \n", - "27 70 1 dodge d200 0.660377 \n", - "103 73 1 chevrolet impala 0.375000 \n", - "67 72 1 mercury marquis 0.484848 \n", - "124 73 1 oldsmobile omega 0.514286 \n", - "42 71 1 dodge monaco (sw) 0.469974 \n", - "95 73 1 buick electra 225 custom 0.494505 \n", - "90 73 1 mercury marquis brougham 0.461538 \n", - "69 72 1 oldsmobile delta 88 royale 0.457143 \n", - "104 73 1 ford country 0.417500 \n", - "106 73 1 oldsmobile vista cruiser 0.514286 \n", - "87 73 1 chevrolet malibu 0.414286 \n", - "73 72 1 chevrolet chevelle concours (sw) 0.423453 \n", - "74 72 1 ford gran torino (sw) 0.463576 \n", - "62 72 1 chevrolet impala 0.471429 \n", - "43 71 1 ford country squire (sw) 0.425000 \n", - "96 73 1 amc ambassador brougham 0.486111 \n", - "94 73 1 chrysler new yorker brougham 0.488636 \n", - "92 73 1 ford ltd 0.450142 \n", - "85 73 1 buick century 350 0.500000 \n", - "137 74 1 buick century luxus (sw) 0.428571 \n", - "44 71 1 pontiac safari (sw) 0.437500 \n", - "215 76 
1 dodge d100 0.471698 \n", + " model_year origin car_name efficiency load \\\n", + "28 70 1 hi 1200d 0.634868 0.064243 \n", + "26 70 1 chevy c20 0.651466 0.070155 \n", + "25 70 1 ford f250 0.597222 0.078007 \n", + "27 70 1 dodge d200 0.660377 0.072570 \n", + "103 73 1 chevrolet impala 0.375000 0.080048 \n", + "67 72 1 mercury marquis 0.484848 0.092597 \n", + "124 73 1 oldsmobile omega 0.514286 0.095524 \n", + "42 71 1 dodge monaco (sw) 0.469974 0.077296 \n", + "95 73 1 buick electra 225 custom 0.494505 0.091901 \n", + "90 73 1 mercury marquis brougham 0.461538 0.086632 \n", "\n", - " load bore_size grunt \n", - "28 0.064243 38.000 59.854922 \n", - "26 0.070155 38.375 58.905625 \n", - "25 0.078007 45.000 75.348837 \n", - "27 0.072570 39.750 60.192857 \n", - "103 0.080048 50.000 133.333333 \n", - "67 0.092597 53.625 110.601562 \n", - "124 0.095524 43.750 85.069444 \n", - "42 0.077296 47.875 101.867361 \n", - "95 0.091901 56.875 115.013889 \n", - "90 0.086632 53.625 116.187500 \n", - "69 0.078546 43.750 95.703125 \n", - "104 0.081533 50.000 119.760479 \n", - "106 0.077795 43.750 85.069444 \n", - "87 0.087763 43.750 105.603448 \n", - "73 0.074915 38.375 90.624038 \n", - "74 0.070331 37.750 81.432143 \n", - "62 0.081891 43.750 92.803030 \n", - "43 0.084282 50.000 117.647059 \n", - "96 0.094216 45.000 92.571429 \n", - "94 0.092925 55.000 112.558140 \n", - "92 0.080449 43.875 97.469146 \n", - "85 0.085366 43.750 87.500000 \n", - "137 0.074484 43.750 102.083333 \n", - "44 0.077821 50.000 114.285714 \n", - "215 0.084687 39.750 84.270000 " + " bore_size grunt \n", + "28 38.000 4656.125 \n", + "26 38.375 5000.000 \n", + "25 45.000 5778.125 \n", + "27 39.750 5512.500 \n", + "103 50.000 2812.500 \n", + "67 53.625 5408.000 \n", + "124 43.750 4050.000 \n", + "42 47.875 4050.000 \n", + "95 56.875 6328.125 \n", + "90 53.625 4900.500 " ] }, - "execution_count": 14, + "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ - 
"merged.sort_values('mpg').head(25)" + "merged.sort_values('mpg').head(10)" ] }, { "cell_type": "markdown", - "id": "146d6761-455a-407f-b627-24c13586a88f", + "id": "15d0d27b-5f92-4648-ad5c-35cc811430b3", "metadata": {}, "source": [ - "What vehicles have the Highest MPG?" + "## Some stats" ] }, { "cell_type": "code", - "execution_count": 15, - "id": "38935e91-3877-47a3-96d6-cd54e2704bdb", + "execution_count": 19, + "id": "8710cba8-6b7e-4219-98b9-b7d5a1b4f4b9", "metadata": { "execution": { - "iopub.execute_input": "2022-07-21T20:29:51.166013Z", - "iopub.status.busy": "2022-07-21T20:29:51.165720Z", - "iopub.status.idle": "2022-07-21T20:29:51.199742Z", - "shell.execute_reply": "2022-07-21T20:29:51.199027Z", - "shell.execute_reply.started": "2022-07-21T20:29:51.165986Z" + "iopub.execute_input": "2022-08-01T04:20:13.132876Z", + "iopub.status.busy": "2022-08-01T04:20:13.132574Z", + "iopub.status.idle": "2022-08-01T04:20:13.142096Z", + "shell.execute_reply": "2022-08-01T04:20:13.141321Z", + "shell.execute_reply.started": "2022-08-01T04:20:13.132851Z" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Mean MPG: 23.51\n", + "Mean Weight: 2975.41\n", + "Mean Horsepower: 104.12\n", + "efficiency mean: 0.61\n", + "load mean: 0.06\n", + "bore_size mean: 33.36\n", + "grunt mean: 2060.50\n" + ] + } + ], + "source": [ + "print(f'''Mean MPG: {y.mean():.2f}\n", + "Mean Weight: {merged.weight.mean():.2f}\n", + "Mean Horsepower: {merged.horsepower.mean():.2f}''')\n", + "\n", + "for col in merged.columns[9:]:\n", + " print(f'{col} mean: {merged[col].mean():.2f}')" + ] + }, + { + "cell_type": "markdown", + "id": "0213061d-29c8-4f47-9128-705253bc6320", + "metadata": {}, + "source": [ + "Check Correlation" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "7205bdab-a7df-41b4-9ec0-c1c9e2fe1c03", + "metadata": { + "execution": { + "iopub.execute_input": "2022-08-01T04:20:13.143792Z", + "iopub.status.busy": 
"2022-08-01T04:20:13.143335Z", + "iopub.status.idle": "2022-08-01T04:20:13.153208Z", + "shell.execute_reply": "2022-08-01T04:20:13.152547Z", + "shell.execute_reply.started": "2022-08-01T04:20:13.143758Z" }, "tags": [] }, "outputs": [ { "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
mpgcylindersdisplacementhorsepowerweightaccelerationmodel_yearorigincar_nameefficiencyloadbore_sizegrunt
32246.6486.065.02110.017.9803mazda glc0.7558140.04075821.50000028.446154
32944.6491.067.01850.013.8803honda civic 1500 gl0.7362640.04918922.75000030.899254
32544.3490.048.02085.021.7802vw rabbit c (diesel)0.5333330.04316522.50000042.187500
39344.0497.052.02130.024.6822vw pickup0.5360820.04554024.25000045.235577
32643.4490.048.02335.023.7802vw dasher (diesel)0.5333330.03854422.50000042.187500
24443.1490.048.01985.021.5782volkswagen rabbit custom diesel0.5333330.04534022.50000042.187500
30941.5498.076.02144.014.7802vw rabbit0.7755100.04570924.50000031.592105
33040.9485.053.51835.017.3802renault lecar deluxe0.6294120.04632221.25000033.761682
32440.8485.065.02110.019.2803datsun 2100.7647060.04028421.25000027.788462
24739.4485.070.02070.018.6783datsun b210 gx0.8235290.04106321.25000025.803571
34239.1479.058.01755.016.9813toyota starlet0.7341770.04501419.75000026.900862
34339.0486.064.01875.016.4811plymouth champ0.7441860.04586721.50000028.890625
31038.1489.060.01968.018.8803toyota corolla tercel0.6741570.04522422.25000033.004167
38438.0491.067.01995.016.2823datsun 310 gx0.7362640.04561422.75000030.899254
38238.0491.067.01965.015.0823honda civic0.7362640.04631022.75000030.899254
38638.06262.085.03015.017.0821oldsmobile cutlass ciera (diesel)0.3244270.08689943.666667134.596078
37738.04105.063.02125.014.7821plymouth horizon miser0.6000000.04941226.25000043.750000
34737.7489.062.02050.017.3813toyota tercel0.6966290.04341522.25000031.939516
30437.3491.069.02130.014.7792fiat strada custom0.7582420.04272322.75000030.003623
31237.2486.065.02019.016.4803datsun 3100.7558140.04259521.50000028.446154
37537.0491.068.02025.018.2823mazda glc custom l0.7472530.04493822.75000030.444853
34637.0485.065.01975.019.4813datsun 210 mpg0.7647060.04303821.25000027.788462
32037.04119.092.02434.015.0803datsun 510 hatchback0.7731090.04889129.75000038.480978
32736.45121.067.02950.019.9802audi 5000s (diesel)0.5537190.04101724.20000043.704478
24836.1491.060.01800.016.4783honda civic cvcc0.6593410.05055622.75000034.504167
\n", - "
" - ], "text/plain": [ - " mpg cylinders displacement horsepower weight acceleration \\\n", - "322 46.6 4 86.0 65.0 2110.0 17.9 \n", - "329 44.6 4 91.0 67.0 1850.0 13.8 \n", - "325 44.3 4 90.0 48.0 2085.0 21.7 \n", - "393 44.0 4 97.0 52.0 2130.0 24.6 \n", - "326 43.4 4 90.0 48.0 2335.0 23.7 \n", - "244 43.1 4 90.0 48.0 1985.0 21.5 \n", - "309 41.5 4 98.0 76.0 2144.0 14.7 \n", - "330 40.9 4 85.0 53.5 1835.0 17.3 \n", - "324 40.8 4 85.0 65.0 2110.0 19.2 \n", - "247 39.4 4 85.0 70.0 2070.0 18.6 \n", - "342 39.1 4 79.0 58.0 1755.0 16.9 \n", - "343 39.0 4 86.0 64.0 1875.0 16.4 \n", - "310 38.1 4 89.0 60.0 1968.0 18.8 \n", - "384 38.0 4 91.0 67.0 1995.0 16.2 \n", - "382 38.0 4 91.0 67.0 1965.0 15.0 \n", - "386 38.0 6 262.0 85.0 3015.0 17.0 \n", - "377 38.0 4 105.0 63.0 2125.0 14.7 \n", - "347 37.7 4 89.0 62.0 2050.0 17.3 \n", - "304 37.3 4 91.0 69.0 2130.0 14.7 \n", - "312 37.2 4 86.0 65.0 2019.0 16.4 \n", - "375 37.0 4 91.0 68.0 2025.0 18.2 \n", - "346 37.0 4 85.0 65.0 1975.0 19.4 \n", - "320 37.0 4 119.0 92.0 2434.0 15.0 \n", - "327 36.4 5 121.0 67.0 2950.0 19.9 \n", - "248 36.1 4 91.0 60.0 1800.0 16.4 \n", - "\n", - " model_year origin car_name efficiency \\\n", - "322 80 3 mazda glc 0.755814 \n", - "329 80 3 honda civic 1500 gl 0.736264 \n", - "325 80 2 vw rabbit c (diesel) 0.533333 \n", - "393 82 2 vw pickup 0.536082 \n", - "326 80 2 vw dasher (diesel) 0.533333 \n", - "244 78 2 volkswagen rabbit custom diesel 0.533333 \n", - "309 80 2 vw rabbit 0.775510 \n", - "330 80 2 renault lecar deluxe 0.629412 \n", - "324 80 3 datsun 210 0.764706 \n", - "247 78 3 datsun b210 gx 0.823529 \n", - "342 81 3 toyota starlet 0.734177 \n", - "343 81 1 plymouth champ 0.744186 \n", - "310 80 3 toyota corolla tercel 0.674157 \n", - "384 82 3 datsun 310 gx 0.736264 \n", - "382 82 3 honda civic 0.736264 \n", - "386 82 1 oldsmobile cutlass ciera (diesel) 0.324427 \n", - "377 82 1 plymouth horizon miser 0.600000 \n", - "347 81 3 toyota tercel 0.696629 \n", - "304 79 2 fiat strada custom 
0.758242 \n", - "312 80 3 datsun 310 0.755814 \n", - "375 82 3 mazda glc custom l 0.747253 \n", - "346 81 3 datsun 210 mpg 0.764706 \n", - "320 80 3 datsun 510 hatchback 0.773109 \n", - "327 80 2 audi 5000s (diesel) 0.553719 \n", - "248 78 3 honda civic cvcc 0.659341 \n", - "\n", - " load bore_size grunt \n", - "322 0.040758 21.500000 28.446154 \n", - "329 0.049189 22.750000 30.899254 \n", - "325 0.043165 22.500000 42.187500 \n", - "393 0.045540 24.250000 45.235577 \n", - "326 0.038544 22.500000 42.187500 \n", - "244 0.045340 22.500000 42.187500 \n", - "309 0.045709 24.500000 31.592105 \n", - "330 0.046322 21.250000 33.761682 \n", - "324 0.040284 21.250000 27.788462 \n", - "247 0.041063 21.250000 25.803571 \n", - "342 0.045014 19.750000 26.900862 \n", - "343 0.045867 21.500000 28.890625 \n", - "310 0.045224 22.250000 33.004167 \n", - "384 0.045614 22.750000 30.899254 \n", - "382 0.046310 22.750000 30.899254 \n", - "386 0.086899 43.666667 134.596078 \n", - "377 0.049412 26.250000 43.750000 \n", - "347 0.043415 22.250000 31.939516 \n", - "304 0.042723 22.750000 30.003623 \n", - "312 0.042595 21.500000 28.446154 \n", - "375 0.044938 22.750000 30.444853 \n", - "346 0.043038 21.250000 27.788462 \n", - "320 0.048891 29.750000 38.480978 \n", - "327 0.041017 24.200000 43.704478 \n", - "248 0.050556 22.750000 34.504167 " + "weight -0.832707\n", + "displacement -0.804456\n", + "horsepower -0.777897\n", + "cylinders -0.776090\n", + "bore_size -0.773403\n", + "load -0.724271\n", + "grunt -0.644081\n", + "acceleration 0.420414\n", + "efficiency 0.509309\n", + "origin 0.563833\n", + "model_year 0.580091\n", + "mpg 1.000000\n", + "dtype: float64" ] }, - "execution_count": 15, + "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "merged.sort_values('mpg',ascending=False).head(25)" + "merged.corrwith(y).sort_values()" + ] + }, + { + "cell_type": "markdown", + "id": "d8889b56-a87c-4901-b654-aaf5a4b9fb14", + "metadata": {}, + "source": [ + "
\n", + "Math says to use weight, displacement, horsepower, cylinders...\n", + "\n", + "While I agree that these are the most important features, there's more to it than just these numbers. Like how a stew is not just a sum of its ingredients." ] }, { @@ -2120,20 +1894,20 @@ "id": "27e89d6b-7603-403c-8235-e9bad49040b3", "metadata": {}, "source": [ - "Pick a few to toss into the model and get some numbers out" + "I'll test both" ] }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 21, "id": "52d0ffbf-55aa-49b9-b99f-8160bf09cc79", "metadata": { "execution": { - "iopub.execute_input": "2022-07-21T20:29:51.201570Z", - "iopub.status.busy": "2022-07-21T20:29:51.200911Z", - "iopub.status.idle": "2022-07-21T20:29:51.207432Z", - "shell.execute_reply": "2022-07-21T20:29:51.206526Z", - "shell.execute_reply.started": "2022-07-21T20:29:51.201525Z" + "iopub.execute_input": "2022-08-01T04:20:13.154483Z", + "iopub.status.busy": "2022-08-01T04:20:13.154106Z", + "iopub.status.idle": "2022-08-01T04:20:13.159628Z", + "shell.execute_reply": "2022-08-01T04:20:13.158886Z", + "shell.execute_reply.started": "2022-08-01T04:20:13.154458Z" }, "tags": [] }, @@ -2147,7 +1921,7 @@ " dtype='object')" ] }, - "execution_count": 16, + "execution_count": 21, "metadata": {}, "output_type": "execute_result" } @@ -2158,29 +1932,35 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 22, "id": "6a4a9e48-57a1-48b6-b289-58bc43584112", "metadata": { "execution": { - "iopub.execute_input": "2022-07-21T20:29:51.209219Z", - "iopub.status.busy": "2022-07-21T20:29:51.208620Z", - "iopub.status.idle": "2022-07-21T20:29:51.220516Z", - "shell.execute_reply": "2022-07-21T20:29:51.219766Z", - "shell.execute_reply.started": "2022-07-21T20:29:51.209190Z" + "iopub.execute_input": "2022-08-01T04:20:13.163437Z", + "iopub.status.busy": "2022-08-01T04:20:13.163108Z", + "iopub.status.idle": "2022-08-01T04:20:13.175359Z", + "shell.execute_reply": "2022-08-01T04:20:13.174579Z", + 
"shell.execute_reply.started": "2022-08-01T04:20:13.163422Z" }, "tags": [] }, "outputs": [], "source": [ - "X = merged[[\\\n", - " 'horsepower', # overall power\n", - " 'bore_size', # \"torque curve\"\n", - " 'grunt',\n", - " 'load', # load\n", - " ]]\n", + "y.to_csv('data/y.csv',index=False)\n", "\n", - "X.to_csv('data/X.csv',index=False)\n", - "y.to_csv('data/y.csv',index=False)" + "merged[[\\\n", + " 'horsepower',\n", + " 'bore_size',\n", + " 'grunt',\n", + " 'load',\n", + " ]].to_csv('data/X_engineered.csv',index=False)\n", + "\n", + "merged[[\\\n", + " 'horsepower',\n", + " 'weight',\n", + " 'displacement',\n", + " 'cylinders',\n", + " ]].to_csv('data/X_straight.csv',index=False)" ] }, {