mpg/model.ipynb

315 lines
204 KiB
Text
Raw Normal View History

2022-07-21 16:31:53 -04:00
{
"cells": [
{
"cell_type": "markdown",
"id": "717f94d3-edfa-4122-9902-212a3456bb8c",
"metadata": {},
"source": [
"[EDA](eda.ipynb)"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "c47122c5-bdcd-4b6b-8d22-8958ad910eca",
"metadata": {
"execution": {
"iopub.execute_input": "2022-07-21T20:30:04.392164Z",
"iopub.status.busy": "2022-07-21T20:30:04.391759Z",
"iopub.status.idle": "2022-07-21T20:30:06.057099Z",
"shell.execute_reply": "2022-07-21T20:30:06.056318Z",
"shell.execute_reply.started": "2022-07-21T20:30:04.392085Z"
},
"tags": []
},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"import matplotlib.pyplot as plt\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.pipeline import Pipeline\n",
"from sklearn.preprocessing import (StandardScaler,\n",
" QuantileTransformer,\n",
" Normalizer,\n",
" MinMaxScaler,\n",
" RobustScaler,\n",
" PowerTransformer)\n",
"\n",
"from sklearn.linear_model import (Lars,\n",
" Ridge,\n",
" Lasso,\n",
" LarsCV,\n",
" LassoCV,\n",
" RidgeCV,\n",
" LassoLars,\n",
" ElasticNet,\n",
" LassoLarsCV,\n",
" LassoLarsIC,\n",
" ElasticNetCV,\n",
" SGDRegressor,\n",
" LinearRegression,\n",
" OrthogonalMatchingPursuit,\n",
" OrthogonalMatchingPursuitCV)\n",
"\n",
"from sklearn.neighbors import KNeighborsRegressor\n",
"from sklearn.svm import LinearSVR\n",
"from sklearn.metrics import mean_squared_error, r2_score"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "8c3b98e1-9a8f-4d42-8a29-5d030934a4ed",
"metadata": {
"execution": {
"iopub.execute_input": "2022-07-21T20:30:06.059480Z",
"iopub.status.busy": "2022-07-21T20:30:06.059058Z",
"iopub.status.idle": "2022-07-21T20:30:11.086526Z",
"shell.execute_reply": "2022-07-21T20:30:11.085657Z",
"shell.execute_reply.started": "2022-07-21T20:30:06.059453Z"
},
"tags": []
},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAmAAAAF1CAYAAABPmFZlAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8qNh9FAAAACXBIWXMAAAsTAAALEwEAmpwYAAEAAElEQVR4nOy9d5xlWVnu/117n3wqp849PaEnM9PIEAZUBiSJoHgRL6iAgnq5P7x4zSCXq/eKiIpXRVEExVGJI3kkM0gOM8zQE3p6Zrp7pnOoHE7eYf3+WGvtcFKFru6qmtrP51Ofqjphn7332XutZz3v876vkFKSIEGCBAkSJEiQ4OLBWusdSJAgQYIECRIk2GxICFiCBAkSJEiQIMFFRkLAEiRIkCBBggQJLjISApYgQYIECRIkSHCRkRCwBAkSJEiQIEGCi4yEgCVIkCBBggQJElxkJAQsQYIEaw4hxC1CiJNrvR9rCSHEu4UQb1njfTgghLhlLfchQYLNgoSAJUiwziGEOCqEqAohSkKIs0KIW4UQPWu9X5sBQoifEEJ8Uwgxq8/9e4UQvZHns0KI9wkh5vXzv9n0/n1CiLuFEBX9e1+nz5JSvk5K+Uf6fReckOrr6K1N+3CdlPKrF/JzEyRIoJAQsAQJNgZeLKXsAfYBTwTetLa70x1CCHut92G5EEKk2jzcD7wV2A5cA+wE/jzy/B8Ce4FLgGcBvyuEeIHeXgb4FPB+YBD4F+BT+vELig7HkiBBgnWEhIAlSLCBIKU8C3wBRcTaQgjxi0KIR4UQC0KIx4QQP68ft4UQ7xBCTOrnXy+EkGay1krbcyLb+UMhxPsj//+7VnnmhBBfF0JcF3nuViHE3wshPiuEKAPPEkJsF0J8TAgxoffjDZHX5/V7ZoQQDwJP7nbcQoinCyHu0p99lxDi6frxlwshvt/02t8QQnxa/53Vx3xcCHFOh/ny+rlbhBAnhRC/J4Q4C/xzm/P9QSnl56WUFSnlDPBe4BmRl7wK+CMp5YyU8qB+/hf1c7cAKeCvpJR1KeU7AQE8u8Mx3iqEeKsQogh8DtiuVc+SPpeWEOKNQogjQogpIcRtQogh/d49+rt8rRDiOPCVbt+ZEOJXgZ9HEcaSEOJ2/XhwDehz91dCiNP656+EENmmc/dbQohxIcQZIcQvRY7lhUKIB/U1eEoI8dvdvt8ECTYjEgKWIMEGghBiJ/DjwOEOzxeBdwI/LqXsBZ4O7NdP/wrwIpSCdhPwM8v8+M+h1J4x4B7gA03P/xzwx0Av8G3gduBeYAfwY8D/FEI8X7/2D4DL9c/zgVd3+lBNMj6jj2sY+H/AZ4QQw8CngauEEHub9uOD+u8/Ba5EEdYr9L7878hrtwJDKAXrVxc/BfwocEDv1yBKGbs38vy9gCGm1wH3yXi/t/siz7eFlLKM+o5PSyl79M9p4A3AS4Bn6s+dAd7V9PZnopQ6c57bfmdSyvfov/9Mb//FbXblzcDTUOfuRuApwP+KPL8VpRDuAF4LvEufE4B/Av6bvgavRxPCBAkShEgIWIIEGwOfFEIsACeAcRSB6QQfuF4IkZdSnpFSHtCP/yxKjTkhpZwG/mQ5OyClfJ+UckFKWUeF3m4UQvRHXvIpKeW3pJQ+8ARgVEr5f6WUDSnloyh16OWRffljKeW0lPIEilx1wk8Ah6SU/yaldKWUHwIeQoVlK6gw3ysANBG7Gvi0EEKgSOdv6M9ZAN4W2Qdzrv5AK1TVbscvhHguiigaAmd8eHORl82hCKh5Pvpc8/PLxX8D3iylPBn5Dn6mKdz4h1LKsjmWJXxn3fDzwP+VUo5LKSeA/wO8MvK8o593pJSfBUrAVZHnrhVC9Gl18J6VHXKCBI9fJAQsQYKNgZdoNeEWFMEYgSBzzoSpfl+rJ/8VeB1wRgjxGSHE1Xob21EEzuDYUj9chy/frsNf88BR/dRI5GXRbV+CCqHNmh/g94EtK9iX7W2eP4ZSXkCpXa/Qf/8c8ElNzEaBAnB3ZB8+rx83mJBS1rp8NgBCiKfpz/kZKeUj+uGS/t0XeWkfsBB5Pvpc8/PLxSXAJyLHchDwCM8pRM7pEr+zbmg+78f0YwZTUko38n+FkJS+FHghcEwI8TUhxM1L/MwECTYNEgKWIMEGgpTya8CtwDv0/6+LhKneph/7gpTyucA2lFL0Xv32M8CuyOZ2N22+jCIsBlsjf/8c8FPAc1Bhpz36cRHdvcjfJ4DHpJQDkZ9eKeULl7gvUZxGkY8odgOn9N9fBEaEyjB8BWH4cRKoAtdF9qFfJzO02+e2EEI8ERXqfI2U8o7gjcoTdgYVnjO4ER2i1L9v0EqcwQ2R57uh3X6dQIWWo+c0J6U81eF9i31nix1783nfrR9bfOelvEtK+VOo0OcngduW8r4ECTYTEgKWIMHGw18BzxVtShoIIbYIIX5Se8HqKBXG00/fBrxBCLFTe3Xe2PT2/cDLhRBpIUSzR6xXb28KRdLetsg+3gnMa4N7Xqsx1wshjNn+NuBNQohB7Wv7H1229VngSiHEzwkhUkKI/wpcC/wHgFZhPorKThwCvqQf91Hk8y+FEGP6/OyI+NAWhRDiepRq9j+klLe3ecm/Av9LH8fVqJDnrfq5r6LO/Ru0of3X9ONL8UOdA4abwoXvBv5YCHGJ3rdRIcRPddnGYt/ZOeCyLu//EOrYRoUQI6jQ6/u7vB69XxkhxM8LIfqllA4wT3gNJkiQQCMhYAkSbDBoP86/Au2KdlrAb6GUimmUKfv/08+9F5VBeS/KkP3xpve+BWWKn0H5fT4Yee5fUSGoU8CDwHcX2UcPeDHKwP0YSo36R5QSg97+Mf3cF4F/67KtKVTywG+hyMTvAi+SUk5GXvZBlNLz701hsd9DJSx8V4fhvkzoU1oKfgsVsvynSKg3qmD9AXBEH8vXgD+XUn5e73cDZZp/FTALvAYVSm4s9qFSyodQBOhRHXLcDvw1Son7ovYDfhd4apfNLPad/RPKpzUrhPhkm/e/Ffg+KnHgftQ189Y2r2uHVwJH9Tl/HfALS3xfggSbBiKeoJMgQYLNAiHEHhQBSjeRlgQJEiRIcIGRKGAJEiRIkCBBggQXGQkBS5AgQYIECRIkuMhIQpAJEiRIkCBBggQXGYkCliBBggQJEiRIcJGRELAECRIkSJAgQYKLjNTiL1k/GBkZkXv27Fnr3UiQIEGCBAkSJFgUd99996SUcrTdcxuKgO3Zs4fvf//7a70bCRIkSJAgQYIEi0II0bHN2pJCkEKIFwghHhZCHBZCNFfPRgjRL4S4XQhxrxDigBDil/Tju4QQ/ymEOKgf//XIe/5QCHFKCLFf/7ywebsJEiRIkCBBggSPRyyqgAkhbOBdwHOBk8BdQohPSykfjLzs9cCDUsoXCyFGgYeFEB8AXOC3pJT3CCF6UU1xvxR5719KKd+xqkeUIEGCBAkSJEiwzrEUBewpwGEp5aO6hcaHUQ1eo5BAr24624NqgeJKKc9IKe8BkFIuAAeBHau29wkSJEiQIEGCBBsQS/GA7QBORP4/SWv/sb9F9Sg7jWoA+191I9wAuu3JE4HvRR7+NSHEq1D9xn5LSjnT/OFCiF8FfhVg9+7dLTvnOA4nT56kVqst4VA2N3K5HDt37iSdTq/1riRIkCBBggSbGkshYKLNY83VW58P7AeejWrm+yUhxDeklPMAQoge4GPA/zSPAX8P/JHe1h8Bf4FqVhv/ICnfA7wH4KabbmqpGnvy5El6e3vZs2cPSoBL0A5SSqampjh58iSXXnrpWu9OggQJEiRIsKmxlBDkSWBX5P+dKKUril8CPi4VDqMa/F4NIIRIo8jXB6SUHzdvkFKek1J6Wil7LyrUuWzUajWGh4cT8rUIhBAMDw8nSmGCBAkSJEiwDrAUAnYXsFcIcakQIgO8HBVujOI48GMAQogtwFXAo9oT9k/AQSnl/4u+QQixLfLvTwMPrOwQSMjXEpGcpwQJEiRIkGB9YFECJqV0gV8DvoAy0d8mpTwghHidEOJ1+mV/BDxdCHE/cAfwe1LKSeAZwCuBZ7cpN/FnQoj7hRD3Ac8CfmN1D+3iwbZ
"text/plain": [
"<Figure size 720x432 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"| R2 Test | R2 Train | RMSE Test | RMSE Train |\n",
"|-----------|----------|-------------|------------|\n",
"| Min: 0.62 | Min:0.72 | Min: 3.24 | Min:3.57 |\n",
"| Avg: 0.73 | Avg:0.75 | Avg: 4.01 | Avg:3.93 |\n",
"| Max: 0.82 | Max:0.78 | Max: 5.04 | Max:4.15 |\n",
"\n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAlkAAAE/CAYAAAB1vdadAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8qNh9FAAAACXBIWXMAAAsTAAALEwEAmpwYAAEAAElEQVR4nOz9d5glV3Xuj392VZ3YOU1PlEY5IyEJCUQGI4GJNjbgaxuMzdUPY4wvX665Dhcbx+twsbnYmGBjAw4IAwaBECIIBCIojKRRjjMaTezpHE+sqv37Y9eucE6d1GHmzEy9z9NPd5+4T52qvd/9rnetJaSUJEiQIEGCBAkSJFhfGMd7AAkSJEiQIEGCBCcjEpKVIEGCBAkSJEiwAUhIVoIECRIkSJAgwQYgIVkJEiRIkCBBggQbgIRkJUiQIEGCBAkSbAASkpUgQYIECRIkSLABSEhWggQJEqwCQojThBDLQgjzOI7hF4UQ3zpe758gQYLmSEhWggQnOIQQ+4QQRW/BnxBCfFoI0Ru6/9NCCCmEeF3N8z7s3f4r3v9pIcSHhBAHvdd6Wgjxtw3eR//8/TH7oG1ACJERQnxKCPGMEGJJCHGfEOJVNY95uRDiMSFEQQjxPSHE6aH7hBDiL4UQM97PXwkhRNx7SSn3Syl7pZSO99zbhBDv2MDPttP7vqzQGP5dSnntRr1nggQJ1oaEZCVIcHLgtVLKXuAy4NnA79bc/wTwNv2Pt1D/PLAn9JjfBa4ErgL6gJcC98W9T+jn3ev6KTqAR4hq5zALOAC8GBgAPgD8pxBip/ecUeC/vNuHgV3A50PPvx54A3Ap8CzgNcD/b8M+RAjHUxFLkCDBxiAhWQkSnESQUk4A30SRrTC+BjxfCDHk/f9K4AFgIvSY5wBfllIelgr7pJSfXc04PEXpw0KIw97Ph4UQGe++R4UQrwk91hJCTAshLvf+f64Q4sdCiHkhxP1CiJeEHnubEOLPhBA/AgrAmTWff0VK+UFv7K6U8ibgaeAK7yE/CzwspfyClLIEfBC4VAhxvnf/24APSSkPSikPAR8CfqXBZ/SVJSHEnwEvBP4+rPAJIc4XQnxbCDErhHhcCPGm0PM/LYT4mBDiZiHECvBSIcSrPfVtUQhxQAjxwdBb/sD7Pe+9x/OEEL8ihPhh6DWvEULcLYRY8H5fU3Ps/kQI8SNP5fuWRzoRQmSFEP/mqXfz3nPH47/dBAkStIuEZCVIcBJBCLEdeBXwVM1dJeCrwFu8/98K1BKoO4D/TwjxLiHEJY3CZG3i94HnosjepSh17H97930O+IXQY68DpqWU9wohtgFfB/4UpTT9T+BLQoix0ON/GaU49QHPNBuERxTOBR72broIuF/fL6VcQal5F8Xd7/19ES0gpfx94Hbg3VrhE0L0AN8G/gPY5H3mfxBChF/vvwF/5n2WHwIrqO9mEHg18OtCiDd4j32R93vQe4+f1HzWYdSx+wgwAvwN8HUhxEjN+73dG08adXxBkcsBYIf33HcCxVafO0GCBM2RkKwECU4OfEUIsYQKlU0CfxjzmM8CbxVCDKDCaV+puf//AH8J/CIqjHZICPG2msd8xVM69M9/bzCeXwT+WEo5KaWcAv4IRY5AkY7XCSHy3v//zbsN4JeAm6WUN3tK1Le9sfx06LU/LaV8WEppSymrDd4fIUQK+HfgM1LKx7ybe4GFmocuoEhO3P0LQO8qCedrgH1Syn/xxnov8CXg50KPuVFK+SPvs5aklLdJKR/0/n8ARUhf3Ob7vRp4Ukr5r977fQ54DHht6DH/IqV8QkpZBP6TQPGsosjV2VJKR0p5j5RycRWfOUGCBCEkJCtBgpMDb5BS9gEvAc4HRmsfIKX8ITCGUpRu8hba8P2OlPKjUsrno5SUPwP+WQhxQc37DIZ+/rHBeLYSVZme8W5DSvkU8CjwWo9ovY6AZJ0O/HyYyAEvALaEXutA80MBnlfrX4EKEPaNLQP9NQ/vB5Ya3N8PLEspZav3jMHpwNU1n+UXgc2hx0Q+ixDias+MPyWEWEApSnXfZQPUHnO8/7eF/g+HhwsoUgnqWH0TuMEL7/6VR1ITJEiwBiQkK0GCkwhSyu8Dnwb+b4OH/BvwPupDhbWvU5RSfhSYAy5cxVAOo0iGxmnebRo6ZPh64BGPeIEiHf9aQ+R6pJR/ER5eszf2VKdPAePAG2vUrodR4Uv92B7gLIJwYuR+7++HaQ+14zoAfL/ms/RKKX+9yXP+AxXW3SGlHAA+DogGj61F7TEHddwPtRy4lFUp5R9JKS8ErkGpcG9t9bwECRI0R0KyEiQ4+fBh4BVCiMti7vsI8AoCE7UPIcT/EEK8RAiR88zcb0OF0WozDNvB54D/LYQY88zVf4AieBo3ANcCv06gYuE95rVCiOuEEKZnyH6J5zVrFx8DLkBlQtb6ir4MXCyEeKMQIuuN64FQOPGzKF/aNiHEVhQh/XSb73uUqBH/JuBcIcQvCyFS3s9zapTBWvQBs1LKkhDiKlQoVWMKcGveI4ybvff7b97392YUQb6p1cCFEC/1fHgmsIgKHzqtnpcgQYLmSEhWggQnGTwP1GdRZQpq75uVUt7aIPxVRGXTTQDTwG+glKC9ocd8TUTrZH25wTD+FOWlegB4ELjXu02P4wjwE5Rq8vnQ7QdQ6tbvoUjFAeC3aXOuEqrm1f8P5TWaCI3zF73XnwLeiAqFzgFXEyQDAHwClYn5IPAQykj+iXbeG/h/wM8JIeaEEB+RUi6hiORbUCrTBMrzlmnyGu8C/tjz1/0ByjeFN/aCN+4feeHH54afKKWcQSlQ7wNmgPcDr5FSTrcx9s3AF1EE61Hg+0RJcYIECVYBsTqrQYIECRIkSJAgQYJmSJSsBAkSJEiQIEGCDUBCshIkSJAgQYIECTYACclKkCBBggQJEiTYACQkK0GCBAkSJEiQYAOQkKwECRIkSJAgQYINgHW8BxCH0dFRuXPnzuM9jAQJEiRIkCBBgpa45557pqWUY7W3dyXJ2rlzJ7t27Trew0iQIEGCBAkSJGgJIURss/okXJggQYIECRIkSLABSEhWggQJEiRIkCDBBiAhWQkSJEiQIEGCBBuArvRkJUiQIEGCBAnaQ7Va5eDBg5RKpeM9lJMe2WyW7du3k0ql2np8QrISJEiQIEGCExgHDx6kr6+PnTt3IoQ43sM5aSGlZGZmhoMHD3LGGWe09ZwkXJggQYIECRKcwCiVSoyMjCQEa4MhhGBkZKQjxTAhWQkSJEiQIMEJjoRgHRt0epwTkpUgQYIECRIkWBNM0+Syyy7j4osv5rWvfS3z8/MA7Nu3DyEEH/jAB/zHTk9Pk0qlePe73w3A448/zkte8hIuu+wyLrjgAq6//noAbrvtNgYGBrjsssv8n+985zuR97366qu57LLLOO200xgbG/Mft2/fvrbGvXv3bm6++ea1H4AGSDxZCRIkSJAgQYI1IZfLsXv3bgDe9ra38dGPfpTf//3fB+DMM8/kpptu4k/+5E8A+MIXvsBFF13kP/c973kP733ve3n9618PwIMPPujf98IXvpCbbrqp4fveeeedAHz6059m165d/P3f/31H4969eze7du3ip3/6pzt6XrtIlKwEXQvXldz+5BRSyuM9lAQJEiRI0Cae97zncejQIf//XC7HBRdc4Hdy+fznP8+b3vQm//4jR46wfft2//9LLrlkTe+/Z88eXvnKV3LFFVfwwhe+kMceewxQ5O7iiy/m0ksv5UUvehGVSoU/+IM/4POf/zyXXXYZn//859f0vnFIlKwEXYs79s7wy5+6i5vf80Iu3Np/vIeTIEGCBAlawHEcbr31Vn7t134tcvtb3vIWbrjhBjZv3oxpmmzdupXDhw8D8N73vpeXvexlXHPNNVx77bW8/e1vZ3BwEIDbb7+dyy67zH+dL33pS5x11llNx3D99dfz8Y9/nHPOOYc777yTd73rXXz3u9/lj//4j/nmN7/Jtm3bmJ+fJ51O88d//MerUsDaRUKyEnQtVioOAIWKfZxHkiBBggQnBv7oaw/zyOHFdX3NC7f284evvajpY4rFou+FuuK
"text/plain": [
"<Figure size 720x360 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Truck Avg: 16.23\n",
"Mustang Avg: 20.68\n",
"Grom Avg: 36.07\n",
"Burb Avg: 15.86\n",
"Corolla Avg: 21.06\n",
"Steve Avg: 16.37\n"
]
}
],
"source": [
"X = pd.read_csv('data/X.csv')\n",
"y = pd.read_csv('data/y.csv').mpg\n",
"\n",
"eff = 1\n",
"\n",
"v6_s197_05_hp = 216\n",
"v6_s197_05_ci = 245\n",
"v6_s197_05_cl = 6\n",
"v6_s197_05_weight = 3300\n",
"v6_s197_05_eff = (v6_s197_05_hp/\\\n",
" v6_s197_05_ci)*eff\n",
"v6_s197_05 = {'horsepower':v6_s197_05_hp,\n",
" 'bore_size':v6_s197_05_ci/v6_s197_05_cl,\n",
" 'grunt':(v6_s197_05_ci/v6_s197_05_cl)/v6_s197_05_eff,\n",
" 'load':v6_s197_05_ci/v6_s197_05_weight}\n",
"mustang_hp = 400\n",
"mustang_ci = 302\n",
"mustang_cl = 8\n",
"mustang_weight = 3600\n",
"mustang_eff = (mustang_hp/\\\n",
" mustang_ci)*eff\n",
"mustang = {'horsepower':mustang_hp,\n",
" 'bore_size':mustang_ci/mustang_cl,\n",
" 'grunt':(mustang_ci/mustang_cl)/mustang_eff,\n",
" 'load':mustang_ci/mustang_weight}\n",
"corolla_hp = 140\n",
"corolla_ci = 110\n",
"corolla_cl = 4\n",
"corolla_weight = 2800\n",
"corolla_eff = (corolla_hp/\\\n",
" corolla_ci)*eff\n",
"corolla = {'horsepower':corolla_hp,\n",
" 'bore_size':corolla_ci/corolla_cl,\n",
" 'grunt':(corolla_ci/corolla_cl)/corolla_eff,\n",
" 'load':corolla_ci/corolla_weight}\n",
"truck_hp = 500\n",
"truck_ci = 359\n",
"truck_cl = 6\n",
"truck_weight = 6500\n",
"truck_eff = (truck_hp/\\\n",
" truck_ci)*eff\n",
"truck = {'horsepower':truck_hp,\n",
" 'bore_size':truck_ci/truck_cl,\n",
" 'grunt':(truck_ci/truck_cl)/truck_eff,\n",
" 'load':truck_ci/truck_weight}\n",
"grom_hp = 12\n",
"grom_ci = 7.6\n",
"grom_cl = 1\n",
"grom_weight = 400\n",
"grom_eff = (grom_hp/\\\n",
" grom_ci)*eff\n",
"grom = {'horsepower':grom_hp,\n",
" 'bore_size':grom_ci/grom_cl,\n",
" 'grunt':(grom_ci/grom_cl)/grom_eff,\n",
" 'load':grom_ci/grom_weight}\n",
"burb_hp = 320\n",
"burb_ci = 325\n",
"burb_cl = 8\n",
"burb_weight = 6000\n",
"burb_eff = (burb_hp/\\\n",
" burb_ci)*eff\n",
"burb = {'horsepower':burb_hp,\n",
" 'bore_size':burb_ci/burb_cl,\n",
" 'grunt':(burb_ci/burb_cl)/burb_eff,\n",
" 'load':burb_ci/burb_weight}\n",
"\n",
"mdf = pd.DataFrame(mustang,index=[0])\n",
"cdf = pd.DataFrame(corolla,index=[0])\n",
"tdf = pd.DataFrame(truck,index=[0])\n",
"gdf = pd.DataFrame(grom,index=[0])\n",
"bdf = pd.DataFrame(burb,index=[0])\n",
"sm5 = pd.DataFrame(v6_s197_05,index=[0])\n",
"\n",
"mustang_predicts = []\n",
"corolla_predicts = []\n",
"truck_predicts = []\n",
"grom_predicts = []\n",
"burb_predicts = []\n",
"v6_s197_05_predicts = []\n",
"\n",
"r2_test_list = []\n",
"r2_train_list = []\n",
"rmse_test_list = []\n",
"rmse_train_list = []\n",
"\n",
"for i in range(201):\n",
" X_train, X_test, y_train, y_test = train_test_split(X, y)\n",
"\n",
" pipe = Pipeline([\n",
" # ('minmax', MinMaxScaler()),\n",
" # ('ss', StandardScaler()),\n",
" ('qt', QuantileTransformer(n_quantiles=297)),\n",
" # ('rob', RobustScaler()),\n",
" \n",
" ('linreg', LinearRegression()),\n",
" # ('lasso', Lasso()),\n",
" # ('lassocv', LassoCV()),\n",
" # ('ridge', Ridge()),\n",
" # ('ridgeCV', RidgeCV),\n",
" # ('lsvr', LinearSVR())\n",
" ])\n",
"\n",
" model = pipe.fit(X_train,y_train)\n",
" test_predict = model.predict(X_test)\n",
" train_predict = model.predict(X_train)\n",
"\n",
" r2_test = r2_score(y_test, test_predict)\n",
" r2_train = r2_score(y_train, train_predict)\n",
" rmse_test = mean_squared_error(y_test, test_predict ,squared=False)\n",
" rmse_train = mean_squared_error(y_train, train_predict ,squared=False)\n",
"\n",
" r2_test_list.append(r2_test)\n",
" r2_train_list.append(r2_train)\n",
" rmse_test_list.append(rmse_test)\n",
" rmse_train_list.append(rmse_train)\n",
" truck_predicts.append(model.predict(tdf)[0])\n",
" mustang_predicts.append(model.predict(mdf)[0])\n",
" grom_predicts.append(model.predict(gdf)[0])\n",
" burb_predicts.append(model.predict(bdf)[0])\n",
" corolla_predicts.append(model.predict(cdf)[0])\n",
" v6_s197_05_predicts.append(model.predict(sm5)[0])\n",
"\n",
"plt.subplots(figsize=(10,6))\n",
"plt.title('R-squared over 200 iterations')\n",
"plt.plot(r2_test_list,label='R2 Test')\n",
"plt.plot(r2_train_list,label='R2 Train')\n",
"plt.legend()\n",
"plt.show();\n",
"\n",
"avg = np.mean\n",
"print(f'''| R2 Test | R2 Train | RMSE Test | RMSE Train |\n",
"|-----------|----------|-------------|------------|\n",
"| Min: {min(r2_test_list):.2f} | Min:{min(r2_train_list):.2f} | Min: {min(rmse_test_list):.2f} | Min:{min(rmse_train_list):.2f} |\n",
"| Avg: {avg(r2_test_list):.2f} | Avg:{avg(r2_train_list):.2f} | Avg: {avg(rmse_test_list):.2f} | Avg:{avg(rmse_train_list):.2f} |\n",
"| Max: {max(r2_test_list):.2f} | Max:{max(r2_train_list):.2f} | Max: {max(rmse_test_list):.2f} | Max:{max(rmse_train_list):.2f} |\n",
"''')\n",
"plt.subplots(figsize=(10,5))\n",
"plt.title('RMSE over 200 iterations')\n",
"plt.plot(rmse_test_list,label='RMSE Test')\n",
"plt.plot(rmse_train_list,label='RMSE Train')\n",
"plt.legend()\n",
"plt.show();\n",
"\n",
"print(f'Truck Avg: {avg(truck_predicts):.2f}')\n",
"print(f'Mustang Avg: {avg(mustang_predicts):.2f}')\n",
"print(f'Grom Avg: {avg(grom_predicts):.2f}')\n",
"print(f'Burb Avg: {avg(burb_predicts):.2f}')\n",
"print(f'Corolla Avg: {avg(corolla_predicts):.2f}')\n",
"print(f'Steve Avg: {avg(v6_s197_05_predicts):.2f}')"
]
},
{
"cell_type": "markdown",
"id": "67f5823f-4d25-4e75-95a0-b11249d861e8",
"metadata": {},
"source": [
"After testing lots of models, scalers, and experimenting with different mixtures of features, I've come to the conclusion that it's futile.\n",
"\n",
"It's cliché, but at under 400 rows I think I can complain about not having enough data.\n",
"\n",
"QuantileTransformer is the heavy lifter and the model really doesn't seem to matter so long as it's linear. In fact, the scores go down as the models get fancier. I think because it's overthinking such a small data set, it's making connections that are coincidental.\n",
"\n",
"The best metric I found was to make predictions on my own vehicles, in other words it's unseen data that I could just pull from nothing. I had used model year as a feature and it seemed to perform quite well until I introduced a 2012, it predicted MPG into the hundreds. I think model year would be a great feature if the data spanned across decades and there was good representation. It could probably even be made categorical as it would be an indicator of tech advancing over decades. All of the vehicles here are more or less in the same \"tech era\" but the model did seem to find a signal. At any rate it's too unstable if used with data outside the training set.\n",
"\n",
"My personal vehicle predictions are actually quite close. They're a bit on the lower end, but also consider that neither one is even close to being represented in the training data. My truck is a turbo diesel (boost to efficiency) and my car is gas but has some fancy cam phasing and electronic fuel injection. Basically they have higher efficiency compared to anything in the training set."
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.5"
}
},
"nbformat": 4,
"nbformat_minor": 5
}