commit d0bbee40e2290f6a04b838867ce708c7482649c7 Author: Aditya <109960864+KondeAditya@users.noreply.github.com> Date: Thu Oct 30 23:08:07 2025 +0530 Add files via upload diff --git a/1_Uber.ipynb b/1_Uber.ipynb new file mode 100644 index 0000000..552bd0b --- /dev/null +++ b/1_Uber.ipynb @@ -0,0 +1,1252 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "trusted": true + }, + "outputs": [], + "source": [ + "#import libraries\n", + "import pandas as pd\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "import warnings \n", + "#We do not want to see warnings\n", + "warnings.filterwarnings(\"ignore\") " + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "trusted": true + }, + "outputs": [], + "source": [ + "#import data\n", + "data = pd.read_csv(\"uber.csv\")" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "trusted": true + }, + "outputs": [], + "source": [ + "#Create a data copy\n", + "df = data.copy()" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "trusted": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#Preview the first five rows (head must be called, not just referenced)\n", + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "trusted": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 200000 entries, 0 to 199999\n", + "Data columns (total 9 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 Unnamed: 0 200000 non-null int64 \n", + " 1 key 200000 non-null object \n", + " 2 fare_amount 200000 non-null float64\n", + " 3 pickup_datetime 200000 non-null object \n", + " 4 pickup_longitude 200000 non-null float64\n", + " 5 pickup_latitude 200000 non-null float64\n", + " 6 dropoff_longitude 199999 non-null 
float64\n", + " 7 dropoff_latitude 199999 non-null float64\n", + " 8 passenger_count 200000 non-null int64 \n", + "dtypes: float64(5), int64(2), object(2)\n", + "memory usage: 13.7+ MB\n" + ] + } + ], + "source": [ + "#Get Info\n", + "df.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "trusted": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0 2015-05-07 19:52:06+00:00\n", + "1 2009-07-17 20:04:56+00:00\n", + "2 2009-08-24 21:45:00+00:00\n", + "3 2009-06-26 08:22:21+00:00\n", + "4 2014-08-28 17:47:00+00:00\n", + " ... \n", + "199995 2012-10-28 10:49:00+00:00\n", + "199996 2014-03-14 01:09:00+00:00\n", + "199997 2009-06-29 00:42:00+00:00\n", + "199998 2015-05-20 14:56:25+00:00\n", + "199999 2010-05-15 04:08:00+00:00\n", + "Name: pickup_datetime, Length: 200000, dtype: datetime64[ns, UTC]\n" + ] + } + ], + "source": [ + "#pickup_datetime is not in required data format\n", + "df[\"pickup_datetime\"] = pd.to_datetime(df[\"pickup_datetime\"])\n", + "print(df[\"pickup_datetime\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "trusted": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 200000 entries, 0 to 199999\n", + "Data columns (total 9 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 Unnamed: 0 200000 non-null int64 \n", + " 1 key 200000 non-null object \n", + " 2 fare_amount 200000 non-null float64 \n", + " 3 pickup_datetime 200000 non-null datetime64[ns, UTC]\n", + " 4 pickup_longitude 200000 non-null float64 \n", + " 5 pickup_latitude 200000 non-null float64 \n", + " 6 dropoff_longitude 199999 non-null float64 \n", + " 7 dropoff_latitude 199999 non-null float64 \n", + " 8 passenger_count 200000 non-null int64 \n", + "dtypes: datetime64[ns, UTC](1), float64(5), int64(2), object(1)\n", + "memory usage: 13.7+ MB\n" + ] + } + ], + "source": [ + 
"df.info()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "trusted": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Unnamed: 0fare_amountpickup_longitudepickup_latitudedropoff_longitudedropoff_latitudepassenger_count
count2.000000e+05200000.000000200000.000000200000.000000199999.000000199999.000000200000.000000
mean2.771250e+0711.359955-72.52763839.935885-72.52529239.9238901.684535
std1.601382e+079.90177611.4377877.72053913.1174086.7948291.385997
min1.000000e+00-52.000000-1340.648410-74.015515-3356.666300-881.9855130.000000
25%1.382535e+076.000000-73.99206540.734796-73.99140740.7338231.000000
50%2.774550e+078.500000-73.98182340.752592-73.98009340.7530421.000000
75%4.155530e+0712.500000-73.96715440.767158-73.96365840.7680012.000000
max5.542357e+07499.00000057.4184571644.4214821153.572603872.697628208.000000
\n", + "
" + ], + "text/plain": [ + " Unnamed: 0 fare_amount pickup_longitude pickup_latitude \\\n", + "count 2.000000e+05 200000.000000 200000.000000 200000.000000 \n", + "mean 2.771250e+07 11.359955 -72.527638 39.935885 \n", + "std 1.601382e+07 9.901776 11.437787 7.720539 \n", + "min 1.000000e+00 -52.000000 -1340.648410 -74.015515 \n", + "25% 1.382535e+07 6.000000 -73.992065 40.734796 \n", + "50% 2.774550e+07 8.500000 -73.981823 40.752592 \n", + "75% 4.155530e+07 12.500000 -73.967154 40.767158 \n", + "max 5.542357e+07 499.000000 57.418457 1644.421482 \n", + "\n", + " dropoff_longitude dropoff_latitude passenger_count \n", + "count 199999.000000 199999.000000 200000.000000 \n", + "mean -72.525292 39.923890 1.684535 \n", + "std 13.117408 6.794829 1.385997 \n", + "min -3356.666300 -881.985513 0.000000 \n", + "25% -73.991407 40.733823 1.000000 \n", + "50% -73.980093 40.753042 1.000000 \n", + "75% -73.963658 40.768001 2.000000 \n", + "max 1153.572603 872.697628 208.000000 " + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#Statistics of data\n", + "df.describe()" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "trusted": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Unnamed: 0 0\n", + "key 0\n", + "fare_amount 0\n", + "pickup_datetime 0\n", + "pickup_longitude 0\n", + "pickup_latitude 0\n", + "dropoff_longitude 1\n", + "dropoff_latitude 1\n", + "passenger_count 0\n", + "dtype: int64" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#Number of missing values\n", + "df.isnull().sum()" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "trusted": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Unnamed: 0 fare_amount pickup_longitude pickup_latitude \\\n", + "Unnamed: 0 1.000000 0.000589 0.000230 -0.000341 \n", + "fare_amount 0.000589 
1.000000 0.010457 -0.008481 \n", + "pickup_longitude 0.000230 0.010457 1.000000 -0.816461 \n", + "pickup_latitude -0.000341 -0.008481 -0.816461 1.000000 \n", + "dropoff_longitude 0.000270 0.008986 0.833026 -0.774787 \n", + "dropoff_latitude 0.000271 -0.011014 -0.846324 0.702367 \n", + "passenger_count 0.002257 0.010150 -0.000414 -0.001560 \n", + "\n", + " dropoff_longitude dropoff_latitude passenger_count \n", + "Unnamed: 0 0.000270 0.000271 0.002257 \n", + "fare_amount 0.008986 -0.011014 0.010150 \n", + "pickup_longitude 0.833026 -0.846324 -0.000414 \n", + "pickup_latitude -0.774787 0.702367 -0.001560 \n", + "dropoff_longitude 1.000000 -0.917010 0.000033 \n", + "dropoff_latitude -0.917010 1.000000 -0.000659 \n", + "passenger_count 0.000033 -0.000659 1.000000 \n" + ] + } + ], + "source": [ + "#Correlation\n", + "cor=df.select_dtypes(include=['number']).corr()\n", + "print(cor)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "trusted": true + }, + "outputs": [], + "source": [ + "#Drop the rows with missing values\n", + "df.dropna(inplace=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "trusted": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'whiskers': [,\n", + " ],\n", + " 'caps': [,\n", + " ],\n", + " 'boxes': [],\n", + " 'medians': [],\n", + " 'fliers': [],\n", + " 'means': []}" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAigAAAGdCAYAAAA44ojeAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8fJSN1AAAACXBIWXMAAA9hAAAPYQGoP6dpAAApqElEQVR4nO3df3TU1Z3/8VcyISEJmUFifi4JxBPdgASsaGHQ8RBlTTGyzEnS0y6I7B63btnAWQEphnVp6+4hPRjEdYtS9+wW9gh1S04MX1Kgy6ECWRlR43IOwUKjJCY2mUChzISYH2Rmvn94ZspIagkEPp/MPB/nzJF87nsy7/lD5sWd+7k3JhAIBAQAAGAisUY3AAAA8GUEFAAAYDoEFAAAYDoEFAAAYDoEFAAAYDoEFAAAYDoEFAAAYDoEFAAAYDpxRjdwPfx+vzo6OpSSkqKYmBij2wEAANcgEAiou7tb2dnZio396jmSURlQOjo6lJOTY3QbAADgOrS3t2vixIlfWTMqA0pKSoqkL96g1Wo1uBsAAHAtvF6vcnJyQp/jX2VUBpTg1zpWq5WAAgDAKHMtyzNYJAsAAEyHgAIAAEyHgAIAAEyHgAIAAEyHgAIAAEyHgAIAAEyHgAIAAEyHgAIAAExnVG7UBiAy+Xw+NTQ0qLOzU1lZWXI4HLJYLEa3BcAAw5pB+cEPfqCYmJiwR0FBQWi8r69PFRUVSk1N1bhx41RWVqaurq6w39HW1qaSkhIlJSUpPT1da9as0eDg4Mi8GwCjVm1trfLz81VUVKRFixapqKhI+fn5qq2tNbo1AAYY9lc8d999tzo7O0OP//3f/w2NrVy5Unv27NGuXbt0+PBhdXR0qLS0NDTu8/lUUlKigYEBHT16VNu3b9e2bdu0fv36kXk3AEal2tpalZeXq7CwUC6XS93d3XK5XCosLFR5eTkhBYhCMYFAIHCtxT/4wQ9UV1en48ePXzXm8XiUlpamnTt3qry8XJJ06tQpTZkyRS6XS7Nnz9a+ffv0+OOPq6OjQxkZGZKkrVu3au3atTp37pzi4+OvqQ+v1yubzSaPx8NZPMAo5/P5lJ+fr8LCQtXV1YUdwe73++V0OtXU1KTm5ma+7gFGueF8fg97BqW5uVnZ2dm64447tHjxYrW1tUmSGhsbdfnyZc2bNy9UW1BQoNzcXLlcLkkK/YsoGE4kqbi4WF6vVydPnvyjr9nf3y+v1xv2ABAZGhoa1NraqnXr1oWFE0mKjY1VZWWlWlpa1NDQYFCHAIwwrIAya9Ysbdu2Tfv379drr72mlpYWORwOdXd3y+12Kz4+XuPHjw97TkZGhtxutyTJ7XaHhZPgeHDsj6mqqpLNZgs9cnJyhtM2ABPr7OyUJE2bNm3I8eD1YB2A6DCsu3jmz58f+vP06dM1a9YsTZo0ST//+c+VmJg44s0FVVZWatWqVaGfvV4vIQWIEFlZWZKkpqYmzZ49+6rxpqamsDoA0eGG9kEZP3687rrrLn388cfKzMzUwMCALl68GFbT1dWlzMxMSVJmZuZVd/UEfw7WDCUhIUFWqzXsASAyOBwOTZ48WRs2bJDf7w8b8/v9qqqqUl5enhwOh0EdAjDCDQWUS5cu6ZNPPlFWVpZmzpypMWPG6ODBg6Hx06dPq62tTXa7XZJkt9t14sQJnT17NlRz4MABWa1WTZ069UZaATBKWSwWbdq0SfX19XI6nWF38TidTtXX16u6upoFskCUGdZdPM8++6wWLFigSZMmqaOjQ9///vd1/PhxffTRR0pLS9OyZcu0d+9ebdu2TVarVStWrJAkHT16VNIXq/XvueceZWdna+PGjXK73VqyZIn+9m//Vhs2bLjmprmLB4g8tbW1Wr16tVpbW0PX8vLyVF1dHbZdAYDRazif38Nag/LZZ5/pr/7qr3T+/HmlpaXpwQcf1Lvvvqu0tDRJ0ubNmxUbG6uysjL19/eruLhYr776auj5FotF9fX1WrZsmex2u5KTk7V
06VK98MIL1/E2AUSS0tJSLVy4kJ1kAUga5gyKWTCDAgDA6HNT90EBAAC42QgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdG4ooPzoRz9STEyMnnnmmdC1vr4+VVRUKDU1VePGjVNZWZm6urrCntfW1qaSkhIlJSUpPT1da9as0eDg4I20AgAAIsh1B5T3339fP/nJTzR9+vSw6ytXrtSePXu0a9cuHT58WB0dHSotLQ2N+3w+lZSUaGBgQEePHtX27du1bds2rV+//vrfBQAAiCjXFVAuXbqkxYsX69///d912223ha57PB79x3/8h1566SU9/PDDmjlzpn7605/q6NGjevfddyVJ//M//6OPPvpIb7zxhu655x7Nnz9f//zP/6wtW7ZoYGBgZN4VAAAY1a4roFRUVKikpETz5s0Lu97Y2KjLly+HXS8oKFBubq5cLpckyeVyqbCwUBkZGaGa4uJieb1enTx5csjX6+/vl9frDXsAAIDIFTfcJ7z55pv68MMP9f7771815na7FR8fr/Hjx4ddz8jIkNvtDtVcGU6C48GxoVRVVemHP/zhcFsFAACj1LBmUNrb2/UP//AP2rFjh8aOHXuzerpKZWWlPB5P6NHe3n7LXhsAANx6wwoojY2NOnv2rO69917FxcUpLi5Ohw8f1iuvvKK4uDhlZGRoYGBAFy9eDHteV1eXMjMzJUmZmZlX3dUT/DlY82UJCQmyWq1hDwAAELmGFVAeeeQRnThxQsePHw897rvvPi1evDj05zFjxujgwYOh55w+fVptbW2y2+2SJLvdrhMnTujs2bOhmgMHDshqtWrq1Kkj9LYAAMBoNqw1KCkpKZo2bVrYteTkZKWmpoauP/XUU1q1apUmTJggq9WqFStWyG63a/bs2ZKkRx99VFOnTtWSJUu0ceNGud1uPf/886qoqFBCQsIIvS0AADCaDXuR7J+yefNmxcbGqqysTP39/SouLtarr74aGrdYLKqvr9eyZctkt9uVnJyspUuX6oUXXhjpVgAAwCgVEwgEAkY3MVxer1c2m00ej4f1KAAAjBLD+fzmLB4AAGA6BBQAAGA6BBQAAGA6BBQAAGA6BBQAAGA6BBQAAGA6BBQAAGA6BBQAAGA6BBQAAGA6BBQAAGA6BBQAAGA6BBQAAGA6BBQAAGA6BBQAAGA6BBQAAGA6BBQAAGA6BBQAAGA6BBQAAGA6cUY3AABBPp9PDQ0N6uzsVFZWlhwOhywWi9FtATAAMygATKG2tlb5+fkqKirSokWLVFRUpPz8fNXW1hrdGgADEFAAGK62tlbl5eUqLCyUy+VSd3e3XC6XCgsLVV5eTkgBolBMIBAIGN3EcHm9XtlsNnk8HlmtVqPbAXADfD6f8vPzVVhYqLq6OsXG/uHfTX6/X06nU01NTWpububrHmCUG87nNzMoAAzV0NCg1tZWrVu3LiycSFJsbKwqKyvV0tKihoYGgzoEYAQCCgBDdXZ2SpKmTZs25HjwerAOQHQgoAAwVFZWliSpqalpyPHg9WAdgOhAQAFgKIfDocmTJ2vDhg3y+/1hY36/X1VVVcrLy5PD4TCoQwBGIKAAMJTFYtGmTZtUX18vp9MZdheP0+lUfX29qqurWSALRBk2agNguNLSUtX
U1Gj16tWaM2dO6HpeXp5qampUWlpqYHcAjMBtxgBMg51kgcg2nM9vZlAAmIbFYtHcuXONbgOACbAGBQAAmA4BBQAAmA4BBQAAmA4BBQAAmA4BBQAAmA4BBQAAmA4BBQAAmA4BBQAAmA4BBQAAmA4BBQAAmA4BBQAAmA4BBQAAmA4BBQAAmA4BBQAAmA4BBQAAmA4BBQAAmA4BBQAAmA4BBQAAmA4BBQAAmA4BBQAAmM6wAsprr72m6dOny2q1ymq1ym63a9++faHxvr4+VVRUKDU1VePGjVNZWZm6urrCfkdbW5tKSkqUlJSk9PR0rVmzRoODgyPzbgAAQEQYVkCZOHGifvSjH6mxsVEffPCBHn74YS1cuFAnT56UJK1cuVJ79uzRrl27dPjwYXV0dKi0tDT0fJ/Pp5KSEg0MDOjo0aPavn27tm3bpvXr14/suwIAAKNaTCAQCNzIL5gwYYJefPFFlZeXKy0tTTt37lR5ebkk6dSpU5oyZYpcLpdmz56tffv26fHHH1dHR4cyMjIkSVu3btXatWt17tw5xcfHX9Nrer1e2Ww2eTweWa3WG2kfAADcIsP5/L7uNSg+n09vvvmmenp6ZLfb1djYqMuXL2vevHmhmoKCAuXm5srlckmSXC6XCgsLQ+FEkoqLi+X1ekOzMEPp7++X1+sNewAAgMg17IBy4sQJjRs3TgkJCfrud7+rt956S1OnTpXb7VZ8fLzGjx8fVp+RkSG32y1JcrvdYeEkOB4c+2Oqqqpks9lCj5ycnOG2DQAARpFhB5Q///M/1/Hjx3Xs2DEtW7ZMS5cu1UcffXQzeguprKyUx+MJPdrb22/q6wEAAGPFDfcJ8fHxys/PlyTNnDlT77//vv71X/9V3/rWtzQwMKCLFy+GzaJ0dXUpMzNTkpSZman33nsv7PcF7/IJ1gwlISFBCQkJw20VAACMUje8D4rf71d/f79mzpypMWPG6ODBg6Gx06dPq62tTXa7XZJkt9t14sQJnT17NlRz4MABWa1WTZ069UZbAQAAEWJYMyiVlZWaP3++cnNz1d3drZ07d+rQoUP65S9/KZvNpqeeekqrVq3ShAkTZLVatWLFCtntds2ePVuS9Oijj2rq1KlasmSJNm7cKLfbreeff14VFRXMkAAAgJBhBZSzZ8/qySefVGdnp2w2m6ZPn65f/vKX+ou/+AtJ0ubNmxUbG6uysjL19/eruLhYr776auj5FotF9fX1WrZsmex2u5KTk7V06VK98MILI/uuAADAqHbD+6AYgX1QAAAYfW7JPigAAAA3CwEFAACYDgEFAACYzrD3QQGAm8Xn86mhoUGdnZ3KysqSw+GQxWIxui0ABmAGBYAp1NbWKj8/X0VFRVq0aJGKioqUn5+v2tpao1sDYAACCgDD1dbWqry8XIWFhXK5XOru7g4dLlpeXk5IAaIQtxkDMJTP51N+fr4KCwtVV1en2Ng//LvJ7/fL6XSqqalJzc3NfN0DjHLcZgxg1GhoaFBra6vWrVsXFk4kKTY2VpWVlWppaVFDQ4NBHQIwAgEFgKE6OzslSdOmTRtyPHg9WAcgOhBQABgqKytLktTU1DTkePB6sA5AdCCgADCUw+HQ5MmTtWHDBvn9/rAxv9+vqqoq5eXlyeFwGNQhACMQUAAYymKxaNOmTaqvr5fT6Qy7i8fpdKq+vl7V1dUskAWiDBu1ATBcaWmpampqtHr1as2ZMyd0PS8vTzU1NSotLTWwOwBG4DZjAKbBTrJAZBvO5zczKABMw2KxaO7cuUa3AcAEWIMCAABMh4ACAABMh4ACAABMhzUoAEyDRbIAgphBAWAKtbW1ys/PV1FRkRYtWqSioiLl5+dzkjEQpQgoAAxXW1ur8vJyFRYWhm3UVlhYqPLyckIKEIXYBwWAoXw+n/Lz81VYWKi6urqwE439fr+cTqeamprU3NzM1z3AKDecz29mUAAYqqGhQa2trVq3bl1YOJGk2NhYVVZWqqWlRQ0NDQZ
1CMAIBBQAhurs7JQkTZs2bcjx4PVgHYDoQEABYKisrCxJUlNT05DjwevBOgDRgYACwFAOh0OTJ0/Whg0b5Pf7w8b8fr+qqqqUl5cnh8NhUIcAjEBAAWAoi8WiTZs2qb6+Xk6nM+wuHqfTqfr6elVXV7NAFogybNQGwHClpaWqqanR6tWrNWfOnND1vLw81dTUqLS01MDuABiB24wBmAY7yQKRbTif38ygADANi8WiuXPnGt0GABNgDQoAADAdZlAAmAZf8QAIYgYFgClwWCCAKxFQABiOwwIBfBl38QAwFIcFAtGDwwIBjBocFghgKAQUAIbisEAAQyGgADDUlYcF+nw+HTp0SD/72c906NAh+Xw+DgsEohRrUAAYKrgG5fbbb9e5c+f06aefhsYmTZqktLQ0nT9/njUoQARgDQqAUcNiseib3/ymPvjgA/X19en1119XR0eHXn/9dfX19emDDz5QeXk54QSIMsygADDUlTMov/vd79Ta2hoay8vLU2pqKjMoQITgLB4Ao0bwLp6f/exnuv/++6/aSfa9997TnDlz1NDQwDk9QBQhoAAw1JV38Qx1WCB38QDRiTUoAAx15V08Q+EuHiA6EVAAGMrhcGjy5MnasGGD/H5/2Jjf71dVVZXy8vLkcDgM6hCAEfiKB4ChLBaLNm3apPLyci1cuFDf+MY3lJiYqN7eXu3fv1+/+MUvVFNTwwJZIMpwFw8AU/je976nzZs3a3BwMHQtLi5OK1eu1MaNGw3sDMBI4S4eAKNKbW2tqqurVVJSovnz54dmUPbt26fq6mrNnj1bpaWlRrcJ4BZiBgWAoTjNGIge7CQLYNS48jTjQCAQdhZPIBDgNGMgSg0roFRVVen+++9XSkqK0tPT5XQ6dfr06bCavr4+VVRUKDU1VePGjVNZWZm6urrCatra2lRSUqKkpCSlp6drzZo1Yd87A4gewf1NPvnkE+Xn56uoqEiLFi1SUVGR8vPzdebMmbA6ANFhWAHl8OHDqqio0LvvvqsDBw7o8uXLevTRR9XT0xOqWblypfbs2aNdu3bp8OHD6ujoCPvu2OfzqaSkRAMDAzp69Ki2b9+ubdu2af369SP3rgCMGsH9TZYsWaJp06Zpy5Yt+s///E9t2bJF06ZN05IlS8LqAESHG1qDcu7cOaWnp+vw4cN66KGH5PF4lJaWpp07d6q8vFySdOrUKU2ZMkUul0uzZ8/Wvn379Pjjj6ujo0MZGRmSpK1bt2rt2rU6d+6c4uPj/+TrsgYFiBwDAwNKTk5WcnKyxo8ff9VpxhcvXlRPT496enqu6e8HAOZ1y9ageDweSdKECRMkSY2Njbp8+bLmzZsXqikoKFBubq5cLpckyeVyqbCwMBROJKm4uFher1cnT54c8nX6+/vl9XrDHgAiw9GjRzU4OCiPx6O+vj6tXr1aW7Zs0erVq9XX1yePx6PBwUEdPXrU6FYB3ELXfZux3+/XM888owceeCB0Vobb7VZ8fLzGjx8fVpuRkSG32x2quTKcBMeDY0OpqqrSD3/4w+ttFYCJ/fa3v5X0xcnFra2t2rRpU2gsJiZGeXl5amlpCdUBiA7XPYNSUVGhpqYmvfnmmyPZz5AqKyvl8XhCj/b29pv+mgBujXPnzkmSWlpahhwPXg/WAYgO1zWDsnz5ctXX1+vIkSOaOHFi6HpmZqYGBgZ08eLFsFmUrq4uZWZmhmree++9sN8XvMsnWPNlCQkJSkhIuJ5WAZhcamrqiNYBiAzDmkEJBAJavny53nrrLf3qV79SXl5e2PjMmTM1ZswYHTx4MHTt9OnTamtrk91ulyTZ7XadOHFCZ8+eDdUcOHBAVqtVU6dOvZH3AmAUuvKr3bS0NL3++uvq6OjQ66+/rrS0tCHrAES+Yc2gVFRUaOfOndq9e7dSUlJCf2HYbDYlJibKZrPpqaee0qpVqzRhwgRZrVatWLFCdrtds2fPliQ9+uijmjp1qpYsWaKNGzfK7Xbr+eefV0V
FBbMkQBT6v//7P0nS2LFjlZSUpKeffjo0lpeXp7Fjx6qvry9UByA6DCugvPbaa5KkuXPnhl3/6U9/qr/+67+WJG3evFmxsbEqKytTf3+/iouL9eqrr4ZqLRaL6uvrtWzZMtntdiUnJ2vp0qV64YUXbuydABiVgmvK+vv7dffdd+sv//Iv1dfXp7Fjx+qTTz5Ra2trWB2A6MBZPAAM9cQTT2jHjh1KTU0N3VIcFBcXJ5vNpvPnz2vx4sV64403DOwUwI3iNGMAo8aTTz6pHTt26Pz580pLS1NRUZGSk5PV09Ojt99+O3T3zpNPPmlwpwBuJWZQABhqYGBAY8eO1Vf9VRQTE6O+vj52kgVGOU4zBjBqHD169CvDifTFHYTsJAtEFwIKAEMFd4j92te+ppycnLCx3Nxcfe1rXwurAxAdCCgADBVcY/L3f//3amlp0dtvv62dO3fq7bff1pkzZ/Td7343rA5AdCCgADBUcDO22tpa+f3+sDG/36+6urqwOgDRgbt4ABjqz/7szyRJ+/fvl81mU29vb2gsMTFRfX19YXUAogMzKAAM5XA4lJaWpkAgEAojQX19fQoEAkpPT5fD4TCoQwBGYAYFgOEGBgYkSbfffruefPJJ3XHHHTpz5oz+67/+S+fOnVN/f7/BHQK41QgoAAx16NAheTweFRQUqKenR5s2bQqN5ebmqqCgQKdOndKhQ4f0yCOPGNgpgFuJr3gAGOrQoUOSpOnTp6ujoyNs7Le//a0KCwvD6gBEBwIKAFP4+c9/ftWGbYFAQLt27TKoIwBG4iseAIaaM2dO6M/FxcV6/PHHlZiYqN7eXtXX12vfvn1X1QGIfAQUAIY6efJk6M+HDh0KBRLpi9uMr6ybP3/+Le0NgHH4igeAod55553Qn798t07w7p4v1wGIfAQUAIZKTk6WJOXk5GjixIlhYxMnTgydzxOsAxAd+IoHgKFmzJihHTt26OLFizp37pxcLpc6OzuVlZUlu90e2uJ+xowZBncK4FZiBgWAobKzsyVJ3d3dys3N1Z49e3ThwgXt2bNHubm56u7uDqsDEB2YQQFgqCvP2Dl79qxeeumlP1kHIPIxgwLAUA6HQ+np6V9Zw1k8QPQhoAAw3IULF25oHEDkIaAAMNT+/fs1ODj4lTWDg4Pav3//LeoIgBnEBL68t/Qo4PV6ZbPZ5PF4ZLVajW4HwA2YOXOmPvzwQ0lSSUmJHnvssdBOsnv37tUvfvELSdK9996rxsZGI1sFcIOG8/nNDAoAQwUPCLzrrrtUU1OjgYEBffjhhxoYGFBNTY3y8/PD6gBEB+7iAWColJQUud1utbW1KSUlJezrnjVr1iguLi5UByB6MIMCwFALFy6UJPX19V21FmVwcFB9fX1hdQCiAwEFgKEefvjhEa0DEBkIKAAMdeVpxiNRByAyEFAAGOpaTynmNGMguhBQABgqeEpxamqqYmPD/0qKjY1VampqWB2A6EBAAWCoe+65R5J0/vx5+f3+sDG/36/z58+H1QGIDgQUAIbKyMgY0ToAkYGAAsBQXV1dI1oHIDIQUAAY6tixYyNaByAyEFAAGKqhoWFE6wBEBgIKAEP19PSMaB2AyMBZPABMY8KECbrtttvU29urxMRE/f73v9eFCxeMbguAAQgoAAyVkpKiS5cuSZIuXLjwRwMJhwUC0YWveAAYituMAQyFgALAUA899NCI1gGIDAQUAIZyuVwjWgcgMhBQABiqqalpROsARAYCCgBDBQKBEa0DEBkIKAAMNWnSpBGtAxAZCCgADDVlypQRrQMQGQgoAAzFIlkAQyGgADBUb2/viNYBiAwEFACGGhgYGNE6AJGBgALAULGx1/bX0LXWAYgM/B8PwFAxMTEjWgcgMgw7oBw5ckQLFixQdna2YmJiVFdXFzYeCAS0fv16ZWVlKTExUfPmzVNzc3NYzYULF7R48WJZrVaNHz9eTz31VOiwMADRhRkUAEMZ9v/xPT09mjFjhrZs2TL
k+MaNG/XKK69o69atOnbsmJKTk1VcXKy+vr5QzeLFi3Xy5EkdOHBA9fX1OnLkiJ5++unrfxcARq3Lly+PaB2AyBATuIHtGWNiYvTWW2/J6XRK+mL2JDs7W6tXr9azzz4rSfJ4PMrIyNC2bdv07W9/W7/+9a81depUvf/++7rvvvskSfv379djjz2mzz77TNnZ2X/ydb1er2w2mzwej6xW6/W2D8AELBaL/H7/n6yLjY2Vz+e7BR0BuFmG8/k9onOmLS0tcrvdmjdvXuiazWbTrFmzQnsYuFwujR8/PhROJGnevHmKjY3VsWPHhvy9/f398nq9YQ8AkeFawslw6gBEhhENKG63W5KUkZERdj0jIyM05na7lZ6eHjYeFxenCRMmhGq+rKqqSjabLfTIyckZybYBAIDJjIpVZ5WVlfJ4PKFHe3u70S0BAICbaEQDSmZmpiSpq6sr7HpXV1doLDMzU2fPng0bHxwc1IULF0I1X5aQkCCr1Rr2AAAAkWtEA0peXp4yMzN18ODB0DWv16tjx47JbrdLkux2uy5evKjGxsZQza9+9Sv5/X7NmjVrJNsBAACjVNxwn3Dp0iV9/PHHoZ9bWlp0/PhxTZgwQbm5uXrmmWf0L//yL7rzzjuVl5enf/qnf1J2dnboTp8pU6boG9/4hr7zne9o69atunz5spYvX65vf/vb13QHDwAAiHzDDigffPCBioqKQj+vWrVKkrR06VJt27ZN3/ve99TT06Onn35aFy9e1IMPPqj9+/dr7Nixoefs2LFDy5cv1yOPPKLY2FiVlZXplVdeGYG3AwAAIsEN7YNiFPZBASLHcLawH4V/XQG4gmH7oAAAAIwEAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADCdOKMbADD6ff755zp16tRNf50PP/zwup5XUFCgpKSkEe4GwM1EQAFww06dOqWZM2fe9Ne53tdobGzUvffeO8LdALiZCCgAblhBQYEaGxuv67nDCR3X+xoFBQXX9TwAxiGgALhhSUlJt2SGglkQIHqwSBaAoQKBwIjWAYgMBBQAhvtT4YNwAkQfvuIBolxzc7O6u7uNbkONjY1DrkdpbGy87rt3RlJKSoruvPNOo9sAogYBBYhizc3Nuuuuu4xu4yvdiruDrtVvfvMbQgpwixBQgCgWnDl54403NGXKFIO7+UJvb69aW1s1efJkJSYmGt2OJOnXv/61nnjiCVPMNAHRgoACRLnMcTG6N8uiKZlmWZKWrAfy7ja6iTCJFy3KHBdjdBtAVCGgAFHs888/19/NjNeUI38nHTG6G/OaIunvZsYb3QYQVQwNKFu2bNGLL74ot9utGTNm6N/+7d/09a9/3ciWgKhy6tQp/aRxQP/v9GWjWzG9zksBLU5JMboNIGoYFlD++7//W6tWrdLWrVs1a9YsvfzyyyouLtbp06eVnp5uVFtAVHE6nZLMdVZNcL2HmdbFSNzFA9xqMQGDNhiYNWuW7r//fv34xz+WJPn9fuXk5GjFihV67rnnvvK5Xq9XNptNHo9HVqv1VrQL4Bbo7e3V0qVLtWvXLn3zm9/U9u3bTbNQFsCNG87ntyGr4gYGBtTY2Kh58+b9oZHYWM2bN08ul+uq+v7+fnm93rAHgMjidDqVlJSkXbt2SZJ27dqlpKSk0CwPgOhiSED53e9+J5/Pp4yMjLDrGRkZcrvdV9VXVVXJZrOFHjk5ObeqVQC3gNPp1O7du4c
c2717NyEFiEKj4i6eyspKrVq1KvSz1+slpAAm8vnnn+vUqVPX9dze3t5QOLHZbHrggQe0d+9ePfbYY3rnnXfk8Xi0e/duvfPOO9f9dY+Z1tgAuDaGBJTbb79dFotFXV1dYde7urqUmZl5VX1CQoISEhJuVXsAhunUqVMjsuOrx+PR3r17JSn036AHH3zwun9vY2MjJyEDo4whASU+Pl4zZ87UwYMHQ1O3fr9fBw8e1PLly41oCcANKCgoUGNj43U9t7S0VJ9++qkk6aGHHtKiRYsUFxenwcFB7dy5U0eOfLFBy6RJk1RbW3vd/QEYXQz7imfVqlVaunSp7rvvPn3961/Xyy+/rJ6eHv3N3/yNUS0BuE5JSUnXPUMxbtw4SdLkyZP19ttvKzb2D0vjvvOd7+iOO+7Qp59+qnHjxjELAkQRwwLKt771LZ07d07r16+X2+3WPffco/3791+1cBZAZJszZ45Onjyp9vZ2DQ4OKj7+Dzu2Dg4O6rPPPgvVAYgehh6+sXz5cn366afq7+/XsWPHNGvWLCPbAWAAh8MhSfL5fEpJSdHatWv1m9/8RmvXrlVKSop8Pl9YHYDoMCru4gEQua68I29gYEAbN27Uxo0bv7IOQOQjoAAwlMPh0OTJk2WxWHTmzBldubl1TEyM7rjjDvn9fmZQgChDQAFgKIvFok2bNqm8vFzz589XYmKifv/73+u2225Tb2+v9u3bp5qaGlksFqNbBXALEVAAGK60tFTPPvusNm/erMHBwdD1uLg4PfvssyotLTWwOwBGIKAAMFxtba2qq6tVUlISmkUJzp5UV1dr9uzZhBQgyhh2mvGN4DRjIHL4fD7l5+ersLBQdXV1Yfug+P1+OZ1ONTU1qbm5ma95gFHO9KcZA0BQQ0ODWltbtW7durBwIn1xynllZaVaWlrU0NBgUIcAjEBAAWCozs5OSdK0adOGHA9eD9YBiA4EFACGysrKkiQ1NTUNOR68HqwDEB0IKAAMFdwHZcOGDfL7/WFjfr9fVVVVysvLYx8UIMoQUAAYKrgPSn19vZxOp1wul7q7u+VyueR0OlVfX6/q6moWyAJRhtuMARiutLRUNTU1Wr16ddihgHl5eaqpqeEWYyAKcZsxANPw+XxqaGhQZ2ensrKy5HA4mDkBIshwPr+ZQQFgGhaLRXPnzjW6DQAmwBoUAABgOgQUAABgOgQUAABgOgQUAABgOiySBWAa3MUDIIiAAsAUamtr9cwzz6i9vT10LScnRy+//DL7oABRiK94ABiutrZWZWVlYeFEktrb21VWVqba2lqDOgNgFAIKAEP5fD498cQTkqT4+Hg999xz+vjjj/Xcc88pPj5ekvTEE0/I5/MZ2SaAW4ydZAEYat++fXrsscc0ZswYXbp0KRRKJGlgYEDjxo3T5cuXtXfvXs2fP9/ATgHcqOF8fjODAsBQL730kiRpxYoVYeFE+mJGpaKiIqwOQHQgoAAw1MWLFyVJdrt9yPFZs2aF1QGIDgQUAIa67777JEnPPfec/H5/2Jjf79c//uM/htUBiA4EFACGCn5188knn2jBggVyuVzq7u6Wy+XSggULdObMmbA6ANGBfVAAGCoxMVELFy7U7t27tXfvXu3du/eqmoULFyoxMdGA7gAYhRkUAIarq6vTwoULhxxbuHCh6urqbm1DAAzHDAoAU6irq1Nvb6/WrFmj5uZm3XnnnXrxxReZOQGiFAEFgGkkJibqxz/+sdFtADABvuIBAACmQ0ABAACmQ0ABAACmQ0ABAACmQ0ABAACmQ0ABAACmQ0ABAACmQ0ABAACmQ0ABAACmMyp3kg0EApIkr9drcCcAAOBaBT+3g5/jX2VUBpTu7m5JUk5OjsGdAACA4eru7pbNZvvKmpjAtcQYk/H7/ero6FBKSopiYmKMbgfACPJ6vcrJyVF7e7usVqvR7QAYQYFAQN3d3crOzlZs7FevMhmVAQVA5PJ6vbLZbPJ4PAQUIIqxSBYAAJgOAQU
AAJgOAQWAqSQkJOj73/++EhISjG4FgIFYgwIAAEyHGRQAAGA6BBQAAGA6BBQAAGA6BBQAAGA6BBQApnDkyBEtWLBA2dnZiomJUV1dndEtATAQAQWAKfT09GjGjBnasmWL0a0AMIFReVgggMgzf/58zZ8/3+g2AJgEMygAAMB0CCgAAMB0CCgAAMB0CCgAAMB0CCgAAMB0uIsHgClcunRJH3/8cejnlpYWHT9+XBMmTFBubq6BnQEwAqcZAzCFQ4cOqaio6KrrS5cu1bZt2259QwAMRUABAACmwxoUAABgOgQUAABgOgQUAABgOgQUAABgOgQUAABgOgQUAABgOgQUAABgOgQUAABgOgQUAABgOgQUAABgOgQUAABgOgQUAABgOv8fgqVnJ7CqL2sAAAAASUVORK5CYII=", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "plt.boxplot(df['fare_amount'])" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "trusted": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Unnamed: 0 key fare_amount \\\n", + "0 24238194 2015-05-07 19:52:06.0000003 7.5 \n", + "1 27835199 2009-07-17 20:04:56.0000002 7.7 \n", + "2 44984355 2009-08-24 21:45:00.00000061 12.9 \n", + "3 25894730 2009-06-26 08:22:21.0000001 5.3 \n", + "4 17610152 2014-08-28 17:47:00.000000188 16.0 \n", + "... ... ... ... \n", + "199994 3189201 2014-01-31 14:42:00.000000181 12.0 \n", + "199996 16382965 2014-03-14 01:09:00.0000008 7.5 \n", + "199997 27804658 2009-06-29 00:42:00.00000078 30.9 \n", + "199998 20259894 2015-05-20 14:56:25.0000004 14.5 \n", + "199999 11951496 2010-05-15 04:08:00.00000076 14.1 \n", + "\n", + " pickup_datetime pickup_longitude pickup_latitude \\\n", + "0 2015-05-07 19:52:06+00:00 -73.999817 40.738354 \n", + "1 2009-07-17 20:04:56+00:00 -73.994355 40.728225 \n", + "2 2009-08-24 21:45:00+00:00 -74.005043 40.740770 \n", + "3 2009-06-26 08:22:21+00:00 -73.976124 40.790844 \n", + "4 2014-08-28 17:47:00+00:00 -73.925023 40.744085 \n", + "... ... ... ... \n", + "199994 2014-01-31 14:42:00+00:00 -73.983070 40.760770 \n", + "199996 2014-03-14 01:09:00+00:00 -73.984722 40.736837 \n", + "199997 2009-06-29 00:42:00+00:00 -73.986017 40.756487 \n", + "199998 2015-05-20 14:56:25+00:00 -73.997124 40.725452 \n", + "199999 2010-05-15 04:08:00+00:00 -73.984395 40.720077 \n", + "\n", + " dropoff_longitude dropoff_latitude passenger_count \n", + "0 -73.999512 40.723217 1 \n", + "1 -73.994710 40.750325 1 \n", + "2 -73.962565 40.772647 1 \n", + "3 -73.965316 40.803349 3 \n", + "4 -73.973082 40.761247 5 \n", + "... ... ... ... 
\n", + "199994 -73.972972 40.754177 1 \n", + "199996 -74.006672 40.739620 1 \n", + "199997 -73.858957 40.692588 2 \n", + "199998 -73.983215 40.695415 1 \n", + "199999 -73.985508 40.768793 1 \n", + "\n", + "[194911 rows x 9 columns]\n" + ] + }, + { + "data": { + "text/plain": [ + "{'whiskers': [,\n", + " ],\n", + " 'caps': [,\n", + " ],\n", + " 'boxes': [],\n", + " 'medians': [],\n", + " 'fliers': [],\n", + " 'means': []}" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAh8AAAGdCAYAAACyzRGfAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8fJSN1AAAACXBIWXMAAA9hAAAPYQGoP6dpAAAWZElEQVR4nO3dcWjc9f348dfVtDG1uavpNDGYqGO6yya1NAwbdEO7bqUMv5ZG+M2fZd3oti8jLbNlbBS2uckgsvFbN6F1Y0i74Yqsf1jpYMrosGWsFY0WdGtqHZVmxEQY9C5t07Ss9/3Dr/czW7W95PK+JH084EO9z+dzn3tV8O7p5z53lymVSqUAAEhkTq0HAACuLOIDAEhKfAAASYkPACAp8QEAJCU+AICkxAcAkJT4AACSqqv1AP/uwoULMTg4GI2NjZHJZGo9DgBwGUqlUoyMjERra2vMmfPh5zamXXwMDg5GW1tbrccAACZgYGAgbrzxxg/dZ9rFR2NjY0S8O3w2m63xNADA5SgWi9HW1lZ+Hf8w0y4+3nurJZvNig8AmGEu55IJF5wCAEmJDwAgKfEBACQlPgCApMQHAJCU+AAAkhIfAEBS4gMASEp8AEmMjo7Ghg0bYuXKlbFhw4YYHR2t9UhAjWRKpVKp1kO8X7FYjFwuF4VCwTecwiyxevXqePbZZ/9j/f333x979uxJPxBQdZW8fjvzAUypDwqPiIhnn302Vq9enXYgoOac+QCmzOjoaMyfP/+S+505cyYaGhoSTARMFWc+gGlh/fr1425fe+21sWTJkrj22ms/dD9gdnPmA5gyl/Prlu+ZZk9FQIWc+QAApi3xAQAkJT6AKXPVVVdVdT9gdhAfwJSZM+fynmIudz9gdvBfPDBlzp8/X9X9gNlBfAAASYkPACAp8QEAJCU+AICkxAcAkJT4AACSEh8AQFLiAwBISnwAAEmJDwAgKfEBACRVUXz84Ac/iEwmM27J5/Pl7WfPno2enp5YtGhRLFiwILq7u2N4eLjqQwMAM1fFZz4++clPxttvv11e/vznP5e3bdq0Kfbu3Ru7d++O/fv3x+DgYKxZs6aqAwMAM1tdxXeoq4uWlpb/WF8oFOLJJ5+MXbt2xfLlyyMiYseOHdHR0RGHDh2KZcuWTX5aAGDGq/jMx7Fjx6K1tTU++tGPxkMPPRQnTpyIiIi+vr44f/58rFixorxvPp+P9vb2OHjw4Aceb2xsLIrF4rgFAJi9KoqPO++8M3bu3BnPPfdcPPHEE3H8+PH49Kc/HSMjIzE0NBTz5s2LhQsXjrtPc3NzDA0NfeAxe3t7I5fLlZe2trYJ/UUAgJmhorddVq1aVf7nxYsXx5133hk33XRT/O53v4uGhoYJDbBly5bYvHlz+XaxWBQgADCLTeqjtgsXLozbbrst3nzzzWhpaYlz587FyZMnx+0zPDx80WtE3lN
fXx/ZbHbcAgDMXpOKj1OnTsXf//73uOGGG6KzszPmzp0b+/btK28/evRonDhxIrq6uiY9KAAwO1T0tsu3vvWtuO++++Kmm26KwcHBeOSRR+Kqq66KBx98MHK5XKxfvz42b94cTU1Nkc1mY+PGjdHV1eWTLgBAWUXx8Y9//CMefPDB+Oc//xnXXXdd3H333XHo0KG47rrrIiJi69atMWfOnOju7o6xsbFYuXJlbN++fUoGBwBmpkypVCrVeoj3KxaLkcvlolAouP4DZrhMJnPZ+06zpyKgQpW8fvttFwAgKfEBACQlPgCApMQHAJCU+AAAkhIfAEBS4gMASEp8AABJiQ8AICnxAQAkJT4AgKTEBwCQlPgAAJISHwBAUuIDAEhKfAAASYkPACAp8QEAJCU+AICkxAcAkJT4AACSEh8AQFLiAwBISnwAAEmJDwAgKfEBACQlPgCApMQHAJCU+AAAkhIfAEBS4gMASEp8AABJiQ8AICnxAQAkJT4AgKTEBwCQlPgAAJISHwBAUuIDAEhKfAAASYkPACAp8QEAJCU+AICkxAcAkJT4AACSEh8AQFLiAwBISnwAAEmJDwAgKfEBACQlPgCApMQHAJCU+AAAkhIfAEBS4gMASEp8AABJiQ8AIKlJxcdjjz0WmUwmHn744fK6s2fPRk9PTyxatCgWLFgQ3d3dMTw8PNk5AYBZYsLx8dJLL8Uvf/nLWLx48bj1mzZtir1798bu3btj//79MTg4GGvWrJn0oADA7DCh+Dh16lQ89NBD8atf/Squvfba8vpCoRBPPvlk/PSnP43ly5dHZ2dn7NixI/7yl7/EoUOHqjY0ADBzTSg+enp64gtf+EKsWLFi3Pq+vr44f/78uPX5fD7a29vj4MGDFz3W2NhYFIvFcQsAMHvVVXqHp59+Ol555ZV46aWX/mPb0NBQzJs3LxYuXDhufXNzcwwNDV30eL29vfHDH/6w0jEAgBmqojMfAwMD8c1vfjN++9vfxtVXX12VAbZs2RKFQqG8DAwMVOW4AMD0VFF89PX1xTvvvBNLly6Nurq6qKuri/3798fjjz8edXV10dzcHOfOnYuTJ0+Ou9/w8HC0tLRc9Jj19fWRzWbHLQDA7FXR2y6f/exn47XXXhu37itf+Urk8/n4zne+E21tbTF37tzYt29fdHd3R0TE0aNH48SJE9HV1VW9qQGAGaui+GhsbIzbb7993LprrrkmFi1aVF6/fv362Lx5czQ1NUU2m42NGzdGV1dXLFu2rHpTAwAzVsUXnF7K1q1bY86cOdHd3R1jY2OxcuXK2L59e7UfBgCYoTKlUqlU6yHer1gsRi6Xi0Kh4PoPmOEymcxl7zvNnoqAClXy+u23XQCApMQHAJCU+AAAkhIfAEBS4gMASEp8AABJiQ8AICnxAQAkJT4AgKTEBwCQlPgAAJISHwBAUuIDAEhKfAAASYkPACAp8QEAJCU+AICkxAcAkJT4AACSEh8AQFLiAwBISnwAAEmJDwAgKfEBACQlPgCApMQHAJCU+AAAkhIfAEBS4gMASEp8AABJiQ8AICnxAQAkJT4AgKTEBwCQlPgAAJISHwBAUuIDAEhKfAAASYkPACAp8QEAJCU+AICkxAcAkJT4AACSEh8AQFLiAwBISnwAAEmJDwAgKfEBACQlPgCApMQHAJCU+AAAkhIfAEBS4gMASEp8AABJiQ8AICnxAQAkVVF8PPHEE7F48eLIZrORzWajq6sr/vCHP5S3nz17Nnp6emLRokWxYMGC6O7ujuHh4aoPDQDMXBXFx4033hiPPfZY9PX1xcsvvxzLly+P+++/P/76179GRMSmTZti7969sXv37ti/f38MDg7GmjVrpmRwAGBmypRKpdJkDtDU1BQ/+clP4oEHHojrrrsudu3aFQ888EBERPT390dHR0ccPHgwli1bdlnHKxaLkcvlolAoRDabncxoQI1lMpnL3neST0VAjVXy+j3haz7+9a9/xdNPPx2nT5+
Orq6u6Ovri/Pnz8eKFSvK++Tz+Whvb4+DBw9O9GEAgFmmrtI7vPbaa9HV1RVnz56NBQsWxDPPPBOf+MQn4vDhwzFv3rxYuHDhuP2bm5tjaGjoA483NjYWY2Nj5dvFYrHSkQCAGaTiMx8f//jH4/Dhw/Hiiy/GN77xjVi3bl387W9/m/AAvb29kcvlyktbW9uEjwUATH8Vx8e8efPiYx/7WHR2dkZvb2/ccccd8fOf/zxaWlri3LlzcfLkyXH7Dw8PR0tLywceb8uWLVEoFMrLwMBAxX8JAGDmmPT3fFy4cCHGxsais7Mz5s6dG/v27StvO3r0aJw4cSK6uro+8P719fXlj+6+twAAs1dF13xs2bIlVq1aFe3t7TEyMhK7du2KF154IZ5//vnI5XKxfv362Lx5czQ1NUU2m42NGzdGV1fXZX/SBQCY/SqKj3feeSe+9KUvxdtvvx25XC4WL14czz//fHzuc5+LiIitW7fGnDlzoru7O8bGxmLlypWxffv2KRkcAJiZJv09H9Xmez5g9vA9H3DlSPI9HwAAEyE+AICkxAcAkJT4AACSEh8AQFLiAwBISnwAAEmJDwAgKfEBACQlPgCApMQHAJCU+AAAkhIfAEBS4gMASEp8AABJiQ8AICnxAQAkJT4AgKTEBwCQlPgAAJISHwBAUuIDAEhKfAAASYkPACAp8QEAJCU+AICkxAcAkJT4AACSEh8AQFLiAwBISnwAAEmJDwAgKfEBACQlPgCApMQHAJCU+AAAkhIfAEBS4gMASEp8AABJiQ8AICnxAQAkJT4AgKTEBwCQlPgAAJISHwBAUuIDAEiqrtYDANPfmTNnor+/f0of45VXXpnQ/fL5fMyfP7/K0wBTSXwAl9Tf3x+dnZ1T+hgTPX5fX18sXbq0ytMAU0l8AJeUz+ejr6+v4vu9/vrrsW7dukvu9+tf/zpuv/32iYwW+Xx+QvcDaidTKpVKtR7i/YrFYuRyuSgUCpHNZms9DjBJmUzmkvtMs6chYAIqef12wSkwpS4VFsIDrjziA5hypVIpXn311XHrXn31VeEBVyjxASSxZMmS8nUjfX19sWTJktoOBNSM+AAAkhIfAEBS4gMASEp8AABJVRQfvb298alPfSoaGxvj+uuvj9WrV8fRo0fH7XP27Nno6emJRYsWxYIFC6K7uzuGh4erOjQAMHNVFB/79++Pnp6eOHToUPzxj3+M8+fPx+c///k4ffp0eZ9NmzbF3r17Y/fu3bF///4YHByMNWvWVH1wAGBmqujr1Z977rlxt3fu3BnXX3999PX1xWc+85koFArx5JNPxq5du2L58uUREbFjx47o6OiIQ4cOxbJly6o3OQAwI03qmo9CoRAREU1NTRHx7mf3z58/HytWrCjvk8/no729PQ4ePHjRY4yNjUWxWBy3AACz14Tj48KFC/Hwww/HXXfdVf5BqKGhoZg3b14sXLhw3L7Nzc0xNDR00eP09vZGLpcrL21tbRMdCQCYASYcHz09PfH666/H008/PakBtmzZEoVCobwMDAxM6ngAwPRW0TUf79mwYUP8/ve/jwMHDsSNN95YXt/S0hLnzp2LkydPjjv7MTw8HC0tLRc9Vn19fdTX109kDABgBqrozEepVIoNGzbEM888E3/605/illtuGbe9s7Mz5s6dG/v27SuvO3r0aJw4cSK6urqqMzEAMKNVdOajp6cndu3aFc8++2w0NjaWr+PI5XLR0NAQuVwu1q9fH5s3b46mpqbIZrOxcePG6Orq8kkXACAiKoyPJ554IiIi7rnnnnHrd+zYEV/+8pcjImLr1q0xZ86c6O7ujrGxsVi5cmVs3769KsMCADNfRfFRKpUuuc/VV18d27Zti23btk14KABg9vLbLgBAUuIDAEhKfAAASYkPACAp8QEAJCU+AICkxAcAkJT4AACSEh8AQFLiAwBISnwAAEmJDwAgKfEBACQlPgCApMQHAJCU+AAAkhIfAEBS4gMASEp8AAB
JiQ8AICnxAQAkJT4AgKTEBwCQlPgAAJISHwBAUuIDAEhKfAAASYkPACAp8QEAJCU+AICkxAcAkJT4AACSEh8AQFLiAwBISnwAAEmJDwAgKfEBACQlPgCApMQHAJCU+AAAkhIfAEBS4gMASEp8AABJ1dV6AGDqHDt2LEZGRmo9RtmRI0fG/TmdNDY2xq233lrrMeCKID5gljp27FjcdttttR7jotauXVvrES7qjTfeECCQgPiAWeq9Mx5PPfVUdHR01Hiad42OjsZbb70VN998czQ0NNR6nLIjR47E2rVrp9VZIpjNxAfMch0dHbF06dJaj1F211131XoEoMZccAoAJCU+AICkxAcAkJT4AACSEh8AQFLiAwBISnwAAEmJDwAgKfEBACQlPgCApCqOjwMHDsR9990Xra2tkclkYs+ePeO2l0ql+P73vx833HBDNDQ0xIoVK+LYsWPVmhcAmOEqjo/Tp0/HHXfcEdu2bbvo9h//+Mfx+OOPxy9+8Yt48cUX45prromVK1fG2bNnJz0sADDzVfzDcqtWrYpVq1ZddFupVIqf/exn8d3vfjfuv//+iIj4zW9+E83NzbFnz5744he/OLlpAYAZr6rXfBw/fjyGhoZixYoV5XW5XC7uvPPOOHjw4EXvMzY2FsVicdwCAMxeVY2PoaGhiIhobm4et765ubm87d/19vZGLpcrL21tbdUcCQCYZmr+aZctW7ZEoVAoLwMDA7UeCQCYQlWNj5aWloiIGB4eHrd+eHi4vO3f1dfXRzabHbcAALNXVePjlltuiZaWlti3b195XbFYjBdffDG6urqq+VAAwAxV8addTp06FW+++Wb59vHjx+Pw4cPR1NQU7e3t8fDDD8ePfvSjuPXWW+OWW26J733ve9Ha2hqrV6+u5twAwAxVcXy8/PLLce+995Zvb968OSIi1q1bFzt37oxvf/vbcfr06fj6178eJ0+ejLvvvjuee+65uPrqq6s3NQAwY1UcH/fcc0+USqUP3J7JZOLRRx+NRx99dFKDAQCzU80/7QIAXFnEBwCQVMVvuwAzR8uCTDScfCNi0P9nfJiGk29Ey4JMrceAK4b4gFnsvzvnRceB/444UOtJpreOePffFZCG+IBZ7Jd95+L/fH9ndOTztR5lWjvS3x+//H//N/6r1oPAFUJ8wCw2dKoUowtvi2hdUutRprXRoQsxdOqDP8UHVJc3ggGApMQHAJCU+AAAkhIfAEBS4gMASEp8AABJiQ8AICnxAQAkJT4AgKTEBwCQlPgAAJISHwBAUn5YDmapM2fORETEK6+8UuNJ/r/R0dF466234uabb46GhoZaj1N25MiRWo8AVxTxAbNUf39/RER87Wtfq/EkM0djY2OtR4ArgviAWWr16tUREZHP52P+/Pm1HeZ/HTlyJNauXRtPPfVUdHR01HqccRobG+PWW2+t9RhwRRAfMEt95CMfia9+9au1HuOiOjo6YunSpbUeA6gRF5wCAEmJDwAgKfEBACQlPgCApMQHAJCU+AAAkhIfAEBS4gMASEp8AABJiQ8AICnxAQAkJT4AgKTEBwCQlPgAAJKqq/UAwPR35syZ6O/vn/Rxjhw5Mu7Pasjn8zF//vyqHQ+YeuIDuKT+/v7o7Oys2vHWrl1btWP19fXF0qVLq3Y8YOqJD+CS8vl89PX1Tfo4o6Oj8dZbb8XNN98cDQ0NVZjs3dmAmSVTKpVKtR7i/YrFYuRyuSgUCpHNZms9DgBwGSp5/XbBKQCQlPgAAJISHwBAUuIDAEhKfAAASYkPACAp8QEAJCU+AICkxAcAkJT4AACSEh8AQFLiAwBISnwAAEnV1XqAf/fej+wWi8UaTwIAXK73Xrffex3/MNMuPkZGRiIioq2trcaTAACVGhkZiVwu96H7ZEqXkygJXbhwIQYHB6OxsTEymUytxwGqqFgsRltbWwwMDEQ2m631OEAVlUqlGBkZidbW1pgz58Ov6ph28QHMXsViMXK5XBQKBfE
BVzAXnAIASYkPACAp8QEkU19fH4888kjU19fXehSghlzzAQAk5cwHAJCU+AAAkhIfAEBS4gMASEp8AFPuwIEDcd9990Vra2tkMpnYs2dPrUcCakh8AFPu9OnTcccdd8S2bdtqPQowDUy7H5YDZp9Vq1bFqlWraj0GME048wEAJCU+AICkxAcAkJT4AACSEh8AQFI+7QJMuVOnTsWbb75Zvn38+PE4fPhwNDU1RXt7ew0nA2rBr9oCU+6FF16Ie++99z/Wr1u3Lnbu3Jl+IKCmxAcAkJRrPgCApMQHAJCU+AAAkhIfAEBS4gMASEp8AABJiQ8AICnxAQAkJT4AgKTEBwCQlPgAAJISHwBAUv8DNaUxyQSgSpIAAAAASUVORK5CYII=", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "#Remove Outliers\n", + "q_low = df[\"fare_amount\"].quantile(0.01)\n", + "q_hi = df[\"fare_amount\"].quantile(0.99)\n", + "\n", + "df = df[(df[\"fare_amount\"] < q_hi) & (df[\"fare_amount\"] > q_low)]\n", + "print(df)\n", + "plt.boxplot(df['fare_amount'])" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "trusted": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Unnamed: 0 0\n", + "key 0\n", + "fare_amount 0\n", + "pickup_datetime 0\n", + "pickup_longitude 0\n", + "pickup_latitude 0\n", + "dropoff_longitude 0\n", + "dropoff_latitude 0\n", + "passenger_count 0\n", + "dtype: int64" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#Check the missing values now\n", + "df.isnull().sum()" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "trusted": true + }, + "outputs": [], + "source": [ + "#Time to apply learning models\n", + "from sklearn.model_selection import train_test_split" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": { + "trusted": true + }, + "outputs": [], + "source": [ + "#Take x as predictor variable\n", + "x = df.drop(\"fare_amount\", axis = 1)\n", + "#And y as target variable\n", + "y = df['fare_amount']\n" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": { + "trusted": true + }, + "outputs": [], + "source": [ + "#Necessary to apply model\n", + "x['pickup_datetime'] = pd.to_numeric(pd.to_datetime(x['pickup_datetime']))\n", + "x = x.loc[:, x.columns.str.contains('^Unnamed')]" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": { + "trusted": true + }, + "outputs": [], + "source": [ + "x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.2, random_state = 1)" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": { + 
"trusted": true + }, + "outputs": [], + "source": [ + "from sklearn.linear_model import LinearRegression" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": { + "trusted": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
LinearRegression()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + ], + "text/plain": [ + "LinearRegression()" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "lrmodel = LinearRegression()\n", + "lrmodel.fit(x_train, y_train)" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": { + "trusted": true + }, + "outputs": [], + "source": [ + "#Prediction\n", + "predict = lrmodel.predict(x_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": { + "trusted": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "RMSE error for the model is 8.063863046328835\n" + ] + } + ], + "source": [ + "#Check Error\n", + "from sklearn.metrics import mean_squared_error\n", + "lrmodelrmse = np.sqrt(mean_squared_error(predict, y_test))\n", + "print(\"RMSE error for the model is \", lrmodelrmse)" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": { + "trusted": true + }, + "outputs": [], + "source": [ + "#Let's Apply Random Forest Regressor\n", + "from sklearn.ensemble import RandomForestRegressor\n", + "rfrmodel = RandomForestRegressor(n_estimators = 100, random_state = 101)" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": { + "trusted": true + }, + "outputs": [], + "source": [ + "#Fit the Forest\n", + "rfrmodel.fit(x_train, y_train)\n", + "rfrmodel_pred = rfrmodel.predict(x_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": { + "trusted": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "RMSE value for Random Forest is: 9.757713738069647\n" + ] + } + ], + "source": [ + "#Errors for the forest\n", + "rfrmodel_rmse = np.sqrt(mean_squared_error(rfrmodel_pred, y_test))\n", + "print(\"RMSE value for Random Forest is:\",rfrmodel_rmse)" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": { + "trusted": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": 
"stream", + "text": [ + "6.9 USD\n" + ] + } + ], + "source": [ + "X = df[['pickup_longitude', 'pickup_latitude', 'dropoff_longitude', 'dropoff_latitude']]\n", + "y = df['fare_amount']\n", + "rfrmodel.fit(X, y)\n", + "\n", + "new=[[-73.985,40.748,-73.985,40.758]]\n", + "pr=rfrmodel.predict(new)\n", + "print(round(pr[0],2),\"USD\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "trusted": true + }, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "trusted": true + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.1" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/2_Email.ipynb b/2_Email.ipynb new file mode 100644 index 0000000..3288a17 --- /dev/null +++ b/2_Email.ipynb @@ -0,0 +1,121 @@ +{ + "metadata": { + "kernelspec": { + "name": "python", + "display_name": "Python (Pyodide)", + "language": "python" + }, + "language_info": { + "codemirror_mode": { + "name": "python", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8" + } + }, + "nbformat_minor": 5, + "nbformat": 4, + "cells": [ + { + "id": "1787aa40-6173-48cb-ac24-169a13a92b25", + "cell_type": "code", + "source": "import pandas as pd\nimport numpy as np\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.preprocessing import StandardScaler\nfrom sklearn.neighbors import KNeighborsClassifier\nfrom sklearn.svm import SVC\nfrom sklearn.metrics import accuracy_score, confusion_matrix, classification_report", + 
"metadata": { + "trusted": true + }, + "outputs": [], + "execution_count": 1 + }, + { + "id": "e529d131-cb7a-4408-8624-96b481da9f94", + "cell_type": "code", + "source": "data = pd.read_csv(\"emails.csv\")\nprint(data.head())", + "metadata": { + "trusted": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": " Email No. the to ect and for of a you hou ... connevey jay \\\n0 Email 1 0 0 1 0 0 0 2 0 0 ... 0 0 \n1 Email 2 8 13 24 6 6 2 102 1 27 ... 0 0 \n2 Email 3 0 0 1 0 0 0 8 0 0 ... 0 0 \n3 Email 4 0 5 22 0 5 1 51 2 10 ... 0 0 \n4 Email 5 7 6 17 1 5 2 57 0 9 ... 0 0 \n\n valued lay infrastructure military allowing ff dry Prediction \n0 0 0 0 0 0 0 0 0 \n1 0 0 0 0 0 1 0 0 \n2 0 0 0 0 0 0 0 0 \n3 0 0 0 0 0 0 0 0 \n4 0 0 0 0 0 1 0 0 \n\n[5 rows x 3002 columns]\n" + } + ], + "execution_count": 2 + }, + { + "id": "adcce56e-4742-4a1a-8ed6-82df3e5ad9c4", + "cell_type": "code", + "source": "X = data.drop(columns=['Email No.', 'Prediction'], errors='ignore') # features\ny = data['Prediction'] ", + "metadata": { + "trusted": true + }, + "outputs": [], + "execution_count": null + }, + { + "id": "963f4ccd-9790-4d1f-8458-995bdbfc84eb", + "cell_type": "code", + "source": "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)", + "metadata": { + "trusted": true + }, + "outputs": [], + "execution_count": null + }, + { + "id": "481d8d23-ce4d-4419-a820-779ad333be2a", + "cell_type": "code", + "source": "scaler = StandardScaler()\nX_train = scaler.fit_transform(X_train)\nX_test = scaler.transform(X_test)", + "metadata": { + "trusted": true + }, + "outputs": [], + "execution_count": null + }, + { + "id": "255b9746-202d-425b-9353-41d7bb18d99c", + "cell_type": "code", + "source": "knn = KNeighborsClassifier(n_neighbors=5)\nknn.fit(X_train, y_train)\ny_pred_knn = knn.predict(X_test)", + "metadata": { + "trusted": true + }, + "outputs": [], + "execution_count": null + }, + { + "id": "0c5e2c44-afc4-43d8-bdd7-e0a136d54413", 
+ "cell_type": "code", + "source": "svm = SVC(kernel='linear', C=1)\nsvm.fit(X_train, y_train)\ny_pred_svm = svm.predict(X_test)", + "metadata": { + "trusted": true + }, + "outputs": [], + "execution_count": null + }, + { + "id": "77909086-350b-4442-97d6-e7bbef15648f", + "cell_type": "code", + "source": "print(\"===== KNN Model Evaluation =====\")\nprint(\"Accuracy:\", accuracy_score(y_test, y_pred_knn))\nprint(confusion_matrix(y_test, y_pred_knn))\nprint(classification_report(y_test, y_pred_knn))", + "metadata": { + "trusted": true + }, + "outputs": [], + "execution_count": null + }, + { + "id": "72a0433c-8085-4ea4-aa29-28ed856ab086", + "cell_type": "code", + "source": "print(\"\\n===== SVM Model Evaluation =====\")\nprint(\"Accuracy:\", accuracy_score(y_test, y_pred_svm))\nprint(confusion_matrix(y_test, y_pred_svm))\nprint(classification_report(y_test, y_pred_svm))", + "metadata": { + "trusted": true + }, + "outputs": [], + "execution_count": null + } + ] +} \ No newline at end of file diff --git a/3_Churn_modelling.ipynb b/3_Churn_modelling.ipynb new file mode 100644 index 0000000..8f8e833 --- /dev/null +++ b/3_Churn_modelling.ipynb @@ -0,0 +1,151 @@ +{ + "metadata": { + "kernelspec": { + "name": "python", + "display_name": "Python (Pyodide)", + "language": "python" + }, + "language_info": { + "codemirror_mode": { + "name": "python", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8" + } + }, + "nbformat_minor": 5, + "nbformat": 4, + "cells": [ + { + "id": "d4cec5b7-5725-44d3-bfb7-04278fdf9bb4", + "cell_type": "code", + "source": "import pandas as pd\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.preprocessing import StandardScaler, LabelEncoder\nfrom sklearn.neural_network import MLPClassifier\nfrom sklearn.metrics import accuracy_score, confusion_matrix", + "metadata": { + "trusted": true + }, + "outputs": [], 
+ "execution_count": 2 + }, + { + "id": "0e3d0d27-300b-4152-96e0-70ff1fbab83a", + "cell_type": "code", + "source": "data = pd.read_csv(\"Churn_Modelling.csv\")", + "metadata": { + "trusted": true + }, + "outputs": [], + "execution_count": 3 + }, + { + "id": "06fd9a81-ed4d-4796-bc38-e0c68fe1dc3e", + "cell_type": "code", + "source": "X = data.iloc[:, 3:13] # Features from CreditScore to EstimatedSalary\ny = data.iloc[:, 13] ", + "metadata": { + "trusted": true + }, + "outputs": [], + "execution_count": 4 + }, + { + "id": "90c9c5aa-0b8a-424b-a625-ff4fc2d73380", + "cell_type": "code", + "source": "le = LabelEncoder()\nX[\"Gender\"] = le.fit_transform(X[\"Gender\"])\nX = pd.get_dummies(X, columns=[\"Geography\"], drop_first=True)", + "metadata": { + "trusted": true + }, + "outputs": [], + "execution_count": 5 + }, + { + "id": "4b0ecfe6-7245-4866-a842-e422c5658928", + "cell_type": "code", + "source": "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)", + "metadata": { + "trusted": true + }, + "outputs": [], + "execution_count": 6 + }, + { + "id": "55cd9306-83f0-4ea6-8399-278444f3e839", + "cell_type": "code", + "source": "scaler = StandardScaler()\nX_train = scaler.fit_transform(X_train)\nX_test = scaler.transform(X_test)", + "metadata": { + "trusted": true + }, + "outputs": [], + "execution_count": 7 + }, + { + "id": "60cdf8fb-c656-4a24-a120-de0c4c9abf94", + "cell_type": "code", + "source": "model = MLPClassifier(hidden_layer_sizes=(10, 10), # two hidden layers\n activation='relu',\n solver='adam',\n max_iter=300,\n random_state=42)", + "metadata": { + "trusted": true + }, + "outputs": [], + "execution_count": 8 + }, + { + "id": "566bf1ad-0836-4ba4-9a48-68554a7b9bf7", + "cell_type": "code", + "source": "model.fit(X_train, y_train)", + "metadata": { + "trusted": true + }, + "outputs": [ + { + "execution_count": 9, + "output_type": "execute_result", + "data": { + "text/plain": "MLPClassifier(hidden_layer_sizes=(10, 10), max_iter=300, 
random_state=42)", + "text/html": "
MLPClassifier(hidden_layer_sizes=(10, 10), max_iter=300, random_state=42)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + }, + "metadata": {} + } + ], + "execution_count": 9 + }, + { + "id": "5bfab65c-8310-4e57-b129-cc46f5f875c0", + "cell_type": "code", + "source": "y_pred = model.predict(X_test)", + "metadata": { + "trusted": true + }, + "outputs": [], + "execution_count": 10 + }, + { + "id": "fb318e31-6e59-4fc2-af8f-1de050e4d64c", + "cell_type": "code", + "source": "accuracy = accuracy_score(y_test, y_pred)\nconf_matrix = confusion_matrix(y_test, y_pred)\nprint(\"Accuracy:\", round(accuracy * 100, 2), \"%\")\nprint(\"Confusion Matrix:\\n\", conf_matrix)", + "metadata": { + "trusted": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": "Accuracy: 86.45 %\nConfusion Matrix:\n [[1543 64]\n [ 207 186]]\n" + } + ], + "execution_count": 11 + }, + { + "id": "89d3749a-40be-4fba-a5b3-f19b15b20ccc", + "cell_type": "code", + "source": "", + "metadata": { + "trusted": true + }, + "outputs": [], + "execution_count": null + } + ] +} \ No newline at end of file diff --git a/3_bank_customer.ipynb b/3_bank_customer.ipynb new file mode 100644 index 0000000..5740a5b --- /dev/null +++ b/3_bank_customer.ipynb @@ -0,0 +1,95 @@ +{ + "metadata": { + "kernelspec": { + "name": "python", + "display_name": "Python (Pyodide)", + "language": "python" + }, + "language_info": { + "codemirror_mode": { + "name": "python", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8" + } + }, + "nbformat_minor": 5, + "nbformat": 4, + "cells": [ + { + "id": "d4cec5b7-5725-44d3-bfb7-04278fdf9bb4", + "cell_type": "code", + "source": "import pandas as pd\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.preprocessing import StandardScaler, LabelEncoder\nfrom sklearn.neural_network import MLPClassifier\nfrom sklearn.metrics import accuracy_score, confusion_matrix", + "metadata": { + "trusted": true + }, + "outputs": [], + "execution_count": 2 + }, 
+ { + "id": "0e3d0d27-300b-4152-96e0-70ff1fbab83a", + "cell_type": "code", + "source": "data = pd.read_csv(\"Churn_Modelling.csv\")", + "metadata": { + "trusted": true + }, + "outputs": [], + "execution_count": 3 + }, + { + "id": "06fd9a81-ed4d-4796-bc38-e0c68fe1dc3e", + "cell_type": "code", + "source": "X = data.iloc[:, 3:13] # Features from CreditScore to EstimatedSalary\ny = data.iloc[:, 13] ", + "metadata": { + "trusted": true + }, + "outputs": [], + "execution_count": 4 + }, + { + "id": "90c9c5aa-0b8a-424b-a625-ff4fc2d73380", + "cell_type": "code", + "source": "le = LabelEncoder()\nX[\"Gender\"] = le.fit_transform(X[\"Gender\"])\nX = pd.get_dummies(X, columns=[\"Geography\"], drop_first=True)", + "metadata": { + "trusted": true + }, + "outputs": [], + "execution_count": 5 + }, + { + "id": "4b0ecfe6-7245-4866-a842-e422c5658928", + "cell_type": "code", + "source": "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)", + "metadata": { + "trusted": true + }, + "outputs": [], + "execution_count": 6 + }, + { + "id": "55cd9306-83f0-4ea6-8399-278444f3e839", + "cell_type": "code", + "source": "scaler = StandardScaler()\nX_train = scaler.fit_transform(X_train)\nX_test = scaler.transform(X_test)", + "metadata": { + "trusted": true + }, + "outputs": [], + "execution_count": 7 + }, + { + "id": "60cdf8fb-c656-4a24-a120-de0c4c9abf94", + "cell_type": "code", + "source": "", + "metadata": { + "trusted": true + }, + "outputs": [], + "execution_count": null + } + ] +} \ No newline at end of file diff --git a/4_GDA.py b/4_GDA.py new file mode 100644 index 0000000..7ba957f --- /dev/null +++ b/4_GDA.py @@ -0,0 +1,19 @@ +def f(x): + return (x + 3)**2 + +def df(x): + return 2 * (x + 3) + +# Step 2: Initialize parameters +x = 2 # starting point +learning_rate = 0.1 # step size +epochs = 30 # number of iterations + +# Step 3: Gradient Descent loop +for i in range(epochs): + grad = df(x) # compute gradient + x = x - learning_rate * grad # update x + 
print(f"Iteration {i+1}: x = {x:.4f}, f(x) = {f(x):.4f}") + +print("\nLocal minima occurs at x =", round(x, 4)) +print("Minimum value of function =", round(f(x), 4)) \ No newline at end of file diff --git a/5_KNN_diabetes.ipynb b/5_KNN_diabetes.ipynb new file mode 100644 index 0000000..4515550 --- /dev/null +++ b/5_KNN_diabetes.ipynb @@ -0,0 +1,111 @@ +{ + "metadata": { + "kernelspec": { + "name": "python", + "display_name": "Python (Pyodide)", + "language": "python" + }, + "language_info": { + "codemirror_mode": { + "name": "python", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8" + } + }, + "nbformat_minor": 5, + "nbformat": 4, + "cells": [ + { + "id": "3034a8f4-bf94-4105-9e51-70be64145d33", + "cell_type": "code", + "source": "import pandas as pd\nimport numpy as np\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.preprocessing import StandardScaler\nfrom sklearn.neighbors import KNeighborsClassifier\nfrom sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score", + "metadata": { + "trusted": true + }, + "outputs": [], + "execution_count": 1 + }, + { + "id": "f3643f05-2a1f-4083-bb1e-156a9fc4a116", + "cell_type": "code", + "source": "data = pd.read_csv(\"diabetes.csv\")\nprint(data.head())", + "metadata": { + "trusted": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": " Pregnancies Glucose BloodPressure SkinThickness Insulin BMI \\\n0 6 148 72 35 0 33.6 \n1 1 85 66 29 0 26.6 \n2 8 183 64 0 0 23.3 \n3 1 89 66 23 94 28.1 \n4 0 137 40 35 168 43.1 \n\n Pedigree Age Outcome \n0 0.627 50 1 \n1 0.351 31 0 \n2 0.672 32 1 \n3 0.167 21 0 \n4 2.288 33 1 \n" + } + ], + "execution_count": 3 + }, + { + "id": "d4982e5c-0006-41ea-bc8d-b652368156cf", + "cell_type": "code", + "source": "X = data.drop(columns=['Outcome'])\ny = data['Outcome']", + "metadata": { + 
"trusted": true + }, + "outputs": [], + "execution_count": 4 + }, + { + "id": "f0e7b2f7-f0bb-4d67-b98e-3695617e1d65", + "cell_type": "code", + "source": "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)", + "metadata": { + "trusted": true + }, + "outputs": [], + "execution_count": 5 + }, + { + "id": "1ea952ce-b927-4b39-ba82-ec2a95adfc1d", + "cell_type": "code", + "source": "scaler = StandardScaler()\nX_train = scaler.fit_transform(X_train)\nX_test = scaler.transform(X_test)", + "metadata": { + "trusted": true + }, + "outputs": [], + "execution_count": 6 + }, + { + "id": "f321cb2f-6633-45c1-a170-bd31e792d354", + "cell_type": "code", + "source": "knn = KNeighborsClassifier(n_neighbors=5) # K=5\nknn.fit(X_train, y_train)", + "metadata": { + "trusted": true + }, + "outputs": [ + { + "execution_count": 7, + "output_type": "execute_result", + "data": { + "text/plain": "KNeighborsClassifier()", + "text/html": "
KNeighborsClassifier()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + }, + "metadata": {} + } + ], + "execution_count": 7 + }, + { + "id": "d689a957-2a8f-4925-8466-1b10b197048c", + "cell_type": "code", + "source": "", + "metadata": { + "trusted": true + }, + "outputs": [], + "execution_count": null + } + ] +} \ No newline at end of file diff --git a/6_Kmeans_salesd.ipynb b/6_Kmeans_salesd.ipynb new file mode 100644 index 0000000..721cf57 --- /dev/null +++ b/6_Kmeans_salesd.ipynb @@ -0,0 +1,8 @@ +{ + "metadata": { + "orig_nbformat": 4 + }, + "nbformat_minor": 5, + "nbformat": 4, + "cells": [] +} \ No newline at end of file