{ "cells": [ { "cell_type": "code", "execution_count": 3, "metadata": { "trusted": true }, "outputs": [], "source": [ "#import libraries\n", "import pandas as pd\n", "import numpy as np\n", "import matplotlib.pyplot as plt\n", "import warnings \n", "#We do not want to see warnings\n", "warnings.filterwarnings(\"ignore\") " ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "trusted": true }, "outputs": [], "source": [ "#import data\n", "data = pd.read_csv(\"uber.csv\")" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "trusted": true }, "outputs": [], "source": [ "#Create a data copy\n", "df = data.copy()" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "trusted": true }, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#Print data\n", "df.head" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "trusted": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "RangeIndex: 200000 entries, 0 to 199999\n", "Data columns (total 9 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", " 0 Unnamed: 0 200000 non-null int64 \n", " 1 key 200000 non-null object \n", " 2 fare_amount 200000 non-null float64\n", " 3 pickup_datetime 200000 non-null object \n", " 4 pickup_longitude 200000 non-null float64\n", " 5 pickup_latitude 200000 non-null float64\n", " 6 dropoff_longitude 199999 non-null float64\n", " 7 dropoff_latitude 199999 non-null float64\n", " 8 passenger_count 200000 non-null int64 \n", "dtypes: float64(5), int64(2), object(2)\n", "memory usage: 13.7+ MB\n" ] } ], "source": [ "#Get Info\n", "df.info()" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "trusted": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "0 2015-05-07 19:52:06+00:00\n", "1 2009-07-17 20:04:56+00:00\n", "2 2009-08-24 21:45:00+00:00\n", "3 2009-06-26 
08:22:21+00:00\n", "4 2014-08-28 17:47:00+00:00\n", " ... \n", "199995 2012-10-28 10:49:00+00:00\n", "199996 2014-03-14 01:09:00+00:00\n", "199997 2009-06-29 00:42:00+00:00\n", "199998 2015-05-20 14:56:25+00:00\n", "199999 2010-05-15 04:08:00+00:00\n", "Name: pickup_datetime, Length: 200000, dtype: datetime64[ns, UTC]\n" ] } ], "source": [ "#pickup_datetime is not in required data format\n", "df[\"pickup_datetime\"] = pd.to_datetime(df[\"pickup_datetime\"])\n", "print(df[\"pickup_datetime\"])" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "trusted": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "RangeIndex: 200000 entries, 0 to 199999\n", "Data columns (total 9 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", " 0 Unnamed: 0 200000 non-null int64 \n", " 1 key 200000 non-null object \n", " 2 fare_amount 200000 non-null float64 \n", " 3 pickup_datetime 200000 non-null datetime64[ns, UTC]\n", " 4 pickup_longitude 200000 non-null float64 \n", " 5 pickup_latitude 200000 non-null float64 \n", " 6 dropoff_longitude 199999 non-null float64 \n", " 7 dropoff_latitude 199999 non-null float64 \n", " 8 passenger_count 200000 non-null int64 \n", "dtypes: datetime64[ns, UTC](1), float64(5), int64(2), object(1)\n", "memory usage: 13.7+ MB\n" ] } ], "source": [ "df.info()\n" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "trusted": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Unnamed: 0fare_amountpickup_longitudepickup_latitudedropoff_longitudedropoff_latitudepassenger_count
count2.000000e+05200000.000000200000.000000200000.000000199999.000000199999.000000200000.000000
mean2.771250e+0711.359955-72.52763839.935885-72.52529239.9238901.684535
std1.601382e+079.90177611.4377877.72053913.1174086.7948291.385997
min1.000000e+00-52.000000-1340.648410-74.015515-3356.666300-881.9855130.000000
25%1.382535e+076.000000-73.99206540.734796-73.99140740.7338231.000000
50%2.774550e+078.500000-73.98182340.752592-73.98009340.7530421.000000
75%4.155530e+0712.500000-73.96715440.767158-73.96365840.7680012.000000
max5.542357e+07499.00000057.4184571644.4214821153.572603872.697628208.000000
\n", "
" ], "text/plain": [ " Unnamed: 0 fare_amount pickup_longitude pickup_latitude \\\n", "count 2.000000e+05 200000.000000 200000.000000 200000.000000 \n", "mean 2.771250e+07 11.359955 -72.527638 39.935885 \n", "std 1.601382e+07 9.901776 11.437787 7.720539 \n", "min 1.000000e+00 -52.000000 -1340.648410 -74.015515 \n", "25% 1.382535e+07 6.000000 -73.992065 40.734796 \n", "50% 2.774550e+07 8.500000 -73.981823 40.752592 \n", "75% 4.155530e+07 12.500000 -73.967154 40.767158 \n", "max 5.542357e+07 499.000000 57.418457 1644.421482 \n", "\n", " dropoff_longitude dropoff_latitude passenger_count \n", "count 199999.000000 199999.000000 200000.000000 \n", "mean -72.525292 39.923890 1.684535 \n", "std 13.117408 6.794829 1.385997 \n", "min -3356.666300 -881.985513 0.000000 \n", "25% -73.991407 40.733823 1.000000 \n", "50% -73.980093 40.753042 1.000000 \n", "75% -73.963658 40.768001 2.000000 \n", "max 1153.572603 872.697628 208.000000 " ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#Statistics of data\n", "df.describe()" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "trusted": true }, "outputs": [ { "data": { "text/plain": [ "Unnamed: 0 0\n", "key 0\n", "fare_amount 0\n", "pickup_datetime 0\n", "pickup_longitude 0\n", "pickup_latitude 0\n", "dropoff_longitude 1\n", "dropoff_latitude 1\n", "passenger_count 0\n", "dtype: int64" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#Number of missing values\n", "df.isnull().sum()" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "trusted": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " Unnamed: 0 fare_amount pickup_longitude pickup_latitude \\\n", "Unnamed: 0 1.000000 0.000589 0.000230 -0.000341 \n", "fare_amount 0.000589 1.000000 0.010457 -0.008481 \n", "pickup_longitude 0.000230 0.010457 1.000000 -0.816461 \n", "pickup_latitude -0.000341 -0.008481 -0.816461 1.000000 \n", 
"dropoff_longitude 0.000270 0.008986 0.833026 -0.774787 \n", "dropoff_latitude 0.000271 -0.011014 -0.846324 0.702367 \n", "passenger_count 0.002257 0.010150 -0.000414 -0.001560 \n", "\n", " dropoff_longitude dropoff_latitude passenger_count \n", "Unnamed: 0 0.000270 0.000271 0.002257 \n", "fare_amount 0.008986 -0.011014 0.010150 \n", "pickup_longitude 0.833026 -0.846324 -0.000414 \n", "pickup_latitude -0.774787 0.702367 -0.001560 \n", "dropoff_longitude 1.000000 -0.917010 0.000033 \n", "dropoff_latitude -0.917010 1.000000 -0.000659 \n", "passenger_count 0.000033 -0.000659 1.000000 \n" ] } ], "source": [ "#Correlation\n", "cor=df.select_dtypes(include=['number']).corr()\n", "print(cor)" ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "trusted": true }, "outputs": [], "source": [ "#Drop the rows with missing values\n", "df.dropna(inplace=True)" ] }, { "cell_type": "code", "execution_count": 14, "metadata": { "trusted": true }, "outputs": [ { "data": { "text/plain": [ "{'whiskers': [,\n", " ],\n", " 'caps': [,\n", " ],\n", " 'boxes': [],\n", " 'medians': [],\n", " 'fliers': [],\n", " 'means': []}" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAigAAAGdCAYAAAA44ojeAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8fJSN1AAAACXBIWXMAAA9hAAAPYQGoP6dpAAApqElEQVR4nO3df3TU1Z3/8VcyISEJmUFifi4JxBPdgASsaGHQ8RBlTTGyzEnS0y6I7B63btnAWQEphnVp6+4hPRjEdYtS9+wW9gh1S04MX1Kgy6ECWRlR43IOwUKjJCY2mUChzISYH2Rmvn94ZspIagkEPp/MPB/nzJF87nsy7/lD5sWd+7k3JhAIBAQAAGAisUY3AAAA8GUEFAAAYDoEFAAAYDoEFAAAYDoEFAAAYDoEFAAAYDoEFAAAYDoEFAAAYDpxRjdwPfx+vzo6OpSSkqKYmBij2wEAANcgEAiou7tb2dnZio396jmSURlQOjo6lJOTY3QbAADgOrS3t2vixIlfWTMqA0pKSoqkL96g1Wo1uBsAAHAtvF6vcnJyQp/jX2VUBpTg1zpWq5WAAgDAKHMtyzNYJAsAAEyHgAIAAEyHgAIAAEyHgAIAAEyHgAIAAEyHgAIAAEyHgAIAAEyHgAIAAExnVG7UBiAy+Xw+NTQ0qLOzU1lZWXI4HLJYLEa3BcAAw5pB+cEPfqCYmJiwR0FBQWi8r69PFRUVSk1N1bhx41RWVqaurq6w39HW1qaSkhIlJSUpPT1da9as0eDg4Mi8GwCjVm1trfLz81VUVKRFixapqKhI+fn5qq2tNbo1AAYY9lc8d999tzo7O0OP//3f/w2NrVy5Unv27NGuXbt0+PBhdXR0qLS0NDTu8/lUUlKigYEBHT16VNu3b9e2bdu0fv36kXk3AEal2tpalZeXq7CwUC6XS93d3XK5XCosLFR5eTkhBYhCMYFAIHCtxT/4wQ9UV1en48ePXzXm8XiUlpamnTt3qry8XJJ06tQpTZkyRS6XS7Nnz9a+ffv0+OOPq6OjQxkZGZKkrVu3au3atTp37pzi4+OvqQ+v1yubzSaPx8NZPMAo5/P5lJ+fr8LCQtXV1YUdwe73++V0OtXU1KTm5ma+7gFGueF8fg97BqW5uVnZ2dm64447tHjxYrW1tUmSGhsbdfnyZc2bNy9UW1BQoNzcXLlcLkkK/YsoGE4kqbi4WF6vVydPnvyjr9nf3y+v1xv2ABAZGhoa1NraqnXr1oWFE0mKjY1VZWWlWlpa1NDQYFCHAIwwrIAya9Ysbdu2Tfv379drr72mlpYWORwOdXd3y+12Kz4+XuPHjw97TkZGhtxutyTJ7XaHhZPgeHDsj6mqqpLNZgs9cnJyhtM2ABPr7OyUJE2bNm3I8eD1YB2A6DCsu3jmz58f+vP06dM1a9YsTZo0ST//+c+VmJg44s0FVVZWatWqVaGfvV4vIQWIEFlZWZKkpqYmzZ49+6rxpqamsDoA0eGG9kEZP3687rrrLn388cfKzMzUwMCALl68GFbT1dWlzMxMSVJmZuZVd/UEfw7WDCUhIUFWqzXsASAyOBwOTZ48WRs2bJDf7w8b8/v9qqqqUl5enhwOh0EdAjDCDQWUS5cu6ZNPPlFWVpZmzpypMWPG6ODBg6Hx06dPq62tTXa7XZJkt9t14sQJnT17NlRz4MABWa1WTZ069UZaATBKWSwWbdq0SfX19XI6nWF38TidTtXX16u6upoFskCUGdZdPM8++6wWLFigSZMmqaOjQ9///vd1/PhxffTRR0pLS9OyZcu0d+9ebdu2TVarVStWrJAkHT16VNIXq/XvueceZWdna+PGjXK73VqyZIn+9m//Vhs2bLjmprmLB4g8tbW1Wr16tVpbW0PX8vLyVF1dHbZdAYDRazif38Nag/LZZ5/pr/7qr3T+/HmlpaXpwQcf1Lvvvqu0tDRJ0ubNmxUbG6uysjL19/eruLhYr776auj5FotF9fX1WrZsmex2u5KTk7V
06VK98MIL1/E2AUSS0tJSLVy4kJ1kAUga5gyKWTCDAgDA6HNT90EBAAC42QgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdG4ooPzoRz9STEyMnnnmmdC1vr4+VVRUKDU1VePGjVNZWZm6urrCntfW1qaSkhIlJSUpPT1da9as0eDg4I20AgAAIsh1B5T3339fP/nJTzR9+vSw6ytXrtSePXu0a9cuHT58WB0dHSotLQ2N+3w+lZSUaGBgQEePHtX27du1bds2rV+//vrfBQAAiCjXFVAuXbqkxYsX69///d912223ha57PB79x3/8h1566SU9/PDDmjlzpn7605/q6NGjevfddyVJ//M//6OPPvpIb7zxhu655x7Nnz9f//zP/6wtW7ZoYGBgZN4VAAAY1a4roFRUVKikpETz5s0Lu97Y2KjLly+HXS8oKFBubq5cLpckyeVyqbCwUBkZGaGa4uJieb1enTx5csjX6+/vl9frDXsAAIDIFTfcJ7z55pv68MMP9f7771815na7FR8fr/Hjx4ddz8jIkNvtDtVcGU6C48GxoVRVVemHP/zhcFsFAACj1LBmUNrb2/UP//AP2rFjh8aOHXuzerpKZWWlPB5P6NHe3n7LXhsAANx6wwoojY2NOnv2rO69917FxcUpLi5Ohw8f1iuvvKK4uDhlZGRoYGBAFy9eDHteV1eXMjMzJUmZmZlX3dUT/DlY82UJCQmyWq1hDwAAELmGFVAeeeQRnThxQsePHw897rvvPi1evDj05zFjxujgwYOh55w+fVptbW2y2+2SJLvdrhMnTujs2bOhmgMHDshqtWrq1Kkj9LYAAMBoNqw1KCkpKZo2bVrYteTkZKWmpoauP/XUU1q1apUmTJggq9WqFStWyG63a/bs2ZKkRx99VFOnTtWSJUu0ceNGud1uPf/886qoqFBCQsIIvS0AADCaDXuR7J+yefNmxcbGqqysTP39/SouLtarr74aGrdYLKqvr9eyZctkt9uVnJyspUuX6oUXXhjpVgAAwCgVEwgEAkY3MVxer1c2m00ej4f1KAAAjBLD+fzmLB4AAGA6BBQAAGA6BBQAAGA6BBQAAGA6BBQAAGA6BBQAAGA6BBQAAGA6BBQAAGA6BBQAAGA6BBQAAGA6BBQAAGA6BBQAAGA6BBQAAGA6BBQAAGA6BBQAAGA6BBQAAGA6BBQAAGA6BBQAAGA6cUY3AABBPp9PDQ0N6uzsVFZWlhwOhywWi9FtATAAMygATKG2tlb5+fkqKirSokWLVFRUpPz8fNXW1hrdGgADEFAAGK62tlbl5eUqLCyUy+VSd3e3XC6XCgsLVV5eTkgBolBMIBAIGN3EcHm9XtlsNnk8HlmtVqPbAXADfD6f8vPzVVhYqLq6OsXG/uHfTX6/X06nU01NTWpububrHmCUG87nNzMoAAzV0NCg1tZWrVu3LiycSFJsbKwqKyvV0tKihoYGgzoEYAQCCgBDdXZ2SpKmTZs25HjwerAOQHQgoAAwVFZWliSpqalpyPHg9WAdgOhAQAFgKIfDocmTJ2vDhg3y+/1hY36/X1VVVcrLy5PD4TCoQwBGIKAAMJTFYtGmTZtUX18vp9MZdheP0+lUfX29qqurWSALRBk2agNguNLSUtX
U1Gj16tWaM2dO6HpeXp5qampUWlpqYHcAjMBtxgBMg51kgcg2nM9vZlAAmIbFYtHcuXONbgOACbAGBQAAmA4BBQAAmA4BBQAAmA4BBQAAmA4BBQAAmA4BBQAAmA4BBQAAmA4BBQAAmA4BBQAAmA4BBQAAmA4BBQAAmA4BBQAAmA4BBQAAmA4BBQAAmA4BBQAAmA4BBQAAmA4BBQAAmA4BBQAAmA4BBQAAmA4BBQAAmM6wAsprr72m6dOny2q1ymq1ym63a9++faHxvr4+VVRUKDU1VePGjVNZWZm6urrCfkdbW5tKSkqUlJSk9PR0rVmzRoODgyPzbgAAQEQYVkCZOHGifvSjH6mxsVEffPCBHn74YS1cuFAnT56UJK1cuVJ79uzRrl27dPjwYXV0dKi0tDT0fJ/Pp5KSEg0MDOjo0aPavn27tm3bpvXr14/suwIAAKNaTCAQCNzIL5gwYYJefPFFlZeXKy0tTTt37lR5ebkk6dSpU5oyZYpcLpdmz56tffv26fHHH1dHR4cyMjIkSVu3btXatWt17tw5xcfHX9Nrer1e2Ww2eTweWa3WG2kfAADcIsP5/L7uNSg+n09vvvmmenp6ZLfb1djYqMuXL2vevHmhmoKCAuXm5srlckmSXC6XCgsLQ+FEkoqLi+X1ekOzMEPp7++X1+sNewAAgMg17IBy4sQJjRs3TgkJCfrud7+rt956S1OnTpXb7VZ8fLzGjx8fVp+RkSG32y1JcrvdYeEkOB4c+2Oqqqpks9lCj5ycnOG2DQAARpFhB5Q///M/1/Hjx3Xs2DEtW7ZMS5cu1UcffXQzeguprKyUx+MJPdrb22/q6wEAAGPFDfcJ8fHxys/PlyTNnDlT77//vv71X/9V3/rWtzQwMKCLFy+GzaJ0dXUpMzNTkpSZman33nsv7PcF7/IJ1gwlISFBCQkJw20VAACMUje8D4rf71d/f79mzpypMWPG6ODBg6Gx06dPq62tTXa7XZJkt9t14sQJnT17NlRz4MABWa1WTZ069UZbAQAAEWJYMyiVlZWaP3++cnNz1d3drZ07d+rQoUP65S9/KZvNpqeeekqrVq3ShAkTZLVatWLFCtntds2ePVuS9Oijj2rq1KlasmSJNm7cKLfbreeff14VFRXMkAAAgJBhBZSzZ8/qySefVGdnp2w2m6ZPn65f/vKX+ou/+AtJ0ubNmxUbG6uysjL19/eruLhYr776auj5FotF9fX1WrZsmex2u5KTk7V06VK98MILI/uuAADAqHbD+6AYgX1QAAAYfW7JPigAAAA3CwEFAACYDgEFAACYzrD3QQGAm8Xn86mhoUGdnZ3KysqSw+GQxWIxui0ABmAGBYAp1NbWKj8/X0VFRVq0aJGKioqUn5+v2tpao1sDYAACCgDD1dbWqry8XIWFhXK5XOru7g4dLlpeXk5IAaIQtxkDMJTP51N+fr4KCwtVV1en2Ng//LvJ7/fL6XSqqalJzc3NfN0DjHLcZgxg1GhoaFBra6vWrVsXFk4kKTY2VpWVlWppaVFDQ4NBHQIwAgEFgKE6OzslSdOmTRtyPHg9WAcgOhBQABgqKytLktTU1DTkePB6sA5AdCCgADCUw+HQ5MmTtWHDBvn9/rAxv9+vqqoq5eXlyeFwGNQhACMQUAAYymKxaNOmTaqvr5fT6Qy7i8fpdKq+vl7V1dUskAWiDBu1ATBcaWmpampqtHr1as2ZMyd0PS8vTzU1NSotLTWwOwBG4DZjAKbBTrJAZBvO5zczKABMw2KxaO7cuUa3AcAEWIMCAABMh4ACAABMh4ACAABMhzUoAEyDRbIAgphBAWAKtbW1ys/PV1FRkRYtWqSioiLl5+dzkjEQpQgoAAxXW1ur8vJyFRYWhm3UVlhYqPLyckIKEIXYBwWAoXw+n/Lz81VYWKi6urqwE439fr+cTqeamprU3NzM1z3AKDecz29mUAAYqqGhQa2trVq3bl1YOJGk2NhYVVZWqqWlRQ0NDQZ
1CMAIBBQAhurs7JQkTZs2bcjx4PVgHYDoQEABYKisrCxJUlNT05DjwevBOgDRgYACwFAOh0OTJ0/Whg0b5Pf7w8b8fr+qqqqUl5cnh8NhUIcAjEBAAWAoi8WiTZs2qb6+Xk6nM+wuHqfTqfr6elVXV7NAFogybNQGwHClpaWqqanR6tWrNWfOnND1vLw81dTUqLS01MDuABiB24wBmAY7yQKRbTif38ygADANi8WiuXPnGt0GABNgDQoAADAdZlAAmAZf8QAIYgYFgClwWCCAKxFQABiOwwIBfBl38QAwFIcFAtGDwwIBjBocFghgKAQUAIbisEAAQyGgADDUlYcF+nw+HTp0SD/72c906NAh+Xw+DgsEohRrUAAYKrgG5fbbb9e5c+f06aefhsYmTZqktLQ0nT9/njUoQARgDQqAUcNiseib3/ymPvjgA/X19en1119XR0eHXn/9dfX19emDDz5QeXk54QSIMsygADDUlTMov/vd79Ta2hoay8vLU2pqKjMoQITgLB4Ao0bwLp6f/exnuv/++6/aSfa9997TnDlz1NDQwDk9QBQhoAAw1JV38Qx1WCB38QDRiTUoAAx15V08Q+EuHiA6EVAAGMrhcGjy5MnasGGD/H5/2Jjf71dVVZXy8vLkcDgM6hCAEfiKB4ChLBaLNm3apPLyci1cuFDf+MY3lJiYqN7eXu3fv1+/+MUvVFNTwwJZIMpwFw8AU/je976nzZs3a3BwMHQtLi5OK1eu1MaNGw3sDMBI4S4eAKNKbW2tqqurVVJSovnz54dmUPbt26fq6mrNnj1bpaWlRrcJ4BZiBgWAoTjNGIge7CQLYNS48jTjQCAQdhZPIBDgNGMgSg0roFRVVen+++9XSkqK0tPT5XQ6dfr06bCavr4+VVRUKDU1VePGjVNZWZm6urrCatra2lRSUqKkpCSlp6drzZo1Yd87A4gewf1NPvnkE+Xn56uoqEiLFi1SUVGR8vPzdebMmbA6ANFhWAHl8OHDqqio0LvvvqsDBw7o8uXLevTRR9XT0xOqWblypfbs2aNdu3bp8OHD6ujoCPvu2OfzqaSkRAMDAzp69Ki2b9+ubdu2af369SP3rgCMGsH9TZYsWaJp06Zpy5Yt+s///E9t2bJF06ZN05IlS8LqAESHG1qDcu7cOaWnp+vw4cN66KGH5PF4lJaWpp07d6q8vFySdOrUKU2ZMkUul0uzZ8/Wvn379Pjjj6ujo0MZGRmSpK1bt2rt2rU6d+6c4uPj/+TrsgYFiBwDAwNKTk5WcnKyxo8ff9VpxhcvXlRPT496enqu6e8HAOZ1y9ageDweSdKECRMkSY2Njbp8+bLmzZsXqikoKFBubq5cLpckyeVyqbCwMBROJKm4uFher1cnT54c8nX6+/vl9XrDHgAiw9GjRzU4OCiPx6O+vj6tXr1aW7Zs0erVq9XX1yePx6PBwUEdPXrU6FYB3ELXfZux3+/XM888owceeCB0Vobb7VZ8fLzGjx8fVpuRkSG32x2quTKcBMeDY0OpqqrSD3/4w+ttFYCJ/fa3v5X0xcnFra2t2rRpU2gsJiZGeXl5amlpCdUBiA7XPYNSUVGhpqYmvfnmmyPZz5AqKyvl8XhCj/b29pv+mgBujXPnzkmSWlpahhwPXg/WAYgO1zWDsnz5ctXX1+vIkSOaOHFi6HpmZqYGBgZ08eLFsFmUrq4uZWZmhmree++9sN8XvMsnWPNlCQkJSkhIuJ5WAZhcamrqiNYBiAzDmkEJBAJavny53nrrLf3qV79SXl5e2PjMmTM1ZswYHTx4MHTt9OnTamtrk91ulyTZ7XadOHFCZ8+eDdUcOHBAVqtVU6dOvZH3AmAUuvKr3bS0NL3++uvq6OjQ66+/rrS0tCHrAES+Yc2gVFRUaOfOndq9e7dSUlJCf2HYbDYlJibKZrPpqaee0qpVqzRhwgRZrVatWLFCdrtds2fPliQ9+uijmjp1qpYsWaKNGzfK7Xbr+eefV0V
FBbMkQBT6v//7P0nS2LFjlZSUpKeffjo0lpeXp7Fjx6qvry9UByA6DCugvPbaa5KkuXPnhl3/6U9/qr/+67+WJG3evFmxsbEqKytTf3+/iouL9eqrr4ZqLRaL6uvrtWzZMtntdiUnJ2vp0qV64YUXbuydABiVgmvK+vv7dffdd+sv//Iv1dfXp7Fjx+qTTz5Ra2trWB2A6MBZPAAM9cQTT2jHjh1KTU0N3VIcFBcXJ5vNpvPnz2vx4sV64403DOwUwI3iNGMAo8aTTz6pHTt26Pz580pLS1NRUZGSk5PV09Ojt99+O3T3zpNPPmlwpwBuJWZQABhqYGBAY8eO1Vf9VRQTE6O+vj52kgVGOU4zBjBqHD169CvDifTFHYTsJAtEFwIKAEMFd4j92te+ppycnLCx3Nxcfe1rXwurAxAdCCgADBVcY/L3f//3amlp0dtvv62dO3fq7bff1pkzZ/Td7343rA5AdCCgADBUcDO22tpa+f3+sDG/36+6urqwOgDRgbt4ABjqz/7szyRJ+/fvl81mU29vb2gsMTFRfX19YXUAogMzKAAM5XA4lJaWpkAgEAojQX19fQoEAkpPT5fD4TCoQwBGYAYFgOEGBgYkSbfffruefPJJ3XHHHTpz5oz+67/+S+fOnVN/f7/BHQK41QgoAAx16NAheTweFRQUqKenR5s2bQqN5ebmqqCgQKdOndKhQ4f0yCOPGNgpgFuJr3gAGOrQoUOSpOnTp6ujoyNs7Le//a0KCwvD6gBEBwIKAFP4+c9/ftWGbYFAQLt27TKoIwBG4iseAIaaM2dO6M/FxcV6/PHHlZiYqN7eXtXX12vfvn1X1QGIfAQUAIY6efJk6M+HDh0KBRLpi9uMr6ybP3/+Le0NgHH4igeAod55553Qn798t07w7p4v1wGIfAQUAIZKTk6WJOXk5GjixIlhYxMnTgydzxOsAxAd+IoHgKFmzJihHTt26OLFizp37pxcLpc6OzuVlZUlu90e2uJ+xowZBncK4FZiBgWAobKzsyVJ3d3dys3N1Z49e3ThwgXt2bNHubm56u7uDqsDEB2YQQFgqCvP2Dl79qxeeumlP1kHIPIxgwLAUA6HQ+np6V9Zw1k8QPQhoAAw3IULF25oHEDkIaAAMNT+/fs1ODj4lTWDg4Pav3//LeoIgBnEBL68t/Qo4PV6ZbPZ5PF4ZLVajW4HwA2YOXOmPvzwQ0lSSUmJHnvssdBOsnv37tUvfvELSdK9996rxsZGI1sFcIOG8/nNDAoAQwUPCLzrrrtUU1OjgYEBffjhhxoYGFBNTY3y8/PD6gBEB+7iAWColJQUud1utbW1KSUlJezrnjVr1iguLi5UByB6MIMCwFALFy6UJPX19V21FmVwcFB9fX1hdQCiAwEFgKEefvjhEa0DEBkIKAAMdeVpxiNRByAyEFAAGOpaTynmNGMguhBQABgqeEpxamqqYmPD/0qKjY1VampqWB2A6EBAAWCoe+65R5J0/vx5+f3+sDG/36/z58+H1QGIDgQUAIbKyMgY0ToAkYGAAsBQXV1dI1oHIDIQUAAY6tixYyNaByAyEFAAGKqhoWFE6wBEBgIKAEP19PSMaB2AyMBZPABMY8KECbrtttvU29urxMRE/f73v9eFCxeMbguAAQgoAAyVkpKiS5cuSZIuXLjwRwMJhwUC0YWveAAYituMAQyFgALAUA899NCI1gGIDAQUAIZyuVwjWgcgMhBQABiqqalpROsARAYCCgBDBQKBEa0DEBkIKAAMNWnSpBGtAxAZCCgADDVlypQRrQMQGQgoAAzFIlkAQyGgADBUb2/viNYBiAwEFACGGhgYGNE6AJGBgALAULGx1/bX0LXWAYgM/B8PwFAxMTEjWgcgMgw7oBw5ckQLFixQdna2YmJiVFdXFzYeCAS0fv16ZWVlKTExUfPmzVNzc3NYzYULF7R48WJZrVaNHz9eTz31VOiwMADRhRkUAEMZ9v/xPT09mjFjhrZs2TL
k+MaNG/XKK69o69atOnbsmJKTk1VcXKy+vr5QzeLFi3Xy5EkdOHBA9fX1OnLkiJ5++unrfxcARq3Lly+PaB2AyBATuIHtGWNiYvTWW2/J6XRK+mL2JDs7W6tXr9azzz4rSfJ4PMrIyNC2bdv07W9/W7/+9a81depUvf/++7rvvvskSfv379djjz2mzz77TNnZ2X/ydb1er2w2mzwej6xW6/W2D8AELBaL/H7/n6yLjY2Vz+e7BR0BuFmG8/k9onOmLS0tcrvdmjdvXuiazWbTrFmzQnsYuFwujR8/PhROJGnevHmKjY3VsWPHhvy9/f398nq9YQ8AkeFawslw6gBEhhENKG63W5KUkZERdj0jIyM05na7lZ6eHjYeFxenCRMmhGq+rKqqSjabLfTIyckZybYBAIDJjIpVZ5WVlfJ4PKFHe3u70S0BAICbaEQDSmZmpiSpq6sr7HpXV1doLDMzU2fPng0bHxwc1IULF0I1X5aQkCCr1Rr2AAAAkWtEA0peXp4yMzN18ODB0DWv16tjx47JbrdLkux2uy5evKjGxsZQza9+9Sv5/X7NmjVrJNsBAACjVNxwn3Dp0iV9/PHHoZ9bWlp0/PhxTZgwQbm5uXrmmWf0L//yL7rzzjuVl5enf/qnf1J2dnboTp8pU6boG9/4hr7zne9o69atunz5spYvX65vf/vb13QHDwAAiHzDDigffPCBioqKQj+vWrVKkrR06VJt27ZN3/ve99TT06Onn35aFy9e1IMPPqj9+/dr7Nixoefs2LFDy5cv1yOPPKLY2FiVlZXplVdeGYG3AwAAIsEN7YNiFPZBASLHcLawH4V/XQG4gmH7oAAAAIwEAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADCdOKMbADD6ff755zp16tRNf50PP/zwup5XUFCgpKSkEe4GwM1EQAFww06dOqWZM2fe9Ne53tdobGzUvffeO8LdALiZCCgAblhBQYEaGxuv67nDCR3X+xoFBQXX9TwAxiGgALhhSUlJt2SGglkQIHqwSBaAoQKBwIjWAYgMBBQAhvtT4YNwAkQfvuIBolxzc7O6u7uNbkONjY1DrkdpbGy87rt3RlJKSoruvPNOo9sAogYBBYhizc3Nuuuuu4xu4yvdiruDrtVvfvMbQgpwixBQgCgWnDl54403NGXKFIO7+UJvb69aW1s1efJkJSYmGt2OJOnXv/61nnjiCVPMNAHRgoACRLnMcTG6N8uiKZlmWZKWrAfy7ja6iTCJFy3KHBdjdBtAVCGgAFHs888/19/NjNeUI38nHTG6G/OaIunvZsYb3QYQVQwNKFu2bNGLL74ot9utGTNm6N/+7d/09a9/3ciWgKhy6tQp/aRxQP/v9GWjWzG9zksBLU5JMboNIGoYFlD++7//W6tWrdLWrVs1a9YsvfzyyyouLtbp06eVnp5uVFtAVHE6nZLMdVZNcL2HmdbFSNzFA9xqMQGDNhiYNWuW7r//fv34xz+WJPn9fuXk5GjFihV67rnnvvK5Xq9XNptNHo9HVqv1VrQL4Bbo7e3V0qVLtWvXLn3zm9/U9u3bTbNQFsCNG87ntyGr4gYGBtTY2Kh58+b9oZHYWM2bN08ul+uq+v7+fnm93rAHgMjidDqVlJSkXbt2SZJ27dqlpKSk0CwPgOhiSED53e9+J5/Pp4yMjLDrGRkZcrvdV9VXVVXJZrOFHjk5ObeqVQC3gNPp1O7du4c
c2717NyEFiEKj4i6eyspKrVq1KvSz1+slpAAm8vnnn+vUqVPX9dze3t5QOLHZbHrggQe0d+9ePfbYY3rnnXfk8Xi0e/duvfPOO9f9dY+Z1tgAuDaGBJTbb79dFotFXV1dYde7urqUmZl5VX1CQoISEhJuVXsAhunUqVMjsuOrx+PR3r17JSn036AHH3zwun9vY2MjJyEDo4whASU+Pl4zZ87UwYMHQ1O3fr9fBw8e1PLly41oCcANKCgoUGNj43U9t7S0VJ9++qkk6aGHHtKiRYsUFxenwcFB7dy5U0eOfLFBy6RJk1RbW3vd/QEYXQz7imfVqlVaunSp7rvvPn3961/Xyy+/rJ6eHv3N3/yNUS0BuE5JSUnXPUMxbtw4SdLkyZP19ttvKzb2D0vjvvOd7+iOO+7Qp59+qnHjxjELAkQRwwLKt771LZ07d07r16+X2+3WPffco/3791+1cBZAZJszZ45Onjyp9vZ2DQ4OKj7+Dzu2Dg4O6rPPPgvVAYgehh6+sXz5cn366afq7+/XsWPHNGvWLCPbAWAAh8MhSfL5fEpJSdHatWv1m9/8RmvXrlVKSop8Pl9YHYDoMCru4gEQua68I29gYEAbN27Uxo0bv7IOQOQjoAAwlMPh0OTJk2WxWHTmzBldubl1TEyM7rjjDvn9fmZQgChDQAFgKIvFok2bNqm8vFzz589XYmKifv/73+u2225Tb2+v9u3bp5qaGlksFqNbBXALEVAAGK60tFTPPvusNm/erMHBwdD1uLg4PfvssyotLTWwOwBGIKAAMFxtba2qq6tVUlISmkUJzp5UV1dr9uzZhBQgyhh2mvGN4DRjIHL4fD7l5+ersLBQdXV1Yfug+P1+OZ1ONTU1qbm5ma95gFHO9KcZA0BQQ0ODWltbtW7durBwIn1xynllZaVaWlrU0NBgUIcAjEBAAWCozs5OSdK0adOGHA9eD9YBiA4EFACGysrKkiQ1NTUNOR68HqwDEB0IKAAMFdwHZcOGDfL7/WFjfr9fVVVVysvLYx8UIMoQUAAYKrgPSn19vZxOp1wul7q7u+VyueR0OlVfX6/q6moWyAJRhtuMARiutLRUNTU1Wr16ddihgHl5eaqpqeEWYyAKcZsxANPw+XxqaGhQZ2ensrKy5HA4mDkBIshwPr+ZQQFgGhaLRXPnzjW6DQAmwBoUAABgOgQUAABgOgQUAABgOgQUAABgOiySBWAa3MUDIIiAAsAUamtr9cwzz6i9vT10LScnRy+//DL7oABRiK94ABiutrZWZWVlYeFEktrb21VWVqba2lqDOgNgFAIKAEP5fD498cQTkqT4+Hg999xz+vjjj/Xcc88pPj5ekvTEE0/I5/MZ2SaAW4ydZAEYat++fXrsscc0ZswYXbp0KRRKJGlgYEDjxo3T5cuXtXfvXs2fP9/ATgHcqOF8fjODAsBQL730kiRpxYoVYeFE+mJGpaKiIqwOQHQgoAAw1MWLFyVJdrt9yPFZs2aF1QGIDgQUAIa67777JEnPPfec/H5/2Jjf79c//uM/htUBiA4EFACGCn5188knn2jBggVyuVzq7u6Wy+XSggULdObMmbA6ANGBfVAAGCoxMVELFy7U7t27tXfvXu3du/eqmoULFyoxMdGA7gAYhRkUAIarq6vTwoULhxxbuHCh6urqbm1DAAzHDAoAU6irq1Nvb6/WrFmj5uZm3XnnnXrxxReZOQGiFAEFgGkkJibqxz/+sdFtADABvuIBAACmQ0ABAACmQ0ABAACmQ0ABAACmQ0ABAACmQ0ABAACmQ0ABAACmQ0ABAACmQ0ABAACmMyp3kg0EApIkr9drcCcAAOBaBT+3g5/jX2VUBpTu7m5JUk5OjsGdAACA4eru7pbNZvvKmpjAtcQYk/H7/ero6FBKSopiYmKMbgfACPJ6vcrJyVF7e7usVqvR7QAYQYFAQN3d3crOzlZs7FevMhmVAQVA5PJ6vbLZbPJ4PAQUIIqxSBYAAJgOAQU
AAJgOAQWAqSQkJOj73/++EhISjG4FgIFYgwIAAEyHGRQAAGA6BBQAAGA6BBQAAGA6BBQAAGA6BBQApnDkyBEtWLBA2dnZiomJUV1dndEtATAQAQWAKfT09GjGjBnasmWL0a0AMIFReVgggMgzf/58zZ8/3+g2AJgEMygAAMB0CCgAAMB0CCgAAMB0CCgAAMB0CCgAAMB0uIsHgClcunRJH3/8cejnlpYWHT9+XBMmTFBubq6BnQEwAqcZAzCFQ4cOqaio6KrrS5cu1bZt2259QwAMRUABAACmwxoUAABgOgQUAABgOgQUAABgOgQUAABgOgQUAABgOgQUAABgOgQUAABgOgQUAABgOgQUAABgOgQUAABgOgQUAABgOgQUAABgOv8fgqVnJ7CqL2sAAAAASUVORK5CYII=", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "plt.boxplot(df['fare_amount'])" ] }, { "cell_type": "code", "execution_count": 15, "metadata": { "trusted": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " Unnamed: 0 key fare_amount \\\n", "0 24238194 2015-05-07 19:52:06.0000003 7.5 \n", "1 27835199 2009-07-17 20:04:56.0000002 7.7 \n", "2 44984355 2009-08-24 21:45:00.00000061 12.9 \n", "3 25894730 2009-06-26 08:22:21.0000001 5.3 \n", "4 17610152 2014-08-28 17:47:00.000000188 16.0 \n", "... ... ... ... \n", "199994 3189201 2014-01-31 14:42:00.000000181 12.0 \n", "199996 16382965 2014-03-14 01:09:00.0000008 7.5 \n", "199997 27804658 2009-06-29 00:42:00.00000078 30.9 \n", "199998 20259894 2015-05-20 14:56:25.0000004 14.5 \n", "199999 11951496 2010-05-15 04:08:00.00000076 14.1 \n", "\n", " pickup_datetime pickup_longitude pickup_latitude \\\n", "0 2015-05-07 19:52:06+00:00 -73.999817 40.738354 \n", "1 2009-07-17 20:04:56+00:00 -73.994355 40.728225 \n", "2 2009-08-24 21:45:00+00:00 -74.005043 40.740770 \n", "3 2009-06-26 08:22:21+00:00 -73.976124 40.790844 \n", "4 2014-08-28 17:47:00+00:00 -73.925023 40.744085 \n", "... ... ... ... \n", "199994 2014-01-31 14:42:00+00:00 -73.983070 40.760770 \n", "199996 2014-03-14 01:09:00+00:00 -73.984722 40.736837 \n", "199997 2009-06-29 00:42:00+00:00 -73.986017 40.756487 \n", "199998 2015-05-20 14:56:25+00:00 -73.997124 40.725452 \n", "199999 2010-05-15 04:08:00+00:00 -73.984395 40.720077 \n", "\n", " dropoff_longitude dropoff_latitude passenger_count \n", "0 -73.999512 40.723217 1 \n", "1 -73.994710 40.750325 1 \n", "2 -73.962565 40.772647 1 \n", "3 -73.965316 40.803349 3 \n", "4 -73.973082 40.761247 5 \n", "... ... ... ... 
\n", "199994 -73.972972 40.754177 1 \n", "199996 -74.006672 40.739620 1 \n", "199997 -73.858957 40.692588 2 \n", "199998 -73.983215 40.695415 1 \n", "199999 -73.985508 40.768793 1 \n", "\n", "[194911 rows x 9 columns]\n" ] }, { "data": { "text/plain": [ "{'whiskers': [,\n", " ],\n", " 'caps': [,\n", " ],\n", " 'boxes': [],\n", " 'medians': [],\n", " 'fliers': [],\n", " 'means': []}" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAh8AAAGdCAYAAACyzRGfAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8fJSN1AAAACXBIWXMAAA9hAAAPYQGoP6dpAAAWZElEQVR4nO3dcWjc9f348dfVtDG1uavpNDGYqGO6yya1NAwbdEO7bqUMv5ZG+M2fZd3oti8jLbNlbBS2uckgsvFbN6F1Y0i74Yqsf1jpYMrosGWsFY0WdGtqHZVmxEQY9C5t07Ss9/3Dr/czW7W95PK+JH084EO9z+dzn3tV8O7p5z53lymVSqUAAEhkTq0HAACuLOIDAEhKfAAASYkPACAp8QEAJCU+AICkxAcAkJT4AACSqqv1AP/uwoULMTg4GI2NjZHJZGo9DgBwGUqlUoyMjERra2vMmfPh5zamXXwMDg5GW1tbrccAACZgYGAgbrzxxg/dZ9rFR2NjY0S8O3w2m63xNADA5SgWi9HW1lZ+Hf8w0y4+3nurJZvNig8AmGEu55IJF5wCAEmJDwAgKfEBACQlPgCApMQHAJCU+AAAkhIfAEBS4gMASEp8AEmMjo7Ghg0bYuXKlbFhw4YYHR2t9UhAjWRKpVKp1kO8X7FYjFwuF4VCwTecwiyxevXqePbZZ/9j/f333x979uxJPxBQdZW8fjvzAUypDwqPiIhnn302Vq9enXYgoOac+QCmzOjoaMyfP/+S+505cyYaGhoSTARMFWc+gGlh/fr1425fe+21sWTJkrj22ms/dD9gdnPmA5gyl/Prlu+ZZk9FQIWc+QAApi3xAQAkJT6AKXPVVVdVdT9gdhAfwJSZM+fynmIudz9gdvBfPDBlzp8/X9X9gNlBfAAASYkPACAp8QEAJCU+AICkxAcAkJT4AACSEh8AQFLiAwBISnwAAEmJDwAgKfEBACRVUXz84Ac/iEwmM27J5/Pl7WfPno2enp5YtGhRLFiwILq7u2N4eLjqQwMAM1fFZz4++clPxttvv11e/vznP5e3bdq0Kfbu3Ru7d++O/fv3x+DgYKxZs6aqAwMAM1tdxXeoq4uWlpb/WF8oFOLJJ5+MXbt2xfLlyyMiYseOHdHR0RGHDh2KZcuWTX5aAGDGq/jMx7Fjx6K1tTU++tGPxkMPPRQnTpyIiIi+vr44f/58rFixorxvPp+P9vb2OHjw4Aceb2xsLIrF4rgFAJi9KoqPO++8M3bu3BnPPfdcPPHEE3H8+PH49Kc/HSMjIzE0NBTz5s2LhQsXjrtPc3NzDA0NfeAxe3t7I5fLlZe2trYJ/UUAgJmhorddVq1aVf7nxYsXx5133hk33XRT/O53v4uGhoYJDbBly5bYvHlz+XaxWBQgADCLTeqjtgsXLozbbrst3nzzzWhpaYlz587FyZMnx+0zPDx80WtE3lNfXx/ZbHbcAgDMXpOKj1OnTsXf//73uOGGG6KzszPmzp0b+/btK28/evRon
DhxIrq6uiY9KAAwO1T0tsu3vvWtuO++++Kmm26KwcHBeOSRR+Kqq66KBx98MHK5XKxfvz42b94cTU1Nkc1mY+PGjdHV1eWTLgBAWUXx8Y9//CMefPDB+Oc//xnXXXdd3H333XHo0KG47rrrIiJi69atMWfOnOju7o6xsbFYuXJlbN++fUoGBwBmpkypVCrVeoj3KxaLkcvlolAouP4DZrhMJnPZ+06zpyKgQpW8fvttFwAgKfEBACQlPgCApMQHAJCU+AAAkhIfAEBS4gMASEp8AABJiQ8AICnxAQAkJT4AgKTEBwCQlPgAAJISHwBAUuIDAEhKfAAASYkPACAp8QEAJCU+AICkxAcAkJT4AACSEh8AQFLiAwBISnwAAEmJDwAgKfEBACQlPgCApMQHAJCU+AAAkhIfAEBS4gMASEp8AABJiQ8AICnxAQAkJT4AgKTEBwCQlPgAAJISHwBAUuIDAEhKfAAASYkPACAp8QEAJCU+AICkxAcAkJT4AACSEh8AQFLiAwBISnwAAEmJDwAgKfEBACQlPgCApMQHAJCU+AAAkhIfAEBS4gMASEp8AABJiQ8AIKlJxcdjjz0WmUwmHn744fK6s2fPRk9PTyxatCgWLFgQ3d3dMTw8PNk5AYBZYsLx8dJLL8Uvf/nLWLx48bj1mzZtir1798bu3btj//79MTg4GGvWrJn0oADA7DCh+Dh16lQ89NBD8atf/Squvfba8vpCoRBPPvlk/PSnP43ly5dHZ2dn7NixI/7yl7/EoUOHqjY0ADBzTSg+enp64gtf+EKsWLFi3Pq+vr44f/78uPX5fD7a29vj4MGDFz3W2NhYFIvFcQsAMHvVVXqHp59+Ol555ZV46aWX/mPb0NBQzJs3LxYuXDhufXNzcwwNDV30eL29vfHDH/6w0jEAgBmqojMfAwMD8c1vfjN++9vfxtVXX12VAbZs2RKFQqG8DAwMVOW4AMD0VFF89PX1xTvvvBNLly6Nurq6qKuri/3798fjjz8edXV10dzcHOfOnYuTJ0+Ou9/w8HC0tLRc9Jj19fWRzWbHLQDA7FXR2y6f/exn47XXXhu37itf+Urk8/n4zne+E21tbTF37tzYt29fdHd3R0TE0aNH48SJE9HV1VW9qQGAGaui+GhsbIzbb7993LprrrkmFi1aVF6/fv362Lx5czQ1NUU2m42NGzdGV1dXLFu2rHpTAwAzVsUXnF7K1q1bY86cOdHd3R1jY2OxcuXK2L59e7UfBgCYoTKlUqlU6yHer1gsRi6Xi0Kh4PoPmOEymcxl7zvNnoqAClXy+u23XQCApMQHAJCU+AAAkhIfAEBS4gMASEp8AABJiQ8AICnxAQAkJT4AgKTEBwCQlPgAAJISHwBAUuIDAEhKfAAASYkPACAp8QEAJCU+AICkxAcAkJT4AACSEh8AQFLiAwBISnwAAEmJDwAgKfEBACQlPgCApMQHAJCU+AAAkhIfAEBS4gMASEp8AABJiQ8AICnxAQAkJT4AgKTEBwCQlPgAAJISHwBAUuIDAEhKfAAASYkPACAp8QEAJCU+AICkxAcAkJT4AACSEh8AQFLiAwBISnwAAEmJDwAgKfEBACQlPgCApMQHAJCU+AAAkhIfAEBS4gMASEp8AABJiQ8AICnxAQAkVVF8PPHEE7F48eLIZrORzWajq6sr/vCHP5S3nz17Nnp6emLRokWxYMGC6O7ujuHh4aoPDQDMXBXFx4033hiPPfZY9PX1xcsvvxzLly+P+++/P/76179GRMSmTZti7969sXv37ti/f38MDg7GmjVrpmRwAGBmypRKpdJkDtDU1BQ/+clP4oEHHojrrrsudu3aFQ888EBERPT390dHR0ccPHgwli1bdlnHKxaLkcvlolAoRDabncxoQI1lMpnL3neST0VAjVXy+j3haz7+9a9/xdNPPx2nT5+Orq6u6Ovri/Pnz8eKFSvK++Tz+Whvb4+DBw9O9GEAgFmmrtI7vPbaa9HV1
RVnz56NBQsWxDPPPBOf+MQn4vDhwzFv3rxYuHDhuP2bm5tjaGjoA483NjYWY2Nj5dvFYrHSkQCAGaTiMx8f//jH4/Dhw/Hiiy/GN77xjVi3bl387W9/m/AAvb29kcvlyktbW9uEjwUATH8Vx8e8efPiYx/7WHR2dkZvb2/ccccd8fOf/zxaWlri3LlzcfLkyXH7Dw8PR0tLywceb8uWLVEoFMrLwMBAxX8JAGDmmPT3fFy4cCHGxsais7Mz5s6dG/v27StvO3r0aJw4cSK6uro+8P719fXlj+6+twAAs1dF13xs2bIlVq1aFe3t7TEyMhK7du2KF154IZ5//vnI5XKxfv362Lx5czQ1NUU2m42NGzdGV1fXZX/SBQCY/SqKj3feeSe+9KUvxdtvvx25XC4WL14czz//fHzuc5+LiIitW7fGnDlzoru7O8bGxmLlypWxffv2KRkcAJiZJv09H9Xmez5g9vA9H3DlSPI9HwAAEyE+AICkxAcAkJT4AACSEh8AQFLiAwBISnwAAEmJDwAgKfEBACQlPgCApMQHAJCU+AAAkhIfAEBS4gMASEp8AABJiQ8AICnxAQAkJT4AgKTEBwCQlPgAAJISHwBAUuIDAEhKfAAASYkPACAp8QEAJCU+AICkxAcAkJT4AACSEh8AQFLiAwBISnwAAEmJDwAgKfEBACQlPgCApMQHAJCU+AAAkhIfAEBS4gMASEp8AABJiQ8AICnxAQAkJT4AgKTEBwCQlPgAAJISHwBAUuIDAEiqrtYDANPfmTNnor+/f0of45VXXpnQ/fL5fMyfP7/K0wBTSXwAl9Tf3x+dnZ1T+hgTPX5fX18sXbq0ytMAU0l8AJeUz+ejr6+v4vu9/vrrsW7dukvu9+tf/zpuv/32iYwW+Xx+QvcDaidTKpVKtR7i/YrFYuRyuSgUCpHNZms9DjBJmUzmkvtMs6chYAIqef12wSkwpS4VFsIDrjziA5hypVIpXn311XHrXn31VeEBVyjxASSxZMmS8nUjfX19sWTJktoOBNSM+AAAkhIfAEBS4gMASEp8AABJVRQfvb298alPfSoaGxvj+uuvj9WrV8fRo0fH7XP27Nno6emJRYsWxYIFC6K7uzuGh4erOjQAMHNVFB/79++Pnp6eOHToUPzxj3+M8+fPx+c///k4ffp0eZ9NmzbF3r17Y/fu3bF///4YHByMNWvWVH1wAGBmqujr1Z977rlxt3fu3BnXX3999PX1xWc+85koFArx5JNPxq5du2L58uUREbFjx47o6OiIQ4cOxbJly6o3OQAwI03qmo9CoRAREU1NTRHx7mf3z58/HytWrCjvk8/no729PQ4ePHjRY4yNjUWxWBy3AACz14Tj48KFC/Hwww/HXXfdVf5BqKGhoZg3b14sXLhw3L7Nzc0xNDR00eP09vZGLpcrL21tbRMdCQCYASYcHz09PfH666/H008/PakBtmzZEoVCobwMDAxM6ngAwPRW0TUf79mwYUP8/ve/jwMHDsSNN95YXt/S0hLnzp2LkydPjjv7MTw8HC0tLRc9Vn19fdTX109kDABgBqrozEepVIoNGzbEM888E3/605/illtuGbe9s7Mz5s6dG/v27SuvO3r0aJw4cSK6urqqMzEAMKNVdOajp6cndu3aFc8++2w0NjaWr+PI5XLR0NAQuVwu1q9fH5s3b46mpqbIZrOxcePG6Orq8kkXACAiKoyPJ554IiIi7rnnnnHrd+zYEV/+8pcjImLr1q0xZ86c6O7ujrGxsVi5cmVs3769KsMCADNfRfFRKpUuuc/VV18d27Zti23btk14KABg9vLbLgBAUuIDAEhKfAAASYkPACAp8QEAJCU+AICkxAcAkJT4AACSEh8AQFLiAwBISnwAAEmJDwAgKfEBACQlPgCApMQHAJCU+AAAkhIfAEBS4gMASEp8AABJiQ8AICnxAQAkJT4AgKTEBwCQlPgAAJISHwBAUuIDAEhKfAAASYkPACAp8
QEAJCU+AICkxAcAkJT4AACSEh8AQFLiAwBISnwAAEmJDwAgKfEBACQlPgCApMQHAJCU+AAAkhIfAEBS4gMASEp8AABJ1dV6AGDqHDt2LEZGRmo9RtmRI0fG/TmdNDY2xq233lrrMeCKID5gljp27FjcdttttR7jotauXVvrES7qjTfeECCQgPiAWeq9Mx5PPfVUdHR01Hiad42OjsZbb70VN998czQ0NNR6nLIjR47E2rVrp9VZIpjNxAfMch0dHbF06dJaj1F211131XoEoMZccAoAJCU+AICkxAcAkJT4AACSEh8AQFLiAwBISnwAAEmJDwAgKfEBACQlPgCApCqOjwMHDsR9990Xra2tkclkYs+ePeO2l0ql+P73vx833HBDNDQ0xIoVK+LYsWPVmhcAmOEqjo/Tp0/HHXfcEdu2bbvo9h//+Mfx+OOPxy9+8Yt48cUX45prromVK1fG2bNnJz0sADDzVfzDcqtWrYpVq1ZddFupVIqf/exn8d3vfjfuv//+iIj4zW9+E83NzbFnz5744he/OLlpAYAZr6rXfBw/fjyGhoZixYoV5XW5XC7uvPPOOHjw4EXvMzY2FsVicdwCAMxeVY2PoaGhiIhobm4et765ubm87d/19vZGLpcrL21tbdUcCQCYZmr+aZctW7ZEoVAoLwMDA7UeCQCYQlWNj5aWloiIGB4eHrd+eHi4vO3f1dfXRzabHbcAALNXVePjlltuiZaWlti3b195XbFYjBdffDG6urqq+VAAwAxV8addTp06FW+++Wb59vHjx+Pw4cPR1NQU7e3t8fDDD8ePfvSjuPXWW+OWW26J733ve9Ha2hqrV6+u5twAwAxVcXy8/PLLce+995Zvb968OSIi1q1bFzt37oxvf/vbcfr06fj6178eJ0+ejLvvvjuee+65uPrqq6s3NQAwY1UcH/fcc0+USqUP3J7JZOLRRx+NRx99dFKDAQCzU80/7QIAXFnEBwCQVMVvuwAzR8uCTDScfCNi0P9nfJiGk29Ey4JMrceAK4b4gFnsvzvnRceB/444UOtJpreOePffFZCG+IBZ7Jd95+L/fH9ndOTztR5lWjvS3x+//H//N/6r1oPAFUJ8wCw2dKoUowtvi2hdUutRprXRoQsxdOqDP8UHVJc3ggGApMQHAJCU+AAAkhIfAEBS4gMASEp8AABJiQ8AICnxAQAkJT4AgKTEBwCQlPgAAJISHwBAUn5YDmapM2fORETEK6+8UuNJ/r/R0dF466234uabb46GhoZaj1N25MiRWo8AVxTxAbNUf39/RER87Wtfq/EkM0djY2OtR4ArgviAWWr16tUREZHP52P+/Pm1HeZ/HTlyJNauXRtPPfVUdHR01HqccRobG+PWW2+t9RhwRRAfMEt95CMfia9+9au1HuOiOjo6YunSpbUeA6gRF5wCAEmJDwAgKfEBACQlPgCApMQHAJCU+AAAkhIfAEBS4gMASEp8AABJiQ8AICnxAQAkJT4AgKTEBwCQlPgAAJKqq/UAwPR35syZ6O/vn/Rxjhw5Mu7Pasjn8zF//vyqHQ+YeuIDuKT+/v7o7Oys2vHWrl1btWP19fXF0qVLq3Y8YOqJD+CS8vl89PX1Tfo4o6Oj8dZbb8XNN98cDQ0NVZjs3dmAmSVTKpVKtR7i/YrFYuRyuSgUCpHNZms9DgBwGSp5/XbBKQCQlPgAAJISHwBAUuIDAEhKfAAASYkPACAp8QEAJCU+AICkxAcAkJT4AACSEh8AQFLiAwBISnwAAEnV1XqAf/fej+wWi8UaTwIAXK73Xrffex3/MNMuPkZGRiIioq2trcaTAACVGhkZiVwu96H7ZEqXkygJXbhwIQYHB6OxsTEymUytxwGqqFgsRltbWwwMDEQ2m631OEAVlUqlGBkZidbW1pgz58Ov6ph28QHMXsViMXK5XBQKBfEBVzAXnAIASYkPACAp8QEkU19fH4888kjU19fXehSghlzzAQAk5cwHAJCU+
AAAkhIfAEBS4gMASEp8AFPuwIEDcd9990Vra2tkMpnYs2dPrUcCakh8AFPu9OnTcccdd8S2bdtqPQowDUy7H5YDZp9Vq1bFqlWraj0GME048wEAJCU+AICkxAcAkJT4AACSEh8AQFI+7QJMuVOnTsWbb75Zvn38+PE4fPhwNDU1RXt7ew0nA2rBr9oCU+6FF16Ie++99z/Wr1u3Lnbu3Jl+IKCmxAcAkJRrPgCApMQHAJCU+AAAkhIfAEBS4gMASEp8AABJiQ8AICnxAQAkJT4AgKTEBwCQlPgAAJISHwBAUv8DNaUxyQSgSpIAAAAASUVORK5CYII=", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "#Remove Outliers\n", "q_low = df[\"fare_amount\"].quantile(0.01)\n", "q_hi = df[\"fare_amount\"].quantile(0.99)\n", "\n", "df = df[(df[\"fare_amount\"] < q_hi) & (df[\"fare_amount\"] > q_low)]\n", "print(df)\n", "plt.boxplot(df['fare_amount'])" ] }, { "cell_type": "code", "execution_count": 16, "metadata": { "trusted": true }, "outputs": [ { "data": { "text/plain": [ "Unnamed: 0 0\n", "key 0\n", "fare_amount 0\n", "pickup_datetime 0\n", "pickup_longitude 0\n", "pickup_latitude 0\n", "dropoff_longitude 0\n", "dropoff_latitude 0\n", "passenger_count 0\n", "dtype: int64" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#Check the missing values now\n", "df.isnull().sum()" ] }, { "cell_type": "code", "execution_count": 17, "metadata": { "trusted": true }, "outputs": [], "source": [ "#Time to apply learning models\n", "from sklearn.model_selection import train_test_split" ] }, { "cell_type": "code", "execution_count": 18, "metadata": { "trusted": true }, "outputs": [], "source": [ "#Take x as predictor variable\n", "x = df.drop(\"fare_amount\", axis = 1)\n", "#And y as target variable\n", "y = df['fare_amount']\n" ] }, { "cell_type": "code", "execution_count": 19, "metadata": { "trusted": true }, "outputs": [], "source": [ "#Necessary to apply model: every predictor column must be numeric\n", "x['pickup_datetime'] = pd.to_numeric(pd.to_datetime(x['pickup_datetime']))\n", "#Drop the leftover CSV index column(s) ('Unnamed: ...') and the object-dtype 'key' column.\n", "#The mask must be negated with '~': without it only 'Unnamed: 0' is kept and every real feature is discarded.\n", "x = x.loc[:, ~x.columns.str.contains('^Unnamed')].drop(columns=['key'], errors='ignore')" ] }, { "cell_type": "code", "execution_count": 20, "metadata": { "trusted": true }, "outputs": [], "source": [ "x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.2, random_state = 1)" ] }, { "cell_type": "code", "execution_count": 21, "metadata": { "trusted": true }, "outputs": [], "source": [ "from sklearn.linear_model import LinearRegression" ] }, { "cell_type": "code", "execution_count": 22, "metadata": { "trusted": true }, "outputs": [ { "data": {
"text/html": [ "
LinearRegression()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" ], "text/plain": [ "LinearRegression()" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "lrmodel = LinearRegression()\n", "lrmodel.fit(x_train, y_train)" ] }, { "cell_type": "code", "execution_count": 23, "metadata": { "trusted": true }, "outputs": [], "source": [ "#Prediction\n", "predict = lrmodel.predict(x_test)" ] }, { "cell_type": "code", "execution_count": 24, "metadata": { "trusted": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "RMSE error for the model is 8.063863046328835\n" ] } ], "source": [ "#Check Error\n", "from sklearn.metrics import mean_squared_error\n", "#scikit-learn's argument order is (y_true, y_pred)\n", "lrmodelrmse = np.sqrt(mean_squared_error(y_test, predict))\n", "print(\"RMSE error for the model is \", lrmodelrmse)" ] }, { "cell_type": "code", "execution_count": 25, "metadata": { "trusted": true }, "outputs": [], "source": [ "#Let's Apply Random Forest Regressor\n", "from sklearn.ensemble import RandomForestRegressor\n", "rfrmodel = RandomForestRegressor(n_estimators = 100, random_state = 101)" ] }, { "cell_type": "code", "execution_count": 26, "metadata": { "trusted": true }, "outputs": [], "source": [ "#Fit the Forest\n", "rfrmodel.fit(x_train, y_train)\n", "rfrmodel_pred = rfrmodel.predict(x_test)" ] }, { "cell_type": "code", "execution_count": 27, "metadata": { "trusted": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "RMSE value for Random Forest is: 9.757713738069647\n" ] } ], "source": [ "#Errors for the forest (arguments in scikit-learn's (y_true, y_pred) order)\n", "rfrmodel_rmse = np.sqrt(mean_squared_error(y_test, rfrmodel_pred))\n", "print(\"RMSE value for Random Forest is:\",rfrmodel_rmse)" ] }, { "cell_type": "code", "execution_count": 28, "metadata": { "trusted": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "6.9 USD\n" ] } ], "source": [ "#Coordinates-only fare estimate, trained on all rows of df.\n", "#A separate estimator is used so rfrmodel stays the model whose RMSE was reported above.\n", "coord_cols = ['pickup_longitude', 'pickup_latitude', 'dropoff_longitude', 'dropoff_latitude']\n", "X = df[coord_cols]\n", "y = df['fare_amount']\n", "coord_model = RandomForestRegressor(n_estimators = 100, random_state = 101)\n", "coord_model.fit(X, y)\n", "\n",
"#Query as a DataFrame carrying the training column names, so features are matched by name rather than by position\n", "new = pd.DataFrame([[-73.985, 40.748, -73.985, 40.758]], columns = coord_cols)\n", "pr = coord_model.predict(new)\n", "print(round(pr[0],2),\"USD\")" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.1" } }, "nbformat": 4, "nbformat_minor": 4 }