diff --git a/Notebooks/Notebook-A5 (Data Analytics-2).ipynb b/Notebooks/Notebook-A5 (Data Analytics-2).ipynb new file mode 100755 index 0000000..0c9a343 --- /dev/null +++ b/Notebooks/Notebook-A5 (Data Analytics-2).ipynb @@ -0,0 +1,889 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "920f58f3", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn.linear_model import LogisticRegression\n", + "from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "2b4a4744", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
User IDGenderAgeEstimatedSalaryPurchased
015624510019190000
115810944035200000
215668575126430000
315603246127570000
415804002019760000
..................
39515691863146410001
39615706071051230001
39715654296150200001
39815755018036330000
39915594041149360001
\n", + "

400 rows × 5 columns

\n", + "
" + ], + "text/plain": [ + " User ID Gender Age EstimatedSalary Purchased\n", + "0 15624510 0 19 19000 0\n", + "1 15810944 0 35 20000 0\n", + "2 15668575 1 26 43000 0\n", + "3 15603246 1 27 57000 0\n", + "4 15804002 0 19 76000 0\n", + ".. ... ... ... ... ...\n", + "395 15691863 1 46 41000 1\n", + "396 15706071 0 51 23000 1\n", + "397 15654296 1 50 20000 1\n", + "398 15755018 0 36 33000 0\n", + "399 15594041 1 49 36000 1\n", + "\n", + "[400 rows x 5 columns]" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = pd.read_csv(\"Social_Network_Ads.csv\")\n", + "df[\"Gender\"].replace({\"Male\":0,\"Female\":1}, inplace=True)\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "d05d408e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['User ID', 'Gender', 'Age', 'EstimatedSalary', 'Purchased'], dtype='object')" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.columns" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "5a670ac1", + "metadata": {}, + "outputs": [], + "source": [ + "x = df[['User ID', 'Gender', 'Age', 'EstimatedSalary']]\n", + "y = df[['Purchased']]" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "cdbde239", + "metadata": {}, + "outputs": [], + "source": [ + "x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=0.25,random_state=29)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "5f4c8777", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/admin1/anaconda3/lib/python3.9/site-packages/sklearn/utils/validation.py:1408: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n", + " y = column_or_1d(y, warn=True)\n" + ] + }, + { + "data": { + "text/html": [ + "
LogisticRegression()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + ], + "text/plain": [ + "LogisticRegression()" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model = LogisticRegression()\n", + "model.fit(x_train,y_train)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "f9942185", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0,\n", + " 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0,\n", + " 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,\n", + " 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1])" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "y_pred = model.predict(x_test)\n", + "y_pred" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "8c7fa3e2", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.8633333333333333" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model.score(x_train,y_train)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "3e5f5f8a", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.85" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model.score(x,y)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "563f8479", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[63, 6],\n", + " [13, 18]])" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cm = confusion_matrix(y_test,y_pred)\n", + "cm" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "653499ed", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "63 6 13 18\n" + ] + } + ], + "source": [ + "tn, fp, fn, tp = confusion_matrix(y_test,y_pred).ravel()\n", + "print(tn,fp,fn,tp)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "8d61b752", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.81" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "a = accuracy_score(y_test,y_pred)\n", + "a" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "1df0dfe0", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.18999999999999995" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "e = 1 - a\n", + "e" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "c3b06c0a", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.75" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "precision_score(y_test,y_pred)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "f5fb059c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.5806451612903226" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "recall_score(y_test,y_pred)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5683d512", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.7" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/README.md b/README.md index d0840a4..26000c0 100644 --- a/README.md +++ b/README.md @@ -15,13 +15,15 @@ ### Codes -1. [Code-A9 (Data Visualisation-2)](Codes/Code-A9%20%28Data%20Visualisation-2%29.md) -2. [Code-A10 (Data Visualisation-3)](Codes/Code-A10%20%28Data%20Visualisation-3%29.md) -3. [Code-A5 (Data Analytics-2)](Codes/Code-A5%20%28Data%20Analytics-2%29.md) + +1. [Code-A5 (Data Analytics-2)](Codes/Code-A5%20%28Data%20Analytics-2%29.md) +2. [Code-A9 (Data Visualisation-2)](Codes/Code-A9%20%28Data%20Visualisation-2%29.md) +3. [Code-A10 (Data Visualisation-3)](Codes/Code-A10%20%28Data%20Visualisation-3%29.md) ### Notebooks -1. [Notebook-A9 (Data Visualisation-2)](Notebooks/Notebook-A9%20%28Data%20Visualization-2%29.ipynb) +1. [Notebook-A5 (Data Analytics-2)](Notebooks/Notebook-A5%20%28Data%20Analytics-2%29.ipynb) +2. [Notebook-A9 (Data Visualisation-2)](Notebooks/Notebook-A9%20%28Data%20Visualization-2%29.ipynb) ### Practical