{
  "metadata": {
    "kernelspec": {
      "name": "python",
      "display_name": "Python (Pyodide)",
      "language": "python"
    },
    "language_info": {
      "codemirror_mode": {
        "name": "python",
        "version": 3
      },
      "file_extension": ".py",
      "mimetype": "text/x-python",
      "name": "python",
      "nbconvert_exporter": "python",
      "pygments_lexer": "ipython3",
      "version": "3.8"
    }
  },
  "nbformat_minor": 5,
  "nbformat": 4,
  "cells": [
    {
      "id": "1787aa40-6173-48cb-ac24-169a13a92b25",
      "cell_type": "code",
      "source": "import pandas as pd\nimport numpy as np\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.preprocessing import StandardScaler\nfrom sklearn.neighbors import KNeighborsClassifier\nfrom sklearn.svm import SVC\nfrom sklearn.metrics import accuracy_score, confusion_matrix, classification_report",
      "metadata": {
        "trusted": true
      },
      "outputs": [],
      "execution_count": 1
    },
    {
      "id": "e529d131-cb7a-4408-8624-96b481da9f94",
      "cell_type": "code",
      "source": "data = pd.read_csv(\"emails.csv\")\nprint(data.head())",
      "metadata": {
        "trusted": true
      },
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": "  Email No.  the  to  ect  and  for  of    a  you  hou  ...  connevey  jay  \\\n0   Email 1    0   0    1    0    0   0    2    0    0  ...         0    0   \n1   Email 2    8  13   24    6    6   2  102    1   27  ...         0    0   \n2   Email 3    0   0    1    0    0   0    8    0    0  ...         0    0   \n3   Email 4    0   5   22    0    5   1   51    2   10  ...         0    0   \n4   Email 5    7   6   17    1    5   2   57    0    9  ...         0    0   \n\n   valued  lay  infrastructure  military  allowing  ff  dry  Prediction  \n0       0    0               0         0         0   0    0           0  \n1       0    0               0         0         0   1    0           0  \n2       0    0               0         0         0   0    0           0  \n3       0    0               0         0         0   0    0           0  \n4       0    0               0         0         0   1    0           0  \n\n[5 rows x 3002 columns]\n"
        }
      ],
      "execution_count": 2
    },
    {
      "id": "adcce56e-4742-4a1a-8ed6-82df3e5ad9c4",
      "cell_type": "code",
      "source": "X = data.drop(columns=['Email No.', 'Prediction'], errors='ignore')  # features\ny = data['Prediction'] ",
      "metadata": {
        "trusted": true
      },
      "outputs": [],
      "execution_count": null
    },
    {
      "id": "963f4ccd-9790-4d1f-8458-995bdbfc84eb",
      "cell_type": "code",
      "source": "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)",
      "metadata": {
        "trusted": true
      },
      "outputs": [],
      "execution_count": null
    },
    {
      "id": "481d8d23-ce4d-4419-a820-779ad333be2a",
      "cell_type": "code",
      "source": "scaler = StandardScaler()\nX_train = scaler.fit_transform(X_train)\nX_test = scaler.transform(X_test)",
      "metadata": {
        "trusted": true
      },
      "outputs": [],
      "execution_count": null
    },
    {
      "id": "255b9746-202d-425b-9353-41d7bb18d99c",
      "cell_type": "code",
      "source": "knn = KNeighborsClassifier(n_neighbors=5)\nknn.fit(X_train, y_train)\ny_pred_knn = knn.predict(X_test)",
      "metadata": {
        "trusted": true
      },
      "outputs": [],
      "execution_count": null
    },
    {
      "id": "0c5e2c44-afc4-43d8-bdd7-e0a136d54413",
      "cell_type": "code",
      "source": "svm = SVC(kernel='linear', C=1)\nsvm.fit(X_train, y_train)\ny_pred_svm = svm.predict(X_test)",
      "metadata": {
        "trusted": true
      },
      "outputs": [],
      "execution_count": null
    },
    {
      "id": "77909086-350b-4442-97d6-e7bbef15648f",
      "cell_type": "code",
      "source": "print(\"===== KNN Model Evaluation =====\")\nprint(\"Accuracy:\", accuracy_score(y_test, y_pred_knn))\nprint(confusion_matrix(y_test, y_pred_knn))\nprint(classification_report(y_test, y_pred_knn))",
      "metadata": {
        "trusted": true
      },
      "outputs": [],
      "execution_count": null
    },
    {
      "id": "72a0433c-8085-4ea4-aa29-28ed856ab086",
      "cell_type": "code",
      "source": "print(\"\\n===== SVM Model Evaluation =====\")\nprint(\"Accuracy:\", accuracy_score(y_test, y_pred_svm))\nprint(confusion_matrix(y_test, y_pred_svm))\nprint(classification_report(y_test, y_pred_svm))",
      "metadata": {
        "trusted": true
      },
      "outputs": [],
      "execution_count": null
    }
  ]
}