{ "cells": [ { "cell_type": "markdown", "id": "af1d39a1-915d-44e2-b06f-49777bfe4cf6", "metadata": {}, "source": [ "# Practical-1.2\n", "\n", "Problem Statement: Implement Page Rank Algorithm.\n", "\n", "Code from InformationRetrieval (SPPU - Final Year - Computer Engineering - Content) repository on KSKA Git: https://git.kska.io/sppu-be-comp-content/InformationRetrieval/\n", "\n", "---" ] }, { "cell_type": "code", "execution_count": 12, "id": "fcd4c298-e888-44ee-93d9-b9d3f3a9b05f", "metadata": {}, "outputs": [], "source": [ "# NumPy provides the array type and the matrix operations used in this notebook\n", "import numpy as np" ] }, { "cell_type": "code", "execution_count": 13, "id": "6d446fd6-e2ab-46d4-b9ee-ea1baa3e0b76", "metadata": {}, "outputs": [], "source": [ "# Tunable PageRank parameters\n", "beta = 0.85  # weight applied to the matrix-vector term of every update\n", "threshold = 1e-13  # iterations stop once the summed absolute change is below this" ] }, { "cell_type": "code", "execution_count": 14, "id": "25966376-d37f-41ef-a1ca-adbdf5831bd3", "metadata": {}, "outputs": [], "source": [ "# Adjacency matrix of the spider-trap example network\n", "# (the only non-zero entry of the last column sits on the diagonal)\n", "A = [\n", "    [0, 0, 1, 0],\n", "    [1, 0, 0, 0],\n", "    [1, 1, 0, 0],\n", "    [1, 1, 0, 1]\n", "]\n", "\n", "# Floating-point NumPy version of A, used for all numeric work\n", "arr = np.array(A, dtype=float)" ] }, { "cell_type": "code", "execution_count": 15, "id": "e9932efe-ba91-4bd8-9e1b-aa96ea1fbc5b", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Summation of columns: [3.0, 2.0, 1.0, 1.0]\n" ] } ], "source": [ "# Total of each column of arr, gathered into a plain Python list\n", "# (iterating over arr.T walks the columns of arr one at a time)\n", "s = [column.sum() for column in arr.T]\n", "\n", "print(\"Summation of columns: \", s)" ] }, { "cell_type": "code", "execution_count": 16, "id": "5f41e472-4f23-4a83-ac92-737581dd566c", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Column stochastic probability matrix, M:\n", "[[0. 0. 1. 0. ]\n", " [0.33333333 0. 0. 0. ]\n", " [0.33333333 0.5 0. 0. ]\n", " [0.33333333 0.5 0. 1. 
]]\n" ] } ], "source": [ "# Build M by dividing every column of arr by its column total from s\n", "M = arr.copy()\n", "for col, total in enumerate(s):\n", "    if total == 0:  # an all-zero column is left untouched, avoiding division by zero\n", "        continue\n", "    M[:, col] = M[:, col] / total\n", "\n", "print(\"Column stochastic probability matrix, M:\")\n", "print(M)" ] }, { "cell_type": "code", "execution_count": 17, "id": "e0c63b43-1825-4edb-873b-bab9d2e2f3d3", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Initial rank vector:\n", "[[0.25]\n", " [0.25]\n", " [0.25]\n", " [0.25]]\n" ] } ], "source": [ "# Uniform starting vector: a column of len(M) entries, each equal to 1 / len(M).\n", "# r0 is never reassigned, so anything derived from it stays correct even after\n", "# the iteration cell has overwritten r.\n", "r0 = np.ones((len(M), 1)) / len(M)\n", "r = r0.copy()\n", "print(\"Initial rank vector:\")\n", "print(r)" ] }, { "cell_type": "code", "execution_count": 18, "id": "f540571b-5fd7-4ced-a8a5-7daeb4625f18", "metadata": {}, "outputs": [], "source": [ "# Constant term added on every update: (1 - beta) times the uniform start vector.\n", "# Both values come from r0 rather than r, so re-running this cell after the\n", "# iteration cell resets the computation instead of reusing the converged ranks.\n", "uniformR = (1.0 - beta) * r0\n", "r_prev = r0.copy()" ] }, { "cell_type": "code", "execution_count": 19, "id": "b0d7f809-f901-4bf0-9676-ea4ea976a33a", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Iteration: 1\n", "The rank vector: \n", "[[0.25 ]\n", " [0.10833333]\n", " [0.21458333]\n", " [0.42708333]]\n", "Iteration: 2\n", "The rank vector: \n", "[[0.21989583]\n", " [0.10833333]\n", " [0.154375 ]\n", " [0.51739583]]\n", "Iteration: 3\n", "The rank vector: \n", "[[0.16871875]\n", " [0.09980382]\n", " [0.14584549]\n", " [0.58563194]]\n", "Iteration: 4\n", "The rank vector: \n", "[[0.16146866]\n", " [0.08530365]\n", " [0.12772027]\n", " [0.62550742]]\n", "Iteration: 5\n", "The rank vector: \n", "[[0.14606223]\n", " [0.08324945]\n", " [0.1195035 ]\n", " [0.65118481]]\n", "Iteration: 6\n", "The rank vector: \n", "[[0.13907798]\n", " [0.0788843 ]\n", " [0.11426532]\n", " [0.66777241]]\n", "Iteration: 7\n", "The rank vector: \n", "[[0.13462552]\n", " [0.07690543]\n", " [0.11043125]\n", " [0.6780378 ]]\n", "Iteration: 8\n", "The rank vector: \n", "[[0.13136657]\n", " 
[0.0756439 ]\n", " [0.1083287 ]\n", " [0.68466083]]\n", "Iteration: 9\n", "The rank vector: \n", "[[0.1295794 ]\n", " [0.07472053]\n", " [0.10686918]\n", " [0.68883089]]\n", "Iteration: 10\n", "The rank vector: \n", "[[0.12833881]\n", " [0.07421416]\n", " [0.10597039]\n", " [0.69147664]]\n", "Iteration: 11\n", "The rank vector: \n", "[[0.12757483]\n", " [0.07386266]\n", " [0.10540368]\n", " [0.69315883]]\n", "Iteration: 12\n", "The rank vector: \n", "[[0.12709313]\n", " [0.0736462 ]\n", " [0.10503783]\n", " [0.69422284]]\n", "Iteration: 13\n", "The rank vector: \n", "[[0.12678216]\n", " [0.07350972]\n", " [0.10480936]\n", " [0.69489877]]\n", "Iteration: 14\n", "The rank vector: \n", "[[0.12658795]\n", " [0.07342161]\n", " [0.10466324]\n", " [0.69532719]]\n", "Iteration: 15\n", "The rank vector: \n", "[[0.12646376]\n", " [0.07336659]\n", " [0.10457077]\n", " [0.69559889]]\n", "Iteration: 16\n", "The rank vector: \n", "[[0.12638516]\n", " [0.0733314 ]\n", " [0.1045122 ]\n", " [0.69577125]]\n", "Iteration: 17\n", "The rank vector: \n", "[[0.12633537]\n", " [0.07330913]\n", " [0.10447497]\n", " [0.69588053]]\n", "Iteration: 18\n", "The rank vector: \n", "[[0.12630373]\n", " [0.07329502]\n", " [0.1044514 ]\n", " [0.69594985]]\n", "Iteration: 19\n", "The rank vector: \n", "[[0.12628369]\n", " [0.07328606]\n", " [0.10443644]\n", " [0.69599382]]\n", "Iteration: 20\n", "The rank vector: \n", "[[0.12627097]\n", " [0.07328038]\n", " [0.10442695]\n", " [0.6960217 ]]\n", "Iteration: 21\n", "The rank vector: \n", "[[0.12626291]\n", " [0.07327678]\n", " [0.10442094]\n", " [0.69603938]]\n", "Iteration: 22\n", "The rank vector: \n", "[[0.1262578 ]\n", " [0.07327449]\n", " [0.10441712]\n", " [0.69605059]]\n", "Iteration: 23\n", "The rank vector: \n", "[[0.12625455]\n", " [0.07327304]\n", " [0.1044147 ]\n", " [0.6960577 ]]\n", "Iteration: 24\n", "The rank vector: \n", "[[0.1262525 ]\n", " [0.07327212]\n", " [0.10441317]\n", " [0.69606221]]\n", "Iteration: 25\n", "The rank vector: 
\n", "[[0.12625119]\n", " [0.07327154]\n", " [0.10441219]\n", " [0.69606508]]\n", "Iteration: 26\n", "The rank vector: \n", "[[0.12625036]\n", " [0.07327117]\n", " [0.10441158]\n", " [0.69606689]]\n", "Iteration: 27\n", "The rank vector: \n", "[[0.12624984]\n", " [0.07327094]\n", " [0.10441118]\n", " [0.69606804]]\n", "Iteration: 28\n", "The rank vector: \n", "[[0.12624951]\n", " [0.07327079]\n", " [0.10441094]\n", " [0.69606877]]\n", "Iteration: 29\n", "The rank vector: \n", "[[0.1262493 ]\n", " [0.07327069]\n", " [0.10441078]\n", " [0.69606923]]\n", "Iteration: 30\n", "The rank vector: \n", "[[0.12624916]\n", " [0.07327063]\n", " [0.10441068]\n", " [0.69606953]]\n", "Iteration: 31\n", "The rank vector: \n", "[[0.12624908]\n", " [0.0732706 ]\n", " [0.10441062]\n", " [0.69606971]]\n", "Iteration: 32\n", "The rank vector: \n", "[[0.12624902]\n", " [0.07327057]\n", " [0.10441057]\n", " [0.69606983]]\n", "Iteration: 33\n", "The rank vector: \n", "[[0.12624899]\n", " [0.07327056]\n", " [0.10441055]\n", " [0.69606991]]\n", "Iteration: 34\n", "The rank vector: \n", "[[0.12624897]\n", " [0.07327055]\n", " [0.10441053]\n", " [0.69606995]]\n", "Iteration: 35\n", "The rank vector: \n", "[[0.12624895]\n", " [0.07327054]\n", " [0.10441052]\n", " [0.69606998]]\n", "Iteration: 36\n", "The rank vector: \n", "[[0.12624894]\n", " [0.07327054]\n", " [0.10441052]\n", " [0.69607 ]]\n", "Iteration: 37\n", "The rank vector: \n", "[[0.12624894]\n", " [0.07327053]\n", " [0.10441051]\n", " [0.69607001]]\n", "Iteration: 38\n", "The rank vector: \n", "[[0.12624894]\n", " [0.07327053]\n", " [0.10441051]\n", " [0.69607002]]\n", "Iteration: 39\n", "The rank vector: \n", "[[0.12624893]\n", " [0.07327053]\n", " [0.10441051]\n", " [0.69607003]]\n", "Iteration: 40\n", "The rank vector: \n", "[[0.12624893]\n", " [0.07327053]\n", " [0.10441051]\n", " [0.69607003]]\n", "Iteration: 41\n", "The rank vector: \n", "[[0.12624893]\n", " [0.07327053]\n", " [0.10441051]\n", " [0.69607003]]\n", "Iteration: 
42\n", "The rank vector: \n", "[[0.12624893]\n", " [0.07327053]\n", " [0.10441051]\n", " [0.69607003]]\n", "Iteration: 43\n", "The rank vector: \n", "[[0.12624893]\n", " [0.07327053]\n", " [0.10441051]\n", " [0.69607003]]\n", "Iteration: 44\n", "The rank vector: \n", "[[0.12624893]\n", " [0.07327053]\n", " [0.10441051]\n", " [0.69607003]]\n", "Iteration: 45\n", "The rank vector: \n", "[[0.12624893]\n", " [0.07327053]\n", " [0.10441051]\n", " [0.69607003]]\n", "Iteration: 46\n", "The rank vector: \n", "[[0.12624893]\n", " [0.07327053]\n", " [0.10441051]\n", " [0.69607003]]\n", "Iteration: 47\n", "The rank vector: \n", "[[0.12624893]\n", " [0.07327053]\n", " [0.10441051]\n", " [0.69607003]]\n", "Iteration: 48\n", "The rank vector: \n", "[[0.12624893]\n", " [0.07327053]\n", " [0.10441051]\n", " [0.69607004]]\n", "Iteration: 49\n", "The rank vector: \n", "[[0.12624893]\n", " [0.07327053]\n", " [0.10441051]\n", " [0.69607004]]\n", "Iteration: 50\n", "The rank vector: \n", "[[0.12624893]\n", " [0.07327053]\n", " [0.10441051]\n", " [0.69607004]]\n", "Iteration: 51\n", "The rank vector: \n", "[[0.12624893]\n", " [0.07327053]\n", " [0.10441051]\n", " [0.69607004]]\n", "Iteration: 52\n", "The rank vector: \n", "[[0.12624893]\n", " [0.07327053]\n", " [0.10441051]\n", " [0.69607004]]\n", "Iteration: 53\n", "The rank vector: \n", "[[0.12624893]\n", " [0.07327053]\n", " [0.10441051]\n", " [0.69607004]]\n", "Iteration: 54\n", "The rank vector: \n", "[[0.12624893]\n", " [0.07327053]\n", " [0.10441051]\n", " [0.69607004]]\n", "Iteration: 55\n", "The rank vector: \n", "[[0.12624893]\n", " [0.07327053]\n", " [0.10441051]\n", " [0.69607004]]\n", "Iteration: 56\n", "The rank vector: \n", "[[0.12624893]\n", " [0.07327053]\n", " [0.10441051]\n", " [0.69607004]]\n", "Iteration: 57\n", "The rank vector: \n", "[[0.12624893]\n", " [0.07327053]\n", " [0.10441051]\n", " [0.69607004]]\n", "Iteration: 58\n", "The rank vector: \n", "[[0.12624893]\n", " [0.07327053]\n", " [0.10441051]\n", " 
[0.69607004]]\n", "Iteration: 59\n", "The rank vector: \n", "[[0.12624893]\n", " [0.07327053]\n", " [0.10441051]\n", " [0.69607004]]\n", "Iteration: 60\n", "The rank vector: \n", "[[0.12624893]\n", " [0.07327053]\n", " [0.10441051]\n", " [0.69607004]]\n", "Iteration: 61\n", "The rank vector: \n", "[[0.12624893]\n", " [0.07327053]\n", " [0.10441051]\n", " [0.69607004]]\n", "Iteration: 62\n", "The rank vector: \n", "[[0.12624893]\n", " [0.07327053]\n", " [0.10441051]\n", " [0.69607004]]\n", "Iteration: 63\n", "The rank vector: \n", "[[0.12624893]\n", " [0.07327053]\n", " [0.10441051]\n", " [0.69607004]]\n", "Iteration: 64\n", "The rank vector: \n", "[[0.12624893]\n", " [0.07327053]\n", " [0.10441051]\n", " [0.69607004]]\n", "Iteration: 65\n", "The rank vector: \n", "[[0.12624893]\n", " [0.07327053]\n", " [0.10441051]\n", " [0.69607004]]\n" ] } ], "source": [ "# Power iteration: r = beta * (M @ r_prev) + uniformR, repeated until the summed\n", "# absolute change between successive vectors drops below threshold.\n", "max_iterations = 1000  # hard cap on the number of updates\n", "for i in range(1, max_iterations + 1):\n", "    print(\"Iteration: \", i)\n", "    r = beta * (M @ r_prev) + uniformR\n", "    print(\"The rank vector: \")\n", "    print(r)\n", "\n", "    diff = np.abs(r - r_prev).sum()\n", "    if diff < threshold:\n", "        break\n", "    r_prev = r.copy()\n", "else:\n", "    # Runs only when the loop used up every iteration without reaching break,\n", "    # so the vector shown afterwards is not silently presented as converged.\n", "    print(\"Warning: no convergence after\", max_iterations, \"iterations; last change:\", diff)" ] }, { "cell_type": "code", "execution_count": 20, "id": "9fddbce3-0f30-4912-bfaa-f71a2d00d385", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "The final rank vector: \n", "[0.12624893 0.07327053 0.10441051 0.69607004]\n" ] } ], "source": [ "# Show the result as a flat 1-D array (first and only column of r)\n", "print(\"The final rank vector: \")\n", "print(r[:, 0])" ] }, { "cell_type": "markdown", "id": "bcbaa397-957c-4e79-b68a-e2070ee11baf", "metadata": {}, "source": [ "---" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.20" } }, 
"nbformat": 4, "nbformat_minor": 5 }