617 lines
15 KiB
Plaintext
617 lines
15 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "af1d39a1-915d-44e2-b06f-49777bfe4cf6",
|
|
"metadata": {},
|
|
"source": [
|
|
"# Practical-1.2\n",
|
|
"\n",
|
"Problem Statement: Implement the PageRank algorithm.\n",
|
|
"\n",
|
|
"Code from InformationRetrieval (SPPU - Final Year - Computer Engineering - Content) repository on KSKA Git: https://git.kska.io/sppu-be-comp-content/InformationRetrieval/\n",
|
|
"\n",
|
|
"---"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 12,
|
|
"id": "fcd4c298-e888-44ee-93d9-b9d3f3a9b05f",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Import libraries\n",
|
|
"import numpy as np"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 13,
|
|
"id": "6d446fd6-e2ab-46d4-b9ee-ea1baa3e0b76",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
"# PageRank hyper-parameters\n",
"# beta: damping factor that weights the link-following term M @ r;\n",
"# the remaining (1 - beta) weights the uniform teleportation term.\n",
"beta = 0.85\n",
"# threshold: the iteration stops once the summed absolute change between\n",
"# two successive rank vectors falls below this value.\n",
"threshold = 1e-13"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 14,
|
|
"id": "25966376-d37f-41ef-a1ca-adbdf5831bd3",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
"# Adjacency matrix of a 4-node network containing a spider trap.\n",
"# Columns are sources and rows are targets: A[i][j] == 1 means node j\n",
"# links to node i (the columns are what get normalised further down).\n",
"# The last node links only to itself, which forms the trap.\n",
"A = [\n",
"    [0, 0, 1, 0],\n",
"    [1, 0, 0, 0],\n",
"    [1, 1, 0, 0],\n",
"    [1, 1, 0, 1],\n",
"]\n",
"\n",
"# Floating-point copy of A so the columns can be divided later\n",
"arr = np.asarray(A, dtype=float)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 15,
|
|
"id": "e9932efe-ba91-4bd8-9e1b-aa96ea1fbc5b",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Summation of columns: [3.0, 2.0, 1.0, 1.0]\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
"# Calculate summation of columns (the out-degree of every node).\n",
"# One vectorised reduction replaces the per-column Python loop, and\n",
"# .tolist() converts the result to plain Python floats so the printed\n",
"# list looks the same on every NumPy version (NumPy 2 would otherwise\n",
"# render each element as np.float64(...)).\n",
"s = arr.sum(axis=0).tolist()\n",
"\n",
"print(\"Summation of columns: \", s)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 16,
|
|
"id": "5f41e472-4f23-4a83-ac92-737581dd566c",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Column stochastic probability matrix, M:\n",
|
|
"[[0. 0. 1. 0. ]\n",
|
|
" [0.33333333 0. 0. 0. ]\n",
|
|
" [0.33333333 0.5 0. 0. ]\n",
|
|
" [0.33333333 0.5 0. 1. ]]\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
"# Create the column stochastic probability matrix, M\n",
"# Every column is divided by its own sum so that it becomes a probability\n",
"# distribution over the nodes that the column's node links to.\n",
"M = arr.copy()\n",
"for j in range(len(A)):\n",
"    if s[j] != 0:\n",
"        M[:, j] = M[:, j] / s[j]\n",
"    else:\n",
"        # Dead end (node without out-links): leaving the column at zero\n",
"        # would make M non-stochastic and leak rank on every iteration,\n",
"        # so the node is treated as linking to every node uniformly.\n",
"        M[:, j] = 1.0 / len(A)\n",
"\n",
"print(\"Column stochastic probability matrix, M:\")\n",
"print(M)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 17,
|
|
"id": "e0c63b43-1825-4edb-873b-bab9d2e2f3d3",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Initial rank vector:\n",
|
|
"[[0.25]\n",
|
|
" [0.25]\n",
|
|
" [0.25]\n",
|
|
" [0.25]]\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
"# Start from the uniform distribution: an (N, 1) column vector in which\n",
"# every node holds rank 1/N, where N is the number of rows of M.\n",
"r = np.ones((len(M), 1)) / len(M)\n",
"print(\"Initial rank vector:\")\n",
"print(r)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 18,
|
|
"id": "f540571b-5fd7-4ced-a8a5-7daeb4625f18",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
"# Calculate the uniform rank contribution (teleportation term).\n",
"# The uniform vector is rebuilt from len(M) instead of being read from r,\n",
"# so re-running this cell after the iteration cell (when r already holds\n",
"# the converged ranks) still yields (1 - beta) / N for every node and\n",
"# restarts the iteration from the uniform distribution.\n",
"uniform = np.ones((len(M), 1)) / len(M)\n",
"uniformR = (1.0 - beta) * uniform\n",
"r_prev = uniform.copy()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 19,
|
|
"id": "b0d7f809-f901-4bf0-9676-ea4ea976a33a",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Iteration: 1\n",
|
|
"The rank vector: \n",
|
|
"[[0.25 ]\n",
|
|
" [0.10833333]\n",
|
|
" [0.21458333]\n",
|
|
" [0.42708333]]\n",
|
|
"Iteration: 2\n",
|
|
"The rank vector: \n",
|
|
"[[0.21989583]\n",
|
|
" [0.10833333]\n",
|
|
" [0.154375 ]\n",
|
|
" [0.51739583]]\n",
|
|
"Iteration: 3\n",
|
|
"The rank vector: \n",
|
|
"[[0.16871875]\n",
|
|
" [0.09980382]\n",
|
|
" [0.14584549]\n",
|
|
" [0.58563194]]\n",
|
|
"Iteration: 4\n",
|
|
"The rank vector: \n",
|
|
"[[0.16146866]\n",
|
|
" [0.08530365]\n",
|
|
" [0.12772027]\n",
|
|
" [0.62550742]]\n",
|
|
"Iteration: 5\n",
|
|
"The rank vector: \n",
|
|
"[[0.14606223]\n",
|
|
" [0.08324945]\n",
|
|
" [0.1195035 ]\n",
|
|
" [0.65118481]]\n",
|
|
"Iteration: 6\n",
|
|
"The rank vector: \n",
|
|
"[[0.13907798]\n",
|
|
" [0.0788843 ]\n",
|
|
" [0.11426532]\n",
|
|
" [0.66777241]]\n",
|
|
"Iteration: 7\n",
|
|
"The rank vector: \n",
|
|
"[[0.13462552]\n",
|
|
" [0.07690543]\n",
|
|
" [0.11043125]\n",
|
|
" [0.6780378 ]]\n",
|
|
"Iteration: 8\n",
|
|
"The rank vector: \n",
|
|
"[[0.13136657]\n",
|
|
" [0.0756439 ]\n",
|
|
" [0.1083287 ]\n",
|
|
" [0.68466083]]\n",
|
|
"Iteration: 9\n",
|
|
"The rank vector: \n",
|
|
"[[0.1295794 ]\n",
|
|
" [0.07472053]\n",
|
|
" [0.10686918]\n",
|
|
" [0.68883089]]\n",
|
|
"Iteration: 10\n",
|
|
"The rank vector: \n",
|
|
"[[0.12833881]\n",
|
|
" [0.07421416]\n",
|
|
" [0.10597039]\n",
|
|
" [0.69147664]]\n",
|
|
"Iteration: 11\n",
|
|
"The rank vector: \n",
|
|
"[[0.12757483]\n",
|
|
" [0.07386266]\n",
|
|
" [0.10540368]\n",
|
|
" [0.69315883]]\n",
|
|
"Iteration: 12\n",
|
|
"The rank vector: \n",
|
|
"[[0.12709313]\n",
|
|
" [0.0736462 ]\n",
|
|
" [0.10503783]\n",
|
|
" [0.69422284]]\n",
|
|
"Iteration: 13\n",
|
|
"The rank vector: \n",
|
|
"[[0.12678216]\n",
|
|
" [0.07350972]\n",
|
|
" [0.10480936]\n",
|
|
" [0.69489877]]\n",
|
|
"Iteration: 14\n",
|
|
"The rank vector: \n",
|
|
"[[0.12658795]\n",
|
|
" [0.07342161]\n",
|
|
" [0.10466324]\n",
|
|
" [0.69532719]]\n",
|
|
"Iteration: 15\n",
|
|
"The rank vector: \n",
|
|
"[[0.12646376]\n",
|
|
" [0.07336659]\n",
|
|
" [0.10457077]\n",
|
|
" [0.69559889]]\n",
|
|
"Iteration: 16\n",
|
|
"The rank vector: \n",
|
|
"[[0.12638516]\n",
|
|
" [0.0733314 ]\n",
|
|
" [0.1045122 ]\n",
|
|
" [0.69577125]]\n",
|
|
"Iteration: 17\n",
|
|
"The rank vector: \n",
|
|
"[[0.12633537]\n",
|
|
" [0.07330913]\n",
|
|
" [0.10447497]\n",
|
|
" [0.69588053]]\n",
|
|
"Iteration: 18\n",
|
|
"The rank vector: \n",
|
|
"[[0.12630373]\n",
|
|
" [0.07329502]\n",
|
|
" [0.1044514 ]\n",
|
|
" [0.69594985]]\n",
|
|
"Iteration: 19\n",
|
|
"The rank vector: \n",
|
|
"[[0.12628369]\n",
|
|
" [0.07328606]\n",
|
|
" [0.10443644]\n",
|
|
" [0.69599382]]\n",
|
|
"Iteration: 20\n",
|
|
"The rank vector: \n",
|
|
"[[0.12627097]\n",
|
|
" [0.07328038]\n",
|
|
" [0.10442695]\n",
|
|
" [0.6960217 ]]\n",
|
|
"Iteration: 21\n",
|
|
"The rank vector: \n",
|
|
"[[0.12626291]\n",
|
|
" [0.07327678]\n",
|
|
" [0.10442094]\n",
|
|
" [0.69603938]]\n",
|
|
"Iteration: 22\n",
|
|
"The rank vector: \n",
|
|
"[[0.1262578 ]\n",
|
|
" [0.07327449]\n",
|
|
" [0.10441712]\n",
|
|
" [0.69605059]]\n",
|
|
"Iteration: 23\n",
|
|
"The rank vector: \n",
|
|
"[[0.12625455]\n",
|
|
" [0.07327304]\n",
|
|
" [0.1044147 ]\n",
|
|
" [0.6960577 ]]\n",
|
|
"Iteration: 24\n",
|
|
"The rank vector: \n",
|
|
"[[0.1262525 ]\n",
|
|
" [0.07327212]\n",
|
|
" [0.10441317]\n",
|
|
" [0.69606221]]\n",
|
|
"Iteration: 25\n",
|
|
"The rank vector: \n",
|
|
"[[0.12625119]\n",
|
|
" [0.07327154]\n",
|
|
" [0.10441219]\n",
|
|
" [0.69606508]]\n",
|
|
"Iteration: 26\n",
|
|
"The rank vector: \n",
|
|
"[[0.12625036]\n",
|
|
" [0.07327117]\n",
|
|
" [0.10441158]\n",
|
|
" [0.69606689]]\n",
|
|
"Iteration: 27\n",
|
|
"The rank vector: \n",
|
|
"[[0.12624984]\n",
|
|
" [0.07327094]\n",
|
|
" [0.10441118]\n",
|
|
" [0.69606804]]\n",
|
|
"Iteration: 28\n",
|
|
"The rank vector: \n",
|
|
"[[0.12624951]\n",
|
|
" [0.07327079]\n",
|
|
" [0.10441094]\n",
|
|
" [0.69606877]]\n",
|
|
"Iteration: 29\n",
|
|
"The rank vector: \n",
|
|
"[[0.1262493 ]\n",
|
|
" [0.07327069]\n",
|
|
" [0.10441078]\n",
|
|
" [0.69606923]]\n",
|
|
"Iteration: 30\n",
|
|
"The rank vector: \n",
|
|
"[[0.12624916]\n",
|
|
" [0.07327063]\n",
|
|
" [0.10441068]\n",
|
|
" [0.69606953]]\n",
|
|
"Iteration: 31\n",
|
|
"The rank vector: \n",
|
|
"[[0.12624908]\n",
|
|
" [0.0732706 ]\n",
|
|
" [0.10441062]\n",
|
|
" [0.69606971]]\n",
|
|
"Iteration: 32\n",
|
|
"The rank vector: \n",
|
|
"[[0.12624902]\n",
|
|
" [0.07327057]\n",
|
|
" [0.10441057]\n",
|
|
" [0.69606983]]\n",
|
|
"Iteration: 33\n",
|
|
"The rank vector: \n",
|
|
"[[0.12624899]\n",
|
|
" [0.07327056]\n",
|
|
" [0.10441055]\n",
|
|
" [0.69606991]]\n",
|
|
"Iteration: 34\n",
|
|
"The rank vector: \n",
|
|
"[[0.12624897]\n",
|
|
" [0.07327055]\n",
|
|
" [0.10441053]\n",
|
|
" [0.69606995]]\n",
|
|
"Iteration: 35\n",
|
|
"The rank vector: \n",
|
|
"[[0.12624895]\n",
|
|
" [0.07327054]\n",
|
|
" [0.10441052]\n",
|
|
" [0.69606998]]\n",
|
|
"Iteration: 36\n",
|
|
"The rank vector: \n",
|
|
"[[0.12624894]\n",
|
|
" [0.07327054]\n",
|
|
" [0.10441052]\n",
|
|
" [0.69607 ]]\n",
|
|
"Iteration: 37\n",
|
|
"The rank vector: \n",
|
|
"[[0.12624894]\n",
|
|
" [0.07327053]\n",
|
|
" [0.10441051]\n",
|
|
" [0.69607001]]\n",
|
|
"Iteration: 38\n",
|
|
"The rank vector: \n",
|
|
"[[0.12624894]\n",
|
|
" [0.07327053]\n",
|
|
" [0.10441051]\n",
|
|
" [0.69607002]]\n",
|
|
"Iteration: 39\n",
|
|
"The rank vector: \n",
|
|
"[[0.12624893]\n",
|
|
" [0.07327053]\n",
|
|
" [0.10441051]\n",
|
|
" [0.69607003]]\n",
|
|
"Iteration: 40\n",
|
|
"The rank vector: \n",
|
|
"[[0.12624893]\n",
|
|
" [0.07327053]\n",
|
|
" [0.10441051]\n",
|
|
" [0.69607003]]\n",
|
|
"Iteration: 41\n",
|
|
"The rank vector: \n",
|
|
"[[0.12624893]\n",
|
|
" [0.07327053]\n",
|
|
" [0.10441051]\n",
|
|
" [0.69607003]]\n",
|
|
"Iteration: 42\n",
|
|
"The rank vector: \n",
|
|
"[[0.12624893]\n",
|
|
" [0.07327053]\n",
|
|
" [0.10441051]\n",
|
|
" [0.69607003]]\n",
|
|
"Iteration: 43\n",
|
|
"The rank vector: \n",
|
|
"[[0.12624893]\n",
|
|
" [0.07327053]\n",
|
|
" [0.10441051]\n",
|
|
" [0.69607003]]\n",
|
|
"Iteration: 44\n",
|
|
"The rank vector: \n",
|
|
"[[0.12624893]\n",
|
|
" [0.07327053]\n",
|
|
" [0.10441051]\n",
|
|
" [0.69607003]]\n",
|
|
"Iteration: 45\n",
|
|
"The rank vector: \n",
|
|
"[[0.12624893]\n",
|
|
" [0.07327053]\n",
|
|
" [0.10441051]\n",
|
|
" [0.69607003]]\n",
|
|
"Iteration: 46\n",
|
|
"The rank vector: \n",
|
|
"[[0.12624893]\n",
|
|
" [0.07327053]\n",
|
|
" [0.10441051]\n",
|
|
" [0.69607003]]\n",
|
|
"Iteration: 47\n",
|
|
"The rank vector: \n",
|
|
"[[0.12624893]\n",
|
|
" [0.07327053]\n",
|
|
" [0.10441051]\n",
|
|
" [0.69607003]]\n",
|
|
"Iteration: 48\n",
|
|
"The rank vector: \n",
|
|
"[[0.12624893]\n",
|
|
" [0.07327053]\n",
|
|
" [0.10441051]\n",
|
|
" [0.69607004]]\n",
|
|
"Iteration: 49\n",
|
|
"The rank vector: \n",
|
|
"[[0.12624893]\n",
|
|
" [0.07327053]\n",
|
|
" [0.10441051]\n",
|
|
" [0.69607004]]\n",
|
|
"Iteration: 50\n",
|
|
"The rank vector: \n",
|
|
"[[0.12624893]\n",
|
|
" [0.07327053]\n",
|
|
" [0.10441051]\n",
|
|
" [0.69607004]]\n",
|
|
"Iteration: 51\n",
|
|
"The rank vector: \n",
|
|
"[[0.12624893]\n",
|
|
" [0.07327053]\n",
|
|
" [0.10441051]\n",
|
|
" [0.69607004]]\n",
|
|
"Iteration: 52\n",
|
|
"The rank vector: \n",
|
|
"[[0.12624893]\n",
|
|
" [0.07327053]\n",
|
|
" [0.10441051]\n",
|
|
" [0.69607004]]\n",
|
|
"Iteration: 53\n",
|
|
"The rank vector: \n",
|
|
"[[0.12624893]\n",
|
|
" [0.07327053]\n",
|
|
" [0.10441051]\n",
|
|
" [0.69607004]]\n",
|
|
"Iteration: 54\n",
|
|
"The rank vector: \n",
|
|
"[[0.12624893]\n",
|
|
" [0.07327053]\n",
|
|
" [0.10441051]\n",
|
|
" [0.69607004]]\n",
|
|
"Iteration: 55\n",
|
|
"The rank vector: \n",
|
|
"[[0.12624893]\n",
|
|
" [0.07327053]\n",
|
|
" [0.10441051]\n",
|
|
" [0.69607004]]\n",
|
|
"Iteration: 56\n",
|
|
"The rank vector: \n",
|
|
"[[0.12624893]\n",
|
|
" [0.07327053]\n",
|
|
" [0.10441051]\n",
|
|
" [0.69607004]]\n",
|
|
"Iteration: 57\n",
|
|
"The rank vector: \n",
|
|
"[[0.12624893]\n",
|
|
" [0.07327053]\n",
|
|
" [0.10441051]\n",
|
|
" [0.69607004]]\n",
|
|
"Iteration: 58\n",
|
|
"The rank vector: \n",
|
|
"[[0.12624893]\n",
|
|
" [0.07327053]\n",
|
|
" [0.10441051]\n",
|
|
" [0.69607004]]\n",
|
|
"Iteration: 59\n",
|
|
"The rank vector: \n",
|
|
"[[0.12624893]\n",
|
|
" [0.07327053]\n",
|
|
" [0.10441051]\n",
|
|
" [0.69607004]]\n",
|
|
"Iteration: 60\n",
|
|
"The rank vector: \n",
|
|
"[[0.12624893]\n",
|
|
" [0.07327053]\n",
|
|
" [0.10441051]\n",
|
|
" [0.69607004]]\n",
|
|
"Iteration: 61\n",
|
|
"The rank vector: \n",
|
|
"[[0.12624893]\n",
|
|
" [0.07327053]\n",
|
|
" [0.10441051]\n",
|
|
" [0.69607004]]\n",
|
|
"Iteration: 62\n",
|
|
"The rank vector: \n",
|
|
"[[0.12624893]\n",
|
|
" [0.07327053]\n",
|
|
" [0.10441051]\n",
|
|
" [0.69607004]]\n",
|
|
"Iteration: 63\n",
|
|
"The rank vector: \n",
|
|
"[[0.12624893]\n",
|
|
" [0.07327053]\n",
|
|
" [0.10441051]\n",
|
|
" [0.69607004]]\n",
|
|
"Iteration: 64\n",
|
|
"The rank vector: \n",
|
|
"[[0.12624893]\n",
|
|
" [0.07327053]\n",
|
|
" [0.10441051]\n",
|
|
" [0.69607004]]\n",
|
|
"Iteration: 65\n",
|
|
"The rank vector: \n",
|
|
"[[0.12624893]\n",
|
|
" [0.07327053]\n",
|
|
" [0.10441051]\n",
|
|
" [0.69607004]]\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
"# PageRank iterations\n",
"# Repeats r = beta * M @ r_prev + uniformR until the summed absolute\n",
"# change between two successive rank vectors drops below threshold.\n",
"max_iterations = 1000  # upper bound in case the change never gets that small\n",
"for i in range(1, max_iterations + 1):\n",
"    print(\"Iteration: \", i)\n",
"    r = beta * np.matmul(M, r_prev) + uniformR\n",
"    print(\"The rank vector: \")\n",
"    print(r)\n",
"\n",
"    diff = np.sum(np.abs(r - r_prev))\n",
"    if diff < threshold:\n",
"        break\n",
"    r_prev = r.copy()\n",
"else:\n",
"    # Reached only when the loop finishes without hitting break, i.e. the\n",
"    # iteration cap was exhausted; say so instead of ending silently.\n",
"    print(\"Warning: no convergence within\", max_iterations, \"iterations; last change:\", diff)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 20,
|
|
"id": "9fddbce3-0f30-4912-bfaa-f71a2d00d385",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"The final rank vector: \n",
|
|
"[0.12624893 0.07327053 0.10441051 0.69607004]\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
"# Show the final ranks flattened from the (N, 1) column vector to 1-D\n",
"print(\"The final rank vector: \")\n",
"print(r.ravel())"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "bcbaa397-957c-4e79-b68a-e2070ee11baf",
|
|
"metadata": {},
|
|
"source": [
|
|
"---"
|
|
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3 (ipykernel)",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.8.20"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 5
|
|
}
|