Added codes 1.1, 1.2, 1.3 and 1.5

This commit is contained in:
K
2025-10-12 22:51:57 +05:30
parent 0ac0a2859b
commit 4c84c01a65
4 changed files with 188 additions and 0 deletions
+35
View File
@@ -0,0 +1,35 @@
# pip install requests beautifulsoup4
import requests
from bs4 import BeautifulSoup
import time
def crawl(url, depth, *, delay=1, timeout=10):
    """Crawl pages level by level from ``url``, following absolute links.

    The start page counts as level 1. Every page within ``depth`` levels is
    requested at most once; each successful fetch is announced with a
    ``Crawling: ...`` line and each failure with a ``Failed to retrieve``
    line. Only hrefs that start with ``http`` are followed, so relative
    links are ignored.

    Args:
        url: Address of the page to start from.
        depth: Number of levels to crawl, counting the start page. Zero or
            a negative value crawls nothing.
        delay: Seconds to sleep between consecutive requests.
        timeout: Seconds to wait for a response before the request is
            treated as failed.

    Returns:
        None. Progress and failures are written to standard output.
    """
    # ``<=`` rather than ``==`` so a negative depth cannot run unbounded.
    if depth <= 0:
        return

    seen = {url}          # every URL ever queued, so nothing is fetched twice
    frontier = [url]      # pages to fetch at the current level
    remaining = depth
    first_request = True

    while frontier and remaining > 0:
        # Links found on the last level would never be fetched, so there
        # is no point in parsing those pages for more URLs.
        expand = remaining > 1
        next_frontier = []

        for page_url in frontier:
            # Be polite and avoid overwhelming the server: pause between
            # requests, but not before the very first one.
            if first_request:
                first_request = False
            else:
                time.sleep(delay)

            try:
                # Without a timeout a stalled server blocks the crawl forever.
                response = requests.get(page_url, timeout=timeout)
                response.raise_for_status()  # Check for HTTP errors
            except requests.RequestException as e:
                print(f"Failed to retrieve {page_url}: {e}")
                continue

            print(f"Crawling: {page_url}")
            if not expand:
                continue

            # Collect unseen absolute links, in document order, for the
            # next level.
            soup = BeautifulSoup(response.text, 'html.parser')
            for anchor in soup.find_all('a', href=True):
                href = anchor['href']
                if href.startswith('http') and href not in seen:
                    seen.add(href)
                    next_frontier.append(href)

        frontier = next_frontier
        remaining -= 1
if __name__ == "__main__":
    # Strip stray whitespace so a pasted URL with a trailing space still works.
    start_url = input("Enter the URL to crawl: ").strip()
    try:
        crawl_depth = int(input("Enter the crawl depth: "))
    except ValueError:
        # Exit with a readable message instead of a traceback when the
        # depth is not an integer.
        raise SystemExit("Crawl depth must be a whole number.") from None
    crawl(start_url, crawl_depth)