Added code for practical-1.1, i.e. email header analysis.
This commit is contained in:
@@ -0,0 +1,426 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
# CSDF - Practical-1 (Email Header Analysis)
|
||||
|
||||
"""
|
||||
THIS CODE HAS BEEN TESTED AND IS FULLY OPERATIONAL.
|
||||
|
||||
Problem Statement: Email Header Analysis - Write a program for Tracking Emails and Investigating Email Crimes. i.e. Write a program to analyze e–mail header.
|
||||
|
||||
Code from CyberSecurityAndDigitalForensics (SPPU - Final Year - Computer Engineering - Content) repository on KSKA Git: https://git.kska.io/sppu-be-comp-content/CyberSecurityAndDigitalForensics
|
||||
"""
|
||||
|
||||
# BEGINNING OF CODE
|
||||
import re
|
||||
import json
|
||||
from email import message_from_string, message_from_file
|
||||
from email.parser import Parser
|
||||
from datetime import datetime
|
||||
import socket
|
||||
import ipaddress
|
||||
|
||||
class EmailHeaderAnalyzer:
|
||||
"""
|
||||
A comprehensive email header analyzer for forensic investigation
|
||||
"""
|
||||
|
||||
def __init__(self, email_content):
|
||||
"""
|
||||
Initialize the analyzer with email content
|
||||
|
||||
Args:
|
||||
email_content: Raw email string or file object
|
||||
"""
|
||||
if isinstance(email_content, str):
|
||||
self.email = message_from_string(email_content)
|
||||
else:
|
||||
self.email = message_from_file(email_content)
|
||||
|
||||
self.analysis_results = {}
|
||||
|
||||
def extract_basic_headers(self):
|
||||
"""Extract basic email header information"""
|
||||
headers = {
|
||||
'From': self.email.get('From', 'Not Found'),
|
||||
'To': self.email.get('To', 'Not Found'),
|
||||
'Subject': self.email.get('Subject', 'Not Found'),
|
||||
'Date': self.email.get('Date', 'Not Found'),
|
||||
'Message-ID': self.email.get('Message-ID', 'Not Found'),
|
||||
'Return-Path': self.email.get('Return-Path', 'Not Found'),
|
||||
'Reply-To': self.email.get('Reply-To', 'Not Found'),
|
||||
'MIME-Version': self.email.get('MIME-Version', 'Not Found'),
|
||||
'Content-Type': self.email.get('Content-Type', 'Not Found')
|
||||
}
|
||||
|
||||
self.analysis_results['basic_headers'] = headers
|
||||
return headers
|
||||
|
||||
def extract_received_headers(self):
|
||||
"""
|
||||
Extract and parse all 'Received' headers to trace email path
|
||||
Critical for tracking email transmission route
|
||||
"""
|
||||
received_headers = self.email.get_all('Received', [])
|
||||
parsed_received = []
|
||||
|
||||
for idx, received in enumerate(received_headers):
|
||||
hop_info = {
|
||||
'hop_number': idx + 1,
|
||||
'raw_header': received,
|
||||
'timestamp': self._extract_timestamp(received),
|
||||
'from_server': self._extract_from_server(received),
|
||||
'by_server': self._extract_by_server(received),
|
||||
'ip_address': self._extract_ip_from_received(received)
|
||||
}
|
||||
parsed_received.append(hop_info)
|
||||
|
||||
self.analysis_results['received_headers'] = parsed_received
|
||||
return parsed_received
|
||||
|
||||
def extract_originating_ip(self):
|
||||
"""
|
||||
Extract the originating IP address (X-Originating-IP)
|
||||
This is crucial for tracing the actual sender location
|
||||
"""
|
||||
originating_ip = self.email.get('X-Originating-IP', None)
|
||||
|
||||
if originating_ip:
|
||||
# Clean up IP address (remove brackets if present)
|
||||
originating_ip = re.search(r'(\d+\.\d+\.\d+\.\d+)', originating_ip)
|
||||
if originating_ip:
|
||||
originating_ip = originating_ip.group(1)
|
||||
|
||||
# If X-Originating-IP not found, try to get from first Received header
|
||||
if not originating_ip:
|
||||
received_headers = self.email.get_all('Received', [])
|
||||
if received_headers:
|
||||
originating_ip = self._extract_ip_from_received(received_headers[-1])
|
||||
|
||||
self.analysis_results['originating_ip'] = originating_ip
|
||||
return originating_ip
|
||||
|
||||
def analyze_authentication(self):
|
||||
"""
|
||||
Analyze email authentication headers (SPF, DKIM, DMARC)
|
||||
Helps detect spoofing and verify email authenticity
|
||||
"""
|
||||
auth_results = {
|
||||
'SPF': self.email.get('Received-SPF', 'Not Found'),
|
||||
'DKIM-Signature': self.email.get('DKIM-Signature', 'Not Found'),
|
||||
'Authentication-Results': self.email.get('Authentication-Results', 'Not Found'),
|
||||
'ARC-Authentication-Results': self.email.get('ARC-Authentication-Results', 'Not Found')
|
||||
}
|
||||
|
||||
# Determine if email passed authentication
|
||||
spf_pass = 'pass' in str(auth_results['SPF']).lower()
|
||||
dkim_pass = 'DKIM-Signature' in str(auth_results['DKIM-Signature'])
|
||||
|
||||
auth_results['spf_passed'] = spf_pass
|
||||
auth_results['dkim_present'] = dkim_pass
|
||||
auth_results['likely_spoofed'] = not (spf_pass or dkim_pass)
|
||||
|
||||
self.analysis_results['authentication'] = auth_results
|
||||
return auth_results
|
||||
|
||||
def extract_message_id(self):
|
||||
"""
|
||||
Extract and analyze Message-ID
|
||||
Useful for tracking email threads and identifying patterns
|
||||
"""
|
||||
message_id = self.email.get('Message-ID', 'Not Found')
|
||||
|
||||
# Extract domain from Message-ID
|
||||
domain = None
|
||||
if message_id != 'Not Found':
|
||||
domain_match = re.search(r'@([a-zA-Z0-9.-]+)', message_id)
|
||||
if domain_match:
|
||||
domain = domain_match.group(1)
|
||||
|
||||
message_id_info = {
|
||||
'message_id': message_id,
|
||||
'domain': domain
|
||||
}
|
||||
|
||||
self.analysis_results['message_id_info'] = message_id_info
|
||||
return message_id_info
|
||||
|
||||
def analyze_sender_info(self):
|
||||
"""
|
||||
Detailed analysis of sender information
|
||||
Extracts email addresses and identifies potential spoofing
|
||||
"""
|
||||
from_header = self.email.get('From', '')
|
||||
return_path = self.email.get('Return-Path', '')
|
||||
|
||||
# Extract email addresses
|
||||
from_email = self._extract_email_address(from_header)
|
||||
return_email = self._extract_email_address(return_path)
|
||||
|
||||
# Check for mismatch (potential spoofing indicator)
|
||||
mismatch = from_email != return_email if from_email and return_email else False
|
||||
|
||||
sender_info = {
|
||||
'from_header': from_header,
|
||||
'from_email': from_email,
|
||||
'return_path': return_path,
|
||||
'return_email': return_email,
|
||||
'address_mismatch': mismatch,
|
||||
'potential_spoofing': mismatch
|
||||
}
|
||||
|
||||
self.analysis_results['sender_analysis'] = sender_info
|
||||
return sender_info
|
||||
|
||||
def get_ip_geolocation_info(self, ip_address):
|
||||
"""
|
||||
Get basic information about an IP address
|
||||
Note: For production, integrate with geolocation APIs
|
||||
"""
|
||||
try:
|
||||
ip_obj = ipaddress.ip_address(ip_address)
|
||||
ip_info = {
|
||||
'ip': ip_address,
|
||||
'is_private': ip_obj.is_private,
|
||||
'is_global': ip_obj.is_global,
|
||||
'is_loopback': ip_obj.is_loopback,
|
||||
'version': ip_obj.version
|
||||
}
|
||||
|
||||
# Try reverse DNS lookup
|
||||
try:
|
||||
hostname = socket.gethostbyaddr(ip_address)[0]
|
||||
ip_info['hostname'] = hostname
|
||||
except:
|
||||
ip_info['hostname'] = 'Reverse DNS lookup failed'
|
||||
|
||||
return ip_info
|
||||
except ValueError:
|
||||
return {'error': 'Invalid IP address'}
|
||||
|
||||
def detect_suspicious_patterns(self):
|
||||
"""
|
||||
Detect common patterns associated with email crimes
|
||||
"""
|
||||
suspicious_indicators = []
|
||||
|
||||
# Check for authentication failures
|
||||
if self.analysis_results.get('authentication', {}).get('likely_spoofed'):
|
||||
suspicious_indicators.append('Email failed authentication checks (SPF/DKIM)')
|
||||
|
||||
# Check for sender/return-path mismatch
|
||||
if self.analysis_results.get('sender_analysis', {}).get('potential_spoofing'):
|
||||
suspicious_indicators.append('Mismatch between From and Return-Path addresses')
|
||||
|
||||
# Check for missing Message-ID
|
||||
if self.analysis_results.get('message_id_info', {}).get('message_id') == 'Not Found':
|
||||
suspicious_indicators.append('Missing Message-ID (unusual for legitimate emails)')
|
||||
|
||||
# Check for suspicious keywords in subject
|
||||
subject = self.email.get('Subject', '').lower()
|
||||
suspicious_keywords = ['urgent', 'verify account', 'suspended', 'confirm', 'prize', 'winner']
|
||||
found_keywords = [kw for kw in suspicious_keywords if kw in subject]
|
||||
if found_keywords:
|
||||
suspicious_indicators.append(f'Suspicious keywords in subject: {", ".join(found_keywords)}')
|
||||
|
||||
self.analysis_results['suspicious_indicators'] = suspicious_indicators
|
||||
return suspicious_indicators
|
||||
|
||||
def generate_forensic_report(self):
|
||||
"""
|
||||
Generate a comprehensive forensic analysis report
|
||||
"""
|
||||
# Run all analysis methods
|
||||
self.extract_basic_headers()
|
||||
self.extract_received_headers()
|
||||
self.extract_originating_ip()
|
||||
self.analyze_authentication()
|
||||
self.extract_message_id()
|
||||
self.analyze_sender_info()
|
||||
self.detect_suspicious_patterns()
|
||||
|
||||
# Analyze originating IP if available
|
||||
orig_ip = self.analysis_results.get('originating_ip')
|
||||
if orig_ip:
|
||||
self.analysis_results['ip_analysis'] = self.get_ip_geolocation_info(orig_ip)
|
||||
|
||||
return self.analysis_results
|
||||
|
||||
def print_report(self):
|
||||
"""Print a formatted forensic report"""
|
||||
report = self.generate_forensic_report()
|
||||
|
||||
print("="*80)
|
||||
print(" EMAIL FORENSIC ANALYSIS REPORT ".center(80, "="))
|
||||
print("="*80)
|
||||
print(f"\nGenerated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")
|
||||
|
||||
# Basic Headers
|
||||
print("\n" + "="*80)
|
||||
print("1. BASIC HEADER INFORMATION")
|
||||
print("="*80)
|
||||
for key, value in report['basic_headers'].items():
|
||||
print(f"{key:20s}: {value}")
|
||||
|
||||
# Sender Analysis
|
||||
print("\n" + "="*80)
|
||||
print("2. SENDER ANALYSIS")
|
||||
print("="*80)
|
||||
sender = report.get('sender_analysis', {})
|
||||
print(f"From Email : {sender.get('from_email', 'N/A')}")
|
||||
print(f"Return Email : {sender.get('return_email', 'N/A')}")
|
||||
print(f"Address Mismatch : {sender.get('address_mismatch', False)}")
|
||||
print(f"Potential Spoofing : {sender.get('potential_spoofing', False)}")
|
||||
|
||||
# Authentication
|
||||
print("\n" + "="*80)
|
||||
print("3. AUTHENTICATION ANALYSIS")
|
||||
print("="*80)
|
||||
auth = report.get('authentication', {})
|
||||
print(f"SPF Passed : {auth.get('spf_passed', False)}")
|
||||
print(f"DKIM Present : {auth.get('dkim_present', False)}")
|
||||
print(f"Likely Spoofed : {auth.get('likely_spoofed', True)}")
|
||||
|
||||
# Message ID
|
||||
print("\n" + "="*80)
|
||||
print("4. MESSAGE ID ANALYSIS")
|
||||
print("="*80)
|
||||
msg_id = report.get('message_id_info', {})
|
||||
print(f"Message ID : {msg_id.get('message_id', 'N/A')}")
|
||||
print(f"Domain : {msg_id.get('domain', 'N/A')}")
|
||||
|
||||
# Originating IP
|
||||
print("\n" + "="*80)
|
||||
print("5. ORIGINATING IP INFORMATION")
|
||||
print("="*80)
|
||||
print(f"Originating IP : {report.get('originating_ip', 'Not Found')}")
|
||||
|
||||
if 'ip_analysis' in report:
|
||||
ip_info = report['ip_analysis']
|
||||
print(f"IP Version : IPv{ip_info.get('version', 'N/A')}")
|
||||
print(f"Is Private : {ip_info.get('is_private', 'N/A')}")
|
||||
print(f"Is Global : {ip_info.get('is_global', 'N/A')}")
|
||||
print(f"Hostname : {ip_info.get('hostname', 'N/A')}")
|
||||
|
||||
# Transmission Path
|
||||
print("\n" + "="*80)
|
||||
print("6. EMAIL TRANSMISSION PATH")
|
||||
print("="*80)
|
||||
received = report.get('received_headers', [])
|
||||
if received:
|
||||
for hop in received:
|
||||
print(f"\nHop {hop['hop_number']}:")
|
||||
print(f" From Server : {hop.get('from_server', 'N/A')}")
|
||||
print(f" By Server : {hop.get('by_server', 'N/A')}")
|
||||
print(f" IP Address : {hop.get('ip_address', 'N/A')}")
|
||||
print(f" Timestamp : {hop.get('timestamp', 'N/A')}")
|
||||
else:
|
||||
print("No Received headers found")
|
||||
|
||||
# Suspicious Indicators
|
||||
print("\n" + "="*80)
|
||||
print("7. SUSPICIOUS INDICATORS")
|
||||
print("="*80)
|
||||
indicators = report.get('suspicious_indicators', [])
|
||||
if indicators:
|
||||
for idx, indicator in enumerate(indicators, 1):
|
||||
print(f"{idx}. {indicator}")
|
||||
else:
|
||||
print("No suspicious indicators detected")
|
||||
|
||||
print("\n" + "="*80)
|
||||
print(" END OF REPORT ".center(80, "="))
|
||||
print("="*80)
|
||||
|
||||
def export_json(self, filename='email_analysis.json'):
|
||||
"""Export analysis results to JSON file"""
|
||||
report = self.generate_forensic_report()
|
||||
with open(filename, 'w') as f:
|
||||
json.dump(report, f, indent=4)
|
||||
print(f"\nAnalysis exported to {filename}")
|
||||
|
||||
# Helper methods
|
||||
def _extract_timestamp(self, received_header):
|
||||
"""Extract timestamp from Received header"""
|
||||
timestamp_match = re.search(r';\s*(.+)$', received_header)
|
||||
return timestamp_match.group(1).strip() if timestamp_match else 'Not Found'
|
||||
|
||||
def _extract_from_server(self, received_header):
|
||||
"""Extract 'from' server information"""
|
||||
from_match = re.search(r'from\s+([^\s]+)', received_header, re.IGNORECASE)
|
||||
return from_match.group(1) if from_match else 'Not Found'
|
||||
|
||||
def _extract_by_server(self, received_header):
|
||||
"""Extract 'by' server information"""
|
||||
by_match = re.search(r'by\s+([^\s]+)', received_header, re.IGNORECASE)
|
||||
return by_match.group(1) if by_match else 'Not Found'
|
||||
|
||||
def _extract_ip_from_received(self, received_header):
|
||||
"""Extract IP address from Received header"""
|
||||
ip_match = re.search(r'\[(\d+\.\d+\.\d+\.\d+)\]', received_header)
|
||||
return ip_match.group(1) if ip_match else 'Not Found'
|
||||
|
||||
def _extract_email_address(self, header_value):
|
||||
"""Extract email address from header value"""
|
||||
email_match = re.search(r'[\w\.-]+@[\w\.-]+\.\w+', header_value)
|
||||
return email_match.group(0) if email_match else None
|
||||
|
||||
|
||||
def main():
|
||||
"""
|
||||
Main function to demonstrate email header analysis
|
||||
"""
|
||||
print("Email Header Analysis Tool for Digital Forensics\n")
|
||||
print("Choose input method:")
|
||||
print("1. Paste raw email content")
|
||||
print("2. Load from file")
|
||||
print("3. Use sample email")
|
||||
|
||||
choice = input("\nEnter choice (1-3): ").strip()
|
||||
|
||||
if choice == '1':
|
||||
print("\nPaste the raw email (including headers). Press Ctrl+D (Linux/MacOS) or Ctrl+Z (Windows) when done: ")
|
||||
import sys
|
||||
email_content = sys.stdin.read()
|
||||
|
||||
elif choice == '2':
|
||||
filename = input("Enter email file path: ").strip()
|
||||
try:
|
||||
with open(filename, 'r') as f:
|
||||
email_content = f.read()
|
||||
except FileNotFoundError:
|
||||
print(f"Error: File '{filename}' not found")
|
||||
return
|
||||
|
||||
else:
|
||||
# Sample email for demonstration
|
||||
email_content = """From: sender@example.com
|
||||
To: recipient@example.com
|
||||
Subject: Urgent Account Verification Required
|
||||
Date: Mon, 9 Oct 2025 10:30:00 +0530
|
||||
Message-ID: <12345.67890@mail.example.com>
|
||||
Return-Path: different@suspicious.com
|
||||
Received: from mail.example.com ([192.168.1.100]) by server.example.com with SMTP; Mon, 9 Oct 2025 10:30:00 +0530
|
||||
Received: from client.suspicious.com ([203.0.113.45]) by mail.example.com with ESMTP; Mon, 9 Oct 2025 10:29:55 +0530
|
||||
X-Originating-IP: [203.0.113.45]
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=utf-8
|
||||
|
||||
This is a sample email body for forensic analysis.
|
||||
"""
|
||||
|
||||
# Create analyzer instance and generate report
|
||||
analyzer = EmailHeaderAnalyzer(email_content)
|
||||
analyzer.print_report()
|
||||
|
||||
# Ask if user wants to export to JSON
|
||||
export = input("\nExport analysis to JSON? (y/n): ").strip().lower()
|
||||
if export == 'y':
|
||||
filename = input("Enter filename (default: email_analysis.json): ").strip()
|
||||
if not filename:
|
||||
filename = 'email_analysis.json'
|
||||
analyzer.export_json(filename)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
# END OF CODE
|
||||
Reference in New Issue
Block a user