Files
LP3/DAA/A2.py
T
2025-11-05 18:07:57 +05:30

97 lines
2.9 KiB
Python

import heapq
from collections import Counter, namedtuple
# Heap entries are (frequency, unique_id, node) tuples.
# unique_id breaks frequency ties deterministically, so Node objects are never compared.
class Node:
    """A vertex of the Huffman tree.

    A node counts as a leaf exactly when its ``symbol`` is not ``None``;
    nodes created without a symbol link to their children through ``left``
    and ``right`` (either of which may be ``None``).
    """

    def __init__(self, freq, symbol=None, left=None, right=None):
        # Frequency (weight) of this node and the symbol it stands for, if any.
        self.freq, self.symbol = freq, symbol
        # Child links; both default to None.
        self.left, self.right = left, right

    def is_leaf(self):
        """Return True when this node carries a symbol."""
        return not (self.symbol is None)
def build_huffman_tree(freqs):
    """Build a Huffman tree from a symbol -> frequency mapping.

    Args:
        freqs: Mapping of symbol to frequency (for example a ``Counter``).

    Returns:
        The root ``Node`` of the tree, or ``None`` when ``freqs`` has no
        entries.
    """
    # Heap entries are (frequency, uid, node). The uid is unique, so ties on
    # frequency are settled before the comparison could reach the Node.
    heap = [
        (f, uid, Node(f, symbol=sym))
        for uid, (sym, f) in enumerate(freqs.items())
    ]
    if not heap:
        # Nothing to encode: report an empty tree instead of failing with
        # IndexError when popping from an empty heap.
        return None
    uid = len(heap)
    heapq.heapify(heap)

    # Edge case: single unique symbol -> create a zero-frequency dummy
    # sibling so the real symbol still sits one edge below the root.
    if len(heap) == 1:
        f, _, node = heap[0]
        dummy = Node(0, symbol=None)
        return Node(f + dummy.freq, left=dummy, right=node)

    # Repeatedly merge the two lowest-frequency subtrees until one remains.
    while len(heap) > 1:
        f1, _, n1 = heapq.heappop(heap)
        f2, _, n2 = heapq.heappop(heap)
        merged = Node(f1 + f2, left=n1, right=n2)
        heapq.heappush(heap, (merged.freq, uid, merged))
        uid += 1
    return heap[0][2]
def generate_codes(root):
    """Map each symbol found under ``root`` to its code.

    The code is the string of '0' (left edge) and '1' (right edge) steps
    leading from ``root`` to the symbol's leaf. A leaf reached with an empty
    path (the root itself is a leaf) gets the code "0". A ``None`` root
    yields an empty dict.
    """
    table = {}
    # Explicit stack instead of recursion. The right child is pushed first so
    # the left subtree is popped (visited) first, giving a pre-order walk.
    pending = [(root, "")]
    while pending:
        current, path = pending.pop()
        if current is None:
            continue
        if current.is_leaf():
            # Guarantee a code length of at least one bit.
            table[current.symbol] = path or "0"
            continue
        pending.append((current.right, path + "1"))
        pending.append((current.left, path + "0"))
    return table
def huffman_encode(s):
    """Huffman-encode the string ``s``.

    Args:
        s: Text to encode.

    Returns:
        A 3-tuple ``(encoded_bitstring, codes, root)``: the encoded text as a
        string of '0'/'1', the symbol -> code dict, and the root of the
        Huffman tree. Empty input yields ``("", {}, None)`` so callers can
        always unpack three values.
    """
    if not s:
        # Keep the same arity as the normal path; there is no tree to return.
        return "", {}, None
    freqs = Counter(s)
    root = build_huffman_tree(freqs)
    codes = generate_codes(root)
    encoded = "".join(codes[ch] for ch in s)
    return encoded, codes, root
def huffman_decode(encoded_bits, root):
    """Decode a string of '0'/'1' characters by walking the Huffman tree.

    Args:
        encoded_bits: Bit string to decode.
        root: Root node of the Huffman tree (may be ``None`` only when
            ``encoded_bits`` is empty).

    Returns:
        The decoded string. Empty input yields "". Trailing bits that end
        part-way through a code are ignored.

    Raises:
        ValueError: If ``encoded_bits`` contains a character other than '0'
            or '1', if a bit leads to a missing branch of the tree, or if
            bits are supplied without a tree.
    """
    if not encoded_bits:
        # Empty input -> empty output.
        return ""
    if root is None:
        raise ValueError("cannot decode a non-empty bit string without a Huffman tree")

    decoded = []
    node = root
    for pos, bit in enumerate(encoded_bits):
        if bit == "0":
            node = node.left
        elif bit == "1":
            node = node.right
        else:
            # Reject stray characters instead of silently reading them as '1'.
            raise ValueError(f"invalid bit {bit!r} at position {pos}")
        if node is None:
            # The bit sequence walked off the tree, so it is not a valid code.
            raise ValueError(f"bit at position {pos} does not match any code in the tree")
        if node.is_leaf():
            decoded.append(node.symbol)
            node = root  # restart at the top for the next symbol
    return "".join(decoded)
# Example / test
if __name__ == "__main__":
    # Demo: round-trip a sample sentence through encode/decode and report
    # the resulting code table.
    sample = "this is an example for huffman encoding"
    encoded, codes, root = huffman_encode(sample)
    decoded = huffman_decode(encoded, root)

    print("Original:", sample)
    print("Codes:")
    # Shortest codes first; codes of equal length are ordered by symbol.
    for symbol, code in sorted(codes.items(), key=lambda item: (len(item[1]), item[0])):
        print(f" {symbol!r} : {code}")
    print("Encoded bit length:", len(encoded))
    print("Encoded (first 200 bits):", encoded[:200])
    print("Decoded matches original?", decoded == sample)