"""Huffman coding: build a prefix-code tree, then encode and decode strings."""

import heapq
from collections import Counter, namedtuple


class Node:
    """A node of the Huffman tree.

    Leaves carry a ``symbol``. Internal nodes, and the zero-frequency
    placeholder created for single-symbol inputs, keep ``symbol`` as None.
    """

    def __init__(self, freq, symbol=None, left=None, right=None):
        self.freq = freq
        self.symbol = symbol
        self.left = left
        self.right = right

    def is_leaf(self):
        """Return True when this node carries a symbol."""
        # Deliberately keyed on ``symbol`` rather than on missing children:
        # the placeholder sibling has no children but must not get a code.
        return self.symbol is not None


def build_huffman_tree(freqs):
    """Build a Huffman tree and return its root node.

    Args:
        freqs: Mapping of symbol -> frequency.

    Returns:
        The root ``Node``, or None when ``freqs`` is empty.
    """
    # Heap entries are (frequency, unique_id, node). The unique_id breaks
    # frequency ties deterministically, so Node objects are never compared.
    heap = [
        (freq, uid, Node(freq, symbol=sym))
        for uid, (sym, freq) in enumerate(freqs.items())
    ]
    if not heap:
        return None

    # Edge case: a single unique symbol gets a zero-frequency placeholder
    # sibling so that the symbol still receives a one-bit code ("1").
    if len(heap) == 1:
        freq, _, only = heap[0]
        placeholder = Node(0)
        return Node(freq + placeholder.freq, left=placeholder, right=only)

    heapq.heapify(heap)
    next_uid = len(heap)
    while len(heap) > 1:
        freq_a, _, node_a = heapq.heappop(heap)
        freq_b, _, node_b = heapq.heappop(heap)
        merged = Node(freq_a + freq_b, left=node_a, right=node_b)
        heapq.heappush(heap, (merged.freq, next_uid, merged))
        next_uid += 1

    return heap[0][2]


def generate_codes(root):
    """Return a dict mapping each symbol to its code (a string of '0'/'1').

    Args:
        root: Root ``Node`` of a Huffman tree, or None.

    Returns:
        Dict of symbol -> code, in left-to-right leaf order. Empty when
        ``root`` is None.
    """
    codes = {}
    if root is None:
        return codes

    # Explicit stack instead of recursion so that very skewed (deep) trees
    # cannot hit the interpreter's recursion limit.
    stack = [(root, "")]
    while stack:
        node, prefix = stack.pop()
        if node.is_leaf():
            # A tree that is a single leaf would yield an empty prefix;
            # ensure the code length is at least 1.
            codes[node.symbol] = prefix or "0"
            continue
        # Push right before left so the left subtree is visited first.
        if node.right is not None:
            stack.append((node.right, prefix + "1"))
        if node.left is not None:
            stack.append((node.left, prefix + "0"))
    return codes


def huffman_encode(s):
    """Encode the string ``s`` with a Huffman code built from its symbols.

    Args:
        s: The text to encode.

    Returns:
        A tuple ``(encoded_bitstring, codes, root)``. For empty input this
        is ``("", {}, None)``, so callers can always unpack three values.
    """
    if not s:
        return "", {}, None

    root = build_huffman_tree(Counter(s))
    codes = generate_codes(root)
    encoded = "".join(codes[ch] for ch in s)
    return encoded, codes, root


def huffman_decode(encoded_bits, root):
    """Decode a bitstring using the Huffman tree rooted at ``root``.

    Args:
        encoded_bits: String of '0'/'1' characters. Any character other
            than '0' is treated as '1'.
        root: Root ``Node`` returned by ``build_huffman_tree`` or
            ``huffman_encode``.

    Returns:
        The decoded string. Trailing bits that do not complete a code are
        ignored.

    Raises:
        ValueError: If ``encoded_bits`` is non-empty but ``root`` is None,
            or if the bits lead off the tree.
    """
    # Empty input always decodes to empty output, whatever the tree is.
    if not encoded_bits:
        return ""
    if root is None:
        raise ValueError("cannot decode a non-empty bitstring without a tree")

    # A tree that is a single leaf: generate_codes assigns it a one-bit
    # code, so every bit stands for one occurrence of that symbol.
    if root.is_leaf():
        return "".join([root.symbol] * len(encoded_bits))

    decoded = []
    node = root
    for bit in encoded_bits:
        node = node.left if bit == "0" else node.right
        if node is None:
            raise ValueError("encoded_bits does not match the given Huffman tree")
        if node.is_leaf():
            decoded.append(node.symbol)
            node = root
    return "".join(decoded)


def main():
    """Run a small round-trip demonstration and print the results."""
    sample = "this is an example for huffman encoding"
    encoded, codes, root = huffman_encode(sample)
    decoded = huffman_decode(encoded, root)
    print("Original:", sample)
    print("Codes:")
    for k in sorted(codes, key=lambda x: (len(codes[x]), x)):
        print(f" {repr(k)} : {codes[k]}")
    print("Encoded bit length:", len(encoded))
    print("Encoded (first 200 bits):", encoded[:200])
    print("Decoded matches original?", decoded == sample)


# Example / test
if __name__ == "__main__":
    main()