Added all DAA in python
This commit is contained in:
@@ -0,0 +1,96 @@
|
||||
import heapq
|
||||
from collections import Counter, namedtuple
|
||||
|
||||
# Heap entries are tuples of the form (frequency, unique_id, node)
|
||||
# unique_id breaks frequency ties deterministically, so Node objects are never compared
|
||||
class Node:
    """A vertex of the Huffman tree.

    The constructor stores its arguments unchanged as attributes:
    ``freq`` (weight of the subtree rooted here), ``symbol`` (the payload
    of a leaf, ``None`` otherwise) and the ``left`` / ``right`` children.
    """

    def __init__(self, freq, symbol=None, left=None, right=None):
        self.freq, self.symbol = freq, symbol
        self.left, self.right = left, right

    def is_leaf(self):
        """Return True when this node carries a symbol (``symbol`` is not None)."""
        carries_symbol = self.symbol is not None
        return carries_symbol
|
||||
|
||||
def build_huffman_tree(freqs):
    """Build a Huffman tree and return its root node.

    Args:
        freqs: Mapping of symbol -> frequency (for example a
            ``collections.Counter``).

    Returns:
        The root ``Node`` of the tree, or ``None`` when ``freqs`` is empty
        (``generate_codes`` treats a ``None`` root as "no codes").

    With exactly one distinct symbol the root gets a zero-frequency
    placeholder as its left child and the real leaf as its right child, so
    the symbol still receives a one-bit code.
    """
    # Heap entries are (frequency, uid, node); the unique uid guarantees that
    # tuple comparison never falls through to comparing Node objects.
    heap = [
        (freq, uid, Node(freq, symbol=sym))
        for uid, (sym, freq) in enumerate(freqs.items())
    ]
    if not heap:
        # Nothing to encode: avoid popping from an empty heap (IndexError).
        return None
    heapq.heapify(heap)
    next_uid = len(heap)

    # Edge case: single unique symbol -> create a dummy sibling.
    if len(heap) == 1:
        freq, _, only = heap[0]
        dummy = Node(0, symbol=None)  # zero-frequency sibling
        return Node(freq + dummy.freq, left=dummy, right=only)

    # Repeatedly merge the two lightest subtrees until one tree remains.
    while len(heap) > 1:
        f1, _, n1 = heapq.heappop(heap)
        f2, _, n2 = heapq.heappop(heap)
        merged = Node(f1 + f2, left=n1, right=n2)
        heapq.heappush(heap, (merged.freq, next_uid, merged))
        next_uid += 1

    return heap[0][2]
|
||||
|
||||
def generate_codes(root):
    """Map every symbol stored in the tree to its code.

    Walks the tree from ``root``; going to a left child appends '0' to the
    path and going to a right child appends '1'. Returns a dict
    ``symbol -> code`` where each code is a string of '0'/'1'. A ``None``
    root yields an empty dict.
    """
    table = {}
    pending = [(root, "")]
    while pending:
        current, path = pending.pop()
        if current is None:
            continue
        if current.is_leaf():
            # A leaf reached with an empty path (lone-leaf tree) still gets
            # a code of length 1.
            table[current.symbol] = path if path else "0"
            continue
        # LIFO stack: push the right child first so the left one is visited first.
        pending.append((current.right, path + "1"))
        pending.append((current.left, path + "0"))
    return table
|
||||
|
||||
def huffman_encode(s):
    """Huffman-encode the string ``s``.

    Returns:
        A 3-tuple ``(encoded_bitstring, codes, root)`` where
        ``encoded_bitstring`` is a string of '0'/'1', ``codes`` maps each
        symbol of ``s`` to its code and ``root`` is the tree root to pass to
        ``huffman_decode``. For empty input the result is ``("", {}, None)``
        so callers can always unpack three values.
    """
    if not s:
        # Same arity as the non-empty case; there is no tree to return.
        return "", {}, None
    freqs = Counter(s)
    root = build_huffman_tree(freqs)
    codes = generate_codes(root)
    encoded = "".join(codes[ch] for ch in s)
    return encoded, codes, root
|
||||
|
||||
def huffman_decode(encoded_bits, root):
    """Decode a '0'/'1' bit string by walking the Huffman tree from ``root``.

    Args:
        encoded_bits: String made only of the characters '0' and '1'.
        root: Root node of the Huffman tree (as returned by
            ``build_huffman_tree`` / ``huffman_encode``).

    Returns:
        The decoded string; empty input decodes to ``""``. Trailing bits that
        stop in the middle of a code are ignored.

    Raises:
        ValueError: If ``encoded_bits`` is non-empty but ``root`` is ``None``,
            contains a character other than '0'/'1', or follows a path that
            does not lead to a symbol in the tree.
    """
    if not encoded_bits:
        return ""
    if root is None:
        raise ValueError("cannot decode a non-empty bit string without a tree")

    if root.is_leaf():
        # generate_codes gives a lone-leaf root the code "0", so every bit
        # must be "0" and each one stands for that single symbol.
        if encoded_bits.strip("0"):
            raise ValueError("only '0' bits are valid for a single-leaf tree")
        return "".join([root.symbol] * len(encoded_bits))

    decoded = []
    node = root
    for pos, bit in enumerate(encoded_bits):
        if bit == "0":
            node = node.left
        elif bit == "1":
            node = node.right
        else:
            raise ValueError(f"invalid bit {bit!r} at position {pos}")

        if node is None:
            raise ValueError(f"bit sequence leaves the tree at position {pos}")
        if node.is_leaf():
            decoded.append(node.symbol)
            node = root  # restart at the top for the next code
        elif node.left is None and node.right is None:
            # Childless non-leaf (the zero-frequency placeholder): no symbol
            # can ever be reached from here.
            raise ValueError(f"bit sequence reaches a dead end at position {pos}")
    return "".join(decoded)
|
||||
|
||||
# Example / test
|
||||
if __name__ == "__main__":
    # Round-trip demo: encode a sample sentence, decode it again and print
    # the code table together with a few statistics.
    sample = "this is an example for huffman encoding"
    encoded, codes, root = huffman_encode(sample)
    decoded = huffman_decode(encoded, root)

    print("Original:", sample)
    print("Codes:")
    # Shortest codes first; ties are ordered by the symbol itself.
    by_length = sorted(codes.items(), key=lambda item: (len(item[1]), item[0]))
    for symbol, code in by_length:
        print(f" {symbol!r} : {code}")
    print("Encoded bit length:", len(encoded))
    print("Encoded (first 200 bits):", encoded[:200])
    print("Decoded matches original?", decoded == sample)
|
||||
Reference in New Issue
Block a user