Huffman.py
This commit is contained in:
@@ -1,42 +1,38 @@
|
|||||||
import heapq
|
import itertools


def build_tree(freq):
    """Build a Huffman tree from a symbol-frequency mapping.

    Every node is a two-element list ``[weight, data]``. For a leaf,
    ``data`` is the symbol itself; for an internal node it is the pair
    ``[left, right]`` of child nodes.

    Args:
        freq: Mapping of symbol -> occurrence count.

    Returns:
        The root node of the tree.

    Raises:
        IndexError: If ``freq`` is empty (there is no root to return).
    """
    # Heap entries are (weight, seq, node). The running sequence number
    # resolves weight ties in insertion order -- exactly what a stable sort
    # by weight yields -- and guarantees heapq never has to compare the node
    # payloads themselves (a str against a list would raise TypeError).
    order = itertools.count()
    heap = [(weight, next(order), [weight, ch]) for ch, weight in freq.items()]
    heapq.heapify(heap)

    while len(heap) > 1:
        _, _, left = heapq.heappop(heap)
        _, _, right = heapq.heappop(heap)
        merged = [left[0] + right[0], [left, right]]  # new internal node
        heapq.heappush(heap, (merged[0], next(order), merged))

    return heap[0][2]
|
||||||
|
|
||||||
def build_huffman_tree(freq):
    """Build a Huffman tree with a heap-based priority queue.

    Heap entries are ``[weight, seq, node]``. ``seq`` is a unique, increasing
    counter used purely as a tie-breaker so that two entries of equal weight
    are never compared by their ``node`` payload. A leaf ``node`` is
    ``[symbol]``; an internal ``node`` is ``[left, right]``.

    Args:
        freq: Mapping of symbol -> occurrence count.

    Returns:
        The root heap entry ``[total_weight, seq, node]``; the tree itself
        is element ``[2]``.

    Raises:
        IndexError: If ``freq`` is empty.
    """
    counter = itertools.count()  # unique sequence numbers for tie-breaking
    heap = [[weight, next(counter), [ch]] for ch, weight in freq.items()]
    heapq.heapify(heap)

    while len(heap) > 1:
        low = heapq.heappop(heap)
        high = heapq.heappop(heap)
        combined_node = [low[2], high[2]]
        heapq.heappush(heap, [low[0] + high[0], next(counter), combined_node])

    return heap[0]


def assign(node, code="", table=None):
    """Derive the code table for a tree made of ``[weight, data]`` nodes.

    Args:
        node: A leaf ``[weight, symbol]`` or an internal node
            ``[weight, [left, right]]``.
        code: Bit string accumulated on the path from the root to ``node``.
        table: Dict to fill in; a fresh one is created when omitted.

    Returns:
        The dict mapping each symbol to its bit string.
    """
    # A ``{}`` default would be shared by every call and leak codes from
    # earlier invocations into later results.
    if table is None:
        table = {}

    if isinstance(node[1], str):  # leaf
        # A tree consisting of a single leaf would otherwise receive the
        # empty string, which cannot encode anything.
        table[node[1]] = code or "0"
    else:
        left, right = node[1]
        assign(left, code + "0", table)
        assign(right, code + "1", table)
    return table


def assign_codes(node, prefix="", codebook=None):
    """Derive the code table for a tree made of bare nested lists.

    Args:
        node: ``[symbol]`` for a leaf, or ``[left, right]`` for an internal
            node.
        prefix: Bit string accumulated on the path from the root to ``node``.
        codebook: Dict to fill in; a fresh one is created when omitted.

    Returns:
        The dict mapping each symbol to its bit string.
    """
    # See ``table`` handling: never share one dict between independent calls.
    if codebook is None:
        codebook = {}

    if len(node) == 1 and isinstance(node[0], str):
        # Single-symbol input: fall back to "0" instead of an empty code.
        codebook[node[0]] = prefix or "0"
    else:
        assign_codes(node[0], prefix + "0", codebook)
        assign_codes(node[1], prefix + "1", codebook)
    return codebook
|
||||||
|
|
||||||
def huff_en(txt):
    """Print the Huffman code of every character in ``txt``.

    Uses ``build_huffman_tree`` / ``assign_codes``.

    Args:
        txt: Text whose characters are to be encoded.

    Returns:
        Dict mapping each character to its bit string (empty for empty
        input).
    """
    freq = {}
    for c in txt:
        freq[c] = freq.get(c, 0) + 1

    # With no symbols there is no tree to build; building one would raise
    # IndexError. The root entry is [weight, seq, node], hence the [2].
    codes = assign_codes(build_huffman_tree(freq)[2]) if freq else {}

    print("Huffman Codes:")
    for ch in sorted(codes):
        print(f"{ch}: {codes[ch]}")
    return codes


def huffman(text):
    """Print the Huffman code of every character in ``text``.

    Uses ``build_tree`` / ``assign``.

    Args:
        text: Text whose characters are to be encoded.

    Returns:
        Dict mapping each character to its bit string (empty for empty
        input).
    """
    # freq count
    freq = {}
    for c in text:
        freq[c] = freq.get(c, 0) + 1

    # build + assign (skipped for empty input, which has no tree)
    codes = assign(build_tree(freq)) if freq else {}

    # print
    for ch in sorted(codes):
        print(ch, ":", codes[ch])
    return codes
|
||||||
|
|
||||||
|
|
||||||
# Demo: print the code table of a sample string with both implementations.
txt = "abbbbbbcccddddddddee"
huff_en(txt)
huffman(txt)
|
||||||
|
|||||||
Reference in New Issue
Block a user