1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66
|
"""Huffman编码Python实现。"""
class Node(object):
def __init__(self, score, value=None, lchild=None, rchild=None): self.score = score self.value = value self.lchild = lchild self.rchild = rchild self.bit = 0
def generate_huffman_tree(sequence):
char2frequency = dict() for c in sequence: char2frequency.setdefault(c, 0) char2frequency[c] += 1
def _cmp(a, b): delta = a.score - b.score if delta != 0: return sequence.index(a.value) - sequence.index(b.value) return delta
queue = [Node(char2frequency[x], x) for x in set(sequence)] queue.sort(cmp=_cmp)
while len(queue) > 1: lchild, rchild = queue.pop(), queue.pop() lchild.bit, rchild.bit = 0, 1 parent_node = Node( lchild.score+rchild.score, lchild=lchild, rchild=rchild) queue.insert(0, parent_node) queue.sort(key=lambda o: o.score)
return queue[0]
def generate_check_list(node):
stack = [] check_list = dict() while node is not None or stack: while node is not None: stack.append(node) node = node.lchild if stack: node = stack.pop() if node.value is not None: sequence = [x.bit for x in stack] sequence.append(node.bit) check_list[node.value] = int(''.join(map(str, sequence)), 2) node = node.rchild return check_list
if __name__ == '__main__': tree = generate_huffman_tree('shootsheetjobwork') check_list = generate_check_list(tree) print check_list
|