Using Huffman - extra

173 days ago by bennoms

from sage.coding.source_coding.huffman import Huffman, frequency_table 
       
source = open(DATA + 'source.txt', "r").read() 
       
ft = frequency_table(source); ft 
       
{' ': 80, ',': 8, '.': 3, 'A': 1, 'C': 3, 'B': 3, 'E': 1, 'G': 2, 'I':
1, 'H': 1, 'M': 3, 'L': 2, 'Q': 1, 'P': 2, 'T': 2, 'a': 32, 'c': 23,
'b': 6, 'e': 42, 'd': 14, 'g': 10, 'f': 6, 'i': 34, 'h': 24, 'k': 5,
'm': 10, 'l': 8, 'o': 32, 'n': 31, 'q': 2, 'p': 15, 's': 25, 'r': 25,
'u': 10, 't': 36, 'w': 6, 'v': 5, 'y': 10}
{' ': 80, ',': 8, '.': 3, 'A': 1, 'C': 3, 'B': 3, 'E': 1, 'G': 2, 'I': 1, 'H': 1, 'M': 3, 'L': 2, 'Q': 1, 'P': 2, 'T': 2, 'a': 32, 'c': 23, 'b': 6, 'e': 42, 'd': 14, 'g': 10, 'f': 6, 'i': 34, 'h': 24, 'k': 5, 'm': 10, 'l': 8, 'o': 32, 'n': 31, 'q': 2, 'p': 15, 's': 25, 'r': 25, 'u': 10, 't': 36, 'w': 6, 'v': 5, 'y': 10}
#ft = {'a': 1, 'b': 1, 'c': 1, 'd': 1, 'r': 1} # is equal to *ft = frequency_table("abcdr")* #ft = {'a': 8, 'b': 4, 'c': 2, 'd': 1, 'r': 4} #ft = {'a': 0,421052632, 'b': 0,210526316, 'c': 0,105263158, 'd': 0,052631579, 'r': 0,210526316} 
       
#ft 
       
h = Huffman(ft) 
       
encode_example = h.encode(source); encode_example 
       
'10110000000011101100001111100001001111011001000001000100011110101101111\
110001111011111000110010110011111001001100111110111110001100101110110001\
011101010110100000010111000001001001010101000001110011111101110110111101\
010100001100100111111001011010010101111110110111011110100110010101101101\
110101000001110110010000101110101110100101011101010000011101101011001100\
011100001011100100001011100101111010011010110101100100001101110001001001\
111111101111100111111000110010101000101101001101110110000001111000110101\
100001111000001110011011011111000101100100000011100010000010001111011011\
110011010110101000001110110101110111011111000001110101100100100111101010\
011000010111010000001011101111100111111000110010101000101101001101110110\
000001111000110101101010000011101101011001010101100110110110111001111100\
100001011111111000010010101111001101011001110101010011101010000011101100\
100111101111110010110000001110001110111011100101101011010000001011100100\
011010010101111001110011110010000011001110101010011101000101000001110001\
111001110101111111001111001011010110000110001110011010100101011011011100\
010111100001010101110101110001000011101100100011001111010111000111100010\
111011111101010011000010100101101111110001111011111000111101010000011101\
100100111101111011111001101110000110010111011100101101011010000001011100\
110010111100010001101110011111111011110101110111100110111110011111100011\
001010100010110100110111011000000111100110011101010100111000101000101110\
111011010000001011101010000010010010110101110011110101110001110010111011\
011110100100101100001000111001011100100111010000101110100101011010111000\
111111011100111100001100010110000110100110100000110101000001110110011000\
111001110101001111111001011000111010110100010011010110100101011111101101\
110111101001100101011011011101010000010000010111011000011000001000110000\
110100000111111010111001001110100101011100010111011111001111101010110011\
110101100100010100011111011001110100010000011101101011000000001110110000\
100100010001111010110111111000010111111011011101111010011110001011000011\
010011010000011001111100100110010010111111011110010001010011000010111010\
000001011100100011101000101010001110110101101011000110111101110101101010\
111110111011011111100010111001100101111101010010101101101011010110011101\
010100111010110001101111011101011010101111101110110111110011111100011001\
0101000101101001101110110000001111000001110'
'101100000000111011000011111000010011110110010000010001000111101011011111100011110111110001100101100111110010011001111101111100011001011101100010111010101101000000101110000010010010101010000011100111111011101101111010101000011001001111110010110100101011111101101110111101001100101011011011101010000011101100100001011101011101001010111010100000111011010110011000111000010111001000010111001011110100110101101011001000011011100010010011111111011111001111110001100101010001011010011011101100000011110001101011000011110000011100110110111110001011001000000111000100000100011110110111100110101101010000011101101011101110111110000011101011001001001111010100110000101110100000010111011111001111110001100101010001011010011011101100000011110001101011010100000111011010110010101011001101101101110011111001000010111111110000100101011110011010110011101010100111010100000111011001001111011111100101100000011100011101110111001011010110100000010111001000110100101011110011100111100100000110011101010100111010001010000011100011110011101011111110011110010110101100001100011100110101001010110110111000101111000010101011101011100010000111011001000110011110101110001111000101110111111010100110000101001011011111100011110111110001111010100000111011001001111011110111110011011100001100101110111001011010110100000010111001100101111000100011011100111111110111101011101111001101111100111111000110010101000101101001101110110000001111001100111010101001110001010001011101110110100000010111010100000100100101101011100111101011100011100101110110111101001001011000010001110010111001001110100001011101001010110101110001111110111001111000011000101100001101001101000001101010000011101100110001110011101010011111110010110001110101101000100110101101001010111111011011101111010011001010110110111010100000100000101110110000110000010001100001101000001111110101110010011101001010111000101110111110011111010101100111101011001000101000111110110011101000100000111011010110000000011101100001001000100011110101101111110000101111110110111011110100111100010110000110100110100000110011111001001100100101111110111100100010100110000101110100000010111001000111010001010100011101101011010110001101111011101011010101111101110110111111000101110011001011111010100101011011010110101100111010101001110101100011011110111010110101011111011101101111100111111000110010101000101101001101110110000001111000001110'
#encode_example = h.encode("abracabradabracabra"); #encode_example 
       
x = h.decode(encode_example); x 
       
'The Code Book covers a diverse set of historical topics including the
Man in the Iron Mask, Arabic cryptography, Charles Babbage, the
mechanisation of cryptography, the Enigma Machine, and the decipherment
of Linear B and other ancient writing systems. Later sections cover the
development of public key cryptography and some of this material is
based on interviews with the participants, including those who worked in
secret at GCHQ. The book concludes with a discussion of PGP, quantum
computing, and quantum cryptography.'
'The Code Book covers a diverse set of historical topics including the Man in the Iron Mask, Arabic cryptography, Charles Babbage, the mechanisation of cryptography, the Enigma Machine, and the decipherment of Linear B and other ancient writing systems. Later sections cover the development of public key cryptography and some of this material is based on interviews with the participants, including those who worked in secret at GCHQ. The book concludes with a discussion of PGP, quantum computing, and quantum cryptography.'
T = sorted(h.encoding_table().items()) for symbol, code in T: print symbol, code 
       
  110
, 011010
. 0001110
A 101100100
B 0100000
C 0001111
E 101100101
G 01000101
H 101100111
I 101100110
L 01000110
M 0100001
P 01000111
Q 01000100
T 10110000
a 0111
b 000100
c 11111
d 01001
e 1110
f 000101
g 101101
h 0000
i 1001
k 1111010
l 011011
m 101110
n 0101
o 1000
p 01100
q 10110001
r 0011
s 0010
t 1010
u 101111
v 1111011
w 000110
y 111100
  110
, 011010
. 0001110
A 101100100
B 0100000
C 0001111
E 101100101
G 01000101
H 101100111
I 101100110
L 01000110
M 0100001
P 01000111
Q 01000100
T 10110000
a 0111
b 000100
c 11111
d 01001
e 1110
f 000101
g 101101
h 0000
i 1001
k 1111010
l 011011
m 101110
n 0101
o 1000
p 01100
q 10110001
r 0011
s 0010
t 1010
u 101111
v 1111011
w 000110
y 111100
im = h.tree(); im.show(graph_border=True, figsize=[25,25]) #W,H #im.show(graph_border=True, vertex_size=1000, figsize=[10,10]) #W,H