ALib C++ Framework
by
Library Version: 2605 R0
Documentation generated by doxygen
Loading...
Searching...
No Matches
huffman.hpp
Go to the documentation of this file.
1//==================================================================================================
2/// \file
3/// This header-file is part of module \alib_bitbuffer of the \aliblong.
4///
5/// Copyright 2013-2026 A-Worx GmbH, Germany.
6/// Published under #"mainpage_license".
7//==================================================================================================
8ALIB_EXPORT namespace alib { namespace bitbuffer { namespace ac_v1 {
9
10/// This class, together with sibling #"ac_v1::HuffmanDecoder" implements the well known
11/// \https{Huffman compression,en.wikipedia.org/wiki/Huffman_coding}.
12/// The class uses type #"BitWriter" to write the bit stream into an underlying
13/// #"BitBuffer".
14///
15/// The use of this class is quite straight forward:
16/// - Create an instance of this class
17/// - Feed the data to be compressed once to the algorithm using method
18/// #"HuffmanEncoder;CountSymbol"
19/// - Invoke method #"HuffmanEncoder;Generate"
20/// - Feed all data a second time using method
21/// #"HuffmanEncoder;Write".
23 public:
24 static constexpr int WORD_SIZE = 32; ///< Codes are stored in two words of size 32
25 ///< (this constant)
26 static constexpr int MAX_WORDS = 2; ///< Codes are stored in two (this constant) words
27 ///< of size 32.
28
29 /// The maximum length of a huffman code.
30 /// In theory, with 256 symbols, a maximum code length of 255 could occur. For this, each
31 /// next symbol needed to have the double frequency than the previous. Hence, setting the
32 /// value to 64, limits the encoded data size to 2^64 symbols, which should be more than
33 /// enough for the next decades.
34 static constexpr int MAX_CODE_LENGTH = 64;
35
36 /// Information about the encoding of symbols. The symbol's value (between \c 0 and \c 255)
37 /// is not included, but deduced from the objects' position in the symbol array found in
38 /// field #"HuffmanEncoder;symbols".
39 struct Symbol {
40 std::size_t frequency = 0; ///< The number of occurrences of the symbol.
41 alib::ShiftOpRHS wordLength = 0; ///< 0: symbol not used, otherwise between 1 and 255.
42 uint32_t words[MAX_WORDS] = {0,0}; ///< The bitcode of the symbol.
43 };
44
45 protected:
46
47 BitWriter& bw; ///< The bit writer to use for encoding the data.
48 Symbol symbols[256]; ///< The symbol table.
49
50ALIB_DBG( bool dbgAllValuesAreSame; )
51
52 public:
53 /// Constructor.
54 /// @param bitWriter The bit writer to write the encoding information as well as the encoded
55 /// symbols to. (Stored in field #"bw".)
57 : bw(bitWriter) {}
58
59 /// Counts a symbol.
60 /// This method has to be invoked for each byte to compress later.
61 /// @param symbol The symbol to count.
62 void CountSymbol(uint8_t symbol) { symbols[symbol].frequency++; }
63
64 /// Generates the huffman encoding table and writes this information to the bit writer.
66 void Generate();
67
68 /// Writes the given \p{symbol} to the bit stream.
69 /// @param symbol The symbol to write.
70 void Write(uint8_t symbol) {
71 auto& rec = symbols[symbol];
72 int bitsLeft= rec.wordLength;
73 #if ALIB_STRINGS
74 ALIB_ASSERT_ERROR( (bitsLeft != 0) | dbgAllValuesAreSame, "BITBUFFER/AC/HFMN",
75 "Try to write symbol unknown to Huffman table: {}, ", symbol, bitsLeft )
76 #else
77 ALIB_ASSERT_ERROR( bitsLeft | dbgAllValuesAreSame, "BITBUFFER/AC/HFMN",
78 "Try to write symbol unknown to Huffman table: ", symbol )
79 #endif
80
81
82 auto* word= rec.words;
83 if( bitsLeft > 32) {
84 bw.WriteBits<32>( *word );
85 word++;
86 bitsLeft-= 32;
87 }
88
89 bw.WriteBits(bitsLeft, *word );
90 }
91
92}; // HuffmanEncoder
93
94/// This class, together with sibling #"ac_v1::HuffmanEncoder" implements the well known
95/// \https{Huffman compression,en.wikipedia.org/wiki/Huffman_coding}.
96/// The class uses type #"BitReader" to read the bit stream from an underlying
97/// #"BitBuffer".
98///
99/// The use of this class is quite straight forward:
100/// - Prepare a #"%BitReader" to point to the beginning of the bit stream generated with
101/// class #"%HuffmanEncoder".
102/// - Invoke method #"HuffmanDecoder;ReadTree" once.
103/// - For each encoded byte, invoke #"HuffmanDecoder;Read".
104///
105/// Note, that the length of the stream (the number of bytes to be decompressed) have to be
106/// known by the using software. This class is not responsible for storing this piece of information.
108 protected:
109 /// Internal struct representing nodes of the huffman code tree.
110 struct Node {
111 Node* left; ///< The left child node.
112 Node* right; ///< The right child node.
113 uint8_t symbol; ///< If this is a leaf node (neither #".left" nor #".right" are set,
114 ///< then this is the symbol found.
115
116 /// Constructor.
118 : left (nullptr)
119 , right (nullptr) {}
120 };
121
122 static constexpr int MAX_NODES= 511; ///< The maximum number of nodes in the tree.
123
124 BitReader& br; ///< The bit reader given in the constructor.
125 Node tree; ///< The root node of the symbol tree.
126 Node nodePool[MAX_NODES]; ///< Pre-allocated node objects.
127 int npNext= 0; ///< The next node in #".nodePool" to use.
128
129 public:
130 /// Constructor.
131 /// @param bitReader The bit reader to read the huffman encoding table and then the encoded
132 /// symbols from. (Stored in field #".br".)
134 :br(bitReader) {}
135
136 /// Reads the information to decode the data from the beginning of the bit stream.
137 /// This method has to be invoked once before reading the symbols with method #".Read".
139 void ReadTree();
140
141 /// Reads a symbol from the bit stream.
142 /// This method has to be invoked for every symbol stored, after once reading the encoding
143 /// information with method #".ReadTree".
144 /// @return The symbol read.
145 uint8_t Read() {
146 Node* node= &tree;
147 while(true) {
148 if( node->left == nullptr) // (could also test on right)
149 return node->symbol;
150
151 auto v= br.ReadBits<1>();
152 node= v ? node->right
153 : node->left;
154 } }
155
156}; // HuffmanDecoder
157
158}}} // namespace [alib::bitbuffer::ac_v1]
#define ALIB_DLL
#define ALIB_EXPORT
#define ALIB_DBG(...)
#define ALIB_ASSERT_ERROR(cond, domain,...)
Reads bits from a #"BitBufferBase".
Writes bits into a #"BitBufferBase".
int npNext
The next node in #".nodePool" to use.
Definition huffman.hpp:127
static constexpr int MAX_NODES
The maximum number of nodes in the tree.
Definition huffman.hpp:122
HuffmanDecoder(BitReader &bitReader)
Definition huffman.hpp:133
Node tree
The root node of the symbol tree.
Definition huffman.hpp:125
Node nodePool[MAX_NODES]
Pre-allocated node objects.
Definition huffman.hpp:126
BitReader & br
The bit reader given in the constructor.
Definition huffman.hpp:124
BitWriter & bw
The bit writer to use for encoding the data.
Definition huffman.hpp:47
HuffmanEncoder(BitWriter &bitWriter)
Definition huffman.hpp:56
static constexpr int MAX_CODE_LENGTH
Definition huffman.hpp:34
Symbol symbols[256]
The symbol table.
Definition huffman.hpp:48
void Generate()
Generates the huffman encoding table and writes this information to the bit writer.
Definition huffman.cpp:81
Definition alox.cpp:14
int ShiftOpRHS
Type alias in namespace #"%alib".
Internal struct representing nodes of the huffman code tree.
Definition huffman.hpp:110
Node * right
The right child node.
Definition huffman.hpp:112
std::size_t frequency
The number of occurrences of the symbol.
Definition huffman.hpp:40
uint32_t words[MAX_WORDS]
The bitcode of the symbol.
Definition huffman.hpp:42
alib::ShiftOpRHS wordLength
0: symbol not used, otherwise between 1 and 255.
Definition huffman.hpp:41