Line data Source code
1 : /********************************************************************
2 : * *
3 : * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
4 : * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
5 : * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
6 : * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
7 : * *
8 : * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 *
9 : * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
10 : * *
11 : ********************************************************************
12 :
13 : function:
14 : last mod: $Id: huffdec.c 17577 2010-10-29 04:00:07Z tterribe $
15 :
16 : ********************************************************************/
17 :
18 : #include <stdlib.h>
19 : #include <string.h>
20 : #include <ogg/ogg.h>
21 : #include "huffdec.h"
22 : #include "decint.h"
23 :
24 :
25 :
26 : /*Instead of storing every branching in the tree, subtrees can be collapsed
27 : into one node, with a table of size 1<<nbits pointing directly to its
28 : descedents nbits levels down.
29 : This allows more than one bit to be read at a time, and avoids following all
30 : the intermediate branches with next to no increased code complexity once
31 : the collapsed tree has been built.
32 : We do _not_ require that a subtree be complete to be collapsed, but instead
33 : store duplicate pointers in the table, and record the actual depth of the
34 : node below its parent.
35 : This tells us the number of bits to advance the stream after reaching it.
36 :
37 : This turns out to be equivalent to the method described in \cite{Hash95},
38 : without the requirement that codewords be sorted by length.
39 : If the codewords were sorted by length (so-called ``canonical-codes''), they
40 : could be decoded much faster via either Lindell and Moffat's approach or
41 : Hashemian's Condensed Huffman Code approach, the latter of which has an
42 : extremely small memory footprint.
43 : We can't use Choueka et al.'s finite state machine approach, which is
44 : extremely fast, because we can't allow multiple symbols to be output at a
45 : time; the codebook can and does change between symbols.
46 : It also has very large memory requirements, which impairs cache coherency.
47 :
48 : We store the tree packed in an array of 16-bit integers (words).
49 : Each node consists of a single word, followed consecutively by two or more
50 : indices of its children.
51 : Let n be the value of this first word.
52 : This is the number of bits that need to be read to traverse the node, and
53 : must be positive.
54 : 1<<n entries follow in the array, each an index to a child node.
55 : If the child is positive, then it is the index of another internal node in
56 : the table.
57 : If the child is negative or zero, then it is a leaf node.
58 : These are stored directly in the child pointer to save space, since they only
59 : require a single word.
60 : If a leaf node would have been encountered before reading n bits, then it is
61 : duplicated the necessary number of times in this table.
62 : Leaf nodes pack both a token value and their actual depth in the tree.
63 : The token in the leaf node is (-leaf&255).
64 : The number of bits that need to be consumed to reach the leaf, starting from
65 : the current node, is (-leaf>>8).
66 :
67 : @ARTICLE{Hash95,
68 : author="Reza Hashemian",
69 : title="Memory Efficient and High-Speed Search {Huffman} Coding",
70 : journal="{IEEE} Transactions on Communications",
71 : volume=43,
72 : number=10,
73 : pages="2576--2581",
74 : month=Oct,
75 : year=1995
76 : }*/
77 :
78 :
79 :
80 : /*The map from external spec-defined tokens to internal tokens.
81 : This is constructed so that any extra bits read with the original token value
82 : can be masked off the least significant bits of its internal token index.
83 : In addition, all of the tokens which require additional extra bits are placed
84 : at the start of the list, and grouped by type.
85 : OC_DCT_REPEAT_RUN3_TOKEN is placed first, as it is an extra-special case, so
86 : giving it index 0 may simplify comparisons on some architectures.
87 : These requirements require some substantial reordering.*/
88 : static const unsigned char OC_DCT_TOKEN_MAP[TH_NDCT_TOKENS]={
89 : /*OC_DCT_EOB1_TOKEN (0 extra bits)*/
90 : 15,
91 : /*OC_DCT_EOB2_TOKEN (0 extra bits)*/
92 : 16,
93 : /*OC_DCT_EOB3_TOKEN (0 extra bits)*/
94 : 17,
95 : /*OC_DCT_REPEAT_RUN0_TOKEN (2 extra bits)*/
96 : 88,
97 : /*OC_DCT_REPEAT_RUN1_TOKEN (3 extra bits)*/
98 : 80,
99 : /*OC_DCT_REPEAT_RUN2_TOKEN (4 extra bits)*/
100 : 1,
101 : /*OC_DCT_REPEAT_RUN3_TOKEN (12 extra bits)*/
102 : 0,
103 : /*OC_DCT_SHORT_ZRL_TOKEN (3 extra bits)*/
104 : 48,
105 : /*OC_DCT_ZRL_TOKEN (6 extra bits)*/
106 : 14,
107 : /*OC_ONE_TOKEN (0 extra bits)*/
108 : 56,
109 : /*OC_MINUS_ONE_TOKEN (0 extra bits)*/
110 : 57,
111 : /*OC_TWO_TOKEN (0 extra bits)*/
112 : 58,
113 : /*OC_MINUS_TWO_TOKEN (0 extra bits)*/
114 : 59,
115 : /*OC_DCT_VAL_CAT2 (1 extra bit)*/
116 : 60,
117 : 62,
118 : 64,
119 : 66,
120 : /*OC_DCT_VAL_CAT3 (2 extra bits)*/
121 : 68,
122 : /*OC_DCT_VAL_CAT4 (3 extra bits)*/
123 : 72,
124 : /*OC_DCT_VAL_CAT5 (4 extra bits)*/
125 : 2,
126 : /*OC_DCT_VAL_CAT6 (5 extra bits)*/
127 : 4,
128 : /*OC_DCT_VAL_CAT7 (6 extra bits)*/
129 : 6,
130 : /*OC_DCT_VAL_CAT8 (10 extra bits)*/
131 : 8,
132 : /*OC_DCT_RUN_CAT1A (1 extra bit)*/
133 : 18,
134 : 20,
135 : 22,
136 : 24,
137 : 26,
138 : /*OC_DCT_RUN_CAT1B (3 extra bits)*/
139 : 32,
140 : /*OC_DCT_RUN_CAT1C (4 extra bits)*/
141 : 12,
142 : /*OC_DCT_RUN_CAT2A (2 extra bits)*/
143 : 28,
144 : /*OC_DCT_RUN_CAT2B (3 extra bits)*/
145 : 40
146 : };
147 :
148 : /*The log base 2 of number of internal tokens associated with each of the spec
149 : tokens (i.e., how many of the extra bits are folded into the token value).
150 : Increasing the maximum value beyond 3 will enlarge the amount of stack
151 : required for tree construction.*/
152 : static const unsigned char OC_DCT_TOKEN_MAP_LOG_NENTRIES[TH_NDCT_TOKENS]={
153 : 0,0,0,2,3,0,0,3,0,0,0,0,0,1,1,1,1,2,3,1,1,1,2,1,1,1,1,1,3,1,2,3
154 : };
155 :
156 :
157 : /*The size a lookup table is allowed to grow to relative to the number of
158 : unique nodes it contains.
159 : E.g., if OC_HUFF_SLUSH is 4, then at most 75% of the space in the tree is
160 : wasted (1/4 of the space must be used).
161 : Larger numbers can decode tokens with fewer read operations, while smaller
162 : numbers may save more space.
163 : With a sample file:
164 : 32233473 read calls are required when no tree collapsing is done (100.0%).
165 : 19269269 read calls are required when OC_HUFF_SLUSH is 1 (59.8%).
166 : 11144969 read calls are required when OC_HUFF_SLUSH is 2 (34.6%).
167 : 10538563 read calls are required when OC_HUFF_SLUSH is 4 (32.7%).
168 : 10192578 read calls are required when OC_HUFF_SLUSH is 8 (31.6%).
169 : Since a value of 2 gets us the vast majority of the speed-up with only a
170 : small amount of wasted memory, this is what we use.
171 : This value must be less than 128, or you could create a tree with more than
172 : 32767 entries, which would overflow the 16-bit words used to index it.*/
173 : #define OC_HUFF_SLUSH (2)
174 : /*The root of the tree is on the fast path, and a larger value here is more
175 : beneficial than elsewhere in the tree.
176 : 7 appears to give the best performance, trading off between increased use of
177 : the single-read fast path and cache footprint for the tables, though
178 : obviously this will depend on your cache size.
179 : Using 7 here, the VP3 tables are about twice as large compared to using 2.*/
180 : #define OC_ROOT_HUFF_SLUSH (7)
181 :
182 :
183 :
184 : /*Unpacks a Huffman codebook.
185 : _opb: The buffer to unpack from.
186 : _tokens: Stores a list of internal tokens, in the order they were found in
187 : the codebook, and the lengths of their corresponding codewords.
188 : This is enough to completely define the codebook, while minimizing
189 : stack usage and avoiding temporary allocations (for platforms
190 : where free() is a no-op).
191 : Return: The number of internal tokens in the codebook, or a negative value
192 : on error.*/
193 0 : int oc_huff_tree_unpack(oc_pack_buf *_opb,unsigned char _tokens[256][2]){
194 : ogg_uint32_t code;
195 : int len;
196 : int ntokens;
197 : int nleaves;
198 0 : code=0;
199 0 : len=ntokens=nleaves=0;
200 0 : for(;;){
201 : long bits;
202 0 : bits=oc_pack_read1(_opb);
203 : /*Only process nodes so long as there's more bits in the buffer.*/
204 0 : if(oc_pack_bytes_left(_opb)<0)return TH_EBADHEADER;
205 : /*Read an internal node:*/
206 0 : if(!bits){
207 0 : len++;
208 : /*Don't allow codewords longer than 32 bits.*/
209 0 : if(len>32)return TH_EBADHEADER;
210 : }
211 : /*Read a leaf node:*/
212 : else{
213 : ogg_uint32_t code_bit;
214 : int neb;
215 : int nentries;
216 : int token;
217 : /*Don't allow more than 32 spec-tokens per codebook.*/
218 0 : if(++nleaves>32)return TH_EBADHEADER;
219 0 : bits=oc_pack_read(_opb,OC_NDCT_TOKEN_BITS);
220 0 : neb=OC_DCT_TOKEN_MAP_LOG_NENTRIES[bits];
221 0 : token=OC_DCT_TOKEN_MAP[bits];
222 0 : nentries=1<<neb;
223 0 : while(nentries-->0){
224 0 : _tokens[ntokens][0]=(unsigned char)token++;
225 0 : _tokens[ntokens][1]=(unsigned char)(len+neb);
226 0 : ntokens++;
227 : }
228 0 : code_bit=0x80000000U>>len-1;
229 0 : while(len>0&&(code&code_bit)){
230 0 : code^=code_bit;
231 0 : code_bit<<=1;
232 0 : len--;
233 : }
234 0 : if(len<=0)break;
235 0 : code|=code_bit;
236 : }
237 : }
238 0 : return ntokens;
239 : }
240 :
241 : /*Count how many tokens would be required to fill a subtree at depth _depth.
242 : _tokens: A list of internal tokens, in the order they are found in the
243 : codebook, and the lengths of their corresponding codewords.
244 : _depth: The depth of the desired node in the corresponding tree structure.
245 : Return: The number of tokens that belong to that subtree.*/
246 0 : static int oc_huff_subtree_tokens(unsigned char _tokens[][2],int _depth){
247 : ogg_uint32_t code;
248 : int ti;
249 0 : code=0;
250 0 : ti=0;
251 : do{
252 0 : if(_tokens[ti][1]-_depth<32)code+=0x80000000U>>_tokens[ti++][1]-_depth;
253 : else{
254 : /*Because of the expanded internal tokens, we can have codewords as long
255 : as 35 bits.
256 : A single recursion here is enough to advance past them.*/
257 0 : code++;
258 0 : ti+=oc_huff_subtree_tokens(_tokens+ti,_depth+31);
259 : }
260 : }
261 0 : while(code<0x80000000U);
262 0 : return ti;
263 : }
264 :
265 : /*Compute the number of bits to use for a collapsed tree node at the given
266 : depth.
267 : _tokens: A list of internal tokens, in the order they are found in the
268 : codebook, and the lengths of their corresponding codewords.
269 : _ntokens: The number of tokens corresponding to this tree node.
270 : _depth: The depth of this tree node.
271 : Return: The number of bits to use for a collapsed tree node rooted here.
272 : This is always at least one, even if this was a leaf node.*/
273 0 : static int oc_huff_tree_collapse_depth(unsigned char _tokens[][2],
274 : int _ntokens,int _depth){
275 : int got_leaves;
276 : int loccupancy;
277 : int occupancy;
278 : int slush;
279 : int nbits;
280 : int best_nbits;
281 0 : slush=_depth>0?OC_HUFF_SLUSH:OC_ROOT_HUFF_SLUSH;
282 : /*It's legal to have a tree with just a single node, which requires no bits
283 : to decode and always returns the same token.
284 : However, no encoder actually does this (yet).
285 : To avoid a special case in oc_huff_token_decode(), we force the number of
286 : lookahead bits to be at least one.
287 : This will produce a tree that looks ahead one bit and then advances the
288 : stream zero bits.*/
289 0 : nbits=1;
290 0 : occupancy=2;
291 0 : got_leaves=1;
292 : do{
293 : int ti;
294 0 : if(got_leaves)best_nbits=nbits;
295 0 : nbits++;
296 0 : got_leaves=0;
297 0 : loccupancy=occupancy;
298 0 : for(occupancy=ti=0;ti<_ntokens;occupancy++){
299 0 : if(_tokens[ti][1]<_depth+nbits)ti++;
300 0 : else if(_tokens[ti][1]==_depth+nbits){
301 0 : got_leaves=1;
302 0 : ti++;
303 : }
304 0 : else ti+=oc_huff_subtree_tokens(_tokens+ti,_depth+nbits);
305 : }
306 : }
307 0 : while(occupancy>loccupancy&&occupancy*slush>=1<<nbits);
308 0 : return best_nbits;
309 : }
310 :
311 : /*Determines the size in words of a Huffman tree node that represents a
312 : subtree of depth _nbits.
313 : _nbits: The depth of the subtree.
314 : This must be greater than zero.
315 : Return: The number of words required to store the node.*/
316 0 : static size_t oc_huff_node_size(int _nbits){
317 0 : return 1+(1<<_nbits);
318 : }
319 :
320 : /*Produces a collapsed-tree representation of the given token list.
321 : _tree: The storage for the collapsed Huffman tree.
322 : This may be NULL to compute the required storage size instead of
323 : constructing the tree.
324 : _tokens: A list of internal tokens, in the order they are found in the
325 : codebook, and the lengths of their corresponding codewords.
326 : _ntokens: The number of tokens corresponding to this tree node.
327 : Return: The number of words required to store the tree.*/
328 : #if defined(_MSC_VER) && _MSC_VER >= 1700
329 : #pragma optimize( "", off )
330 : #endif
331 0 : static size_t oc_huff_tree_collapse(ogg_int16_t *_tree,
332 : unsigned char _tokens[][2],int _ntokens){
333 : ogg_int16_t node[34];
334 : unsigned char depth[34];
335 : unsigned char last[34];
336 : size_t ntree;
337 : int ti;
338 : int l;
339 0 : depth[0]=0;
340 0 : last[0]=(unsigned char)(_ntokens-1);
341 0 : ntree=0;
342 0 : ti=0;
343 0 : l=0;
344 : do{
345 : int nbits;
346 0 : nbits=oc_huff_tree_collapse_depth(_tokens+ti,last[l]+1-ti,depth[l]);
347 0 : node[l]=(ogg_int16_t)ntree;
348 0 : ntree+=oc_huff_node_size(nbits);
349 0 : if(_tree!=NULL)_tree[node[l]++]=(ogg_int16_t)nbits;
350 : do{
351 0 : while(ti<=last[l]&&_tokens[ti][1]<=depth[l]+nbits){
352 0 : if(_tree!=NULL){
353 : ogg_int16_t leaf;
354 : int nentries;
355 0 : nentries=1<<depth[l]+nbits-_tokens[ti][1];
356 0 : leaf=(ogg_int16_t)-(_tokens[ti][1]-depth[l]<<8|_tokens[ti][0]);
357 0 : while(nentries-->0)_tree[node[l]++]=leaf;
358 : }
359 0 : ti++;
360 : }
361 0 : if(ti<=last[l]){
362 : /*We need to recurse*/
363 0 : depth[l+1]=(unsigned char)(depth[l]+nbits);
364 0 : if(_tree!=NULL)_tree[node[l]++]=(ogg_int16_t)ntree;
365 0 : l++;
366 0 : last[l]=
367 0 : (unsigned char)(ti+oc_huff_subtree_tokens(_tokens+ti,depth[l])-1);
368 0 : break;
369 : }
370 : /*Pop back up a level of recursion.*/
371 0 : else if(l-->0)nbits=depth[l+1]-depth[l];
372 : }
373 0 : while(l>=0);
374 : }
375 0 : while(l>=0);
376 0 : return ntree;
377 : }
378 : #if defined(_MSC_VER) && _MSC_VER >= 1700
379 : #pragma optimize( "", on )
380 : #endif
381 :
382 : /*Unpacks a set of Huffman trees, and reduces them to a collapsed
383 : representation.
384 : _opb: The buffer to unpack the trees from.
385 : _nodes: The table to fill with the Huffman trees.
386 : Return: 0 on success, or a negative value on error.
387 : The caller is responsible for cleaning up any partially initialized
388 : _nodes on failure.*/
389 0 : int oc_huff_trees_unpack(oc_pack_buf *_opb,
390 : ogg_int16_t *_nodes[TH_NHUFFMAN_TABLES]){
391 : int i;
392 0 : for(i=0;i<TH_NHUFFMAN_TABLES;i++){
393 : unsigned char tokens[256][2];
394 : int ntokens;
395 : ogg_int16_t *tree;
396 : size_t size;
397 : /*Unpack the full tree into a temporary buffer.*/
398 0 : ntokens=oc_huff_tree_unpack(_opb,tokens);
399 0 : if(ntokens<0)return ntokens;
400 : /*Figure out how big the collapsed tree will be and allocate space for it.*/
401 0 : size=oc_huff_tree_collapse(NULL,tokens,ntokens);
402 : /*This should never happen; if it does it means you set OC_HUFF_SLUSH or
403 : OC_ROOT_HUFF_SLUSH too large.*/
404 0 : if(size>32767)return TH_EIMPL;
405 0 : tree=(ogg_int16_t *)_ogg_malloc(size*sizeof(*tree));
406 0 : if(tree==NULL)return TH_EFAULT;
407 : /*Construct the collapsed the tree.*/
408 0 : oc_huff_tree_collapse(tree,tokens,ntokens);
409 0 : _nodes[i]=tree;
410 : }
411 0 : return 0;
412 : }
413 :
414 : /*Determines the size in words of a Huffman subtree.
415 : _tree: The complete Huffman tree.
416 : _node: The index of the root of the desired subtree.
417 : Return: The number of words required to store the tree.*/
418 0 : static size_t oc_huff_tree_size(const ogg_int16_t *_tree,int _node){
419 : size_t size;
420 : int nchildren;
421 : int n;
422 : int i;
423 0 : n=_tree[_node];
424 0 : size=oc_huff_node_size(n);
425 0 : nchildren=1<<n;
426 0 : i=0;
427 : do{
428 : int child;
429 0 : child=_tree[_node+i+1];
430 0 : if(child<=0)i+=1<<n-(-child>>8);
431 : else{
432 0 : size+=oc_huff_tree_size(_tree,child);
433 0 : i++;
434 : }
435 : }
436 0 : while(i<nchildren);
437 0 : return size;
438 : }
439 :
440 : /*Makes a copy of the given set of Huffman trees.
441 : _dst: The array to store the copy in.
442 : _src: The array of trees to copy.*/
443 0 : int oc_huff_trees_copy(ogg_int16_t *_dst[TH_NHUFFMAN_TABLES],
444 : const ogg_int16_t *const _src[TH_NHUFFMAN_TABLES]){
445 : int total;
446 : int i;
447 0 : total=0;
448 0 : for(i=0;i<TH_NHUFFMAN_TABLES;i++){
449 : size_t size;
450 0 : size=oc_huff_tree_size(_src[i],0);
451 0 : total+=size;
452 0 : _dst[i]=(ogg_int16_t *)_ogg_malloc(size*sizeof(*_dst[i]));
453 0 : if(_dst[i]==NULL){
454 0 : while(i-->0)_ogg_free(_dst[i]);
455 0 : return TH_EFAULT;
456 : }
457 0 : memcpy(_dst[i],_src[i],size*sizeof(*_dst[i]));
458 : }
459 0 : return 0;
460 : }
461 :
462 : /*Frees the memory used by a set of Huffman trees.
463 : _nodes: The array of trees to free.*/
464 0 : void oc_huff_trees_clear(ogg_int16_t *_nodes[TH_NHUFFMAN_TABLES]){
465 : int i;
466 0 : for(i=0;i<TH_NHUFFMAN_TABLES;i++)_ogg_free(_nodes[i]);
467 0 : }
468 :
469 :
470 : /*Unpacks a single token using the given Huffman tree.
471 : _opb: The buffer to unpack the token from.
472 : _node: The tree to unpack the token with.
473 : Return: The token value.*/
474 0 : int oc_huff_token_decode_c(oc_pack_buf *_opb,const ogg_int16_t *_tree){
475 : const unsigned char *ptr;
476 : const unsigned char *stop;
477 : oc_pb_window window;
478 : int available;
479 : long bits;
480 : int node;
481 : int n;
482 0 : ptr=_opb->ptr;
483 0 : window=_opb->window;
484 0 : stop=_opb->stop;
485 0 : available=_opb->bits;
486 0 : node=0;
487 : for(;;){
488 0 : n=_tree[node];
489 0 : if(n>available){
490 : unsigned shift;
491 0 : shift=OC_PB_WINDOW_SIZE-available;
492 : do{
493 : /*We don't bother setting eof because we won't check for it after we've
494 : started decoding DCT tokens.*/
495 0 : if(ptr>=stop){
496 0 : shift=(unsigned)-OC_LOTS_OF_BITS;
497 0 : break;
498 : }
499 0 : shift-=8;
500 0 : window|=(oc_pb_window)*ptr++<<shift;
501 : }
502 0 : while(shift>=8);
503 : /*Note: We never request more than 24 bits, so there's no need to fill in
504 : the last partial byte here.*/
505 0 : available=OC_PB_WINDOW_SIZE-shift;
506 : }
507 0 : bits=window>>OC_PB_WINDOW_SIZE-n;
508 0 : node=_tree[node+1+bits];
509 0 : if(node<=0)break;
510 0 : window<<=n;
511 0 : available-=n;
512 : }
513 0 : node=-node;
514 0 : n=node>>8;
515 0 : window<<=n;
516 0 : available-=n;
517 0 : _opb->ptr=ptr;
518 0 : _opb->window=window;
519 0 : _opb->bits=available;
520 0 : return node&255;
521 : }
|