Line data Source code
1 : /* cairo - a vector graphics library with display and print output
2 : *
3 : * Copyright © 2006 Red Hat, Inc.
4 : *
5 : * This library is free software; you can redistribute it and/or
6 : * modify it either under the terms of the GNU Lesser General Public
7 : * License version 2.1 as published by the Free Software Foundation
8 : * (the "LGPL") or, at your option, under the terms of the Mozilla
9 : * Public License Version 1.1 (the "MPL"). If you do not alter this
10 : * notice, a recipient may use your version of this file under either
11 : * the MPL or the LGPL.
12 : *
13 : * You should have received a copy of the LGPL along with this library
14 : * in the file COPYING-LGPL-2.1; if not, write to the Free Software
15 : * Foundation, Inc., 51 Franklin Street, Suite 500, Boston, MA 02110-1335, USA
16 : * You should have received a copy of the MPL along with this library
17 : * in the file COPYING-MPL-1.1
18 : *
19 : * The contents of this file are subject to the Mozilla Public License
20 : * Version 1.1 (the "License"); you may not use this file except in
21 : * compliance with the License. You may obtain a copy of the License at
22 : * http://www.mozilla.org/MPL/
23 : *
24 : * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY
25 : * OF ANY KIND, either express or implied. See the LGPL or the MPL for
26 : * the specific language governing rights and limitations.
27 : *
28 : * The Original Code is the cairo graphics library.
29 : *
30 : * The Initial Developer of the Original Code is University of Southern
31 : * California.
32 : *
33 : * Contributor(s):
34 : * Carl D. Worth <cworth@cworth.org>
35 : */
36 :
37 : #include "cairoint.h"
38 : #include "cairo-error-private.h"
39 :
40 : typedef struct _lzw_buf {
41 : cairo_status_t status;
42 :
43 : unsigned char *data;
44 : int data_size;
45 : int num_data;
46 : uint32_t pending;
47 : unsigned int pending_bits;
48 : } lzw_buf_t;
49 :
50 : /* An lzw_buf_t is a simple, growable chunk of memory for holding
51 : * variable-size objects of up to 16 bits each.
52 : *
53 : * Initialize an lzw_buf_t to the given size in bytes.
54 : *
55 : * To store objects into the lzw_buf_t, call _lzw_buf_store_bits and
56 : * when finished, call _lzw_buf_store_pending, (which flushes out the
57 : * last few bits that hadn't yet made a complete byte yet).
58 : *
59 : * Instead of returning failure from any functions, lzw_buf_t provides
60 : * a status value that the caller can query, (and should query at
61 : * least once when done with the object). The status value will be
62 : * either %CAIRO_STATUS_SUCCESS or %CAIRO_STATUS_NO_MEMORY;
63 : */
64 : static void
65 0 : _lzw_buf_init (lzw_buf_t *buf, int size)
66 : {
67 0 : if (size == 0)
68 0 : size = 16;
69 :
70 0 : buf->status = CAIRO_STATUS_SUCCESS;
71 0 : buf->data_size = size;
72 0 : buf->num_data = 0;
73 0 : buf->pending = 0;
74 0 : buf->pending_bits = 0;
75 :
76 0 : buf->data = malloc (size);
77 0 : if (unlikely (buf->data == NULL)) {
78 0 : buf->data_size = 0;
79 0 : buf->status = _cairo_error (CAIRO_STATUS_NO_MEMORY);
80 0 : return;
81 : }
82 : }
83 :
84 : /* Increase the buffer size by doubling.
85 : *
86 : * Returns %CAIRO_STATUS_SUCCESS or %CAIRO_STATUS_NO_MEMORY
87 : */
88 : static cairo_status_t
89 0 : _lzw_buf_grow (lzw_buf_t *buf)
90 : {
91 0 : int new_size = buf->data_size * 2;
92 : unsigned char *new_data;
93 :
94 0 : if (buf->status)
95 0 : return buf->status;
96 :
97 0 : new_data = NULL;
98 : /* check for integer overflow */
99 0 : if (new_size / 2 == buf->data_size)
100 0 : new_data = realloc (buf->data, new_size);
101 :
102 0 : if (unlikely (new_data == NULL)) {
103 0 : free (buf->data);
104 0 : buf->data_size = 0;
105 0 : buf->status = _cairo_error (CAIRO_STATUS_NO_MEMORY);
106 0 : return buf->status;
107 : }
108 :
109 0 : buf->data = new_data;
110 0 : buf->data_size = new_size;
111 :
112 0 : return CAIRO_STATUS_SUCCESS;
113 : }
114 :
115 : /* Store the lowest num_bits bits of values into buf.
116 : *
117 : * Note: The bits of value above size_in_bits must be 0, (so don't lie
118 : * about the size).
119 : *
120 : * See also _lzw_buf_store_pending which must be called after the last
121 : * call to _lzw_buf_store_bits.
122 : *
123 : * Sets buf->status to either %CAIRO_STATUS_SUCCESS or %CAIRO_STATUS_NO_MEMORY.
124 : */
125 : static void
126 0 : _lzw_buf_store_bits (lzw_buf_t *buf, uint16_t value, int num_bits)
127 : {
128 : cairo_status_t status;
129 :
130 0 : assert (value <= (1 << num_bits) - 1);
131 :
132 0 : if (buf->status)
133 0 : return;
134 :
135 0 : buf->pending = (buf->pending << num_bits) | value;
136 0 : buf->pending_bits += num_bits;
137 :
138 0 : while (buf->pending_bits >= 8) {
139 0 : if (buf->num_data >= buf->data_size) {
140 0 : status = _lzw_buf_grow (buf);
141 0 : if (unlikely (status))
142 0 : return;
143 : }
144 0 : buf->data[buf->num_data++] = buf->pending >> (buf->pending_bits - 8);
145 0 : buf->pending_bits -= 8;
146 : }
147 : }
148 :
149 : /* Store the last remaining pending bits into the buffer.
150 : *
151 : * Note: This function must be called after the last call to
152 : * _lzw_buf_store_bits.
153 : *
154 : * Sets buf->status to either %CAIRO_STATUS_SUCCESS or %CAIRO_STATUS_NO_MEMORY.
155 : */
156 : static void
157 0 : _lzw_buf_store_pending (lzw_buf_t *buf)
158 : {
159 : cairo_status_t status;
160 :
161 0 : if (buf->status)
162 0 : return;
163 :
164 0 : if (buf->pending_bits == 0)
165 0 : return;
166 :
167 0 : assert (buf->pending_bits < 8);
168 :
169 0 : if (buf->num_data >= buf->data_size) {
170 0 : status = _lzw_buf_grow (buf);
171 0 : if (unlikely (status))
172 0 : return;
173 : }
174 :
175 0 : buf->data[buf->num_data++] = buf->pending << (8 - buf->pending_bits);
176 0 : buf->pending_bits = 0;
177 : }
178 :
179 : /* LZW defines a few magic code values */
180 : #define LZW_CODE_CLEAR_TABLE 256
181 : #define LZW_CODE_EOD 257
182 : #define LZW_CODE_FIRST 258
183 :
184 : /* We pack three separate values into a symbol as follows:
185 : *
186 : * 12 bits (31 down to 20): CODE: code value used to represent this symbol
187 : * 12 bits (19 down to 8): PREV: previous code value in chain
188 : * 8 bits ( 7 down to 0): NEXT: next byte value in chain
189 : */
190 : typedef uint32_t lzw_symbol_t;
191 :
192 : #define LZW_SYMBOL_SET(sym, prev, next) ((sym) = ((prev) << 8)|(next))
193 : #define LZW_SYMBOL_SET_CODE(sym, code, prev, next) ((sym) = ((code << 20)|(prev) << 8)|(next))
194 : #define LZW_SYMBOL_GET_CODE(sym) (((sym) >> 20))
195 : #define LZW_SYMBOL_GET_PREV(sym) (((sym) >> 8) & 0x7ff)
196 : #define LZW_SYMBOL_GET_BYTE(sym) (((sym) >> 0) & 0x0ff)
197 :
198 : /* The PREV+NEXT fields can be seen as the key used to fetch values
199 : * from the hash table, while the code is the value fetched.
200 : */
201 : #define LZW_SYMBOL_KEY_MASK 0x000fffff
202 :
203 : /* Since code values are only stored starting with 258 we can safely
204 : * use a zero value to represent free slots in the hash table. */
205 : #define LZW_SYMBOL_FREE 0x00000000
206 :
207 : /* These really aren't very free for modifying. First, the PostScript
208 : * specification sets the 9-12 bit range. Second, the encoding of
209 : * lzw_symbol_t above also relies on 2 of LZW_BITS_MAX plus one byte
210 : * fitting within 32 bits.
211 : *
212 : * But other than that, the LZW compression scheme could function with
213 : * more bits per code.
214 : */
215 : #define LZW_BITS_MIN 9
216 : #define LZW_BITS_MAX 12
217 : #define LZW_BITS_BOUNDARY(bits) ((1<<(bits))-1)
218 : #define LZW_MAX_SYMBOLS (1<<LZW_BITS_MAX)
219 :
220 : #define LZW_SYMBOL_TABLE_SIZE 9013
221 : #define LZW_SYMBOL_MOD1 LZW_SYMBOL_TABLE_SIZE
222 : #define LZW_SYMBOL_MOD2 9011
223 :
224 : typedef struct _lzw_symbol_table {
225 : lzw_symbol_t table[LZW_SYMBOL_TABLE_SIZE];
226 : } lzw_symbol_table_t;
227 :
228 : /* Initialize the hash table to entirely empty */
229 : static void
230 0 : _lzw_symbol_table_init (lzw_symbol_table_t *table)
231 : {
232 0 : memset (table->table, 0, LZW_SYMBOL_TABLE_SIZE * sizeof (lzw_symbol_t));
233 0 : }
234 :
235 : /* Lookup a symbol in the symbol table. The PREV and NEXT fields of
236 : * symbol form the key for the lookup.
237 : *
238 : * If successful, then this function returns %TRUE and slot_ret will be
239 : * left pointing at the result that will have the CODE field of
240 : * interest.
241 : *
242 : * If the lookup fails, then this function returns %FALSE and slot_ret
243 : * will be pointing at the location in the table to which a new CODE
244 : * value should be stored along with PREV and NEXT.
245 : */
246 : static cairo_bool_t
247 0 : _lzw_symbol_table_lookup (lzw_symbol_table_t *table,
248 : lzw_symbol_t symbol,
249 : lzw_symbol_t **slot_ret)
250 : {
251 : /* The algorithm here is identical to that in cairo-hash.c. We
252 : * copy it here to allow for a rather more efficient
253 : * implementation due to several circumstances that do not apply
254 : * to the more general case:
255 : *
256 : * 1) We have a known bound on the total number of symbols, so we
257 : * have a fixed-size table without any copying when growing
258 : *
259 : * 2) We never delete any entries, so we don't need to
260 : * support/check for DEAD entries during lookup.
261 : *
262 : * 3) The object fits in 32 bits so we store each object in its
263 : * entirety within the table rather than storing objects
264 : * externally and putting pointers in the table, (which here
265 : * would just double the storage requirements and have negative
266 : * impacts on memory locality).
267 : */
268 0 : int i, idx, step, hash = symbol & LZW_SYMBOL_KEY_MASK;
269 : lzw_symbol_t candidate;
270 :
271 0 : idx = hash % LZW_SYMBOL_MOD1;
272 0 : step = 0;
273 :
274 0 : *slot_ret = NULL;
275 0 : for (i = 0; i < LZW_SYMBOL_TABLE_SIZE; i++)
276 : {
277 0 : candidate = table->table[idx];
278 0 : if (candidate == LZW_SYMBOL_FREE)
279 : {
280 0 : *slot_ret = &table->table[idx];
281 0 : return FALSE;
282 : }
283 : else /* candidate is LIVE */
284 : {
285 0 : if ((candidate & LZW_SYMBOL_KEY_MASK) ==
286 : (symbol & LZW_SYMBOL_KEY_MASK))
287 : {
288 0 : *slot_ret = &table->table[idx];
289 0 : return TRUE;
290 : }
291 : }
292 :
293 0 : if (step == 0) {
294 0 : step = hash % LZW_SYMBOL_MOD2;
295 0 : if (step == 0)
296 0 : step = 1;
297 : }
298 :
299 0 : idx += step;
300 0 : if (idx >= LZW_SYMBOL_TABLE_SIZE)
301 0 : idx -= LZW_SYMBOL_TABLE_SIZE;
302 : }
303 :
304 0 : return FALSE;
305 : }
306 :
307 : /* Compress a bytestream using the LZW algorithm.
308 : *
309 : * This is an original implementation based on reading the
310 : * specification of the LZWDecode filter in the PostScript Language
311 : * Reference. The free parameters in the LZW algorithm are set to the
312 : * values mandated by PostScript, (symbols encoded with widths from 9
313 : * to 12 bits).
314 : *
315 : * This function returns a pointer to a newly allocated buffer holding
316 : * the compressed data, or %NULL if an out-of-memory situation
317 : * occurs.
318 : *
319 : * Notice that any one of the _lzw_buf functions called here could
320 : * trigger an out-of-memory condition. But lzw_buf_t uses cairo's
321 : * shutdown-on-error idiom, so it's safe to continue to call into
322 : * lzw_buf without having to check for errors, (until a final check at
323 : * the end).
324 : */
325 : unsigned char *
326 0 : _cairo_lzw_compress (unsigned char *data, unsigned long *size_in_out)
327 : {
328 0 : int bytes_remaining = *size_in_out;
329 : lzw_buf_t buf;
330 : lzw_symbol_table_t table;
331 0 : lzw_symbol_t symbol, *slot = NULL; /* just to squelch a warning */
332 0 : int code_next = LZW_CODE_FIRST;
333 0 : int code_bits = LZW_BITS_MIN;
334 0 : int prev, next = 0; /* just to squelch a warning */
335 :
336 0 : if (*size_in_out == 0)
337 0 : return NULL;
338 :
339 0 : _lzw_buf_init (&buf, *size_in_out);
340 :
341 0 : _lzw_symbol_table_init (&table);
342 :
343 : /* The LZW header is a clear table code. */
344 0 : _lzw_buf_store_bits (&buf, LZW_CODE_CLEAR_TABLE, code_bits);
345 :
346 : while (1) {
347 :
348 : /* Find the longest existing code in the symbol table that
349 : * matches the current input, if any. */
350 0 : prev = *data++;
351 0 : bytes_remaining--;
352 0 : if (bytes_remaining) {
353 : do
354 : {
355 0 : next = *data++;
356 0 : bytes_remaining--;
357 0 : LZW_SYMBOL_SET (symbol, prev, next);
358 0 : if (_lzw_symbol_table_lookup (&table, symbol, &slot))
359 0 : prev = LZW_SYMBOL_GET_CODE (*slot);
360 0 : } while (bytes_remaining && *slot != LZW_SYMBOL_FREE);
361 0 : if (*slot == LZW_SYMBOL_FREE) {
362 0 : data--;
363 0 : bytes_remaining++;
364 : }
365 : }
366 :
367 : /* Write the code into the output. This is either a byte read
368 : * directly from the input, or a code from the last successful
369 : * lookup. */
370 0 : _lzw_buf_store_bits (&buf, prev, code_bits);
371 :
372 0 : if (bytes_remaining == 0)
373 0 : break;
374 :
375 0 : LZW_SYMBOL_SET_CODE (*slot, code_next++, prev, next);
376 :
377 0 : if (code_next > LZW_BITS_BOUNDARY(code_bits))
378 : {
379 0 : code_bits++;
380 0 : if (code_bits > LZW_BITS_MAX) {
381 0 : _lzw_symbol_table_init (&table);
382 0 : _lzw_buf_store_bits (&buf, LZW_CODE_CLEAR_TABLE, code_bits - 1);
383 0 : code_bits = LZW_BITS_MIN;
384 0 : code_next = LZW_CODE_FIRST;
385 : }
386 : }
387 : }
388 :
389 : /* The LZW footer is an end-of-data code. */
390 0 : _lzw_buf_store_bits (&buf, LZW_CODE_EOD, code_bits);
391 :
392 0 : _lzw_buf_store_pending (&buf);
393 :
394 : /* See if we ever ran out of memory while writing to buf. */
395 0 : if (buf.status == CAIRO_STATUS_NO_MEMORY) {
396 0 : *size_in_out = 0;
397 0 : return NULL;
398 : }
399 :
400 0 : assert (buf.status == CAIRO_STATUS_SUCCESS);
401 :
402 0 : *size_in_out = buf.num_data;
403 0 : return buf.data;
404 : }
|