Subversion Repositories planix.SVN

Rev

Rev 2 | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
2 - 1
/* Copyright (C) 1994, 1996, 1998 Aladdin Enterprises.  All rights reserved.
2
 
3
  This software is provided AS-IS with no warranty, either express or
4
  implied.
5
 
6
  This software is distributed under license and may not be copied,
7
  modified or distributed except as expressly authorized under the terms
8
  of the license contained in the file LICENSE in this distribution.
9
 
10
  For more information about licensing, please refer to
11
  http://www.ghostscript.com/licensing/. For information on
12
  commercial licensing, go to http://www.artifex.com/licensing/ or
13
  contact Artifex Software, Inc., 101 Lucas Valley Road #110,
14
  San Rafael, CA  94903, U.S.A., +1(415)492-9861.
15
*/
16
 
17
/* $Id: shcgen.c,v 1.4 2002/02/21 22:24:54 giles Exp $ */
18
/* Generate (bounded) Huffman code definitions from frequencies, */
19
/* and tables from definitions. */
20
#include "memory_.h"
21
#include "stdio_.h"
22
#include <stdlib.h>		/* for qsort */
23
#include "gdebug.h"
24
#include "gserror.h"
25
#include "gserrors.h"
26
#include "gsmemory.h"
27
#include "scommon.h"
28
#include "shc.h"
29
#include "shcgen.h"
30
 
31
/* ------ Frequency -> definition procedure ------ */
32
 
33
/* Define a node for the Huffman code tree. */
34
typedef struct count_node_s count_node;
35
struct count_node_s {
36
    long freq;			/* frequency of value */
37
    uint value;			/* data value being encoded */
38
    uint code_length;		/* length of Huffman code */
39
    count_node *next;		/* next node in freq-sorted list */
40
    count_node *left;		/* left child in tree (smaller code_length) */
41
    count_node *right;		/* right child in tree (greater code_length) */
42
};
43
 
44
#ifdef DEBUG
45
#  define debug_print_nodes(nodes, n, tag, lengths)\
46
     if ( gs_debug_c('W') ) print_nodes_proc(nodes, n, tag, lengths);
47
private void
48
print_nodes_proc(const count_node * nodes, int n, const char *tag, int lengths)
49
{
50
    int i;
51
 
52
    dlprintf1("[w]---------------- %s ----------------\n", tag);
53
    for (i = 0; i < n; ++i)
54
	dlprintf7("[w]node %d: f=%ld v=%d len=%d N=%d L=%d R=%d\n",
55
		  i, nodes[i].freq, nodes[i].value, nodes[i].code_length,
56
		  (nodes[i].next == 0 ? -1 : (int)(nodes[i].next - nodes)),
57
		  (nodes[i].left == 0 ? -1 : (int)(nodes[i].left - nodes)),
58
		(nodes[i].right == 0 ? -1 : (int)(nodes[i].right - nodes)));
59
    for (i = lengths; i > 0;) {
60
	int j = i;
61
	int len = nodes[--j].code_length;
62
 
63
	while (j > 0 && nodes[j - 1].code_length == len)
64
	    --j;
65
	dlprintf2("[w]%d codes of length %d\n", i - j, len);
66
	i = j;
67
    }
68
}
69
#else
70
#  define debug_print_nodes(nodes, n, tag, lengths) DO_NOTHING
71
#endif
72
 
73
/* Node comparison procedures for sorting. */
74
#define pn1 ((const count_node *)p1)
75
#define pn2 ((const count_node *)p2)
76
/* Sort by decreasing frequency. */
77
private int
78
compare_freqs(const void *p1, const void *p2)
79
{
80
    long diff = pn2->freq - pn1->freq;
81
 
82
    return (diff < 0 ? -1 : diff > 0 ? 1 : 0);
83
}
84
/* Sort by increasing code length, and secondarily by decreasing frequency. */
85
private int
86
compare_code_lengths(const void *p1, const void *p2)
87
{
88
    int diff = pn1->code_length - pn2->code_length;
89
 
90
    return (diff < 0 ? -1 : diff > 0 ? 1 : compare_freqs(p1, p2));
91
}
92
/* Sort by increasing code value. */
93
private int
94
compare_values(const void *p1, const void *p2)
95
{
96
    return (pn1->value < pn2->value ? -1 :
97
	    pn1->value > pn2->value ? 1 : 0);
98
}
99
#undef pn1
100
#undef pn2
101
 
102
/* Adjust code lengths so that none of them exceeds max_length. */
103
/* We break this out just to help organize the code; it's only called */
104
/* from one place in hc_compute. */
105
private void
106
hc_limit_code_lengths(count_node * nodes, uint num_values, int max_length)
107
{
108
    int needed;			/* # of max_length codes we need to free up */
109
    count_node *longest = nodes + num_values;
110
 
111
    {				/* Compute the number of additional max_length codes */
112
	/* we need to make available. */
113
	int length = longest[-1].code_length;
114
	int next_length;
115
	int avail = 0;
116
 
117
	while ((next_length = longest[-1].code_length) > max_length) {
118
	    avail >>= length - next_length;
119
	    length = next_length;
120
	    (--longest)->code_length = max_length;
121
	    ++avail;
122
	}
123
	needed = (nodes + num_values - longest) -
124
	    (avail >>= (length - max_length));
125
	if_debug2('W', "[w]avail=%d, needed=%d\n",
126
		  avail, needed);
127
    }
128
    /* Skip over all max_length codes. */
129
    while (longest[-1].code_length == max_length)
130
	--longest;
131
 
132
    /*
133
     * To make available a code of length N, suppose that the next
134
     * shortest used code is of length M.
135
     * We take the lowest-frequency code of length M and change it
136
     * to M+1; we then have to compensate by reducing the length of
137
     * some of the highest-frequency codes of length N, as follows:
138
     *       M      new lengths for codes of length N
139
     *      ---     -----------
140
     *      N-1     (none)
141
     *      N-2     N-1
142
     *      <N-2    M+2, M+2, N-1
143
     * In the present situation, N = max_length.
144
     */
145
    for (; needed > 0; --needed) {	/* longest points to the first code of length max_length. */
146
	/* Since codes are sorted by increasing code length, */
147
	/* longest-1 is the desired code of length M. */
148
	int M1 = ++(longest[-1].code_length);
149
 
150
	switch (max_length - M1) {
151
	    case 0:		/* M == N-1 */
152
		--longest;
153
		break;
154
	    case 1:		/* M == N-2 */
155
		longest++->code_length = M1;
156
		break;
157
	    default:
158
		longest->code_length = M1 + 1;
159
		longest[1].code_length = M1 + 1;
160
		longest[2].code_length--;
161
		longest += 3;
162
	}
163
    }
164
}
165
 
166
/* Compute an optimal Huffman code from an input data set. */
167
/* The client must have set all the elements of *def. */
168
int
169
hc_compute(hc_definition * def, const long *freqs, gs_memory_t * mem)
170
{
171
    uint num_values = def->num_values;
172
    count_node *nodes =
173
    (count_node *) gs_alloc_byte_array(mem, num_values * 2 - 1,
174
				       sizeof(count_node), "hc_compute");
175
    int i;
176
    count_node *lowest;
177
    count_node *comb;
178
 
179
    if (nodes == 0)
180
	return_error(gs_error_VMerror);
181
 
182
    /* Create leaf nodes for the input data. */
183
    for (i = 0; i < num_values; ++i)
184
	nodes[i].freq = freqs[i], nodes[i].value = i;
185
 
186
    /* Create a list sorted by increasing frequency. */
187
    /* Also initialize the tree structure. */
188
    qsort(nodes, num_values, sizeof(count_node), compare_freqs);
189
    for (i = 0; i < num_values; ++i)
190
	nodes[i].next = &nodes[i - 1],
191
	    nodes[i].code_length = 0,
192
	    nodes[i].left = nodes[i].right = 0;
193
    nodes[0].next = 0;
194
    debug_print_nodes(nodes, num_values, "after sort", 0);
195
 
196
    /* Construct the Huffman code tree. */
197
    for (lowest = &nodes[num_values - 1], comb = &nodes[num_values];;
198
	 ++comb
199
	) {
200
	count_node *pn1 = lowest;
201
	count_node *pn2 = pn1->next;
202
	long freq = pn1->freq + pn2->freq;
203
 
204
	/* Create a parent for the two lowest-frequency nodes. */
205
	lowest = pn2->next;
206
	comb->freq = freq;
207
	if (pn1->code_length <= pn2->code_length)
208
	    comb->left = pn1, comb->right = pn2,
209
		comb->code_length = pn2->code_length + 1;
210
	else
211
	    comb->left = pn2, comb->right = pn1,
212
		comb->code_length = pn1->code_length + 1;
213
	if (lowest == 0)	/* no nodes left to combine */
214
	    break;
215
	/* Insert comb in the sorted list. */
216
	if (freq < lowest->freq)
217
	    comb->next = lowest, lowest = comb;
218
	else {
219
	    count_node *here = lowest;
220
 
221
	    while (here->next != 0 && freq >= here->next->freq)
222
		here = here->next;
223
	    comb->next = here->next;
224
	    here->next = comb;
225
	}
226
    }
227
 
228
    /* comb (i.e., &nodes[num_values * 2 - 2] is the root of the tree. */
229
    /* Note that the left and right children of an interior node */
230
    /* were constructed before, and therefore have lower indices */
231
    /* in the nodes array than, the parent node.  Thus we can assign */
232
    /* the code lengths (node depths) in a single descending-order */
233
    /* sweep. */
234
    comb++->code_length = 0;
235
    while (comb > nodes + num_values) {
236
	--comb;
237
	comb->left->code_length = comb->right->code_length =
238
	    comb->code_length + 1;
239
    }
240
    debug_print_nodes(nodes, num_values * 2 - 1, "after combine", 0);
241
 
242
    /* Sort the leaves again by code length. */
243
    qsort(nodes, num_values, sizeof(count_node), compare_code_lengths);
244
    debug_print_nodes(nodes, num_values, "after re-sort", num_values);
245
 
246
    /* Limit the code length to def->num_counts. */
247
    hc_limit_code_lengths(nodes, num_values, def->num_counts);
248
    debug_print_nodes(nodes, num_values, "after limit", num_values);
249
 
250
    /* Sort within each code length by increasing code value. */
251
    /* This doesn't affect data compression, but it makes */
252
    /* the code definition itself compress better using our */
253
    /* incremental encoding. */
254
    for (i = num_values; i > 0;) {
255
	int j = i;
256
	int len = nodes[--j].code_length;
257
 
258
	while (j > 0 && nodes[j - 1].code_length == len)
259
	    --j;
260
	qsort(&nodes[j], i - j, sizeof(count_node), compare_values);
261
	i = j;
262
    }
263
 
264
    /* Extract the definition from the nodes. */
265
    memset(def->counts, 0, sizeof(*def->counts) * (def->num_counts + 1));
266
    for (i = 0; i < num_values; ++i) {
267
	def->values[i] = nodes[i].value;
268
	def->counts[nodes[i].code_length]++;
269
    }
270
 
271
    /* All done, release working storage. */
272
    gs_free_object(mem, nodes, "hc_compute");
273
    return 0;
274
}
275
 
276
/* ------ Byte string <-> definition procedures ------ */
277
 
278
/*
279
 * We define a compressed representation for (well-behaved) definitions
280
 * as a byte string.  A "well-behaved" definition is one where if
281
 * code values A and B have the same code length and A < B,
282
 * A precedes B in the values table of the definition, and hence
283
 * A's encoding lexicographically precedes B's.
284
 *
285
 * The successive bytes in the compressed string  give the code lengths for
286
 * runs of decoded values, in the form nnnnllll where nnnn is the number of
287
 * consecutive values -1 and llll is the code length -1.
288
 */
289
 
290
/* Convert a definition to a byte string. */
291
/* The caller must provide the byte string, of length def->num_values. */
292
/* Assume (do not check) that the definition is well-behaved. */
293
/* Return the actual length of the string. */
294
int
295
hc_bytes_from_definition(byte * dbytes, const hc_definition * def)
296
{
297
    int i, j;
298
    byte *bp = dbytes;
299
    const byte *lp = dbytes;
300
    const byte *end = dbytes + def->num_values;
301
    const ushort *values = def->values;
302
 
303
    /* Temporarily use the output string as a map from */
304
    /* values to code lengths. */
305
    for (i = 1; i <= def->num_counts; i++)
306
	for (j = 0; j < def->counts[i]; j++)
307
	    bp[*values++] = i;
308
 
309
    /* Now construct the actual string. */
310
    while (lp < end) {
311
	const byte *vp;
312
	byte len = *lp;
313
 
314
	for (vp = lp + 1; vp < end && vp < lp + 16 && *vp == len;)
315
	    vp++;
316
	*bp++ = ((vp - lp - 1) << 4) + (len - 1);
317
	lp = vp;
318
    }
319
 
320
    return bp - dbytes;
321
}
322
 
323
/* Extract num_counts and num_values from a byte string. */
324
void
325
hc_sizes_from_bytes(hc_definition * def, const byte * dbytes, int num_bytes)
326
{
327
    uint num_counts = 0, num_values = 0;
328
    int i;
329
 
330
    for (i = 0; i < num_bytes; i++) {
331
	int n = (dbytes[i] >> 4) + 1;
332
	int l = (dbytes[i] & 15) + 1;
333
 
334
	if (l > num_counts)
335
	    num_counts = l;
336
	num_values += n;
337
    }
338
    def->num_counts = num_counts;
339
    def->num_values = num_values;
340
}
341
 
342
/* Convert a byte string back to a definition. */
343
/* The caller must initialize *def, including allocating counts and values. */
344
void
345
hc_definition_from_bytes(hc_definition * def, const byte * dbytes)
346
{
347
    int v, i;
348
    ushort counts[max_hc_length + 1];
349
 
350
    /* Make a first pass to set the counts for each code length. */
351
    memset(counts, 0, sizeof(counts[0]) * (def->num_counts + 1));
352
    for (i = 0, v = 0; v < def->num_values; i++) {
353
	int n = (dbytes[i] >> 4) + 1;
354
	int l = (dbytes[i] & 15) + 1;
355
 
356
	counts[l] += n;
357
	v += n;
358
    }
359
 
360
    /* Now fill in the definition. */
361
    memcpy(def->counts, counts, sizeof(counts[0]) * (def->num_counts + 1));
362
    for (i = 1, v = 0; i <= def->num_counts; i++) {
363
	uint prev = counts[i];
364
 
365
	counts[i] = v;
366
	v += prev;
367
    }
368
    for (i = 0, v = 0; v < def->num_values; i++) {
369
	int n = (dbytes[i] >> 4) + 1;
370
	int l = (dbytes[i] & 15) + 1;
371
	int j;
372
 
373
	for (j = 0; j < n; n++)
374
	    def->values[counts[l]++] = v++;
375
    }
376
}
377
 
378
/* ------ Definition -> table procedures ------ */
379
 
380
/* Generate the encoding table from the definition. */
381
/* The size of the encode array is def->num_values. */
382
void
383
hc_make_encoding(hce_code * encode, const hc_definition * def)
384
{
385
    uint next = 0;
386
    const ushort *pvalue = def->values;
387
    uint i, k;
388
 
389
    for (i = 1; i <= def->num_counts; i++) {
390
	for (k = 0; k < def->counts[i]; k++, pvalue++, next++) {
391
	    hce_code *pce = encode + *pvalue;
392
 
393
	    pce->code = next;
394
	    pce->code_length = i;
395
	}
396
	next <<= 1;
397
    }
398
}
399
 
400
/* We decode in two steps, first indexing into a table with */
401
/* a fixed number of bits from the source, and then indexing into */
402
/* an auxiliary table if necessary.  (See shc.h for details.) */
403
 
404
/* Calculate the size of the decoding table. */
405
uint
406
hc_sizeof_decoding(const hc_definition * def, int initial_bits)
407
{
408
    uint size = 1 << initial_bits;
409
    uint carry = 0, mask = (uint) ~ 1;
410
    uint i;
411
 
412
    for (i = initial_bits + 1; i <= def->num_counts;
413
	 i++, carry <<= 1, mask <<= 1
414
	) {
415
	carry += def->counts[i];
416
	size += carry & mask;
417
	carry &= ~mask;
418
    }
419
    return size;
420
}
421
 
422
/* Generate the decoding tables. */
423
void
424
hc_make_decoding(hcd_code * decode, const hc_definition * def,
425
		 int initial_bits)
426
{				/* Make entries for single-dispatch codes. */
427
    {
428
	hcd_code *pcd = decode;
429
	const ushort *pvalue = def->values;
430
	uint i, k, d;
431
 
432
	for (i = 0; i <= initial_bits; i++) {
433
	    for (k = 0; k < def->counts[i]; k++, pvalue++) {
434
		for (d = 1 << (initial_bits - i); d > 0;
435
		     d--, pcd++
436
		    )
437
		    pcd->value = *pvalue,
438
			pcd->code_length = i;
439
	    }
440
	}
441
    }
442
    /* Make entries for two-dispatch codes. */
443
    /* By working backward, we can do this more easily */
444
    /* in a single pass. */
445
    {
446
	uint dsize = hc_sizeof_decoding(def, initial_bits);
447
	hcd_code *pcd = decode + (1 << initial_bits);
448
	hcd_code *pcd2 = decode + dsize;
449
	const ushort *pvalue = def->values + def->num_values;
450
	uint entries_left = 0, slots_left = 0, mult_shift = 0;
451
	uint i = def->num_counts + 1, j;
452
 
453
	for (;;) {
454
	    if (slots_left == 0) {
455
		if (entries_left != 0) {
456
		    slots_left = 1 << (i - initial_bits);
457
		    mult_shift = 0;
458
		    continue;
459
		}
460
		if (--i <= initial_bits)
461
		    break;
462
		entries_left = def->counts[i];
463
		continue;
464
	    }
465
	    if (entries_left == 0) {
466
		entries_left = def->counts[--i];
467
		mult_shift++;
468
		continue;
469
	    }
470
	    --entries_left, --pvalue;
471
	    for (j = 1 << mult_shift; j > 0; j--) {
472
		--pcd2;
473
		pcd2->value = *pvalue;
474
		pcd2->code_length = i - initial_bits;
475
	    }
476
	    if ((slots_left -= 1 << mult_shift) == 0) {
477
		--pcd;
478
		pcd->value = pcd2 - decode;
479
		pcd->code_length = i + mult_shift;
480
	    }
481
	}
482
    }
483
}