Subversion Repositories planix.SVN

Rev

Rev 2 | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
2 - 1
/* Copyright (C) 1994, 1995, 1998, 1999 Aladdin Enterprises.  All rights reserved.
2
 
3
  This software is provided AS-IS with no warranty, either express or
4
  implied.
5
 
6
  This software is distributed under license and may not be copied,
7
  modified or distributed except as expressly authorized under the terms
8
  of the license contained in the file LICENSE in this distribution.
9
 
10
  For more information about licensing, please refer to
11
  http://www.ghostscript.com/licensing/. For information on
12
  commercial licensing, go to http://www.artifex.com/licensing/ or
13
  contact Artifex Software, Inc., 101 Lucas Valley Road #110,
14
  San Rafael, CA  94903, U.S.A., +1(415)492-9861.
15
*/
16
 
17
/* $Id: sbwbs.c,v 1.5 2002/06/16 03:58:14 lpd Exp $ */
18
/* Burrows/Wheeler block sorting compression filters */
19
#include "stdio_.h"
20
#include "memory_.h"
21
#include <stdlib.h>		/* for qsort */
22
#include "gdebug.h"
23
#include "strimpl.h"
24
#include "sfilter.h"
25
#include "sbwbs.h"
26
 
27
/* ------ Common code for streams that buffer a block ------ */
28
 
29
private_st_buffered_state();
30
 
31
/* Initialize */
32
private void
33
s_buffered_set_defaults(stream_state * st)
34
{
35
    stream_buffered_state *const ss = (stream_buffered_state *) st;
36
 
37
    /* Clear pointers */
38
    ss->buffer = 0;
39
}
40
private int
41
s_buffered_no_block_init(stream_state * st)
42
{
43
    stream_buffered_state *const ss = (stream_buffered_state *) st;
44
 
45
    ss->buffer = 0;
46
    ss->filling = true;
47
    ss->bpos = 0;
48
    return 0;
49
}
50
private int
51
s_buffered_block_init(stream_state * st)
52
{
53
    stream_buffered_state *const ss = (stream_buffered_state *) st;
54
 
55
    s_buffered_no_block_init(st);
56
    ss->buffer = gs_alloc_bytes(st->memory, ss->BlockSize, "buffer");
57
    if (ss->buffer == 0)
58
	return ERRC;
59
/****** WRONG ******/
60
    return 0;
61
}
62
 
63
/* Continue filling the buffer if needed. */
64
/* Return 0 if the buffer isn't full yet, 1 if it is full or if */
65
/* we reached the end of input data. */
66
/* In the latter case, also set filling = false. */
67
/* Note that this procedure doesn't take pw as an argument. */
68
private int
69
s_buffered_process(stream_state * st, stream_cursor_read * pr, bool last)
70
{
71
    stream_buffered_state *const ss = (stream_buffered_state *) st;
72
    register const byte *p = pr->ptr;
73
    const byte *rlimit = pr->limit;
74
    uint count = rlimit - p;
75
    uint left = ss->bsize - ss->bpos;
76
 
77
    if (!ss->filling)
78
	return 1;
79
    if (left < count)
80
	count = left;
81
    if_debug3('w', "[w]buffering %d bytes to position %d, last = %s\n",
82
	      count, ss->bpos, (last ? "true" : "false"));
83
    memcpy(ss->buffer + ss->bpos, p + 1, count);
84
    pr->ptr = p += count;
85
    ss->bpos += count;
86
    if (ss->bpos == ss->bsize || (p == rlimit && last)) {
87
	ss->filling = false;
88
	return 1;
89
    }
90
    return 0;
91
}
92
 
93
/* Release */
94
private void
95
s_buffered_release(stream_state * st)
96
{
97
    stream_buffered_state *const ss = (stream_buffered_state *) st;
98
 
99
    gs_free_object(st->memory, ss->buffer, "buffer");
100
}
101
 
102
/* ------ Common code for Burrows/Wheeler block sorting filters ------ */
103
 
104
private_st_BWBS_state();
105
private void s_BWBS_release(stream_state *);
106
 
107
/* Set default parameter values (actually, just clear pointers). */
108
private void
109
s_BWBS_set_defaults(stream_state * st)
110
{
111
    stream_BWBS_state *const ss = (stream_BWBS_state *) st;
112
 
113
    s_buffered_set_defaults(st);
114
    ss->offsets = 0;
115
}
116
 
117
/* Initialize */
118
private int
119
bwbs_init(stream_state * st, uint osize)
120
{
121
    stream_BWBS_state *const ss = (stream_BWBS_state *) st;
122
    int code;
123
 
124
    ss->bsize = ss->BlockSize;
125
    code = s_buffered_block_init(st);
126
    if (code != 0)
127
	return code;
128
    ss->offsets = (void *)gs_alloc_bytes(st->memory, osize,
129
					 "BWBlockSort offsets");
130
    if (ss->offsets == 0) {
131
	s_BWBS_release(st);
132
	return ERRC;
133
/****** WRONG ******/
134
    }
135
    ss->I = -1;			/* haven't read I yet */
136
    return 0;
137
}
138
 
139
/* Release the filter. */
140
private void
141
s_BWBS_release(stream_state * st)
142
{
143
    stream_BWBS_state *const ss = (stream_BWBS_state *) st;
144
 
145
    gs_free_object(st->memory, ss->offsets, "BWBlockSort offsets");
146
    s_buffered_release(st);
147
}
148
 
149
/* ------ BWBlockSortEncode ------ */
150
 
151
/* Initialize */
152
private int
153
s_BWBSE_init(stream_state * st)
154
{
155
    stream_BWBS_state *const ss = (stream_BWBS_state *) st;
156
 
157
    return bwbs_init(st, ss->BlockSize * sizeof(int));
158
}
159
 
160
/* Compare two rotated strings for sorting. */
161
private stream_BWBS_state *bwbs_compare_ss;
162
private int
163
bwbs_compare_rotations(const void *p1, const void *p2)
164
{
165
    const byte *buffer = bwbs_compare_ss->buffer;
166
    const byte *s1 = buffer + *(const int *)p1;
167
    const byte *s2 = buffer + *(const int *)p2;
168
    const byte *start1;
169
    const byte *end;
170
    int swap;
171
 
172
    if (*s1 != *s2)
173
	return (*s1 < *s2 ? -1 : 1);
174
    if (s1 < s2)
175
	swap = 1;
176
    else {
177
	const byte *t = s1;
178
 
179
	s1 = s2;
180
	s2 = t;
181
	swap = -1;
182
    }
183
    start1 = s1;
184
    end = buffer + bwbs_compare_ss->N;
185
    for (s1++, s2++; s2 < end; s1++, s2++)
186
	if (*s1 != *s2)
187
	    return (*s1 < *s2 ? -swap : swap);
188
    s2 = buffer;
189
    for (; s1 < end; s1++, s2++)
190
	if (*s1 != *s2)
191
	    return (*s1 < *s2 ? -swap : swap);
192
    s1 = buffer;
193
    for (; s1 < start1; s1++, s2++)
194
	if (*s1 != *s2)
195
	    return (*s1 < *s2 ? -swap : swap);
196
    return 0;
197
}
198
/* Sort the strings. */
199
private void
200
bwbse_sort(const byte * buffer, uint * indices, int N)
201
{
202
    offsets_full Cs;
203
 
204
#define C Cs.v
205
    /* Sort the strings.  We start with a radix sort. */
206
    uint sum = 0, j, ch;
207
 
208
    memset(C, 0, sizeof(Cs));
209
    for (j = 0; j < N; j++)
210
	C[buffer[j]]++;
211
    for (ch = 0; ch <= 255; ch++) {
212
	sum += C[ch];
213
	C[ch] = sum - C[ch];
214
    }
215
    for (j = 0; j < N; j++)
216
	indices[C[buffer[j]]++] = j;
217
    /* Now C[ch] = the number of strings that start */
218
    /* with a character less than or equal to ch. */
219
    sum = 0;
220
    /* qsort each bucket produced by the radix sort. */
221
    for (ch = 0; ch <= 255; sum = C[ch], ch++)
222
	qsort(indices + sum, C[ch] - sum,
223
	      sizeof(*indices),
224
	      bwbs_compare_rotations);
225
#undef C
226
}
227
 
228
/* Encode a buffer */
229
private int
230
s_BWBSE_process(stream_state * st, stream_cursor_read * pr,
231
		stream_cursor_write * pw, bool last)
232
{
233
    stream_BWBS_state *const ss = (stream_BWBS_state *) st;
234
    register byte *q = pw->ptr;
235
    byte *wlimit = pw->limit;
236
    uint wcount = wlimit - q;
237
    uint *indices = ss->offsets;
238
 
239
    if (ss->filling) {
240
	int status, j, N;
241
	byte *buffer = ss->buffer;
242
	if (wcount < sizeof(int) * 2)
243
	        return 1;
244
 
245
	/* Continue filling the buffer. */
246
	status = s_buffered_process(st, pr, last);
247
	if (!status)
248
	    return 0;
249
	ss->N = N = ss->bpos;
250
	/* We reverse the string before encoding it, */
251
	/* so it will come out of the decoder correctly. */
252
	for (j = N / 2 - 1; j >= 0; j--) {
253
	    byte *p0 = &buffer[j];
254
	    byte *p1 = &buffer[N - 1 - j];
255
	    byte b = *p0;
256
 
257
	    *p0 = *p1;
258
	    *p1 = b;
259
	}
260
	/* Save st in a static, because that's the only way */
261
	/* we can pass it to the comparison procedure (ugh). */
262
	bwbs_compare_ss = ss;
263
	/* Sort the strings. */
264
	bwbse_sort(buffer, indices, N);
265
	/* Find the unrotated string. */
266
	for (j = 0; j < N; j++)
267
	    if (indices[j] == 0) {
268
		ss->I = j;
269
		break;
270
	    }
271
	for (j = sizeof(int); --j >= 0;)
272
	    *++q = (byte) (N >> (j * 8));
273
	for (j = sizeof(int); --j >= 0;)
274
	    *++q = (byte) (ss->I >> (j * 8));
275
	ss->bpos = 0;
276
    }
277
    /* We're reading out of the buffer, writing the permuted string. */
278
    while (q < wlimit && ss->bpos < ss->N) {
279
	int i = indices[ss->bpos++];
280
 
281
	*++q = ss->buffer[(i == 0 ? ss->N - 1 : i - 1)];
282
    }
283
    if (ss->bpos == ss->N) {
284
	ss->filling = true;
285
	ss->bpos = 0;
286
    }
287
    pw->ptr = q;
288
    if (q == wlimit)
289
	return 1;
290
    return 0;
291
}
292
 
293
/* Stream template */
294
const stream_template s_BWBSE_template = {
295
    &st_BWBS_state, s_BWBSE_init, s_BWBSE_process, sizeof(int) * 2, 1,
296
    s_BWBS_release, s_BWBS_set_defaults
297
};
298
 
299
/* ------ BWBlockSortDecode ------ */
300
 
301
#define SHORT_OFFSETS
302
 
303
#ifdef SHORT_OFFSETS
304
 
305
/*
306
 * Letting S[0..N-1] be the data block before depermutation, we need
307
 * a table P[0..N-1] that maps the index i to O(S[i],i), where O(c,i) is
308
 * the number of occurrences of c in S before position i.
309
 * We observe that for a fixed c, O(c,i) is monotonic with i,
310
 * and falls in the range 0 .. i; consequently, if 0 <= i <= j,
311
 * 0 <= O(c,j) - O(c,i) <= j - i.  Proceeding from this observation,
312
 * rather than allocate an entire int to each entry of P,
313
 * we construct three tables as follows:
314
 *      P2[k,c] = O(c,k*65536) for k = 0 .. (N-1)/65536;
315
 *              each entry requires an int.
316
 *      P1[k,c] = O(c,k*4096) - P2[k/16,c] for k = 0 .. (N-1)/4096;
317
 *              each entry falls in the range 0 .. 15*4096 and hence
318
 *              requires 16 bits.
319
 *      P0[i] = O(S[i],i) - P1[i/4096,S[i]] for i = 0 .. N-1;
320
 *              each entry falls in the range 0 .. 4095 and hence
321
 *              requires 12 bits.
322
 * Since the value we need in the decompression loop is actually
323
 * P[i] + C[S[i]], where C[c] is the sum of O(0,N) ... O(c-1,N),
324
 * we add C[c] into P2[k,c] for all k.
325
 */
326
							/*typedef struct { uint v[256]; } offsets_full; *//* in sbwbs.h */
327
typedef struct {
328
    bits16 v[256];
329
} offsets_4k;
330
 
331
#if arch_sizeof_int > 2
332
#  define ceil_64k(n) (((n) + 0xffff) >> 16)
333
#else
334
#  define ceil_64k(n) 1
335
#endif
336
#define ceil_4k(n) (((n) + 0xfff) >> 12)
337
#define offset_space(bsize)\
338
  (ceil_64k(bsize) * sizeof(offsets_full) +\
339
   ceil_4k(bsize) * sizeof(offsets_4k) +\
340
   ((bsize + 1) >> 1) * 3)
341
 
342
#else /* !SHORT_OFFSETS */
343
 
344
#define offset_space(bsize)\
345
  (bsize * sizeof(int))
346
 
347
#endif /* (!)SHORT_OFFSETS */
348
 
349
/* Initialize */
350
private int
351
s_BWBSD_init(stream_state * st)
352
{
353
    stream_BWBS_state *const ss = (stream_BWBS_state *) st;
354
    uint bsize = ss->BlockSize;
355
 
356
    return bwbs_init(st, offset_space(bsize));
357
}
358
 
359
/* Construct the decoding tables. */
360
 
361
#ifdef SHORT_OFFSETS
362
 
363
private void
364
bwbsd_construct_offsets(stream_BWBS_state * sst, offsets_full * po64k,
365
			offsets_4k * po4k, byte * po1, int N)
366
{
367
    offsets_full Cs;
368
 
369
#define C Cs.v
370
    uint i1;
371
    byte *b = sst->buffer;
372
    offsets_full *p2 = po64k - 1;
373
    offsets_4k *p1 = po4k;
374
    byte *p0 = po1;
375
 
376
    memset(C, 0, sizeof(Cs));
377
    for (i1 = 0; i1 < ceil_4k(N); i1++, p1++) {
378
	int j;
379
 
380
	if (!(i1 & 15))
381
	    *++p2 = Cs;
382
	for (j = 0; j < 256; j++)
383
	    p1->v[j] = C[j] - p2->v[j];
384
	j = (N + 1 - (i1 << 12)) >> 1;
385
	if (j > 4096 / 2)
386
	    j = 4096 / 2;
387
	for (; j > 0; j--, b += 2, p0 += 3) {
388
	    byte b0 = b[0];
389
	    uint d0 = C[b0]++ - (p1->v[b0] + p2->v[b0]);
390
	    byte b1 = b[1];
391
	    uint d1 = C[b1]++ - (p1->v[b1] + p2->v[b1]);
392
 
393
	    p0[0] = d0 >> 4;
394
	    p0[1] = (byte) ((d0 << 4) + (d1 >> 8));
395
	    p0[2] = (byte) d1;
396
	}
397
    }
398
    /* If the block length is odd, discount the extra byte. */
399
    if (N & 1)
400
	C[sst->buffer[N]]--;
401
    /* Compute the cumulative totals in C. */
402
    {
403
	int sum = 0, ch;
404
 
405
	for (ch = 0; ch <= 255; ch++) {
406
	    sum += C[ch];
407
	    C[ch] = sum - C[ch];
408
	}
409
    }
410
    /* Add the C values back into the 64K table, */
411
    /* which saves an addition of C[b] in the decoding loop. */
412
    {
413
	int i2, ch;
414
 
415
	for (p2 = po64k, i2 = ceil_64k(N); i2 > 0; p2++, i2--)
416
	    for (ch = 0; ch < 256; ch++)
417
		p2->v[ch] += C[ch];
418
    }
419
#undef C
420
}
421
 
422
#else /* !SHORT_OFFSETS */
423
 
424
private void
425
bwbsd_construct_offsets(stream_BWBS_state * sst, int *po, int N)
426
{
427
    offsets_full Cs;
428
 
429
#define C Cs.v
430
    uint i;
431
    byte *b = sst->buffer;
432
    int *p = po;
433
 
434
    memset(C, 0, sizeof(Cs));
435
    for (i = 0; i < N; i++, p++, b++)
436
	*p = C[*b]++;
437
    /* Compute the cumulative totals in C. */
438
    {
439
	int sum = 0, ch;
440
 
441
	for (ch = 0; ch <= 255; ch++) {
442
	    sum += C[ch];
443
	    C[ch] = sum - C[ch];
444
	}
445
    }
446
    /* Add the C values back into the offsets. */
447
    for (i = 0, b = sst->buffer, p = po; i < N; i++, b++, p++)
448
	*p += C[*b];
449
#undef C
450
}
451
 
452
#endif /* (!)SHORT_OFFSETS */
453
 
454
/* Decode a buffer */
455
private int
456
s_BWBSD_process(stream_state * st, stream_cursor_read * pr,
457
		stream_cursor_write * pw, bool last)
458
{
459
    stream_BWBS_state *const ss = (stream_BWBS_state *) st;
460
    register const byte *p = pr->ptr;
461
    const byte *rlimit = pr->limit;
462
    uint count = rlimit - p;
463
    register byte *q = pw->ptr;
464
    byte *wlimit = pw->limit;
465
 
466
#ifdef SHORT_OFFSETS
467
    uint BlockSize = ss->BlockSize;
468
    offsets_full *po64k = ss->offsets;
469
    offsets_4k *po4k = (offsets_4k *) (po64k + ceil_64k(BlockSize));
470
    byte *po1 = (byte *) (po4k + ceil_4k(BlockSize));
471
 
472
#else /* !SHORT_OFFSETS */
473
    int *po = ss->offsets;
474
 
475
#endif /* (!)SHORT_OFFSETS */
476
 
477
    if (ss->I < 0) {		/* Read block parameters */
478
	int I, N, j;
479
	if (count < sizeof(int) * 2)
480
	        return 0;
481
	for (N = 0, j = 0; j < sizeof(int); j++)
482
 
483
	    N = (N << 8) + *++p;
484
	for (I = 0, j = 0; j < sizeof(int); j++)
485
 
486
	    I = (I << 8) + *++p;
487
	ss->N = N;
488
	ss->I = I;
489
	if_debug2('w', "[w]N=%d I=%d\n", N, I);
490
	pr->ptr = p;
491
	if (N < 0 || N > ss->BlockSize || I < 0 || I >= N)
492
	    return ERRC;
493
	if (N == 0)
494
	    return EOFC;
495
	count -= sizeof(int) * 2;
496
 
497
	ss->bpos = 0;
498
	ss->bsize = N;
499
    }
500
    if (ss->filling) {		/* Continue filling the buffer. */
501
	if (!s_buffered_process(st, pr, last))
502
	    return 0;
503
	/* Construct the inverse sort order. */
504
#ifdef SHORT_OFFSETS
505
	bwbsd_construct_offsets(ss, po64k, po4k, po1, ss->bsize);
506
#else /* !SHORT_OFFSETS */
507
	bwbsd_construct_offsets(ss, po, ss->bsize);
508
#endif /* (!)SHORT_OFFSETS */
509
	ss->bpos = 0;
510
	ss->i = ss->I;
511
    }
512
    /* We're reading out of the buffer. */
513
    while (q < wlimit && ss->bpos < ss->bsize) {
514
	int i = ss->i;
515
	byte b = ss->buffer[i];
516
 
517
#ifdef SHORT_OFFSETS
518
	uint d;
519
	const byte *pd = &po1[(i >> 1) + i];
520
 
521
	*++q = b;
522
	if (!(i & 1))
523
	    d = ((uint) pd[0] << 4) + (pd[1] >> 4);
524
	else
525
	    d = ((pd[0] & 0xf) << 8) + pd[1];
526
	ss->i = po64k[i >> 16].v[b] + po4k[i >> 12].v[b] + d;
527
#else /* !SHORT_OFFSETS */
528
	*++q = b;
529
	ss->i = po[i];
530
#endif /* (!)SHORT_OFFSETS */
531
	ss->bpos++;
532
    }
533
    if (ss->bpos == ss->bsize) {
534
	ss->I = -1;
535
	ss->filling = true;
536
    }
537
    pw->ptr = q;
538
    if (q == wlimit)
539
	return 1;
540
    return 0;
541
}
542
 
543
/* Stream template */
544
const stream_template s_BWBSD_template = {
545
    &st_BWBS_state, s_BWBSD_init, s_BWBSD_process, 1, sizeof(int) * 2,
546
    s_BWBS_release, s_BWBS_set_defaults
547
};