Subversion Repositories planix.SVN

Rev

Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
2 - 1
#include "u.h"
2
#include "lib.h"
3
#include "mem.h"
4
#include "dat.h"
5
#include "fns.h"
6
 
7
#include "thwack.h"
8
 
9
typedef struct Huff	Huff;
10
 
11
enum
12
{
13
	MaxFastLen	= 9,
14
	BigLenCode	= 0x1f4,	/* minimum code for large lenth encoding */
15
	BigLenBits	= 9,
16
	BigLenBase	= 4		/* starting items to encode for big lens */
17
};
18
 
19
enum
20
{
21
	StatBytes,
22
	StatOutBytes,
23
	StatLits,
24
	StatMatches,
25
	StatOffBits,
26
	StatLenBits,
27
 
28
	StatDelay,
29
	StatHist,
30
 
31
	MaxStat
32
};
33
 
34
struct Huff
35
{
36
	short	bits;				/* length of the code */
37
	ulong	encode;				/* the code */
38
};
39
 
40
static	Huff	lentab[MaxFastLen] =
41
{
42
	{2,	0x2},		/* 10 */
43
	{3,	0x6},		/* 110 */
44
	{5,	0x1c},		/* 11100 */
45
	{5,	0x1d},		/* 11101 */
46
	{6,	0x3c},		/* 111100 */
47
	{7,	0x7a},		/* 1111010 */
48
	{7,	0x7b},		/* 1111011 */
49
	{8,	0xf8},		/* 11111000 */
50
	{8,	0xf9},		/* 11111001 */
51
};
52
 
53
void
54
thwackinit(Thwack *tw)
55
{
56
	int i;
57
 
58
	memset(tw, 0, sizeof *tw);
59
	for(i = 0; i < EWinBlocks; i++){
60
		tw->blocks[i].data = tw->data[i];
61
		tw->blocks[i].edata = tw->blocks[i].data;
62
		tw->blocks[i].hash = tw->hash[i];
63
		tw->blocks[i].acked = 0;
64
	}
65
}
66
 
67
/*
68
 * acknowledgement for block seq & nearby preds
69
 */
70
void
71
thwackack(Thwack *tw, ulong seq, ulong mask)
72
{
73
	int slot, b;
74
 
75
	slot = tw->slot;
76
	for(;;){
77
		slot--;
78
		if(slot < 0)
79
			slot += EWinBlocks;
80
		if(slot == tw->slot)
81
			break;
82
		if(tw->blocks[slot].seq != seq)
83
			continue;
84
 
85
		tw->blocks[slot].acked = 1;
86
 
87
		if(mask == 0)
88
			break;
89
		do{
90
			b = mask & 1;
91
			seq--;
92
			mask >>= 1;
93
		}while(!b);
94
	}
95
}
96
 
97
/*
98
 * find a string in the dictionary
99
 */
100
static int
101
thwmatch(ThwBlock *b, ThwBlock *eblocks, uchar **ss, uchar *esrc, ulong h)
102
{
103
	int then, toff, w, ok;
104
	uchar *s, *t;
105
 
106
	s = *ss;
107
	if(esrc < s + MinMatch)
108
		return 0;
109
 
110
	toff = 0;
111
	for(; b < eblocks; b++){
112
		then = b->hash[(h ^ b->seq) & HashMask];
113
		toff += b->maxoff;
114
		w = (ushort)(then - b->begin);
115
 
116
		if(w >= b->maxoff)
117
			continue;
118
 
119
 
120
		/*
121
		 * don't need to check for the end because
122
		 * 1) s too close check above
123
		 * 2) entries too close not added to hash tables
124
		 */
125
		t = w + b->data;
126
		if(s[0] != t[0] || s[1] != t[1] || s[2] != t[2])
127
			continue;
128
		ok = b->edata - t;
129
		if(esrc - s > ok)
130
			esrc = s + ok;
131
 
132
		t += 3;
133
		for(s += 3; s < esrc; s++){
134
			if(*s != *t)
135
				break;
136
			t++;
137
		}
138
		*ss = s;
139
		return toff - w;
140
	}
141
	return 0;
142
}
143
 
144
/*
145
 * knuth vol. 3 multiplicative hashing
146
 * each byte x chosen according to rules
147
 * 1/4 < x < 3/10, 1/3 x < < 3/7, 4/7 < x < 2/3, 7/10 < x < 3/4
148
 * with reasonable spread between the bytes & their complements
149
 *
150
 * the 3 byte value appears to be as almost good as the 4 byte value,
151
 * and might be faster on some machines
152
 */
153
/*
154
#define hashit(c)	(((ulong)(c) * 0x6b43a9) >> (24 - HashLog))
155
*/
156
#define hashit(c)	((((ulong)(c) & 0xffffff) * 0x6b43a9b5) >> (32 - HashLog))
157
 
158
/*
159
 * lz77 compression with single lookup in a hash table for each block
160
 */
161
int
162
thwack(Thwack *tw, uchar *dst, uchar *src, int n, ulong seq, ulong stats[ThwStats])
163
{
164
	ThwBlock *eblocks, *b, blocks[CompBlocks];
165
	uchar *s, *ss, *sss, *esrc, *half, *twdst, *twdmax;
166
	ulong cont, cseq, bseq, cmask, code, twbits;
167
	int now, toff, lithist, h, len, slot, bits, use, twnbits, lits, matches, offbits, lenbits, nhist;
168
 
169
	if(n > ThwMaxBlock || n < MinMatch)
170
		return -1;
171
 
172
	twdst = dst;
173
	twdmax = dst + n;
174
 
175
	/*
176
	 * add source to the coding window
177
	 * there is always enough space
178
	 */
179
	slot = tw->slot;
180
	b = &tw->blocks[slot];
181
	b->seq = seq;
182
	b->acked = 0;
183
	now = b->begin + b->maxoff;
184
	s = b->data;
185
	memmove(s, src, n);
186
	b->edata = s + n;
187
	b->begin = now;
188
	b->maxoff = n;
189
 
190
	/*
191
	 * set up the history blocks
192
	 */
193
	cseq = seq;
194
	cmask = 0;
195
	*blocks = *b;
196
	b = blocks;
197
	b->maxoff = 0;
198
	b++;
199
	nhist = 0;
200
	while(b < blocks + CompBlocks){
201
		slot--;
202
		if(slot < 0)
203
			slot += EWinBlocks;
204
		if(slot == tw->slot)
205
			break;
206
		if(!tw->blocks[slot].acked)
207
			continue;
208
		bseq = tw->blocks[slot].seq;
209
		if(cseq == seq){
210
			if(seq - bseq >= MaxSeqStart)
211
				break;
212
			cseq = bseq;
213
		}else if(cseq - bseq > MaxSeqMask)
214
			break;
215
		else
216
			cmask |= 1 << (cseq - bseq - 1);
217
		*b = tw->blocks[slot];
218
		nhist += b->maxoff;
219
		b++;
220
	}
221
	eblocks = b;
222
	*twdst++ = seq - cseq;
223
	*twdst++ = cmask;
224
 
225
	cont = (s[0] << 16) | (s[1] << 8) | s[2];
226
 
227
	esrc = s + n;
228
	half = s + (n >> 1);
229
	twnbits = 0;
230
	twbits = 0;
231
	lits = 0;
232
	matches = 0;
233
	offbits = 0;
234
	lenbits = 0;
235
	lithist = ~0;
236
	while(s < esrc){
237
		h = hashit(cont);
238
 
239
		sss = s;
240
		toff = thwmatch(blocks, eblocks, &sss, esrc, h);
241
		ss = sss;
242
 
243
		len = ss - s;
244
		for(; twnbits >= 8; twnbits -= 8){
245
			if(twdst >= twdmax)
246
				return -1;
247
			*twdst++ = twbits >> (twnbits - 8);
248
		}
249
		if(len < MinMatch){
250
			toff = *s;
251
			lithist = (lithist << 1) | (toff < 32) | (toff > 127);
252
			if(lithist & 0x1e){
253
				twbits = (twbits << 9) | toff;
254
				twnbits += 9;
255
			}else if(lithist & 1){
256
				toff = (toff + 64) & 0xff;
257
				if(toff < 96){
258
					twbits = (twbits << 10) | toff;
259
					twnbits += 10;
260
				}else{
261
					twbits = (twbits << 11) | toff;
262
					twnbits += 11;
263
				}
264
			}else{
265
				twbits = (twbits << 8) | toff;
266
				twnbits += 8;
267
			}
268
			lits++;
269
			blocks->maxoff++;
270
 
271
			/*
272
			 * speed hack
273
			 * check for compression progress, bail if none achieved
274
			 */
275
			if(s > half){
276
				if(4 * blocks->maxoff < 5 * lits)
277
					return -1;
278
				half = esrc;
279
			}
280
 
281
			if(s + MinMatch <= esrc){
282
				blocks->hash[(h ^ blocks->seq) & HashMask] = now;
283
				if(s + MinMatch < esrc)
284
					cont = (cont << 8) | s[MinMatch];
285
			}
286
			now++;
287
			s++;
288
			continue;
289
		}
290
 
291
		blocks->maxoff += len;
292
		matches++;
293
 
294
		/*
295
		 * length of match
296
		 */
297
		len -= MinMatch;
298
		if(len < MaxFastLen){
299
			bits = lentab[len].bits;
300
			twbits = (twbits << bits) | lentab[len].encode;
301
			twnbits += bits;
302
			lenbits += bits;
303
		}else{
304
			code = BigLenCode;
305
			bits = BigLenBits;
306
			use = BigLenBase;
307
			len -= MaxFastLen;
308
			while(len >= use){
309
				len -= use;
310
				code = (code + use) << 1;
311
				use <<= (bits & 1) ^ 1;
312
				bits++;
313
			}
314
			twbits = (twbits << bits) | (code + len);
315
			twnbits += bits;
316
			lenbits += bits;
317
 
318
			for(; twnbits >= 8; twnbits -= 8){
319
				if(twdst >= twdmax)
320
					return -1;
321
				*twdst++ = twbits >> (twnbits - 8);
322
			}
323
		}
324
 
325
		/*
326
		 * offset in history
327
		 */
328
		toff--;
329
		for(bits = OffBase; toff >= (1 << bits); bits++)
330
			;
331
		if(bits < MaxOff+OffBase-1){
332
			twbits = (twbits << 3) | (bits - OffBase);
333
			if(bits != OffBase)
334
				bits--;
335
			twnbits += bits + 3;
336
			offbits += bits + 3;
337
		}else{
338
			twbits = (twbits << 4) | 0xe | (bits - (MaxOff+OffBase-1));
339
			bits--;
340
			twnbits += bits + 4;
341
			offbits += bits + 4;
342
		}
343
		twbits = (twbits << bits) | toff & ((1 << bits) - 1);
344
 
345
		for(; s != ss; s++){
346
			if(s + MinMatch <= esrc){
347
				h = hashit(cont);
348
				blocks->hash[(h ^ blocks->seq) & HashMask] = now;
349
				if(s + MinMatch < esrc)
350
					cont = (cont << 8) | s[MinMatch];
351
			}
352
			now++;
353
		}
354
	}
355
 
356
 
357
	if(twnbits & 7){
358
		twbits <<= 8 - (twnbits & 7);
359
		twnbits += 8 - (twnbits & 7);
360
	}
361
	for(; twnbits >= 8; twnbits -= 8){
362
		if(twdst >= twdmax)
363
			return -1;
364
		*twdst++ = twbits >> (twnbits - 8);
365
	}
366
 
367
	tw->slot++;
368
	if(tw->slot >= EWinBlocks)
369
		tw->slot = 0;
370
 
371
	stats[StatBytes] += blocks->maxoff;
372
	stats[StatLits] += lits;
373
	stats[StatMatches] += matches;
374
	stats[StatOffBits] += offbits;
375
	stats[StatLenBits] += lenbits;
376
	stats[StatDelay] = stats[StatDelay]*7/8 + dst[0];
377
	stats[StatHist] = stats[StatHist]*7/8 + nhist;
378
	stats[StatOutBytes] += twdst - dst;
379
 
380
	return twdst - dst;
381
}