Subversion Repositories planix.SVN

Rev

Rev 2 | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
2 - 1
/*
2
 * compress - File compression ala IEEE Computer, June 1984.
3
 *
4
 * Algorithm from "A Technique for High Performance Data Compression",
5
 * Terry A. Welch, IEEE Computer Vol 17, No 6 (June 1984), pp 8-19.
6
 *
7
 * Usage: compress [-dfvc] [-b bits] [file ...]
8
 * Inputs:
9
 *	-b:	limit the max number of bits/code.
10
 *	-c:	write output on stdout, don't remove original.
11
 *	-d:	decompress instead.
12
 *	-f:	Forces output file to be generated, even if one already
13
 *		exists, and even if no space is saved by compressing.
14
 *		If -f is not used, the user will be prompted if stdin is
15
 *		a tty, otherwise, the output file will not be overwritten.
16
 *	-v:	Write compression statistics
17
 *
18
 * 	file ...: Files to be compressed.  If none specified, stdin is used.
19
 * Outputs:
20
 *	file.Z:	Compressed form of file with same mode, owner, and utimes
21
 * 		or stdout (if stdin used as input)
22
 *
23
 * Assumptions:
24
 *	When filenames are given, replaces with the compressed version
25
 *	(.Z suffix) only if the file decreases in size.
26
 * Algorithm:
27
 * 	Modified Lempel-Ziv method (LZW).  Basically finds common
28
 * substrings and replaces them with a variable size code.  This is
29
 * deterministic, and can be done on the fly.  Thus, the decompression
30
 * procedure needs no input table, but tracks the way the table was built.
31
 
32
 * Authors:	Spencer W. Thomas	(decvax!harpo!utah-cs!utah-gr!thomas)
33
 *		Jim McKie		(decvax!mcvax!jim)
34
 *		Steve Davies		(decvax!vax135!petsd!peora!srd)
35
 *		Ken Turkowski		(decvax!decwrl!turtlevax!ken)
36
 *		James A. Woods		(decvax!ihnp4!ames!jaw)
37
 *		Joe Orost		(decvax!vax135!petsd!joe)
38
 */
39
#define _PLAN9_SOURCE
40
#define _BSD_EXTENSION
41
#define _POSIX_SOURCE
42
 
43
#include <u.h>
44
#include <stdio.h>
45
#include <ctype.h>
46
#include <stdlib.h>
47
#include <unistd.h>
48
#include <string.h>
49
#include <signal.h>
50
#include <utime.h>
51
#include <sys/types.h>
52
#include <sys/stat.h>
53
 
54
#define	min(a,b)	((a>b) ? b : a)
55
 
56
#define BITS	16
57
#define HSIZE	69001		/* 95% occupancy */
58
 
59
/*
60
 * a code_int must be able to hold 2**BITS values of type int, and also -1
61
 */
62
typedef long	code_int;
63
typedef long	count_int;
64
 
65
static char rcs_ident[] = "$Header: compress.c,v 4.0 85/07/30 12:50:00 joe Release $";
66
 
67
uchar magic_header[] = { 0x1F, 0x9D };	/* 1F 9D */
68
 
69
/* Defines for third byte of header */
70
#define BIT_MASK	0x1f
71
#define BLOCK_MASK	0x80
72
/* Masks 0x40 and 0x20 are free.  I think 0x20 should mean that there is
73
	a fourth header byte (for expansion).
74
*/
75
#define INIT_BITS 9			/* initial number of bits/code */
76
 
77
#define ARGVAL() (*++(*argv) || (--argc && *++argv))
78
 
79
int n_bits;				/* number of bits/code */
80
int maxbits = BITS;			/* user settable max # bits/code */
81
code_int maxcode;			/* maximum code, given n_bits */
82
code_int maxmaxcode = 1 << BITS;	/* should NEVER generate this code */
83
 
84
#define MAXCODE(n_bits)	((1 << (n_bits)) - 1)
85
 
86
count_int htab[HSIZE];
87
ushort codetab[HSIZE];
88
 
89
#define htabof(i)	htab[i]
90
#define codetabof(i)	codetab[i]
91
 
92
code_int hsize = HSIZE;			/* for dynamic table sizing */
93
count_int fsize;
94
 
95
/*
96
 * To save much memory, we overlay the table used by compress() with those
97
 * used by decompress().  The tab_prefix table is the same size and type
98
 * as the codetab.  The tab_suffix table needs 2**BITS characters.  We
99
 * get this from the beginning of htab.  The output stack uses the rest
100
 * of htab, and contains characters.  There is plenty of room for any
101
 * possible stack (stack used to be 8000 characters).
102
 */
103
 
104
#define tab_prefixof(i)	codetabof(i)
105
#define tab_suffixof(i)	((uchar *)(htab))[i]
106
#define de_stack		((uchar *)&tab_suffixof(1<<BITS))
107
 
108
code_int free_ent = 0;			/* first unused entry */
109
int exit_stat = 0;
110
 
111
void	cl_block(void);
112
void	cl_hash(count_int);
113
void	compress(void);
114
void	copystat(char *, char *);
115
void	decompress(void);
116
int	foreground(void);
117
code_int getcode(void);
118
void	onintr(int);
119
void	oops(int);
120
void	output(code_int);
121
void	prratio(FILE *, long, long);
122
void	version(void);
123
void	writeerr(void);
124
 
125
void
126
Usage(void)
127
{
128
#ifdef DEBUG
129
	fprintf(stderr,"Usage: compress [-cdfDV] [-b maxbits] [file ...]\n");
130
#else
131
	fprintf(stderr,"Usage: compress [-cdfvV] [-b maxbits] [file ...]\n");
132
#endif /* DEBUG */
133
}
134
 
135
int debug = 0;
136
int nomagic = 0;	/* Use a 3-byte magic number header, unless old file */
137
int zcat_flg = 0;	/* Write output on stdout, suppress messages */
138
int quiet = 1;		/* don't tell me about compression */
139
 
140
/*
141
 * block compression parameters -- after all codes are used up,
142
 * and compression rate changes, start over.
143
 */
144
int block_compress = BLOCK_MASK;
145
int clear_flg = 0;
146
long ratio = 0;
147
#define CHECK_GAP 10000	/* ratio check interval */
148
count_int checkpoint = CHECK_GAP;
149
/*
150
 * the next two codes should not be changed lightly, as they must not
151
 * lie within the contiguous general code space.
152
 */
153
#define FIRST	257	/* first free entry */
154
#define	CLEAR	256	/* table clear output code */
155
 
156
int force = 0;
157
char ofname [100];
158
#ifdef DEBUG
159
int verbose = 0;
160
#endif /* DEBUG */
161
void (*bgnd_flag)(int);
162
 
163
int do_decomp = 0;
164
 
165
main(argc, argv)
166
int argc;
167
char **argv;
168
{
169
	int overwrite = 0;	/* Do not overwrite unless given -f flag */
170
	char tempname[512];
171
	char **filelist, **fileptr;
172
	char *cp;
173
	struct stat statbuf;
174
 
175
	if ( (bgnd_flag = signal ( SIGINT, SIG_IGN )) != SIG_IGN ) {
176
		signal(SIGINT, onintr);
177
		signal(SIGSEGV, oops);
178
	}
179
 
180
	filelist = fileptr = (char **)(malloc(argc * sizeof(*argv)));
181
	*filelist = NULL;
182
 
183
	if((cp = strrchr(argv[0], '/')) != 0)
184
		cp++;
185
	else
186
		cp = argv[0];
187
	if(strcmp(cp, "uncompress") == 0)
188
		do_decomp = 1;
189
	else if(strcmp(cp, "zcat") == 0) {
190
		do_decomp = 1;
191
		zcat_flg = 1;
192
	}
193
 
194
	/*
195
	 * Argument Processing
196
	 * All flags are optional.
197
	 * -C	generate output compatible with compress 2.0.
198
	 * -D	debug
199
	 * -V	print Version; debug verbose
200
	 * -b maxbits	maxbits.  If -b is specified, then maxbits MUST be
201
	 *		given also.
202
	 * -c	cat all output to stdout
203
	 * -d	do_decomp
204
	 * -f	force overwrite of output file
205
	 * -n	no header: useful to uncompress old files
206
	 * -v	unquiet
207
	 * if a string is left, must be an input filename.
208
	 */
209
	for (argc--, argv++; argc > 0; argc--, argv++) {
210
	if (**argv == '-') {	/* A flag argument */
211
		while (*++(*argv)) {	/* Process all flags in this arg */
212
		switch (**argv) {
213
		case 'C':
214
			block_compress = 0;
215
			break;
216
#ifdef DEBUG
217
		case 'D':
218
			debug = 1;
219
			break;
220
		case 'V':
221
			verbose = 1;
222
			version();
223
			break;
224
#else
225
		case 'V':
226
			version();
227
			break;
228
#endif
229
		case 'b':
230
			if (!ARGVAL()) {
231
				fprintf(stderr, "Missing maxbits\n");
232
				Usage();
233
				exit(1);
234
			}
235
			maxbits = atoi(*argv);
236
			goto nextarg;
237
		case 'c':
238
			zcat_flg = 1;
239
			break;
240
		case 'd':
241
			do_decomp = 1;
242
			break;
243
		case 'f':
244
		case 'F':
245
			overwrite = 1;
246
			force = 1;
247
			break;
248
		case 'n':
249
			nomagic = 1;
250
			break;
251
		case 'q':
252
			quiet = 1;
253
			break;
254
		case 'v':
255
			quiet = 0;
256
			break;
257
		default:
258
			fprintf(stderr, "Unknown flag: '%c'; ", **argv);
259
			Usage();
260
			exit(1);
261
		}
262
		}
263
	} else {			/* Input file name */
264
		*fileptr++ = *argv;	/* Build input file list */
265
		*fileptr = NULL;
266
		/* process nextarg; */
267
	}
268
nextarg:
269
		continue;
270
	}
271
 
272
	if(maxbits < INIT_BITS) maxbits = INIT_BITS;
273
	if (maxbits > BITS) maxbits = BITS;
274
	maxmaxcode = 1 << maxbits;
275
 
276
	if (*filelist != NULL) {
277
	for (fileptr = filelist; *fileptr; fileptr++) {
278
		exit_stat = 0;
279
		if (do_decomp != 0) {			/* DECOMPRESSION */
280
		/* Check for .Z suffix */
281
		if (strcmp(*fileptr + strlen(*fileptr) - 2, ".Z") != 0) {
282
			/* No .Z: tack one on */
283
			strcpy(tempname, *fileptr);
284
			strcat(tempname, ".Z");
285
			*fileptr = tempname;
286
		}
287
		/* Open input file */
288
		if ((freopen(*fileptr, "r", stdin)) == NULL) {
289
			perror(*fileptr);
290
			continue;
291
		}
292
		/* Check the magic number */
293
		if (nomagic == 0) {
294
			if ((getchar() != (magic_header[0] & 0xFF))
295
			|| (getchar() != (magic_header[1] & 0xFF))) {
296
				fprintf(stderr, "%s: not in compressed format\n",
297
					*fileptr);
298
				continue;
299
			}
300
			maxbits = getchar();	/* set -b from file */
301
			block_compress = maxbits & BLOCK_MASK;
302
			maxbits &= BIT_MASK;
303
			maxmaxcode = 1 << maxbits;
304
			if(maxbits > BITS) {
305
				fprintf(stderr,
306
		"%s: compressed with %d bits, can only handle %d bits\n",
307
					*fileptr, maxbits, BITS);
308
				continue;
309
			}
310
		}
311
		/* Generate output filename */
312
		strcpy(ofname, *fileptr);
313
		ofname[strlen(*fileptr) - 2] = '\0';  /* Strip off .Z */
314
		} else {				/* COMPRESSION */
315
		if (strcmp(*fileptr + strlen(*fileptr) - 2, ".Z") == 0) {
316
			fprintf(stderr,
317
				"%s: already has .Z suffix -- no change\n",
318
				*fileptr);
319
			continue;
320
		}
321
		/* Open input file */
322
		if ((freopen(*fileptr, "r", stdin)) == NULL) {
323
			perror(*fileptr);
324
			continue;
325
		}
326
		(void) stat(*fileptr, &statbuf);
327
		fsize = (long) statbuf.st_size;
328
		/*
329
		 * tune hash table size for small files -- ad hoc,
330
		 * but the sizes match earlier #defines, which
331
		 * serve as upper bounds on the number of output codes.
332
		 */
333
		hsize = HSIZE;
334
		if (fsize < (1 << 12))
335
			hsize = min(5003, HSIZE);
336
		else if (fsize < (1 << 13))
337
			hsize = min(9001, HSIZE);
338
		else if (fsize < (1 << 14))
339
			hsize = min (18013, HSIZE);
340
		else if (fsize < (1 << 15))
341
			hsize = min (35023, HSIZE);
342
		else if (fsize < 47000)
343
			hsize = min (50021, HSIZE);
344
 
345
		/* Generate output filename */
346
		strcpy(ofname, *fileptr);
347
#ifndef BSD4_2
348
		if ((cp=strrchr(ofname,'/')) != NULL)
349
			cp++;
350
		else
351
			cp = ofname;
352
		/*
353
		 *** changed 12 to 25;  should be NAMELEN-3, but I don't want
354
		 * to fight the headers.	ehg 5 Nov 92 **
355
		 */
356
		if (strlen(cp) > 25) {
357
			fprintf(stderr, "%s: filename too long to tack on .Z\n",
358
				cp);
359
			continue;
360
		}
361
#endif
362
		strcat(ofname, ".Z");
363
		}
364
		/* Check for overwrite of existing file */
365
		if (overwrite == 0 && zcat_flg == 0 &&
366
		   stat(ofname, &statbuf) == 0) {
367
			char response[2];
368
 
369
			response[0] = 'n';
370
			fprintf(stderr, "%s already exists;", ofname);
371
			if (foreground()) {
372
				fprintf(stderr,
373
				    " do you wish to overwrite %s (y or n)? ",
374
					ofname);
375
				fflush(stderr);
376
				(void) read(2, response, 2);
377
				while (response[1] != '\n')
378
					if (read(2, response+1, 1) < 0) {
379
						/* Ack! */
380
						perror("stderr");
381
						break;
382
					}
383
			}
384
			if (response[0] != 'y') {
385
				fprintf(stderr, "\tnot overwritten\n");
386
				continue;
387
			}
388
		}
389
		if(zcat_flg == 0) {		/* Open output file */
390
			if (freopen(ofname, "w", stdout) == NULL) {
391
				perror(ofname);
392
				continue;
393
			}
394
			if(!quiet)
395
				fprintf(stderr, "%s: ", *fileptr);
396
		}
397
 
398
		/* Actually do the compression/decompression */
399
		if (do_decomp == 0)
400
			compress();
401
#ifndef DEBUG
402
		else
403
			decompress();
404
#else
405
		else if (debug == 0)
406
			decompress();
407
		else
408
			printcodes();
409
		if (verbose)
410
			dump_tab();
411
#endif						/* DEBUG */
412
		if(zcat_flg == 0) {
413
			copystat(*fileptr, ofname);	/* Copy stats */
414
			if (exit_stat == 1 || !quiet)
415
				putc('\n', stderr);
416
		}
417
	}
418
	} else {		/* Standard input */
419
	if (do_decomp == 0) {
420
		compress();
421
#ifdef DEBUG
422
		if(verbose)
423
			dump_tab();
424
#endif
425
		if(!quiet)
426
			putc('\n', stderr);
427
	} else {
428
		/* Check the magic number */
429
		if (nomagic == 0) {
430
		if ((getchar()!=(magic_header[0] & 0xFF))
431
		 || (getchar()!=(magic_header[1] & 0xFF))) {
432
			fprintf(stderr, "stdin: not in compressed format\n");
433
			exit(1);
434
		}
435
		maxbits = getchar();	/* set -b from file */
436
		block_compress = maxbits & BLOCK_MASK;
437
		maxbits &= BIT_MASK;
438
		maxmaxcode = 1 << maxbits;
439
		fsize = 100000;		/* assume stdin large for USERMEM */
440
		if(maxbits > BITS) {
441
			fprintf(stderr,
442
			"stdin: compressed with %d bits, can only handle %d bits\n",
443
			maxbits, BITS);
444
			exit(1);
445
		}
446
		}
447
#ifndef DEBUG
448
		decompress();
449
#else
450
		if (debug == 0)
451
			decompress();
452
		else
453
			printcodes();
454
		if (verbose)
455
			dump_tab();
456
#endif						/* DEBUG */
457
	}
458
	}
459
	exit(exit_stat);
460
	return 0;
461
}
462
 
463
static int offset;
464
long in_count = 1;		/* length of input */
465
long bytes_out;			/* length of compressed output */
466
long out_count = 0;		/* # of codes output (for debugging) */
467
 
468
/*
469
 * compress stdin to stdout
470
 *
471
 * Algorithm:  use open addressing double hashing (no chaining) on the
472
 * prefix code / next character combination.  We do a variant of Knuth's
473
 * algorithm D (vol. 3, sec. 6.4) along with G. Knott's relatively-prime
474
 * secondary probe.  Here, the modular division first probe is gives way
475
 * to a faster exclusive-or manipulation.  Also do block compression with
476
 * an adaptive reset, whereby the code table is cleared when the compression
477
 * ratio decreases, but after the table fills.  The variable-length output
478
 * codes are re-sized at this point, and a special CLEAR code is generated
479
 * for the decompressor.  Late addition:  construct the table according to
480
 * file size for noticeable speed improvement on small files.  Please direct
481
 * questions about this implementation to ames!jaw.
482
 */
483
void
484
compress(void)
485
{
486
	code_int ent, hsize_reg;
487
	code_int i;
488
	int c, disp, hshift;
489
	long fcode;
490
 
491
	if (nomagic == 0) {
492
		putchar(magic_header[0]);
493
		putchar(magic_header[1]);
494
		putchar((char)(maxbits | block_compress));
495
		if(ferror(stdout))
496
			writeerr();
497
	}
498
	offset = 0;
499
	bytes_out = 3;		/* includes 3-byte header mojo */
500
	out_count = 0;
501
	clear_flg = 0;
502
	ratio = 0;
503
	in_count = 1;
504
	checkpoint = CHECK_GAP;
505
	maxcode = MAXCODE(n_bits = INIT_BITS);
506
	free_ent = (block_compress? FIRST: 256);
507
 
508
	ent = getchar ();
509
 
510
	hshift = 0;
511
	for (fcode = (long)hsize;  fcode < 65536L; fcode *= 2)
512
		hshift++;
513
	hshift = 8 - hshift;		/* set hash code range bound */
514
 
515
	hsize_reg = hsize;
516
	cl_hash( (count_int) hsize_reg);		/* clear hash table */
517
 
518
	while ((c = getchar()) != EOF) {
519
		in_count++;
520
		fcode = (long) (((long) c << maxbits) + ent);
521
	 	i = ((c << hshift) ^ ent);	/* xor hashing */
522
 
523
		if (htabof (i) == fcode) {
524
			ent = codetabof(i);
525
			continue;
526
		} else if ((long)htabof(i) < 0 )	/* empty slot */
527
			goto nomatch;
528
	 	disp = hsize_reg - i;	/* secondary hash (after G. Knott) */
529
		if (i == 0)
530
			disp = 1;
531
probe:
532
		if ((i -= disp) < 0)
533
			i += hsize_reg;
534
 
535
		if (htabof (i) == fcode) {
536
			ent = codetabof(i);
537
			continue;
538
		}
539
		if ((long)htabof(i) > 0)
540
			goto probe;
541
nomatch:
542
		output((code_int)ent);
543
		out_count++;
544
	 	ent = c;
545
		if (free_ent < maxmaxcode) {
546
	 		codetabof(i) = free_ent++;	/* code -> hashtable */
547
			htabof(i) = fcode;
548
		} else if ((count_int)in_count >= checkpoint && block_compress)
549
			cl_block ();
550
	}
551
	/*
552
	* Put out the final code.
553
	*/
554
	output( (code_int)ent );
555
	out_count++;
556
	output( (code_int)-1 );
557
 
558
	/*
559
	* Print out stats on stderr
560
	*/
561
	if(zcat_flg == 0 && !quiet) {
562
#ifdef DEBUG
563
	fprintf( stderr,
564
		"%ld chars in, %ld codes (%ld bytes) out, compression factor: ",
565
		in_count, out_count, bytes_out );
566
	prratio( stderr, in_count, bytes_out );
567
	fprintf( stderr, "\n");
568
	fprintf( stderr, "\tCompression as in compact: " );
569
	prratio( stderr, in_count-bytes_out, in_count );
570
	fprintf( stderr, "\n");
571
	fprintf( stderr, "\tLargest code (of last block) was %d (%d bits)\n",
572
		free_ent - 1, n_bits );
573
#else /* !DEBUG */
574
	fprintf( stderr, "Compression: " );
575
	prratio( stderr, in_count-bytes_out, in_count );
576
#endif /* DEBUG */
577
	}
578
	if(bytes_out > in_count)	/* exit(2) if no savings */
579
		exit_stat = 2;
580
}
581
 
582
/*
583
 * TAG( output )
584
 *
585
 * Output the given code.
586
 * Inputs:
587
 * 	code:	A n_bits-bit integer.  If == -1, then EOF.  This assumes
588
 *		that n_bits =< (long)wordsize - 1.
589
 * Outputs:
590
 * 	Outputs code to the file.
591
 * Assumptions:
592
 *	Chars are 8 bits long.
593
 * Algorithm:
594
 * 	Maintain a BITS character long buffer (so that 8 codes will
595
 * fit in it exactly).  When the buffer fills up empty it and start over.
596
 */
597
 
598
static char buf[BITS];
599
 
600
uchar lmask[9] = {0xff, 0xfe, 0xfc, 0xf8, 0xf0, 0xe0, 0xc0, 0x80, 0x00};
601
uchar rmask[9] = {0x00, 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff};
602
 
603
void
604
output( code )
605
code_int  code;
606
{
607
#ifdef DEBUG
608
	static int col = 0;
609
#endif
610
	int r_off = offset, bits= n_bits;
611
	char *bp = buf;
612
 
613
#ifdef DEBUG
614
	if (verbose)
615
		fprintf(stderr, "%5d%c", code,
616
			(col+=6) >= 74? (col = 0, '\n'): ' ');
617
#endif
618
	if (code >= 0) {
619
		/*
620
		 * byte/bit numbering on the VAX is simulated by the
621
		 * following code
622
		 */
623
		/*
624
		 * Get to the first byte.
625
		 */
626
		bp += (r_off >> 3);
627
		r_off &= 7;
628
		/*
629
		 * Since code is always >= 8 bits, only need to mask the first
630
		 * hunk on the left.
631
		 */
632
		*bp = (*bp & rmask[r_off]) | (code << r_off) & lmask[r_off];
633
		bp++;
634
		bits -=  8 - r_off;
635
		code >>= 8 - r_off;
636
		/* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */
637
		if ( bits >= 8 ) {
638
			*bp++ = code;
639
			code >>= 8;
640
			bits -= 8;
641
		}
642
		/* Last bits. */
643
		if(bits)
644
			*bp = code;
645
 
646
		offset += n_bits;
647
		if ( offset == (n_bits << 3) ) {
648
			bp = buf;
649
			bits = n_bits;
650
			bytes_out += bits;
651
			do {
652
				putchar(*bp++);
653
			} while(--bits);
654
			offset = 0;
655
		}
656
 
657
		/*
658
		 * If the next entry is going to be too big for the code size,
659
		 * then increase it, if possible.
660
		 */
661
		if ( free_ent > maxcode || (clear_flg > 0)) {
662
			/*
663
			* Write the whole buffer, because the input side won't
664
			* discover the size increase until after it has read it.
665
			*/
666
			if ( offset > 0 ) {
667
				if( fwrite( buf, 1, n_bits, stdout ) != n_bits)
668
					writeerr();
669
				bytes_out += n_bits;
670
			}
671
			offset = 0;
672
 
673
			if ( clear_flg ) {
674
				maxcode = MAXCODE (n_bits = INIT_BITS);
675
				clear_flg = 0;
676
			} else {
677
				n_bits++;
678
				if ( n_bits == maxbits )
679
					maxcode = maxmaxcode;
680
				else
681
					maxcode = MAXCODE(n_bits);
682
			}
683
#ifdef DEBUG
684
			if ( debug ) {
685
				fprintf(stderr,
686
					"\nChange to %d bits\n", n_bits);
687
				col = 0;
688
			}
689
#endif
690
		}
691
	} else {
692
		/*
693
		 * At EOF, write the rest of the buffer.
694
		 */
695
		if ( offset > 0 )
696
			fwrite( buf, 1, (offset + 7) / 8, stdout );
697
		bytes_out += (offset + 7) / 8;
698
		offset = 0;
699
		fflush( stdout );
700
#ifdef DEBUG
701
		if ( verbose )
702
			fprintf( stderr, "\n" );
703
#endif
704
		if( ferror( stdout ) )
705
			writeerr();
706
	}
707
}
708
 
709
/*
710
 * Decompress stdin to stdout.  This routine adapts to the codes in the
711
 * file building the "string" table on-the-fly; requiring no table to
712
 * be stored in the compressed file.  The tables used herein are shared
713
 * with those of the compress() routine.  See the definitions above.
714
 */
715
void
716
decompress(void)
717
{
718
	int finchar;
719
	code_int code, oldcode, incode;
720
	uchar *stackp;
721
 
722
	/*
723
	* As above, initialize the first 256 entries in the table.
724
	*/
725
	maxcode = MAXCODE(n_bits = INIT_BITS);
726
	for (code = 255; code >= 0; code--) {
727
		tab_prefixof(code) = 0;
728
		tab_suffixof(code) = (uchar)code;
729
	}
730
	free_ent = (block_compress? FIRST: 256);
731
 
732
	finchar = oldcode = getcode();
733
	if(oldcode == -1)		/* EOF already? */
734
		return;			/* Get out of here */
735
	putchar((char)finchar);		/* first code must be 8 bits = char */
736
	if(ferror(stdout))		/* Crash if can't write */
737
		writeerr();
738
	stackp = de_stack;
739
 
740
	while ((code = getcode()) > -1) {
741
		if ((code == CLEAR) && block_compress) {
742
			for (code = 255; code >= 0; code--)
743
				tab_prefixof(code) = 0;
744
			clear_flg = 1;
745
			free_ent = FIRST - 1;
746
			if ((code = getcode()) == -1)	/* O, untimely death! */
747
				break;
748
		}
749
		incode = code;
750
		/*
751
		 * Special case for KwKwK string.
752
		 */
753
		if (code >= free_ent) {
754
			*stackp++ = finchar;
755
			code = oldcode;
756
		}
757
 
758
		/*
759
		 * Generate output characters in reverse order
760
		 */
761
		while (code >= 256) {
762
			*stackp++ = tab_suffixof(code);
763
			code = tab_prefixof(code);
764
		}
765
		*stackp++ = finchar = tab_suffixof(code);
766
 
767
		/*
768
		 * And put them out in forward order
769
		 */
770
		do {
771
			putchar(*--stackp);
772
		} while (stackp > de_stack);
773
 
774
		/*
775
		 * Generate the new entry.
776
		 */
777
		if ( (code=free_ent) < maxmaxcode ) {
778
			tab_prefixof(code) = (ushort)oldcode;
779
			tab_suffixof(code) = finchar;
780
			free_ent = code+1;
781
		}
782
		/*
783
		 * Remember previous code.
784
		 */
785
		oldcode = incode;
786
	}
787
	fflush(stdout);
788
	if(ferror(stdout))
789
		writeerr();
790
}
791
 
792
/*
793
 * TAG( getcode )
794
 *
795
 * Read one code from the standard input.  If EOF, return -1.
796
 * Inputs:
797
 * 	stdin
798
 * Outputs:
799
 * 	code or -1 is returned.
800
 */
801
code_int
802
getcode()
803
{
804
	int r_off, bits;
805
	code_int code;
806
	static int offset = 0, size = 0;
807
	static uchar buf[BITS];
808
	uchar *bp = buf;
809
 
810
	if ( clear_flg > 0 || offset >= size || free_ent > maxcode ) {
811
		/*
812
		 * If the next entry will be too big for the current code
813
		 * size, then we must increase the size.  This implies reading
814
		 * a new buffer full, too.
815
		 */
816
		if ( free_ent > maxcode ) {
817
			n_bits++;
818
			if ( n_bits == maxbits )
819
				maxcode = maxmaxcode; /* won't get any bigger now */
820
			else
821
				maxcode = MAXCODE(n_bits);
822
		}
823
		if ( clear_flg > 0) {
824
			maxcode = MAXCODE(n_bits = INIT_BITS);
825
			clear_flg = 0;
826
		}
827
		size = fread(buf, 1, n_bits, stdin);
828
		if (size <= 0)
829
			return -1;			/* end of file */
830
		offset = 0;
831
		/* Round size down to integral number of codes */
832
		size = (size << 3) - (n_bits - 1);
833
	}
834
	r_off = offset;
835
	bits = n_bits;
836
	/*
837
	 * Get to the first byte.
838
	 */
839
	bp += (r_off >> 3);
840
	r_off &= 7;
841
	/* Get first part (low order bits) */
842
	code = (*bp++ >> r_off);
843
	bits -= (8 - r_off);
844
	r_off = 8 - r_off;		/* now, offset into code word */
845
	/* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */
846
	if (bits >= 8) {
847
		code |= *bp++ << r_off;
848
		r_off += 8;
849
		bits -= 8;
850
	}
851
	/* high order bits. */
852
	code |= (*bp & rmask[bits]) << r_off;
853
	offset += n_bits;
854
	return code;
855
}
856
 
857
#ifdef DEBUG
858
printcodes()
859
{
860
	/*
861
	* Just print out codes from input file.  For debugging.
862
	*/
863
	code_int code;
864
	int col = 0, bits;
865
 
866
	bits = n_bits = INIT_BITS;
867
	maxcode = MAXCODE(n_bits);
868
	free_ent = ((block_compress) ? FIRST : 256 );
869
	while ( ( code = getcode() ) >= 0 ) {
870
	if ( (code == CLEAR) && block_compress ) {
871
			free_ent = FIRST - 1;
872
			clear_flg = 1;
873
	}
874
	else if ( free_ent < maxmaxcode )
875
		free_ent++;
876
	if ( bits != n_bits ) {
877
		fprintf(stderr, "\nChange to %d bits\n", n_bits );
878
		bits = n_bits;
879
		col = 0;
880
	}
881
	fprintf(stderr, "%5d%c", code, (col+=6) >= 74 ? (col = 0, '\n') : ' ' );
882
	}
883
	putc( '\n', stderr );
884
	exit( 0 );
885
}
886
 
887
code_int sorttab[1<<BITS];	/* sorted pointers into htab */
888
 
889
#define STACK_SIZE	15000
890
 
891
dump_tab()	/* dump string table */
892
{
893
	int i, first, c, ent;
894
	int stack_top = STACK_SIZE;
895
 
896
	if(do_decomp == 0) {	/* compressing */
897
	int flag = 1;
898
 
899
	for(i=0; i<hsize; i++) {	/* build sort pointers */
900
		if((long)htabof(i) >= 0) {
901
			sorttab[codetabof(i)] = i;
902
		}
903
	}
904
	first = block_compress ? FIRST : 256;
905
	for(i = first; i < free_ent; i++) {
906
		fprintf(stderr, "%5d: \"", i);
907
		de_stack[--stack_top] = '\n';
908
		de_stack[--stack_top] = '"';
909
		stack_top = in_stack((htabof(sorttab[i])>>maxbits)&0xff,
910
	stack_top);
911
		for(ent=htabof(sorttab[i]) & ((1<<maxbits)-1);
912
			ent > 256;
913
			ent=htabof(sorttab[ent]) & ((1<<maxbits)-1)) {
914
			stack_top = in_stack(htabof(sorttab[ent]) >> maxbits,
915
						stack_top);
916
		}
917
		stack_top = in_stack(ent, stack_top);
918
		fwrite( &de_stack[stack_top], 1, STACK_SIZE-stack_top, stderr);
919
			stack_top = STACK_SIZE;
920
	}
921
	} else if(!debug) {	/* decompressing */
922
 
923
	for ( i = 0; i < free_ent; i++ ) {
924
		ent = i;
925
		c = tab_suffixof(ent);
926
		if ( isascii(c) && isprint(c) )
927
		fprintf( stderr, "%5d: %5d/'%c'  \"",
928
				ent, tab_prefixof(ent), c );
929
		else
930
		fprintf( stderr, "%5d: %5d/\\%03o \"",
931
				ent, tab_prefixof(ent), c );
932
		de_stack[--stack_top] = '\n';
933
		de_stack[--stack_top] = '"';
934
		for ( ; ent != NULL;
935
			ent = (ent >= FIRST ? tab_prefixof(ent) : NULL) ) {
936
		stack_top = in_stack(tab_suffixof(ent), stack_top);
937
		}
938
		fwrite( &de_stack[stack_top], 1, STACK_SIZE - stack_top, stderr );
939
		stack_top = STACK_SIZE;
940
	}
941
	}
942
}
943
 
944
int
945
in_stack(int c, int stack_top)
946
{
947
	if ( (isascii(c) && isprint(c) && c != '\\') || c == ' ' ) {
948
		de_stack[--stack_top] = c;
949
	} else {
950
		switch( c ) {
951
		case '\n': de_stack[--stack_top] = 'n'; break;
952
		case '\t': de_stack[--stack_top] = 't'; break;
953
		case '\b': de_stack[--stack_top] = 'b'; break;
954
		case '\f': de_stack[--stack_top] = 'f'; break;
955
		case '\r': de_stack[--stack_top] = 'r'; break;
956
		case '\\': de_stack[--stack_top] = '\\'; break;
957
		default:
958
	 	de_stack[--stack_top] = '0' + c % 8;
959
	 	de_stack[--stack_top] = '0' + (c / 8) % 8;
960
	 	de_stack[--stack_top] = '0' + c / 64;
961
	 	break;
962
		}
963
		de_stack[--stack_top] = '\\';
964
	}
965
	return stack_top;
966
}
967
#endif /* DEBUG */
968
 
969
void
970
writeerr(void)
971
{
972
	perror(ofname);
973
	unlink(ofname);
974
	exit(1);
975
}
976
 
977
void
978
copystat(ifname, ofname)
979
char *ifname, *ofname;
980
{
981
	int mode;
982
	time_t timep[2];			/* should be struct utimbuf */
983
	struct stat statbuf;
984
 
985
	fclose(stdout);
986
	if (stat(ifname, &statbuf)) {		/* Get stat on input file */
987
		perror(ifname);
988
		return;
989
	}
990
	if (!S_ISREG(statbuf.st_mode)) {
991
		if (quiet)
992
			fprintf(stderr, "%s: ", ifname);
993
		fprintf(stderr, " -- not a regular file: unchanged");
994
		exit_stat = 1;
995
	} else if (exit_stat == 2 && !force) {
996
		/* No compression: remove file.Z */
997
		if (!quiet)
998
			fprintf(stderr, " -- file unchanged");
999
	} else {			/* Successful Compression */
1000
		exit_stat = 0;
1001
		mode = statbuf.st_mode & 0777;
1002
		if (chmod(ofname, mode))		/* Copy modes */
1003
			perror(ofname);
1004
		/* Copy ownership */
1005
		chown(ofname, statbuf.st_uid, statbuf.st_gid);
1006
		timep[0] = statbuf.st_atime;
1007
		timep[1] = statbuf.st_mtime;
1008
		/* Update last accessed and modified times */
1009
		utime(ofname, (struct utimbuf *)timep);
1010
//		if (unlink(ifname))	/* Remove input file */
1011
//			perror(ifname);
1012
		return;			/* success */
1013
	}
1014
 
1015
	/* Unsuccessful return -- one of the tests failed */
1016
	if (unlink(ofname))
1017
		perror(ofname);
1018
}
1019
 
1020
/*
1021
 * This routine returns 1 if we are running in the foreground and stderr
1022
 * is a tty.
1023
 */
1024
int
1025
foreground(void)
1026
{
1027
	if(bgnd_flag)			/* background? */
1028
		return 0;
1029
	else				/* foreground */
1030
		return isatty(2);	/* and stderr is a tty */
1031
}
1032
 
1033
void
1034
onintr(int x)
1035
{
1036
	USED(x);
1037
	unlink(ofname);
1038
	exit(1);
1039
}
1040
 
1041
void
1042
oops(int x)		/* wild pointer -- assume bad input */
1043
{
1044
	USED(x);
1045
	if (do_decomp == 1)
1046
		fprintf(stderr, "uncompress: corrupt input\n");
1047
	unlink(ofname);
1048
	exit(1);
1049
}
1050
 
1051
void
1052
cl_block(void)		/* table clear for block compress */
1053
{
1054
	long rat;
1055
 
1056
	checkpoint = in_count + CHECK_GAP;
1057
#ifdef DEBUG
1058
	if ( debug ) {
1059
		fprintf ( stderr, "count: %ld, ratio: ", in_count );
1060
		prratio ( stderr, in_count, bytes_out );
1061
		fprintf ( stderr, "\n");
1062
	}
1063
#endif /* DEBUG */
1064
 
1065
	if (in_count > 0x007fffff) {	/* shift will overflow */
1066
		rat = bytes_out >> 8;
1067
		if (rat == 0)		/* Don't divide by zero */
1068
			rat = 0x7fffffff;
1069
		else
1070
			rat = in_count / rat;
1071
	} else
1072
		rat = (in_count << 8) / bytes_out;	/* 8 fractional bits */
1073
	if (rat > ratio)
1074
		ratio = rat;
1075
	else {
1076
		ratio = 0;
1077
#ifdef DEBUG
1078
		if (verbose)
1079
			dump_tab();	/* dump string table */
1080
#endif
1081
		cl_hash((count_int)hsize);
1082
		free_ent = FIRST;
1083
		clear_flg = 1;
1084
		output((code_int)CLEAR);
1085
#ifdef DEBUG
1086
		if (debug)
1087
			fprintf(stderr, "clear\n");
1088
#endif /* DEBUG */
1089
	}
1090
}
1091
 
1092
void
1093
cl_hash(count_int hsize)		/* reset code table */
1094
{
1095
	count_int *htab_p = htab+hsize;
1096
	long i;
1097
	long m1 = -1;
1098
 
1099
	i = hsize - 16;
1100
 	do {				/* might use Sys V memset(3) here */
1101
		*(htab_p-16) = m1;
1102
		*(htab_p-15) = m1;
1103
		*(htab_p-14) = m1;
1104
		*(htab_p-13) = m1;
1105
		*(htab_p-12) = m1;
1106
		*(htab_p-11) = m1;
1107
		*(htab_p-10) = m1;
1108
		*(htab_p-9) = m1;
1109
		*(htab_p-8) = m1;
1110
		*(htab_p-7) = m1;
1111
		*(htab_p-6) = m1;
1112
		*(htab_p-5) = m1;
1113
		*(htab_p-4) = m1;
1114
		*(htab_p-3) = m1;
1115
		*(htab_p-2) = m1;
1116
		*(htab_p-1) = m1;
1117
		htab_p -= 16;
1118
	} while ((i -= 16) >= 0);
1119
	for ( i += 16; i > 0; i-- )
1120
		*--htab_p = m1;
1121
}
1122
 
1123
void
1124
prratio(stream, num, den)
1125
FILE *stream;
1126
long num, den;
1127
{
1128
	int q;				/* Doesn't need to be long */
1129
 
1130
	if(num > 214748L)		/* 2147483647/10000 */
1131
		q = num / (den / 10000L);
1132
	else
1133
		q = 10000L * num / den;	/* Long calculations, though */
1134
	if (q < 0) {
1135
		putc('-', stream);
1136
		q = -q;
1137
	}
1138
	fprintf(stream, "%d.%02d%%", q / 100, q % 100);
1139
}
1140
 
1141
void
1142
version(void)
1143
{
1144
	fprintf(stderr, "%s\n", rcs_ident);
1145
	fprintf(stderr, "Options: ");
1146
#ifdef DEBUG
1147
	fprintf(stderr, "DEBUG, ");
1148
#endif
1149
#ifdef BSD4_2
1150
	fprintf(stderr, "BSD4_2, ");
1151
#endif
1152
	fprintf(stderr, "BITS = %d\n", BITS);
1153
}
1154
 
1155
/*
1156
 * The revision-history novel:
1157
 *
1158
 * $Header: compress.c,v 4.0 85/07/30 12:50:00 joe Release $
1159
 * $Log:	compress.c,v $
1160
 * Revision 4.0  85/07/30  12:50:00  joe
1161
 * Removed ferror() calls in output routine on every output except first.
1162
 * Prepared for release to the world.
1163
 *
1164
 * Revision 3.6  85/07/04  01:22:21  joe
1165
 * Remove much wasted storage by overlaying hash table with the tables
1166
 * used by decompress: tab_suffix[1<<BITS], stack[8000].  Updated USERMEM
1167
 * computations.  Fixed dump_tab() DEBUG routine.
1168
 *
1169
 * Revision 3.5  85/06/30  20:47:21  jaw
1170
 * Change hash function to use exclusive-or.  Rip out hash cache.  These
1171
 * speedups render the megamemory version defunct, for now.  Make decoder
1172
 * stack global.  Parts of the RCS trunks 2.7, 2.6, and 2.1 no longer apply.
1173
 *
1174
 * Revision 3.4  85/06/27  12:00:00  ken
1175
 * Get rid of all floating-point calculations by doing all compression ratio
1176
 * calculations in fixed point.
1177
 *
1178
 * Revision 3.3  85/06/24  21:53:24  joe
1179
 * Incorporate portability suggestion for M_XENIX.  Got rid of text on #else
1180
 * and #endif lines.  Cleaned up #ifdefs for vax and interdata.
1181
 *
1182
 * Revision 3.2  85/06/06  21:53:24  jaw
1183
 * Incorporate portability suggestions for Z8000, IBM PC/XT from mailing list.
1184
 * Default to "quiet" output (no compression statistics).
1185
 *
1186
 * Revision 3.1  85/05/12  18:56:13  jaw
1187
 * Integrate decompress() stack speedups (from early pointer mods by McKie).
1188
 * Repair multi-file USERMEM gaffe.  Unify 'force' flags to mimic semantics
1189
 * of SVR2 'pack'.  Streamline block-compress table clear logic.  Increase
1190
 * output byte count by magic number size.
1191
 *
1192
 * Revision 3.0	84/11/27  11:50:00  petsd!joe
1193
 * Set HSIZE depending on BITS.  Set BITS depending on USERMEM.  Unrolled
1194
 * loops in clear routines.  Added "-C" flag for 2.0 compatibility.  Used
1195
 * unsigned compares on Perkin-Elmer.  Fixed foreground check.
1196
 *
1197
 * Revision 2.7	84/11/16  19:35:39  ames!jaw
1198
 * Cache common hash codes based on input statistics; this improves
1199
 * performance for low-density raster images.  Pass on #ifdef bundle
1200
 * from Turkowski.
1201
 *
1202
 * Revision 2.6	84/11/05  19:18:21  ames!jaw
1203
 * Vary size of hash tables to reduce time for small files.
1204
 * Tune PDP-11 hash function.
1205
 *
1206
 * Revision 2.5	84/10/30  20:15:14  ames!jaw
1207
 * Junk chaining; replace with the simpler (and, on the VAX, faster)
1208
 * double hashing, discussed within.  Make block compression standard.
1209
 *
1210
 * Revision 2.4	84/10/16  11:11:11  ames!jaw
1211
 * Introduce adaptive reset for block compression, to boost the rate
1212
 * another several percent.  (See mailing list notes.)
1213
 *
1214
 * Revision 2.3	84/09/22  22:00:00  petsd!joe
1215
 * Implemented "-B" block compress.  Implemented REVERSE sorting of tab_next.
1216
 * Bug fix for last bits.  Changed fwrite to putchar loop everywhere.
1217
 *
1218
 * Revision 2.2	84/09/18  14:12:21  ames!jaw
1219
 * Fold in news changes, small machine typedef from thomas,
1220
 * #ifdef interdata from joe.
1221
 *
1222
 * Revision 2.1	84/09/10  12:34:56  ames!jaw
1223
 * Configured fast table lookup for 32-bit machines.
1224
 * This cuts user time in half for b <= FBITS, and is useful for news batching
1225
 * from VAX to PDP sites.  Also sped up decompress() [fwrite->putc] and
1226
 * added signal catcher [plus beef in writeerr()] to delete effluvia.
1227
 *
1228
 * Revision 2.0	84/08/28  22:00:00  petsd!joe
1229
 * Add check for foreground before prompting user.  Insert maxbits into
1230
 * compressed file.  Force file being uncompressed to end with ".Z".
1231
 * Added "-c" flag and "zcat".  Prepared for release.
1232
 *
1233
 * Revision 1.10  84/08/24  18:28:00  turtlevax!ken
1234
 * Will only compress regular files (no directories), added a magic number
1235
 * header (plus an undocumented -n flag to handle old files without headers),
1236
 * added -f flag to force overwriting of possibly existing destination file,
1237
 * otherwise the user is prompted for a response.  Will tack on a .Z to a
1238
 * filename if it doesn't have one when decompressing.  Will only replace
1239
 * file if it was compressed.
1240
 *
1241
 * Revision 1.9  84/08/16  17:28:00  turtlevax!ken
1242
 * Removed scanargs(), getopt(), added .Z extension and unlimited number of
1243
 * filenames to compress.  Flags may be clustered (-Ddvb12) or separated
1244
 * (-D -d -v -b 12), or combination thereof.  Modes and other status is
1245
 * copied with copystat().  -O bug for 4.2 seems to have disappeared with
1246
 * 1.8.
1247
 *
1248
 * Revision 1.8  84/08/09  23:15:00  joe
1249
 * Made it compatible with vax version, installed jim's fixes/enhancements
1250
 *
1251
 * Revision 1.6  84/08/01  22:08:00  joe
1252
 * Sped up algorithm significantly by sorting the compress chain.
1253
 *
1254
 * Revision 1.5  84/07/13  13:11:00  srd
1255
 * Added C version of vax asm routines.  Changed structure to arrays to
1256
 * save much memory.  Do unsigned compares where possible (faster on
1257
 * Perkin-Elmer)
1258
 *
1259
 * Revision 1.4  84/07/05  03:11:11  thomas
1260
 * Clean up the code a little and lint it.  (Lint complains about all
1261
 * the regs used in the asm, but I'm not going to "fix" this.)
1262
 *
1263
 * Revision 1.3  84/07/05  02:06:54  thomas
1264
 * Minor fixes.
1265
 *
1266
 * Revision 1.2  84/07/05  00:27:27  thomas
1267
 * Add variable bit length output.
1268
 */