Subversion Repositories planix.SVN

Rev

Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
2 - 1
#include	<u.h>
2
#include	<libc.h>
3
#include	<bio.h>
4
 
5
/*
6
bugs:
7
	00/ff for end of file can conflict with 00/ff characters
8
*/
9
 
10
enum
11
{
12
	Nline	= 100000,		/* default max number of lines saved in memory */
13
	Nmerge	= 10,			/* max number of temporary files merged */
14
	Nfield	= 20,			/* max number of argument fields */
15
 
16
	Bflag	= 1<<0,			/* flags per field */
17
	B1flag	= 1<<1,
18
 
19
	Dflag	= 1<<2,
20
	Fflag	= 1<<3,
21
	Gflag	= 1<<4,
22
	Iflag	= 1<<5,
23
	Mflag	= 1<<6,
24
	Nflag	= 1<<7,
25
	Rflag	= 1<<8,
26
	Wflag	= 1<<9,
27
 
28
	NSstart	= 0,			/* states for number to key decoding */
29
	NSsign,
30
	NSzero,
31
	NSdigit,
32
	NSpoint,
33
	NSfract,
34
	NSzerofract,
35
	NSexp,
36
	NSexpsign,
37
	NSexpdigit,
38
};
39
 
40
typedef	struct	Line	Line;
41
typedef	struct	Key	Key;
42
typedef	struct	Merge	Merge;
43
typedef	struct	Field	Field;
44
 
45
struct	Line
46
{
47
	Key*	key;
48
	int	llen;		/* always >= 1 */
49
	uchar	line[1];	/* always ends in '\n' */
50
};
51
 
52
struct	Merge
53
{
54
	Key*	key;		/* copy of line->key so (Line*) looks like (Merge*) */
55
	Line*	line;		/* line at the head of a merged temp file */
56
	int	fd;		/* file descriptor */
57
	Biobuf	b;		/* iobuf for reading a temp file */
58
};
59
 
60
struct	Key
61
{
62
	int	klen;
63
	uchar	key[1];
64
};
65
 
66
struct	Field
67
{
68
	int	beg1;
69
	int	beg2;
70
	int	end1;
71
	int	end2;
72
 
73
	long	flags;
74
	uchar	mapto[1+255];
75
 
76
	void	(*dokey)(Key*, uchar*, uchar*, Field*);
77
};
78
 
79
struct args
80
{
81
	char*	ofile;
82
	char*	tname;
83
	Rune	tabchar;
84
	char	cflag;
85
	char	uflag;
86
	char	vflag;
87
	int	nfield;
88
	int	nfile;
89
	Field	field[Nfield];
90
 
91
	Line**	linep;
92
	long	nline;			/* number of lines in this temp file */
93
	long	lineno;			/* overall ordinal for -s option */
94
	int	ntemp;
95
	long	mline;			/* max lines per file */
96
} args;
97
 
98
extern	Rune*	month[12];
99
 
100
void	buildkey(Line*);
101
void	doargs(int, char*[]);
102
void	dofield(char*, int*, int*, int, int);
103
void	dofile(Biobuf*);
104
void	dokey_(Key*, uchar*, uchar*, Field*);
105
void	dokey_dfi(Key*, uchar*, uchar*, Field*);
106
void	dokey_gn(Key*, uchar*, uchar*, Field*);
107
void	dokey_m(Key*, uchar*, uchar*, Field*);
108
void	dokey_r(Key*, uchar*, uchar*, Field*);
109
void	done(char*);
110
int	kcmp(Key*, Key*);
111
void	makemapd(Field*);
112
void	makemapm(Field*);
113
void	mergefiles(int, int, Biobuf*);
114
void	mergeout(Biobuf*);
115
void	newfield(void);
116
Line*	newline(Biobuf*);
117
void	nomem(void);
118
void	notifyf(void*, char*);
119
void	printargs(void);
120
void	printout(Biobuf*);
121
void	setfield(int, int);
122
uchar*	skip(uchar*, int, int, int, int);
123
void	sort4(void*, ulong);
124
char*	tempfile(int);
125
void	tempout(void);
126
void	lineout(Biobuf*, Line*);
127
 
128
void
129
main(int argc, char *argv[])
130
{
131
	int i, f;
132
	char *s;
133
	Biobuf bbuf;
134
 
135
	notify(notifyf);	/**/
136
	doargs(argc, argv);
137
	if(args.vflag)
138
		printargs();
139
 
140
	for(i=1; i<argc; i++) {
141
		s = argv[i];
142
		if(s == 0)
143
			continue;
144
		if(strcmp(s, "-") == 0) {
145
			Binit(&bbuf, 0, OREAD);
146
			dofile(&bbuf);
147
			Bterm(&bbuf);
148
			continue;
149
		}
150
		f = open(s, OREAD);
151
		if(f < 0) {
152
			fprint(2, "sort: open %s: %r\n", s);
153
			done("open");
154
		}
155
		Binit(&bbuf, f, OREAD);
156
		dofile(&bbuf);
157
		Bterm(&bbuf);
158
		close(f);
159
	}
160
	if(args.nfile == 0) {
161
		Binit(&bbuf, 0, OREAD);
162
		dofile(&bbuf);
163
		Bterm(&bbuf);
164
	}
165
	if(args.cflag)
166
		done(0);
167
	if(args.vflag)
168
		fprint(2, "=========\n");
169
 
170
	f = 1;
171
	if(args.ofile) {
172
		f = create(args.ofile, OWRITE, 0666);
173
		if(f < 0) {
174
			fprint(2, "sort: create %s: %r\n", args.ofile);
175
			done("create");
176
		}
177
	}
178
 
179
	Binit(&bbuf, f, OWRITE);
180
	if(args.ntemp) {
181
		tempout();
182
		mergeout(&bbuf);
183
	} else {
184
		printout(&bbuf);
185
	}
186
	Bterm(&bbuf);
187
	done(0);
188
}
189
 
190
void
191
dofile(Biobuf *b)
192
{
193
	Line *l, *ol;
194
	int n;
195
 
196
	if(args.cflag) {
197
		ol = newline(b);
198
		if(ol == 0)
199
			return;
200
		for(;;) {
201
			l = newline(b);
202
			if(l == 0)
203
				break;
204
			n = kcmp(ol->key, l->key);
205
			if(n > 0 || (n == 0 && args.uflag)) {
206
				fprint(2, "sort: -c file not in sort\n"); /**/
207
				done("order");
208
			}
209
			free(ol->key);
210
			free(ol);
211
			ol = l;
212
		}
213
		return;
214
	}
215
 
216
	if(args.linep == 0) {
217
		args.linep = malloc(args.mline * sizeof(args.linep));
218
		if(args.linep == 0)
219
			nomem();
220
	}
221
	for(;;) {
222
		l = newline(b);
223
		if(l == 0)
224
			break;
225
		if(args.nline >= args.mline)
226
			tempout();
227
		args.linep[args.nline] = l;
228
		args.nline++;
229
		args.lineno++;
230
	}
231
}
232
 
233
void
234
notifyf(void*, char *s)
235
{
236
 
237
	if(strcmp(s, "interrupt") == 0)
238
		done(0);
239
	if(strcmp(s, "hangup") == 0)
240
		done(0);
241
	if(strcmp(s, "kill") == 0)
242
		done(0);
243
	if(strncmp(s, "sys: write on closed pipe", 25) == 0)
244
		done(0);
245
	fprint(2, "sort: note: %s\n", s);
246
	abort();
247
}
248
 
249
Line*
250
newline(Biobuf *b)
251
{
252
	Line *l;
253
	char *p;
254
	int n, c;
255
 
256
	p = Brdline(b, '\n');
257
	n = Blinelen(b);
258
	if(p == 0) {
259
		if(n == 0)
260
			return 0;
261
		l = 0;
262
		for(n=0;;) {
263
			if((n & 31) == 0) {
264
				l = realloc(l, sizeof(Line) +
265
					(n+31)*sizeof(l->line[0]));
266
				if(l == 0)
267
					nomem();
268
			}
269
			c = Bgetc(b);
270
			if(c < 0) {
271
				fprint(2, "sort: newline added\n");
272
				c = '\n';
273
			}
274
			l->line[n++] = c;
275
			if(c == '\n')
276
				break;
277
		}
278
		l->llen = n;
279
		buildkey(l);
280
		return l;
281
	}
282
	l = malloc(sizeof(Line) +
283
		(n-1)*sizeof(l->line[0]));
284
	if(l == 0)
285
		nomem();
286
	l->llen = n;
287
	memmove(l->line, p, n);
288
	buildkey(l);
289
	return l;
290
}
291
 
292
void
293
lineout(Biobuf *b, Line *l)
294
{
295
	int n, m;
296
 
297
	n = l->llen;
298
	m = Bwrite(b, l->line, n);
299
	if(n != m)
300
		exits("write");
301
}
302
 
303
void
304
tempout(void)
305
{
306
	long n;
307
	Line **lp, *l;
308
	char *tf;
309
	int f;
310
	Biobuf tb;
311
 
312
	sort4(args.linep, args.nline);
313
	tf = tempfile(args.ntemp);
314
	args.ntemp++;
315
	f = create(tf, OWRITE, 0666);
316
	if(f < 0) {
317
		fprint(2, "sort: create %s: %r\n", tf);
318
		done("create");
319
	}
320
 
321
	Binit(&tb, f, OWRITE);
322
	lp = args.linep;
323
	for(n=args.nline; n>0; n--) {
324
		l = *lp++;
325
		lineout(&tb, l);
326
		free(l->key);
327
		free(l);
328
	}
329
	args.nline = 0;
330
	Bterm(&tb);
331
	close(f);
332
}
333
 
334
void
335
done(char *xs)
336
{
337
	int i;
338
 
339
	for(i=0; i<args.ntemp; i++)
340
		remove(tempfile(i));
341
	exits(xs);
342
}
343
 
344
void
345
nomem(void)
346
{
347
	fprint(2, "sort: out of memory\n");
348
	done("mem");
349
}
350
 
351
char*
352
tempfile(int n)
353
{
354
	static char file[100];
355
	static uint pid;
356
	char *dir;
357
 
358
	dir = "/tmp";
359
	if(args.tname)
360
		dir = args.tname;
361
	if(strlen(dir) >= nelem(file)-20) {
362
		fprint(2, "temp file directory name is too long: %s\n", dir);
363
		done("tdir");
364
	}
365
 
366
	if(pid == 0) {
367
		pid = getpid();
368
		if(pid == 0) {
369
			pid = time(0);
370
			if(pid == 0)
371
				pid = 1;
372
		}
373
	}
374
 
375
	sprint(file, "%s/sort.%.4d.%.4d", dir, pid%10000, n);
376
	return file;
377
}
378
 
379
void
380
mergeout(Biobuf *b)
381
{
382
	int n, i, f;
383
	char *tf;
384
	Biobuf tb;
385
 
386
	for(i=0; i<args.ntemp; i+=n) {
387
		n = args.ntemp - i;
388
		if(n > Nmerge) {
389
			tf = tempfile(args.ntemp);
390
			args.ntemp++;
391
			f = create(tf, OWRITE, 0666);
392
			if(f < 0) {
393
				fprint(2, "sort: create %s: %r\n", tf);
394
				done("create");
395
			}
396
			Binit(&tb, f, OWRITE);
397
 
398
			n = Nmerge;
399
			mergefiles(i, n, &tb);
400
 
401
			Bterm(&tb);
402
			close(f);
403
		} else
404
			mergefiles(i, n, b);
405
	}
406
}
407
 
408
void
409
mergefiles(int t, int n, Biobuf *b)
410
{
411
	Merge *m, *mp, **mmp;
412
	Key *ok;
413
	Line *l;
414
	char *tf;
415
	int i, f, nn;
416
 
417
	mmp = malloc(n*sizeof(*mmp));
418
	mp = malloc(n*sizeof(*mp));
419
	if(mmp == 0 || mp == 0)
420
		nomem();
421
 
422
	nn = 0;
423
	m = mp;
424
	for(i=0; i<n; i++,m++) {
425
		tf = tempfile(t+i);
426
		f = open(tf, OREAD);
427
		if(f < 0) {
428
			fprint(2, "sort: reopen %s: %r\n", tf);
429
			done("open");
430
		}
431
		m->fd = f;
432
		Binit(&m->b, f, OREAD);
433
		mmp[nn] = m;
434
 
435
		l = newline(&m->b);
436
		if(l == 0)
437
			continue;
438
		nn++;
439
		m->line = l;
440
		m->key = l->key;
441
	}
442
 
443
	ok = 0;
444
	for(;;) {
445
		sort4(mmp, nn);
446
		m = *mmp;
447
		if(nn == 0)
448
			break;
449
		for(;;) {
450
			l = m->line;
451
			if(args.uflag && ok && kcmp(ok, l->key) == 0) {
452
				free(l->key);
453
				free(l);
454
			} else {
455
				lineout(b, l);
456
				if(ok)
457
					free(ok);
458
				ok = l->key;
459
				free(l);
460
			}
461
 
462
			l = newline(&m->b);
463
			if(l == 0) {
464
				nn--;
465
				mmp[0] = mmp[nn];
466
				break;
467
			}
468
			m->line = l;
469
			m->key = l->key;
470
			if(nn > 1 && kcmp(mmp[0]->key, mmp[1]->key) > 0)
471
				break;
472
		}
473
	}
474
	if(ok)
475
		free(ok);
476
 
477
	m = mp;
478
	for(i=0; i<n; i++,m++) {
479
		Bterm(&m->b);
480
		close(m->fd);
481
	}
482
 
483
	free(mp);
484
	free(mmp);
485
}
486
 
487
int
488
kcmp(Key *ka, Key *kb)
489
{
490
	int n, m;
491
 
492
	/*
493
	 * set n to length of smaller key
494
	 */
495
	n = ka->klen;
496
	m = kb->klen;
497
	if(n > m)
498
		n = m;
499
	return memcmp(ka->key, kb->key, n);
500
}
501
 
502
void
503
printout(Biobuf *b)
504
{
505
	long n;
506
	Line **lp, *l;
507
	Key *ok;
508
 
509
	sort4(args.linep, args.nline);
510
	lp = args.linep;
511
	ok = 0;
512
	for(n=args.nline; n>0; n--) {
513
		l = *lp++;
514
		if(args.uflag && ok && kcmp(ok, l->key) == 0)
515
			continue;
516
		lineout(b, l);
517
		ok = l->key;
518
	}
519
}
520
 
521
void
522
setfield(int n, int c)
523
{
524
	Field *f;
525
 
526
	f = &args.field[n];
527
	switch(c) {
528
	default:
529
		fprint(2, "sort: unknown option: field.%C\n", c);
530
		done("option");
531
	case 'b':	/* skip blanks */
532
		f->flags |= Bflag;
533
		break;
534
	case 'd':	/* directory order */
535
		f->flags |= Dflag;
536
		break;
537
	case 'f':	/* fold case */
538
		f->flags |= Fflag;
539
		break;
540
	case 'g':	/* floating point -n case */
541
		f->flags |= Gflag;
542
		break;
543
	case 'i':	/* ignore non-ascii */
544
		f->flags |= Iflag;
545
		break;
546
	case 'M':	/* month */
547
		f->flags |= Mflag;
548
		break;
549
	case 'n':	/* numbers */
550
		f->flags |= Nflag;
551
		break;
552
	case 'r':	/* reverse */
553
		f->flags |= Rflag;
554
		break;
555
	case 'w':	/* ignore white */
556
		f->flags |= Wflag;
557
		break;
558
	}
559
}
560
 
561
void
562
dofield(char *s, int *n1, int *n2, int off1, int off2)
563
{
564
	int c, n;
565
 
566
	c = *s++;
567
	if(c >= '0' && c <= '9') {
568
		n = 0;
569
		while(c >= '0' && c <= '9') {
570
			n = n*10 + (c-'0');
571
			c = *s++;
572
		}
573
		n -= off1;	/* posix committee: rot in hell */
574
		if(n < 0) {
575
			fprint(2, "sort: field offset must be positive\n");
576
			done("option");
577
		}
578
		*n1 = n;
579
	}
580
	if(c == '.') {
581
		c = *s++;
582
		if(c >= '0' && c <= '9') {
583
			n = 0;
584
			while(c >= '0' && c <= '9') {
585
				n = n*10 + (c-'0');
586
				c = *s++;
587
			}
588
			n -= off2;
589
			if(n < 0) {
590
				fprint(2, "sort: character offset must be positive\n");
591
				done("option");
592
			}
593
			*n2 = n;
594
		}
595
	}
596
	while(c != 0) {
597
		setfield(args.nfield, c);
598
		c = *s++;
599
	}
600
}
601
 
602
void
603
printargs(void)
604
{
605
	int i, n;
606
	Field *f;
607
	char *prefix;
608
 
609
	fprint(2, "sort");
610
	for(i=0; i<=args.nfield; i++) {
611
		f = &args.field[i];
612
		prefix = " -";
613
		if(i) {
614
			n = f->beg1;
615
			if(n >= 0)
616
				fprint(2, " +%d", n);
617
			else
618
				fprint(2, " +*");
619
			n = f->beg2;
620
			if(n >= 0)
621
				fprint(2, ".%d", n);
622
			else
623
				fprint(2, ".*");
624
 
625
			if(f->flags & B1flag)
626
				fprint(2, "b");
627
 
628
			n = f->end1;
629
			if(n >= 0)
630
				fprint(2, " -%d", n);
631
			else
632
				fprint(2, " -*");
633
			n = f->end2;
634
			if(n >= 0)
635
				fprint(2, ".%d", n);
636
			else
637
				fprint(2, ".*");
638
			prefix = "";
639
		}
640
		if(f->flags & Bflag)
641
			fprint(2, "%sb", prefix);
642
		if(f->flags & Dflag)
643
			fprint(2, "%sd", prefix);
644
		if(f->flags & Fflag)
645
			fprint(2, "%sf", prefix);
646
		if(f->flags & Gflag)
647
			fprint(2, "%sg", prefix);
648
		if(f->flags & Iflag)
649
			fprint(2, "%si", prefix);
650
		if(f->flags & Mflag)
651
			fprint(2, "%sM", prefix);
652
		if(f->flags & Nflag)
653
			fprint(2, "%sn", prefix);
654
		if(f->flags & Rflag)
655
			fprint(2, "%sr", prefix);
656
		if(f->flags & Wflag)
657
			fprint(2, "%sw", prefix);
658
	}
659
	if(args.cflag)
660
		fprint(2, " -c");
661
	if(args.uflag)
662
		fprint(2, " -u");
663
	if(args.ofile)
664
		fprint(2, " -o %s", args.ofile);
665
	if(args.mline != Nline)
666
		fprint(2, " -l %ld", args.mline);
667
	fprint(2, "\n");
668
}
669
 
670
void
671
newfield(void)
672
{
673
	int n;
674
	Field *f;
675
 
676
	n = args.nfield + 1;
677
	if(n >= Nfield) {
678
		fprint(2, "sort: too many fields specified\n");
679
		done("option");
680
	}
681
	args.nfield = n;
682
	f = &args.field[n];
683
	f->beg1 = -1;
684
	f->beg2 = -1;
685
	f->end1 = -1;
686
	f->end2 = -1;
687
}
688
 
689
void
690
doargs(int argc, char *argv[])
691
{
692
	int i, c, hadplus;
693
	char *s, *p, *q;
694
	Field *f;
695
 
696
	hadplus = 0;
697
	args.mline = Nline;
698
	for(i=1; i<argc; i++) {
699
		s = argv[i];
700
		c = *s++;
701
		if(c == '-') {
702
			c = *s;
703
			if(c == 0)		/* forced end of arg marker */
704
				break;
705
			argv[i] = 0;		/* clobber args processed */
706
			if(c == '.' || (c >= '0' && c <= '9')) {
707
				if(!hadplus)
708
					newfield();
709
				f = &args.field[args.nfield];
710
				dofield(s, &f->end1, &f->end2, 0, 0);
711
				hadplus = 0;
712
				continue;
713
			}
714
 
715
			while(c = *s++)
716
			switch(c) {
717
			case '-':	/* end of options */
718
				i = argc;
719
				continue;
720
			case 'T':	/* temp directory */
721
				if(*s == 0) {
722
					i++;
723
					if(i < argc) {
724
						args.tname = argv[i];
725
						argv[i] = 0;
726
					}
727
				} else
728
					args.tname = s;
729
				s = strchr(s, 0);
730
				break;
731
			case 'o':	/* output file */
732
				if(*s == 0) {
733
					i++;
734
					if(i < argc) {
735
						args.ofile = argv[i];
736
						argv[i] = 0;
737
					}
738
				} else
739
					args.ofile = s;
740
				s = strchr(s, 0);
741
				break;
742
			case 'k':	/* posix key (what were they thinking?) */
743
				p = 0;
744
				if(*s == 0) {
745
					i++;
746
					if(i < argc) {
747
						p = argv[i];
748
						argv[i] = 0;
749
					}
750
				} else
751
					p = s;
752
				s = strchr(s, 0);
753
				if(p == 0)
754
					break;
755
 
756
				newfield();
757
				q = strchr(p, ',');
758
				if(q)
759
					*q++ = 0;
760
				f = &args.field[args.nfield];
761
				dofield(p, &f->beg1, &f->beg2, 1, 1);
762
				if(f->flags & Bflag) {
763
					f->flags |= B1flag;
764
					f->flags &= ~Bflag;
765
				}
766
				if(q) {
767
					dofield(q, &f->end1, &f->end2, 1, 0);
768
					if(f->end2 <= 0)
769
						f->end1++;
770
				}
771
				hadplus = 0;
772
				break;
773
			case 't':	/* tab character */
774
				if(*s == 0) {
775
					i++;
776
					if(i < argc) {
777
						chartorune(&args.tabchar, argv[i]);
778
						argv[i] = 0;
779
					}
780
				} else
781
					s += chartorune(&args.tabchar, s);
782
				if(args.tabchar == '\n') {
783
					fprint(2, "aw come on, rob\n");
784
					done("rob");
785
				}
786
				break;
787
			case 'c':	/* check order */
788
				args.cflag = 1;
789
				break;
790
			case 'u':	/* unique */
791
				args.uflag = 1;
792
				break;
793
			case 'v':	/* debugging noise */
794
				args.vflag = 1;
795
				break;
796
			case 'l':
797
				if(*s == 0) {
798
					i++;
799
					if(i < argc) {
800
						args.mline = atol(argv[i]);
801
						argv[i] = 0;
802
					}
803
				} else
804
					args.mline = atol(s);
805
				s = strchr(s, 0);
806
				break;
807
 
808
			case 'M':	/* month */
809
			case 'b':	/* skip blanks */
810
			case 'd':	/* directory order */
811
			case 'f':	/* fold case */
812
			case 'g':	/* floating numbers */
813
			case 'i':	/* ignore non-ascii */
814
			case 'n':	/* numbers */
815
			case 'r':	/* reverse */
816
			case 'w':	/* ignore white */
817
				if(args.nfield > 0)
818
					fprint(2, "sort: global field set after -k\n");
819
				setfield(0, c);
820
				break;
821
			case 'm':
822
				/* option m silently ignored but required by posix */
823
				break;
824
			default:
825
				fprint(2, "sort: unknown option: -%C\n", c);
826
				done("option");
827
			}
828
			continue;
829
		}
830
		if(c == '+') {
831
			argv[i] = 0;		/* clobber args processed */
832
			c = *s;
833
			if(c == '.' || (c >= '0' && c <= '9')) {
834
				newfield();
835
				f = &args.field[args.nfield];
836
				dofield(s, &f->beg1, &f->beg2, 0, 0);
837
				if(f->flags & Bflag) {
838
					f->flags |= B1flag;
839
					f->flags &= ~Bflag;
840
				}
841
				hadplus = 1;
842
				continue;
843
			}
844
			fprint(2, "sort: unknown option: +%C\n", c);
845
			done("option");
846
		}
847
		args.nfile++;
848
	}
849
 
850
	for(i=0; i<=args.nfield; i++) {
851
		f = &args.field[i];
852
 
853
		/*
854
		 * global options apply to fields that
855
		 * specify no options
856
		 */
857
		if(f->flags == 0) {
858
			f->flags = args.field[0].flags;
859
			if(args.field[0].flags & Bflag)
860
				f->flags |= B1flag;
861
		}
862
 
863
 
864
		/*
865
		 * build buildkey specification
866
		 */
867
		switch(f->flags & ~(Bflag|B1flag)) {
868
		default:
869
			fprint(2, "sort: illegal combination of flags: %lx\n", f->flags);
870
			done("option");
871
		case 0:
872
			f->dokey = dokey_;
873
			break;
874
		case Rflag:
875
			f->dokey = dokey_r;
876
			break;
877
		case Gflag:
878
		case Nflag:
879
		case Gflag|Nflag:
880
		case Gflag|Rflag:
881
		case Nflag|Rflag:
882
		case Gflag|Nflag|Rflag:
883
			f->dokey = dokey_gn;
884
			break;
885
		case Mflag:
886
		case Mflag|Rflag:
887
			f->dokey = dokey_m;
888
			makemapm(f);
889
			break;
890
		case Dflag:
891
		case Dflag|Fflag:
892
		case Dflag|Fflag|Iflag:
893
		case Dflag|Fflag|Iflag|Rflag:
894
		case Dflag|Fflag|Iflag|Rflag|Wflag:
895
		case Dflag|Fflag|Iflag|Wflag:
896
		case Dflag|Fflag|Rflag:
897
		case Dflag|Fflag|Rflag|Wflag:
898
		case Dflag|Fflag|Wflag:
899
		case Dflag|Iflag:
900
		case Dflag|Iflag|Rflag:
901
		case Dflag|Iflag|Rflag|Wflag:
902
		case Dflag|Iflag|Wflag:
903
		case Dflag|Rflag:
904
		case Dflag|Rflag|Wflag:
905
		case Dflag|Wflag:
906
		case Fflag:
907
		case Fflag|Iflag:
908
		case Fflag|Iflag|Rflag:
909
		case Fflag|Iflag|Rflag|Wflag:
910
		case Fflag|Iflag|Wflag:
911
		case Fflag|Rflag:
912
		case Fflag|Rflag|Wflag:
913
		case Fflag|Wflag:
914
		case Iflag:
915
		case Iflag|Rflag:
916
		case Iflag|Rflag|Wflag:
917
		case Iflag|Wflag:
918
		case Wflag:
919
			f->dokey = dokey_dfi;
920
			makemapd(f);
921
			break;
922
		}
923
	}
924
 
925
	/*
926
	 * random spot checks
927
	 */
928
	if(args.nfile > 1 && args.cflag) {
929
		fprint(2, "sort: -c can have at most one input file\n");
930
		done("option");
931
	}
932
	return;
933
}
934
 
935
uchar*
936
skip(uchar *l, int n1, int n2, int bflag, int endfield)
937
{
938
	int i, c, tc;
939
	Rune r;
940
 
941
	if(endfield && n1 < 0)
942
		return 0;
943
 
944
	c = *l++;
945
	tc = args.tabchar;
946
	if(tc) {
947
		if(tc < Runeself) {
948
			for(i=n1; i>0; i--) {
949
				while(c != tc) {
950
					if(c == '\n')
951
						return 0;
952
					c = *l++;
953
				}
954
				if(!(endfield && i == 1))
955
					c = *l++;
956
			}
957
		} else {
958
			l--;
959
			l += chartorune(&r, (char*)l);
960
			for(i=n1; i>0; i--) {
961
				while(r != tc) {
962
					if(r == '\n')
963
						return 0;
964
					l += chartorune(&r, (char*)l);
965
				}
966
				if(!(endfield && i == 1))
967
					l += chartorune(&r, (char*)l);
968
			}
969
			c = r;
970
		}
971
	} else {
972
		for(i=n1; i>0; i--) {
973
			while(c == ' ' || c == '\t')
974
				c = *l++;
975
			while(c != ' ' && c != '\t') {
976
				if(c == '\n')
977
					return 0;
978
				c = *l++;
979
			}
980
		}
981
	}
982
 
983
	if(bflag)
984
		while(c == ' ' || c == '\t')
985
			c = *l++;
986
 
987
	l--;
988
	for(i=n2; i>0; i--) {
989
		c = *l;
990
		if(c < Runeself) {
991
			if(c == '\n')
992
				return 0;
993
			l++;
994
			continue;
995
		}
996
		l += chartorune(&r, (char*)l);
997
	}
998
	return l;
999
}
1000
 
1001
void
1002
dokey_gn(Key *k, uchar *lp, uchar *lpe, Field *f)
1003
{
1004
	uchar *kp;
1005
	int c, cl, dp;
1006
	int state, nzero, exp, expsign, rflag;
1007
 
1008
	cl = k->klen + 3;
1009
	kp = k->key + cl;	/* skip place for sign, exponent[2] */
1010
 
1011
	nzero = 0;		/* number of trailing zeros */
1012
	exp = 0;		/* value of the exponent */
1013
	expsign = 0;		/* sign of the exponent */
1014
	dp = 0x4040;		/* location of decimal point */
1015
	rflag = f->flags&Rflag;	/* xor of rflag and - sign */
1016
	state = NSstart;
1017
 
1018
	for(;; lp++) {
1019
		if(lp >= lpe)
1020
			break;
1021
		c = *lp;
1022
 
1023
		if(c == ' ' || c == '\t') {
1024
			switch(state) {
1025
			case NSstart:
1026
			case NSsign:
1027
				continue;
1028
			}
1029
			break;
1030
		}
1031
		if(c == '+' || c == '-') {
1032
			switch(state) {
1033
			case NSstart:
1034
				state = NSsign;
1035
				if(c == '-')
1036
					rflag = !rflag;
1037
				continue;
1038
			case NSexp:
1039
				state = NSexpsign;
1040
				if(c == '-')
1041
					expsign = 1;
1042
				continue;
1043
			}
1044
			break;
1045
		}
1046
		if(c == '0') {
1047
			switch(state) {
1048
			case NSdigit:
1049
				if(rflag)
1050
					c = ~c;
1051
				*kp++ = c;
1052
				cl++;
1053
				nzero++;
1054
				dp++;
1055
				state = NSdigit;
1056
				continue;
1057
			case NSfract:
1058
				if(rflag)
1059
					c = ~c;
1060
				*kp++ = c;
1061
				cl++;
1062
				nzero++;
1063
				state = NSfract;
1064
				continue;
1065
			case NSstart:
1066
			case NSsign:
1067
			case NSzero:
1068
				state = NSzero;
1069
				continue;
1070
			case NSzerofract:
1071
			case NSpoint:
1072
				dp--;
1073
				state = NSzerofract;
1074
				continue;
1075
			case NSexpsign:
1076
			case NSexp:
1077
			case NSexpdigit:
1078
				exp = exp*10 + (c - '0');
1079
				state = NSexpdigit;
1080
				continue;
1081
			}
1082
			break;
1083
		}
1084
		if(c >= '1' && c <= '9') {
1085
			switch(state) {
1086
			case NSzero:
1087
			case NSstart:
1088
			case NSsign:
1089
			case NSdigit:
1090
				if(rflag)
1091
					c = ~c;
1092
				*kp++ = c;
1093
				cl++;
1094
				nzero = 0;
1095
				dp++;
1096
				state = NSdigit;
1097
				continue;
1098
			case NSzerofract:
1099
			case NSpoint:
1100
			case NSfract:
1101
				if(rflag)
1102
					c = ~c;
1103
				*kp++ = c;
1104
				cl++;
1105
				nzero = 0;
1106
				state = NSfract;
1107
				continue;
1108
			case NSexpsign:
1109
			case NSexp:
1110
			case NSexpdigit:
1111
				exp = exp*10 + (c - '0');
1112
				state = NSexpdigit;
1113
				continue;
1114
			}
1115
			break;
1116
		}
1117
		if(c == '.') {
1118
			switch(state) {
1119
			case NSstart:
1120
			case NSsign:
1121
				state = NSpoint;
1122
				continue;
1123
			case NSzero:
1124
				state = NSzerofract;
1125
				continue;
1126
			case NSdigit:
1127
				state = NSfract;
1128
				continue;
1129
			}
1130
			break;
1131
		}
1132
		if((f->flags & Gflag) && (c == 'e' || c == 'E')) {
1133
			switch(state) {
1134
			case NSdigit:
1135
			case NSfract:
1136
				state = NSexp;
1137
				continue;
1138
			}
1139
			break;
1140
		}
1141
		break;
1142
	}
1143
 
1144
	switch(state) {
1145
	/*
1146
	 * result is zero
1147
	 */
1148
	case NSstart:
1149
	case NSsign:
1150
	case NSzero:
1151
	case NSzerofract:
1152
	case NSpoint:
1153
		kp = k->key + k->klen;
1154
		k->klen += 2;
1155
		kp[0] = 0x20;	/* between + and - */
1156
		kp[1] = 0;
1157
		return;
1158
	/*
1159
	 * result has exponent
1160
	 */
1161
	case NSexpsign:
1162
	case NSexp:
1163
	case NSexpdigit:
1164
		if(expsign)
1165
			exp = -exp;
1166
		dp += exp;
1167
 
1168
	/*
1169
	 * result is fixed point number
1170
	 */
1171
	case NSdigit:
1172
	case NSfract:
1173
		kp -= nzero;
1174
		cl -= nzero;
1175
		break;
1176
	}
1177
 
1178
	/*
1179
	 * end of number
1180
	 */
1181
	c = 0;
1182
	if(rflag)
1183
		c = ~c;
1184
	*kp = c;
1185
 
1186
	/*
1187
	 * sign and exponent
1188
	 */
1189
	c = 0x30;
1190
	if(rflag) {
1191
		c = 0x10;
1192
		dp = ~dp;
1193
	}
1194
	kp = k->key + k->klen;
1195
	kp[0] = c;
1196
	kp[1] = (dp >> 8);
1197
	kp[2] = dp;
1198
	k->klen = cl+1;
1199
}
1200
 
1201
void
1202
dokey_m(Key *k, uchar *lp, uchar *lpe, Field *f)
1203
{
1204
	uchar *kp;
1205
	Rune r, place[3];
1206
	int c, cl, pc;
1207
	int rflag;
1208
 
1209
	rflag = f->flags&Rflag;
1210
	pc = 0;
1211
 
1212
	cl = k->klen;
1213
	kp = k->key + cl;
1214
 
1215
	for(;;) {
1216
		/*
1217
		 * get the character
1218
		 */
1219
		if(lp >= lpe)
1220
			break;
1221
		c = *lp;
1222
		if(c >= Runeself) {
1223
			lp += chartorune(&r, (char*)lp);
1224
			c = r;
1225
		} else
1226
			lp++;
1227
 
1228
		if(c < nelem(f->mapto)) {
1229
			c = f->mapto[c];
1230
			if(c == 0)
1231
				continue;
1232
		}
1233
		place[pc++] = c;
1234
		if(pc < 3)
1235
			continue;
1236
		for(c=11; c>=0; c--)
1237
			if(memcmp(month[c], place, sizeof(place)) == 0)
1238
				break;
1239
		c += 10;
1240
		if(rflag)
1241
			c = ~c;
1242
		*kp++ = c;
1243
		cl++;
1244
		break;
1245
	}
1246
 
1247
	c = 0;
1248
	if(rflag)
1249
		c = ~c;
1250
	*kp = c;
1251
	k->klen = cl+1;
1252
}
1253
 
1254
void
1255
dokey_dfi(Key *k, uchar *lp, uchar *lpe, Field *f)
1256
{
1257
	uchar *kp;
1258
	Rune r;
1259
	int c, cl, n, rflag;
1260
 
1261
	cl = k->klen;
1262
	kp = k->key + cl;
1263
	rflag = f->flags & Rflag;
1264
 
1265
	for(;;) {
1266
		/*
1267
		 * get the character
1268
		 */
1269
		if(lp >= lpe)
1270
			break;
1271
		c = *lp;
1272
		if(c >= Runeself) {
1273
			lp += chartorune(&r, (char*)lp);
1274
			c = r;
1275
		} else
1276
			lp++;
1277
 
1278
		/*
1279
		 * do the various mappings.
1280
		 * the common case is handled
1281
		 * completely by the table.
1282
		 */
1283
		if(c != 0 && c < Runeself) {
1284
			c = f->mapto[c];
1285
			if(c) {
1286
				*kp++ = c;
1287
				cl++;
1288
			}
1289
			continue;
1290
		}
1291
 
1292
		/*
1293
		 * for characters out of range,
1294
		 * the table does not do Rflag.
1295
		 * ignore is based on mapto[nelem(f->mapto)-1]
1296
		 */
1297
		if(c != 0 && c < nelem(f->mapto)) {
1298
			c = f->mapto[c];
1299
			if(c == 0)
1300
				continue;
1301
		} else {
1302
			if(f->mapto[nelem(f->mapto)-1] == 0)
1303
				continue;
1304
			/*
1305
			 * consider building maps as necessary
1306
			 */
1307
			if(f->flags & Fflag)
1308
				c = tolowerrune(tobaserune(c));
1309
			if(f->flags & Dflag && !isalpharune(c) &&
1310
			    !isdigitrune(c) && !isspacerune(c))
1311
				continue;
1312
			if((f->flags & Wflag) && isspacerune(c))
1313
				continue;
1314
		}
1315
 
1316
		/*
1317
		 * put it in the key
1318
		 */
1319
		r = c;
1320
		n = runetochar((char*)kp, &r);
1321
		kp += n;
1322
		cl += n;
1323
		if(rflag)
1324
			while(n > 0) {
1325
				kp[-n] = ~kp[-n];
1326
				n--;
1327
			}
1328
	}
1329
 
1330
	/*
1331
	 * end of key
1332
	 */
1333
	k->klen = cl+1;
1334
	if(rflag) {
1335
		*kp = ~0;
1336
		return;
1337
	}
1338
	*kp = 0;
1339
}
1340
 
1341
void
1342
dokey_r(Key *k, uchar *lp, uchar *lpe, Field*)
1343
{
1344
	int cl, n;
1345
	uchar *kp;
1346
 
1347
	n = lpe - lp;
1348
	if(n < 0)
1349
		n = 0;
1350
	cl = k->klen;
1351
	kp = k->key + cl;
1352
	k->klen = cl+n+1;
1353
 
1354
	lpe -= 3;
1355
	while(lp < lpe) {
1356
		kp[0] = ~lp[0];
1357
		kp[1] = ~lp[1];
1358
		kp[2] = ~lp[2];
1359
		kp[3] = ~lp[3];
1360
		kp += 4;
1361
		lp += 4;
1362
	}
1363
 
1364
	lpe += 3;
1365
	while(lp < lpe)
1366
		*kp++ = ~*lp++;
1367
	*kp = ~0;
1368
}
1369
 
1370
void
1371
dokey_(Key *k, uchar *lp, uchar *lpe, Field*)
1372
{
1373
	int n, cl;
1374
	uchar *kp;
1375
 
1376
	n = lpe - lp;
1377
	if(n < 0)
1378
		n = 0;
1379
	cl = k->klen;
1380
	kp = k->key + cl;
1381
	k->klen = cl+n+1;
1382
	memmove(kp, lp, n);
1383
	kp[n] = 0;
1384
}
1385
 
1386
void
1387
buildkey(Line *l)
1388
{
1389
	Key *k;
1390
	uchar *lp, *lpe;
1391
	int ll, kl, cl, i, n;
1392
	Field *f;
1393
 
1394
	ll = l->llen - 1;
1395
	kl = 0;			/* allocated length */
1396
	cl = 0;			/* current length */
1397
	k = 0;
1398
 
1399
	for(i=1; i<=args.nfield; i++) {
1400
		f = &args.field[i];
1401
		lp = skip(l->line, f->beg1, f->beg2, f->flags&B1flag, 0);
1402
		if(lp == 0)
1403
			lp = l->line + ll;
1404
		lpe = skip(l->line, f->end1, f->end2, f->flags&Bflag, 1);
1405
		if(lpe == 0)
1406
			lpe = l->line + ll;
1407
		n = (lpe - lp) + 1;
1408
		if(n <= 0)
1409
			n = 1;
1410
		if(cl+(n+4) > kl) {
1411
			kl = cl+(n+4);
1412
			k = realloc(k, sizeof(Key) +
1413
				(kl-1)*sizeof(k->key[0]));
1414
			if(k == 0)
1415
				nomem();
1416
		}
1417
		k->klen = cl;
1418
		(*f->dokey)(k, lp, lpe, f);
1419
		cl = k->klen;
1420
	}
1421
 
1422
	/*
1423
	 * global comparisons
1424
	 */
1425
	if(!(args.uflag && cl > 0)) {
1426
		f = &args.field[0];
1427
		if(cl+(ll+4) > kl) {
1428
			kl = cl+(ll+4);
1429
			k = realloc(k, sizeof(Key) +
1430
				(kl-1)*sizeof(k->key[0]));
1431
			if(k == 0)
1432
				nomem();
1433
		}
1434
		k->klen = cl;
1435
		(*f->dokey)(k, l->line, l->line+ll, f);
1436
		cl = k->klen;
1437
	}
1438
 
1439
	l->key = k;
1440
	k->klen = cl;
1441
 
1442
	if(args.vflag) {
1443
		if(write(2, l->line, l->llen) != l->llen)
1444
			exits("write");
1445
		for(i=0; i<k->klen; i++) {
1446
			fprint(2, " %.2x", k->key[i]);
1447
			if(k->key[i] == 0x00 || k->key[i] == 0xff)
1448
				fprint(2, "\n");
1449
		}
1450
	}
1451
}
1452
 
1453
void
1454
makemapm(Field *f)
1455
{
1456
	int i, c;
1457
 
1458
	for(i=0; i<nelem(f->mapto); i++) {
1459
		c = 1;
1460
		if(i == ' ' || i == '\t')
1461
			c = 0;
1462
		if(i >= 'a' && i <= 'z')
1463
			c = i + ('A' - 'a');
1464
		if(i >= 'A' && i <= 'Z')
1465
			c = i;
1466
		f->mapto[i] = c;
1467
		if(args.vflag) {
1468
			if((i & 15) == 0)
1469
				fprint(2, "	");
1470
			fprint(2, " %.2x", c);
1471
			if((i & 15) == 15)
1472
				fprint(2, "\n");
1473
		}
1474
	}
1475
}
1476
 
1477
void
1478
makemapd(Field *f)
1479
{
1480
	int i, c;
1481
 
1482
	for(i=0; i<nelem(f->mapto); i++) {
1483
		c = i;
1484
		if(f->flags & Iflag)
1485
			if(c < 040 || c > 0176)
1486
				c = -1;
1487
		if((f->flags & Wflag) && c >= 0)
1488
			if(c == ' ' || c == '\t')
1489
				c = -1;
1490
		if((f->flags & Dflag) && c >= 0)
1491
			if(!(c == ' ' || c == '\t' ||
1492
			    (c >= 'a' && c <= 'z') ||
1493
			    (c >= 'A' && c <= 'Z') ||
1494
			    (c >= '0' && c <= '9'))){
1495
				if(!isupperrune(c = toupperrune(c)))
1496
					c = -1;
1497
			}
1498
		if((f->flags & Fflag) && c >= 0)
1499
			c = toupperrune(tobaserune(c));
1500
		if((f->flags & Rflag) && c >= 0 && i > 0 && i < Runeself)
1501
			c = ~c & 0xff;
1502
		if(c < 0)
1503
			c = 0;
1504
		f->mapto[i] = c;
1505
		if(args.vflag) {
1506
			if((i & 15) == 0)
1507
				fprint(2, "	");
1508
			fprint(2, " %.2x", c);
1509
			if((i & 15) == 15)
1510
				fprint(2, "\n");
1511
		}
1512
	}
1513
}
1514
 
1515
Rune*	month[12] =
1516
{
1517
	L"JAN",
1518
	L"FEB",
1519
	L"MAR",
1520
	L"APR",
1521
	L"MAY",
1522
	L"JUN",
1523
	L"JUL",
1524
	L"AUG",
1525
	L"SEP",
1526
	L"OCT",
1527
	L"NOV",
1528
	L"DEC",
1529
};
1530
 
1531
/************** radix sort ***********/
1532
 
1533
enum
1534
{
1535
	Threshold	= 14,
1536
};
1537
 
1538
void	rsort4(Key***, ulong, int);
1539
void	bsort4(Key***, ulong, int);
1540
 
1541
void
1542
sort4(void *a, ulong n)
1543
{
1544
	if(n > Threshold)
1545
		rsort4((Key***)a, n, 0);
1546
	else
1547
		bsort4((Key***)a, n, 0);
1548
}
1549
 
1550
void
1551
rsort4(Key ***a, ulong n, int b)
1552
{
1553
	Key ***ea, ***t, ***u, **t1, **u1, *k;
1554
	Key ***part[257];
1555
	static long count[257];
1556
	long clist[257+257], *cp, *cp1;
1557
	int c, lowc, higc;
1558
 
1559
	/*
1560
	 * pass 1 over all keys,
1561
	 * count the number of each key[b].
1562
	 * find low count and high count.
1563
	 */
1564
	lowc = 256;
1565
	higc = 0;
1566
	ea = a+n;
1567
	for(t=a; t<ea; t++) {
1568
		k = **t;
1569
		n = k->klen;
1570
		if(b >= n) {
1571
			count[256]++;
1572
			continue;
1573
		}
1574
		c = k->key[b];
1575
		n = count[c]++;
1576
		if(n == 0) {
1577
			if(c < lowc)
1578
				lowc = c;
1579
			if(c > higc)
1580
				higc = c;
1581
		}
1582
	}
1583
 
1584
	/*
1585
	 * pass 2 over all counts,
1586
	 * put partition pointers in part[c].
1587
	 * save compacted indexes and counts
1588
	 * in clist[].
1589
	 */
1590
	t = a;
1591
	n = count[256];
1592
	clist[0] = n;
1593
	part[256] = t;
1594
	t += n;
1595
 
1596
	cp1 = clist+1;
1597
	cp = count+lowc;
1598
	for(c=lowc; c<=higc; c++,cp++) {
1599
		n = *cp;
1600
		if(n) {
1601
			cp1[0] = n;
1602
			cp1[1] = c;
1603
			cp1 += 2;
1604
			part[c] = t;
1605
			t += n;
1606
		}
1607
	}
1608
	*cp1 = 0;
1609
 
1610
	/*
1611
	 * pass 3 over all counts.
1612
	 * chase lowest pointer in each partition
1613
	 * around a permutation until it comes
1614
	 * back and is stored where it started.
1615
	 * static array, count[], should be
1616
	 * reduced to zero entries except maybe
1617
	 * count[256].
1618
	 */
1619
	for(cp1=clist+1; cp1[0]; cp1+=2) {
1620
		c = cp1[1];
1621
		cp = count+c;
1622
		while(*cp) {
1623
			t1 = *part[c];
1624
			for(;;) {
1625
				k = *t1;
1626
				n = 256;
1627
				if(b < k->klen)
1628
					n = k->key[b];
1629
				u = part[n]++;
1630
				count[n]--;
1631
				u1 = *u;
1632
				*u = t1;
1633
				if(n == c)
1634
					break;
1635
				t1 = u1;
1636
			}
1637
		}
1638
	}
1639
 
1640
	/*
1641
	 * pass 4 over all partitions.
1642
	 * call recursively.
1643
	 */
1644
	b++;
1645
	t = a + clist[0];
1646
	count[256] = 0;
1647
	for(cp1=clist+1; n=cp1[0]; cp1+=2) {
1648
		if(n > Threshold)
1649
			rsort4(t, n, b);
1650
		else
1651
		if(n > 1)
1652
			bsort4(t, n, b);
1653
		t += n;
1654
	}
1655
}
1656
 
1657
/*
1658
 * bubble sort to pick up
1659
 * the pieces.
1660
 */
1661
void
1662
bsort4(Key ***a, ulong n, int b)
1663
{
1664
	Key ***i, ***j, ***k, ***l, **t;
1665
	Key *ka, *kb;
1666
	int n1, n2;
1667
 
1668
	l = a+n;
1669
	j = a;
1670
 
1671
loop:
1672
	i = j;
1673
	j++;
1674
	if(j >= l)
1675
		return;
1676
 
1677
	ka = **i;
1678
	kb = **j;
1679
	n1 = ka->klen - b;
1680
	n2 = kb->klen - b;
1681
	if(n1 > n2)
1682
		n1 = n2;
1683
	if(n1 <= 0)
1684
		goto loop;
1685
	n2 = ka->key[b] - kb->key[b];
1686
	if(n2 == 0)
1687
		n2 = memcmp(ka->key+b, kb->key+b, n1);
1688
	if(n2 <= 0)
1689
		goto loop;
1690
 
1691
	for(;;) {
1692
		k = i+1;
1693
 
1694
		t = *k;
1695
		*k = *i;
1696
		*i = t;
1697
 
1698
		if(i <= a)
1699
			goto loop;
1700
 
1701
		i--;
1702
		ka = **i;
1703
		kb = *t;
1704
		n1 = ka->klen - b;
1705
		n2 = kb->klen - b;
1706
		if(n1 > n2)
1707
			n1 = n2;
1708
		if(n1 <= 0)
1709
			goto loop;
1710
		n2 = ka->key[b] - kb->key[b];
1711
		if(n2 == 0)
1712
			n2 = memcmp(ka->key+b, kb->key+b, n1);
1713
		if(n2 <= 0)
1714
			goto loop;
1715
	}
1716
}