Subversion Repositories planix.SVN

Rev

Rev 2 | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
2 - 1
/*
2
 * sed -- stream editor
3
 */
4
#include <u.h>
5
#include <libc.h>
6
#include <bio.h>
7
#include <regexp.h>
8
 
9
enum {
10
	DEPTH		= 20,		/* max nesting depth of {} */
11
	MAXCMDS		= 512,		/* max sed commands */
12
	ADDSIZE		= 10000,	/* size of add & read buffer */
13
	MAXADDS		= 20,		/* max pending adds and reads */
14
	LBSIZE		= 8192,		/* input line size */
15
	LABSIZE		= 50,		/* max number of labels */
16
	MAXSUB		= 10,		/* max number of sub reg exp */
17
	MAXFILES	= 120,		/* max output files */
18
};
19
 
20
/*
21
 * An address is a line #, a R.E., "$", a reference to the last
22
 * R.E., or nothing.
23
 */
24
typedef struct {
25
	enum {
26
		A_NONE,
27
		A_DOL,
28
		A_LINE,
29
		A_RE,
30
		A_LAST,
31
	}type;
32
	union {
33
		long	line;		/* Line # */
34
		Reprog	*rp;		/* Compiled R.E. */
35
	};
36
} Addr;
37
 
38
typedef struct	SEDCOM {
39
	Addr	ad1;			/* optional start address */
40
	Addr	ad2;			/* optional end address */
41
	union {
42
		Reprog	*re1;		/* compiled R.E. */
43
		Rune	*text;		/* added text or file name */
44
		struct	SEDCOM	*lb1;	/* destination command of branch */
45
	};
46
	Rune	*rhs;			/* Right-hand side of substitution */
47
	Biobuf*	fcode;			/* File ID for read and write */
48
	char	command;		/* command code -see below */
49
	char	gfl;			/* 'Global' flag for substitutions */
50
	char	pfl;			/* 'print' flag for substitutions */
51
	char	active;			/* 1 => data between start and end */
52
	char	negfl;			/* negation flag */
53
} SedCom;
54
 
55
/* Command Codes for field SedCom.command */
56
#define ACOM	01
57
#define BCOM	020
58
#define CCOM	02
59
#define	CDCOM	025
60
#define	CNCOM	022
61
#define COCOM	017
62
#define	CPCOM	023
63
#define DCOM	03
64
#define ECOM	015
65
#define EQCOM	013
66
#define FCOM	016
67
#define GCOM	027
68
#define CGCOM	030
69
#define HCOM	031
70
#define CHCOM	032
71
#define ICOM	04
72
#define LCOM	05
73
#define NCOM	012
74
#define PCOM	010
75
#define QCOM	011
76
#define RCOM	06
77
#define SCOM	07
78
#define TCOM	021
79
#define WCOM	014
80
#define	CWCOM	024
81
#define	YCOM	026
82
#define XCOM	033
83
 
84
typedef struct label {			/* Label symbol table */
85
	Rune	uninm[9];		/* Label name */
86
	SedCom	*chain;
87
	SedCom	*address;		/* Command associated with label */
88
} Label;
89
 
90
typedef	struct	FILE_CACHE {		/* Data file control block */
91
	struct FILE_CACHE *next;	/* Forward Link */
92
	char	*name;			/* Name of file */
93
} FileCache;
94
 
95
SedCom pspace[MAXCMDS];			/* Command storage */
96
SedCom *pend = pspace+MAXCMDS;		/* End of command storage */
97
SedCom *rep = pspace;			/* Current fill point */
98
 
99
Reprog	*lastre = 0;			/* Last regular expression */
100
Resub	subexp[MAXSUB];			/* sub-patterns of pattern match*/
101
 
102
Rune	addspace[ADDSIZE];		/* Buffer for a, c, & i commands */
103
Rune	*addend = addspace+ADDSIZE;
104
 
105
SedCom	*abuf[MAXADDS];			/* Queue of pending adds & reads */
106
SedCom	**aptr = abuf;
107
 
108
struct {				/* Sed program input control block */
109
	enum PTYPE { 			/* Either on command line or in file */
110
		P_ARG,
111
		P_FILE,
112
	} type;
113
	union PCTL {			/* Pointer to data */
114
		Biobuf	*bp;
115
		char	*curr;
116
	};
117
} prog;
118
 
119
Rune	genbuf[LBSIZE];			/* Miscellaneous buffer */
120
 
121
FileCache	*fhead = 0;		/* Head of File Cache Chain */
122
FileCache	*ftail = 0;		/* Tail of File Cache Chain */
123
 
124
Rune	*loc1;				/* Start of pattern match */
125
Rune	*loc2;				/* End of pattern match */
126
Rune	seof;				/* Pattern delimiter char */
127
 
128
Rune	linebuf[LBSIZE+1];		/* Input data buffer */
129
Rune	*lbend = linebuf+LBSIZE;	/* End of buffer */
130
Rune	*spend = linebuf;		/* End of input data */
131
Rune	*cp;				/* Current scan point in linebuf */
132
 
133
Rune	holdsp[LBSIZE+1];		/* Hold buffer */
134
Rune	*hend = holdsp+LBSIZE;		/* End of hold buffer */
135
Rune	*hspend = holdsp;		/* End of hold data */
136
 
137
int	nflag;				/* Command line flags */
138
int	gflag;
139
 
140
int	dolflag;			/* Set when at true EOF */
141
int	sflag;				/* Set when substitution done */
142
int	jflag;				/* Set when jump required */
143
int	delflag;			/* Delete current line when set */
144
 
145
long	lnum = 0;			/* Input line count */
146
 
147
char	fname[MAXFILES][40];		/* File name cache */
148
Biobuf	*fcode[MAXFILES];		/* File ID cache */
149
int	nfiles = 0;			/* Cache fill point */
150
 
151
Biobuf	fout;				/* Output stream */
152
Biobuf	stdin;				/* Default input */
153
Biobuf*	f = 0;				/* Input data */
154
 
155
Label	ltab[LABSIZE];			/* Label name symbol table */
156
Label	*labend = ltab+LABSIZE;		/* End of label table */
157
Label	*lab = ltab+1;			/* Current Fill point */
158
 
159
int	depth = 0;			/* {} stack pointer */
160
 
161
Rune	bad;				/* Dummy err ptr reference */
162
Rune	*badp = &bad;
163
 
164
 
165
char	CGMES[]	 = 	"%S command garbled: %S";
166
char	TMMES[]	 = 	"Too much text: %S";
167
char	LTL[]	 = 	"Label too long: %S";
168
char	AD0MES[] =	"No addresses allowed: %S";
169
char	AD1MES[] =	"Only one address allowed: %S";
170
 
171
void	address(Addr *);
172
void	arout(void);
173
int	cmp(char *, char *);
174
int	rcmp(Rune *, Rune *);
175
void	command(SedCom *);
176
Reprog	*compile(void);
177
Rune	*compsub(Rune *, Rune *);
178
void	dechain(void);
179
void	dosub(Rune *);
180
int	ecmp(Rune *, Rune *, int);
181
void	enroll(char *);
182
void	errexit(void);
183
int	executable(SedCom *);
184
void	execute(void);
185
void	fcomp(void);
186
long	getrune(void);
187
Rune	*gline(Rune *);
188
int	match(Reprog *, Rune *);
189
void	newfile(enum PTYPE, char *);
190
int 	opendata(void);
191
Biobuf	*open_file(char *);
192
Rune	*place(Rune *, Rune *, Rune *);
193
void	quit(char *, ...);
194
int	rline(Rune *, Rune *);
195
Label	*search(Label *);
196
int	substitute(SedCom *);
197
char	*text(char *);
198
Rune	*stext(Rune *, Rune *);
199
int	ycomp(SedCom *);
200
char *	trans(int c);
201
void	putline(Biobuf *bp, Rune *buf, int n);
202
 
203
void
204
main(int argc, char **argv)
205
{
206
	int compfl;
207
 
208
	lnum = 0;
209
	Binit(&fout, 1, OWRITE);
210
	fcode[nfiles++] = &fout;
211
	compfl = 0;
212
 
213
	if(argc == 1)
214
		exits(0);
215
	ARGBEGIN{
216
	case 'e':
217
		if (argc <= 1)
218
			quit("missing pattern");
219
		newfile(P_ARG, ARGF());
220
		fcomp();
221
		compfl = 1;
222
		continue;
223
	case 'f':
224
		if(argc <= 1)
225
			quit("no pattern-file");
226
		newfile(P_FILE, ARGF());
227
		fcomp();
228
		compfl = 1;
229
		continue;
230
	case 'g':
231
		gflag++;
232
		continue;
233
	case 'n':
234
		nflag++;
235
		continue;
236
	default:
237
		fprint(2, "sed: Unknown flag: %c\n", ARGC());
238
		continue;
239
	} ARGEND
240
 
241
	if(compfl == 0) {
242
		if (--argc < 0)
243
			quit("missing pattern");
244
		newfile(P_ARG, *argv++);
245
		fcomp();
246
	}
247
 
248
	if(depth)
249
		quit("Too many {'s");
250
 
251
	ltab[0].address = rep;
252
 
253
	dechain();
254
 
255
	if(argc <= 0)
256
		enroll(0);		/* Add stdin to cache */
257
	else
258
		while(--argc >= 0)
259
			enroll(*argv++);
260
	execute();
261
	exits(0);
262
}
263
 
264
void
265
fcomp(void)
266
{
267
	int	i;
268
	Label	*lpt;
269
	Rune	*tp;
270
	SedCom	*pt, *pt1;
271
	static Rune	*p = addspace;
272
	static SedCom	**cmpend[DEPTH];	/* stack of {} operations */
273
 
274
	while (rline(linebuf, lbend) >= 0) {
275
		cp = linebuf;
276
comploop:
277
		while(*cp == L' ' || *cp == L'\t')
278
			cp++;
279
		if(*cp == L'\0' || *cp == L'#')
280
			continue;
281
		if(*cp == L';') {
282
			cp++;
283
			goto comploop;
284
		}
285
 
286
		address(&rep->ad1);
287
		if (rep->ad1.type != A_NONE) {
288
			if (rep->ad1.type == A_LAST) {
289
				if (!lastre)
290
					quit("First RE may not be null");
291
				rep->ad1.type = A_RE;
292
				rep->ad1.rp = lastre;
293
			}
294
			if(*cp == L',' || *cp == L';') {
295
				cp++;
296
				address(&rep->ad2);
297
				if (rep->ad2.type == A_LAST) {
298
					rep->ad2.type = A_RE;
299
					rep->ad2.rp = lastre;
300
				}
301
			} else
302
				rep->ad2.type = A_NONE;
303
		}
304
		while(*cp == L' ' || *cp == L'\t')
305
			cp++;
306
 
307
swit:
308
		switch(*cp++) {
309
		default:
310
			quit("Unrecognized command: %S", linebuf);
311
 
312
		case '!':
313
			rep->negfl = 1;
314
			goto swit;
315
 
316
		case '{':
317
			rep->command = BCOM;
318
			rep->negfl = !rep->negfl;
319
			cmpend[depth++] = &rep->lb1;
320
			if(++rep >= pend)
321
				quit("Too many commands: %S", linebuf);
322
			if(*cp == '\0')
323
				continue;
324
			goto comploop;
325
 
326
		case '}':
327
			if(rep->ad1.type != A_NONE)
328
				quit(AD0MES, linebuf);
329
			if(--depth < 0)
330
				quit("Too many }'s");
331
			*cmpend[depth] = rep;
332
			if(*cp == 0)
333
				continue;
334
			goto comploop;
335
 
336
		case '=':
337
			rep->command = EQCOM;
338
			if(rep->ad2.type != A_NONE)
339
				quit(AD1MES, linebuf);
340
			break;
341
 
342
		case ':':
343
			if(rep->ad1.type != A_NONE)
344
				quit(AD0MES, linebuf);
345
 
346
			while(*cp == L' ')
347
				cp++;
348
			tp = lab->uninm;
349
			while (*cp && *cp != L';' && *cp != L' ' &&
350
			    *cp != L'\t' && *cp != L'#') {
351
				*tp++ = *cp++;
352
				if(tp >= &lab->uninm[8])
353
					quit(LTL, linebuf);
354
			}
355
			*tp = L'\0';
356
 
357
			if (*lab->uninm == L'\0')		/* no label? */
358
				quit(CGMES, L":", linebuf);
359
			if(lpt = search(lab)) {
360
				if(lpt->address)
361
					quit("Duplicate labels: %S", linebuf);
362
			} else {
363
				lab->chain = 0;
364
				lpt = lab;
365
				if(++lab >= labend)
366
					quit("Too many labels: %S", linebuf);
367
			}
368
			lpt->address = rep;
369
			if (*cp == L'#')
370
				continue;
371
			rep--;			/* reuse this slot */
372
			break;
373
 
374
		case 'a':
375
			rep->command = ACOM;
376
			if(rep->ad2.type != A_NONE)
377
				quit(AD1MES, linebuf);
378
			if(*cp == L'\\')
379
				cp++;
380
			if(*cp++ != L'\n')
381
				quit(CGMES, L"a", linebuf);
382
			rep->text = p;
383
			p = stext(p, addend);
384
			break;
385
		case 'c':
386
			rep->command = CCOM;
387
			if(*cp == L'\\')
388
				cp++;
389
			if(*cp++ != L'\n')
390
				quit(CGMES, L"c", linebuf);
391
			rep->text = p;
392
			p = stext(p, addend);
393
			break;
394
		case 'i':
395
			rep->command = ICOM;
396
			if(rep->ad2.type != A_NONE)
397
				quit(AD1MES, linebuf);
398
			if(*cp == L'\\')
399
				cp++;
400
			if(*cp++ != L'\n')
401
				quit(CGMES, L"i", linebuf);
402
			rep->text = p;
403
			p = stext(p, addend);
404
			break;
405
 
406
		case 'g':
407
			rep->command = GCOM;
408
			break;
409
 
410
		case 'G':
411
			rep->command = CGCOM;
412
			break;
413
 
414
		case 'h':
415
			rep->command = HCOM;
416
			break;
417
 
418
		case 'H':
419
			rep->command = CHCOM;
420
			break;
421
 
422
		case 't':
423
			rep->command = TCOM;
424
			goto jtcommon;
425
 
426
		case 'b':
427
			rep->command = BCOM;
428
jtcommon:
429
			while(*cp == L' ')
430
				cp++;
431
			if(*cp == L'\0' || *cp == L';') {
432
				/* no label; jump to end */
433
				if(pt = ltab[0].chain) {
434
					while((pt1 = pt->lb1) != nil)
435
						pt = pt1;
436
					pt->lb1 = rep;
437
				} else
438
					ltab[0].chain = rep;
439
				break;
440
			}
441
 
442
			/* copy label into lab->uninm */
443
			tp = lab->uninm;
444
			while((*tp = *cp++) != L'\0' && *tp != L';')
445
				if(++tp >= &lab->uninm[8])
446
					quit(LTL, linebuf);
447
			cp--;
448
			*tp = L'\0';
449
 
450
			if (*lab->uninm == L'\0')
451
				/* shouldn't get here */
452
				quit(CGMES, L"b or t", linebuf);
453
			if((lpt = search(lab)) != nil) {
454
				if(lpt->address)
455
					rep->lb1 = lpt->address;
456
				else {
457
					for(pt = lpt->chain; pt != nil &&
458
					    (pt1 = pt->lb1) != nil; pt = pt1)
459
						;
460
					if (pt)
461
						pt->lb1 = rep;
462
				}
463
			} else {			/* add new label */
464
				lab->chain = rep;
465
				lab->address = 0;
466
				if(++lab >= labend)
467
					quit("Too many labels: %S", linebuf);
468
			}
469
			break;
470
 
471
		case 'n':
472
			rep->command = NCOM;
473
			break;
474
 
475
		case 'N':
476
			rep->command = CNCOM;
477
			break;
478
 
479
		case 'p':
480
			rep->command = PCOM;
481
			break;
482
 
483
		case 'P':
484
			rep->command = CPCOM;
485
			break;
486
 
487
		case 'r':
488
			rep->command = RCOM;
489
			if(rep->ad2.type != A_NONE)
490
				quit(AD1MES, linebuf);
491
			if(*cp++ != L' ')
492
				quit(CGMES, L"r", linebuf);
493
			rep->text = p;
494
			p = stext(p, addend);
495
			break;
496
 
497
		case 'd':
498
			rep->command = DCOM;
499
			break;
500
 
501
		case 'D':
502
			rep->command = CDCOM;
503
			rep->lb1 = pspace;
504
			break;
505
 
506
		case 'q':
507
			rep->command = QCOM;
508
			if(rep->ad2.type != A_NONE)
509
				quit(AD1MES, linebuf);
510
			break;
511
 
512
		case 'l':
513
			rep->command = LCOM;
514
			break;
515
 
516
		case 's':
517
			rep->command = SCOM;
518
			seof = *cp++;
519
			if ((rep->re1 = compile()) == 0) {
520
				if(!lastre)
521
					quit("First RE may not be null.");
522
				rep->re1 = lastre;
523
			}
524
			rep->rhs = p;
525
			if((p = compsub(p, addend)) == 0)
526
				quit(CGMES, L"s", linebuf);
527
			if(*cp == L'g') {
528
				cp++;
529
				rep->gfl++;
530
			} else if(gflag)
531
				rep->gfl++;
532
 
533
			if(*cp == L'p') {
534
				cp++;
535
				rep->pfl = 1;
536
			}
537
 
538
			if(*cp == L'P') {
539
				cp++;
540
				rep->pfl = 2;
541
			}
542
 
543
			if(*cp == L'w') {
544
				cp++;
545
				if(*cp++ !=  L' ')
546
					quit(CGMES, L"s", linebuf);
547
				text(fname[nfiles]);
548
				for(i = nfiles - 1; i >= 0; i--)
549
					if(cmp(fname[nfiles], fname[i]) == 0) {
550
						rep->fcode = fcode[i];
551
						goto done;
552
					}
553
				if(nfiles >= MAXFILES)
554
					quit("Too many files in w commands 1");
555
				rep->fcode = open_file(fname[nfiles]);
556
			}
557
			break;
558
 
559
		case 'w':
560
			rep->command = WCOM;
561
			if(*cp++ != L' ')
562
				quit(CGMES, L"w", linebuf);
563
			text(fname[nfiles]);
564
			for(i = nfiles - 1; i >= 0; i--)
565
				if(cmp(fname[nfiles], fname[i]) == 0) {
566
					rep->fcode = fcode[i];
567
					goto done;
568
				}
569
			if(nfiles >= MAXFILES){
570
				fprint(2, "sed: Too many files in w commands 2 \n");
571
				fprint(2, "nfiles = %d; MAXF = %d\n",
572
					nfiles, MAXFILES);
573
				errexit();
574
			}
575
			rep->fcode = open_file(fname[nfiles]);
576
			break;
577
 
578
		case 'x':
579
			rep->command = XCOM;
580
			break;
581
 
582
		case 'y':
583
			rep->command = YCOM;
584
			seof = *cp++;
585
			if (ycomp(rep) == 0)
586
				quit(CGMES, L"y", linebuf);
587
			break;
588
 
589
		}
590
done:
591
		if(++rep >= pend)
592
			quit("Too many commands, last: %S", linebuf);
593
		if(*cp++ != L'\0') {
594
			if(cp[-1] == L';')
595
				goto comploop;
596
			quit(CGMES, cp - 1, linebuf);
597
		}
598
	}
599
}
600
 
601
Biobuf *
602
open_file(char *name)
603
{
604
	int fd;
605
	Biobuf *bp;
606
 
607
	if ((bp = malloc(sizeof(Biobuf))) == 0)
608
		quit("Out of memory");
609
	if ((fd = open(name, OWRITE)) < 0 &&
610
	    (fd = create(name, OWRITE, 0666)) < 0)
611
		quit("Cannot create %s", name);
612
	Binit(bp, fd, OWRITE);
613
	Bseek(bp, 0, 2);
614
	fcode[nfiles++] = bp;
615
	return bp;
616
}
617
 
618
Rune *
619
compsub(Rune *rhs, Rune *end)
620
{
621
	Rune r;
622
 
623
	while ((r = *cp++) != '\0') {
624
		if(r == '\\') {
625
			if (rhs < end)
626
				*rhs++ = Runemax;
627
			else
628
				return 0;
629
			r = *cp++;
630
			if(r == 'n')
631
				r = '\n';
632
		} else {
633
			if(r == seof) {
634
				if (rhs < end)
635
					*rhs++ = '\0';
636
				else
637
					return 0;
638
				return rhs;
639
			}
640
		}
641
		if (rhs < end)
642
			*rhs++ = r;
643
		else
644
			return 0;
645
	}
646
	return 0;
647
}
648
 
649
Reprog *
650
compile(void)
651
{
652
	Rune c;
653
	char *ep;
654
	char expbuf[512];
655
 
656
	if((c = *cp++) == seof)		/* L'//' */
657
		return 0;
658
	ep = expbuf;
659
	do {
660
		if (c == L'\0' || c == L'\n')
661
			quit(TMMES, linebuf);
662
		if (c == L'\\') {
663
			if (ep >= expbuf+sizeof(expbuf))
664
				quit(TMMES, linebuf);
665
			ep += runetochar(ep, &c);
666
			if ((c = *cp++) == L'n')
667
				c = L'\n';
668
		}
669
		if (ep >= expbuf + sizeof(expbuf))
670
			quit(TMMES, linebuf);
671
		ep += runetochar(ep, &c);
672
	} while ((c = *cp++) != seof);
673
	*ep = 0;
674
	return lastre = regcomp(expbuf);
675
}
676
 
677
void
678
regerror(char *s)
679
{
680
	USED(s);
681
	quit(CGMES, L"r.e.-using", linebuf);
682
}
683
 
684
void
685
newfile(enum PTYPE type, char *name)
686
{
687
	if (type == P_ARG)
688
		prog.curr = name;
689
	else if ((prog.bp = Bopen(name, OREAD)) == 0)
690
		quit("Cannot open pattern-file: %s\n", name);
691
	prog.type = type;
692
}
693
 
694
int
695
rline(Rune *buf, Rune *end)
696
{
697
	long c;
698
	Rune r;
699
 
700
	while ((c = getrune()) >= 0) {
701
		r = c;
702
		if (r == '\\') {
703
			if (buf <= end)
704
				*buf++ = r;
705
			if ((c = getrune()) < 0)
706
				break;
707
			r = c;
708
		} else if (r == '\n') {
709
			*buf = '\0';
710
			return 1;
711
		}
712
		if (buf <= end)
713
			*buf++ = r;
714
	}
715
	*buf = '\0';
716
	return -1;
717
}
718
 
719
long
720
getrune(void)
721
{
722
	long c;
723
	Rune r;
724
	char *p;
725
 
726
	if (prog.type == P_ARG) {
727
		if ((p = prog.curr) != 0) {
728
			if (*p) {
729
				prog.curr += chartorune(&r, p);
730
				c = r;
731
			} else {
732
				c = '\n';	/* fake an end-of-line */
733
				prog.curr = 0;
734
			}
735
		} else
736
			c = -1;
737
	} else if ((c = Bgetrune(prog.bp)) < 0)
738
		Bterm(prog.bp);
739
	return c;
740
}
741
 
742
void
743
address(Addr *ap)
744
{
745
	int c;
746
	long lno;
747
 
748
	if((c = *cp++) == '$')
749
		ap->type = A_DOL;
750
	else if(c == '/') {
751
		seof = c;
752
		if (ap->rp = compile())
753
			ap->type = A_RE;
754
		else
755
			ap->type = A_LAST;
756
	}
757
	else if (c >= '0' && c <= '9') {
758
		lno = c - '0';
759
		while ((c = *cp) >= '0' && c <= '9')
760
			lno = lno*10 + *cp++ - '0';
761
		if(!lno)
762
			quit("line number 0 is illegal",0);
763
		ap->type = A_LINE;
764
		ap->line = lno;
765
	}
766
	else {
767
		cp--;
768
		ap->type = A_NONE;
769
	}
770
}
771
 
772
cmp(char *a, char *b)		/* compare characters */
773
{
774
	while(*a == *b++)
775
		if (*a == '\0')
776
			return 0;
777
		else
778
			a++;
779
	return 1;
780
}
781
rcmp(Rune *a, Rune *b)		/* compare runes */
782
{
783
	while(*a == *b++)
784
		if (*a == '\0')
785
			return 0;
786
		else
787
			a++;
788
	return 1;
789
}
790
 
791
char *
792
text(char *p)		/* extract character string */
793
{
794
	Rune r;
795
 
796
	while(*cp == ' ' || *cp == '\t')
797
		cp++;
798
	while (*cp) {
799
		if ((r = *cp++) == '\\' && (r = *cp++) == '\0')
800
			break;
801
		if (r == '\n')
802
			while (*cp == ' ' || *cp == '\t')
803
				cp++;
804
		p += runetochar(p, &r);
805
	}
806
	*p++ = '\0';
807
	return p;
808
}
809
 
810
Rune *
811
stext(Rune *p, Rune *end)		/* extract rune string */
812
{
813
	while(*cp == L' ' || *cp == L'\t')
814
		cp++;
815
	while (*cp) {
816
		if (*cp == L'\\' && *++cp == L'\0')
817
			break;
818
		if (p >= end-1)
819
			quit(TMMES, linebuf);
820
		if ((*p++ = *cp++) == L'\n')
821
			while(*cp == L' ' || *cp == L'\t')
822
				cp++;
823
	}
824
	*p++ = 0;
825
	return p;
826
}
827
 
828
 
829
Label *
830
search(Label *ptr)
831
{
832
	Label	*rp;
833
 
834
	for (rp = ltab; rp < ptr; rp++)
835
		if(rcmp(rp->uninm, ptr->uninm) == 0)
836
			return(rp);
837
	return(0);
838
}
839
 
840
void
841
dechain(void)
842
{
843
	Label	*lptr;
844
	SedCom	*rptr, *trptr;
845
 
846
	for(lptr = ltab; lptr < lab; lptr++) {
847
		if(lptr->address == 0)
848
			quit("Undefined label: %S", lptr->uninm);
849
		if(lptr->chain) {
850
			rptr = lptr->chain;
851
			while((trptr = rptr->lb1) != nil) {
852
				rptr->lb1 = lptr->address;
853
				rptr = trptr;
854
			}
855
			rptr->lb1 = lptr->address;
856
		}
857
	}
858
}
859
 
860
int
861
ycomp(SedCom *r)
862
{
863
	int i;
864
	Rune *rp, *sp, *tsp;
865
	Rune c, highc;
866
 
867
	highc = 0;
868
	for(tsp = cp; *tsp != seof; tsp++) {
869
		if(*tsp == L'\\')
870
			tsp++;
871
		if(*tsp == L'\n' || *tsp == L'\0')
872
			return 0;
873
		if (*tsp > highc)
874
			highc = *tsp;
875
	}
876
	tsp++;
877
	if ((rp = r->text = (Rune *)malloc(sizeof(Rune) * (highc+2))) == nil)
878
		quit("Out of memory");
879
	*rp++ = highc;				/* save upper bound */
880
	for (i = 0; i <= highc; i++)
881
		rp[i] = i;
882
	sp = cp;
883
	while((c = *sp++) != seof) {
884
		if(c == L'\\' && *sp == L'n') {
885
			sp++;
886
			c = L'\n';
887
		}
888
		if((rp[c] = *tsp++) == L'\\' && *tsp == L'n') {
889
			rp[c] = L'\n';
890
			tsp++;
891
		}
892
		if(rp[c] == seof || rp[c] == L'\0') {
893
			free(r->re1);
894
			r->re1 = nil;
895
			return 0;
896
		}
897
	}
898
	if(*tsp != seof) {
899
		free(r->re1);
900
		r->re1 = nil;
901
		return 0;
902
	}
903
	cp = tsp+1;
904
	return 1;
905
}
906
 
907
void
908
execute(void)
909
{
910
	SedCom	*ipc;
911
 
912
	while (spend = gline(linebuf)){
913
		for(ipc = pspace; ipc->command; ) {
914
			if (!executable(ipc)) {
915
				ipc++;
916
				continue;
917
			}
918
			command(ipc);
919
 
920
			if(delflag)
921
				break;
922
			if(jflag) {
923
				jflag = 0;
924
				if((ipc = ipc->lb1) == 0)
925
					break;
926
			} else
927
				ipc++;
928
		}
929
		if(!nflag && !delflag)
930
			putline(&fout, linebuf, spend - linebuf);
931
		if(aptr > abuf)
932
			arout();
933
		delflag = 0;
934
	}
935
}
936
 
937
/* determine if a statement should be applied to an input line */
938
int
939
executable(SedCom *ipc)
940
{
941
	if (ipc->active) {	/* Addr1 satisfied - accept until Addr2 */
942
		if (ipc->active == 1)		/* Second line */
943
			ipc->active = 2;
944
		switch(ipc->ad2.type) {
945
		case A_NONE:		/* No second addr; use first */
946
			ipc->active = 0;
947
			break;
948
		case A_DOL:		/* Accept everything */
949
			return !ipc->negfl;
950
		case A_LINE:		/* Line at end of range? */
951
			if (lnum <= ipc->ad2.line) {
952
				if (ipc->ad2.line == lnum)
953
					ipc->active = 0;
954
				return !ipc->negfl;
955
			}
956
			ipc->active = 0;	/* out of range */
957
			return ipc->negfl;
958
		case A_RE:		/* Check for matching R.E. */
959
			if (match(ipc->ad2.rp, linebuf))
960
				ipc->active = 0;
961
			return !ipc->negfl;
962
		default:
963
			quit("Internal error");
964
		}
965
	}
966
	switch (ipc->ad1.type) {	/* Check first address */
967
	case A_NONE:			/* Everything matches */
968
		return !ipc->negfl;
969
	case A_DOL:			/* Only last line */
970
		if (dolflag)
971
			return !ipc->negfl;
972
		break;
973
	case A_LINE:			/* Check line number */
974
		if (ipc->ad1.line == lnum) {
975
			ipc->active = 1;	/* In range */
976
			return !ipc->negfl;
977
		}
978
		break;
979
	case A_RE:			/* Check R.E. */
980
		if (match(ipc->ad1.rp, linebuf)) {
981
			ipc->active = 1;	/* In range */
982
			return !ipc->negfl;
983
		}
984
		break;
985
	default:
986
		quit("Internal error");
987
	}
988
	return ipc->negfl;
989
}
990
 
991
int
992
match(Reprog *pattern, Rune *buf)
993
{
994
	if (!pattern)
995
		return 0;
996
	subexp[0].rsp = buf;
997
	subexp[0].ep = 0;
998
	if (rregexec(pattern, linebuf, subexp, MAXSUB) > 0) {
999
		loc1 = subexp[0].rsp;
1000
		loc2 = subexp[0].rep;
1001
		return 1;
1002
	}
1003
	loc1 = loc2 = 0;
1004
	return 0;
1005
}
1006
 
1007
int
1008
substitute(SedCom *ipc)
1009
{
1010
	int len;
1011
 
1012
	if(!match(ipc->re1, linebuf))
1013
		return 0;
1014
 
1015
	/*
1016
	 * we have at least one match.  some patterns, e.g. '$' or '^', can
1017
	 * produce 0-length matches, so during a global substitute we must
1018
	 * bump to the character after a 0-length match to keep from looping.
1019
	 */
1020
	sflag = 1;
1021
	if(ipc->gfl == 0)			/* single substitution */
1022
		dosub(ipc->rhs);
1023
	else
1024
		do{				/* global substitution */
1025
			len = loc2 - loc1;	/* length of match */
1026
			dosub(ipc->rhs);	/* dosub moves loc2 */
1027
			if(*loc2 == 0)		/* end of string */
1028
				break;
1029
			if(len == 0)		/* zero-length R.E. match */
1030
				loc2++;		/* bump over 0-length match */
1031
			if(*loc2 == 0)		/* end of string */
1032
				break;
1033
		} while(match(ipc->re1, loc2));
1034
	return 1;
1035
}
1036
 
1037
void
1038
dosub(Rune *rhsbuf)
1039
{
1040
	int c, n;
1041
	Rune *lp, *sp, *rp;
1042
 
1043
	lp = linebuf;
1044
	sp = genbuf;
1045
	rp = rhsbuf;
1046
	while (lp < loc1)
1047
		*sp++ = *lp++;
1048
	while(c = *rp++) {
1049
		if (c == '&') {
1050
			sp = place(sp, loc1, loc2);
1051
			continue;
1052
		}
1053
		if (c == Runemax && (c = *rp++) >= '1' && c < MAXSUB + '0') {
1054
			n = c-'0';
1055
			if (subexp[n].rsp && subexp[n].rep) {
1056
				sp = place(sp, subexp[n].rsp, subexp[n].rep);
1057
				continue;
1058
			}
1059
			else {
1060
				fprint(2, "sed: Invalid back reference \\%d\n",n);
1061
				errexit();
1062
			}
1063
		}
1064
		*sp++ = c;
1065
		if (sp >= &genbuf[LBSIZE])
1066
			fprint(2, "sed: Output line too long.\n");
1067
	}
1068
	lp = loc2;
1069
	loc2 = sp - genbuf + linebuf;
1070
	while (*sp++ = *lp++)
1071
		if (sp >= &genbuf[LBSIZE])
1072
			fprint(2, "sed: Output line too long.\n");
1073
	lp = linebuf;
1074
	sp = genbuf;
1075
	while (*lp++ = *sp++)
1076
		;
1077
	spend = lp - 1;
1078
}
1079
 
1080
Rune *
1081
place(Rune *sp, Rune *l1, Rune *l2)
1082
{
1083
	while (l1 < l2) {
1084
		*sp++ = *l1++;
1085
		if (sp >= &genbuf[LBSIZE])
1086
			fprint(2, "sed: Output line too long.\n");
1087
	}
1088
	return sp;
1089
}
1090
 
1091
char *
1092
trans(int c)
1093
{
1094
	static char buf[] = "\\x0000";
1095
	static char hex[] = "0123456789abcdef";
1096
 
1097
	switch(c) {
1098
	case '\b':
1099
		return "\\b";
1100
	case '\n':
1101
		return "\\n";
1102
	case '\r':
1103
		return "\\r";
1104
	case '\t':
1105
		return "\\t";
1106
	case '\\':
1107
		return "\\\\";
1108
	}
1109
	buf[2] = hex[(c>>12)&0xF];
1110
	buf[3] = hex[(c>>8)&0xF];
1111
	buf[4] = hex[(c>>4)&0xF];
1112
	buf[5] = hex[c&0xF];
1113
	return buf;
1114
}
1115
 
1116
void
1117
command(SedCom *ipc)
1118
{
1119
	int i, c;
1120
	char *ucp;
1121
	Rune *execp, *p1, *p2, *rp;
1122
 
1123
	switch(ipc->command) {
1124
	case ACOM:
1125
		*aptr++ = ipc;
1126
		if(aptr >= abuf+MAXADDS)
1127
			quit("sed: Too many appends after line %ld\n",
1128
				(char *)lnum);
1129
		*aptr = 0;
1130
		break;
1131
	case CCOM:
1132
		delflag = 1;
1133
		if(ipc->active == 1) {
1134
			for(rp = ipc->text; *rp; rp++)
1135
				Bputrune(&fout, *rp);
1136
			Bputc(&fout, '\n');
1137
		}
1138
		break;
1139
	case DCOM:
1140
		delflag++;
1141
		break;
1142
	case CDCOM:
1143
		p1 = p2 = linebuf;
1144
		while(*p1 != '\n') {
1145
			if(*p1++ == 0) {
1146
				delflag++;
1147
				return;
1148
			}
1149
		}
1150
		p1++;
1151
		while(*p2++ = *p1++)
1152
			;
1153
		spend = p2 - 1;
1154
		jflag++;
1155
		break;
1156
	case EQCOM:
1157
		Bprint(&fout, "%ld\n", lnum);
1158
		break;
1159
	case GCOM:
1160
		p1 = linebuf;
1161
		p2 = holdsp;
1162
		while(*p1++ = *p2++)
1163
			;
1164
		spend = p1 - 1;
1165
		break;
1166
	case CGCOM:
1167
		*spend++ = '\n';
1168
		p1 = spend;
1169
		p2 = holdsp;
1170
		while(*p1++ = *p2++)
1171
			if(p1 >= lbend)
1172
				break;
1173
		spend = p1 - 1;
1174
		break;
1175
	case HCOM:
1176
		p1 = holdsp;
1177
		p2 = linebuf;
1178
		while(*p1++ = *p2++);
1179
		hspend = p1 - 1;
1180
		break;
1181
	case CHCOM:
1182
		*hspend++ = '\n';
1183
		p1 = hspend;
1184
		p2 = linebuf;
1185
		while(*p1++ = *p2++)
1186
			if(p1 >= hend)
1187
				break;
1188
		hspend = p1 - 1;
1189
		break;
1190
	case ICOM:
1191
		for(rp = ipc->text; *rp; rp++)
1192
			Bputrune(&fout, *rp);
1193
		Bputc(&fout, '\n');
1194
		break;
1195
	case BCOM:
1196
		jflag = 1;
1197
		break;
1198
	case LCOM:
1199
		c = 0;
1200
		for (i = 0, rp = linebuf; *rp; rp++) {
1201
			c = *rp;
1202
			if(c >= 0x20 && c < 0x7F && c != '\\') {
1203
				Bputc(&fout, c);
1204
				if(i++ > 71) {
1205
					Bprint(&fout, "\\\n");
1206
					i = 0;
1207
				}
1208
			} else {
1209
				for (ucp = trans(*rp); *ucp; ucp++){
1210
					c = *ucp;
1211
					Bputc(&fout, c);
1212
					if(i++ > 71) {
1213
						Bprint(&fout, "\\\n");
1214
						i = 0;
1215
					}
1216
				}
1217
			}
1218
		}
1219
		if(c == ' ')
1220
			Bprint(&fout, "\\n");
1221
		Bputc(&fout, '\n');
1222
		break;
1223
	case NCOM:
1224
		if(!nflag)
1225
			putline(&fout, linebuf, spend-linebuf);
1226
 
1227
		if(aptr > abuf)
1228
			arout();
1229
		if((execp = gline(linebuf)) == 0) {
1230
			delflag = 1;
1231
			break;
1232
		}
1233
		spend = execp;
1234
		break;
1235
	case CNCOM:
1236
		if(aptr > abuf)
1237
			arout();
1238
		*spend++ = '\n';
1239
		if((execp = gline(spend)) == 0) {
1240
			delflag = 1;
1241
			break;
1242
		}
1243
		spend = execp;
1244
		break;
1245
	case PCOM:
1246
		putline(&fout, linebuf, spend-linebuf);
1247
		break;
1248
	case CPCOM:
1249
cpcom:
1250
		for(rp = linebuf; *rp && *rp != '\n'; rp++)
1251
			Bputc(&fout, *rp);
1252
		Bputc(&fout, '\n');
1253
		break;
1254
	case QCOM:
1255
		if(!nflag)
1256
			putline(&fout, linebuf, spend-linebuf);
1257
		if(aptr > abuf)
1258
			arout();
1259
		exits(0);
1260
	case RCOM:
1261
		*aptr++ = ipc;
1262
		if(aptr >= &abuf[MAXADDS])
1263
			quit("sed: Too many reads after line %ld\n",
1264
				(char *)lnum);
1265
		*aptr = 0;
1266
		break;
1267
	case SCOM:
1268
		i = substitute(ipc);
1269
		if(i && ipc->pfl)
1270
			if(ipc->pfl == 1)
1271
				putline(&fout, linebuf, spend-linebuf);
1272
			else
1273
				goto cpcom;
1274
		if(i && ipc->fcode)
1275
			goto wcom;
1276
		break;
1277
 
1278
	case TCOM:
1279
		if(sflag) {
1280
			sflag = 0;
1281
			jflag = 1;
1282
		}
1283
		break;
1284
 
1285
	case WCOM:
1286
wcom:
1287
		putline(ipc->fcode,linebuf, spend - linebuf);
1288
		break;
1289
	case XCOM:
1290
		p1 = linebuf;
1291
		p2 = genbuf;
1292
		while(*p2++ = *p1++)
1293
			;
1294
		p1 = holdsp;
1295
		p2 = linebuf;
1296
		while(*p2++ = *p1++)
1297
			;
1298
		spend = p2 - 1;
1299
		p1 = genbuf;
1300
		p2 = holdsp;
1301
		while(*p2++ = *p1++)
1302
			;
1303
		hspend = p2 - 1;
1304
		break;
1305
	case YCOM:
1306
		p1 = linebuf;
1307
		p2 = ipc->text;
1308
		for (i = *p2++;	*p1; p1++)
1309
			if (*p1 <= i)
1310
				*p1 = p2[*p1];
1311
		break;
1312
	}
1313
}
1314
 
1315
void
1316
putline(Biobuf *bp, Rune *buf, int n)
1317
{
1318
	while (n--)
1319
		Bputrune(bp, *buf++);
1320
	Bputc(bp, '\n');
1321
}
1322
ecmp(Rune *a, Rune *b, int count)
1323
{
1324
	while(count--)
1325
		if(*a++ != *b++)
1326
			return 0;
1327
	return 1;
1328
}
1329
 
1330
void
1331
arout(void)
1332
{
1333
	int	c;
1334
	char	*s;
1335
	char	buf[128];
1336
	Rune	*p1;
1337
	Biobuf	*fi;
1338
 
1339
	for (aptr = abuf; *aptr; aptr++) {
1340
		if((*aptr)->command == ACOM) {
1341
			for(p1 = (*aptr)->text; *p1; p1++ )
1342
				Bputrune(&fout, *p1);
1343
			Bputc(&fout, '\n');
1344
		} else {
1345
			for(s = buf, p1 = (*aptr)->text; *p1; p1++)
1346
				s += runetochar(s, p1);
1347
			*s = '\0';
1348
			if((fi = Bopen(buf, OREAD)) == 0)
1349
				continue;
1350
			while((c = Bgetc(fi)) >= 0)
1351
				Bputc(&fout, c);
1352
			Bterm(fi);
1353
		}
1354
	}
1355
	aptr = abuf;
1356
	*aptr = 0;
1357
}
1358
 
1359
void
1360
errexit(void)
1361
{
1362
	exits("error");
1363
}
1364
 
1365
void
1366
quit(char *fmt, ...)
1367
{
1368
	char *p, *ep;
1369
	char msg[256];
1370
	va_list arg;
1371
 
1372
	ep = msg + sizeof msg;
1373
	p = seprint(msg, ep, "sed: ");
1374
	va_start(arg, fmt);
1375
	p = vseprint(p, ep, fmt, arg);
1376
	va_end(arg);
1377
	p = seprint(p, ep, "\n");
1378
	write(2, msg, p - msg);
1379
	errexit();
1380
}
1381
 
1382
Rune *
1383
gline(Rune *addr)
1384
{
1385
	long c;
1386
	Rune *p;
1387
	static long peekc = 0;
1388
 
1389
	if (f == 0 && opendata() < 0)
1390
		return 0;
1391
	sflag = 0;
1392
	lnum++;
1393
/*	Bflush(&fout);********* dumped 4/30/92 - bobf****/
1394
	do {
1395
		p = addr;
1396
		for (c = (peekc? peekc: Bgetrune(f)); c >= 0; c = Bgetrune(f)) {
1397
			if (c == '\n') {
1398
				if ((peekc = Bgetrune(f)) < 0 && fhead == 0)
1399
					dolflag = 1;
1400
				*p = '\0';
1401
				return p;
1402
			}
1403
			if (c && p < lbend)
1404
				*p++ = c;
1405
		}
1406
		/* return partial final line, adding implicit newline */
1407
		if(p != addr) {
1408
			*p = '\0';
1409
			peekc = -1;
1410
			if (fhead == 0)
1411
				dolflag = 1;
1412
			return p;
1413
		}
1414
		peekc = 0;
1415
		Bterm(f);
1416
	} while (opendata() > 0);		/* Switch to next stream */
1417
	f = 0;
1418
	return 0;
1419
}
1420
 
1421
/*
1422
 * Data file input section - the intent is to transparently
1423
 *	catenate all data input streams.
1424
 */
1425
void
1426
enroll(char *filename)		/* Add a file to the input file cache */
1427
{
1428
	FileCache *fp;
1429
 
1430
	if ((fp = (FileCache *)malloc(sizeof (FileCache))) == nil)
1431
		quit("Out of memory");
1432
	if (ftail == nil)
1433
		fhead = fp;
1434
	else
1435
		ftail->next = fp;
1436
	ftail = fp;
1437
	fp->next = nil;
1438
	fp->name = filename;		/* 0 => stdin */
1439
}
1440
 
1441
int
1442
opendata(void)
1443
{
1444
	if (fhead == nil)
1445
		return -1;
1446
	if (fhead->name) {
1447
		if ((f = Bopen(fhead->name, OREAD)) == nil)
1448
			quit("Can't open %s", fhead->name);
1449
	} else {
1450
		Binit(&stdin, 0, OREAD);
1451
		f = &stdin;
1452
	}
1453
	fhead = fhead->next;
1454
	return 1;
1455
}