Subversion Repositories planix.SVN

Rev

Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
2 - 1
#include <u.h>
2
#include <libc.h>
3
#include <bio.h>
4
#include <ctype.h>
5
 
6
#define	Bungetrune	Bungetc		/* ok for now. */
7
 
8
/*
9
 * all these are 32 bit
10
 */
11
#define TBITSET		((32+NTERMS)/32)	/* BOTCH?? +31 */
12
#define BIT(a,i)	((a)[(i)>>5] & (1<<((i)&037)))
13
#define SETBIT(a,i)	((a)[(i)>>5] |= (1<<((i)&037)))
14
#define NWORDS(n)	(((n)+32)/32)
15
 
16
#define PARSER		"/sys/lib/yaccpar"
17
#define PARSERS		"/sys/lib/yaccpars"
18
#define TEMPNAME	"y.tmp.XXXXXX"
19
#define ACTNAME		"y.acts.XXXXXX"
20
#define OFILE		"tab.c"
21
#define FILEU		"output"
22
#define FILED		"tab.h"
23
#define FILEDEBUG	"debug"
24
 
25
enum
26
{
27
/*
28
 * the following are adjustable
29
 * according to memory size
30
 */
31
	ACTSIZE		= 40000,
32
	MEMSIZE		= 40000,
33
	NSTATES		= 2000,
34
	NTERMS		= 511,
35
	NPROD		= 1600,
36
	NNONTERM	= 600,
37
	TEMPSIZE	= 2000,
38
	CNAMSZ		= 10000,
39
	LSETSIZE	= 2400,
40
	WSETSIZE	= 350,
41
 
42
	NAMESIZE	= 50,
43
	NTYPES		= 63,
44
	ISIZE		= 400,
45
 
46
	PRIVATE		= 0xE000,	/* unicode private use */
47
 
48
	/* relationships which must hold:
49
		TBITSET ints must hold NTERMS+1 bits...
50
		WSETSIZE >= NNONTERM
51
		LSETSIZE >= NNONTERM
52
		TEMPSIZE >= NTERMS + NNONTERM + 1
53
		TEMPSIZE >= NSTATES
54
	*/
55
 
56
	NTBASE		= 010000,
57
	ERRCODE		= 8190,
58
	ACCEPTCODE	= 8191,
59
 
60
	NOASC		= 0,	/* no assoc. */
61
	LASC		= 1,	/* left assoc. */
62
	RASC		= 2,	/* right assoc. */
63
	BASC		= 3,	/* binary assoc. */
64
 
65
	/* flags for state generation */
66
 
67
	DONE		= 0,
68
	MUSTDO		= 1,
69
	MUSTLOOKAHEAD	= 2,
70
 
71
	/* flags for a rule having an action, and being reduced */
72
 
73
	ACTFLAG		= 04,
74
	REDFLAG		= 010,
75
 
76
	/* output parser flags */
77
	YYFLAG1		= -1000,
78
 
79
	/* parse tokens */
80
	IDENTIFIER	= PRIVATE,
81
	MARK,
82
	TERM,
83
	LEFT,
84
	RIGHT,
85
	BINARY,
86
	PREC,
87
	LCURLY,
88
	IDENTCOLON,
89
	NUMBER,
90
	START,
91
	TYPEDEF,
92
	TYPENAME,
93
	UNION,
94
 
95
	ENDFILE		= 0,
96
 
97
	EMPTY		= 1,
98
	WHOKNOWS	= 0,
99
	OK		= 1,
100
	NOMORE		= -1000,
101
};
102
 
103
	/* macros for getting associativity and precedence levels */
104
 
105
#define ASSOC(i)	((i)&03)
106
#define PLEVEL(i)	(((i)>>4)&077)
107
#define TYPE(i)		(((i)>>10)&077)
108
 
109
	/* macros for setting associativity and precedence levels */
110
 
111
#define SETASC(i,j)	i |= j
112
#define SETPLEV(i,j)	i |= (j<<4)
113
#define SETTYPE(i,j)	i |= (j<<10)
114
 
115
	/* looping macros */
116
 
117
#define TLOOP(i)	for(i=1; i<=ntokens; i++)
118
#define NTLOOP(i)	for(i=0; i<=nnonter; i++)
119
#define PLOOP(s,i)	for(i=s; i<nprod; i++)
120
#define SLOOP(i)	for(i=0; i<nstate; i++)
121
#define WSBUMP(x)	x++
122
#define WSLOOP(s,j)	for(j=s; j<cwp; j++)
123
#define ITMLOOP(i,p,q)	for(q=pstate[i+1], p=pstate[i]; p<q; p++)
124
#define SETLOOP(i)	for(i=0; i<tbitset; i++)
125
 
126
	/* command to clobber tempfiles after use */
127
 
128
#define	ZAPFILE(x)	if(x) remove(x)
129
 
130
	/* I/O descriptors */
131
 
132
Biobuf*	faction;	/* file for saving actions */
133
Biobuf*	fdefine;	/* file for #defines */
134
Biobuf*	fdebug;		/* y.debug for strings for debugging */
135
Biobuf*	ftable;		/* y.tab.c file */
136
Biobuf*	ftemp;		/* tempfile to pass 2 */
137
Biobuf*	finput;		/* input file */
138
Biobuf*	foutput;	/* y.output file */
139
 
140
	/* communication variables between various I/O routines */
141
 
142
char*	infile;			/* input file name */
143
int	numbval;		/* value of an input number */
144
char	tokname[NAMESIZE+UTFmax+1]; /* input token name, slop for runes and 0 */
145
 
146
	/* structure declarations */
147
 
148
typedef
149
struct
150
{
151
	int	lset[TBITSET];
152
} Lkset;
153
 
154
typedef
155
struct
156
{
157
	int*	pitem;
158
	Lkset*	look;
159
} Item;
160
 
161
typedef
162
struct
163
{
164
	char*	name;
165
	int	value;
166
} Symb;
167
 
168
typedef
169
struct
170
{
171
	int*	pitem;
172
	int	flag;
173
	Lkset	ws;
174
} Wset;
175
 
176
	/* storage of names */
177
 
178
char	cnames[CNAMSZ];		/* place where token and nonterminal names are stored */
179
int	cnamsz = CNAMSZ;	/* size of cnames */
180
char*	cnamp = cnames;		/* place where next name is to be put in */
181
int	ndefout = 4;		/* number of defined symbols output */
182
char*	tempname;
183
char*	actname;
184
char	ttempname[] = TEMPNAME;
185
char	tactname[] = ACTNAME;
186
char*	parser = PARSER;
187
char*	yydebug;
188
 
189
	/* storage of types */
190
int	ntypes;			/* number of types defined */
191
char*	typeset[NTYPES];	/* pointers to type tags */
192
 
193
	/* token information */
194
 
195
int	ntokens = 0 ;		/* number of tokens */
196
Symb	tokset[NTERMS];
197
int	toklev[NTERMS];		/* vector with the precedence of the terminals */
198
 
199
	/* nonterminal information */
200
 
201
int	nnonter = -1;		/* the number of nonterminals */
202
Symb	nontrst[NNONTERM];
203
int	start;			/* start symbol */
204
 
205
	/* assigned token type values */
206
int	extval = 0;
207
 
208
char*	ytabc = OFILE;	/* name of y.tab.c */
209
 
210
	/* grammar rule information */
211
 
212
int	mem0[MEMSIZE] ;		/* production storage */
213
int*	mem = mem0;
214
int	nprod = 1;		/* number of productions */
215
int*	prdptr[NPROD];		/* pointers to descriptions of productions */
216
int	levprd[NPROD];		/* precedence levels for the productions */
217
int	rlines[NPROD];		/* line number for this rule */
218
 
219
	/* state information */
220
 
221
int	nstate = 0;		/* number of states */
222
Item*	pstate[NSTATES+2];	/* pointers to the descriptions of the states */
223
int	tystate[NSTATES];	/* contains type information about the states */
224
int	defact[NSTATES];	/* the default actions of states */
225
int	tstates[NTERMS];	/* states generated by terminal gotos */
226
int	ntstates[NNONTERM]; 	/* states generated by nonterminal gotos */
227
int	mstates[NSTATES];	/* chain of overflows of term/nonterm generation lists  */
228
int	lastred; 		/* the number of the last reduction of a state */
229
 
230
	/* lookahead set information */
231
 
232
Lkset	lkst[LSETSIZE];
233
int	nolook;			/* flag to turn off lookahead computations */
234
int	tbitset;		/* size of lookahead sets */
235
int	nlset = 0;		/* next lookahead set index */
236
int	nolook = 0;		/* flag to suppress lookahead computations */
237
Lkset	clset;  		/* temporary storage for lookahead computations */
238
 
239
	/* working set information */
240
 
241
Wset	wsets[WSETSIZE];
242
Wset*	cwp;
243
 
244
	/* storage for action table */
245
 
246
int	amem[ACTSIZE];		/* action table storage */
247
int*	memp = amem;		/* next free action table position */
248
int	indgo[NSTATES];		/* index to the stored goto table */
249
 
250
	/* temporary vector, indexable by states, terms, or ntokens */
251
 
252
int	temp1[TEMPSIZE];	/* temporary storage, indexed by terms + ntokens or states */
253
int	lineno = 1;		/* current input line number */
254
int	fatfl = 1;  		/* if on, error is fatal */
255
int	nerrors = 0;		/* number of errors */
256
 
257
	/* statistics collection variables */
258
 
259
int	zzgoent;
260
int	zzgobest;
261
int	zzacent;
262
int	zzexcp;
263
int	zzclose;
264
int	zzrrconf;
265
int	zzsrconf;
266
 
267
int*	ggreed = lkst[0].lset;
268
int*	pgo = wsets[0].ws.lset;
269
int*	yypgo = &nontrst[0].value;
270
 
271
int	maxspr = 0;  		/* maximum spread of any entry */
272
int	maxoff = 0;  		/* maximum offset into a array */
273
int*	pmem = mem0;
274
int*	maxa;
275
int	nxdb = 0;
276
int	adb = 0;
277
 
278
 
279
	/* storage for information about the nonterminals */
280
 
281
int**	pres[NNONTERM+2];  	/* vector of pointers to productions yielding each nonterminal */
282
Lkset*	pfirst[NNONTERM+2];	/* vector of pointers to first sets for each nonterminal */
283
int	pempty[NNONTERM+1];	/* vector of nonterminals nontrivially deriving e */
284
 
285
	/* random stuff picked out from between functions */
286
 
287
int	indebug = 0;
288
Wset*	zzcwp = wsets;
289
int	zzgoent = 0;
290
int	zzgobest = 0;
291
int	zzacent = 0;
292
int	zzexcp = 0;
293
int	zzclose = 0;
294
int	zzsrconf = 0;
295
int*	zzmemsz = mem0;
296
int	zzrrconf = 0;
297
int	pidebug = 0;		/* debugging flag for putitem */
298
int	gsdebug = 0;
299
int	cldebug = 0;		/* debugging flag for closure */
300
int	pkdebug = 0;
301
int	g2debug = 0;
302
 
303
struct
304
{
305
	char*	name;
306
	long	value;
307
} resrv[] =
308
{
309
	"binary",	BINARY,
310
	"left",		LEFT,
311
	"nonassoc",	BINARY,
312
	"prec",		PREC,
313
	"right",	RIGHT,
314
	"start",	START,
315
	"term",		TERM,
316
	"token",	TERM,
317
	"type",		TYPEDEF,
318
	"union",	UNION,
319
	0,
320
};
321
 
322
	/* define functions */
323
 
324
void	main(int, char**);
325
void	others(void);
326
char*	chcopy(char*, char*);
327
char*	writem(int*);
328
char*	symnam(int);
329
void	summary(void);
330
void	error(char*, ...);
331
void	aryfil(int*, int, int);
332
int	setunion(int*, int*);
333
void	prlook(Lkset*);
334
void	cpres(void);
335
void	cpfir(void);
336
int	state(int);
337
void	putitem(int*, Lkset*);
338
void	cempty(void);
339
void	stagen(void);
340
void	closure(int);
341
Lkset*	flset(Lkset*);
342
void	cleantmp(void);
343
void	intr(void);
344
void	setup(int, char**);
345
void	finact(void);
346
int	defin(int, char*);
347
void	defout(int);
348
char*	cstash(char*);
349
long	gettok(void);
350
int	fdtype(int);
351
int	chfind(int, char*);
352
void	cpyunion(void);
353
void	cpycode(void);
354
int	skipcom(void);
355
void	cpyact(int);
356
void	openup(char*, int, int, int, char*);
357
void	output(void);
358
int	apack(int*, int);
359
void	go2out(void);
360
void	go2gen(int);
361
void	precftn(int, int, int);
362
void	wract(int);
363
void	wrstate(int);
364
void	warray(char*, int*, int);
365
void	hideprod(void);
366
void	callopt(void);
367
void	gin(int);
368
void	stin(int);
369
int	nxti(void);
370
void	osummary(void);
371
void	aoutput(void);
372
void	arout(char*, int*, int);
373
int	gtnm(void);
374
 
375
void
376
main(int argc, char *argv[])
377
{
378
 
379
	setup(argc, argv);	/* initialize and read productions */
380
	tbitset = NWORDS(ntokens);
381
	cpres();		/* make table of which productions yield a given nonterminal */
382
	cempty();		/* make a table of which nonterminals can match the empty string */
383
	cpfir();		/* make a table of firsts of nonterminals */
384
	stagen();		/* generate the states */
385
	output();		/* write the states and the tables */
386
	go2out();
387
	hideprod();
388
	summary();
389
	callopt();
390
	others();
391
	exits(0);
392
}
393
 
394
/*
395
 * put out other arrays, copy the parsers
396
 */
397
void
398
others(void)
399
{
400
	int c, i, j;
401
 
402
	finput = Bopen(parser, OREAD);
403
	if(finput == 0)
404
		error("cannot find parser %s", parser);
405
	warray("yyr1", levprd, nprod);
406
	aryfil(temp1, nprod, 0);
407
	PLOOP(1, i)
408
		temp1[i] = prdptr[i+1]-prdptr[i]-2;
409
	warray("yyr2", temp1, nprod);
410
 
411
	aryfil(temp1, nstate, -1000);
412
	TLOOP(i)
413
		for(j=tstates[i]; j!=0; j=mstates[j])
414
			temp1[j] = i;
415
	NTLOOP(i)
416
		for(j=ntstates[i]; j!=0; j=mstates[j])
417
			temp1[j] = -i;
418
	warray("yychk", temp1, nstate);
419
	warray("yydef", defact, nstate);
420
 
421
	/* put out token translation tables */
422
	/* table 1 has 0-256 */
423
	aryfil(temp1, 256, 0);
424
	c = 0;
425
	TLOOP(i) {
426
		j = tokset[i].value;
427
		if(j >= 0 && j < 256) {
428
			if(temp1[j]) {
429
				print("yacc bug -- cant have 2 different Ts with same value\n");
430
				print("	%s and %s\n", tokset[i].name, tokset[temp1[j]].name);
431
				nerrors++;
432
			}
433
			temp1[j] = i;
434
			if(j > c)
435
				c = j;
436
		}
437
	}
438
	warray("yytok1", temp1, c+1);
439
 
440
	/* table 2 has PRIVATE-PRIVATE+256 */
441
	aryfil(temp1, 256, 0);
442
	c = 0;
443
	TLOOP(i) {
444
		j = tokset[i].value - PRIVATE;
445
		if(j >= 0 && j < 256) {
446
			if(temp1[j]) {
447
				print("yacc bug -- cant have 2 different Ts with same value\n");
448
				print("	%s and %s\n", tokset[i].name, tokset[temp1[j]].name);
449
				nerrors++;
450
			}
451
			temp1[j] = i;
452
			if(j > c)
453
				c = j;
454
		}
455
	}
456
	warray("yytok2", temp1, c+1);
457
 
458
	/* table 3 has everything else */
459
	Bprint(ftable, "long	yytok3[] =\n{\n");
460
	c = 0;
461
	TLOOP(i) {
462
		j = tokset[i].value;
463
		if(j >= 0 && j < 256)
464
			continue;
465
		if(j >= PRIVATE && j < 256+PRIVATE)
466
			continue;
467
 
468
		Bprint(ftable, "%4d,%4d,", j, i);
469
		c++;
470
		if(c%5 == 0)
471
			Bprint(ftable, "\n");
472
	}
473
	Bprint(ftable, "%4d\n};\n", 0);
474
 
475
	/* copy parser text */
476
	while((c=Bgetrune(finput)) != Beof) {
477
		if(c == '$') {
478
			if((c = Bgetrune(finput)) != 'A')
479
				Bputrune(ftable, '$');
480
			else { /* copy actions */
481
				faction = Bopen(actname, OREAD);
482
				if(faction == 0)
483
					error("cannot reopen action tempfile");
484
				while((c=Bgetrune(faction)) != Beof)
485
					Bputrune(ftable, c);
486
				Bterm(faction);
487
				ZAPFILE(actname);
488
				c = Bgetrune(finput);
489
			}
490
		}
491
		Bputrune(ftable, c);
492
	}
493
	Bterm(ftable);
494
}
495
 
496
/*
497
 * copies string q into p, returning next free char ptr
498
 */
499
char*
500
chcopy(char* p, char* q)
501
{
502
	int c;
503
 
504
	while(c = *q) {
505
		if(c == '"')
506
			*p++ = '\\';
507
		*p++ = c;
508
		q++;
509
	}
510
	*p = 0;
511
	return p;
512
}
513
 
514
/*
515
 * creates output string for item pointed to by pp
516
 */
517
char*
518
writem(int *pp)
519
{
520
	int i,*p;
521
	static char sarr[ISIZE];
522
	char* q;
523
 
524
	for(p=pp; *p>0; p++)
525
		;
526
	p = prdptr[-*p];
527
	q = chcopy(sarr, nontrst[*p-NTBASE].name);
528
	q = chcopy(q, ": ");
529
	for(;;) {
530
		*q = ' ';
531
		p++;
532
		if(p == pp)
533
			*q = '.';
534
		q++;
535
		*q = '\0';
536
		i = *p;
537
		if(i <= 0)
538
			break;
539
		q = chcopy(q, symnam(i));
540
		if(q > &sarr[ISIZE-30])
541
			error("item too big");
542
	}
543
 
544
	/* an item calling for a reduction */
545
	i = *pp;
546
	if(i < 0 ) {
547
		q = chcopy(q, "    (");
548
		sprint(q, "%d)", -i);
549
	}
550
	return sarr;
551
}
552
 
553
/*
554
 * return a pointer to the name of symbol i
555
 */
556
char*
557
symnam(int i)
558
{
559
	char* cp;
560
 
561
	cp = (i >= NTBASE)? nontrst[i-NTBASE].name: tokset[i].name;
562
	if(*cp == ' ')
563
		cp++;
564
	return cp;
565
}
566
 
567
/*
568
 * output the summary on y.output
569
 */
570
void
571
summary(void)
572
{
573
 
574
	if(foutput != 0) {
575
		Bprint(foutput, "\n%d/%d terminals, %d/%d nonterminals\n",
576
			ntokens, NTERMS, nnonter, NNONTERM);
577
		Bprint(foutput, "%d/%d grammar rules, %d/%d states\n",
578
			nprod, NPROD, nstate, NSTATES);
579
		Bprint(foutput, "%d shift/reduce, %d reduce/reduce conflicts reported\n",
580
			zzsrconf, zzrrconf);
581
		Bprint(foutput, "%d/%d working sets used\n",
582
			(int)(zzcwp-wsets), WSETSIZE);
583
		Bprint(foutput, "memory: states,etc. %d/%d, parser %d/%d\n",
584
			(int)(zzmemsz-mem0), MEMSIZE, (int)(memp-amem), ACTSIZE);
585
		Bprint(foutput, "%d/%d distinct lookahead sets\n", nlset, LSETSIZE);
586
		Bprint(foutput, "%d extra closures\n", zzclose - 2*nstate);
587
		Bprint(foutput, "%d shift entries, %d exceptions\n", zzacent, zzexcp);
588
		Bprint(foutput, "%d goto entries\n", zzgoent);
589
		Bprint(foutput, "%d entries saved by goto default\n", zzgobest);
590
	}
591
	if(zzsrconf != 0 || zzrrconf != 0) {
592
		print("\nconflicts: ");
593
		if(zzsrconf)
594
			print("%d shift/reduce", zzsrconf);
595
		if(zzsrconf && zzrrconf)
596
			print(", ");
597
		if(zzrrconf)
598
			print("%d reduce/reduce", zzrrconf);
599
		print("\n");
600
	}
601
	if(ftemp != 0) {
602
		Bterm(ftemp);
603
		ftemp = 0;
604
	}
605
	if(fdefine != 0) {
606
		Bterm(fdefine);
607
		fdefine = 0;
608
	}
609
}
610
 
611
/*
612
 * write out error comment -- NEEDS WORK
613
 */
614
void
615
error(char *s, ...)
616
{
617
 
618
	nerrors++;
619
	fprint(2, "\n fatal error:");
620
	fprint(2, s, (&s)[1]);
621
	fprint(2, ", %s:%d\n", infile, lineno);
622
	if(!fatfl)
623
		return;
624
	summary();
625
	cleantmp();
626
	exits("error");
627
}
628
 
629
/*
630
 * set elements 0 through n-1 to c
631
 */
632
void
633
aryfil(int *v, int n, int c)
634
{
635
	int i;
636
 
637
	for(i=0; i<n; i++)
638
		v[i] = c;
639
}
640
 
641
/*
642
 * set a to the union of a and b
643
 * return 1 if b is not a subset of a, 0 otherwise
644
 */
645
int
646
setunion(int *a, int *b)
647
{
648
	int i, x, sub;
649
 
650
	sub = 0;
651
	SETLOOP(i) {
652
		x = *a;
653
		*a |= *b;
654
		if(*a != x)
655
			sub = 1;
656
		a++;
657
		b++;
658
	}
659
	return sub;
660
}
661
 
662
void
663
prlook(Lkset* p)
664
{
665
	int j, *pp;
666
 
667
	pp = p->lset;
668
	if(pp == 0)
669
		Bprint(foutput, "\tNULL");
670
	else {
671
		Bprint(foutput, " { ");
672
		TLOOP(j)
673
			if(BIT(pp,j))
674
				Bprint(foutput, "%s ", symnam(j));
675
		Bprint(foutput, "}");
676
	}
677
}
678
 
679
/*
680
 * compute an array with the beginnings of  productions yielding given nonterminals
681
 * The array pres points to these lists
682
 * the array pyield has the lists: the total size is only NPROD+1
683
 */
684
void
685
cpres(void)
686
{
687
	int c, j, i, **pmem;
688
	static int *pyield[NPROD];
689
 
690
	pmem = pyield;
691
	NTLOOP(i) {
692
		c = i+NTBASE;
693
		pres[i] = pmem;
694
		fatfl = 0;  	/* make undefined  symbols  nonfatal */
695
		PLOOP(0, j)
696
			if(*prdptr[j] == c)
697
				*pmem++ =  prdptr[j]+1;
698
		if(pres[i] == pmem)
699
			error("nonterminal %s not defined!", nontrst[i].name);
700
	}
701
	pres[i] = pmem;
702
	fatfl = 1;
703
	if(nerrors) {
704
		summary();
705
		cleantmp();
706
		exits("error");
707
	}
708
	if(pmem != &pyield[nprod])
709
		error("internal Yacc error: pyield %d", pmem-&pyield[nprod]);
710
}
711
 
712
/*
713
 * compute an array with the first of nonterminals
714
 */
715
void
716
cpfir(void)
717
{
718
	int *p, **s, i, **t, ch, changes;
719
 
720
	zzcwp = &wsets[nnonter];
721
	NTLOOP(i) {
722
		aryfil(wsets[i].ws.lset, tbitset, 0);
723
		t = pres[i+1];
724
		/* initially fill the sets */
725
		for(s=pres[i]; s<t; ++s)
726
			for(p = *s; (ch = *p) > 0; ++p) {
727
				if(ch < NTBASE) {
728
					SETBIT(wsets[i].ws.lset, ch);
729
					break;
730
				}
731
				if(!pempty[ch-NTBASE])
732
					break;
733
			}
734
	}
735
 
736
	/* now, reflect transitivity */
737
	changes = 1;
738
	while(changes) {
739
		changes = 0;
740
		NTLOOP(i) {
741
			t = pres[i+1];
742
			for(s = pres[i]; s < t; ++s)
743
				for(p = *s; (ch = (*p-NTBASE)) >= 0; ++p) {
744
					changes |= setunion(wsets[i].ws.lset, wsets[ch].ws.lset);
745
					if(!pempty[ch])
746
						break;
747
				}
748
		}
749
	}
750
 
751
	NTLOOP(i)
752
		pfirst[i] = flset(&wsets[i].ws);
753
	if(!indebug)
754
		return;
755
	if(foutput != 0)
756
		NTLOOP(i) {
757
			Bprint(foutput, "\n%s: ", nontrst[i].name);
758
			prlook(pfirst[i]);
759
			Bprint(foutput, " %d\n", pempty[i]);
760
		}
761
}
762
 
763
/*
764
 * sorts last state,and sees if it equals earlier ones. returns state number
765
 */
766
int
767
state(int c)
768
{
769
	Item *p1, *p2, *k, *l, *q1, *q2;
770
	int size1, size2, i;
771
 
772
	p1 = pstate[nstate];
773
	p2 = pstate[nstate+1];
774
	if(p1 == p2)
775
		return 0;	/* null state */
776
	/* sort the items */
777
	for(k = p2-1; k > p1; k--)	/* make k the biggest */
778
		for(l = k-1; l >= p1; --l)
779
			if(l->pitem > k->pitem) {
780
				int *s;
781
				Lkset *ss;
782
 
783
				s = k->pitem;
784
				k->pitem = l->pitem;
785
				l->pitem = s;
786
				ss = k->look;
787
				k->look = l->look;
788
				l->look = ss;
789
			}
790
	size1 = p2 - p1;	/* size of state */
791
 
792
	for(i = (c>=NTBASE)? ntstates[c-NTBASE]: tstates[c]; i != 0; i = mstates[i]) {
793
		/* get ith state */
794
		q1 = pstate[i];
795
		q2 = pstate[i+1];
796
		size2 = q2 - q1;
797
		if(size1 != size2)
798
			continue;
799
		k = p1;
800
		for(l = q1; l < q2; l++) {
801
			if(l->pitem != k->pitem)
802
				break;
803
			k++;
804
		}
805
		if(l != q2)
806
			continue;
807
		/* found it */
808
		pstate[nstate+1] = pstate[nstate];	/* delete last state */
809
		/* fix up lookaheads */
810
		if(nolook)
811
			return i;
812
		for(l = q1, k = p1; l < q2; ++l, ++k ) {
813
			int s;
814
 
815
			SETLOOP(s)
816
				clset.lset[s] = l->look->lset[s];
817
			if(setunion(clset.lset, k->look->lset)) {
818
				tystate[i] = MUSTDO;
819
				/* register the new set */
820
				l->look = flset( &clset );
821
			}
822
		}
823
		return i;
824
	}
825
	/* state is new */
826
	if(nolook)
827
		error("yacc state/nolook error");
828
	pstate[nstate+2] = p2;
829
	if(nstate+1 >= NSTATES)
830
		error("too many states");
831
	if(c >= NTBASE) {
832
		mstates[nstate] = ntstates[c-NTBASE];
833
		ntstates[c-NTBASE] = nstate;
834
	} else {
835
		mstates[nstate] = tstates[c];
836
		tstates[c] = nstate;
837
	}
838
	tystate[nstate] = MUSTDO;
839
	return nstate++;
840
}
841
 
842
void
843
putitem(int *ptr, Lkset *lptr)
844
{
845
	Item *j;
846
 
847
	if(pidebug && foutput != 0)
848
		Bprint(foutput, "putitem(%s), state %d\n", writem(ptr), nstate);
849
	j = pstate[nstate+1];
850
	j->pitem = ptr;
851
	if(!nolook)
852
		j->look = flset(lptr);
853
	pstate[nstate+1] = ++j;
854
	if((int*)j > zzmemsz) {
855
		zzmemsz = (int*)j;
856
		if(zzmemsz >=  &mem0[MEMSIZE])
857
			error("out of state space");
858
	}
859
}
860
 
861
/*
862
 * mark nonterminals which derive the empty string
863
 * also, look for nonterminals which don't derive any token strings
864
 */
865
void
866
cempty(void)
867
{
868
 
869
	int i, *p;
870
 
871
	/* first, use the array pempty to detect productions that can never be reduced */
872
	/* set pempty to WHONOWS */
873
	aryfil(pempty, nnonter+1, WHOKNOWS);
874
 
875
	/* now, look at productions, marking nonterminals which derive something */
876
more:
877
	PLOOP(0, i) {
878
		if(pempty[*prdptr[i] - NTBASE])
879
			continue;
880
		for(p = prdptr[i]+1; *p >= 0; ++p)
881
			if(*p >= NTBASE && pempty[*p-NTBASE] == WHOKNOWS)
882
				break;
883
		/* production can be derived */
884
		if(*p < 0) {
885
			pempty[*prdptr[i]-NTBASE] = OK;
886
			goto more;
887
		}
888
	}
889
 
890
	/* now, look at the nonterminals, to see if they are all OK */
891
	NTLOOP(i) {
892
		/* the added production rises or falls as the start symbol ... */
893
		if(i == 0)
894
			continue;
895
		if(pempty[i] != OK) {
896
			fatfl = 0;
897
			error("nonterminal %s never derives any token string", nontrst[i].name);
898
		}
899
	}
900
 
901
	if(nerrors) {
902
		summary();
903
		cleantmp();
904
		exits("error");
905
	}
906
 
907
	/* now, compute the pempty array, to see which nonterminals derive the empty string */
908
	/* set pempty to WHOKNOWS */
909
	aryfil( pempty, nnonter+1, WHOKNOWS);
910
 
911
	/* loop as long as we keep finding empty nonterminals */
912
 
913
again:
914
	PLOOP(1, i) {
915
		/* not known to be empty */
916
		if(pempty[*prdptr[i]-NTBASE] == WHOKNOWS) {
917
			for(p = prdptr[i]+1; *p >= NTBASE && pempty[*p-NTBASE] == EMPTY ; ++p)
918
				;
919
			/* we have a nontrivially empty nonterminal */
920
			if(*p < 0) {
921
				pempty[*prdptr[i]-NTBASE] = EMPTY;
922
				/* got one ... try for another */
923
				goto again;
924
			}
925
		}
926
	}
927
}
928
 
929
/*
930
 * generate the states
931
 */
932
void
933
stagen(void)
934
{
935
 
936
	int c, i, j, more;
937
	Wset *p, *q;
938
 
939
	/* initialize */
940
	nstate = 0;
941
 
942
	/* THIS IS FUNNY from the standpoint of portability
943
	 * it represents the magic moment when the mem0 array, which has
944
	 * been holding the productions, starts to hold item pointers, of a
945
	 * different type...
946
	 * someday, alloc should be used to allocate all this stuff... for now, we
947
	 * accept that if pointers don't fit in integers, there is a problem...
948
	 */
949
 
950
	pstate[0] = pstate[1] = (Item*)mem;
951
	aryfil(clset.lset, tbitset, 0);
952
	putitem(prdptr[0]+1, &clset);
953
	tystate[0] = MUSTDO;
954
	nstate = 1;
955
	pstate[2] = pstate[1];
956
 
957
	aryfil(amem, ACTSIZE, 0);
958
 
959
	/* now, the main state generation loop */
960
	for(more=1; more;) {
961
		more = 0;
962
		SLOOP(i) {
963
			if(tystate[i] != MUSTDO)
964
				continue;
965
			tystate[i] = DONE;
966
			aryfil(temp1, nnonter+1, 0);
967
			/* take state i, close it, and do gotos */
968
			closure(i);
969
			/* generate goto's */
970
			WSLOOP(wsets, p) {
971
				if(p->flag)
972
					continue;
973
				p->flag = 1;
974
				c = *(p->pitem);
975
				if(c <= 1) {
976
					if(pstate[i+1]-pstate[i] <= p-wsets)
977
						tystate[i] = MUSTLOOKAHEAD;
978
					continue;
979
				}
980
				/* do a goto on c */
981
				WSLOOP(p, q)
982
					/* this item contributes to the goto */
983
					if(c == *(q->pitem)) {
984
						putitem(q->pitem+1, &q->ws);
985
						q->flag = 1;
986
					}
987
				if(c < NTBASE)
988
					state(c);	/* register new state */
989
				else
990
					temp1[c-NTBASE] = state(c);
991
			}
992
			if(gsdebug && foutput != 0) {
993
				Bprint(foutput, "%d: ", i);
994
				NTLOOP(j)
995
					if(temp1[j])
996
						Bprint(foutput, "%s %d, ",
997
						nontrst[j].name, temp1[j]);
998
				Bprint(foutput, "\n");
999
			}
1000
			indgo[i] = apack(&temp1[1], nnonter-1) - 1;
1001
			/* do some more */
1002
			more = 1;
1003
		}
1004
	}
1005
}
1006
 
1007
/*
1008
 * generate the closure of state i
1009
 */
1010
void
1011
closure(int i)
1012
{
1013
 
1014
	Wset *u, *v;
1015
	Item *p, *q;
1016
	int c, ch, work, k, *pi, **s, **t;
1017
 
1018
	zzclose++;
1019
 
1020
	/* first, copy kernel of state i to wsets */
1021
	cwp = wsets;
1022
	ITMLOOP(i, p, q) {
1023
		cwp->pitem = p->pitem;
1024
		cwp->flag = 1;			/* this item must get closed */
1025
		SETLOOP(k)
1026
			cwp->ws.lset[k] = p->look->lset[k];
1027
		WSBUMP(cwp);
1028
	}
1029
 
1030
	/* now, go through the loop, closing each item */
1031
	work = 1;
1032
	while(work) {
1033
		work = 0;
1034
		WSLOOP(wsets, u) {
1035
			if(u->flag == 0)
1036
				continue;
1037
			/* dot is before c */
1038
			c = *(u->pitem);
1039
			if(c < NTBASE) {
1040
				u->flag = 0;
1041
				/* only interesting case is where . is before nonterminal */
1042
				continue;
1043
			}
1044
 
1045
			/* compute the lookahead */
1046
			aryfil(clset.lset, tbitset, 0);
1047
 
1048
			/* find items involving c */
1049
			WSLOOP(u, v)
1050
				if(v->flag == 1 && *(pi=v->pitem) == c) {
1051
					v->flag = 0;
1052
					if(nolook)
1053
						continue;
1054
					while((ch = *++pi) > 0) {
1055
						/* terminal symbol */
1056
						if(ch < NTBASE) {
1057
							SETBIT(clset.lset, ch);
1058
							break;
1059
						}
1060
						/* nonterminal symbol */
1061
						setunion(clset.lset, pfirst[ch-NTBASE]->lset);
1062
						if(!pempty[ch-NTBASE])
1063
							break;
1064
					}
1065
					if(ch <= 0)
1066
						setunion(clset.lset, v->ws.lset);
1067
				}
1068
 
1069
			/*
1070
			 * now loop over productions derived from c
1071
			 * c is now nonterminal number
1072
			 */
1073
			c -= NTBASE;
1074
			t = pres[c+1];
1075
			for(s = pres[c]; s < t; ++s) {
1076
				/*
1077
				 * put these items into the closure
1078
				 * is the item there
1079
				 */
1080
				WSLOOP(wsets, v)
1081
					/* yes, it is there */
1082
					if(v->pitem == *s) {
1083
						if(nolook)
1084
							goto nexts;
1085
						if(setunion(v->ws.lset, clset.lset))
1086
							v->flag = work = 1;
1087
						goto nexts;
1088
					}
1089
 
1090
				/*  not there; make a new entry */
1091
				if(cwp-wsets+1 >= WSETSIZE)
1092
					error( "working set overflow");
1093
				cwp->pitem = *s;
1094
				cwp->flag = 1;
1095
				if(!nolook) {
1096
					work = 1;
1097
					SETLOOP(k) cwp->ws.lset[k] = clset.lset[k];
1098
				}
1099
				WSBUMP(cwp);
1100
 
1101
			nexts:;
1102
			}
1103
		}
1104
	}
1105
 
1106
	/* have computed closure; flags are reset; return */
1107
	if(cwp > zzcwp)
1108
		zzcwp = cwp;
1109
	if(cldebug && foutput != 0) {
1110
		Bprint(foutput, "\nState %d, nolook = %d\n", i, nolook);
1111
		WSLOOP(wsets, u) {
1112
			if(u->flag)
1113
				Bprint(foutput, "flag set!\n");
1114
			u->flag = 0;
1115
			Bprint(foutput, "\t%s", writem(u->pitem));
1116
			prlook(&u->ws);
1117
			Bprint(foutput, "\n");
1118
		}
1119
	}
1120
}
1121
 
1122
/*
1123
 * decide if the lookahead set pointed to by p is known
1124
 * return pointer to a perminent location for the set
1125
 */
1126
Lkset*
1127
flset(Lkset *p)
1128
{
1129
	Lkset *q;
1130
	int *u, *v, *w, j;
1131
 
1132
	for(q = &lkst[nlset]; q-- > lkst;) {
1133
		u = p->lset;
1134
		v = q->lset;
1135
		w = &v[tbitset];
1136
		while(v < w)
1137
			if(*u++ != *v++)
1138
				goto more;
1139
		/* we have matched */
1140
		return q;
1141
	more:;
1142
	}
1143
	/* add a new one */
1144
	q = &lkst[nlset++];
1145
	if(nlset >= LSETSIZE)
1146
		error("too many lookahead sets");
1147
	SETLOOP(j)
1148
		q->lset[j] = p->lset[j];
1149
	return q;
1150
}
1151
 
1152
void
1153
cleantmp(void)
1154
{
1155
	ZAPFILE(actname);
1156
	ZAPFILE(tempname);
1157
}
1158
 
1159
void
1160
intr(void)
1161
{
1162
	cleantmp();
1163
	exits("interrupted");
1164
}
1165
 
1166
void
1167
usage(void)
1168
{
1169
	fprint(2, "usage: yacc [-Dn] [-vdS] [-o outputfile] [-s stem] grammar\n");
1170
	exits("usage");
1171
}
1172
 
1173
void
1174
setup(int argc, char *argv[])
1175
{
1176
	long c, t;
1177
	int i, j, lev, ty, ytab, *p;
1178
	int vflag, dflag, stem;
1179
	char actnm[8], *stemc, *s, dirbuf[128];
1180
 
1181
	ytab = 0;
1182
	vflag = 0;
1183
	dflag = 0;
1184
	stem = 0;
1185
	stemc = "y";
1186
	foutput = 0;
1187
	fdefine = 0;
1188
	fdebug = 0;
1189
	ARGBEGIN{
1190
	case 'v':
1191
	case 'V':
1192
		vflag++;
1193
		break;
1194
	case 'D':
1195
		yydebug = EARGF(usage());
1196
		break;
1197
	case 'd':
1198
		dflag++;
1199
		break;
1200
	case 'o':
1201
		ytab++;
1202
		ytabc = EARGF(usage());
1203
		break;
1204
	case 's':
1205
		stem++;
1206
		stemc = ARGF();
1207
		break;
1208
	case 'S':
1209
		parser = PARSERS;
1210
		break;
1211
	default:
1212
		error("illegal option: %c", ARGC());
1213
	}ARGEND
1214
	openup(stemc, dflag, vflag, ytab, ytabc);
1215
 
1216
	ftemp = Bopen(tempname = mktemp(ttempname), OWRITE);
1217
	faction = Bopen(actname = mktemp(tactname), OWRITE);
1218
	if(ftemp == 0 || faction == 0)
1219
		error("cannot open temp file");
1220
	if(argc < 1)
1221
		error("no input file");
1222
	infile = argv[0];
1223
	if(infile[0] != '/' && getwd(dirbuf, sizeof dirbuf)!=nil){
1224
		i = strlen(infile)+1+strlen(dirbuf)+1+10;
1225
		s = malloc(i);
1226
		if(s != nil){
1227
			snprint(s, i, "%s/%s", dirbuf, infile);
1228
			cleanname(s);
1229
			infile = s;
1230
		}
1231
	}
1232
	finput = Bopen(infile, OREAD);
1233
	if(finput == 0)
1234
		error("cannot open '%s'", argv[0]);
1235
	cnamp = cnames;
1236
 
1237
	defin(0, "$end");
1238
	extval = PRIVATE;	/* tokens start in unicode 'private use' */
1239
	defin(0, "error");
1240
	defin(1, "$accept");
1241
	defin(0, "$unk");
1242
	mem = mem0;
1243
	i = 0;
1244
 
1245
	for(t = gettok(); t != MARK && t != ENDFILE;)
1246
	switch(t) {
1247
	case ';':
1248
		t = gettok();
1249
		break;
1250
 
1251
	case START:
1252
		if(gettok() != IDENTIFIER)
1253
			error("bad %%start construction");
1254
		start = chfind(1, tokname);
1255
		t = gettok();
1256
		continue;
1257
 
1258
	case TYPEDEF:
1259
		if(gettok() != TYPENAME)
1260
			error("bad syntax in %%type");
1261
		ty = numbval;
1262
		for(;;) {
1263
			t = gettok();
1264
			switch(t) {
1265
			case IDENTIFIER:
1266
				if((t=chfind(1, tokname)) < NTBASE) {
1267
					j = TYPE(toklev[t]);
1268
					if(j != 0 && j != ty)
1269
						error("type redeclaration of token %s",
1270
							tokset[t].name);
1271
					else
1272
						SETTYPE(toklev[t], ty);
1273
				} else {
1274
					j = nontrst[t-NTBASE].value;
1275
					if(j != 0 && j != ty)
1276
						error("type redeclaration of nonterminal %s",
1277
							nontrst[t-NTBASE].name );
1278
					else
1279
						nontrst[t-NTBASE].value = ty;
1280
				}
1281
			case ',':
1282
				continue;
1283
			case ';':
1284
				t = gettok();
1285
			default:
1286
				break;
1287
			}
1288
			break;
1289
		}
1290
		continue;
1291
 
1292
	case UNION:
1293
		/* copy the union declaration to the output */
1294
		cpyunion();
1295
		t = gettok();
1296
		continue;
1297
 
1298
	case LEFT:
1299
	case BINARY:
1300
	case RIGHT:
1301
		i++;
1302
 
1303
	case TERM:
1304
		/* nonzero means new prec. and assoc. */
1305
		lev = t-TERM;
1306
		ty = 0;
1307
 
1308
		/* get identifiers so defined */
1309
		t = gettok();
1310
 
1311
		/* there is a type defined */
1312
		if(t == TYPENAME) {
1313
			ty = numbval;
1314
			t = gettok();
1315
		}
1316
		for(;;) {
1317
			switch(t) {
1318
			case ',':
1319
				t = gettok();
1320
				continue;
1321
 
1322
			case ';':
1323
				break;
1324
 
1325
			case IDENTIFIER:
1326
				j = chfind(0, tokname);
1327
				if(j >= NTBASE)
1328
					error("%s defined earlier as nonterminal", tokname);
1329
				if(lev) {
1330
					if(ASSOC(toklev[j]))
1331
						error("redeclaration of precedence of %s", tokname);
1332
					SETASC(toklev[j], lev);
1333
					SETPLEV(toklev[j], i);
1334
				}
1335
				if(ty) {
1336
					if(TYPE(toklev[j]))
1337
						error("redeclaration of type of %s", tokname);
1338
					SETTYPE(toklev[j],ty);
1339
				}
1340
				t = gettok();
1341
				if(t == NUMBER) {
1342
					tokset[j].value = numbval;
1343
					if(j < ndefout && j > 3)
1344
						error("please define type number of %s earlier",
1345
							tokset[j].name);
1346
					t = gettok();
1347
				}
1348
				continue;
1349
			}
1350
			break;
1351
		}
1352
		continue;
1353
 
1354
	case LCURLY:
1355
		defout(0);
1356
		cpycode();
1357
		t = gettok();
1358
		continue;
1359
 
1360
	default:
1361
		error("syntax error");
1362
	}
1363
	if(t == ENDFILE)
1364
		error("unexpected EOF before %%");
1365
 
1366
	/* t is MARK */
1367
	Bprint(ftable, "extern	int	yyerrflag;\n");
1368
	Bprint(ftable, "#ifndef	YYMAXDEPTH\n");
1369
	Bprint(ftable, "#define	YYMAXDEPTH	150\n");
1370
	Bprint(ftable, "#endif\n" );
1371
	if(!ntypes) {
1372
		Bprint(ftable, "#ifndef	YYSTYPE\n");
1373
		Bprint(ftable, "#define	YYSTYPE	int\n");
1374
		Bprint(ftable, "#endif\n");
1375
	}
1376
	Bprint(ftable, "YYSTYPE	yylval;\n");
1377
	Bprint(ftable, "YYSTYPE	yyval;\n");
1378
 
1379
	prdptr[0] = mem;
1380
 
1381
	/* added production */
1382
	*mem++ = NTBASE;
1383
 
1384
	/* if start is 0, we will overwrite with the lhs of the first rule */
1385
	*mem++ = start;
1386
	*mem++ = 1;
1387
	*mem++ = 0;
1388
	prdptr[1] = mem;
1389
	while((t=gettok()) == LCURLY)
1390
		cpycode();
1391
	if(t != IDENTCOLON)
1392
		error("bad syntax on first rule");
1393
 
1394
	if(!start)
1395
		prdptr[0][1] = chfind(1, tokname);
1396
 
1397
	/* read rules */
1398
	while(t != MARK && t != ENDFILE) {
1399
		/* process a rule */
1400
		rlines[nprod] = lineno;
1401
		if(t == '|')
1402
			*mem++ = *prdptr[nprod-1];
1403
		else
1404
			if(t == IDENTCOLON) {
1405
				*mem = chfind(1, tokname);
1406
				if(*mem < NTBASE)
1407
					error("token illegal on LHS of grammar rule");
1408
				mem++;
1409
			} else
1410
				error("illegal rule: missing semicolon or | ?");
1411
		/* read rule body */
1412
		t = gettok();
1413
 
1414
	more_rule:
1415
		while(t == IDENTIFIER) {
1416
			*mem = chfind(1, tokname);
1417
			if(*mem < NTBASE)
1418
				levprd[nprod] = toklev[*mem];
1419
			mem++;
1420
			t = gettok();
1421
		}
1422
		if(t == PREC) {
1423
			if(gettok() != IDENTIFIER)
1424
				error("illegal %%prec syntax");
1425
			j = chfind(2, tokname);
1426
			if(j >= NTBASE)
1427
				error("nonterminal %s illegal after %%prec",
1428
					nontrst[j-NTBASE].name);
1429
			levprd[nprod] = toklev[j];
1430
			t = gettok();
1431
		}
1432
		if(t == '=') {
1433
			levprd[nprod] |= ACTFLAG;
1434
			Bprint(faction, "\ncase %d:", nprod);
1435
			cpyact(mem-prdptr[nprod]-1);
1436
			Bprint(faction, " break;");
1437
			if((t=gettok()) == IDENTIFIER) {
1438
 
1439
				/* action within rule... */
1440
				sprint(actnm, "$$%d", nprod);
1441
 
1442
				/* make it a nonterminal */
1443
				j = chfind(1, actnm);
1444
 
1445
				/*
1446
				 * the current rule will become rule number nprod+1
1447
				 * move the contents down, and make room for the null
1448
				 */
1449
				for(p = mem; p >= prdptr[nprod]; --p)
1450
					p[2] = *p;
1451
				mem += 2;
1452
 
1453
				/* enter null production for action */
1454
				p = prdptr[nprod];
1455
				*p++ = j;
1456
				*p++ = -nprod;
1457
 
1458
				/* update the production information */
1459
				levprd[nprod+1] = levprd[nprod] & ~ACTFLAG;
1460
				levprd[nprod] = ACTFLAG;
1461
				if(++nprod >= NPROD)
1462
					error("more than %d rules", NPROD);
1463
				prdptr[nprod] = p;
1464
 
1465
				/* make the action appear in the original rule */
1466
				*mem++ = j;
1467
 
1468
				/* get some more of the rule */
1469
				goto more_rule;
1470
			}
1471
		}
1472
 
1473
		while(t == ';')
1474
			t = gettok();
1475
		*mem++ = -nprod;
1476
 
1477
		/* check that default action is reasonable */
1478
		if(ntypes && !(levprd[nprod]&ACTFLAG) && nontrst[*prdptr[nprod]-NTBASE].value) {
1479
 
1480
			/* no explicit action, LHS has value */
1481
			int tempty;
1482
 
1483
			tempty = prdptr[nprod][1];
1484
			if(tempty < 0)
1485
				error("must return a value, since LHS has a type");
1486
			else
1487
				if(tempty >= NTBASE)
1488
					tempty = nontrst[tempty-NTBASE].value;
1489
				else
1490
					tempty = TYPE(toklev[tempty]);
1491
			if(tempty != nontrst[*prdptr[nprod]-NTBASE].value)
1492
				error("default action causes potential type clash");
1493
		}
1494
		nprod++;
1495
		if(nprod >= NPROD)
1496
			error("more than %d rules", NPROD);
1497
		prdptr[nprod] = mem;
1498
		levprd[nprod] = 0;
1499
	}
1500
 
1501
	/* end of all rules */
1502
	defout(1);
1503
 
1504
	finact();
1505
	if(t == MARK) {
1506
		Bprint(ftable, "\n#line\t%d\t\"%s\"\n", lineno, infile);
1507
		while((c=Bgetrune(finput)) != Beof)
1508
			Bputrune(ftable, c);
1509
	}
1510
	Bterm(finput);
1511
}
1512
 
1513
/*
1514
 * finish action routine
1515
 */
1516
void
1517
finact(void)
1518
{
1519
 
1520
	Bterm(faction);
1521
	Bprint(ftable, "#define YYEOFCODE %d\n", 1);
1522
	Bprint(ftable, "#define YYERRCODE %d\n", 2);
1523
}
1524
 
1525
/*
1526
 * define s to be a terminal if t=0
1527
 * or a nonterminal if t=1
1528
 */
1529
int
1530
defin(int nt, char *s)
1531
{
1532
	int val;
1533
	Rune rune;
1534
 
1535
	val = 0;
1536
	if(nt) {
1537
		nnonter++;
1538
		if(nnonter >= NNONTERM)
1539
			error("too many nonterminals, limit %d",NNONTERM);
1540
		nontrst[nnonter].name = cstash(s);
1541
		return NTBASE + nnonter;
1542
	}
1543
 
1544
	/* must be a token */
1545
	ntokens++;
1546
	if(ntokens >= NTERMS)
1547
		error("too many terminals, limit %d", NTERMS);
1548
	tokset[ntokens].name = cstash(s);
1549
 
1550
	/* establish value for token */
1551
	/* single character literal */
1552
	if(s[0] == ' ') {
1553
		val = chartorune(&rune, &s[1]);
1554
		if(s[val+1] == 0) {
1555
			val = rune;
1556
			goto out;
1557
		}
1558
	}
1559
 
1560
	/* escape sequence */
1561
	if(s[0] == ' ' && s[1] == '\\') {
1562
		if(s[3] == 0) {
1563
			/* single character escape sequence */
1564
			switch(s[2]) {
1565
			case 'n':	val = '\n'; break;
1566
			case 'r':	val = '\r'; break;
1567
			case 'b':	val = '\b'; break;
1568
			case 't':	val = '\t'; break;
1569
			case 'f':	val = '\f'; break;
1570
			case '\'':	val = '\''; break;
1571
			case '"':	val = '"'; break;
1572
			case '\\':	val = '\\'; break;
1573
			default:	error("invalid escape");
1574
			}
1575
			goto out;
1576
		}
1577
 
1578
		/* \nnn sequence */
1579
		if(s[2] >= '0' && s[2] <= '7') {
1580
			if(s[3] < '0' ||
1581
			   s[3] > '7' ||
1582
			   s[4] < '0' ||
1583
			   s[4] > '7' ||
1584
			   s[5] != 0)
1585
				error("illegal \\nnn construction");
1586
			val = 64*s[2] + 8*s[3] + s[4] - 73*'0';
1587
			if(val == 0)
1588
				error("'\\000' is illegal");
1589
			goto out;
1590
		}
1591
		error("unknown escape");
1592
	}
1593
	val = extval++;
1594
 
1595
out:
1596
	tokset[ntokens].value = val;
1597
	toklev[ntokens] = 0;
1598
	return ntokens;
1599
}
1600
 
1601
/*
1602
 * write out the defines (at the end of the declaration section)
1603
 */
1604
void
1605
defout(int last)
1606
{
1607
	int i, c;
1608
	char sar[NAMESIZE+10];
1609
 
1610
	for(i=ndefout; i<=ntokens; i++) {
1611
		/* non-literals */
1612
		c = tokset[i].name[0];
1613
		if(c != ' ' && c != '$') {
1614
			Bprint(ftable, "#define	%s	%d\n",
1615
				tokset[i].name, tokset[i].value);
1616
			if(fdefine)
1617
				Bprint(fdefine, "#define\t%s\t%d\n",
1618
					tokset[i].name, tokset[i].value);
1619
		}
1620
	}
1621
	ndefout = ntokens+1;
1622
	if(last && fdebug) {
1623
		Bprint(fdebug, "char*	yytoknames[] =\n{\n");
1624
		TLOOP(i) {
1625
			if(tokset[i].name) {
1626
				chcopy(sar, tokset[i].name);
1627
				Bprint(fdebug, "\t\"%s\",\n", sar);
1628
				continue;
1629
			}
1630
			Bprint(fdebug, "\t0,\n");
1631
		}
1632
		Bprint(fdebug, "};\n");
1633
	}
1634
}
1635
 
1636
char*
1637
cstash(char *s)
1638
{
1639
	char *temp;
1640
 
1641
	temp = cnamp;
1642
	do {
1643
		if(cnamp >= &cnames[cnamsz])
1644
			error("too many characters in id's and literals");
1645
		else
1646
			*cnamp++ = *s;
1647
	} while(*s++);
1648
	return temp;
1649
}
1650
 
1651
long
1652
gettok(void)
1653
{
1654
	long c;
1655
	Rune rune;
1656
	int i, base, match, reserve;
1657
	static int peekline;
1658
 
1659
begin:
1660
	reserve = 0;
1661
	lineno += peekline;
1662
	peekline = 0;
1663
	c = Bgetrune(finput);
1664
	while(c == ' ' || c == '\n' || c == '\t' || c == '\f') {
1665
		if(c == '\n')
1666
			lineno++;
1667
		c = Bgetrune(finput);
1668
	}
1669
 
1670
	/* skip comment */
1671
	if(c == '/') {
1672
		lineno += skipcom();
1673
		goto begin;
1674
	}
1675
	switch(c) {
1676
	case Beof:
1677
		return ENDFILE;
1678
 
1679
	case '{':
1680
		Bungetrune(finput);
1681
		return '=';
1682
 
1683
	case '<':
1684
		/* get, and look up, a type name (union member name) */
1685
		i = 0;
1686
		while((c=Bgetrune(finput)) != '>' && c >= 0 && c != '\n') {
1687
			rune = c;
1688
			c = runetochar(&tokname[i], &rune);
1689
			if(i < NAMESIZE)
1690
				i += c;
1691
		}
1692
		if(c != '>')
1693
			error("unterminated < ... > clause");
1694
		tokname[i] = 0;
1695
		for(i=1; i<=ntypes; i++)
1696
			if(!strcmp(typeset[i], tokname)) {
1697
				numbval = i;
1698
				return TYPENAME;
1699
			}
1700
		ntypes++;
1701
		numbval = ntypes;
1702
		typeset[numbval] = cstash(tokname);
1703
		return TYPENAME;
1704
 
1705
	case '"':
1706
	case '\'':
1707
		match = c;
1708
		tokname[0] = ' ';
1709
		i = 1;
1710
		for(;;) {
1711
			c = Bgetrune(finput);
1712
			if(c == '\n' || c <= 0)
1713
				error("illegal or missing ' or \"" );
1714
			if(c == '\\') {
1715
				tokname[i] = '\\';
1716
				if(i < NAMESIZE)
1717
					i++;
1718
				c = Bgetrune(finput);
1719
			} else
1720
				if(c == match)
1721
					break;
1722
			rune = c;
1723
			c = runetochar(&tokname[i], &rune);
1724
			if(i < NAMESIZE)
1725
				i += c;
1726
		}
1727
		break;
1728
 
1729
	case '%':
1730
	case '\\':
1731
		switch(c = Bgetrune(finput)) {
1732
		case '0':	return TERM;
1733
		case '<':	return LEFT;
1734
		case '2':	return BINARY;
1735
		case '>':	return RIGHT;
1736
		case '%':
1737
		case '\\':	return MARK;
1738
		case '=':	return PREC;
1739
		case '{':	return LCURLY;
1740
		default:	reserve = 1;
1741
		}
1742
 
1743
	default:
1744
		/* number */
1745
		if(isdigit(c)) {
1746
			numbval = c-'0';
1747
			base = (c=='0')? 8: 10;
1748
			for(c = Bgetrune(finput); isdigit(c); c = Bgetrune(finput))
1749
				numbval = numbval*base + (c-'0');
1750
			Bungetrune(finput);
1751
			return NUMBER;
1752
		}
1753
		if(islower(c) || isupper(c) || c=='_' || c=='.' || c=='$')  {
1754
			i = 0;
1755
			while(islower(c) || isupper(c) || isdigit(c) ||
1756
			    c == '-' || c=='_' || c=='.' || c=='$') {
1757
				if(reserve && isupper(c))
1758
					c += 'a'-'A';
1759
				rune = c;
1760
				c = runetochar(&tokname[i], &rune);
1761
				if(i < NAMESIZE)
1762
					i += c;
1763
				c = Bgetrune(finput);
1764
			}
1765
		} else
1766
			return c;
1767
		Bungetrune(finput);
1768
	}
1769
	tokname[i] = 0;
1770
 
1771
	/* find a reserved word */
1772
	if(reserve) {
1773
		for(c=0; resrv[c].name; c++)
1774
			if(strcmp(tokname, resrv[c].name) == 0)
1775
				return resrv[c].value;
1776
		error("invalid escape, or illegal reserved word: %s", tokname);
1777
	}
1778
 
1779
	/* look ahead to distinguish IDENTIFIER from IDENTCOLON */
1780
	c = Bgetrune(finput);
1781
	while(c == ' ' || c == '\t'|| c == '\n' || c == '\f' || c == '/') {
1782
		if(c == '\n')
1783
			peekline++;
1784
		/* look for comments */
1785
		if(c == '/')
1786
			peekline += skipcom();
1787
		c = Bgetrune(finput);
1788
	}
1789
	if(c == ':')
1790
		return IDENTCOLON;
1791
	Bungetrune(finput);
1792
	return IDENTIFIER;
1793
}
1794
 
1795
/*
1796
 * determine the type of a symbol
1797
 */
1798
int
1799
fdtype(int t)
1800
{
1801
	int v;
1802
 
1803
	if(t >= NTBASE)
1804
		v = nontrst[t-NTBASE].value;
1805
	else
1806
		v = TYPE(toklev[t]);
1807
	if(v <= 0)
1808
		error("must specify type for %s", (t>=NTBASE)?
1809
			nontrst[t-NTBASE].name: tokset[t].name);
1810
	return v;
1811
}
1812
 
1813
int
1814
chfind(int t, char *s)
1815
{
1816
	int i;
1817
 
1818
	if(s[0] == ' ')
1819
		t = 0;
1820
	TLOOP(i)
1821
		if(!strcmp(s, tokset[i].name))
1822
			return i;
1823
	NTLOOP(i)
1824
		if(!strcmp(s, nontrst[i].name))
1825
			return NTBASE+i;
1826
 
1827
	/* cannot find name */
1828
	if(t > 1)
1829
		error("%s should have been defined earlier", s);
1830
	return defin(t, s);
1831
}
1832
 
1833
/*
1834
 * copy the union declaration to the output, and the define file if present
1835
 */
1836
void
1837
cpyunion(void)
1838
{
1839
	long c;
1840
	int level;
1841
 
1842
	Bprint(ftable, "\n#line\t%d\t\"%s\"\n", lineno, infile);
1843
	Bprint(ftable, "typedef union ");
1844
	if(fdefine != 0)
1845
		Bprint(fdefine, "\ntypedef union ");
1846
 
1847
	level = 0;
1848
	for(;;) {
1849
		if((c=Bgetrune(finput)) == Beof)
1850
			error("EOF encountered while processing %%union");
1851
		Bputrune(ftable, c);
1852
		if(fdefine != 0)
1853
			Bputrune(fdefine, c);
1854
		switch(c) {
1855
		case '\n':
1856
			lineno++;
1857
			break;
1858
		case '{':
1859
			level++;
1860
			break;
1861
		case '}':
1862
			level--;
1863
 
1864
			/* we are finished copying */
1865
			if(level == 0) {
1866
				Bprint(ftable, " YYSTYPE;\n");
1867
				if(fdefine != 0)
1868
					Bprint(fdefine, "\tYYSTYPE;\nextern\tYYSTYPE\tyylval;\n");
1869
				return;
1870
			}
1871
		}
1872
	}
1873
}
1874
 
1875
/*
1876
 * copies code between \{ and \}
1877
 */
1878
void
1879
cpycode(void)
1880
{
1881
 
1882
	long c;
1883
 
1884
	c = Bgetrune(finput);
1885
	if(c == '\n') {
1886
		c = Bgetrune(finput);
1887
		lineno++;
1888
	}
1889
	Bprint(ftable, "\n#line\t%d\t\"%s\"\n", lineno, infile);
1890
	while(c != Beof) {
1891
		if(c == '\\') {
1892
			if((c=Bgetrune(finput)) == '}')
1893
				return;
1894
			Bputc(ftable, '\\');
1895
		}
1896
		if(c == '%') {
1897
			if((c=Bgetrune(finput)) == '}')
1898
				return;
1899
			Bputc(ftable, '%');
1900
		}
1901
		Bputrune(ftable, c);
1902
		if(c == '\n')
1903
			lineno++;
1904
		c = Bgetrune(finput);
1905
	}
1906
	error("eof before %%}");
1907
}
1908
 
1909
/*
1910
 * skip over comments
1911
 * skipcom is called after reading a '/'
1912
 */
1913
int
1914
skipcom(void)
1915
{
1916
	long c;
1917
	int i;
1918
 
1919
	/* i is the number of lines skipped */
1920
	i = 0;
1921
	c = Bgetrune(finput);
1922
	if(c == '/'){			/* C++ //: skip to end of line */
1923
		while((c = Bgetrune(finput)) != Beof)
1924
			if(c == '\n')
1925
				return 1;
1926
	}else if(c == '*'){		/* normal C comment */
1927
		while((c = Bgetrune(finput)) != Beof) {
1928
			while(c == '*')
1929
				if((c = Bgetrune(finput)) == '/')
1930
					return i;
1931
			if(c == '\n')
1932
				i++;
1933
		}
1934
	}else
1935
		error("illegal comment");
1936
 
1937
	error("EOF inside comment");
1938
	return 0;
1939
}
1940
 
1941
/*
1942
 * copy C action to the next ; or closing }
1943
 */
1944
void
1945
cpyact(int offset)
1946
{
1947
	long c;
1948
	int brac, match, j, s, fnd, tok;
1949
 
1950
	Bprint(faction, "\n#line\t%d\t\"%s\"\n", lineno, infile);
1951
	brac = 0;
1952
 
1953
loop:
1954
	c = Bgetrune(finput);
1955
swt:
1956
	switch(c) {
1957
	case ';':
1958
		if(brac == 0) {
1959
			Bputrune(faction, c);
1960
			return;
1961
		}
1962
		goto lcopy;
1963
 
1964
	case '{':
1965
		brac++;
1966
		goto lcopy;
1967
 
1968
	case '$':
1969
		s = 1;
1970
		tok = -1;
1971
		c = Bgetrune(finput);
1972
 
1973
		/* type description */
1974
		if(c == '<') {
1975
			Bungetrune(finput);
1976
			if(gettok() != TYPENAME)
1977
				error("bad syntax on $<ident> clause");
1978
			tok = numbval;
1979
			c = Bgetrune(finput);
1980
		}
1981
		if(c == '$') {
1982
			Bprint(faction, "yyval");
1983
 
1984
			/* put out the proper tag... */
1985
			if(ntypes) {
1986
				if(tok < 0)
1987
					tok = fdtype(*prdptr[nprod]);
1988
				Bprint(faction, ".%s", typeset[tok]);
1989
			}
1990
			goto loop;
1991
		}
1992
		if(c == '-') {
1993
			s = -s;
1994
			c = Bgetrune(finput);
1995
		}
1996
		if(isdigit(c)) {
1997
			j = 0;
1998
			while(isdigit(c)) {
1999
				j = j*10 + (c-'0');
2000
				c = Bgetrune(finput);
2001
			}
2002
			Bungetrune(finput);
2003
			j = j*s - offset;
2004
			if(j > 0)
2005
				error("Illegal use of $%d", j+offset);
2006
 
2007
		dollar:
2008
			Bprint(faction, "yypt[-%d].yyv", -j);
2009
 
2010
			/* put out the proper tag */
2011
			if(ntypes) {
2012
				if(j+offset <= 0 && tok < 0)
2013
					error("must specify type of $%d", j+offset);
2014
				if(tok < 0)
2015
					tok = fdtype(prdptr[nprod][j+offset]);
2016
				Bprint(faction, ".%s", typeset[tok]);
2017
			}
2018
			goto loop;
2019
		}
2020
		if(isupper(c) || islower(c) || c == '_' || c == '.') {
2021
			int tok; /* tok used oustide for type info */
2022
 
2023
			/* look for $name */
2024
			Bungetrune(finput);
2025
			if(gettok() != IDENTIFIER)
2026
				error("$ must be followed by an identifier");
2027
			tok = chfind(2, tokname);
2028
			if((c = Bgetrune(finput)) != '#') {
2029
				Bungetrune(finput);
2030
				fnd = -1;
2031
			} else
2032
				if(gettok() != NUMBER) {
2033
					error("# must be followed by number");
2034
					fnd = -1;
2035
				} else
2036
					fnd = numbval;
2037
			for(j=1; j<=offset; ++j)
2038
				if(tok == prdptr[nprod][j]) {
2039
					if(--fnd <= 0) {
2040
						j -= offset;
2041
						goto dollar;
2042
					}
2043
				}
2044
			error("$name or $name#number not found");
2045
		}
2046
		Bputc(faction, '$');
2047
		if(s < 0 )
2048
			Bputc(faction, '-');
2049
		goto swt;
2050
 
2051
	case '}':
2052
		brac--;
2053
		if(brac)
2054
			goto lcopy;
2055
		Bputrune(faction, c);
2056
		return;
2057
 
2058
	case '/':
2059
		/* look for comments */
2060
		Bputrune(faction, c);
2061
		c = Bgetrune(finput);
2062
		if(c != '*')
2063
			goto swt;
2064
 
2065
		/* it really is a comment; copy it */
2066
		Bputrune(faction, c);
2067
		c = Bgetrune(finput);
2068
		while(c >= 0) {
2069
			while(c == '*') {
2070
				Bputrune(faction, c);
2071
				if((c=Bgetrune(finput)) == '/')
2072
					goto lcopy;
2073
			}
2074
			Bputrune(faction, c);
2075
			if(c == '\n')
2076
				lineno++;
2077
			c = Bgetrune(finput);
2078
		}
2079
		error("EOF inside comment");
2080
 
2081
	case '\'':
2082
		/* character constant */
2083
		match = '\'';
2084
		goto string;
2085
 
2086
	case '"':
2087
		/* character string */
2088
		match = '"';
2089
 
2090
	string:
2091
		Bputrune(faction, c);
2092
		while(c = Bgetrune(finput)) {
2093
			if(c == '\\') {
2094
				Bputrune(faction, c);
2095
				c = Bgetrune(finput);
2096
				if(c == '\n')
2097
					lineno++;
2098
			} else
2099
				if(c == match)
2100
					goto lcopy;
2101
				if(c == '\n')
2102
					error("newline in string or char. const.");
2103
			Bputrune(faction, c);
2104
		}
2105
		error("EOF in string or character constant");
2106
 
2107
	case Beof:
2108
		error("action does not terminate");
2109
 
2110
	case '\n':
2111
		lineno++;
2112
		goto lcopy;
2113
	}
2114
 
2115
lcopy:
2116
	Bputrune(faction, c);
2117
	goto loop;
2118
}
2119
 
2120
void
2121
openup(char *stem, int dflag, int vflag, int ytab, char *ytabc)
2122
{
2123
	char buf[256];
2124
 
2125
	if(vflag) {
2126
		snprint(buf, sizeof buf, "%s.%s", stem, FILEU);
2127
		foutput = Bopen(buf, OWRITE);
2128
		if(foutput == 0)
2129
			error("cannot open %s", buf);
2130
	}
2131
	if(yydebug) {
2132
		snprint(buf, sizeof buf, "%s.%s", stem, FILEDEBUG);
2133
		if((fdebug = Bopen(buf, OWRITE)) == 0)
2134
			error("can't open %s", buf);
2135
	}
2136
	if(dflag) {
2137
		snprint(buf, sizeof buf, "%s.%s", stem, FILED);
2138
		fdefine = Bopen(buf, OWRITE);
2139
		if(fdefine == 0)
2140
			error("can't create %s", buf);
2141
	}
2142
	if(ytab == 0)
2143
		snprint(buf, sizeof buf, "%s.%s", stem, OFILE);
2144
	else
2145
		strecpy(buf, buf+sizeof buf, ytabc);
2146
	ftable = Bopen(buf, OWRITE);
2147
	if(ftable == 0)
2148
		error("cannot open table file %s", buf);
2149
}
2150
 
2151
/*
2152
 * print the output for the states
2153
 */
2154
void
2155
output(void)
2156
{
2157
	int i, k, c;
2158
	Wset *u, *v;
2159
 
2160
	Bprint(ftable, "short	yyexca[] =\n{");
2161
	if(fdebug)
2162
		Bprint(fdebug, "char*	yystates[] =\n{\n");
2163
 
2164
	/* output the stuff for state i */
2165
	SLOOP(i) {
2166
		nolook = tystate[i]!=MUSTLOOKAHEAD;
2167
		closure(i);
2168
 
2169
		/* output actions */
2170
		nolook = 1;
2171
		aryfil(temp1, ntokens+nnonter+1, 0);
2172
		WSLOOP(wsets, u) {
2173
			c = *(u->pitem);
2174
			if(c > 1 && c < NTBASE && temp1[c] == 0) {
2175
				WSLOOP(u, v)
2176
					if(c == *(v->pitem))
2177
						putitem(v->pitem+1, (Lkset*)0);
2178
				temp1[c] = state(c);
2179
			} else
2180
				if(c > NTBASE && temp1[(c -= NTBASE) + ntokens] == 0)
2181
					temp1[c+ntokens] = amem[indgo[i]+c];
2182
		}
2183
		if(i == 1)
2184
			temp1[1] = ACCEPTCODE;
2185
 
2186
		/* now, we have the shifts; look at the reductions */
2187
		lastred = 0;
2188
		WSLOOP(wsets, u) {
2189
			c = *u->pitem;
2190
 
2191
			/* reduction */
2192
			if(c <= 0) {
2193
				lastred = -c;
2194
				TLOOP(k)
2195
					if(BIT(u->ws.lset, k)) {
2196
						if(temp1[k] == 0)
2197
							temp1[k] = c;
2198
						else
2199
						if(temp1[k] < 0) { /* reduce/reduce conflict */
2200
							if(foutput)
2201
								Bprint(foutput,
2202
									"\n%d: reduce/reduce conflict"
2203
									" (red'ns %d and %d ) on %s",
2204
									i, -temp1[k], lastred,
2205
									symnam(k));
2206
							if(-temp1[k] > lastred)
2207
								temp1[k] = -lastred;
2208
							zzrrconf++;
2209
						} else
2210
							/* potential shift/reduce conflict */
2211
							precftn( lastred, k, i );
2212
					}
2213
				}
2214
		}
2215
		wract(i);
2216
	}
2217
 
2218
	if(fdebug)
2219
		Bprint(fdebug, "};\n");
2220
	Bprint(ftable, "};\n");
2221
	Bprint(ftable, "#define	YYNPROD	%d\n", nprod);
2222
	Bprint(ftable, "#define	YYPRIVATE %d\n", PRIVATE);
2223
	if(yydebug)
2224
		Bprint(ftable, "#define	yydebug	%s\n", yydebug);
2225
}
2226
 
2227
/*
2228
 * pack state i from temp1 into amem
2229
 */
2230
int
2231
apack(int *p, int n)
2232
{
2233
	int *pp, *qq, *rr, off, *q, *r;
2234
 
2235
	/* we don't need to worry about checking because
2236
	 * we will only look at entries known to be there...
2237
	 * eliminate leading and trailing 0's
2238
	 */
2239
 
2240
	q = p+n;
2241
	for(pp = p, off = 0; *pp == 0 && pp <= q; ++pp, --off)
2242
		;
2243
 	/* no actions */
2244
	if(pp > q)
2245
		return 0;
2246
	p = pp;
2247
 
2248
	/* now, find a place for the elements from p to q, inclusive */
2249
	r = &amem[ACTSIZE-1];
2250
	for(rr = amem; rr <= r; rr++, off++) {
2251
		for(qq = rr, pp = p; pp <= q; pp++, qq++)
2252
			if(*pp != 0)
2253
				if(*pp != *qq && *qq != 0)
2254
					goto nextk;
2255
 
2256
		/* we have found an acceptable k */
2257
		if(pkdebug && foutput != 0)
2258
			Bprint(foutput, "off = %d, k = %d\n", off, (int)(rr-amem));
2259
		for(qq = rr, pp = p; pp <= q; pp++, qq++)
2260
			if(*pp) {
2261
				if(qq > r)
2262
					error("action table overflow");
2263
				if(qq > memp)
2264
					memp = qq;
2265
				*qq = *pp;
2266
			}
2267
		if(pkdebug && foutput != 0)
2268
			for(pp = amem; pp <= memp; pp += 10) {
2269
				Bprint(foutput, "\t");
2270
				for(qq = pp; qq <= pp+9; qq++)
2271
					Bprint(foutput, "%d ", *qq);
2272
				Bprint(foutput, "\n");
2273
			}
2274
		return(off);
2275
	nextk:;
2276
	}
2277
	error("no space in action table");
2278
	return 0;
2279
}
2280
 
2281
/*
2282
 * output the gotos for the nontermninals
2283
 */
2284
void
2285
go2out(void)
2286
{
2287
	int i, j, k, best, count, cbest, times;
2288
 
2289
	/* mark begining of gotos */
2290
	Bprint(ftemp, "$\n");
2291
	for(i = 1; i <= nnonter; i++) {
2292
		go2gen(i);
2293
 
2294
		/* find the best one to make default */
2295
		best = -1;
2296
		times = 0;
2297
 
2298
		/* is j the most frequent */
2299
		for(j = 0; j <= nstate; j++) {
2300
			if(tystate[j] == 0)
2301
				continue;
2302
			if(tystate[j] == best)
2303
				continue;
2304
 
2305
			/* is tystate[j] the most frequent */
2306
			count = 0;
2307
			cbest = tystate[j];
2308
			for(k = j; k <= nstate; k++)
2309
				if(tystate[k] == cbest)
2310
					count++;
2311
			if(count > times) {
2312
				best = cbest;
2313
				times = count;
2314
			}
2315
		}
2316
 
2317
		/* best is now the default entry */
2318
		zzgobest += times-1;
2319
		for(j = 0; j <= nstate; j++)
2320
			if(tystate[j] != 0 && tystate[j] != best) {
2321
				Bprint(ftemp, "%d,%d,", j, tystate[j]);
2322
				zzgoent++;
2323
			}
2324
 
2325
		/* now, the default */
2326
		if(best == -1)
2327
			best = 0;
2328
		zzgoent++;
2329
		Bprint(ftemp, "%d\n", best);
2330
	}
2331
}
2332
 
2333
/*
2334
 * output the gotos for nonterminal c
2335
 */
2336
void
2337
go2gen(int c)
2338
{
2339
	int i, work, cc;
2340
	Item *p, *q;
2341
 
2342
 
2343
	/* first, find nonterminals with gotos on c */
2344
	aryfil(temp1, nnonter+1, 0);
2345
	temp1[c] = 1;
2346
	work = 1;
2347
	while(work) {
2348
		work = 0;
2349
		PLOOP(0, i)
2350
 
2351
			/* cc is a nonterminal */
2352
			if((cc=prdptr[i][1]-NTBASE) >= 0)
2353
				/* cc has a goto on c */
2354
				if(temp1[cc] != 0) {
2355
 
2356
					/* thus, the left side of production i does too */
2357
					cc = *prdptr[i]-NTBASE;
2358
					if(temp1[cc] == 0) {
2359
						  work = 1;
2360
						  temp1[cc] = 1;
2361
					}
2362
				}
2363
	}
2364
 
2365
	/* now, we have temp1[c] = 1 if a goto on c in closure of cc */
2366
	if(g2debug && foutput != 0) {
2367
		Bprint(foutput, "%s: gotos on ", nontrst[c].name);
2368
		NTLOOP(i)
2369
			if(temp1[i])
2370
				Bprint(foutput, "%s ", nontrst[i].name);
2371
		Bprint(foutput, "\n");
2372
	}
2373
 
2374
	/* now, go through and put gotos into tystate */
2375
	aryfil(tystate, nstate, 0);
2376
	SLOOP(i)
2377
		ITMLOOP(i, p, q)
2378
			if((cc = *p->pitem) >= NTBASE)
2379
				/* goto on c is possible */
2380
				if(temp1[cc-NTBASE]) {
2381
					tystate[i] = amem[indgo[i]+c];
2382
					break;
2383
				}
2384
}
2385
 
2386
/*
2387
 * decide a shift/reduce conflict by precedence.
2388
 * r is a rule number, t a token number
2389
 * the conflict is in state s
2390
 * temp1[t] is changed to reflect the action
2391
 */
2392
void
2393
precftn(int r, int t, int s)
2394
{
2395
	int lp, lt, action;
2396
 
2397
	lp = levprd[r];
2398
	lt = toklev[t];
2399
	if(PLEVEL(lt) == 0 || PLEVEL(lp) == 0) {
2400
 
2401
		/* conflict */
2402
		if(foutput != 0)
2403
			Bprint(foutput,
2404
				"\n%d: shift/reduce conflict (shift %d(%d), red'n %d(%d)) on %s",
2405
				s, temp1[t], PLEVEL(lt), r, PLEVEL(lp), symnam(t));
2406
		zzsrconf++;
2407
		return;
2408
	}
2409
	if(PLEVEL(lt) == PLEVEL(lp))
2410
		action = ASSOC(lt);
2411
	else
2412
		if(PLEVEL(lt) > PLEVEL(lp))
2413
			action = RASC;  /* shift */
2414
		else
2415
			action = LASC;  /* reduce */
2416
	switch(action) {
2417
	case BASC:  /* error action */
2418
		temp1[t] = ERRCODE;
2419
		break;
2420
	case LASC:  /* reduce */
2421
		temp1[t] = -r;
2422
		break;
2423
	}
2424
}
2425
 
2426
/*
2427
 * output state i
2428
 * temp1 has the actions, lastred the default
2429
 */
2430
void
2431
wract(int i)
2432
{
2433
	int p, p0, p1, ntimes, tred, count, j, flag;
2434
 
2435
	/* find the best choice for lastred */
2436
	lastred = 0;
2437
	ntimes = 0;
2438
	TLOOP(j) {
2439
		if(temp1[j] >= 0)
2440
			continue;
2441
		if(temp1[j]+lastred == 0)
2442
			continue;
2443
		/* count the number of appearances of temp1[j] */
2444
		count = 0;
2445
		tred = -temp1[j];
2446
		levprd[tred] |= REDFLAG;
2447
		TLOOP(p)
2448
			if(temp1[p]+tred == 0)
2449
				count++;
2450
		if(count > ntimes) {
2451
			lastred = tred;
2452
			ntimes = count;
2453
		}
2454
	}
2455
 
2456
	/*
2457
	 * for error recovery, arrange that, if there is a shift on the
2458
	 * error recovery token, `error', that the default be the error action
2459
	 */
2460
	if(temp1[2] > 0)
2461
		lastred = 0;
2462
 
2463
	/* clear out entries in temp1 which equal lastred */
2464
	TLOOP(p)
2465
		if(temp1[p]+lastred == 0)
2466
			temp1[p] = 0;
2467
 
2468
	wrstate(i);
2469
	defact[i] = lastred;
2470
	flag = 0;
2471
	TLOOP(p0)
2472
		if((p1=temp1[p0]) != 0) {
2473
			if(p1 < 0) {
2474
				p1 = -p1;
2475
				goto exc;
2476
			}
2477
			if(p1 == ACCEPTCODE) {
2478
				p1 = -1;
2479
				goto exc;
2480
			}
2481
			if(p1 == ERRCODE) {
2482
				p1 = 0;
2483
			exc:
2484
				if(flag++ == 0)
2485
					Bprint(ftable, "-1, %d,\n", i);
2486
				Bprint(ftable, "\t%d, %d,\n", p0, p1);
2487
				zzexcp++;
2488
				continue;
2489
			}
2490
			Bprint(ftemp, "%d,%d,", p0, p1);
2491
			zzacent++;
2492
		}
2493
	if(flag) {
2494
		defact[i] = -2;
2495
		Bprint(ftable, "\t-2, %d,\n", lastred);
2496
	}
2497
	Bprint(ftemp, "\n");
2498
}
2499
 
2500
/*
2501
 * writes state i
2502
 */
2503
void
2504
wrstate(int i)
2505
{
2506
	int j0, j1;
2507
	Item *pp, *qq;
2508
	Wset *u;
2509
 
2510
	if(fdebug) {
2511
		if(lastred) {
2512
			Bprint(fdebug, "	0, /*%d*/\n", i);
2513
		} else {
2514
			Bprint(fdebug, "	\"");
2515
			ITMLOOP(i, pp, qq)
2516
				Bprint(fdebug, "%s\\n", writem(pp->pitem));
2517
			if(tystate[i] == MUSTLOOKAHEAD)
2518
				WSLOOP(wsets + (pstate[i+1] - pstate[i]), u)
2519
					if(*u->pitem < 0)
2520
						Bprint(fdebug, "%s\\n", writem(u->pitem));
2521
			Bprint(fdebug, "\", /*%d*/\n", i);
2522
		}
2523
	}
2524
	if(foutput == 0)
2525
		return;
2526
	Bprint(foutput, "\nstate %d\n", i);
2527
	ITMLOOP(i, pp, qq)
2528
		Bprint(foutput, "\t%s\n", writem(pp->pitem));
2529
	if(tystate[i] == MUSTLOOKAHEAD)
2530
		/* print out empty productions in closure */
2531
		WSLOOP(wsets+(pstate[i+1]-pstate[i]), u)
2532
			if(*u->pitem < 0)
2533
				Bprint(foutput, "\t%s\n", writem(u->pitem));
2534
 
2535
	/* check for state equal to another */
2536
	TLOOP(j0)
2537
		if((j1=temp1[j0]) != 0) {
2538
			Bprint(foutput, "\n\t%s  ", symnam(j0));
2539
			/* shift, error, or accept */
2540
			if(j1 > 0) {
2541
				if(j1 == ACCEPTCODE)
2542
					Bprint(foutput,  "accept");
2543
				else
2544
					if(j1 == ERRCODE)
2545
						Bprint(foutput, "error");
2546
					else
2547
						Bprint(foutput, "shift %d", j1);
2548
			} else
2549
				Bprint(foutput, "reduce %d (src line %d)", -j1, rlines[-j1]);
2550
		}
2551
 
2552
	/* output the final production */
2553
	if(lastred)
2554
		Bprint(foutput, "\n\t.  reduce %d (src line %d)\n\n",
2555
			lastred, rlines[lastred]);
2556
	else
2557
		Bprint(foutput, "\n\t.  error\n\n");
2558
 
2559
	/* now, output nonterminal actions */
2560
	j1 = ntokens;
2561
	for(j0 = 1; j0 <= nnonter; j0++) {
2562
		j1++;
2563
		if(temp1[j1])
2564
			Bprint(foutput, "\t%s  goto %d\n", symnam(j0+NTBASE), temp1[j1]);
2565
	}
2566
}
2567
 
2568
void
2569
warray(char *s, int *v, int n)
2570
{
2571
	int i;
2572
 
2573
	Bprint(ftable, "short	%s[] =\n{", s);
2574
	for(i=0;;) {
2575
		if(i%10 == 0)
2576
			Bprint(ftable, "\n");
2577
		Bprint(ftable, "%4d", v[i]);
2578
		i++;
2579
		if(i >= n) {
2580
			Bprint(ftable, "\n};\n");
2581
			break;
2582
		}
2583
		Bprint(ftable, ",");
2584
	}
2585
}
2586
 
2587
/*
2588
 * in order to free up the mem and amem arrays for the optimizer,
2589
 * and still be able to output yyr1, etc., after the sizes of
2590
 * the action array is known, we hide the nonterminals
2591
 * derived by productions in levprd.
2592
 */
2593
 
2594
void
2595
hideprod(void)
2596
{
2597
	int i, j;
2598
 
2599
	j = 0;
2600
	levprd[0] = 0;
2601
	PLOOP(1, i) {
2602
		if(!(levprd[i] & REDFLAG)) {
2603
			j++;
2604
			if(foutput != 0)
2605
				Bprint(foutput, "Rule not reduced:   %s\n", writem(prdptr[i]));
2606
		}
2607
		levprd[i] = *prdptr[i] - NTBASE;
2608
	}
2609
	if(j)
2610
		print("%d rules never reduced\n", j);
2611
}
2612
 
2613
void
2614
callopt(void)
2615
{
2616
	int i, *p, j, k, *q;
2617
 
2618
	/* read the arrays from tempfile and set parameters */
2619
	finput = Bopen(tempname, OREAD);
2620
	if(finput == 0)
2621
		error("optimizer cannot open tempfile");
2622
 
2623
	pgo[0] = 0;
2624
	temp1[0] = 0;
2625
	nstate = 0;
2626
	nnonter = 0;
2627
	for(;;) {
2628
		switch(gtnm()) {
2629
		case '\n':
2630
			nstate++;
2631
			pmem--;
2632
			temp1[nstate] = pmem - mem0;
2633
		case ',':
2634
			continue;
2635
		case '$':
2636
			break;
2637
		default:
2638
			error("bad tempfile");
2639
		}
2640
		break;
2641
	}
2642
 
2643
	pmem--;
2644
	temp1[nstate] = yypgo[0] = pmem - mem0;
2645
	for(;;) {
2646
		switch(gtnm()) {
2647
		case '\n':
2648
			nnonter++;
2649
			yypgo[nnonter] = pmem-mem0;
2650
		case ',':
2651
			continue;
2652
		case -1:
2653
			break;
2654
		default:
2655
			error("bad tempfile");
2656
		}
2657
		break;
2658
	}
2659
	pmem--;
2660
	yypgo[nnonter--] = pmem - mem0;
2661
	for(i = 0; i < nstate; i++) {
2662
		k = 32000;
2663
		j = 0;
2664
		q = mem0 + temp1[i+1];
2665
		for(p = mem0 + temp1[i]; p < q ; p += 2) {
2666
			if(*p > j)
2667
				j = *p;
2668
			if(*p < k)
2669
				k = *p;
2670
		}
2671
		/* nontrivial situation */
2672
		if(k <= j) {
2673
			/* j is now the range */
2674
/*			j -= k;			/* call scj */
2675
			if(k > maxoff)
2676
				maxoff = k;
2677
		}
2678
		tystate[i] = (temp1[i+1]-temp1[i]) + 2*j;
2679
		if(j > maxspr)
2680
			maxspr = j;
2681
	}
2682
 
2683
	/* initialize ggreed table */
2684
	for(i = 1; i <= nnonter; i++) {
2685
		ggreed[i] = 1;
2686
		j = 0;
2687
 
2688
		/* minimum entry index is always 0 */
2689
		q = mem0 + yypgo[i+1] - 1;
2690
		for(p = mem0+yypgo[i]; p < q ; p += 2) {
2691
			ggreed[i] += 2;
2692
			if(*p > j)
2693
				j = *p;
2694
		}
2695
		ggreed[i] = ggreed[i] + 2*j;
2696
		if(j > maxoff)
2697
			maxoff = j;
2698
	}
2699
 
2700
	/* now, prepare to put the shift actions into the amem array */
2701
	for(i = 0; i < ACTSIZE; i++)
2702
		amem[i] = 0;
2703
	maxa = amem;
2704
	for(i = 0; i < nstate; i++) {
2705
		if(tystate[i] == 0 && adb > 1)
2706
			Bprint(ftable, "State %d: null\n", i);
2707
		indgo[i] = YYFLAG1;
2708
	}
2709
	while((i = nxti()) != NOMORE)
2710
		if(i >= 0)
2711
			stin(i);
2712
		else
2713
			gin(-i);
2714
 
2715
	/* print amem array */
2716
	if(adb > 2 )
2717
		for(p = amem; p <= maxa; p += 10) {
2718
			Bprint(ftable, "%4d  ", (int)(p-amem));
2719
			for(i = 0; i < 10; ++i)
2720
				Bprint(ftable, "%4d  ", p[i]);
2721
			Bprint(ftable, "\n");
2722
		}
2723
 
2724
	/* write out the output appropriate to the language */
2725
	aoutput();
2726
	osummary();
2727
	ZAPFILE(tempname);
2728
}
2729
 
2730
void
2731
gin(int i)
2732
{
2733
	int *p, *r, *s, *q1, *q2;
2734
 
2735
	/* enter gotos on nonterminal i into array amem */
2736
	ggreed[i] = 0;
2737
 
2738
	q2 = mem0+ yypgo[i+1] - 1;
2739
	q1 = mem0 + yypgo[i];
2740
 
2741
	/* now, find amem place for it */
2742
	for(p = amem; p < &amem[ACTSIZE]; p++) {
2743
		if(*p)
2744
			continue;
2745
		for(r = q1; r < q2; r += 2) {
2746
			s = p + *r + 1;
2747
			if(*s)
2748
				goto nextgp;
2749
			if(s > maxa)
2750
				if((maxa = s) > &amem[ACTSIZE])
2751
					error("a array overflow");
2752
		}
2753
		/* we have found amem spot */
2754
		*p = *q2;
2755
		if(p > maxa)
2756
			if((maxa = p) > &amem[ACTSIZE])
2757
				error("a array overflow");
2758
		for(r = q1; r < q2; r += 2) {
2759
			s = p + *r + 1;
2760
			*s = r[1];
2761
		}
2762
		pgo[i] = p-amem;
2763
		if(adb > 1)
2764
			Bprint(ftable, "Nonterminal %d, entry at %d\n", i, pgo[i]);
2765
		return;
2766
 
2767
	nextgp:;
2768
	}
2769
	error("cannot place goto %d\n", i);
2770
}
2771
 
2772
void
2773
stin(int i)
2774
{
2775
	int *r, *s, n, flag, j, *q1, *q2;
2776
 
2777
	tystate[i] = 0;
2778
 
2779
	/* enter state i into the amem array */
2780
	q2 = mem0+temp1[i+1];
2781
	q1 = mem0+temp1[i];
2782
	/* find an acceptable place */
2783
	for(n = -maxoff; n < ACTSIZE; n++) {
2784
		flag = 0;
2785
		for(r = q1; r < q2; r += 2) {
2786
			if((s = *r + n + amem) < amem)
2787
				goto nextn;
2788
			if(*s == 0)
2789
				flag++;
2790
			else
2791
				if(*s != r[1])
2792
					goto nextn;
2793
		}
2794
 
2795
		/* check that the position equals another only if the states are identical */
2796
		for(j=0; j<nstate; j++) {
2797
			if(indgo[j] == n) {
2798
 
2799
				/* we have some disagreement */
2800
				if(flag)
2801
					goto nextn;
2802
				if(temp1[j+1]+temp1[i] == temp1[j]+temp1[i+1]) {
2803
 
2804
					/* states are equal */
2805
					indgo[i] = n;
2806
					if(adb > 1)
2807
						Bprint(ftable,
2808
						"State %d: entry at %d equals state %d\n",
2809
						i, n, j);
2810
					return;
2811
				}
2812
 
2813
				/* we have some disagreement */
2814
				goto nextn;
2815
			}
2816
		}
2817
 
2818
		for(r = q1; r < q2; r += 2) {
2819
			if((s = *r+n+amem) >= &amem[ACTSIZE])
2820
				error("out of space in optimizer a array");
2821
			if(s > maxa)
2822
				maxa = s;
2823
			if(*s != 0 && *s != r[1])
2824
				error("clobber of a array, pos'n %d, by %d", s-amem, r[1]);
2825
			*s = r[1];
2826
		}
2827
		indgo[i] = n;
2828
		if(adb > 1)
2829
			Bprint(ftable, "State %d: entry at %d\n", i, indgo[i]);
2830
		return;
2831
	nextn:;
2832
	}
2833
	error("Error; failure to place state %d\n", i);
2834
}
2835
 
2836
/*
2837
 * finds the next i
2838
 */
2839
int
2840
nxti(void)
2841
{
2842
	int i, max, maxi;
2843
 
2844
	max = 0;
2845
	maxi = 0;
2846
	for(i = 1; i <= nnonter; i++)
2847
		if(ggreed[i] >= max) {
2848
			max = ggreed[i];
2849
			maxi = -i;
2850
		}
2851
	for(i = 0; i < nstate; ++i)
2852
		if(tystate[i] >= max) {
2853
			max = tystate[i];
2854
			maxi = i;
2855
		}
2856
	if(nxdb)
2857
		Bprint(ftable, "nxti = %d, max = %d\n", maxi, max);
2858
	if(max == 0)
2859
		return NOMORE;
2860
	return maxi;
2861
}
2862
 
2863
/*
2864
 * write summary
2865
 */
2866
void
2867
osummary(void)
2868
{
2869
 
2870
	int i, *p;
2871
 
2872
	if(foutput == 0)
2873
		return;
2874
	i = 0;
2875
	for(p = maxa; p >= amem; p--)
2876
		if(*p == 0)
2877
			i++;
2878
 
2879
	Bprint(foutput, "Optimizer space used: input %d/%d, output %d/%d\n",
2880
		(int)(pmem-mem0+1), MEMSIZE, (int)(maxa-amem+1), ACTSIZE);
2881
	Bprint(foutput, "%d table entries, %d zero\n", (int)(maxa-amem+1), i);
2882
	Bprint(foutput, "maximum spread: %d, maximum offset: %d\n", maxspr, maxoff);
2883
}
2884
 
2885
/*
2886
 * this version is for C
2887
 * write out the optimized parser
2888
 */
2889
void
2890
aoutput(void)
2891
{
2892
	Bprint(ftable, "#define\tYYLAST\t%d\n", (int)(maxa-amem+1));
2893
	arout("yyact", amem, (maxa-amem)+1);
2894
	arout("yypact", indgo, nstate);
2895
	arout("yypgo", pgo, nnonter+1);
2896
}
2897
 
2898
void
2899
arout(char *s, int *v, int n)
2900
{
2901
	int i;
2902
 
2903
	Bprint(ftable, "short	%s[] =\n{", s);
2904
	for(i = 0; i < n;) {
2905
		if(i%10 == 0)
2906
			Bprint(ftable, "\n");
2907
		Bprint(ftable, "%4d", v[i]);
2908
		i++;
2909
		if(i == n)
2910
			Bprint(ftable, "\n};\n");
2911
		else
2912
			Bprint(ftable, ",");
2913
	}
2914
}
2915
 
2916
/*
2917
 * read and convert an integer from the standard input
2918
 * return the terminating character
2919
 * blanks, tabs, and newlines are ignored
2920
 */
2921
int
2922
gtnm(void)
2923
{
2924
	int sign, val, c;
2925
 
2926
	sign = 0;
2927
	val = 0;
2928
	while((c=Bgetrune(finput)) != Beof) {
2929
		if(isdigit(c)) {
2930
			val = val*10 + c-'0';
2931
			continue;
2932
		}
2933
		if(c == '-') {
2934
			sign = 1;
2935
			continue;
2936
		}
2937
		break;
2938
	}
2939
	if(sign)
2940
		val = -val;
2941
	*pmem++ = val;
2942
	if(pmem >= &mem0[MEMSIZE])
2943
		error("out of space");
2944
	return c;
2945
}