Subversion Repositories planix.SVN

Rev

Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
96 7u83 1
/****************************************************************
2
Copyright (C) Lucent Technologies 1997
3
All Rights Reserved
4
 
5
Permission to use, copy, modify, and distribute this software and
6
its documentation for any purpose and without fee is hereby
7
granted, provided that the above copyright notice appear in all
8
copies and that both that the copyright notice and this
9
permission notice and warranty disclaimer appear in supporting
10
documentation, and that the name Lucent Technologies or any of
11
its entities not be used in advertising or publicity pertaining
12
to distribution of the software without specific, written prior
13
permission.
14
 
15
LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
16
INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
17
IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
18
SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
20
IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
21
ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
22
THIS SOFTWARE.
23
****************************************************************/
24
 
25
/* lasciate ogne speranza, voi ch'intrate. */
26
 
27
#define	DEBUG
28
 
29
#include <ctype.h>
30
#include <stdio.h>
31
#include <string.h>
32
#include <stdlib.h>
33
#include "awk.h"
34
#include "ytab.h"
35
 
36
#define	HAT	(NCHARS+2)	/* matches ^ in regular expr */
37
				/* NCHARS is 2**n */
38
#define MAXLIN 22
39
 
40
#define type(v)		(v)->nobj	/* badly overloaded here */
41
#define info(v)		(v)->ntype	/* badly overloaded here */
42
#define left(v)		(v)->narg[0]
43
#define right(v)	(v)->narg[1]
44
#define parent(v)	(v)->nnext
45
 
46
#define LEAF	case CCL: case NCCL: case CHAR: case DOT: case FINAL: case ALL:
47
#define ELEAF	case EMPTYRE:		/* empty string in regexp */
48
#define UNARY	case STAR: case PLUS: case QUEST:
49
 
50
/* encoding in tree Nodes:
51
	leaf (CCL, NCCL, CHAR, DOT, FINAL, ALL, EMPTYRE):
52
		left is index, right contains value or pointer to value
53
	unary (STAR, PLUS, QUEST): left is child, right is null
54
	binary (CAT, OR): left and right are children
55
	parent contains pointer to parent
56
*/
57
 
58
 
59
int	*setvec;
60
int	*tmpset;
61
int	maxsetvec = 0;
62
 
63
int	rtok;		/* next token in current re */
64
int	rlxval;
65
static uschar	*rlxstr;
66
static uschar	*prestr;	/* current position in current re */
67
static uschar	*lastre;	/* origin of last re */
68
 
69
static	int setcnt;
70
static	int poscnt;
71
 
72
char	*patbeg;
73
int	patlen;
74
 
75
#define	NFA	20	/* cache this many dynamic fa's */
76
fa	*fatab[NFA];
77
int	nfatab	= 0;	/* entries in fatab */
78
 
79
fa *makedfa(const char *s, int anchor)	/* returns dfa for reg expr s */
80
{
81
	int i, use, nuse;
82
	fa *pfa;
83
	static int now = 1;
84
 
85
	if (setvec == 0) {	/* first time through any RE */
86
		maxsetvec = MAXLIN;
87
		setvec = (int *) malloc(maxsetvec * sizeof(int));
88
		tmpset = (int *) malloc(maxsetvec * sizeof(int));
89
		if (setvec == 0 || tmpset == 0)
90
			overflo("out of space initializing makedfa");
91
	}
92
 
93
	if (compile_time)	/* a constant for sure */
94
		return mkdfa(s, anchor);
95
	for (i = 0; i < nfatab; i++)	/* is it there already? */
96
		if (fatab[i]->anchor == anchor
97
		  && strcmp((const char *) fatab[i]->restr, s) == 0) {
98
			fatab[i]->use = now++;
99
			return fatab[i];
100
		}
101
	pfa = mkdfa(s, anchor);
102
	if (nfatab < NFA) {	/* room for another */
103
		fatab[nfatab] = pfa;
104
		fatab[nfatab]->use = now++;
105
		nfatab++;
106
		return pfa;
107
	}
108
	use = fatab[0]->use;	/* replace least-recently used */
109
	nuse = 0;
110
	for (i = 1; i < nfatab; i++)
111
		if (fatab[i]->use < use) {
112
			use = fatab[i]->use;
113
			nuse = i;
114
		}
115
	freefa(fatab[nuse]);
116
	fatab[nuse] = pfa;
117
	pfa->use = now++;
118
	return pfa;
119
}
120
 
121
fa *mkdfa(const char *s, int anchor)	/* does the real work of making a dfa */
122
				/* anchor = 1 for anchored matches, else 0 */
123
{
124
	Node *p, *p1;
125
	fa *f;
126
 
127
	p = reparse(s);
128
	p1 = op2(CAT, op2(STAR, op2(ALL, NIL, NIL), NIL), p);
129
		/* put ALL STAR in front of reg.  exp. */
130
	p1 = op2(CAT, p1, op2(FINAL, NIL, NIL));
131
		/* put FINAL after reg.  exp. */
132
 
133
	poscnt = 0;
134
	penter(p1);	/* enter parent pointers and leaf indices */
135
	if ((f = (fa *) calloc(1, sizeof(fa) + poscnt*sizeof(rrow))) == NULL)
136
		overflo("out of space for fa");
137
	f->accept = poscnt-1;	/* penter has computed number of positions in re */
138
	cfoll(f, p1);	/* set up follow sets */
139
	freetr(p1);
140
	if ((f->posns[0] = (int *) calloc(1, *(f->re[0].lfollow)*sizeof(int))) == NULL)
141
			overflo("out of space in makedfa");
142
	if ((f->posns[1] = (int *) calloc(1, sizeof(int))) == NULL)
143
		overflo("out of space in makedfa");
144
	*f->posns[1] = 0;
145
	f->initstat = makeinit(f, anchor);
146
	f->anchor = anchor;
147
	f->restr = (uschar *) tostring(s);
148
	return f;
149
}
150
 
151
int makeinit(fa *f, int anchor)
152
{
153
	int i, k;
154
 
155
	f->curstat = 2;
156
	f->out[2] = 0;
157
	f->reset = 0;
158
	k = *(f->re[0].lfollow);
159
	xfree(f->posns[2]);			
160
	if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL)
161
		overflo("out of space in makeinit");
162
	for (i=0; i <= k; i++) {
163
		(f->posns[2])[i] = (f->re[0].lfollow)[i];
164
	}
165
	if ((f->posns[2])[1] == f->accept)
166
		f->out[2] = 1;
167
	for (i=0; i < NCHARS; i++)
168
		f->gototab[2][i] = 0;
169
	f->curstat = cgoto(f, 2, HAT);
170
	if (anchor) {
171
		*f->posns[2] = k-1;	/* leave out position 0 */
172
		for (i=0; i < k; i++) {
173
			(f->posns[0])[i] = (f->posns[2])[i];
174
		}
175
 
176
		f->out[0] = f->out[2];
177
		if (f->curstat != 2)
178
			--(*f->posns[f->curstat]);
179
	}
180
	return f->curstat;
181
}
182
 
183
void penter(Node *p)	/* set up parent pointers and leaf indices */
184
{
185
	switch (type(p)) {
186
	ELEAF
187
	LEAF
188
		info(p) = poscnt;
189
		poscnt++;
190
		break;
191
	UNARY
192
		penter(left(p));
193
		parent(left(p)) = p;
194
		break;
195
	case CAT:
196
	case OR:
197
		penter(left(p));
198
		penter(right(p));
199
		parent(left(p)) = p;
200
		parent(right(p)) = p;
201
		break;
202
	default:	/* can't happen */
203
		FATAL("can't happen: unknown type %d in penter", type(p));
204
		break;
205
	}
206
}
207
 
208
void freetr(Node *p)	/* free parse tree */
209
{
210
	switch (type(p)) {
211
	ELEAF
212
	LEAF
213
		xfree(p);
214
		break;
215
	UNARY
216
		freetr(left(p));
217
		xfree(p);
218
		break;
219
	case CAT:
220
	case OR:
221
		freetr(left(p));
222
		freetr(right(p));
223
		xfree(p);
224
		break;
225
	default:	/* can't happen */
226
		FATAL("can't happen: unknown type %d in freetr", type(p));
227
		break;
228
	}
229
}
230
 
231
/* in the parsing of regular expressions, metacharacters like . have */
232
/* to be seen literally;  \056 is not a metacharacter. */
233
 
234
int hexstr(uschar **pp)	/* find and eval hex string at pp, return new p */
235
{			/* only pick up one 8-bit byte (2 chars) */
236
	uschar *p;
237
	int n = 0;
238
	int i;
239
 
240
	for (i = 0, p = (uschar *) *pp; i < 2 && isxdigit(*p); i++, p++) {
241
		if (isdigit(*p))
242
			n = 16 * n + *p - '0';
243
		else if (*p >= 'a' && *p <= 'f')
244
			n = 16 * n + *p - 'a' + 10;
245
		else if (*p >= 'A' && *p <= 'F')
246
			n = 16 * n + *p - 'A' + 10;
247
	}
248
	*pp = (uschar *) p;
249
	return n;
250
}
251
 
252
#define isoctdigit(c) ((c) >= '0' && (c) <= '7')	/* multiple use of arg */
253
 
254
int quoted(uschar **pp)	/* pick up next thing after a \\ */
255
			/* and increment *pp */
256
{
257
	uschar *p = *pp;
258
	int c;
259
 
260
	if ((c = *p++) == 't')
261
		c = '\t';
262
	else if (c == 'n')
263
		c = '\n';
264
	else if (c == 'f')
265
		c = '\f';
266
	else if (c == 'r')
267
		c = '\r';
268
	else if (c == 'b')
269
		c = '\b';
270
	else if (c == '\\')
271
		c = '\\';
272
	else if (c == 'x') {	/* hexadecimal goo follows */
273
		c = hexstr(&p);	/* this adds a null if number is invalid */
274
	} else if (isoctdigit(c)) {	/* \d \dd \ddd */
275
		int n = c - '0';
276
		if (isoctdigit(*p)) {
277
			n = 8 * n + *p++ - '0';
278
			if (isoctdigit(*p))
279
				n = 8 * n + *p++ - '0';
280
		}
281
		c = n;
282
	} /* else */
283
		/* c = c; */
284
	*pp = p;
285
	return c;
286
}
287
 
288
char *cclenter(const char *argp)	/* add a character class */
289
{
290
	int i, c, c2;
291
	uschar *p = (uschar *) argp;
292
	uschar *op, *bp;
293
	static uschar *buf = 0;
294
	static int bufsz = 100;
295
 
296
	op = p;
297
	if (buf == 0 && (buf = (uschar *) malloc(bufsz)) == NULL)
298
		FATAL("out of space for character class [%.10s...] 1", p);
299
	bp = buf;
300
	for (i = 0; (c = *p++) != 0; ) {
301
		if (c == '\\') {
302
			c = quoted(&p);
303
		} else if (c == '-' && i > 0 && bp[-1] != 0) {
304
			if (*p != 0) {
305
				c = bp[-1];
306
				c2 = *p++;
307
				if (c2 == '\\')
308
					c2 = quoted(&p);
309
				if (c > c2) {	/* empty; ignore */
310
					bp--;
311
					i--;
312
					continue;
313
				}
314
				while (c < c2) {
315
					if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, "cclenter1"))
316
						FATAL("out of space for character class [%.10s...] 2", p);
317
					*bp++ = ++c;
318
					i++;
319
				}
320
				continue;
321
			}
322
		}
323
		if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, "cclenter2"))
324
			FATAL("out of space for character class [%.10s...] 3", p);
325
		*bp++ = c;
326
		i++;
327
	}
328
	*bp = 0;
329
	dprintf( ("cclenter: in = |%s|, out = |%s|\n", op, buf) );
330
	xfree(op);
331
	return (char *) tostring((char *) buf);
332
}
333
 
334
void overflo(const char *s)
335
{
336
	FATAL("regular expression too big: %.30s...", s);
337
}
338
 
339
void cfoll(fa *f, Node *v)	/* enter follow set of each leaf of vertex v into lfollow[leaf] */
340
{
341
	int i;
342
	int *p;
343
 
344
	switch (type(v)) {
345
	ELEAF
346
	LEAF
347
		f->re[info(v)].ltype = type(v);
348
		f->re[info(v)].lval.np = right(v);
349
		while (f->accept >= maxsetvec) {	/* guessing here! */
350
			maxsetvec *= 4;
351
			setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
352
			tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
353
			if (setvec == 0 || tmpset == 0)
354
				overflo("out of space in cfoll()");
355
		}
356
		for (i = 0; i <= f->accept; i++)
357
			setvec[i] = 0;
358
		setcnt = 0;
359
		follow(v);	/* computes setvec and setcnt */
360
		if ((p = (int *) calloc(1, (setcnt+1)*sizeof(int))) == NULL)
361
			overflo("out of space building follow set");
362
		f->re[info(v)].lfollow = p;
363
		*p = setcnt;
364
		for (i = f->accept; i >= 0; i--)
365
			if (setvec[i] == 1)
366
				*++p = i;
367
		break;
368
	UNARY
369
		cfoll(f,left(v));
370
		break;
371
	case CAT:
372
	case OR:
373
		cfoll(f,left(v));
374
		cfoll(f,right(v));
375
		break;
376
	default:	/* can't happen */
377
		FATAL("can't happen: unknown type %d in cfoll", type(v));
378
	}
379
}
380
 
381
int first(Node *p)	/* collects initially active leaves of p into setvec */
382
			/* returns 0 if p matches empty string */
383
{
384
	int b, lp;
385
 
386
	switch (type(p)) {
387
	ELEAF
388
	LEAF
389
		lp = info(p);	/* look for high-water mark of subscripts */
390
		while (setcnt >= maxsetvec || lp >= maxsetvec) {	/* guessing here! */
391
			maxsetvec *= 4;
392
			setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
393
			tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
394
			if (setvec == 0 || tmpset == 0)
395
				overflo("out of space in first()");
396
		}
397
		if (type(p) == EMPTYRE) {
398
			setvec[lp] = 0;
399
			return(0);
400
		}
401
		if (setvec[lp] != 1) {
402
			setvec[lp] = 1;
403
			setcnt++;
404
		}
405
		if (type(p) == CCL && (*(char *) right(p)) == '\0')
406
			return(0);		/* empty CCL */
407
		else return(1);
408
	case PLUS:
409
		if (first(left(p)) == 0) return(0);
410
		return(1);
411
	case STAR:
412
	case QUEST:
413
		first(left(p));
414
		return(0);
415
	case CAT:
416
		if (first(left(p)) == 0 && first(right(p)) == 0) return(0);
417
		return(1);
418
	case OR:
419
		b = first(right(p));
420
		if (first(left(p)) == 0 || b == 0) return(0);
421
		return(1);
422
	}
423
	FATAL("can't happen: unknown type %d in first", type(p));	/* can't happen */
424
	return(-1);
425
}
426
 
427
void follow(Node *v)	/* collects leaves that can follow v into setvec */
428
{
429
	Node *p;
430
 
431
	if (type(v) == FINAL)
432
		return;
433
	p = parent(v);
434
	switch (type(p)) {
435
	case STAR:
436
	case PLUS:
437
		first(v);
438
		follow(p);
439
		return;
440
 
441
	case OR:
442
	case QUEST:
443
		follow(p);
444
		return;
445
 
446
	case CAT:
447
		if (v == left(p)) {	/* v is left child of p */
448
			if (first(right(p)) == 0) {
449
				follow(p);
450
				return;
451
			}
452
		} else		/* v is right child */
453
			follow(p);
454
		return;
455
	}
456
}
457
 
458
int member(int c, const char *sarg)	/* is c in s? */
459
{
460
	uschar *s = (uschar *) sarg;
461
 
462
	while (*s)
463
		if (c == *s++)
464
			return(1);
465
	return(0);
466
}
467
 
468
int match(fa *f, const char *p0)	/* shortest match ? */
469
{
470
	int s, ns;
471
	uschar *p = (uschar *) p0;
472
 
473
	s = f->reset ? makeinit(f,0) : f->initstat;
474
	if (f->out[s])
475
		return(1);
476
	do {
477
		/* assert(*p < NCHARS); */
478
		if ((ns = f->gototab[s][*p]) != 0)
479
			s = ns;
480
		else
481
			s = cgoto(f, s, *p);
482
		if (f->out[s])
483
			return(1);
484
	} while (*p++ != 0);
485
	return(0);
486
}
487
 
488
int pmatch(fa *f, const char *p0)	/* longest match, for sub */
489
{
490
	int s, ns;
491
	uschar *p = (uschar *) p0;
492
	uschar *q;
493
	int i, k;
494
 
495
	/* s = f->reset ? makeinit(f,1) : f->initstat; */
496
	if (f->reset) {
497
		f->initstat = s = makeinit(f,1);
498
	} else {
499
		s = f->initstat;
500
	}
501
	patbeg = (char *) p;
502
	patlen = -1;
503
	do {
504
		q = p;
505
		do {
506
			if (f->out[s])		/* final state */
507
				patlen = q-p;
508
			/* assert(*q < NCHARS); */
509
			if ((ns = f->gototab[s][*q]) != 0)
510
				s = ns;
511
			else
512
				s = cgoto(f, s, *q);
513
			if (s == 1) {	/* no transition */
514
				if (patlen >= 0) {
515
					patbeg = (char *) p;
516
					return(1);
517
				}
518
				else
519
					goto nextin;	/* no match */
520
			}
521
		} while (*q++ != 0);
522
		if (f->out[s])
523
			patlen = q-p-1;	/* don't count $ */
524
		if (patlen >= 0) {
525
			patbeg = (char *) p;
526
			return(1);
527
		}
528
	nextin:
529
		s = 2;
530
		if (f->reset) {
531
			for (i = 2; i <= f->curstat; i++)
532
				xfree(f->posns[i]);
533
			k = *f->posns[0];			
534
			if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL)
535
				overflo("out of space in pmatch");
536
			for (i = 0; i <= k; i++)
537
				(f->posns[2])[i] = (f->posns[0])[i];
538
			f->initstat = f->curstat = 2;
539
			f->out[2] = f->out[0];
540
			for (i = 0; i < NCHARS; i++)
541
				f->gototab[2][i] = 0;
542
		}
543
	} while (*p++ != 0);
544
	return (0);
545
}
546
 
547
int nematch(fa *f, const char *p0)	/* non-empty match, for sub */
548
{
549
	int s, ns;
550
	uschar *p = (uschar *) p0;
551
	uschar *q;
552
	int i, k;
553
 
554
	/* s = f->reset ? makeinit(f,1) : f->initstat; */
555
	if (f->reset) {
556
		f->initstat = s = makeinit(f,1);
557
	} else {
558
		s = f->initstat;
559
	}
560
	patlen = -1;
561
	while (*p) {
562
		q = p;
563
		do {
564
			if (f->out[s])		/* final state */
565
				patlen = q-p;
566
			/* assert(*q < NCHARS); */
567
			if ((ns = f->gototab[s][*q]) != 0)
568
				s = ns;
569
			else
570
				s = cgoto(f, s, *q);
571
			if (s == 1) {	/* no transition */
572
				if (patlen > 0) {
573
					patbeg = (char *) p;
574
					return(1);
575
				} else
576
					goto nnextin;	/* no nonempty match */
577
			}
578
		} while (*q++ != 0);
579
		if (f->out[s])
580
			patlen = q-p-1;	/* don't count $ */
581
		if (patlen > 0 ) {
582
			patbeg = (char *) p;
583
			return(1);
584
		}
585
	nnextin:
586
		s = 2;
587
		if (f->reset) {
588
			for (i = 2; i <= f->curstat; i++)
589
				xfree(f->posns[i]);
590
			k = *f->posns[0];			
591
			if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL)
592
				overflo("out of state space");
593
			for (i = 0; i <= k; i++)
594
				(f->posns[2])[i] = (f->posns[0])[i];
595
			f->initstat = f->curstat = 2;
596
			f->out[2] = f->out[0];
597
			for (i = 0; i < NCHARS; i++)
598
				f->gototab[2][i] = 0;
599
		}
600
		p++;
601
	}
602
	return (0);
603
}
604
 
605
Node *reparse(const char *p)	/* parses regular expression pointed to by p */
606
{			/* uses relex() to scan regular expression */
607
	Node *np;
608
 
609
	dprintf( ("reparse <%s>\n", p) );
610
	lastre = prestr = (uschar *) p;	/* prestr points to string to be parsed */
611
	rtok = relex();
612
	/* GNU compatibility: an empty regexp matches anything */
613
	if (rtok == '\0') {
614
		/* FATAL("empty regular expression"); previous */
615
		return(op2(EMPTYRE, NIL, NIL));
616
	}
617
	np = regexp();
618
	if (rtok != '\0')
619
		FATAL("syntax error in regular expression %s at %s", lastre, prestr);
620
	return(np);
621
}
622
 
623
Node *regexp(void)	/* top-level parse of reg expr */
624
{
625
	return (alt(concat(primary())));
626
}
627
 
628
Node *primary(void)
629
{
630
	Node *np;
631
 
632
	switch (rtok) {
633
	case CHAR:
634
		np = op2(CHAR, NIL, itonp(rlxval));
635
		rtok = relex();
636
		return (unary(np));
637
	case ALL:
638
		rtok = relex();
639
		return (unary(op2(ALL, NIL, NIL)));
640
	case EMPTYRE:
641
		rtok = relex();
642
		return (unary(op2(ALL, NIL, NIL)));
643
	case DOT:
644
		rtok = relex();
645
		return (unary(op2(DOT, NIL, NIL)));
646
	case CCL:
647
		np = op2(CCL, NIL, (Node*) cclenter((char *) rlxstr));
648
		rtok = relex();
649
		return (unary(np));
650
	case NCCL:
651
		np = op2(NCCL, NIL, (Node *) cclenter((char *) rlxstr));
652
		rtok = relex();
653
		return (unary(np));
654
	case '^':
655
		rtok = relex();
656
		return (unary(op2(CHAR, NIL, itonp(HAT))));
657
	case '$':
658
		rtok = relex();
659
		return (unary(op2(CHAR, NIL, NIL)));
660
	case '(':
661
		rtok = relex();
662
		if (rtok == ')') {	/* special pleading for () */
663
			rtok = relex();
664
			return unary(op2(CCL, NIL, (Node *) tostring("")));
665
		}
666
		np = regexp();
667
		if (rtok == ')') {
668
			rtok = relex();
669
			return (unary(np));
670
		}
671
		else
672
			FATAL("syntax error in regular expression %s at %s", lastre, prestr);
673
	default:
674
		FATAL("illegal primary in regular expression %s at %s", lastre, prestr);
675
	}
676
	return 0;	/*NOTREACHED*/
677
}
678
 
679
Node *concat(Node *np)
680
{
681
	switch (rtok) {
682
	case CHAR: case DOT: case ALL: case EMPTYRE: case CCL: case NCCL: case '$': case '(':
683
		return (concat(op2(CAT, np, primary())));
684
	}
685
	return (np);
686
}
687
 
688
Node *alt(Node *np)
689
{
690
	if (rtok == OR) {
691
		rtok = relex();
692
		return (alt(op2(OR, np, concat(primary()))));
693
	}
694
	return (np);
695
}
696
 
697
Node *unary(Node *np)
698
{
699
	switch (rtok) {
700
	case STAR:
701
		rtok = relex();
702
		return (unary(op2(STAR, np, NIL)));
703
	case PLUS:
704
		rtok = relex();
705
		return (unary(op2(PLUS, np, NIL)));
706
	case QUEST:
707
		rtok = relex();
708
		return (unary(op2(QUEST, np, NIL)));
709
	default:
710
		return (np);
711
	}
712
}
713
 
714
/*
715
 * Character class definitions conformant to the POSIX locale as
716
 * defined in IEEE P1003.1 draft 7 of June 2001, assuming the source
717
 * and operating character sets are both ASCII (ISO646) or supersets
718
 * thereof.
719
 *
720
 * Note that to avoid overflowing the temporary buffer used in
721
 * relex(), the expanded character class (prior to range expansion)
722
 * must be less than twice the size of their full name.
723
 */
724
 
725
/* Because isblank doesn't show up in any of the header files on any
726
 * system i use, it's defined here.  if some other locale has a richer
727
 * definition of "blank", define HAS_ISBLANK and provide your own
728
 * version.
729
 * the parentheses here are an attempt to find a path through the maze
730
 * of macro definition and/or function and/or version provided.  thanks
731
 * to nelson beebe for the suggestion; let's see if it works everywhere.
732
 */
733
 
734
/* #define HAS_ISBLANK */
735
#ifndef HAS_ISBLANK
736
 
737
int (xisblank)(int c)
738
{
739
	return c==' ' || c=='\t';
740
}
741
 
742
#endif
743
 
744
struct charclass {
745
	const char *cc_name;
746
	int cc_namelen;
747
	int (*cc_func)(int);
748
} charclasses[] = {
749
	{ "alnum",	5,	isalnum },
750
	{ "alpha",	5,	isalpha },
751
#ifndef HAS_ISBLANK
752
	{ "blank",	5,	xisblank },
753
#else
754
	{ "blank",	5,	isblank },
755
#endif
756
	{ "cntrl",	5,	iscntrl },
757
	{ "digit",	5,	isdigit },
758
	{ "graph",	5,	isgraph },
759
	{ "lower",	5,	islower },
760
	{ "print",	5,	isprint },
761
	{ "punct",	5,	ispunct },
762
	{ "space",	5,	isspace },
763
	{ "upper",	5,	isupper },
764
	{ "xdigit",	6,	isxdigit },
765
	{ NULL,		0,	NULL },
766
};
767
 
768
 
769
int relex(void)		/* lexical analyzer for reparse */
770
{
771
	int c, n;
772
	int cflag;
773
	static uschar *buf = 0;
774
	static int bufsz = 100;
775
	uschar *bp;
776
	struct charclass *cc;
777
	int i;
778
 
779
	switch (c = *prestr++) {
780
	case '|': return OR;
781
	case '*': return STAR;
782
	case '+': return PLUS;
783
	case '?': return QUEST;
784
	case '.': return DOT;
785
	case '\0': prestr--; return '\0';
786
	case '^':
787
	case '$':
788
	case '(':
789
	case ')':
790
		return c;
791
	case '\\':
792
		rlxval = quoted(&prestr);
793
		return CHAR;
794
	default:
795
		rlxval = c;
796
		return CHAR;
797
	case '[': 
798
		if (buf == 0 && (buf = (uschar *) malloc(bufsz)) == NULL)
799
			FATAL("out of space in reg expr %.10s..", lastre);
800
		bp = buf;
801
		if (*prestr == '^') {
802
			cflag = 1;
803
			prestr++;
804
		}
805
		else
806
			cflag = 0;
807
		n = 2 * strlen((const char *) prestr)+1;
808
		if (!adjbuf((char **) &buf, &bufsz, n, n, (char **) &bp, "relex1"))
809
			FATAL("out of space for reg expr %.10s...", lastre);
810
		for (; ; ) {
811
			if ((c = *prestr++) == '\\') {
812
				*bp++ = '\\';
813
				if ((c = *prestr++) == '\0')
814
					FATAL("nonterminated character class %.20s...", lastre);
815
				*bp++ = c;
816
			/* } else if (c == '\n') { */
817
			/* 	FATAL("newline in character class %.20s...", lastre); */
818
			} else if (c == '[' && *prestr == ':') {
819
				/* POSIX char class names, Dag-Erling Smorgrav, des@ofug.org */
820
				for (cc = charclasses; cc->cc_name; cc++)
821
					if (strncmp((const char *) prestr + 1, (const char *) cc->cc_name, cc->cc_namelen) == 0)
822
						break;
823
				if (cc->cc_name != NULL && prestr[1 + cc->cc_namelen] == ':' &&
824
				    prestr[2 + cc->cc_namelen] == ']') {
825
					prestr += cc->cc_namelen + 3;
826
					/*
827
					 * BUG: We begin at 1, instead of 0, since we
828
					 * would otherwise prematurely terminate the
829
					 * string for classes like [[:cntrl:]]. This
830
					 * means that we can't match the NUL character,
831
					 * not without first adapting the entire
832
					 * program to track each string's length.
833
					 */
834
					for (i = 1; i < NCHARS; i++) {
835
						if (!adjbuf((char **) &buf, &bufsz, bp-buf+1, 100, (char **) &bp, "relex2"))
836
						    FATAL("out of space for reg expr %.10s...", lastre);
837
						if (cc->cc_func(i)) {
838
							*bp++ = i;
839
							n++;
840
						}
841
					}
842
				} else
843
					*bp++ = c;
844
			} else if (c == '\0') {
845
				FATAL("nonterminated character class %.20s", lastre);
846
			} else if (bp == buf) {	/* 1st char is special */
847
				*bp++ = c;
848
			} else if (c == ']') {
849
				*bp++ = 0;
850
				rlxstr = (uschar *) tostring((char *) buf);
851
				if (cflag == 0)
852
					return CCL;
853
				else
854
					return NCCL;
855
			} else
856
				*bp++ = c;
857
		}
858
	}
859
}
860
 
861
int cgoto(fa *f, int s, int c)
862
{
863
	int i, j, k;
864
	int *p, *q;
865
 
866
	assert(c == HAT || c < NCHARS);
867
	while (f->accept >= maxsetvec) {	/* guessing here! */
868
		maxsetvec *= 4;
869
		setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
870
		tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
871
		if (setvec == 0 || tmpset == 0)
872
			overflo("out of space in cgoto()");
873
	}
874
	for (i = 0; i <= f->accept; i++)
875
		setvec[i] = 0;
876
	setcnt = 0;
877
	/* compute positions of gototab[s,c] into setvec */
878
	p = f->posns[s];
879
	for (i = 1; i <= *p; i++) {
880
		if ((k = f->re[p[i]].ltype) != FINAL) {
881
			if ((k == CHAR && c == ptoi(f->re[p[i]].lval.np))
882
			 || (k == DOT && c != 0 && c != HAT)
883
			 || (k == ALL && c != 0)
884
			 || (k == EMPTYRE && c != 0)
885
			 || (k == CCL && member(c, (char *) f->re[p[i]].lval.up))
886
			 || (k == NCCL && !member(c, (char *) f->re[p[i]].lval.up) && c != 0 && c != HAT)) {
887
				q = f->re[p[i]].lfollow;
888
				for (j = 1; j <= *q; j++) {
889
					if (q[j] >= maxsetvec) {
890
						maxsetvec *= 4;
891
						setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
892
						tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
893
						if (setvec == 0 || tmpset == 0)
894
							overflo("cgoto overflow");
895
					}
896
					if (setvec[q[j]] == 0) {
897
						setcnt++;
898
						setvec[q[j]] = 1;
899
					}
900
				}
901
			}
902
		}
903
	}
904
	/* determine if setvec is a previous state */
905
	tmpset[0] = setcnt;
906
	j = 1;
907
	for (i = f->accept; i >= 0; i--)
908
		if (setvec[i]) {
909
			tmpset[j++] = i;
910
		}
911
	/* tmpset == previous state? */
912
	for (i = 1; i <= f->curstat; i++) {
913
		p = f->posns[i];
914
		if ((k = tmpset[0]) != p[0])
915
			goto different;
916
		for (j = 1; j <= k; j++)
917
			if (tmpset[j] != p[j])
918
				goto different;
919
		/* setvec is state i */
920
		f->gototab[s][c] = i;
921
		return i;
922
	  different:;
923
	}
924
 
925
	/* add tmpset to current set of states */
926
	if (f->curstat >= NSTATES-1) {
927
		f->curstat = 2;
928
		f->reset = 1;
929
		for (i = 2; i < NSTATES; i++)
930
			xfree(f->posns[i]);
931
	} else
932
		++(f->curstat);
933
	for (i = 0; i < NCHARS; i++)
934
		f->gototab[f->curstat][i] = 0;
935
	xfree(f->posns[f->curstat]);
936
	if ((p = (int *) calloc(1, (setcnt+1)*sizeof(int))) == NULL)
937
		overflo("out of space in cgoto");
938
 
939
	f->posns[f->curstat] = p;
940
	f->gototab[s][c] = f->curstat;
941
	for (i = 0; i <= setcnt; i++)
942
		p[i] = tmpset[i];
943
	if (setvec[f->accept])
944
		f->out[f->curstat] = 1;
945
	else
946
		f->out[f->curstat] = 0;
947
	return f->curstat;
948
}
949
 
950
 
951
void freefa(fa *f)	/* free a finite automaton */
952
{
953
	int i;
954
 
955
	if (f == NULL)
956
		return;
957
	for (i = 0; i <= f->curstat; i++)
958
		xfree(f->posns[i]);
959
	for (i = 0; i <= f->accept; i++) {
960
		xfree(f->re[i].lfollow);
961
		if (f->re[i].ltype == CCL || f->re[i].ltype == NCCL)
962
			xfree((f->re[i].lval.np));
963
	}
964
	xfree(f->restr);
965
	xfree(f);
966
}