Subversion Repositories planix.SVN

Rev

Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
99 7u83 1
/*
2
 * Changes by Gunnar Ritter, Freiburg i. Br., Germany, November 2002.
3
 *
4
 * Sccsid @(#)regparse.c	1.12 (gritter) 9/22/03
5
 */
6
/*  UNIX(R) Regular Expresssion Library
7
 *
8
 *  Note: Code is released under the GNU LGPL
9
 *
10
 *  Copyright (C) 2001 Caldera International, Inc.
11
 *
12
 *  This library is free software; you can redistribute it and/or
13
 *  modify it under the terms of the GNU Lesser General Public
14
 *  License as published by the Free Software Foundation; either
15
 *  version 2 of the License, or (at your option) any later version.
16
 *
17
 *  This library is distributed in the hope that it will be useful,
18
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
19
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20
 *  Lesser General Public License for more details.
21
 *
22
 *  You should have received a copy of the GNU Lesser General Public
23
 *  License along with this library; if not, write to:
24
 *        Free Software Foundation, Inc.
25
 *        59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
26
 */
27
 
28
/*	#include "synonyms.h"	*/
29
#include <stdlib.h>
30
#include <ctype.h>
31
#include "re.h"
32
 
33
LIBUXRE_STATIC void
34
libuxre_regdeltree(Tree *tp, int all)
35
{
36
	if (tp == 0)
37
		return;
38
	if (tp->op < 0)
39
	{
40
		switch (KIND_ROP(tp->op))
41
		{
42
		case BINARY_ROP:
43
			libuxre_regdeltree(tp->right.ptr, all);
44
			/*FALLTHROUGH*/
45
		case UNARY_ROP:
46
			libuxre_regdeltree(tp->left.ptr, all);
47
			break;
48
		default:
49
			if (tp->op == ROP_BKT && all)
50
			{
51
				libuxre_bktfree(tp->right.info.bkt);
52
				free(tp->right.info.bkt);
53
			}
54
			break;
55
		}
56
	}
57
	free(tp);
58
}
59
 
60
LIBUXRE_STATIC Tree *
61
libuxre_reg1tree(w_type op, Tree *lp)
62
{
63
	Tree *tp;
64
 
65
	if ((tp = malloc(sizeof(Tree))) == 0)
66
	{
67
		if (lp != 0)
68
			libuxre_regdeltree(lp, 1);
69
		return 0;
70
	}
71
	tp->op = op;
72
	tp->left.ptr = lp;
73
	if (lp != 0)
74
		lp->parent = tp;
75
	return tp;
76
}
77
 
78
LIBUXRE_STATIC Tree *
79
libuxre_reg2tree(w_type op, Tree *lp, Tree *rp)
80
{
81
	Tree *tp;
82
 
83
	if ((tp = malloc(sizeof(Tree))) == 0)
84
	{
85
		libuxre_regdeltree(lp, 1);
86
		libuxre_regdeltree(rp, 1);
87
		return 0;
88
	}
89
	tp->op = op;
90
	tp->left.ptr = lp;
91
	lp->parent = tp;
92
	tp->right.ptr = rp;
93
	rp->parent = tp;
94
	return tp;
95
}
96
 
97
static int
98
lex(Lex *lxp)
99
{
100
	size_t num;
101
	w_type wc;
102
	int n, mb_cur_max;
103
 
104
	mb_cur_max = lxp->mb_cur_max;
105
nextc:	switch (wc = *lxp->pat++) /* interesting ones are single bytes */
106
	{
107
	case '\0':
108
		lxp->pat--;	/* continue to report ROP_END */
109
		wc = ROP_END;
110
		break;
111
	case '(':
112
		if (lxp->flags & REG_PARENS)
113
		{
114
		leftparen:;
115
			/*
116
			* Must keep track of the closed and
117
			* yet-to-be closed groups as a list.
118
			* Consider (()a(()b(()c(()d... in which
119
			* at each letter another even-numbered
120
			* group is made available, but no
121
			* odd-numbered ones are.
122
			*/
123
			if ((lxp->flags & REG_NOBACKREF) == 0)
124
			{
125
				if (lxp->nleft >= lxp->nclist) /* grow it */
126
				{
127
					unsigned char *p;
128
 
129
					lxp->nclist += 8; /* arbitrary */
130
					if ((p = realloc(lxp->clist,
131
						lxp->nclist)) == 0)
132
					{
133
						lxp->err = REG_ESPACE;
134
						return -1;
135
					}
136
					lxp->clist = p;
137
				}
138
				lxp->clist[lxp->nleft] = 0; /* unavailable */
139
			}
140
			lxp->nleft++;
141
			wc = ROP_LP;
142
		}
143
		break;
144
	case ')':
145
		/*
146
		* For REG_PARENS, only take a right paren as a close
147
		* if there is a matching left paren.
148
		*/
149
		if (lxp->flags & REG_PARENS && lxp->nright < lxp->nleft)
150
		{
151
			lxp->nright++;
152
		rightparen:;
153
			/*
154
			* The group that is being closed is the highest
155
			* numbered as-yet-unclosed group.
156
			*/
157
			if ((lxp->flags & REG_NOBACKREF) == 0)
158
			{
159
				num = lxp->nleft;
160
				while (lxp->clist[--num] != 0)
161
					;
162
				lxp->clist[num] = 1;
163
			}
164
			wc = ROP_RP;
165
		}
166
		break;
167
	case '.':
168
		wc = ROP_ANYCH;
169
		if (lxp->flags & REG_NEWLINE)
170
			wc = ROP_NOTNL;
171
		break;
172
	case '*':
173
		if (lxp->flags & REG_ADDITIVE)
174
		{
175
	nxtstar:	switch (*lxp->pat)
176
			{
177
			case '+':
178
				if ((lxp->flags & REG_PLUS) == 0)
179
					break;
180
				lxp->pat++;
181
				goto nxtstar;
182
			case '?':
183
				if ((lxp->flags & REG_QUEST) == 0)
184
					break;
185
				/*FALLTHRU*/
186
			case '*':
187
				lxp->pat++;
188
				goto nxtstar;
189
			}
190
		}
191
		wc = ROP_STAR;
192
		break;
193
	case '^':
194
		/*
195
		* Look "behind" to see if this is an anchor.
196
		* Take it as an anchor if it follows an alternation
197
		* operator.  (lxp->tok is initially set to ROP_OR.)
198
		*/
199
		if (lxp->flags & REG_ANCHORS || lxp->tok == ROP_OR) {
200
			if (lxp->flags & REG_ADDITIVE)
201
			{
202
				int optional = 0;
203
 
204
			nxtcar:	switch (*lxp->pat)
205
				{
206
				case '+':
207
					if ((lxp->flags & REG_PLUS) == 0)
208
						break;
209
					lxp->pat++;
210
					goto nxtcar;
211
				case '?':
212
					if ((lxp->flags & REG_QUEST) == 0)
213
						break;
214
					/*FALLTHRU*/
215
				case '*':
216
					optional = 1;
217
					lxp->pat++;
218
					goto nxtcar;
219
				}
220
				if (optional)
221
					goto nextc;
222
			}
223
			wc = ROP_BOL;
224
		}
225
		break;
226
	case '$':
227
		/*
228
		* Look ahead to see if this is an anchor,
229
		* unless any '$' is an anchor.
230
		* Take it as an anchor if it occurs just before
231
		* the pattern end or an alternation operator.
232
		*/
233
		if (lxp->flags & REG_ANCHORS || *lxp->pat == '\0'
234
			|| (lxp->flags & REG_OR && *lxp->pat == '|')
235
			|| (lxp->flags & REG_NLALT && *lxp->pat == '\n'))
236
		{
237
			if (lxp->flags & REG_ADDITIVE)
238
			{
239
				int optional = 0;
240
 
241
			nxtdol:	switch (*lxp->pat)
242
				{
243
				case '+':
244
					if ((lxp->flags & REG_PLUS) == 0)
245
						break;
246
					lxp->pat++;
247
					goto nxtdol;
248
				case '?':
249
					if ((lxp->flags & REG_QUEST) == 0)
250
						break;
251
					/*FALLTHRU*/
252
				case '*':
253
					optional = 1;
254
					lxp->pat++;
255
					goto nxtdol;
256
				}
257
				if (optional)
258
					goto nextc;
259
			}
260
			wc = ROP_EOL;
261
		}
262
		break;
263
	case '+':
264
		if (lxp->flags & REG_PLUS)
265
		{
266
			wc = ROP_PLUS;
267
			if (lxp->flags & REG_ADDITIVE)
268
			{
269
		nxtplus:	switch (*lxp->pat)
270
				{
271
				case '?':
272
					if ((lxp->flags & REG_QUEST) == 0)
273
						break;
274
				case '*':
275
					wc = ROP_STAR;
276
					/*FALLTHRU*/
277
				case '+':
278
					lxp->pat++;
279
					goto nxtplus;
280
				}
281
			}
282
		}
283
		break;
284
	case '?':
285
		if (lxp->flags & REG_QUEST)
286
		{
287
			wc = ROP_QUEST;
288
			if (lxp->flags & REG_ADDITIVE)
289
			{
290
		nxtquest:	switch (*lxp->pat)
291
				{
292
				case '+':
293
					if ((lxp->flags & REG_PLUS) == 0)
294
						break;
295
				case '*':
296
					wc = ROP_STAR;
297
					/*FALLTHRU*/
298
				case '?':
299
					lxp->pat++;
300
					goto nxtquest;
301
				}
302
			}
303
		}
304
		break;
305
	case '\n':
306
		if (lxp->flags & REG_NLALT)
307
		{
308
			/*
309
			* Even when newline is an alternative separator,
310
			* it doesn't permit parenthesized subexpressions
311
			* to include it.
312
			*/
313
			if (lxp->nleft != lxp->nright)
314
			{
315
				lxp->err = REG_EPAREN;
316
				return -1;
317
			}
318
			wc = ROP_OR;
319
		}
320
		else if (lxp->flags & REG_NEWLINE)
321
			lxp->flags |= REG_NFA;
322
		break;
323
	case '|':
324
		if (lxp->flags & REG_OR)
325
			wc = ROP_OR;
326
		break;
327
	case '[':
328
		if ((lxp->info.bkt = malloc(sizeof(Bracket))) == 0)
329
		{
330
			lxp->err = REG_ESPACE;
331
			return -1;
332
		}
333
		if ((lxp->flags & REG_GOTBKT) == 0)	/* first time */
334
		{
335
			struct lc_collate *col;
336
 
337
			lxp->flags |= REG_GOTBKT;
338
			lxp->bktflags = 0;
339
			if (lxp->flags & REG_ICASE)
340
				lxp->bktflags |= BKT_ONECASE;
341
			if (lxp->flags & REG_NEWLINE)
342
				lxp->bktflags |= BKT_NOTNL;
343
			if (lxp->flags & REG_BADRANGE)
344
				lxp->bktflags |= BKT_BADRANGE;
345
			if (lxp->flags & REG_ODDRANGE)
346
				lxp->bktflags |= BKT_ODDRANGE;
347
			if (lxp->flags & REG_SEPRANGE)
348
				lxp->bktflags |= BKT_SEPRANGE;
349
			if (lxp->flags & REG_BKTQUOTE)
350
				lxp->bktflags |= BKT_QUOTE;
351
			if (lxp->flags & REG_BKTEMPTY)
352
				lxp->bktflags |= BKT_EMPTY;
353
			if (lxp->flags & REG_ESCNL)
354
				lxp->bktflags |= BKT_ESCNL;
355
			if (lxp->flags & REG_NLALT)
356
				lxp->bktflags |= BKT_NLBAD;
357
			if (lxp->flags & REG_ESCSEQ)
358
				lxp->bktflags |= BKT_ESCSEQ;
359
			if (lxp->flags & REG_BKTESCAPE)
360
				lxp->bktflags |= BKT_ESCAPE;
361
			if (lxp->flags & REG_NOI18N)
362
				lxp->bktflags |= BKT_NOI18N;
363
			if (lxp->flags & REG_OLDESC)
364
				lxp->bktflags |= BKT_OLDESC;
365
			if ((col = libuxre_lc_collate(0)) != 0)
366
			{
367
				if (col->maintbl == 0
368
					|| col->flags & CHF_ENCODED)
369
				{
370
					(void)libuxre_lc_collate(col);
371
					col = 0;
372
				}
373
				else if (col->flags & CHF_MULTICH)
374
					lxp->flags |= REG_NFA;
375
			}
376
			lxp->col = col;
377
		}
378
		n = lxp->bktflags;
379
		if (*lxp->pat == '^')
380
		{
381
			n |= BKT_NEGATED;
382
			lxp->pat++;
383
		}
384
		lxp->info.bkt->col = lxp->col;
385
		if ((n = libuxre_bktmbcomp(lxp->info.bkt, lxp->pat,
386
						n, mb_cur_max)) < 0)
387
		{
388
			free(lxp->info.bkt);
389
			lxp->err = -n;	/* convert to REG_* errors */
390
			return -1;
391
		}
392
		/*
393
		* NFA forced if newline can be a match and REG_NEWLINE is set.
394
		*/
395
		if ((lxp->flags & (REG_NFA | REG_NEWLINE)) == REG_NEWLINE
396
			&& lxp->pat[-1] == '[' /* i.e., not BKT_NEGATED */
397
			&& libuxre_bktmbexec(lxp->info.bkt, '\n', 0, 1) == 0)
398
		{
399
			lxp->flags |= REG_NFA;
400
		}
401
		lxp->pat += n;
402
		wc = ROP_BKT;
403
		break;
404
	case '{':
405
		if (lxp->flags & REG_NOBRACES || (lxp->flags & REG_BRACES) == 0)
406
			break;
407
	interval:;
408
		if (!isdigit(num = *lxp->pat))
409
		{
410
		badbr:;
411
			lxp->err = REG_BADBR;
412
			if (*lxp->pat == '\0')
413
				lxp->err = REG_EBRACE; /* more accurate */
414
			return -1;
415
		}
416
		num -= '0';
417
		while (isdigit(wc = *++lxp->pat))
418
		{
419
			num *= 10;
420
			if ((num += wc - '0') > BRACE_MAX)
421
				goto badbr;
422
		}
423
		lxp->info.num[0] = num;
424
		lxp->info.num[1] = num;
425
		if (wc == ',')
426
		{
427
			lxp->info.num[1] = BRACE_INF;
428
			if (isdigit(wc = *++lxp->pat))
429
			{
430
				num = wc - '0';
431
				while (isdigit(wc = *++lxp->pat))
432
				{
433
					num *= 10;
434
					if ((num += wc - '0') > BRACE_MAX)
435
						goto badbr;
436
				}
437
				if (num < lxp->info.num[0])
438
					goto badbr;
439
				lxp->info.num[1] = num;
440
			}
441
		}
442
		if ((lxp->flags & REG_BRACES) == 0)
443
		{
444
			if (wc != '\\')
445
				goto badbr;
446
			wc = *++lxp->pat;
447
		}
448
		if (wc != '}')
449
			goto badbr;
450
		lxp->pat++;
451
		wc = ROP_BRACE;
452
		/*
453
		* Replace interval with simpler equivalents where possible,
454
		* even when the operators are not otherwise available.
455
		*/
456
		if (lxp->info.num[1] <= 1)
457
		{
458
			if (lxp->info.num[0] == 1)
459
				wc = ROP_NOP;	/* {1,1} is noise */
460
			else if (lxp->info.num[1] == 0)
461
				wc = ROP_EMPTY;	/* {0,0} is empty string */
462
			else
463
				wc = ROP_QUEST;	/* {0,1} is ? */
464
		}
465
		else if (lxp->info.num[1] == BRACE_INF)
466
		{
467
			if (lxp->info.num[0] == 0)
468
				wc = ROP_STAR;
469
			else if (lxp->info.num[0] == 1)
470
				wc = ROP_PLUS;
471
			else if (lxp->info.num[0] > BRACE_DFAMAX)
472
				lxp->flags |= REG_NFA;
473
		}
474
		else if (lxp->info.num[1] > BRACE_DFAMAX)
475
		{
476
			lxp->flags |= REG_NFA;
477
		}
478
		break;
479
	case '\\':
480
		switch (wc = *lxp->pat++)
481
		{
482
		case '\0':
483
			lxp->err = REG_EESCAPE;
484
			return -1;
485
		case '<':
486
			if (lxp->flags & REG_ANGLES)
487
			{
488
				lxp->flags |= REG_NFA;
489
				wc = ROP_LT;
490
			}
491
			goto out;
492
		case '>':
493
			if (lxp->flags & REG_ANGLES)
494
			{
495
				lxp->flags |= REG_NFA;
496
				wc = ROP_GT;
497
			}
498
			goto out;
499
		case '(':
500
			if ((lxp->flags & REG_PARENS) == 0)
501
				goto leftparen;
502
			goto out;
503
		case ')':
504
			if ((lxp->flags & REG_PARENS) == 0)
505
			{
506
				if (++lxp->nright > lxp->nleft)
507
				{
508
					lxp->err = REG_EPAREN;
509
					return -1;
510
				}
511
				goto rightparen;
512
			}
513
			goto out;
514
		case '{':
515
			if (lxp->flags & (REG_BRACES|REG_NOBRACES))
516
				goto out;
517
			goto interval;
518
		case '1':
519
		case '2':
520
		case '3':
521
		case '4':
522
		case '5':
523
		case '6':
524
		case '7':
525
		case '8':
526
		case '9':
527
			num = wc - '0';
528
			if ((lxp->flags & REG_NOBACKREF) == 0)
529
			{
530
			backref:;
531
				if (num > lxp->nleft
532
					|| lxp->clist[num - 1] == 0)
533
				{
534
					lxp->err = REG_ESUBREG;
535
					return -1;
536
				}
537
				lxp->info.sub = num;
538
				if (lxp->maxref < num)
539
					lxp->maxref = num;
540
				lxp->flags |= REG_NFA;
541
				wc = ROP_REF;
542
				goto out;
543
			}
544
			/*
545
			* For compatibility (w/awk), permit "octal" 8 and 9.
546
			* Already have the value of the first digit in num.
547
			*
548
			* If REG_OLDESC, exactly three digits must be present.
549
			*/
550
		tryoctal:;
551
			if ((lxp->flags & REG_ESCSEQ) == 0)
552
				goto out;
553
			if ((wc = *lxp->pat) >= '0' && wc <= '9')
554
			{
555
				num <<= 3;
556
				num += wc - '0';
557
				if ((wc = *++lxp->pat) >= '0' && wc <= '9')
558
				{
559
					num <<= 3;
560
					num += wc - '0';
561
					lxp->pat++;
562
				}
563
				else if (lxp->flags & REG_OLDESC)
564
				{
565
					lxp->pat--;
566
					wc = lxp->pat[-1];
567
					goto out;
568
				}
569
			}
570
			else if (lxp->flags & REG_OLDESC)
571
			{
572
				wc = lxp->pat[-1];
573
				goto out;
574
			}
575
			if ((wc = num) <= 0)
576
			{
577
				lxp->err = REG_BADESC;
578
				return -1;
579
			}
580
			goto out;
581
		case '0':
582
			if ((lxp->flags & REG_NOBACKREF) == 0
583
				&& (num = *lxp->pat) >= '0' && num <= '9')
584
			{
585
				num -= '0';
586
				/*
587
				* This loop ignores wraparounds.
588
				* Keep track of number of digits in n.
589
				*/
590
				n = 1;
591
				while ((wc = *++lxp->pat) >= '0' && wc <= '9')
592
				{
593
					num *= 10;
594
					num += wc - '0';
595
					n++;
596
				}
597
				if (num != 0)
598
					goto backref;
599
				lxp->pat -= n;
600
			}
601
			num = 0;
602
			goto tryoctal;
603
		case 'a':
604
			if ((lxp->flags&(REG_ESCSEQ|REG_OLDESC)) == REG_ESCSEQ)
605
				wc = '\a';
606
			goto out;
607
		case 'b':
608
			if (lxp->flags & REG_ESCSEQ)
609
				wc = '\b';
610
			goto out;
611
		case 'f':
612
			if (lxp->flags & REG_ESCSEQ)
613
				wc = '\f';
614
			goto out;
615
		case 'n':
616
			if (lxp->flags & (REG_ESCSEQ | REG_ESCNL))
617
			{
618
				wc = '\n';
619
				if (lxp->flags & REG_NEWLINE)
620
					lxp->flags |= REG_NFA;
621
			}
622
			goto out;
623
		case 'r':
624
			if (lxp->flags & REG_ESCSEQ)
625
				wc = '\r';
626
			goto out;
627
		case 't':
628
			if (lxp->flags & REG_ESCSEQ)
629
				wc = '\t';
630
			goto out;
631
		case 'v':
632
			if ((lxp->flags&(REG_ESCSEQ|REG_OLDESC)) == REG_ESCSEQ)
633
				wc = '\v';
634
			goto out;
635
		case 'x':
636
			if ((lxp->flags&(REG_ESCSEQ|REG_OLDESC)) == REG_ESCSEQ
637
				&& isxdigit(num = *lxp->pat))
638
			{
639
				wc = num;
640
				num = 0;
641
				/*
642
				* Take as many hex digits as possible,
643
				* ignoring overflows.
644
				* If the result (squeezed into a w_type)
645
				* is positive, it's okay.
646
				*/
647
				do
648
				{
649
					if (isdigit(wc))
650
						wc -= '0';
651
					else if (isupper(wc))
652
						wc -= 'A' + 10;
653
					else
654
						wc -= 'a' + 10;
655
					num <<= 4;
656
					num |= wc;
657
				} while (isxdigit(wc = *++lxp->pat));
658
				if ((wc = num) <= 0)
659
				{
660
					lxp->err = REG_BADESC;
661
					return -1;
662
				}
663
			}
664
			goto out;
665
		}
666
		/*FALLTHROUGH*/
667
	default:
668
		if (!ISONEBYTE(wc))
669
		{
670
			if ((n = libuxre_mb2wc(&wc, lxp->pat)) > 0)
671
				lxp->pat += n;
672
			else if (n < 0)
673
			{
674
				lxp->err = REG_ILLSEQ;
675
				return -1;
676
			}
677
		}
678
		if (lxp->flags & REG_ICASE)
679
			wc = to_lower(wc);
680
		break;
681
	}
682
out:;
683
	lxp->tok = wc;
684
	return 0;
685
}
686
 
687
static Tree	*alt(Lex *);
688
 
689
static Tree *
690
leaf(Lex *lxp)
691
{
692
	Tree *tp;
693
 
694
	if ((tp = malloc(sizeof(Tree))) == 0)
695
	{
696
		lxp->err = REG_ESPACE;
697
		return 0;
698
	}
699
	switch (tp->op = lxp->tok) /* covers most cases */
700
	{
701
	default:
702
		if (tp->op < 0)
703
		{
704
			lxp->err = REG_BADPAT;
705
			tp->right.ptr = 0;
706
			goto badunary;
707
		}
708
		break;
709
	case ROP_STAR:
710
	case ROP_PLUS:
711
	case ROP_QUEST:
712
		if ((lxp->flags & REG_NOAUTOQUOTE) == 0
713
			&& lxp->pat[-1] != '}')
714
		{
715
			tp->op = lxp->pat[-1];
716
			break;
717
		}
718
		/*FALLTHROUGH*/
719
	case ROP_BRACE:
720
	case ROP_EMPTY:	/* was {0,0} ROP_BRACE */
721
	case ROP_NOP:	/* was {1,1} ROP_BRACE */
722
		lxp->err = REG_BADRPT;
723
	badunary:;
724
		tp->left.ptr = 0;
725
		goto err;
726
	case ROP_ANYCH:
727
	case ROP_NOTNL:
728
		break;
729
	case ROP_BOL:
730
	case ROP_EOL:
731
	case ROP_LT:
732
	case ROP_GT:
733
		/*
734
		* Look ahead for what would have been taken to be
735
		* postfix operators.
736
		*/
737
		if (lex(lxp) != 0)
738
			goto err;
739
		switch (lxp->tok)
740
		{
741
		case ROP_STAR:
742
		case ROP_PLUS:
743
		case ROP_QUEST:
744
			if ((lxp->flags & REG_NOAUTOQUOTE) == 0
745
				&& lxp->pat[-1] != '}')
746
			{
747
				lxp->tok = lxp->pat[-1];
748
				break;
749
			}
750
			/*FALLTHROUGH*/
751
		case ROP_BRACE:
752
		case ROP_EMPTY:	/* was {0,0} ROP_BRACE */
753
		case ROP_NOP:	/* was {1,1} ROP_BRACE */
754
			lxp->err = REG_BADRPT;
755
			goto err;
756
		}
757
		return tp;
758
	case ROP_BKT:
759
		tp->right.info.bkt = lxp->info.bkt;
760
		break;
761
	case ROP_REF:
762
		tp->right.info.sub = lxp->info.sub;
763
		break;
764
	case ROP_LP:
765
		tp->right.info.sub = lxp->nleft;
766
		if (lex(lxp) != 0)
767
			goto badunary;
768
		if (lxp->tok == ROP_RP)	/* empty parens; choice of meaning */
769
		{
770
			if (lxp->flags & REG_MTPARENBAD)
771
			{
772
				lxp->err = REG_EMPTYPAREN;
773
				goto badunary;
774
			}
775
			lxp->tok = ROP_EMPTY;
776
			if (lxp->flags & REG_MTPARENFAIL)
777
				lxp->tok = ROP_NONE;
778
			if ((tp->left.ptr = libuxre_reg1tree(lxp->tok, 0)) == 0)
779
				goto badunary;
780
		}
781
		else if ((tp->left.ptr = alt(lxp)) == 0)
782
		{
783
			if (lxp->err == REG_BADPAT)
784
				goto parenerr;
785
			goto badunary;
786
		}
787
		else if (lxp->tok != ROP_RP)
788
		{
789
			lxp->err = REG_BADPAT;
790
		parenerr:;
791
			if (lxp->nleft != lxp->nright)
792
				lxp->err = REG_EPAREN;	/* better choice */
793
			goto badunary;
794
		}
795
		tp->left.ptr->parent = tp;
796
		break;
797
	}
798
	if (lex(lxp) != 0)
799
	{
800
	err:;
801
		libuxre_regdeltree(tp, 1);
802
		tp = 0;
803
	}
804
	return tp;
805
}
806
 
807
static Tree *
808
post(Lex *lxp)
809
{
810
	Tree *lp;
811
 
812
	if ((lp = leaf(lxp)) == 0)
813
		return 0;
814
	switch (lxp->tok)
815
	{
816
	case ROP_EMPTY:	/* this was {0,0} ROP_BRACE */
817
		libuxre_regdeltree(lp, 1);
818
		lp = 0;
819
		/*FALLTHROUGH*/
820
	case ROP_BRACE:
821
	case ROP_STAR:
822
	case ROP_PLUS:
823
	case ROP_QUEST:
824
		if ((lp = libuxre_reg1tree(lxp->tok, lp)) == 0)
825
		{
826
			lxp->err = REG_ESPACE;
827
			return 0;
828
		}
829
		if (lxp->tok == ROP_BRACE)
830
			lp->right.info = lxp->info;
831
		/*FALLTHROUGH*/
832
	case ROP_NOP:	/* this was {1,1} ROP_BRACE */
833
		if (lex(lxp) != 0)
834
		{
835
			libuxre_regdeltree(lp, 1);
836
			return 0;
837
		}
838
		break;
839
	}
840
	return lp;
841
}
842
 
843
static Tree *
844
cat(Lex *lxp)
845
{
846
	Tree *lp, *rp;
847
 
848
	if ((lp = post(lxp)) == 0)
849
		return 0;
850
	for (;;)
851
	{
852
		if (lxp->tok == ROP_OR || lxp->tok == ROP_RP
853
			|| lxp->tok == ROP_END)
854
		{
855
			return lp;
856
		}
857
		if ((rp = post(lxp)) == 0)
858
			break;
859
		if ((lp = libuxre_reg2tree(ROP_CAT, lp, rp)) == 0)
860
		{
861
			lxp->err = REG_ESPACE;
862
			return 0;
863
		}
864
	}
865
	libuxre_regdeltree(lp, 1);
866
	return 0;
867
}
868
 
869
static Tree *
870
alt(Lex *lxp)
871
{
872
	Tree *lp, *rp;
873
 
874
	if ((lp = cat(lxp)) == 0)
875
		return 0;
876
	for (;;)
877
	{
878
		if (lxp->tok != ROP_OR)
879
			return lp;
880
		if (lex(lxp) != 0)
881
			break;
882
		if (lxp->tok == ROP_END)
883
			return lp;	/* ignore trailing '|' */
884
		if ((rp = cat(lxp)) == 0)
885
			break;
886
		if ((lp = libuxre_reg2tree(ROP_OR, lp, rp)) == 0)
887
		{
888
			lxp->err = REG_ESPACE;
889
			return 0;
890
		}
891
	}
892
	libuxre_regdeltree(lp, 1);
893
	return 0;
894
}
895
 
896
LIBUXRE_STATIC Tree *
897
libuxre_regparse(Lex *lxp, const unsigned char *pat, int flags)
898
{
899
	Tree *lp, *rp;
900
 
901
	lp = 0;			/* in case of error */
902
	lxp->clist = 0;
903
	lxp->col = 0;
904
	lxp->err = 0;
905
	lxp->maxref = 0;
906
	lxp->nleft = 0;
907
	lxp->nright = 0;
908
	lxp->nclist = 0;
909
	lxp->mb_cur_max = MB_CUR_MAX;
910
	if (flags & REG_OR && *pat == '|')
911
		pat++;	/* skip initial OR like egrep did */
912
	lxp->pat = pat;
913
	lxp->flags = flags;
914
	lxp->tok = ROP_OR;	/* enables ^ as anchor */
915
	/*
916
	* Get initial token.
917
	*/
918
	if (lex(lxp) != 0)
919
	{
920
	err:;
921
		if (lp != 0)
922
		{
923
			libuxre_regdeltree(lp, 1);
924
			lp = 0;
925
		}
926
		if (lxp->err == 0)
927
			lxp->err = REG_ESPACE;
928
		goto ret;
929
	}
930
	if (lxp->tok == ROP_END)
931
	{
932
		lxp->err = REG_NOPAT;
933
		goto err;
934
	}
935
	if ((lp = alt(lxp)) == 0)	/* parse entire RE */
936
		goto err;
937
	if (lxp->maxref != 0 || (flags & REG_NOSUB) == 0)
938
	{
939
		if ((lp = libuxre_reg1tree(ROP_LP, lp)) == 0)
940
			goto err;
941
		lp->right.info.sub = 0;
942
	}
943
	if ((rp = libuxre_reg1tree(ROP_END, 0)) == 0)
944
		goto err;
945
	if ((lp = libuxre_reg2tree(ROP_CAT, lp, rp)) == 0)
946
		goto err;
947
	lp->parent = 0;
948
ret:;
949
	if (lxp->clist != 0)
950
		free(lxp->clist);
951
	return lp;
952
}
953
 
954
#ifdef REGDEBUG
955
 
956
LIBUXRE_STATIC void
957
libuxre_regtree(Tree *tp, int n)
958
{
959
	const char *opstr;
960
	char buf[32];
961
	int kind, next;
962
 
963
	if (n < 0)
964
		next = -n + 2;
965
	else
966
		next = n + 2;
967
	switch (tp->op)
968
	{
969
	case ROP_OR:
970
		opstr = "|";
971
		kind = BINARY_ROP;
972
		break;
973
	case ROP_CAT:
974
		opstr = "&";
975
		kind = BINARY_ROP;
976
		break;
977
	case ROP_STAR:
978
		opstr = "*";
979
		kind = UNARY_ROP;
980
		break;
981
	case ROP_PLUS:
982
		opstr = "+";
983
		kind = UNARY_ROP;
984
		break;
985
	case ROP_QUEST:
986
		opstr = "?";
987
		kind = UNARY_ROP;
988
		break;
989
	case ROP_BRACE:
990
		opstr = buf;
991
		if (tp->right.info.num[1] == BRACE_INF)
992
		{
993
			sprintf(buf, "{%u,inf}",
994
				(unsigned)tp->right.info.num[0]);
995
		}
996
		else
997
		{
998
			sprintf(buf, "{%u,%u}",
999
				(unsigned)tp->right.info.num[0],
1000
				(unsigned)tp->right.info.num[1]);
1001
		}
1002
		kind = UNARY_ROP;
1003
		break;
1004
	case ROP_LP:
1005
		opstr = buf;
1006
		sprintf(buf, "%lu(", (unsigned long)tp->right.info.sub);
1007
		kind = UNARY_ROP;
1008
		break;
1009
	case ROP_RP:
1010
		opstr = buf;
1011
		sprintf(buf, ")%lu", (unsigned long)tp->right.info.sub);
1012
		kind = UNARY_ROP;
1013
		break;
1014
	case ROP_NOP:
1015
		opstr = "<NOP>";
1016
		kind = LEAF_ROP;
1017
		break;
1018
	case ROP_BOL:
1019
		opstr = "<BOL>";
1020
		kind = LEAF_ROP;
1021
		break;
1022
	case ROP_EOL:
1023
		opstr = "<EOL>";
1024
		kind = LEAF_ROP;
1025
		break;
1026
	case ROP_ALL:
1027
		opstr = "<ALL>";
1028
		kind = LEAF_ROP;
1029
		break;
1030
	case ROP_ANYCH:
1031
		opstr = "<ANYCH>";
1032
		kind = LEAF_ROP;
1033
		break;
1034
	case ROP_NOTNL:
1035
		opstr = "<NOTNL>";
1036
		kind = LEAF_ROP;
1037
		break;
1038
	case ROP_EMPTY:
1039
		opstr = "<MT>";
1040
		kind = LEAF_ROP;
1041
		break;
1042
	case ROP_NONE:
1043
		opstr = "<NONE>";
1044
		kind = LEAF_ROP;
1045
		break;
1046
	case ROP_BKT:
1047
		opstr = buf;
1048
		sprintf(buf, "[%#lx]", (unsigned long)tp->right.info.bkt);
1049
		kind = LEAF_ROP;
1050
		break;
1051
	case ROP_BKTCOPY:
1052
		opstr = buf;
1053
		sprintf(buf, "[%#lx]CPY", (unsigned long)tp->right.info.bkt);
1054
		kind = LEAF_ROP;
1055
		break;
1056
	case ROP_LT:
1057
		opstr = "\\<";
1058
		kind = LEAF_ROP;
1059
		break;
1060
	case ROP_GT:
1061
		opstr = "\\>";
1062
		kind = LEAF_ROP;
1063
		break;
1064
	case ROP_REF:
1065
		opstr = buf;
1066
		sprintf(buf, "\\%lu", (unsigned long)tp->right.info.sub);
1067
		kind = LEAF_ROP;
1068
		break;
1069
	case ROP_END:
1070
		opstr = "<END>";
1071
		kind = LEAF_ROP;
1072
		break;
1073
	default:
1074
		opstr = buf;
1075
		if (tp->op > UCHAR_MAX)
1076
			sprintf(buf, "W%#x", tp->op);
1077
		else if (tp->op <= 0)
1078
			sprintf(buf, "UNK=%u", tp->op);
1079
		else
1080
			sprintf(buf, "%c", tp->op);
1081
		kind = LEAF_ROP;
1082
		break;
1083
	}
1084
	if (kind == BINARY_ROP)
1085
		libuxre_regtree(tp->right.ptr, -next);
1086
	printf("%*c:%s\n", next - 1, n < 0 ? 'R' : n > 0 ? 'L' : 'T', opstr);
1087
	if (kind != LEAF_ROP)
1088
		libuxre_regtree(tp->left.ptr, next);
1089
}
1090
 
1091
#endif /*REGDEBUG*/