Subversion Repositories planix.SVN

Rev

Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
99 7u83 1
/*
2
 * Simple Regular Expression functions. Derived from Unix 7th Edition,
3
 * /usr/src/cmd/expr.y
4
 *
5
 * Modified by Gunnar Ritter, Freiburg i. Br., Germany, February 2002.
6
 *
7
 * Copyright(C) Caldera International Inc. 2001-2002. All rights reserved.
8
 *
9
 * Redistribution and use in source and binary forms, with or without
10
 * modification, are permitted provided that the following conditions
11
 * are met:
12
 *   Redistributions of source code and documentation must retain the
13
 *    above copyright notice, this list of conditions and the following
14
 *    disclaimer.
15
 *   Redistributions in binary form must reproduce the above copyright
16
 *    notice, this list of conditions and the following disclaimer in the
17
 *    documentation and/or other materials provided with the distribution.
18
 *   All advertising materials mentioning features or use of this software
19
 *    must display the following acknowledgement:
20
 *      This product includes software developed or owned by Caldera
21
 *      International, Inc.
22
 *   Neither the name of Caldera International, Inc. nor the names of
23
 *    other contributors may be used to endorse or promote products
24
 *    derived from this software without specific prior written permission.
25
 *
26
 * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
27
 * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
28
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
29
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30
 * ARE DISCLAIMED. IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE
31
 * LIABLE FOR ANY DIRECT, INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR
32
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
34
 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
35
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
36
 * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
37
 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
38
 */
39
 
40
/*
 * REGEXP_H_USED decorates the SCCS id string below.  The "used"
 * attribute is selected for GCC 3.4 and every later release (the
 * major and minor numbers are compared as a pair, so e.g. GCC 4.0
 * qualifies); older GCC versions get "unused"; other compilers get
 * no attribute at all.
 */
#if defined (__GNUC__) && \
	(__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4))
#define	REGEXP_H_USED	__attribute__ ((used))
#elif defined __GNUC__
#define	REGEXP_H_USED	__attribute__ ((unused))
#else
#define	REGEXP_H_USED
#endif
static const char regexp_h_sccsid[] REGEXP_H_USED =
	"@(#)regexp.sl	1.54 (gritter) 2/19/05";
49
 
50
/*
 * Wide character support is compiled in unless the file is used from
 * vi or built against dietlibc.
 */
#if !defined (REGEXP_H_USED_FROM_VI) && !defined (__dietlibc__)
#define	REGEXP_H_WCHARS
#endif

/*
 * Opcodes of the compiled expression, as emitted by compile():
 * CBRA/CKET for \( and \), CCHR for a literal byte, CDOT for '.',
 * CCL for a bracket expression, CDOL for a trailing '$', CCEOF as
 * terminator, CBACK for \1..\9, CNCL for a negated multibyte bracket
 * expression, CBRC/CLET for \< and \>, CCH1..CCH3 for literals whose
 * multibyte encoding is 1..3 bytes long.
 */
#define	CBRA	2
#define	CCHR	4
#define	CDOT	8
#define	CCL	12
/*	CLNUM	14	used in sed */
/*	CEND	16	used in sed */
#define	CDOL	20
#define	CCEOF	22
#define	CKET	24
#define	CBACK	36
#define	CNCL	40
#define	CBRC	44
#define	CLET	48
#define	CCH1	52
#define	CCH2	56
#define	CCH3	60

/*
 * Modifier bits or'ed into an opcode: STAR for '*', RNGE for \{m,n\},
 * REGEXP_H_LEAST for an open-ended \{m,\}.
 */
#define	STAR	01
#define RNGE	03
#define	REGEXP_H_LEAST	0100

/* CMB marks the multibyte variant of an opcode; 0 without wide chars. */
#ifdef	REGEXP_H_WCHARS
#define	CMB	0200
#else	/* !REGEXP_H_WCHARS */
#define	CMB	0
#endif	/* !REGEXP_H_WCHARS */

/* Maximum number of \( \) groups. */
#define	NBRA	9

/*
 * Set / test the bit for character c in the bitmap at ep.  Arguments
 * and results are fully parenthesized so that any expression may be
 * passed and the macros may be used inside larger expressions.
 */
#define PLACE(c)	(ep[(c) >> 3] |= bittab[(c) & 07])
#define ISTHERE(c)	(ep[(c) >> 3] & bittab[(c) & 07])

#ifdef	REGEXP_H_WCHARS
#define	REGEXP_H_IS_THERE(ep, c)	((ep)[(c) >> 3] & bittab[(c) & 07])
#endif
89
 
90
#include	<ctype.h>
91
#include	<string.h>
92
#include	<limits.h>
93
#ifdef	REGEXP_H_WCHARS
94
#include	<stdlib.h>
95
#include	<wchar.h>
96
#include	<wctype.h>
97
#endif	/* REGEXP_H_WCHARS */
98
 
99
/* True for a byte that is alphabetic or an underscore. */
#define	regexp_h_uletter(c)	(isalpha(c) || (c) == '_')
#if defined (REGEXP_H_WCHARS)
/* Wide character counterpart of regexp_h_uletter(). */
#define	regexp_h_wuletter(c)	(iswalpha(c) || (c) == L'_')

/*
 * Allocation hooks for the position stack of the multibyte star
 * algorithm; both may be predefined by the including file.
 */
#if !defined (regexp_h_malloc)
#define	regexp_h_malloc(n)	malloc(n)
#endif
#if !defined (regexp_h_free)
#define	regexp_h_free(p)	free(p)
#endif

/*
 * Predefine to 'inline' to have some of the multibyte helpers
 * inlined; this may help with input that contains many multibyte
 * sequences.
 */
#if !defined (regexp_h_inline)
#define	regexp_h_inline
#endif

/*
 * A byte that has any bit of this mask set is treated as the possible
 * start of a multibyte character.  With 0377, mbtowc() is used for
 * every byte sequence (except 0).
 */
#if !defined (REGEXP_H_MASK)
#define	REGEXP_H_MASK	0200
#endif
#endif	/* REGEXP_H_WCHARS */

/*
 * Hooks for regexpr.h; all of them default to nothing.
 */
#if !defined (regexp_h_static)
#define	regexp_h_static
#endif
#if !defined (REGEXP_H_STEP_INIT)
#define	REGEXP_H_STEP_INIT
#endif
#if !defined (REGEXP_H_ADVANCE_INIT)
#define	REGEXP_H_ADVANCE_INIT
#endif
144
 
145
char	*braslist[NBRA];
146
char	*braelist[NBRA];
147
int	nbra;
148
char	*loc1, *loc2, *locs;
149
int	sed;
150
int	nodelim;
151
 
152
regexp_h_static int	circf;
153
regexp_h_static int	low;
154
regexp_h_static int	size;
155
 
156
regexp_h_static unsigned char	bittab[] = {
157
	1,
158
	2,
159
	4,
160
	8,
161
	16,
162
	32,
163
	64,
164
	128
165
};
166
static int	regexp_h_advance(register const char *lp,
167
			register const char *ep);
168
static void	regexp_h_getrnge(register const char *str, int least);
169
 
170
static const char	*regexp_h_bol;	/* beginning of input line (for \<) */
171
 
172
#ifdef	REGEXP_H_WCHARS
173
/* CMB while a multibyte locale is in effect, else 0 (set by compile()). */
static int	regexp_h_wchars;
/* Copy of MB_CUR_MAX taken by compile(). */
static int	regexp_h_mbcurmax;

/* Location of the first multibyte character on the input line. */
static const char	*regexp_h_firstwc;
179
 
180
/*
 * Read the next pattern character into c.  In multibyte mode, bytes
 * are collected with GETC() until mbtowc() accepts the sequence, the
 * delimiter 'eof' was read, or MB_LEN_MAX bytes are buffered; a
 * sequence that still fails to convert raises ERROR(67).
 */
#define	regexp_h_getwc(c)	{ \
	if (!regexp_h_wchars) { \
		c = GETC(); \
	} else { \
		char rh_seq[MB_LEN_MAX + 1], *rh_end = rh_seq; \
		wchar_t rh_wc; \
		int rh_byte, rh_n; \
		for (;;) { \
			rh_byte = GETC(); \
			*rh_end++ = rh_byte; \
			*rh_end = '\0'; \
			rh_n = mbtowc(&rh_wc, rh_seq, regexp_h_mbcurmax); \
			if (rh_n >= 0 || rh_byte == eof || \
					rh_end >= rh_seq + MB_LEN_MAX) \
				break; \
		} \
		if (rh_n == -1) \
			ERROR(67); \
		c = rh_wc; \
	} \
}

/*
 * Append the multibyte encoding of wc at mb and advance mb past it.
 * me is the end of the buffer: when fewer than regexp_h_mbcurmax
 * bytes may remain, the encoding is first produced in a scratch
 * array and ERROR(50) is raised if it would not fit.  WEOF and
 * unconvertible characters raise ERROR(67).
 */
#define	regexp_h_store(wc, mb, me)	{ \
	int rh_n; \
	if (wc == WEOF) \
		ERROR(67); \
	rh_n = me - mb; \
	if (rh_n <= regexp_h_mbcurmax) { \
		char rh_tmp[MB_LEN_MAX]; \
		if (wctomb(rh_tmp, wc) >= rh_n) \
			ERROR(50); \
	} \
	rh_n = wctomb(mb, wc); \
	if (rh_n == -1) { \
		ERROR(67); \
		mb++; \
	} else if (rh_n == 0) { \
		mb++; \
	} else { \
		mb += rh_n; \
	} \
}
219
 
220
static regexp_h_inline wint_t
221
regexp_h_fetchwc(const char **mb, int islp)
222
{
223
	wchar_t wc;
224
	int len;
225
 
226
	if ((len = mbtowc(&wc, *mb, regexp_h_mbcurmax)) < 0) {
227
		(*mb)++;
228
		return WEOF;
229
	}
230
	if (islp && regexp_h_firstwc == NULL)
231
		regexp_h_firstwc = *mb;
232
	/*if (len == 0) {
233
		(*mb)++;
234
		return L'\0';
235
	} handled in singlebyte code */
236
	*mb += len;
237
	return wc;
238
}
239
 
240
#define	regexp_h_fetch(mb, islp)	((*(mb) & REGEXP_H_MASK) == 0 ? \
241
						(*(mb)++&0377): \
242
						regexp_h_fetchwc(&(mb), islp))
243
 
244
static regexp_h_inline wint_t
245
regexp_h_showwc(const char *mb)
246
{
247
	wchar_t wc;
248
 
249
	if (mbtowc(&wc, mb, regexp_h_mbcurmax) < 0)
250
		return WEOF;
251
	return wc;
252
}
253
 
254
#define	regexp_h_show(mb)	((*(mb) & REGEXP_H_MASK) == 0 ? (*(mb)&0377): \
255
					regexp_h_showwc(mb))
256
 
257
/*
258
 * Return the character immediately preceding mb. Since no byte is
259
 * required to be the first byte of a character, the longest multibyte
260
 * character ending at &[mb-1] is searched.
261
 */
262
static regexp_h_inline wint_t
263
regexp_h_previous(const char *mb)
264
{
265
	const char *p = mb;
266
	wchar_t wc, lastwc = WEOF;
267
	int len, max = 0;
268
 
269
	if (regexp_h_firstwc == NULL || mb <= regexp_h_firstwc)
270
		return (mb > regexp_h_bol ? (mb[-1] & 0377) : WEOF);
271
	while (p-- > regexp_h_bol) {
272
		mbtowc(NULL, NULL, 0);
273
		if ((len = mbtowc(&wc, p, mb - p)) >= 0) {
274
			if (len < max || len < mb - p)
275
				break;
276
			max = len;
277
			lastwc = wc;
278
		} else if (len < 0 && max > 0)
279
			break;
280
	}
281
	return lastwc;
282
}
283
 
284
#define	regexp_h_cclass(set, c, af)	\
285
	((c) == 0 || (c) == WEOF ? 0 : ( \
286
		((c) > 0177) ? \
287
			regexp_h_cclass_wc(set, c, af) : ( \
288
				REGEXP_H_IS_THERE((set)+1, (c)) ? (af) : !(af) \
289
			) \
290
		) \
291
	)
292
 
293
static regexp_h_inline int
294
regexp_h_cclass_wc(const char *set, register wint_t c, int af)
295
{
296
	register wint_t wc, wl = WEOF;
297
	const char *end;
298
 
299
	end = &set[18] + set[0] - 1;
300
	set += 17;
301
	while (set < end) {
302
		wc = regexp_h_fetch(set, 0);
303
#ifdef	REGEXP_H_VI_BACKSLASH
304
		if (wc == '\\' && set < end &&
305
				(*set == ']' || *set == '-' ||
306
				 *set == '^' || *set == '\\')) {
307
			wc = regexp_h_fetch(set, 0);
308
		} else
309
#endif	/* REGEXP_H_VI_BACKSLASH */
310
		if (wc == '-' && wl != WEOF && set < end) {
311
			wc = regexp_h_fetch(set, 0);
312
#ifdef	REGEXP_H_VI_BACKSLASH
313
			if (wc == '\\' && set < end &&
314
					(*set == ']' || *set == '-' ||
315
					 *set == '^' || *set == '\\')) {
316
				wc = regexp_h_fetch(set, 0);
317
			}
318
#endif	/* REGEXP_H_VI_BACKSLASH */
319
			if (c > wl && c < wc)
320
				return af;
321
		}
322
		if (c == wc)
323
			return af;
324
		wl = wc;
325
	}
326
	return !af;
327
}
328
#else	/* !REGEXP_H_WCHARS */
329
/* Without wide character support the multibyte flag is constantly 0. */
#define	regexp_h_wchars		0
330
/* Without wide character support a pattern character is one GETC() byte. */
#define	regexp_h_getwc(c)	{ c = GETC(); }
331
#endif	/* !REGEXP_H_WCHARS */
332
 
333
regexp_h_static char *
334
compile(char *instring, char *ep, const char *endbuf, int seof)
335
{
336
	INIT	/* Dependent declarations and initializations */
337
	register int c;
338
	register int eof = seof;
339
	char *lastep = instring;
340
	int cclcnt;
341
	char bracket[NBRA], *bracketp;
342
	int closed;
343
	char neg;
344
	int lc;
345
	int i, cflg;
346
 
347
#ifdef	REGEXP_H_WCHARS
348
	char *eq;
349
	regexp_h_mbcurmax = MB_CUR_MAX;
350
	regexp_h_wchars = regexp_h_mbcurmax > 1 ? CMB : 0;
351
#endif
352
	lastep = 0;
353
	bracketp = bracket;
354
	if((c = GETC()) == eof || c == '\n') {
355
		if (c == '\n') {
356
			UNGETC(c);
357
			nodelim = 1;
358
		}
359
		if(*ep == 0 && !sed)
360
			ERROR(41);
361
		if (bracketp > bracket)
362
			ERROR(42);
363
		RETURN(ep);
364
	}
365
	circf = closed = nbra = 0;
366
	if (c == '^')
367
		circf++;
368
	else
369
		UNGETC(c);
370
	for (;;) {
371
		if (ep >= endbuf)
372
			ERROR(50);
373
		regexp_h_getwc(c);
374
		if(c != '*' && ((c != '\\') || (PEEKC() != '{')))
375
			lastep = ep;
376
		if (c == eof) {
377
			*ep++ = CCEOF;
378
			if (bracketp > bracket)
379
				ERROR(42);
380
			RETURN(ep);
381
		}
382
		switch (c) {
383
 
384
		case '.':
385
			*ep++ = CDOT|regexp_h_wchars;
386
			continue;
387
 
388
		case '\n':
389
			if (sed == 0) {
390
				UNGETC(c);
391
				*ep++ = CCEOF;
392
				nodelim = 1;
393
				RETURN(ep);
394
			}
395
			ERROR(36);
396
		case '*':
397
			if (lastep==0 || *lastep==CBRA || *lastep==CKET ||
398
					*lastep==(CBRC|regexp_h_wchars) ||
399
					*lastep==(CLET|regexp_h_wchars))
400
				goto defchar;
401
			*lastep |= STAR;
402
			continue;
403
 
404
		case '$':
405
			if(PEEKC() != eof)
406
				goto defchar;
407
			*ep++ = CDOL;
408
			continue;
409
 
410
		case '[':
411
#ifdef	REGEXP_H_WCHARS
412
			if (regexp_h_wchars == 0) {
413
#endif
414
				if(&ep[33] >= endbuf)
415
					ERROR(50);
416
 
417
				*ep++ = CCL;
418
				lc = 0;
419
				for(i = 0; i < 32; i++)
420
					ep[i] = 0;
421
 
422
				neg = 0;
423
				if((c = GETC()) == '^') {
424
					neg = 1;
425
					c = GETC();
426
				}
427
 
428
				do {
429
					c &= 0377;
430
					if(c == '\0' || c == '\n')
431
						ERROR(49);
432
#ifdef	REGEXP_H_VI_BACKSLASH
433
					if(c == '\\' && ((c = PEEKC()) == ']' ||
434
							c == '-' || c == '^' ||
435
							c == '\\')) {
436
						c = GETC();
437
						c &= 0377;
438
					} else
439
#endif	/* REGEXP_H_VI_BACKSLASH */
440
					if(c == '-' && lc != 0) {
441
						if ((c = GETC()) == ']') {
442
							PLACE('-');
443
							break;
444
						}
445
#ifdef	REGEXP_H_VI_BACKSLASH
446
						if(c == '\\' &&
447
							((c = PEEKC()) == ']' ||
448
								c == '-' ||
449
								c == '^' ||
450
								c == '\\'))
451
							c = GETC();
452
#endif	/* REGEXP_H_VI_BACKSLASH */
453
						c &= 0377;
454
						while(lc < c) {
455
							PLACE(lc);
456
							lc++;
457
						}
458
					}
459
					lc = c;
460
					PLACE(c);
461
				} while((c = GETC()) != ']');
462
				if(neg) {
463
					for(cclcnt = 0; cclcnt < 32; cclcnt++)
464
						ep[cclcnt] ^= 0377;
465
					ep[0] &= 0376;
466
				}
467
 
468
				ep += 32;
469
#ifdef	REGEXP_H_WCHARS
470
			} else {
471
				if (&ep[18] >= endbuf)
472
					ERROR(50);
473
				*ep++ = CCL|CMB;
474
				*ep++ = 0;
475
				lc = 0;
476
				for (i = 0; i < 16; i++)
477
					ep[i] = 0;
478
				eq = &ep[16];
479
				regexp_h_getwc(c);
480
				if (c == L'^') {
481
					regexp_h_getwc(c);
482
					ep[-2] = CNCL|CMB;
483
				}
484
				do {
485
					if (c == '\0' || c == '\n')
486
						ERROR(49);
487
#ifdef	REGEXP_H_VI_BACKSLASH
488
					if(c == '\\' && ((c = PEEKC()) == ']' ||
489
							c == '-' || c == '^' ||
490
							c == '\\')) {
491
						regexp_h_store(c, eq, endbuf);
492
						regexp_h_getwc(c);
493
					} else
494
#endif	/* REGEXP_H_VI_BACKSLASH */
495
					if (c == '-' && lc != 0 && lc <= 0177) {
496
						regexp_h_store(c, eq, endbuf);
497
						regexp_h_getwc(c);
498
						if (c == ']') {
499
							PLACE('-');
500
							break;
501
						}
502
#ifdef	REGEXP_H_VI_BACKSLASH
503
						if(c == '\\' &&
504
							((c = PEEKC()) == ']' ||
505
								c == '-' ||
506
								c == '^' ||
507
								c == '\\')) {
508
							regexp_h_store(c, eq,
509
								endbuf);
510
							regexp_h_getwc(c);
511
						}
512
#endif	/* REGEXP_H_VI_BACKSLASH */
513
						while (lc < (c & 0177)) {
514
							PLACE(lc);
515
							lc++;
516
						}
517
					}
518
					lc = c;
519
					if (c <= 0177)
520
						PLACE(c);
521
					regexp_h_store(c, eq, endbuf);
522
					regexp_h_getwc(c);
523
				} while (c != L']');
524
				if ((i = eq - &ep[16]) > 255)
525
					ERROR(50);
526
				lastep[1] = i;
527
				ep = eq;
528
			}
529
#endif	/* REGEXP_H_WCHARS */
530
 
531
			continue;
532
 
533
		case '\\':
534
			regexp_h_getwc(c);
535
			switch(c) {
536
 
537
			case '(':
538
				if(nbra >= NBRA)
539
					ERROR(43);
540
				*bracketp++ = nbra;
541
				*ep++ = CBRA;
542
				*ep++ = nbra++;
543
				continue;
544
 
545
			case ')':
546
				if(bracketp <= bracket)
547
					ERROR(42);
548
				*ep++ = CKET;
549
				*ep++ = *--bracketp;
550
				closed++;
551
				continue;
552
 
553
			case '<':
554
				*ep++ = CBRC|regexp_h_wchars;
555
				continue;
556
 
557
			case '>':
558
				*ep++ = CLET|regexp_h_wchars;
559
				continue;
560
 
561
			case '{':
562
				if(lastep == (char *) (0))
563
					goto defchar;
564
				*lastep |= RNGE;
565
				cflg = 0;
566
			nlim:
567
				c = GETC();
568
				i = 0;
569
				do {
570
					if ('0' <= c && c <= '9')
571
						i = 10 * i + c - '0';
572
					else
573
						ERROR(16);
574
				} while(((c = GETC()) != '\\') && (c != ','));
575
				if (i > 255)
576
					ERROR(11);
577
				*ep++ = i;
578
				if (c == ',') {
579
					if(cflg++)
580
						ERROR(44);
581
					if((c = GETC()) == '\\') {
582
						*ep++ = (char)255;
583
						*lastep |= REGEXP_H_LEAST;
584
					} else {
585
						UNGETC(c);
586
						goto nlim; /* get 2'nd number */
587
					}
588
				}
589
				if(GETC() != '}')
590
					ERROR(45);
591
				if(!cflg)	/* one number */
592
					*ep++ = i;
593
				else if((ep[-1] & 0377) < (ep[-2] & 0377))
594
					ERROR(46);
595
				continue;
596
 
597
			case '\n':
598
				ERROR(36);
599
 
600
			case 'n':
601
				c = '\n';
602
				goto defchar;
603
 
604
			default:
605
				if(c >= '1' && c <= '9') {
606
					if((c -= '1') >= closed)
607
						ERROR(25);
608
					*ep++ = CBACK;
609
					*ep++ = c;
610
					continue;
611
				}
612
			}
613
			/* Drop through to default to use \ to turn off special chars */
614
 
615
		defchar:
616
		default:
617
			lastep = ep;
618
#ifdef	REGEXP_H_WCHARS
619
			if (regexp_h_wchars == 0) {
620
#endif
621
				*ep++ = CCHR;
622
				*ep++ = c;
623
#ifdef	REGEXP_H_WCHARS
624
			} else {
625
				char	mbbuf[MB_LEN_MAX];
626
 
627
				switch (wctomb(mbbuf, c)) {
628
				case 1: *ep++ = CCH1;
629
					break;
630
				case 2:	*ep++ = CCH2;
631
					break;
632
				case 3:	*ep++ = CCH3;
633
					break;
634
				default:
635
					*ep++ = CCHR|CMB;
636
				}
637
				regexp_h_store(c, ep, endbuf);
638
			}
639
#endif	/* REGEXP_H_WCHARS */
640
		}
641
	}
642
}
643
 
644
int
645
step(const char *p1, const char *p2)
646
{
647
	register int c;
648
#ifdef	REGEXP_H_WCHARS
649
	register int d;
650
#endif	/* REGEXP_H_WCHARS */
651
 
652
	REGEXP_H_STEP_INIT	/* get circf */
653
	regexp_h_bol = p1;
654
#ifdef	REGEXP_H_WCHARS
655
	regexp_h_firstwc = NULL;
656
#endif	/* REGEXP_H_WCHARS */
657
	if (circf) {
658
		loc1 = (char *)p1;
659
		return(regexp_h_advance(p1, p2));
660
	}
661
	/* fast check for first character */
662
	if (*p2==CCHR) {
663
		c = p2[1] & 0377;
664
		do {
665
			if ((*p1 & 0377) != c)
666
				continue;
667
			if (regexp_h_advance(p1, p2)) {
668
				loc1 = (char *)p1;
669
				return(1);
670
			}
671
		} while (*p1++);
672
		return(0);
673
	}
674
#ifdef	REGEXP_H_WCHARS
675
	else if (*p2==CCH1) {
676
		do {
677
			if (p1[0] == p2[1] && regexp_h_advance(p1, p2)) {
678
				loc1 = (char *)p1;
679
				return(1);
680
			}
681
			c = regexp_h_fetch(p1, 1);
682
		} while (c);
683
		return(0);
684
	} else if (*p2==CCH2) {
685
		do {
686
			if (p1[0] == p2[1] && p1[1] == p2[2] &&
687
					regexp_h_advance(p1, p2)) {
688
				loc1 = (char *)p1;
689
				return(1);
690
			}
691
			c = regexp_h_fetch(p1, 1);
692
		} while (c);
693
		return(0);
694
	} else if (*p2==CCH3) {
695
		do {
696
			if (p1[0] == p2[1] && p1[1] == p2[2] && p1[2] == p2[3]&&
697
					regexp_h_advance(p1, p2)) {
698
				loc1 = (char *)p1;
699
				return(1);
700
			}
701
			c = regexp_h_fetch(p1, 1);
702
		} while (c);
703
		return(0);
704
	} else if ((*p2&0377)==(CCHR|CMB)) {
705
		d = regexp_h_fetch(p2, 0);
706
		do {
707
			c = regexp_h_fetch(p1, 1);
708
			if (c == d && regexp_h_advance(p1, p2)) {
709
				loc1 = (char *)p1;
710
				return(1);
711
			}
712
		} while(c);
713
		return(0);
714
	}
715
		/* regular algorithm */
716
	if (regexp_h_wchars)
717
		do {
718
			if (regexp_h_advance(p1, p2)) {
719
				loc1 = (char *)p1;
720
				return(1);
721
			}
722
			c = regexp_h_fetch(p1, 1);
723
		} while (c);
724
	else
725
#endif	/* REGEXP_H_WCHARS */
726
		do {
727
			if (regexp_h_advance(p1, p2)) {
728
				loc1 = (char *)p1;
729
				return(1);
730
			}
731
		} while (*p1++);
732
	return(0);
733
}
734
 
735
#ifdef	REGEXP_H_WCHARS
736
/*
 * Reading a multibyte string backwards, one character at a time, is
 * painfully slow (see regexp_h_previous() above). The star algorithm
 * therefore records the position of every character while reading in
 * forward direction.
 *
 * alloca() is not used for the stack blocks: there is no measurable
 * speedup, and long input lines would use up huge amounts of memory.
 */
#if !defined (REGEXP_H_STAKBLOK)
#define	REGEXP_H_STAKBLOK	1000	/* positions per stack block */
#endif

/* One block of the position stack; blocks form a doubly linked list. */
struct	regexp_h_stack {
	struct regexp_h_stack	*s_nxt;		/* block above this one */
	struct regexp_h_stack	*s_prv;		/* block below this one */
	const char	*s_ptr[REGEXP_H_STAKBLOK];	/* recorded positions */
};
755
 
756
#define	regexp_h_push(sb, sp, sc, lp)	(regexp_h_wchars ? \
757
			regexp_h_pushwc(sb, sp, sc, lp) : (void)0)
758
 
759
static regexp_h_inline void
760
regexp_h_pushwc(struct regexp_h_stack **sb,
761
		struct regexp_h_stack **sp,
762
		const char ***sc, const char *lp)
763
{
764
	if (regexp_h_firstwc == NULL || lp < regexp_h_firstwc)
765
		return;
766
	if (*sb == NULL) {
767
		if ((*sb = regexp_h_malloc(sizeof **sb)) == NULL)
768
			return;
769
		(*sb)->s_nxt = (*sb)->s_prv = NULL;
770
		*sp = *sb;
771
		*sc = &(*sb)->s_ptr[0];
772
	} else if (*sc >= &(*sp)->s_ptr[REGEXP_H_STAKBLOK]) {
773
		if ((*sp)->s_nxt == NULL) {
774
			struct regexp_h_stack	*bq;
775
 
776
			if ((bq = regexp_h_malloc(sizeof *bq)) == NULL)
777
				return;
778
			bq->s_nxt = NULL;
779
			bq->s_prv = *sp;
780
			(*sp)->s_nxt = bq;
781
			*sp = bq;
782
		} else
783
			*sp = (*sp)->s_nxt;
784
		*sc = &(*sp)->s_ptr[0];
785
	}
786
	*(*sc)++ = lp;
787
}
788
 
789
static regexp_h_inline const char *
790
regexp_h_pop(struct regexp_h_stack **sp, const char ***sc,
791
		const char *lp)
792
{
793
	if (regexp_h_firstwc == NULL || lp <= regexp_h_firstwc)
794
		return &lp[-1];
795
	if (*sp == NULL)
796
		return regexp_h_firstwc;
797
	if (*sc == &(*sp)->s_ptr[0]) {
798
		if ((*sp)->s_prv == NULL) {
799
			regexp_h_free(*sp);
800
			*sp = NULL;
801
			return regexp_h_firstwc;
802
		}
803
		*sp = (*sp)->s_prv;
804
		regexp_h_free((*sp)->s_nxt);
805
		(*sp)->s_nxt = NULL ;
806
		*sc = &(*sp)->s_ptr[REGEXP_H_STAKBLOK];
807
	}
808
	return *(--(*sc));
809
}
810
 
811
static void
812
regexp_h_zerostak(struct regexp_h_stack **sb, struct regexp_h_stack **sp)
813
{
814
	for (*sp = *sb; *sp && (*sp)->s_nxt; *sp = (*sp)->s_nxt)
815
		if ((*sp)->s_prv)
816
			regexp_h_free((*sp)->s_prv);
817
	if (*sp) {
818
		if ((*sp)->s_prv)
819
			regexp_h_free((*sp)->s_prv);
820
		regexp_h_free(*sp);
821
	}
822
	*sp = *sb = NULL;
823
}
824
#else	/* !REGEXP_H_WCHARS */
825
/* Without wide character support no positions are recorded. */
#define	regexp_h_push(sb, sp, sc, lp)
826
#endif	/* !REGEXP_H_WCHARS */
827
 
828
static int
829
regexp_h_advance(const char *lp, const char *ep)
830
{
831
	register const char *curlp;
832
	int c, least;
833
#ifdef	REGEXP_H_WCHARS
834
	int d;
835
	struct regexp_h_stack	*sb = NULL, *sp = NULL;
836
	const char	**sc;
837
#endif	/* REGEXP_H_WCHARS */
838
	char *bbeg;
839
	int ct;
840
 
841
	for (;;) switch (least = *ep++ & 0377, least & ~REGEXP_H_LEAST) {
842
 
843
	case CCHR:
844
#ifdef	REGEXP_H_WCHARS
845
	case CCH1:
846
#endif
847
		if (*ep++ == *lp++)
848
			continue;
849
		return(0);
850
 
851
#ifdef	REGEXP_H_WCHARS
852
	case CCHR|CMB:
853
		if (regexp_h_fetch(ep, 0) == regexp_h_fetch(lp, 1))
854
			continue;
855
		return(0);
856
 
857
	case CCH2:
858
		if (ep[0] == lp[0] && ep[1] == lp[1]) {
859
			ep += 2, lp += 2;
860
			continue;
861
		}
862
		return(0);
863
 
864
	case CCH3:
865
		if (ep[0] == lp[0] && ep[1] == lp[1] && ep[2] == lp[2]) {
866
			ep += 3, lp += 3;
867
			continue;
868
		}
869
		return(0);
870
#endif	/* REGEXP_H_WCHARS */
871
 
872
	case CDOT:
873
		if (*lp++)
874
			continue;
875
		return(0);
876
#ifdef	REGEXP_H_WCHARS
877
	case CDOT|CMB:
878
		if ((c = regexp_h_fetch(lp, 1)) != L'\0' && c != WEOF)
879
			continue;
880
		return(0);
881
#endif	/* REGEXP_H_WCHARS */
882
 
883
	case CDOL:
884
		if (*lp==0)
885
			continue;
886
		return(0);
887
 
888
	case CCEOF:
889
		loc2 = (char *)lp;
890
		return(1);
891
 
892
	case CCL:
893
		c = *lp++ & 0377;
894
		if(ISTHERE(c)) {
895
			ep += 32;
896
			continue;
897
		}
898
		return(0);
899
 
900
#ifdef	REGEXP_H_WCHARS
901
	case CCL|CMB:
902
	case CNCL|CMB:
903
		c = regexp_h_fetch(lp, 1);
904
		if (regexp_h_cclass(ep, c, (ep[-1] & 0377) == (CCL|CMB))) {
905
			ep += (*ep & 0377) + 17;
906
			continue;
907
		}
908
		return 0;
909
#endif	/* REGEXP_H_WCHARS */
910
 
911
	case CBRA:
912
		braslist[*ep++ & 0377] = (char *)lp;
913
		continue;
914
 
915
	case CKET:
916
		braelist[*ep++ & 0377] = (char *)lp;
917
		continue;
918
 
919
	case CBRC:
920
		if (lp == regexp_h_bol && locs == NULL)
921
			continue;
922
		if ((isdigit(lp[0] & 0377) || regexp_h_uletter(lp[0] & 0377))
923
				&& !regexp_h_uletter(lp[-1] & 0377)
924
				&& !isdigit(lp[-1] & 0377))
925
			continue;
926
		return(0);
927
 
928
#ifdef	REGEXP_H_WCHARS
929
	case CBRC|CMB:
930
		c = regexp_h_show(lp);
931
		d = regexp_h_previous(lp);
932
		if ((iswdigit(c) || regexp_h_wuletter(c))
933
				&& !regexp_h_wuletter(d)
934
				&& !iswdigit(d))
935
			continue;
936
		return(0);
937
#endif	/* REGEXP_H_WCHARS */
938
 
939
	case CLET:
940
		if (!regexp_h_uletter(lp[0] & 0377) && !isdigit(lp[0] & 0377))
941
			continue;
942
		return(0);
943
 
944
#ifdef	REGEXP_H_WCHARS
945
	case CLET|CMB:
946
		c = regexp_h_show(lp);
947
		if (!regexp_h_wuletter(c) && !iswdigit(c))
948
			continue;
949
		return(0);
950
#endif	/* REGEXP_H_WCHARS */
951
 
952
	case CCHR|RNGE:
953
		c = *ep++;
954
		regexp_h_getrnge(ep, least);
955
		while(low--)
956
			if(*lp++ != c)
957
				return(0);
958
		curlp = lp;
959
		while(size--) {
960
			regexp_h_push(&sb, &sp, &sc, lp);
961
			if(*lp++ != c)
962
				break;
963
		}
964
		if(size < 0) {
965
			regexp_h_push(&sb, &sp, &sc, lp);
966
			lp++;
967
		}
968
		ep += 2;
969
		goto star;
970
 
971
#ifdef	REGEXP_H_WCHARS
972
	case CCHR|RNGE|CMB:
973
	case CCH1|RNGE:
974
	case CCH2|RNGE:
975
	case CCH3|RNGE:
976
		c = regexp_h_fetch(ep, 0);
977
		regexp_h_getrnge(ep, least);
978
		while (low--)
979
			if (regexp_h_fetch(lp, 1) != c)
980
				return 0;
981
		curlp = lp;
982
		while (size--) {
983
			regexp_h_push(&sb, &sp, &sc, lp);
984
			if (regexp_h_fetch(lp, 1) != c)
985
				break;
986
		}
987
		if(size < 0) {
988
			regexp_h_push(&sb, &sp, &sc, lp);
989
			regexp_h_fetch(lp, 1);
990
		}
991
		ep += 2;
992
		goto star;
993
#endif	/* REGEXP_H_WCHARS */
994
 
995
	case CDOT|RNGE:
996
		regexp_h_getrnge(ep, least);
997
		while(low--)
998
			if(*lp++ == '\0')
999
				return(0);
1000
		curlp = lp;
1001
		while(size--) {
1002
			regexp_h_push(&sb, &sp, &sc, lp);
1003
			if(*lp++ == '\0')
1004
				break;
1005
		}
1006
		if(size < 0) {
1007
			regexp_h_push(&sb, &sp, &sc, lp);
1008
			lp++;
1009
		}
1010
		ep += 2;
1011
		goto star;
1012
 
1013
#ifdef	REGEXP_H_WCHARS
1014
	case CDOT|RNGE|CMB:
1015
		regexp_h_getrnge(ep, least);
1016
		while (low--)
1017
			if ((c = regexp_h_fetch(lp, 1)) == L'\0' || c == WEOF)
1018
				return 0;
1019
		curlp = lp;
1020
		while (size--) {
1021
			regexp_h_push(&sb, &sp, &sc, lp);
1022
			if ((c = regexp_h_fetch(lp, 1)) == L'\0' || c == WEOF)
1023
				break;
1024
		}
1025
		if (size < 0) {
1026
			regexp_h_push(&sb, &sp, &sc, lp);
1027
			regexp_h_fetch(lp, 1);
1028
		}
1029
		ep += 2;
1030
		goto star;
1031
#endif	/* REGEXP_H_WCHARS */
1032
 
1033
	case CCL|RNGE:
1034
		regexp_h_getrnge(ep + 32, least);
1035
		while(low--) {
1036
			c = *lp++ & 0377;
1037
			if(!ISTHERE(c))
1038
				return(0);
1039
		}
1040
		curlp = lp;
1041
		while(size--) {
1042
			regexp_h_push(&sb, &sp, &sc, lp);
1043
			c = *lp++ & 0377;
1044
			if(!ISTHERE(c))
1045
				break;
1046
		}
1047
		if(size < 0) {
1048
			regexp_h_push(&sb, &sp, &sc, lp);
1049
			lp++;
1050
		}
1051
		ep += 34;		/* 32 + 2 */
1052
		goto star;
1053
 
1054
#ifdef	REGEXP_H_WCHARS
1055
	case CCL|RNGE|CMB:
1056
	case CNCL|RNGE|CMB:
1057
		regexp_h_getrnge(ep + (*ep & 0377) + 17, least);
1058
		while (low--) {
1059
			c = regexp_h_fetch(lp, 1);
1060
			if (!regexp_h_cclass(ep, c,
1061
					(ep[-1] & 0377 & ~REGEXP_H_LEAST)
1062
					== (CCL|RNGE|CMB)))
1063
				return 0;
1064
		}
1065
		curlp = lp;
1066
		while (size--) {
1067
			regexp_h_push(&sb, &sp, &sc, lp);
1068
			c = regexp_h_fetch(lp, 1);
1069
			if (!regexp_h_cclass(ep, c,
1070
					(ep[-1] & 0377 & ~REGEXP_H_LEAST)
1071
					== (CCL|RNGE|CMB)))
1072
				break;
1073
		}
1074
		if (size < 0) {
1075
			regexp_h_push(&sb, &sp, &sc, lp);
1076
			regexp_h_fetch(lp, 1);
1077
		}
1078
		ep += (*ep & 0377) + 19;
1079
		goto star;
1080
#endif	/* REGEXP_H_WCHARS */
1081
 
1082
	case CBACK:
1083
		bbeg = braslist[*ep & 0377];
1084
		ct = braelist[*ep++ & 0377] - bbeg;
1085
 
1086
		if(strncmp(bbeg, lp, ct) == 0) {
1087
			lp += ct;
1088
			continue;
1089
		}
1090
		return(0);
1091
 
1092
	case CBACK|STAR:
1093
		bbeg = braslist[*ep & 0377];
1094
		ct = braelist[*ep++ & 0377] - bbeg;
1095
		curlp = lp;
1096
		while(strncmp(bbeg, lp, ct) == 0)
1097
			lp += ct;
1098
 
1099
		while(lp >= curlp) {
1100
			if(regexp_h_advance(lp, ep))	return(1);
1101
			lp -= ct;
1102
		}
1103
		return(0);
1104
 
1105
 
1106
	case CDOT|STAR:
1107
		curlp = lp;
1108
		do
1109
			regexp_h_push(&sb, &sp, &sc, lp);
1110
		while (*lp++);
1111
		goto star;
1112
 
1113
#ifdef	REGEXP_H_WCHARS
1114
	case CDOT|STAR|CMB:
1115
		curlp = lp;
1116
		do
1117
			regexp_h_push(&sb, &sp, &sc, lp);
1118
		while ((c = regexp_h_fetch(lp, 1)) != L'\0' && c != WEOF);
1119
		goto star;
1120
#endif	/* REGEXP_H_WCHARS */
1121
 
1122
	case CCHR|STAR:
1123
		curlp = lp;
1124
		do
1125
			regexp_h_push(&sb, &sp, &sc, lp);
1126
		while (*lp++ == *ep);
1127
		ep++;
1128
		goto star;
1129
 
1130
#ifdef	REGEXP_H_WCHARS
1131
	case CCHR|STAR|CMB:
1132
	case CCH1|STAR:
1133
	case CCH2|STAR:
1134
	case CCH3|STAR:
1135
		curlp = lp;
1136
		d = regexp_h_fetch(ep, 0);
1137
		do
1138
			regexp_h_push(&sb, &sp, &sc, lp);
1139
		while (regexp_h_fetch(lp, 1) == d);
1140
		goto star;
1141
#endif	/* REGEXP_H_WCHARS */
1142
 
1143
	case CCL|STAR:
1144
		curlp = lp;
1145
		do {
1146
			regexp_h_push(&sb, &sp, &sc, lp);
1147
			c = *lp++ & 0377;
1148
		} while(ISTHERE(c));
1149
		ep += 32;
1150
		goto star;
1151
 
1152
#ifdef	REGEXP_H_WCHARS
1153
	case CCL|STAR|CMB:
1154
	case CNCL|STAR|CMB:
1155
		curlp = lp;
1156
		do {
1157
			regexp_h_push(&sb, &sp, &sc, lp);
1158
			c = regexp_h_fetch(lp, 1);
1159
		} while (regexp_h_cclass(ep, c, (ep[-1] & 0377)
1160
					== (CCL|STAR|CMB)));
1161
		ep += (*ep & 0377) + 17;
1162
		goto star;
1163
#endif	/* REGEXP_H_WCHARS */
1164
 
1165
	star:
1166
#ifdef	REGEXP_H_WCHARS
1167
		if (regexp_h_wchars == 0) {
1168
#endif
1169
			do {
1170
				if(--lp == locs)
1171
					break;
1172
				if (regexp_h_advance(lp, ep))
1173
					return(1);
1174
			} while (lp > curlp);
1175
#ifdef	REGEXP_H_WCHARS
1176
		} else {
1177
			do {
1178
				lp = regexp_h_pop(&sp, &sc, lp);
1179
				if (lp <= locs)
1180
					break;
1181
				if (regexp_h_advance(lp, ep)) {
1182
					regexp_h_zerostak(&sb, &sp);
1183
					return(1);
1184
				}
1185
			} while (lp > curlp);
1186
			regexp_h_zerostak(&sb, &sp);
1187
		}
1188
#endif	/* REGEXP_H_WCHARS */
1189
		return(0);
1190
 
1191
	}
1192
}
1193
 
1194
static void
1195
regexp_h_getrnge(register const char *str, int least)
1196
{
1197
	low = *str++ & 0377;
1198
	size = least & REGEXP_H_LEAST ? /*20000*/INT_MAX : (*str & 0377) - low;
1199
}
1200
 
1201
int
1202
advance(const char *lp, const char *ep)
1203
{
1204
	REGEXP_H_ADVANCE_INIT	/* skip past circf */
1205
	regexp_h_bol = lp;
1206
#ifdef	REGEXP_H_WCHARS
1207
	regexp_h_firstwc = NULL;
1208
#endif	/* REGEXP_H_WCHARS */
1209
	return regexp_h_advance(lp, ep);
1210
}