Subversion Repositories planix.SVN

Rev

Rev 2 | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
2 - 1
/****************************************************************
2
Copyright (C) Lucent Technologies 1997
3
All Rights Reserved
4
 
5
Permission to use, copy, modify, and distribute this software and
6
its documentation for any purpose and without fee is hereby
7
granted, provided that the above copyright notice appear in all
8
copies and that both that the copyright notice and this
9
permission notice and warranty disclaimer appear in supporting
10
documentation, and that the name Lucent Technologies or any of
11
its entities not be used in advertising or publicity pertaining
12
to distribution of the software without specific, written prior
13
permission.
14
 
15
LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
16
INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
17
IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
18
SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
20
IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
21
ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
22
THIS SOFTWARE.
23
****************************************************************/
24
 
25
 
26
#define DEBUG
27
#include <stdio.h>
28
#include <ctype.h>
29
#include <setjmp.h>
30
#include <math.h>
31
#include <string.h>
32
#include <stdlib.h>
33
#include <time.h>
34
#include "awk.h"
35
#include "y.tab.h"
36
#include "regexp.h"
37
 
38
	/* This file provides the interface between the main body of
39
	 * awk and the pattern matching package.  It preprocesses
40
	 * patterns prior to compilation to provide awk-like semantics
41
	 * to character sequences not supported by the pattern package.
42
	 * The following conversions are performed:
43
	 *
44
	 *	"()"		->	"[]*"
45
	 *	"[-"		->	"[\-"
46
	 *	"[^-"		->	"[^\-"
47
	 *	"-]"		->	"\-]"
48
	 *	"[]"		->	"[]*"
49
	 *	"\xdddd"	->	"\z" where 'z' is the UTF sequence
50
	 *					for the hex value
51
	 *	"\ddd"		->	"\o" where 'o' is a char octal value
52
	 *	"\b"		->	"\B"	where 'B' is backspace
53
	 *	"\t"		->	"\T"	where 'T' is tab
54
	 *	"\f"		->	"\F"	where 'F' is form feed
55
	 *	"\n"		->	"\N"	where 'N' is newline
56
	 *	"\r"		->	"\C"	where 'C' is cr
57
	 */
58
 
59
#define	MAXRE	512	/* size of re[]; compre() keeps the last byte for the NUL */
60
 
61
static char	re[MAXRE];	/* copy buffer: preprocessed pattern passed to regcomp() */
62
 
63
char	*patbeg;	/* start of the last match; set by pmatch() and nematch() */
64
int	patlen;			/* length in chars of the last match, -1 when there was none */
65
 
66
#define	NPATS	20		/* number of slots in pattern cache */
67
 
68
static struct pat_list		/* dynamic pattern cache, filled by compre() when !compile_time */
69
{
70
	char	*re;	/* pattern text as given to compre() (copy made by tostring()) */
71
	int	use;	/* hit counter; the slot with the lowest count is evicted */
72
	Reprog	*program;	/* compiled form returned by regcomp() */
73
} pattern[NPATS];
74
 
75
static int npats;		/* cache fill level: slots 0..npats-1 are in use */
76
 
77
	/* Compile a pattern */
78
void
79
*compre(char *pat)
80
{
81
	int i, j, inclass;
82
	char c, *p, *s;
83
	Reprog *program;
84
 
85
	if (!compile_time) {	/* search cache for dynamic pattern */
86
		for (i = 0; i < npats; i++)
87
			if (!strcmp(pat, pattern[i].re)) {
88
				pattern[i].use++;
89
				return((void *) pattern[i].program);
90
			}
91
	}
92
		/* Preprocess Pattern for compilation */
93
	p = re;
94
	s = pat;
95
	inclass = 0;
96
	while (c = *s++) {
97
		if (c == '\\') {
98
			quoted(&s, &p, re+MAXRE);
99
			continue;
100
		}
101
		else if (!inclass && c == '(' && *s == ')') {
102
			if (p < re+MAXRE-2) {	/* '()' -> '[]*' */
103
				*p++ = '[';
104
				*p++ = ']';
105
				c = '*';
106
				s++;
107
			}
108
			else overflow();
109
		}
110
		else if (c == '['){			/* '[-' -> '[\-' */
111
			inclass = 1;
112
			if (*s == '-') {
113
				if (p < re+MAXRE-2) {
114
					*p++ = '[';
115
					*p++ = '\\';
116
					c = *s++;
117
				}
118
				else overflow();
119
			}				/* '[^-' -> '[^\-'*/
120
			else if (*s == '^' && s[1] == '-'){
121
				if (p < re+MAXRE-3) {
122
					*p++ = '[';
123
					*p++ = *s++;
124
					*p++ = '\\';
125
					c = *s++;
126
				}
127
				else overflow();
128
			}
129
			else if (*s == '['){		/* skip '[[' */
130
				if (p < re+MAXRE-1)
131
					*p++ = c;
132
				else overflow();
133
				c = *s++;
134
			}
135
			else if (*s == '^' && s[1] == '[') {	/* skip '[^['*/
136
				if (p < re+MAXRE-2) {
137
					*p++ = c;
138
					*p++ = *s++;
139
					c = *s++;
140
				}
141
				else overflow();
142
			}
143
			else if (*s == ']') {		/* '[]' -> '[]*' */
144
				if (p < re+MAXRE-2) {
145
					*p++ = c;
146
					*p++ = *s++;
147
					c = '*';
148
					inclass = 0;
149
				}
150
				else overflow();
151
			}
152
		}
153
		else if (c == '-' && *s == ']') {	/* '-]' -> '\-]' */
154
			if (p < re+MAXRE-1)
155
				*p++ = '\\';
156
			else overflow();
157
		}
158
		else if (c == ']')
159
			inclass = 0;
160
		if (p < re+MAXRE-1)
161
			*p++ = c;
162
		else overflow();
163
	}
164
	*p = 0;
165
	program = regcomp(re);		/* compile pattern */
166
	if (!compile_time) {
167
		if (npats < NPATS)	/* Room in cache */
168
			i = npats++;
169
		else {			/* Throw out least used */
170
			int use = pattern[0].use;
171
			i = 0;
172
			for (j = 1; j < NPATS; j++) {
173
				if (pattern[j].use < use) {
174
					use = pattern[j].use;
175
					i = j;
176
				}
177
			}
178
			xfree(pattern[i].program);
179
			xfree(pattern[i].re);
180
		}
181
		pattern[i].re = tostring(pat);
182
		pattern[i].program = program;
183
		pattern[i].use = 1;
184
	}
185
	return((void *) program);
186
}
187
 
188
	/* T/F match indication - matched string not exported */
189
int
190
match(void *p, char *s, char *)
191
{
192
	return regexec((Reprog *) p, (char *) s, 0, 0);
193
}
194
 
195
	/* match and delimit the matched string */
196
int
197
pmatch(void *p, char *s, char *start)
198
{
199
	Resub m;
200
 
201
	m.s.sp = start;
202
	m.e.ep = 0;
203
	if (regexec((Reprog *) p, (char *) s, &m, 1)) {
204
		patbeg = m.s.sp;
205
		patlen = m.e.ep-m.s.sp;
206
		return 1;
207
	}
208
	patlen = -1;
209
	patbeg = start;
210
	return 0;
211
}
212
 
213
	/* perform a non-empty match */
214
int
215
nematch(void *p, char *s, char *start)
216
{
217
	if (pmatch(p, s, start) == 1 && patlen > 0)
218
		return 1;
219
	patlen = -1;
220
	patbeg = start; 
221
	return 0;
222
}
223
/* in the parsing of regular expressions, metacharacters like . have */
224
/* to be seen literally;  \056 is not a metacharacter. */
225
 
226
/*
 * Evaluate the hexadecimal digits found at *pp.
 *
 * At most four digits are consumed.  *pp is advanced past the digits
 * that were read and their value is returned; when *pp does not start
 * with a hex digit, *pp is left alone and 0 is returned.
 */
int
hexstr(char **pp)
{
	const char *digits = *pp;
	unsigned char c;
	int n = 0;
	int i;

	for (i = 0; i < 4; i++) {
		/* <ctype.h> functions need an unsigned char value, not a plain char */
		c = (unsigned char) digits[i];
		if (!isxdigit(c))
			break;
		if (isdigit(c))
			n = 16 * n + c - '0';
		else if ('a' <= c && c <= 'f')
			n = 16 * n + c - 'a' + 10;
		else if ('A' <= c && c <= 'F')
			n = 16 * n + c - 'A' + 10;
	}
	*pp += i;
	return n;
}
243
 
244
	/* look for awk-specific escape sequences */

#define isoctdigit(c) ((c) >= '0' && (c) <= '7') /* multiple use of arg */

/*
 * Handle one escape sequence.
 *
 *	s	address of the source pointer; on entry it points at the
 *		character following the backslash, on return just past
 *		the escape sequence
 *	to	address of the output pointer; advanced past what was written
 *	end	one past the last byte of the output buffer
 *
 * \t \n \f \r \b are replaced by the control character itself.  Every
 * other escape keeps its backslash: \x is followed by the multibyte
 * encoding of up to four hex digits, \d \dd \ddd by the character with
 * that octal value, and anything else by the character unchanged.
 *
 * A backslash that is the last character of the pattern is copied as
 * is and *s is left on the terminating NUL, so the caller's scan stops
 * there instead of running past the end of the string.
 *
 * overflow() is called whenever the output buffer has no room left
 * (the last byte is reserved for the caller's terminator).
 */
void
quoted(char **s, char **to, char *end)	/* handle escaped sequence */
{
	char *p = *s;
	char *t = *to;
	wchar_t c;
	int len;

	switch(c = *p++) {
	case '\0':			/* lone backslash at end of pattern */
		p--;			/* do not step over the terminator */
		if (t < end-1)
			*t++ = '\\';
		else overflow();
		*s = p;
		*to = t;
		return;
	case 't':
		c = '\t';
		break;
	case 'n':
		c = '\n';
		break;
	case 'f':
		c = '\f';
		break;
	case 'r':
		c = '\r';
		break;
	case 'b':
		c = '\b';
		break;
	default:
		if (t < end-1)		/* all else must be escaped */
			*t++ = '\\';
		else overflow();
		if (c == 'x') {		/* hexadecimal goo follows */
			c = hexstr(&p);
			if (t < end-MB_CUR_MAX) {
				len = wctomb(t, c);
				if (len > 0)
					t += len;
				else	/* not encodable: store the byte, as the octal case does */
					*t++ = (char) c;
			}
			else overflow();
			*to = t;
			*s = p;
			return;
		} else if (isoctdigit(c)) {	/* \d \dd \ddd */
			c -= '0';
			if (isoctdigit(*p)) {
				c = 8 * c + *p++ - '0';
				if (isoctdigit(*p))
					c = 8 * c + *p++ - '0';
			}
		}
		break;
	}
	if (t < end-1)
		*t++ = c;
	else overflow();
	*s = p;
	*to = t;
}
297
	/*
	 * Count rune positions: the number of (multibyte) characters in
	 * the first n bytes of s, stopping early at a NUL.  A byte that
	 * mblen() cannot decode counts as one character.  Returns 0 when
	 * n is not positive.
	 */
int
countposn(char *s, int n)
{
	int count, len;

	/* n is the number of bytes still to be looked at */
	for (count = 0; n > 0 && *s != '\0'; count++) {
		/* never let mblen() look past the caller's limit */
		len = mblen(s, (size_t) n);
		if (len <= 0)
			len = 1;
		s += len;
		n -= len;
	}
	return(count);
}
312
 
313
	/*
	 * Error hook of the pattern package: forwards the message s
	 * to FATAL().
	 */
void
regerror(char *s)
{
	char *msg = s;

	FATAL("%s", msg);
}
320
 
321
/*
 * Report through FATAL() that a regular expression does not fit
 * into the buffer it is being copied to.
 */
void
overflow(void)
{
	char *msg = "regular expression too big";

	FATAL("%s", msg);
}