Subversion Repositories planix.SVN

Rev

Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
96 7u83 1
/****************************************************************
2
Copyright (C) Lucent Technologies 1997
3
All Rights Reserved
4
 
5
Permission to use, copy, modify, and distribute this software and
6
its documentation for any purpose and without fee is hereby
7
granted, provided that the above copyright notice appear in all
8
copies and that both that the copyright notice and this
9
permission notice and warranty disclaimer appear in supporting
10
documentation, and that the name Lucent Technologies or any of
11
its entities not be used in advertising or publicity pertaining
12
to distribution of the software without specific, written prior
13
permission.
14
 
15
LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
16
INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
17
IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
18
SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
20
IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
21
ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
22
THIS SOFTWARE.
23
****************************************************************/
24
 
25
#include <stdio.h>
26
#include <stdlib.h>
27
#include <string.h>
28
#include <ctype.h>
29
#include "awk.h"
30
#include "ytab.h"
31
 
32
/* Declared extern here; the definitions are not in this file. */
extern YYSTYPE	yylval;
extern int	infunc;

/* Lexer bookkeeping shared by the routines below. */
int	lineno = 1;	/* incremented by yylex() and string() as newlines are consumed */
int	bracecnt = 0;	/* '{' tokens returned minus '}' characters seen */
int	brackcnt = 0;	/* '[' tokens returned minus ']' tokens returned */
int	parencnt = 0;	/* '(' tokens returned minus ')' tokens returned */
39
 
40
/* One row of the reserved-word table that word() looks names up in. */
typedef struct Keyword {
	const char	*word;	/* spelling, compared with strcmp() by binsearch() */
	int		sub;	/* value word() stores into yylval.i on a match */
	int		type;	/* token type; word() switches on it and returns it */
} Keyword;
45
 
46
Keyword keywords[] ={	/* keep sorted: binary searched */
47
	{ "BEGIN",	XBEGIN,		XBEGIN },
48
	{ "END",	XEND,		XEND },
49
	{ "NF",		VARNF,		VARNF },
50
	{ "atan2",	FATAN,		BLTIN },
51
	{ "break",	BREAK,		BREAK },
52
	{ "close",	CLOSE,		CLOSE },
53
	{ "continue",	CONTINUE,	CONTINUE },
54
	{ "cos",	FCOS,		BLTIN },
55
	{ "delete",	DELETE,		DELETE },
56
	{ "do",		DO,		DO },
57
	{ "else",	ELSE,		ELSE },
58
	{ "exit",	EXIT,		EXIT },
59
	{ "exp",	FEXP,		BLTIN },
60
	{ "fflush",	FFLUSH,		BLTIN },
61
	{ "for",	FOR,		FOR },
62
	{ "func",	FUNC,		FUNC },
63
	{ "function",	FUNC,		FUNC },
64
	{ "getline",	GETLINE,	GETLINE },
65
	{ "gsub",	GSUB,		GSUB },
66
	{ "if",		IF,		IF },
67
	{ "in",		IN,		IN },
68
	{ "index",	INDEX,		INDEX },
69
	{ "int",	FINT,		BLTIN },
70
	{ "length",	FLENGTH,	BLTIN },
71
	{ "log",	FLOG,		BLTIN },
72
	{ "match",	MATCHFCN,	MATCHFCN },
73
	{ "next",	NEXT,		NEXT },
74
	{ "nextfile",	NEXTFILE,	NEXTFILE },
75
	{ "print",	PRINT,		PRINT },
76
	{ "printf",	PRINTF,		PRINTF },
77
	{ "rand",	FRAND,		BLTIN },
78
	{ "return",	RETURN,		RETURN },
79
	{ "sin",	FSIN,		BLTIN },
80
	{ "split",	SPLIT,		SPLIT },
81
	{ "sprintf",	SPRINTF,	SPRINTF },
82
	{ "sqrt",	FSQRT,		BLTIN },
83
	{ "srand",	FSRAND,		BLTIN },
84
	{ "sub",	SUB,		SUB },
85
	{ "substr",	SUBSTR,		SUBSTR },
86
	{ "system",	FSYSTEM,	BLTIN },
87
	{ "tolower",	FTOLOWER,	BLTIN },
88
	{ "toupper",	FTOUPPER,	BLTIN },
89
	{ "while",	WHILE,		WHILE },
90
};
91
 
92
/*
 * RET(x): return token x from the enclosing function, first printing
 * "lex <name>" (name from tokname) when dbg is nonzero.
 * The do/while(0) wrapper makes "RET(x);" a single statement, so it is
 * safe as the unbraced body of an if/else.  x is expanded twice; pass
 * only expressions without side effects.
 */
#define	RET(x)	do { if (dbg) printf("lex %s\n", tokname(x)); return (x); } while (0)
93
 
94
/*
 * peek: report the next input character without consuming it.
 * Reads one character with input() and immediately pushes it back
 * with unput(); returns that character (0 at end of input).
 */
int peek(void)
{
	const int next = input();

	unput(next);
	return next;
}
100
 
101
/*
 * gettok: read the next raw token from input() into the caller's buffer.
 *
 * pbuf, psz: address of the buffer pointer and of its size.  The buffer is
 *	grown through adjbuf() when needed; the (possibly new) pointer and size
 *	are written back before a name or number is returned.  The buffer must
 *	hold at least two bytes on entry.
 *
 * Returns
 *	0	at end of input;
 *	'a'	the buffer holds a name: [A-Za-z_][A-Za-z0-9_]*;
 *	'0'	the buffer holds a number: the longest prefix strtod() accepts;
 *	else	the single character read, also left in the buffer as a
 *		one-character string.
 */
int gettok(char **pbuf, int *psz)	/* get next input token */
{
	int c, retc;
	char *buf = *pbuf;
	int sz = *psz;
	char *bp = buf;

	c = input();
	if (c == 0)
		return 0;
	buf[0] = c;
	buf[1] = 0;
	if (!isalnum(c) && c != '.' && c != '_')
		return c;

	*bp++ = c;
	if (isalpha(c) || c == '_') {	/* it's a varname */
		for ( ; (c = input()) != 0; ) {
			/*
			 * Keep room for c AND the terminating NUL: the loop can
			 * end at end of input right after storing c, and the
			 * NUL is then written at bp without another check.
			 */
			if (bp-buf+2 > sz)
				if (!adjbuf(&buf, &sz, bp-buf+2, 100, &bp, "gettok"))
					FATAL( "out of space for name %.10s...", buf );
			if (isalnum(c) || c == '_')
				*bp++ = c;
			else {
				*bp = 0;
				unput(c);
				break;
			}
		}
		*bp = 0;
		retc = 'a';	/* alphanumeric */
	} else {	/* maybe it's a number, but could be . */
		char *rem;
		/* read input until can't be a number */
		for ( ; (c = input()) != 0; ) {
			/* room for c and the NUL, as in the name loop above */
			if (bp-buf+2 > sz)
				if (!adjbuf(&buf, &sz, bp-buf+2, 100, &bp, "gettok"))
					FATAL( "out of space for number %.10s...", buf );
			if (isdigit(c) || c == 'e' || c == 'E'
			  || c == '.' || c == '+' || c == '-')
				*bp++ = c;
			else {
				unput(c);
				break;
			}
		}
		*bp = 0;
		strtod(buf, &rem);	/* parse the number */
		if (rem == buf) {	/* it wasn't a valid number at all */
			retc = buf[0];	/* character is its own type */
			/*
			 * Push the over-read characters back BEFORE cutting the
			 * buffer; truncating first would leave nothing to push.
			 */
			unputstr(buf+1);	/* put rest back for later */
			buf[1] = 0;	/* return one character as token */
		} else {	/* some prefix was a number */
			unputstr(rem);	/* put rest back for later */
			rem[0] = 0;	/* truncate buf after number part */
			retc = '0';	/* type is number */
		}
	}
	*pbuf = buf;
	*psz = sz;
	return retc;
}
163
 
164
/* Forward declarations for helpers that yylex() calls before they are defined. */
int	word(char *);
int	string(void);
int	regexpr(void);

/* One-shot flags examined at the top of yylex(). */
int	sc = 0;		/* 1 => the next yylex() call returns '}' immediately */
int	reg = 0;	/* 1 => the next yylex() call returns a REGEXPR (see startreg) */
169
 
170
int yylex(void)
171
{
172
	int c;
173
	static char *buf = 0;
174
	static int bufsize = 5; /* BUG: setting this small causes core dump! */
175
 
176
	if (buf == 0 && (buf = (char *) malloc(bufsize)) == NULL)
177
		FATAL( "out of space in yylex" );
178
	if (sc) {
179
		sc = 0;
180
		RET('}');
181
	}
182
	if (reg) {
183
		reg = 0;
184
		return regexpr();
185
	}
186
	for (;;) {
187
		c = gettok(&buf, &bufsize);
188
		if (c == 0)
189
			return 0;
190
		if (isalpha(c) || c == '_')
191
			return word(buf);
192
		if (isdigit(c)) {
193
			yylval.cp = setsymtab(buf, tostring(buf), atof(buf), CON|NUM, symtab);
194
			/* should this also have STR set? */
195
			RET(NUMBER);
196
		}
197
 
198
		yylval.i = c;
199
		switch (c) {
200
		case '\n':	/* {EOL} */
201
			lineno++;
202
			RET(NL);
203
		case '\r':	/* assume \n is coming */
204
		case ' ':	/* {WS}+ */
205
		case '\t':
206
			break;
207
		case '#':	/* #.* strip comments */
208
			while ((c = input()) != '\n' && c != 0)
209
				;
210
			unput(c);
211
			break;
212
		case ';':
213
			RET(';');
214
		case '\\':
215
			if (peek() == '\n') {
216
				input();
217
				lineno++;
218
			} else if (peek() == '\r') {
219
				input(); input();	/* \n */
220
				lineno++;
221
			} else {
222
				RET(c);
223
			}
224
			break;
225
		case '&':
226
			if (peek() == '&') {
227
				input(); RET(AND);
228
			} else 
229
				RET('&');
230
		case '|':
231
			if (peek() == '|') {
232
				input(); RET(BOR);
233
			} else
234
				RET('|');
235
		case '!':
236
			if (peek() == '=') {
237
				input(); yylval.i = NE; RET(NE);
238
			} else if (peek() == '~') {
239
				input(); yylval.i = NOTMATCH; RET(MATCHOP);
240
			} else
241
				RET(NOT);
242
		case '~':
243
			yylval.i = MATCH;
244
			RET(MATCHOP);
245
		case '<':
246
			if (peek() == '=') {
247
				input(); yylval.i = LE; RET(LE);
248
			} else {
249
				yylval.i = LT; RET(LT);
250
			}
251
		case '=':
252
			if (peek() == '=') {
253
				input(); yylval.i = EQ; RET(EQ);
254
			} else {
255
				yylval.i = ASSIGN; RET(ASGNOP);
256
			}
257
		case '>':
258
			if (peek() == '=') {
259
				input(); yylval.i = GE; RET(GE);
260
			} else if (peek() == '>') {
261
				input(); yylval.i = APPEND; RET(APPEND);
262
			} else {
263
				yylval.i = GT; RET(GT);
264
			}
265
		case '+':
266
			if (peek() == '+') {
267
				input(); yylval.i = INCR; RET(INCR);
268
			} else if (peek() == '=') {
269
				input(); yylval.i = ADDEQ; RET(ASGNOP);
270
			} else
271
				RET('+');
272
		case '-':
273
			if (peek() == '-') {
274
				input(); yylval.i = DECR; RET(DECR);
275
			} else if (peek() == '=') {
276
				input(); yylval.i = SUBEQ; RET(ASGNOP);
277
			} else
278
				RET('-');
279
		case '*':
280
			if (peek() == '=') {	/* *= */
281
				input(); yylval.i = MULTEQ; RET(ASGNOP);
282
			} else if (peek() == '*') {	/* ** or **= */
283
				input();	/* eat 2nd * */
284
				if (peek() == '=') {
285
					input(); yylval.i = POWEQ; RET(ASGNOP);
286
				} else {
287
					RET(POWER);
288
				}
289
			} else
290
				RET('*');
291
		case '/':
292
			RET('/');
293
		case '%':
294
			if (peek() == '=') {
295
				input(); yylval.i = MODEQ; RET(ASGNOP);
296
			} else
297
				RET('%');
298
		case '^':
299
			if (peek() == '=') {
300
				input(); yylval.i = POWEQ; RET(ASGNOP);
301
			} else
302
				RET(POWER);
303
 
304
		case '$':
305
			/* BUG: awkward, if not wrong */
306
			c = gettok(&buf, &bufsize);
307
			if (isalpha(c)) {
308
				if (strcmp(buf, "NF") == 0) {	/* very special */
309
					unputstr("(NF)");
310
					RET(INDIRECT);
311
				}
312
				c = peek();
313
				if (c == '(' || c == '[' || (infunc && isarg(buf) >= 0)) {
314
					unputstr(buf);
315
					RET(INDIRECT);
316
				}
317
				yylval.cp = setsymtab(buf, "", 0.0, STR|NUM, symtab);
318
				RET(IVAR);
319
			} else if (c == 0) {	/*  */
320
				SYNTAX( "unexpected end of input after $" );
321
				RET(';');
322
			} else {
323
				unputstr(buf);
324
				RET(INDIRECT);
325
			}
326
 
327
		case '}':
328
			if (--bracecnt < 0)
329
				SYNTAX( "extra }" );
330
			sc = 1;
331
			RET(';');
332
		case ']':
333
			if (--brackcnt < 0)
334
				SYNTAX( "extra ]" );
335
			RET(']');
336
		case ')':
337
			if (--parencnt < 0)
338
				SYNTAX( "extra )" );
339
			RET(')');
340
		case '{':
341
			bracecnt++;
342
			RET('{');
343
		case '[':
344
			brackcnt++;
345
			RET('[');
346
		case '(':
347
			parencnt++;
348
			RET('(');
349
 
350
		case '"':
351
			return string();	/* BUG: should be like tran.c ? */
352
 
353
		default:
354
			RET(c);
355
		}
356
	}
357
}
358
 
359
int string(void)
360
{
361
	int c, n;
362
	char *s, *bp;
363
	static char *buf = 0;
364
	static int bufsz = 500;
365
 
366
	if (buf == 0 && (buf = (char *) malloc(bufsz)) == NULL)
367
		FATAL("out of space for strings");
368
	for (bp = buf; (c = input()) != '"'; ) {
369
		if (!adjbuf(&buf, &bufsz, bp-buf+2, 500, &bp, "string"))
370
			FATAL("out of space for string %.10s...", buf);
371
		switch (c) {
372
		case '\n':
373
		case '\r':
374
		case 0:
375
			*bp = '\0';
376
			SYNTAX( "non-terminated string %.10s...", buf );
377
			if (c == 0)	/* hopeless */
378
				FATAL( "giving up" );
379
			lineno++;
380
			break;
381
		case '\\':
382
			c = input();
383
			switch (c) {
384
			case '"': *bp++ = '"'; break;
385
			case 'n': *bp++ = '\n'; break;	
386
			case 't': *bp++ = '\t'; break;
387
			case 'f': *bp++ = '\f'; break;
388
			case 'r': *bp++ = '\r'; break;
389
			case 'b': *bp++ = '\b'; break;
390
			case 'v': *bp++ = '\v'; break;
391
			case 'a': *bp++ = '\007'; break;
392
			case '\\': *bp++ = '\\'; break;
393
 
394
			case '0': case '1': case '2': /* octal: \d \dd \ddd */
395
			case '3': case '4': case '5': case '6': case '7':
396
				n = c - '0';
397
				if ((c = peek()) >= '0' && c < '8') {
398
					n = 8 * n + input() - '0';
399
					if ((c = peek()) >= '0' && c < '8')
400
						n = 8 * n + input() - '0';
401
				}
402
				*bp++ = n;
403
				break;
404
 
405
			case 'x':	/* hex  \x0-9a-fA-F + */
406
			    {	char xbuf[100], *px;
407
				for (px = xbuf; (c = input()) != 0 && px-xbuf < 100-2; ) {
408
					if (isdigit(c)
409
					 || (c >= 'a' && c <= 'f')
410
					 || (c >= 'A' && c <= 'F'))
411
						*px++ = c;
412
					else
413
						break;
414
				}
415
				*px = 0;
416
				unput(c);
417
	  			sscanf(xbuf, "%x", (unsigned int *) &n);
418
				*bp++ = n;
419
				break;
420
			    }
421
 
422
			default: 
423
				*bp++ = c;
424
				break;
425
			}
426
			break;
427
		default:
428
			*bp++ = c;
429
			break;
430
		}
431
	}
432
	*bp = 0; 
433
	s = tostring(buf);
434
	*bp++ = ' '; *bp++ = 0;
435
	yylval.cp = setsymtab(buf, s, 0.0, CON|STR|DONTFREE, symtab);
436
	RET(STRING);
437
}
438
 
439
 
440
/*
 * binsearch: locate w among the n entries of kp[].
 * The entries must be ordered by strcmp() on their word field.
 * Returns the index of the matching entry, or -1 if there is none.
 */
int binsearch(char *w, Keyword *kp, int n)
{
	int lo = 0;
	int hi = n - 1;

	while (lo <= hi) {
		int mid = (lo + hi) / 2;
		int cmp = strcmp(w, kp[mid].word);

		if (cmp == 0)
			return mid;
		if (cmp < 0)
			hi = mid - 1;	/* w sorts before the probe */
		else
			lo = mid + 1;	/* w sorts after the probe */
	}
	return -1;
}
457
 
458
int word(char *w) 
459
{
460
	Keyword *kp;
461
	int c, n;
462
 
463
	n = binsearch(w, keywords, sizeof(keywords)/sizeof(keywords[0]));
464
/* BUG: this ought to be inside the if; in theory could fault (daniel barrett) */
465
	kp = keywords + n;
466
	if (n != -1) {	/* found in table */
467
		yylval.i = kp->sub;
468
		switch (kp->type) {	/* special handling */
469
		case BLTIN:
470
			if (kp->sub == FSYSTEM && safe)
471
				SYNTAX( "system is unsafe" );
472
			RET(kp->type);
473
		case FUNC:
474
			if (infunc)
475
				SYNTAX( "illegal nested function" );
476
			RET(kp->type);
477
		case RETURN:
478
			if (!infunc)
479
				SYNTAX( "return not in function" );
480
			RET(kp->type);
481
		case VARNF:
482
			yylval.cp = setsymtab("NF", "", 0.0, NUM, symtab);
483
			RET(VARNF);
484
		default:
485
			RET(kp->type);
486
		}
487
	}
488
	c = peek();	/* look for '(' */
489
	if (c != '(' && infunc && (n=isarg(w)) >= 0) {
490
		yylval.i = n;
491
		RET(ARG);
492
	} else {
493
		yylval.cp = setsymtab(w, "", 0.0, STR|NUM|DONTFREE, symtab);
494
		if (c == '(') {
495
			RET(CALL);
496
		} else {
497
			RET(VAR);
498
		}
499
	}
500
}
501
 
502
void startreg(void)	/* next call to yylex will return a regular expression */
503
{
504
	reg = 1;
505
}
506
 
507
int regexpr(void)
508
{
509
	int c;
510
	static char *buf = 0;
511
	static int bufsz = 500;
512
	char *bp;
513
 
514
	if (buf == 0 && (buf = (char *) malloc(bufsz)) == NULL)
515
		FATAL("out of space for rex expr");
516
	bp = buf;
517
	for ( ; (c = input()) != '/' && c != 0; ) {
518
		if (!adjbuf(&buf, &bufsz, bp-buf+3, 500, &bp, "regexpr"))
519
			FATAL("out of space for reg expr %.10s...", buf);
520
		if (c == '\n') {
521
			*bp = '\0';
522
			SYNTAX( "newline in regular expression %.10s...", buf ); 
523
			unput('\n');
524
			break;
525
		} else if (c == '\\') {
526
			*bp++ = '\\'; 
527
			*bp++ = input();
528
		} else {
529
			*bp++ = c;
530
		}
531
	}
532
	*bp = 0;
533
	if (c == 0)
534
		SYNTAX("non-terminated regular expression %.10s...", buf);
535
	yylval.s = tostring(buf);
536
	unput('/');
537
	RET(REGEXPR);
538
}
539
 
540
/* low-level character I/O for the lexer, sort of inherited from lex */

/*
 * ebuf/ep: record of characters recently delivered by input().  input()
 * stores each character at ep and advances, wrapping to the start when ep
 * reaches the end; unput() steps ep back by one.
 */
char	ebuf[300];
char	*ep = ebuf;

/*
 * yysbuf/yysptr: pushback stack.  unput() pushes at yysptr; input() pops
 * from it before reading any new program text.
 */
char	yysbuf[100];
char	*yysptr = yysbuf;

FILE	*yyin = 0;	/* not referenced by the routines in this file */
547
 
548
int input(void)	/* get next lexical input character */
549
{
550
	int c;
551
	extern char *lexprog;
552
 
553
	if (yysptr > yysbuf)
554
		c = (uschar)*--yysptr;
555
	else if (lexprog != NULL) {	/* awk '...' */
556
		if ((c = (uschar)*lexprog) != 0)
557
			lexprog++;
558
	} else				/* awk -f ... */
559
		c = pgetc();
560
	if (c == EOF)
561
		c = 0;
562
	if (ep >= ebuf + sizeof ebuf)
563
		ep = ebuf;
564
	*ep = c;
565
	if (c != 0) {
566
		ep++;
567
	}
568
	return (c);
569
}
570
 
571
void unput(int c)	/* put lexical character back on input */
572
{
573
	if (yysptr >= yysbuf + sizeof(yysbuf))
574
		FATAL("pushed back too much: %.20s...", yysbuf);
575
	*yysptr++ = c;
576
	if (--ep < ebuf)
577
		ep = ebuf + sizeof(ebuf) - 1;
578
}
579
 
580
/*
 * unputstr: push the whole string s back onto the input.
 * Characters are pushed last-to-first so that input() delivers them in
 * their original order.  An empty string pushes nothing.
 */
void unputstr(const char *s)
{
	const char *p = s + strlen(s);

	while (p > s)
		unput(*--p);
}