Subversion Repositories planix.SVN

Rev

Rev 2 | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
2 - 1
/****************************************************************
2
Copyright (C) Lucent Technologies 1997
3
All Rights Reserved
4
 
5
Permission to use, copy, modify, and distribute this software and
6
its documentation for any purpose and without fee is hereby
7
granted, provided that the above copyright notice appear in all
8
copies and that both that the copyright notice and this
9
permission notice and warranty disclaimer appear in supporting
10
documentation, and that the name Lucent Technologies or any of
11
its entities not be used in advertising or publicity pertaining
12
to distribution of the software without specific, written prior
13
permission.
14
 
15
LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
16
INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
17
IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
18
SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
20
IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
21
ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
22
THIS SOFTWARE.
23
****************************************************************/
24
 
25
#include <stdio.h>
26
#include <stdlib.h>
27
#include <string.h>
28
#include <ctype.h>
29
#include "awk.h"
30
#include "y.tab.h"
31
 
32
extern YYSTYPE	yylval;
33
extern int	infunc;
34
 
35
/* Lexer bookkeeping shared with the rest of the program. */
int	lineno	= 1;	/* current source line: input() adds one per '\n', unput() takes it back */
int	bracecnt = 0;	/* '{' seen minus '}' seen; yylex() complains if it goes negative */
int	brackcnt  = 0;	/* same for '[' and ']' */
int	parencnt = 0;	/* same for '(' and ')' */
39
 
40
/*
 * One entry of the reserved-word table searched by binsearch().
 * word points at a string literal, hence const.
 */
typedef struct Keyword {
	const char	*word;	/* spelling of the keyword or built-in name */
	int	sub;		/* value word() stores in yylval.i */
	int	type;		/* token type word() returns to the parser */
} Keyword;
45
 
46
Keyword keywords[] ={	/* keep sorted: binary searched */
47
	{ "BEGIN",	XBEGIN,		XBEGIN },
48
	{ "END",	XEND,		XEND },
49
	{ "NF",		VARNF,		VARNF },
50
	{ "atan2",	FATAN,		BLTIN },
51
	{ "break",	BREAK,		BREAK },
52
	{ "close",	CLOSE,		CLOSE },
53
	{ "continue",	CONTINUE,	CONTINUE },
54
	{ "cos",	FCOS,		BLTIN },
55
	{ "delete",	DELETE,		DELETE },
56
	{ "do",		DO,		DO },
57
	{ "else",	ELSE,		ELSE },
58
	{ "exit",	EXIT,		EXIT },
59
	{ "exp",	FEXP,		BLTIN },
60
	{ "fflush",	FFLUSH,		BLTIN },
61
	{ "for",	FOR,		FOR },
62
	{ "func",	FUNC,		FUNC },
63
	{ "function",	FUNC,		FUNC },
64
	{ "getline",	GETLINE,	GETLINE },
65
	{ "gsub",	GSUB,		GSUB },
66
	{ "if",		IF,		IF },
67
	{ "in",		IN,		IN },
68
	{ "index",	INDEX,		INDEX },
69
	{ "int",	FINT,		BLTIN },
70
	{ "length",	FLENGTH,	BLTIN },
71
	{ "log",	FLOG,		BLTIN },
72
	{ "match",	MATCHFCN,	MATCHFCN },
73
	{ "next",	NEXT,		NEXT },
74
	{ "nextfile",	NEXTFILE,	NEXTFILE },
75
	{ "print",	PRINT,		PRINT },
76
	{ "printf",	PRINTF,		PRINTF },
77
	{ "rand",	FRAND,		BLTIN },
78
	{ "return",	RETURN,		RETURN },
79
	{ "sin",	FSIN,		BLTIN },
80
	{ "split",	SPLIT,		SPLIT },
81
	{ "sprintf",	SPRINTF,	SPRINTF },
82
	{ "sqrt",	FSQRT,		BLTIN },
83
	{ "srand",	FSRAND,		BLTIN },
84
	{ "sub",	SUB,		SUB },
85
	{ "substr",	SUBSTR,		SUBSTR },
86
	{ "system",	FSYSTEM,	BLTIN },
87
	{ "tolower",	FTOLOWER,	BLTIN },
88
	{ "toupper",	FTOUPPER,	BLTIN },
89
	{ "utf",	FUTF,		BLTIN },
90
	{ "while",	WHILE,		WHILE },
91
};
92
 
93
#define DEBUG
/*
 * RET(x): return token x from the current function.  With DEBUG defined
 * it first prints the token name when dbg is set.
 * The body is wrapped in do { } while (0) so that "RET(x);" is a single
 * statement and stays safe in an unbraced if/else.
 * Note that x is evaluated twice in the DEBUG form.
 */
#ifdef	DEBUG
#define	RET(x)	do { if(dbg)printf("lex %s\n", tokname(x)); return(x); } while (0)
#else
#define	RET(x)	return(x)
#endif
99
 
100
/* peek: report the next input character without consuming it. */
int peek(void)
{
	int next;

	next = input();	/* take it ... */
	unput(next);	/* ... and push it straight back */
	return next;
}
106
 
107
int gettok(char **pbuf, int *psz)	/* get next input token */
108
{
109
	int c;
110
	char *buf = *pbuf;
111
	int sz = *psz;
112
	char *bp = buf;
113
 
114
	c = input();
115
	if (c == 0)
116
		return 0;
117
	buf[0] = c;
118
	buf[1] = 0;
119
	if (!isalnum(c) && c != '.' && c != '_')
120
		return c;
121
 
122
	*bp++ = c;
123
	if (isalpha(c) || c == '_') {	/* it's a varname */
124
		for ( ; (c = input()) != 0; ) {
125
			if (bp-buf >= sz)
126
				if (!adjbuf(&buf, &sz, bp-buf+2, 100, &bp, 0))
127
					FATAL( "out of space for name %.10s...", buf );
128
			if (isalnum(c) || c == '_')
129
				*bp++ = c;
130
			else {
131
				*bp = 0;
132
				unput(c);
133
				break;
134
			}
135
		}
136
	} else {	/* it's a number */
137
		char *rem;
138
		/* read input until can't be a number */
139
		for ( ; (c = input()) != 0; ) {
140
			if (bp-buf >= sz)
141
				if (!adjbuf(&buf, &sz, bp-buf+2, 100, &bp, 0))
142
					FATAL( "out of space for number %.10s...", buf );
143
			if (isdigit(c) || c == 'e' || c == 'E' 
144
			  || c == '.' || c == '+' || c == '-')
145
				*bp++ = c;
146
			else {
147
				unput(c);
148
				break;
149
			}
150
		}
151
		*bp = 0;
152
		strtod(buf, &rem);	/* parse the number */
153
		unputstr(rem);		/* put rest back for later */
154
		rem[0] = 0;
155
	}
156
	*pbuf = buf;
157
	*psz = sz;
158
	return buf[0];
159
}
160
 
161
int	word(char *);
162
int	string(void);
163
int	regexpr(void);
164
int	sc	= 0;	/* 1 => return a } right now */
165
int	reg	= 0;	/* 1 => return a REGEXPR now */
166
 
167
int yylex(void)
168
{
169
	int c;
170
	static char *buf = 0;
171
	static int bufsize = 500;
172
 
173
	if (buf == 0 && (buf = (char *) malloc(bufsize)) == NULL)
174
		FATAL( "out of space in yylex" );
175
	if (sc) {
176
		sc = 0;
177
		RET('}');
178
	}
179
	if (reg) {
180
		reg = 0;
181
		return regexpr();
182
	}
183
	for (;;) {
184
		c = gettok(&buf, &bufsize);
185
		if (c == 0)
186
			return 0;
187
		if (isalpha(c) || c == '_')
188
			return word(buf);
189
		if (isdigit(c) || c == '.') {
190
			yylval.cp = setsymtab(buf, tostring(buf), atof(buf), CON|NUM, symtab);
191
			/* should this also have STR set? */
192
			RET(NUMBER);
193
		}
194
 
195
		yylval.i = c;
196
		switch (c) {
197
		case '\n':	/* {EOL} */
198
			RET(NL);
199
		case '\r':	/* assume \n is coming */
200
		case ' ':	/* {WS}+ */
201
		case '\t':
202
			break;
203
		case '#':	/* #.* strip comments */
204
			while ((c = input()) != '\n' && c != 0)
205
				;
206
			unput(c);
207
			break;
208
		case ';':
209
			RET(';');
210
		case '\\':
211
			if (peek() == '\n') {
212
				input();
213
			} else if (peek() == '\r') {
214
				input(); input();	/* \n */
215
				lineno++;
216
			} else {
217
				RET(c);
218
			}
219
			break;
220
		case '&':
221
			if (peek() == '&') {
222
				input(); RET(AND);
223
			} else 
224
				RET('&');
225
		case '|':
226
			if (peek() == '|') {
227
				input(); RET(BOR);
228
			} else
229
				RET('|');
230
		case '!':
231
			if (peek() == '=') {
232
				input(); yylval.i = NE; RET(NE);
233
			} else if (peek() == '~') {
234
				input(); yylval.i = NOTMATCH; RET(MATCHOP);
235
			} else
236
				RET(NOT);
237
		case '~':
238
			yylval.i = MATCH;
239
			RET(MATCHOP);
240
		case '<':
241
			if (peek() == '=') {
242
				input(); yylval.i = LE; RET(LE);
243
			} else {
244
				yylval.i = LT; RET(LT);
245
			}
246
		case '=':
247
			if (peek() == '=') {
248
				input(); yylval.i = EQ; RET(EQ);
249
			} else {
250
				yylval.i = ASSIGN; RET(ASGNOP);
251
			}
252
		case '>':
253
			if (peek() == '=') {
254
				input(); yylval.i = GE; RET(GE);
255
			} else if (peek() == '>') {
256
				input(); yylval.i = APPEND; RET(APPEND);
257
			} else {
258
				yylval.i = GT; RET(GT);
259
			}
260
		case '+':
261
			if (peek() == '+') {
262
				input(); yylval.i = INCR; RET(INCR);
263
			} else if (peek() == '=') {
264
				input(); yylval.i = ADDEQ; RET(ASGNOP);
265
			} else
266
				RET('+');
267
		case '-':
268
			if (peek() == '-') {
269
				input(); yylval.i = DECR; RET(DECR);
270
			} else if (peek() == '=') {
271
				input(); yylval.i = SUBEQ; RET(ASGNOP);
272
			} else
273
				RET('-');
274
		case '*':
275
			if (peek() == '=') {	/* *= */
276
				input(); yylval.i = MULTEQ; RET(ASGNOP);
277
			} else if (peek() == '*') {	/* ** or **= */
278
				input();	/* eat 2nd * */
279
				if (peek() == '=') {
280
					input(); yylval.i = POWEQ; RET(ASGNOP);
281
				} else {
282
					RET(POWER);
283
				}
284
			} else
285
				RET('*');
286
		case '/':
287
			RET('/');
288
		case '%':
289
			if (peek() == '=') {
290
				input(); yylval.i = MODEQ; RET(ASGNOP);
291
			} else
292
				RET('%');
293
		case '^':
294
			if (peek() == '=') {
295
				input(); yylval.i = POWEQ; RET(ASGNOP);
296
			} else
297
				RET(POWER);
298
 
299
		case '$':
300
			/* BUG: awkward, if not wrong */
301
			c = gettok(&buf, &bufsize);
302
			if (c == '(' || c == '[' || (infunc && isarg(buf) >= 0)) {
303
				unputstr(buf);
304
				RET(INDIRECT);
305
			} else if (isalpha(c)) {
306
				if (strcmp(buf, "NF") == 0) {	/* very special */
307
					unputstr("(NF)");
308
					RET(INDIRECT);
309
				}
310
				yylval.cp = setsymtab(buf, "", 0.0, STR|NUM, symtab);
311
				RET(IVAR);
312
			} else {
313
				unputstr(buf);
314
				RET(INDIRECT);
315
			}
316
 
317
		case '}':
318
			if (--bracecnt < 0)
319
				SYNTAX( "extra }" );
320
			sc = 1;
321
			RET(';');
322
		case ']':
323
			if (--brackcnt < 0)
324
				SYNTAX( "extra ]" );
325
			RET(']');
326
		case ')':
327
			if (--parencnt < 0)
328
				SYNTAX( "extra )" );
329
			RET(')');
330
		case '{':
331
			bracecnt++;
332
			RET('{');
333
		case '[':
334
			brackcnt++;
335
			RET('[');
336
		case '(':
337
			parencnt++;
338
			RET('(');
339
 
340
		case '"':
341
			return string();	/* BUG: should be like tran.c ? */
342
 
343
		default:
344
			RET(c);
345
		}
346
	}
347
}
348
 
349
int string(void)
350
{
351
	int c, n;
352
	char *s, *bp;
353
	static char *buf = 0;
354
	static int bufsz = 500;
355
 
356
	if (buf == 0 && (buf = (char *) malloc(bufsz)) == NULL)
357
		FATAL("out of space for strings");
358
	for (bp = buf; (c = input()) != '"'; ) {
359
		if (!adjbuf(&buf, &bufsz, bp-buf+2, 500, &bp, 0))
360
			FATAL("out of space for string %.10s...", buf);
361
		switch (c) {
362
		case '\n':
363
		case '\r':
364
		case 0:
365
			SYNTAX( "non-terminated string %.10s...", buf );
366
			lineno++;
367
			break;
368
		case '\\':
369
			c = input();
370
			switch (c) {
371
			case '"': *bp++ = '"'; break;
372
			case 'n': *bp++ = '\n'; break;	
373
			case 't': *bp++ = '\t'; break;
374
			case 'f': *bp++ = '\f'; break;
375
			case 'r': *bp++ = '\r'; break;
376
			case 'b': *bp++ = '\b'; break;
377
			case 'v': *bp++ = '\v'; break;
378
			case 'a': *bp++ = '\007'; break;
379
			case '\\': *bp++ = '\\'; break;
380
 
381
			case '0': case '1': case '2': /* octal: \d \dd \ddd */
382
			case '3': case '4': case '5': case '6': case '7':
383
				n = c - '0';
384
				if ((c = peek()) >= '0' && c < '8') {
385
					n = 8 * n + input() - '0';
386
					if ((c = peek()) >= '0' && c < '8')
387
						n = 8 * n + input() - '0';
388
				}
389
				*bp++ = n;
390
				break;
391
 
392
			case 'x':	/* hex  \x0-9a-fA-F + */
393
			    {	char xbuf[100], *px;
394
				for (px = xbuf; (c = input()) != 0 && px-xbuf < 100-2; ) {
395
					if (isdigit(c)
396
					 || (c >= 'a' && c <= 'f')
397
					 || (c >= 'A' && c <= 'F'))
398
						*px++ = c;
399
					else
400
						break;
401
				}
402
				*px = 0;
403
				unput(c);
404
	  			sscanf(xbuf, "%x", &n);
405
				*bp++ = n;
406
				break;
407
			    }
408
 
409
			default: 
410
				*bp++ = c;
411
				break;
412
			}
413
			break;
414
		default:
415
			*bp++ = c;
416
			break;
417
		}
418
	}
419
	*bp = 0; 
420
	s = tostring(buf);
421
	*bp++ = ' '; *bp++ = 0;
422
	yylval.cp = setsymtab(buf, s, 0.0, CON|STR|DONTFREE, symtab);
423
	RET(STRING);
424
}
425
 
426
 
427
/*
 * binsearch: look w up in the n-entry table kp, which must be sorted in
 * strcmp() order.  Returns the index of the matching entry, or -1 if w
 * is not present.
 */
int binsearch(char *w, Keyword *kp, int n)
{
	int lo = 0;
	int hi = n - 1;

	while (lo <= hi) {
		int probe = (lo + hi) / 2;
		int cmp = strcmp(w, kp[probe].word);

		if (cmp == 0)
			return probe;
		if (cmp < 0)
			hi = probe - 1;	/* w sorts before the probed entry */
		else
			lo = probe + 1;	/* w sorts after the probed entry */
	}
	return -1;
}
444
 
445
int word(char *w) 
446
{
447
	Keyword *kp;
448
	int c, n;
449
 
450
	n = binsearch(w, keywords, sizeof(keywords)/sizeof(keywords[0]));
451
	kp = keywords + n;
452
	if (n != -1) {	/* found in table */
453
		yylval.i = kp->sub;
454
		switch (kp->type) {	/* special handling */
455
		case FSYSTEM:
456
			if (safe)
457
				SYNTAX( "system is unsafe" );
458
			RET(kp->type);
459
		case FUNC:
460
			if (infunc)
461
				SYNTAX( "illegal nested function" );
462
			RET(kp->type);
463
		case RETURN:
464
			if (!infunc)
465
				SYNTAX( "return not in function" );
466
			RET(kp->type);
467
		case VARNF:
468
			yylval.cp = setsymtab("NF", "", 0.0, NUM, symtab);
469
			RET(VARNF);
470
		default:
471
			RET(kp->type);
472
		}
473
	}
474
	c = peek();	/* look for '(' */
475
	if (c != '(' && infunc && (n=isarg(w)) >= 0) {
476
		yylval.i = n;
477
		RET(ARG);
478
	} else {
479
		yylval.cp = setsymtab(w, "", 0.0, STR|NUM|DONTFREE, symtab);
480
		if (c == '(') {
481
			RET(CALL);
482
		} else {
483
			RET(VAR);
484
		}
485
	}
486
}
487
 
488
void startreg(void)	/* next call to yyles will return a regular expression */
489
{
490
	reg = 1;
491
}
492
 
493
int regexpr(void)
494
{
495
	int c;
496
	static char *buf = 0;
497
	static int bufsz = 500;
498
	char *bp;
499
 
500
	if (buf == 0 && (buf = (char *) malloc(bufsz)) == NULL)
501
		FATAL("out of space for rex expr");
502
	bp = buf;
503
	for ( ; (c = input()) != '/' && c != 0; ) {
504
		if (!adjbuf(&buf, &bufsz, bp-buf+3, 500, &bp, 0))
505
			FATAL("out of space for reg expr %.10s...", buf);
506
		if (c == '\n') {
507
			SYNTAX( "newline in regular expression %.10s...", buf ); 
508
			unput('\n');
509
			break;
510
		} else if (c == '\\') {
511
			*bp++ = '\\'; 
512
			*bp++ = input();
513
		} else {
514
			*bp++ = c;
515
		}
516
	}
517
	*bp = 0;
518
	yylval.s = tostring(buf);
519
	unput('/');
520
	RET(REGEXPR);
521
}
522
 
523
/* low-level lexical stuff, sort of inherited from lex */
524
 
525
/* ebuf records the characters handed out by input(); ep wraps around at the end */
char	ebuf[300];
char	*ep = ebuf;	/* next slot in ebuf; unput() steps it back */

/* yysbuf is a stack: unput() pushes characters, input() pops them first */
char	yysbuf[100];	/* pushback buffer */
char	*yysptr = yysbuf;	/* one past the last pushed-back character */

FILE	*yyin = 0;
530
 
531
int input(void)	/* get next lexical input character */
532
{
533
	int c;
534
	extern char *lexprog;
535
 
536
	if (yysptr > yysbuf)
537
		c = *--yysptr;
538
	else if (lexprog != NULL) {	/* awk '...' */
539
		if ((c = *lexprog) != 0)
540
			lexprog++;
541
	} else				/* awk -f ... */
542
		c = pgetc();
543
	if (c == '\n')
544
		lineno++;
545
	else if (c == EOF)
546
		c = 0;
547
	if (ep >= ebuf + sizeof ebuf)
548
		ep = ebuf;
549
	return *ep++ = c;
550
}
551
 
552
void unput(int c)	/* put lexical character back on input */
553
{
554
	if (c == '\n')
555
		lineno--;
556
	if (yysptr >= yysbuf + sizeof(yysbuf))
557
		FATAL("pushed back too much: %.20s...", yysbuf);
558
	*yysptr++ = c;
559
	if (--ep < ebuf)
560
		ep = ebuf + sizeof(ebuf) - 1;
561
}
562
 
563
/*
 * unputstr: put the string s back on the input.  Characters are pushed
 * last-to-first so that input() hands them out again in their original
 * order.
 */
void unputstr(char *s)
{
	char *p;

	for (p = s + strlen(s); p > s; )
		unput(*--p);
}
569
}