Subversion Repositories planix.SVN

Rev

Rev 2 | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
2 - 1
/*
 * lexical analysis and source input
 */
4
 
5
#include "sh.h"
6
#include <ctype.h>
7
 
8
 
9
/* Structure to keep track of the lexing state and the various pieces of info
 * needed for each particular state.
 *
 * ls_state holds the state code (SBASE, SDQUOTE, ...); ls_info holds the
 * per-state data.  The ls_* macros defined next to each union member give
 * short-hand access to that member (e.g. statep->ls_scsparen.nparen).
 * Element 0 of every block of states is not used as a real state: its
 * ls_info.base member links the block to the previous one (see push_state_()).
 */
typedef struct lex_state Lex_state;
struct lex_state {
	int ls_state;
	union {
	    /* $(...) */
	    struct scsparen_info {
		    int nparen;		/* count open parenthesis */
		    int csstate; /* XXX remove */
#define ls_scsparen ls_info.u_scsparen
	    } u_scsparen;

	    /* $((...)) */
	    struct sasparen_info {
		    int nparen;		/* count open parenthesis */
		    int start;		/* marks start of $(( in output str */
#define ls_sasparen ls_info.u_sasparen
	    } u_sasparen;

	    /* ((...)) */
	    struct sletparen_info {
		    int nparen;		/* count open parenthesis */
#define ls_sletparen ls_info.u_sletparen
	    } u_sletparen;

	    /* `...` */
	    struct sbquote_info {
		    int indquotes;	/* true if in double quotes: "`...`" */
#define ls_sbquote ls_info.u_sbquote
	    } u_sbquote;

	    Lex_state *base;		/* used to point to next state block */
	} ls_info;
};
45
 
46
typedef struct State_info State_info;
47
struct State_info {
48
	Lex_state	*base;
49
	Lex_state	*end;
50
};
51
 
52
 
53
static void	readhere ARGS((struct ioword *iop));
54
static int	getsc__ ARGS((void));
55
static void	getsc_line ARGS((Source *s));
56
static int	getsc_bn ARGS((void));
57
static char	*get_brace_var ARGS((XString *wsp, char *wp));
58
static int	arraysub ARGS((char **strp));
59
static const char *ungetsc ARGS((int c));
60
static void	gethere ARGS((void));
61
static Lex_state *push_state_ ARGS((State_info *si, Lex_state *old_end));
62
static Lex_state *pop_state_ ARGS((State_info *si, Lex_state *old_end));
63
 
64
static int backslash_skip;
65
static int ignore_backslash_newline;
66
 
67
/* optimized getsc_bn(): take the next character straight from the source
 * buffer unless it is NUL or a backslash, or a backslash is pending
 * (backslash_skip != 0); in those cases call getsc_bn().
 */
#define getsc()		(*source->str != '\0' && *source->str != '\\' \
			 && !backslash_skip ? *source->str++ : getsc_bn())
/* optimized getsc__(): only call getsc__() when the buffer is exhausted */
#define	getsc_()	((*source->str != '\0') ? *source->str++ : getsc__())

/* Number of Lex_state elements per block of the state stack */
#define STATE_BSIZE	32

/* Push lexer state (s).  Uses yylex()'s locals statep, state_info and
 * state; chains a new block with push_state_() when the current block
 * is full.
 */
#define PUSH_STATE(s)	do { \
			    if (++statep == state_info.end) \
				statep = push_state_(&state_info, statep); \
			    state = statep->ls_state = (s); \
			} while (0)

/* Pop the current lexer state.  When the pop lands on element 0 of the
 * current block, pop_state_() releases that block and returns to the
 * previous one.
 */
#define POP_STATE()	do { \
			    if (--statep == state_info.base) \
				statep = pop_state_(&state_info, statep); \
			    state = statep->ls_state; \
			} while (0)
86
 
87
 
88
 
89
/*
90
 * Lexical analyzer
91
 *
92
 * tokens are not regular expressions, they are LL(1).
93
 * for example, "${var:-${PWD}}", and "$(size $(whence ksh))".
94
 * hence the state stack.
95
 */
96
 
97
int
98
yylex(cf)
99
	int cf;
100
{
101
	Lex_state states[STATE_BSIZE], *statep;
102
	State_info state_info;
103
	register int c, state;
104
	XString ws;		/* expandable output word */
105
	register char *wp;	/* output word pointer */
106
	char *sp, *dp;
107
	int c2;
108
 
109
 
110
  Again:
111
	states[0].ls_state = -1;
112
	states[0].ls_info.base = (Lex_state *) 0;
113
	statep = &states[1];
114
	state_info.base = states;
115
	state_info.end = &states[STATE_BSIZE];
116
 
117
	Xinit(ws, wp, 64, ATEMP);
118
 
119
	backslash_skip = 0;
120
	ignore_backslash_newline = 0;
121
 
122
	if (cf&ONEWORD)
123
		state = SWORD;
124
#ifdef KSH
125
	else if (cf&LETEXPR) {
126
		*wp++ = OQUOTE;	 /* enclose arguments in (double) quotes */
127
		state = SLETPAREN;	
128
		statep->ls_sletparen.nparen = 0;
129
	}
130
#endif /* KSH */
131
	else {		/* normal lexing */
132
		state = (cf & HEREDELIM) ? SHEREDELIM : SBASE;
133
		while ((c = getsc()) == ' ' || c == '\t')
134
			;
135
		if (c == '#') {
136
			ignore_backslash_newline++;
137
			while ((c = getsc()) != '\0' && c != '\n')
138
				;
139
			ignore_backslash_newline--;
140
		}
141
		ungetsc(c);
142
	}
143
	if (source->flags & SF_ALIAS) {	/* trailing ' ' in alias definition */
144
		source->flags &= ~SF_ALIAS;
145
		/* In POSIX mode, a trailing space only counts if we are
146
		 * parsing a simple command
147
		 */
148
		if (!Flag(FPOSIX) || (cf & CMDWORD))
149
			cf |= ALIAS;
150
	}
151
 
152
	/* Initial state: one of SBASE SHEREDELIM SWORD SASPAREN */
153
	statep->ls_state = state;
154
 
155
	/* collect non-special or quoted characters to form word */
156
	while (!((c = getsc()) == 0
157
		 || ((state == SBASE || state == SHEREDELIM)
158
		     && ctype(c, C_LEX1))))
159
	{
160
		Xcheck(ws, wp);
161
		switch (state) {
162
		  case SBASE:
163
			if (c == '[' && (cf & (VARASN|ARRAYVAR))) {
164
				*wp = EOS; /* temporary */
165
				if (is_wdvarname(Xstring(ws, wp), FALSE))
166
				{
167
					char *p, *tmp;
168
 
169
					if (arraysub(&tmp)) {
170
						*wp++ = CHAR;
171
						*wp++ = c;
172
						for (p = tmp; *p; ) {
173
							Xcheck(ws, wp);
174
							*wp++ = CHAR;
175
							*wp++ = *p++;
176
						}
177
						afree(tmp, ATEMP);
178
						break;
179
					} else {
180
						Source *s;
181
 
182
						s = pushs(SREREAD,
183
							  source->areap);
184
						s->start = s->str
185
							= s->u.freeme = tmp;
186
						s->next = source;
187
						source = s;
188
					}
189
				}
190
				*wp++ = CHAR;
191
				*wp++ = c;
192
				break;
193
			}
194
			/* fall through.. */
195
		  Sbase1:	/* includes *(...|...) pattern (*+?@!) */
196
#ifdef KSH
197
			if (c == '*' || c == '@' || c == '+' || c == '?'
198
			    || c == '!')
199
			{
200
				c2 = getsc();
201
				if (c2 == '(' /*)*/ ) {
202
					*wp++ = OPAT;
203
					*wp++ = c;
204
					PUSH_STATE(SPATTERN);
205
					break;
206
				}
207
				ungetsc(c2);
208
			}
209
#endif /* KSH */
210
			/* fall through.. */
211
		  Sbase2:	/* doesn't include *(...|...) pattern (*+?@!) */
212
			switch (c) {
213
			  case '\\':
214
				c = getsc();
215
#ifdef OS2
216
				if (isalnum(c)) {
217
					*wp++ = CHAR, *wp++ = '\\';
218
					*wp++ = CHAR, *wp++ = c;
219
				} else 
220
#endif
221
				if (c) /* trailing \ is lost */
222
					*wp++ = QCHAR, *wp++ = c;
223
				break;
224
			  case '\'':
225
				*wp++ = OQUOTE;
226
				ignore_backslash_newline++;
227
				PUSH_STATE(SSQUOTE);
228
				break;
229
			  case '"':
230
				*wp++ = OQUOTE;
231
				PUSH_STATE(SDQUOTE);
232
				break;
233
			  default:
234
				goto Subst;
235
			}
236
			break;
237
 
238
		  Subst:
239
			switch (c) {
240
			  case '\\':
241
				c = getsc();
242
				switch (c) {
243
				  case '"': case '\\':
244
				  case '$': case '`':
245
					*wp++ = QCHAR, *wp++ = c;
246
					break;
247
				  default:
248
					Xcheck(ws, wp);
249
					if (c) { /* trailing \ is lost */
250
						*wp++ = CHAR, *wp++ = '\\';
251
						*wp++ = CHAR, *wp++ = c;
252
					}
253
					break;
254
				}
255
				break;
256
			  case '$':
257
				c = getsc();
258
				if (c == '(') /*)*/ {
259
					c = getsc();
260
					if (c == '(') /*)*/ {
261
						PUSH_STATE(SASPAREN);
262
						statep->ls_sasparen.nparen = 2;
263
						statep->ls_sasparen.start =
264
							Xsavepos(ws, wp);
265
						*wp++ = EXPRSUB;
266
					} else {
267
						ungetsc(c);
268
						PUSH_STATE(SCSPAREN);
269
						statep->ls_scsparen.nparen = 1;
270
						statep->ls_scsparen.csstate = 0;
271
						*wp++ = COMSUB;
272
					}
273
				} else if (c == '{') /*}*/ {
274
					*wp++ = OSUBST;
275
					*wp++ = '{'; /*}*/
276
					wp = get_brace_var(&ws, wp);
277
					c = getsc();
278
					/* allow :# and :% (ksh88 compat) */
279
					if (c == ':') {
280
						*wp++ = CHAR, *wp++ = c;
281
						c = getsc();
282
					}
283
					/* If this is a trim operation,
284
					 * treat (,|,) specially in STBRACE.
285
					 */
286
					if (c == '#' || c == '%') {
287
						ungetsc(c);
288
						PUSH_STATE(STBRACE);
289
					} else {
290
						ungetsc(c);
291
						PUSH_STATE(SBRACE);
292
					}
293
				} else if (ctype(c, C_ALPHA)) {
294
					*wp++ = OSUBST;
295
					*wp++ = 'X';
296
					do {
297
						Xcheck(ws, wp);
298
						*wp++ = c;
299
						c = getsc();
300
					} while (ctype(c, C_ALPHA|C_DIGIT));
301
					*wp++ = '\0';
302
					*wp++ = CSUBST;
303
					*wp++ = 'X';
304
					ungetsc(c);
305
				} else if (ctype(c, C_DIGIT|C_VAR1)) {
306
					Xcheck(ws, wp);
307
					*wp++ = OSUBST;
308
					*wp++ = 'X';
309
					*wp++ = c;
310
					*wp++ = '\0';
311
					*wp++ = CSUBST;
312
					*wp++ = 'X';
313
				} else {
314
					*wp++ = CHAR, *wp++ = '$';
315
					ungetsc(c);
316
				}
317
				break;
318
			  case '`':
319
				PUSH_STATE(SBQUOTE);
320
				*wp++ = COMSUB;
321
				/* Need to know if we are inside double quotes
322
				 * since sh/at&t-ksh translate the \" to " in
323
				 * "`..\"..`".
324
				 * This is not done in posix mode (section
325
				 * 3.2.3, Double Quotes: "The backquote shall
326
				 * retain its special meaning introducing the
327
				 * other form of command substitution (see
328
				 * 3.6.3). The portion of the quoted string
329
				 * from the initial backquote and the
330
				 * characters up to the next backquote that
331
				 * is not preceded by a backslash (having
332
				 * escape characters removed) defines that
333
				 * command whose output replaces `...` when
334
				 * the word is expanded."
335
				 * Section 3.6.3, Command Substitution:
336
				 * "Within the backquoted style of command
337
				 * substitution, backslash shall retain its
338
				 * literal meaning, except when followed by
339
				 * $ ` \.").
340
				 */
341
				statep->ls_sbquote.indquotes = 0;
342
				if (!Flag(FPOSIX)) {
343
					Lex_state *s = statep;
344
					Lex_state *base = state_info.base;
345
					while (1) {
346
						for (; s != base; s--) {
347
							if (s->ls_state == SDQUOTE) {
348
								statep->ls_sbquote.indquotes = 1;
349
								break;
350
							}
351
						}
352
						if (s != base)
353
							break;
354
						if (!(s = s->ls_info.base))
355
							break;
356
						base = s-- - STATE_BSIZE;
357
					}
358
				}
359
				break;
360
			  default:
361
				*wp++ = CHAR, *wp++ = c;
362
			}
363
			break;
364
 
365
		  case SSQUOTE:
366
			if (c == '\'') {
367
				POP_STATE();
368
				*wp++ = CQUOTE;
369
				ignore_backslash_newline--;
370
			} else
371
				*wp++ = QCHAR, *wp++ = c;
372
			break;
373
 
374
		  case SDQUOTE:
375
			if (c == '"') {
376
				POP_STATE();
377
				*wp++ = CQUOTE;
378
			} else
379
				goto Subst;
380
			break;
381
 
382
		  case SCSPAREN: /* $( .. ) */
383
			/* todo: deal with $(...) quoting properly
384
			 * kludge to partly fake quoting inside $(..): doesn't
385
			 * really work because nested $(..) or ${..} inside
386
			 * double quotes aren't dealt with.
387
			 */
388
			switch (statep->ls_scsparen.csstate) {
389
			  case 0: /* normal */
390
				switch (c) {
391
				  case '(':
392
					statep->ls_scsparen.nparen++;
393
					break;
394
				  case ')':
395
					statep->ls_scsparen.nparen--;
396
					break;
397
				  case '\\':
398
					statep->ls_scsparen.csstate = 1;
399
					break;
400
				  case '"':
401
					statep->ls_scsparen.csstate = 2;
402
					break;
403
				  case '\'':
404
					statep->ls_scsparen.csstate = 4;
405
					ignore_backslash_newline++;
406
					break;
407
				}
408
				break;
409
 
410
			  case 1: /* backslash in normal mode */
411
			  case 3: /* backslash in double quotes */
412
				--statep->ls_scsparen.csstate;
413
				break;
414
 
415
			  case 2: /* double quotes */
416
				if (c == '"')
417
					statep->ls_scsparen.csstate = 0;
418
				else if (c == '\\')
419
					statep->ls_scsparen.csstate = 3;
420
				break;
421
 
422
			  case 4: /* single quotes */
423
				if (c == '\'') {
424
					statep->ls_scsparen.csstate = 0;
425
					ignore_backslash_newline--;
426
				}
427
				break;
428
			}
429
			if (statep->ls_scsparen.nparen == 0) {
430
				POP_STATE();
431
				*wp++ = 0; /* end of COMSUB */
432
			} else
433
				*wp++ = c;
434
			break;
435
 
436
		  case SASPAREN: /* $(( .. )) */
437
			/* todo: deal with $((...); (...)) properly */
438
			/* XXX should nest using existing state machine
439
			 *     (embed "..", $(...), etc.) */
440
			if (c == '(')
441
				statep->ls_sasparen.nparen++;
442
			else if (c == ')') {
443
				statep->ls_sasparen.nparen--;
444
				if (statep->ls_sasparen.nparen == 1) {
445
					/*(*/
446
					if ((c2 = getsc()) == ')') {
447
						POP_STATE();
448
						*wp++ = 0; /* end of EXPRSUB */
449
						break;
450
					} else {
451
						char *s;
452
 
453
						ungetsc(c2);
454
						/* mismatched parenthesis -
455
						 * assume we were really
456
						 * parsing a $(..) expression
457
						 */
458
						s = Xrestpos(ws, wp,
459
						     statep->ls_sasparen.start);
460
						memmove(s + 1, s, wp - s);
461
						*s++ = COMSUB;
462
						*s = '('; /*)*/
463
						wp++;
464
						statep->ls_scsparen.nparen = 1;
465
						statep->ls_scsparen.csstate = 0;
466
						state = statep->ls_state
467
							= SCSPAREN;
468
 
469
					}
470
				}
471
			}
472
			*wp++ = c;
473
			break;
474
 
475
		  case SBRACE:
476
			/*{*/
477
			if (c == '}') {
478
				POP_STATE();
479
				*wp++ = CSUBST;
480
				*wp++ = /*{*/ '}';
481
			} else
482
				goto Sbase1;
483
			break;
484
 
485
		  case STBRACE:
486
			/* Same as SBRACE, except (,|,) treated specially */
487
			/*{*/
488
			if (c == '}') {
489
				POP_STATE();
490
				*wp++ = CSUBST;
491
				*wp++ = /*{*/ '}';
492
			} else if (c == '|') {
493
				*wp++ = SPAT;
494
			} else if (c == '(') {
495
				*wp++ = OPAT;
496
				*wp++ = ' ';	/* simile for @ */
497
				PUSH_STATE(SPATTERN);
498
			} else
499
				goto Sbase1;
500
			break;
501
 
502
		  case SBQUOTE:
503
			if (c == '`') {
504
				*wp++ = 0;
505
				POP_STATE();
506
			} else if (c == '\\') {
507
				switch (c = getsc()) {
508
				  case '\\':
509
				  case '$': case '`':
510
					*wp++ = c;
511
					break;
512
				  case '"':
513
					if (statep->ls_sbquote.indquotes) {
514
						*wp++ = c;
515
						break;
516
					}
517
					/* fall through.. */
518
				  default:
519
					if (c) { /* trailing \ is lost */
520
						*wp++ = '\\';
521
						*wp++ = c;
522
					}
523
					break;
524
				}
525
			} else
526
				*wp++ = c;
527
			break;
528
 
529
		  case SWORD:	/* ONEWORD */
530
			goto Subst;
531
 
532
#ifdef KSH
533
		  case SLETPAREN:	/* LETEXPR: (( ... )) */
534
			/*(*/
535
			if (c == ')') {
536
				if (statep->ls_sletparen.nparen > 0)
537
				    --statep->ls_sletparen.nparen;
538
				/*(*/
539
				else if ((c2 = getsc()) == ')') {
540
					c = 0;
541
					*wp++ = CQUOTE;
542
					goto Done;
543
				} else
544
					ungetsc(c2);
545
			} else if (c == '(')
546
				/* parenthesis inside quotes and backslashes
547
				 * are lost, but at&t ksh doesn't count them
548
				 * either
549
				 */
550
				++statep->ls_sletparen.nparen;
551
			goto Sbase2;
552
#endif /* KSH */
553
 
554
		  case SHEREDELIM:	/* <<,<<- delimiter */
555
			/* XXX chuck this state (and the next) - use
556
			 * the existing states ($ and \`..` should be
557
			 * stripped of their specialness after the
558
			 * fact).
559
			 */
560
			/* here delimiters need a special case since
561
			 * $ and `..` are not to be treated specially
562
			 */
563
			if (c == '\\') {
564
				c = getsc();
565
				if (c) { /* trailing \ is lost */
566
					*wp++ = QCHAR;
567
					*wp++ = c;
568
				}
569
			} else if (c == '\'') {
570
				PUSH_STATE(SSQUOTE);
571
				*wp++ = OQUOTE;
572
				ignore_backslash_newline++;
573
			} else if (c == '"') {
574
				state = statep->ls_state = SHEREDQUOTE;
575
				*wp++ = OQUOTE;
576
			} else {
577
				*wp++ = CHAR;
578
				*wp++ = c;
579
			}
580
			break;
581
 
582
		  case SHEREDQUOTE:	/* " in <<,<<- delimiter */
583
			if (c == '"') {
584
				*wp++ = CQUOTE;
585
				state = statep->ls_state = SHEREDELIM;
586
			} else {
587
				if (c == '\\') {
588
					switch (c = getsc()) {
589
					  case '\\': case '"':
590
					  case '$': case '`':
591
						break;
592
					  default:
593
						if (c) { /* trailing \ lost */
594
							*wp++ = CHAR;
595
							*wp++ = '\\';
596
						}
597
						break;
598
					}
599
				}
600
				*wp++ = CHAR;
601
				*wp++ = c;
602
			}
603
			break;
604
 
605
		  case SPATTERN:	/* in *(...|...) pattern (*+?@!) */
606
			if ( /*(*/ c == ')') {
607
				*wp++ = CPAT;
608
				POP_STATE();
609
			} else if (c == '|') {
610
				*wp++ = SPAT;
611
			} else if (c == '(') {
612
				*wp++ = OPAT;
613
				*wp++ = ' ';	/* simile for @ */
614
				PUSH_STATE(SPATTERN);
615
			} else
616
				goto Sbase1;
617
			break;
618
		}
619
	}
620
Done:
621
	Xcheck(ws, wp);
622
	if (statep != &states[1])
623
		/* XXX figure out what is missing */
624
		yyerror("no closing quote\n");
625
 
626
	/* This done to avoid tests for SHEREDELIM wherever SBASE tested */
627
	if (state == SHEREDELIM)
628
		state = SBASE;
629
 
630
	dp = Xstring(ws, wp);
631
	if ((c == '<' || c == '>') && state == SBASE
632
	    && ((c2 = Xlength(ws, wp)) == 0
633
	        || (c2 == 2 && dp[0] == CHAR && digit(dp[1]))))
634
	{
635
		struct ioword *iop =
636
				(struct ioword *) alloc(sizeof(*iop), ATEMP);
637
 
638
		if (c2 == 2)
639
			iop->unit = dp[1] - '0';
640
		else
641
			iop->unit = c == '>'; /* 0 for <, 1 for > */
642
 
643
		c2 = getsc();
644
		/* <<, >>, <> are ok, >< is not */
645
		if (c == c2 || (c == '<' && c2 == '>')) {
646
			iop->flag = c == c2 ?
647
				  (c == '>' ? IOCAT : IOHERE) : IORDWR;
648
			if (iop->flag == IOHERE)
649
				if ((c2 = getsc()) == '-')
650
					iop->flag |= IOSKIP;
651
				else
652
					ungetsc(c2);
653
		} else if (c2 == '&')
654
			iop->flag = IODUP | (c == '<' ? IORDUP : 0);
655
		else {
656
			iop->flag = c == '>' ? IOWRITE : IOREAD;
657
			if (c == '>' && c2 == '|')
658
				iop->flag |= IOCLOB;
659
			else
660
				ungetsc(c2);
661
		}
662
 
663
		iop->name = (char *) 0;
664
		iop->delim = (char *) 0;
665
		iop->heredoc = (char *) 0;
666
		Xfree(ws, wp);	/* free word */
667
		yylval.iop = iop;
668
		return REDIR;
669
	}
670
 
671
	if (wp == dp && state == SBASE) {
672
		Xfree(ws, wp);	/* free word */
673
		/* no word, process LEX1 character */
674
		switch (c) {
675
		  default:
676
			return c;
677
 
678
		  case '|':
679
		  case '&':
680
		  case ';':
681
			if ((c2 = getsc()) == c)
682
				c = (c == ';') ? BREAK :
683
				    (c == '|') ? LOGOR :
684
				    (c == '&') ? LOGAND :
685
				    YYERRCODE;
686
#ifdef KSH
687
			else if (c == '|' && c2 == '&')
688
				c = COPROC;
689
#endif /* KSH */
690
			else
691
				ungetsc(c2);
692
			return c;
693
 
694
		  case '\n':
695
			gethere();
696
			if (cf & CONTIN)
697
				goto Again;
698
			return c;
699
 
700
		  case '(':  /*)*/
701
#ifdef KSH
702
			if ((c2 = getsc()) == '(') /*)*/
703
				/* XXX need to handle ((...); (...)) */
704
				c = MDPAREN;
705
			else
706
				ungetsc(c2);
707
#endif /* KSH */
708
			return c;
709
		  /*(*/
710
		  case ')':
711
			return c;
712
		}
713
	}
714
 
715
	*wp++ = EOS;		/* terminate word */
716
	yylval.cp = Xclose(ws, wp);
717
	if (state == SWORD
718
#ifdef KSH
719
		|| state == SLETPAREN
720
#endif /* KSH */
721
		)	/* ONEWORD? */
722
		return LWORD;
723
	ungetsc(c);		/* unget terminator */
724
 
725
	/* copy word to unprefixed string ident */
726
	for (sp = yylval.cp, dp = ident; dp < ident+IDENT && (c = *sp++) == CHAR; )
727
		*dp++ = *sp++;
728
	/* Make sure the ident array stays '\0' paded */
729
	memset(dp, 0, (ident+IDENT) - dp + 1);
730
	if (c != EOS)
731
		*ident = '\0';	/* word is not unquoted */
732
 
733
	if (*ident != '\0' && (cf&(KEYWORD|ALIAS))) {
734
		struct tbl *p;
735
		int h = hash(ident);
736
 
737
		/* { */
738
		if ((cf & KEYWORD) && (p = tsearch(&keywords, ident, h))
739
		    && (!(cf & ESACONLY) || p->val.i == ESAC || p->val.i == '}'))
740
		{
741
			afree(yylval.cp, ATEMP);
742
			return p->val.i;
743
		}
744
		if ((cf & ALIAS) && (p = tsearch(&aliases, ident, h))
745
		    && (p->flag & ISSET))
746
		{
747
			register Source *s;
748
 
749
			for (s = source; s->type == SALIAS; s = s->next)
750
				if (s->u.tblp == p)
751
					return LWORD;
752
			/* push alias expansion */
753
			s = pushs(SALIAS, source->areap);
754
			s->start = s->str = p->val.s;
755
			s->u.tblp = p;
756
			s->next = source;
757
			source = s;
758
			afree(yylval.cp, ATEMP);
759
			goto Again;
760
		}
761
	}
762
 
763
	return LWORD;
764
}
765
 
766
static void
767
gethere()
768
{
769
	register struct ioword **p;
770
 
771
	for (p = heres; p < herep; p++)
772
		readhere(*p);
773
	herep = heres;
774
}
775
 
776
/*
777
 * read "<<word" text into temp file
778
 */
779
 
780
static void
781
readhere(iop)
782
	struct ioword *iop;
783
{
784
	register int c;
785
	char *volatile eof;
786
	char *eofp;
787
	int skiptabs;
788
	XString xs;
789
	char *xp;
790
	int xpos;
791
 
792
	eof = evalstr(iop->delim, 0);
793
 
794
	if (!(iop->flag & IOEVAL))
795
		ignore_backslash_newline++;
796
 
797
	Xinit(xs, xp, 256, ATEMP);
798
 
799
	for (;;) {
800
		eofp = eof;
801
		skiptabs = iop->flag & IOSKIP;
802
		xpos = Xsavepos(xs, xp);
803
		while ((c = getsc()) != 0) {
804
			if (skiptabs) {
805
				if (c == '\t')
806
					continue;
807
				skiptabs = 0;
808
			}
809
			if (c != *eofp)
810
				break;
811
			Xcheck(xs, xp);
812
			Xput(xs, xp, c);
813
			eofp++;
814
		}
815
		/* Allow EOF here so commands with out trailing newlines
816
		 * will work (eg, ksh -c '...', $(...), etc).
817
		 */
818
		if (*eofp == '\0' && (c == 0 || c == '\n')) {
819
			xp = Xrestpos(xs, xp, xpos);
820
			break;
821
		}
822
		ungetsc(c);
823
		while ((c = getsc()) != '\n') {
824
			if (c == 0)
825
				yyerror("here document `%s' unclosed\n", eof);
826
			Xcheck(xs, xp);
827
			Xput(xs, xp, c);
828
		}
829
		Xcheck(xs, xp);
830
		Xput(xs, xp, c);
831
	}
832
	Xput(xs, xp, '\0');
833
	iop->heredoc = Xclose(xs, xp);
834
 
835
	if (!(iop->flag & IOEVAL))
836
		ignore_backslash_newline--;
837
}
838
 
839
void
840
#ifdef HAVE_PROTOTYPES
841
yyerror(const char *fmt, ...)
842
#else
843
yyerror(fmt, va_alist)
844
	const char *fmt;
845
	va_dcl
846
#endif
847
{
848
	va_list va;
849
 
850
	/* pop aliases and re-reads */
851
	while (source->type == SALIAS || source->type == SREREAD)
852
		source = source->next;
853
	source->str = null;	/* zap pending input */
854
 
855
	error_prefix(TRUE);
856
	SH_VA_START(va, fmt);
857
	shf_vfprintf(shl_out, fmt, va);
858
	va_end(va);
859
	errorf(null);
860
}
861
 
862
/*
863
 * input for yylex with alias expansion
864
 */
865
 
866
Source *
867
pushs(type, areap)
868
	int type;
869
	Area *areap;
870
{
871
	register Source *s;
872
 
873
	s = (Source *) alloc(sizeof(Source), areap);
874
	s->type = type;
875
	s->str = null;
876
	s->start = NULL;
877
	s->line = 0;
878
	s->errline = 0;
879
	s->file = NULL;
880
	s->flags = 0;
881
	s->next = NULL;
882
	s->areap = areap;
883
	if (type == SFILE || type == SSTDIN) {
884
		char *dummy;
885
		Xinit(s->xs, dummy, 256, s->areap);
886
	} else
887
		memset(&s->xs, 0, sizeof(s->xs));
888
	return s;
889
}
890
 
891
static int
892
getsc__()
893
{
894
	register Source *s = source;
895
	register int c;
896
 
897
	while ((c = *s->str++) == 0) {
898
		s->str = NULL;		/* return 0 for EOF by default */
899
		switch (s->type) {
900
		  case SEOF:
901
			s->str = null;
902
			return 0;
903
 
904
		  case SSTDIN:
905
		  case SFILE:
906
			getsc_line(s);
907
			break;
908
 
909
		  case SWSTR:
910
			break;
911
 
912
		  case SSTRING:
913
			break;
914
 
915
		  case SWORDS:
916
			s->start = s->str = *s->u.strv++;
917
			s->type = SWORDSEP;
918
			break;
919
 
920
		  case SWORDSEP:
921
			if (*s->u.strv == NULL) {
922
				s->start = s->str = newline;
923
				s->type = SEOF;
924
			} else {
925
				s->start = s->str = space;
926
				s->type = SWORDS;
927
			}
928
			break;
929
 
930
		  case SALIAS:
931
			if (s->flags & SF_ALIASEND) {
932
				/* pass on an unused SF_ALIAS flag */
933
				source = s->next;
934
				source->flags |= s->flags & SF_ALIAS;
935
				s = source;
936
			} else if (*s->u.tblp->val.s
937
				 && isspace(strchr(s->u.tblp->val.s, 0)[-1]))
938
			{
939
				source = s = s->next;	/* pop source stack */
940
				/* Note that this alias ended with a space,
941
				 * enabling alias expansion on the following
942
				 * word.
943
				 */
944
				s->flags |= SF_ALIAS;
945
			} else {
946
				/* At this point, we need to keep the current
947
				 * alias in the source list so recursive
948
				 * aliases can be detected and we also need
949
				 * to return the next character.  Do this
950
				 * by temporarily popping the alias to get
951
				 * the next character and then put it back
952
				 * in the source list with the SF_ALIASEND
953
				 * flag set.
954
				 */
955
				source = s->next;	/* pop source stack */
956
				source->flags |= s->flags & SF_ALIAS;
957
				c = getsc__();
958
				if (c) {
959
					s->flags |= SF_ALIASEND;
960
					s->ugbuf[0] = c; s->ugbuf[1] = '\0';
961
					s->start = s->str = s->ugbuf;
962
					s->next = source;
963
					source = s;
964
				} else {
965
					s = source;
966
					/* avoid reading eof twice */
967
					s->str = NULL;
968
					break;
969
				}
970
			}
971
			continue;
972
 
973
		  case SREREAD:
974
			if (s->start != s->ugbuf) /* yuck */
975
				afree(s->u.freeme, ATEMP);
976
			source = s = s->next;
977
			continue;
978
		}
979
		if (s->str == NULL) {
980
			s->type = SEOF;
981
			s->start = s->str = null;
982
			return '\0';
983
		}
984
		if (s->flags & SF_ECHO) {
985
			shf_puts(s->str, shl_out);
986
			shf_flush(shl_out);
987
		}
988
	}
989
	return c;
990
}
991
 
992
static void
993
getsc_line(s)
994
	Source *s;
995
{
996
	char *xp = Xstring(s->xs, xp);
997
	int interactive = Flag(FTALKING) && s->type == SSTDIN;
998
	int have_tty = interactive && (s->flags & SF_TTY);
999
 
1000
	/* Done here to ensure nothing odd happens when a timeout occurs */
1001
	XcheckN(s->xs, xp, LINE);
1002
	*xp = '\0';
1003
	s->start = s->str = xp;
1004
 
1005
#ifdef KSH
1006
	if (have_tty && ksh_tmout) {
1007
		ksh_tmout_state = TMOUT_READING;
1008
		alarm(ksh_tmout);
1009
	}
1010
#endif /* KSH */
1011
#ifdef EDIT
1012
	if (have_tty && (0
1013
# ifdef VI
1014
			 || Flag(FVI)
1015
# endif /* VI */
1016
# ifdef EMACS
1017
			 || Flag(FEMACS) || Flag(FGMACS)
1018
# endif /* EMACS */
1019
		))
1020
	{
1021
		int nread;
1022
 
1023
		nread = x_read(xp, LINE);
1024
		if (nread < 0)	/* read error */
1025
			nread = 0;
1026
		xp[nread] = '\0';
1027
		xp += nread;
1028
	}
1029
	else
1030
#endif /* EDIT */
1031
	{
1032
		if (interactive) {
1033
			pprompt(prompt, 0);
1034
		} else
1035
			s->line++;
1036
 
1037
		while (1) {
1038
			char *p = shf_getse(xp, Xnleft(s->xs, xp), s->u.shf);
1039
 
1040
			if (!p && shf_error(s->u.shf)
1041
			    && shf_errno(s->u.shf) == EINTR)
1042
			{
1043
				shf_clearerr(s->u.shf);
1044
				if (trap)
1045
					runtraps(0);
1046
				continue;
1047
			}
1048
			if (!p || (xp = p, xp[-1] == '\n'))
1049
				break;
1050
			/* double buffer size */
1051
			xp++; /* move past null so doubling works... */
1052
			XcheckN(s->xs, xp, Xlength(s->xs, xp));
1053
			xp--; /* ...and move back again */
1054
		}
1055
		/* flush any unwanted input so other programs/builtins
1056
		 * can read it.  Not very optimal, but less error prone
1057
		 * than flushing else where, dealing with redirections,
1058
		 * etc..
1059
		 * todo: reduce size of shf buffer (~128?) if SSTDIN
1060
		 */
1061
		if (s->type == SSTDIN)
1062
			shf_flush(s->u.shf);
1063
	}
1064
	/* XXX: temporary kludge to restore source after a
1065
	 * trap may have been executed.
1066
	 */
1067
	source = s;
1068
#ifdef KSH
1069
	if (have_tty && ksh_tmout)
1070
	{
1071
		ksh_tmout_state = TMOUT_EXECUTING;
1072
		alarm(0);
1073
	}
1074
#endif /* KSH */
1075
	s->start = s->str = Xstring(s->xs, xp);
1076
	strip_nuls(Xstring(s->xs, xp), Xlength(s->xs, xp));
1077
	/* Note: if input is all nulls, this is not eof */
1078
	if (Xlength(s->xs, xp) == 0) { /* EOF */
1079
		if (s->type == SFILE)
1080
			shf_fdclose(s->u.shf);
1081
		s->str = NULL;
1082
	} else if (interactive) {
1083
#ifdef HISTORY
1084
		char *p = Xstring(s->xs, xp);
1085
		if (cur_prompt == PS1)
1086
			while (*p && ctype(*p, C_IFS) && ctype(*p, C_IFSWS))
1087
				p++;
1088
		if (*p) {
1089
# ifdef EASY_HISTORY
1090
			if (cur_prompt == PS2)
1091
				histappend(Xstring(s->xs, xp), 1);
1092
			else
1093
# endif /* EASY_HISTORY */
1094
			{
1095
				s->line++;
1096
				histsave(s->line, s->str, 1);
1097
			}
1098
		}
1099
#endif /* HISTORY */
1100
	}
1101
	if (interactive)
1102
		set_prompt(PS2, (Source *) 0);
1103
}
1104
 
1105
void
1106
set_prompt(to, s)
1107
	int to;
1108
	Source *s;
1109
{
1110
	cur_prompt = to;
1111
 
1112
	switch (to) {
1113
	case PS1: /* command */
1114
#ifdef KSH
1115
		/* Substitute ! and !! here, before substitutions are done
1116
		 * so ! in expanded variables are not expanded.
1117
		 * NOTE: this is not what at&t ksh does (it does it after
1118
		 * substitutions, POSIX doesn't say which is to be done.
1119
		 */
1120
		{
1121
			struct shf *shf;
1122
			char *ps1;
1123
			Area *saved_atemp;
1124
 
1125
			ps1 = str_val(global("PS1"));
1126
			shf = shf_sopen((char *) 0, strlen(ps1) * 2,
1127
				SHF_WR | SHF_DYNAMIC, (struct shf *) 0);
1128
			while (*ps1) {
1129
				if (*ps1 != '!' || *++ps1 == '!')
1130
					shf_putchar(*ps1++, shf);
1131
				else
1132
					shf_fprintf(shf, "%d",
1133
						s ? s->line + 1 : 0);
1134
			}
1135
			ps1 = shf_sclose(shf);
1136
			saved_atemp = ATEMP;
1137
			newenv(E_ERRH);
1138
			if (ksh_sigsetjmp(e->jbuf, 0)) {
1139
				prompt = safe_prompt;
1140
				/* Don't print an error - assume it has already
1141
				 * been printed.  Reason is we may have forked
1142
				 * to run a command and the child may be
1143
				 * unwinding its stack through this code as it
1144
				 * exits.
1145
				 */
1146
			} else
1147
				prompt = str_save(substitute(ps1, 0),
1148
						 saved_atemp);
1149
			quitenv();
1150
		}
1151
#else /* KSH */
1152
		prompt = str_val(global("PS1"));
1153
#endif /* KSH */
1154
		break;
1155
 
1156
	case PS2: /* command continuation */
1157
		prompt = str_val(global("PS2"));
1158
		break;
1159
	}
1160
}
1161
 
1162
/* See also related routine, promptlen() in edit.c */
1163
void
1164
pprompt(cp, ntruncate)
1165
	const char *cp;
1166
	int ntruncate;
1167
{
1168
#if 0
1169
	char nbuf[32];
1170
	int c;
1171
 
1172
	while (*cp != 0) {
1173
		if (*cp != '!')
1174
			c = *cp++;
1175
		else if (*++cp == '!')
1176
			c = *cp++;
1177
		else {
1178
			int len;
1179
			char *p;
1180
 
1181
			shf_snprintf(p = nbuf, sizeof(nbuf), "%d",
1182
				source->line + 1);
1183
			len = strlen(nbuf);
1184
			if (ntruncate) {
1185
				if (ntruncate >= len) {
1186
					ntruncate -= len;
1187
					continue;
1188
				}
1189
				p += ntruncate;
1190
				len -= ntruncate;
1191
				ntruncate = 0;
1192
			}
1193
			shf_write(p, len, shl_out);
1194
			continue;
1195
		}
1196
		if (ntruncate)
1197
			--ntruncate;
1198
		else
1199
			shf_putc(c, shl_out);
1200
	}
1201
#endif /* 0 */
1202
	shf_puts(cp + ntruncate, shl_out);
1203
	shf_flush(shl_out);
1204
}
1205
 
1206
/* Read the variable part of a ${...} expression (ie, up to but not including
1207
 * the :[-+?=#%] or close-brace.
1208
 */
1209
static char *
1210
get_brace_var(wsp, wp)
1211
	XString *wsp;
1212
	char *wp;
1213
{
1214
	enum parse_state {
1215
			   PS_INITIAL, PS_SAW_HASH, PS_IDENT,
1216
			   PS_NUMBER, PS_VAR1, PS_END
1217
			 }
1218
		state;
1219
	char c;
1220
 
1221
	state = PS_INITIAL;
1222
	while (1) {
1223
		c = getsc();
1224
		/* State machine to figure out where the variable part ends. */
1225
		switch (state) {
1226
		  case PS_INITIAL:
1227
			if (c == '#') {
1228
				state = PS_SAW_HASH;
1229
				break;
1230
			}
1231
			/* fall through.. */
1232
		  case PS_SAW_HASH:
1233
			if (letter(c))
1234
				state = PS_IDENT;
1235
			else if (digit(c))
1236
				state = PS_NUMBER;
1237
			else if (ctype(c, C_VAR1))
1238
				state = PS_VAR1;
1239
			else
1240
				state = PS_END;
1241
			break;
1242
		  case PS_IDENT:
1243
			if (!letnum(c)) {
1244
				state = PS_END;
1245
				if (c == '[') {
1246
					char *tmp, *p;
1247
 
1248
					if (!arraysub(&tmp))
1249
						yyerror("missing ]\n");
1250
					*wp++ = c;
1251
					for (p = tmp; *p; ) {
1252
						Xcheck(*wsp, wp);
1253
						*wp++ = *p++;
1254
					}
1255
					afree(tmp, ATEMP);
1256
					c = getsc(); /* the ] */
1257
				}
1258
			}
1259
			break;
1260
		  case PS_NUMBER:
1261
			if (!digit(c))
1262
				state = PS_END;
1263
			break;
1264
		  case PS_VAR1:
1265
			state = PS_END;
1266
			break;
1267
		  case PS_END: /* keep gcc happy */
1268
			break;
1269
		}
1270
		if (state == PS_END) {
1271
			*wp++ = '\0';	/* end of variable part */
1272
			ungetsc(c);
1273
			break;
1274
		}
1275
		Xcheck(*wsp, wp);
1276
		*wp++ = c;
1277
	}
1278
	return wp;
1279
}
1280
 
1281
/*
1282
 * Save an array subscript - returns true if matching bracket found, false
1283
 * if eof or newline was found.
1284
 * (Returned string double null terminated)
1285
 */
1286
static int
1287
arraysub(strp)
1288
	char **strp;
1289
{
1290
	XString ws;
1291
	char	*wp;
1292
	char	c;
1293
	int 	depth = 1;	/* we are just past the initial [ */
1294
 
1295
	Xinit(ws, wp, 32, ATEMP);
1296
 
1297
	do {
1298
		c = getsc();
1299
		Xcheck(ws, wp);
1300
		*wp++ = c;
1301
		if (c == '[')
1302
			depth++;
1303
		else if (c == ']')
1304
			depth--;
1305
	} while (depth > 0 && c && c != '\n');
1306
 
1307
	*wp++ = '\0';
1308
	*strp = Xclose(ws, wp);
1309
 
1310
	return depth == 0 ? 1 : 0;
1311
}
1312
 
1313
/* Unget a char: handles case when we are already at the start of the buffer */
1314
static const char *
1315
ungetsc(c)
1316
	int c;
1317
{
1318
	if (backslash_skip)
1319
		backslash_skip--;
1320
	/* Don't unget eof... */
1321
	if (source->str == null && c == '\0')
1322
		return source->str;
1323
	if (source->str > source->start)
1324
		source->str--;
1325
	else {
1326
		Source *s;
1327
 
1328
		s = pushs(SREREAD, source->areap);
1329
		s->ugbuf[0] = c; s->ugbuf[1] = '\0';
1330
		s->start = s->str = s->ugbuf;
1331
		s->next = source;
1332
		source = s;
1333
	}
1334
	return source->str;
1335
}
1336
 
1337
 
1338
/* Called to get a char that isn't a \newline sequence. */
1339
static int
1340
getsc_bn ARGS((void))
1341
{
1342
	int c, c2;
1343
 
1344
	if (ignore_backslash_newline)
1345
		return getsc_();
1346
 
1347
	if (backslash_skip == 1) {
1348
		backslash_skip = 2;
1349
		return getsc_();
1350
	}
1351
 
1352
	backslash_skip = 0;
1353
 
1354
	while (1) {
1355
		c = getsc_();
1356
		if (c == '\\') {
1357
			if ((c2 = getsc_()) == '\n')
1358
				/* ignore the \newline; get the next char... */
1359
				continue;
1360
			ungetsc(c2);
1361
			backslash_skip = 1;
1362
		}
1363
		return c;
1364
	}
1365
}
1366
 
1367
static Lex_state *
1368
push_state_(si, old_end)
1369
	State_info *si;
1370
	Lex_state *old_end;
1371
{
1372
	Lex_state	*new = alloc(sizeof(Lex_state) * STATE_BSIZE, ATEMP);
1373
 
1374
	new[0].ls_info.base = old_end;
1375
	si->base = &new[0];
1376
	si->end = &new[STATE_BSIZE];
1377
	return &new[1];
1378
}
1379
 
1380
static Lex_state *
1381
pop_state_(si, old_end)
1382
	State_info *si;
1383
	Lex_state *old_end;
1384
{
1385
	Lex_state *old_base = si->base;
1386
 
1387
	si->base = old_end->ls_info.base - STATE_BSIZE;
1388
	si->end = old_end->ls_info.base;
1389
 
1390
	afree(old_base, ATEMP);
1391
 
1392
	return si->base + STATE_BSIZE - 1;;
1393
}