Subversion Repositories tendra.SVN

Rev

Rev 2 | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
2 7u83 1
/*
7 7u83 2
 * Copyright (c) 2002-2005 The TenDRA Project <http://www.tendra.org/>.
3
 * All rights reserved.
4
 *
5
 * Redistribution and use in source and binary forms, with or without
6
 * modification, are permitted provided that the following conditions are met:
7
 *
8
 * 1. Redistributions of source code must retain the above copyright notice,
9
 *    this list of conditions and the following disclaimer.
10
 * 2. Redistributions in binary form must reproduce the above copyright notice,
11
 *    this list of conditions and the following disclaimer in the documentation
12
 *    and/or other materials provided with the distribution.
13
 * 3. Neither the name of The TenDRA Project nor the names of its contributors
14
 *    may be used to endorse or promote products derived from this software
15
 *    without specific, prior written permission.
16
 *
17
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS
18
 * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
19
 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
20
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR
21
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
22
 * EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
23
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
24
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
25
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
26
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
27
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
 *
29
 * $Id$
30
 */
31
/*
2 7u83 32
    		 Crown Copyright (c) 1997
7 7u83 33
 
2 7u83 34
    This TenDRA(r) Computer Program is subject to Copyright
35
    owned by the United Kingdom Secretary of State for Defence
36
    acting through the Defence Evaluation and Research Agency
37
    (DERA).  It is made available to Recipients with a
38
    royalty-free licence for its use, reproduction, transfer
39
    to other parties and amendment for any purpose not excluding
40
    product development provided that any such use et cetera
41
    shall be deemed to be acceptance of the following conditions:-
7 7u83 42
 
2 7u83 43
        (1) Its Recipients shall ensure that this Notice is
44
        reproduced upon any copies or amended versions of it;
7 7u83 45
 
2 7u83 46
        (2) Any amended version of it shall be clearly marked to
47
        show both the nature of and the organisation responsible
48
        for the relevant amendment or amendments;
7 7u83 49
 
2 7u83 50
        (3) Its onward transfer from a recipient to another
51
        party shall be deemed to be that party's acceptance of
52
        these conditions;
7 7u83 53
 
2 7u83 54
        (4) DERA gives no warranty or assurance as to its
55
        quality or suitability for any purpose and DERA accepts
56
        no liability whatsoever in relation to any use to which
57
        it may be put.
58
*/
59
 
60
 
61
#include "config.h"
62
#include <limits.h>
63
#if FS_MULTIBYTE
64
#include <locale.h>
65
#endif
66
#include "c_types.h"
67
#include "exp_ops.h"
68
#include "hashid_ops.h"
69
#include "id_ops.h"
70
#include "member_ops.h"
71
#include "str_ops.h"
72
#include "error.h"
73
#include "catalog.h"
74
#include "option.h"
75
#include "buffer.h"
76
#include "char.h"
77
#include "constant.h"
78
#include "file.h"
79
#include "dump.h"
80
#include "hash.h"
81
#include "lex.h"
82
#include "literal.h"
83
#include "macro.h"
84
#include "parse.h"
85
#include "pragma.h"
86
#include "preproc.h"
87
#include "print.h"
88
#include "syntax.h"
89
#include "ustring.h"
90
#include "xalloc.h"
91
 
92
 
93
/*
94
    PARSER OPTIONS
95
 
96
    These flags control the behaviour of the parser and determine whether
97
    such features as trigraphs and digraphs are allowed.
98
*/
99
 
7 7u83 100
int allow_trigraphs = 1;
101
int allow_digraphs = 1;
102
int allow_unicodes = LANGUAGE_CPP;
103
int allow_multibyte = 1;
104
int allow_cpp_comments = LANGUAGE_CPP;
105
int allow_dos_newline = 0;
106
int allow_extra_symbols = 0;
107
int allow_iso_keywords = LANGUAGE_CPP;
108
int allow_newline_strings = 0;
109
int analyse_comments = 1;
110
unsigned long max_id_length = 1024;
2 7u83 111
 
112
 
113
/*
114
    TABLE OF SYMBOLS AND KEYWORDS
115
 
116
    This table gives the mapping between lexical token numbers and the
117
    corresponding symbols and keywords.  It is derived from the list of
118
    tokens in symbols.h.
119
*/
120
 
7 7u83 121
CONST char *token_names[] = {
	/* Each LEX_TOKEN entry of symbols.h contributes its second argument */
#define LEX_TOKEN(A, B, C)		(B),
#include "symbols.h"
#undef LEX_TOKEN
	/* Terminating entry */
	NULL
};
2 7u83 127
 
128
 
129
/*
130
    TRANSLATE A LEXICAL TOKEN TO ITS PRIMARY FORM
131
 
132
    This routine translates the alternative ISO keywords and digraphs
133
    into their primary form.
134
*/
135
 
7 7u83 136
int
137
primary_form(int t)
2 7u83 138
{
7 7u83 139
	int u = t;
140
	switch (u) {
141
	case lex_and_H2:
142
		u = lex_and_H1;
143
		break;
144
	case lex_and_Heq_H2:
145
		u = lex_and_Heq_H1;
146
		break;
147
	case lex_close_Hbrace_H2:
148
		u = lex_close_Hbrace_H1;
149
		break;
150
	case lex_close_Hsquare_H2:
151
		u = lex_close_Hsquare_H1;
152
		break;
153
	case lex_compl_H2:
154
		u = lex_compl_H1;
155
		break;
156
	case lex_hash_H2:
157
		u = lex_hash_H1;
158
		break;
159
	case lex_hash_Hhash_H2:
160
		u = lex_hash_Hhash_H1;
161
		break;
162
	case lex_logical_Hand_H2:
163
		u = lex_logical_Hand_H1;
164
		break;
165
	case lex_logical_Hor_H2:
166
		u = lex_logical_Hor_H1;
167
		break;
168
	case lex_not_H2:
169
		u = lex_not_H1;
170
		break;
171
	case lex_not_Heq_H2:
172
		u = lex_not_Heq_H1;
173
		break;
174
	case lex_open_Hbrace_H2:
175
		u = lex_open_Hbrace_H1;
176
		break;
177
	case lex_open_Hsquare_H2:
178
		u = lex_open_Hsquare_H1;
179
		break;
180
	case lex_or_H2:
181
		u = lex_or_H1;
182
		break;
183
	case lex_or_Heq_H2:
184
		u = lex_or_Heq_H1;
185
		break;
186
	case lex_xor_H2:
187
		u = lex_xor_H1;
188
		break;
189
	case lex_xor_Heq_H2:
190
		u = lex_xor_Heq_H1;
191
		break;
192
	}
193
	return(u);
2 7u83 194
}
195
 
196
 
197
/*
198
    REPORT A DIGRAPH TOKEN
199
 
200
    This routine reports the digraph t, returning the primary form of t.
201
*/
202
 
7 7u83 203
int
204
get_digraph(int t)
2 7u83 205
{
7 7u83 206
	int u = primary_form(t);
207
	if (u != t) {
208
		update_column();
209
		report(crt_loc, ERR_lex_digraph_replace(t, u));
210
	}
211
	return(u);
2 7u83 212
}
213
 
214
 
215
/*
216
    CREATE A KEYWORD
217
 
218
    This routine creates a keyword identifier with name nm and lexical
219
    token number key.  The special case when key is lex_unknown is used
220
    to indicate a reserved identifier.
221
*/
222
 
7 7u83 223
IDENTIFIER
224
make_keyword(HASHID nm, int key, IDENTIFIER id)
2 7u83 225
{
7 7u83 226
	PTR(IDENTIFIER)ptr = hashid_id(nm);
227
	if (IS_NULL_id(id)) {
228
		/* Find keyword type */
229
		unsigned tag = id_keyword_tag;
230
		if (key >= FIRST_ISO_KEYWORD && key <= LAST_ISO_KEYWORD) {
231
			tag = id_iso_keyword_tag;
232
		} else if (key >= FIRST_SYMBOL && key <= LAST_SYMBOL) {
233
			tag = id_iso_keyword_tag;
234
		} else if (key == lex_unknown) {
235
			tag = id_reserved_tag;
236
		}
237
 
238
		/* Create keyword identifier */
239
		MAKE_id_keyword_etc(tag, nm, dspec_none, NULL_nspace, crt_loc,
240
				    id);
241
		COPY_ulong(id_no(id), (unsigned long)key);
2 7u83 242
	}
7 7u83 243
	COPY_id(hashid_cache(nm), NULL_id);
244
	if (do_keyword) {
245
		dump_declare(id, &crt_loc, 1);
246
	}
2 7u83 247
 
7 7u83 248
	/* Add keyword to identifier meanings */
249
	for (;;) {
250
		IDENTIFIER pid = DEREF_id(ptr);
251
		switch (TAG_id(pid)) {
252
		case id_dummy_tag:
253
		case id_keyword_tag:
254
		case id_iso_keyword_tag:
255
		case id_reserved_tag:
256
			COPY_id(id_alias(id), pid);
257
			COPY_id(ptr, id);
258
			return(id);
259
		}
260
		ptr = id_alias(pid);
2 7u83 261
	}
7 7u83 262
	/* NOTREACHED */
2 7u83 263
}
264
 
265
 
266
/*
267
    INITIALISE KEYWORDS
268
 
269
    This routine initialises the hash table entries for the keywords.
270
*/
271
 
7 7u83 272
void
273
init_keywords(void)
2 7u83 274
{
7 7u83 275
	int key;
2 7u83 276
 
7 7u83 277
	/* Set up keyword entries */
278
	for (key = FIRST_KEYWORD; key <= LAST_KEYWORD; key++) {
279
		int ext = 0;
280
		string keyword = token_name(key);
281
		unsigned long h = hash(keyword);
282
		if (keyword[0] == char_less) {
283
			ext = 1;
284
		}
285
		KEYWORD(key) = lookup_name(keyword, h, ext, key);
286
	}
2 7u83 287
 
7 7u83 288
	/* Bring the C keywords into scope */
289
	for (key = FIRST_C_KEYWORD; key <= LAST_C_KEYWORD; key++) {
290
		HASHID nm = KEYWORD(key);
291
		IGNORE make_keyword(nm, key, NULL_id);
292
	}
2 7u83 293
 
7 7u83 294
	/* Bring the C++ keywords into scope */
295
	for (key = FIRST_CPP_KEYWORD; key <= LAST_CPP_KEYWORD; key++) {
296
		HASHID nm = KEYWORD(key);
2 7u83 297
#if LANGUAGE_CPP
7 7u83 298
		IGNORE make_keyword(nm, key, NULL_id);
2 7u83 299
#else
7 7u83 300
		if (key != lex_wchar_Ht) {
301
			IGNORE make_keyword(nm, lex_unknown, NULL_id);
302
		}
303
#endif
2 7u83 304
	}
305
 
7 7u83 306
	/* Bring the ISO alternative keywords into scope */
307
	for (key = FIRST_ISO_KEYWORD; key <= LAST_ISO_KEYWORD; key++) {
308
		HASHID nm = KEYWORD(key);
309
		if (allow_iso_keywords) {
310
			IGNORE make_keyword(nm, key, NULL_id);
311
		} else {
312
			IGNORE make_keyword(nm, lex_unknown, NULL_id);
313
		}
2 7u83 314
	}
315
 
7 7u83 316
	/* Find underlying dummy identifier for 'operator' */
317
	underlying_op = DEREF_id(hashid_id(KEYWORD(lex_operator)));
318
	underlying_op = underlying_id(underlying_op);
319
	return;
2 7u83 320
}
321
 
322
 
323
/*
324
    ADJUST A CHARACTER FOR TRIGRAPHS
325
 
326
    This routine is called after a question mark has been read from the
327
    input file to allow for trigraphs.  It returns the trigraph replacement
328
    character or '?' if the following characters do not form a trigraph.
329
*/
330
 
7 7u83 331
static int
332
adjust_trigraph(void)
2 7u83 333
{
7 7u83 334
	if (allow_trigraphs) {
335
		int c = next_char();
336
		if (c == char_end) {
337
			c = refill_char();
2 7u83 338
		}
7 7u83 339
		if (c == char_question) {
340
			int d;
341
			c = next_char();
342
			if (c == char_end) {
343
				c = refill_char();
344
			}
345
			switch (c) {
346
			case char_close_round:
347
				/* Map '\?\?)' to ']' */
348
				d = char_close_square;
349
				break;
350
			case char_equal:
351
				/* Map '\?\?=' to '#' */
352
				d = char_hash;
353
				break;
354
			case char_exclaim:
355
				/* Map '\?\?!' to '|' */
356
				d = char_bar;
357
				break;
358
			case char_greater:
359
				/* Map '\?\?>' to '}' */
360
				d = char_close_brace;
361
				break;
362
			case char_less:
363
				/* Map '\?\?<' to '{' */
364
				d = char_open_brace;
365
				break;
366
			case char_minus:
367
				/* Map '\?\?-' to '~' */
368
				d = char_tilde;
369
				break;
370
			case char_open_round:
371
				/* Map '\?\?(' to '[' */
372
				d = char_open_square;
373
				break;
374
			case char_single_quote:
375
				/* Map '\?\?\'' to '^' */
376
				d = char_circum;
377
				break;
378
			case char_slash:
379
				/* Map '\?\?/' to '\\' */
380
				d = char_backslash;
381
				break;
382
			default:
383
				/* Not a trigraph */
384
				unread_char(c);
385
				unread_char(char_question);
386
				return(char_question);
387
			}
388
			update_column();
389
			report(crt_loc, ERR_lex_trigraph_replace(c, d));
390
			return(d);
391
		} else {
392
			/* Not a trigraph */
393
			unread_char(c);
2 7u83 394
		}
395
	}
7 7u83 396
	return(char_question);
2 7u83 397
}
398
 
399
 
400
/*
401
    READ A NEWLINE CHARACTER
402
 
403
    This routine is called after each carriage return character, checking
404
    for a following newline character.
405
*/
406
 
7 7u83 407
static int
408
read_newline(void)
2 7u83 409
{
7 7u83 410
	if (allow_dos_newline) {
411
		int c = next_char();
412
		if (c == char_end) {
413
			c = refill_char();
414
		}
415
		if (c == char_newline) {
416
			return(c);
417
		}
418
		unread_char(c);
419
	}
420
	return(char_return);
2 7u83 421
}
422
 
423
 
424
/*
425
    READ AN END OF FILE CHARACTER
426
 
427
    This routine is called after each terminate character, checking for
428
    a following end of file character.
429
*/
430
 
7 7u83 431
static int
432
read_eof(void)
2 7u83 433
{
7 7u83 434
	if (allow_dos_newline) {
435
		int c = next_char();
436
		if (c == char_end) {
437
			c = refill_char();
438
		}
439
		if (c == char_eof) {
440
			return(c);
441
		}
442
		unread_char(c);
443
	}
444
	return(char_sub);
2 7u83 445
}
446
 
447
 
448
/*
449
    READ THE NEXT CHARACTER ALLOWING FOR TRIGRAPHS ETC.
450
 
451
    This routine reads the next character from the input file, adjusting
452
    it as necessary for trigraphs and escaped newlines.  This routine
453
    corresponds to phases 1 and 2 of the phases of translation.
454
*/
455
 
7 7u83 456
static int
457
read_char(void)
2 7u83 458
{
7 7u83 459
	for (;;) {
460
		int c = next_char();
461
		if (c == char_end) {
462
			c = refill_char();
463
		}
464
		if (c == char_question) {
465
			c = adjust_trigraph();
466
		}
467
		if (c != char_backslash) {
468
			/* Not an escaped newline */
469
			return(c);
470
		}
471
		c = next_char();
472
		if (c == char_end) {
473
			c = refill_char();
474
		}
475
		if (c == char_return) {
476
			c = read_newline();
477
		}
478
		if (c != char_newline) {
479
			/* Not an escaped newline */
480
			unread_char(c);
481
			return(char_backslash);
482
		}
483
		crt_loc.line++;
484
		crt_loc.column = 0;
485
		input_crt = input_posn;
2 7u83 486
	}
7 7u83 487
	/* NOTREACHED */
2 7u83 488
}
489
 
490
 
491
/*
492
    CHARACTER LOOK-UP TABLE
493
 
494
    This look-up table gives the various character types.  Note that the
495
    default look-up table is for ASCII, for other codesets the table
496
    needs to be rewritten.  The only really interesting points in the
497
    table itself are that newline has not been classified as a white-space
498
    and that character char_eof (-1) represents end of file.
499
*/
500
 
501
#define SPACE_M			0x01
502
#define ALPHA_M			0x02
503
#define DIGIT_M			0x04
504
#define ALNUM_M			0x08
505
#define PPDIG_M			0x10
506
#define SYMBL_M			0x20
507
#define NLINE_M			0x40
508
#define LEGAL_M			0x80
509
 
510
#define ILLEG			0x00
511
#define LEGAL			LEGAL_M
7 7u83 512
#define SPACE			(SPACE_M | LEGAL_M)
513
#define ALPHA			(ALPHA_M | ALNUM_M | PPDIG_M | LEGAL_M)
514
#define DIGIT			(DIGIT_M | ALNUM_M | PPDIG_M | LEGAL_M)
515
#define SYMBL			(SYMBL_M | LEGAL_M)
516
#define POINT			(PPDIG_M | SYMBL_M | LEGAL_M)
517
#define NLINE			(NLINE_M | LEGAL_M)
2 7u83 518
 
7 7u83 519
#define main_characters		(characters + 1)
520
#define lookup_char(C)		((int)main_characters[C])
521
#define is_white(T)		((T) & SPACE_M)
522
#define is_alpha(T)		((T) & ALPHA_M)
523
#define is_digit(T)		((T) & DIGIT_M)
524
#define is_alphanum(T)		((T) & ALNUM_M)
525
#define is_ppdigit(T)		((T) & PPDIG_M)
526
#define is_symbol(T)		((T) & SYMBL_M)
527
#define is_newline(T)		((T) & NLINE_M)
528
#define is_legal(T)		((T) & LEGAL_M)
2 7u83 529
 
7 7u83 530
static unsigned char characters[NO_CHAR + 2] = {
531
	LEGAL,			/* EOF */
532
#define CHAR_DATA(A, B, C, D)	(A),
2 7u83 533
#include "char.h"
534
#undef CHAR_DATA
7 7u83 535
	ILLEG			/* dummy */
536
};
2 7u83 537
 
7 7u83 538
static unsigned char *copy_characters = main_characters;
2 7u83 539
 
540
 
541
/*
542
    SET A CHARACTER LOOK-UP
543
 
544
    This routine sets the look-up value for character a to be equal to
545
    the underlying value for character b.  As a special case, setting
546
    the look-up for a carriage return to that for newline enables
547
    DOS-like rules on newline and end of file characters.
548
*/
549
 
7 7u83 550
void
551
set_char_lookup(int a, int b)
2 7u83 552
{
7 7u83 553
	if (a >= 0 && a < NO_CHAR && b >= 0 && b < NO_CHAR) {
554
		unsigned char t = copy_characters[b];
555
		if (a == char_return) {
556
			if (b == char_newline) {
557
				/* Set DOS-like newline rules */
558
				allow_dos_newline = 1;
559
				return;
560
			}
561
			if (b == char_return) {
562
				/* Unset DOS-like newline rules */
563
				allow_dos_newline = 0;
564
			}
565
		}
566
		main_characters[a] = t;
2 7u83 567
	}
7 7u83 568
	return;
2 7u83 569
}
570
 
571
 
572
/*
573
    SET A NUMBER OF CHARACTER LOOK-UPS
574
 
575
    This routine sets the character look-ups for all the elements of the
576
    string or character literal expression a to be equal to that for the
577
    character literal expression b.  If b is the null expression then
578
    the look-up is set to be an illegal character.
579
*/
580
 
7 7u83 581
void
582
set_character(EXP a, EXP b)
2 7u83 583
{
7 7u83 584
	int c = get_char_value(b);
585
	if (IS_exp_string_lit(a)) {
586
		STRING s = DEREF_str(exp_string_lit_str(a));
587
		unsigned long n = DEREF_ulong(str_simple_len(s));
588
		string t = DEREF_string(str_simple_text(s));
589
		unsigned kind = DEREF_unsigned(str_simple_kind(s));
590
		if (kind & STRING_MULTI) {
591
			while (n) {
592
				int ch = CHAR_SIMPLE;
593
				unsigned long d = get_multi_char(t, &ch);
594
				if (d < (unsigned long)NO_CHAR) {
595
					set_char_lookup((int)d, c);
596
				}
597
				t += MULTI_WIDTH;
598
				n--;
599
			}
600
		} else {
601
			while (n) {
602
				int d = (int)*t;
603
				set_char_lookup(d, c);
604
				t++;
605
				n--;
606
			}
2 7u83 607
		}
608
	} else {
7 7u83 609
		int d = get_char_value(a);
610
		if (d != char_illegal) {
611
			set_char_lookup(d, c);
612
		}
2 7u83 613
	}
7 7u83 614
	return;
2 7u83 615
}
616
 
617
 
618
/*
619
    CHECK FOR WHITE SPACE CHARACTERS
620
 
621
    This routine checks whether the character a represents a white space.
622
    The newline character constitutes a special case.
623
*/
624
 
7 7u83 625
int
626
is_white_char(unsigned long a)
2 7u83 627
{
7 7u83 628
	int t;
629
	if (a >= NO_CHAR) {
630
		return(0);
631
	}
632
	t = lookup_char(a);
633
	return(is_white(t) || is_newline(t));
2 7u83 634
}
635
 
636
 
637
/*
638
    CHECK FOR ALPHABETIC CHARACTERS
639
 
640
    This routine checks whether the character a represents an alphabetic
641
    character.
642
*/
643
 
7 7u83 644
int
645
is_alpha_char(unsigned long a)
2 7u83 646
{
7 7u83 647
	if (a >= NO_CHAR) {
648
		return(0);
649
	}
650
	return(is_alpha(lookup_char(a)));
2 7u83 651
}
652
 
653
 
654
/*
655
    CHECK FOR LEGAL CHARACTERS
656
 
657
    This routine checks whether the character a represents a legal character.
658
*/
659
 
7 7u83 660
int
661
is_legal_char(unsigned long a)
2 7u83 662
{
7 7u83 663
	if (a >= NO_CHAR) {
664
		return(0);
665
	}
666
	return(is_legal(lookup_char(a)));
2 7u83 667
}
668
 
669
 
670
/*
671
    PEEK AHEAD ONE CHARACTER
672
 
673
    This routine tests whether the next character is a (which will not be
674
    newline).  If so the current character is advanced one, otherwise it
675
    is left unchanged.  legal is set to false if the next character is
676
    not legal.
677
*/
678
 
7 7u83 679
int
680
peek_char(int a, int *legal)
2 7u83 681
{
7 7u83 682
	int c = read_char();
683
	ASSERT(a != char_newline);
684
	if (c == a) {
685
		return(1);
686
	}
687
	*legal = is_legal_char((unsigned long)c);
688
	unread_char(c);
689
	return(0);
2 7u83 690
}
691
 
692
 
693
/*
694
    TOKEN BUFFER
695
 
696
    This buffer is used by read_token to hold the values of identifiers,
697
    numbers and strings.
698
*/
699
 
7 7u83 700
/* Shared buffer written by read_string, skip_comment and skip_cpp_comment */
BUFFER token_buff = NULL_buff;
2 7u83 701
 
702
 
703
/*
704
    TOKEN IDENTIFICATION MACROS
705
 
706
    These macros are used to identify the start or end of certain tokens
707
    such as comments and strings.
708
*/
709
 
7 7u83 710
/* C style comments: tested on the character following a slash */
#define START_COMMENT(A)	((A) == char_asterix)
#define END_COMMENT(A, B)	((A) == char_asterix && (B) == char_slash)

/* C++ style comments: only recognised when allow_cpp_comments is set */
#define START_CPP_COMMENT(A)	((A) == char_slash && allow_cpp_comments)
#define END_CPP_COMMENT(A)	((A) == char_newline)

/* String and character literals: Q is the expected closing quote */
#define START_STRING(A)		((A) == char_quote || (A) == char_single_quote)
#define END_STRING(A, Q)	((A) == (Q))
2 7u83 716
 
717
 
718
/*
719
    END OF FILE FLAG
720
 
721
    Each source file should end in a newline character, which is not
722
    preceded by a backslash.  This flag is used to indicate whether the
723
    end of the present file has the correct form.
724
*/
725
 
7 7u83 726
/*
 * Set to 1 by skip_white when end of file directly follows a newline, and
 * by the string and comment readers when they meet end of file.
 */
static int good_eof = 0;
2 7u83 727
 
728
 
729
/*
730
    SKIP A STRING
731
 
732
    This routine skips a string or character literal.  It is entered after
733
    the initial quote, q, has been read.  Escape sequences are always
734
    allowed.  The routine returns lex_string_Hlit if the string terminates
735
    correctly and lex_eof otherwise.
736
*/
737
 
7 7u83 738
static int
739
skip_string(int q)
2 7u83 740
{
7 7u83 741
	int e = q;
742
	LOCATION loc;
743
	unsigned nl = 0;
744
	int escaped = 0;
745
	int have_char = 0;
746
	int allow_nl = allow_newline_strings;
747
	if (e == char_single_quote || in_preproc_dir == 1) {
748
		allow_nl = 0;
749
	}
750
	update_column();
751
	loc = crt_loc;
2 7u83 752
 
7 7u83 753
	/* Scan to end of string */
754
	for (;;) {
755
		int c = read_char();
756
		if (END_STRING(c, e) && !escaped) {
757
			if (e == char_single_quote && !have_char) {
758
				update_column();
759
				report(crt_loc, ERR_lex_ccon_empty());
760
			}
761
			if (nl) {
762
				report(loc, ERR_lex_string_nl(nl, nl));
763
			}
764
			return(lex_string_Hlit);
765
		}
766
		if (c == char_newline) {
767
			if (allow_nl) {
768
				/* Report newlines but continue */
769
				crt_loc.line++;
770
				crt_loc.column = 0;
771
				input_crt = input_posn;
772
				nl++;
773
			} else {
774
				unread_char(c);
775
				update_column();
776
				report(crt_loc, ERR_lex_string_pp_nl());
777
				break;
778
			}
779
		} else if (c == char_eof) {
780
			report(loc, ERR_lex_phases_str_eof());
781
			good_eof = 1;
782
			nl = 0;
783
			break;
784
		}
785
		if (escaped) {
786
			escaped = 0;
787
		} else {
788
			if (c == char_backslash) {
789
				escaped = 1;
790
			}
791
		}
792
		if (!escaped) {
793
			have_char = 1;
794
		}
2 7u83 795
	}
7 7u83 796
	if (nl) {
797
		/* Report newlines in string */
798
		report(loc, ERR_lex_string_nl(nl, nl));
2 7u83 799
	} else {
7 7u83 800
		/* Don't bother with error recovery */
801
		/* EMPTY */
2 7u83 802
	}
7 7u83 803
	return(lex_eof);
2 7u83 804
}
805
 
806
 
807
/*
808
    READ THE BODY OF A STRING
809
 
810
    This routine reads the body of a string or character literal or of a
811
    header name.  It is entered after the initial quote has been read.
812
    The corresponding close quote is passed in as q.  The esc argument
813
    indicates whether escape sequences are allowed (they are not in
814
    header names for example).  The string itself is built up in
815
    token_buff.  The routine returns lex_string_Hlit if the string
816
    terminates correctly and lex_eof otherwise.  It also sets
817
    token_buff.posn to point to the end of the string.
818
*/
819
 
7 7u83 820
int
821
read_string(int q, int esc)
2 7u83 822
{
7 7u83 823
	int c;
824
	int e = q;
825
	LOCATION loc;
826
	long posn = -1;
827
	int escaped = 0;
828
	unsigned nl = 0;
829
	int have_char = 0;
830
	string s = token_buff.start;
831
	string se = token_buff.end;
832
	int allow_nl = allow_newline_strings;
833
	update_column();
834
	if (e == char_single_quote) {
835
		posn = tell_buffer(crt_buff_no);
836
		allow_nl = 0;
837
	} else if (in_preproc_dir == 1) {
838
		allow_nl = 0;
839
	}
840
	loc = crt_loc;
2 7u83 841
 
7 7u83 842
	/* Scan the string */
843
	for (;;) {
844
		c = read_char();
845
		if (END_STRING(c, e) && !escaped) {
846
			if (e == char_single_quote && !have_char) {
847
				update_column();
848
				report(crt_loc, ERR_lex_ccon_empty());
849
			}
850
			if (nl) {
851
				report(loc, ERR_lex_string_nl(nl, nl));
852
			}
853
			token_buff.posn = s;
854
			*s = 0;
855
			return(lex_string_Hlit);
856
		}
857
		if (c == char_newline) {
858
			if (allow_nl) {
859
				/* Report newlines but continue */
860
				crt_loc.line++;
861
				crt_loc.column = 0;
862
				input_crt = input_posn;
863
				nl++;
864
			} else {
865
				unread_char(c);
866
				update_column();
867
				if (e == char_greater) {
868
					/* Header name */
869
					report(crt_loc,
870
					       ERR_cpp_include_incompl());
871
				} else {
872
					report(crt_loc, ERR_lex_string_pp_nl());
873
				}
874
				break;
875
			}
876
		} else if (c == char_eof) {
877
			report(loc, ERR_lex_phases_str_eof());
878
			good_eof = 1;
879
			nl = 0;
880
			break;
881
		}
882
		*s = (character)c;
883
		if (++s == se) {
884
			s = extend_buffer(&token_buff, s);
885
			se = token_buff.end;
886
		}
887
		if (escaped) {
888
			escaped = 0;
2 7u83 889
		} else {
7 7u83 890
			if (c == char_backslash) {
891
				escaped = esc;
892
			}
2 7u83 893
		}
7 7u83 894
		if (!escaped)have_char = 1;
2 7u83 895
	}
7 7u83 896
	if (nl) {
897
		/* Report newlines in string */
898
		report(loc, ERR_lex_string_nl(nl, nl));
2 7u83 899
	} else {
7 7u83 900
		/* Error recovery */
901
		if (e == char_single_quote && have_char) {
902
			seek_buffer(crt_buff_no, posn, 1);
903
			crt_loc = loc;
904
			s = token_buff.start;
905
			c = read_char();
906
			*(s++) = (character)c;
907
			if (c == char_backslash && esc) {
908
				c = read_char();
909
				*(s++) = (character)c;
910
			}
911
		}
2 7u83 912
	}
7 7u83 913
	token_buff.posn = s;
914
	*s = 0;
915
	return(lex_eof);
2 7u83 916
}
917
 
918
 
919
/*
920
    SKIP A C STYLE COMMENT
921
 
922
    This routine skips a C style comment, returning lex_ignore_token if
923
    the comment is terminated correctly and lex_eof otherwise.  It is
924
    entered after the first two characters comprising the comment start
925
    have been read.  If keep is true then the comment text is built up
926
    in token_buff, otherwise it is discarded.
927
*/
928
 
7 7u83 929
static int
930
skip_comment(int keep)
2 7u83 931
{
7 7u83 932
	int c = 0;
933
	int lastc;
934
	string s, se;
935
	LOCATION loc;
936
	update_column();
937
	loc = crt_loc;
938
	if (keep) {
939
		s = token_buff.start;
940
		se = token_buff.end;
941
	} else {
942
		s = NULL;
943
		se = NULL;
944
	}
945
	do {
946
		lastc = c;
947
read_label:
948
		/* Inlined version of read_char */
949
		c = next_char();
950
		if (c == char_end) {
951
			c = refill_char();
2 7u83 952
		}
7 7u83 953
		if (c == char_question) {
954
			c = adjust_trigraph();
2 7u83 955
		}
7 7u83 956
		if (c == char_backslash) {
957
			c = next_char();
958
			if (c == char_end) {
959
				c = refill_char();
960
			}
961
			if (c == char_return) {
962
				c = read_newline();
963
			}
964
			if (c == char_newline) {
965
				/* Allow for escaped newlines */
966
				crt_loc.line++;
967
				crt_loc.column = 0;
968
				input_crt = input_posn;
969
				goto read_label;
970
			}
971
			unread_char(c);
972
			c = char_backslash;
973
		} else if (c == char_newline) {
974
			/* New line characters */
975
			crt_loc.line++;
976
			crt_loc.column = 0;
977
			input_crt = input_posn;
978
			crt_line_changed = 1;
979
			crt_spaces = 0;
980
		} else if (c == char_eof) {
981
			/* End of file characters */
982
			report(loc, ERR_lex_phases_comm_eof());
983
			good_eof = 1;
984
			if (s) {
985
				token_buff.posn = s;
986
				*s = 0;
987
			}
988
			return(lex_eof);
989
		} else if (c == char_asterix && lastc == char_slash) {
990
			/* Nested comments */
991
			update_column();
992
			report(crt_loc, ERR_lex_comment_nest());
2 7u83 993
		}
7 7u83 994
		if (s) {
995
			*s = (character)c;
996
			if (++s == se) {
997
				s = extend_buffer(&token_buff, s);
998
				se = token_buff.end;
999
			}
1000
		}
1001
	} while (!END_COMMENT(lastc, c));
1002
	if (s) {
1003
		s -= 2;
1004
		token_buff.posn = s;
1005
		*s = 0;
2 7u83 1006
	}
7 7u83 1007
	crt_spaces++;
1008
	return(lex_ignore_token);
2 7u83 1009
}
1010
 
1011
 
1012
/*
1013
    SKIP A C++ STYLE COMMENT
1014
 
1015
    This routine skips a C++ style comment, returning lex_ignore_token
1016
    if the comment terminates correctly and lex_eof otherwise.  It is
1017
    entered after the first two characters comprising the comment start
1018
    have been read.  The next token read after the comment will be the
1019
    terminating newline.  If keep is true then the comment text is built
1020
    up in token_buff, otherwise it is discarded.
1021
*/
1022
 
7 7u83 1023
static int
1024
skip_cpp_comment(int keep)
2 7u83 1025
{
7 7u83 1026
	int c;
1027
	string s, se;
1028
	if (keep) {
1029
		s = token_buff.start;
1030
		se = token_buff.end;
1031
	} else {
1032
		s = NULL;
1033
		se = NULL;
1034
	}
1035
	do {
1036
read_label:
1037
		/* Inlined version of read_char */
1038
		c = next_char();
1039
		if (c == char_end) {
1040
			c = refill_char();
2 7u83 1041
		}
7 7u83 1042
		if (c == char_question) {
1043
			c = adjust_trigraph();
2 7u83 1044
		}
7 7u83 1045
		if (c == char_backslash) {
1046
			c = next_char();
1047
			if (c == char_end) {
1048
				c = refill_char();
1049
			}
1050
			if (c == char_return) {
1051
				c = read_newline();
1052
			}
1053
			if (c == char_newline) {
1054
				/* Allow for escaped newlines */
1055
				crt_loc.line++;
1056
				crt_loc.column = 0;
1057
				input_crt = input_posn;
1058
				goto read_label;
1059
			}
1060
			unread_char(c);
1061
			c = char_backslash;
1062
		} else if (c == char_eof) {
1063
			/* End of file characters */
1064
			update_column();
1065
			report(crt_loc, ERR_lex_phases_comm_eof());
1066
			good_eof = 1;
1067
			if (s) {
1068
				token_buff.posn = s;
1069
				*s = 0;
1070
			}
1071
			return(lex_eof);
2 7u83 1072
		}
7 7u83 1073
		if (s) {
1074
			*s = (character)c;
1075
			if (++s == se) {
1076
				s = extend_buffer(&token_buff, s);
1077
				se = token_buff.end;
1078
			}
1079
		}
1080
	} while (!END_CPP_COMMENT(c));
1081
	unread_char(c);
1082
	if (s) {
1083
		s -= 1;
1084
		token_buff.posn = s;
1085
		*s = 0;
2 7u83 1086
	}
7 7u83 1087
	crt_line_changed = 1;
1088
	crt_spaces = 0;
1089
	return(lex_ignore_token);
2 7u83 1090
}
1091
 
1092
 
1093
/*
1094
    SKIP WHITE-SPACE CHARACTERS
1095
 
1096
    This routine skips any white-space characters (including comments).
1097
    Newline characters are treated as white-space only if nl is true.
1098
    The result is a bitpattern formed from the components:
1099
 
1100
	WHITE_SPACE		for white-space characters;
1101
	WHITE_NEWLINE		for newline characters;
1102
	WHITE_ESC_NEWLINE	for escaped newlines;
1103
 
1104
    the result being reset to WHITE_NEWLINE after each newline.  Note that
1105
    trigraphs and escaped newlines are treated by hand.  The effect of this
1106
    routine is that all non-empty sequences of white-space characters other
1107
    than newlines are treated as if they were a single space (the C/C++
1108
    specification says that this is implementation-defined).
1109
*/
1110
 
7 7u83 1111
unsigned long
1112
skip_white(int nl)
2 7u83 1113
{
7 7u83 1114
	int c;
1115
	unsigned long sp = 0;
1116
	for (;;) {
1117
		c = next_char();
1118
		if (c == char_end) {
1119
			c = refill_char();
1120
		}
1121
		if (c == char_return) {
1122
			c = read_newline();
1123
		}
1124
		if (c == char_sub) {
1125
			c = read_eof();
1126
		}
1127
		if (c == char_newline) {
1128
			/* Deal with newline characters */
1129
			if (!nl) {
1130
				break;
1131
			}
1132
			sp = WHITE_NEWLINE;
1133
			crt_loc.line++;
1134
			crt_loc.column = 0;
1135
			input_crt = input_posn;
1136
			crt_line_changed = 1;
1137
			crt_spaces = 0;
1138
		} else if (c == char_space) {
1139
			/* Deal with simple spaces */
1140
			sp |= WHITE_SPACE;
1141
			crt_spaces++;
1142
		} else if (c == char_tab) {
1143
			/* Deal with tab characters */
1144
			unsigned long tab = tab_width;
1145
			sp |= WHITE_SPACE;
1146
			crt_spaces = tab *(crt_spaces / tab + 1);
1147
		} else if (c == char_eof) {
1148
			/* End of file */
1149
			if (sp == WHITE_NEWLINE) {
1150
				good_eof = 1;
1151
			}
1152
			break;
1153
		} else {
1154
			int t;
2 7u83 1155
#if FS_EXTENDED_CHAR
7 7u83 1156
			if (IS_EXTENDED(c)) {
1157
				break;
1158
			}
2 7u83 1159
#endif
7 7u83 1160
			t = lookup_char(c);
1161
			if (is_white(t)) {
1162
				/* Deal with other white space characters */
1163
				sp |= WHITE_SPACE;
1164
				crt_spaces++;
1165
			} else {
1166
				if (c == char_question)c = adjust_trigraph();
1167
				if (c == char_slash) {
1168
					/* Deal with comments */
1169
					int b = read_char();
1170
					if (START_COMMENT(b)) {
1171
						sp |= WHITE_SPACE;
1172
						b = skip_comment(0);
1173
						if (b == lex_eof)  {
1174
							return(sp);
1175
						}
1176
					} else if (START_CPP_COMMENT(b)) {
1177
						sp |= WHITE_SPACE;
1178
						b = skip_cpp_comment(0);
1179
						if (b == lex_eof) {
1180
							return(sp);
1181
						}
1182
						if (!nl) {
1183
							return(sp);
1184
						}
1185
					} else {
1186
						unread_char(b);
1187
						break;
1188
					}
1189
				} else if (c == char_backslash) {
1190
					/* Deal with escaped newlines */
1191
					int b = next_char();
1192
					if (b == char_end) {
1193
						b = refill_char();
1194
					}
1195
					if (b == char_return) {
1196
						b = read_newline();
1197
					}
1198
					if (b == char_newline) {
1199
						crt_loc.line++;
1200
						crt_loc.column = 0;
1201
						input_crt = input_posn;
1202
					} else {
1203
						unread_char(b);
1204
						break;
1205
					}
1206
					sp |= WHITE_ESC_NEWLINE;
1207
				} else {
1208
					break;
1209
				}
1210
			}
2 7u83 1211
		}
1212
	}
7 7u83 1213
	unread_char(c);
1214
	return(sp);
2 7u83 1215
}
1216
 
1217
 
1218
/*
1219
    PATCH UP WHITE-SPACE CHARACTERS
1220
 
1221
    Calling skip_white ( 1 ) can mess up the parser as regards spotting
1222
    preprocessing directives and valid end of file markers.  This routine
1223
    may be called with the return value of skip_white as an argument to
1224
    patch up the buffer in order to get the parser back into the right
1225
    state.
1226
*/
1227
 
7 7u83 1228
void
1229
patch_white(unsigned long sp)
2 7u83 1230
{
7 7u83 1231
	if (sp & WHITE_NEWLINE) {
1232
		if (sp & WHITE_SPACE) {
1233
			/* Patch in a space after a newline */
1234
			unsigned long n;
1235
			update_column();
1236
			n = crt_loc.column;
1237
			while (n) {
1238
				unread_char(char_space);
1239
				if (input_posn <= input_start) {
1240
					break;
1241
				}
1242
				n--;
1243
			}
1244
		} else if (sp & WHITE_ESC_NEWLINE) {
1245
			/* Patch in an escaped newline after a newline */
1246
			unread_char(char_backslash);
1247
			unread_char(char_newline);
1248
			crt_loc.line--;
1249
		}
1250
		/* Patch in a newline */
1251
		unread_char(char_newline);
1252
		crt_loc.line--;
1253
		crt_loc.column = 0;
1254
		crt_spaces = 0;
2 7u83 1255
	}
7 7u83 1256
	return;
2 7u83 1257
}
1258
 
1259
 
1260
/*
1261
    SKIP TO END OF LINE
1262
 
1263
    This routine skips to the end of the current line.  It returns 0 if
1264
    only white-space characters are encountered.  It uses skip_white to
1265
    jump over white-space (including comments).
1266
*/
1267
 
7 7u83 1268
int
1269
skip_to_end(void)
2 7u83 1270
{
7 7u83 1271
	int c;
1272
	int res = 0;
1273
	in_preproc_dir = 0;
1274
	for (;;) {
1275
		IGNORE skip_white(0);
1276
read_label:
1277
		/* Inlined version of read_char */
1278
		c = next_char();
1279
		if (c == char_end) {
1280
			c = refill_char();
2 7u83 1281
		}
7 7u83 1282
		if (c == char_question) {
1283
			c = adjust_trigraph();
1284
		}
1285
		if (c == char_backslash) {
1286
			c = next_char();
1287
			if (c == char_end) {
1288
				c = refill_char();
1289
			}
1290
			if (c == char_return) {
1291
				c = read_newline();
1292
			}
1293
			if (c == char_newline) {
1294
				/* Allow for escaped newlines */
1295
				crt_loc.line++;
1296
				crt_loc.column = 0;
1297
				input_crt = input_posn;
1298
				goto read_label;
1299
			}
1300
			unread_char(c);
1301
		} else if (c == char_newline) {
1302
			/* New line characters */
1303
			crt_loc.line++;
1304
			crt_loc.column = 0;
1305
			input_crt = input_posn;
1306
			crt_line_changed = 1;
1307
			crt_spaces = 0;
1308
			return(res);
1309
		} else if (START_STRING(c)) {
1310
			/* String literals */
1311
			res = 1;
1312
			c = skip_string(c);
1313
			if (c == lex_eof) {
1314
				return(res);
1315
			}
1316
		} else if (c == char_eof) {
1317
			/* End of file characters */
1318
			break;
1319
		} else {
1320
			res = 1;
1321
		}
2 7u83 1322
	}
7 7u83 1323
	update_column();
1324
	report(crt_loc, ERR_lex_phases_eof());
1325
	good_eof = 1;
1326
	return(res);
2 7u83 1327
}
1328
 
1329
 
1330
/*
1331
    READ A UNICODE CHARACTER
1332
 
1333
    This routine reads a unicode character.  It is entered after the
1334
    initial backslash and the following character, c, have been read.
1335
    It assigns the character type to pc and returns the character code.
1336
*/
1337
 
7 7u83 1338
static unsigned long
1339
read_unicode(int c, int *pc)
2 7u83 1340
{
7 7u83 1341
	unsigned i, n;
1342
	unsigned long u;
1343
	character s[10];
1344
	ERROR err = NULL_err;
1345
	string p = s;
1346
	if (c == char_u && allow_unicodes) {
1347
		/* Read '\uxxxx' */
1348
		*pc = CHAR_UNI4;
1349
		n = 4;
1350
	} else if (c == char_U && allow_unicodes) {
1351
		/* Read '\Uxxxxxxxx' */
1352
		*pc = CHAR_UNI8;
1353
		n = 8;
1354
	} else {
1355
		unread_char(c);
1356
		*pc = CHAR_NONE;
1357
		return(0);
1358
	}
1359
	for (i = 0; i < n; i++) {
1360
		int t;
1361
		int d = read_char();
1362
		if (d == char_eof) {
1363
			break;
1364
		}
2 7u83 1365
#if FS_EXTENDED_CHAR
7 7u83 1366
		if (IS_EXTENDED(d)) {
1367
			unread_char(d);
1368
			break;
1369
		}
2 7u83 1370
#endif
7 7u83 1371
		t = lookup_char(d);
1372
		if (!is_alphanum(t)) {
1373
			unread_char(d);
1374
			break;
1375
		}
1376
		s[i] = (character)d;
2 7u83 1377
	}
7 7u83 1378
	s[i] = 0;
1379
	u = eval_unicode(c, n, pc, &p, &err);
1380
	if (!IS_NULL_err(err)) {
1381
		update_column();
1382
		report(crt_loc, err);
1383
	}
1384
	return(u);
2 7u83 1385
}
1386
 
1387
 
1388
/*
1389
    READ AN EXTENDED IDENTIFIER
1390
 
1391
    This routine reads an extended identifier name (one including a unicode
1392
    character).  It is entered after reading the simple characters in the
1393
    token buffer plus the unicode character given by u and ch.
1394
*/
1395
 
7 7u83 1396
static HASHID
1397
read_extended_id(unsigned long u, int ch)
2 7u83 1398
{
7 7u83 1399
	string s;
1400
	int c, t;
1401
	HASHID nm;
1402
	unsigned long h;
1403
	BUFFER *bf = &token_buff;
1404
	do {
1405
		if (!unicode_alpha(u)) {
1406
			/* Report illegal identifiers */
1407
			update_column();
1408
			report(crt_loc, ERR_lex_name_extendid(u));
1409
		}
1410
		print_char(u, ch, 0, bf);
1411
		for (;;) {
1412
			c = read_char();
2 7u83 1413
#if FS_EXTENDED_CHAR
7 7u83 1414
			if (IS_EXTENDED(c)) {
1415
				break;
1416
			}
2 7u83 1417
#endif
7 7u83 1418
			t = lookup_char(c);
1419
			if (!is_alphanum(t)) {
1420
				break;
1421
			}
1422
			bfputc(bf, c);
1423
		}
1424
		ch = CHAR_NONE;
1425
		if (c == char_backslash) {
1426
			int nextc = read_char();
1427
			u = read_unicode(nextc, &ch);
1428
		}
1429
	} while (ch != CHAR_NONE);
1430
	unread_char(c);
1431
	bfputc(bf, 0);
1432
	s = bf->start;
1433
	h = hash(s);
1434
	nm = lookup_name(s, h, 1, lex_unknown);
1435
	return(nm);
2 7u83 1436
}
1437
 
1438
 
1439
/*
1440
    HASH VALUE FOR IDENTIFIERS
1441
 
1442
    The hash value for identifiers is built up as the identifier is read.
1443
    It is then stored in this variable.  The algorithm for calculating
1444
    the hash value needs to be kept in step with the routine hash (it
1445
    is checked by an assertion in lookup_name, so any errors should be
1446
    caught quickly if in debug mode).
1447
*/
1448
 
7 7u83 1449
/* Set by read_token to the hash table entry of each identifier it returns */
HASHID token_hashid = NULL_hashid;
2 7u83 1450
 
1451
 
1452
/*
1453
    MAIN PASS ANALYSER
1454
 
1455
    This routine reads the next preprocessing token from the input file.
1456
    It is designed for speed rather than elegance, hence the rather
1457
    indiscriminate use of labels.  Trigraphs and escaped newlines
1458
    involving the first character are processed by hand.  This routine
1459
    corresponds to phase 3 of the phases of translation.  The position
1460
    within the line is tracked by column - this is zero at the start of
1461
    a line, positive if only white space has been read and negative
1462
    otherwise.  preproc keeps track of the last preprocessing directive.
1463
*/
1464
 
7 7u83 1465
int
1466
read_token(void)
2 7u83 1467
{
7 7u83 1468
	int c, t;
1469
	int column = -1;
1470
	int preproc = lex_ignore_token;
2 7u83 1471
 
7 7u83 1472
	/* Read the next character */
1473
start_label:
1474
	c = next_char();
1475
	if (c == char_end)c = refill_char();
1476
restart_label:
2 7u83 1477
#if FS_EXTENDED_CHAR
7 7u83 1478
	if (IS_EXTENDED(c)) {
1479
		goto unknown_label;
1480
	}
2 7u83 1481
#endif
7 7u83 1482
	t = lookup_char(c);
1483
	if (is_white(t)) {
1484
		crt_spaces++;
1485
		goto start_label;
2 7u83 1486
	}
7 7u83 1487
process_label:
1488
	/* Process the next character */
2 7u83 1489
 
7 7u83 1490
	/* Check symbols and punctuation */
1491
	if (is_symbol(t)) {
1492
		switch (c) {
2 7u83 1493
 
7 7u83 1494
		case char_question: {
1495
			/* Deal with '?' and trigraphs */
1496
			c = adjust_trigraph();
1497
			if (c == char_question) return(lex_question);
1498
			goto restart_label;
2 7u83 1499
		}
1500
 
7 7u83 1501
		case char_backslash: {
1502
			/* Deal with escaped newlines */
1503
			unsigned long u;
1504
			int ch = CHAR_NONE;
1505
			int nextc = next_char();
1506
			if (nextc == char_end)nextc = refill_char();
1507
			if (nextc == char_return)nextc = read_newline();
1508
			if (nextc == char_newline) {
1509
				crt_loc.line++;
1510
				crt_loc.column = 0;
1511
				input_crt = input_posn;
1512
				if (column == 0)column = 1;
1513
				goto start_label;
1514
			}
1515
 
1516
			/* Check for unicode characters */
1517
			u = read_unicode(nextc, &ch);
1518
			if (ch != CHAR_NONE) {
1519
				token_buff.posn = token_buff.start;
1520
				token_hashid = read_extended_id(u, ch);
1521
				return(lex_identifier);
1522
			}
1523
			return(lex_backslash);
2 7u83 1524
		}
1525
 
7 7u83 1526
		case char_hash:
1527
			/* Deal with '#' and '##' */
1528
			c = read_char();
1529
			if (c == char_hash) {
1530
				return(lex_hash_Hhash_H1);
1531
			}
1532
			unread_char(c);
2 7u83 1533
 
7 7u83 1534
			/* Return with '#' if not at start of line */
1535
			if (column < 0 || no_preproc_dir) {
1536
				return(lex_hash_H1);
1537
			}
2 7u83 1538
 
7 7u83 1539
			/* Deal with preprocessing directives */
1540
preproc_label:	{
1541
			unsigned long sp = skip_white(0);
1542
			update_column();
1543
			if (column) {
1544
				report(crt_loc, ERR_cpp_indent());
1545
			}
1546
			if (sp & (WHITE_SPACE | WHITE_ESC_NEWLINE)) {
1547
				report(preproc_loc, ERR_cpp_indent_dir());
1548
			}
1549
			preproc = read_preproc_dir(1, preproc);
1550
			if (preproc < 0) {
1551
				goto start_line_label;
1552
			}
1553
			unread_char(char_newline);
1554
			crt_loc.line--;
1555
			crt_loc.column = 0;
1556
			return(preproc);
2 7u83 1557
		}
1558
 
7 7u83 1559
		case char_percent:
1560
			/* Deal with '%', '%=', '%>', '%:' and '%:%:' */
1561
			c = read_char();
1562
			if (c == char_equal) {
1563
				return(lex_rem_Heq);
2 7u83 1564
			}
7 7u83 1565
			if (c == char_greater && allow_digraphs) {
1566
				return(lex_close_Hbrace_H2);
1567
			}
1568
			if (c == char_colon && allow_digraphs) {
1569
				/* Check for '%:' and '%:%:' */
1570
				c = read_char();
1571
				if (c == char_percent) {
1572
					int nextc = read_char();
1573
					if (nextc == char_colon) {
1574
						return(lex_hash_Hhash_H2);
1575
					}
1576
					unread_char(nextc);
1577
				}
1578
				unread_char(c);
2 7u83 1579
 
7 7u83 1580
				/* Return with '%:' if not at start of line */
1581
				if (column < 0 || no_preproc_dir) {
1582
					return(lex_hash_H2);
1583
				}
2 7u83 1584
 
7 7u83 1585
				/* Otherwise this is a preprocessing
1586
				 * directive */
1587
				IGNORE get_digraph(lex_hash_H2);
1588
				goto preproc_label;
1589
			}
1590
			unread_char(c);
1591
			return(lex_rem);
2 7u83 1592
 
7 7u83 1593
		case char_quote:
1594
			/* Deal with string literals */
1595
			IGNORE read_string(c, 1);
1596
			return(lex_string_Hlit);
2 7u83 1597
 
7 7u83 1598
		case char_single_quote:
1599
			/* Deal with character literals */
1600
			IGNORE read_string(c, 1);
1601
			return(lex_char_Hlit);
2 7u83 1602
 
7 7u83 1603
		case char_exclaim:
1604
			/* Deal with '!' and '!=' */
1605
			c = read_char();
1606
			if (c == char_equal) {
1607
				return(lex_not_Heq_H1);
1608
			}
1609
			unread_char(c);
1610
			return(lex_not_H1);
2 7u83 1611
 
7 7u83 1612
		case char_ampersand:
1613
			/* Deal with '&', '&&' and '&=' */
1614
			c = read_char();
1615
			if (c == char_ampersand) {
1616
				return(lex_logical_Hand_H1);
1617
			}
1618
			if (c == char_equal) {
1619
				return(lex_and_Heq_H1);
1620
			}
1621
			unread_char(c);
1622
			return(lex_and_H1);
2 7u83 1623
 
7 7u83 1624
		case char_asterix:
1625
			/* Deal with '*' and '*=' */
1626
			c = read_char();
1627
			if (c == char_equal) {
1628
				return(lex_star_Heq);
1629
			}
1630
			unread_char(c);
1631
			return(lex_star);
2 7u83 1632
 
7 7u83 1633
		case char_plus:
1634
			/* Deal with '+', '++' and '+=' */
1635
			c = read_char();
1636
			if (c == char_plus) {
1637
				return(lex_plus_Hplus);
1638
			}
1639
			if (c == char_equal) {
1640
				return(lex_plus_Heq);
1641
			}
1642
			if (c == char_question && allow_extra_symbols) {
1643
				return(lex_abs);
1644
			}
1645
			unread_char(c);
1646
			return(lex_plus);
2 7u83 1647
 
7 7u83 1648
		case char_minus:
1649
			/* Deal with '-', '--', '-=', '->' and '->*' */
1650
			c = read_char();
1651
			if (c == char_minus) {
1652
				return(lex_minus_Hminus);
1653
			}
1654
			if (c == char_equal) {
1655
				return(lex_minus_Heq);
1656
			}
1657
			if (c == char_greater) {
2 7u83 1658
#if LANGUAGE_CPP
7 7u83 1659
				/* '->*' is only allowed in C++ */
1660
				c = read_char();
1661
				if (c == char_asterix) {
1662
					return(lex_arrow_Hstar);
1663
				}
1664
				unread_char(c);
2 7u83 1665
#endif
7 7u83 1666
				return(lex_arrow);
1667
			}
1668
			unread_char(c);
1669
			return(lex_minus);
2 7u83 1670
 
7 7u83 1671
		case char_dot:
1672
			/* Deal with '.', '...', '.*' and numbers */
1673
			c = read_char();
1674
			if (c == char_dot) {
1675
				c = read_char();
1676
				if (c == char_dot) {
1677
					return(lex_ellipsis);
1678
				}
1679
				unread_char(c);
1680
				unread_char(char_dot);
1681
				return(lex_dot);
1682
			}
2 7u83 1683
#if LANGUAGE_CPP
7 7u83 1684
			/* '.*' is only allowed in C++ */
1685
			if (c == char_asterix) {
1686
				return(lex_dot_Hstar);
1687
			}
2 7u83 1688
#endif
1689
#if FS_EXTENDED_CHAR
7 7u83 1690
			if (IS_EXTENDED(c)) {
1691
				unread_char(c);
1692
				return(lex_dot);
1693
			}
2 7u83 1694
#endif
7 7u83 1695
			t = lookup_char(c);
1696
			if (is_digit(t)) {
1697
				/* Indicate a number with first digit '.' */
1698
				t = POINT;
1699
				goto number_label;
1700
			}
1701
			unread_char(c);
1702
			return(lex_dot);
2 7u83 1703
 
7 7u83 1704
		case char_slash:
1705
			/* Deal with '/', '/=' and comments */
1706
			c = read_char();
1707
			if (START_COMMENT(c)) {
1708
				int a = analyse_comments;
1709
				c = skip_comment(a);
1710
				if (c == lex_eof) {
1711
					goto eof_label;
1712
				}
1713
				if (a) {
1714
					c = lint_comment();
1715
					if (c >= 0) return(c);
1716
				}
1717
				if (column == 0) {
1718
					column = 1;
1719
				}
1720
				goto start_label;
1721
			}
1722
			if (START_CPP_COMMENT(c)) {
1723
				int a = analyse_comments;
1724
				c = skip_cpp_comment(a);
1725
				if (c == lex_eof) {
1726
					goto eof_label;
1727
				}
1728
				if (a) {
1729
					c = lint_comment();
1730
					if (c >= 0) {
1731
						return(c);
1732
					}
1733
				}
1734
				IGNORE read_char();
1735
				goto newline_label;
1736
			}
1737
			if (c == char_equal) {
1738
				return(lex_div_Heq);
1739
			}
1740
			unread_char(c);
1741
			return(lex_div);
2 7u83 1742
 
7 7u83 1743
		case char_colon:
1744
			/* Deal with ':', '::' and ':>' */
1745
			c = read_char();
2 7u83 1746
#if LANGUAGE_CPP
7 7u83 1747
			/* '::' is only allowed in C++ */
1748
			if (c == char_colon) {
1749
				return(lex_colon_Hcolon);
1750
			}
2 7u83 1751
#endif
7 7u83 1752
			if (c == char_greater && allow_digraphs) {
1753
				return(lex_close_Hsquare_H2);
1754
			}
1755
			unread_char(c);
1756
			return(lex_colon);
2 7u83 1757
 
7 7u83 1758
		case char_less:
1759
			/* Deal with '<', '<=', '<<', '<<=', '<%', '<:' */
1760
			c = read_char();
1761
			if (c == char_equal) {
1762
				return(lex_less_Heq);
1763
			}
1764
			if (c == char_less) {
1765
				c = read_char();
1766
				if (c == char_equal) {
1767
					return(lex_lshift_Heq);
1768
				}
1769
				unread_char(c);
1770
				return(lex_lshift);
1771
			}
1772
			if (c == char_percent && allow_digraphs) {
1773
				return(lex_open_Hbrace_H2);
1774
			}
1775
			if (c == char_colon && allow_digraphs) {
1776
				return(lex_open_Hsquare_H2);
1777
			}
1778
			if (c == char_question && allow_extra_symbols) {
1779
				return(lex_min);
1780
			}
1781
			unread_char(c);
1782
			return(lex_less);
2 7u83 1783
 
7 7u83 1784
		case char_equal:
1785
			/* Deal with '=' and '==' */
1786
			c = read_char();
1787
			switch (c) {
1788
			case char_equal:
1789
				return(lex_eq);
1790
			case char_ampersand:
1791
			case char_asterix:
1792
			case char_minus:
1793
			case char_plus:
1794
				update_column();
1795
				report(crt_loc, ERR_lex_op_old_assign(c, c));
1796
				break;
1797
			}
1798
			unread_char(c);
1799
			return(lex_assign);
2 7u83 1800
 
7 7u83 1801
		case char_greater:
1802
			/* Deal with '>', '>=', '>>' and '>>=' */
1803
			c = read_char();
1804
			if (c == char_equal) {
1805
				return(lex_greater_Heq);
1806
			}
1807
			if (c == char_greater) {
1808
				c = read_char();
1809
				if (c == char_equal) {
1810
					return(lex_rshift_Heq);
1811
				}
1812
				unread_char(c);
1813
				return(lex_rshift);
1814
			}
1815
			if (c == char_question && allow_extra_symbols) {
1816
				return(lex_max);
1817
			}
1818
			unread_char(c);
1819
			return(lex_greater);
2 7u83 1820
 
7 7u83 1821
		case char_circum:
1822
			/* Deal with '^' and '^=' */
1823
			c = read_char();
1824
			if (c == char_equal) {
1825
				return(lex_xor_Heq_H1);
1826
			}
1827
			unread_char(c);
1828
			return(lex_xor_H1);
2 7u83 1829
 
7 7u83 1830
		case char_bar:
1831
			/* Deal with '|', '||' and '|=' */
1832
			c = read_char();
1833
			if (c == char_bar) {
1834
				return(lex_logical_Hor_H1);
1835
			}
1836
			if (c == char_equal) {
1837
				return(lex_or_Heq_H1);
1838
			}
1839
			unread_char(c);
1840
			return(lex_or_H1);
2 7u83 1841
 
7 7u83 1842
		case char_open_round:
1843
			/* Deal with '(' */
1844
			return(lex_open_Hround);
2 7u83 1845
 
7 7u83 1846
		case char_close_round:
1847
			/* Deal with ')' */
1848
			return(lex_close_Hround);
2 7u83 1849
 
7 7u83 1850
		case char_comma:
1851
			/* Deal with ',' */
1852
			return(lex_comma);
2 7u83 1853
 
7 7u83 1854
		case char_semicolon:
1855
			/* Deal with ';' */
1856
			return(lex_semicolon);
2 7u83 1857
 
7 7u83 1858
		case char_open_square:
1859
			/* Deal with '[' */
1860
			return(lex_open_Hsquare_H1);
2 7u83 1861
 
7 7u83 1862
		case char_close_square:
1863
			/* Deal with ']' */
1864
			return(lex_close_Hsquare_H1);
2 7u83 1865
 
7 7u83 1866
		case char_open_brace:
1867
			/* Deal with '{' */
1868
			return(lex_open_Hbrace_H1);
2 7u83 1869
 
7 7u83 1870
		case char_close_brace:
1871
			/* Deal with '}' */
1872
			return(lex_close_Hbrace_H1);
2 7u83 1873
 
7 7u83 1874
		case char_tilde:
1875
			/* Deal with '~' */
1876
			return(lex_compl_H1);
2 7u83 1877
 
7 7u83 1878
		default:
1879
			/* Anything else is an unknown character */
1880
			goto unknown_label;
1881
		}
2 7u83 1882
	}
1883
 
7 7u83 1884
	/* Read an identifier (calculating hash value on fly) */
1885
	if (is_alpha(t)) {
1886
		HASHID nm;
1887
		LOCATION loc;
1888
		BUFFER *bf = &token_buff;
1889
		string s = bf->start;
1890
		string se = bf->end;
1891
		unsigned long h = (unsigned long)c;
1892
		*(s++) = (character)c;
2 7u83 1893
 
7 7u83 1894
		/* Get the second character */
1895
		update_column();
1896
		loc = crt_loc;
1897
		c = read_char();
2 7u83 1898
#if FS_EXTENDED_CHAR
7 7u83 1899
		t = (IS_EXTENDED(c)? ILLEG : lookup_char(c));
2 7u83 1900
#else
7 7u83 1901
		t = lookup_char(c);
2 7u83 1902
#endif
7 7u83 1903
		if (is_alphanum(t)) {
1904
			/* Scan the third and subsequent characters */
1905
			do {
1906
				h = HASH_POWER * h + (unsigned long)c;
1907
				*s = (character)c;
1908
				if (++s == se) {
1909
					s = extend_buffer(bf, s);
1910
					se = bf->end;
1911
				}
1912
				c = read_char();
2 7u83 1913
#if FS_EXTENDED_CHAR
7 7u83 1914
				if (IS_EXTENDED(c)) {
1915
					break;
1916
				}
2 7u83 1917
#endif
7 7u83 1918
				t = lookup_char(c);
1919
			} while (is_alphanum(t));
1920
		} else {
1921
			/* Allow for wide strings and characters */
1922
			if (h == char_L && is_symbol(t)) {
1923
				if (c == char_quote) {
1924
					IGNORE read_string(c, 1);
1925
					return(lex_wstring_Hlit);
1926
				}
1927
				if (c == char_single_quote) {
1928
					IGNORE read_string(c, 1);
1929
					return(lex_wchar_Hlit);
1930
				}
1931
			}
1932
			/* Identifier of length one */
2 7u83 1933
		}
7 7u83 1934
		if (c == char_backslash) {
1935
			/* Allow for extended identifiers */
1936
			int ch = CHAR_NONE;
1937
			int nextc = read_char();
1938
			unsigned long u = read_unicode(nextc, &ch);
1939
			if (ch != CHAR_NONE) {
1940
				bf->posn = s;
1941
				nm = read_extended_id(u, ch);
1942
				goto identifier_label;
1943
			}
2 7u83 1944
		}
7 7u83 1945
		unread_char(c);
1946
		se = s;
1947
		*se = 0;
2 7u83 1948
 
7 7u83 1949
		/* Look up the symbol in the hash table */
1950
		h %= HASH_SIZE;
1951
		s = bf->start;
1952
		nm = lookup_name(s, h, 0, lex_unknown);
1953
identifier_label:
1954
		{
1955
			IDENTIFIER id = DEREF_id(hashid_id(nm));
1956
			while (!IS_id_dummy(id)) {
1957
				/* Scan to last hidden value */
1958
				id = DEREF_id(id_alias(id));
1959
			}
1960
			COPY_loc(id_loc(id), loc);
1961
		}
1962
		token_hashid = nm;
1963
		return(lex_identifier);
2 7u83 1964
	}
1965
 
7 7u83 1966
	/* Read the first token in a line */
1967
	if (c == char_return) {
1968
		c = read_newline();
2 7u83 1969
	}
7 7u83 1970
	if (c == char_newline) {
1971
newline_label:
1972
		/* Re-entry point after C++ style comments */
1973
		crt_loc.line++;
1974
		crt_loc.column = 0;
1975
		input_crt = input_posn;
1976
		crt_line_changed = 1;
1977
		crt_spaces = 0;
1978
		if (in_preproc_dir == 1) {
1979
			in_preproc_dir = 0;
1980
			return(lex_newline);
1981
		}
1982
start_line_label:
1983
		/* Re-entry point after preprocessing directives */
1984
		column = 0;
1985
		for (;;) {
1986
			/* Step over any obvious spaces */
1987
			c = next_char();
1988
			if (c == char_end) {
1989
				c = refill_char();
1990
			}
1991
			if (c == char_return) {
1992
				c = read_newline();
1993
			}
1994
			if (c == char_sub) {
1995
				c = read_eof();
1996
			}
1997
			if (c == char_newline) {
1998
				crt_loc.line++;
1999
				crt_loc.column = 0;
2000
				input_crt = input_posn;
2001
				crt_line_changed = 1;
2002
				crt_spaces = 0;
2003
				column = 0;
2004
			} else if (c == char_eof) {
2005
				/* Check for end of file (should start line) */
2006
				if (column == 0) {
2007
					good_eof = 1;
2008
				}
2009
				goto eof_label;
2010
			} else if (c == char_space) {
2011
				crt_spaces++;
2012
				column = 1;
2013
			} else if (c == char_tab) {
2014
				unsigned long tab = tab_width;
2015
				crt_spaces = tab *(crt_spaces / tab + 1);
2016
				column = 1;
2017
			} else {
2 7u83 2018
#if FS_EXTENDED_CHAR
7 7u83 2019
				if (IS_EXTENDED(c)) {
2020
					t = ILLEG;
2021
					break;
2022
				}
2 7u83 2023
#endif
7 7u83 2024
				t = lookup_char(c);
2025
				if (is_white(t)) {
2026
					if (!is_newline(t)) {
2027
						crt_spaces++;
2028
						column = 1;
2029
					}
2030
				} else {
2031
					break;
2032
				}
2 7u83 2033
			}
2034
		}
7 7u83 2035
		/* c and t now hold the next character */
2036
		goto process_label;
2 7u83 2037
	}
2038
 
7 7u83 2039
	/* Read a pp-number */
2040
	if (is_digit(t)) {
2041
number_label:	{
2042
			int lastc;
2043
			BUFFER *bf = &token_buff;
2044
			string s = bf->start;
2045
			string se = bf->end;
2046
			if (t == POINT) {
2047
				/* t is set to POINT to indicate an initial
2048
				 * '.' */
2049
				*(s++) = char_dot;
2050
			}
2051
digit_label:
2052
			/* Step over alphanumeric characters and '.' */
2053
			do {
2054
				*s = (character)c;
2055
				if (++s == se) {
2056
					s = extend_buffer(bf, s);
2057
					se = bf->end;
2058
				}
2059
next_digit_label:
2060
				lastc = c;
2061
				c = read_char();
2 7u83 2062
#if FS_EXTENDED_CHAR
7 7u83 2063
				if (IS_EXTENDED(c)) {
2064
					break;
2065
				}
2 7u83 2066
#endif
7 7u83 2067
				t = lookup_char(c);
2068
			} while (is_ppdigit(t));
2069
			if (c == char_plus || c == char_minus) {
2070
				/* Allow for [Ee][+-] */
2071
				if (lastc == char_e || lastc == char_E) {
2072
					goto digit_label;
2073
				}
2074
			}
2075
			if (c == char_backslash) {
2076
				/* Allow for unicode characters */
2077
				int ch = CHAR_NONE;
2078
				int nextc = read_char();
2079
				unsigned long u = read_unicode(nextc, &ch);
2080
				if (ch != CHAR_NONE) {
2081
					bf->posn = s;
2082
					print_char(u, ch, 0, bf);
2083
					s = bf->posn;
2084
					se = bf->end;
2085
					goto next_digit_label;
2086
				}
2087
			}
2088
			*s = 0;
2089
			unread_char(c);
2 7u83 2090
		}
7 7u83 2091
		return(lex_integer_Hlit);
2092
	}
2093
 
2094
	/* End of file marker */
2095
	if (c == char_sub) {
2096
		c = read_eof();
2097
	}
2098
	if (c == char_eof) {
2099
eof_label:
2100
		if (in_preproc_dir != 0) {
2101
			return(lex_eof);
2 7u83 2102
		}
7 7u83 2103
		if (!good_eof) {
2104
			update_column();
2105
			report(crt_loc, ERR_lex_phases_eof());
2106
			good_eof = 1;
2107
		}
2108
		if (end_include(preproc)) {
2109
			/* Revert to previous file */
2110
			good_eof = 0;
2111
			preproc = lex_ignore_token;
2112
			goto start_line_label;
2113
		}
2114
		/* End of main file */
2115
		return(lex_eof);
2 7u83 2116
	}
2117
 
7 7u83 2118
	/* Unknown characters */
2119
unknown_label:
2120
	{
2121
		string s = token_buff.start;
2122
		add_multi_char(s, (unsigned long)c, CHAR_SIMPLE);
2123
		return(lex_unknown);
2 7u83 2124
	}
2125
}
2126
 
2127
 
2128
/*
2129
    INITIALISE INPUT VARIABLES
2130
 
2131
    This routine initialises the tables of character look-ups and the token
2132
    buffer.
2133
*/
2134
 
7 7u83 2135
void
2136
init_char(void)
2 7u83 2137
{
7 7u83 2138
	int i;
2139
	unsigned char *p, *q;
2 7u83 2140
 
7 7u83 2141
	/* Set native locale for multibyte characters */
2 7u83 2142
#if FS_MULTIBYTE
7 7u83 2143
	if (allow_multibyte) {
2144
		IGNORE setlocale(LC_CTYPE, "");
2145
	}
2 7u83 2146
#endif
2147
 
7 7u83 2148
	/* Allow for non-ASCII codesets */
2149
	map_ascii(main_characters);
2150
	map_ascii(digit_values);
2151
	map_ascii(escape_sequences);
2 7u83 2152
 
7 7u83 2153
	/* Set up extra characters */
2154
	p = xmalloc_nof(unsigned char, NO_CHAR);
2155
	q = main_characters;
2156
	copy_characters = p;
2157
	for (i = 0; i < NO_CHAR; i++) {
2158
		*(p++) = *(q++);
2159
	}
2 7u83 2160
 
7 7u83 2161
	/* Initialise token buffer */
2162
	token_buff.posn = extend_buffer(&token_buff, token_buff.posn);
2163
	return;
2 7u83 2164
}
2165
 
2166
 
2167
/*
2168
    INITIALISE INPUT FILE READING
2169
 
2170
    This routine initialises the lexical analysis routines in preparation
2171
    for parsing or preprocessing the current input file.
2172
*/
2173
 
7 7u83 2174
void
2175
init_lex(void)
2 7u83 2176
{
7 7u83 2177
	/* Initialise file variables */
2178
	crt_buff_no = 0;
2179
	IGNORE init_buffer(crt_buff_no);
2180
	start_preproc_if ();
2181
	preproc_loc = crt_loc;
2182
	have_syntax_error = 0;
2183
	if (do_header) {
2184
		dump_start(&crt_loc, NIL(INCL_DIR));
2185
	}
2 7u83 2186
 
7 7u83 2187
	/* Deal with first start-up file */
2188
	open_startup();
2 7u83 2189
 
7 7u83 2190
	/* Force processing to start at the beginning of a line */
2191
	unread_char(char_newline);
2192
	crt_loc.line--;
2 7u83 2193
 
7 7u83 2194
	/* Initialise the parser */
2195
	init_parser(NIL(PPTOKEN));
2196
	return;
2 7u83 2197
}
2198
 
2199
 
2200
/*
2201
    PARSE INPUT FILE
2202
 
2203
    This routine is the main entry point for the parsing of the current
2204
    input file.
2205
*/
2206
 
7 7u83 2207
void
2208
process_file(void)
2 7u83 2209
{
7 7u83 2210
	init_lex();
2211
	ADVANCE_LEXER;
2212
	parse_file(NULL_type, dspec_none);
2213
	return;
2 7u83 2214
}