Warning: Undefined variable $n in /usr/local/www/websvn.planix.org/include/diff_util.php on line 243

Warning: Undefined variable $n in /usr/local/www/websvn.planix.org/include/diff_util.php on line 247

Warning: Undefined variable $m in /usr/local/www/websvn.planix.org/include/diff_util.php on line 251
WebSVN – tendra.SVN – Diff – /trunk/src/producers/common/parse/lex.c – Rev 2 and 7

Subversion Repositories tendra.SVN

Rev

Rev 2 | Show entire file | Ignore whitespace | Details | Blame | Last modification | View Log | RSS feed

Rev 2 Rev 7
Line -... Line 1...
-
 
1
/*
-
 
2
 * Copyright (c) 2002-2005 The TenDRA Project <http://www.tendra.org/>.
-
 
3
 * All rights reserved.
-
 
4
 *
-
 
5
 * Redistribution and use in source and binary forms, with or without
-
 
6
 * modification, are permitted provided that the following conditions are met:
-
 
7
 *
-
 
8
 * 1. Redistributions of source code must retain the above copyright notice,
-
 
9
 *    this list of conditions and the following disclaimer.
-
 
10
 * 2. Redistributions in binary form must reproduce the above copyright notice,
-
 
11
 *    this list of conditions and the following disclaimer in the documentation
-
 
12
 *    and/or other materials provided with the distribution.
-
 
13
 * 3. Neither the name of The TenDRA Project nor the names of its contributors
-
 
14
 *    may be used to endorse or promote products derived from this software
-
 
15
 *    without specific, prior written permission.
-
 
16
 *
-
 
17
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS
-
 
18
 * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
-
 
19
 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-
 
20
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR
-
 
21
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-
 
22
 * EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-
 
23
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
-
 
24
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
-
 
25
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
-
 
26
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
-
 
27
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
 
28
 *
-
 
29
 * $Id$
-
 
30
 */
1
/*
31
/*
2
    		 Crown Copyright (c) 1997
32
    		 Crown Copyright (c) 1997
3
    
33
 
4
    This TenDRA(r) Computer Program is subject to Copyright
34
    This TenDRA(r) Computer Program is subject to Copyright
5
    owned by the United Kingdom Secretary of State for Defence
35
    owned by the United Kingdom Secretary of State for Defence
6
    acting through the Defence Evaluation and Research Agency
36
    acting through the Defence Evaluation and Research Agency
7
    (DERA).  It is made available to Recipients with a
37
    (DERA).  It is made available to Recipients with a
8
    royalty-free licence for its use, reproduction, transfer
38
    royalty-free licence for its use, reproduction, transfer
9
    to other parties and amendment for any purpose not excluding
39
    to other parties and amendment for any purpose not excluding
10
    product development provided that any such use et cetera
40
    product development provided that any such use et cetera
11
    shall be deemed to be acceptance of the following conditions:-
41
    shall be deemed to be acceptance of the following conditions:-
12
    
42
 
13
        (1) Its Recipients shall ensure that this Notice is
43
        (1) Its Recipients shall ensure that this Notice is
14
        reproduced upon any copies or amended versions of it;
44
        reproduced upon any copies or amended versions of it;
15
    
45
 
16
        (2) Any amended version of it shall be clearly marked to
46
        (2) Any amended version of it shall be clearly marked to
17
        show both the nature of and the organisation responsible
47
        show both the nature of and the organisation responsible
18
        for the relevant amendment or amendments;
48
        for the relevant amendment or amendments;
19
    
49
 
20
        (3) Its onward transfer from a recipient to another
50
        (3) Its onward transfer from a recipient to another
21
        party shall be deemed to be that party's acceptance of
51
        party shall be deemed to be that party's acceptance of
22
        these conditions;
52
        these conditions;
23
    
53
 
24
        (4) DERA gives no warranty or assurance as to its
54
        (4) DERA gives no warranty or assurance as to its
25
        quality or suitability for any purpose and DERA accepts
55
        quality or suitability for any purpose and DERA accepts
26
        no liability whatsoever in relation to any use to which
56
        no liability whatsoever in relation to any use to which
27
        it may be put.
57
        it may be put.
28
*/
58
*/
Line 65... Line 95...
65
 
95
 
66
    These flags control the behaviour of the parser and determine whether
96
    These flags control the behaviour of the parser and determine whether
67
    such features as trigraphs and digraphs are allowed.
97
    such features as trigraphs and digraphs are allowed.
68
*/
98
*/
69
 
99
 
70
int allow_trigraphs = 1 ;
100
int allow_trigraphs = 1;
71
int allow_digraphs = 1 ;
101
int allow_digraphs = 1;
72
int allow_unicodes = LANGUAGE_CPP ;
102
int allow_unicodes = LANGUAGE_CPP;
73
int allow_multibyte = 1 ;
103
int allow_multibyte = 1;
74
int allow_cpp_comments = LANGUAGE_CPP ;
104
int allow_cpp_comments = LANGUAGE_CPP;
75
int allow_dos_newline = 0 ;
105
int allow_dos_newline = 0;
76
int allow_extra_symbols = 0 ;
106
int allow_extra_symbols = 0;
77
int allow_iso_keywords = LANGUAGE_CPP ;
107
int allow_iso_keywords = LANGUAGE_CPP;
78
int allow_newline_strings = 0 ;
108
int allow_newline_strings = 0;
79
int analyse_comments = 1 ;
109
int analyse_comments = 1;
80
unsigned long max_id_length = 1024 ;
110
unsigned long max_id_length = 1024;
81
 
111
 
82
 
112
 
83
/*
113
/*
84
    TABLE OF SYMBOLS AND KEYWORDS
114
    TABLE OF SYMBOLS AND KEYWORDS
85
 
115
 
86
    This table gives the mapping between lexical token numbers and the
116
    This table gives the mapping between lexical token numbers and the
87
    corresponding symbols and keywords.  It is derived from the list of
117
    corresponding symbols and keywords.  It is derived from the list of
88
    tokens in symbols.h.
118
    tokens in symbols.h.
89
*/
119
*/
90
 
120
 
91
CONST char *token_names [] = {
121
CONST char *token_names[] = {
92
#define LEX_TOKEN( A, B, C )		( B ),
122
#define LEX_TOKEN(A, B, C)		(B),
93
#include "symbols.h"
123
#include "symbols.h"
94
#undef LEX_TOKEN
124
#undef LEX_TOKEN
95
    NULL
125
	NULL
96
} ;
126
};
97
 
127
 
98
 
128
 
99
/*
129
/*
100
    TRANSLATION A LEXICAL TOKEN TO ITS PRIMARY FORM
130
    TRANSLATION A LEXICAL TOKEN TO ITS PRIMARY FORM
101
 
131
 
102
    This routine translates the alternative ISO keywords and digraphs
132
    This routine translates the alternative ISO keywords and digraphs
103
    into their primary form.
133
    into their primary form.
104
*/
134
*/
105
 
135
 
106
int primary_form
136
int
107
    PROTO_N ( ( t ) )
-
 
108
    PROTO_T ( int t )
137
primary_form(int t)
109
{
138
{
110
    int u = t ;
139
	int u = t;
111
    switch ( u ) {
140
	switch (u) {
-
 
141
	case lex_and_H2:
-
 
142
		u = lex_and_H1;
-
 
143
		break;
112
	case lex_and_H2 : u = lex_and_H1 ; break ;
144
	case lex_and_Heq_H2:
113
	case lex_and_Heq_H2 : u = lex_and_Heq_H1 ; break ;
145
		u = lex_and_Heq_H1;
-
 
146
		break;
114
	case lex_close_Hbrace_H2 : u = lex_close_Hbrace_H1 ; break ;
147
	case lex_close_Hbrace_H2:
-
 
148
		u = lex_close_Hbrace_H1;
-
 
149
		break;
115
	case lex_close_Hsquare_H2 : u = lex_close_Hsquare_H1 ; break ;
150
	case lex_close_Hsquare_H2:
-
 
151
		u = lex_close_Hsquare_H1;
-
 
152
		break;
116
	case lex_compl_H2 : u = lex_compl_H1 ; break ;
153
	case lex_compl_H2:
-
 
154
		u = lex_compl_H1;
-
 
155
		break;
-
 
156
	case lex_hash_H2:
-
 
157
		u = lex_hash_H1;
-
 
158
		break;
117
	case lex_hash_H2 : u = lex_hash_H1 ; break ;
159
	case lex_hash_Hhash_H2:
118
	case lex_hash_Hhash_H2 : u = lex_hash_Hhash_H1 ; break ;
160
		u = lex_hash_Hhash_H1;
-
 
161
		break;
119
	case lex_logical_Hand_H2 : u = lex_logical_Hand_H1 ; break ;
162
	case lex_logical_Hand_H2:
-
 
163
		u = lex_logical_Hand_H1;
-
 
164
		break;
120
	case lex_logical_Hor_H2 : u = lex_logical_Hor_H1 ; break ;
165
	case lex_logical_Hor_H2:
-
 
166
		u = lex_logical_Hor_H1;
-
 
167
		break;
-
 
168
	case lex_not_H2:
-
 
169
		u = lex_not_H1;
-
 
170
		break;
121
	case lex_not_H2 : u = lex_not_H1 ; break ;
171
	case lex_not_Heq_H2:
122
	case lex_not_Heq_H2 : u = lex_not_Heq_H1 ; break ;
172
		u = lex_not_Heq_H1;
-
 
173
		break;
123
	case lex_open_Hbrace_H2 : u = lex_open_Hbrace_H1 ; break ;
174
	case lex_open_Hbrace_H2:
-
 
175
		u = lex_open_Hbrace_H1;
-
 
176
		break;
124
	case lex_open_Hsquare_H2 : u = lex_open_Hsquare_H1 ; break ;
177
	case lex_open_Hsquare_H2:
-
 
178
		u = lex_open_Hsquare_H1;
-
 
179
		break;
-
 
180
	case lex_or_H2:
-
 
181
		u = lex_or_H1;
-
 
182
		break;
125
	case lex_or_H2 : u = lex_or_H1 ; break ;
183
	case lex_or_Heq_H2:
126
	case lex_or_Heq_H2 : u = lex_or_Heq_H1 ; break ;
184
		u = lex_or_Heq_H1;
-
 
185
		break;
-
 
186
	case lex_xor_H2:
-
 
187
		u = lex_xor_H1;
-
 
188
		break;
127
	case lex_xor_H2 : u = lex_xor_H1 ; break ;
189
	case lex_xor_Heq_H2:
128
	case lex_xor_Heq_H2 : u = lex_xor_Heq_H1 ; break ;
190
		u = lex_xor_Heq_H1;
-
 
191
		break;
129
    }
192
	}
130
    return ( u ) ;
193
	return(u);
131
}
194
}
132
 
195
 
133
 
196
 
134
/*
197
/*
135
    REPORT A DIGRAPH TOKEN
198
    REPORT A DIGRAPH TOKEN
136
 
199
 
137
    This routine reports the digraph t, returning the primary form of t.
200
    This routine reports the digraph t, returning the primary form of t.
138
*/
201
*/
139
 
202
 
140
int get_digraph
203
int
141
    PROTO_N ( ( t ) )
-
 
142
    PROTO_T ( int t )
204
get_digraph(int t)
143
{
205
{
144
    int u = primary_form ( t ) ;
206
	int u = primary_form(t);
145
    if ( u != t ) {
207
	if (u != t) {
146
	update_column () ;
208
		update_column();
147
	report ( crt_loc, ERR_lex_digraph_replace ( t, u ) ) ;
209
		report(crt_loc, ERR_lex_digraph_replace(t, u));
148
    }
210
	}
149
    return ( u ) ;
211
	return(u);
150
}
212
}
151
 
213
 
152
 
214
 
153
/*
215
/*
154
    CREATE A KEYWORD
216
    CREATE A KEYWORD
155
 
217
 
156
    This routine creates a keyword identifier with name nm and lexical
218
    This routine creates a keyword identifier with name nm and lexical
157
    token number key.  The special case when key is lex_unknown is used
219
    token number key.  The special case when key is lex_unknown is used
158
    to indicate a reserved identifier.
220
    to indicate a reserved identifier.
159
*/
221
*/
160
 
222
 
161
IDENTIFIER make_keyword
223
IDENTIFIER
162
    PROTO_N ( ( nm, key, id ) )
-
 
163
    PROTO_T ( HASHID nm X int key X IDENTIFIER id )
224
make_keyword(HASHID nm, int key, IDENTIFIER id)
164
{
225
{
165
    PTR ( IDENTIFIER ) ptr = hashid_id ( nm ) ;
226
	PTR(IDENTIFIER)ptr = hashid_id(nm);
166
    if ( IS_NULL_id ( id ) ) {
227
	if (IS_NULL_id(id)) {
167
	/* Find keyword type */
228
		/* Find keyword type */
168
	unsigned tag = id_keyword_tag ;
229
		unsigned tag = id_keyword_tag;
169
	if ( key >= FIRST_ISO_KEYWORD && key <= LAST_ISO_KEYWORD ) {
230
		if (key >= FIRST_ISO_KEYWORD && key <= LAST_ISO_KEYWORD) {
170
	    tag = id_iso_keyword_tag ;
231
			tag = id_iso_keyword_tag;
171
	} else if ( key >= FIRST_SYMBOL && key <= LAST_SYMBOL ) {
232
		} else if (key >= FIRST_SYMBOL && key <= LAST_SYMBOL) {
172
	    tag = id_iso_keyword_tag ;
233
			tag = id_iso_keyword_tag;
173
	} else if ( key == lex_unknown ) {
234
		} else if (key == lex_unknown) {
174
	    tag = id_reserved_tag ;
235
			tag = id_reserved_tag;
175
	}
236
		}
176
 
237
 
177
	/* Create keyword identifier */
238
		/* Create keyword identifier */
178
	MAKE_id_keyword_etc ( tag, nm, dspec_none, NULL_nspace, crt_loc, id ) ;
239
		MAKE_id_keyword_etc(tag, nm, dspec_none, NULL_nspace, crt_loc,
-
 
240
				    id);
179
	COPY_ulong ( id_no ( id ), ( unsigned long ) key ) ;
241
		COPY_ulong(id_no(id), (unsigned long)key);
180
    }
242
	}
181
    COPY_id ( hashid_cache ( nm ), NULL_id ) ;
243
	COPY_id(hashid_cache(nm), NULL_id);
-
 
244
	if (do_keyword) {
182
    if ( do_keyword ) dump_declare ( id, &crt_loc, 1 ) ;
245
		dump_declare(id, &crt_loc, 1);
-
 
246
	}
183
 
247
 
184
    /* Add keyword to identifier meanings */
248
	/* Add keyword to identifier meanings */
185
    for ( ; ; ) {
249
	for (;;) {
186
	IDENTIFIER pid = DEREF_id ( ptr ) ;
250
		IDENTIFIER pid = DEREF_id(ptr);
187
	switch ( TAG_id ( pid ) ) {
251
		switch (TAG_id(pid)) {
188
	    case id_dummy_tag :
252
		case id_dummy_tag:
189
	    case id_keyword_tag :
253
		case id_keyword_tag:
190
	    case id_iso_keyword_tag :
254
		case id_iso_keyword_tag:
191
	    case id_reserved_tag : {
255
		case id_reserved_tag:
192
		COPY_id ( id_alias ( id ), pid ) ;
256
			COPY_id(id_alias(id), pid);
193
		COPY_id ( ptr, id ) ;
257
			COPY_id(ptr, id);
194
		return ( id ) ;
258
			return(id);
195
	    }
-
 
196
	}
259
		}
197
	ptr = id_alias ( pid ) ;
260
		ptr = id_alias(pid);
198
    }
261
	}
199
    /* NOTREACHED */
262
	/* NOTREACHED */
200
}
263
}
201
 
264
 
202
 
265
 
203
/*
266
/*
204
    INITIALISE KEYWORDS
267
    INITIALISE KEYWORDS
205
 
268
 
206
    This routine initialises the hash table entries for the keywords.
269
    This routine initialises the hash table entries for the keywords.
207
*/
270
*/
208
 
-
 
209
void init_keywords
-
 
210
    PROTO_Z ()
-
 
211
{
-
 
212
    int key ;
-
 
213
 
271
 
-
 
272
void
214
    /* Set up keyword entries */
273
init_keywords(void)
215
    for ( key = FIRST_KEYWORD ; key <= LAST_KEYWORD ; key++ ) {
-
 
-
 
274
{
216
	int ext = 0 ;
275
	int key;
217
	string keyword = token_name ( key ) ;
-
 
218
	unsigned long h = hash ( keyword ) ;
-
 
219
	if ( keyword [0] == char_less ) ext = 1 ;
-
 
220
	KEYWORD ( key ) = lookup_name ( keyword, h, ext, key ) ;
-
 
221
    }
-
 
222
 
276
 
-
 
277
	/* Set up keyword entries */
-
 
278
	for (key = FIRST_KEYWORD; key <= LAST_KEYWORD; key++) {
-
 
279
		int ext = 0;
-
 
280
		string keyword = token_name(key);
-
 
281
		unsigned long h = hash(keyword);
-
 
282
		if (keyword[0] == char_less) {
-
 
283
			ext = 1;
-
 
284
		}
-
 
285
		KEYWORD(key) = lookup_name(keyword, h, ext, key);
-
 
286
	}
-
 
287
 
223
    /* Bring the C keywords into scope */
288
	/* Bring the C keywords into scope */
224
    for ( key = FIRST_C_KEYWORD ; key <= LAST_C_KEYWORD ; key++ ) {
289
	for (key = FIRST_C_KEYWORD; key <= LAST_C_KEYWORD; key++) {
225
	HASHID nm = KEYWORD ( key ) ;
290
		HASHID nm = KEYWORD(key);
226
	IGNORE make_keyword ( nm, key, NULL_id ) ;
291
		IGNORE make_keyword(nm, key, NULL_id);
227
    }
292
	}
228
 
293
 
229
    /* Bring the C++ keywords into scope */
294
	/* Bring the C++ keywords into scope */
230
    for ( key = FIRST_CPP_KEYWORD ; key <= LAST_CPP_KEYWORD ; key++ ) {
295
	for (key = FIRST_CPP_KEYWORD; key <= LAST_CPP_KEYWORD; key++) {
231
	HASHID nm = KEYWORD ( key ) ;
296
		HASHID nm = KEYWORD(key);
232
#if LANGUAGE_CPP
297
#if LANGUAGE_CPP
233
	IGNORE make_keyword ( nm, key, NULL_id ) ;
298
		IGNORE make_keyword(nm, key, NULL_id);
234
#else
299
#else
235
	if ( key != lex_wchar_Ht ) {
300
		if (key != lex_wchar_Ht) {
236
	    IGNORE make_keyword ( nm, lex_unknown, NULL_id ) ;
301
			IGNORE make_keyword(nm, lex_unknown, NULL_id);
237
	}
302
		}
238
#endif
303
#endif
239
    }
-
 
240
 
-
 
241
    /* Bring the ISO alternative keywords into scope */
-
 
242
    for ( key = FIRST_ISO_KEYWORD ; key <= LAST_ISO_KEYWORD ; key++ ) {
-
 
243
	HASHID nm = KEYWORD ( key ) ;
-
 
244
	if ( allow_iso_keywords ) {
-
 
245
	    IGNORE make_keyword ( nm, key, NULL_id ) ;
-
 
246
	} else {
-
 
247
	    IGNORE make_keyword ( nm, lex_unknown, NULL_id ) ;
-
 
248
	}
304
	}
-
 
305
 
-
 
306
	/* Bring the ISO alternative keywords into scope */
-
 
307
	for (key = FIRST_ISO_KEYWORD; key <= LAST_ISO_KEYWORD; key++) {
-
 
308
		HASHID nm = KEYWORD(key);
-
 
309
		if (allow_iso_keywords) {
-
 
310
			IGNORE make_keyword(nm, key, NULL_id);
-
 
311
		} else {
-
 
312
			IGNORE make_keyword(nm, lex_unknown, NULL_id);
249
    }
313
		}
-
 
314
	}
250
 
315
 
251
    /* Find underlying dummy identifier for 'operator' */
316
	/* Find underlying dummy identifier for 'operator' */
252
    underlying_op = DEREF_id ( hashid_id ( KEYWORD ( lex_operator ) ) ) ;
317
	underlying_op = DEREF_id(hashid_id(KEYWORD(lex_operator)));
253
    underlying_op = underlying_id ( underlying_op ) ;
318
	underlying_op = underlying_id(underlying_op);
254
    return ;
319
	return;
255
}
320
}
256
 
321
 
257
 
322
 
258
/*
323
/*
259
    ADJUST A CHARACTER FOR TRIGRAPHS
324
    ADJUST A CHARACTER FOR TRIGRAPHS
260
 
325
 
261
    This routine is called after a question mark has been read from the
326
    This routine is called after a question mark has been read from the
262
    input file to allow for trigraphs.  It returns the trigraph replacement
327
    input file to allow for trigraphs.  It returns the trigraph replacement
263
    character or '?' if the following characters do not form a trigraph.
328
    character or '?' if the following characters do not form a trigraph.
264
*/
329
*/
265
 
330
 
266
static int adjust_trigraph
331
static int
267
    PROTO_Z ()
332
adjust_trigraph(void)
268
{
333
{
269
    if ( allow_trigraphs ) {
334
	if (allow_trigraphs) {
270
	int c = next_char () ;
335
		int c = next_char();
271
	if ( c == char_end ) c = refill_char () ;
-
 
272
	if ( c == char_question ) {
336
		if (c == char_end) {
273
	    int d ;
-
 
274
	    c = next_char () ;
337
			c = refill_char();
275
	    if ( c == char_end ) c = refill_char () ;
-
 
276
	    switch ( c ) {
-
 
277
		case char_close_round : {
-
 
278
		    /* Map '\?\?)' to ']' */
-
 
279
		    d = char_close_square ;
-
 
280
		    break ;
-
 
281
		}
-
 
282
		case char_equal : {
-
 
283
		    /* Map '\?\?=' to '#' */
-
 
284
		    d = char_hash ;
-
 
285
		    break ;
-
 
286
		}
-
 
287
		case char_exclaim : {
-
 
288
		    /* Map '\?\?!' to '|' */
-
 
289
		    d = char_bar ;
-
 
290
		    break ;
-
 
291
		}
-
 
292
		case char_greater : {
-
 
293
		    /* Map '\?\?>' to '}' */
-
 
294
		    d = char_close_brace ;
-
 
295
		    break ;
-
 
296
		}
338
		}
-
 
339
		if (c == char_question) {
-
 
340
			int d;
-
 
341
			c = next_char();
-
 
342
			if (c == char_end) {
-
 
343
				c = refill_char();
-
 
344
			}
-
 
345
			switch (c) {
-
 
346
			case char_close_round:
-
 
347
				/* Map '\?\?)' to ']' */
-
 
348
				d = char_close_square;
-
 
349
				break;
-
 
350
			case char_equal:
-
 
351
				/* Map '\?\?=' to '#' */
-
 
352
				d = char_hash;
-
 
353
				break;
-
 
354
			case char_exclaim:
-
 
355
				/* Map '\?\?!' to '|' */
-
 
356
				d = char_bar;
-
 
357
				break;
-
 
358
			case char_greater:
-
 
359
				/* Map '\?\?>' to '}' */
-
 
360
				d = char_close_brace;
-
 
361
				break;
297
		case char_less : {
362
			case char_less:
298
		    /* Map '\?\?<' to '{' */
363
				/* Map '\?\?<' to '{' */
299
		    d = char_open_brace ;
364
				d = char_open_brace;
-
 
365
				break;
-
 
366
			case char_minus:
-
 
367
				/* Map '\?\?-' to '~' */
-
 
368
				d = char_tilde;
-
 
369
				break;
-
 
370
			case char_open_round:
-
 
371
				/* Map '\?\?(' to '[' */
-
 
372
				d = char_open_square;
-
 
373
				break;
-
 
374
			case char_single_quote:
-
 
375
				/* Map '\?\?\'' to '^' */
-
 
376
				d = char_circum;
-
 
377
				break;
-
 
378
			case char_slash:
-
 
379
				/* Map '\?\?/' to '\\' */
-
 
380
				d = char_backslash;
300
		    break ;
381
				break;
-
 
382
			default:
-
 
383
				/* Not a trigraph */
-
 
384
				unread_char(c);
-
 
385
				unread_char(char_question);
-
 
386
				return(char_question);
-
 
387
			}
-
 
388
			update_column();
-
 
389
			report(crt_loc, ERR_lex_trigraph_replace(c, d));
-
 
390
			return(d);
-
 
391
		} else {
-
 
392
			/* Not a trigraph */
-
 
393
			unread_char(c);
301
		}
394
		}
302
		case char_minus : {
-
 
303
		    /* Map '\?\?-' to '~' */
-
 
304
		    d = char_tilde ;
-
 
305
		    break ;
-
 
306
		}
-
 
307
		case char_open_round : {
-
 
308
		    /* Map '\?\?(' to '[' */
-
 
309
		    d = char_open_square ;
-
 
310
		    break ;
-
 
311
		}
-
 
312
		case char_single_quote : {
-
 
313
		    /* Map '\?\?\'' to '^' */
-
 
314
		    d = char_circum ;
-
 
315
		    break ;
-
 
316
		}
-
 
317
		case char_slash : {
-
 
318
		    /* Map '\?\?/' to '\\' */
-
 
319
		    d = char_backslash ;
-
 
320
		    break ;
-
 
321
		}
-
 
322
		default : {
-
 
323
		    /* Not a trigraph */
-
 
324
		    unread_char ( c ) ;
-
 
325
		    unread_char ( char_question ) ;
-
 
326
		    return ( char_question ) ;
-
 
327
		}
-
 
328
	    }
-
 
329
	    update_column () ;
-
 
330
	    report ( crt_loc, ERR_lex_trigraph_replace ( c, d ) ) ;
-
 
331
	    return ( d ) ;
-
 
332
	} else {
-
 
333
	    /* Not a trigraph */
-
 
334
	    unread_char ( c ) ;
-
 
335
	}
395
	}
336
    }
-
 
337
    return ( char_question ) ;
396
	return(char_question);
338
}
397
}
339
 
398
 
340
 
399
 
341
/*
400
/*
342
    READ A NEWLINE CHARACTER
401
    READ A NEWLINE CHARACTER
343
 
402
 
344
    This routine is called after each carriage return character, checking
403
    This routine is called after each carriage return character, checking
345
    for a following newline character.
404
    for a following newline character.
346
*/
405
*/
347
 
406
 
348
static int read_newline
407
static int
349
    PROTO_Z ()
408
read_newline(void)
350
{
409
{
351
    if ( allow_dos_newline ) {
410
	if (allow_dos_newline) {
352
	int c = next_char () ;
411
		int c = next_char();
353
	if ( c == char_end ) c = refill_char () ;
412
		if (c == char_end) {
-
 
413
			c = refill_char();
-
 
414
		}
354
	if ( c == char_newline ) return ( c ) ;
415
		if (c == char_newline) {
-
 
416
			return(c);
-
 
417
		}
355
	unread_char ( c ) ;
418
		unread_char(c);
356
    }
419
	}
357
    return ( char_return ) ;
420
	return(char_return);
358
}
421
}
359
 
422
 
360
 
423
 
361
/*
424
/*
362
    READ AN END OF FILE CHARACTER
425
    READ AN END OF FILE CHARACTER
363
 
426
 
364
    This routine is called after each terminate character, checking for
427
    This routine is called after each terminate character, checking for
365
    a following end of file character.
428
    a following end of file character.
366
*/
429
*/
367
 
430
 
368
static int read_eof
431
static int
369
    PROTO_Z ()
432
read_eof(void)
370
{
433
{
371
    if ( allow_dos_newline ) {
434
	if (allow_dos_newline) {
372
	int c = next_char () ;
435
		int c = next_char();
373
	if ( c == char_end ) c = refill_char () ;
436
		if (c == char_end) {
-
 
437
			c = refill_char();
-
 
438
		}
374
	if ( c == char_eof ) return ( c ) ;
439
		if (c == char_eof) {
-
 
440
			return(c);
-
 
441
		}
375
	unread_char ( c ) ;
442
		unread_char(c);
376
    }
443
	}
377
    return ( char_sub ) ;
444
	return(char_sub);
378
}
445
}
379
 
446
 
380
 
447
 
381
/*
448
/*
382
    READ THE NEXT CHARACTER ALLOWING FOR TRIGRAPHS ETC.
449
    READ THE NEXT CHARACTER ALLOWING FOR TRIGRAPHS ETC.
Line 384... Line 451...
384
    This routine reads the next character from the input file, adjusting
451
    This routine reads the next character from the input file, adjusting
385
    it as necessary for trigraphs and escaped newlines.  This routine
452
    it as necessary for trigraphs and escaped newlines.  This routine
386
    corresponds to phases 1 and 2 of the phases of translation.
453
    corresponds to phases 1 and 2 of the phases of translation.
387
*/
454
*/
388
 
455
 
389
static int read_char
456
static int
390
    PROTO_Z ()
457
read_char(void)
391
{
458
{
392
    for ( ; ; ) {
459
	for (;;) {
393
	int c = next_char () ;
460
		int c = next_char();
394
	if ( c == char_end ) c = refill_char () ;
461
		if (c == char_end) {
-
 
462
			c = refill_char();
-
 
463
		}
395
	if ( c == char_question ) c = adjust_trigraph () ;
464
		if (c == char_question) {
-
 
465
			c = adjust_trigraph();
-
 
466
		}
396
	if ( c != char_backslash ) {
467
		if (c != char_backslash) {
397
	    /* Not an escaped newline */
468
			/* Not an escaped newline */
398
	    return ( c ) ;
469
			return(c);
399
	}
470
		}
400
	c = next_char () ;
471
		c = next_char();
401
	if ( c == char_end ) c = refill_char () ;
472
		if (c == char_end) {
-
 
473
			c = refill_char();
-
 
474
		}
402
	if ( c == char_return ) c = read_newline () ;
475
		if (c == char_return) {
-
 
476
			c = read_newline();
-
 
477
		}
403
	if ( c != char_newline ) {
478
		if (c != char_newline) {
404
	    /* Not an escaped newline */
479
			/* Not an escaped newline */
405
	    unread_char ( c ) ;
480
			unread_char(c);
406
	    return ( char_backslash ) ;
481
			return(char_backslash);
-
 
482
		}
-
 
483
		crt_loc.line++;
-
 
484
		crt_loc.column = 0;
-
 
485
		input_crt = input_posn;
407
	}
486
	}
408
	crt_loc.line++ ;
-
 
409
	crt_loc.column = 0 ;
-
 
410
	input_crt = input_posn ;
-
 
411
    }
-
 
412
    /* NOTREACHED */
487
	/* NOTREACHED */
413
}
488
}
414
 
489
 
415
 
490
 
416
/*
491
/*
417
    CHARACTER LOOK-UP TABLE
492
    CHARACTER LOOK-UP TABLE
Line 432... Line 507...
432
#define NLINE_M			0x40
507
#define NLINE_M			0x40
433
#define LEGAL_M			0x80
508
#define LEGAL_M			0x80
434
 
509
 
435
#define ILLEG			0x00
510
#define ILLEG			0x00
436
#define LEGAL			LEGAL_M
511
#define LEGAL			LEGAL_M
437
#define SPACE			( SPACE_M | LEGAL_M )
512
#define SPACE			(SPACE_M | LEGAL_M)
438
#define ALPHA			( ALPHA_M | ALNUM_M | PPDIG_M | LEGAL_M )
513
#define ALPHA			(ALPHA_M | ALNUM_M | PPDIG_M | LEGAL_M)
439
#define DIGIT			( DIGIT_M | ALNUM_M | PPDIG_M | LEGAL_M )
514
#define DIGIT			(DIGIT_M | ALNUM_M | PPDIG_M | LEGAL_M)
440
#define SYMBL			( SYMBL_M | LEGAL_M )
515
#define SYMBL			(SYMBL_M | LEGAL_M)
441
#define POINT			( PPDIG_M | SYMBL_M | LEGAL_M )
516
#define POINT			(PPDIG_M | SYMBL_M | LEGAL_M)
442
#define NLINE			( NLINE_M | LEGAL_M )
517
#define NLINE			(NLINE_M | LEGAL_M)
443
 
-
 
444
#define main_characters		( characters + 1 )
-
 
445
#define lookup_char( C )	( ( int ) main_characters [C] )
-
 
446
#define is_white( T )		( ( T ) & SPACE_M )
-
 
447
#define is_alpha( T )		( ( T ) & ALPHA_M )
-
 
448
#define is_digit( T )		( ( T ) & DIGIT_M )
-
 
449
#define is_alphanum( T )	( ( T ) & ALNUM_M )
-
 
450
#define is_ppdigit( T )		( ( T ) & PPDIG_M )
-
 
451
#define is_symbol( T )		( ( T ) & SYMBL_M )
-
 
452
#define is_newline( T )		( ( T ) & NLINE_M )
-
 
453
#define is_legal( T )		( ( T ) & LEGAL_M )
-
 
454
 
518
 
-
 
519
#define main_characters		(characters + 1)
-
 
520
#define lookup_char(C)		((int)main_characters[C])
-
 
521
#define is_white(T)		((T) & SPACE_M)
-
 
522
#define is_alpha(T)		((T) & ALPHA_M)
-
 
523
#define is_digit(T)		((T) & DIGIT_M)
-
 
524
#define is_alphanum(T)		((T) & ALNUM_M)
-
 
525
#define is_ppdigit(T)		((T) & PPDIG_M)
-
 
526
#define is_symbol(T)		((T) & SYMBL_M)
-
 
527
#define is_newline(T)		((T) & NLINE_M)
-
 
528
#define is_legal(T)		((T) & LEGAL_M)
-
 
529
 
455
static unsigned char characters [ NO_CHAR + 2 ] = {
530
static unsigned char characters[NO_CHAR + 2] = {
456
    LEGAL,			/* EOF */
531
	LEGAL,			/* EOF */
457
#define CHAR_DATA( A, B, C, D )	( A ),
532
#define CHAR_DATA(A, B, C, D)	(A),
458
#include "char.h"
533
#include "char.h"
459
#undef CHAR_DATA
534
#undef CHAR_DATA
460
    ILLEG			/* dummy */
535
	ILLEG			/* dummy */
461
} ;
536
};
462
 
537
 
463
static unsigned char *copy_characters = main_characters ;
538
static unsigned char *copy_characters = main_characters;
464
 
539
 
465
 
540
 
466
/*
541
/*
467
    SET A CHARACTER LOOK-UP
542
    SET A CHARACTER LOOK-UP
468
 
543
 
Line 470... Line 545...
470
    the underlying value for character b.  As a special case, setting
545
    the underlying value for character b.  As a special case, setting
471
    the look-up for a carriage return to that for newline enables
546
    the look-up for a carriage return to that for newline enables
472
    DOS-like rules on newline and end of file characters.
547
    DOS-like rules on newline and end of file characters.
473
*/
548
*/
474
 
549
 
475
void set_char_lookup
550
void
476
    PROTO_N ( ( a, b ) )
-
 
477
    PROTO_T ( int a X int b )
551
set_char_lookup(int a, int b)
478
{
552
{
479
    if ( a >= 0 && a < NO_CHAR && b >= 0 && b < NO_CHAR ) {
553
	if (a >= 0 && a < NO_CHAR && b >= 0 && b < NO_CHAR) {
480
	unsigned char t = copy_characters [b] ;
554
		unsigned char t = copy_characters[b];
481
	if ( a == char_return ) {
555
		if (a == char_return) {
482
	    if ( b == char_newline ) {
556
			if (b == char_newline) {
483
		/* Set DOS-like newline rules */
557
				/* Set DOS-like newline rules */
484
		allow_dos_newline = 1 ;
558
				allow_dos_newline = 1;
485
		return ;
559
				return;
486
	    }
560
			}
487
	    if ( b == char_return ) {
561
			if (b == char_return) {
488
		/* Unset DOS-like newline rules */
562
				/* Unset DOS-like newline rules */
489
		allow_dos_newline = 0 ;
563
				allow_dos_newline = 0;
490
	    }
564
			}
-
 
565
		}
-
 
566
		main_characters[a] = t;
491
	}
567
	}
492
	main_characters [a] = t ;
-
 
493
    }
-
 
494
    return ;
568
	return;
495
}
569
}
496
 
570
 
497
 
571
 
498
/*
572
/*
499
    SET A NUMBER OF CHARACTER LOOK-UPS
573
    SET A NUMBER OF CHARACTER LOOK-UPS
500
 
574
 
501
    This routine sets the character look-ups for all the elements of the
575
    This routine sets the character look-ups for all the elements of the
502
    string or character literal expression a to be equal to that for the
576
    string or character literal expression a to be equal to that for the
503
    character literal expression b.  If b is the null expression then
577
    character literal expression b.  If b is the null expression then
504
    the look-up is set to be an illegal character.
578
    the look-up is set to be an illegal character.
505
*/
579
*/
506
 
580
 
507
void set_character
581
void
508
    PROTO_N ( ( a, b ) )
-
 
509
    PROTO_T ( EXP a X EXP b )
582
set_character(EXP a, EXP b)
510
{
583
{
511
    int c = get_char_value ( b ) ;
584
	int c = get_char_value(b);
512
    if ( IS_exp_string_lit ( a ) ) {
585
	if (IS_exp_string_lit(a)) {
513
	STRING s = DEREF_str ( exp_string_lit_str ( a ) ) ;
586
		STRING s = DEREF_str(exp_string_lit_str(a));
514
	unsigned long n = DEREF_ulong ( str_simple_len ( s ) ) ;
587
		unsigned long n = DEREF_ulong(str_simple_len(s));
515
	string t = DEREF_string ( str_simple_text ( s ) ) ;
588
		string t = DEREF_string(str_simple_text(s));
516
	unsigned kind = DEREF_unsigned ( str_simple_kind ( s ) ) ;
589
		unsigned kind = DEREF_unsigned(str_simple_kind(s));
517
	if ( kind & STRING_MULTI ) {
590
		if (kind & STRING_MULTI) {
518
	    while ( n ) {
591
			while (n) {
519
		int ch = CHAR_SIMPLE ;
592
				int ch = CHAR_SIMPLE;
520
		unsigned long d = get_multi_char ( t, &ch ) ;
593
				unsigned long d = get_multi_char(t, &ch);
521
		if ( d < ( unsigned long ) NO_CHAR ) {
594
				if (d < (unsigned long)NO_CHAR) {
522
		    set_char_lookup ( ( int ) d, c ) ;
595
					set_char_lookup((int)d, c);
523
		}
596
				}
524
		t += MULTI_WIDTH ;
597
				t += MULTI_WIDTH;
-
 
598
				n--;
-
 
599
			}
-
 
600
		} else {
-
 
601
			while (n) {
-
 
602
				int d = (int)*t;
-
 
603
				set_char_lookup(d, c);
-
 
604
				t++;
525
		n-- ;
605
				n--;
526
	    }
606
			}
-
 
607
		}
527
	} else {
608
	} else {
528
	    while ( n ) {
609
		int d = get_char_value(a);
529
		int d = ( int ) *t ;
610
		if (d != char_illegal) {
530
		set_char_lookup ( d, c ) ;
611
			set_char_lookup(d, c);
531
		t++ ;
-
 
532
		n-- ;
-
 
533
	    }
612
		}
534
	}
613
	}
535
    } else {
-
 
536
	int d = get_char_value ( a ) ;
-
 
537
	if ( d != char_illegal ) set_char_lookup ( d, c ) ;
-
 
538
    }
-
 
539
    return ;
614
	return;
540
}
615
}
541
 
616
 
542
 
617
 
543
/*
618
/*
544
    CHECK FOR WHITE SPACE CHARACTERS
619
    CHECK FOR WHITE SPACE CHARACTERS
545
 
620
 
546
    This routine checks whether the character a represents a white space.
621
    This routine checks whether the character a represents a white space.
547
    The newline character constitutes a special case.
622
    The newline character constitutes a special case.
548
*/
623
*/
549
 
624
 
550
int is_white_char
625
int
551
    PROTO_N ( ( a ) )
-
 
552
    PROTO_T ( unsigned long a )
626
is_white_char(unsigned long a)
553
{
627
{
554
    int t ;
628
	int t;
555
    if ( a >= NO_CHAR ) return ( 0 ) ;
629
	if (a >= NO_CHAR) {
-
 
630
		return(0);
-
 
631
	}
556
    t = lookup_char ( a ) ;
632
	t = lookup_char(a);
557
    return ( is_white ( t ) || is_newline ( t ) ) ;
633
	return(is_white(t) || is_newline(t));
558
}
634
}
559
 
635
 
560
 
636
 
561
/*
637
/*
562
    CHECK FOR ALPHABETIC CHARACTERS
638
    CHECK FOR ALPHABETIC CHARACTERS
563
 
639
 
564
    This routine checks whether the character a represents an alphabetic
640
    This routine checks whether the character a represents an alphabetic
565
    character.
641
    character.
566
*/
642
*/
567
 
643
 
568
int is_alpha_char
644
int
569
    PROTO_N ( ( a ) )
-
 
570
    PROTO_T ( unsigned long a )
645
is_alpha_char(unsigned long a)
571
{
646
{
572
    if ( a >= NO_CHAR ) return ( 0 ) ;
647
	if (a >= NO_CHAR) {
-
 
648
		return(0);
-
 
649
	}
573
    return ( is_alpha ( lookup_char ( a ) ) ) ;
650
	return(is_alpha(lookup_char(a)));
574
}
651
}
575
 
652
 
576
 
653
 
577
/*
654
/*
578
    CHECK FOR LEGAL CHARACTERS
655
    CHECK FOR LEGAL CHARACTERS
579
 
656
 
580
    This routine checks whether the character a represents a legal character.
657
    This routine checks whether the character a represents a legal character.
581
*/
658
*/
582
 
659
 
583
int is_legal_char
660
int
584
    PROTO_N ( ( a ) )
-
 
585
    PROTO_T ( unsigned long a )
661
is_legal_char(unsigned long a)
586
{
662
{
587
    if ( a >= NO_CHAR ) return ( 0 ) ;
663
	if (a >= NO_CHAR) {
-
 
664
		return(0);
-
 
665
	}
588
    return ( is_legal ( lookup_char ( a ) ) ) ;
666
	return(is_legal(lookup_char(a)));
589
}
667
}
590
 
668
 
591
 
669
 
592
/*
670
/*
593
    PEEK AHEAD ONE CHARACTER
671
    PEEK AHEAD ONE CHARACTER
Line 596... Line 674...
596
    newline).  If so the current character is advanced one, otherwise it
674
    newline).  If so the current character is advanced one, otherwise it
597
    is left unchanged.  legal is set to false if the next character is
675
    is left unchanged.  legal is set to false if the next character is
598
    not legal.
676
    not legal.
599
*/
677
*/
600
 
678
 
601
int peek_char
679
int
602
    PROTO_N ( ( a, legal ) )
-
 
603
    PROTO_T ( int a X int *legal )
680
peek_char(int a, int *legal)
604
{
681
{
605
    int c = read_char () ;
682
	int c = read_char();
606
    ASSERT ( a != char_newline ) ;
683
	ASSERT(a != char_newline);
607
    if ( c == a ) return ( 1 ) ;
684
	if (c == a) {
-
 
685
		return(1);
-
 
686
	}
608
    *legal = is_legal_char ( ( unsigned long ) c ) ;
687
	*legal = is_legal_char((unsigned long)c);
609
    unread_char ( c ) ;
688
	unread_char(c);
610
    return ( 0 ) ;
689
	return(0);
611
}
690
}
612
 
691
 
613
 
692
 
614
/*
693
/*
615
    TOKEN BUFFER
694
    TOKEN BUFFER
616
 
695
 
617
    This buffer is used by read_token to hold the values of identifiers,
696
    This buffer is used by read_token to hold the values of identifiers,
618
    numbers and strings.
697
    numbers and strings.
619
*/
698
*/
620
 
699
 
621
BUFFER token_buff = NULL_buff ;
700
BUFFER token_buff = NULL_buff;
622
 
701
 
623
 
702
 
624
/*
703
/*
625
    TOKEN IDENTIFICATION MACROS
704
    TOKEN IDENTIFICATION MACROS
626
 
705
 
627
    These macros are used to identify the start or end of certain tokens
706
    These macros are used to identify the start or end of certain tokens
628
    such as comments and strings.
707
    such as comments and strings.
629
*/
708
*/
630
 
709
 
631
#define START_COMMENT( A )\
710
#define START_COMMENT(A)	((A) == char_asterix)
632
	( ( A ) == char_asterix )
-
 
633
#define END_COMMENT( A, B )\
-
 
634
	( ( A ) == char_asterix && ( B ) == char_slash )
711
#define END_COMMENT(A, B)	((A) == char_asterix && (B) == char_slash)
635
#define START_CPP_COMMENT( A )\
-
 
636
	( ( A ) == char_slash && allow_cpp_comments )
712
#define START_CPP_COMMENT(A)	((A) == char_slash && allow_cpp_comments)
637
#define END_CPP_COMMENT( A )\
713
#define END_CPP_COMMENT(A)	((A) == char_newline)
638
	( ( A ) == char_newline )
-
 
639
#define START_STRING( A )\
-
 
640
	( ( A ) == char_quote || ( A ) == char_single_quote )
714
#define START_STRING(A)		((A) == char_quote || (A) == char_single_quote)
641
#define END_STRING( A, Q )\
715
#define END_STRING(A, Q)	((A) == (Q))
642
	( ( A ) == ( Q ) )
-
 
643
 
716
 
644
 
717
 
645
/*
718
/*
646
    END OF FILE FLAG
719
    END OF FILE FLAG
647
 
720
 
648
    Each source file should end in a newline character, which is not
721
    Each source file should end in a newline character, which is not
649
    preceded by a backslash.  This flag is used to indicate whether the
722
    preceded by a backslash.  This flag is used to indicate whether the
650
    end of the present file has the correct form.
723
    end of the present file has the correct form.
651
*/
724
*/
652
 
725
 
653
static int good_eof = 0 ;
726
static int good_eof = 0;
654
 
727
 
655
 
728
 
656
/*
729
/*
657
    SKIP A STRING
730
    SKIP A STRING
658
 
731
 
659
    This routine skips a string or character literal.  It is entered after
732
    This routine skips a string or character literal.  It is entered after
660
    the initial quote, q, has been read.  Escape sequences are always
733
    the initial quote, q, has been read.  Escape sequences are always
661
    allowed.  The routine returns lex_string_Hlit if the string terminates
734
    allowed.  The routine returns lex_string_Hlit if the string terminates
662
    correctly and lex_eof otherwise.
735
    correctly and lex_eof otherwise.
663
*/
736
*/
664
 
737
 
665
static int skip_string
738
static int
666
    PROTO_N ( ( q ) )
-
 
667
    PROTO_T ( int q )
739
skip_string(int q)
668
{
740
{
669
    int e = q ;
741
	int e = q;
670
    LOCATION loc ;
742
	LOCATION loc;
671
    unsigned nl = 0 ;
743
	unsigned nl = 0;
672
    int escaped = 0 ;
744
	int escaped = 0;
673
    int have_char = 0 ;
745
	int have_char = 0;
674
    int allow_nl = allow_newline_strings ;
746
	int allow_nl = allow_newline_strings;
675
    if ( e == char_single_quote || in_preproc_dir == 1 ) allow_nl = 0 ;
747
	if (e == char_single_quote || in_preproc_dir == 1) {
676
    update_column () ;
-
 
677
    loc = crt_loc ;
-
 
678
 
-
 
679
    /* Scan to end of string */
-
 
680
    for ( ; ; ) {
-
 
681
	int c = read_char () ;
-
 
682
	if ( END_STRING ( c, e ) && !escaped ) {
-
 
683
	    if ( e == char_single_quote && !have_char ) {
-
 
684
		update_column () ;
748
		allow_nl = 0;
685
		report ( crt_loc, ERR_lex_ccon_empty () ) ;
-
 
686
	    }
-
 
687
	    if ( nl ) report ( loc, ERR_lex_string_nl ( nl, nl ) ) ;
-
 
688
	    return ( lex_string_Hlit ) ;
-
 
689
	}
749
	}
-
 
750
	update_column();
-
 
751
	loc = crt_loc;
-
 
752
 
-
 
753
	/* Scan to end of string */
-
 
754
	for (;;) {
-
 
755
		int c = read_char();
-
 
756
		if (END_STRING(c, e) && !escaped) {
-
 
757
			if (e == char_single_quote && !have_char) {
-
 
758
				update_column();
-
 
759
				report(crt_loc, ERR_lex_ccon_empty());
-
 
760
			}
-
 
761
			if (nl) {
-
 
762
				report(loc, ERR_lex_string_nl(nl, nl));
-
 
763
			}
-
 
764
			return(lex_string_Hlit);
-
 
765
		}
690
	if ( c == char_newline ) {
766
		if (c == char_newline) {
691
	    if ( allow_nl ) {
767
			if (allow_nl) {
692
		/* Report newlines but continue */
768
				/* Report newlines but continue */
693
		crt_loc.line++ ;
769
				crt_loc.line++;
694
		crt_loc.column = 0 ;
770
				crt_loc.column = 0;
695
		input_crt = input_posn ;
771
				input_crt = input_posn;
696
		nl++ ;
772
				nl++;
697
	    } else {
773
			} else {
698
		unread_char ( c ) ;
774
				unread_char(c);
699
		update_column () ;
775
				update_column();
700
		report ( crt_loc, ERR_lex_string_pp_nl () ) ;
776
				report(crt_loc, ERR_lex_string_pp_nl());
701
		break ;
777
				break;
702
	    }
778
			}
703
	} else if ( c == char_eof ) {
779
		} else if (c == char_eof) {
704
	    report ( loc, ERR_lex_phases_str_eof () ) ;
780
			report(loc, ERR_lex_phases_str_eof());
705
	    good_eof = 1 ;
781
			good_eof = 1;
706
	    nl = 0 ;
782
			nl = 0;
707
	    break ;
783
			break;
-
 
784
		}
-
 
785
		if (escaped) {
-
 
786
			escaped = 0;
-
 
787
		} else {
-
 
788
			if (c == char_backslash) {
-
 
789
				escaped = 1;
-
 
790
			}
-
 
791
		}
-
 
792
		if (!escaped) {
-
 
793
			have_char = 1;
-
 
794
		}
708
	}
795
	}
709
	if ( escaped ) {
796
	if (nl) {
710
	    escaped = 0 ;
797
		/* Report newlines in string */
-
 
798
		report(loc, ERR_lex_string_nl(nl, nl));
711
	} else {
799
	} else {
712
	    if ( c == char_backslash ) escaped = 1 ;
800
		/* Don't bother with error recovery */
-
 
801
		/* EMPTY */
713
	}
802
	}
714
	if ( !escaped ) have_char = 1 ;
-
 
715
    }
-
 
716
    if ( nl ) {
-
 
717
	/* Report newlines in string */
-
 
718
	report ( loc, ERR_lex_string_nl ( nl, nl ) ) ;
-
 
719
    } else {
-
 
720
	/* Don't bother with error recovery */
-
 
721
	/* EMPTY */
-
 
722
    }
-
 
723
    return ( lex_eof ) ;
803
	return(lex_eof);
724
}
804
}
725
 
805
 
726
 
806
 
727
/*
807
/*
728
    READ THE BODY OF A STRING
808
    READ THE BODY OF A STRING
Line 733... Line 813...
733
    indicates whether escape sequences are allowed (they are not in
813
    indicates whether escape sequences are allowed (they are not in
734
    header names for example).  The string itself is built up in
814
    header names for example).  The string itself is built up in
735
    token_buff.  The routine returns lex_string_Hlit if the string
815
    token_buff.  The routine returns lex_string_Hlit if the string
736
    terminates correctly and lex_eof otherwise.  It also sets
816
    terminates correctly and lex_eof otherwise.  It also sets
737
    token_buff.posn to point to the end of the string.
817
    token_buff.posn to point to the end of the string.
738
*/
818
*/
739
 
819
 
740
int read_string
820
int
741
    PROTO_N ( ( q, esc ) )
-
 
742
    PROTO_T ( int q X int esc )
821
read_string(int q, int esc)
743
{
822
{
744
    int c ;
823
	int c;
745
    int e = q ;
824
	int e = q;
746
    LOCATION loc ;
825
	LOCATION loc;
747
    long posn = -1 ;
826
	long posn = -1;
748
    int escaped = 0 ;
827
	int escaped = 0;
749
    unsigned nl = 0 ;
828
	unsigned nl = 0;
750
    int have_char = 0 ;
829
	int have_char = 0;
751
    string s = token_buff.start ;
830
	string s = token_buff.start;
752
    string se = token_buff.end ;
831
	string se = token_buff.end;
753
    int allow_nl = allow_newline_strings ;
832
	int allow_nl = allow_newline_strings;
754
    update_column () ;
833
	update_column();
755
    if ( e == char_single_quote ) {
834
	if (e == char_single_quote) {
756
	posn = tell_buffer ( crt_buff_no ) ;
835
		posn = tell_buffer(crt_buff_no);
757
	allow_nl = 0 ;
836
		allow_nl = 0;
758
    } else if ( in_preproc_dir == 1 ) {
837
	} else if (in_preproc_dir == 1) {
759
	allow_nl = 0 ;
838
		allow_nl = 0;
760
    }
-
 
761
    loc = crt_loc ;
-
 
762
 
-
 
763
    /* Scan the string */
-
 
764
    for ( ; ; ) {
-
 
765
	c = read_char () ;
-
 
766
	if ( END_STRING ( c, e ) && !escaped ) {
-
 
767
	    if ( e == char_single_quote && !have_char ) {
-
 
768
		update_column () ;
-
 
769
		report ( crt_loc, ERR_lex_ccon_empty () ) ;
-
 
770
	    }
-
 
771
	    if ( nl ) report ( loc, ERR_lex_string_nl ( nl, nl ) ) ;
-
 
772
	    token_buff.posn = s ;
-
 
773
	    *s = 0 ;
-
 
774
	    return ( lex_string_Hlit ) ;
-
 
775
	}
839
	}
-
 
840
	loc = crt_loc;
-
 
841
 
-
 
842
	/* Scan the string */
-
 
843
	for (;;) {
-
 
844
		c = read_char();
-
 
845
		if (END_STRING(c, e) && !escaped) {
-
 
846
			if (e == char_single_quote && !have_char) {
-
 
847
				update_column();
-
 
848
				report(crt_loc, ERR_lex_ccon_empty());
-
 
849
			}
-
 
850
			if (nl) {
-
 
851
				report(loc, ERR_lex_string_nl(nl, nl));
-
 
852
			}
-
 
853
			token_buff.posn = s;
-
 
854
			*s = 0;
-
 
855
			return(lex_string_Hlit);
-
 
856
		}
776
	if ( c == char_newline ) {
857
		if (c == char_newline) {
777
	    if ( allow_nl ) {
858
			if (allow_nl) {
778
		/* Report newlines but continue */
859
				/* Report newlines but continue */
779
		crt_loc.line++ ;
860
				crt_loc.line++;
780
		crt_loc.column = 0 ;
861
				crt_loc.column = 0;
781
		input_crt = input_posn ;
862
				input_crt = input_posn;
782
		nl++ ;
863
				nl++;
783
	    } else {
864
			} else {
784
		unread_char ( c ) ;
865
				unread_char(c);
785
		update_column () ;
866
				update_column();
786
		if ( e == char_greater ) {
867
				if (e == char_greater) {
787
		    /* Header name */
868
					/* Header name */
-
 
869
					report(crt_loc,
788
		    report ( crt_loc, ERR_cpp_include_incompl () ) ;
870
					       ERR_cpp_include_incompl());
789
		} else {
871
				} else {
790
		    report ( crt_loc, ERR_lex_string_pp_nl () ) ;
872
					report(crt_loc, ERR_lex_string_pp_nl());
-
 
873
				}
-
 
874
				break;
-
 
875
			}
-
 
876
		} else if (c == char_eof) {
-
 
877
			report(loc, ERR_lex_phases_str_eof());
-
 
878
			good_eof = 1;
-
 
879
			nl = 0;
-
 
880
			break;
-
 
881
		}
-
 
882
		*s = (character)c;
-
 
883
		if (++s == se) {
-
 
884
			s = extend_buffer(&token_buff, s);
-
 
885
			se = token_buff.end;
791
		}
886
		}
-
 
887
		if (escaped) {
792
		break ;
888
			escaped = 0;
793
	    }
889
		} else {
794
	} else if ( c == char_eof ) {
890
			if (c == char_backslash) {
795
	    report ( loc, ERR_lex_phases_str_eof () ) ;
-
 
796
	    good_eof = 1 ;
891
				escaped = esc;
797
	    nl = 0 ;
-
 
798
	    break ;
-
 
799
	}
892
			}
800
	*s = ( character ) c ;
-
 
801
	if ( ++s == se ) {
-
 
802
	    s = extend_buffer ( &token_buff, s ) ;
-
 
803
	    se = token_buff.end ;
-
 
804
	}
893
		}
805
	if ( escaped ) {
894
		if (!escaped)have_char = 1;
806
	    escaped = 0 ;
-
 
807
	} else {
-
 
808
	    if ( c == char_backslash ) escaped = esc ;
-
 
809
	}
895
	}
810
	if ( !escaped ) have_char = 1 ;
-
 
811
    }
-
 
812
    if ( nl ) {
896
	if (nl) {
813
	/* Report newlines in string */
897
		/* Report newlines in string */
814
	report ( loc, ERR_lex_string_nl ( nl, nl ) ) ;
898
		report(loc, ERR_lex_string_nl(nl, nl));
815
    } else {
899
	} else {
816
	/* Error recovery */
900
		/* Error recovery */
817
	if ( e == char_single_quote && have_char ) {
901
		if (e == char_single_quote && have_char) {
818
	    seek_buffer ( crt_buff_no, posn, 1 ) ;
902
			seek_buffer(crt_buff_no, posn, 1);
819
	    crt_loc = loc ;
903
			crt_loc = loc;
820
	    s = token_buff.start ;
904
			s = token_buff.start;
821
	    c = read_char () ;
905
			c = read_char();
822
	    *( s++ ) = ( character ) c ;
906
			*(s++) = (character)c;
823
	    if ( c == char_backslash && esc ) {
907
			if (c == char_backslash && esc) {
824
		c = read_char () ;
908
				c = read_char();
825
		*( s++ ) = ( character ) c ;
909
				*(s++) = (character)c;
826
	    }
910
			}
-
 
911
		}
827
	}
912
	}
828
    }
-
 
829
    token_buff.posn = s ;
913
	token_buff.posn = s;
830
    *s = 0 ;
914
	*s = 0;
831
    return ( lex_eof ) ;
915
	return(lex_eof);
832
}
916
}
833
 
917
 
834
 
918
 
835
/*
919
/*
836
    SKIP A C STYLE COMMENT
920
    SKIP A C STYLE COMMENT
Line 838... Line 922...
838
    This routine skips a C style comment, returning lex_ignore_token if
922
    This routine skips a C style comment, returning lex_ignore_token if
839
    the comment is terminated correctly and lex_eof otherwise.  It is
923
    the comment is terminated correctly and lex_eof otherwise.  It is
840
    entered after the first two characters comprising the comment start
924
    entered after the first two characters comprising the comment start
841
    have been read.  If keep is true then the comment text is built up
925
    have been read.  If keep is true then the comment text is built up
842
    in token_buff, otherwise it is discarded.
926
    in token_buff, otherwise it is discarded.
843
*/
927
*/
844
 
928
 
845
static int skip_comment
929
static int
846
    PROTO_N ( ( keep ) )
-
 
847
    PROTO_T ( int keep )
930
skip_comment(int keep)
848
{
931
{
849
    int c = 0 ;
932
	int c = 0;
850
    int lastc ;
933
	int lastc;
851
    string s, se ;
934
	string s, se;
852
    LOCATION loc ;
935
	LOCATION loc;
853
    update_column () ;
936
	update_column();
854
    loc = crt_loc ;
937
	loc = crt_loc;
855
    if ( keep ) {
938
	if (keep) {
856
	s = token_buff.start ;
939
		s = token_buff.start;
857
	se = token_buff.end ;
940
		se = token_buff.end;
858
    } else {
941
	} else {
859
	s = NULL ;
942
		s = NULL;
860
	se = NULL ;
943
		se = NULL;
861
    }
944
	}
862
    do {
945
	do {
863
	lastc = c ;
946
		lastc = c;
864
	read_label : {
947
read_label:
865
	    /* Inlined version of read_char */
948
		/* Inlined version of read_char */
866
	    c = next_char () ;
949
		c = next_char();
867
	    if ( c == char_end ) c = refill_char () ;
950
		if (c == char_end) {
-
 
951
			c = refill_char();
-
 
952
		}
868
	    if ( c == char_question ) c = adjust_trigraph () ;
953
		if (c == char_question) {
-
 
954
			c = adjust_trigraph();
-
 
955
		}
869
	    if ( c == char_backslash ) {
956
		if (c == char_backslash) {
870
		c = next_char () ;
957
			c = next_char();
871
		if ( c == char_end ) c = refill_char () ;
958
			if (c == char_end) {
-
 
959
				c = refill_char();
-
 
960
			}
872
		if ( c == char_return ) c = read_newline () ;
961
			if (c == char_return) {
-
 
962
				c = read_newline();
-
 
963
			}
873
		if ( c == char_newline ) {
964
			if (c == char_newline) {
874
		    /* Allow for escaped newlines */
965
				/* Allow for escaped newlines */
875
		    crt_loc.line++ ;
966
				crt_loc.line++;
876
		    crt_loc.column = 0 ;
967
				crt_loc.column = 0;
877
		    input_crt = input_posn ;
968
				input_crt = input_posn;
878
		    goto read_label ;
969
				goto read_label;
879
		}
970
			}
880
		unread_char ( c ) ;
971
			unread_char(c);
881
		c = char_backslash ;
972
			c = char_backslash;
882
	    } else if ( c == char_newline ) {
973
		} else if (c == char_newline) {
883
		/* New line characters */
974
			/* New line characters */
884
		crt_loc.line++ ;
975
			crt_loc.line++;
885
		crt_loc.column = 0 ;
976
			crt_loc.column = 0;
886
		input_crt = input_posn ;
977
			input_crt = input_posn;
887
		crt_line_changed = 1 ;
978
			crt_line_changed = 1;
888
		crt_spaces = 0 ;
979
			crt_spaces = 0;
889
	    } else if ( c == char_eof ) {
980
		} else if (c == char_eof) {
890
		/* End of file characters */
981
			/* End of file characters */
891
		report ( loc, ERR_lex_phases_comm_eof () ) ;
982
			report(loc, ERR_lex_phases_comm_eof());
892
		good_eof = 1 ;
983
			good_eof = 1;
893
		if ( s ) {
984
			if (s) {
894
		    token_buff.posn = s ;
985
				token_buff.posn = s;
895
		    *s = 0 ;
986
				*s = 0;
896
		}
987
			}
897
		return ( lex_eof ) ;
988
			return(lex_eof);
898
	    } else if ( c == char_asterix && lastc == char_slash ) {
989
		} else if (c == char_asterix && lastc == char_slash) {
899
		/* Nested comments */
990
			/* Nested comments */
900
		update_column () ;
991
			update_column();
901
		report ( crt_loc, ERR_lex_comment_nest () ) ;
992
			report(crt_loc, ERR_lex_comment_nest());
902
	    }
993
		}
903
	    if ( s ) {
994
		if (s) {
904
		*s = ( character ) c ;
995
			*s = (character)c;
905
		if ( ++s == se ) {
996
			if (++s == se) {
906
		    s = extend_buffer ( &token_buff, s ) ;
997
				s = extend_buffer(&token_buff, s);
907
		    se = token_buff.end ;
998
				se = token_buff.end;
-
 
999
			}
908
		}
1000
		}
-
 
1001
	} while (!END_COMMENT(lastc, c));
-
 
1002
	if (s) {
-
 
1003
		s -= 2;
-
 
1004
		token_buff.posn = s;
909
	    }
1005
		*s = 0;
910
	}
1006
	}
911
    } while ( !END_COMMENT ( lastc, c ) ) ;
-
 
912
    if ( s ) {
-
 
913
	s -= 2 ;
-
 
914
	token_buff.posn = s ;
-
 
915
	*s = 0 ;
-
 
916
    }
-
 
917
    crt_spaces++ ;
1007
	crt_spaces++;
918
    return ( lex_ignore_token ) ;
1008
	return(lex_ignore_token);
919
}
1009
}
920
 
1010
 
921
 
1011
 
922
/*
1012
/*
923
    SKIP A C++ STYLE COMMENT
1013
    SKIP A C++ STYLE COMMENT
Line 928... Line 1018...
928
    have been read.  The next token read after the comment will be the
1018
    have been read.  The next token read after the comment will be the
929
    terminating newline.  If keep is true then the comment text is built
1019
    terminating newline.  If keep is true then the comment text is built
930
    up in token_buff, otherwise it is discarded.
1020
    up in token_buff, otherwise it is discarded.
931
*/
1021
*/
932
 
1022
 
933
static int skip_cpp_comment
1023
static int
934
    PROTO_N ( ( keep ) )
-
 
935
    PROTO_T ( int keep )
1024
skip_cpp_comment(int keep)
936
{
1025
{
937
    int c ;
1026
	int c;
938
    string s, se ;
1027
	string s, se;
939
    if ( keep ) {
1028
	if (keep) {
940
	s = token_buff.start ;
1029
		s = token_buff.start;
941
	se = token_buff.end ;
1030
		se = token_buff.end;
942
    } else {
1031
	} else {
943
	s = NULL ;
1032
		s = NULL;
944
	se = NULL ;
1033
		se = NULL;
945
    }
1034
	}
946
    do {
1035
	do {
947
	read_label : {
1036
read_label:
948
	    /* Inlined version of read_char */
1037
		/* Inlined version of read_char */
949
	    c = next_char () ;
-
 
950
	    if ( c == char_end ) c = refill_char () ;
-
 
951
	    if ( c == char_question ) c = adjust_trigraph () ;
-
 
952
	    if ( c == char_backslash ) {
-
 
953
		c = next_char () ;
1038
		c = next_char();
954
		if ( c == char_end ) c = refill_char () ;
-
 
955
		if ( c == char_return ) c = read_newline () ;
-
 
956
		if ( c == char_newline ) {
1039
		if (c == char_end) {
957
		    /* Allow for escaped newlines */
-
 
958
		    crt_loc.line++ ;
-
 
959
		    crt_loc.column = 0 ;
-
 
960
		    input_crt = input_posn ;
-
 
961
		    goto read_label ;
1040
			c = refill_char();
962
		}
1041
		}
963
		unread_char ( c ) ;
-
 
964
		c = char_backslash ;
1042
		if (c == char_question) {
965
	    } else if ( c == char_eof ) {
-
 
966
		/* End of file characters */
-
 
967
		update_column () ;
1043
			c = adjust_trigraph();
968
		report ( crt_loc, ERR_lex_phases_comm_eof () ) ;
-
 
969
		good_eof = 1 ;
-
 
970
		if ( s ) {
-
 
971
		    token_buff.posn = s ;
-
 
972
		    *s = 0 ;
-
 
973
		}
1044
		}
-
 
1045
		if (c == char_backslash) {
-
 
1046
			c = next_char();
-
 
1047
			if (c == char_end) {
-
 
1048
				c = refill_char();
-
 
1049
			}
-
 
1050
			if (c == char_return) {
-
 
1051
				c = read_newline();
-
 
1052
			}
-
 
1053
			if (c == char_newline) {
-
 
1054
				/* Allow for escaped newlines */
-
 
1055
				crt_loc.line++;
-
 
1056
				crt_loc.column = 0;
-
 
1057
				input_crt = input_posn;
-
 
1058
				goto read_label;
-
 
1059
			}
-
 
1060
			unread_char(c);
-
 
1061
			c = char_backslash;
-
 
1062
		} else if (c == char_eof) {
-
 
1063
			/* End of file characters */
-
 
1064
			update_column();
-
 
1065
			report(crt_loc, ERR_lex_phases_comm_eof());
-
 
1066
			good_eof = 1;
-
 
1067
			if (s) {
-
 
1068
				token_buff.posn = s;
-
 
1069
				*s = 0;
-
 
1070
			}
974
		return ( lex_eof ) ;
1071
			return(lex_eof);
975
	    }
1072
		}
976
	    if ( s ) {
1073
		if (s) {
977
		*s = ( character ) c ;
1074
			*s = (character)c;
978
		if ( ++s == se ) {
1075
			if (++s == se) {
979
		    s = extend_buffer ( &token_buff, s ) ;
1076
				s = extend_buffer(&token_buff, s);
980
		    se = token_buff.end ;
1077
				se = token_buff.end;
-
 
1078
			}
981
		}
1079
		}
-
 
1080
	} while (!END_CPP_COMMENT(c));
-
 
1081
	unread_char(c);
-
 
1082
	if (s) {
982
	    }
1083
		s -= 1;
-
 
1084
		token_buff.posn = s;
-
 
1085
		*s = 0;
983
	}
1086
	}
984
    } while ( !END_CPP_COMMENT ( c ) ) ;
-
 
985
    unread_char ( c ) ;
-
 
986
    if ( s ) {
-
 
987
	s -= 1 ;
-
 
988
	token_buff.posn = s ;
-
 
989
	*s = 0 ;
-
 
990
    }
-
 
991
    crt_line_changed = 1 ;
1087
	crt_line_changed = 1;
992
    crt_spaces = 0 ;
1088
	crt_spaces = 0;
993
    return ( lex_ignore_token ) ;
1089
	return(lex_ignore_token);
994
}
1090
}
995
 
1091
 
996
 
1092
 
997
/*
1093
/*
998
    SKIP WHITE-SPACE CHARACTERS
1094
    SKIP WHITE-SPACE CHARACTERS
Line 1010... Line 1106...
1010
    routine is that all non-empty sequences of white-space characters other
1106
    routine is that all non-empty sequences of white-space characters other
1011
    than newlines are treated as if they were a single space (the C/C++
1107
    than newlines are treated as if they were a single space (the C/C++
1012
    specification says that this is implementation-defined).
1108
    specification says that this is implementation-defined).
1013
*/
1109
*/
1014
 
1110
 
1015
unsigned long skip_white
1111
unsigned long
1016
    PROTO_N ( ( nl ) )
-
 
1017
    PROTO_T ( int nl )
1112
skip_white(int nl)
1018
{
1113
{
1019
    int c ;
1114
	int c;
1020
    unsigned long sp = 0 ;
1115
	unsigned long sp = 0;
1021
    for ( ; ; ) {
1116
	for (;;) {
1022
	c = next_char () ;
1117
		c = next_char();
1023
	if ( c == char_end ) c = refill_char () ;
1118
		if (c == char_end) {
-
 
1119
			c = refill_char();
-
 
1120
		}
1024
	if ( c == char_return ) c = read_newline () ;
1121
		if (c == char_return) {
-
 
1122
			c = read_newline();
-
 
1123
		}
1025
	if ( c == char_sub ) c = read_eof () ;
1124
		if (c == char_sub) {
-
 
1125
			c = read_eof();
-
 
1126
		}
1026
	if ( c == char_newline ) {
1127
		if (c == char_newline) {
1027
	    /* Deal with newline characters */
1128
			/* Deal with newline characters */
1028
	    if ( !nl ) break ;
1129
			if (!nl) {
-
 
1130
				break;
-
 
1131
			}
1029
	    sp = WHITE_NEWLINE ;
1132
			sp = WHITE_NEWLINE;
1030
	    crt_loc.line++ ;
1133
			crt_loc.line++;
1031
	    crt_loc.column = 0 ;
1134
			crt_loc.column = 0;
1032
	    input_crt = input_posn ;
1135
			input_crt = input_posn;
1033
	    crt_line_changed = 1 ;
1136
			crt_line_changed = 1;
1034
	    crt_spaces = 0 ;
1137
			crt_spaces = 0;
1035
	} else if ( c == char_space ) {
1138
		} else if (c == char_space) {
1036
	    /* Deal with simple spaces */
1139
			/* Deal with simple spaces */
1037
	    sp |= WHITE_SPACE ;
1140
			sp |= WHITE_SPACE;
1038
	    crt_spaces++ ;
1141
			crt_spaces++;
1039
	} else if ( c == char_tab ) {
1142
		} else if (c == char_tab) {
1040
	    /* Deal with tab characters */
1143
			/* Deal with tab characters */
1041
	    unsigned long tab = tab_width ;
1144
			unsigned long tab = tab_width;
1042
	    sp |= WHITE_SPACE ;
1145
			sp |= WHITE_SPACE;
1043
	    crt_spaces = tab * ( crt_spaces / tab + 1 ) ;
1146
			crt_spaces = tab *(crt_spaces / tab + 1);
1044
	} else if ( c == char_eof ) {
1147
		} else if (c == char_eof) {
1045
	    /* End of file */
1148
			/* End of file */
1046
	    if ( sp == WHITE_NEWLINE ) good_eof = 1 ;
1149
			if (sp == WHITE_NEWLINE) {
-
 
1150
				good_eof = 1;
-
 
1151
			}
1047
	    break ;
1152
			break;
1048
	} else {
1153
		} else {
1049
	    int t ;
1154
			int t;
1050
#if FS_EXTENDED_CHAR
1155
#if FS_EXTENDED_CHAR
1051
	    if ( IS_EXTENDED ( c ) ) break ;
1156
			if (IS_EXTENDED(c)) {
-
 
1157
				break;
-
 
1158
			}
1052
#endif
1159
#endif
1053
	    t = lookup_char ( c ) ;
1160
			t = lookup_char(c);
1054
	    if ( is_white ( t ) ) {
1161
			if (is_white(t)) {
1055
		/* Deal with other white space characters */
1162
				/* Deal with other white space characters */
1056
		sp |= WHITE_SPACE ;
1163
				sp |= WHITE_SPACE;
1057
		crt_spaces++ ;
1164
				crt_spaces++;
1058
	    } else {
1165
			} else {
1059
		if ( c == char_question ) c = adjust_trigraph () ;
1166
				if (c == char_question)c = adjust_trigraph();
1060
		if ( c == char_slash ) {
1167
				if (c == char_slash) {
1061
		    /* Deal with comments */
1168
					/* Deal with comments */
1062
		    int b = read_char () ;
1169
					int b = read_char();
1063
		    if ( START_COMMENT ( b ) ) {
1170
					if (START_COMMENT(b)) {
1064
			sp |= WHITE_SPACE ;
1171
						sp |= WHITE_SPACE;
1065
			b = skip_comment ( 0 ) ;
1172
						b = skip_comment(0);
1066
			if ( b == lex_eof ) return ( sp ) ;
1173
						if (b == lex_eof)  {
-
 
1174
							return(sp);
-
 
1175
						}
1067
		    } else if ( START_CPP_COMMENT ( b ) ) {
1176
					} else if (START_CPP_COMMENT(b)) {
1068
			sp |= WHITE_SPACE ;
1177
						sp |= WHITE_SPACE;
1069
			b = skip_cpp_comment ( 0 ) ;
1178
						b = skip_cpp_comment(0);
1070
			if ( b == lex_eof ) return ( sp ) ;
1179
						if (b == lex_eof) {
-
 
1180
							return(sp);
-
 
1181
						}
-
 
1182
						if (!nl) {
1071
			if ( !nl ) return ( sp ) ;
1183
							return(sp);
-
 
1184
						}
1072
		    } else {
1185
					} else {
1073
			unread_char ( b ) ;
1186
						unread_char(b);
1074
			break ;
1187
						break;
1075
		    }
1188
					}
1076
		} else if ( c == char_backslash ) {
1189
				} else if (c == char_backslash) {
1077
		    /* Deal with escaped newlines */
1190
					/* Deal with escaped newlines */
1078
		    int b = next_char () ;
1191
					int b = next_char();
1079
		    if ( b == char_end ) b = refill_char () ;
1192
					if (b == char_end) {
-
 
1193
						b = refill_char();
-
 
1194
					}
1080
		    if ( b == char_return ) b = read_newline () ;
1195
					if (b == char_return) {
-
 
1196
						b = read_newline();
-
 
1197
					}
1081
		    if ( b == char_newline ) {
1198
					if (b == char_newline) {
1082
			crt_loc.line++ ;
1199
						crt_loc.line++;
1083
			crt_loc.column = 0 ;
1200
						crt_loc.column = 0;
1084
			input_crt = input_posn ;
1201
						input_crt = input_posn;
1085
		    } else {
1202
					} else {
1086
			unread_char ( b ) ;
1203
						unread_char(b);
1087
			break ;
1204
						break;
1088
		    }
1205
					}
1089
		    sp |= WHITE_ESC_NEWLINE ;
1206
					sp |= WHITE_ESC_NEWLINE;
1090
		} else {
1207
				} else {
1091
		    break ;
1208
					break;
1092
		}
1209
				}
1093
	    }
1210
			}
1094
	}
1211
		}
1095
    }
1212
	}
1096
    unread_char ( c ) ;
1213
	unread_char(c);
1097
    return ( sp ) ;
1214
	return(sp);
1098
}
1215
}
1099
 
1216
 
1100
 
1217
 
1101
/*
1218
/*
1102
    PATCH UP WHITE-SPACE CHARACTERS
1219
    PATCH UP WHITE-SPACE CHARACTERS
1103
 
1220
 
1104
    Calling skip_white ( 1 ) can mess up the parser as regards spotting
1221
    Calling skip_white ( 1 ) can mess up the parser as regards spotting
1105
    preprocessing directives and valid end of file markers.  This routine
1222
    preprocessing directives and valid end of file markers.  This routine
1106
    may be called with the return value of skip_white as an argument to
1223
    may be called with the return value of skip_white as an argument to
1107
    patch up the buffer in order to get the parser back into the right
1224
    patch up the buffer in order to get the parser back into the right
1108
    state.
1225
    state.
1109
*/
1226
*/
1110
 
1227
 
1111
void patch_white
1228
void
1112
    PROTO_N ( ( sp ) )
-
 
1113
    PROTO_T ( unsigned long sp )
1229
patch_white(unsigned long sp)
1114
{
1230
{
1115
    if ( sp & WHITE_NEWLINE ) {
1231
	if (sp & WHITE_NEWLINE) {
1116
	if ( sp & WHITE_SPACE ) {
1232
		if (sp & WHITE_SPACE) {
1117
	    /* Patch in a space after a newline */
1233
			/* Patch in a space after a newline */
1118
	    unsigned long n ;
1234
			unsigned long n;
1119
	    update_column () ;
1235
			update_column();
1120
	    n = crt_loc.column ;
1236
			n = crt_loc.column;
1121
	    while ( n ) {
1237
			while (n) {
1122
		unread_char ( char_space ) ;
1238
				unread_char(char_space);
1123
		if ( input_posn <= input_start ) break ;
1239
				if (input_posn <= input_start) {
-
 
1240
					break;
-
 
1241
				}
1124
		n-- ;
1242
				n--;
1125
	    }
1243
			}
1126
	} else if ( sp & WHITE_ESC_NEWLINE ) {
1244
		} else if (sp & WHITE_ESC_NEWLINE) {
1127
	    /* Patch in an escaped newline after a newline */
1245
			/* Patch in an escaped newline after a newline */
1128
	    unread_char ( char_backslash ) ;
1246
			unread_char(char_backslash);
1129
	    unread_char ( char_newline ) ;
1247
			unread_char(char_newline);
1130
	    crt_loc.line-- ;
1248
			crt_loc.line--;
1131
	}
1249
		}
1132
	/* Patch in a newline */
1250
		/* Patch in a newline */
1133
	unread_char ( char_newline ) ;
1251
		unread_char(char_newline);
1134
	crt_loc.line-- ;
1252
		crt_loc.line--;
1135
	crt_loc.column = 0 ;
1253
		crt_loc.column = 0;
1136
	crt_spaces = 0 ;
1254
		crt_spaces = 0;
1137
    }
1255
	}
1138
    return ;
1256
	return;
1139
}
1257
}
1140
 
1258
 
1141
 
1259
 
1142
/*
1260
/*
1143
    SKIP TO END OF LINE
1261
    SKIP TO END OF LINE
1144
 
1262
 
1145
    This routine skips to the end of the current line.  It returns 0 if
1263
    This routine skips to the end of the current line.  It returns 0 if
1146
    only white-space characters are encountered.  It uses skip_white to
1264
    only white-space characters are encountered.  It uses skip_white to
1147
    jump over white-space (including comments).
1265
    jump over white-space (including comments).
1148
*/
1266
*/
1149
 
1267
 
1150
int skip_to_end
1268
int
1151
    PROTO_Z ()
1269
skip_to_end(void)
1152
{
1270
{
1153
    int c ;
1271
	int c;
1154
    int res = 0 ;
1272
	int res = 0;
1155
    in_preproc_dir = 0 ;
1273
	in_preproc_dir = 0;
1156
    for ( ; ; ) {
1274
	for (;;) {
1157
	IGNORE skip_white ( 0 ) ;
1275
		IGNORE skip_white(0);
1158
	read_label : {
1276
read_label:
1159
	    /* Inlined version of read_char */
1277
		/* Inlined version of read_char */
1160
	    c = next_char () ;
1278
		c = next_char();
1161
	    if ( c == char_end ) c = refill_char () ;
1279
		if (c == char_end) {
-
 
1280
			c = refill_char();
-
 
1281
		}
1162
	    if ( c == char_question ) c = adjust_trigraph () ;
1282
		if (c == char_question) {
-
 
1283
			c = adjust_trigraph();
-
 
1284
		}
1163
	    if ( c == char_backslash ) {
1285
		if (c == char_backslash) {
1164
		c = next_char () ;
1286
			c = next_char();
1165
		if ( c == char_end ) c = refill_char () ;
1287
			if (c == char_end) {
-
 
1288
				c = refill_char();
-
 
1289
			}
1166
		if ( c == char_return ) c = read_newline () ;
1290
			if (c == char_return) {
-
 
1291
				c = read_newline();
-
 
1292
			}
1167
		if ( c == char_newline ) {
1293
			if (c == char_newline) {
1168
		    /* Allow for escaped newlines */
1294
				/* Allow for escaped newlines */
1169
		    crt_loc.line++ ;
1295
				crt_loc.line++;
1170
		    crt_loc.column = 0 ;
1296
				crt_loc.column = 0;
1171
		    input_crt = input_posn ;
1297
				input_crt = input_posn;
1172
		    goto read_label ;
1298
				goto read_label;
-
 
1299
			}
-
 
1300
			unread_char(c);
-
 
1301
		} else if (c == char_newline) {
-
 
1302
			/* New line characters */
-
 
1303
			crt_loc.line++;
-
 
1304
			crt_loc.column = 0;
-
 
1305
			input_crt = input_posn;
-
 
1306
			crt_line_changed = 1;
-
 
1307
			crt_spaces = 0;
-
 
1308
			return(res);
-
 
1309
		} else if (START_STRING(c)) {
-
 
1310
			/* String literals */
-
 
1311
			res = 1;
-
 
1312
			c = skip_string(c);
-
 
1313
			if (c == lex_eof) {
-
 
1314
				return(res);
-
 
1315
			}
-
 
1316
		} else if (c == char_eof) {
-
 
1317
			/* End of file characters */
-
 
1318
			break;
-
 
1319
		} else {
-
 
1320
			res = 1;
1173
		}
1321
		}
1174
		unread_char ( c ) ;
-
 
1175
	    } else if ( c == char_newline ) {
-
 
1176
		/* New line characters */
-
 
1177
		crt_loc.line++ ;
-
 
1178
		crt_loc.column = 0 ;
-
 
1179
		input_crt = input_posn ;
-
 
1180
		crt_line_changed = 1 ;
-
 
1181
		crt_spaces = 0 ;
-
 
1182
		return ( res ) ;
-
 
1183
	    } else if ( START_STRING ( c ) ) {
-
 
1184
		/* String literals */
-
 
1185
		res = 1 ;
-
 
1186
		c = skip_string ( c ) ;
-
 
1187
		if ( c == lex_eof ) return ( res ) ;
-
 
1188
	    } else if ( c == char_eof ) {
-
 
1189
		/* End of file characters */
-
 
1190
		break ;
-
 
1191
	    } else {
-
 
1192
		res = 1 ;
-
 
1193
	    }
-
 
1194
	}
1322
	}
1195
    }
-
 
1196
    update_column () ;
1323
	update_column();
1197
    report ( crt_loc, ERR_lex_phases_eof () ) ;
1324
	report(crt_loc, ERR_lex_phases_eof());
1198
    good_eof = 1 ;
1325
	good_eof = 1;
1199
    return ( res ) ;
1326
	return(res);
1200
}
1327
}
1201
 
1328
 
1202
 
1329
 
1203
/*
1330
/*
1204
    READ A UNICODE CHARACTER
1331
    READ A UNICODE CHARACTER
1205
 
1332
 
1206
    This routine reads a unicode character.  It is entered after the
1333
    This routine reads a unicode character.  It is entered after the
1207
    initial backslash and the following character, c, have been read.
1334
    initial backslash and the following character, c, have been read.
1208
    It assigns the character type to pc and returns the character code.
1335
    It assigns the character type to pc and returns the character code.
1209
*/
1336
*/
1210
 
1337
 
1211
static unsigned long read_unicode
1338
static unsigned long
1212
    PROTO_N ( ( c, pc ) )
-
 
1213
    PROTO_T ( int c X int *pc )
1339
read_unicode(int c, int *pc)
1214
{
1340
{
1215
    unsigned i, n ;
1341
	unsigned i, n;
1216
    unsigned long u ;
1342
	unsigned long u;
1217
    character s [10] ;
1343
	character s[10];
1218
    ERROR err = NULL_err ;
1344
	ERROR err = NULL_err;
1219
    string p = s ;
1345
	string p = s;
1220
    if ( c == char_u && allow_unicodes ) {
1346
	if (c == char_u && allow_unicodes) {
1221
	/* Read '\uxxxx' */
1347
		/* Read '\uxxxx' */
1222
	*pc = CHAR_UNI4 ;
1348
		*pc = CHAR_UNI4;
1223
	n = 4 ;
1349
		n = 4;
1224
    } else if ( c == char_U && allow_unicodes ) {
1350
	} else if (c == char_U && allow_unicodes) {
1225
	/* Read '\Uxxxxxxxx' */
1351
		/* Read '\Uxxxxxxxx' */
1226
	*pc = CHAR_UNI8 ;
1352
		*pc = CHAR_UNI8;
1227
	n = 8 ;
1353
		n = 8;
1228
    } else {
1354
	} else {
1229
	unread_char ( c ) ;
1355
		unread_char(c);
1230
	*pc = CHAR_NONE ;
1356
		*pc = CHAR_NONE;
1231
	return ( 0 ) ;
1357
		return(0);
1232
    }
-
 
1233
    for ( i = 0 ; i < n ; i++ ) {
-
 
1234
	int t ;
-
 
1235
	int d = read_char () ;
-
 
1236
	if ( d == char_eof ) break ;
-
 
1237
#if FS_EXTENDED_CHAR
-
 
1238
	if ( IS_EXTENDED ( d ) ) {
-
 
1239
	    unread_char ( d ) ;
-
 
1240
	    break ;
-
 
1241
	}
1358
	}
-
 
1359
	for (i = 0; i < n; i++) {
-
 
1360
		int t;
-
 
1361
		int d = read_char();
-
 
1362
		if (d == char_eof) {
-
 
1363
			break;
-
 
1364
		}
-
 
1365
#if FS_EXTENDED_CHAR
-
 
1366
		if (IS_EXTENDED(d)) {
-
 
1367
			unread_char(d);
-
 
1368
			break;
-
 
1369
		}
1242
#endif
1370
#endif
1243
	t = lookup_char ( d ) ;
1371
		t = lookup_char(d);
1244
	if ( !is_alphanum ( t ) ) {
1372
		if (!is_alphanum(t)) {
1245
	    unread_char ( d ) ;
1373
			unread_char(d);
1246
	    break ;
1374
			break;
-
 
1375
		}
-
 
1376
		s[i] = (character)d;
-
 
1377
	}
-
 
1378
	s[i] = 0;
-
 
1379
	u = eval_unicode(c, n, pc, &p, &err);
-
 
1380
	if (!IS_NULL_err(err)) {
-
 
1381
		update_column();
-
 
1382
		report(crt_loc, err);
1247
	}
1383
	}
1248
	s [i] = ( character ) d ;
-
 
1249
    }
-
 
1250
    s [i] = 0 ;
-
 
1251
    u = eval_unicode ( c, n, pc, &p, &err ) ;
-
 
1252
    if ( !IS_NULL_err ( err ) ) {
-
 
1253
	update_column () ;
-
 
1254
	report ( crt_loc, err ) ;
-
 
1255
    }
-
 
1256
    return ( u ) ;
1384
	return(u);
1257
}
1385
}
1258
 
1386
 
1259
 
1387
 
1260
/*
1388
/*
1261
    READ AN EXTENDED IDENTIFIER
1389
    READ AN EXTENDED IDENTIFIER
1262
 
1390
 
1263
    This routine reads an extended identifier name (one including a unicode
1391
    This routine reads an extended identifier name (one including a unicode
1264
    character).  It is entered after reading the simple characters in the
1392
    character).  It is entered after reading the simple characters in the
1265
    token buffer plus the unicode character given by u and ch.
1393
    token buffer plus the unicode character given by u and ch.
1266
*/
1394
*/
1267
 
1395
 
1268
static HASHID read_extended_id
1396
static HASHID
1269
    PROTO_N ( ( u, ch ) )
-
 
1270
    PROTO_T ( unsigned long u X int ch )
1397
read_extended_id(unsigned long u, int ch)
1271
{
1398
{
1272
    string s ;
1399
	string s;
1273
    int c, t ;
1400
	int c, t;
1274
    HASHID nm ;
1401
	HASHID nm;
1275
    unsigned long h ;
1402
	unsigned long h;
1276
    BUFFER *bf = &token_buff ;
1403
	BUFFER *bf = &token_buff;
1277
    do {
1404
	do {
1278
	if ( !unicode_alpha ( u ) ) {
1405
		if (!unicode_alpha(u)) {
1279
	    /* Report illegal identifiers */
1406
			/* Report illegal identifiers */
1280
	    update_column () ;
1407
			update_column();
1281
	    report ( crt_loc, ERR_lex_name_extendid ( u ) ) ;
1408
			report(crt_loc, ERR_lex_name_extendid(u));
1282
	}
1409
		}
1283
	print_char ( u, ch, 0, bf ) ;
1410
		print_char(u, ch, 0, bf);
1284
	for ( ; ; ) {
1411
		for (;;) {
1285
	    c = read_char () ;
1412
			c = read_char();
1286
#if FS_EXTENDED_CHAR
1413
#if FS_EXTENDED_CHAR
1287
	    if ( IS_EXTENDED ( c ) ) break ;
1414
			if (IS_EXTENDED(c)) {
-
 
1415
				break;
-
 
1416
			}
1288
#endif
1417
#endif
1289
	    t = lookup_char ( c ) ;
1418
			t = lookup_char(c);
1290
	    if ( !is_alphanum ( t ) ) break ;
1419
			if (!is_alphanum(t)) {
-
 
1420
				break;
-
 
1421
			}
1291
	    bfputc ( bf, c ) ;
1422
			bfputc(bf, c);
1292
	}
1423
		}
1293
	ch = CHAR_NONE ;
1424
		ch = CHAR_NONE;
1294
	if ( c == char_backslash ) {
1425
		if (c == char_backslash) {
1295
	    int nextc = read_char () ;
1426
			int nextc = read_char();
1296
	    u = read_unicode ( nextc, &ch ) ;
1427
			u = read_unicode(nextc, &ch);
1297
	}
1428
		}
1298
    } while ( ch != CHAR_NONE ) ;
1429
	} while (ch != CHAR_NONE);
1299
    unread_char ( c ) ;
1430
	unread_char(c);
1300
    bfputc ( bf, 0 ) ;
1431
	bfputc(bf, 0);
1301
    s = bf->start ;
1432
	s = bf->start;
1302
    h = hash ( s ) ;
1433
	h = hash(s);
1303
    nm = lookup_name ( s, h, 1, lex_unknown ) ;
1434
	nm = lookup_name(s, h, 1, lex_unknown);
1304
    return ( nm ) ;
1435
	return(nm);
1305
}
1436
}
1306
 
1437
 
1307
 
1438
 
1308
/*
1439
/*
1309
    HASH VALUE FOR IDENTIFIERS
1440
    HASH VALUE FOR IDENTIFIERS
Line 1313... Line 1444...
1313
    the hash value needs to be kept in step with the routine hash (it
1444
    the hash value needs to be kept in step with the routine hash (it
1314
    is checked by an assertion in lookup_name, so any errors should be
1445
    is checked by an assertion in lookup_name, so any errors should be
1315
    caught quickly if in debug mode).
1446
    caught quickly if in debug mode).
1316
*/
1447
*/
1317
 
1448
 
1318
HASHID token_hashid = NULL_hashid ;
1449
HASHID token_hashid = NULL_hashid;
1319
 
1450
 
1320
 
1451
 
1321
/*
1452
/*
1322
    MAIN PASS ANALYSER
1453
    MAIN PASS ANALYSER
1323
 
1454
 
1324
    This routine reads the next preprocessing token from the input file.
1455
    This routine reads the next preprocessing token from the input file.
1325
    It is designed for speed rather than elegance, hence the rather
1456
    It is designed for speed rather than elegance, hence the rather
1326
    indiscriminate use of labels.  Trigraphs and escaped newlines
1457
    indiscriminate use of labels.  Trigraphs and escaped newlines
1327
    involving the first character are processed by hand.  This routine
1458
    involving the first character are processed by hand.  This routine
1328
    corresponds to phase 3 of the phases of translation.  The position
1459
    corresponds to phase 3 of the phases of translation.  The position
1329
    within the line is tracked by column - this is zero at the start of
1460
    within the line is tracked by column - this is zero at the start of
1330
    a line, positive if only white space has been read and negative
1461
    a line, positive if only white space has been read and negative
1331
    otherwise.  preproc keeps track of the last preprocessing directive.
1462
    otherwise.  preproc keeps track of the last preprocessing directive.
1332
*/
1463
*/
1333
 
1464
 
1334
int read_token
1465
int
1335
    PROTO_Z ()
1466
read_token(void)
1336
{
1467
{
1337
    int c, t ;
1468
	int c, t;
1338
    int column = -1 ;
1469
	int column = -1;
1339
    int preproc = lex_ignore_token ;
1470
	int preproc = lex_ignore_token;
1340
 
1471
 
1341
    /* Read the next character */
1472
	/* Read the next character */
1342
    start_label : {
1473
start_label:
1343
	c = next_char () ;
1474
	c = next_char();
1344
	if ( c == char_end ) c = refill_char () ;
1475
	if (c == char_end)c = refill_char();
1345
	restart_label : {
1476
restart_label:
1346
#if FS_EXTENDED_CHAR
1477
#if FS_EXTENDED_CHAR
1347
	    if ( IS_EXTENDED ( c ) ) {
1478
	if (IS_EXTENDED(c)) {
1348
		goto unknown_label ;
1479
		goto unknown_label;
1349
	    }
-
 
1350
#endif
-
 
1351
	    t = lookup_char ( c ) ;
-
 
1352
	    if ( is_white ( t ) ) {
-
 
1353
		crt_spaces++ ;
-
 
1354
		goto start_label ;
-
 
1355
	    }
-
 
1356
	}
-
 
1357
	process_label : {
-
 
1358
	    /* Process the next character */
-
 
1359
	}
1480
	}
1360
    }
-
 
1361
 
-
 
1362
    /* Check symbols and punctuation */
-
 
1363
    if ( is_symbol ( t ) ) {
-
 
1364
	switch ( c ) {
-
 
1365
 
-
 
1366
	    case char_question : {
-
 
1367
		/* Deal with '?' and trigraphs */
-
 
1368
		c = adjust_trigraph () ;
-
 
1369
		if ( c == char_question ) return ( lex_question ) ;
-
 
1370
		goto restart_label ;
-
 
1371
	    }
1481
#endif
1372
 
-
 
1373
	    case char_backslash : {
-
 
1374
		/* Deal with escaped newlines */
-
 
1375
		unsigned long u ;
-
 
1376
		int ch = CHAR_NONE ;
-
 
1377
		int nextc = next_char () ;
1482
	t = lookup_char(c);
1378
		if ( nextc == char_end ) nextc = refill_char () ;
-
 
1379
		if ( nextc == char_return ) nextc = read_newline () ;
-
 
1380
		if ( nextc == char_newline ) {
1483
	if (is_white(t)) {
1381
		    crt_loc.line++ ;
1484
		crt_spaces++;
1382
		    crt_loc.column = 0 ;
-
 
1383
		    input_crt = input_posn ;
-
 
1384
		    if ( column == 0 ) column = 1 ;
-
 
1385
		    goto start_label ;
1485
		goto start_label;
1386
		}
-
 
1387
 
-
 
1388
		/* Check for unicode characters */
-
 
1389
		u = read_unicode ( nextc, &ch ) ;
-
 
1390
		if ( ch != CHAR_NONE ) {
-
 
1391
		    token_buff.posn = token_buff.start ;
-
 
1392
		    token_hashid = read_extended_id ( u, ch ) ;
-
 
1393
		    return ( lex_identifier ) ;
-
 
1394
		}
-
 
1395
		return ( lex_backslash ) ;
-
 
1396
	    }
-
 
1397
 
-
 
1398
	    case char_hash : {
-
 
1399
		/* Deal with '#' and '##' */
-
 
1400
		c = read_char () ;
-
 
1401
		if ( c == char_hash ) return ( lex_hash_Hhash_H1 ) ;
-
 
1402
		unread_char ( c ) ;
-
 
1403
 
-
 
1404
		/* Return with '#' if not at start of line */
-
 
1405
		if ( column < 0 || no_preproc_dir ) {
-
 
1406
		    return ( lex_hash_H1 ) ;
-
 
1407
		}
1486
	}
1408
 
-
 
1409
		/* Deal with preprocessing directives */
-
 
1410
		preproc_label : {
1487
process_label:
1411
		    unsigned long sp = skip_white ( 0 ) ;
-
 
1412
		    update_column () ;
-
 
1413
		    if ( column ) report ( crt_loc, ERR_cpp_indent () ) ;
-
 
1414
		    if ( sp & ( WHITE_SPACE | WHITE_ESC_NEWLINE ) ) {
-
 
1415
			report ( preproc_loc, ERR_cpp_indent_dir () ) ;
-
 
1416
		    }
-
 
1417
		    preproc = read_preproc_dir ( 1, preproc ) ;
-
 
1418
		    if ( preproc < 0 ) goto start_line_label ;
-
 
1419
		    unread_char ( char_newline ) ;
1488
	/* Process the next character */
1420
		    crt_loc.line-- ;
-
 
1421
		    crt_loc.column = 0 ;
-
 
1422
		    return ( preproc ) ;
-
 
1423
		}
-
 
1424
	    }
-
 
1425
 
1489
 
1426
	    case char_percent : {
-
 
1427
		/* Deal with '%', '%=', '%>', '%:' and '%:%:' */
-
 
1428
		c = read_char () ;
-
 
1429
		if ( c == char_equal ) return ( lex_rem_Heq ) ;
-
 
1430
		if ( c == char_greater && allow_digraphs ) {
-
 
1431
		    return ( lex_close_Hbrace_H2 ) ;
-
 
1432
		}
-
 
1433
		if ( c == char_colon && allow_digraphs ) {
-
 
1434
		    /* Check for '%:' and '%:%:' */
1490
	/* Check symbols and punctuation */
1435
		    c = read_char () ;
-
 
1436
		    if ( c == char_percent ) {
-
 
1437
			int nextc = read_char () ;
-
 
1438
			if ( nextc == char_colon ) {
1491
	if (is_symbol(t)) {
1439
			    return ( lex_hash_Hhash_H2 ) ;
-
 
1440
			}
-
 
1441
			unread_char ( nextc ) ;
-
 
1442
		    }
-
 
1443
		    unread_char ( c ) ;
-
 
1444
 
-
 
1445
		    /* Return with '%:' if not at start of line */
-
 
1446
		    if ( column < 0 || no_preproc_dir ) {
-
 
1447
			return ( lex_hash_H2 ) ;
-
 
1448
		    }
-
 
1449
 
-
 
1450
		    /* Otherwise this is a preprocessing directive */
-
 
1451
		    IGNORE get_digraph ( lex_hash_H2 ) ;
-
 
1452
		    goto preproc_label ;
-
 
1453
		}
-
 
1454
		unread_char ( c ) ;
1492
		switch (c) {
1455
		return ( lex_rem ) ;
-
 
1456
	    }
-
 
1457
 
1493
 
1458
	    case char_quote : {
1494
		case char_question: {
1459
		/* Deal with string literals */
-
 
1460
		IGNORE read_string ( c, 1 ) ;
-
 
1461
		return ( lex_string_Hlit ) ;
-
 
1462
	    }
-
 
1463
 
-
 
1464
	    case char_single_quote : {
-
 
1465
		/* Deal with character literals */
-
 
1466
		IGNORE read_string ( c, 1 ) ;
-
 
1467
		return ( lex_char_Hlit ) ;
-
 
1468
	    }
-
 
1469
 
-
 
1470
	    case char_exclaim : {
-
 
1471
		/* Deal with '!' and '!=' */
1495
			/* Deal with '?' and trigraphs */
1472
		c = read_char () ;
1496
			c = adjust_trigraph();
1473
		if ( c == char_equal ) return ( lex_not_Heq_H1 ) ;
1497
			if (c == char_question) return(lex_question);
1474
		unread_char ( c ) ;
-
 
1475
		return ( lex_not_H1 ) ;
1498
			goto restart_label;
1476
	    }
1499
		}
1477
 
1500
 
1478
	    case char_ampersand : {
1501
		case char_backslash: {
1479
		/* Deal with '&', '&&' and '&=' */
1502
			/* Deal with escaped newlines */
-
 
1503
			unsigned long u;
-
 
1504
			int ch = CHAR_NONE;
1480
		c = read_char () ;
1505
			int nextc = next_char();
1481
		if ( c == char_ampersand ) return ( lex_logical_Hand_H1 ) ;
1506
			if (nextc == char_end)nextc = refill_char();
1482
		if ( c == char_equal ) return ( lex_and_Heq_H1 ) ;
1507
			if (nextc == char_return)nextc = read_newline();
-
 
1508
			if (nextc == char_newline) {
-
 
1509
				crt_loc.line++;
1483
		unread_char ( c ) ;
1510
				crt_loc.column = 0;
-
 
1511
				input_crt = input_posn;
-
 
1512
				if (column == 0)column = 1;
1484
		return ( lex_and_H1 ) ;
1513
				goto start_label;
1485
	    }
1514
			}
1486
 
1515
 
1487
	    case char_asterix : {
1516
			/* Check for unicode characters */
1488
		/* Deal with '*' and '*=' */
1517
			u = read_unicode(nextc, &ch);
1489
		c = read_char () ;
1518
			if (ch != CHAR_NONE) {
1490
		if ( c == char_equal ) return ( lex_star_Heq ) ;
1519
				token_buff.posn = token_buff.start;
1491
		unread_char ( c ) ;
1520
				token_hashid = read_extended_id(u, ch);
1492
		return ( lex_star ) ;
1521
				return(lex_identifier);
1493
	    }
1522
			}
1494
 
-
 
1495
	    case char_plus : {
-
 
1496
		/* Deal with '+', '++' and '+=' */
-
 
1497
		c = read_char () ;
-
 
1498
		if ( c == char_plus ) return ( lex_plus_Hplus ) ;
-
 
1499
		if ( c == char_equal ) return ( lex_plus_Heq ) ;
-
 
1500
		if ( c == char_question && allow_extra_symbols ) {
-
 
1501
		    return ( lex_abs ) ;
1523
			return(lex_backslash);
1502
		}
1524
		}
1503
		unread_char ( c ) ;
-
 
1504
		return ( lex_plus ) ;
-
 
1505
	    }
-
 
1506
 
-
 
1507
	    case char_minus : {
-
 
1508
		/* Deal with '-', '--', '-=', '->' and '->*' */
-
 
1509
		c = read_char () ;
-
 
1510
		if ( c == char_minus ) return ( lex_minus_Hminus ) ;
-
 
1511
		if ( c == char_equal ) return ( lex_minus_Heq ) ;
-
 
1512
		if ( c == char_greater ) {
-
 
1513
#if LANGUAGE_CPP
-
 
1514
		    /* '->*' is only allowed in C++ */
-
 
1515
		    c = read_char () ;
-
 
1516
		    if ( c == char_asterix ) return ( lex_arrow_Hstar ) ;
-
 
1517
		    unread_char ( c ) ;
-
 
1518
#endif
-
 
1519
		    return ( lex_arrow ) ;
-
 
1520
		}
-
 
1521
		unread_char ( c ) ;
-
 
1522
		return ( lex_minus ) ;
-
 
1523
	    }
-
 
1524
 
1525
 
1525
	    case char_dot : {
1526
		case char_hash:
1526
		/* Deal with '.', '...', '.*' and numbers */
1527
			/* Deal with '#' and '##' */
1527
		c = read_char () ;
1528
			c = read_char();
1528
		if ( c == char_dot ) {
1529
			if (c == char_hash) {
-
 
1530
				return(lex_hash_Hhash_H1);
-
 
1531
			}
1529
		    c = read_char () ;
1532
			unread_char(c);
-
 
1533
 
-
 
1534
			/* Return with '#' if not at start of line */
-
 
1535
			if (column < 0 || no_preproc_dir) {
-
 
1536
				return(lex_hash_H1);
-
 
1537
			}
-
 
1538
 
-
 
1539
			/* Deal with preprocessing directives */
-
 
1540
preproc_label:	{
-
 
1541
			unsigned long sp = skip_white(0);
-
 
1542
			update_column();
-
 
1543
			if (column) {
-
 
1544
				report(crt_loc, ERR_cpp_indent());
-
 
1545
			}
-
 
1546
			if (sp & (WHITE_SPACE | WHITE_ESC_NEWLINE)) {
-
 
1547
				report(preproc_loc, ERR_cpp_indent_dir());
-
 
1548
			}
1530
		    if ( c == char_dot ) return ( lex_ellipsis ) ;
1549
			preproc = read_preproc_dir(1, preproc);
1531
		    unread_char ( c ) ;
1550
			if (preproc < 0) {
-
 
1551
				goto start_line_label;
-
 
1552
			}
1532
		    unread_char ( char_dot ) ;
1553
			unread_char(char_newline);
-
 
1554
			crt_loc.line--;
-
 
1555
			crt_loc.column = 0;
1533
		    return ( lex_dot ) ;
1556
			return(preproc);
1534
		}
1557
		}
1535
#if LANGUAGE_CPP
-
 
1536
		/* '.*' is only allowed in C++ */
-
 
1537
		if ( c == char_asterix ) return ( lex_dot_Hstar ) ;
-
 
1538
#endif
-
 
1539
#if FS_EXTENDED_CHAR
-
 
1540
		if ( IS_EXTENDED ( c ) ) {
-
 
1541
		    unread_char ( c ) ;
-
 
1542
		    return ( lex_dot ) ;
-
 
1543
		}
-
 
1544
#endif
-
 
1545
		t = lookup_char ( c ) ;
-
 
1546
		if ( is_digit ( t ) ) {
-
 
1547
		    /* Indicate a number with first digit '.' */
-
 
1548
		    t = POINT ;
-
 
1549
		    goto number_label ;
-
 
1550
		}
-
 
1551
		unread_char ( c ) ;
-
 
1552
		return ( lex_dot ) ;
-
 
1553
	    }
-
 
1554
 
-
 
1555
	    case char_slash : {
-
 
1556
		/* Deal with '/', '/=' and comments */
-
 
1557
		c = read_char () ;
-
 
1558
		if ( START_COMMENT ( c ) ) {
-
 
1559
		    int a = analyse_comments ;
-
 
1560
		    c = skip_comment ( a ) ;
-
 
1561
		    if ( c == lex_eof ) goto eof_label ;
-
 
1562
		    if ( a ) {
-
 
1563
			c = lint_comment () ;
-
 
1564
			if ( c >= 0 ) return ( c ) ;
-
 
1565
		    }
-
 
1566
		    if ( column == 0 ) column = 1 ;
-
 
1567
		    goto start_label ;
-
 
1568
		}
-
 
1569
		if ( START_CPP_COMMENT ( c ) ) {
-
 
1570
		    int a = analyse_comments ;
-
 
1571
		    c = skip_cpp_comment ( a ) ;
-
 
1572
		    if ( c == lex_eof ) goto eof_label ;
-
 
1573
		    if ( a ) {
-
 
1574
			c = lint_comment () ;
-
 
1575
			if ( c >= 0 ) return ( c ) ;
-
 
1576
		    }
-
 
1577
		    IGNORE read_char () ;
-
 
1578
		    goto newline_label ;
-
 
1579
		}
-
 
1580
		if ( c == char_equal ) return ( lex_div_Heq ) ;
-
 
1581
		unread_char ( c ) ;
-
 
1582
		return ( lex_div ) ;
-
 
1583
	    }
-
 
1584
 
1558
 
-
 
1559
		case char_percent:
-
 
1560
			/* Deal with '%', '%=', '%>', '%:' and '%:%:' */
-
 
1561
			c = read_char();
-
 
1562
			if (c == char_equal) {
-
 
1563
				return(lex_rem_Heq);
-
 
1564
			}
-
 
1565
			if (c == char_greater && allow_digraphs) {
-
 
1566
				return(lex_close_Hbrace_H2);
-
 
1567
			}
-
 
1568
			if (c == char_colon && allow_digraphs) {
-
 
1569
				/* Check for '%:' and '%:%:' */
-
 
1570
				c = read_char();
-
 
1571
				if (c == char_percent) {
-
 
1572
					int nextc = read_char();
-
 
1573
					if (nextc == char_colon) {
-
 
1574
						return(lex_hash_Hhash_H2);
-
 
1575
					}
-
 
1576
					unread_char(nextc);
-
 
1577
				}
-
 
1578
				unread_char(c);
-
 
1579
 
-
 
1580
				/* Return with '%:' if not at start of line */
-
 
1581
				if (column < 0 || no_preproc_dir) {
-
 
1582
					return(lex_hash_H2);
-
 
1583
				}
-
 
1584
 
-
 
1585
				/* Otherwise this is a preprocessing
-
 
1586
				 * directive */
-
 
1587
				IGNORE get_digraph(lex_hash_H2);
-
 
1588
				goto preproc_label;
-
 
1589
			}
-
 
1590
			unread_char(c);
-
 
1591
			return(lex_rem);
-
 
1592
 
1585
	    case char_colon : {
1593
		case char_quote:
-
 
1594
			/* Deal with string literals */
-
 
1595
			IGNORE read_string(c, 1);
-
 
1596
			return(lex_string_Hlit);
-
 
1597
 
-
 
1598
		case char_single_quote:
-
 
1599
			/* Deal with character literals */
-
 
1600
			IGNORE read_string(c, 1);
-
 
1601
			return(lex_char_Hlit);
-
 
1602
 
-
 
1603
		case char_exclaim:
-
 
1604
			/* Deal with '!' and '!=' */
-
 
1605
			c = read_char();
-
 
1606
			if (c == char_equal) {
-
 
1607
				return(lex_not_Heq_H1);
-
 
1608
			}
-
 
1609
			unread_char(c);
-
 
1610
			return(lex_not_H1);
-
 
1611
 
-
 
1612
		case char_ampersand:
-
 
1613
			/* Deal with '&', '&&' and '&=' */
-
 
1614
			c = read_char();
-
 
1615
			if (c == char_ampersand) {
-
 
1616
				return(lex_logical_Hand_H1);
-
 
1617
			}
-
 
1618
			if (c == char_equal) {
-
 
1619
				return(lex_and_Heq_H1);
-
 
1620
			}
-
 
1621
			unread_char(c);
-
 
1622
			return(lex_and_H1);
-
 
1623
 
-
 
1624
		case char_asterix:
-
 
1625
			/* Deal with '*' and '*=' */
-
 
1626
			c = read_char();
-
 
1627
			if (c == char_equal) {
-
 
1628
				return(lex_star_Heq);
-
 
1629
			}
-
 
1630
			unread_char(c);
-
 
1631
			return(lex_star);
-
 
1632
 
-
 
1633
		case char_plus:
1586
		/* Deal with ':', '::' and ':>' */
1634
			/* Deal with '+', '++' and '+=' */
-
 
1635
			c = read_char();
-
 
1636
			if (c == char_plus) {
-
 
1637
				return(lex_plus_Hplus);
-
 
1638
			}
-
 
1639
			if (c == char_equal) {
-
 
1640
				return(lex_plus_Heq);
-
 
1641
			}
-
 
1642
			if (c == char_question && allow_extra_symbols) {
-
 
1643
				return(lex_abs);
-
 
1644
			}
-
 
1645
			unread_char(c);
-
 
1646
			return(lex_plus);
-
 
1647
 
-
 
1648
		case char_minus:
-
 
1649
			/* Deal with '-', '--', '-=', '->' and '->*' */
1587
		c = read_char () ;
1650
			c = read_char();
-
 
1651
			if (c == char_minus) {
-
 
1652
				return(lex_minus_Hminus);
-
 
1653
			}
-
 
1654
			if (c == char_equal) {
-
 
1655
				return(lex_minus_Heq);
-
 
1656
			}
-
 
1657
			if (c == char_greater) {
1588
#if LANGUAGE_CPP
1658
#if LANGUAGE_CPP
1589
		/* '::' is only allowed in C++ */
1659
				/* '->*' is only allowed in C++ */
-
 
1660
				c = read_char();
1590
		if ( c == char_colon ) return ( lex_colon_Hcolon ) ;
1661
				if (c == char_asterix) {
-
 
1662
					return(lex_arrow_Hstar);
-
 
1663
				}
-
 
1664
				unread_char(c);
1591
#endif
1665
#endif
1592
		if ( c == char_greater && allow_digraphs ) {
-
 
1593
		    return ( lex_close_Hsquare_H2 ) ;
1666
				return(lex_arrow);
1594
		}
1667
			}
1595
		unread_char ( c ) ;
1668
			unread_char(c);
1596
		return ( lex_colon ) ;
1669
			return(lex_minus);
1597
	    }
-
 
1598
 
1670
 
1746
#endif
1751
#endif
-
 
1752
			if (c == char_greater && allow_digraphs) {
-
 
1753
				return(lex_close_Hsquare_H2);
-
 
1754
			}
-
 
1755
			unread_char(c);
-
 
1756
			return(lex_colon);
-
 
1757
 
-
 
1758
		case char_less:
-
 
1759
			/* Deal with '<', '<=', '<<', '<<=', '<%', '<:' */
-
 
1760
			c = read_char();
1747
	if ( is_alphanum ( t ) ) {
1761
			if (c == char_equal) {
-
 
1762
				return(lex_less_Heq);
-
 
1763
			}
-
 
1764
			if (c == char_less) {
-
 
1765
				c = read_char();
-
 
1766
				if (c == char_equal) {
-
 
1767
					return(lex_lshift_Heq);
-
 
1768
				}
-
 
1769
				unread_char(c);
-
 
1770
				return(lex_lshift);
-
 
1771
			}
-
 
1772
			if (c == char_percent && allow_digraphs) {
-
 
1773
				return(lex_open_Hbrace_H2);
-
 
1774
			}
-
 
1775
			if (c == char_colon && allow_digraphs) {
-
 
1776
				return(lex_open_Hsquare_H2);
-
 
1777
			}
1748
	    /* Scan the third and subsequent characters */
1778
			if (c == char_question && allow_extra_symbols) {
-
 
1779
				return(lex_min);
-
 
1780
			}
-
 
1781
			unread_char(c);
-
 
1782
			return(lex_less);
-
 
1783
 
-
 
1784
		case char_equal:
-
 
1785
			/* Deal with '=' and '==' */
-
 
1786
			c = read_char();
-
 
1787
			switch (c) {
-
 
1788
			case char_equal:
-
 
1789
				return(lex_eq);
-
 
1790
			case char_ampersand:
-
 
1791
			case char_asterix:
-
 
1792
			case char_minus:
-
 
1793
			case char_plus:
-
 
1794
				update_column();
-
 
1795
				report(crt_loc, ERR_lex_op_old_assign(c, c));
1749
	    do {
1796
				break;
-
 
1797
			}
-
 
1798
			unread_char(c);
-
 
1799
			return(lex_assign);
-
 
1800
 
-
 
1801
		case char_greater:
1750
		h = HASH_POWER * h + ( unsigned long ) c ;
1802
			/* Deal with '>', '>=', '>>' and '>>=' */
-
 
1803
			c = read_char();
-
 
1804
			if (c == char_equal) {
-
 
1805
				return(lex_greater_Heq);
-
 
1806
			}
1751
		*s = ( character ) c ;
1807
			if (c == char_greater) {
-
 
1808
				c = read_char();
-
 
1809
				if (c == char_equal) {
-
 
1810
					return(lex_rshift_Heq);
-
 
1811
				}
-
 
1812
				unread_char(c);
-
 
1813
				return(lex_rshift);
-
 
1814
			}
-
 
1815
			if (c == char_question && allow_extra_symbols) {
-
 
1816
				return(lex_max);
-
 
1817
			}
-
 
1818
			unread_char(c);
-
 
1819
			return(lex_greater);
-
 
1820
 
-
 
1821
		case char_circum:
-
 
1822
			/* Deal with '^' and '^=' */
-
 
1823
			c = read_char();
-
 
1824
			if (c == char_equal) {
-
 
1825
				return(lex_xor_Heq_H1);
-
 
1826
			}
-
 
1827
			unread_char(c);
-
 
1828
			return(lex_xor_H1);
-
 
1829
 
-
 
1830
		case char_bar:
-
 
1831
			/* Deal with '|', '||' and '|=' */
-
 
1832
			c = read_char();
1752
		if ( ++s == se ) {
1833
			if (c == char_bar) {
-
 
1834
				return(lex_logical_Hor_H1);
-
 
1835
			}
-
 
1836
			if (c == char_equal) {
-
 
1837
				return(lex_or_Heq_H1);
-
 
1838
			}
-
 
1839
			unread_char(c);
-
 
1840
			return(lex_or_H1);
-
 
1841
 
-
 
1842
		case char_open_round:
-
 
1843
			/* Deal with '(' */
-
 
1844
			return(lex_open_Hround);
-
 
1845
 
-
 
1846
		case char_close_round:
-
 
1847
			/* Deal with ')' */
-
 
1848
			return(lex_close_Hround);
-
 
1849
 
-
 
1850
		case char_comma:
-
 
1851
			/* Deal with ',' */
-
 
1852
			return(lex_comma);
-
 
1853
 
-
 
1854
		case char_semicolon:
-
 
1855
			/* Deal with ';' */
-
 
1856
			return(lex_semicolon);
-
 
1857
 
-
 
1858
		case char_open_square:
-
 
1859
			/* Deal with '[' */
-
 
1860
			return(lex_open_Hsquare_H1);
-
 
1861
 
-
 
1862
		case char_close_square:
-
 
1863
			/* Deal with ']' */
-
 
1864
			return(lex_close_Hsquare_H1);
-
 
1865
 
-
 
1866
		case char_open_brace:
-
 
1867
			/* Deal with '{' */
-
 
1868
			return(lex_open_Hbrace_H1);
-
 
1869
 
-
 
1870
		case char_close_brace:
-
 
1871
			/* Deal with '}' */
1753
		    s = extend_buffer ( bf, s ) ;
1872
			return(lex_close_Hbrace_H1);
-
 
1873
 
1754
		    se = bf->end ;
1874
		case char_tilde:
-
 
1875
			/* Deal with '~' */
-
 
1876
			return(lex_compl_H1);
-
 
1877
 
-
 
1878
		default:
-
 
1879
			/* Anything else is an unknown character */
-
 
1880
			goto unknown_label;
1755
		}
1881
		}
-
 
1882
	}
-
 
1883
 
-
 
1884
	/* Read an identifier (calculating hash value on fly) */
-
 
1885
	if (is_alpha(t)) {
-
 
1886
		HASHID nm;
-
 
1887
		LOCATION loc;
-
 
1888
		BUFFER *bf = &token_buff;
-
 
1889
		string s = bf->start;
-
 
1890
		string se = bf->end;
-
 
1891
		unsigned long h = (unsigned long)c;
-
 
1892
		*(s++) = (character)c;
-
 
1893
 
-
 
1894
		/* Get the second character */
-
 
1895
		update_column();
-
 
1896
		loc = crt_loc;
1756
		c = read_char () ;
1897
		c = read_char();
1757
#if FS_EXTENDED_CHAR
1898
#if FS_EXTENDED_CHAR
-
 
1899
		t = (IS_EXTENDED(c)? ILLEG : lookup_char(c));
-
 
1900
#else
-
 
1901
		t = lookup_char(c);
-
 
1902
#endif
-
 
1903
		if (is_alphanum(t)) {
-
 
1904
			/* Scan the third and subsequent characters */
-
 
1905
			do {
-
 
1906
				h = HASH_POWER * h + (unsigned long)c;
-
 
1907
				*s = (character)c;
-
 
1908
				if (++s == se) {
-
 
1909
					s = extend_buffer(bf, s);
-
 
1910
					se = bf->end;
-
 
1911
				}
-
 
1912
				c = read_char();
-
 
1913
#if FS_EXTENDED_CHAR
1758
		if ( IS_EXTENDED ( c ) ) break ;
1914
				if (IS_EXTENDED(c)) {
-
 
1915
					break;
-
 
1916
				}
1759
#endif
1917
#endif
1760
		t = lookup_char ( c ) ;
1918
				t = lookup_char(c);
1761
	    } while ( is_alphanum ( t ) ) ;
1919
			} while (is_alphanum(t));
1762
	} else {
1920
		} else {
1763
	    /* Allow for wide strings and characters */
1921
			/* Allow for wide strings and characters */
1764
	    if ( h == char_L && is_symbol ( t ) ) {
1922
			if (h == char_L && is_symbol(t)) {
1765
		if ( c == char_quote ) {
1923
				if (c == char_quote) {
1766
		    IGNORE read_string ( c, 1 ) ;
1924
					IGNORE read_string(c, 1);
1767
		    return ( lex_wstring_Hlit ) ;
1925
					return(lex_wstring_Hlit);
1768
		}
1926
				}
1769
		if ( c == char_single_quote ) {
1927
				if (c == char_single_quote) {
1770
		    IGNORE read_string ( c, 1 ) ;
1928
					IGNORE read_string(c, 1);
1771
		    return ( lex_wchar_Hlit ) ;
1929
					return(lex_wchar_Hlit);
-
 
1930
				}
-
 
1931
			}
-
 
1932
			/* Identifier of length one */
-
 
1933
		}
-
 
1934
		if (c == char_backslash) {
-
 
1935
			/* Allow for extended identifiers */
-
 
1936
			int ch = CHAR_NONE;
-
 
1937
			int nextc = read_char();
-
 
1938
			unsigned long u = read_unicode(nextc, &ch);
-
 
1939
			if (ch != CHAR_NONE) {
-
 
1940
				bf->posn = s;
-
 
1941
				nm = read_extended_id(u, ch);
-
 
1942
				goto identifier_label;
-
 
1943
			}
-
 
1944
		}
-
 
1945
		unread_char(c);
-
 
1946
		se = s;
-
 
1947
		*se = 0;
-
 
1948
 
-
 
1949
		/* Look up the symbol in the hash table */
-
 
1950
		h %= HASH_SIZE;
-
 
1951
		s = bf->start;
-
 
1952
		nm = lookup_name(s, h, 0, lex_unknown);
-
 
1953
identifier_label:
-
 
1954
		{
-
 
1955
			IDENTIFIER id = DEREF_id(hashid_id(nm));
-
 
1956
			while (!IS_id_dummy(id)) {
-
 
1957
				/* Scan to last hidden value */
-
 
1958
				id = DEREF_id(id_alias(id));
-
 
1959
			}
-
 
1960
			COPY_loc(id_loc(id), loc);
-
 
1961
		}
-
 
1962
		token_hashid = nm;
-
 
1963
		return(lex_identifier);
-
 
1964
	}
-
 
1965
 
-
 
1966
	/* Read the first token in a line */
-
 
1967
	if (c == char_return) {
-
 
1968
		c = read_newline();
-
 
1969
	}
-
 
1970
	if (c == char_newline) {
-
 
1971
newline_label:
-
 
1972
		/* Re-entry point after C++ style comments */
-
 
1973
		crt_loc.line++;
-
 
1974
		crt_loc.column = 0;
-
 
1975
		input_crt = input_posn;
-
 
1976
		crt_line_changed = 1;
-
 
1977
		crt_spaces = 0;
-
 
1978
		if (in_preproc_dir == 1) {
-
 
1979
			in_preproc_dir = 0;
-
 
1980
			return(lex_newline);
-
 
1981
		}
-
 
1982
start_line_label:
-
 
1983
		/* Re-entry point after preprocessing directives */
-
 
1984
		column = 0;
-
 
1985
		for (;;) {
-
 
1986
			/* Step over any obvious spaces */
-
 
1987
			c = next_char();
-
 
1988
			if (c == char_end) {
-
 
1989
				c = refill_char();
-
 
1990
			}
-
 
1991
			if (c == char_return) {
-
 
1992
				c = read_newline();
-
 
1993
			}
-
 
1994
			if (c == char_sub) {
-
 
1995
				c = read_eof();
-
 
1996
			}
-
 
1997
			if (c == char_newline) {
-
 
1998
				crt_loc.line++;
-
 
1999
				crt_loc.column = 0;
-
 
2000
				input_crt = input_posn;
-
 
2001
				crt_line_changed = 1;
-
 
2002
				crt_spaces = 0;
-
 
2003
				column = 0;
-
 
2004
			} else if (c == char_eof) {
-
 
2005
				/* Check for end of file (should start line) */
-
 
2006
				if (column == 0) {
-
 
2007
					good_eof = 1;
-
 
2008
				}
-
 
2009
				goto eof_label;
-
 
2010
			} else if (c == char_space) {
-
 
2011
				crt_spaces++;
-
 
2012
				column = 1;
-
 
2013
			} else if (c == char_tab) {
-
 
2014
				unsigned long tab = tab_width;
-
 
2015
				crt_spaces = tab *(crt_spaces / tab + 1);
-
 
2016
				column = 1;
-
 
2017
			} else {
-
 
2018
#if FS_EXTENDED_CHAR
-
 
2019
				if (IS_EXTENDED(c)) {
-
 
2020
					t = ILLEG;
-
 
2021
					break;
-
 
2022
				}
-
 
2023
#endif
-
 
2024
				t = lookup_char(c);
-
 
2025
				if (is_white(t)) {
-
 
2026
					if (!is_newline(t)) {
-
 
2027
						crt_spaces++;
-
 
2028
						column = 1;
-
 
2029
					}
-
 
2030
				} else {
-
 
2031
					break;
-
 
2032
				}
-
 
2033
			}
1772
		}
2034
		}
1773
	    }
-
 
1774
	    /* Identifier of length one */
2035
		/* c and t now hold the next character */
-
 
2036
		goto process_label;
1775
	}
2037
	}
1776
	if ( c == char_backslash ) {
-
 
1777
	    /* Allow for extended identifiers */
-
 
1778
	    int ch = CHAR_NONE ;
-
 
1779
	    int nextc = read_char () ;
-
 
1780
	    unsigned long u = read_unicode ( nextc, &ch ) ;
-
 
1781
	    if ( ch != CHAR_NONE ) {
-
 
1782
		bf->posn = s ;
-
 
1783
		nm = read_extended_id ( u, ch ) ;
-
 
1784
		goto identifier_label ;
-
 
1785
	    }
-
 
1786
	}
-
 
1787
	unread_char ( c ) ;
-
 
1788
	se = s ;
-
 
1789
	*se = 0 ;
-
 
1790
 
2038
 
1791
	/* Look up the symbol in the hash table */
-
 
1792
	h %= HASH_SIZE ;
-
 
1793
	s = bf->start ;
-
 
1794
	nm = lookup_name ( s, h, 0, lex_unknown ) ;
-
 
1795
	identifier_label : {
-
 
1796
	    IDENTIFIER id = DEREF_id ( hashid_id ( nm ) ) ;
-
 
1797
	    while ( !IS_id_dummy ( id ) ) {
-
 
1798
		/* Scan to last hidden value */
2039
	/* Read a pp-number */
1799
		id = DEREF_id ( id_alias ( id ) ) ;
-
 
1800
	    }
-
 
1801
	    COPY_loc ( id_loc ( id ), loc ) ;
-
 
1802
	}
-
 
1803
	token_hashid = nm ;
-
 
1804
	return ( lex_identifier ) ;
2040
	if (is_digit(t)) {
1805
    }
-
 
1806
 
-
 
1807
    /* Read the first token in a line */
-
 
1808
    if ( c == char_return ) c = read_newline () ;
-
 
1809
    if ( c == char_newline ) {
-
 
1810
	newline_label : {
2041
number_label:	{
1811
	    /* Re-entry point after C++ style comments */
-
 
1812
	    crt_loc.line++ ;
2042
			int lastc;
1813
	    crt_loc.column = 0 ;
2043
			BUFFER *bf = &token_buff;
1814
	    input_crt = input_posn ;
2044
			string s = bf->start;
1815
	    crt_line_changed = 1 ;
2045
			string se = bf->end;
1816
	    crt_spaces = 0 ;
2046
			if (t == POINT) {
1817
	    if ( in_preproc_dir == 1 ) {
2047
				/* t is set to POINT to indicate an initial
1818
		in_preproc_dir = 0 ;
2048
				 * '.' */
1819
		return ( lex_newline ) ;
2049
				*(s++) = char_dot;
1820
	    }
-
 
1821
	}
2050
			}
1822
	start_line_label : {
2051
digit_label:
1823
	    /* Re-entry point after preprocessing directives */
2052
			/* Step over alphanumeric characters and '.' */
1824
	    column = 0 ;
-
 
1825
	    for ( ; ; ) {
2053
			do {
1826
		/* Step over any obvious spaces */
-
 
1827
		c = next_char () ;
2054
				*s = (character)c;
1828
		if ( c == char_end ) c = refill_char () ;
-
 
1829
		if ( c == char_return ) c = read_newline () ;
-
 
1830
		if ( c == char_sub ) c = read_eof () ;
-
 
1831
		if ( c == char_newline ) {
2055
				if (++s == se) {
1832
		    crt_loc.line++ ;
-
 
1833
		    crt_loc.column = 0 ;
-
 
1834
		    input_crt = input_posn ;
-
 
1835
		    crt_line_changed = 1 ;
2056
					s = extend_buffer(bf, s);
1836
		    crt_spaces = 0 ;
2057
					se = bf->end;
1837
		    column = 0 ;
2058
				}
1838
		} else if ( c == char_eof ) {
-
 
1839
		    /* Check for end of file (should start line) */
-
 
1840
		    if ( column == 0 ) good_eof = 1 ;
-
 
1841
		    goto eof_label ;
2059
next_digit_label:
1842
		} else if ( c == char_space ) {
-
 
1843
		    crt_spaces++ ;
-
 
1844
		    column = 1 ;
2060
				lastc = c;
1845
		} else if ( c == char_tab ) {
-
 
1846
		    unsigned long tab = tab_width ;
-
 
1847
		    crt_spaces = tab * ( crt_spaces / tab + 1 ) ;
-
 
1848
		    column = 1 ;
2061
				c = read_char();
1849
		} else {
-
 
1850
#if FS_EXTENDED_CHAR
2062
#if FS_EXTENDED_CHAR
1851
		    if ( IS_EXTENDED ( c ) ) {
2063
				if (IS_EXTENDED(c)) {
1852
			t = ILLEG ;
-
 
1853
			break ;
2064
					break;
1854
		    }
2065
				}
1855
#endif
2066
#endif
1856
		    t = lookup_char ( c ) ;
2067
				t = lookup_char(c);
1857
		    if ( is_white ( t ) ) {
2068
			} while (is_ppdigit(t));
-
 
2069
			if (c == char_plus || c == char_minus) {
-
 
2070
				/* Allow for [Ee][+-] */
-
 
2071
				if (lastc == char_e || lastc == char_E) {
-
 
2072
					goto digit_label;
-
 
2073
				}
-
 
2074
			}
1858
			if ( !is_newline ( t ) ) {
2075
			if (c == char_backslash) {
-
 
2076
				/* Allow for unicode characters */
1859
			    crt_spaces++ ;
2077
				int ch = CHAR_NONE;
-
 
2078
				int nextc = read_char();
-
 
2079
				unsigned long u = read_unicode(nextc, &ch);
-
 
2080
				if (ch != CHAR_NONE) {
1860
			    column = 1 ;
2081
					bf->posn = s;
-
 
2082
					print_char(u, ch, 0, bf);
-
 
2083
					s = bf->posn;
-
 
2084
					se = bf->end;
-
 
2085
					goto next_digit_label;
-
 
2086
				}
1861
			}
2087
			}
1862
		    } else {
2088
			*s = 0;
1863
			break ;
2089
			unread_char(c);
1864
		    }
-
 
1865
		}
2090
		}
1866
	    }
-
 
1867
	    /* c and t now hold the next character */
-
 
1868
	    goto process_label ;
2091
		return(lex_integer_Hlit);
1869
	}
2092
	}
1870
    }
-
 
1871
 
2093
 
1872
    /* Read a pp-number */
2094
	/* End of file marker */
1873
    if ( is_digit ( t ) ) {
-
 
1874
	number_label : {
-
 
1875
	    int lastc ;
-
 
1876
	    BUFFER *bf = &token_buff ;
-
 
1877
	    string s = bf->start ;
-
 
1878
	    string se = bf->end ;
-
 
1879
	    if ( t == POINT ) {
-
 
1880
		/* t is set to POINT to indicate an initial '.' */
-
 
1881
		*( s++ ) = char_dot ;
2095
	if (c == char_sub) {
1882
	    }
-
 
1883
	    digit_label : {
-
 
1884
		/* Step over alphanumeric characters and '.' */
-
 
1885
		do {
-
 
1886
		    *s = ( character ) c ;
-
 
1887
		    if ( ++s == se ) {
-
 
1888
			s = extend_buffer ( bf, s ) ;
-
 
1889
			se = bf->end ;
-
 
1890
		    }
-
 
1891
		    next_digit_label : {
-
 
1892
			lastc = c ;
-
 
1893
			c = read_char () ;
2096
		c = read_eof();
1894
#if FS_EXTENDED_CHAR
-
 
1895
			if ( IS_EXTENDED ( c ) ) break ;
-
 
1896
#endif
-
 
1897
			t = lookup_char ( c ) ;
-
 
1898
		    }
-
 
1899
		} while ( is_ppdigit ( t ) ) ;
-
 
1900
		if ( c == char_plus || c == char_minus ) {
-
 
1901
		    /* Allow for [Ee][+-] */
-
 
1902
		    if ( lastc == char_e || lastc == char_E ) {
-
 
1903
			goto digit_label ;
-
 
1904
		    }
-
 
1905
		}
-
 
1906
		if ( c == char_backslash ) {
-
 
1907
		    /* Allow for unicode characters */
-
 
1908
		    int ch = CHAR_NONE ;
-
 
1909
		    int nextc = read_char () ;
-
 
1910
		    unsigned long u = read_unicode ( nextc, &ch ) ;
-
 
1911
		    if ( ch != CHAR_NONE ) {
-
 
1912
			bf->posn = s ;
-
 
1913
			print_char ( u, ch, 0, bf ) ;
-
 
1914
			s = bf->posn ;
-
 
1915
			se = bf->end ;
-
 
1916
			goto next_digit_label ;
-
 
1917
		    }
-
 
1918
		}
-
 
1919
	    }
-
 
1920
	    *s = 0 ;
-
 
1921
	    unread_char ( c ) ;
-
 
1922
	}
2097
	}
1923
	return ( lex_integer_Hlit ) ;
-
 
1924
    }
-
 
1925
 
-
 
1926
    /* End of file marker */
-
 
1927
    if ( c == char_sub ) c = read_eof () ;
-
 
1928
    if ( c == char_eof ) {
2098
	if (c == char_eof) {
1929
	eof_label : {
2099
eof_label:
1930
	    if ( in_preproc_dir != 0 ) return ( lex_eof ) ;
2100
		if (in_preproc_dir != 0) {
-
 
2101
			return(lex_eof);
-
 
2102
		}
1931
	    if ( !good_eof ) {
2103
		if (!good_eof) {
1932
		update_column () ;
2104
			update_column();
1933
		report ( crt_loc, ERR_lex_phases_eof () ) ;
2105
			report(crt_loc, ERR_lex_phases_eof());
1934
		good_eof = 1 ;
2106
			good_eof = 1;
1935
	    }
2107
		}
1936
	    if ( end_include ( preproc ) ) {
2108
		if (end_include(preproc)) {
1937
		/* Revert to previous file */
2109
			/* Revert to previous file */
1938
		good_eof = 0 ;
2110
			good_eof = 0;
1939
		preproc = lex_ignore_token ;
2111
			preproc = lex_ignore_token;
1940
		goto start_line_label ;
2112
			goto start_line_label;
1941
	    }
2113
		}
-
 
2114
		/* End of main file */
-
 
2115
		return(lex_eof);
1942
	}
2116
	}
1943
	/* End of main file */
-
 
1944
	return ( lex_eof ) ;
-
 
1945
    }
-
 
1946
 
2117
 
1947
    /* Unknown characters */
2118
	/* Unknown characters */
1948
    unknown_label : {
2119
unknown_label:
-
 
2120
	{
1949
	string s = token_buff.start ;
2121
		string s = token_buff.start;
1950
	add_multi_char ( s, ( unsigned long ) c, CHAR_SIMPLE ) ;
2122
		add_multi_char(s, (unsigned long)c, CHAR_SIMPLE);
1951
    }
-
 
1952
    return ( lex_unknown ) ;
2123
		return(lex_unknown);
-
 
2124
	}
1953
}
2125
}
1954
 
2126
 
1955
 
2127
 
1956
/*
2128
/*
1957
    INITIALISE INPUT VARIABLES
2129
    INITIALISE INPUT VARIABLES
1958
 
2130
 
1959
    This routine initialises the tables of character look-ups and the token
2131
    This routine initialises the tables of character look-ups and the token
1960
    buffer.
2132
    buffer.
1961
*/
2133
*/
1962
 
2134
 
1963
void init_char
2135
void
1964
    PROTO_Z ()
2136
init_char(void)
1965
{
2137
{
1966
    int i ;
2138
	int i;
1967
    unsigned char *p, *q ;
2139
	unsigned char *p, *q;
1968
 
2140
 
1969
    /* Set native locale for multibyte characters */
2141
	/* Set native locale for multibyte characters */
1970
#if FS_MULTIBYTE
2142
#if FS_MULTIBYTE
-
 
2143
	if (allow_multibyte) {
1971
    if ( allow_multibyte ) IGNORE setlocale ( LC_CTYPE, "" ) ;
2144
		IGNORE setlocale(LC_CTYPE, "");
-
 
2145
	}
1972
#endif
2146
#endif
1973
 
2147
 
1974
    /* Allow for non-ASCII codesets */
2148
	/* Allow for non-ASCII codesets */
1975
    map_ascii ( main_characters ) ;
2149
	map_ascii(main_characters);
1976
    map_ascii ( digit_values ) ;
2150
	map_ascii(digit_values);
1977
    map_ascii ( escape_sequences ) ;
2151
	map_ascii(escape_sequences);
1978
 
2152
 
1979
    /* Set up extra characters */
2153
	/* Set up extra characters */
1980
    p = xmalloc_nof ( unsigned char, NO_CHAR ) ;
2154
	p = xmalloc_nof(unsigned char, NO_CHAR);
1981
    q = main_characters ;
2155
	q = main_characters;
1982
    copy_characters = p ;
2156
	copy_characters = p;
1983
    for ( i = 0 ; i < NO_CHAR ; i++ ) *( p++ ) = *( q++ ) ;
2157
	for (i = 0; i < NO_CHAR; i++) {
-
 
2158
		*(p++) = *(q++);
-
 
2159
	}
1984
 
2160
 
1985
    /* Initialise token buffer */
2161
	/* Initialise token buffer */
1986
    token_buff.posn = extend_buffer ( &token_buff, token_buff.posn ) ;
2162
	token_buff.posn = extend_buffer(&token_buff, token_buff.posn);
1987
    return ;
2163
	return;
1988
}
2164
}
1989
 
2165
 
1990
 
2166
 
1991
/*
2167
/*
1992
    INITIALISE INPUT FILE READING
2168
    INITIALISE INPUT FILE READING
1993
 
2169
 
1994
    This routine initialises the lexical analysis routines in preparation
2170
    This routine initialises the lexical analysis routines in preparation
1995
    for parsing or preprocessing the current input file.
2171
    for parsing or preprocessing the current input file.
1996
*/
2172
*/
1997
 
2173
 
1998
void init_lex
2174
void
1999
    PROTO_Z ()
2175
init_lex(void)
2000
{
2176
{
2001
    /* Initialise file variables */
2177
	/* Initialise file variables */
2002
    crt_buff_no = 0 ;
2178
	crt_buff_no = 0;
2003
    IGNORE init_buffer ( crt_buff_no ) ;
2179
	IGNORE init_buffer(crt_buff_no);
2004
    start_preproc_if () ;
2180
	start_preproc_if ();
2005
    preproc_loc = crt_loc ;
2181
	preproc_loc = crt_loc;
2006
    have_syntax_error = 0 ;
2182
	have_syntax_error = 0;
-
 
2183
	if (do_header) {
2007
    if ( do_header ) dump_start ( &crt_loc, NIL ( INCL_DIR ) ) ;
2184
		dump_start(&crt_loc, NIL(INCL_DIR));
-
 
2185
	}
2008
 
2186
 
2009
    /* Deal with first start-up file */
2187
	/* Deal with first start-up file */
2010
    open_startup () ;
2188
	open_startup();
2011
 
2189
 
2012
    /* Force processing to start at the beginning of a line */
2190
	/* Force processing to start at the beginning of a line */
2013
    unread_char ( char_newline ) ;
2191
	unread_char(char_newline);
2014
    crt_loc.line-- ;
2192
	crt_loc.line--;
2015
 
2193
 
2016
    /* Initialise the parser */
2194
	/* Initialise the parser */
2017
    init_parser ( NIL ( PPTOKEN ) ) ;
2195
	init_parser(NIL(PPTOKEN));
2018
    return ;
2196
	return;
2019
}
2197
}
2020
 
2198
 
2021
 
2199
 
2022
/*
2200
/*
2023
    PARSE INPUT FILE
2201
    PARSE INPUT FILE
2024
 
2202
 
2025
    This routine is the main entry point for the parsing of the current
2203
    This routine is the main entry point for the parsing of the current
2026
    input file.
2204
    input file.
2027
*/
2205
*/
2028
 
2206
 
2029
void process_file
2207
void
2030
    PROTO_Z ()
2208
process_file(void)
2031
{
2209
{
2032
    init_lex () ;
2210
	init_lex();
2033
    ADVANCE_LEXER ;
2211
	ADVANCE_LEXER;
2034
    parse_file ( NULL_type, dspec_none ) ;
2212
	parse_file(NULL_type, dspec_none);
2035
    return ;
2213
	return;
2036
}
2214
}