WebSVN – tendra.SVN – Blame – //trunk/src/producers/common/parse/lex.c

Rev	Author	Line No.	Line
2	7u83	1	`/*`
		2	`Crown Copyright (c) 1997`
		3
		4	`This TenDRA(r) Computer Program is subject to Copyright`
		5	`owned by the United Kingdom Secretary of State for Defence`
		6	`acting through the Defence Evaluation and Research Agency`
		7	`(DERA). It is made available to Recipients with a`
		8	`royalty-free licence for its use, reproduction, transfer`
		9	`to other parties and amendment for any purpose not excluding`
		10	`product development provided that any such use et cetera`
		11	`shall be deemed to be acceptance of the following conditions:-`
		12
		13	`(1) Its Recipients shall ensure that this Notice is`
		14	`reproduced upon any copies or amended versions of it;`
		15
		16	`(2) Any amended version of it shall be clearly marked to`
		17	`show both the nature of and the organisation responsible`
		18	`for the relevant amendment or amendments;`
		19
		20	`(3) Its onward transfer from a recipient to another`
		21	`party shall be deemed to be that party's acceptance of`
		22	`these conditions;`
		23
		24	`(4) DERA gives no warranty or assurance as to its`
		25	`quality or suitability for any purpose and DERA accepts`
		26	`no liability whatsoever in relation to any use to which`
		27	`it may be put.`
		28	`*/`
		29
		30
		31	`#include "config.h"`
		32	`#include <limits.h>`
		33	`#if FS_MULTIBYTE`
		34	`#include <locale.h>`
		35	`#endif`
		36	`#include "c_types.h"`
		37	`#include "exp_ops.h"`
		38	`#include "hashid_ops.h"`
		39	`#include "id_ops.h"`
		40	`#include "member_ops.h"`
		41	`#include "str_ops.h"`
		42	`#include "error.h"`
		43	`#include "catalog.h"`
		44	`#include "option.h"`
		45	`#include "buffer.h"`
		46	`#include "char.h"`
		47	`#include "constant.h"`
		48	`#include "file.h"`
		49	`#include "dump.h"`
		50	`#include "hash.h"`
		51	`#include "lex.h"`
		52	`#include "literal.h"`
		53	`#include "macro.h"`
		54	`#include "parse.h"`
		55	`#include "pragma.h"`
		56	`#include "preproc.h"`
		57	`#include "print.h"`
		58	`#include "syntax.h"`
		59	`#include "ustring.h"`
		60	`#include "xalloc.h"`
		61
		62
		63	`/*`
		64	`PARSER OPTIONS`
		65
		66	`These flags control the behaviour of the parser and determine whether`
		67	`such features as trigraphs and digraphs are allowed.`
			``
			`The flags initialised to LANGUAGE_CPP start out enabled exactly when`
			`LANGUAGE_CPP is non-zero. Within this file, allow_trigraphs gates`
			`adjust_trigraph, allow_cpp_comments gates START_CPP_COMMENT,`
			`allow_iso_keywords selects how init_keywords registers the ISO`
			`alternative keywords, allow_newline_strings is read by skip_string`
			`and read_string, and allow_dos_newline is read by read_newline and`
			`read_eof and switched on or off by set_char_lookup.`
		68	`*/`
		69
		70	`int allow_trigraphs = 1 ;`
		71	`int allow_digraphs = 1 ;`
		72	`int allow_unicodes = LANGUAGE_CPP ;`
		73	`int allow_multibyte = 1 ;`
		74	`int allow_cpp_comments = LANGUAGE_CPP ;`
		75	`int allow_dos_newline = 0 ;`
		76	`int allow_extra_symbols = 0 ;`
		77	`int allow_iso_keywords = LANGUAGE_CPP ;`
		78	`int allow_newline_strings = 0 ;`
		79	`int analyse_comments = 1 ;`
		80	`unsigned long max_id_length = 1024 ;`
		81
		82
		83	`/*`
		84	`TABLE OF SYMBOLS AND KEYWORDS`
		85
		86	`This table gives the mapping between lexical token numbers and the`
		87	`corresponding symbols and keywords. It is derived from the list of`
		88	`tokens in symbols.h.`
			``
			`Each LEX_TOKEN ( A, B, C ) entry in symbols.h contributes its second`
			`argument, B, as one element of the table; the table is terminated`
			`by a NULL entry.`
		89	`*/`
		90
		91	`CONST char *token_names [] = {`
		92	`#define LEX_TOKEN( A, B, C ) ( B ),`
		93	`#include "symbols.h"`
		94	`#undef LEX_TOKEN`
		95	`NULL`
		96	`} ;`
		97
		98
		99	`/*`
		100	`TRANSLATION A LEXICAL TOKEN TO ITS PRIMARY FORM`
		101
		102	`This routine translates the alternative ISO keywords and digraphs`
		103	`into their primary form.`
		104	`*/`
		105
		106	`int primary_form`
		107	`PROTO_N ( ( t ) )`
		108	`PROTO_T ( int t )`
		109	`{`
		110	`int u = t ;`
		111	`switch ( u ) {`
		112	`case lex_and_H2 : u = lex_and_H1 ; break ;`
		113	`case lex_and_Heq_H2 : u = lex_and_Heq_H1 ; break ;`
		114	`case lex_close_Hbrace_H2 : u = lex_close_Hbrace_H1 ; break ;`
		115	`case lex_close_Hsquare_H2 : u = lex_close_Hsquare_H1 ; break ;`
		116	`case lex_compl_H2 : u = lex_compl_H1 ; break ;`
		117	`case lex_hash_H2 : u = lex_hash_H1 ; break ;`
		118	`case lex_hash_Hhash_H2 : u = lex_hash_Hhash_H1 ; break ;`
		119	`case lex_logical_Hand_H2 : u = lex_logical_Hand_H1 ; break ;`
		120	`case lex_logical_Hor_H2 : u = lex_logical_Hor_H1 ; break ;`
		121	`case lex_not_H2 : u = lex_not_H1 ; break ;`
		122	`case lex_not_Heq_H2 : u = lex_not_Heq_H1 ; break ;`
		123	`case lex_open_Hbrace_H2 : u = lex_open_Hbrace_H1 ; break ;`
		124	`case lex_open_Hsquare_H2 : u = lex_open_Hsquare_H1 ; break ;`
		125	`case lex_or_H2 : u = lex_or_H1 ; break ;`
		126	`case lex_or_Heq_H2 : u = lex_or_Heq_H1 ; break ;`
		127	`case lex_xor_H2 : u = lex_xor_H1 ; break ;`
		128	`case lex_xor_Heq_H2 : u = lex_xor_Heq_H1 ; break ;`
		129	`}`
		130	`return ( u ) ;`
		131	`}`
		132
		133
		134	`/*`
		135	`REPORT A DIGRAPH TOKEN`
		136
		137	`This routine reports the digraph t, returning the primary form of t.`
		138	`*/`
		139
		140	`int get_digraph`
		141	`PROTO_N ( ( t ) )`
		142	`PROTO_T ( int t )`
		143	`{`
		144	`int u = primary_form ( t ) ;`
		145	`if ( u != t ) {`
		146	`update_column () ;`
		147	`report ( crt_loc, ERR_lex_digraph_replace ( t, u ) ) ;`
		148	`}`
		149	`return ( u ) ;`
		150	`}`
		151
		152
		153	`/*`
		154	`CREATE A KEYWORD`
		155
		156	`This routine creates a keyword identifier with name nm and lexical`
		157	`token number key. The special case when key is lex_unknown is used`
		158	`to indicate a reserved identifier.`
		159	`*/`
		160
		161	`IDENTIFIER make_keyword`
		162	`PROTO_N ( ( nm, key, id ) )`
		163	`PROTO_T ( HASHID nm X int key X IDENTIFIER id )`
		164	`{`
		165	`PTR ( IDENTIFIER ) ptr = hashid_id ( nm ) ;`
		166	`if ( IS_NULL_id ( id ) ) {`
		167	`/* Find keyword type */`
		168	`unsigned tag = id_keyword_tag ;`
		169	`if ( key >= FIRST_ISO_KEYWORD && key <= LAST_ISO_KEYWORD ) {`
		170	`tag = id_iso_keyword_tag ;`
		171	`} else if ( key >= FIRST_SYMBOL && key <= LAST_SYMBOL ) {`
		172	`tag = id_iso_keyword_tag ;`
		173	`} else if ( key == lex_unknown ) {`
		174	`tag = id_reserved_tag ;`
		175	`}`
		176
		177	`/* Create keyword identifier */`
		178	`MAKE_id_keyword_etc ( tag, nm, dspec_none, NULL_nspace, crt_loc, id ) ;`
		179	`COPY_ulong ( id_no ( id ), ( unsigned long ) key ) ;`
		180	`}`
		181	`COPY_id ( hashid_cache ( nm ), NULL_id ) ;`
		182	`if ( do_keyword ) dump_declare ( id, &crt_loc, 1 ) ;`
		183
		184	`/* Add keyword to identifier meanings */`
		185	`for ( ; ; ) {`
		186	`IDENTIFIER pid = DEREF_id ( ptr ) ;`
		187	`switch ( TAG_id ( pid ) ) {`
		188	`case id_dummy_tag :`
		189	`case id_keyword_tag :`
		190	`case id_iso_keyword_tag :`
		191	`case id_reserved_tag : {`
		192	`COPY_id ( id_alias ( id ), pid ) ;`
		193	`COPY_id ( ptr, id ) ;`
		194	`return ( id ) ;`
		195	`}`
		196	`}`
		197	`ptr = id_alias ( pid ) ;`
		198	`}`
		199	`/* NOTREACHED */`
		200	`}`
		201
		202
		203	`/*`
		204	`INITIALISE KEYWORDS`
		205
		206	`This routine initialises the hash table entries for the keywords.`
		207	`*/`
		208
		209	`void init_keywords`
		210	`PROTO_Z ()`
		211	`{`
		212	`int key ;`
		213
		214	`/* Set up keyword entries */`
		215	`for ( key = FIRST_KEYWORD ; key <= LAST_KEYWORD ; key++ ) {`
		216	`int ext = 0 ;`
		217	`string keyword = token_name ( key ) ;`
		218	`unsigned long h = hash ( keyword ) ;`
		219	`if ( keyword [0] == char_less ) ext = 1 ;`
		220	`KEYWORD ( key ) = lookup_name ( keyword, h, ext, key ) ;`
		221	`}`
		222
		223	`/* Bring the C keywords into scope */`
		224	`for ( key = FIRST_C_KEYWORD ; key <= LAST_C_KEYWORD ; key++ ) {`
		225	`HASHID nm = KEYWORD ( key ) ;`
		226	`IGNORE make_keyword ( nm, key, NULL_id ) ;`
		227	`}`
		228
		229	`/* Bring the C++ keywords into scope */`
		230	`for ( key = FIRST_CPP_KEYWORD ; key <= LAST_CPP_KEYWORD ; key++ ) {`
		231	`HASHID nm = KEYWORD ( key ) ;`
		232	`#if LANGUAGE_CPP`
		233	`IGNORE make_keyword ( nm, key, NULL_id ) ;`
		234	`#else`
		235	`if ( key != lex_wchar_Ht ) {`
		236	`IGNORE make_keyword ( nm, lex_unknown, NULL_id ) ;`
		237	`}`
		238	`#endif`
		239	`}`
		240
		241	`/* Bring the ISO alternative keywords into scope */`
		242	`for ( key = FIRST_ISO_KEYWORD ; key <= LAST_ISO_KEYWORD ; key++ ) {`
		243	`HASHID nm = KEYWORD ( key ) ;`
		244	`if ( allow_iso_keywords ) {`
		245	`IGNORE make_keyword ( nm, key, NULL_id ) ;`
		246	`} else {`
		247	`IGNORE make_keyword ( nm, lex_unknown, NULL_id ) ;`
		248	`}`
		249	`}`
		250
		251	`/* Find underlying dummy identifier for 'operator' */`
		252	`underlying_op = DEREF_id ( hashid_id ( KEYWORD ( lex_operator ) ) ) ;`
		253	`underlying_op = underlying_id ( underlying_op ) ;`
		254	`return ;`
		255	`}`
		256
		257
		258	`/*`
		259	`ADJUST A CHARACTER FOR TRIGRAPHS`
		260
		261	`This routine is called after a question mark has been read from the`
		262	`input file to allow for trigraphs. It returns the trigraph replacement`
		263	`character or '?' if the following characters do not form a trigraph.`
		264	`*/`
		265
		266	`static int adjust_trigraph`
		267	`PROTO_Z ()`
		268	`{`
		269	`if ( allow_trigraphs ) {`
		270	`int c = next_char () ;`
		271	`if ( c == char_end ) c = refill_char () ;`
		272	`if ( c == char_question ) {`
		273	`int d ;`
		274	`c = next_char () ;`
		275	`if ( c == char_end ) c = refill_char () ;`
		276	`switch ( c ) {`
		277	`case char_close_round : {`
		278	`/* Map '\?\?)' to ']' */`
		279	`d = char_close_square ;`
		280	`break ;`
		281	`}`
		282	`case char_equal : {`
		283	`/* Map '\?\?=' to '#' */`
		284	`d = char_hash ;`
		285	`break ;`
		286	`}`
		287	`case char_exclaim : {`
		288	`/* Map '\?\?!' to '\|' */`
		289	`d = char_bar ;`
		290	`break ;`
		291	`}`
		292	`case char_greater : {`
		293	`/* Map '\?\?>' to '}' */`
		294	`d = char_close_brace ;`
		295	`break ;`
		296	`}`
		297	`case char_less : {`
		298	`/* Map '\?\?<' to '{' */`
		299	`d = char_open_brace ;`
		300	`break ;`
		301	`}`
		302	`case char_minus : {`
		303	`/* Map '\?\?-' to '~' */`
		304	`d = char_tilde ;`
		305	`break ;`
		306	`}`
		307	`case char_open_round : {`
		308	`/* Map '\?\?(' to '[' */`
		309	`d = char_open_square ;`
		310	`break ;`
		311	`}`
		312	`case char_single_quote : {`
		313	`/* Map '\?\?\'' to '^' */`
		314	`d = char_circum ;`
		315	`break ;`
		316	`}`
		317	`case char_slash : {`
		318	`/* Map '\?\?/' to '\\' */`
		319	`d = char_backslash ;`
		320	`break ;`
		321	`}`
		322	`default : {`
		323	`/* Not a trigraph */`
		324	`unread_char ( c ) ;`
		325	`unread_char ( char_question ) ;`
		326	`return ( char_question ) ;`
		327	`}`
		328	`}`
		329	`update_column () ;`
		330	`report ( crt_loc, ERR_lex_trigraph_replace ( c, d ) ) ;`
		331	`return ( d ) ;`
		332	`} else {`
		333	`/* Not a trigraph */`
		334	`unread_char ( c ) ;`
		335	`}`
		336	`}`
		337	`return ( char_question ) ;`
		338	`}`
		339
		340
		341	`/*`
		342	`READ A NEWLINE CHARACTER`
		343
		344	`This routine is called after each carriage return character, checking`
		345	`for a following newline character.`
		346	`*/`
		347
		348	`static int read_newline`
		349	`PROTO_Z ()`
		350	`{`
		351	`if ( allow_dos_newline ) {`
		352	`int c = next_char () ;`
		353	`if ( c == char_end ) c = refill_char () ;`
		354	`if ( c == char_newline ) return ( c ) ;`
		355	`unread_char ( c ) ;`
		356	`}`
		357	`return ( char_return ) ;`
		358	`}`
		359
		360
		361	`/*`
		362	`READ AN END OF FILE CHARACTER`
		363
		364	`This routine is called after each terminate character, checking for`
		365	`a following end of file character.`
		366	`*/`
		367
		368	`static int read_eof`
		369	`PROTO_Z ()`
		370	`{`
		371	`if ( allow_dos_newline ) {`
		372	`int c = next_char () ;`
		373	`if ( c == char_end ) c = refill_char () ;`
		374	`if ( c == char_eof ) return ( c ) ;`
		375	`unread_char ( c ) ;`
		376	`}`
		377	`return ( char_sub ) ;`
		378	`}`
		379
		380
		381	`/*`
		382	`READ THE NEXT CHARACTER ALLOWING FOR TRIGRAPHS ETC.`
		383
		384	`This routine reads the next character from the input file, adjusting`
		385	`it as necessary for trigraphs and escaped newlines. This routine`
		386	`corresponds to phases 1 and 2 of the phases of translation.`
		387	`*/`
		388
		389	`static int read_char`
		390	`PROTO_Z ()`
		391	`{`
		392	`for ( ; ; ) {`
		393	`int c = next_char () ;`
		394	`if ( c == char_end ) c = refill_char () ;`
		395	`if ( c == char_question ) c = adjust_trigraph () ;`
		396	`if ( c != char_backslash ) {`
		397	`/* Not an escaped newline */`
		398	`return ( c ) ;`
		399	`}`
		400	`c = next_char () ;`
		401	`if ( c == char_end ) c = refill_char () ;`
		402	`if ( c == char_return ) c = read_newline () ;`
		403	`if ( c != char_newline ) {`
		404	`/* Not an escaped newline */`
		405	`unread_char ( c ) ;`
		406	`return ( char_backslash ) ;`
		407	`}`
		408	`crt_loc.line++ ;`
		409	`crt_loc.column = 0 ;`
		410	`input_crt = input_posn ;`
		411	`}`
		412	`/* NOTREACHED */`
		413	`}`
		414
		415
		416	`/*`
		417	`CHARACTER LOOK-UP TABLE`
		418
		419	`This look-up table gives the various character types. Note that the`
		420	`default look-up table is for ASCII, for other codesets the table`
		421	`needs to be rewritten. The only really interesting points in the`
		422	`table itself are that newline has not been classified as a white-space`
		423	`and that character char_eof (-1) represents end of file.`
			``
			`The X_M macros are the individual type bits; the unsuffixed macros`
			`are the bit combinations stored in the table. Every combination`
			`except ILLEG includes LEGAL_M, and both ALPHA and DIGIT include the`
			`alphanumeric and preprocessing-number bits. The table holds one`
			`entry for end of file, then one entry per CHAR_DATA line of char.h`
			`(its first argument), then a dummy illegal entry. main_characters`
			`points one element into the table so that it can be indexed`
			`directly by a character value, with index -1 giving the end of file`
			`entry. The is_xxx macros take a value returned by lookup_char, not`
			`a character, and yield the masked bit rather than 0 or 1.`
			`copy_characters initially refers to the same storage as`
			`main_characters.`
		424	`*/`
		425
		426	`#define SPACE_M 0x01`
		427	`#define ALPHA_M 0x02`
		428	`#define DIGIT_M 0x04`
		429	`#define ALNUM_M 0x08`
		430	`#define PPDIG_M 0x10`
		431	`#define SYMBL_M 0x20`
		432	`#define NLINE_M 0x40`
		433	`#define LEGAL_M 0x80`
		434
		435	`#define ILLEG 0x00`
		436	`#define LEGAL LEGAL_M`
		437	`#define SPACE ( SPACE_M \| LEGAL_M )`
		438	`#define ALPHA ( ALPHA_M \| ALNUM_M \| PPDIG_M \| LEGAL_M )`
		439	`#define DIGIT ( DIGIT_M \| ALNUM_M \| PPDIG_M \| LEGAL_M )`
		440	`#define SYMBL ( SYMBL_M \| LEGAL_M )`
		441	`#define POINT ( PPDIG_M \| SYMBL_M \| LEGAL_M )`
		442	`#define NLINE ( NLINE_M \| LEGAL_M )`
		443
		444	`#define main_characters ( characters + 1 )`
		445	`#define lookup_char( C ) ( ( int ) main_characters [C] )`
		446	`#define is_white( T ) ( ( T ) & SPACE_M )`
		447	`#define is_alpha( T ) ( ( T ) & ALPHA_M )`
		448	`#define is_digit( T ) ( ( T ) & DIGIT_M )`
		449	`#define is_alphanum( T ) ( ( T ) & ALNUM_M )`
		450	`#define is_ppdigit( T ) ( ( T ) & PPDIG_M )`
		451	`#define is_symbol( T ) ( ( T ) & SYMBL_M )`
		452	`#define is_newline( T ) ( ( T ) & NLINE_M )`
		453	`#define is_legal( T ) ( ( T ) & LEGAL_M )`
		454
		455	`static unsigned char characters [ NO_CHAR + 2 ] = {`
		456	`LEGAL, /* EOF */`
		457	`#define CHAR_DATA( A, B, C, D ) ( A ),`
		458	`#include "char.h"`
		459	`#undef CHAR_DATA`
		460	`ILLEG /* dummy */`
		461	`} ;`
		462
		463	`static unsigned char *copy_characters = main_characters ;`
		464
		465
		466	`/*`
		467	`SET A CHARACTER LOOK-UP`
		468
		469	`This routine sets the look-up value for character a to be equal to`
		470	`the underlying value for character b. As a special case, setting`
		471	`the look-up for a carriage return to that for newline enables`
		472	`DOS-like rules on newline and end of file characters.`
		473	`*/`
		474
		475	`void set_char_lookup`
		476	`PROTO_N ( ( a, b ) )`
		477	`PROTO_T ( int a X int b )`
		478	`{`
		479	`if ( a >= 0 && a < NO_CHAR && b >= 0 && b < NO_CHAR ) {`
		480	`unsigned char t = copy_characters [b] ;`
		481	`if ( a == char_return ) {`
		482	`if ( b == char_newline ) {`
		483	`/* Set DOS-like newline rules */`
		484	`allow_dos_newline = 1 ;`
		485	`return ;`
		486	`}`
		487	`if ( b == char_return ) {`
		488	`/* Unset DOS-like newline rules */`
		489	`allow_dos_newline = 0 ;`
		490	`}`
		491	`}`
		492	`main_characters [a] = t ;`
		493	`}`
		494	`return ;`
		495	`}`
		496
		497
		498	`/*`
		499	`SET A NUMBER OF CHARACTER LOOK-UPS`
		500
		501	`This routine sets the character look-ups for all the elements of the`
		502	`string or character literal expression a to be equal to that for the`
		503	`character literal expression b. If b is the null expression then`
		504	`the look-up is set to be an illegal character.`
		505	`*/`
		506
		507	`void set_character`
		508	`PROTO_N ( ( a, b ) )`
		509	`PROTO_T ( EXP a X EXP b )`
		510	`{`
		511	`int c = get_char_value ( b ) ;`
		512	`if ( IS_exp_string_lit ( a ) ) {`
		513	`STRING s = DEREF_str ( exp_string_lit_str ( a ) ) ;`
		514	`unsigned long n = DEREF_ulong ( str_simple_len ( s ) ) ;`
		515	`string t = DEREF_string ( str_simple_text ( s ) ) ;`
		516	`unsigned kind = DEREF_unsigned ( str_simple_kind ( s ) ) ;`
		517	`if ( kind & STRING_MULTI ) {`
		518	`while ( n ) {`
		519	`int ch = CHAR_SIMPLE ;`
		520	`unsigned long d = get_multi_char ( t, &ch ) ;`
		521	`if ( d < ( unsigned long ) NO_CHAR ) {`
		522	`set_char_lookup ( ( int ) d, c ) ;`
		523	`}`
		524	`t += MULTI_WIDTH ;`
		525	`n-- ;`
		526	`}`
		527	`} else {`
		528	`while ( n ) {`
		529	`int d = ( int ) *t ;`
		530	`set_char_lookup ( d, c ) ;`
		531	`t++ ;`
		532	`n-- ;`
		533	`}`
		534	`}`
		535	`} else {`
		536	`int d = get_char_value ( a ) ;`
		537	`if ( d != char_illegal ) set_char_lookup ( d, c ) ;`
		538	`}`
		539	`return ;`
		540	`}`
		541
		542
		543	`/*`
		544	`CHECK FOR WHITE SPACE CHARACTERS`
		545
		546	`This routine checks whether the character a represents a white space.`
		547	`The newline character constitutes a special case.`
		548	`*/`
		549
		550	`int is_white_char`
		551	`PROTO_N ( ( a ) )`
		552	`PROTO_T ( unsigned long a )`
		553	`{`
		554	`int t ;`
		555	`if ( a >= NO_CHAR ) return ( 0 ) ;`
		556	`t = lookup_char ( a ) ;`
		557	`return ( is_white ( t ) \|\| is_newline ( t ) ) ;`
		558	`}`
		559
		560
		561	`/*`
		562	`CHECK FOR ALPHABETIC CHARACTERS`
		563
		564	`This routine checks whether the character a represents an alphabetic`
		565	`character.`
		566	`*/`
		567
		568	`int is_alpha_char`
		569	`PROTO_N ( ( a ) )`
		570	`PROTO_T ( unsigned long a )`
		571	`{`
		572	`if ( a >= NO_CHAR ) return ( 0 ) ;`
		573	`return ( is_alpha ( lookup_char ( a ) ) ) ;`
		574	`}`
		575
		576
		577	`/*`
		578	`CHECK FOR LEGAL CHARACTERS`
		579
		580	`This routine checks whether the character a represents a legal character.`
		581	`*/`
		582
		583	`int is_legal_char`
		584	`PROTO_N ( ( a ) )`
		585	`PROTO_T ( unsigned long a )`
		586	`{`
		587	`if ( a >= NO_CHAR ) return ( 0 ) ;`
		588	`return ( is_legal ( lookup_char ( a ) ) ) ;`
		589	`}`
		590
		591
		592	`/*`
		593	`PEEK AHEAD ONE CHARACTER`
		594
		595	`This routine tests whether the next character is a (which will not be`
		596	`newline). If so the current character is advanced one, otherwise it`
		597	`is left unchanged. legal is set to false if the next character is`
		598	`not legal.`
		599	`*/`
		600
		601	`int peek_char`
		602	`PROTO_N ( ( a, legal ) )`
		603	`PROTO_T ( int a X int *legal )`
		604	`{`
		605	`int c = read_char () ;`
		606	`ASSERT ( a != char_newline ) ;`
		607	`if ( c == a ) return ( 1 ) ;`
		608	`*legal = is_legal_char ( ( unsigned long ) c ) ;`
		609	`unread_char ( c ) ;`
		610	`return ( 0 ) ;`
		611	`}`
		612
		613
		614	`/*`
		615	`TOKEN BUFFER`
		616
		617	`This buffer is used by read_token to hold the values of identifiers,`
		618	`numbers and strings. Within this file it is filled by read_string`
			`and, when comment text is being kept, by skip_comment and`
			`skip_cpp_comment; each of these grows it with extend_buffer and`
			`leaves its posn field pointing at the terminating zero.`
		619	`*/`
		620
		621	`BUFFER token_buff = NULL_buff ;`
		622
		623
		624	`/*`
		625	`TOKEN IDENTIFICATION MACROS`
		626
		627	`These macros are used to identify the start or end of certain tokens`
		628	`such as comments and strings.`
			``
			`START_COMMENT and START_CPP_COMMENT are applied to the character`
			`following a slash (see skip_white); the latter only succeeds when`
			`allow_cpp_comments is set. END_COMMENT takes the previous and the`
			`current character, END_CPP_COMMENT the current character, and`
			`END_STRING the current character and the expected closing quote.`
		629	`*/`
		630
		631	`#define START_COMMENT( A )\`
		632	`( ( A ) == char_asterix )`
		633	`#define END_COMMENT( A, B )\`
		634	`( ( A ) == char_asterix && ( B ) == char_slash )`
		635	`#define START_CPP_COMMENT( A )\`
		636	`( ( A ) == char_slash && allow_cpp_comments )`
		637	`#define END_CPP_COMMENT( A )\`
		638	`( ( A ) == char_newline )`
		639	`#define START_STRING( A )\`
		640	`( ( A ) == char_quote \|\| ( A ) == char_single_quote )`
		641	`#define END_STRING( A, Q )\`
		642	`( ( A ) == ( Q ) )`
		643
		644
		645	`/*`
		646	`END OF FILE FLAG`
		647
		648	`Each source file should end in a newline character, which is not`
		649	`preceded by a backslash. This flag is used to indicate whether the`
		650	`end of the present file has the correct form.`
			``
			`Within this file the flag is set by skip_white when the end of file`
			`is reached with nothing but a newline seen since the last`
			`non-white-space character, and by the string and comment skipping`
			`routines when they have already reported an unexpected end of file.`
		651	`*/`
		652
		653	`static int good_eof = 0 ;`
		654
		655
		656	`/*`
		657	`SKIP A STRING`
		658
		659	`This routine skips a string or character literal. It is entered after`
		660	`the initial quote, q, has been read. Escape sequences are always`
		661	`allowed. The routine returns lex_string_Hlit if the string terminates`
		662	`correctly and lex_eof otherwise.`
		663	`*/`
		664
		665	`static int skip_string`
		666	`PROTO_N ( ( q ) )`
		667	`PROTO_T ( int q )`
		668	`{`
		669	`int e = q ;`
		670	`LOCATION loc ;`
		671	`unsigned nl = 0 ;`
		672	`int escaped = 0 ;`
		673	`int have_char = 0 ;`
		674	`int allow_nl = allow_newline_strings ;`
		675	`if ( e == char_single_quote \|\| in_preproc_dir == 1 ) allow_nl = 0 ;`
		676	`update_column () ;`
		677	`loc = crt_loc ;`
		678
		679	`/* Scan to end of string */`
		680	`for ( ; ; ) {`
		681	`int c = read_char () ;`
		682	`if ( END_STRING ( c, e ) && !escaped ) {`
		683	`if ( e == char_single_quote && !have_char ) {`
		684	`update_column () ;`
		685	`report ( crt_loc, ERR_lex_ccon_empty () ) ;`
		686	`}`
		687	`if ( nl ) report ( loc, ERR_lex_string_nl ( nl, nl ) ) ;`
		688	`return ( lex_string_Hlit ) ;`
		689	`}`
		690	`if ( c == char_newline ) {`
		691	`if ( allow_nl ) {`
		692	`/* Report newlines but continue */`
		693	`crt_loc.line++ ;`
		694	`crt_loc.column = 0 ;`
		695	`input_crt = input_posn ;`
		696	`nl++ ;`
		697	`} else {`
		698	`unread_char ( c ) ;`
		699	`update_column () ;`
		700	`report ( crt_loc, ERR_lex_string_pp_nl () ) ;`
		701	`break ;`
		702	`}`
		703	`} else if ( c == char_eof ) {`
		704	`report ( loc, ERR_lex_phases_str_eof () ) ;`
		705	`good_eof = 1 ;`
		706	`nl = 0 ;`
		707	`break ;`
		708	`}`
		709	`if ( escaped ) {`
		710	`escaped = 0 ;`
		711	`} else {`
		712	`if ( c == char_backslash ) escaped = 1 ;`
		713	`}`
		714	`if ( !escaped ) have_char = 1 ;`
		715	`}`
		716	`if ( nl ) {`
		717	`/* Report newlines in string */`
		718	`report ( loc, ERR_lex_string_nl ( nl, nl ) ) ;`
		719	`} else {`
		720	`/* Don't bother with error recovery */`
		721	`/* EMPTY */`
		722	`}`
		723	`return ( lex_eof ) ;`
		724	`}`
		725
		726
		727	`/*`
		728	`READ THE BODY OF A STRING`
		729
		730	`This routine reads the body of a string or character literal or of a`
		731	`header name. It is entered after the initial quote has been read.`
		732	`The corresponding close quote is passed in as q. The esc argument`
		733	`indicates whether escape sequences are allowed (they are not in`
		734	`header names for example). The string itself is built up in`
		735	`token_buff. The routine returns lex_string_Hlit if the string`
		736	`terminates correctly and lex_eof otherwise. It also sets`
		737	`token_buff.posn to point to the end of the string.`
			``
			`A newline inside the literal is accepted only when`
			`allow_newline_strings is set, the close quote is not a single quote`
			`and in_preproc_dir is not 1; such newlines are stored, counted and`
			`reported once when the close quote is found. Any other newline is`
			`pushed back and reported (as an incomplete header name when the`
			`close quote is '>'), and the scan stops. An end of file is reported`
			`at the location of the start of the literal and sets good_eof.`
			`Both ways of leaving the scan early leave nl equal to zero, so the`
			`error recovery branch after the loop is the one that runs.`
			``
			`For a character literal the input position is recorded with`
			`tell_buffer on entry. If the scan fails after at least one`
			`character has been stored, seek_buffer is called with that position`
			`and the location is reset (presumably rewinding the input to the`
			`start of the literal - confirm against seek_buffer), after which`
			`only the first character, or a backslash and the character after it`
			`when esc is set, is re-read into token_buff.`
		738	`*/`
		739
		740	`int read_string`
		741	`PROTO_N ( ( q, esc ) )`
		742	`PROTO_T ( int q X int esc )`
		743	`{`
		744	`int c ;`
		745	`int e = q ;`
		746	`LOCATION loc ;`
		747	`long posn = -1 ;`
		748	`int escaped = 0 ;`
		749	`unsigned nl = 0 ;`
		750	`int have_char = 0 ;`
		751	`string s = token_buff.start ;`
		752	`string se = token_buff.end ;`
		753	`int allow_nl = allow_newline_strings ;`
		754	`update_column () ;`
		755	`if ( e == char_single_quote ) {`
		756	`posn = tell_buffer ( crt_buff_no ) ;`
		757	`allow_nl = 0 ;`
		758	`} else if ( in_preproc_dir == 1 ) {`
		759	`allow_nl = 0 ;`
		760	`}`
		761	`loc = crt_loc ;`
		762
		763	`/* Scan the string */`
		764	`for ( ; ; ) {`
		765	`c = read_char () ;`
		766	`if ( END_STRING ( c, e ) && !escaped ) {`
		767	`if ( e == char_single_quote && !have_char ) {`
		768	`update_column () ;`
		769	`report ( crt_loc, ERR_lex_ccon_empty () ) ;`
		770	`}`
		771	`if ( nl ) report ( loc, ERR_lex_string_nl ( nl, nl ) ) ;`
		772	`token_buff.posn = s ;`
		773	`*s = 0 ;`
		774	`return ( lex_string_Hlit ) ;`
		775	`}`
		776	`if ( c == char_newline ) {`
		777	`if ( allow_nl ) {`
		778	`/* Report newlines but continue */`
		779	`crt_loc.line++ ;`
		780	`crt_loc.column = 0 ;`
		781	`input_crt = input_posn ;`
		782	`nl++ ;`
		783	`} else {`
		784	`unread_char ( c ) ;`
		785	`update_column () ;`
		786	`if ( e == char_greater ) {`
		787	`/* Header name */`
		788	`report ( crt_loc, ERR_cpp_include_incompl () ) ;`
		789	`} else {`
		790	`report ( crt_loc, ERR_lex_string_pp_nl () ) ;`
		791	`}`
		792	`break ;`
		793	`}`
		794	`} else if ( c == char_eof ) {`
		795	`report ( loc, ERR_lex_phases_str_eof () ) ;`
		796	`good_eof = 1 ;`
		797	`nl = 0 ;`
		798	`break ;`
		799	`}`
		800	`*s = ( character ) c ;`
		801	`if ( ++s == se ) {`
		802	`s = extend_buffer ( &token_buff, s ) ;`
		803	`se = token_buff.end ;`
		804	`}`
		805	`if ( escaped ) {`
		806	`escaped = 0 ;`
		807	`} else {`
		808	`if ( c == char_backslash ) escaped = esc ;`
		809	`}`
		810	`if ( !escaped ) have_char = 1 ;`
		811	`}`
		812	`if ( nl ) {`
		813	`/* Report newlines in string */`
		814	`report ( loc, ERR_lex_string_nl ( nl, nl ) ) ;`
		815	`} else {`
		816	`/* Error recovery: keep only the first character of a character literal */`
		817	`if ( e == char_single_quote && have_char ) {`
		818	`seek_buffer ( crt_buff_no, posn, 1 ) ;`
		819	`crt_loc = loc ;`
		820	`s = token_buff.start ;`
		821	`c = read_char () ;`
		822	`*( s++ ) = ( character ) c ;`
		823	`if ( c == char_backslash && esc ) {`
		824	`c = read_char () ;`
		825	`*( s++ ) = ( character ) c ;`
		826	`}`
		827	`}`
		828	`}`
		829	`token_buff.posn = s ;`
		830	`*s = 0 ;`
		831	`return ( lex_eof ) ;`
		832	`}`
		833
		834
		835	`/*`
		836	`SKIP A C STYLE COMMENT`
		837
		838	`This routine skips a C style comment, returning lex_ignore_token if`
		839	`the comment is terminated correctly and lex_eof otherwise. It is`
		840	`entered after the first two characters comprising the comment start`
		841	`have been read. If keep is true then the comment text is built up`
		842	`in token_buff, otherwise it is discarded.`
			``
			`Characters are read with an inlined copy of read_char, so trigraphs`
			`are replaced and escaped newlines are removed (without disturbing`
			`the record of the previous character). Each newline advances the`
			`current line and resets the column and space counts. A slash`
			`followed by an asterisk inside the comment is reported as a nested`
			`comment. An end of file is reported at the location of the start`
			`of the comment and sets good_eof. When the text is kept, the two`
			`characters of the comment end are removed from the stored text,`
			`and token_buff.posn is left at its terminating zero. A correctly`
			`terminated comment adds one to crt_spaces.`
		843	`*/`
		844
		845	`static int skip_comment`
		846	`PROTO_N ( ( keep ) )`
		847	`PROTO_T ( int keep )`
		848	`{`
		849	`int c = 0 ;`
		850	`int lastc ;`
		851	`string s, se ;`
		852	`LOCATION loc ;`
		853	`update_column () ;`
		854	`loc = crt_loc ;`
		855	`if ( keep ) {`
		856	`s = token_buff.start ;`
		857	`se = token_buff.end ;`
		858	`} else {`
		859	`s = NULL ;`
		860	`se = NULL ;`
		861	`}`
		862	`do {`
		863	`lastc = c ;`
		864	`read_label : {`
		865	`/* Inlined version of read_char; re-entered after an escaped newline so that lastc is kept */`
		866	`c = next_char () ;`
		867	`if ( c == char_end ) c = refill_char () ;`
		868	`if ( c == char_question ) c = adjust_trigraph () ;`
		869	`if ( c == char_backslash ) {`
		870	`c = next_char () ;`
		871	`if ( c == char_end ) c = refill_char () ;`
		872	`if ( c == char_return ) c = read_newline () ;`
		873	`if ( c == char_newline ) {`
		874	`/* Allow for escaped newlines */`
		875	`crt_loc.line++ ;`
		876	`crt_loc.column = 0 ;`
		877	`input_crt = input_posn ;`
		878	`goto read_label ;`
		879	`}`
		880	`unread_char ( c ) ;`
		881	`c = char_backslash ;`
		882	`} else if ( c == char_newline ) {`
		883	`/* New line characters */`
		884	`crt_loc.line++ ;`
		885	`crt_loc.column = 0 ;`
		886	`input_crt = input_posn ;`
		887	`crt_line_changed = 1 ;`
		888	`crt_spaces = 0 ;`
		889	`} else if ( c == char_eof ) {`
		890	`/* End of file characters */`
		891	`report ( loc, ERR_lex_phases_comm_eof () ) ;`
		892	`good_eof = 1 ;`
		893	`if ( s ) {`
		894	`token_buff.posn = s ;`
		895	`*s = 0 ;`
		896	`}`
		897	`return ( lex_eof ) ;`
		898	`} else if ( c == char_asterix && lastc == char_slash ) {`
		899	`/* Nested comments */`
		900	`update_column () ;`
		901	`report ( crt_loc, ERR_lex_comment_nest () ) ;`
		902	`}`
		903	`if ( s ) {`
		904	`*s = ( character ) c ;`
		905	`if ( ++s == se ) {`
		906	`s = extend_buffer ( &token_buff, s ) ;`
		907	`se = token_buff.end ;`
		908	`}`
		909	`}`
		910	`}`
		911	`} while ( !END_COMMENT ( lastc, c ) ) ;`
		912	`if ( s ) {`
		913	`s -= 2 ;`
		914	`token_buff.posn = s ;`
		915	`*s = 0 ;`
		916	`}`
		917	`crt_spaces++ ;`
		918	`return ( lex_ignore_token ) ;`
		919	`}`
		920
		921
		922	`/*`
		923	`SKIP A C++ STYLE COMMENT`
		924
		925	`This routine skips a C++ style comment, returning lex_ignore_token`
		926	`if the comment terminates correctly and lex_eof otherwise. It is`
		927	`entered after the first two characters comprising the comment start`
		928	`have been read. The next token read after the comment will be the`
		929	`terminating newline. If keep is true then the comment text is built`
		930	`up in token_buff, otherwise it is discarded.`
			``
			`Characters are read with an inlined copy of read_char, so trigraphs`
			`are replaced and an escaped newline continues the comment onto the`
			`next line. The terminating newline is pushed back onto the input`
			`and, when the text is kept, removed from the stored text;`
			`token_buff.posn is left at the terminating zero. An end of file is`
			`reported at the current location and sets good_eof. On normal`
			`termination crt_line_changed is set and crt_spaces is cleared.`
		931	`*/`
		932
		933	`static int skip_cpp_comment`
		934	`PROTO_N ( ( keep ) )`
		935	`PROTO_T ( int keep )`
		936	`{`
		937	`int c ;`
		938	`string s, se ;`
		939	`if ( keep ) {`
		940	`s = token_buff.start ;`
		941	`se = token_buff.end ;`
		942	`} else {`
		943	`s = NULL ;`
		944	`se = NULL ;`
		945	`}`
		946	`do {`
		947	`read_label : {`
		948	`/* Inlined version of read_char; re-entered after an escaped newline */`
		949	`c = next_char () ;`
		950	`if ( c == char_end ) c = refill_char () ;`
		951	`if ( c == char_question ) c = adjust_trigraph () ;`
		952	`if ( c == char_backslash ) {`
		953	`c = next_char () ;`
		954	`if ( c == char_end ) c = refill_char () ;`
		955	`if ( c == char_return ) c = read_newline () ;`
		956	`if ( c == char_newline ) {`
		957	`/* Allow for escaped newlines */`
		958	`crt_loc.line++ ;`
		959	`crt_loc.column = 0 ;`
		960	`input_crt = input_posn ;`
		961	`goto read_label ;`
		962	`}`
		963	`unread_char ( c ) ;`
		964	`c = char_backslash ;`
		965	`} else if ( c == char_eof ) {`
		966	`/* End of file characters */`
		967	`update_column () ;`
		968	`report ( crt_loc, ERR_lex_phases_comm_eof () ) ;`
		969	`good_eof = 1 ;`
		970	`if ( s ) {`
		971	`token_buff.posn = s ;`
		972	`*s = 0 ;`
		973	`}`
		974	`return ( lex_eof ) ;`
		975	`}`
		976	`if ( s ) {`
		977	`*s = ( character ) c ;`
		978	`if ( ++s == se ) {`
		979	`s = extend_buffer ( &token_buff, s ) ;`
		980	`se = token_buff.end ;`
		981	`}`
		982	`}`
		983	`}`
		984	`} while ( !END_CPP_COMMENT ( c ) ) ;`
		985	`unread_char ( c ) ;`
		986	`if ( s ) {`
		987	`s -= 1 ;`
		988	`token_buff.posn = s ;`
		989	`*s = 0 ;`
		990	`}`
		991	`crt_line_changed = 1 ;`
		992	`crt_spaces = 0 ;`
		993	`return ( lex_ignore_token ) ;`
		994	`}`
		995
		996
		997	`/*`
		998	`SKIP WHITE-SPACE CHARACTERS`
		999
		1000	`This routine skips any white-space characters (including comments).`
		1001	`Newline characters are treated as white-space only if nl is true.`
		1002	`The result is a bitpattern formed from the components:`
		1003
		1004	`WHITE_SPACE for white-space characters;`
		1005	`WHITE_NEWLINE for newline characters;`
		1006	`WHITE_ESC_NEWLINE for escaped newlines;`
		1007
		1008	`the result being reset to WHITE_NEWLINE after each newline. Note that`
		1009	`trigraphs and escaped newlines are treated by hand. The effect of this`
		1010	`routine is that all non-empty sequences of white-space characters other`
		1011	`than newlines are treated as if they were a single space (the C/C++`
		1012	`specification says that this is implementation-defined).`
			``
			`The character which ends the run of white-space is pushed back onto`
			`the input before returning. The exceptions are an end of file found`
			`inside a comment, and a C++ style comment met when nl is false; in`
			`both cases the routine returns at once, the comment routine having`
			`already left the input where it should be. good_eof is set if the`
			`end of file is reached when the result is exactly WHITE_NEWLINE.`
			`Spaces and other white-space characters add one to crt_spaces; a`
			`tab moves it on to the next multiple of tab_width.`
			``
			`NOTE(review): the tab calculation divides by tab_width, so this`
			`presumably relies on tab_width never being zero - confirm where`
			`tab_width is set.`
		1013	`*/`
		1014
		1015	`unsigned long skip_white`
		1016	`PROTO_N ( ( nl ) )`
		1017	`PROTO_T ( int nl )`
		1018	`{`
		1019	`int c ;`
		1020	`unsigned long sp = 0 ;`
		1021	`for ( ; ; ) {`
		1022	`c = next_char () ;`
		1023	`if ( c == char_end ) c = refill_char () ;`
		1024	`if ( c == char_return ) c = read_newline () ;`
		1025	`if ( c == char_sub ) c = read_eof () ;`
		1026	`if ( c == char_newline ) {`
		1027	`/* Deal with newline characters */`
		1028	`if ( !nl ) break ;`
		1029	`sp = WHITE_NEWLINE ;`
		1030	`crt_loc.line++ ;`
		1031	`crt_loc.column = 0 ;`
		1032	`input_crt = input_posn ;`
		1033	`crt_line_changed = 1 ;`
		1034	`crt_spaces = 0 ;`
		1035	`} else if ( c == char_space ) {`
		1036	`/* Deal with simple spaces */`
		1037	`sp \|= WHITE_SPACE ;`
		1038	`crt_spaces++ ;`
		1039	`} else if ( c == char_tab ) {`
		1040	`/* Deal with tab characters */`
		1041	`unsigned long tab = tab_width ;`
		1042	`sp \|= WHITE_SPACE ;`
		1043	`crt_spaces = tab * ( crt_spaces / tab + 1 ) ;`
		1044	`} else if ( c == char_eof ) {`
		1045	`/* End of file */`
		1046	`if ( sp == WHITE_NEWLINE ) good_eof = 1 ;`
		1047	`break ;`
		1048	`} else {`
		1049	`int t ;`
		1050	`#if FS_EXTENDED_CHAR`
		1051	`if ( IS_EXTENDED ( c ) ) break ;`
		1052	`#endif`
		1053	`t = lookup_char ( c ) ;`
		1054	`if ( is_white ( t ) ) {`
		1055	`/* Deal with other white space characters */`
		1056	`sp \|= WHITE_SPACE ;`
		1057	`crt_spaces++ ;`
		1058	`} else {`
		1059	`if ( c == char_question ) c = adjust_trigraph () ;`
		1060	`if ( c == char_slash ) {`
		1061	`/* Deal with comments */`
		1062	`int b = read_char () ;`
		1063	`if ( START_COMMENT ( b ) ) {`
		1064	`sp \|= WHITE_SPACE ;`
		1065	`b = skip_comment ( 0 ) ;`
		1066	`if ( b == lex_eof ) return ( sp ) ;`
		1067	`} else if ( START_CPP_COMMENT ( b ) ) {`
		1068	`sp \|= WHITE_SPACE ;`
		1069	`b = skip_cpp_comment ( 0 ) ;`
		1070	`if ( b == lex_eof ) return ( sp ) ;`
		1071	`if ( !nl ) return ( sp ) ;`
		1072	`} else {`
		1073	`unread_char ( b ) ;`
		1074	`break ;`
		1075	`}`
		1076	`} else if ( c == char_backslash ) {`
		1077	`/* Deal with escaped newlines */`
		1078	`int b = next_char () ;`
		1079	`if ( b == char_end ) b = refill_char () ;`
		1080	`if ( b == char_return ) b = read_newline () ;`
		1081	`if ( b == char_newline ) {`
		1082	`crt_loc.line++ ;`
		1083	`crt_loc.column = 0 ;`
		1084	`input_crt = input_posn ;`
		1085	`} else {`
		1086	`unread_char ( b ) ;`
		1087	`break ;`
		1088	`}`
		1089	`sp \|= WHITE_ESC_NEWLINE ;`
		1090	`} else {`
		1091	`break ;`
		1092	`}`
		1093	`}`
		1094	`}`
		1095	`}`
		1096	`unread_char ( c ) ;`
		1097	`return ( sp ) ;`
		1098	`}`
		1099
		1100
		1101	`/*`
		1102	`PATCH UP WHITE-SPACE CHARACTERS`
		1103
		1104	`Calling skip_white ( 1 ) can mess up the parser as regards spotting`
		1105	`preprocessing directives and valid end of file markers. This routine`
		1106	`may be called with the return value of skip_white as an argument to`
		1107	`patch up the buffer in order to get the parser back into the right`
		1108	`state.`
		1109	`*/`
		1110
		1111	`void patch_white`
		1112	`PROTO_N ( ( sp ) )`
		1113	`PROTO_T ( unsigned long sp )`
		1114	`{`
		1115	`if ( sp & WHITE_NEWLINE ) {`
		1116	`if ( sp & WHITE_SPACE ) {`
		1117	`/* Patch in a space after a newline */`
		1118	`unsigned long n ;`
		1119	`update_column () ;`
		1120	`n = crt_loc.column ;`
		1121	`while ( n ) {`
		1122	`unread_char ( char_space ) ;`
		1123	`if ( input_posn <= input_start ) break ;`
		1124	`n-- ;`
		1125	`}`
		1126	`} else if ( sp & WHITE_ESC_NEWLINE ) {`
		1127	`/* Patch in an escaped newline after a newline */`
		1128	`unread_char ( char_backslash ) ;`
		1129	`unread_char ( char_newline ) ;`
		1130	`crt_loc.line-- ;`
		1131	`}`
		1132	`/* Patch in a newline */`
		1133	`unread_char ( char_newline ) ;`
		1134	`crt_loc.line-- ;`
		1135	`crt_loc.column = 0 ;`
		1136	`crt_spaces = 0 ;`
		1137	`}`
		1138	`return ;`
		1139	`}`
		1140
		1141
		1142	`/*`
		1143	`SKIP TO END OF LINE`
		1144
		1145	`This routine skips to the end of the current line. It returns 0 if`
		1146	`only white-space characters are encountered. It uses skip_white to`
		1147	`jump over white-space (including comments).`
		1148	`*/`
		1149
		1150	`int skip_to_end`
		1151	`PROTO_Z ()`
		1152	`{`
		1153	`int c ;`
		1154	`int res = 0 ;`
		1155	`in_preproc_dir = 0 ;`
		1156	`for ( ; ; ) {`
		1157	`IGNORE skip_white ( 0 ) ;`
		1158	`read_label : {`
		1159	`/* Inlined version of read_char */`
		1160	`c = next_char () ;`
		1161	`if ( c == char_end ) c = refill_char () ;`
		1162	`if ( c == char_question ) c = adjust_trigraph () ;`
		1163	`if ( c == char_backslash ) {`
		1164	`c = next_char () ;`
		1165	`if ( c == char_end ) c = refill_char () ;`
		1166	`if ( c == char_return ) c = read_newline () ;`
		1167	`if ( c == char_newline ) {`
		1168	`/* Allow for escaped newlines */`
		1169	`crt_loc.line++ ;`
		1170	`crt_loc.column = 0 ;`
		1171	`input_crt = input_posn ;`
		1172	`goto read_label ;`
		1173	`}`
		1174	`unread_char ( c ) ;`
		1175	`} else if ( c == char_newline ) {`
		1176	`/* New line characters */`
		1177	`crt_loc.line++ ;`
		1178	`crt_loc.column = 0 ;`
		1179	`input_crt = input_posn ;`
		1180	`crt_line_changed = 1 ;`
		1181	`crt_spaces = 0 ;`
		1182	`return ( res ) ;`
		1183	`} else if ( START_STRING ( c ) ) {`
		1184	`/* String literals */`
		1185	`res = 1 ;`
		1186	`c = skip_string ( c ) ;`
		1187	`if ( c == lex_eof ) return ( res ) ;`
		1188	`} else if ( c == char_eof ) {`
		1189	`/* End of file characters */`
		1190	`break ;`
		1191	`} else {`
		1192	`res = 1 ;`
		1193	`}`
		1194	`}`
		1195	`}`
		1196	`update_column () ;`
		1197	`report ( crt_loc, ERR_lex_phases_eof () ) ;`
		1198	`good_eof = 1 ;`
		1199	`return ( res ) ;`
		1200	`}`
		1201
		1202
		1203	`/*`
		1204	`READ A UNICODE CHARACTER`
		1205
		1206	`This routine reads a unicode character. It is entered after the`
		1207	`initial backslash and the following character, c, have been read.`
		1208	`It assigns the character type to pc and returns the character code.`
		1209	`*/`
		1210
		1211	`static unsigned long read_unicode`
		1212	`PROTO_N ( ( c, pc ) )`
		1213	`PROTO_T ( int c X int *pc )`
		1214	`{`
		1215	`unsigned i, n ;`
		1216	`unsigned long u ;`
		1217	`character s [10] ;`
		1218	`ERROR err = NULL_err ;`
		1219	`string p = s ;`
		1220	`if ( c == char_u && allow_unicodes ) {`
		1221	`/* Read '\uxxxx' */`
		1222	`*pc = CHAR_UNI4 ;`
		1223	`n = 4 ;`
		1224	`} else if ( c == char_U && allow_unicodes ) {`
		1225	`/* Read '\Uxxxxxxxx' */`
		1226	`*pc = CHAR_UNI8 ;`
		1227	`n = 8 ;`
		1228	`} else {`
		1229	`unread_char ( c ) ;`
		1230	`*pc = CHAR_NONE ;`
		1231	`return ( 0 ) ;`
		1232	`}`
		1233	`for ( i = 0 ; i < n ; i++ ) {`
		1234	`int t ;`
		1235	`int d = read_char () ;`
		1236	`if ( d == char_eof ) break ;`
		1237	`#if FS_EXTENDED_CHAR`
		1238	`if ( IS_EXTENDED ( d ) ) {`
		1239	`unread_char ( d ) ;`
		1240	`break ;`
		1241	`}`
		1242	`#endif`
		1243	`t = lookup_char ( d ) ;`
		1244	`if ( !is_alphanum ( t ) ) {`
		1245	`unread_char ( d ) ;`
		1246	`break ;`
		1247	`}`
		1248	`s [i] = ( character ) d ;`
		1249	`}`
		1250	`s [i] = 0 ;`
		1251	`u = eval_unicode ( c, n, pc, &p, &err ) ;`
		1252	`if ( !IS_NULL_err ( err ) ) {`
		1253	`update_column () ;`
		1254	`report ( crt_loc, err ) ;`
		1255	`}`
		1256	`return ( u ) ;`
		1257	`}`
		1258
		1259
		1260	`/*`
		1261	`READ AN EXTENDED IDENTIFIER`
		1262
		1263	`This routine reads an extended identifier name (one including a unicode`
		1264	`character). It is entered after reading the simple characters in the`
		1265	`token buffer plus the unicode character given by u and ch.`
		1266	`*/`
		1267
		1268	`static HASHID read_extended_id`
		1269	`PROTO_N ( ( u, ch ) )`
		1270	`PROTO_T ( unsigned long u X int ch )`
		1271	`{`
		1272	`string s ;`
		1273	`int c, t ;`
		1274	`HASHID nm ;`
		1275	`unsigned long h ;`
		1276	`BUFFER *bf = &token_buff ;`
		1277	`do {`
		1278	`if ( !unicode_alpha ( u ) ) {`
		1279	`/* Report illegal identifiers */`
		1280	`update_column () ;`
		1281	`report ( crt_loc, ERR_lex_name_extendid ( u ) ) ;`
		1282	`}`
		1283	`print_char ( u, ch, 0, bf ) ;`
		1284	`for ( ; ; ) {`
		1285	`c = read_char () ;`
		1286	`#if FS_EXTENDED_CHAR`
		1287	`if ( IS_EXTENDED ( c ) ) break ;`
		1288	`#endif`
		1289	`t = lookup_char ( c ) ;`
		1290	`if ( !is_alphanum ( t ) ) break ;`
		1291	`bfputc ( bf, c ) ;`
		1292	`}`
		1293	`ch = CHAR_NONE ;`
		1294	`if ( c == char_backslash ) {`
		1295	`int nextc = read_char () ;`
		1296	`u = read_unicode ( nextc, &ch ) ;`
		1297	`}`
		1298	`} while ( ch != CHAR_NONE ) ;`
		1299	`unread_char ( c ) ;`
		1300	`bfputc ( bf, 0 ) ;`
		1301	`s = bf->start ;`
		1302	`h = hash ( s ) ;`
		1303	`nm = lookup_name ( s, h, 1, lex_unknown ) ;`
		1304	`return ( nm ) ;`
		1305	`}`
		1306
		1307
		1308	`/*`
		1309	`HASH VALUE FOR IDENTIFIERS`
		1310
		1311	`The hash value for an identifier is built up by read_token as the name`
		1312	`is read and passed to lookup_name; the resulting hash table entry is`
		1313	`stored in this variable. The algorithm for calculating the hash value`
		1314	`must be kept in step with the routine hash (this is checked by an`
		1315	`assertion in lookup_name, so errors should be caught in debug mode).`
		1316	`*/`
		1317
		1318	`HASHID token_hashid = NULL_hashid ;`
		1319
		1320
		1321	`/*`
		1322	`MAIN PASS ANALYSER`
		1323
		1324	`This routine reads the next preprocessing token from the input file.`
		1325	`It is designed for speed rather than elegance, hence the rather`
		1326	`indiscriminate use of labels. Trigraphs and escaped newlines`
		1327	`involving the first character are processed by hand. This routine`
		1328	`corresponds to phase 3 of the phases of translation. The position`
		1329	`within the line is tracked by column - this is zero at the start of`
		1330	`a line, positive if only white space has been read and negative`
		1331	`otherwise. preproc keeps track of the last preprocessing directive.`
		1332	`*/`
		1333
		1334	`int read_token`
		1335	`PROTO_Z ()`
		1336	`{`
		1337	`int c, t ;`
		1338	`int column = -1 ;`
		1339	`int preproc = lex_ignore_token ;`
		1340
		1341	`/* Read the next character */`
		1342	`start_label : {`
		1343	`c = next_char () ;`
		1344	`if ( c == char_end ) c = refill_char () ;`
		1345	`restart_label : {`
		1346	`#if FS_EXTENDED_CHAR`
		1347	`if ( IS_EXTENDED ( c ) ) {`
		1348	`goto unknown_label ;`
		1349	`}`
		1350	`#endif`
		1351	`t = lookup_char ( c ) ;`
		1352	`if ( is_white ( t ) ) {`
		1353	`crt_spaces++ ;`
		1354	`goto start_label ;`
		1355	`}`
		1356	`}`
		1357	`process_label : {`
		1358	`/* Process the next character */`
		1359	`}`
		1360	`}`
		1361
		1362	`/* Check symbols and punctuation */`
		1363	`if ( is_symbol ( t ) ) {`
		1364	`switch ( c ) {`
		1365
		1366	`case char_question : {`
		1367	`/* Deal with '?' and trigraphs */`
		1368	`c = adjust_trigraph () ;`
		1369	`if ( c == char_question ) return ( lex_question ) ;`
		1370	`goto restart_label ;`
		1371	`}`
		1372
		1373	`case char_backslash : {`
		1374	`/* Deal with escaped newlines */`
		1375	`unsigned long u ;`
		1376	`int ch = CHAR_NONE ;`
		1377	`int nextc = next_char () ;`
		1378	`if ( nextc == char_end ) nextc = refill_char () ;`
		1379	`if ( nextc == char_return ) nextc = read_newline () ;`
		1380	`if ( nextc == char_newline ) {`
		1381	`crt_loc.line++ ;`
		1382	`crt_loc.column = 0 ;`
		1383	`input_crt = input_posn ;`
		1384	`if ( column == 0 ) column = 1 ;`
		1385	`goto start_label ;`
		1386	`}`
		1387
		1388	`/* Check for unicode characters */`
		1389	`u = read_unicode ( nextc, &ch ) ;`
		1390	`if ( ch != CHAR_NONE ) {`
		1391	`token_buff.posn = token_buff.start ;`
		1392	`token_hashid = read_extended_id ( u, ch ) ;`
		1393	`return ( lex_identifier ) ;`
		1394	`}`
		1395	`return ( lex_backslash ) ;`
		1396	`}`
		1397
		1398	`case char_hash : {`
		1399	`/* Deal with '#' and '##' */`
		1400	`c = read_char () ;`
		1401	`if ( c == char_hash ) return ( lex_hash_Hhash_H1 ) ;`
		1402	`unread_char ( c ) ;`
		1403
		1404	`/* Return with '#' if not at start of line */`
		1405	`if ( column < 0 \|\| no_preproc_dir ) {`
		1406	`return ( lex_hash_H1 ) ;`
		1407	`}`
		1408
		1409	`/* Deal with preprocessing directives */`
		1410	`preproc_label : {`
		1411	`unsigned long sp = skip_white ( 0 ) ;`
		1412	`update_column () ;`
		1413	`if ( column ) report ( crt_loc, ERR_cpp_indent () ) ;`
		1414	`if ( sp & ( WHITE_SPACE \| WHITE_ESC_NEWLINE ) ) {`
		1415	`report ( preproc_loc, ERR_cpp_indent_dir () ) ;`
		1416	`}`
		1417	`preproc = read_preproc_dir ( 1, preproc ) ;`
		1418	`if ( preproc < 0 ) goto start_line_label ;`
		1419	`unread_char ( char_newline ) ;`
		1420	`crt_loc.line-- ;`
		1421	`crt_loc.column = 0 ;`
		1422	`return ( preproc ) ;`
		1423	`}`
		1424	`}`
		1425
		1426	`case char_percent : {`
		1427	`/* Deal with '%', '%=', '%>', '%:' and '%:%:' */`
		1428	`c = read_char () ;`
		1429	`if ( c == char_equal ) return ( lex_rem_Heq ) ;`
		1430	`if ( c == char_greater && allow_digraphs ) {`
		1431	`return ( lex_close_Hbrace_H2 ) ;`
		1432	`}`
		1433	`if ( c == char_colon && allow_digraphs ) {`
		1434	`/* Check for '%:' and '%:%:' */`
		1435	`c = read_char () ;`
		1436	`if ( c == char_percent ) {`
		1437	`int nextc = read_char () ;`
		1438	`if ( nextc == char_colon ) {`
		1439	`return ( lex_hash_Hhash_H2 ) ;`
		1440	`}`
		1441	`unread_char ( nextc ) ;`
		1442	`}`
		1443	`unread_char ( c ) ;`
		1444
		1445	`/* Return with '%:' if not at start of line */`
		1446	`if ( column < 0 \|\| no_preproc_dir ) {`
		1447	`return ( lex_hash_H2 ) ;`
		1448	`}`
		1449
		1450	`/* Otherwise this is a preprocessing directive */`
		1451	`IGNORE get_digraph ( lex_hash_H2 ) ;`
		1452	`goto preproc_label ;`
		1453	`}`
		1454	`unread_char ( c ) ;`
		1455	`return ( lex_rem ) ;`
		1456	`}`
		1457
		1458	`case char_quote : {`
		1459	`/* Deal with string literals */`
		1460	`IGNORE read_string ( c, 1 ) ;`
		1461	`return ( lex_string_Hlit ) ;`
		1462	`}`
		1463
		1464	`case char_single_quote : {`
		1465	`/* Deal with character literals */`
		1466	`IGNORE read_string ( c, 1 ) ;`
		1467	`return ( lex_char_Hlit ) ;`
		1468	`}`
		1469
		1470	`case char_exclaim : {`
		1471	`/* Deal with '!' and '!=' */`
		1472	`c = read_char () ;`
		1473	`if ( c == char_equal ) return ( lex_not_Heq_H1 ) ;`
		1474	`unread_char ( c ) ;`
		1475	`return ( lex_not_H1 ) ;`
		1476	`}`
		1477
		1478	`case char_ampersand : {`
		1479	`/* Deal with '&', '&&' and '&=' */`
		1480	`c = read_char () ;`
		1481	`if ( c == char_ampersand ) return ( lex_logical_Hand_H1 ) ;`
		1482	`if ( c == char_equal ) return ( lex_and_Heq_H1 ) ;`
		1483	`unread_char ( c ) ;`
		1484	`return ( lex_and_H1 ) ;`
		1485	`}`
		1486
		1487	`case char_asterix : {`
		1488	`/* Deal with '' and '=' */`
		1489	`c = read_char () ;`
		1490	`if ( c == char_equal ) return ( lex_star_Heq ) ;`
		1491	`unread_char ( c ) ;`
		1492	`return ( lex_star ) ;`
		1493	`}`
		1494
		1495	`case char_plus : {`
		1496	`/* Deal with '+', '++' and '+=' */`
		1497	`c = read_char () ;`
		1498	`if ( c == char_plus ) return ( lex_plus_Hplus ) ;`
		1499	`if ( c == char_equal ) return ( lex_plus_Heq ) ;`
		1500	`if ( c == char_question && allow_extra_symbols ) {`
		1501	`return ( lex_abs ) ;`
		1502	`}`
		1503	`unread_char ( c ) ;`
		1504	`return ( lex_plus ) ;`
		1505	`}`
		1506
		1507	`case char_minus : {`
		1508	`/* Deal with '-', '--', '-=', '->' and '->' /`
		1509	`c = read_char () ;`
		1510	`if ( c == char_minus ) return ( lex_minus_Hminus ) ;`
		1511	`if ( c == char_equal ) return ( lex_minus_Heq ) ;`
		1512	`if ( c == char_greater ) {`
		1513	`#if LANGUAGE_CPP`
		1514	`/* '->' is only allowed in C++ /`
		1515	`c = read_char () ;`
		1516	`if ( c == char_asterix ) return ( lex_arrow_Hstar ) ;`
		1517	`unread_char ( c ) ;`
		1518	`#endif`
		1519	`return ( lex_arrow ) ;`
		1520	`}`
		1521	`unread_char ( c ) ;`
		1522	`return ( lex_minus ) ;`
		1523	`}`
		1524
		1525	`case char_dot : {`
		1526	`/* Deal with '.', '...', '.' and numbers /`
		1527	`c = read_char () ;`
		1528	`if ( c == char_dot ) {`
		1529	`c = read_char () ;`
		1530	`if ( c == char_dot ) return ( lex_ellipsis ) ;`
		1531	`unread_char ( c ) ;`
		1532	`unread_char ( char_dot ) ;`
		1533	`return ( lex_dot ) ;`
		1534	`}`
		1535	`#if LANGUAGE_CPP`
		1536	`/* '.' is only allowed in C++ /`
		1537	`if ( c == char_asterix ) return ( lex_dot_Hstar ) ;`
		1538	`#endif`
		1539	`#if FS_EXTENDED_CHAR`
		1540	`if ( IS_EXTENDED ( c ) ) {`
		1541	`unread_char ( c ) ;`
		1542	`return ( lex_dot ) ;`
		1543	`}`
		1544	`#endif`
		1545	`t = lookup_char ( c ) ;`
		1546	`if ( is_digit ( t ) ) {`
		1547	`/* Indicate a number with first digit '.' */`
		1548	`t = POINT ;`
		1549	`goto number_label ;`
		1550	`}`
		1551	`unread_char ( c ) ;`
		1552	`return ( lex_dot ) ;`
		1553	`}`
		1554
		1555	`case char_slash : {`
		1556	`/* Deal with '/', '/=' and comments */`
		1557	`c = read_char () ;`
		1558	`if ( START_COMMENT ( c ) ) {`
		1559	`int a = analyse_comments ;`
		1560	`c = skip_comment ( a ) ;`
		1561	`if ( c == lex_eof ) goto eof_label ;`
		1562	`if ( a ) {`
		1563	`c = lint_comment () ;`
		1564	`if ( c >= 0 ) return ( c ) ;`
		1565	`}`
		1566	`if ( column == 0 ) column = 1 ;`
		1567	`goto start_label ;`
		1568	`}`
		1569	`if ( START_CPP_COMMENT ( c ) ) {`
		1570	`int a = analyse_comments ;`
		1571	`c = skip_cpp_comment ( a ) ;`
		1572	`if ( c == lex_eof ) goto eof_label ;`
		1573	`if ( a ) {`
		1574	`c = lint_comment () ;`
		1575	`if ( c >= 0 ) return ( c ) ;`
		1576	`}`
		1577	`IGNORE read_char () ;`
		1578	`goto newline_label ;`
		1579	`}`
		1580	`if ( c == char_equal ) return ( lex_div_Heq ) ;`
		1581	`unread_char ( c ) ;`
		1582	`return ( lex_div ) ;`
		1583	`}`
		1584
		1585	`case char_colon : {`
		1586	`/* Deal with ':', '::' and ':>' */`
		1587	`c = read_char () ;`
		1588	`#if LANGUAGE_CPP`
		1589	`/* '::' is only allowed in C++ */`
		1590	`if ( c == char_colon ) return ( lex_colon_Hcolon ) ;`
		1591	`#endif`
		1592	`if ( c == char_greater && allow_digraphs ) {`
		1593	`return ( lex_close_Hsquare_H2 ) ;`
		1594	`}`
		1595	`unread_char ( c ) ;`
		1596	`return ( lex_colon ) ;`
		1597	`}`
		1598
		1599	`case char_less : {`
		1600	`/* Deal with '<', '<=', '<<', '<<=', '<%', '<:' */`
		1601	`c = read_char () ;`
		1602	`if ( c == char_equal ) return ( lex_less_Heq ) ;`
		1603	`if ( c == char_less ) {`
		1604	`c = read_char () ;`
		1605	`if ( c == char_equal ) return ( lex_lshift_Heq ) ;`
		1606	`unread_char ( c ) ;`
		1607	`return ( lex_lshift ) ;`
		1608	`}`
		1609	`if ( c == char_percent && allow_digraphs ) {`
		1610	`return ( lex_open_Hbrace_H2 ) ;`
		1611	`}`
		1612	`if ( c == char_colon && allow_digraphs ) {`
		1613	`return ( lex_open_Hsquare_H2 ) ;`
		1614	`}`
		1615	`if ( c == char_question && allow_extra_symbols ) {`
		1616	`return ( lex_min ) ;`
		1617	`}`
		1618	`unread_char ( c ) ;`
		1619	`return ( lex_less ) ;`
		1620	`}`
		1621
		1622	`case char_equal : {`
		1623	`/* Deal with '=' and '==' */`
		1624	`c = read_char () ;`
		1625	`switch ( c ) {`
		1626	`case char_equal : {`
		1627	`return ( lex_eq ) ;`
		1628	`}`
		1629	`case char_ampersand :`
		1630	`case char_asterix :`
		1631	`case char_minus :`
		1632	`case char_plus : {`
		1633	`update_column () ;`
		1634	`report ( crt_loc, ERR_lex_op_old_assign ( c, c ) ) ;`
		1635	`break ;`
		1636	`}`
		1637	`}`
		1638	`unread_char ( c ) ;`
		1639	`return ( lex_assign ) ;`
		1640	`}`
		1641
		1642	`case char_greater : {`
		1643	`/* Deal with '>', '>=', '>>' and '>>=' */`
		1644	`c = read_char () ;`
		1645	`if ( c == char_equal ) return ( lex_greater_Heq ) ;`
		1646	`if ( c == char_greater ) {`
		1647	`c = read_char () ;`
		1648	`if ( c == char_equal ) return ( lex_rshift_Heq ) ;`
		1649	`unread_char ( c ) ;`
		1650	`return ( lex_rshift ) ;`
		1651	`}`
		1652	`if ( c == char_question && allow_extra_symbols ) {`
		1653	`return ( lex_max ) ;`
		1654	`}`
		1655	`unread_char ( c ) ;`
		1656	`return ( lex_greater ) ;`
		1657	`}`
		1658
		1659	`case char_circum : {`
		1660	`/* Deal with '^' and '^=' */`
		1661	`c = read_char () ;`
		1662	`if ( c == char_equal ) return ( lex_xor_Heq_H1 ) ;`
		1663	`unread_char ( c ) ;`
		1664	`return ( lex_xor_H1 ) ;`
		1665	`}`
		1666
		1667	`case char_bar : {`
		1668	`/* Deal with '\|', '\|\|' and '\|=' */`
		1669	`c = read_char () ;`
		1670	`if ( c == char_bar ) return ( lex_logical_Hor_H1 ) ;`
		1671	`if ( c == char_equal ) return ( lex_or_Heq_H1 ) ;`
		1672	`unread_char ( c ) ;`
		1673	`return ( lex_or_H1 ) ;`
		1674	`}`
		1675
		1676	`case char_open_round : {`
		1677	`/* Deal with '(' */`
		1678	`return ( lex_open_Hround ) ;`
		1679	`}`
		1680
		1681	`case char_close_round : {`
		1682	`/* Deal with ')' */`
		1683	`return ( lex_close_Hround ) ;`
		1684	`}`
		1685
		1686	`case char_comma : {`
		1687	`/* Deal with ',' */`
		1688	`return ( lex_comma ) ;`
		1689	`}`
		1690
		1691	`case char_semicolon : {`
		1692	`/* Deal with ';' */`
		1693	`return ( lex_semicolon ) ;`
		1694	`}`
		1695
		1696	`case char_open_square : {`
		1697	`/* Deal with '[' */`
		1698	`return ( lex_open_Hsquare_H1 ) ;`
		1699	`}`
		1700
		1701	`case char_close_square : {`
		1702	`/* Deal with ']' */`
		1703	`return ( lex_close_Hsquare_H1 ) ;`
		1704	`}`
		1705
		1706	`case char_open_brace : {`
		1707	`/* Deal with '{' */`
		1708	`return ( lex_open_Hbrace_H1 ) ;`
		1709	`}`
		1710
		1711	`case char_close_brace : {`
		1712	`/* Deal with '}' */`
		1713	`return ( lex_close_Hbrace_H1 ) ;`
		1714	`}`
		1715
		1716	`case char_tilde : {`
		1717	`/* Deal with '~' */`
		1718	`return ( lex_compl_H1 ) ;`
		1719	`}`
		1720
		1721	`default : {`
		1722	`/* Anything else is an unknown character */`
		1723	`goto unknown_label ;`
		1724	`}`
		1725	`}`
		1726	`}`
		1727
		1728	`/* Read an identifier (calculating hash value on fly) */`
		1729	`if ( is_alpha ( t ) ) {`
		1730	`HASHID nm ;`
		1731	`LOCATION loc ;`
		1732	`BUFFER *bf = &token_buff ;`
		1733	`string s = bf->start ;`
		1734	`string se = bf->end ;`
		1735	`unsigned long h = ( unsigned long ) c ;`
		1736	`*( s++ ) = ( character ) c ;`
		1737
		1738	`/* Get the second character */`
		1739	`update_column () ;`
		1740	`loc = crt_loc ;`
		1741	`c = read_char () ;`
		1742	`#if FS_EXTENDED_CHAR`
		1743	`t = ( IS_EXTENDED ( c ) ? ILLEG : lookup_char ( c ) ) ;`
		1744	`#else`
		1745	`t = lookup_char ( c ) ;`
		1746	`#endif`
		1747	`if ( is_alphanum ( t ) ) {`
		1748	`/* Scan the third and subsequent characters */`
		1749	`do {`
		1750	`h = HASH_POWER * h + ( unsigned long ) c ;`
		1751	`*s = ( character ) c ;`
		1752	`if ( ++s == se ) {`
		1753	`s = extend_buffer ( bf, s ) ;`
		1754	`se = bf->end ;`
		1755	`}`
		1756	`c = read_char () ;`
		1757	`#if FS_EXTENDED_CHAR`
		1758	`if ( IS_EXTENDED ( c ) ) break ;`
		1759	`#endif`
		1760	`t = lookup_char ( c ) ;`
		1761	`} while ( is_alphanum ( t ) ) ;`
		1762	`} else {`
		1763	`/* Allow for wide strings and characters */`
		1764	`if ( h == char_L && is_symbol ( t ) ) {`
		1765	`if ( c == char_quote ) {`
		1766	`IGNORE read_string ( c, 1 ) ;`
		1767	`return ( lex_wstring_Hlit ) ;`
		1768	`}`
		1769	`if ( c == char_single_quote ) {`
		1770	`IGNORE read_string ( c, 1 ) ;`
		1771	`return ( lex_wchar_Hlit ) ;`
		1772	`}`
		1773	`}`
		1774	`/* Identifier of length one */`
		1775	`}`
		1776	`if ( c == char_backslash ) {`
		1777	`/* Allow for extended identifiers */`
		1778	`int ch = CHAR_NONE ;`
		1779	`int nextc = read_char () ;`
		1780	`unsigned long u = read_unicode ( nextc, &ch ) ;`
		1781	`if ( ch != CHAR_NONE ) {`
		1782	`bf->posn = s ;`
		1783	`nm = read_extended_id ( u, ch ) ;`
		1784	`goto identifier_label ;`
		1785	`}`
		1786	`}`
		1787	`unread_char ( c ) ;`
		1788	`se = s ;`
		1789	`*se = 0 ;`
		1790
		1791	`/* Look up the symbol in the hash table */`
		1792	`h %= HASH_SIZE ;`
		1793	`s = bf->start ;`
		1794	`nm = lookup_name ( s, h, 0, lex_unknown ) ;`
		1795	`identifier_label : {`
		1796	`IDENTIFIER id = DEREF_id ( hashid_id ( nm ) ) ;`
		1797	`while ( !IS_id_dummy ( id ) ) {`
		1798	`/* Scan to last hidden value */`
		1799	`id = DEREF_id ( id_alias ( id ) ) ;`
		1800	`}`
		1801	`COPY_loc ( id_loc ( id ), loc ) ;`
		1802	`}`
		1803	`token_hashid = nm ;`
		1804	`return ( lex_identifier ) ;`
		1805	`}`
		1806
		1807	`/* Read the first token in a line */`
		1808	`if ( c == char_return ) c = read_newline () ;`
		1809	`if ( c == char_newline ) {`
		1810	`newline_label : {`
		1811	`/* Re-entry point after C++ style comments */`
		1812	`crt_loc.line++ ;`
		1813	`crt_loc.column = 0 ;`
		1814	`input_crt = input_posn ;`
		1815	`crt_line_changed = 1 ;`
		1816	`crt_spaces = 0 ;`
		1817	`if ( in_preproc_dir == 1 ) {`
		1818	`in_preproc_dir = 0 ;`
		1819	`return ( lex_newline ) ;`
		1820	`}`
		1821	`}`
		1822	`start_line_label : {`
		1823	`/* Re-entry point after preprocessing directives */`
		1824	`column = 0 ;`
		1825	`for ( ; ; ) {`
		1826	`/* Step over any obvious spaces */`
		1827	`c = next_char () ;`
		1828	`if ( c == char_end ) c = refill_char () ;`
		1829	`if ( c == char_return ) c = read_newline () ;`
		1830	`if ( c == char_sub ) c = read_eof () ;`
		1831	`if ( c == char_newline ) {`
		1832	`crt_loc.line++ ;`
		1833	`crt_loc.column = 0 ;`
		1834	`input_crt = input_posn ;`
		1835	`crt_line_changed = 1 ;`
		1836	`crt_spaces = 0 ;`
		1837	`column = 0 ;`
		1838	`} else if ( c == char_eof ) {`
		1839	`/* Check for end of file (should start line) */`
		1840	`if ( column == 0 ) good_eof = 1 ;`
		1841	`goto eof_label ;`
		1842	`} else if ( c == char_space ) {`
		1843	`crt_spaces++ ;`
		1844	`column = 1 ;`
		1845	`} else if ( c == char_tab ) {`
		1846	`unsigned long tab = tab_width ;`
		1847	`crt_spaces = tab * ( crt_spaces / tab + 1 ) ;`
		1848	`column = 1 ;`
		1849	`} else {`
		1850	`#if FS_EXTENDED_CHAR`
		1851	`if ( IS_EXTENDED ( c ) ) {`
		1852	`t = ILLEG ;`
		1853	`break ;`
		1854	`}`
		1855	`#endif`
		1856	`t = lookup_char ( c ) ;`
		1857	`if ( is_white ( t ) ) {`
		1858	`if ( !is_newline ( t ) ) {`
		1859	`crt_spaces++ ;`
		1860	`column = 1 ;`
		1861	`}`
		1862	`} else {`
		1863	`break ;`
		1864	`}`
		1865	`}`
		1866	`}`
		1867	`/* c and t now hold the next character */`
		1868	`goto process_label ;`
		1869	`}`
		1870	`}`
		1871
		1872	`/* Read a pp-number */`
		1873	`if ( is_digit ( t ) ) {`
		1874	`number_label : {`
		1875	`int lastc ;`
		1876	`BUFFER *bf = &token_buff ;`
		1877	`string s = bf->start ;`
		1878	`string se = bf->end ;`
		1879	`if ( t == POINT ) {`
		1880	`/* t is set to POINT to indicate an initial '.' */`
		1881	`*( s++ ) = char_dot ;`
		1882	`}`
		1883	`digit_label : {`
		1884	`/* Step over alphanumeric characters and '.' */`
		1885	`do {`
		1886	`*s = ( character ) c ;`
		1887	`if ( ++s == se ) {`
		1888	`s = extend_buffer ( bf, s ) ;`
		1889	`se = bf->end ;`
		1890	`}`
		1891	`next_digit_label : {`
		1892	`lastc = c ;`
		1893	`c = read_char () ;`
		1894	`#if FS_EXTENDED_CHAR`
		1895	`if ( IS_EXTENDED ( c ) ) break ;`
		1896	`#endif`
		1897	`t = lookup_char ( c ) ;`
		1898	`}`
		1899	`} while ( is_ppdigit ( t ) ) ;`
		1900	`if ( c == char_plus \|\| c == char_minus ) {`
		1901	`/* Allow for [Ee][+-] */`
		1902	`if ( lastc == char_e \|\| lastc == char_E ) {`
		1903	`goto digit_label ;`
		1904	`}`
		1905	`}`
		1906	`if ( c == char_backslash ) {`
		1907	`/* Allow for unicode characters */`
		1908	`int ch = CHAR_NONE ;`
		1909	`int nextc = read_char () ;`
		1910	`unsigned long u = read_unicode ( nextc, &ch ) ;`
		1911	`if ( ch != CHAR_NONE ) {`
		1912	`bf->posn = s ;`
		1913	`print_char ( u, ch, 0, bf ) ;`
		1914	`s = bf->posn ;`
		1915	`se = bf->end ;`
		1916	`goto next_digit_label ;`
		1917	`}`
		1918	`}`
		1919	`}`
		1920	`*s = 0 ;`
		1921	`unread_char ( c ) ;`
		1922	`}`
		1923	`return ( lex_integer_Hlit ) ;`
		1924	`}`
		1925
		1926	`/* End of file marker */`
		1927	`if ( c == char_sub ) c = read_eof () ;`
		1928	`if ( c == char_eof ) {`
		1929	`eof_label : {`
		1930	`if ( in_preproc_dir != 0 ) return ( lex_eof ) ;`
		1931	`if ( !good_eof ) {`
		1932	`update_column () ;`
		1933	`report ( crt_loc, ERR_lex_phases_eof () ) ;`
		1934	`good_eof = 1 ;`
		1935	`}`
		1936	`if ( end_include ( preproc ) ) {`
		1937	`/* Revert to previous file */`
		1938	`good_eof = 0 ;`
		1939	`preproc = lex_ignore_token ;`
		1940	`goto start_line_label ;`
		1941	`}`
		1942	`}`
		1943	`/* End of main file */`
		1944	`return ( lex_eof ) ;`
		1945	`}`
		1946
		1947	`/* Unknown characters */`
		1948	`unknown_label : {`
		1949	`string s = token_buff.start ;`
		1950	`add_multi_char ( s, ( unsigned long ) c, CHAR_SIMPLE ) ;`
		1951	`}`
		1952	`return ( lex_unknown ) ;`
		1953	`}`
		1954
		1955
		1956	`/*`
		1957	`INITIALISE INPUT VARIABLES`
		1958
		1959	`This routine initialises the tables of character look-ups and the token`
		1960	`buffer.`
		1961	`*/`
		1962
		1963	`void init_char`
		1964	`PROTO_Z ()`
		1965	`{`
		1966	`int i ;`
		1967	`unsigned char p, q ;`
		1968
		1969	`/* Set native locale for multibyte characters */`
		1970	`#if FS_MULTIBYTE`
		1971	`if ( allow_multibyte ) IGNORE setlocale ( LC_CTYPE, "" ) ;`
		1972	`#endif`
		1973
		1974	`/* Allow for non-ASCII codesets */`
		1975	`map_ascii ( main_characters ) ;`
		1976	`map_ascii ( digit_values ) ;`
		1977	`map_ascii ( escape_sequences ) ;`
		1978
		1979	`/* Set up extra characters */`
		1980	`p = xmalloc_nof ( unsigned char, NO_CHAR ) ;`
		1981	`q = main_characters ;`
		1982	`copy_characters = p ;`
		1983	`for ( i = 0 ; i < NO_CHAR ; i++ ) ( p++ ) = ( q++ ) ;`
		1984
		1985	`/* Initialise token buffer */`
		1986	`token_buff.posn = extend_buffer ( &token_buff, token_buff.posn ) ;`
		1987	`return ;`
		1988	`}`
		1989
		1990
		1991	`/*`
		1992	`INITIALISE INPUT FILE READING`
		1993
		1994	`This routine initialises the lexical analysis routines in preparation`
		1995	`for parsing or preprocessing the current input file.`
		1996	`*/`
		1997
		1998	`void init_lex`
		1999	`PROTO_Z ()`
		2000	`{`
		2001	`/* Initialise file variables */`
		2002	`crt_buff_no = 0 ;`
		2003	`IGNORE init_buffer ( crt_buff_no ) ;`
		2004	`start_preproc_if () ;`
		2005	`preproc_loc = crt_loc ;`
		2006	`have_syntax_error = 0 ;`
		2007	`if ( do_header ) dump_start ( &crt_loc, NIL ( INCL_DIR ) ) ;`
		2008
		2009	`/* Deal with first start-up file */`
		2010	`open_startup () ;`
		2011
		2012	`/* Force processing to start at the beginning of a line */`
		2013	`unread_char ( char_newline ) ;`
		2014	`crt_loc.line-- ;`
		2015
		2016	`/* Initialise the parser */`
		2017	`init_parser ( NIL ( PPTOKEN ) ) ;`
		2018	`return ;`
		2019	`}`
		2020
		2021
		2022	`/*`
		2023	`PARSE INPUT FILE`
		2024
		2025	`This routine is the main entry point for the parsing of the current`
		2026	`input file.`
		2027	`*/`
		2028
		2029	`void process_file`
		2030	`PROTO_Z ()`
		2031	`{`
		2032	`init_lex () ;`
		2033	`ADVANCE_LEXER ;`
		2034	`parse_file ( NULL_type, dspec_none ) ;`
		2035	`return ;`
		2036	`}`

Subversion Repositories tendra.SVN

(root)//trunk/src/producers/common/parse/lex.c – Rev 2