WebSVN – tendra.SVN – Blame – /trunk/src/producers/common/parse/lex.c

Rev	Author	Line No.	Line
2	7u83	1	`/*`
7	7u83	2	`* Copyright (c) 2002-2005 The TenDRA Project <http://www.tendra.org/>.`
		3	`* All rights reserved.`
		4	`*`
		5	`* Redistribution and use in source and binary forms, with or without`
		6	`* modification, are permitted provided that the following conditions are met:`
		7	`*`
		8	`* 1. Redistributions of source code must retain the above copyright notice,`
		9	`* this list of conditions and the following disclaimer.`
		10	`* 2. Redistributions in binary form must reproduce the above copyright notice,`
		11	`* this list of conditions and the following disclaimer in the documentation`
		12	`* and/or other materials provided with the distribution.`
		13	`* 3. Neither the name of The TenDRA Project nor the names of its contributors`
		14	`* may be used to endorse or promote products derived from this software`
		15	`* without specific, prior written permission.`
		16	`*`
		17	* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS
		18	`* IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,`
		19	`* THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR`
		20	`* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR`
		21	`* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,`
		22	`* EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,`
		23	`* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;`
		24	`* OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,`
		25	`* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR`
		26	`* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF`
		27	`* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.`
		28	`*`
		29	`* $Id$`
		30	`*/`
		31	`/*`
2	7u83	32	`Crown Copyright (c) 1997`
7	7u83	33
2	7u83	34	`This TenDRA(r) Computer Program is subject to Copyright`
		35	`owned by the United Kingdom Secretary of State for Defence`
		36	`acting through the Defence Evaluation and Research Agency`
		37	`(DERA). It is made available to Recipients with a`
		38	`royalty-free licence for its use, reproduction, transfer`
		39	`to other parties and amendment for any purpose not excluding`
		40	`product development provided that any such use et cetera`
		41	`shall be deemed to be acceptance of the following conditions:-`
7	7u83	42
2	7u83	43	`(1) Its Recipients shall ensure that this Notice is`
		44	`reproduced upon any copies or amended versions of it;`
7	7u83	45
2	7u83	46	`(2) Any amended version of it shall be clearly marked to`
		47	`show both the nature of and the organisation responsible`
		48	`for the relevant amendment or amendments;`
7	7u83	49
2	7u83	50	`(3) Its onward transfer from a recipient to another`
		51	`party shall be deemed to be that party's acceptance of`
		52	`these conditions;`
7	7u83	53
2	7u83	54	`(4) DERA gives no warranty or assurance as to its`
		55	`quality or suitability for any purpose and DERA accepts`
		56	`no liability whatsoever in relation to any use to which`
		57	`it may be put.`
		58	`*/`
		59
		60
		61	`#include "config.h"`
		62	`#include <limits.h>`
		63	`#if FS_MULTIBYTE`
		64	`#include <locale.h>`
		65	`#endif`
		66	`#include "c_types.h"`
		67	`#include "exp_ops.h"`
		68	`#include "hashid_ops.h"`
		69	`#include "id_ops.h"`
		70	`#include "member_ops.h"`
		71	`#include "str_ops.h"`
		72	`#include "error.h"`
		73	`#include "catalog.h"`
		74	`#include "option.h"`
		75	`#include "buffer.h"`
		76	`#include "char.h"`
		77	`#include "constant.h"`
		78	`#include "file.h"`
		79	`#include "dump.h"`
		80	`#include "hash.h"`
		81	`#include "lex.h"`
		82	`#include "literal.h"`
		83	`#include "macro.h"`
		84	`#include "parse.h"`
		85	`#include "pragma.h"`
		86	`#include "preproc.h"`
		87	`#include "print.h"`
		88	`#include "syntax.h"`
		89	`#include "ustring.h"`
		90	`#include "xalloc.h"`
		91
		92
		93	`/*`
		94	`PARSER OPTIONS`
		95
		96	`These flags control the behaviour of the parser and determine whether`
		97	`such features as trigraphs and digraphs are allowed.`
		98	`*/`
		99
7	7u83	100	`int allow_trigraphs = 1; /* gates trigraph replacement in adjust_trigraph */`
		101	`int allow_digraphs = 1;`
		102	`int allow_unicodes = LANGUAGE_CPP;`
		103	`int allow_multibyte = 1;`
		104	`int allow_cpp_comments = LANGUAGE_CPP; /* tested by START_CPP_COMMENT */`
		105	`int allow_dos_newline = 0; /* toggled by set_char_lookup; read by read_newline and read_eof */`
		106	`int allow_extra_symbols = 0;`
		107	`int allow_iso_keywords = LANGUAGE_CPP; /* when zero, init_keywords enters the ISO keywords with lex_unknown */`
		108	`int allow_newline_strings = 0; /* lets skip_string and read_string continue across a newline */`
		109	`int analyse_comments = 1;`
		110	`unsigned long max_id_length = 1024;`
2	7u83	111
		112
		113	`/*`
		114	`TABLE OF SYMBOLS AND KEYWORDS`
		115
		116	`This table gives the mapping between lexical token numbers and the`
		117	`corresponding symbols and keywords. It is derived from the list of`
		118	`tokens in symbols.h.`
		119	`*/`
		120
7	7u83	121	`CONST char *token_names[] = {`
		122	`#define LEX_TOKEN(A, B, C) (B), /* keep only the second field of each LEX_TOKEN entry */`
2	7u83	123	`#include "symbols.h"`
		124	`#undef LEX_TOKEN`
7	7u83	125	`NULL /* terminating entry */`
		126	`};`
2	7u83	127
		128
		129	`/*`
		130	`TRANSLATE A LEXICAL TOKEN TO ITS PRIMARY FORM`
		131
		132	`This routine translates the alternative ISO keywords and digraphs`
		133	`into their primary form.`
		134	`*/`
		135
7	7u83	136	`int`
		137	`primary_form(int t)`
2	7u83	138	`{`
7	7u83	139	`int u = t; /* default: a token with no case below is returned unchanged */`
		140	`switch (u) {`
		141	`case lex_and_H2: /* every case maps an _H2 token to its _H1 counterpart */`
		142	`u = lex_and_H1;`
		143	`break;`
		144	`case lex_and_Heq_H2:`
		145	`u = lex_and_Heq_H1;`
		146	`break;`
		147	`case lex_close_Hbrace_H2:`
		148	`u = lex_close_Hbrace_H1;`
		149	`break;`
		150	`case lex_close_Hsquare_H2:`
		151	`u = lex_close_Hsquare_H1;`
		152	`break;`
		153	`case lex_compl_H2:`
		154	`u = lex_compl_H1;`
		155	`break;`
		156	`case lex_hash_H2:`
		157	`u = lex_hash_H1;`
		158	`break;`
		159	`case lex_hash_Hhash_H2:`
		160	`u = lex_hash_Hhash_H1;`
		161	`break;`
		162	`case lex_logical_Hand_H2:`
		163	`u = lex_logical_Hand_H1;`
		164	`break;`
		165	`case lex_logical_Hor_H2:`
		166	`u = lex_logical_Hor_H1;`
		167	`break;`
		168	`case lex_not_H2:`
		169	`u = lex_not_H1;`
		170	`break;`
		171	`case lex_not_Heq_H2:`
		172	`u = lex_not_Heq_H1;`
		173	`break;`
		174	`case lex_open_Hbrace_H2:`
		175	`u = lex_open_Hbrace_H1;`
		176	`break;`
		177	`case lex_open_Hsquare_H2:`
		178	`u = lex_open_Hsquare_H1;`
		179	`break;`
		180	`case lex_or_H2:`
		181	`u = lex_or_H1;`
		182	`break;`
		183	`case lex_or_Heq_H2:`
		184	`u = lex_or_Heq_H1;`
		185	`break;`
		186	`case lex_xor_H2:`
		187	`u = lex_xor_H1;`
		188	`break;`
		189	`case lex_xor_Heq_H2:`
		190	`u = lex_xor_Heq_H1;`
		191	`break;`
		192	`}`
		193	`return(u);`
2	7u83	194	`}`
		195
		196
		197	`/*`
		198	`REPORT A DIGRAPH TOKEN`
		199
		200	`This routine reports the digraph t, returning the primary form of t.`
		201	`*/`
		202
7	7u83	203	`int`
		204	`get_digraph(int t)`
2	7u83	205	`{`
7	7u83	206	`int u = primary_form(t);`
		207	`if (u != t) { /* t was an alternative spelling, so a replacement took place */`
		208	`update_column(); /* called before every report that uses crt_loc in this file */`
		209	`report(crt_loc, ERR_lex_digraph_replace(t, u));`
		210	`}`
		211	`return(u); /* equals t when nothing was replaced */`
2	7u83	212	`}`
		213
		214
		215	`/*`
		216	`CREATE A KEYWORD`
		217
		218	`This routine creates a keyword identifier with name nm and lexical`
		219	`token number key. The special case when key is lex_unknown is used`
		220	`to indicate a reserved identifier.`
		221	`*/`
		222
7	7u83	223	`IDENTIFIER`
		224	`make_keyword(HASHID nm, int key, IDENTIFIER id)`
2	7u83	225	`{`
7	7u83	226	`PTR(IDENTIFIER)ptr = hashid_id(nm); /* starting point for the id_alias walk in the loop below */`
		227	`if (IS_NULL_id(id)) { /* no identifier supplied by the caller: build one */`
		228	`/* Find keyword type; id_keyword_tag unless key falls in one of the ranges below */`
		229	`unsigned tag = id_keyword_tag;`
		230	`if (key >= FIRST_ISO_KEYWORD && key <= LAST_ISO_KEYWORD) {`
		231	`tag = id_iso_keyword_tag;`
		232	`} else if (key >= FIRST_SYMBOL && key <= LAST_SYMBOL) {`
		233	`tag = id_iso_keyword_tag; /* NOTE(review): same tag as the ISO keyword branch - confirm this is intended for symbols */`
		234	`} else if (key == lex_unknown) {`
		235	`tag = id_reserved_tag; /* lex_unknown marks a reserved identifier */`
		236	`}`
		237
		238	`/* Create keyword identifier */`
		239	`MAKE_id_keyword_etc(tag, nm, dspec_none, NULL_nspace, crt_loc,`
		240	`id);`
		241	`COPY_ulong(id_no(id), (unsigned long)key); /* record the token number in the new identifier */`
2	7u83	242	`}`
7	7u83	243	`COPY_id(hashid_cache(nm), NULL_id); /* reset the cache entry of nm to the null identifier */`
		244	`if (do_keyword) {`
		245	`dump_declare(id, &crt_loc, 1);`
		246	`}`
2	7u83	247
7	7u83	248	`/* Add keyword to identifier meanings */`
		249	`for (;;) {`
		250	`IDENTIFIER pid = DEREF_id(ptr);`
		251	`switch (TAG_id(pid)) {`
		252	`case id_dummy_tag:`
		253	`case id_keyword_tag:`
		254	`case id_iso_keyword_tag:`
		255	`case id_reserved_tag:`
		256	`COPY_id(id_alias(id), pid); /* link id in front of the entry that was found */`
		257	`COPY_id(ptr, id);`
		258	`return(id);`
		259	`}`
		260	`ptr = id_alias(pid); /* any other tag: move on to the alias of pid */`
2	7u83	261	`}`
7	7u83	262	`/* NOTREACHED */`
2	7u83	263	`}`
		264
		265
		266	`/*`
		267	`INITIALISE KEYWORDS`
		268
		269	`This routine initialises the hash table entries for the keywords.`
		270	`*/`
		271
7	7u83	272	`void`
		273	`init_keywords(void)`
2	7u83	274	`{`
7	7u83	275	`int key;`
2	7u83	276
7	7u83	277	`/* Set up keyword entries: one hash table lookup per token in the keyword range */`
		278	`for (key = FIRST_KEYWORD; key <= LAST_KEYWORD; key++) {`
		279	`int ext = 0;`
		280	`string keyword = token_name(key);`
		281	`unsigned long h = hash(keyword);`
		282	`if (keyword[0] == char_less) { /* names starting with char_less are looked up with ext set */`
		283	`ext = 1;`
		284	`}`
		285	`KEYWORD(key) = lookup_name(keyword, h, ext, key);`
		286	`}`
2	7u83	287
7	7u83	288	`/* Bring the C keywords into scope */`
		289	`for (key = FIRST_C_KEYWORD; key <= LAST_C_KEYWORD; key++) {`
		290	`HASHID nm = KEYWORD(key);`
		291	`IGNORE make_keyword(nm, key, NULL_id);`
		292	`}`
2	7u83	293
7	7u83	294	`/* Bring the C++ keywords into scope */`
		295	`for (key = FIRST_CPP_KEYWORD; key <= LAST_CPP_KEYWORD; key++) {`
		296	`HASHID nm = KEYWORD(key);`
2	7u83	297	`#if LANGUAGE_CPP`
7	7u83	298	`IGNORE make_keyword(nm, key, NULL_id);`
2	7u83	299	`#else`
7	7u83	300	`if (key != lex_wchar_Ht) { /* lex_wchar_Ht is the one C++ keyword not entered here */`
		301	`IGNORE make_keyword(nm, lex_unknown, NULL_id); /* lex_unknown makes it a reserved identifier */`
		302	`}`
		303	`#endif`
2	7u83	304	`}`
		305
7	7u83	306	`/* Bring the ISO alternative keywords into scope */`
		307	`for (key = FIRST_ISO_KEYWORD; key <= LAST_ISO_KEYWORD; key++) {`
		308	`HASHID nm = KEYWORD(key);`
		309	`if (allow_iso_keywords) {`
		310	`IGNORE make_keyword(nm, key, NULL_id);`
		311	`} else {`
		312	`IGNORE make_keyword(nm, lex_unknown, NULL_id); /* entered as a reserved identifier instead */`
		313	`}`
2	7u83	314	`}`
		315
7	7u83	316	`/* Find underlying dummy identifier for 'operator' */`
		317	`underlying_op = DEREF_id(hashid_id(KEYWORD(lex_operator)));`
		318	`underlying_op = underlying_id(underlying_op);`
		319	`return;`
2	7u83	320	`}`
		321
		322
		323	`/*`
		324	`ADJUST A CHARACTER FOR TRIGRAPHS`
		325
		326	`This routine is called after a question mark has been read from the`
		327	`input file to allow for trigraphs. It returns the trigraph replacement`
		328	`character or '?' if the following characters do not form a trigraph.`
		329	`*/`
		330
7	7u83	331	`static int`
		332	`adjust_trigraph(void)`
2	7u83	333	`{`
7	7u83	334	`if (allow_trigraphs) { /* with trigraphs disabled nothing is read and char_question is returned */`
		335	`int c = next_char();`
		336	`if (c == char_end) { /* char_end from next_char is always followed by refill_char */`
		337	`c = refill_char();`
2	7u83	338	`}`
7	7u83	339	`if (c == char_question) { /* second question mark seen; the next character decides */`
		340	`int d;`
		341	`c = next_char();`
		342	`if (c == char_end) {`
		343	`c = refill_char();`
		344	`}`
		345	`switch (c) {`
		346	`case char_close_round:`
		347	`/* Map '\?\?)' to ']' */`
		348	`d = char_close_square;`
		349	`break;`
		350	`case char_equal:`
		351	`/* Map '\?\?=' to '#' */`
		352	`d = char_hash;`
		353	`break;`
		354	`case char_exclaim:`
		355	`/* Map '\?\?!' to '\|' */`
		356	`d = char_bar;`
		357	`break;`
		358	`case char_greater:`
		359	`/* Map '\?\?>' to '}' */`
		360	`d = char_close_brace;`
		361	`break;`
		362	`case char_less:`
		363	`/* Map '\?\?<' to '{' */`
		364	`d = char_open_brace;`
		365	`break;`
		366	`case char_minus:`
		367	`/* Map '\?\?-' to '~' */`
		368	`d = char_tilde;`
		369	`break;`
		370	`case char_open_round:`
		371	`/* Map '\?\?(' to '[' */`
		372	`d = char_open_square;`
		373	`break;`
		374	`case char_single_quote:`
		375	`/* Map '\?\?\'' to '^' */`
		376	`d = char_circum;`
		377	`break;`
		378	`case char_slash:`
		379	`/* Map '\?\?/' to '\\' */`
		380	`d = char_backslash;`
		381	`break;`
		382	`default:`
		383	`/* Not a trigraph: push back c, then the second question mark */`
		384	`unread_char(c);`
		385	`unread_char(char_question);`
		386	`return(char_question);`
		387	`}`
		388	`update_column();`
		389	`report(crt_loc, ERR_lex_trigraph_replace(c, d)); /* c is the third character, d its replacement */`
		390	`return(d);`
		391	`} else {`
		392	`/* Not a trigraph: only one character was read, so push it back */`
		393	`unread_char(c);`
2	7u83	394	`}`
		395	`}`
7	7u83	396	`return(char_question);`
2	7u83	397	`}`
		398
		399
		400	`/*`
		401	`READ A NEWLINE CHARACTER`
		402
		403	`This routine is called after each carriage return character, checking`
		404	`for a following newline character.`
		405	`*/`
		406
7	7u83	407	`static int`
		408	`read_newline(void)`
2	7u83	409	`{`
7	7u83	410	`if (allow_dos_newline) { /* look ahead only when DOS-like newline rules are on */`
		411	`int c = next_char();`
		412	`if (c == char_end) {`
		413	`c = refill_char();`
		414	`}`
		415	`if (c == char_newline) {`
		416	`return(c); /* the return/newline pair is delivered as a single newline */`
		417	`}`
		418	`unread_char(c); /* not a newline: push the character back */`
		419	`}`
		420	`return(char_return); /* lone carriage return is passed through unchanged */`
2	7u83	421	`}`
		422
		423
		424	`/*`
		425	`READ AN END OF FILE CHARACTER`
		426
		427	`This routine is called after each terminate character, checking for`
		428	`a following end of file character.`
		429	`*/`
		430
7	7u83	431	`static int`
		432	`read_eof(void)`
2	7u83	433	`{`
7	7u83	434	`if (allow_dos_newline) { /* same flag that controls the carriage return handling */`
		435	`int c = next_char();`
		436	`if (c == char_end) {`
		437	`c = refill_char();`
		438	`}`
		439	`if (c == char_eof) {`
		440	`return(c); /* char_sub directly before end of file is delivered as end of file */`
		441	`}`
		442	`unread_char(c); /* something else follows: push it back */`
		443	`}`
		444	`return(char_sub); /* otherwise char_sub is passed through unchanged */`
2	7u83	445	`}`
		446
		447
		448	`/*`
		449	`READ THE NEXT CHARACTER ALLOWING FOR TRIGRAPHS ETC.`
		450
		451	`This routine reads the next character from the input file, adjusting`
		452	`it as necessary for trigraphs and escaped newlines. This routine`
		453	`corresponds to phases 1 and 2 of the phases of translation.`
		454	`*/`
		455
7	7u83	456	`static int`
		457	`read_char(void)`
2	7u83	458	`{`
7	7u83	459	`for (;;) { /* loops only to swallow backslash-newline pairs */`
		460	`int c = next_char();`
		461	`if (c == char_end) {`
		462	`c = refill_char();`
		463	`}`
		464	`if (c == char_question) {`
		465	`c = adjust_trigraph(); /* may yield a backslash, which is then examined below */`
		466	`}`
		467	`if (c != char_backslash) {`
		468	`/* Not an escaped newline */`
		469	`return(c);`
		470	`}`
		471	`c = next_char(); /* character after the backslash, read without trigraph handling */`
		472	`if (c == char_end) {`
		473	`c = refill_char();`
		474	`}`
		475	`if (c == char_return) {`
		476	`c = read_newline();`
		477	`}`
		478	`if (c != char_newline) {`
		479	`/* Not an escaped newline */`
		480	`unread_char(c);`
		481	`return(char_backslash);`
		482	`}`
		483	`crt_loc.line++; /* escaped newline: advance the location and read again */`
		484	`crt_loc.column = 0;`
		485	`input_crt = input_posn;`
2	7u83	486	`}`
7	7u83	487	`/* NOTREACHED */`
2	7u83	488	`}`
		489
		490
		491	`/*`
		492	`CHARACTER LOOK-UP TABLE`
		493
		494	`This look-up table gives the various character types. Note that the`
		495	`default look-up table is for ASCII, for other codesets the table`
		496	`needs to be rewritten. The only really interesting points in the`
		497	`table itself are that newline has not been classified as a white-space`
		498	`and that character char_eof (-1) represents end of file.`
		499	`*/`
		500
		501	`#define SPACE_M 0x01 /* one bit per character class */`
		502	`#define ALPHA_M 0x02`
		503	`#define DIGIT_M 0x04`
		504	`#define ALNUM_M 0x08`
		505	`#define PPDIG_M 0x10`
		506	`#define SYMBL_M 0x20`
		507	`#define NLINE_M 0x40`
		508	`#define LEGAL_M 0x80`
		509
		510	`#define ILLEG 0x00 /* no bits set, so is_legal fails */`
		511	`#define LEGAL LEGAL_M`
7	7u83	512	`#define SPACE (SPACE_M \| LEGAL_M)`
		513	`#define ALPHA (ALPHA_M \| ALNUM_M \| PPDIG_M \| LEGAL_M)`
		514	`#define DIGIT (DIGIT_M \| ALNUM_M \| PPDIG_M \| LEGAL_M)`
		515	`#define SYMBL (SYMBL_M \| LEGAL_M)`
		516	`#define POINT (PPDIG_M \| SYMBL_M \| LEGAL_M)`
		517	`#define NLINE (NLINE_M \| LEGAL_M) /* newline has its own bit and does not set SPACE_M */`
2	7u83	518
7	7u83	519	`#define main_characters (characters + 1) /* skips the leading EOF slot, so index -1 reaches it */`
		520	`#define lookup_char(C) ((int)main_characters[C])`
		521	`#define is_white(T) ((T) & SPACE_M) /* the is_ macros take a looked-up table value, not a character */`
		522	`#define is_alpha(T) ((T) & ALPHA_M)`
		523	`#define is_digit(T) ((T) & DIGIT_M)`
		524	`#define is_alphanum(T) ((T) & ALNUM_M)`
		525	`#define is_ppdigit(T) ((T) & PPDIG_M)`
		526	`#define is_symbol(T) ((T) & SYMBL_M)`
		527	`#define is_newline(T) ((T) & NLINE_M)`
		528	`#define is_legal(T) ((T) & LEGAL_M)`
2	7u83	529
7	7u83	530	`static unsigned char characters[NO_CHAR + 2] = {`
		531	`LEGAL, /* EOF */`
		532	`#define CHAR_DATA(A, B, C, D) (A), /* keep only the first field of each CHAR_DATA entry */`
2	7u83	533	`#include "char.h"`
		534	`#undef CHAR_DATA`
7	7u83	535	`ILLEG /* dummy */`
		536	`};`
2	7u83	537
7	7u83	538	`static unsigned char *copy_characters = main_characters; /* NOTE(review): points into the same array, not a separate copy */`
2	7u83	539
		540
		541	`/*`
		542	`SET A CHARACTER LOOK-UP`
		543
		544	`This routine sets the look-up value for character a to be equal to`
		545	`the underlying value for character b. As a special case, setting`
		546	`the look-up for a carriage return to that for newline enables`
		547	`DOS-like rules on newline and end of file characters.`
		548	`*/`
		549
7	7u83	550	`void`
		551	`set_char_lookup(int a, int b)`
2	7u83	552	`{`
7	7u83	553	`if (a >= 0 && a < NO_CHAR && b >= 0 && b < NO_CHAR) { /* out-of-range arguments are silently ignored */`
		554	`unsigned char t = copy_characters[b]; /* class bits currently held for b */`
		555	`if (a == char_return) {`
		556	`if (b == char_newline) {`
		557	`/* Set DOS-like newline rules */`
		558	`allow_dos_newline = 1;`
		559	`return; /* the table entry for carriage return is left untouched */`
		560	`}`
		561	`if (b == char_return) {`
		562	`/* Unset DOS-like newline rules */`
		563	`allow_dos_newline = 0;`
		564	`}`
		565	`}`
		566	`main_characters[a] = t;`
2	7u83	567	`}`
7	7u83	568	`return;`
2	7u83	569	`}`
		570
		571
		572	`/*`
		573	`SET A NUMBER OF CHARACTER LOOK-UPS`
		574
		575	`This routine sets the character look-ups for all the elements of the`
		576	`string or character literal expression a to be equal to that for the`
		577	`character literal expression b. If b is the null expression then`
		578	`the look-up is set to be an illegal character.`
		579	`*/`
		580
7	7u83	581	`void`
		582	`set_character(EXP a, EXP b)`
2	7u83	583	`{`
7	7u83	584	`int c = get_char_value(b); /* character whose look-up value is copied */`
		585	`if (IS_exp_string_lit(a)) {`
		586	`STRING s = DEREF_str(exp_string_lit_str(a));`
		587	`unsigned long n = DEREF_ulong(str_simple_len(s));`
		588	`string t = DEREF_string(str_simple_text(s));`
		589	`unsigned kind = DEREF_unsigned(str_simple_kind(s));`
		590	`if (kind & STRING_MULTI) {`
		591	`while (n) { /* n elements, each MULTI_WIDTH units of t */`
		592	`int ch = CHAR_SIMPLE;`
		593	`unsigned long d = get_multi_char(t, &ch);`
		594	`if (d < (unsigned long)NO_CHAR) { /* values outside the table are skipped */`
		595	`set_char_lookup((int)d, c);`
		596	`}`
		597	`t += MULTI_WIDTH;`
		598	`n--;`
		599	`}`
		600	`} else {`
		601	`while (n) { /* simple string: one element of t per character */`
		602	`int d = (int)*t;`
		603	`set_char_lookup(d, c);`
		604	`t++;`
		605	`n--;`
		606	`}`
2	7u83	607	`}`
		608	`} else {`
7	7u83	609	`int d = get_char_value(a); /* a is not a string literal: handled as a single character */`
		610	`if (d != char_illegal) {`
		611	`set_char_lookup(d, c);`
		612	`}`
2	7u83	613	`}`
7	7u83	614	`return;`
2	7u83	615	`}`
		616
		617
		618	`/*`
		619	`CHECK FOR WHITE SPACE CHARACTERS`
		620
		621	`This routine checks whether the character a represents a white space.`
		622	`The newline character constitutes a special case.`
		623	`*/`
		624
7	7u83	625	`int`
		626	`is_white_char(unsigned long a)`
2	7u83	627	`{`
7	7u83	628	`int t;`
		629	`if (a >= NO_CHAR) { /* outside the look-up table: never white space */`
		630	`return(0);`
		631	`}`
		632	`t = lookup_char(a);`
		633	`return(is_white(t) \|\| is_newline(t)); /* newline has its own class bit, so it is tested separately */`
2	7u83	634	`}`
		635
		636
		637	`/*`
		638	`CHECK FOR ALPHABETIC CHARACTERS`
		639
		640	`This routine checks whether the character a represents an alphabetic`
		641	`character.`
		642	`*/`
		643
7	7u83	644	`int`
		645	`is_alpha_char(unsigned long a)`
2	7u83	646	`{`
7	7u83	647	`if (a >= NO_CHAR) { /* outside the look-up table: never alphabetic */`
		648	`return(0);`
		649	`}`
		650	`return(is_alpha(lookup_char(a))); /* nonzero mask value rather than exactly 1 */`
2	7u83	651	`}`
		652
		653
		654	`/*`
		655	`CHECK FOR LEGAL CHARACTERS`
		656
		657	`This routine checks whether the character a represents a legal character.`
		658	`*/`
		659
7	7u83	660	`int`
		661	`is_legal_char(unsigned long a)`
2	7u83	662	`{`
7	7u83	663	`if (a >= NO_CHAR) { /* outside the look-up table: reported as not legal */`
		664	`return(0);`
		665	`}`
		666	`return(is_legal(lookup_char(a))); /* nonzero mask value rather than exactly 1 */`
2	7u83	667	`}`
		668
		669
		670	`/*`
		671	`PEEK AHEAD ONE CHARACTER`
		672
		673	`This routine tests whether the next character is a (which will not be`
		674	`newline). If so the current character is advanced one, otherwise it`
		675	`is left unchanged. legal is set to false if the next character is`
		676	`not legal.`
		677	`*/`
		678
7	7u83	679	`int`
		680	`peek_char(int a, int *legal)`
2	7u83	681	`{`
7	7u83	682	`int c = read_char();`
		683	`ASSERT(a != char_newline);`
		684	`if (c == a) {`
		685	`return(1); /* match: the character stays consumed and *legal is not written */`
		686	`}`
		687	`*legal = is_legal_char((unsigned long)c); /* a negative c converts to a value above NO_CHAR, giving 0 */`
		688	`unread_char(c); /* no match: push the character back */`
		689	`return(0);`
2	7u83	690	`}`
		691
		692
		693	`/*`
		694	`TOKEN BUFFER`
		695
		696	`This buffer is used by read_token to hold the values of identifiers,`
		697	`numbers and strings.`
		698	`*/`
		699
7	7u83	700	`BUFFER token_buff = NULL_buff; /* written by read_string, skip_comment and skip_cpp_comment */`
2	7u83	701
		702
		703	`/*`
		704	`TOKEN IDENTIFICATION MACROS`
		705
		706	`These macros are used to identify the start or end of certain tokens`
		707	`such as comments and strings.`
		708	`*/`
		709
7	7u83	710	`#define START_COMMENT(A) ((A) == char_asterix)`
		711	`#define END_COMMENT(A, B) ((A) == char_asterix && (B) == char_slash) /* A is the previous character, B the current one */`
		712	`#define START_CPP_COMMENT(A) ((A) == char_slash && allow_cpp_comments) /* never true when allow_cpp_comments is zero */`
		713	`#define END_CPP_COMMENT(A) ((A) == char_newline)`
		714	`#define START_STRING(A) ((A) == char_quote \|\| (A) == char_single_quote)`
		715	`#define END_STRING(A, Q) ((A) == (Q)) /* Q is the expected closing quote */`
2	7u83	716
		717
		718	`/*`
		719	`END OF FILE FLAG`
		720
		721	`Each source file should end in a newline character, which is not`
		722	`preceded by a backslash. This flag is used to indicate whether the`
		723	`end of the present file has the correct form.`
		724	`*/`
		725
7	7u83	726	`static int good_eof = 0; /* also set by the string and comment scanners once they have reported end of file */`
2	7u83	727
		728
		729	`/*`
		730	`SKIP A STRING`
		731
		732	`This routine skips a string or character literal. It is entered after`
		733	`the initial quote, q, has been read. Escape sequences are always`
		734	`allowed. The routine returns lex_string_Hlit if the string terminates`
		735	`correctly and lex_eof otherwise.`
		736	`*/`
		737
7	7u83	738	`static int`
		739	`skip_string(int q)`
2	7u83	740	`{`
7	7u83	741	`int e = q;`
		742	`LOCATION loc;`
		743	`unsigned nl = 0; /* number of newlines accepted inside the literal */`
		744	`int escaped = 0; /* nonzero while the previous character was an unescaped backslash */`
		745	`int have_char = 0; /* nonzero once the literal has some content */`
		746	`int allow_nl = allow_newline_strings;`
		747	`if (e == char_single_quote \|\| in_preproc_dir == 1) { /* newlines are never accepted in these two cases */`
		748	`allow_nl = 0;`
		749	`}`
		750	`update_column();`
		751	`loc = crt_loc; /* start of the literal, used for the newline and end of file reports */`
2	7u83	752
7	7u83	753	`/* Scan to end of string */`
		754	`for (;;) {`
		755	`int c = read_char();`
		756	`if (END_STRING(c, e) && !escaped) { /* unescaped closing quote */`
		757	`if (e == char_single_quote && !have_char) {`
		758	`update_column();`
		759	`report(crt_loc, ERR_lex_ccon_empty());`
		760	`}`
		761	`if (nl) {`
		762	`report(loc, ERR_lex_string_nl(nl, nl));`
		763	`}`
		764	`return(lex_string_Hlit);`
		765	`}`
		766	`if (c == char_newline) {`
		767	`if (allow_nl) {`
		768	`/* Report newlines but continue */`
		769	`crt_loc.line++;`
		770	`crt_loc.column = 0;`
		771	`input_crt = input_posn;`
		772	`nl++;`
		773	`} else {`
		774	`unread_char(c); /* the newline is pushed back before the scan stops */`
		775	`update_column();`
		776	`report(crt_loc, ERR_lex_string_pp_nl());`
		777	`break;`
		778	`}`
		779	`} else if (c == char_eof) {`
		780	`report(loc, ERR_lex_phases_str_eof());`
		781	`good_eof = 1;`
		782	`nl = 0; /* suppresses the newline report after the loop */`
		783	`break;`
		784	`}`
		785	`if (escaped) {`
		786	`escaped = 0;`
		787	`} else {`
		788	`if (c == char_backslash) {`
		789	`escaped = 1;`
		790	`}`
		791	`}`
		792	`if (!escaped) { /* a pending backslash alone does not count as content */`
		793	`have_char = 1;`
		794	`}`
2	7u83	795	`}`
7	7u83	796	`if (nl) {`
		797	`/* Report newlines in string */`
		798	`report(loc, ERR_lex_string_nl(nl, nl));`
2	7u83	799	`} else {`
7	7u83	800	`/* Don't bother with error recovery */`
		801	`/* EMPTY */`
2	7u83	802	`}`
7	7u83	803	`return(lex_eof); /* reached only when the literal was not terminated */`
2	7u83	804	`}`
		805
		806
		807	`/*`
		808	`READ THE BODY OF A STRING`
		809
		810	`This routine reads the body of a string or character literal or of a`
		811	`header name. It is entered after the initial quote has been read.`
		812	`The corresponding close quote is passed in as q. The esc argument`
		813	`indicates whether escape sequences are allowed (they are not in`
		814	`header names for example). The string itself is built up in`
		815	`token_buff. The routine returns lex_string_Hlit if the string`
		816	`terminates correctly and lex_eof otherwise. It also sets`
		817	`token_buff.posn to point to the end of the string.`
		818	`*/`
		819
7	7u83	820	`int`
		821	`read_string(int q, int esc)`
2	7u83	822	`{`
7	7u83	823	`int c;`
		824	`int e = q;`
		825	`LOCATION loc;`
		826	`long posn = -1; /* input position saved for character literals, used in error recovery */`
		827	`int escaped = 0;`
		828	`unsigned nl = 0; /* number of newlines accepted inside the literal */`
		829	`int have_char = 0;`
		830	`string s = token_buff.start; /* write position in token_buff */`
		831	`string se = token_buff.end;`
		832	`int allow_nl = allow_newline_strings;`
		833	`update_column();`
		834	`if (e == char_single_quote) {`
		835	`posn = tell_buffer(crt_buff_no);`
		836	`allow_nl = 0;`
		837	`} else if (in_preproc_dir == 1) {`
		838	`allow_nl = 0;`
		839	`}`
		840	`loc = crt_loc; /* start of the literal, used for the newline and end of file reports */`
2	7u83	841
7	7u83	842	`/* Scan the string */`
		843	`for (;;) {`
		844	`c = read_char();`
		845	`if (END_STRING(c, e) && !escaped) { /* unescaped closing quote */`
		846	`if (e == char_single_quote && !have_char) {`
		847	`update_column();`
		848	`report(crt_loc, ERR_lex_ccon_empty());`
		849	`}`
		850	`if (nl) {`
		851	`report(loc, ERR_lex_string_nl(nl, nl));`
		852	`}`
		853	`token_buff.posn = s;`
		854	`*s = 0; /* terminator is stored but posn does not include it */`
		855	`return(lex_string_Hlit);`
		856	`}`
		857	`if (c == char_newline) {`
		858	`if (allow_nl) {`
		859	`/* Report newlines but continue */`
		860	`crt_loc.line++;`
		861	`crt_loc.column = 0;`
		862	`input_crt = input_posn;`
		863	`nl++;`
		864	`} else {`
		865	`unread_char(c); /* the newline is pushed back before the scan stops */`
		866	`update_column();`
		867	`if (e == char_greater) {`
		868	`/* Header name */`
		869	`report(crt_loc,`
		870	`ERR_cpp_include_incompl());`
		871	`} else {`
		872	`report(crt_loc, ERR_lex_string_pp_nl());`
		873	`}`
		874	`break;`
		875	`}`
		876	`} else if (c == char_eof) {`
		877	`report(loc, ERR_lex_phases_str_eof());`
		878	`good_eof = 1;`
		879	`nl = 0; /* suppresses the newline report after the loop */`
		880	`break;`
		881	`}`
		882	`*s = (character)c;`
		883	`if (++s == se) { /* buffer full: grow it and refresh the cached end pointer */`
		884	`s = extend_buffer(&token_buff, s);`
		885	`se = token_buff.end;`
		886	`}`
		887	`if (escaped) {`
		888	`escaped = 0;`
2	7u83	889	`} else {`
7	7u83	890	`if (c == char_backslash) {`
		891	`escaped = esc; /* a backslash escapes the next character only when esc is nonzero */`
		892	`}`
2	7u83	893	`}`
7	7u83	894	`if (!escaped)have_char = 1;`
2	7u83	895	`}`
7	7u83	896	`if (nl) {`
		897	`/* Report newlines in string */`
		898	`report(loc, ERR_lex_string_nl(nl, nl));`
2	7u83	899	`} else {`
7	7u83	900	`/* Error recovery: an unterminated character literal is cut down to its first character */`
		901	`if (e == char_single_quote && have_char) {`
		902	`seek_buffer(crt_buff_no, posn, 1); /* go back to the position saved on entry */`
		903	`crt_loc = loc;`
		904	`s = token_buff.start;`
		905	`c = read_char();`
		906	`*(s++) = (character)c;`
		907	`if (c == char_backslash && esc) { /* keep the escaped character together with its backslash */`
		908	`c = read_char();`
		909	`*(s++) = (character)c;`
		910	`}`
		911	`}`
2	7u83	912	`}`
7	7u83	913	`token_buff.posn = s;`
		914	`*s = 0;`
		915	`return(lex_eof); /* reached only when the literal was not terminated */`
2	7u83	916	`}`
		917
		918
		919	`/*`
		920	`SKIP A C STYLE COMMENT`
		921
		922	`This routine skips a C style comment, returning lex_ignore_token if`
		923	`the comment is terminated correctly and lex_eof otherwise. It is`
		924	`entered after the first two characters comprising the comment start`
		925	`have been read. If keep is true then the comment text is built up`
		926	`in token_buff, otherwise it is discarded.`
		927	`*/`
		928
7	7u83	929	`static int`
		930	`skip_comment(int keep)`
2	7u83	931	`{`
7	7u83	932	`int c = 0; /* zero so that the first lastc matches no real character */`
		933	`int lastc;`
		934	`string s, se;`
		935	`LOCATION loc;`
		936	`update_column();`
		937	`loc = crt_loc; /* start of the comment, used for the end of file report */`
		938	`if (keep) {`
		939	`s = token_buff.start;`
		940	`se = token_buff.end;`
		941	`} else {`
		942	`s = NULL; /* a null s means the text is discarded */`
		943	`se = NULL;`
		944	`}`
		945	`do {`
		946	`lastc = c;`
		947	`read_label:`
		948	`/* Inlined version of read_char */`
		949	`c = next_char();`
		950	`if (c == char_end) {`
		951	`c = refill_char();`
2	7u83	952	`}`
7	7u83	953	`if (c == char_question) {`
		954	`c = adjust_trigraph();`
2	7u83	955	`}`
7	7u83	956	`if (c == char_backslash) {`
		957	`c = next_char();`
		958	`if (c == char_end) {`
		959	`c = refill_char();`
		960	`}`
		961	`if (c == char_return) {`
		962	`c = read_newline();`
		963	`}`
		964	`if (c == char_newline) {`
		965	`/* Allow for escaped newlines; lastc is deliberately left as it was */`
		966	`crt_loc.line++;`
		967	`crt_loc.column = 0;`
		968	`input_crt = input_posn;`
		969	`goto read_label;`
		970	`}`
		971	`unread_char(c);`
		972	`c = char_backslash;`
		973	`} else if (c == char_newline) {`
		974	`/* New line characters */`
		975	`crt_loc.line++;`
		976	`crt_loc.column = 0;`
		977	`input_crt = input_posn;`
		978	`crt_line_changed = 1;`
		979	`crt_spaces = 0;`
		980	`} else if (c == char_eof) {`
		981	`/* End of file characters */`
		982	`report(loc, ERR_lex_phases_comm_eof());`
		983	`good_eof = 1;`
		984	`if (s) {`
		985	`token_buff.posn = s;`
		986	`*s = 0;`
		987	`}`
		988	`return(lex_eof);`
		989	`} else if (c == char_asterix && lastc == char_slash) {`
		990	`/* Nested comments: a comment opener seen inside the comment is reported */`
		991	`update_column();`
		992	`report(crt_loc, ERR_lex_comment_nest());`
2	7u83	993	`}`
7	7u83	994	`if (s) {`
		995	`*s = (character)c;`
		996	`if (++s == se) { /* buffer full: grow it and refresh the cached end pointer */`
		997	`s = extend_buffer(&token_buff, s);`
		998	`se = token_buff.end;`
		999	`}`
		1000	`}`
		1001	`} while (!END_COMMENT(lastc, c));`
		1002	`if (s) {`
		1003	`s -= 2; /* drop the two closing characters from the kept text */`
		1004	`token_buff.posn = s;`
		1005	`*s = 0;`
2	7u83	1006	`}`
7	7u83	1007	`crt_spaces++; /* the whole comment advances the space count by one */`
		1008	`return(lex_ignore_token);`
2	7u83	1009	`}`
		1010
		1011
		1012	`/*`
		1013	`SKIP A C++ STYLE COMMENT`
		1014
		1015	`This routine skips a C++ style comment, returning lex_ignore_token`
		1016	`if the comment terminates correctly and lex_eof otherwise. It is`
		1017	`entered after the first two characters comprising the comment start`
		1018	`have been read. The next token read after the comment will be the`
		1019	`terminating newline. If keep is true then the comment text is built`
		1020	`up in token_buff, otherwise it is discarded.`
		1021	`*/`
		1022
7	7u83	1023	`static int`
		1024	`skip_cpp_comment(int keep)`
2	7u83	1025	`{`
7	7u83	1026	`int c;`
		1027	`string s, se;`
		1028	`if (keep) {`
		1029	`s = token_buff.start;`
		1030	`se = token_buff.end;`
		1031	`} else {`
		1032	`s = NULL; /* a null s means the text is discarded */`
		1033	`se = NULL;`
		1034	`}`
		1035	`do {`
		1036	`read_label:`
		1037	`/* Inlined version of read_char */`
		1038	`c = next_char();`
		1039	`if (c == char_end) {`
		1040	`c = refill_char();`
2	7u83	1041	`}`
7	7u83	1042	`if (c == char_question) {`
		1043	`c = adjust_trigraph();`
2	7u83	1044	`}`
7	7u83	1045	`if (c == char_backslash) {`
		1046	`c = next_char();`
		1047	`if (c == char_end) {`
		1048	`c = refill_char();`
		1049	`}`
		1050	`if (c == char_return) {`
		1051	`c = read_newline();`
		1052	`}`
		1053	`if (c == char_newline) {`
		1054	`/* Allow for escaped newlines: the comment continues on the next line */`
		1055	`crt_loc.line++;`
		1056	`crt_loc.column = 0;`
		1057	`input_crt = input_posn;`
		1058	`goto read_label;`
		1059	`}`
		1060	`unread_char(c);`
		1061	`c = char_backslash;`
		1062	`} else if (c == char_eof) {`
		1063	`/* End of file characters */`
		1064	`update_column();`
		1065	`report(crt_loc, ERR_lex_phases_comm_eof());`
		1066	`good_eof = 1;`
		1067	`if (s) {`
		1068	`token_buff.posn = s;`
		1069	`*s = 0;`
		1070	`}`
		1071	`return(lex_eof);`
2	7u83	1072	`}`
7	7u83	1073	`if (s) {`
		1074	`*s = (character)c;`
		1075	`if (++s == se) { /* buffer full: grow it and refresh the cached end pointer */`
		1076	`s = extend_buffer(&token_buff, s);`
		1077	`se = token_buff.end;`
		1078	`}`
		1079	`}`
		1080	`} while (!END_CPP_COMMENT(c));`
		1081	`unread_char(c); /* the terminating newline is pushed back for the next read */`
		1082	`if (s) {`
		1083	`s -= 1; /* drop the newline from the kept text */`
		1084	`token_buff.posn = s;`
		1085	`*s = 0;`
2	7u83	1086	`}`
7	7u83	1087	`crt_line_changed = 1;`
		1088	`crt_spaces = 0;`
		1089	`return(lex_ignore_token);`
2	7u83	1090	`}`
		1091
		1092
		1093	`/*`
		1094	`SKIP WHITE-SPACE CHARACTERS`
		1095
		1096	`This routine skips any white-space characters (including comments).`
		1097	`Newline characters are treated as white-space only if nl is true.`
		1098	`The result is a bitpattern formed from the components:`
		1099
		1100	`WHITE_SPACE for white-space characters;`
		1101	`WHITE_NEWLINE for newline characters;`
		1102	`WHITE_ESC_NEWLINE for escaped newlines;`
		1103
		1104	`the result being reset to WHITE_NEWLINE after each newline. Note that`
		1105	`trigraphs and escaped newlines are treated by hand. The effect of this`
		1106	`routine is that all non-empty sequences of white-space characters other`
		1107	`than newlines are treated as if they were a single space (the C/C++`
		1108	`specification says that this is implementation-defined).`
		1109	`*/`
		1110
7	7u83	1111	`unsigned long`
		1112	`skip_white(int nl)`
2	7u83	1113	`{`
7	7u83	1114	`int c;`
		1115	`unsigned long sp = 0;`
		1116	`for (;;) {`
		1117	`c = next_char();`
		1118	`if (c == char_end) {`
		1119	`c = refill_char();`
		1120	`}`
		1121	`if (c == char_return) {`
		1122	`c = read_newline();`
		1123	`}`
		1124	`if (c == char_sub) {`
		1125	`c = read_eof();`
		1126	`}`
		1127	`if (c == char_newline) {`
		1128	`/* Deal with newline characters */`
		1129	`if (!nl) {`
		1130	`break;`
		1131	`}`
		1132	`sp = WHITE_NEWLINE;`
		1133	`crt_loc.line++;`
		1134	`crt_loc.column = 0;`
		1135	`input_crt = input_posn;`
		1136	`crt_line_changed = 1;`
		1137	`crt_spaces = 0;`
		1138	`} else if (c == char_space) {`
		1139	`/* Deal with simple spaces */`
		1140	`sp \|= WHITE_SPACE;`
		1141	`crt_spaces++;`
		1142	`} else if (c == char_tab) {`
		1143	`/* Deal with tab characters */`
		1144	`unsigned long tab = tab_width;`
		1145	`sp \|= WHITE_SPACE;`
		1146	`crt_spaces = tab *(crt_spaces / tab + 1);`
		1147	`} else if (c == char_eof) {`
		1148	`/* End of file */`
		1149	`if (sp == WHITE_NEWLINE) {`
		1150	`good_eof = 1;`
		1151	`}`
		1152	`break;`
		1153	`} else {`
		1154	`int t;`
2	7u83	1155	`#if FS_EXTENDED_CHAR`
7	7u83	1156	`if (IS_EXTENDED(c)) {`
		1157	`break;`
		1158	`}`
2	7u83	1159	`#endif`
7	7u83	1160	`t = lookup_char(c);`
		1161	`if (is_white(t)) {`
		1162	`/* Deal with other white space characters */`
		1163	`sp \|= WHITE_SPACE;`
		1164	`crt_spaces++;`
		1165	`} else {`
		1166	`if (c == char_question)c = adjust_trigraph();`
		1167	`if (c == char_slash) {`
		1168	`/* Deal with comments */`
		1169	`int b = read_char();`
		1170	`if (START_COMMENT(b)) {`
		1171	`sp \|= WHITE_SPACE;`
		1172	`b = skip_comment(0);`
		1173	`if (b == lex_eof) {`
		1174	`return(sp);`
		1175	`}`
		1176	`} else if (START_CPP_COMMENT(b)) {`
		1177	`sp \|= WHITE_SPACE;`
		1178	`b = skip_cpp_comment(0);`
		1179	`if (b == lex_eof) {`
		1180	`return(sp);`
		1181	`}`
		1182	`if (!nl) {`
		1183	`return(sp);`
		1184	`}`
		1185	`} else {`
		1186	`unread_char(b);`
		1187	`break;`
		1188	`}`
		1189	`} else if (c == char_backslash) {`
		1190	`/* Deal with escaped newlines */`
		1191	`int b = next_char();`
		1192	`if (b == char_end) {`
		1193	`b = refill_char();`
		1194	`}`
		1195	`if (b == char_return) {`
		1196	`b = read_newline();`
		1197	`}`
		1198	`if (b == char_newline) {`
		1199	`crt_loc.line++;`
		1200	`crt_loc.column = 0;`
		1201	`input_crt = input_posn;`
		1202	`} else {`
		1203	`unread_char(b);`
		1204	`break;`
		1205	`}`
		1206	`sp \|= WHITE_ESC_NEWLINE;`
		1207	`} else {`
		1208	`break;`
		1209	`}`
		1210	`}`
2	7u83	1211	`}`
		1212	`}`
7	7u83	1213	`unread_char(c);`
		1214	`return(sp);`
2	7u83	1215	`}`
		1216
		1217
		1218	`/*`
		1219	`PATCH UP WHITE-SPACE CHARACTERS`
		1220
		1221	`Calling skip_white ( 1 ) can mess up the parser as regards spotting`
		1222	`preprocessing directives and valid end of file markers. This routine`
		1223	`may be called with the return value of skip_white as an argument to`
		1224	`patch up the buffer in order to get the parser back into the right`
		1225	`state.`
		1226	`*/`
		1227
7	7u83	1228	`void`
		1229	`patch_white(unsigned long sp)`
2	7u83	1230	`{`
7	7u83	1231	`if (sp & WHITE_NEWLINE) {`
		1232	`if (sp & WHITE_SPACE) {`
		1233	`/* Patch in a space after a newline */`
		1234	`unsigned long n;`
		1235	`update_column();`
		1236	`n = crt_loc.column;`
		1237	`while (n) {`
		1238	`unread_char(char_space);`
		1239	`if (input_posn <= input_start) {`
		1240	`break;`
		1241	`}`
		1242	`n--;`
		1243	`}`
		1244	`} else if (sp & WHITE_ESC_NEWLINE) {`
		1245	`/* Patch in an escaped newline after a newline */`
		1246	`unread_char(char_backslash);`
		1247	`unread_char(char_newline);`
		1248	`crt_loc.line--;`
		1249	`}`
		1250	`/* Patch in a newline */`
		1251	`unread_char(char_newline);`
		1252	`crt_loc.line--;`
		1253	`crt_loc.column = 0;`
		1254	`crt_spaces = 0;`
2	7u83	1255	`}`
7	7u83	1256	`return;`
2	7u83	1257	`}`
		1258
		1259
		1260	`/*`
		1261	`SKIP TO END OF LINE`
		1262
		1263	`This routine skips to the end of the current line. It returns 0 if`
		1264	`only white-space characters are encountered. It uses skip_white to`
		1265	`jump over white-space (including comments).`
		1266	`*/`
		1267
7	7u83	1268	`int`
		1269	`skip_to_end(void)`
2	7u83	1270	`{`
7	7u83	1271	`int c;`
		1272	`int res = 0;`
		1273	`in_preproc_dir = 0;`
		1274	`for (;;) {`
		1275	`IGNORE skip_white(0);`
		1276	`read_label:`
		1277	`/* Inlined version of read_char */`
		1278	`c = next_char();`
		1279	`if (c == char_end) {`
		1280	`c = refill_char();`
2	7u83	1281	`}`
7	7u83	1282	`if (c == char_question) {`
		1283	`c = adjust_trigraph();`
		1284	`}`
		1285	`if (c == char_backslash) {`
		1286	`c = next_char();`
		1287	`if (c == char_end) {`
		1288	`c = refill_char();`
		1289	`}`
		1290	`if (c == char_return) {`
		1291	`c = read_newline();`
		1292	`}`
		1293	`if (c == char_newline) {`
		1294	`/* Allow for escaped newlines */`
		1295	`crt_loc.line++;`
		1296	`crt_loc.column = 0;`
		1297	`input_crt = input_posn;`
		1298	`goto read_label;`
		1299	`}`
		1300	`unread_char(c);`
		1301	`} else if (c == char_newline) {`
		1302	`/* New line characters */`
		1303	`crt_loc.line++;`
		1304	`crt_loc.column = 0;`
		1305	`input_crt = input_posn;`
		1306	`crt_line_changed = 1;`
		1307	`crt_spaces = 0;`
		1308	`return(res);`
		1309	`} else if (START_STRING(c)) {`
		1310	`/* String literals */`
		1311	`res = 1;`
		1312	`c = skip_string(c);`
		1313	`if (c == lex_eof) {`
		1314	`return(res);`
		1315	`}`
		1316	`} else if (c == char_eof) {`
		1317	`/* End of file characters */`
		1318	`break;`
		1319	`} else {`
		1320	`res = 1;`
		1321	`}`
2	7u83	1322	`}`
7	7u83	1323	`update_column();`
		1324	`report(crt_loc, ERR_lex_phases_eof());`
		1325	`good_eof = 1;`
		1326	`return(res);`
2	7u83	1327	`}`
		1328
		1329
		1330	`/*`
		1331	`READ A UNICODE CHARACTER`
		1332
		1333	`This routine reads a unicode character. It is entered after the`
		1334	`initial backslash and the following character, c, have been read.`
		1335	`It assigns the character type to pc and returns the character code.`
		1336	`*/`
		1337
7	7u83	1338	`static unsigned long`
		1339	`read_unicode(int c, int *pc)`
2	7u83	1340	`{`
7	7u83	1341	`unsigned i, n;`
		1342	`unsigned long u;`
		1343	`character s[10];`
		1344	`ERROR err = NULL_err;`
		1345	`string p = s;`
		1346	`if (c == char_u && allow_unicodes) {`
		1347	`/* Read '\uxxxx' */`
		1348	`*pc = CHAR_UNI4;`
		1349	`n = 4;`
		1350	`} else if (c == char_U && allow_unicodes) {`
		1351	`/* Read '\Uxxxxxxxx' */`
		1352	`*pc = CHAR_UNI8;`
		1353	`n = 8;`
		1354	`} else {`
		1355	`unread_char(c);`
		1356	`*pc = CHAR_NONE;`
		1357	`return(0);`
		1358	`}`
		1359	`for (i = 0; i < n; i++) {`
		1360	`int t;`
		1361	`int d = read_char();`
		1362	`if (d == char_eof) {`
		1363	`break;`
		1364	`}`
2	7u83	1365	`#if FS_EXTENDED_CHAR`
7	7u83	1366	`if (IS_EXTENDED(d)) {`
		1367	`unread_char(d);`
		1368	`break;`
		1369	`}`
2	7u83	1370	`#endif`
7	7u83	1371	`t = lookup_char(d);`
		1372	`if (!is_alphanum(t)) {`
		1373	`unread_char(d);`
		1374	`break;`
		1375	`}`
		1376	`s[i] = (character)d;`
2	7u83	1377	`}`
7	7u83	1378	`s[i] = 0;`
		1379	`u = eval_unicode(c, n, pc, &p, &err);`
		1380	`if (!IS_NULL_err(err)) {`
		1381	`update_column();`
		1382	`report(crt_loc, err);`
		1383	`}`
		1384	`return(u);`
2	7u83	1385	`}`
		1386
		1387
		1388	`/*`
		1389	`READ AN EXTENDED IDENTIFIER`
		1390
		1391	`This routine reads an extended identifier name (one including a unicode`
		1392	`character). It is entered after reading the simple characters in the`
		1393	`token buffer plus the unicode character given by u and ch.`
		1394	`*/`
		1395
7	7u83	1396	`static HASHID`
		1397	`read_extended_id(unsigned long u, int ch)`
2	7u83	1398	`{`
7	7u83	1399	`string s;`
		1400	`int c, t;`
		1401	`HASHID nm;`
		1402	`unsigned long h;`
		1403	`BUFFER *bf = &token_buff;`
		1404	`do {`
		1405	`if (!unicode_alpha(u)) {`
		1406	`/* Report illegal identifiers */`
		1407	`update_column();`
		1408	`report(crt_loc, ERR_lex_name_extendid(u));`
		1409	`}`
		1410	`print_char(u, ch, 0, bf);`
		1411	`for (;;) {`
		1412	`c = read_char();`
2	7u83	1413	`#if FS_EXTENDED_CHAR`
7	7u83	1414	`if (IS_EXTENDED(c)) {`
		1415	`break;`
		1416	`}`
2	7u83	1417	`#endif`
7	7u83	1418	`t = lookup_char(c);`
		1419	`if (!is_alphanum(t)) {`
		1420	`break;`
		1421	`}`
		1422	`bfputc(bf, c);`
		1423	`}`
		1424	`ch = CHAR_NONE;`
		1425	`if (c == char_backslash) {`
		1426	`int nextc = read_char();`
		1427	`u = read_unicode(nextc, &ch);`
		1428	`}`
		1429	`} while (ch != CHAR_NONE);`
		1430	`unread_char(c);`
		1431	`bfputc(bf, 0);`
		1432	`s = bf->start;`
		1433	`h = hash(s);`
		1434	`nm = lookup_name(s, h, 1, lex_unknown);`
		1435	`return(nm);`
2	7u83	1436	`}`
		1437
		1438
		1439	`/*`
		1440	`HASH VALUE FOR IDENTIFIERS`
		1441
		1442	`The hash value for identifiers is built up as the identifier is read.`
		1443	`It is then stored in this variable. The algorithm for calculating`
		1444	`the hash value needs to be kept in step with the routine hash (it`
		1445	`is checked by an assertion in lookup_name, so any errors should be`
		1446	`caught quickly if in debug mode).`
		1447	`*/`
		1448
/* Assigned by read_token immediately before it returns lex_identifier. */
7	7u83	1449	`HASHID token_hashid = NULL_hashid;`
2	7u83	1450
		1451
		1452	`/*`
		1453	`MAIN PASS ANALYSER`
		1454
		1455	`This routine reads the next preprocessing token from the input file.`
		1456	`It is designed for speed rather than elegance, hence the rather`
		1457	`indiscriminate use of labels. Trigraphs and escaped newlines`
		1458	`involving the first character are processed by hand. This routine`
		1459	`corresponds to phase 3 of the phases of translation. The position`
		1460	`within the line is tracked by column - this is zero at the start of`
		1461	`a line, positive if only white space has been read and negative`
		1462	`otherwise. preproc keeps track of the last preprocessing directive.`
		1463	`*/`
		1464
7	7u83	1465	`int`
		1466	`read_token(void)`
2	7u83	1467	`{`
7	7u83	1468	`int c, t;`
		1469	`int column = -1;`
		1470	`int preproc = lex_ignore_token;`
2	7u83	1471
7	7u83	1472	`/* Read the next character */`
		1473	`start_label:`
		1474	`c = next_char();`
		1475	`if (c == char_end)c = refill_char();`
		1476	`restart_label:`
2	7u83	1477	`#if FS_EXTENDED_CHAR`
7	7u83	1478	`if (IS_EXTENDED(c)) {`
		1479	`goto unknown_label;`
		1480	`}`
2	7u83	1481	`#endif`
7	7u83	1482	`t = lookup_char(c);`
		1483	`if (is_white(t)) {`
		1484	`crt_spaces++;`
		1485	`goto start_label;`
2	7u83	1486	`}`
7	7u83	1487	`process_label:`
		1488	`/* Process the next character */`
2	7u83	1489
7	7u83	1490	`/* Check symbols and punctuation */`
		1491	`if (is_symbol(t)) {`
		1492	`switch (c) {`
2	7u83	1493
7	7u83	1494	`case char_question: {`
		1495	`/* Deal with '?' and trigraphs */`
		1496	`c = adjust_trigraph();`
		1497	`if (c == char_question) return(lex_question);`
		1498	`goto restart_label;`
2	7u83	1499	`}`
		1500
7	7u83	1501	`case char_backslash: {`
		1502	`/* Deal with escaped newlines */`
		1503	`unsigned long u;`
		1504	`int ch = CHAR_NONE;`
		1505	`int nextc = next_char();`
		1506	`if (nextc == char_end)nextc = refill_char();`
		1507	`if (nextc == char_return)nextc = read_newline();`
		1508	`if (nextc == char_newline) {`
		1509	`crt_loc.line++;`
		1510	`crt_loc.column = 0;`
		1511	`input_crt = input_posn;`
		1512	`if (column == 0)column = 1;`
		1513	`goto start_label;`
		1514	`}`
		1515
		1516	`/* Check for unicode characters */`
		1517	`u = read_unicode(nextc, &ch);`
		1518	`if (ch != CHAR_NONE) {`
		1519	`token_buff.posn = token_buff.start;`
		1520	`token_hashid = read_extended_id(u, ch);`
		1521	`return(lex_identifier);`
		1522	`}`
		1523	`return(lex_backslash);`
2	7u83	1524	`}`
		1525
7	7u83	1526	`case char_hash:`
		1527	`/* Deal with '#' and '##' */`
		1528	`c = read_char();`
		1529	`if (c == char_hash) {`
		1530	`return(lex_hash_Hhash_H1);`
		1531	`}`
		1532	`unread_char(c);`
2	7u83	1533
7	7u83	1534	`/* Return with '#' if not at start of line */`
		1535	`if (column < 0 \|\| no_preproc_dir) {`
		1536	`return(lex_hash_H1);`
		1537	`}`
2	7u83	1538
7	7u83	1539	`/* Deal with preprocessing directives */`
		1540	`preproc_label: {`
		1541	`unsigned long sp = skip_white(0);`
		1542	`update_column();`
		1543	`if (column) {`
		1544	`report(crt_loc, ERR_cpp_indent());`
		1545	`}`
		1546	`if (sp & (WHITE_SPACE \| WHITE_ESC_NEWLINE)) {`
		1547	`report(preproc_loc, ERR_cpp_indent_dir());`
		1548	`}`
		1549	`preproc = read_preproc_dir(1, preproc);`
		1550	`if (preproc < 0) {`
		1551	`goto start_line_label;`
		1552	`}`
		1553	`unread_char(char_newline);`
		1554	`crt_loc.line--;`
		1555	`crt_loc.column = 0;`
		1556	`return(preproc);`
2	7u83	1557	`}`
		1558
7	7u83	1559	`case char_percent:`
		1560	`/* Deal with '%', '%=', '%>', '%:' and '%:%:' */`
		1561	`c = read_char();`
		1562	`if (c == char_equal) {`
		1563	`return(lex_rem_Heq);`
2	7u83	1564	`}`
7	7u83	1565	`if (c == char_greater && allow_digraphs) {`
		1566	`return(lex_close_Hbrace_H2);`
		1567	`}`
		1568	`if (c == char_colon && allow_digraphs) {`
		1569	`/* Check for '%:' and '%:%:' */`
		1570	`c = read_char();`
		1571	`if (c == char_percent) {`
		1572	`int nextc = read_char();`
		1573	`if (nextc == char_colon) {`
		1574	`return(lex_hash_Hhash_H2);`
		1575	`}`
		1576	`unread_char(nextc);`
		1577	`}`
		1578	`unread_char(c);`
2	7u83	1579
7	7u83	1580	`/* Return with '%:' if not at start of line */`
		1581	`if (column < 0 \|\| no_preproc_dir) {`
		1582	`return(lex_hash_H2);`
		1583	`}`
2	7u83	1584
7	7u83	1585	`/* Otherwise this is a preprocessing`
		1586	`* directive */`
		1587	`IGNORE get_digraph(lex_hash_H2);`
		1588	`goto preproc_label;`
		1589	`}`
		1590	`unread_char(c);`
		1591	`return(lex_rem);`
2	7u83	1592
7	7u83	1593	`case char_quote:`
		1594	`/* Deal with string literals */`
		1595	`IGNORE read_string(c, 1);`
		1596	`return(lex_string_Hlit);`
2	7u83	1597
7	7u83	1598	`case char_single_quote:`
		1599	`/* Deal with character literals */`
		1600	`IGNORE read_string(c, 1);`
		1601	`return(lex_char_Hlit);`
2	7u83	1602
7	7u83	1603	`case char_exclaim:`
		1604	`/* Deal with '!' and '!=' */`
		1605	`c = read_char();`
		1606	`if (c == char_equal) {`
		1607	`return(lex_not_Heq_H1);`
		1608	`}`
		1609	`unread_char(c);`
		1610	`return(lex_not_H1);`
2	7u83	1611
7	7u83	1612	`case char_ampersand:`
		1613	`/* Deal with '&', '&&' and '&=' */`
		1614	`c = read_char();`
		1615	`if (c == char_ampersand) {`
		1616	`return(lex_logical_Hand_H1);`
		1617	`}`
		1618	`if (c == char_equal) {`
		1619	`return(lex_and_Heq_H1);`
		1620	`}`
		1621	`unread_char(c);`
		1622	`return(lex_and_H1);`
2	7u83	1623
7	7u83	1624	`case char_asterix:`
		1625	`/* Deal with '' and '=' */`
		1626	`c = read_char();`
		1627	`if (c == char_equal) {`
		1628	`return(lex_star_Heq);`
		1629	`}`
		1630	`unread_char(c);`
		1631	`return(lex_star);`
2	7u83	1632
7	7u83	1633	`case char_plus:`
		1634	`/* Deal with '+', '++' and '+=' */`
		1635	`c = read_char();`
		1636	`if (c == char_plus) {`
		1637	`return(lex_plus_Hplus);`
		1638	`}`
		1639	`if (c == char_equal) {`
		1640	`return(lex_plus_Heq);`
		1641	`}`
		1642	`if (c == char_question && allow_extra_symbols) {`
		1643	`return(lex_abs);`
		1644	`}`
		1645	`unread_char(c);`
		1646	`return(lex_plus);`
2	7u83	1647
7	7u83	1648	`case char_minus:`
		1649	`/* Deal with '-', '--', '-=', '->' and '->' /`
		1650	`c = read_char();`
		1651	`if (c == char_minus) {`
		1652	`return(lex_minus_Hminus);`
		1653	`}`
		1654	`if (c == char_equal) {`
		1655	`return(lex_minus_Heq);`
		1656	`}`
		1657	`if (c == char_greater) {`
2	7u83	1658	`#if LANGUAGE_CPP`
7	7u83	1659	`/* '->' is only allowed in C++ /`
		1660	`c = read_char();`
		1661	`if (c == char_asterix) {`
		1662	`return(lex_arrow_Hstar);`
		1663	`}`
		1664	`unread_char(c);`
2	7u83	1665	`#endif`
7	7u83	1666	`return(lex_arrow);`
		1667	`}`
		1668	`unread_char(c);`
		1669	`return(lex_minus);`
2	7u83	1670
7	7u83	1671	`case char_dot:`
		1672	`/* Deal with '.', '...', '.' and numbers /`
		1673	`c = read_char();`
		1674	`if (c == char_dot) {`
		1675	`c = read_char();`
		1676	`if (c == char_dot) {`
		1677	`return(lex_ellipsis);`
		1678	`}`
		1679	`unread_char(c);`
		1680	`unread_char(char_dot);`
		1681	`return(lex_dot);`
		1682	`}`
2	7u83	1683	`#if LANGUAGE_CPP`
7	7u83	1684	`/* '.' is only allowed in C++ /`
		1685	`if (c == char_asterix) {`
		1686	`return(lex_dot_Hstar);`
		1687	`}`
2	7u83	1688	`#endif`
		1689	`#if FS_EXTENDED_CHAR`
7	7u83	1690	`if (IS_EXTENDED(c)) {`
		1691	`unread_char(c);`
		1692	`return(lex_dot);`
		1693	`}`
2	7u83	1694	`#endif`
7	7u83	1695	`t = lookup_char(c);`
		1696	`if (is_digit(t)) {`
		1697	`/* Indicate a number with first digit '.' */`
		1698	`t = POINT;`
		1699	`goto number_label;`
		1700	`}`
		1701	`unread_char(c);`
		1702	`return(lex_dot);`
2	7u83	1703
7	7u83	1704	`case char_slash:`
		1705	`/* Deal with '/', '/=' and comments */`
		1706	`c = read_char();`
		1707	`if (START_COMMENT(c)) {`
		1708	`int a = analyse_comments;`
		1709	`c = skip_comment(a);`
		1710	`if (c == lex_eof) {`
		1711	`goto eof_label;`
		1712	`}`
		1713	`if (a) {`
		1714	`c = lint_comment();`
		1715	`if (c >= 0) return(c);`
		1716	`}`
		1717	`if (column == 0) {`
		1718	`column = 1;`
		1719	`}`
		1720	`goto start_label;`
		1721	`}`
		1722	`if (START_CPP_COMMENT(c)) {`
		1723	`int a = analyse_comments;`
		1724	`c = skip_cpp_comment(a);`
		1725	`if (c == lex_eof) {`
		1726	`goto eof_label;`
		1727	`}`
		1728	`if (a) {`
		1729	`c = lint_comment();`
		1730	`if (c >= 0) {`
		1731	`return(c);`
		1732	`}`
		1733	`}`
		1734	`IGNORE read_char();`
		1735	`goto newline_label;`
		1736	`}`
		1737	`if (c == char_equal) {`
		1738	`return(lex_div_Heq);`
		1739	`}`
		1740	`unread_char(c);`
		1741	`return(lex_div);`
2	7u83	1742
7	7u83	1743	`case char_colon:`
		1744	`/* Deal with ':', '::' and ':>' */`
		1745	`c = read_char();`
2	7u83	1746	`#if LANGUAGE_CPP`
7	7u83	1747	`/* '::' is only allowed in C++ */`
		1748	`if (c == char_colon) {`
		1749	`return(lex_colon_Hcolon);`
		1750	`}`
2	7u83	1751	`#endif`
7	7u83	1752	`if (c == char_greater && allow_digraphs) {`
		1753	`return(lex_close_Hsquare_H2);`
		1754	`}`
		1755	`unread_char(c);`
		1756	`return(lex_colon);`
2	7u83	1757
7	7u83	1758	`case char_less:`
		1759	`/* Deal with '<', '<=', '<<', '<<=', '<%', '<:' */`
		1760	`c = read_char();`
		1761	`if (c == char_equal) {`
		1762	`return(lex_less_Heq);`
		1763	`}`
		1764	`if (c == char_less) {`
		1765	`c = read_char();`
		1766	`if (c == char_equal) {`
		1767	`return(lex_lshift_Heq);`
		1768	`}`
		1769	`unread_char(c);`
		1770	`return(lex_lshift);`
		1771	`}`
		1772	`if (c == char_percent && allow_digraphs) {`
		1773	`return(lex_open_Hbrace_H2);`
		1774	`}`
		1775	`if (c == char_colon && allow_digraphs) {`
		1776	`return(lex_open_Hsquare_H2);`
		1777	`}`
		1778	`if (c == char_question && allow_extra_symbols) {`
		1779	`return(lex_min);`
		1780	`}`
		1781	`unread_char(c);`
		1782	`return(lex_less);`
2	7u83	1783
7	7u83	1784	`case char_equal:`
		1785	`/* Deal with '=' and '==' */`
		1786	`c = read_char();`
		1787	`switch (c) {`
		1788	`case char_equal:`
		1789	`return(lex_eq);`
		1790	`case char_ampersand:`
		1791	`case char_asterix:`
		1792	`case char_minus:`
		1793	`case char_plus:`
		1794	`update_column();`
		1795	`report(crt_loc, ERR_lex_op_old_assign(c, c));`
		1796	`break;`
		1797	`}`
		1798	`unread_char(c);`
		1799	`return(lex_assign);`
2	7u83	1800
7	7u83	1801	`case char_greater:`
		1802	`/* Deal with '>', '>=', '>>' and '>>=' */`
		1803	`c = read_char();`
		1804	`if (c == char_equal) {`
		1805	`return(lex_greater_Heq);`
		1806	`}`
		1807	`if (c == char_greater) {`
		1808	`c = read_char();`
		1809	`if (c == char_equal) {`
		1810	`return(lex_rshift_Heq);`
		1811	`}`
		1812	`unread_char(c);`
		1813	`return(lex_rshift);`
		1814	`}`
		1815	`if (c == char_question && allow_extra_symbols) {`
		1816	`return(lex_max);`
		1817	`}`
		1818	`unread_char(c);`
		1819	`return(lex_greater);`
2	7u83	1820
7	7u83	1821	`case char_circum:`
		1822	`/* Deal with '^' and '^=' */`
		1823	`c = read_char();`
		1824	`if (c == char_equal) {`
		1825	`return(lex_xor_Heq_H1);`
		1826	`}`
		1827	`unread_char(c);`
		1828	`return(lex_xor_H1);`
2	7u83	1829
7	7u83	1830	`case char_bar:`
		1831	`/* Deal with '\|', '\|\|' and '\|=' */`
		1832	`c = read_char();`
		1833	`if (c == char_bar) {`
		1834	`return(lex_logical_Hor_H1);`
		1835	`}`
		1836	`if (c == char_equal) {`
		1837	`return(lex_or_Heq_H1);`
		1838	`}`
		1839	`unread_char(c);`
		1840	`return(lex_or_H1);`
2	7u83	1841
7	7u83	1842	`case char_open_round:`
		1843	`/* Deal with '(' */`
		1844	`return(lex_open_Hround);`
2	7u83	1845
7	7u83	1846	`case char_close_round:`
		1847	`/* Deal with ')' */`
		1848	`return(lex_close_Hround);`
2	7u83	1849
7	7u83	1850	`case char_comma:`
		1851	`/* Deal with ',' */`
		1852	`return(lex_comma);`
2	7u83	1853
7	7u83	1854	`case char_semicolon:`
		1855	`/* Deal with ';' */`
		1856	`return(lex_semicolon);`
2	7u83	1857
7	7u83	1858	`case char_open_square:`
		1859	`/* Deal with '[' */`
		1860	`return(lex_open_Hsquare_H1);`
2	7u83	1861
7	7u83	1862	`case char_close_square:`
		1863	`/* Deal with ']' */`
		1864	`return(lex_close_Hsquare_H1);`
2	7u83	1865
7	7u83	1866	`case char_open_brace:`
		1867	`/* Deal with '{' */`
		1868	`return(lex_open_Hbrace_H1);`
2	7u83	1869
7	7u83	1870	`case char_close_brace:`
		1871	`/* Deal with '}' */`
		1872	`return(lex_close_Hbrace_H1);`
2	7u83	1873
7	7u83	1874	`case char_tilde:`
		1875	`/* Deal with '~' */`
		1876	`return(lex_compl_H1);`
2	7u83	1877
7	7u83	1878	`default:`
		1879	`/* Anything else is an unknown character */`
		1880	`goto unknown_label;`
		1881	`}`
2	7u83	1882	`}`
		1883
7	7u83	1884	`/* Read an identifier (calculating hash value on fly) */`
		1885	`if (is_alpha(t)) {`
		1886	`HASHID nm;`
		1887	`LOCATION loc;`
		1888	`BUFFER *bf = &token_buff;`
		1889	`string s = bf->start;`
		1890	`string se = bf->end;`
		1891	`unsigned long h = (unsigned long)c;`
		1892	`*(s++) = (character)c;`
2	7u83	1893
7	7u83	1894	`/* Get the second character */`
		1895	`update_column();`
		1896	`loc = crt_loc;`
		1897	`c = read_char();`
2	7u83	1898	`#if FS_EXTENDED_CHAR`
7	7u83	1899	`t = (IS_EXTENDED(c)? ILLEG : lookup_char(c));`
2	7u83	1900	`#else`
7	7u83	1901	`t = lookup_char(c);`
2	7u83	1902	`#endif`
7	7u83	1903	`if (is_alphanum(t)) {`
		1904	`/* Scan the third and subsequent characters */`
		1905	`do {`
		1906	`h = HASH_POWER * h + (unsigned long)c;`
		1907	`*s = (character)c;`
		1908	`if (++s == se) {`
		1909	`s = extend_buffer(bf, s);`
		1910	`se = bf->end;`
		1911	`}`
		1912	`c = read_char();`
2	7u83	1913	`#if FS_EXTENDED_CHAR`
7	7u83	1914	`if (IS_EXTENDED(c)) {`
		1915	`break;`
		1916	`}`
2	7u83	1917	`#endif`
7	7u83	1918	`t = lookup_char(c);`
		1919	`} while (is_alphanum(t));`
		1920	`} else {`
		1921	`/* Allow for wide strings and characters */`
		1922	`if (h == char_L && is_symbol(t)) {`
		1923	`if (c == char_quote) {`
		1924	`IGNORE read_string(c, 1);`
		1925	`return(lex_wstring_Hlit);`
		1926	`}`
		1927	`if (c == char_single_quote) {`
		1928	`IGNORE read_string(c, 1);`
		1929	`return(lex_wchar_Hlit);`
		1930	`}`
		1931	`}`
		1932	`/* Identifier of length one */`
2	7u83	1933	`}`
7	7u83	1934	`if (c == char_backslash) {`
		1935	`/* Allow for extended identifiers */`
		1936	`int ch = CHAR_NONE;`
		1937	`int nextc = read_char();`
		1938	`unsigned long u = read_unicode(nextc, &ch);`
		1939	`if (ch != CHAR_NONE) {`
		1940	`bf->posn = s;`
		1941	`nm = read_extended_id(u, ch);`
		1942	`goto identifier_label;`
		1943	`}`
2	7u83	1944	`}`
7	7u83	1945	`unread_char(c);`
		1946	`se = s;`
		1947	`*se = 0;`
2	7u83	1948
7	7u83	1949	`/* Look up the symbol in the hash table */`
		1950	`h %= HASH_SIZE;`
		1951	`s = bf->start;`
		1952	`nm = lookup_name(s, h, 0, lex_unknown);`
		1953	`identifier_label:`
		1954	`{`
		1955	`IDENTIFIER id = DEREF_id(hashid_id(nm));`
		1956	`while (!IS_id_dummy(id)) {`
		1957	`/* Scan to last hidden value */`
		1958	`id = DEREF_id(id_alias(id));`
		1959	`}`
		1960	`COPY_loc(id_loc(id), loc);`
		1961	`}`
		1962	`token_hashid = nm;`
		1963	`return(lex_identifier);`
2	7u83	1964	`}`
		1965
7	7u83	1966	`/* Read the first token in a line */`
		1967	`if (c == char_return) {`
		1968	`c = read_newline();`
2	7u83	1969	`}`
7	7u83	1970	`if (c == char_newline) {`
		1971	`newline_label:`
		1972	`/* Re-entry point after C++ style comments */`
		1973	`crt_loc.line++;`
		1974	`crt_loc.column = 0;`
		1975	`input_crt = input_posn;`
		1976	`crt_line_changed = 1;`
		1977	`crt_spaces = 0;`
		1978	`if (in_preproc_dir == 1) {`
		1979	`in_preproc_dir = 0;`
		1980	`return(lex_newline);`
		1981	`}`
		1982	`start_line_label:`
		1983	`/* Re-entry point after preprocessing directives */`
		1984	`column = 0;`
		1985	`for (;;) {`
		1986	`/* Step over any obvious spaces */`
		1987	`c = next_char();`
		1988	`if (c == char_end) {`
		1989	`c = refill_char();`
		1990	`}`
		1991	`if (c == char_return) {`
		1992	`c = read_newline();`
		1993	`}`
		1994	`if (c == char_sub) {`
		1995	`c = read_eof();`
		1996	`}`
		1997	`if (c == char_newline) {`
		1998	`crt_loc.line++;`
		1999	`crt_loc.column = 0;`
		2000	`input_crt = input_posn;`
		2001	`crt_line_changed = 1;`
		2002	`crt_spaces = 0;`
		2003	`column = 0;`
		2004	`} else if (c == char_eof) {`
		2005	`/* Check for end of file (should start line) */`
		2006	`if (column == 0) {`
		2007	`good_eof = 1;`
		2008	`}`
		2009	`goto eof_label;`
		2010	`} else if (c == char_space) {`
		2011	`crt_spaces++;`
		2012	`column = 1;`
		2013	`} else if (c == char_tab) {`
		2014	`unsigned long tab = tab_width;`
		2015	`crt_spaces = tab *(crt_spaces / tab + 1);`
		2016	`column = 1;`
		2017	`} else {`
2	7u83	2018	`#if FS_EXTENDED_CHAR`
7	7u83	2019	`if (IS_EXTENDED(c)) {`
		2020	`t = ILLEG;`
		2021	`break;`
		2022	`}`
2	7u83	2023	`#endif`
7	7u83	2024	`t = lookup_char(c);`
		2025	`if (is_white(t)) {`
		2026	`if (!is_newline(t)) {`
		2027	`crt_spaces++;`
		2028	`column = 1;`
		2029	`}`
		2030	`} else {`
		2031	`break;`
		2032	`}`
2	7u83	2033	`}`
		2034	`}`
7	7u83	2035	`/* c and t now hold the next character */`
		2036	`goto process_label;`
2	7u83	2037	`}`
		2038
7	7u83	2039	`/* Read a pp-number */`
		2040	`if (is_digit(t)) {`
		2041	`number_label: {`
		2042	`int lastc;`
		2043	`BUFFER *bf = &token_buff;`
		2044	`string s = bf->start;`
		2045	`string se = bf->end;`
		2046	`if (t == POINT) {`
		2047	`/* t is set to POINT to indicate an initial`
		2048	`* '.' */`
		2049	`*(s++) = char_dot;`
		2050	`}`
		2051	`digit_label:`
		2052	`/* Step over alphanumeric characters and '.' */`
		2053	`do {`
		2054	`*s = (character)c;`
		2055	`if (++s == se) {`
		2056	`s = extend_buffer(bf, s);`
		2057	`se = bf->end;`
		2058	`}`
		2059	`next_digit_label:`
		2060	`lastc = c;`
		2061	`c = read_char();`
2	7u83	2062	`#if FS_EXTENDED_CHAR`
7	7u83	2063	`if (IS_EXTENDED(c)) {`
		2064	`break;`
		2065	`}`
2	7u83	2066	`#endif`
7	7u83	2067	`t = lookup_char(c);`
		2068	`} while (is_ppdigit(t));`
		2069	`if (c == char_plus \|\| c == char_minus) {`
		2070	`/* Allow for [Ee][+-] */`
		2071	`if (lastc == char_e \|\| lastc == char_E) {`
		2072	`goto digit_label;`
		2073	`}`
		2074	`}`
		2075	`if (c == char_backslash) {`
		2076	`/* Allow for unicode characters */`
		2077	`int ch = CHAR_NONE;`
		2078	`int nextc = read_char();`
		2079	`unsigned long u = read_unicode(nextc, &ch);`
		2080	`if (ch != CHAR_NONE) {`
		2081	`bf->posn = s;`
		2082	`print_char(u, ch, 0, bf);`
		2083	`s = bf->posn;`
		2084	`se = bf->end;`
		2085	`goto next_digit_label;`
		2086	`}`
		2087	`}`
		2088	`*s = 0;`
		2089	`unread_char(c);`
2	7u83	2090	`}`
7	7u83	2091	`return(lex_integer_Hlit);`
		2092	`}`
		2093
		2094	`/* End of file marker */`
		2095	`if (c == char_sub) {`
		2096	`c = read_eof();`
		2097	`}`
		2098	`if (c == char_eof) {`
		2099	`eof_label:`
		2100	`if (in_preproc_dir != 0) {`
		2101	`return(lex_eof);`
2	7u83	2102	`}`
7	7u83	2103	`if (!good_eof) {`
		2104	`update_column();`
		2105	`report(crt_loc, ERR_lex_phases_eof());`
		2106	`good_eof = 1;`
		2107	`}`
		2108	`if (end_include(preproc)) {`
		2109	`/* Revert to previous file */`
		2110	`good_eof = 0;`
		2111	`preproc = lex_ignore_token;`
		2112	`goto start_line_label;`
		2113	`}`
		2114	`/* End of main file */`
		2115	`return(lex_eof);`
2	7u83	2116	`}`
		2117
7	7u83	2118	`/* Unknown characters */`
		2119	`unknown_label:`
		2120	`{`
		2121	`string s = token_buff.start;`
		2122	`add_multi_char(s, (unsigned long)c, CHAR_SIMPLE);`
		2123	`return(lex_unknown);`
2	7u83	2124	`}`
		2125	`}`
		2126
		2127
		2128	`/*`
		2129	`INITIALISE INPUT VARIABLES`
		2130
		2131	`This routine initialises the tables of character look-ups and the token`
		2132	`buffer.`
		2133	`*/`
		2134
7	7u83	2135	`void`
		2136	`init_char(void)`
2	7u83	2137	`{`
7	7u83	2138	`int i;`
		2139	`unsigned char p, q;`
2	7u83	2140
7	7u83	2141	`/* Set native locale for multibyte characters */`
2	7u83	2142	`#if FS_MULTIBYTE`
7	7u83	2143	`if (allow_multibyte) {`
		2144	`IGNORE setlocale(LC_CTYPE, "");`
		2145	`}`
2	7u83	2146	`#endif`
		2147
7	7u83	2148	`/* Allow for non-ASCII codesets */`
		2149	`map_ascii(main_characters);`
		2150	`map_ascii(digit_values);`
		2151	`map_ascii(escape_sequences);`
2	7u83	2152
7	7u83	2153	`/* Set up extra characters */`
		2154	`p = xmalloc_nof(unsigned char, NO_CHAR);`
		2155	`q = main_characters;`
		2156	`copy_characters = p;`
		2157	`for (i = 0; i < NO_CHAR; i++) {`
		2158	`(p++) = (q++);`
		2159	`}`
2	7u83	2160
7	7u83	2161	`/* Initialise token buffer */`
		2162	`token_buff.posn = extend_buffer(&token_buff, token_buff.posn);`
		2163	`return;`
2	7u83	2164	`}`
		2165
		2166
		2167	`/*`
		2168	`INITIALISE INPUT FILE READING`
		2169
		2170	`This routine initialises the lexical analysis routines in preparation`
		2171	`for parsing or preprocessing the current input file.`
		2172	`*/`
		2173
7	7u83	2174	`void`
		2175	`init_lex(void)`
2	7u83	2176	`{`
7	7u83	2177	`/* Initialise file variables */`
		2178	`crt_buff_no = 0;`
		2179	`IGNORE init_buffer(crt_buff_no);`
		2180	`start_preproc_if ();`
		2181	`preproc_loc = crt_loc;`
		2182	`have_syntax_error = 0;`
		2183	`if (do_header) {`
		2184	`dump_start(&crt_loc, NIL(INCL_DIR));`
		2185	`}`
2	7u83	2186
7	7u83	2187	`/* Deal with first start-up file */`
		2188	`open_startup();`
2	7u83	2189
7	7u83	2190	`/* Force processing to start at the beginning of a line */`
		2191	`unread_char(char_newline);`
		2192	`crt_loc.line--;`
2	7u83	2193
7	7u83	2194	`/* Initialise the parser */`
		2195	`init_parser(NIL(PPTOKEN));`
		2196	`return;`
2	7u83	2197	`}`
		2198
		2199
		2200	`/*`
		2201	`PARSE INPUT FILE`
		2202
		2203	`This routine is the main entry point for the parsing of the current`
		2204	`input file.`
		2205	`*/`
		2206
7	7u83	2207	`void`
		2208	`process_file(void)`
2	7u83	2209	`{`
7	7u83	2210	`init_lex();`
		2211	`ADVANCE_LEXER;`
		2212	`parse_file(NULL_type, dspec_none);`
		2213	`return;`
2	7u83	2214	`}`

Subversion Repositories tendra.SVN

(root)/trunk/src/producers/common/parse/lex.c – Rev 7