/*
* Copyright (c) 2002-2005 The TenDRA Project <http://www.tendra.org/>.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* 3. Neither the name of The TenDRA Project nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific, prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS
* IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
* THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
* OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* $Id$
*/
/*
Crown Copyright (c) 1997
This TenDRA(r) Computer Program is subject to Copyright
owned by the United Kingdom Secretary of State for Defence
acting through the Defence Evaluation and Research Agency
(DERA). It is made available to Recipients with a
royalty-free licence for its use, reproduction, transfer
to other parties and amendment for any purpose not excluding
product development provided that any such use et cetera
shall be deemed to be acceptance of the following conditions:-
(1) Its Recipients shall ensure that this Notice is
reproduced upon any copies or amended versions of it;
(2) Any amended version of it shall be clearly marked to
show both the nature of and the organisation responsible
for the relevant amendment or amendments;
(3) Its onward transfer from a recipient to another
party shall be deemed to be that party's acceptance of
these conditions;
(4) DERA gives no warranty or assurance as to its
quality or suitability for any purpose and DERA accepts
no liability whatsoever in relation to any use to which
it may be put.
*/
#include "config.h"
#include "system.h"
#include "version.h"
#include "c_types.h"
#include "hashid_ops.h"
#include "id_ops.h"
#include "nspace_ops.h"
#include "error.h"
#include "catalog.h"
#include "option.h"
#include "buffer.h"
#include "char.h"
#include "dump.h"
#include "file.h"
#include "hash.h"
#include "lex.h"
#include "literal.h"
#include "macro.h"
#include "namespace.h"
#include "predict.h"
#include "preproc.h"
#include "print.h"
#include "symbols.h"
#include "syntax.h"
#include "ustring.h"
#include "xalloc.h"
/*
LIST OF FREE LEXICAL TOKENS
All the free lexical tokens are formed into a list.
*/
PPTOKEN *free_tokens = NULL;
static LIST(PPTOKEN_P)alloc_tokens = NULL_list(PPTOKEN_P);
/*
ALLOCATE A NEW TOKEN
This routine allocates a new token from the list free_tokens, allocating
a further block of tokens if this list is empty.
*/
PPTOKEN *
new_pptok(void)
{
PPTOKEN *p = free_tokens;
if (p == NULL) {
PPTOKEN *q;
int i, n = 2000;
p = xmalloc_nof(PPTOKEN, n);
CONS_pptok(p, alloc_tokens, alloc_tokens);
q = p;
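		/* Chain the newly allocated tokens together; the first is
		   returned immediately and the rest form the free list */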
for (i = 1; i < n; i++) {
q->next = q + 1;
q++;
}
q->next = NULL;
}
free_tokens = p->next;
p->pp_opts = real_opts;
return(p);
}
/*
FREE A SINGLE TOKEN
This macro frees the single token P by adding it to the list of all
free tokens.
*/
#define free_pptok(P) \
{ \
(P)->next = free_tokens; \
free_tokens = (P); \
}
/*
FREE A LIST OF TOKENS
This routine adds the list of tokens p to the list of all free tokens.
*/
void
free_tok_list(PPTOKEN *p)
{
PPTOKEN *q = p;
if (q == NULL) {
return;
}
while (q->next) {
q = q->next;
}
q->next = free_tokens;
free_tokens = p;
return;
}
/*
FREE ALL ALLOCATED PREPROCESSING TOKENS
This routine frees all the space allocated for preprocessing tokens.
It should only be called after the input has been completely processed.
*/
void
term_macros(void)
{
LIST(PPTOKEN_P)p = alloc_tokens;
while (!IS_NULL_list(p)) {
PPTOKEN *q;
DESTROY_CONS_pptok(destroy, q, p, p);
xfree_nof(q);
}
alloc_tokens = p;
free_tokens = NULL;
return;
}
/*
COPY A TOKEN
This macro copies the token value T, together with the data, options and
white space of the token Q, into P.
*/
#define copy_pptok(P, T, Q) \
{ \
(P)->tok = (T); \
(P)->pp_data = (Q)->pp_data; \
(P)->pp_opts = (Q)->pp_opts; \
(P)->pp_space = (Q)->pp_space; \
}
/*
ASSIGN TOKEN COMPONENTS
This routine assigns the token components for the token t, which has
just been read from the input file (or faked on occasion, as indicated),
into p. It is only necessary to call this routine if t is less than or
equal to LAST_COMPLEX_TOKEN (defined in symbols.h). If any cases are
added to this routine then it may be necessary to change the value of
that macro.
*/
void
token_parts(int t, PPTOKEN *p)
{
switch (t) {
case lex_identifier: {
/* Identifiers */
HASHID nm = token_hashid;
IDENTIFIER id = DEREF_id(hashid_id(nm));
p->pp_data.id.hash = nm;
p->pp_data.id.use = id;
break;
}
case lex_char_Hlit:
case lex_string_Hlit:
case lex_wchar_Hlit:
case lex_wstring_Hlit: {
/* String and character literals */
string s1 = token_buff.start;
gen_size n = (gen_size)(token_buff.posn - s1);
string s2;
if (n < 2) {
/* Optimise for small strings */
s2 = xustrcpy(s1);
} else {
s2 = xustr(n + 1);
xumemcpy(s2, s1, n);
s2[n] = 0;
}
p->pp_data.str.start = s2;
p->pp_data.str.end = s2 + n;
break;
}
case lex_integer_Hlit:
/* Integer and floating-point literals */
p->pp_data.text = xustrcpy(token_buff.start);
break;
case lex_hash_Hif:
case lex_hash_Helif:
/* Target dependent conditionals */
p->pp_data.exp = crt_hash_if_exp;
break;
case lex_unknown: {
/* Unknown characters */
int i;
string s1 = token_buff.start;
string s2 = p->pp_data.buff;
ASSERT(MULTI_WIDTH <= sizeof(p->pp_data.buff));
for (i = 0; i < MULTI_WIDTH; i++)s2[i] = s1[i];
break;
}
}
return;
}
/*
REMOVE ANY IGNORED TOKENS FROM A LIST
This routine removes any ignored tokens from the list toks, returning
the result.
*/
PPTOKEN *
clean_tok_list(PPTOKEN *toks)
{
unsigned long sp = 0;
PPTOKEN p0, *p = &p0;
PPTOKEN *q;
p->next = toks;
while (q = p->next, q != NULL) {
if (q->tok == lex_ignore_token) {
sp |= q->pp_space;
p->next = q->next;
free_pptok(q);
q = p->next;
if (q == NULL) {
break;
}
} else {
if (sp) {
q->pp_space |= sp;
sp = 0;
}
}
p = q;
}
return(p0.next);
}
/*
READ A LINE OF TOKENS
This routine reads the sequence of preprocessing tokens comprising a
preprocessing directive (for example, a macro definition). If t1 is
not lex_ignore_token then it is taken to be the first token in the
definition; similarly, if tn is not lex_ignore_token then it is appended
as the last token.
*/
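/*
    EXAMPLE OF READING A LINE
    A minimal usage sketch (illustrative only, not a quotation of an
    actual caller): gathering the remaining tokens of a directive up to
    the end of the line, with no token forced onto either end of the list:

	PPTOKEN *body = read_line(lex_ignore_token, lex_ignore_token);
*/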
PPTOKEN *
read_line(int t1, int tn)
{
int t = t1;
unsigned long sp = 0;
PPTOKEN dummy_tok, *this_tok = &dummy_tok;
if (t == lex_ignore_token) {
t = read_token();
update_column();
if (in_preproc_dir) {
preproc_loc = crt_loc;
}
}
while (t != lex_newline && t != lex_eof) {
this_tok->next = new_pptok();
this_tok = this_tok->next;
this_tok->tok = t;
if (t <= LAST_COMPLEX_TOKEN) {
token_parts(t, this_tok);
}
this_tok->pp_space = (sp & WHITE_MASK);
sp = skip_white(0);
t = read_token();
update_column();
if (in_preproc_dir) {
preproc_loc = crt_loc;
}
}
if (tn != lex_ignore_token) {
this_tok->next = new_pptok();
this_tok = this_tok->next;
this_tok->tok = tn;
token_parts(tn, this_tok);
this_tok->pp_space = (sp & WHITE_MASK);
}
this_tok->next = NULL;
if (in_preproc_dir) {
IGNORE skip_to_end();
}
return(dummy_tok.next);
}
/*
COPY A LIST OF TOKENS
This routine copies the list of tokens toks, excluding any ignored
tokens.
*/
static PPTOKEN *
copy_tok_list(PPTOKEN *toks)
{
PPTOKEN *ptr_tok;
PPTOKEN dummy_tok, *this_tok = &dummy_tok;
for (ptr_tok = toks; ptr_tok != NULL; ptr_tok = ptr_tok->next) {
int t = ptr_tok->tok;
if (t != lex_ignore_token) {
this_tok->next = new_pptok();
this_tok = this_tok->next;
copy_pptok(this_tok, t, ptr_tok);
}
}
this_tok->next = NULL;
return(dummy_tok.next);
}
/*
STRINGISE A LIST OF TOKENS
This routine turns the list of tokens toks into a string. The result
is built up in token_buff. If esc is true then any '"' (or whatever
the value of quote is) and '\' characters in string and character
literals (including the initial and terminating quotes) are preceded
by a '\'. This routine is used in the implementation of the # operator,
in #include directives whose argument is macro-expanded, and in a couple
of other preprocessing directives. It returns 1 to indicate a valid
string, and 0 otherwise.
*/
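/*
    EXAMPLE OF STRINGISING
    An illustrative sketch only, showing the standard # operator
    behaviour, which is built on this routine with esc true and quote
    set to '"':

	#define str( X )	# X

	str ( a "b\c" 'd' )	->	"a \"b\\c\" 'd'"
*/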
int
quote_tok_list(PPTOKEN *toks, int esc, int quote)
{
int res = 1;
string st, se;
int started = 0;
int escaped = 0;
PPTOKEN *ptr_tok;
character qo = (character)quote;
BUFFER *bf = clear_buffer(&token_buff, NIL(FILE));
/* Scan through tokens */
for (ptr_tok = toks; ptr_tok != NULL; ptr_tok = ptr_tok->next) {
character p, q;
int t = ptr_tok->tok;
if (t == lex_ignore_token) {
continue;
}
/* Print initial space if necessary */
if (ptr_tok->pp_space && started) {
bfputc(bf, char_space);
}
/* Find the token name */
switch (t) {
case lex_identifier: {
/* Identifiers */
HASHID nm = ptr_tok->pp_data.id.hash;
st = DEREF_string(hashid_name_etc_text(nm));
bfputs(bf, st);
break;
}
case lex_integer_Hlit:
/* Integer and floating-point literals */
st = ptr_tok->pp_data.text;
bfputs(bf, st);
break;
case lex_char_Hlit:
/* Character literals */
p = 0;
q = char_single_quote;
string_label:
st = ptr_tok->pp_data.str.start;
se = ptr_tok->pp_data.str.end;
/* Prefix and opening quote */
if (p) {
bfputc(bf, (int)p);
}
if (esc && q == qo) {
bfputc(bf, char_backslash);
}
bfputc(bf, (int)q);
/* Copy string */
while (st != se) {
character c = *(st++);
if (c == qo || c == char_backslash) {
/* Escaped characters */
if (esc) {
bfputc(bf, char_backslash);
}
}
bfputc(bf, (int)c);
}
/* Closing quote */
if (esc && q == qo) {
bfputc(bf, char_backslash);
}
bfputc(bf, (int)q);
break;
case lex_wchar_Hlit:
/* Wide character literals */
p = char_L;
q = char_single_quote;
goto string_label;
case lex_string_Hlit:
/* String literals */
p = 0;
q = char_quote;
goto string_label;
case lex_wstring_Hlit:
/* Wide string literals */
p = char_L;
q = char_quote;
goto string_label;
case lex_unknown: {
/* Unknown characters */
unsigned long u;
int ch = CHAR_SIMPLE;
u = get_multi_char(ptr_tok->pp_data.buff, &ch);
if (ch == CHAR_SIMPLE) {
bfputc(bf, (int)u);
} else {
print_char(u, ch, 0, bf);
}
break;
}
case lex_macro_Harg: {
/* Macro parameters */
HASHID nm = ptr_tok->pp_data.par.hash;
st = DEREF_string(hashid_name_etc_text(nm));
bfputs(bf, st);
break;
}
default:
/* Symbols */
st = token_name(t);
bfputs(bf, st);
break;
}
started = 1;
}
/* End of string */
bfputc(bf, 0);
bf->posn--;
/* Check for legal strings */
st = bf->start;
se = bf->posn;
while (st != se) {
if (escaped) {
escaped = 0;
} else {
character c = *st;
if (c == qo) {
res = 0;
}
if (c == char_backslash) {
escaped = 1;
}
}
st++;
}
if (escaped) {
res = 0;
}
return(res);
}
/*
CONCATENATE TWO TOKENS
This routine concatenates the two tokens p and q into a single token.
This is used to implement the ## operator. If the result is a valid
preprocessing token then p is overwritten by the result and 1 is
returned. Otherwise p and q are left unchanged, apart from their
white-space information, and 0 is returned.
*/
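/*
    EXAMPLES OF TOKEN CONCATENATION
    Some illustrative cases (a sketch only, not an exhaustive list):

	<  ## =		->	<=	(two symbols forming a symbol)
	ab ## 12	->	ab12	(identifier and number)
	12 ## 34	->	1234	(two numbers)
	L  ## "x"	->	L"x"	(wide string literal)
	+  ## -		->	invalid ('+-' is not a single token), so
				the two tokens are left as '+ -'
*/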
static int
concat_pptoks(PPTOKEN *p, PPTOKEN *q)
{
int a = p->tok;
int b = q->tok;
unsigned long sa = p->pp_space;
unsigned long sb = q->pp_space;
p->pp_space = (sa | sb);
q->pp_space = 0;
if (a >= FIRST_SYMBOL && a <= LAST_SYMBOL) {
if (b >= FIRST_SYMBOL && b <= LAST_SYMBOL) {
/* Two symbols may combine to give another symbol */
int c;
string s = token_buff.start;
ustrcpy_v(s, token_name(a));
ustrcpy_v(s + ustrlen(s), token_name(b));
for (c = FIRST_SYMBOL; c <= LAST_SYMBOL; c++) {
if (ustreq(s, token_name(c))) {
/* Token found - check options */
p->tok = c;
if (c >= FIRST_C_SYMBOL &&
c <= LAST_C_SYMBOL) {
return(1);
}
#if LANGUAGE_CPP
if (c >= FIRST_CPP_SYMBOL &&
c <= LAST_CPP_SYMBOL) {
return(1);
}
#endif
if (c >= FIRST_EXTRA_SYMBOL &&
c <= LAST_EXTRA_SYMBOL) {
if (allow_extra_symbols) {
return(1);
}
}
if (c >= FIRST_DIGRAPH &&
c <= LAST_DIGRAPH) {
if (allow_digraphs) {
return(1);
}
}
p->tok = a;
}
}
return(0);
} else if (a == lex_dot && b == lex_integer_Hlit) {
/* A dot may start a number */
string s = q->pp_data.text;
if (s[0] == char_dot) {
return(0);
}
p->tok = lex_integer_Hlit;
p->pp_data.text = xustrcat(token_name(a), s);
return(1);
} else if (a == lex_backslash && b == lex_identifier) {
/* A backslash may start a universal character */
/* NOT YET IMPLEMENTED */
/* EMPTY */
}
} else if (a == lex_identifier) {
HASHID nm = p->pp_data.id.hash;
string s = DEREF_string(hashid_name_etc_text(nm));
if (b == lex_identifier) {
/* Two identifiers give another identifier */
HASHID nm2 = q->pp_data.id.hash;
string s2 = DEREF_string(hashid_name_etc_text(nm2));
s = xustrcat(s, s2);
nm = lookup_name(s, hash(s), 2, lex_identifier);
p->pp_data.id.hash = nm;
p->pp_data.id.use = DEREF_id(hashid_id(nm));
return(1);
} else if (b == lex_integer_Hlit) {
/* An identifier and a number may give an identifier */
character c;
string n = q->pp_data.text;
while (c = *(n++), c != 0) {
if (c == char_dot || c == char_plus ||
c == char_minus) {
/* The number must be entirely
* alphanumeric */
return(0);
}
}
s = xustrcat(s, q->pp_data.text);
nm = lookup_name(s, hash(s), 2, lex_identifier);
p->pp_data.id.hash = nm;
p->pp_data.id.use = DEREF_id(hashid_id(nm));
return(1);
} else if (s[0] == char_L && s[1] == 0) {
/* An L may start a wide character or string */
if (b == lex_char_Hlit) {
p->tok = lex_wchar_Hlit;
p->pp_data.str.start = q->pp_data.str.start;
p->pp_data.str.end = q->pp_data.str.end;
return(1);
} else if (b == lex_string_Hlit) {
p->tok = lex_wstring_Hlit;
p->pp_data.str.start = q->pp_data.str.start;
p->pp_data.str.end = q->pp_data.str.end;
return(1);
}
}
} else if (a == lex_integer_Hlit) {
string s = p->pp_data.text;
if (b == lex_identifier) {
/* A number followed by an identifier is a number */
HASHID nm = q->pp_data.id.hash;
string s2 = DEREF_string(hashid_name_etc_text(nm));
p->pp_data.text = xustrcat(s, s2);
return(1);
} else if (b == lex_integer_Hlit) {
/* Two numbers form another number */
string s2 = q->pp_data.text;
p->pp_data.text = xustrcat(s, s2);
return(1);
} else if (b == lex_dot || b == lex_ellipsis) {
/* A number followed by a sequence of dots is a
* number */
p->pp_data.text = xustrcat(s, token_name(b));
return(1);
} else if (b == lex_plus || b == lex_minus) {
/* A sign may terminate a number after e or E */
unsigned n = (unsigned)ustrlen(s) - 1;
if (s[n] == char_e || s[n] == char_E) {
p->pp_data.text = xustrcat(s, token_name(b));
return(1);
}
}
}
return(0);
}
/*
DUMMY LOCATION FOR INPUT FILE
This dummy location represents tokens read directly from the input file.
If present, it will always be the last element of a list of token
locations.
*/
static PPTOKEN *dummy_loc_toks = NULL;
static TOKEN_LOC dummy_loc = { &dummy_loc_toks, NULL };
TOKEN_LOC *file_loc = &dummy_loc;
/*
FORWARD DECLARATION
The functions expand_macro, expand_toks and expand_tok_list are defined
recursively. This gives the necessary forward declaration.
*/
static PPTOKEN *expand_toks(PPTOKEN *, TOKEN_LOC *, int);
/*
HANDLE OLD STYLE STRINGISING
This routine handles the old-style stringising for the definition defn
of the given macro. Argument replacement has already been performed
on defn. If this facility is enabled then in macro definitions of the
form:
#define f( X ) "X"
quotes are classified as unknown characters rather than string
terminators. This means that the X is recognised as a macro parameter
and is replaced during argument replacement. The job of this routine
is to spot these unrecognised quotes and turn them into proper strings.
*/
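/*
    EXAMPLE OF OLD STYLE STRINGISING
    An illustrative sketch only, with this facility enabled:

	#define f( X )	"X"
	#define g( X )	L"X"

	f ( abc )	->	"abc"
	g ( abc )	->	L"abc"	(a wide string literal)
*/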
PPTOKEN *
recognise_strings(PPTOKEN *defn, HASHID macro, int act)
{
PPTOKEN *this_tok = defn;
PPTOKEN *last_tok = defn;
while (this_tok != NULL) {
if (this_tok->tok == lex_unknown) {
unsigned long u;
int ch = CHAR_SIMPLE;
character qo = char_question;
u = get_multi_char(this_tok->pp_data.buff, &ch);
if (ch == CHAR_SIMPLE) {
qo = (character)u;
}
if (qo == char_quote || qo == char_single_quote) {
/* Start of string */
int t;
int escaped = 0;
PPTOKEN *next_tok = this_tok->next;
PPTOKEN *ptr_tok = next_tok;
while (ptr_tok != NULL) {
t = ptr_tok->tok;
if (t == lex_macro_Harg) {
HASHID nm =
ptr_tok->pp_data.par.hash;
ERROR err =
ERR_cpp_stringize_old(nm,
macro);
report(preproc_loc, err);
}
if (escaped) {
escaped = 0;
} else if (t == lex_unknown) {
character qc = char_question;
u = get_multi_char(ptr_tok->pp_data.buff, &ch);
if (ch == CHAR_SIMPLE) {
qc = (character)u;
}
if (qc == qo) {
break;
}
if (qc == char_backslash) {
escaped = 1;
}
}
ptr_tok = ptr_tok->next;
}
if (act) {
if (ptr_tok == NULL) {
/* No closing quote */
report(crt_loc,
ERR_cpp_stringize_bad(macro));
this_tok->next = NULL;
} else {
ptr_tok->tok = lex_ignore_token;
this_tok->next = ptr_tok->next;
ptr_tok->next = NULL;
}
/* Form the string */
if (!quote_tok_list(next_tok, 0,
(int)qo)) {
report(crt_loc,
ERR_cpp_stringize_bad(macro));
}
t = (qo == char_quote ?
lex_string_Hlit : lex_char_Hlit);
this_tok->tok = t;
token_parts(t, this_tok);
free_tok_list(next_tok);
/* Check for wide strings */
if (last_tok->tok == lex_identifier) {
string s;
HASHID nm =
last_tok->pp_data.id.hash;
s = DEREF_string(hashid_name_etc_text(nm));
if (s[0] == char_L &&
s[1] == 0) {
if (t ==
lex_string_Hlit) {
t = lex_wstring_Hlit;
} else {
t = lex_wchar_Hlit;
}
copy_pptok(last_tok, t,
this_tok);
last_tok->next =
this_tok->next;
free_pptok(this_tok);
this_tok = last_tok;
}
}
}
}
}
last_tok = this_tok;
this_tok = this_tok->next;
}
return(defn);
}
/*
HANDLE TOKEN CONCATENATION
This routine handles any ## operators in the definition defn of the
given macro. Note that any initial or terminal ## operators have
already been reported.
*/
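/*
    EXAMPLE OF TOKEN CONCATENATION PROCESSING
    An illustrative sketch only (the ## operators are processed from
    left to right after argument substitution):

	#define cat3( a, b, c )	a ## b ## c

	cat3 ( x, y, z )	->	xyz
	cat3 ( 1, 2, 3 )	->	123
*/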
static PPTOKEN *
process_concat(PPTOKEN *defn, HASHID macro)
{
PPTOKEN *this_tok;
while (defn && defn->tok == lex_hash_Hhash_Hop) {
/* Check for initial ## */
this_tok = defn;
defn = defn->next;
free_pptok(this_tok);
}
this_tok = defn;
while (this_tok != NULL) {
PPTOKEN *next_tok = this_tok->next;
if (next_tok == NULL) {
break;
}
if (next_tok->tok == lex_hash_Hhash_Hop) {
/* Delete the ## */
this_tok->next = next_tok->next;
free_pptok(next_tok);
/* Check for terminal ## */
if (this_tok->next == NULL) {
break;
}
/* Do the token concatenation */
if (concat_pptoks(this_tok, this_tok->next)) {
/* Delete the second argument if successful */
next_tok = this_tok->next;
this_tok->next = next_tok->next;
free_pptok(next_tok);
} else {
report(crt_loc, ERR_cpp_concat_bad(macro));
}
/* Now reprocess this_tok */
} else {
this_tok = next_tok;
}
}
return(defn);
}
/*
MAXIMUM NUMBER OF MACRO PARAMETERS
This macro defines the maximum number of macro parameters which
expand_macro can handle without having to allocate temporary space
to hold them. With allocation the number of parameters is unlimited.
*/
#define MAX_MACRO_PARAMS 256
/*
EXPAND A MACRO DEFINITION
This routine expands the macro given by the hash table entry macro.
The argument locs gives a list of locations where macro arguments can
be read from. locs will never be NULL. The argument complete is true
to indicate that this is a complete macro expansion, and that any
argument errors should be reported. If locs contains file_loc then
complete will always be true. When reading from file_loc we always
set in_preproc_dir to 2 to make read_token return lex_eof at the end
of each file, rather than automatically reverting to the including
file, and to cause it to ignore any preprocessing directives.
Note that the entry for the macro in the hash table is marked during
expansion to prevent recursive expansions. Several points concerning
macro expansion are undefined; in this implementation:
1. # operators are evaluated from left to right;
2. ## operators are evaluated from left to right;
3. If a ## b is not a valid preprocessing token then it is
resolved to a b;
4. A # operator in a function-like macro which is not followed
by a macro argument is ignored (it is left as # in object-like
macros of course);
5. A ## operator at the start or end of a macro is ignored;
6. Any preprocessing directives in the macro arguments are treated
as normal sequences of preprocessing tokens.
A further undefined area concerns the ban on recursive macro expansions.
This is extended from the macro definition itself to any extra tokens
which are read during the expansion of the macro definition. For
example, in:
#define f( a ) a * g
#define g( a ) f ( a )
f ( 2 ) ( 9 )
the result is '2 * f ( 9 )', rather than '2 * 9 * g'.
*/
PPTOKEN *
expand_macro(HASHID macro, TOKEN_LOC *locs, int complete)
{
LOCATION loc;
int state = 0;
PPTOKEN *defn;
unsigned long sp = 0;
unsigned no_pars = 0;
int have_unknown = 0;
int have_hash_hash = 0;
unsigned long ws = crt_spaces;
PPTOKEN dummy_tok, *this_tok = &dummy_tok;
PPTOKEN *arg_array_base[MAX_MACRO_PARAMS + 1];
PPTOKEN **arg_array = arg_array_base;
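	/* Note that macro arguments are stored in arg_array[1] to
	   arg_array[no_pars]; element 0 is unused */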
/* Get the macro identifier */
IDENTIFIER id = DEREF_id(hashid_id(macro));
unsigned tag = TAG_id(id);
DECL_SPEC ds = DEREF_dspec(id_storage(id));
/* Mark the macro as being used */
loc = crt_loc;
ds |= dspec_used;
COPY_dspec(id_storage(id), ds);
if (do_macro && do_usage)dump_use(id, &crt_loc, 1);
/* Get macro definition and other data */
if (tag == id_obj_macro_tag) {
/* Object-like macros */
defn = DEREF_pptok(id_obj_macro_defn(id));
if (defn == NULL) {
return(NULL);
}
if (ds & dspec_builtin) {
/* Check built-in macros */
int t = defn->tok;
if (t == lex_builtin_Hline) {
/* Construct an integer literal for __LINE__ */
BUFFER *bf = clear_buffer(&token_buff,
NIL(FILE));
bfprintf(bf, "%lu", loc.line);
bfputc(bf, 0);
this_tok = new_pptok();
this_tok->tok = lex_integer_Hlit;
this_tok->next = NULL;
this_tok->pp_opts = NULL;
this_tok->pp_space = 0;
token_parts(lex_integer_Hlit, this_tok);
return(this_tok);
}
if (t == lex_builtin_Hfile) {
/* Construct a string literal for __FILE__ */
character c;
string fn =
DEREF_string(posn_file(crt_loc.posn));
BUFFER *bf = clear_buffer(&token_buff,
NIL(FILE));
while (c = *(fn++), c != 0) {
if (c == char_quote ||
c == char_backslash) {
/* Escape quotes and
* backslashes */
bfputc(bf, char_backslash);
}
bfputc(bf, (int)c);
}
this_tok = new_pptok();
this_tok->tok = lex_string_Hlit;
this_tok->next = NULL;
this_tok->pp_opts = NULL;
this_tok->pp_space = 0;
token_parts(lex_string_Hlit, this_tok);
return(this_tok);
}
}
} else {
/* Function-like macros */
int t;
unsigned n;
TOKEN_LOC *lc;
int brackets = 0;
unsigned no_args = 0;
PPTOKEN *ptr_tok = NULL;
TOKEN_LOC *ptr_loc = locs;
/* Check for following open bracket */
for (;;) {
if (ptr_loc == file_loc) {
/* Read token from input location */
int legal = 1;
sp = skip_white(1);
if (peek_char(char_open_round, &legal)) {
/* Next token in file is '(' */
update_column();
t = lex_open_Hround;
} else {
/* Other cases */
t = lex_unknown;
if (sp)patch_white(sp);
}
break;
} else if (ptr_loc == NULL) {
/* No more locations */
t = lex_eof;
break;
} else {
/* Read token from current location */
ptr_tok = (*(ptr_loc->toks))->next;
while (ptr_tok && ptr_tok->tok ==
lex_ignore_token) {
/* Step over any ignored tokens */
ptr_tok = ptr_tok->next;
}
if (ptr_tok != NULL) {
/* Return the next token */
t = ptr_tok->tok;
ptr_tok = ptr_tok->next;
break;
}
/* Move on to next location */
ptr_loc = ptr_loc->next;
}
}
/* Next token is not an open bracket */
if (t != lex_open_Hround) {
if (complete) {
report(loc, ERR_cpp_replace_arg_none(macro));
}
incomplete_macro:
/* Return macro identifier */
this_tok = new_pptok();
this_tok->tok = lex_identifier;
this_tok->next = NULL;
this_tok->pp_space = 0;
this_tok->pp_data.id.hash = macro;
this_tok->pp_data.id.use = id;
return(this_tok);
}
/* Check argument array size */
no_pars = DEREF_unsigned(id_func_macro_no_params(id));
if (no_pars > MAX_MACRO_PARAMS) {
arg_array = xmalloc_nof(PPTOKEN *, no_pars + 1);
}
/* Scan macro arguments */
for (;;) {
/* Get the next token */
int refill = 0;
for (;;) {
if (ptr_loc == file_loc) {
/* Read token from file location */
sp = skip_white(1);
in_preproc_dir = 2;
t = read_token();
update_column();
if (t == lex_hash_H1 ||
t == lex_hash_H2) {
if (sp & WHITE_NEWLINE) {
/* Looks like
* preprocessing
* directive */
ERROR err = ERR_cpp_replace_arg_ppdir(macro);
report(crt_loc, err);
}
}
break;
} else if (ptr_loc == NULL) {
/* No more locations to read token
* from */
t = lex_eof;
break;
} else {
/* Read token from next location */
if (refill)ptr_tok =
(*(ptr_loc->toks))->next;
if (ptr_tok != NULL) {
t = ptr_tok->tok;
break;
}
ptr_loc = ptr_loc->next;
refill = 1;
}
}
/* Examine this token */
if (t == lex_open_Hround) {
brackets++;
} else if (t == lex_close_Hround) {
/* Close brackets mark the end of the argument
* list */
if (brackets == 0) {
break;
}
brackets--;
} else if (t == lex_comma) {
/* Commas mark the end of an argument */
if (brackets == 0) {
this_tok->next = NULL;
no_args++;
if (dummy_tok.next) {
dummy_tok.next->pp_space = 0;
} else if (complete) {
ERROR err;
err = ERR_cpp_replace_arg_empty(no_args, macro);
report(crt_loc, err);
}
if (no_args <= no_pars) {
arg_array[no_args] = dummy_tok.next;
} else {
free_tok_list(dummy_tok.next);
}
if (ptr_tok) {
ptr_tok = ptr_tok->next;
}
this_tok = &dummy_tok;
continue;
}
} else if (t == lex_eof) {
break;
}
/* Build up current argument */
this_tok->next = new_pptok();
this_tok = this_tok->next;
if (ptr_tok) {
copy_pptok(this_tok, t, ptr_tok);
ptr_tok = ptr_tok->next;
} else {
this_tok->tok = t;
if (t <= LAST_COMPLEX_TOKEN) {
token_parts(t, this_tok);
}
this_tok->pp_space = (sp & WHITE_MASK);
}
}
/* Create last argument */
in_preproc_dir = 0;
this_tok->next = NULL;
if (no_args || dummy_tok.next) {
no_args++;
if (dummy_tok.next) {
dummy_tok.next->pp_space = 0;
} else if (complete) {
ERROR err = ERR_cpp_replace_arg_empty(no_args,
macro);
report(crt_loc, err);
}
if (no_args <= no_pars) {
arg_array[no_args] = dummy_tok.next;
} else {
free_tok_list(dummy_tok.next);
}
}
if (sp)patch_white(sp);
this_tok = &dummy_tok;
/* Check for incomplete argument lists */
if (t == lex_eof) {
if (complete) {
/* Report error, but carry on */
report(loc, ERR_cpp_replace_arg_eof(macro));
} else {
/* Free those arguments actually read */
for (n = 1; n <= no_args && n <= no_pars; n++) {
free_tok_list(arg_array[n]);
}
if (arg_array != arg_array_base) {
xfree_nof(arg_array);
}
goto incomplete_macro;
}
}
/* Update location pointers */
if (ptr_loc) {
*(ptr_loc)->toks = ptr_tok;
}
for (lc = locs; lc != ptr_loc; lc = lc->next) {
*(lc)->toks = NULL;
}
/* Check that argument and parameter lists match */
if (no_pars != no_args) {
ERROR err;
n = no_args;
err = ERR_cpp_replace_arg_number(macro, n, n, no_pars);
report(crt_loc, err);
/* Add extra arguments if there are not enough */
for (n = no_args + 1; n <= no_pars; n++) {
arg_array[n] = NULL;
}
}
IGNORE check_value(OPT_VAL_macro_args, (ulong)no_args);
/* Get the macro definition */
defn = DEREF_pptok(id_func_macro_defn(id));
}
crt_spaces = ws;
/* Copy the definition, expanding macro arguments */
while (defn != NULL) {
int t = defn->tok;
if (t == lex_macro_Harg) {
/* Macro argument - identified by argument number */
unsigned long n = defn->pp_data.par.no;
PPTOKEN *arg = arg_array[n];
if (state == 0) {
if (defn->next &&
defn->next->tok == lex_hash_Hhash_Hop) {
/* Preceding ##, just copy argument */
this_tok->next = copy_tok_list(arg);
} else {
/* Normal argument expansion */
TOKEN_LOC *arg_locs = NULL;
this_tok->next =
expand_toks(arg, arg_locs, 0);
}
} else if (state == 1) {
/* Following #, fake reading a string literal */
this_tok->next = new_pptok();
if (!quote_tok_list(arg, 1, char_quote)) {
report(crt_loc,
ERR_cpp_stringize_bad(macro));
}
this_tok->next->tok = lex_string_Hlit;
token_parts(lex_string_Hlit, this_tok->next);
this_tok->next->next = NULL;
this_tok->next->pp_space = 0;
} else {
/* Following ##, just copy argument */
this_tok->next = copy_tok_list(arg);
}
sp = defn->pp_space;
if (sp && this_tok->next) {
this_tok->next->pp_space = sp;
sp = 0;
}
while (this_tok->next) {
this_tok = this_tok->next;
}
state = 0;
} else if (t == lex_hash_Hop) {
/* Check for # operator */
state = 1;
} else if (t != lex_ignore_token) {
/* Copy other tokens */
this_tok->next = new_pptok();
this_tok = this_tok->next;
copy_pptok(this_tok, t, defn);
if (sp) {
this_tok->pp_space = sp;
sp = 0;
}
if (t == lex_hash_Hhash_Hop) {
/* Check for ## operator */
have_hash_hash = 1;
state = 2;
} else {
if (t == lex_unknown) {
have_unknown = 1;
}
state = 0;
}
}
defn = defn->next;
}
this_tok->next = NULL;
defn = dummy_tok.next;
/* Allow for argument expansion in strings */
if (have_unknown) {
defn = recognise_strings(defn, macro, 1);
}
/* Rescan for ## directives */
if (have_hash_hash) {
defn = process_concat(defn, macro);
}
/* Rescan for further expansion (but not expanding macro) */
COPY_dspec(id_storage(id), (ds | dspec_temp));
this_tok = expand_toks(defn, locs, complete);
free_tok_list(defn);
defn = this_tok;
COPY_dspec(id_storage(id), ds);
/* Clean up after macro expansion */
if (tag == id_func_macro_tag) {
/* Free the macro arguments */
unsigned n;
for (n = 1; n <= no_pars; n++) {
free_tok_list(arg_array[n]);
}
if (arg_array != arg_array_base) {
xfree_nof(arg_array);
}
}
/* Return the result */
return(defn);
}
/*
EXPAND A LIST OF TOKENS
This is the main macro expansion routine. It expands the list of tokens
toks, returning the result. If toks ends in an unterminated function-like
macro then further tokens may be read from the locations given in locs.
The complete argument is as in expand_macro.
*/
static PPTOKEN *
expand_toks(PPTOKEN *toks, TOKEN_LOC *locs, int complete)
{
PPTOKEN *ptr_tok;
unsigned long sp = 0;
PPTOKEN dummy_tok, *this_tok = &dummy_tok;
/* Copy list of tokens */
for (ptr_tok = toks; ptr_tok != NULL; ptr_tok = ptr_tok->next) {
int t = ptr_tok->tok;
if (t == lex_ignore_token) {
sp |= ptr_tok->pp_space;
continue;
}
this_tok->next = new_pptok();
this_tok = this_tok->next;
copy_pptok(this_tok, t, ptr_tok);
if (sp) {
this_tok->pp_space |= sp;
sp = 0;
}
/* Check for macros */
if (t == lex_identifier) {
HASHID m = ptr_tok->pp_data.id.hash;
IDENTIFIER id = DEREF_id(hashid_id(m));
unsigned tag = TAG_id(id);
switch (tag) {
case id_obj_macro_tag:
case id_func_macro_tag: {
DECL_SPEC ds;
TOKEN_LOC tloc;
/* Check for non-expanding tokens */
if (IS_NULL_id(this_tok->pp_data.id.use)) {
break;
}
/* Check for recursive macro definitions */
ds = DEREF_dspec(id_storage(id));
if (ds & dspec_temp) {
/* Mark this token as non-expanding */
ERROR err = ERR_cpp_rescan_recursive(m);
report(crt_loc, err);
this_tok->pp_data.id.use = NULL_id;
break;
}
/* Expand the macro using an extra location */
tloc.toks = &ptr_tok;
tloc.next = locs;
this_tok->tok = lex_ignore_token;
this_tok->next = expand_macro(m, &tloc,
complete);
while (this_tok->next)this_tok = this_tok->next;
break;
}
}
if (ptr_tok == NULL) {
break;
}
}
}
this_tok->next = NULL;
return(dummy_tok.next);
}
/*
EXPAND A SIMPLE LIST OF TOKENS
This routine is the simplest form of expand_toks, where toks is a
complete list, with no locations for reading further tokens.
*/
PPTOKEN *
expand_tok_list(PPTOKEN *toks)
{
return(expand_toks(toks, NIL(TOKEN_LOC), 1));
}
/*
ASSERTION NAMESPACE
The assertions occupy a namespace distinct from all other namespaces,
including the macro namespace.
*/
NAMESPACE assert_namespace;
/*
CREATE A BUILT-IN MACRO
This routine creates a built-in macro named nm defined by a single
preprocessing token with token type t and associated data d.
*/
static void
builtin_macro(CONST char *nm, int t, CONST char *d)
{
if (d) {
IDENTIFIER id;
string s = ustrlit(nm);
unsigned long h = hash(s);
HASHID macro = lookup_name(s, h, 0, lex_identifier);
IDENTIFIER pid = DEREF_id(hashid_id(macro));
DECL_SPEC ds = (dspec_defn | dspec_builtin);
/* Set up the token definition */
PPTOKEN *p = new_pptok();
p->tok = t;
p->pp_space = 0;
p->pp_opts = NULL;
p->next = NULL;
if (t == lex_integer_Hlit) {
/* Set up associated integer data */
string c = xustrcpy(ustrlit(d));
p->pp_data.text = c;
} else if (t == lex_string_Hlit) {
/* Set up associated string data */
string c = xustrcpy(ustrlit(d));
p->pp_data.str.start = c;
p->pp_data.str.end = c + ustrlen(c);
} else if (t == lex_builtin_Hline || t == lex_builtin_Hfile) {
/* Set up associated location data */
p->pp_space = crt_loc.column;
p->pp_data.loc.line = crt_loc.line;
p->pp_data.loc.posn = crt_loc.posn;
}
/* Define the macro */
MAKE_id_obj_macro(macro, ds, NULL_nspace, crt_loc, p, id);
COPY_id(id_alias(id), pid);
COPY_id(hashid_id(macro), id);
if (do_macro) {
dump_declare(id, &crt_loc, 1);
}
}
return;
}
/*
INITIALISE BUILT-IN MACROS
This routine initialises the built-in macros, and sets up the assertion
namespace.
*/
void
init_macros(int m, int a)
{
CONST char *d = find_date("%s %2d %d");
CONST char *t = find_time("%.2d:%.2d:%.2d");
if (m) {
/* Define built-in macros */
builtin_macro("__LINE__", lex_builtin_Hline, "1");
builtin_macro("__FILE__", lex_builtin_Hfile, "<unknown>");
builtin_macro("__DATE__", lex_string_Hlit, d);
builtin_macro("__TIME__", lex_string_Hlit, t);
builtin_macro("__STDC__", lex_integer_Hlit, C_VERSION);
builtin_macro("__STDC_VERSION__", lex_integer_Hlit,
ISOC_VERSION);
#if LANGUAGE_CPP
builtin_macro("__cplusplus", lex_integer_Hlit, CPP_VERSION);
builtin_macro("__tcpplus", lex_integer_Hlit, "1");
#else
builtin_macro("__tcpplus", lex_integer_Hlit, "0");
#endif
}
assert_namespace = make_global_nspace("<assert>", 20);
if (a) {
/* Define built-in assertions */
IGNORE make_assert(KEYWORD(lex_include), lex_include);
IGNORE make_assert(KEYWORD(lex_keyword), lex_keyword);
IGNORE make_assert(KEYWORD(lex_option), lex_option);
}
return;
}