/*
                 Crown Copyright (c) 1997
    
    This TenDRA(r) Computer Program is subject to Copyright
    owned by the United Kingdom Secretary of State for Defence
    acting through the Defence Evaluation and Research Agency
    (DERA).  It is made available to Recipients with a
    royalty-free licence for its use, reproduction, transfer
    to other parties and amendment for any purpose not excluding
    product development provided that any such use et cetera
    shall be deemed to be acceptance of the following conditions:-
    
        (1) Its Recipients shall ensure that this Notice is
        reproduced upon any copies or amended versions of it;
    
        (2) Any amended version of it shall be clearly marked to
        show both the nature of and the organisation responsible
        for the relevant amendment or amendments;
    
        (3) Its onward transfer from a recipient to another
        party shall be deemed to be that party's acceptance of
        these conditions;
    
        (4) DERA gives no warranty or assurance as to its
        quality or suitability for any purpose and DERA accepts
        no liability whatsoever in relation to any use to which
        it may be put.
*/


#include "config.h"
#include "system.h"
#include "version.h"
#include "c_types.h"
#include "hashid_ops.h"
#include "id_ops.h"
#include "nspace_ops.h"
#include "error.h"
#include "catalog.h"
#include "option.h"
#include "buffer.h"
#include "char.h"
#include "dump.h"
#include "file.h"
#include "hash.h"
#include "lex.h"
#include "literal.h"
#include "macro.h"
#include "namespace.h"
#include "predict.h"
#include "preproc.h"
#include "print.h"
#include "symbols.h"
#include "syntax.h"
#include "ustring.h"
#include "xalloc.h"


/*
    LIST OF FREE LEXICAL TOKENS

    All the free lexical tokens are formed into the list free_tokens.  The
    blocks from which they are allocated are recorded in alloc_tokens.
*/

PPTOKEN *free_tokens = NULL ;
static LIST ( PPTOKEN_P ) alloc_tokens = NULL_list ( PPTOKEN_P ) ;


/*
    ALLOCATE A NEW TOKEN

    This routine allocates a new token from the list free_tokens, allocating
    a further block of tokens if the list is empty.
*/

PPTOKEN *new_pptok
    PROTO_Z ()
{
    PPTOKEN *p = free_tokens ;
    if ( p == NULL ) {
        PPTOKEN *q ;
        int i, n = 2000 ;
        p = xmalloc_nof ( PPTOKEN, n ) ;
        CONS_pptok ( p, alloc_tokens, alloc_tokens ) ;
        q = p ;
        for ( i = 1 ; i < n ; i++ ) {
            q->next = q + 1 ;
            q++ ;
        }
        q->next = NULL ;
    }
    free_tokens = p->next ;
    p->pp_opts = real_opts ;
    return ( p ) ;
}


/*
    FREE A SINGLE TOKEN

    This macro frees the single token P by adding it to the list of all
    free tokens.
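
    A minimal usage sketch (illustrative only; lex_comma is just one
    possible token value):

        PPTOKEN *p = new_pptok () ;
        p->tok = lex_comma ;
        p->next = NULL ;
        free_pptok ( p ) ;

    Tokens recycled in this way remain on free_tokens; the underlying
    blocks are only released by term_macros.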
*/

#define free_pptok( P )\
    {\
        ( P )->next = free_tokens ;\
        free_tokens = ( P ) ;\
    }


/*
    FREE A LIST OF TOKENS

    This routine adds the list of tokens p to the list of all free tokens.
*/

void free_tok_list
    PROTO_N ( ( p ) )
    PROTO_T ( PPTOKEN *p )
{
    PPTOKEN *q = p ;
    if ( q == NULL ) return ;
    while ( q->next ) q = q->next ;
    q->next = free_tokens ;
    free_tokens = p ;
    return ;
}


/*
    FREE ALL ALLOCATED PREPROCESSING TOKENS

    This routine frees all the space allocated for preprocessing tokens.
    It should only be called after the input has been completely processed.
*/

void term_macros
    PROTO_Z ()
{
    LIST ( PPTOKEN_P ) p = alloc_tokens ;
    while ( !IS_NULL_list ( p ) ) {
        PPTOKEN *q ;
        DESTROY_CONS_pptok ( destroy, q, p, p ) ;
        xfree_nof ( q ) ;
    }
    alloc_tokens = p ;
    free_tokens = NULL ;
    return ;
}


/*
    COPY A TOKEN

    This macro sets the token value of P to T and copies the remaining
    components (data, options and white space) of the token Q into P.
*/

#define copy_pptok( P, T, Q )\
    {\
        ( P )->tok = ( T ) ;\
        ( P )->pp_data = ( Q )->pp_data ;\
        ( P )->pp_opts = ( Q )->pp_opts ;\
        ( P )->pp_space = ( Q )->pp_space ;\
    }


/*
    ASSIGN TOKEN COMPONENTS

    This routine assigns into p the token components for the token t, which
    has just been read from the input file (or occasionally faked, as
    indicated at the call sites).  It is only necessary to call this routine
    if t is less than or equal to LAST_COMPLEX_TOKEN (defined in symbols.h).
    If any cases are added to this routine then it may be necessary to
    change the value of this macro.
*/

void token_parts
    PROTO_N ( ( t, p ) )
    PROTO_T ( int t X PPTOKEN *p )
{
    switch ( t ) {
        case lex_identifier : {
            /* Identifiers */
            HASHID nm = token_hashid ;
            IDENTIFIER id = DEREF_id ( hashid_id ( nm ) ) ;
            p->pp_data.id.hash = nm ;
            p->pp_data.id.use = id ;
            break ;
        }
        case lex_char_Hlit :
        case lex_string_Hlit :
        case lex_wchar_Hlit :
        case lex_wstring_Hlit : {
            /* String and character literals */
            string s1 = token_buff.start ;
            gen_size n = ( gen_size ) ( token_buff.posn - s1 ) ;
            string s2 ;
            if ( n < 2 ) {
                /* Optimise for small strings */
                s2 = xustrcpy ( s1 ) ;
            } else {
                s2 = xustr ( n + 1 ) ;
                xumemcpy ( s2, s1, n ) ;
                s2 [n] = 0 ;
            }
            p->pp_data.str.start = s2 ;
            p->pp_data.str.end = s2 + n ;
            break ;
        }
        case lex_integer_Hlit : {
            /* Integer and floating-point literals */
            p->pp_data.text = xustrcpy ( token_buff.start ) ;
            break ;
        }
        case lex_hash_Hif :
        case lex_hash_Helif : {
            /* Target dependent conditionals */
            p->pp_data.exp = crt_hash_if_exp ;
            break ;
        }
        case lex_unknown : {
            /* Unknown characters */
            int i ;
            string s1 = token_buff.start ;
            string s2 = p->pp_data.buff ;
            ASSERT ( MULTI_WIDTH <= sizeof ( p->pp_data.buff ) ) ;
            for ( i = 0 ; i < MULTI_WIDTH ; i++ ) s2 [i] = s1 [i] ;
            break ;
        }
    }
    return ;
}


/*
    REMOVE ANY IGNORED TOKENS FROM A LIST

    This routine removes any ignored tokens from the list toks, returning
    the result.
*/

PPTOKEN *clean_tok_list
    PROTO_N ( ( toks ) )
    PROTO_T ( PPTOKEN *toks )
{
    unsigned long sp = 0 ;
    PPTOKEN p0, *p = &p0 ;
    PPTOKEN *q ;
    p->next = toks ;
    while ( q = p->next, q != NULL ) {
        if ( q->tok == lex_ignore_token ) {
            sp |= q->pp_space ;
            p->next = q->next ;
            free_pptok ( q ) ;
            q = p->next ;
            if ( q == NULL ) break ;
        } else {
            if ( sp ) {
                q->pp_space |= sp ;
                sp = 0 ;
            }
        }
        p = q ;
    }
    return ( p0.next ) ;
}


/*
    READ A LINE OF TOKENS

    This routine reads the sequence of preprocessing tokens comprising a
    preprocessing directive (for example, a macro definition).  If t1 is
    not lex_ignore_token then it is taken to be the first token of the line;
    similarly, if tn is not lex_ignore_token then it gives the last token.
*/

PPTOKEN *read_line
    PROTO_N ( ( t1, tn ) )
    PROTO_T ( int t1 X int tn )
{
    int t = t1 ;
    unsigned long sp = 0 ;
    PPTOKEN dummy_tok, *this_tok = &dummy_tok ;
    if ( t == lex_ignore_token ) {
        t = read_token () ;
        update_column () ;
        if ( in_preproc_dir ) preproc_loc = crt_loc ;
    }
    while ( t != lex_newline && t != lex_eof ) {
        this_tok->next = new_pptok () ;
        this_tok = this_tok->next ;
        this_tok->tok = t ;
        if ( t <= LAST_COMPLEX_TOKEN ) token_parts ( t, this_tok ) ;
        this_tok->pp_space = ( sp & WHITE_MASK ) ;
        sp = skip_white ( 0 ) ;
        t = read_token () ;
        update_column () ;
        if ( in_preproc_dir ) preproc_loc = crt_loc ;
    }
    if ( tn != lex_ignore_token ) {
        this_tok->next = new_pptok () ;
        this_tok = this_tok->next ;
        this_tok->tok = tn ;
        token_parts ( tn, this_tok ) ;
        this_tok->pp_space = ( sp & WHITE_MASK ) ;
    }
    this_tok->next = NULL ;
    if ( in_preproc_dir ) IGNORE skip_to_end () ;
    return ( dummy_tok.next ) ;
}


/*
    COPY A LIST OF TOKENS

    This routine copies the list of tokens toks, excluding any ignored
    tokens.
*/

static PPTOKEN *copy_tok_list
    PROTO_N ( ( toks ) )
    PROTO_T ( PPTOKEN *toks )
{
    PPTOKEN *ptr_tok ;
    PPTOKEN dummy_tok, *this_tok = &dummy_tok ;
    for ( ptr_tok = toks ; ptr_tok != NULL ; ptr_tok = ptr_tok->next ) {
        int t = ptr_tok->tok ;
        if ( t != lex_ignore_token ) {
            this_tok->next = new_pptok () ;
            this_tok = this_tok->next ;
            copy_pptok ( this_tok, t, ptr_tok ) ;
        }
    }
    this_tok->next = NULL ;
    return ( dummy_tok.next ) ;
}


/*
    STRINGISE A LIST OF TOKENS

    This routine turns the list of tokens toks into a string.  The result
    is built up in token_buff.  If esc is true then any '"' (or whatever
    the value of quote is) and '\' characters in string and character
    literals (including the initial and terminating quotes) are preceded
    by a '\'.  This routine is used in the implementation of the # operator,
    in #include directives whose argument is given by macro expansion, and
    in a couple of other preprocessing directives.  It returns 1 if the
    result is a valid string.
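
    For example (an illustrative case, not one taken from this file), with
    esc true and quote '"', the token sequence

                puts ( "hi\n" )

    is built up in token_buff as

                puts ( \"hi\\n\" )

    (the exact spacing depends on the pp_space values of the tokens).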
*/

int quote_tok_list
    PROTO_N ( ( toks, esc, quote ) )
    PROTO_T ( PPTOKEN *toks X int esc X int quote )
{
    int res = 1 ;
    string st, se ;
    int started = 0 ;
    int escaped = 0 ;
    PPTOKEN *ptr_tok ;
    character qo = ( character ) quote ;
    BUFFER *bf = clear_buffer ( &token_buff, NIL ( FILE ) ) ;

    /* Scan through tokens */
    for ( ptr_tok = toks ; ptr_tok != NULL ; ptr_tok = ptr_tok->next ) {
        character p, q ;
        int t = ptr_tok->tok ;
        if ( t == lex_ignore_token ) continue ;

        /* Print initial space if necessary */
        if ( ptr_tok->pp_space && started ) bfputc ( bf, char_space ) ;

        /* Find the token name */
        switch ( t ) {
            case lex_identifier : {
                /* Identifiers */
                HASHID nm = ptr_tok->pp_data.id.hash ;
                st = DEREF_string ( hashid_name_etc_text ( nm ) ) ;
                bfputs ( bf, st ) ;
                break ;
            }
            case lex_integer_Hlit : {
                /* Integer and floating-point literals */
                st = ptr_tok->pp_data.text ;
                bfputs ( bf, st ) ;
                break ;
            }
            case lex_char_Hlit : {
                /* Character literals */
                p = 0 ;
                q = char_single_quote ;
                string_label : {
                    st = ptr_tok->pp_data.str.start ;
                    se = ptr_tok->pp_data.str.end ;

                    /* Prefix and opening quote */
                    if ( p ) bfputc ( bf, ( int ) p ) ;
                    if ( esc && q == qo ) bfputc ( bf, char_backslash ) ;
                    bfputc ( bf, ( int ) q ) ;

                    /* Copy string */
                    while ( st != se ) {
                        character c = *( st++ ) ;
                        if ( c == qo || c == char_backslash ) {
                            /* Escaped characters */
                            if ( esc ) bfputc ( bf, char_backslash ) ;
                        }
                        bfputc ( bf, ( int ) c ) ;
                    }

                    /* Closing quote */
                    if ( esc && q == qo ) bfputc ( bf, char_backslash ) ;
                    bfputc ( bf, ( int ) q ) ;
                }
                break ;
            }
            case lex_wchar_Hlit : {
                /* Wide character literals */
                p = char_L ;
                q = char_single_quote ;
                goto string_label ;
            }
            case lex_string_Hlit : {
                /* String literals */
                p = 0 ;
                q = char_quote ;
                goto string_label ;
            }
            case lex_wstring_Hlit : {
                /* Wide string literals */
                p = char_L ;
                q = char_quote ;
                goto string_label ;
            }
            case lex_unknown : {
                /* Unknown characters */
                unsigned long u ;
                int ch = CHAR_SIMPLE ;
                u = get_multi_char ( ptr_tok->pp_data.buff, &ch ) ;
                if ( ch == CHAR_SIMPLE ) {
                    bfputc ( bf, ( int ) u ) ;
                } else {
                    print_char ( u, ch, 0, bf ) ;
                }
                break ;
            }
            case lex_macro_Harg : {
                /* Macro parameters */
                HASHID nm = ptr_tok->pp_data.par.hash ;
                st = DEREF_string ( hashid_name_etc_text ( nm ) ) ;
                bfputs ( bf, st ) ;
                break ;
            }
            default : {
                /* Symbols */
                st = token_name ( t ) ;
                bfputs ( bf, st ) ;
                break ;
            }
        }
        started = 1 ;
    }

    /* End of string */
    bfputc ( bf, 0 ) ;
    bf->posn-- ;

    /* Check for legal strings */
    st = bf->start ;
    se = bf->posn ;
    while ( st != se ) {
        if ( escaped ) {
            escaped = 0 ;
        } else {
            character c = *st ;
            if ( c == qo ) res = 0 ;
            if ( c == char_backslash ) escaped = 1 ;
        }
        st++ ;
    }
    if ( escaped ) res = 0 ;
    return ( res ) ;
}


/*
    CONCATENATE TWO TOKENS

    This routine concatenates the two tokens p and q into a single token.
    This is used to implement the ## operator.  If the result is a valid
    preprocessing token then p is overwritten by the result and 1 is
    returned.  Otherwise p and q are unchanged (apart from their white-space
    flags) and 0 is returned.
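
    For illustration (assuming the default lexical options), some of the
    concatenations handled here are:

                <   ##  <       giving the symbol           <<
                foo ##  1       giving the identifier       foo1
                12  ##  34      giving the integer literal  1234
                L   ##  "ab"    giving the wide string      L"ab"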
*/

static int concat_pptoks
    PROTO_N ( ( p, q ) )
    PROTO_T ( PPTOKEN *p X PPTOKEN *q )
{
    int a = p->tok ;
    int b = q->tok ;
    unsigned long sa = p->pp_space ;
    unsigned long sb = q->pp_space ;
    p->pp_space = ( sa | sb ) ;
    q->pp_space = 0 ;
    if ( a >= FIRST_SYMBOL && a <= LAST_SYMBOL ) {
        if ( b >= FIRST_SYMBOL && b <= LAST_SYMBOL ) {
            /* Two symbols may combine to give another symbol */
            int c ;
            string s = token_buff.start ;
            ustrcpy_v ( s, token_name ( a ) ) ;
            ustrcpy_v ( s + ustrlen ( s ), token_name ( b ) ) ;
            for ( c = FIRST_SYMBOL ; c <= LAST_SYMBOL ; c++ ) {
                if ( ustreq ( s, token_name ( c ) ) ) {
                    /* Token found - check options */
                    p->tok = c ;
                    if ( c >= FIRST_C_SYMBOL && c <= LAST_C_SYMBOL ) {
                        return ( 1 ) ;
                    }
#if LANGUAGE_CPP
                    if ( c >= FIRST_CPP_SYMBOL && c <= LAST_CPP_SYMBOL ) {
                        return ( 1 ) ;
                    }
#endif
                    if ( c >= FIRST_EXTRA_SYMBOL && c <= LAST_EXTRA_SYMBOL ) {
                        if ( allow_extra_symbols ) return ( 1 ) ;
                    }
                    if ( c >= FIRST_DIGRAPH && c <= LAST_DIGRAPH ) {
                        if ( allow_digraphs ) return ( 1 ) ;
                    }
                    p->tok = a ;
                }
            }
            return ( 0 ) ;

        } else if ( a == lex_dot && b == lex_integer_Hlit ) {
            /* A dot may start a number */
            string s = q->pp_data.text ;
            if ( s [0] == char_dot ) return ( 0 ) ;
            p->tok = lex_integer_Hlit ;
            p->pp_data.text = xustrcat ( token_name ( a ), s ) ;
            return ( 1 ) ;

        } else if ( a == lex_backslash && b == lex_identifier ) {
            /* A backslash may start a universal character */
            /* NOT YET IMPLEMENTED */
            /* EMPTY */
        }

    } else if ( a == lex_identifier ) {
        HASHID nm = p->pp_data.id.hash ;
        string s = DEREF_string ( hashid_name_etc_text ( nm ) ) ;
        if ( b == lex_identifier ) {
            /* Two identifiers give another identifier */
            HASHID nm2 = q->pp_data.id.hash ;
            string s2 = DEREF_string ( hashid_name_etc_text ( nm2 ) ) ;
            s = xustrcat ( s, s2 ) ;
            nm = lookup_name ( s, hash ( s ), 2, lex_identifier ) ;
            p->pp_data.id.hash = nm ;
            p->pp_data.id.use = DEREF_id ( hashid_id ( nm ) ) ;
            return ( 1 ) ;

        } else if ( b == lex_integer_Hlit ) {
            /* An identifier and a number may give an identifier */
            character c ;
            string n = q->pp_data.text ;
            while ( c = *( n++ ), c != 0 ) {
                if ( c == char_dot || c == char_plus || c == char_minus ) {
                    /* The number must be entirely alphanumeric */
                    return ( 0 ) ;
                }
            }
            s = xustrcat ( s, q->pp_data.text ) ;
            nm = lookup_name ( s, hash ( s ), 2, lex_identifier ) ;
            p->pp_data.id.hash = nm ;
            p->pp_data.id.use = DEREF_id ( hashid_id ( nm ) ) ;
            return ( 1 ) ;

        } else if ( s [0] == char_L && s [1] == 0 ) {
            /* An L may start a wide character or string */
            if ( b == lex_char_Hlit ) {
                p->tok = lex_wchar_Hlit ;
                p->pp_data.str.start = q->pp_data.str.start ;
                p->pp_data.str.end = q->pp_data.str.end ;
                return ( 1 ) ;
            } else if ( b == lex_string_Hlit ) {
                p->tok = lex_wstring_Hlit ;
                p->pp_data.str.start = q->pp_data.str.start ;
                p->pp_data.str.end = q->pp_data.str.end ;
                return ( 1 ) ;
            }
        }

    } else if ( a == lex_integer_Hlit ) {
        string s = p->pp_data.text ;
        if ( b == lex_identifier ) {
            /* A number followed by an identifier is a number */
            HASHID nm = q->pp_data.id.hash ;
            string s2 = DEREF_string ( hashid_name_etc_text ( nm ) ) ;
            p->pp_data.text = xustrcat ( s, s2 ) ;
            return ( 1 ) ;

        } else if ( b == lex_integer_Hlit ) {
            /* Two numbers form another number */
            string s2 = q->pp_data.text ;
            p->pp_data.text = xustrcat ( s, s2 ) ;
            return ( 1 ) ;

        } else if ( b == lex_dot || b == lex_ellipsis ) {
            /* A number followed by a sequence of dots is a number */
            p->pp_data.text = xustrcat ( s, token_name ( b ) ) ;
            return ( 1 ) ;

        } else if ( b == lex_plus || b == lex_minus ) {
            /* A sign may terminate a number after e or E */
            unsigned n = ( unsigned ) ustrlen ( s ) - 1 ;
            if ( s [n] == char_e || s [n] == char_E ) {
                p->pp_data.text = xustrcat ( s, token_name ( b ) ) ;
                return ( 1 ) ;
            }
        }
    }
    return ( 0 ) ;
}


/*
    DUMMY LOCATION FOR INPUT FILE

    This dummy location represents tokens read directly from the input file.
    If present, it will always be the last element of a list of token
    locations.
*/

static PPTOKEN *dummy_loc_toks = NULL ;
static TOKEN_LOC dummy_loc = { &dummy_loc_toks, NULL } ;
TOKEN_LOC *file_loc = &dummy_loc ;


/*
    FORWARD DECLARATION

    The functions expand_macro, expand_toks and expand_tok_list are defined
    mutually recursively.  This gives the necessary forward declaration.
*/

static PPTOKEN *expand_toks PROTO_S ( ( PPTOKEN *, TOKEN_LOC *, int ) ) ;


/*
    HANDLE OLD STYLE STRINGISING

    This routine handles the old style stringising for the definition defn
    of the given macro.  Argument replacement has already been performed
    on defn.  If this facility is enabled then in macro definitions of the
    form:

                #define f( X )  "X"

    quotes are classified as unknown characters rather than string
    terminators.  This means that the X is recognised as a macro parameter
    and is replaced during argument replacement.  The job of this routine
    is to spot these unrecognised quotes and turn them into proper strings.
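
    For example, with the definition above, the expansion of 'f ( hello )'
    is the string literal "hello".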
*/

PPTOKEN *recognise_strings
    PROTO_N ( ( defn, macro, act ) )
    PROTO_T ( PPTOKEN *defn X HASHID macro X int act )
{
    PPTOKEN *this_tok = defn ;
    PPTOKEN *last_tok = defn ;
    while ( this_tok != NULL ) {
        if ( this_tok->tok == lex_unknown ) {
            unsigned long u ;
            int ch = CHAR_SIMPLE ;
            character qo = char_question ;
            u = get_multi_char ( this_tok->pp_data.buff, &ch ) ;
            if ( ch == CHAR_SIMPLE ) qo = ( character ) u ;
            if ( qo == char_quote || qo == char_single_quote ) {
                /* Start of string */
                int t ;
                int escaped = 0 ;
                PPTOKEN *next_tok = this_tok->next ;
                PPTOKEN *ptr_tok = next_tok ;
                while ( ptr_tok != NULL ) {
                    t = ptr_tok->tok ;
                    if ( t == lex_macro_Harg ) {
                        HASHID nm = ptr_tok->pp_data.par.hash ;
                        ERROR err = ERR_cpp_stringize_old ( nm, macro ) ;
                        report ( preproc_loc, err ) ;
                    }
                    if ( escaped ) {
                        escaped = 0 ;
                    } else if ( t == lex_unknown ) {
                        character qc = char_question ;
                        u = get_multi_char ( ptr_tok->pp_data.buff, &ch ) ;
                        if ( ch == CHAR_SIMPLE ) qc = ( character ) u ;
                        if ( qc == qo ) break ;
                        if ( qc == char_backslash ) escaped = 1 ;
                    }
                    ptr_tok = ptr_tok->next ;
                }
                if ( act ) {
                    if ( ptr_tok == NULL ) {
                        /* No closing quote */
                        report ( crt_loc, ERR_cpp_stringize_bad ( macro ) ) ;
                        this_tok->next = NULL ;
                    } else {
                        ptr_tok->tok = lex_ignore_token ;
                        this_tok->next = ptr_tok->next ;
                        ptr_tok->next = NULL ;
                    }

                    /* Form the string */
                    if ( !quote_tok_list ( next_tok, 0, ( int ) qo ) ) {
                        report ( crt_loc, ERR_cpp_stringize_bad ( macro ) ) ;
                    }
                    t = ( qo == char_quote ? lex_string_Hlit : lex_char_Hlit ) ;
                    this_tok->tok = t ;
                    token_parts ( t, this_tok ) ;
                    free_tok_list ( next_tok ) ;

                    /* Check for wide strings */
                    if ( last_tok->tok == lex_identifier ) {
                        string s ;
                        HASHID nm = last_tok->pp_data.id.hash ;
                        s = DEREF_string ( hashid_name_etc_text ( nm ) ) ;
                        if ( s [0] == char_L && s [1] == 0 ) {
                            if ( t == lex_string_Hlit ) {
                                t = lex_wstring_Hlit ;
                            } else {
                                t = lex_wchar_Hlit ;
                            }
                            copy_pptok ( last_tok, t, this_tok ) ;
                            last_tok->next = this_tok->next ;
                            free_pptok ( this_tok ) ;
                            this_tok = last_tok ;
                        }
                    }
                }
            }
        }
        last_tok = this_tok ;
        this_tok = this_tok->next ;
    }
    return ( defn ) ;
}


/*
    HANDLE TOKEN CONCATENATION

    This routine handles any ## operators in the definition defn of the
    given macro.  Note that any initial or terminal ## operators have
    already been reported.
*/

static PPTOKEN *process_concat
    PROTO_N ( ( defn, macro ) )
    PROTO_T ( PPTOKEN *defn X HASHID macro )
{
    PPTOKEN *this_tok ;
    while ( defn && defn->tok == lex_hash_Hhash_Hop ) {
        /* Check for initial ## */
        this_tok = defn ;
        defn = defn->next ;
        free_pptok ( this_tok ) ;
    }
    this_tok = defn ;
    while ( this_tok != NULL ) {
        PPTOKEN *next_tok = this_tok->next ;
        if ( next_tok == NULL ) break ;
        if ( next_tok->tok == lex_hash_Hhash_Hop ) {
            /* Delete the ## */
            this_tok->next = next_tok->next ;
            free_pptok ( next_tok ) ;

            /* Check for terminal ## */
            if ( this_tok->next == NULL ) break ;

            /* Do the token concatenation */
            if ( concat_pptoks ( this_tok, this_tok->next ) ) {
                /* Delete the second argument if successful */
                next_tok = this_tok->next ;
                this_tok->next = next_tok->next ;
                free_pptok ( next_tok ) ;
            } else {
                report ( crt_loc, ERR_cpp_concat_bad ( macro ) ) ;
            }
            /* Now reprocess this_tok */
        } else {
            this_tok = next_tok ;
        }
    }
    return ( defn ) ;
}


/*
    MAXIMUM NUMBER OF MACRO PARAMETERS

    This macro defines the maximum number of macro parameters which
    expand_macro can handle without having to allocate temporary space
    to hold them.  With allocation the number of parameters is unlimited.
*/

#define MAX_MACRO_PARAMS        256


/*
    EXPAND A MACRO DEFINITION

    This routine expands the macro given by the hash table entry macro.
    The argument locs gives a list of locations where macro arguments can
    be read from.  locs will never be NULL.  The argument complete is true
    to indicate that this is a complete macro expansion, and that any
    argument errors should be reported.  If locs contains file_loc then
    complete will always be true.  When reading from file_loc we always
    set in_preproc_dir to 2 to make read_token return lex_eof at the end
    of each file, rather than automatically reverting to the including
    file, and to cause it to ignore any preprocessing directives.

    Note that the entry for the macro in the hash table is marked during
    expansion to prevent recursive expansions.  Several points concerning
    macro expansion are undefined; in this implementation:

        1.  # operators are evaluated from left to right;
        2.  ## operators are evaluated from left to right;
        3.  If a ## b is not a valid preprocessing token then it is
            resolved to a b;
        4.  A # operator in a function-like macro which is not followed
            by a macro argument is ignored (it is left as # in object-like
            macros of course);
        5.  A ## operator at the start or end of a macro is ignored;
        6.  Any preprocessing directives in the macro arguments are treated
            as normal sequences of preprocessing tokens.
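
    For example, illustrating point 3, in:

                    #define glue( a, b )    a ## b
                    glue ( +, - )

    the result is the two separate tokens '+' and '-', since '+-' does not
    form a single preprocessing token.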

    A further undefined area concerns the ban on recursive macro expansions.
    This is extended from the macro definition itself to any extra tokens
    which are read during the expansion of the macro definition.  For
    example, in:

                    #define f( a )      a * g
                    #define g( a )      f ( a )
                    f ( 2 ) ( 9 )

    the result is '2 * f ( 9 )', rather than '2 * 9 * g'.
*/

PPTOKEN *expand_macro
    PROTO_N ( ( macro, locs, complete ) )
    PROTO_T ( HASHID macro X TOKEN_LOC *locs X int complete )
{
    LOCATION loc ;
    int state = 0 ;
    PPTOKEN *defn ;
    unsigned long sp = 0 ;
    unsigned no_pars = 0 ;
    int have_unknown = 0 ;
    int have_hash_hash = 0 ;
    unsigned long ws = crt_spaces ;
    PPTOKEN dummy_tok, *this_tok = &dummy_tok ;
    PPTOKEN *arg_array_base [ MAX_MACRO_PARAMS + 1 ] ;
    PPTOKEN **arg_array = arg_array_base ;

    /* Get the macro identifier */
    IDENTIFIER id = DEREF_id ( hashid_id ( macro ) ) ;
    unsigned tag = TAG_id ( id ) ;
    DECL_SPEC ds = DEREF_dspec ( id_storage ( id ) ) ;

    /* Mark the macro as being used */
    loc = crt_loc ;
    ds |= dspec_used ;
    COPY_dspec ( id_storage ( id ), ds ) ;
    if ( do_macro && do_usage ) dump_use ( id, &crt_loc, 1 ) ;

    /* Get macro definition and other data */
    if ( tag == id_obj_macro_tag ) {
        /* Object-like macros */
        defn = DEREF_pptok ( id_obj_macro_defn ( id ) ) ;
        if ( defn == NULL ) return ( NULL ) ;

        if ( ds & dspec_builtin ) {
            /* Check built-in macros */
            int t = defn->tok ;
            if ( t == lex_builtin_Hline ) {
                /* Construct an integer literal for __LINE__ */
                BUFFER *bf = clear_buffer ( &token_buff, NIL ( FILE ) ) ;
                bfprintf ( bf, "%lu", loc.line ) ;
                bfputc ( bf, 0 ) ;
                this_tok = new_pptok () ;
                this_tok->tok = lex_integer_Hlit ;
                this_tok->next = NULL ;
                this_tok->pp_opts = NULL ;
                this_tok->pp_space = 0 ;
                token_parts ( lex_integer_Hlit, this_tok ) ;
                return ( this_tok ) ;
            }

            if ( t == lex_builtin_Hfile ) {
                /* Construct a string literal for __FILE__ */
                character c ;
                string fn = DEREF_string ( posn_file ( crt_loc.posn ) ) ;
                BUFFER *bf = clear_buffer ( &token_buff, NIL ( FILE ) ) ;
                while ( c = *( fn++ ), c != 0 ) {
                    if ( c == char_quote || c == char_backslash ) {
                        /* Escape quotes and backslashes */
                        bfputc ( bf, char_backslash ) ;
                    }
                    bfputc ( bf, ( int ) c ) ;
                }
                this_tok = new_pptok () ;
                this_tok->tok = lex_string_Hlit ;
                this_tok->next = NULL ;
                this_tok->pp_opts = NULL ;
                this_tok->pp_space = 0 ;
                token_parts ( lex_string_Hlit, this_tok ) ;
                return ( this_tok ) ;
            }
        }

    } else {
        /* Function-like macros */
        int t ;
        unsigned n ;
        TOKEN_LOC *lc ;
        int brackets = 0 ;
        unsigned no_args = 0 ;
        PPTOKEN *ptr_tok = NULL ;
        TOKEN_LOC *ptr_loc = locs ;

        /* Check for following open bracket */
        for ( ; ; ) {
            if ( ptr_loc == file_loc ) {
                /* Read token from input location */
                int legal = 1 ;
                sp = skip_white ( 1 ) ;
                if ( peek_char ( char_open_round, &legal ) ) {
                    /* Next token in file is '(' */
                    update_column () ;
                    t = lex_open_Hround ;
                } else {
                    /* Other cases */
                    t = lex_unknown ;
                    if ( sp ) patch_white ( sp ) ;
                }
                break ;
            } else if ( ptr_loc == NULL ) {
                /* No more locations */
                t = lex_eof ;
                break ;
            } else {
                /* Read token from current location */
                ptr_tok = ( *( ptr_loc->toks ) )->next ;
                while ( ptr_tok && ptr_tok->tok == lex_ignore_token ) {
                    /* Step over any ignored tokens */
                    ptr_tok = ptr_tok->next ;
                }
                if ( ptr_tok != NULL ) {
                    /* Return the next token */
                    t = ptr_tok->tok ;
                    ptr_tok = ptr_tok->next ;
                    break ;
                }
                /* Move on to next location */
                ptr_loc = ptr_loc->next ;
            }
        }

        /* Next token is not an open bracket */
        if ( t != lex_open_Hround ) {
            if ( complete ) {
                report ( loc, ERR_cpp_replace_arg_none ( macro ) ) ;
            }
            incomplete_macro : {
                /* Return macro identifier */
                this_tok = new_pptok () ;
                this_tok->tok = lex_identifier ;
                this_tok->next = NULL ;
                this_tok->pp_space = 0 ;
                this_tok->pp_data.id.hash = macro ;
                this_tok->pp_data.id.use = id ;
                return ( this_tok ) ;
            }
        }

        /* Check argument array size */
        no_pars = DEREF_unsigned ( id_func_macro_no_params ( id ) ) ;
        if ( no_pars > MAX_MACRO_PARAMS ) {
            arg_array = xmalloc_nof ( PPTOKEN *, no_pars + 1 ) ;
        }

        /* Scan macro arguments */
        for ( ; ; ) {
            /* Get the next token */
            int refill = 0 ;
            for ( ; ; ) {
                if ( ptr_loc == file_loc ) {
                    /* Read token from file location */
                    sp = skip_white ( 1 ) ;
                    in_preproc_dir = 2 ;
                    t = read_token () ;
                    update_column () ;
                    if ( t == lex_hash_H1 || t == lex_hash_H2 ) {
                        if ( sp & WHITE_NEWLINE ) {
                            /* Looks like preprocessing directive */
                            ERROR err = ERR_cpp_replace_arg_ppdir ( macro ) ;
                            report ( crt_loc, err ) ;
                        }
                    }
                    break ;
                } else if ( ptr_loc == NULL ) {
                    /* No more locations to read token from */
                    t = lex_eof ;
                    break ;
                } else {
                    /* Read token from next location */
                    if ( refill ) ptr_tok = ( *( ptr_loc->toks ) )->next ;
                    if ( ptr_tok != NULL ) {
                        t = ptr_tok->tok ;
                        break ;
                    }
                    ptr_loc = ptr_loc->next ;
                    refill = 1 ;
                }
            }

            /* Examine this token */
            if ( t == lex_open_Hround ) {
                brackets++ ;
            } else if ( t == lex_close_Hround ) {
                /* Close brackets mark the end of the argument list */
                if ( brackets == 0 ) break ;
                brackets-- ;
            } else if ( t == lex_comma ) {
                /* Commas mark the end of an argument */
                if ( brackets == 0 ) {
                    this_tok->next = NULL ;
                    no_args++ ;
                    if ( dummy_tok.next ) {
                        dummy_tok.next->pp_space = 0 ;
                    } else if ( complete ) {
                        ERROR err ;
                        err = ERR_cpp_replace_arg_empty ( no_args, macro ) ;
                        report ( crt_loc, err ) ;
                    }
                    if ( no_args <= no_pars ) {
                        arg_array [ no_args ] = dummy_tok.next ;
                    } else {
                        free_tok_list ( dummy_tok.next ) ;
                    }
                    if ( ptr_tok ) ptr_tok = ptr_tok->next ;
                    this_tok = &dummy_tok ;
                    continue ;
                }
            } else if ( t == lex_eof ) {
                break ;
            }

            /* Build up current argument */
            this_tok->next = new_pptok () ;
            this_tok = this_tok->next ;
            if ( ptr_tok ) {
                copy_pptok ( this_tok, t, ptr_tok ) ;
                ptr_tok = ptr_tok->next ;
            } else {
                this_tok->tok = t ;
                if ( t <= LAST_COMPLEX_TOKEN ) token_parts ( t, this_tok ) ;
                this_tok->pp_space = ( sp & WHITE_MASK ) ;
            }
        }

        /* Create last argument */
        in_preproc_dir = 0 ;
        this_tok->next = NULL ;
        if ( no_args || dummy_tok.next ) {
            no_args++ ;
            if ( dummy_tok.next ) {
                dummy_tok.next->pp_space = 0 ;
            } else if ( complete ) {
                ERROR err = ERR_cpp_replace_arg_empty ( no_args, macro ) ;
                report ( crt_loc, err ) ;
            }
            if ( no_args <= no_pars ) {
                arg_array [ no_args ] = dummy_tok.next ;
            } else {
                free_tok_list ( dummy_tok.next ) ;
            }
        }
        if ( sp ) patch_white ( sp ) ;
        this_tok = &dummy_tok ;

        /* Check for incomplete argument lists */
        if ( t == lex_eof ) {
            if ( complete ) {
                /* Report error, but carry on */
                report ( loc, ERR_cpp_replace_arg_eof ( macro ) ) ;
            } else {
                /* Free those arguments actually read */
                for ( n = 1 ; n <= no_args && n <= no_pars ; n++ ) {
                    free_tok_list ( arg_array [n] ) ;
                }
                if ( arg_array != arg_array_base ) xfree_nof ( arg_array ) ;
                goto incomplete_macro ;
            }
        }

        /* Update location pointers */
        if ( ptr_loc ) *( ptr_loc )->toks = ptr_tok ;
        for ( lc = locs ; lc != ptr_loc ; lc = lc->next ) {
            *( lc )->toks = NULL ;
        }

        /* Check that argument and parameter lists match */
        if ( no_pars != no_args ) {
            ERROR err ;
            n = no_args ;
            err = ERR_cpp_replace_arg_number ( macro, n, n, no_pars ) ;
            report ( crt_loc, err ) ;

            /* Add extra arguments if there are not enough */
            for ( n = no_args + 1 ; n <= no_pars ; n++ ) {
                arg_array [n] = NULL ;
            }
        }
        IGNORE check_value ( OPT_VAL_macro_args, ( ulong ) no_args ) ;

        /* Get the macro definition */
        defn = DEREF_pptok ( id_func_macro_defn ( id ) ) ;
    }
    crt_spaces = ws ;

    /* Copy the definition, expanding macro arguments */
    while ( defn != NULL ) {
        int t = defn->tok ;

        if ( t == lex_macro_Harg ) {
            /* Macro argument - identified by argument number */
            unsigned long n = defn->pp_data.par.no ;
            PPTOKEN *arg = arg_array [n] ;

            if ( state == 0 ) {
                if ( defn->next && defn->next->tok == lex_hash_Hhash_Hop ) {
                    /* Preceding ##, just copy argument */
                    this_tok->next = copy_tok_list ( arg ) ;
                } else {
                    /* Normal argument expansion */
                    TOKEN_LOC *arg_locs = NULL ;
                    this_tok->next = expand_toks ( arg, arg_locs, 0 ) ;
                }

            } else if ( state == 1 ) {
                /* Following #, fake reading a string literal */
                this_tok->next = new_pptok () ;
                if ( !quote_tok_list ( arg, 1, char_quote ) ) {
                    report ( crt_loc, ERR_cpp_stringize_bad ( macro ) ) ;
                }
                this_tok->next->tok = lex_string_Hlit ;
                token_parts ( lex_string_Hlit, this_tok->next ) ;
                this_tok->next->next = NULL ;
                this_tok->next->pp_space = 0 ;

            } else {
                /* Following ##, just copy argument */
                this_tok->next = copy_tok_list ( arg ) ;
            }

            sp = defn->pp_space ;
            if ( sp && this_tok->next ) {
                this_tok->next->pp_space = sp ;
                sp = 0 ;
            }
            while ( this_tok->next ) this_tok = this_tok->next ;
            state = 0 ;

        } else if ( t == lex_hash_Hop ) {
            /* Check for # operator */
            state = 1 ;

        } else if ( t != lex_ignore_token ) {
            /* Copy other tokens */
            this_tok->next = new_pptok () ;
            this_tok = this_tok->next ;
            copy_pptok ( this_tok, t, defn ) ;
            if ( sp ) {
                this_tok->pp_space = sp ;
                sp = 0 ;
            }
            if ( t == lex_hash_Hhash_Hop ) {
                /* Check for ## operator */
                have_hash_hash = 1 ;
                state = 2 ;
            } else {
                if ( t == lex_unknown ) have_unknown = 1 ;
                state = 0 ;
            }
        }
        defn = defn->next ;
    }
    this_tok->next = NULL ;
    defn = dummy_tok.next ;

    /* Allow for argument expansion in strings */
    if ( have_unknown ) defn = recognise_strings ( defn, macro, 1 ) ;

    /* Rescan for ## directives */
    if ( have_hash_hash ) defn = process_concat ( defn, macro ) ;

    /* Rescan for further expansion (but not expanding macro) */
    COPY_dspec ( id_storage ( id ), ( ds | dspec_temp ) ) ;
    this_tok = expand_toks ( defn, locs, complete ) ;
    free_tok_list ( defn ) ;
    defn = this_tok ;
    COPY_dspec ( id_storage ( id ), ds ) ;

    /* Clean up after macro expansion */
    if ( tag == id_func_macro_tag ) {
        /* Free the macro arguments */
        unsigned n ;
        for ( n = 1 ; n <= no_pars ; n++ ) free_tok_list ( arg_array [n] ) ;
        if ( arg_array != arg_array_base ) xfree_nof ( arg_array ) ;
    }

    /* Return the result */
    return ( defn ) ;
}


/*
    EXPAND A LIST OF TOKENS

    This is the main macro expansion routine.  It expands the list of tokens
    toks, returning the result.  If toks ends in an unterminated function-like
    macro then further tokens may be read from the locations given in locs.
    The complete argument is as in expand_macro.
*/

static PPTOKEN *expand_toks
    PROTO_N ( ( toks, locs, complete ) )
    PROTO_T ( PPTOKEN *toks X TOKEN_LOC *locs X int complete )
{
    PPTOKEN *ptr_tok ;
    unsigned long sp = 0 ;
    PPTOKEN dummy_tok, *this_tok = &dummy_tok ;

    /* Copy list of tokens */
    for ( ptr_tok = toks ; ptr_tok != NULL ; ptr_tok = ptr_tok->next ) {
        int t = ptr_tok->tok ;
        if ( t == lex_ignore_token ) {
            sp |= ptr_tok->pp_space ;
            continue ;
        }
        this_tok->next = new_pptok () ;
        this_tok = this_tok->next ;
        copy_pptok ( this_tok, t, ptr_tok ) ;
        if ( sp ) {
            this_tok->pp_space |= sp ;
            sp = 0 ;
        }

        /* Check for macros */
        if ( t == lex_identifier ) {
            HASHID m = ptr_tok->pp_data.id.hash ;
            IDENTIFIER id = DEREF_id ( hashid_id ( m ) ) ;
            unsigned tag = TAG_id ( id ) ;
            switch ( tag ) {
                case id_obj_macro_tag :
                case id_func_macro_tag : {
                    DECL_SPEC ds ;
                    TOKEN_LOC tloc ;

                    /* Check for non-expanding tokens */
                    if ( IS_NULL_id ( this_tok->pp_data.id.use ) ) {
                        break ;
                    }

                    /* Check for recursive macro definitions */
                    ds = DEREF_dspec ( id_storage ( id ) ) ;
                    if ( ds & dspec_temp ) {
                        /* Mark this token as non-expanding */
                        ERROR err = ERR_cpp_rescan_recursive ( m ) ;
                        report ( crt_loc, err ) ;
                        this_tok->pp_data.id.use = NULL_id ;
                        break ;
                    }

                    /* Expand the macro using an extra location */
                    tloc.toks = &ptr_tok ;
                    tloc.next = locs ;
                    this_tok->tok = lex_ignore_token ;
                    this_tok->next = expand_macro ( m, &tloc, complete ) ;
                    while ( this_tok->next ) this_tok = this_tok->next ;
                    break ;
                }
            }
            if ( ptr_tok == NULL ) break ;
        }
    }
    this_tok->next = NULL ;
    return ( dummy_tok.next ) ;
}


/*
    EXPAND A SIMPLE LIST OF TOKENS

    This routine is the simplest form of expand_toks, where toks is a
    complete list, with no locations for reading further tokens.
*/

PPTOKEN *expand_tok_list
    PROTO_N ( ( toks ) )
    PROTO_T ( PPTOKEN *toks )
{
    return ( expand_toks ( toks, NIL ( TOKEN_LOC ), 1 ) ) ;
}


/*
    ASSERTION NAMESPACE

    The assertions occupy a namespace distinct from all other namespaces,
    including the macro namespace.
*/

NAMESPACE assert_namespace ;


/*
    CREATE A BUILT-IN MACRO

    This routine creates a built-in macro named nm defined by a single
    preprocessing token with token type t and associated data d.
*/

static void builtin_macro
    PROTO_N ( ( nm, t, d ) )
    PROTO_T ( CONST char *nm X int t X CONST char *d )
{
    if ( d ) {
        IDENTIFIER id ;
        string s = ustrlit ( nm ) ;
        unsigned long h = hash ( s ) ;
        HASHID macro = lookup_name ( s, h, 0, lex_identifier ) ;
        IDENTIFIER pid = DEREF_id ( hashid_id ( macro ) ) ;
        DECL_SPEC ds = ( dspec_defn | dspec_builtin ) ;

        /* Set up the token definition */
        PPTOKEN *p = new_pptok () ;
        p->tok = t ;
        p->pp_space = 0 ;
        p->pp_opts = NULL ;
        p->next = NULL ;
        if ( t == lex_integer_Hlit ) {
            /* Set up associated integer data */
            string c = xustrcpy ( ustrlit ( d ) ) ;
            p->pp_data.text = c ;
        } else if ( t == lex_string_Hlit ) {
            /* Set up associated string data */
            string c = xustrcpy ( ustrlit ( d ) ) ;
            p->pp_data.str.start = c ;
            p->pp_data.str.end = c + ustrlen ( c ) ;
        } else if ( t == lex_builtin_Hline || t == lex_builtin_Hfile ) {
            /* Set up associated location data */
            p->pp_space = crt_loc.column ;
            p->pp_data.loc.line = crt_loc.line ;
            p->pp_data.loc.posn = crt_loc.posn ;
        }

        /* Define the macro */
        MAKE_id_obj_macro ( macro, ds, NULL_nspace, crt_loc, p, id ) ;
        COPY_id ( id_alias ( id ), pid ) ;
        COPY_id ( hashid_id ( macro ), id ) ;
        if ( do_macro ) dump_declare ( id, &crt_loc, 1 ) ;
    }
    return ;
}


/*
    INITIALISE BUILT-IN MACROS

    This routine initialises the built-in macros, and sets up the assertion
    namespace.
*/

void init_macros
    PROTO_N ( ( m, a ) )
    PROTO_T ( int m X int a )
{
    CONST char *d = find_date ( "%s %2d %d" ) ;
    CONST char *t = find_time ( "%.2d:%.2d:%.2d" ) ;
    if ( m ) {
        /* Define built-in macros */
        builtin_macro ( "__LINE__", lex_builtin_Hline, "1" ) ;
        builtin_macro ( "__FILE__", lex_builtin_Hfile, "<unknown>" ) ;
        builtin_macro ( "__DATE__", lex_string_Hlit, d ) ;
        builtin_macro ( "__TIME__", lex_string_Hlit, t ) ;
        builtin_macro ( "__STDC__", lex_integer_Hlit, C_VERSION ) ;
        builtin_macro ( "__STDC_VERSION__", lex_integer_Hlit, ISOC_VERSION ) ;
#if LANGUAGE_CPP
        builtin_macro ( "__cplusplus", lex_integer_Hlit, CPP_VERSION ) ;
        builtin_macro ( "__tcpplus", lex_integer_Hlit, "1" ) ;
#else
        builtin_macro ( "__tcpplus", lex_integer_Hlit, "0" ) ;
#endif
    }
    assert_namespace = make_global_nspace ( "<assert>", 20 ) ;
    if ( a ) {
        /* Define built-in assertions */
        IGNORE make_assert ( KEYWORD ( lex_include ), lex_include ) ;
        IGNORE make_assert ( KEYWORD ( lex_keyword ), lex_keyword ) ;
        IGNORE make_assert ( KEYWORD ( lex_option ), lex_option ) ;
    }
    return ;
}