WebSVN – tendra.SVN – /branches/tendra5/src/producers/common/parse/literal.c

/*
                 Crown Copyright (c) 1997, 1998
    
    This TenDRA(r) Computer Program is subject to Copyright
    owned by the United Kingdom Secretary of State for Defence
    acting through the Defence Evaluation and Research Agency
    (DERA).  It is made available to Recipients with a
    royalty-free licence for its use, reproduction, transfer
    to other parties and amendment for any purpose not excluding
    product development provided that any such use et cetera
    shall be deemed to be acceptance of the following conditions:-
    
        (1) Its Recipients shall ensure that this Notice is
        reproduced upon any copies or amended versions of it;
    
        (2) Any amended version of it shall be clearly marked to
        show both the nature of and the organisation responsible
        for the relevant amendment or amendments;
    
        (3) Its onward transfer from a recipient to another
        party shall be deemed to be that party's acceptance of
        these conditions;
    
        (4) DERA gives no warranty or assurance as to its
        quality or suitability for any purpose and DERA accepts
        no liability whatsoever in relation to any use to which
        it may be put.
*/


#include "config.h"
#include <limits.h>
#include "c_types.h"
#include "exp_ops.h"
#include "flt_ops.h"
#include "id_ops.h"
#include "itype_ops.h"
#include "nat_ops.h"
#include "str_ops.h"
#include "type_ops.h"
#include "error.h"
#include "catalog.h"
#include "option.h"
#include "basetype.h"
#include "cast.h"
#include "char.h"
#include "chktype.h"
#include "constant.h"
#include "convert.h"
#include "dump.h"
#include "exception.h"
#include "expression.h"
#include "hash.h"
#include "inttype.h"
#include "lex.h"
#include "literal.h"
#include "preproc.h"
#include "syntax.h"
#include "tok.h"
#include "token.h"
#include "ustring.h"
#include "xalloc.h"


/*
    SPECIAL TABLE VALUES

    These macros are used in the tables of digits and escape sequences
    to indicate special values.
*/

#define NONE                    0xff
#define OCTE                    0xfe
#define HEXE                    0xfd
#define UNI4                    0xfc
#define UNI8                    0xfb


/*
    TABLE OF DIGITS

    This table gives the mapping of characters to digits.  The default
    table assumes the ASCII character set, for other codesets it needs
    to be rewritten.  The valid digits are 0-9, A-Z (which evaluate to
    10-35) and a-z (which evaluate to 10-35).  Invalid digits are
    indicated by NONE.
*/

unsigned char digit_values [ NO_CHAR + 1 ] = {
#define CHAR_DATA( A, B, C, D ) ( B ),
#include "char.h"
#undef CHAR_DATA
    NONE                /* dummy */
} ;


/*
    TABLE OF ESCAPE SEQUENCES

    This table gives the mapping of characters to escape sequences.  The
    default table assumes the ASCII character set, for other codesets it
    needs to be rewritten.  The valid escape sequences are \', \", \?,
    \\, \a, \b, \f, \n, \r, \t and \v.  Octal escape sequences are
    indicated by OCTE, hexadecimal escape sequences by HEXE, universal
    character names by UNI4 or UNI8, and illegal escape sequences by
    NONE.
*/

unsigned char escape_sequences [ NO_CHAR + 1 ] = {
#define CHAR_DATA( A, B, C, D ) ( C ),
#include "char.h"
#undef CHAR_DATA
    NONE                /* dummy */
} ;


/*
    SET AN ESCAPE SEQUENCE

    This routine sets the character escape value for the character
    literal expression a to be the character literal b, or an illegal
    escape if b is the null expression.
*/

void set_escape
    PROTO_N ( ( a, b ) )
    PROTO_T ( EXP a X EXP b )
{
    int c = get_char_value ( a ) ;
    int e = NONE ;
    if ( !IS_NULL_exp ( b ) ) {
        e = get_char_value ( b ) ;
        if ( e == char_illegal ) e = NONE ;
    }
    if ( c >= 0 && c < NO_CHAR ) {
        escape_sequences [c] = ( unsigned char ) e ;
    }
    return ;
}


/*
    CHECK A STRING OF DIGITS

    This routine scans the string s for valid digits for the given base.
    It returns a pointer to the first character which is not a valid
    digit.
*/

static string check_digits
    PROTO_N ( ( s, base ) )
    PROTO_T ( string s X unsigned base )
{
    unsigned b ;
    character c ;
    while ( c = *s, c != 0 ) {
#if FS_EXTENDED_CHAR
        if ( IS_EXTENDED ( c ) ) break ;
#endif
        b = ( unsigned ) digit_values [c] ; ;
        if ( b >= base ) break ;
        s++ ;
    }
    return ( s ) ;
}


/*
    EVALUATE A STRING OF DIGITS

    This routine evaluates the string of digits starting with s and
    ending with t using the given base (which will be at most 16).  It
    is assumed that all of these digits are in the correct range.
*/

static NAT eval_digits
    PROTO_N ( ( s, t, base ) )
    PROTO_T ( string s X string t X unsigned base )
{
    NAT n ;
    int m = 0 ;
    string r = s ;
    unsigned long v = 0 ;
    unsigned long b = ( unsigned long ) base ;
    while ( r != t && m < 8 ) {
        /* Evaluate first few digits */
        unsigned long d = ( unsigned long ) digit_values [ *r ] ;
        v = b * v + d ;
        m++ ;
        r++ ;
    }
    n = make_nat_value ( v ) ;
    while ( r != t ) {
        /* Evaluate further digits */
        unsigned d = ( unsigned ) digit_values [ *r ] ;
        n = make_nat_literal ( n, base, d ) ;
        r++ ;
    }
    return ( n ) ;
}


/*
    EVALUATE A STRING OF DIGITS

    This routine is the same as eval_digits except that it assumes that
    the result fits inside an unsigned long, and reports an error
    otherwise.
*/

static unsigned long eval_char_digits
    PROTO_N ( ( s, t, base ) )
    PROTO_T ( string s X string t X unsigned base )
{
    string r ;
    int overflow = 0 ;
    unsigned long n = 0 ;
    unsigned long b = ( unsigned long ) base ;
    for ( r = s ; r != t ; r++ ) {
        unsigned long m = n ;
        n = b * n + ( unsigned long ) digit_values [ *r ] ;
        if ( n < m ) overflow = 1 ;
    }
    if ( overflow ) report ( crt_loc, ERR_lex_ccon_large () ) ;
    return ( n ) ;
}


/*
    EVALUATE A LINE NUMBER

    This routine evaluates the sequence of decimal digits s as a line
    number in a #line, or similar, preprocessing directive.  Any errors
    arising are indicated using err.  This is a bit pattern consisting
    of 2 if s is not a simple string of decimal digits, and 1 if its
    value exceeds 32767.
*/

unsigned long eval_line_digits
    PROTO_N ( ( s, err ) )
    PROTO_T ( string s X unsigned *err )
{
    string r ;
    unsigned e = 0 ;
    unsigned long n = 0 ;
    string t = check_digits ( s, ( unsigned ) 10 ) ;
    if ( *t ) e = 2 ;
    for ( r = s ; r != t ; r++ ) {
        n = 10 * n + ( unsigned long ) digit_values [ *r ] ;
        if ( n > 0x7fff ) e |= 1 ;
    }
    *err = e ;
    return ( n ) ;
}


/*
    STRING HASH TABLE

    This variable gives the hash table used in shared string literals.
*/

static STRING *string_hash_table = NULL ;
#define HASH_STRING_SIZE        ( ( unsigned long ) 256 )


/*
    STRING AND CHARACTER LITERAL TYPES

    The type of a simple character literal is char in C++, but int in C.
    The variable type_char_lit is used to hold the appropriate result
    type.  Other string and character literals have fixed types, however
    for convenience variables are used to identify them.
*/

static TYPE type_char_lit ;
static TYPE type_mchar_lit ;
static TYPE type_wchar_lit ;
static TYPE type_string_lit ;
static TYPE type_wstring_lit ;
CV_SPEC cv_string = cv_none ;


/*
    SET THE CHARACTER LITERAL TYPE

    This routine sets the type of a character literal to be t.  t must be
    an integral type.  Note that only the representation type is set to t,
    the semantic type is always char.
*/

void set_char_lit
    PROTO_N ( ( t ) )
    PROTO_T ( TYPE t )
{
    if ( IS_type_integer ( t ) ) {
        INT_TYPE r = DEREF_itype ( type_integer_rep ( t ) ) ;
        INT_TYPE s = DEREF_itype ( type_integer_rep ( type_char ) ) ;
        type_char_lit = make_itype ( r, s ) ;
    } else {
        report ( preproc_loc, ERR_pragma_char_lit ( t ) ) ;
    }
    return ;
}


/*
    TABLE OF INTEGER LITERAL SPECIFICATIONS

    The type LITERAL_INFO is used to represent an item in an integer
    literal type specification.  The table int_lit_spec holds the
    specifications for the various combinations of base and suffix.
*/

typedef struct lit_info_tag {
    int tag ;
    TYPE type ;
    NAT bound ;
    IDENTIFIER tok ;
    int tok_no ;
    int opt ;
    struct lit_info_tag *next ;
} LITERAL_INFO ;

static LITERAL_INFO *int_lit_spec [ BASE_NO ] [ SUFFIX_NO ] = {
    { NULL, NULL, NULL, NULL, NULL, NULL },
    { NULL, NULL, NULL, NULL, NULL, NULL },
    { NULL, NULL, NULL, NULL, NULL, NULL }
} ;

static LITERAL_INFO *crt_int_lit = NULL ;
static LITERAL_INFO **ptr_int_lit = NULL ;


/*
    TABLE OF BUILT-IN INTEGER LITERAL SPECIFICATIONS

    This table gives the possible types and built-in tokens for the
    various base and suffix combinations.
*/

static struct {
    unsigned char type [6] ;
    int tok ;
    LIST ( TYPE ) cases ;
} int_lit_tok [ BASE_NO ] [ SUFFIX_NO ] = {
    {
        { { 2, 0, 2, 2, 1, 1 }, TOK_lit_int, NULL_list ( TYPE ) },
        { { 0, 2, 0, 2, 0, 1 }, TOK_lit_unsigned, NULL_list ( TYPE ) },
        { { 0, 0, 2, 2, 1, 1 }, TOK_lit_long, NULL_list ( TYPE ) },
        { { 0, 0, 0, 2, 0, 1 }, TOK_lit_ulong, NULL_list ( TYPE ) },
        { { 0, 0, 0, 0, 2, 2 }, TOK_lit_llong, NULL_list ( TYPE ) },
        { { 0, 0, 0, 0, 0, 2 }, TOK_lit_ullong, NULL_list ( TYPE ) }
    },
    {
        { { 2, 2, 2, 2, 1, 1 }, TOK_lit_hex, NULL_list ( TYPE ) },
        { { 0, 2, 0, 2, 0, 1 }, TOK_lit_unsigned, NULL_list ( TYPE ) },
        { { 0, 0, 2, 2, 1, 1 }, TOK_lit_long, NULL_list ( TYPE ) },
        { { 0, 0, 0, 2, 0, 1 }, TOK_lit_ulong, NULL_list ( TYPE ) },
        { { 0, 0, 0, 0, 2, 2 }, TOK_lit_llong, NULL_list ( TYPE ) },
        { { 0, 0, 0, 0, 0, 2 }, TOK_lit_ullong, NULL_list ( TYPE ) }
    },
    {
        { { 2, 2, 2, 2, 1, 1 }, TOK_lit_hex, NULL_list ( TYPE ) },
        { { 0, 2, 0, 2, 0, 1 }, TOK_lit_unsigned, NULL_list ( TYPE ) },
        { { 0, 0, 2, 2, 1, 1 }, TOK_lit_long, NULL_list ( TYPE ) },
        { { 0, 0, 0, 2, 0, 1 }, TOK_lit_ulong, NULL_list ( TYPE ) },
        { { 0, 0, 0, 0, 2, 2 }, TOK_lit_llong, NULL_list ( TYPE ) },
        { { 0, 0, 0, 0, 0, 2 }, TOK_lit_ullong, NULL_list ( TYPE ) }
    }
} ;


/*
    INITIALISE TABLE OF INTEGER LITERAL TYPES

    This routine initialises the string and character literal types and
    the table int_lit_info.  The initial values for the table are given
    by the following lists of types:

        decimal:        ( int, long, unsigned long ),
        octal/hex:      ( int, unsigned, long, unsigned long ),
        U suffix:       ( unsigned, unsigned long ),
        L suffix:       ( long, unsigned long ),
        UL suffix:      ( unsigned long ),
        LL suffix:      ( long long, unsigned long long ),
        ULL suffix:     ( unsigned long long ).

    Each integer literal is checked against each type in the list
    indicated by the form of the literal.  If it fits into a type then
    that is the type of the literal.  If it does not fit into any type
    then an error is raised.  If whether it fits into a particular type
    is target dependent then a literal integer type, giving the literal
    value and a list of possible types, is constructed to express the
    result type.

    The string and character types are:

        character:              char,
        multi-character:        int,
        wide character:         wchar_t,
        string:                 const char [n],
        wide string:            const wchar_t [n].

    Variants are that characters have type int in C and that string
    literals are not const in pre-ISO C++ and C.
*/

void init_literal
    PROTO_Z ()
{
    int b, s ;
    BUILTIN_TYPE n ;
    OPTION opt = option ( OPT_int_overflow ) ;
    ASSERT ( !IS_NULL_type ( type_char ) ) ;

    /* String and character literal types */
    type_mchar_lit = type_sint ;
    type_wchar_lit = type_wchar_t ;
    type_string_lit = type_char ;
    type_wstring_lit = type_wchar_t ;
#if LANGUAGE_CPP
    set_char_lit ( type_char ) ;
    set_string_qual ( cv_const ) ;
#else
    set_char_lit ( type_sint ) ;
    set_string_qual ( cv_none ) ;
#endif

    /* Set up type lists */
    for ( b = 0 ; b < BASE_NO ; b++ ) {
        for ( s = 0 ; s < SUFFIX_NO ; s++ ) {
            LIST ( TYPE ) p = NULL_list ( TYPE ) ;
            begin_literal ( b, s ) ;
            for ( n = 0 ; n < 6 ; n++ ) {
                if ( int_lit_tok [b] [s].type [n] == 2 ) {
                    TYPE t = type_builtin [ ntype_sint + n ] ;
                    add_range_literal ( NULL_exp, 1 ) ;
                    add_type_literal ( t ) ;
                    CONS_type ( t, p, p ) ;
                }
            }
            add_range_literal ( NULL_exp, 0 ) ;
            add_token_literal ( NULL_id, ( unsigned ) opt ) ;
            p = REVERSE_list ( p ) ;
            int_lit_tok [b] [s].cases = uniq_type_set ( p ) ;
        }
    }

    /* Set up string hash table */
    if ( string_hash_table == NULL ) {
        unsigned long i ;
        STRING *q = xmalloc_nof ( STRING, HASH_STRING_SIZE ) ;
        for ( i = 0 ; i < HASH_STRING_SIZE ; i++ ) q [i] = NULL_str ;
        string_hash_table = q ;
    }
    return ;
}


/*
    SET THE CV-QUALIFIERS FOR A STRING LITERAL

    This routine sets the string and wide string literal types to be
    cv-qualified.
*/

void set_string_qual
    PROTO_N ( ( cv ) )
    PROTO_T ( CV_SPEC cv )
{
    type_string_lit = qualify_type ( type_string_lit, cv, 0 ) ;
    type_wstring_lit = qualify_type ( type_wstring_lit, cv, 0 ) ;
    cv_string = cv ;
    return ;
}


/*
    BEGIN A LITERAL SPECIFICATION DEFINITION

    This routine is called to begin the specification of the integer
    literals of the given base and suffix.
*/

void begin_literal
    PROTO_N ( ( base, suff ) )
    PROTO_T ( int base X int suff )
{
    LITERAL_INFO **p = &( int_lit_spec [ base ] [ suff ] ) ;
    *p = NULL ;
    ptr_int_lit = p ;
    crt_int_lit = NULL ;
    return ;
}


/*
    ADD A BOUND TO A LITERAL SPECIFICATION

    This routine is used to specify a bound in the current literal
    specification.  If n is 0 then the bound matches all values, if it
    is 1 then the bound matches all the values in the following type,
    and if it is 2 then the bound matches all values less than or equal
    to the integer literal expression e.
*/

void add_range_literal
    PROTO_N ( ( e, n ) )
    PROTO_T ( EXP e X int n )
{
    LITERAL_INFO *p = xmalloc_one ( LITERAL_INFO ) ;
    p->tag = n ;
    if ( !IS_NULL_exp ( e ) && IS_exp_int_lit ( e ) ) {
        p->bound = DEREF_nat ( exp_int_lit_nat ( e ) ) ;
    } else {
        p->bound = small_nat [0] ;
    }
    p->type = NULL_type ;
    p->tok = NULL_id ;
    p->tok_no = -1 ;
    p->opt = OPT_none ;
    p->next = NULL ;
    *ptr_int_lit = p ;
    crt_int_lit = p ;
    ptr_int_lit = &( p->next ) ;
    return ;
}


/*
    ADD A TYPE TO A LITERAL SPECIFICATION

    This routine specifies the type t for all values under the current
    bound in the current literal specification.
*/

void add_type_literal
    PROTO_N ( ( t ) )
    PROTO_T ( TYPE t )
{
    NAT n ;
    LITERAL_INFO *p = crt_int_lit ;
    if ( IS_type_integer ( t ) ) {
        if ( !is_arg_promote ( t ) ) {
            /* Type should promote to itself */
            report ( preproc_loc, ERR_pragma_lit_type ( t ) ) ;
            t = promote_type ( t ) ;
        }
    } else {
        /* Type should be integral */
        if ( !IS_type_error ( t ) ) {
            report ( preproc_loc, ERR_pragma_lit_type ( t ) ) ;
        }
        t = type_ulong ;
    }
    p->type = qualify_type ( t, cv_none, 0 ) ;
    n = p->bound ;
    if ( p->tag == 2 ) {
        if ( check_nat_range ( t, n ) != 0 ) {
            /* Given bound should fit into type */
            report ( preproc_loc, ERR_pragma_lit_range ( n, t ) ) ;
            n = max_type_value ( t, 0 ) ;
        }
    } else {
        n = max_type_value ( t, 0 ) ;
    }
    p->bound = n ;
    return ;
}


/*
    ADD A TOKEN TO A LITERAL SPECIFICATION

    This routine specifies that the token id should be used to calculate
    the type for all values under the current bound in the current
    literal specification.  An error with severity sev is reported.
*/

void add_token_literal
    PROTO_N ( ( id, sev ) )
    PROTO_T ( IDENTIFIER id X unsigned sev )
{
    int n = -1 ;
    LITERAL_INFO *p = crt_int_lit ;
    if ( !IS_NULL_id ( id ) ) {
        id = resolve_token ( id, "ZZ", 0 ) ;
        if ( !IS_NULL_id ( id ) ) n = builtin_token ( id ) ;
    }
    if ( p->tag == 1 ) {
        report ( preproc_loc, ERR_pragma_lit_question () ) ;
        p->tag = 3 ;
    }
    p->tok = id ;
    p->tok_no = n ;
    switch ( sev ) {
        case OPTION_ON : p->opt = OPT_error ; break ;
        case OPTION_WARN : p->opt = OPT_warning ; break ;
        default : p->opt = OPT_none ; break ;
    }
    return ;
}


/*
    FIND AN INTEGER LITERAL TYPE

    This routine finds the type of the integer constant lit specified
    with base base and suffix suff.  num gives the text used to specify
    the constant for the purposes of error reporting.  fit is set to
    true if lit definitely fits into the result.
*/

TYPE find_literal_type
    PROTO_N ( ( lit, base, suff, num, fit ) )
    PROTO_T ( NAT lit X int base X int suff X string num X int *fit )
{
    TYPE t ;
    int tok ;
    INT_TYPE it ;
    int big = 0 ;
    int have_tok = 0 ;
    int opt = OPT_error ;
    NAT n = small_nat [0] ;
    IDENTIFIER tid = NULL_id ;
    LIST ( TYPE ) qt = NULL_list ( TYPE ) ;
    LITERAL_INFO *pt = int_lit_spec [ base ] [ suff ] ;

    /* Deal with calculated literals */
    switch ( TAG_nat ( lit ) ) {
        case nat_neg_tag : {
            lit = DEREF_nat ( nat_neg_arg ( lit ) ) ;
            t = find_literal_type ( lit, base, suff, num, fit ) ;
            return ( t ) ;
        }
        case nat_token_tag : {
            t = type_sint ;
            *fit = 1 ;
            return ( t ) ;
        }
        case nat_calc_tag : {
            EXP e = DEREF_exp ( nat_calc_value ( lit ) ) ;
            t = DEREF_type ( exp_type ( e ) ) ;
            *fit = 1 ;
            return ( t ) ;
        }
    }

    /* Deal with simple literals */
    while ( pt != NULL ) {
        int ch = 4 ;
        TYPE s = pt->type ;
        switch ( pt->tag ) {
            case 0 : {
                TYPE r = s ;
                if ( IS_NULL_type ( r ) ) r = type_ulong ;
                ch = check_nat_range ( r, lit ) ;
                if ( ch == 0 ) *fit = 1 ;
                if ( ch > 4 ) big = ch ;
                if ( big ) n = max_type_value ( NULL_type, 0 ) ;
                ch = big ;
                break ;
            }
            case 1 : {
                n = pt->bound ;
                ch = check_nat_range ( s, lit ) ;
                if ( ch == 0 ) *fit = 1 ;
                break ;
            }
            case 2 : {
                n = pt->bound ;
                if ( compare_nat ( n, lit ) >= 0 ) {
                    if ( !IS_NULL_type ( s ) ) *fit = 1 ;
                    ch = 0 ;
                }
                break ;
            }
        }
        if ( ch == 0 ) {
            /* lit definitely fits into bound */
            if ( !IS_NULL_type ( s ) && IS_NULL_list ( qt ) ) {
                /* No previous fit */
                return ( s ) ;
            }
        }
        if ( ch <= 2 ) {
            /* lit may fit into bound */
            if ( !IS_NULL_type ( s ) ) {
                if ( have_tok == 0 ) {
                    INT_TYPE is = DEREF_itype ( type_integer_rep ( s ) ) ;
                    LIST ( TYPE ) st = DEREF_list ( itype_cases ( is ) ) ;
                    qt = union_type_set ( qt, st ) ;
                    if ( ch == 0 ) have_tok = 1 ;
                }
            } else {
                if ( have_tok == 0 && !IS_NULL_id ( pt->tok ) ) {
                    DESTROY_list ( qt, SIZE_type ) ;
                    tok = int_lit_tok [ base ] [ suff ].tok ;
                    if ( pt->tok_no == tok ) {
                        qt = int_lit_tok [ base ] [ suff ].cases ;
                    } else if ( suff < SUFFIX_LL ) {
                        qt = all_prom_types ;
                    } else {
                        qt = all_llong_types ;
                    }
                    have_tok = 2 ;
                }
                break ;
            }
        }
        if ( ch > 4 ) big = ch ;
        pt = pt->next ;
    }

    /* Tokenised result */
    if ( have_tok != 2 ) {
        /* Find list of possible types */
        if ( IS_NULL_list ( qt ) ) {
            qt = int_lit_tok [ base ] [ suff ].cases ;
        } else {
            qt = REVERSE_list ( qt ) ;
            qt = uniq_type_set ( qt ) ;
        }
    }
    if ( pt ) {
        /* Get token information from table */
        tid = pt->tok ;
        opt = pt->opt ;
    }
    if ( num && !( *fit ) ) {
        /* Report error if necessary */
        ERROR err = ERR_lex_icon_large ( num, n ) ;
        err = set_severity ( err, opt, 0 ) ;
        if ( !IS_NULL_err ( err ) ) report ( crt_loc, err ) ;
    }
    if ( LENGTH_list ( qt ) == 1 ) {
        /* Only one possible case */
        t = DEREF_type ( HEAD_list ( qt ) ) ;
        DESTROY_list ( qt, SIZE_type ) ;
        return ( t ) ;
    }
    tok = int_lit_tok [ base ] [ suff ].tok ;
    MAKE_itype_literal ( NULL_type, qt, lit, tok, base, suff, tid, it ) ;
    t = promote_itype ( it, it ) ;
    return ( t ) ;
}


/*
    ANALYSE AN INTEGER OR FLOATING LITERAL

    This routine analyses the integer or floating literal given by the
    string str, constructing the corresponding expression.  The location
    given by ptok is assigned with lex_integer_Hexp or lex_floating_Hexp
    depending on the form of the literal.  Note that str can be an area
    of read-only memory for integer literals, but not for floating
    literals.
*/

EXP make_literal_exp
    PROTO_N ( ( str, ptok, force ) )
    PROTO_T ( string str X int *ptok X int force )
{
    EXP e ;
    string r ;
    int err = 0 ;
    int flt = 0 ;
    string s = str ;
    unsigned base = 10 ;
    string dot_posn = NULL ;
    string exp_posn = NULL ;
    int form = BASE_DECIMAL ;

    /* Check small literals */
    character c1 = s [0] ;
    character c2 = 0 ;
    if ( c1 ) c2 = s [1] ;
    if ( c2 == 0 && ( c1 >= char_zero && c1 <= char_nine ) ) {
        unsigned etag = exp_int_lit_tag ;
        int n = ( int ) digit_values [ c1 ] ;
        NAT lit = small_nat [n] ;
        if ( IS_NULL_nat ( lit ) ) lit = make_small_nat ( n ) ;
        if ( n == 0 ) etag = exp_null_tag ;
        MAKE_exp_int_lit ( type_sint, lit, etag, e ) ;
        *ptok = lex_integer_Hexp ;
        return ( e ) ;
    }

    if ( c1 == char_zero && ( c2 == char_x || c2 == char_X ) ) {
        /* Hexadecimal integer */
        base = 16 ;
        form = BASE_HEXADECIMAL ;
        r = s + 2 ;
        s = check_digits ( r, base ) ;
        if ( s == r ) err = 1 ;
    } else {
        if ( c1 == char_dot ) {
            /* Fractional component of floating literal */
            dot_posn = s ;
            r = s + 1 ;
            s = check_digits ( r, base ) ;
            if ( s == r ) err = 1 ;
            flt = 1 ;
        } else {
            /* Sequence of decimal digits */
            r = s ;
            s = check_digits ( r, base ) ;
            if ( s == r ) {
                if ( c1 == char_plus || c1 == char_minus ) {
                    /* Extension to handle signs */
                    e = make_literal_exp ( str + 1, ptok, force ) ;
                    if ( c1 == char_minus ) {
                        e = make_uminus_exp ( lex_minus, e ) ;
                    }
                    return ( e ) ;
                }
                err = 1 ;
            }
            if ( s [0] == char_dot ) {
                /* Fractional component of floating literal */
                dot_posn = s ;
                s = check_digits ( s + 1, base ) ;
                flt = 1 ;
            }
        }
        exp_posn = s ;
        c2 = s [0] ;
        if ( c2 == char_e || c2 == char_E ) {
            /* Exponent component of floating literal */
            c2 = s [1] ;
            if ( c2 == char_plus || c2 == char_minus ) s++ ;
            r = s + 1 ;
            s = check_digits ( r, base ) ;
            if ( s == r ) err = 1 ;
            flt = 1 ;
        }
        if ( c1 == char_zero && !flt ) {
            /* Octal integer */
            base = 8 ;
            form = BASE_OCTAL ;
            r = check_digits ( str, base ) ;
            if ( r != s ) {
                /* Digits contain 8 or 9 */
                report ( crt_loc, ERR_lex_icon_octal ( str ) ) ;
            }
        }
    }

    if ( flt ) {
        /* Floating literals */
        int zero ;
        NAT expon ;
        character ep ;
        string frac_part ;
        string int_part = str ;
        string suff_posn = s ;
        unsigned trail_zero = 0 ;
        FLOAT lit = NULL_flt ;
        TYPE t = type_double ;

        /* Check float suffix */
        c1 = s [0] ;
        if ( c1 == char_f || c1 == char_F ) {
            /* Suffix F */
            t = type_float ;
            s++ ;
            c1 = s [0] ;
        } else if ( c1 == char_l || c1 == char_L ) {
            /* Suffix L */
            t = type_ldouble ;
            s++ ;
            c1 = s [0] ;
        }

        /* Check for end of number */
        if ( c1 || err ) report ( crt_loc, ERR_lex_literal_bad ( str ) ) ;

        /* Find number components (involves writing to s)  */
        while ( int_part [0] == char_zero ) {
            /* Remove initial zeros */
            int_part++ ;
        }
        if ( dot_posn ) {
            dot_posn [0] = 0 ;
            if ( int_part == dot_posn ) int_part = small_number [0] ;
            frac_part = dot_posn + 1 ;
            if ( frac_part == exp_posn ) {
                frac_part = small_number [0] ;
            } else {
                /* Remove trailing zeros */
                string frac_zero = exp_posn - 1 ;
                while ( frac_zero [0] == char_zero ) {
                    frac_zero [0] = 0 ;
                    frac_zero-- ;
                    trail_zero++ ;
                }
                if ( frac_zero == dot_posn ) frac_part = small_number [0] ;
            }
        } else {
            if ( int_part == exp_posn ) int_part = small_number [0] ;
            frac_part = small_number [0] ;
        }
        ep = exp_posn [0] ;
        exp_posn [0] = 0 ;
        if ( ep == char_e || ep == char_E ) {
            /* Evaluate exponent */
            r = exp_posn + 1 ;
            c2 = r [0] ;
            if ( c2 == char_minus || c2 == char_plus ) r++ ;
            expon = eval_digits ( r, suff_posn, base ) ;
            if ( c2 == char_minus ) expon = negate_nat ( expon ) ;
            zero = is_zero_nat ( expon ) ;
        } else {
            expon = small_nat [0] ;
            zero = 1 ;
        }
        if ( zero && ustreq ( frac_part, small_number [0] ) ) {
            int i ;
            for ( i = 0 ; i < SMALL_FLT_SIZE ; i++ ) {
                if ( ustreq ( int_part, small_number [i] ) ) {
                    lit = get_float ( t, i ) ;
                    break ;
                }
            }
        }
        if ( IS_NULL_flt ( lit ) ) {
            int_part = xustrcpy ( int_part ) ;
            frac_part = xustrcpy ( frac_part ) ;
        }
        if ( trail_zero ) {
            /* Restore trailing zeros */
            r = exp_posn - 1 ;
            do {
                r [0] = char_zero ;
                r-- ;
                trail_zero-- ;
            } while ( trail_zero ) ;
        }
        if ( dot_posn ) dot_posn [0] = char_dot ;
        exp_posn [0] = ep ;

        /* Construct result - type is as per suffix */
        if ( IS_NULL_flt ( lit ) ) {
            MAKE_flt_simple ( int_part, frac_part, expon, lit ) ;
        }
        MAKE_exp_float_lit ( t, lit, e ) ;
        *ptok = lex_floating_Hexp ;

    } else {
        /* Integer literals */
        TYPE t ;
        NAT lit ;
        int ls = 0 ;
        int us = 0 ;
        int fit = 0 ;

        /* Find integer value */
        r = str ;
        if ( form == BASE_HEXADECIMAL ) r += 2 ;
        lit = eval_digits ( r, s, base ) ;

        /* Check integer suffix */
        c1 = s [0] ;
        if ( c1 == char_u || c1 == char_U ) {
            us = 1 ;
            s++ ;
            c1 = s [0] ;
        }
        if ( c1 == char_l || c1 == char_L ) {
            ls = 1 ;
            if ( s [1] == c1 && basetype_info [ ntype_sllong ].key ) {
                report ( crt_loc, ERR_lex_icon_llong ( str ) ) ;
                ls = 2 ;
                s++ ;
            }
            s++ ;
            c1 = s [0] ;
        } else {
            /* Map 'int' to 'long' in '#if' expressions */
            if ( in_hash_if_exp ) ls = 1 ;
        }
        if ( us == 0 && ( c1 == char_u || c1 == char_U ) ) {
            us = 1 ;
            s++ ;
            c1 = s [0] ;
        }

        /* Check for end of number */
        if ( c1 || err ) report ( crt_loc, ERR_lex_literal_bad ( str ) ) ;

        /* Find literal type */
        if ( force ) {
            t = type_ulong ;
            fit = 1 ;
        } else {
            int suff = SUFFIX ( us, ls ) ;
            t = find_literal_type ( lit, form, suff, str, &fit ) ;
        }
        MAKE_exp_int_lit ( t, lit, exp_int_lit_tag, e ) ;
        if ( !fit ) {
            /* Force result to be a calculated value */
            MAKE_exp_cast ( t, CONV_INT_INT, e, e ) ;
            MAKE_nat_calc ( e, lit ) ;
            MAKE_exp_int_lit ( t, lit, exp_int_lit_tag, e ) ;
        }
        *ptok = lex_integer_Hexp ;
    }
    return ( e ) ;
}


/*
    IS A FLOATING LITERAL ZERO?

    This routine checks whether the floating point literal f is zero.
*/

int is_zero_float
    PROTO_N ( ( f ) )
    PROTO_T ( FLOAT f )
{
    string s ;
    character c ;
    s = DEREF_string ( flt_simple_int_part ( f ) ) ;
    while ( c = *( s++ ), c != 0 ) {
        if ( c != char_zero ) return ( 0 ) ;
    }
    s = DEREF_string ( flt_simple_frac_part ( f ) ) ;
    while ( c = *( s++ ), c != 0 ) {
        if ( c != char_zero ) return ( 0 ) ;
    }
    return ( 1 ) ;
}


/*
    ARE TWO FLOATING LITERALS EQUAL?

    This routine checks whether the floating point literals f and g are
    equal.  Note that this is equality of representation rather than
    equality of the underlying numbers.
*/

int eq_float_lit
    PROTO_N ( ( f, g ) )
    PROTO_T ( FLOAT f X FLOAT g )
{
    NAT ef, eg ;
    ulong nf, ng ;
    string af, ag ;
    string bf, bg ;
    if ( EQ_flt ( f, g ) ) return ( 1 ) ;
    DECONS_flt_simple ( nf, af, bf, ef, f ) ;
    DECONS_flt_simple ( ng, ag, bg, eg, g ) ;
    if ( !ustreq ( af, ag ) ) return ( 0 ) ;
    if ( !ustreq ( bf, bg ) ) return ( 0 ) ;
    if ( compare_nat ( ef, eg ) != 0 ) return ( 0 ) ;
    if ( nf == LINK_NONE ) COPY_ulong ( flt_tok ( f ), ng ) ;
    if ( ng == LINK_NONE ) COPY_ulong ( flt_tok ( g ), nf ) ;
    return ( 1 ) ;
}


/*
    DEFAULT ROUNDING MODE

    This variable gives the default rounding mode used for converting
    floating point expressions to integers.
*/

RMODE crt_round_mode = rmode_to_zero ;


/*
    ROUND A FLOATING POINT LITERAL

    This routine rounds the floating point literal f to an integer
    literal by the rounding mode corresponding to mode.  The null integer
    literal is returned to indicate a target dependent literal.  The
    range of values in which the result is target independent is actually
    rather small - it is given by FLT_DIG.
*/

NAT round_float_lit
    PROTO_N ( ( f, mode ) )
    PROTO_T ( FLOAT f X RMODE mode )
{
    NAT res ;
    unsigned base = 10 ;
    unsigned long i, j, n ;
    unsigned long res_len ;
    unsigned long pre_len ;
    unsigned long exp_val ;
    character result [100] ;

    /* Decompose simple literal */
    string int_part = DEREF_string ( flt_simple_int_part ( f ) ) ;
    string frac_part = DEREF_string ( flt_simple_frac_part ( f ) ) ;
    NAT expon = DEREF_nat ( flt_simple_exponent ( f ) ) ;

    /* Find component lengths */
    unsigned long int_len = ( unsigned long ) ustrlen ( int_part ) ;
    unsigned long frac_len = ( unsigned long ) ustrlen ( frac_part ) ;

    /* Allow for initial zeros */
    while ( int_part [0] == char_zero ) {
        int_part++ ;
        int_len-- ;
    }

    /* Allow for exponent */
    if ( IS_nat_neg ( expon ) ) {
        expon = DEREF_nat ( nat_neg_arg ( expon ) ) ;
        exp_val = get_nat_value ( expon ) ;
        if ( exp_val > int_len ) {
            res_len = 0 ;
            pre_len = exp_val - int_len ;
        } else {
            res_len = int_len - exp_val ;
            pre_len = 0 ;
        }
    } else {
        exp_val = get_nat_value ( expon ) ;
        res_len = int_len + exp_val ;
        pre_len = 0 ;
    }

    /* Allow for initial zeros in fractional part */
    if ( int_part [0] == 0 ) {
        while ( frac_part [0] == char_zero ) {
            frac_part++ ;
            frac_len-- ;
            if ( res_len == 0 ) {
                pre_len++ ;
            } else {
                res_len-- ;
            }
        }
        if ( frac_part [0] == 0 ) {
            /* Zero floating literal */
            res = small_nat [0] ;
            return ( res ) ;
        }
    }

    /* Extreme values are target dependent */
    if ( pre_len > 6 ) return ( NULL_nat ) ;
    if ( res_len > 6 ) return ( NULL_nat ) ;
    if ( exp_val == EXTENDED_MAX ) return ( NULL_nat ) ;

    /* Construct integer string */
    j = 0 ;
    n = res_len ;
    for ( i = 0 ; i < pre_len ; i++ ) {
        if ( j < n ) {
            result [j] = char_zero ;
            j++ ;
        }
    }
    for ( i = 0 ; i < int_len ; i++ ) {
        if ( j < n ) {
            result [j] = int_part [i] ;
            j++ ;
        }
    }
    for ( i = 0 ; i < frac_len ; i++ ) {
        if ( j < n ) {
            result [j] = frac_part [i] ;
            j++ ;
        }
    }
    for ( ; j < n ; j++ ) result [j] = char_zero ;
    result [n] = 0 ;

    /* Calculate the result */
    res = eval_digits ( result, result + res_len, base ) ;
    UNUSED ( mode ) ;
    return ( res ) ;
}


/*
    EVALUATE A UNICODE CHARACTER

    This routine evaluates the unicode character with prefix c, consisting
    of n hex digits, given by ps.  ps is advanced to the position following
    the hex digits.
*/

unsigned long eval_unicode
    PROTO_N ( ( c, n, pc, ps, err ) )
    PROTO_T ( int c X unsigned n X int *pc X string *ps X ERROR *err )
{
    string r = *ps ;
    unsigned long u ;
    unsigned base = 16 ;
    string s = check_digits ( r, base ) ;
    unsigned m = ( unsigned ) ( s - r ) ;
    if ( m < n ) {
        add_error ( err, ERR_lex_charset_len ( c, n ) ) ;
    } else {
        s = r + n ;
    }
    *ps = s ;
    u = eval_char_digits ( r, s, base ) ;
    add_error ( err, ERR_lex_charset_replace ( u ) ) ;
    if ( u < 0x20 || ( u >= 0x7f && u <= 0x9f ) || is_legal_char ( u ) ) {
        add_error ( err, ERR_lex_charset_bad ( u ) ) ;
        *pc = CHAR_SIMPLE ;
    } else {
        if ( u <= ( unsigned long ) 0xffff ) *pc = CHAR_UNI4 ;
    }
    return ( u ) ;
}


/*
    GET A MULTI-BYTE CHARACTER FROM A STRING

    This routine returns the multi-byte character pointed to by the
    string s.  It assigns the character type to pc.
*/

unsigned long get_multi_char
    PROTO_N ( ( s, pc ) )
    PROTO_T ( string s X int *pc )
{
    int i ;
    unsigned long n = 0 ;
    for ( i = MULTI_WIDTH - 1 ; i >= 1 ; i-- ) {
        n = ( n << 8 ) + ( unsigned long ) s [i] ;
    }
    *pc = ( int ) s [0] ;
    return ( n ) ;
}


/*
    ADD A MULTI-BYTE CHARACTER TO A STRING

    This routine adds the multi-byte character n of type ch to the
    string s.  A multi-byte character is represented by 5 characters.
    The first is a key describing how the character was described (a
    simple character, a hex or octal escape sequence, a unicode
    character etc.).  The next four characters give the character value.
*/

void add_multi_char
    PROTO_N ( ( s, n, ch ) )
    PROTO_T ( string s X unsigned long n X int ch )
{
    int i ;
    s [0] = ( character ) ch ;
    for ( i = 1 ; i < MULTI_WIDTH ; i++ ) {
        s [i] = ( character ) ( n & 0xff ) ;
        n >>= 8 ;
    }
    if ( n ) report ( crt_loc, ERR_lex_ccon_large () ) ;
    return ;
}


/*
    CREATE A MULTI-BYTE STRING

    This routine creates a multi-byte string of length n in s from the
    string t of kind k.
*/

static void make_multi_string
    PROTO_N ( ( s, t, n, k ) )
    PROTO_T ( string s X string t X unsigned long n X unsigned k )
{
    if ( k & STRING_MULTI ) {
        n *= MULTI_WIDTH ;
        xumemcpy ( s, t, ( gen_size ) n ) ;
    } else {
        unsigned long i ;
        for ( i = 0 ; i < n ; i++ ) {
            add_multi_char ( s, ( unsigned long ) *t, CHAR_SIMPLE ) ;
            s += MULTI_WIDTH ;
            t++ ;
        }
    }
    return ;
}


/*
    GET A MULTIBYTE CHARACTER FROM A STRING

    This routine reads a multibyte character from the string s (which
    ends at se).  The value (as a wide character) is assigned to pc with
    the new value of s being returned.  Note that this routine is not
    required in, for example, check_digits because the representation
    of a simple single byte character as a multibyte character comprises
    that single byte.
*/

#if FS_MULTIBYTE

static string get_multibyte
    PROTO_N ( ( s, se, pc ) )
    PROTO_T ( string s X string se X unsigned long *pc )
{
    wchar_t c ;
    int n = mbtowc ( &c, s, ( size_t ) ( se - s ) ) ;
    if ( n > 0 ) {
        /* Valid multibyte character */
        *pc = ( unsigned long ) c ;
        s += n ;
    } else if ( n == 0 ) {
        /* Null character */
        *pc = 0 ;
        s++ ;
    } else {
        /* Invalid multibyte character */
        report ( crt_loc, ERR_lex_ccon_multibyte () ) ;
        *pc = ( unsigned long ) *( s++ ) ;
    }
    return ( s ) ;
}

#endif


/*
    ANALYSE A STRING OR CHARACTER LITERAL

    This routine analyses the string or character literal given by the
    string s (which ends at se).  Only characters in the range [0,0xff]
    are assumed to be valid. Note that this is the routine which should
    do the mapping from the source character set to the execution
    character set (translation phase 5), however this is deferred until
    the string output routines.
*/

STRING new_string_lit
    PROTO_N ( ( s, se, lex ) )
    PROTO_T ( string s X string se X int lex )
{
    STRING res ;
    STRING prev ;
    int multi = 0 ;
    int overflow = 0 ;
    unsigned long len = 0 ;
    unsigned kind = STRING_NONE ;
#if FS_MULTIBYTE
    int multibyte = allow_multibyte ;
#endif
    gen_size sz = ( gen_size ) ( se - s ) + 1 ;
    string str = xustr ( sz ) ;

    /* Find string type */
    switch ( lex ) {
        case lex_char_Hlit :
        case lex_char_Hexp : {
            kind = STRING_CHAR ;
            break ;
        }
        case lex_wchar_Hlit :
        case lex_wchar_Hexp : {
            kind = ( STRING_WIDE | STRING_CHAR ) ;
            break ;
        }
        case lex_string_Hlit :
        case lex_string_Hexp : {
            kind = STRING_NONE ;
            break ;
        }
        case lex_wstring_Hlit :
        case lex_wstring_Hexp : {
            kind = STRING_WIDE ;
            break ;
        }
    }
    if ( do_string ) dump_string_lit ( s, se, kind ) ;

    /* Scan string replacing escape sequences */
    while ( s != se ) {
        unsigned long c ;
        int ch = CHAR_SIMPLE ;
#if FS_MULTIBYTE
        if ( multibyte ) {
            s = get_multibyte ( s, se, &c ) ;
        } else {
            c = ( unsigned long ) *( s++ ) ;
        }
#else
        c = ( unsigned long ) *( s++ ) ;
#endif
        if ( c == char_backslash ) {
            if ( s != se ) {
                /* Unterminated string literals already reported */
                character e = NONE ;
#if FS_MULTIBYTE
                if ( multibyte ) {
                    s = get_multibyte ( s, se, &c ) ;
                } else {
                    c = ( unsigned long ) *( s++ ) ;
                }
#else
                c = ( unsigned long ) *( s++ ) ;
#endif
                if ( c < NO_CHAR ) e = escape_sequences [c] ;
                switch ( e ) {

                    case OCTE : {
                        /* Octal escape sequences */
                        unsigned base = 8 ;
                        string r = s - 1 ;
                        s = check_digits ( r, base ) ;
                        if ( s > r + 3 ) s = r + 3 ;
                        c = eval_char_digits ( r, s, base ) ;
                        ch = CHAR_OCTAL ;
                        break ;
                    }

                    case HEXE : {
                        /* Hexadecimal escape sequences */
                        unsigned base = 16 ;
                        string r = s ;
                        s = check_digits ( r, base ) ;
                        if ( s == r ) {
                            int i = ( int ) c ;
                            report ( crt_loc, ERR_lex_ccon_hex ( i ) ) ;
                        } else {
                            c = eval_char_digits ( r, s, base ) ;
                        }
                        ch = CHAR_HEX ;
                        break ;
                    }

                    case UNI4 : {
                        /* Short unicode escape sequences */
                        if ( allow_unicodes ) {
                            string r = s ;
                            unsigned d = 4 ;
                            ERROR err = NULL_err ;
                            c = eval_unicode ( char_u, d, &ch, &r, &err ) ;
                            if ( !IS_NULL_err ( err ) ) {
                                report ( crt_loc, err ) ;
                            }
                            ch = CHAR_UNI4 ;
                            s = r ;
                            break ;
                        }
                        goto illegal_lab ;
                    }

                    case UNI8 : {
                        /* Long unicode escape sequences */
                        if ( allow_unicodes ) {
                            string r = s ;
                            unsigned d = 8 ;
                            ERROR err = NULL_err ;
                            c = eval_unicode ( char_U, d, &ch, &r, &err ) ;
                            if ( !IS_NULL_err ( err ) ) {
                                report ( crt_loc, err ) ;
                            }
                            ch = CHAR_UNI8 ;
                            s = r ;
                            break ;
                        }
                        goto illegal_lab ;
                    }

                    case NONE :
                    illegal_lab : {
                        /* Illegal escape sequences */
                        int i = ( int ) c ;
                        report ( crt_loc, ERR_lex_ccon_escape ( i ) ) ;
                        break ;
                    }

                    default : {
                        /* Simple escape sequences */
                        c = ( unsigned long ) e ;
                        break ;
                    }
                }
            }
        }
        if ( ( ch != CHAR_SIMPLE || c >= 256 ) && !multi ) {
            /* Convert to multi-character format */
            string a ;
            sz *= MULTI_WIDTH ;
            a = xustr ( sz ) ;
            make_multi_string ( a, str, len, kind ) ;
            if ( len ) {
                len *= MULTI_WIDTH ;
                if ( len == 0 ) overflow = 1 ;
            }
            if ( c >= 256 ) {
                /* Mark fat strings */
                if ( !( kind & STRING_WIDE ) ) {
                    if ( ch == CHAR_UNI4 || ch == CHAR_UNI8 ) {
                        /* EMPTY */
                    } else {
                        report ( crt_loc, ERR_lex_ccon_large () ) ;
                    }
                }
                kind |= STRING_FAT ;
            }
            kind |= STRING_MULTI ;
            multi = 1 ;
            str = a ;
        }
        if ( multi ) {
            add_multi_char ( str + len, c, ch ) ;
            len += MULTI_WIDTH ;
        } else {
            str [ len++ ] = ( character ) c ;
        }
        if ( len == 0 ) overflow = 1 ;
    }
    if ( multi ) {
        add_multi_char ( str + len, ( unsigned long ) 0, CHAR_OCTAL ) ;
        len /= MULTI_WIDTH ;
    } else {
        str [ len ] = 0 ;
    }
    if ( overflow ) len = ULONG_MAX ;
    if ( !check_value ( OPT_VAL_string_length, len ) ) {
        len = option_value ( OPT_VAL_string_length ) ;
        if ( multi ) {
            unsigned long n = MULTI_WIDTH * len ;
            add_multi_char ( str + n, ( unsigned long ) 0, CHAR_OCTAL ) ;
        } else {
            str [ len ] = 0 ;
        }
    }
    MAKE_str_simple ( len, str, kind, res ) ;
    prev = share_string_lit ( res ) ;
    if ( !EQ_str ( prev, res ) ) {
        /* Share string literals */
        unsigned long v ;
        DESTROY_str_simple ( destroy, res, len, str, kind, v, res ) ;
        xufree ( str, sz ) ;
        UNUSED ( res ) ;
        UNUSED ( len ) ;
        UNUSED ( kind ) ;
        UNUSED ( v ) ;
        res = prev ;
    }
    return ( res ) ;
}


/*
    ARE TWO STRINGS EQUAL?

    This routine checks whether the string literals s and t are equal.
*/

int eq_string_lit
    PROTO_N ( ( s, t ) )
    PROTO_T ( STRING s X STRING t )
{
    string as, at ;
    unsigned ks, kt ;
    unsigned long ns, nt ;
    if ( EQ_str ( s, t ) ) return ( 1 ) ;
    ks = DEREF_unsigned ( str_simple_kind ( s ) ) ;
    kt = DEREF_unsigned ( str_simple_kind ( t ) ) ;
    ns = DEREF_ulong ( str_simple_len ( s ) ) ;
    nt = DEREF_ulong ( str_simple_len ( t ) ) ;
    if ( ks == kt && ns == nt ) {
        as = DEREF_string ( str_simple_text ( s ) ) ;
        at = DEREF_string ( str_simple_text ( t ) ) ;
        if ( as == at ) return ( 1 ) ;
        if ( ks & STRING_MULTI ) ns *= MULTI_WIDTH ;
        if ( xumemcmp ( as, at, ( gen_size ) ns ) == 0 ) return ( 1 ) ;
    }
    return ( 0 ) ;
}


/*
    CONCATENATE TWO STRING LITERALS

    This routine concatenates the string literals s and t.
*/

STRING concat_string_lit
    PROTO_N ( ( s, t ) )
    PROTO_T ( STRING s X STRING t )
{
    string c ;
    STRING res ;
    STRING prev ;
    unsigned kc ;
    gen_size sz ;
    unsigned long nc ;
    string a = DEREF_string ( str_simple_text ( s ) ) ;
    string b = DEREF_string ( str_simple_text ( t ) ) ;
    unsigned ka = DEREF_unsigned ( str_simple_kind ( s ) ) ;
    unsigned kb = DEREF_unsigned ( str_simple_kind ( t ) ) ;
    unsigned long na = DEREF_ulong ( str_simple_len ( s ) ) ;
    unsigned long nb = DEREF_ulong ( str_simple_len ( t ) ) ;

    /* Form the result literal */
    if ( na == 0 ) return ( t ) ;
    if ( nb == 0 ) return ( s ) ;
    nc = na + nb ;
    if ( nc < na || nc < nb ) nc = ULONG_MAX ;
    if ( !check_value ( OPT_VAL_string_length, nc ) ) {
        nc = option_value ( OPT_VAL_string_length ) ;
        nb = nc - na ;
    }
    kc = ( ka | kb ) ;
    if ( kc & STRING_MULTI ) {
        /* Multi-byte strings */
        unsigned long sa = MULTI_WIDTH * na ;
        unsigned long sc = MULTI_WIDTH * nc ;
        sz = ( gen_size ) ( sc + MULTI_WIDTH ) ;
        c = xustr ( sz ) ;
        make_multi_string ( c, a, na, ka ) ;
        make_multi_string ( c + sa, b, nb, kb ) ;
        add_multi_char ( c + sc, ( unsigned long ) 0, CHAR_OCTAL ) ;
    } else {
        /* Simple strings */
        sz = ( gen_size ) ( nc + 1 ) ;
        c = xustr ( sz ) ;
        xumemcpy ( c, a, ( gen_size ) na ) ;
        xumemcpy ( c + na, b, ( gen_size ) nb ) ;
        c [ nc ] = 0 ;
    }
    MAKE_str_simple ( nc, c, kc, res ) ;
    prev = share_string_lit ( res ) ;
    if ( !EQ_str ( prev, res ) ) {
        /* Share string literals */
        unsigned long v ;
        DESTROY_str_simple ( destroy, res, nc, c, kc, v, res ) ;
        xufree ( c, sz ) ;
        UNUSED ( res ) ;
        UNUSED ( nc ) ;
        UNUSED ( kc ) ;
        UNUSED ( v ) ;
        res = prev ;
    }
    return ( res ) ;
}


/*
    FIND THE SHARED COPY OF A STRING LITERAL

    This routine is used to implement shared string literals.  It returns
    the canonical copy of s (i.e. the first string equal to s for which
    the routine was called).
*/

STRING share_string_lit
    PROTO_N ( ( s ) )
    PROTO_T ( STRING s )
{
    string a = DEREF_string ( str_simple_text ( s ) ) ;
    unsigned long h = ( hash ( a ) % HASH_STRING_SIZE ) ;
    STRING p = string_hash_table [h] ;
    STRING t = p ;
    while ( !IS_NULL_str ( t ) ) {
        if ( eq_string_lit ( t, s ) ) return ( t ) ;
        t = DEREF_str ( str_next ( t ) ) ;
    }
    COPY_str ( str_next ( s ), p ) ;
    string_hash_table [h] = s ;
    return ( s ) ;
}


/*
    GET THE NEXT CHARACTER FROM A STRING

    This routine returns the next character from the string s, using
    the tok field as a counter.  The character type is assigned to pc,
    including CHAR_NONE to indicate the end of the string.
*/

unsigned long get_string_char
    PROTO_N ( ( s, pc ) )
    PROTO_T ( STRING s X int *pc )
{
    unsigned long c ;
    unsigned long i = DEREF_ulong ( str_simple_tok ( s ) ) ;
    unsigned long n = DEREF_ulong ( str_simple_len ( s ) ) ;
    if ( i < n ) {
        string text = DEREF_string ( str_simple_text ( s ) ) ;
        unsigned kind = DEREF_unsigned ( str_simple_kind ( s ) ) ;
        if ( kind & STRING_MULTI ) {
            c = get_multi_char ( text + MULTI_WIDTH * i, pc ) ;
        } else {
            c = ( unsigned long ) text [i] ;
            *pc = CHAR_SIMPLE ;
        }
    } else {
        c = 0 ;
        *pc = CHAR_NONE ;
    }
    COPY_ulong ( str_simple_tok ( s ), i + 1 ) ;
    return ( c ) ;
}


/*
    FIND A CHARACTER LITERAL

    This routine returns the character value corresponding to the character
    literal expression e.
*/

int get_char_value
    PROTO_N ( ( e ) )
    PROTO_T ( EXP e )
{
    int c = char_illegal ;
    if ( !IS_NULL_exp ( e ) ) {
        if ( IS_exp_int_lit ( e ) ) {
            NAT n = DEREF_nat ( exp_int_lit_nat ( e ) ) ;
            if ( IS_nat_calc ( n ) ) {
                e = DEREF_exp ( nat_calc_value ( n ) ) ;
            }
        }
        if ( IS_exp_cast ( e ) ) {
            e = DEREF_exp ( exp_cast_arg ( e ) ) ;
            if ( IS_exp_int_lit ( e ) ) {
                NAT n = DEREF_nat ( exp_int_lit_nat ( e ) ) ;
                if ( IS_nat_calc ( n ) ) {
                    e = DEREF_exp ( nat_calc_value ( n ) ) ;
                }
            }
        }
        if ( IS_exp_char_lit ( e ) ) {
            STRING s = DEREF_str ( exp_char_lit_str ( e ) ) ;
            unsigned kind = DEREF_unsigned ( str_simple_kind ( s ) ) ;
            if ( !( kind & STRING_MULTI ) ) {
                unsigned long len = DEREF_ulong ( str_simple_len ( s ) ) ;
                if ( len == 1 ) {
                    string t = DEREF_string ( str_simple_text ( s ) ) ;
                    c = ( int ) *t ;
                }
            }
        }
    }
    return ( c ) ;
}


/*
    EVALUATE A CHARACTER LITERAL

    This routine evaluates the character literal str by mapping it to
    its ASCII representation.  The value is stored in the tok field
    (the fact that LINK_NONE equals EXTENDED_MAX is convenient, but not
    essential).
*/

NAT eval_char_lit
    PROTO_N ( ( str ) )
    PROTO_T ( STRING str )
{
    NAT n ;
    unsigned long v = DEREF_ulong ( str_simple_tok ( str ) ) ;
    if ( v == LINK_NONE ) {
        unsigned long i ;
        string s = DEREF_string ( str_simple_text ( str ) ) ;
        unsigned long len = DEREF_ulong ( str_simple_len ( str ) ) ;
        unsigned kind = DEREF_unsigned ( str_simple_kind ( str ) ) ;
        if ( kind & STRING_MULTI ) {
            NAT b = make_small_nat ( 256 ) ;
            n = small_nat [0] ;
            for ( i = 0 ; i < len ; i++ ) {
                NAT d ;
                int ch = CHAR_SIMPLE ;
                unsigned long c = get_multi_char ( s, &ch ) ;
                if ( ch == CHAR_SIMPLE ) c = to_ascii ( c, &ch ) ;
                d = make_nat_value ( c ) ;
                n = binary_nat_op ( exp_mult_tag, n, b ) ;
                n = binary_nat_op ( exp_plus_tag, n, d ) ;
                s += MULTI_WIDTH ;
            }
        } else {
            n = small_nat [0] ;
            for ( i = 0 ; i < len ; i++ ) {
                int ch = CHAR_SIMPLE ;
                unsigned long c = ( unsigned long ) *s ;
                c = to_ascii ( c, &ch ) ;
                n = make_nat_literal ( n, ( unsigned ) 256, ( unsigned ) c ) ;
                s++ ;
            }
        }
        v = get_nat_value ( n ) ;
        if ( v != EXTENDED_MAX ) {
            /* Store calculated value */
            COPY_ulong ( str_simple_tok ( str ), v ) ;
        }
    } else {
        /* Use stored value */
        n = make_nat_value ( v ) ;
    }
    return ( n ) ;
}


/*
    FIND A CHARACTER REPRESENTATION TYPE

    In the case where a character literal type doesn't fit into its type
    then this routine gives a type in which the literal value can be
    constructed and then converted into its underlying type.
*/

TYPE find_char_type
    PROTO_N ( ( n ) )
    PROTO_T ( NAT n )
{
    TYPE t ;
    int fit = 0 ;
    string str = NULL_string ;
    t = find_literal_type ( n, BASE_OCTAL, SUFFIX_NONE, str, &fit ) ;
    return ( t ) ;
}


/*
    CREATE A STRING OR CHARACTER LITERAL EXPRESSION

    This routine turns a string or character literal into an expression.
    Note that the type of a normal character literal varies between C
    (where it is a char cast to an int) and C++ (where it stays as a
    char), and also that a string, or wide string, literal is an lvalue
    of array type.
*/

EXP make_string_exp
    PROTO_N ( ( s ) )
    PROTO_T ( STRING s )
{
    EXP e ;
    string text = DEREF_string ( str_simple_text ( s ) ) ;
    unsigned long len = DEREF_ulong ( str_simple_len ( s ) ) ;
    unsigned kind = DEREF_unsigned ( str_simple_kind ( s ) ) ;

    if ( kind & STRING_CHAR ) {
        int fits = 0 ;
        int digit = -1 ;
        TYPE t0, t1, t2 ;
        NAT n = NULL_nat ;
        ERROR err = NULL_err ;
        if ( kind & STRING_WIDE ) {
            t0 = type_wchar_lit ;
            t1 = t0 ;
            t2 = t0 ;
        } else if ( len <= 1 ) {
            t0 = type_char ;
            t1 = t0 ;
            t2 = type_char_lit ;
        } else {
            report ( crt_loc, ERR_lex_ccon_multi ( s ) ) ;
            t0 = type_mchar_lit ;
            t1 = t0 ;
            t2 = t0 ;
        }
        if ( len == 0 ) {
            fits = 1 ;
            n = small_nat [0] ;
            COPY_ulong ( str_simple_tok ( s ), 0 ) ;
        } else if ( len == 1 ) {
            if ( kind & STRING_MULTI ) {
                if ( !( kind & STRING_FAT ) ) {
                    /* Simple octal or hex escape sequence */
                    unsigned long v = DEREF_ulong ( str_simple_tok ( s ) ) ;
                    if ( v == LINK_NONE ) {
                        int ch = CHAR_SIMPLE ;
                        v = get_multi_char ( text, &ch ) ;
                        if ( ch == CHAR_OCTAL || ch == CHAR_HEX ) {
                            if ( v < 128 ) fits = 1 ;
                            n = make_nat_value ( v ) ;
                            COPY_ulong ( str_simple_tok ( s ), v ) ;
                        }
                    } else {
                        if ( v < 128 ) fits = 1 ;
                        n = make_nat_value ( v ) ;
                    }
                }
            } else {
                /* Single character */
                character c = text [0] ;
                if ( in_hash_if_exp ) {
                    /* Evaluate character value immediately */
                    unsigned long v = DEREF_ulong ( str_simple_tok ( s ) ) ;
                    if ( v == LINK_NONE ) {
                        int ch = CHAR_SIMPLE ;
                        v = ( unsigned long ) c ;
                        v = to_ascii ( v, &ch ) ;
                        COPY_ulong ( str_simple_tok ( s ), v ) ;
                    }
                    if ( v < 128 ) fits = 1 ;
                    n = make_nat_value ( v ) ;
                } else {
                    if ( c >= char_zero && c <= char_nine ) {
                        /* Allow for digits */
                        digit = ( int ) ( c - char_zero ) ;
                    }
                }
            }
        }
        if ( IS_NULL_nat ( n ) ) {
            /* Make character literal expression */
            MAKE_exp_char_lit ( t0, s, digit, e ) ;
            MAKE_nat_calc ( e, n ) ;
        } else {
            if ( !fits && check_nat_range ( t0, n ) != 0 ) {
                /* Value doesn't fit into t0 */
                t0 = find_char_type ( n ) ;
            }
        }
        MAKE_exp_int_lit ( t0, n, exp_char_lit_tag, e ) ;
        if ( !EQ_type ( t0, t1 ) ) {
            /* Convert from t0 to t1 */
            e = make_cast_nat ( t1, e, &err, CAST_STATIC ) ;
        }
        if ( !EQ_type ( t1, t2 ) ) {
            /* Convert from t1 to t2 */
            e = make_cast_nat ( t2, e, &err, CAST_IMPLICIT ) ;
        }
        if ( !IS_NULL_err ( err ) ) report ( crt_loc, err ) ;
    } else {
        /* String literals */
        TYPE t ;
        NAT n = make_nat_value ( len + 1 ) ;
        if ( kind & STRING_WIDE ) {
            t = type_wstring_lit ;
        } else {
            t = type_string_lit ;
        }
        MAKE_type_array ( cv_lvalue, t, n, t ) ;
        MAKE_exp_string_lit ( t, s, e ) ;
    }
    return ( e ) ;
}


/*
    CREATE A BOOLEAN LITERAL EXPRESSION

    This routine creates a boolean literal expression given by the boolean
    value b (which should be one of the values BOOL_FALSE and BOOL_TRUE
    defined in literal.h).
*/

EXP make_bool_exp
    PROTO_N ( ( b, tag ) )
    PROTO_T ( unsigned b X unsigned tag )
{
    EXP e ;
    NAT n = small_nat [b] ;
    MAKE_exp_int_lit ( type_bool, n, tag, e ) ;
    return ( e ) ;
}


/*
    TEST A BOOLEAN LITERAL EXPRESSION

    This routine is the reverse of the one above.  It returns the boolean
    value (BOOL_FALSE, BOOL_TRUE or BOOL_UNKNOWN) corresponding to the
    expression e.
*/

unsigned test_bool_exp
    PROTO_N ( ( e ) )
    PROTO_T ( EXP e )
{
    NAT n = DEREF_nat ( exp_int_lit_nat ( e ) ) ;
    if ( IS_nat_small ( n ) ) {
        unsigned b = DEREF_unsigned ( nat_small_value ( n ) ) ;
        if ( b == BOOL_FALSE ) return ( BOOL_FALSE ) ;
        if ( b == BOOL_TRUE ) return ( BOOL_TRUE ) ;
    }
    return ( BOOL_UNKNOWN ) ;
}
Subversion Repositories tendra.SVN

(root)/branches/tendra5/src/producers/common/parse/literal.c – Rev 5