WebSVN – tendra.SVN – /branches/tendra4/src/producers/common/parse/hash.c

/*
                 Crown Copyright (c) 1997
    
    This TenDRA(r) Computer Program is subject to Copyright
    owned by the United Kingdom Secretary of State for Defence
    acting through the Defence Evaluation and Research Agency
    (DERA).  It is made available to Recipients with a
    royalty-free licence for its use, reproduction, transfer
    to other parties and amendment for any purpose not excluding
    product development provided that any such use et cetera
    shall be deemed to be acceptance of the following conditions:-
    
        (1) Its Recipients shall ensure that this Notice is
        reproduced upon any copies or amended versions of it;
    
        (2) Any amended version of it shall be clearly marked to
        show both the nature of and the organisation responsible
        for the relevant amendment or amendments;
    
        (3) Its onward transfer from a recipient to another
        party shall be deemed to be that party's acceptance of
        these conditions;
    
        (4) DERA gives no warranty or assurance as to its
        quality or suitability for any purpose and DERA accepts
        no liability whatsoever in relation to any use to which
        it may be put.
*/


#include "config.h"
#include "c_types.h"
#include "ctype_ops.h"
#include "etype_ops.h"
#include "ftype_ops.h"
#include "hashid_ops.h"
#include "id_ops.h"
#include "itype_ops.h"
#include "member_ops.h"
#include "type_ops.h"
#include "error.h"
#include "option.h"
#include "basetype.h"
#include "char.h"
#include "chktype.h"
#include "hash.h"
#include "lex.h"
#include "namespace.h"
#include "symbols.h"
#include "syntax.h"
#include "token.h"
#include "ustring.h"
#include "xalloc.h"


/*
    HASH TABLES

    The hash tables consist of an array of hash identifiers, one for each
    hash value.  All the hash table entries with the same hash value are
    chained into a list using their next field.  There are two hash tables,
    one for strings and one for types.
*/

HASHID *hash_table ;
static HASHID *hash_type_table ;


/*
    IDENTIFIER NAME HASHING FUNCTION

    This routine calculates the hash value associated with the identifier
    name s.  The main parser routine, read_token, calculates the hash values
    of the identifiers it reads on the fly and stores them in token_hash.
    Therefore any changes to this routine should also be reflected in
    read_token.
*/

unsigned long hash
    PROTO_N ( ( s ) )
    PROTO_T ( string s )
{
    character c ;
    unsigned long h = 0 ;
    while ( c = *( s++ ), c != 0 ) {
        h = HASH_POWER * h + ( unsigned long ) c ;
    }
    return ( h % HASH_SIZE ) ;
}


/*
    TYPE NAME HASHING FUNCTION

    This routine calculates the hash value associated with the type t.
    This is used in the look-up for the conversion function identifier
    'operator t'.
*/

static unsigned long hash_type
    PROTO_N ( ( t ) )
    PROTO_T ( TYPE t )
{
    unsigned long h = 0 ;
    while ( !IS_NULL_type ( t ) ) {
        unsigned long sub = 0 ;
        unsigned long tag = ( unsigned long ) TAG_type ( t ) ;
        CV_SPEC qual = DEREF_cv ( type_qual ( t ) ) ;
        switch ( tag ) {
            case type_integer_tag : {
                INT_TYPE it = DEREF_itype ( type_integer_rep ( t ) ) ;
                if ( IS_itype_basic ( it ) ) {
                    BUILTIN_TYPE no = DEREF_ntype ( itype_basic_no ( it ) ) ;
                    sub = ( unsigned long ) no ;
                }
                t = NULL_type ;
                break ;
            }
            case type_floating_tag : {
                FLOAT_TYPE ft = DEREF_ftype ( type_floating_rep ( t ) ) ;
                if ( IS_ftype_basic ( ft ) ) {
                    BUILTIN_TYPE no = DEREF_ntype ( ftype_basic_no ( ft ) ) ;
                    sub = ( unsigned long ) no ;
                }
                t = NULL_type ;
                break ;
            }
            case type_ptr_tag :
            case type_ref_tag : {
                t = DEREF_type ( type_ptr_etc_sub ( t ) ) ;
                break ;
            }
            case type_ptr_mem_tag : {
                CLASS_TYPE ct = DEREF_ctype ( type_ptr_mem_of ( t ) ) ;
                IDENTIFIER cid = DEREF_id ( ctype_name ( ct ) ) ;
                HASHID cnm = DEREF_hashid ( id_name ( cid ) ) ;
                sub = DEREF_ulong ( hashid_hash ( cnm ) ) ;
                t = DEREF_type ( type_ptr_mem_sub ( t ) ) ;
                break ;
            }
            case type_func_tag : {
                LIST ( TYPE ) p = DEREF_list ( type_func_ptypes ( t ) ) ;
                sub = ( unsigned long ) LENGTH_list ( p ) ;
                t = DEREF_type ( type_func_ret ( t ) ) ;
                break ;
            }
            case type_array_tag : {
                t = DEREF_type ( type_array_sub ( t ) ) ;
                break ;
            }
            case type_bitfield_tag : {
                t = find_bitfield_type ( t ) ;
                break ;
            }
            case type_compound_tag : {
                CLASS_TYPE ct = DEREF_ctype ( type_compound_defn ( t ) ) ;
                IDENTIFIER cid = DEREF_id ( ctype_name ( ct ) ) ;
                HASHID cnm = DEREF_hashid ( id_name ( cid ) ) ;
                sub = DEREF_ulong ( hashid_hash ( cnm ) ) ;
                t = NULL_type ;
                break ;
            }
            case type_enumerate_tag : {
                ENUM_TYPE et = DEREF_etype ( type_enumerate_defn ( t ) ) ;
                IDENTIFIER eid = DEREF_id ( etype_name ( et ) ) ;
                HASHID enm = DEREF_hashid ( id_name ( eid ) ) ;
                sub = DEREF_ulong ( hashid_hash ( enm ) ) ;
                t = NULL_type ;
                break ;
            }
            default : {
                t = NULL_type ;
                break ;
            }
        }
        h += ( 64 * sub + 4 * tag + ( unsigned long ) qual ) ;
    }
    return ( h % HASH_TYPE_SIZE ) ;
}


/*
    INITIALISE A HASH TABLE ENTRY

    This routine initialises the hash table entry nm by creating a dummy
    identifier with lexical token value tok for it to point to.
*/

static void init_hashid
    PROTO_N ( ( nm, tok ) )
    PROTO_T ( HASHID nm X int tok )
{
    IDENTIFIER id ;
    MAKE_id_dummy ( nm, dspec_none, NULL_nspace, crt_loc, id ) ;
    COPY_ulong ( id_no ( id ), ( unsigned long ) tok ) ;
    COPY_id ( hashid_id ( nm ), id ) ;
    COPY_id ( hashid_cache ( nm ), id ) ;
    return ;
}


/*
    LOOK UP AN IDENTIFIER NAME IN THE HASH TABLE

    This routine looks up the identifier name s in the hash table, creating
    it if it does not already exist.  h gives the value of hash ( s ) (which
    gets checked by an assertion).  The argument tok is set to lex_unknown to
    indicate that the identifier has just been read by read_token, otherwise
    it gives the underlying default lexical token.
*/

HASHID lookup_name
    PROTO_N ( ( s, h, ext, tok ) )
    PROTO_T ( string s X unsigned long h X int ext X int tok )
{
    unsigned tag ;
    unsigned long len ;
    HASHID prev = NULL_hashid ;
    HASHID nm = hash_table [h] ;
    ASSERT ( h == hash ( s ) ) ;

    /* Search through existing entries */
    while ( !IS_NULL_hashid ( nm ) ) {
        string t = DEREF_string ( hashid_name_etc_text ( nm ) ) ;
        int c = ustrcmp ( t, s ) ;
        if ( c == 0 ) {
            /* Name matches */
            return ( nm ) ;
        }
        if ( c > 0 ) break ;
        prev = nm ;
        nm = DEREF_hashid ( hashid_next ( nm ) ) ;
    }

    /* Create new hash table entry */
    len = ( unsigned long ) ustrlen ( s ) ;
    if ( tok == lex_unknown ) {
        s = xustrncpy ( s, ( gen_size ) len ) ;
        tok = lex_identifier ;
    }
    tag = hashid_name_tag ;
    if ( ext ) {
        /* Check for extended identifiers */
        string t = s ;
        while ( t = ustrchr ( t, char_backslash ), t != NULL ) {
            t++ ;
            if ( *t == char_u ) {
                /* '\uxxxx' counts as one character */
                len -= 5 ;
            } else {
                /* '\Uxxxxxxxx' counts as one character */
                len -= 9 ;
            }
        }
        tag = hashid_ename_tag ;
    }
    MAKE_hashid_name_etc ( tag, nm, h, s, nm ) ;
    if ( IS_NULL_hashid ( prev ) ) {
        hash_table [h] = nm ;
    } else {
        COPY_hashid ( hashid_next ( prev ), nm ) ;
    }
    init_hashid ( nm, tok ) ;
    if ( len >= max_id_length ) {
        /* Check name length */
        IGNORE check_value ( OPT_VAL_name_limit, len, nm ) ;
    }
    return ( nm ) ;
}


/*
    CREATE A SPECIAL FUNCTION NAME

    This routine creates a constructor, destructor or conversion function
    name (as indicated by tag) for the non-class type t named id.
*/

static HASHID lookup_special
    PROTO_N ( ( t, id, tag ) )
    PROTO_T ( TYPE t X IDENTIFIER id X unsigned tag )
{
    unsigned long h = hash_type ( t ) ;
    HASHID prev = hash_type_table [h] ;
    HASHID nm = prev ;

    /* Search through existing entries */
    while ( !IS_NULL_hashid ( nm ) ) {
        if ( TAG_hashid ( nm ) == tag ) {
            TYPE s = DEREF_type ( hashid_constr_etc_type ( nm ) ) ;
            if ( eq_type ( s, t ) ) {
                COPY_id ( hashid_constr_etc_tid ( nm ), id ) ;
                return ( nm ) ;
            }
        }
        nm = DEREF_hashid ( hashid_next ( nm ) ) ;
    }

    /* Create new hash table entry */
    ASSERT ( h < HASH_SIZE ) ;
    MAKE_hashid_constr_etc ( tag, prev, h, t, id, nm ) ;
    init_hashid ( nm, lex_identifier ) ;
    hash_type_table [h] = nm ;
    return ( nm ) ;
}


/*
    CREATE THE CONSTRUCTOR FOR A TYPE

    This routine creates the hash table entry for the constructor of type t
    and name id.
*/

HASHID lookup_constr
    PROTO_N ( ( t, id ) )
    PROTO_T ( TYPE t X IDENTIFIER id )
{
    HASHID nm ;
    if ( IS_type_compound ( t ) ) {
        CLASS_TYPE ct = DEREF_ctype ( type_compound_defn ( t ) ) ;
        IDENTIFIER cid = DEREF_id ( ctype_constr ( ct ) ) ;
        if ( IS_NULL_id ( cid ) ) {
            /* Create class contructor */
            NAMESPACE ns = DEREF_nspace ( ctype_member ( ct ) ) ;
            MAKE_hashid_constr ( NULL_hashid, 0, t, id, nm ) ;
            init_hashid ( nm, lex_identifier ) ;
            cid = DEREF_id ( hashid_id ( nm ) ) ;
            COPY_nspace ( id_parent ( cid ), ns ) ;
            COPY_id ( ctype_constr ( ct ), cid ) ;
            IGNORE search_member ( ns, nm, 1 ) ;
        } else {
            nm = DEREF_hashid ( id_name ( cid ) ) ;
        }
    } else {
        nm = lookup_special ( t, id, hashid_constr_tag ) ;
    }
    return ( nm ) ;
}


/*
    CREATE THE DESTRUCTOR FOR A TYPE

    This routine creates the hash table entry for the destructor of type t
    and name id.
*/

HASHID lookup_destr
    PROTO_N ( ( t, id ) )
    PROTO_T ( TYPE t X IDENTIFIER id )
{
    HASHID nm ;
    if ( IS_type_compound ( t ) ) {
        CLASS_TYPE ct = DEREF_ctype ( type_compound_defn ( t ) ) ;
        IDENTIFIER cid = DEREF_id ( ctype_destr ( ct ) ) ;
        if ( IS_NULL_id ( cid ) ) {
            /* Create class destructor */
            NAMESPACE ns = DEREF_nspace ( ctype_member ( ct ) ) ;
            MAKE_hashid_destr ( NULL_hashid, 1, t, id, nm ) ;
            init_hashid ( nm, lex_identifier ) ;
            cid = DEREF_id ( hashid_id ( nm ) ) ;
            COPY_nspace ( id_parent ( cid ), ns ) ;
            COPY_id ( ctype_destr ( ct ), cid ) ;
            IGNORE search_member ( ns, nm, 1 ) ;
        } else {
            nm = DEREF_hashid ( id_name ( cid ) ) ;
        }
    } else {
        nm = lookup_special ( t, id, hashid_destr_tag ) ;
    }
    return ( nm ) ;
}


/*
    LOOK UP A CONVERSION FUNCTION NAME

    This routine returns the hash table entry for the conversion function
    corresponding to the type t.
*/

HASHID lookup_conv
    PROTO_N ( ( t ) )
    PROTO_T ( TYPE t )
{
    HASHID nm = lookup_special ( t, NULL_id, hashid_conv_tag ) ;
    return ( nm ) ;
}


/*
    OVERLOADED OPERATOR LOOK-UP TABLE

    This table gives the hash table entries for the overloaded operator
    function names, 'operator +' etc.  It gives a straight look-up depending
    on the lexical token number of the operator.  All ISO keyword and
    digraphs are represented in their primary form.
*/

HASHID *hash_ops_table ;


/*
    CREATE AN OPERATOR HASH TABLE ENTRY

    This routine creates a hash table entry for 'operator op' when op has
    lexical token number t.
*/

static HASHID make_op
    PROTO_N ( ( t ) )
    PROTO_T ( int t )
{
    HASHID nm ;
    unsigned long h = ( unsigned long ) t ;
    MAKE_hashid_op ( NULL_hashid, ( h % HASH_SIZE ), t, nm ) ;
    init_hashid ( nm, lex_identifier ) ;
    return ( nm ) ;
}


/*
    LOOK UP AN ANONYMOUS IDENTIFIER

    This routine creates a hash table entry for an anonymous identifier.
    Note that each anonymous identifier gives a distinct hash table entry.
*/

HASHID lookup_anon
    PROTO_Z ()
{
    HASHID nm ;
    static unsigned long anon_no = 0 ;
    unsigned long a = anon_no++ ;
    MAKE_hashid_anon ( NULL_hashid, ( a % HASH_SIZE ), a, nm ) ;
    init_hashid ( nm, lex_identifier ) ;
    return ( nm ) ;
}


/*
    EXPAND AN IDENTIFIER NAME

    This routine expands the identifier name nm.  For example, if t is
    a tokenised type defined to be int, then 'operator t' expands to
    'operator int'.  ct gives the expansion type for constructors and
    destructors.
*/

HASHID expand_name
    PROTO_N ( ( nm, ct ) )
    PROTO_T ( HASHID nm X CLASS_TYPE ct )
{
    switch ( TAG_hashid ( nm ) ) {
        case hashid_constr_tag : {
            /* Constructor names */
            if ( !IS_NULL_ctype ( ct ) ) {
                IDENTIFIER id = DEREF_id ( ctype_constr ( ct ) ) ;
                nm = DEREF_hashid ( id_name ( id ) ) ;
            } else {
                TYPE t = DEREF_type ( hashid_constr_type ( nm ) ) ;
                TYPE s = expand_type ( t, 1 ) ;
                if ( !EQ_type ( t, s ) ) {
                    nm = lookup_constr ( s, NULL_id ) ;
                }
            }
            break ;
        }
        case hashid_destr_tag : {
            /* Destructor names */
            if ( !IS_NULL_ctype ( ct ) ) {
                IDENTIFIER id = DEREF_id ( ctype_destr ( ct ) ) ;
                nm = DEREF_hashid ( id_name ( id ) ) ;
            } else {
                TYPE t = DEREF_type ( hashid_destr_type ( nm ) ) ;
                TYPE s = expand_type ( t, 1 ) ;
                if ( !EQ_type ( t, s ) ) {
                    nm = lookup_destr ( s, NULL_id ) ;
                }
            }
            break ;
        }
        case hashid_conv_tag : {
            /* Conversion function names */
            TYPE t = DEREF_type ( hashid_conv_type ( nm ) ) ;
            TYPE s = expand_type ( t, 1 ) ;
            if ( !EQ_type ( t, s ) ) {
                nm = lookup_conv ( s ) ;
            }
            break ;
        }
    }
    return ( nm ) ;
}


/*
    FIND NEXT VERSION OF AN EXPANDED IDENTIFIER NAME

    There is a complication in the expansion of conversion function names
    in that when types are identified more than one name may refer to the
    same type.  This routine finds the next such possible name returning
    the null identifier name to indicate that there are no more.
*/

HASHID next_expand_name
    PROTO_N ( ( nm ) )
    PROTO_T ( HASHID nm )
{
    if ( IS_hashid_conv ( nm ) ) {
        int started = 0 ;
        TYPE t = DEREF_type ( hashid_conv_type ( nm ) ) ;
        unsigned long h = hash_type ( t ) ;
        HASHID pnm = hash_type_table [h] ;
        while ( !IS_NULL_hashid ( pnm ) ) {
            if ( EQ_hashid ( pnm, nm ) ) {
                started = 1 ;
            } else if ( started && IS_hashid_conv ( pnm ) ) {
                TYPE s = DEREF_type ( hashid_conv_type ( pnm ) ) ;
                if ( eq_type ( s, t ) ) return ( pnm ) ;
            }
            pnm = DEREF_hashid ( hashid_next ( pnm ) ) ;
        }
    }
    return ( NULL_hashid ) ;
}


/*
    FIND A HASH IDENTIFIER NUMBER

    This routine finds the lexical token number associated with the hash
    identifier nm.  For a keyword, whether active or not, this is the
    associated value from syntax.h, otherwise it is lex_identifier.
*/

int find_hashid
    PROTO_N ( ( nm ) )
    PROTO_T ( HASHID nm )
{
    unsigned long lex ;
    IDENTIFIER id = DEREF_id ( hashid_id ( nm ) ) ;
    while ( !IS_id_dummy ( id ) ) {
        /* Scan to last hidden value */
        id = DEREF_id ( id_alias ( id ) ) ;
    }
    lex = DEREF_ulong ( id_no ( id ) ) ;
    return ( ( int ) lex ) ;
}


/*
    FIND AN UNDERLYING IDENTIFIER

    This routine finds the dummy identifier underlying id.
*/

IDENTIFIER underlying_id
    PROTO_N ( ( id ) )
    PROTO_T ( IDENTIFIER id )
{
    HASHID nm = DEREF_hashid ( id_name ( id ) ) ;
    id = DEREF_id ( hashid_id ( nm ) ) ;
    while ( !IS_id_dummy ( id ) ) {
        /* Scan to last hidden value */
        id = DEREF_id ( id_alias ( id ) ) ;
    }
    return ( id ) ;
}


/*
    SET THE LOCATION OF A COMPLEX IDENTIFIER

    The precise location of the last use of a hash identifier is stored
    in the loc field of its associated dummy identifier.  For simple
    identifiers this is set in read_token, however for more complex
    cases this routine sets the location of id to be the location of pid.
*/

void set_hashid_loc
    PROTO_N ( ( id, pid ) )
    PROTO_T ( IDENTIFIER id X IDENTIFIER pid )
{
    if ( !IS_NULL_id ( pid ) ) {
        LOCATION loc ;
        if ( !IS_id_dummy ( id ) ) id = underlying_id ( id ) ;
        if ( !IS_id_dummy ( pid ) ) pid = underlying_id ( pid ) ;
        DEREF_loc ( id_loc ( pid ), loc ) ;
        COPY_loc ( id_loc ( id ), loc ) ;
    }
    return ;
}


/*
    MODIFY AN IDENTIFIER NAME

    This routine modifies the name of the identifier id by adding a prime
    to it.  This is intended primarily for debugging purposes.
*/

void prime_name
    PROTO_N ( ( id ) )
    PROTO_T ( IDENTIFIER id )
{
    if ( !IS_NULL_id ( id ) ) {
        HASHID nm = DEREF_hashid ( id_name ( id ) ) ;
        if ( IS_hashid_name ( nm ) ) {
            string s = DEREF_string ( hashid_name_text ( nm ) ) ;
            s = xustrcat ( s, ustrlit ( "'" ) ) ;
            nm = lookup_name ( s, hash ( s ), 0, lex_identifier ) ;
        }
        COPY_hashid ( id_name ( id ), nm ) ;
    }
    return ;
}


/*
    KEYWORD HASH TABLE ENTRIES

    The table hash_keyword gives the hash table entries for the keywords.
    These are numbered from LAST_KEYWORD to FIRST_KEYWORD.  The array
    should be accessed through the macro KEYWORD defined in hash.h, which
    includes the appropriate offset.
*/

HASHID hash_keyword [ LAST_KEYWORD - FIRST_KEYWORD + 1 ] ;
IDENTIFIER underlying_op = NULL_id ;


/*
    INITIALISE THE HASH TABLE

    This routine allocates space for the hash table and sets all its entries
    to NULL.  It also sets up the operator look-up table.
*/

void init_hash
    PROTO_Z ()
{
    int n ;
    unsigned long i ;

    /* Set up identifier hash table */
    hash_table = xmalloc_nof ( HASHID, HASH_SIZE ) ;
    for ( i = 0 ; i < HASH_SIZE ; i++ ) {
        hash_table [i] = NULL_hashid ;
    }

    /* Set up type hash table */
    hash_type_table = xmalloc_nof ( HASHID, HASH_TYPE_SIZE ) ;
    for ( i = 0 ; i < HASH_TYPE_SIZE ; i++ ) {
        hash_type_table [i] = NULL_hashid ;
    }

    /* Set up operator look-up table */
    hash_ops_table = xmalloc_nof ( HASHID, LAST_TOKEN + 1 ) ;
    for ( n = 0 ; n <= LAST_TOKEN ; n++ ) {
        hash_ops_table [n] = NULL_hashid ;
    }

    /* Allocate hash table entries for all symbols */
    for ( n = FIRST_C_SYMBOL ; n <= LAST_C_SYMBOL ; n++ ) {
        hash_ops_table [n] = make_op ( n ) ;
    }
    for ( n = FIRST_CPP_SYMBOL ; n <= LAST_CPP_SYMBOL ; n++ ) {
        hash_ops_table [n] = make_op ( n ) ;
    }
    for ( n = FIRST_EXTRA_SYMBOL ; n <= LAST_EXTRA_SYMBOL ; n++ ) {
        hash_ops_table [n] = make_op ( n ) ;
    }
    hash_ops_table [ lex_array_Hop ] = make_op ( lex_array_Hop ) ;
    hash_ops_table [ lex_cond_Hop ] = make_op ( lex_cond_Hop ) ;
    hash_ops_table [ lex_delete ] = make_op ( lex_delete ) ;
    hash_ops_table [ lex_delete_Harray ] = make_op ( lex_delete_Harray ) ;
    hash_ops_table [ lex_func_Hop ] = make_op ( lex_func_Hop ) ;
    hash_ops_table [ lex_new ] = make_op ( lex_new ) ;
    hash_ops_table [ lex_new_Harray ] = make_op ( lex_new_Harray ) ;
    hash_ops_table [ lex_alignof ] = make_op ( lex_alignof ) ;
    hash_ops_table [ lex_sizeof ] = make_op ( lex_sizeof ) ;
    hash_ops_table [ lex_typeid ] = make_op ( lex_typeid ) ;
    hash_ops_table [ lex_vtable ] = make_op ( lex_vtable ) ;

    /* Map secondary representations to primary representations */
    for ( n = FIRST_DIGRAPH ; n <= LAST_DIGRAPH ; n++ ) {
        int m = primary_form ( n ) ;
        hash_ops_table [n] = hash_ops_table [m] ;
    }
    for ( n = FIRST_ISO_KEYWORD ; n <= LAST_ISO_KEYWORD ; n++ ) {
        int m = primary_form ( n ) ;
        hash_ops_table [n] = hash_ops_table [m] ;
    }

    /* This is necessary for the definition of KEYWORD */
    ASSERT ( FIRST_KEYWORD == lex_auto ) ;
    return ;
}
Subversion Repositories tendra.SVN

(root)/branches/tendra4/src/producers/common/parse/hash.c – Rev 2