WebSVN – tendra.SVN – /branches/algol60/src/tools/tspec/lex.c

/*
 * Copyright (c) 2002-2005 The TenDRA Project <http://www.tendra.org/>.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 * 3. Neither the name of The TenDRA Project nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS
 * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * $Id$
 */
/*
                 Crown Copyright (c) 1997

    This TenDRA(r) Computer Program is subject to Copyright
    owned by the United Kingdom Secretary of State for Defence
    acting through the Defence Evaluation and Research Agency
    (DERA).  It is made available to Recipients with a
    royalty-free licence for its use, reproduction, transfer
    to other parties and amendment for any purpose not excluding
    product development provided that any such use et cetera
    shall be deemed to be acceptance of the following conditions:-

        (1) Its Recipients shall ensure that this Notice is
        reproduced upon any copies or amended versions of it;

        (2) Any amended version of it shall be clearly marked to
        show both the nature of and the organisation responsible
        for the relevant amendment or amendments;

        (3) Its onward transfer from a recipient to another
        party shall be deemed to be that party's acceptance of
        these conditions;

        (4) DERA gives no warranty or assurance as to its
        quality or suitability for any purpose and DERA accepts
        no liability whatsoever in relation to any use to which
        it may be put.
*/


#include "config.h"
#include "object.h"
#include "hash.h"
#include "lex.h"
#include "name.h"
#include "syntax.h"
#include "type.h"
#include "utility.h"


/*
    CREATE A KEYWORD

    This routine registers nm as a keyword.  An OBJ_KEYWORD object is
    built for the name, the lexical token value t is recorded in it, and
    the object is entered into the keywords hash table.
*/

static void
make_keyword(char *nm, int t)
{
    object *keyword;

    keyword = make_object(nm, OBJ_KEYWORD);
    keyword->u.u_num = t;
    IGNORE add_hash(keywords, keyword, no_version);
}


/*
    INITIALISE KEYWORDS

    This routine initialises the hash table of keywords.  The keyword
    list itself lives in keyword.h; MAKE_KEYWORD is defined here so that
    each use of it in that header expands to a make_keyword call.
    NOTE(review): keyword.h is not visible from this file; it is assumed
    to consist solely of MAKE_KEYWORD(name, token) statements - confirm.
*/

void
init_keywords(void)
{
    /* Map each keyword table entry onto a make_keyword call */
#define MAKE_KEYWORD(NAME, LEX)\
    make_keyword(NAME, LEX)
    /* The included text is expanded inside this function body */
#include "keyword.h"
    return;
}


/*
    CURRENT LEXICAL TOKEN

    These variables are used to store the value of the current lexical
    token.  token_value is set by the read_* routines in this file to the
    text of the token just read; it is either a pointer into the shared
    buffer or a copy of it, depending on the routine and its pp flag.
*/

int crt_lex_token = lex_unknown;	/* current token code */
int saved_lex_token = lex_unknown;	/* saved token code; not used in this file */
char *token_value = null;		/* text of the last token read */


/*
    INPUT FILE

    The variable input_file gives the file from which the input is read.
    The input_pending variable is used to unread one character: it holds
    LEX_EOF when nothing is pending, otherwise the character which the
    next call to read_char will return.
*/

FILE *input_file;		/* stream read by read_char */
int input_pending = LEX_EOF;	/* single pushed-back character, if any */


/*
    READ A CHARACTER FROM THE INPUT FILE

    This routine returns the next input character.  A character pushed
    back through input_pending is returned first (and the pending slot
    cleared); otherwise a character is fetched from input_file.  The line
    count is advanced only for newlines fetched from the file.  LEX_EOF is
    returned at the end of the file; any other result is masked to the
    range 0-255.
*/

static int
read_char(void)
{
    int ch;

    /* A pushed-back character takes priority over the file */
    if (input_pending != LEX_EOF) {
        ch = input_pending;
        input_pending = LEX_EOF;
        return(ch);
    }

    ch = fgetc(input_file);
    if (ch == EOF) {
        return(LEX_EOF);
    }
    if (ch == '\n') {
        line_no++;
    }
    return(ch & 0xff);
}


/*
    MAPPINGS OF LEXICAL ANALYSER ROUTINES

    These macros give the mappings from the lexical analyser to the
    routines defined in this module.  Every read_* call made through
    these macros passes 0 for the routine's pp (preprocessing) flag.
    NOTE(review): the macro names are presumably those expected by the
    generated lexer.h included below - confirm against that file.
*/

/* Forward declarations; the definitions follow the inclusion of lexer.h */
static int read_identifier(int, int, int);
static int read_number(int, int);
static int read_string(int);
static int read_insert(int);
static int read_c_comment(int);
static int read_comment(int);

/*
    unread_char stores its argument in input_pending, from where the next
    read_char call picks it up.  The expansion is a bare assignment, so it
    is only safe when used as a complete statement.
*/
#define unread_char(A)  input_pending = (A)
/* Identifiers: the first argument of read_identifier is the prefix character (0 for none) */
#define get_global(A)           read_identifier(0,(A), 0)
#define get_local(A, B) read_identifier((A), (B), 0)
#define get_command(A, B)       read_identifier((A), (B), 0)
#define get_variable(A, B)      read_identifier((A), (B), 0)
#define get_number(A)           read_number((A), 0)
/* The remaining routines ignore the characters already read by the lexer */
#define get_string(A)           read_string(0)
#define get_comment(A)  read_comment(0)
#define get_c_comment(A, B)     read_c_comment(0)
#define get_text(A, B)  read_insert(0)
/* Unrecognised characters simply yield lex_unknown */
#define unknown_token(A)        lex_unknown


/*
    INCLUDE THE LEXICAL ANALYSER

    The automatically generated lexical analyser is included at this
    point.  It defines the routine read_token which reads the next
    lexical token from the input file.
*/

#include "lexer.h"


/*
    READ AN IDENTIFIER NAME

    This routine reads an identifier name from the input file into the
    shared buffer.  It is entered after the first character, b, has been
    read.  a gives the identifier prefix, '+' for commands, '$' for
    variables, '~' for local identifiers, and 0 for normal identifiers;
    a non-zero prefix is stored in front of the name.

    If the complete text is a keyword its token value is returned.
    Otherwise token_value is set to the text (a pointer into the buffer
    if pp is true, a fresh copy if not) and lex_name or lex_variable is
    returned.  An unrecognised '+' command is reported as an error.  For
    any other prefix token_value is HIDDEN_NAME concatenated with the
    name without its prefix character.
*/

static int
read_identifier(int a, int b, int pp)
{
    char *text = buffer;
    object *kw;
    int len = 0;
    int ch;

    /* Store the optional prefix and the first character */
    if (a) {
        text[len++] = (char)a;
    }
    text[len++] = (char)b;

    /* Gather the remaining alphanumeric characters */
    ch = read_char();
    while (is_alphanum(lookup_char(ch))) {
        text[len++] = (char)ch;
        if (len >= buffsize) {
            error(ERR_SERIOUS, "Identifier too long");
            len = 1;
        }
        ch = read_char();
    }
    unread_char(ch);
    text[len] = 0;

    /* Keywords take precedence over everything else */
    kw = search_hash(keywords, text, no_version);
    if (kw) {
        return(kw->u.u_num);
    }

    if (a == 0 || a == '$' || a == '+') {
        token_value = (pp ? text : string_copy(text));
        if (a == '+') {
            /* Commands */
            error(ERR_SERIOUS, "Unknown command, '%s'", text);
        }
        return(a == '$' ? lex_variable : lex_name);
    }

    /* Any other prefix: replace the prefix character by HIDDEN_NAME */
    token_value = string_concat(HIDDEN_NAME, text + 1);
    return(lex_name);
}


/*
    READ A NUMBER

    This routine reads a run of digits from the input file into the
    shared buffer.  It is entered after the initial character, a, has
    been read.  token_value is set to the digit string (a pointer into
    the buffer if pp is true, a fresh copy if not) and lex_number is
    returned.  The first non-digit character is pushed back.
*/

static int
read_number(int a, int pp)
{
    char *digits = buffer;
    int len = 0;
    int ch;

    digits[len++] = (char)a;
    while (ch = read_char(), is_digit(lookup_char(ch))) {
        digits[len++] = (char)ch;
        if (len >= buffsize) {
            error(ERR_SERIOUS, "Number too long");
            len = 0;
        }
    }
    unread_char(ch);
    digits[len] = 0;

    token_value = (pp ? digits : string_copy(digits));
    return(lex_number);
}


/*
    READ A STRING

    This routine reads a string from the input file.  It is entered after
    the initial quote has been read.
*/

static int
read_string(int pp)
{
    int c;
    int i = 0;
    char *s = buffer;
    for (; ;) {
        c = read_char();
        if (c == '"') {
            /* End of string */
            break;
        } else if (c == '\\') {
            /* Deal with escaped characters */
            c = read_char();
            if (c == '\n' || c == LEX_EOF)goto new_line;
            if (pp) {
                /* Preserve escapes when preprocessing */
                s [i] = '\\';
                i++;
            } else {
                /* Examine escape sequence */
                switch (c) {
                    case 'n': c = '\n'; break;
                    case 'r': c = '\r'; break;
                    case 't': c = '\t'; break;
                }
            }
        } else if (c == '\n' || c == LEX_EOF) {
            /* Deal with new lines */
            new_line : {
                error(ERR_SERIOUS, "New line in string");
                s [i] = 0;
                return(lex_string);
            }
        }
        s [i] = (char)c;
        if (++i >= buffsize) {
            error(ERR_SERIOUS, "String too long");
            i = 0;
        }
    }
    s [i] = 0;
    if (pp) {
        token_value = s;
    } else {
        token_value = string_copy(s);
    }
    return(lex_string);
}


/*
    READ A SECTION OF QUOTED TEXT

    This routine reads a section of quoted text (text enclosed in a
    number of percent signs) into the shared buffer.  On entry two
    percents have already been read.  Any further opening percents are
    counted first; the text is then read until a run of the same number
    of percents is met.

    If pp is true the opening and closing percents are kept in the buffer
    and token_value points at the buffer.  If pp is false the closing
    percents and any leading or trailing white space are removed and
    token_value is a copy of what remains.

    The result is lex_build_Hinsert for an odd number of percents and
    lex_insert for an even number.  End of file inside the text is an
    error and gives lex_eof.
*/

static int
read_insert(int pp)
{
    char *text = buffer;
    int percents = 2;
    int run = 0;
    int len = 0;
    int ch;

    /* Count the remaining opening percents */
    for (ch = read_char(); ch == '%'; ch = read_char()) {
        percents++;
    }
    unread_char(ch);

    if (pp) {
        /* Preserve percents when preprocessing */
        if (percents >= buffsize) {
            error(ERR_SERIOUS, "Insert too long");
        } else {
            while (len < percents) {
                text[len++] = '%';
            }
        }
    }

    /* Copy text until the closing run of percents is complete */
    while (run != percents) {
        ch = read_char();
        if (ch == '%') {
            run++;
        } else if (ch == LEX_EOF) {
            error(ERR_SERIOUS, "End of file in quoted text");
            return(lex_eof);
        } else {
            run = 0;
        }
        text[len++] = (char)ch;
        if (len >= buffsize) {
            error(ERR_SERIOUS, "Insert too long");
            len = 0;
        }
    }

    if (pp) {
        /* Preserve percents when preprocessing */
        text[len] = 0;
        token_value = text;
    } else {
        int start = 0;

        /* Drop the closing percents */
        if (len >= run) {
            len -= run;
        }
        text[len] = 0;

        /* Strip out final white space */
        for (len--; len >= 0; len--) {
            int t = lookup_char((int)text[len] & 0xff);
            if (!is_white(t)) {
                break;
            }
            text[len] = 0;
        }

        /* Step over initial white space */
        for (; ;) {
            int t = lookup_char((int)text[start] & 0xff);
            if (!is_white(t)) {
                break;
            }
            start++;
        }
        token_value = string_copy(text + start);
    }

    if (percents % 2) {
        return(lex_build_Hinsert);
    }
    return(lex_insert);
}


/*
    READ A C COMMENT

    This routine reads a C-style comment into the shared buffer.  The
    routine is entered just after the initial / * has been read, and
    continues until the corresponding * /.  The buffer receives the whole
    comment, opening and closing delimiters included.

    token_value is set to the comment text (a pointer into the buffer if
    pp is true, a fresh copy if not) and lex_comment is returned.  End of
    file inside the comment is an error and gives lex_eof.
*/

static int
read_c_comment(int pp)
{
    int c;
    int i = 2;
    int p = 0;		/* 0 = nothing, 1 = star just seen, 2 = comment closed */
    char *s = buffer;
    s [0] = '/';
    s [1] = '*';
    do {
        c = read_char();
        if (c == '*') {
            /* Every star may start the terminator, including one which
               directly follows another star; otherwise a comment closed
               by an even number of stars and a slash would never end */
            p = 1;
        } else if (c == '/' && p == 1) {
            p = 2;
        } else {
            p = 0;
        }
        if (c == LEX_EOF) {
            error(ERR_SERIOUS, "End of file in comment");
            return(lex_eof);
        }
        s [i] = (char)c;
        if (++i >= buffsize) {
            error(ERR_SERIOUS, "Comment too long");
            /* Restart just after the stored opening delimiter */
            i = 2;
        }
    } while (p != 2);
    s [i] = 0;
    if (pp) {
        token_value = s;
    } else {
        token_value = string_copy(s);
    }
    return(lex_comment);
}


/*
    READ A TSPEC COMMENT

    This routine steps over a tspec comment.  It is entered after the
    initial '#' has been read and discards everything up to and including
    the end of the line.  If pp is true lex_unknown is returned; if pp is
    false the token following the comment is read and returned.  End of
    file inside the comment is an error and gives lex_eof.
*/

static int
read_comment(int pp)
{
    int ch;

    for (; ;) {
        ch = read_char();
        if (ch == '\n') {
            break;
        }
        if (ch == LEX_EOF) {
            error(ERR_SERIOUS, "End of file in comment");
            return(lex_eof);
        }
    }

    if (!pp) {
        return(read_token());
    }
    return(lex_unknown);
}


/*
    READ A PREPROCESSING TOKEN

    This routine is a stripped down version of read_token which is used
    in preprocessing.  Initial white space is skipped if w is true.
    The token read is always stored in the buffer.

    Strings, quoted text, '+' commands and C comments are handed to the
    corresponding read_* routine with its pp flag set.  A tspec comment
    is skipped: if w is true the next token is returned in its place,
    otherwise the comment is replaced by a single new line character.
    Any other character is stored in the buffer on its own, and the
    result is lex_unknown unless the character is one of the punctuation
    tokens recognised below.
*/

static int
read_pptoken(int w)
{
    int tok = lex_unknown;
    int next;
    int ch;

    /* Read a character, stepping over white space if requested */
    ch = read_char();
    while (w && is_white(lookup_char(ch))) {
        ch = read_char();
    }

    switch (ch) {

        case '"':
            return(read_string(1));

        case '#':
            IGNORE read_comment(1);
            if (w) {
                return(read_pptoken(w));
            }
            ch = '\n';
            break;

        case '%':
            /* Two percents introduce quoted text */
            next = read_char();
            if (next == '%') {
                return(read_insert(1));
            }
            unread_char(next);
            break;

        case '+':
            /* A plus followed by a letter is a command */
            next = read_char();
            if (is_alpha(lookup_char(next))) {
                return(read_identifier(ch, next, 1));
            }
            unread_char(next);
            break;

        case '/':
            /* Slash-star opens a C comment */
            next = read_char();
            if (next == '*') {
                return(read_c_comment(1));
            }
            unread_char(next);
            break;

        case ':':
            /* Colon-equals is the assignment token */
            next = read_char();
            if (next == '=') {
                buffer[0] = (char)ch;
                buffer[1] = (char)next;
                buffer[2] = 0;
                return(lex_assign);
            }
            unread_char(next);
            break;

        case '(':
            tok = lex_open_Hround;
            break;

        case ')':
            tok = lex_close_Hround;
            break;

        case '{':
            tok = lex_open_Hbrace;
            break;

        case '}':
            tok = lex_close_Hbrace;
            break;

        case ';':
            tok = lex_semicolon;
            break;

        case ',':
            tok = lex_comma;
            break;

        case LEX_EOF:
            tok = lex_eof;
            break;
    }

    /* Single character token */
    buffer[0] = (char)ch;
    buffer[1] = 0;
    return(tok);
}


/*
    READ A STRING

    This routine reads a string plus one other character from the input
    file, storing the string in str and returning the other character.
    b is set to true if the string is enclosed in brackets.
*/

static int
read_pp_string(char **str, int *b)
{
    int c = read_pptoken(1);
    if (c == lex_open_Hround) {
        *b = 1;
        c = read_pptoken(1);
    }
    if (c != lex_string) {
        error(ERR_SERIOUS, "Syntax error - string expected");
        *str = "???";
        return(c);
    }
    *str = string_copy(buffer);
    c = read_pptoken(1);
    if (*b) {
        if (c != lex_close_Hround) {
            error(ERR_SERIOUS, "Syntax error - ')' expected");
        }
        c = read_pptoken(1);
    }
    return(c);
}


/*
    PRINT A SUBSET NAME

    This routine prints the command cmd "api", "file", "subset" to the
    file output.
*/

static void
print_subset_name(FILE *output, char *cmd, char *api, char *file,
                  char *subset, int b)
{
    if (b) {
        IGNORE fprintf(output, "%s ( \"%s\" )", cmd, api);
    } else {
        IGNORE fprintf(output, "%s \"%s\"", cmd, api);
    }
    if (file)IGNORE fprintf(output, ", \"%s\"", file);
    if (subset) {
        if (file == null)IGNORE fputs(", \"\"", output);
        IGNORE fprintf(output, ", \"%s\"", subset);
    }
    return;
}


/*
    PRINT THE CURRENT FILE POSITION

    This routine prints file name and line number directives to the file
    output.  The $FILE directive is only printed when the file name
    differs from the one printed by the previous call; the $LINE
    directive is always printed and gives line_no - 1.
*/

static void
print_posn(FILE *output)
{
    static char *reported_file = "";
    int same_file = streq(filename, reported_file);

    if (!same_file) {
        IGNORE fprintf(output, "$FILE = \"%s\" ;\n", filename);
        reported_file = filename;
    }
    IGNORE fprintf(output, "$LINE = %d ;\n", line_no - 1);
}


/*
    PREPROCESS A SUBFILE

    This routine reads the arguments of a +IMPLEMENT or +USE directive
    (the directive name is given by cmd) from the input file: an API
    name, optionally bracketed, optionally followed by a file name and a
    subset name, and then either ';' or '('.  The subset named is
    preprocessed into output, after which the current position and the
    directive itself are printed, followed by the terminating character.
    An empty file name is treated as no file name.
*/

static void
preproc_subfile(FILE *output, char *cmd)
{
    char *api = null;
    char *file = null;
    char *subset = null;
    int api_bracketed = 0;
    int terminator = ';';
    int tok;

    /* Read the API name and any file and subset names */
    tok = read_pp_string(&api, &api_bracketed);
    if (tok == lex_comma) {
        int bracketed = 0;
        tok = read_pp_string(&file, &bracketed);
        if (bracketed) {
            error(ERR_SERIOUS, "Illegally bracketed string");
            bracketed = 0;
        }
        if (tok == lex_comma) {
            tok = read_pp_string(&subset, &bracketed);
            if (bracketed) {
                error(ERR_SERIOUS, "Illegally bracketed string");
            }
        }
        if (*file == 0) {
            file = null;
        }
    }

    /* Work out how the directive is terminated */
    if (tok == lex_open_Hround) {
        terminator = '(';
    } else if (tok != lex_semicolon) {
        error(ERR_SERIOUS, "Syntax error - ';' or '(' expected");
    }

    /* Expand the subset, then reproduce the directive */
    preproc(output, api, file, subset);
    print_posn(output);
    print_subset_name(output, cmd, api, file, subset, api_bracketed);
    IGNORE fputc(' ', output);
    IGNORE fputc(terminator, output);
}


/*
    PREPROCESS A FILE

    This routine preprocesses the subset api:file:subset into output.
    The subset is written as a "+SET ... := { ... } ;" block.  If subset
    is null the whole file is copied, with each nested +SUBSET replaced
    by a +IMPLEMENT directive and preprocessed as a set of its own.  If
    subset is non-null only the text inside the matching +SUBSET is
    copied.  +IMPLEMENT and +USE directives met while copying cause the
    sets they name to be preprocessed first.

    Each set is entered in the subsets hash table, so a second request
    for the same set produces no further output.  While the routine runs
    the global lexer state (filename, line_no, input_file and
    input_pending) refers to the file being read; it is restored before
    returning.  The shared buffer is overwritten.
*/

void
preproc(FILE *output, char *api, char *file, char *subset)
{
    int c;
    char *s;
    object *p;
    char *sn, *nm;
    FILE *old_file;
    int old_pending;
    int old_line_no;
    char *old_filename;
    boolean found = 0;		/* the requested subset has been seen */
    int brackets = 0;		/* current brace nesting depth */
    int end_brackets = 0;	/* brace depth just inside the requested subset */
    int if_depth = 0;		/* number of +IFs still open */
    int else_depth = 0;		/* set once a +ELSE has been seen since the last +IF or +ENDIF */
    FILE *input = null;
    /* With no subset requested, everything is copied from the start */
    boolean printing = (boolean)(subset ? 0 : 1);

    /* Check for previous inclusion */
    sn = subset_name(api, file, subset);
    p = search_hash(subsets, sn, no_version);
    if (p != null) {
        /* u_info is only filled in once a set has been completely
           processed, so a null value means the set is still being read */
        if (p->u.u_info == null) {
            error(ERR_SERIOUS, "Recursive inclusion of '%s'", sn);
        } else if (p->u.u_info->implemented) {
            /* In this routine the flag is set only for sets which
               could not be found */
            error(ERR_SERIOUS, "Set '%s' not found", sn);
        }
        return;
    }

    /* Open the input file */
    nm = (file ? file : MASTER_FILE);
    if (!streq(api, LOCAL_API)) {
        nm = string_printf("%s/%s", api, nm);
    }
    /* Try each directory of the colon-separated list input_dir in turn */
    /* NOTE(review): the strcpy below copies the whole remainder of the
       list, and neither it nor the sprintf calls are bounded by the size
       of buffer - confirm that buffer is large enough for any input_dir */
    s = input_dir;
    while (s) {
        char *t = strchr(s, ':');
        if (t == null) {
           /* Last directory in the list */
           IGNORE sprintf(buffer, "%s/%s", s, nm);
           s = null;
        } else {
           /* Overwrite the copy from the colon onwards with the name */
           IGNORE strcpy(buffer, s);
           IGNORE sprintf(buffer + (t - s), "/%s", nm);
           s = t + 1;
        }
        input = fopen(buffer, "r");
        if (input) {
            nm = string_copy(buffer);
            break;
        }
    }
    if (input == null) {
        /* Fall back to the name as it stands */
        input = fopen(nm, "r");
        if (input == null) {
            char *err = "Set '%s' not found (can't find file %s)";
            error(ERR_SERIOUS, err, sn, nm);
            /* Record the failure so that it is not searched for again */
            p = make_object(sn, OBJ_SUBSET);
            IGNORE add_hash(subsets, p, no_version);
            p->u.u_info = make_info(api, file, subset);
            p->u.u_info->implemented = 1;
            return;
        }
    }
    if (verbose > 1) {
        if (subset) {
            IGNORE printf("Preprocessing %s [%s] ...\n", nm, subset);
        } else {
            IGNORE printf("Preprocessing %s ...\n", nm);
        }
    }
    /* Save the lexer state and switch it to the new file */
    old_filename = filename;
    old_line_no = line_no;
    old_file = input_file;
    old_pending = input_pending;
    filename = nm;
    line_no = 1;
    input_file = input;
    input_pending = LEX_EOF;
    /* Enter the set with a null u_info, marking it as in progress */
    p = make_object(sn, OBJ_SUBSET);
    IGNORE add_hash(subsets, p, no_version);

    /* Print position identifier */
    print_subset_name(output, "+SET", api, file, subset, 0);
    IGNORE fputs(" := {\n", output);
    if (printing)print_posn(output);

    /* Process the input */
    /* White space is not skipped here, so it comes back as ordinary
       single character tokens and is copied by the default case */
    while (c = read_pptoken(0), c != lex_eof) {
        switch (c) {

            case lex_subset: {
                /* Deal with subsets */
                int d = 0;
                c = read_pp_string(&s, &d);
                if (d)error(ERR_SERIOUS, "Illegally bracketed string");
                if (c != lex_assign) {
                    error(ERR_SERIOUS, "Syntax error - ':=' expected");
                }
                c = read_pptoken(1);
                if (c != lex_open_Hbrace) {
                    error(ERR_SERIOUS, "Syntax error - '{' expected");
                }
                brackets++;
                if (printing) {
                    /* The nested subset becomes a set of its own, and is
                       replaced here by a +IMPLEMENT directive */
                    int b = brackets;
                    char *cmd = "+IMPLEMENT";
                    preproc(output, api, file, s);
                    print_subset_name(output, cmd, api, file, s, 0);
                    IGNORE fputs(" ;\n", output);
                    /* Skip the subset body up to its closing brace */
                    do {
                        c = read_pptoken(0);
                        if (c == lex_open_Hbrace) {
                            brackets++;
                        } else if (c == lex_close_Hbrace) {
                            brackets--;
                        } else if (c == lex_eof) {
                            char *err = "Can't find end of subset '%s'";
                            error(ERR_SERIOUS, err, s);
                            goto end_of_file;
                        }
                    } while (brackets >= b);
                    c = read_pptoken(1);
                    if (c != lex_semicolon) {
                        error(ERR_SERIOUS, "Syntax error - ';' expected");
                    }
                    print_posn(output);
                } else {
                    /* Not printing: look for the requested subset */
                    if (streq(s, subset)) {
                        if (found) {
                            char *err = "Set '%s' already defined (line %d)";
                            error(ERR_SERIOUS, err, sn, p->line_no);
                        } else {
                            /* Start copying, and note the brace depth
                               at which copying is to stop again */
                            found = 1;
                            printing = 1;
                            print_posn(output);
                            p->line_no = line_no;
                            end_brackets = brackets;
                        }
                    }
                }
                break;
            }

            case lex_implement: {
                /* Deal with subset uses */
                if (printing)preproc_subfile(output, "+IMPLEMENT");
                break;
            }

            case lex_use: {
                /* Deal with subset uses */
                if (printing)preproc_subfile(output, "+USE");
                break;
            }

            case lex_set: {
                /* Deal with sets */
                error(ERR_SERIOUS, "+SET directive in preprocessor");
                goto default_lab;
            }

            case lex_if:
            case lex_ifdef:
            case lex_ifndef: {
                /* Conditionals are only checked for balance; the
                   directive itself is copied like any other token */
                if_depth++;
                else_depth = 0;
                goto default_lab;
            }

            case lex_else: {
                if (if_depth == 0) {
                    error(ERR_SERIOUS, "+ELSE without +IF");
                } else {
                    if (else_depth) {
                        error(ERR_SERIOUS, "Duplicate +ELSE");
                    }
                    else_depth = 1;
                }
                goto default_lab;
            }

            case lex_endif: {
                if (if_depth == 0) {
                    error(ERR_SERIOUS, "+ENDIF without +IF");
                } else {
                    if_depth--;
                }
                else_depth = 0;
                goto default_lab;
            }

            case lex_string: {
                /* Deal with strings */
                /* The buffer holds the string without its quotes, so
                   they are put back on output */
                if (printing) {
                    IGNORE fprintf(output, "\"%s\"", buffer);
                }
                break;
            }

            case lex_open_Hbrace: {
                /* Start of subset */
                brackets++;
                goto default_lab;
            }

            case lex_close_Hbrace: {
                /* End of subset */
                brackets--;
                if (brackets < 0) {
                    error(ERR_SERIOUS, "Unmatched '}'");
                    brackets = 0;
                }
                /* Leaving the requested subset: stop copying before
                   its closing brace is output */
                if (subset && brackets < end_brackets) {
                    printing = 0;
                }
                goto default_lab;
            }

            default :
            default_lab : {
                /* Deal with simple tokens */
                if (printing)IGNORE fputs(buffer, output);
                break;
            }
        }
    }

    /* End of file */
    end_of_file : {
        if (brackets) {
            error(ERR_SERIOUS, "Bracket imbalance of %d", brackets);
        }
        while (if_depth) {
            error(ERR_SERIOUS, "+IF without +ENDIF");
            if_depth--;
        }
        IGNORE fputs("} ;\n", output);
        IGNORE fclose(input);
        /* A non-null u_info marks the set as completely processed */
        p->u.u_info = make_info(api, file, subset);
        /* Restore the lexer state of the caller */
        filename = old_filename;
        line_no = old_line_no;
        input_file = old_file;
        input_pending = old_pending;
        if (subset && !found) {
            char *err = "Set '%s' not found (can't find subset '%s')";
            error(ERR_SERIOUS, err, sn, subset);
            p->u.u_info->implemented = 1;
        }
        return;
    }
}
Subversion Repositories tendra.SVN

(root)/branches/algol60/src/tools/tspec/lex.c – Rev 7