WebSVN – planix.SVN – /ports/trunk/editors/exvi/libuxre/regnfa.c

/*
 * Changes by Gunnar Ritter, Freiburg i. Br., Germany, November 2002.
 *
 * Sccsid @(#)regnfa.c  1.8 (gritter) 2/6/05
 */
/*  UNIX(R) Regular Expresssion Library
 *
 *  Note: Code is released under the GNU LGPL
 *
 *  Copyright (C) 2001 Caldera International, Inc.
 *
 *  This library is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Lesser General Public
 *  License as published by the Free Software Foundation; either
 *  version 2 of the License, or (at your option) any later version.
 *
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public
 *  License along with this library; if not, write to:
 *        Free Software Foundation, Inc.
 *        59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

/*      #include "synonyms.h"   */
#include <string.h>
#include <stdlib.h>
#include "re.h"
#include <stddef.h>
#include <ctype.h>

typedef unsigned char   Uchar;
typedef unsigned short  Ushort;

/*
* Nondeterministic Finite Automata.
*/
typedef struct t_graph  Graph;
struct t_graph
{
        union
        {
                Graph   *ptr;
                Info    info;
        } alt;
        Graph           *next;
        w_type          op;
};

typedef struct t_stack  Stack;
struct t_stack
{
        Stack           *link;  /* simplifies cleanup */
        Stack           *prev;  /* covered states */
        Graph           *wasgp; /* node associated with this state */
        const Uchar     *str;   /* saved position in the string */
        Ushort          cnt;    /* ROP_BRACE: traversal count */
};

        /*
        * A Context holds all the information needed for each
        * potential path through the NFA graph.
        */
typedef struct t_ctxt   Context;
struct t_ctxt
{
        Context         *link;  /* simplifies cleanup */
        Context         *next;  /* singly linked */
        Stack           *sp;    /* nested counts */
        Graph           *gp;    /* starting node */
        Graph           *wasgp; /* node associated with this state */
        const Uchar     *str;   /* saved position in the string */
        Ushort          cnt;    /* ROP_BRACE: traversal count */
        size_t          nset;   /* length of rm[] that is currently set */
        regmatch_t      rm[1];  /* enough to cover re_nsub+1 (np->rmlen) */
};

struct re_nfa_ /*Nfa*/
{
        Graph   *gp;    /* entire NFA */
        Stack   *sp;    /* unused Stacks */
        Stack   *allsp; /* linked Stacks (for cleanup) */
        Context *allcp; /* linked Contexts (for cleanup) */
        Context *cur;   /* Contexts to be continued now */
        Context *step;  /* Contexts waiting for a step of the NFA */
        Context *avail; /* unused Contexts */
        Context **ecur; /* ends cur list of Contexts */
        Context **estp; /* ends step list of Contexts */
        size_t  rmlen;  /* length of rm[] in each Context */
        size_t  rmmin;  /* minimum length needed */
        size_t  used;   /* length used for this libuxre_regnfaexec() */
        w_type  beg;    /* nonzero for fixed char initial node NFAs */
};

#define ROP_MTOR        ROP_CAT /* ROP_OR, except might be empty loop */

        /*
        * Depth first traversal.
        * Make a singly linked list (in alt.ptr) of the graph's nodes.
        * Must toss any ROP_BKTs, too, since "alt" is overwritten.
        */
static void
deltolist(Graph *gp, Graph **list)
{
        Graph *ptr;

        if ((ptr = gp->next) != 0) /* first time */
        {
                gp->next = 0;
                if (gp->op == ROP_OR || gp->op == ROP_MTOR)
                        deltolist(gp->alt.ptr, list);
                deltolist(ptr, list);
                if (gp->op == ROP_BKT)
                {
                        libuxre_bktfree(gp->alt.info.bkt);
                        free(gp->alt.info.bkt);
                }
        }
        else if (gp->op == ROP_END)
                gp->op = ROP_NOP;
        else
                return;
        gp->alt.ptr = *list;
        *list = gp;
}

        /*
        * After the list is turned into a linked list,
        * walk that list freeing the nodes.
        */
static void
delgraph(Graph *gp)
{
        Graph *gp2, end;

        gp2 = &end;
        deltolist(gp, &gp2);
        while ((gp = gp2) != &end)
        {
                gp2 = gp->alt.ptr;
                free(gp);
        }
}

        /*
        * Depth first traversal.
        * Look for ROP_NOPs and prune them from the graph.
        * Chain them all together on *nop's list.
        */
static Graph *
nopskip(Graph *gp, Graph **nop)
{
        Graph *ptr;

        if ((ptr = gp->next) != 0) /* might have yet to do this subgraph */
        {
                if (gp->op == ROP_NOP)
                {
                        if (gp->alt.ptr != 0) /* touched */
                                return gp->next; /* already did it */
                        gp->alt.ptr = *nop;
                        *nop = gp;
                }
                gp->next = 0; /* this subgraph's pending */
                if (gp->op == ROP_OR || gp->op == ROP_MTOR)
                        gp->alt.ptr = nopskip(gp->alt.ptr, nop);
                gp->next = nopskip(ptr, nop);
                if (gp->op == ROP_NOP)
                        return gp->next;
        }
        return gp;
}

        /*
        * Postorder traversal of the parse tree.
        * Build a graph using "Thompson's" algorithm.
        * The only significant modification is the
        * ROP_BRACE->ROP_MTOR construction.
        * Returns 1 => graph might match empty
        *         0 => graph cannot match empty
        *        -1 => error (in allocation)
        */
static int
mkgraph(Tree *tp, Graph **first, Graph **last)
{
        Graph *new = 0, *nop, *lf, *ll, *rf, *rl;
        int lmt, rmt = 0;

        if (tp->op != ROP_CAT)
        {
                if ((new = malloc(sizeof(Graph))) == 0)
                        return 0;
                new->op = tp->op;       /* usually */
        }
        switch (tp->op)
        {
        case ROP_REF:
                new->alt.info.sub = tp->right.info.sub;
                *first = new;
                *last = new;
                return 1; /* safe--can't really tell */
        case ROP_BKT:
                tp->op = ROP_BKTCOPY;   /* now graph owns clean up */
                /*FALLTHROUGH*/
        case ROP_BKTCOPY:
                new->alt.info.bkt = tp->right.info.bkt;
                /*FALLTHROUGH*/
        default:
                *first = new;
                *last = new;
                return 0;
        case ROP_EMPTY:
                new->op = ROP_NOP;
                new->alt.ptr = 0;       /* untouched */
                *first = new;
                *last = new;
                return 1;
        case ROP_OR:
        case ROP_CAT:
                lf = 0; /* in case of error */
                if ((rmt = mkgraph(tp->right.ptr, &rf, &rl)) < 0)
                        goto err;
                /*FALLTHROUGH*/
        case ROP_STAR:
        case ROP_PLUS:
        case ROP_QUEST:
        case ROP_BRACE:
        case ROP_LP:
                if ((lmt = mkgraph(tp->left.ptr, &lf, &ll)) < 0)
                        goto err;
                break;
        }
        /*
        * Note that ROP_NOP only serves as the node that reconnects
        * the two choices of an incoming ROP_OR or ROP_QUEST.  To
        * prevent rewalking portions of the graph in nopskip(),
        * this code marks all ROP_NOP nodes as currently untouched.
        */
        switch (tp->op)
        {
        case ROP_OR:
                if ((nop = malloc(sizeof(Graph))) == 0)
                        goto err;
                nop->op = ROP_NOP;
                nop->alt.ptr = 0;       /* untouched */
                ll->next = nop;
                rl->next = nop;
                new->next = lf;
                new->alt.ptr = rf;
                *first = new;
                *last = nop;
                return lmt | rmt;
        case ROP_CAT:   /* no "new" */
                ll->next = rf;
                *first = lf;
                *last = rl;
                return lmt & rmt;
        case ROP_QUEST:
                if ((nop = malloc(sizeof(Graph))) == 0)
                        goto err;
                nop->op = ROP_NOP;
                nop->alt.ptr = 0;       /* untouched */
                new->op = ROP_OR;
                new->next = lf;
                new->alt.ptr = nop;
                ll->next = nop;
                *first = new;
                *last = nop;
                return 1;
        case ROP_STAR:
                *first = new;
                rmt = 1;
        star:;
                new->op = lmt ? ROP_MTOR : ROP_OR;
                new->alt.ptr = lf;
                ll->next = new;
                *last = new;
                return rmt;
        case ROP_PLUS:
                *first = lf;
                rmt = lmt;
                goto star;
        case ROP_BRACE:
                if ((nop = malloc(sizeof(Graph))) == 0)
                        goto err;
                nop->op = ROP_MTOR; /* going to save state anyway... */
                nop->alt.ptr = lf;
                ll->next = new;
                new->next = nop;
                new->alt.info.num[1] = tp->right.info.num[1];
                if ((new->alt.info.num[0] = tp->right.info.num[0]) == 0)
                {
                        lmt = 1;
                        *first = new;
                }
                else
                {
                        new->alt.info.num[0]--; /* already done 1 */
                        if (new->alt.info.num[1] != BRACE_INF)
                                new->alt.info.num[1]--; /* likewise */
                        *first = lf;
                }
                *last = nop;
                return lmt;
        case ROP_LP:
                if ((nop = malloc(sizeof(Graph))) == 0)
                        goto err;
                nop->op = ROP_RP;
                nop->alt.info.sub = tp->right.info.sub;
                new->alt.info.sub = tp->right.info.sub;
                new->next = lf;
                ll->next = nop;
                *first = new;
                *last = nop;
                return lmt;
        }
err:;
        if (KIND_ROP(tp->op) == BINARY_ROP && rf != 0)
                delgraph(rf);
        if (lf != 0)
                delgraph(lf);
        if (tp->op != ROP_CAT)
                free(new);
        return -1;
}

        /*
        * Semi-preorder traversal.
        * Return zero if there's no simple first character
        * (including the operation ROP_BOL) that must always
        * be at the start of a matching string.
        * This code doesn't attempt to get an answer if the
        * first of the tree many be empty.
        */
static w_type
firstop(Tree *tp)
{
        w_type op;

        switch (tp->op)
        {
        case ROP_OR:
                if ((op = firstop(tp->left.ptr)) == 0
                        || op != firstop(tp->right.ptr))
                {
                        return 0;
                }
                return op;
        case ROP_BRACE:
                if (tp->right.info.num[0] == 0)
                        return 0;
                /*FALLTHROUGH*/
        case ROP_CAT:
        case ROP_PLUS:
        case ROP_LP:
                return firstop(tp->left.ptr);
        default:
                if (tp->op < 0)
                        return 0;
                /*FALLTHROUGH*/
        case ROP_BOL:
                return tp->op;
        }
}

void
libuxre_regdelnfa(Nfa *np)
{
        Context *cp, *cpn;
        Stack *sp, *spn;

        if (np->gp != 0)
                delgraph(np->gp);
        for (cp = np->allcp; cp != 0; cp = cpn)
        {
                cpn = cp->link;
                free(cp);
        }
        for (sp = np->allsp; sp != 0; sp = spn)
        {
                spn = sp->link;
                free(sp);
        }
        free(np);
}

LIBUXRE_STATIC int
libuxre_regnfacomp(regex_t *ep, Tree *tp, Lex *lxp)
{
        Graph *gp, end;
        Nfa *np;

        if ((np = malloc(sizeof(Nfa))) == 0)
                goto err;
        np->gp = 0; /* in case of error */
        if (mkgraph(tp, &np->gp, &gp) < 0)
                goto err;
        gp->next = 0;   /* nothing follows ROP_END */
        np->rmlen = 0;
        if ((ep->re_flags & REG_NOSUB) == 0)
                np->rmlen = ep->re_nsub + 1;
        np->rmmin = 0;
        if (lxp->maxref != 0 && (np->rmmin = lxp->maxref + 1) > np->rmlen)
                np->rmlen = np->rmmin;
        /*
        * Delete all ROP_NOPs from the graph.
        * nopskip() disconnects them from the graph and
        * links them together through their alt.ptr's.
        */
        gp = &end;
        np->gp = nopskip(np->gp, &gp);
        while (gp != &end)
        {
                Graph *gp2 = gp;

                gp = gp->alt.ptr;
                free(gp2);
        }
        np->sp = 0;
        np->allsp = 0;
        np->avail = 0;
        np->allcp = 0;
        ep->re_nfa = np;
        np->beg = firstop(tp); 
        return 0;
err:;
        if (np != 0)
        {
                if (np->gp != 0)
                        delgraph(np->gp);
                free(np);
        }
        return REG_ESPACE;
}

static Stack *
newstck(Nfa *np)
{
        Stack *sp, **spp;
        int i;

        if ((sp = np->sp) == 0) /* get more */
        {
                spp = &np->sp;
                i = 4;
                while ((sp = malloc(sizeof(Stack))) != 0)
                {
                        sp->link = np->allsp;
                        np->allsp = sp;
                        *spp = sp;
                        spp = &sp->prev;
                        if (--i == 0)
                                break;
                }
                *spp = 0;
                if ((sp = np->sp) == 0) /* first malloc failed */
                        return 0;
        }
        np->sp = sp->prev;
        return sp;
}

static int
mkstck(Nfa *np, Context *cp, Graph *gp)
{
        Stack *new, *sp;

        if (gp == 0)    /* copy existing stack tail */
        {
                /*
                * Hoist up top of stack.
                */
                new = cp->sp;
                cp->wasgp = new->wasgp;
                cp->str = new->str;
                cp->cnt = new->cnt;
                cp->sp = new->prev;
                if ((sp = new->prev) == 0) /* only one below */
                {
                        new->prev = np->sp;
                        np->sp = new;
                        cp->sp = 0;
                        return 0;
                }
                for (;;) /* copy the rest; reusing the old top */
                {
                        new->wasgp = sp->wasgp;
                        new->str = sp->str;
                        new->cnt = sp->cnt;
                        if ((new->prev = sp->prev) == 0)
                                break;
                        if ((new->prev = newstck(np)) == 0)
                                return REG_ESPACE;
                        new = new->prev;
                        sp = sp->prev;
                }
                return 0;
        }
        if (cp->wasgp != 0)     /* push current down */
        {
                if ((new = newstck(np)) == 0)
                        return REG_ESPACE;
                new->prev = cp->sp;
                cp->sp = new;
                new->wasgp = cp->wasgp;
                new->str = cp->str;
                new->cnt = cp->cnt;
        }
        cp->wasgp = gp;
        cp->str = 0;
        cp->cnt = 0;
        return 0;
}

        /*
        * Allocate a new Context (from np->avail)
        * and add it to the end of the current list.
        */
static int
newctxt(Nfa *np, Context *cp, Graph *gp)
{
        Context *new;
        size_t n;

        if ((new = np->avail) == 0)     /* need more */
        {
                Context *ncp, **cpp;
                int i;

                /*
                * Can't easily allocate Contexts in one call because
                * the alignments (given the varying length of rm[])
                * are potentially nontrivial.
                */
                n = offsetof(Context, rm) + np->rmlen * sizeof(regmatch_t);
                i = 4;
                cpp = &np->avail;
                while ((ncp = malloc(n)) != 0)
                {
                        ncp->link = np->allcp;
                        np->allcp = ncp;
                        *cpp = ncp;
                        cpp = &ncp->next;
                        if (--i == 0)
                                break;
                }
                *cpp = 0;
                if ((new = np->avail) == 0)     /* first malloc failed */
                        return REG_ESPACE;
        }
        np->avail = new->next;
        new->next = 0;
        new->gp = gp;
        new->sp = 0;
        new->wasgp = 0;
        new->nset = 0;
        if (cp != 0) /* copy existing context information */
        {
                if (cp->sp != 0) /* copy tail of stack */
                {
                        new->sp = cp->sp;
                        if (mkstck(np, new, 0) != 0)
                                return REG_ESPACE;
                }
                new->wasgp = cp->wasgp;
                new->str = cp->str;
                new->cnt = cp->cnt;
                /*
                * Copy any valid subexpression match information
                * from the existing context.
                */
                if (np->used != 0 && (n = cp->nset) != 0)
                {
                        regmatch_t *rmn = new->rm, *rmo = cp->rm;

                        new->nset = n;
                        for (;; ++rmn, ++rmo)
                        {
                                rmn->rm_so = rmo->rm_so;
                                rmn->rm_eo = rmo->rm_eo;
                                if (--n == 0)
                                        break;
                        }
                }
        }
        /*
        * Append it to the end of the current Context list.
        */
        *np->ecur = new;
        np->ecur = &new->next;
        return 0;
}

        /*
        * Compare two byte string sequences for equality.
        * If REG_ICASE, walk through the strings doing
        * caseless comparisons of the wide characters.
        */
static int
casecmp(const Uchar *s, Exec *xp, ssize_t i, ssize_t n, int mb_cur_max)
{
        const Uchar *p = &xp->str[i];
        const Uchar *end;
        w_type wc1, wc2;
        int k;

        if (strncmp((char *)s, (char *)p, n) == 0) /* try for exact match */
                return 1;
        if ((xp->flags & REG_ICASE) == 0)
                return 0;
        /*
        * Walk through each testing for a match, ignoring case,
        * of the resulting wide characters.
        * Note that only "s" can run out of characters.
        */
        end = &p[n];
        do
        {
                if ((wc1 = *s++) == '\0')
                        return 0;
                if (!ISONEBYTE(wc1) && (k = libuxre_mb2wc(&wc1, s)) > 0)
                        s += k;
                if (!ISONEBYTE(wc2 = *p++) && (k = libuxre_mb2wc(&wc2, p)) > 0)
                        p += k;
                if (wc1 != wc2)
                {
                        wc1 = to_lower(wc1);
                        wc2 = to_lower(wc2);
                        if (wc1 != wc2)
                                return 0;
                }
        } while (p < end);
        return 1;
}

LIBUXRE_STATIC int
libuxre_regnfaexec(Nfa *np, Exec *xp)
{
        const Uchar *s, *s1, *s2;
        Context *cp, *cpn;
        Graph *gp, *brace;
        Stack *sp, *spn;
        ssize_t rmso, len;
        int i, ret, mb_cur_max;
        w_type wc;
        size_t n;

        ret = 0;        /* assume it matches */
        rmso = -1;      /* but no match yet */
        np->cur = 0;
        np->step = 0;
        np->ecur = &np->cur;
        np->estp = &np->step;
        if ((np->used = xp->nmatch) < np->rmmin)
                np->used = np->rmmin;
        s1 = 0;         /* one char back */
        s = xp->str;    /* current high water in string */
        mb_cur_max = xp->mb_cur_max;
        for (;;)
        {
                /*
                * Get next character from string.
                * If the engine proper hasn't started and the engine
                * requires a particular character to start and this
                * character isn't it, try the next one.
                */
                for (;;)
                {
                        s2 = s1;
                        s1 = s;
                        if (!ISONEBYTE(wc = *s++) &&
                                        (i = libuxre_mb2wc(&wc, s)) > 0)
                                s += i;
                        if (np->cur != 0 || np->beg == wc || np->beg == 0)
                                break;
                        if (np->beg == ROP_BOL)
                        {
                                if (s2 == 0 && (xp->flags & REG_NOTBOL) == 0)
                                        break;
                                if ((xp->flags & REG_NEWLINE) == 0)
                                        goto nomatch;
                                if (s2 != 0 && *s2 == '\n')
                                        break;
                        }
                        if (wc == '\0')
                                goto nomatch;
                }
                /*
                * Start the engine by inserting a fresh initial context
                * if there's no known match as yet.  (Once some match
                * has been found, the end is near.)
                */
                if (rmso < 0 && newctxt(np, 0, np->gp) != 0)
                        goto err;
                /*
                * Walk the current Contexts list, trying each.
                * "loop" is when a new Context is to be tried,
                * "again" is when the same Context continues,
                * but wc was not yet matched.
                */
                cp = np->cur;
        loop:;
                gp = cp->gp;
        again:;
                switch (gp->op)
                {
                case ROP_BRACE: /* gp->next->op == ROP_MTOR */
                        brace = gp;
                        gp = gp->next;
                        goto mtor;
                case ROP_MTOR:
                        brace = 0;
                mtor:;
                        if (cp->wasgp != gp) /* first time */
                        {
                                if (mkstck(np, cp, gp) != 0)
                                        goto err;
                        }
                        else if (cp->str == s) /* spinning */
                                goto poptonext;
                        cp->str = s;
                        if (brace != 0)
                        {
                                if (cp->cnt >= brace->alt.info.num[1])
                                        goto poptonext;
                                if (++cp->cnt <= brace->alt.info.num[0])
                                {
                                        gp = gp->alt.ptr;
                                        goto again;
                                }
                                if (cp->cnt > BRACE_MAX)
                                        cp->cnt = BRACE_MAX;
                        }
                        if (newctxt(np, cp, gp->alt.ptr) != 0)
                                goto err;
                poptonext:;
                        cp->wasgp = 0;
                        if ((sp = cp->sp) != 0) /* pop stack */
                        {
                                cp->sp = sp->prev;
                                cp->wasgp = sp->wasgp;
                                cp->str = sp->str;
                                cp->cnt = sp->cnt;
                                sp->prev = np->sp;
                                np->sp = sp;
                        }
                        /*FALLTHROUGH*/
                case ROP_EMPTY:
                tonext:;
                        gp = gp->next;
                        goto again;
                case ROP_OR:
                        if (newctxt(np, cp, gp->alt.ptr) != 0)
                                goto err;
                        goto tonext;
                case ROP_LP:
                        if ((n = gp->alt.info.sub) < np->used)
                        {
                                size_t k;

                                cp->rm[n].rm_so = s1 - xp->str;
                                cp->rm[n].rm_eo = -1;
                                /*
                                * Mark any skipped subexpressions as
                                * failing to participate in the match.
                                */
                                if ((k = cp->nset) < n)
                                {
                                        regmatch_t *rmp = &cp->rm[k];

                                        for (;; rmp++)
                                        {
                                                rmp->rm_so = -1;
                                                rmp->rm_eo = -1;
                                                if (++k >= n)
                                                        break;
                                        }
                                }
                                cp->nset = n + 1;
                        }
                        goto tonext;
                case ROP_RP:
                        if ((n = gp->alt.info.sub) < np->used)
                                cp->rm[n].rm_eo = s1 - xp->str;
                        goto tonext;
                case ROP_BOL:
                        if (s2 == 0)
                        {
                                if (xp->flags & REG_NOTBOL)
                                        goto failed;
                        }
                        else if ((xp->flags & REG_NEWLINE) == 0 || *s2 != '\n')
                                goto failed;
                        goto tonext;
                case ROP_EOL:
                        if (wc == '\0')
                        {
                                if (xp->flags & REG_NOTEOL)
                                        goto failed;
                        }
                        else if ((xp->flags & REG_NEWLINE) == 0 || wc != '\n')
                                goto failed;
                        goto tonext;
                default:        /* character match */
                        if (gp->op != wc)
                        {
                                if ((xp->flags & REG_ICASE) == 0
                                        || gp->op != to_lower(wc))
                                {
                                        goto failed;
                                }
                        }
                nextwc:;
                        cp->gp = gp->next;
                tostep:;
                        cpn = cp->next;
                        cp->next = 0;
                        *np->estp = cp;
                        np->estp = &cp->next;
                        if ((cp = cpn) == 0)
                                break;
                        goto loop;
                case ROP_NOTNL:
                        if (wc == '\n')
                                goto failed;
                        /*FALLTHROUGH*/
                case ROP_ANYCH:
                        if (wc > '\0')
                                goto nextwc;
                        /*FALLTHROUGH*/
                case ROP_NONE:
                failed:;
                        cpn = cp->next;
                        cp->next = np->avail;
                        np->avail = cp;
                        if ((cp = cpn) == 0)
                                break;
                        goto loop;
                case ROP_LT:
                        if (s2 == 0)
                        {
                                if (xp->flags & REG_NOTBOL)
                                        goto failed;
                        }
                        else
                        {
                                w_type pwc;

                                if (wc != '_' &&
                                    !iswalnum(mb_cur_max == 1 ? btowc(wc) : wc))
                                        goto failed;
                                if (!ISONEBYTE(pwc = *s2))
                                        libuxre_mb2wc(&pwc, &s2[1]);
                                if (pwc == '_' ||
                                    iswalnum(mb_cur_max== 1 ? btowc(pwc) : pwc))
                                        goto failed;
                        }
                        goto tonext;
                case ROP_GT:
                        if (wc == '_' ||
                            iswalnum(mb_cur_max == 1 ? btowc(wc) : wc))
                                goto failed;
                        goto tonext;
                case ROP_BKT:
                case ROP_BKTCOPY:
                        if (cp->wasgp == gp) /* rest of MCCE */
                        {
                        checkspin:;
                                if (s1 >= cp->str) /* got it all */
                                        goto poptonext;
                                goto tostep;
                        }
                        if ((i = libuxre_bktmbexec(gp->alt.info.bkt, wc, s,
                                                        mb_cur_max)) < 0)
                                goto failed;
                        if ((n = i) == 0)       /* only matched wc */
                                goto nextwc;
                spin:;
                        if (mkstck(np, cp, gp) != 0)
                                goto err;
                        cp->gp = gp;    /* stay here until reach past s+n */
                        cp->str = s + n;
                        goto tostep;
                case ROP_REF:
                        if (cp->wasgp == gp)    /* rest of matched string */
                                goto checkspin;
                        if ((n = gp->alt.info.sub) >= cp->nset)
                                goto failed;
                        if ((len = cp->rm[n].rm_eo) < 0)
                                goto failed;
                        if ((len -= n = cp->rm[n].rm_so) == 0)
                                goto tonext;
                        if (casecmp(s1, xp, n, len, mb_cur_max) == 0)
                                goto failed;
                        if ((n = s - s1) >= len)
                                goto nextwc;
                        n = len - n;
                        goto spin;
                case ROP_END:   /* success! */
                        if (xp->flags & REG_NONEMPTY)
                        {
                                if (s2 == 0)
                                        goto failed;
                        }
                        if (xp->nmatch == 0)
                                goto match;
                        /*
                        * Mark any skipped subexpressions as failing to match.
                        */
                        if ((n = cp->nset) < xp->nmatch)
                        {
                                do
                                {
                                        cp->rm[n].rm_so = -1;
                                        cp->rm[n].rm_eo = -1;
                                } while (++n < xp->nmatch);
                        }
                        /*
                        * Note the left-most match that's longest.
                        */
                        n = cp->rm[0].rm_so;
                        if (rmso < 0 || n < rmso)
                        {
                                rmso = n;
                        record:;
                                memcpy(xp->match, cp->rm,
                                        xp->nmatch * sizeof(regmatch_t));
                                goto failed;
                        }
                        if (rmso < n || xp->match[0].rm_eo > cp->rm[0].rm_eo)
                                goto failed;
                        if (xp->match[0].rm_eo < cp->rm[0].rm_eo)
                                goto record;
#if 0 /* maximize the lengths of earlier LP...RPs */
                        /*
                        * If both are of the same length and start
                        * at the same point, choose the one with
                        * a "longest submatch from left to right"
                        * where an empty string wins over a nonmatch.
                        */
                        for (n = 1; n < xp->nmatch; n++)
                        {
                                ssize_t nlen;

                                /*
                                * First, go with the choice that has any
                                * match for subexpr n.
                                */
                                len = xp->match[n].rm_eo;
                                nlen = cp->rm[n].rm_eo;
                                if (nlen < 0)
                                {
                                        if (len >= 0)
                                                break;
                                }
                                else if (len < 0)
                                        goto record;
                                /*
                                * Both have a match; go with the longer.
                                */
                                len -= xp->match[n].rm_so;
                                nlen -= cp->rm[n].rm_so;
                                if (nlen < len)
                                        break;
                                if (nlen > len)
                                        goto record;
                        }
#else /* take LP and RP as "fence posts" and maximize earlier gaps */
                        /*
                        * If both are of the same length and start
                        * at the same point, choose the one with
                        * the larger earlier subpatterns, in which
                        * each rm_so and rm_eo serves as a separator.
                        */
                        for (n = 1; n < xp->nmatch; n++)
                        {
                                ssize_t nlen;
                                int     use;

                                if (xp->flags & REG_AVOIDNULL) {
                                        /*
                                        * This is to to satisfy POSIX.1-2001
                                        * XBD pp. 172-173 ll. 6127-6129, whose
                                        * translation is "do not match null
                                        * expressions if there is a choice".
                                        * See also POSIX.2 interpretation #43
                                        * in which the question was raised.
                                        *
                                        * The first subexpression of "\(x*\)*"
                                        * must thus match the string "xxx".
                                        */
                                        use = cp->rm[n].rm_eo -
                                                        cp->rm[n].rm_so >=
                                                xp->match[n].rm_eo -
                                                        xp->match[n].rm_so ||
                                                xp->match[n].rm_so < 0;
                                } else
                                        use = 1;
                                /*
                                * Choose the rightmost ROP_LP as that
                                * maximizes the gap from before.
                                */
                                len = xp->match[n].rm_so;
                                nlen = cp->rm[n].rm_so;
                                if (len < nlen && use)
                                        goto record;
                                if (len > nlen)
                                        break;
                                /*
                                * The ROP_LPs are at the same point:
                                * Choose the rightmost ROP_RP.
                                */
                                len = xp->match[n].rm_eo;
                                nlen = cp->rm[n].rm_eo;
                                if (len < nlen && use)
                                        goto record;
                                if (len > nlen)
                                        break;
                        }
#endif
                        goto failed;
                }
                /*
                * Finished the current Context list.  If the input string
                * has been entirely scanned, we're done.  Otherwise, make
                * the next step list current for the next character.
                * If the next step list was empty and there's an existing
                * match, that's the left-most longest.
                */
                if (wc == '\0')
                {
                        if (rmso >= 0)
                                goto match;
                        goto nomatch;
                }
                np->ecur = np->estp;
                if ((np->cur = np->step) == 0)
                {
                        if (rmso >= 0)
                                goto match;
                        np->ecur = &np->cur; /* was pointing at step */
                }
                np->step = 0;
                np->estp = &np->step;
        }
nomatch:;
        ret = REG_NOMATCH;
match:;
        np->avail = 0;
        for (cp = np->allcp; cp != 0; cp = cpn)
        {
                cpn = cp->link;
                cp->next = np->avail;
                np->avail = cp;
        }
        np->sp = 0;
        for (sp = np->allsp; sp != 0; sp = spn)
        {
                spn = sp->link;
                sp->prev = np->sp;
                np->sp = sp;
        }
        return ret;
err:;
        ret = REG_ESPACE;
        goto match;
}
Subversion Repositories planix.SVN

(root)/ports/trunk/editors/exvi/libuxre/regnfa.c – Rev 99