Subversion Repositories planix.SVN

Rev

Blame | Last modification | View Log | RSS feed

/*
 * Changes by Gunnar Ritter, Freiburg i. Br., Germany, November 2002.
 *
 * Sccsid @(#)regdfa.c  1.9 (gritter) 9/22/03
 */
/*  UNIX(R) Regular Expresssion Library
 *
 *  Note: Code is released under the GNU LGPL
 *
 *  Copyright (C) 2001 Caldera International, Inc.
 *
 *  This library is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Lesser General Public
 *  License as published by the Free Software Foundation; either
 *  version 2 of the License, or (at your option) any later version.
 *
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public
 *  License along with this library; if not, write to:
 *        Free Software Foundation, Inc.
 *        59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

/*      #include "synonyms.h"   */
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include "regdfa.h"

/*
* Deterministic Finite Automata.
*/

        /*
        * Postorder traversal that returns a copy of the subtree,
        * except that ROP_BKT becomes ROP_BKTCOPY (since they
        * share the same pointed to Bracket object).
        */
static Tree *
copy(regex_t *ep, Tree *tp)
{
        Tree *np;

        if ((np = malloc(sizeof(Tree))) == 0)
                return 0;
        switch (np->op = tp->op) /* almost always correct */
        {
        case ROP_BKT:
                np->op = ROP_BKTCOPY;
                /*FALLTHROUGH*/
        case ROP_BKTCOPY:
                np->right.info.bkt = tp->right.info.bkt;
                /*FALLTHROUGH*/
        default:
                np->left.pos = ep->re_dfa->nposn++;
                /*FALLTHROUGH*/
        case ROP_EMPTY:
                return np;
        case ROP_CAT:
        case ROP_OR:
                if ((np->right.ptr = copy(ep, tp->right.ptr)) == 0)
                {
                        free(np);
                        return 0;
                }
                np->right.ptr->parent = np;
                /*FALLTHROUGH*/
        case ROP_STAR:
        case ROP_PLUS:
        case ROP_QUEST:
        case ROP_LP:
                if ((np->left.ptr = copy(ep, tp->left.ptr)) == 0)
                        break;
                np->left.ptr->parent = np;
                return np;
        }
        libuxre_regdeltree(np, 1);
        return 0;
}

        /*
        * Postorder traversal.
        * Assign unique ascending integer values to the leaves.
        * Since the right child is traversed before the left,
        * the position for ROP_END is guaranteed to be zero.
        * The parse tree is rewritten in two cases:
        * - Each ROP_BRACE is replaced by an equivalent--sometimes
        *   large--subtree using only ROP_CAT, ROP_QUEST, and
        *   ROP_PLUS.
        * - If REG_ICASE, replace each simple character that has
        *   an uppercase equivalent with a ROP_OR subtree over the
        *   two versions.
        * Since these rewrites occur bottom up, they have already
        * been applied before any subtrees passed to copy().
        */
static Tree *
findposn(regex_t *ep, Tree *tp, int mb_cur_max)
{
        unsigned int lo, hi;
        Tree *ptr, *par;
        w_type wc;

        switch (tp->op)
        {
        default:
                if (ep->re_flags & REG_ICASE
                        && (wc = to_upper(tp->op)) != tp->op)
                {
                        if ((ptr = libuxre_reg1tree(tp->op, 0)) == 0)
                                return 0;
                        ptr->parent = tp;
                        ptr->left.pos = ep->re_dfa->nposn++;
                        tp->op = ROP_OR;
                        tp->left.ptr = ptr;
                        ptr = libuxre_reg1tree(wc, 0);
                        if ((tp->right.ptr = ptr) == 0)
                                return 0;
                        ptr->parent = tp;
                        ptr->left.pos = ep->re_dfa->nposn++;
                        return tp;
                }
                /*FALLTHROUGH*/
        case ROP_BOL:
        case ROP_EOL:
        case ROP_ALL:
        case ROP_ANYCH:
        case ROP_NOTNL:
        case ROP_NONE:
        case ROP_BKT:
        case ROP_BKTCOPY:
        case ROP_END:
                tp->left.pos = ep->re_dfa->nposn++;
                return tp;
        case ROP_EMPTY:
                return tp;
        case ROP_OR:
        case ROP_CAT:
                if ((tp->right.ptr = findposn(ep, tp->right.ptr,
                                                mb_cur_max)) == 0)
                        return 0;
                /*FALLTHROUGH*/
        case ROP_STAR:
        case ROP_PLUS:
        case ROP_QUEST:
        case ROP_LP:
                if ((tp->left.ptr = findposn(ep, tp->left.ptr,
                                                mb_cur_max)) == 0)
                        return 0;
                return tp;
        case ROP_BRACE:
                if ((tp->left.ptr = findposn(ep, tp->left.ptr,
                                                mb_cur_max)) == 0)
                        return 0;
                break;
        }
        /*
        * ROP_BRACE as is cannot be handled in a DFA.  This code
        * duplicates the ROP_BRACE subtree as a left-towering
        * series of ROP_CAT nodes, the first "lo" of which are
        * direct copies of the original subtree.  The tail of
        * the series are either some number of ROP_QUESTs over
        * copies of the original subtree, or a single ROP_PLUS
        * over a copy (when "hi" is infinity).
        *
        * All interesting cases {lo,hi}:
        *  {0,0} -> ROP_EMPTY, parsing, temporary
        *  {0,1} -> ROP_QUEST, parsing
        *  {0,2} -> CAT(QUEST(left), QUEST(copy))
        *  {0,n} -> CAT({0,n-1}, QUEST(copy))
        *  {0,}  -> ROP_STAR, parsing
        *
        *  {1,1} -> ROP_NOP, parsing, temporary
        *  {1,2} -> CAT(left, QUEST(copy))
        *  {1,n} -> CAT({1,n-1}, QUEST(copy))
        *  {1,}  -> ROP_PLUS, parsing
        *
        *  {2,2} -> CAT(left, copy)
        *  {2,n} -> CAT({2,n-1}, QUEST(copy))
        *  {2,}  -> CAT(left, PLUS(copy))
        *
        *  {3,3} -> CAT({2,2}, copy)
        *  {3,n} -> CAT({3,n-1}, QUEST(copy))
        *  {3,}  -> CAT({2,2}, PLUS(copy))
        *
        *  {n,}  -> CAT({n-1,n-1}, PLUS(copy))
        *
        * In all cases, the ROP_BRACE node is turned into the
        * left-most ROP_CAT, and a copy of its original subtree
        * is connected as the right child.  Note that the bottom-
        * up nature of this duplication guarantees that copy()
        * never sees a ROP_BRACE node.
        */
        par = tp->parent;
        lo = tp->right.info.num[0];
        hi = tp->right.info.num[1];
        if ((ptr = copy(ep, tp->left.ptr)) == 0)
                return 0;
        ptr->parent = tp;
        tp->op = ROP_CAT;
        tp->right.ptr = ptr;
        if (lo == 0)
        {
                if ((tp->left.ptr = libuxre_reg1tree(ROP_QUEST, tp->left.ptr))
                                == 0)
                        return 0;
                tp->left.ptr->parent = tp;
        }
        else
        {
                if (hi == BRACE_INF || (hi -= lo) == 0)
                        lo--;   /* lo > 1; no extra needed */
                while (--lo != 0)
                {
                        if ((tp = libuxre_reg2tree(ROP_CAT, tp, copy(ep, ptr)))
                                        == 0)
                                return 0;
                }
        }
        if (hi == BRACE_INF)
        {
                if ((tp->right.ptr = libuxre_reg1tree(ROP_PLUS, tp->right.ptr))
                                == 0)
                        return 0;
                tp->right.ptr->parent = tp;
        }
        else if (hi != 0)
        {
                if ((tp->right.ptr = libuxre_reg1tree(ROP_QUEST, tp->right.ptr))
                                == 0)
                        return 0;
                ptr = tp->right.ptr;
                ptr->parent = tp;
                while (--hi != 0)
                {
                        if ((tp = libuxre_reg2tree(ROP_CAT, tp, copy(ep, ptr)))
                                        == 0)
                                return 0;
                }
        }
        tp->parent = par;
        return tp;
}

        /*
        * Postorder traversal, but not always entire subtree.
        * For each leaf reachable by the empty string, add it
        * to the set.  Return 0 if the subtree can match empty.
        */
static int
first(Dfa *dp, Tree *tp)
{
        switch (tp->op)
        {
        case ROP_BOL:
                if (dp->flags & REG_NOTBOL)
                        return 0;
                break;
        case ROP_EOL:
                if (dp->flags & REG_NOTEOL)
                        return 0;
                break;
        case ROP_EMPTY:
                return 0;
        case ROP_OR:
                return first(dp, tp->left.ptr) & first(dp, tp->right.ptr);
        case ROP_CAT:
                if (first(dp, tp->left.ptr) != 0)
                        return 1;
                return first(dp, tp->right.ptr);
        case ROP_BRACE:
                if (tp->right.info.num[0] != 0 && first(dp, tp->left.ptr) != 0)
                        return 1;
                /*FALLTHROUGH*/
        case ROP_STAR:
        case ROP_QUEST:
                first(dp, tp->left.ptr);
                return 0;
        case ROP_LP:
        case ROP_PLUS:
                return first(dp, tp->left.ptr);
        }
        if (dp->posset[tp->left.pos] == 0)
        {
                dp->posset[tp->left.pos] = 1;
                dp->nset++;
        }
        return 1;
}

        /*
        * Walk from leaf up (most likely not to root).
        * Determine follow set for the leaf by filling
        * set[] with the positions reachable.
        */
static void
follow(Dfa *dp, Tree *tp)
{
        Tree *pp;

        switch ((pp = tp->parent)->op)
        {
        case ROP_CAT:
                if (pp->left.ptr == tp && first(dp, pp->right.ptr) != 0)
                        break;
                /*FALLTHROUGH*/
        case ROP_OR:
        case ROP_QUEST:
        case ROP_LP:
                follow(dp, pp);
                break;
        case ROP_STAR:
        case ROP_PLUS:
        case ROP_BRACE:
                first(dp, tp);
                follow(dp, pp);
                break;
        }
}

        /*
        * Postorder traversal.
        * At each leaf, copy it into posn[] and assign its follow set.
        * Because the left-most subtree is ROP_ALL under ROP_STAR, the
        * follow set for its leaf (position dp->nposn-1) is the same
        * as the initial state's signature (prior to any ROP_BOL).
        */
static int
posnfoll(Dfa *dp, Tree *tp)
{
        unsigned char *s;
        size_t i, n;
        size_t *fp;
        Posn *p;
        int ret;

        switch (tp->op)
        {
        case ROP_OR:
        case ROP_CAT:
                if ((ret = posnfoll(dp, tp->right.ptr)) != 0)
                        return ret;
                /*FALLTHROUGH*/
        case ROP_STAR:
        case ROP_PLUS:
        case ROP_QUEST:
        case ROP_LP:
                if ((ret = posnfoll(dp, tp->left.ptr)) != 0)
                        return ret;
                return 0;
        case ROP_END:   /* keeps follow() from walking above the root */
                p = &dp->posn[tp->left.pos];
                p->op = tp->op;
                p->seti = 0;
                p->nset = 0;
                return 0;
        case ROP_BKT:
        case ROP_BKTCOPY:
                p = &dp->posn[tp->left.pos];
                p->bkt = tp->right.info.bkt;
                goto skip;
        case ROP_BOL:
                dp->flags |= REG_NOTBOL; /* adjacent ROP_BOLs match empty */
                break;
        case ROP_EOL:
                dp->flags |= REG_NOTEOL; /* adjacent ROP_EOLs match empty */
                break;
        }
        p = &dp->posn[tp->left.pos];
skip:;
        p->op = tp->op;
        memset(dp->posset, 0, dp->nposn);
        dp->nset = 0;
        follow(dp, tp);
        dp->flags &= ~(REG_NOTBOL | REG_NOTEOL);
        fp = dp->posfoll;
        if ((p->nset = dp->nset) > dp->avail) /* need more */
        {
                if ((n = p->nset << 1) < dp->nposn)
                        n = dp->nposn;  
                dp->avail += n;
                if ((fp = realloc(dp->posfoll,
                        sizeof(size_t) * (dp->avail + dp->used))) == 0)
                {
                        return REG_ESPACE;
                }
                dp->posfoll = fp;
        }
        p->seti = dp->used;
        if ((i = dp->nset) != 0)
        {
                dp->used += i;
                dp->avail -= i;
                fp += p->seti;
                s = dp->posset;
                n = 0;
                do
                {
                        if (*s++ != 0)
                        {
                                *fp++ = n;
                                if (--i == 0)
                                        break;
                        }
                } while (++n != dp->nposn);
        }
        return 0;
}

static int
addstate(Dfa *dp) /* install state if unique; return its index */
{
        size_t *sp, *fp;
        size_t t, n, i;
        int flushed;

        /*
        * Compare dp->nset/dp->cursig[] against remembered states.
        */
        t = dp->top;
        do
        {
                if (dp->nsig[--t] != dp->nset)
                        continue;
                if ((n = dp->nset) != 0)
                {
                        fp = &dp->sigfoll[dp->sigi[t]];
                        sp = &dp->cursig[0];
                loop:;
                        if (*fp++ != *sp++)
                                continue; /* to the do-while */
                        if (--n != 0)
                                goto loop;
                }
                return t + 1;
        } while (t != 0);
        /*
        * Not in currently cached states; add it.
        */
        flushed = 0;
        if ((t = dp->top) >= CACHESZ)   /* need to flush the cache */
        {
                flushed = 1;
                n = dp->anybol;
                n = dp->sigi[n] + dp->nsig[n];  /* past invariant states */
                dp->avail += dp->used - n;
                dp->used = n;
                dp->top = n = dp->nfix;
                memset((void *)&dp->trans, 0, sizeof(dp->trans));
                memset((void *)&dp->acc[n], 0, CACHESZ - n);
                t = n;
        }
        dp->top++;
        fp = dp->sigfoll;
        if ((n = dp->nset) > dp->avail) /* grow strip */
        {
                i = dp->avail + n << 1;
                if ((fp = realloc(fp, sizeof(size_t) * (i + dp->used))) == 0)
                        return 0;
                dp->avail = i;
                dp->sigfoll = fp;
        }
        dp->acc[t] = 0;
        if ((dp->nsig[t] = n) != 0)
        {
                sp = dp->cursig;
                if (sp[0] == 0)
                        dp->acc[t] = 1;
                dp->sigi[t] = i = dp->used;
                dp->used += n;
                dp->avail -= n;
                fp += i;
                do
                        *fp++ = *sp++;
                while (--n != 0);
        }
        t++;
        if (flushed)
                return -t;
        return t;
}

void
libuxre_regdeldfa(Dfa *dp)
{
        Posn *pp;
        size_t np;

        if (dp->posfoll != 0)
                free(dp->posfoll);
        if (dp->sigfoll != 0)
                free(dp->sigfoll);
        if (dp->cursig != 0)
                free(dp->cursig);
        if ((pp = dp->posn) != 0)
        {
                /*
                * Need to walk the positions list to free any
                * space used for ROP_BKTs.
                */
                np = dp->nposn;
                do
                {
                        if (pp->op == ROP_BKT)
                        {
                                libuxre_bktfree(pp->bkt);
                                free(pp->bkt);
                        }
                } while (++pp, --np != 0);
                free(dp->posn);
        }
        free(dp);
}

int
regtrans(Dfa *dp, int st, w_type wc, int mb_cur_max)
{
        const unsigned char *s;
        size_t *fp, *sp;
        size_t i, n;
        Posn *pp;
        int nst;

        if ((n = dp->nsig[st]) == 0)    /* dead state */
                return st + 1;          /* stay here */
        memset(dp->posset, 0, dp->nposn);
        dp->nset = 0;
        fp = &dp->sigfoll[dp->sigi[st]];
        do
        {
                pp = &dp->posn[*fp];
                switch (pp->op)
                {
                case ROP_EOL:
                        if (wc == '\0' && (dp->flags & REG_NOTEOL) == 0)
                                break;
                        /*FALLTHROUGH*/
                case ROP_BOL:
                default:
                        if (pp->op == wc)
                                break;
                        /*FALLTHROUGH*/
                case ROP_END:
                case ROP_NONE:
                        continue;
                case ROP_NOTNL:
                        if (wc == '\n')
                                continue;
                        /*FALLTHROUGH*/
                case ROP_ANYCH:
                        if (wc <= '\0')
                                continue;
                        break;
                case ROP_ALL:
                        if (wc == '\0')
                                continue;
                        break;
                case ROP_BKT:
                case ROP_BKTCOPY:
                        /*
                        * Note that multiple character bracket matches
                        * are precluded from DFAs.  (See regparse.c and
                        * regcomp.c.)  Thus, the continuation string
                        * argument is not used in libuxre_bktmbexec().
                        */
                        if (wc > '\0' &&
                            libuxre_bktmbexec(pp->bkt, wc, 0, mb_cur_max) == 0)
                                break;
                        continue;
                }
                /*
                * Current character matches this position.
                * For each position in its follow list,
                * add that position to the new state's signature.
                */
                i = pp->nset;
                sp = &dp->posfoll[pp->seti];
                do
                {
                        if (dp->posset[*sp] == 0)
                        {
                                dp->posset[*sp] = 1;
                                dp->nset++;
                        }
                } while (++sp, --i != 0);
        } while (++fp, --n != 0);
        /*
        * Move the signature (if any) into cursig[] and install it.
        */
        if ((i = dp->nset) != 0)
        {
                fp = dp->cursig;
                s = dp->posset;
                for (n = 0;; n++)
                {
                        if (*s++ != 0)
                        {
                                *fp++ = n;
                                if (--i == 0)
                                        break;
                        }
                }
        }
        if ((nst = addstate(dp)) < 0) /* flushed cache */
                nst = -nst;
        else if (nst > 0 && (wc & ~(long)(NCHAR - 1)) == 0)
                dp->trans[st][wc] = nst;
        return nst;
}

LIBUXRE_STATIC int
libuxre_regdfacomp(regex_t *ep, Tree *tp, Lex *lxp)
{
        Tree *lp;
        Dfa *dp;
        Posn *p;
        int st;

        /*
        * It's convenient to insert an STAR(ALL) subtree to the
        * immediate left of the current tree.  This makes the
        * "any match" libuxre_regdfaexec() not a special case,
        * and the initial state signature will fall out when
        * building the follow sets for all the leaves.
        */
        if ((lp = libuxre_reg1tree(ROP_ALL, 0)) == 0
                || (lp = libuxre_reg1tree(ROP_STAR, lp)) == 0
                || (tp->left.ptr = lp
                        = libuxre_reg2tree(ROP_CAT, lp, tp->left.ptr)) == 0)
        {
                return REG_ESPACE;
        }
        lp->parent = tp;
        if ((dp = calloc(1, sizeof(Dfa))) == 0)
                return REG_ESPACE;
        ep->re_dfa = dp;
        /*
        * Just in case null pointers aren't just all bits zero...
        */
        dp->posfoll = 0;
        dp->sigfoll = 0;
        dp->cursig = 0;
        dp->posn = 0;
        /*
        * Assign position values to each of the tree's leaves
        * (the important parts), meanwhile potentially rewriting
        * the parse tree so that it fits within the restrictions
        * of our DFA.
        */
        if ((tp = findposn(ep, tp, lxp->mb_cur_max)) == 0)
                goto err;
        /*
        * Get space for the array of positions and current set,
        * now that the number of positions is known.
        */
        if ((dp->posn = malloc(sizeof(Posn) * dp->nposn + dp->nposn)) == 0)
                goto err;
        dp->posset = (unsigned char *)&dp->posn[dp->nposn];
        /*
        * Get follow sets for each position.
        */
        if (posnfoll(dp, tp) != 0)
                goto err;
        /*
        * Set up the special invariant states:
        *  - dead state (no valid transitions); index 0.
        *  - initial state for any match [STAR(ALL) follow set]; index 1.
        *  - initial state for any match after ROP_BOL.
        *  - initial state for left-most longest if REG_NOTBOL.
        *  - initial state for left-most longest after ROP_BOL.
        * The final two are not allocated if leftmost() cannot be called.
        * The pairs of initial states are the same if there is no
        * explicit ROP_BOL transition.
        */
        dp->avail += dp->used;
        dp->used = 0;
        if ((dp->sigfoll = malloc(sizeof(size_t) * dp->avail)) == 0)
                goto err;
        p = &dp->posn[dp->nposn - 1];   /* same as first(root) */
        dp->cursig = &dp->posfoll[p->seti];
        dp->nset = p->nset;
        dp->top = 1;    /* index 0 is dead state */
        addstate(dp);   /* must be state index 1 (returns 2) */
        if ((dp->cursig = malloc(sizeof(size_t) * dp->nposn)) == 0)
                goto err;
        dp->nfix = 2;
        if ((st = regtrans(dp, 1, ROP_BOL, lxp->mb_cur_max)) == 0)
                goto err;
        if ((dp->anybol = st - 1) == 2) /* new state */
                dp->nfix = 3;
        if ((ep->re_flags & REG_NOSUB) == 0) /* leftmost() might be called */
        {
                /*
                * leftmost() initial states are the same as the
                * "any match" ones without the STAR(ALL) position.
                */
                dp->sigi[dp->nfix] = 0;
                dp->nsig[dp->nfix] = dp->nsig[1] - 1;
                dp->acc[dp->nfix] = dp->acc[1];
                dp->leftbol = dp->leftmost = dp->nfix;
                dp->nfix++;
                if (dp->anybol != 1)    /* distinct state w/BOL */
                {
                        dp->sigi[dp->nfix] = dp->sigi[2];
                        dp->nsig[dp->nfix] = dp->nsig[2] - 1;
                        dp->acc[dp->nfix] = dp->acc[2];
                        dp->leftbol = dp->nfix;
                        dp->nfix++;
                }
                dp->top = dp->nfix;
        }
        return 0;
err:;
        libuxre_regdeldfa(dp);
        return REG_ESPACE;
}

static int
leftmost(Dfa *dp, Exec *xp)
{
        const unsigned char *s, *beg, *end;
        int i, nst, st, mb_cur_max;
        w_type wc;

        mb_cur_max = xp->mb_cur_max;
        beg = s = xp->str;
        end = 0;
        st = dp->leftbol;
        if (xp->flags & REG_NOTBOL)
                st = dp->leftmost;
        if (dp->acc[st] && (xp->flags & REG_NONEMPTY) == 0)
                end = s;        /* initial empty match allowed */
        for (;;)
        {
                if ((wc = *s++) == '\n')
                {
                        if (xp->flags & REG_NEWLINE)
                                wc = ROP_EOL;
                }
                else if (!ISONEBYTE(wc) && (i = libuxre_mb2wc(&wc, s)) > 0)
                        s += i;
                if ((wc & ~(long)(NCHAR - 1)) != 0
                        || (nst = dp->trans[st][wc]) == 0)
                {
                        if ((nst=regtrans(dp, st, wc, mb_cur_max)) == 0)
                                return REG_ESPACE;
                        if (wc == ROP_EOL) /* REG_NEWLINE only */
                        {
                                if (dp->acc[nst - 1])
                                {
                                        if (end == 0 || end < s)
                                                end = s;
                                        break;
                                }
                                beg = s;
                                st = dp->leftbol;
                                goto newst;
                        }
                }
                if ((st = nst - 1) == 0) /* dead state */
                {
                        if (end != 0)
                                break;
                        if ((wc = *beg++) == '\0')
                                return REG_NOMATCH;
                        else if (!ISONEBYTE(wc) &&
                                        (i = libuxre_mb2wc(&wc, beg)) > 0)
                                beg += i;
                        s = beg;
                        st = dp->leftmost;
                        goto newst;
                }
                if (wc == '\0')
                {
                        if (dp->acc[st])
                        {
                                s--;    /* don't include \0 */
                                if (end == 0 || end < s)
                                        end = s;
                                break;
                        }
                        if (end != 0)
                                break;
                        return REG_NOMATCH;
                }
        newst:;
                if (dp->acc[st])
                {
                        if (end == 0 || end < s)
                                end = s;
                }
        }
        xp->match[0].rm_so = beg - xp->str;
        xp->match[0].rm_eo = end - xp->str;
        return 0;
}

/*
* Optimization by simplification: singlebyte locale and REG_NEWLINE not set.
* Performance gain for grep is 25% so it's worth the hack.
*/
static int
regdfaexec_opt(Dfa *dp, Exec *xp)
{
        const unsigned char *s;
        int nst, st;

        s = xp->str;
        st = dp->anybol;
        if (xp->flags & REG_NOTBOL)
                st = 1;
        if (dp->acc[st] && (xp->flags & REG_NONEMPTY) == 0)
                return 0;       /* initial empty match allowed */
        do
        {
                if ((nst = dp->trans[st][*s]) == 0)
                {
                        if ((nst = regtrans(dp, st, *s, 1)) == 0)
                                return REG_ESPACE;
                }
                if (dp->acc[st = nst - 1])
                        return 0;
        } while (*s++ != '\0'); /* st != 0 */
        return REG_NOMATCH;
}

LIBUXRE_STATIC int
libuxre_regdfaexec(Dfa *dp, Exec *xp)
{
        const unsigned char *s;
        int i, nst, st, mb_cur_max;
        w_type wc;

        dp->flags = xp->flags & REG_NOTEOL;     /* for regtrans() */
        mb_cur_max = xp->mb_cur_max;
        if (xp->nmatch != 0)
                return leftmost(dp, xp);
        if (mb_cur_max == 1 && (xp->flags & REG_NEWLINE) == 0)
                return regdfaexec_opt(dp, xp);
        s = xp->str;
        st = dp->anybol;
        if (xp->flags & REG_NOTBOL)
                st = 1;
        if (dp->acc[st] && (xp->flags & REG_NONEMPTY) == 0)
                return 0;       /* initial empty match allowed */
        for (;;)
        {
                if ((wc = *s++) == '\n')
                {
                        if (xp->flags & REG_NEWLINE)
                                wc = ROP_EOL;
                }
                else if (!ISONEBYTE(wc) && (i = libuxre_mb2wc(&wc, s)) > 0)
                        s += i;
                if ((wc & ~(long)(NCHAR - 1)) != 0
                        || (nst = dp->trans[st][wc]) == 0)
                {
                        if ((nst=regtrans(dp, st, wc, mb_cur_max)) == 0)
                                return REG_ESPACE;
                        if (wc == ROP_EOL) /* REG_NEWLINE only */
                        {
                                if (dp->acc[nst - 1])
                                        return 0;
                                if (dp->acc[st = dp->anybol])
                                        return 0;
                                continue;
                        }
                }
                if (dp->acc[st = nst - 1])
                        return 0;
                if (wc == '\0') /* st == 0 */
                        return REG_NOMATCH;
        }
}