Subversion Repositories planix.SVN

Rev

Blame | Last modification | View Log | RSS feed

/*
 * Changes by Gunnar Ritter, Freiburg i. Br., Germany, November 2002.
 *
 * Sccsid @(#)regparse.c        1.12 (gritter) 9/22/03
 */
/*  UNIX(R) Regular Expresssion Library
 *
 *  Note: Code is released under the GNU LGPL
 *
 *  Copyright (C) 2001 Caldera International, Inc.
 *
 *  This library is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Lesser General Public
 *  License as published by the Free Software Foundation; either
 *  version 2 of the License, or (at your option) any later version.
 *
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public
 *  License along with this library; if not, write to:
 *        Free Software Foundation, Inc.
 *        59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

/*      #include "synonyms.h"   */
#include <stdlib.h>
#include <ctype.h>
#include "re.h"

LIBUXRE_STATIC void
libuxre_regdeltree(Tree *tp, int all)
{
        if (tp == 0)
                return;
        if (tp->op < 0)
        {
                switch (KIND_ROP(tp->op))
                {
                case BINARY_ROP:
                        libuxre_regdeltree(tp->right.ptr, all);
                        /*FALLTHROUGH*/
                case UNARY_ROP:
                        libuxre_regdeltree(tp->left.ptr, all);
                        break;
                default:
                        if (tp->op == ROP_BKT && all)
                        {
                                libuxre_bktfree(tp->right.info.bkt);
                                free(tp->right.info.bkt);
                        }
                        break;
                }
        }
        free(tp);
}

LIBUXRE_STATIC Tree *
libuxre_reg1tree(w_type op, Tree *lp)
{
        Tree *tp;

        if ((tp = malloc(sizeof(Tree))) == 0)
        {
                if (lp != 0)
                        libuxre_regdeltree(lp, 1);
                return 0;
        }
        tp->op = op;
        tp->left.ptr = lp;
        if (lp != 0)
                lp->parent = tp;
        return tp;
}

LIBUXRE_STATIC Tree *
libuxre_reg2tree(w_type op, Tree *lp, Tree *rp)
{
        Tree *tp;

        if ((tp = malloc(sizeof(Tree))) == 0)
        {
                libuxre_regdeltree(lp, 1);
                libuxre_regdeltree(rp, 1);
                return 0;
        }
        tp->op = op;
        tp->left.ptr = lp;
        lp->parent = tp;
        tp->right.ptr = rp;
        rp->parent = tp;
        return tp;
}

static int
lex(Lex *lxp)
{
        size_t num;
        w_type wc;
        int n, mb_cur_max;

        mb_cur_max = lxp->mb_cur_max;
nextc:  switch (wc = *lxp->pat++) /* interesting ones are single bytes */
        {
        case '\0':
                lxp->pat--;     /* continue to report ROP_END */
                wc = ROP_END;
                break;
        case '(':
                if (lxp->flags & REG_PARENS)
                {
                leftparen:;
                        /*
                        * Must keep track of the closed and
                        * yet-to-be closed groups as a list.
                        * Consider (()a(()b(()c(()d... in which
                        * at each letter another even-numbered
                        * group is made available, but no
                        * odd-numbered ones are.
                        */
                        if ((lxp->flags & REG_NOBACKREF) == 0)
                        {
                                if (lxp->nleft >= lxp->nclist) /* grow it */
                                {
                                        unsigned char *p;

                                        lxp->nclist += 8; /* arbitrary */
                                        if ((p = realloc(lxp->clist,
                                                lxp->nclist)) == 0)
                                        {
                                                lxp->err = REG_ESPACE;
                                                return -1;
                                        }
                                        lxp->clist = p;
                                }
                                lxp->clist[lxp->nleft] = 0; /* unavailable */
                        }
                        lxp->nleft++;
                        wc = ROP_LP;
                }
                break;
        case ')':
                /*
                * For REG_PARENS, only take a right paren as a close
                * if there is a matching left paren.
                */
                if (lxp->flags & REG_PARENS && lxp->nright < lxp->nleft)
                {
                        lxp->nright++;
                rightparen:;
                        /*
                        * The group that is being closed is the highest
                        * numbered as-yet-unclosed group.
                        */
                        if ((lxp->flags & REG_NOBACKREF) == 0)
                        {
                                num = lxp->nleft;
                                while (lxp->clist[--num] != 0)
                                        ;
                                lxp->clist[num] = 1;
                        }
                        wc = ROP_RP;
                }
                break;
        case '.':
                wc = ROP_ANYCH;
                if (lxp->flags & REG_NEWLINE)
                        wc = ROP_NOTNL;
                break;
        case '*':
                if (lxp->flags & REG_ADDITIVE)
                {
        nxtstar:        switch (*lxp->pat)
                        {
                        case '+':
                                if ((lxp->flags & REG_PLUS) == 0)
                                        break;
                                lxp->pat++;
                                goto nxtstar;
                        case '?':
                                if ((lxp->flags & REG_QUEST) == 0)
                                        break;
                                /*FALLTHRU*/
                        case '*':
                                lxp->pat++;
                                goto nxtstar;
                        }
                }
                wc = ROP_STAR;
                break;
        case '^':
                /*
                * Look "behind" to see if this is an anchor.
                * Take it as an anchor if it follows an alternation
                * operator.  (lxp->tok is initially set to ROP_OR.)
                */
                if (lxp->flags & REG_ANCHORS || lxp->tok == ROP_OR) {
                        if (lxp->flags & REG_ADDITIVE)
                        {
                                int optional = 0;

                        nxtcar: switch (*lxp->pat)
                                {
                                case '+':
                                        if ((lxp->flags & REG_PLUS) == 0)
                                                break;
                                        lxp->pat++;
                                        goto nxtcar;
                                case '?':
                                        if ((lxp->flags & REG_QUEST) == 0)
                                                break;
                                        /*FALLTHRU*/
                                case '*':
                                        optional = 1;
                                        lxp->pat++;
                                        goto nxtcar;
                                }
                                if (optional)
                                        goto nextc;
                        }
                        wc = ROP_BOL;
                }
                break;
        case '$':
                /*
                * Look ahead to see if this is an anchor,
                * unless any '$' is an anchor.
                * Take it as an anchor if it occurs just before
                * the pattern end or an alternation operator.
                */
                if (lxp->flags & REG_ANCHORS || *lxp->pat == '\0'
                        || (lxp->flags & REG_OR && *lxp->pat == '|')
                        || (lxp->flags & REG_NLALT && *lxp->pat == '\n'))
                {
                        if (lxp->flags & REG_ADDITIVE)
                        {
                                int optional = 0;

                        nxtdol: switch (*lxp->pat)
                                {
                                case '+':
                                        if ((lxp->flags & REG_PLUS) == 0)
                                                break;
                                        lxp->pat++;
                                        goto nxtdol;
                                case '?':
                                        if ((lxp->flags & REG_QUEST) == 0)
                                                break;
                                        /*FALLTHRU*/
                                case '*':
                                        optional = 1;
                                        lxp->pat++;
                                        goto nxtdol;
                                }
                                if (optional)
                                        goto nextc;
                        }
                        wc = ROP_EOL;
                }
                break;
        case '+':
                if (lxp->flags & REG_PLUS)
                {
                        wc = ROP_PLUS;
                        if (lxp->flags & REG_ADDITIVE)
                        {
                nxtplus:        switch (*lxp->pat)
                                {
                                case '?':
                                        if ((lxp->flags & REG_QUEST) == 0)
                                                break;
                                case '*':
                                        wc = ROP_STAR;
                                        /*FALLTHRU*/
                                case '+':
                                        lxp->pat++;
                                        goto nxtplus;
                                }
                        }
                }
                break;
        case '?':
                if (lxp->flags & REG_QUEST)
                {
                        wc = ROP_QUEST;
                        if (lxp->flags & REG_ADDITIVE)
                        {
                nxtquest:       switch (*lxp->pat)
                                {
                                case '+':
                                        if ((lxp->flags & REG_PLUS) == 0)
                                                break;
                                case '*':
                                        wc = ROP_STAR;
                                        /*FALLTHRU*/
                                case '?':
                                        lxp->pat++;
                                        goto nxtquest;
                                }
                        }
                }
                break;
        case '\n':
                if (lxp->flags & REG_NLALT)
                {
                        /*
                        * Even when newline is an alternative separator,
                        * it doesn't permit parenthesized subexpressions
                        * to include it.
                        */
                        if (lxp->nleft != lxp->nright)
                        {
                                lxp->err = REG_EPAREN;
                                return -1;
                        }
                        wc = ROP_OR;
                }
                else if (lxp->flags & REG_NEWLINE)
                        lxp->flags |= REG_NFA;
                break;
        case '|':
                if (lxp->flags & REG_OR)
                        wc = ROP_OR;
                break;
        case '[':
                if ((lxp->info.bkt = malloc(sizeof(Bracket))) == 0)
                {
                        lxp->err = REG_ESPACE;
                        return -1;
                }
                if ((lxp->flags & REG_GOTBKT) == 0)     /* first time */
                {
                        struct lc_collate *col;

                        lxp->flags |= REG_GOTBKT;
                        lxp->bktflags = 0;
                        if (lxp->flags & REG_ICASE)
                                lxp->bktflags |= BKT_ONECASE;
                        if (lxp->flags & REG_NEWLINE)
                                lxp->bktflags |= BKT_NOTNL;
                        if (lxp->flags & REG_BADRANGE)
                                lxp->bktflags |= BKT_BADRANGE;
                        if (lxp->flags & REG_ODDRANGE)
                                lxp->bktflags |= BKT_ODDRANGE;
                        if (lxp->flags & REG_SEPRANGE)
                                lxp->bktflags |= BKT_SEPRANGE;
                        if (lxp->flags & REG_BKTQUOTE)
                                lxp->bktflags |= BKT_QUOTE;
                        if (lxp->flags & REG_BKTEMPTY)
                                lxp->bktflags |= BKT_EMPTY;
                        if (lxp->flags & REG_ESCNL)
                                lxp->bktflags |= BKT_ESCNL;
                        if (lxp->flags & REG_NLALT)
                                lxp->bktflags |= BKT_NLBAD;
                        if (lxp->flags & REG_ESCSEQ)
                                lxp->bktflags |= BKT_ESCSEQ;
                        if (lxp->flags & REG_BKTESCAPE)
                                lxp->bktflags |= BKT_ESCAPE;
                        if (lxp->flags & REG_NOI18N)
                                lxp->bktflags |= BKT_NOI18N;
                        if (lxp->flags & REG_OLDESC)
                                lxp->bktflags |= BKT_OLDESC;
                        if ((col = libuxre_lc_collate(0)) != 0)
                        {
                                if (col->maintbl == 0
                                        || col->flags & CHF_ENCODED)
                                {
                                        (void)libuxre_lc_collate(col);
                                        col = 0;
                                }
                                else if (col->flags & CHF_MULTICH)
                                        lxp->flags |= REG_NFA;
                        }
                        lxp->col = col;
                }
                n = lxp->bktflags;
                if (*lxp->pat == '^')
                {
                        n |= BKT_NEGATED;
                        lxp->pat++;
                }
                lxp->info.bkt->col = lxp->col;
                if ((n = libuxre_bktmbcomp(lxp->info.bkt, lxp->pat,
                                                n, mb_cur_max)) < 0)
                {
                        free(lxp->info.bkt);
                        lxp->err = -n;  /* convert to REG_* errors */
                        return -1;
                }
                /*
                * NFA forced if newline can be a match and REG_NEWLINE is set.
                */
                if ((lxp->flags & (REG_NFA | REG_NEWLINE)) == REG_NEWLINE
                        && lxp->pat[-1] == '[' /* i.e., not BKT_NEGATED */
                        && libuxre_bktmbexec(lxp->info.bkt, '\n', 0, 1) == 0)
                {
                        lxp->flags |= REG_NFA;
                }
                lxp->pat += n;
                wc = ROP_BKT;
                break;
        case '{':
                if (lxp->flags & REG_NOBRACES || (lxp->flags & REG_BRACES) == 0)
                        break;
        interval:;
                if (!isdigit(num = *lxp->pat))
                {
                badbr:;
                        lxp->err = REG_BADBR;
                        if (*lxp->pat == '\0')
                                lxp->err = REG_EBRACE; /* more accurate */
                        return -1;
                }
                num -= '0';
                while (isdigit(wc = *++lxp->pat))
                {
                        num *= 10;
                        if ((num += wc - '0') > BRACE_MAX)
                                goto badbr;
                }
                lxp->info.num[0] = num;
                lxp->info.num[1] = num;
                if (wc == ',')
                {
                        lxp->info.num[1] = BRACE_INF;
                        if (isdigit(wc = *++lxp->pat))
                        {
                                num = wc - '0';
                                while (isdigit(wc = *++lxp->pat))
                                {
                                        num *= 10;
                                        if ((num += wc - '0') > BRACE_MAX)
                                                goto badbr;
                                }
                                if (num < lxp->info.num[0])
                                        goto badbr;
                                lxp->info.num[1] = num;
                        }
                }
                if ((lxp->flags & REG_BRACES) == 0)
                {
                        if (wc != '\\')
                                goto badbr;
                        wc = *++lxp->pat;
                }
                if (wc != '}')
                        goto badbr;
                lxp->pat++;
                wc = ROP_BRACE;
                /*
                * Replace interval with simpler equivalents where possible,
                * even when the operators are not otherwise available.
                */
                if (lxp->info.num[1] <= 1)
                {
                        if (lxp->info.num[0] == 1)
                                wc = ROP_NOP;   /* {1,1} is noise */
                        else if (lxp->info.num[1] == 0)
                                wc = ROP_EMPTY; /* {0,0} is empty string */
                        else
                                wc = ROP_QUEST; /* {0,1} is ? */
                }
                else if (lxp->info.num[1] == BRACE_INF)
                {
                        if (lxp->info.num[0] == 0)
                                wc = ROP_STAR;
                        else if (lxp->info.num[0] == 1)
                                wc = ROP_PLUS;
                        else if (lxp->info.num[0] > BRACE_DFAMAX)
                                lxp->flags |= REG_NFA;
                }
                else if (lxp->info.num[1] > BRACE_DFAMAX)
                {
                        lxp->flags |= REG_NFA;
                }
                break;
        case '\\':
                switch (wc = *lxp->pat++)
                {
                case '\0':
                        lxp->err = REG_EESCAPE;
                        return -1;
                case '<':
                        if (lxp->flags & REG_ANGLES)
                        {
                                lxp->flags |= REG_NFA;
                                wc = ROP_LT;
                        }
                        goto out;
                case '>':
                        if (lxp->flags & REG_ANGLES)
                        {
                                lxp->flags |= REG_NFA;
                                wc = ROP_GT;
                        }
                        goto out;
                case '(':
                        if ((lxp->flags & REG_PARENS) == 0)
                                goto leftparen;
                        goto out;
                case ')':
                        if ((lxp->flags & REG_PARENS) == 0)
                        {
                                if (++lxp->nright > lxp->nleft)
                                {
                                        lxp->err = REG_EPAREN;
                                        return -1;
                                }
                                goto rightparen;
                        }
                        goto out;
                case '{':
                        if (lxp->flags & (REG_BRACES|REG_NOBRACES))
                                goto out;
                        goto interval;
                case '1':
                case '2':
                case '3':
                case '4':
                case '5':
                case '6':
                case '7':
                case '8':
                case '9':
                        num = wc - '0';
                        if ((lxp->flags & REG_NOBACKREF) == 0)
                        {
                        backref:;
                                if (num > lxp->nleft
                                        || lxp->clist[num - 1] == 0)
                                {
                                        lxp->err = REG_ESUBREG;
                                        return -1;
                                }
                                lxp->info.sub = num;
                                if (lxp->maxref < num)
                                        lxp->maxref = num;
                                lxp->flags |= REG_NFA;
                                wc = ROP_REF;
                                goto out;
                        }
                        /*
                        * For compatibility (w/awk), permit "octal" 8 and 9.
                        * Already have the value of the first digit in num.
                        *
                        * If REG_OLDESC, exactly three digits must be present.
                        */
                tryoctal:;
                        if ((lxp->flags & REG_ESCSEQ) == 0)
                                goto out;
                        if ((wc = *lxp->pat) >= '0' && wc <= '9')
                        {
                                num <<= 3;
                                num += wc - '0';
                                if ((wc = *++lxp->pat) >= '0' && wc <= '9')
                                {
                                        num <<= 3;
                                        num += wc - '0';
                                        lxp->pat++;
                                }
                                else if (lxp->flags & REG_OLDESC)
                                {
                                        lxp->pat--;
                                        wc = lxp->pat[-1];
                                        goto out;
                                }
                        }
                        else if (lxp->flags & REG_OLDESC)
                        {
                                wc = lxp->pat[-1];
                                goto out;
                        }
                        if ((wc = num) <= 0)
                        {
                                lxp->err = REG_BADESC;
                                return -1;
                        }
                        goto out;
                case '0':
                        if ((lxp->flags & REG_NOBACKREF) == 0
                                && (num = *lxp->pat) >= '0' && num <= '9')
                        {
                                num -= '0';
                                /*
                                * This loop ignores wraparounds.
                                * Keep track of number of digits in n.
                                */
                                n = 1;
                                while ((wc = *++lxp->pat) >= '0' && wc <= '9')
                                {
                                        num *= 10;
                                        num += wc - '0';
                                        n++;
                                }
                                if (num != 0)
                                        goto backref;
                                lxp->pat -= n;
                        }
                        num = 0;
                        goto tryoctal;
                case 'a':
                        if ((lxp->flags&(REG_ESCSEQ|REG_OLDESC)) == REG_ESCSEQ)
                                wc = '\a';
                        goto out;
                case 'b':
                        if (lxp->flags & REG_ESCSEQ)
                                wc = '\b';
                        goto out;
                case 'f':
                        if (lxp->flags & REG_ESCSEQ)
                                wc = '\f';
                        goto out;
                case 'n':
                        if (lxp->flags & (REG_ESCSEQ | REG_ESCNL))
                        {
                                wc = '\n';
                                if (lxp->flags & REG_NEWLINE)
                                        lxp->flags |= REG_NFA;
                        }
                        goto out;
                case 'r':
                        if (lxp->flags & REG_ESCSEQ)
                                wc = '\r';
                        goto out;
                case 't':
                        if (lxp->flags & REG_ESCSEQ)
                                wc = '\t';
                        goto out;
                case 'v':
                        if ((lxp->flags&(REG_ESCSEQ|REG_OLDESC)) == REG_ESCSEQ)
                                wc = '\v';
                        goto out;
                case 'x':
                        if ((lxp->flags&(REG_ESCSEQ|REG_OLDESC)) == REG_ESCSEQ
                                && isxdigit(num = *lxp->pat))
                        {
                                wc = num;
                                num = 0;
                                /*
                                * Take as many hex digits as possible,
                                * ignoring overflows.
                                * If the result (squeezed into a w_type)
                                * is positive, it's okay.
                                */
                                do
                                {
                                        if (isdigit(wc))
                                                wc -= '0';
                                        else if (isupper(wc))
                                                wc -= 'A' + 10;
                                        else
                                                wc -= 'a' + 10;
                                        num <<= 4;
                                        num |= wc;
                                } while (isxdigit(wc = *++lxp->pat));
                                if ((wc = num) <= 0)
                                {
                                        lxp->err = REG_BADESC;
                                        return -1;
                                }
                        }
                        goto out;
                }
                /*FALLTHROUGH*/
        default:
                if (!ISONEBYTE(wc))
                {
                        if ((n = libuxre_mb2wc(&wc, lxp->pat)) > 0)
                                lxp->pat += n;
                        else if (n < 0)
                        {
                                lxp->err = REG_ILLSEQ;
                                return -1;
                        }
                }
                if (lxp->flags & REG_ICASE)
                        wc = to_lower(wc);
                break;
        }
out:;
        lxp->tok = wc;
        return 0;
}

static Tree     *alt(Lex *);

static Tree *
leaf(Lex *lxp)
{
        Tree *tp;

        if ((tp = malloc(sizeof(Tree))) == 0)
        {
                lxp->err = REG_ESPACE;
                return 0;
        }
        switch (tp->op = lxp->tok) /* covers most cases */
        {
        default:
                if (tp->op < 0)
                {
                        lxp->err = REG_BADPAT;
                        tp->right.ptr = 0;
                        goto badunary;
                }
                break;
        case ROP_STAR:
        case ROP_PLUS:
        case ROP_QUEST:
                if ((lxp->flags & REG_NOAUTOQUOTE) == 0
                        && lxp->pat[-1] != '}')
                {
                        tp->op = lxp->pat[-1];
                        break;
                }
                /*FALLTHROUGH*/
        case ROP_BRACE:
        case ROP_EMPTY: /* was {0,0} ROP_BRACE */
        case ROP_NOP:   /* was {1,1} ROP_BRACE */
                lxp->err = REG_BADRPT;
        badunary:;
                tp->left.ptr = 0;
                goto err;
        case ROP_ANYCH:
        case ROP_NOTNL:
                break;
        case ROP_BOL:
        case ROP_EOL:
        case ROP_LT:
        case ROP_GT:
                /*
                * Look ahead for what would have been taken to be
                * postfix operators.
                */
                if (lex(lxp) != 0)
                        goto err;
                switch (lxp->tok)
                {
                case ROP_STAR:
                case ROP_PLUS:
                case ROP_QUEST:
                        if ((lxp->flags & REG_NOAUTOQUOTE) == 0
                                && lxp->pat[-1] != '}')
                        {
                                lxp->tok = lxp->pat[-1];
                                break;
                        }
                        /*FALLTHROUGH*/
                case ROP_BRACE:
                case ROP_EMPTY: /* was {0,0} ROP_BRACE */
                case ROP_NOP:   /* was {1,1} ROP_BRACE */
                        lxp->err = REG_BADRPT;
                        goto err;
                }
                return tp;
        case ROP_BKT:
                tp->right.info.bkt = lxp->info.bkt;
                break;
        case ROP_REF:
                tp->right.info.sub = lxp->info.sub;
                break;
        case ROP_LP:
                tp->right.info.sub = lxp->nleft;
                if (lex(lxp) != 0)
                        goto badunary;
                if (lxp->tok == ROP_RP) /* empty parens; choice of meaning */
                {
                        if (lxp->flags & REG_MTPARENBAD)
                        {
                                lxp->err = REG_EMPTYPAREN;
                                goto badunary;
                        }
                        lxp->tok = ROP_EMPTY;
                        if (lxp->flags & REG_MTPARENFAIL)
                                lxp->tok = ROP_NONE;
                        if ((tp->left.ptr = libuxre_reg1tree(lxp->tok, 0)) == 0)
                                goto badunary;
                }
                else if ((tp->left.ptr = alt(lxp)) == 0)
                {
                        if (lxp->err == REG_BADPAT)
                                goto parenerr;
                        goto badunary;
                }
                else if (lxp->tok != ROP_RP)
                {
                        lxp->err = REG_BADPAT;
                parenerr:;
                        if (lxp->nleft != lxp->nright)
                                lxp->err = REG_EPAREN;  /* better choice */
                        goto badunary;
                }
                tp->left.ptr->parent = tp;
                break;
        }
        if (lex(lxp) != 0)
        {
        err:;
                libuxre_regdeltree(tp, 1);
                tp = 0;
        }
        return tp;
}

static Tree *
post(Lex *lxp)
{
        Tree *lp;

        if ((lp = leaf(lxp)) == 0)
                return 0;
        switch (lxp->tok)
        {
        case ROP_EMPTY: /* this was {0,0} ROP_BRACE */
                libuxre_regdeltree(lp, 1);
                lp = 0;
                /*FALLTHROUGH*/
        case ROP_BRACE:
        case ROP_STAR:
        case ROP_PLUS:
        case ROP_QUEST:
                if ((lp = libuxre_reg1tree(lxp->tok, lp)) == 0)
                {
                        lxp->err = REG_ESPACE;
                        return 0;
                }
                if (lxp->tok == ROP_BRACE)
                        lp->right.info = lxp->info;
                /*FALLTHROUGH*/
        case ROP_NOP:   /* this was {1,1} ROP_BRACE */
                if (lex(lxp) != 0)
                {
                        libuxre_regdeltree(lp, 1);
                        return 0;
                }
                break;
        }
        return lp;
}

static Tree *
cat(Lex *lxp)
{
        Tree *lp, *rp;

        if ((lp = post(lxp)) == 0)
                return 0;
        for (;;)
        {
                if (lxp->tok == ROP_OR || lxp->tok == ROP_RP
                        || lxp->tok == ROP_END)
                {
                        return lp;
                }
                if ((rp = post(lxp)) == 0)
                        break;
                if ((lp = libuxre_reg2tree(ROP_CAT, lp, rp)) == 0)
                {
                        lxp->err = REG_ESPACE;
                        return 0;
                }
        }
        libuxre_regdeltree(lp, 1);
        return 0;
}

static Tree *
alt(Lex *lxp)
{
        Tree *lp, *rp;

        if ((lp = cat(lxp)) == 0)
                return 0;
        for (;;)
        {
                if (lxp->tok != ROP_OR)
                        return lp;
                if (lex(lxp) != 0)
                        break;
                if (lxp->tok == ROP_END)
                        return lp;      /* ignore trailing '|' */
                if ((rp = cat(lxp)) == 0)
                        break;
                if ((lp = libuxre_reg2tree(ROP_OR, lp, rp)) == 0)
                {
                        lxp->err = REG_ESPACE;
                        return 0;
                }
        }
        libuxre_regdeltree(lp, 1);
        return 0;
}

LIBUXRE_STATIC Tree *
libuxre_regparse(Lex *lxp, const unsigned char *pat, int flags)
{
        Tree *lp, *rp;

        lp = 0;                 /* in case of error */
        lxp->clist = 0;
        lxp->col = 0;
        lxp->err = 0;
        lxp->maxref = 0;
        lxp->nleft = 0;
        lxp->nright = 0;
        lxp->nclist = 0;
        lxp->mb_cur_max = MB_CUR_MAX;
        if (flags & REG_OR && *pat == '|')
                pat++;  /* skip initial OR like egrep did */
        lxp->pat = pat;
        lxp->flags = flags;
        lxp->tok = ROP_OR;      /* enables ^ as anchor */
        /*
        * Get initial token.
        */
        if (lex(lxp) != 0)
        {
        err:;
                if (lp != 0)
                {
                        libuxre_regdeltree(lp, 1);
                        lp = 0;
                }
                if (lxp->err == 0)
                        lxp->err = REG_ESPACE;
                goto ret;
        }
        if (lxp->tok == ROP_END)
        {
                lxp->err = REG_NOPAT;
                goto err;
        }
        if ((lp = alt(lxp)) == 0)       /* parse entire RE */
                goto err;
        if (lxp->maxref != 0 || (flags & REG_NOSUB) == 0)
        {
                if ((lp = libuxre_reg1tree(ROP_LP, lp)) == 0)
                        goto err;
                lp->right.info.sub = 0;
        }
        if ((rp = libuxre_reg1tree(ROP_END, 0)) == 0)
                goto err;
        if ((lp = libuxre_reg2tree(ROP_CAT, lp, rp)) == 0)
                goto err;
        lp->parent = 0;
ret:;
        if (lxp->clist != 0)
                free(lxp->clist);
        return lp;
}

#ifdef REGDEBUG

LIBUXRE_STATIC void
libuxre_regtree(Tree *tp, int n)
{
        const char *opstr;
        char buf[32];
        int kind, next;

        if (n < 0)
                next = -n + 2;
        else
                next = n + 2;
        switch (tp->op)
        {
        case ROP_OR:
                opstr = "|";
                kind = BINARY_ROP;
                break;
        case ROP_CAT:
                opstr = "&";
                kind = BINARY_ROP;
                break;
        case ROP_STAR:
                opstr = "*";
                kind = UNARY_ROP;
                break;
        case ROP_PLUS:
                opstr = "+";
                kind = UNARY_ROP;
                break;
        case ROP_QUEST:
                opstr = "?";
                kind = UNARY_ROP;
                break;
        case ROP_BRACE:
                opstr = buf;
                if (tp->right.info.num[1] == BRACE_INF)
                {
                        sprintf(buf, "{%u,inf}",
                                (unsigned)tp->right.info.num[0]);
                }
                else
                {
                        sprintf(buf, "{%u,%u}",
                                (unsigned)tp->right.info.num[0],
                                (unsigned)tp->right.info.num[1]);
                }
                kind = UNARY_ROP;
                break;
        case ROP_LP:
                opstr = buf;
                sprintf(buf, "%lu(", (unsigned long)tp->right.info.sub);
                kind = UNARY_ROP;
                break;
        case ROP_RP:
                opstr = buf;
                sprintf(buf, ")%lu", (unsigned long)tp->right.info.sub);
                kind = UNARY_ROP;
                break;
        case ROP_NOP:
                opstr = "<NOP>";
                kind = LEAF_ROP;
                break;
        case ROP_BOL:
                opstr = "<BOL>";
                kind = LEAF_ROP;
                break;
        case ROP_EOL:
                opstr = "<EOL>";
                kind = LEAF_ROP;
                break;
        case ROP_ALL:
                opstr = "<ALL>";
                kind = LEAF_ROP;
                break;
        case ROP_ANYCH:
                opstr = "<ANYCH>";
                kind = LEAF_ROP;
                break;
        case ROP_NOTNL:
                opstr = "<NOTNL>";
                kind = LEAF_ROP;
                break;
        case ROP_EMPTY:
                opstr = "<MT>";
                kind = LEAF_ROP;
                break;
        case ROP_NONE:
                opstr = "<NONE>";
                kind = LEAF_ROP;
                break;
        case ROP_BKT:
                opstr = buf;
                sprintf(buf, "[%#lx]", (unsigned long)tp->right.info.bkt);
                kind = LEAF_ROP;
                break;
        case ROP_BKTCOPY:
                opstr = buf;
                sprintf(buf, "[%#lx]CPY", (unsigned long)tp->right.info.bkt);
                kind = LEAF_ROP;
                break;
        case ROP_LT:
                opstr = "\\<";
                kind = LEAF_ROP;
                break;
        case ROP_GT:
                opstr = "\\>";
                kind = LEAF_ROP;
                break;
        case ROP_REF:
                opstr = buf;
                sprintf(buf, "\\%lu", (unsigned long)tp->right.info.sub);
                kind = LEAF_ROP;
                break;
        case ROP_END:
                opstr = "<END>";
                kind = LEAF_ROP;
                break;
        default:
                opstr = buf;
                if (tp->op > UCHAR_MAX)
                        sprintf(buf, "W%#x", tp->op);
                else if (tp->op <= 0)
                        sprintf(buf, "UNK=%u", tp->op);
                else
                        sprintf(buf, "%c", tp->op);
                kind = LEAF_ROP;
                break;
        }
        if (kind == BINARY_ROP)
                libuxre_regtree(tp->right.ptr, -next);
        printf("%*c:%s\n", next - 1, n < 0 ? 'R' : n > 0 ? 'L' : 'T', opstr);
        if (kind != LEAF_ROP)
                libuxre_regtree(tp->left.ptr, next);
}

#endif /*REGDEBUG*/