Subversion Repositories planix.SVN

Rev

Blame | Last modification | View Log | RSS feed

/*
 * Changes by Gunnar Ritter, Freiburg i. Br., Germany, November 2002.
 *
 * Sccsid @(#)re.h      1.15 (gritter) 2/6/05
 */
/*  UNIX(R) Regular Expression Library
 *
 *  Note: Code is released under the GNU LGPL
 *
 *  Copyright (C) 2001 Caldera International, Inc.
 *
 *  This library is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Lesser General Public
 *  License as published by the Free Software Foundation; either
 *  version 2 of the License, or (at your option) any later version.
 *
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public
 *  License along with this library; if not, write to:
 *        Free Software Foundation, Inc.
 *        59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

#ifndef LIBUXRE_RE_H
#define LIBUXRE_RE_H

        /*
        * Maps safe external tag to internal one: from this point on the
        * preprocessor rewrites the tag re_coll_ to lc_collate, which is
        * the struct tag the declarations further down in this header use.
        * The mapping is established before <regex.h> is included, so it
        * also applies to that header's contents.
        * The corresponding mapping for <fnmatch.h> is left disabled.
        */
#define re_coll_        lc_collate      /* <regex.h> */
/*      #define __fnm_collate   lc_collate      */      /* <fnmatch.h> */

#include <limits.h>
#include <regex.h>
/*      #include <fnmatch.h>    */
#include <colldata.h>

/* Number of bits in one unsigned short. */
#define NBSHT   (CHAR_BIT * sizeof(unsigned short))
/*
 * Number of unsigned shorts needed to hold one bit for each of the
 * UCHAR_MAX + 1 possible byte values (division rounded up).
 */
#define NBYTE   ((UCHAR_MAX + 1 + NBSHT - 1) / NBSHT)
/* Capacities of the fixed-size arrays inside Bracket. */
#define NTYPE   4       /* entries in type[] */
#define NWIDE   32      /* entries in wide[] */
#define NQUIV   4       /* entries in quiv[] */

/*
 * State for one bracket expression ([...]); this is the object passed
 * to libuxre_bktmbcomp(), libuxre_bktmbexec() and libuxre_bktfree().
 * As noted on `col', the caller initializes only that member.
 * NOTE(review): the roles given for the other members below are inferred
 * from their names and sizes -- confirm against the bracket
 * implementation, which is not part of this header.
 */
typedef struct
{
        struct lc_collate       *col;   /* only member set by caller */
        wctype_t                *extype;        /* presumably overflow storage for type[] -- confirm */
        wuchar_type             *exquiv;        /* presumably overflow storage for quiv[] -- confirm */
        wchar_t                 *exwide;        /* presumably overflow storage for wide[] -- confirm */
        wctype_t                type[NTYPE];    /* up to NTYPE character-class handles */
        wuchar_type             quiv[NQUIV];    /* up to NQUIV entries; looks like equivalence classes -- confirm */
        wchar_t                 wide[NWIDE];    /* up to NWIDE wide characters */
        unsigned short          byte[NBYTE];    /* sized to hold one bit per possible byte value */
        unsigned short          ntype;          /* presumably count of type entries in use -- confirm */
        unsigned short          nquiv;          /* presumably count of quiv entries in use -- confirm */
        unsigned short          nwide;          /* presumably count of wide entries in use -- confirm */
        unsigned int            flags;          /* presumably a combination of BKT_* bits -- confirm */
} Bracket;

/* Bracket-expression flag bits; each occupies its own bit. */
#define BKT_NEGATED     0x0001  /* set is complemented */
#define BKT_ONECASE     0x0002  /* upper and lower case are equivalent */
#define BKT_NOTNL       0x0004  /* with BKT_NEGATED, still do not match newline */
#define BKT_BADRANGE    0x0008  /* treat a reversed range [m-a] as [ma] */
#define BKT_SEPRANGE    0x0010  /* reject chained ranges such as [a-m-z] */
#define BKT_NLBAD       0x0020  /* newline is not allowed */
#define BKT_SLASHBAD    0x0040  /* slash is not allowed (for pathnames) */
#define BKT_EMPTY       0x0080  /* a leading ] ends the set (empty set) */
#define BKT_ESCAPE      0x0100  /* \ quotes whatever follows it */
#define BKT_QUOTE       0x0200  /* \ quotes only \\, \^, \- or \] */
#define BKT_ESCNL       0x0400  /* \n stands for the newline character */
#define BKT_ESCSEQ      0x0800  /* otherwise, interpret \ as in C escapes */
#define BKT_ODDRANGE    0x1000  /* oawk oddity: [m-a] means [m] */
#define BKT_NOI18N      0x2000  /* turn off [::] [==] [..] */
#define BKT_OLDESC      0x4000  /* only \b \f \n \r \t are enabled */

        /*
        * These error returns for libuxre_bktmbcomp() are directly tied to
        * the error returns for regcomp() for convenience.
        *
        * Each BKT_* code is the arithmetic negation of the matching REG_*
        * code, so a caller can recover the regcomp() value by negating the
        * return again.  This fits the "<0 error" convention documented for
        * libuxre_bktmbcomp() as long as the REG_* codes are positive.
        * NOTE(review): several of these REG_* names (e.g. REG_EEQUIV,
        * REG_EBKTCHAR, REG_EMPTYSUBBKT, REG_BADESC, REG_ILLSEQ) are not
        * part of POSIX <regex.h>; presumably the library's own <regex.h>
        * supplies them -- confirm.
        */
#define BKT_BADPAT      (-REG_BADPAT)
#define BKT_ECOLLATE    (-REG_ECOLLATE)
#define BKT_ECTYPE      (-REG_ECTYPE)
#define BKT_EEQUIV      (-REG_EEQUIV)
#define BKT_BADCHAR     (-REG_EBKTCHAR)
#define BKT_EBRACK      (-REG_EBRACK)
#define BKT_EMPTYSUBBKT (-REG_EMPTYSUBBKT)
#define BKT_ERANGE      (-REG_ERANGE)
#define BKT_ESPACE      (-REG_ESPACE)
#define BKT_BADESC      (-REG_BADESC)
#define BKT_ILLSEQ      (-REG_ILLSEQ)

        /*
        * These must be distinct from the flags in <fnmatch.h>.
        * NOTE(review): this header does not include <fnmatch.h> (the
        * include is commented out), so nothing here enforces that
        * requirement -- verify against the fnmatch.h actually in use.
        */
#define FNM_COLLATE     0x2000  /* have collation information */
#define FNM_CURRENT     0x4000  /* have full-sized fnm_t structure */

        /*
        * These must be distinct from the flags in <regex.h>.
        * The three values occupy bits 29, 30 and 31 of a 32-bit word.
        * Note that 0x80000000 does not fit in a 32-bit int, so on such
        * platforms REG_GOTBKT has type unsigned int while the other two
        * are plain int.
        * NOTE(review): REG_NFA/REG_DFA presumably record which engine was
        * built and REG_GOTBKT that a bracket was seen -- confirm in the
        * callers.
        */
#define REG_NFA         0x20000000
#define REG_DFA         0x40000000
#define REG_GOTBKT      0x80000000

/*
 * Limits for brace repetition counts (ROP_BRACE), which Info stores as
 * two unsigned shorts.
 * NOTE(review): BRACE_INF presumably marks "no upper bound" -- confirm
 * in the parser.
 */
#define BRACE_INF       USHRT_MAX
#define BRACE_MAX       5100    /* arbitrary number < SHRT_MAX */
#define BRACE_DFAMAX    255     /* max amount for r.e. duplication */

/*
 * Per-token / per-node payload.  Only one member is meaningful at a
 * time; the comments name the operators each member belongs to.
 * Used as a member of both Lex and Tree.
 */
typedef union   /* extra info always kept for some tokens/nodes */
{
        Bracket         *bkt;   /* ROP_BKT */
        size_t          sub;    /* ROP_LP (ROP_RP), ROP_REF */
        unsigned short  num[2]; /* ROP_BRACE: num[0]=low, num[1]=high */
} Info;

/*
 * Lexer/parser context handed to libuxre_regparse() and on to the
 * DFA/NFA compile routines.
 * NOTE(review): the member descriptions below are inferred from names
 * and types only -- confirm against the parser implementation, which is
 * not part of this header.
 */
typedef struct  /* lexical context while parsing */
{
        Info                    info;           /* payload that goes with tok */
        const unsigned char     *pat;           /* presumably current position in the pattern -- confirm */
        unsigned char           *clist;         /* byte buffer; purpose not visible here */
        struct lc_collate       *col;           /* collation data (same type as Bracket.col) */
        unsigned long           flags;          /* presumably regcomp() flags plus private REG_* bits -- confirm */
        w_type                  tok;            /* presumably the current token: ROP_* code or character -- confirm */
        size_t                  maxref;         /* presumably highest back-reference number seen -- confirm */
        size_t                  nleft;          /* presumably count of left parentheses -- confirm */
        size_t                  nright;         /* presumably count of right parentheses -- confirm */
        size_t                  nclist;         /* presumably size associated with clist -- confirm */
        int                     bktflags;       /* presumably BKT_* flags for bracket compilation -- confirm */
        int                     err;            /* presumably the pending error code -- confirm */
        int                     mb_cur_max;     /* presumably a cached MB_CUR_MAX -- confirm */
} Lex;

/*
 * One node of the parse tree.  `op' selects how the two unions are
 * read: the member comments say which kinds of node use the pointer
 * members, while `left.pos' is used for DFA leaves and `right.info'
 * carries the Info payload.
 */
typedef struct t_tree   Tree;   /* RE parse tree node */
struct t_tree
{
        union
        {
                Tree    *ptr;   /* unary & binary nodes */
                size_t  pos;    /* position for DFA leaves */
        } left;
        union
        {
                Tree    *ptr;   /* binary nodes */
                Info    info;
        } right;
        Tree            *parent;        /* enclosing node */
        w_type          op;     /* positive => char. to match */
};

/*
 * Opaque engine handles: struct re_dfa_ and struct re_nfa_ are not
 * defined in this header, which only passes pointers to them.
 */
typedef struct re_dfa_  Dfa;    /* DFA engine description */
typedef struct re_nfa_  Nfa;    /* NFA engine description */

/*
 * Arguments for one matching run; passed to libuxre_regdfaexec() and
 * libuxre_regnfaexec().
 * NOTE(review): the member descriptions are inferred from names and
 * types (they parallel the regexec() parameters) -- confirm in the
 * engine implementations.
 */
typedef struct
{
        const unsigned char     *str;           /* presumably the subject string -- confirm */
        regmatch_t              *match;         /* presumably where match offsets are stored -- confirm */
        size_t                  nmatch;         /* presumably number of entries in match[] -- confirm */
        unsigned long           flags;          /* presumably regexec() flags -- confirm */
        int                     mb_cur_max;     /* presumably a cached MB_CUR_MAX -- confirm */
} Exec;

        /*
        * Operator codes for regular expressions; a number of them exist
        * for internal use only.  Every code is negative so that it cannot
        * be mistaken for the ordinary "match this particular wide
        * character" operation.
        *
        * A code is -((kind << 4) | index): the low four bits number the
        * operator within its kind, the bits above hold the kind, and
        * KIND_ROP() recovers the kind from a code.
        */
#define LEAF_ROP        0x00
#define UNARY_ROP       0x01
#define BINARY_ROP      0x02

#define MAKE_ROP(k, v)  (-(((k) << 4) | (v)))
#define KIND_ROP(v)     (-(v) >> 4)

/* Leaves. */
#define ROP_NOP         MAKE_ROP(LEAF_ROP, 1)   /* temporary */
#define ROP_BOL         MAKE_ROP(LEAF_ROP, 2)   /* ^ anchor */
#define ROP_EOL         MAKE_ROP(LEAF_ROP, 3)   /* $ anchor */
#define ROP_ALL         MAKE_ROP(LEAF_ROP, 4)   /* anything (added) */
#define ROP_ANYCH       MAKE_ROP(LEAF_ROP, 5)   /* . including \n */
#define ROP_NOTNL       MAKE_ROP(LEAF_ROP, 6)   /* . excluding \n */
#define ROP_EMPTY       MAKE_ROP(LEAF_ROP, 7)   /* empty string */
#define ROP_NONE        MAKE_ROP(LEAF_ROP, 8)   /* match failure */
#define ROP_BKT         MAKE_ROP(LEAF_ROP, 9)   /* [...] */
#define ROP_BKTCOPY     MAKE_ROP(LEAF_ROP, 10)  /* [...] (duplicated) */
#define ROP_LT          MAKE_ROP(LEAF_ROP, 11)  /* \< word begin */
#define ROP_GT          MAKE_ROP(LEAF_ROP, 12)  /* \> word end */
#define ROP_REF         MAKE_ROP(LEAF_ROP, 13)  /* \digit */
#define ROP_END         MAKE_ROP(LEAF_ROP, 14)  /* final (added) */

/* Unary operators. */
#define ROP_STAR        MAKE_ROP(UNARY_ROP, 1)
#define ROP_PLUS        MAKE_ROP(UNARY_ROP, 2)
#define ROP_QUEST       MAKE_ROP(UNARY_ROP, 3)
#define ROP_BRACE       MAKE_ROP(UNARY_ROP, 4)
#define ROP_LP          MAKE_ROP(UNARY_ROP, 5)
#define ROP_RP          MAKE_ROP(UNARY_ROP, 6)

/* Binary operators. */
#define ROP_OR          MAKE_ROP(BINARY_ROP, 1)
#define ROP_CAT         MAKE_ROP(BINARY_ROP, 2)

        /*
        * Internal entry points shared by the library's source files.
        *
        * Return values:
        *  libuxre_bktmbcomp()
        *       <0 error (see BKT_* above); >0 #bytes scanned
        *  libuxre_bktmbexec()
        *       <0 doesn't match; >=0 matches, #extra bytes scanned
        *
        * NOTE(review): LIBUXRE_STATIC is not defined anywhere in this
        * header; presumably <regex.h> or the build supplies it (e.g. as
        * empty, `extern' or `static') -- confirm.  The two
        * libuxre_regdel*() routines are declared extern unconditionally,
        * unlike the rest.
        */
/* Bracket expression handling. */
LIBUXRE_STATIC void     libuxre_bktfree(Bracket *);
LIBUXRE_STATIC int      libuxre_bktmbcomp(Bracket *, const unsigned char *,
                                int, int);
LIBUXRE_STATIC int      libuxre_bktmbexec(Bracket *, wchar_t,
                                const unsigned char *, int);

/* Parse tree construction and disposal. */
LIBUXRE_STATIC void     libuxre_regdeltree(Tree *, int);
LIBUXRE_STATIC Tree     *libuxre_reg1tree(w_type, Tree *);
LIBUXRE_STATIC Tree     *libuxre_reg2tree(w_type, Tree *, Tree *);
LIBUXRE_STATIC Tree     *libuxre_regparse(Lex *, const unsigned char *, int);

/* DFA engine. */
extern void             libuxre_regdeldfa(Dfa *);
LIBUXRE_STATIC int      libuxre_regdfacomp(regex_t *, Tree *, Lex *);
LIBUXRE_STATIC int      libuxre_regdfaexec(Dfa *, Exec *);

/* NFA engine. */
extern void             libuxre_regdelnfa(Nfa *);
LIBUXRE_STATIC int      libuxre_regnfacomp(regex_t *, Tree *, Lex *);
LIBUXRE_STATIC int      libuxre_regnfaexec(Nfa *, Exec *);
#endif  /* !LIBUXRE_RE_H */