Blame | Last modification | View Log | RSS feed
/*
* Changes by Gunnar Ritter, Freiburg i. Br., Germany, November 2002.
*
* Sccsid @(#)re.h 1.15 (gritter) 2/6/05
*/
/* UNIX(R) Regular Expresssion Library
*
* Note: Code is released under the GNU LGPL
*
* Copyright (C) 2001 Caldera International, Inc.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to:
* Free Software Foundation, Inc.
* 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#ifndef LIBUXRE_RE_H
#define LIBUXRE_RE_H
/*
* Maps safe external tag to internal one
*/
#define re_coll_ lc_collate /* <regex.h> */
/* #define __fnm_collate lc_collate */ /* <fnmatch.h> */
#include <limits.h>
#include <regex.h>
/* #include <fnmatch.h> */
#include <colldata.h>
#define NBSHT (sizeof(unsigned short) * CHAR_BIT)
#define NBYTE (((1 << CHAR_BIT) + NBSHT - 1) / NBSHT)
#define NTYPE 4
#define NWIDE 32
#define NQUIV 4
typedef struct
{
struct lc_collate *col; /* only member set by caller */
wctype_t *extype;
wuchar_type *exquiv;
wchar_t *exwide;
wctype_t type[NTYPE];
wuchar_type quiv[NQUIV];
wchar_t wide[NWIDE];
unsigned short byte[NBYTE];
unsigned short ntype;
unsigned short nquiv;
unsigned short nwide;
unsigned int flags;
} Bracket;
#define BKT_NEGATED 0x001 /* complemented set */
#define BKT_ONECASE 0x002 /* uppercase same as lowercase */
#define BKT_NOTNL 0x004 /* do not match newline when BKT_NEGATED */
#define BKT_BADRANGE 0x008 /* accept [m-a] ranges as [ma] */
#define BKT_SEPRANGE 0x010 /* disallow [a-m-z] style ranges */
#define BKT_NLBAD 0x020 /* newline disallowed */
#define BKT_SLASHBAD 0x040 /* slash disallowed (for pathnames) */
#define BKT_EMPTY 0x080 /* take leading ] is end (empty set) */
#define BKT_ESCAPE 0x100 /* allow \ as quote for next anything */
#define BKT_QUOTE 0x200 /* allow \ as quote for \\, \^, \- or \] */
#define BKT_ESCNL 0x400 /* take \n as the newline character */
#define BKT_ESCSEQ 0x800 /* otherwise, take \ as in C escapes */
#define BKT_ODDRANGE 0x1000 /* oawk oddity: [m-a] means [m] */
#define BKT_NOI18N 0x2000 /* disable [::] [==] [..] */
#define BKT_OLDESC 0x4000 /* enable \b \f \n \r \t only */
/*
* These error returns for libuxre_bktmbcomp() are directly tied to
* the error returns for regcomp() for convenience.
*/
#define BKT_BADPAT (-REG_BADPAT)
#define BKT_ECOLLATE (-REG_ECOLLATE)
#define BKT_ECTYPE (-REG_ECTYPE)
#define BKT_EEQUIV (-REG_EEQUIV)
#define BKT_BADCHAR (-REG_EBKTCHAR)
#define BKT_EBRACK (-REG_EBRACK)
#define BKT_EMPTYSUBBKT (-REG_EMPTYSUBBKT)
#define BKT_ERANGE (-REG_ERANGE)
#define BKT_ESPACE (-REG_ESPACE)
#define BKT_BADESC (-REG_BADESC)
#define BKT_ILLSEQ (-REG_ILLSEQ)
/*
* These must be distinct from the flags in <fnmatch.h>.
*/
#define FNM_COLLATE 0x2000 /* have collation information */
#define FNM_CURRENT 0x4000 /* have full-sized fnm_t structure */
/*
* These must be distinct from the flags in <regex.h>.
*/
#define REG_NFA 0x20000000
#define REG_DFA 0x40000000
#define REG_GOTBKT 0x80000000
#define BRACE_INF USHRT_MAX
#define BRACE_MAX 5100 /* arbitrary number < SHRT_MAX */
#define BRACE_DFAMAX 255 /* max amount for r.e. duplication */
typedef union /* extra info always kept for some tokens/nodes */
{
Bracket *bkt; /* ROP_BKT */
size_t sub; /* ROP_LP (ROP_RP), ROP_REF */
unsigned short num[2]; /* ROP_BRACE: num[0]=low, num[1]=high */
} Info;
typedef struct /* lexical context while parsing */
{
Info info;
const unsigned char *pat;
unsigned char *clist;
struct lc_collate *col;
unsigned long flags;
w_type tok;
size_t maxref;
size_t nleft;
size_t nright;
size_t nclist;
int bktflags;
int err;
int mb_cur_max;
} Lex;
typedef struct t_tree Tree; /* RE parse tree node */
struct t_tree
{
union
{
Tree *ptr; /* unary & binary nodes */
size_t pos; /* position for DFA leaves */
} left;
union
{
Tree *ptr; /* binary nodes */
Info info;
} right;
Tree *parent;
w_type op; /* positive => char. to match */
};
typedef struct re_dfa_ Dfa; /* DFA engine description */
typedef struct re_nfa_ Nfa; /* NFA engine description */
typedef struct
{
const unsigned char *str;
regmatch_t *match;
size_t nmatch;
unsigned long flags;
int mb_cur_max;
} Exec;
/*
* Regular expression operators. Some only used internally.
* All are negative, to distinguish them from the regular
* "match this particular wide character" operation.
*/
#define BINARY_ROP 0x02
#define UNARY_ROP 0x01
#define LEAF_ROP 0x00
#define MAKE_ROP(k, v) (-((v) | ((k) << 4)))
#define KIND_ROP(v) ((-(v)) >> 4)
#define ROP_OR MAKE_ROP(BINARY_ROP, 1)
#define ROP_CAT MAKE_ROP(BINARY_ROP, 2)
#define ROP_STAR MAKE_ROP(UNARY_ROP, 1)
#define ROP_PLUS MAKE_ROP(UNARY_ROP, 2)
#define ROP_QUEST MAKE_ROP(UNARY_ROP, 3)
#define ROP_BRACE MAKE_ROP(UNARY_ROP, 4)
#define ROP_LP MAKE_ROP(UNARY_ROP, 5)
#define ROP_RP MAKE_ROP(UNARY_ROP, 6)
#define ROP_NOP MAKE_ROP(LEAF_ROP, 1) /* temporary */
#define ROP_BOL MAKE_ROP(LEAF_ROP, 2) /* ^ anchor */
#define ROP_EOL MAKE_ROP(LEAF_ROP, 3) /* $ anchor */
#define ROP_ALL MAKE_ROP(LEAF_ROP, 4) /* anything (added) */
#define ROP_ANYCH MAKE_ROP(LEAF_ROP, 5) /* . w/\n */
#define ROP_NOTNL MAKE_ROP(LEAF_ROP, 6) /* . w/out \n */
#define ROP_EMPTY MAKE_ROP(LEAF_ROP, 7) /* empty string */
#define ROP_NONE MAKE_ROP(LEAF_ROP, 8) /* match failure */
#define ROP_BKT MAKE_ROP(LEAF_ROP, 9) /* [...] */
#define ROP_BKTCOPY MAKE_ROP(LEAF_ROP, 10) /* [...] (duplicated) */
#define ROP_LT MAKE_ROP(LEAF_ROP, 11) /* \< word begin */
#define ROP_GT MAKE_ROP(LEAF_ROP, 12) /* \> word end */
#define ROP_REF MAKE_ROP(LEAF_ROP, 13) /* \digit */
#define ROP_END MAKE_ROP(LEAF_ROP, 14) /* final (added) */
/*
* Return values:
* libuxre_bktmbcomp()
* <0 error (see BKT_* above); >0 #bytes scanned
* libuxre_bktmbexec()
* <0 doesn't match; >=0 matches, #extra bytes scanned
*/
LIBUXRE_STATIC void libuxre_bktfree(Bracket *);
LIBUXRE_STATIC int libuxre_bktmbcomp(Bracket *, const unsigned char *,
int, int);
LIBUXRE_STATIC int libuxre_bktmbexec(Bracket *, wchar_t,
const unsigned char *, int);
LIBUXRE_STATIC void libuxre_regdeltree(Tree *, int);
LIBUXRE_STATIC Tree *libuxre_reg1tree(w_type, Tree *);
LIBUXRE_STATIC Tree *libuxre_reg2tree(w_type, Tree *, Tree *);
LIBUXRE_STATIC Tree *libuxre_regparse(Lex *, const unsigned char *, int);
extern void libuxre_regdeldfa(Dfa *);
LIBUXRE_STATIC int libuxre_regdfacomp(regex_t *, Tree *, Lex *);
LIBUXRE_STATIC int libuxre_regdfaexec(Dfa *, Exec *);
extern void libuxre_regdelnfa(Nfa *);
LIBUXRE_STATIC int libuxre_regnfacomp(regex_t *, Tree *, Lex *);
LIBUXRE_STATIC int libuxre_regnfaexec(Nfa *, Exec *);
#endif /* !LIBUXRE_RE_H */