Subversion Repositories planix.SVN

Rev

Rev 2 | Blame | Compare with Previous | Last modification | View Log | RSS feed

#include        "grep.h"

/*
 * incremental compiler.
 * add the branch c to the
 * state s.
 */
void
increment(State *s, int c)
{
        int i;
        State *t, **tt;
        Re *re1, *re2;

        nfollow = 0;
        gen++;
        matched = 0;
        for(i=0; i<s->count; i++)
                fol1(s->re[i], c);
        qsort(follow, nfollow, sizeof(*follow), fcmp);
        for(tt=&state0; t = *tt;) {
                if(t->count > nfollow) {
                        tt = &t->linkleft;
                        goto cont;
                }
                if(t->count < nfollow) {
                        tt = &t->linkright;
                        goto cont;
                }
                for(i=0; i<nfollow; i++) {
                        re1 = t->re[i];
                        re2 = follow[i];
                        if(re1 > re2) {
                                tt = &t->linkleft;
                                goto cont;
                        }
                        if(re1 < re2) {
                                tt = &t->linkright;
                                goto cont;
                        }
                }
                if(!!matched && !t->match) {
                        tt = &t->linkleft;
                        goto cont;
                }
                if(!matched && !!t->match) {
                        tt = &t->linkright;
                        goto cont;
                }
                s->next[c] = t;
                return;
        cont:;
        }

        t = sal(nfollow);
        *tt = t;
        for(i=0; i<nfollow; i++) {
                re1 = follow[i];
                t->re[i] = re1;
        }
        s->next[c] = t;
        t->match = matched;
}

int
fcmp(void *va, void *vb)
{
        Re **aa, **bb;
        Re *a, *b;

        aa = va;
        bb = vb;
        a = *aa;
        b = *bb;
        if(a > b)
                return 1;
        if(a < b)
                return -1;
        return 0;
}

void
fol1(Re *r, int c)
{
        Re *r1;

loop:
        if(r->gen == gen)
                return;
        if(nfollow >= maxfollow)
                error("nfollow");
        r->gen = gen;
        switch(r->type) {
        default:
                error("fol1");

        case Tcase:
                if(c >= 0 && c < 256)
                if(r1 = r->cases[c])
                        follow[nfollow++] = r1;
                if(r = r->next)
                        goto loop;
                break;

        case Talt:
        case Tor:
                fol1(r->alt, c);
                r = r->next;
                goto loop;

        case Tbegin:
                if(c == '\n' || c == Cbegin)
                        follow[nfollow++] = r->next;
                break;

        case Tend:
                if(c == '\n') {
                        if(r->next == 0) {
                                matched = 1;
                                break;
                        }
                        r = r->next;
                        goto loop;
                }
                break;

        case Tclass:
                if(c >= r->lo && c <= r->hi)
                        follow[nfollow++] = r->next;
                break;
        }
}

Rune    tab1[] =
{
        0x007f,
        0x07ff,
};
Rune    tab2[] =
{
        0x003f,
        0x0fff,
};

Re2
rclass(Rune p0, Rune p1)
{
        char xc0[6], xc1[6];
        int i, n, m;
        Re2 x;

        if(p0 > p1)
                return re2char(0xff, 0xff);     // no match

        /*
         * bust range into same length
         * character sequences
         */
        for(i=0; i<nelem(tab1); i++) {
                m = tab1[i];
                if(p0 <= m && p1 > m)
                        return re2or(rclass(p0, m), rclass(m+1, p1));
        }

        /*
         * bust range into part of a single page
         * or into full pages
         */
        for(i=0; i<nelem(tab2); i++) {
                m = tab2[i];
                if((p0 & ~m) != (p1 & ~m)) {
                        if((p0 & m) != 0)
                                return re2or(rclass(p0, p0|m), rclass((p0|m)+1, p1));
                        if((p1 & m) != m)
                                return re2or(rclass(p0, (p1&~m)-1), rclass(p1&~m, p1));
                }
        }

        n = runetochar(xc0, &p0);
        i = runetochar(xc1, &p1);
        if(i != n)
                error("length");

        x = re2char(xc0[0], xc1[0]);
        for(i=1; i<n; i++)
                x = re2cat(x, re2char(xc0[i], xc1[i]));
        return x;
}

int
pcmp(void *va, void *vb)
{
        int n;
        Rune *a, *b;

        a = va;
        b = vb;

        n = a[0] - b[0];
        if(n)
                return n;
        return a[1] - b[1];
}

/*
 * convert character chass into
 * run-pair ranges of matches.
 * this is 10646/utf specific and
 * needs to be changed for some
 * other input character set.
 * this is the key to a fast
 * regular search of characters
 * by looking at sequential bytes.
 */
Re2
re2class(char *s)
{
        Rune pairs[200+2], *p, *q, ov;
        int nc;
        Re2 x;

        nc = 0;
        if(*s == '^') {
                nc = 1;
                s++;
        }

        p = pairs;
        s += chartorune(p, s);
        for(;;) {
                if(*p == '\\')
                        s += chartorune(p, s);
                if(*p == 0)
                        break;
                p[1] = *p;
                p += 2;
                if(p >= pairs + nelem(pairs) - 2)
                        error("class too big");
                s += chartorune(p, s);
                if(*p != '-')
                        continue;
                s += chartorune(p, s);
                if(*p == '\\')
                        s += chartorune(p, s);
                if(*p == 0)
                        break;
                p[-1] = *p;
                s += chartorune(p, s);
        }
        *p = 0;
        qsort(pairs, (p-pairs)/2, 2*sizeof(*pairs), pcmp);

        q = pairs;
        for(p=pairs+2; *p; p+=2) {
                if(p[0] > p[1])
                        continue;
                if(p[0] > q[1] || p[1] < q[0]) {
                        q[2] = p[0];
                        q[3] = p[1];
                        q += 2;
                        continue;
                }
                if(p[0] < q[0])
                        q[0] = p[0];
                if(p[1] > q[1])
                        q[1] = p[1];
        }
        q[2] = 0;

        p = pairs;
        if(nc) {
                x = rclass(0, p[0]-1);
                ov = p[1]+1;
                for(p+=2; *p; p+=2) {
                        x = re2or(x, rclass(ov, p[0]-1));
                        ov = p[1]+1;
                }
                x = re2or(x, rclass(ov, Runemask));
        } else {
                x = rclass(p[0], p[1]);
                for(p+=2; *p; p+=2)
                        x = re2or(x, rclass(p[0], p[1]));
        }
        return x;
}