Subversion Repositories planix.SVN

Rev

Blame | Last modification | View Log | RSS feed

#include        <u.h>
#include        <libc.h>
#include        <bio.h>

/*
bugs:
        00/ff for end of file can conflict with 00/ff characters
*/

enum
{
        Nline   = 100000,               /* default max number of lines saved in memory */
        Nmerge  = 10,                   /* max number of temporary files merged */
        Nfield  = 20,                   /* max number of argument fields */

        Bflag   = 1<<0,                 /* flags per field */
        B1flag  = 1<<1,

        Dflag   = 1<<2,
        Fflag   = 1<<3,
        Gflag   = 1<<4,
        Iflag   = 1<<5,
        Mflag   = 1<<6,
        Nflag   = 1<<7,
        Rflag   = 1<<8,
        Wflag   = 1<<9,

        NSstart = 0,                    /* states for number to key decoding */
        NSsign,
        NSzero,
        NSdigit,
        NSpoint,
        NSfract,
        NSzerofract,
        NSexp,
        NSexpsign,
        NSexpdigit,
};

typedef struct  Line    Line;
typedef struct  Key     Key;
typedef struct  Merge   Merge;
typedef struct  Field   Field;

struct  Line
{
        Key*    key;
        int     llen;           /* always >= 1 */
        uchar   line[1];        /* always ends in '\n' */
};

struct  Merge
{
        Key*    key;            /* copy of line->key so (Line*) looks like (Merge*) */
        Line*   line;           /* line at the head of a merged temp file */
        int     fd;             /* file descriptor */
        Biobuf  b;              /* iobuf for reading a temp file */
};

struct  Key
{
        int     klen;
        uchar   key[1];
};

struct  Field
{
        int     beg1;
        int     beg2;
        int     end1;
        int     end2;

        long    flags;
        uchar   mapto[1+255];

        void    (*dokey)(Key*, uchar*, uchar*, Field*);
};

struct args
{
        char*   ofile;
        char*   tname;
        Rune    tabchar;
        char    cflag;
        char    uflag;
        char    vflag;
        int     nfield;
        int     nfile;
        Field   field[Nfield];

        Line**  linep;
        long    nline;                  /* number of lines in this temp file */
        long    lineno;                 /* overall ordinal for -s option */
        int     ntemp;
        long    mline;                  /* max lines per file */
} args;

extern  Rune*   month[12];

void    buildkey(Line*);
void    doargs(int, char*[]);
void    dofield(char*, int*, int*, int, int);
void    dofile(Biobuf*);
void    dokey_(Key*, uchar*, uchar*, Field*);
void    dokey_dfi(Key*, uchar*, uchar*, Field*);
void    dokey_gn(Key*, uchar*, uchar*, Field*);
void    dokey_m(Key*, uchar*, uchar*, Field*);
void    dokey_r(Key*, uchar*, uchar*, Field*);
void    done(char*);
int     kcmp(Key*, Key*);
void    makemapd(Field*);
void    makemapm(Field*);
void    mergefiles(int, int, Biobuf*);
void    mergeout(Biobuf*);
void    newfield(void);
Line*   newline(Biobuf*);
void    nomem(void);
void    notifyf(void*, char*);
void    printargs(void);
void    printout(Biobuf*);
void    setfield(int, int);
uchar*  skip(uchar*, int, int, int, int);
void    sort4(void*, ulong);
char*   tempfile(int);
void    tempout(void);
void    lineout(Biobuf*, Line*);

void
main(int argc, char *argv[])
{
        int i, f;
        char *s;
        Biobuf bbuf;

        notify(notifyf);        /**/
        doargs(argc, argv);
        if(args.vflag)
                printargs();

        for(i=1; i<argc; i++) {
                s = argv[i];
                if(s == 0)
                        continue;
                if(strcmp(s, "-") == 0) {
                        Binit(&bbuf, 0, OREAD);
                        dofile(&bbuf);
                        Bterm(&bbuf);
                        continue;
                }
                f = open(s, OREAD);
                if(f < 0) {
                        fprint(2, "sort: open %s: %r\n", s);
                        done("open");
                }
                Binit(&bbuf, f, OREAD);
                dofile(&bbuf);
                Bterm(&bbuf);
                close(f);
        }
        if(args.nfile == 0) {
                Binit(&bbuf, 0, OREAD);
                dofile(&bbuf);
                Bterm(&bbuf);
        }
        if(args.cflag)
                done(0);
        if(args.vflag)
                fprint(2, "=========\n");

        f = 1;
        if(args.ofile) {
                f = create(args.ofile, OWRITE, 0666);
                if(f < 0) {
                        fprint(2, "sort: create %s: %r\n", args.ofile);
                        done("create");
                }
        }

        Binit(&bbuf, f, OWRITE);
        if(args.ntemp) {
                tempout();
                mergeout(&bbuf);
        } else {
                printout(&bbuf);
        }
        Bterm(&bbuf);
        done(0);
}

void
dofile(Biobuf *b)
{
        Line *l, *ol;
        int n;

        if(args.cflag) {
                ol = newline(b);
                if(ol == 0)
                        return;
                for(;;) {
                        l = newline(b);
                        if(l == 0)
                                break;
                        n = kcmp(ol->key, l->key);
                        if(n > 0 || (n == 0 && args.uflag)) {
                                fprint(2, "sort: -c file not in sort\n"); /**/
                                done("order");
                        }
                        free(ol->key);
                        free(ol);
                        ol = l;
                }
                return;
        }

        if(args.linep == 0) {
                args.linep = malloc(args.mline * sizeof(args.linep));
                if(args.linep == 0)
                        nomem();
        }
        for(;;) {
                l = newline(b);
                if(l == 0)
                        break;
                if(args.nline >= args.mline)
                        tempout();
                args.linep[args.nline] = l;
                args.nline++;
                args.lineno++;
        }
}

void
notifyf(void*, char *s)
{

        if(strcmp(s, "interrupt") == 0)
                done(0);
        if(strcmp(s, "hangup") == 0)
                done(0);
        if(strcmp(s, "kill") == 0)
                done(0);
        if(strncmp(s, "sys: write on closed pipe", 25) == 0)
                done(0);
        fprint(2, "sort: note: %s\n", s);
        abort();
}

Line*
newline(Biobuf *b)
{
        Line *l;
        char *p;
        int n, c;

        p = Brdline(b, '\n');
        n = Blinelen(b);
        if(p == 0) {
                if(n == 0)
                        return 0;
                l = 0;
                for(n=0;;) {
                        if((n & 31) == 0) {
                                l = realloc(l, sizeof(Line) +
                                        (n+31)*sizeof(l->line[0]));
                                if(l == 0)
                                        nomem();
                        }
                        c = Bgetc(b);
                        if(c < 0) {
                                fprint(2, "sort: newline added\n");
                                c = '\n';
                        }
                        l->line[n++] = c;
                        if(c == '\n')
                                break;
                }
                l->llen = n;
                buildkey(l);
                return l;
        }
        l = malloc(sizeof(Line) +
                (n-1)*sizeof(l->line[0]));
        if(l == 0)
                nomem();
        l->llen = n;
        memmove(l->line, p, n);
        buildkey(l);
        return l;
}

void
lineout(Biobuf *b, Line *l)
{
        int n, m;

        n = l->llen;
        m = Bwrite(b, l->line, n);
        if(n != m)
                exits("write");
}

void
tempout(void)
{
        long n;
        Line **lp, *l;
        char *tf;
        int f;
        Biobuf tb;

        sort4(args.linep, args.nline);
        tf = tempfile(args.ntemp);
        args.ntemp++;
        f = create(tf, OWRITE, 0666);
        if(f < 0) {
                fprint(2, "sort: create %s: %r\n", tf);
                done("create");
        }

        Binit(&tb, f, OWRITE);
        lp = args.linep;
        for(n=args.nline; n>0; n--) {
                l = *lp++;
                lineout(&tb, l);
                free(l->key);
                free(l);
        }
        args.nline = 0;
        Bterm(&tb);
        close(f);
}

void
done(char *xs)
{
        int i;

        for(i=0; i<args.ntemp; i++)
                remove(tempfile(i));
        exits(xs);
}

void
nomem(void)
{
        fprint(2, "sort: out of memory\n");
        done("mem");
}

char*
tempfile(int n)
{
        static char file[100];
        static uint pid;
        char *dir;

        dir = "/tmp";
        if(args.tname)
                dir = args.tname;
        if(strlen(dir) >= nelem(file)-20) {
                fprint(2, "temp file directory name is too long: %s\n", dir);
                done("tdir");
        }

        if(pid == 0) {
                pid = getpid();
                if(pid == 0) {
                        pid = time(0);
                        if(pid == 0)
                                pid = 1;
                }
        }

        sprint(file, "%s/sort.%.4d.%.4d", dir, pid%10000, n);
        return file;
}

void
mergeout(Biobuf *b)
{
        int n, i, f;
        char *tf;
        Biobuf tb;

        for(i=0; i<args.ntemp; i+=n) {
                n = args.ntemp - i;
                if(n > Nmerge) {
                        tf = tempfile(args.ntemp);
                        args.ntemp++;
                        f = create(tf, OWRITE, 0666);
                        if(f < 0) {
                                fprint(2, "sort: create %s: %r\n", tf);
                                done("create");
                        }
                        Binit(&tb, f, OWRITE);

                        n = Nmerge;
                        mergefiles(i, n, &tb);

                        Bterm(&tb);
                        close(f);
                } else
                        mergefiles(i, n, b);
        }
}

void
mergefiles(int t, int n, Biobuf *b)
{
        Merge *m, *mp, **mmp;
        Key *ok;
        Line *l;
        char *tf;
        int i, f, nn;

        mmp = malloc(n*sizeof(*mmp));
        mp = malloc(n*sizeof(*mp));
        if(mmp == 0 || mp == 0)
                nomem();

        nn = 0;
        m = mp;
        for(i=0; i<n; i++,m++) {
                tf = tempfile(t+i);
                f = open(tf, OREAD);
                if(f < 0) {
                        fprint(2, "sort: reopen %s: %r\n", tf);
                        done("open");
                }
                m->fd = f;
                Binit(&m->b, f, OREAD);
                mmp[nn] = m;

                l = newline(&m->b);
                if(l == 0)
                        continue;
                nn++;
                m->line = l;
                m->key = l->key;
        }

        ok = 0;
        for(;;) {
                sort4(mmp, nn);
                m = *mmp;
                if(nn == 0)
                        break;
                for(;;) {
                        l = m->line;
                        if(args.uflag && ok && kcmp(ok, l->key) == 0) {
                                free(l->key);
                                free(l);
                        } else {
                                lineout(b, l);
                                if(ok)
                                        free(ok);
                                ok = l->key;
                                free(l);
                        }

                        l = newline(&m->b);
                        if(l == 0) {
                                nn--;
                                mmp[0] = mmp[nn];
                                break;
                        }
                        m->line = l;
                        m->key = l->key;
                        if(nn > 1 && kcmp(mmp[0]->key, mmp[1]->key) > 0)
                                break;
                }
        }
        if(ok)
                free(ok);

        m = mp;
        for(i=0; i<n; i++,m++) {
                Bterm(&m->b);
                close(m->fd);
        }

        free(mp);
        free(mmp);
}

int
kcmp(Key *ka, Key *kb)
{
        int n, m;

        /*
         * set n to length of smaller key
         */
        n = ka->klen;
        m = kb->klen;
        if(n > m)
                n = m;
        return memcmp(ka->key, kb->key, n);
}

void
printout(Biobuf *b)
{
        long n;
        Line **lp, *l;
        Key *ok;

        sort4(args.linep, args.nline);
        lp = args.linep;
        ok = 0;
        for(n=args.nline; n>0; n--) {
                l = *lp++;
                if(args.uflag && ok && kcmp(ok, l->key) == 0)
                        continue;
                lineout(b, l);
                ok = l->key;
        }
}

void
setfield(int n, int c)
{
        Field *f;

        f = &args.field[n];
        switch(c) {
        default:
                fprint(2, "sort: unknown option: field.%C\n", c);
                done("option");
        case 'b':       /* skip blanks */
                f->flags |= Bflag;
                break;
        case 'd':       /* directory order */
                f->flags |= Dflag;
                break;
        case 'f':       /* fold case */
                f->flags |= Fflag;
                break;
        case 'g':       /* floating point -n case */
                f->flags |= Gflag;
                break;
        case 'i':       /* ignore non-ascii */
                f->flags |= Iflag;
                break;
        case 'M':       /* month */
                f->flags |= Mflag;
                break;
        case 'n':       /* numbers */
                f->flags |= Nflag;
                break;
        case 'r':       /* reverse */
                f->flags |= Rflag;
                break;
        case 'w':       /* ignore white */
                f->flags |= Wflag;
                break;
        }
}

void
dofield(char *s, int *n1, int *n2, int off1, int off2)
{
        int c, n;

        c = *s++;
        if(c >= '0' && c <= '9') {
                n = 0;
                while(c >= '0' && c <= '9') {
                        n = n*10 + (c-'0');
                        c = *s++;
                }
                n -= off1;      /* posix committee: rot in hell */
                if(n < 0) {
                        fprint(2, "sort: field offset must be positive\n");
                        done("option");
                }
                *n1 = n;
        }
        if(c == '.') {
                c = *s++;
                if(c >= '0' && c <= '9') {
                        n = 0;
                        while(c >= '0' && c <= '9') {
                                n = n*10 + (c-'0');
                                c = *s++;
                        }
                        n -= off2;
                        if(n < 0) {
                                fprint(2, "sort: character offset must be positive\n");
                                done("option");
                        }
                        *n2 = n;
                }
        }
        while(c != 0) {
                setfield(args.nfield, c);
                c = *s++;
        }
}

void
printargs(void)
{
        int i, n;
        Field *f;
        char *prefix;

        fprint(2, "sort");
        for(i=0; i<=args.nfield; i++) {
                f = &args.field[i];
                prefix = " -";
                if(i) {
                        n = f->beg1;
                        if(n >= 0)
                                fprint(2, " +%d", n);
                        else
                                fprint(2, " +*");
                        n = f->beg2;
                        if(n >= 0)
                                fprint(2, ".%d", n);
                        else
                                fprint(2, ".*");

                        if(f->flags & B1flag)
                                fprint(2, "b");

                        n = f->end1;
                        if(n >= 0)
                                fprint(2, " -%d", n);
                        else
                                fprint(2, " -*");
                        n = f->end2;
                        if(n >= 0)
                                fprint(2, ".%d", n);
                        else
                                fprint(2, ".*");
                        prefix = "";
                }
                if(f->flags & Bflag)
                        fprint(2, "%sb", prefix);
                if(f->flags & Dflag)
                        fprint(2, "%sd", prefix);
                if(f->flags & Fflag)
                        fprint(2, "%sf", prefix);
                if(f->flags & Gflag)
                        fprint(2, "%sg", prefix);
                if(f->flags & Iflag)
                        fprint(2, "%si", prefix);
                if(f->flags & Mflag)
                        fprint(2, "%sM", prefix);
                if(f->flags & Nflag)
                        fprint(2, "%sn", prefix);
                if(f->flags & Rflag)
                        fprint(2, "%sr", prefix);
                if(f->flags & Wflag)
                        fprint(2, "%sw", prefix);
        }
        if(args.cflag)
                fprint(2, " -c");
        if(args.uflag)
                fprint(2, " -u");
        if(args.ofile)
                fprint(2, " -o %s", args.ofile);
        if(args.mline != Nline)
                fprint(2, " -l %ld", args.mline);
        fprint(2, "\n");
}

void
newfield(void)
{
        int n;
        Field *f;

        n = args.nfield + 1;
        if(n >= Nfield) {
                fprint(2, "sort: too many fields specified\n");
                done("option");
        }
        args.nfield = n;
        f = &args.field[n];
        f->beg1 = -1;
        f->beg2 = -1;
        f->end1 = -1;
        f->end2 = -1;
}

void
doargs(int argc, char *argv[])
{
        int i, c, hadplus;
        char *s, *p, *q;
        Field *f;

        hadplus = 0;
        args.mline = Nline;
        for(i=1; i<argc; i++) {
                s = argv[i];
                c = *s++;
                if(c == '-') {
                        c = *s;
                        if(c == 0)              /* forced end of arg marker */
                                break;
                        argv[i] = 0;            /* clobber args processed */
                        if(c == '.' || (c >= '0' && c <= '9')) {
                                if(!hadplus)
                                        newfield();
                                f = &args.field[args.nfield];
                                dofield(s, &f->end1, &f->end2, 0, 0);
                                hadplus = 0;
                                continue;
                        }

                        while(c = *s++)
                        switch(c) {
                        case '-':       /* end of options */
                                i = argc;
                                continue;
                        case 'T':       /* temp directory */
                                if(*s == 0) {
                                        i++;
                                        if(i < argc) {
                                                args.tname = argv[i];
                                                argv[i] = 0;
                                        }
                                } else
                                        args.tname = s;
                                s = strchr(s, 0);
                                break;
                        case 'o':       /* output file */
                                if(*s == 0) {
                                        i++;
                                        if(i < argc) {
                                                args.ofile = argv[i];
                                                argv[i] = 0;
                                        }
                                } else
                                        args.ofile = s;
                                s = strchr(s, 0);
                                break;
                        case 'k':       /* posix key (what were they thinking?) */
                                p = 0;
                                if(*s == 0) {
                                        i++;
                                        if(i < argc) {
                                                p = argv[i];
                                                argv[i] = 0;
                                        }
                                } else
                                        p = s;
                                s = strchr(s, 0);
                                if(p == 0)
                                        break;

                                newfield();
                                q = strchr(p, ',');
                                if(q)
                                        *q++ = 0;
                                f = &args.field[args.nfield];
                                dofield(p, &f->beg1, &f->beg2, 1, 1);
                                if(f->flags & Bflag) {
                                        f->flags |= B1flag;
                                        f->flags &= ~Bflag;
                                }
                                if(q) {
                                        dofield(q, &f->end1, &f->end2, 1, 0);
                                        if(f->end2 <= 0)
                                                f->end1++;
                                }
                                hadplus = 0;
                                break;
                        case 't':       /* tab character */
                                if(*s == 0) {
                                        i++;
                                        if(i < argc) {
                                                chartorune(&args.tabchar, argv[i]);
                                                argv[i] = 0;
                                        }
                                } else
                                        s += chartorune(&args.tabchar, s);
                                if(args.tabchar == '\n') {
                                        fprint(2, "aw come on, rob\n");
                                        done("rob");
                                }
                                break;
                        case 'c':       /* check order */
                                args.cflag = 1;
                                break;
                        case 'u':       /* unique */
                                args.uflag = 1;
                                break;
                        case 'v':       /* debugging noise */
                                args.vflag = 1;
                                break;
                        case 'l':
                                if(*s == 0) {
                                        i++;
                                        if(i < argc) {
                                                args.mline = atol(argv[i]);
                                                argv[i] = 0;
                                        }
                                } else
                                        args.mline = atol(s);
                                s = strchr(s, 0);
                                break;

                        case 'M':       /* month */
                        case 'b':       /* skip blanks */
                        case 'd':       /* directory order */
                        case 'f':       /* fold case */
                        case 'g':       /* floating numbers */
                        case 'i':       /* ignore non-ascii */
                        case 'n':       /* numbers */
                        case 'r':       /* reverse */
                        case 'w':       /* ignore white */
                                if(args.nfield > 0)
                                        fprint(2, "sort: global field set after -k\n");
                                setfield(0, c);
                                break;
                        case 'm':
                                /* option m silently ignored but required by posix */
                                break;
                        default:
                                fprint(2, "sort: unknown option: -%C\n", c);
                                done("option");
                        }
                        continue;
                }
                if(c == '+') {
                        argv[i] = 0;            /* clobber args processed */
                        c = *s;
                        if(c == '.' || (c >= '0' && c <= '9')) {
                                newfield();
                                f = &args.field[args.nfield];
                                dofield(s, &f->beg1, &f->beg2, 0, 0);
                                if(f->flags & Bflag) {
                                        f->flags |= B1flag;
                                        f->flags &= ~Bflag;
                                }
                                hadplus = 1;
                                continue;
                        }
                        fprint(2, "sort: unknown option: +%C\n", c);
                        done("option");
                }
                args.nfile++;
        }

        for(i=0; i<=args.nfield; i++) {
                f = &args.field[i];

                /*
                 * global options apply to fields that
                 * specify no options
                 */
                if(f->flags == 0) {
                        f->flags = args.field[0].flags;
                        if(args.field[0].flags & Bflag)
                                f->flags |= B1flag;
                }


                /*
                 * build buildkey specification
                 */
                switch(f->flags & ~(Bflag|B1flag)) {
                default:
                        fprint(2, "sort: illegal combination of flags: %lx\n", f->flags);
                        done("option");
                case 0:
                        f->dokey = dokey_;
                        break;
                case Rflag:
                        f->dokey = dokey_r;
                        break;
                case Gflag:
                case Nflag:
                case Gflag|Nflag:
                case Gflag|Rflag:
                case Nflag|Rflag:
                case Gflag|Nflag|Rflag:
                        f->dokey = dokey_gn;
                        break;
                case Mflag:
                case Mflag|Rflag:
                        f->dokey = dokey_m;
                        makemapm(f);
                        break;
                case Dflag:
                case Dflag|Fflag:
                case Dflag|Fflag|Iflag:
                case Dflag|Fflag|Iflag|Rflag:
                case Dflag|Fflag|Iflag|Rflag|Wflag:
                case Dflag|Fflag|Iflag|Wflag:
                case Dflag|Fflag|Rflag:
                case Dflag|Fflag|Rflag|Wflag:
                case Dflag|Fflag|Wflag:
                case Dflag|Iflag:
                case Dflag|Iflag|Rflag:
                case Dflag|Iflag|Rflag|Wflag:
                case Dflag|Iflag|Wflag:
                case Dflag|Rflag:
                case Dflag|Rflag|Wflag:
                case Dflag|Wflag:
                case Fflag:
                case Fflag|Iflag:
                case Fflag|Iflag|Rflag:
                case Fflag|Iflag|Rflag|Wflag:
                case Fflag|Iflag|Wflag:
                case Fflag|Rflag:
                case Fflag|Rflag|Wflag:
                case Fflag|Wflag:
                case Iflag:
                case Iflag|Rflag:
                case Iflag|Rflag|Wflag:
                case Iflag|Wflag:
                case Wflag:
                        f->dokey = dokey_dfi;
                        makemapd(f);
                        break;
                }
        }

        /*
         * random spot checks
         */
        if(args.nfile > 1 && args.cflag) {
                fprint(2, "sort: -c can have at most one input file\n");
                done("option");
        }
        return;
}

uchar*
skip(uchar *l, int n1, int n2, int bflag, int endfield)
{
        int i, c, tc;
        Rune r;

        if(endfield && n1 < 0)
                return 0;

        c = *l++;
        tc = args.tabchar;
        if(tc) {
                if(tc < Runeself) {
                        for(i=n1; i>0; i--) {
                                while(c != tc) {
                                        if(c == '\n')
                                                return 0;
                                        c = *l++;
                                }
                                if(!(endfield && i == 1))
                                        c = *l++;
                        }
                } else {
                        l--;
                        l += chartorune(&r, (char*)l);
                        for(i=n1; i>0; i--) {
                                while(r != tc) {
                                        if(r == '\n')
                                                return 0;
                                        l += chartorune(&r, (char*)l);
                                }
                                if(!(endfield && i == 1))
                                        l += chartorune(&r, (char*)l);
                        }
                        c = r;
                }
        } else {
                for(i=n1; i>0; i--) {
                        while(c == ' ' || c == '\t')
                                c = *l++;
                        while(c != ' ' && c != '\t') {
                                if(c == '\n')
                                        return 0;
                                c = *l++;
                        }
                }
        }

        if(bflag)
                while(c == ' ' || c == '\t')
                        c = *l++;

        l--;
        for(i=n2; i>0; i--) {
                c = *l;
                if(c < Runeself) {
                        if(c == '\n')
                                return 0;
                        l++;
                        continue;
                }
                l += chartorune(&r, (char*)l);
        }
        return l;
}

void
dokey_gn(Key *k, uchar *lp, uchar *lpe, Field *f)
{
        uchar *kp;
        int c, cl, dp;
        int state, nzero, exp, expsign, rflag;

        cl = k->klen + 3;
        kp = k->key + cl;       /* skip place for sign, exponent[2] */

        nzero = 0;              /* number of trailing zeros */
        exp = 0;                /* value of the exponent */
        expsign = 0;            /* sign of the exponent */
        dp = 0x4040;            /* location of decimal point */
        rflag = f->flags&Rflag; /* xor of rflag and - sign */
        state = NSstart;

        for(;; lp++) {
                if(lp >= lpe)
                        break;
                c = *lp;

                if(c == ' ' || c == '\t') {
                        switch(state) {
                        case NSstart:
                        case NSsign:
                                continue;
                        }
                        break;
                }
                if(c == '+' || c == '-') {
                        switch(state) {
                        case NSstart:
                                state = NSsign;
                                if(c == '-')
                                        rflag = !rflag;
                                continue;
                        case NSexp:
                                state = NSexpsign;
                                if(c == '-')
                                        expsign = 1;
                                continue;
                        }
                        break;
                }
                if(c == '0') {
                        switch(state) {
                        case NSdigit:
                                if(rflag)
                                        c = ~c;
                                *kp++ = c;
                                cl++;
                                nzero++;
                                dp++;
                                state = NSdigit;
                                continue;
                        case NSfract:
                                if(rflag)
                                        c = ~c;
                                *kp++ = c;
                                cl++;
                                nzero++;
                                state = NSfract;
                                continue;
                        case NSstart:
                        case NSsign:
                        case NSzero:
                                state = NSzero;
                                continue;
                        case NSzerofract:
                        case NSpoint:
                                dp--;
                                state = NSzerofract;
                                continue;
                        case NSexpsign:
                        case NSexp:
                        case NSexpdigit:
                                exp = exp*10 + (c - '0');
                                state = NSexpdigit;
                                continue;
                        }
                        break;
                }
                if(c >= '1' && c <= '9') {
                        switch(state) {
                        case NSzero:
                        case NSstart:
                        case NSsign:
                        case NSdigit:
                                if(rflag)
                                        c = ~c;
                                *kp++ = c;
                                cl++;
                                nzero = 0;
                                dp++;
                                state = NSdigit;
                                continue;
                        case NSzerofract:
                        case NSpoint:
                        case NSfract:
                                if(rflag)
                                        c = ~c;
                                *kp++ = c;
                                cl++;
                                nzero = 0;
                                state = NSfract;
                                continue;
                        case NSexpsign:
                        case NSexp:
                        case NSexpdigit:
                                exp = exp*10 + (c - '0');
                                state = NSexpdigit;
                                continue;
                        }
                        break;
                }
                if(c == '.') {
                        switch(state) {
                        case NSstart:
                        case NSsign:
                                state = NSpoint;
                                continue;
                        case NSzero:
                                state = NSzerofract;
                                continue;
                        case NSdigit:
                                state = NSfract;
                                continue;
                        }
                        break;
                }
                if((f->flags & Gflag) && (c == 'e' || c == 'E')) {
                        switch(state) {
                        case NSdigit:
                        case NSfract:
                                state = NSexp;
                                continue;
                        }
                        break;
                }
                break;
        }

        switch(state) {
        /*
         * result is zero
         */
        case NSstart:
        case NSsign:
        case NSzero:
        case NSzerofract:
        case NSpoint:
                kp = k->key + k->klen;
                k->klen += 2;
                kp[0] = 0x20;   /* between + and - */
                kp[1] = 0;
                return;
        /*
         * result has exponent
         */
        case NSexpsign:
        case NSexp:
        case NSexpdigit:
                if(expsign)
                        exp = -exp;
                dp += exp;

        /*
         * result is fixed point number
         */
        case NSdigit:
        case NSfract:
                kp -= nzero;
                cl -= nzero;
                break;
        }

        /*
         * end of number
         */
        c = 0;
        if(rflag)
                c = ~c;
        *kp = c;

        /*
         * sign and exponent
         */
        c = 0x30;
        if(rflag) {
                c = 0x10;
                dp = ~dp;
        }
        kp = k->key + k->klen;
        kp[0] = c;
        kp[1] = (dp >> 8);
        kp[2] = dp;
        k->klen = cl+1;
}

void
dokey_m(Key *k, uchar *lp, uchar *lpe, Field *f)
{
        uchar *kp;
        Rune r, place[3];
        int c, cl, pc;
        int rflag;

        rflag = f->flags&Rflag;
        pc = 0;

        cl = k->klen;
        kp = k->key + cl;

        for(;;) {
                /*
                 * get the character
                 */
                if(lp >= lpe)
                        break;
                c = *lp;
                if(c >= Runeself) {
                        lp += chartorune(&r, (char*)lp);
                        c = r;
                } else
                        lp++;

                if(c < nelem(f->mapto)) {
                        c = f->mapto[c];
                        if(c == 0)
                                continue;
                }
                place[pc++] = c;
                if(pc < 3)
                        continue;
                for(c=11; c>=0; c--)
                        if(memcmp(month[c], place, sizeof(place)) == 0)
                                break;
                c += 10;
                if(rflag)
                        c = ~c;
                *kp++ = c;
                cl++;
                break;
        }

        c = 0;
        if(rflag)
                c = ~c;
        *kp = c;
        k->klen = cl+1;
}

void
dokey_dfi(Key *k, uchar *lp, uchar *lpe, Field *f)
{
        uchar *kp;
        Rune r;
        int c, cl, n, rflag;

        cl = k->klen;
        kp = k->key + cl;
        rflag = f->flags & Rflag;

        for(;;) {
                /*
                 * get the character
                 */
                if(lp >= lpe)
                        break;
                c = *lp;
                if(c >= Runeself) {
                        lp += chartorune(&r, (char*)lp);
                        c = r;
                } else
                        lp++;

                /*
                 * do the various mappings.
                 * the common case is handled
                 * completely by the table.
                 */
                if(c != 0 && c < Runeself) {
                        c = f->mapto[c];
                        if(c) {
                                *kp++ = c;
                                cl++;
                        }
                        continue;
                }

                /*
                 * for characters out of range,
                 * the table does not do Rflag.
                 * ignore is based on mapto[nelem(f->mapto)-1]
                 */
                if(c != 0 && c < nelem(f->mapto)) {
                        c = f->mapto[c];
                        if(c == 0)
                                continue;
                } else {
                        if(f->mapto[nelem(f->mapto)-1] == 0)
                                continue;
                        /*
                         * consider building maps as necessary
                         */
                        if(f->flags & Fflag)
                                c = tolowerrune(tobaserune(c));
                        if(f->flags & Dflag && !isalpharune(c) &&
                            !isdigitrune(c) && !isspacerune(c))
                                continue;
                        if((f->flags & Wflag) && isspacerune(c))
                                continue;
                }

                /*
                 * put it in the key
                 */
                r = c;
                n = runetochar((char*)kp, &r);
                kp += n;
                cl += n;
                if(rflag)
                        while(n > 0) {
                                kp[-n] = ~kp[-n];
                                n--;
                        }
        }

        /*
         * end of key
         */
        k->klen = cl+1;
        if(rflag) {
                *kp = ~0;
                return;
        }
        *kp = 0;
}

void
dokey_r(Key *k, uchar *lp, uchar *lpe, Field*)
{
        int cl, n;
        uchar *kp;

        n = lpe - lp;
        if(n < 0)
                n = 0;
        cl = k->klen;
        kp = k->key + cl;
        k->klen = cl+n+1;

        lpe -= 3;
        while(lp < lpe) {
                kp[0] = ~lp[0];
                kp[1] = ~lp[1];
                kp[2] = ~lp[2];
                kp[3] = ~lp[3];
                kp += 4;
                lp += 4;
        }

        lpe += 3;
        while(lp < lpe)
                *kp++ = ~*lp++;
        *kp = ~0;
}

void
dokey_(Key *k, uchar *lp, uchar *lpe, Field*)
{
        int n, cl;
        uchar *kp;

        n = lpe - lp;
        if(n < 0)
                n = 0;
        cl = k->klen;
        kp = k->key + cl;
        k->klen = cl+n+1;
        memmove(kp, lp, n);
        kp[n] = 0;
}

void
buildkey(Line *l)
{
        Key *k;
        uchar *lp, *lpe;
        int ll, kl, cl, i, n;
        Field *f;

        ll = l->llen - 1;
        kl = 0;                 /* allocated length */
        cl = 0;                 /* current length */
        k = 0;

        for(i=1; i<=args.nfield; i++) {
                f = &args.field[i];
                lp = skip(l->line, f->beg1, f->beg2, f->flags&B1flag, 0);
                if(lp == 0)
                        lp = l->line + ll;
                lpe = skip(l->line, f->end1, f->end2, f->flags&Bflag, 1);
                if(lpe == 0)
                        lpe = l->line + ll;
                n = (lpe - lp) + 1;
                if(n <= 0)
                        n = 1;
                if(cl+(n+4) > kl) {
                        kl = cl+(n+4);
                        k = realloc(k, sizeof(Key) +
                                (kl-1)*sizeof(k->key[0]));
                        if(k == 0)
                                nomem();
                }
                k->klen = cl;
                (*f->dokey)(k, lp, lpe, f);
                cl = k->klen;
        }

        /*
         * global comparisons
         */
        if(!(args.uflag && cl > 0)) {
                f = &args.field[0];
                if(cl+(ll+4) > kl) {
                        kl = cl+(ll+4);
                        k = realloc(k, sizeof(Key) +
                                (kl-1)*sizeof(k->key[0]));
                        if(k == 0)
                                nomem();
                }
                k->klen = cl;
                (*f->dokey)(k, l->line, l->line+ll, f);
                cl = k->klen;
        }

        l->key = k;
        k->klen = cl;

        if(args.vflag) {
                if(write(2, l->line, l->llen) != l->llen)
                        exits("write");
                for(i=0; i<k->klen; i++) {
                        fprint(2, " %.2x", k->key[i]);
                        if(k->key[i] == 0x00 || k->key[i] == 0xff)
                                fprint(2, "\n");
                }
        }
}

void
makemapm(Field *f)
{
        int i, c;

        for(i=0; i<nelem(f->mapto); i++) {
                c = 1;
                if(i == ' ' || i == '\t')
                        c = 0;
                if(i >= 'a' && i <= 'z')
                        c = i + ('A' - 'a');
                if(i >= 'A' && i <= 'Z')
                        c = i;
                f->mapto[i] = c;
                if(args.vflag) {
                        if((i & 15) == 0)
                                fprint(2, "     ");
                        fprint(2, " %.2x", c);
                        if((i & 15) == 15)
                                fprint(2, "\n");
                }
        }
}

void
makemapd(Field *f)
{
        int i, c;

        for(i=0; i<nelem(f->mapto); i++) {
                c = i;
                if(f->flags & Iflag)
                        if(c < 040 || c > 0176)
                                c = -1;
                if((f->flags & Wflag) && c >= 0)
                        if(c == ' ' || c == '\t')
                                c = -1;
                if((f->flags & Dflag) && c >= 0)
                        if(!(c == ' ' || c == '\t' ||
                            (c >= 'a' && c <= 'z') ||
                            (c >= 'A' && c <= 'Z') ||
                            (c >= '0' && c <= '9'))){
                                if(!isupperrune(c = toupperrune(c)))
                                        c = -1;
                        }
                if((f->flags & Fflag) && c >= 0)
                        c = toupperrune(tobaserune(c));
                if((f->flags & Rflag) && c >= 0 && i > 0 && i < Runeself)
                        c = ~c & 0xff;
                if(c < 0)
                        c = 0;
                f->mapto[i] = c;
                if(args.vflag) {
                        if((i & 15) == 0)
                                fprint(2, "     ");
                        fprint(2, " %.2x", c);
                        if((i & 15) == 15)
                                fprint(2, "\n");
                }
        }
}

Rune*   month[12] =
{
        L"JAN",
        L"FEB",
        L"MAR",
        L"APR",
        L"MAY",
        L"JUN",
        L"JUL",
        L"AUG",
        L"SEP",
        L"OCT",
        L"NOV",
        L"DEC",
};

/************** radix sort ***********/

enum
{
        Threshold       = 14,
};

void    rsort4(Key***, ulong, int);
void    bsort4(Key***, ulong, int);

void
sort4(void *a, ulong n)
{
        if(n > Threshold)
                rsort4((Key***)a, n, 0);
        else
                bsort4((Key***)a, n, 0);
}

void
rsort4(Key ***a, ulong n, int b)
{
        Key ***ea, ***t, ***u, **t1, **u1, *k;
        Key ***part[257];
        static long count[257];
        long clist[257+257], *cp, *cp1;
        int c, lowc, higc;

        /*
         * pass 1 over all keys,
         * count the number of each key[b].
         * find low count and high count.
         */
        lowc = 256;
        higc = 0;
        ea = a+n;
        for(t=a; t<ea; t++) {
                k = **t;
                n = k->klen;
                if(b >= n) {
                        count[256]++;
                        continue;
                }
                c = k->key[b];
                n = count[c]++;
                if(n == 0) {
                        if(c < lowc)
                                lowc = c;
                        if(c > higc)
                                higc = c;
                }
        }

        /*
         * pass 2 over all counts,
         * put partition pointers in part[c].
         * save compacted indexes and counts
         * in clist[].
         */
        t = a;
        n = count[256];
        clist[0] = n;
        part[256] = t;
        t += n;

        cp1 = clist+1;
        cp = count+lowc;
        for(c=lowc; c<=higc; c++,cp++) {
                n = *cp;
                if(n) {
                        cp1[0] = n;
                        cp1[1] = c;
                        cp1 += 2;
                        part[c] = t;
                        t += n;
                }
        }
        *cp1 = 0;

        /*
         * pass 3 over all counts.
         * chase lowest pointer in each partition
         * around a permutation until it comes
         * back and is stored where it started.
         * static array, count[], should be
         * reduced to zero entries except maybe
         * count[256].
         */
        for(cp1=clist+1; cp1[0]; cp1+=2) {
                c = cp1[1];
                cp = count+c;
                while(*cp) {
                        t1 = *part[c];
                        for(;;) {
                                k = *t1;
                                n = 256;
                                if(b < k->klen)
                                        n = k->key[b];
                                u = part[n]++;
                                count[n]--;
                                u1 = *u;
                                *u = t1;
                                if(n == c)
                                        break;
                                t1 = u1;
                        }
                }
        }

        /*
         * pass 4 over all partitions.
         * call recursively.
         */
        b++;
        t = a + clist[0];
        count[256] = 0;
        for(cp1=clist+1; n=cp1[0]; cp1+=2) {
                if(n > Threshold)
                        rsort4(t, n, b);
                else
                if(n > 1)
                        bsort4(t, n, b);
                t += n;
        }
}

/*
 * bubble sort to pick up
 * the pieces.
 */
void
bsort4(Key ***a, ulong n, int b)
{
        Key ***i, ***j, ***k, ***l, **t;
        Key *ka, *kb;
        int n1, n2;

        l = a+n;
        j = a;

loop:
        i = j;
        j++;
        if(j >= l)
                return;

        ka = **i;
        kb = **j;
        n1 = ka->klen - b;
        n2 = kb->klen - b;
        if(n1 > n2)
                n1 = n2;
        if(n1 <= 0)
                goto loop;
        n2 = ka->key[b] - kb->key[b];
        if(n2 == 0)
                n2 = memcmp(ka->key+b, kb->key+b, n1);
        if(n2 <= 0)
                goto loop;

        for(;;) {
                k = i+1;

                t = *k;
                *k = *i;
                *i = t;

                if(i <= a)
                        goto loop;

                i--;
                ka = **i;
                kb = *t;
                n1 = ka->klen - b;
                n2 = kb->klen - b;
                if(n1 > n2)
                        n1 = n2;
                if(n1 <= 0)
                        goto loop;
                n2 = ka->key[b] - kb->key[b];
                if(n2 == 0)
                        n2 = memcmp(ka->key+b, kb->key+b, n1);
                if(n2 <= 0)
                        goto loop;
        }
}