Subversion Repositories planix.SVN

Rev

Rev 2 | Blame | Compare with Previous | Last modification | View Log | RSS feed

/*
 * Check and fix an arena partition.
 *
 * This is a lot grittier than the rest of Venti because
 * it can't just give up if a byte here or there is wrong.
 *
 * The rule here (hopefully followed!) is that block corruption
 * only ever has a local effect -- there are no blocks that you
 * can wipe out that will cause large portions of 
 * uncorrupted data blocks to be useless.
 */

#include "stdinc.h"
#include "dat.h"
#include "fns.h"
#include "whack.h"

#define ROUNDUP(x,n)            (((x)+(n)-1)&~((n)-1))

#pragma varargck type "z" uvlong
#pragma varargck type "z" vlong
#pragma varargck type "t" uint

/*
 * Byte-size units (K, M, G) used throughout this file, and Block,
 * the number of bytes paged in when probing for a single on-disk header.
 */
enum
{
        K = 1024,
        M = 1024*1024,
        G = 1024*1024*1024,
        
        Block = 4096,
};

int debugsha1;

int verbose;            /* guessgeometry logs found headers when set; showdiffs dumps raw bytes when > 1 */
Part *part;             /* partition read and written through readpart/writepart */
char *file;
char *basename;
char *dumpbase;
int fix;                /* when zero, pageout never writes modified pages back */
int badreads;           /* incremented by readdisk for each sector-sized read that fails */
int unseal;
uchar zero[MaxDiskBlock];       /* showdiffs compares against this as all-zero data; presumably never written */

Arena lastarena;
ArenaPart ap;           /* partition geometry filled in by guessgeometry */
uvlong arenasize;       /* bytes per arena; guessgeometry infers it when it is 0 */
int nbadread;
int nbad;
uvlong partend;         /* offset at which the partition ends; reads at or past it yield 0xFB filler */
void checkarena(vlong, int);    /* called by checkarenas with an arena's byte offset and index */

/*
 * Print the command-line synopsis on standard error and exit.
 * The exit status is the non-empty string "usage" so that a bad
 * invocation is not reported to the caller as a successful run.
 */
void
usage(void)
{
        fprint(2, "usage: fixarenas [-fv] [-a arenasize] [-b blocksize] file [ranges]\n");
        threadexitsall("usage");
}

/*
 * Print-verb handler for sizes: emits the vlong argument using the
 * largest of the K, M, G suffixes that divides it exactly, or as a
 * plain decimal number when none does (a form unittoull accepts).
 */
static int
zfmt(Fmt *fmt)
{
        vlong v;

        v = va_arg(fmt->args, vlong);
        if(v == 0)
                return fmtstrcpy(fmt, "0");
        /* G is a multiple of M, which is a multiple of K, so test smallest first */
        if(v%K != 0)
                return fmtprint(fmt, "%lld", v);
        if(v%M != 0)
                return fmtprint(fmt, "%lldK", v/K);
        if(v%G != 0)
                return fmtprint(fmt, "%lldM", v/M);
        return fmtprint(fmt, "%lldG", v/G);
}

/*
 * Print-verb handler for times: emits the uint argument as ctime
 * formats it, cut off after 28 characters to drop the newline.
 */
static int
tfmt(Fmt *fmt)
{
        char stamp[30];
        uint secs;

        secs = va_arg(fmt->args, uint);
        /* assumes ctime's result fits in 30 bytes including the NUL -- TODO confirm */
        strcpy(stamp, ctime(secs));
        stamp[28] = 0;
        return fmtstrcpy(fmt, stamp);
}

/*
 * Coalesce messages about unreadable sectors into larger ranges.
 *
 * Each call extends the pending range when o starts exactly where the
 * pending range ends and the message is unchanged; otherwise the
 * pending range (if non-empty) is printed and a new one begins at o.
 * A nil msg reuses the previous message.
 *
 * bad(nil, 0, 0) flushes the pending range.
 * An offset of -1 discards all state without printing.
 */
static void
bad(char *msg, vlong o, int len)
{
        static vlong start, stop;       /* pending range is [start, stop) */
        static char *pending;           /* message for the pending range */
        int samemsg;

        if(o == -1){
                pending = nil;
                start = 0;
                stop = 0;
                return;
        }
        if(msg == nil)
                msg = pending;

        samemsg = msg == nil || pending == nil || strcmp(msg, pending) == 0;
        if(stop != o || !samemsg){
                if(start != stop)
                        print("%s %#llux+%#llux (%,lld+%,lld)\n",
                                pending, start, stop-start, start, stop-start);
                start = o;
        }
        pending = msg;
        stop = o+len;
}

/*
 * Read in the len bytes of data at the offset.  If can't for whatever reason,
 * fill it with garbage but print an error.
 *
 * Bytes at or beyond partend are filled with 0xFB.  Bytes that cannot
 * be read are left as 0xFD; each failed sector is reported through
 * bad() by its disk offset and counted in badreads.
 * Always returns buf.
 */
static uchar*
readdisk(uchar *buf, vlong offset, int len)
{
        int i, j, k, n;

        if(offset >= partend){
                memset(buf, 0xFB, len);
                return buf;
        }
        
        if(offset+len > partend){
                /* fill everything, then read only the part inside the partition */
                memset(buf, 0xFB, len);
                len = partend - offset;
        }

        if(readpart(part, offset, buf, len) >= 0)
                return buf;
        
        /*
         * The read failed.  Clear the buffer to nonsense, and
         * then try reading in smaller pieces.  If that fails,
         * read in even smaller pieces.  And so on down to sectors.
         */
        memset(buf, 0xFD, len);
        for(i=0; i<len; i+=64*K){
                n = 64*K;
                if(i+n > len)
                        n = len-i;
                if(readpart(part, offset+i, buf+i, n) >= 0)
                        continue;
                for(j=i; j<len && j<i+64*K; j+=4*K){
                        n = 4*K;
                        if(j+n > len)
                                n = len-j;
                        if(readpart(part, offset+j, buf+j, n) >= 0)
                                continue;
                        for(k=j; k<len && k<j+4*K; k+=512){
                                /* never read past the end of the caller's buffer */
                                n = 512;
                                if(k+n > len)
                                        n = len-k;
                                if(readpart(part, offset+k, buf+k, n) >= 0)
                                        continue;
                                /* report the disk offset, not the offset within buf */
                                bad("disk read failed at", offset+k, n);
                                badreads++;
                        }
                }
        }
        bad(nil, 0, 0);
        return buf;
}

/*
 * Buffer to support running SHA1 hash of the disk.
 */
typedef struct Shabuf Shabuf;
struct Shabuf
{
        int fd;                 /* debug output file set by sbdebug; values <= 0 mean none */
        vlong offset;           /* disk offset up to which data has been hashed */
        DigestState state;      /* running SHA1 state */
        int rollback;           /* nonzero: sbupdate saves checkpoints for sbrollback */
        vlong r0;               /* disk offset of the first update; checkpoints are relative to it */
        DigestState *hist;      /* checkpoints, one per 4M hashed since r0 */
        int nhist;              /* number of entries allocated in hist */
};

/*
 * Direct a copy of the hashed data to a newly created file, closing
 * any debug file already attached to sb.  If the file cannot be
 * created, sb is left with no debug file.
 */
void
sbdebug(Shabuf *sb, char *file)
{
        int nfd;

        if(sb->fd > 0){
                close(sb->fd);
                sb->fd = 0;
        }

        nfd = create(file, OWRITE, 0666);
        if(nfd < 0)
                return;
        if(nfd == 0){
                /* sb->fd == 0 means "no file", so move off descriptor 0 */
                nfd = dup(0, -1);
                close(0);
        }
        sb->fd = nfd;
}

/*
 * Feed the len bytes at p, which came from disk offset `offset`,
 * into the running hash.  Only the part of the range starting at
 * sb->offset is hashed; a range that does not contain sb->offset
 * is ignored.  When sb->rollback is set, the hash state is saved
 * every 4M (measured from sb->r0) for use by sbrollback.
 */
void
sbupdate(Shabuf *sb, uchar *p, vlong offset, int len)
{
        int n, x;
        vlong o;

        /* first update with rollback enabled: start the checkpoint list */
        if(sb->rollback && !sb->hist){
                sb->r0 = offset;
                sb->nhist = 1;
                sb->hist = vtmalloc(sb->nhist*sizeof *sb->hist);
                memset(sb->hist, 0, sizeof sb->hist[0]);
        }
        if(sb->r0 == 0)
                sb->r0 = offset;

        /* the range must contain the next offset to be hashed */
        if(sb->offset < offset || sb->offset >= offset+len){
                if(0) print("sbupdate %p %#llux+%d but offset=%#llux\n",
                        p, offset, len, sb->offset);
                return;
        }
        /* skip the leading bytes that have already been hashed */
        x = sb->offset - offset;
        if(0) print("sbupdate %p %#llux+%d skip %d\n",
                sb, offset, len, x);
        if(x){
                p += x;
                offset += x;
                len -= x;
        }
        assert(sb->offset == offset);
        
        /* mirror the hashed bytes into the debug file, positioned relative to r0 */
        if(sb->fd > 0)
                pwrite(sb->fd, p, len, offset - sb->r0);

        if(!sb->rollback){
                sha1(p, len, nil, &sb->state);
                sb->offset += len;
                return;
        }
        
        /* save state every 4M so we can roll back quickly */
        o = offset - sb->r0;
        while(len > 0){
                /* hash up to the next 4M boundary, or whatever is left */
                n = 4*M - o%(4*M);
                if(n > len)
                        n = len;
                sha1(p, n, nil, &sb->state);
                sb->offset += n;
                o += n;
                p += n;
                len -= n;
                if(o%(4*M) == 0){
                        /* landed on a boundary: record checkpoint number x */
                        x = o/(4*M);
                        if(x >= sb->nhist){
                                if(x != sb->nhist)
                                        print("oops! x=%d nhist=%d\n", x, sb->nhist);
                                sb->nhist += 32;
                                sb->hist = vtrealloc(sb->hist, sb->nhist*sizeof *sb->hist);
                        }
                        sb->hist[x] = sb->state;
                }
        }               
}

/*
 * Advance the running hash to eoffset by reading the disk
 * in pieces of at most 4M and feeding them to sbupdate.
 */
void
sbdiskhash(Shabuf *sb, vlong eoffset)
{
        static uchar dbuf[4*M];
        vlong left;
        int n;

        for(;;){
                left = eoffset - sb->offset;
                if(left <= 0)
                        break;
                n = sizeof dbuf;
                if(left < n)
                        n = left;
                readdisk(dbuf, sb->offset, n);
                sbupdate(sb, dbuf, sb->offset, n);
        }
}

/*
 * Rewind the running hash to the latest saved checkpoint at or
 * before offset, so hashing can resume from there.  Does nothing
 * if offset is not behind the current hash position.  Prints a
 * message and leaves sb unchanged if no usable checkpoint exists
 * (rollback not enabled, nothing hashed yet, offset before the
 * first hashed byte, or checkpoint beyond the saved history).
 * Any debug file is truncated to match the new position.
 */
void
sbrollback(Shabuf *sb, vlong offset)
{
        int x;
        vlong o;
        Dir d;
        
        if(!sb->rollback || !sb->r0){
                print("cannot rollback sha\n");
                return;
        }
        if(offset >= sb->offset)
                return;
        o = offset - sb->r0;
        /* compare as vlong: a negative or huge o must not index hist */
        if(o < 0 || o/(4*M) >= sb->nhist){
                print("cannot rollback sha\n");
                return;
        }
        x = o/(4*M);
        sb->state = sb->hist[x];
        /* widen before multiplying; x*4*M overflows int once x reaches 512 */
        sb->offset = sb->r0 + (vlong)x*4*M;
        assert(sb->offset <= offset);
        
        if(sb->fd > 0){
                nulldir(&d);
                d.length = sb->offset - sb->r0;
                dirfwstat(sb->fd, &d);
        }
}

/*
 * Finish the running hash, storing the digest in score.
 * The checkpoint history is released first.
 */
void
sbscore(Shabuf *sb, uchar *score)
{
        /* free accepts a nil pointer, so no guard is needed */
        free(sb->hist);
        sb->hist = nil;
        sha1(nil, 0, score, &sb->state);
}

/*
 * If we're fixing arenas, then editing this memory edits the disk!
 * It will be written back out as new data is paged in. 
 *
 * buf holds the current page; sbuf is a copy taken when the page was
 * read, which pageout compares against to see whether buf changed.
 * bufoffset and buflen give the disk range held in buf (buflen == 0
 * means nothing to write back).
 */
uchar buf[4*M];
uchar sbuf[4*M];
vlong bufoffset;
int buflen;

static void pageout(void);
/*
 * Make buf hold the len bytes at disk offset `offset`, first writing
 * back the previous page via pageout.  The part of the request at or
 * past partend is filled with 0xFB and is not tracked for write-back.
 * Returns buf.
 */
static uchar*
pagein(vlong offset, int len)
{
        pageout();

        if(offset < partend){
                if(offset+len > partend){
                        memset(buf, 0xFB, sizeof buf);
                        len = partend - offset;
                }
                bufoffset = offset;
                buflen = len;
                readdisk(buf, offset, len);
                /* snapshot so pageout can detect edits */
                memmove(sbuf, buf, len);
                return buf;
        }

        /* entirely beyond the partition: filler only, nothing tracked */
        memset(buf, 0xFB, sizeof buf);
        return buf;
}

/*
 * Write the current page back to disk if fix is set and the page
 * differs from the snapshot taken at page-in time; report a failed
 * write.  In every case the page is marked empty afterwards.
 */
static void
pageout(void)
{
        int dirty;

        dirty = buflen != 0 && fix && memcmp(buf, sbuf, buflen) != 0;
        if(dirty && writepart(part, bufoffset, buf, buflen) < 0)
                print("disk write failed at %#llux+%#ux (%,lld+%,d)\n",
                        bufoffset, buflen, bufoffset, buflen);
        buflen = 0;
}

/*
 * Zero the len bytes at disk offset `offset` by paging the range
 * through buf in pieces of at most MaxBlock bytes, each starting on a
 * MinBlock boundary, then page the previously loaded range back in.
 * The zeros reach the disk through pageout, so only when fix is set.
 */
static void
zerorange(vlong offset, int len)
{
        int i;
        vlong ooff;
        int olen;
        enum { MinBlock = 4*K, MaxBlock = 8*K };
        
        /* disabled shortcut: zero in place when the range is already paged in */
        if(0)
        if(bufoffset <= offset && offset+len <= bufoffset+buflen){
                memset(buf+(offset-bufoffset), 0, len);
                return;
        }
        
        /* remember the current page so it can be restored at the end */
        ooff = bufoffset;
        olen = buflen;
        
        i = offset%MinBlock;
        if(i+len < MaxBlock){
                /*
                 * The page must cover the i bytes of lead-in as well as
                 * the range itself, or the tail of the zeroed bytes falls
                 * outside buflen and is never written back.
                 */
                pagein(offset-i, (i+len+MinBlock-1)&~(MinBlock-1));
                memset(buf+i, 0, len);
        }else{
                /* leading partial piece, up to the end of the first MaxBlock */
                pagein(offset-i, MaxBlock);
                memset(buf+i, 0, MaxBlock-i);
                offset += MaxBlock-i;
                len -= MaxBlock-i;
                /* whole MaxBlock pieces */
                while(len >= MaxBlock){
                        pagein(offset, MaxBlock);
                        memset(buf, 0, MaxBlock);
                        offset += MaxBlock;
                        len -= MaxBlock;
                }
                /* trailing piece, rounded up to MinBlock for the read */
                pagein(offset, (len+MinBlock-1)&~(MinBlock-1));
                memset(buf, 0, len);
        }
        pagein(ooff, olen);
}

/*
 * read/write integers
 *
static void
p16(uchar *p, u16int u)
{
        p[0] = (u>>8) & 0xFF;
        p[1] = u & 0xFF;
}
*/

/* Decode the big-endian 16-bit integer stored at p. */
static u16int
u16(uchar *p)
{
        u16int hi, lo;

        hi = p[0];
        lo = p[1];
        return (hi<<8) | lo;
}

/* Store u at p as a big-endian 32-bit integer. */
static void
p32(uchar *p, u32int u)
{
        int i;

        /* fill from the least significant byte backwards */
        for(i=3; i>=0; i--){
                p[i] = u & 0xFF;
                u >>= 8;
        }
}

/*
 * Decode the big-endian 32-bit integer stored at p.
 * Each byte is widened to u32int before shifting: a uchar promotes
 * to int, and shifting a value >= 0x80 left by 24 would overflow it.
 */
static u32int
u32(uchar *p)
{
        return ((u32int)p[0]<<24) | ((u32int)p[1]<<16) | ((u32int)p[2]<<8) | (u32int)p[3];
}

/*
static void
p64(uchar *p, u64int u)
{
        p32(p, u>>32);
        p32(p, u);
}
*/

/* Decode the big-endian 64-bit integer stored at p. */
static u64int
u64(uchar *p)
{
        u64int hi, lo;

        hi = u32(p);
        lo = u32(p+4);
        return (hi<<32) | lo;
}

/*
 * qsort comparison function for arrays of vlong:
 * returns -1, 0, or 1 as *va is less than, equal to,
 * or greater than *vb.
 */
static int
vlongcmp(const void *va, const void *vb)
{
        vlong a, b;
        
        a = *(vlong*)va;
        b = *(vlong*)vb;
        if(a < b)
                return -1;
        if(a > b)
                return 1;
        return 0;
}

/* D and S are in draw.h */
#define D VD
#define S VS

/*
 * Display tags OR'ed into Info.len; they select how showdiffs
 * prints a mismatched field.  A length with no tag is printed
 * in hex (or as a small integer for 1-byte fields).
 */
enum
{
        D = 0x10000,    /* decimal number */
        Z = 0x20000,    /* size, printed with %z */
        S = 0x30000,    /* string */
        T = 0x40000,    /* time, printed with %t */
        N = 0xFFFF      /* mask extracting the field length in bytes */
};
/*
 * One field of a packed on-disk structure, as walked by showdiffs.
 * A table of these ends with an entry whose length is 0.
 */
typedef struct Info Info;
struct Info
{
        int len;        /* field length in bytes (len&N), OR'ed with a display tag */
        char *name;     /* field name printed in difference reports */
};

/*
 * Field layout of the packed arena partition header; checkarenas
 * passes it to showdiffs when the on-disk copy differs.
 */
Info partinfo[] = {
        4,      "magic",
        D|4,    "version",
        Z|4,    "blocksize",
        4,      "arenabase",
        0
};

/*
 * Field layout table for showdiffs.
 * NOTE(review): by its name, presumably the version 4 arena header -- confirm against its user.
 */
Info headinfo4[] = {
        4,      "magic",
        D|4,    "version",
        S|ANameSize,    "name",
        Z|4,    "blocksize",
        Z|8,    "size",
        0
};

/*
 * Field layout table for showdiffs; same as headinfo4 plus a trailing clumpmagic.
 * NOTE(review): by its name, presumably the version 5 arena header -- confirm against its user.
 */
Info headinfo5[] = {
        4,      "magic",
        D|4,    "version",
        S|ANameSize,    "name",
        Z|4,    "blocksize",
        Z|8,    "size",
        4,      "clumpmagic",
        0
};

/*
 * Field layout table for showdiffs.
 * NOTE(review): by its name, presumably the version 4 arena tail -- confirm against its user.
 */
Info tailinfo4[] = {
        4,      "magic",
        D|4,    "version",
        S|ANameSize,    "name",
        D|4,    "clumps",
        D|4,    "cclumps",
        T|4,    "ctime",
        T|4,    "wtime",
        D|8,    "used",
        D|8,    "uncsize",
        1,      "sealed",
        0
};
        
/*
 * Field layout table for showdiffs: the tailinfo4 fields followed
 * by an extension byte and a second ("mem.") set of statistics.
 */
Info tailinfo4a[] = {
        /* tailinfo 4 */
        4,      "magic",
        D|4,    "version",
        S|ANameSize,    "name",
        D|4,    "clumps",
        D|4,    "cclumps",
        T|4,    "ctime",
        T|4,    "wtime",
        D|8,    "used",
        D|8,    "uncsize",
        1,      "sealed",

        /* mem stats */
        1,      "extension",
        D|4,    "mem.clumps",
        D|4,    "mem.cclumps",
        D|8,    "mem.used",
        D|8,    "mem.uncsize",
        1,      "mem.sealed",
        0
};
        
/*
 * Field layout table for showdiffs; like tailinfo4 with a clumpmagic after wtime.
 * NOTE(review): by its name, presumably the version 5 arena tail -- confirm against its user.
 */
Info tailinfo5[] = {
        4,      "magic",
        D|4,    "version",
        S|ANameSize,    "name",
        D|4,    "clumps",
        D|4,    "cclumps",
        T|4,    "ctime",
        T|4,    "wtime",
        4,      "clumpmagic",
        D|8,    "used",
        D|8,    "uncsize",
        1,      "sealed",
        0
};

/*
 * Field layout table for showdiffs: the tailinfo5 fields followed
 * by an extension byte and a second ("mem.") set of statistics.
 */
Info tailinfo5a[] = {
        /* tailinfo 5 */
        4,      "magic",
        D|4,    "version",
        S|ANameSize,    "name",
        D|4,    "clumps",
        D|4,    "cclumps",
        T|4,    "ctime",
        T|4,    "wtime",
        4,      "clumpmagic",
        D|8,    "used",
        D|8,    "uncsize",
        1,      "sealed",

        /* mem stats */
        1,      "extension",
        D|4,    "mem.clumps",
        D|4,    "mem.cclumps",
        D|8,    "mem.used",
        D|8,    "mem.uncsize",
        1,      "mem.sealed",
        0
};
        
/*
 * Print the fields in which the len-byte packed structures want
 * (the correct value) and have (the on-disk value) differ.
 * info describes the fields in order and ends with a zero length;
 * each field is printed according to its display tag.  Bytes left
 * over after the last field are compared as a whole.
 */
void
showdiffs(uchar *want, uchar *have, int len, Info *info)
{
        int n;
        
        for(; len > 0 && (n = info->len&N) > 0; have += n, want += n, len -= n, info++){
                if(memcmp(have, want, n) == 0)
                        continue;
                switch(info->len){
                case 1:
                        print("\t%s: correct=%d disk=%d\n",
                                info->name, *want, *have);
                        break;
                case 4:
                        print("\t%s: correct=%#ux disk=%#ux\n",
                                info->name, u32(want), u32(have));
                        break;
                case D|4:
                        print("\t%s: correct=%,ud disk=%,ud\n",
                                info->name, u32(want), u32(have));
                        break;
                case T|4:
                        print("\t%s: correct=%t\n\t\tdisk=%t\n",
                                info->name, u32(want), u32(have));
                        break;
                case Z|4:
                        print("\t%s: correct=%z disk=%z\n",
                                info->name, (uvlong)u32(want), (uvlong)u32(have));
                        break;
                case D|8:
                        print("\t%s: correct=%,lld disk=%,lld\n",
                                info->name, u64(want), u64(have));
                        break;
                case Z|8:
                        print("\t%s: correct=%z disk=%z\n",
                                info->name, u64(want), u64(have));
                        break;
                case S|ANameSize:
                        print("\t%s: correct=%s disk=%.*s\n",
                                info->name, (char*)want, 
                                utfnlen((char*)have, ANameSize-1),
                                (char*)have);
                        break;
                default:
                        print("\t%s: correct=%.*H disk=%.*H\n",
                                info->name, n, want, n, have);
                        break;
                }
        }

        /* whatever follows the described fields */
        if(len <= 0 || memcmp(have, want, len) == 0)
                return;
        if(memcmp(want, zero, len) != 0)
                print("!!\textra want data in showdiffs (bug in fixarenas)\n");
        else
                print("\tnon-zero data on disk after structure\n");
        if(verbose > 1){
                print("want: %.*H\n", len, want);
                print("have: %.*H\n", len, have);
        }
}

/*
 * Does part begin with an arena?
 */
int
isonearena(void)
{
        return u32(pagein(0, Block)) == ArenaHeadMagic;
}

/* Arena table sizes that guessgeometry tries when it has to guess the arena base. */
static int tabsizes[] = { 16*1024, 64*1024, 512*1024, 768*1024, };
/*
 * Sort a[0..n-1] with vlongcmp and return the value of its longest
 * run of equal elements (the first such run in sorted order on ties).
 * *pcount is set to the length of that run.  For n == 0 the result
 * is 0 and *pcount is 0.
 */
static u64int
mostcommon(u64int *a, int n, int *pcount)
{
        int i, run, bestn;
        u64int best;

        qsort(a, n, sizeof a[0], vlongcmp);
        bestn = 0;
        best = 0;
        for(i=1, run=1; i<=n; i++, run++){
                if(i==n || a[i] != a[i-1]){
                        if(run > bestn){
                                bestn = run;
                                best = a[i-1];
                        }
                        run = 0;
                }
        }
        *pcount = bestn;
        return best;
}

/*
 * Poke around on the disk to guess what the ArenaPart numbers are.
 *
 * Fills in ap (version, blocksize, arenabase, tabbase, tabsize) and
 * arenasize.  Values of arenasize and ap.blocksize that are already
 * nonzero on entry take precedence over what is found on disk.
 * Calls sysfatal if too few arena heads and tails can be found or
 * the resulting block size is unusable.
 */
void
guessgeometry(void)
{
        int i, j, bestn, ndiff, nhead, ntail;
        uchar *p, *ep, *sp;
        u64int diff[100], head[20], tail[20];
        u64int offset, bestdiff;
        
        ap.version = ArenaPartVersion;

        if(arenasize == 0 || ap.blocksize == 0){
                /*
                 * The ArenaPart block at offset PartBlank may be corrupt or just wrong.
                 * Instead, look for the individual arena headers and tails, which there
                 * are many of, and once we've seen enough, infer the spacing.
                 *
                 * Of course, nothing in the file format requires that arenas be evenly
                 * spaced, but fmtarenas always does that for us.
                 */
                nhead = 0;
                ntail = 0;
                for(offset=PartBlank; offset<partend; offset+=4*M){
                        p = pagein(offset, 4*M);
                        for(sp=p, ep=p+4*M; p<ep; p+=K){
                                if(u32(p) == ArenaHeadMagic && nhead < nelem(head)){
                                        if(verbose)
                                                print("arena head at %#llx\n", offset+(p-sp));
                                        head[nhead++] = offset+(p-sp);
                                }
                                if(u32(p) == ArenaMagic && ntail < nelem(tail)){
                                        tail[ntail++] = offset+(p-sp);
                                        if(verbose)
                                                print("arena tail at %#llx\n", offset+(p-sp));
                                }
                        }
                        if(nhead == nelem(head) && ntail == nelem(tail))
                                break;
                }
                if(nhead < 3 && ntail < 3)
                        sysfatal("too few intact arenas: %d heads, %d tails", nhead, ntail);
        
                /* 
                 * Arena size is likely the most common
                 * inter-head or inter-tail spacing.
                 */
                ndiff = 0;
                for(i=1; i<nhead; i++)
                        diff[ndiff++] = head[i] - head[i-1];
                for(i=1; i<ntail; i++)
                        diff[ndiff++] = tail[i] - tail[i-1];
                bestdiff = mostcommon(diff, ndiff, &bestn);
                print("arena size likely %z (%d of %d)\n", bestdiff, bestn, ndiff);
                if(arenasize != 0 && arenasize != bestdiff)
                        print("using user-specified size %z instead\n", arenasize);
                else
                        arenasize = bestdiff;

                /*
                 * The arena tail for an arena is arenasize-blocksize from the head.
                 */
                ndiff = 0;
                for(i=j=0; i<nhead && j<ntail; ){
                        if(tail[j] < head[i]){
                                j++;
                                continue;
                        }
                        if(tail[j] < head[i]+arenasize){
                                diff[ndiff++] = head[i]+arenasize - tail[j];
                                j++;
                                continue;
                        }
                        i++;
                }
                if(ndiff < 3)
                        sysfatal("too few intact arenas: %d head, tail pairs", ndiff);
                bestdiff = mostcommon(diff, ndiff, &bestn);
                print("block size likely %z (%d of %d)\n", bestdiff, bestn, ndiff);
                if(ap.blocksize != 0 && ap.blocksize != bestdiff)
                        print("using user-specified size %z instead\n", (vlong)ap.blocksize);
                else
                        ap.blocksize = bestdiff;
                if(ap.blocksize == 0 || ap.blocksize&(ap.blocksize-1))
                        sysfatal("block size not a power of two");
                if(ap.blocksize > MaxDiskBlock)
                        sysfatal("block size too big (max=%d)", MaxDiskBlock);

                /*
                 * Use head/tail information to deduce arena base.
                 */
                ndiff = 0;
                for(i=0; i<nhead; i++)
                        diff[ndiff++] = head[i]%arenasize;
                for(i=0; i<ntail; i++)
                        diff[ndiff++] = (tail[i]+ap.blocksize)%arenasize;
                ap.arenabase = mostcommon(diff, ndiff, &bestn);
        }

        ap.tabbase = ROUNDUP(PartBlank+HeadSize, ap.blocksize);
        /*
         * XXX pick up table, check arenabase.
         * XXX pick up table, record base name.
         */

        /*
         * Somewhat standard computation.
         * Fmtarenas used to use 64k tab, now uses 512k tab.
         */
        if(ap.arenabase == 0){
                print("trying standard arena bases...\n");
                for(i=0; i<nelem(tabsizes); i++){
                        ap.arenabase = ROUNDUP(PartBlank+HeadSize+tabsizes[i], ap.blocksize);
                        p = pagein(ap.arenabase, Block);
                        if(u32(p) == ArenaHeadMagic)
                                break;
                }
        }
        p = pagein(ap.arenabase, Block);
        print("arena base likely %z%s\n", (vlong)ap.arenabase, 
                u32(p)!=ArenaHeadMagic ? " (but no arena head there)" : "");

        ap.tabsize = ap.arenabase - ap.tabbase;
}

/*
 * Check the arena partition blocks and then the arenas listed in range.
 */
void
checkarenas(char *range)
{
        char *s, *t;
        int i, lo, hi, narena;
        uchar dbuf[HeadSize];
        uchar *p;

        guessgeometry();

        partend -= partend%ap.blocksize;

        memset(dbuf, 0, sizeof dbuf);
        packarenapart(&ap, dbuf);
        p = pagein(PartBlank, Block);
        if(memcmp(p, dbuf, HeadSize) != 0){
                print("on-disk arena part superblock incorrect\n");
                showdiffs(dbuf, p, HeadSize, partinfo);
        }
        memmove(p, dbuf, HeadSize);

        narena = (partend-ap.arenabase + arenasize-1)/arenasize;
        if(range == nil){
                for(i=0; i<narena; i++)
                        checkarena(ap.arenabase+(vlong)i*arenasize, i);
        }else if(strcmp(range, "none") == 0){
                /* nothing */
        }else{
                /* parse, e.g., -4,8-9,10- */
                for(s=range; *s; s=t){
                        t = strchr(s, ',');
                        if(t)
                                *t++ = 0;
                        else
                                t = s+strlen(s);
                        if(*s == '-')
                                lo = 0;
                        else
                                lo = strtol(s, &s, 0);
                        hi = lo;
                        if(*s == '-'){
                                s++;
                                if(*s == 0)
                                        hi = narena-1;
                                else
                                        hi = strtol(s, &s, 0);
                        }
                        if(*s != 0){
                                print("bad arena range: %s\n", s);
                                continue;
                        }
                        for(i=lo; i<=hi; i++)
                                checkarena(ap.arenabase+(vlong)i*arenasize, i);
                }
        }
}

/*
 * Is there a clump here at p?
 *
 * Parses a clump header at p into cl and verifies that the data
 * following it (uncompressed first if necessary) hashes to the score
 * in the header.  On success stores the leading magic number in
 * *pmagic and returns the total size of header plus data in bytes.
 * Returns 0 if this is not a valid clump; cl may then be partly
 * filled in and *pmagic is untouched.
 */
static int
isclump(uchar *p, Clump *cl, u32int *pmagic)
{
        int n;
        u32int magic;
        uchar score[VtScoreSize], *q, *data;
        Unwhack uw;
        uchar ubuf[70*1024];
        
        q = p;
        magic = u32(q);
        if(magic == 0)
                return 0;
        q += U32Size;

        cl->info.type = vtfromdisktype(*q);
        if(cl->info.type == 0xFF)
                return 0;
        q++;
        cl->info.size = u16(q);
        q += U16Size;
        cl->info.uncsize = u16(q);
        if(cl->info.size > cl->info.uncsize)
                return 0;
        q += U16Size;
        scorecp(cl->info.score, q);
        q += VtScoreSize;
        cl->encoding = *q;
        q++;
        cl->creator = u32(q);
        q += U32Size;
        cl->time = u32(q);
        q += U32Size;

        /* locate the uncsize bytes of plain data to check against the score */
        if(cl->encoding == ClumpENone){
                if(cl->info.size != cl->info.uncsize)
                        return 0;
                data = q;
        }else if(cl->encoding == ClumpECompress){
                if(cl->info.size >= cl->info.uncsize)
                        return 0;
                unwhackinit(&uw);
                n = unwhack(&uw, ubuf, cl->info.uncsize, q, cl->info.size);
                if(n != cl->info.uncsize)
                        return 0;
                data = ubuf;
        }else
                return 0;

        scoremem(score, data, cl->info.uncsize);
        if(scorecmp(score, cl->info.score) != 0)
                return 0;
        
        /* it all worked out in the end */
        *pmagic = magic;
        return (q + cl->info.size) - p;
}

/*
 * All ClumpInfos seen in this arena.
 * Kept in binary tree so we can look up by score.
 *
 * Entries live in the array cibuf in the order they were added;
 * tree links are indices into cibuf, with -1 meaning no node.
 */
typedef struct Cit Cit;
struct Cit
{
        int left;       /* index of subtree with scores that compare greater, or -1 */
        int right;      /* index of subtree with scores that compare less, or -1 */
        vlong corrupt;  /* nonzero for entries added by addcicorrupt (its len argument); those are not in the tree */
        ClumpInfo ci;
};
Cit *cibuf;             /* growable array of entries */
int ciroot;             /* index of the tree root, or -1 when the tree is empty */
int ncibuf, mcibuf;     /* entries used and entries allocated in cibuf */

/*
 * Forget every recorded entry: empty the score tree and
 * restart filling cibuf at index 0.  The buffer itself
 * (and mcibuf) is left alone so it can be reused.
 */
void
resetcibuf(void)
{
        ciroot = -1;
        ncibuf = 0;
}

/*
 * Descend the score tree starting at the link *p.
 * Returns a pointer to the link holding the index of the
 * entry whose score equals score; if there is no such entry,
 * returns the empty (-1) link where it would be attached.
 */
int*
ltreewalk(int *p, uchar *score)
{
        int cmp;
        Cit *node;

        while(*p != -1){
                node = &cibuf[*p];
                cmp = scorecmp(node->ci.score, score);
                if(cmp == 0)
                        break;
                p = cmp < 0 ? &node->right : &node->left;
        }
        return p;
}

/*
 * Append an entry to cibuf, growing the buffer in steps of
 * 131072 entries when it is full.
 *
 * ci is copied into the new entry.  corrupt is zero for a real
 * clump and otherwise the byte length of a corrupt region.
 * Only real clumps are linked into the score tree.
 */
void
addcibuf(ClumpInfo *ci, vlong corrupt)
{
        Cit *cit;
        int *link;
        
        if(ncibuf == mcibuf){
                mcibuf += 131072;
                cibuf = vtrealloc(cibuf, mcibuf*sizeof cibuf[0]);
        }
        cit = &cibuf[ncibuf];
        cit->ci = *ci;
        cit->left = -1;
        cit->right = -1;
        cit->corrupt = corrupt;
        if(!corrupt){
                /*
                 * ltreewalk returns the link to an existing node when
                 * the score is already present.  Overwriting that link
                 * would replace the old node with this childless one
                 * and detach both of its subtrees, so later lookups
                 * would miss them.  A duplicate still gets its slot in
                 * cibuf (the directory is built from the array order);
                 * the tree just keeps pointing at the first copy.
                 */
                link = ltreewalk(&ciroot, ci->score);
                if(*link == -1)
                        *link = ncibuf;
        }
        ncibuf++;
}

/*
 * Record a corrupt region of len bytes as an entry whose
 * ClumpInfo is entirely zero.
 */
void
addcicorrupt(vlong len)
{
        ClumpInfo blank;

        memset(&blank, 0, sizeof blank);
        addcibuf(&blank, len);
}

/*
 * Report whether a clump with the given score has been
 * entered into the score tree: 1 if so, 0 otherwise.
 */
int
haveclump(uchar *score)
{
        int idx, cmp;

        for(idx = ciroot; idx != -1; ){
                cmp = scorecmp(cibuf[idx].ci.score, score);
                if(cmp == 0)
                        return 1;
                idx = cmp < 0 ? cibuf[idx].right : cibuf[idx].left;
        }
        return 0;
}

/*
 * Compare ci against the packed bytes at p, read as
 * type[1] size[2] uncsize[2] score[...].
 * Returns 1 when every field agrees, 0 otherwise.
 */
int
matchci(ClumpInfo *ci, uchar *p)
{
        return ci->type == vtfromdisktype(p[0])
                && ci->size == u16(p+1)
                && ci->uncsize == u16(p+3)
                && scorecmp(ci->score, p+5) == 0;
}

/*
 * Decide whether the arena tail block at p (blocksize bytes)
 * looks sealed.  The version word at p+4 selects the tail size;
 * the tail's last byte must be 1, everything between the tail
 * and the trailing score must be zero, and the score in the
 * last VtScoreSize bytes must not be all zero.
 * Returns 1 if sealed, 0 otherwise (printing the reason,
 * except when the version is not recognized).
 */
int
sealedarena(uchar *p, int blocksize)
{
        int version, tailsize;
        uchar *scorep;

        version = u32(p+4);
        if(version == ArenaVersion4)
                tailsize = ArenaSize4;
        else if(version == ArenaVersion5)
                tailsize = ArenaSize5;
        else
                return 0;

        if(p[tailsize-1] != 1){
                print("arena tail says not sealed\n");
                return 0;
        }
        if(memcmp(p+tailsize, zero, blocksize-VtScoreSize-tailsize) != 0){
                print("arena tail followed by non-zero data\n");
                return 0;
        }
        scorep = p+blocksize-VtScoreSize;
        if(memcmp(scorep, zero, VtScoreSize) == 0){
                print("arena score zero\n");
                return 0;
        }
        return 1;
}

/*
 * Check that name passes nameok and ends with the decimal
 * representation of n.  For n == 0 the required suffix is
 * empty, so any name accepted by nameok passes.
 * Returns 1 if acceptable, 0 otherwise.
 */
int
okayname(char *name, int n)
{
        char suffix[20];
        int namelen, suflen;

        if(nameok(name) < 0)
                return 0;
        if(n == 0)
                suffix[0] = 0;
        else
                sprint(suffix, "%d", n);
        namelen = strlen(name);
        suflen = strlen(suffix);
        if(namelen < suflen)
                return 0;
        return strcmp(name+namelen-suflen, suffix) == 0;
}

/*
 * Order two ClumpInfos by type, then size, then uncsize,
 * then score.  Returns zero only when all four agree.
 */
int
clumpinfocmp(ClumpInfo *a, ClumpInfo *b)
{
        /* common case first: header fields agree, score decides */
        if(a->type == b->type && a->size == b->size && a->uncsize == b->uncsize)
                return scorecmp(a->score, b->score);
        if(a->type != b->type)
                return a->type - b->type;
        if(a->size != b->size)
                return a->size - b->size;
        return a->uncsize - b->uncsize;
}

/*
 * Read nci clump directory entries for the arena that starts
 * at partition offset `offset`.  Directory blocks are taken
 * one after another going downward from the tail block (the
 * last block of the arena); each holds blocksize/ClumpInfoSize
 * entries.  The disk is paged in through 4M windows.
 * Returns a vtmalloc'ed array of nci entries.
 */
ClumpInfo*
loadci(vlong offset, Arena *arena, int nci)
{
        int done, k, cnt, perblock;
        uchar *blk, *win;
        ClumpInfo *tab;

        perblock = arena->blocksize/ClumpInfoSize;
        tab = vtmalloc(nci*sizeof tab[0]);
        offset += arena->size - arena->blocksize;
        win = nil;
        blk = nil;
        done = 0;
        while(done < nci){
                /* window used up (or first time through): fetch the 4M below */
                if(blk == win){
                        win = pagein(offset-4*M, 4*M);
                        blk = win+4*M;
                }
                blk -= arena->blocksize;
                offset -= arena->blocksize;
                cnt = nci-done;
                if(cnt > perblock)
                        cnt = perblock;
                for(k=0; k<cnt; k++)
                        unpackclumpinfo(&tab[done+k], blk+k*ClumpInfoSize);
                done += perblock;
        }
        return tab;
}

/*
 * Pack nci clump directory entries from ci into the directory
 * blocks of the arena that starts at partition offset `offset`,
 * going downward from the tail block.  Each block is zeroed
 * before its entries are packed, then pageout is called.
 * Returns the partition offset of the lowest directory block
 * written (the tail block's offset if nci is 0).
 */
vlong
writeci(vlong offset, Arena *arena, ClumpInfo *ci, int nci)
{
        int done, k, cnt, perblock;
        uchar *blk, *win;

        perblock = arena->blocksize/ClumpInfoSize;
        offset += arena->size - arena->blocksize;
        win = nil;
        blk = nil;
        done = 0;
        while(done < nci){
                /* window used up (or first time through): fetch the 4M below */
                if(blk == win){
                        win = pagein(offset-4*M, 4*M);
                        blk = win+4*M;
                }
                blk -= arena->blocksize;
                offset -= arena->blocksize;
                memset(blk, 0, arena->blocksize);
                cnt = nci-done;
                if(cnt > perblock)
                        cnt = perblock;
                for(k=0; k<cnt; k++)
                        packclumpinfo(&ci[done+k], blk+k*ClumpInfoSize);
                done += perblock;
        }
        pageout();
        return offset;
}

/*
 * Fill in the basic parameters (size, block size, version,
 * clump magic, name) of the arena at partition offset offset0.
 *
 * Size and block size come from the global arenasize and
 * ap.blocksize.  Version, clump magic and name are taken from
 * the on-disk head and tail blocks when they unpack cleanly.
 * The name may be overridden by the global basename, or derived
 * from the base name remembered from the previous call.
 *
 * anum is the arena number used as the name suffix.
 * head and arena are updated in place; the caller is expected
 * to have zeroed them.  Calls sysfatal if no name can be found.
 */
void
loadarenabasics(vlong offset0, int anum, ArenaHead *head, Arena *arena)
{
        char dname[ANameSize];
        static char lastbase[ANameSize];
        uchar *p;
        Arena oarena;
        ArenaHead ohead;
        int nlast, nsuffix;

        /*
         * Fmtarenas makes all arenas the same size
         * except the last, which may be smaller.
         * It uses the same block size for arenas as for
         * the arena partition blocks.
         */
        arena->size = arenasize;
        if(offset0+arena->size > partend)
                arena->size = partend - offset0;
        head->size = arena->size;
        
        arena->blocksize = ap.blocksize;
        head->blocksize = arena->blocksize;
        
        /* 
         * Look for clump magic and name in head/tail blocks.
         * All the other info we will reconstruct just in case.
         */
        p = pagein(offset0, arena->blocksize);
        memset(&ohead, 0, sizeof ohead);
        if(unpackarenahead(&ohead, p) >= 0){
                head->version = ohead.version;
                head->clumpmagic = ohead.clumpmagic;
                if(okayname(ohead.name, anum))
                        strcpy(head->name, ohead.name);
        }

        p = pagein(offset0+arena->size-arena->blocksize, 
                arena->blocksize);
        memset(&oarena, 0, sizeof oarena);
        if(unpackarena(&oarena, p) >= 0){
                arena->version = oarena.version;
                arena->clumpmagic = oarena.clumpmagic;
                if(okayname(oarena.name, anum))
                        strcpy(arena->name, oarena.name);
                arena->diskstats.clumps = oarena.diskstats.clumps;
print("old arena: sealed=%d\n", oarena.diskstats.sealed);
                arena->diskstats.sealed = oarena.diskstats.sealed;
        }

        /* Head trumps arena. */
        if(head->version){
                arena->version = head->version;
                arena->clumpmagic = head->clumpmagic;
        }
        if(arena->version == 0)
                arena->version = ArenaVersion5;
        if(basename){
                if(anum == -1)
                        snprint(arena->name, ANameSize, "%s", basename);
                else
                        snprint(arena->name, ANameSize, "%s%d", basename, anum);
        }else if(lastbase[0])
                snprint(arena->name, ANameSize, "%s%d", lastbase, anum);
        else if(head->name[0])
                strcpy(arena->name, head->name);
        else if(arena->name[0] == 0)
                sysfatal("cannot determine base name for arena; use -n");

        /*
         * Remember the name minus its numeric suffix for the next arena.
         * The name can be shorter than the printed number (for example
         * a one-character -n base with anum == -1); indexing with the
         * raw difference would then write before the start of lastbase.
         * In that case there is no usable base, so forget it.
         */
        strcpy(lastbase, arena->name);
        sprint(dname, "%d", anum);
        nlast = strlen(lastbase);
        nsuffix = strlen(dname);
        if(nlast >= nsuffix)
                lastbase[nlast-nsuffix] = 0;
        else
                lastbase[0] = 0;
        
        /* Was working in arena, now copy to head. */
        head->version = arena->version;
        memmove(head->name, arena->name, sizeof head->name);
        head->blocksize = arena->blocksize;
        head->size = arena->size;
}

/*
 * Start the hash sb at offset0 by feeding it a freshly packed
 * copy of head, zero-padded, as the first head->blocksize bytes.
 */
void
shahead(Shabuf *sb, vlong offset0, ArenaHead *head)
{
        uchar blk[MaxDiskBlock];

        memset(blk, 0, sizeof blk);
        packarenahead(head, blk);
        sb->offset = offset0;
        sbupdate(sb, blk, offset0, head->blocksize);
}

/*
 * Pick a clump magic number for an arena of the given version.
 * Version 4 arenas always use _ClumpMagic; any other version
 * gets a random value that is neither 0 nor _ClumpMagic.
 */
u32int
newclumpmagic(int version)
{
        u32int magic;

        if(version == ArenaVersion4)
                return _ClumpMagic;
        for(;;){
                magic = fastrand();
                if(magic != 0 && magic != _ClumpMagic)
                        return magic;
        }
}

/*
 * Poke around in the arena to find the clump data
 * and compute the relevant statistics.
 *
 * offset0 is the partition offset of the arena and anum its
 * number (used for naming by loadarenabasics).  head and arena
 * are zeroed and then filled in with the reconstructed state.
 * oldscore receives the SHA1 computed over the existing disk
 * contents when the on-disk tail says the arena was sealed;
 * score receives the SHA1 of the reconstructed arena when it
 * ends up sealed and we are fixing.  Both are zeroed first.
 */
void
guessarena(vlong offset0, int anum, ArenaHead *head, Arena *arena,
        uchar *oldscore, uchar *score)
{
        uchar dbuf[MaxDiskBlock];
        int needtozero, clumps, nb1, nb2, minclumps;
        int inbad, n, ncib, printed, sealing, smart;
        u32int magic;
        uchar *sp, *ep, *p;
        vlong boffset, eoffset, lastclumpend, leaked;
        vlong offset, toffset, totalcorrupt, v;
        uvlong avgsize;
        Clump cl;
        ClumpInfo *bci, *ci, *eci, *xci;
        Cit *bcit, *cit, *ecit;
        Shabuf oldsha, newsha;
        
        /*
         * We expect to find an arena, with data, between offset
         * and offset+arenasize.  With any luck, the data starts at
         * offset+ap.blocksize.  The blocks have variable size and
         * aren't padded at all, which doesn't give us any alignment
         * constraints.  The blocks are compressed or high entropy,
         * but the headers are pretty low entropy (except the score):
         *
         *      type[1] (range 0 thru 9, 13)
         *      size[2]
         *      uncsize[2] (<= size)
         *
         * so we can look for these.  We check the scores as we go,
         * so we can't make any wrong turns.  If we find ourselves
         * in a dead end, scan forward looking for a new start.
         */

        resetcibuf();
        memset(head, 0, sizeof *head);
        memset(arena, 0, sizeof *arena);
        memset(oldscore, 0, VtScoreSize);
        memset(score, 0, VtScoreSize);
        memset(&oldsha, 0, sizeof oldsha);
        memset(&newsha, 0, sizeof newsha);
        newsha.rollback = 1;

        if(0){
                sbdebug(&oldsha, "old.sha");
                sbdebug(&newsha, "new.sha");
        }

        loadarenabasics(offset0, anum, head, arena);

        /* start the clump hunt */
        
        clumps = 0;
        totalcorrupt = 0;
        sealing = 1;
        boffset = offset0 + arena->blocksize;
        offset = boffset;
        eoffset = offset0+arena->size - arena->blocksize;
        toffset = eoffset;
        sp = pagein(offset0, 4*M);

        if(arena->diskstats.sealed){
                oldsha.offset = offset0;
                sbupdate(&oldsha, sp, offset0, 4*M);
        }
        ep = sp+4*M;
        p = sp + (boffset - offset0);
        ncib = arena->blocksize / ClumpInfoSize;        /* ci per block in index */
        lastclumpend = offset;
        nbad = 0;
        inbad = 0;
        needtozero = 0;
        minclumps = 0;
        while(offset < eoffset){
                /*
                 * Shift buffer if we're running out of room.
                 */
                if(p+70*K >= ep){
                        /*
                         * Start the post SHA1 buffer.   By now we should know the
                         * clumpmagic and arena version, so we can create a
                         * correct head block to get things going.
                         */
                        if(sealing && fix && newsha.offset == 0){
                                newsha.offset = offset0;
                                if(arena->clumpmagic == 0){
                                        if(arena->version == 0)
                                                arena->version = ArenaVersion5;
                                        arena->clumpmagic = newclumpmagic(arena->version);
                                }
                                head->clumpmagic = arena->clumpmagic;
                                shahead(&newsha, offset0, head);
                        }
                        n = 4*M-256*K;
                        if(sealing && fix){
                                sbdiskhash(&newsha, bufoffset);
                                sbupdate(&newsha, buf, bufoffset, 4*M-256*K);
                        }
                        pagein(bufoffset+n, 4*M);
                        p -= n;
                        if(arena->diskstats.sealed)
                                sbupdate(&oldsha, buf, bufoffset, 4*M);
                }

                /*
                 * Check for a clump at p, which is at offset in the disk.
                 * Duplicate clumps happen in corrupted disks
                 * (the same pattern gets written many times in a row)
                 * and should never happen during regular use.
                 */
                magic = 0;
                if((n = isclump(p, &cl, &magic)) > 0){
                        /*
                         * If we were in the middle of some corrupted data,
                         * flush a warning about it and then add any clump
                         * info blocks as necessary.
                         */
                        if(inbad){
                                inbad = 0;
                                v = offset-lastclumpend;
                                if(needtozero){
                                        zerorange(lastclumpend, v);
                                        sbrollback(&newsha, lastclumpend);
                                        print("corrupt clump data - %#llux+%#llux (%,llud bytes)\n",
                                                lastclumpend, v, v);
                                }
                                addcicorrupt(v);
                                totalcorrupt += v;
                                nb1 = (minclumps+ncib-1)/ncib;
                                minclumps += (v+ClumpSize+VtMaxLumpSize-1)/(ClumpSize+VtMaxLumpSize);
                                nb2 = (minclumps+ncib-1)/ncib;
                                eoffset -= (nb2-nb1)*arena->blocksize;
                        }

                        if(haveclump(cl.info.score))
                                print("warning: duplicate clump %d %V at %#llux+%#d\n", cl.info.type, cl.info.score, offset, n);

                        /*
                         * If clumps use different magic numbers, we don't care.
                         * We'll just use the first one we find and make the others
                         * follow suit.
                         */
                        if(arena->clumpmagic == 0){
                                print("clump type %d size %d score %V magic %x\n",
                                        cl.info.type, cl.info.size, cl.info.score, magic);
                                arena->clumpmagic = magic;
                                if(magic == _ClumpMagic)
                                        arena->version = ArenaVersion4;
                                else
                                        arena->version = ArenaVersion5;
                        }
                        if(magic != arena->clumpmagic)
                                p32(p, arena->clumpmagic);
                        if(clumps == 0)
                                arena->ctime = cl.time;

                        /*
                         * Record the clump, update arena stats,
                         * grow clump info blocks if needed.
                         */
                        if(verbose > 1)
                                print("\tclump %d: %d %V at %#llux+%#ux (%d)\n", 
                                        clumps, cl.info.type, cl.info.score, offset, n, n);
                        addcibuf(&cl.info, 0);
                        if(minclumps%ncib == 0)
                                eoffset -= arena->blocksize;
                        minclumps++;
                        clumps++;
                        if(cl.encoding != ClumpENone)
                                arena->diskstats.cclumps++;
                        arena->diskstats.uncsize += cl.info.uncsize;
                        arena->wtime = cl.time;
                        
                        /*
                         * Move to next clump.
                         */
                        offset += n;
                        p += n;
                        lastclumpend = offset;
                }else{
                        /*
                         * Overwrite malformed clump data with zeros later.
                         * For now, just record whether it needs to be overwritten.
                         * Bad regions must be of size at least ClumpSize.
                         * Postponing the overwriting keeps us from writing past
                         * the end of the arena data (which might be directory data)
                         * with zeros.
                         */
                        if(!inbad){
                                inbad = 1;
                                needtozero = 0;
                                if(memcmp(p, zero, ClumpSize) != 0)
                                        needtozero = 1;
                                p += ClumpSize;
                                offset += ClumpSize;
                                nbad++;
                        }else{
                                if(*p != 0)
                                        needtozero = 1;
                                p++;
                                offset++;
                        }
                }
        }
        pageout();

        if(verbose)
                print("readable clumps: %d; min. directory entries: %d\n", 
                        clumps, minclumps);
        arena->diskstats.used = lastclumpend - boffset;
        leaked = eoffset - lastclumpend;
        if(verbose)
                print("used from %#llux to %#llux = %,lld (%,lld unused)\n",
                        boffset, lastclumpend, arena->diskstats.used, leaked);

        /*
         * Finish the SHA1 of the old data.
         */
        if(arena->diskstats.sealed){
                sbdiskhash(&oldsha, toffset);
                readdisk(dbuf, toffset, arena->blocksize);
                scorecp(dbuf+arena->blocksize-VtScoreSize, zero);
                sbupdate(&oldsha, dbuf, toffset, arena->blocksize);
                sbscore(&oldsha, oldscore);
        }
        
        /*
         * If we still don't know the clump magic, the arena
         * must be empty.  It still needs a value, so make 
         * something up.
         */
        if(arena->version == 0)
                arena->version = ArenaVersion5;
        if(arena->clumpmagic == 0){
                arena->clumpmagic = newclumpmagic(arena->version);
                head->clumpmagic = arena->clumpmagic;
        }

        /*
         * Guess at number of clumpinfo blocks to load.
         * If we guess high, it's no big deal.  If we guess low,
         * we'll be forced into rewriting the whole directory.
         * Still not such a big deal.
         */
        if(clumps == 0 || arena->diskstats.used == totalcorrupt)
                goto Nocib;
        if(clumps < arena->diskstats.clumps)
                clumps = arena->diskstats.clumps;
        if(clumps < ncibuf)
                clumps = ncibuf;
        /*
         * Extrapolate how many clumps the corrupt bytes held from the
         * average size of the good ones.  The clump count may have just
         * been raised to the on-disk value, which comes from a tail we
         * do not trust; if it exceeds the number of good bytes the
         * average rounds down to zero, so skip the estimate rather
         * than divide by it.  The bounds below still apply.
         */
        avgsize = (arena->diskstats.used - totalcorrupt)/clumps;
        if(avgsize > 0)
                clumps += totalcorrupt/avgsize;
        clumps += totalcorrupt/2000;
        if(clumps < minclumps)
                clumps = minclumps;
        clumps += ncib-1;
        clumps -= clumps%ncib;

        /*
         * Can't write into the actual data.
         */
        v = offset0 + arena->size - arena->blocksize;
        v -= (clumps+ncib-1)/ncib * arena->blocksize;
        if(v < lastclumpend){
                v = offset0 + arena->size - arena->blocksize;
                clumps = (v-lastclumpend)/arena->blocksize * ncib;
        }
        
        if(clumps < minclumps)
                print("cannot happen?\n");

        /*
         * Check clumpinfo blocks against directory we created.
         * The tricky part is handling the corrupt sections of arena.
         * If possible, we remark just the affected directory entries
         * rather than slide everything down.
         * 
         * Allocate clumps+1 blocks and check that we don't need
         * the last one at the end.
         */
        bci = loadci(offset0, arena, clumps+1);
        eci = bci+clumps+1;
        bcit = cibuf;
        ecit = cibuf+ncibuf;
        
        smart = 0;      /* Somehow the smart code doesn't do corrupt clumps right. */
Again:
        nbad = 0;
        ci = bci;
        for(cit=bcit; cit<ecit && ci<eci; cit++){
                if(cit->corrupt){
                        vlong n, m;
                        if(smart){
                                /*
                                 * If we can, just mark existing entries as corrupt.
                                 */
                                n = cit->corrupt;
                                for(xci=ci; n>0 && xci<eci; xci++)
                                        n -= ClumpSize+xci->size;
                                if(n > 0 || xci >= eci)
                                        goto Dumb;
                                printed = 0;
                                for(; ci<xci; ci++){
                                        if(verbose && ci->type != VtCorruptType){
                                                if(!printed){
                                                        print("marking directory %d-%d as corrupt\n",
                                                                (int)(ci-bci), (int)(xci-bci));
                                                        printed = 1;
                                                }
                                                print("\ttype=%d size=%d uncsize=%d score=%V\n",
                                                        ci->type, ci->size, ci->uncsize, ci->score);
                                        }
                                        ci->type = VtCorruptType;
                                }
                        }else{
                        Dumb:
                                print("\trewriting clump directory\n");
                                /*
                                 * Otherwise, blaze a new trail.
                                 */
                                n = cit->corrupt;
                                while(n > 0 && ci < eci){
                                        if(n < ClumpSize)
                                                sysfatal("bad math in clump corrupt");
                                        if(n <= VtMaxLumpSize+ClumpSize)
                                                m = n;
                                        else{
                                                m = VtMaxLumpSize+ClumpSize;
                                                if(n-m < ClumpSize)
                                                        m -= ClumpSize;
                                        }
                                        ci->type = VtCorruptType;
                                        ci->size = m-ClumpSize;
                                        ci->uncsize = m-ClumpSize;
                                        memset(ci->score, 0, VtScoreSize);
                                        ci++;
                                        n -= m;
                                }
                        }
                        continue;
                }
                if(clumpinfocmp(&cit->ci, ci) != 0){
                        if(verbose && (smart || verbose>1)){
                                print("clumpinfo %d\n", (int)(ci-bci));
                                print("\twant: %d %d %d %V\n", 
                                        cit->ci.type, cit->ci.size,
                                        cit->ci.uncsize, cit->ci.score);
                                print("\thave: %d %d %d %V\n", 
                                        ci->type, ci->size, 
                                        ci->uncsize, ci->score);
                        }
                        *ci = cit->ci;
                        nbad++;
                }
                ci++;
        }
        if(ci >= eci || cit < ecit){
                print("ran out of space editing existing directory; rewriting\n");
                print("# eci %ld ci %ld ecit %ld cit %ld\n", eci-bci, ci-bci, ecit-bcit, cit-bcit);
                assert(smart);  /* can't happen second time thru */
                smart = 0;
                goto Again;
        }
        
        assert(ci <= eci);
        arena->diskstats.clumps = ci-bci;
        eoffset = writeci(offset0, arena, bci, ci-bci);
        if(sealing && fix)
                sbrollback(&newsha, v);
print("eoffset=%lld lastclumpend=%lld diff=%lld unseal=%d\n", eoffset, lastclumpend, eoffset-lastclumpend, unseal);
        if(lastclumpend > eoffset)
                print("arena directory overwrote blocks!  cannot happen!\n");
        free(bci);
        if(smart && nbad)
                print("arena directory has %d bad or missing entries\n", nbad);
Nocib:
        if(eoffset - lastclumpend > 64*1024 && (!arena->diskstats.sealed || unseal)){
                if(arena->diskstats.sealed)
                        print("unsealing arena\n");
                sealing = 0;
                memset(oldscore, 0, VtScoreSize);
        }

        /*
         * Finish the SHA1 of the new data - only meaningful
         * if we've been writing to disk (`fix').
         */
        arena->diskstats.sealed = sealing;
        arena->memstats = arena->diskstats;
        if(sealing && fix){
                uchar tbuf[MaxDiskBlock];
                
                sbdiskhash(&newsha, toffset);
                memset(tbuf, 0, sizeof tbuf);
                packarena(arena, tbuf);
                sbupdate(&newsha, tbuf, toffset, arena->blocksize);
                sbscore(&newsha, score);
        }
}

/*
 * Copy the arena->size bytes of the arena at partition offset
 * `offset` into a newly created file named dumpbase.anum,
 * 4M at a time.  Failures are reported on standard error and
 * end the dump early; the file is closed in every case.
 */
void
dumparena(vlong offset, int anum, Arena *arena)
{
        char buf[1000];
        vlong o, e;
        int fd, n;
        
        snprint(buf, sizeof buf, "%s.%d", dumpbase, anum);
        if((fd = create(buf, OWRITE, 0666)) < 0){
                fprint(2, "create %s: %r\n", buf);
                return;
        }
        e = offset+arena->size;
        for(o=offset; o<e; o+=n){
                n = 4*M;
                if(o+n > e)
                        n = e-o;
                if(pwrite(fd, pagein(o, n), n, o-offset) != n){
                        /* report before close so %r still describes the write */
                        fprint(2, "write %s at %#llux: %r\n", buf, o-offset);
                        break;
                }
        }
        /* one descriptor per dumped arena would otherwise stay open */
        close(fd);
}

/*
 * Check arena number anum at partition offset `offset`.
 *
 * guessarena reconstructs the head and tail from the clump data.
 * The reconstructed blocks are compared with what is on disk,
 * differences are reported, and the reconstructed blocks are
 * copied into the paged buffer before pageout is called.
 * If dumpbase is set, the arena is dumped to a file instead and
 * no comparison is made.
 */
void
checkarena(vlong offset, int anum)
{
        uchar dbuf[MaxDiskBlock];
        uchar *p, oldscore[VtScoreSize], score[VtScoreSize];
        Arena arena, oarena;
        ArenaHead head;
        Info *fmt, *fmta;
        int sz;
        
        print("# arena %d: offset %#llux\n", anum, offset);

        if(offset >= partend){
                print("arena offset out of bounds\n");
                return;
        }

        guessarena(offset, anum, &head, &arena, oldscore, score);

        if(verbose){
                print("#\tversion=%d name=%s blocksize=%d size=%z",
                        head.version, head.name, head.blocksize, head.size);
                if(head.clumpmagic)
                        print(" clumpmagic=%#.8ux", head.clumpmagic);
                print("\n#\tclumps=%d cclumps=%d used=%,lld uncsize=%,lld\n",
                        arena.diskstats.clumps, arena.diskstats.cclumps,
                        arena.diskstats.used, arena.diskstats.uncsize);
                print("#\tctime=%t\n", arena.ctime);
                print("#\twtime=%t\n", arena.wtime);
                if(arena.diskstats.sealed)
                        print("#\tsealed score=%V\n", score);
        }

        if(dumpbase){
                dumparena(offset, anum, &arena);
                return;
        }

        /* Head block: pack what we believe is right and compare with disk. */
        memset(dbuf, 0, sizeof dbuf);
        packarenahead(&head, dbuf);
        p = pagein(offset, arena.blocksize);
        if(memcmp(dbuf, p, arena.blocksize) != 0){
                print("on-disk arena header incorrect\n");
                showdiffs(dbuf, p, arena.blocksize, 
                        arena.version==ArenaVersion4 ? headinfo4 : headinfo5);
        }
        /* install the reconstructed head in the paged buffer */
        memmove(p, dbuf, arena.blocksize);
        
        /*
         * Tail block: same treatment.  The last VtScoreSize bytes
         * of the block hold the seal score.
         */
        memset(dbuf, 0, sizeof dbuf);
        packarena(&arena, dbuf);
        if(arena.diskstats.sealed)
                scorecp(dbuf+arena.blocksize-VtScoreSize, score);
        p = pagein(offset+arena.size-arena.blocksize, arena.blocksize);
        memset(&oarena, 0, sizeof oarena);
        unpackarena(&oarena, p);        /* result ignored; oarena stays zeroed where unpacking fails - TODO confirm */
        if(arena.version == ArenaVersion4){
                sz = ArenaSize4;
                fmt = tailinfo4;
                fmta = tailinfo4a;
        }else{
                sz = ArenaSize5;
                fmt = tailinfo5;
                fmta = tailinfo5a;
        }
        /* byte just past the fixed-size tail; 1 appears to mark an extension - TODO confirm */
        if(p[sz] == 1){
                fmt = fmta;
                if(oarena.diskstats.sealed){
                        /*
                         * some arenas were sealed with the extension
                         * before we adopted the convention that if it didn't
                         * add new information it gets dropped.
                         */
                        _packarena(&arena, dbuf, 1);
                }
        }
        /* compare everything but the trailing score, which is checked separately below */
        if(memcmp(dbuf, p, arena.blocksize-VtScoreSize) != 0){
                print("on-disk arena tail incorrect\n");
                showdiffs(dbuf, p, arena.blocksize-VtScoreSize, fmt);
        }
        if(arena.diskstats.sealed){
                /* the on-disk score should match the hash of the data as it was on disk */
                if(oarena.diskstats.sealed)
                if(scorecmp(p+arena.blocksize-VtScoreSize, oldscore) != 0){
                        print("on-disk arena seal score incorrect\n");
                        print("\tcorrect=%V\n", oldscore);
                        print("\t   disk=%V\n", p+arena.blocksize-VtScoreSize);
                }
                /* when fixing, announce a seal whose score differs from the one on disk */
                if(fix && scorecmp(p+arena.blocksize-VtScoreSize, score) != 0){
                        print("%ssealing arena%s: %V\n", 
                                oarena.diskstats.sealed ? "re" : "",
                                scorecmp(oldscore, score) == 0 ? 
                                        "" : " after changes", score);
                }
        }
        /* install the reconstructed tail in the paged buffer */
        memmove(p, dbuf, arena.blocksize);
        
        pageout();
}

AMapN*
buildamap(void)
{
        uchar *p;
        vlong o;
        ArenaHead h;
        AMapN *an;
        AMap *m;
        
        an = vtmallocz(sizeof *an);
        for(o=ap.arenabase; o<partend; o+=arenasize){
                p = pagein(o, Block);
                if(unpackarenahead(&h, p) >= 0){
                        an->map = vtrealloc(an->map, (an->n+1)*sizeof an->map[0]);
                        m = &an->map[an->n++];
                        m->start = o;
                        m->stop = o+h.size;
                        strcpy(m->name, h.name);
                }
        }
        return an;      
}

void
checkmap(void)
{
        char *s;
        uchar *p;
        int i, len;
        AMapN *an;
        Fmt fmt;
        
        an = buildamap();
        fmtstrinit(&fmt);
        fmtprint(&fmt, "%ud\n", an->n);
        for(i=0; i<an->n; i++)
                fmtprint(&fmt, "%s\t%lld\t%lld\n",
                        an->map[i].name, an->map[i].start, an->map[i].stop);
        s = fmtstrflush(&fmt);
        len = strlen(s);
        if(len > ap.tabsize){
                print("arena partition map too long: need %z bytes have %z\n",
                        (vlong)len, (vlong)ap.tabsize);
                len = ap.tabsize;
        }
        
        if(ap.tabsize >= 4*M){  /* can't happen - max arenas is 2000 */
                print("arena partition map *way* too long\n");
                return;
        }

        p = pagein(ap.tabbase, ap.tabsize);
        if(memcmp(p, s, len) != 0){
                print("arena partition map incorrect; rewriting.\n");
                memmove(p, s, len);
        }
        pageout();
}

/* stack size for the main thread; several functions keep MaxDiskBlock-sized buffers on the stack */
int mainstacksize = 512*1024;

/*
 * Entry point: parse options, open the partition, then check
 * either the single arena (when isonearena says the file is one)
 * or every arena followed by the arena map.
 */
void
threadmain(int argc, char **argv)
{
        int mode;
        
        /* read-only unless -f is given */
        mode = OREAD;
        readonly = 1;   
        ARGBEGIN{
        case 'U':       /* allow unsealing of sealed arenas */
                unseal = 1;
                break;
        case 'a':       /* arena size */
                arenasize = unittoull(EARGF(usage()));
                break;
        case 'b':       /* block size */
                ap.blocksize = unittoull(EARGF(usage()));
                break;
        case 'f':       /* fix: open the partition read-write */
                fix = 1;
                mode = ORDWR;
                readonly = 0;
                break;
        case 'n':       /* base name for arenas */
                basename = EARGF(usage());
                break;
        case 'v':       /* each -v raises verbosity */
                verbose++;
                break;
        case 'x':       /* dump arenas to files named dumpbase.N */
                dumpbase = EARGF(usage());
                break;
        default:
                usage();
        }ARGEND
        
        /* file, optionally followed by one ranges argument */
        if(argc != 1 && argc != 2)
                usage();

        file = argv[0];
        
        ventifmtinstall();
        fmtinstall('z', zfmt);
        fmtinstall('t', tfmt);
        quotefmtinstall();
        
        part = initpart(file, mode|ODIRECT);
        if(part == nil)
                sysfatal("can't open %s: %r", file);
        partend = part->size;
        
        if(isonearena()){
                /* single arena at offset 0; -1 stands for "no arena number" */
                checkarena(0, -1);
                threadexitsall(nil);
        }
        checkarenas(argc > 1 ? argv[1] : nil);
        checkmap();
        threadexitsall(nil);
}