Warning: Attempt to read property "date" on null in /usr/local/www/websvn.planix.org/blame.php on line 247

Warning: Attempt to read property "msg" on null in /usr/local/www/websvn.planix.org/blame.php on line 247
WebSVN – planix.SVN – Blame – /os/branches/feature_fixcpp/sys/src/cmd/venti/srv/fixarenas.c – Rev 2

Subversion Repositories planix.SVN

Rev

Go to most recent revision | Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
2 - 1
/*
2
 * Check and fix an arena partition.
3
 *
4
 * This is a lot grittier than the rest of Venti because
5
 * it can't just give up if a byte here or there is wrong.
6
 *
7
 * The rule here (hopefully followed!) is that block corruption
8
 * only ever has a local effect -- there are no blocks that you
9
 * can wipe out that will cause large portions of 
10
 * uncorrupted data blocks to be useless.
11
 */
12
 
13
#include "stdinc.h"
14
#include "dat.h"
15
#include "fns.h"
16
#include "whack.h"
17
 
18
/* Round x up to a multiple of n using mask arithmetic (n a power of two). */
#define ROUNDUP(x,n)		(((x)+(n)-1) & ~((n)-1))
19
 
20
#pragma varargck type "z" uvlong
21
#pragma varargck type "z" vlong
22
#pragma varargck type "t" uint
23
 
24
/* Byte-size units, plus the size of the small probe reads done via pagein. */
enum
{
	K = 1<<10,
	M = 1<<20,
	G = 1<<30,

	Block = 4<<10,
};
32
 
33
int debugsha1;
34
 
35
int verbose;
36
Part *part;
37
char *file;
38
char *basename;
39
char *dumpbase;
40
int fix;
41
int badreads;
42
int unseal;
43
uchar zero[MaxDiskBlock];
44
 
45
Arena lastarena;
46
ArenaPart ap;
47
uvlong arenasize;
48
int nbadread;
49
int nbad;
50
uvlong partend;
51
void checkarena(vlong, int);
52
 
53
/*
 * Print the command synopsis on standard error and exit.
 * A usage error is a failure, so exit with a non-empty status
 * string rather than the success status 0.
 */
void
usage(void)
{
	fprint(2, "usage: fixarenas [-fv] [-a arenasize] [-b blocksize] file [ranges]\n");
	threadexitsall("usage");
}
59
 
60
/*
61
 * Format number in simplest way that is okay with unittoull.
62
 */
63
static int
64
zfmt(Fmt *fmt)
65
{
66
	vlong x;
67
 
68
	x = va_arg(fmt->args, vlong);
69
	if(x == 0)
70
		return fmtstrcpy(fmt, "0");
71
	if(x%G == 0)
72
		return fmtprint(fmt, "%lldG", x/G);
73
	if(x%M == 0)
74
		return fmtprint(fmt, "%lldM", x/M);
75
	if(x%K == 0)
76
		return fmtprint(fmt, "%lldK", x/K);
77
	return fmtprint(fmt, "%lld", x);
78
}
79
 
80
/*
81
 * Format time like ctime without newline.
82
 */
83
static int
84
tfmt(Fmt *fmt)
85
{
86
	uint t;
87
	char buf[30];
88
 
89
	t = va_arg(fmt->args, uint);
90
	strcpy(buf, ctime(t));
91
	buf[28] = 0;
92
	return fmtstrcpy(fmt, buf);
93
}
94
 
95
/*
96
 * Coalesce messages about unreadable sectors into larger ranges.
97
 * bad(0, 0) flushes the buffer.
98
 */
99
static void
100
bad(char *msg, vlong o, int len)
101
{
102
	static vlong lb0, lb1;
103
	static char *lmsg;
104
 
105
	if(msg == nil)
106
		msg = lmsg;
107
	if(o == -1){
108
		lmsg = nil;
109
		lb0 = 0;
110
		lb1 = 0;
111
		return;
112
	}
113
	if(lb1 != o || (msg && lmsg && strcmp(msg, lmsg) != 0)){
114
		if(lb0 != lb1)
115
			print("%s %#llux+%#llux (%,lld+%,lld)\n",
116
				lmsg, lb0, lb1-lb0, lb0, lb1-lb0);
117
		lb0 = o;
118
	}
119
	lmsg = msg;
120
	lb1 = o+len;
121
}
122
 
123
/*
124
 * Read in the len bytes of data at the offset.  If can't for whatever reason,
125
 * fill it with garbage but print an error.
126
 */
127
static uchar*
128
readdisk(uchar *buf, vlong offset, int len)
129
{
130
	int i, j, k, n;
131
 
132
	if(offset >= partend){
133
		memset(buf, 0xFB, len);
134
		return buf;
135
	}
136
 
137
	if(offset+len > partend){
138
		memset(buf, 0xFB, len);
139
		len = partend - offset;
140
	}
141
 
142
	if(readpart(part, offset, buf, len) >= 0)
143
		return buf;
144
 
145
	/*
146
	 * The read failed.  Clear the buffer to nonsense, and
147
	 * then try reading in smaller pieces.  If that fails,
148
	 * read in even smaller pieces.  And so on down to sectors.
149
	 */
150
	memset(buf, 0xFD, len);
151
	for(i=0; i<len; i+=64*K){
152
		n = 64*K;
153
		if(i+n > len)
154
			n = len-i;
155
		if(readpart(part, offset+i, buf+i, n) >= 0)
156
			continue;
157
		for(j=i; j<len && j<i+64*K; j+=4*K){
158
			n = 4*K;
159
			if(j+n > len)
160
				n = len-j;
161
			if(readpart(part, offset+j, buf+j, n) >= 0)
162
				continue;
163
			for(k=j; k<len && k<j+4*K; k+=512){
164
				if(readpart(part, offset+k, buf+k, 512) >= 0)
165
					continue;
166
				bad("disk read failed at", k, 512);
167
				badreads++;
168
			}
169
		}
170
	}
171
	bad(nil, 0, 0);
172
	return buf;
173
}
174
 
175
/*
176
 * Buffer to support running SHA1 hash of the disk.
177
 */
178
typedef struct Shabuf Shabuf;
179
struct Shabuf
180
{
181
	int fd;
182
	vlong offset;
183
	DigestState state;
184
	int rollback;
185
	vlong r0;
186
	DigestState *hist;
187
	int nhist;
188
};
189
 
190
void
191
sbdebug(Shabuf *sb, char *file)
192
{
193
	int fd;
194
 
195
	if(sb->fd > 0){
196
		close(sb->fd);
197
		sb->fd = 0;
198
	}
199
	if((fd = create(file, OWRITE, 0666)) < 0)
200
		return;
201
	if(fd == 0){
202
		fd = dup(fd, -1);
203
		close(0);
204
	}
205
	sb->fd = fd;
206
}
207
 
208
void
209
sbupdate(Shabuf *sb, uchar *p, vlong offset, int len)
210
{
211
	int n, x;
212
	vlong o;
213
 
214
	if(sb->rollback && !sb->hist){
215
		sb->r0 = offset;
216
		sb->nhist = 1;
217
		sb->hist = vtmalloc(sb->nhist*sizeof *sb->hist);
218
		memset(sb->hist, 0, sizeof sb->hist[0]);
219
	}
220
	if(sb->r0 == 0)
221
		sb->r0 = offset;
222
 
223
	if(sb->offset < offset || sb->offset >= offset+len){
224
		if(0) print("sbupdate %p %#llux+%d but offset=%#llux\n",
225
			p, offset, len, sb->offset);
226
		return;
227
	}
228
	x = sb->offset - offset;
229
	if(0) print("sbupdate %p %#llux+%d skip %d\n",
230
		sb, offset, len, x);
231
	if(x){
232
		p += x;
233
		offset += x;
234
		len -= x;
235
	}
236
	assert(sb->offset == offset);
237
 
238
	if(sb->fd > 0)
239
		pwrite(sb->fd, p, len, offset - sb->r0);
240
 
241
	if(!sb->rollback){
242
		sha1(p, len, nil, &sb->state);
243
		sb->offset += len;
244
		return;
245
	}
246
 
247
	/* save state every 4M so we can roll back quickly */
248
	o = offset - sb->r0;
249
	while(len > 0){
250
		n = 4*M - o%(4*M);
251
		if(n > len)
252
			n = len;
253
		sha1(p, n, nil, &sb->state);
254
		sb->offset += n;
255
		o += n;
256
		p += n;
257
		len -= n;
258
		if(o%(4*M) == 0){
259
			x = o/(4*M);
260
			if(x >= sb->nhist){
261
				if(x != sb->nhist)
262
					print("oops! x=%d nhist=%d\n", x, sb->nhist);
263
				sb->nhist += 32;
264
				sb->hist = vtrealloc(sb->hist, sb->nhist*sizeof *sb->hist);
265
			}
266
			sb->hist[x] = sb->state;
267
		}
268
	}		
269
}
270
 
271
void
272
sbdiskhash(Shabuf *sb, vlong eoffset)
273
{
274
	static uchar dbuf[4*M];
275
	int n;
276
 
277
	while(sb->offset < eoffset){
278
		n = sizeof dbuf;
279
		if(sb->offset+n > eoffset)
280
			n = eoffset - sb->offset;
281
		readdisk(dbuf, sb->offset, n);
282
		sbupdate(sb, dbuf, sb->offset, n);
283
	}
284
}
285
 
286
void
287
sbrollback(Shabuf *sb, vlong offset)
288
{
289
	int x;
290
	vlong o;
291
	Dir d;
292
 
293
	if(!sb->rollback || !sb->r0){
294
		print("cannot rollback sha\n");
295
		return;
296
	}
297
	if(offset >= sb->offset)
298
		return;
299
	o = offset - sb->r0;
300
	x = o/(4*M);
301
	if(x >= sb->nhist){
302
		print("cannot rollback sha\n");
303
		return;
304
	}
305
	sb->state = sb->hist[x];
306
	sb->offset = sb->r0 + x*4*M;
307
	assert(sb->offset <= offset);
308
 
309
	if(sb->fd > 0){
310
		nulldir(&d);
311
		d.length = sb->offset - sb->r0;
312
		dirfwstat(sb->fd, &d);
313
	}
314
}
315
 
316
void
317
sbscore(Shabuf *sb, uchar *score)
318
{
319
	if(sb->hist){
320
		free(sb->hist);
321
		sb->hist = nil;
322
	}
323
	sha1(nil, 0, score, &sb->state);
324
}
325
 
326
/*
327
 * If we're fixing arenas, then editing this memory edits the disk!
328
 * It will be written back out as new data is paged in. 
329
 */
330
uchar buf[4*M];
331
uchar sbuf[4*M];
332
vlong bufoffset;
333
int buflen;
334
 
335
static void pageout(void);
336
static uchar*
337
pagein(vlong offset, int len)
338
{
339
	pageout();
340
	if(offset >= partend){
341
		memset(buf, 0xFB, sizeof buf);
342
		return buf;
343
	}
344
 
345
	if(offset+len > partend){
346
		memset(buf, 0xFB, sizeof buf);
347
		len = partend - offset;
348
	}
349
	bufoffset = offset;
350
	buflen = len;
351
	readdisk(buf, offset, len);
352
	memmove(sbuf, buf, len);
353
	return buf;
354
}
355
 
356
static void
357
pageout(void)
358
{
359
	if(buflen==0 || !fix || memcmp(buf, sbuf, buflen) == 0){
360
		buflen = 0;
361
		return;
362
	}
363
	if(writepart(part, bufoffset, buf, buflen) < 0)
364
		print("disk write failed at %#llux+%#ux (%,lld+%,d)\n",
365
			bufoffset, buflen, bufoffset, buflen);
366
	buflen = 0;
367
}
368
 
369
static void
370
zerorange(vlong offset, int len)
371
{
372
	int i;
373
	vlong ooff;
374
	int olen;
375
	enum { MinBlock = 4*K, MaxBlock = 8*K };
376
 
377
	if(0)
378
	if(bufoffset <= offset && offset+len <= bufoffset+buflen){
379
		memset(buf+(offset-bufoffset), 0, len);
380
		return;
381
	}
382
 
383
	ooff = bufoffset;
384
	olen = buflen;
385
 
386
	i = offset%MinBlock;
387
	if(i+len < MaxBlock){
388
		pagein(offset-i, (len+MinBlock-1)&~(MinBlock-1));
389
		memset(buf+i, 0, len);
390
	}else{
391
		pagein(offset-i, MaxBlock);
392
		memset(buf+i, 0, MaxBlock-i);
393
		offset += MaxBlock-i;
394
		len -= MaxBlock-i;
395
		while(len >= MaxBlock){
396
			pagein(offset, MaxBlock);
397
			memset(buf, 0, MaxBlock);
398
			offset += MaxBlock;
399
			len -= MaxBlock;
400
		}
401
		pagein(offset, (len+MinBlock-1)&~(MinBlock-1));
402
		memset(buf, 0, len);
403
	}
404
	pagein(ooff, olen);
405
}
406
 
407
/*
408
 * read/write integers
409
 *
410
static void
411
p16(uchar *p, u16int u)
412
{
413
	p[0] = (u>>8) & 0xFF;
414
	p[1] = u & 0xFF;
415
}
416
*/
417
 
418
/* Decode a big-endian 16-bit integer from p[0..1]. */
static u16int
u16(uchar *p)
{
	int hi, lo;

	hi = p[0];
	lo = p[1];
	return hi<<8 | lo;
}
423
 
424
/* Store u at p[0..3] as a big-endian 32-bit integer. */
static void
p32(uchar *p, u32int u)
{
	int i;

	/* fill from the least significant byte backwards */
	for(i=3; i>=0; i--){
		p[i] = u & 0xFF;
		u >>= 8;
	}
}
432
 
433
/*
 * Decode a big-endian 32-bit integer from p[0..3].
 * The top byte is widened to u32int before shifting: a uchar
 * promotes to int, and shifting a value >= 0x80 left by 24
 * would overflow a signed int.
 */
static u32int
u32(uchar *p)
{
	return ((u32int)p[0]<<24)|((u32int)p[1]<<16)|((u32int)p[2]<<8)|p[3];
}
438
 
439
/*
440
static void
441
p64(uchar *p, u64int u)
442
{
443
	p32(p, u>>32);
444
	p32(p, u);
445
}
446
*/
447
 
448
static u64int
449
u64(uchar *p)
450
{
451
	return ((u64int)u32(p)<<32) | u32(p+4);
452
}
453
 
454
/*
 * qsort comparison function for arrays of vlong: negative, zero
 * or positive as *va is less than, equal to or greater than *vb.
 * The second test used to repeat the first (b > a), so a larger
 * first argument compared as equal.
 */
static int
vlongcmp(const void *va, const void *vb)
{
	vlong a, b;

	a = *(vlong*)va;
	b = *(vlong*)vb;
	if(a < b)
		return -1;
	if(a > b)
		return 1;
	return 0;
}
467
 
468
/* D and S are in draw.h */
#define D VD
#define S VS

/*
 * An Info.len is a byte count (the low bits, mask N) combined with
 * one of these flags telling showdiffs how to print the field:
 * D as a decimal number, Z with %z, S as a string, T with %t.
 */
enum
{
	D = 1<<16,
	Z = 2<<16,
	S = 3<<16,
	T = 4<<16,
	N = (1<<16)-1
};
480
/* One field of an on-disk structure, as described to showdiffs. */
typedef struct Info Info;
struct Info
{
	int len;	/* field size in bytes, or'ed with a D/Z/S/T flag */
	char *name;	/* field name used in messages */
};
486
 
487
Info partinfo[] = {
488
	4,	"magic",
489
	D|4,	"version",
490
	Z|4,	"blocksize",
491
	4,	"arenabase",
492
 
493
};
494
 
495
Info headinfo4[] = {
496
	4,	"magic",
497
	D|4,	"version",
498
	S|ANameSize,	"name",
499
	Z|4,	"blocksize",
500
	Z|8,	"size",
501
 
502
};
503
 
504
Info headinfo5[] = {
505
	4,	"magic",
506
	D|4,	"version",
507
	S|ANameSize,	"name",
508
	Z|4,	"blocksize",
509
	Z|8,	"size",
510
	4,	"clumpmagic",
511
 
512
};
513
 
514
Info tailinfo4[] = {
515
	4,	"magic",
516
	D|4,	"version",
517
	S|ANameSize,	"name",
518
	D|4,	"clumps",
519
	D|4,	"cclumps",
520
	T|4,	"ctime",
521
	T|4,	"wtime",
522
	D|8,	"used",
523
	D|8,	"uncsize",
524
	1,	"sealed",
525
 
526
};
527
 
528
Info tailinfo4a[] = {
529
	/* tailinfo 4 */
530
	4,	"magic",
531
	D|4,	"version",
532
	S|ANameSize,	"name",
533
	D|4,	"clumps",
534
	D|4,	"cclumps",
535
	T|4,	"ctime",
536
	T|4,	"wtime",
537
	D|8,	"used",
538
	D|8,	"uncsize",
539
	1,	"sealed",
540
 
541
	/* mem stats */
542
	1,	"extension",
543
	D|4,	"mem.clumps",
544
	D|4,	"mem.cclumps",
545
	D|8,	"mem.used",
546
	D|8,	"mem.uncsize",
547
	1,	"mem.sealed",
548
 
549
};
550
 
551
Info tailinfo5[] = {
552
	4,	"magic",
553
	D|4,	"version",
554
	S|ANameSize,	"name",
555
	D|4,	"clumps",
556
	D|4,	"cclumps",
557
	T|4,	"ctime",
558
	T|4,	"wtime",
559
	4,	"clumpmagic",
560
	D|8,	"used",
561
	D|8,	"uncsize",
562
	1,	"sealed",
563
 
564
};
565
 
566
Info tailinfo5a[] = {
567
	/* tailinfo 5 */
568
	4,	"magic",
569
	D|4,	"version",
570
	S|ANameSize,	"name",
571
	D|4,	"clumps",
572
	D|4,	"cclumps",
573
	T|4,	"ctime",
574
	T|4,	"wtime",
575
	4,	"clumpmagic",
576
	D|8,	"used",
577
	D|8,	"uncsize",
578
	1,	"sealed",
579
 
580
	/* mem stats */
581
	1,	"extension",
582
	D|4,	"mem.clumps",
583
	D|4,	"mem.cclumps",
584
	D|8,	"mem.used",
585
	D|8,	"mem.uncsize",
586
	1,	"mem.sealed",
587
 
588
};
589
 
590
void
591
showdiffs(uchar *want, uchar *have, int len, Info *info)
592
{
593
	int n;
594
 
595
	while(len > 0 && (n=info->len&N) > 0){
596
		if(memcmp(have, want, n) != 0){
597
			switch(info->len){
598
			case 1:
599
				print("\t%s: correct=%d disk=%d\n",
600
					info->name, *want, *have);
601
				break;
602
			case 4:
603
				print("\t%s: correct=%#ux disk=%#ux\n",
604
					info->name, u32(want), u32(have));
605
				break;
606
			case D|4:
607
				print("\t%s: correct=%,ud disk=%,ud\n",
608
					info->name, u32(want), u32(have));
609
				break;
610
			case T|4:
611
				print("\t%s: correct=%t\n\t\tdisk=%t\n",
612
					info->name, u32(want), u32(have));
613
				break;
614
			case Z|4:
615
				print("\t%s: correct=%z disk=%z\n",
616
					info->name, (uvlong)u32(want), (uvlong)u32(have));
617
				break;
618
			case D|8:
619
				print("\t%s: correct=%,lld disk=%,lld\n",
620
					info->name, u64(want), u64(have));
621
				break;
622
			case Z|8:
623
				print("\t%s: correct=%z disk=%z\n",
624
					info->name, u64(want), u64(have));
625
				break;
626
			case S|ANameSize:
627
				print("\t%s: correct=%s disk=%.*s\n",
628
					info->name, (char*)want, 
629
					utfnlen((char*)have, ANameSize-1),
630
					(char*)have);
631
				break;
632
			default:
633
				print("\t%s: correct=%.*H disk=%.*H\n",
634
					info->name, n, want, n, have);
635
				break;
636
			}
637
		}
638
		have += n;
639
		want += n;
640
		len -= n;
641
		info++;
642
	}
643
	if(len > 0 && memcmp(have, want, len) != 0){
644
		if(memcmp(want, zero, len) != 0)
645
			print("!!\textra want data in showdiffs (bug in fixarenas)\n");
646
		else
647
			print("\tnon-zero data on disk after structure\n");
648
		if(verbose > 1){
649
			print("want: %.*H\n", len, want);
650
			print("have: %.*H\n", len, have);
651
		}
652
	}
653
}
654
 
655
/*
656
 * Does part begin with an arena?
657
 */
658
int
659
isonearena(void)
660
{
661
	return u32(pagein(0, Block)) == ArenaHeadMagic;
662
}
663
 
664
/* Arena table sizes tried, in order, when guessing the arena base. */
static int tabsizes[] = { 16<<10, 64<<10, 512<<10, 768<<10 };
665
/*
666
 * Poke around on the disk to guess what the ArenaPart numbers are.
667
 */
668
void
669
guessgeometry(void)
670
{
671
	int i, j, n, bestn, ndiff, nhead, ntail;
672
	uchar *p, *ep, *sp;
673
	u64int diff[100], head[20], tail[20];
674
	u64int offset, bestdiff;
675
 
676
	ap.version = ArenaPartVersion;
677
 
678
	if(arenasize == 0 || ap.blocksize == 0){
679
		/*
680
		 * The ArenaPart block at offset PartBlank may be corrupt or just wrong.
681
		 * Instead, look for the individual arena headers and tails, which there
682
		 * are many of, and once we've seen enough, infer the spacing.
683
		 *
684
		 * Of course, nothing in the file format requires that arenas be evenly
685
		 * spaced, but fmtarenas always does that for us.
686
		 */
687
		nhead = 0;
688
		ntail = 0;
689
		for(offset=PartBlank; offset<partend; offset+=4*M){
690
			p = pagein(offset, 4*M);
691
			for(sp=p, ep=p+4*M; p<ep; p+=K){
692
				if(u32(p) == ArenaHeadMagic && nhead < nelem(head)){
693
					if(verbose)
694
						print("arena head at %#llx\n", offset+(p-sp));
695
					head[nhead++] = offset+(p-sp);
696
				}
697
				if(u32(p) == ArenaMagic && ntail < nelem(tail)){
698
					tail[ntail++] = offset+(p-sp);
699
					if(verbose)
700
						print("arena tail at %#llx\n", offset+(p-sp));
701
				}
702
			}
703
			if(nhead == nelem(head) && ntail == nelem(tail))
704
				break;
705
		}
706
		if(nhead < 3 && ntail < 3)
707
			sysfatal("too few intact arenas: %d heads, %d tails", nhead, ntail);
708
 
709
		/* 
710
		 * Arena size is likely the most common
711
		 * inter-head or inter-tail spacing.
712
		 */
713
		ndiff = 0;
714
		for(i=1; i<nhead; i++)
715
			diff[ndiff++] = head[i] - head[i-1];
716
		for(i=1; i<ntail; i++)
717
			diff[ndiff++] = tail[i] - tail[i-1];
718
		qsort(diff, ndiff, sizeof diff[0], vlongcmp);
719
		bestn = 0;
720
		bestdiff = 0;
721
		for(i=1, n=1; i<=ndiff; i++, n++){
722
			if(i==ndiff || diff[i] != diff[i-1]){
723
				if(n > bestn){
724
					bestn = n;
725
					bestdiff = diff[i-1];
726
				}
727
				n = 0;
728
			}
729
		}
730
		print("arena size likely %z (%d of %d)\n", bestdiff, bestn, ndiff);
731
		if(arenasize != 0 && arenasize != bestdiff)
732
			print("using user-specified size %z instead\n", arenasize);
733
		else
734
			arenasize = bestdiff;
735
 
736
		/*
737
		 * The arena tail for an arena is arenasize-blocksize from the head.
738
		 */
739
		ndiff = 0;
740
		for(i=j=0; i<nhead && j<ntail; ){
741
			if(tail[j] < head[i]){
742
				j++;
743
				continue;
744
			}
745
			if(tail[j] < head[i]+arenasize){
746
				diff[ndiff++] = head[i]+arenasize - tail[j];
747
				j++;
748
				continue;
749
			}
750
			i++;
751
		}
752
		if(ndiff < 3)
753
			sysfatal("too few intact arenas: %d head, tail pairs", ndiff);
754
		qsort(diff, ndiff, sizeof diff[0], vlongcmp);
755
		bestn = 0;
756
		bestdiff = 0;
757
		for(i=1, n=1; i<=ndiff; i++, n++){
758
			if(i==ndiff || diff[i] != diff[i-1]){
759
				if(n > bestn){
760
					bestn = n;
761
					bestdiff = diff[i-1];
762
				}
763
				n = 0;
764
			}
765
		}
766
		print("block size likely %z (%d of %d)\n", bestdiff, bestn, ndiff);
767
		if(ap.blocksize != 0 && ap.blocksize != bestdiff)
768
			print("using user-specified size %z instead\n", (vlong)ap.blocksize);
769
		else
770
			ap.blocksize = bestdiff;
771
		if(ap.blocksize == 0 || ap.blocksize&(ap.blocksize-1))
772
			sysfatal("block size not a power of two");
773
		if(ap.blocksize > MaxDiskBlock)
774
			sysfatal("block size too big (max=%d)", MaxDiskBlock);
775
 
776
		/*
777
		 * Use head/tail information to deduce arena base.
778
		 */
779
		ndiff = 0;
780
		for(i=0; i<nhead; i++)
781
			diff[ndiff++] = head[i]%arenasize;
782
		for(i=0; i<ntail; i++)
783
			diff[ndiff++] = (tail[i]+ap.blocksize)%arenasize;
784
		qsort(diff, ndiff, sizeof diff[0], vlongcmp);
785
		bestn = 0;
786
		bestdiff = 0;
787
		for(i=1, n=1; i<=ndiff; i++, n++){
788
			if(i==ndiff || diff[i] != diff[i-1]){
789
				if(n > bestn){
790
					bestn = n;
791
					bestdiff = diff[i-1];
792
				}
793
				n = 0;
794
			}
795
		}
796
		ap.arenabase = bestdiff;
797
	}
798
 
799
	ap.tabbase = ROUNDUP(PartBlank+HeadSize, ap.blocksize);
800
	/*
801
	 * XXX pick up table, check arenabase.
802
	 * XXX pick up table, record base name.
803
	 */
804
 
805
	/*
806
	 * Somewhat standard computation.
807
	 * Fmtarenas used to use 64k tab, now uses 512k tab.
808
	 */
809
	if(ap.arenabase == 0){
810
		print("trying standard arena bases...\n");
811
		for(i=0; i<nelem(tabsizes); i++){
812
			ap.arenabase = ROUNDUP(PartBlank+HeadSize+tabsizes[i], ap.blocksize);
813
			p = pagein(ap.arenabase, Block);
814
			if(u32(p) == ArenaHeadMagic)
815
				break;
816
		}
817
	}
818
	p = pagein(ap.arenabase, Block);
819
	print("arena base likely %z%s\n", (vlong)ap.arenabase, 
820
		u32(p)!=ArenaHeadMagic ? " (but no arena head there)" : "");
821
 
822
	ap.tabsize = ap.arenabase - ap.tabbase;
823
}
824
 
825
/*
826
 * Check the arena partition blocks and then the arenas listed in range.
827
 */
828
void
829
checkarenas(char *range)
830
{
831
	char *s, *t;
832
	int i, lo, hi, narena;
833
	uchar dbuf[HeadSize];
834
	uchar *p;
835
 
836
	guessgeometry();
837
 
838
	partend -= partend%ap.blocksize;
839
 
840
	memset(dbuf, 0, sizeof dbuf);
841
	packarenapart(&ap, dbuf);
842
	p = pagein(PartBlank, Block);
843
	if(memcmp(p, dbuf, HeadSize) != 0){
844
		print("on-disk arena part superblock incorrect\n");
845
		showdiffs(dbuf, p, HeadSize, partinfo);
846
	}
847
	memmove(p, dbuf, HeadSize);
848
 
849
	narena = (partend-ap.arenabase + arenasize-1)/arenasize;
850
	if(range == nil){
851
		for(i=0; i<narena; i++)
852
			checkarena(ap.arenabase+(vlong)i*arenasize, i);
853
	}else if(strcmp(range, "none") == 0){
854
		/* nothing */
855
	}else{
856
		/* parse, e.g., -4,8-9,10- */
857
		for(s=range; *s; s=t){
858
			t = strchr(s, ',');
859
			if(t)
860
				*t++ = 0;
861
			else
862
				t = s+strlen(s);
863
			if(*s == '-')
864
				lo = 0;
865
			else
866
				lo = strtol(s, &s, 0);
867
			hi = lo;
868
			if(*s == '-'){
869
				s++;
870
				if(*s == 0)
871
					hi = narena-1;
872
				else
873
					hi = strtol(s, &s, 0);
874
			}
875
			if(*s != 0){
876
				print("bad arena range: %s\n", s);
877
				continue;
878
			}
879
			for(i=lo; i<=hi; i++)
880
				checkarena(ap.arenabase+(vlong)i*arenasize, i);
881
		}
882
	}
883
}
884
 
885
/*
886
 * Is there a clump here at p?
887
 */
888
static int
889
isclump(uchar *p, Clump *cl, u32int *pmagic)
890
{
891
	int n;
892
	u32int magic;
893
	uchar score[VtScoreSize], *bp;
894
	Unwhack uw;
895
	uchar ubuf[70*1024];
896
 
897
	bp = p;
898
	magic = u32(p);
899
	if(magic == 0)
900
		return 0;
901
	p += U32Size;
902
 
903
	cl->info.type = vtfromdisktype(*p);
904
	if(cl->info.type == 0xFF)
905
		return 0;
906
	p++;
907
	cl->info.size = u16(p);
908
	p += U16Size;
909
	cl->info.uncsize = u16(p);
910
	if(cl->info.size > cl->info.uncsize)
911
		return 0;
912
	p += U16Size;
913
	scorecp(cl->info.score, p);
914
	p += VtScoreSize;
915
	cl->encoding = *p;
916
	p++;
917
	cl->creator = u32(p);
918
	p += U32Size;
919
	cl->time = u32(p);
920
	p += U32Size;
921
 
922
	switch(cl->encoding){
923
	case ClumpENone:
924
		if(cl->info.size != cl->info.uncsize)
925
			return 0;
926
		scoremem(score, p, cl->info.size);
927
		if(scorecmp(score, cl->info.score) != 0)
928
			return 0;
929
		break;
930
	case ClumpECompress:
931
		if(cl->info.size >= cl->info.uncsize)
932
			return 0;
933
		unwhackinit(&uw);
934
		n = unwhack(&uw, ubuf, cl->info.uncsize, p, cl->info.size);
935
		if(n != cl->info.uncsize)
936
			return 0;
937
		scoremem(score, ubuf, cl->info.uncsize);
938
		if(scorecmp(score, cl->info.score) != 0)
939
			return 0;
940
		break;
941
	default:
942
		return 0;
943
	}
944
	p += cl->info.size;
945
 
946
	/* it all worked out in the end */
947
	*pmagic = magic;
948
	return p - bp;
949
}
950
 
951
/*
952
 * All ClumpInfos seen in this arena.
953
 * Kept in binary tree so we can look up by score.
954
 */
955
typedef struct Cit Cit;
956
struct Cit
957
{
958
	int left;
959
	int right;
960
	vlong corrupt;
961
	ClumpInfo ci;
962
};
963
Cit *cibuf;
964
int ciroot;
965
int ncibuf, mcibuf;
966
 
967
void
968
resetcibuf(void)
969
{
970
	ncibuf = 0;
971
	ciroot = -1;
972
}
973
 
974
int*
975
ltreewalk(int *p, uchar *score)
976
{
977
	int i;
978
 
979
	for(;;){
980
		if(*p == -1)
981
			return p;
982
		i = scorecmp(cibuf[*p].ci.score, score);
983
		if(i == 0)
984
			return p;
985
		if(i < 0)
986
			p = &cibuf[*p].right;
987
		else
988
			p = &cibuf[*p].left;
989
	}
990
}
991
 
992
void
993
addcibuf(ClumpInfo *ci, vlong corrupt)
994
{
995
	Cit *cit;
996
 
997
	if(ncibuf == mcibuf){
998
		mcibuf += 131072;
999
		cibuf = vtrealloc(cibuf, mcibuf*sizeof cibuf[0]);
1000
	}
1001
	cit = &cibuf[ncibuf];
1002
	cit->ci = *ci;
1003
	cit->left = -1;
1004
	cit->right = -1;
1005
	cit->corrupt = corrupt;
1006
	if(!corrupt)
1007
		*ltreewalk(&ciroot, ci->score) = ncibuf;
1008
	ncibuf++;
1009
}
1010
 
1011
void
1012
addcicorrupt(vlong len)
1013
{
1014
	static ClumpInfo zci;
1015
 
1016
	addcibuf(&zci, len);
1017
}
1018
 
1019
int
1020
haveclump(uchar *score)
1021
{
1022
	int i;
1023
	int p;
1024
 
1025
	p = ciroot;
1026
	for(;;){
1027
		if(p == -1)
1028
			return 0;
1029
		i = scorecmp(cibuf[p].ci.score, score);
1030
		if(i == 0)
1031
			return 1;
1032
		if(i < 0)
1033
			p = cibuf[p].right;
1034
		else
1035
			p = cibuf[p].left;
1036
	}
1037
}
1038
 
1039
int
1040
matchci(ClumpInfo *ci, uchar *p)
1041
{
1042
	if(ci->type != vtfromdisktype(p[0]))
1043
		return 0;
1044
	if(ci->size != u16(p+1))
1045
		return 0;
1046
	if(ci->uncsize != u16(p+3))
1047
		return 0;
1048
	if(scorecmp(ci->score, p+5) != 0)
1049
		return 0;
1050
	return 1;
1051
}
1052
 
1053
int
1054
sealedarena(uchar *p, int blocksize)
1055
{
1056
	int v, n;
1057
 
1058
	v = u32(p+4);
1059
	switch(v){
1060
	default:
1061
		return 0;
1062
	case ArenaVersion4:
1063
		n = ArenaSize4;
1064
		break;
1065
	case ArenaVersion5:
1066
		n = ArenaSize5;
1067
		break;
1068
	}
1069
	if(p[n-1] != 1){
1070
		print("arena tail says not sealed\n");
1071
		return 0;
1072
	}
1073
	if(memcmp(p+n, zero, blocksize-VtScoreSize-n) != 0){
1074
		print("arena tail followed by non-zero data\n");
1075
		return 0;
1076
	}
1077
	if(memcmp(p+blocksize-VtScoreSize, zero, VtScoreSize) == 0){
1078
		print("arena score zero\n");
1079
		return 0;
1080
	}
1081
	return 1;
1082
}
1083
 
1084
/*
 * Is name acceptable for arena number n?  It must pass nameok
 * and end in the decimal digits of n; for n == 0 no particular
 * ending is required.
 */
int
okayname(char *name, int n)
{
	char suffix[20];
	long namelen, suflen;

	if(nameok(name) < 0)
		return 0;
	sprint(suffix, "%d", n);
	if(n == 0)
		suffix[0] = 0;

	namelen = strlen(name);
	suflen = strlen(suffix);
	if(namelen < suflen)
		return 0;
	return strcmp(name+namelen-suflen, suffix) == 0;
}
1099
 
1100
int
1101
clumpinfocmp(ClumpInfo *a, ClumpInfo *b)
1102
{
1103
	if(a->type != b->type)
1104
		return a->type - b->type;
1105
	if(a->size != b->size)
1106
		return a->size - b->size;
1107
	if(a->uncsize != b->uncsize)
1108
		return a->uncsize - b->uncsize;
1109
	return scorecmp(a->score, b->score);
1110
}
1111
 
1112
ClumpInfo*
1113
loadci(vlong offset, Arena *arena, int nci)
1114
{
1115
	int i, j, per;
1116
	uchar *p, *sp;
1117
	ClumpInfo *bci, *ci;
1118
 
1119
	per = arena->blocksize/ClumpInfoSize;
1120
	bci = vtmalloc(nci*sizeof bci[0]);
1121
	ci = bci;
1122
	offset += arena->size - arena->blocksize;
1123
	p = sp = nil;
1124
	for(i=0; i<nci; i+=per){
1125
		if(p == sp){
1126
			sp = pagein(offset-4*M, 4*M);
1127
			p = sp+4*M;
1128
		}
1129
		p -= arena->blocksize;
1130
		offset -= arena->blocksize;
1131
		for(j=0; j<per && i+j<nci; j++)
1132
			unpackclumpinfo(ci++, p+j*ClumpInfoSize);
1133
	}
1134
	return bci;
1135
}
1136
 
1137
vlong
1138
writeci(vlong offset, Arena *arena, ClumpInfo *ci, int nci)
1139
{
1140
	int i, j, per;
1141
	uchar *p, *sp;
1142
 
1143
	per = arena->blocksize/ClumpInfoSize;
1144
	offset += arena->size - arena->blocksize;
1145
	p = sp = nil;
1146
	for(i=0; i<nci; i+=per){
1147
		if(p == sp){
1148
			sp = pagein(offset-4*M, 4*M);
1149
			p = sp+4*M;
1150
		}
1151
		p -= arena->blocksize;
1152
		offset -= arena->blocksize;
1153
		memset(p, 0, arena->blocksize);
1154
		for(j=0; j<per && i+j<nci; j++)
1155
			packclumpinfo(ci++, p+j*ClumpInfoSize);
1156
	}
1157
	pageout();
1158
	return offset;
1159
}
1160
 
1161
void
1162
loadarenabasics(vlong offset0, int anum, ArenaHead *head, Arena *arena)
1163
{
1164
	char dname[ANameSize];
1165
	static char lastbase[ANameSize];
1166
	uchar *p;
1167
	Arena oarena;
1168
	ArenaHead ohead;
1169
 
1170
	/*
1171
	 * Fmtarenas makes all arenas the same size
1172
	 * except the last, which may be smaller.
1173
	 * It uses the same block size for arenas as for
1174
	 * the arena partition blocks.
1175
	 */
1176
	arena->size = arenasize;
1177
	if(offset0+arena->size > partend)
1178
		arena->size = partend - offset0;
1179
	head->size = arena->size;
1180
 
1181
	arena->blocksize = ap.blocksize;
1182
	head->blocksize = arena->blocksize;
1183
 
1184
	/* 
1185
	 * Look for clump magic and name in head/tail blocks.
1186
	 * All the other info we will reconstruct just in case.
1187
	 */
1188
	p = pagein(offset0, arena->blocksize);
1189
	memset(&ohead, 0, sizeof ohead);
1190
	if(unpackarenahead(&ohead, p) >= 0){
1191
		head->version = ohead.version;
1192
		head->clumpmagic = ohead.clumpmagic;
1193
		if(okayname(ohead.name, anum))
1194
			strcpy(head->name, ohead.name);
1195
	}
1196
 
1197
	p = pagein(offset0+arena->size-arena->blocksize, 
1198
		arena->blocksize);
1199
	memset(&oarena, 0, sizeof oarena);
1200
	if(unpackarena(&oarena, p) >= 0){
1201
		arena->version = oarena.version;
1202
		arena->clumpmagic = oarena.clumpmagic;
1203
		if(okayname(oarena.name, anum))
1204
			strcpy(arena->name, oarena.name);
1205
		arena->diskstats.clumps = oarena.diskstats.clumps;
1206
print("old arena: sealed=%d\n", oarena.diskstats.sealed);
1207
		arena->diskstats.sealed = oarena.diskstats.sealed;
1208
	}
1209
 
1210
	/* Head trumps arena. */
1211
	if(head->version){
1212
		arena->version = head->version;
1213
		arena->clumpmagic = head->clumpmagic;
1214
	}
1215
	if(arena->version == 0)
1216
		arena->version = ArenaVersion5;
1217
	if(basename){
1218
		if(anum == -1)
1219
			snprint(arena->name, ANameSize, "%s", basename);
1220
		else
1221
			snprint(arena->name, ANameSize, "%s%d", basename, anum);
1222
	}else if(lastbase[0])
1223
		snprint(arena->name, ANameSize, "%s%d", lastbase, anum);
1224
	else if(head->name[0])
1225
		strcpy(arena->name, head->name);
1226
	else if(arena->name[0] == 0)
1227
		sysfatal("cannot determine base name for arena; use -n");
1228
	strcpy(lastbase, arena->name);
1229
	sprint(dname, "%d", anum);
1230
	lastbase[strlen(lastbase)-strlen(dname)] = 0;
1231
 
1232
	/* Was working in arena, now copy to head. */
1233
	head->version = arena->version;
1234
	memmove(head->name, arena->name, sizeof head->name);
1235
	head->blocksize = arena->blocksize;
1236
	head->size = arena->size;
1237
}
1238
 
1239
void
1240
shahead(Shabuf *sb, vlong offset0, ArenaHead *head)
1241
{
1242
	uchar headbuf[MaxDiskBlock];
1243
 
1244
	sb->offset = offset0;
1245
	memset(headbuf, 0, sizeof headbuf);
1246
	packarenahead(head, headbuf);
1247
	sbupdate(sb, headbuf, offset0, head->blocksize);
1248
}
1249
 
1250
u32int
1251
newclumpmagic(int version)
1252
{
1253
	u32int m;
1254
 
1255
	if(version == ArenaVersion4)
1256
		return _ClumpMagic;
1257
	do{
1258
		m = fastrand();
1259
	}while(m==0 || m == _ClumpMagic);
1260
	return m;
1261
}
1262
 
1263
/*
1264
 * Poke around in the arena to find the clump data
1265
 * and compute the relevant statistics.
1266
 */
1267
void
1268
guessarena(vlong offset0, int anum, ArenaHead *head, Arena *arena,
1269
	uchar *oldscore, uchar *score)
1270
{
1271
	uchar dbuf[MaxDiskBlock];
1272
	int needtozero, clumps, nb1, nb2, minclumps;
1273
	int inbad, n, ncib, printed, sealing, smart;
1274
	u32int magic;
1275
	uchar *sp, *ep, *p;
1276
	vlong boffset, eoffset, lastclumpend, leaked;
1277
	vlong offset, toffset, totalcorrupt, v;
1278
	Clump cl;
1279
	ClumpInfo *bci, *ci, *eci, *xci;
1280
	Cit *bcit, *cit, *ecit;
1281
	Shabuf oldsha, newsha;
1282
 
1283
	/*
1284
	 * We expect to find an arena, with data, between offset
1285
	 * and offset+arenasize.  With any luck, the data starts at
1286
	 * offset+ap.blocksize.  The blocks have variable size and
1287
	 * aren't padded at all, which doesn't give us any alignment
1288
	 * constraints.  The blocks are compressed or high entropy,
1289
	 * but the headers are pretty low entropy (except the score):
1290
	 *
1291
	 *	type[1] (range 0 thru 9, 13)
1292
	 *	size[2]
1293
	 *	uncsize[2] (<= size)
1294
	 *
1295
	 * so we can look for these.  We check the scores as we go,
1296
	 * so we can't make any wrong turns.  If we find ourselves
1297
	 * in a dead end, scan forward looking for a new start.
1298
	 */
1299
 
1300
	resetcibuf();
1301
	memset(head, 0, sizeof *head);
1302
	memset(arena, 0, sizeof *arena);
1303
	memset(oldscore, 0, VtScoreSize);
1304
	memset(score, 0, VtScoreSize);
1305
	memset(&oldsha, 0, sizeof oldsha);
1306
	memset(&newsha, 0, sizeof newsha);
1307
	newsha.rollback = 1;
1308
 
1309
	if(0){
1310
		sbdebug(&oldsha, "old.sha");
1311
		sbdebug(&newsha, "new.sha");
1312
	}
1313
 
1314
	loadarenabasics(offset0, anum, head, arena);
1315
 
1316
	/* start the clump hunt */
1317
 
1318
	clumps = 0;
1319
	totalcorrupt = 0;
1320
	sealing = 1;
1321
	boffset = offset0 + arena->blocksize;
1322
	offset = boffset;
1323
	eoffset = offset0+arena->size - arena->blocksize;
1324
	toffset = eoffset;
1325
	sp = pagein(offset0, 4*M);
1326
 
1327
	if(arena->diskstats.sealed){
1328
		oldsha.offset = offset0;
1329
		sbupdate(&oldsha, sp, offset0, 4*M);
1330
	}
1331
	ep = sp+4*M;
1332
	p = sp + (boffset - offset0);
1333
	ncib = arena->blocksize / ClumpInfoSize;	/* ci per block in index */
1334
	lastclumpend = offset;
1335
	nbad = 0;
1336
	inbad = 0;
1337
	needtozero = 0;
1338
	minclumps = 0;
1339
	while(offset < eoffset){
1340
		/*
1341
		 * Shift buffer if we're running out of room.
1342
		 */
1343
		if(p+70*K >= ep){
1344
			/*
1345
			 * Start the post SHA1 buffer.   By now we should know the
1346
			 * clumpmagic and arena version, so we can create a
1347
			 * correct head block to get things going.
1348
			 */
1349
			if(sealing && fix && newsha.offset == 0){
1350
				newsha.offset = offset0;
1351
				if(arena->clumpmagic == 0){
1352
					if(arena->version == 0)
1353
						arena->version = ArenaVersion5;
1354
					arena->clumpmagic = newclumpmagic(arena->version);
1355
				}
1356
				head->clumpmagic = arena->clumpmagic;
1357
				shahead(&newsha, offset0, head);
1358
			}
1359
			n = 4*M-256*K;
1360
			if(sealing && fix){
1361
				sbdiskhash(&newsha, bufoffset);
1362
				sbupdate(&newsha, buf, bufoffset, 4*M-256*K);
1363
			}
1364
			pagein(bufoffset+n, 4*M);
1365
			p -= n;
1366
			if(arena->diskstats.sealed)
1367
				sbupdate(&oldsha, buf, bufoffset, 4*M);
1368
		}
1369
 
1370
		/*
1371
		 * Check for a clump at p, which is at offset in the disk.
1372
		 * Duplicate clumps happen in corrupted disks
1373
		 * (the same pattern gets written many times in a row)
1374
		 * and should never happen during regular use.
1375
		 */
1376
		magic = 0;
1377
		if((n = isclump(p, &cl, &magic)) > 0){
1378
			/*
1379
			 * If we were in the middle of some corrupted data,
1380
			 * flush a warning about it and then add any clump
1381
			 * info blocks as necessary.
1382
			 */
1383
			if(inbad){
1384
				inbad = 0;
1385
				v = offset-lastclumpend;
1386
				if(needtozero){
1387
					zerorange(lastclumpend, v);
1388
					sbrollback(&newsha, lastclumpend);
1389
					print("corrupt clump data - %#llux+%#llux (%,llud bytes)\n",
1390
						lastclumpend, v, v);
1391
				}
1392
				addcicorrupt(v);
1393
				totalcorrupt += v;
1394
				nb1 = (minclumps+ncib-1)/ncib;
1395
				minclumps += (v+ClumpSize+VtMaxLumpSize-1)/(ClumpSize+VtMaxLumpSize);
1396
				nb2 = (minclumps+ncib-1)/ncib;
1397
				eoffset -= (nb2-nb1)*arena->blocksize;
1398
			}
1399
 
1400
			if(haveclump(cl.info.score))
1401
				print("warning: duplicate clump %d %V at %#llux+%#d\n", cl.info.type, cl.info.score, offset, n);
1402
 
1403
			/*
1404
			 * If clumps use different magic numbers, we don't care.
1405
			 * We'll just use the first one we find and make the others
1406
			 * follow suit.
1407
			 */
1408
			if(arena->clumpmagic == 0){
1409
				print("clump type %d size %d score %V magic %x\n",
1410
					cl.info.type, cl.info.size, cl.info.score, magic);
1411
				arena->clumpmagic = magic;
1412
				if(magic == _ClumpMagic)
1413
					arena->version = ArenaVersion4;
1414
				else
1415
					arena->version = ArenaVersion5;
1416
			}
1417
			if(magic != arena->clumpmagic)
1418
				p32(p, arena->clumpmagic);
1419
			if(clumps == 0)
1420
				arena->ctime = cl.time;
1421
 
1422
			/*
1423
			 * Record the clump, update arena stats,
1424
			 * grow clump info blocks if needed.
1425
			 */
1426
			if(verbose > 1)
1427
				print("\tclump %d: %d %V at %#llux+%#ux (%d)\n", 
1428
					clumps, cl.info.type, cl.info.score, offset, n, n);
1429
			addcibuf(&cl.info, 0);
1430
			if(minclumps%ncib == 0)
1431
				eoffset -= arena->blocksize;
1432
			minclumps++;
1433
			clumps++;
1434
			if(cl.encoding != ClumpENone)
1435
				arena->diskstats.cclumps++;
1436
			arena->diskstats.uncsize += cl.info.uncsize;
1437
			arena->wtime = cl.time;
1438
 
1439
			/*
1440
			 * Move to next clump.
1441
			 */
1442
			offset += n;
1443
			p += n;
1444
			lastclumpend = offset;
1445
		}else{
1446
			/*
1447
			 * Overwrite malformed clump data with zeros later.
1448
			 * For now, just record whether it needs to be overwritten.
1449
			 * Bad regions must be of size at least ClumpSize.
1450
			 * Postponing the overwriting keeps us from writing past
1451
			 * the end of the arena data (which might be directory data)
1452
			 * with zeros.
1453
			 */
1454
			if(!inbad){
1455
				inbad = 1;
1456
				needtozero = 0;
1457
				if(memcmp(p, zero, ClumpSize) != 0)
1458
					needtozero = 1;
1459
				p += ClumpSize;
1460
				offset += ClumpSize;
1461
				nbad++;
1462
			}else{
1463
				if(*p != 0)
1464
					needtozero = 1;
1465
				p++;
1466
				offset++;
1467
			}
1468
		}
1469
	}
1470
	pageout();
1471
 
1472
	if(verbose)
1473
		print("readable clumps: %d; min. directory entries: %d\n", 
1474
			clumps, minclumps);
1475
	arena->diskstats.used = lastclumpend - boffset;
1476
	leaked = eoffset - lastclumpend;
1477
	if(verbose)
1478
		print("used from %#llux to %#llux = %,lld (%,lld unused)\n",
1479
			boffset, lastclumpend, arena->diskstats.used, leaked);
1480
 
1481
	/*
1482
	 * Finish the SHA1 of the old data.
1483
	 */
1484
	if(arena->diskstats.sealed){
1485
		sbdiskhash(&oldsha, toffset);
1486
		readdisk(dbuf, toffset, arena->blocksize);
1487
		scorecp(dbuf+arena->blocksize-VtScoreSize, zero);
1488
		sbupdate(&oldsha, dbuf, toffset, arena->blocksize);
1489
		sbscore(&oldsha, oldscore);
1490
	}
1491
 
1492
	/*
1493
	 * If we still don't know the clump magic, the arena
1494
	 * must be empty.  It still needs a value, so make 
1495
	 * something up.
1496
	 */
1497
	if(arena->version == 0)
1498
		arena->version = ArenaVersion5;
1499
	if(arena->clumpmagic == 0){
1500
		if(arena->version == ArenaVersion4)
1501
			arena->clumpmagic = _ClumpMagic;
1502
		else{
1503
			do
1504
				arena->clumpmagic = fastrand();
1505
			while(arena->clumpmagic==_ClumpMagic
1506
				||arena->clumpmagic==0);
1507
		}
1508
		head->clumpmagic = arena->clumpmagic;
1509
	}
1510
 
1511
	/*
1512
	 * Guess at number of clumpinfo blocks to load.
1513
	 * If we guess high, it's no big deal.  If we guess low,
1514
	 * we'll be forced into rewriting the whole directory.
1515
	 * Still not such a big deal.
1516
	 */
1517
	if(clumps == 0 || arena->diskstats.used == totalcorrupt)
1518
		goto Nocib;
1519
	if(clumps < arena->diskstats.clumps)
1520
		clumps = arena->diskstats.clumps;
1521
	if(clumps < ncibuf)
1522
		clumps = ncibuf;
1523
	clumps += totalcorrupt/
1524
		((arena->diskstats.used - totalcorrupt)/clumps);
1525
	clumps += totalcorrupt/2000;
1526
	if(clumps < minclumps)
1527
		clumps = minclumps;
1528
	clumps += ncib-1;
1529
	clumps -= clumps%ncib;
1530
 
1531
	/*
1532
	 * Can't write into the actual data.
1533
	 */
1534
	v = offset0 + arena->size - arena->blocksize;
1535
	v -= (clumps+ncib-1)/ncib * arena->blocksize;
1536
	if(v < lastclumpend){
1537
		v = offset0 + arena->size - arena->blocksize;
1538
		clumps = (v-lastclumpend)/arena->blocksize * ncib;
1539
	}
1540
 
1541
	if(clumps < minclumps)
1542
		print("cannot happen?\n");
1543
 
1544
	/*
1545
	 * Check clumpinfo blocks against directory we created.
1546
	 * The tricky part is handling the corrupt sections of arena.
1547
	 * If possible, we remark just the affected directory entries
1548
	 * rather than slide everything down.
1549
	 * 
1550
	 * Allocate clumps+1 blocks and check that we don't need
1551
	 * the last one at the end.
1552
	 */
1553
	bci = loadci(offset0, arena, clumps+1);
1554
	eci = bci+clumps+1;
1555
	bcit = cibuf;
1556
	ecit = cibuf+ncibuf;
1557
 
1558
	smart = 0;	/* Somehow the smart code doesn't do corrupt clumps right. */
1559
Again:
1560
	nbad = 0;
1561
	ci = bci;
1562
	for(cit=bcit; cit<ecit && ci<eci; cit++){
1563
		if(cit->corrupt){
1564
			vlong n, m;
1565
			if(smart){
1566
				/*
1567
				 * If we can, just mark existing entries as corrupt.
1568
				 */
1569
				n = cit->corrupt;
1570
				for(xci=ci; n>0 && xci<eci; xci++)
1571
					n -= ClumpSize+xci->size;
1572
				if(n > 0 || xci >= eci)
1573
					goto Dumb;
1574
				printed = 0;
1575
				for(; ci<xci; ci++){
1576
					if(verbose && ci->type != VtCorruptType){
1577
						if(!printed){
1578
							print("marking directory %d-%d as corrupt\n",
1579
								(int)(ci-bci), (int)(xci-bci));
1580
							printed = 1;
1581
						}
1582
						print("\ttype=%d size=%d uncsize=%d score=%V\n",
1583
							ci->type, ci->size, ci->uncsize, ci->score);
1584
					}
1585
					ci->type = VtCorruptType;
1586
				}
1587
			}else{
1588
			Dumb:
1589
				print("\trewriting clump directory\n");
1590
				/*
1591
				 * Otherwise, blaze a new trail.
1592
				 */
1593
				n = cit->corrupt;
1594
				while(n > 0 && ci < eci){
1595
					if(n < ClumpSize)
1596
						sysfatal("bad math in clump corrupt");
1597
					if(n <= VtMaxLumpSize+ClumpSize)
1598
						m = n;
1599
					else{
1600
						m = VtMaxLumpSize+ClumpSize;
1601
						if(n-m < ClumpSize)
1602
							m -= ClumpSize;
1603
					}
1604
					ci->type = VtCorruptType;
1605
					ci->size = m-ClumpSize;
1606
					ci->uncsize = m-ClumpSize;
1607
					memset(ci->score, 0, VtScoreSize);
1608
					ci++;
1609
					n -= m;
1610
				}
1611
			}
1612
			continue;
1613
		}
1614
		if(clumpinfocmp(&cit->ci, ci) != 0){
1615
			if(verbose && (smart || verbose>1)){
1616
				print("clumpinfo %d\n", (int)(ci-bci));
1617
				print("\twant: %d %d %d %V\n", 
1618
					cit->ci.type, cit->ci.size,
1619
					cit->ci.uncsize, cit->ci.score);
1620
				print("\thave: %d %d %d %V\n", 
1621
					ci->type, ci->size, 
1622
					ci->uncsize, ci->score);
1623
			}
1624
			*ci = cit->ci;
1625
			nbad++;
1626
		}
1627
		ci++;
1628
	}
1629
	if(ci >= eci || cit < ecit){
1630
		print("ran out of space editing existing directory; rewriting\n");
1631
		print("# eci %ld ci %ld ecit %ld cit %ld\n", eci-bci, ci-bci, ecit-bcit, cit-bcit);
1632
		assert(smart);	/* can't happen second time thru */
1633
		smart = 0;
1634
		goto Again;
1635
	}
1636
 
1637
	assert(ci <= eci);
1638
	arena->diskstats.clumps = ci-bci;
1639
	eoffset = writeci(offset0, arena, bci, ci-bci);
1640
	if(sealing && fix)
1641
		sbrollback(&newsha, v);
1642
print("eoffset=%lld lastclumpend=%lld diff=%lld unseal=%d\n", eoffset, lastclumpend, eoffset-lastclumpend, unseal);
1643
	if(lastclumpend > eoffset)
1644
		print("arena directory overwrote blocks!  cannot happen!\n");
1645
	free(bci);
1646
	if(smart && nbad)
1647
		print("arena directory has %d bad or missing entries\n", nbad);
1648
Nocib:
1649
	if(eoffset - lastclumpend > 64*1024 && (!arena->diskstats.sealed || unseal)){
1650
		if(arena->diskstats.sealed)
1651
			print("unsealing arena\n");
1652
		sealing = 0;
1653
		memset(oldscore, 0, VtScoreSize);
1654
	}
1655
 
1656
	/*
1657
	 * Finish the SHA1 of the new data - only meaningful
1658
	 * if we've been writing to disk (`fix').
1659
	 */
1660
	arena->diskstats.sealed = sealing;
1661
	arena->memstats = arena->diskstats;
1662
	if(sealing && fix){
1663
		uchar tbuf[MaxDiskBlock];
1664
 
1665
		sbdiskhash(&newsha, toffset);
1666
		memset(tbuf, 0, sizeof tbuf);
1667
		packarena(arena, tbuf);
1668
		sbupdate(&newsha, tbuf, toffset, arena->blocksize);
1669
		sbscore(&newsha, score);
1670
	}
1671
}
1672
 
1673
void
1674
dumparena(vlong offset, int anum, Arena *arena)
1675
{
1676
	char buf[1000];
1677
	vlong o, e;
1678
	int fd, n;
1679
 
1680
	snprint(buf, sizeof buf, "%s.%d", dumpbase, anum);
1681
	if((fd = create(buf, OWRITE, 0666)) < 0){
1682
		fprint(2, "create %s: %r\n", buf);
1683
		return;
1684
	}
1685
	e = offset+arena->size;
1686
	for(o=offset; o<e; o+=n){
1687
		n = 4*M;
1688
		if(o+n > e)
1689
			n = e-o;
1690
		if(pwrite(fd, pagein(o, n), n, o-offset) != n){
1691
			fprint(2, "write %s at %#llux: %r\n", buf, o-offset);
1692
			return;
1693
		}
1694
	}
1695
}
1696
 
1697
void
1698
checkarena(vlong offset, int anum)
1699
{
1700
	uchar dbuf[MaxDiskBlock];
1701
	uchar *p, oldscore[VtScoreSize], score[VtScoreSize];
1702
	Arena arena, oarena;
1703
	ArenaHead head;
1704
	Info *fmt, *fmta;
1705
	int sz;
1706
 
1707
	print("# arena %d: offset %#llux\n", anum, offset);
1708
 
1709
	if(offset >= partend){
1710
		print("arena offset out of bounds\n");
1711
		return;
1712
	}
1713
 
1714
	guessarena(offset, anum, &head, &arena, oldscore, score);
1715
 
1716
	if(verbose){
1717
		print("#\tversion=%d name=%s blocksize=%d size=%z",
1718
			head.version, head.name, head.blocksize, head.size);
1719
		if(head.clumpmagic)
1720
			print(" clumpmagic=%#.8ux", head.clumpmagic);
1721
		print("\n#\tclumps=%d cclumps=%d used=%,lld uncsize=%,lld\n",
1722
			arena.diskstats.clumps, arena.diskstats.cclumps,
1723
			arena.diskstats.used, arena.diskstats.uncsize);
1724
		print("#\tctime=%t\n", arena.ctime);
1725
		print("#\twtime=%t\n", arena.wtime);
1726
		if(arena.diskstats.sealed)
1727
			print("#\tsealed score=%V\n", score);
1728
	}
1729
 
1730
	if(dumpbase){
1731
		dumparena(offset, anum, &arena);
1732
		return;
1733
	}
1734
 
1735
	memset(dbuf, 0, sizeof dbuf);
1736
	packarenahead(&head, dbuf);
1737
	p = pagein(offset, arena.blocksize);
1738
	if(memcmp(dbuf, p, arena.blocksize) != 0){
1739
		print("on-disk arena header incorrect\n");
1740
		showdiffs(dbuf, p, arena.blocksize, 
1741
			arena.version==ArenaVersion4 ? headinfo4 : headinfo5);
1742
	}
1743
	memmove(p, dbuf, arena.blocksize);
1744
 
1745
	memset(dbuf, 0, sizeof dbuf);
1746
	packarena(&arena, dbuf);
1747
	if(arena.diskstats.sealed)
1748
		scorecp(dbuf+arena.blocksize-VtScoreSize, score);
1749
	p = pagein(offset+arena.size-arena.blocksize, arena.blocksize);
1750
	memset(&oarena, 0, sizeof oarena);
1751
	unpackarena(&oarena, p);
1752
	if(arena.version == ArenaVersion4){
1753
		sz = ArenaSize4;
1754
		fmt = tailinfo4;
1755
		fmta = tailinfo4a;
1756
	}else{
1757
		sz = ArenaSize5;
1758
		fmt = tailinfo5;
1759
		fmta = tailinfo5a;
1760
	}
1761
	if(p[sz] == 1){
1762
		fmt = fmta;
1763
		if(oarena.diskstats.sealed){
1764
			/*
1765
			 * some arenas were sealed with the extension
1766
			 * before we adopted the convention that if it didn't
1767
			 * add new information it gets dropped.
1768
			 */
1769
			_packarena(&arena, dbuf, 1);
1770
		}
1771
	}
1772
	if(memcmp(dbuf, p, arena.blocksize-VtScoreSize) != 0){
1773
		print("on-disk arena tail incorrect\n");
1774
		showdiffs(dbuf, p, arena.blocksize-VtScoreSize, fmt);
1775
	}
1776
	if(arena.diskstats.sealed){
1777
		if(oarena.diskstats.sealed)
1778
		if(scorecmp(p+arena.blocksize-VtScoreSize, oldscore) != 0){
1779
			print("on-disk arena seal score incorrect\n");
1780
			print("\tcorrect=%V\n", oldscore);
1781
			print("\t   disk=%V\n", p+arena.blocksize-VtScoreSize);
1782
		}
1783
		if(fix && scorecmp(p+arena.blocksize-VtScoreSize, score) != 0){
1784
			print("%ssealing arena%s: %V\n", 
1785
				oarena.diskstats.sealed ? "re" : "",
1786
				scorecmp(oldscore, score) == 0 ? 
1787
					"" : " after changes", score);
1788
		}
1789
	}
1790
	memmove(p, dbuf, arena.blocksize);
1791
 
1792
	pageout();
1793
}
1794
 
1795
AMapN*
1796
buildamap(void)
1797
{
1798
	uchar *p;
1799
	vlong o;
1800
	ArenaHead h;
1801
	AMapN *an;
1802
	AMap *m;
1803
 
1804
	an = vtmallocz(sizeof *an);
1805
	for(o=ap.arenabase; o<partend; o+=arenasize){
1806
		p = pagein(o, Block);
1807
		if(unpackarenahead(&h, p) >= 0){
1808
			an->map = vtrealloc(an->map, (an->n+1)*sizeof an->map[0]);
1809
			m = &an->map[an->n++];
1810
			m->start = o;
1811
			m->stop = o+h.size;
1812
			strcpy(m->name, h.name);
1813
		}
1814
	}
1815
	return an;	
1816
}
1817
 
1818
void
1819
checkmap(void)
1820
{
1821
	char *s;
1822
	uchar *p;
1823
	int i, len;
1824
	AMapN *an;
1825
	Fmt fmt;
1826
 
1827
	an = buildamap();
1828
	fmtstrinit(&fmt);
1829
	fmtprint(&fmt, "%ud\n", an->n);
1830
	for(i=0; i<an->n; i++)
1831
		fmtprint(&fmt, "%s\t%lld\t%lld\n",
1832
			an->map[i].name, an->map[i].start, an->map[i].stop);
1833
	s = fmtstrflush(&fmt);
1834
	len = strlen(s);
1835
	if(len > ap.tabsize){
1836
		print("arena partition map too long: need %z bytes have %z\n",
1837
			(vlong)len, (vlong)ap.tabsize);
1838
		len = ap.tabsize;
1839
	}
1840
 
1841
	if(ap.tabsize >= 4*M){	/* can't happen - max arenas is 2000 */
1842
		print("arena partition map *way* too long\n");
1843
		return;
1844
	}
1845
 
1846
	p = pagein(ap.tabbase, ap.tabsize);
1847
	if(memcmp(p, s, len) != 0){
1848
		print("arena partition map incorrect; rewriting.\n");
1849
		memmove(p, s, len);
1850
	}
1851
	pageout();
1852
}
1853
 
1854
/*
 * Stack size requested for the main thread (512K).
 * guessarena and checkarena keep MaxDiskBlock-sized buffers on the stack.
 * NOTE(review): presumably read by the thread library before threadmain runs -- confirm.
 */
int mainstacksize = 512*1024;
1855
 
1856
void
1857
threadmain(int argc, char **argv)
1858
{
1859
	int mode;
1860
 
1861
	mode = OREAD;
1862
	readonly = 1;	
1863
	ARGBEGIN{
1864
	case 'U':
1865
		unseal = 1;
1866
		break;
1867
	case 'a':
1868
		arenasize = unittoull(EARGF(usage()));
1869
		break;
1870
	case 'b':
1871
		ap.blocksize = unittoull(EARGF(usage()));
1872
		break;
1873
	case 'f':
1874
		fix = 1;
1875
		mode = ORDWR;
1876
		readonly = 0;
1877
		break;
1878
	case 'n':
1879
		basename = EARGF(usage());
1880
		break;
1881
	case 'v':
1882
		verbose++;
1883
		break;
1884
	case 'x':
1885
		dumpbase = EARGF(usage());
1886
		break;
1887
	default:
1888
		usage();
1889
	}ARGEND
1890
 
1891
	if(argc != 1 && argc != 2)
1892
		usage();
1893
 
1894
	file = argv[0];
1895
 
1896
	ventifmtinstall();
1897
	fmtinstall('z', zfmt);
1898
	fmtinstall('t', tfmt);
1899
	quotefmtinstall();
1900
 
1901
	part = initpart(file, mode|ODIRECT);
1902
	if(part == nil)
1903
		sysfatal("can't open %s: %r", file);
1904
	partend = part->size;
1905
 
1906
	if(isonearena()){
1907
		checkarena(0, -1);
1908
		threadexitsall(nil);
1909
	}
1910
	checkarenas(argc > 1 ? argv[1] : nil);
1911
	checkmap();
1912
	threadexitsall(nil);
1913
}
1914