Subversion Repositories planix.SVN

Rev

Rev 2 | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
2 - 1
#include "stdinc.h"
2
#include "dat.h"
3
#include "fns.h"
4
 
5
typedef struct ASum ASum;

/*
 * Node in the singly linked queue of arenas waiting for a
 * background checksum (see backsumarena and sumproc below).
 */
struct ASum
{
	Arena	*arena;		/* arena to be checksummed */
	ASum	*next;		/* next queue entry; nil at the tail */
};
12
 
13
static void	sealarena(Arena *arena);
static int	okarena(Arena *arena);
static int	loadarena(Arena *arena);
static CIBlock	*getcib(Arena *arena, int clump, int writing, CIBlock *rock);
static void	putcib(Arena *arena, CIBlock *cib);
static void	sumproc(void *);
static void loadcig(Arena *arena);

/* queue of arenas awaiting a background checksum, protected by sumlock */
static QLock	sumlock;
static Rendez	sumwait;	/* uses sumlock as its lock; signaled when sumq becomes non-empty */
static ASum	*sumq;		/* head of the queue */
static ASum	*sumqtail;	/* tail of the queue */
static uchar zero[8192];	/* zero block written at the start of a new arena's data area */

/* per-read throttle (ms) used by sumarena; SleepForever pauses summing entirely */
int	arenasumsleeptime;
28
 
29
int
30
initarenasum(void)
31
{
32
	needzeroscore();  /* OS X */
33
 
34
	sumwait.l = &sumlock;
35
 
36
	if(vtproc(sumproc, nil) < 0){
37
		seterr(EOk, "can't start arena checksum slave: %r");
38
		return -1;
39
	}
40
	return 0;
41
}
42
 
43
/*
44
 * make an Arena, and initialize it based upon the disk header and trailer.
45
 */
46
/*
 * make an Arena, and initialize it based upon the disk header and trailer.
 *
 * part is the partition holding the arena; base and size describe the
 * whole on-disk extent including the header and trailer blocks, so the
 * usable region stored in arena->base/arena->size excludes one block
 * at each end.  Returns nil (with an error set) on corruption.
 */
Arena*
initarena(Part *part, u64int base, u64int size, u32int blocksize)
{
	Arena *arena;

	arena = MKZ(Arena);
	arena->part = part;
	arena->blocksize = blocksize;
	/* clump directory entries per directory block */
	arena->clumpmax = arena->blocksize / ClumpInfoSize;
	arena->base = base + blocksize;
	arena->size = size - 2 * blocksize;

	if(loadarena(arena) < 0){
		seterr(ECorrupt, "arena header or trailer corrupted");
		freearena(arena);
		return nil;
	}
	if(okarena(arena) < 0){
		freearena(arena);
		return nil;
	}

	/* sealed on disk but no score recorded yet: queue a background sum */
	if(arena->diskstats.sealed && scorecmp(zeroscore, arena->score)==0)
		sealarena(arena);

	return arena;
}
73
 
74
void
75
freearena(Arena *arena)
76
{
77
	if(arena == nil)
78
		return;
79
	free(arena);
80
}
81
 
82
Arena*
83
newarena(Part *part, u32int vers, char *name, u64int base, u64int size, u32int blocksize)
84
{
85
	int bsize;
86
	Arena *arena;
87
 
88
	if(nameok(name) < 0){
89
		seterr(EOk, "illegal arena name", name);
90
		return nil;
91
	}
92
	arena = MKZ(Arena);
93
	arena->part = part;
94
	arena->version = vers;
95
	if(vers == ArenaVersion4)
96
		arena->clumpmagic = _ClumpMagic;
97
	else{
98
		do
99
			arena->clumpmagic = fastrand();
100
		while(arena->clumpmagic==_ClumpMagic || arena->clumpmagic==0);
101
	}
102
	arena->blocksize = blocksize;
103
	arena->clumpmax = arena->blocksize / ClumpInfoSize;
104
	arena->base = base + blocksize;
105
	arena->size = size - 2 * blocksize;
106
 
107
	namecp(arena->name, name);
108
 
109
	bsize = sizeof zero;
110
	if(bsize > arena->blocksize)
111
		bsize = arena->blocksize;
112
 
113
	if(wbarena(arena)<0 || wbarenahead(arena)<0
114
	|| writepart(arena->part, arena->base, zero, bsize)<0){
115
		freearena(arena);
116
		return nil;
117
	}
118
 
119
	return arena;
120
}
121
 
122
int
123
readclumpinfo(Arena *arena, int clump, ClumpInfo *ci)
124
{
125
	CIBlock *cib, r;
126
 
127
	cib = getcib(arena, clump, 0, &r);
128
	if(cib == nil)
129
		return -1;
130
	unpackclumpinfo(ci, &cib->data->data[cib->offset]);
131
	putcib(arena, cib);
132
	return 0;
133
}
134
 
135
/*
 * Read directory entries for clumps clump..clump+n-1 into cis.
 * Returns the number of leading entries actually valid: on a read
 * failure at index i, n is truncated to i, discarding any entries
 * already read above it.
 */
int
readclumpinfos(Arena *arena, int clump, ClumpInfo *cis, int n)
{
	CIBlock *cib, r;
	int i;

	/*
	 * because the clump blocks are laid out
	 * in reverse order at the end of the arena,
	 * it can be a few percent faster to read
	 * the clumps backwards, which reads the
	 * disk blocks forwards.
	 */
	for(i = n-1; i >= 0; i--){
		cib = getcib(arena, clump + i, 0, &r);
		if(cib == nil){
			/* only entries below the failure are trustworthy */
			n = i;
			continue;
		}
		unpackclumpinfo(&cis[i], &cib->data->data[cib->offset]);
		putcib(arena, cib);
	}
	return n;
}
159
 
160
/*
161
 * write directory information for one clump
162
 * must be called the arena locked
163
 */
164
int
165
writeclumpinfo(Arena *arena, int clump, ClumpInfo *ci)
166
{
167
	CIBlock *cib, r;
168
 
169
	cib = getcib(arena, clump, 1, &r);
170
	if(cib == nil)
171
		return -1;
172
	dirtydblock(cib->data, DirtyArenaCib);
173
	packclumpinfo(ci, &cib->data->data[cib->offset]);
174
	putcib(arena, cib);
175
	return 0;
176
}
177
 
178
/*
 * Bytes consumed by the clump directory for the given clump count:
 * one blocksize-sized block per clumpmax entries, always rounding
 * up to at least one block.
 */
u64int
arenadirsize(Arena *arena, u32int clumps)
{
	return ((clumps / arena->clumpmax) + 1) * arena->blocksize;
}
183
 
184
/*
185
 * read a clump of data
186
 * n is a hint of the size of the data, not including the header
187
 * make sure it won't run off the end, then return the number of bytes actually read
188
 */
189
/*
 * read a clump of data
 * n is a hint of the size of the data, not including the header
 * make sure it won't run off the end, then return the number of bytes actually read
 *
 * aa is an offset within the arena's data region.  Returns the number
 * of bytes read, or -1 (as u32int, i.e. TWID32) on error or n==0.
 */
u32int
readarena(Arena *arena, u64int aa, u8int *buf, long n)
{
	DBlock *b;
	u64int a;
	u32int blocksize, off, m;
	long nn;

	if(n == 0)
		return -1;

	/* a = first byte past valid clump storage (directory grows down from the end) */
	qlock(&arena->lock);
	a = arena->size - arenadirsize(arena, arena->memstats.clumps);
	qunlock(&arena->lock);
	if(aa >= a){
		/* NOTE(review): memstats read here without the lock - diagnostic only */
		seterr(EOk, "reading beyond arena clump storage: clumps=%d aa=%lld a=%lld -1 clumps=%lld\n", arena->memstats.clumps, aa, a, arena->size - arenadirsize(arena, arena->memstats.clumps - 1));
		return -1;
	}
	if(aa + n > a)
		n = a - aa;

	/* walk the cached disk blocks covering [aa, aa+n) */
	blocksize = arena->blocksize;
	a = arena->base + aa;
	off = a & (blocksize - 1);	/* assumes blocksize is a power of two */
	a -= off;
	nn = 0;
	for(;;){
		b = getdblock(arena->part, a, OREAD);
		if(b == nil)
			return -1;
		m = blocksize - off;
		if(m > n - nn)
			m = n - nn;
		memmove(&buf[nn], &b->data[off], m);
		putdblock(b);
		nn += m;
		if(nn == n)
			break;
		off = 0;
		a += blocksize;
	}
	return n;
}
232
 
233
/*
234
 * write some data to the clump section at a given offset
235
 * used to fix up corrupted arenas.
236
 */
237
u32int
238
writearena(Arena *arena, u64int aa, u8int *clbuf, u32int n)
239
{
240
	DBlock *b;
241
	u64int a;
242
	u32int blocksize, off, m;
243
	long nn;
244
	int ok;
245
 
246
	if(n == 0)
247
		return -1;
248
 
249
	qlock(&arena->lock);
250
	a = arena->size - arenadirsize(arena, arena->memstats.clumps);
251
	if(aa >= a || aa + n > a){
252
		qunlock(&arena->lock);
253
		seterr(EOk, "writing beyond arena clump storage");
254
		return -1;
255
	}
256
 
257
	blocksize = arena->blocksize;
258
	a = arena->base + aa;
259
	off = a & (blocksize - 1);
260
	a -= off;
261
	nn = 0;
262
	for(;;){
263
		b = getdblock(arena->part, a, off != 0 || off + n < blocksize ? ORDWR : OWRITE);
264
		if(b == nil){
265
			qunlock(&arena->lock);
266
			return -1;
267
		}
268
		dirtydblock(b, DirtyArena);
269
		m = blocksize - off;
270
		if(m > n - nn)
271
			m = n - nn;
272
		memmove(&b->data[off], &clbuf[nn], m);
273
		ok = 0;
274
		putdblock(b);
275
		if(ok < 0){
276
			qunlock(&arena->lock);
277
			return -1;
278
		}
279
		nn += m;
280
		if(nn == n)
281
			break;
282
		off = 0;
283
		a += blocksize;
284
	}
285
	qunlock(&arena->lock);
286
	return n;
287
}
288
 
289
/*
290
 * allocate space for the clump and write it,
291
 * updating the arena directory
292
ZZZ question: should this distinguish between an arena
293
filling up and real errors writing the clump?
294
 */
295
/*
 * allocate space for the clump and write it,
 * updating the arena directory
ZZZ question: should this distinguish between an arena
filling up and real errors writing the clump?
 *
 * Returns the arena data offset at which the clump was written,
 * or TWID64 if the arena is (or becomes) sealed or a write fails.
 */
u64int
writeaclump(Arena *arena, Clump *c, u8int *clbuf)
{
	DBlock *b;
	u64int a, aa;
	u32int clump, n, nn, m, off, blocksize;
	int ok;

	n = c->info.size + ClumpSize + U32Size;
	qlock(&arena->lock);
	aa = arena->memstats.used;
	/* seal when this clump (plus trailing magic and a grown directory) won't fit */
	if(arena->memstats.sealed
	|| aa + n + U32Size + arenadirsize(arena, arena->memstats.clumps + 1) > arena->size){
		if(!arena->memstats.sealed){
			logerr(EOk, "seal memstats %s", arena->name);
			arena->memstats.sealed = 1;
			wbarena(arena);
		}
		qunlock(&arena->lock);
		return TWID64;
	}
	if(packclump(c, &clbuf[0], arena->clumpmagic) < 0){
		qunlock(&arena->lock);
		return TWID64;
	}

	/*
	 * write the data out one block at a time
	 */
	blocksize = arena->blocksize;
	a = arena->base + aa;
	off = a & (blocksize - 1);	/* assumes blocksize is a power of two */
	a -= off;
	nn = 0;
	for(;;){
		b = getdblock(arena->part, a, off != 0 ? ORDWR : OWRITE);
		if(b == nil){
			qunlock(&arena->lock);
			return TWID64;
		}
		dirtydblock(b, DirtyArena);
		m = blocksize - off;
		if(m > n - nn)
			m = n - nn;
		memmove(&b->data[off], &clbuf[nn], m);
		/* NOTE(review): ok is never set negative; this check is dead code
		 * left over from an earlier synchronous-write implementation */
		ok = 0;
		putdblock(b);
		if(ok < 0){
			qunlock(&arena->lock);
			return TWID64;
		}
		nn += m;
		if(nn == n)
			break;
		off = 0;
		a += blocksize;
	}

	/* account for the clump (the trailing U32Size magic is not counted as used) */
	arena->memstats.used += c->info.size + ClumpSize;
	arena->memstats.uncsize += c->info.uncsize;
	if(c->info.size < c->info.uncsize)
		arena->memstats.cclumps++;

	clump = arena->memstats.clumps;
	/* every ArenaCIGSize-th clump starts a new clump info group */
	if(clump % ArenaCIGSize == 0){
		if(arena->cig == nil){
			loadcig(arena);
			if(arena->cig == nil)
				goto NoCIG;
		}
		/* add aa as start of next cig */
		if(clump/ArenaCIGSize != arena->ncig){
			fprint(2, "bad arena cig computation %s: writing clump %d but %d cigs\n",
				arena->name, clump, arena->ncig);
			/* ncig = -1 disables cig maintenance for this arena */
			arena->ncig = -1;
			vtfree(arena->cig);
			arena->cig = nil;
			goto NoCIG;
		}
		arena->cig = vtrealloc(arena->cig, (arena->ncig+1)*sizeof arena->cig[0]);
		arena->cig[arena->ncig++].offset = aa;
	}
NoCIG:
	arena->memstats.clumps++;

	if(arena->memstats.clumps == 0)
		sysfatal("clumps wrapped");
	arena->wtime = now();
	if(arena->ctime == 0)
		arena->ctime = arena->wtime;

	writeclumpinfo(arena, clump, &c->info);
	wbarena(arena);

	qunlock(&arena->lock);

	return aa;
}
393
 
394
int
395
atailcmp(ATailStats *a, ATailStats *b)
396
{
397
	/* good test */
398
	if(a->used < b->used)
399
		return -1;
400
	if(a->used > b->used)
401
		return 1;
402
 
403
	/* suspect tests - why order this way? (no one cares) */
404
	if(a->clumps < b->clumps)
405
		return -1;
406
	if(a->clumps > b->clumps)
407
		return 1;
408
	if(a->cclumps < b->cclumps)
409
		return -1;
410
	if(a->cclumps > b->cclumps)
411
		return 1;
412
	if(a->uncsize < b->uncsize)
413
		return -1;
414
	if(a->uncsize > b->uncsize)
415
		return 1;
416
	if(a->sealed < b->sealed)
417
		return -1;
418
	if(a->sealed > b->sealed)
419
		return 1;
420
 
421
	/* everything matches */
422
	return 0;
423
}
424
 
425
/*
 * Propagate in-memory tail state to disk for as->arena and every
 * arena before it in the main index, sealing any arena whose
 * on-disk state just transitioned to sealed.
 */
void
setatailstate(AState *as)
{
	int i, j, osealed;
	Arena *a;
	Index *ix;

	trace(0, "setatailstate %s 0x%llux clumps %d", as->arena->name, as->aa, as->stats.clumps);

	/*
	 * Look up as->arena to find index.
	 */
	needmainindex();	/* OS X linker */
	ix = mainindex;
	for(i=0; i<ix->narenas; i++)
		if(ix->arenas[i] == as->arena)
			break;
	/* sanity-check that as->aa really lies inside arena i's mapped range */
	if(i==ix->narenas || as->aa < ix->amap[i].start || as->aa >= ix->amap[i].stop || as->arena != ix->arenas[i]){
		fprint(2, "funny settailstate 0x%llux\n", as->aa);
		return;
	}

	for(j=0; j<=i; j++){
		a = ix->arenas[j];
		if(atailcmp(&a->diskstats, &a->memstats) == 0)
			continue;
		qlock(&a->lock);
		osealed = a->diskstats.sealed;
		/* the target arena gets the caller's snapshot; earlier ones are fully flushed */
		if(j == i)
			a->diskstats = as->stats;
		else
			a->diskstats = a->memstats;
		wbarena(a);
		/* newly sealed and not already queued: start the background sum */
		if(a->diskstats.sealed != osealed && !a->inqueue)
			sealarena(a);
		qunlock(&a->lock);
	}
}
463
 
464
/*
465
 * once sealed, an arena never has any data added to it.
466
 * it should only be changed to fix errors.
467
 * this also syncs the clump directory.
468
 */
469
/*
 * once sealed, an arena never has any data added to it.
 * it should only be changed to fix errors.
 * this also syncs the clump directory.
 */
static void
sealarena(Arena *arena)
{
	/* inqueue prevents setatailstate from queueing the arena twice */
	arena->inqueue = 1;
	backsumarena(arena);
}
475
 
476
/*
 * Queue arena for checksumming by the background sumproc.
 * Silently does nothing if the slave was never started
 * (sumwait.l == nil) or the queue node cannot be allocated.
 */
void
backsumarena(Arena *arena)
{
	ASum *as;

	/* initarenasum was never run (or failed): no consumer exists */
	if(sumwait.l == nil)
		return;

	as = MK(ASum);
	if(as == nil)
		return;
	qlock(&sumlock);
	as->arena = arena;
	as->next = nil;
	/* append to the queue, waking the (single) sumproc consumer */
	if(sumq)
		sumqtail->next = as;
	else
		sumq = as;
	sumqtail = as;
	rwakeup(&sumwait);
	qunlock(&sumlock);
}
498
 
499
/*
 * Background slave: dequeue arenas from sumq forever and
 * checksum each one with sumarena.
 */
static void
sumproc(void *unused)
{
	ASum *as;
	Arena *arena;

	USED(unused);

	for(;;){
		qlock(&sumlock);
		while(sumq == nil)
			rsleep(&sumwait);
		as = sumq;
		sumq = as->next;
		qunlock(&sumlock);
		arena = as->arena;
		free(as);

		sumarena(arena);
	}
}
520
 
521
/*
 * Compute the SHA1 score of an entire arena (header, data, and
 * trailer, with the trailer's embedded score treated as zero),
 * record it in arena->score, and write the trailer back.
 * Reads are throttled by arenasumsleeptime.
 */
void
sumarena(Arena *arena)
{
	ZBlock *b;
	DigestState s;
	u64int a, e;
	u32int bs;
	int t;
	u8int score[VtScoreSize];

	/* read in large chunks, but never smaller than one arena block */
	bs = MaxIoSize;
	if(bs < arena->blocksize)
		bs = arena->blocksize;

	/*
	 * read & sum all blocks except the last one
	 */
	flushdcache();
	memset(&s, 0, sizeof s);
	b = alloczblock(bs, 0, arena->part->blocksize);
	/* e is the trailer block address; start one block early to include the header */
	e = arena->base + arena->size;
	for(a = arena->base - arena->blocksize; a + arena->blocksize <= e; a += bs){
		disksched();
		/* SleepForever pauses summing until the setting changes */
		while((t=arenasumsleeptime) == SleepForever){
			sleep(1000);
			disksched();
		}
		sleep(t);
		/* shrink the final read so it does not run past the trailer */
		if(a + bs > e)
			bs = arena->blocksize;
		if(readpart(arena->part, a, b->data, bs) < 0)
			goto ReadErr;
		addstat(StatSumRead, 1);
		addstat(StatSumReadBytes, bs);
		sha1(b->data, bs, nil, &s);
	}

	/*
	 * the last one is special, since it may already have the checksum included
	 */
	bs = arena->blocksize;
	if(readpart(arena->part, e, b->data, bs) < 0){
ReadErr:
		logerr(EOk, "sumarena can't sum %s, read at %lld failed: %r", arena->name, a);
		freezblock(b);
		return;
	}
	addstat(StatSumRead, 1);
	addstat(StatSumReadBytes, bs);

	/* sum the trailer with its score field replaced by the zero score */
	sha1(b->data, bs-VtScoreSize, nil, &s);
	sha1(zeroscore, VtScoreSize, nil, &s);
	sha1(nil, 0, score, &s);

	/*
	 * check for no checksum or the same
	 */
	if(scorecmp(score, &b->data[bs - VtScoreSize]) != 0
	&& scorecmp(zeroscore, &b->data[bs - VtScoreSize]) != 0)
		logerr(EOk, "overwriting mismatched checksums for arena=%s, found=%V calculated=%V",
			arena->name, &b->data[bs - VtScoreSize], score);
	freezblock(b);

	qlock(&arena->lock);
	scorecp(arena->score, score);
	wbarena(arena);
	qunlock(&arena->lock);
}
589
 
590
/*
591
 * write the arena trailer block to the partition
592
 */
593
/*
 * write the arena trailer block to the partition
 *
 * Packs the arena state (after an okarena sanity check) and the
 * current score into the trailer block.  Returns 0 on success,
 * -1 on failure; the score is written even when packing fails.
 */
int
wbarena(Arena *arena)
{
	DBlock *b;
	int bad;

	if((b = getdblock(arena->part, arena->base + arena->size, OWRITE)) == nil){
		logerr(EAdmin, "can't write arena trailer: %r");
		return -1;
	}
	dirtydblock(b, DirtyArenaTrailer);
	bad = okarena(arena)<0 || packarena(arena, b->data)<0;
	/* score lives in the last VtScoreSize bytes of the trailer block */
	scorecp(b->data + arena->blocksize - VtScoreSize, arena->score);
	putdblock(b);
	if(bad)
		return -1;
	return 0;
}
611
 
612
/*
 * Write the arena header block (name, version, total size,
 * blocksize, clumpmagic) to the partition and flush it.
 * Returns 0 on success, -1 on failure.
 */
int
wbarenahead(Arena *arena)
{
	ZBlock *b;
	ArenaHead head;
	int bad;

	namecp(head.name, arena->name);
	head.version = arena->version;
	/* header records the full extent, including header and trailer blocks */
	head.size = arena->size + 2 * arena->blocksize;
	head.blocksize = arena->blocksize;
	head.clumpmagic = arena->clumpmagic;
	b = alloczblock(arena->blocksize, 1, arena->part->blocksize);
	if(b == nil){
		logerr(EAdmin, "can't write arena header: %r");
/* ZZZ add error message? */
		return -1;
	}
	/*
	 * this writepart is okay because it only happens
	 * during initialization.
	 */
	bad = packarenahead(&head, b->data)<0 ||
	      writepart(arena->part, arena->base - arena->blocksize, b->data, arena->blocksize)<0 ||
	      flushpart(arena->part)<0;
	freezblock(b);
	if(bad)
		return -1;
	return 0;
}
642
 
643
/*
644
 * read the arena header and trailer blocks from disk
645
 */
646
/*
 * read the arena header and trailer blocks from disk
 *
 * The trailer is authoritative: a bad trailer fails the load (-1),
 * while a bad or inconsistent header is only logged and the load
 * still succeeds (0).
 */
static int
loadarena(Arena *arena)
{
	ArenaHead head;
	ZBlock *b;

	b = alloczblock(arena->blocksize, 0, arena->part->blocksize);
	if(b == nil)
		return -1;
	/* trailer block sits immediately after the data region */
	if(readpart(arena->part, arena->base + arena->size, b->data, arena->blocksize) < 0){
		freezblock(b);
		return -1;
	}
	if(unpackarena(arena, b->data) < 0){
		freezblock(b);
		return -1;
	}
	if(arena->version != ArenaVersion4 && arena->version != ArenaVersion5){
		seterr(EAdmin, "unknown arena version %d", arena->version);
		freezblock(b);
		return -1;
	}
	/* arena score is stored in the trailer's final VtScoreSize bytes */
	scorecp(arena->score, &b->data[arena->blocksize - VtScoreSize]);

	/* header block sits immediately before the data region; failures are non-fatal */
	if(readpart(arena->part, arena->base - arena->blocksize, b->data, arena->blocksize) < 0){
		logerr(EAdmin, "can't read arena header: %r");
		freezblock(b);
		return 0;
	}
	if(unpackarenahead(&head, b->data) < 0)
		logerr(ECorrupt, "corrupted arena header: %r");
	else if(namecmp(arena->name, head.name)!=0
	     || arena->clumpmagic != head.clumpmagic
	     || arena->version != head.version
	     || arena->blocksize != head.blocksize
	     || arena->size + 2 * arena->blocksize != head.size){
		/* report the first mismatching field for diagnosis */
		if(namecmp(arena->name, head.name)!=0)
			logerr(ECorrupt, "arena tail name %s head %s", 
				arena->name, head.name);
		else if(arena->clumpmagic != head.clumpmagic)
			logerr(ECorrupt, "arena %d tail clumpmagic 0x%lux head 0x%lux",
				debugarena, (ulong)arena->clumpmagic,
				(ulong)head.clumpmagic);
		else if(arena->version != head.version)
			logerr(ECorrupt, "arena tail version %d head version %d",
				arena->version, head.version);
		else if(arena->blocksize != head.blocksize)
			logerr(ECorrupt, "arena tail block size %d head %d",
				arena->blocksize, head.blocksize);
		else if(arena->size+2*arena->blocksize != head.size)
			logerr(ECorrupt, "arena tail size %lud head %lud",
				(ulong)arena->size+2*arena->blocksize, head.size);
		else
			logerr(ECorrupt, "arena header inconsistent with arena data");
	}
	freezblock(b);

	return 0;
}
705
 
706
/*
 * Sanity-check the on-disk statistics of an arena.
 * Returns 0 if plausible, -1 if used space plus the clump
 * directory would exceed the arena size; lesser anomalies
 * are only logged.
 */
static int
okarena(Arena *arena)
{
	u64int dsize;
	int ok;

	ok = 0;
	dsize = arenadirsize(arena, arena->diskstats.clumps);
	if(arena->diskstats.used + dsize > arena->size){
		seterr(ECorrupt, "arena %s used > size", arena->name);
		ok = -1;
	}

	if(arena->diskstats.cclumps > arena->diskstats.clumps)
		logerr(ECorrupt, "arena %s has more compressed clumps than total clumps", arena->name);

	/*
	 * This need not be true if some of the disk is corrupted.
	 *
	if(arena->diskstats.uncsize + arena->diskstats.clumps * ClumpSize + arena->blocksize < arena->diskstats.used)
		logerr(ECorrupt, "arena %s uncompressed size inconsistent with used space %lld %d %lld", arena->name, arena->diskstats.uncsize, arena->diskstats.clumps, arena->diskstats.used);
	 */

	/*
	 * this happens; it's harmless.
	 *
	if(arena->ctime > arena->wtime)
		logerr(ECorrupt, "arena %s creation time after last write time", arena->name);
	 */
	return ok;
}
737
 
738
/*
 * Locate the clump directory block holding the entry for clump.
 * Fills in the caller-supplied rock and pins the underlying disk
 * block in cib->data; release with putcib.  Returns nil if clump
 * is out of range or the block cannot be read.
 */
static CIBlock*
getcib(Arena *arena, int clump, int writing, CIBlock *rock)
{
	int mode;
	CIBlock *cib;
	u32int block, off;

	if(clump >= arena->memstats.clumps){
		seterr(EOk, "clump directory access out of range");
		return nil;
	}
	block = clump / arena->clumpmax;
	off = (clump - block * arena->clumpmax) * ClumpInfoSize;
	cib = rock;
	cib->block = block;
	cib->offset = off;

	if(writing){
		/* first entry of the newest block: nothing to preserve, skip the read-back */
		if(off == 0 && clump == arena->memstats.clumps-1)
			mode = OWRITE;
		else
			mode = ORDWR;
	}else
		mode = OREAD;

	/* directory blocks are laid out backwards from the end of the arena */
	cib->data = getdblock(arena->part,
		arena->base + arena->size - (block + 1) * arena->blocksize, mode);
	if(cib->data == nil)
		return nil;
	return cib;
}
769
 
770
/*
 * Release the directory block pinned by getcib.
 */
static void
putcib(Arena *arena, CIBlock *cib)
{
	USED(arena);

	putdblock(cib->data);
	cib->data = nil;
}
778
 
779
 
780
/*
781
 * For index entry readahead purposes, the arenas are 
782
 * broken into smaller subpieces, called clump info groups
783
 * or cigs.  Each cig has ArenaCIGSize clumps (ArenaCIGSize
784
 * is chosen to make the index entries take up about half
785
 * a megabyte).  The index entries do not contain enough
786
 * information to determine what the clump index is for
787
 * a given address in an arena.  That info is needed both for
788
 * figuring out which clump group an address belongs to 
789
 * and for prefetching a clump group's index entries from
790
 * the arena table of contents.  The first time clump groups
791
 * are accessed, we scan the entire arena table of contents
792
 * (which might be 10s of megabytes), recording the data 
793
 * offset of each clump group.
794
 */
795
 
796
/* 
797
 * load clump info group information by scanning entire toc.
798
 */
799
/* 
 * load clump info group information by scanning entire toc.
 *
 * On success fills arena->cig/arena->ncig with the data offset of
 * each group.  On a scan error sets ncig = -1, which permanently
 * disables cig loading for this arena (checked on entry).
 */
static void
loadcig(Arena *arena)
{
	u32int i, j, ncig, nci;
	ArenaCIG *cig;
	ClumpInfo *ci;
	u64int offset;
	int ms;

	/* already loaded, or previously failed (ncig < 0): nothing to do */
	if(arena->cig || arena->ncig < 0)
		return;

//	fprint(2, "loadcig %s\n", arena->name);

	ncig = (arena->memstats.clumps+ArenaCIGSize-1) / ArenaCIGSize;
	if(ncig == 0){
		/* non-nil sentinel so the "already loaded" check fires next time */
		arena->cig = vtmalloc(1);
		arena->ncig = 0;
		return;
	}

	ms = msec();
	cig = vtmalloc(ncig*sizeof cig[0]);
	ci = vtmalloc(ArenaCIGSize*sizeof ci[0]);
	offset = 0;
	for(i=0; i<ncig; i++){
		nci = readclumpinfos(arena, i*ArenaCIGSize, ci, ArenaCIGSize);
		cig[i].offset = offset;
		/* accumulate the on-disk size of this group's clumps */
		for(j=0; j<nci; j++)
			offset += ClumpSize + ci[j].size;
		/* a short group is only legal as the final group */
		if(nci < ArenaCIGSize){
			if(i != ncig-1){
				vtfree(ci);
				vtfree(cig);
				arena->ncig = -1;
				fprint(2, "loadcig %s: got %ud cigs, expected %ud\n", arena->name, i+1, ncig);
				goto out;
			}
		}
	}
	vtfree(ci);

	arena->ncig = ncig;
	arena->cig = cig;

out:
	ms = msec() - ms;
	addstat2(StatCigLoad, 1, StatCigLoadTime, ms);
}
848
 
849
/*
850
 * convert arena address into arena group + data boundaries.
851
 */
852
/*
 * convert arena address into arena group + data boundaries.
 *
 * Binary-searches the cig offsets for the group containing addr,
 * returning the group index in *g and its data range in
 * [*gstart, *glimit).  Returns 0 on success, -1 if the cig table
 * is unavailable.
 */
int
arenatog(Arena *arena, u64int addr, u64int *gstart, u64int *glimit, int *g)
{
	int r, l, m;

	qlock(&arena->lock);
	if(arena->cig == nil)
		loadcig(arena);
	if(arena->cig == nil || arena->ncig == 0){
		qunlock(&arena->lock);
		return -1;
	}

	/*
	 * find the last group whose offset is <= addr.
	 * l starts at 1 (group 0 has offset 0 <= addr always),
	 * and the final l-- leaves l as that group's index.
	 */
	l = 1;
	r = arena->ncig - 1;
	while(l <= r){
		m = (r + l) / 2;
		if(arena->cig[m].offset <= addr)
			l = m + 1;
		else
			r = m - 1;
	}
	l--;

	*g = l;
	*gstart = arena->cig[l].offset;
	/* the last group extends to the end of the used data */
	if(l+1 < arena->ncig)
		*glimit = arena->cig[l+1].offset;
	else
		*glimit = arena->memstats.used;
	qunlock(&arena->lock);
	return 0;
}
885
 
886
/*
887
 * load the clump info for group g into the index entries.
888
 */
889
/*
 * load the clump info for group g into the index entries.
 *
 * entries must hold at least ArenaCIGSize slots.  Corrupt clumps
 * are skipped (but still advance the address).  Returns the number
 * of entries filled, or -1 on error.
 */
int
asumload(Arena *arena, int g, IEntry *entries, int nentries)
{
	int i, base, limit;
	u64int addr;
	ClumpInfo ci;
	IEntry *ie;

	if(nentries < ArenaCIGSize){
		fprint(2, "asking for too few entries\n");
		return -1;
	}

	qlock(&arena->lock);
	if(arena->cig == nil)
		loadcig(arena);
	if(arena->cig == nil || arena->ncig == 0 || g >= arena->ncig){
		qunlock(&arena->lock);
		return -1;
	}

	/* NOTE(review): addr is a group-relative offset here, starting at 0 */
	addr = 0;
	base = g*ArenaCIGSize;
	limit = base + ArenaCIGSize;
	if(base > arena->memstats.clumps)
		base = arena->memstats.clumps;
	ie = entries;
	for(i=base; i<limit; i++){
		if(readclumpinfo(arena, i, &ci) < 0)
			break;
		if(ci.type != VtCorruptType){
			scorecp(ie->score, ci.score);
			ie->ia.type = ci.type;
			ie->ia.size = ci.uncsize;
			/* blocks = ceil((stored size + header) / 2^ABlockLog) */
			ie->ia.blocks = (ci.size + ClumpSize + (1<<ABlockLog) - 1) >> ABlockLog;
			ie->ia.addr = addr;
			ie++;
		}
		addr += ClumpSize + ci.size;
	}
	qunlock(&arena->lock);
	return ie - entries;
}