Subversion Repositories planix.SVN

Rev

Rev 2 | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
2 - 1
/*
 * File system devices.
 * Follows device config in Ken's file server.
 * Builds mirrors, concatenations, interleavings, and partitions
 * of devices out of other (inner) devices.
 * It is ok if inner devices are provided by this driver.
 *
 * Built files are grouped in different directories
 * (called trees, and used to represent disks).
 * The "#k/fs" tree is always available and never goes away.
 * Configuration changes happen only while no I/O is in progress.
 *
 * Default sector size is one byte unless changed by the "disk" ctl.
 */
15
 
16
#include "u.h"
17
#include "../port/lib.h"
18
#include "mem.h"
19
#include "dat.h"
20
#include "fns.h"
21
#include "io.h"
22
#include "ureg.h"
23
#include "../port/error.h"
24
 
25
enum
{
	/* device types; also used as the command indices in configs[] */
	Fnone,
	Fmirror,		/* mirror of other devices */
	Fcat,			/* concatenation of other devices */
	Finter,			/* interleaving of other devices */
	Fpart,			/* part of another device */
	Fclear,			/* start over */
	Fdel,			/* delete a configured device */
	Fdisk,			/* set default tree and sector size */

	Sectorsz = 1,		/* default sector size */
	Blksize	= 8*1024,	/* interleaving unit, for Finter only */

	Incr = 5,		/* growth step for a tree's devs array */

	/*
	 * Every qid path is tagged with its tree number.
	 * Tree 0 is #k/fs: it is added automatically, never goes
	 * away, and its first qid belongs to the ctl file.
	 */
	Qtop	= 0,		/* #k */
	Qdir,			/* a tree directory (e.g. #k/fs) */
	Qctl,			/* ctl file; exists only as #k/fs/ctl */
	Qfirst,			/* first qid handed to a device */

	/* direction argument for the i/o helpers */
	Iswrite = 0,
	Isread,

	/* lookup policy for gettree and getdev */
	Optional = 0,
	Mustexist,

	/* tunable parameters */
	Maxconf	= 4*1024,	/* max length of the textual config */
	Ndevs	= 32,		/* max inner devices per command */
	Ntrees	= 128,		/* max number of trees */
	Maxretries = 3,		/* max retries after i/o errors */
	Retrypause = 5000,	/* ms to pause between retries */
};
64
 
65
typedef struct Inner Inner;
66
typedef struct Fsdev Fsdev;
67
typedef struct Tree Tree;
68
 
69
struct Inner
70
{
71
	char	*iname;		/* inner device name */
72
	vlong	isize;		/* size of inner device */
73
	Chan	*idev;		/* inner device */
74
};
75
 
76
struct Fsdev
77
{
78
	Ref;			/* one per Chan doing I/O */
79
	int	gone;		/* true if removed */
80
	int	vers;		/* qid version for this device */
81
	int	type;		/* Fnone, Fmirror, ... */
82
	char	*name;		/* name for this fsdev */
83
	Tree*	tree;		/* where the device is kept */
84
	vlong	size;		/* min(inner[X].isize) */
85
	vlong	start;		/* start address (for Fpart) */
86
	uint	ndevs;		/* number of inner devices */
87
	int	perm;		/* minimum of inner device perms */
88
	Inner	*inner[Ndevs];	/* inner devices */
89
};
90
 
91
struct Tree
92
{
93
	char	*name;		/* name for #k/<name> */
94
	Fsdev	**devs;		/* devices in dir. */
95
	uint	ndevs;		/* number of devices */
96
	uint	nadevs;		/* number of allocated devices in devs */
97
};
98
 
99
#define dprint if(debug)print
100
 
101
extern Dev fsdevtab;		/* forward */
102
 
103
static RWlock lck;		/* r: use devices; w: change config  */
104
static Tree fstree;		/* The main "fs" tree. Never goes away */
105
static Tree *trees[Ntrees];	/* internal representation of config */
106
static int ntrees;		/* max number of trees */
107
static int qidvers;
108
 
109
static char *disk;		/* default tree name used */
110
static char *source;		/* default inner device used */
111
static int sectorsz = Sectorsz;	/* default sector size */
112
 
113
static char confstr[Maxconf];	/* textual configuration */
114
 
115
static int debug;
116
 
117
static char cfgstr[] = "fsdev:\n";
118
 
119
static Qid tqid = {Qtop, 0, QTDIR};
120
static Qid cqid = {Qctl, 0, 0};
121
 
122
static char* tnames[] = {
123
	[Fmirror]	"mirror",
124
	[Fcat]		"cat",
125
	[Finter]	"inter",
126
	[Fpart]		"part",
127
};
128
 
129
static Cmdtab configs[] = {
130
	Fmirror,"mirror",	0,
131
	Fcat,	"cat",		0,
132
	Finter,	"inter",	0,
133
	Fpart,	"part",		0,
134
	Fclear,	"clear",	1,
135
	Fdel,	"del",		2,
136
	Fdisk,	"disk",		0,
137
};
138
 
139
static char Egone[] = "device is gone";		/* file has been removed */
140
 
141
static char*
142
seprintdev(char *s, char *e, Fsdev *mp)
143
{
144
	int i;
145
 
146
	if(mp == nil)
147
		return seprint(s, e, "<null Fsdev>");
148
	if(mp->type < 0 || mp->type >= nelem(tnames) || tnames[mp->type] == nil)
149
		return seprint(s, e, "bad device type %d\n", mp->type);
150
 
151
	s = strecpy(s, e, tnames[mp->type]);
152
	if(mp->tree != &fstree)
153
		s = seprint(s, e, " %s/%s", mp->tree->name, mp->name);
154
	else
155
		s = seprint(s, e, " %s", mp->name);
156
	for(i = 0; i < mp->ndevs; i++)
157
		s = seprint(s, e, " %s", mp->inner[i]->iname);
158
	switch(mp->type){
159
	case Fmirror:
160
	case Fcat:
161
	case Finter:
162
		s = strecpy(s, e, "\n");
163
		break;
164
	case Fpart:
165
		s = seprint(s, e, " %ulld %ulld\n", mp->start, mp->size);
166
		break;
167
	default:
168
		panic("#k: seprintdev bug");
169
	}
170
	return s;
171
}
172
 
173
static vlong
174
mkpath(int tree, int devno)
175
{
176
	return (tree&0xFFFF)<<16 | devno&0xFFFF;
177
}
178
 
179
/*
 * Extract the tree number (bits 16-31) from a qid path.
 * The shift is done on an unsigned value: right-shifting a
 * negative int is implementation-defined, and q has its top
 * bit set for tree numbers of 0x8000 and above.
 */
static int
path2treeno(int q)
{
	return ((unsigned int)q >> 16) & 0xFFFF;
}
184
 
185
/* Extract the device number (low 16 bits) from a qid path. */
static int
path2devno(int q)
{
	int devno;

	devno = q & 0xFFFF;
	return devno;
}
190
 
191
static Tree*
192
gettree(int i, int mustexist)
193
{
194
	dprint("gettree %d\n", i);
195
	if(i < 0)
196
		panic("#k: bug: bad tree index %d in gettree", i);
197
	if(i >= ntrees || trees[i] == nil)
198
		if(mustexist)
199
			error(Enonexist);
200
		else
201
			return nil;
202
	return trees[i];
203
}
204
 
205
static Fsdev*
206
getdev(Tree *t, int i, int mustexist)
207
{
208
	dprint("getdev %d\n", i);
209
	if(i < 0)
210
		panic("#k: bug: bad dev index %d in getdev", i);
211
	if(i >= t->nadevs || t->devs[i] == nil)
212
		if(mustexist)
213
			error(Enonexist);
214
		else
215
			return nil;
216
	return t->devs[i];
217
}
218
 
219
static Fsdev*
220
path2dev(int q)
221
{
222
	Tree	*t;
223
 
224
	dprint("path2dev %ux\n", q);
225
	t = gettree(path2treeno(q), Mustexist);
226
	return getdev(t, path2devno(q) - Qfirst, Mustexist);
227
}
228
 
229
static Tree*
230
treealloc(char *name)
231
{
232
	int	i;
233
	Tree	*t;
234
 
235
	dprint("treealloc %s\n", name);
236
	for(i = 0; i < nelem(trees); i++)
237
		if(trees[i] == nil)
238
			break;
239
	if(i == nelem(trees))
240
		return nil;
241
	t = trees[i] = mallocz(sizeof(Tree), 1);
242
	if(t == nil)
243
		return nil;
244
	if(i == ntrees)
245
		ntrees++;
246
	kstrdup(&t->name, name);
247
	return t;
248
}
249
 
250
static Tree*
251
lookuptree(char *name)
252
{
253
	int i;
254
 
255
	dprint("lookuptree %s\n", name);
256
	for(i = 0; i < ntrees; i++)
257
		if(trees[i] != nil && strcmp(trees[i]->name, name) == 0)
258
			return trees[i];
259
	return nil;
260
}
261
 
262
static Fsdev*
263
devalloc(Tree *t, char *name)
264
{
265
	int	i, ndevs;
266
	Fsdev	*mp, **devs;
267
 
268
	dprint("devalloc %s %s\n", t->name, name);
269
	mp = mallocz(sizeof(Fsdev), 1);
270
	if(mp == nil)
271
		return nil;
272
	for(i = 0; i < t->nadevs; i++)
273
		if(t->devs[i] == nil)
274
			break;
275
	if(i >= t->nadevs){
276
		if(t->nadevs % Incr == 0){
277
			ndevs = t->nadevs + Incr;
278
			devs = realloc(t->devs, ndevs * sizeof(Fsdev*));
279
			if(devs == nil){
280
				free(mp);
281
				return nil;
282
			}
283
			t->devs = devs;
284
		}
285
		t->devs[t->nadevs] = nil;
286
		t->nadevs++;
287
	}
288
	kstrdup(&mp->name, name);
289
	mp->vers = ++qidvers;
290
	mp->tree = t;
291
	t->devs[i] = mp;
292
	t->ndevs++;
293
	return mp;
294
}
295
 
296
static void
297
deltree(Tree *t)
298
{
299
	int i;
300
 
301
	dprint("deltree %s\n", t->name);
302
	for(i = 0; i < ntrees; i++)
303
		if(trees[i] == t){
304
			if(i > 0){		/* "fs" never goes away */
305
				free(t->name);
306
				free(t->devs);
307
				free(t);
308
				trees[i] = nil;
309
			}
310
			return;
311
		}
312
	panic("#k: deltree: bug: tree not found");
313
}
314
 
315
/*
316
 * A device is gone and we know that all its users are gone.
317
 * A tree is gone when all its devices are gone ("fs" is never gone).
318
 * Must close devices outside locks, so we could nest our own devices.
319
 */
320
static void
321
mdeldev(Fsdev *mp)
322
{
323
	int	i;
324
	Inner	*in;
325
	Tree	*t;
326
 
327
	dprint("deldev %s gone %d ref %uld\n", mp->name, mp->gone, mp->ref);
328
 
329
	mp->gone = 1;
330
	mp->vers = ++qidvers;
331
 
332
	wlock(&lck);
333
	t = mp->tree;
334
	for(i = 0; i < t->nadevs; i++)
335
		if(t->devs[i] == mp){
336
			t->devs[i] = nil;
337
			t->ndevs--;
338
			if(t->ndevs == 0)
339
				deltree(t);
340
			break;
341
		}
342
	wunlock(&lck);
343
 
344
	free(mp->name);
345
	for(i = 0; i < mp->ndevs; i++){
346
		in = mp->inner[i];
347
		if(in->idev != nil)
348
			cclose(in->idev);
349
		free(in->iname);
350
		free(in);
351
	}
352
	if(debug)
353
		memset(mp, 9, sizeof *mp);	/* poison */
354
	free(mp);
355
}
356
 
357
/*
358
 * Delete one or all devices in one or all trees.
359
 */
360
static void
361
mdelctl(char *tname, char *dname)
362
{
363
	int i, alldevs, alltrees, some;
364
	Fsdev *mp;
365
	Tree *t;
366
 
367
	dprint("delctl %s\n", dname);
368
	alldevs = strcmp(dname, "*") == 0;
369
	alltrees = strcmp(tname, "*") == 0;
370
	some = 0;
371
Again:
372
	wlock(&lck);
373
	for(i = 0; i < ntrees; i++){
374
		t = trees[i];
375
		if(t == nil)
376
			continue;
377
		if(alltrees == 0 && strcmp(t->name, tname) != 0)
378
			continue;
379
		for(i = 0; i < t->nadevs; i++){
380
			mp = t->devs[i];
381
			if(t->devs[i] == nil)
382
				continue;
383
			if(alldevs == 0 && strcmp(mp->name, dname) != 0)
384
				continue;
385
			/*
386
			 * Careful: must close outside locks and that
387
			 * may change the file tree we are looking at.
388
			 */
389
			some++;
390
			mp->gone = 1;
391
			if(mp->ref == 0){
392
				incref(mp);	/* keep it there */
393
				wunlock(&lck);
394
				mdeldev(mp);
395
				goto Again;	/* tree can change */
396
			}
397
		}
398
	}
399
	wunlock(&lck);
400
	if(some == 0 && alltrees == 0)
401
		error(Enonexist);
402
}
403
 
404
static void
405
setdsize(Fsdev* mp, vlong *ilen)
406
{
407
	int	i;
408
	vlong	inlen;
409
	Inner	*in;
410
 
411
	dprint("setdsize %s\n", mp->name);
412
	for (i = 0; i < mp->ndevs; i++){
413
		in = mp->inner[i];
414
		in->isize = ilen[i];
415
		inlen = in->isize;
416
		switch(mp->type){
417
		case Finter:
418
			/* truncate to multiple of Blksize */
419
			inlen &= ~(Blksize-1);
420
			in->isize = inlen;
421
			/* fall through */
422
		case Fmirror:
423
			/* use size of smallest inner device */
424
			if (mp->size == 0 || mp->size > inlen)
425
				mp->size = inlen;
426
			break;
427
		case Fcat:
428
			mp->size += inlen;
429
			break;
430
		case Fpart:
431
			if(mp->start > inlen)
432
				error("partition starts after device end");
433
			if(inlen < mp->start + mp->size){
434
				print("#k: %s: partition truncated from "
435
					"%lld to %lld bytes\n", mp->name,
436
					mp->size, inlen - mp->start);
437
				mp->size = inlen - mp->start;
438
			}
439
			break;
440
		}
441
	}
442
	if(mp->type == Finter)
443
		mp->size *= mp->ndevs;
444
}
445
 
446
static void
447
validdevname(Tree *t, char *dname)
448
{
449
	int i;
450
 
451
	for(i = 0; i < t->nadevs; i++)
452
		if(t->devs[i] != nil && strcmp(t->devs[i]->name, dname) == 0)
453
			error(Eexist);
454
}
455
 
456
static void
457
parseconfig(char *a, long n, Cmdbuf **cbp, Cmdtab **ctp)
458
{
459
	Cmdbuf	*cb;
460
	Cmdtab	*ct;
461
 
462
	*cbp = cb = parsecmd(a, n);
463
	*ctp = ct = lookupcmd(cb, configs, nelem(configs));
464
 
465
	cb->f++;			/* skip command */
466
	cb->nf--;
467
	switch(ct->index){
468
	case Fmirror:
469
	case Fcat:
470
	case Finter:
471
		if(cb->nf < 2)
472
			error("too few arguments for ctl");
473
		if(cb->nf - 1 > Ndevs)
474
			error("too many devices in ctl");
475
		break;
476
	case Fdisk:
477
		if(cb->nf < 1 || cb->nf > 3)
478
			error("ctl usage: disk name [sz dev]");
479
		break;
480
	case Fpart:
481
		if(cb->nf != 4 && (cb->nf != 3 || source == nil))
482
			error("ctl usage: part new [file] off len");
483
		break;
484
	}
485
}
486
 
487
static void
488
parsename(char *name, char *disk, char **tree, char **dev)
489
{
490
	char *slash;
491
 
492
	slash = strchr(name, '/');
493
	if(slash == nil){
494
		if(disk != nil)
495
			*tree = disk;
496
		else
497
			*tree = "fs";
498
		*dev = name;
499
	}else{
500
		*tree = name;
501
		*slash++ = 0;
502
		*dev = slash;
503
	}
504
	validname(*tree, 0);
505
	validname(*dev, 0);
506
}
507
 
508
static int
509
getattrs(Chan *c, vlong *lenp, int *permp)
510
{
511
	uchar	buf[128];	/* old DIRLEN plus a little should be plenty */
512
	Dir	d;
513
	long	l;
514
 
515
	*lenp = 0;
516
	*permp = 0;
517
	l = devtab[c->type]->stat(c, buf, sizeof buf);
518
	if (l >= 0 && convM2D(buf, l, &d, nil) > 0) {
519
		*lenp = d.length;
520
		*permp = d.mode & 0777;
521
	}
522
	return l;
523
}
524
 
525
/*
526
 * Process a single line of configuration,
527
 * often of the form "cmd newname idev0 idev1".
528
 * locking is tricky, because we need a write lock to
529
 * add/remove devices yet adding/removing them may lead
530
 * to calls to this driver that require a read lock (when
531
 * inner devices are also provided by us).
532
 */
533
static void
534
mconfig(char* a, long n)
535
{
536
	int	i;
537
	int	*iperm;
538
	vlong	size, start;
539
	vlong	*ilen;
540
	char	*tname, *dname, *fakef[4];
541
	Chan	**idev;
542
	Cmdbuf	*cb;
543
	Cmdtab	*ct;
544
	Fsdev	*mp;
545
	Inner	*inprv;
546
	Tree	*t;
547
 
548
	/* ignore comments & empty lines */
549
	if (*a == '\0' || *a == '#' || *a == '\n')
550
		return;
551
 
552
	dprint("mconfig\n");
553
	size = 0;
554
	start = 0;
555
	mp = nil;
556
	cb = nil;
557
	idev = nil;
558
	ilen = nil;
559
	iperm = nil;
560
 
561
	if(waserror()){
562
		free(cb);
563
		nexterror();
564
	}
565
 
566
	parseconfig(a, n, &cb, &ct);
567
	switch (ct->index) {
568
	case Fdisk:
569
		kstrdup(&disk, cb->f[0]);
570
		if(cb->nf >= 2)
571
			sectorsz = strtoul(cb->f[1], 0, 0);
572
		else
573
			sectorsz = Sectorsz;
574
		if(cb->nf == 3)
575
			kstrdup(&source, cb->f[2]);
576
		else{
577
			free(source);
578
			source = nil;
579
		}
580
		poperror();
581
		free(cb);
582
		return;
583
	case Fclear:
584
		poperror();
585
		free(cb);
586
		mdelctl("*", "*");		/* del everything */
587
		return;
588
	case Fpart:
589
		if(cb->nf == 3){
590
			/*
591
			 * got a request in the format of sd(3),
592
			 * pretend we got one in our format.
593
			 * later we change end to be len.
594
			 */
595
			fakef[0] = cb->f[0];
596
			fakef[1] = source;
597
			fakef[2] = cb->f[1];
598
			fakef[3] = cb->f[2];
599
			cb->f = fakef;
600
			cb->nf = 4;
601
		}
602
		start = strtoll(cb->f[2], nil, 10);
603
		size =  strtoll(cb->f[3], nil, 10);
604
		if(cb->f == fakef)
605
			size -= start;		/* it was end */
606
		cb->nf -= 2;
607
		break;
608
	}
609
	parsename(cb->f[0], disk, &tname, &dname);
610
	for(i = 1; i < cb->nf; i++)
611
		validname(cb->f[i], 1);
612
 
613
	if(ct->index == Fdel){
614
		mdelctl(tname, dname);
615
		poperror();
616
		free(cb);
617
		return;
618
	}
619
 
620
	/*
621
	 * Open all inner devices while we have only a read lock.
622
	 */
623
	poperror();
624
	rlock(&lck);
625
	if(waserror()){
626
		runlock(&lck);
627
Fail:
628
		for(i = 1; i < cb->nf; i++)
629
			if(idev != nil && idev[i-1] != nil)
630
				cclose(idev[i]);
631
		if(mp != nil)
632
			mdeldev(mp);
633
		free(idev);
634
		free(ilen);
635
		free(iperm);
636
		free(cb);
637
		nexterror();
638
	}
639
	/* record names, lengths and perms of all named files */
640
	idev = smalloc(sizeof(Chan*) * Ndevs);
641
	ilen = smalloc(sizeof(vlong) * Ndevs);
642
	iperm = smalloc(sizeof(int) * Ndevs);
643
	for(i = 1; i < cb->nf; i++){
644
		idev[i-1] = namec(cb->f[i], Aopen, ORDWR, 0);
645
		getattrs(idev[i-1], &ilen[i-1], &iperm[i-1]);
646
	}
647
	poperror();
648
	runlock(&lck);
649
 
650
	/*
651
	 * Get a write lock and add the device if we can.
652
	 */
653
	wlock(&lck);
654
	if(waserror()){
655
		wunlock(&lck);
656
		goto Fail;
657
	}
658
 
659
	t = lookuptree(tname);
660
	if(t != nil)
661
		validdevname(t, dname);
662
	else
663
		t = treealloc(tname);
664
	if(t == nil)
665
		error("no more trees");
666
	mp = devalloc(t, dname);
667
	if(mp == nil){
668
		if(t->ndevs == 0)	/* it was created for us */
669
			deltree(t);	/* but we will not mdeldev() */
670
		error(Enomem);
671
	}
672
 
673
	/* construct mp from iname, idev and iperm arrays */
674
	mp->type = ct->index;
675
	if(mp->type == Fpart){
676
		mp->start = start * sectorsz;
677
		mp->size = size * sectorsz;
678
	}
679
	mp->perm = 0666;
680
	for(i = 1; i < cb->nf; i++){
681
		inprv = mp->inner[i-1] = mallocz(sizeof(Inner), 1);
682
		if(inprv == nil)
683
			error(Enomem);
684
		mp->ndevs++;
685
		kstrdup(&inprv->iname, cb->f[i]);
686
		inprv->idev = idev[i-1];
687
		idev[i-1] = nil;
688
		/* use the most restrictive of the inner permissions */
689
		mp->perm &= iperm[i-1];
690
	}
691
	setdsize(mp, ilen);
692
 
693
	poperror();
694
	wunlock(&lck);
695
	free(idev);
696
	free(ilen);
697
	free(iperm);
698
	free(cb);
699
}
700
 
701
static void
702
rdconf(void)
703
{
704
	int mustrd;
705
	char *c, *e, *p, *s;
706
	Chan *cc;
707
	static int configed;
708
 
709
	/* only read config file once */
710
	if (configed)
711
		return;
712
	configed = 1;
713
 
714
	dprint("rdconf\n");
715
	/* add the std "fs" tree */
716
	trees[0] = &fstree;
717
	ntrees++;
718
	fstree.name = "fs";
719
 
720
	/* identify the config file */
721
	s = getconf("fsconfig");
722
	if (s == nil){
723
		mustrd = 0;
724
		s = "/dev/sdC0/fscfg";
725
	} else
726
		mustrd = 1;
727
 
728
	/* read it */
729
	cc = nil;
730
	c = nil;
731
	if (waserror()){
732
		if (cc != nil)
733
			cclose(cc);
734
		if (c)
735
			free(c);
736
		if (!mustrd)
737
			return;
738
		nexterror();
739
	}
740
	cc = namec(s, Aopen, OREAD, 0);
741
	devtab[cc->type]->read(cc, confstr, sizeof confstr, 0);
742
	cclose(cc);
743
	cc = nil;
744
 
745
	/* validate, copy and erase config; mconfig will repopulate confstr */
746
	if (strncmp(confstr, cfgstr, sizeof cfgstr - 1) != 0)
747
		error("bad #k config, first line must be: 'fsdev:\\n'");
748
	kstrdup(&c, confstr + sizeof cfgstr - 1);
749
	memset(confstr, 0, sizeof confstr);
750
 
751
	/* process config copy one line at a time */
752
	for (p = c; p != nil && *p != '\0'; p = e){
753
		e = strchr(p, '\n');
754
		if (e == nil)
755
			e = p + strlen(p);
756
		else
757
			e++;
758
		mconfig(p, e - p);
759
	}
760
	USED(cc);		/* until now, can be used in waserror clause */
761
	poperror();
762
}
763
 
764
static int
765
mgen(Chan *c, char*, Dirtab*, int, int i, Dir *dp)
766
{
767
	int	treeno;
768
	Fsdev	*mp;
769
	Qid	qid;
770
	Tree	*t;
771
 
772
	dprint("mgen %#ullx %d\n", c->qid.path, i);
773
	qid.type = QTDIR;
774
	qid.vers = 0;
775
	if(c->qid.path == Qtop){
776
		if(i == DEVDOTDOT){
777
			devdir(c, tqid, "#k", 0, eve, DMDIR|0775, dp);
778
			return 1;
779
		}
780
		t = gettree(i, Optional);
781
		if(t == nil){
782
			dprint("no\n");
783
			return -1;
784
		}
785
		qid.path = mkpath(i, Qdir);
786
		devdir(c, qid, t->name, 0, eve, DMDIR|0775, dp);
787
		return 1;
788
	}
789
 
790
	treeno = path2treeno(c->qid.path);
791
	t = gettree(treeno, Optional);
792
	if(t == nil){
793
		dprint("no\n");
794
		return -1;
795
	}
796
	if((c->qid.type & QTDIR) != 0){
797
		if(i == DEVDOTDOT){
798
			devdir(c, tqid, "#k", 0, eve, DMDIR|0775, dp);
799
			return 1;
800
		}
801
		if(treeno == 0){
802
			/* take care of #k/fs/ctl */
803
			if(i == 0){
804
				devdir(c, cqid, "ctl", 0, eve, 0664, dp);
805
				return 1;
806
			}
807
			i--;
808
		}
809
		mp = getdev(t, i, Optional);
810
		if(mp == nil){
811
			dprint("no\n");
812
			return -1;
813
		}
814
		qid.type = QTFILE;
815
		qid.vers = mp->vers;
816
		qid.path = mkpath(treeno, Qfirst+i);
817
		devdir(c, qid, mp->name, mp->size, eve, mp->perm, dp);
818
		return 1;
819
	}
820
 
821
	if(i == DEVDOTDOT){
822
		qid.path = mkpath(treeno, Qdir);
823
		devdir(c, qid, t->name, 0, eve, DMDIR|0775, dp);
824
		return 1;
825
	}
826
	dprint("no\n");
827
	return -1;
828
}
829
 
830
static Chan*
831
mattach(char *spec)
832
{
833
	dprint("mattach\n");
834
	return devattach(fsdevtab.dc, spec);
835
}
836
 
837
static Walkqid*
838
mwalk(Chan *c, Chan *nc, char **name, int nname)
839
{
840
	Walkqid *wq;
841
 
842
	rdconf();
843
 
844
	dprint("mwalk %llux\n", c->qid.path);
845
	rlock(&lck);
846
	if(waserror()){
847
		runlock(&lck);
848
		nexterror();
849
	}
850
	wq = devwalk(c, nc, name, nname, 0, 0, mgen);
851
	poperror();
852
	runlock(&lck);
853
	return wq;
854
}
855
 
856
static int
857
mstat(Chan *c, uchar *db, int n)
858
{
859
	int	p;
860
	Dir	d;
861
	Fsdev	*mp;
862
	Qid	q;
863
	Tree	*t;
864
 
865
	dprint("mstat %llux\n", c->qid.path);
866
	rlock(&lck);
867
	if(waserror()){
868
		runlock(&lck);
869
		nexterror();
870
	}
871
	p = c->qid.path;
872
	memset(&d, 0, sizeof d);
873
	switch(p){
874
	case Qtop:
875
		devdir(c, tqid, "#k", 0, eve, DMDIR|0775, &d);
876
		break;
877
	case Qctl:
878
		devdir(c, cqid, "ctl", 0, eve, 0664, &d);
879
		break;
880
	default:
881
		t = gettree(path2treeno(p), Mustexist);
882
		if(c->qid.type & QTDIR)
883
			devdir(c, c->qid, t->name, 0, eve, DMDIR|0775, &d);
884
		else{
885
			mp = getdev(t, path2devno(p) - Qfirst, Mustexist);
886
			q = c->qid;
887
			q.vers = mp->vers;
888
			devdir(c, q, mp->name, mp->size, eve, mp->perm, &d);
889
		}
890
	}
891
	n = convD2M(&d, db, n);
892
	if (n == 0)
893
		error(Ebadarg);
894
	poperror();
895
	runlock(&lck);
896
	return n;
897
}
898
 
899
/*
 * Open c.  Directories can only be opened with OREAD.
 * Opening a device file checks its permissions and takes a
 * reference on the device, which mclose gives back.
 * Raises Egone if the device has been marked as removed.
 */
static Chan*
mopen(Chan *c, int omode)
{
	int	isdir;
	Fsdev	*mp;

	dprint("mopen %llux\n", c->qid.path);
	isdir = (c->qid.type & QTDIR) != 0;
	if(isdir && omode != OREAD)
		error(Eperm);
	if(c->qid.path != Qctl && !isdir){
		rlock(&lck);
		if(waserror()){
			runlock(&lck);
			nexterror();
		}
		mp = path2dev(c->qid.path);
		if(mp->gone)
			error(Egone);
		devpermcheck(eve, mp->perm, omode);
		incref(mp);
		poperror();
		runlock(&lck);
	}
	/*
	 * Our mgen does not return the info for the qid
	 * but only for its children. Don't use devopen here.
	 */
	c->offset = 0;
	c->mode = openmode(omode & ~OTRUNC);
	c->flag |= COPEN;
	return c;
}
932
 
933
static void
934
mclose(Chan *c)
935
{
936
	int	mustdel, q;
937
	Fsdev	*mp;
938
 
939
	dprint("mclose %llux\n", c->qid.path);
940
	if(c->qid.type & QTDIR || !(c->flag & COPEN))
941
		return;
942
	rlock(&lck);
943
	if(waserror()){
944
		runlock(&lck);
945
		nexterror();
946
	}
947
	mustdel = 0;
948
	mp = nil;
949
	q = c->qid.path;
950
	if(q == Qctl){
951
		free(disk);
952
		disk = nil;	/* restore defaults */
953
		free(source);
954
		source = nil;
955
		sectorsz = Sectorsz;
956
	}else{
957
		mp = path2dev(q);
958
		if(mp->gone != 0 && mp->ref == 1)
959
			mustdel = 1;
960
		else
961
			decref(mp);
962
	}
963
	poperror();
964
	runlock(&lck);
965
	if(mustdel)
966
		mdeldev(mp);
967
}
968
 
969
static long
970
io(Fsdev *mp, Inner *in, int isread, void *a, long l, vlong off)
971
{
972
	long wl;
973
	Chan	*mc;
974
 
975
	mc = in->idev;
976
	if(mc == nil)
977
		error(Egone);
978
	if (waserror()) {
979
		print("#k: %s: byte %,lld count %ld (of #k/%s): %s error: %s\n",
980
			in->iname, off, l, mp->name, (isread? "read": "write"),
981
			(up && up->errstr? up->errstr: ""));
982
		nexterror();
983
	}
984
	if (isread)
985
		wl = devtab[mc->type]->read(mc, a, l, off);
986
	else
987
		wl = devtab[mc->type]->write(mc, a, l, off);
988
	poperror();
989
	return wl;
990
}
991
 
992
/* NB: a transfer could span multiple inner devices */
993
static long
994
catio(Fsdev *mp, int isread, void *a, long n, vlong off)
995
{
996
	int	i;
997
	long	l, res;
998
	Inner	*in;
999
 
1000
	if(debug)
1001
		print("catio %d %p %ld %lld\n", isread, a, n, off);
1002
	res = n;
1003
	for (i = 0; n > 0 && i < mp->ndevs; i++){
1004
		in = mp->inner[i];
1005
		if (off >= in->isize){
1006
			off -= in->isize;
1007
			continue;		/* not there yet */
1008
		}
1009
		if (off + n > in->isize)
1010
			l = in->isize - off;
1011
		else
1012
			l = n;
1013
		if(debug)
1014
			print("\tdev %d %p %ld %lld\n", i, a, l, off);
1015
 
1016
		if (io(mp, in, isread, a, l, off) != l)
1017
			error(Eio);
1018
 
1019
		a = (char*)a + l;
1020
		off = 0;
1021
		n -= l;
1022
	}
1023
	if(debug)
1024
		print("\tres %ld\n", res - n);
1025
	return res - n;
1026
}
1027
 
1028
static long
1029
interio(Fsdev *mp, int isread, void *a, long n, vlong off)
1030
{
1031
	int	i;
1032
	long	boff, res, l, wl, wsz;
1033
	vlong	woff, blk, mblk;
1034
 
1035
	blk  = off / Blksize;
1036
	boff = off % Blksize;
1037
	wsz  = Blksize - boff;
1038
	res = n;
1039
	while(n > 0){
1040
		mblk = blk / mp->ndevs;
1041
		i    = blk % mp->ndevs;
1042
		woff = mblk*Blksize + boff;
1043
		if (n > wsz)
1044
			l = wsz;
1045
		else
1046
			l = n;
1047
 
1048
		wl = io(mp, mp->inner[i], isread, a, l, woff);
1049
		if (wl != l)
1050
			error(Eio);
1051
 
1052
		blk++;
1053
		boff = 0;
1054
		wsz = Blksize;
1055
		a = (char*)a + l;
1056
		n -= l;
1057
	}
1058
	return res;
1059
}
1060
 
1061
static char*
1062
seprintconf(char *s, char *e)
1063
{
1064
	int	i, j;
1065
	Tree	*t;
1066
 
1067
	*s = 0;
1068
	for(i = 0; i < ntrees; i++){
1069
		t = trees[i];
1070
		if(t != nil)
1071
			for(j = 0; j < t->nadevs; j++)
1072
				if(t->devs[j] != nil)
1073
					s = seprintdev(s, e, t->devs[j]);
1074
	}
1075
	return s;
1076
}
1077
 
1078
static long
1079
mread(Chan *c, void *a, long n, vlong off)
1080
{
1081
	int	i, retry;
1082
	long	l, res;
1083
	Fsdev	*mp;
1084
	Tree	*t;
1085
 
1086
	dprint("mread %llux\n", c->qid.path);
1087
	rlock(&lck);
1088
	if(waserror()){
1089
		runlock(&lck);
1090
		nexterror();
1091
	}
1092
	res = -1;
1093
	if(c->qid.type & QTDIR){
1094
		res = devdirread(c, a, n, 0, 0, mgen);
1095
		goto Done;
1096
	}
1097
	if(c->qid.path == Qctl){
1098
		seprintconf(confstr, confstr + sizeof(confstr));
1099
		res = readstr((long)off, a, n, confstr);
1100
		goto Done;
1101
	}
1102
 
1103
	t = gettree(path2treeno(c->qid.path), Mustexist);
1104
	mp = getdev(t, path2devno(c->qid.path) - Qfirst, Mustexist);
1105
 
1106
	if(off >= mp->size){
1107
		res = 0;
1108
		goto Done;
1109
	}
1110
	if(off + n > mp->size)
1111
		n = mp->size - off;
1112
	if(n == 0){
1113
		res = 0;
1114
		goto Done;
1115
	}
1116
 
1117
	switch(mp->type){
1118
	case Fcat:
1119
		res = catio(mp, Isread, a, n, off);
1120
		break;
1121
	case Finter:
1122
		res = interio(mp, Isread, a, n, off);
1123
		break;
1124
	case Fpart:
1125
		res = io(mp, mp->inner[0], Isread, a, n, mp->start + off);
1126
		break;
1127
	case Fmirror:
1128
		retry = 0;
1129
		do {
1130
			if (retry > 0) {
1131
				print("#k/%s: retry %d read for byte %,lld "
1132
					"count %ld: %s\n", mp->name, retry, off,
1133
					n, (up && up->errstr? up->errstr: ""));
1134
				/*
1135
				 * pause before retrying in case it's due to
1136
				 * a transient bus or controller problem.
1137
				 */
1138
				tsleep(&up->sleep, return0, 0, Retrypause);
1139
			}
1140
			for (i = 0; i < mp->ndevs; i++){
1141
				if (waserror())
1142
					continue;
1143
				l = io(mp, mp->inner[i], Isread, a, n, off);
1144
				poperror();
1145
				if (l >= 0){
1146
					res = l;
1147
					break;		/* read a good copy */
1148
				}
1149
			}
1150
		} while (i == mp->ndevs && ++retry <= Maxretries);
1151
		if (retry > Maxretries) {
1152
			/* no mirror had a good copy of the block */
1153
			print("#k/%s: byte %,lld count %ld: CAN'T READ "
1154
				"from mirror: %s\n", mp->name, off, n,
1155
				(up && up->errstr? up->errstr: ""));
1156
			error(Eio);
1157
		} else if (retry > 0)
1158
			print("#k/%s: byte %,lld count %ld: retry read OK "
1159
				"from mirror: %s\n", mp->name, off, n,
1160
				(up && up->errstr? up->errstr: ""));
1161
		break;
1162
	}
1163
Done:
1164
	poperror();
1165
	runlock(&lck);
1166
	return res;
1167
}
1168
 
1169
static long
1170
mwrite(Chan *c, void *a, long n, vlong off)
1171
{
1172
	int	i, allbad, anybad, retry;
1173
	long	l, res;
1174
	Fsdev	*mp;
1175
	Tree	*t;
1176
 
1177
	dprint("mwrite %llux\n", c->qid.path);
1178
	if (c->qid.type & QTDIR)
1179
		error(Eisdir);
1180
	if (c->qid.path == Qctl){
1181
		mconfig(a, n);
1182
		return n;
1183
	}
1184
 
1185
	rlock(&lck);
1186
	if(waserror()){
1187
		runlock(&lck);
1188
		nexterror();
1189
	}
1190
 
1191
	t = gettree(path2treeno(c->qid.path), Mustexist);
1192
	mp = getdev(t, path2devno(c->qid.path) - Qfirst, Mustexist);
1193
 
1194
	if(off >= mp->size){
1195
		res = 0;
1196
		goto Done;
1197
	}
1198
	if(off + n > mp->size)
1199
		n = mp->size - off;
1200
	if(n == 0){
1201
		res = 0;
1202
		goto Done;
1203
	}
1204
	res = n;
1205
	switch(mp->type){
1206
	case Fcat:
1207
		res = catio(mp, Iswrite, a, n, off);
1208
		break;
1209
	case Finter:
1210
		res = interio(mp, Iswrite, a, n, off);
1211
		break;
1212
	case Fpart:
1213
		res = io(mp, mp->inner[0], Iswrite, a, n, mp->start + off);
1214
		if (res != n)
1215
			error(Eio);
1216
		break;
1217
	case Fmirror:
1218
		retry = 0;
1219
		do {
1220
			if (retry > 0) {
1221
				print("#k/%s: retry %d write for byte %,lld "
1222
					"count %ld: %s\n", mp->name, retry, off,
1223
					n, (up && up->errstr? up->errstr: ""));
1224
				/*
1225
				 * pause before retrying in case it's due to
1226
				 * a transient bus or controller problem.
1227
				 */
1228
				tsleep(&up->sleep, return0, 0, Retrypause);
1229
			}
1230
			allbad = 1;
1231
			anybad = 0;
1232
			for (i = mp->ndevs - 1; i >= 0; i--){
1233
				if (waserror()) {
1234
					anybad = 1;
1235
					continue;
1236
				}
1237
				l = io(mp, mp->inner[i], Iswrite, a, n, off);
1238
				poperror();
1239
				if (l == n)
1240
					allbad = 0;	/* wrote a good copy */
1241
				else
1242
					anybad = 1;
1243
			}
1244
		} while (anybad && ++retry <= Maxretries);
1245
		if (allbad) {
1246
			/* no mirror took a good copy of the block */
1247
			print("#k/%s: byte %,lld count %ld: CAN'T WRITE "
1248
				"to mirror: %s\n", mp->name, off, n,
1249
				(up && up->errstr? up->errstr: ""));
1250
			error(Eio);
1251
		} else if (retry > 0)
1252
			print("#k/%s: byte %,lld count %ld: retry wrote OK "
1253
				"to mirror: %s\n", mp->name, off, n,
1254
				(up && up->errstr? up->errstr: ""));
1255
 
1256
		break;
1257
	}
1258
Done:
1259
	poperror();
1260
	runlock(&lck);
1261
	return res;
1262
}
1263
 
1264
Dev fsdevtab = {
1265
	'k',
1266
	"fs",
1267
 
1268
	devreset,
1269
	devinit,
1270
	devshutdown,
1271
	mattach,
1272
	mwalk,
1273
	mstat,
1274
	mopen,
1275
	devcreate,
1276
	mclose,
1277
	mread,
1278
	devbread,
1279
	mwrite,
1280
	devbwrite,
1281
	devremove,
1282
	devwstat,
1283
	devpower,
1284
	devconfig,
1285
};