Subversion Repositories planix.SVN

Rev

Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
2 - 1
/*
2
 *	© 2005-2010 coraid
3
 *	ATA-over-Ethernet (AoE) storage initiator
4
 */
5
 
6
#include "u.h"
7
#include "../port/lib.h"
8
#include "mem.h"
9
#include "dat.h"
10
#include "fns.h"
11
#include "io.h"
12
#include "ureg.h"
13
#include "../port/error.h"
14
#include "../port/netif.h"
15
#include "etherif.h"
16
#include "../ip/ip.h"
17
#include "../port/aoe.h"
18
 
19
#pragma	varargck argpos	eventlog	1
20
 
21
#define dprint(...)	if(debug) eventlog(__VA_ARGS__); else USED(debug);
22
#define uprint(...)	snprint(up->genbuf, sizeof up->genbuf, __VA_ARGS__);
23
 
24
enum {
25
	Maxunits	= 0xff,
26
	Maxframes	= 128,
27
	Ndevlink	= 6,
28
	Nea		= 6,
29
	Nnetlink	= 6,
30
};
31
 
32
#define TYPE(q)		((ulong)(q).path & 0xf)
33
#define UNIT(q)		(((ulong)(q).path>>4) & 0xff)
34
#define L(q)		(((ulong)(q).path>>12) & 0xf)
35
#define QID(u, t) 	((u)<<4 | (t))
36
/*
 * qid path of a devlink file: type in bits 0-3, unit in bits 4-11 and
 * devlink index in bits 12-15, matching the TYPE, UNIT and L decoders.
 */
#define Q3(l, u, t)	((l)<<12 | QID(u, t))
37
#define UP(d)		((d)->flag & Dup)
38
/*
39
 * would like this to depend on the chan (srb).
40
 * not possible in the current structure.
41
 */
42
#define Nofail(d, s)	((d)->flag & Dnofail)
43
 
44
#define	MS2TK(t)	((t)/MS2HZ)
45
 
46
enum {
47
	Qzero,
48
	Qtopdir		= 1,
49
	Qtopbase,
50
	Qtopctl		= Qtopbase,
51
	Qtoplog,
52
	Qtopend,
53
 
54
	Qunitdir,
55
	Qunitbase,
56
	Qctl		= Qunitbase,
57
	Qdata,
58
	Qconfig,
59
	Qident,
60
 
61
	Qdevlinkdir,
62
	Qdevlinkbase,
63
	Qdevlink	= Qdevlinkbase,
64
	Qdevlinkend,
65
 
66
	Qtopfiles	= Qtopend-Qtopbase,
67
	Qdevlinkfiles	= Qdevlinkend-Qdevlinkbase,
68
 
69
	Eventlen 	= 256,
70
	Nevents 	= 64,			/* must be power of 2 */
71
 
72
	Fread		= 0,
73
	Fwrite,
74
	Tfree		= -1,
75
	Tmgmt,
76
 
77
	/*
78
	 * round trip bounds, timeouts, in ticks.
79
	 * timeouts should be long enough that rebooting
80
	 * the coraid (which usually takes under two minutes)
81
	 * doesn't trigger a timeout.
82
	 */
83
	Rtmax		= MS2TK(320),
84
	Rtmin		= MS2TK(20),
85
	Maxreqticks	= 4*60*HZ,		/* was 45*HZ */
86
 
87
	Dbcnt		= 1024,
88
 
89
	Crd		= 0x20,
90
	Crdext		= 0x24,
91
	Cwr		= 0x30,
92
	Cwrext		= 0x34,
93
	Cid		= 0xec,
94
};
95
 
96
enum {
97
	Read,
98
	Write,
99
};
100
 
101
/*
102
 * unified set of flags
103
 * a Netlink + Aoedev must both be jumbo capable
104
 * to send jumbograms to that interface.
105
 */
106
enum {
107
	/* sync with ahci.h */
108
	Dllba 	= 1<<0,
109
	Dsmart	= 1<<1,
110
	Dpower	= 1<<2,
111
	Dnop	= 1<<3,
112
	Datapi	= 1<<4,
113
	Datapi16= 1<<5,
114
 
115
	/* aoe specific */
116
	Dup	= 1<<6,
117
	Djumbo	= 1<<7,
118
	Dnofail	= 1<<8,
119
};
120
 
121
static char *flagname[] = {
122
	"llba",
123
	"smart",
124
	"power",
125
	"nop",
126
	"atapi",
127
	"atapi16",
128
 
129
	"up",
130
	"jumbo",
131
	"nofail",
132
};
133
 
134
typedef struct {
135
	ushort	flag;
136
	uint	lostjumbo;
137
	int	datamtu;
138
 
139
	Chan	*cc;
140
	Chan	*dc;
141
	Chan	*mtu;		/* open early to prevent bind issues. */
142
	char	path[Maxpath];
143
	uchar	ea[Eaddrlen];
144
} Netlink;
145
 
146
typedef struct {
147
	Netlink	*nl;
148
	int	nea;
149
	ulong	eaidx;
150
	uchar	eatab[Nea][Eaddrlen];
151
	ulong	npkt;
152
	ulong	resent;
153
	ushort	flag;
154
 
155
	ulong	rttavg;
156
	ulong	mintimer;
157
} Devlink;
158
 
159
typedef struct Srb Srb;
160
struct Srb {
161
	Rendez;
162
	Srb	*next;
163
	int	shared;	/* Srb shared with kproc (don't free) */
164
	ulong	ticksent;
165
	ulong	len;
166
	vlong	sector;
167
	short	write;
168
	short	nout;
169
	char	*error;
170
	void	*dp;
171
	void	*data;
172
};
173
 
174
typedef struct {
175
	int	tag;
176
	ulong	bcnt;
177
	ulong	dlen;
178
	vlong	lba;
179
	ulong	ticksent;
180
	int	nhdr;
181
	uchar	hdr[ETHERMINTU];
182
	void	*dp;
183
	Devlink	*dl;
184
	Netlink	*nl;
185
	int	eaidx;
186
	Srb	*srb;
187
} Frame;
188
 
189
typedef struct Aoedev Aoedev;
190
struct Aoedev {
191
	QLock;
192
	Aoedev	*next;
193
 
194
	ulong	vers;
195
 
196
	int	ndl;
197
	ulong	dlidx;
198
	Devlink	*dl;
199
	Devlink	dltab[Ndevlink];
200
 
201
	ushort	fwver;
202
	ushort	flag;
203
	int	nopen;
204
	int	major;
205
	int	minor;
206
	int	unit;
207
	int	lasttag;
208
	int	nframes;
209
	Frame	*frames;
210
	vlong	bsize;
211
	vlong	realbsize;
212
 
213
	uint	maxbcnt;
214
	ushort	nout;
215
	ushort	maxout;
216
	ulong	lastwadj;
217
	Srb	*head;
218
	Srb	*tail;
219
	Srb	*inprocess;
220
 
221
	/* magic numbers 'R' us */
222
	char	serial[20+1];
223
	char	firmware[8+1];
224
	char	model[40+1];
225
	int	nconfig;
226
	uchar	config[1024];
227
	uchar	ident[512];
228
};
229
 
230
#pragma	varargck type	"æ"	Aoedev*
231
 
232
static struct {
233
	Lock;
234
	QLock;
235
	Rendez;
236
	char	buf[Eventlen*Nevents];
237
	char	*rp;
238
	char	*wp;
239
} events;
240
 
241
static struct {
242
	RWlock;
243
	int	nd;
244
	Aoedev	*d;
245
} devs;
246
 
247
static struct {
248
	Lock;
249
	int	reader[Nnetlink];	/* reader is running. */
250
	Rendez	rendez[Nnetlink];	/* confirm exit. */
251
	Netlink	nl[Nnetlink];
252
} netlinks;
253
 
254
extern Dev 	aoedevtab;
255
static Ref 	units;
256
static Ref	drivevers;
257
static int	debug;
258
static int	autodiscover	= 1;
259
static int	rediscover;
260
 
261
static Srb*
262
srballoc(ulong sz)
263
{
264
	Srb *srb;
265
 
266
	srb = malloc(sizeof *srb+sz);
267
	if(srb == nil)
268
		error(Enomem);
269
	srb->dp = srb->data = srb+1;
270
	srb->ticksent = MACHP(0)->ticks;
271
	srb->shared = 0;
272
	return srb;
273
}
274
 
275
static Srb*
276
srbkalloc(void *db, ulong)
277
{
278
	Srb *srb;
279
 
280
	srb = malloc(sizeof *srb);
281
	if(srb == nil)
282
		error(Enomem);
283
	srb->dp = srb->data = db;
284
	srb->ticksent = MACHP(0)->ticks;
285
	srb->shared = 0;
286
	return srb;
287
}
288
 
289
static void
290
srbfree(Srb *srb)
291
{
292
	while(srb->shared)
293
		sched();
294
	free(srb);
295
}
296
 
297
static void
298
srberror(Srb *srb, char *s)
299
{
300
	srb->error = s;
301
	srb->nout--;
302
	if(srb->nout == 0)
303
		wakeup(srb);
304
}
305
 
306
static void
307
frameerror(Aoedev *d, Frame *f, char *s)
308
{
309
	Srb *srb;
310
 
311
	srb = f->srb;
312
	if(f->tag == Tfree || !srb)
313
		return;
314
	f->srb = nil;
315
	f->tag = Tfree;		/* don't get fooled by way-slow responses */
316
	srberror(srb, s);
317
	d->nout--;
318
}
319
 
320
static char*
321
unitname(Aoedev *d)
322
{
323
	uprint("%d.%d", d->major, d->minor);
324
	return up->genbuf;
325
}
326
 
327
static int
328
eventlogready(void*)
329
{
330
	return *events.rp;
331
}
332
 
333
static long
334
eventlogread(void *a, long n)
335
{
336
	int len;
337
	char *p, *buf;
338
 
339
	buf = smalloc(Eventlen);
340
	qlock(&events);
341
	lock(&events);
342
	p = events.rp;
343
	len = *p;
344
	if(len == 0){
345
		n = 0;
346
		unlock(&events);
347
	} else {
348
		if(n > len)
349
			n = len;
350
		/* can't move directly into pageable space with events lock held */
351
		memmove(buf, p+1, n);
352
		*p = 0;
353
		events.rp = p += Eventlen;
354
		if(p >= events.buf + sizeof events.buf)
355
			events.rp = events.buf;
356
		unlock(&events);
357
 
358
		/* the concern here is page faults in memmove below */
359
		if(waserror()){
360
			free(buf);
361
			qunlock(&events);
362
			nexterror();
363
		}
364
		memmove(a, buf, n);
365
		poperror();
366
	}
367
	free(buf);
368
	qunlock(&events);
369
	return n;
370
}
371
 
372
static int
373
eventlog(char *fmt, ...)
374
{
375
	int dragrp, n;
376
	char *p;
377
	va_list arg;
378
 
379
	lock(&events);
380
	p = events.wp;
381
	dragrp = *p++;
382
	va_start(arg, fmt);
383
	n = vsnprint(p, Eventlen-1, fmt, arg);
384
	*--p = n;
385
	p = events.wp += Eventlen;
386
	if(p >= events.buf + sizeof events.buf)
387
		p = events.wp = events.buf;
388
	if(dragrp)
389
		events.rp = p;
390
	unlock(&events);
391
	wakeup(&events);
392
	return n;
393
}
394
 
395
static int
396
eventcount(void)
397
{
398
	int n;
399
 
400
	lock(&events);
401
	if(*events.rp == 0)
402
		n = 0;
403
	else
404
		n = (events.wp - events.rp) & (Nevents - 1);
405
	unlock(&events);
406
	return n/Eventlen;
407
}
408
 
409
static int
410
tsince(int tag)
411
{
412
	int n;
413
 
414
	n = MACHP(0)->ticks & 0xffff;
415
	n -= tag & 0xffff;
416
	if(n < 0)
417
		n += 1<<16;
418
	return n;
419
}
420
 
421
static int
422
newtag(Aoedev *d)
423
{
424
	int t;
425
 
426
	do {
427
		t = ++d->lasttag << 16;
428
		t |= MACHP(0)->ticks & 0xffff;
429
	} while (t == Tfree || t == Tmgmt);
430
	return t;
431
}
432
 
433
static void
434
downdev(Aoedev *d, char *err)
435
{
436
	Frame *f, *e;
437
 
438
	d->flag &= ~Dup;
439
	f = d->frames;
440
	e = f + d->nframes;
441
	for(; f < e; f->tag = Tfree, f->srb = nil, f++)
442
		frameerror(d, f, Eaoedown);
443
	d->inprocess = nil;
444
	eventlog("%æ: removed; %s\n", d, err);
445
}
446
 
447
static Block*
448
allocfb(Frame *f)
449
{
450
	int len;
451
	Block *b;
452
 
453
	len = f->nhdr + f->dlen;
454
	if(len < ETHERMINTU)
455
		len = ETHERMINTU;
456
	b = allocb(len);
457
	memmove(b->wp, f->hdr, f->nhdr);
458
	if(f->dlen)
459
		memmove(b->wp + f->nhdr, f->dp, f->dlen);
460
	b->wp += len;
461
	return b;
462
}
463
 
464
static void
465
putlba(Aoeata *a, vlong lba)
466
{
467
	uchar *c;
468
 
469
	c = a->lba;
470
	c[0] = lba;
471
	c[1] = lba >> 8;
472
	c[2] = lba >> 16;
473
	c[3] = lba >> 24;
474
	c[4] = lba >> 32;
475
	c[5] = lba >> 40;
476
}
477
 
478
static Devlink*
479
pickdevlink(Aoedev *d)
480
{
481
	ulong i, n;
482
	Devlink *l;
483
 
484
	for(i = 0; i < d->ndl; i++){
485
		n = d->dlidx++ % d->ndl;
486
		l = d->dl + n;
487
		if(l && l->flag & Dup)
488
			return l;
489
	}
490
	return 0;
491
}
492
 
493
static int
494
pickea(Devlink *l)
495
{
496
	if(l == 0)
497
		return -1;
498
	if(l->nea == 0)
499
		return -1;
500
	return l->eaidx++ % l->nea;
501
}
502
 
503
static int
504
hset(Aoedev *d, Frame *f, Aoehdr *h, int cmd)
505
{
506
	int i;
507
	Devlink *l;
508
 
509
	if(f->srb && MACHP(0)->ticks - f->srb->ticksent > Maxreqticks){
510
		eventlog("%æ: srb timeout\n", d);
511
		if(cmd == ACata && f->srb && Nofail(d, s))
512
			f->srb->ticksent = MACHP(0)->ticks;
513
		else
514
			frameerror(d, f, Etimedout);
515
		return -1;
516
	}
517
	l = pickdevlink(d);
518
	i = pickea(l);
519
	if(i == -1){
520
		if(cmd != ACata || f->srb == nil || !Nofail(d, s))
521
			downdev(d, "resend fails; no netlink/ea");
522
		return -1;
523
	}
524
	memmove(h->dst, l->eatab[i], Eaddrlen);
525
	memmove(h->src, l->nl->ea, sizeof h->src);
526
	hnputs(h->type, Aoetype);
527
	h->verflag = Aoever << 4;
528
	h->error = 0;
529
	hnputs(h->major, d->major);
530
	h->minor = d->minor;
531
	h->cmd = cmd;
532
 
533
	hnputl(h->tag, f->tag = newtag(d));
534
	f->dl = l;
535
	f->nl = l->nl;
536
	f->eaidx = i;
537
	f->ticksent = MACHP(0)->ticks;
538
 
539
	return f->tag;
540
}
541
 
542
static int
543
resend(Aoedev *d, Frame *f)
544
{
545
	ulong n;
546
	Aoeata *a;
547
 
548
	a = (Aoeata*)f->hdr;
549
	if(hset(d, f, a, a->cmd) == -1)
550
		return -1;
551
	n = f->bcnt;
552
	if(n > d->maxbcnt){
553
		n = d->maxbcnt;		/* mtu mismatch (jumbo fail?) */
554
		if(f->dlen > n)
555
			f->dlen = n;
556
	}
557
	a->scnt = n / Aoesectsz;
558
	f->dl->resent++;
559
	f->dl->npkt++;
560
	if(waserror())
561
		return -1;
562
	devtab[f->nl->dc->type]->bwrite(f->nl->dc, allocfb(f), 0);
563
	poperror();
564
	return 0;
565
}
566
 
567
static void
568
discover(int major, int minor)
569
{
570
	Aoehdr *h;
571
	Block *b;
572
	Netlink *nl, *e;
573
 
574
	nl = netlinks.nl;
575
	e = nl + nelem(netlinks.nl);
576
	for(; nl < e; nl++){
577
		if(nl->cc == nil)
578
			continue;
579
		b = allocb(ETHERMINTU);
580
		if(waserror()){
581
			freeb(b);
582
			nexterror();
583
		}
584
		b->wp = b->rp + ETHERMINTU;
585
		memset(b->rp, 0, ETHERMINTU);
586
		h = (Aoehdr*)b->rp;
587
		memset(h->dst, 0xff, sizeof h->dst);
588
		memmove(h->src, nl->ea, sizeof h->src);
589
		hnputs(h->type, Aoetype);
590
		h->verflag = Aoever << 4;
591
		hnputs(h->major, major);
592
		h->minor = minor;
593
		h->cmd = ACconfig;
594
		poperror();
595
		/* send b down the queue */
596
		devtab[nl->dc->type]->bwrite(nl->dc, b, 0);
597
	}
598
}
599
 
600
/*
601
 * Check all frames on device and resend any frames that have been
602
 * outstanding for 200% of the device round trip time average.
603
 */
604
static void
605
aoesweepproc(void*)
606
{
607
	ulong i, tx, timeout, nbc;
608
	vlong starttick;
609
	enum { Nms = 100, Nbcms = 30*1000, };		/* magic */
610
	uchar *ea;
611
	Aoeata *a;
612
	Aoedev *d;
613
	Devlink *l;
614
	Frame *f, *e;
615
 
616
	nbc = Nbcms/Nms;
617
loop:
618
	if(nbc-- == 0){
619
		if(rediscover && !waserror()){
620
			discover(0xffff, 0xff);
621
			poperror();
622
		}
623
		nbc = Nbcms/Nms;
624
	}
625
	starttick = MACHP(0)->ticks;
626
	rlock(&devs);
627
	for(d = devs.d; d; d = d->next){
628
		if(!canqlock(d))
629
			continue;
630
		if(!UP(d)){
631
			qunlock(d);
632
			continue;
633
		}
634
		tx = 0;
635
		f = d->frames;
636
		e = f + d->nframes;
637
		for (; f < e; f++){
638
			if(f->tag == Tfree)
639
				continue;
640
			l = f->dl;
641
			timeout = l->rttavg << 1;
642
			i = tsince(f->tag);
643
			if(i < timeout)
644
				continue;
645
			if(d->nout == d->maxout){
646
				if(d->maxout > 1)
647
					d->maxout--;
648
				d->lastwadj = MACHP(0)->ticks;
649
			}
650
			a = (Aoeata*)f->hdr;
651
			if(a->scnt > Dbcnt / Aoesectsz &&
652
			   ++f->nl->lostjumbo > (d->nframes << 1)){
653
				ea = f->dl->eatab[f->eaidx];
654
				eventlog("%æ: jumbo failure on %s:%E; lba%lld\n",
655
					d, f->nl->path, ea, f->lba);
656
				d->maxbcnt = Dbcnt;
657
				d->flag &= ~Djumbo;
658
			}
659
			resend(d, f);
660
			if(tx++ == 0){
661
				if((l->rttavg <<= 1) > Rtmax)
662
					l->rttavg = Rtmax;
663
				eventlog("%æ: rtt %ldms\n", d, TK2MS(l->rttavg));
664
			}
665
		}
666
		if(d->nout == d->maxout && d->maxout < d->nframes &&
667
		   TK2MS(MACHP(0)->ticks - d->lastwadj) > 10*1000){ /* more magic */
668
			d->maxout++;
669
			d->lastwadj = MACHP(0)->ticks;
670
		}
671
		qunlock(d);
672
	}
673
	runlock(&devs);
674
	i = Nms - TK2MS(MACHP(0)->ticks - starttick);
675
	if(i > 0)
676
		tsleep(&up->sleep, return0, 0, i);
677
	goto loop;
678
}
679
 
680
static int
681
fmtæ(Fmt *f)
682
{
683
	char buf[16];
684
	Aoedev *d;
685
 
686
	d = va_arg(f->args, Aoedev*);
687
	snprint(buf, sizeof buf, "aoe%d.%d", d->major, d->minor);
688
	return fmtstrcpy(f, buf);
689
}
690
 
691
static void netbind(char *path);
692
 
693
static void
694
aoecfg(void)
695
{
696
	int n, i;
697
	char *p, *f[32], buf[24];
698
 
699
	if((p = getconf("aoeif")) == nil || (n = tokenize(p, f, nelem(f))) < 1)
700
		return;
701
	/* goo! */
702
	for(i = 0; i < n; i++){
703
		p = f[i];
704
		if(strncmp(p, "ether", 5) == 0)
705
			snprint(buf, sizeof buf, "#l%c/ether%c", p[5], p[5]);
706
		else if(strncmp(p, "#l", 2) == 0)
707
			snprint(buf, sizeof buf, "#l%c/ether%c", p[2], p[2]);
708
		else
709
			continue;
710
		if(!waserror()){
711
			netbind(buf);
712
			poperror();
713
		}
714
	}
715
}
716
 
717
static void
718
aoeinit(void)
719
{
720
	static int init;
721
	static QLock l;
722
 
723
	if(!canqlock(&l))
724
		return;
725
	if(init == 0){
726
		fmtinstall(L'æ', fmtæ);
727
		events.rp = events.wp = events.buf;
728
		kproc("aoesweep", aoesweepproc, nil);
729
		aoecfg();
730
		init = 1;
731
	}
732
	qunlock(&l);
733
}
734
 
735
static Chan*
736
aoeattach(char *spec)
737
{
738
	Chan *c;
739
 
740
	if(*spec)
741
		error(Enonexist);
742
	aoeinit();
743
	c = devattach(L'æ', spec);
744
	mkqid(&c->qid, Qzero, 0, QTDIR);
745
	return c;
746
}
747
 
748
static Aoedev*
749
unit2dev(ulong unit)
750
{
751
	int i;
752
	Aoedev *d;
753
 
754
	rlock(&devs);
755
	i = 0;
756
	for(d = devs.d; d; d = d->next)
757
		if(i++ == unit){
758
			runlock(&devs);
759
			return d;
760
		}
761
	runlock(&devs);
762
	uprint("unit lookup failure: %lux pc %#p", unit, getcallerpc(&unit));
763
	error(up->genbuf);
764
	return nil;
765
}
766
 
767
static int
768
unitgen(Chan *c, ulong type, Dir *dp)
769
{
770
	int perm, t;
771
	ulong vers;
772
	vlong size;
773
	char *p;
774
	Aoedev *d;
775
	Qid q;
776
 
777
	d = unit2dev(UNIT(c->qid));
778
	perm = 0644;
779
	size = 0;
780
	vers = d->vers;
781
	t = QTFILE;
782
 
783
	switch(type){
784
	default:
785
		return -1;
786
	case Qctl:
787
		p = "ctl";
788
		break;
789
	case Qdata:
790
		p = "data";
791
		perm = 0640;
792
		if(UP(d))
793
			size = d->bsize;
794
		break;
795
	case Qconfig:
796
		p = "config";
797
		if(UP(d))
798
			size = d->nconfig;
799
		break;
800
	case Qident:
801
		p = "ident";
802
		if(UP(d))
803
			size = sizeof d->ident;
804
		break;
805
	case Qdevlinkdir:
806
		p = "devlink";
807
		t = QTDIR;
808
		perm = 0555;
809
		break;
810
	}
811
	mkqid(&q, QID(UNIT(c->qid), type), vers, t);
812
	devdir(c, q, p, size, eve, perm, dp);
813
	return 1;
814
}
815
 
816
static int
817
topgen(Chan *c, ulong type, Dir *d)
818
{
819
	int perm;
820
	vlong size;
821
	char *p;
822
	Qid q;
823
 
824
	perm = 0444;
825
	size = 0;
826
	switch(type){
827
	default:
828
		return -1;
829
	case Qtopctl:
830
		p = "ctl";
831
		perm = 0644;
832
		break;
833
	case Qtoplog:
834
		p = "log";
835
		size = eventcount();
836
		break;
837
	}
838
	mkqid(&q, type, 0, QTFILE);
839
	devdir(c, q, p, size, eve, perm, d);
840
	return 1;
841
}
842
 
843
static int
844
aoegen(Chan *c, char *, Dirtab *, int, int s, Dir *dp)
845
{
846
	int i;
847
	Aoedev *d;
848
	Qid q;
849
 
850
	if(c->qid.path == 0){
851
		switch(s){
852
		case DEVDOTDOT:
853
			q.path = 0;
854
			q.type = QTDIR;
855
			devdir(c, q, "#æ", 0, eve, 0555, dp);
856
			break;
857
		case 0:
858
			q.path = Qtopdir;
859
			q.type = QTDIR;
860
			devdir(c, q, "aoe", 0, eve, 0555, dp);
861
			break;
862
		default:
863
			return -1;
864
		}
865
		return 1;
866
	}
867
 
868
	switch(TYPE(c->qid)){
869
	default:
870
		return -1;
871
	case Qtopdir:
872
		if(s == DEVDOTDOT){
873
			mkqid(&q, Qzero, 0, QTDIR);
874
			devdir(c, q, "aoe", 0, eve, 0555, dp);
875
			return 1;
876
		}
877
		if(s < Qtopfiles)
878
			return topgen(c, Qtopbase + s, dp);
879
		s -= Qtopfiles;
880
		if(s >= units.ref)
881
			return -1;
882
		mkqid(&q, QID(s, Qunitdir), 0, QTDIR);
883
		d = unit2dev(s);
884
		assert(d != nil);
885
		devdir(c, q, unitname(d), 0, eve, 0555, dp);
886
		return 1;
887
	case Qtopctl:
888
	case Qtoplog:
889
		return topgen(c, TYPE(c->qid), dp);
890
	case Qunitdir:
891
		if(s == DEVDOTDOT){
892
			mkqid(&q, QID(0, Qtopdir), 0, QTDIR);
893
			uprint("%uld", UNIT(c->qid));
894
			devdir(c, q, up->genbuf, 0, eve, 0555, dp);
895
			return 1;
896
		}
897
		return unitgen(c, Qunitbase+s, dp);
898
	case Qctl:
899
	case Qdata:
900
	case Qconfig:
901
	case Qident:
902
		return unitgen(c, TYPE(c->qid), dp);
903
	case Qdevlinkdir:
904
		i = UNIT(c->qid);
905
		if(s == DEVDOTDOT){
906
			mkqid(&q, QID(i, Qunitdir), 0, QTDIR);
907
			devdir(c, q, "devlink", 0, eve, 0555, dp);
908
			return 1;
909
		}
910
		if(i >= units.ref)
911
			return -1;
912
		d = unit2dev(i);
913
		if(s >= d->ndl)
914
			return -1;
915
		uprint("%d", s);
916
		mkqid(&q, Q3(s, i, Qdevlink), 0, QTFILE);
917
		devdir(c, q, up->genbuf, 0, eve, 0755, dp);
918
		return 1;
919
	case Qdevlink:
920
		uprint("%d", s);
921
		mkqid(&q, Q3(s, UNIT(c->qid), Qdevlink), 0, QTFILE);
922
		devdir(c, q, up->genbuf, 0, eve, 0755, dp);
923
		return 1;
924
	}
925
}
926
 
927
static Walkqid*
928
aoewalk(Chan *c, Chan *nc, char **name, int nname)
929
{
930
	return devwalk(c, nc, name, nname, nil, 0, aoegen);
931
}
932
 
933
static int
934
aoestat(Chan *c, uchar *db, int n)
935
{
936
	return devstat(c, db, n, nil, 0, aoegen);
937
}
938
 
939
/*
 * Open a file.  Data files are opened under the device lock, require
 * the device to be up (Eaoedown otherwise) and are counted in
 * d->nopen; every other file goes straight to devopen.
 */
static Chan*
aoeopen(Chan *c, int omode)
{
	Aoedev *d;

	if(TYPE(c->qid) == Qdata){
		d = unit2dev(UNIT(c->qid));
		qlock(d);
		if(waserror()){
			qunlock(d);
			nexterror();
		}
		if(!UP(d))
			error(Eaoedown);
		c = devopen(c, omode, 0, 0, aoegen);
		d->nopen++;
		poperror();
		qunlock(d);
		return c;
	}
	return devopen(c, omode, 0, 0, aoegen);
}
961
 
962
static void
963
aoeclose(Chan *c)
964
{
965
	Aoedev *d;
966
 
967
	if(TYPE(c->qid) != Qdata || (c->flag&COPEN) == 0)
968
		return;
969
 
970
	d = unit2dev(UNIT(c->qid));
971
	qlock(d);
972
	if(--d->nopen == 0 && !waserror()){
973
		discover(d->major, d->minor);
974
		poperror();
975
	}
976
	qunlock(d);
977
}
978
 
979
static void
980
atarw(Aoedev *d, Frame *f)
981
{
982
	ulong bcnt;
983
	char extbit, writebit;
984
	Aoeata *ah;
985
	Srb *srb;
986
 
987
	extbit = 0x4;
988
	writebit = 0x10;
989
 
990
	srb = d->inprocess;
991
	bcnt = d->maxbcnt;
992
	if(bcnt > srb->len)
993
		bcnt = srb->len;
994
	f->nhdr = AOEATASZ;
995
	memset(f->hdr, 0, f->nhdr);
996
	ah = (Aoeata*)f->hdr;
997
	if(hset(d, f, ah, ACata) == -1) {
998
		d->inprocess = nil;
999
		return;
1000
	}
1001
	f->dp = srb->dp;
1002
	f->bcnt = bcnt;
1003
	f->lba = srb->sector;
1004
	f->srb = srb;
1005
 
1006
	ah->scnt = bcnt / Aoesectsz;
1007
	putlba(ah, f->lba);
1008
	if(d->flag & Dllba)
1009
		ah->aflag |= AAFext;
1010
	else {
1011
		extbit = 0;
1012
		ah->lba[3] &= 0x0f;
1013
		ah->lba[3] |= 0xe0;	/* LBA bit+obsolete 0xa0 */
1014
	}
1015
	if(srb->write){
1016
		ah->aflag |= AAFwrite;
1017
		f->dlen = bcnt;
1018
	}else{
1019
		writebit = 0;
1020
		f->dlen = 0;
1021
	}
1022
	ah->cmdstat = 0x20 | writebit | extbit;
1023
 
1024
	/* mark tracking fields and load out */
1025
	srb->nout++;
1026
	srb->dp = (uchar*)srb->dp + bcnt;
1027
	srb->len -= bcnt;
1028
	srb->sector += bcnt / Aoesectsz;
1029
	if(srb->len == 0)
1030
		d->inprocess = nil;
1031
	d->nout++;
1032
	f->dl->npkt++;
1033
	if(waserror()){
1034
		f->tag = Tfree;
1035
		d->inprocess = nil;
1036
		nexterror();
1037
	}
1038
	devtab[f->nl->dc->type]->bwrite(f->nl->dc, allocfb(f), 0);
1039
	poperror();
1040
}
1041
 
1042
static char*
1043
aoeerror(Aoehdr *h)
1044
{
1045
	int n;
1046
	static char *errs[] = {
1047
		"aoe protocol error: unknown",
1048
		"aoe protocol error: bad command code",
1049
		"aoe protocol error: bad argument param",
1050
		"aoe protocol error: device unavailable",
1051
		"aoe protocol error: config string present",
1052
		"aoe protocol error: unsupported version",
1053
		"aoe protocol error: target is reserved",
1054
	};
1055
 
1056
	if((h->verflag & AFerr) == 0)
1057
		return 0;
1058
	n = h->error;
1059
	if(n > nelem(errs))
1060
		n = 0;
1061
	return errs[n];
1062
}
1063
 
1064
static void
1065
rtupdate(Devlink *l, int rtt)
1066
{
1067
	int n;
1068
 
1069
	n = rtt;
1070
	if(rtt < 0){
1071
		n = -rtt;
1072
		if(n < Rtmin)
1073
			n = Rtmin;
1074
		else if(n > Rtmax)
1075
			n = Rtmax;
1076
		l->mintimer += (n - l->mintimer) >> 1;
1077
	} else if(n < l->mintimer)
1078
		n = l->mintimer;
1079
	else if(n > Rtmax)
1080
		n = Rtmax;
1081
 
1082
	/* g == .25; cf. Congestion Avoidance and Control, Jacobson&Karels; 1988 */
1083
	n -= l->rttavg;
1084
	l->rttavg += n >> 2;
1085
}
1086
 
1087
static int
1088
srbready(void *v)
1089
{
1090
	Srb *s;
1091
 
1092
	s = v;
1093
	return s->error || (s->nout == 0 && s->len == 0);
1094
}
1095
 
1096
static Frame*
1097
getframe(Aoedev *d, int tag)
1098
{
1099
	Frame *f, *e;
1100
 
1101
	f = d->frames;
1102
	e = f + d->nframes;
1103
	for(; f < e; f++)
1104
		if(f->tag == tag)
1105
			return f;
1106
	return nil;
1107
}
1108
 
1109
static Frame*
1110
freeframe(Aoedev *d)
1111
{
1112
	if(d->nout < d->maxout)
1113
		return getframe(d, Tfree);
1114
	return nil;
1115
}
1116
 
1117
static void
1118
work(Aoedev *d)
1119
{
1120
	Frame *f;
1121
 
1122
	while ((f = freeframe(d)) != nil) {
1123
		if(d->inprocess == nil){
1124
			if(d->head == nil)
1125
				return;
1126
			d->inprocess = d->head;
1127
			d->head = d->head->next;
1128
			if(d->head == nil)
1129
				d->tail = nil;
1130
		}
1131
		atarw(d, f);
1132
	}
1133
}
1134
 
1135
static void
1136
strategy(Aoedev *d, Srb *srb)
1137
{
1138
	qlock(d);
1139
	if(waserror()){
1140
		qunlock(d);
1141
		nexterror();
1142
	}
1143
	srb->next = nil;
1144
	if(d->tail)
1145
		d->tail->next = srb;
1146
	d->tail = srb;
1147
	if(d->head == nil)
1148
		d->head = srb;
1149
	srb->shared = 1;
1150
	work(d);
1151
	poperror();
1152
	qunlock(d);
1153
 
1154
	while(waserror())
1155
		;
1156
	sleep(srb, srbready, srb);
1157
	poperror();
1158
}
1159
 
1160
#define iskaddr(a)	((uintptr)(a) > KZERO)
1161
 
1162
static long
1163
rw(Aoedev *d, int write, uchar *db, long len, uvlong off)
1164
{
1165
	long n, nlen, copy;
1166
	enum { Srbsz = 1<<19, };	/* magic allocation */
1167
	Srb *srb;
1168
 
1169
	if((off|len) & (Aoesectsz-1))
1170
		error("offset and length must be sector multiple.\n");
1171
	if(off > d->bsize || len == 0)
1172
		return 0;
1173
	if(off + len > d->bsize)
1174
		len = d->bsize - off;
1175
	copy = 0;
1176
	if(iskaddr(db)){
1177
		srb = srbkalloc(db, len);
1178
		copy = 1;
1179
	}else
1180
		srb = srballoc(Srbsz <= len? Srbsz: len);
1181
	if(waserror()){
1182
		srbfree(srb);
1183
		nexterror();
1184
	}
1185
	nlen = len;
1186
	srb->write = write;
1187
	do {
1188
		if(!UP(d))
1189
			error(Eio);
1190
		srb->sector = off / Aoesectsz;
1191
		srb->dp = srb->data;
1192
		n = nlen;
1193
		if(n > Srbsz)
1194
			n = Srbsz;
1195
		srb->len = n;
1196
		if(write && !copy)
1197
			memmove(srb->data, db, n);
1198
		strategy(d, srb);
1199
		if(srb->error)
1200
			error(srb->error);
1201
		if(!write && !copy)
1202
			memmove(db, srb->data, n);
1203
		nlen -= n;
1204
		db += n;
1205
		off += n;
1206
	} while (nlen > 0);
1207
	poperror();
1208
	srbfree(srb);
1209
	return len;
1210
}
1211
 
1212
static long
1213
readmem(ulong off, void *dst, long n, void *src, long size)
1214
{
1215
	if(off >= size)
1216
		return 0;
1217
	if(off + n > size)
1218
		n = size - off;
1219
	memmove(dst, (uchar*)src + off, n);
1220
	return n;
1221
}
1222
 
1223
static char *
1224
pflag(char *s, char *e, uchar f)
1225
{
1226
	uchar i;
1227
 
1228
	for(i = 0; i < 8; i++)
1229
		if(f & (1 << i))
1230
			s = seprint(s, e, "%s ", flagname[i]? flagname[i]: "oops");
1231
	return seprint(s, e, "\n");
1232
}
1233
 
1234
static int
1235
pstat(Aoedev *d, char *db, int len, int off)
1236
{
1237
	int i;
1238
	char *state, *s, *p, *e;
1239
 
1240
	s = p = malloc(READSTR);
1241
	if(s == nil)
1242
		error(Enomem);
1243
	e = p + READSTR;
1244
 
1245
	state = "down";
1246
	if(UP(d))
1247
		state = "up";
1248
 
1249
	p = seprint(p, e,
1250
		"state: %s\n"	"nopen: %d\n"	"nout: %d\n"
1251
		"nmaxout: %d\n"	"nframes: %d\n"	"maxbcnt: %d\n"
1252
		"fw: %.4ux\n"
1253
		"model: %s\n"	"serial: %s\n"	"firmware: %s\n",
1254
		state,		d->nopen,	d->nout,
1255
		d->maxout, 	d->nframes,	d->maxbcnt,
1256
		d->fwver,
1257
		d->model, 	d->serial, 	d->firmware);
1258
	p = seprint(p, e, "flag: ");
1259
	p = pflag(p, e, d->flag);
1260
 
1261
	if(p - s < len)
1262
		len = p - s;
1263
	i = readstr(off, db, len, s);
1264
	free(s);
1265
	return i;
1266
}
1267
 
1268
static long
1269
unitread(Chan *c, void *db, long len, vlong off)
1270
{
1271
	Aoedev *d;
1272
 
1273
	d = unit2dev(UNIT(c->qid));
1274
	if(d->vers != c->qid.vers)
1275
		error(Echange);
1276
	switch(TYPE(c->qid)){
1277
	default:
1278
		error(Ebadarg);
1279
	case Qctl:
1280
		return pstat(d, db, len, off);
1281
	case Qdata:
1282
		return rw(d, Read, db, len, off);
1283
	case Qconfig:
1284
		if (!UP(d))
1285
			error(Eaoedown);
1286
		return readmem(off, db, len, d->config, d->nconfig);
1287
	case Qident:
1288
		if (!UP(d))
1289
			error(Eaoedown);
1290
		return readmem(off, db, len, d->ident, sizeof d->ident);
1291
	}
1292
}
1293
 
1294
static int
1295
devlinkread(Chan *c, void *db, int len, int off)
1296
{
1297
	int i;
1298
	char *s, *p, *e;
1299
	Aoedev *d;
1300
	Devlink *l;
1301
 
1302
	d = unit2dev(UNIT(c->qid));
1303
	i = L(c->qid);
1304
	if(i >= d->ndl)
1305
		return 0;
1306
	l = d->dl + i;
1307
 
1308
	s = p = malloc(READSTR);
1309
	if(s == nil)
1310
		error(Enomem);
1311
	e = s + READSTR;
1312
 
1313
	p = seprint(p, e, "addr: ");
1314
	for(i = 0; i < l->nea; i++)
1315
		p = seprint(p, e, "%E ", l->eatab[i]);
1316
	p = seprint(p, e, "\n");
1317
	p = seprint(p, e, "npkt: %uld\n", l->npkt);
1318
	p = seprint(p, e, "resent: %uld\n", l->resent);
1319
	p = seprint(p, e, "flag: "); p = pflag(p, e, l->flag);
1320
	p = seprint(p, e, "rttavg: %uld\n", TK2MS(l->rttavg));
1321
	p = seprint(p, e, "mintimer: %uld\n", TK2MS(l->mintimer));
1322
 
1323
	p = seprint(p, e, "nl path: %s\n", l->nl->path);
1324
	p = seprint(p, e, "nl ea: %E\n", l->nl->ea);
1325
	p = seprint(p, e, "nl flag: "); p = pflag(p, e, l->flag);
1326
	p = seprint(p, e, "nl lostjumbo: %d\n", l->nl->lostjumbo);
1327
	p = seprint(p, e, "nl datamtu: %d\n", l->nl->datamtu);
1328
 
1329
	if(p - s < len)
1330
		len = p - s;
1331
	i = readstr(off, db, len, s);
1332
	free(s);
1333
	return i;
1334
}
1335
 
1336
static long
1337
topctlread(Chan *, void *db, int len, int off)
1338
{
1339
	int i;
1340
	char *s, *p, *e;
1341
	Netlink *n;
1342
 
1343
	s = p = malloc(READSTR);
1344
	if(s == nil)
1345
		error(Enomem);
1346
	e = s + READSTR;
1347
 
1348
	p = seprint(p, e, "debug: %d\n", debug);
1349
	p = seprint(p, e, "autodiscover: %d\n", autodiscover);
1350
	p = seprint(p, e, "rediscover: %d\n", rediscover);
1351
 
1352
	for(i = 0; i < Nnetlink; i++){
1353
		n = netlinks.nl+i;
1354
		if(n->cc == 0)
1355
			continue;
1356
		p = seprint(p, e, "if%d path: %s\n", i, n->path);
1357
		p = seprint(p, e, "if%d ea: %E\n", i, n->ea);
1358
		p = seprint(p, e, "if%d flag: ", i); p = pflag(p, e, n->flag);
1359
		p = seprint(p, e, "if%d lostjumbo: %d\n", i, n->lostjumbo);
1360
		p = seprint(p, e, "if%d datamtu: %d\n", i, n->datamtu);
1361
	}
1362
 
1363
	if(p - s < len)
1364
		len = p - s;
1365
	i = readstr(off, db, len, s);
1366
	free(s);
1367
	return i;
1368
}
1369
 
1370
static long
1371
aoeread(Chan *c, void *db, long n, vlong off)
1372
{
1373
	switch(TYPE(c->qid)){
1374
	default:
1375
		error(Eperm);
1376
	case Qzero:
1377
	case Qtopdir:
1378
	case Qunitdir:
1379
	case Qdevlinkdir:
1380
		return devdirread(c, db, n, 0, 0, aoegen);
1381
	case Qtopctl:
1382
		return topctlread(c, db, n, off);
1383
	case Qtoplog:
1384
		return eventlogread(db, n);
1385
	case Qctl:
1386
	case Qdata:
1387
	case Qconfig:
1388
	case Qident:
1389
		return unitread(c, db, n, off);
1390
	case Qdevlink:
1391
		return devlinkread(c, db, n, off);
1392
	}
1393
}
1394
 
1395
static long
1396
configwrite(Aoedev *d, void *db, long len)
1397
{
1398
	char *s;
1399
	Aoeqc *ch;
1400
	Frame *f;
1401
	Srb *srb;
1402
 
1403
	if(!UP(d))
1404
		error(Eaoedown);
1405
	if(len > ETHERMAXTU - AOEQCSZ)
1406
		error(Etoobig);
1407
	srb = srballoc(len);
1408
	s = malloc(len);
1409
	if(s == nil)
1410
		error(Enomem);
1411
	memmove(s, db, len);
1412
 
1413
	if(waserror()){
1414
		srbfree(srb);
1415
		free(s);
1416
		nexterror();
1417
	}
1418
	for (;;) {
1419
		qlock(d);
1420
		if(waserror()){
1421
			qunlock(d);
1422
			nexterror();
1423
		}
1424
		f = freeframe(d);
1425
		if(f != nil)
1426
			break;
1427
		poperror();
1428
		qunlock(d);
1429
 
1430
		if(waserror())
1431
			nexterror();
1432
		tsleep(&up->sleep, return0, 0, 100);
1433
		poperror();
1434
	}
1435
	f->nhdr = AOEQCSZ;
1436
	memset(f->hdr, 0, f->nhdr);
1437
	ch = (Aoeqc*)f->hdr;
1438
	if(hset(d, f, ch, ACconfig) == -1) {
1439
		/*
1440
		 * these refer to qlock & waserror in the above for loop.
1441
		 * there's still the first waserror outstanding.
1442
		 */
1443
		poperror();
1444
		qunlock(d);
1445
		return 0;
1446
	}
1447
	srb->shared = 1;
1448
	f->srb = srb;
1449
	f->dp = s;
1450
	ch->verccmd = AQCfset;
1451
	hnputs(ch->cslen, len);
1452
	d->nout++;
1453
	srb->nout++;
1454
	f->dl->npkt++;
1455
	f->dlen = len;
1456
	/* these too */
1457
	poperror();
1458
	qunlock(d);
1459
 
1460
	devtab[f->nl->dc->type]->bwrite(f->nl->dc, allocfb(f), 0);
1461
	sleep(srb, srbready, srb);
1462
	if(srb->error)
1463
		error(srb->error);
1464
 
1465
	qlock(d);
1466
	if(waserror()){
1467
		qunlock(d);
1468
		nexterror();
1469
	}
1470
	memmove(d->config, s, len);
1471
	d->nconfig = len;
1472
	poperror();
1473
	qunlock(d);
1474
 
1475
	poperror();			/* pop first waserror */
1476
 
1477
	srbfree(srb);
1478
	memmove(db, s, len);
1479
	free(s);
1480
	return len;
1481
}
1482
 
1483
static int getmtu(Chan*);
1484
 
1485
static int
1486
devmaxdata(Aoedev *d)		/* return aoe mtu (excluding headers) */
1487
{
1488
	int i, nmtu, mtu;
1489
	Devlink *l;
1490
	Netlink *n;
1491
 
1492
	mtu = 100000;
1493
	for(i = 0; i < d->ndl; i++){
1494
		l = d->dl + i;
1495
		n = l->nl;
1496
		if((l->flag & Dup) == 0 || (n->flag & Dup) == 0)
1497
			continue;
1498
		nmtu = getmtu(n->mtu);
1499
		if(mtu > nmtu)
1500
			mtu = nmtu;
1501
	}
1502
	if(mtu == 100000)
1503
		mtu = ETHERMAXTU;		/* normal ethernet mtu */
1504
	mtu -= AOEATASZ;
1505
	mtu -= (uint)mtu % Aoesectsz;
1506
	if(mtu < 2*Aoesectsz)			/* sanity */
1507
		mtu = 2*Aoesectsz;
1508
	return mtu;
1509
}
1510
 
1511
/*
 * Compute a new flag word from f: with no argument the bit is
 * flipped, "on" sets it, and any other string clears it.
 */
static int
toggle(char *s, int f, int bit)
{
	if(s == 0)
		return f ^ bit;
	if(strcmp(s, "on") == 0)
		return f | bit;
	return f & ~bit;
}
1522
 
1523
static void ataident(Aoedev*);
1524
 
1525
static long
1526
unitctlwrite(Aoedev *d, void *db, long n)
1527
{
1528
	uint maxbcnt, mtu;
1529
	uvlong bsize;
1530
	enum {
1531
		Failio,
1532
		Ident,
1533
		Jumbo,
1534
		Maxbno,
1535
		Mtu,
1536
		Nofailf,
1537
		Setsize,
1538
	};
1539
	Cmdbuf *cb;
1540
	Cmdtab *ct;
1541
	static Cmdtab cmds[] = {
1542
		{Failio, 	"failio", 	1 },
1543
		{Ident, 	"identify", 	1 },
1544
		{Jumbo, 	"jumbo", 	0 },
1545
		{Maxbno,	"maxbno",	0 },
1546
		{Mtu,		"mtu",		0 },
1547
		{Nofailf,	"nofail",	0 },
1548
		{Setsize, 	"setsize", 	0 },
1549
	};
1550
 
1551
	cb = parsecmd(db, n);
1552
	qlock(d);
1553
	if(waserror()){
1554
		qunlock(d);
1555
		free(cb);
1556
		nexterror();
1557
	}
1558
	ct = lookupcmd(cb, cmds, nelem(cmds));
1559
	switch(ct->index){
1560
	case Failio:
1561
		downdev(d, "i/o failure");
1562
		break;
1563
	case Ident:
1564
		ataident(d);
1565
		break;
1566
	case Jumbo:
1567
		d->flag = toggle(cb->f[1], d->flag, Djumbo);
1568
		break;
1569
	case Maxbno:
1570
	case Mtu:
1571
		maxbcnt = devmaxdata(d);
1572
		if(cb->nf > 2)
1573
			error(Ecmdargs);
1574
		if(cb->nf == 2){
1575
			mtu = strtoul(cb->f[1], 0, 0);
1576
			if(ct->index == Maxbno)
1577
				mtu *= Aoesectsz;
1578
			else{
1579
				mtu -= AOEATASZ;
1580
				mtu &= ~(Aoesectsz-1);
1581
			}
1582
			if(mtu == 0 || mtu > maxbcnt)
1583
				cmderror(cb, "mtu out of legal range");
1584
			maxbcnt = mtu;
1585
		}
1586
		d->maxbcnt = maxbcnt;
1587
		break;
1588
	case Nofailf:
1589
		d->flag = toggle(cb->f[1], d->flag, Dnofail);
1590
		break;
1591
	case Setsize:
1592
		bsize = d->realbsize;
1593
		if(cb->nf > 2)
1594
			error(Ecmdargs);
1595
		if(cb->nf == 2){
1596
			bsize = strtoull(cb->f[1], 0, 0);
1597
			if(bsize % Aoesectsz)
1598
				cmderror(cb, "disk size must be sector aligned");
1599
		}
1600
		d->bsize = bsize;
1601
		break;
1602
	default:
1603
		cmderror(cb, "unknown aoe control message");
1604
	}
1605
	poperror();
1606
	qunlock(d);
1607
	free(cb);
1608
	return n;
1609
}
1610
 
1611
static long
1612
unitwrite(Chan *c, void *db, long n, vlong off)
1613
{
1614
	long rv;
1615
	char *buf;
1616
	Aoedev *d;
1617
 
1618
	d = unit2dev(UNIT(c->qid));
1619
	switch(TYPE(c->qid)){
1620
	default:
1621
		error(Ebadarg);
1622
	case Qctl:
1623
		return unitctlwrite(d, db, n);
1624
	case Qident:
1625
		error(Eperm);
1626
	case Qdata:
1627
		return rw(d, Write, db, n, off);
1628
	case Qconfig:
1629
		if(off + n > sizeof d->config)
1630
			error(Etoobig);
1631
		buf = malloc(sizeof d->config);
1632
		if(buf == nil)
1633
			error(Enomem);
1634
		if(waserror()){
1635
			free(buf);
1636
			nexterror();
1637
		}
1638
		memmove(buf, d->config, d->nconfig);
1639
		memmove(buf + off, db, n);
1640
		rv = configwrite(d, buf, n + off);
1641
		poperror();
1642
		free(buf);
1643
		return rv;
1644
	}
1645
}
1646
 
1647
static Netlink*
1648
addnet(char *path, Chan *cc, Chan *dc, Chan *mtu, uchar *ea)
1649
{
1650
	Netlink *nl, *e;
1651
 
1652
	lock(&netlinks);
1653
	if(waserror()){
1654
		unlock(&netlinks);
1655
		nexterror();
1656
	}
1657
	nl = netlinks.nl;
1658
	e = nl + nelem(netlinks.nl);
1659
	for(; nl < e && nl->cc; nl++)
1660
		continue;
1661
	if (nl >= e)
1662
		error("out of netlink structures");
1663
	nl->cc = cc;
1664
	nl->dc = dc;
1665
	nl->mtu = mtu;
1666
	strncpy(nl->path, path, sizeof nl->path);
1667
	memmove(nl->ea, ea, sizeof nl->ea);
1668
	poperror();
1669
	nl->flag |= Dup;
1670
	unlock(&netlinks);
1671
	return nl;
1672
}
1673
 
1674
static int
1675
newunit(void)
1676
{
1677
	int x;
1678
 
1679
	lock(&units);
1680
	if(units.ref == Maxunits)
1681
		x = -1;
1682
	else
1683
		x = units.ref++;
1684
	unlock(&units);
1685
	return x;
1686
}
1687
 
1688
static int
1689
dropunit(void)
1690
{
1691
	int x;
1692
 
1693
	lock(&units);
1694
	x = --units.ref;
1695
	unlock(&units);
1696
	return x;
1697
}
1698
 
1699
/*
1700
 * always allocate max frames.  maxout may change.
1701
 */
1702
static Aoedev*
1703
newdev(long major, long minor, int n)
1704
{
1705
	Aoedev *d;
1706
	Frame *f, *e;
1707
 
1708
	d = mallocz(sizeof *d, 1);
1709
	f = mallocz(sizeof *f * Maxframes, 1);
1710
	if (!d || !f) {
1711
		free(d);
1712
		free(f);
1713
		error("aoe device allocation failure");
1714
	}
1715
	d->nframes = n;
1716
	d->frames = f;
1717
	for (e = f + n; f < e; f++)
1718
		f->tag = Tfree;
1719
	d->maxout = n;
1720
	d->major = major;
1721
	d->minor = minor;
1722
	d->maxbcnt = Dbcnt;
1723
	d->flag = Djumbo;
1724
	d->unit = newunit();		/* bzzt.  inaccurate if units removed */
1725
	if(d->unit == -1){
1726
		free(d->frames);
1727
		free(d);
1728
		error("too many units");
1729
	}
1730
	d->dl = d->dltab;
1731
	return d;
1732
}
1733
 
1734
static Aoedev*
1735
mm2dev(int major, int minor)
1736
{
1737
	Aoedev *d;
1738
 
1739
	rlock(&devs);
1740
	for(d = devs.d; d; d = d->next)
1741
		if(d->major == major && d->minor == minor){
1742
			runlock(&devs);
1743
			return d;
1744
		}
1745
	runlock(&devs);
1746
	eventlog("mm2dev: %d.%d not found\n", major, minor);
1747
	return nil;
1748
}
1749
 
1750
/* Find the device in our list.  If not known, add it */
1751
static Aoedev*
1752
getdev(long major, long minor, int n)
1753
{
1754
	Aoedev *d;
1755
 
1756
	if(major == 0xffff || minor == 0xff)
1757
		return 0;
1758
	wlock(&devs);
1759
	if(waserror()){
1760
		wunlock(&devs);
1761
		nexterror();
1762
	}
1763
	for(d = devs.d; d; d = d->next)
1764
		if(d->major == major && d->minor == minor)
1765
			break;
1766
	if (d == nil) {
1767
		d = newdev(major, minor, n);
1768
		d->next = devs.d;
1769
		devs.d = d;
1770
	}
1771
	poperror();
1772
	wunlock(&devs);
1773
	return d;
1774
}
1775
 
1776
static ushort
1777
gbit16(void *a)
1778
{
1779
	uchar *i;
1780
 
1781
	i = a;
1782
	return i[1] << 8 | i[0];
1783
}
1784
 
1785
static ulong
1786
gbit32(void *a)
1787
{
1788
	ulong j;
1789
	uchar *i;
1790
 
1791
	i = a;
1792
	j  = i[3] << 24;
1793
	j |= i[2] << 16;
1794
	j |= i[1] << 8;
1795
	j |= i[0];
1796
	return j;
1797
}
1798
 
1799
static uvlong
1800
gbit64(void *a)
1801
{
1802
	uchar *i;
1803
 
1804
	i = a;
1805
	return (uvlong)gbit32(i+4) << 32 | gbit32(a);
1806
}
1807
 
1808
static void
1809
ataident(Aoedev *d)
1810
{
1811
	Aoeata *a;
1812
	Block *b;
1813
	Frame *f;
1814
 
1815
	f = freeframe(d);
1816
	if(f == nil)
1817
		return;
1818
	f->nhdr = AOEATASZ;
1819
	memset(f->hdr, 0, f->nhdr);
1820
	a = (Aoeata*)f->hdr;
1821
	if(hset(d, f, a, ACata) == -1)
1822
		return;
1823
	a->cmdstat = Cid;	/* ata 6, page 110 */
1824
	a->scnt = 1;
1825
	a->lba[3] = 0xa0;
1826
	d->nout++;
1827
	f->dl->npkt++;
1828
	f->bcnt = 512;
1829
	f->dlen = 0;
1830
	b = allocfb(f);
1831
	devtab[f->nl->dc->type]->bwrite(f->nl->dc, b, 0);
1832
}
1833
 
1834
static int
1835
getmtu(Chan *mtuch)
1836
{
1837
	int n, mtu;
1838
	char buf[36];
1839
 
1840
	mtu = ETHERMAXTU;
1841
	if(mtuch == nil || waserror())
1842
		return mtu;
1843
	n = devtab[mtuch->type]->read(mtuch, buf, sizeof buf - 1, 0);
1844
	if(n > 12){
1845
		buf[n] = 0;
1846
		mtu = strtoul(buf + 12, 0, 0);
1847
	}
1848
	poperror();
1849
	return mtu;
1850
}
1851
 
1852
static int
1853
newdlea(Devlink *l, uchar *ea)
1854
{
1855
	int i;
1856
	uchar *t;
1857
 
1858
	for(i = 0; i < Nea; i++){
1859
		t = l->eatab[i];
1860
		if(i == l->nea){
1861
			memmove(t, ea, Eaddrlen);
1862
			return l->nea++;
1863
		}
1864
		if(memcmp(t, ea, Eaddrlen) == 0)
1865
			return i;
1866
	}
1867
	return -1;
1868
}
1869
 
1870
static Devlink*
1871
newdevlink(Aoedev *d, Netlink *n, Aoeqc *c)
1872
{
1873
	int i;
1874
	Devlink *l;
1875
 
1876
	for(i = 0; i < Ndevlink; i++){
1877
		l = d->dl + i;
1878
		if(i == d->ndl){
1879
			d->ndl++;
1880
			newdlea(l, c->src);
1881
			l->nl = n;
1882
			l->flag |= Dup;
1883
			l->mintimer = Rtmin;
1884
			l->rttavg = Rtmax;
1885
			return l;
1886
		}
1887
		if(l->nl == n) {
1888
			newdlea(l, c->src);
1889
			l->flag |= Dup;
1890
			return l;
1891
		}
1892
	}
1893
	eventlog("%æ: out of links: %s:%E to %E\n", d, n->path, n->ea, c->src);
1894
	return 0;
1895
}
1896
 
1897
static void
1898
errrsp(Block *b, char *s)
1899
{
1900
	int n;
1901
	Aoedev *d;
1902
	Aoehdr *h;
1903
	Frame *f;
1904
 
1905
	h = (Aoehdr*)b->rp;
1906
	n = nhgetl(h->tag);
1907
	if(n == Tmgmt || n == Tfree)
1908
		return;
1909
	d = mm2dev(nhgets(h->major), h->minor);
1910
	if(d == 0)
1911
		return;
1912
	if(f = getframe(d, n))
1913
		frameerror(d, f, s);
1914
}
1915
 
1916
static void
1917
qcfgrsp(Block *b, Netlink *nl)
1918
{
1919
	int major, cmd, cslen, blen;
1920
	unsigned n;
1921
	Aoedev *d;
1922
	Aoeqc *ch;
1923
	Devlink *l;
1924
	Frame *f;
1925
 
1926
	ch = (Aoeqc*)b->rp;
1927
	major = nhgets(ch->major);
1928
	n = nhgetl(ch->tag);
1929
	if(n != Tmgmt){
1930
		d = mm2dev(major, ch->minor);
1931
		if(d == nil)
1932
			return;
1933
		qlock(d);
1934
		f = getframe(d, n);
1935
		if(f == nil){
1936
			qunlock(d);
1937
			eventlog("%æ: unknown response tag %ux\n", d, n);
1938
			return;
1939
		}
1940
		cslen = nhgets(ch->cslen);
1941
		blen = BLEN(b) - AOEQCSZ;
1942
		if(cslen < blen && BLEN(b) > 60)
1943
			eventlog("%æ: cfgrsp: tag %.8ux oversized %d %d\n",
1944
				d, n, cslen, blen);
1945
		if(cslen > blen){
1946
			eventlog("%æ: cfgrsp: tag %.8ux runt %d %d\n",
1947
				d, n, cslen, blen);
1948
			cslen = blen;
1949
		}
1950
		memmove(f->dp, ch + 1, cslen);
1951
		f->srb->nout--;
1952
		wakeup(f->srb);
1953
		f->srb->shared = 0;
1954
		d->nout--;
1955
		f->srb = nil;
1956
		f->tag = Tfree;
1957
		qunlock(d);
1958
		return;
1959
	}
1960
 
1961
	cmd = ch->verccmd & 0xf;
1962
	if(cmd != 0){
1963
		eventlog("aoe%d.%d: cfgrsp: bad command %d\n", major, ch->minor, cmd);
1964
		return;
1965
	}
1966
	n = nhgets(ch->bufcnt);
1967
	if(n > Maxframes)
1968
		n = Maxframes;
1969
 
1970
	if(waserror()){
1971
		eventlog("getdev: %d.%d ignored: %s\n", major, ch->minor, up->errstr);
1972
		return;
1973
	}
1974
	d = getdev(major, ch->minor, n);
1975
	poperror();
1976
	if(d == 0)
1977
		return;
1978
 
1979
	qlock(d);
1980
	*up->errstr = 0;
1981
	if(waserror()){
1982
		qunlock(d);
1983
		eventlog("%æ: %s\n", d, up->errstr);
1984
		nexterror();
1985
	}
1986
 
1987
	l = newdevlink(d, nl, ch);		/* add this interface. */
1988
 
1989
	d->fwver = nhgets(ch->fwver);
1990
	n = nhgets(ch->cslen);
1991
	if(n > sizeof d->config)
1992
		n = sizeof d->config;
1993
	d->nconfig = n;
1994
	memmove(d->config, ch + 1, n);
1995
	if(l != 0 && d->flag & Djumbo){
1996
		n = getmtu(nl->mtu) - AOEATASZ;
1997
		n /= Aoesectsz;
1998
		if(n > ch->scnt)
1999
			n = ch->scnt;
2000
		n = n? n * Aoesectsz: Dbcnt;
2001
		if(n != d->maxbcnt){
2002
			eventlog("%æ: setting %d byte data frames on %s:%E\n",
2003
				d, n, nl->path, nl->ea);
2004
			d->maxbcnt = n;
2005
		}
2006
	}
2007
	if(d->nopen == 0)
2008
		ataident(d);
2009
	poperror();
2010
	qunlock(d);
2011
}
2012
 
2013
/*
 * Copy an n-byte ATA identify string from u into p, swapping the
 * two bytes of each 16-bit word, then strip trailing and leading
 * blanks and NUL-terminate.  p must have room for n+1 bytes.
 */
void
aoeidmove(char *p, ushort *u, unsigned n)
{
	int i;
	char *op, *e, *s;

	op = p;
	/*
	 * the ushort `*u' is sometimes not aligned on a short boundary,
	 * so dereferencing u[i] causes an alignment exception on
	 * some machines.
	 */
	s = (char *)u;
	for(i = 0; i < n; i += 2){
		*p++ = s[i + 1];
		*p++ = s[i];
	}
	*p = 0;
	while(p > op && *--p == ' ')
		*p = 0;
	e = p;			/* last non-blank, or op if there is none */
	p = op;
	while(*p == ' ')
		p++;
	/*
	 * slide the trimmed text, e-p+1 bytes, and its NUL to the
	 * front.  the old count, n - (e - p), moved too few bytes
	 * for long strings with leading blanks and garbled them.
	 */
	if(p != op)
		memmove(op, p, e - p + 2);
}
2039
 
2040
static vlong
2041
aoeidentify(Aoedev *d, ushort *id)
2042
{
2043
	int i;
2044
	vlong s;
2045
 
2046
	d->flag &= ~(Dllba|Dpower|Dsmart|Dnop|Dup);
2047
 
2048
	i = gbit16(id+83) | gbit16(id+86);
2049
	if(i & (1<<10)){
2050
		d->flag |= Dllba;
2051
		s = gbit64(id+100);
2052
	}else
2053
		s = gbit32(id+60);
2054
 
2055
	i = gbit16(id+83);
2056
	if((i>>14) == 1) {
2057
		if(i & (1<<3))
2058
			d->flag  |= Dpower;
2059
		i = gbit16(id+82);
2060
		if(i & 1)
2061
			d->flag  |= Dsmart;
2062
		if(i & (1<<14))
2063
			d->flag  |= Dnop;
2064
	}
2065
//	eventlog("%æ up\n", d);
2066
	d->flag |= Dup;
2067
	memmove(d->ident, id, sizeof d->ident);
2068
	return s;
2069
}
2070
 
2071
static void
2072
newvers(Aoedev *d)
2073
{
2074
	lock(&drivevers);
2075
	d->vers = drivevers.ref++;
2076
	unlock(&drivevers);
2077
}
2078
 
2079
static int
2080
identify(Aoedev *d, ushort *id)
2081
{
2082
	vlong osectors, s;
2083
	uchar oserial[21];
2084
 
2085
	s = aoeidentify(d, id);
2086
	if(s == -1)
2087
		return -1;
2088
	osectors = d->realbsize;
2089
	memmove(oserial, d->serial, sizeof d->serial);
2090
 
2091
	aoeidmove(d->serial, id+10, 20);
2092
	aoeidmove(d->firmware, id+23, 8);
2093
	aoeidmove(d->model, id+27, 40);
2094
 
2095
	s *= Aoesectsz;
2096
	if((osectors == 0 || osectors != s) &&
2097
	    memcmp(oserial, d->serial, sizeof oserial) != 0){
2098
		d->bsize = s;
2099
		d->realbsize = s;
2100
//		d->mediachange = 1;
2101
		newvers(d);
2102
	}
2103
	return 0;
2104
}
2105
 
2106
static void
2107
atarsp(Block *b)
2108
{
2109
	unsigned n;
2110
	short major;
2111
	Aoeata *ahin, *ahout;
2112
	Aoedev *d;
2113
	Frame *f;
2114
	Srb *srb;
2115
 
2116
	ahin = (Aoeata*)b->rp;
2117
	major = nhgets(ahin->major);
2118
	d = mm2dev(major, ahin->minor);
2119
	if(d == nil)
2120
		return;
2121
	qlock(d);
2122
	if(waserror()){
2123
		qunlock(d);
2124
		nexterror();
2125
	}
2126
	n = nhgetl(ahin->tag);
2127
	f = getframe(d, n);
2128
	if(f == nil){
2129
		dprint("%æ: unexpected response; tag %ux\n", d, n);
2130
		goto bail;
2131
	}
2132
	rtupdate(f->dl, tsince(f->tag));
2133
	ahout = (Aoeata*)f->hdr;
2134
	srb = f->srb;
2135
 
2136
	if(ahin->cmdstat & 0xa9){
2137
		eventlog("%æ: ata error cmd %.2ux stat %.2ux\n",
2138
			d, ahout->cmdstat, ahin->cmdstat);
2139
		if(srb)
2140
			srb->error = Eio;
2141
	} else {
2142
		n = ahout->scnt * Aoesectsz;
2143
		switch(ahout->cmdstat){
2144
		case Crd:
2145
		case Crdext:
2146
			if(BLEN(b) - AOEATASZ < n){
2147
				eventlog("%æ: runt read blen %ld expect %d\n",
2148
					d, BLEN(b), n);
2149
				goto bail;
2150
			}
2151
			memmove(f->dp, (uchar *)ahin + AOEATASZ, n);
2152
		case Cwr:
2153
		case Cwrext:
2154
			if(n > Dbcnt)
2155
				f->nl->lostjumbo = 0;
2156
			if(f->bcnt -= n){
2157
				f->lba += n / Aoesectsz;
2158
				f->dp = (uchar*)f->dp + n;
2159
				resend(d, f);
2160
				goto bail;
2161
			}
2162
			break;
2163
		case Cid:
2164
			if(BLEN(b) - AOEATASZ < 512){
2165
				eventlog("%æ: runt identify blen %ld expect %d\n",
2166
					d, BLEN(b), n);
2167
				goto bail;
2168
			}
2169
			identify(d, (ushort*)((uchar *)ahin + AOEATASZ));
2170
			break;
2171
		default:
2172
			eventlog("%æ: unknown ata command %.2ux \n",
2173
				d, ahout->cmdstat);
2174
		}
2175
	}
2176
 
2177
	if(srb && --srb->nout == 0 && srb->len == 0){
2178
		wakeup(srb);
2179
		srb->shared = 0;
2180
	}
2181
	f->srb = nil;
2182
	f->tag = Tfree;
2183
	d->nout--;
2184
 
2185
	work(d);
2186
bail:
2187
	poperror();
2188
	qunlock(d);
2189
}
2190
 
2191
static void
2192
netrdaoeproc(void *v)
2193
{
2194
	int idx;
2195
	char name[Maxpath+1], *s;
2196
	Aoehdr *h;
2197
	Block *b;
2198
	Netlink *nl;
2199
 
2200
	nl = (Netlink*)v;
2201
	idx = nl - netlinks.nl;
2202
	netlinks.reader[idx] = 1;
2203
	kstrcpy(name, nl->path, Maxpath);
2204
 
2205
	if(waserror()){
2206
		eventlog("netrdaoe exiting: %s\n", up->errstr);
2207
		netlinks.reader[idx] = 0;
2208
		wakeup(netlinks.rendez + idx);
2209
		pexit(up->errstr, 1);
2210
	}
2211
	if(autodiscover)
2212
		discover(0xffff, 0xff);
2213
	for (;;) {
2214
		if(!(nl->flag & Dup)) {
2215
			uprint("%s: netlink is down", name);
2216
			error(up->genbuf);
2217
		}
2218
		if (nl->dc == nil)
2219
			panic("netrdaoe: nl->dc == nil");
2220
		b = devtab[nl->dc->type]->bread(nl->dc, 1<<16, 0);
2221
		if(b == nil) {
2222
			uprint("%s: nil read from network", name);
2223
			error(up->genbuf);
2224
		}
2225
		h = (Aoehdr*)b->rp;
2226
		if(h->verflag & AFrsp)
2227
			if(s = aoeerror(h)){
2228
				eventlog("%s: %s\n", nl->path, up->errstr);
2229
				errrsp(b, s);
2230
			}else
2231
				switch(h->cmd){
2232
				case ACata:
2233
					atarsp(b);
2234
					break;
2235
				case ACconfig:
2236
					qcfgrsp(b, nl);
2237
					break;
2238
				default:
2239
					if((h->cmd & 0xf0) == 0){
2240
						eventlog("%s: unknown cmd %d\n",
2241
							nl->path, h->cmd);
2242
						errrsp(b, "unknown command");
2243
					}
2244
					break;
2245
				}
2246
		freeb(b);
2247
	}
2248
}
2249
 
2250
static void
2251
getaddr(char *path, uchar *ea)
2252
{
2253
	int n;
2254
	char buf[2*Eaddrlen+1];
2255
	Chan *c;
2256
 
2257
	uprint("%s/addr", path);
2258
	c = namec(up->genbuf, Aopen, OREAD, 0);
2259
	if(waserror()) {
2260
		cclose(c);
2261
		nexterror();
2262
	}
2263
	if (c == nil)
2264
		panic("æ: getaddr: c == nil");
2265
	n = devtab[c->type]->read(c, buf, sizeof buf-1, 0);
2266
	poperror();
2267
	cclose(c);
2268
	buf[n] = 0;
2269
	if(parseether(ea, buf) < 0)
2270
		error("parseether failure");
2271
}
2272
 
2273
static void
2274
netbind(char *path)
2275
{
2276
	char addr[Maxpath];
2277
	uchar ea[2*Eaddrlen+1];
2278
	Chan *dc, *cc, *mtu;
2279
	Netlink *nl;
2280
 
2281
	snprint(addr, sizeof addr, "%s!%#x", path, Aoetype);
2282
	dc = chandial(addr, nil, nil, &cc);
2283
	snprint(addr, sizeof addr, "%s/mtu", path);
2284
	if(waserror())
2285
		mtu = nil;
2286
	else {
2287
		mtu = namec(addr, Aopen, OREAD, 0);
2288
		poperror();
2289
	}
2290
 
2291
	if(waserror()){
2292
		cclose(dc);
2293
		cclose(cc);
2294
		if(mtu)
2295
			cclose(mtu);
2296
		nexterror();
2297
	}
2298
	if(dc == nil  || cc == nil)
2299
		error(Enonexist);
2300
	getaddr(path, ea);
2301
	nl = addnet(path, cc, dc, mtu, ea);
2302
	snprint(addr, sizeof addr, "netrdaoe@%s", path);
2303
	kproc(addr, netrdaoeproc, nl);
2304
	poperror();
2305
}
2306
 
2307
/*
 * Sleep condition used by netunbind: true when the int that v
 * points to is non-zero.
 */
static int
unbound(void *v)
{
	int *flag;

	flag = v;
	return *flag != 0;
}
2312
 
2313
static void
2314
netunbind(char *path)
2315
{
2316
	int i, idx;
2317
	Aoedev *d, *p, *next;
2318
	Chan *dc, *cc;
2319
	Devlink *l;
2320
	Frame *f;
2321
	Netlink *n, *e;
2322
 
2323
	n = netlinks.nl;
2324
	e = n + nelem(netlinks.nl);
2325
 
2326
	lock(&netlinks);
2327
	for(; n < e; n++)
2328
		if(n->dc && strcmp(n->path, path) == 0)
2329
			break;
2330
	unlock(&netlinks);
2331
	if (n >= e)
2332
		error("device not bound");
2333
 
2334
	/*
2335
	 * hunt down devices using this interface; disable
2336
	 * this also terminates the reader.
2337
	 */
2338
	idx = n - netlinks.nl;
2339
	wlock(&devs);
2340
	for(d = devs.d; d; d = d->next){
2341
		qlock(d);
2342
		for(i = 0; i < d->ndl; i++){
2343
			l = d->dl + i;
2344
			if(l->nl == n)
2345
				l->flag &= ~Dup;
2346
		}
2347
		qunlock(d);
2348
	}
2349
	n->flag &= ~Dup;
2350
	wunlock(&devs);
2351
 
2352
	/* confirm reader is down. */
2353
	while(waserror())
2354
		;
2355
	sleep(netlinks.rendez + idx, unbound, netlinks.reader + idx);
2356
	poperror();
2357
 
2358
	/* reschedule packets. */
2359
	wlock(&devs);
2360
	for(d = devs.d; d; d = d->next){
2361
		qlock(d);
2362
		for(i = 0; i < d->nframes; i++){
2363
			f = d->frames + i;
2364
			if(f->tag != Tfree && f->nl == n)
2365
				resend(d, f);
2366
		}
2367
		qunlock(d);
2368
	}
2369
	wunlock(&devs);
2370
 
2371
	/* squeeze devlink pool.  (we assert nobody is using them now) */
2372
	wlock(&devs);
2373
	for(d = devs.d; d; d = d->next){
2374
		qlock(d);
2375
		for(i = 0; i < d->ndl; i++){
2376
			l = d->dl + i;
2377
			if(l->nl == n)
2378
				memmove(l, l + 1, sizeof *l * (--d->ndl - i));
2379
		}
2380
		qunlock(d);
2381
	}
2382
	wunlock(&devs);
2383
 
2384
	/* close device link. */
2385
	lock(&netlinks);
2386
	dc = n->dc;
2387
	cc = n->cc;
2388
	if(n->mtu)
2389
		cclose(n->mtu);
2390
	memset(n, 0, sizeof *n);
2391
	unlock(&netlinks);
2392
 
2393
	cclose(dc);
2394
	cclose(cc);
2395
 
2396
	/* squeeze orphan devices */
2397
	wlock(&devs);
2398
	for(p = d = devs.d; d; d = next){
2399
		next = d->next;
2400
		if(d->ndl > 0) {
2401
			p = d;
2402
			continue;
2403
		}
2404
		qlock(d);
2405
		downdev(d, "orphan");
2406
		qunlock(d);
2407
		if(p != devs.d)
2408
			p->next = next;
2409
		else{
2410
			devs.d = next;
2411
			p = devs.d;
2412
		}
2413
		free(d->frames);
2414
		free(d);
2415
		dropunit();
2416
	}
2417
	wunlock(&devs);
2418
}
2419
 
2420
static void
2421
removeaoedev(Aoedev *d)
2422
{
2423
	int i;
2424
	Aoedev *p;
2425
 
2426
	wlock(&devs);
2427
	p = 0;
2428
	if(d != devs.d)
2429
		for(p = devs.d; p; p = p->next)
2430
			if(p->next == d)
2431
				break;
2432
	qlock(d);
2433
	d->flag &= ~Dup;
2434
 
2435
	/*
2436
	 * Changing the version number is, strictly speaking, correct,
2437
 	 * but doing so means that deleting a LUN that is not in use
2438
	 * invalidates all other LUNs too.  If your file server has
2439
	 * venti arenas or fossil file systems on 1.0, and you delete 1.1,
2440
	 * since you no longer need it, 1.0 will become inaccessible to your
2441
	 * file server, which will eventually panic.  Note that newdev()
2442
	 * does not change the version number.
2443
	 */
2444
	// newvers(d);
2445
 
2446
	d->ndl = 0;
2447
	qunlock(d);
2448
	for(i = 0; i < d->nframes; i++)
2449
		frameerror(d, d->frames+i, Eaoedown);
2450
 
2451
	if(p)
2452
		p->next = d->next;
2453
	else
2454
		devs.d = d->next;
2455
	free(d->frames);
2456
	free(d);
2457
	dropunit();
2458
	wunlock(&devs);
2459
}
2460
 
2461
static void
2462
removedev(char *name)
2463
{
2464
	Aoedev *d, *p;
2465
 
2466
	wlock(&devs);
2467
	for(p = d = devs.d; d; p = d, d = d->next)
2468
		if(strcmp(name, unitname(d)) == 0) {
2469
			wunlock(&devs);
2470
			removeaoedev(p);
2471
			return;
2472
		}
2473
	wunlock(&devs);
2474
	error("device not bound");
2475
}
2476
 
2477
static void
2478
discoverstr(char *f)
2479
{
2480
	ushort shelf, slot;
2481
	ulong sh;
2482
	char *s;
2483
 
2484
	if(f == 0){
2485
		discover(0xffff, 0xff);
2486
		return;
2487
	}
2488
 
2489
	shelf = sh = strtol(f, &s, 0);
2490
	if(s == f || sh > 0xffff)
2491
		error("bad shelf");
2492
	f = s;
2493
	if(*f++ == '.'){
2494
		slot = strtol(f, &s, 0);
2495
		if(s == f || slot > 0xff)
2496
			error("bad shelf");
2497
	}else
2498
		slot = 0xff;
2499
	discover(shelf, slot);
2500
}
2501
 
2502
 
2503
static void
2504
aoeremove(Chan *c)
2505
{
2506
	switch(TYPE(c->qid)){
2507
	default:
2508
		error(Eperm);
2509
	case Qunitdir:
2510
		removeaoedev(unit2dev(UNIT(c->qid)));
2511
		break;
2512
	}
2513
}
2514
 
2515
static long
2516
topctlwrite(void *db, long n)
2517
{
2518
	enum {
2519
		Autodiscover,
2520
		Bind,
2521
		Debug,
2522
		Discover,
2523
		Rediscover,
2524
		Remove,
2525
		Unbind,
2526
	};
2527
	char *f;
2528
	Cmdbuf *cb;
2529
	Cmdtab *ct;
2530
	static Cmdtab cmds[] = {
2531
		{ Autodiscover,	"autodiscover",	0	},
2532
		{ Bind, 	"bind", 	2	},
2533
		{ Debug, 	"debug", 	0	},
2534
		{ Discover, 	"discover", 	0	},
2535
		{ Rediscover,	"rediscover",	0	},
2536
		{ Remove,	"remove",	2	},
2537
		{ Unbind,	"unbind",	2	},
2538
	};
2539
 
2540
	cb = parsecmd(db, n);
2541
	if(waserror()){
2542
		free(cb);
2543
		nexterror();
2544
	}
2545
	ct = lookupcmd(cb, cmds, nelem(cmds));
2546
	f = cb->f[1];
2547
	switch(ct->index){
2548
	case Autodiscover:
2549
		autodiscover = toggle(f, autodiscover, 1);
2550
		break;
2551
	case Bind:
2552
		netbind(f);
2553
		break;
2554
	case Debug:
2555
		debug = toggle(f, debug, 1);
2556
		break;
2557
	case Discover:
2558
		discoverstr(f);
2559
		break;
2560
	case Rediscover:
2561
		rediscover = toggle(f, rediscover, 1);
2562
		break;
2563
	case Remove:
2564
		removedev(f);
2565
		break;
2566
	case Unbind:
2567
		netunbind(f);
2568
		break;
2569
	default:
2570
		cmderror(cb, "unknown aoe control message");
2571
	}
2572
	poperror();
2573
	free(cb);
2574
	return n;
2575
}
2576
 
2577
static long
2578
aoewrite(Chan *c, void *db, long n, vlong off)
2579
{
2580
	switch(TYPE(c->qid)){
2581
	default:
2582
	case Qzero:
2583
	case Qtopdir:
2584
	case Qunitdir:
2585
	case Qtoplog:
2586
		error(Eperm);
2587
	case Qtopctl:
2588
		return topctlwrite(db, n);
2589
	case Qctl:
2590
	case Qdata:
2591
	case Qconfig:
2592
	case Qident:
2593
		return unitwrite(c, db, n, off);
2594
	}
2595
}
2596
 
2597
Dev aoedevtab = {
2598
	L'æ',
2599
	"aoe",
2600
 
2601
	devreset,
2602
	devinit,
2603
	devshutdown,
2604
	aoeattach,
2605
	aoewalk,
2606
	aoestat,
2607
	aoeopen,
2608
	devcreate,
2609
	aoeclose,
2610
	aoeread,
2611
	devbread,
2612
	aoewrite,
2613
	devbwrite,
2614
	aoeremove,
2615
	devwstat,
2616
	devpower,
2617
	devconfig,
2618
};