Subversion Repositories planix.SVN

Rev

Rev 2 | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
2 - 1
/*
2
 * intel pci-express 10Gb ethernet driver for 8259[89]
3
 * copyright © 2007, coraid, inc.
4
 * depessimised and made to work on the 82599 at bell labs, 2013.
5
 *
6
 * 82599 requests should ideally not cross a 4KB (page) boundary.
7
 */
8
#include "u.h"
9
#include "../port/lib.h"
10
#include "mem.h"
11
#include "dat.h"
12
#include "fns.h"
13
#include "io.h"
14
#include "../port/error.h"
15
#include "../port/netif.h"
16
#include "etherif.h"
17
 
18
/* step index x forward by one, wrapped by mask m (callers pass ring size - 1) */
#define NEXTPOW2(x, m)	((m) & (1+(x)))
19
 
20
enum {
21
	Rbsz	= ETHERMAXTU+32, /* +slop is for vlan headers, crcs, etc. */
22
	Descalign= 128,		/* 599 manual needs 128-byte alignment */
23
 
24
	/* tunable parameters */
25
	Goslow	= 0,		/* flag: go slow by throttling intrs, etc. */
26
	/* were 256, 1024 & 64, but 30, 47 and 1 are ample. */
27
	Nrd	= 64,		/* multiple of 8, power of 2 for NEXTPOW2 */
28
	Nrb	= 128,
29
	Ntd	= 32,		/* multiple of 8, power of 2 for NEXTPOW2 */
30
};
31
 
32
/*
 * register offsets, expressed as indices into the u32int register array
 * (byte offset / 4).
 */
enum {
	/* general */
	Ctrl		= 0x00000/4,	/* Device Control */
	Status		= 0x00008/4,	/* Device Status */
	Ctrlext		= 0x00018/4,	/* Extended Device Control */
	Esdp		= 0x00020/4,	/* extended sdp control */
	Esodp		= 0x00028/4,	/* extended od sdp control (i2cctl on 599) */
	Ledctl		= 0x00200/4,	/* led control */
	Tcptimer	= 0x0004c/4,	/* tcp timer */
	Ecc		= 0x110b0/4,	/* errata ecc control magic (pcie intr cause on 599) */

	/* nvm */
	Eec		= 0x10010/4,	/* eeprom/flash control */
	Eerd		= 0x10014/4,	/* eeprom read */
	Fla		= 0x1001c/4,	/* flash access */
	Flop		= 0x1013c/4,	/* flash opcode */
	Grc		= 0x10200/4,	/* general rx control */

	/* interrupt */
	Icr		= 0x00800/4,	/* interrupt cause read */
	Ics		= 0x00808/4,	/* interrupt cause set */
	Ims		= 0x00880/4,	/* interrupt mask read/set (actually enable) */
	Imc		= 0x00888/4,	/* interrupt mask clear */
	Iac		= 0x00810/4,	/* interrupt auto clear */
	Iam		= 0x00890/4,	/* interrupt auto mask enable */
	Itr		= 0x00820/4,	/* interrupt throttling rate regs (0-19) */
	Ivar		= 0x00900/4,	/* interrupt vector allocation regs. */
	/* msi interrupt */
	Msixt		= 0x0000/4,	/* msix table (bar3) */
	Msipba		= 0x2000/4,	/* msix pending bit array (bar3) */
	Pbacl		= 0x11068/4,	/* pba clear */
	Gpie		= 0x00898/4,	/* general purpose int enable */

	/* flow control */
	Pfctop		= 0x03008/4,	/* priority flow ctl type opcode */
	Fcttv		= 0x03200/4,	/* flow ctl transmit timer value (0-3) */
	Fcrtl		= 0x03220/4,	/* flow ctl rx threshold low (0-7) +8n */
	Fcrth		= 0x03260/4,	/* flow ctl rx threshold high (0-7) +8n */
	Rcrtv		= 0x032a0/4,	/* flow ctl refresh value threshold */
	Tfcs		= 0x0ce00/4,	/* flow ctl tx status */

	/* rx dma */
	Rbal		= 0x01000/4,	/* rx desc base low (0-63) +0x40n */
	Rbah		= 0x01004/4,	/* rx desc base high */
	Rdlen		= 0x01008/4,	/* rx desc length */
	Rdh		= 0x01010/4,	/* rx desc head */
	Rdt		= 0x01018/4,	/* rx desc tail */
	Rxdctl		= 0x01028/4,	/* rx desc control */

	Srrctl		= 0x02100/4,	/* split & replication rx ctl. array */
	Dcarxctl	= 0x02200/4,	/* rx dca control */
	Rdrxctl		= 0x02f00/4,	/* rx dma control */
	Rxpbsize	= 0x03c00/4,	/* rx packet buffer size */
	Rxctl		= 0x03000/4,	/* rx control */
	Dropen		= 0x03d04/4,	/* drop enable control (598 only) */

	/* rx */
	Rxcsum		= 0x05000/4,	/* rx checksum control */
	Rfctl		= 0x05008/4,	/* rx filter control */
	Mta		= 0x05200/4,	/* multicast table array (0-127) */
	Ral98		= 0x05400/4,	/* rx address low (598) */
	Rah98		= 0x05404/4,	/* rx address high (598) */
	Ral99		= 0x0a200/4,	/* rx address low array (599) */
	Rah99		= 0x0a204/4,	/* rx address high array (599) */
	Psrtype		= 0x05480/4,	/* packet split rx type. */
	Vfta		= 0x0a000/4,	/* vlan filter table array. */
	Fctrl		= 0x05080/4,	/* filter control */
	Vlnctrl		= 0x05088/4,	/* vlan control */
	Msctctrl	= 0x05090/4,	/* multicast control */
	Mrqc		= 0x05818/4,	/* multiple rx queues cmd */
	Vmdctl		= 0x0581c/4,	/* vmdq control (598 only) */
	Imir		= 0x05a80/4,	/* immediate irq rx (0-7) (598 only) */
	Imirext		= 0x05aa0/4,	/* immediate irq rx ext (598 only) */
	Imirvp		= 0x05ac0/4,	/* immediate irq vlan priority (598 only) */
	Reta		= 0x05c00/4,	/* redirection table */
	Rssrk		= 0x05c80/4,	/* rss random key */

	/* tx */
	Tdbal		= 0x06000/4,	/* tx desc base low +0x40n array */
	Tdbah		= 0x06004/4,	/* tx desc base high */
	Tdlen		= 0x06008/4,	/* tx desc len */
	Tdh		= 0x06010/4,	/* tx desc head */
	Tdt		= 0x06018/4,	/* tx desc tail */
	Txdctl		= 0x06028/4,	/* tx desc control */
	Tdwbal		= 0x06038/4,	/* tx desc write-back address low */
	Tdwbah		= 0x0603c/4,	/* tx desc write-back address high */

	Dtxctl98	= 0x07e00/4,	/* tx dma control (598 only) */
	Dtxctl99	= 0x04a80/4,	/* tx dma control (599 only) */
	Tdcatxctrl98	= 0x07200/4,	/* tx dca register (0-15) (598 only) */
	Tdcatxctrl99	= 0x0600c/4,	/* tx dca register (0-127) (599 only) */
	Tipg		= 0x0cb00/4,	/* tx inter-packet gap (598 only) */
	Txpbsize	= 0x0cc00/4,	/* tx packet-buffer size (0-15) */

	/* mac */
	Hlreg0		= 0x04240/4,	/* highlander control reg 0 */
	Hlreg1		= 0x04244/4,	/* highlander control reg 1 (ro) */
	Msca		= 0x0425c/4,	/* mdi signal cmd & addr */
	Msrwd		= 0x04260/4,	/* mdi single rw data */
	Mhadd		= 0x04268/4,	/* mac addr high & max frame */
	Pcss1		= 0x04288/4,	/* xgxs status 1 */
	Pcss2		= 0x0428c/4,	/* xgxs status 2 */
	Xpcss		= 0x04290/4,	/* 10gb-x pcs status */
	Serdesc		= 0x04298/4,	/* serdes control */
	Macs		= 0x0429c/4,	/* fifo control & report */
	Autoc		= 0x042a0/4,	/* autodetect control & status */
	Links		= 0x042a4/4,	/* link status */
	Links2		= 0x04324/4,	/* link status 2 (599 only) */
	Autoc2		= 0x042a8/4,	/* autodetect control 2 */
};
142
 
143
/* bit and field definitions for the registers above and for Ctlr.flag */
enum {
	Factive		= 1<<0,		/* Ctlr.flag: controller claimed by pnp() */
	Enable		= 1<<31,	/* enable bit used in Rah and Fcrtl/Fcrth */

	/* Ctrl */
	Rst		= 1<<26,	/* full nic reset */

	/* Txdctl */
	Ten		= 1<<25,	/* tx queue enable */

	/* Dtxctl99 */
	Te		= 1<<0,		/* dma tx enable */

	/* Fctrl */
	Bam		= 1<<10,	/* broadcast accept mode */
	Upe 		= 1<<9,		/* unicast promiscuous */
	Mpe 		= 1<<8,		/* multicast promiscuous */

	/* Rxdctl */
	Pthresh		= 0,		/* prefetch threshold shift in bits */
	Hthresh		= 8,		/* host buffer minimum threshold shift */
	Wthresh		= 16,		/* writeback threshold shift */
	Renable		= 1<<25,	/* rx queue enable */

	/* Rxctl */
	Rxen		= 1<<0,		/* receiver enable */
	Dmbyps		= 1<<1,		/* descr. monitor bypass (598 only) */

	/* Rdrxctl */
	Rdmt½		= 0,		/* 598 */
	Rdmt¼		= 1,		/* 598 */
	Rdmt⅛		= 2,		/* 598 */
	Crcstrip	= 1<<1,		/* 599 */
	Rscfrstsize	= 037<<17,	/* 599; should be zero */

	/* Rxcsum */
	Ippcse		= 1<<12,	/* ip payload checksum enable */

	/* Eerd */
	EEstart		= 1<<0,		/* Start Read */
	EEdone		= 1<<1,		/* Read done */

	/* interrupts */
	Irx0		= 1<<0,		/* driver defined */
	Itx0		= 1<<1,		/* driver defined */
	Lsc		= 1<<20,	/* link status change */

	/* Links */
	Lnkup		= 1<<30,	/* link is up */
	Lnkspd		= 1<<29,	/* set: 10000 Mb/s, clear: 1000 (see lproc) */

	/* Hlreg0 */
	Txcrcen		= 1<<0,		/* add crc during xmit */
	Rxcrcstrip	= 1<<1,		/* strip crc during recv */
	Jumboen		= 1<<2,		/* accept jumbo frames */
	Txpaden		= 1<<10,	/* pad short frames during xmit */

	/* Autoc */
	Flu		= 1<<0,		/* force link up */
	Lmsshift	= 13,		/* link mode select shift */
	Lmsmask		= 7,		/* link mode select mask (after shift) */
};
205
 
206
typedef struct Ctlr Ctlr;
207
typedef struct Rd Rd;
208
typedef struct Td Td;
209
 
210
typedef struct {
211
	uint	reg;
212
	char	*name;
213
} Stat;
214
 
215
Stat stattab[] = {
216
	0x4000,	"crc error",
217
	0x4004,	"illegal byte",
218
	0x4008,	"short packet",
219
	0x3fa0,	"missed pkt0",
220
	0x4034,	"mac local flt",
221
	0x4038,	"mac rmt flt",
222
	0x4040,	"rx length err",
223
	0x3f60,	"xon tx",
224
	0xcf60,	"xon rx",
225
	0x3f68,	"xoff tx",
226
	0xcf68,	"xoff rx",
227
	0x405c,	"rx 040",
228
	0x4060,	"rx 07f",
229
	0x4064,	"rx 100",
230
	0x4068,	"rx 200",
231
	0x406c,	"rx 3ff",
232
	0x4070,	"rx big",
233
	0x4074,	"rx ok",
234
	0x4078,	"rx bcast",
235
	0x3fc0,	"rx no buf0",
236
	0x40a4,	"rx runt",
237
	0x40a8,	"rx frag",
238
	0x40ac,	"rx ovrsz",
239
	0x40b0,	"rx jab",
240
	0x40d0,	"rx pkt",
241
 
242
	0x40d4,	"tx pkt",
243
	0x40d8,	"tx 040",
244
	0x40dc,	"tx 07f",
245
	0x40e0,	"tx 100",
246
	0x40e4,	"tx 200",
247
	0x40e8,	"tx 3ff",
248
	0x40ec,	"tx big",
249
	0x40f4,	"tx bcast",
250
	0x4120,	"xsum err",
251
};
252
 
253
/* receive descriptor status bits (Rd.status) */
enum {
	Pif	= 1<<7,	/* past exact filter (sic) */
	Ipcs	= 1<<6,	/* ip checksum calculated */
	L4cs	= 1<<5,	/* layer 2 (NOTE(review): name suggests layer 4 checksum; confirm) */
	Tcpcs	= 1<<4,	/* tcp checksum calculated */
	Vp	= 1<<3,	/* 802.1q packet matched vet */
	Ixsm	= 1<<2,	/* ignore checksum */
	Reop	= 1<<1,	/* end of packet */
	Rdd	= 1<<0,	/* descriptor done */
};
264
 
265
struct Rd {			/* Receive Descriptor */
266
	u32int	addr[2];
267
	ushort	length;
268
	ushort	cksum;
269
	uchar	status;
270
	uchar	errors;
271
	ushort	vlan;
272
};
273
 
274
/* transmit descriptor bits */
enum {
	/* Td cmd */
	Rs	= 1<<3,		/* report status */
	Ic	= 1<<2,		/* insert checksum */
	Ifcs	= 1<<1,		/* insert FCS (ethernet crc) */
	Teop	= 1<<0,		/* end of packet */

	/* Td status */
	Tdd	= 1<<0,		/* descriptor done */
};
284
 
285
struct Td {			/* Transmit Descriptor */
286
	u32int	addr[2];
287
	ushort	length;
288
	uchar	cso;
289
	uchar	cmd;
290
	uchar	status;
291
	uchar	css;
292
	ushort	vlan;
293
};
294
 
295
struct Ctlr {
296
	Pcidev	*p;
297
	Ether	*edev;
298
	int	type;
299
 
300
	/* virtual */
301
	u32int	*reg;
302
	u32int	*msix;			/* unused */
303
 
304
	/* physical */
305
	u32int	*physreg;
306
	u32int	*physmsix;		/* unused */
307
 
308
	uchar	flag;
309
	int	nrd;
310
	int	ntd;
311
	int	nrb;			/* # bufs this Ctlr has in the pool */
312
	uint	rbsz;
313
	int	procsrunning;
314
	int	attached;
315
 
316
	Watermark wmrb;
317
	Watermark wmrd;
318
	Watermark wmtd;
319
 
320
	QLock	slock;
321
	QLock	alock;			/* attach lock */
322
	QLock	tlock;
323
	Rendez	lrendez;
324
	Rendez	trendez;
325
	Rendez	rrendez;
326
 
327
	uint	im;			/* interrupt mask */
328
	uint	lim;
329
	uint	rim;
330
	uint	tim;
331
	Lock	imlock;
332
 
333
	Rd*	rdba;			/* receive descriptor base address */
334
	Block**	rb;			/* receive buffers */
335
	int	rdt;			/* receive descriptor tail */
336
	int	rdfree;			/* rx descriptors awaiting packets */
337
 
338
	Td*	tdba;			/* transmit descriptor base address */
339
	int	tdh;			/* transmit descriptor head */
340
	int	tdt;			/* transmit descriptor tail */
341
	Block**	tb;			/* transmit buffers */
342
 
343
	uchar	ra[Eaddrlen];		/* receive address */
344
	uchar	mta[128];		/* multicast table array */
345
	ulong	stats[nelem(stattab)];
346
	uint	speeds[3];
347
};
348
 
349
/* values of Ctlr.type */
enum {
	I82598 = 1,
	I82599 = 2,
};
353
 
354
static	Ctlr	*ctlrtab[4];
355
static	int	nctlr;
356
static	Lock	rblock;
357
static	Block	*rbpool;
358
static	int	nrbfull;  /* # of rcv Blocks with data awaiting processing */
359
 
360
static void
361
readstats(Ctlr *ctlr)
362
{
363
	int i;
364
 
365
	qlock(&ctlr->slock);
366
	for(i = 0; i < nelem(ctlr->stats); i++)
367
		ctlr->stats[i] += ctlr->reg[stattab[i].reg >> 2];
368
	qunlock(&ctlr->slock);
369
}
370
 
371
/* link speed in Mb/s: index 0 is link down, 1 and 2 are chosen by lproc() */
static int speedtab[] = { 0, 1000, 10000 };
376
 
377
static long
378
ifstat(Ether *edev, void *a, long n, ulong offset)
379
{
380
	uint i, *t;
381
	char *s, *p, *e;
382
	Ctlr *ctlr;
383
 
384
	ctlr = edev->ctlr;
385
	p = s = malloc(READSTR);
386
	if(p == nil)
387
		error(Enomem);
388
	e = p + READSTR;
389
 
390
	readstats(ctlr);
391
	for(i = 0; i < nelem(stattab); i++)
392
		if(ctlr->stats[i] > 0)
393
			p = seprint(p, e, "%.10s  %uld\n", stattab[i].name,
394
				ctlr->stats[i]);
395
	t = ctlr->speeds;
396
	p = seprint(p, e, "speeds: 0:%d 1000:%d 10000:%d\n", t[0], t[1], t[2]);
397
	p = seprint(p, e, "mtu: min:%d max:%d\n", edev->minmtu, edev->maxmtu);
398
	p = seprint(p, e, "rdfree %d rdh %d rdt %d\n", ctlr->rdfree, ctlr->reg[Rdt],
399
		ctlr->reg[Rdh]);
400
	p = seprintmark(p, e, &ctlr->wmrb);
401
	p = seprintmark(p, e, &ctlr->wmrd);
402
	p = seprintmark(p, e, &ctlr->wmtd);
403
	USED(p);
404
	n = readstr(offset, a, n, s);
405
	free(s);
406
 
407
	return n;
408
}
409
 
410
static void
411
ienable(Ctlr *ctlr, int i)
412
{
413
	ilock(&ctlr->imlock);
414
	ctlr->im |= i;
415
	ctlr->reg[Ims] = ctlr->im;
416
	iunlock(&ctlr->imlock);
417
}
418
 
419
static int
420
lim(void *v)
421
{
422
	return ((Ctlr*)v)->lim != 0;
423
}
424
 
425
static void
426
lproc(void *v)
427
{
428
	int r, i;
429
	Ctlr *ctlr;
430
	Ether *e;
431
 
432
	e = v;
433
	ctlr = e->ctlr;
434
	for (;;) {
435
		r = ctlr->reg[Links];
436
		e->link = (r & Lnkup) != 0;
437
		i = 0;
438
		if(e->link)
439
			i = 1 + ((r & Lnkspd) != 0);
440
		ctlr->speeds[i]++;
441
		e->mbps = speedtab[i];
442
		ctlr->lim = 0;
443
		ienable(ctlr, Lsc);
444
		sleep(&ctlr->lrendez, lim, ctlr);
445
		ctlr->lim = 0;
446
	}
447
}
448
 
449
static long
450
ctl(Ether *, void *, long)
451
{
452
	error(Ebadarg);
453
	return -1;
454
}
455
 
456
static Block*
457
rballoc(void)
458
{
459
	Block *bp;
460
 
461
	ilock(&rblock);
462
	if((bp = rbpool) != nil){
463
		rbpool = bp->next;
464
		bp->next = 0;
465
		_xinc(&bp->ref);	/* prevent bp from being freed */
466
	}
467
	iunlock(&rblock);
468
	return bp;
469
}
470
 
471
void
472
rbfree(Block *b)
473
{
474
	b->rp = b->wp = (uchar*)PGROUND((uintptr)b->base);
475
 	b->flag &= ~(Bipck | Budpck | Btcpck | Bpktck);
476
	ilock(&rblock);
477
	b->next = rbpool;
478
	rbpool = b;
479
	nrbfull--;
480
	iunlock(&rblock);
481
}
482
 
483
static int
484
cleanup(Ctlr *ctlr, int tdh)
485
{
486
	Block *b;
487
	uint m, n;
488
 
489
	m = ctlr->ntd - 1;
490
	while(ctlr->tdba[n = NEXTPOW2(tdh, m)].status & Tdd){
491
		tdh = n;
492
		b = ctlr->tb[tdh];
493
		ctlr->tb[tdh] = 0;
494
		if (b)
495
			freeb(b);
496
		ctlr->tdba[tdh].status = 0;
497
	}
498
	return tdh;
499
}
500
 
501
void
502
transmit(Ether *e)
503
{
504
	uint i, m, tdt, tdh;
505
	Ctlr *ctlr;
506
	Block *b;
507
	Td *t;
508
 
509
	ctlr = e->ctlr;
510
	if(!canqlock(&ctlr->tlock)){
511
		ienable(ctlr, Itx0);
512
		return;
513
	}
514
	tdh = ctlr->tdh = cleanup(ctlr, ctlr->tdh);
515
	tdt = ctlr->tdt;
516
	m = ctlr->ntd - 1;
517
	for(i = 0; ; i++){
518
		if(NEXTPOW2(tdt, m) == tdh){	/* ring full? */
519
			ienable(ctlr, Itx0);
520
			break;
521
		}
522
		if((b = qget(e->oq)) == nil)
523
			break;
524
		assert(ctlr->tdba != nil);
525
		t = ctlr->tdba + tdt;
526
		t->addr[0] = PCIWADDR(b->rp);
527
		t->length = BLEN(b);
528
		t->cmd = Ifcs | Teop;
529
		if (!Goslow)
530
			t->cmd |= Rs;
531
		ctlr->tb[tdt] = b;
532
		/* note size of queue of tds awaiting transmission */
533
		notemark(&ctlr->wmtd, (tdt + Ntd - tdh) % Ntd);
534
		tdt = NEXTPOW2(tdt, m);
535
	}
536
	if(i) {
537
		coherence();
538
		ctlr->reg[Tdt] = ctlr->tdt = tdt;  /* make new Tds active */
539
		coherence();
540
		ienable(ctlr, Itx0);
541
	}
542
	qunlock(&ctlr->tlock);
543
}
544
 
545
static int
546
tim(void *c)
547
{
548
	return ((Ctlr*)c)->tim != 0;
549
}
550
 
551
static void
552
tproc(void *v)
553
{
554
	Ctlr *ctlr;
555
	Ether *e;
556
 
557
	e = v;
558
	ctlr = e->ctlr;
559
	for (;;) {
560
		sleep(&ctlr->trendez, tim, ctlr); /* xmit interrupt kicks us */
561
		ctlr->tim = 0;
562
		transmit(e);
563
	}
564
}
565
 
566
static void
567
rxinit(Ctlr *ctlr)
568
{
569
	int i, is598, autoc;
570
	ulong until;
571
	Block *b;
572
 
573
	ctlr->reg[Rxctl] &= ~Rxen;
574
	ctlr->reg[Rxdctl] = 0;
575
	for(i = 0; i < ctlr->nrd; i++){
576
		b = ctlr->rb[i];
577
		ctlr->rb[i] = 0;
578
		if(b)
579
			freeb(b);
580
	}
581
	ctlr->rdfree = 0;
582
 
583
	coherence();
584
	ctlr->reg[Fctrl] |= Bam;
585
	ctlr->reg[Fctrl] &= ~(Upe | Mpe);
586
 
587
	/* intel gets some csums wrong (e.g., errata 44) */
588
	ctlr->reg[Rxcsum] &= ~Ippcse;
589
	ctlr->reg[Hlreg0] &= ~Jumboen;		/* jumbos are a bad idea */
590
	ctlr->reg[Hlreg0] |= Txcrcen | Rxcrcstrip | Txpaden;
591
	ctlr->reg[Srrctl] = (ctlr->rbsz + 1024 - 1) / 1024;
592
	ctlr->reg[Mhadd] = ctlr->rbsz << 16;
593
 
594
	ctlr->reg[Rbal] = PCIWADDR(ctlr->rdba);
595
	ctlr->reg[Rbah] = 0;
596
	ctlr->reg[Rdlen] = ctlr->nrd*sizeof(Rd); /* must be multiple of 128 */
597
	ctlr->reg[Rdh] = 0;
598
	ctlr->reg[Rdt] = ctlr->rdt = 0;
599
	coherence();
600
 
601
	is598 = (ctlr->type == I82598);
602
	if (is598)
603
		ctlr->reg[Rdrxctl] = Rdmt¼;
604
	else {
605
		ctlr->reg[Rdrxctl] |= Crcstrip;
606
		ctlr->reg[Rdrxctl] &= ~Rscfrstsize;
607
	}
608
	if (Goslow && is598)
609
		ctlr->reg[Rxdctl] = 8<<Wthresh | 8<<Pthresh | 4<<Hthresh | Renable;
610
	else
611
		ctlr->reg[Rxdctl] = Renable;
612
	coherence();
613
 
614
	/*
615
	 * don't wait forever like an idiot (and hang the system),
616
	 * maybe it's disconnected.
617
	 */
618
	until = TK2MS(MACHP(0)->ticks) + 250;
619
	while (!(ctlr->reg[Rxdctl] & Renable) && TK2MS(MACHP(0)->ticks) < until)
620
		;
621
	if(!(ctlr->reg[Rxdctl] & Renable))
622
		print("#l%d: Renable didn't come on, might be disconnected\n",
623
			ctlr->edev->ctlrno);
624
 
625
	ctlr->reg[Rxctl] |= Rxen | (is598? Dmbyps: 0);
626
 
627
	if (is598){
628
		autoc = ctlr->reg[Autoc];
629
		/* what is this rubbish and why do we care? */
630
		print("#l%d: autoc %#ux; lms %d (3 is 10g sfp)\n",
631
			ctlr->edev->ctlrno, autoc, (autoc>>Lmsshift) & Lmsmask);
632
		ctlr->reg[Autoc] |= Flu;
633
		coherence();
634
		delay(50);
635
	}
636
}
637
 
638
static void
639
replenish(Ctlr *ctlr, uint rdh)
640
{
641
	int rdt, m, i;
642
	Block *b;
643
	Rd *r;
644
 
645
	m = ctlr->nrd - 1;
646
	i = 0;
647
	for(rdt = ctlr->rdt; NEXTPOW2(rdt, m) != rdh; rdt = NEXTPOW2(rdt, m)){
648
		r = ctlr->rdba + rdt;
649
		if((b = rballoc()) == nil){
650
			print("#l%d: no buffers\n", ctlr->edev->ctlrno);
651
			break;
652
		}
653
		ctlr->rb[rdt] = b;
654
		r->addr[0] = PCIWADDR(b->rp);
655
		r->status = 0;
656
		ctlr->rdfree++;
657
		i++;
658
	}
659
	if(i) {
660
		coherence();
661
		ctlr->reg[Rdt] = ctlr->rdt = rdt; /* hand back recycled rdescs */
662
		coherence();
663
	}
664
}
665
 
666
static int
667
rim(void *v)
668
{
669
	return ((Ctlr*)v)->rim != 0;
670
}
671
 
672
void
673
rproc(void *v)
674
{
675
	int passed;
676
	uint m, rdh;
677
	Block *bp;
678
	Ctlr *ctlr;
679
	Ether *e;
680
	Rd *r;
681
 
682
	e = v;
683
	ctlr = e->ctlr;
684
	m = ctlr->nrd - 1;
685
	for (rdh = 0; ; ) {
686
		replenish(ctlr, rdh);
687
		ienable(ctlr, Irx0);
688
		sleep(&ctlr->rrendez, rim, ctlr);
689
		passed = 0;
690
		for (;;) {
691
			ctlr->rim = 0;
692
			r = ctlr->rdba + rdh;
693
			if(!(r->status & Rdd))
694
				break;		/* wait for pkts to arrive */
695
			bp = ctlr->rb[rdh];
696
			ctlr->rb[rdh] = 0;
697
			if (r->length > ETHERMAXTU)
698
				print("#l%d: got jumbo of %d bytes\n",
699
					e->ctlrno, r->length);
700
			bp->wp += r->length;
701
			bp->lim = bp->wp;		/* lie like a dog */
702
//			r->status = 0;
703
 
704
			ilock(&rblock);
705
			nrbfull++;
706
			iunlock(&rblock);
707
			notemark(&ctlr->wmrb, nrbfull);
708
			etheriq(e, bp, 1);
709
 
710
			passed++;
711
			ctlr->rdfree--;
712
			rdh = NEXTPOW2(rdh, m);
713
			if (ctlr->rdfree <= ctlr->nrd - 16)
714
				replenish(ctlr, rdh);
715
		}
716
		/* note how many rds had full buffers */
717
		notemark(&ctlr->wmrd, passed);
718
	}
719
}
720
 
721
static void
722
promiscuous(void *a, int on)
723
{
724
	Ctlr *ctlr;
725
	Ether *e;
726
 
727
	e = a;
728
	ctlr = e->ctlr;
729
	if(on)
730
		ctlr->reg[Fctrl] |= Upe | Mpe;
731
	else
732
		ctlr->reg[Fctrl] &= ~(Upe | Mpe);
733
}
734
 
735
static void
736
multicast(void *a, uchar *ea, int on)
737
{
738
	int b, i;
739
	Ctlr *ctlr;
740
	Ether *e;
741
 
742
	e = a;
743
	ctlr = e->ctlr;
744
 
745
	/*
746
	 * multiple ether addresses can hash to the same filter bit,
747
	 * so it's never safe to clear a filter bit.
748
	 * if we want to clear filter bits, we need to keep track of
749
	 * all the multicast addresses in use, clear all the filter bits,
750
	 * then set the ones corresponding to in-use addresses.
751
	 */
752
	i = ea[5] >> 1;
753
	b = (ea[5]&1)<<4 | ea[4]>>4;
754
	b = 1 << b;
755
	if(on)
756
		ctlr->mta[i] |= b;
757
//	else
758
//		ctlr->mta[i] &= ~b;
759
	ctlr->reg[Mta+i] = ctlr->mta[i];
760
}
761
 
762
static void
763
freemem(Ctlr *ctlr)
764
{
765
	Block *b;
766
 
767
	while(b = rballoc()){
768
		b->free = 0;
769
		freeb(b);
770
	}
771
	free(ctlr->rdba);
772
	ctlr->rdba = nil;
773
	free(ctlr->tdba);
774
	ctlr->tdba = nil;
775
	free(ctlr->rb);
776
	ctlr->rb = nil;
777
	free(ctlr->tb);
778
	ctlr->tb = nil;
779
}
780
 
781
static int
782
detach(Ctlr *ctlr)
783
{
784
	int i, is598;
785
 
786
	ctlr->reg[Imc] = ~0;
787
	ctlr->reg[Ctrl] |= Rst;
788
	for(i = 0; i < 100; i++){
789
		delay(1);
790
		if((ctlr->reg[Ctrl] & Rst) == 0)
791
			break;
792
	}
793
	if (i >= 100)
794
		return -1;
795
	is598 = (ctlr->type == I82598);
796
	if (is598) {			/* errata */
797
		delay(50);
798
		ctlr->reg[Ecc] &= ~(1<<21 | 1<<18 | 1<<9 | 1<<6);
799
	}
800
 
801
	/* not cleared by reset; kill it manually. */
802
	for(i = 1; i < 16; i++)
803
		ctlr->reg[is598? Rah98: Rah99] &= ~Enable;
804
	for(i = 0; i < 128; i++)
805
		ctlr->reg[Mta + i] = 0;
806
	for(i = 1; i < (is598? 640: 128); i++)
807
		ctlr->reg[Vfta + i] = 0;
808
 
809
//	freemem(ctlr);			// TODO
810
	ctlr->attached = 0;
811
	return 0;
812
}
813
 
814
static void
815
shutdown(Ether *e)
816
{
817
	detach(e->ctlr);
818
//	freemem(e->ctlr);
819
}
820
 
821
/* ≤ 20ms */
822
static ushort
823
eeread(Ctlr *ctlr, int i)
824
{
825
	ctlr->reg[Eerd] = EEstart | i<<2;
826
	while((ctlr->reg[Eerd] & EEdone) == 0)
827
		;
828
	return ctlr->reg[Eerd] >> 16;
829
}
830
 
831
static int
832
eeload(Ctlr *ctlr)
833
{
834
	ushort u, v, p, l, i, j;
835
 
836
	if((eeread(ctlr, 0) & 0xc0) != 0x40)
837
		return -1;
838
	u = 0;
839
	for(i = 0; i < 0x40; i++)
840
		u +=  eeread(ctlr, i);
841
	for(i = 3; i < 0xf; i++){
842
		p = eeread(ctlr, i);
843
		l = eeread(ctlr, p++);
844
		if((int)p + l + 1 > 0xffff)
845
			continue;
846
		for(j = p; j < p + l; j++)
847
			u += eeread(ctlr, j);
848
	}
849
	if(u != 0xbaba)
850
		return -1;
851
	if(ctlr->reg[Status] & (1<<3))
852
		u = eeread(ctlr, 10);
853
	else
854
		u = eeread(ctlr, 9);
855
	u++;
856
	for(i = 0; i < Eaddrlen;){
857
		v = eeread(ctlr, u + i/2);
858
		ctlr->ra[i++] = v;
859
		ctlr->ra[i++] = v>>8;
860
	}
861
	ctlr->ra[5] += (ctlr->reg[Status] & 0xc) >> 2;
862
	return 0;
863
}
864
 
865
static int
866
reset(Ctlr *ctlr)
867
{
868
	int i, is598;
869
	uchar *p;
870
 
871
	if(detach(ctlr)){
872
		print("82598: reset timeout\n");
873
		return -1;
874
	}
875
	if(eeload(ctlr)){
876
		print("82598: eeprom failure\n");
877
		return -1;
878
	}
879
	p = ctlr->ra;
880
	is598 = (ctlr->type == I82598);
881
	ctlr->reg[is598? Ral98: Ral99] = p[3]<<24 | p[2]<<16 | p[1]<<8 | p[0];
882
	ctlr->reg[is598? Rah98: Rah99] = p[5]<<8 | p[4] | Enable;
883
 
884
	readstats(ctlr);
885
	for(i = 0; i<nelem(ctlr->stats); i++)
886
		ctlr->stats[i] = 0;
887
 
888
	ctlr->reg[Ctrlext] |= 1 << 16;	/* required by errata (spec change 4) */
889
	if (Goslow) {
890
		/* make some guesses for flow control */
891
		ctlr->reg[Fcrtl] = 0x10000 | Enable;
892
		ctlr->reg[Fcrth] = 0x40000 | Enable;
893
		ctlr->reg[Rcrtv] = 0x6000;
894
	} else
895
		ctlr->reg[Fcrtl] = ctlr->reg[Fcrth] = ctlr->reg[Rcrtv] = 0;
896
 
897
	/* configure interrupt mapping (don't ask) */
898
	ctlr->reg[Ivar+0] =     0 | 1<<7;
899
	ctlr->reg[Ivar+64/4] =  1 | 1<<7;
900
//	ctlr->reg[Ivar+97/4] = (2 | 1<<7) << (8*(97%4));
901
 
902
	if (Goslow) {
903
		/* interrupt throttling goes here. */
904
		for(i = Itr; i < Itr + 20; i++)
905
			ctlr->reg[i] = 128;		/* ¼µs intervals */
906
		ctlr->reg[Itr + Itx0] = 256;
907
	} else {					/* don't throttle */
908
		for(i = Itr; i < Itr + 20; i++)
909
			ctlr->reg[i] = 0;		/* ¼µs intervals */
910
		ctlr->reg[Itr + Itx0] = 0;
911
	}
912
	return 0;
913
}
914
 
915
static void
916
txinit(Ctlr *ctlr)
917
{
918
	Block *b;
919
	int i;
920
 
921
	if (Goslow)
922
		ctlr->reg[Txdctl] = 16<<Wthresh | 16<<Pthresh;
923
	else
924
		ctlr->reg[Txdctl] = 0;
925
	if (ctlr->type == I82599)
926
		ctlr->reg[Dtxctl99] = 0;
927
	coherence();
928
	for(i = 0; i < ctlr->ntd; i++){
929
		b = ctlr->tb[i];
930
		ctlr->tb[i] = 0;
931
		if(b)
932
			freeb(b);
933
	}
934
 
935
	assert(ctlr->tdba != nil);
936
	memset(ctlr->tdba, 0, ctlr->ntd * sizeof(Td));
937
	ctlr->reg[Tdbal] = PCIWADDR(ctlr->tdba);
938
	ctlr->reg[Tdbah] = 0;
939
	ctlr->reg[Tdlen] = ctlr->ntd*sizeof(Td); /* must be multiple of 128 */
940
	ctlr->reg[Tdh] = 0;
941
	ctlr->tdh = ctlr->ntd - 1;
942
	ctlr->reg[Tdt] = ctlr->tdt = 0;
943
	coherence();
944
	if (ctlr->type == I82599)
945
		ctlr->reg[Dtxctl99] |= Te;
946
	coherence();
947
	ctlr->reg[Txdctl] |= Ten;
948
	coherence();
949
	while (!(ctlr->reg[Txdctl] & Ten))
950
		;
951
}
952
 
953
static void
954
attach(Ether *e)
955
{
956
	Block *b;
957
	Ctlr *ctlr;
958
	char buf[KNAMELEN];
959
 
960
	ctlr = e->ctlr;
961
	ctlr->edev = e;			/* point back to Ether* */
962
	qlock(&ctlr->alock);
963
	if(waserror()){
964
		reset(ctlr);
965
		freemem(ctlr);
966
		qunlock(&ctlr->alock);
967
		nexterror();
968
	}
969
	if(ctlr->rdba == nil) {
970
		ctlr->nrd = Nrd;
971
		ctlr->ntd = Ntd;
972
		ctlr->rdba = mallocalign(ctlr->nrd * sizeof *ctlr->rdba,
973
			Descalign, 0, 0);
974
		ctlr->tdba = mallocalign(ctlr->ntd * sizeof *ctlr->tdba,
975
			Descalign, 0, 0);
976
		ctlr->rb = malloc(ctlr->nrd * sizeof(Block *));
977
		ctlr->tb = malloc(ctlr->ntd * sizeof(Block *));
978
		if (ctlr->rdba == nil || ctlr->tdba == nil ||
979
		    ctlr->rb == nil || ctlr->tb == nil)
980
			error(Enomem);
981
 
982
		for(ctlr->nrb = 0; ctlr->nrb < 2*Nrb; ctlr->nrb++){
983
			b = allocb(ctlr->rbsz + BY2PG);	/* see rbfree() */
984
			if(b == nil)
985
				error(Enomem);
986
			b->free = rbfree;
987
			freeb(b);
988
		}
989
	}
990
	if (!ctlr->attached) {
991
		rxinit(ctlr);
992
		txinit(ctlr);
993
		nrbfull = 0;
994
		if (!ctlr->procsrunning) {
995
			snprint(buf, sizeof buf, "#l%dl", e->ctlrno);
996
			kproc(buf, lproc, e);
997
			snprint(buf, sizeof buf, "#l%dr", e->ctlrno);
998
			kproc(buf, rproc, e);
999
			snprint(buf, sizeof buf, "#l%dt", e->ctlrno);
1000
			kproc(buf, tproc, e);
1001
			ctlr->procsrunning = 1;
1002
		}
1003
		initmark(&ctlr->wmrb, Nrb, "rcv bufs unprocessed");
1004
		initmark(&ctlr->wmrd, Nrd-1, "rcv descrs processed at once");
1005
		initmark(&ctlr->wmtd, Ntd-1, "xmit descr queue len");
1006
		ctlr->attached = 1;
1007
	}
1008
	qunlock(&ctlr->alock);
1009
	poperror();
1010
}
1011
 
1012
static void
1013
interrupt(Ureg*, void *v)
1014
{
1015
	int icr, im;
1016
	Ctlr *ctlr;
1017
	Ether *e;
1018
 
1019
	e = v;
1020
	ctlr = e->ctlr;
1021
	ilock(&ctlr->imlock);
1022
	ctlr->reg[Imc] = ~0;			/* disable all intrs */
1023
	im = ctlr->im;
1024
	while((icr = ctlr->reg[Icr] & ctlr->im) != 0){
1025
		if(icr & Irx0){
1026
			im &= ~Irx0;
1027
			ctlr->rim = Irx0;
1028
			wakeup(&ctlr->rrendez);
1029
		}
1030
		if(icr & Itx0){
1031
			im &= ~Itx0;
1032
			ctlr->tim = Itx0;
1033
			wakeup(&ctlr->trendez);
1034
		}
1035
		if(icr & Lsc){
1036
			im &= ~Lsc;
1037
			ctlr->lim = Lsc;
1038
			wakeup(&ctlr->lrendez);
1039
		}
1040
	}
1041
	ctlr->reg[Ims] = ctlr->im = im; /* enable only intrs we didn't service */
1042
	iunlock(&ctlr->imlock);
1043
}
1044
 
1045
static void
1046
scan(void)
1047
{
1048
	int pciregs, pcimsix, type;
1049
	ulong io, iomsi;
1050
	void *mem, *memmsi;
1051
	Ctlr *ctlr;
1052
	Pcidev *p;
1053
 
1054
	p = 0;
1055
	while(p = pcimatch(p, Vintel, 0)){
1056
		switch(p->did){
1057
		case 0x10b6:		/* 82598 backplane */
1058
		case 0x10c6:		/* 82598 af dual port */
1059
		case 0x10c7:		/* 82598 af single port */
1060
		case 0x10dd:		/* 82598 at cx4 */
1061
		case 0x10ec:		/* 82598 at cx4 dual port */
1062
			pcimsix = 3;
1063
			type = I82598;
1064
			break;
1065
		case 0x10f7:		/* 82599 kx/kx4 */
1066
		case 0x10f8:		/* 82599 kx/kx4/kx */
1067
		case 0x10f9:		/* 82599 cx4 */
1068
		case 0x10fb:		/* 82599 sfi/sfp+ */
1069
		case 0x10fc:		/* 82599 xaui/bx4 */
1070
		case 0x1557:		/* 82599 single-port sfi */
1071
			pcimsix = 4;
1072
			type = I82599;
1073
			break;
1074
		default:
1075
			continue;
1076
		}
1077
		pciregs = 0;
1078
		if(nctlr >= nelem(ctlrtab)){
1079
			print("i82598: too many controllers\n");
1080
			return;
1081
		}
1082
 
1083
		io = p->mem[pciregs].bar & ~0xf;
1084
		mem = vmap(io, p->mem[pciregs].size);
1085
		if(mem == nil){
1086
			print("i82598: can't map regs %#p\n",
1087
				p->mem[pciregs].bar);
1088
			continue;
1089
		}
1090
 
1091
		iomsi = p->mem[pcimsix].bar & ~0xf;
1092
		memmsi = vmap(iomsi, p->mem[pcimsix].size);
1093
		if(memmsi == nil){
1094
			print("i82598: can't map msi-x regs %#p\n",
1095
				p->mem[pcimsix].bar);
1096
			vunmap(mem, p->mem[pciregs].size);
1097
			continue;
1098
		}
1099
 
1100
		ctlr = malloc(sizeof *ctlr);
1101
		if(ctlr == nil) {
1102
			vunmap(mem, p->mem[pciregs].size);
1103
			vunmap(memmsi, p->mem[pcimsix].size);
1104
			error(Enomem);
1105
		}
1106
		ctlr->p = p;
1107
		ctlr->type = type;
1108
		ctlr->physreg = (u32int*)io;
1109
		ctlr->physmsix = (u32int*)iomsi;
1110
		ctlr->reg = (u32int*)mem;
1111
		ctlr->msix = (u32int*)memmsi;	/* unused */
1112
		ctlr->rbsz = Rbsz;
1113
		if(reset(ctlr)){
1114
			print("i82598: can't reset\n");
1115
			free(ctlr);
1116
			vunmap(mem, p->mem[pciregs].size);
1117
			vunmap(memmsi, p->mem[pcimsix].size);
1118
			continue;
1119
		}
1120
		pcisetbme(p);
1121
		ctlrtab[nctlr++] = ctlr;
1122
	}
1123
}
1124
 
1125
static int
1126
pnp(Ether *e)
1127
{
1128
	int i;
1129
	Ctlr *ctlr;
1130
 
1131
	if(nctlr == 0)
1132
		scan();
1133
	ctlr = nil;
1134
	for(i = 0; i < nctlr; i++){
1135
		ctlr = ctlrtab[i];
1136
		if(ctlr == nil || ctlr->flag & Factive)
1137
			continue;
1138
		if(e->port == 0 || e->port == (ulong)ctlr->reg)
1139
			break;
1140
	}
1141
	if (i >= nctlr)
1142
		return -1;
1143
	ctlr->flag |= Factive;
1144
	e->ctlr = ctlr;
1145
	e->port = (uintptr)ctlr->physreg;
1146
	e->irq = ctlr->p->intl;
1147
	e->tbdf = ctlr->p->tbdf;
1148
	e->mbps = 10000;
1149
	e->maxmtu = ETHERMAXTU;
1150
	memmove(e->ea, ctlr->ra, Eaddrlen);
1151
 
1152
	e->arg = e;
1153
	e->attach = attach;
1154
	e->detach = shutdown;
1155
	e->transmit = transmit;
1156
	e->interrupt = interrupt;
1157
	e->ifstat = ifstat;
1158
	e->shutdown = shutdown;
1159
	e->ctl = ctl;
1160
	e->multicast = multicast;
1161
	e->promiscuous = promiscuous;
1162
 
1163
	return 0;
1164
}
1165
 
1166
void
1167
ether82598link(void)
1168
{
1169
	addethercard("i82598", pnp);
1170
	addethercard("i10gbe", pnp);
1171
}