Subversion Repositories planix.SVN

Rev

Rev 2 | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
2 - 1
/*
2
 * IPv4 Ethernet bridge
3
 */
4
#include "u.h"
5
#include "../port/lib.h"
6
#include "mem.h"
7
#include "dat.h"
8
#include "fns.h"
9
#include "../ip/ip.h"
10
#include "../port/netif.h"
11
#include "../port/error.h"
12
 
13
/* Forward type names for the structures defined further down in this file. */
typedef struct Bridge	Bridge;
typedef struct Port	Port;
typedef struct Centry	Centry;
typedef struct Iphdr	Iphdr;
typedef struct Tcphdr	Tcphdr;
18
 
19
/*
 * Qid types (kept in the low byte of the qid path, see TYPE)
 * followed by the sizing constants of the bridge.
 */
enum
{
	Qtopdir=	1,		/* top level directory */

	Qbridgedir,			/* bridge* directory */
	Qbctl,				/* bridge "ctl" file */
	Qstats,				/* bridge "stats" file */
	Qcache,				/* bridge "cache" file */
	Qlog,				/* bridge "log" file */

	Qportdir,			/* per-port directory, named by port index */
	Qpctl,				/* port "ctl" file */
	Qlocal,				/* port "local" file */
	Qstatus,			/* port "status" file */

	MaxQ,				/* size of dirtab[] */

	Maxbridge=	4,		/* number of bridges in bridgetab */
	Maxport=	128,		/* ports per bridge; power of 2 (PORT masks with Maxport-1) */
	CacheHash=	257,		/* hash modulus; prime */
	CacheLook=	5,		/* how many cache entries to examine per lookup */
	CacheSize=	(CacheHash+CacheLook-1),
	CacheTimeout=	5*60,		/* timeout for cache entry in seconds */

	TcpMssMax = 1300,		/* max desirable Tcp MSS value */
	TunnelMtu = 1400,		/* packets larger than this may be fragmented on tunnel ports */
};
46
 
47
static Dirtab bridgedirtab[]={
48
	"ctl",		{Qbctl},	0,	0666,
49
	"stats",	{Qstats},	0,	0444,
50
	"cache",	{Qcache},	0,	0444,
51
	"log",		{Qlog},		0,	0666,
52
};
53
 
54
static Dirtab portdirtab[]={
55
	"ctl",		{Qpctl},	0,	0666,
56
	"local",	{Qlocal},	0,	0444,
57
	"status",	{Qstatus},	0,	0444,
58
};
59
 
60
/* Log classes passed to log(); selectable by name through logflags[]. */
enum {
	Logcache=	(1<<0),		/* address cache activity */
	Logmcast=	(1<<1),		/* multicast/broadcast packets */
};
64
 
65
/* types of interfaces (Port.type) */
enum
{
	Tether,		/* bound with "bind ether ..." */
	Ttun,		/* bound with "bind tunnel ..." */
};
71
 
72
static Logflag logflags[] =
73
{
74
	{ "cache",	Logcache, },
75
	{ "multicast",	Logmcast, },
76
	{ nil,		0, },
77
};
78
 
79
/* Dirtab entry for each non-directory qid type; filled in by bridgeinit. */
static Dirtab	*dirtab[MaxQ];

/*
 * A qid path packs the port index above the qid type:
 * TYPE extracts the low byte, PORT the bits above it (masked to
 * Maxport-1), and QID builds a path from (port, type).
 */
#define TYPE(x) 	(((ulong)(x).path) & 0xff)
#define PORT(x) 	((((ulong)(x).path) >> 8)&(Maxport-1))
#define QID(x, y) 	(((x)<<8) | (y))
84
 
85
struct Centry
86
{
87
	uchar	d[Eaddrlen];
88
	int	port;
89
	long	expire;		// entry expires this many seconds after bootime
90
	long	src;
91
	long	dst;
92
};
93
 
94
struct Bridge
95
{
96
	QLock;
97
	int	nport;
98
	Port	*port[Maxport];
99
	Centry	cache[CacheSize];
100
	ulong	hit;
101
	ulong	miss;
102
	ulong	copy;
103
	long	delay0;		// constant microsecond delay per packet
104
	long	delayn;		// microsecond delay per byte
105
	int	tcpmss;		// modify tcpmss value
106
 
107
	Log;
108
};
109
 
110
struct Port
111
{
112
	int	id;
113
	Bridge	*bridge;
114
	int	ref;
115
	int	closed;
116
 
117
	Chan	*data[2];	// channel to data
118
 
119
	Proc	*readp;		// read proc
120
 
121
	// the following uniquely identifies the port
122
	int	type;
123
	char	name[KNAMELEN];
124
 
125
	// owner hash - avoids bind/unbind races
126
	ulong	ownhash;
127
 
128
	// various stats
129
	int	in;		// number of packets read
130
	int	inmulti;	// multicast or broadcast
131
	int	inunknown;	// unknown address
132
	int	out;		// number of packets read
133
	int	outmulti;	// multicast or broadcast
134
	int	outunknown;	// unknown address
135
	int	outfrag;	// fragmented the packet
136
	int	nentry;		// number of cache entries for this port
137
};
138
 
139
/* IP and TCP constants used by tcpmsshack and the fragmenter. */
enum {
	IP_TCPPROTO	= 6,		/* value of Iphdr.proto for TCP */
	EOLOPT		= 0,		/* TCP option: end of option list */
	NOOPOPT		= 1,		/* TCP option: one-byte no-op */
	MSSOPT		= 2,		/* TCP option: maximum segment size */
	MSS_LENGTH	= 4,		/* total length in bytes of the MSS option */
	SYN		= 0x02,		/* Pkt. is synchronise */
	IPHDR		= 20,		/* sizeof(Iphdr) */
};
148
 
149
struct Iphdr
150
{
151
	uchar	vihl;		/* Version and header length */
152
	uchar	tos;		/* Type of service */
153
	uchar	length[2];	/* packet length */
154
	uchar	id[2];		/* ip->identification */
155
	uchar	frag[2];	/* Fragment information */
156
	uchar	ttl;		/* Time to live */
157
	uchar	proto;		/* Protocol */
158
	uchar	cksum[2];	/* Header checksum */
159
	uchar	src[4];		/* IP source */
160
	uchar	dst[4];		/* IP destination */
161
};
162
 
163
struct Tcphdr
164
{
165
	uchar	sport[2];
166
	uchar	dport[2];
167
	uchar	seq[4];
168
	uchar	ack[4];
169
	uchar	flag[2];
170
	uchar	win[2];
171
	uchar	cksum[2];
172
	uchar	urg[2];
173
};
174
 
175
static Bridge bridgetab[Maxbridge];
176
 
177
static int m2p[] = {
178
	[OREAD]		4,
179
	[OWRITE]	2,
180
	[ORDWR]		6
181
};
182
 
183
static int	bridgegen(Chan *c, char*, Dirtab*, int, int s, Dir *dp);
184
static void	portbind(Bridge *b, int argc, char *argv[]);
185
static void	portunbind(Bridge *b, int argc, char *argv[]);
186
static void	etherread(void *a);
187
static char	*cachedump(Bridge *b);
188
static void	portfree(Port *port);
189
static void	cacheflushport(Bridge *b, int port);
190
static void	etherwrite(Port *port, Block *bp);
191
 
192
static void
193
bridgeinit(void)
194
{
195
	int i;
196
	Dirtab *dt;
197
 
198
	// setup dirtab with non directory entries
199
	for(i=0; i<nelem(bridgedirtab); i++) {
200
		dt = bridgedirtab + i;
201
		dirtab[TYPE(dt->qid)] = dt;
202
	}
203
	for(i=0; i<nelem(portdirtab); i++) {
204
		dt = portdirtab + i;
205
		dirtab[TYPE(dt->qid)] = dt;
206
	}
207
}
208
 
209
static Chan*
210
bridgeattach(char* spec)
211
{
212
	Chan *c;
213
	int dev;
214
 
215
	dev = atoi(spec);
216
	if(dev<0 || dev >= Maxbridge)
217
		error("bad specification");
218
 
219
	c = devattach('B', spec);
220
	mkqid(&c->qid, QID(0, Qtopdir), 0, QTDIR);
221
	c->dev = dev;
222
	return c;
223
}
224
 
225
static Walkqid*
226
bridgewalk(Chan *c, Chan *nc, char **name, int nname)
227
{
228
	return devwalk(c, nc, name, nname, (Dirtab*)0, 0, bridgegen);
229
}
230
 
231
static int
232
bridgestat(Chan* c, uchar* db, int n)
233
{
234
	return devstat(c, db, n, (Dirtab *)0, 0L, bridgegen);
235
}
236
 
237
/*
 * Open hook.  Opening "log" registers a log reader with logopen;
 * opening "cache" takes a text snapshot of the address cache with
 * cachedump and hangs it off c->aux (released in bridgeclose).
 * Every other file needs no per-open state.
 */
static Chan*
bridgeopen(Chan* c, int omode)
{
	int perm;
	Bridge *b;

	omode &= 3;
	/*
	 * omode can be 3 here, but m2p only has entries for OREAD,
	 * OWRITE and ORDWR; don't index past the end of the table.
	 */
	perm = 0;
	if(omode < nelem(m2p))
		perm = m2p[omode];
	USED(perm);

	b = bridgetab + c->dev;
	USED(b);

	switch(TYPE(c->qid)) {
	default:
		break;
	case Qlog:
		logopen(b);
		break;
	case Qcache:
		c->aux = cachedump(b);
		break;
	}
	c->mode = openmode(omode);
	c->flag |= COPEN;
	c->offset = 0;
	return c;
}
265
 
266
static void
267
bridgeclose(Chan* c)
268
{
269
	Bridge *b  = bridgetab + c->dev;
270
 
271
	switch(TYPE(c->qid)) {
272
	case Qcache:
273
		if(c->flag & COPEN)
274
			free(c->aux);
275
		break;
276
	case Qlog:
277
		if(c->flag & COPEN)
278
			logclose(b);
279
		break;
280
	}
281
}
282
 
283
static long
284
bridgeread(Chan *c, void *a, long n, vlong off)
285
{
286
	char buf[256];
287
	Bridge *b = bridgetab + c->dev;
288
	Port *port;
289
	int i, ingood, outgood;
290
 
291
	USED(off);
292
	switch(TYPE(c->qid)) {
293
	default:
294
		error(Eperm);
295
	case Qtopdir:
296
	case Qbridgedir:
297
	case Qportdir:
298
		return devdirread(c, a, n, 0, 0, bridgegen);
299
	case Qlog:
300
		return logread(b, a, off, n);
301
	case Qstatus:
302
		qlock(b);
303
		port = b->port[PORT(c->qid)];
304
		if(port == 0)
305
			strcpy(buf, "unbound\n");
306
		else {
307
			i = 0;
308
			switch(port->type) {
309
			default:
310
				panic("bridgeread: unknown port type: %d",
311
					port->type);
312
			case Tether:
313
				i += snprint(buf+i, sizeof(buf)-i, "ether %s: ", port->name);
314
				break;
315
			case Ttun:
316
				i += snprint(buf+i, sizeof(buf)-i, "tunnel %s: ", port->name);
317
				break;
318
			}
319
			ingood = port->in - port->inmulti - port->inunknown;
320
			outgood = port->out - port->outmulti - port->outunknown;
321
			i += snprint(buf+i, sizeof(buf)-i,
322
				"in=%d(%d:%d:%d) out=%d(%d:%d:%d:%d)\n",
323
				port->in, ingood, port->inmulti, port->inunknown,
324
				port->out, outgood, port->outmulti,
325
				port->outunknown, port->outfrag);
326
			USED(i);
327
		}
328
		n = readstr(off, a, n, buf);
329
		qunlock(b);
330
		return n;
331
	case Qbctl:
332
		snprint(buf, sizeof(buf), "%s tcpmss\ndelay %ld %ld\n",
333
			b->tcpmss ? "set" : "clear", b->delay0, b->delayn);
334
		n = readstr(off, a, n, buf);
335
		return n;
336
	case Qcache:
337
		n = readstr(off, a, n, c->aux);
338
		return n;
339
	case Qstats:
340
		snprint(buf, sizeof(buf), "hit=%uld miss=%uld copy=%uld\n",
341
			b->hit, b->miss, b->copy);
342
		n = readstr(off, a, n, buf);
343
		return n;
344
	}
345
}
346
 
347
static void
348
bridgeoption(Bridge *b, char *option, int value)
349
{
350
	if(strcmp(option, "tcpmss") == 0)
351
		b->tcpmss = value;
352
	else
353
		error("unknown bridge option");
354
}
355
 
356
 
357
static long
358
bridgewrite(Chan *c, void *a, long n, vlong off)
359
{
360
	Bridge *b = bridgetab + c->dev;
361
	Cmdbuf *cb;
362
	char *arg0, *p;
363
 
364
	USED(off);
365
	switch(TYPE(c->qid)) {
366
	default:
367
		error(Eperm);
368
	case Qbctl:
369
		cb = parsecmd(a, n);
370
		qlock(b);
371
		if(waserror()) {
372
			qunlock(b);
373
			free(cb);
374
			nexterror();
375
		}
376
		if(cb->nf == 0)
377
			error("short write");
378
		arg0 = cb->f[0];
379
		if(strcmp(arg0, "bind") == 0) {
380
			portbind(b, cb->nf-1, cb->f+1);
381
		} else if(strcmp(arg0, "unbind") == 0) {
382
			portunbind(b, cb->nf-1, cb->f+1);
383
		} else if(strcmp(arg0, "cacheflush") == 0) {
384
			log(b, Logcache, "cache flush\n");
385
			memset(b->cache, 0, CacheSize*sizeof(Centry));
386
		} else if(strcmp(arg0, "set") == 0) {
387
			if(cb->nf != 2)
388
				error("usage: set option");
389
			bridgeoption(b, cb->f[1], 1);
390
		} else if(strcmp(arg0, "clear") == 0) {
391
			if(cb->nf != 2)
392
				error("usage: clear option");
393
			bridgeoption(b, cb->f[1], 0);
394
		} else if(strcmp(arg0, "delay") == 0) {
395
			if(cb->nf != 3)
396
				error("usage: delay delay0 delayn");
397
			b->delay0 = strtol(cb->f[1], nil, 10);
398
			b->delayn = strtol(cb->f[2], nil, 10);
399
		} else
400
			error("unknown control request");
401
		poperror();
402
		qunlock(b);
403
		free(cb);
404
		return n;
405
	case Qlog:
406
		cb = parsecmd(a, n);
407
		p = logctl(b, cb->nf, cb->f, logflags);
408
		free(cb);
409
		if(p != nil)
410
			error(p);
411
		return n;
412
	}
413
}
414
 
415
static int
416
bridgegen(Chan *c, char *, Dirtab*, int, int s, Dir *dp)
417
{
418
	Bridge *b = bridgetab + c->dev;
419
	int type = TYPE(c->qid);
420
	Dirtab *dt;
421
	Qid qid;
422
 
423
	if(s  == DEVDOTDOT){
424
		switch(TYPE(c->qid)){
425
		case Qtopdir:
426
		case Qbridgedir:
427
			snprint(up->genbuf, sizeof(up->genbuf), "#B%ld", c->dev);
428
			mkqid(&qid, Qtopdir, 0, QTDIR);
429
			devdir(c, qid, up->genbuf, 0, eve, 0555, dp);
430
			break;
431
		case Qportdir:
432
			snprint(up->genbuf, sizeof(up->genbuf), "bridge%ld", c->dev);
433
			mkqid(&qid, Qbridgedir, 0, QTDIR);
434
			devdir(c, qid, up->genbuf, 0, eve, 0555, dp);
435
			break;
436
		default:
437
			panic("bridgewalk %llux", c->qid.path);
438
		}
439
		return 1;
440
	}
441
 
442
	switch(type) {
443
	default:
444
		/* non-directory entries end up here */
445
		if(c->qid.type & QTDIR)
446
			panic("bridgegen: unexpected directory");	
447
		if(s != 0)
448
			return -1;
449
		dt = dirtab[TYPE(c->qid)];
450
		if(dt == nil)
451
			panic("bridgegen: unknown type: %lud", TYPE(c->qid));
452
		devdir(c, c->qid, dt->name, dt->length, eve, dt->perm, dp);
453
		return 1;
454
	case Qtopdir:
455
		if(s != 0)
456
			return -1;
457
		snprint(up->genbuf, sizeof(up->genbuf), "bridge%ld", c->dev);
458
		mkqid(&qid, QID(0, Qbridgedir), 0, QTDIR);
459
		devdir(c, qid, up->genbuf, 0, eve, 0555, dp);
460
		return 1;
461
	case Qbridgedir:
462
		if(s<nelem(bridgedirtab)) {
463
			dt = bridgedirtab+s;
464
			devdir(c, dt->qid, dt->name, dt->length, eve, dt->perm, dp);
465
			return 1;
466
		}
467
		s -= nelem(bridgedirtab);
468
		if(s >= b->nport)
469
			return -1;
470
		mkqid(&qid, QID(s, Qportdir), 0, QTDIR);
471
		snprint(up->genbuf, sizeof(up->genbuf), "%d", s);
472
		devdir(c, qid, up->genbuf, 0, eve, 0555, dp);
473
		return 1;
474
	case Qportdir:
475
		if(s>=nelem(portdirtab))
476
			return -1;
477
		dt = portdirtab+s;
478
		mkqid(&qid, QID(PORT(c->qid),TYPE(dt->qid)), 0, QTFILE);
479
		devdir(c, qid, dt->name, dt->length, eve, dt->perm, dp);
480
		return 1;
481
	}
482
}
483
 
484
// parse mac address; also in netif.c
485
static int
486
parseaddr(uchar *to, char *from, int alen)
487
{
488
	char nip[4];
489
	char *p;
490
	int i;
491
 
492
	p = from;
493
	for(i = 0; i < alen; i++){
494
		if(*p == 0)
495
			return -1;
496
		nip[0] = *p++;
497
		if(*p == 0)
498
			return -1;
499
		nip[1] = *p++;
500
		nip[2] = 0;
501
		to[i] = strtoul(nip, 0, 16);
502
		if(*p == ':')
503
			p++;
504
	}
505
	return 0;
506
}
507
 
508
// assumes b is locked
509
static void
510
portbind(Bridge *b, int argc, char *argv[])
511
{
512
	Port *port;
513
	Chan *ctl;
514
	int type = 0, i, n;
515
	ulong ownhash;
516
	char *dev, *dev2 = nil, *p;
517
	char buf[100], name[KNAMELEN], path[8*KNAMELEN];
518
	static char usage[] = "usage: bind ether|tunnel name ownhash dev [dev2]";
519
 
520
	memset(name, 0, KNAMELEN);
521
	if(argc < 4)
522
		error(usage);
523
	if(strcmp(argv[0], "ether") == 0) {
524
		if(argc != 4)
525
			error(usage);
526
		type = Tether;
527
		strncpy(name, argv[1], KNAMELEN);
528
		name[KNAMELEN-1] = 0;
529
//		parseaddr(addr, argv[1], Eaddrlen);
530
	} else if(strcmp(argv[0], "tunnel") == 0) {
531
		if(argc != 5)
532
			error(usage);
533
		type = Ttun;
534
		strncpy(name, argv[1], KNAMELEN);
535
		name[KNAMELEN-1] = 0;
536
//		parseip(addr, argv[1]);
537
		dev2 = argv[4];
538
	} else
539
		error(usage);
540
	ownhash = atoi(argv[2]);
541
	dev = argv[3];
542
	for(i=0; i<b->nport; i++) {
543
		port = b->port[i];
544
		if(port != nil && port->type == type &&
545
		    memcmp(port->name, name, KNAMELEN) == 0)
546
			error("port in use");
547
	}
548
	for(i=0; i<Maxport; i++)
549
		if(b->port[i] == nil)
550
			break;
551
	if(i == Maxport)
552
		error("no more ports");
553
	port = smalloc(sizeof(Port));
554
	port->ref = 1;
555
	port->id = i;
556
	port->ownhash = ownhash;
557
 
558
	if(waserror()) {
559
		portfree(port);
560
		nexterror();
561
	}
562
	port->type = type;
563
	memmove(port->name, name, KNAMELEN);
564
	switch(port->type) {
565
	default:
566
		panic("portbind: unknown port type: %d", type);
567
	case Tether:
568
		snprint(path, sizeof(path), "%s/clone", dev);
569
		ctl = namec(path, Aopen, ORDWR, 0);
570
		if(waserror()) {
571
			cclose(ctl);
572
			nexterror();
573
		}
574
		// check addr?
575
 
576
		// get directory name
577
		n = devtab[ctl->type]->read(ctl, buf, sizeof(buf), 0);
578
		buf[n] = 0;
579
		for(p = buf; *p == ' '; p++)
580
			;
581
		snprint(path, sizeof(path), "%s/%lud/data", dev, strtoul(p, 0, 0));
582
 
583
		// setup connection to be promiscuous
584
		snprint(buf, sizeof(buf), "connect -1");
585
		devtab[ctl->type]->write(ctl, buf, strlen(buf), 0);
586
		snprint(buf, sizeof(buf), "promiscuous");
587
		devtab[ctl->type]->write(ctl, buf, strlen(buf), 0);
588
		snprint(buf, sizeof(buf), "bridge");
589
		devtab[ctl->type]->write(ctl, buf, strlen(buf), 0);
590
 
591
		// open data port
592
		port->data[0] = namec(path, Aopen, ORDWR, 0);
593
		// dup it
594
		incref(port->data[0]);
595
		port->data[1] = port->data[0];
596
 
597
		poperror();
598
		cclose(ctl);		
599
 
600
		break;
601
	case Ttun:
602
		port->data[0] = namec(dev, Aopen, OREAD, 0);
603
		port->data[1] = namec(dev2, Aopen, OWRITE, 0);
604
		break;
605
	}
606
 
607
	poperror();
608
 
609
	/* committed to binding port */
610
	b->port[port->id] = port;
611
	port->bridge = b;
612
	if(b->nport <= port->id)
613
		b->nport = port->id+1;
614
 
615
	// assumes kproc always succeeds
616
	kproc("etherread", etherread, port);	// poperror must be next
617
	port->ref++;
618
}
619
 
620
// assumes b is locked
621
static void
622
portunbind(Bridge *b, int argc, char *argv[])
623
{
624
	int type = 0, i;
625
	char name[KNAMELEN];
626
	ulong ownhash;
627
	Port *port = nil;
628
	static char usage[] = "usage: unbind ether|tunnel addr [ownhash]";
629
 
630
	memset(name, 0, KNAMELEN);
631
	if(argc < 2 || argc > 3)
632
		error(usage);
633
	if(strcmp(argv[0], "ether") == 0) {
634
		type = Tether;
635
		strncpy(name, argv[1], KNAMELEN);
636
		name[KNAMELEN-1] = 0;
637
//		parseaddr(addr, argv[1], Eaddrlen);
638
	} else if(strcmp(argv[0], "tunnel") == 0) {
639
		type = Ttun;
640
		strncpy(name, argv[1], KNAMELEN);
641
		name[KNAMELEN-1] = 0;
642
//		parseip(addr, argv[1]);
643
	} else
644
		error(usage);
645
	if(argc == 3)
646
		ownhash = atoi(argv[2]);
647
	else
648
		ownhash = 0;
649
	for(i=0; i<b->nport; i++) {
650
		port = b->port[i];
651
		if(port != nil && port->type == type &&
652
		    memcmp(port->name, name, KNAMELEN) == 0)
653
			break;
654
	}
655
	if(i == b->nport)
656
		error("port not found");
657
	if(ownhash != 0 && port->ownhash != 0 && ownhash != port->ownhash)
658
		error("bad owner hash");
659
 
660
	port->closed = 1;
661
	b->port[i] = nil;	// port is now unbound
662
	cacheflushport(b, i);
663
 
664
	// try and stop reader
665
	if(port->readp)
666
		postnote(port->readp, 1, "unbind", 0);
667
	portfree(port);
668
}
669
 
670
// assumes b is locked
671
static Centry *
672
cachelookup(Bridge *b, uchar d[Eaddrlen])
673
{
674
	int i;
675
	uint h;
676
	Centry *p;
677
	long sec;
678
 
679
	// dont cache multicast or broadcast
680
	if(d[0] & 1)
681
		return 0;
682
 
683
	h = 0;
684
	for(i=0; i<Eaddrlen; i++) {
685
		h *= 7;
686
		h += d[i];
687
	}
688
	h %= CacheHash;
689
	p = b->cache + h;
690
	sec = TK2SEC(m->ticks);
691
	for(i=0; i<CacheLook; i++,p++) {
692
		if(memcmp(d, p->d, Eaddrlen) == 0) {
693
			p->dst++;
694
			if(sec >= p->expire) {
695
				log(b, Logcache, "expired cache entry: %E %d\n",
696
					d, p->port);
697
				return nil;
698
			}
699
			p->expire = sec + CacheTimeout;
700
			return p;
701
		}
702
	}
703
	log(b, Logcache, "cache miss: %E\n", d);
704
	return nil;
705
}
706
 
707
// assumes b is locked
708
static void
709
cacheupdate(Bridge *b, uchar d[Eaddrlen], int port)
710
{
711
	int i;
712
	uint h;
713
	Centry *p, *pp;
714
	long sec;
715
 
716
	// dont cache multicast or broadcast
717
	if(d[0] & 1) {
718
		log(b, Logcache, "bad source address: %E\n", d);
719
		return;
720
	}
721
 
722
	h = 0;
723
	for(i=0; i<Eaddrlen; i++) {
724
		h *= 7;
725
		h += d[i];
726
	}
727
	h %= CacheHash;
728
	p = b->cache + h;
729
	pp = p;
730
	sec = p->expire;
731
 
732
	// look for oldest entry
733
	for(i=0; i<CacheLook; i++,p++) {
734
		if(memcmp(p->d, d, Eaddrlen) == 0) {
735
			p->expire = TK2SEC(m->ticks) + CacheTimeout;
736
			if(p->port != port) {
737
				log(b, Logcache, "NIC changed port %d->%d: %E\n",
738
					p->port, port, d);
739
				p->port = port;
740
			}
741
			p->src++;
742
			return;
743
		}
744
		if(p->expire < sec) {
745
			sec = p->expire;
746
			pp = p;
747
		}
748
	}
749
	if(pp->expire != 0)
750
		log(b, Logcache, "bumping from cache: %E %d\n", pp->d, pp->port);
751
	pp->expire = TK2SEC(m->ticks) + CacheTimeout;
752
	memmove(pp->d, d, Eaddrlen);
753
	pp->port = port;
754
	pp->src = 1;
755
	pp->dst = 0;
756
	log(b, Logcache, "adding to cache: %E %d\n", pp->d, pp->port);
757
}
758
 
759
// assumes b is locked
760
static void
761
cacheflushport(Bridge *b, int port)
762
{
763
	Centry *ce;
764
	int i;
765
 
766
	ce = b->cache;
767
	for(i=0; i<CacheSize; i++,ce++) {
768
		if(ce->port != port)
769
			continue;
770
		memset(ce, 0, sizeof(Centry));
771
	}
772
}
773
 
774
static char *
775
cachedump(Bridge *b)
776
{
777
	int i, n;
778
	long sec, off;
779
	char *buf, *p, *ep;
780
	Centry *ce;
781
	char c;
782
 
783
	qlock(b);
784
	if(waserror()) {
785
		qunlock(b);
786
		nexterror();
787
	}
788
	sec = TK2SEC(m->ticks);
789
	n = 0;
790
	for(i=0; i<CacheSize; i++)
791
		if(b->cache[i].expire != 0)
792
			n++;
793
 
794
	n *= 51;	// change if print format is changed
795
	n += 10;	// some slop at the end
796
	buf = malloc(n);
797
	if(buf == nil)
798
		error(Enomem);
799
	p = buf;
800
	ep = buf + n;
801
	ce = b->cache;
802
	off = seconds() - sec;
803
	for(i=0; i<CacheSize; i++,ce++) {
804
		if(ce->expire == 0)
805
			continue;	
806
		c = (sec < ce->expire)?'v':'e';
807
		p += snprint(p, ep-p, "%E %2d %10ld %10ld %10ld %c\n", ce->d,
808
			ce->port, ce->src, ce->dst, ce->expire+off, c);
809
	}
810
	*p = 0;
811
	poperror();
812
	qunlock(b);
813
 
814
	return buf;
815
}
816
 
817
 
818
 
819
// assumes b is locked
820
static void
821
ethermultiwrite(Bridge *b, Block *bp, Port *port)
822
{
823
	Port *oport;
824
	Block *bp2;
825
	Etherpkt *ep;
826
	int i, mcast;
827
 
828
	if(waserror()) {
829
		if(bp)
830
			freeb(bp);
831
		nexterror();
832
	}
833
 
834
	ep = (Etherpkt*)bp->rp;
835
	mcast = ep->d[0] & 1;		/* multicast bit of ethernet address */
836
 
837
	oport = nil;
838
	for(i=0; i<b->nport; i++) {
839
		if(i == port->id || b->port[i] == nil)
840
			continue;
841
		/*
842
		 * we need to forward multicast packets for ipv6,
843
		 * so always do it.
844
		 */
845
		if(mcast)
846
			b->port[i]->outmulti++;
847
		else
848
			b->port[i]->outunknown++;
849
 
850
		// delay one so that the last write does not copy
851
		if(oport != nil) {
852
			b->copy++;
853
			bp2 = copyblock(bp, blocklen(bp));
854
			if(!waserror()) {
855
				etherwrite(oport, bp2);
856
				poperror();
857
			}
858
		}
859
		oport = b->port[i];
860
	}
861
 
862
	// last write free block
863
	if(oport) {
864
		bp2 = bp; bp = nil; USED(bp);
865
		if(!waserror()) {
866
			etherwrite(oport, bp2);
867
			poperror();
868
		}
869
	} else
870
		freeb(bp);
871
 
872
	poperror();
873
}
874
 
875
static void
876
tcpmsshack(Etherpkt *epkt, int n)
877
{
878
	int hl, optlen;
879
	Iphdr *iphdr;
880
	Tcphdr *tcphdr;
881
	ulong mss, cksum;
882
	uchar *optr;
883
 
884
	/* ignore non-ipv4 packets */
885
	if(nhgets(epkt->type) != ETIP4)
886
		return;
887
	iphdr = (Iphdr*)(epkt->data);
888
	n -= ETHERHDRSIZE;
889
	if(n < IPHDR)
890
		return;
891
 
892
	/* ignore bad packets */
893
	if(iphdr->vihl != (IP_VER4|IP_HLEN4)) {
894
		hl = (iphdr->vihl&0xF)<<2;
895
		if((iphdr->vihl&0xF0) != IP_VER4 || hl < (IP_HLEN4<<2))
896
			return;
897
	} else
898
		hl = IP_HLEN4<<2;
899
 
900
	/* ignore non-tcp packets */
901
	if(iphdr->proto != IP_TCPPROTO)
902
		return;
903
	n -= hl;
904
	if(n < sizeof(Tcphdr))
905
		return;
906
	tcphdr = (Tcphdr*)((uchar*)(iphdr) + hl);
907
	// MSS can only appear in SYN packet
908
	if(!(tcphdr->flag[1] & SYN))
909
		return;
910
	hl = (tcphdr->flag[0] & 0xf0)>>2;
911
	if(n < hl)
912
		return;
913
 
914
	// check for MSS option
915
	optr = (uchar*)tcphdr + sizeof(Tcphdr);
916
	n = hl - sizeof(Tcphdr);
917
	for(;;) {
918
		if(n <= 0 || *optr == EOLOPT)
919
			return;
920
		if(*optr == NOOPOPT) {
921
			n--;
922
			optr++;
923
			continue;
924
		}
925
		optlen = optr[1];
926
		if(optlen < 2 || optlen > n)
927
			return;
928
		if(*optr == MSSOPT && optlen == MSS_LENGTH)
929
			break;
930
		n -= optlen;
931
		optr += optlen;
932
	}
933
 
934
	mss = nhgets(optr+2);
935
	if(mss <= TcpMssMax)
936
		return;
937
	// fit checksum
938
	cksum = nhgets(tcphdr->cksum);
939
	if(optr-(uchar*)tcphdr & 1) {
940
print("tcpmsshack: odd alignment!\n");
941
		// odd alignments are a pain
942
		cksum += nhgets(optr+1);
943
		cksum -= (optr[1]<<8)|(TcpMssMax>>8);
944
		cksum += (cksum>>16);
945
		cksum &= 0xffff;
946
		cksum += nhgets(optr+3);
947
		cksum -= ((TcpMssMax&0xff)<<8)|optr[4];
948
		cksum += (cksum>>16);
949
	} else {
950
		cksum += mss;
951
		cksum -= TcpMssMax;
952
		cksum += (cksum>>16);
953
	}
954
	hnputs(tcphdr->cksum, cksum);
955
	hnputs(optr+2, TcpMssMax);
956
}
957
 
958
/*
959
 *  process to read from the ethernet
960
 */
961
static void
962
etherread(void *a)
963
{
964
	Port *port = a;
965
	Bridge *b = port->bridge;
966
	Block *bp, *bp2;
967
	Etherpkt *ep;
968
	Centry *ce;
969
	long md;
970
 
971
	qlock(b);
972
	port->readp = up;	/* hide identity under a rock for unbind */
973
 
974
	while(!port->closed){
975
		// release lock to read - error means it is time to quit
976
		qunlock(b);
977
		if(waserror()) {
978
			print("etherread read error: %s\n", up->errstr);
979
			qlock(b);
980
			break;
981
		}
982
		if(0)
983
			print("devbridge: etherread: reading\n");
984
		bp = devtab[port->data[0]->type]->bread(port->data[0],
985
			ETHERMAXTU, 0);
986
		if(0)
987
			print("devbridge: etherread: blocklen = %d\n",
988
				blocklen(bp));
989
		poperror();
990
		qlock(b);
991
		if(bp == nil || port->closed)
992
			break;
993
		if(waserror()) {
994
//			print("etherread bridge error\n");
995
			if(bp)
996
				freeb(bp);
997
			continue;
998
		}
999
		if(blocklen(bp) < ETHERMINTU)
1000
			error("short packet");
1001
		port->in++;
1002
 
1003
		ep = (Etherpkt*)bp->rp;
1004
		cacheupdate(b, ep->s, port->id);
1005
		if(b->tcpmss)
1006
			tcpmsshack(ep, BLEN(bp));
1007
 
1008
		/*
1009
		 * delay packets to simulate a slow link
1010
		 */
1011
		if(b->delay0 || b->delayn){
1012
			md = b->delay0 + b->delayn * BLEN(bp);
1013
			if(md > 0)
1014
				microdelay(md);
1015
		}
1016
 
1017
		if(ep->d[0] & 1) {
1018
			log(b, Logmcast, "multicast: port=%d src=%E dst=%E type=%#.4ux\n",
1019
				port->id, ep->s, ep->d, ep->type[0]<<8|ep->type[1]);
1020
			port->inmulti++;
1021
			bp2 = bp; bp = nil;
1022
			ethermultiwrite(b, bp2, port);
1023
		} else {
1024
			ce = cachelookup(b, ep->d);
1025
			if(ce == nil) {
1026
				b->miss++;
1027
				port->inunknown++;
1028
				bp2 = bp; bp = nil;
1029
				ethermultiwrite(b, bp2, port);
1030
			}else if(ce->port != port->id){
1031
				b->hit++;
1032
				bp2 = bp; bp = nil;
1033
				etherwrite(b->port[ce->port], bp2);
1034
			}
1035
		}
1036
 
1037
		poperror();
1038
		if(bp)
1039
			freeb(bp);
1040
	}
1041
//	print("etherread: trying to exit\n");
1042
	port->readp = nil;
1043
	portfree(port);
1044
	qunlock(b);
1045
	pexit("hangup", 1);
1046
}
1047
 
1048
static int
1049
fragment(Etherpkt *epkt, int n)
1050
{
1051
	Iphdr *iphdr;
1052
 
1053
	if(n <= TunnelMtu)
1054
		return 0;
1055
 
1056
	/* ignore non-ipv4 packets */
1057
	if(nhgets(epkt->type) != ETIP4)
1058
		return 0;
1059
	iphdr = (Iphdr*)(epkt->data);
1060
	n -= ETHERHDRSIZE;
1061
	/*
1062
	 * ignore: IP runt packets, bad packets (I don't handle IP
1063
	 * options for the moment), packets with don't-fragment set,
1064
	 * and short blocks.
1065
	 */
1066
	if(n < IPHDR || iphdr->vihl != (IP_VER4|IP_HLEN4) ||
1067
	    iphdr->frag[0] & (IP_DF>>8) || nhgets(iphdr->length) > n)
1068
		return 0;
1069
 
1070
	return 1;
1071
}
1072
 
1073
 
1074
static void
1075
etherwrite(Port *port, Block *bp)
1076
{
1077
	Iphdr *eh, *feh;
1078
	Etherpkt *epkt;
1079
	int n, lid, len, seglen, chunk, dlen, blklen, offset, mf;
1080
	Block *xp, *nb;
1081
	ushort fragoff, frag;
1082
 
1083
	port->out++;
1084
	epkt = (Etherpkt*)bp->rp;
1085
	n = blocklen(bp);
1086
	if(port->type != Ttun || !fragment(epkt, n)) {
1087
		devtab[port->data[1]->type]->bwrite(port->data[1], bp, 0);
1088
		return;
1089
	}
1090
	port->outfrag++;
1091
	if(waserror()){
1092
		freeblist(bp);	
1093
		nexterror();
1094
	}
1095
 
1096
	seglen = (TunnelMtu - ETHERHDRSIZE - IPHDR) & ~7;
1097
	eh = (Iphdr*)(epkt->data);
1098
	len = nhgets(eh->length);
1099
	frag = nhgets(eh->frag);
1100
	mf = frag & IP_MF;
1101
	frag <<= 3;
1102
	dlen = len - IPHDR;
1103
	xp = bp;
1104
	lid = nhgets(eh->id);
1105
	offset = ETHERHDRSIZE+IPHDR;
1106
	while(xp != nil && offset && offset >= BLEN(xp)) {
1107
		offset -= BLEN(xp);
1108
		xp = xp->next;
1109
	}
1110
	xp->rp += offset;
1111
 
1112
	if(0)
1113
		print("seglen=%d, dlen=%d, mf=%x, frag=%d\n",
1114
			seglen, dlen, mf, frag);
1115
	for(fragoff = 0; fragoff < dlen; fragoff += seglen) {
1116
		nb = allocb(ETHERHDRSIZE+IPHDR+seglen);
1117
 
1118
		feh = (Iphdr*)(nb->wp+ETHERHDRSIZE);
1119
 
1120
		memmove(nb->wp, epkt, ETHERHDRSIZE+IPHDR);
1121
		nb->wp += ETHERHDRSIZE+IPHDR;
1122
 
1123
		if((fragoff + seglen) >= dlen) {
1124
			seglen = dlen - fragoff;
1125
			hnputs(feh->frag, (frag+fragoff)>>3 | mf);
1126
		}
1127
		else	
1128
			hnputs(feh->frag, (frag+fragoff>>3) | IP_MF);
1129
 
1130
		hnputs(feh->length, seglen + IPHDR);
1131
		hnputs(feh->id, lid);
1132
 
1133
		/* Copy up the data area */
1134
		chunk = seglen;
1135
		while(chunk) {
1136
			blklen = chunk;
1137
			if(BLEN(xp) < chunk)
1138
				blklen = BLEN(xp);
1139
			memmove(nb->wp, xp->rp, blklen);
1140
			nb->wp += blklen;
1141
			xp->rp += blklen;
1142
			chunk -= blklen;
1143
			if(xp->rp == xp->wp)
1144
				xp = xp->next;
1145
		} 
1146
 
1147
		feh->cksum[0] = 0;
1148
		feh->cksum[1] = 0;
1149
		hnputs(feh->cksum, ipcsum(&feh->vihl));
1150
 
1151
		/* don't generate small packets */
1152
		if(BLEN(nb) < ETHERMINTU)
1153
			nb->wp = nb->rp + ETHERMINTU;
1154
		devtab[port->data[1]->type]->bwrite(port->data[1], nb, 0);
1155
	}
1156
	poperror();
1157
	freeblist(bp);	
1158
}
1159
 
1160
// hold b lock
1161
static void
1162
portfree(Port *port)
1163
{
1164
	port->ref--;
1165
	if(port->ref < 0)
1166
		panic("portfree: bad ref");
1167
	if(port->ref > 0)
1168
		return;
1169
 
1170
	if(port->data[0])
1171
		cclose(port->data[0]);
1172
	if(port->data[1])
1173
		cclose(port->data[1]);
1174
	memset(port, 0, sizeof(Port));
1175
	free(port);
1176
}
1177
 
1178
Dev bridgedevtab = {
1179
	'B',
1180
	"bridge",
1181
 
1182
	devreset,
1183
	bridgeinit,
1184
	devshutdown,
1185
	bridgeattach,
1186
	bridgewalk,
1187
	bridgestat,
1188
	bridgeopen,
1189
	devcreate,
1190
	bridgeclose,
1191
	bridgeread,
1192
	devbread,
1193
	bridgewrite,
1194
	devbwrite,
1195
	devremove,
1196
	devwstat,
1197
};