Subversion Repositories planix.SVN

Rev

Rev 2 | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
2 - 1
/*
2
 * domain name resolvers, see rfcs 1035 and 1123
3
 */
4
#include <u.h>
5
#include <libc.h>
6
#include <ip.h>
7
#include <bio.h>
8
#include <ndb.h>
9
#include "dns.h"
10
 
11
typedef struct Dest Dest;
12
typedef struct Ipaddr Ipaddr;
13
typedef struct Query Query;
14
 
15
enum
{
	/* transport used to carry a query */
	Udp,
	Tcp,

	/* answer counts: Answerr is the error value, Answnone is zero answers */
	Answerr		= -1,
	Answnone,

	Maxdest		= 24,	/* maximum destinations for a request message */
	Maxoutstanding	= 15,	/* max. outstanding queries per domain name */
	Remntretry	= 15,	/* min. sec.s between /net.alt remount tries */

	/*
	 * transmission limits and time-outs.  the previous, shorter values
	 * were Maxtrans 3, Maxretries 3, Maxwaitms 1000 and Minwaitms 100;
	 * we're trying longer timeouts now, primarily for the benefit of
	 * remote nameservers querying us during times of bad connectivity.
	 */
	Maxtrans	= 5,	/* maximum transmissions to a server */
	Maxretries	= 5,	/* cname+actual resends: was 32; have pity on user */
	Maxwaitms	= 5000,	/* wait no longer for a remote dns query */
	Minwaitms	= 500,	/* willing to wait for a remote dns query */

	/* sanity markers stored in Dest.magic and Query.magic */
	Destmagic	= 0xcafebabe,
	Querymagic	= 0xdeadbeef,
};
44
/* NOTE(review): not used in the visible part of the file; presumably a waiting policy */
enum
{
	Hurry,
	Patient,
};

/* which side's name servers a query goes to (see the inns argument of xmitquery) */
enum
{
	Outns,
	Inns,
};
46
 
47
struct Ipaddr {
48
	Ipaddr *next;
49
	uchar	ip[IPaddrlen];
50
};
51
 
52
struct Dest
53
{
54
	uchar	a[IPaddrlen];	/* ip address */
55
	DN	*s;		/* name server */
56
	int	nx;		/* number of transmissions */
57
	int	code;		/* response code; used to clear dp->respcode */
58
 
59
	ulong	magic;
60
};
61
 
62
/*
63
 * Query has a QLock in it, thus it can't be an automatic
64
 * variable, since each process would see a separate copy
65
 * of the lock on its stack.
66
 */
67
struct Query {
68
	DN	*dp;		/* domain */
69
	ushort	type;		/* and type to look up */
70
	Request *req;
71
	RR	*nsrp;		/* name servers to consult */
72
 
73
	/* dest must not be on the stack due to forking in slave() */
74
	Dest	*dest;		/* array of destinations */
75
	Dest	*curdest;	/* pointer to next to fill */
76
	int	ndest;		/* transmit to this many on this round */
77
 
78
	int	udpfd;
79
 
80
	QLock	tcplock;	/* only one tcp call at a time per query */
81
	int	tcpset;
82
	int	tcpfd;		/* if Tcp, read replies from here */
83
	int	tcpctlfd;
84
	uchar	tcpip[IPaddrlen];
85
 
86
	ulong	magic;
87
};
88
 
89
/* estimated % probability of such a record existing at all */
90
int likely[] = {
91
	[Ta]		95,
92
	[Taaaa]		10,
93
	[Tcname]	15,
94
	[Tmx]		60,
95
	[Tns]		90,
96
	[Tnull]		5,
97
	[Tptr]		35,
98
	[Tsoa]		90,
99
	[Tsrv]		60,
100
	[Ttxt]		15,
101
	[Tall]		95,
102
};
103
 
104
static RR*	dnresolve1(char*, int, int, Request*, int, int);
105
static int	netquery(Query *, int);
106
 
107
/*
108
 * reading /proc/pid/args yields either "name args" or "name [display args]",
109
 * so return only display args, if any.
110
 */
111
static char *
112
procgetname(void)
113
{
114
	int fd, n;
115
	char *lp, *rp;
116
	char buf[256];
117
 
118
	snprint(buf, sizeof buf, "#p/%d/args", getpid());
119
	if((fd = open(buf, OREAD)) < 0)
120
		return strdup("");
121
	*buf = '\0';
122
	n = read(fd, buf, sizeof buf-1);
123
	close(fd);
124
	if (n >= 0)
125
		buf[n] = '\0';
126
	if ((lp = strchr(buf, '[')) == nil ||
127
	    (rp = strrchr(buf, ']')) == nil)
128
		return strdup("");
129
	*rp = '\0';
130
	return strdup(lp+1);
131
}
132
 
133
void
134
rrfreelistptr(RR **rpp)
135
{
136
	RR *rp;
137
 
138
	if (rpp == nil || *rpp == nil)
139
		return;
140
	rp = *rpp;
141
	*rpp = nil;	/* update pointer in memory before freeing list */
142
	rrfreelist(rp);
143
}
144
 
145
/*
146
 *  lookup 'type' info for domain name 'name'.  If it doesn't exist, try
147
 *  looking it up as a canonical name.
148
 *
149
 *  this process can be quite slow if time-outs are set too high when querying
150
 *  nameservers that just don't respond to certain query types.  in that case,
151
 *  there will be multiple udp retries, multiple nameservers will be queried,
152
 *  and this will be repeated for a cname query.  the whole thing will be
153
 *  retried several times until we get an answer or a time-out.
154
 */
155
RR*
156
dnresolve(char *name, int class, int type, Request *req, RR **cn, int depth,
157
	int recurse, int rooted, int *status)
158
{
159
	RR *rp, *nrp, *drp;
160
	DN *dp;
161
	int loops;
162
	char *procname;
163
	char nname[Domlen];
164
 
165
	if(status)
166
		*status = 0;
167
 
168
	if(depth > 12)			/* in a recursive loop? */
169
		return nil;
170
 
171
	procname = procgetname();
172
	/*
173
	 *  hack for systems that don't have resolve search
174
	 *  lists.  Just look up the simple name in the database.
175
	 */
176
	if(!rooted && strchr(name, '.') == nil){
177
		rp = nil;
178
		drp = domainlist(class);
179
		for(nrp = drp; rp == nil && nrp != nil; nrp = nrp->next){
180
			snprint(nname, sizeof nname, "%s.%s", name,
181
				nrp->ptr->name);
182
			rp = dnresolve(nname, class, type, req, cn, depth+1,
183
				recurse, rooted, status);
184
			lock(&dnlock);
185
			rrfreelist(rrremneg(&rp));
186
			unlock(&dnlock);
187
		}
188
		if(drp != nil)
189
			rrfreelist(drp);
190
		procsetname(procname);
191
		free(procname);
192
		return rp;
193
	}
194
 
195
	/*
196
	 *  try the name directly
197
	 */
198
	rp = dnresolve1(name, class, type, req, depth, recurse);
199
	if(rp == nil) {
200
		/*
201
		 * try it as a canonical name if we weren't told
202
		 * that the name didn't exist
203
		 */
204
		dp = dnlookup(name, class, 0);
205
		if(type != Tptr && dp->respcode != Rname)
206
			for(loops = 0; rp == nil && loops < Maxretries; loops++){
207
				/* retry cname, then the actual type */
208
				rp = dnresolve1(name, class, Tcname, req,
209
					depth, recurse);
210
				if(rp == nil)
211
					break;
212
 
213
				/* rp->host == nil shouldn't happen, but does */
214
				if(rp->negative || rp->host == nil){
215
					rrfreelist(rp);
216
					rp = nil;
217
					break;
218
				}
219
 
220
				name = rp->host->name;
221
				lock(&dnlock);
222
				if(cn)
223
					rrcat(cn, rp);
224
				else
225
					rrfreelist(rp);
226
				unlock(&dnlock);
227
 
228
				rp = dnresolve1(name, class, type, req,
229
					depth, recurse);
230
			}
231
 
232
		/* distinction between not found and not good */
233
		if(rp == nil && status != nil && dp->respcode != Rok)
234
			*status = dp->respcode;
235
	}
236
	procsetname(procname);
237
	free(procname);
238
	return randomize(rp);
239
}
240
 
241
static void
242
queryinit(Query *qp, DN *dp, int type, Request *req)
243
{
244
	memset(qp, 0, sizeof *qp);
245
	qp->udpfd = qp->tcpfd = qp->tcpctlfd = -1;
246
	qp->dp = dp;
247
	qp->type = type;
248
	if (qp->type != type)
249
		dnslog("queryinit: bogus type %d", type);
250
	qp->req = req;
251
	qp->nsrp = nil;
252
	qp->dest = qp->curdest = nil;
253
	qp->magic = Querymagic;
254
}
255
 
256
static void
257
queryck(Query *qp)
258
{
259
	assert(qp);
260
	assert(qp->magic == Querymagic);
261
}
262
 
263
static void
264
querydestroy(Query *qp)
265
{
266
	queryck(qp);
267
	/* leave udpfd open */
268
	if (qp->tcpfd > 0)
269
		close(qp->tcpfd);
270
	if (qp->tcpctlfd > 0) {
271
		hangup(qp->tcpctlfd);
272
		close(qp->tcpctlfd);
273
	}
274
	free(qp->dest);
275
	memset(qp, 0, sizeof *qp);	/* prevent accidents */
276
	qp->udpfd = qp->tcpfd = qp->tcpctlfd = -1;
277
}
278
 
279
static void
280
destinit(Dest *p)
281
{
282
	memset(p, 0, sizeof *p);
283
	p->magic = Destmagic;
284
}
285
 
286
static void
287
destck(Dest *p)
288
{
289
	assert(p);
290
	assert(p->magic == Destmagic);
291
}
292
 
293
/*
294
 * if the response to a query hasn't arrived within 100 ms.,
295
 * it's unlikely to arrive at all.  after 1 s., it's really unlikely.
296
 * queries for missing RRs are likely to produce time-outs rather than
297
 * negative responses, so cname and aaaa queries are likely to time out,
298
 * thus we don't wait very long for them.
299
 */
300
static void
301
notestats(vlong start, int tmout, int type)
302
{
303
	qlock(&stats);
304
	if (tmout) {
305
		stats.tmout++;
306
		if (type == Taaaa)
307
			stats.tmoutv6++;
308
		else if (type == Tcname)
309
			stats.tmoutcname++;
310
	} else {
311
		long wait10ths = NS2MS(nsec() - start) / 100;
312
 
313
		if (wait10ths <= 0)
314
			stats.under10ths[0]++;
315
		else if (wait10ths >= nelem(stats.under10ths))
316
			stats.under10ths[nelem(stats.under10ths) - 1]++;
317
		else
318
			stats.under10ths[wait10ths]++;
319
	}
320
	qunlock(&stats);
321
}
322
 
323
static void
324
noteinmem(void)
325
{
326
	qlock(&stats);
327
	stats.answinmem++;
328
	qunlock(&stats);
329
}
330
 
331
/* netquery with given name servers, free ns rrs when done */
332
static int
333
netqueryns(Query *qp, int depth, RR *nsrp)
334
{
335
	int rv;
336
 
337
	qp->nsrp = nsrp;
338
	rv = netquery(qp, depth);
339
	lock(&dnlock);
340
	rrfreelist(nsrp);
341
	unlock(&dnlock);
342
	return rv;
343
}
344
 
345
static RR*
346
issuequery(Query *qp, char *name, int class, int depth, int recurse)
347
{
348
	char *cp;
349
	DN *nsdp;
350
	RR *rp, *nsrp, *dbnsrp;
351
 
352
	/*
353
	 *  if we're running as just a resolver, query our
354
	 *  designated name servers
355
	 */
356
	if(cfg.resolver){
357
		nsrp = randomize(getdnsservers(class));
358
		if(nsrp != nil)
359
			if(netqueryns(qp, depth+1, nsrp) > Answnone)
360
				return rrlookup(qp->dp, qp->type, OKneg);
361
	}
362
 
363
	/*
364
 	 *  walk up the domain name looking for
365
	 *  a name server for the domain.
366
	 */
367
	for(cp = name; cp; cp = walkup(cp)){
368
		/*
369
		 *  if this is a local (served by us) domain,
370
		 *  return answer
371
		 */
372
		dbnsrp = randomize(dblookup(cp, class, Tns, 0, 0));
373
		if(dbnsrp && dbnsrp->local){
374
			rp = dblookup(name, class, qp->type, 1, dbnsrp->ttl);
375
			lock(&dnlock);
376
			rrfreelist(dbnsrp);
377
			unlock(&dnlock);
378
			return rp;
379
		}
380
 
381
		/*
382
		 *  if recursion isn't set, just accept local
383
		 *  entries
384
		 */
385
		if(recurse == Dontrecurse){
386
			if(dbnsrp) {
387
				lock(&dnlock);
388
				rrfreelist(dbnsrp);
389
				unlock(&dnlock);
390
			}
391
			continue;
392
		}
393
 
394
		/* look for ns in cache */
395
		nsdp = dnlookup(cp, class, 0);
396
		nsrp = nil;
397
		if(nsdp)
398
			nsrp = randomize(rrlookup(nsdp, Tns, NOneg));
399
 
400
		/* if the entry timed out, ignore it */
401
		if(nsrp && nsrp->ttl < now){
402
			lock(&dnlock);
403
			rrfreelistptr(&nsrp);
404
			unlock(&dnlock);
405
		}
406
 
407
		if(nsrp){
408
			lock(&dnlock);
409
			rrfreelistptr(&dbnsrp);
410
			unlock(&dnlock);
411
 
412
			/* query the name servers found in cache */
413
			if(netqueryns(qp, depth+1, nsrp) > Answnone)
414
				return rrlookup(qp->dp, qp->type, OKneg);
415
		} else if(dbnsrp)
416
			/* try the name servers found in db */
417
			if(netqueryns(qp, depth+1, dbnsrp) > Answnone)
418
				return rrlookup(qp->dp, qp->type, NOneg);
419
	}
420
	return nil;
421
}
422
 
423
static RR*
424
dnresolve1(char *name, int class, int type, Request *req, int depth,
425
	int recurse)
426
{
427
	Area *area;
428
	DN *dp;
429
	RR *rp;
430
	Query *qp;
431
 
432
	if(debug)
433
		dnslog("[%d] dnresolve1 %s %d %d", getpid(), name, type, class);
434
 
435
	/* only class Cin implemented so far */
436
	if(class != Cin)
437
		return nil;
438
 
439
	dp = dnlookup(name, class, 1);
440
 
441
	/*
442
	 *  Try the cache first
443
	 */
444
	rp = rrlookup(dp, type, OKneg);
445
	if(rp)
446
		if(rp->db){
447
			/* unauthoritative db entries are hints */
448
			if(rp->auth) {
449
				noteinmem();
450
				if(debug)
451
					dnslog("[%d] dnresolve1 %s %d %d: auth rr in db",
452
						getpid(), name, type, class);
453
				return rp;
454
			}
455
		} else
456
			/* cached entry must still be valid */
457
			if(rp->ttl > now)
458
				/* but Tall entries are special */
459
				if(type != Tall || rp->query == Tall) {
460
					noteinmem();
461
					if(debug)
462
						dnslog("[%d] dnresolve1 %s %d %d: rr not in db",
463
							getpid(), name, type, class);
464
					return rp;
465
				}
466
	lock(&dnlock);
467
	rrfreelist(rp);
468
	unlock(&dnlock);
469
	rp = nil;		/* accident prevention */
470
	USED(rp);
471
 
472
	/*
473
	 * try the cache for a canonical name. if found punt
474
	 * since we'll find it during the canonical name search
475
	 * in dnresolve().
476
	 */
477
	if(type != Tcname){
478
		rp = rrlookup(dp, Tcname, NOneg);
479
		lock(&dnlock);
480
		rrfreelist(rp);
481
		unlock(&dnlock);
482
		if(rp){
483
			if(debug)
484
				dnslog("[%d] dnresolve1 %s %d %d: rr from rrlookup for non-cname",
485
					getpid(), name, type, class);
486
			return nil;
487
		}
488
	}
489
 
490
	/*
491
	 * if the domain name is within an area of ours,
492
	 * we should have found its data in memory by now.
493
	 */
494
	area = inmyarea(dp->name);
495
	if (area || strncmp(dp->name, "local#", 6) == 0) {
496
//		char buf[32];
497
 
498
//		dnslog("%s %s: no data in area %s", dp->name,
499
//			rrname(type, buf, sizeof buf), area->soarr->owner->name);
500
		return nil;
501
	}
502
 
503
	qp = emalloc(sizeof *qp);
504
	queryinit(qp, dp, type, req);
505
	rp = issuequery(qp, name, class, depth, recurse);
506
	querydestroy(qp);
507
	free(qp);
508
	if(rp){
509
		if(debug)
510
			dnslog("[%d] dnresolve1 %s %d %d: rr from query",
511
				getpid(), name, type, class);
512
		return rp;
513
	}
514
 
515
	/* settle for a non-authoritative answer */
516
	rp = rrlookup(dp, type, OKneg);
517
	if(rp){
518
		if(debug)
519
			dnslog("[%d] dnresolve1 %s %d %d: rr from rrlookup",
520
				getpid(), name, type, class);
521
		return rp;
522
	}
523
 
524
	/* noone answered.  try the database, we might have a chance. */
525
	rp = dblookup(name, class, type, 0, 0);
526
	if (rp) {
527
		if(debug)
528
			dnslog("[%d] dnresolve1 %s %d %d: rr from dblookup",
529
				getpid(), name, type, class);
530
	}else{
531
		if(debug)
532
			dnslog("[%d] dnresolve1 %s %d %d: no rr from dblookup; crapped out",
533
				getpid(), name, type, class);
534
	}
535
	return rp;
536
}
537
 
538
/*
 *  walk a domain name one element to the right.
 *  return a pointer to that element.
 *  in other words, return a pointer to the parent domain name.
 *
 *  a name without a dot has the root ("") as its parent;
 *  the root itself has none, so 0 is returned for "".
 */
char*
walkup(char *name)
{
	char *dot;

	dot = strchr(name, '.');
	if(dot != 0)
		return dot + 1;		/* parent starts just past the first dot */
	if(name[0] == '\0')
		return 0;		/* already at the root */
	return "";
}
556
 
557
/*
558
 *  Get a udp port for sending requests and reading replies.  Put the port
559
 *  into "headers" mode.
560
 */
561
static char *hmsg = "headers";
562
 
563
int
564
udpport(char *mtpt)
565
{
566
	int fd, ctl;
567
	char ds[64], adir[64];
568
 
569
	/* get a udp port */
570
	snprint(ds, sizeof ds, "%s/udp!*!0", (mtpt? mtpt: "/net"));
571
	ctl = announce(ds, adir);
572
	if(ctl < 0){
573
		/* warning("can't get udp port"); */
574
		return -1;
575
	}
576
 
577
	/* turn on header style interface */
578
	if(write(ctl, hmsg, strlen(hmsg)) != strlen(hmsg)){
579
		close(ctl);
580
		warning(hmsg);
581
		return -1;
582
	}
583
 
584
	/* grab the data file */
585
	snprint(ds, sizeof ds, "%s/data", adir);
586
	fd = open(ds, ORDWR);
587
	close(ctl);
588
	if(fd < 0)
589
		warning("can't open udp port %s: %r", ds);
590
	return fd;
591
}
592
 
593
void
594
initdnsmsg(DNSmsg *mp, RR *rp, int flags, ushort reqno)
595
{
596
	mp->flags = flags;
597
	mp->id = reqno;
598
	mp->qd = rp;
599
	if(rp != nil)
600
		mp->qdcount = 1;
601
}
602
 
603
DNSmsg *
604
newdnsmsg(RR *rp, int flags, ushort reqno)
605
{
606
	DNSmsg *mp;
607
 
608
	mp = emalloc(sizeof *mp);
609
	initdnsmsg(mp, rp, flags, reqno);
610
	return mp;
611
}
612
 
613
/* generate a DNS UDP query packet */
614
int
615
mkreq(DN *dp, int type, uchar *buf, int flags, ushort reqno)
616
{
617
	DNSmsg m;
618
	int len;
619
	Udphdr *uh = (Udphdr*)buf;
620
	RR *rp;
621
 
622
	/* stuff port number into output buffer */
623
	memset(uh, 0, sizeof *uh);
624
	hnputs(uh->rport, Dnsport);
625
 
626
	/* make request and convert it to output format */
627
	memset(&m, 0, sizeof m);
628
	rp = rralloc(type);
629
	rp->owner = dp;
630
	initdnsmsg(&m, rp, flags, reqno);
631
	len = convDNS2M(&m, &buf[Udphdrsize], Maxdnspayload);
632
	rrfreelistptr(&m.qd);
633
	memset(&m, 0, sizeof m);		/* cause trouble */
634
	return len;
635
}
636
 
637
void
638
freeanswers(DNSmsg *mp)
639
{
640
	lock(&dnlock);
641
	rrfreelistptr(&mp->qd);
642
	rrfreelistptr(&mp->an);
643
	rrfreelistptr(&mp->ns);
644
	rrfreelistptr(&mp->ar);
645
	unlock(&dnlock);
646
	mp->qdcount = mp->ancount = mp->nscount = mp->arcount = 0;
647
}
648
 
649
/* timed read of reply.  sets srcip.  ibuf must be 64K to handle tcp answers. */
650
static int
651
readnet(Query *qp, int medium, uchar *ibuf, uvlong endms, uchar **replyp,
652
	uchar *srcip)
653
{
654
	int len, fd;
655
	long ms;
656
	vlong startns = nsec();
657
	uchar *reply;
658
	uchar lenbuf[2];
659
 
660
	len = -1;			/* pessimism */
661
	ms = endms - NS2MS(startns);
662
	if (ms <= 0)
663
		return -1;		/* taking too long */
664
 
665
	reply = ibuf;
666
	memset(srcip, 0, IPaddrlen);
667
	alarm(ms);
668
	if (medium == Udp)
669
		if (qp->udpfd <= 0)
670
			dnslog("readnet: qp->udpfd closed");
671
		else {
672
			len = read(qp->udpfd, ibuf, Udphdrsize+Maxpayload);
673
			alarm(0);
674
			notestats(startns, len < 0, qp->type);
675
			if (len >= IPaddrlen)
676
				memmove(srcip, ibuf, IPaddrlen);
677
			if (len >= Udphdrsize) {
678
				len   -= Udphdrsize;
679
				reply += Udphdrsize;
680
			}
681
		}
682
	else {
683
		if (!qp->tcpset)
684
			dnslog("readnet: tcp params not set");
685
		fd = qp->tcpfd;
686
		if (fd <= 0)
687
			dnslog("readnet: %s: tcp fd unset for dest %I",
688
				qp->dp->name, qp->tcpip);
689
		else if (readn(fd, lenbuf, 2) != 2) {
690
			dnslog("readnet: short read of 2-byte tcp msg size from %I",
691
				qp->tcpip);
692
			/* probably a time-out */
693
			notestats(startns, 1, qp->type);
694
		} else {
695
			len = lenbuf[0]<<8 | lenbuf[1];
696
			if (readn(fd, ibuf, len) != len) {
697
				dnslog("readnet: short read of tcp data from %I",
698
					qp->tcpip);
699
				/* probably a time-out */
700
				notestats(startns, 1, qp->type);
701
				len = -1;
702
			}
703
		}
704
		memmove(srcip, qp->tcpip, IPaddrlen);
705
	}
706
	alarm(0);
707
	*replyp = reply;
708
	return len;
709
}
710
 
711
/*
712
 *  read replies to a request and remember the rrs in the answer(s).
713
 *  ignore any of the wrong type.
714
 *  wait at most until endms.
715
 */
716
static int
717
readreply(Query *qp, int medium, ushort req, uchar *ibuf, DNSmsg *mp,
718
	uvlong endms)
719
{
720
	int len;
721
	char *err;
722
	char tbuf[32];
723
	uchar *reply;
724
	uchar srcip[IPaddrlen];
725
	RR *rp;
726
 
727
	queryck(qp);
728
	memset(mp, 0, sizeof *mp);
729
	memset(srcip, 0, sizeof srcip);
730
	if (0)
731
		len = -1;
732
	for (; timems() < endms &&
733
	    (len = readnet(qp, medium, ibuf, endms, &reply, srcip)) >= 0;
734
	    freeanswers(mp)){
735
		/* convert into internal format  */
736
		memset(mp, 0, sizeof *mp);
737
		err = convM2DNS(reply, len, mp, nil);
738
		if (mp->flags & Ftrunc) {
739
			free(err);
740
			freeanswers(mp);
741
			/* notify our caller to retry the query via tcp. */
742
			return -1;
743
		} else if(err){
744
			dnslog("readreply: %s: input err, len %d: %s: %I",
745
				qp->dp->name, len, err, srcip);
746
			free(err);
747
			continue;
748
		}
749
		if(debug)
750
			logreply(qp->req->id, srcip, mp);
751
 
752
		/* answering the right question? */
753
		if(mp->id != req)
754
			dnslog("%d: id %d instead of %d: %I", qp->req->id,
755
				mp->id, req, srcip);
756
		else if(mp->qd == 0)
757
			dnslog("%d: no question RR: %I", qp->req->id, srcip);
758
		else if(mp->qd->owner != qp->dp)
759
			dnslog("%d: owner %s instead of %s: %I", qp->req->id,
760
				mp->qd->owner->name, qp->dp->name, srcip);
761
		else if(mp->qd->type != qp->type)
762
			dnslog("%d: qp->type %d instead of %d: %I",
763
				qp->req->id, mp->qd->type, qp->type, srcip);
764
		else {
765
			/* remember what request this is in answer to */
766
			for(rp = mp->an; rp; rp = rp->next)
767
				rp->query = qp->type;
768
			return 0;
769
		}
770
	}
771
	if (timems() >= endms) {
772
		;				/* query expired */
773
	} else if (0) {
774
		/* this happens routinely when a read times out */
775
		dnslog("readreply: %s type %s: ns %I read error or eof "
776
			"(returned %d): %r", qp->dp->name, rrname(qp->type,
777
			tbuf, sizeof tbuf), srcip, len);
778
		if (medium == Udp)
779
			for (rp = qp->nsrp; rp != nil; rp = rp->next)
780
				if (rp->type == Tns)
781
					dnslog("readreply: %s: query sent to "
782
						"ns %s", qp->dp->name,
783
						rp->host->name);
784
	}
785
	return -1;
786
}
787
 
788
/*
789
 *	return non-0 if first list includes second list
790
 */
791
int
792
contains(RR *rp1, RR *rp2)
793
{
794
	RR *trp1, *trp2;
795
 
796
	for(trp2 = rp2; trp2; trp2 = trp2->next){
797
		for(trp1 = rp1; trp1; trp1 = trp1->next)
798
			if(trp1->type == trp2->type)
799
			if(trp1->host == trp2->host)
800
			if(trp1->owner == trp2->owner)
801
				break;
802
		if(trp1 == nil)
803
			return 0;
804
	}
805
	return 1;
806
}
807
 
808
 
809
/*
810
 *  return multicast version if any
811
 */
812
int
813
ipisbm(uchar *ip)
814
{
815
	if(isv4(ip)){
816
		if (ip[IPv4off] >= 0xe0 && ip[IPv4off] < 0xf0 ||
817
		    ipcmp(ip, IPv4bcast) == 0)
818
			return 4;
819
	} else
820
		if(ip[0] == 0xff)
821
			return 6;
822
	return 0;
823
}
824
 
825
/*
826
 *  Get next server address(es) into qp->dest[nd] and beyond
827
 */
828
static int
829
serveraddrs(Query *qp, int nd, int depth)
830
{
831
	RR *rp, *arp, *trp;
832
	Dest *cur;
833
 
834
	if(nd >= Maxdest)		/* dest array is full? */
835
		return Maxdest - 1;
836
 
837
	/*
838
	 *  look for a server whose address we already know.
839
	 *  if we find one, mark it so we ignore this on
840
	 *  subsequent passes.
841
	 */
842
	arp = 0;
843
	for(rp = qp->nsrp; rp; rp = rp->next){
844
		assert(rp->magic == RRmagic);
845
		if(rp->marker)
846
			continue;
847
		arp = rrlookup(rp->host, Ta, NOneg);
848
		if(arp == nil)
849
			arp = rrlookup(rp->host, Taaaa, NOneg);
850
		if(arp){
851
			rp->marker = 1;
852
			break;
853
		}
854
		arp = dblookup(rp->host->name, Cin, Ta, 0, 0);
855
		if(arp == nil)
856
			arp = dblookup(rp->host->name, Cin, Taaaa, 0, 0);
857
		if(arp){
858
			rp->marker = 1;
859
			break;
860
		}
861
	}
862
 
863
	/*
864
	 *  if the cache and database lookup didn't find any new
865
	 *  server addresses, try resolving one via the network.
866
	 *  Mark any we try to resolve so we don't try a second time.
867
	 */
868
	if(arp == 0)
869
		for(rp = qp->nsrp; rp; rp = rp->next){
870
			if(rp->marker)
871
				continue;
872
			rp->marker = 1;
873
 
874
			/*
875
			 *  avoid loops looking up a server under itself
876
			 */
877
			if(subsume(rp->owner->name, rp->host->name))
878
				continue;
879
 
880
			arp = dnresolve(rp->host->name, Cin, Ta, qp->req, 0,
881
				depth+1, Recurse, 1, 0);
882
			if(arp == nil)
883
				arp = dnresolve(rp->host->name, Cin, Taaaa,
884
					qp->req, 0, depth+1, Recurse, 1, 0);
885
			lock(&dnlock);
886
			rrfreelist(rrremneg(&arp));
887
			unlock(&dnlock);
888
			if(arp)
889
				break;
890
		}
891
 
892
	/* use any addresses that we found */
893
	for(trp = arp; trp && nd < Maxdest; trp = trp->next){
894
		cur = &qp->dest[nd];
895
		parseip(cur->a, trp->ip->name);
896
		/*
897
		 * straddling servers can reject all nameservers if they are all
898
		 * inside, so be sure to list at least one outside ns at
899
		 * the end of the ns list in /lib/ndb for `dom='.
900
		 */
901
		if (ipisbm(cur->a) ||
902
		    cfg.straddle && !insideaddr(qp->dp->name) && insidens(cur->a))
903
			continue;
904
		cur->nx = 0;
905
		cur->s = trp->owner;
906
		cur->code = Rtimeout;
907
		nd++;
908
	}
909
	lock(&dnlock);
910
	rrfreelist(arp);
911
	unlock(&dnlock);
912
	if(nd >= Maxdest)		/* dest array is full? */
913
		return Maxdest - 1;
914
	return nd;
915
}
916
 
917
/*
918
 *  cache negative responses
919
 */
920
static void
921
cacheneg(DN *dp, int type, int rcode, RR *soarr)
922
{
923
	RR *rp;
924
	DN *soaowner;
925
	ulong ttl;
926
 
927
	stats.negcached++;
928
 
929
	/* no cache time specified, don't make anything up */
930
	if(soarr != nil){
931
		lock(&dnlock);
932
		if(soarr->next != nil)
933
			rrfreelistptr(&soarr->next);
934
		unlock(&dnlock);
935
		soaowner = soarr->owner;
936
	} else
937
		soaowner = nil;
938
 
939
	/* the attach can cause soarr to be freed so mine it now */
940
	if(soarr != nil && soarr->soa != nil)
941
		ttl = soarr->soa->minttl+now;
942
	else
943
		ttl = 5*Min;
944
 
945
	/* add soa and negative RR to the database */
946
	rrattach(soarr, Authoritative);
947
 
948
	rp = rralloc(type);
949
	rp->owner = dp;
950
	rp->negative = 1;
951
	rp->negsoaowner = soaowner;
952
	rp->negrcode = rcode;
953
	rp->ttl = ttl;
954
	rrattach(rp, Authoritative);
955
}
956
 
957
static int
958
setdestoutns(Dest *p, int n)
959
{
960
	uchar *outns = outsidens(n);
961
 
962
	destck(p);
963
	destinit(p);
964
	if (outns == nil) {
965
		if (n == 0)
966
			dnslog("[%d] no outside-ns in ndb", getpid());
967
		return -1;
968
	}
969
	memmove(p->a, outns, sizeof p->a);
970
	p->s = dnlookup("outside-ns-ips", Cin, 1);
971
	return 0;
972
}
973
 
974
/*
975
 * issue query via UDP or TCP as appropriate.
976
 * for TCP, returns with qp->tcpip set from udppkt header.
977
 */
978
static int
979
mydnsquery(Query *qp, int medium, uchar *udppkt, int len)
980
{
981
	int rv = -1, nfd;
982
	char *domain;
983
	char conndir[NETPATHLEN], net[NETPATHLEN];
984
	uchar belen[2];
985
	NetConnInfo *nci;
986
 
987
	queryck(qp);
988
	domain = smprint("%I", udppkt);
989
	if (myaddr(domain)) {
990
		dnslog("mydnsquery: trying to send to myself (%s); bzzzt",
991
			domain);
992
		free(domain);
993
		return rv;
994
	}
995
 
996
	switch (medium) {
997
	case Udp:
998
		free(domain);
999
		nfd = dup(qp->udpfd, -1);
1000
		if (nfd < 0) {
1001
			warning("mydnsquery: qp->udpfd %d: %r", qp->udpfd);
1002
			close(qp->udpfd);	/* ensure it's closed */
1003
			qp->udpfd = -1;		/* poison it */
1004
			return rv;
1005
		}
1006
		close(nfd);
1007
 
1008
		if (qp->udpfd <= 0)
1009
			dnslog("mydnsquery: qp->udpfd %d closed", qp->udpfd);
1010
		else {
1011
			if (write(qp->udpfd, udppkt, len+Udphdrsize) !=
1012
			    len+Udphdrsize)
1013
				warning("sending udp msg: %r");
1014
			else {
1015
				stats.qsent++;
1016
				rv = 0;
1017
			}
1018
		}
1019
		break;
1020
	case Tcp:
1021
		/* send via TCP & keep fd around for reply */
1022
		snprint(net, sizeof net, "%s/tcp",
1023
			(mntpt[0] != '\0'? mntpt: "/net"));
1024
		alarm(10*1000);
1025
		qp->tcpfd = rv = dial(netmkaddr(domain, net, "dns"), nil,
1026
			conndir, &qp->tcpctlfd);
1027
		alarm(0);
1028
		if (qp->tcpfd < 0) {
1029
			dnslog("can't dial tcp!%s!dns: %r", domain);
1030
			free(domain);
1031
			break;
1032
		}
1033
		free(domain);
1034
		nci = getnetconninfo(conndir, qp->tcpfd);
1035
		if (nci) {
1036
			parseip(qp->tcpip, nci->rsys);
1037
			freenetconninfo(nci);
1038
		} else
1039
			dnslog("mydnsquery: getnetconninfo failed");
1040
		qp->tcpset = 1;
1041
 
1042
		belen[0] = len >> 8;
1043
		belen[1] = len;
1044
		if (write(qp->tcpfd, belen, 2) != 2 ||
1045
		    write(qp->tcpfd, udppkt + Udphdrsize, len) != len)
1046
			warning("sending tcp msg: %r");
1047
		break;
1048
	default:
1049
		sysfatal("mydnsquery: bad medium");
1050
	}
1051
	return rv;
1052
}
1053
 
1054
/*
1055
 * send query to all UDP destinations or one TCP destination,
1056
 * taken from obuf (udp packet) header
1057
 */
1058
static int
1059
xmitquery(Query *qp, int medium, int depth, uchar *obuf, int inns, int len)
1060
{
1061
	int j, n;
1062
	char buf[32];
1063
	Dest *p;
1064
 
1065
	queryck(qp);
1066
	if(timems() >= qp->req->aborttime)
1067
		return -1;
1068
 
1069
	/*
1070
	 * get a nameserver address if we need one.
1071
	 * serveraddrs populates qp->dest.
1072
	 */
1073
	p = qp->dest;
1074
	destck(p);
1075
	if (qp->ndest < 0 || qp->ndest > Maxdest) {
1076
		dnslog("qp->ndest %d out of range", qp->ndest);
1077
		abort();
1078
	}
1079
	/*
1080
	 * we're to transmit to more destinations than we currently have,
1081
	 * so get another.
1082
	 */
1083
	if (qp->ndest > qp->curdest - p) {
1084
		j = serveraddrs(qp, qp->curdest - p, depth);
1085
		if (j < 0 || j >= Maxdest) {
1086
			dnslog("serveraddrs() result %d out of range", j);
1087
			abort();
1088
		}
1089
		qp->curdest = &qp->dest[j];
1090
	}
1091
	destck(qp->curdest);
1092
 
1093
	/* no servers, punt */
1094
	if (qp->ndest == 0)
1095
		if (cfg.straddle && cfg.inside) {
1096
			/* get ips of "outside-ns-ips" */
1097
			qp->curdest = qp->dest;
1098
			for(n = 0; n < Maxdest; n++, qp->curdest++)
1099
				if (setdestoutns(qp->curdest, n) < 0)
1100
					break;
1101
			if(n == 0)
1102
				dnslog("xmitquery: %s: no outside-ns nameservers",
1103
					qp->dp->name);
1104
		} else
1105
			/* it's probably just a bogus domain, don't log it */
1106
			return -1;
1107
 
1108
	/* send to first 'qp->ndest' destinations */
1109
	j = 0;
1110
	if (medium == Tcp) {
1111
		j++;
1112
		queryck(qp);
1113
		assert(qp->dp);
1114
		procsetname("tcp %sside query for %s %s", (inns? "in": "out"),
1115
			qp->dp->name, rrname(qp->type, buf, sizeof buf));
1116
		mydnsquery(qp, medium, obuf, len); /* sets qp->tcpip from obuf */
1117
		if(debug)
1118
			logsend(qp->req->id, depth, qp->tcpip, "", qp->dp->name,
1119
				qp->type);
1120
	} else
1121
		for(; p < &qp->dest[qp->ndest] && p < qp->curdest; p++){
1122
			/* skip destinations we've finished with */
1123
			if(p->nx >= Maxtrans)
1124
				continue;
1125
 
1126
			j++;
1127
 
1128
			/* exponential backoff of requests */
1129
			if((1<<p->nx) > qp->ndest)
1130
				continue;
1131
 
1132
			if(memcmp(p->a, IPnoaddr, sizeof IPnoaddr) == 0)
1133
				continue;		/* mistake */
1134
 
1135
			procsetname("udp %sside query to %I/%s %s %s",
1136
				(inns? "in": "out"), p->a, p->s->name,
1137
				qp->dp->name, rrname(qp->type, buf, sizeof buf));
1138
			if(debug)
1139
				logsend(qp->req->id, depth, p->a, p->s->name,
1140
					qp->dp->name, qp->type);
1141
 
1142
			/* fill in UDP destination addr & send it */
1143
			memmove(obuf, p->a, sizeof p->a);
1144
			mydnsquery(qp, medium, obuf, len);
1145
			p->nx++;
1146
		}
1147
	if(j == 0) {
1148
		return -1;
1149
	}
1150
	return 0;
1151
}
1152
 
1153
static int lckindex[Maxlcks] = {
1154
	0,			/* all others map here */
1155
	Ta,
1156
	Tns,
1157
	Tcname,
1158
	Tsoa,
1159
	Tptr,
1160
	Tmx,
1161
	Ttxt,
1162
	Taaaa,
1163
};
1164
 
1165
static int
1166
qtype2lck(int qtype)		/* map query type to querylck index */
1167
{
1168
	int i;
1169
 
1170
	for (i = 1; i < nelem(lckindex); i++)
1171
		if (lckindex[i] == qtype)
1172
			return i;
1173
	return 0;
1174
}
1175
 
1176
/* is mp a cachable negative response (with Rname set)? */
1177
static int
1178
isnegrname(DNSmsg *mp)
1179
{
1180
	/* TODO: could add || cfg.justforw to RHS of && */
1181
	return mp->an == nil && (mp->flags & Rmask) == Rname;
1182
}
1183
 
1184
/* returns Answerr (-1) on errors, else number of answers, which can be zero. */
1185
static int
1186
procansw(Query *qp, DNSmsg *mp, uchar *srcip, int depth, Dest *p)
1187
{
1188
	int rv;
1189
//	int lcktype;
1190
	char buf[32];
1191
	DN *ndp;
1192
	Query *nqp;
1193
	RR *tp, *soarr;
1194
 
1195
	if (mp->an == nil)
1196
		stats.negans++;
1197
 
1198
	/* ignore any error replies */
1199
	if((mp->flags & Rmask) == Rserver){
1200
		stats.negserver++;
1201
		freeanswers(mp);
1202
		if(p != qp->curdest)
1203
			p->code = Rserver;
1204
		return Answerr;
1205
	}
1206
 
1207
	/* ignore any bad delegations */
1208
	if(mp->ns && baddelegation(mp->ns, qp->nsrp, srcip)){
1209
		stats.negbaddeleg++;
1210
		if(mp->an == nil){
1211
			stats.negbdnoans++;
1212
			freeanswers(mp);
1213
			if(p != qp->curdest)
1214
				p->code = Rserver;
1215
			dnslog(" and no answers");
1216
			return Answerr;
1217
		}
1218
		dnslog(" but has answers; ignoring ns");
1219
		lock(&dnlock);
1220
		rrfreelistptr(&mp->ns);
1221
		unlock(&dnlock);
1222
		mp->nscount = 0;
1223
	}
1224
 
1225
	/* remove any soa's from the authority section */
1226
	lock(&dnlock);
1227
	soarr = rrremtype(&mp->ns, Tsoa);
1228
 
1229
	/* incorporate answers */
1230
	unique(mp->an);
1231
	unique(mp->ns);
1232
	unique(mp->ar);
1233
	unlock(&dnlock);
1234
 
1235
	if(mp->an)
1236
		rrattach(mp->an, (mp->flags & Fauth) != 0);
1237
	if(mp->ar)
1238
		rrattach(mp->ar, Notauthoritative);
1239
	if(mp->ns && !cfg.justforw){
1240
		ndp = mp->ns->owner;
1241
		rrattach(mp->ns, Notauthoritative);
1242
	} else {
1243
		ndp = nil;
1244
		lock(&dnlock);
1245
		rrfreelistptr(&mp->ns);
1246
		unlock(&dnlock);
1247
		mp->nscount = 0;
1248
	}
1249
 
1250
	/* free the question */
1251
	if(mp->qd) {
1252
		lock(&dnlock);
1253
		rrfreelistptr(&mp->qd);
1254
		unlock(&dnlock);
1255
		mp->qdcount = 0;
1256
	}
1257
 
1258
	/*
1259
	 *  Any reply from an authoritative server,
1260
	 *  or a positive reply terminates the search.
1261
	 *  A negative response now also terminates the search.
1262
	 */
1263
	if(mp->an != nil || (mp->flags & Fauth)){
1264
		if(isnegrname(mp))
1265
			qp->dp->respcode = Rname;
1266
		else
1267
			qp->dp->respcode = Rok;
1268
 
1269
		/*
1270
		 *  cache any negative responses, free soarr.
1271
		 *  negative responses need not be authoritative:
1272
		 *  they can legitimately come from a cache.
1273
		 */
1274
		if( /* (mp->flags & Fauth) && */ mp->an == nil)
1275
			cacheneg(qp->dp, qp->type, (mp->flags & Rmask), soarr);
1276
		else {
1277
			lock(&dnlock);
1278
			rrfreelist(soarr);
1279
			unlock(&dnlock);
1280
		}
1281
		return 1;
1282
	} else if (isnegrname(mp)) {
1283
		qp->dp->respcode = Rname;
1284
		/*
1285
		 *  cache negative response.
1286
		 *  negative responses need not be authoritative:
1287
		 *  they can legitimately come from a cache.
1288
		 */
1289
		cacheneg(qp->dp, qp->type, (mp->flags & Rmask), soarr);
1290
		return 1;
1291
	}
1292
	stats.negnorname++;
1293
	lock(&dnlock);
1294
	rrfreelist(soarr);
1295
	unlock(&dnlock);
1296
 
1297
	/*
1298
	 *  if we've been given better name servers, recurse.
1299
	 *  if we're a pure resolver, don't recurse, we have
1300
	 *  to forward to a fixed set of named servers.
1301
	 */
1302
	if(!mp->ns || cfg.resolver && cfg.justforw)
1303
		return Answnone;
1304
	tp = rrlookup(ndp, Tns, NOneg);
1305
	if(contains(qp->nsrp, tp)){
1306
		lock(&dnlock);
1307
		rrfreelist(tp);
1308
		unlock(&dnlock);
1309
		return Answnone;
1310
	}
1311
	procsetname("recursive query for %s %s", qp->dp->name,
1312
		rrname(qp->type, buf, sizeof buf));
1313
	/*
1314
	 *  we're called from udpquery, called from
1315
	 *  netquery, which current holds qp->dp->querylck,
1316
	 *  so release it now and acquire it upon return.
1317
	 */
1318
//	lcktype = qtype2lck(qp->type);		/* someday try this again */
1319
//	qunlock(&qp->dp->querylck[lcktype]);
1320
 
1321
	nqp = emalloc(sizeof *nqp);
1322
	queryinit(nqp, qp->dp, qp->type, qp->req);
1323
	nqp->nsrp = tp;
1324
	rv = netquery(nqp, depth+1);
1325
 
1326
//	qlock(&qp->dp->querylck[lcktype]);
1327
	rrfreelist(nqp->nsrp);
1328
	querydestroy(nqp);
1329
	free(nqp);
1330
	return rv;
1331
}
1332
 
1333
/*
1334
 * send a query via tcp to a single address (from ibuf's udp header)
1335
 * and read the answer(s) into mp->an.
1336
 */
1337
static int
1338
tcpquery(Query *qp, DNSmsg *mp, int depth, uchar *ibuf, uchar *obuf, int len,
1339
	ulong waitms, int inns, ushort req)
1340
{
1341
	int rv = 0;
1342
	uvlong endms;
1343
 
1344
	endms = timems() + waitms;
1345
	if(endms > qp->req->aborttime)
1346
		endms = qp->req->aborttime;
1347
 
1348
	if (0)
1349
		dnslog("%s: udp reply truncated; retrying query via tcp to %I",
1350
			qp->dp->name, qp->tcpip);
1351
 
1352
	qlock(&qp->tcplock);
1353
	memmove(obuf, ibuf, IPaddrlen);		/* send back to respondent */
1354
	/* sets qp->tcpip from obuf's udp header */
1355
	if (xmitquery(qp, Tcp, depth, obuf, inns, len) < 0 ||
1356
	    readreply(qp, Tcp, req, ibuf, mp, endms) < 0)
1357
		rv = -1;
1358
	if (qp->tcpfd > 0) {
1359
		hangup(qp->tcpctlfd);
1360
		close(qp->tcpctlfd);
1361
		close(qp->tcpfd);
1362
	}
1363
	qp->tcpfd = qp->tcpctlfd = -1;
1364
	qunlock(&qp->tcplock);
1365
	return rv;
1366
}
1367
 
1368
/*
1369
 *  query name servers.  fill in obuf with on-the-wire representation of a
1370
 *  DNSmsg derived from qp.  if the name server returns a pointer to another
1371
 *  name server, recurse.
1372
 */
1373
static int
1374
queryns(Query *qp, int depth, uchar *ibuf, uchar *obuf, ulong waitms, int inns)
1375
{
1376
	int ndest, len, replywaits, rv;
1377
	ushort req;
1378
	uvlong endms;
1379
	char buf[12];
1380
	uchar srcip[IPaddrlen];
1381
	Dest *p, *np, *dest;
1382
 
1383
	/* pack request into a udp message */
1384
	req = rand();
1385
	len = mkreq(qp->dp, qp->type, obuf, Frecurse|Oquery, req);
1386
 
1387
	/* no server addresses yet */
1388
	queryck(qp);
1389
	dest = emalloc(Maxdest * sizeof *dest);	/* dest can't be on stack */
1390
	for (p = dest; p < dest + Maxdest; p++)
1391
		destinit(p);
1392
	/* this dest array is local to this call of queryns() */
1393
	free(qp->dest);
1394
	qp->curdest = qp->dest = dest;
1395
 
1396
	/*
1397
	 *  transmit udp requests and wait for answers.
1398
	 *  at most Maxtrans attempts to each address.
1399
	 *  each cycle send one more message than the previous.
1400
	 *  retry a query via tcp if its response is truncated.
1401
	 */
1402
	for(ndest = 1; ndest < Maxdest; ndest++){
1403
		qp->ndest = ndest;
1404
		qp->tcpset = 0;
1405
		if (xmitquery(qp, Udp, depth, obuf, inns, len) < 0)
1406
			break;
1407
 
1408
		endms = timems() + waitms;
1409
		if(endms > qp->req->aborttime)
1410
			endms = qp->req->aborttime;
1411
 
1412
		for(replywaits = 0; replywaits < ndest; replywaits++){
1413
			DNSmsg m;
1414
 
1415
			procsetname("reading %sside reply from %I: %s %s from %s",
1416
				(inns? "in": "out"), obuf, qp->dp->name,
1417
				rrname(qp->type, buf, sizeof buf), qp->req->from);
1418
 
1419
			/* read udp answer into m */
1420
			if (readreply(qp, Udp, req, ibuf, &m, endms) >= 0)
1421
				memmove(srcip, ibuf, IPaddrlen);
1422
			else if (!(m.flags & Ftrunc)) {
1423
				freeanswers(&m);
1424
				break;		/* timed out on this dest */
1425
			} else {
1426
				/* whoops, it was truncated! ask again via tcp */
1427
				freeanswers(&m);
1428
				rv = tcpquery(qp, &m, depth, ibuf, obuf, len,
1429
					waitms, inns, req);  /* answer in m */
1430
				if (rv < 0) {
1431
					freeanswers(&m);
1432
					break;		/* failed via tcp too */
1433
				}
1434
				memmove(srcip, qp->tcpip, IPaddrlen);
1435
			}
1436
 
1437
			/* find responder */
1438
			// dnslog("queryns got reply from %I", srcip);
1439
			for(p = qp->dest; p < qp->curdest; p++)
1440
				if(memcmp(p->a, srcip, sizeof p->a) == 0)
1441
					break;
1442
 
1443
			/* remove all addrs of responding server from list */
1444
			for(np = qp->dest; np < qp->curdest; np++)
1445
				if(np->s == p->s)
1446
					np->nx = Maxtrans;
1447
 
1448
			/* free or incorporate RRs in m */
1449
			rv = procansw(qp, &m, srcip, depth, p);
1450
			if (rv > Answnone) {
1451
				free(qp->dest);
1452
				qp->dest = qp->curdest = nil; /* prevent accidents */
1453
				return rv;
1454
			}
1455
		}
1456
	}
1457
 
1458
	/* if all servers returned failure, propagate it */
1459
	qp->dp->respcode = Rserver;
1460
	for(p = dest; p < qp->curdest; p++) {
1461
		destck(p);
1462
		if(p->code != Rserver)
1463
			qp->dp->respcode = Rok;
1464
		p->magic = 0;			/* prevent accidents */
1465
	}
1466
 
1467
//	if (qp->dp->respcode)
1468
//		dnslog("queryns setting Rserver for %s", qp->dp->name);
1469
 
1470
	free(qp->dest);
1471
	qp->dest = qp->curdest = nil;		/* prevent accidents */
1472
	return Answnone;
1473
}
1474
 
1475
/*
1476
 *  run a command with a supplied fd as standard input
1477
 */
1478
char *
1479
system(int fd, char *cmd)
1480
{
1481
	int pid, p, i;
1482
	static Waitmsg msg;
1483
 
1484
	if((pid = fork()) == -1)
1485
		sysfatal("fork failed: %r");
1486
	else if(pid == 0){
1487
		dup(fd, 0);
1488
		close(fd);
1489
		for (i = 3; i < 200; i++)
1490
			close(i);		/* don't leak fds */
1491
		execl("/bin/rc", "rc", "-c", cmd, nil);
1492
		sysfatal("exec rc: %r");
1493
	}
1494
	for(p = waitpid(); p >= 0; p = waitpid())
1495
		if(p == pid)
1496
			return msg.msg;
1497
	return "lost child";
1498
}
1499
 
1500
/* compute wait, weighted by probability of success, with bounds */
1501
static ulong
1502
weight(ulong ms, unsigned pcntprob)
1503
{
1504
	ulong wait;
1505
 
1506
	wait = (ms * pcntprob) / 100;
1507
	if (wait < Minwaitms)
1508
		wait = Minwaitms;
1509
	if (wait > Maxwaitms)
1510
		wait = Maxwaitms;
1511
	return wait;
1512
}
1513
 
1514
/*
1515
 * in principle we could use a single descriptor for a udp port
1516
 * to send all queries and receive all the answers to them,
1517
 * but we'd have to sort out the answers by dns-query id.
1518
 */
1519
static int
1520
udpquery(Query *qp, char *mntpt, int depth, int patient, int inns)
1521
{
1522
	int fd, rv;
1523
	ulong now, pcntprob;
1524
	uvlong wait, reqtm;
1525
	char *msg;
1526
	uchar *obuf, *ibuf;
1527
	static QLock mntlck;
1528
	static ulong lastmount;
1529
 
1530
	/* use alloced buffers rather than ones from the stack */
1531
	ibuf = emalloc(64*1024);		/* max. tcp reply size */
1532
	obuf = emalloc(Maxpayload+Udphdrsize);
1533
 
1534
	fd = udpport(mntpt);
1535
	while (fd < 0 && cfg.straddle && strcmp(mntpt, "/net.alt") == 0) {
1536
		/* HACK: remount /net.alt */
1537
		now = time(nil);
1538
		if (now < lastmount + Remntretry)
1539
			sleep(S2MS(lastmount + Remntretry - now));
1540
		qlock(&mntlck);
1541
		fd = udpport(mntpt);	/* try again under lock */
1542
		if (fd < 0) {
1543
			dnslog("[%d] remounting /net.alt", getpid());
1544
			unmount(nil, "/net.alt");
1545
 
1546
			msg = system(open("/dev/null", ORDWR), "outside");
1547
 
1548
			lastmount = time(nil);
1549
			if (msg && *msg) {
1550
				dnslog("[%d] can't remount /net.alt: %s",
1551
					getpid(), msg);
1552
				sleep(10*1000);	/* don't spin remounting */
1553
			} else
1554
				fd = udpport(mntpt);
1555
		}
1556
		qunlock(&mntlck);
1557
	}
1558
	if (fd < 0) {
1559
		dnslog("can't get udpport for %s query of name %s: %r",
1560
			mntpt, qp->dp->name);
1561
		sysfatal("out of udp conversations");	/* we're buggered */
1562
	}
1563
 
1564
	/*
1565
	 * Our QIP servers are busted and respond to AAAA and CNAME queries
1566
	 * with (sometimes malformed [too short] packets and) no answers and
1567
	 * just NS RRs but not Rname errors.  so make time-to-wait
1568
	 * proportional to estimated probability of an RR of that type existing.
1569
	 */
1570
	if (qp->type >= nelem(likely))
1571
		pcntprob = 35;			/* unpopular query type */
1572
	else
1573
		pcntprob = likely[qp->type];
1574
	reqtm = (patient? 2 * Maxreqtm: Maxreqtm);
1575
	wait = weight(reqtm / 3, pcntprob);	/* time for one udp query */
1576
	qp->req->aborttime = timems() + 3*wait; /* for all udp queries */
1577
 
1578
	qp->udpfd = fd;
1579
	rv = queryns(qp, depth, ibuf, obuf, wait, inns);
1580
	close(fd);
1581
	qp->udpfd = -1;
1582
 
1583
	free(obuf);
1584
	free(ibuf);
1585
	return rv;
1586
}
1587
 
1588
/*
1589
 * look up (qp->dp->name, qp->type) rr in dns,
1590
 * using nameservers in qp->nsrp.
1591
 */
1592
static int
1593
netquery(Query *qp, int depth)
1594
{
1595
	int lock, rv, triedin, inname;
1596
	char buf[32];
1597
	RR *rp;
1598
	DN *dp;
1599
	Querylck *qlp;
1600
	static int whined;
1601
 
1602
	rv = Answnone;			/* pessimism */
1603
	if(depth > 12)			/* in a recursive loop? */
1604
		return Answnone;
1605
 
1606
	slave(qp->req);
1607
	/*
1608
	 * slave might have forked.  if so, the parent process longjmped to
1609
	 * req->mret; we're usually the child slave, but if there are too
1610
	 * many children already, we're still the same process.
1611
	 */
1612
 
1613
	/*
1614
	 * don't lock before call to slave so only children can block.
1615
	 * just lock at top-level invocation.
1616
	 */
1617
	lock = depth <= 1 && qp->req->isslave;
1618
	dp = qp->dp;		/* ensure that it doesn't change underfoot */
1619
	qlp = nil;
1620
	if(lock) {
1621
		procsetname("query lock wait: %s %s from %s", dp->name,
1622
			rrname(qp->type, buf, sizeof buf), qp->req->from);
1623
		/*
1624
		 * don't make concurrent queries for this name.
1625
		 * dozens of processes blocking here probably indicates
1626
		 * an error in our dns data that causes us to not
1627
		 * recognise a zone (area) as one of our own, thus
1628
		 * causing us to query other nameservers.
1629
		 */
1630
		qlp = &dp->querylck[qtype2lck(qp->type)];
1631
		qlock(qlp);
1632
		if (qlp->Ref.ref > Maxoutstanding) {
1633
			qunlock(qlp);
1634
			if (!whined) {
1635
				whined = 1;
1636
				dnslog("too many outstanding queries for %s;"
1637
					" dropping this one; no further logging"
1638
					" of drops", dp->name);
1639
			}
1640
			return 0;
1641
		}
1642
		++qlp->Ref.ref;
1643
		qunlock(qlp);
1644
	}
1645
	procsetname("netquery: %s", dp->name);
1646
 
1647
	/* prepare server RR's for incremental lookup */
1648
	for(rp = qp->nsrp; rp; rp = rp->next)
1649
		rp->marker = 0;
1650
 
1651
	triedin = 0;
1652
 
1653
	/*
1654
	 * normal resolvers and servers will just use mntpt for all addresses,
1655
	 * even on the outside.  straddling servers will use mntpt (/net)
1656
	 * for inside addresses and /net.alt for outside addresses,
1657
	 * thus bypassing other inside nameservers.
1658
	 */
1659
	inname = insideaddr(dp->name);
1660
	if (!cfg.straddle || inname) {
1661
		rv = udpquery(qp, mntpt, depth, Hurry, (cfg.inside? Inns: Outns));
1662
		triedin = 1;
1663
	}
1664
 
1665
	/*
1666
	 * if we're still looking, are inside, and have an outside domain,
1667
	 * try it on our outside interface, if any.
1668
	 */
1669
	if (rv == Answnone && cfg.inside && !inname) {
1670
		if (triedin)
1671
			dnslog(
1672
	   "[%d] netquery: internal nameservers failed for %s; trying external",
1673
				getpid(), dp->name);
1674
 
1675
		/* prepare server RR's for incremental lookup */
1676
		for(rp = qp->nsrp; rp; rp = rp->next)
1677
			rp->marker = 0;
1678
 
1679
		rv = udpquery(qp, "/net.alt", depth, Patient, Outns);
1680
	}
1681
//	if (rv == Answnone)		/* could ask /net.alt/dns directly */
1682
//		askoutdns(dp, qp->type);
1683
 
1684
	if(lock && qlp) {
1685
		qlock(qlp);
1686
		assert(qlp->Ref.ref > 0);
1687
		qunlock(qlp);
1688
		decref(qlp);
1689
	}
1690
	return rv;
1691
}
1692
 
1693
int
1694
seerootns(void)
1695
{
1696
	int rv;
1697
	char root[] = "";
1698
	Request req;
1699
	RR *rr;
1700
	Query *qp;
1701
 
1702
	memset(&req, 0, sizeof req);
1703
	req.isslave = 1;
1704
	req.aborttime = timems() + Maxreqtm;
1705
	req.from = "internal";
1706
 
1707
	qp = emalloc(sizeof *qp);
1708
	queryinit(qp, dnlookup(root, Cin, 1), Tns, &req);
1709
	qp->nsrp = dblookup(root, Cin, Tns, 0, 0);
1710
	for (rr = qp->nsrp; rr != nil; rr = rr->next)	/* DEBUG */
1711
		dnslog("seerootns query nsrp: %R", rr);
1712
 
1713
	rv = netquery(qp, 0);		/* lookup ". ns" using qp->nsrp */
1714
 
1715
	rrfreelist(qp->nsrp);
1716
	querydestroy(qp);
1717
	free(qp);
1718
	return rv;
1719
}