#include	"u.h"
#include	"tos.h"
#include	"../port/lib.h"
#include	"mem.h"
#include	"dat.h"
#include	"fns.h"
#include	"io.h"
#include	"ureg.h"
#include	"../port/error.h"
#include	<trace.h>

static int trapinited;

void	noted(Ureg*, ulong);

static void debugbpt(Ureg*, void*);
static void fault386(Ureg*, void*);
static void doublefault(Ureg*, void*);
static void unexpected(Ureg*, void*);
static void _dumpstack(Ureg*);

static Lock vctllock;
static Vctl *vctl[256];

enum
{
	Ntimevec = 20		/* number of time buckets for each intr */
};
ulong intrtimes[256][Ntimevec];

void
intrenable(int irq, void (*f)(Ureg*, void*), void* a, int tbdf, char *name)
{
	int vno;
	Vctl *v;

	if(f == nil){
		print("intrenable: nil handler for %d, tbdf 0x%uX for %s\n",
			irq, tbdf, name);
		return;
	}

	v = xalloc(sizeof(Vctl));
	v->isintr = 1;
	v->irq = irq;
	v->tbdf = tbdf;
	v->f = f;
	v->a = a;
	strncpy(v->name, name, KNAMELEN-1);
	v->name[KNAMELEN-1] = 0;

	ilock(&vctllock);
	vno = arch->intrenable(v);
	if(vno == -1){
		iunlock(&vctllock);
		print("intrenable: couldn't enable irq %d, tbdf 0x%uX for %s\n",
			irq, tbdf, v->name);
		xfree(v);
		return;
	}
	if(vctl[vno]){
		if(vctl[vno]->isr != v->isr || vctl[vno]->eoi != v->eoi)
			panic("intrenable: handler: %s %s %#p %#p %#p %#p",
				vctl[vno]->name, v->name,
				vctl[vno]->isr, v->isr, vctl[vno]->eoi, v->eoi);
		v->next = vctl[vno];
	}
	vctl[vno] = v;
	iunlock(&vctllock);
}
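
/*
 * Usage sketch (not part of the original file): a driver would typically
 * register its handler with intrenable() at reset time, as below.  The
 * Myctlr structure, myintr, myreset and the irq field are hypothetical,
 * illustrative names, not kernel API.
 */
#if 0
typedef struct Myctlr Myctlr;
struct Myctlr {
	int	port;	/* hypothetical I/O port */
	int	irq;	/* interrupt line assigned to the device */
};

static void
myintr(Ureg*, void *a)
{
	Myctlr *ctlr;

	ctlr = a;
	/* acknowledge and service the hypothetical device here */
	USED(ctlr);
}

static void
myreset(Myctlr *ctlr)
{
	/* BUSUNKNOWN: not a PCI device, so no tbdf is available */
	intrenable(ctlr->irq, myintr, ctlr, BUSUNKNOWN, "mydev");
}
#endif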

int
intrdisable(int irq, void (*f)(Ureg *, void *), void *a, int tbdf, char *name)
{
	Vctl **pv, *v;
	int vno;

	/*
	 * For now, none of this will work with the APIC code;
	 * there is no mapping between irq and vector, as the IRQ
	 * is pretty meaningless.
	 */
	if(arch->intrvecno == nil)
		return -1;
	vno = arch->intrvecno(irq);
	ilock(&vctllock);
	pv = &vctl[vno];
	while (*pv &&
		  ((*pv)->irq != irq || (*pv)->tbdf != tbdf || (*pv)->f != f || (*pv)->a != a ||
		   strcmp((*pv)->name, name)))
		pv = &((*pv)->next);
	assert(*pv);

	v = *pv;
	*pv = (*pv)->next;	/* Link out the entry */

	if(vctl[vno] == nil && arch->intrdisable != nil)
		arch->intrdisable(irq);
	iunlock(&vctllock);
	xfree(v);
	return 0;
}

static long
irqallocread(Chan*, void *vbuf, long n, vlong offset)
{
	char *buf, *p, str[2*(11+1)+KNAMELEN+1+1];
	int m, vno;
	long oldn;
	Vctl *v;

	if(n < 0 || offset < 0)
		error(Ebadarg);

	oldn = n;
	buf = vbuf;
	for(vno=0; vno<nelem(vctl); vno++){
		for(v=vctl[vno]; v; v=v->next){
			m = snprint(str, sizeof str, "%11d %11d %.*s\n", vno, v->irq, KNAMELEN, v->name);
			if(m <= offset)	/* if we do not want this entry, skip it */
				offset -= m;
			else{
				/* skip offset bytes */
				m -= offset;
				p = str+offset;
				offset = 0;

				/* write at most min(n,m) bytes */
				if(m > n)
					m = n;
				memmove(buf, p, m);
				n -= m;
				buf += m;

				if(n == 0)
					return oldn;
			}
		}
	}
	return oldn - n;
}
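
/*
 * The loop above is the usual read-at-offset pattern for synthesized
 * text files: regenerate each line, skip 'offset' bytes into it, then
 * copy at most min(n, remaining) bytes.  A stand-alone sketch of the
 * same idea; the helper name copyatoffset is hypothetical.
 */
#if 0
static long
copyatoffset(char *dst, long n, vlong *offset, char *src, long m)
{
	if(m <= *offset){		/* caller is already past this chunk */
		*offset -= m;
		return 0;
	}
	src += *offset;			/* skip into the chunk */
	m -= *offset;
	*offset = 0;
	if(m > n)			/* copy at most min(n, m) bytes */
		m = n;
	memmove(dst, src, m);
	return m;
}
#endif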

void
trapenable(int vno, void (*f)(Ureg*, void*), void* a, char *name)
{
	Vctl *v;

	if(vno < 0 || vno >= VectorPIC)
		panic("trapenable: vno %d", vno);
	v = xalloc(sizeof(Vctl));
	v->tbdf = BUSUNKNOWN;
	v->f = f;
	v->a = a;
	strncpy(v->name, name, KNAMELEN);
	v->name[KNAMELEN-1] = 0;

	ilock(&vctllock);
	v->next = vctl[vno];
	vctl[vno] = v;
	iunlock(&vctllock);
}

static void
nmienable(void)
{
	int x;

	/*
	 * Hack: should be locked with NVRAM access.
	 */
	outb(0x70, 0x80);		/* NMI latch clear */
	outb(0x70, 0);

	x = inb(0x61) & 0x07;		/* Enable NMI */
	outb(0x61, 0x08|x);
	outb(0x61, x);
}

/*
 * Minimal trap setup.  Just enough so that we can panic
 * on traps (bugs) during kernel initialization.
 * Called very early - malloc is not yet available.
 */
void
trapinit0(void)
{
	int d1, v;
	ulong vaddr;
	Segdesc *idt;

	idt = (Segdesc*)IDTADDR;
	vaddr = (ulong)vectortable;
	for(v = 0; v < 256; v++){
		d1 = (vaddr & 0xFFFF0000)|SEGP;
		switch(v){

		case VectorBPT:
			d1 |= SEGPL(3)|SEGIG;
			break;

		case VectorSYSCALL:
			d1 |= SEGPL(3)|SEGIG;
			break;

		default:
			d1 |= SEGPL(0)|SEGIG;
			break;
		}
		idt[v].d0 = (vaddr & 0xFFFF)|(KESEL<<16);
		idt[v].d1 = d1;
		vaddr += 6;
	}
}
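
/*
 * Each entry built above is a 386 interrupt gate: d0 packs the low 16
 * bits of the handler offset with the kernel code selector, and d1
 * packs the high 16 bits of the offset with the present/DPL/type bits.
 * The stubs in vectortable are 6 bytes apart, hence vaddr += 6.
 * A sketch of the packing for one gate; setgate is a hypothetical
 * helper, not part of this file.
 */
#if 0
static void
setgate(Segdesc *d, ulong offset, int sel, int flags)
{
	d->d0 = (offset & 0xFFFF) | (sel<<16);
	d->d1 = (offset & 0xFFFF0000) | flags;	/* e.g. SEGP|SEGPL(0)|SEGIG */
}
#endif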

void
trapinit(void)
{
	/*
	 * Special traps.
	 * Syscall() is called directly without going through trap().
	 */
	trapenable(VectorBPT, debugbpt, 0, "debugbpt");
	trapenable(VectorPF, fault386, 0, "fault386");
	trapenable(Vector2F, doublefault, 0, "doublefault");
	trapenable(Vector15, unexpected, 0, "unexpected");
	nmienable();

	addarchfile("irqalloc", 0444, irqallocread, nil);
	trapinited = 1;
}

static char* excname[32] = {
	"divide error",
	"debug exception",
	"nonmaskable interrupt",
	"breakpoint",
	"overflow",
	"bounds check",
	"invalid opcode",
	"coprocessor not available",
	"double fault",
	"coprocessor segment overrun",
	"invalid TSS",
	"segment not present",
	"stack exception",
	"general protection violation",
	"page fault",
	"15 (reserved)",
	"coprocessor error",
	"alignment check",
	"machine check",
	"19 (reserved)",
	"20 (reserved)",
	"21 (reserved)",
	"22 (reserved)",
	"23 (reserved)",
	"24 (reserved)",
	"25 (reserved)",
	"26 (reserved)",
	"27 (reserved)",
	"28 (reserved)",
	"29 (reserved)",
	"30 (reserved)",
	"31 (reserved)",
};

/*
 *  keep histogram of interrupt service times
 */
void
intrtime(Mach*, int vno)
{
	ulong diff;
	ulong x;

	x = perfticks();
	diff = x - m->perf.intrts;
	m->perf.intrts = x;

	m->perf.inintr += diff;
	if(up == nil && m->perf.inidle > diff)
		m->perf.inidle -= diff;

	diff /= m->cpumhz*100;		/* quantum = 100µsec */
	if(diff >= Ntimevec)
		diff = Ntimevec-1;
	intrtimes[vno][diff]++;
}
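
/*
 * Each bucket of intrtimes[vno] covers 100µs of service time; the last
 * bucket, Ntimevec-1, collects everything longer.  A debugging sketch
 * of how the histogram could be dumped; dumpintrtimes is a hypothetical
 * helper, not part of this file.
 */
#if 0
static void
dumpintrtimes(int vno)
{
	int i;

	for(i = 0; i < Ntimevec; i++)
		if(intrtimes[vno][i])
			iprint("vec %d: %d-%dus: %lud\n",
				vno, i*100, (i+1)*100, intrtimes[vno][i]);
}
#endif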

/* go to user space */
void
kexit(Ureg*)
{
	uvlong t;
	Tos *tos;

	/* precise time accounting, kernel exit */
	tos = (Tos*)(USTKTOP-sizeof(Tos));
	cycles(&t);
	tos->kcycles += t - up->kentry;
	tos->pcycles = up->pcycles;
	tos->pid = up->pid;
}
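
/*
 * User code reads these counters through the Tos page that execregs()
 * leaves at the top of the user stack; the C library normally exposes
 * it as the _tos pointer declared in tos.h.  A sketch of the user side
 * (not kernel code), assuming that _tos declaration:
 */
#if 0
#include <u.h>
#include <libc.h>
#include <tos.h>

vlong
kcyclessofar(void)
{
	return _tos->kcycles;	/* cycles this process has spent in the kernel */
}
#endif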

/*
 *  All traps come here.  It is slower to have all traps call trap()
 *  rather than directly vectoring the handler.  However, this avoids a
 *  lot of code duplication and possible bugs.  The only exception is
 *  VectorSYSCALL.
 *  Trap is called with interrupts disabled via interrupt gates.
 */
void
trap(Ureg* ureg)
{
	int clockintr, i, vno, user;
	char buf[ERRMAX];
	Vctl *ctl, *v;
	Mach *mach;

	if(!trapinited){
		/* fault386 can give a better error message */
		if(ureg->trap == VectorPF)
			fault386(ureg, nil);
		panic("trap %lud: not ready", ureg->trap);
	}

	m->perf.intrts = perfticks();
	user = (ureg->cs & 0xFFFF) == UESEL;
	if(user){
		up->dbgreg = ureg;
		cycles(&up->kentry);
	}

	clockintr = 0;

	vno = ureg->trap;
	if(ctl = vctl[vno]){
		if(ctl->isintr){
			m->intr++;
			if(vno >= VectorPIC && vno != VectorSYSCALL)
				m->lastintr = ctl->irq;
		}

		if(ctl->isr)
			ctl->isr(vno);
		for(v = ctl; v != nil; v = v->next){
			if(v->f)
				v->f(ureg, v->a);
		}
		if(ctl->eoi)
			ctl->eoi(vno);

		if(ctl->isintr){
			intrtime(m, vno);

			if(ctl->irq == IrqCLOCK || ctl->irq == IrqTIMER)
				clockintr = 1;

			if(up && !clockintr)
				preempted();
		}
	}
	else if(vno < nelem(excname) && user){
		spllo();
		snprint(buf, sizeof buf, "sys: trap: %s", excname[vno]);
		postnote(up, 1, buf, NDebug);
	}
	else if(vno >= VectorPIC && vno != VectorSYSCALL){
		/*
		 * An unknown interrupt.
		 * Check for a default IRQ7. This can happen when
		 * the IRQ input goes away before the acknowledge.
		 * In this case, a 'default IRQ7' is generated, but
		 * the corresponding bit in the ISR isn't set.
		 * In fact, just ignore all such interrupts.
		 */

		/* call all interrupt routines, just in case */
		for(i = VectorPIC; i <= MaxIrqLAPIC; i++){
			ctl = vctl[i];
			if(ctl == nil)
				continue;
			if(!ctl->isintr)
				continue;
			for(v = ctl; v != nil; v = v->next){
				if(v->f)
					v->f(ureg, v->a);
			}
			/* should we do this? */
			if(ctl->eoi)
				ctl->eoi(i);
		}

		/* clear the interrupt */
		i8259isr(vno);

		if(0)print("cpu%d: spurious interrupt %d, last %d\n",
			m->machno, vno, m->lastintr);
		if(0)if(conf.nmach > 1){
			for(i = 0; i < 32; i++){
				if(!(active.machs & (1<<i)))
					continue;
				mach = MACHP(i);
				if(m->machno == mach->machno)
					continue;
				print(" cpu%d: last %d",
					mach->machno, mach->lastintr);
			}
			print("\n");
		}
		m->spuriousintr++;
		if(user)
			kexit(ureg);
		return;
	}
	else{
		if(vno == VectorNMI){
			/*
			 * Don't re-enable, it confuses the crash dumps.
			nmienable();
			 */
			iprint("cpu%d: NMI PC %#8.8lux\n", m->machno, ureg->pc);
			while(m->machno != 0)
				;
		}
		dumpregs(ureg);
		if(!user){
			ureg->sp = (ulong)&ureg->sp;
			_dumpstack(ureg);
		}
		if(vno < nelem(excname))
			panic("%s", excname[vno]);
		panic("unknown trap/intr: %d", vno);
	}
	splhi();

	/* delaysched set because we held a lock or because our quantum ended */
	if(up && up->delaysched && clockintr){
		sched();
		splhi();
	}

	if(user){
		if(up->procctl || up->nnote)
			notify(ureg);
		kexit(ureg);
	}
}

/*
 *  dump registers
 */
void
dumpregs2(Ureg* ureg)
{
	if(up)
		iprint("cpu%d: registers for %s %lud\n",
			m->machno, up->text, up->pid);
	else
		iprint("cpu%d: registers for kernel\n", m->machno);
	iprint("FLAGS=%luX TRAP=%luX ECODE=%luX PC=%luX",
		ureg->flags, ureg->trap, ureg->ecode, ureg->pc);
	iprint(" SS=%4.4luX USP=%luX\n", ureg->ss & 0xFFFF, ureg->usp);
	iprint("  AX %8.8luX  BX %8.8luX  CX %8.8luX  DX %8.8luX\n",
		ureg->ax, ureg->bx, ureg->cx, ureg->dx);
	iprint("  SI %8.8luX  DI %8.8luX  BP %8.8luX\n",
		ureg->si, ureg->di, ureg->bp);
	iprint("  CS %4.4luX  DS %4.4luX  ES %4.4luX  FS %4.4luX  GS %4.4luX\n",
		ureg->cs & 0xFFFF, ureg->ds & 0xFFFF, ureg->es & 0xFFFF,
		ureg->fs & 0xFFFF, ureg->gs & 0xFFFF);
}

void
dumpregs(Ureg* ureg)
{
	vlong mca, mct;

	dumpregs2(ureg);

	/*
	 * Processor control registers.
	 * If machine check exception, time stamp counter, page size extensions
	 * or enhanced virtual 8086 mode extensions are supported, there is a
	 * CR4. If there is a CR4 and machine check extensions, read the machine
	 * check address and machine check type registers if RDMSR supported.
	 */
	iprint("  CR0 %8.8lux CR2 %8.8lux CR3 %8.8lux",
		getcr0(), getcr2(), getcr3());
	if(m->cpuiddx & (Mce|Tsc|Pse|Vmex)){
		iprint(" CR4 %8.8lux", getcr4());
		if((m->cpuiddx & (Mce|Cpumsr)) == (Mce|Cpumsr)){
			rdmsr(0x00, &mca);
			rdmsr(0x01, &mct);
			iprint("\n  MCA %8.8llux MCT %8.8llux", mca, mct);
		}
	}
	iprint("\n  ur %#p up %#p\n", ureg, up);
}


/*
 * Fill in enough of Ureg to get a stack trace, and call a function.
 * Used by debugging interface rdb.
 */
void
callwithureg(void (*fn)(Ureg*))
{
	Ureg ureg;
	ureg.pc = getcallerpc(&fn);
	ureg.sp = (ulong)&fn;
	fn(&ureg);
}

static void
_dumpstack(Ureg *ureg)
{
	uintptr l, v, i, estack;
	extern ulong etext;
	int x;
	char *s;

	if((s = getconf("*nodumpstack")) != nil && strcmp(s, "0") != 0){
		iprint("dumpstack disabled\n");
		return;
	}
	iprint("dumpstack\n");

	x = 0;
	x += iprint("ktrace /kernel/path %.8lux %.8lux <<EOF\n", ureg->pc, ureg->sp);
	i = 0;
	if(up
	&& (uintptr)&l >= (uintptr)up->kstack
	&& (uintptr)&l <= (uintptr)up->kstack+KSTACK)
		estack = (uintptr)up->kstack+KSTACK;
	else if((uintptr)&l >= (uintptr)m->stack
	&& (uintptr)&l <= (uintptr)m+MACHSIZE)
		estack = (uintptr)m+MACHSIZE;
	else
		return;
	x += iprint("estackx %p\n", estack);

	for(l = (uintptr)&l; l < estack; l += sizeof(uintptr)){
		v = *(uintptr*)l;
		if((KTZERO < v && v < (uintptr)&etext) || estack-l < 32){
			/*
			 * Could pick off a general CALL (((uchar*)v)[-5] == 0xE8)
			 * and CALL indirect through AX
			 * (((uchar*)v)[-2] == 0xFF && ((uchar*)v)[-1] == 0xD0),
			 * but this is too clever and misses the faulting address.
			 */
			x += iprint("%.8p=%.8p ", l, v);
			i++;
		}
		if(i == 4){
			i = 0;
			x += iprint("\n");
		}
	}
	if(i)
		iprint("\n");
	iprint("EOF\n");

	if(ureg->trap != VectorNMI)
		return;

	i = 0;
	for(l = (uintptr)&l; l < estack; l += sizeof(uintptr)){
		iprint("%.8p ", *(uintptr*)l);
		if(++i == 8){
			i = 0;
			iprint("\n");
		}
	}
	if(i)
		iprint("\n");
}

void
dumpstack(void)
{
	callwithureg(_dumpstack);
}

static void
debugbpt(Ureg* ureg, void*)
{
	char buf[ERRMAX];

	if(up == 0)
		panic("kernel bpt");
	/* restore pc to instruction that caused the trap */
	ureg->pc--;
	snprint(buf, sizeof buf, "sys: breakpoint");
	postnote(up, 1, buf, NDebug);
}

static void
doublefault(Ureg*, void*)
{
	panic("double fault");
}

static void
unexpected(Ureg* ureg, void*)
{
	print("unexpected trap %lud; ignoring\n", ureg->trap);
}

extern void checkpages(void);
extern void checkfault(ulong, ulong);
static void
fault386(Ureg* ureg, void*)
{
	ulong addr;
	int read, user, n, insyscall;
	char buf[ERRMAX];

	addr = getcr2();
	read = !(ureg->ecode & 2);

	user = (ureg->cs & 0xFFFF) == UESEL;
	if(!user){
		if(vmapsync(addr))
			return;
		if(addr >= USTKTOP)
			panic("kernel fault: bad address pc=0x%.8lux addr=0x%.8lux", ureg->pc, addr);
		if(up == nil)
			panic("kernel fault: no user process pc=0x%.8lux addr=0x%.8lux", ureg->pc, addr);
	}
	if(up == nil)
		panic("user fault: up=0 pc=0x%.8lux addr=0x%.8lux", ureg->pc, addr);

	insyscall = up->insyscall;
	up->insyscall = 1;
	n = fault(addr, read);
	if(n < 0){
		if(!user){
			dumpregs(ureg);
			panic("fault: 0x%lux", addr);
		}
		checkpages();
		checkfault(addr, ureg->pc);
		snprint(buf, sizeof buf, "sys: trap: fault %s addr=0x%lux",
			read ? "read" : "write", addr);
		postnote(up, 1, buf, NDebug);
	}
	up->insyscall = insyscall;
}

/*
 *  system calls
 */
#include "../port/systab.h"

/*
 *  Syscall is called directly from assembler without going through trap().
 */
void
syscall(Ureg* ureg)
{
	char *e;
	ulong	sp;
	long	ret;
	int	i, s;
	ulong scallnr;
	vlong startns, stopns;

	if((ureg->cs & 0xFFFF) != UESEL)
		panic("syscall: cs 0x%4.4luX", ureg->cs);

	cycles(&up->kentry);

	m->syscall++;
	up->insyscall = 1;
	up->pc = ureg->pc;
	up->dbgreg = ureg;

	sp = ureg->usp;
	scallnr = ureg->ax;
	up->scallnr = scallnr;

	if(up->procctl == Proc_tracesyscall){
		/*
		 * Redundant validaddr.  Do we care?
		 * Tracing syscalls is not exactly a fast path...
		 * Beware, validaddr currently does a pexit rather
		 * than an error if there's a problem; that might
		 * change in the future.
		 */
		if(sp < (USTKTOP-BY2PG) || sp > (USTKTOP-sizeof(Sargs)-BY2WD))
			validaddr(sp, sizeof(Sargs)+BY2WD, 0);

		syscallfmt(scallnr, ureg->pc, (va_list)(sp+BY2WD));
		up->procctl = Proc_stopme;
		procctl(up);
		if(up->syscalltrace)
			free(up->syscalltrace);
		up->syscalltrace = nil;
		startns = todget(nil);
	}

	if(scallnr == RFORK && up->fpstate == FPactive){
		fpsave(&up->fpsave);
		up->fpstate = FPinactive;
	}
	spllo();

	up->nerrlab = 0;
	ret = -1;
	if(!waserror()){
		if(scallnr >= nsyscall || systab[scallnr] == 0){
			pprint("bad sys call number %lud pc %lux\n",
				scallnr, ureg->pc);
			postnote(up, 1, "sys: bad sys call", NDebug);
			error(Ebadarg);
		}

		if(sp<(USTKTOP-BY2PG) || sp>(USTKTOP-sizeof(Sargs)-BY2WD))
			validaddr(sp, sizeof(Sargs)+BY2WD, 0);

		up->s = *((Sargs*)(sp+BY2WD));
		up->psstate = sysctab[scallnr];

		ret = systab[scallnr](up->s.args);
		poperror();
	}else{
		/* failure: save the error buffer for errstr */
		e = up->syserrstr;
		up->syserrstr = up->errstr;
		up->errstr = e;
		if(0 && up->pid == 1)
			print("syscall %lud error %s\n", scallnr, up->syserrstr);
	}
	if(up->nerrlab){
		print("bad errstack [%lud]: %d extra\n", scallnr, up->nerrlab);
		for(i = 0; i < NERR; i++)
			print("sp=%lux pc=%lux\n",
				up->errlab[i].sp, up->errlab[i].pc);
		panic("error stack");
	}

	/*
	 *  Put return value in frame.  On the x86 the syscall is
	 *  just another trap and the return value from syscall is
	 *  ignored.  On other machines the return value is put into
	 *  the results register by caller of syscall.
	 */
	ureg->ax = ret;

	if(up->procctl == Proc_tracesyscall){
		stopns = todget(nil);
		up->procctl = Proc_stopme;
		sysretfmt(scallnr, (va_list)(sp+BY2WD), ret, startns, stopns);
		s = splhi();
		procctl(up);
		splx(s);
		if(up->syscalltrace)
			free(up->syscalltrace);
		up->syscalltrace = nil;
	}

	up->insyscall = 0;
	up->psstate = 0;

	if(scallnr == NOTED)
		noted(ureg, *(ulong*)(sp+BY2WD));

	if(scallnr!=RFORK && (up->procctl || up->nnote)){
		splhi();
		notify(ureg);
	}
	/* if we delayed sched because we held a lock, sched now */
	if(up->delaysched)
		sched();
	kexit(ureg);
}

/*
 *  Call user, if necessary, with note.
 *  Pass user the Ureg struct and the note on his stack.
 */
int
notify(Ureg* ureg)
{
	int l;
	ulong s, sp;
	Note *n;

	if(up->procctl)
		procctl(up);
	if(up->nnote == 0)
		return 0;

	if(up->fpstate == FPactive){
		fpsave(&up->fpsave);
		up->fpstate = FPinactive;
	}
	up->fpstate |= FPillegal;

	s = spllo();
	qlock(&up->debug);
	up->notepending = 0;
	n = &up->note[0];
	if(strncmp(n->msg, "sys:", 4) == 0){
		l = strlen(n->msg);
		if(l > ERRMAX-15)	/* " pc=0x12345678\0" */
			l = ERRMAX-15;
		seprint(n->msg+l, &n->msg[sizeof n->msg], " pc=0x%.8lux",
			ureg->pc);
	}

	if(n->flag!=NUser && (up->notified || up->notify==0)){
		if(n->flag == NDebug)
			pprint("suicide: %s\n", n->msg);
		qunlock(&up->debug);
		pexit(n->msg, n->flag!=NDebug);
	}

	if(up->notified){
		qunlock(&up->debug);
		splhi();
		return 0;
	}

	if(!up->notify){
		qunlock(&up->debug);
		pexit(n->msg, n->flag!=NDebug);
	}
	sp = ureg->usp;
	sp -= 256;	/* debugging: preserve context causing problem */
	sp -= sizeof(Ureg);
if(0) print("%s %lud: notify %.8lux %.8lux %.8lux %s\n",
	up->text, up->pid, ureg->pc, ureg->usp, sp, n->msg);

	if(!okaddr((ulong)up->notify, 1, 0)
	|| !okaddr(sp-ERRMAX-4*BY2WD, sizeof(Ureg)+ERRMAX+4*BY2WD, 1)){
		qunlock(&up->debug);
		pprint("suicide: bad address in notify\n");
		pexit("Suicide", 0);
	}

	memmove((Ureg*)sp, ureg, sizeof(Ureg));
	*(Ureg**)(sp-BY2WD) = up->ureg;	/* word under Ureg is old up->ureg */
	up->ureg = (void*)sp;
	sp -= BY2WD+ERRMAX;
	memmove((char*)sp, up->note[0].msg, ERRMAX);
	sp -= 3*BY2WD;
	*(ulong*)(sp+2*BY2WD) = sp+3*BY2WD;		/* arg 2 is string */
	*(ulong*)(sp+1*BY2WD) = (ulong)up->ureg;	/* arg 1 is ureg* */
	*(ulong*)(sp+0*BY2WD) = 0;			/* arg 0 is pc */
	ureg->usp = sp;
	ureg->pc = (ulong)up->notify;
	up->notified = 1;
	up->nnote--;
	memmove(&up->lastnote, &up->note[0], sizeof(Note));
	memmove(&up->note[0], &up->note[1], up->nnote*sizeof(Note));

	qunlock(&up->debug);
	splx(s);
	return 1;
}
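
/*
 * The frame built above matches the handler a user process registers
 * with notify(2): word 0 is a dummy return PC, word 1 points at the
 * saved Ureg, word 2 at the note string.  A sketch of the user side
 * (not kernel code); the handler name is illustrative.
 */
#if 0
void
handler(void *ureg, char *note)
{
	if(strncmp(note, "sys: breakpoint", 15) == 0)
		noted(ureg, NCONT);	/* resume where the note interrupted us */
	noted(ureg, NDFLT);		/* otherwise take the default action */
}
#endif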

/*
 *   Return user to state before notify()
 */
void
noted(Ureg* ureg, ulong arg0)
{
	Ureg *nureg;
	ulong oureg, sp;

	qlock(&up->debug);
	if(arg0!=NRSTR && !up->notified) {
		qunlock(&up->debug);
		pprint("call to noted() when not notified\n");
		pexit("Suicide", 0);
	}
	up->notified = 0;

	nureg = up->ureg;	/* pointer to user returned Ureg struct */

	up->fpstate &= ~FPillegal;

	/* sanity clause */
	oureg = (ulong)nureg;
	if(!okaddr((ulong)oureg-BY2WD, BY2WD+sizeof(Ureg), 0)){
		qunlock(&up->debug);
		pprint("bad ureg in noted or call to noted when not notified\n");
		pexit("Suicide", 0);
	}

	/*
	 * Check the segment selectors are all valid, otherwise
	 * a fault will be taken on attempting to return to the
	 * user process.
	 * Take care with the comparisons as different processor
	 * generations push segment descriptors in different ways.
	 */
	if((nureg->cs & 0xFFFF) != UESEL || (nureg->ss & 0xFFFF) != UDSEL
	  || (nureg->ds & 0xFFFF) != UDSEL || (nureg->es & 0xFFFF) != UDSEL
	  || (nureg->fs & 0xFFFF) != UDSEL || (nureg->gs & 0xFFFF) != UDSEL){
		qunlock(&up->debug);
		pprint("bad segment selector in noted\n");
		pexit("Suicide", 0);
	}

	/* don't let user change system flags */
	nureg->flags = (ureg->flags & ~0xCD5) | (nureg->flags & 0xCD5);

	memmove(ureg, nureg, sizeof(Ureg));

	switch(arg0){
	case NCONT:
	case NRSTR:
if(0) print("%s %lud: noted %.8lux %.8lux\n",
	up->text, up->pid, nureg->pc, nureg->usp);
		if(!okaddr(nureg->pc, 1, 0) || !okaddr(nureg->usp, BY2WD, 0)){
			qunlock(&up->debug);
			pprint("suicide: trap in noted\n");
			pexit("Suicide", 0);
		}
		up->ureg = (Ureg*)(*(ulong*)(oureg-BY2WD));
		qunlock(&up->debug);
		break;

	case NSAVE:
		if(!okaddr(nureg->pc, BY2WD, 0)
		|| !okaddr(nureg->usp, BY2WD, 0)){
			qunlock(&up->debug);
			pprint("suicide: trap in noted\n");
			pexit("Suicide", 0);
		}
		qunlock(&up->debug);
		sp = oureg-4*BY2WD-ERRMAX;
		splhi();
		ureg->sp = sp;
		((ulong*)sp)[1] = oureg;	/* arg 1 0(FP) is ureg* */
		((ulong*)sp)[0] = 0;		/* arg 0 is pc */
		break;

	default:
		pprint("unknown noted arg 0x%lux\n", arg0);
		up->lastnote.flag = NDebug;
		/* fall through */

	case NDFLT:
		if(up->lastnote.flag == NDebug){
			qunlock(&up->debug);
			pprint("suicide: %s\n", up->lastnote.msg);
		} else
			qunlock(&up->debug);
		pexit(up->lastnote.msg, up->lastnote.flag!=NDebug);
	}
}

void
validalign(uintptr addr, unsigned align)
{
	/*
	 * Plan 9 is a 32-bit O/S, and the hardware it runs on
	 * does not usually have instructions which move 64-bit
	 * quantities directly; such operations are synthesized
	 * with 32-bit move instructions. Therefore, the compiler
	 * (and hardware) usually enforce only 32-bit alignment,
	 * if at all.
	 *
	 * Take this out if the architecture warrants it.
	 */
	if(align == sizeof(vlong))
		align = sizeof(long);

	/*
	 * Check align is a power of 2, then addr alignment.
	 */
	if((align != 0 && !(align & (align-1))) && !(addr & (align-1)))
		return;
	postnote(up, 1, "sys: odd address", NDebug);
	error(Ebadarg);
	/*NOTREACHED*/
}
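
/*
 * The test above accepts addr only when align is a nonzero power of
 * two and addr is a multiple of it: for example validalign(0x1004, 4)
 * passes, while validalign(0x1002, 4) posts "sys: odd address".  The
 * same predicate in isolation; isaligned is a hypothetical helper.
 */
#if 0
static int
isaligned(uintptr addr, unsigned align)
{
	return align != 0 && (align & (align-1)) == 0 && (addr & (align-1)) == 0;
}
#endif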

long
execregs(ulong entry, ulong ssize, ulong nargs)
{
	ulong *sp;
	Ureg *ureg;

	up->fpstate = FPinit;
	fpoff();

	sp = (ulong*)(USTKTOP - ssize);
	*--sp = nargs;

	ureg = up->dbgreg;
	ureg->usp = (ulong)sp;
	ureg->pc = entry;
	return USTKTOP-sizeof(Tos);		/* address of kernel/user shared data */
}

/*
 *  return the userpc the last exception happened at
 */
ulong
userpc(void)
{
	Ureg *ureg;

	ureg = (Ureg*)up->dbgreg;
	return ureg->pc;
}

/* This routine must save the values of registers the user is not permitted
 * to write from devproc and then restore the saved values before returning.
 */
void
setregisters(Ureg* ureg, char* pureg, char* uva, int n)
{
	ulong cs, ds, es, flags, fs, gs, ss;

	ss = ureg->ss;
	flags = ureg->flags;
	cs = ureg->cs;
	ds = ureg->ds;
	es = ureg->es;
	fs = ureg->fs;
	gs = ureg->gs;
	memmove(pureg, uva, n);
	ureg->gs = gs;
	ureg->fs = fs;
	ureg->es = es;
	ureg->ds = ds;
	ureg->cs = cs;
	ureg->flags = (ureg->flags & 0x00FF) | (flags & 0xFF00);
	ureg->ss = ss;
}

static void
linkproc(void)
{
	spllo();
	up->kpfun(up->kparg);
	pexit("kproc dying", 0);
}

void
kprocchild(Proc* p, void (*func)(void*), void* arg)
{
	/*
	 * gotolabel() needs a word on the stack in
	 * which to place the return PC used to jump
	 * to linkproc().
	 */
	p->sched.pc = (ulong)linkproc;
	p->sched.sp = (ulong)p->kstack+KSTACK-BY2WD;

	p->kpfun = func;
	p->kparg = arg;
}

void
forkchild(Proc *p, Ureg *ureg)
{
	Ureg *cureg;

	/*
	 * Add 2*BY2WD to the stack to account for
	 *  - the return PC
	 *  - trap's argument (ur)
	 */
	p->sched.sp = (ulong)p->kstack+KSTACK-(sizeof(Ureg)+2*BY2WD);
	p->sched.pc = (ulong)forkret;

	cureg = (Ureg*)(p->sched.sp+2*BY2WD);
	memmove(cureg, ureg, sizeof(Ureg));
	/* return value of syscall in child */
	cureg->ax = 0;

	/* Things from bottom of syscall which were never executed */
	p->psstate = 0;
	p->insyscall = 0;
}

/* Give enough context in the ureg to produce a kernel stack for
 * a sleeping process
 */
void
setkernur(Ureg* ureg, Proc* p)
{
	ureg->pc = p->sched.pc;
	ureg->sp = p->sched.sp+4;
}

ulong
dbgpc(Proc *p)
{
	Ureg *ureg;

	ureg = p->dbgreg;
	if(ureg == 0)
		return 0;

	return ureg->pc;
}