Subversion Repositories planix.SVN

Rev

Rev 2 | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
2 - 1
#include	<u.h>
2
#include	"../port/lib.h"
3
#include	"mem.h"
4
#include	"dat.h"
5
#include	"fns.h"
6
#include	"../port/error.h"
7
#include	"../port/edf.h"
8
#include	<trace.h>
9
 
10
int	schedgain = 30;	/* units in seconds */
11
int	nrdy;
12
Ref	noteidalloc;
13
 
14
void updatecpu(Proc*);
15
int reprioritize(Proc*);
16
 
17
ulong	delayedscheds;	/* statistics */
18
long skipscheds;
19
long preempts;
20
ulong load;
21
 
22
static Ref	pidalloc;
23
 
24
static struct Procalloc
25
{
26
	Lock;
27
	Proc*	ht[128];
28
	Proc*	arena;
29
	Proc*	free;
30
} procalloc;
31
 
32
enum
33
{
34
	Q=10,
35
	DQ=4,
36
	Scaling=2,
37
};
38
 
39
Schedq	runq[Nrq];
40
ulong	runvec;
41
 
42
char *statename[] =
43
{	/* BUG: generate automatically */
44
	"Dead",
45
	"Moribund",
46
	"Ready",
47
	"Scheding",
48
	"Running",
49
	"Queueing",
50
	"QueueingR",
51
	"QueueingW",
52
	"Wakeme",
53
	"Broken",
54
	"Stopped",
55
	"Rendez",
56
	"Waitrelease",
57
};
58
 
59
static void pidhash(Proc*);
60
static void pidunhash(Proc*);
61
static void rebalance(void);
62
 
63
/*
64
 * Always splhi()'ed.
65
 */
66
void
67
schedinit(void)		/* never returns */
68
{
69
	Edf *e;
70
 
71
	setlabel(&m->sched);
72
	if(up) {
73
		if((e = up->edf) && (e->flags & Admitted))
74
			edfrecord(up);
75
		m->proc = 0;
76
		switch(up->state) {
77
		case Running:
78
			ready(up);
79
			break;
80
		case Moribund:
81
			up->state = Dead;
82
			edfstop(up);
83
			if (up->edf)
84
				free(up->edf);
85
			up->edf = nil;
86
 
87
			/*
88
			 * Holding locks from pexit:
89
			 * 	procalloc
90
			 *	palloc
91
			 */
92
			mmurelease(up);
93
 
94
			up->qnext = procalloc.free;
95
			procalloc.free = up;
96
 
97
			unlock(&palloc);
98
			unlock(&procalloc);
99
			break;
100
		}
101
		up->mach = nil;
102
		updatecpu(up);
103
		up = nil;
104
	}
105
	sched();
106
}
107
 
108
/*
109
 *  If changing this routine, look also at sleep().  It
110
 *  contains a copy of the guts of sched().
111
 */
112
void
113
sched(void)
114
{
115
	Proc *p;
116
 
117
	if(m->ilockdepth)
118
		panic("cpu%d: ilockdepth %d, last lock %#p at %#p, sched called from %#p",
119
			m->machno,
120
			m->ilockdepth,
121
			up? up->lastilock: nil,
122
			(up && up->lastilock)? up->lastilock->pc: 0,
123
			getcallerpc(&p+2));
124
	if(up){
125
		/*
126
		 * Delay the sched until the process gives up the locks
127
		 * it is holding.  This avoids dumb lock loops.
128
		 * Don't delay if the process is Moribund.
129
		 * It called sched to die.
130
		 * But do sched eventually.  This avoids a missing unlock
131
		 * from hanging the entire kernel. 
132
		 * But don't reschedule procs holding palloc or procalloc.
133
		 * Those are far too important to be holding while asleep.
134
		 *
135
		 * This test is not exact.  There can still be a few instructions
136
		 * in the middle of taslock when a process holds a lock
137
		 * but Lock.p has not yet been initialized.
138
		 */
139
		if(up->nlocks.ref)
140
		if(up->state != Moribund)
141
		if(up->delaysched < 20
142
		|| palloc.Lock.p == up
143
		|| procalloc.Lock.p == up){
144
			up->delaysched++;
145
 			delayedscheds++;
146
			return;
147
		}
148
		up->delaysched = 0;
149
 
150
		splhi();
151
 
152
		/* statistics */
153
		m->cs++;
154
 
155
		procsave(up);
156
		if(setlabel(&up->sched)){
157
			procrestore(up);
158
			spllo();
159
			return;
160
		}
161
		gotolabel(&m->sched);
162
	}
163
	p = runproc();
164
	if(!p->edf){
165
		updatecpu(p);
166
		p->priority = reprioritize(p);
167
	}
168
	if(p != m->readied)
169
		m->schedticks = m->ticks + HZ/10;
170
	m->readied = 0;
171
	up = p;
172
	up->state = Running;
173
	up->mach = MACHP(m->machno);
174
	m->proc = up;
175
	mmuswitch(up);
176
	gotolabel(&up->sched);
177
}
178
 
179
int
180
anyready(void)
181
{
182
	return runvec;
183
}
184
 
185
int
186
anyhigher(void)
187
{
188
	return runvec & ~((1<<(up->priority+1))-1);
189
}
190
 
191
/*
192
 *  here once per clock tick to see if we should resched
193
 */
194
void
195
hzsched(void)
196
{
197
	/* once a second, rebalance will reprioritize ready procs */
198
	if(m->machno == 0)
199
		rebalance();
200
 
201
	/* unless preempted, get to run for at least 100ms */
202
	if(anyhigher()
203
	|| (!up->fixedpri && m->ticks > m->schedticks && anyready())){
204
		m->readied = nil;	/* avoid cooperative scheduling */
205
		up->delaysched++;
206
	}
207
}
208
 
209
/*
210
 *  here at the end of non-clock interrupts to see if we should preempt the
211
 *  current process.  Returns 1 if preempted, 0 otherwise.
212
 */
213
int
214
preempted(void)
215
{
216
	if(up && up->state == Running)
217
	if(up->preempted == 0)
218
	if(anyhigher())
219
	if(!active.exiting){
220
		m->readied = nil;	/* avoid cooperative scheduling */
221
		up->preempted = 1;
222
		sched();
223
		splhi();
224
		up->preempted = 0;
225
		return 1;
226
	}
227
	return 0;
228
}
229
 
230
/*
231
 * Update the cpu time average for this particular process,
232
 * which is about to change from up -> not up or vice versa.
233
 * p->lastupdate is the last time an updatecpu happened.
234
 *
235
 * The cpu time average is a decaying average that lasts
236
 * about D clock ticks.  D is chosen to be approximately
237
 * the cpu time of a cpu-intensive "quick job".  A job has to run
238
 * for approximately D clock ticks before we home in on its 
239
 * actual cpu usage.  Thus if you manage to get in and get out
240
 * quickly, you won't be penalized during your burst.  Once you
241
 * start using your share of the cpu for more than about D
242
 * clock ticks though, your p->cpu hits 1000 (1.0) and you end up 
243
 * below all the other quick jobs.  Interactive tasks, because
244
 * they basically always use less than their fair share of cpu,
245
 * will be rewarded.
246
 *
247
 * If the process has not been running, then we want to
248
 * apply the filter
249
 *
250
 *	cpu = cpu * (D-1)/D
251
 *
252
 * n times, yielding 
253
 * 
254
 *	cpu = cpu * ((D-1)/D)^n
255
 *
256
 * but D is big enough that this is approximately 
257
 *
258
 * 	cpu = cpu * (D-n)/D
259
 *
260
 * so we use that instead.
261
 * 
262
 * If the process has been running, we apply the filter to
263
 * 1 - cpu, yielding a similar equation.  Note that cpu is 
264
 * stored in fixed point (* 1000).
265
 *
266
 * Updatecpu must be called before changing up, in order
267
 * to maintain accurate cpu usage statistics.  It can be called
268
 * at any time to bring the stats for a given proc up-to-date.
269
 */
270
void
271
updatecpu(Proc *p)
272
{
273
	int n, t, ocpu;
274
	int D = schedgain*HZ*Scaling;
275
 
276
	if(p->edf)
277
		return;
278
 
279
	t = MACHP(0)->ticks*Scaling + Scaling/2;
280
	n = t - p->lastupdate;
281
	p->lastupdate = t;
282
 
283
	if(n == 0)
284
		return;
285
	if(n > D)
286
		n = D;
287
 
288
	ocpu = p->cpu;
289
	if(p != up)
290
		p->cpu = (ocpu*(D-n))/D;
291
	else{
292
		t = 1000 - ocpu;
293
		t = (t*(D-n))/D;
294
		p->cpu = 1000 - t;
295
	}
296
 
297
//iprint("pid %d %s for %d cpu %d -> %d\n", p->pid,p==up?"active":"inactive",n, ocpu,p->cpu);
298
}
299
 
300
/*
301
 * On average, p has used p->cpu of a cpu recently.
302
 * Its fair share is conf.nmach/m->load of a cpu.  If it has been getting
303
 * too much, penalize it.  If it has been getting not enough, reward it.
304
 * I don't think you can get much more than your fair share that 
305
 * often, so most of the queues are for using less.  Having a priority
306
 * of 3 means you're just right.  Having a higher priority (up to p->basepri) 
307
 * means you're not using as much as you could.
308
 */
309
int
310
reprioritize(Proc *p)
311
{
312
	int fairshare, n, load, ratio;
313
 
314
	load = MACHP(0)->load;
315
	if(load == 0)
316
		return p->basepri;
317
 
318
	/*
319
	 *  fairshare = 1.000 * conf.nproc * 1.000/load,
320
	 * except the decimal point is moved three places
321
	 * on both load and fairshare.
322
	 */
323
	fairshare = (conf.nmach*1000*1000)/load;
324
	n = p->cpu;
325
	if(n == 0)
326
		n = 1;
327
	ratio = (fairshare+n/2) / n;
328
	if(ratio > p->basepri)
329
		ratio = p->basepri;
330
	if(ratio < 0)
331
		panic("reprioritize");
332
//iprint("pid %d cpu %d load %d fair %d pri %d\n", p->pid, p->cpu, load, fairshare, ratio);
333
	return ratio;
334
}
335
 
336
/*
337
 * add a process to a scheduling queue
338
 */
339
void
340
queueproc(Schedq *rq, Proc *p)
341
{
342
	int pri;
343
 
344
	pri = rq - runq;
345
	lock(runq);
346
	p->priority = pri;
347
	p->rnext = 0;
348
	if(rq->tail)
349
		rq->tail->rnext = p;
350
	else
351
		rq->head = p;
352
	rq->tail = p;
353
	rq->n++;
354
	nrdy++;
355
	runvec |= 1<<pri;
356
	unlock(runq);
357
}
358
 
359
/*
360
 *  try to remove a process from a scheduling queue (called splhi)
361
 */
362
Proc*
363
dequeueproc(Schedq *rq, Proc *tp)
364
{
365
	Proc *l, *p;
366
 
367
	if(!canlock(runq))
368
		return nil;
369
 
370
	/*
371
	 *  the queue may have changed before we locked runq,
372
	 *  refind the target process.
373
	 */
374
	l = 0;
375
	for(p = rq->head; p; p = p->rnext){
376
		if(p == tp)
377
			break;
378
		l = p;
379
	}
380
 
381
	/*
382
	 *  p->mach==0 only when process state is saved
383
	 */
384
	if(p == 0 || p->mach){
385
		unlock(runq);
386
		return nil;
387
	}
388
	if(p->rnext == 0)
389
		rq->tail = l;
390
	if(l)
391
		l->rnext = p->rnext;
392
	else
393
		rq->head = p->rnext;
394
	if(rq->head == nil)
395
		runvec &= ~(1<<(rq-runq));
396
	rq->n--;
397
	nrdy--;
398
	if(p->state != Ready)
399
		print("dequeueproc %s %lud %s\n", p->text, p->pid, statename[p->state]);
400
 
401
	unlock(runq);
402
	return p;
403
}
404
 
405
/*
406
 *  ready(p) picks a new priority for a process and sticks it in the
407
 *  runq for that priority.
408
 */
409
void
410
ready(Proc *p)
411
{
412
	int s, pri;
413
	Schedq *rq;
414
	void (*pt)(Proc*, int, vlong);
415
 
416
	s = splhi();
417
	if(edfready(p)){
418
		splx(s);
419
		return;
420
	}
421
 
422
	if(up != p && (p->wired == nil || p->wired == m))
423
		m->readied = p;	/* group scheduling */
424
 
425
	updatecpu(p);
426
	pri = reprioritize(p);
427
	p->priority = pri;
428
	rq = &runq[pri];
429
	p->state = Ready;
430
	queueproc(rq, p);
431
	pt = proctrace;
432
	if(pt)
433
		pt(p, SReady, 0);
434
	splx(s);
435
}
436
 
437
/*
438
 *  yield the processor and drop our priority
439
 */
440
void
441
yield(void)
442
{
443
	if(anyready()){
444
		/* pretend we just used 1/2 tick */
445
		up->lastupdate -= Scaling/2;  
446
		sched();
447
	}
448
}
449
 
450
/*
451
 *  recalculate priorities once a second.  We need to do this
452
 *  since priorities will otherwise only be recalculated when
453
 *  the running process blocks.
454
 */
455
ulong balancetime;
456
 
457
static void
458
rebalance(void)
459
{
460
	int pri, npri, t, x;
461
	Schedq *rq;
462
	Proc *p;
463
 
464
	t = m->ticks;
465
	if(t - balancetime < HZ)
466
		return;
467
	balancetime = t;
468
 
469
	for(pri=0, rq=runq; pri<Npriq; pri++, rq++){
470
another:
471
		p = rq->head;
472
		if(p == nil)
473
			continue;
474
		if(p->mp != MACHP(m->machno))
475
			continue;
476
		if(pri == p->basepri)
477
			continue;
478
		updatecpu(p);
479
		npri = reprioritize(p);
480
		if(npri != pri){
481
			x = splhi();
482
			p = dequeueproc(rq, p);
483
			if(p)
484
				queueproc(&runq[npri], p);
485
			splx(x);
486
			goto another;
487
		}
488
	}
489
}
490
 
491
 
492
/*
493
 *  pick a process to run
494
 */
495
Proc*
496
runproc(void)
497
{
498
	Schedq *rq;
499
	Proc *p;
500
	ulong start, now;
501
	int i;
502
	void (*pt)(Proc*, int, vlong);
503
 
504
	start = perfticks();
505
 
506
	/* cooperative scheduling until the clock ticks */
507
	if((p=m->readied) && p->mach==0 && p->state==Ready
508
	&& (p->wired == nil || p->wired == m)
509
	&& runq[Nrq-1].head == nil && runq[Nrq-2].head == nil){
510
		skipscheds++;
511
		rq = &runq[p->priority];
512
		goto found;
513
	}
514
 
515
	preempts++;
516
 
517
loop:
518
	/*
519
	 *  find a process that last ran on this processor (affinity),
520
	 *  or one that hasn't moved in a while (load balancing).  Every
521
	 *  time around the loop affinity goes down.
522
	 */
523
	spllo();
524
	for(i = 0;; i++){
525
		/*
526
		 *  find the highest priority target process that this
527
		 *  processor can run given affinity constraints.
528
		 *
529
		 */
530
		for(rq = &runq[Nrq-1]; rq >= runq; rq--){
531
			for(p = rq->head; p; p = p->rnext){
532
				if(p->mp == nil || p->mp == MACHP(m->machno)
533
				|| (!p->wired && i > 0))
534
					goto found;
535
			}
536
		}
537
 
538
		/* waste time or halt the CPU */
539
		idlehands();
540
 
541
		/* remember how much time we're here */
542
		now = perfticks();
543
		m->perf.inidle += now-start;
544
		start = now;
545
	}
546
 
547
found:
548
	splhi();
549
	p = dequeueproc(rq, p);
550
	if(p == nil)
551
		goto loop;
552
 
553
	p->state = Scheding;
554
	p->mp = MACHP(m->machno);
555
 
556
	if(edflock(p)){
557
		edfrun(p, rq == &runq[PriEdf]);	/* start deadline timer and do admin */
558
		edfunlock();
559
	}
560
	pt = proctrace;
561
	if(pt)
562
		pt(p, SRun, 0);
563
	return p;
564
}
565
 
566
int
567
canpage(Proc *p)
568
{
569
	int ok = 0;
570
 
571
	splhi();
572
	lock(runq);
573
	/* Only reliable way to see if we are Running */
574
	if(p->mach == 0) {
575
		p->newtlb = 1;
576
		ok = 1;
577
	}
578
	unlock(runq);
579
	spllo();
580
 
581
	return ok;
582
}
583
 
584
void
585
noprocpanic(char *msg)
586
{
587
	/*
588
	 * setting exiting will make hzclock() on each processor call exit(0).
589
	 * clearing our bit in machs avoids calling exit(0) from hzclock()
590
	 * on this processor.
591
	 */
592
	lock(&active);
593
	active.machs &= ~(1<<m->machno);
594
	active.exiting = 1;
595
	unlock(&active);
596
 
597
	procdump();
598
	delay(1000);
599
	panic(msg);
600
}
601
 
602
Proc*
603
newproc(void)
604
{
605
	char msg[64];
606
	Proc *p;
607
 
608
	lock(&procalloc);
609
	while((p = procalloc.free) == nil) {
610
		unlock(&procalloc);
611
 
612
		snprint(msg, sizeof msg, "no procs; %s forking",
613
			up? up->text: "kernel");
614
		/*
615
		 * the situation is unlikely to heal itself.
616
		 * dump the proc table and restart by default.
617
		 * *noprocspersist in plan9.ini will yield the old
618
		 * behaviour of trying forever.
619
		 */
620
		if(getconf("*noprocspersist") == nil)
621
			noprocpanic(msg);
622
		resrcwait(msg);
623
		lock(&procalloc);
624
	}
625
	procalloc.free = p->qnext;
626
	unlock(&procalloc);
627
 
628
	p->state = Scheding;
629
	p->psstate = "New";
630
	p->mach = 0;
631
	p->qnext = 0;
632
	p->nchild = 0;
633
	p->nwait = 0;
634
	p->waitq = 0;
635
	p->parent = 0;
636
	p->pgrp = 0;
637
	p->egrp = 0;
638
	p->fgrp = 0;
639
	p->rgrp = 0;
640
	p->pdbg = 0;
641
	p->fpstate = FPinit;
642
	p->kp = 0;
643
	if(up && up->procctl == Proc_tracesyscall)
644
		p->procctl = Proc_tracesyscall;
645
	else
646
		p->procctl = 0;
647
	p->syscalltrace = 0;	
648
	p->notepending = 0;
649
	p->ureg = 0;
650
	p->privatemem = 0;
651
	p->noswap = 0;
652
	p->errstr = p->errbuf0;
653
	p->syserrstr = p->errbuf1;
654
	p->errbuf0[0] = '\0';
655
	p->errbuf1[0] = '\0';
656
	p->nlocks.ref = 0;
657
	p->delaysched = 0;
658
	p->trace = 0;
659
	kstrdup(&p->user, "*nouser");
660
	kstrdup(&p->text, "*notext");
661
	kstrdup(&p->args, "");
662
	p->nargs = 0;
663
	p->setargs = 0;
664
	memset(p->seg, 0, sizeof p->seg);
665
	p->pid = incref(&pidalloc);
666
	pidhash(p);
667
	p->noteid = incref(&noteidalloc);
668
	if(p->pid==0 || p->noteid==0)
669
		panic("pidalloc");
670
	if(p->kstack == 0)
671
		p->kstack = smalloc(KSTACK);
672
 
673
	/* sched params */
674
	p->mp = 0;
675
	p->wired = 0;
676
	procpriority(p, PriNormal, 0);
677
	p->cpu = 0;
678
	p->lastupdate = MACHP(0)->ticks*Scaling;
679
	p->edf = nil;
680
 
681
	return p;
682
}
683
 
684
/*
685
 * wire this proc to a machine
686
 */
687
void
688
procwired(Proc *p, int bm)
689
{
690
	Proc *pp;
691
	int i;
692
	char nwired[MAXMACH];
693
	Mach *wm;
694
 
695
	if(bm < 0){
696
		/* pick a machine to wire to */
697
		memset(nwired, 0, sizeof(nwired));
698
		p->wired = 0;
699
		pp = proctab(0);
700
		for(i=0; i<conf.nproc; i++, pp++){
701
			wm = pp->wired;
702
			if(wm && pp->pid)
703
				nwired[wm->machno]++;
704
		}
705
		bm = 0;
706
		for(i=0; i<conf.nmach; i++)
707
			if(nwired[i] < nwired[bm])
708
				bm = i;
709
	} else {
710
		/* use the virtual machine requested */
711
		bm = bm % conf.nmach;
712
	}
713
 
714
	p->wired = MACHP(bm);
715
	p->mp = p->wired;
716
}
717
 
718
void
719
procpriority(Proc *p, int pri, int fixed)
720
{
721
	if(pri >= Npriq)
722
		pri = Npriq - 1;
723
	else if(pri < 0)
724
		pri = 0;
725
	p->basepri = pri;
726
	p->priority = pri;
727
	if(fixed){
728
		p->fixedpri = 1;
729
	} else {
730
		p->fixedpri = 0;
731
	}
732
}
733
 
734
void
735
procinit0(void)		/* bad planning - clashes with devproc.c */
736
{
737
	Proc *p;
738
	int i;
739
 
740
	procalloc.free = xalloc(conf.nproc*sizeof(Proc));
741
	if(procalloc.free == nil){
742
		xsummary();
743
		panic("cannot allocate %lud procs (%ludMB)\n", conf.nproc, conf.nproc*sizeof(Proc)/(1024*1024));
744
	}
745
	procalloc.arena = procalloc.free;
746
 
747
	p = procalloc.free;
748
	for(i=0; i<conf.nproc-1; i++,p++)
749
		p->qnext = p+1;
750
	p->qnext = 0;
751
}
752
 
753
/*
754
 *  sleep if a condition is not true.  Another process will
755
 *  awaken us after it sets the condition.  When we awaken
756
 *  the condition may no longer be true.
757
 *
758
 *  we lock both the process and the rendezvous to keep r->p
759
 *  and p->r synchronized.
760
 */
761
void
762
sleep(Rendez *r, int (*f)(void*), void *arg)
763
{
764
	int s;
765
	void (*pt)(Proc*, int, vlong);
766
 
767
	s = splhi();
768
 
769
	if(up->nlocks.ref)
770
		print("process %lud sleeps with %lud locks held, last lock %#p locked at pc %#lux, sleep called from %#p\n",
771
			up->pid, up->nlocks.ref, up->lastlock, up->lastlock->pc, getcallerpc(&r));
772
	lock(r);
773
	lock(&up->rlock);
774
	if(r->p){
775
		print("double sleep called from %#p, %lud %lud\n", getcallerpc(&r), r->p->pid, up->pid);
776
		dumpstack();
777
	}
778
 
779
	/*
780
	 *  Wakeup only knows there may be something to do by testing
781
	 *  r->p in order to get something to lock on.
782
	 *  Flush that information out to memory in case the sleep is
783
	 *  committed.
784
	 */
785
	r->p = up;
786
 
787
	if((*f)(arg) || up->notepending){
788
		/*
789
		 *  if condition happened or a note is pending
790
		 *  never mind
791
		 */
792
		r->p = nil;
793
		unlock(&up->rlock);
794
		unlock(r);
795
	} else {
796
		/*
797
		 *  now we are committed to
798
		 *  change state and call scheduler
799
		 */
800
		pt = proctrace;
801
		if(pt)
802
			pt(up, SSleep, 0);
803
		up->state = Wakeme;
804
		up->r = r;
805
 
806
		/* statistics */
807
		m->cs++;
808
 
809
		procsave(up);
810
		if(setlabel(&up->sched)) {
811
			/*
812
			 *  here when the process is awakened
813
			 */
814
			procrestore(up);
815
			spllo();
816
		} else {
817
			/*
818
			 *  here to go to sleep (i.e. stop Running)
819
			 */
820
			unlock(&up->rlock);
821
			unlock(r);
822
			gotolabel(&m->sched);
823
		}
824
	}
825
 
826
	if(up->notepending) {
827
		up->notepending = 0;
828
		splx(s);
829
		if(up->procctl == Proc_exitme && up->closingfgrp)
830
			forceclosefgrp();
831
		error(Eintr);
832
	}
833
 
834
	splx(s);
835
}
836
 
837
static int
838
tfn(void *arg)
839
{
840
	return up->trend == nil || up->tfn(arg);
841
}
842
 
843
void
844
twakeup(Ureg*, Timer *t)
845
{
846
	Proc *p;
847
	Rendez *trend;
848
 
849
	p = t->ta;
850
	trend = p->trend;
851
	p->trend = 0;
852
	if(trend)
853
		wakeup(trend);
854
}
855
 
856
void
857
tsleep(Rendez *r, int (*fn)(void*), void *arg, ulong ms)
858
{
859
	if (up->tt){
860
		print("tsleep: timer active: mode %d, tf %#p\n", up->tmode, up->tf);
861
		timerdel(up);
862
	}
863
	up->tns = MS2NS(ms);
864
	up->tf = twakeup;
865
	up->tmode = Trelative;
866
	up->ta = up;
867
	up->trend = r;
868
	up->tfn = fn;
869
	timeradd(up);
870
 
871
	if(waserror()){
872
		timerdel(up);
873
		nexterror();
874
	}
875
	sleep(r, tfn, arg);
876
	if(up->tt)
877
		timerdel(up);
878
	up->twhen = 0;
879
	poperror();
880
}
881
 
882
/*
883
 *  Expects that only one process can call wakeup for any given Rendez.
884
 *  We hold both locks to ensure that r->p and p->r remain consistent.
885
 *  Richard Miller has a better solution that doesn't require both to
886
 *  be held simultaneously, but I'm a paranoid - presotto.
887
 */
888
Proc*
889
wakeup(Rendez *r)
890
{
891
	Proc *p;
892
	int s;
893
 
894
	s = splhi();
895
 
896
	lock(r);
897
	p = r->p;
898
 
899
	if(p != nil){
900
		lock(&p->rlock);
901
		if(p->state != Wakeme || p->r != r){
902
			iprint("%p %p %d\n", p->r, r, p->state);
903
			panic("wakeup: state");
904
		}
905
		r->p = nil;
906
		p->r = nil;
907
		ready(p);
908
		unlock(&p->rlock);
909
	}
910
	unlock(r);
911
 
912
	splx(s);
913
 
914
	return p;
915
}
916
 
917
/*
918
 *  if waking a sleeping process, this routine must hold both
919
 *  p->rlock and r->lock.  However, it can't know them in
920
 *  the same order as wakeup causing a possible lock ordering
921
 *  deadlock.  We break the deadlock by giving up the p->rlock
922
 *  lock if we can't get the r->lock and retrying.
923
 */
924
int
925
postnote(Proc *p, int dolock, char *n, int flag)
926
{
927
	int s, ret;
928
	Rendez *r;
929
	Proc *d, **l;
930
 
931
	if(dolock)
932
		qlock(&p->debug);
933
 
934
	if(flag != NUser && (p->notify == 0 || p->notified))
935
		p->nnote = 0;
936
 
937
	ret = 0;
938
	if(p->nnote < NNOTE) {
939
		strcpy(p->note[p->nnote].msg, n);
940
		p->note[p->nnote++].flag = flag;
941
		ret = 1;
942
	}
943
	p->notepending = 1;
944
	if(dolock)
945
		qunlock(&p->debug);
946
 
947
	/* this loop is to avoid lock ordering problems. */
948
	for(;;){
949
		s = splhi();
950
		lock(&p->rlock);
951
		r = p->r;
952
 
953
		/* waiting for a wakeup? */
954
		if(r == nil)
955
			break;	/* no */
956
 
957
		/* try for the second lock */
958
		if(canlock(r)){
959
			if(p->state != Wakeme || r->p != p)
960
				panic("postnote: state %d %d %d", r->p != p, p->r != r, p->state);
961
			p->r = nil;
962
			r->p = nil;
963
			ready(p);
964
			unlock(r);
965
			break;
966
		}
967
 
968
		/* give other process time to get out of critical section and try again */
969
		unlock(&p->rlock);
970
		splx(s);
971
		sched();
972
	}
973
	unlock(&p->rlock);
974
	splx(s);
975
 
976
	if(p->state != Rendezvous)
977
		return ret;
978
 
979
	/* Try and pull out of a rendezvous */
980
	lock(p->rgrp);
981
	if(p->state == Rendezvous) {
982
		p->rendval = ~0;
983
		l = &REND(p->rgrp, p->rendtag);
984
		for(d = *l; d; d = d->rendhash) {
985
			if(d == p) {
986
				*l = p->rendhash;
987
				break;
988
			}
989
			l = &d->rendhash;
990
		}
991
		ready(p);
992
	}
993
	unlock(p->rgrp);
994
	return ret;
995
}
996
 
997
/*
998
 * weird thing: keep at most NBROKEN around
999
 */
1000
#define	NBROKEN 4
1001
struct
1002
{
1003
	QLock;
1004
	int	n;
1005
	Proc	*p[NBROKEN];
1006
}broken;
1007
 
1008
void
1009
addbroken(Proc *p)
1010
{
1011
	qlock(&broken);
1012
	if(broken.n == NBROKEN) {
1013
		ready(broken.p[0]);
1014
		memmove(&broken.p[0], &broken.p[1], sizeof(Proc*)*(NBROKEN-1));
1015
		--broken.n;
1016
	}
1017
	broken.p[broken.n++] = p;
1018
	qunlock(&broken);
1019
 
1020
	edfstop(up);
1021
	p->state = Broken;
1022
	p->psstate = 0;
1023
	sched();
1024
}
1025
 
1026
void
1027
unbreak(Proc *p)
1028
{
1029
	int b;
1030
 
1031
	qlock(&broken);
1032
	for(b=0; b < broken.n; b++)
1033
		if(broken.p[b] == p) {
1034
			broken.n--;
1035
			memmove(&broken.p[b], &broken.p[b+1],
1036
					sizeof(Proc*)*(NBROKEN-(b+1)));
1037
			ready(p);
1038
			break;
1039
		}
1040
	qunlock(&broken);
1041
}
1042
 
1043
int
1044
freebroken(void)
1045
{
1046
	int i, n;
1047
 
1048
	qlock(&broken);
1049
	n = broken.n;
1050
	for(i=0; i<n; i++) {
1051
		ready(broken.p[i]);
1052
		broken.p[i] = 0;
1053
	}
1054
	broken.n = 0;
1055
	qunlock(&broken);
1056
	return n;
1057
}
1058
 
1059
void
1060
pexit(char *exitstr, int freemem)
1061
{
1062
	Proc *p;
1063
	Segment **s, **es;
1064
	long utime, stime;
1065
	Waitq *wq, *f, *next;
1066
	Fgrp *fgrp;
1067
	Egrp *egrp;
1068
	Rgrp *rgrp;
1069
	Pgrp *pgrp;
1070
	Chan *dot;
1071
	void (*pt)(Proc*, int, vlong);
1072
 
1073
	if(up->syscalltrace)
1074
		free(up->syscalltrace);
1075
	up->alarm = 0;
1076
	if (up->tt)
1077
		timerdel(up);
1078
	pt = proctrace;
1079
	if(pt)
1080
		pt(up, SDead, 0);
1081
 
1082
	/* nil out all the resources under lock (free later) */
1083
	qlock(&up->debug);
1084
	fgrp = up->fgrp;
1085
	up->fgrp = nil;
1086
	egrp = up->egrp;
1087
	up->egrp = nil;
1088
	rgrp = up->rgrp;
1089
	up->rgrp = nil;
1090
	pgrp = up->pgrp;
1091
	up->pgrp = nil;
1092
	dot = up->dot;
1093
	up->dot = nil;
1094
	qunlock(&up->debug);
1095
 
1096
	if(fgrp)
1097
		closefgrp(fgrp);
1098
	if(egrp)
1099
		closeegrp(egrp);
1100
	if(rgrp)
1101
		closergrp(rgrp);
1102
	if(dot)
1103
		cclose(dot);
1104
	if(pgrp)
1105
		closepgrp(pgrp);
1106
 
1107
	/*
1108
	 * if not a kernel process and have a parent,
1109
	 * do some housekeeping.
1110
	 */
1111
	if(up->kp == 0) {
1112
		p = up->parent;
1113
		if(p == 0) {
1114
			if(exitstr == 0)
1115
				exitstr = "unknown";
1116
			panic("boot process died: %s", exitstr);
1117
		}
1118
 
1119
		while(waserror())
1120
			;
1121
 
1122
		wq = smalloc(sizeof(Waitq));
1123
		poperror();
1124
 
1125
		wq->w.pid = up->pid;
1126
		utime = up->time[TUser] + up->time[TCUser];
1127
		stime = up->time[TSys] + up->time[TCSys];
1128
		wq->w.time[TUser] = tk2ms(utime);
1129
		wq->w.time[TSys] = tk2ms(stime);
1130
		wq->w.time[TReal] = tk2ms(MACHP(0)->ticks - up->time[TReal]);
1131
		if(exitstr && exitstr[0])
1132
			snprint(wq->w.msg, sizeof(wq->w.msg), "%s %lud: %s", up->text, up->pid, exitstr);
1133
		else
1134
			wq->w.msg[0] = '\0';
1135
 
1136
		lock(&p->exl);
1137
		/*
1138
		 * Check that parent is still alive.
1139
		 */
1140
		if(p->pid == up->parentpid && p->state != Broken) {
1141
			p->nchild--;
1142
			p->time[TCUser] += utime;
1143
			p->time[TCSys] += stime;
1144
			/*
1145
			 * If there would be more than 128 wait records
1146
			 * processes for my parent, then don't leave a wait
1147
			 * record behind.  This helps prevent badly written
1148
			 * daemon processes from accumulating lots of wait
1149
			 * records.
1150
		 	 */
1151
			if(p->nwait < 128) {
1152
				wq->next = p->waitq;
1153
				p->waitq = wq;
1154
				p->nwait++;
1155
				wq = nil;
1156
				wakeup(&p->waitr);
1157
			}
1158
		}
1159
		unlock(&p->exl);
1160
		if(wq)
1161
			free(wq);
1162
	}
1163
 
1164
	if(!freemem)
1165
		addbroken(up);
1166
 
1167
	qlock(&up->seglock);
1168
	es = &up->seg[NSEG];
1169
	for(s = up->seg; s < es; s++) {
1170
		if(*s) {
1171
			putseg(*s);
1172
			*s = 0;
1173
		}
1174
	}
1175
	qunlock(&up->seglock);
1176
 
1177
	lock(&up->exl);		/* Prevent my children from leaving waits */
1178
	pidunhash(up);
1179
	up->pid = 0;
1180
	wakeup(&up->waitr);
1181
	unlock(&up->exl);
1182
 
1183
	for(f = up->waitq; f; f = next) {
1184
		next = f->next;
1185
		free(f);
1186
	}
1187
 
1188
	/* release debuggers */
1189
	qlock(&up->debug);
1190
	if(up->pdbg) {
1191
		wakeup(&up->pdbg->sleep);
1192
		up->pdbg = 0;
1193
	}
1194
	qunlock(&up->debug);
1195
 
1196
	/* Sched must not loop for these locks */
1197
	lock(&procalloc);
1198
	lock(&palloc);
1199
 
1200
	edfstop(up);
1201
	up->state = Moribund;
1202
	sched();
1203
	panic("pexit");
1204
}
1205
 
1206
int
1207
haswaitq(void *x)
1208
{
1209
	Proc *p;
1210
 
1211
	p = (Proc *)x;
1212
	return p->waitq != 0;
1213
}
1214
 
1215
ulong
1216
pwait(Waitmsg *w)
1217
{
1218
	ulong cpid;
1219
	Waitq *wq;
1220
 
1221
	if(!canqlock(&up->qwaitr))
1222
		error(Einuse);
1223
 
1224
	if(waserror()) {
1225
		qunlock(&up->qwaitr);
1226
		nexterror();
1227
	}
1228
 
1229
	lock(&up->exl);
1230
	if(up->nchild == 0 && up->waitq == 0) {
1231
		unlock(&up->exl);
1232
		error(Enochild);
1233
	}
1234
	unlock(&up->exl);
1235
 
1236
	sleep(&up->waitr, haswaitq, up);
1237
 
1238
	lock(&up->exl);
1239
	wq = up->waitq;
1240
	up->waitq = wq->next;
1241
	up->nwait--;
1242
	unlock(&up->exl);
1243
 
1244
	qunlock(&up->qwaitr);
1245
	poperror();
1246
 
1247
	if(w)
1248
		memmove(w, &wq->w, sizeof(Waitmsg));
1249
	cpid = wq->w.pid;
1250
	free(wq);
1251
	return cpid;
1252
}
1253
 
1254
Proc*
1255
proctab(int i)
1256
{
1257
	return &procalloc.arena[i];
1258
}
1259
 
1260
void
1261
dumpaproc(Proc *p)
1262
{
1263
	ulong bss;
1264
	char *s;
1265
 
1266
	if(p == 0)
1267
		return;
1268
 
1269
	bss = 0;
1270
	if(p->seg[BSEG])
1271
		bss = p->seg[BSEG]->top;
1272
 
1273
	s = p->psstate;
1274
	if(s == 0)
1275
		s = statename[p->state];
1276
	print("%3lud:%10s pc %8lux dbgpc %8lux  %8s (%s) ut %ld st %ld bss %lux qpc %lux nl %lud nd %lud lpc %lux pri %lud\n",
1277
		p->pid, p->text, p->pc, dbgpc(p),  s, statename[p->state],
1278
		p->time[0], p->time[1], bss, p->qpc, p->nlocks.ref, p->delaysched, p->lastlock ? p->lastlock->pc : 0, p->priority);
1279
}
1280
 
1281
void
1282
procdump(void)
1283
{
1284
	int i;
1285
	Proc *p;
1286
 
1287
	if(up)
1288
		print("up %lud\n", up->pid);
1289
	else
1290
		print("no current process\n");
1291
	for(i=0; i<conf.nproc; i++) {
1292
		p = &procalloc.arena[i];
1293
		if(p->state == Dead)
1294
			continue;
1295
 
1296
		dumpaproc(p);
1297
	}
1298
}
1299
 
1300
/*
1301
 *  wait till all processes have flushed their mmu
1302
 *  state about segement s
1303
 */
1304
void
1305
procflushseg(Segment *s)
1306
{
1307
	int i, ns, nm, nwait;
1308
	Proc *p;
1309
 
1310
	/*
1311
	 *  tell all processes with this
1312
	 *  segment to flush their mmu's
1313
	 */
1314
	nwait = 0;
1315
	for(i=0; i<conf.nproc; i++) {
1316
		p = &procalloc.arena[i];
1317
		if(p->state == Dead)
1318
			continue;
1319
		for(ns = 0; ns < NSEG; ns++)
1320
			if(p->seg[ns] == s){
1321
				p->newtlb = 1;
1322
				for(nm = 0; nm < conf.nmach; nm++){
1323
					if(MACHP(nm)->proc == p){
1324
						MACHP(nm)->flushmmu = 1;
1325
						nwait++;
1326
					}
1327
				}
1328
				break;
1329
			}
1330
	}
1331
 
1332
	if(nwait == 0)
1333
		return;
1334
 
1335
	/*
1336
	 *  wait for all processors to take a clock interrupt
1337
	 *  and flush their mmu's
1338
	 */
1339
	for(nm = 0; nm < conf.nmach; nm++)
1340
		if(MACHP(nm) != m)
1341
			while(MACHP(nm)->flushmmu)
1342
				sched();
1343
}
1344
 
1345
void
1346
scheddump(void)
1347
{
1348
	Proc *p;
1349
	Schedq *rq;
1350
 
1351
	for(rq = &runq[Nrq-1]; rq >= runq; rq--){
1352
		if(rq->head == 0)
1353
			continue;
1354
		print("rq%ld:", rq-runq);
1355
		for(p = rq->head; p; p = p->rnext)
1356
			print(" %lud(%lud)", p->pid, m->ticks - p->readytime);
1357
		print("\n");
1358
		delay(150);
1359
	}
1360
	print("nrdy %d\n", nrdy);
1361
}
1362
 
1363
void
1364
kproc(char *name, void (*func)(void *), void *arg)
1365
{
1366
	Proc *p;
1367
	static Pgrp *kpgrp;
1368
 
1369
	p = newproc();
1370
	p->psstate = 0;
1371
	p->procmode = 0640;
1372
	p->kp = 1;
1373
	p->noswap = 1;
1374
 
1375
	p->fpsave = up->fpsave;
1376
	p->scallnr = up->scallnr;
1377
	p->s = up->s;
1378
	p->nerrlab = 0;
1379
	p->slash = up->slash;
1380
	p->dot = up->dot;
1381
	if(p->dot)
1382
		incref(p->dot);
1383
 
1384
	memmove(p->note, up->note, sizeof(p->note));
1385
	p->nnote = up->nnote;
1386
	p->notified = 0;
1387
	p->lastnote = up->lastnote;
1388
	p->notify = up->notify;
1389
	p->ureg = 0;
1390
	p->dbgreg = 0;
1391
 
1392
	procpriority(p, PriKproc, 0);
1393
 
1394
	kprocchild(p, func, arg);
1395
 
1396
	kstrdup(&p->user, eve);
1397
	kstrdup(&p->text, name);
1398
	if(kpgrp == 0)
1399
		kpgrp = newpgrp();
1400
	p->pgrp = kpgrp;
1401
	incref(kpgrp);
1402
 
1403
	memset(p->time, 0, sizeof(p->time));
1404
	p->time[TReal] = MACHP(0)->ticks;
1405
	ready(p);
1406
}
1407
 
1408
/*
1409
 *  called splhi() by notify().  See comment in notify for the
1410
 *  reasoning.
1411
 */
1412
void
1413
procctl(Proc *p)
1414
{
1415
	char *state;
1416
	ulong s;
1417
 
1418
	switch(p->procctl) {
1419
	case Proc_exitbig:
1420
		spllo();
1421
		pexit("Killed: Insufficient physical memory", 1);
1422
 
1423
	case Proc_exitme:
1424
		spllo();		/* pexit has locks in it */
1425
		pexit("Killed", 1);
1426
 
1427
	case Proc_traceme:
1428
		if(p->nnote == 0)
1429
			return;
1430
		/* No break */
1431
 
1432
	case Proc_stopme:
1433
		p->procctl = 0;
1434
		state = p->psstate;
1435
		p->psstate = "Stopped";
1436
		/* free a waiting debugger */
1437
		s = spllo();
1438
		qlock(&p->debug);
1439
		if(p->pdbg) {
1440
			wakeup(&p->pdbg->sleep);
1441
			p->pdbg = 0;
1442
		}
1443
		qunlock(&p->debug);
1444
		splhi();
1445
		p->state = Stopped;
1446
		sched();
1447
		p->psstate = state;
1448
		splx(s);
1449
		return;
1450
	}
1451
}
1452
 
1453
#include "errstr.h"
1454
 
1455
void
1456
error(char *err)
1457
{
1458
	spllo();
1459
 
1460
	assert(up->nerrlab < NERR);
1461
	kstrcpy(up->errstr, err, ERRMAX);
1462
	setlabel(&up->errlab[NERR-1]);
1463
	nexterror();
1464
}
1465
 
1466
void
1467
nexterror(void)
1468
{
1469
	gotolabel(&up->errlab[--up->nerrlab]);
1470
}
1471
 
1472
void
1473
exhausted(char *resource)
1474
{
1475
	char buf[ERRMAX];
1476
 
1477
	snprint(buf, sizeof buf, "no free %s", resource);
1478
	iprint("%s\n", buf);
1479
	error(buf);
1480
}
1481
 
1482
void
1483
killbig(char *why)
1484
{
1485
	int i;
1486
	Segment *s;
1487
	ulong l, max;
1488
	Proc *p, *ep, *kp;
1489
 
1490
	max = 0;
1491
	kp = 0;
1492
	ep = procalloc.arena+conf.nproc;
1493
	for(p = procalloc.arena; p < ep; p++) {
1494
		if(p->state == Dead || p->kp)
1495
			continue;
1496
		l = 0;
1497
		for(i=1; i<NSEG; i++) {
1498
			s = p->seg[i];
1499
			if(s != 0)
1500
				l += s->top - s->base;
1501
		}
1502
		if(l > max && ((p->procmode&0222) || strcmp(eve, p->user)!=0)) {
1503
			kp = p;
1504
			max = l;
1505
		}
1506
	}
1507
 
1508
	print("%lud: %s killed: %s\n", kp->pid, kp->text, why);
1509
	for(p = procalloc.arena; p < ep; p++) {
1510
		if(p->state == Dead || p->kp)
1511
			continue;
1512
		if(p != kp && p->seg[BSEG] && p->seg[BSEG] == kp->seg[BSEG])
1513
			p->procctl = Proc_exitbig;
1514
	}
1515
	kp->procctl = Proc_exitbig;
1516
	for(i = 0; i < NSEG; i++) {
1517
		s = kp->seg[i];
1518
		if(s != 0 && canqlock(&s->lk)) {
1519
			mfreeseg(s, s->base, (s->top - s->base)/BY2PG);
1520
			qunlock(&s->lk);
1521
		}
1522
	}
1523
}
1524
 
1525
/*
1526
 *  change ownership to 'new' of all processes owned by 'old'.  Used when
1527
 *  eve changes.
1528
 */
1529
void
1530
renameuser(char *old, char *new)
1531
{
1532
	Proc *p, *ep;
1533
 
1534
	ep = procalloc.arena+conf.nproc;
1535
	for(p = procalloc.arena; p < ep; p++)
1536
		if(p->user!=nil && strcmp(old, p->user)==0)
1537
			kstrdup(&p->user, new);
1538
}
1539
 
1540
/*
1541
 *  time accounting called by clock() splhi'd
1542
 */
1543
void
1544
accounttime(void)
1545
{
1546
	Proc *p;
1547
	ulong n, per;
1548
	static ulong nrun;
1549
 
1550
	p = m->proc;
1551
	if(p) {
1552
		nrun++;
1553
		p->time[p->insyscall]++;
1554
	}
1555
 
1556
	/* calculate decaying duty cycles */
1557
	n = perfticks();
1558
	per = n - m->perf.last;
1559
	m->perf.last = n;
1560
	per = (m->perf.period*(HZ-1) + per)/HZ;
1561
	if(per != 0)
1562
		m->perf.period = per;
1563
 
1564
	m->perf.avg_inidle = (m->perf.avg_inidle*(HZ-1)+m->perf.inidle)/HZ;
1565
	m->perf.inidle = 0;
1566
 
1567
	m->perf.avg_inintr = (m->perf.avg_inintr*(HZ-1)+m->perf.inintr)/HZ;
1568
	m->perf.inintr = 0;
1569
 
1570
	/* only one processor gets to compute system load averages */
1571
	if(m->machno != 0)
1572
		return;
1573
 
1574
	/*
1575
	 * calculate decaying load average.
1576
	 * if we decay by (n-1)/n then it takes
1577
	 * n clock ticks to go from load L to .36 L once
1578
	 * things quiet down.  it takes about 5 n clock
1579
	 * ticks to go to zero.  so using HZ means this is
1580
	 * approximately the load over the last second,
1581
	 * with a tail lasting about 5 seconds.
1582
	 */
1583
	n = nrun;
1584
	nrun = 0;
1585
	n = (nrdy+n)*1000;
1586
	m->load = (m->load*(HZ-1)+n)/HZ;
1587
}
1588
 
1589
static void
1590
pidhash(Proc *p)
1591
{
1592
	int h;
1593
 
1594
	h = p->pid % nelem(procalloc.ht);
1595
	lock(&procalloc);
1596
	p->pidhash = procalloc.ht[h];
1597
	procalloc.ht[h] = p;
1598
	unlock(&procalloc);
1599
}
1600
 
1601
static void
1602
pidunhash(Proc *p)
1603
{
1604
	int h;
1605
	Proc **l;
1606
 
1607
	h = p->pid % nelem(procalloc.ht);
1608
	lock(&procalloc);
1609
	for(l = &procalloc.ht[h]; *l != nil; l = &(*l)->pidhash)
1610
		if(*l == p){
1611
			*l = p->pidhash;
1612
			break;
1613
		}
1614
	unlock(&procalloc);
1615
}
1616
 
1617
int
1618
procindex(ulong pid)
1619
{
1620
	Proc *p;
1621
	int h;
1622
	int s;
1623
 
1624
	s = -1;
1625
	h = pid % nelem(procalloc.ht);
1626
	lock(&procalloc);
1627
	for(p = procalloc.ht[h]; p != nil; p = p->pidhash)
1628
		if(p->pid == pid){
1629
			s = p - procalloc.arena;
1630
			break;
1631
		}
1632
	unlock(&procalloc);
1633
	return s;
1634
}