Warning: Attempt to read property "date" on null in /usr/local/www/websvn.planix.org/blame.php on line 247

Warning: Attempt to read property "msg" on null in /usr/local/www/websvn.planix.org/blame.php on line 247
WebSVN – planix.SVN – Blame – /os/branches/feature_fixcpp/sys/src/9/port/sysproc.c – Rev 2

Subversion Repositories planix.SVN

Rev

Go to most recent revision | Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
2 - 1
#include	"u.h"
2
#include	"tos.h"
3
#include	"../port/lib.h"
4
#include	"mem.h"
5
#include	"dat.h"
6
#include	"fns.h"
7
#include	"../port/error.h"
8
#include	"../port/edf.h"
9
 
10
#include	<a.out.h>
11
 
12
int	shargs(char*, int, char**);
13
 
14
extern void checkpages(void);
15
extern void checkpagerefs(void);
16
 
17
long
18
sysr1(ulong*)
19
{
20
	checkpagerefs();
21
	return 0;
22
}
23
 
24
long
25
sysrfork(ulong *arg)
26
{
27
	Proc *p;
28
	int n, i;
29
	Fgrp *ofg;
30
	Pgrp *opg;
31
	Rgrp *org;
32
	Egrp *oeg;
33
	ulong pid, flag;
34
	Mach *wm;
35
 
36
	flag = arg[0];
37
	/* Check flags before we commit */
38
	if((flag & (RFFDG|RFCFDG)) == (RFFDG|RFCFDG))
39
		error(Ebadarg);
40
	if((flag & (RFNAMEG|RFCNAMEG)) == (RFNAMEG|RFCNAMEG))
41
		error(Ebadarg);
42
	if((flag & (RFENVG|RFCENVG)) == (RFENVG|RFCENVG))
43
		error(Ebadarg);
44
 
45
	if((flag&RFPROC) == 0) {
46
		if(flag & (RFMEM|RFNOWAIT))
47
			error(Ebadarg);
48
		if(flag & (RFFDG|RFCFDG)) {
49
			ofg = up->fgrp;
50
			if(flag & RFFDG)
51
				up->fgrp = dupfgrp(ofg);
52
			else
53
				up->fgrp = dupfgrp(nil);
54
			closefgrp(ofg);
55
		}
56
		if(flag & (RFNAMEG|RFCNAMEG)) {
57
			opg = up->pgrp;
58
			up->pgrp = newpgrp();
59
			if(flag & RFNAMEG)
60
				pgrpcpy(up->pgrp, opg);
61
			/* inherit noattach */
62
			up->pgrp->noattach = opg->noattach;
63
			closepgrp(opg);
64
		}
65
		if(flag & RFNOMNT)
66
			up->pgrp->noattach = 1;
67
		if(flag & RFREND) {
68
			org = up->rgrp;
69
			up->rgrp = newrgrp();
70
			closergrp(org);
71
		}
72
		if(flag & (RFENVG|RFCENVG)) {
73
			oeg = up->egrp;
74
			up->egrp = smalloc(sizeof(Egrp));
75
			up->egrp->ref = 1;
76
			if(flag & RFENVG)
77
				envcpy(up->egrp, oeg);
78
			closeegrp(oeg);
79
		}
80
		if(flag & RFNOTEG)
81
			up->noteid = incref(&noteidalloc);
82
		return 0;
83
	}
84
 
85
	p = newproc();
86
 
87
	p->fpsave = up->fpsave;
88
	p->scallnr = up->scallnr;
89
	p->s = up->s;
90
	p->nerrlab = 0;
91
	p->slash = up->slash;
92
	p->dot = up->dot;
93
	incref(p->dot);
94
 
95
	memmove(p->note, up->note, sizeof(p->note));
96
	p->privatemem = up->privatemem;
97
	p->noswap = up->noswap;
98
	p->nnote = up->nnote;
99
	p->notified = 0;
100
	p->lastnote = up->lastnote;
101
	p->notify = up->notify;
102
	p->ureg = up->ureg;
103
	p->dbgreg = 0;
104
 
105
	/* Make a new set of memory segments */
106
	n = flag & RFMEM;
107
	qlock(&p->seglock);
108
	if(waserror()){
109
		qunlock(&p->seglock);
110
		nexterror();
111
	}
112
	for(i = 0; i < NSEG; i++)
113
		if(up->seg[i])
114
			p->seg[i] = dupseg(up->seg, i, n);
115
	qunlock(&p->seglock);
116
	poperror();
117
 
118
	/* File descriptors */
119
	if(flag & (RFFDG|RFCFDG)) {
120
		if(flag & RFFDG)
121
			p->fgrp = dupfgrp(up->fgrp);
122
		else
123
			p->fgrp = dupfgrp(nil);
124
	}
125
	else {
126
		p->fgrp = up->fgrp;
127
		incref(p->fgrp);
128
	}
129
 
130
	/* Process groups */
131
	if(flag & (RFNAMEG|RFCNAMEG)) {
132
		p->pgrp = newpgrp();
133
		if(flag & RFNAMEG)
134
			pgrpcpy(p->pgrp, up->pgrp);
135
		/* inherit noattach */
136
		p->pgrp->noattach = up->pgrp->noattach;
137
	}
138
	else {
139
		p->pgrp = up->pgrp;
140
		incref(p->pgrp);
141
	}
142
	if(flag & RFNOMNT)
143
		p->pgrp->noattach = 1;
144
 
145
	if(flag & RFREND)
146
		p->rgrp = newrgrp();
147
	else {
148
		incref(up->rgrp);
149
		p->rgrp = up->rgrp;
150
	}
151
 
152
	/* Environment group */
153
	if(flag & (RFENVG|RFCENVG)) {
154
		p->egrp = smalloc(sizeof(Egrp));
155
		p->egrp->ref = 1;
156
		if(flag & RFENVG)
157
			envcpy(p->egrp, up->egrp);
158
	}
159
	else {
160
		p->egrp = up->egrp;
161
		incref(p->egrp);
162
	}
163
	p->hang = up->hang;
164
	p->procmode = up->procmode;
165
 
166
	/* Craft a return frame which will cause the child to pop out of
167
	 * the scheduler in user mode with the return register zero
168
	 */
169
	forkchild(p, up->dbgreg);
170
 
171
	p->parent = up;
172
	p->parentpid = up->pid;
173
	if(flag&RFNOWAIT)
174
		p->parentpid = 0;
175
	else {
176
		lock(&up->exl);
177
		up->nchild++;
178
		unlock(&up->exl);
179
	}
180
	if((flag&RFNOTEG) == 0)
181
		p->noteid = up->noteid;
182
 
183
	/* don't penalize the child, it hasn't done FP in a note handler. */
184
	p->fpstate = up->fpstate & ~FPillegal;
185
	pid = p->pid;
186
	memset(p->time, 0, sizeof(p->time));
187
	p->time[TReal] = MACHP(0)->ticks;
188
 
189
	kstrdup(&p->text, up->text);
190
	kstrdup(&p->user, up->user);
191
	/*
192
	 *  since the bss/data segments are now shareable,
193
	 *  any mmu info about this process is now stale
194
	 *  (i.e. has bad properties) and has to be discarded.
195
	 */
196
	flushmmu();
197
	p->basepri = up->basepri;
198
	p->priority = up->basepri;
199
	p->fixedpri = up->fixedpri;
200
	p->mp = up->mp;
201
	wm = up->wired;
202
	if(wm)
203
		procwired(p, wm->machno);
204
	ready(p);
205
	sched();
206
	return pid;
207
}
208
 
209
ulong
210
l2be(long l)
211
{
212
	uchar *cp;
213
 
214
	cp = (uchar*)&l;
215
	return (cp[0]<<24) | (cp[1]<<16) | (cp[2]<<8) | cp[3];
216
}
217
 
218
long
219
sysexec(ulong *arg)
220
{
221
	Segment *s, *ts;
222
	ulong t, d, b;
223
	int i;
224
	Chan *tc;
225
	char **argv, **argp;
226
	char *a, *charp, *args, *file, *file0;
227
	char *progarg[sizeof(Exec)/2+1], *elem, progelem[64];
228
	ulong ssize, spage, nargs, nbytes, n, bssend;
229
	int indir;
230
	Exec exec;
231
	char line[sizeof(Exec)];
232
	Fgrp *f;
233
	Image *img;
234
	ulong magic, text, entry, data, bss;
235
	Tos *tos;
236
 
237
	indir = 0;
238
	elem = nil;
239
	validaddr(arg[0], 1, 0);
240
	file0 = validnamedup((char*)arg[0], 1);
241
	if(waserror()){
242
		free(file0);
243
		free(elem);
244
		nexterror();
245
	}
246
	file = file0;
247
	for(;;){
248
		tc = namec(file, Aopen, OEXEC, 0);
249
		if(waserror()){
250
			cclose(tc);
251
			nexterror();
252
		}
253
		if(!indir)
254
			kstrdup(&elem, up->genbuf);
255
 
256
		n = devtab[tc->type]->read(tc, &exec, sizeof(Exec), 0);
257
		if(n < 2)
258
			error(Ebadexec);
259
		magic = l2be(exec.magic);
260
		text = l2be(exec.text);
261
		entry = l2be(exec.entry);
262
		if(n==sizeof(Exec) && (magic == AOUT_MAGIC)){
263
			if(text >= USTKTOP-UTZERO
264
			|| entry < UTZERO+sizeof(Exec)
265
			|| entry >= UTZERO+sizeof(Exec)+text)
266
				error(Ebadexec);
267
			break; /* for binary */
268
		}
269
 
270
		/*
271
		 * Process #! /bin/sh args ...
272
		 */
273
		memmove(line, &exec, sizeof(Exec));
274
		if(indir || line[0]!='#' || line[1]!='!')
275
			error(Ebadexec);
276
		n = shargs(line, n, progarg);
277
		if(n == 0)
278
			error(Ebadexec);
279
		indir = 1;
280
		/*
281
		 * First arg becomes complete file name
282
		 */
283
		progarg[n++] = file;
284
		progarg[n] = 0;
285
		validaddr(arg[1], BY2WD, 1);
286
		arg[1] += BY2WD;
287
		file = progarg[0];
288
		if(strlen(elem) >= sizeof progelem)
289
			error(Ebadexec);
290
		strcpy(progelem, elem);
291
		progarg[0] = progelem;
292
		poperror();
293
		cclose(tc);
294
	}
295
 
296
	data = l2be(exec.data);
297
	bss = l2be(exec.bss);
298
	t = UTROUND(UTZERO+sizeof(Exec)+text);
299
	d = (t + data + (BY2PG-1)) & ~(BY2PG-1);
300
	bssend = t + data + bss;
301
	b = (bssend + (BY2PG-1)) & ~(BY2PG-1);
302
	if(t >= KZERO || d >= KZERO || b >= KZERO)
303
		error(Ebadexec);
304
 
305
	/*
306
	 * Args: pass 1: count
307
	 */
308
	nbytes = sizeof(Tos);		/* hole for profiling clock at top of stack (and more) */
309
	nargs = 0;
310
	if(indir){
311
		argp = progarg;
312
		while(*argp){
313
			a = *argp++;
314
			nbytes += strlen(a) + 1;
315
			nargs++;
316
		}
317
	}
318
	validalign(arg[1], sizeof(char**));
319
	argp = (char**)arg[1];
320
	validaddr((ulong)argp, BY2WD, 0);
321
	while(*argp){
322
		a = *argp++;
323
		if(((ulong)argp&(BY2PG-1)) < BY2WD)
324
			validaddr((ulong)argp, BY2WD, 0);
325
		validaddr((ulong)a, 1, 0);
326
		nbytes += ((char*)vmemchr(a, 0, 0x7FFFFFFF) - a) + 1;
327
		nargs++;
328
	}
329
	ssize = BY2WD*(nargs+1) + ((nbytes+(BY2WD-1)) & ~(BY2WD-1));
330
 
331
	/*
332
	 * 8-byte align SP for those (e.g. sparc) that need it.
333
	 * execregs() will subtract another 4 bytes for argc.
334
	 */
335
	if((ssize+4) & 7)
336
		ssize += 4;
337
	spage = (ssize+(BY2PG-1)) >> PGSHIFT;
338
 
339
	/*
340
	 * Build the stack segment, putting it in kernel virtual for the moment
341
	 */
342
	if(spage > TSTKSIZ)
343
		error(Enovmem);
344
 
345
	qlock(&up->seglock);
346
	if(waserror()){
347
		qunlock(&up->seglock);
348
		nexterror();
349
	}
350
	up->seg[ESEG] = newseg(SG_STACK, TSTKTOP-USTKSIZE, USTKSIZE/BY2PG);
351
 
352
	/*
353
	 * Args: pass 2: assemble; the pages will be faulted in
354
	 */
355
	tos = (Tos*)(TSTKTOP - sizeof(Tos));
356
	tos->cyclefreq = m->cyclefreq;
357
	cycles((uvlong*)&tos->pcycles);
358
	tos->pcycles = -tos->pcycles;
359
	tos->kcycles = tos->pcycles;
360
	tos->clock = 0;
361
	argv = (char**)(TSTKTOP - ssize);
362
	charp = (char*)(TSTKTOP - nbytes);
363
	args = charp;
364
	if(indir)
365
		argp = progarg;
366
	else
367
		argp = (char**)arg[1];
368
 
369
	for(i=0; i<nargs; i++){
370
		if(indir && *argp==0) {
371
			indir = 0;
372
			argp = (char**)arg[1];
373
		}
374
		*argv++ = charp + (USTKTOP-TSTKTOP);
375
		n = strlen(*argp) + 1;
376
		memmove(charp, *argp++, n);
377
		charp += n;
378
	}
379
	free(file0);
380
 
381
	free(up->text);
382
	up->text = elem;
383
	elem = nil;	/* so waserror() won't free elem */
384
	USED(elem);
385
 
386
	/* copy args; easiest from new process's stack */
387
	n = charp - args;
388
	if(n > 128)	/* don't waste too much space on huge arg lists */
389
		n = 128;
390
	a = up->args;
391
	up->args = nil;
392
	free(a);
393
	up->args = smalloc(n);
394
	memmove(up->args, args, n);
395
	if(n>0 && up->args[n-1]!='\0'){
396
		/* make sure last arg is NUL-terminated */
397
		/* put NUL at UTF-8 character boundary */
398
		for(i=n-1; i>0; --i)
399
			if(fullrune(up->args+i, n-i))
400
				break;
401
		up->args[i] = 0;
402
		n = i+1;
403
	}
404
	up->nargs = n;
405
 
406
	/*
407
	 * Committed.
408
	 * Free old memory.
409
	 * Special segments are maintained across exec
410
	 */
411
	for(i = SSEG; i <= BSEG; i++) {
412
		putseg(up->seg[i]);
413
		/* prevent a second free if we have an error */
414
		up->seg[i] = 0;
415
	}
416
	for(i = BSEG+1; i < NSEG; i++) {
417
		s = up->seg[i];
418
		if(s != 0 && (s->type&SG_CEXEC)) {
419
			putseg(s);
420
			up->seg[i] = 0;
421
		}
422
	}
423
 
424
	/*
425
	 * Close on exec
426
	 */
427
	f = up->fgrp;
428
	for(i=0; i<=f->maxfd; i++)
429
		fdclose(i, CCEXEC);
430
 
431
	/* Text.  Shared. Attaches to cache image if possible */
432
	/* attachimage returns a locked cache image */
433
	img = attachimage(SG_TEXT|SG_RONLY, tc, UTZERO, (t-UTZERO)>>PGSHIFT);
434
	ts = img->s;
435
	up->seg[TSEG] = ts;
436
	ts->flushme = 1;
437
	ts->fstart = 0;
438
	ts->flen = sizeof(Exec)+text;
439
	unlock(img);
440
 
441
	/* Data. Shared. */
442
	s = newseg(SG_DATA, t, (d-t)>>PGSHIFT);
443
	up->seg[DSEG] = s;
444
 
445
	/* Attached by hand */
446
	incref(img);
447
	s->image = img;
448
	s->fstart = ts->fstart+ts->flen;
449
	s->flen = data;
450
 
451
	/* BSS. Zero fill on demand */
452
	up->seg[BSEG] = newseg(SG_BSS, d, (b-d)>>PGSHIFT);
453
 
454
	/*
455
	 * Move the stack
456
	 */
457
	s = up->seg[ESEG];
458
	up->seg[ESEG] = 0;
459
	up->seg[SSEG] = s;
460
	qunlock(&up->seglock);
461
	poperror();	/* seglock */
462
	poperror();	/* elem */
463
	s->base = USTKTOP-USTKSIZE;
464
	s->top = USTKTOP;
465
	relocateseg(s, USTKTOP-TSTKTOP);
466
 
467
	/*
468
	 *  '/' processes are higher priority (hack to make /ip more responsive).
469
	 */
470
	if(devtab[tc->type]->dc == L'/')
471
		up->basepri = PriRoot;
472
	up->priority = up->basepri;
473
	poperror();
474
	cclose(tc);
475
 
476
	/*
477
	 *  At this point, the mmu contains info about the old address
478
	 *  space and needs to be flushed
479
	 */
480
	flushmmu();
481
	qlock(&up->debug);
482
	up->nnote = 0;
483
	up->notify = 0;
484
	up->notified = 0;
485
	up->privatemem = 0;
486
	procsetup(up);
487
	qunlock(&up->debug);
488
	if(up->hang)
489
		up->procctl = Proc_stopme;
490
 
491
	return execregs(entry, ssize, nargs);
492
}
493
 
494
/*
 * shargs: split a "#!" interpreter line into words.
 *
 * s points at a buffer starting with "#!" of which n bytes are valid.
 * The text after "#!" up to the first newline is cut in place into
 * words separated by blanks and tabs; pointers to the words are stored
 * in ap, followed by a nil entry.  Returns the number of words, or 0
 * if there is no newline within the n valid bytes or the line holds no
 * words.  ap must have room for every word plus the terminator.
 */
int
shargs(char *s, int n, char **ap)
{
	int i;

	s += 2;
	n -= 2;		/* skip #! */

	/*
	 * Find the newline without looking past the n valid bytes.
	 * Testing i >= n first also covers n <= 0 (nothing after "#!"),
	 * where comparing against n-1 would never stop the scan.
	 */
	for(i=0;; i++){
		if(i >= n)
			return 0;
		if(s[i] == '\n')
			break;
	}
	s[i] = 0;
	*ap = 0;
	i = 0;
	for(;;) {
		/* skip leading white space */
		while(*s==' ' || *s=='\t')
			s++;
		if(*s == 0)
			break;
		i++;
		*ap++ = s;
		*ap = 0;
		/* find the end of the word and terminate it */
		while(*s && *s!=' ' && *s!='\t')
			s++;
		if(*s == 0)
			break;
		else
			*s++ = 0;
	}
	return i;
}
524
 
525
/*
 * return0: condition function that is never true; the argument is
 * ignored.  syssleep passes it to tsleep.
 */
int
return0(void*)
{
	return 0;
}
530
 
531
long
532
syssleep(ulong *arg)
533
{
534
 
535
	int n;
536
 
537
	n = arg[0];
538
	if(n <= 0) {
539
		if (up->edf && (up->edf->flags & Admitted))
540
			edfyield();
541
		else
542
			yield();
543
		return 0;
544
	}
545
	if(n < TK2MS(1))
546
		n = TK2MS(1);
547
	tsleep(&up->sleep, return0, 0, n);
548
	return 0;
549
}
550
 
551
long
552
sysalarm(ulong *arg)
553
{
554
	return procalarm(arg[0]);
555
}
556
 
557
long
558
sysexits(ulong *arg)
559
{
560
	char *status;
561
	char *inval = "invalid exit string";
562
	char buf[ERRMAX];
563
 
564
	status = (char*)arg[0];
565
	if(status){
566
		if(waserror())
567
			status = inval;
568
		else{
569
			validaddr((ulong)status, 1, 0);
570
			if(vmemchr(status, 0, ERRMAX) == 0){
571
				memmove(buf, status, ERRMAX);
572
				buf[ERRMAX-1] = 0;
573
				status = buf;
574
			}
575
			poperror();
576
		}
577
 
578
	}
579
	pexit(status, 1);
580
	return 0;		/* not reached */
581
}
582
 
583
long
584
sys_wait(ulong *arg)
585
{
586
	int pid;
587
	Waitmsg w;
588
	OWaitmsg *ow;
589
 
590
	if(arg[0] == 0)
591
		return pwait(nil);
592
 
593
	validaddr(arg[0], sizeof(OWaitmsg), 1);
594
	validalign(arg[0], BY2WD);			/* who cares? */
595
	pid = pwait(&w);
596
	if(pid >= 0){
597
		ow = (OWaitmsg*)arg[0];
598
		readnum(0, ow->pid, NUMSIZE, w.pid, NUMSIZE);
599
		readnum(0, ow->time+TUser*NUMSIZE, NUMSIZE, w.time[TUser], NUMSIZE);
600
		readnum(0, ow->time+TSys*NUMSIZE, NUMSIZE, w.time[TSys], NUMSIZE);
601
		readnum(0, ow->time+TReal*NUMSIZE, NUMSIZE, w.time[TReal], NUMSIZE);
602
		strncpy(ow->msg, w.msg, sizeof(ow->msg));
603
		ow->msg[sizeof(ow->msg)-1] = '\0';
604
	}
605
	return pid;
606
}
607
 
608
long
609
sysawait(ulong *arg)
610
{
611
	int i;
612
	int pid;
613
	Waitmsg w;
614
	ulong n;
615
 
616
	n = arg[1];
617
	validaddr(arg[0], n, 1);
618
	pid = pwait(&w);
619
	if(pid < 0)
620
		return -1;
621
	i = snprint((char*)arg[0], n, "%d %lud %lud %lud %q",
622
		w.pid,
623
		w.time[TUser], w.time[TSys], w.time[TReal],
624
		w.msg);
625
 
626
	return i;
627
}
628
 
629
void
630
werrstr(char *fmt, ...)
631
{
632
	va_list va;
633
 
634
	if(up == nil)
635
		return;
636
 
637
	va_start(va, fmt);
638
	vseprint(up->syserrstr, up->syserrstr+ERRMAX, fmt, va);
639
	va_end(va);
640
}
641
 
642
static long
643
generrstr(char *buf, uint nbuf)
644
{
645
	char tmp[ERRMAX];
646
 
647
	if(nbuf == 0)
648
		error(Ebadarg);
649
	validaddr((ulong)buf, nbuf, 1);
650
	if(nbuf > sizeof tmp)
651
		nbuf = sizeof tmp;
652
	memmove(tmp, buf, nbuf);
653
 
654
	/* make sure it's NUL-terminated */
655
	tmp[nbuf-1] = '\0';
656
	memmove(buf, up->syserrstr, nbuf);
657
	buf[nbuf-1] = '\0';
658
	memmove(up->syserrstr, tmp, nbuf);
659
	return 0;
660
}
661
 
662
long
663
syserrstr(ulong *arg)
664
{
665
	return generrstr((char*)arg[0], arg[1]);
666
}
667
 
668
/* compatibility for old binaries */
669
long
670
sys_errstr(ulong *arg)
671
{
672
	return generrstr((char*)arg[0], 64);
673
}
674
 
675
long
676
sysnotify(ulong *arg)
677
{
678
	if(arg[0] != 0)
679
		validaddr(arg[0], sizeof(ulong), 0);
680
	up->notify = (int(*)(void*, char*))(arg[0]);
681
	return 0;
682
}
683
 
684
long
685
sysnoted(ulong *arg)
686
{
687
	if(arg[0]!=NRSTR && !up->notified)
688
		error(Egreg);
689
	return 0;
690
}
691
 
692
long
693
syssegbrk(ulong *arg)
694
{
695
	int i;
696
	ulong addr;
697
	Segment *s;
698
 
699
	addr = arg[0];
700
	for(i = 0; i < NSEG; i++) {
701
		s = up->seg[i];
702
		if(s == 0 || addr < s->base || addr >= s->top)
703
			continue;
704
		switch(s->type&SG_TYPE) {
705
		case SG_TEXT:
706
		case SG_DATA:
707
		case SG_STACK:
708
			error(Ebadarg);
709
		default:
710
			return ibrk(arg[1], i);
711
		}
712
	}
713
 
714
	error(Ebadarg);
715
	return 0;		/* not reached */
716
}
717
 
718
long
719
syssegattach(ulong *arg)
720
{
721
	return segattach(up, arg[0], (char*)arg[1], arg[2], arg[3]);
722
}
723
 
724
long
725
syssegdetach(ulong *arg)
726
{
727
	int i;
728
	ulong addr;
729
	Segment *s;
730
 
731
	qlock(&up->seglock);
732
	if(waserror()){
733
		qunlock(&up->seglock);
734
		nexterror();
735
	}
736
 
737
	s = 0;
738
	addr = arg[0];
739
	for(i = 0; i < NSEG; i++)
740
		if(s = up->seg[i]) {
741
			qlock(&s->lk);
742
			if((addr >= s->base && addr < s->top) ||
743
			   (s->top == s->base && addr == s->base))
744
				goto found;
745
			qunlock(&s->lk);
746
		}
747
 
748
	error(Ebadarg);
749
 
750
found:
751
	/*
752
	 * Check we are not detaching the initial stack segment.
753
	 */
754
	if(s == up->seg[SSEG]){
755
		qunlock(&s->lk);
756
		error(Ebadarg);
757
	}
758
	up->seg[i] = 0;
759
	qunlock(&s->lk);
760
	putseg(s);
761
	qunlock(&up->seglock);
762
	poperror();
763
 
764
	/* Ensure we flush any entries from the lost segment */
765
	flushmmu();
766
	return 0;
767
}
768
 
769
long
770
syssegfree(ulong *arg)
771
{
772
	Segment *s;
773
	ulong from, to;
774
 
775
	from = arg[0];
776
	s = seg(up, from, 1);
777
	if(s == nil)
778
		error(Ebadarg);
779
	to = (from + arg[1]) & ~(BY2PG-1);
780
	from = PGROUND(from);
781
 
782
	if(to > s->top) {
783
		qunlock(&s->lk);
784
		error(Ebadarg);
785
	}
786
 
787
	mfreeseg(s, from, (to - from) / BY2PG);
788
	qunlock(&s->lk);
789
	flushmmu();
790
 
791
	return 0;
792
}
793
 
794
/* For binary compatibility */
795
long
796
sysbrk_(ulong *arg)
797
{
798
	return ibrk(arg[0], BSEG);
799
}
800
 
801
long
802
sysrendezvous(ulong *arg)
803
{
804
	uintptr tag, val;
805
	Proc *p, **l;
806
 
807
	tag = arg[0];
808
	l = &REND(up->rgrp, tag);
809
	up->rendval = ~(uintptr)0;
810
 
811
	lock(up->rgrp);
812
	for(p = *l; p; p = p->rendhash) {
813
		if(p->rendtag == tag) {
814
			*l = p->rendhash;
815
			val = p->rendval;
816
			p->rendval = arg[1];
817
 
818
			while(p->mach != 0)
819
				;
820
			ready(p);
821
			unlock(up->rgrp);
822
			return val;
823
		}
824
		l = &p->rendhash;
825
	}
826
 
827
	/* Going to sleep here */
828
	up->rendtag = tag;
829
	up->rendval = arg[1];
830
	up->rendhash = *l;
831
	*l = up;
832
	up->state = Rendezvous;
833
	unlock(up->rgrp);
834
 
835
	sched();
836
 
837
	return up->rendval;
838
}
839
 
840
/*
 * The implementation of semaphores is complicated by needing
 * to avoid rescheduling in syssemrelease, so that it is safe
 * to call from real-time processes.  This means syssemrelease
 * cannot acquire any qlocks, only spin locks.
 * 
 * Semacquire and semrelease must both manipulate the semaphore
 * wait list.  Lock-free linked lists only exist in theory, not
 * in practice, so the wait list is protected by a spin lock.
 * 
 * The semaphore value *addr is stored in user memory, so it
 * cannot be read or written while holding spin locks.
 * 
 * Thus, we can access the list only when holding the lock, and
 * we can access the semaphore only when not holding the lock.
 * This makes things interesting.  Note that sleep's condition function
 * is called while holding two locks - r and up->rlock - so it cannot
 * access the semaphore value either.
 * 
 * An acquirer announces its intention to try for the semaphore
 * by putting a Sema structure onto the wait list and then
 * setting Sema.waiting.  After one last check of semaphore,
 * the acquirer sleeps until Sema.waiting==0.  A releaser of n
 * must wake up n acquirers who have Sema.waiting set.  It does
 * this by clearing Sema.waiting and then calling wakeup.
 * 
 * There are three interesting races here.  
 * 
 * The first is that in this particular sleep/wakeup usage, a single
 * wakeup can rouse a process from two consecutive sleeps!  
 * The ordering is:
 * 
 * 	(a) set Sema.waiting = 1
 * 	(a) call sleep
 * 	(b) set Sema.waiting = 0
 * 	(a) check Sema.waiting inside sleep, return w/o sleeping
 * 	(a) try for semaphore, fail
 * 	(a) set Sema.waiting = 1
 * 	(a) call sleep
 * 	(b) call wakeup(a)
 * 	(a) wake up again
 * 
 * This is okay - semacquire will just go around the loop
 * again.  It does mean that at the top of the for(;;) loop in
 * semacquire, phore.waiting might already be set to 1.
 * 
 * The second is that a releaser might wake an acquirer who is
 * interrupted before he can acquire the lock.  Since
 * release(n) issues only n wakeup calls -- only n can be used
 * anyway -- if the interrupted process is not going to use his
 * wakeup call he must pass it on to another acquirer.
 * 
 * The third race is similar to the second but more subtle.  An
 * acquirer sets waiting=1 and then does a final canacquire()
 * before going to sleep.  The opposite order would result in
 * missing wakeups that happen between canacquire and
 * waiting=1.  (In fact, the whole point of Sema.waiting is to
 * avoid missing wakeups between canacquire() and sleep().) But
 * there can be spurious wakeups between a successful
 * canacquire() and the following semdequeue().  This wakeup is
 * not useful to the acquirer, since he has already acquired
 * the semaphore.  Like in the previous case, though, the
 * acquirer must pass the wakeup call along.
 * 
 * This is all rather subtle.  The code below has been verified
 * with the spin model /sys/src/9/port/semaphore.p.  The
 * original code anticipated the second race but not the first
 * or third, which were caught only with spin.  The first race
 * is mentioned in /sys/doc/sleep.ps, but I'd forgotten about it.
 * It was lucky that my abstract model of sleep/wakeup still managed
 * to preserve that behavior.
 *
 * I remain slightly concerned about memory coherence
 * outside of locks.  The spin model does not take 
 * queued processor writes into account so we have to
 * think hard.  The only variables accessed outside locks
 * are the semaphore value itself and the boolean flag
 * Sema.waiting.  The value is only accessed with cmpswap,
 * whose job description includes doing the right thing as
 * far as memory coherence across processors.  That leaves
 * Sema.waiting.  To handle it, we call coherence() before each
 * read and after each write.		- rsc
 */
923
 
924
/* Add semaphore p with addr a to list in seg. */
925
static void
926
semqueue(Segment *s, long *a, Sema *p)
927
{
928
	memset(p, 0, sizeof *p);
929
	p->addr = a;
930
	lock(&s->sema);	/* uses s->sema.Rendez.Lock, but no one else is */
931
	p->next = &s->sema;
932
	p->prev = s->sema.prev;
933
	p->next->prev = p;
934
	p->prev->next = p;
935
	unlock(&s->sema);
936
}
937
 
938
/* Remove semaphore p from list in seg. */
939
static void
940
semdequeue(Segment *s, Sema *p)
941
{
942
	lock(&s->sema);
943
	p->next->prev = p->prev;
944
	p->prev->next = p->next;
945
	unlock(&s->sema);
946
}
947
 
948
/* Wake up n waiters with addr a on list in seg. */
949
static void
950
semwakeup(Segment *s, long *a, long n)
951
{
952
	Sema *p;
953
 
954
	lock(&s->sema);
955
	for(p=s->sema.next; p!=&s->sema && n>0; p=p->next){
956
		if(p->addr == a && p->waiting){
957
			p->waiting = 0;
958
			coherence();
959
			wakeup(p);
960
			n--;
961
		}
962
	}
963
	unlock(&s->sema);
964
}
965
 
966
/* Add delta to semaphore and wake up waiters as appropriate. */
967
static long
968
semrelease(Segment *s, long *addr, long delta)
969
{
970
	long value;
971
 
972
	do
973
		value = *addr;
974
	while(!cmpswap(addr, value, value+delta));
975
	semwakeup(s, addr, delta);
976
	return value+delta;
977
}
978
 
979
/*
 * canacquire: try to acquire semaphore using compare-and-swap.
 * While the value is positive, attempts to replace it with value-1.
 * Returns 1 once a swap succeeds, 0 as soon as the value is seen
 * to be <= 0.
 */
static int
canacquire(long *addr)
{
	long value;

	for(;;){
		value = *addr;
		if(value <= 0)
			return 0;
		if(cmpswap(addr, value, value-1))
			return 1;
	}
}
990
 
991
/* Should we wake up? */
992
static int
993
semawoke(void *p)
994
{
995
	coherence();
996
	return !((Sema*)p)->waiting;
997
}
998
 
999
/* Acquire semaphore (subtract 1). */
1000
static int
1001
semacquire(Segment *s, long *addr, int block)
1002
{
1003
	int acquired;
1004
	Sema phore;
1005
 
1006
	if(canacquire(addr))
1007
		return 1;
1008
	if(!block)
1009
		return 0;
1010
 
1011
	acquired = 0;
1012
	semqueue(s, addr, &phore);
1013
	for(;;){
1014
		phore.waiting = 1;
1015
		coherence();
1016
		if(canacquire(addr)){
1017
			acquired = 1;
1018
			break;
1019
		}
1020
		if(waserror())
1021
			break;
1022
		sleep(&phore, semawoke, &phore);
1023
		poperror();
1024
	}
1025
	semdequeue(s, &phore);
1026
	coherence();	/* not strictly necessary due to lock in semdequeue */
1027
	if(!phore.waiting)
1028
		semwakeup(s, addr, 1);
1029
	if(!acquired)
1030
		nexterror();
1031
	return 1;
1032
}
1033
 
1034
/* Acquire semaphore or time-out */
1035
static int
1036
tsemacquire(Segment *s, long *addr, ulong ms)
1037
{
1038
	int acquired, timedout;
1039
	ulong t, elms;
1040
	Sema phore;
1041
 
1042
	if(canacquire(addr))
1043
		return 1;
1044
	if(ms == 0)
1045
		return 0;
1046
	acquired = timedout = 0;
1047
	semqueue(s, addr, &phore);
1048
	for(;;){
1049
		phore.waiting = 1;
1050
		coherence();
1051
		if(canacquire(addr)){
1052
			acquired = 1;
1053
			break;
1054
		}
1055
		if(waserror())
1056
			break;
1057
		t = m->ticks;
1058
		tsleep(&phore, semawoke, &phore, ms);
1059
		elms = TK2MS(m->ticks - t);
1060
		poperror();
1061
		if(elms >= ms){
1062
			timedout = 1;
1063
			break;
1064
		}
1065
		ms -= elms;
1066
	}
1067
	semdequeue(s, &phore);
1068
	coherence();	/* not strictly necessary due to lock in semdequeue */
1069
	if(!phore.waiting)
1070
		semwakeup(s, addr, 1);
1071
	if(timedout)
1072
		return 0;
1073
	if(!acquired)
1074
		nexterror();
1075
	return 1;
1076
}
1077
 
1078
long
1079
syssemacquire(ulong *arg)
1080
{
1081
	int block;
1082
	long *addr;
1083
	Segment *s;
1084
 
1085
	validaddr(arg[0], sizeof(long), 1);
1086
	validalign(arg[0], sizeof(long));
1087
	addr = (long*)arg[0];
1088
	block = arg[1];
1089
 
1090
	if((s = seg(up, (ulong)addr, 0)) == nil)
1091
		error(Ebadarg);
1092
	if(*addr < 0)
1093
		error(Ebadarg);
1094
	return semacquire(s, addr, block);
1095
}
1096
 
1097
long
1098
systsemacquire(ulong *arg)
1099
{
1100
	long *addr;
1101
	ulong ms;
1102
	Segment *s;
1103
 
1104
	validaddr(arg[0], sizeof(long), 1);
1105
	validalign(arg[0], sizeof(long));
1106
	addr = (long*)arg[0];
1107
	ms = arg[1];
1108
 
1109
	if((s = seg(up, (ulong)addr, 0)) == nil)
1110
		error(Ebadarg);
1111
	if(*addr < 0)
1112
		error(Ebadarg);
1113
	return tsemacquire(s, addr, ms);
1114
}
1115
 
1116
long
1117
syssemrelease(ulong *arg)
1118
{
1119
	long *addr, delta;
1120
	Segment *s;
1121
 
1122
	validaddr(arg[0], sizeof(long), 1);
1123
	validalign(arg[0], sizeof(long));
1124
	addr = (long*)arg[0];
1125
	delta = arg[1];
1126
 
1127
	if((s = seg(up, (ulong)addr, 0)) == nil)
1128
		error(Ebadarg);
1129
	/* delta == 0 is a no-op, not a release */
1130
	if(delta < 0 || *addr < 0)
1131
		error(Ebadarg);
1132
	return semrelease(s, addr, delta);
1133
}
1134
 
1135
long
1136
sysnsec(ulong *arg)
1137
{
1138
	validaddr(arg[0], sizeof(vlong), 1);
1139
	validalign(arg[0], sizeof(vlong));
1140
 
1141
	*(vlong*)arg[0] = todget(nil);
1142
 
1143
	return 0;
1144
}