Subversion Repositories planix.SVN

/*
 * Memory mappings.  Life was easier when 2G of memory was enough.
 *
 * The kernel memory starts at KZERO, with the text loaded at KZERO+1M
 * (9load sits under 1M during the load).  The memory from KZERO to the
 * top of memory is mapped 1-1 with physical memory, starting at physical
 * address 0.  All kernel memory and data structures (i.e., the entries stored
 * into conf.mem) must sit in this physical range: if KZERO is at 0xF0000000,
 * then the kernel can only have 256MB of memory for itself.
 *
 * The 256M below KZERO comprises three parts.  The lowest 4M is the
 * virtual page table, a virtual address representation of the current
 * page table tree.  The second 4M is used for temporary per-process
 * mappings managed by kmap and kunmap.  The remaining 248M is used
 * for global (shared by all procs and all processors) device memory
 * mappings and managed by vmap and vunmap.  The total amount (256M)
 * could probably be reduced somewhat if desired.  The largest device
 * mapping is that of the video card, and even though modern video cards
 * have embarrassing amounts of memory, the video drivers only use one
 * frame buffer worth (at most 16M).  Each is described in more detail below.
 *
 * The VPT is a 4M frame constructed by inserting the pdb into itself.
 * This short-circuits one level of the page tables, with the result that
 * the contents of second-level page tables can be accessed at VPT.
 * We use the VPT to edit the page tables (see mmu) after inserting them
 * into the page directory.  It is a convenient mechanism for mapping what
 * might be otherwise-inaccessible pages.  The idea was borrowed from
 * the Exokernel.
 *
 * The VPT doesn't solve all our problems, because we still need to
 * prepare page directories before we can install them.  For that, we
 * use tmpmap/tmpunmap, which map a single page at TMPADDR.
 */

#include	"u.h"
#include	"../port/lib.h"
#include	"mem.h"
#include	"dat.h"
#include	"fns.h"
#include	"io.h"

/*
 * Simple segment descriptors with no translation.
 */
#define	DATASEGM(p)	{ 0xFFFF, SEGG|SEGB|(0xF<<16)|SEGP|SEGPL(p)|SEGDATA|SEGW }
#define	EXECSEGM(p)	{ 0xFFFF, SEGG|SEGD|(0xF<<16)|SEGP|SEGPL(p)|SEGEXEC|SEGR }
#define	EXEC16SEGM(p)	{ 0xFFFF, SEGG|(0xF<<16)|SEGP|SEGPL(p)|SEGEXEC|SEGR }
#define	TSSSEGM(b,p)	{ ((b)<<16)|sizeof(Tss),\
			  ((b)&0xFF000000)|(((b)>>16)&0xFF)|SEGTSS|SEGPL(p)|SEGP }

void realmodeintrinst(void);
void _stop32pg(void);

Segdesc gdt[NGDT] =
{
[NULLSEG]	{ 0, 0},		/* null descriptor */
[KDSEG]		DATASEGM(0),		/* kernel data/stack */
[KESEG]		EXECSEGM(0),		/* kernel code */
[UDSEG]		DATASEGM(3),		/* user data/stack */
[UESEG]		EXECSEGM(3),		/* user code */
[TSSSEG]	TSSSEGM(0,0),		/* tss segment */
[KESEG16]	EXEC16SEGM(0),		/* kernel code 16-bit */
};
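/*
 * A worked example of the TSSSEGM packing above (a sketch): the 32-bit
 * base b is scattered across the two descriptor words in the usual x86
 * way, with the limit (sizeof(Tss)) in the low 16 bits of d0:
 *
 *	d0 = base[15:0]<<16 | limit[15:0]
 *	d1 = base[31:24] | flags | base[23:16]
 *
 * so for b = 0x80123456:
 *
 *	d0 = 0x34560000 | sizeof(Tss)
 *	d1 = 0x80000000 | SEGTSS|SEGPL(p)|SEGP | 0x12
 *
 * mmuinit below performs the same packing by hand once the real Tss
 * address is known.
 */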
 
static int didmmuinit;
static void taskswitch(ulong, ulong);
static void memglobal(void);

#define	vpt ((ulong*)VPT)
#define	VPTX(va)		(((ulong)(va))>>12)
#define	vpd (vpt+VPTX(VPT))
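/*
 * A sketch of what the self-map described at the top of the file buys
 * us, assuming the usual two-level 4K paging: once pdb[PDX(VPT)] points
 * back at the pdb itself, the hardware walk resolves
 *
 *	vpd[PDX(va)]	to the level-1 (page directory) entry covering va
 *	vpt[VPTX(va)]	to the level-2 (page table) entry mapping va
 *
 * so a mapping can be inspected or edited without first finding its
 * page table through KADDR, e.g.
 *
 *	if((vpd[PDX(va)]&PTEVALID) && (vpt[VPTX(va)]&PTEVALID))
 *		vpt[VPTX(va)] &= ~PTEWRITE;	 make va read-only
 *
 * putmmu and kmap below rely on exactly this.
 */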
 
void
mmuinit0(void)
{
	memmove(m->gdt, gdt, sizeof gdt);
}

void
mmuinit(void)
{
	ulong x, *p;
	ushort ptr[3];

	didmmuinit = 1;

	if(0) print("vpt=%#.8ux vpd=%#p kmap=%#.8ux\n",
		VPT, vpd, KMAP);

	memglobal();
	m->pdb[PDX(VPT)] = PADDR(m->pdb)|PTEWRITE|PTEVALID;

	m->tss = malloc(sizeof(Tss));
	if(m->tss == nil)
		panic("mmuinit: no memory");
	memset(m->tss, 0, sizeof(Tss));
	m->tss->iomap = 0xDFFF<<16;

	/*
	 * We used to keep the GDT in the Mach structure, but it
	 * turns out that that slows down access to the rest of the
	 * page.  Since the Mach structure is accessed quite often,
	 * it pays off anywhere from a factor of 1.25 to 2 on real
	 * hardware to separate them (the AMDs are more sensitive
	 * than Intels in this regard).  Under VMware it pays off
	 * a factor of about 10 to 100.
	 */
	memmove(m->gdt, gdt, sizeof gdt);
	x = (ulong)m->tss;
	m->gdt[TSSSEG].d0 = (x<<16)|sizeof(Tss);
	m->gdt[TSSSEG].d1 = (x&0xFF000000)|((x>>16)&0xFF)|SEGTSS|SEGPL(0)|SEGP;

	ptr[0] = sizeof(gdt)-1;
	x = (ulong)m->gdt;
	ptr[1] = x & 0xFFFF;
	ptr[2] = (x>>16) & 0xFFFF;
	lgdt(ptr);

	ptr[0] = sizeof(Segdesc)*256-1;
	x = IDTADDR;
	ptr[1] = x & 0xFFFF;
	ptr[2] = (x>>16) & 0xFFFF;
	lidt(ptr);
 
	/*
	 * this kills 9load but not 9boot.  9load dies at the taskswitch.
	 * should track down exactly why some day.
	 */
	/* make most kernel text unwritable */
if(0)	for(x = PGROUND((ulong)_stop32pg); x < (ulong)etext; x += BY2PG){
		if(x == ((ulong)realmodeintrinst & ~(BY2PG-1)))
			continue;
		p = mmuwalk(m->pdb, x, 2, 0);
		if(p == nil)
			panic("mmuinit");
		*p &= ~PTEWRITE;
	}

	taskswitch(PADDR(m->pdb), (ulong)m + MACHSIZE);
	ltr(TSSSEL);
}
 
/*
 * On processors that support it, we set the PTEGLOBAL bit in
 * page table and page directory entries that map kernel memory.
 * Doing this tells the processor not to bother flushing them
 * from the TLB when doing the TLB flush associated with a
 * context switch (write to CR3).  Since kernel memory mappings
 * are never removed, this is safe.  (If we ever remove kernel memory
 * mappings, we can do a full flush by turning off the PGE bit in CR4,
 * writing to CR3, and then turning the PGE bit back on.)
 *
 * See also pdbmap below.
 *
 * Processor support for the PTEGLOBAL bit is enabled in devarch.c.
 */
static void
memglobal(void)
{
	int i, j;
	ulong *pde, *pte;

	/* only need to do this once, on bootstrap processor */
	if(m->machno != 0)
		return;

	if(!m->havepge)
		return;

	pde = m->pdb;
	for(i=PDX(KZERO); i<1024; i++){
		if(pde[i] & PTEVALID){
			pde[i] |= PTEGLOBAL;
			if(!(pde[i] & PTESIZE)){
				pte = KADDR(pde[i]&~(BY2PG-1));
				for(j=0; j<1024; j++)
					if(pte[j] & PTEVALID)
						pte[j] |= PTEGLOBAL;
			}
		}
	}
}
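/*
 * A sketch of the full flush described above, should kernel mappings
 * ever need to change (0x80 is the PGE bit in CR4; this assumes a
 * putcr4 counterpart to the getcr4 used in pdbmap below):
 *
 *	cr4 = getcr4();
 *	putcr4(cr4 & ~0x80);	 clear PGE: global entries become flushable
 *	putcr3(getcr3());	 reload CR3: flush the entire TLB
 *	putcr4(cr4);		 restore PGE
 */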
 
/*
 * Flush all the user-space and device-mapping mmu info
 * for this process, because something has been deleted.
 * It will be paged back in on demand.
 */
void
flushmmu(void)
{
	int s;

	s = splhi();
	up->newtlb = 1;
	mmuswitch(up);
	splx(s);
}

/*
 * Flush a single page mapping from the tlb.
 */
void
flushpg(ulong va)
{
	if(X86FAMILY(m->cpuidax) >= 4)
		invlpg(va);
	else
		putcr3(getcr3());
}

/*
 * Allocate a new page for a page directory.
 * We keep a small cache of pre-initialized
 * page directories in each mach.
 */
static Page*
mmupdballoc(void)
{
	int s;
	Page *page;
	ulong *pdb;

	s = splhi();
	m->pdballoc++;
	if(m->pdbpool == 0){
		spllo();
		page = newpage(0, 0, 0);
		page->va = (ulong)vpd;
		splhi();
		pdb = tmpmap(page);
		memmove(pdb, m->pdb, BY2PG);
		pdb[PDX(VPT)] = page->pa|PTEWRITE|PTEVALID;	/* set up VPT */
		tmpunmap(pdb);
	}else{
		page = m->pdbpool;
		m->pdbpool = page->next;
		m->pdbcnt--;
	}
	splx(s);
	return page;
}
 
static void
mmupdbfree(Proc *proc, Page *p)
{
	if(islo())
		panic("mmupdbfree: islo");
	m->pdbfree++;
	if(m->pdbcnt >= 10){
		p->next = proc->mmufree;
		proc->mmufree = p;
	}else{
		p->next = m->pdbpool;
		m->pdbpool = p;
		m->pdbcnt++;
	}
}

/*
 * A user-space memory segment has been deleted, or the
 * process is exiting.  Clear all the pde entries for user-space
 * memory mappings and device mappings.  Any entries that
 * are needed will be paged back in as necessary.
 */
static void
mmuptefree(Proc* proc)
{
	int s;
	ulong *pdb;
	Page **last, *page;

	if(proc->mmupdb == nil || proc->mmuused == nil)
		return;
	s = splhi();
	pdb = tmpmap(proc->mmupdb);
	last = &proc->mmuused;
	for(page = *last; page; page = page->next){
		pdb[page->daddr] = 0;
		last = &page->next;
	}
	tmpunmap(pdb);
	splx(s);
	*last = proc->mmufree;
	proc->mmufree = proc->mmuused;
	proc->mmuused = 0;
}

static void
taskswitch(ulong pdb, ulong stack)
{
	Tss *tss;

	tss = m->tss;
	tss->ss0 = KDSEL;
	tss->esp0 = stack;
	tss->ss1 = KDSEL;
	tss->esp1 = stack;
	tss->ss2 = KDSEL;
	tss->esp2 = stack;
	putcr3(pdb);
}

void
mmuswitch(Proc* proc)
{
	ulong *pdb;

	if(proc->newtlb){
		mmuptefree(proc);
		proc->newtlb = 0;
	}

	if(proc->mmupdb){
		pdb = tmpmap(proc->mmupdb);
		pdb[PDX(MACHADDR)] = m->pdb[PDX(MACHADDR)];
		tmpunmap(pdb);
		taskswitch(proc->mmupdb->pa, (ulong)(proc->kstack+KSTACK));
	}else
		taskswitch(PADDR(m->pdb), (ulong)(proc->kstack+KSTACK));
}
 
/*
 * Release any pages allocated for a page directory base or page-tables
 * for this process:
 *   switch to the prototype pdb for this processor (m->pdb);
 *   call mmuptefree() to place all pages used for page-tables (proc->mmuused)
 *   onto the process' free list (proc->mmufree). This has the side-effect of
 *   cleaning any user entries in the pdb (proc->mmupdb);
 *   if there's a pdb put it in the cache of pre-initialised pdb's
 *   for this processor (m->pdbpool) or on the process' free list;
 *   finally, place any pages freed back into the free pool (palloc).
 * This routine is only called from schedinit() with palloc locked.
 */
void
mmurelease(Proc* proc)
{
	Page *page, *next;
	ulong *pdb;

	if(islo())
		panic("mmurelease: islo");
	taskswitch(PADDR(m->pdb), (ulong)m + BY2PG);
	if(proc->kmaptable){
		if(proc->mmupdb == nil)
			panic("mmurelease: no mmupdb");
		if(--proc->kmaptable->ref)
			panic("mmurelease: kmap ref %d", proc->kmaptable->ref);
		if(proc->nkmap)
			panic("mmurelease: nkmap %d", proc->nkmap);
		/*
		 * remove kmaptable from pdb before putting pdb up for reuse.
		 */
		pdb = tmpmap(proc->mmupdb);
		if(PPN(pdb[PDX(KMAP)]) != proc->kmaptable->pa)
			panic("mmurelease: bad kmap pde %#.8lux kmap %#.8lux",
				pdb[PDX(KMAP)], proc->kmaptable->pa);
		pdb[PDX(KMAP)] = 0;
		tmpunmap(pdb);
		/*
		 * move kmaptable to free list.
		 */
		pagechainhead(proc->kmaptable);
		proc->kmaptable = 0;
	}
	if(proc->mmupdb){
		mmuptefree(proc);
		mmupdbfree(proc, proc->mmupdb);
		proc->mmupdb = 0;
	}
	for(page = proc->mmufree; page; page = next){
		next = page->next;
		if(--page->ref)
			panic("mmurelease: page->ref %d", page->ref);
		pagechainhead(page);
	}
	if(proc->mmufree && palloc.r.p)
		wakeup(&palloc.r);
	proc->mmufree = 0;
}
 
/*
 * Allocate and install pdb for the current process.
 */
static void
upallocpdb(void)
{
	int s;
	ulong *pdb;
	Page *page;

	if(up->mmupdb != nil)
		return;
	page = mmupdballoc();
	s = splhi();
	if(up->mmupdb != nil){
		/*
		 * Perhaps we got an interrupt while
		 * mmupdballoc was sleeping and that
		 * interrupt allocated an mmupdb?
		 * Seems unlikely.
		 */
		mmupdbfree(up, page);
		splx(s);
		return;
	}
	pdb = tmpmap(page);
	pdb[PDX(MACHADDR)] = m->pdb[PDX(MACHADDR)];
	tmpunmap(pdb);
	up->mmupdb = page;
	putcr3(up->mmupdb->pa);
	splx(s);
}
 
/*
 * Update the mmu in response to a user fault.  pa may have PTEWRITE set.
 */
void
putmmu(ulong va, ulong pa, Page*)
{
	int old, s;
	Page *page;

	if(up->mmupdb == nil)
		upallocpdb();

	/*
	 * We should be able to get through this with interrupts
	 * turned on (if we get interrupted we'll just pick up
	 * where we left off) but we get many faults accessing
	 * vpt[] near the end of this function, and they always happen
	 * after the process has been switched out and then
	 * switched back, usually many times in a row (perhaps
	 * it cannot switch back successfully for some reason).
	 *
	 * In any event, I'm tired of searching for this bug.
	 * Turn off interrupts during putmmu even though
	 * we shouldn't need to.		- rsc
	 */

	s = splhi();
	if(!(vpd[PDX(va)]&PTEVALID)){
		if(up->mmufree == 0){
			spllo();
			page = newpage(0, 0, 0);
			splhi();
		}
		else{
			page = up->mmufree;
			up->mmufree = page->next;
		}
		vpd[PDX(va)] = PPN(page->pa)|PTEUSER|PTEWRITE|PTEVALID;
		/* page is now mapped into the VPT - clear it */
		memset((void*)(VPT+PDX(va)*BY2PG), 0, BY2PG);
		page->daddr = PDX(va);
		page->next = up->mmuused;
		up->mmuused = page;
	}
	old = vpt[VPTX(va)];
	vpt[VPTX(va)] = pa|PTEUSER|PTEVALID;
	if(old&PTEVALID)
		flushpg(va);
	if(getcr3() != up->mmupdb->pa)
		print("bad cr3 %#.8lux %#.8lux\n", getcr3(), up->mmupdb->pa);
	splx(s);
}
 
/*
 * Double-check the user MMU.
 * Error checking only.
 */
void
checkmmu(ulong va, ulong pa)
{
	if(up->mmupdb == 0)
		return;
	if(!(vpd[PDX(va)]&PTEVALID) || !(vpt[VPTX(va)]&PTEVALID))
		return;
	if(PPN(vpt[VPTX(va)]) != pa)
		print("%ld %s: va=%#08lux pa=%#08lux pte=%#08lux\n",
			up->pid, up->text,
			va, pa, vpt[VPTX(va)]);
}
 
/*
 * Walk the page-table pointed to by pdb and return a pointer
 * to the entry for virtual address va at the requested level.
 * If the entry is invalid and create isn't requested then bail
 * out early. Otherwise, for the 2nd level walk, allocate a new
 * page-table page and register it in the 1st level.  This is used
 * only to edit kernel mappings, which use pages from kernel memory,
 * so it's okay to use KADDR to look at the tables.
 */
ulong*
mmuwalk(ulong* pdb, ulong va, int level, int create)
{
	ulong *table;
	void *map;

	table = &pdb[PDX(va)];
	if(!(*table & PTEVALID) && create == 0)
		return 0;

	switch(level){

	default:
		return 0;

	case 1:
		return table;

	case 2:
		if(*table & PTESIZE)
			panic("mmuwalk2: va %luX entry %luX", va, *table);
		if(!(*table & PTEVALID)){
			/*
			 * Have to call low-level allocator from
			 * memory.c if we haven't set up the xalloc
			 * tables yet.
			 */
			if(didmmuinit)
				map = xspanalloc(BY2PG, BY2PG, 0);
			else
				map = rampage();
			if(map == nil)
				panic("mmuwalk xspanalloc failed");
			*table = PADDR(map)|PTEWRITE|PTEVALID;
		}
		table = KADDR(PPN(*table));
		return &table[PTX(va)];
	}
}
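/*
 * Typical use (a sketch), as in mmuinit and pdbmap: fetch the level-2
 * entry for a kernel virtual address, creating intermediate tables
 * only when create is non-zero.
 *
 *	pte = mmuwalk(m->pdb, va, 2, 0);
 *	if(pte != nil && (*pte & PTEVALID))
 *		*pte &= ~PTEWRITE;	 e.g. write-protect the page
 */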
 
/*
 * Device mappings are shared by all procs and processors and
 * live in the virtual range VMAP to VMAP+VMAPSIZE.  The master
 * copy of the mappings is stored in mach0->pdb, and they are
 * paged in from there as necessary by vmapsync during faults.
 */

static Lock vmaplock;

static int findhole(ulong *a, int n, int count);
static ulong vmapalloc(ulong size);
static void pdbunmap(ulong*, ulong, int);

/*
 * Add a device mapping to the vmap range.
 */
void*
vmap(ulong pa, int size)
{
	int osize;
	ulong o, va;

	/*
	 * might be asking for less than a page.
	 */
	osize = size;
	o = pa & (BY2PG-1);
	pa -= o;
	size += o;

	size = ROUND(size, BY2PG);
	if(pa == 0){
		print("vmap pa=0 pc=%#p\n", getcallerpc(&pa));
		return nil;
	}
	ilock(&vmaplock);
	if((va = vmapalloc(size)) == 0
	|| pdbmap(MACHP(0)->pdb, pa|PTEUNCACHED|PTEWRITE, va, size) < 0){
		iunlock(&vmaplock);
		return 0;
	}
	iunlock(&vmaplock);
	/* avoid trap on local processor
	for(i=0; i<size; i+=4*MB)
		vmapsync(va+i);
	*/
	USED(osize);
//	print("  vmap %#.8lux %d => %#.8lux\n", pa+o, osize, va+o);
	return (void*)(va + o);
}
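/*
 * Typical driver use (a sketch; the physical address is illustrative):
 *
 *	regs = vmap(0xFEC00000, BY2PG);	 map one page of device registers
 *	if(regs == nil)
 *		panic("no vmap for device registers");
 *	...
 *	vunmap(regs, BY2PG);		 release it again
 */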
 
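/*
 * findhole returns the index of the first run of count consecutive
 * zero slots in a[0..n-1], or -1 if there is none.  For example, with
 * a[] = { 1, 0, 0, 1, 0, 0, 0 }:
 *
 *	findhole(a, 7, 2) == 1
 *	findhole(a, 7, 3) == 4
 *	findhole(a, 7, 4) == -1
 */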
static int
findhole(ulong *a, int n, int count)
{
	int have, i;

	have = 0;
	for(i=0; i<n; i++){
		if(a[i] == 0)
			have++;
		else
			have = 0;
		if(have >= count)
			return i+1 - have;
	}
	return -1;
}

/*
 * Look for free space in the vmap.
 */
static ulong
vmapalloc(ulong size)
{
	int i, n, o;
	ulong *vpdb;
	int vpdbsize;

	vpdb = &MACHP(0)->pdb[PDX(VMAP)];
	vpdbsize = VMAPSIZE/(4*MB);

	if(size >= 4*MB){
		n = (size+4*MB-1) / (4*MB);
		if((o = findhole(vpdb, vpdbsize, n)) != -1)
			return VMAP + o*4*MB;
		return 0;
	}
	n = (size+BY2PG-1) / BY2PG;
	for(i=0; i<vpdbsize; i++)
		if((vpdb[i]&PTEVALID) && !(vpdb[i]&PTESIZE))
			if((o = findhole(KADDR(PPN(vpdb[i])), WD2PG, n)) != -1)
				return VMAP + i*4*MB + o*BY2PG;
	if((o = findhole(vpdb, vpdbsize, 1)) != -1)
		return VMAP + o*4*MB;

	/*
	 * could span page directory entries, but not worth the trouble.
	 * not going to be very much contention.
	 */
	return 0;
}
 
/*
 * Remove a device mapping from the vmap range.
 * Since pdbunmap does not remove page tables, just entries,
 * the call need not be interlocked with vmap.
 */
void
vunmap(void *v, int size)
{
	int i;
	ulong va, o;
	Mach *nm;
	Proc *p;

	/*
	 * might not be aligned
	 */
	va = (ulong)v;
	o = va&(BY2PG-1);
	va -= o;
	size += o;
	size = ROUND(size, BY2PG);

	if(size < 0 || va < VMAP || va+size > VMAP+VMAPSIZE)
		panic("vunmap va=%#.8lux size=%#x pc=%#.8lux",
			va, size, getcallerpc(&v));

	pdbunmap(MACHP(0)->pdb, va, size);

	/*
	 * Flush mapping from all the tlbs and copied pdbs.
	 * This can be (and is) slow, since it is called only rarely.
	 * It is possible for vunmap to be called with up == nil,
	 * e.g. from the reset/init driver routines during system
	 * boot. In that case it suffices to flush the MACH(0) TLB
	 * and return.
	 */
	if(!active.thunderbirdsarego){
		if(MACHP(0)->pdb == 0)
			panic("vunmap: nil m->pdb pc=%#p", getcallerpc(&v));
		if(PADDR(MACHP(0)->pdb) == 0)
			panic("vunmap: nil PADDR(m->pdb)");
		putcr3(PADDR(MACHP(0)->pdb));
		return;
	}
	for(i=0; i<conf.nproc; i++){
		p = proctab(i);
		if(p->state == Dead)
			continue;
		if(p != up)
			p->newtlb = 1;
	}
	for(i=0; i<conf.nmach; i++){
		nm = MACHP(i);
		if(nm != m)
			nm->flushmmu = 1;
	}
	flushmmu();
	for(i=0; i<conf.nmach; i++){
		nm = MACHP(i);
		if(nm != m)
			while((active.machs&(1<<nm->machno)) && nm->flushmmu)
				;
	}
}
 
/*
 * Add kernel mappings for va -> pa for a section of size bytes.
 */
int
pdbmap(ulong *pdb, ulong pa, ulong va, int size)
{
	int pse;
	ulong pgsz, *pte, *table;
	ulong flag, off;

	flag = pa&0xFFF;
	pa &= ~0xFFF;

	if((MACHP(0)->cpuiddx & 0x08) && (getcr4() & 0x10))
		pse = 1;
	else
		pse = 0;

	for(off=0; off<size; off+=pgsz){
		table = &pdb[PDX(va+off)];
		if((*table&PTEVALID) && (*table&PTESIZE))
			panic("vmap: pdb pte valid and big page: "
				"va=%#.8lux pa=%#.8lux pde=%#.8lux",
				va+off, pa+off, *table);

		/*
		 * Check if it can be mapped using a 4MB page:
		 * va, pa aligned and size >= 4MB and processor can do it.
		 */
		if(pse && (pa+off)%(4*MB) == 0 && (va+off)%(4*MB) == 0 &&
		    (size-off) >= 4*MB){
			*table = (pa+off)|flag|PTESIZE|PTEVALID;
			pgsz = 4*MB;
		}else{
			pte = mmuwalk(pdb, va+off, 2, 1);
			if(*pte&PTEVALID)
				panic("vmap: va=%#.8lux pa=%#.8lux pte=%#.8lux",
					va+off, pa+off, *pte);
			*pte = (pa+off)|flag|PTEVALID;
			pgsz = BY2PG;
		}
	}
	return 0;
}
 
/*
 * Remove mappings.  Must already exist, for sanity.
 * Only used for kernel mappings, so okay to use KADDR.
 */
static void
pdbunmap(ulong *pdb, ulong va, int size)
{
	ulong vae;
	ulong *table;

	vae = va+size;
	while(va < vae){
		table = &pdb[PDX(va)];
		if(!(*table & PTEVALID)){
			panic("vunmap: not mapped");
			/*
			va = (va+4*MB-1) & ~(4*MB-1);
			continue;
			*/
		}
		if(*table & PTESIZE){
			*table = 0;
			va = (va+4*MB-1) & ~(4*MB-1);
			continue;
		}
		table = KADDR(PPN(*table));
		if(!(table[PTX(va)] & PTEVALID))
			panic("vunmap: not mapped");
		table[PTX(va)] = 0;
		va += BY2PG;
	}
}
 
/*
 * Handle a fault by bringing vmap up to date.
 * Only copy pdb entries and they never go away,
 * so no locking needed.
 */
int
vmapsync(ulong va)
{
	ulong entry, *table;

	if(va < VMAP || va >= VMAP+VMAPSIZE)
		return 0;

	entry = MACHP(0)->pdb[PDX(va)];
	if(!(entry&PTEVALID))
		return 0;
	if(!(entry&PTESIZE)){
		/* make sure entry will help the fault */
		table = KADDR(PPN(entry));
		if(!(table[PTX(va)]&PTEVALID))
			return 0;
	}
	vpd[PDX(va)] = entry;
	/*
	 * TLB doesn't cache negative results, so no flush needed.
	 */
	return 1;
}
 
 
/*
 * KMap is used to map individual pages into virtual memory.
 * It is rare to have more than a few KMaps at a time (in the
 * absence of interrupts, only two at a time are ever used,
 * but interrupts can stack).  The mappings are local to a process,
 * so we can use the same range of virtual address space for
 * all processes without any coordination.
 */
#define kpt (vpt+VPTX(KMAP))
#define NKPT (KMAPSIZE/BY2PG)

KMap*
kmap(Page *page)
{
	int i, o, s;

	if(up == nil)
		panic("kmap: up=0 pc=%#.8lux", getcallerpc(&page));
	if(up->mmupdb == nil)
		upallocpdb();
	if(up->nkmap < 0)
		panic("kmap %lud %s: nkmap=%d", up->pid, up->text, up->nkmap);

	/*
	 * Splhi shouldn't be necessary here, but paranoia reigns.
	 * See comment in putmmu above.
	 */
	s = splhi();
	up->nkmap++;
	if(!(vpd[PDX(KMAP)]&PTEVALID)){
		/* allocate page directory */
		if(KMAPSIZE > BY2XPG)
			panic("bad kmapsize");
		if(up->kmaptable != nil)
			panic("kmaptable");
		spllo();
		up->kmaptable = newpage(0, 0, 0);
		splhi();
		vpd[PDX(KMAP)] = up->kmaptable->pa|PTEWRITE|PTEVALID;
		flushpg((ulong)kpt);
		memset(kpt, 0, BY2PG);
		kpt[0] = page->pa|PTEWRITE|PTEVALID;
		up->lastkmap = 0;
		splx(s);
		return (KMap*)KMAP;
	}
	if(up->kmaptable == nil)
		panic("no kmaptable");
	o = up->lastkmap+1;
	for(i=0; i<NKPT; i++){
		if(kpt[(i+o)%NKPT] == 0){
			o = (i+o)%NKPT;
			kpt[o] = page->pa|PTEWRITE|PTEVALID;
			up->lastkmap = o;
			splx(s);
			return (KMap*)(KMAP+o*BY2PG);
		}
	}
	panic("out of kmap");
	return nil;
}

void
kunmap(KMap *k)
{
	ulong va;

	va = (ulong)k;
	if(up->mmupdb == nil || !(vpd[PDX(KMAP)]&PTEVALID))
		panic("kunmap: no kmaps");
	if(va < KMAP || va >= KMAP+KMAPSIZE)
		panic("kunmap: bad address %#.8lux pc=%#p", va, getcallerpc(&k));
	if(!(vpt[VPTX(va)]&PTEVALID))
		panic("kunmap: not mapped %#.8lux pc=%#p", va, getcallerpc(&k));
	up->nkmap--;
	if(up->nkmap < 0)
		panic("kunmap %lud %s: nkmap=%d", up->pid, up->text, up->nkmap);
	vpt[VPTX(va)] = 0;
	flushpg(va);
}
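/*
 * Typical use of the pair above (a sketch, assuming the usual VA()
 * macro from dat.h): briefly map a page that may have no permanent
 * kernel address and copy into it.
 *
 *	k = kmap(pg);
 *	memmove(VA(k), buf, n);
 *	kunmap(k);
 */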
 
/*
 * Temporary one-page mapping used to edit page directories.
 *
 * The fasttmp #define controls whether the code optimizes
 * the case where the page is already mapped in the physical
 * memory window.
 */
#define fasttmp 1

void*
tmpmap(Page *p)
{
	ulong i;
	ulong *entry;

	if(islo())
		panic("tmpaddr: islo");

	if(fasttmp && p->pa < -KZERO)
		return KADDR(p->pa);

	/*
	 * PDX(TMPADDR) == PDX(MACHADDR), so this
	 * entry is private to the processor and shared
	 * between up->mmupdb (if any) and m->pdb.
	 */
	entry = &vpt[VPTX(TMPADDR)];
	if(!(*entry&PTEVALID)){
		for(i=KZERO; i<=CPU0MACH; i+=BY2PG)
			print("%#p: *%#p=%#p (vpt=%#p index=%#p)\n", i, &vpt[VPTX(i)], vpt[VPTX(i)], vpt, VPTX(i));
		panic("tmpmap: no entry");
	}
	if(PPN(*entry) != PPN(TMPADDR-KZERO))
		panic("tmpmap: already mapped entry=%#.8lux", *entry);
	*entry = p->pa|PTEWRITE|PTEVALID;
	flushpg(TMPADDR);
	return (void*)TMPADDR;
}

void
tmpunmap(void *v)
{
	ulong *entry;

	if(islo())
		panic("tmpaddr: islo");
	if(fasttmp && (ulong)v >= KZERO && v != (void*)TMPADDR)
		return;
	if(v != (void*)TMPADDR)
		panic("tmpunmap: bad address");
	entry = &vpt[VPTX(TMPADDR)];
	if(!(*entry&PTEVALID) || PPN(*entry) == PPN(PADDR(TMPADDR)))
		panic("tmpmap: not mapped entry=%#.8lux", *entry);
	*entry = PPN(TMPADDR-KZERO)|PTEWRITE|PTEVALID;
	flushpg(TMPADDR);
}
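/*
 * Typical use of the pair above (a sketch), as in mmupdballoc: edit a
 * freshly allocated page directory that may sit above the KADDR window.
 *
 *	pdb = tmpmap(page);
 *	memmove(pdb, m->pdb, BY2PG);
 *	pdb[PDX(VPT)] = page->pa|PTEWRITE|PTEVALID;
 *	tmpunmap(pdb);
 */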
 
/*
 * These could go back to being macros once the kernel is debugged,
 * but the extra checking is nice to have.
 */
void*
kaddr(ulong pa)
{
	if(pa > (ulong)-KZERO)
		panic("kaddr: pa=%#.8lux > -KZERO pc=%#p", pa, getcallerpc(&pa));
	return (void*)(pa | KZERO);
}

ulong
paddr(void *v)
{
	ulong va;

	va = (ulong)v;
	if(va < KZERO)
		panic("paddr: va=%#.8lux < KZERO pc=%#p", va, getcallerpc(&v));
	return va & ~KSEGM;
}

/*
 * More debugging.
 */
void
countpagerefs(ulong *ref, int print)
{
	USED(ref, print);
}

void
checkfault(ulong, ulong)
{
}

/*
 * Return the number of bytes that can be accessed via KADDR(pa).
 * If pa is not a valid argument to KADDR, return 0.
 */
ulong
cankaddr(ulong pa)
{
	if(pa >= -KZERO)
		return 0;
	return -KZERO - pa;
}
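/*
 * For example (a sketch, with KZERO at 0xF0000000 as in the comment at
 * the top of the file): -KZERO is 0x10000000, i.e. 256MB, so
 *
 *	cankaddr(0)          == 256*MB	 the whole direct-map window
 *	cankaddr(0x0FFFF000) == BY2PG	 the last directly mapped page
 *	cankaddr(0x10000000) == 0	 beyond the window
 */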