/*
 * Memory mappings.  Life was easier when 2G of memory was enough.
 *
 * The kernel memory starts at KZERO, with the text loaded at KZERO+1M
 * (9load sits under 1M during the load).  The memory from KZERO to the
 * top of memory is mapped 1-1 with physical memory, starting at physical
 * address 0.  All kernel memory and data structures (i.e., the entries stored
 * into conf.mem) must sit in this physical range: if KZERO is at 0xF0000000,
 * then the kernel can only have 256MB of memory for itself.
 *
 * The 256M below KZERO comprises three parts.  The lowest 4M is the
 * virtual page table, a virtual address representation of the current
 * page table tree.  The second 4M is used for temporary per-process
 * mappings managed by kmap and kunmap.  The remaining 248M is used
 * for global (shared by all procs and all processors) device memory
 * mappings and managed by vmap and vunmap.  The total amount (256M)
 * could probably be reduced somewhat if desired.  The largest device
 * mapping is that of the video card, and even though modern video cards
 * have embarrassing amounts of memory, the video drivers only use one
 * frame buffer worth (at most 16M).  Each is described in more detail below.
 *
 * The VPT is a 4M frame constructed by inserting the pdb into itself.
 * This short-circuits one level of the page tables, with the result that
 * the contents of second-level page tables can be accessed at VPT.
 * We use the VPT to edit the page tables (see mmu) after inserting them
 * into the page directory.  It is a convenient mechanism for mapping what
 * might be otherwise-inaccessible pages.  The idea was borrowed from
 * the Exokernel.
 *
 * The VPT doesn't solve all our problems, because we still need to
 * prepare page directories before we can install them.  For that, we
 * use tmpmap/tmpunmap, which map a single page at TMPADDR.
 */

#include	"u.h"
#include	"../port/lib.h"
#include	"mem.h"
#include	"dat.h"
#include	"fns.h"
#include	"io.h"

/*
 * Simple segment descriptors with no translation.
 */
#define	DATASEGM(p) 	{ 0xFFFF, SEGG|SEGB|(0xF<<16)|SEGP|SEGPL(p)|SEGDATA|SEGW }
#define	EXECSEGM(p) 	{ 0xFFFF, SEGG|SEGD|(0xF<<16)|SEGP|SEGPL(p)|SEGEXEC|SEGR }
#define	EXEC16SEGM(p) 	{ 0xFFFF, SEGG|(0xF<<16)|SEGP|SEGPL(p)|SEGEXEC|SEGR }
#define	TSSSEGM(b,p)	{ ((b)<<16)|sizeof(Tss),\
			  ((b)&0xFF000000)|(((b)>>16)&0xFF)|SEGTSS|SEGPL(p)|SEGP }

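/*
 * These are flat segments: base 0 and, with SEGG (page granularity) and
 * a 20-bit limit of 0xFFFFF, a full 4GB extent, so segmentation adds no
 * translation; the entries differ only in type and privilege level.
 * EXEC16SEGM omits the 32-bit default-operand-size bit, giving a
 * 16-bit code segment.
 */
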
Segdesc gdt[NGDT] =
{
[NULLSEG]	{ 0, 0},		/* null descriptor */
[KDSEG]		DATASEGM(0),		/* kernel data/stack */
[KESEG]		EXECSEGM(0),		/* kernel code */
[UDSEG]		DATASEGM(3),		/* user data/stack */
[UESEG]		EXECSEGM(3),		/* user code */
[TSSSEG]	TSSSEGM(0,0),		/* tss segment */
[KESEG16]	EXEC16SEGM(0),		/* kernel code 16-bit */
};

static int didmmuinit;
static void taskswitch(ulong, ulong);
static void memglobal(void);

#define	vpt ((ulong*)VPT)
#define	VPTX(va)		(((ulong)(va))>>12)
#define	vpd (vpt+VPTX(VPT))

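/*
 * Because the pdb is installed as one of its own entries (at PDX(VPT);
 * see mmuinit and mmupdballoc), the hardware page-table walk turns the
 * 4M at VPT into an array of all the second-level PTEs: vpt[VPTX(va)]
 * is the PTE that maps va.  The 4K slice of that array which maps the
 * VPT itself is the page directory, so vpd[PDX(va)] is the PDE for va.
 */
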
void
mmuinit0(void)
{
	memmove(m->gdt, gdt, sizeof gdt);
}

void
mmuinit(void)
{
	ulong x, *p;
	ushort ptr[3];

	didmmuinit = 1;

	if(0) print("vpt=%#.8ux vpd=%#p kmap=%#.8ux\n",
		VPT, vpd, KMAP);

	memglobal();
	m->pdb[PDX(VPT)] = PADDR(m->pdb)|PTEWRITE|PTEVALID;

	m->tss = malloc(sizeof(Tss));
	if(m->tss == nil)
		panic("mmuinit: no memory");
	memset(m->tss, 0, sizeof(Tss));
	m->tss->iomap = 0xDFFF<<16;

	/*
	 * We used to keep the GDT in the Mach structure, but it
	 * turns out that that slows down access to the rest of the
	 * page.  Since the Mach structure is accessed quite often,
	 * it pays off anywhere from a factor of 1.25 to 2 on real
	 * hardware to separate them (the AMDs are more sensitive
	 * than Intels in this regard).  Under VMware it pays off
	 * a factor of about 10 to 100.
	 */
	memmove(m->gdt, gdt, sizeof gdt);
	x = (ulong)m->tss;
	m->gdt[TSSSEG].d0 = (x<<16)|sizeof(Tss);
	m->gdt[TSSSEG].d1 = (x&0xFF000000)|((x>>16)&0xFF)|SEGTSS|SEGPL(0)|SEGP;

	ptr[0] = sizeof(gdt)-1;
	x = (ulong)m->gdt;
	ptr[1] = x & 0xFFFF;
	ptr[2] = (x>>16) & 0xFFFF;
	lgdt(ptr);

	ptr[0] = sizeof(Segdesc)*256-1;
	x = IDTADDR;
	ptr[1] = x & 0xFFFF;
	ptr[2] = (x>>16) & 0xFFFF;
	lidt(ptr);

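	/*
	 * ptr[] above is the 6-byte operand that lgdt and lidt expect:
	 * a 16-bit table limit followed by a 32-bit linear base address.
	 */
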
	/* make kernel text unwritable */
	for(x = KTZERO; x < (ulong)etext; x += BY2PG){
		p = mmuwalk(m->pdb, x, 2, 0);
		if(p == nil)
			panic("mmuinit");
		*p &= ~PTEWRITE;
	}

	taskswitch(PADDR(m->pdb), (ulong)m + BY2PG);
	ltr(TSSSEL);
}

/*
 * On processors that support it, we set the PTEGLOBAL bit in
 * page table and page directory entries that map kernel memory.
 * Doing this tells the processor not to bother flushing them
 * from the TLB when doing the TLB flush associated with a
 * context switch (write to CR3).  Since kernel memory mappings
 * are never removed, this is safe.  (If we ever remove kernel memory
 * mappings, we can do a full flush by turning off the PGE bit in CR4,
 * writing to CR3, and then turning the PGE bit back on.)
 *
 * See also mmukmap below.
 *
 * Processor support for the PTEGLOBAL bit is enabled in devarch.c.
 */
static void
memglobal(void)
{
	int i, j;
	ulong *pde, *pte;

	/* only need to do this once, on bootstrap processor */
	if(m->machno != 0)
		return;

	if(!m->havepge)
		return;

	pde = m->pdb;
	for(i=PDX(KZERO); i<1024; i++){
		if(pde[i] & PTEVALID){
			pde[i] |= PTEGLOBAL;
			if(!(pde[i] & PTESIZE)){
				pte = KADDR(pde[i]&~(BY2PG-1));
				for(j=0; j<1024; j++)
					if(pte[j] & PTEVALID)
						pte[j] |= PTEGLOBAL;
			}
		}
	}
}

/*
 * Flush all the user-space and device-mapping mmu info
 * for this process, because something has been deleted.
 * It will be paged back in on demand.
 */
void
flushmmu(void)
{
	int s;

	s = splhi();
	up->newtlb = 1;
	mmuswitch(up);
	splx(s);
}

/*
 * Flush a single page mapping from the tlb.
 */
void
flushpg(ulong va)
{
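	/* invlpg is a 486 addition; older processors can only flush by reloading cr3 */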
	if(X86FAMILY(m->cpuidax) >= 4)
		invlpg(va);
	else
		putcr3(getcr3());
}

/*
 * Allocate a new page for a page directory.
 * We keep a small cache of pre-initialized
 * page directories in each mach.
 */
static Page*
mmupdballoc(void)
{
	int s;
	Page *page;
	ulong *pdb;

	s = splhi();
	m->pdballoc++;
	if(m->pdbpool == 0){
		spllo();
		page = newpage(0, 0, 0);
		page->va = (ulong)vpd;
		splhi();
		pdb = tmpmap(page);
		memmove(pdb, m->pdb, BY2PG);
		pdb[PDX(VPT)] = page->pa|PTEWRITE|PTEVALID;	/* set up VPT */
		tmpunmap(pdb);
	}else{
		page = m->pdbpool;
		m->pdbpool = page->next;
		m->pdbcnt--;
	}
	splx(s);
	return page;
}

static void
mmupdbfree(Proc *proc, Page *p)
{
	if(islo())
		panic("mmupdbfree: islo");
	m->pdbfree++;
	if(m->pdbcnt >= 10){
		p->next = proc->mmufree;
		proc->mmufree = p;
	}else{
		p->next = m->pdbpool;
		m->pdbpool = p;
		m->pdbcnt++;
	}
}

/*
 * A user-space memory segment has been deleted, or the
 * process is exiting.  Clear all the pde entries for user-space
 * memory mappings and device mappings.  Any entries that
 * are needed will be paged back in as necessary.
 */
static void
mmuptefree(Proc* proc)
{
	int s;
	ulong *pdb;
	Page **last, *page;

	if(proc->mmupdb == nil || proc->mmuused == nil)
		return;
	s = splhi();
	pdb = tmpmap(proc->mmupdb);
	last = &proc->mmuused;
	for(page = *last; page; page = page->next){
		pdb[page->daddr] = 0;
		last = &page->next;
	}
	tmpunmap(pdb);
	splx(s);
	*last = proc->mmufree;
	proc->mmufree = proc->mmuused;
	proc->mmuused = 0;
}

static void
taskswitch(ulong pdb, ulong stack)
{
	Tss *tss;

	tss = m->tss;
	tss->ss0 = KDSEL;
	tss->esp0 = stack;
	tss->ss1 = KDSEL;
	tss->esp1 = stack;
	tss->ss2 = KDSEL;
	tss->esp2 = stack;
	putcr3(pdb);
}

void
mmuswitch(Proc* proc)
{
	ulong *pdb;

	if(proc->newtlb){
		mmuptefree(proc);
		proc->newtlb = 0;
	}

	if(proc->mmupdb){
		pdb = tmpmap(proc->mmupdb);
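		/* the MACHADDR mapping is per-processor (see tmpmap); copy this processor's pde in case the process last ran elsewhere */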
		pdb[PDX(MACHADDR)] = m->pdb[PDX(MACHADDR)];
		tmpunmap(pdb);
		taskswitch(proc->mmupdb->pa, (ulong)(proc->kstack+KSTACK));
	}else
		taskswitch(PADDR(m->pdb), (ulong)(proc->kstack+KSTACK));
}

/*
 * Release any pages allocated for a page directory base or page-tables
 * for this process:
 *   switch to the prototype pdb for this processor (m->pdb);
 *   call mmuptefree() to place all pages used for page-tables (proc->mmuused)
 *   onto the process' free list (proc->mmufree). This has the side-effect of
 *   cleaning any user entries in the pdb (proc->mmupdb);
 *   if there's a pdb put it in the cache of pre-initialised pdb's
 *   for this processor (m->pdbpool) or on the process' free list;
 *   finally, place any pages freed back into the free pool (palloc).
 * This routine is only called from schedinit() with palloc locked.
 */
void
mmurelease(Proc* proc)
{
	Page *page, *next;
	ulong *pdb;

	if(islo())
		panic("mmurelease: islo");
	taskswitch(PADDR(m->pdb), (ulong)m + BY2PG);
	if(proc->kmaptable){
		if(proc->mmupdb == nil)
			panic("mmurelease: no mmupdb");
		if(--proc->kmaptable->ref)
			panic("mmurelease: kmap ref %d", proc->kmaptable->ref);
		if(proc->nkmap)
			panic("mmurelease: nkmap %d", proc->nkmap);
		/*
		 * remove kmaptable from pdb before putting pdb up for reuse.
		 */
		pdb = tmpmap(proc->mmupdb);
		if(PPN(pdb[PDX(KMAP)]) != proc->kmaptable->pa)
			panic("mmurelease: bad kmap pde %#.8lux kmap %#.8lux",
				pdb[PDX(KMAP)], proc->kmaptable->pa);
		pdb[PDX(KMAP)] = 0;
		tmpunmap(pdb);
		/*
		 * move kmaptable to free list.
		 */
		pagechainhead(proc->kmaptable);
		proc->kmaptable = 0;
	}
	if(proc->mmupdb){
		mmuptefree(proc);
		mmupdbfree(proc, proc->mmupdb);
		proc->mmupdb = 0;
	}
	for(page = proc->mmufree; page; page = next){
		next = page->next;
		if(--page->ref)
			panic("mmurelease: page->ref %d", page->ref);
		pagechainhead(page);
	}
	if(proc->mmufree && palloc.r.p)
		wakeup(&palloc.r);
	proc->mmufree = 0;
}

/*
 * Allocate and install pdb for the current process.
 */
static void
upallocpdb(void)
{
	int s;
	ulong *pdb;
	Page *page;

	if(up->mmupdb != nil)
		return;
	page = mmupdballoc();
	s = splhi();
	if(up->mmupdb != nil){
		/*
		 * Perhaps we got an interrupt while
		 * mmupdballoc was sleeping and that
		 * interrupt allocated an mmupdb?
		 * Seems unlikely.
		 */
		mmupdbfree(up, page);
		splx(s);
		return;
	}
	pdb = tmpmap(page);
	pdb[PDX(MACHADDR)] = m->pdb[PDX(MACHADDR)];
	tmpunmap(pdb);
	up->mmupdb = page;
	putcr3(up->mmupdb->pa);
	splx(s);
}

/*
 * Update the mmu in response to a user fault.  pa may have PTEWRITE set.
 */
void
putmmu(ulong va, ulong pa, Page*)
{
	int old, s;
	Page *page;

	if(up->mmupdb == nil)
		upallocpdb();

	/*
	 * We should be able to get through this with interrupts
	 * turned on (if we get interrupted we'll just pick up
	 * where we left off) but we get many faults accessing
	 * vpt[] near the end of this function, and they always happen
	 * after the process has been switched out and then
	 * switched back, usually many times in a row (perhaps
	 * it cannot switch back successfully for some reason).
	 *
	 * In any event, I'm tired of searching for this bug.
	 * Turn off interrupts during putmmu even though
	 * we shouldn't need to.		- rsc
	 */

	s = splhi();
	if(!(vpd[PDX(va)]&PTEVALID)){
		if(up->mmufree == 0){
			spllo();
			page = newpage(0, 0, 0);
			splhi();
		}
		else{
			page = up->mmufree;
			up->mmufree = page->next;
		}
		vpd[PDX(va)] = PPN(page->pa)|PTEUSER|PTEWRITE|PTEVALID;
		/* page is now mapped into the VPT - clear it */
		memset((void*)(VPT+PDX(va)*BY2PG), 0, BY2PG);
		page->daddr = PDX(va);
		page->next = up->mmuused;
		up->mmuused = page;
	}
	old = vpt[VPTX(va)];
	vpt[VPTX(va)] = pa|PTEUSER|PTEVALID;
	if(old&PTEVALID)
		flushpg(va);
	if(getcr3() != up->mmupdb->pa)
		print("bad cr3 %#.8lux %#.8lux\n", getcr3(), up->mmupdb->pa);
	splx(s);
}

/*
 * Double-check the user MMU.
 * Error checking only.
 */
void
checkmmu(ulong va, ulong pa)
{
	if(up->mmupdb == 0)
		return;
	if(!(vpd[PDX(va)]&PTEVALID) || !(vpt[VPTX(va)]&PTEVALID))
		return;
	if(PPN(vpt[VPTX(va)]) != pa)
		print("%ld %s: va=%#08lux pa=%#08lux pte=%#08lux\n",
			up->pid, up->text,
			va, pa, vpt[VPTX(va)]);
}

/*
 * Walk the page-table pointed to by pdb and return a pointer
 * to the entry for virtual address va at the requested level.
 * If the entry is invalid and create isn't requested then bail
 * out early. Otherwise, for the 2nd level walk, allocate a new
 * page-table page and register it in the 1st level.  This is used
 * only to edit kernel mappings, which use pages from kernel memory,
 * so it's okay to use KADDR to look at the tables.
 */
ulong*
mmuwalk(ulong* pdb, ulong va, int level, int create)
{
	ulong *table;
	void *map;

	table = &pdb[PDX(va)];
	if(!(*table & PTEVALID) && create == 0)
		return 0;

	switch(level){

	default:
		return 0;

	case 1:
		return table;

	case 2:
		if(*table & PTESIZE)
			panic("mmuwalk2: va %luX entry %luX", va, *table);
		if(!(*table & PTEVALID)){
			/*
			 * Have to call low-level allocator from
			 * memory.c if we haven't set up the xalloc
			 * tables yet.
			 */
			if(didmmuinit)
				map = xspanalloc(BY2PG, BY2PG, 0);
			else
				map = rampage();
			if(map == nil)
				panic("mmuwalk xspanalloc failed");
			*table = PADDR(map)|PTEWRITE|PTEVALID;
		}
		table = KADDR(PPN(*table));
		return &table[PTX(va)];
	}
}

/*
 * Device mappings are shared by all procs and processors and
 * live in the virtual range VMAP to VMAP+VMAPSIZE.  The master
 * copy of the mappings is stored in mach0->pdb, and they are
 * paged in from there as necessary by vmapsync during faults.
 */

static Lock vmaplock;

static int findhole(ulong *a, int n, int count);
static ulong vmapalloc(ulong size);
static void pdbunmap(ulong*, ulong, int);

/*
 * Add a device mapping to the vmap range.
 */
void*
vmap(ulong pa, int size)
{
	int osize;
	ulong o, va;

	/*
	 * might be asking for less than a page.
	 */
	osize = size;
	o = pa & (BY2PG-1);
	pa -= o;
	size += o;

	size = ROUND(size, BY2PG);
	if(pa == 0){
		print("vmap pa=0 pc=%#p\n", getcallerpc(&pa));
		return nil;
	}
	ilock(&vmaplock);
	if((va = vmapalloc(size)) == 0
	|| pdbmap(MACHP(0)->pdb, pa|PTEUNCACHED|PTEWRITE, va, size) < 0){
		iunlock(&vmaplock);
		return 0;
	}
	iunlock(&vmaplock);
	/* avoid trap on local processor
	for(i=0; i<size; i+=4*MB)
		vmapsync(va+i);
	*/
	USED(osize);
//	print("  vmap %#.8lux %d => %#.8lux\n", pa+o, osize, va+o);
	return (void*)(va + o);
}

static int
findhole(ulong *a, int n, int count)
{
	int have, i;

	have = 0;
	for(i=0; i<n; i++){
		if(a[i] == 0)
			have++;
		else
			have = 0;
		if(have >= count)
			return i+1 - have;
	}
	return -1;
}

/*
 * Look for free space in the vmap.
 */
static ulong
vmapalloc(ulong size)
{
	int i, n, o;
	ulong *vpdb;
	int vpdbsize;

	vpdb = &MACHP(0)->pdb[PDX(VMAP)];
	vpdbsize = VMAPSIZE/(4*MB);

	if(size >= 4*MB){
		n = (size+4*MB-1) / (4*MB);
		if((o = findhole(vpdb, vpdbsize, n)) != -1)
			return VMAP + o*4*MB;
		return 0;
	}
	n = (size+BY2PG-1) / BY2PG;
	for(i=0; i<vpdbsize; i++)
		if((vpdb[i]&PTEVALID) && !(vpdb[i]&PTESIZE))
			if((o = findhole(KADDR(PPN(vpdb[i])), WD2PG, n)) != -1)
				return VMAP + i*4*MB + o*BY2PG;
	if((o = findhole(vpdb, vpdbsize, 1)) != -1)
		return VMAP + o*4*MB;

	/*
	 * could span page directory entries, but not worth the trouble.
	 * not going to be very much contention.
	 */
	return 0;
}

/*
 * Remove a device mapping from the vmap range.
 * Since pdbunmap does not remove page tables, just entries,
 * the call need not be interlocked with vmap.
 */
void
vunmap(void *v, int size)
{
	int i;
	ulong va, o;
	Mach *nm;
	Proc *p;

	/*
	 * might not be aligned
	 */
	va = (ulong)v;
	o = va&(BY2PG-1);
	va -= o;
	size += o;
	size = ROUND(size, BY2PG);

	if(size < 0 || va < VMAP || va+size > VMAP+VMAPSIZE)
		panic("vunmap va=%#.8lux size=%#x pc=%#.8lux",
			va, size, getcallerpc(&v));

	pdbunmap(MACHP(0)->pdb, va, size);

	/*
	 * Flush mapping from all the tlbs and copied pdbs.
	 * This can be (and is) slow, since it is called only rarely.
	 * It is possible for vunmap to be called with up == nil,
	 * e.g. from the reset/init driver routines during system
	 * boot. In that case it suffices to flush the MACH(0) TLB
	 * and return.
	 */
	if(!active.thunderbirdsarego){
		putcr3(PADDR(MACHP(0)->pdb));
		return;
	}
	for(i=0; i<conf.nproc; i++){
		p = proctab(i);
		if(p->state == Dead)
			continue;
		if(p != up)
			p->newtlb = 1;
	}
	for(i=0; i<conf.nmach; i++){
		nm = MACHP(i);
		if(nm != m)
			nm->flushmmu = 1;
	}
	flushmmu();
	for(i=0; i<conf.nmach; i++){
		nm = MACHP(i);
		if(nm != m)
			while((active.machs&(1<<nm->machno)) && nm->flushmmu)
				;
	}
}

/*
 * Add kernel mappings for pa -> va for a section of size bytes.
 */
int
pdbmap(ulong *pdb, ulong pa, ulong va, int size)
{
	int pse;
	ulong pgsz, *pte, *table;
	ulong flag, off;

	flag = pa&0xFFF;
	pa &= ~0xFFF;

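	/* 4MB pages need both the cpuid PSE flag and the PSE enable bit (0x10) in cr4 */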
	if((MACHP(0)->cpuiddx & Pse) && (getcr4() & 0x10))
		pse = 1;
	else
		pse = 0;

	for(off=0; off<size; off+=pgsz){
		table = &pdb[PDX(va+off)];
		if((*table&PTEVALID) && (*table&PTESIZE))
			panic("vmap: va=%#.8lux pa=%#.8lux pde=%#.8lux",
				va+off, pa+off, *table);

		/*
		 * Check if it can be mapped using a 4MB page:
		 * va, pa aligned and size >= 4MB and processor can do it.
		 */
		if(pse && (pa+off)%(4*MB) == 0 && (va+off)%(4*MB) == 0 && (size-off) >= 4*MB){
			*table = (pa+off)|flag|PTESIZE|PTEVALID;
			pgsz = 4*MB;
		}else{
			pte = mmuwalk(pdb, va+off, 2, 1);
			if(*pte&PTEVALID)
				panic("vmap: va=%#.8lux pa=%#.8lux pte=%#.8lux",
					va+off, pa+off, *pte);
			*pte = (pa+off)|flag|PTEVALID;
			pgsz = BY2PG;
		}
	}
	return 0;
}

/*
 * Remove mappings.  Must already exist, for sanity.
 * Only used for kernel mappings, so okay to use KADDR.
 */
static void
pdbunmap(ulong *pdb, ulong va, int size)
{
	ulong vae;
	ulong *table;

	vae = va+size;
	while(va < vae){
		table = &pdb[PDX(va)];
		if(!(*table & PTEVALID)){
			panic("vunmap: not mapped");
			/*
			va = (va+4*MB-1) & ~(4*MB-1);
			continue;
			*/
		}
		if(*table & PTESIZE){
			*table = 0;
			va = (va+4*MB-1) & ~(4*MB-1);
			continue;
		}
		table = KADDR(PPN(*table));
		if(!(table[PTX(va)] & PTEVALID))
			panic("vunmap: not mapped");
		table[PTX(va)] = 0;
		va += BY2PG;
	}
}

/*
 * Handle a fault by bringing vmap up to date.
 * Only copy pdb entries and they never go away,
 * so no locking needed.
 */
int
vmapsync(ulong va)
{
	ulong entry, *table;

	if(va < VMAP || va >= VMAP+VMAPSIZE)
		return 0;

	entry = MACHP(0)->pdb[PDX(va)];
	if(!(entry&PTEVALID))
		return 0;
	if(!(entry&PTESIZE)){
		/* make sure entry will help the fault */
		table = KADDR(PPN(entry));
		if(!(table[PTX(va)]&PTEVALID))
			return 0;
	}
	vpd[PDX(va)] = entry;
	/*
	 * TLB doesn't cache negative results, so no flush needed.
	 */
	return 1;
}

/*
 * KMap is used to map individual pages into virtual memory.
 * It is rare to have more than a few KMaps at a time (in the
 * absence of interrupts, only two at a time are ever used,
 * but interrupts can stack).  The mappings are local to a process,
 * so we can use the same range of virtual address space for
 * all processes without any coordination.
 */
#define kpt (vpt+VPTX(KMAP))
#define NKPT (KMAPSIZE/BY2PG)

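/*
 * Via the VPT, kpt[i] is the second-level PTE that maps KMAP+i*BY2PG,
 * so the per-process kmap page table can be edited directly once it
 * has been installed at vpd[PDX(KMAP)] below.
 */
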
KMap*
kmap(Page *page)
{
	int i, o, s;

	if(up == nil)
		panic("kmap: up=0 pc=%#.8lux", getcallerpc(&page));
	if(up->mmupdb == nil)
		upallocpdb();
	if(up->nkmap < 0)
		panic("kmap %lud %s: nkmap=%d", up->pid, up->text, up->nkmap);

	/*
	 * Splhi shouldn't be necessary here, but paranoia reigns.
	 * See comment in putmmu above.
	 */
	s = splhi();
	up->nkmap++;
	if(!(vpd[PDX(KMAP)]&PTEVALID)){
		/* allocate page directory */
		if(KMAPSIZE > BY2XPG)
			panic("bad kmapsize");
		if(up->kmaptable != nil)
			panic("kmaptable");
		spllo();
		up->kmaptable = newpage(0, 0, 0);
		splhi();
		vpd[PDX(KMAP)] = up->kmaptable->pa|PTEWRITE|PTEVALID;
		flushpg((ulong)kpt);
		memset(kpt, 0, BY2PG);
		kpt[0] = page->pa|PTEWRITE|PTEVALID;
		up->lastkmap = 0;
		splx(s);
		return (KMap*)KMAP;
	}
	if(up->kmaptable == nil)
		panic("no kmaptable");
	o = up->lastkmap+1;
	for(i=0; i<NKPT; i++){
		if(kpt[(i+o)%NKPT] == 0){
			o = (i+o)%NKPT;
			kpt[o] = page->pa|PTEWRITE|PTEVALID;
			up->lastkmap = o;
			splx(s);
			return (KMap*)(KMAP+o*BY2PG);
		}
	}
	panic("out of kmap");
	return nil;
}

void
kunmap(KMap *k)
{
	ulong va;

	va = (ulong)k;
	if(up->mmupdb == nil || !(vpd[PDX(KMAP)]&PTEVALID))
		panic("kunmap: no kmaps");
	if(va < KMAP || va >= KMAP+KMAPSIZE)
		panic("kunmap: bad address %#.8lux pc=%#p", va, getcallerpc(&k));
	if(!(vpt[VPTX(va)]&PTEVALID))
		panic("kunmap: not mapped %#.8lux pc=%#p", va, getcallerpc(&k));
	up->nkmap--;
	if(up->nkmap < 0)
		panic("kunmap %lud %s: nkmap=%d", up->pid, up->text, up->nkmap);
	vpt[VPTX(va)] = 0;
	flushpg(va);
}

/*
 * Temporary one-page mapping used to edit page directories.
 *
 * The fasttmp #define controls whether the code optimizes
 * the case where the page is already mapped in the physical
 * memory window.
 */
#define fasttmp 1

void*
tmpmap(Page *p)
{
	ulong i;
	ulong *entry;

	if(islo())
		panic("tmpaddr: islo");

	if(fasttmp && p->pa < -KZERO)
		return KADDR(p->pa);

	/*
	 * PDX(TMPADDR) == PDX(MACHADDR), so this
	 * entry is private to the processor and shared
	 * between up->mmupdb (if any) and m->pdb.
	 */
	entry = &vpt[VPTX(TMPADDR)];
	if(!(*entry&PTEVALID)){
		for(i=KZERO; i<=CPU0MACH; i+=BY2PG)
			print("%#p: *%#p=%#p (vpt=%#p index=%#p)\n", i, &vpt[VPTX(i)], vpt[VPTX(i)], vpt, VPTX(i));
		panic("tmpmap: no entry");
	}
	if(PPN(*entry) != PPN(TMPADDR-KZERO))
		panic("tmpmap: already mapped entry=%#.8lux", *entry);
	*entry = p->pa|PTEWRITE|PTEVALID;
	flushpg(TMPADDR);
	return (void*)TMPADDR;
}

void
tmpunmap(void *v)
{
	ulong *entry;

	if(islo())
		panic("tmpaddr: islo");
	if(fasttmp && (ulong)v >= KZERO && v != (void*)TMPADDR)
		return;
	if(v != (void*)TMPADDR)
		panic("tmpunmap: bad address");
	entry = &vpt[VPTX(TMPADDR)];
	if(!(*entry&PTEVALID) || PPN(*entry) == PPN(PADDR(TMPADDR)))
		panic("tmpmap: not mapped entry=%#.8lux", *entry);
	*entry = PPN(TMPADDR-KZERO)|PTEWRITE|PTEVALID;
	flushpg(TMPADDR);
}

/*
 * These could go back to being macros once the kernel is debugged,
 * but the extra checking is nice to have.
 */
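/*
 * Note that as an unsigned quantity -KZERO is 4GB-KZERO, the size of
 * the window that KADDR can reach, so a physical address pa can be
 * translated exactly when it is below -KZERO (see cankaddr below).
 */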
void*
kaddr(ulong pa)
{
	if(pa > (ulong)-KZERO)
		panic("kaddr: pa=%#.8lux", pa);
	return (void*)(pa+KZERO);
}

ulong
paddr(void *v)
{
	ulong va;

	va = (ulong)v;
	if(va < KZERO)
		panic("paddr: va=%#.8lux pc=%#p", va, getcallerpc(&v));
	return va-KZERO;
}

/*
 * More debugging.
 */
void
countpagerefs(ulong *ref, int print)
{
	int i, n;
	Mach *mm;
	Page *pg;
	Proc *p;

	n = 0;
	for(i=0; i<conf.nproc; i++){
		p = proctab(i);
		if(p->mmupdb){
			if(print){
				if(ref[pagenumber(p->mmupdb)])
					iprint("page %#.8lux is proc %d (pid %lud) pdb\n",
						p->mmupdb->pa, i, p->pid);
				continue;
			}
			if(ref[pagenumber(p->mmupdb)]++ == 0)
				n++;
			else
				iprint("page %#.8lux is proc %d (pid %lud) pdb but has other refs!\n",
					p->mmupdb->pa, i, p->pid);
		}
		if(p->kmaptable){
			if(print){
				if(ref[pagenumber(p->kmaptable)])
					iprint("page %#.8lux is proc %d (pid %lud) kmaptable\n",
						p->kmaptable->pa, i, p->pid);
				continue;
			}
			if(ref[pagenumber(p->kmaptable)]++ == 0)
				n++;
			else
				iprint("page %#.8lux is proc %d (pid %lud) kmaptable but has other refs!\n",
					p->kmaptable->pa, i, p->pid);
		}
		for(pg=p->mmuused; pg; pg=pg->next){
			if(print){
				if(ref[pagenumber(pg)])
					iprint("page %#.8lux is on proc %d (pid %lud) mmuused\n",
						pg->pa, i, p->pid);
				continue;
			}
			if(ref[pagenumber(pg)]++ == 0)
				n++;
			else
				iprint("page %#.8lux is on proc %d (pid %lud) mmuused but has other refs!\n",
					pg->pa, i, p->pid);
		}
		for(pg=p->mmufree; pg; pg=pg->next){
			if(print){
				if(ref[pagenumber(pg)])
					iprint("page %#.8lux is on proc %d (pid %lud) mmufree\n",
						pg->pa, i, p->pid);
				continue;
			}
			if(ref[pagenumber(pg)]++ == 0)
				n++;
			else
				iprint("page %#.8lux is on proc %d (pid %lud) mmufree but has other refs!\n",
					pg->pa, i, p->pid);
		}
	}
	if(!print)
		iprint("%d pages in proc mmu\n", n);
	n = 0;
	for(i=0; i<conf.nmach; i++){
		mm = MACHP(i);
		for(pg=mm->pdbpool; pg; pg=pg->next){
			if(print){
				if(ref[pagenumber(pg)])
					iprint("page %#.8lux is in cpu%d pdbpool\n",
						pg->pa, i);
				continue;
			}
			if(ref[pagenumber(pg)]++ == 0)
				n++;
			else
				iprint("page %#.8lux is in cpu%d pdbpool but has other refs!\n",
					pg->pa, i);
		}
	}
	if(!print){
		iprint("%d pages in mach pdbpools\n", n);
		for(i=0; i<conf.nmach; i++)
			iprint("cpu%d: %d pdballoc, %d pdbfree\n",
				i, MACHP(i)->pdballoc, MACHP(i)->pdbfree);
	}
}

void
checkfault(ulong, ulong)
{
}

/*
 * Return the number of bytes that can be accessed via KADDR(pa).
 * If pa is not a valid argument to KADDR, return 0.
 */
ulong
cankaddr(ulong pa)
{
	if(pa >= -KZERO)
		return 0;
	return -KZERO - pa;
}
1065