     1 /*	$NetBSD: pmap.c,v 1.42 1999/02/26 22:03:29 is Exp $	*/
2
3 /*-
4 * Copyright (c) 1996, 1997 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Jeremy Cooper.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 * must display the following acknowledgement:
20 * This product includes software developed by the NetBSD
21 * Foundation, Inc. and its contributors.
22 * 4. Neither the name of The NetBSD Foundation nor the names of its
23 * contributors may be used to endorse or promote products derived
24 * from this software without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
27 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
28 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
30 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 * POSSIBILITY OF SUCH DAMAGE.
37 */
38
39 /*
40 * XXX These comments aren't quite accurate. Need to change.
41 * The sun3x uses the MC68851 Memory Management Unit, which is built
42 * into the CPU. The 68851 maps virtual to physical addresses using
43 * a multi-level table lookup, which is stored in the very memory that
44 * it maps. The number of levels of lookup is configurable from one
45 * to four. In this implementation, we use three, named 'A' through 'C'.
46 *
47 * The MMU translates virtual addresses into physical addresses by
    48  * traversing these tables in a process called a 'table walk'.  The most
49 * significant 7 bits of the Virtual Address ('VA') being translated are
50 * used as an index into the level A table, whose base in physical memory
51 * is stored in a special MMU register, the 'CPU Root Pointer' or CRP. The
52 * address found at that index in the A table is used as the base
53 * address for the next table, the B table. The next six bits of the VA are
54 * used as an index into the B table, which in turn gives the base address
55 * of the third and final C table.
56 *
57 * The next six bits of the VA are used as an index into the C table to
    58  * locate a Page Table Entry (PTE).  The PTE holds the physical base address
    59  * of the page, to which the remaining 13 bits of the VA are added as the
    60  * page offset, producing the mapped physical address.  (An illustrative
        * sketch of this decomposition follows this comment block.)
61 *
62 * To map the entire memory space in this manner would require 2114296 bytes
63 * of page tables per process - quite expensive. Instead we will
64 * allocate a fixed but considerably smaller space for the page tables at
65 * the time the VM system is initialized. When the pmap code is asked by
66 * the kernel to map a VA to a PA, it allocates tables as needed from this
67 * pool. When there are no more tables in the pool, tables are stolen
68 * from the oldest mapped entries in the tree. This is only possible
69 * because all memory mappings are stored in the kernel memory map
70 * structures, independent of the pmap structures. A VA which references
71 * one of these invalidated maps will cause a page fault. The kernel
72 * will determine that the page fault was caused by a task using a valid
73 * VA, but for some reason (which does not concern it), that address was
74 * not mapped. It will ask the pmap code to re-map the entry and then
75 * it will resume executing the faulting task.
76 *
77 * In this manner the most efficient use of the page table space is
78 * achieved. Tasks which do not execute often will have their tables
79 * stolen and reused by tasks which execute more frequently. The best
80 * size for the page table pool will probably be determined by
81 * experimentation.
82 *
83 * You read all of the comments so far. Good for you.
84 * Now go play!
85 */
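/*
 * Illustrative sketch (kept under "#if 0"; not compiled): a helper that
 * prints the 7/6/6/13 bit decomposition described above, using the
 * MMU_TIA/TIB/TIC index macros that the rest of this file relies on.
 * The helper name and printf formats are ours, for illustration only.
 * For example, a VA of 0x12345678 splits into A index 9, B index 6,
 * C index 34 and page offset 0x1678.
 */
#if 0
static void
pmap_show_table_walk(va)
	vm_offset_t va;
{
	printf("va 0x%lx: A idx %d, B idx %d, C idx %d, page offset 0x%lx\n",
	    (u_long) va,
	    (int) MMU_TIA(va),			/* most significant 7 bits */
	    (int) MMU_TIB(va),			/* next 6 bits */
	    (int) MMU_TIC(va),			/* next 6 bits */
	    (u_long) (va & (NBPG - 1)));	/* remaining 13 bits */
}
#endif	/* 0 */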
86
87 /*** A Note About the 68851 Address Translation Cache
88 * The MC68851 has a 64 entry cache, called the Address Translation Cache
89 * or 'ATC'. This cache stores the most recently used page descriptors
90 * accessed by the MMU when it does translations. Using a marker called a
91 * 'task alias' the MMU can store the descriptors from 8 different table
92 * spaces concurrently. The task alias is associated with the base
93 * address of the level A table of that address space. When an address
94 * space is currently active (the CRP currently points to its A table)
95 * the only cached descriptors that will be obeyed are ones which have a
96 * matching task alias of the current space associated with them.
97 *
98 * Since the cache is always consulted before any table lookups are done,
99 * it is important that it accurately reflect the state of the MMU tables.
100 * Whenever a change has been made to a table that has been loaded into
101 * the MMU, the code must be sure to flush any cached entries that are
102 * affected by the change. These instances are documented in the code at
103 * various points.
104 */
105 /*** A Note About the Note About the 68851 Address Translation Cache
106 * 4 months into this code I discovered that the sun3x does not have
   107  * an MC68851 chip. Instead, it has a version of this MMU that is part of
   108  * the 68030 CPU.
   109  * Although it behaves very similarly to the 68851, it only has 1 task
   110  * alias and a 22-entry cache.  So sadly (or happily), the first paragraph
111 * of the previous note does not apply to the sun3x pmap.
112 */
113
114 #include "opt_uvm.h"
115
116 #include <sys/param.h>
117 #include <sys/systm.h>
118 #include <sys/proc.h>
119 #include <sys/malloc.h>
120 #include <sys/user.h>
121 #include <sys/queue.h>
122 #include <sys/kcore.h>
123
124 #include <vm/vm.h>
125 #include <vm/vm_kern.h>
126 #include <vm/vm_page.h>
127
128 #if defined(UVM)
129 #include <uvm/uvm.h>
130 /* XXX - Gratuitous name changes... */
131 #define vm_set_page_size uvm_setpagesize
132 /* XXX - Pager hacks... (explain?) */
133 #define PAGER_SVA (uvm.pager_sva)
134 #define PAGER_EVA (uvm.pager_eva)
135 #else /* UVM */
136 extern vm_offset_t pager_sva, pager_eva;
137 #define PAGER_SVA (pager_sva)
138 #define PAGER_EVA (pager_eva)
139 #endif /* UVM */
140
141 #include <machine/cpu.h>
142 #include <machine/kcore.h>
143 #include <machine/mon.h>
144 #include <machine/pmap.h>
145 #include <machine/pte.h>
146 #include <machine/vmparam.h>
147
148 #include <sun3/sun3/cache.h>
149 #include <sun3/sun3/machdep.h>
150
151 #include "pmap_pvt.h"
152
153 /* XXX - What headers declare these? */
154 extern struct pcb *curpcb;
155 extern int physmem;
156
157 extern void copypage __P((const void*, void*));
158 extern void zeropage __P((void*));
159
160 /* Defined in locore.s */
161 extern char kernel_text[];
162
163 /* Defined by the linker */
164 extern char etext[], edata[], end[];
165 extern char *esym; /* DDB */
166
167 /*************************** DEBUGGING DEFINITIONS ***********************
168 * Macros, preprocessor defines and variables used in debugging can make *
169 * code hard to read. Anything used exclusively for debugging purposes *
170 * is defined here to avoid having such mess scattered around the file. *
171 *************************************************************************/
172 #ifdef PMAP_DEBUG
173 /*
174 * To aid the debugging process, macros should be expanded into smaller steps
175 * that accomplish the same goal, yet provide convenient places for placing
176 * breakpoints. When this code is compiled with PMAP_DEBUG mode defined, the
177 * 'INLINE' keyword is defined to an empty string. This way, any function
178 * defined to be a 'static INLINE' will become 'outlined' and compiled as
179 * a separate function, which is much easier to debug.
180 */
181 #define INLINE /* nothing */
182
183 /*
184 * It is sometimes convenient to watch the activity of a particular table
185 * in the system. The following variables are used for that purpose.
186 */
187 a_tmgr_t *pmap_watch_atbl = 0;
188 b_tmgr_t *pmap_watch_btbl = 0;
189 c_tmgr_t *pmap_watch_ctbl = 0;
190
191 int pmap_debug = 0;
192 #define DPRINT(args) if (pmap_debug) printf args
193
194 #else /********** Stuff below is defined if NOT debugging **************/
195
196 #define INLINE inline
197 #define DPRINT(args) /* nada */
198
199 #endif /* PMAP_DEBUG */
200 /*********************** END OF DEBUGGING DEFINITIONS ********************/
201
202 /*** Management Structure - Memory Layout
203 * For every MMU table in the sun3x pmap system there must be a way to
204 * manage it; we must know which process is using it, what other tables
205 * depend on it, and whether or not it contains any locked pages. This
   206  * is solved by the creation of 'table management' or 'tmgr'
   207  * structures, one for each MMU table in the system.
208 *
209 * MAP OF MEMORY USED BY THE PMAP SYSTEM
210 *
211 * towards lower memory
212 * kernAbase -> +-------------------------------------------------------+
213 * | Kernel MMU A level table |
214 * kernBbase -> +-------------------------------------------------------+
215 * | Kernel MMU B level tables |
216 * kernCbase -> +-------------------------------------------------------+
217 * | |
218 * | Kernel MMU C level tables |
219 * | |
220 * mmuCbase -> +-------------------------------------------------------+
221 * | User MMU C level tables |
222 * mmuAbase -> +-------------------------------------------------------+
223 * | |
224 * | User MMU A level tables |
225 * | |
226 * mmuBbase -> +-------------------------------------------------------+
227 * | User MMU B level tables |
228 * tmgrAbase -> +-------------------------------------------------------+
229 * | TMGR A level table structures |
230 * tmgrBbase -> +-------------------------------------------------------+
231 * | TMGR B level table structures |
232 * tmgrCbase -> +-------------------------------------------------------+
233 * | TMGR C level table structures |
234 * pvbase -> +-------------------------------------------------------+
235 * | Physical to Virtual mapping table (list heads) |
236 * pvebase -> +-------------------------------------------------------+
237 * | Physical to Virtual mapping table (list elements) |
238 * | |
239 * +-------------------------------------------------------+
240 * towards higher memory
241 *
242 * For every A table in the MMU A area, there will be a corresponding
243 * a_tmgr structure in the TMGR A area. The same will be true for
244 * the B and C tables. This arrangement will make it easy to find the
   245  * controlling tmgr structure for any table in the system by use of
246 * (relatively) simple macros.
247 */
248
249 /*
250 * Global variables for storing the base addresses for the areas
251 * labeled above.
252 */
253 static vm_offset_t kernAphys;
254 static mmu_long_dte_t *kernAbase;
255 static mmu_short_dte_t *kernBbase;
256 static mmu_short_pte_t *kernCbase;
257 static mmu_short_pte_t *mmuCbase;
258 static mmu_short_dte_t *mmuBbase;
259 static mmu_long_dte_t *mmuAbase;
260 static a_tmgr_t *Atmgrbase;
261 static b_tmgr_t *Btmgrbase;
262 static c_tmgr_t *Ctmgrbase;
263 static pv_t *pvbase;
264 static pv_elem_t *pvebase;
265 struct pmap kernel_pmap;
266
267 /*
268 * This holds the CRP currently loaded into the MMU.
269 */
270 struct mmu_rootptr kernel_crp;
271
272 /*
273 * Just all around global variables.
274 */
275 static TAILQ_HEAD(a_pool_head_struct, a_tmgr_struct) a_pool;
276 static TAILQ_HEAD(b_pool_head_struct, b_tmgr_struct) b_pool;
277 static TAILQ_HEAD(c_pool_head_struct, c_tmgr_struct) c_pool;
278
279
280 /*
281 * Flags used to mark the safety/availability of certain operations or
282 * resources.
283 */
284 static boolean_t pv_initialized = FALSE, /* PV system has been initialized. */
285 bootstrap_alloc_enabled = FALSE; /*Safe to use pmap_bootstrap_alloc().*/
286 int tmp_vpages_inuse; /* Temporary virtual pages are in use */
287
288 /*
289 * XXX: For now, retain the traditional variables that were
290 * used in the old pmap/vm interface (without NONCONTIG).
291 */
292 /* Kernel virtual address space available: */
293 vm_offset_t virtual_avail, virtual_end;
294 /* Physical address space available: */
295 vm_offset_t avail_start, avail_end;
296
   297 /* This keeps track of the end of the contiguously mapped range. */
298 vm_offset_t virtual_contig_end;
299
300 /* Physical address used by pmap_next_page() */
301 vm_offset_t avail_next;
302
303 /* These are used by pmap_copy_page(), etc. */
304 vm_offset_t tmp_vpages[2];
305
306 /*
307 * The 3/80 is the only member of the sun3x family that has non-contiguous
308 * physical memory. Memory is divided into 4 banks which are physically
309 * locatable on the system board. Although the size of these banks varies
310 * with the size of memory they contain, their base addresses are
   311  * permanently fixed.  The following structure, which describes these
312 * banks, is initialized by pmap_bootstrap() after it reads from a similar
313 * structure provided by the ROM Monitor.
314 *
315 * For the other machines in the sun3x architecture which do have contiguous
316 * RAM, this list will have only one entry, which will describe the entire
317 * range of available memory.
318 */
319 struct pmap_physmem_struct avail_mem[SUN3X_NPHYS_RAM_SEGS];
320 u_int total_phys_mem;
321
322 /*************************************************************************/
323
324 /*
325 * XXX - Should "tune" these based on statistics.
326 *
327 * My first guess about the relative numbers of these needed is
328 * based on the fact that a "typical" process will have several
329 * pages mapped at low virtual addresses (text, data, bss), then
330 * some mapped shared libraries, and then some stack pages mapped
331 * near the high end of the VA space. Each process can use only
332 * one A table, and most will use only two B tables (maybe three)
333 * and probably about four C tables. Therefore, the first guess
334 * at the relative numbers of these needed is 1:2:4 -gwr
335 *
336 * The number of C tables needed is closely related to the amount
337 * of physical memory available plus a certain amount attributable
338 * to the use of double mappings. With a few simulation statistics
339 * we can find a reasonably good estimation of this unknown value.
340 * Armed with that and the above ratios, we have a good idea of what
341 * is needed at each level. -j
342 *
   343  * Note: It is not the physical memory size, but the total mapped
344 * virtual space required by the combined working sets of all the
345 * currently _runnable_ processes. (Sleeping ones don't count.)
346 * The amount of physical memory should be irrelevant. -gwr
347 */
348 #ifdef FIXED_NTABLES
349 #define NUM_A_TABLES 16
350 #define NUM_B_TABLES 32
351 #define NUM_C_TABLES 64
352 #else
353 unsigned int NUM_A_TABLES, NUM_B_TABLES, NUM_C_TABLES;
354 #endif /* FIXED_NTABLES */
355
356 /*
357 * This determines our total virtual mapping capacity.
358 * Yes, it is a FIXED value so we can pre-allocate.
359 */
360 #define NUM_USER_PTES (NUM_C_TABLES * MMU_C_TBL_SIZE)
361
362 /*
363 * The size of the Kernel Virtual Address Space (KVAS)
364 * for purposes of MMU table allocation is -KERNBASE
365 * (length from KERNBASE to 0xFFFFffff)
366 */
367 #define KVAS_SIZE (-KERNBASE)
368
369 /* Numbers of kernel MMU tables to support KVAS_SIZE. */
370 #define KERN_B_TABLES (KVAS_SIZE >> MMU_TIA_SHIFT)
371 #define KERN_C_TABLES (KVAS_SIZE >> MMU_TIB_SHIFT)
372 #define NUM_KERN_PTES (KVAS_SIZE >> MMU_TIC_SHIFT)
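/*
 * Worked example (illustrative; the real values come from the machine
 * headers): with the 7/6/6/13 bit split described at the top of this
 * file, MMU_TIA_SHIFT, MMU_TIB_SHIFT and MMU_TIC_SHIFT correspond to
 * shifts of 25, 19 and 13.  If KERNBASE were, say, 0xF8000000, then
 * KVAS_SIZE would be 0x08000000 (128MB) and the macros above would
 * yield KERN_B_TABLES = 4, KERN_C_TABLES = 256, NUM_KERN_PTES = 16384.
 */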
373
   374 /*************************** MISCELLANEOUS MACROS *************************/
375 #define PMAP_LOCK() ; /* Nothing, for now */
376 #define PMAP_UNLOCK() ; /* same. */
377 #define NULL 0
378
379 static INLINE void * mmu_ptov __P((vm_offset_t pa));
380 static INLINE vm_offset_t mmu_vtop __P((void * va));
381
382 #if 0
383 static INLINE a_tmgr_t * mmuA2tmgr __P((mmu_long_dte_t *));
384 #endif /* 0 */
385 static INLINE b_tmgr_t * mmuB2tmgr __P((mmu_short_dte_t *));
386 static INLINE c_tmgr_t * mmuC2tmgr __P((mmu_short_pte_t *));
387
388 static INLINE pv_t *pa2pv __P((vm_offset_t pa));
389 static INLINE int pteidx __P((mmu_short_pte_t *));
390 static INLINE pmap_t current_pmap __P((void));
391
392 /*
393 * We can always convert between virtual and physical addresses
394 * for anything in the range [KERNBASE ... avail_start] because
395 * that range is GUARANTEED to be mapped linearly.
396 * We rely heavily upon this feature!
397 */
398 static INLINE void *
399 mmu_ptov(pa)
400 vm_offset_t pa;
401 {
402 register vm_offset_t va;
403
404 va = (pa + KERNBASE);
405 #ifdef PMAP_DEBUG
406 if ((va < KERNBASE) || (va >= virtual_contig_end))
407 panic("mmu_ptov");
408 #endif
409 return ((void*)va);
410 }
411 static INLINE vm_offset_t
412 mmu_vtop(vva)
413 void *vva;
414 {
415 register vm_offset_t va;
416
417 va = (vm_offset_t)vva;
418 #ifdef PMAP_DEBUG
419 if ((va < KERNBASE) || (va >= virtual_contig_end))
   420 			panic("mmu_vtop");
421 #endif
422 return (va - KERNBASE);
423 }
424
425 /*
   426  * These functions map MMU tables to their corresponding manager structures.
427 * They are needed quite often because many of the pointers in the pmap
428 * system reference MMU tables and not the structures that control them.
429 * There needs to be a way to find one when given the other and these
430 * macros do so by taking advantage of the memory layout described above.
   431  * Here's a quick step through the first one, mmuA2tmgr():
432 *
433 * 1) find the offset of the given MMU A table from the base of its table
434 * pool (table - mmuAbase).
435 * 2) convert this offset into a table index by dividing it by the
436 * size of one MMU 'A' table. (sizeof(mmu_long_dte_t) * MMU_A_TBL_SIZE)
437 * 3) use this index to select the corresponding 'A' table manager
438 * structure from the 'A' table manager pool (Atmgrbase[index]).
439 */
440 /* This function is not currently used. */
441 #if 0
442 static INLINE a_tmgr_t *
443 mmuA2tmgr(mmuAtbl)
444 mmu_long_dte_t *mmuAtbl;
445 {
446 register int idx;
447
448 /* Which table is this in? */
449 idx = (mmuAtbl - mmuAbase) / MMU_A_TBL_SIZE;
450 #ifdef PMAP_DEBUG
451 if ((idx < 0) || (idx >= NUM_A_TABLES))
452 panic("mmuA2tmgr");
453 #endif
454 return (&Atmgrbase[idx]);
455 }
456 #endif /* 0 */
457
458 static INLINE b_tmgr_t *
459 mmuB2tmgr(mmuBtbl)
460 mmu_short_dte_t *mmuBtbl;
461 {
462 register int idx;
463
464 /* Which table is this in? */
465 idx = (mmuBtbl - mmuBbase) / MMU_B_TBL_SIZE;
466 #ifdef PMAP_DEBUG
467 if ((idx < 0) || (idx >= NUM_B_TABLES))
468 panic("mmuB2tmgr");
469 #endif
470 return (&Btmgrbase[idx]);
471 }
472
473 /* mmuC2tmgr INTERNAL
474 **
475 * Given a pte known to belong to a C table, return the address of
476 * that table's management structure.
477 */
478 static INLINE c_tmgr_t *
479 mmuC2tmgr(mmuCtbl)
480 mmu_short_pte_t *mmuCtbl;
481 {
482 register int idx;
483
484 /* Which table is this in? */
485 idx = (mmuCtbl - mmuCbase) / MMU_C_TBL_SIZE;
486 #ifdef PMAP_DEBUG
487 if ((idx < 0) || (idx >= NUM_C_TABLES))
488 panic("mmuC2tmgr");
489 #endif
490 return (&Ctmgrbase[idx]);
491 }
492
493 /* This is now a function call below.
494 * #define pa2pv(pa) \
495 * (&pvbase[(unsigned long)\
496 * m68k_btop(pa)\
497 * ])
498 */
499
500 /* pa2pv INTERNAL
501 **
502 * Return the pv_list_head element which manages the given physical
503 * address.
504 */
505 static INLINE pv_t *
506 pa2pv(pa)
507 vm_offset_t pa;
508 {
509 register struct pmap_physmem_struct *bank;
510 register int idx;
511
512 bank = &avail_mem[0];
513 while (pa >= bank->pmem_end)
514 bank = bank->pmem_next;
515
516 pa -= bank->pmem_start;
517 idx = bank->pmem_pvbase + m68k_btop(pa);
518 #ifdef PMAP_DEBUG
519 if ((idx < 0) || (idx >= physmem))
520 panic("pa2pv");
521 #endif
522 return &pvbase[idx];
523 }
524
525 /* pteidx INTERNAL
526 **
527 * Return the index of the given PTE within the entire fixed table of
528 * PTEs.
529 */
530 static INLINE int
531 pteidx(pte)
532 mmu_short_pte_t *pte;
533 {
534 return (pte - kernCbase);
535 }
536
537 /*
538 * This just offers a place to put some debugging checks,
539 * and reduces the number of places "curproc" appears...
540 */
541 static INLINE pmap_t
542 current_pmap()
543 {
544 struct proc *p;
545 struct vmspace *vm;
546 vm_map_t map;
547 pmap_t pmap;
548
549 p = curproc; /* XXX */
550 if (p == NULL)
551 pmap = &kernel_pmap;
552 else {
553 vm = p->p_vmspace;
554 map = &vm->vm_map;
555 pmap = vm_map_pmap(map);
556 }
557
558 return (pmap);
559 }
560
561
562 /*************************** FUNCTION DEFINITIONS ************************
563 * These appear here merely for the compiler to enforce type checking on *
564 * all function calls. *
565 *************************************************************************/
566
567 /** External functions
568 ** - functions used within this module but written elsewhere.
569 ** both of these functions are in locore.s
570 ** XXX - These functions were later replaced with their more cryptic
571 ** hp300 counterparts. They may be removed now.
572 **/
573 #if 0 /* deprecated mmu */
574 void mmu_seturp __P((vm_offset_t));
575 void mmu_flush __P((int, vm_offset_t));
576 void mmu_flusha __P((void));
577 #endif /* 0 */
578
579 /** Internal functions
580 ** Most functions used only within this module are defined in
581 ** pmap_pvt.h (why not here if used only here?)
582 **/
583 static void pmap_page_upload __P((void));
584
585 /** Interface functions
586 ** - functions required by the Mach VM Pmap interface, with MACHINE_CONTIG
587 ** defined.
588 **/
589 #ifdef INCLUDED_IN_PMAP_H
590 void pmap_bootstrap __P((void));
591 void *pmap_bootstrap_alloc __P((int));
592 void pmap_enter __P((pmap_t, vm_offset_t, vm_offset_t, vm_prot_t, boolean_t));
593 pmap_t pmap_create __P((vm_size_t));
594 void pmap_destroy __P((pmap_t));
595 void pmap_reference __P((pmap_t));
596 boolean_t pmap_is_referenced __P((vm_offset_t));
597 boolean_t pmap_is_modified __P((vm_offset_t));
598 void pmap_clear_modify __P((vm_offset_t));
599 vm_offset_t pmap_extract __P((pmap_t, vm_offset_t));
600 u_int pmap_free_pages __P((void));
601 #endif /* INCLUDED_IN_PMAP_H */
602 int pmap_page_index __P((vm_offset_t));
603 void pmap_pinit __P((pmap_t));
604 void pmap_release __P((pmap_t));
605
606 /********************************** CODE ********************************
607 * Functions that are called from other parts of the kernel are labeled *
608 * as 'INTERFACE' functions. Functions that are only called from *
609 * within the pmap module are labeled as 'INTERNAL' functions. *
610 * Functions that are internal, but are not (currently) used at all are *
611 * labeled 'INTERNAL_X'. *
612 ************************************************************************/
613
614 /* pmap_bootstrap INTERNAL
615 **
616 * Initializes the pmap system. Called at boot time from
617 * locore2.c:_vm_init()
618 *
619 * Reminder: having a pmap_bootstrap_alloc() and also having the VM
620 * system implement pmap_steal_memory() is redundant.
621 * Don't release this code without removing one or the other!
622 */
623 void
624 pmap_bootstrap(nextva)
625 vm_offset_t nextva;
626 {
627 struct physmemory *membank;
628 struct pmap_physmem_struct *pmap_membank;
629 vm_offset_t va, pa, eva;
630 int b, c, i, j; /* running table counts */
631 int size, resvmem;
632
633 /*
634 * This function is called by __bootstrap after it has
635 * determined the type of machine and made the appropriate
636 * patches to the ROM vectors (XXX- I don't quite know what I meant
637 * by that.) It allocates and sets up enough of the pmap system
638 * to manage the kernel's address space.
639 */
640
641 /*
642 * Determine the range of kernel virtual and physical
643 * space available. Note that we ABSOLUTELY DEPEND on
644 * the fact that the first bank of memory (4MB) is
645 * mapped linearly to KERNBASE (which we guaranteed in
646 * the first instructions of locore.s).
647 * That is plenty for our bootstrap work.
648 */
649 virtual_avail = m68k_round_page(nextva);
650 virtual_contig_end = KERNBASE + 0x400000; /* +4MB */
651 virtual_end = VM_MAX_KERNEL_ADDRESS;
652 /* Don't need avail_start til later. */
653
654 /* We may now call pmap_bootstrap_alloc(). */
655 bootstrap_alloc_enabled = TRUE;
656
657 /*
658 * This is a somewhat unwrapped loop to deal with
   659 	 * copying the PROM's 'physmem' banks into the pmap's
660 * banks. The following is always assumed:
661 * 1. There is always at least one bank of memory.
662 * 2. There is always a last bank of memory, and its
663 * pmem_next member must be set to NULL.
664 */
665 membank = romVectorPtr->v_physmemory;
666 pmap_membank = avail_mem;
667 total_phys_mem = 0;
668
669 for (;;) { /* break on !membank */
670 pmap_membank->pmem_start = membank->address;
671 pmap_membank->pmem_end = membank->address + membank->size;
672 total_phys_mem += membank->size;
673 membank = membank->next;
674 if (!membank)
675 break;
676 /* This silly syntax arises because pmap_membank
677 * is really a pre-allocated array, but it is put into
678 * use as a linked list.
679 */
680 pmap_membank->pmem_next = pmap_membank + 1;
681 pmap_membank = pmap_membank->pmem_next;
682 }
683 /* This is the last element. */
684 pmap_membank->pmem_next = NULL;
685
686 /*
687 * Note: total_phys_mem, physmem represent
688 * actual physical memory, including that
689 * reserved for the PROM monitor.
690 */
691 physmem = btoc(total_phys_mem);
692
693 /*
694 * The last bank of memory should be reduced to prevent the
695 * physical pages needed by the PROM monitor from being used
696 * in the VM system.
697 */
698 resvmem = total_phys_mem - *(romVectorPtr->memoryAvail);
699 resvmem = m68k_round_page(resvmem);
700 pmap_membank->pmem_end -= resvmem;
701
702 /*
703 * Avail_end is set to the first byte of physical memory
704 * after the end of the last bank. We use this only to
705 * determine if a physical address is "managed" memory.
706 */
707 avail_end = pmap_membank->pmem_end;
708
709 /*
710 * First allocate enough kernel MMU tables to map all
711 * of kernel virtual space from KERNBASE to 0xFFFFFFFF.
712 * Note: All must be aligned on 256 byte boundaries.
713 * Start with the level-A table (one of those).
714 */
715 size = sizeof(mmu_long_dte_t) * MMU_A_TBL_SIZE;
716 kernAbase = pmap_bootstrap_alloc(size);
717 bzero(kernAbase, size);
718
719 /* Now the level-B kernel tables... */
720 size = sizeof(mmu_short_dte_t) * MMU_B_TBL_SIZE * KERN_B_TABLES;
721 kernBbase = pmap_bootstrap_alloc(size);
722 bzero(kernBbase, size);
723
724 /* Now the level-C kernel tables... */
725 size = sizeof(mmu_short_pte_t) * MMU_C_TBL_SIZE * KERN_C_TABLES;
726 kernCbase = pmap_bootstrap_alloc(size);
727 bzero(kernCbase, size);
728 /*
729 * Note: In order for the PV system to work correctly, the kernel
730 * and user-level C tables must be allocated contiguously.
731 * Nothing should be allocated between here and the allocation of
732 * mmuCbase below. XXX: Should do this as one allocation, and
733 * then compute a pointer for mmuCbase instead of this...
734 *
735 * Allocate user MMU tables.
   736 	 * These must be contiguous with the preceding.
737 */
738
739 #ifndef FIXED_NTABLES
740 /*
741 * The number of user-level C tables that should be allocated is
742 * related to the size of physical memory. In general, there should
743 * be enough tables to map four times the amount of available RAM.
744 * The extra amount is needed because some table space is wasted by
745 * fragmentation.
746 */
747 NUM_C_TABLES = (total_phys_mem * 4) / (MMU_C_TBL_SIZE * MMU_PAGE_SIZE);
748 NUM_B_TABLES = NUM_C_TABLES / 2;
749 NUM_A_TABLES = NUM_B_TABLES / 2;
750 #endif /* !FIXED_NTABLES */
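	/*
	 * Illustrative arithmetic (not authoritative): with 8K pages and
	 * 64-entry C tables, one C table maps 64 * 8192 = 512KB of virtual
	 * space.  On a hypothetical 32MB machine the formula above would
	 * give NUM_C_TABLES = (32MB * 4) / 512KB = 256, and therefore
	 * NUM_B_TABLES = 128 and NUM_A_TABLES = 64.
	 */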
751
752 size = sizeof(mmu_short_pte_t) * MMU_C_TBL_SIZE * NUM_C_TABLES;
753 mmuCbase = pmap_bootstrap_alloc(size);
754
755 size = sizeof(mmu_short_dte_t) * MMU_B_TBL_SIZE * NUM_B_TABLES;
756 mmuBbase = pmap_bootstrap_alloc(size);
757
758 size = sizeof(mmu_long_dte_t) * MMU_A_TBL_SIZE * NUM_A_TABLES;
759 mmuAbase = pmap_bootstrap_alloc(size);
760
761 /*
762 * Fill in the never-changing part of the kernel tables.
763 * For simplicity, the kernel's mappings will be editable as a
764 * flat array of page table entries at kernCbase. The
765 * higher level 'A' and 'B' tables must be initialized to point
766 * to this lower one.
767 */
768 b = c = 0;
769
770 /*
771 * Invalidate all mappings below KERNBASE in the A table.
772 * This area has already been zeroed out, but it is good
773 * practice to explicitly show that we are interpreting
774 * it as a list of A table descriptors.
775 */
776 for (i = 0; i < MMU_TIA(KERNBASE); i++) {
777 kernAbase[i].addr.raw = 0;
778 }
779
780 /*
781 * Set up the kernel A and B tables so that they will reference the
782 * correct spots in the contiguous table of PTEs allocated for the
783 * kernel's virtual memory space.
784 */
785 for (i = MMU_TIA(KERNBASE); i < MMU_A_TBL_SIZE; i++) {
786 kernAbase[i].attr.raw =
787 MMU_LONG_DTE_LU | MMU_LONG_DTE_SUPV | MMU_DT_SHORT;
788 kernAbase[i].addr.raw = mmu_vtop(&kernBbase[b]);
789
790 for (j=0; j < MMU_B_TBL_SIZE; j++) {
791 kernBbase[b + j].attr.raw = mmu_vtop(&kernCbase[c])
792 | MMU_DT_SHORT;
793 c += MMU_C_TBL_SIZE;
794 }
795 b += MMU_B_TBL_SIZE;
796 }
797
798 /* XXX - Doing kernel_pmap a little further down. */
799
800 pmap_alloc_usermmu(); /* Allocate user MMU tables. */
801 pmap_alloc_usertmgr(); /* Allocate user MMU table managers.*/
802 pmap_alloc_pv(); /* Allocate physical->virtual map. */
803
804 /*
805 * We are now done with pmap_bootstrap_alloc(). Round up
806 * `virtual_avail' to the nearest page, and set the flag
807 * to prevent use of pmap_bootstrap_alloc() hereafter.
808 */
809 pmap_bootstrap_aalign(NBPG);
810 bootstrap_alloc_enabled = FALSE;
811
812 /*
813 * Now that we are done with pmap_bootstrap_alloc(), we
814 * must save the virtual and physical addresses of the
815 * end of the linearly mapped range, which are stored in
816 * virtual_contig_end and avail_start, respectively.
817 * These variables will never change after this point.
818 */
819 virtual_contig_end = virtual_avail;
820 avail_start = virtual_avail - KERNBASE;
821
822 /*
823 * `avail_next' is a running pointer used by pmap_next_page() to
824 * keep track of the next available physical page to be handed
825 * to the VM system during its initialization, in which it
826 * asks for physical pages, one at a time.
827 */
828 avail_next = avail_start;
829
830 /*
831 * Now allocate some virtual addresses, but not the physical pages
832 * behind them. Note that virtual_avail is already page-aligned.
833 *
834 * tmp_vpages[] is an array of two virtual pages used for temporary
835 * kernel mappings in the pmap module to facilitate various physical
   836 	 * address-oriented operations.
837 */
838 tmp_vpages[0] = virtual_avail;
839 virtual_avail += NBPG;
840 tmp_vpages[1] = virtual_avail;
841 virtual_avail += NBPG;
842
843 /** Initialize the PV system **/
844 pmap_init_pv();
845
846 /*
847 * Fill in the kernel_pmap structure and kernel_crp.
848 */
849 kernAphys = mmu_vtop(kernAbase);
850 kernel_pmap.pm_a_tmgr = NULL;
851 kernel_pmap.pm_a_phys = kernAphys;
852 kernel_pmap.pm_refcount = 1; /* always in use */
853
854 kernel_crp.rp_attr = MMU_LONG_DTE_LU | MMU_DT_LONG;
855 kernel_crp.rp_addr = kernAphys;
856
857 /*
858 * Now pmap_enter_kernel() may be used safely and will be
859 * the main interface used hereafter to modify the kernel's
860 * virtual address space. Note that since we are still running
861 * under the PROM's address table, none of these table modifications
862 * actually take effect until pmap_takeover_mmu() is called.
863 *
864 * Note: Our tables do NOT have the PROM linear mappings!
865 * Only the mappings created here exist in our tables, so
866 * remember to map anything we expect to use.
867 */
868 va = (vm_offset_t) KERNBASE;
869 pa = 0;
870
871 /*
872 * The first page of the kernel virtual address space is the msgbuf
873 * page. The page attributes (data, non-cached) are set here, while
874 * the address is assigned to this global pointer in cpu_startup().
875 * It is non-cached, mostly due to paranoia.
876 */
877 pmap_enter_kernel(va, pa|PMAP_NC, VM_PROT_ALL);
878 va += NBPG; pa += NBPG;
879
880 /* Next page is used as the temporary stack. */
881 pmap_enter_kernel(va, pa, VM_PROT_ALL);
882 va += NBPG; pa += NBPG;
883
884 /*
885 * Map all of the kernel's text segment as read-only and cacheable.
886 * (Cacheable is implied by default). Unfortunately, the last bytes
887 * of kernel text and the first bytes of kernel data will often be
888 * sharing the same page. Therefore, the last page of kernel text
   889 	 * has to be mapped as read/write, to accommodate the data.
890 */
891 eva = m68k_trunc_page((vm_offset_t)etext);
892 for (; va < eva; va += NBPG, pa += NBPG)
893 pmap_enter_kernel(va, pa, VM_PROT_READ|VM_PROT_EXECUTE);
894
895 /*
896 * Map all of the kernel's data as read/write and cacheable.
897 * This includes: data, BSS, symbols, and everything in the
898 * contiguous memory used by pmap_bootstrap_alloc()
899 */
900 for (; pa < avail_start; va += NBPG, pa += NBPG)
901 pmap_enter_kernel(va, pa, VM_PROT_READ|VM_PROT_WRITE);
902
903 /*
904 * At this point we are almost ready to take over the MMU. But first
905 * we must save the PROM's address space in our map, as we call its
906 * routines and make references to its data later in the kernel.
907 */
908 pmap_bootstrap_copyprom();
909 pmap_takeover_mmu();
910 pmap_bootstrap_setprom();
911
912 /* Notify the VM system of our page size. */
913 PAGE_SIZE = NBPG;
914 vm_set_page_size();
915
916 pmap_page_upload();
917 }
918
919
920 /* pmap_alloc_usermmu INTERNAL
921 **
922 * Called from pmap_bootstrap() to allocate MMU tables that will
923 * eventually be used for user mappings.
924 */
925 void
926 pmap_alloc_usermmu()
927 {
928 /* XXX: Moved into caller. */
929 }
930
931 /* pmap_alloc_pv INTERNAL
932 **
933 * Called from pmap_bootstrap() to allocate the physical
934 * to virtual mapping list. Each physical page of memory
935 * in the system has a corresponding element in this list.
936 */
937 void
938 pmap_alloc_pv()
939 {
940 int i;
941 unsigned int total_mem;
942
943 /*
944 * Allocate a pv_head structure for every page of physical
945 * memory that will be managed by the system. Since memory on
946 * the 3/80 is non-contiguous, we cannot arrive at a total page
947 * count by subtraction of the lowest available address from the
948 * highest, but rather we have to step through each memory
949 * bank and add the number of pages in each to the total.
950 *
951 * At this time we also initialize the offset of each bank's
952 * starting pv_head within the pv_head list so that the physical
953 * memory state routines (pmap_is_referenced(),
   954 	 * pmap_is_modified(), et al.) can quickly find corresponding
955 * pv_heads in spite of the non-contiguity.
956 */
957 total_mem = 0;
958 for (i = 0; i < SUN3X_NPHYS_RAM_SEGS; i++) {
959 avail_mem[i].pmem_pvbase = m68k_btop(total_mem);
960 total_mem += avail_mem[i].pmem_end -
961 avail_mem[i].pmem_start;
962 if (avail_mem[i].pmem_next == NULL)
963 break;
964 }
965 pvbase = (pv_t *) pmap_bootstrap_alloc(sizeof(pv_t) *
966 m68k_btop(total_phys_mem));
967 }
968
969 /* pmap_alloc_usertmgr INTERNAL
970 **
971 * Called from pmap_bootstrap() to allocate the structures which
972 * facilitate management of user MMU tables. Each user MMU table
973 * in the system has one such structure associated with it.
974 */
975 void
976 pmap_alloc_usertmgr()
977 {
978 /* Allocate user MMU table managers */
979 /* It would be a lot simpler to just make these BSS, but */
980 /* we may want to change their size at boot time... -j */
981 Atmgrbase = (a_tmgr_t *) pmap_bootstrap_alloc(sizeof(a_tmgr_t)
982 * NUM_A_TABLES);
983 Btmgrbase = (b_tmgr_t *) pmap_bootstrap_alloc(sizeof(b_tmgr_t)
984 * NUM_B_TABLES);
985 Ctmgrbase = (c_tmgr_t *) pmap_bootstrap_alloc(sizeof(c_tmgr_t)
986 * NUM_C_TABLES);
987
988 /*
989 * Allocate PV list elements for the physical to virtual
990 * mapping system.
991 */
992 pvebase = (pv_elem_t *) pmap_bootstrap_alloc(
993 sizeof(pv_elem_t) * (NUM_USER_PTES + NUM_KERN_PTES));
994 }
995
996 /* pmap_bootstrap_copyprom() INTERNAL
997 **
998 * Copy the PROM mappings into our own tables. Note, we
999 * can use physical addresses until __bootstrap returns.
1000 */
1001 void
1002 pmap_bootstrap_copyprom()
1003 {
1004 struct sunromvec *romp;
1005 int *mon_ctbl;
1006 mmu_short_pte_t *kpte;
1007 int i, len;
1008
1009 romp = romVectorPtr;
1010
1011 /*
1012 * Copy the mappings in SUN3X_MON_KDB_BASE...SUN3X_MONEND
1013 * Note: mon_ctbl[0] maps SUN3X_MON_KDB_BASE
1014 */
1015 mon_ctbl = *romp->monptaddr;
1016 i = m68k_btop(SUN3X_MON_KDB_BASE - KERNBASE);
1017 kpte = &kernCbase[i];
1018 len = m68k_btop(SUN3X_MONEND - SUN3X_MON_KDB_BASE);
1019
1020 for (i = 0; i < len; i++) {
1021 kpte[i].attr.raw = mon_ctbl[i];
1022 }
1023
1024 /*
1025 * Copy the mappings at MON_DVMA_BASE (to the end).
1026 * Note, in here, mon_ctbl[0] maps MON_DVMA_BASE.
1027 * Actually, we only want the last page, which the
1028 * PROM has set up for use by the "ie" driver.
  1029 	 * (The i82586 needs its SCP there.)
1030 * If we copy all the mappings, pmap_enter_kernel
1031 * may complain about finding valid PTEs that are
1032 * not recorded in our PV lists...
1033 */
1034 mon_ctbl = *romp->shadowpteaddr;
1035 i = m68k_btop(SUN3X_MON_DVMA_BASE - KERNBASE);
1036 kpte = &kernCbase[i];
1037 len = m68k_btop(SUN3X_MON_DVMA_SIZE);
1038 for (i = (len-1); i < len; i++) {
1039 kpte[i].attr.raw = mon_ctbl[i];
1040 }
1041 }
1042
1043 /* pmap_takeover_mmu INTERNAL
1044 **
1045 * Called from pmap_bootstrap() after it has copied enough of the
1046 * PROM mappings into the kernel map so that we can use our own
1047 * MMU table.
1048 */
1049 void
1050 pmap_takeover_mmu()
1051 {
1052
1053 loadcrp(&kernel_crp);
1054 }
1055
1056 /* pmap_bootstrap_setprom() INTERNAL
1057 **
1058 * Set the PROM mappings so it can see kernel space.
1059 * Note that physical addresses are used here, which
1060 * we can get away with because this runs with the
1061 * low 1GB set for transparent translation.
1062 */
1063 void
1064 pmap_bootstrap_setprom()
1065 {
1066 mmu_long_dte_t *mon_dte;
1067 extern struct mmu_rootptr mon_crp;
1068 int i;
1069
1070 mon_dte = (mmu_long_dte_t *) mon_crp.rp_addr;
1071 for (i = MMU_TIA(KERNBASE); i < MMU_TIA(KERN_END); i++) {
1072 mon_dte[i].attr.raw = kernAbase[i].attr.raw;
1073 mon_dte[i].addr.raw = kernAbase[i].addr.raw;
1074 }
1075 }
1076
1077
1078 /* pmap_init INTERFACE
1079 **
1080 * Called at the end of vm_init() to set up the pmap system to go
1081 * into full time operation. All initialization of kernel_pmap
1082 * should be already done by now, so this should just do things
1083 * needed for user-level pmaps to work.
1084 */
1085 void
1086 pmap_init()
1087 {
1088 /** Initialize the manager pools **/
1089 TAILQ_INIT(&a_pool);
1090 TAILQ_INIT(&b_pool);
1091 TAILQ_INIT(&c_pool);
1092
1093 /**************************************************************
1094 * Initialize all tmgr structures and MMU tables they manage. *
1095 **************************************************************/
1096 /** Initialize A tables **/
1097 pmap_init_a_tables();
1098 /** Initialize B tables **/
1099 pmap_init_b_tables();
1100 /** Initialize C tables **/
1101 pmap_init_c_tables();
1102 }
1103
1104 /* pmap_init_a_tables() INTERNAL
1105 **
1106 * Initializes all A managers, their MMU A tables, and inserts
1107 * them into the A manager pool for use by the system.
1108 */
1109 void
1110 pmap_init_a_tables()
1111 {
1112 int i;
1113 a_tmgr_t *a_tbl;
1114
1115 for (i=0; i < NUM_A_TABLES; i++) {
1116 /* Select the next available A manager from the pool */
1117 a_tbl = &Atmgrbase[i];
1118
1119 /*
1120 * Clear its parent entry. Set its wired and valid
1121 * entry count to zero.
1122 */
1123 a_tbl->at_parent = NULL;
1124 a_tbl->at_wcnt = a_tbl->at_ecnt = 0;
1125
1126 /* Assign it the next available MMU A table from the pool */
1127 a_tbl->at_dtbl = &mmuAbase[i * MMU_A_TBL_SIZE];
1128
1129 /*
1130 * Initialize the MMU A table with the table in the `proc0',
1131 * or kernel, mapping. This ensures that every process has
1132 * the kernel mapped in the top part of its address space.
1133 */
1134 bcopy(kernAbase, a_tbl->at_dtbl, MMU_A_TBL_SIZE *
1135 sizeof(mmu_long_dte_t));
1136
1137 /*
1138 * Finally, insert the manager into the A pool,
1139 * making it ready to be used by the system.
1140 */
1141 TAILQ_INSERT_TAIL(&a_pool, a_tbl, at_link);
1142 }
1143 }
1144
1145 /* pmap_init_b_tables() INTERNAL
1146 **
1147 * Initializes all B table managers, their MMU B tables, and
1148 * inserts them into the B manager pool for use by the system.
1149 */
1150 void
1151 pmap_init_b_tables()
1152 {
1153 int i,j;
1154 b_tmgr_t *b_tbl;
1155
1156 for (i=0; i < NUM_B_TABLES; i++) {
1157 /* Select the next available B manager from the pool */
1158 b_tbl = &Btmgrbase[i];
1159
1160 b_tbl->bt_parent = NULL; /* clear its parent, */
1161 b_tbl->bt_pidx = 0; /* parent index, */
1162 b_tbl->bt_wcnt = 0; /* wired entry count, */
1163 b_tbl->bt_ecnt = 0; /* valid entry count. */
1164
1165 /* Assign it the next available MMU B table from the pool */
1166 b_tbl->bt_dtbl = &mmuBbase[i * MMU_B_TBL_SIZE];
1167
1168 /* Invalidate every descriptor in the table */
1169 for (j=0; j < MMU_B_TBL_SIZE; j++)
1170 b_tbl->bt_dtbl[j].attr.raw = MMU_DT_INVALID;
1171
1172 /* Insert the manager into the B pool */
1173 TAILQ_INSERT_TAIL(&b_pool, b_tbl, bt_link);
1174 }
1175 }
1176
1177 /* pmap_init_c_tables() INTERNAL
1178 **
1179 * Initializes all C table managers, their MMU C tables, and
1180 * inserts them into the C manager pool for use by the system.
1181 */
1182 void
1183 pmap_init_c_tables()
1184 {
1185 int i,j;
1186 c_tmgr_t *c_tbl;
1187
1188 for (i=0; i < NUM_C_TABLES; i++) {
1189 /* Select the next available C manager from the pool */
1190 c_tbl = &Ctmgrbase[i];
1191
1192 c_tbl->ct_parent = NULL; /* clear its parent, */
1193 c_tbl->ct_pidx = 0; /* parent index, */
1194 c_tbl->ct_wcnt = 0; /* wired entry count, */
1195 c_tbl->ct_ecnt = 0; /* valid entry count, */
1196 c_tbl->ct_pmap = NULL; /* parent pmap, */
1197 c_tbl->ct_va = 0; /* base of managed range */
1198
1199 /* Assign it the next available MMU C table from the pool */
1200 c_tbl->ct_dtbl = &mmuCbase[i * MMU_C_TBL_SIZE];
1201
1202 for (j=0; j < MMU_C_TBL_SIZE; j++)
1203 c_tbl->ct_dtbl[j].attr.raw = MMU_DT_INVALID;
1204
1205 TAILQ_INSERT_TAIL(&c_pool, c_tbl, ct_link);
1206 }
1207 }
1208
1209 /* pmap_init_pv() INTERNAL
1210 **
1211 * Initializes the Physical to Virtual mapping system.
1212 */
1213 void
1214 pmap_init_pv()
1215 {
1216 int i;
1217
1218 /* Initialize every PV head. */
1219 for (i = 0; i < m68k_btop(total_phys_mem); i++) {
1220 pvbase[i].pv_idx = PVE_EOL; /* Indicate no mappings */
1221 pvbase[i].pv_flags = 0; /* Zero out page flags */
1222 }
1223
1224 pv_initialized = TRUE;
1225 }
1226
1227 /* get_a_table INTERNAL
1228 **
1229 * Retrieve and return a level A table for use in a user map.
1230 */
1231 a_tmgr_t *
1232 get_a_table()
1233 {
1234 a_tmgr_t *tbl;
1235 pmap_t pmap;
1236
1237 /* Get the top A table in the pool */
1238 tbl = a_pool.tqh_first;
1239 if (tbl == NULL) {
1240 /*
  1241 		 * XXX - Instead of panicking here and in other get_x_table
1242 * functions, we do have the option of sleeping on the head of
1243 * the table pool. Any function which updates the table pool
1244 * would then issue a wakeup() on the head, thus waking up any
1245 * processes waiting for a table.
1246 *
1247 * Actually, the place to sleep would be when some process
1248 * asks for a "wired" mapping that would run us short of
1249 * mapping resources. This design DEPENDS on always having
1250 * some mapping resources in the pool for stealing, so we
1251 * must make sure we NEVER let the pool become empty. -gwr
1252 */
1253 panic("get_a_table: out of A tables.");
1254 }
1255
1256 TAILQ_REMOVE(&a_pool, tbl, at_link);
1257 /*
1258 * If the table has a non-null parent pointer then it is in use.
1259 * Forcibly abduct it from its parent and clear its entries.
1260 * No re-entrancy worries here. This table would not be in the
1261 * table pool unless it was available for use.
1262 *
1263 * Note that the second argument to free_a_table() is FALSE. This
1264 * indicates that the table should not be relinked into the A table
1265 * pool. That is a job for the function that called us.
1266 */
1267 if (tbl->at_parent) {
1268 pmap = tbl->at_parent;
1269 free_a_table(tbl, FALSE);
1270 pmap->pm_a_tmgr = NULL;
1271 pmap->pm_a_phys = kernAphys;
1272 }
1273 #ifdef NON_REENTRANT
1274 /*
1275 * If the table isn't to be wired down, re-insert it at the
1276 * end of the pool.
1277 */
1278 if (!wired)
1279 /*
1280 * Quandary - XXX
1281 * Would it be better to let the calling function insert this
1282 * table into the queue? By inserting it here, we are allowing
1283 * it to be stolen immediately. The calling function is
1284 * probably not expecting to use a table that it is not
1285 * assured full control of.
  1286 		 * Answer - In the interest of re-entrancy, it is best to let
1287 * the calling function determine when a table is available
1288 * for use. Therefore this code block is not used.
1289 */
1290 TAILQ_INSERT_TAIL(&a_pool, tbl, at_link);
1291 #endif /* NON_REENTRANT */
1292 return tbl;
1293 }
1294
1295 /* get_b_table INTERNAL
1296 **
1297 * Return a level B table for use.
1298 */
1299 b_tmgr_t *
1300 get_b_table()
1301 {
1302 b_tmgr_t *tbl;
1303
1304 /* See 'get_a_table' for comments. */
1305 tbl = b_pool.tqh_first;
1306 if (tbl == NULL)
1307 panic("get_b_table: out of B tables.");
1308 TAILQ_REMOVE(&b_pool, tbl, bt_link);
1309 if (tbl->bt_parent) {
1310 tbl->bt_parent->at_dtbl[tbl->bt_pidx].attr.raw = MMU_DT_INVALID;
1311 tbl->bt_parent->at_ecnt--;
1312 free_b_table(tbl, FALSE);
1313 }
1314 #ifdef NON_REENTRANT
1315 if (!wired)
  1316 		/* XXX see quandary in get_a_table */
1317 /* XXX start lock */
1318 TAILQ_INSERT_TAIL(&b_pool, tbl, bt_link);
1319 /* XXX end lock */
1320 #endif /* NON_REENTRANT */
1321 return tbl;
1322 }
1323
1324 /* get_c_table INTERNAL
1325 **
1326 * Return a level C table for use.
1327 */
1328 c_tmgr_t *
1329 get_c_table()
1330 {
1331 c_tmgr_t *tbl;
1332
1333 /* See 'get_a_table' for comments */
1334 tbl = c_pool.tqh_first;
1335 if (tbl == NULL)
1336 panic("get_c_table: out of C tables.");
1337 TAILQ_REMOVE(&c_pool, tbl, ct_link);
1338 if (tbl->ct_parent) {
1339 tbl->ct_parent->bt_dtbl[tbl->ct_pidx].attr.raw = MMU_DT_INVALID;
1340 tbl->ct_parent->bt_ecnt--;
1341 free_c_table(tbl, FALSE);
1342 }
1343 #ifdef NON_REENTRANT
1344 if (!wired)
1345 /* XXX See quandary in get_a_table */
1346 /* XXX start lock */
  1347 		TAILQ_INSERT_TAIL(&c_pool, tbl, ct_link);
1348 /* XXX end lock */
1349 #endif /* NON_REENTRANT */
1350
1351 return tbl;
1352 }
1353
1354 /*
1355 * The following 'free_table' and 'steal_table' functions are called to
1356 * detach tables from their current obligations (parents and children) and
1357 * prepare them for reuse in another mapping.
1358 *
1359 * Free_table is used when the calling function will handle the fate
1360 * of the parent table, such as returning it to the free pool when it has
1361 * no valid entries. Functions that do not want to handle this should
1362 * call steal_table, in which the parent table's descriptors and entry
1363 * count are automatically modified when this table is removed.
1364 */
1365
1366 /* free_a_table INTERNAL
1367 **
1368 * Unmaps the given A table and all child tables from their current
1369 * mappings. Returns the number of pages that were invalidated.
1370 * If 'relink' is true, the function will return the table to the head
1371 * of the available table pool.
1372 *
1373 * Cache note: The MC68851 will automatically flush all
1374 * descriptors derived from a given A table from its
1375 * Automatic Translation Cache (ATC) if we issue a
1376 * 'PFLUSHR' instruction with the base address of the
  1377  * 	table.  This function should do so, and does.
1378 * Note note: We are using an MC68030 - there is no
1379 * PFLUSHR.
1380 */
1381 int
1382 free_a_table(a_tbl, relink)
1383 a_tmgr_t *a_tbl;
1384 boolean_t relink;
1385 {
1386 int i, removed_cnt;
1387 mmu_long_dte_t *dte;
1388 mmu_short_dte_t *dtbl;
1389 b_tmgr_t *tmgr;
1390
1391 /*
1392 * Flush the ATC cache of all cached descriptors derived
1393 * from this table.
1394 * Sun3x does not use 68851's cached table feature
1395 * flush_atc_crp(mmu_vtop(a_tbl->dte));
1396 */
1397
1398 /*
1399 * Remove any pending cache flushes that were designated
1400 * for the pmap this A table belongs to.
1401 * a_tbl->parent->atc_flushq[0] = 0;
1402 * Not implemented in sun3x.
1403 */
1404
1405 /*
1406 * All A tables in the system should retain a map for the
1407 * kernel. If the table contains any valid descriptors
1408 * (other than those for the kernel area), invalidate them all,
1409 * stopping short of the kernel's entries.
1410 */
1411 removed_cnt = 0;
1412 if (a_tbl->at_ecnt) {
1413 dte = a_tbl->at_dtbl;
1414 for (i=0; i < MMU_TIA(KERNBASE); i++) {
1415 /*
1416 * If a table entry points to a valid B table, free
1417 * it and its children.
1418 */
1419 if (MMU_VALID_DT(dte[i])) {
1420 /*
1421 * The following block does several things,
1422 * from innermost expression to the
1423 * outermost:
  1424 				 * 1) It extracts the base
1425 * address of the B table pointed
1426 * to in the A table entry dte[i].
1427 * 2) It converts this base address into
1428 * the virtual address it can be
1429 * accessed with. (all MMU tables point
1430 * to physical addresses.)
1431 * 3) It finds the corresponding manager
1432 * structure which manages this MMU table.
1433 * 4) It frees the manager structure.
1434 * (This frees the MMU table and all
1435 * child tables. See 'free_b_table' for
1436 * details.)
1437 */
1438 dtbl = mmu_ptov(dte[i].addr.raw);
1439 tmgr = mmuB2tmgr(dtbl);
1440 removed_cnt += free_b_table(tmgr, TRUE);
1441 dte[i].attr.raw = MMU_DT_INVALID;
1442 }
1443 }
1444 a_tbl->at_ecnt = 0;
1445 }
1446 if (relink) {
1447 a_tbl->at_parent = NULL;
1448 TAILQ_REMOVE(&a_pool, a_tbl, at_link);
1449 TAILQ_INSERT_HEAD(&a_pool, a_tbl, at_link);
1450 }
1451 return removed_cnt;
1452 }
1453
1454 /* free_b_table INTERNAL
1455 **
1456 * Unmaps the given B table and all its children from their current
1457 * mappings. Returns the number of pages that were invalidated.
1458 * (For comments, see 'free_a_table()').
1459 */
1460 int
1461 free_b_table(b_tbl, relink)
1462 b_tmgr_t *b_tbl;
1463 boolean_t relink;
1464 {
1465 int i, removed_cnt;
1466 mmu_short_dte_t *dte;
1467 mmu_short_pte_t *dtbl;
1468 c_tmgr_t *tmgr;
1469
1470 removed_cnt = 0;
1471 if (b_tbl->bt_ecnt) {
1472 dte = b_tbl->bt_dtbl;
1473 for (i=0; i < MMU_B_TBL_SIZE; i++) {
1474 if (MMU_VALID_DT(dte[i])) {
1475 dtbl = mmu_ptov(MMU_DTE_PA(dte[i]));
1476 tmgr = mmuC2tmgr(dtbl);
1477 removed_cnt += free_c_table(tmgr, TRUE);
1478 dte[i].attr.raw = MMU_DT_INVALID;
1479 }
1480 }
1481 b_tbl->bt_ecnt = 0;
1482 }
1483
1484 if (relink) {
1485 b_tbl->bt_parent = NULL;
1486 TAILQ_REMOVE(&b_pool, b_tbl, bt_link);
1487 TAILQ_INSERT_HEAD(&b_pool, b_tbl, bt_link);
1488 }
1489 return removed_cnt;
1490 }
1491
1492 /* free_c_table INTERNAL
1493 **
1494 * Unmaps the given C table from use and returns it to the pool for
1495 * re-use. Returns the number of pages that were invalidated.
1496 *
1497 * This function preserves any physical page modification information
1498 * contained in the page descriptors within the C table by calling
1499 * 'pmap_remove_pte().'
1500 */
1501 int
1502 free_c_table(c_tbl, relink)
1503 c_tmgr_t *c_tbl;
1504 boolean_t relink;
1505 {
1506 int i, removed_cnt;
1507
1508 removed_cnt = 0;
1509 if (c_tbl->ct_ecnt) {
1510 for (i=0; i < MMU_C_TBL_SIZE; i++) {
1511 if (MMU_VALID_DT(c_tbl->ct_dtbl[i])) {
1512 pmap_remove_pte(&c_tbl->ct_dtbl[i]);
1513 removed_cnt++;
1514 }
1515 }
1516 c_tbl->ct_ecnt = 0;
1517 }
1518
1519 if (relink) {
1520 c_tbl->ct_parent = NULL;
1521 TAILQ_REMOVE(&c_pool, c_tbl, ct_link);
1522 TAILQ_INSERT_HEAD(&c_pool, c_tbl, ct_link);
1523 }
1524 return removed_cnt;
1525 }
1526
1527 #if 0
1528 /* free_c_table_novalid INTERNAL
1529 **
1530 * Frees the given C table manager without checking to see whether
1531 * or not it contains any valid page descriptors as it is assumed
1532 * that it does not.
1533 */
1534 void
1535 free_c_table_novalid(c_tbl)
1536 c_tmgr_t *c_tbl;
1537 {
1538 TAILQ_REMOVE(&c_pool, c_tbl, ct_link);
1539 TAILQ_INSERT_HEAD(&c_pool, c_tbl, ct_link);
1540 c_tbl->ct_parent->bt_dtbl[c_tbl->ct_pidx].attr.raw = MMU_DT_INVALID;
1541 c_tbl->ct_parent->bt_ecnt--;
1542 /*
1543 * XXX - Should call equiv. of 'free_b_table_novalid' here if
1544 * we just removed the last entry of the parent B table.
  1545  * But I want to ensure that this will not endanger pmap_enter()
1546 * with sudden removal of tables it is working with.
1547 *
1548 * We should probably add another field to each table, indicating
1549 * whether or not it is 'locked', ie. in the process of being
1550 * modified.
1551 */
1552 c_tbl->ct_parent = NULL;
1553 }
1554 #endif
1555
1556 /* pmap_remove_pte INTERNAL
1557 **
1558 * Unmap the given pte and preserve any page modification
  1559  * information by transferring it to the pv head of the
1560 * physical page it maps to. This function does not update
1561 * any reference counts because it is assumed that the calling
1562 * function will do so.
1563 */
1564 void
1565 pmap_remove_pte(pte)
1566 mmu_short_pte_t *pte;
1567 {
1568 u_short pv_idx, targ_idx;
1569 int s;
1570 vm_offset_t pa;
1571 pv_t *pv;
1572
1573 pa = MMU_PTE_PA(*pte);
1574 if (is_managed(pa)) {
1575 pv = pa2pv(pa);
1576 targ_idx = pteidx(pte); /* Index of PTE being removed */
1577
1578 /*
1579 * If the PTE being removed is the first (or only) PTE in
1580 * the list of PTEs currently mapped to this page, remove the
1581 * PTE by changing the index found on the PV head. Otherwise
1582 * a linear search through the list will have to be executed
1583 * in order to find the PVE which points to the PTE being
1584 * removed, so that it may be modified to point to its new
1585 * neighbor.
1586 */
1587 s = splimp();
1588 pv_idx = pv->pv_idx; /* Index of first PTE in PV list */
1589 if (pv_idx == targ_idx) {
1590 pv->pv_idx = pvebase[targ_idx].pve_next;
1591 } else {
1592 /*
1593 * Find the PV element pointing to the target
1594 * element. Note: may have pv_idx==PVE_EOL
1595 */
1596 for (;;) {
1597 if (pv_idx == PVE_EOL) {
1598 #ifdef PMAP_DEBUG
1599 printf("pmap_remove_pte: PVE_EOL\n");
1600 Debugger();
1601 #endif
1602 goto pv_not_found;
1603 }
1604 if (pvebase[pv_idx].pve_next == targ_idx)
1605 break;
1606 pv_idx = pvebase[pv_idx].pve_next;
1607 }
1608 /*
1609 * At this point, pv_idx is the index of the PV
1610 * element just before the target element in the list.
1611 * Unlink the target.
1612 */
1613 pvebase[pv_idx].pve_next = pvebase[targ_idx].pve_next;
  1614 pv_not_found:	;	/* null statement -- a label must be followed by a statement */
1615 }
1616 /*
1617 * Save the mod/ref bits of the pte by simply
1618 * ORing the entire pte onto the pv_flags member
1619 * of the pv structure.
  1620 		 * There is no need for the usage information on the pv
  1621 		 * head to use a bit pattern different from the one
  1622 		 * used in the MMU ptes.
1623 */
1624 pv->pv_flags |= (u_short) pte->attr.raw;
1625 splx(s);
1626 }
1627
1628 pte->attr.raw = MMU_DT_INVALID;
1629 }
1630
1631 /* pmap_stroll INTERNAL
1632 **
1633 * Retrieve the addresses of all table managers involved in the mapping of
  1634  * the given virtual address.  If the table walk completed successfully,
  1635  * return TRUE.  If it was only partially successful, return FALSE.
1636 * The table walk performed by this function is important to many other
1637 * functions in this module.
1638 *
1639 * Note: This function ought to be easier to read.
1640 */
1641 boolean_t
1642 pmap_stroll(pmap, va, a_tbl, b_tbl, c_tbl, pte, a_idx, b_idx, pte_idx)
1643 pmap_t pmap;
1644 vm_offset_t va;
1645 a_tmgr_t **a_tbl;
1646 b_tmgr_t **b_tbl;
1647 c_tmgr_t **c_tbl;
1648 mmu_short_pte_t **pte;
1649 int *a_idx, *b_idx, *pte_idx;
1650 {
1651 mmu_long_dte_t *a_dte; /* A: long descriptor table */
1652 mmu_short_dte_t *b_dte; /* B: short descriptor table */
1653
1654 if (pmap == pmap_kernel())
1655 return FALSE;
1656
1657 /* Does the given pmap have its own A table? */
1658 *a_tbl = pmap->pm_a_tmgr;
1659 if (*a_tbl == NULL)
1660 return FALSE; /* No. Return unknown. */
1661 /* Does the A table have a valid B table
1662 * under the corresponding table entry?
1663 */
1664 *a_idx = MMU_TIA(va);
1665 a_dte = &((*a_tbl)->at_dtbl[*a_idx]);
1666 if (!MMU_VALID_DT(*a_dte))
1667 return FALSE; /* No. Return unknown. */
1668 /* Yes. Extract B table from the A table. */
1669 *b_tbl = mmuB2tmgr(mmu_ptov(a_dte->addr.raw));
1670 /* Does the B table have a valid C table
1671 * under the corresponding table entry?
1672 */
1673 *b_idx = MMU_TIB(va);
1674 b_dte = &((*b_tbl)->bt_dtbl[*b_idx]);
1675 if (!MMU_VALID_DT(*b_dte))
1676 return FALSE; /* No. Return unknown. */
1677 /* Yes. Extract C table from the B table. */
1678 *c_tbl = mmuC2tmgr(mmu_ptov(MMU_DTE_PA(*b_dte)));
1679 *pte_idx = MMU_TIC(va);
1680 *pte = &((*c_tbl)->ct_dtbl[*pte_idx]);
1681
1682 return TRUE;
1683 }
1684
1685 /* pmap_enter INTERFACE
1686 **
1687 * Called by the kernel to map a virtual address
1688 * to a physical address in the given process map.
1689 *
1690 * Note: this function should apply an exclusive lock
1691 * on the pmap system for its duration. (it certainly
1692 * would save my hair!!)
1693 * This function ought to be easier to read.
1694 */
1695 void
1696 pmap_enter(pmap, va, pa, prot, wired)
1697 pmap_t pmap;
1698 vm_offset_t va;
1699 vm_offset_t pa;
1700 vm_prot_t prot;
1701 boolean_t wired;
1702 {
1703 boolean_t insert, managed; /* Marks the need for PV insertion.*/
1704 u_short nidx; /* PV list index */
1705 int s; /* Used for splimp()/splx() */
1706 int flags; /* Mapping flags. eg. Cache inhibit */
1707 u_int a_idx, b_idx, pte_idx; /* table indices */
1708 a_tmgr_t *a_tbl; /* A: long descriptor table manager */
1709 b_tmgr_t *b_tbl; /* B: short descriptor table manager */
1710 c_tmgr_t *c_tbl; /* C: short page table manager */
1711 mmu_long_dte_t *a_dte; /* A: long descriptor table */
1712 mmu_short_dte_t *b_dte; /* B: short descriptor table */
1713 mmu_short_pte_t *c_pte; /* C: short page descriptor table */
1714 pv_t *pv; /* pv list head */
1715 enum {NONE, NEWA, NEWB, NEWC} llevel; /* used at end */
1716
1717 if (pmap == NULL)
1718 return;
1719 if (pmap == pmap_kernel()) {
1720 pmap_enter_kernel(va, pa, prot);
1721 return;
1722 }
1723
1724 flags = (pa & ~MMU_PAGE_MASK);
1725 pa &= MMU_PAGE_MASK;
1726
1727 /*
1728 * Determine if the physical address being mapped is on-board RAM.
1729 * Any other area of the address space is likely to belong to a
1730 	 * device and hence it would be disastrous to cache its contents.
1731 */
1732 if ((managed = is_managed(pa)) == FALSE)
1733 flags |= PMAP_NC;
1734
1735 /*
1736 * For user mappings we walk along the MMU tables of the given
1737 * pmap, reaching a PTE which describes the virtual page being
1738 * mapped or changed. If any level of the walk ends in an invalid
1739 * entry, a table must be allocated and the entry must be updated
1740 * to point to it.
1741 * There is a bit of confusion as to whether this code must be
1742 * re-entrant. For now we will assume it is. To support
1743 * re-entrancy we must unlink tables from the table pool before
1744 * we assume we may use them. Tables are re-linked into the pool
1745 * when we are finished with them at the end of the function.
1746 * But I don't feel like doing that until we have proof that this
1747 * needs to be re-entrant.
1748 * 'llevel' records which tables need to be relinked.
1749 */
1750 llevel = NONE;
1751
1752 /*
1753 * Step 1 - Retrieve the A table from the pmap. If it has no
1754 * A table, allocate a new one from the available pool.
1755 */
1756
1757 a_tbl = pmap->pm_a_tmgr;
1758 if (a_tbl == NULL) {
1759 /*
1760 * This pmap does not currently have an A table. Allocate
1761 * a new one.
1762 */
1763 a_tbl = get_a_table();
1764 a_tbl->at_parent = pmap;
1765
1766 /*
1767 * Assign this new A table to the pmap, and calculate its
1768 * physical address so that loadcrp() can be used to make
1769 * the table active.
1770 */
1771 pmap->pm_a_tmgr = a_tbl;
1772 pmap->pm_a_phys = mmu_vtop(a_tbl->at_dtbl);
1773
1774 /*
1775 * If the process receiving a new A table is the current
1776 * process, we are responsible for setting the MMU so that
1777 * it becomes the current address space. This only adds
1778 * new mappings, so no need to flush anything.
1779 */
1780 if (pmap == current_pmap()) {
1781 kernel_crp.rp_addr = pmap->pm_a_phys;
1782 loadcrp(&kernel_crp);
1783 }
1784
1785 if (!wired)
1786 llevel = NEWA;
1787 } else {
1788 /*
1789 * Use the A table already allocated for this pmap.
1790 * Unlink it from the A table pool if necessary.
1791 */
1792 if (wired && !a_tbl->at_wcnt)
1793 TAILQ_REMOVE(&a_pool, a_tbl, at_link);
1794 }
1795
1796 /*
1797 * Step 2 - Walk into the B table. If there is no valid B table,
1798 * allocate one.
1799 */
1800
1801 a_idx = MMU_TIA(va); /* Calculate the TIA of the VA. */
1802 a_dte = &a_tbl->at_dtbl[a_idx]; /* Retrieve descriptor from table */
1803 if (MMU_VALID_DT(*a_dte)) { /* Is the descriptor valid? */
1804 /* The descriptor is valid. Use the B table it points to. */
1805 /*************************************
1806 * a_idx *
1807 * v *
1808 * a_tbl -> +-+-+-+-+-+-+-+-+-+-+-+- *
1809 * | | | | | | | | | | | | *
1810 * +-+-+-+-+-+-+-+-+-+-+-+- *
1811 * | *
1812 * \- b_tbl -> +-+- *
1813 * | | *
1814 * +-+- *
1815 *************************************/
1816 b_dte = mmu_ptov(a_dte->addr.raw);
1817 b_tbl = mmuB2tmgr(b_dte);
1818
1819 /*
1820 		 * If the requested mapping must be wired, but the table
1821 		 * being used to map it is not yet wired, the table must be
1822 		 * removed from the available pool and the parent A table's
1823 		 * wired entry count incremented.
1824 */
1825 if (wired && !b_tbl->bt_wcnt) {
1826 TAILQ_REMOVE(&b_pool, b_tbl, bt_link);
1827 a_tbl->at_wcnt++;
1828 }
1829 } else {
1830 /* The descriptor is invalid. Allocate a new B table. */
1831 b_tbl = get_b_table();
1832
1833 /* Point the parent A table descriptor to this new B table. */
1834 a_dte->addr.raw = mmu_vtop(b_tbl->bt_dtbl);
1835 a_dte->attr.raw = MMU_LONG_DTE_LU | MMU_DT_SHORT;
1836 a_tbl->at_ecnt++; /* Update parent's valid entry count */
1837
1838 /* Create the necessary back references to the parent table */
1839 b_tbl->bt_parent = a_tbl;
1840 b_tbl->bt_pidx = a_idx;
1841
1842 /*
1843 * If this table is to be wired, make sure the parent A table
1844 * wired count is updated to reflect that it has another wired
1845 * entry.
1846 */
1847 if (wired)
1848 a_tbl->at_wcnt++;
1849 else if (llevel == NONE)
1850 llevel = NEWB;
1851 }
1852
1853 /*
1854 	 * Step 3 - Walk into the C table. If there is no valid C table,
1855 * allocate one.
1856 */
1857
1858 b_idx = MMU_TIB(va); /* Calculate the TIB of the VA */
1859 b_dte = &b_tbl->bt_dtbl[b_idx]; /* Retrieve descriptor from table */
1860 if (MMU_VALID_DT(*b_dte)) { /* Is the descriptor valid? */
1861 /* The descriptor is valid. Use the C table it points to. */
1862 /**************************************
1863 * c_idx *
1864 * | v *
1865 * \- b_tbl -> +-+-+-+-+-+-+-+-+-+-+- *
1866 * | | | | | | | | | | | *
1867 * +-+-+-+-+-+-+-+-+-+-+- *
1868 * | *
1869 * \- c_tbl -> +-+-- *
1870 * | | | *
1871 * +-+-- *
1872 **************************************/
1873 c_pte = mmu_ptov(MMU_PTE_PA(*b_dte));
1874 c_tbl = mmuC2tmgr(c_pte);
1875
1876 /* If mapping is wired and table is not */
1877 if (wired && !c_tbl->ct_wcnt) {
1878 TAILQ_REMOVE(&c_pool, c_tbl, ct_link);
1879 b_tbl->bt_wcnt++;
1880 }
1881 } else {
1882 /* The descriptor is invalid. Allocate a new C table. */
1883 c_tbl = get_c_table();
1884
1885 /* Point the parent B table descriptor to this new C table. */
1886 b_dte->attr.raw = mmu_vtop(c_tbl->ct_dtbl);
1887 b_dte->attr.raw |= MMU_DT_SHORT;
1888 b_tbl->bt_ecnt++; /* Update parent's valid entry count */
1889
1890 /* Create the necessary back references to the parent table */
1891 c_tbl->ct_parent = b_tbl;
1892 c_tbl->ct_pidx = b_idx;
1893 /*
1894 		 * Store the pmap and the base virtual address mapped by this
1895 		 * C table, for faster retrieval by the PV functions.
1896 */
1897 c_tbl->ct_pmap = pmap;
1898 c_tbl->ct_va = (va & (MMU_TIA_MASK|MMU_TIB_MASK));
1899
1900 /*
1901 * If this table is to be wired, make sure the parent B table
1902 * wired count is updated to reflect that it has another wired
1903 * entry.
1904 */
1905 if (wired)
1906 b_tbl->bt_wcnt++;
1907 else if (llevel == NONE)
1908 llevel = NEWC;
1909 }
1910
1911 /*
1912 * Step 4 - Deposit a page descriptor (PTE) into the appropriate
1913 * slot of the C table, describing the PA to which the VA is mapped.
1914 */
1915
1916 pte_idx = MMU_TIC(va);
1917 c_pte = &c_tbl->ct_dtbl[pte_idx];
1918 if (MMU_VALID_DT(*c_pte)) { /* Is the entry currently valid? */
1919 /*
1920 * The PTE is currently valid. This particular call
1921 * is just a synonym for one (or more) of the following
1922 * operations:
1923 * change protection of a page
1924 * change wiring status of a page
1925 * remove the mapping of a page
1926 *
1927 * XXX - Semi critical: This code should unwire the PTE
1928 * and, possibly, associated parent tables if this is a
1929 * change wiring operation. Currently it does not.
1930 *
1931 * This may be ok if pmap_change_wiring() is the only
1932 * interface used to UNWIRE a page.
1933 */
1934
1935 /* First check if this is a wiring operation. */
1936 if (wired && (c_pte->attr.raw & MMU_SHORT_PTE_WIRED)) {
1937 /*
1938 * The PTE is already wired. To prevent it from being
1939 * counted as a new wiring operation, reset the 'wired'
1940 * variable.
1941 */
1942 wired = FALSE;
1943 }
1944
1945 /* Is the new address the same as the old? */
1946 if (MMU_PTE_PA(*c_pte) == pa) {
1947 /*
1948 * Yes, mark that it does not need to be reinserted
1949 * into the PV list.
1950 */
1951 insert = FALSE;
1952
1953 /*
1954 * Clear all but the modified, referenced and wired
1955 * bits on the PTE.
1956 */
1957 c_pte->attr.raw &= (MMU_SHORT_PTE_M
1958 | MMU_SHORT_PTE_USED | MMU_SHORT_PTE_WIRED);
1959 } else {
1960 /* No, remove the old entry */
1961 pmap_remove_pte(c_pte);
1962 insert = TRUE;
1963 }
1964
1965 /*
1966 * TLB flush is only necessary if modifying current map.
1967 * However, in pmap_enter(), the pmap almost always IS
1968 * the current pmap, so don't even bother to check.
1969 */
1970 TBIS(va);
1971 } else {
1972 /*
1973 * The PTE is invalid. Increment the valid entry count in
1974 * the C table manager to reflect the addition of a new entry.
1975 */
1976 c_tbl->ct_ecnt++;
1977
1978 /* XXX - temporarily make sure the PTE is cleared. */
1979 c_pte->attr.raw = 0;
1980
1981 /* It will also need to be inserted into the PV list. */
1982 insert = TRUE;
1983 }
1984
1985 /*
1986 * If page is changing from unwired to wired status, set an unused bit
1987 * within the PTE to indicate that it is wired. Also increment the
1988 * wired entry count in the C table manager.
1989 */
1990 if (wired) {
1991 c_pte->attr.raw |= MMU_SHORT_PTE_WIRED;
1992 c_tbl->ct_wcnt++;
1993 }
1994
1995 /*
1996 * Map the page, being careful to preserve modify/reference/wired
1997 * bits. At this point it is assumed that the PTE either has no bits
1998 * set, or if there are set bits, they are only modified, reference or
1999 * wired bits. If not, the following statement will cause erratic
2000 * behavior.
2001 */
2002 #ifdef PMAP_DEBUG
2003 if (c_pte->attr.raw & ~(MMU_SHORT_PTE_M |
2004 MMU_SHORT_PTE_USED | MMU_SHORT_PTE_WIRED)) {
2005 printf("pmap_enter: junk left in PTE at %p\n", c_pte);
2006 Debugger();
2007 }
2008 #endif
2009 c_pte->attr.raw |= ((u_long) pa | MMU_DT_PAGE);
2010
2011 /*
2012 * If the mapping should be read-only, set the write protect
2013 * bit in the PTE.
2014 */
2015 if (!(prot & VM_PROT_WRITE))
2016 c_pte->attr.raw |= MMU_SHORT_PTE_WP;
2017
2018 /*
2019 	 * If the mapping should be cache inhibited (indicated by flag
2020 	 * bits in the low-order part of the physical address),
2021 	 * mark the PTE as a cache inhibited page.
2022 */
2023 if (flags & PMAP_NC)
2024 c_pte->attr.raw |= MMU_SHORT_PTE_CI;
2025
2026 /*
2027 * If the physical address being mapped is managed by the PV
2028 * system then link the pte into the list of pages mapped to that
2029 * address.
2030 */
2031 if (insert && managed) {
2032 pv = pa2pv(pa);
2033 nidx = pteidx(c_pte);
2034
2035 s = splimp();
2036 pvebase[nidx].pve_next = pv->pv_idx;
2037 pv->pv_idx = nidx;
2038 splx(s);
2039 }
2040
2041 /* Move any allocated tables back into the active pool. */
2042
2043 switch (llevel) {
2044 case NEWA:
2045 TAILQ_INSERT_TAIL(&a_pool, a_tbl, at_link);
2046 /* FALLTHROUGH */
2047 case NEWB:
2048 TAILQ_INSERT_TAIL(&b_pool, b_tbl, bt_link);
2049 /* FALLTHROUGH */
2050 case NEWC:
2051 TAILQ_INSERT_TAIL(&c_pool, c_tbl, ct_link);
2052 /* FALLTHROUGH */
2053 default:
2054 break;
2055 }
2056 }
2057
2058 /* pmap_enter_kernel INTERNAL
2059 **
2060 * Map the given virtual address to the given physical address within the
2061 * kernel address space. This function exists because the kernel map does
2062 * not do dynamic table allocation. It consists of a contiguous array of ptes
2063 * and can be edited directly without the need to walk through any tables.
2064 *
2065 * XXX: "Danger, Will Robinson!"
2066 * Note that the kernel should never take a fault on any page
2067 * between [ KERNBASE .. virtual_avail ] and this is checked in
2068 * trap.c for kernel-mode MMU faults. This means that mappings
2069  * created in that range must be implicitly wired. -gwr
2070 */
2071 void
2072 pmap_enter_kernel(va, pa, prot)
2073 vm_offset_t va;
2074 vm_offset_t pa;
2075 vm_prot_t prot;
2076 {
2077 boolean_t was_valid, insert;
2078 u_short pte_idx;
2079 int s, flags;
2080 mmu_short_pte_t *pte;
2081 pv_t *pv;
2082 vm_offset_t old_pa;
2083
2084 flags = (pa & ~MMU_PAGE_MASK);
2085 pa &= MMU_PAGE_MASK;
2086
2087 if (is_managed(pa))
2088 insert = TRUE;
2089 else
2090 insert = FALSE;
2091
2092 /*
2093 * Calculate the index of the PTE being modified.
2094 */
2095 pte_idx = (u_long) m68k_btop(va - KERNBASE);
2096
2097 /* This array is traditionally named "Sysmap" */
2098 pte = &kernCbase[pte_idx];
2099
2100 s = splimp();
2101 if (MMU_VALID_DT(*pte)) {
2102 was_valid = TRUE;
2103 /*
2104 * If the PTE already maps a different
2105 		 * physical address, unmap and pv_unlink.
2106 */
2107 old_pa = MMU_PTE_PA(*pte);
2108 if (pa != old_pa)
2109 pmap_remove_pte(pte);
2110 else {
2111 /*
2112 * Old PA and new PA are the same. No need to
2113 * relink the mapping within the PV list.
2114 */
2115 insert = FALSE;
2116
2117 /*
2118 * Save any mod/ref bits on the PTE.
2119 */
2120 pte->attr.raw &= (MMU_SHORT_PTE_USED|MMU_SHORT_PTE_M);
2121 }
2122 } else {
2123 pte->attr.raw = MMU_DT_INVALID;
2124 was_valid = FALSE;
2125 }
2126
2127 /*
2128 	 * Map the page, being careful to preserve any modified/referenced bits
2129 	 * on the PTE.
2130 */
2131 pte->attr.raw |= (pa | MMU_DT_PAGE);
2132
2133 if (!(prot & VM_PROT_WRITE)) /* If access should be read-only */
2134 pte->attr.raw |= MMU_SHORT_PTE_WP;
2135 if (flags & PMAP_NC)
2136 pte->attr.raw |= MMU_SHORT_PTE_CI;
2137 if (was_valid)
2138 TBIS(va);
2139
2140 /*
2141 * Insert the PTE into the PV system, if need be.
2142 */
2143 if (insert) {
2144 pv = pa2pv(pa);
2145 pvebase[pte_idx].pve_next = pv->pv_idx;
2146 pv->pv_idx = pte_idx;
2147 }
2148 splx(s);
2149
2150 }
2151
2152 /* pmap_map INTERNAL
2153 **
2154 * Map a contiguous range of physical memory into a contiguous range of
2155 * the kernel virtual address space.
2156 *
2157 * Used for device mappings and early mapping of the kernel text/data/bss.
2158 * Returns the first virtual address beyond the end of the range.
2159 */
2160 vm_offset_t
2161 pmap_map(va, pa, endpa, prot)
2162 vm_offset_t va;
2163 vm_offset_t pa;
2164 vm_offset_t endpa;
2165 int prot;
2166 {
2167 int sz;
2168
2169 sz = endpa - pa;
2170 do {
2171 pmap_enter_kernel(va, pa, prot);
2172 va += NBPG;
2173 pa += NBPG;
2174 sz -= NBPG;
2175 } while (sz > 0);
2176 return(va);
2177 }
2178
2179 /* pmap_protect INTERFACE
2180 **
2181 * Apply the given protection to the given virtual address range within
2182 * the given map.
2183 *
2184 * It is ok for the protection applied to be stronger than what is
2185 * specified. We use this to our advantage when the given map has no
2186 * mapping for the virtual address. By skipping a page when this
2187 * is discovered, we are effectively applying a protection of VM_PROT_NONE,
2188 * and therefore do not need to map the page just to apply a protection
2189 * code. Only pmap_enter() needs to create new mappings if they do not exist.
2190 *
2191  * XXX - This function could be sped up by using pmap_stroll() for initial
2192  * setup, and then manually walking the tables in the loop below.
2193 */
2194 void
2195 pmap_protect(pmap, startva, endva, prot)
2196 pmap_t pmap;
2197 vm_offset_t startva, endva;
2198 vm_prot_t prot;
2199 {
2200 boolean_t iscurpmap;
2201 int a_idx, b_idx, c_idx;
2202 a_tmgr_t *a_tbl;
2203 b_tmgr_t *b_tbl;
2204 c_tmgr_t *c_tbl;
2205 mmu_short_pte_t *pte;
2206
2207 if (pmap == NULL)
2208 return;
2209 if (pmap == pmap_kernel()) {
2210 pmap_protect_kernel(startva, endva, prot);
2211 return;
2212 }
2213
2214 /*
2215 * In this particular pmap implementation, there are only three
2216 * types of memory protection: 'all' (read/write/execute),
2217 * 'read-only' (read/execute) and 'none' (no mapping.)
2218 * It is not possible for us to treat 'executable' as a separate
2219 * protection type. Therefore, protection requests that seek to
2220 * remove execute permission while retaining read or write, and those
2221 * that make little sense (write-only for example) are ignored.
2222 */
2223 switch (prot) {
2224 case VM_PROT_NONE:
2225 /*
2226 * A request to apply the protection code of
2227 * 'VM_PROT_NONE' is a synonym for pmap_remove().
2228 */
2229 pmap_remove(pmap, startva, endva);
2230 return;
2231 case VM_PROT_EXECUTE:
2232 case VM_PROT_READ:
2233 case VM_PROT_READ|VM_PROT_EXECUTE:
2234 /* continue */
2235 break;
2236 case VM_PROT_WRITE:
2237 case VM_PROT_WRITE|VM_PROT_READ:
2238 case VM_PROT_WRITE|VM_PROT_EXECUTE:
2239 case VM_PROT_ALL:
2240 /* None of these should happen in a sane system. */
2241 return;
2242 }
2243
2244 /*
2245 * If the pmap has no A table, it has no mappings and therefore
2246 * there is nothing to protect.
2247 */
2248 if ((a_tbl = pmap->pm_a_tmgr) == NULL)
2249 return;
2250
2251 a_idx = MMU_TIA(startva);
2252 b_idx = MMU_TIB(startva);
2253 c_idx = MMU_TIC(startva);
2254 	b_tbl = NULL; c_tbl = NULL;
2255
2256 iscurpmap = (pmap == current_pmap());
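	/*
	 * Walk the range page by page, carrying the current B and C table
	 * pointers across iterations. When a B or C descriptor is found to
	 * be invalid, the whole TIA or TIB range it covers is skipped at
	 * once instead of page by page.
	 */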
2257 while (startva < endva) {
2258 if (b_tbl || MMU_VALID_DT(a_tbl->at_dtbl[a_idx])) {
2259 if (b_tbl == NULL) {
2260 b_tbl = (b_tmgr_t *) a_tbl->at_dtbl[a_idx].addr.raw;
2261 b_tbl = mmu_ptov((vm_offset_t) b_tbl);
2262 b_tbl = mmuB2tmgr((mmu_short_dte_t *) b_tbl);
2263 }
2264 if (c_tbl || MMU_VALID_DT(b_tbl->bt_dtbl[b_idx])) {
2265 if (c_tbl == NULL) {
2266 c_tbl = (c_tmgr_t *) MMU_DTE_PA(b_tbl->bt_dtbl[b_idx]);
2267 c_tbl = mmu_ptov((vm_offset_t) c_tbl);
2268 c_tbl = mmuC2tmgr((mmu_short_pte_t *) c_tbl);
2269 }
2270 if (MMU_VALID_DT(c_tbl->ct_dtbl[c_idx])) {
2271 pte = &c_tbl->ct_dtbl[c_idx];
2272 /* make the mapping read-only */
2273 pte->attr.raw |= MMU_SHORT_PTE_WP;
2274 /*
2275 * If we just modified the current address space,
2276 * flush any translations for the modified page from
2277 * the translation cache and any data from it in the
2278 * data cache.
2279 */
2280 if (iscurpmap)
2281 TBIS(startva);
2282 }
2283 startva += NBPG;
2284
2285 if (++c_idx >= MMU_C_TBL_SIZE) { /* exceeded C table? */
2286 c_tbl = NULL;
2287 c_idx = 0;
2288 if (++b_idx >= MMU_B_TBL_SIZE) { /* exceeded B table? */
2289 b_tbl = NULL;
2290 b_idx = 0;
2291 }
2292 }
2293 } else { /* C table wasn't valid */
2294 c_tbl = NULL;
2295 c_idx = 0;
2296 startva += MMU_TIB_RANGE;
2297 if (++b_idx >= MMU_B_TBL_SIZE) { /* exceeded B table? */
2298 b_tbl = NULL;
2299 b_idx = 0;
2300 }
2301 } /* C table */
2302 } else { /* B table wasn't valid */
2303 b_tbl = NULL;
2304 b_idx = 0;
2305 startva += MMU_TIA_RANGE;
2306 a_idx++;
2307 } /* B table */
2308 }
2309 }
2310
2311 /* pmap_protect_kernel INTERNAL
2312 **
2313 * Apply the given protection code to a kernel address range.
2314 */
2315 void
2316 pmap_protect_kernel(startva, endva, prot)
2317 vm_offset_t startva, endva;
2318 vm_prot_t prot;
2319 {
2320 vm_offset_t va;
2321 mmu_short_pte_t *pte;
2322
2323 pte = &kernCbase[(unsigned long) m68k_btop(startva - KERNBASE)];
2324 for (va = startva; va < endva; va += NBPG, pte++) {
2325 if (MMU_VALID_DT(*pte)) {
2326 switch (prot) {
2327 case VM_PROT_ALL:
2328 break;
2329 case VM_PROT_EXECUTE:
2330 case VM_PROT_READ:
2331 case VM_PROT_READ|VM_PROT_EXECUTE:
2332 pte->attr.raw |= MMU_SHORT_PTE_WP;
2333 break;
2334 case VM_PROT_NONE:
2335 /* this is an alias for 'pmap_remove_kernel' */
2336 pmap_remove_pte(pte);
2337 break;
2338 default:
2339 break;
2340 }
2341 /*
2342 * since this is the kernel, immediately flush any cached
2343 * descriptors for this address.
2344 */
2345 TBIS(va);
2346 }
2347 }
2348 }
2349
2350 /* pmap_change_wiring INTERFACE
2351 **
2352 * Changes the wiring of the specified page.
2353 *
2354 * This function is called from vm_fault.c to unwire
2355 * a mapping. It really should be called 'pmap_unwire'
2356 * because it is never asked to do anything but remove
2357 * wirings.
2358 */
2359 void
2360 pmap_change_wiring(pmap, va, wire)
2361 pmap_t pmap;
2362 vm_offset_t va;
2363 boolean_t wire;
2364 {
2365 int a_idx, b_idx, c_idx;
2366 a_tmgr_t *a_tbl;
2367 b_tmgr_t *b_tbl;
2368 c_tmgr_t *c_tbl;
2369 mmu_short_pte_t *pte;
2370
2371 /* Kernel mappings always remain wired. */
2372 if (pmap == pmap_kernel())
2373 return;
2374
2375 #ifdef PMAP_DEBUG
2376 if (wire == TRUE)
2377 panic("pmap_change_wiring: wire requested.");
2378 #endif
2379
2380 /*
2381 * Walk through the tables. If the walk terminates without
2382 * a valid PTE then the address wasn't wired in the first place.
2383 * Return immediately.
2384 */
2385 if (pmap_stroll(pmap, va, &a_tbl, &b_tbl, &c_tbl, &pte, &a_idx,
2386 &b_idx, &c_idx) == FALSE)
2387 return;
2388
2389
2390 /* Is the PTE wired? If not, return. */
2391 if (!(pte->attr.raw & MMU_SHORT_PTE_WIRED))
2392 return;
2393
2394 /* Remove the wiring bit. */
2395 pte->attr.raw &= ~(MMU_SHORT_PTE_WIRED);
2396
2397 /*
2398 * Decrement the wired entry count in the C table.
2399 * If it reaches zero the following things happen:
2400 * 1. The table no longer has any wired entries and is considered
2401 * unwired.
2402 * 2. It is placed on the available queue.
2403 * 3. The parent table's wired entry count is decremented.
2404 * 4. If it reaches zero, this process repeats at step 1 and
2405  *    stops after reaching the A table.
2406 */
2407 if (--c_tbl->ct_wcnt == 0) {
2408 TAILQ_INSERT_TAIL(&c_pool, c_tbl, ct_link);
2409 if (--b_tbl->bt_wcnt == 0) {
2410 TAILQ_INSERT_TAIL(&b_pool, b_tbl, bt_link);
2411 if (--a_tbl->at_wcnt == 0) {
2412 TAILQ_INSERT_TAIL(&a_pool, a_tbl, at_link);
2413 }
2414 }
2415 }
2416 }
2417
2418 /* pmap_pageable INTERFACE
2419 **
2420 * Make the specified range of addresses within the given pmap,
2421  * 'pageable' or 'not-pageable'. A non-pageable (wired) page must not
2422  * cause any faults when referenced; a pageable page may.
2423 *
2424 * This routine is only advisory. The VM system will call pmap_enter()
2425 * to wire or unwire pages that are going to be made pageable before calling
2426 * this function. By the time this routine is called, everything that needs
2427 * to be done has already been done.
2428 */
2429 void
2430 pmap_pageable(pmap, start, end, pageable)
2431 pmap_t pmap;
2432 vm_offset_t start, end;
2433 boolean_t pageable;
2434 {
2435 /* not implemented. */
2436 }
2437
2438 /* pmap_copy INTERFACE
2439 **
2440 * Copy the mappings of a range of addresses in one pmap, into
2441 * the destination address of another.
2442 *
2443 * This routine is advisory. Should we one day decide that MMU tables
2444 * may be shared by more than one pmap, this function should be used to
2445 * link them together. Until that day however, we do nothing.
2446 */
2447 void
2448 pmap_copy(pmap_a, pmap_b, dst, len, src)
2449 pmap_t pmap_a, pmap_b;
2450 vm_offset_t dst;
2451 vm_size_t len;
2452 vm_offset_t src;
2453 {
2454 /* not implemented. */
2455 }
2456
2457 /* pmap_copy_page INTERFACE
2458 **
2459 * Copy the contents of one physical page into another.
2460 *
2461 * This function makes use of two virtual pages allocated in pmap_bootstrap()
2462 * to map the two specified physical pages into the kernel address space.
2463 *
2464 * Note: We could use the transparent translation registers to make the
2465 * mappings. If we do so, be sure to disable interrupts before using them.
2466 */
2467 void
2468 pmap_copy_page(srcpa, dstpa)
2469 vm_offset_t srcpa, dstpa;
2470 {
2471 vm_offset_t srcva, dstva;
2472 int s;
2473
2474 srcva = tmp_vpages[0];
2475 dstva = tmp_vpages[1];
2476
2477 s = splimp();
2478 if (tmp_vpages_inuse++)
2479 panic("pmap_copy_page: temporary vpages are in use.");
2480
2481 	/* Map pages as non-cacheable to avoid cache pollution? */
2482 pmap_enter_kernel(srcva, srcpa, VM_PROT_READ);
2483 pmap_enter_kernel(dstva, dstpa, VM_PROT_READ|VM_PROT_WRITE);
2484
2485 /* Hand-optimized version of bcopy(src, dst, NBPG) */
2486 copypage((char *) srcva, (char *) dstva);
2487
2488 pmap_remove_kernel(srcva, srcva + NBPG);
2489 pmap_remove_kernel(dstva, dstva + NBPG);
2490
2491 --tmp_vpages_inuse;
2492 splx(s);
2493 }
2494
2495 /* pmap_zero_page INTERFACE
2496 **
2497 * Zero the contents of the specified physical page.
2498 *
2499  * Uses one of the virtual pages allocated in pmap_bootstrap()
2500 * to map the specified page into the kernel address space.
2501 */
2502 void
2503 pmap_zero_page(dstpa)
2504 vm_offset_t dstpa;
2505 {
2506 vm_offset_t dstva;
2507 int s;
2508
2509 dstva = tmp_vpages[1];
2510 s = splimp();
2511 if (tmp_vpages_inuse++)
2512 panic("pmap_zero_page: temporary vpages are in use.");
2513
2514 /* The comments in pmap_copy_page() above apply here also. */
2515 pmap_enter_kernel(dstva, dstpa, VM_PROT_READ|VM_PROT_WRITE);
2516
2517 /* Hand-optimized version of bzero(ptr, NBPG) */
2518 zeropage((char *) dstva);
2519
2520 pmap_remove_kernel(dstva, dstva + NBPG);
2521
2522 --tmp_vpages_inuse;
2523 splx(s);
2524 }
2525
2526 /* pmap_collect INTERFACE
2527 **
2528 * Called from the VM system when we are about to swap out
2529 * the process using this pmap. This should give up any
2530 * resources held here, including all its MMU tables.
2531 */
2532 void
2533 pmap_collect(pmap)
2534 pmap_t pmap;
2535 {
2536 /* XXX - todo... */
2537 }
2538
2539 /* pmap_create INTERFACE
2540 **
2541 * Create and return a pmap structure.
2542 */
2543 pmap_t
2544 pmap_create(size)
2545 vm_size_t size;
2546 {
2547 pmap_t pmap;
2548
2549 if (size)
2550 return NULL;
2551
2552 pmap = (pmap_t) malloc(sizeof(struct pmap), M_VMPMAP, M_WAITOK);
2553 pmap_pinit(pmap);
2554
2555 return pmap;
2556 }
2557
2558 /* pmap_pinit INTERNAL
2559 **
2560 * Initialize a pmap structure.
2561 */
2562 void
2563 pmap_pinit(pmap)
2564 pmap_t pmap;
2565 {
2566 bzero(pmap, sizeof(struct pmap));
2567 pmap->pm_a_tmgr = NULL;
2568 pmap->pm_a_phys = kernAphys;
2569 }
2570
2571 /* pmap_release INTERFACE
2572 **
2573 * Release any resources held by the given pmap.
2574 *
2575  * This is the reverse analog to pmap_pinit. It does not
2576  * necessarily mean that the pmap structure is to be deallocated,
2577  * as it is in pmap_destroy.
2578 */
2579 void
2580 pmap_release(pmap)
2581 pmap_t pmap;
2582 {
2583 /*
2584 * As long as the pmap contains no mappings,
2585 * which always should be the case whenever
2586 * this function is called, there really should
2587 * be nothing to do.
2588 */
2589 #ifdef PMAP_DEBUG
2590 if (pmap == NULL)
2591 return;
2592 if (pmap == pmap_kernel())
2593 panic("pmap_release: kernel pmap");
2594 #endif
2595 /*
2596 * XXX - If this pmap has an A table, give it back.
2597 * The pmap SHOULD be empty by now, and pmap_remove
2598 * should have already given back the A table...
2599 * However, I see: pmap->pm_a_tmgr->at_ecnt == 1
2600 * at this point, which means some mapping was not
2601 * removed when it should have been. -gwr
2602 */
2603 if (pmap->pm_a_tmgr != NULL) {
2604 /* First make sure we are not using it! */
2605 if (kernel_crp.rp_addr == pmap->pm_a_phys) {
2606 kernel_crp.rp_addr = kernAphys;
2607 loadcrp(&kernel_crp);
2608 }
2609 #ifdef PMAP_DEBUG /* XXX - todo! */
2610 /* XXX - Now complain... */
2611 printf("pmap_release: still have table\n");
2612 Debugger();
2613 #endif
2614 free_a_table(pmap->pm_a_tmgr, TRUE);
2615 pmap->pm_a_tmgr = NULL;
2616 pmap->pm_a_phys = kernAphys;
2617 }
2618 }
2619
2620 /* pmap_reference INTERFACE
2621 **
2622 * Increment the reference count of a pmap.
2623 */
2624 void
2625 pmap_reference(pmap)
2626 pmap_t pmap;
2627 {
2628 if (pmap == NULL)
2629 return;
2630
2631 /* pmap_lock(pmap); */
2632 pmap->pm_refcount++;
2633 /* pmap_unlock(pmap); */
2634 }
2635
2636 /* pmap_dereference INTERNAL
2637 **
2638 * Decrease the reference count on the given pmap
2639 * by one and return the current count.
2640 */
2641 int
2642 pmap_dereference(pmap)
2643 pmap_t pmap;
2644 {
2645 int rtn;
2646
2647 if (pmap == NULL)
2648 return 0;
2649
2650 /* pmap_lock(pmap); */
2651 rtn = --pmap->pm_refcount;
2652 /* pmap_unlock(pmap); */
2653
2654 return rtn;
2655 }
2656
2657 /* pmap_destroy INTERFACE
2658 **
2659 * Decrement a pmap's reference count and delete
2660 * the pmap if it becomes zero. Will be called
2661 * only after all mappings have been removed.
2662 */
2663 void
2664 pmap_destroy(pmap)
2665 pmap_t pmap;
2666 {
2667 if (pmap == NULL)
2668 return;
2669 if (pmap == &kernel_pmap)
2670 panic("pmap_destroy: kernel_pmap!");
2671 if (pmap_dereference(pmap) == 0) {
2672 pmap_release(pmap);
2673 free(pmap, M_VMPMAP);
2674 }
2675 }
2676
2677 /* pmap_is_referenced INTERFACE
2678 **
2679 * Determine if the given physical page has been
2680  * referenced (read from or written to).
2681 */
2682 boolean_t
2683 pmap_is_referenced(pa)
2684 vm_offset_t pa;
2685 {
2686 pv_t *pv;
2687 int idx, s;
2688
2689 if (!pv_initialized)
2690 return FALSE;
2691 	/* XXX - this may be unnecessary. */
2692 if (!is_managed(pa))
2693 return FALSE;
2694
2695 pv = pa2pv(pa);
2696 /*
2697 * Check the flags on the pv head. If they are set,
2698 * return immediately. Otherwise a search must be done.
2699 */
2700 if (pv->pv_flags & PV_FLAGS_USED)
2701 return TRUE;
2702
2703 s = splimp();
2704 /*
2705 * Search through all pv elements pointing
2706 * to this page and query their reference bits
2707 */
2708 for (idx = pv->pv_idx;
2709 idx != PVE_EOL;
2710 idx = pvebase[idx].pve_next) {
2711
2712 if (MMU_PTE_USED(kernCbase[idx])) {
2713 splx(s);
2714 return TRUE;
2715 }
2716 }
2717 splx(s);
2718
2719 return FALSE;
2720 }
2721
2722 /* pmap_is_modified INTERFACE
2723 **
2724 * Determine if the given physical page has been
2725 * modified (written to.)
2726 */
2727 boolean_t
2728 pmap_is_modified(pa)
2729 vm_offset_t pa;
2730 {
2731 pv_t *pv;
2732 int idx, s;
2733
2734 if (!pv_initialized)
2735 return FALSE;
2736 	/* XXX - this may be unnecessary. */
2737 if (!is_managed(pa))
2738 return FALSE;
2739
2740 /* see comments in pmap_is_referenced() */
2741 pv = pa2pv(pa);
2742 if (pv->pv_flags & PV_FLAGS_MDFY)
2743 return TRUE;
2744
2745 s = splimp();
2746 for (idx = pv->pv_idx;
2747 idx != PVE_EOL;
2748 idx = pvebase[idx].pve_next) {
2749
2750 if (MMU_PTE_MODIFIED(kernCbase[idx])) {
2751 splx(s);
2752 return TRUE;
2753 }
2754 }
2755 splx(s);
2756
2757 return FALSE;
2758 }
2759
2760 /* pmap_page_protect INTERFACE
2761 **
2762 * Applies the given protection to all mappings to the given
2763 * physical page.
2764 */
2765 void
2766 pmap_page_protect(pa, prot)
2767 vm_offset_t pa;
2768 vm_prot_t prot;
2769 {
2770 pv_t *pv;
2771 int idx, s;
2772 vm_offset_t va;
2773 struct mmu_short_pte_struct *pte;
2774 c_tmgr_t *c_tbl;
2775 pmap_t pmap, curpmap;
2776
2777 if (!is_managed(pa))
2778 return;
2779
2780 curpmap = current_pmap();
2781 pv = pa2pv(pa);
2782 s = splimp();
2783
2784 for (idx = pv->pv_idx;
2785 idx != PVE_EOL;
2786 idx = pvebase[idx].pve_next) {
2787
2788 pte = &kernCbase[idx];
2789 switch (prot) {
2790 case VM_PROT_ALL:
2791 /* do nothing */
2792 break;
2793 case VM_PROT_EXECUTE:
2794 case VM_PROT_READ:
2795 case VM_PROT_READ|VM_PROT_EXECUTE:
2796 /*
2797 * Determine the virtual address mapped by
2798 * the PTE and flush ATC entries if necessary.
2799 */
2800 va = pmap_get_pteinfo(idx, &pmap, &c_tbl);
2801 /* XXX don't write protect pager mappings */
2802 if (va >= PAGER_SVA && va < PAGER_EVA) {
2803 #ifdef PMAP_DEBUG
2804 /* XXX - Does this actually happen? */
2805 printf("pmap_page_protect: in pager!\n");
2806 Debugger();
2807 #endif
2808 } else
2809 pte->attr.raw |= MMU_SHORT_PTE_WP;
2810 if (pmap == curpmap || pmap == pmap_kernel())
2811 TBIS(va);
2812 break;
2813 case VM_PROT_NONE:
2814 /* Save the mod/ref bits. */
2815 pv->pv_flags |= pte->attr.raw;
2816 /* Invalidate the PTE. */
2817 pte->attr.raw = MMU_DT_INVALID;
2818
2819 /*
2820 * Update table counts. And flush ATC entries
2821 * if necessary.
2822 */
2823 va = pmap_get_pteinfo(idx, &pmap, &c_tbl);
2824
2825 /*
2826 * If the PTE belongs to the kernel map,
2827 * be sure to flush the page it maps.
2828 */
2829 if (pmap == pmap_kernel()) {
2830 TBIS(va);
2831 } else {
2832 /*
2833 * The PTE belongs to a user map.
2834 * update the entry count in the C
2835 * table to which it belongs and flush
2836 * the ATC if the mapping belongs to
2837 * the current pmap.
2838 */
2839 c_tbl->ct_ecnt--;
2840 if (pmap == curpmap)
2841 TBIS(va);
2842 }
2843 break;
2844 default:
2845 break;
2846 }
2847 }
2848
2849 /*
2850 * If the protection code indicates that all mappings to the page
2851 * be removed, truncate the PV list to zero entries.
2852 */
2853 if (prot == VM_PROT_NONE)
2854 pv->pv_idx = PVE_EOL;
2855 splx(s);
2856 }
2857
2858 /* pmap_get_pteinfo INTERNAL
2859 **
2860 * Called internally to find the pmap and virtual address within that
2861  * map to which the pte at the given index maps. For user mappings, it
2862  * also returns the PTE's C table manager through the 'tbl' argument.
2863 *
2864 * Returns the pmap in the argument provided, and the virtual address
2865 * by return value.
2866 */
2867 vm_offset_t
2868 pmap_get_pteinfo(idx, pmap, tbl)
2869 u_int idx;
2870 pmap_t *pmap;
2871 c_tmgr_t **tbl;
2872 {
2873 vm_offset_t va = 0;
2874
2875 /*
2876 * Determine if the PTE is a kernel PTE or a user PTE.
2877 */
2878 if (idx >= NUM_KERN_PTES) {
2879 /*
2880 * The PTE belongs to a user mapping.
2881 */
2882 /* XXX: Would like an inline for this to validate idx... */
2883 *tbl = &Ctmgrbase[(idx - NUM_KERN_PTES) / MMU_C_TBL_SIZE];
2884
2885 *pmap = (*tbl)->ct_pmap;
2886 /*
2887 * To find the va to which the PTE maps, we first take
2888 * the table's base virtual address mapping which is stored
2889 * in ct_va. We then increment this address by a page for
2890 * every slot skipped until we reach the PTE.
2891 */
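		/*
		 * For example (assuming 64-entry C tables): a PTE at
		 * idx == NUM_KERN_PTES + 130 sits in slot 2 (130 % 64) of the
		 * third user C table, so va is that table's ct_va plus two
		 * pages.
		 */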
2892 va = (*tbl)->ct_va;
2893 va += m68k_ptob(idx % MMU_C_TBL_SIZE);
2894 } else {
2895 /*
2896 * The PTE belongs to the kernel map.
2897 */
2898 *pmap = pmap_kernel();
2899
2900 va = m68k_ptob(idx);
2901 va += KERNBASE;
2902 }
2903
2904 return va;
2905 }
2906
2907 /* pmap_clear_modify INTERFACE
2908 **
2909 * Clear the modification bit on the page at the specified
2910 * physical address.
2911 *
2912 */
2913 void
2914 pmap_clear_modify(pa)
2915 vm_offset_t pa;
2916 {
2917 if (!is_managed(pa))
2918 return;
2919 pmap_clear_pv(pa, PV_FLAGS_MDFY);
2920 }
2921
2922 /* pmap_clear_reference INTERFACE
2923 **
2924 * Clear the referenced bit on the page at the specified
2925 * physical address.
2926 */
2927 void
2928 pmap_clear_reference(pa)
2929 vm_offset_t pa;
2930 {
2931 if (!is_managed(pa))
2932 return;
2933 pmap_clear_pv(pa, PV_FLAGS_USED);
2934 }
2935
2936 /* pmap_clear_pv INTERNAL
2937 **
2938 * Clears the specified flag from the specified physical address.
2939 * (Used by pmap_clear_modify() and pmap_clear_reference().)
2940 *
2941 * Flag is one of:
2942 * PV_FLAGS_MDFY - Page modified bit.
2943 * PV_FLAGS_USED - Page used (referenced) bit.
2944 *
2945 * This routine must not only clear the flag on the pv list
2946 * head. It must also clear the bit on every pte in the pv
2947 * list associated with the address.
2948 */
2949 void
2950 pmap_clear_pv(pa, flag)
2951 vm_offset_t pa;
2952 int flag;
2953 {
2954 pv_t *pv;
2955 int idx, s;
2956 vm_offset_t va;
2957 pmap_t pmap;
2958 mmu_short_pte_t *pte;
2959 c_tmgr_t *c_tbl;
2960
2961 pv = pa2pv(pa);
2962
2963 s = splimp();
2964 pv->pv_flags &= ~(flag);
2965
2966 for (idx = pv->pv_idx;
2967 idx != PVE_EOL;
2968 idx = pvebase[idx].pve_next) {
2969
2970 pte = &kernCbase[idx];
2971 pte->attr.raw &= ~(flag);
2972 /*
2973 * The MC68030 MMU will not set the modified or
2974 * referenced bits on any MMU tables for which it has
2975 		 * a cached descriptor with its modify bit set. To ensure
2976 * that it will modify these bits on the PTE during the next
2977 * time it is written to or read from, we must flush it from
2978 * the ATC.
2979 *
2980 * Ordinarily it is only necessary to flush the descriptor
2981 * if it is used in the current address space. But since I
2982 * am not sure that there will always be a notion of
2983 * 'the current address space' when this function is called,
2984 * I will skip the test and always flush the address. It
2985 * does no harm.
2986 */
2987 va = pmap_get_pteinfo(idx, &pmap, &c_tbl);
2988 TBIS(va);
2989 }
2990 splx(s);
2991 }
2992
2993 /* pmap_extract INTERFACE
2994 **
2995 * Return the physical address mapped by the virtual address
2996 * in the specified pmap or 0 if it is not known.
2997 *
2998 * Note: this function should also apply an exclusive lock
2999 * on the pmap system during its duration.
3000 */
3001 vm_offset_t
3002 pmap_extract(pmap, va)
3003 pmap_t pmap;
3004 vm_offset_t va;
3005 {
3006 int a_idx, b_idx, pte_idx;
3007 a_tmgr_t *a_tbl;
3008 b_tmgr_t *b_tbl;
3009 c_tmgr_t *c_tbl;
3010 mmu_short_pte_t *c_pte;
3011
3012 if (pmap == pmap_kernel())
3013 return pmap_extract_kernel(va);
3014 if (pmap == NULL)
3015 return 0;
3016
3017 if (pmap_stroll(pmap, va, &a_tbl, &b_tbl, &c_tbl,
3018 &c_pte, &a_idx, &b_idx, &pte_idx) == FALSE)
3019 return 0;
3020
3021 if (!MMU_VALID_DT(*c_pte))
3022 return 0;
3023
3024 return (MMU_PTE_PA(*c_pte));
3025 }
3026
3027 /* pmap_extract_kernel INTERNAL
3028 **
3029 * Extract a translation from the kernel address space.
3030 */
3031 vm_offset_t
3032 pmap_extract_kernel(va)
3033 vm_offset_t va;
3034 {
3035 mmu_short_pte_t *pte;
3036
3037 pte = &kernCbase[(u_int) m68k_btop(va - KERNBASE)];
3038 return MMU_PTE_PA(*pte);
3039 }
3040
3041 /* pmap_remove_kernel INTERNAL
3042 **
3043 * Remove the mapping of a range of virtual addresses from the kernel map.
3044 * The arguments are already page-aligned.
3045 */
3046 void
3047 pmap_remove_kernel(sva, eva)
3048 vm_offset_t sva;
3049 vm_offset_t eva;
3050 {
3051 int idx, eidx;
3052
3053 #ifdef PMAP_DEBUG
3054 if ((sva & PGOFSET) || (eva & PGOFSET))
3055 panic("pmap_remove_kernel: alignment");
3056 #endif
3057
3058 idx = m68k_btop(sva - KERNBASE);
3059 eidx = m68k_btop(eva - KERNBASE);
3060
3061 while (idx < eidx) {
3062 pmap_remove_pte(&kernCbase[idx++]);
3063 TBIS(sva);
3064 sva += NBPG;
3065 }
3066 }
3067
3068 /* pmap_remove INTERFACE
3069 **
3070 * Remove the mapping of a range of virtual addresses from the given pmap.
3071 *
3072 * If the range contains any wired entries, this function will probably create
3073 * disaster.
3074 */
3075 void
3076 pmap_remove(pmap, start, end)
3077 pmap_t pmap;
3078 vm_offset_t start;
3079 vm_offset_t end;
3080 {
3081
3082 if (pmap == pmap_kernel()) {
3083 pmap_remove_kernel(start, end);
3084 return;
3085 }
3086
3087 /*
3088 * XXX - Temporary(?) statement to prevent panic caused
3089 * by vm_alloc_with_pager() handing us a software map (ie NULL)
3090 * to remove because it couldn't get backing store.
3091 * (I guess.)
3092 */
3093 if (pmap == NULL)
3094 return;
3095
3096 /*
3097 * If the pmap doesn't have an A table of its own, it has no mappings
3098 * that can be removed.
3099 */
3100 if (pmap->pm_a_tmgr == NULL)
3101 return;
3102
3103 /*
3104 * Remove the specified range from the pmap. If the function
3105 * returns true, the operation removed all the valid mappings
3106 * in the pmap and freed its A table. If this happened to the
3107 * currently loaded pmap, the MMU root pointer must be reloaded
3108 * with the default 'kernel' map.
3109 */
3110 if (pmap_remove_a(pmap->pm_a_tmgr, start, end)) {
3111 if (kernel_crp.rp_addr == pmap->pm_a_phys) {
3112 kernel_crp.rp_addr = kernAphys;
3113 loadcrp(&kernel_crp);
3114 /* will do TLB flush below */
3115 }
3116 pmap->pm_a_tmgr = NULL;
3117 pmap->pm_a_phys = kernAphys;
3118 }
3119
3120 /*
3121 * If we just modified the current address space,
3122 * make sure to flush the MMU cache.
3123 *
3124 	 * XXX - this could be an unnecessarily large flush.
3125 * XXX - Could decide, based on the size of the VA range
3126 * to be removed, whether to flush "by pages" or "all".
3127 */
3128 if (pmap == current_pmap())
3129 TBIAU();
3130 }
3131
3132 /* pmap_remove_a INTERNAL
3133 **
3134 * This is function number one in a set of three that removes a range
3135 * of memory in the most efficient manner by removing the highest possible
3136 * tables from the memory space. This particular function attempts to remove
3137 * as many B tables as it can, delegating the remaining fragmented ranges to
3138 * pmap_remove_b().
3139 *
3140 * If the removal operation results in an empty A table, the function returns
3141 * TRUE.
3142 *
3143 * It's ugly but will do for now.
3144 */
3145 boolean_t
3146 pmap_remove_a(a_tbl, start, end)
3147 a_tmgr_t *a_tbl;
3148 vm_offset_t start;
3149 vm_offset_t end;
3150 {
3151 boolean_t empty;
3152 int idx;
3153 vm_offset_t nstart, nend;
3154 b_tmgr_t *b_tbl;
3155 mmu_long_dte_t *a_dte;
3156 mmu_short_dte_t *b_dte;
3157
3158 /*
3159 * The following code works with what I call a 'granularity
3160 	 * reduction algorithm'. A range of addresses will always have
3161 * the following properties, which are classified according to
3162 * how the range relates to the size of the current granularity
3163 * - an A table entry:
3164 *
3165 * 1 2 3 4
3166 * -+---+---+---+---+---+---+---+-
3167 * -+---+---+---+---+---+---+---+-
3168 *
3169 * A range will always start on a granularity boundary, illustrated
3170 * by '+' signs in the table above, or it will start at some point
3171 	 * in between granularity boundaries, as illustrated by point 1.
3172 * The first step in removing a range of addresses is to remove the
3173 * range between 1 and 2, the nearest granularity boundary. This
3174 * job is handled by the section of code governed by the
3175 * 'if (start < nstart)' statement.
3176 *
3177 	 * A range will always encompass zero or more integral granules,
3178 * illustrated by points 2 and 3. Integral granules are easy to
3179 * remove. The removal of these granules is the second step, and
3180 * is handled by the code block 'if (nstart < nend)'.
3181 *
3182 * Lastly, a range will always end on a granularity boundary,
3183 	 * illustrated by point 3, or it will fall just beyond one, as shown
3184 	 * by point 4. The last step involves removing this range and is handled by
3185 * the code block 'if (nend < end)'.
3186 */
3187 nstart = MMU_ROUND_UP_A(start);
3188 nend = MMU_ROUND_A(end);
3189
3190 if (start < nstart) {
3191 /*
3192 * This block is executed if the range starts between
3193 * a granularity boundary.
3194 *
3195 * First find the DTE which is responsible for mapping
3196 * the start of the range.
3197 */
3198 idx = MMU_TIA(start);
3199 a_dte = &a_tbl->at_dtbl[idx];
3200
3201 /*
3202 * If the DTE is valid then delegate the removal of the sub
3203 * range to pmap_remove_b(), which can remove addresses at
3204 * a finer granularity.
3205 */
3206 if (MMU_VALID_DT(*a_dte)) {
3207 b_dte = mmu_ptov(a_dte->addr.raw);
3208 b_tbl = mmuB2tmgr(b_dte);
3209
3210 /*
3211 * The sub range to be removed starts at the start
3212 * of the full range we were asked to remove, and ends
3213 			 * at the lesser of:
3214 			 * 1. The end of the full range, -or-
3215 			 * 2. The start of the full range, rounded up to the
3216 			 *    nearest granularity boundary (nstart).
3217 */
3218 if (end < nstart)
3219 empty = pmap_remove_b(b_tbl, start, end);
3220 else
3221 empty = pmap_remove_b(b_tbl, start, nstart);
3222
3223 /*
3224 * If the removal resulted in an empty B table,
3225 * invalidate the DTE that points to it and decrement
3226 * the valid entry count of the A table.
3227 */
3228 if (empty) {
3229 a_dte->attr.raw = MMU_DT_INVALID;
3230 a_tbl->at_ecnt--;
3231 }
3232 }
3233 /*
3234 	 * If the DTE is invalid, the address range is already
3235 	 * nonexistent and can simply be skipped.
3236 */
3237 }
3238 if (nstart < nend) {
3239 /*
3240 * This block is executed if the range spans a whole number
3241 * multiple of granules (A table entries.)
3242 *
3243 * First find the DTE which is responsible for mapping
3244 * the start of the first granule involved.
3245 */
3246 idx = MMU_TIA(nstart);
3247 a_dte = &a_tbl->at_dtbl[idx];
3248
3249 /*
3250 * Remove entire sub-granules (B tables) one at a time,
3251 * until reaching the end of the range.
3252 */
3253 for (; nstart < nend; a_dte++, nstart += MMU_TIA_RANGE)
3254 if (MMU_VALID_DT(*a_dte)) {
3255 /*
3256 * Find the B table manager for the
3257 * entry and free it.
3258 */
3259 b_dte = mmu_ptov(a_dte->addr.raw);
3260 b_tbl = mmuB2tmgr(b_dte);
3261 free_b_table(b_tbl, TRUE);
3262
3263 /*
3264 * Invalidate the DTE that points to the
3265 * B table and decrement the valid entry
3266 * count of the A table.
3267 */
3268 a_dte->attr.raw = MMU_DT_INVALID;
3269 a_tbl->at_ecnt--;
3270 }
3271 }
3272 if (nend < end) {
3273 /*
3274 * This block is executed if the range ends beyond a
3275 * granularity boundary.
3276 *
3277 * First find the DTE which is responsible for mapping
3278 * the start of the nearest (rounded down) granularity
3279 * boundary.
3280 */
3281 idx = MMU_TIA(nend);
3282 a_dte = &a_tbl->at_dtbl[idx];
3283
3284 /*
3285 * If the DTE is valid then delegate the removal of the sub
3286 * range to pmap_remove_b(), which can remove addresses at
3287 * a finer granularity.
3288 */
3289 if (MMU_VALID_DT(*a_dte)) {
3290 /*
3291 * Find the B table manager for the entry
3292 * and hand it to pmap_remove_b() along with
3293 * the sub range.
3294 */
3295 b_dte = mmu_ptov(a_dte->addr.raw);
3296 b_tbl = mmuB2tmgr(b_dte);
3297
3298 empty = pmap_remove_b(b_tbl, nend, end);
3299
3300 /*
3301 * If the removal resulted in an empty B table,
3302 * invalidate the DTE that points to it and decrement
3303 * the valid entry count of the A table.
3304 */
3305 if (empty) {
3306 a_dte->attr.raw = MMU_DT_INVALID;
3307 a_tbl->at_ecnt--;
3308 }
3309 }
3310 }
3311
3312 /*
3313 * If there are no more entries in the A table, release it
3314 * back to the available pool and return TRUE.
3315 */
3316 if (a_tbl->at_ecnt == 0) {
3317 a_tbl->at_parent = NULL;
3318 TAILQ_REMOVE(&a_pool, a_tbl, at_link);
3319 TAILQ_INSERT_HEAD(&a_pool, a_tbl, at_link);
3320 empty = TRUE;
3321 } else {
3322 empty = FALSE;
3323 }
3324
3325 return empty;
3326 }
3327
3328 /* pmap_remove_b INTERNAL
3329 **
3330 * Remove a range of addresses from an address space, trying to remove entire
3331 * C tables if possible.
3332 *
3333 * If the operation results in an empty B table, the function returns TRUE.
3334 */
3335 boolean_t
3336 pmap_remove_b(b_tbl, start, end)
3337 b_tmgr_t *b_tbl;
3338 vm_offset_t start;
3339 vm_offset_t end;
3340 {
3341 boolean_t empty;
3342 int idx;
3343 vm_offset_t nstart, nend, rstart;
3344 c_tmgr_t *c_tbl;
3345 mmu_short_dte_t *b_dte;
3346 mmu_short_pte_t *c_dte;
3347
3348
3349 nstart = MMU_ROUND_UP_B(start);
3350 nend = MMU_ROUND_B(end);
3351
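	/*
	 * As in pmap_remove_a(), handle any partial leading range first:
	 * if 'start' is not on a TIB (C table) boundary, delegate the
	 * fragment up to the next boundary (or 'end', if that comes first)
	 * to pmap_remove_c(), and invalidate the B descriptor if the C
	 * table ends up empty.
	 */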
3352 if (start < nstart) {
3353 idx = MMU_TIB(start);
3354 b_dte = &b_tbl->bt_dtbl[idx];
3355 if (MMU_VALID_DT(*b_dte)) {
3356 c_dte = mmu_ptov(MMU_DTE_PA(*b_dte));
3357 c_tbl = mmuC2tmgr(c_dte);
3358 if (end < nstart)
3359 empty = pmap_remove_c(c_tbl, start, end);
3360 else
3361 empty = pmap_remove_c(c_tbl, start, nstart);
3362 if (empty) {
3363 b_dte->attr.raw = MMU_DT_INVALID;
3364 b_tbl->bt_ecnt--;
3365 }
3366 }
3367 }
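	/*
	 * Next remove all whole TIB ranges covered by the range, freeing
	 * each valid C table outright and invalidating the B descriptor
	 * that pointed to it.
	 */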
3368 if (nstart < nend) {
3369 idx = MMU_TIB(nstart);
3370 b_dte = &b_tbl->bt_dtbl[idx];
3371 rstart = nstart;
3372 while (rstart < nend) {
3373 if (MMU_VALID_DT(*b_dte)) {
3374 c_dte = mmu_ptov(MMU_DTE_PA(*b_dte));
3375 c_tbl = mmuC2tmgr(c_dte);
3376 free_c_table(c_tbl, TRUE);
3377 b_dte->attr.raw = MMU_DT_INVALID;
3378 b_tbl->bt_ecnt--;
3379 }
3380 b_dte++;
3381 rstart += MMU_TIB_RANGE;
3382 }
3383 }
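	/*
	 * Finally, handle any partial trailing range beyond the last whole
	 * TIB boundary, again delegating it to pmap_remove_c().
	 */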
3384 if (nend < end) {
3385 idx = MMU_TIB(nend);
3386 b_dte = &b_tbl->bt_dtbl[idx];
3387 if (MMU_VALID_DT(*b_dte)) {
3388 c_dte = mmu_ptov(MMU_DTE_PA(*b_dte));
3389 c_tbl = mmuC2tmgr(c_dte);
3390 empty = pmap_remove_c(c_tbl, nend, end);
3391 if (empty) {
3392 b_dte->attr.raw = MMU_DT_INVALID;
3393 b_tbl->bt_ecnt--;
3394 }
3395 }
3396 }
3397
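	/*
	 * If no valid entries remain in this B table, move it to the head
	 * of the available pool and report it empty so the caller can
	 * invalidate the A descriptor pointing to it.
	 */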
3398 if (b_tbl->bt_ecnt == 0) {
3399 b_tbl->bt_parent = NULL;
3400 TAILQ_REMOVE(&b_pool, b_tbl, bt_link);
3401 TAILQ_INSERT_HEAD(&b_pool, b_tbl, bt_link);
3402 empty = TRUE;
3403 } else {
3404 empty = FALSE;
3405 }
3406
3407 return empty;
3408 }
3409
3410 /* pmap_remove_c INTERNAL
3411 **
3412 * Remove a range of addresses from the given C table.
3413 */
3414 boolean_t
3415 pmap_remove_c(c_tbl, start, end)
3416 c_tmgr_t *c_tbl;
3417 vm_offset_t start;
3418 vm_offset_t end;
3419 {
3420 boolean_t empty;
3421 int idx;
3422 mmu_short_pte_t *c_pte;
3423
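	/*
	 * Walk the PTEs covering [start, end), invalidating each valid one
	 * with pmap_remove_pte() and updating the C table's valid entry
	 * count.
	 */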
3424 idx = MMU_TIC(start);
3425 c_pte = &c_tbl->ct_dtbl[idx];
3426 for (;start < end; start += MMU_PAGE_SIZE, c_pte++) {
3427 if (MMU_VALID_DT(*c_pte)) {
3428 pmap_remove_pte(c_pte);
3429 c_tbl->ct_ecnt--;
3430 }
3431 }
3432
3433 if (c_tbl->ct_ecnt == 0) {
3434 c_tbl->ct_parent = NULL;
3435 TAILQ_REMOVE(&c_pool, c_tbl, ct_link);
3436 TAILQ_INSERT_HEAD(&c_pool, c_tbl, ct_link);
3437 empty = TRUE;
3438 } else {
3439 empty = FALSE;
3440 }
3441
3442 return empty;
3443 }
3444
3445 /* is_managed INTERNAL
3446 **
3447 * Determine if the given physical address is managed by the PV system.
3448 * Note that this logic assumes that no one will ask for the status of
3449 * addresses which lie in-between the memory banks on the 3/80. If they
3450 * do so, it will falsely report that it is managed.
3451 *
3452 * Note: A "managed" address is one that was reported to the VM system as
3453 * a "usable page" during system startup. As such, the VM system expects the
3454  * pmap module to keep accurate track of the usage of those pages.
3455 * Any page not given to the VM system at startup does not exist (as far as
3456 * the VM system is concerned) and is therefore "unmanaged." Examples are
3457 * those pages which belong to the ROM monitor and the memory allocated before
3458 * the VM system was started.
3459 */
3460 boolean_t
3461 is_managed(pa)
3462 vm_offset_t pa;
3463 {
3464 if (pa >= avail_start && pa < avail_end)
3465 return TRUE;
3466 else
3467 return FALSE;
3468 }
3469
3470 /* pmap_bootstrap_alloc INTERNAL
3471 **
3472 * Used internally for memory allocation at startup when malloc is not
3473 * available. This code will fail once it crosses the first memory
3474 * bank boundary on the 3/80. Hopefully by then however, the VM system
3475 * will be in charge of allocation.
3476 */
3477 void *
3478 pmap_bootstrap_alloc(size)
3479 int size;
3480 {
3481 void *rtn;
3482
3483 #ifdef PMAP_DEBUG
3484 if (bootstrap_alloc_enabled == FALSE) {
3485 mon_printf("pmap_bootstrap_alloc: disabled\n");
3486 sunmon_abort();
3487 }
3488 #endif
3489
3490 rtn = (void *) virtual_avail;
3491 virtual_avail += size;
3492
3493 #ifdef PMAP_DEBUG
3494 if (virtual_avail > virtual_contig_end) {
3495 mon_printf("pmap_bootstrap_alloc: out of mem\n");
3496 sunmon_abort();
3497 }
3498 #endif
3499
3500 return rtn;
3501 }
3502
3503 /* pmap_bootstrap_aalign			INTERNAL
3504 **
3505 * Used to ensure that the next call to pmap_bootstrap_alloc() will
3506 * return a chunk of memory aligned to the specified size.
3507 *
3508 * Note: This function will only support alignment sizes that are powers
3509 * of two.
3510 */
3511 void
3512 pmap_bootstrap_aalign(size)
3513 int size;
3514 {
3515 int off;
3516
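	/*
	 * For example (illustrative numbers only): if virtual_avail ended
	 * in 0x2c00 and size were 0x1000, 'off' would be 0xc00 and the
	 * 0x400 bytes up to the next 0x1000 boundary would be thrown away.
	 */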
3517 off = virtual_avail & (size - 1);
3518 if (off) {
3519 (void) pmap_bootstrap_alloc(size - off);
3520 }
3521 }
3522
3523 /* pmap_pa_exists
3524 **
3525 * Used by the /dev/mem driver to see if a given PA is memory
3526 * that can be mapped. (The PA is not in a hole.)
3527 */
3528 int
3529 pmap_pa_exists(pa)
3530 vm_offset_t pa;
3531 {
3532 register int i;
3533
3534 for (i = 0; i < SUN3X_NPHYS_RAM_SEGS; i++) {
3535 if ((pa >= avail_mem[i].pmem_start) &&
3536 (pa < avail_mem[i].pmem_end))
3537 return (1);
3538 if (avail_mem[i].pmem_next == NULL)
3539 break;
3540 }
3541 return (0);
3542 }
3543
3544 /* Called only from locore.s and pmap.c */
3545 void _pmap_switch __P((pmap_t pmap));
3546
3547 /*
3548 * _pmap_switch INTERNAL
3549 *
3550 * This is called by locore.s:cpu_switch() when it is
3551 * switching to a new process. Load new translations.
3552 * Note: done in-line by locore.s unless PMAP_DEBUG
3553 *
3554 * Note that we do NOT allocate a context here, but
3555 * share the "kernel only" context until we really
3556 * need our own context for user-space mappings in
3557 * pmap_enter_user(). [ s/context/mmu A table/ ]
3558 */
3559 void
3560 _pmap_switch(pmap)
3561 pmap_t pmap;
3562 {
3563 u_long rootpa;
3564
3565 /*
3566 * Only do reload/flush if we have to.
3567 * Note that if the old and new process
3568 * were BOTH using the "null" context,
3569 * then this will NOT flush the TLB.
3570 */
3571 rootpa = pmap->pm_a_phys;
3572 if (kernel_crp.rp_addr != rootpa) {
3573 DPRINT(("pmap_activate(%p)\n", pmap));
3574 kernel_crp.rp_addr = rootpa;
3575 loadcrp(&kernel_crp);
3576 TBIAU();
3577 }
3578 }
3579
3580 /*
3581 * Exported version of pmap_activate(). This is called from the
3582 * machine-independent VM code when a process is given a new pmap.
3583 * If (p == curproc) do like cpu_switch would do; otherwise just
3584 * take this as notification that the process has a new pmap.
3585 */
3586 void
3587 pmap_activate(p)
3588 struct proc *p;
3589 {
3590 pmap_t pmap = p->p_vmspace->vm_map.pmap;
3591 int s;
3592
3593 if (p == curproc) {
3594 s = splimp();
3595 _pmap_switch(pmap);
3596 splx(s);
3597 }
3598 }
3599
3600 /*
3601 * pmap_deactivate INTERFACE
3602 **
3603 * This is called to deactivate the specified process's address space.
3604 * XXX The semantics of this function are currently not well-defined.
3605 */
3606 void
3607 pmap_deactivate(p)
3608 struct proc *p;
3609 {
3610 /* not implemented. */
3611 }
3612
3613 /* pmap_update
3614 **
3615 * Apply any delayed changes scheduled for all pmaps immediately.
3616 *
3617 * No delayed operations are currently done in this pmap.
3618 */
3619 void
3620 pmap_update()
3621 {
3622 /* not implemented. */
3623 }
3624
3625 /*
3626 * Fill in the sun3x-specific part of the kernel core header
3627 * for dumpsys(). (See machdep.c for the rest.)
3628 */
3629 void
3630 pmap_kcore_hdr(sh)
3631 struct sun3x_kcore_hdr *sh;
3632 {
3633 u_long spa, len;
3634 int i;
3635
3636 sh->pg_frame = MMU_SHORT_PTE_BASEADDR;
3637 sh->pg_valid = MMU_DT_PAGE;
3638 sh->contig_end = virtual_contig_end;
3639 sh->kernCbase = (u_long) kernCbase;
3640 for (i = 0; i < SUN3X_NPHYS_RAM_SEGS; i++) {
3641 spa = avail_mem[i].pmem_start;
3642 spa = m68k_trunc_page(spa);
3643 len = avail_mem[i].pmem_end - spa;
3644 len = m68k_round_page(len);
3645 sh->ram_segs[i].start = spa;
3646 sh->ram_segs[i].size = len;
3647 }
3648 }
3649
3650
3651 /* pmap_virtual_space INTERFACE
3652 **
3653 * Return the current available range of virtual addresses in the
3654  * arguments provided. Only really called once.
3655 */
3656 void
3657 pmap_virtual_space(vstart, vend)
3658 vm_offset_t *vstart, *vend;
3659 {
3660 *vstart = virtual_avail;
3661 *vend = virtual_end;
3662 }
3663
3664 /*
3665 * Provide memory to the VM system.
3666 *
3667 * Assume avail_start is always in the
3668 * first segment as pmap_bootstrap does.
3669 */
3670 static void
3671 pmap_page_upload()
3672 {
3673 vm_offset_t a, b; /* memory range */
3674 int i;
3675
3676 /* Supply the memory in segments. */
3677 for (i = 0; i < SUN3X_NPHYS_RAM_SEGS; i++) {
3678 a = atop(avail_mem[i].pmem_start);
3679 b = atop(avail_mem[i].pmem_end);
3680 if (i == 0)
3681 a = atop(avail_start);
3682
3683 #if defined(UVM)
3684 uvm_page_physload(a, b, a, b, VM_FREELIST_DEFAULT);
3685 #else
3686 vm_page_physload(a, b, a, b);
3687 #endif
3688
3689 if (avail_mem[i].pmem_next == NULL)
3690 break;
3691 }
3692 }
3693
3694 /* pmap_page_index INTERFACE
3695 **
3696 * Return the index of the given physical page in a list of usable
3697 * physical pages in the system. Holes in physical memory may be counted
3698 * if so desired. As long as pmap_free_pages() and pmap_page_index()
3699 * agree as to whether holes in memory do or do not count as valid pages,
3700 * it really doesn't matter. However, not counting holes as valid pages
3701 * saves a little memory, and the saving grows with the size of the
3702 * holes.
3703 *
3704 * We will not count holes as valid pages. We can generate page indices
3705 * that conform to this by using the memory bank structures initialized
3706 * in pmap_alloc_pv().
3707 */
3708 int
3709 pmap_page_index(pa)
3710 vm_offset_t pa;
3711 {
3712 struct pmap_physmem_struct *bank = avail_mem;
3713 vm_offset_t off;
3714
3715 /* Search for the memory bank with this page. */
3716 /* XXX - What if it is not physical memory? */
3717 while (pa > bank->pmem_end)
3718 bank = bank->pmem_next;
3719 off = pa - bank->pmem_start;
3720
3721 return (bank->pmem_pvbase + m68k_btop(off));
3722 }
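/*
 * Stand-alone sketch (not part of the original source): a miniature
 * model of the bank search above, using two made-up banks with a hole
 * between them.  The hole contributes no indices; each bank's first
 * page gets the bank's pvbase.  All ex_ names are invented.
 */
#if 0
#include <stdio.h>
#include <stddef.h>

#define EX_PGSHIFT	13			/* 8K pages */
#define ex_btop(x)	((unsigned long)(x) >> EX_PGSHIFT)

struct ex_bank {
	unsigned long start, end;	/* physical bounds of the bank */
	unsigned long pvbase;		/* index of the bank's first page */
	struct ex_bank *next;
};

static struct ex_bank ex_bank1 = { 0x01000000UL, 0x01400000UL, 512, NULL };
static struct ex_bank ex_bank0 = { 0x00000000UL, 0x00400000UL, 0, &ex_bank1 };

static unsigned long
ex_page_index(unsigned long pa)
{
	struct ex_bank *bank = &ex_bank0;

	while (pa > bank->end)
		bank = bank->next;
	return (bank->pvbase + ex_btop(pa - bank->start));
}

int
main(void)
{
	/* Page in the second bank: 512 + (0x200000 >> 13) = 768. */
	printf("%lu\n", ex_page_index(0x01200000UL));
	return (0);
}
#endif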
3723
3724 /* pmap_count INTERFACE
3725 **
3726 * Return the number of resident (valid) pages in the given pmap.
3727 *
3728 * Note: If this function is handed the kernel map, it will report
3729 * that it has no mappings. Hopefully the VM system won't ask for kernel
3730 * map statistics.
3731 */
3732 segsz_t
3733 pmap_count(pmap, type)
3734 pmap_t pmap;
3735 int type;
3736 {
3737 u_int count;
3738 int a_idx, b_idx;
3739 a_tmgr_t *a_tbl;
3740 b_tmgr_t *b_tbl;
3741 c_tmgr_t *c_tbl;
3742
3743 /*
3744 * If the pmap does not have its own A table manager, it has no
3745 * valid entries.
3746 */
3747 if (pmap->pm_a_tmgr == NULL)
3748 return 0;
3749
3750 a_tbl = pmap->pm_a_tmgr;
3751
3752 count = 0;
3753 for (a_idx = 0; a_idx < MMU_TIA(KERNBASE); a_idx++) {
3754 if (MMU_VALID_DT(a_tbl->at_dtbl[a_idx])) {
3755 b_tbl = mmuB2tmgr(mmu_ptov(a_tbl->at_dtbl[a_idx].addr.raw));
3756 for (b_idx = 0; b_idx < MMU_B_TBL_SIZE; b_idx++) {
3757 if (MMU_VALID_DT(b_tbl->bt_dtbl[b_idx])) {
3758 c_tbl = mmuC2tmgr(
3759 mmu_ptov(MMU_DTE_PA(b_tbl->bt_dtbl[b_idx])));
3760 if (type == 0)
3761 /*
3762 * A resident entry count has been requested.
3763 */
3764 count += c_tbl->ct_ecnt;
3765 else
3766 /*
3767 * A wired entry count has been requested.
3768 */
3769 count += c_tbl->ct_wcnt;
3770 }
3771 }
3772 }
3773 }
3774
3775 return count;
3776 }
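/*
 * The machine-independent code presumably reaches pmap_count() through
 * resident/wired count macros in the machine-dependent pmap.h; the
 * exact names below are an assumption, shown only to illustrate the
 * meaning of the 'type' argument.
 */
#if 0
#define pmap_resident_count(pmap)	pmap_count((pmap), 0)	/* valid PTEs */
#define pmap_wired_count(pmap)		pmap_count((pmap), 1)	/* wired PTEs */
#endif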
3777
3778 /************************ SUN3 COMPATIBILITY ROUTINES ********************
3779 * The following routines are only used by DDB for tricky kernel text *
3780 * operations in db_memrw.c. They are provided for sun3 *
3781 * compatibility. *
3782 *************************************************************************/
3783 /* get_pte INTERNAL
3784 **
3785 * Return the page descriptor that describes the kernel mapping
3786 * of the given virtual address.
3787 */
3788 extern u_long ptest_addr __P((u_long)); /* XXX: locore.s */
3789 u_int
3790 get_pte(va)
3791 vm_offset_t va;
3792 {
3793 u_long pte_pa;
3794 mmu_short_pte_t *pte;
3795
3796 /* Get the physical address of the PTE */
3797 pte_pa = ptest_addr(va & ~PGOFSET);
3798
3799 /* Convert to a virtual address... */
3800 pte = (mmu_short_pte_t *) (KERNBASE + pte_pa);
3801
3802 /* Make sure it is in our level-C tables... */
3803 if ((pte < kernCbase) ||
3804 (pte >= &mmuCbase[NUM_USER_PTES]))
3805 return 0;
3806
3807 /* ... and just return its contents. */
3808 return (pte->attr.raw);
3809 }
3810
3811
3812 /* set_pte INTERNAL
3813 **
3814 * Set the page descriptor that describes the kernel mapping
3815 * of the given virtual address.
3816 */
3817 void
3818 set_pte(va, pte)
3819 vm_offset_t va;
3820 u_int pte;
3821 {
3822 u_long idx;
3823
3824 if (va < KERNBASE)
3825 return;
3826
3827 idx = (unsigned long) m68k_btop(va - KERNBASE);
3828 kernCbase[idx].attr.raw = pte;
3829 TBIS(va);
3830 }
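/*
 * Hypothetical usage sketch (not part of the original source): the
 * read-modify-restore cycle a DDB-style kernel text write could
 * perform with get_pte()/set_pte().  'wp_mask' stands in for whatever
 * write-protect bit the short-format PTE uses; the name and the helper
 * itself are invented for illustration.
 */
#if 0
static void
example_patch_page(vm_offset_t va, u_int wp_mask)
{
	u_int oldpte;

	oldpte = get_pte(va);		/* raw PTE of the kernel mapping */
	set_pte(va, oldpte & ~wp_mask);	/* drop protection, flush TLB entry */

	/* ... patch the instruction(s) in the page here ... */

	set_pte(va, oldpte);		/* restore the original descriptor */
}
#endif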
3831
3832 /*
3833 * Routine: pmap_procwr
3834 *
3835 * Function:
3836 * Synchronize caches corresponding to [addr, addr+len) in p.
3837 */
3838 void
3839 pmap_procwr(p, va, len)
3840 struct proc *p;
3841 vaddr_t va;
3842 size_t len;
3843 {
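/*
 * The magic 0x80000004 appears to combine the m68k cachectl flags for
 * an external-cache purge and an instruction-cache purge; the flag
 * names are not visible in this file, so treat that reading as an
 * assumption.
 */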
3844 (void)cachectl1(0x80000004, va, len, p);
3845 }
3846
3847
3848 #ifdef PMAP_DEBUG
3849 /************************** DEBUGGING ROUTINES **************************
3850 * The following routines are meant to be an aid to debugging the pmap *
3851 * system. They are callable from the DDB command line and should be *
3852 * prepared to be handed unstable or incomplete states of the system. *
3853 ************************************************************************/
3854
3855 /* pv_list
3856 **
3857 * List all pages found on the pv list for the given physical page.
3858 * To avoid endless loops, the listing will stop at the end of the list
3859 * or after 'n' entries - whichever comes first.
3860 */
3861 void
3862 pv_list(pa, n)
3863 vm_offset_t pa;
3864 int n;
3865 {
3866 int idx;
3867 vm_offset_t va;
3868 pv_t *pv;
3869 c_tmgr_t *c_tbl;
3870 pmap_t pmap;
3871
3872 pv = pa2pv(pa);
3873 idx = pv->pv_idx;
3874
3875 for (; idx != PVE_EOL && n > 0;
3876 idx = pvebase[idx].pve_next, n--) {
3877
3878 va = pmap_get_pteinfo(idx, &pmap, &c_tbl);
3879 printf("idx %d, pmap 0x%x, va 0x%x, c_tbl %x\n",
3880 idx, (u_int) pmap, (u_int) va, (u_int) c_tbl);
3881 }
3882 }
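/*
 * Example (physical address made up): from the DDB prompt,
 *	call pv_list(0x3f4000, 8)
 * would list at most eight mappings of physical page 0x3f4000.
 */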
3883 #endif /* PMAP_DEBUG */
3884
3885 #ifdef NOT_YET
3886 /* and maybe not ever */
3887 /************************** LOW-LEVEL ROUTINES **************************
3888 * These routines will eventually be rewritten into assembly and placed *
3889 * in locore.s. They are here now as stubs so that the pmap module can *
3890 * be linked as a standalone user program for testing. *
3891 ************************************************************************/
3892 /* flush_atc_crp INTERNAL
3893 **
3894 * Flush all page descriptors derived from the given CPU Root Pointer
3895 * (CRP), or 'A' table as it is known here, from the 68851's automatic
3896 * cache.
3897 */
3898 void
3899 flush_atc_crp(a_tbl)
	mmu_long_dte_t *a_tbl;	/* A table whose descriptors are flushed */
3900 {
3901 mmu_long_rp_t rp;
3902
3903 /* Create a temporary root table pointer that points to the
3904 * given A table.
3905 */
3906 rp.attr.raw = ~MMU_LONG_RP_LU;
3907 rp.addr.raw = (unsigned int) a_tbl;
3908
3909 mmu_pflushr(&rp);
3910 /* mmu_pflushr:
3911 * movel sp(4)@,a0
3912 * pflushr a0@
3913 * rts
3914 */
3915 }
3916 #endif /* NOT_YET */
3917