1 /* $NetBSD: pmap.c,v 1.19 2003/12/27 13:35:52 mjl Exp $ */
2 /*-
3 * Copyright (c) 2001 The NetBSD Foundation, Inc.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Matt Thomas <matt (at) 3am-software.com> of Allegro Networks, Inc.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the NetBSD
20 * Foundation, Inc. and its contributors.
21 * 4. Neither the name of The NetBSD Foundation nor the names of its
22 * contributors may be used to endorse or promote products derived
23 * from this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 /*
39 * Copyright (C) 1995, 1996 Wolfgang Solfrank.
40 * Copyright (C) 1995, 1996 TooLs GmbH.
41 * All rights reserved.
42 *
43 * Redistribution and use in source and binary forms, with or without
44 * modification, are permitted provided that the following conditions
45 * are met:
46 * 1. Redistributions of source code must retain the above copyright
47 * notice, this list of conditions and the following disclaimer.
48 * 2. Redistributions in binary form must reproduce the above copyright
49 * notice, this list of conditions and the following disclaimer in the
50 * documentation and/or other materials provided with the distribution.
51 * 3. All advertising materials mentioning features or use of this software
52 * must display the following acknowledgement:
53 * This product includes software developed by TooLs GmbH.
54 * 4. The name of TooLs GmbH may not be used to endorse or promote products
55 * derived from this software without specific prior written permission.
56 *
57 * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR
58 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
59 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
60 * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
61 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
62 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
63 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
64 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
65 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
66 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
67 */
68
69 #include <sys/cdefs.h>
70 __KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.19 2003/12/27 13:35:52 mjl Exp $");
71
72 #include "opt_ppcarch.h"
73 #include "opt_altivec.h"
74 #include "opt_pmap.h"
75 #include <sys/param.h>
76 #include <sys/malloc.h>
77 #include <sys/proc.h>
78 #include <sys/user.h>
79 #include <sys/pool.h>
80 #include <sys/queue.h>
81 #include <sys/device.h> /* for evcnt */
82 #include <sys/systm.h>
83
84 #if __NetBSD_Version__ < 105010000
85 #include <vm/vm.h>
86 #include <vm/vm_kern.h>
87 #define splvm() splimp()
88 #endif
89
90 #include <uvm/uvm.h>
91
92 #include <machine/pcb.h>
93 #include <machine/powerpc.h>
94 #include <powerpc/spr.h>
95 #include <powerpc/oea/sr_601.h>
96 #include <powerpc/bat.h>
97
98 #if defined(DEBUG) || defined(PMAPCHECK)
99 #define STATIC
100 #else
101 #define STATIC static
102 #endif
103
104 #ifdef ALTIVEC
105 int pmap_use_altivec;
106 #endif
107
108 volatile struct pteg *pmap_pteg_table;
109 unsigned int pmap_pteg_cnt;
110 unsigned int pmap_pteg_mask;
111 paddr_t pmap_memlimit = -PAGE_SIZE; /* there is no limit */
112
113 struct pmap kernel_pmap_;
114 unsigned int pmap_pages_stolen;
115 u_long pmap_pte_valid;
116 #if defined(DIAGNOSTIC) || defined(DEBUG) || defined(PMAPCHECK)
117 u_long pmap_pvo_enter_depth;
118 u_long pmap_pvo_remove_depth;
119 #endif
120
121 int physmem;
122 #ifndef MSGBUFADDR
123 extern paddr_t msgbuf_paddr;
124 #endif
125
126 static struct mem_region *mem, *avail;
127 static u_int mem_cnt, avail_cnt;
128
129 #ifdef __HAVE_PMAP_PHYSSEG
130 /*
131 * This is a cache of referenced/modified bits.
132 * Bits herein are shifted by ATTR_SHFT.
133 */
134 #define ATTR_SHFT 4
135 struct pmap_physseg pmap_physseg;
136 #endif
137
138 /*
139 * The following structure is exactly 32 bytes long (one cacheline).
140 */
141 struct pvo_entry {
142 LIST_ENTRY(pvo_entry) pvo_vlink; /* Link to common virt page */
143 TAILQ_ENTRY(pvo_entry) pvo_olink; /* Link to overflow entry */
144 struct pte pvo_pte; /* Prebuilt PTE */
145 pmap_t pvo_pmap; /* ptr to owning pmap */
146 vaddr_t pvo_vaddr; /* VA of entry */
147 #define PVO_PTEGIDX_MASK 0x0007 /* which PTEG slot */
148 #define PVO_PTEGIDX_VALID 0x0008 /* slot is valid */
149 #define PVO_WIRED 0x0010 /* PVO entry is wired */
150 #define PVO_MANAGED 0x0020 /* PVO e. for managed page */
151 #define PVO_EXECUTABLE 0x0040 /* PVO e. for executable page */
152 #define PVO_ENTER_INSERT 0 /* PVO has been inserted (pvo_enter) */
153 #define PVO_SPILL_UNSET 1 /* PVO has been evicted */
154 #define PVO_SPILL_SET 2 /* PVO has been spilled */
155 #define PVO_SPILL_INSERT 3 /* PVO has been inserted */
156 #define PVO_PMAP_PAGE_PROTECT 4 /* PVO has changed */
157 #define PVO_PMAP_PROTECT 5 /* PVO has changed */
158 #define PVO_REMOVE 6 /* PVO has been removed */
159 #define PVO_WHERE_MASK 15
160 #define PVO_WHERE_SHFT 8
161 };
162 #define PVO_VADDR(pvo) ((pvo)->pvo_vaddr & ~ADDR_POFF)
163 #define PVO_ISEXECUTABLE(pvo) ((pvo)->pvo_vaddr & PVO_EXECUTABLE)
164 #define PVO_PTEGIDX_GET(pvo) ((pvo)->pvo_vaddr & PVO_PTEGIDX_MASK)
165 #define PVO_PTEGIDX_ISSET(pvo) ((pvo)->pvo_vaddr & PVO_PTEGIDX_VALID)
166 #define PVO_PTEGIDX_CLR(pvo) \
167 ((void)((pvo)->pvo_vaddr &= ~(PVO_PTEGIDX_VALID|PVO_PTEGIDX_MASK)))
168 #define PVO_PTEGIDX_SET(pvo,i) \
169 ((void)((pvo)->pvo_vaddr |= (i)|PVO_PTEGIDX_VALID))
170 #define PVO_WHERE(pvo,w) \
171 ((pvo)->pvo_vaddr &= ~(PVO_WHERE_MASK << PVO_WHERE_SHFT), \
172 (pvo)->pvo_vaddr |= ((PVO_ ## w) << PVO_WHERE_SHFT))
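/*
 * PVO_WHERE(pvo, OP) stamps PVO_<OP> into bits
 * [PVO_WHERE_SHFT .. PVO_WHERE_SHFT+3] of pvo_vaddr, recording the last
 * operation that touched the PVO (ENTER_INSERT, SPILL_SET, REMOVE, ...),
 * presumably as a breadcrumb for inspecting PVOs from DDB or a
 * PMAPCHECK panic.
 */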
173
174 TAILQ_HEAD(pvo_tqhead, pvo_entry);
175 struct pvo_tqhead *pmap_pvo_table; /* pvo entries by ptegroup index */
176 struct pvo_head pmap_pvo_kunmanaged = LIST_HEAD_INITIALIZER(pmap_pvo_kunmanaged); /* list of kernel's unmanaged pages */
177 struct pvo_head pmap_pvo_unmanaged = LIST_HEAD_INITIALIZER(pmap_pvo_unmanaged); /* list of unmanaged pages */
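/*
 * Ordering note: within each pmap_pvo_table[] bucket, PVOs whose PTE is
 * currently evicted from the page table are kept ahead of all resident
 * PVOs.  pmap_pte_spill() stops searching once it sees a valid PTE, and
 * pmap_pvo_enter() inserts evicted PVOs at the head of the bucket to
 * preserve this invariant.
 */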
178
179 struct pool pmap_pool; /* pool for pmap structures */
180 struct pool pmap_upvo_pool; /* pool for pvo entries for unmanaged pages */
181 struct pool pmap_mpvo_pool; /* pool for pvo entries for managed pages */
182
183 /*
184 * We keep a cache of pages used to back the pvo entries for
185 * unmanaged pages.
186 */
187 struct pvo_page {
188 SIMPLEQ_ENTRY(pvo_page) pvop_link;
189 };
190 SIMPLEQ_HEAD(pvop_head, pvo_page);
191 struct pvop_head pmap_upvop_head = SIMPLEQ_HEAD_INITIALIZER(pmap_upvop_head);
192 struct pvop_head pmap_mpvop_head = SIMPLEQ_HEAD_INITIALIZER(pmap_mpvop_head);
193 u_long pmap_upvop_free;
194 u_long pmap_upvop_maxfree;
195 u_long pmap_mpvop_free;
196 u_long pmap_mpvop_maxfree;
197
198 STATIC void *pmap_pool_ualloc(struct pool *, int);
199 STATIC void *pmap_pool_malloc(struct pool *, int);
200
201 STATIC void pmap_pool_ufree(struct pool *, void *);
202 STATIC void pmap_pool_mfree(struct pool *, void *);
203
204 static struct pool_allocator pmap_pool_mallocator = {
205 pmap_pool_malloc, pmap_pool_mfree, 0,
206 };
207
208 static struct pool_allocator pmap_pool_uallocator = {
209 pmap_pool_ualloc, pmap_pool_ufree, 0,
210 };
211
212 #if defined(DEBUG) || defined(PMAPCHECK) || defined(DDB)
213 void pmap_pte_print(volatile struct pte *);
214 #endif
215
216 #ifdef DDB
217 void pmap_pteg_check(void);
218 void pmap_pteg_dist(void);
219 void pmap_print_pte(pmap_t, vaddr_t);
220 void pmap_print_mmuregs(void);
221 #endif
222
223 #if defined(DEBUG) || defined(PMAPCHECK)
224 #ifdef PMAPCHECK
225 int pmapcheck = 1;
226 #else
227 int pmapcheck = 0;
228 #endif
229 void pmap_pvo_verify(void);
230 STATIC void pmap_pvo_check(const struct pvo_entry *);
231 #define PMAP_PVO_CHECK(pvo) \
232 do { \
233 if (pmapcheck) \
234 pmap_pvo_check(pvo); \
235 } while (0)
236 #else
237 #define PMAP_PVO_CHECK(pvo) do { } while (/*CONSTCOND*/0)
238 #endif
239 STATIC int pmap_pte_insert(int, struct pte *);
240 STATIC int pmap_pvo_enter(pmap_t, struct pool *, struct pvo_head *,
241 vaddr_t, paddr_t, register_t, int);
242 STATIC void pmap_pvo_remove(struct pvo_entry *, int);
243 STATIC struct pvo_entry *pmap_pvo_find_va(pmap_t, vaddr_t, int *);
244 STATIC volatile struct pte *pmap_pvo_to_pte(const struct pvo_entry *, int);
245 #define pmap_pvo_reclaim(pm) NULL
246 STATIC void pvo_set_exec(struct pvo_entry *);
247 STATIC void pvo_clear_exec(struct pvo_entry *);
248
249 STATIC void tlbia(void);
250
251 STATIC void pmap_release(pmap_t);
252 STATIC void *pmap_boot_find_memory(psize_t, psize_t, int);
253
254 #define VSID_NBPW (sizeof(uint32_t) * 8)
255 static uint32_t pmap_vsid_bitmap[NPMAPS / VSID_NBPW];
256
257 static int pmap_initialized;
258
259 #if defined(DEBUG) || defined(PMAPDEBUG)
260 #define PMAPDEBUG_BOOT 0x0001
261 #define PMAPDEBUG_PTE 0x0002
262 #define PMAPDEBUG_EXEC 0x0008
263 #define PMAPDEBUG_PVOENTER 0x0010
264 #define PMAPDEBUG_PVOREMOVE 0x0020
265 #define PMAPDEBUG_ACTIVATE 0x0100
266 #define PMAPDEBUG_CREATE 0x0200
267 #define PMAPDEBUG_ENTER 0x1000
268 #define PMAPDEBUG_KENTER 0x2000
269 #define PMAPDEBUG_KREMOVE 0x4000
270 #define PMAPDEBUG_REMOVE 0x8000
271 unsigned int pmapdebug = 0;
272 # define DPRINTF(x) printf x
273 # define DPRINTFN(n, x) if (pmapdebug & PMAPDEBUG_ ## n) printf x
274 #else
275 # define DPRINTF(x)
276 # define DPRINTFN(n, x)
277 #endif
278
279
280 #ifdef PMAPCOUNTERS
281 #define PMAPCOUNT(ev) ((pmap_evcnt_ ## ev).ev_count++)
282 #define PMAPCOUNT2(ev) ((ev).ev_count++)
283
284 struct evcnt pmap_evcnt_mappings =
285 EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL,
286 "pmap", "pages mapped");
287 struct evcnt pmap_evcnt_unmappings =
288 EVCNT_INITIALIZER(EVCNT_TYPE_MISC, &pmap_evcnt_mappings,
289 "pmap", "pages unmapped");
290
291 struct evcnt pmap_evcnt_kernel_mappings =
292 EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL,
293 "pmap", "kernel pages mapped");
294 struct evcnt pmap_evcnt_kernel_unmappings =
295 EVCNT_INITIALIZER(EVCNT_TYPE_MISC, &pmap_evcnt_kernel_mappings,
296 "pmap", "kernel pages unmapped");
297
298 struct evcnt pmap_evcnt_mappings_replaced =
299 EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL,
300 "pmap", "page mappings replaced");
301
302 struct evcnt pmap_evcnt_exec_mappings =
303 EVCNT_INITIALIZER(EVCNT_TYPE_MISC, &pmap_evcnt_mappings,
304 "pmap", "exec pages mapped");
305 struct evcnt pmap_evcnt_exec_cached =
306 EVCNT_INITIALIZER(EVCNT_TYPE_MISC, &pmap_evcnt_mappings,
307 "pmap", "exec pages cached");
308
309 struct evcnt pmap_evcnt_exec_synced =
310 EVCNT_INITIALIZER(EVCNT_TYPE_MISC, &pmap_evcnt_exec_mappings,
311 "pmap", "exec pages synced");
312 struct evcnt pmap_evcnt_exec_synced_clear_modify =
313 EVCNT_INITIALIZER(EVCNT_TYPE_MISC, &pmap_evcnt_exec_mappings,
314 "pmap", "exec pages synced (CM)");
315
316 struct evcnt pmap_evcnt_exec_uncached_page_protect =
317 EVCNT_INITIALIZER(EVCNT_TYPE_MISC, &pmap_evcnt_exec_mappings,
318 "pmap", "exec pages uncached (PP)");
319 struct evcnt pmap_evcnt_exec_uncached_clear_modify =
320 EVCNT_INITIALIZER(EVCNT_TYPE_MISC, &pmap_evcnt_exec_mappings,
321 "pmap", "exec pages uncached (CM)");
322 struct evcnt pmap_evcnt_exec_uncached_zero_page =
323 EVCNT_INITIALIZER(EVCNT_TYPE_MISC, &pmap_evcnt_exec_mappings,
324 "pmap", "exec pages uncached (ZP)");
325 struct evcnt pmap_evcnt_exec_uncached_copy_page =
326 EVCNT_INITIALIZER(EVCNT_TYPE_MISC, &pmap_evcnt_exec_mappings,
327 "pmap", "exec pages uncached (CP)");
328
329 struct evcnt pmap_evcnt_updates =
330 EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL,
331 "pmap", "updates");
332 struct evcnt pmap_evcnt_collects =
333 EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL,
334 "pmap", "collects");
335 struct evcnt pmap_evcnt_copies =
336 EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL,
337 "pmap", "copies");
338
339 struct evcnt pmap_evcnt_ptes_spilled =
340 EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL,
341 "pmap", "ptes spilled from overflow");
342 struct evcnt pmap_evcnt_ptes_unspilled =
343 EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL,
344 "pmap", "ptes not spilled");
345 struct evcnt pmap_evcnt_ptes_evicted =
346 EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL,
347 "pmap", "ptes evicted");
348
349 struct evcnt pmap_evcnt_ptes_primary[8] = {
350 EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL,
351 "pmap", "ptes added at primary[0]"),
352 EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL,
353 "pmap", "ptes added at primary[1]"),
354 EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL,
355 "pmap", "ptes added at primary[2]"),
356 EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL,
357 "pmap", "ptes added at primary[3]"),
358
359 EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL,
360 "pmap", "ptes added at primary[4]"),
361 EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL,
362 "pmap", "ptes added at primary[5]"),
363 EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL,
364 "pmap", "ptes added at primary[6]"),
365 EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL,
366 "pmap", "ptes added at primary[7]"),
367 };
368 struct evcnt pmap_evcnt_ptes_secondary[8] = {
369 EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL,
370 "pmap", "ptes added at secondary[0]"),
371 EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL,
372 "pmap", "ptes added at secondary[1]"),
373 EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL,
374 "pmap", "ptes added at secondary[2]"),
375 EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL,
376 "pmap", "ptes added at secondary[3]"),
377
378 EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL,
379 "pmap", "ptes added at secondary[4]"),
380 EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL,
381 "pmap", "ptes added at secondary[5]"),
382 EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL,
383 "pmap", "ptes added at secondary[6]"),
384 EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL,
385 "pmap", "ptes added at secondary[7]"),
386 };
387 struct evcnt pmap_evcnt_ptes_removed =
388 EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL,
389 "pmap", "ptes removed");
390 struct evcnt pmap_evcnt_ptes_changed =
391 EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL,
392 "pmap", "ptes changed");
393
394 /*
395 * From pmap_subr.c
396 */
397 extern struct evcnt pmap_evcnt_zeroed_pages;
398 extern struct evcnt pmap_evcnt_copied_pages;
399 extern struct evcnt pmap_evcnt_idlezeroed_pages;
400 #else
401 #define PMAPCOUNT(ev) ((void) 0)
402 #define PMAPCOUNT2(ev) ((void) 0)
403 #endif
404
405 #define TLBIE(va) __asm __volatile("tlbie %0" :: "r"(va))
406 #define TLBSYNC() __asm __volatile("tlbsync")
407 #define SYNC() __asm __volatile("sync")
408 #define EIEIO() __asm __volatile("eieio")
409 #define MFMSR() mfmsr()
410 #define MTMSR(psl) mtmsr(psl)
411 #define MFPVR() mfpvr()
412 #define MFSRIN(va) mfsrin(va)
413 #define MFTB() mfrtcltbl()
414
415 #ifndef PPC_OEA64
416 static __inline register_t
417 mfsrin(vaddr_t va)
418 {
419 register_t sr;
420 __asm __volatile ("mfsrin %0,%1" : "=r"(sr) : "r"(va));
421 return sr;
422 }
423 #endif /* PPC_OEA64 */
424
425 static __inline register_t
426 pmap_interrupts_off(void)
427 {
428 register_t msr = MFMSR();
429 if (msr & PSL_EE)
430 MTMSR(msr & ~PSL_EE);
431 return msr;
432 }
433
434 static void
435 pmap_interrupts_restore(register_t msr)
436 {
437 if (msr & PSL_EE)
438 MTMSR(msr);
439 }
440
441 static __inline u_int32_t
442 mfrtcltbl(void)
443 {
444
445 if ((MFPVR() >> 16) == MPC601)
446 return (mfrtcl() >> 7);
447 else
448 return (mftbl());
449 }
450
451 /*
452 * These small routines may have to be replaced,
453 * if/when we support processors other than the 604.
454 */
455
456 void
457 tlbia(void)
458 {
459 caddr_t i;
460
461 SYNC();
462 /*
463 * Why not use "tlbia"? Because not all processors implement it.
464 *
465 * This needs to be a per-cpu callback to do the appropriate thing
466 * for the CPU. XXX
467 */
468 for (i = 0; i < (caddr_t)0x00040000; i += 0x00001000) {
469 TLBIE(i);
470 EIEIO();
471 SYNC();
472 }
473 TLBSYNC();
474 SYNC();
475 }
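/*
 * The loop above issues a tlbie for every page in the first 256KB of the
 * address space (64 pages).  On 604-class MMUs the TLB is indexed by
 * low-order EA page bits, so invalidating 64 consecutive pages is
 * expected to hit every congruence class; the exact count is CPU
 * dependent, hence the XXX above about making this a per-cpu callback.
 */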
476
477 static __inline register_t
478 va_to_vsid(const struct pmap *pm, vaddr_t addr)
479 {
480 #ifdef PPC_OEA64
481 #if 0
482 const struct ste *ste;
483 register_t hash;
484 int i;
485
486 hash = (addr >> ADDR_ESID_SHFT) & ADDR_ESID_HASH;
487
488 /*
489 * Try the primary group first
490 */
491 ste = pm->pm_stes[hash].stes;
492 for (i = 0; i < 8; i++, ste++) {
493 if ((ste->ste_hi & STE_V) &&
494 (addr & ~(ADDR_POFF|ADDR_PIDX)) == (ste->ste_hi & STE_ESID))
495 return ste;
496 }
497
498 /*
499 * Then the secondary group.
500 */
501 ste = pm->pm_stes[hash ^ ADDR_ESID_HASH].stes;
502 for (i = 0; i < 8; i++, ste++) {
503 if ((ste->ste_hi & STE_V) &&
504 (addr & ~(ADDR_POFF|ADDR_PIDX)) == (ste->ste_hi & STE_ESID))
505 return addr;
506 }
507
508 return NULL;
509 #else
510 /*
511 * Rather than searching the STE groups for the VSID, we know
512 * how we generate that from the ESID and so do that.
513 */
514 return VSID_MAKE(addr >> ADDR_SR_SHFT, pm->pm_vsid) >> SR_VSID_SHFT;
515 #endif
516 #else
517 return (pm->pm_sr[addr >> ADDR_SR_SHFT] & SR_VSID) >> SR_VSID_SHFT;
518 #endif
519 }
520
521 static __inline register_t
522 va_to_pteg(const struct pmap *pm, vaddr_t addr)
523 {
524 register_t hash;
525
526 hash = va_to_vsid(pm, addr) ^ ((addr & ADDR_PIDX) >> ADDR_PIDX_SHFT);
527 return hash & pmap_pteg_mask;
528 }
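/*
 * The hashed page table gives each VA two candidate PTE groups: the
 * primary index computed above (the VSID xor'ed with the page index,
 * masked with pmap_pteg_mask) and a secondary index obtained by XORing
 * the primary index with pmap_pteg_mask.  pmap_pte_insert() and
 * pmap_pvo_pte_index() below rely on that XOR relationship.
 */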
529
530 #if defined(DEBUG) || defined(PMAPCHECK) || defined(DDB)
531 /*
532 * Given a PTE in the page table, calculate the VADDR that hashes to it.
533 * The only bit of magic is that the top 4 bits of the address don't
534 * technically exist in the PTE. But we know we reserved 4 bits of the
535 * VSID for it so that's how we get it.
536 */
537 static vaddr_t
538 pmap_pte_to_va(volatile const struct pte *pt)
539 {
540 vaddr_t va;
541 uintptr_t ptaddr = (uintptr_t) pt;
542
543 if (pt->pte_hi & PTE_HID)
544 ptaddr ^= (pmap_pteg_mask * sizeof(struct pteg));
545
546 /* PPC Bits 10-19 PPC64 Bits 42-51 */
547 va = ((pt->pte_hi >> PTE_VSID_SHFT) ^ (ptaddr / sizeof(struct pteg))) & 0x3ff;
548 va <<= ADDR_PIDX_SHFT;
549
550 /* PPC Bits 4-9 PPC64 Bits 36-41 */
551 va |= (pt->pte_hi & PTE_API) << ADDR_API_SHFT;
552
553 #ifdef PPC_OEA64
554 /* PPC64 Bits 0-35 */
555 /* va |= VSID_TO_SR(pt->pte_hi >> PTE_VSID_SHFT) << ADDR_SR_SHFT; */
556 #endif
557 #ifdef PPC_OEA
558 /* PPC Bits 0-3 */
559 va |= VSID_TO_SR(pt->pte_hi >> PTE_VSID_SHFT) << ADDR_SR_SHFT;
560 #endif
561
562 return va;
563 }
564 #endif
565
566 static __inline struct pvo_head *
567 pa_to_pvoh(paddr_t pa, struct vm_page **pg_p)
568 {
569 #ifdef __HAVE_VM_PAGE_MD
570 struct vm_page *pg;
571
572 pg = PHYS_TO_VM_PAGE(pa);
573 if (pg_p != NULL)
574 *pg_p = pg;
575 if (pg == NULL)
576 return &pmap_pvo_unmanaged;
577 return &pg->mdpage.mdpg_pvoh;
578 #endif
579 #ifdef __HAVE_PMAP_PHYSSEG
580 int bank, pg;
581
582 bank = vm_physseg_find(atop(pa), &pg);
583 if (pg_p != NULL)
584 *pg_p = pg;
585 if (bank == -1)
586 return &pmap_pvo_unmanaged;
587 return &vm_physmem[bank].pmseg.pvoh[pg];
588 #endif
589 }
590
591 static __inline struct pvo_head *
592 vm_page_to_pvoh(struct vm_page *pg)
593 {
594 #ifdef __HAVE_VM_PAGE_MD
595 return &pg->mdpage.mdpg_pvoh;
596 #endif
597 #ifdef __HAVE_PMAP_PHYSSEG
598 return pa_to_pvoh(VM_PAGE_TO_PHYS(pg), NULL);
599 #endif
600 }
601
602
603 #ifdef __HAVE_PMAP_PHYSSEG
604 static __inline char *
605 pa_to_attr(paddr_t pa)
606 {
607 int bank, pg;
608
609 bank = vm_physseg_find(atop(pa), &pg);
610 if (bank == -1)
611 return NULL;
612 return &vm_physmem[bank].pmseg.attrs[pg];
613 }
614 #endif
615
616 static __inline void
617 pmap_attr_clear(struct vm_page *pg, int ptebit)
618 {
619 #ifdef __HAVE_PMAP_PHYSSEG
620 *pa_to_attr(VM_PAGE_TO_PHYS(pg)) &= ~(ptebit >> ATTR_SHFT);
621 #endif
622 #ifdef __HAVE_VM_PAGE_MD
623 pg->mdpage.mdpg_attrs &= ~ptebit;
624 #endif
625 }
626
627 static __inline int
628 pmap_attr_fetch(struct vm_page *pg)
629 {
630 #ifdef __HAVE_PMAP_PHYSSEG
631 return *pa_to_attr(VM_PAGE_TO_PHYS(pg)) << ATTR_SHFT;
632 #endif
633 #ifdef __HAVE_VM_PAGE_MD
634 return pg->mdpage.mdpg_attrs;
635 #endif
636 }
637
638 static __inline void
639 pmap_attr_save(struct vm_page *pg, int ptebit)
640 {
641 #ifdef __HAVE_PMAP_PHYSSEG
642 *pa_to_attr(VM_PAGE_TO_PHYS(pg)) |= (ptebit >> ATTR_SHFT);
643 #endif
644 #ifdef __HAVE_VM_PAGE_MD
645 pg->mdpage.mdpg_attrs |= ptebit;
646 #endif
647 }
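/*
 * In the __HAVE_PMAP_PHYSSEG case the REF/CHG attribute cache is a char
 * per page, so the PTE_REF/PTE_CHG bits are shifted down by ATTR_SHFT on
 * save/clear and shifted back up on fetch; the __HAVE_VM_PAGE_MD case
 * stores them unshifted in mdpg_attrs.
 */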
648
649 static __inline int
650 pmap_pte_compare(const volatile struct pte *pt, const struct pte *pvo_pt)
651 {
652 if (pt->pte_hi == pvo_pt->pte_hi
653 #if 0
654 && ((pt->pte_lo ^ pvo_pt->pte_lo) &
655 ~(PTE_REF|PTE_CHG)) == 0
656 #endif
657 )
658 return 1;
659 return 0;
660 }
661
662 static __inline void
663 pmap_pte_create(struct pte *pt, const struct pmap *pm, vaddr_t va, register_t pte_lo)
664 {
665 /*
666 * Construct the PTE. Default to IMB initially. Valid bit
667 * only gets set when the real pte is set in memory.
668 *
669 * Note: Don't set the valid bit for correct operation of tlb update.
670 */
671 pt->pte_hi = (va_to_vsid(pm, va) << PTE_VSID_SHFT)
672 | (((va & ADDR_PIDX) >> (ADDR_API_SHFT - PTE_API_SHFT)) & PTE_API);
673 pt->pte_lo = pte_lo;
674 }
675
676 static __inline void
677 pmap_pte_synch(volatile struct pte *pt, struct pte *pvo_pt)
678 {
679 pvo_pt->pte_lo |= pt->pte_lo & (PTE_REF|PTE_CHG);
680 }
681
682 static __inline void
683 pmap_pte_clear(volatile struct pte *pt, vaddr_t va, int ptebit)
684 {
685 /*
686 * As shown in Section 7.6.3.2.3
687 */
688 pt->pte_lo &= ~ptebit;
689 TLBIE(va);
690 SYNC();
691 EIEIO();
692 TLBSYNC();
693 SYNC();
694 }
695
696 static __inline void
697 pmap_pte_set(volatile struct pte *pt, struct pte *pvo_pt)
698 {
699 #if defined(DIAGNOSTIC) || defined(DEBUG) || defined(PMAPCHECK)
700 if (pvo_pt->pte_hi & PTE_VALID)
701 panic("pte_set: setting an already valid pte %p", pvo_pt);
702 #endif
703 pvo_pt->pte_hi |= PTE_VALID;
704 /*
705 * Update the PTE as defined in section 7.6.3.1
706 * Note that the REF/CHG bits are from pvo_pt and thus should
707 * have been saved so this routine can restore them (if desired).
708 */
709 pt->pte_lo = pvo_pt->pte_lo;
710 EIEIO();
711 pt->pte_hi = pvo_pt->pte_hi;
712 SYNC();
713 pmap_pte_valid++;
714 }
715
716 static __inline void
717 pmap_pte_unset(volatile struct pte *pt, struct pte *pvo_pt, vaddr_t va)
718 {
719 #if defined(DIAGNOSTIC) || defined(DEBUG) || defined(PMAPCHECK)
720 if ((pvo_pt->pte_hi & PTE_VALID) == 0)
721 panic("pte_unset: attempt to unset an inactive pte#1 %p/%p", pvo_pt, pt);
722 if ((pt->pte_hi & PTE_VALID) == 0)
723 panic("pte_unset: attempt to unset an inactive pte#2 %p/%p", pvo_pt, pt);
724 #endif
725
726 pvo_pt->pte_hi &= ~PTE_VALID;
727 /*
728 * Force the ref & chg bits back into the PTEs.
729 */
730 SYNC();
731 /*
732 * Invalidate the pte ... (Section 7.6.3.3)
733 */
734 pt->pte_hi &= ~PTE_VALID;
735 SYNC();
736 TLBIE(va);
737 SYNC();
738 EIEIO();
739 TLBSYNC();
740 SYNC();
741 /*
742 * Save the ref & chg bits ...
743 */
744 pmap_pte_synch(pt, pvo_pt);
745 pmap_pte_valid--;
746 }
747
748 static __inline void
749 pmap_pte_change(volatile struct pte *pt, struct pte *pvo_pt, vaddr_t va)
750 {
751 /*
752 * Invalidate the PTE
753 */
754 pmap_pte_unset(pt, pvo_pt, va);
755 pmap_pte_set(pt, pvo_pt);
756 }
757
758 /*
759 * Try to insert the PTE @ *pvo_pt into the pmap_pteg_table at ptegidx
760 * (either primary or secondary location).
761 *
762 * Note: both the destination and source PTEs must not have PTE_VALID set.
763 */
764
765 STATIC int
766 pmap_pte_insert(int ptegidx, struct pte *pvo_pt)
767 {
768 volatile struct pte *pt;
769 int i;
770
771 #if defined(DEBUG)
772 DPRINTFN(PTE, ("pmap_pte_insert: idx 0x%x, pte 0x%x 0x%x\n",
773 ptegidx, (unsigned int) pvo_pt->pte_hi, (unsigned int) pvo_pt->pte_lo));
774 #endif
775 /*
776 * First try primary hash.
777 */
778 for (pt = pmap_pteg_table[ptegidx].pt, i = 0; i < 8; i++, pt++) {
779 if ((pt->pte_hi & PTE_VALID) == 0) {
780 pvo_pt->pte_hi &= ~PTE_HID;
781 pmap_pte_set(pt, pvo_pt);
782 return i;
783 }
784 }
785
786 /*
787 * Now try secondary hash.
788 */
789 ptegidx ^= pmap_pteg_mask;
790 for (pt = pmap_pteg_table[ptegidx].pt, i = 0; i < 8; i++, pt++) {
791 if ((pt->pte_hi & PTE_VALID) == 0) {
792 pvo_pt->pte_hi |= PTE_HID;
793 pmap_pte_set(pt, pvo_pt);
794 return i;
795 }
796 }
797 return -1;
798 }
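/*
 * A successful insert returns the slot index (0-7) within the chosen
 * PTEG; callers such as pmap_pvo_enter() and pmap_pte_spill() record it
 * with PVO_PTEGIDX_SET() and can tell whether the secondary hash was
 * used from the PTE_HID bit set above.  A return of -1 means both
 * groups were full and the mapping stays evicted.
 */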
799
800 /*
801 * Spill handler.
802 *
803 * Tries to spill a page table entry from the overflow area.
804 * This runs in either real mode (if dealing with an exception spill)
805 * or virtual mode when dealing with manually spilling one of the
806 * kernel's pte entries. In either case, interrupts are already
807 * disabled.
808 */
809
810 int
811 pmap_pte_spill(struct pmap *pm, vaddr_t addr, boolean_t exec)
812 {
813 struct pvo_entry *source_pvo, *victim_pvo, *next_pvo;
814 struct pvo_entry *pvo;
815 /* XXX: gcc -- vpvoh is always set at either *1* or *2* */
816 struct pvo_tqhead *pvoh, *vpvoh = NULL;
817 int ptegidx, i, j;
818 volatile struct pteg *pteg;
819 volatile struct pte *pt;
820
821 ptegidx = va_to_pteg(pm, addr);
822
823 /*
824 * Have to substitute some entry. Use the primary hash for this.
825 * Use low bits of timebase as random generator. Make sure we are
826 * not picking a kernel pte for replacement.
827 */
828 pteg = &pmap_pteg_table[ptegidx];
829 i = MFTB() & 7;
830 for (j = 0; j < 8; j++) {
831 pt = &pteg->pt[i];
832 if ((pt->pte_hi & PTE_VALID) == 0 ||
833 VSID_TO_HASH((pt->pte_hi & PTE_VSID) >> PTE_VSID_SHFT)
834 != KERNEL_VSIDBITS)
835 break;
836 i = (i + 1) & 7;
837 }
838 KASSERT(j < 8);
839
840 source_pvo = NULL;
841 victim_pvo = NULL;
842 pvoh = &pmap_pvo_table[ptegidx];
843 TAILQ_FOREACH(pvo, pvoh, pvo_olink) {
844
845 /*
846 * We need to find the pvo entry for this address...
847 */
848 PMAP_PVO_CHECK(pvo); /* sanity check */
849
850 /*
851 * If we haven't found the source and we come to a PVO with
852 * a valid PTE, then we know we can't find it because all
853 * evicted PVOs always are first in the list.
854 */
855 if (source_pvo == NULL && (pvo->pvo_pte.pte_hi & PTE_VALID))
856 break;
857 if (source_pvo == NULL && pm == pvo->pvo_pmap &&
858 addr == PVO_VADDR(pvo)) {
859
860 /*
861 * Now we have found the entry to be spilled into the
862 * pteg. Attempt to insert it into the page table.
863 */
864 j = pmap_pte_insert(ptegidx, &pvo->pvo_pte);
865 if (j >= 0) {
866 PVO_PTEGIDX_SET(pvo, j);
867 PMAP_PVO_CHECK(pvo); /* sanity check */
868 PVO_WHERE(pvo, SPILL_INSERT);
869 pvo->pvo_pmap->pm_evictions--;
870 PMAPCOUNT(ptes_spilled);
871 PMAPCOUNT2(((pvo->pvo_pte.pte_hi & PTE_HID)
872 ? pmap_evcnt_ptes_secondary
873 : pmap_evcnt_ptes_primary)[j]);
874
875 /*
876 * Since we keep the evicted entries at the
877 * front of the PVO list, we need to move this
878 * (now resident) PVO after the evicted
879 * entries.
880 */
881 next_pvo = TAILQ_NEXT(pvo, pvo_olink);
882
883 /*
884 * If we don't have to move (either we were the
885 * last entry or the next entry was valid),
886 * don't change our position. Otherwise
887 * move ourselves to the tail of the queue.
888 */
889 if (next_pvo != NULL &&
890 !(next_pvo->pvo_pte.pte_hi & PTE_VALID)) {
891 TAILQ_REMOVE(pvoh, pvo, pvo_olink);
892 TAILQ_INSERT_TAIL(pvoh, pvo, pvo_olink);
893 }
894 return 1;
895 }
896 source_pvo = pvo;
897 if (exec && !PVO_ISEXECUTABLE(source_pvo)) {
898 return 0;
899 }
900 if (victim_pvo != NULL)
901 break;
902 }
903
904 /*
905 * We also need the pvo entry of the victim we are replacing
906 * so save the R & C bits of the PTE.
907 */
908 if ((pt->pte_hi & PTE_HID) == 0 && victim_pvo == NULL &&
909 pmap_pte_compare(pt, &pvo->pvo_pte)) {
910 vpvoh = pvoh; /* *1* */
911 victim_pvo = pvo;
912 if (source_pvo != NULL)
913 break;
914 }
915 }
916
917 if (source_pvo == NULL) {
918 PMAPCOUNT(ptes_unspilled);
919 return 0;
920 }
921
922 if (victim_pvo == NULL) {
923 if ((pt->pte_hi & PTE_HID) == 0)
924 panic("pmap_pte_spill: victim p-pte (%p) has "
925 "no pvo entry!", pt);
926
927 /*
928 * If this is a secondary PTE, we need to search
929 * its primary pvo bucket for the matching PVO.
930 */
931 vpvoh = &pmap_pvo_table[ptegidx ^ pmap_pteg_mask]; /* *2* */
932 TAILQ_FOREACH(pvo, vpvoh, pvo_olink) {
933 PMAP_PVO_CHECK(pvo); /* sanity check */
934
935 /*
936 * We also need the pvo entry of the victim we are
937 * replacing so save the R & C bits of the PTE.
938 */
939 if (pmap_pte_compare(pt, &pvo->pvo_pte)) {
940 victim_pvo = pvo;
941 break;
942 }
943 }
944 if (victim_pvo == NULL)
945 panic("pmap_pte_spill: victim s-pte (%p) has "
946 "no pvo entry!", pt);
947 }
948
949 /*
950 * The victim should not be a kernel PVO/PTE entry.
951 */
952 KASSERT(victim_pvo->pvo_pmap != pmap_kernel());
953 KASSERT(PVO_PTEGIDX_ISSET(victim_pvo));
954 KASSERT(PVO_PTEGIDX_GET(victim_pvo) == i);
955
956 /*
957 * We are invalidating the TLB entry for the EA of the PVO
958 * we are replacing even though its PTE is valid; if we don't,
959 * we lose any ref/chg bit changes contained in the TLB
960 * entry.
961 */
962 source_pvo->pvo_pte.pte_hi &= ~PTE_HID;
963
964 /*
965 * To enforce the PVO list ordering constraint that all
966 * evicted entries should come before all valid entries,
967 * move the source PVO to the tail of its list and the
968 * victim PVO to the head of its list (which might not be
969 * the same list, if the victim was using the secondary hash).
970 */
971 TAILQ_REMOVE(pvoh, source_pvo, pvo_olink);
972 TAILQ_INSERT_TAIL(pvoh, source_pvo, pvo_olink);
973 TAILQ_REMOVE(vpvoh, victim_pvo, pvo_olink);
974 TAILQ_INSERT_HEAD(vpvoh, victim_pvo, pvo_olink);
975 pmap_pte_unset(pt, &victim_pvo->pvo_pte, victim_pvo->pvo_vaddr);
976 pmap_pte_set(pt, &source_pvo->pvo_pte);
977 victim_pvo->pvo_pmap->pm_evictions++;
978 source_pvo->pvo_pmap->pm_evictions--;
979 PVO_WHERE(victim_pvo, SPILL_UNSET);
980 PVO_WHERE(source_pvo, SPILL_SET);
981
982 PVO_PTEGIDX_CLR(victim_pvo);
983 PVO_PTEGIDX_SET(source_pvo, i);
984 PMAPCOUNT2(pmap_evcnt_ptes_primary[i]);
985 PMAPCOUNT(ptes_spilled);
986 PMAPCOUNT(ptes_evicted);
987 PMAPCOUNT(ptes_removed);
988
989 PMAP_PVO_CHECK(victim_pvo);
990 PMAP_PVO_CHECK(source_pvo);
991 return 1;
992 }
993
994 /*
995 * Restrict given range to physical memory
996 */
997 void
998 pmap_real_memory(paddr_t *start, psize_t *size)
999 {
1000 struct mem_region *mp;
1001
1002 for (mp = mem; mp->size; mp++) {
1003 if (*start + *size > mp->start
1004 && *start < mp->start + mp->size) {
1005 if (*start < mp->start) {
1006 *size -= mp->start - *start;
1007 *start = mp->start;
1008 }
1009 if (*start + *size > mp->start + mp->size)
1010 *size = mp->start + mp->size - *start;
1011 return;
1012 }
1013 }
1014 *size = 0;
1015 }
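/*
 * For illustration: given an avail region [0x1000, 0x4000) and a request
 * of *start = 0x0800, *size = 0x1000, the code above advances *start to
 * 0x1000 and shrinks *size to 0x0800; a request entirely outside every
 * region comes back with *size = 0.
 */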
1016
1017 /*
1018 * Initialize anything else for pmap handling.
1019 * Called during vm_init().
1020 */
1021 void
1022 pmap_init(void)
1023 {
1024 #ifdef __HAVE_PMAP_PHYSSEG
1025 struct pvo_tqhead *pvoh;
1026 int bank;
1027 long sz;
1028 char *attr;
1029
1030 pvoh = pmap_physseg.pvoh;
1031 attr = pmap_physseg.attrs;
1032 for (bank = 0; bank < vm_nphysseg; bank++) {
1033 sz = vm_physmem[bank].end - vm_physmem[bank].start;
1034 vm_physmem[bank].pmseg.pvoh = pvoh;
1035 vm_physmem[bank].pmseg.attrs = attr;
1036 for (; sz > 0; sz--, pvoh++, attr++) {
1037 TAILQ_INIT(pvoh);
1038 *attr = 0;
1039 }
1040 }
1041 #endif
1042
1043 pool_init(&pmap_mpvo_pool, sizeof(struct pvo_entry),
1044 sizeof(struct pvo_entry), 0, 0, "pmap_mpvopl",
1045 &pmap_pool_mallocator);
1046
1047 pool_setlowat(&pmap_mpvo_pool, 1008);
1048
1049 pmap_initialized = 1;
1050
1051 #ifdef PMAPCOUNTERS
1052 evcnt_attach_static(&pmap_evcnt_mappings);
1053 evcnt_attach_static(&pmap_evcnt_mappings_replaced);
1054 evcnt_attach_static(&pmap_evcnt_unmappings);
1055
1056 evcnt_attach_static(&pmap_evcnt_kernel_mappings);
1057 evcnt_attach_static(&pmap_evcnt_kernel_unmappings);
1058
1059 evcnt_attach_static(&pmap_evcnt_exec_mappings);
1060 evcnt_attach_static(&pmap_evcnt_exec_cached);
1061 evcnt_attach_static(&pmap_evcnt_exec_synced);
1062 evcnt_attach_static(&pmap_evcnt_exec_synced_clear_modify);
1063
1064 evcnt_attach_static(&pmap_evcnt_exec_uncached_page_protect);
1065 evcnt_attach_static(&pmap_evcnt_exec_uncached_clear_modify);
1066 evcnt_attach_static(&pmap_evcnt_exec_uncached_zero_page);
1067 evcnt_attach_static(&pmap_evcnt_exec_uncached_copy_page);
1068
1069 evcnt_attach_static(&pmap_evcnt_zeroed_pages);
1070 evcnt_attach_static(&pmap_evcnt_copied_pages);
1071 evcnt_attach_static(&pmap_evcnt_idlezeroed_pages);
1072
1073 evcnt_attach_static(&pmap_evcnt_updates);
1074 evcnt_attach_static(&pmap_evcnt_collects);
1075 evcnt_attach_static(&pmap_evcnt_copies);
1076
1077 evcnt_attach_static(&pmap_evcnt_ptes_spilled);
1078 evcnt_attach_static(&pmap_evcnt_ptes_unspilled);
1079 evcnt_attach_static(&pmap_evcnt_ptes_evicted);
1080 evcnt_attach_static(&pmap_evcnt_ptes_removed);
1081 evcnt_attach_static(&pmap_evcnt_ptes_changed);
1082 evcnt_attach_static(&pmap_evcnt_ptes_primary[0]);
1083 evcnt_attach_static(&pmap_evcnt_ptes_primary[1]);
1084 evcnt_attach_static(&pmap_evcnt_ptes_primary[2]);
1085 evcnt_attach_static(&pmap_evcnt_ptes_primary[3]);
1086 evcnt_attach_static(&pmap_evcnt_ptes_primary[4]);
1087 evcnt_attach_static(&pmap_evcnt_ptes_primary[5]);
1088 evcnt_attach_static(&pmap_evcnt_ptes_primary[6]);
1089 evcnt_attach_static(&pmap_evcnt_ptes_primary[7]);
1090 evcnt_attach_static(&pmap_evcnt_ptes_secondary[0]);
1091 evcnt_attach_static(&pmap_evcnt_ptes_secondary[1]);
1092 evcnt_attach_static(&pmap_evcnt_ptes_secondary[2]);
1093 evcnt_attach_static(&pmap_evcnt_ptes_secondary[3]);
1094 evcnt_attach_static(&pmap_evcnt_ptes_secondary[4]);
1095 evcnt_attach_static(&pmap_evcnt_ptes_secondary[5]);
1096 evcnt_attach_static(&pmap_evcnt_ptes_secondary[6]);
1097 evcnt_attach_static(&pmap_evcnt_ptes_secondary[7]);
1098 #endif
1099 }
1100
1101 /*
1102 * How much virtual space does the kernel get?
1103 */
1104 void
1105 pmap_virtual_space(vaddr_t *start, vaddr_t *end)
1106 {
1107 /*
1108 * For now, reserve one segment (minus some overhead) for kernel
1109 * virtual memory
1110 */
1111 *start = VM_MIN_KERNEL_ADDRESS;
1112 *end = VM_MAX_KERNEL_ADDRESS;
1113 }
1114
1115 /*
1116 * Allocate, initialize, and return a new physical map.
1117 */
1118 pmap_t
1119 pmap_create(void)
1120 {
1121 pmap_t pm;
1122
1123 pm = pool_get(&pmap_pool, PR_WAITOK);
1124 memset((caddr_t)pm, 0, sizeof *pm);
1125 pmap_pinit(pm);
1126
1127 DPRINTFN(CREATE,("pmap_create: pm %p:\n"
1128 "\t%06x %06x %06x %06x %06x %06x %06x %06x\n"
1129 "\t%06x %06x %06x %06x %06x %06x %06x %06x\n", pm,
1130 (unsigned int) pm->pm_sr[0], (unsigned int) pm->pm_sr[1],
1131 (unsigned int) pm->pm_sr[2], (unsigned int) pm->pm_sr[3],
1132 (unsigned int) pm->pm_sr[4], (unsigned int) pm->pm_sr[5],
1133 (unsigned int) pm->pm_sr[6], (unsigned int) pm->pm_sr[7],
1134 (unsigned int) pm->pm_sr[8], (unsigned int) pm->pm_sr[9],
1135 (unsigned int) pm->pm_sr[10], (unsigned int) pm->pm_sr[11],
1136 (unsigned int) pm->pm_sr[12], (unsigned int) pm->pm_sr[13],
1137 (unsigned int) pm->pm_sr[14], (unsigned int) pm->pm_sr[15]));
1138 return pm;
1139 }
1140
1141 /*
1142 * Initialize a preallocated and zeroed pmap structure.
1143 */
1144 void
1145 pmap_pinit(pmap_t pm)
1146 {
1147 register_t entropy = MFTB();
1148 register_t mask;
1149 int i;
1150
1151 /*
1152 * Allocate some segment registers for this pmap.
1153 */
1154 pm->pm_refs = 1;
1155 for (i = 0; i < NPMAPS; i += VSID_NBPW) {
1156 static register_t pmap_vsidcontext;
1157 register_t hash;
1158 unsigned int n;
1159
1160 /* Create a new value by multiplying by a prime and adding in
1161 * entropy from the timebase register. This is to make the
1162 * VSID more random so that the PT Hash function collides
1163 * less often. (note that the prime causes gcc to do shifts
1164 * instead of a multiply)
1165 */
1166 pmap_vsidcontext = (pmap_vsidcontext * 0x1105) + entropy;
1167 hash = pmap_vsidcontext & (NPMAPS - 1);
1168 if (hash == 0) /* 0 is special, avoid it */
1169 continue;
1170 n = hash >> 5;
1171 mask = 1L << (hash & (VSID_NBPW-1));
1172 hash = pmap_vsidcontext;
1173 if (pmap_vsid_bitmap[n] & mask) { /* collision? */
1174 /* anything free in this bucket? */
1175 if (~pmap_vsid_bitmap[n] == 0) {
1176 entropy = hash >> PTE_VSID_SHFT;
1177 continue;
1178 }
1179 i = ffs(~pmap_vsid_bitmap[n]) - 1;
1180 mask = 1L << i;
1181 hash &= ~(VSID_NBPW-1);
1182 hash |= i;
1183 }
1184 hash &= PTE_VSID >> PTE_VSID_SHFT;
1185 pmap_vsid_bitmap[n] |= mask;
1186 pm->pm_vsid = hash;
1187 #ifndef PPC_OEA64
1188 for (i = 0; i < 16; i++)
1189 pm->pm_sr[i] = VSID_MAKE(i, hash) | SR_PRKEY |
1190 SR_NOEXEC;
1191 #endif
1192 return;
1193 }
1194 panic("pmap_pinit: out of segments");
1195 }
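/*
 * For illustration (made-up value): if the masked hash comes out as
 * 0x2345 (and NPMAPS is larger than that), the allocation above sets
 * bit (0x2345 & 31) of pmap_vsid_bitmap[0x2345 >> 5]; pmap_release()
 * later clears the bit belonging to the pmap's VSID so the group can be
 * handed out again.
 */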
1196
1197 /*
1198 * Add a reference to the given pmap.
1199 */
1200 void
1201 pmap_reference(pmap_t pm)
1202 {
1203 pm->pm_refs++;
1204 }
1205
1206 /*
1207 * Retire the given pmap from service.
1208 * Should only be called if the map contains no valid mappings.
1209 */
1210 void
1211 pmap_destroy(pmap_t pm)
1212 {
1213 if (--pm->pm_refs == 0) {
1214 pmap_release(pm);
1215 pool_put(&pmap_pool, pm);
1216 }
1217 }
1218
1219 /*
1220 * Release any resources held by the given physical map.
1221 * Called when a pmap initialized by pmap_pinit is being released.
1222 */
1223 void
1224 pmap_release(pmap_t pm)
1225 {
1226 int idx, mask;
1227
1228 if (pm->pm_sr[0] == 0)
1229 panic("pmap_release");
1230 idx = VSID_TO_HASH(pm->pm_vsid) & (NPMAPS-1);
1231 mask = 1 << (idx % VSID_NBPW);
1232 idx /= VSID_NBPW;
1233 pmap_vsid_bitmap[idx] &= ~mask;
1234 }
1235
1236 /*
1237 * Copy the range specified by src_addr/len
1238 * from the source map to the range dst_addr/len
1239 * in the destination map.
1240 *
1241 * This routine is only advisory and need not do anything.
1242 */
1243 void
1244 pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vaddr_t dst_addr,
1245 vsize_t len, vaddr_t src_addr)
1246 {
1247 PMAPCOUNT(copies);
1248 }
1249
1250 /*
1251 * Require that all active physical maps contain no
1252 * incorrect entries NOW.
1253 */
1254 void
1255 pmap_update(struct pmap *pmap)
1256 {
1257 PMAPCOUNT(updates);
1258 TLBSYNC();
1259 }
1260
1261 /*
1262 * Garbage collects the physical map system for
1263 * pages which are no longer used.
1264 * Success need not be guaranteed -- that is, there
1265 * may well be pages which are not referenced, but
1266 * others may be collected.
1267 * Called by the pageout daemon when pages are scarce.
1268 */
1269 void
1270 pmap_collect(pmap_t pm)
1271 {
1272 PMAPCOUNT(collects);
1273 }
1274
1275 static __inline int
1276 pmap_pvo_pte_index(const struct pvo_entry *pvo, int ptegidx)
1277 {
1278 int pteidx;
1279 /*
1280 * We can find the actual pte entry without searching by
1281 * grabbing the PTEG slot index stored in the low bits of pvo_vaddr
1282 * and by noticing the HID bit.
1283 */
1284 pteidx = ptegidx * 8 + PVO_PTEGIDX_GET(pvo);
1285 if (pvo->pvo_pte.pte_hi & PTE_HID)
1286 pteidx ^= pmap_pteg_mask * 8;
1287 return pteidx;
1288 }
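/*
 * For instance, a PVO sitting in slot 2 of primary PTEG 5 yields
 * pteidx = 5 * 8 + 2 = 42; if PTE_HID is set, XORing with
 * pmap_pteg_mask * 8 redirects the index into the corresponding
 * secondary group while leaving the slot bits alone.
 */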
1289
1290 volatile struct pte *
1291 pmap_pvo_to_pte(const struct pvo_entry *pvo, int pteidx)
1292 {
1293 volatile struct pte *pt;
1294
1295 #if !defined(DIAGNOSTIC) && !defined(DEBUG) && !defined(PMAPCHECK)
1296 if ((pvo->pvo_pte.pte_hi & PTE_VALID) == 0)
1297 return NULL;
1298 #endif
1299
1300 /*
1301 * If we haven't been supplied the ptegidx, calculate it.
1302 */
1303 if (pteidx == -1) {
1304 int ptegidx;
1305 ptegidx = va_to_pteg(pvo->pvo_pmap, pvo->pvo_vaddr);
1306 pteidx = pmap_pvo_pte_index(pvo, ptegidx);
1307 }
1308
1309 pt = &pmap_pteg_table[pteidx >> 3].pt[pteidx & 7];
1310
1311 #if !defined(DIAGNOSTIC) && !defined(DEBUG) && !defined(PMAPCHECK)
1312 return pt;
1313 #else
1314 if ((pvo->pvo_pte.pte_hi & PTE_VALID) && !PVO_PTEGIDX_ISSET(pvo)) {
1315 panic("pmap_pvo_to_pte: pvo %p: has valid pte in "
1316 "pvo but no valid pte index", pvo);
1317 }
1318 if ((pvo->pvo_pte.pte_hi & PTE_VALID) == 0 && PVO_PTEGIDX_ISSET(pvo)) {
1319 panic("pmap_pvo_to_pte: pvo %p: has valid pte index in "
1320 "pvo but no valid pte", pvo);
1321 }
1322
1323 if ((pt->pte_hi ^ (pvo->pvo_pte.pte_hi & ~PTE_VALID)) == PTE_VALID) {
1324 if ((pvo->pvo_pte.pte_hi & PTE_VALID) == 0) {
1325 #if defined(DEBUG) || defined(PMAPCHECK)
1326 pmap_pte_print(pt);
1327 #endif
1328 panic("pmap_pvo_to_pte: pvo %p: has valid pte in "
1329 "pmap_pteg_table %p but invalid in pvo",
1330 pvo, pt);
1331 }
1332 if (((pt->pte_lo ^ pvo->pvo_pte.pte_lo) & ~(PTE_CHG|PTE_REF)) != 0) {
1333 #if defined(DEBUG) || defined(PMAPCHECK)
1334 pmap_pte_print(pt);
1335 #endif
1336 panic("pmap_pvo_to_pte: pvo %p: pvo pte does "
1337 "not match pte %p in pmap_pteg_table",
1338 pvo, pt);
1339 }
1340 return pt;
1341 }
1342
1343 if (pvo->pvo_pte.pte_hi & PTE_VALID) {
1344 #if defined(DEBUG) || defined(PMAPCHECK)
1345 pmap_pte_print(pt);
1346 #endif
1347 panic("pmap_pvo_to_pte: pvo %p: has no matching pte %p in "
1348 "pmap_pteg_table but valid in pvo", pvo, pt);
1349 }
1350 return NULL;
1351 #endif /* !(!DIAGNOSTIC && !DEBUG && !PMAPCHECK) */
1352 }
1353
1354 struct pvo_entry *
1355 pmap_pvo_find_va(pmap_t pm, vaddr_t va, int *pteidx_p)
1356 {
1357 struct pvo_entry *pvo;
1358 int ptegidx;
1359
1360 va &= ~ADDR_POFF;
1361 ptegidx = va_to_pteg(pm, va);
1362
1363 TAILQ_FOREACH(pvo, &pmap_pvo_table[ptegidx], pvo_olink) {
1364 #if defined(DIAGNOSTIC) || defined(DEBUG) || defined(PMAPCHECK)
1365 if ((uintptr_t) pvo >= SEGMENT_LENGTH)
1366 panic("pmap_pvo_find_va: invalid pvo %p on "
1367 "list %#x (%p)", pvo, ptegidx,
1368 &pmap_pvo_table[ptegidx]);
1369 #endif
1370 if (pvo->pvo_pmap == pm && PVO_VADDR(pvo) == va) {
1371 if (pteidx_p)
1372 *pteidx_p = pmap_pvo_pte_index(pvo, ptegidx);
1373 return pvo;
1374 }
1375 }
1376 return NULL;
1377 }
1378
1379 #if defined(DEBUG) || defined(PMAPCHECK)
1380 void
1381 pmap_pvo_check(const struct pvo_entry *pvo)
1382 {
1383 struct pvo_head *pvo_head;
1384 struct pvo_entry *pvo0;
1385 volatile struct pte *pt;
1386 int failed = 0;
1387
1388 if ((uintptr_t)(pvo+1) >= SEGMENT_LENGTH)
1389 panic("pmap_pvo_check: pvo %p: invalid address", pvo);
1390
1391 if ((uintptr_t)(pvo->pvo_pmap+1) >= SEGMENT_LENGTH) {
1392 printf("pmap_pvo_check: pvo %p: invalid pmap address %p\n",
1393 pvo, pvo->pvo_pmap);
1394 failed = 1;
1395 }
1396
1397 if ((uintptr_t)TAILQ_NEXT(pvo, pvo_olink) >= SEGMENT_LENGTH ||
1398 (((uintptr_t)TAILQ_NEXT(pvo, pvo_olink)) & 0x1f) != 0) {
1399 printf("pmap_pvo_check: pvo %p: invalid ovlink address %p\n",
1400 pvo, TAILQ_NEXT(pvo, pvo_olink));
1401 failed = 1;
1402 }
1403
1404 if ((uintptr_t)LIST_NEXT(pvo, pvo_vlink) >= SEGMENT_LENGTH ||
1405 (((uintptr_t)LIST_NEXT(pvo, pvo_vlink)) & 0x1f) != 0) {
1406 printf("pmap_pvo_check: pvo %p: invalid vlink address %p\n",
1407 pvo, LIST_NEXT(pvo, pvo_vlink));
1408 failed = 1;
1409 }
1410
1411 if (pvo->pvo_vaddr & PVO_MANAGED) {
1412 pvo_head = pa_to_pvoh(pvo->pvo_pte.pte_lo & PTE_RPGN, NULL);
1413 } else {
1414 if (pvo->pvo_vaddr < VM_MIN_KERNEL_ADDRESS) {
1415 printf("pmap_pvo_check: pvo %p: non kernel address "
1416 "on kernel unmanaged list\n", pvo);
1417 failed = 1;
1418 }
1419 pvo_head = &pmap_pvo_kunmanaged;
1420 }
1421 LIST_FOREACH(pvo0, pvo_head, pvo_vlink) {
1422 if (pvo0 == pvo)
1423 break;
1424 }
1425 if (pvo0 == NULL) {
1426 printf("pmap_pvo_check: pvo %p: not present "
1427 "on its vlist head %p\n", pvo, pvo_head);
1428 failed = 1;
1429 }
1430 if (pvo != pmap_pvo_find_va(pvo->pvo_pmap, pvo->pvo_vaddr, NULL)) {
1431 printf("pmap_pvo_check: pvo %p: not present "
1432 "on its olist head\n", pvo);
1433 failed = 1;
1434 }
1435 pt = pmap_pvo_to_pte(pvo, -1);
1436 if (pt == NULL) {
1437 if (pvo->pvo_pte.pte_hi & PTE_VALID) {
1438 printf("pmap_pvo_check: pvo %p: pte_hi VALID but "
1439 "no PTE\n", pvo);
1440 failed = 1;
1441 }
1442 } else {
1443 if ((uintptr_t) pt < (uintptr_t) &pmap_pteg_table[0] ||
1444 (uintptr_t) pt >=
1445 (uintptr_t) &pmap_pteg_table[pmap_pteg_cnt]) {
1446 printf("pmap_pvo_check: pvo %p: pte %p not in "
1447 "pteg table\n", pvo, pt);
1448 failed = 1;
1449 }
1450 if (((((uintptr_t) pt) >> 3) & 7) != PVO_PTEGIDX_GET(pvo)) {
1451 printf("pmap_pvo_check: pvo %p: pte slot does not "
1452 "match PVO_PTEGIDX\n", pvo);
1453 failed = 1;
1454 }
1455 if (pvo->pvo_pte.pte_hi != pt->pte_hi) {
1456 printf("pmap_pvo_check: pvo %p: pte_hi differ: "
1457 "%#x/%#x\n", pvo, (unsigned int) pvo->pvo_pte.pte_hi, (unsigned int) pt->pte_hi);
1458 failed = 1;
1459 }
1460 if (((pvo->pvo_pte.pte_lo ^ pt->pte_lo) &
1461 (PTE_PP|PTE_WIMG|PTE_RPGN)) != 0) {
1462 printf("pmap_pvo_check: pvo %p: pte_lo differ: "
1463 "%#x/%#x\n", pvo,
1464 (unsigned int) (pvo->pvo_pte.pte_lo & (PTE_PP|PTE_WIMG|PTE_RPGN)),
1465 (unsigned int) (pt->pte_lo & (PTE_PP|PTE_WIMG|PTE_RPGN)));
1466 failed = 1;
1467 }
1468 if ((pmap_pte_to_va(pt) ^ PVO_VADDR(pvo)) & 0x0fffffff) {
1469 printf("pmap_pvo_check: pvo %p: PTE %p derived VA %#lx"
1470 " does not match PVO's VA %#lx\n",
1471 pvo, pt, pmap_pte_to_va(pt), PVO_VADDR(pvo));
1472 failed = 1;
1473 }
1474 if (failed)
1475 pmap_pte_print(pt);
1476 }
1477 if (failed)
1478 panic("pmap_pvo_check: pvo %p, pm %p: bugcheck!", pvo,
1479 pvo->pvo_pmap);
1480 }
1481 #endif /* DEBUG || PMAPCHECK */
1482
1483 /*
1484 * This returns whether this is the first mapping of a page.
1485 */
1486 int
1487 pmap_pvo_enter(pmap_t pm, struct pool *pl, struct pvo_head *pvo_head,
1488 vaddr_t va, paddr_t pa, register_t pte_lo, int flags)
1489 {
1490 struct pvo_entry *pvo;
1491 struct pvo_tqhead *pvoh;
1492 register_t msr;
1493 int ptegidx;
1494 int i;
1495 int poolflags = PR_NOWAIT;
1496
1497 #if defined(DIAGNOSTIC) || defined(DEBUG) || defined(PMAPCHECK)
1498 if (pmap_pvo_remove_depth > 0)
1499 panic("pmap_pvo_enter: called while pmap_pvo_remove active!");
1500 if (++pmap_pvo_enter_depth > 1)
1501 panic("pmap_pvo_enter: called recursively!");
1502 #endif
1503
1504 /*
1505 * Compute the PTE Group index.
1506 */
1507 va &= ~ADDR_POFF;
1508 ptegidx = va_to_pteg(pm, va);
1509
1510 msr = pmap_interrupts_off();
1511 /*
1512 * Remove any existing mapping for this page. Reuse the
1513 * pvo entry if there is a mapping.
1514 */
1515 TAILQ_FOREACH(pvo, &pmap_pvo_table[ptegidx], pvo_olink) {
1516 if (pvo->pvo_pmap == pm && PVO_VADDR(pvo) == va) {
1517 #ifdef DEBUG
1518 if ((pmapdebug & PMAPDEBUG_PVOENTER) &&
1519 ((pvo->pvo_pte.pte_lo ^ (pa|pte_lo)) &
1520 ~(PTE_REF|PTE_CHG)) == 0 &&
1521 va < VM_MIN_KERNEL_ADDRESS) {
1522 printf("pmap_pvo_enter: pvo %p: dup %#x/%#lx\n",
1523 pvo, (unsigned int) pvo->pvo_pte.pte_lo, (unsigned int) pte_lo|pa);
1524 printf("pmap_pvo_enter: pte_hi=%#x sr=%#x\n",
1525 (unsigned int) pvo->pvo_pte.pte_hi,
1526 (unsigned int) pm->pm_sr[va >> ADDR_SR_SHFT]);
1527 pmap_pte_print(pmap_pvo_to_pte(pvo, -1));
1528 #ifdef DDBX
1529 Debugger();
1530 #endif
1531 }
1532 #endif
1533 PMAPCOUNT(mappings_replaced);
1534 pmap_pvo_remove(pvo, -1);
1535 break;
1536 }
1537 }
1538
1539 /*
1540 * If we aren't overwriting a mapping, try to allocate a new pvo entry.
1541 */
1542 pmap_interrupts_restore(msr);
1543 pvo = pool_get(pl, poolflags);
1544 msr = pmap_interrupts_off();
1545 if (pvo == NULL) {
1546 pvo = pmap_pvo_reclaim(pm);
1547 if (pvo == NULL) {
1548 if ((flags & PMAP_CANFAIL) == 0)
1549 panic("pmap_pvo_enter: failed");
1550 #if defined(DIAGNOSTIC) || defined(DEBUG) || defined(PMAPCHECK)
1551 pmap_pvo_enter_depth--;
1552 #endif
1553 pmap_interrupts_restore(msr);
1554 return ENOMEM;
1555 }
1556 }
1557 pvo->pvo_vaddr = va;
1558 pvo->pvo_pmap = pm;
1559 pvo->pvo_vaddr &= ~ADDR_POFF;
1560 if (flags & VM_PROT_EXECUTE) {
1561 PMAPCOUNT(exec_mappings);
1562 pvo_set_exec(pvo);
1563 }
1564 if (flags & PMAP_WIRED)
1565 pvo->pvo_vaddr |= PVO_WIRED;
1566 if (pvo_head != &pmap_pvo_kunmanaged) {
1567 pvo->pvo_vaddr |= PVO_MANAGED;
1568 PMAPCOUNT(mappings);
1569 } else {
1570 PMAPCOUNT(kernel_mappings);
1571 }
1572 pmap_pte_create(&pvo->pvo_pte, pm, va, pa | pte_lo);
1573
1574 LIST_INSERT_HEAD(pvo_head, pvo, pvo_vlink);
1575 if (pvo->pvo_vaddr & PVO_WIRED)
1576 pvo->pvo_pmap->pm_stats.wired_count++;
1577 pvo->pvo_pmap->pm_stats.resident_count++;
1578 #if defined(DEBUG)
1579 if (pm != pmap_kernel() && va < VM_MIN_KERNEL_ADDRESS)
1580 DPRINTFN(PVOENTER,
1581 ("pmap_pvo_enter: pvo %p: pm %p va %#lx pa %#lx\n",
1582 pvo, pm, va, pa));
1583 #endif
1584
1585 /*
1586 * We hope this succeeds but it isn't required.
1587 */
1588 pvoh = &pmap_pvo_table[ptegidx];
1589 i = pmap_pte_insert(ptegidx, &pvo->pvo_pte);
1590 if (i >= 0) {
1591 PVO_PTEGIDX_SET(pvo, i);
1592 PVO_WHERE(pvo, ENTER_INSERT);
1593 PMAPCOUNT2(((pvo->pvo_pte.pte_hi & PTE_HID)
1594 ? pmap_evcnt_ptes_secondary : pmap_evcnt_ptes_primary)[i]);
1595 TAILQ_INSERT_TAIL(pvoh, pvo, pvo_olink);
1596 } else {
1597 /*
1598 * Since we didn't have room for this entry (which makes it
1599 * an evicted entry), place it at the head of the list.
1600 */
1601 TAILQ_INSERT_HEAD(pvoh, pvo, pvo_olink);
1602 PMAPCOUNT(ptes_evicted);
1603 pm->pm_evictions++;
1604 /*
1605 * If this is a kernel page, make sure it's active.
1606 */
1607 if (pm == pmap_kernel()) {
1608 i = pmap_pte_spill(pm, va, FALSE);
1609 KASSERT(i);
1610 }
1611 }
1612 PMAP_PVO_CHECK(pvo); /* sanity check */
1613 #if defined(DIAGNOSTIC) || defined(DEBUG) || defined(PMAPCHECK)
1614 pmap_pvo_enter_depth--;
1615 #endif
1616 pmap_interrupts_restore(msr);
1617 return 0;
1618 }
1619
1620 void
1621 pmap_pvo_remove(struct pvo_entry *pvo, int pteidx)
1622 {
1623 volatile struct pte *pt;
1624 int ptegidx;
1625
1626 #if defined(DIAGNOSTIC) || defined(DEBUG) || defined(PMAPCHECK)
1627 if (++pmap_pvo_remove_depth > 1)
1628 panic("pmap_pvo_remove: called recursively!");
1629 #endif
1630
1631 /*
1632 * If we haven't been supplied the ptegidx, calculate it.
1633 */
1634 if (pteidx == -1) {
1635 ptegidx = va_to_pteg(pvo->pvo_pmap, pvo->pvo_vaddr);
1636 pteidx = pmap_pvo_pte_index(pvo, ptegidx);
1637 } else {
1638 ptegidx = pteidx >> 3;
1639 if (pvo->pvo_pte.pte_hi & PTE_HID)
1640 ptegidx ^= pmap_pteg_mask;
1641 }
1642 PMAP_PVO_CHECK(pvo); /* sanity check */
1643
1644 /*
1645 * If there is an active pte entry, we need to deactivate it
1646 * (and save the ref & chg bits).
1647 */
1648 pt = pmap_pvo_to_pte(pvo, pteidx);
1649 if (pt != NULL) {
1650 pmap_pte_unset(pt, &pvo->pvo_pte, pvo->pvo_vaddr);
1651 PVO_WHERE(pvo, REMOVE);
1652 PVO_PTEGIDX_CLR(pvo);
1653 PMAPCOUNT(ptes_removed);
1654 } else {
1655 KASSERT(pvo->pvo_pmap->pm_evictions > 0);
1656 pvo->pvo_pmap->pm_evictions--;
1657 }
1658
1659 /*
1660 * Account for executable mappings.
1661 */
1662 if (PVO_ISEXECUTABLE(pvo))
1663 pvo_clear_exec(pvo);
1664
1665 /*
1666 * Update our statistics.
1667 */
1668 pvo->pvo_pmap->pm_stats.resident_count--;
1669 if (pvo->pvo_vaddr & PVO_WIRED)
1670 pvo->pvo_pmap->pm_stats.wired_count--;
1671
1672 /*
1673 * Save the REF/CHG bits into their cache if the page is managed.
1674 */
1675 if (pvo->pvo_vaddr & PVO_MANAGED) {
1676 register_t ptelo = pvo->pvo_pte.pte_lo;
1677 struct vm_page *pg = PHYS_TO_VM_PAGE(ptelo & PTE_RPGN);
1678
1679 if (pg != NULL) {
1680 pmap_attr_save(pg, ptelo & (PTE_REF|PTE_CHG));
1681 }
1682 PMAPCOUNT(unmappings);
1683 } else {
1684 PMAPCOUNT(kernel_unmappings);
1685 }
1686
1687 /*
1688 * Remove the PVO from its lists and return it to the pool.
1689 */
1690 LIST_REMOVE(pvo, pvo_vlink);
1691 TAILQ_REMOVE(&pmap_pvo_table[ptegidx], pvo, pvo_olink);
1692 pool_put(pvo->pvo_vaddr & PVO_MANAGED
1693 ? &pmap_mpvo_pool : &pmap_upvo_pool, pvo);
1694 #if defined(DIAGNOSTIC) || defined(DEBUG) || defined(PMAPCHECK)
1695 pmap_pvo_remove_depth--;
1696 #endif
1697 }
1698
1699 /*
1700 * Mark a mapping as executable.
1701 * If this is the first executable mapping in the segment,
1702 * clear the noexec flag.
1703 */
1704 STATIC void
1705 pvo_set_exec(struct pvo_entry *pvo)
1706 {
1707 struct pmap *pm = pvo->pvo_pmap;
1708
1709 if (pm == pmap_kernel() || PVO_ISEXECUTABLE(pvo)) {
1710 return;
1711 }
1712 pvo->pvo_vaddr |= PVO_EXECUTABLE;
1713 #ifdef PPC_OEA
1714 {
1715 int sr = PVO_VADDR(pvo) >> ADDR_SR_SHFT;
1716 if (pm->pm_exec[sr]++ == 0) {
1717 pm->pm_sr[sr] &= ~SR_NOEXEC;
1718 }
1719 }
1720 #endif
1721 }
1722
1723 /*
1724 * Mark a mapping as non-executable.
1725 * If this was the last executable mapping in the segment,
1726 * set the noexec flag.
1727 */
1728 STATIC void
1729 pvo_clear_exec(struct pvo_entry *pvo)
1730 {
1731 struct pmap *pm = pvo->pvo_pmap;
1732
1733 if (pm == pmap_kernel() || !PVO_ISEXECUTABLE(pvo)) {
1734 return;
1735 }
1736 pvo->pvo_vaddr &= ~PVO_EXECUTABLE;
1737 #ifdef PPC_OEA
1738 {
1739 int sr = PVO_VADDR(pvo) >> ADDR_SR_SHFT;
1740 if (--pm->pm_exec[sr] == 0) {
1741 pm->pm_sr[sr] |= SR_NOEXEC;
1742 }
1743 }
1744 #endif
1745 }
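/*
 * pm_exec[] keeps a per-segment count of executable mappings;
 * pvo_set_exec() and pvo_clear_exec() only touch SR_NOEXEC in the
 * matching segment register on the 0 <-> 1 transitions, so a segment is
 * marked no-execute exactly while it holds no executable mappings
 * (the kernel pmap is left alone).
 */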
1746
1747 /*
1748 * Insert physical page at pa into the given pmap at virtual address va.
1749 */
1750 int
1751 pmap_enter(pmap_t pm, vaddr_t va, paddr_t pa, vm_prot_t prot, int flags)
1752 {
1753 struct mem_region *mp;
1754 struct pvo_head *pvo_head;
1755 struct vm_page *pg;
1756 struct pool *pl;
1757 register_t pte_lo;
1758 int error;
1759 u_int pvo_flags;
1760 u_int was_exec = 0;
1761
1762 if (__predict_false(!pmap_initialized)) {
1763 pvo_head = &pmap_pvo_kunmanaged;
1764 pl = &pmap_upvo_pool;
1765 pvo_flags = 0;
1766 pg = NULL;
1767 was_exec = PTE_EXEC;
1768 } else {
1769 pvo_head = pa_to_pvoh(pa, &pg);
1770 pl = &pmap_mpvo_pool;
1771 pvo_flags = PVO_MANAGED;
1772 }
1773
1774 DPRINTFN(ENTER,
1775 ("pmap_enter(%p, 0x%lx, 0x%lx, 0x%x, 0x%x):",
1776 pm, va, pa, prot, flags));
1777
1778 /*
1779 * If this is a managed page, and it's the first reference to the
1780 * page, clear the execness of the page. Otherwise fetch the execness.
1781 */
1782 if (pg != NULL)
1783 was_exec = pmap_attr_fetch(pg) & PTE_EXEC;
1784
1785 DPRINTFN(ENTER, (" was_exec=%d", was_exec));
1786
1787 /*
1788 * Assume the page is cache inhibited and access is guarded unless
1789 * it's in our available memory array. If it is in the memory array,
1790 * assume it is memory coherent.
1791 */
1792 pte_lo = PTE_IG;
1793 if ((flags & PMAP_NC) == 0) {
1794 for (mp = mem; mp->size; mp++) {
1795 if (pa >= mp->start && pa < mp->start + mp->size) {
1796 pte_lo = PTE_M;
1797 break;
1798 }
1799 }
1800 }
1801
1802 if (prot & VM_PROT_WRITE)
1803 pte_lo |= PTE_BW;
1804 else
1805 pte_lo |= PTE_BR;
1806
1807 /*
1808 * If this was in response to a fault, "pre-fault" the PTE's
1809 * changed/referenced bit appropriately.
1810 */
1811 if (flags & VM_PROT_WRITE)
1812 pte_lo |= PTE_CHG;
1813 if (flags & (VM_PROT_READ|VM_PROT_WRITE))
1814 pte_lo |= PTE_REF;
1815
1816 /*
1817 * We need to know if this page can be executable
1818 */
1819 flags |= (prot & VM_PROT_EXECUTE);
1820
1821 /*
1822 * Record mapping for later back-translation and pte spilling.
1823 * This will overwrite any existing mapping.
1824 */
1825 error = pmap_pvo_enter(pm, pl, pvo_head, va, pa, pte_lo, flags);
1826
1827 /*
1828 * Flush the real page from the instruction cache if this page is
1829 * mapped executable and cacheable and has not been flushed since
1830 * the last time it was modified.
1831 */
1832 if (error == 0 &&
1833 (flags & VM_PROT_EXECUTE) &&
1834 (pte_lo & PTE_I) == 0 &&
1835 was_exec == 0) {
1836 DPRINTFN(ENTER, (" syncicache"));
1837 PMAPCOUNT(exec_synced);
1838 pmap_syncicache(pa, PAGE_SIZE);
1839 if (pg != NULL) {
1840 pmap_attr_save(pg, PTE_EXEC);
1841 PMAPCOUNT(exec_cached);
1842 #if defined(DEBUG) || defined(PMAPDEBUG)
1843 if (pmapdebug & PMAPDEBUG_ENTER)
1844 printf(" marked-as-exec");
1845 else if (pmapdebug & PMAPDEBUG_EXEC)
1846 printf("[pmap_enter: %#lx: marked-as-exec]\n",
1847 pg->phys_addr);
1848
1849 #endif
1850 }
1851 }
1852
1853 DPRINTFN(ENTER, (": error=%d\n", error));
1854
1855 return error;
1856 }
1857
1858 void
1859 pmap_kenter_pa(vaddr_t va, paddr_t pa, vm_prot_t prot)
1860 {
1861 struct mem_region *mp;
1862 register_t pte_lo;
1863 int error;
1864
1865 if (va < VM_MIN_KERNEL_ADDRESS)
1866 panic("pmap_kenter_pa: attempt to enter "
1867 "non-kernel address %#lx!", va);
1868
1869 DPRINTFN(KENTER,
1870 ("pmap_kenter_pa(%#lx,%#lx,%#x)\n", va, pa, prot));
1871
1872 /*
1873 * Assume the page is cache inhibited and access is guarded unless
1874 * it's in our available memory array. If it is in the memory array,
1875 	 * assume it's memory-coherent.
1876 */
1877 pte_lo = PTE_IG;
1878 if ((prot & PMAP_NC) == 0) {
1879 for (mp = mem; mp->size; mp++) {
1880 if (pa >= mp->start && pa < mp->start + mp->size) {
1881 pte_lo = PTE_M;
1882 break;
1883 }
1884 }
1885 }
1886
1887 if (prot & VM_PROT_WRITE)
1888 pte_lo |= PTE_BW;
1889 else
1890 pte_lo |= PTE_BR;
1891
1892 /*
1893 * We don't care about REF/CHG on PVOs on the unmanaged list.
1894 */
1895 error = pmap_pvo_enter(pmap_kernel(), &pmap_upvo_pool,
1896 &pmap_pvo_kunmanaged, va, pa, pte_lo, prot|PMAP_WIRED);
1897
1898 if (error != 0)
1899 panic("pmap_kenter_pa: failed to enter va %#lx pa %#lx: %d",
1900 va, pa, error);
1901 }
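/*
 * Illustrative use: pmap_kenter_pa(va, pa, VM_PROT_READ|VM_PROT_WRITE)
 * creates a wired, unmanaged kernel mapping (no PV tracking, no REF/CHG
 * accounting); such a mapping is normally torn down with
 * pmap_kremove(va, PAGE_SIZE) below.
 */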
1902
1903 void
1904 pmap_kremove(vaddr_t va, vsize_t len)
1905 {
1906 if (va < VM_MIN_KERNEL_ADDRESS)
1907 panic("pmap_kremove: attempt to remove "
1908 "non-kernel address %#lx!", va);
1909
1910 DPRINTFN(KREMOVE,("pmap_kremove(%#lx,%#lx)\n", va, len));
1911 pmap_remove(pmap_kernel(), va, va + len);
1912 }
1913
1914 /*
1915 * Remove the given range of mapping entries.
1916 */
1917 void
1918 pmap_remove(pmap_t pm, vaddr_t va, vaddr_t endva)
1919 {
1920 struct pvo_entry *pvo;
1921 register_t msr;
1922 int pteidx;
1923
1924 msr = pmap_interrupts_off();
1925 for (; va < endva; va += PAGE_SIZE) {
1926 pvo = pmap_pvo_find_va(pm, va, &pteidx);
1927 if (pvo != NULL) {
1928 pmap_pvo_remove(pvo, pteidx);
1929 }
1930 }
1931 pmap_interrupts_restore(msr);
1932 }
1933
1934 /*
1935 * Get the physical page address for the given pmap/virtual address.
1936 */
1937 boolean_t
1938 pmap_extract(pmap_t pm, vaddr_t va, paddr_t *pap)
1939 {
1940 struct pvo_entry *pvo;
1941 register_t msr;
1942
1943 /*
1944 * If this is a kernel pmap lookup, also check the battable
1945 * and if we get a hit, translate the VA to a PA using the
1946 	 * BAT entries. Don't check against VM_MAX_KERNEL_ADDRESS if
1947 	 * that would wrap back to 0.
1948 */
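	/*
	 * Illustrative numbers: for a valid 256MB BAT entry the BL field
	 * (batu & BAT_BL) is all ones, so the expression below yields
	 * mask = 0xf0000000; the physical address is then the BAT's block
	 * start (from batl) plus the low 28 bits of the VA.
	 */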
1949 if (pm == pmap_kernel() &&
1950 (va < VM_MIN_KERNEL_ADDRESS ||
1951 (KERNEL2_SR < 15 && VM_MAX_KERNEL_ADDRESS <= va))) {
1952 register_t batu = battable[va >> ADDR_SR_SHFT].batu;
1953 KASSERT((va >> ADDR_SR_SHFT) != USER_SR);
1954 if (BAT_VALID_P(batu,0) && BAT_VA_MATCH_P(batu,va)) {
1955 register_t batl = battable[va >> ADDR_SR_SHFT].batl;
1956 register_t mask = (~(batu & BAT_BL) << 15) & ~0x1ffffL;
1957 *pap = (batl & mask) | (va & ~mask);
1958 return TRUE;
1959 }
1960 return FALSE;
1961 }
1962
1963 msr = pmap_interrupts_off();
1964 pvo = pmap_pvo_find_va(pm, va & ~ADDR_POFF, NULL);
1965 if (pvo != NULL) {
1966 PMAP_PVO_CHECK(pvo); /* sanity check */
1967 *pap = (pvo->pvo_pte.pte_lo & PTE_RPGN) | (va & ADDR_POFF);
1968 }
1969 pmap_interrupts_restore(msr);
1970 return pvo != NULL;
1971 }
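/*
 * Typical (illustrative) use:
 *
 *	paddr_t pa;
 *	if (!pmap_extract(pmap_kernel(), va, &pa))
 *		...no translation; note that *pap is left untouched...
 */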
1972
1973 /*
1974 * Lower the protection on the specified range of this pmap.
1975 */
1976 void
1977 pmap_protect(pmap_t pm, vaddr_t va, vaddr_t endva, vm_prot_t prot)
1978 {
1979 struct pvo_entry *pvo;
1980 volatile struct pte *pt;
1981 register_t msr;
1982 int pteidx;
1983
1984 /*
1985 * Since this routine only downgrades protection, we should
1986 * always be called with at least one bit not set.
1987 */
1988 KASSERT(prot != VM_PROT_ALL);
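	/*
	 * For example, downgrading to VM_PROT_READ|VM_PROT_EXECUTE leaves
	 * the mappings in place but forces them read-only below, while
	 * VM_PROT_READ alone additionally revokes execute permission, and
	 * anything without VM_PROT_READ removes the mappings outright.
	 */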
1989
1990 /*
1991 	 * If no access is left, this is equivalent to
1992 	 * removing the range from the pmap.
1993 */
1994 if ((prot & VM_PROT_READ) == 0) {
1995 pmap_remove(pm, va, endva);
1996 return;
1997 }
1998
1999 msr = pmap_interrupts_off();
2000 for (; va < endva; va += PAGE_SIZE) {
2001 pvo = pmap_pvo_find_va(pm, va, &pteidx);
2002 if (pvo == NULL)
2003 continue;
2004 PMAP_PVO_CHECK(pvo); /* sanity check */
2005
2006 /*
2007 * Revoke executable if asked to do so.
2008 */
2009 if ((prot & VM_PROT_EXECUTE) == 0)
2010 pvo_clear_exec(pvo);
2011
2012 #if 0
2013 /*
2014 * If the page is already read-only, no change
2015 * needs to be made.
2016 */
2017 if ((pvo->pvo_pte.pte_lo & PTE_PP) == PTE_BR)
2018 continue;
2019 #endif
2020 /*
2021 * Grab the PTE pointer before we diddle with
2022 * the cached PTE copy.
2023 */
2024 pt = pmap_pvo_to_pte(pvo, pteidx);
2025 /*
2026 * Change the protection of the page.
2027 */
2028 pvo->pvo_pte.pte_lo &= ~PTE_PP;
2029 pvo->pvo_pte.pte_lo |= PTE_BR;
2030
2031 /*
2032 * If the PVO is in the page table, update
2033 		 * that PTE as well.
2034 */
2035 if (pt != NULL) {
2036 pmap_pte_change(pt, &pvo->pvo_pte, pvo->pvo_vaddr);
2037 PVO_WHERE(pvo, PMAP_PROTECT);
2038 PMAPCOUNT(ptes_changed);
2039 }
2040
2041 PMAP_PVO_CHECK(pvo); /* sanity check */
2042 }
2043 pmap_interrupts_restore(msr);
2044 }
2045
2046 void
2047 pmap_unwire(pmap_t pm, vaddr_t va)
2048 {
2049 struct pvo_entry *pvo;
2050 register_t msr;
2051
2052 msr = pmap_interrupts_off();
2053 pvo = pmap_pvo_find_va(pm, va, NULL);
2054 if (pvo != NULL) {
2055 if (pvo->pvo_vaddr & PVO_WIRED) {
2056 pvo->pvo_vaddr &= ~PVO_WIRED;
2057 pm->pm_stats.wired_count--;
2058 }
2059 PMAP_PVO_CHECK(pvo); /* sanity check */
2060 }
2061 pmap_interrupts_restore(msr);
2062 }
2063
2064 /*
2065 * Lower the protection on the specified physical page.
2066 */
2067 void
2068 pmap_page_protect(struct vm_page *pg, vm_prot_t prot)
2069 {
2070 struct pvo_head *pvo_head;
2071 struct pvo_entry *pvo, *next_pvo;
2072 volatile struct pte *pt;
2073 register_t msr;
2074
2075 KASSERT(prot != VM_PROT_ALL);
2076 msr = pmap_interrupts_off();
2077
2078 /*
2079 * When UVM reuses a page, it does a pmap_page_protect with
2080 * VM_PROT_NONE. At that point, we can clear the exec flag
2081 * since we know the page will have different contents.
2082 */
2083 if ((prot & VM_PROT_READ) == 0) {
2084 DPRINTFN(EXEC, ("[pmap_page_protect: %#lx: clear-exec]\n",
2085 pg->phys_addr));
2086 if (pmap_attr_fetch(pg) & PTE_EXEC) {
2087 PMAPCOUNT(exec_uncached_page_protect);
2088 pmap_attr_clear(pg, PTE_EXEC);
2089 }
2090 }
2091
2092 pvo_head = vm_page_to_pvoh(pg);
2093 for (pvo = LIST_FIRST(pvo_head); pvo != NULL; pvo = next_pvo) {
2094 next_pvo = LIST_NEXT(pvo, pvo_vlink);
2095 PMAP_PVO_CHECK(pvo); /* sanity check */
2096
2097 /*
2098 * Downgrading to no mapping at all, we just remove the entry.
2099 */
2100 if ((prot & VM_PROT_READ) == 0) {
2101 pmap_pvo_remove(pvo, -1);
2102 continue;
2103 }
2104
2105 /*
2106 * If EXEC permission is being revoked, just clear the
2107 * flag in the PVO.
2108 */
2109 if ((prot & VM_PROT_EXECUTE) == 0)
2110 pvo_clear_exec(pvo);
2111
2112 /*
2113 * If this entry is already RO, don't diddle with the
2114 * page table.
2115 */
2116 if ((pvo->pvo_pte.pte_lo & PTE_PP) == PTE_BR) {
2117 PMAP_PVO_CHECK(pvo);
2118 continue;
2119 }
2120
2121 /*
2122 		 * Grab the PTE before we diddle the bits so
2123 * pvo_to_pte can verify the pte contents are as
2124 * expected.
2125 */
2126 pt = pmap_pvo_to_pte(pvo, -1);
2127 pvo->pvo_pte.pte_lo &= ~PTE_PP;
2128 pvo->pvo_pte.pte_lo |= PTE_BR;
2129 if (pt != NULL) {
2130 pmap_pte_change(pt, &pvo->pvo_pte, pvo->pvo_vaddr);
2131 PVO_WHERE(pvo, PMAP_PAGE_PROTECT);
2132 PMAPCOUNT(ptes_changed);
2133 }
2134 PMAP_PVO_CHECK(pvo); /* sanity check */
2135 }
2136 pmap_interrupts_restore(msr);
2137 }
2138
2139 /*
2140 * Activate the address space for the specified process. If the process
2141 * is the current process, load the new MMU context.
2142 */
2143 void
2144 pmap_activate(struct lwp *l)
2145 {
2146 struct pcb *pcb = &l->l_addr->u_pcb;
2147 pmap_t pmap = l->l_proc->p_vmspace->vm_map.pmap;
2148
2149 DPRINTFN(ACTIVATE,
2150 ("pmap_activate: lwp %p (curlwp %p)\n", l, curlwp));
2151
2152 /*
2153 * XXX Normally performed in cpu_fork().
2154 */
2155 pcb->pcb_pm = pmap;
2156
2157 /*
2158 * In theory, the SR registers need only be valid on return
2159 	 * to user space, so we could wait and load them there.
2160 */
2161 if (l == curlwp) {
2162 /* Store pointer to new current pmap. */
2163 curpm = pmap;
2164 }
2165 }
2166
2167 /*
2168 * Deactivate the specified process's address space.
2169 */
2170 void
2171 pmap_deactivate(struct lwp *l)
2172 {
2173 }
2174
2175 boolean_t
2176 pmap_query_bit(struct vm_page *pg, int ptebit)
2177 {
2178 struct pvo_entry *pvo;
2179 volatile struct pte *pt;
2180 register_t msr;
2181
2182 if (pmap_attr_fetch(pg) & ptebit)
2183 return TRUE;
2184
2185 msr = pmap_interrupts_off();
2186 LIST_FOREACH(pvo, vm_page_to_pvoh(pg), pvo_vlink) {
2187 PMAP_PVO_CHECK(pvo); /* sanity check */
2188 /*
2189 		 * See if we saved the bit off. If so, cache it and return
2190 * success.
2191 */
2192 if (pvo->pvo_pte.pte_lo & ptebit) {
2193 pmap_attr_save(pg, ptebit);
2194 PMAP_PVO_CHECK(pvo); /* sanity check */
2195 pmap_interrupts_restore(msr);
2196 return TRUE;
2197 }
2198 }
2199 /*
2200 	 * No luck, now go through the hard part of looking at the PTEs
2201 * themselves. Sync so any pending REF/CHG bits are flushed
2202 * to the PTEs.
2203 */
2204 SYNC();
2205 LIST_FOREACH(pvo, vm_page_to_pvoh(pg), pvo_vlink) {
2206 PMAP_PVO_CHECK(pvo); /* sanity check */
2207 /*
2208 		 * See if this pvo has a valid PTE. If so, fetch the
2209 		 * REF/CHG bits from the valid PTE. If the appropriate
2210 		 * ptebit is set, cache it and return success.
2211 */
2212 pt = pmap_pvo_to_pte(pvo, -1);
2213 if (pt != NULL) {
2214 pmap_pte_synch(pt, &pvo->pvo_pte);
2215 if (pvo->pvo_pte.pte_lo & ptebit) {
2216 pmap_attr_save(pg, ptebit);
2217 PMAP_PVO_CHECK(pvo); /* sanity check */
2218 pmap_interrupts_restore(msr);
2219 return TRUE;
2220 }
2221 }
2222 }
2223 pmap_interrupts_restore(msr);
2224 return FALSE;
2225 }
2226
2227 boolean_t
2228 pmap_clear_bit(struct vm_page *pg, int ptebit)
2229 {
2230 struct pvo_head *pvoh = vm_page_to_pvoh(pg);
2231 struct pvo_entry *pvo;
2232 volatile struct pte *pt;
2233 register_t msr;
2234 int rv = 0;
2235
2236 msr = pmap_interrupts_off();
2237
2238 /*
2239 * Fetch the cache value
2240 */
2241 rv |= pmap_attr_fetch(pg);
2242
2243 /*
2244 * Clear the cached value.
2245 */
2246 pmap_attr_clear(pg, ptebit);
2247
2248 /*
2249 * Sync so any pending REF/CHG bits are flushed to the PTEs (so we
2250 * can reset the right ones). Note that since the pvo entries and
2251 * list heads are accessed via BAT0 and are never placed in the
2252 * page table, we don't have to worry about further accesses setting
2253 * the REF/CHG bits.
2254 */
2255 SYNC();
2256
2257 /*
2258 	 * For each pvo entry, clear pvo's ptebit. If this pvo has a
2259 	 * valid PTE, also clear the ptebit from that PTE.
2260 */
2261 LIST_FOREACH(pvo, pvoh, pvo_vlink) {
2262 PMAP_PVO_CHECK(pvo); /* sanity check */
2263 pt = pmap_pvo_to_pte(pvo, -1);
2264 if (pt != NULL) {
2265 /*
2266 * Only sync the PTE if the bit we are looking
2267 * for is not already set.
2268 */
2269 if ((pvo->pvo_pte.pte_lo & ptebit) == 0)
2270 pmap_pte_synch(pt, &pvo->pvo_pte);
2271 /*
2272 * If the bit we are looking for was already set,
2273 * clear that bit in the pte.
2274 */
2275 if (pvo->pvo_pte.pte_lo & ptebit)
2276 pmap_pte_clear(pt, PVO_VADDR(pvo), ptebit);
2277 }
2278 rv |= pvo->pvo_pte.pte_lo & (PTE_CHG|PTE_REF);
2279 pvo->pvo_pte.pte_lo &= ~ptebit;
2280 PMAP_PVO_CHECK(pvo); /* sanity check */
2281 }
2282 pmap_interrupts_restore(msr);
2283
2284 /*
2285 * If we are clearing the modify bit and this page was marked EXEC
2286 * and the user of the page thinks the page was modified, then we
2287 * need to clean it from the icache if it's mapped or clear the EXEC
2288 * bit if it's not mapped. The page itself might not have the CHG
2289 * bit set if the modification was done via DMA to the page.
2290 */
2291 if ((ptebit & PTE_CHG) && (rv & PTE_EXEC)) {
2292 if (LIST_EMPTY(pvoh)) {
2293 DPRINTFN(EXEC, ("[pmap_clear_bit: %#lx: clear-exec]\n",
2294 pg->phys_addr));
2295 pmap_attr_clear(pg, PTE_EXEC);
2296 PMAPCOUNT(exec_uncached_clear_modify);
2297 } else {
2298 DPRINTFN(EXEC, ("[pmap_clear_bit: %#lx: syncicache]\n",
2299 pg->phys_addr));
2300 pmap_syncicache(pg->phys_addr, PAGE_SIZE);
2301 PMAPCOUNT(exec_synced_clear_modify);
2302 }
2303 }
2304 return (rv & ptebit) != 0;
2305 }
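/*
 * pmap_query_bit() and pmap_clear_bit() are the workers behind the
 * referenced/modified page queries; e.g. clearing a page's modified
 * state presumably comes down to pmap_clear_bit(pg, PTE_CHG), and
 * testing it to pmap_query_bit(pg, PTE_CHG).
 */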
2306
2307 void
2308 pmap_procwr(struct proc *p, vaddr_t va, size_t len)
2309 {
2310 struct pvo_entry *pvo;
2311 size_t offset = va & ADDR_POFF;
2312 int s;
2313
2314 s = splvm();
2315 while (len > 0) {
2316 size_t seglen = PAGE_SIZE - offset;
2317 if (seglen > len)
2318 seglen = len;
2319 pvo = pmap_pvo_find_va(p->p_vmspace->vm_map.pmap, va, NULL);
2320 if (pvo != NULL && PVO_ISEXECUTABLE(pvo)) {
2321 pmap_syncicache(
2322 (pvo->pvo_pte.pte_lo & PTE_RPGN) | offset, seglen);
2323 PMAP_PVO_CHECK(pvo);
2324 }
2325 va += seglen;
2326 len -= seglen;
2327 offset = 0;
2328 }
2329 splx(s);
2330 }
2331
2332 #if defined(DEBUG) || defined(PMAPCHECK) || defined(DDB)
2333 void
2334 pmap_pte_print(volatile struct pte *pt)
2335 {
2336 printf("PTE %p: ", pt);
2337 /* High word: */
2338 printf("0x%08lx: [", pt->pte_hi);
2339 printf("%c ", (pt->pte_hi & PTE_VALID) ? 'v' : 'i');
2340 printf("%c ", (pt->pte_hi & PTE_HID) ? 'h' : '-');
2341 printf("0x%06lx 0x%02lx",
2342 (pt->pte_hi &~ PTE_VALID)>>PTE_VSID_SHFT,
2343 pt->pte_hi & PTE_API);
2344 printf(" (va 0x%08lx)] ", pmap_pte_to_va(pt));
2345 /* Low word: */
2346 printf(" 0x%08lx: [", pt->pte_lo);
2347 printf("0x%05lx... ", pt->pte_lo >> 12);
2348 printf("%c ", (pt->pte_lo & PTE_REF) ? 'r' : 'u');
2349 printf("%c ", (pt->pte_lo & PTE_CHG) ? 'c' : 'n');
2350 printf("%c", (pt->pte_lo & PTE_W) ? 'w' : '.');
2351 printf("%c", (pt->pte_lo & PTE_I) ? 'i' : '.');
2352 printf("%c", (pt->pte_lo & PTE_M) ? 'm' : '.');
2353 printf("%c ", (pt->pte_lo & PTE_G) ? 'g' : '.');
2354 switch (pt->pte_lo & PTE_PP) {
2355 case PTE_BR: printf("br]\n"); break;
2356 case PTE_BW: printf("bw]\n"); break;
2357 case PTE_SO: printf("so]\n"); break;
2358 case PTE_SW: printf("sw]\n"); break;
2359 }
2360 }
2361 #endif
2362
2363 #if defined(DDB)
2364 void
2365 pmap_pteg_check(void)
2366 {
2367 volatile struct pte *pt;
2368 int i;
2369 int ptegidx;
2370 u_int p_valid = 0;
2371 u_int s_valid = 0;
2372 u_int invalid = 0;
2373
2374 for (ptegidx = 0; ptegidx < pmap_pteg_cnt; ptegidx++) {
2375 for (pt = pmap_pteg_table[ptegidx].pt, i = 8; --i >= 0; pt++) {
2376 if (pt->pte_hi & PTE_VALID) {
2377 if (pt->pte_hi & PTE_HID)
2378 s_valid++;
2379 else
2380 p_valid++;
2381 } else
2382 invalid++;
2383 }
2384 }
2385 printf("pteg_check: v(p) %#x (%d), v(s) %#x (%d), i %#x (%d)\n",
2386 p_valid, p_valid, s_valid, s_valid,
2387 invalid, invalid);
2388 }
2389
2390 void
2391 pmap_print_mmuregs(void)
2392 {
2393 int i;
2394 u_int cpuvers;
2395 #ifndef PPC_OEA64
2396 vaddr_t addr;
2397 register_t soft_sr[16];
2398 #endif
2399 struct bat soft_ibat[4];
2400 struct bat soft_dbat[4];
2401 register_t sdr1;
2402
2403 cpuvers = MFPVR() >> 16;
2404
2405 __asm __volatile ("mfsdr1 %0" : "=r"(sdr1));
2406 #ifndef PPC_OEA64
2407 addr = 0;
2408 for (i=0; i<16; i++) {
2409 soft_sr[i] = MFSRIN(addr);
2410 addr += (1 << ADDR_SR_SHFT);
2411 }
2412 #endif
2413
2414 /* read iBAT (601: uBAT) registers */
2415 __asm __volatile ("mfibatu %0,0" : "=r"(soft_ibat[0].batu));
2416 __asm __volatile ("mfibatl %0,0" : "=r"(soft_ibat[0].batl));
2417 __asm __volatile ("mfibatu %0,1" : "=r"(soft_ibat[1].batu));
2418 __asm __volatile ("mfibatl %0,1" : "=r"(soft_ibat[1].batl));
2419 __asm __volatile ("mfibatu %0,2" : "=r"(soft_ibat[2].batu));
2420 __asm __volatile ("mfibatl %0,2" : "=r"(soft_ibat[2].batl));
2421 __asm __volatile ("mfibatu %0,3" : "=r"(soft_ibat[3].batu));
2422 __asm __volatile ("mfibatl %0,3" : "=r"(soft_ibat[3].batl));
2423
2424
2425 if (cpuvers != MPC601) {
2426 /* read dBAT registers */
2427 __asm __volatile ("mfdbatu %0,0" : "=r"(soft_dbat[0].batu));
2428 __asm __volatile ("mfdbatl %0,0" : "=r"(soft_dbat[0].batl));
2429 __asm __volatile ("mfdbatu %0,1" : "=r"(soft_dbat[1].batu));
2430 __asm __volatile ("mfdbatl %0,1" : "=r"(soft_dbat[1].batl));
2431 __asm __volatile ("mfdbatu %0,2" : "=r"(soft_dbat[2].batu));
2432 __asm __volatile ("mfdbatl %0,2" : "=r"(soft_dbat[2].batl));
2433 __asm __volatile ("mfdbatu %0,3" : "=r"(soft_dbat[3].batu));
2434 __asm __volatile ("mfdbatl %0,3" : "=r"(soft_dbat[3].batl));
2435 }
2436
2437 printf("SDR1:\t0x%lx\n", (long) sdr1);
2438 #ifndef PPC_OEA64
2439 printf("SR[]:\t");
2440 for (i=0; i<4; i++)
2441 printf("0x%08lx, ", soft_sr[i]);
2442 printf("\n\t");
2443 for ( ; i<8; i++)
2444 printf("0x%08lx, ", soft_sr[i]);
2445 printf("\n\t");
2446 for ( ; i<12; i++)
2447 printf("0x%08lx, ", soft_sr[i]);
2448 printf("\n\t");
2449 for ( ; i<16; i++)
2450 printf("0x%08lx, ", soft_sr[i]);
2451 printf("\n");
2452 #endif
2453
2454 printf("%cBAT[]:\t", cpuvers == MPC601 ? 'u' : 'i');
2455 for (i=0; i<4; i++) {
2456 printf("0x%08lx 0x%08lx, ",
2457 soft_ibat[i].batu, soft_ibat[i].batl);
2458 if (i == 1)
2459 printf("\n\t");
2460 }
2461 if (cpuvers != MPC601) {
2462 printf("\ndBAT[]:\t");
2463 for (i=0; i<4; i++) {
2464 printf("0x%08lx 0x%08lx, ",
2465 soft_dbat[i].batu, soft_dbat[i].batl);
2466 if (i == 1)
2467 printf("\n\t");
2468 }
2469 }
2470 printf("\n");
2471 }
2472
2473 void
2474 pmap_print_pte(pmap_t pm, vaddr_t va)
2475 {
2476 struct pvo_entry *pvo;
2477 volatile struct pte *pt;
2478 int pteidx;
2479
2480 pvo = pmap_pvo_find_va(pm, va, &pteidx);
2481 if (pvo != NULL) {
2482 pt = pmap_pvo_to_pte(pvo, pteidx);
2483 if (pt != NULL) {
2484 printf("VA %#lx -> %p -> %s %#lx, %#lx\n",
2485 va, pt,
2486 pt->pte_hi & PTE_HID ? "(sec)" : "(pri)",
2487 pt->pte_hi, pt->pte_lo);
2488 } else {
2489 printf("No valid PTE found\n");
2490 }
2491 } else {
2492 printf("Address not in pmap\n");
2493 }
2494 }
2495
2496 void
2497 pmap_pteg_dist(void)
2498 {
2499 struct pvo_entry *pvo;
2500 int ptegidx;
2501 int depth;
2502 int max_depth = 0;
2503 unsigned int depths[64];
2504
2505 memset(depths, 0, sizeof(depths));
2506 for (ptegidx = 0; ptegidx < pmap_pteg_cnt; ptegidx++) {
2507 depth = 0;
2508 TAILQ_FOREACH(pvo, &pmap_pvo_table[ptegidx], pvo_olink) {
2509 depth++;
2510 }
2511 if (depth > max_depth)
2512 max_depth = depth;
2513 if (depth > 63)
2514 depth = 63;
2515 depths[depth]++;
2516 }
2517
2518 for (depth = 0; depth < 64; depth++) {
2519 printf(" [%2d]: %8u", depth, depths[depth]);
2520 if ((depth & 3) == 3)
2521 printf("\n");
2522 if (depth == max_depth)
2523 break;
2524 }
2525 if ((depth & 3) != 3)
2526 printf("\n");
2527 printf("Max depth found was %d\n", max_depth);
2528 }
2529 #endif /* DDB */
2530
2531 #if defined(PMAPCHECK) || defined(DEBUG)
2532 void
2533 pmap_pvo_verify(void)
2534 {
2535 int ptegidx;
2536 int s;
2537
2538 s = splvm();
2539 for (ptegidx = 0; ptegidx < pmap_pteg_cnt; ptegidx++) {
2540 struct pvo_entry *pvo;
2541 TAILQ_FOREACH(pvo, &pmap_pvo_table[ptegidx], pvo_olink) {
2542 if ((uintptr_t) pvo >= SEGMENT_LENGTH)
2543 panic("pmap_pvo_verify: invalid pvo %p "
2544 "on list %#x", pvo, ptegidx);
2545 pmap_pvo_check(pvo);
2546 }
2547 }
2548 splx(s);
2549 }
2550 #endif /* PMAPCHECK */
2551
2552
2553 void *
2554 pmap_pool_ualloc(struct pool *pp, int flags)
2555 {
2556 struct pvo_page *pvop;
2557
2558 pvop = SIMPLEQ_FIRST(&pmap_upvop_head);
2559 if (pvop != NULL) {
2560 pmap_upvop_free--;
2561 SIMPLEQ_REMOVE_HEAD(&pmap_upvop_head, pvop_link);
2562 return pvop;
2563 }
2564 if (uvm.page_init_done != TRUE) {
2565 return (void *) uvm_pageboot_alloc(PAGE_SIZE);
2566 }
2567 return pmap_pool_malloc(pp, flags);
2568 }
2569
2570 void *
2571 pmap_pool_malloc(struct pool *pp, int flags)
2572 {
2573 struct pvo_page *pvop;
2574 struct vm_page *pg;
2575
2576 pvop = SIMPLEQ_FIRST(&pmap_mpvop_head);
2577 if (pvop != NULL) {
2578 pmap_mpvop_free--;
2579 SIMPLEQ_REMOVE_HEAD(&pmap_mpvop_head, pvop_link);
2580 return pvop;
2581 }
2582 again:
2583 pg = uvm_pagealloc_strat(NULL, 0, NULL, UVM_PGA_USERESERVE,
2584 UVM_PGA_STRAT_ONLY, VM_FREELIST_FIRST256);
2585 if (__predict_false(pg == NULL)) {
2586 if (flags & PR_WAITOK) {
2587 uvm_wait("plpg");
2588 goto again;
2589 } else {
2590 return (0);
2591 }
2592 }
2593 return (void *) VM_PAGE_TO_PHYS(pg);
2594 }
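/*
 * Note that the page is handed back by its physical address: PVO pages
 * are deliberately taken from VM_FREELIST_FIRST256 (or from
 * uvm_pageboot_alloc() before UVM is up) so they sit below
 * SEGMENT_LENGTH, where the 1:1 BAT mapping makes PA equal VA; as the
 * comment in pmap_clear_bit() notes, PVO entries are only ever accessed
 * through that BAT mapping and never appear in the page table
 * themselves.
 */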
2595
2596 void
2597 pmap_pool_ufree(struct pool *pp, void *va)
2598 {
2599 struct pvo_page *pvop;
2600 #if 0
2601 if (PHYS_TO_VM_PAGE((paddr_t) va) != NULL) {
2602 pmap_pool_mfree(va, size, tag);
2603 return;
2604 }
2605 #endif
2606 pvop = va;
2607 SIMPLEQ_INSERT_HEAD(&pmap_upvop_head, pvop, pvop_link);
2608 pmap_upvop_free++;
2609 if (pmap_upvop_free > pmap_upvop_maxfree)
2610 pmap_upvop_maxfree = pmap_upvop_free;
2611 }
2612
2613 void
2614 pmap_pool_mfree(struct pool *pp, void *va)
2615 {
2616 struct pvo_page *pvop;
2617
2618 pvop = va;
2619 SIMPLEQ_INSERT_HEAD(&pmap_mpvop_head, pvop, pvop_link);
2620 pmap_mpvop_free++;
2621 if (pmap_mpvop_free > pmap_mpvop_maxfree)
2622 pmap_mpvop_maxfree = pmap_mpvop_free;
2623 #if 0
2624 uvm_pagefree(PHYS_TO_VM_PAGE((paddr_t) va));
2625 #endif
2626 }
2627
2628 /*
2629  * This routine is used during bootstrapping to steal to-be-managed
2630  * memory (which will then be unmanaged). We use it to grab memory from
2631  * the first 256MB for our pmap needs and from above 256MB for other stuff.
2632 */
2633 vaddr_t
2634 pmap_steal_memory(vsize_t vsize, vaddr_t *vstartp, vaddr_t *vendp)
2635 {
2636 vsize_t size;
2637 vaddr_t va;
2638 paddr_t pa = 0;
2639 int npgs, bank;
2640 struct vm_physseg *ps;
2641
2642 if (uvm.page_init_done == TRUE)
2643 panic("pmap_steal_memory: called _after_ bootstrap");
2644
2645 *vstartp = VM_MIN_KERNEL_ADDRESS;
2646 *vendp = VM_MAX_KERNEL_ADDRESS;
2647
2648 size = round_page(vsize);
2649 npgs = atop(size);
2650
2651 /*
2652 * PA 0 will never be among those given to UVM so we can use it
2653 * to indicate we couldn't steal any memory.
2654 */
2655 for (ps = vm_physmem, bank = 0; bank < vm_nphysseg; bank++, ps++) {
2656 if (ps->free_list == VM_FREELIST_FIRST256 &&
2657 ps->avail_end - ps->avail_start >= npgs) {
2658 pa = ptoa(ps->avail_start);
2659 break;
2660 }
2661 }
2662
2663 if (pa == 0)
2664 		panic("pmap_steal_memory: no appropriate memory to steal!");
2665
2666 ps->avail_start += npgs;
2667 ps->start += npgs;
2668
2669 /*
2670 * If we've used up all the pages in the segment, remove it and
2671 * compact the list.
2672 */
2673 if (ps->avail_start == ps->end) {
2674 /*
2675 * If this was the last one, then a very bad thing has occurred
2676 */
2677 if (--vm_nphysseg == 0)
2678 panic("pmap_steal_memory: out of memory!");
2679
2680 printf("pmap_steal_memory: consumed bank %d\n", bank);
2681 for (; bank < vm_nphysseg; bank++, ps++) {
2682 ps[0] = ps[1];
2683 }
2684 }
2685
2686 va = (vaddr_t) pa;
2687 memset((caddr_t) va, 0, size);
2688 pmap_pages_stolen += npgs;
2689 #ifdef DEBUG
2690 if (pmapdebug && npgs > 1) {
2691 u_int cnt = 0;
2692 for (bank = 0, ps = vm_physmem; bank < vm_nphysseg; bank++, ps++)
2693 cnt += ps->avail_end - ps->avail_start;
2694 printf("pmap_steal_memory: stole %u (total %u) pages (%u left)\n",
2695 npgs, pmap_pages_stolen, cnt);
2696 }
2697 #endif
2698
2699 return va;
2700 }
2701
2702 /*
2703  * Find a chunk of memory with the right size and alignment.
2704 */
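/*
 * The round-up used below is the usual power-of-two trick:
 * (start + alignment - 1) & ~(alignment - 1).  For instance, a region
 * starting at 0x123000 asked for 0x10000 alignment yields s = 0x130000
 * (illustrative numbers).
 */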
2705 void *
2706 pmap_boot_find_memory(psize_t size, psize_t alignment, int at_end)
2707 {
2708 struct mem_region *mp;
2709 paddr_t s, e;
2710 int i, j;
2711
2712 size = round_page(size);
2713
2714 DPRINTFN(BOOT,
2715 ("pmap_boot_find_memory: size=%lx, alignment=%lx, at_end=%d",
2716 size, alignment, at_end));
2717
2718 if (alignment < PAGE_SIZE || (alignment & (alignment-1)) != 0)
2719 panic("pmap_boot_find_memory: invalid alignment %lx",
2720 alignment);
2721
2722 if (at_end) {
2723 if (alignment != PAGE_SIZE)
2724 panic("pmap_boot_find_memory: invalid ending "
2725 "alignment %lx", alignment);
2726
2727 for (mp = &avail[avail_cnt-1]; mp >= avail; mp--) {
2728 s = mp->start + mp->size - size;
2729 if (s >= mp->start && mp->size >= size) {
2730 DPRINTFN(BOOT,(": %lx\n", s));
2731 DPRINTFN(BOOT,
2732 ("pmap_boot_find_memory: b-avail[%d] start "
2733 "0x%lx size 0x%lx\n", mp - avail,
2734 mp->start, mp->size));
2735 mp->size -= size;
2736 DPRINTFN(BOOT,
2737 ("pmap_boot_find_memory: a-avail[%d] start "
2738 "0x%lx size 0x%lx\n", mp - avail,
2739 mp->start, mp->size));
2740 return (void *) s;
2741 }
2742 }
2743 panic("pmap_boot_find_memory: no available memory");
2744 }
2745
2746 for (mp = avail, i = 0; i < avail_cnt; i++, mp++) {
2747 s = (mp->start + alignment - 1) & ~(alignment-1);
2748 e = s + size;
2749
2750 /*
2751 		 * Is the calculated block entirely within this region?
2752 */
2753 if (s < mp->start || e > mp->start + mp->size)
2754 continue;
2755
2756 DPRINTFN(BOOT,(": %lx\n", s));
2757 if (s == mp->start) {
2758 /*
2759 			 * If the block starts at the beginning of the region,
2760 			 * adjust the start and size (the region may now be
2761 			 * zero in length).
2762 */
2763 DPRINTFN(BOOT,
2764 ("pmap_boot_find_memory: b-avail[%d] start "
2765 "0x%lx size 0x%lx\n", i, mp->start, mp->size));
2766 mp->start += size;
2767 mp->size -= size;
2768 DPRINTFN(BOOT,
2769 ("pmap_boot_find_memory: a-avail[%d] start "
2770 "0x%lx size 0x%lx\n", i, mp->start, mp->size));
2771 } else if (e == mp->start + mp->size) {
2772 /*
2773 			 * If the block ends at the end of the region,
2774 * adjust only the size.
2775 */
2776 DPRINTFN(BOOT,
2777 ("pmap_boot_find_memory: b-avail[%d] start "
2778 "0x%lx size 0x%lx\n", i, mp->start, mp->size));
2779 mp->size -= size;
2780 DPRINTFN(BOOT,
2781 ("pmap_boot_find_memory: a-avail[%d] start "
2782 "0x%lx size 0x%lx\n", i, mp->start, mp->size));
2783 } else {
2784 /*
2785 * Block is in the middle of the region, so we
2786 * have to split it in two.
2787 */
2788 for (j = avail_cnt; j > i + 1; j--) {
2789 avail[j] = avail[j-1];
2790 }
2791 DPRINTFN(BOOT,
2792 ("pmap_boot_find_memory: b-avail[%d] start "
2793 "0x%lx size 0x%lx\n", i, mp->start, mp->size));
2794 mp[1].start = e;
2795 mp[1].size = mp[0].start + mp[0].size - e;
2796 mp[0].size = s - mp[0].start;
2797 avail_cnt++;
2798 for (; i < avail_cnt; i++) {
2799 DPRINTFN(BOOT,
2800 ("pmap_boot_find_memory: a-avail[%d] "
2801 "start 0x%lx size 0x%lx\n", i,
2802 avail[i].start, avail[i].size));
2803 }
2804 }
2805 return (void *) s;
2806 }
2807 panic("pmap_boot_find_memory: not enough memory for "
2808 "%lx/%lx allocation?", size, alignment);
2809 }
2810
2811 /*
2812 * This is not part of the defined PMAP interface and is specific to the
2813 * PowerPC architecture. This is called during initppc, before the system
2814 * is really initialized.
2815 */
2816 void
2817 pmap_bootstrap(paddr_t kernelstart, paddr_t kernelend)
2818 {
2819 struct mem_region *mp, tmp;
2820 paddr_t s, e;
2821 psize_t size;
2822 int i, j;
2823
2824 /*
2825 * Get memory.
2826 */
2827 mem_regions(&mem, &avail);
2828 #if defined(DEBUG)
2829 if (pmapdebug & PMAPDEBUG_BOOT) {
2830 printf("pmap_bootstrap: memory configuration:\n");
2831 for (mp = mem; mp->size; mp++) {
2832 printf("pmap_bootstrap: mem start 0x%lx size 0x%lx\n",
2833 mp->start, mp->size);
2834 }
2835 for (mp = avail; mp->size; mp++) {
2836 printf("pmap_bootstrap: avail start 0x%lx size 0x%lx\n",
2837 mp->start, mp->size);
2838 }
2839 }
2840 #endif
2841
2842 /*
2843 * Find out how much physical memory we have and in how many chunks.
2844 */
2845 for (mem_cnt = 0, mp = mem; mp->size; mp++) {
2846 if (mp->start >= pmap_memlimit)
2847 continue;
2848 if (mp->start + mp->size > pmap_memlimit) {
2849 size = pmap_memlimit - mp->start;
2850 physmem += btoc(size);
2851 } else {
2852 physmem += btoc(mp->size);
2853 }
2854 mem_cnt++;
2855 }
2856
2857 /*
2858 * Count the number of available entries.
2859 */
2860 for (avail_cnt = 0, mp = avail; mp->size; mp++)
2861 avail_cnt++;
2862
2863 /*
2864 * Page align all regions.
2865 */
2866 kernelstart = trunc_page(kernelstart);
2867 kernelend = round_page(kernelend);
2868 for (mp = avail, i = 0; i < avail_cnt; i++, mp++) {
2869 s = round_page(mp->start);
2870 mp->size -= (s - mp->start);
2871 mp->size = trunc_page(mp->size);
2872 mp->start = s;
2873 e = mp->start + mp->size;
2874
2875 DPRINTFN(BOOT,
2876 ("pmap_bootstrap: b-avail[%d] start 0x%lx size 0x%lx\n",
2877 i, mp->start, mp->size));
2878
2879 /*
2880 * Don't allow the end to run beyond our artificial limit
2881 */
2882 if (e > pmap_memlimit)
2883 e = pmap_memlimit;
2884
2885 /*
2886 		 * If this region is empty or strange, skip it.
2887 */
2888 if (e <= s) {
2889 mp->start = 0;
2890 mp->size = 0;
2891 continue;
2892 }
2893
2894 /*
2895 		 * Does this region overlap the beginning of the kernel?
2896 		 * Does it extend past the end of the kernel?
2897 */
2898 else if (s < kernelstart && e > kernelstart) {
2899 if (e > kernelend) {
2900 avail[avail_cnt].start = kernelend;
2901 avail[avail_cnt].size = e - kernelend;
2902 avail_cnt++;
2903 }
2904 mp->size = kernelstart - s;
2905 }
2906 /*
2907 * Check whether this region overlaps the end of the kernel.
2908 */
2909 else if (s < kernelend && e > kernelend) {
2910 mp->start = kernelend;
2911 mp->size = e - kernelend;
2912 }
2913 /*
2914 		 * Check whether this region is completely inside the kernel.
2915 		 * Nuke it if it is.
2916 */
2917 else if (s >= kernelstart && e <= kernelend) {
2918 mp->start = 0;
2919 mp->size = 0;
2920 }
2921 /*
2922 * If the user imposed a memory limit, enforce it.
2923 */
2924 else if (s >= pmap_memlimit) {
2925 			mp->start = -PAGE_SIZE;	/* lets us know why */
2926 mp->size = 0;
2927 }
2928 else {
2929 mp->start = s;
2930 mp->size = e - s;
2931 }
2932 DPRINTFN(BOOT,
2933 ("pmap_bootstrap: a-avail[%d] start 0x%lx size 0x%lx\n",
2934 i, mp->start, mp->size));
2935 }
2936
2937 /*
2938 	 * Move (and uncount) all the null regions to the end.
2939 */
2940 for (mp = avail, i = 0; i < avail_cnt; i++, mp++) {
2941 if (mp->size == 0) {
2942 tmp = avail[i];
2943 avail[i] = avail[--avail_cnt];
2944 			avail[avail_cnt] = tmp;
2945 }
2946 }
2947
2948 /*
2949 	 * (Bubble)sort them into ascending order.
2950 */
2951 for (i = 0; i < avail_cnt; i++) {
2952 for (j = i + 1; j < avail_cnt; j++) {
2953 if (avail[i].start > avail[j].start) {
2954 tmp = avail[i];
2955 avail[i] = avail[j];
2956 avail[j] = tmp;
2957 }
2958 }
2959 }
2960
2961 /*
2962 * Make sure they don't overlap.
2963 */
2964 for (mp = avail, i = 0; i < avail_cnt - 1; i++, mp++) {
2965 if (mp[0].start + mp[0].size > mp[1].start) {
2966 mp[0].size = mp[1].start - mp[0].start;
2967 }
2968 DPRINTFN(BOOT,
2969 ("pmap_bootstrap: avail[%d] start 0x%lx size 0x%lx\n",
2970 i, mp->start, mp->size));
2971 }
2972 DPRINTFN(BOOT,
2973 ("pmap_bootstrap: avail[%d] start 0x%lx size 0x%lx\n",
2974 i, mp->start, mp->size));
2975
2976 #ifdef PTEGCOUNT
2977 pmap_pteg_cnt = PTEGCOUNT;
2978 #else /* PTEGCOUNT */
2979 pmap_pteg_cnt = 0x1000;
2980
2981 while (pmap_pteg_cnt < physmem)
2982 pmap_pteg_cnt <<= 1;
2983
2984 pmap_pteg_cnt >>= 1;
2985 #endif /* PTEGCOUNT */
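/*
 * Illustrative sizing for the default (no PTEGCOUNT) case: with 4KB
 * pages and 128MB of RAM, physmem is 0x8000 pages, so the loop above
 * stops at 0x8000 and the final shift leaves pmap_pteg_cnt = 0x4000
 * PTEGs, roughly one PTEG per two physical pages; with 64-byte groups
 * (eight 8-byte PTEs) on 32-bit OEA that is a 1MB hash table.
 */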
2986
2987 /*
2988 * Find suitably aligned memory for PTEG hash table.
2989 */
2990 size = pmap_pteg_cnt * sizeof(struct pteg);
2991 pmap_pteg_table = pmap_boot_find_memory(size, size, 0);
2992 #if defined(DIAGNOSTIC) || defined(DEBUG) || defined(PMAPCHECK)
2993 if ( (uintptr_t) pmap_pteg_table + size > SEGMENT_LENGTH)
2994 panic("pmap_bootstrap: pmap_pteg_table end (%p + %lx) > 256MB",
2995 pmap_pteg_table, size);
2996 #endif
2997
2998 memset((void *)pmap_pteg_table, 0, pmap_pteg_cnt * sizeof(struct pteg));
2999 pmap_pteg_mask = pmap_pteg_cnt - 1;
3000
3001 /*
3002 * We cannot do pmap_steal_memory here since UVM hasn't been loaded
3003 * with pages. So we just steal them before giving them to UVM.
3004 */
3005 size = sizeof(pmap_pvo_table[0]) * pmap_pteg_cnt;
3006 pmap_pvo_table = pmap_boot_find_memory(size, PAGE_SIZE, 0);
3007 #if defined(DIAGNOSTIC) || defined(DEBUG) || defined(PMAPCHECK)
3008 if ( (uintptr_t) pmap_pvo_table + size > SEGMENT_LENGTH)
3009 panic("pmap_bootstrap: pmap_pvo_table end (%p + %lx) > 256MB",
3010 pmap_pvo_table, size);
3011 #endif
3012
3013 for (i = 0; i < pmap_pteg_cnt; i++)
3014 TAILQ_INIT(&pmap_pvo_table[i]);
3015
3016 #ifndef MSGBUFADDR
3017 /*
3018 * Allocate msgbuf in high memory.
3019 */
3020 msgbuf_paddr =
3021 (paddr_t) pmap_boot_find_memory(MSGBUFSIZE, PAGE_SIZE, 1);
3022 #endif
3023
3024 #ifdef __HAVE_PMAP_PHYSSEG
3025 {
3026 u_int npgs = 0;
3027 for (i = 0, mp = avail; i < avail_cnt; i++, mp++)
3028 npgs += btoc(mp->size);
3029 size = (sizeof(struct pvo_head) + 1) * npgs;
3030 pmap_physseg.pvoh = pmap_boot_find_memory(size, PAGE_SIZE, 0);
3031 pmap_physseg.attrs = (char *) &pmap_physseg.pvoh[npgs];
3032 #if defined(DIAGNOSTIC) || defined(DEBUG) || defined(PMAPCHECK)
3033 if ((uintptr_t)pmap_physseg.pvoh + size > SEGMENT_LENGTH)
3034 panic("pmap_bootstrap: PVO list end (%p + %lx) > 256MB",
3035 pmap_physseg.pvoh, size);
3036 #endif
3037 }
3038 #endif
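/*
 * In the __HAVE_PMAP_PHYSSEG case above, the single boot allocation is
 * laid out as an array of npgs pvo_head list heads followed by npgs
 * one-byte attribute slots, hence the
 * (sizeof(struct pvo_head) + 1) * npgs sizing.
 */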
3039
3040 for (mp = avail, i = 0; i < avail_cnt; mp++, i++) {
3041 paddr_t pfstart = atop(mp->start);
3042 paddr_t pfend = atop(mp->start + mp->size);
3043 if (mp->size == 0)
3044 continue;
3045 if (mp->start + mp->size <= SEGMENT_LENGTH) {
3046 uvm_page_physload(pfstart, pfend, pfstart, pfend,
3047 VM_FREELIST_FIRST256);
3048 } else if (mp->start >= SEGMENT_LENGTH) {
3049 uvm_page_physload(pfstart, pfend, pfstart, pfend,
3050 VM_FREELIST_DEFAULT);
3051 } else {
3052 pfend = atop(SEGMENT_LENGTH);
3053 uvm_page_physload(pfstart, pfend, pfstart, pfend,
3054 VM_FREELIST_FIRST256);
3055 pfstart = atop(SEGMENT_LENGTH);
3056 pfend = atop(mp->start + mp->size);
3057 uvm_page_physload(pfstart, pfend, pfstart, pfend,
3058 VM_FREELIST_DEFAULT);
3059 }
3060 }
3061
3062 /*
3063 	 * Make sure the kernel VSID is allocated, as well as VSID 0.
3064 */
3065 pmap_vsid_bitmap[(KERNEL_VSIDBITS & (NPMAPS-1)) / VSID_NBPW]
3066 |= 1 << (KERNEL_VSIDBITS % VSID_NBPW);
3067 pmap_vsid_bitmap[0] |= 1;
3068
3069 /*
3070 * Initialize kernel pmap and hardware.
3071 */
3072 #ifndef PPC_OEA64
3073 for (i = 0; i < 16; i++) {
3074 pmap_kernel()->pm_sr[i] = EMPTY_SEGMENT;
3075 __asm __volatile ("mtsrin %0,%1"
3076 :: "r"(EMPTY_SEGMENT), "r"(i << ADDR_SR_SHFT));
3077 }
3078
3079 pmap_kernel()->pm_sr[KERNEL_SR] = KERNEL_SEGMENT|SR_SUKEY|SR_PRKEY;
3080 __asm __volatile ("mtsr %0,%1"
3081 :: "n"(KERNEL_SR), "r"(KERNEL_SEGMENT));
3082 #ifdef KERNEL2_SR
3083 pmap_kernel()->pm_sr[KERNEL2_SR] = KERNEL2_SEGMENT|SR_SUKEY|SR_PRKEY;
3084 __asm __volatile ("mtsr %0,%1"
3085 :: "n"(KERNEL2_SR), "r"(KERNEL2_SEGMENT));
3086 #endif
3087 for (i = 0; i < 16; i++) {
3088 if (iosrtable[i] & SR601_T) {
3089 pmap_kernel()->pm_sr[i] = iosrtable[i];
3090 __asm __volatile ("mtsrin %0,%1"
3091 :: "r"(iosrtable[i]), "r"(i << ADDR_SR_SHFT));
3092 }
3093 }
3094 #endif /* !PPC_OEA64 */
3095
3096 __asm __volatile ("sync; mtsdr1 %0; isync"
3097 :: "r"((uintptr_t)pmap_pteg_table | (pmap_pteg_mask >> 10)));
3098 tlbia();
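	/*
	 * Illustrative SDR1 value: a 1MB hash table (pmap_pteg_cnt = 0x4000,
	 * pmap_pteg_mask = 0x3fff) gives an HTABMASK of 0x3fff >> 10 = 0xf,
	 * and since pmap_boot_find_memory() aligned the table to its own
	 * size, its address can simply be OR'd into the HTABORG field.
	 */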
3099
3100 #ifdef ALTIVEC
3101 pmap_use_altivec = cpu_altivec;
3102 #endif
3103
3104 #ifdef DEBUG
3105 if (pmapdebug & PMAPDEBUG_BOOT) {
3106 u_int cnt;
3107 int bank;
3108 char pbuf[9];
3109 for (cnt = 0, bank = 0; bank < vm_nphysseg; bank++) {
3110 cnt += vm_physmem[bank].avail_end - vm_physmem[bank].avail_start;
3111 printf("pmap_bootstrap: vm_physmem[%d]=%#lx-%#lx/%#lx\n",
3112 bank,
3113 ptoa(vm_physmem[bank].avail_start),
3114 ptoa(vm_physmem[bank].avail_end),
3115 ptoa(vm_physmem[bank].avail_end - vm_physmem[bank].avail_start));
3116 }
3117 format_bytes(pbuf, sizeof(pbuf), ptoa((u_int64_t) cnt));
3118 printf("pmap_bootstrap: UVM memory = %s (%u pages)\n",
3119 pbuf, cnt);
3120 }
3121 #endif
3122
3123 pool_init(&pmap_upvo_pool, sizeof(struct pvo_entry),
3124 sizeof(struct pvo_entry), 0, 0, "pmap_upvopl",
3125 &pmap_pool_uallocator);
3126
3127 pool_setlowat(&pmap_upvo_pool, 252);
3128
3129 pool_init(&pmap_pool, sizeof(struct pmap),
3130 sizeof(void *), 0, 0, "pmap_pl", &pmap_pool_uallocator);
3131 }
3132