1 /* $NetBSD: pmap.c,v 1.53 2008/02/05 18:10:47 garbled Exp $ */
2 /*-
3 * Copyright (c) 2001 The NetBSD Foundation, Inc.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Matt Thomas <matt (at) 3am-software.com> of Allegro Networks, Inc.
8 *
9 * Support for PPC64 Bridge mode added by Sanjay Lal <sanjayl (at) kymasys.com>
10 * of Kyma Systems LLC.
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
20 * 3. All advertising materials mentioning features or use of this software
21 * must display the following acknowledgement:
22 * This product includes software developed by the NetBSD
23 * Foundation, Inc. and its contributors.
24 * 4. Neither the name of The NetBSD Foundation nor the names of its
25 * contributors may be used to endorse or promote products derived
26 * from this software without specific prior written permission.
27 *
28 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
29 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
30 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
31 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
32 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
33 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
34 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
35 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
36 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38 * POSSIBILITY OF SUCH DAMAGE.
39 */
40
41 /*
42 * Copyright (C) 1995, 1996 Wolfgang Solfrank.
43 * Copyright (C) 1995, 1996 TooLs GmbH.
44 * All rights reserved.
45 *
46 * Redistribution and use in source and binary forms, with or without
47 * modification, are permitted provided that the following conditions
48 * are met:
49 * 1. Redistributions of source code must retain the above copyright
50 * notice, this list of conditions and the following disclaimer.
51 * 2. Redistributions in binary form must reproduce the above copyright
52 * notice, this list of conditions and the following disclaimer in the
53 * documentation and/or other materials provided with the distribution.
54 * 3. All advertising materials mentioning features or use of this software
55 * must display the following acknowledgement:
56 * This product includes software developed by TooLs GmbH.
57 * 4. The name of TooLs GmbH may not be used to endorse or promote products
58 * derived from this software without specific prior written permission.
59 *
60 * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR
61 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
62 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
63 * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
64 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
65 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
66 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
67 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
68 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
69 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
70 */
71
72 #include <sys/cdefs.h>
73 __KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.53 2008/02/05 18:10:47 garbled Exp $");
74
75 #define PMAP_NOOPNAMES
76
77 #include "opt_ppcarch.h"
78 #include "opt_altivec.h"
79 #include "opt_pmap.h"
80 #include <sys/param.h>
81 #include <sys/malloc.h>
82 #include <sys/proc.h>
83 #include <sys/user.h>
84 #include <sys/pool.h>
85 #include <sys/queue.h>
86 #include <sys/device.h> /* for evcnt */
87 #include <sys/systm.h>
88 #include <sys/atomic.h>
89
90 #include <uvm/uvm.h>
91
92 #include <machine/pcb.h>
93 #include <machine/powerpc.h>
94 #include <powerpc/spr.h>
95 #include <powerpc/oea/sr_601.h>
96 #include <powerpc/bat.h>
97 #include <powerpc/stdarg.h>
98
99 #ifdef ALTIVEC
100 int pmap_use_altivec;
101 #endif
102
103 volatile struct pteg *pmap_pteg_table;
104 unsigned int pmap_pteg_cnt;
105 unsigned int pmap_pteg_mask;
106 #ifdef PMAP_MEMLIMIT
107 static paddr_t pmap_memlimit = PMAP_MEMLIMIT;
108 #else
109 static paddr_t pmap_memlimit = -PAGE_SIZE; /* there is no limit */
110 #endif
111
112 struct pmap kernel_pmap_;
113 unsigned int pmap_pages_stolen;
114 u_long pmap_pte_valid;
115 #if defined(DIAGNOSTIC) || defined(DEBUG) || defined(PMAPCHECK)
116 u_long pmap_pvo_enter_depth;
117 u_long pmap_pvo_remove_depth;
118 #endif
119
120 int physmem;
121 #ifndef MSGBUFADDR
122 extern paddr_t msgbuf_paddr;
123 #endif
124
125 static struct mem_region *mem, *avail;
126 static u_int mem_cnt, avail_cnt;
127
128 #if !defined(PMAP_OEA64) && !defined(PMAP_OEA64_BRIDGE)
129 # define PMAP_OEA 1
130 # if defined(PMAP_EXCLUDE_DECLS) && !defined(PPC_OEA64) && !defined(PPC_OEA64_BRIDGE)
131 # define PMAPNAME(name) pmap_##name
132 # endif
133 #endif
134
135 #if defined(PMAP_OEA64)
136 # if defined(PMAP_EXCLUDE_DECLS) && !defined(PPC_OEA) && !defined(PPC_OEA64_BRIDGE)
137 # define PMAPNAME(name) pmap_##name
138 # endif
139 #endif
140
141 #if defined(PMAP_OEA64_BRIDGE)
142 # if defined(PMAP_EXCLUDE_DECLS) && !defined(PPC_OEA) && !defined(PPC_OEA64)
143 # define PMAPNAME(name) pmap_##name
144 # endif
145 #endif
146
147 #if defined(PMAP_OEA)
148 #define _PRIxpte "lx"
149 #else
150 #define _PRIxpte PRIx64
151 #endif
152 #define _PRIxpa "lx"
153 #define _PRIxva "lx"
154
155 #if defined(PMAP_EXCLUDE_DECLS) && !defined(PMAPNAME)
156 #if defined(PMAP_OEA)
157 #define PMAPNAME(name) pmap32_##name
158 #elif defined(PMAP_OEA64)
159 #define PMAPNAME(name) pmap64_##name
160 #elif defined(PMAP_OEA64_BRIDGE)
161 #define PMAPNAME(name) pmap64bridge_##name
162 #else
163 #error unknown variant for pmap
164 #endif
165 #endif /* PMAP_EXCLUDE_DECLS && !PMAPNAME */
166
167 #if defined(PMAPNAME)
168 #define STATIC static
169 #define pmap_pte_spill PMAPNAME(pte_spill)
170 #define pmap_real_memory PMAPNAME(real_memory)
171 #define pmap_init PMAPNAME(init)
172 #define pmap_virtual_space PMAPNAME(virtual_space)
173 #define pmap_create PMAPNAME(create)
174 #define pmap_reference PMAPNAME(reference)
175 #define pmap_destroy PMAPNAME(destroy)
176 #define pmap_copy PMAPNAME(copy)
177 #define pmap_update PMAPNAME(update)
178 #define pmap_collect PMAPNAME(collect)
179 #define pmap_enter PMAPNAME(enter)
180 #define pmap_remove PMAPNAME(remove)
181 #define pmap_kenter_pa PMAPNAME(kenter_pa)
182 #define pmap_kremove PMAPNAME(kremove)
183 #define pmap_extract PMAPNAME(extract)
184 #define pmap_protect PMAPNAME(protect)
185 #define pmap_unwire PMAPNAME(unwire)
186 #define pmap_page_protect PMAPNAME(page_protect)
187 #define pmap_query_bit PMAPNAME(query_bit)
188 #define pmap_clear_bit PMAPNAME(clear_bit)
189
190 #define pmap_activate PMAPNAME(activate)
191 #define pmap_deactivate PMAPNAME(deactivate)
192
193 #define pmap_pinit PMAPNAME(pinit)
194 #define pmap_procwr PMAPNAME(procwr)
195
196 #if defined(DEBUG) || defined(PMAPCHECK) || defined(DDB)
197 #define pmap_pte_print PMAPNAME(pte_print)
198 #define pmap_pteg_check PMAPNAME(pteg_check)
199 #define pmap_print_mmuregs	PMAPNAME(print_mmuregs)
200 #define pmap_print_pte PMAPNAME(print_pte)
201 #define pmap_pteg_dist PMAPNAME(pteg_dist)
202 #endif
203 #if defined(DEBUG) || defined(PMAPCHECK)
204 #define pmap_pvo_verify PMAPNAME(pvo_verify)
205 #endif
206 #define pmap_steal_memory PMAPNAME(steal_memory)
207 #define pmap_bootstrap PMAPNAME(bootstrap)
208 #else
209 #define STATIC /* nothing */
210 #endif /* PMAPNAME */
211
212 STATIC int pmap_pte_spill(struct pmap *, vaddr_t, bool);
213 STATIC void pmap_real_memory(paddr_t *, psize_t *);
214 STATIC void pmap_init(void);
215 STATIC void pmap_virtual_space(vaddr_t *, vaddr_t *);
216 STATIC pmap_t pmap_create(void);
217 STATIC void pmap_reference(pmap_t);
218 STATIC void pmap_destroy(pmap_t);
219 STATIC void pmap_copy(pmap_t, pmap_t, vaddr_t, vsize_t, vaddr_t);
220 STATIC void pmap_update(pmap_t);
221 STATIC void pmap_collect(pmap_t);
222 STATIC int pmap_enter(pmap_t, vaddr_t, paddr_t, vm_prot_t, int);
223 STATIC void pmap_remove(pmap_t, vaddr_t, vaddr_t);
224 STATIC void pmap_kenter_pa(vaddr_t, paddr_t, vm_prot_t);
225 STATIC void pmap_kremove(vaddr_t, vsize_t);
226 STATIC bool pmap_extract(pmap_t, vaddr_t, paddr_t *);
227
228 STATIC void pmap_protect(pmap_t, vaddr_t, vaddr_t, vm_prot_t);
229 STATIC void pmap_unwire(pmap_t, vaddr_t);
230 STATIC void pmap_page_protect(struct vm_page *, vm_prot_t);
231 STATIC bool pmap_query_bit(struct vm_page *, int);
232 STATIC bool pmap_clear_bit(struct vm_page *, int);
233
234 STATIC void pmap_activate(struct lwp *);
235 STATIC void pmap_deactivate(struct lwp *);
236
237 STATIC void pmap_pinit(pmap_t pm);
238 STATIC void pmap_procwr(struct proc *, vaddr_t, size_t);
239
240 #if defined(DEBUG) || defined(PMAPCHECK) || defined(DDB)
241 STATIC void pmap_pte_print(volatile struct pte *);
242 STATIC void pmap_pteg_check(void);
243 STATIC void pmap_print_mmuregs(void);
244 STATIC void pmap_print_pte(pmap_t, vaddr_t);
245 STATIC void pmap_pteg_dist(void);
246 #endif
247 #if defined(DEBUG) || defined(PMAPCHECK)
248 STATIC void pmap_pvo_verify(void);
249 #endif
250 STATIC vaddr_t pmap_steal_memory(vsize_t, vaddr_t *, vaddr_t *);
251 STATIC void pmap_bootstrap(paddr_t, paddr_t);
252
253 #ifdef PMAPNAME
254 const struct pmap_ops PMAPNAME(ops) = {
255 .pmapop_pte_spill = pmap_pte_spill,
256 .pmapop_real_memory = pmap_real_memory,
257 .pmapop_init = pmap_init,
258 .pmapop_virtual_space = pmap_virtual_space,
259 .pmapop_create = pmap_create,
260 .pmapop_reference = pmap_reference,
261 .pmapop_destroy = pmap_destroy,
262 .pmapop_copy = pmap_copy,
263 .pmapop_update = pmap_update,
264 .pmapop_collect = pmap_collect,
265 .pmapop_enter = pmap_enter,
266 .pmapop_remove = pmap_remove,
267 .pmapop_kenter_pa = pmap_kenter_pa,
268 .pmapop_kremove = pmap_kremove,
269 .pmapop_extract = pmap_extract,
270 .pmapop_protect = pmap_protect,
271 .pmapop_unwire = pmap_unwire,
272 .pmapop_page_protect = pmap_page_protect,
273 .pmapop_query_bit = pmap_query_bit,
274 .pmapop_clear_bit = pmap_clear_bit,
275 .pmapop_activate = pmap_activate,
276 .pmapop_deactivate = pmap_deactivate,
277 .pmapop_pinit = pmap_pinit,
278 .pmapop_procwr = pmap_procwr,
279 #if defined(DEBUG) || defined(PMAPCHECK) || defined(DDB)
280 .pmapop_pte_print = pmap_pte_print,
281 .pmapop_pteg_check = pmap_pteg_check,
282 .pmapop_print_mmuregs = pmap_print_mmuregs,
283 .pmapop_print_pte = pmap_print_pte,
284 .pmapop_pteg_dist = pmap_pteg_dist,
285 #else
286 .pmapop_pte_print = NULL,
287 .pmapop_pteg_check = NULL,
288 .pmapop_print_mmuregs = NULL,
289 .pmapop_print_pte = NULL,
290 .pmapop_pteg_dist = NULL,
291 #endif
292 #if defined(DEBUG) || defined(PMAPCHECK)
293 .pmapop_pvo_verify = pmap_pvo_verify,
294 #else
295 .pmapop_pvo_verify = NULL,
296 #endif
297 .pmapop_steal_memory = pmap_steal_memory,
298 .pmapop_bootstrap = pmap_bootstrap,
299 };
300 #endif /* PMAPNAME */
301
302 /*
303 * The following structure is aligned to 32 bytes
304 */
305 struct pvo_entry {
306 LIST_ENTRY(pvo_entry) pvo_vlink; /* Link to common virt page */
307 TAILQ_ENTRY(pvo_entry) pvo_olink; /* Link to overflow entry */
308 struct pte pvo_pte; /* Prebuilt PTE */
309 pmap_t pvo_pmap; /* ptr to owning pmap */
310 vaddr_t pvo_vaddr; /* VA of entry */
311 #define PVO_PTEGIDX_MASK 0x0007 /* which PTEG slot */
312 #define PVO_PTEGIDX_VALID 0x0008 /* slot is valid */
313 #define PVO_WIRED 0x0010 /* PVO entry is wired */
314 #define PVO_MANAGED 0x0020 /* PVO e. for managed page */
315 #define PVO_EXECUTABLE 0x0040 /* PVO e. for executable page */
316 #define PVO_WIRED_P(pvo) ((pvo)->pvo_vaddr & PVO_WIRED)
317 #define PVO_MANAGED_P(pvo) ((pvo)->pvo_vaddr & PVO_MANAGED)
318 #define PVO_EXECUTABLE_P(pvo) ((pvo)->pvo_vaddr & PVO_EXECUTABLE)
319 #define PVO_ENTER_INSERT 0 /* PVO has been removed */
320 #define PVO_SPILL_UNSET 1 /* PVO has been evicted */
321 #define PVO_SPILL_SET 2 /* PVO has been spilled */
322 #define PVO_SPILL_INSERT 3 /* PVO has been inserted */
323 #define PVO_PMAP_PAGE_PROTECT 4 /* PVO has changed */
324 #define PVO_PMAP_PROTECT 5 /* PVO has changed */
325 #define PVO_REMOVE 6 /* PVO has been removed */
326 #define PVO_WHERE_MASK 15
327 #define PVO_WHERE_SHFT 8
328 } __attribute__ ((aligned (32)));
329 #define PVO_VADDR(pvo) ((pvo)->pvo_vaddr & ~ADDR_POFF)
330 #define PVO_PTEGIDX_GET(pvo) ((pvo)->pvo_vaddr & PVO_PTEGIDX_MASK)
331 #define PVO_PTEGIDX_ISSET(pvo) ((pvo)->pvo_vaddr & PVO_PTEGIDX_VALID)
332 #define PVO_PTEGIDX_CLR(pvo) \
333 ((void)((pvo)->pvo_vaddr &= ~(PVO_PTEGIDX_VALID|PVO_PTEGIDX_MASK)))
334 #define PVO_PTEGIDX_SET(pvo,i) \
335 ((void)((pvo)->pvo_vaddr |= (i)|PVO_PTEGIDX_VALID))
336 #define PVO_WHERE(pvo,w) \
337 ((pvo)->pvo_vaddr &= ~(PVO_WHERE_MASK << PVO_WHERE_SHFT), \
338 (pvo)->pvo_vaddr |= ((PVO_ ## w) << PVO_WHERE_SHFT))
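/*
 * Illustrative sketch, not compiled: how the low bits of pvo_vaddr are
 * reused for bookkeeping.  Since the VA stored in a PVO is always
 * page-aligned, the ADDR_POFF bits are free to hold the PVO_* flags and
 * the PTEG slot index.  The helper name below is hypothetical.
 */
#if 0
static void
pvo_vaddr_bits_example(struct pvo_entry *pvo, vaddr_t va)
{
	pvo->pvo_vaddr = va & ~ADDR_POFF;	/* page-aligned VA only */

	/* Record that the prebuilt PTE landed in PTEG slot 5. */
	PVO_PTEGIDX_SET(pvo, 5);
	KASSERT(PVO_PTEGIDX_ISSET(pvo));
	KASSERT(PVO_PTEGIDX_GET(pvo) == 5);

	/* The VA itself is still recoverable, flags notwithstanding. */
	KASSERT(PVO_VADDR(pvo) == (va & ~ADDR_POFF));

	/* Forget the slot again, e.g. when the entry is evicted. */
	PVO_PTEGIDX_CLR(pvo);
	KASSERT(!PVO_PTEGIDX_ISSET(pvo));
}
#endif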
339
340 TAILQ_HEAD(pvo_tqhead, pvo_entry);
341 struct pvo_tqhead *pmap_pvo_table; /* pvo entries by ptegroup index */
342 static struct pvo_head pmap_pvo_kunmanaged = LIST_HEAD_INITIALIZER(pmap_pvo_kunmanaged); /* list of unmanaged pages */
343 static struct pvo_head pmap_pvo_unmanaged = LIST_HEAD_INITIALIZER(pmap_pvo_unmanaged); /* list of unmanaged pages */
344
345 struct pool pmap_pool; /* pool for pmap structures */
346 struct pool pmap_upvo_pool; /* pool for pvo entries for unmanaged pages */
347 struct pool pmap_mpvo_pool; /* pool for pvo entries for managed pages */
348
349 /*
350 * We keep a cache of the pages used to back the pvo entry pools: one
351 * list for unmanaged (kernel) entries and one for managed entries.
352 */
353 struct pvo_page {
354 SIMPLEQ_ENTRY(pvo_page) pvop_link;
355 };
356 SIMPLEQ_HEAD(pvop_head, pvo_page);
357 static struct pvop_head pmap_upvop_head = SIMPLEQ_HEAD_INITIALIZER(pmap_upvop_head);
358 static struct pvop_head pmap_mpvop_head = SIMPLEQ_HEAD_INITIALIZER(pmap_mpvop_head);
359 u_long pmap_upvop_free;
360 u_long pmap_upvop_maxfree;
361 u_long pmap_mpvop_free;
362 u_long pmap_mpvop_maxfree;
363
364 static void *pmap_pool_ualloc(struct pool *, int);
365 static void *pmap_pool_malloc(struct pool *, int);
366
367 static void pmap_pool_ufree(struct pool *, void *);
368 static void pmap_pool_mfree(struct pool *, void *);
369
370 static struct pool_allocator pmap_pool_mallocator = {
371 .pa_alloc = pmap_pool_malloc,
372 .pa_free = pmap_pool_mfree,
373 .pa_pagesz = 0,
374 };
375
376 static struct pool_allocator pmap_pool_uallocator = {
377 .pa_alloc = pmap_pool_ualloc,
378 .pa_free = pmap_pool_ufree,
379 .pa_pagesz = 0,
380 };
381
382 #if defined(DEBUG) || defined(PMAPCHECK) || defined(DDB)
383 void pmap_pte_print(volatile struct pte *);
384 void pmap_pteg_check(void);
385 void pmap_pteg_dist(void);
386 void pmap_print_pte(pmap_t, vaddr_t);
387 void pmap_print_mmuregs(void);
388 #endif
389
390 #if defined(DEBUG) || defined(PMAPCHECK)
391 #ifdef PMAPCHECK
392 int pmapcheck = 1;
393 #else
394 int pmapcheck = 0;
395 #endif
396 void pmap_pvo_verify(void);
397 static void pmap_pvo_check(const struct pvo_entry *);
398 #define PMAP_PVO_CHECK(pvo) \
399 do { \
400 if (pmapcheck) \
401 pmap_pvo_check(pvo); \
402 } while (0)
403 #else
404 #define PMAP_PVO_CHECK(pvo) do { } while (/*CONSTCOND*/0)
405 #endif
406 static int pmap_pte_insert(int, struct pte *);
407 static int pmap_pvo_enter(pmap_t, struct pool *, struct pvo_head *,
408 vaddr_t, paddr_t, register_t, int);
409 static void pmap_pvo_remove(struct pvo_entry *, int, struct pvo_head *);
410 static void pmap_pvo_free(struct pvo_entry *);
411 static void pmap_pvo_free_list(struct pvo_head *);
412 static struct pvo_entry *pmap_pvo_find_va(pmap_t, vaddr_t, int *);
413 static volatile struct pte *pmap_pvo_to_pte(const struct pvo_entry *, int);
414 static struct pvo_entry *pmap_pvo_reclaim(struct pmap *);
415 static void pvo_set_exec(struct pvo_entry *);
416 static void pvo_clear_exec(struct pvo_entry *);
417
418 static void tlbia(void);
419
420 static void pmap_release(pmap_t);
421 static paddr_t pmap_boot_find_memory(psize_t, psize_t, int);
422
423 static uint32_t pmap_pvo_reclaim_nextidx;
424 #ifdef DEBUG
425 static int pmap_pvo_reclaim_debugctr;
426 #endif
427
428 #define VSID_NBPW (sizeof(uint32_t) * 8)
429 static uint32_t pmap_vsid_bitmap[NPMAPS / VSID_NBPW];
430
431 static int pmap_initialized;
432
433 #if defined(DEBUG) || defined(PMAPDEBUG)
434 #define PMAPDEBUG_BOOT 0x0001
435 #define PMAPDEBUG_PTE 0x0002
436 #define PMAPDEBUG_EXEC 0x0008
437 #define PMAPDEBUG_PVOENTER 0x0010
438 #define PMAPDEBUG_PVOREMOVE 0x0020
439 #define PMAPDEBUG_ACTIVATE 0x0100
440 #define PMAPDEBUG_CREATE 0x0200
441 #define PMAPDEBUG_ENTER 0x1000
442 #define PMAPDEBUG_KENTER 0x2000
443 #define PMAPDEBUG_KREMOVE 0x4000
444 #define PMAPDEBUG_REMOVE 0x8000
445
446 unsigned int pmapdebug = 0;
447
448 # define DPRINTF(x) printf x
449 # define DPRINTFN(n, x) if (pmapdebug & PMAPDEBUG_ ## n) printf x
450 #else
451 # define DPRINTF(x)
452 # define DPRINTFN(n, x)
453 #endif
454
455
456 #ifdef PMAPCOUNTERS
457 /*
458 * From pmap_subr.c
459 */
460 extern struct evcnt pmap_evcnt_mappings;
461 extern struct evcnt pmap_evcnt_unmappings;
462
463 extern struct evcnt pmap_evcnt_kernel_mappings;
464 extern struct evcnt pmap_evcnt_kernel_unmappings;
465
466 extern struct evcnt pmap_evcnt_mappings_replaced;
467
468 extern struct evcnt pmap_evcnt_exec_mappings;
469 extern struct evcnt pmap_evcnt_exec_cached;
470
471 extern struct evcnt pmap_evcnt_exec_synced;
472 extern struct evcnt pmap_evcnt_exec_synced_clear_modify;
473 extern struct evcnt pmap_evcnt_exec_synced_pvo_remove;
474
475 extern struct evcnt pmap_evcnt_exec_uncached_page_protect;
476 extern struct evcnt pmap_evcnt_exec_uncached_clear_modify;
477 extern struct evcnt pmap_evcnt_exec_uncached_zero_page;
478 extern struct evcnt pmap_evcnt_exec_uncached_copy_page;
479 extern struct evcnt pmap_evcnt_exec_uncached_pvo_remove;
480
481 extern struct evcnt pmap_evcnt_updates;
482 extern struct evcnt pmap_evcnt_collects;
483 extern struct evcnt pmap_evcnt_copies;
484
485 extern struct evcnt pmap_evcnt_ptes_spilled;
486 extern struct evcnt pmap_evcnt_ptes_unspilled;
487 extern struct evcnt pmap_evcnt_ptes_evicted;
488
489 extern struct evcnt pmap_evcnt_ptes_primary[8];
490 extern struct evcnt pmap_evcnt_ptes_secondary[8];
491 extern struct evcnt pmap_evcnt_ptes_removed;
492 extern struct evcnt pmap_evcnt_ptes_changed;
493 extern struct evcnt pmap_evcnt_pvos_reclaimed;
494 extern struct evcnt pmap_evcnt_pvos_failed;
495
496 extern struct evcnt pmap_evcnt_zeroed_pages;
497 extern struct evcnt pmap_evcnt_copied_pages;
498 extern struct evcnt pmap_evcnt_idlezeroed_pages;
499
500 #define PMAPCOUNT(ev) ((pmap_evcnt_ ## ev).ev_count++)
501 #define PMAPCOUNT2(ev) ((ev).ev_count++)
502 #else
503 #define PMAPCOUNT(ev) ((void) 0)
504 #define PMAPCOUNT2(ev) ((void) 0)
505 #endif
506
507 #define TLBIE(va) __asm volatile("tlbie %0" :: "r"(va))
508
509 /* XXXSL: this needs to be moved to assembler */
510 #define TLBIEL(va) __asm __volatile("tlbie %0" :: "r"(va))
511
512 #define TLBSYNC() __asm volatile("tlbsync")
513 #define SYNC() __asm volatile("sync")
514 #define EIEIO() __asm volatile("eieio")
515 #define MFMSR() mfmsr()
516 #define MTMSR(psl) mtmsr(psl)
517 #define MFPVR() mfpvr()
518 #define MFSRIN(va) mfsrin(va)
519 #define MFTB() mfrtcltbl()
520
521 #if defined (PMAP_OEA) || defined (PMAP_OEA64_BRIDGE)
522 static inline register_t
523 mfsrin(vaddr_t va)
524 {
525 register_t sr;
526 __asm volatile ("mfsrin %0,%1" : "=r"(sr) : "r"(va));
527 return sr;
528 }
529 #endif /* PMAP_OEA || PMAP_OEA64_BRIDGE */
530
531 #if defined (PMAP_OEA64_BRIDGE)
532 extern void mfmsr64 (register64_t *result);
533 #endif /* PMAP_OEA64_BRIDGE */
534
535 #define PMAP_LOCK() KERNEL_LOCK(1, NULL)
536 #define PMAP_UNLOCK() KERNEL_UNLOCK_ONE(NULL)
537
538 static inline register_t
539 pmap_interrupts_off(void)
540 {
541 register_t msr = MFMSR();
542 if (msr & PSL_EE)
543 MTMSR(msr & ~PSL_EE);
544 return msr;
545 }
546
547 static void
548 pmap_interrupts_restore(register_t msr)
549 {
550 if (msr & PSL_EE)
551 MTMSR(msr);
552 }
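/*
 * Illustrative sketch, not compiled: the save/disable/restore idiom the
 * routines below use around page table and PVO list manipulation.  The
 * function name is hypothetical.
 */
#if 0
static void
pmap_interrupts_example(void)
{
	register_t msr;

	msr = pmap_interrupts_off();
	/* ... touch pmap_pteg_table / pmap_pvo_table here ... */
	pmap_interrupts_restore(msr);
}
#endif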
553
554 static inline u_int32_t
555 mfrtcltbl(void)
556 {
557
558 if ((MFPVR() >> 16) == MPC601)
559 return (mfrtcl() >> 7);
560 else
561 return (mftbl());
562 }
563
564 /*
565 * These small routines may have to be replaced,
566 * if/when we support processors other than the 604.
567 */
568
569 void
570 tlbia(void)
571 {
572 char *i;
573
574 SYNC();
575 #if defined(PMAP_OEA)
576 /*
577 * Why not use "tlbia"? Because not all processors implement it.
578 *
579 * This needs to be a per-CPU callback to do the appropriate thing
580 * for the CPU. XXX
581 */
582 for (i = 0; i < (char *)0x00040000; i += 0x00001000) {
583 TLBIE(i);
584 EIEIO();
585 SYNC();
586 }
587 #elif defined (PMAP_OEA64) || defined (PMAP_OEA64_BRIDGE)
588 /* This is specifically for the 970, 970UM v1.6 pp. 140. */
589 for (i = 0; i <= (char *)0xFF000; i += 0x00001000) {
590 TLBIEL(i);
591 EIEIO();
592 SYNC();
593 }
594 #endif
595 TLBSYNC();
596 SYNC();
597 }
598
599 static inline register_t
600 va_to_vsid(const struct pmap *pm, vaddr_t addr)
601 {
602 #if defined (PMAP_OEA) || defined (PMAP_OEA64_BRIDGE)
603 return (pm->pm_sr[addr >> ADDR_SR_SHFT] & SR_VSID) >> SR_VSID_SHFT;
604 #else /* PMAP_OEA64 */
605 #if 0
606 const struct ste *ste;
607 register_t hash;
608 int i;
609
610 hash = (addr >> ADDR_ESID_SHFT) & ADDR_ESID_HASH;
611
612 /*
613 * Try the primary group first
614 */
615 ste = pm->pm_stes[hash].stes;
616 for (i = 0; i < 8; i++, ste++) {
617 if ((ste->ste_hi & STE_V) &&
618 (addr & ~(ADDR_POFF|ADDR_PIDX)) == (ste->ste_hi & STE_ESID))
619 return ste;
620 }
621
622 /*
623 * Then the secondary group.
624 */
625 ste = pm->pm_stes[hash ^ ADDR_ESID_HASH].stes;
626 for (i = 0; i < 8; i++, ste++) {
627 if ((ste->ste_hi & STE_V) &&
628 (addr & ~(ADDR_POFF|ADDR_PIDX)) == (ste->ste_hi & STE_ESID))
629 return addr;
630 }
631
632 return NULL;
633 #else
634 /*
635 * Rather than searching the STE groups for the VSID, we know
636 * how we generate that from the ESID and so do that.
637 */
638 return VSID_MAKE(addr >> ADDR_SR_SHFT, pm->pm_vsid) >> SR_VSID_SHFT;
639 #endif
640 #endif /* PMAP_OEA */
641 }
642
643 static inline register_t
644 va_to_pteg(const struct pmap *pm, vaddr_t addr)
645 {
646 register_t hash;
647
648 hash = va_to_vsid(pm, addr) ^ ((addr & ADDR_PIDX) >> ADDR_PIDX_SHFT);
649 return hash & pmap_pteg_mask;
650 }
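/*
 * Illustrative sketch, not compiled: the secondary PTEG index is derived
 * from the primary one by XOR-ing with pmap_pteg_mask, which is how
 * pmap_pte_insert() below locates the alternate group.  The helper name
 * is hypothetical.
 */
#if 0
static register_t
va_to_secondary_pteg(const struct pmap *pm, vaddr_t addr)
{
	return va_to_pteg(pm, addr) ^ pmap_pteg_mask;
}
#endif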
651
652 #if defined(DEBUG) || defined(PMAPCHECK) || defined(DDB)
653 /*
654 * Given a PTE in the page table, calculate the VADDR that hashes to it.
655 * The only bit of magic is that the top 4 bits of the address don't
656 * technically exist in the PTE. But we know we reserved 4 bits of the
657 * VSID for it so that's how we get it.
658 */
659 static vaddr_t
660 pmap_pte_to_va(volatile const struct pte *pt)
661 {
662 vaddr_t va;
663 uintptr_t ptaddr = (uintptr_t) pt;
664
665 if (pt->pte_hi & PTE_HID)
666 ptaddr ^= (pmap_pteg_mask * sizeof(struct pteg));
667
668 /* PPC Bits 10-19 PPC64 Bits 42-51 */
669 #if defined(PMAP_OEA)
670 va = ((pt->pte_hi >> PTE_VSID_SHFT) ^ (ptaddr / sizeof(struct pteg))) & 0x3ff;
671 #elif defined (PMAP_OEA64) || defined (PMAP_OEA64_BRIDGE)
672 va = ((pt->pte_hi >> PTE_VSID_SHFT) ^ (ptaddr / sizeof(struct pteg))) & 0x7ff;
673 #endif
674 va <<= ADDR_PIDX_SHFT;
675
676 /* PPC Bits 4-9 PPC64 Bits 36-41 */
677 va |= (pt->pte_hi & PTE_API) << ADDR_API_SHFT;
678
679 #if defined(PMAP_OEA64)
680 /* PPC64 Bits 0-35 */
681 /* va |= VSID_TO_SR(pt->pte_hi >> PTE_VSID_SHFT) << ADDR_SR_SHFT; */
682 #elif defined(PMAP_OEA) || defined(PMAP_OEA64_BRIDGE)
683 /* PPC Bits 0-3 */
684 va |= VSID_TO_SR(pt->pte_hi >> PTE_VSID_SHFT) << ADDR_SR_SHFT;
685 #endif
686
687 return va;
688 }
689 #endif
690
691 static inline struct pvo_head *
692 pa_to_pvoh(paddr_t pa, struct vm_page **pg_p)
693 {
694 struct vm_page *pg;
695
696 pg = PHYS_TO_VM_PAGE(pa);
697 if (pg_p != NULL)
698 *pg_p = pg;
699 if (pg == NULL)
700 return &pmap_pvo_unmanaged;
701 return &pg->mdpage.mdpg_pvoh;
702 }
703
704 static inline struct pvo_head *
705 vm_page_to_pvoh(struct vm_page *pg)
706 {
707 return &pg->mdpage.mdpg_pvoh;
708 }
709
710
711 static inline void
712 pmap_attr_clear(struct vm_page *pg, int ptebit)
713 {
714 pg->mdpage.mdpg_attrs &= ~ptebit;
715 }
716
717 static inline int
718 pmap_attr_fetch(struct vm_page *pg)
719 {
720 return pg->mdpage.mdpg_attrs;
721 }
722
723 static inline void
724 pmap_attr_save(struct vm_page *pg, int ptebit)
725 {
726 pg->mdpage.mdpg_attrs |= ptebit;
727 }
728
729 static inline int
730 pmap_pte_compare(const volatile struct pte *pt, const struct pte *pvo_pt)
731 {
732 if (pt->pte_hi == pvo_pt->pte_hi
733 #if 0
734 && ((pt->pte_lo ^ pvo_pt->pte_lo) &
735 ~(PTE_REF|PTE_CHG)) == 0
736 #endif
737 )
738 return 1;
739 return 0;
740 }
741
742 static inline void
743 pmap_pte_create(struct pte *pt, const struct pmap *pm, vaddr_t va, register_t pte_lo)
744 {
745 /*
746 * Construct the PTE. Default to IMB initially. Valid bit
747 * only gets set when the real pte is set in memory.
748 *
749 * Note: Don't set the valid bit for correct operation of tlb update.
750 */
751 #if defined(PMAP_OEA)
752 pt->pte_hi = (va_to_vsid(pm, va) << PTE_VSID_SHFT)
753 | (((va & ADDR_PIDX) >> (ADDR_API_SHFT - PTE_API_SHFT)) & PTE_API);
754 pt->pte_lo = pte_lo;
755 #elif defined (PMAP_OEA64_BRIDGE)
756 pt->pte_hi = ((u_int64_t)va_to_vsid(pm, va) << PTE_VSID_SHFT)
757 | (((va & ADDR_PIDX) >> (ADDR_API_SHFT - PTE_API_SHFT)) & PTE_API);
758 pt->pte_lo = (u_int64_t) pte_lo;
759 #elif defined (PMAP_OEA64)
760 #error PMAP_OEA64 not supported
761 #endif /* PMAP_OEA */
762 }
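/*
 * Illustrative sketch, not compiled: building a prebuilt (still invalid)
 * PTE for a hypothetical memory-coherent, writable kernel mapping.  The
 * valid bit is only turned on later, by pmap_pte_set().
 */
#if 0
static void
pmap_pte_create_example(struct pte *pt, vaddr_t va, paddr_t pa)
{
	pmap_pte_create(pt, pmap_kernel(), va, pa | PTE_M | PTE_BW);
	KASSERT((pt->pte_hi & PTE_VALID) == 0);
}
#endif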
763
764 static inline void
765 pmap_pte_synch(volatile struct pte *pt, struct pte *pvo_pt)
766 {
767 pvo_pt->pte_lo |= pt->pte_lo & (PTE_REF|PTE_CHG);
768 }
769
770 static inline void
771 pmap_pte_clear(volatile struct pte *pt, vaddr_t va, int ptebit)
772 {
773 /*
774 * As shown in Section 7.6.3.2.3
775 */
776 pt->pte_lo &= ~ptebit;
777 TLBIE(va);
778 SYNC();
779 EIEIO();
780 TLBSYNC();
781 SYNC();
782 }
783
784 static inline void
785 pmap_pte_set(volatile struct pte *pt, struct pte *pvo_pt)
786 {
787 #if defined(DIAGNOSTIC) || defined(DEBUG) || defined(PMAPCHECK)
788 if (pvo_pt->pte_hi & PTE_VALID)
789 panic("pte_set: setting an already valid pte %p", pvo_pt);
790 #endif
791 pvo_pt->pte_hi |= PTE_VALID;
792
793 /*
794 * Update the PTE as defined in section 7.6.3.1
795 * Note that the REF/CHG bits are from pvo_pt and thus should
796 * have been saved so this routine can restore them (if desired).
797 */
798 pt->pte_lo = pvo_pt->pte_lo;
799 EIEIO();
800 pt->pte_hi = pvo_pt->pte_hi;
801 TLBSYNC();
802 SYNC();
803 pmap_pte_valid++;
804 }
805
806 static inline void
807 pmap_pte_unset(volatile struct pte *pt, struct pte *pvo_pt, vaddr_t va)
808 {
809 #if defined(DIAGNOSTIC) || defined(DEBUG) || defined(PMAPCHECK)
810 if ((pvo_pt->pte_hi & PTE_VALID) == 0)
811 panic("pte_unset: attempt to unset an inactive pte#1 %p/%p", pvo_pt, pt);
812 if ((pt->pte_hi & PTE_VALID) == 0)
813 panic("pte_unset: attempt to unset an inactive pte#2 %p/%p", pvo_pt, pt);
814 #endif
815
816 pvo_pt->pte_hi &= ~PTE_VALID;
817 /*
818 * Force the ref & chg bits back into the PTEs.
819 */
820 SYNC();
821 /*
822 * Invalidate the pte ... (Section 7.6.3.3)
823 */
824 pt->pte_hi &= ~PTE_VALID;
825 SYNC();
826 TLBIE(va);
827 SYNC();
828 EIEIO();
829 TLBSYNC();
830 SYNC();
831 /*
832 * Save the ref & chg bits ...
833 */
834 pmap_pte_synch(pt, pvo_pt);
835 pmap_pte_valid--;
836 }
837
838 static inline void
839 pmap_pte_change(volatile struct pte *pt, struct pte *pvo_pt, vaddr_t va)
840 {
841 /*
842 * Invalidate the PTE
843 */
844 pmap_pte_unset(pt, pvo_pt, va);
845 pmap_pte_set(pt, pvo_pt);
846 }
847
848 /*
849 * Try to insert the PTE @ *pvo_pt into the pmap_pteg_table at ptegidx
850 * (either primary or secondary location).
851 *
852 * Note: both the destination and source PTEs must not have PTE_VALID set.
853 */
854
855 static int
856 pmap_pte_insert(int ptegidx, struct pte *pvo_pt)
857 {
858 volatile struct pte *pt;
859 int i;
860
861 #if defined(DEBUG)
862 DPRINTFN(PTE, ("pmap_pte_insert: idx %#x, pte %#" _PRIx64 " %#" _PRIx64 "\n",
863 ptegidx, pvo_pt->pte_hi, pvo_pt->pte_lo));
864 #endif
865 /*
866 * First try primary hash.
867 */
868 for (pt = pmap_pteg_table[ptegidx].pt, i = 0; i < 8; i++, pt++) {
869 if ((pt->pte_hi & PTE_VALID) == 0) {
870 pvo_pt->pte_hi &= ~PTE_HID;
871 pmap_pte_set(pt, pvo_pt);
872 return i;
873 }
874 }
875
876 /*
877 * Now try secondary hash.
878 */
879 ptegidx ^= pmap_pteg_mask;
880 for (pt = pmap_pteg_table[ptegidx].pt, i = 0; i < 8; i++, pt++) {
881 if ((pt->pte_hi & PTE_VALID) == 0) {
882 pvo_pt->pte_hi |= PTE_HID;
883 pmap_pte_set(pt, pvo_pt);
884 return i;
885 }
886 }
887 return -1;
888 }
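/*
 * Illustrative sketch, not compiled: the calling convention used by
 * pmap_pvo_enter() and pmap_pte_spill() -- try to insert the prebuilt
 * PTE, record the slot on success, or account for an eviction if both
 * hash groups were full.  The function name is hypothetical.
 */
#if 0
static void
pmap_pte_insert_example(int ptegidx, struct pvo_entry *pvo)
{
	int i;

	i = pmap_pte_insert(ptegidx, &pvo->pvo_pte);
	if (i >= 0)
		PVO_PTEGIDX_SET(pvo, i);
	else
		pvo->pvo_pmap->pm_evictions++;
}
#endif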
889
890 /*
891 * Spill handler.
892 *
893 * Tries to spill a page table entry from the overflow area.
894 * This runs in either real mode (if dealing with an exception spill)
895 * or virtual mode when dealing with manually spilling one of the
896 * kernel's pte entries. In either case, interrupts are already
897 * disabled.
898 */
899
900 int
901 pmap_pte_spill(struct pmap *pm, vaddr_t addr, bool exec)
902 {
903 struct pvo_entry *source_pvo, *victim_pvo, *next_pvo;
904 struct pvo_entry *pvo;
905 /* XXX: gcc -- vpvoh is always set at either *1* or *2* */
906 struct pvo_tqhead *pvoh, *vpvoh = NULL;
907 int ptegidx, i, j;
908 volatile struct pteg *pteg;
909 volatile struct pte *pt;
910
911 PMAP_LOCK();
912
913 ptegidx = va_to_pteg(pm, addr);
914
915 /*
916 * Have to substitute some entry. Use the primary hash for this.
917 * Use low bits of timebase as random generator. Make sure we are
918 * not picking a kernel pte for replacement.
919 */
920 pteg = &pmap_pteg_table[ptegidx];
921 i = MFTB() & 7;
922 for (j = 0; j < 8; j++) {
923 pt = &pteg->pt[i];
924 if ((pt->pte_hi & PTE_VALID) == 0)
925 break;
926 if (VSID_TO_HASH((pt->pte_hi & PTE_VSID) >> PTE_VSID_SHFT)
927 < PHYSMAP_VSIDBITS)
928 break;
929 i = (i + 1) & 7;
930 }
931 KASSERT(j < 8);
932
933 source_pvo = NULL;
934 victim_pvo = NULL;
935 pvoh = &pmap_pvo_table[ptegidx];
936 TAILQ_FOREACH(pvo, pvoh, pvo_olink) {
937
938 /*
939 * We need to find pvo entry for this address...
940 */
941 PMAP_PVO_CHECK(pvo); /* sanity check */
942
943 /*
944 * If we haven't found the source and we come to a PVO with
945 * a valid PTE, then we know we can't find it, because evicted
946 * PVOs always come first in the list.
947 */
948 if (source_pvo == NULL && (pvo->pvo_pte.pte_hi & PTE_VALID))
949 break;
950 if (source_pvo == NULL && pm == pvo->pvo_pmap &&
951 addr == PVO_VADDR(pvo)) {
952
953 /*
954 * Now we have found the entry to be spilled into the
955 * pteg. Attempt to insert it into the page table.
956 */
957 j = pmap_pte_insert(ptegidx, &pvo->pvo_pte);
958 if (j >= 0) {
959 PVO_PTEGIDX_SET(pvo, j);
960 PMAP_PVO_CHECK(pvo); /* sanity check */
961 PVO_WHERE(pvo, SPILL_INSERT);
962 pvo->pvo_pmap->pm_evictions--;
963 PMAPCOUNT(ptes_spilled);
964 PMAPCOUNT2(((pvo->pvo_pte.pte_hi & PTE_HID)
965 ? pmap_evcnt_ptes_secondary
966 : pmap_evcnt_ptes_primary)[j]);
967
968 /*
969 * Since we keep the evicted entries at the
970 * front of the PVO list, we need to move this
971 * (now resident) PVO after the evicted
972 * entries.
973 */
974 next_pvo = TAILQ_NEXT(pvo, pvo_olink);
975
976 /*
977 * If we don't have to move (either we were the
978 * last entry or the next entry was valid),
979 * don't change our position. Otherwise
980 * move ourselves to the tail of the queue.
981 */
982 if (next_pvo != NULL &&
983 !(next_pvo->pvo_pte.pte_hi & PTE_VALID)) {
984 TAILQ_REMOVE(pvoh, pvo, pvo_olink);
985 TAILQ_INSERT_TAIL(pvoh, pvo, pvo_olink);
986 }
987 PMAP_UNLOCK();
988 return 1;
989 }
990 source_pvo = pvo;
991 if (exec && !PVO_EXECUTABLE_P(source_pvo)) {
PMAP_UNLOCK();
992 return 0;
993 }
994 if (victim_pvo != NULL)
995 break;
996 }
997
998 /*
999 * We also need the pvo entry of the victim we are replacing
1000 * so save the R & C bits of the PTE.
1001 */
1002 if ((pt->pte_hi & PTE_HID) == 0 && victim_pvo == NULL &&
1003 pmap_pte_compare(pt, &pvo->pvo_pte)) {
1004 vpvoh = pvoh; /* *1* */
1005 victim_pvo = pvo;
1006 if (source_pvo != NULL)
1007 break;
1008 }
1009 }
1010
1011 if (source_pvo == NULL) {
1012 PMAPCOUNT(ptes_unspilled);
1013 PMAP_UNLOCK();
1014 return 0;
1015 }
1016
1017 if (victim_pvo == NULL) {
1018 if ((pt->pte_hi & PTE_HID) == 0)
1019 panic("pmap_pte_spill: victim p-pte (%p) has "
1020 "no pvo entry!", pt);
1021
1022 /*
1023 * If this is a secondary PTE, we need to search
1024 * its primary pvo bucket for the matching PVO.
1025 */
1026 vpvoh = &pmap_pvo_table[ptegidx ^ pmap_pteg_mask]; /* *2* */
1027 TAILQ_FOREACH(pvo, vpvoh, pvo_olink) {
1028 PMAP_PVO_CHECK(pvo); /* sanity check */
1029
1030 /*
1031 * We also need the pvo entry of the victim we are
1032 * replacing so save the R & C bits of the PTE.
1033 */
1034 if (pmap_pte_compare(pt, &pvo->pvo_pte)) {
1035 victim_pvo = pvo;
1036 break;
1037 }
1038 }
1039 if (victim_pvo == NULL)
1040 panic("pmap_pte_spill: victim s-pte (%p) has "
1041 "no pvo entry!", pt);
1042 }
1043
1044 /*
1045 * The victim should not be a kernel PVO/PTE entry.
1046 */
1047 KASSERT(victim_pvo->pvo_pmap != pmap_kernel());
1048 KASSERT(PVO_PTEGIDX_ISSET(victim_pvo));
1049 KASSERT(PVO_PTEGIDX_GET(victim_pvo) == i);
1050
1051 /*
1052 * We are invalidating the TLB entry for the EA of the PTE
1053 * we are replacing even though it is still valid; if we don't,
1054 * we lose any ref/chg bit changes contained in the TLB
1055 * entry.
1056 */
1057 source_pvo->pvo_pte.pte_hi &= ~PTE_HID;
1058
1059 /*
1060 * To enforce the PVO list ordering constraint that all
1061 * evicted entries should come before all valid entries,
1062 * move the source PVO to the tail of its list and the
1063 * victim PVO to the head of its list (which might not be
1064 * the same list, if the victim was using the secondary hash).
1065 */
1066 TAILQ_REMOVE(pvoh, source_pvo, pvo_olink);
1067 TAILQ_INSERT_TAIL(pvoh, source_pvo, pvo_olink);
1068 TAILQ_REMOVE(vpvoh, victim_pvo, pvo_olink);
1069 TAILQ_INSERT_HEAD(vpvoh, victim_pvo, pvo_olink);
1070 pmap_pte_unset(pt, &victim_pvo->pvo_pte, victim_pvo->pvo_vaddr);
1071 pmap_pte_set(pt, &source_pvo->pvo_pte);
1072 victim_pvo->pvo_pmap->pm_evictions++;
1073 source_pvo->pvo_pmap->pm_evictions--;
1074 PVO_WHERE(victim_pvo, SPILL_UNSET);
1075 PVO_WHERE(source_pvo, SPILL_SET);
1076
1077 PVO_PTEGIDX_CLR(victim_pvo);
1078 PVO_PTEGIDX_SET(source_pvo, i);
1079 PMAPCOUNT2(pmap_evcnt_ptes_primary[i]);
1080 PMAPCOUNT(ptes_spilled);
1081 PMAPCOUNT(ptes_evicted);
1082 PMAPCOUNT(ptes_removed);
1083
1084 PMAP_PVO_CHECK(victim_pvo);
1085 PMAP_PVO_CHECK(source_pvo);
1086
1087 PMAP_UNLOCK();
1088 return 1;
1089 }
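/*
 * Illustrative sketch, not compiled: how the kernel pmap forces one of
 * its own evicted mappings back into the page table, as pmap_pvo_enter()
 * does further below.  The function name is hypothetical.
 */
#if 0
static void
pmap_spill_kernel_va_example(vaddr_t va)
{
	if (!pmap_pte_spill(pmap_kernel(), va, false))
		panic("no PVO to spill for kernel va %#" _PRIxva, va);
}
#endif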
1090
1091 /*
1092 * Restrict given range to physical memory
1093 */
1094 void
1095 pmap_real_memory(paddr_t *start, psize_t *size)
1096 {
1097 struct mem_region *mp;
1098
1099 for (mp = mem; mp->size; mp++) {
1100 if (*start + *size > mp->start
1101 && *start < mp->start + mp->size) {
1102 if (*start < mp->start) {
1103 *size -= mp->start - *start;
1104 *start = mp->start;
1105 }
1106 if (*start + *size > mp->start + mp->size)
1107 *size = mp->start + mp->size - *start;
1108 return;
1109 }
1110 }
1111 *size = 0;
1112 }
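/*
 * Illustrative sketch, not compiled: pmap_real_memory() clamps a range
 * to the mem[] region containing it; a range that hits no region comes
 * back with *size == 0.  The addresses used here are hypothetical.
 */
#if 0
static void
pmap_real_memory_example(void)
{
	paddr_t start = 0x00f00000;
	psize_t size = 0x00200000;

	pmap_real_memory(&start, &size);
	if (size == 0)
		printf("range is not backed by physical memory\n");
}
#endif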
1113
1114 /*
1115 * Initialize anything else for pmap handling.
1116 * Called during vm_init().
1117 */
1118 void
1119 pmap_init(void)
1120 {
1121 pool_init(&pmap_mpvo_pool, sizeof(struct pvo_entry),
1122 sizeof(struct pvo_entry), 0, 0, "pmap_mpvopl",
1123 &pmap_pool_mallocator, IPL_NONE);
1124
1125 pool_setlowat(&pmap_mpvo_pool, 1008);
1126
1127 pmap_initialized = 1;
1128
1129 }
1130
1131 /*
1132 * How much virtual space does the kernel get?
1133 */
1134 void
1135 pmap_virtual_space(vaddr_t *start, vaddr_t *end)
1136 {
1137 /*
1138 * For now, reserve one segment (minus some overhead) for kernel
1139 * virtual memory
1140 */
1141 *start = VM_MIN_KERNEL_ADDRESS;
1142 *end = VM_MAX_KERNEL_ADDRESS;
1143 }
1144
1145 /*
1146 * Allocate, initialize, and return a new physical map.
1147 */
1148 pmap_t
1149 pmap_create(void)
1150 {
1151 pmap_t pm;
1152
1153 pm = pool_get(&pmap_pool, PR_WAITOK);
1154 memset((void *)pm, 0, sizeof *pm);
1155 pmap_pinit(pm);
1156
1157 DPRINTFN(CREATE,("pmap_create: pm %p:\n"
1158 "\t%06x %06x %06x %06x %06x %06x %06x %06x\n"
1159 "\t%06x %06x %06x %06x %06x %06x %06x %06x\n", pm,
1160 (unsigned int) pm->pm_sr[0], (unsigned int) pm->pm_sr[1],
1161 (unsigned int) pm->pm_sr[2], (unsigned int) pm->pm_sr[3],
1162 (unsigned int) pm->pm_sr[4], (unsigned int) pm->pm_sr[5],
1163 (unsigned int) pm->pm_sr[6], (unsigned int) pm->pm_sr[7],
1164 (unsigned int) pm->pm_sr[8], (unsigned int) pm->pm_sr[9],
1165 (unsigned int) pm->pm_sr[10], (unsigned int) pm->pm_sr[11],
1166 (unsigned int) pm->pm_sr[12], (unsigned int) pm->pm_sr[13],
1167 (unsigned int) pm->pm_sr[14], (unsigned int) pm->pm_sr[15]));
1168 return pm;
1169 }
1170
1171 /*
1172 * Initialize a preallocated and zeroed pmap structure.
1173 */
1174 void
1175 pmap_pinit(pmap_t pm)
1176 {
1177 register_t entropy = MFTB();
1178 register_t mask;
1179 int i;
1180
1181 /*
1182 * Allocate some segment registers for this pmap.
1183 */
1184 pm->pm_refs = 1;
1185 PMAP_LOCK();
1186 for (i = 0; i < NPMAPS; i += VSID_NBPW) {
1187 static register_t pmap_vsidcontext;
1188 register_t hash;
1189 unsigned int n;
1190
1191 /* Create a new value by multiplying by a prime and adding in
1192 * entropy from the timebase register. This is to make the
1193 * VSID more random so that the PT Hash function collides
1194 * less often. (note that the prime causes gcc to do shifts
1195 * instead of a multiply)
1196 */
1197 pmap_vsidcontext = (pmap_vsidcontext * 0x1105) + entropy;
1198 hash = pmap_vsidcontext & (NPMAPS - 1);
1199 if (hash == 0) { /* 0 is special, avoid it */
1200 entropy += 0xbadf00d;
1201 continue;
1202 }
1203 n = hash >> 5;
1204 mask = 1L << (hash & (VSID_NBPW-1));
1205 hash = pmap_vsidcontext;
1206 if (pmap_vsid_bitmap[n] & mask) { /* collision? */
1207 /* anything free in this bucket? */
1208 if (~pmap_vsid_bitmap[n] == 0) {
1209 entropy = hash ^ (hash >> 16);
1210 continue;
1211 }
1212 i = ffs(~pmap_vsid_bitmap[n]) - 1;
1213 mask = 1L << i;
1214 hash &= ~(VSID_NBPW-1);
1215 hash |= i;
1216 }
1217 hash &= PTE_VSID >> PTE_VSID_SHFT;
1218 pmap_vsid_bitmap[n] |= mask;
1219 pm->pm_vsid = hash;
1220 #if defined (PMAP_OEA) || defined (PMAP_OEA64_BRIDGE)
1221 for (i = 0; i < 16; i++)
1222 pm->pm_sr[i] = VSID_MAKE(i, hash) | SR_PRKEY |
1223 SR_NOEXEC;
1224 #endif
1225 PMAP_UNLOCK();
1226 return;
1227 }
1228 PMAP_UNLOCK();
1229 panic("pmap_pinit: out of segments");
1230 }
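/*
 * Illustrative sketch, not compiled: where the bit claimed above lives in
 * pmap_vsid_bitmap[] for a given VSID; pmap_release() below clears the
 * same bit.  The helper name is hypothetical.
 */
#if 0
static bool
pmap_vsid_is_allocated(register_t vsid)
{
	u_int idx = vsid & (NPMAPS - 1);

	return (pmap_vsid_bitmap[idx / VSID_NBPW] &
	    (1 << (idx % VSID_NBPW))) != 0;
}
#endif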
1231
1232 /*
1233 * Add a reference to the given pmap.
1234 */
1235 void
1236 pmap_reference(pmap_t pm)
1237 {
1238 atomic_inc_uint(&pm->pm_refs);
1239 }
1240
1241 /*
1242 * Retire the given pmap from service.
1243 * Should only be called if the map contains no valid mappings.
1244 */
1245 void
1246 pmap_destroy(pmap_t pm)
1247 {
1248 if (atomic_dec_uint_nv(&pm->pm_refs) == 0) {
1249 pmap_release(pm);
1250 pool_put(&pmap_pool, pm);
1251 }
1252 }
1253
1254 /*
1255 * Release any resources held by the given physical map.
1256 * Called when a pmap initialized by pmap_pinit is being released.
1257 */
1258 void
1259 pmap_release(pmap_t pm)
1260 {
1261 int idx, mask;
1262
1263 KASSERT(pm->pm_stats.resident_count == 0);
1264 KASSERT(pm->pm_stats.wired_count == 0);
1265
1266 PMAP_LOCK();
1267 if (pm->pm_sr[0] == 0)
1268 panic("pmap_release");
1269 idx = pm->pm_vsid & (NPMAPS-1);
1270 mask = 1 << (idx % VSID_NBPW);
1271 idx /= VSID_NBPW;
1272
1273 KASSERT(pmap_vsid_bitmap[idx] & mask);
1274 pmap_vsid_bitmap[idx] &= ~mask;
1275 PMAP_UNLOCK();
1276 }
1277
1278 /*
1279 * Copy the range specified by src_addr/len
1280 * from the source map to the range dst_addr/len
1281 * in the destination map.
1282 *
1283 * This routine is only advisory and need not do anything.
1284 */
1285 void
1286 pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vaddr_t dst_addr,
1287 vsize_t len, vaddr_t src_addr)
1288 {
1289 PMAPCOUNT(copies);
1290 }
1291
1292 /*
1293 * Require that all active physical maps contain no
1294 * incorrect entries NOW.
1295 */
1296 void
1297 pmap_update(struct pmap *pmap)
1298 {
1299 PMAPCOUNT(updates);
1300 TLBSYNC();
1301 }
1302
1303 /*
1304 * Garbage collects the physical map system for
1305 * pages which are no longer used.
1306 * Success need not be guaranteed -- that is, there
1307 * may well be pages which are not referenced, but
1308 * others may be collected.
1309 * Called by the pageout daemon when pages are scarce.
1310 */
1311 void
1312 pmap_collect(pmap_t pm)
1313 {
1314 PMAPCOUNT(collects);
1315 }
1316
1317 static inline int
1318 pmap_pvo_pte_index(const struct pvo_entry *pvo, int ptegidx)
1319 {
1320 int pteidx;
1321 /*
1322 * We can find the actual pte entry without searching by
1323 * grabbing the PTEG slot index cached in the low bits of pvo_vaddr
1324 * and by noticing the HID bit.
1325 */
1326 pteidx = ptegidx * 8 + PVO_PTEGIDX_GET(pvo);
1327 if (pvo->pvo_pte.pte_hi & PTE_HID)
1328 pteidx ^= pmap_pteg_mask * 8;
1329 return pteidx;
1330 }
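/*
 * Illustrative sketch, not compiled: a pteidx as computed above is simply
 * "PTEG index * 8 + slot", so it can be split back apart the same way
 * pmap_pvo_to_pte() does below.  The helper name is hypothetical.
 */
#if 0
static volatile struct pte *
pmap_pteidx_to_pte_example(int pteidx)
{
	return &pmap_pteg_table[pteidx >> 3].pt[pteidx & 7];
}
#endif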
1331
1332 volatile struct pte *
1333 pmap_pvo_to_pte(const struct pvo_entry *pvo, int pteidx)
1334 {
1335 volatile struct pte *pt;
1336
1337 #if !defined(DIAGNOSTIC) && !defined(DEBUG) && !defined(PMAPCHECK)
1338 if ((pvo->pvo_pte.pte_hi & PTE_VALID) == 0)
1339 return NULL;
1340 #endif
1341
1342 /*
1343 * If we haven't been supplied the ptegidx, calculate it.
1344 */
1345 if (pteidx == -1) {
1346 int ptegidx;
1347 ptegidx = va_to_pteg(pvo->pvo_pmap, pvo->pvo_vaddr);
1348 pteidx = pmap_pvo_pte_index(pvo, ptegidx);
1349 }
1350
1351 pt = &pmap_pteg_table[pteidx >> 3].pt[pteidx & 7];
1352
1353 #if !defined(DIAGNOSTIC) && !defined(DEBUG) && !defined(PMAPCHECK)
1354 return pt;
1355 #else
1356 if ((pvo->pvo_pte.pte_hi & PTE_VALID) && !PVO_PTEGIDX_ISSET(pvo)) {
1357 panic("pmap_pvo_to_pte: pvo %p: has valid pte in "
1358 "pvo but no valid pte index", pvo);
1359 }
1360 if ((pvo->pvo_pte.pte_hi & PTE_VALID) == 0 && PVO_PTEGIDX_ISSET(pvo)) {
1361 panic("pmap_pvo_to_pte: pvo %p: has valid pte index in "
1362 "pvo but no valid pte", pvo);
1363 }
1364
1365 if ((pt->pte_hi ^ (pvo->pvo_pte.pte_hi & ~PTE_VALID)) == PTE_VALID) {
1366 if ((pvo->pvo_pte.pte_hi & PTE_VALID) == 0) {
1367 #if defined(DEBUG) || defined(PMAPCHECK)
1368 pmap_pte_print(pt);
1369 #endif
1370 panic("pmap_pvo_to_pte: pvo %p: has valid pte in "
1371 "pmap_pteg_table %p but invalid in pvo",
1372 pvo, pt);
1373 }
1374 if (((pt->pte_lo ^ pvo->pvo_pte.pte_lo) & ~(PTE_CHG|PTE_REF)) != 0) {
1375 #if defined(DEBUG) || defined(PMAPCHECK)
1376 pmap_pte_print(pt);
1377 #endif
1378 panic("pmap_pvo_to_pte: pvo %p: pvo pte does "
1379 "not match pte %p in pmap_pteg_table",
1380 pvo, pt);
1381 }
1382 return pt;
1383 }
1384
1385 if (pvo->pvo_pte.pte_hi & PTE_VALID) {
1386 #if defined(DEBUG) || defined(PMAPCHECK)
1387 pmap_pte_print(pt);
1388 #endif
1389 panic("pmap_pvo_to_pte: pvo %p: has nomatching pte %p in "
1390 "pmap_pteg_table but valid in pvo", pvo, pt);
1391 }
1392 return NULL;
1393 #endif /* !(!DIAGNOSTIC && !DEBUG && !PMAPCHECK) */
1394 }
1395
1396 struct pvo_entry *
1397 pmap_pvo_find_va(pmap_t pm, vaddr_t va, int *pteidx_p)
1398 {
1399 struct pvo_entry *pvo;
1400 int ptegidx;
1401
1402 va &= ~ADDR_POFF;
1403 ptegidx = va_to_pteg(pm, va);
1404
1405 TAILQ_FOREACH(pvo, &pmap_pvo_table[ptegidx], pvo_olink) {
1406 #if defined(DIAGNOSTIC) || defined(DEBUG) || defined(PMAPCHECK)
1407 if ((uintptr_t) pvo >= SEGMENT_LENGTH)
1408 panic("pmap_pvo_find_va: invalid pvo %p on "
1409 "list %#x (%p)", pvo, ptegidx,
1410 &pmap_pvo_table[ptegidx]);
1411 #endif
1412 if (pvo->pvo_pmap == pm && PVO_VADDR(pvo) == va) {
1413 if (pteidx_p)
1414 *pteidx_p = pmap_pvo_pte_index(pvo, ptegidx);
1415 return pvo;
1416 }
1417 }
1418 if ((pm == pmap_kernel()) && (va < SEGMENT_LENGTH))
1419 panic("%s: returning NULL for %s pmap, va: 0x%08" _PRIxva "\n",
1420 __func__, (pm == pmap_kernel() ? "kernel" : "user"), va);
1421 return NULL;
1422 }
1423
1424 #if defined(DEBUG) || defined(PMAPCHECK)
1425 void
1426 pmap_pvo_check(const struct pvo_entry *pvo)
1427 {
1428 struct pvo_head *pvo_head;
1429 struct pvo_entry *pvo0;
1430 volatile struct pte *pt;
1431 int failed = 0;
1432
1433 PMAP_LOCK();
1434
1435 if ((uintptr_t)(pvo+1) >= SEGMENT_LENGTH)
1436 panic("pmap_pvo_check: pvo %p: invalid address", pvo);
1437
1438 if ((uintptr_t)(pvo->pvo_pmap+1) >= SEGMENT_LENGTH) {
1439 printf("pmap_pvo_check: pvo %p: invalid pmap address %p\n",
1440 pvo, pvo->pvo_pmap);
1441 failed = 1;
1442 }
1443
1444 if ((uintptr_t)TAILQ_NEXT(pvo, pvo_olink) >= SEGMENT_LENGTH ||
1445 (((uintptr_t)TAILQ_NEXT(pvo, pvo_olink)) & 0x1f) != 0) {
1446 printf("pmap_pvo_check: pvo %p: invalid ovlink address %p\n",
1447 pvo, TAILQ_NEXT(pvo, pvo_olink));
1448 failed = 1;
1449 }
1450
1451 if ((uintptr_t)LIST_NEXT(pvo, pvo_vlink) >= SEGMENT_LENGTH ||
1452 (((uintptr_t)LIST_NEXT(pvo, pvo_vlink)) & 0x1f) != 0) {
1453 printf("pmap_pvo_check: pvo %p: invalid ovlink address %p\n",
1454 pvo, LIST_NEXT(pvo, pvo_vlink));
1455 failed = 1;
1456 }
1457
1458 if (PVO_MANAGED_P(pvo)) {
1459 pvo_head = pa_to_pvoh(pvo->pvo_pte.pte_lo & PTE_RPGN, NULL);
1460 } else {
1461 if (pvo->pvo_vaddr < VM_MIN_KERNEL_ADDRESS) {
1462 printf("pmap_pvo_check: pvo %p: non kernel address "
1463 "on kernel unmanaged list\n", pvo);
1464 failed = 1;
1465 }
1466 pvo_head = &pmap_pvo_kunmanaged;
1467 }
1468 LIST_FOREACH(pvo0, pvo_head, pvo_vlink) {
1469 if (pvo0 == pvo)
1470 break;
1471 }
1472 if (pvo0 == NULL) {
1473 printf("pmap_pvo_check: pvo %p: not present "
1474 "on its vlist head %p\n", pvo, pvo_head);
1475 failed = 1;
1476 }
1477 if (pvo != pmap_pvo_find_va(pvo->pvo_pmap, pvo->pvo_vaddr, NULL)) {
1478 printf("pmap_pvo_check: pvo %p: not present "
1479 "on its olist head\n", pvo);
1480 failed = 1;
1481 }
1482 pt = pmap_pvo_to_pte(pvo, -1);
1483 if (pt == NULL) {
1484 if (pvo->pvo_pte.pte_hi & PTE_VALID) {
1485 printf("pmap_pvo_check: pvo %p: pte_hi VALID but "
1486 "no PTE\n", pvo);
1487 failed = 1;
1488 }
1489 } else {
1490 if ((uintptr_t) pt < (uintptr_t) &pmap_pteg_table[0] ||
1491 (uintptr_t) pt >=
1492 (uintptr_t) &pmap_pteg_table[pmap_pteg_cnt]) {
1493 printf("pmap_pvo_check: pvo %p: pte %p not in "
1494 "pteg table\n", pvo, pt);
1495 failed = 1;
1496 }
1497 if (((((uintptr_t) pt) >> 3) & 7) != PVO_PTEGIDX_GET(pvo)) {
1498 printf("pmap_pvo_check: pvo %p: pte_hi VALID but "
1499 "no PTE\n", pvo);
1500 failed = 1;
1501 }
1502 if (pvo->pvo_pte.pte_hi != pt->pte_hi) {
1503 printf("pmap_pvo_check: pvo %p: pte_hi differ: "
1504 "%#x/%#x\n", pvo, (unsigned int) pvo->pvo_pte.pte_hi, (unsigned int) pt->pte_hi);
1505 failed = 1;
1506 }
1507 if (((pvo->pvo_pte.pte_lo ^ pt->pte_lo) &
1508 (PTE_PP|PTE_WIMG|PTE_RPGN)) != 0) {
1509 printf("pmap_pvo_check: pvo %p: pte_lo differ: "
1510 "%#x/%#x\n", pvo,
1511 (unsigned int) (pvo->pvo_pte.pte_lo & (PTE_PP|PTE_WIMG|PTE_RPGN)),
1512 (unsigned int) (pt->pte_lo & (PTE_PP|PTE_WIMG|PTE_RPGN)));
1513 failed = 1;
1514 }
1515 if ((pmap_pte_to_va(pt) ^ PVO_VADDR(pvo)) & 0x0fffffff) {
1516 printf("pmap_pvo_check: pvo %p: PTE %p derived VA %#" _PRIxva ""
1517 " doesn't not match PVO's VA %#" _PRIxva "\n",
1518 pvo, pt, pmap_pte_to_va(pt), PVO_VADDR(pvo));
1519 failed = 1;
1520 }
1521 if (failed)
1522 pmap_pte_print(pt);
1523 }
1524 if (failed)
1525 panic("pmap_pvo_check: pvo %p, pm %p: bugcheck!", pvo,
1526 pvo->pvo_pmap);
1527
1528 PMAP_UNLOCK();
1529 }
1530 #endif /* DEBUG || PMAPCHECK */
1531
1532 /*
1533 * Search the PVO table looking for a non-wired entry.
1534 * If we find one, remove it and return it.
1535 */
1536
1537 struct pvo_entry *
1538 pmap_pvo_reclaim(struct pmap *pm)
1539 {
1540 struct pvo_tqhead *pvoh;
1541 struct pvo_entry *pvo;
1542 uint32_t idx, endidx;
1543
1544 endidx = pmap_pvo_reclaim_nextidx;
1545 for (idx = (endidx + 1) & pmap_pteg_mask; idx != endidx;
1546 idx = (idx + 1) & pmap_pteg_mask) {
1547 pvoh = &pmap_pvo_table[idx];
1548 TAILQ_FOREACH(pvo, pvoh, pvo_olink) {
1549 if (!PVO_WIRED_P(pvo)) {
1550 pmap_pvo_remove(pvo, -1, NULL);
1551 pmap_pvo_reclaim_nextidx = idx;
1552 PMAPCOUNT(pvos_reclaimed);
1553 return pvo;
1554 }
1555 }
1556 }
1557 return NULL;
1558 }
1559
1560 /*
1561 * This returns whether this is the first mapping of a page.
1562 */
1563 int
1564 pmap_pvo_enter(pmap_t pm, struct pool *pl, struct pvo_head *pvo_head,
1565 vaddr_t va, paddr_t pa, register_t pte_lo, int flags)
1566 {
1567 struct pvo_entry *pvo;
1568 struct pvo_tqhead *pvoh;
1569 register_t msr;
1570 int ptegidx;
1571 int i;
1572 int poolflags = PR_NOWAIT;
1573
1574 /*
1575 * Compute the PTE Group index.
1576 */
1577 va &= ~ADDR_POFF;
1578 ptegidx = va_to_pteg(pm, va);
1579
1580 msr = pmap_interrupts_off();
1581
1582 #if defined(DIAGNOSTIC) || defined(DEBUG) || defined(PMAPCHECK)
1583 if (pmap_pvo_remove_depth > 0)
1584 panic("pmap_pvo_enter: called while pmap_pvo_remove active!");
1585 if (++pmap_pvo_enter_depth > 1)
1586 panic("pmap_pvo_enter: called recursively!");
1587 #endif
1588
1589 /*
1590 * Remove any existing mapping for this page. Reuse the
1591 * pvo entry if there is a mapping.
1592 */
1593 TAILQ_FOREACH(pvo, &pmap_pvo_table[ptegidx], pvo_olink) {
1594 if (pvo->pvo_pmap == pm && PVO_VADDR(pvo) == va) {
1595 #ifdef DEBUG
1596 if ((pmapdebug & PMAPDEBUG_PVOENTER) &&
1597 ((pvo->pvo_pte.pte_lo ^ (pa|pte_lo)) &
1598 ~(PTE_REF|PTE_CHG)) == 0 &&
1599 va < VM_MIN_KERNEL_ADDRESS) {
1600 printf("pmap_pvo_enter: pvo %p: dup %" _PRIxpa "/%#" _PRIxpa "\n",
1601 pvo, (unsigned int) pvo->pvo_pte.pte_lo, (unsigned int) pte_lo|pa);
1602 printf("pmap_pvo_enter: pte_hi=%" _PRIxpa " sr=%#x\n",
1603 (unsigned int) pvo->pvo_pte.pte_hi,
1604 (unsigned int) pm->pm_sr[va >> ADDR_SR_SHFT]);
1605 pmap_pte_print(pmap_pvo_to_pte(pvo, -1));
1606 #ifdef DDBX
1607 Debugger();
1608 #endif
1609 }
1610 #endif
1611 PMAPCOUNT(mappings_replaced);
1612 pmap_pvo_remove(pvo, -1, NULL);
1613 break;
1614 }
1615 }
1616
1617 /*
1618 * If we aren't overwriting a mapping, try to allocate.
1619 */
1620 #if defined(DIAGNOSTIC) || defined(DEBUG) || defined(PMAPCHECK)
1621 --pmap_pvo_enter_depth;
1622 #endif
1623 pmap_interrupts_restore(msr);
1624 if (pvo) {
1625 pmap_pvo_free(pvo);
1626 }
1627 pvo = pool_get(pl, poolflags);
1628
1629 #ifdef DEBUG
1630 /*
1631 * Exercise pmap_pvo_reclaim() a little.
1632 */
1633 if (pvo && (flags & PMAP_CANFAIL) != 0 &&
1634 pmap_pvo_reclaim_debugctr++ > 0x1000 &&
1635 (pmap_pvo_reclaim_debugctr & 0xff) == 0) {
1636 pool_put(pl, pvo);
1637 pvo = NULL;
1638 }
1639 #endif
1640
1641 msr = pmap_interrupts_off();
1642 #if defined(DIAGNOSTIC) || defined(DEBUG) || defined(PMAPCHECK)
1643 ++pmap_pvo_enter_depth;
1644 #endif
1645 if (pvo == NULL) {
1646 pvo = pmap_pvo_reclaim(pm);
1647 if (pvo == NULL) {
1648 if ((flags & PMAP_CANFAIL) == 0)
1649 panic("pmap_pvo_enter: failed");
1650 #if defined(DIAGNOSTIC) || defined(DEBUG) || defined(PMAPCHECK)
1651 pmap_pvo_enter_depth--;
1652 #endif
1653 PMAPCOUNT(pvos_failed);
1654 pmap_interrupts_restore(msr);
1655 return ENOMEM;
1656 }
1657 }
1658
1659 pvo->pvo_vaddr = va;
1660 pvo->pvo_pmap = pm;
1661 pvo->pvo_vaddr &= ~ADDR_POFF;
1662 if (flags & VM_PROT_EXECUTE) {
1663 PMAPCOUNT(exec_mappings);
1664 pvo_set_exec(pvo);
1665 }
1666 if (flags & PMAP_WIRED)
1667 pvo->pvo_vaddr |= PVO_WIRED;
1668 if (pvo_head != &pmap_pvo_kunmanaged) {
1669 pvo->pvo_vaddr |= PVO_MANAGED;
1670 PMAPCOUNT(mappings);
1671 } else {
1672 PMAPCOUNT(kernel_mappings);
1673 }
1674 pmap_pte_create(&pvo->pvo_pte, pm, va, pa | pte_lo);
1675
1676 LIST_INSERT_HEAD(pvo_head, pvo, pvo_vlink);
1677 if (PVO_WIRED_P(pvo))
1678 pvo->pvo_pmap->pm_stats.wired_count++;
1679 pvo->pvo_pmap->pm_stats.resident_count++;
1680 #if defined(DEBUG)
1681 /* if (pm != pmap_kernel() && va < VM_MIN_KERNEL_ADDRESS) */
1682 DPRINTFN(PVOENTER,
1683 ("pmap_pvo_enter: pvo %p: pm %p va %#" _PRIxva " pa %#" _PRIxpa "\n",
1684 pvo, pm, va, pa));
1685 #endif
1686
1687 /*
1688 * We hope this succeeds but it isn't required.
1689 */
1690 pvoh = &pmap_pvo_table[ptegidx];
1691 i = pmap_pte_insert(ptegidx, &pvo->pvo_pte);
1692 if (i >= 0) {
1693 PVO_PTEGIDX_SET(pvo, i);
1694 PVO_WHERE(pvo, ENTER_INSERT);
1695 PMAPCOUNT2(((pvo->pvo_pte.pte_hi & PTE_HID)
1696 ? pmap_evcnt_ptes_secondary : pmap_evcnt_ptes_primary)[i]);
1697 TAILQ_INSERT_TAIL(pvoh, pvo, pvo_olink);
1698
1699 } else {
1700 /*
1701 * Since we didn't have room for this entry (which makes it
1702 * an evicted entry), place it at the head of the list.
1703 */
1704 TAILQ_INSERT_HEAD(pvoh, pvo, pvo_olink);
1705 PMAPCOUNT(ptes_evicted);
1706 pm->pm_evictions++;
1707 /*
1708 * If this is a kernel page, make sure it's active.
1709 */
1710 if (pm == pmap_kernel()) {
1711 i = pmap_pte_spill(pm, va, false);
1712 KASSERT(i);
1713 }
1714 }
1715 PMAP_PVO_CHECK(pvo); /* sanity check */
1716 #if defined(DIAGNOSTIC) || defined(DEBUG) || defined(PMAPCHECK)
1717 pmap_pvo_enter_depth--;
1718 #endif
1719 pmap_interrupts_restore(msr);
1720 return 0;
1721 }
1722
1723 static void
1724 pmap_pvo_remove(struct pvo_entry *pvo, int pteidx, struct pvo_head *pvol)
1725 {
1726 volatile struct pte *pt;
1727 int ptegidx;
1728
1729 #if defined(DIAGNOSTIC) || defined(DEBUG) || defined(PMAPCHECK)
1730 if (++pmap_pvo_remove_depth > 1)
1731 panic("pmap_pvo_remove: called recursively!");
1732 #endif
1733
1734 /*
1735 * If we haven't been supplied the ptegidx, calculate it.
1736 */
1737 if (pteidx == -1) {
1738 ptegidx = va_to_pteg(pvo->pvo_pmap, pvo->pvo_vaddr);
1739 pteidx = pmap_pvo_pte_index(pvo, ptegidx);
1740 } else {
1741 ptegidx = pteidx >> 3;
1742 if (pvo->pvo_pte.pte_hi & PTE_HID)
1743 ptegidx ^= pmap_pteg_mask;
1744 }
1745 PMAP_PVO_CHECK(pvo); /* sanity check */
1746
1747 /*
1748 * If there is an active pte entry, we need to deactivate it
1749 * (and save the ref & chg bits).
1750 */
1751 pt = pmap_pvo_to_pte(pvo, pteidx);
1752 if (pt != NULL) {
1753 pmap_pte_unset(pt, &pvo->pvo_pte, pvo->pvo_vaddr);
1754 PVO_WHERE(pvo, REMOVE);
1755 PVO_PTEGIDX_CLR(pvo);
1756 PMAPCOUNT(ptes_removed);
1757 } else {
1758 KASSERT(pvo->pvo_pmap->pm_evictions > 0);
1759 pvo->pvo_pmap->pm_evictions--;
1760 }
1761
1762 /*
1763 * Account for executable mappings.
1764 */
1765 if (PVO_EXECUTABLE_P(pvo))
1766 pvo_clear_exec(pvo);
1767
1768 /*
1769 * Update our statistics.
1770 */
1771 pvo->pvo_pmap->pm_stats.resident_count--;
1772 if (PVO_WIRED_P(pvo))
1773 pvo->pvo_pmap->pm_stats.wired_count--;
1774
1775 /*
1776 * Save the REF/CHG bits into their cache if the page is managed.
1777 */
1778 if (PVO_MANAGED_P(pvo)) {
1779 register_t ptelo = pvo->pvo_pte.pte_lo;
1780 struct vm_page *pg = PHYS_TO_VM_PAGE(ptelo & PTE_RPGN);
1781
1782 if (pg != NULL) {
1783 /*
1784 * If this page was changed and it is mapped exec,
1785 * invalidate it.
1786 */
1787 if ((ptelo & PTE_CHG) &&
1788 (pmap_attr_fetch(pg) & PTE_EXEC)) {
1789 struct pvo_head *pvoh = vm_page_to_pvoh(pg);
1790 if (LIST_EMPTY(pvoh)) {
1791 DPRINTFN(EXEC, ("[pmap_pvo_remove: "
1792 "%#" _PRIxpa ": clear-exec]\n",
1793 VM_PAGE_TO_PHYS(pg)));
1794 pmap_attr_clear(pg, PTE_EXEC);
1795 PMAPCOUNT(exec_uncached_pvo_remove);
1796 } else {
1797 DPRINTFN(EXEC, ("[pmap_pvo_remove: "
1798 "%#" _PRIxpa ": syncicache]\n",
1799 VM_PAGE_TO_PHYS(pg)));
1800 pmap_syncicache(VM_PAGE_TO_PHYS(pg),
1801 PAGE_SIZE);
1802 PMAPCOUNT(exec_synced_pvo_remove);
1803 }
1804 }
1805
1806 pmap_attr_save(pg, ptelo & (PTE_REF|PTE_CHG));
1807 }
1808 PMAPCOUNT(unmappings);
1809 } else {
1810 PMAPCOUNT(kernel_unmappings);
1811 }
1812
1813 /*
1814 * Remove the PVO from its lists and return it to the pool.
1815 */
1816 LIST_REMOVE(pvo, pvo_vlink);
1817 TAILQ_REMOVE(&pmap_pvo_table[ptegidx], pvo, pvo_olink);
1818 if (pvol) {
1819 LIST_INSERT_HEAD(pvol, pvo, pvo_vlink);
1820 }
1821 #if defined(DIAGNOSTIC) || defined(DEBUG) || defined(PMAPCHECK)
1822 pmap_pvo_remove_depth--;
1823 #endif
1824 }
1825
1826 void
1827 pmap_pvo_free(struct pvo_entry *pvo)
1828 {
1829
1830 pool_put(PVO_MANAGED_P(pvo) ? &pmap_mpvo_pool : &pmap_upvo_pool, pvo);
1831 }
1832
1833 void
1834 pmap_pvo_free_list(struct pvo_head *pvol)
1835 {
1836 struct pvo_entry *pvo, *npvo;
1837
1838 for (pvo = LIST_FIRST(pvol); pvo != NULL; pvo = npvo) {
1839 npvo = LIST_NEXT(pvo, pvo_vlink);
1840 LIST_REMOVE(pvo, pvo_vlink);
1841 pmap_pvo_free(pvo);
1842 }
1843 }
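/*
 * Illustrative sketch, not compiled: the deferred-free idiom these two
 * helpers support.  Callers collect removed PVOs on a private list while
 * interrupts are off and only return them to the pools afterwards.  The
 * function name is hypothetical.
 */
#if 0
static void
pmap_pvo_deferred_free_example(struct pvo_entry *pvo)
{
	struct pvo_head pvol = LIST_HEAD_INITIALIZER(pvol);
	register_t msr;

	msr = pmap_interrupts_off();
	pmap_pvo_remove(pvo, -1, &pvol);
	pmap_interrupts_restore(msr);

	pmap_pvo_free_list(&pvol);
}
#endif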
1844
1845 /*
1846 * Mark a mapping as executable.
1847 * If this is the first executable mapping in the segment,
1848 * clear the noexec flag.
1849 */
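/*
 * pm_exec[] counts executable mappings per 256MB segment; the segment's
 * SR_NOEXEC (no-execute) bit is set only while that count is zero.
 */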
1850 static void
1851 pvo_set_exec(struct pvo_entry *pvo)
1852 {
1853 struct pmap *pm = pvo->pvo_pmap;
1854
1855 if (pm == pmap_kernel() || PVO_EXECUTABLE_P(pvo)) {
1856 return;
1857 }
1858 pvo->pvo_vaddr |= PVO_EXECUTABLE;
1859 #if defined (PMAP_OEA) || defined (PMAP_OEA64_BRIDGE)
1860 {
1861 int sr = PVO_VADDR(pvo) >> ADDR_SR_SHFT;
1862 if (pm->pm_exec[sr]++ == 0) {
1863 pm->pm_sr[sr] &= ~SR_NOEXEC;
1864 }
1865 }
1866 #endif
1867 }
1868
1869 /*
1870 * Mark a mapping as non-executable.
1871 * If this was the last executable mapping in the segment,
1872 * set the noexec flag.
1873 */
1874 static void
1875 pvo_clear_exec(struct pvo_entry *pvo)
1876 {
1877 struct pmap *pm = pvo->pvo_pmap;
1878
1879 if (pm == pmap_kernel() || !PVO_EXECUTABLE_P(pvo)) {
1880 return;
1881 }
1882 pvo->pvo_vaddr &= ~PVO_EXECUTABLE;
1883 #if defined (PMAP_OEA) || defined (PMAP_OEA64_BRIDGE)
1884 {
1885 int sr = PVO_VADDR(pvo) >> ADDR_SR_SHFT;
1886 if (--pm->pm_exec[sr] == 0) {
1887 pm->pm_sr[sr] |= SR_NOEXEC;
1888 }
1889 }
1890 #endif
1891 }
1892
1893 /*
1894 * Insert physical page at pa into the given pmap at virtual address va.
1895 */
1896 int
1897 pmap_enter(pmap_t pm, vaddr_t va, paddr_t pa, vm_prot_t prot, int flags)
1898 {
1899 struct mem_region *mp;
1900 struct pvo_head *pvo_head;
1901 struct vm_page *pg;
1902 struct pool *pl;
1903 register_t pte_lo;
1904 int error;
1905 u_int pvo_flags;
1906 u_int was_exec = 0;
1907
1908 PMAP_LOCK();
1909
1910 if (__predict_false(!pmap_initialized)) {
1911 pvo_head = &pmap_pvo_kunmanaged;
1912 pl = &pmap_upvo_pool;
1913 pvo_flags = 0;
1914 pg = NULL;
1915 was_exec = PTE_EXEC;
1916 } else {
1917 pvo_head = pa_to_pvoh(pa, &pg);
1918 pl = &pmap_mpvo_pool;
1919 pvo_flags = PVO_MANAGED;
1920 }
1921
1922 DPRINTFN(ENTER,
1923 ("pmap_enter(%p, 0x%" _PRIxva ", 0x%" _PRIxpa ", 0x%x, 0x%x):",
1924 pm, va, pa, prot, flags));
1925
1926 /*
	 * If this is a managed page and it's the first reference to the
	 * page, clear the execness of the page.  Otherwise fetch the execness.
1929 */
1930 if (pg != NULL)
1931 was_exec = pmap_attr_fetch(pg) & PTE_EXEC;
1932
1933 DPRINTFN(ENTER, (" was_exec=%d", was_exec));
1934
1935 /*
	 * Assume the page is cache-inhibited and access is guarded unless
	 * it's in our available memory array.  If it is in the memory array,
	 * assume it's memory-coherent.
1939 */
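	/* PTE_IG selects the cache-inhibited + guarded WIMG bits; PTE_M
	 * requests memory coherence. */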
1940 pte_lo = PTE_IG;
1941 if ((flags & PMAP_NC) == 0) {
1942 for (mp = mem; mp->size; mp++) {
1943 if (pa >= mp->start && pa < mp->start + mp->size) {
1944 pte_lo = PTE_M;
1945 break;
1946 }
1947 }
1948 }
1949
1950 if (prot & VM_PROT_WRITE)
1951 pte_lo |= PTE_BW;
1952 else
1953 pte_lo |= PTE_BR;
1954
1955 /*
1956 * If this was in response to a fault, "pre-fault" the PTE's
1957 * changed/referenced bit appropriately.
1958 */
1959 if (flags & VM_PROT_WRITE)
1960 pte_lo |= PTE_CHG;
1961 if (flags & VM_PROT_ALL)
1962 pte_lo |= PTE_REF;
1963
1964 /*
1965 * We need to know if this page can be executable
1966 */
1967 flags |= (prot & VM_PROT_EXECUTE);
1968
1969 /*
1970 * Record mapping for later back-translation and pte spilling.
1971 * This will overwrite any existing mapping.
1972 */
1973 error = pmap_pvo_enter(pm, pl, pvo_head, va, pa, pte_lo, flags);
1974
1975 /*
1976 * Flush the real page from the instruction cache if this page is
1977 * mapped executable and cacheable and has not been flushed since
1978 * the last time it was modified.
1979 */
1980 if (error == 0 &&
1981 (flags & VM_PROT_EXECUTE) &&
1982 (pte_lo & PTE_I) == 0 &&
1983 was_exec == 0) {
1984 DPRINTFN(ENTER, (" syncicache"));
1985 PMAPCOUNT(exec_synced);
1986 pmap_syncicache(pa, PAGE_SIZE);
1987 if (pg != NULL) {
1988 pmap_attr_save(pg, PTE_EXEC);
1989 PMAPCOUNT(exec_cached);
1990 #if defined(DEBUG) || defined(PMAPDEBUG)
1991 if (pmapdebug & PMAPDEBUG_ENTER)
1992 printf(" marked-as-exec");
1993 else if (pmapdebug & PMAPDEBUG_EXEC)
1994 printf("[pmap_enter: %#" _PRIxpa ": marked-as-exec]\n",
1995 VM_PAGE_TO_PHYS(pg));
1996
1997 #endif
1998 }
1999 }
2000
2001 DPRINTFN(ENTER, (": error=%d\n", error));
2002
2003 PMAP_UNLOCK();
2004
2005 return error;
2006 }
2007
2008 void
2009 pmap_kenter_pa(vaddr_t va, paddr_t pa, vm_prot_t prot)
2010 {
2011 struct mem_region *mp;
2012 register_t pte_lo;
2013 int error;
2014
2015 #if defined (PMAP_OEA64_BRIDGE)
2016 if (va < VM_MIN_KERNEL_ADDRESS)
2017 panic("pmap_kenter_pa: attempt to enter "
2018 "non-kernel address %#" _PRIxva "!", va);
2019 #endif
2020
2021 DPRINTFN(KENTER,
2022 ("pmap_kenter_pa(%#" _PRIxva ",%#" _PRIxpa ",%#x)\n", va, pa, prot));
2023
2024 PMAP_LOCK();
2025
2026 /*
	 * Assume the page is cache-inhibited and access is guarded unless
	 * it's in our available memory array.  If it is in the memory array,
	 * assume it's memory-coherent.
2030 */
2031 pte_lo = PTE_IG;
2032 if ((prot & PMAP_NC) == 0) {
2033 for (mp = mem; mp->size; mp++) {
2034 if (pa >= mp->start && pa < mp->start + mp->size) {
2035 pte_lo = PTE_M;
2036 break;
2037 }
2038 }
2039 }
2040
2041 if (prot & VM_PROT_WRITE)
2042 pte_lo |= PTE_BW;
2043 else
2044 pte_lo |= PTE_BR;
2045
2046 /*
2047 * We don't care about REF/CHG on PVOs on the unmanaged list.
2048 */
2049 error = pmap_pvo_enter(pmap_kernel(), &pmap_upvo_pool,
2050 &pmap_pvo_kunmanaged, va, pa, pte_lo, prot|PMAP_WIRED);
2051
2052 if (error != 0)
2053 panic("pmap_kenter_pa: failed to enter va %#" _PRIxva " pa %#" _PRIxpa ": %d",
2054 va, pa, error);
2055
2056 PMAP_UNLOCK();
2057 }
2058
2059 void
2060 pmap_kremove(vaddr_t va, vsize_t len)
2061 {
2062 if (va < VM_MIN_KERNEL_ADDRESS)
2063 panic("pmap_kremove: attempt to remove "
2064 "non-kernel address %#" _PRIxva "!", va);
2065
2066 DPRINTFN(KREMOVE,("pmap_kremove(%#" _PRIxva ",%#" _PRIxva ")\n", va, len));
2067 pmap_remove(pmap_kernel(), va, va + len);
2068 }
2069
2070 /*
2071 * Remove the given range of mapping entries.
2072 */
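/*
 * Removed PVOs are collected on a local list and only handed back to the
 * pool after interrupts are restored, keeping pool_put() out of the
 * interrupts-off window.
 */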
2073 void
2074 pmap_remove(pmap_t pm, vaddr_t va, vaddr_t endva)
2075 {
2076 struct pvo_head pvol;
2077 struct pvo_entry *pvo;
2078 register_t msr;
2079 int pteidx;
2080
2081 PMAP_LOCK();
2082 LIST_INIT(&pvol);
2083 msr = pmap_interrupts_off();
2084 for (; va < endva; va += PAGE_SIZE) {
2085 pvo = pmap_pvo_find_va(pm, va, &pteidx);
2086 if (pvo != NULL) {
2087 pmap_pvo_remove(pvo, pteidx, &pvol);
2088 }
2089 }
2090 pmap_interrupts_restore(msr);
2091 pmap_pvo_free_list(&pvol);
2092 PMAP_UNLOCK();
2093 }
2094
2095 /*
2096 * Get the physical page address for the given pmap/virtual address.
2097 */
2098 bool
2099 pmap_extract(pmap_t pm, vaddr_t va, paddr_t *pap)
2100 {
2101 struct pvo_entry *pvo;
2102 register_t msr;
2103
2104 PMAP_LOCK();
2105
2106 /*
2107 * If this is a kernel pmap lookup, also check the battable
2108 * and if we get a hit, translate the VA to a PA using the
	 * BAT entries.  Don't check against VM_MAX_KERNEL_ADDRESS when
	 * it would wrap back to 0.
2111 */
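	/*
	 * A BAT hit translates the address in large blocks: the block-size
	 * field expands into a mask covering the (at least 128KB) block,
	 * the upper bits come from the BAT's physical block number and the
	 * lower bits from va itself.  For example, a 256MB BAT yields a
	 * mask of 0xf0000000, so *pap = BRPN | (va & 0x0fffffff).
	 */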
2112 if (pm == pmap_kernel() &&
2113 (va < VM_MIN_KERNEL_ADDRESS ||
2114 (KERNEL2_SR < 15 && VM_MAX_KERNEL_ADDRESS <= va))) {
2115 KASSERT((va >> ADDR_SR_SHFT) != USER_SR);
2116 #if defined (PMAP_OEA)
2117 if ((MFPVR() >> 16) != MPC601) {
2118 register_t batu = battable[va >> ADDR_SR_SHFT].batu;
2119 if (BAT_VALID_P(batu,0) && BAT_VA_MATCH_P(batu,va)) {
2120 register_t batl =
2121 battable[va >> ADDR_SR_SHFT].batl;
2122 register_t mask =
2123 (~(batu & BAT_BL) << 15) & ~0x1ffffL;
2124 if (pap)
2125 *pap = (batl & mask) | (va & ~mask);
2126 PMAP_UNLOCK();
2127 return true;
2128 }
2129 } else {
2130 register_t batu = battable[va >> 23].batu;
2131 register_t batl = battable[va >> 23].batl;
2132 register_t sr = iosrtable[va >> ADDR_SR_SHFT];
2133 if (BAT601_VALID_P(batl) &&
2134 BAT601_VA_MATCH_P(batu, batl, va)) {
2135 register_t mask =
2136 (~(batl & BAT601_BSM) << 17) & ~0x1ffffL;
2137 if (pap)
2138 *pap = (batl & mask) | (va & ~mask);
2139 PMAP_UNLOCK();
2140 return true;
2141 } else if (SR601_VALID_P(sr) &&
2142 SR601_PA_MATCH_P(sr, va)) {
2143 if (pap)
2144 *pap = va;
2145 PMAP_UNLOCK();
2146 return true;
2147 }
2148 }
2149 return false;
2150 #elif defined (PMAP_OEA64_BRIDGE)
2151 if (va >= SEGMENT_LENGTH)
2152 panic("%s: pm: %s va >= SEGMENT_LENGTH, va: 0x%08lx\n",
2153 __func__, (pm == pmap_kernel() ? "kernel" : "user"), va);
2154 else {
2155 if (pap)
2156 *pap = va;
2157 PMAP_UNLOCK();
2158 return true;
2159 }
2160 #elif defined (PMAP_OEA64)
2161 #error PPC_OEA64 not supported
2162 #endif /* PPC_OEA */
2163 }
2164
2165 msr = pmap_interrupts_off();
2166 pvo = pmap_pvo_find_va(pm, va & ~ADDR_POFF, NULL);
2167 if (pvo != NULL) {
2168 PMAP_PVO_CHECK(pvo); /* sanity check */
2169 if (pap)
2170 *pap = (pvo->pvo_pte.pte_lo & PTE_RPGN)
2171 | (va & ADDR_POFF);
2172 }
2173 pmap_interrupts_restore(msr);
2174 PMAP_UNLOCK();
2175 return pvo != NULL;
2176 }
2177
2178 /*
2179 * Lower the protection on the specified range of this pmap.
2180 */
2181 void
2182 pmap_protect(pmap_t pm, vaddr_t va, vaddr_t endva, vm_prot_t prot)
2183 {
2184 struct pvo_entry *pvo;
2185 volatile struct pte *pt;
2186 register_t msr;
2187 int pteidx;
2188
2189 /*
2190 * Since this routine only downgrades protection, we should
2191 * always be called with at least one bit not set.
2192 */
2193 KASSERT(prot != VM_PROT_ALL);
2194
2195 /*
	 * If read permission is being removed, this is equivalent to
	 * removing the whole range from the pmap.
2198 */
2199 if ((prot & VM_PROT_READ) == 0) {
2200 pmap_remove(pm, va, endva);
2201 return;
2202 }
2203
2204 PMAP_LOCK();
2205
2206 msr = pmap_interrupts_off();
2207 for (; va < endva; va += PAGE_SIZE) {
2208 pvo = pmap_pvo_find_va(pm, va, &pteidx);
2209 if (pvo == NULL)
2210 continue;
2211 PMAP_PVO_CHECK(pvo); /* sanity check */
2212
2213 /*
2214 * Revoke executable if asked to do so.
2215 */
2216 if ((prot & VM_PROT_EXECUTE) == 0)
2217 pvo_clear_exec(pvo);
2218
2219 #if 0
2220 /*
2221 * If the page is already read-only, no change
2222 * needs to be made.
2223 */
2224 if ((pvo->pvo_pte.pte_lo & PTE_PP) == PTE_BR)
2225 continue;
2226 #endif
2227 /*
2228 * Grab the PTE pointer before we diddle with
2229 * the cached PTE copy.
2230 */
2231 pt = pmap_pvo_to_pte(pvo, pteidx);
2232 /*
2233 * Change the protection of the page.
2234 */
2235 pvo->pvo_pte.pte_lo &= ~PTE_PP;
2236 pvo->pvo_pte.pte_lo |= PTE_BR;
2237
2238 /*
2239 * If the PVO is in the page table, update
		 * that pte as well.
2241 */
2242 if (pt != NULL) {
2243 pmap_pte_change(pt, &pvo->pvo_pte, pvo->pvo_vaddr);
2244 PVO_WHERE(pvo, PMAP_PROTECT);
2245 PMAPCOUNT(ptes_changed);
2246 }
2247
2248 PMAP_PVO_CHECK(pvo); /* sanity check */
2249 }
2250 pmap_interrupts_restore(msr);
2251 PMAP_UNLOCK();
2252 }
2253
2254 void
2255 pmap_unwire(pmap_t pm, vaddr_t va)
2256 {
2257 struct pvo_entry *pvo;
2258 register_t msr;
2259
2260 PMAP_LOCK();
2261 msr = pmap_interrupts_off();
2262 pvo = pmap_pvo_find_va(pm, va, NULL);
2263 if (pvo != NULL) {
2264 if (PVO_WIRED_P(pvo)) {
2265 pvo->pvo_vaddr &= ~PVO_WIRED;
2266 pm->pm_stats.wired_count--;
2267 }
2268 PMAP_PVO_CHECK(pvo); /* sanity check */
2269 }
2270 pmap_interrupts_restore(msr);
2271 PMAP_UNLOCK();
2272 }
2273
2274 /*
2275 * Lower the protection on the specified physical page.
2276 */
2277 void
2278 pmap_page_protect(struct vm_page *pg, vm_prot_t prot)
2279 {
2280 struct pvo_head *pvo_head, pvol;
2281 struct pvo_entry *pvo, *next_pvo;
2282 volatile struct pte *pt;
2283 register_t msr;
2284
2285 PMAP_LOCK();
2286
2287 KASSERT(prot != VM_PROT_ALL);
2288 LIST_INIT(&pvol);
2289 msr = pmap_interrupts_off();
2290
2291 /*
2292 * When UVM reuses a page, it does a pmap_page_protect with
2293 * VM_PROT_NONE. At that point, we can clear the exec flag
2294 * since we know the page will have different contents.
2295 */
2296 if ((prot & VM_PROT_READ) == 0) {
2297 DPRINTFN(EXEC, ("[pmap_page_protect: %#" _PRIxpa ": clear-exec]\n",
2298 VM_PAGE_TO_PHYS(pg)));
2299 if (pmap_attr_fetch(pg) & PTE_EXEC) {
2300 PMAPCOUNT(exec_uncached_page_protect);
2301 pmap_attr_clear(pg, PTE_EXEC);
2302 }
2303 }
2304
2305 pvo_head = vm_page_to_pvoh(pg);
2306 for (pvo = LIST_FIRST(pvo_head); pvo != NULL; pvo = next_pvo) {
2307 next_pvo = LIST_NEXT(pvo, pvo_vlink);
2308 PMAP_PVO_CHECK(pvo); /* sanity check */
2309
2310 /*
2311 * Downgrading to no mapping at all, we just remove the entry.
2312 */
2313 if ((prot & VM_PROT_READ) == 0) {
2314 pmap_pvo_remove(pvo, -1, &pvol);
2315 continue;
2316 }
2317
2318 /*
2319 * If EXEC permission is being revoked, just clear the
2320 * flag in the PVO.
2321 */
2322 if ((prot & VM_PROT_EXECUTE) == 0)
2323 pvo_clear_exec(pvo);
2324
2325 /*
2326 * If this entry is already RO, don't diddle with the
2327 * page table.
2328 */
2329 if ((pvo->pvo_pte.pte_lo & PTE_PP) == PTE_BR) {
2330 PMAP_PVO_CHECK(pvo);
2331 continue;
2332 }
2333
2334 /*
		 * Grab the PTE before we diddle the bits so
		 * pmap_pvo_to_pte() can verify the pte contents
		 * are as expected.
2338 */
2339 pt = pmap_pvo_to_pte(pvo, -1);
2340 pvo->pvo_pte.pte_lo &= ~PTE_PP;
2341 pvo->pvo_pte.pte_lo |= PTE_BR;
2342 if (pt != NULL) {
2343 pmap_pte_change(pt, &pvo->pvo_pte, pvo->pvo_vaddr);
2344 PVO_WHERE(pvo, PMAP_PAGE_PROTECT);
2345 PMAPCOUNT(ptes_changed);
2346 }
2347 PMAP_PVO_CHECK(pvo); /* sanity check */
2348 }
2349 pmap_interrupts_restore(msr);
2350 pmap_pvo_free_list(&pvol);
2351
2352 PMAP_UNLOCK();
2353 }
2354
2355 /*
2356 * Activate the address space for the specified process. If the process
2357 * is the current process, load the new MMU context.
2358 */
2359 void
2360 pmap_activate(struct lwp *l)
2361 {
2362 struct pcb *pcb = &l->l_addr->u_pcb;
2363 pmap_t pmap = l->l_proc->p_vmspace->vm_map.pmap;
2364
2365 DPRINTFN(ACTIVATE,
2366 ("pmap_activate: lwp %p (curlwp %p)\n", l, curlwp));
2367
2368 /*
2369 * XXX Normally performed in cpu_fork().
2370 */
2371 pcb->pcb_pm = pmap;
2372
2373 /*
	 * In theory, the SR registers need only be valid on return
	 * to user space, so we could wait and load them there.
2376 */
2377 if (l == curlwp) {
2378 /* Store pointer to new current pmap. */
2379 curpm = pmap;
2380 }
2381 }
2382
2383 /*
2384 * Deactivate the specified process's address space.
2385 */
2386 void
2387 pmap_deactivate(struct lwp *l)
2388 {
2389 }
2390
2391 bool
2392 pmap_query_bit(struct vm_page *pg, int ptebit)
2393 {
2394 struct pvo_entry *pvo;
2395 volatile struct pte *pt;
2396 register_t msr;
2397
2398 PMAP_LOCK();
2399
2400 if (pmap_attr_fetch(pg) & ptebit) {
2401 PMAP_UNLOCK();
2402 return true;
2403 }
2404
2405 msr = pmap_interrupts_off();
2406 LIST_FOREACH(pvo, vm_page_to_pvoh(pg), pvo_vlink) {
2407 PMAP_PVO_CHECK(pvo); /* sanity check */
2408 /*
		 * See if we saved the bit off.  If so, cache it and return
2410 * success.
2411 */
2412 if (pvo->pvo_pte.pte_lo & ptebit) {
2413 pmap_attr_save(pg, ptebit);
2414 PMAP_PVO_CHECK(pvo); /* sanity check */
2415 pmap_interrupts_restore(msr);
2416 PMAP_UNLOCK();
2417 return true;
2418 }
2419 }
2420 /*
2421 * No luck, now go thru the hard part of looking at the ptes
2422 * themselves. Sync so any pending REF/CHG bits are flushed
2423 * to the PTEs.
2424 */
2425 SYNC();
2426 LIST_FOREACH(pvo, vm_page_to_pvoh(pg), pvo_vlink) {
2427 PMAP_PVO_CHECK(pvo); /* sanity check */
2428 /*
		 * See if this pvo has a valid PTE.  If so, fetch the
		 * REF/CHG bits from the valid PTE.  If the appropriate
		 * ptebit is set, cache it and return success.
2432 */
2433 pt = pmap_pvo_to_pte(pvo, -1);
2434 if (pt != NULL) {
2435 pmap_pte_synch(pt, &pvo->pvo_pte);
2436 if (pvo->pvo_pte.pte_lo & ptebit) {
2437 pmap_attr_save(pg, ptebit);
2438 PMAP_PVO_CHECK(pvo); /* sanity check */
2439 pmap_interrupts_restore(msr);
2440 PMAP_UNLOCK();
2441 return true;
2442 }
2443 }
2444 }
2445 pmap_interrupts_restore(msr);
2446 PMAP_UNLOCK();
2447 return false;
2448 }
2449
2450 bool
2451 pmap_clear_bit(struct vm_page *pg, int ptebit)
2452 {
2453 struct pvo_head *pvoh = vm_page_to_pvoh(pg);
2454 struct pvo_entry *pvo;
2455 volatile struct pte *pt;
2456 register_t msr;
2457 int rv = 0;
2458
2459 PMAP_LOCK();
2460 msr = pmap_interrupts_off();
2461
2462 /*
	 * Fetch the cached value.
2464 */
2465 rv |= pmap_attr_fetch(pg);
2466
2467 /*
2468 * Clear the cached value.
2469 */
2470 pmap_attr_clear(pg, ptebit);
2471
2472 /*
2473 * Sync so any pending REF/CHG bits are flushed to the PTEs (so we
2474 * can reset the right ones). Note that since the pvo entries and
2475 * list heads are accessed via BAT0 and are never placed in the
2476 * page table, we don't have to worry about further accesses setting
2477 * the REF/CHG bits.
2478 */
2479 SYNC();
2480
2481 /*
	 * For each pvo entry, clear the pvo's ptebit.  If the pvo has a
	 * valid PTE, clear the ptebit from that PTE as well.
2484 */
2485 LIST_FOREACH(pvo, pvoh, pvo_vlink) {
2486 PMAP_PVO_CHECK(pvo); /* sanity check */
2487 pt = pmap_pvo_to_pte(pvo, -1);
2488 if (pt != NULL) {
2489 /*
2490 * Only sync the PTE if the bit we are looking
2491 * for is not already set.
2492 */
2493 if ((pvo->pvo_pte.pte_lo & ptebit) == 0)
2494 pmap_pte_synch(pt, &pvo->pvo_pte);
2495 /*
2496 * If the bit we are looking for was already set,
2497 * clear that bit in the pte.
2498 */
2499 if (pvo->pvo_pte.pte_lo & ptebit)
2500 pmap_pte_clear(pt, PVO_VADDR(pvo), ptebit);
2501 }
2502 rv |= pvo->pvo_pte.pte_lo & (PTE_CHG|PTE_REF);
2503 pvo->pvo_pte.pte_lo &= ~ptebit;
2504 PMAP_PVO_CHECK(pvo); /* sanity check */
2505 }
2506 pmap_interrupts_restore(msr);
2507
2508 /*
2509 * If we are clearing the modify bit and this page was marked EXEC
2510 * and the user of the page thinks the page was modified, then we
2511 * need to clean it from the icache if it's mapped or clear the EXEC
2512 * bit if it's not mapped. The page itself might not have the CHG
2513 * bit set if the modification was done via DMA to the page.
2514 */
2515 if ((ptebit & PTE_CHG) && (rv & PTE_EXEC)) {
2516 if (LIST_EMPTY(pvoh)) {
2517 DPRINTFN(EXEC, ("[pmap_clear_bit: %#" _PRIxpa ": clear-exec]\n",
2518 VM_PAGE_TO_PHYS(pg)));
2519 pmap_attr_clear(pg, PTE_EXEC);
2520 PMAPCOUNT(exec_uncached_clear_modify);
2521 } else {
2522 DPRINTFN(EXEC, ("[pmap_clear_bit: %#" _PRIxpa ": syncicache]\n",
2523 VM_PAGE_TO_PHYS(pg)));
2524 pmap_syncicache(VM_PAGE_TO_PHYS(pg), PAGE_SIZE);
2525 PMAPCOUNT(exec_synced_clear_modify);
2526 }
2527 }
2528 PMAP_UNLOCK();
2529 return (rv & ptebit) != 0;
2530 }
2531
2532 void
2533 pmap_procwr(struct proc *p, vaddr_t va, size_t len)
2534 {
2535 struct pvo_entry *pvo;
2536 size_t offset = va & ADDR_POFF;
2537 int s;
2538
2539 PMAP_LOCK();
2540 s = splvm();
2541 while (len > 0) {
2542 size_t seglen = PAGE_SIZE - offset;
2543 if (seglen > len)
2544 seglen = len;
2545 pvo = pmap_pvo_find_va(p->p_vmspace->vm_map.pmap, va, NULL);
2546 if (pvo != NULL && PVO_EXECUTABLE_P(pvo)) {
2547 pmap_syncicache(
2548 (pvo->pvo_pte.pte_lo & PTE_RPGN) | offset, seglen);
2549 PMAP_PVO_CHECK(pvo);
2550 }
2551 va += seglen;
2552 len -= seglen;
2553 offset = 0;
2554 }
2555 splx(s);
2556 PMAP_UNLOCK();
2557 }
2558
2559 #if defined(DEBUG) || defined(PMAPCHECK) || defined(DDB)
2560 void
2561 pmap_pte_print(volatile struct pte *pt)
2562 {
2563 printf("PTE %p: ", pt);
2564
2565 #if defined(PMAP_OEA)
2566 /* High word: */
2567 printf("0x%08" _PRIxpte ": [", pt->pte_hi);
2568 #else
2569 printf("0x%016" _PRIxpte ": [", pt->pte_hi);
2570 #endif /* PMAP_OEA */
2571
2572 printf("%c ", (pt->pte_hi & PTE_VALID) ? 'v' : 'i');
2573 printf("%c ", (pt->pte_hi & PTE_HID) ? 'h' : '-');
2574
2575 printf("0x%06" _PRIxpte " 0x%02" _PRIxpte "",
2576 (pt->pte_hi &~ PTE_VALID)>>PTE_VSID_SHFT,
2577 pt->pte_hi & PTE_API);
2578 #if defined(PMAP_OEA) || defined(PMAP_OEA64_BRIDGE)
2579 printf(" (va 0x%08" _PRIxva ")] ", pmap_pte_to_va(pt));
2580 #else
2581 printf(" (va 0x%016" _PRIxva ")] ", pmap_pte_to_va(pt));
2582 #endif /* PMAP_OEA */
2583
2584 /* Low word: */
2585 #if defined (PMAP_OEA)
2586 printf(" 0x%08" _PRIxpte ": [", pt->pte_lo);
2587 printf("0x%05" _PRIxpte "... ", pt->pte_lo >> 12);
2588 #else
2589 printf(" 0x%016" _PRIxpte ": [", pt->pte_lo);
2590 printf("0x%012" _PRIxpte "... ", pt->pte_lo >> 12);
2591 #endif
2592 printf("%c ", (pt->pte_lo & PTE_REF) ? 'r' : 'u');
2593 printf("%c ", (pt->pte_lo & PTE_CHG) ? 'c' : 'n');
2594 printf("%c", (pt->pte_lo & PTE_W) ? 'w' : '.');
2595 printf("%c", (pt->pte_lo & PTE_I) ? 'i' : '.');
2596 printf("%c", (pt->pte_lo & PTE_M) ? 'm' : '.');
2597 printf("%c ", (pt->pte_lo & PTE_G) ? 'g' : '.');
2598 switch (pt->pte_lo & PTE_PP) {
2599 case PTE_BR: printf("br]\n"); break;
2600 case PTE_BW: printf("bw]\n"); break;
2601 case PTE_SO: printf("so]\n"); break;
2602 case PTE_SW: printf("sw]\n"); break;
2603 }
2604 }
2605 #endif
2606
2607 #if defined(DDB)
2608 void
2609 pmap_pteg_check(void)
2610 {
2611 volatile struct pte *pt;
2612 int i;
2613 int ptegidx;
2614 u_int p_valid = 0;
2615 u_int s_valid = 0;
2616 u_int invalid = 0;
2617
2618 for (ptegidx = 0; ptegidx < pmap_pteg_cnt; ptegidx++) {
2619 for (pt = pmap_pteg_table[ptegidx].pt, i = 8; --i >= 0; pt++) {
2620 if (pt->pte_hi & PTE_VALID) {
2621 if (pt->pte_hi & PTE_HID)
2622 s_valid++;
2623 else
2624 {
2625 p_valid++;
2626 }
2627 } else
2628 invalid++;
2629 }
2630 }
2631 printf("pteg_check: v(p) %#x (%d), v(s) %#x (%d), i %#x (%d)\n",
2632 p_valid, p_valid, s_valid, s_valid,
2633 invalid, invalid);
2634 }
2635
2636 void
2637 pmap_print_mmuregs(void)
2638 {
2639 int i;
2640 u_int cpuvers;
2641 #ifndef PMAP_OEA64
2642 vaddr_t addr;
2643 register_t soft_sr[16];
2644 #endif
2645 #if defined (PMAP_OEA) || defined (PMAP_OEA_BRIDGE)
2646 struct bat soft_ibat[4];
2647 struct bat soft_dbat[4];
2648 #endif
2649 paddr_t sdr1;
2650
2651 cpuvers = MFPVR() >> 16;
2652 __asm volatile ("mfsdr1 %0" : "=r"(sdr1));
2653 #ifndef PMAP_OEA64
2654 addr = 0;
2655 for (i = 0; i < 16; i++) {
2656 soft_sr[i] = MFSRIN(addr);
2657 addr += (1 << ADDR_SR_SHFT);
2658 }
2659 #endif
2660
2661 #if defined (PMAP_OEA) || defined (PMAP_OEA_BRIDGE)
2662 /* read iBAT (601: uBAT) registers */
2663 __asm volatile ("mfibatu %0,0" : "=r"(soft_ibat[0].batu));
2664 __asm volatile ("mfibatl %0,0" : "=r"(soft_ibat[0].batl));
2665 __asm volatile ("mfibatu %0,1" : "=r"(soft_ibat[1].batu));
2666 __asm volatile ("mfibatl %0,1" : "=r"(soft_ibat[1].batl));
2667 __asm volatile ("mfibatu %0,2" : "=r"(soft_ibat[2].batu));
2668 __asm volatile ("mfibatl %0,2" : "=r"(soft_ibat[2].batl));
2669 __asm volatile ("mfibatu %0,3" : "=r"(soft_ibat[3].batu));
2670 __asm volatile ("mfibatl %0,3" : "=r"(soft_ibat[3].batl));
2671
2672
2673 if (cpuvers != MPC601) {
2674 /* read dBAT registers */
2675 __asm volatile ("mfdbatu %0,0" : "=r"(soft_dbat[0].batu));
2676 __asm volatile ("mfdbatl %0,0" : "=r"(soft_dbat[0].batl));
2677 __asm volatile ("mfdbatu %0,1" : "=r"(soft_dbat[1].batu));
2678 __asm volatile ("mfdbatl %0,1" : "=r"(soft_dbat[1].batl));
2679 __asm volatile ("mfdbatu %0,2" : "=r"(soft_dbat[2].batu));
2680 __asm volatile ("mfdbatl %0,2" : "=r"(soft_dbat[2].batl));
2681 __asm volatile ("mfdbatu %0,3" : "=r"(soft_dbat[3].batu));
2682 __asm volatile ("mfdbatl %0,3" : "=r"(soft_dbat[3].batl));
2683 }
2684 #endif
2685
2686 printf("SDR1:\t0x%" _PRIxpa "\n", sdr1);
2687 #ifndef PMAP_OEA64
2688 printf("SR[]:\t");
2689 for (i = 0; i < 4; i++)
2690 printf("0x%08lx, ", soft_sr[i]);
2691 printf("\n\t");
2692 for ( ; i < 8; i++)
2693 printf("0x%08lx, ", soft_sr[i]);
2694 printf("\n\t");
2695 for ( ; i < 12; i++)
2696 printf("0x%08lx, ", soft_sr[i]);
2697 printf("\n\t");
2698 for ( ; i < 16; i++)
2699 printf("0x%08lx, ", soft_sr[i]);
2700 printf("\n");
2701 #endif
2702
2703 #if defined(PMAP_OEA) || defined(PMAP_OEA_BRIDGE)
2704 printf("%cBAT[]:\t", cpuvers == MPC601 ? 'u' : 'i');
2705 for (i = 0; i < 4; i++) {
2706 printf("0x%08lx 0x%08lx, ",
2707 soft_ibat[i].batu, soft_ibat[i].batl);
2708 if (i == 1)
2709 printf("\n\t");
2710 }
2711 if (cpuvers != MPC601) {
2712 printf("\ndBAT[]:\t");
2713 for (i = 0; i < 4; i++) {
2714 printf("0x%08lx 0x%08lx, ",
2715 soft_dbat[i].batu, soft_dbat[i].batl);
2716 if (i == 1)
2717 printf("\n\t");
2718 }
2719 }
2720 printf("\n");
2721 #endif /* PMAP_OEA... */
2722 }
2723
2724 void
2725 pmap_print_pte(pmap_t pm, vaddr_t va)
2726 {
2727 struct pvo_entry *pvo;
2728 volatile struct pte *pt;
2729 int pteidx;
2730
2731 pvo = pmap_pvo_find_va(pm, va, &pteidx);
2732 if (pvo != NULL) {
2733 pt = pmap_pvo_to_pte(pvo, pteidx);
2734 if (pt != NULL) {
2735 printf("VA %#" _PRIxva " -> %p -> %s %#" _PRIxpte ", %#" _PRIxpte "\n",
2736 va, pt,
2737 pt->pte_hi & PTE_HID ? "(sec)" : "(pri)",
2738 pt->pte_hi, pt->pte_lo);
2739 } else {
2740 printf("No valid PTE found\n");
2741 }
2742 } else {
2743 printf("Address not in pmap\n");
2744 }
2745 }
2746
2747 void
2748 pmap_pteg_dist(void)
2749 {
2750 struct pvo_entry *pvo;
2751 int ptegidx;
2752 int depth;
2753 int max_depth = 0;
2754 unsigned int depths[64];
2755
2756 memset(depths, 0, sizeof(depths));
2757 for (ptegidx = 0; ptegidx < pmap_pteg_cnt; ptegidx++) {
2758 depth = 0;
2759 TAILQ_FOREACH(pvo, &pmap_pvo_table[ptegidx], pvo_olink) {
2760 depth++;
2761 }
2762 if (depth > max_depth)
2763 max_depth = depth;
2764 if (depth > 63)
2765 depth = 63;
2766 depths[depth]++;
2767 }
2768
2769 for (depth = 0; depth < 64; depth++) {
2770 printf(" [%2d]: %8u", depth, depths[depth]);
2771 if ((depth & 3) == 3)
2772 printf("\n");
2773 if (depth == max_depth)
2774 break;
2775 }
2776 if ((depth & 3) != 3)
2777 printf("\n");
2778 printf("Max depth found was %d\n", max_depth);
2779 }
2780 #endif /* DEBUG */
2781
2782 #if defined(PMAPCHECK) || defined(DEBUG)
2783 void
2784 pmap_pvo_verify(void)
2785 {
2786 int ptegidx;
2787 int s;
2788
2789 s = splvm();
2790 for (ptegidx = 0; ptegidx < pmap_pteg_cnt; ptegidx++) {
2791 struct pvo_entry *pvo;
2792 TAILQ_FOREACH(pvo, &pmap_pvo_table[ptegidx], pvo_olink) {
2793 if ((uintptr_t) pvo >= SEGMENT_LENGTH)
2794 panic("pmap_pvo_verify: invalid pvo %p "
2795 "on list %#x", pvo, ptegidx);
2796 pmap_pvo_check(pvo);
2797 }
2798 }
2799 splx(s);
2800 }
2801 #endif /* PMAPCHECK */
2802
2803
2804 void *
2805 pmap_pool_ualloc(struct pool *pp, int flags)
2806 {
2807 struct pvo_page *pvop;
2808
2809 if (uvm.page_init_done != true) {
2810 return (void *) uvm_pageboot_alloc(PAGE_SIZE);
2811 }
2812
2813 PMAP_LOCK();
2814 pvop = SIMPLEQ_FIRST(&pmap_upvop_head);
2815 if (pvop != NULL) {
2816 pmap_upvop_free--;
2817 SIMPLEQ_REMOVE_HEAD(&pmap_upvop_head, pvop_link);
2818 PMAP_UNLOCK();
2819 return pvop;
2820 }
2821 PMAP_UNLOCK();
2822 return pmap_pool_malloc(pp, flags);
2823 }
2824
2825 void *
2826 pmap_pool_malloc(struct pool *pp, int flags)
2827 {
2828 struct pvo_page *pvop;
2829 struct vm_page *pg;
2830
2831 PMAP_LOCK();
2832 pvop = SIMPLEQ_FIRST(&pmap_mpvop_head);
2833 if (pvop != NULL) {
2834 pmap_mpvop_free--;
2835 SIMPLEQ_REMOVE_HEAD(&pmap_mpvop_head, pvop_link);
2836 PMAP_UNLOCK();
2837 return pvop;
2838 }
2839 PMAP_UNLOCK();
2840 again:
2841 pg = uvm_pagealloc_strat(NULL, 0, NULL, UVM_PGA_USERESERVE,
2842 UVM_PGA_STRAT_ONLY, VM_FREELIST_FIRST256);
2843 if (__predict_false(pg == NULL)) {
2844 if (flags & PR_WAITOK) {
2845 uvm_wait("plpg");
2846 goto again;
2847 } else {
2848 return (0);
2849 }
2850 }
2851 KDASSERT(VM_PAGE_TO_PHYS(pg) == (uintptr_t)VM_PAGE_TO_PHYS(pg));
2852 return (void *)(uintptr_t) VM_PAGE_TO_PHYS(pg);
2853 }
2854
2855 void
2856 pmap_pool_ufree(struct pool *pp, void *va)
2857 {
2858 struct pvo_page *pvop;
2859 #if 0
2860 if (PHYS_TO_VM_PAGE((paddr_t) va) != NULL) {
2861 pmap_pool_mfree(va, size, tag);
2862 return;
2863 }
2864 #endif
2865 PMAP_LOCK();
2866 pvop = va;
2867 SIMPLEQ_INSERT_HEAD(&pmap_upvop_head, pvop, pvop_link);
2868 pmap_upvop_free++;
2869 if (pmap_upvop_free > pmap_upvop_maxfree)
2870 pmap_upvop_maxfree = pmap_upvop_free;
2871 PMAP_UNLOCK();
2872 }
2873
2874 void
2875 pmap_pool_mfree(struct pool *pp, void *va)
2876 {
2877 struct pvo_page *pvop;
2878
2879 PMAP_LOCK();
2880 pvop = va;
2881 SIMPLEQ_INSERT_HEAD(&pmap_mpvop_head, pvop, pvop_link);
2882 pmap_mpvop_free++;
2883 if (pmap_mpvop_free > pmap_mpvop_maxfree)
2884 pmap_mpvop_maxfree = pmap_mpvop_free;
2885 PMAP_UNLOCK();
2886 #if 0
2887 uvm_pagefree(PHYS_TO_VM_PAGE((paddr_t) va));
2888 #endif
2889 }
2890
2891 /*
 * This routine is used during bootstrapping to steal to-be-managed memory
 * (which will then be unmanaged).  We grab our allocations from the first
 * 256MB, leaving memory above 256MB for other uses.
2895 */
2896 vaddr_t
2897 pmap_steal_memory(vsize_t vsize, vaddr_t *vstartp, vaddr_t *vendp)
2898 {
2899 vsize_t size;
2900 vaddr_t va;
2901 paddr_t pa = 0;
2902 int npgs, bank;
2903 struct vm_physseg *ps;
2904
2905 if (uvm.page_init_done == true)
2906 panic("pmap_steal_memory: called _after_ bootstrap");
2907
2908 *vstartp = VM_MIN_KERNEL_ADDRESS;
2909 *vendp = VM_MAX_KERNEL_ADDRESS;
2910
2911 size = round_page(vsize);
2912 npgs = atop(size);
2913
2914 /*
2915 * PA 0 will never be among those given to UVM so we can use it
2916 * to indicate we couldn't steal any memory.
2917 */
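	/*
	 * Only VM_FREELIST_FIRST256 segments are considered, so the stolen
	 * pages stay below SEGMENT_LENGTH where they are directly
	 * addressable by the pmap.
	 */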
2918 for (ps = vm_physmem, bank = 0; bank < vm_nphysseg; bank++, ps++) {
2919 if (ps->free_list == VM_FREELIST_FIRST256 &&
2920 ps->avail_end - ps->avail_start >= npgs) {
2921 pa = ptoa(ps->avail_start);
2922 break;
2923 }
2924 }
2925
2926 if (pa == 0)
2927 panic("pmap_steal_memory: no approriate memory to steal!");
2928
2929 ps->avail_start += npgs;
2930 ps->start += npgs;
2931
2932 /*
2933 * If we've used up all the pages in the segment, remove it and
2934 * compact the list.
2935 */
2936 if (ps->avail_start == ps->end) {
2937 /*
2938 * If this was the last one, then a very bad thing has occurred
2939 */
2940 if (--vm_nphysseg == 0)
2941 panic("pmap_steal_memory: out of memory!");
2942
2943 printf("pmap_steal_memory: consumed bank %d\n", bank);
2944 for (; bank < vm_nphysseg; bank++, ps++) {
2945 ps[0] = ps[1];
2946 }
2947 }
2948
2949 va = (vaddr_t) pa;
2950 memset((void *) va, 0, size);
2951 pmap_pages_stolen += npgs;
2952 #ifdef DEBUG
2953 if (pmapdebug && npgs > 1) {
2954 u_int cnt = 0;
2955 for (bank = 0, ps = vm_physmem; bank < vm_nphysseg; bank++, ps++)
2956 cnt += ps->avail_end - ps->avail_start;
2957 printf("pmap_steal_memory: stole %u (total %u) pages (%u left)\n",
2958 npgs, pmap_pages_stolen, cnt);
2959 }
2960 #endif
2961
2962 return va;
2963 }
2964
2965 /*
 * Find a chunk of memory with the right size and alignment.
2967 */
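/*
 * If at_end is set, the memory is taken from the high end of the highest
 * available region that is large enough (alignment must then be PAGE_SIZE);
 * otherwise the first region that can hold a suitably aligned block is
 * carved up.
 */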
2968 paddr_t
2969 pmap_boot_find_memory(psize_t size, psize_t alignment, int at_end)
2970 {
2971 struct mem_region *mp;
2972 paddr_t s, e;
2973 int i, j;
2974
2975 size = round_page(size);
2976
2977 DPRINTFN(BOOT,
2978 ("pmap_boot_find_memory: size=%" _PRIxpa ", alignment=%" _PRIxpa ", at_end=%d",
2979 size, alignment, at_end));
2980
2981 if (alignment < PAGE_SIZE || (alignment & (alignment-1)) != 0)
2982 panic("pmap_boot_find_memory: invalid alignment %" _PRIxpa,
2983 alignment);
2984
2985 if (at_end) {
2986 if (alignment != PAGE_SIZE)
2987 panic("pmap_boot_find_memory: invalid ending "
2988 "alignment %#" _PRIxpa, alignment);
2989
2990 for (mp = &avail[avail_cnt-1]; mp >= avail; mp--) {
2991 s = mp->start + mp->size - size;
2992 if (s >= mp->start && mp->size >= size) {
2993 DPRINTFN(BOOT,(": %" _PRIxpa "\n", s));
2994 DPRINTFN(BOOT,
2995 ("pmap_boot_find_memory: b-avail[%d] start "
2996 "0x%" _PRIxpa " size 0x%" _PRIxpa "\n", mp - avail,
2997 mp->start, mp->size));
2998 mp->size -= size;
2999 DPRINTFN(BOOT,
3000 ("pmap_boot_find_memory: a-avail[%d] start "
3001 "0x%" _PRIxpa " size 0x%" _PRIxpa "\n", mp - avail,
3002 mp->start, mp->size));
3003 return s;
3004 }
3005 }
3006 panic("pmap_boot_find_memory: no available memory");
3007 }
3008
3009 for (mp = avail, i = 0; i < avail_cnt; i++, mp++) {
3010 s = (mp->start + alignment - 1) & ~(alignment-1);
3011 e = s + size;
3012
3013 /*
		 * Does the calculated block lie entirely within this region?
3015 */
3016 if (s < mp->start || e > mp->start + mp->size)
3017 continue;
3018
3019 DPRINTFN(BOOT,(": %" _PRIxpa "\n", s));
3020 if (s == mp->start) {
3021 /*
			 * If the block starts at the beginning of the region,
			 * adjust the start & size.  (The region may now be
			 * zero in length.)
3025 */
3026 DPRINTFN(BOOT,
3027 ("pmap_boot_find_memory: b-avail[%d] start "
3028 "0x%" _PRIxpa " size 0x%" _PRIxpa "\n", i, mp->start, mp->size));
3029 mp->start += size;
3030 mp->size -= size;
3031 DPRINTFN(BOOT,
3032 ("pmap_boot_find_memory: a-avail[%d] start "
3033 "0x%" _PRIxpa " size 0x%" _PRIxpa "\n", i, mp->start, mp->size));
3034 } else if (e == mp->start + mp->size) {
3035 /*
			 * If the block ends at the end of the region,
			 * adjust only the size.
3038 */
3039 DPRINTFN(BOOT,
3040 ("pmap_boot_find_memory: b-avail[%d] start "
3041 "0x%" _PRIxpa " size 0x%" _PRIxpa "\n", i, mp->start, mp->size));
3042 mp->size -= size;
3043 DPRINTFN(BOOT,
3044 ("pmap_boot_find_memory: a-avail[%d] start "
3045 "0x%" _PRIxpa " size 0x%" _PRIxpa "\n", i, mp->start, mp->size));
3046 } else {
3047 /*
3048 * Block is in the middle of the region, so we
3049 * have to split it in two.
3050 */
3051 for (j = avail_cnt; j > i + 1; j--) {
3052 avail[j] = avail[j-1];
3053 }
3054 DPRINTFN(BOOT,
3055 ("pmap_boot_find_memory: b-avail[%d] start "
3056 "0x%" _PRIxpa " size 0x%" _PRIxpa "\n", i, mp->start, mp->size));
3057 mp[1].start = e;
3058 mp[1].size = mp[0].start + mp[0].size - e;
3059 mp[0].size = s - mp[0].start;
3060 avail_cnt++;
3061 for (; i < avail_cnt; i++) {
3062 DPRINTFN(BOOT,
3063 ("pmap_boot_find_memory: a-avail[%d] "
3064 "start 0x%" _PRIxpa " size 0x%" _PRIxpa "\n", i,
3065 avail[i].start, avail[i].size));
3066 }
3067 }
3068 KASSERT(s == (uintptr_t) s);
3069 return s;
3070 }
3071 panic("pmap_boot_find_memory: not enough memory for "
3072 "%" _PRIxpa "/%" _PRIxpa " allocation?", size, alignment);
3073 }
3074
/* XXXSL: we don't have any BATs to do this, so map in segment 0 1:1 using page tables */
3076 #if defined (PMAP_OEA64_BRIDGE)
3077 int
3078 pmap_setup_segment0_map(int use_large_pages, ...)
3079 {
3080 vaddr_t va;
3081
3082 register_t pte_lo = 0x0;
3083 int ptegidx = 0, i = 0;
3084 struct pte pte;
3085 va_list ap;
3086
3087 /* Coherent + Supervisor RW, no user access */
3088 pte_lo = PTE_M;
3089
3090 /* XXXSL
	 * Map in the 1st segment 1:1; we'll be careful not to spill kernel entries
	 * later, since those have to take priority.
3093 */
3094 for (va = 0x0; va < SEGMENT_LENGTH; va += 0x1000) {
3095 ptegidx = va_to_pteg(pmap_kernel(), va);
3096 pmap_pte_create(&pte, pmap_kernel(), va, va | pte_lo);
3097 i = pmap_pte_insert(ptegidx, &pte);
3098 }
3099
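	/*
	 * The variable arguments are (va, pa, size) triples describing
	 * additional ranges to map; the list is terminated by a zero va.
	 */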
3100 va_start(ap, use_large_pages);
3101 while (1) {
		paddr_t pa;
		size_t size;
		vaddr_t va_end;
3104
3105 va = va_arg(ap, vaddr_t);
3106
3107 if (va == 0)
3108 break;
3109
3110 pa = va_arg(ap, paddr_t);
3111 size = va_arg(ap, size_t);
3112
3113 for (; va < (va + size); va += 0x1000, pa += 0x1000) {
3114 #if 0
3115 printf("%s: Inserting: va: 0x%08" _PRIxva ", pa: 0x%08" _PRIxpa "\n", __func__, va, pa);
3116 #endif
3117 ptegidx = va_to_pteg(pmap_kernel(), va);
3118 pmap_pte_create(&pte, pmap_kernel(), va, pa | pte_lo);
3119 i = pmap_pte_insert(ptegidx, &pte);
3120 }
3121 }
3122
3123 TLBSYNC();
3124 SYNC();
3125 return (0);
3126 }
3127 #endif /* PMAP_OEA64_BRIDGE */
3128
3129 /*
3130 * This is not part of the defined PMAP interface and is specific to the
3131 * PowerPC architecture. This is called during initppc, before the system
3132 * is really initialized.
3133 */
3134 void
3135 pmap_bootstrap(paddr_t kernelstart, paddr_t kernelend)
3136 {
3137 struct mem_region *mp, tmp;
3138 paddr_t s, e;
3139 psize_t size;
3140 int i, j;
3141
3142 /*
3143 * Get memory.
3144 */
3145 mem_regions(&mem, &avail);
3146 #if defined(DEBUG)
3147 if (pmapdebug & PMAPDEBUG_BOOT) {
3148 printf("pmap_bootstrap: memory configuration:\n");
3149 for (mp = mem; mp->size; mp++) {
3150 printf("pmap_bootstrap: mem start 0x%" _PRIxpa " size 0x%" _PRIxpa "\n",
3151 mp->start, mp->size);
3152 }
3153 for (mp = avail; mp->size; mp++) {
3154 printf("pmap_bootstrap: avail start 0x%" _PRIxpa " size 0x%" _PRIxpa "\n",
3155 mp->start, mp->size);
3156 }
3157 }
3158 #endif
3159
3160 /*
3161 * Find out how much physical memory we have and in how many chunks.
3162 */
3163 for (mem_cnt = 0, mp = mem; mp->size; mp++) {
3164 if (mp->start >= pmap_memlimit)
3165 continue;
3166 if (mp->start + mp->size > pmap_memlimit) {
3167 size = pmap_memlimit - mp->start;
3168 physmem += btoc(size);
3169 } else {
3170 physmem += btoc(mp->size);
3171 }
3172 mem_cnt++;
3173 }
3174
3175 /*
3176 * Count the number of available entries.
3177 */
3178 for (avail_cnt = 0, mp = avail; mp->size; mp++)
3179 avail_cnt++;
3180
3181 /*
3182 * Page align all regions.
3183 */
3184 kernelstart = trunc_page(kernelstart);
3185 kernelend = round_page(kernelend);
3186 for (mp = avail, i = 0; i < avail_cnt; i++, mp++) {
3187 s = round_page(mp->start);
3188 mp->size -= (s - mp->start);
3189 mp->size = trunc_page(mp->size);
3190 mp->start = s;
3191 e = mp->start + mp->size;
3192
3193 DPRINTFN(BOOT,
3194 ("pmap_bootstrap: b-avail[%d] start 0x%" _PRIxpa " size 0x%" _PRIxpa "\n",
3195 i, mp->start, mp->size));
3196
3197 /*
3198 * Don't allow the end to run beyond our artificial limit
3199 */
3200 if (e > pmap_memlimit)
3201 e = pmap_memlimit;
3202
3203 /*
		 * If this region is empty or strange, skip it.
3205 */
3206 if (e <= s) {
3207 mp->start = 0;
3208 mp->size = 0;
3209 continue;
3210 }
3211
3212 /*
		 * Does this region overlap the beginning of the kernel?
		 * Does it also extend past the end of the kernel?
3215 */
3216 else if (s < kernelstart && e > kernelstart) {
3217 if (e > kernelend) {
3218 avail[avail_cnt].start = kernelend;
3219 avail[avail_cnt].size = e - kernelend;
3220 avail_cnt++;
3221 }
3222 mp->size = kernelstart - s;
3223 }
3224 /*
3225 * Check whether this region overlaps the end of the kernel.
3226 */
3227 else if (s < kernelend && e > kernelend) {
3228 mp->start = kernelend;
3229 mp->size = e - kernelend;
3230 }
3231 /*
		 * Check whether this region is completely inside the kernel.
		 * Nuke it if it is.
3234 */
3235 else if (s >= kernelstart && e <= kernelend) {
3236 mp->start = 0;
3237 mp->size = 0;
3238 }
3239 /*
3240 * If the user imposed a memory limit, enforce it.
3241 */
3242 else if (s >= pmap_memlimit) {
			mp->start = -PAGE_SIZE;	/* so we know why */
3244 mp->size = 0;
3245 }
3246 else {
3247 mp->start = s;
3248 mp->size = e - s;
3249 }
3250 DPRINTFN(BOOT,
3251 ("pmap_bootstrap: a-avail[%d] start 0x%" _PRIxpa " size 0x%" _PRIxpa "\n",
3252 i, mp->start, mp->size));
3253 }
3254
3255 /*
	 * Move (and uncount) all the null regions to the end.
3257 */
3258 for (mp = avail, i = 0; i < avail_cnt; i++, mp++) {
3259 if (mp->size == 0) {
3260 tmp = avail[i];
3261 avail[i] = avail[--avail_cnt];
3262 avail[avail_cnt] = avail[i];
3263 }
3264 }
3265
3266 /*
	 * (Bubble)sort them into ascending order.
3268 */
3269 for (i = 0; i < avail_cnt; i++) {
3270 for (j = i + 1; j < avail_cnt; j++) {
3271 if (avail[i].start > avail[j].start) {
3272 tmp = avail[i];
3273 avail[i] = avail[j];
3274 avail[j] = tmp;
3275 }
3276 }
3277 }
3278
3279 /*
3280 * Make sure they don't overlap.
3281 */
3282 for (mp = avail, i = 0; i < avail_cnt - 1; i++, mp++) {
3283 if (mp[0].start + mp[0].size > mp[1].start) {
3284 mp[0].size = mp[1].start - mp[0].start;
3285 }
3286 DPRINTFN(BOOT,
3287 ("pmap_bootstrap: avail[%d] start 0x%" _PRIxpa " size 0x%" _PRIxpa "\n",
3288 i, mp->start, mp->size));
3289 }
3290 DPRINTFN(BOOT,
3291 ("pmap_bootstrap: avail[%d] start 0x%" _PRIxpa " size 0x%" _PRIxpa "\n",
3292 i, mp->start, mp->size));
3293
3294 #ifdef PTEGCOUNT
3295 pmap_pteg_cnt = PTEGCOUNT;
3296 #else /* PTEGCOUNT */
3297
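	/*
	 * Use a power-of-two number of PTEGs roughly between half of and
	 * the full count of physical pages (but never fewer than 0x800
	 * groups); at 8 PTEs per group that leaves several slots per page.
	 */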
3298 pmap_pteg_cnt = 0x1000;
3299
3300 while (pmap_pteg_cnt < physmem)
3301 pmap_pteg_cnt <<= 1;
3302
3303 pmap_pteg_cnt >>= 1;
3304 #endif /* PTEGCOUNT */
3305
3306 #ifdef DEBUG
3307 DPRINTFN(BOOT,
3308 ("pmap_pteg_cnt: 0x%x\n", pmap_pteg_cnt));
3309 #endif
3310
3311 /*
3312 * Find suitably aligned memory for PTEG hash table.
3313 */
3314 size = pmap_pteg_cnt * sizeof(struct pteg);
3315 pmap_pteg_table = (void *)(uintptr_t) pmap_boot_find_memory(size, size, 0);
3316
3317 #ifdef DEBUG
3318 DPRINTFN(BOOT,
3319 ("PTEG cnt: 0x%x HTAB size: 0x%08x bytes, address: %p\n", pmap_pteg_cnt, (unsigned int)size, pmap_pteg_table));
3320 #endif
3321
3322
3323 #if defined(DIAGNOSTIC) || defined(DEBUG) || defined(PMAPCHECK)
3324 if ( (uintptr_t) pmap_pteg_table + size > SEGMENT_LENGTH)
3325 panic("pmap_bootstrap: pmap_pteg_table end (%p + %" _PRIxpa ") > 256MB",
3326 pmap_pteg_table, size);
3327 #endif
3328
3329 memset(__UNVOLATILE(pmap_pteg_table), 0,
3330 pmap_pteg_cnt * sizeof(struct pteg));
3331 pmap_pteg_mask = pmap_pteg_cnt - 1;
3332
3333 /*
3334 * We cannot do pmap_steal_memory here since UVM hasn't been loaded
3335 * with pages. So we just steal them before giving them to UVM.
3336 */
3337 size = sizeof(pmap_pvo_table[0]) * pmap_pteg_cnt;
3338 pmap_pvo_table = (void *)(uintptr_t) pmap_boot_find_memory(size, PAGE_SIZE, 0);
3339 #if defined(DIAGNOSTIC) || defined(DEBUG) || defined(PMAPCHECK)
3340 if ( (uintptr_t) pmap_pvo_table + size > SEGMENT_LENGTH)
3341 panic("pmap_bootstrap: pmap_pvo_table end (%p + %" _PRIxpa ") > 256MB",
3342 pmap_pvo_table, size);
3343 #endif
3344
3345 for (i = 0; i < pmap_pteg_cnt; i++)
3346 TAILQ_INIT(&pmap_pvo_table[i]);
3347
3348 #ifndef MSGBUFADDR
3349 /*
3350 * Allocate msgbuf in high memory.
3351 */
3352 msgbuf_paddr = pmap_boot_find_memory(MSGBUFSIZE, PAGE_SIZE, 1);
3353 #endif
3354
3355 for (mp = avail, i = 0; i < avail_cnt; mp++, i++) {
3356 paddr_t pfstart = atop(mp->start);
3357 paddr_t pfend = atop(mp->start + mp->size);
3358 if (mp->size == 0)
3359 continue;
3360 if (mp->start + mp->size <= SEGMENT_LENGTH) {
3361 uvm_page_physload(pfstart, pfend, pfstart, pfend,
3362 VM_FREELIST_FIRST256);
3363 } else if (mp->start >= SEGMENT_LENGTH) {
3364 uvm_page_physload(pfstart, pfend, pfstart, pfend,
3365 VM_FREELIST_DEFAULT);
3366 } else {
3367 pfend = atop(SEGMENT_LENGTH);
3368 uvm_page_physload(pfstart, pfend, pfstart, pfend,
3369 VM_FREELIST_FIRST256);
3370 pfstart = atop(SEGMENT_LENGTH);
3371 pfend = atop(mp->start + mp->size);
3372 uvm_page_physload(pfstart, pfend, pfstart, pfend,
3373 VM_FREELIST_DEFAULT);
3374 }
3375 }
3376
3377 /*
3378 * Make sure kernel vsid is allocated as well as VSID 0.
3379 */
3380 pmap_vsid_bitmap[(KERNEL_VSIDBITS & (NPMAPS-1)) / VSID_NBPW]
3381 |= 1 << (KERNEL_VSIDBITS % VSID_NBPW);
3382 pmap_vsid_bitmap[(PHYSMAP_VSIDBITS & (NPMAPS-1)) / VSID_NBPW]
3383 |= 1 << (PHYSMAP_VSIDBITS % VSID_NBPW);
3384 pmap_vsid_bitmap[0] |= 1;
3385
3386 /*
3387 * Initialize kernel pmap and hardware.
3388 */
3389
3390 /* PMAP_OEA64_BRIDGE does support these instructions */
3391 #if defined (PMAP_OEA) || defined (PMAP_OEA64_BRIDGE)
3392 for (i = 0; i < 16; i++) {
3393 pmap_kernel()->pm_sr[i] = KERNELN_SEGMENT(i)|SR_PRKEY;
3394 __asm volatile ("mtsrin %0,%1"
3395 :: "r"(KERNELN_SEGMENT(i)|SR_PRKEY), "r"(i << ADDR_SR_SHFT));
3396 }
3397
3398 pmap_kernel()->pm_sr[KERNEL_SR] = KERNEL_SEGMENT|SR_SUKEY|SR_PRKEY;
3399 __asm volatile ("mtsr %0,%1"
3400 :: "n"(KERNEL_SR), "r"(KERNEL_SEGMENT));
3401 #ifdef KERNEL2_SR
3402 pmap_kernel()->pm_sr[KERNEL2_SR] = KERNEL2_SEGMENT|SR_SUKEY|SR_PRKEY;
3403 __asm volatile ("mtsr %0,%1"
3404 :: "n"(KERNEL2_SR), "r"(KERNEL2_SEGMENT));
3405 #endif
3406 #endif /* PMAP_OEA || PMAP_OEA64_BRIDGE */
3407 #if defined (PMAP_OEA)
3408 for (i = 0; i < 16; i++) {
3409 if (iosrtable[i] & SR601_T) {
3410 pmap_kernel()->pm_sr[i] = iosrtable[i];
3411 __asm volatile ("mtsrin %0,%1"
3412 :: "r"(iosrtable[i]), "r"(i << ADDR_SR_SHFT));
3413 }
3414 }
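	/*
	 * SDR1 = real address of the hash table | HTABMASK.  HTABMASK
	 * selects the hash bits beyond the minimum 64KB (1024-PTEG) table,
	 * hence pmap_pteg_mask >> 10; e.g. 0x8000 PTEGs (a 2MB table)
	 * gives a mask of 0x1f.
	 */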
3415 __asm volatile ("sync; mtsdr1 %0; isync"
3416 :: "r"((uintptr_t)pmap_pteg_table | (pmap_pteg_mask >> 10)));
3417 #elif defined (PMAP_OEA64) || defined (PMAP_OEA64_BRIDGE)
3418 __asm __volatile ("sync; mtsdr1 %0; isync"
3419 :: "r"((uintptr_t)pmap_pteg_table | (32 - cntlzw(pmap_pteg_mask >> 11))));
3420 #endif
3421 tlbia();
3422
3423 #ifdef ALTIVEC
3424 pmap_use_altivec = cpu_altivec;
3425 #endif
3426
3427 #ifdef DEBUG
3428 if (pmapdebug & PMAPDEBUG_BOOT) {
3429 u_int cnt;
3430 int bank;
3431 char pbuf[9];
3432 for (cnt = 0, bank = 0; bank < vm_nphysseg; bank++) {
3433 cnt += vm_physmem[bank].avail_end - vm_physmem[bank].avail_start;
3434 printf("pmap_bootstrap: vm_physmem[%d]=%#" _PRIxpa "-%#" _PRIxpa "/%#" _PRIxpa "\n",
3435 bank,
3436 ptoa(vm_physmem[bank].avail_start),
3437 ptoa(vm_physmem[bank].avail_end),
3438 ptoa(vm_physmem[bank].avail_end - vm_physmem[bank].avail_start));
3439 }
3440 format_bytes(pbuf, sizeof(pbuf), ptoa((u_int64_t) cnt));
3441 printf("pmap_bootstrap: UVM memory = %s (%u pages)\n",
3442 pbuf, cnt);
3443 }
3444 #endif
3445
3446 pool_init(&pmap_upvo_pool, sizeof(struct pvo_entry),
3447 sizeof(struct pvo_entry), 0, 0, "pmap_upvopl",
3448 &pmap_pool_uallocator, IPL_NONE);
3449
3450 pool_setlowat(&pmap_upvo_pool, 252);
3451
3452 pool_init(&pmap_pool, sizeof(struct pmap),
3453 sizeof(void *), 0, 0, "pmap_pl", &pmap_pool_uallocator,
3454 IPL_NONE);
3455
3456 #if defined(PMAP_NEED_MAPKERNEL) || 1
3457 {
3458 struct pmap *pm = pmap_kernel();
3459 #if 0
3460 extern int etext[], kernel_text[];
3461 vaddr_t va, va_etext = (paddr_t) etext;
3462 #endif
3463 paddr_t pa, pa_end;
3464 register_t sr;
3465 struct pte pt;
3466 unsigned int ptegidx;
3467 int bank;
3468
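		/*
		 * Direct-map all managed physical memory 1:1 (read/write,
		 * coherent) under the physmap VSID, then load the segment
		 * register for the segment containing the kernel below.
		 */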
3469 sr = PHYSMAPN_SEGMENT(0) | SR_SUKEY|SR_PRKEY;
3470 pm->pm_sr[0] = sr;
3471
3472 for (bank = 0; bank < vm_nphysseg; bank++) {
3473 pa_end = ptoa(vm_physmem[bank].avail_end);
3474 pa = ptoa(vm_physmem[bank].avail_start);
3475 for (; pa < pa_end; pa += PAGE_SIZE) {
3476 ptegidx = va_to_pteg(pm, pa);
3477 pmap_pte_create(&pt, pm, pa, pa | PTE_M|PTE_BW);
3478 pmap_pte_insert(ptegidx, &pt);
3479 }
3480 }
3481
3482 #if 0
3483 va = (vaddr_t) kernel_text;
3484
3485 for (pa = kernelstart; va < va_etext;
3486 pa += PAGE_SIZE, va += PAGE_SIZE) {
3487 ptegidx = va_to_pteg(pm, va);
3488 pmap_pte_create(&pt, pm, va, pa | PTE_M|PTE_BR);
3489 pmap_pte_insert(ptegidx, &pt);
3490 }
3491
3492 for (; pa < kernelend;
3493 pa += PAGE_SIZE, va += PAGE_SIZE) {
3494 ptegidx = va_to_pteg(pm, va);
3495 pmap_pte_create(&pt, pm, va, pa | PTE_M|PTE_BW);
3496 pmap_pte_insert(ptegidx, &pt);
3497 }
3498
3499 for (va = 0, pa = 0; va < 0x3000;
3500 pa += PAGE_SIZE, va += PAGE_SIZE) {
3501 ptegidx = va_to_pteg(pm, va);
3502 pmap_pte_create(&pt, pm, va, pa | PTE_M|PTE_BW);
3503 pmap_pte_insert(ptegidx, &pt);
3504 }
3505 #endif
3506
3507 __asm volatile ("mtsrin %0,%1"
3508 :: "r"(sr), "r"(kernelstart));
3509 }
3510 #endif
3511 }
3512