1 /* $NetBSD: pmap.c,v 1.35.8.1 2006/05/24 10:57:09 yamt Exp $ */
2 /*-
3 * Copyright (c) 2001 The NetBSD Foundation, Inc.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Matt Thomas <matt (at) 3am-software.com> of Allegro Networks, Inc.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the NetBSD
20 * Foundation, Inc. and its contributors.
21 * 4. Neither the name of The NetBSD Foundation nor the names of its
22 * contributors may be used to endorse or promote products derived
23 * from this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 /*
39 * Copyright (C) 1995, 1996 Wolfgang Solfrank.
40 * Copyright (C) 1995, 1996 TooLs GmbH.
41 * All rights reserved.
42 *
43 * Redistribution and use in source and binary forms, with or without
44 * modification, are permitted provided that the following conditions
45 * are met:
46 * 1. Redistributions of source code must retain the above copyright
47 * notice, this list of conditions and the following disclaimer.
48 * 2. Redistributions in binary form must reproduce the above copyright
49 * notice, this list of conditions and the following disclaimer in the
50 * documentation and/or other materials provided with the distribution.
51 * 3. All advertising materials mentioning features or use of this software
52 * must display the following acknowledgement:
53 * This product includes software developed by TooLs GmbH.
54 * 4. The name of TooLs GmbH may not be used to endorse or promote products
55 * derived from this software without specific prior written permission.
56 *
57 * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR
58 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
59 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
60 * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
61 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
62 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
63 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
64 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
65 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
66 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
67 */
68
69 #include <sys/cdefs.h>
70 __KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.35.8.1 2006/05/24 10:57:09 yamt Exp $");
71
72 #include "opt_ppcarch.h"
73 #include "opt_altivec.h"
74 #include "opt_pmap.h"
75 #include <sys/param.h>
76 #include <sys/malloc.h>
77 #include <sys/proc.h>
78 #include <sys/user.h>
79 #include <sys/pool.h>
80 #include <sys/queue.h>
81 #include <sys/device.h> /* for evcnt */
82 #include <sys/systm.h>
83
84 #if __NetBSD_Version__ < 105010000
85 #include <vm/vm.h>
86 #include <vm/vm_kern.h>
87 #define splvm() splimp()
88 #endif
89
90 #include <uvm/uvm.h>
91
92 #include <machine/pcb.h>
93 #include <machine/powerpc.h>
94 #include <powerpc/spr.h>
95 #include <powerpc/oea/sr_601.h>
96 #include <powerpc/bat.h>
97
98 #if defined(DEBUG) || defined(PMAPCHECK)
99 #define STATIC
100 #else
101 #define STATIC static
102 #endif
103
104 #ifdef ALTIVEC
105 int pmap_use_altivec;
106 #endif
107
108 volatile struct pteg *pmap_pteg_table;
109 unsigned int pmap_pteg_cnt;
110 unsigned int pmap_pteg_mask;
111 #ifdef PMAP_MEMLIMIT
112 paddr_t pmap_memlimit = PMAP_MEMLIMIT;
113 #else
114 paddr_t pmap_memlimit = -PAGE_SIZE; /* there is no limit */
115 #endif
116
117 struct pmap kernel_pmap_;
118 unsigned int pmap_pages_stolen;
119 u_long pmap_pte_valid;
120 #if defined(DIAGNOSTIC) || defined(DEBUG) || defined(PMAPCHECK)
121 u_long pmap_pvo_enter_depth;
122 u_long pmap_pvo_remove_depth;
123 #endif
124
125 int physmem;
126 #ifndef MSGBUFADDR
127 extern paddr_t msgbuf_paddr;
128 #endif
129
130 static struct mem_region *mem, *avail;
131 static u_int mem_cnt, avail_cnt;
132
133 #ifdef __HAVE_PMAP_PHYSSEG
134 /*
135 * This is a cache of referenced/modified bits.
136 * Bits herein are shifted by ATTR_SHFT.
137 */
138 #define ATTR_SHFT 4
139 struct pmap_physseg pmap_physseg;
140 #endif
141
142 /*
143 * The following structure is exactly 32 bytes long (one cacheline).
144 */
145 struct pvo_entry {
146 LIST_ENTRY(pvo_entry) pvo_vlink; /* Link to common virt page */
147 TAILQ_ENTRY(pvo_entry) pvo_olink; /* Link to overflow entry */
148 struct pte pvo_pte; /* Prebuilt PTE */
149 pmap_t pvo_pmap; /* ptr to owning pmap */
150 vaddr_t pvo_vaddr; /* VA of entry */
151 #define PVO_PTEGIDX_MASK 0x0007 /* which PTEG slot */
152 #define PVO_PTEGIDX_VALID 0x0008 /* slot is valid */
153 #define PVO_WIRED 0x0010 /* PVO entry is wired */
154 #define PVO_MANAGED 0x0020 /* PVO e. for managed page */
155 #define PVO_EXECUTABLE 0x0040 /* PVO e. for executable page */
156 #define PVO_ENTER_INSERT 0 /* PVO inserted by pmap_pvo_enter */
157 #define PVO_SPILL_UNSET 1 /* PVO evicted by pmap_pte_spill */
158 #define PVO_SPILL_SET 2 /* PVO made resident by pmap_pte_spill */
159 #define PVO_SPILL_INSERT 3 /* PVO inserted by pmap_pte_spill */
160 #define PVO_PMAP_PAGE_PROTECT 4 /* PVO changed by pmap_page_protect */
161 #define PVO_PMAP_PROTECT 5 /* PVO changed by pmap_protect */
162 #define PVO_REMOVE 6 /* PVO removed by pmap_pvo_remove */
163 #define PVO_WHERE_MASK 15
164 #define PVO_WHERE_SHFT 8
165 };
166 #define PVO_VADDR(pvo) ((pvo)->pvo_vaddr & ~ADDR_POFF)
167 #define PVO_ISEXECUTABLE(pvo) ((pvo)->pvo_vaddr & PVO_EXECUTABLE)
168 #define PVO_PTEGIDX_GET(pvo) ((pvo)->pvo_vaddr & PVO_PTEGIDX_MASK)
169 #define PVO_PTEGIDX_ISSET(pvo) ((pvo)->pvo_vaddr & PVO_PTEGIDX_VALID)
170 #define PVO_PTEGIDX_CLR(pvo) \
171 ((void)((pvo)->pvo_vaddr &= ~(PVO_PTEGIDX_VALID|PVO_PTEGIDX_MASK)))
172 #define PVO_PTEGIDX_SET(pvo,i) \
173 ((void)((pvo)->pvo_vaddr |= (i)|PVO_PTEGIDX_VALID))
174 #define PVO_WHERE(pvo,w) \
175 ((pvo)->pvo_vaddr &= ~(PVO_WHERE_MASK << PVO_WHERE_SHFT), \
176 (pvo)->pvo_vaddr |= ((PVO_ ## w) << PVO_WHERE_SHFT))
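/*
 * PVO_WHERE() is a debugging aid: it records, in bits 8-11 of pvo_vaddr,
 * the last operation that changed this PVO's PTE state, e.g.
 * PVO_WHERE(pvo, SPILL_INSERT) after a successful spill insertion.
 */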
177
178 TAILQ_HEAD(pvo_tqhead, pvo_entry);
179 struct pvo_tqhead *pmap_pvo_table; /* pvo entries by ptegroup index */
180 struct pvo_head pmap_pvo_kunmanaged = LIST_HEAD_INITIALIZER(pmap_pvo_kunmanaged); /* list of kernel's unmanaged mappings */
181 struct pvo_head pmap_pvo_unmanaged = LIST_HEAD_INITIALIZER(pmap_pvo_unmanaged); /* list of unmanaged pages */
182
183 struct pool pmap_pool; /* pool for pmap structures */
184 struct pool pmap_upvo_pool; /* pool for pvo entries for unmanaged pages */
185 struct pool pmap_mpvo_pool; /* pool for pvo entries for managed pages */
186
187 /*
188 * We keep a cache of unmanaged pages to be used for pvo entries for
189 * unmanaged pages.
190 */
191 struct pvo_page {
192 SIMPLEQ_ENTRY(pvo_page) pvop_link;
193 };
194 SIMPLEQ_HEAD(pvop_head, pvo_page);
195 struct pvop_head pmap_upvop_head = SIMPLEQ_HEAD_INITIALIZER(pmap_upvop_head);
196 struct pvop_head pmap_mpvop_head = SIMPLEQ_HEAD_INITIALIZER(pmap_mpvop_head);
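/* Current depth and observed maximum depth of each pvo page cache. */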
197 u_long pmap_upvop_free;
198 u_long pmap_upvop_maxfree;
199 u_long pmap_mpvop_free;
200 u_long pmap_mpvop_maxfree;
201
202 STATIC void *pmap_pool_ualloc(struct pool *, int);
203 STATIC void *pmap_pool_malloc(struct pool *, int);
204
205 STATIC void pmap_pool_ufree(struct pool *, void *);
206 STATIC void pmap_pool_mfree(struct pool *, void *);
207
208 static struct pool_allocator pmap_pool_mallocator = {
209 pmap_pool_malloc, pmap_pool_mfree, 0,
210 };
211
212 static struct pool_allocator pmap_pool_uallocator = {
213 pmap_pool_ualloc, pmap_pool_ufree, 0,
214 };
215
216 #if defined(DEBUG) || defined(PMAPCHECK) || defined(DDB)
217 void pmap_pte_print(volatile struct pte *);
218 #endif
219
220 #ifdef DDB
221 void pmap_pteg_check(void);
222 void pmap_pteg_dist(void);
223 void pmap_print_pte(pmap_t, vaddr_t);
224 void pmap_print_mmuregs(void);
225 #endif
226
227 #if defined(DEBUG) || defined(PMAPCHECK)
228 #ifdef PMAPCHECK
229 int pmapcheck = 1;
230 #else
231 int pmapcheck = 0;
232 #endif
233 void pmap_pvo_verify(void);
234 STATIC void pmap_pvo_check(const struct pvo_entry *);
235 #define PMAP_PVO_CHECK(pvo) \
236 do { \
237 if (pmapcheck) \
238 pmap_pvo_check(pvo); \
239 } while (0)
240 #else
241 #define PMAP_PVO_CHECK(pvo) do { } while (/*CONSTCOND*/0)
242 #endif
243 STATIC int pmap_pte_insert(int, struct pte *);
244 STATIC int pmap_pvo_enter(pmap_t, struct pool *, struct pvo_head *,
245 vaddr_t, paddr_t, register_t, int);
246 STATIC void pmap_pvo_remove(struct pvo_entry *, int, struct pvo_head *);
247 STATIC void pmap_pvo_free(struct pvo_entry *);
248 STATIC void pmap_pvo_free_list(struct pvo_head *);
249 STATIC struct pvo_entry *pmap_pvo_find_va(pmap_t, vaddr_t, int *);
250 STATIC volatile struct pte *pmap_pvo_to_pte(const struct pvo_entry *, int);
251 STATIC struct pvo_entry *pmap_pvo_reclaim(struct pmap *);
252 STATIC void pvo_set_exec(struct pvo_entry *);
253 STATIC void pvo_clear_exec(struct pvo_entry *);
254
255 STATIC void tlbia(void);
256
257 STATIC void pmap_release(pmap_t);
258 STATIC void *pmap_boot_find_memory(psize_t, psize_t, int);
259
260 static uint32_t pmap_pvo_reclaim_nextidx;
261 #ifdef DEBUG
262 static int pmap_pvo_reclaim_debugctr;
263 #endif
264
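/*
 * Bitmap of VSID bases in use, one bit per possible pmap (NPMAPS total,
 * VSID_NBPW bits per word); pmap_pinit() allocates from it and
 * pmap_release() returns entries to it.
 */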
265 #define VSID_NBPW (sizeof(uint32_t) * 8)
266 static uint32_t pmap_vsid_bitmap[NPMAPS / VSID_NBPW];
267
268 static int pmap_initialized;
269
270 #if defined(DEBUG) || defined(PMAPDEBUG)
271 #define PMAPDEBUG_BOOT 0x0001
272 #define PMAPDEBUG_PTE 0x0002
273 #define PMAPDEBUG_EXEC 0x0008
274 #define PMAPDEBUG_PVOENTER 0x0010
275 #define PMAPDEBUG_PVOREMOVE 0x0020
276 #define PMAPDEBUG_ACTIVATE 0x0100
277 #define PMAPDEBUG_CREATE 0x0200
278 #define PMAPDEBUG_ENTER 0x1000
279 #define PMAPDEBUG_KENTER 0x2000
280 #define PMAPDEBUG_KREMOVE 0x4000
281 #define PMAPDEBUG_REMOVE 0x8000
282 unsigned int pmapdebug = 0;
283 # define DPRINTF(x) printf x
284 # define DPRINTFN(n, x) if (pmapdebug & PMAPDEBUG_ ## n) printf x
285 #else
286 # define DPRINTF(x)
287 # define DPRINTFN(n, x)
288 #endif
289
290
291 #ifdef PMAPCOUNTERS
292 #define PMAPCOUNT(ev) ((pmap_evcnt_ ## ev).ev_count++)
293 #define PMAPCOUNT2(ev) ((ev).ev_count++)
294
295 struct evcnt pmap_evcnt_mappings =
296 EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL,
297 "pmap", "pages mapped");
298 struct evcnt pmap_evcnt_unmappings =
299 EVCNT_INITIALIZER(EVCNT_TYPE_MISC, &pmap_evcnt_mappings,
300 "pmap", "pages unmapped");
301
302 struct evcnt pmap_evcnt_kernel_mappings =
303 EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL,
304 "pmap", "kernel pages mapped");
305 struct evcnt pmap_evcnt_kernel_unmappings =
306 EVCNT_INITIALIZER(EVCNT_TYPE_MISC, &pmap_evcnt_kernel_mappings,
307 "pmap", "kernel pages unmapped");
308
309 struct evcnt pmap_evcnt_mappings_replaced =
310 EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL,
311 "pmap", "page mappings replaced");
312
313 struct evcnt pmap_evcnt_exec_mappings =
314 EVCNT_INITIALIZER(EVCNT_TYPE_MISC, &pmap_evcnt_mappings,
315 "pmap", "exec pages mapped");
316 struct evcnt pmap_evcnt_exec_cached =
317 EVCNT_INITIALIZER(EVCNT_TYPE_MISC, &pmap_evcnt_mappings,
318 "pmap", "exec pages cached");
319
320 struct evcnt pmap_evcnt_exec_synced =
321 EVCNT_INITIALIZER(EVCNT_TYPE_MISC, &pmap_evcnt_exec_mappings,
322 "pmap", "exec pages synced");
323 struct evcnt pmap_evcnt_exec_synced_clear_modify =
324 EVCNT_INITIALIZER(EVCNT_TYPE_MISC, &pmap_evcnt_exec_mappings,
325 "pmap", "exec pages synced (CM)");
326 struct evcnt pmap_evcnt_exec_synced_pvo_remove =
327 EVCNT_INITIALIZER(EVCNT_TYPE_MISC, &pmap_evcnt_exec_mappings,
328 "pmap", "exec pages synced (PR)");
329
330 struct evcnt pmap_evcnt_exec_uncached_page_protect =
331 EVCNT_INITIALIZER(EVCNT_TYPE_MISC, &pmap_evcnt_exec_mappings,
332 "pmap", "exec pages uncached (PP)");
333 struct evcnt pmap_evcnt_exec_uncached_clear_modify =
334 EVCNT_INITIALIZER(EVCNT_TYPE_MISC, &pmap_evcnt_exec_mappings,
335 "pmap", "exec pages uncached (CM)");
336 struct evcnt pmap_evcnt_exec_uncached_zero_page =
337 EVCNT_INITIALIZER(EVCNT_TYPE_MISC, &pmap_evcnt_exec_mappings,
338 "pmap", "exec pages uncached (ZP)");
339 struct evcnt pmap_evcnt_exec_uncached_copy_page =
340 EVCNT_INITIALIZER(EVCNT_TYPE_MISC, &pmap_evcnt_exec_mappings,
341 "pmap", "exec pages uncached (CP)");
342 struct evcnt pmap_evcnt_exec_uncached_pvo_remove =
343 EVCNT_INITIALIZER(EVCNT_TYPE_MISC, &pmap_evcnt_exec_mappings,
344 "pmap", "exec pages uncached (PR)");
345
346 struct evcnt pmap_evcnt_updates =
347 EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL,
348 "pmap", "updates");
349 struct evcnt pmap_evcnt_collects =
350 EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL,
351 "pmap", "collects");
352 struct evcnt pmap_evcnt_copies =
353 EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL,
354 "pmap", "copies");
355
356 struct evcnt pmap_evcnt_ptes_spilled =
357 EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL,
358 "pmap", "ptes spilled from overflow");
359 struct evcnt pmap_evcnt_ptes_unspilled =
360 EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL,
361 "pmap", "ptes not spilled");
362 struct evcnt pmap_evcnt_ptes_evicted =
363 EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL,
364 "pmap", "ptes evicted");
365
366 struct evcnt pmap_evcnt_ptes_primary[8] = {
367 EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL,
368 "pmap", "ptes added at primary[0]"),
369 EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL,
370 "pmap", "ptes added at primary[1]"),
371 EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL,
372 "pmap", "ptes added at primary[2]"),
373 EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL,
374 "pmap", "ptes added at primary[3]"),
375
376 EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL,
377 "pmap", "ptes added at primary[4]"),
378 EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL,
379 "pmap", "ptes added at primary[5]"),
380 EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL,
381 "pmap", "ptes added at primary[6]"),
382 EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL,
383 "pmap", "ptes added at primary[7]"),
384 };
385 struct evcnt pmap_evcnt_ptes_secondary[8] = {
386 EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL,
387 "pmap", "ptes added at secondary[0]"),
388 EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL,
389 "pmap", "ptes added at secondary[1]"),
390 EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL,
391 "pmap", "ptes added at secondary[2]"),
392 EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL,
393 "pmap", "ptes added at secondary[3]"),
394
395 EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL,
396 "pmap", "ptes added at secondary[4]"),
397 EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL,
398 "pmap", "ptes added at secondary[5]"),
399 EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL,
400 "pmap", "ptes added at secondary[6]"),
401 EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL,
402 "pmap", "ptes added at secondary[7]"),
403 };
404 struct evcnt pmap_evcnt_ptes_removed =
405 EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL,
406 "pmap", "ptes removed");
407 struct evcnt pmap_evcnt_ptes_changed =
408 EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL,
409 "pmap", "ptes changed");
410 struct evcnt pmap_evcnt_pvos_reclaimed =
411 EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL,
412 "pmap", "pvos reclaimed");
413 struct evcnt pmap_evcnt_pvos_failed =
414 EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL,
415 "pmap", "pvo allocation failures");
416
417 /*
418 * From pmap_subr.c
419 */
420 extern struct evcnt pmap_evcnt_zeroed_pages;
421 extern struct evcnt pmap_evcnt_copied_pages;
422 extern struct evcnt pmap_evcnt_idlezeroed_pages;
423
424 EVCNT_ATTACH_STATIC(pmap_evcnt_mappings);
425 EVCNT_ATTACH_STATIC(pmap_evcnt_mappings_replaced);
426 EVCNT_ATTACH_STATIC(pmap_evcnt_unmappings);
427
428 EVCNT_ATTACH_STATIC(pmap_evcnt_kernel_mappings);
429 EVCNT_ATTACH_STATIC(pmap_evcnt_kernel_unmappings);
430
431 EVCNT_ATTACH_STATIC(pmap_evcnt_exec_mappings);
432 EVCNT_ATTACH_STATIC(pmap_evcnt_exec_cached);
433 EVCNT_ATTACH_STATIC(pmap_evcnt_exec_synced);
434 EVCNT_ATTACH_STATIC(pmap_evcnt_exec_synced_clear_modify);
435 EVCNT_ATTACH_STATIC(pmap_evcnt_exec_synced_pvo_remove);
436
437 EVCNT_ATTACH_STATIC(pmap_evcnt_exec_uncached_page_protect);
438 EVCNT_ATTACH_STATIC(pmap_evcnt_exec_uncached_clear_modify);
439 EVCNT_ATTACH_STATIC(pmap_evcnt_exec_uncached_zero_page);
440 EVCNT_ATTACH_STATIC(pmap_evcnt_exec_uncached_copy_page);
441 EVCNT_ATTACH_STATIC(pmap_evcnt_exec_uncached_pvo_remove);
442
443 EVCNT_ATTACH_STATIC(pmap_evcnt_zeroed_pages);
444 EVCNT_ATTACH_STATIC(pmap_evcnt_copied_pages);
445 EVCNT_ATTACH_STATIC(pmap_evcnt_idlezeroed_pages);
446
447 EVCNT_ATTACH_STATIC(pmap_evcnt_updates);
448 EVCNT_ATTACH_STATIC(pmap_evcnt_collects);
449 EVCNT_ATTACH_STATIC(pmap_evcnt_copies);
450
451 EVCNT_ATTACH_STATIC(pmap_evcnt_ptes_spilled);
452 EVCNT_ATTACH_STATIC(pmap_evcnt_ptes_unspilled);
453 EVCNT_ATTACH_STATIC(pmap_evcnt_ptes_evicted);
454 EVCNT_ATTACH_STATIC(pmap_evcnt_ptes_removed);
455 EVCNT_ATTACH_STATIC(pmap_evcnt_ptes_changed);
456
457 EVCNT_ATTACH_STATIC2(pmap_evcnt_ptes_primary, 0);
458 EVCNT_ATTACH_STATIC2(pmap_evcnt_ptes_primary, 1);
459 EVCNT_ATTACH_STATIC2(pmap_evcnt_ptes_primary, 2);
460 EVCNT_ATTACH_STATIC2(pmap_evcnt_ptes_primary, 3);
461 EVCNT_ATTACH_STATIC2(pmap_evcnt_ptes_primary, 4);
462 EVCNT_ATTACH_STATIC2(pmap_evcnt_ptes_primary, 5);
463 EVCNT_ATTACH_STATIC2(pmap_evcnt_ptes_primary, 6);
464 EVCNT_ATTACH_STATIC2(pmap_evcnt_ptes_primary, 7);
465 EVCNT_ATTACH_STATIC2(pmap_evcnt_ptes_secondary, 0);
466 EVCNT_ATTACH_STATIC2(pmap_evcnt_ptes_secondary, 1);
467 EVCNT_ATTACH_STATIC2(pmap_evcnt_ptes_secondary, 2);
468 EVCNT_ATTACH_STATIC2(pmap_evcnt_ptes_secondary, 3);
469 EVCNT_ATTACH_STATIC2(pmap_evcnt_ptes_secondary, 4);
470 EVCNT_ATTACH_STATIC2(pmap_evcnt_ptes_secondary, 5);
471 EVCNT_ATTACH_STATIC2(pmap_evcnt_ptes_secondary, 6);
472 EVCNT_ATTACH_STATIC2(pmap_evcnt_ptes_secondary, 7);
473
474 EVCNT_ATTACH_STATIC(pmap_evcnt_pvos_reclaimed);
475 EVCNT_ATTACH_STATIC(pmap_evcnt_pvos_failed);
476 #else
477 #define PMAPCOUNT(ev) ((void) 0)
478 #define PMAPCOUNT2(ev) ((void) 0)
479 #endif
480
481 #define TLBIE(va) __asm volatile("tlbie %0" :: "r"(va))
482 #define TLBSYNC() __asm volatile("tlbsync")
483 #define SYNC() __asm volatile("sync")
484 #define EIEIO() __asm volatile("eieio")
485 #define MFMSR() mfmsr()
486 #define MTMSR(psl) mtmsr(psl)
487 #define MFPVR() mfpvr()
488 #define MFSRIN(va) mfsrin(va)
489 #define MFTB() mfrtcltbl()
490
491 #ifndef PPC_OEA64
492 static inline register_t
493 mfsrin(vaddr_t va)
494 {
495 register_t sr;
496 __asm volatile ("mfsrin %0,%1" : "=r"(sr) : "r"(va));
497 return sr;
498 }
499 #endif /* PPC_OEA64 */
500
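/*
 * The pmap serializes page table and PVO list manipulation by running
 * with external interrupts disabled: pmap_interrupts_off() clears PSL_EE
 * in the MSR and returns the previous value, which callers later pass
 * to pmap_interrupts_restore().
 */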
501 static inline register_t
502 pmap_interrupts_off(void)
503 {
504 register_t msr = MFMSR();
505 if (msr & PSL_EE)
506 MTMSR(msr & ~PSL_EE);
507 return msr;
508 }
509
510 static void
511 pmap_interrupts_restore(register_t msr)
512 {
513 if (msr & PSL_EE)
514 MTMSR(msr);
515 }
516
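/*
 * The MPC601 has no timebase register, so fall back to the low word of
 * its real-time clock; callers only use this as a cheap source of
 * varying bits (random PTE victim selection, VSID entropy).
 */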
517 static inline u_int32_t
518 mfrtcltbl(void)
519 {
520
521 if ((MFPVR() >> 16) == MPC601)
522 return (mfrtcl() >> 7);
523 else
524 return (mftbl());
525 }
526
527 /*
528 * These small routines may have to be replaced,
529 * if/when we support processors other than the 604.
530 */
531
532 void
533 tlbia(void)
534 {
535 caddr_t i;
536
537 SYNC();
538 /*
539 * Why not use "tlbia"? Because not all processors implement it.
540 *
541 * This needs to be a per-CPU callback to do the appropriate thing
542 * for the CPU. XXX
543 */
544 for (i = 0; i < (caddr_t)0x00040000; i += 0x00001000) {
545 TLBIE(i);
546 EIEIO();
547 SYNC();
548 }
549 TLBSYNC();
550 SYNC();
551 }
552
553 static inline register_t
554 va_to_vsid(const struct pmap *pm, vaddr_t addr)
555 {
556 #ifdef PPC_OEA64
557 #if 0
558 const struct ste *ste;
559 register_t hash;
560 int i;
561
562 hash = (addr >> ADDR_ESID_SHFT) & ADDR_ESID_HASH;
563
564 /*
565 * Try the primary group first
566 */
567 ste = pm->pm_stes[hash].stes;
568 for (i = 0; i < 8; i++, ste++) {
569 if ((ste->ste_hi & STE_V) &&
570 (addr & ~(ADDR_POFF|ADDR_PIDX)) == (ste->ste_hi & STE_ESID))
571 return ste;
572 }
573
574 /*
575 * Then the secondary group.
576 */
577 ste = pm->pm_stes[hash ^ ADDR_ESID_HASH].stes;
578 for (i = 0; i < 8; i++, ste++) {
579 if ((ste->ste_hi & STE_V) &&
580 (addr & ~(ADDR_POFF|ADDR_PIDX)) == (ste->ste_hi & STE_ESID))
581 return addr;
582 }
583
584 return NULL;
585 #else
586 /*
587 * Rather than searching the STE groups for the VSID, we know
588 * how we generate that from the ESID and so do that.
589 */
590 return VSID_MAKE(addr >> ADDR_SR_SHFT, pm->pm_vsid) >> SR_VSID_SHFT;
591 #endif
592 #else
593 return (pm->pm_sr[addr >> ADDR_SR_SHFT] & SR_VSID) >> SR_VSID_SHFT;
594 #endif
595 }
596
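/*
 * The primary PTEG index is the VSID hashed (XORed) with the page index
 * of the effective address, masked to the size of the PTEG table, as
 * defined by the OEA hashed page table architecture.
 */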
597 static inline register_t
598 va_to_pteg(const struct pmap *pm, vaddr_t addr)
599 {
600 register_t hash;
601
602 hash = va_to_vsid(pm, addr) ^ ((addr & ADDR_PIDX) >> ADDR_PIDX_SHFT);
603 return hash & pmap_pteg_mask;
604 }
605
606 #if defined(DEBUG) || defined(PMAPCHECK) || defined(DDB)
607 /*
608 * Given a PTE in the page table, calculate the VADDR that hashes to it.
609 * The only bit of magic is that the top 4 bits of the address don't
610 * technically exist in the PTE. But we know we reserved 4 bits of the
611 * VSID for it so that's how we get it.
612 */
613 static vaddr_t
614 pmap_pte_to_va(volatile const struct pte *pt)
615 {
616 vaddr_t va;
617 uintptr_t ptaddr = (uintptr_t) pt;
618
619 if (pt->pte_hi & PTE_HID)
620 ptaddr ^= (pmap_pteg_mask * sizeof(struct pteg));
621
622 /* PPC Bits 10-19 PPC64 Bits 42-51 */
623 va = ((pt->pte_hi >> PTE_VSID_SHFT) ^ (ptaddr / sizeof(struct pteg))) & 0x3ff;
624 va <<= ADDR_PIDX_SHFT;
625
626 /* PPC Bits 4-9 PPC64 Bits 36-41 */
627 va |= (pt->pte_hi & PTE_API) << ADDR_API_SHFT;
628
629 #ifdef PPC_OEA64
630 /* PPC64 Bits 0-35 */
631 /* va |= VSID_TO_SR(pt->pte_hi >> PTE_VSID_SHFT) << ADDR_SR_SHFT; */
632 #endif
633 #ifdef PPC_OEA
634 /* PPC Bits 0-3 */
635 va |= VSID_TO_SR(pt->pte_hi >> PTE_VSID_SHFT) << ADDR_SR_SHFT;
636 #endif
637
638 return va;
639 }
640 #endif
641
642 static inline struct pvo_head *
643 pa_to_pvoh(paddr_t pa, struct vm_page **pg_p)
644 {
645 #ifdef __HAVE_VM_PAGE_MD
646 struct vm_page *pg;
647
648 pg = PHYS_TO_VM_PAGE(pa);
649 if (pg_p != NULL)
650 *pg_p = pg;
651 if (pg == NULL)
652 return &pmap_pvo_unmanaged;
653 return &pg->mdpage.mdpg_pvoh;
654 #endif
655 #ifdef __HAVE_PMAP_PHYSSEG
656 int bank, pg;
657
658 bank = vm_physseg_find(atop(pa), &pg);
659 if (pg_p != NULL)
660 *pg_p = pg;
661 if (bank == -1)
662 return &pmap_pvo_unmanaged;
663 return &vm_physmem[bank].pmseg.pvoh[pg];
664 #endif
665 }
666
667 static inline struct pvo_head *
668 vm_page_to_pvoh(struct vm_page *pg)
669 {
670 #ifdef __HAVE_VM_PAGE_MD
671 return &pg->mdpage.mdpg_pvoh;
672 #endif
673 #ifdef __HAVE_PMAP_PHYSSEG
674 return pa_to_pvoh(VM_PAGE_TO_PHYS(pg), NULL);
675 #endif
676 }
677
678
679 #ifdef __HAVE_PMAP_PHYSSEG
680 static inline char *
681 pa_to_attr(paddr_t pa)
682 {
683 int bank, pg;
684
685 bank = vm_physseg_find(atop(pa), &pg);
686 if (bank == -1)
687 return NULL;
688 return &vm_physmem[bank].pmseg.attrs[pg];
689 }
690 #endif
691
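/*
 * The referenced/modified (PTE_REF/PTE_CHG) bits are cached per page:
 * either in the vm_page's mdpage or, with __HAVE_PMAP_PHYSSEG, in the
 * physseg attribute array shifted down by ATTR_SHFT.
 */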
692 static inline void
693 pmap_attr_clear(struct vm_page *pg, int ptebit)
694 {
695 #ifdef __HAVE_PMAP_PHYSSEG
696 *pa_to_attr(VM_PAGE_TO_PHYS(pg)) &= ~(ptebit >> ATTR_SHFT);
697 #endif
698 #ifdef __HAVE_VM_PAGE_MD
699 pg->mdpage.mdpg_attrs &= ~ptebit;
700 #endif
701 }
702
703 static inline int
704 pmap_attr_fetch(struct vm_page *pg)
705 {
706 #ifdef __HAVE_PMAP_PHYSSEG
707 return *pa_to_attr(VM_PAGE_TO_PHYS(pg)) << ATTR_SHFT;
708 #endif
709 #ifdef __HAVE_VM_PAGE_MD
710 return pg->mdpage.mdpg_attrs;
711 #endif
712 }
713
714 static inline void
715 pmap_attr_save(struct vm_page *pg, int ptebit)
716 {
717 #ifdef __HAVE_PMAP_PHYSSEG
718 *pa_to_attr(VM_PAGE_TO_PHYS(pg)) |= (ptebit >> ATTR_SHFT);
719 #endif
720 #ifdef __HAVE_VM_PAGE_MD
721 pg->mdpage.mdpg_attrs |= ptebit;
722 #endif
723 }
724
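/*
 * Two PTEs are considered to describe the same mapping when their high
 * words (VSID, API, hash ID, valid bit) match; the low word comparison
 * is left disabled since the REF/CHG bits there change underneath us.
 */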
725 static inline int
726 pmap_pte_compare(const volatile struct pte *pt, const struct pte *pvo_pt)
727 {
728 if (pt->pte_hi == pvo_pt->pte_hi
729 #if 0
730 && ((pt->pte_lo ^ pvo_pt->pte_lo) &
731 ~(PTE_REF|PTE_CHG)) == 0
732 #endif
733 )
734 return 1;
735 return 0;
736 }
737
738 static inline void
739 pmap_pte_create(struct pte *pt, const struct pmap *pm, vaddr_t va, register_t pte_lo)
740 {
741 /*
742 * Construct the PTE. Default to IMB initially. Valid bit
743 * only gets set when the real pte is set in memory.
744 *
745 * Note: Don't set the valid bit for correct operation of tlb update.
746 */
747 pt->pte_hi = (va_to_vsid(pm, va) << PTE_VSID_SHFT)
748 | (((va & ADDR_PIDX) >> (ADDR_API_SHFT - PTE_API_SHFT)) & PTE_API);
749 pt->pte_lo = pte_lo;
750 }
751
752 static inline void
753 pmap_pte_synch(volatile struct pte *pt, struct pte *pvo_pt)
754 {
755 pvo_pt->pte_lo |= pt->pte_lo & (PTE_REF|PTE_CHG);
756 }
757
758 static inline void
759 pmap_pte_clear(volatile struct pte *pt, vaddr_t va, int ptebit)
760 {
761 /*
762 * As shown in Section 7.6.3.2.3
763 */
764 pt->pte_lo &= ~ptebit;
765 TLBIE(va);
766 SYNC();
767 EIEIO();
768 TLBSYNC();
769 SYNC();
770 }
771
772 static inline void
773 pmap_pte_set(volatile struct pte *pt, struct pte *pvo_pt)
774 {
775 #if defined(DIAGNOSTIC) || defined(DEBUG) || defined(PMAPCHECK)
776 if (pvo_pt->pte_hi & PTE_VALID)
777 panic("pte_set: setting an already valid pte %p", pvo_pt);
778 #endif
779 pvo_pt->pte_hi |= PTE_VALID;
780 /*
781 * Update the PTE as defined in section 7.6.3.1
782 * Note that the REF/CHG bits are from pvo_pt and thus should
783 * have been saved so this routine can restore them (if desired).
784 */
785 pt->pte_lo = pvo_pt->pte_lo;
786 EIEIO();
787 pt->pte_hi = pvo_pt->pte_hi;
788 SYNC();
789 pmap_pte_valid++;
790 }
791
792 static inline void
793 pmap_pte_unset(volatile struct pte *pt, struct pte *pvo_pt, vaddr_t va)
794 {
795 #if defined(DIAGNOSTIC) || defined(DEBUG) || defined(PMAPCHECK)
796 if ((pvo_pt->pte_hi & PTE_VALID) == 0)
797 panic("pte_unset: attempt to unset an inactive pte#1 %p/%p", pvo_pt, pt);
798 if ((pt->pte_hi & PTE_VALID) == 0)
799 panic("pte_unset: attempt to unset an inactive pte#2 %p/%p", pvo_pt, pt);
800 #endif
801
802 pvo_pt->pte_hi &= ~PTE_VALID;
803 /*
804 * Force the ref & chg bits back into the PTEs.
805 */
806 SYNC();
807 /*
808 * Invalidate the pte ... (Section 7.6.3.3)
809 */
810 pt->pte_hi &= ~PTE_VALID;
811 SYNC();
812 TLBIE(va);
813 SYNC();
814 EIEIO();
815 TLBSYNC();
816 SYNC();
817 /*
818 * Save the ref & chg bits ...
819 */
820 pmap_pte_synch(pt, pvo_pt);
821 pmap_pte_valid--;
822 }
823
824 static inline void
825 pmap_pte_change(volatile struct pte *pt, struct pte *pvo_pt, vaddr_t va)
826 {
827 /*
828 * Invalidate the PTE
829 */
830 pmap_pte_unset(pt, pvo_pt, va);
831 pmap_pte_set(pt, pvo_pt);
832 }
833
834 /*
835 * Try to insert the PTE @ *pvo_pt into the pmap_pteg_table at ptegidx
836 * (either primary or secondary location).
837 *
838 * Note: both the destination and source PTEs must not have PTE_VALID set.
839 */
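/*
 * Each PTEG has 8 slots.  If the primary group is full, the entry is
 * placed in the secondary group (primary index XOR pmap_pteg_mask) and
 * PTE_HID is set in pte_hi to record that.  Returns the slot used, or
 * -1 if both groups are full.
 */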
840
841 STATIC int
842 pmap_pte_insert(int ptegidx, struct pte *pvo_pt)
843 {
844 volatile struct pte *pt;
845 int i;
846
847 #if defined(DEBUG)
848 DPRINTFN(PTE, ("pmap_pte_insert: idx 0x%x, pte 0x%x 0x%x\n",
849 ptegidx, (unsigned int) pvo_pt->pte_hi, (unsigned int) pvo_pt->pte_lo));
850 #endif
851 /*
852 * First try primary hash.
853 */
854 for (pt = pmap_pteg_table[ptegidx].pt, i = 0; i < 8; i++, pt++) {
855 if ((pt->pte_hi & PTE_VALID) == 0) {
856 pvo_pt->pte_hi &= ~PTE_HID;
857 pmap_pte_set(pt, pvo_pt);
858 return i;
859 }
860 }
861
862 /*
863 * Now try secondary hash.
864 */
865 ptegidx ^= pmap_pteg_mask;
866 for (pt = pmap_pteg_table[ptegidx].pt, i = 0; i < 8; i++, pt++) {
867 if ((pt->pte_hi & PTE_VALID) == 0) {
868 pvo_pt->pte_hi |= PTE_HID;
869 pmap_pte_set(pt, pvo_pt);
870 return i;
871 }
872 }
873 return -1;
874 }
875
876 /*
877 * Spill handler.
878 *
879 * Tries to spill a page table entry from the overflow area.
880 * This runs in either real mode (if dealing with an exception spill)
881 * or virtual mode when dealing with manually spilling one of the
882 * kernel's pte entries. In either case, interrupts are already
883 * disabled.
884 */
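/*
 * Returns non-zero if a PTE for the faulting EA could be made resident,
 * so the access can simply be retried; zero otherwise (no matching PVO,
 * or an instruction fetch from a non-executable mapping).  The code
 * below relies on the invariant that evicted PVOs are kept at the front
 * of each PTEG's PVO list, ahead of the resident ones.
 */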
885
886 int
887 pmap_pte_spill(struct pmap *pm, vaddr_t addr, boolean_t exec)
888 {
889 struct pvo_entry *source_pvo, *victim_pvo, *next_pvo;
890 struct pvo_entry *pvo;
891 /* XXX: gcc -- vpvoh is always set at either *1* or *2* */
892 struct pvo_tqhead *pvoh, *vpvoh = NULL;
893 int ptegidx, i, j;
894 volatile struct pteg *pteg;
895 volatile struct pte *pt;
896
897 ptegidx = va_to_pteg(pm, addr);
898
899 /*
900 * Have to substitute some entry. Use the primary hash for this.
901 * Use low bits of timebase as random generator. Make sure we are
902 * not picking a kernel pte for replacement.
903 */
904 pteg = &pmap_pteg_table[ptegidx];
905 i = MFTB() & 7;
906 for (j = 0; j < 8; j++) {
907 pt = &pteg->pt[i];
908 if ((pt->pte_hi & PTE_VALID) == 0 ||
909 VSID_TO_HASH((pt->pte_hi & PTE_VSID) >> PTE_VSID_SHFT)
910 != KERNEL_VSIDBITS)
911 break;
912 i = (i + 1) & 7;
913 }
914 KASSERT(j < 8);
915
916 source_pvo = NULL;
917 victim_pvo = NULL;
918 pvoh = &pmap_pvo_table[ptegidx];
919 TAILQ_FOREACH(pvo, pvoh, pvo_olink) {
920
921 /*
922 * We need to find the pvo entry for this address...
923 */
924 PMAP_PVO_CHECK(pvo); /* sanity check */
925
926 /*
927 * If we haven't found the source and we come to a PVO with
928 * a valid PTE, then we know we can't find it, because
929 * evicted PVOs always come first in the list.
930 */
931 if (source_pvo == NULL && (pvo->pvo_pte.pte_hi & PTE_VALID))
932 break;
933 if (source_pvo == NULL && pm == pvo->pvo_pmap &&
934 addr == PVO_VADDR(pvo)) {
935
936 /*
937 * Now we have found the entry to be spilled into the
938 * pteg. Attempt to insert it into the page table.
939 */
940 j = pmap_pte_insert(ptegidx, &pvo->pvo_pte);
941 if (j >= 0) {
942 PVO_PTEGIDX_SET(pvo, j);
943 PMAP_PVO_CHECK(pvo); /* sanity check */
944 PVO_WHERE(pvo, SPILL_INSERT);
945 pvo->pvo_pmap->pm_evictions--;
946 PMAPCOUNT(ptes_spilled);
947 PMAPCOUNT2(((pvo->pvo_pte.pte_hi & PTE_HID)
948 ? pmap_evcnt_ptes_secondary
949 : pmap_evcnt_ptes_primary)[j]);
950
951 /*
952 * Since we keep the evicted entries at the
953 * front of the PVO list, we need to move this
954 * (now resident) PVO after the evicted
955 * entries.
956 */
957 next_pvo = TAILQ_NEXT(pvo, pvo_olink);
958
959 /*
960 * If we don't have to move (either we were the
961 * last entry or the next entry was valid),
962 * don't change our position. Otherwise
963 * move ourselves to the tail of the queue.
964 */
965 if (next_pvo != NULL &&
966 !(next_pvo->pvo_pte.pte_hi & PTE_VALID)) {
967 TAILQ_REMOVE(pvoh, pvo, pvo_olink);
968 TAILQ_INSERT_TAIL(pvoh, pvo, pvo_olink);
969 }
970 return 1;
971 }
972 source_pvo = pvo;
973 if (exec && !PVO_ISEXECUTABLE(source_pvo)) {
974 return 0;
975 }
976 if (victim_pvo != NULL)
977 break;
978 }
979
980 /*
981 * We also need the pvo entry of the victim we are replacing
982 * so save the R & C bits of the PTE.
983 */
984 if ((pt->pte_hi & PTE_HID) == 0 && victim_pvo == NULL &&
985 pmap_pte_compare(pt, &pvo->pvo_pte)) {
986 vpvoh = pvoh; /* *1* */
987 victim_pvo = pvo;
988 if (source_pvo != NULL)
989 break;
990 }
991 }
992
993 if (source_pvo == NULL) {
994 PMAPCOUNT(ptes_unspilled);
995 return 0;
996 }
997
998 if (victim_pvo == NULL) {
999 if ((pt->pte_hi & PTE_HID) == 0)
1000 panic("pmap_pte_spill: victim p-pte (%p) has "
1001 "no pvo entry!", pt);
1002
1003 /*
1004 * If this is a secondary PTE, we need to search
1005 * its primary pvo bucket for the matching PVO.
1006 */
1007 vpvoh = &pmap_pvo_table[ptegidx ^ pmap_pteg_mask]; /* *2* */
1008 TAILQ_FOREACH(pvo, vpvoh, pvo_olink) {
1009 PMAP_PVO_CHECK(pvo); /* sanity check */
1010
1011 /*
1012 * We also need the pvo entry of the victim we are
1013 * replacing so save the R & C bits of the PTE.
1014 */
1015 if (pmap_pte_compare(pt, &pvo->pvo_pte)) {
1016 victim_pvo = pvo;
1017 break;
1018 }
1019 }
1020 if (victim_pvo == NULL)
1021 panic("pmap_pte_spill: victim s-pte (%p) has "
1022 "no pvo entry!", pt);
1023 }
1024
1025 /*
1026 * The victim should not be a kernel PVO/PTE entry.
1027 */
1028 KASSERT(victim_pvo->pvo_pmap != pmap_kernel());
1029 KASSERT(PVO_PTEGIDX_ISSET(victim_pvo));
1030 KASSERT(PVO_PTEGIDX_GET(victim_pvo) == i);
1031
1032 /*
1033 * We are invalidating the TLB entry for the EA of the PTE
1034 * we are replacing even though it is still valid; if we don't,
1035 * we lose any ref/chg bit changes contained in the TLB
1036 * entry.
1037 */
1038 source_pvo->pvo_pte.pte_hi &= ~PTE_HID;
1039
1040 /*
1041 * To enforce the PVO list ordering constraint that all
1042 * evicted entries should come before all valid entries,
1043 * move the source PVO to the tail of its list and the
1044 * victim PVO to the head of its list (which might not be
1045 * the same list, if the victim was using the secondary hash).
1046 */
1047 TAILQ_REMOVE(pvoh, source_pvo, pvo_olink);
1048 TAILQ_INSERT_TAIL(pvoh, source_pvo, pvo_olink);
1049 TAILQ_REMOVE(vpvoh, victim_pvo, pvo_olink);
1050 TAILQ_INSERT_HEAD(vpvoh, victim_pvo, pvo_olink);
1051 pmap_pte_unset(pt, &victim_pvo->pvo_pte, victim_pvo->pvo_vaddr);
1052 pmap_pte_set(pt, &source_pvo->pvo_pte);
1053 victim_pvo->pvo_pmap->pm_evictions++;
1054 source_pvo->pvo_pmap->pm_evictions--;
1055 PVO_WHERE(victim_pvo, SPILL_UNSET);
1056 PVO_WHERE(source_pvo, SPILL_SET);
1057
1058 PVO_PTEGIDX_CLR(victim_pvo);
1059 PVO_PTEGIDX_SET(source_pvo, i);
1060 PMAPCOUNT2(pmap_evcnt_ptes_primary[i]);
1061 PMAPCOUNT(ptes_spilled);
1062 PMAPCOUNT(ptes_evicted);
1063 PMAPCOUNT(ptes_removed);
1064
1065 PMAP_PVO_CHECK(victim_pvo);
1066 PMAP_PVO_CHECK(source_pvo);
1067 return 1;
1068 }
1069
1070 /*
1071 * Restrict given range to physical memory
1072 */
1073 void
1074 pmap_real_memory(paddr_t *start, psize_t *size)
1075 {
1076 struct mem_region *mp;
1077
1078 for (mp = mem; mp->size; mp++) {
1079 if (*start + *size > mp->start
1080 && *start < mp->start + mp->size) {
1081 if (*start < mp->start) {
1082 *size -= mp->start - *start;
1083 *start = mp->start;
1084 }
1085 if (*start + *size > mp->start + mp->size)
1086 *size = mp->start + mp->size - *start;
1087 return;
1088 }
1089 }
1090 *size = 0;
1091 }
1092
1093 /*
1094 * Initialize anything else for pmap handling.
1095 * Called during vm_init().
1096 */
1097 void
1098 pmap_init(void)
1099 {
1100 #ifdef __HAVE_PMAP_PHYSSEG
1101 struct pvo_tqhead *pvoh;
1102 int bank;
1103 long sz;
1104 char *attr;
1105
1106 pvoh = pmap_physseg.pvoh;
1107 attr = pmap_physseg.attrs;
1108 for (bank = 0; bank < vm_nphysseg; bank++) {
1109 sz = vm_physmem[bank].end - vm_physmem[bank].start;
1110 vm_physmem[bank].pmseg.pvoh = pvoh;
1111 vm_physmem[bank].pmseg.attrs = attr;
1112 for (; sz > 0; sz--, pvoh++, attr++) {
1113 TAILQ_INIT(pvoh);
1114 *attr = 0;
1115 }
1116 }
1117 #endif
1118
1119 pool_init(&pmap_mpvo_pool, sizeof(struct pvo_entry),
1120 sizeof(struct pvo_entry), 0, 0, "pmap_mpvopl",
1121 &pmap_pool_mallocator);
1122
1123 pool_setlowat(&pmap_mpvo_pool, 1008);
1124
1125 pmap_initialized = 1;
1126
1127 }
1128
1129 /*
1130 * How much virtual space does the kernel get?
1131 */
1132 void
1133 pmap_virtual_space(vaddr_t *start, vaddr_t *end)
1134 {
1135 /*
1136 * For now, reserve one segment (minus some overhead) for kernel
1137 * virtual memory
1138 */
1139 *start = VM_MIN_KERNEL_ADDRESS;
1140 *end = VM_MAX_KERNEL_ADDRESS;
1141 }
1142
1143 /*
1144 * Allocate, initialize, and return a new physical map.
1145 */
1146 pmap_t
1147 pmap_create(void)
1148 {
1149 pmap_t pm;
1150
1151 pm = pool_get(&pmap_pool, PR_WAITOK);
1152 memset((caddr_t)pm, 0, sizeof *pm);
1153 pmap_pinit(pm);
1154
1155 DPRINTFN(CREATE,("pmap_create: pm %p:\n"
1156 "\t%06x %06x %06x %06x %06x %06x %06x %06x\n"
1157 "\t%06x %06x %06x %06x %06x %06x %06x %06x\n", pm,
1158 (unsigned int) pm->pm_sr[0], (unsigned int) pm->pm_sr[1],
1159 (unsigned int) pm->pm_sr[2], (unsigned int) pm->pm_sr[3],
1160 (unsigned int) pm->pm_sr[4], (unsigned int) pm->pm_sr[5],
1161 (unsigned int) pm->pm_sr[6], (unsigned int) pm->pm_sr[7],
1162 (unsigned int) pm->pm_sr[8], (unsigned int) pm->pm_sr[9],
1163 (unsigned int) pm->pm_sr[10], (unsigned int) pm->pm_sr[11],
1164 (unsigned int) pm->pm_sr[12], (unsigned int) pm->pm_sr[13],
1165 (unsigned int) pm->pm_sr[14], (unsigned int) pm->pm_sr[15]));
1166 return pm;
1167 }
1168
1169 /*
1170 * Initialize a preallocated and zeroed pmap structure.
1171 */
1172 void
1173 pmap_pinit(pmap_t pm)
1174 {
1175 register_t entropy = MFTB();
1176 register_t mask;
1177 int i;
1178
1179 /*
1180 * Allocate some segment registers for this pmap.
1181 */
1182 pm->pm_refs = 1;
1183 for (i = 0; i < NPMAPS; i += VSID_NBPW) {
1184 static register_t pmap_vsidcontext;
1185 register_t hash;
1186 unsigned int n;
1187
1188 /* Create a new value by multiplying by a prime and adding in
1189 * entropy from the timebase register. This is to make the
1190 * VSID more random so that the PT Hash function collides
1191 * less often. (note that the prime causes gcc to do shifts
1192 * instead of a multiply)
1193 */
1194 pmap_vsidcontext = (pmap_vsidcontext * 0x1105) + entropy;
1195 hash = pmap_vsidcontext & (NPMAPS - 1);
1196 if (hash == 0) { /* 0 is special, avoid it */
1197 entropy += 0xbadf00d;
1198 continue;
1199 }
1200 n = hash >> 5;
1201 mask = 1L << (hash & (VSID_NBPW-1));
1202 hash = pmap_vsidcontext;
1203 if (pmap_vsid_bitmap[n] & mask) { /* collision? */
1204 /* anything free in this bucket? */
1205 if (~pmap_vsid_bitmap[n] == 0) {
1206 entropy = hash ^ (hash >> 16);
1207 continue;
1208 }
1209 i = ffs(~pmap_vsid_bitmap[n]) - 1;
1210 mask = 1L << i;
1211 hash &= ~(VSID_NBPW-1);
1212 hash |= i;
1213 }
1214 hash &= PTE_VSID >> PTE_VSID_SHFT;
1215 pmap_vsid_bitmap[n] |= mask;
1216 pm->pm_vsid = hash;
1217 #ifndef PPC_OEA64
1218 for (i = 0; i < 16; i++)
1219 pm->pm_sr[i] = VSID_MAKE(i, hash) | SR_PRKEY |
1220 SR_NOEXEC;
1221 #endif
1222 return;
1223 }
1224 panic("pmap_pinit: out of segments");
1225 }
1226
1227 /*
1228 * Add a reference to the given pmap.
1229 */
1230 void
1231 pmap_reference(pmap_t pm)
1232 {
1233 pm->pm_refs++;
1234 }
1235
1236 /*
1237 * Retire the given pmap from service.
1238 * Should only be called if the map contains no valid mappings.
1239 */
1240 void
1241 pmap_destroy(pmap_t pm)
1242 {
1243 if (--pm->pm_refs == 0) {
1244 pmap_release(pm);
1245 pool_put(&pmap_pool, pm);
1246 }
1247 }
1248
1249 /*
1250 * Release any resources held by the given physical map.
1251 * Called when a pmap initialized by pmap_pinit is being released.
1252 */
1253 void
1254 pmap_release(pmap_t pm)
1255 {
1256 int idx, mask;
1257
1258 if (pm->pm_sr[0] == 0)
1259 panic("pmap_release");
1260 idx = pm->pm_vsid & (NPMAPS-1);
1261 mask = 1 << (idx % VSID_NBPW);
1262 idx /= VSID_NBPW;
1263
1264 KASSERT(pmap_vsid_bitmap[idx] & mask);
1265 pmap_vsid_bitmap[idx] &= ~mask;
1266 }
1267
1268 /*
1269 * Copy the range specified by src_addr/len
1270 * from the source map to the range dst_addr/len
1271 * in the destination map.
1272 *
1273 * This routine is only advisory and need not do anything.
1274 */
1275 void
1276 pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vaddr_t dst_addr,
1277 vsize_t len, vaddr_t src_addr)
1278 {
1279 PMAPCOUNT(copies);
1280 }
1281
1282 /*
1283 * Require that all active physical maps contain no
1284 * incorrect entries NOW.
1285 */
1286 void
1287 pmap_update(struct pmap *pmap)
1288 {
1289 PMAPCOUNT(updates);
1290 TLBSYNC();
1291 }
1292
1293 /*
1294 * Garbage collects the physical map system for
1295 * pages which are no longer used.
1296 * Success need not be guaranteed -- that is, there
1297 * may well be pages which are not referenced, but
1298 * others may be collected.
1299 * Called by the pageout daemon when pages are scarce.
1300 */
1301 void
1302 pmap_collect(pmap_t pm)
1303 {
1304 PMAPCOUNT(collects);
1305 }
1306
1307 static inline int
1308 pmap_pvo_pte_index(const struct pvo_entry *pvo, int ptegidx)
1309 {
1310 int pteidx;
1311 /*
1312 * We can find the actual pte entry without searching: the slot
1313 * within the PTEG is kept in the low bits of pvo_vaddr
1314 * (PVO_PTEGIDX_GET) and the HID bit selects the secondary hash.
1315 */
1316 pteidx = ptegidx * 8 + PVO_PTEGIDX_GET(pvo);
1317 if (pvo->pvo_pte.pte_hi & PTE_HID)
1318 pteidx ^= pmap_pteg_mask * 8;
1319 return pteidx;
1320 }
1321
1322 volatile struct pte *
1323 pmap_pvo_to_pte(const struct pvo_entry *pvo, int pteidx)
1324 {
1325 volatile struct pte *pt;
1326
1327 #if !defined(DIAGNOSTIC) && !defined(DEBUG) && !defined(PMAPCHECK)
1328 if ((pvo->pvo_pte.pte_hi & PTE_VALID) == 0)
1329 return NULL;
1330 #endif
1331
1332 /*
1333 * If we haven't been supplied the pteidx, calculate it.
1334 */
1335 if (pteidx == -1) {
1336 int ptegidx;
1337 ptegidx = va_to_pteg(pvo->pvo_pmap, pvo->pvo_vaddr);
1338 pteidx = pmap_pvo_pte_index(pvo, ptegidx);
1339 }
1340
1341 pt = &pmap_pteg_table[pteidx >> 3].pt[pteidx & 7];
1342
1343 #if !defined(DIAGNOSTIC) && !defined(DEBUG) && !defined(PMAPCHECK)
1344 return pt;
1345 #else
1346 if ((pvo->pvo_pte.pte_hi & PTE_VALID) && !PVO_PTEGIDX_ISSET(pvo)) {
1347 panic("pmap_pvo_to_pte: pvo %p: has valid pte in "
1348 "pvo but no valid pte index", pvo);
1349 }
1350 if ((pvo->pvo_pte.pte_hi & PTE_VALID) == 0 && PVO_PTEGIDX_ISSET(pvo)) {
1351 panic("pmap_pvo_to_pte: pvo %p: has valid pte index in "
1352 "pvo but no valid pte", pvo);
1353 }
1354
1355 if ((pt->pte_hi ^ (pvo->pvo_pte.pte_hi & ~PTE_VALID)) == PTE_VALID) {
1356 if ((pvo->pvo_pte.pte_hi & PTE_VALID) == 0) {
1357 #if defined(DEBUG) || defined(PMAPCHECK)
1358 pmap_pte_print(pt);
1359 #endif
1360 panic("pmap_pvo_to_pte: pvo %p: has valid pte in "
1361 "pmap_pteg_table %p but invalid in pvo",
1362 pvo, pt);
1363 }
1364 if (((pt->pte_lo ^ pvo->pvo_pte.pte_lo) & ~(PTE_CHG|PTE_REF)) != 0) {
1365 #if defined(DEBUG) || defined(PMAPCHECK)
1366 pmap_pte_print(pt);
1367 #endif
1368 panic("pmap_pvo_to_pte: pvo %p: pvo pte does "
1369 "not match pte %p in pmap_pteg_table",
1370 pvo, pt);
1371 }
1372 return pt;
1373 }
1374
1375 if (pvo->pvo_pte.pte_hi & PTE_VALID) {
1376 #if defined(DEBUG) || defined(PMAPCHECK)
1377 pmap_pte_print(pt);
1378 #endif
1379 panic("pmap_pvo_to_pte: pvo %p: has no matching pte %p in "
1380 "pmap_pteg_table but valid in pvo", pvo, pt);
1381 }
1382 return NULL;
1383 #endif /* !(!DIAGNOSTIC && !DEBUG && !PMAPCHECK) */
1384 }
1385
1386 struct pvo_entry *
1387 pmap_pvo_find_va(pmap_t pm, vaddr_t va, int *pteidx_p)
1388 {
1389 struct pvo_entry *pvo;
1390 int ptegidx;
1391
1392 va &= ~ADDR_POFF;
1393 ptegidx = va_to_pteg(pm, va);
1394
1395 TAILQ_FOREACH(pvo, &pmap_pvo_table[ptegidx], pvo_olink) {
1396 #if defined(DIAGNOSTIC) || defined(DEBUG) || defined(PMAPCHECK)
1397 if ((uintptr_t) pvo >= SEGMENT_LENGTH)
1398 panic("pmap_pvo_find_va: invalid pvo %p on "
1399 "list %#x (%p)", pvo, ptegidx,
1400 &pmap_pvo_table[ptegidx]);
1401 #endif
1402 if (pvo->pvo_pmap == pm && PVO_VADDR(pvo) == va) {
1403 if (pteidx_p)
1404 *pteidx_p = pmap_pvo_pte_index(pvo, ptegidx);
1405 return pvo;
1406 }
1407 }
1408 return NULL;
1409 }
1410
1411 #if defined(DEBUG) || defined(PMAPCHECK)
1412 void
1413 pmap_pvo_check(const struct pvo_entry *pvo)
1414 {
1415 struct pvo_head *pvo_head;
1416 struct pvo_entry *pvo0;
1417 volatile struct pte *pt;
1418 int failed = 0;
1419
1420 if ((uintptr_t)(pvo+1) >= SEGMENT_LENGTH)
1421 panic("pmap_pvo_check: pvo %p: invalid address", pvo);
1422
1423 if ((uintptr_t)(pvo->pvo_pmap+1) >= SEGMENT_LENGTH) {
1424 printf("pmap_pvo_check: pvo %p: invalid pmap address %p\n",
1425 pvo, pvo->pvo_pmap);
1426 failed = 1;
1427 }
1428
1429 if ((uintptr_t)TAILQ_NEXT(pvo, pvo_olink) >= SEGMENT_LENGTH ||
1430 (((uintptr_t)TAILQ_NEXT(pvo, pvo_olink)) & 0x1f) != 0) {
1431 printf("pmap_pvo_check: pvo %p: invalid ovlink address %p\n",
1432 pvo, TAILQ_NEXT(pvo, pvo_olink));
1433 failed = 1;
1434 }
1435
1436 if ((uintptr_t)LIST_NEXT(pvo, pvo_vlink) >= SEGMENT_LENGTH ||
1437 (((uintptr_t)LIST_NEXT(pvo, pvo_vlink)) & 0x1f) != 0) {
1438 printf("pmap_pvo_check: pvo %p: invalid ovlink address %p\n",
1439 pvo, LIST_NEXT(pvo, pvo_vlink));
1440 failed = 1;
1441 }
1442
1443 if (pvo->pvo_vaddr & PVO_MANAGED) {
1444 pvo_head = pa_to_pvoh(pvo->pvo_pte.pte_lo & PTE_RPGN, NULL);
1445 } else {
1446 if (pvo->pvo_vaddr < VM_MIN_KERNEL_ADDRESS) {
1447 printf("pmap_pvo_check: pvo %p: non kernel address "
1448 "on kernel unmanaged list\n", pvo);
1449 failed = 1;
1450 }
1451 pvo_head = &pmap_pvo_kunmanaged;
1452 }
1453 LIST_FOREACH(pvo0, pvo_head, pvo_vlink) {
1454 if (pvo0 == pvo)
1455 break;
1456 }
1457 if (pvo0 == NULL) {
1458 printf("pmap_pvo_check: pvo %p: not present "
1459 "on its vlist head %p\n", pvo, pvo_head);
1460 failed = 1;
1461 }
1462 if (pvo != pmap_pvo_find_va(pvo->pvo_pmap, pvo->pvo_vaddr, NULL)) {
1463 printf("pmap_pvo_check: pvo %p: not present "
1464 "on its olist head\n", pvo);
1465 failed = 1;
1466 }
1467 pt = pmap_pvo_to_pte(pvo, -1);
1468 if (pt == NULL) {
1469 if (pvo->pvo_pte.pte_hi & PTE_VALID) {
1470 printf("pmap_pvo_check: pvo %p: pte_hi VALID but "
1471 "no PTE\n", pvo);
1472 failed = 1;
1473 }
1474 } else {
1475 if ((uintptr_t) pt < (uintptr_t) &pmap_pteg_table[0] ||
1476 (uintptr_t) pt >=
1477 (uintptr_t) &pmap_pteg_table[pmap_pteg_cnt]) {
1478 printf("pmap_pvo_check: pvo %p: pte %p not in "
1479 "pteg table\n", pvo, pt);
1480 failed = 1;
1481 }
1482 if (((((uintptr_t) pt) >> 3) & 7) != PVO_PTEGIDX_GET(pvo)) {
1483 printf("pmap_pvo_check: pvo %p: pvo pte index does not "
1484 "match its pteg slot\n", pvo);
1485 failed = 1;
1486 }
1487 if (pvo->pvo_pte.pte_hi != pt->pte_hi) {
1488 printf("pmap_pvo_check: pvo %p: pte_hi differ: "
1489 "%#x/%#x\n", pvo, (unsigned int) pvo->pvo_pte.pte_hi, (unsigned int) pt->pte_hi);
1490 failed = 1;
1491 }
1492 if (((pvo->pvo_pte.pte_lo ^ pt->pte_lo) &
1493 (PTE_PP|PTE_WIMG|PTE_RPGN)) != 0) {
1494 printf("pmap_pvo_check: pvo %p: pte_lo differ: "
1495 "%#x/%#x\n", pvo,
1496 (unsigned int) (pvo->pvo_pte.pte_lo & (PTE_PP|PTE_WIMG|PTE_RPGN)),
1497 (unsigned int) (pt->pte_lo & (PTE_PP|PTE_WIMG|PTE_RPGN)));
1498 failed = 1;
1499 }
1500 if ((pmap_pte_to_va(pt) ^ PVO_VADDR(pvo)) & 0x0fffffff) {
1501 printf("pmap_pvo_check: pvo %p: PTE %p derived VA %#lx"
1502 " does not match PVO's VA %#lx\n",
1503 pvo, pt, pmap_pte_to_va(pt), PVO_VADDR(pvo));
1504 failed = 1;
1505 }
1506 if (failed)
1507 pmap_pte_print(pt);
1508 }
1509 if (failed)
1510 panic("pmap_pvo_check: pvo %p, pm %p: bugcheck!", pvo,
1511 pvo->pvo_pmap);
1512 }
1513 #endif /* DEBUG || PMAPCHECK */
1514
1515 /*
1516 * Search the PVO table looking for a non-wired entry.
1517 * If we find one, remove it and return it.
1518 */
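/*
 * The scan resumes from pmap_pvo_reclaim_nextidx so that successive
 * reclaims walk the PVO table round-robin instead of repeatedly
 * evicting from the same PTEG.
 */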
1519
1520 struct pvo_entry *
1521 pmap_pvo_reclaim(struct pmap *pm)
1522 {
1523 struct pvo_tqhead *pvoh;
1524 struct pvo_entry *pvo;
1525 uint32_t idx, endidx;
1526
1527 endidx = pmap_pvo_reclaim_nextidx;
1528 for (idx = (endidx + 1) & pmap_pteg_mask; idx != endidx;
1529 idx = (idx + 1) & pmap_pteg_mask) {
1530 pvoh = &pmap_pvo_table[idx];
1531 TAILQ_FOREACH(pvo, pvoh, pvo_olink) {
1532 if ((pvo->pvo_vaddr & PVO_WIRED) == 0) {
1533 pmap_pvo_remove(pvo, -1, NULL);
1534 pmap_pvo_reclaim_nextidx = idx;
1535 PMAPCOUNT(pvos_reclaimed);
1536 return pvo;
1537 }
1538 }
1539 }
1540 return NULL;
1541 }
1542
1543 /*
1544 * Enter a mapping into the pvo table; returns 0 on success or ENOMEM on failure.
1545 */
1546 int
1547 pmap_pvo_enter(pmap_t pm, struct pool *pl, struct pvo_head *pvo_head,
1548 vaddr_t va, paddr_t pa, register_t pte_lo, int flags)
1549 {
1550 struct pvo_entry *pvo;
1551 struct pvo_tqhead *pvoh;
1552 register_t msr;
1553 int ptegidx;
1554 int i;
1555 int poolflags = PR_NOWAIT;
1556
1557 /*
1558 * Compute the PTE Group index.
1559 */
1560 va &= ~ADDR_POFF;
1561 ptegidx = va_to_pteg(pm, va);
1562
1563 msr = pmap_interrupts_off();
1564
1565 #if defined(DIAGNOSTIC) || defined(DEBUG) || defined(PMAPCHECK)
1566 if (pmap_pvo_remove_depth > 0)
1567 panic("pmap_pvo_enter: called while pmap_pvo_remove active!");
1568 if (++pmap_pvo_enter_depth > 1)
1569 panic("pmap_pvo_enter: called recursively!");
1570 #endif
1571
1572 /*
1573 * Remove any existing mapping for this page; its pvo entry
1574 * is freed and a new one is allocated below.
1575 */
1576 TAILQ_FOREACH(pvo, &pmap_pvo_table[ptegidx], pvo_olink) {
1577 if (pvo->pvo_pmap == pm && PVO_VADDR(pvo) == va) {
1578 #ifdef DEBUG
1579 if ((pmapdebug & PMAPDEBUG_PVOENTER) &&
1580 ((pvo->pvo_pte.pte_lo ^ (pa|pte_lo)) &
1581 ~(PTE_REF|PTE_CHG)) == 0 &&
1582 va < VM_MIN_KERNEL_ADDRESS) {
1583 printf("pmap_pvo_enter: pvo %p: dup %#x/%#lx\n",
1584 pvo, (unsigned int) pvo->pvo_pte.pte_lo, (unsigned int) pte_lo|pa);
1585 printf("pmap_pvo_enter: pte_hi=%#x sr=%#x\n",
1586 (unsigned int) pvo->pvo_pte.pte_hi,
1587 (unsigned int) pm->pm_sr[va >> ADDR_SR_SHFT]);
1588 pmap_pte_print(pmap_pvo_to_pte(pvo, -1));
1589 #ifdef DDBX
1590 Debugger();
1591 #endif
1592 }
1593 #endif
1594 PMAPCOUNT(mappings_replaced);
1595 pmap_pvo_remove(pvo, -1, NULL);
1596 break;
1597 }
1598 }
1599
1600 /*
1601 * Free any replaced pvo entry and allocate a new one (with the interrupt state restored around the pool allocation).
1602 */
1603 #if defined(DIAGNOSTIC) || defined(DEBUG) || defined(PMAPCHECK)
1604 --pmap_pvo_enter_depth;
1605 #endif
1606 pmap_interrupts_restore(msr);
1607 if (pvo) {
1608 pmap_pvo_free(pvo);
1609 }
1610 pvo = pool_get(pl, poolflags);
1611
1612 #ifdef DEBUG
1613 /*
1614 * Exercise pmap_pvo_reclaim() a little.
1615 */
1616 if (pvo && (flags & PMAP_CANFAIL) != 0 &&
1617 pmap_pvo_reclaim_debugctr++ > 0x1000 &&
1618 (pmap_pvo_reclaim_debugctr & 0xff) == 0) {
1619 pool_put(pl, pvo);
1620 pvo = NULL;
1621 }
1622 #endif
1623
1624 msr = pmap_interrupts_off();
1625 #if defined(DIAGNOSTIC) || defined(DEBUG) || defined(PMAPCHECK)
1626 ++pmap_pvo_enter_depth;
1627 #endif
1628 if (pvo == NULL) {
1629 pvo = pmap_pvo_reclaim(pm);
1630 if (pvo == NULL) {
1631 if ((flags & PMAP_CANFAIL) == 0)
1632 panic("pmap_pvo_enter: failed");
1633 #if defined(DIAGNOSTIC) || defined(DEBUG) || defined(PMAPCHECK)
1634 pmap_pvo_enter_depth--;
1635 #endif
1636 PMAPCOUNT(pvos_failed);
1637 pmap_interrupts_restore(msr);
1638 return ENOMEM;
1639 }
1640 }
1641
1642 pvo->pvo_vaddr = va;
1643 pvo->pvo_pmap = pm;
1644 pvo->pvo_vaddr &= ~ADDR_POFF;
1645 if (flags & VM_PROT_EXECUTE) {
1646 PMAPCOUNT(exec_mappings);
1647 pvo_set_exec(pvo);
1648 }
1649 if (flags & PMAP_WIRED)
1650 pvo->pvo_vaddr |= PVO_WIRED;
1651 if (pvo_head != &pmap_pvo_kunmanaged) {
1652 pvo->pvo_vaddr |= PVO_MANAGED;
1653 PMAPCOUNT(mappings);
1654 } else {
1655 PMAPCOUNT(kernel_mappings);
1656 }
1657 pmap_pte_create(&pvo->pvo_pte, pm, va, pa | pte_lo);
1658
1659 LIST_INSERT_HEAD(pvo_head, pvo, pvo_vlink);
1660 if (pvo->pvo_pte.pte_lo & PVO_WIRED)
1661 pvo->pvo_pmap->pm_stats.wired_count++;
1662 pvo->pvo_pmap->pm_stats.resident_count++;
1663 #if defined(DEBUG)
1664 if (pm != pmap_kernel() && va < VM_MIN_KERNEL_ADDRESS)
1665 DPRINTFN(PVOENTER,
1666 ("pmap_pvo_enter: pvo %p: pm %p va %#lx pa %#lx\n",
1667 pvo, pm, va, pa));
1668 #endif
1669
1670 /*
1671 * We hope this succeeds but it isn't required.
1672 */
1673 pvoh = &pmap_pvo_table[ptegidx];
1674 i = pmap_pte_insert(ptegidx, &pvo->pvo_pte);
1675 if (i >= 0) {
1676 PVO_PTEGIDX_SET(pvo, i);
1677 PVO_WHERE(pvo, ENTER_INSERT);
1678 PMAPCOUNT2(((pvo->pvo_pte.pte_hi & PTE_HID)
1679 ? pmap_evcnt_ptes_secondary : pmap_evcnt_ptes_primary)[i]);
1680 TAILQ_INSERT_TAIL(pvoh, pvo, pvo_olink);
1681 } else {
1682 /*
1683 * Since we didn't have room for this entry (which makes it
1684 * an evicted entry), place it at the head of the list.
1685 */
1686 TAILQ_INSERT_HEAD(pvoh, pvo, pvo_olink);
1687 PMAPCOUNT(ptes_evicted);
1688 pm->pm_evictions++;
1689 /*
1690 * If this is a kernel page, make sure it's active.
1691 */
1692 if (pm == pmap_kernel()) {
1693 i = pmap_pte_spill(pm, va, FALSE);
1694 KASSERT(i);
1695 }
1696 }
1697 PMAP_PVO_CHECK(pvo); /* sanity check */
1698 #if defined(DIAGNOSTIC) || defined(DEBUG) || defined(PMAPCHECK)
1699 pmap_pvo_enter_depth--;
1700 #endif
1701 pmap_interrupts_restore(msr);
1702 return 0;
1703 }
1704
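/*
 * Undo a pmap_pvo_enter(): tear down any resident PTE (saving its
 * REF/CHG bits), update the exec accounting and statistics, and unlink
 * the PVO.  If pvol is non-NULL the PVO is placed on that list for the
 * caller to free later with pmap_pvo_free_list().
 */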
1705 void
1706 pmap_pvo_remove(struct pvo_entry *pvo, int pteidx, struct pvo_head *pvol)
1707 {
1708 volatile struct pte *pt;
1709 int ptegidx;
1710
1711 #if defined(DIAGNOSTIC) || defined(DEBUG) || defined(PMAPCHECK)
1712 if (++pmap_pvo_remove_depth > 1)
1713 panic("pmap_pvo_remove: called recursively!");
1714 #endif
1715
1716 /*
1717 * If we haven't been supplied the pteidx, calculate it.
1718 */
1719 if (pteidx == -1) {
1720 ptegidx = va_to_pteg(pvo->pvo_pmap, pvo->pvo_vaddr);
1721 pteidx = pmap_pvo_pte_index(pvo, ptegidx);
1722 } else {
1723 ptegidx = pteidx >> 3;
1724 if (pvo->pvo_pte.pte_hi & PTE_HID)
1725 ptegidx ^= pmap_pteg_mask;
1726 }
1727 PMAP_PVO_CHECK(pvo); /* sanity check */
1728
1729 /*
1730 * If there is an active pte entry, we need to deactivate it
1731 * (and save the ref & chg bits).
1732 */
1733 pt = pmap_pvo_to_pte(pvo, pteidx);
1734 if (pt != NULL) {
1735 pmap_pte_unset(pt, &pvo->pvo_pte, pvo->pvo_vaddr);
1736 PVO_WHERE(pvo, REMOVE);
1737 PVO_PTEGIDX_CLR(pvo);
1738 PMAPCOUNT(ptes_removed);
1739 } else {
1740 KASSERT(pvo->pvo_pmap->pm_evictions > 0);
1741 pvo->pvo_pmap->pm_evictions--;
1742 }
1743
1744 /*
1745 * Account for executable mappings.
1746 */
1747 if (PVO_ISEXECUTABLE(pvo))
1748 pvo_clear_exec(pvo);
1749
1750 /*
1751 * Update our statistics.
1752 */
1753 pvo->pvo_pmap->pm_stats.resident_count--;
1754 if (pvo->pvo_pte.pte_lo & PVO_WIRED)
1755 pvo->pvo_pmap->pm_stats.wired_count--;
1756
1757 /*
1758 * Save the REF/CHG bits into their cache if the page is managed.
1759 */
1760 if (pvo->pvo_vaddr & PVO_MANAGED) {
1761 register_t ptelo = pvo->pvo_pte.pte_lo;
1762 struct vm_page *pg = PHYS_TO_VM_PAGE(ptelo & PTE_RPGN);
1763
1764 if (pg != NULL) {
1765 /*
1766 * If this page was changed and it is mapped exec,
1767 * invalidate it.
1768 */
1769 if ((ptelo & PTE_CHG) &&
1770 (pmap_attr_fetch(pg) & PTE_EXEC)) {
1771 struct pvo_head *pvoh = vm_page_to_pvoh(pg);
1772 if (LIST_EMPTY(pvoh)) {
1773 DPRINTFN(EXEC, ("[pmap_pvo_remove: "
1774 "%#lx: clear-exec]\n",
1775 VM_PAGE_TO_PHYS(pg)));
1776 pmap_attr_clear(pg, PTE_EXEC);
1777 PMAPCOUNT(exec_uncached_pvo_remove);
1778 } else {
1779 DPRINTFN(EXEC, ("[pmap_pvo_remove: "
1780 "%#lx: syncicache]\n",
1781 VM_PAGE_TO_PHYS(pg)));
1782 pmap_syncicache(VM_PAGE_TO_PHYS(pg),
1783 PAGE_SIZE);
1784 PMAPCOUNT(exec_synced_pvo_remove);
1785 }
1786 }
1787
1788 pmap_attr_save(pg, ptelo & (PTE_REF|PTE_CHG));
1789 }
1790 PMAPCOUNT(unmappings);
1791 } else {
1792 PMAPCOUNT(kernel_unmappings);
1793 }
1794
1795 /*
1796 * Remove the PVO from its lists and return it to the pool.
1797 */
1798 LIST_REMOVE(pvo, pvo_vlink);
1799 TAILQ_REMOVE(&pmap_pvo_table[ptegidx], pvo, pvo_olink);
1800 if (pvol) {
1801 LIST_INSERT_HEAD(pvol, pvo, pvo_vlink);
1802 }
1803 #if defined(DIAGNOSTIC) || defined(DEBUG) || defined(PMAPCHECK)
1804 pmap_pvo_remove_depth--;
1805 #endif
1806 }
1807
1808 void
1809 pmap_pvo_free(struct pvo_entry *pvo)
1810 {
1811
1812 pool_put(pvo->pvo_vaddr & PVO_MANAGED ? &pmap_mpvo_pool :
1813 &pmap_upvo_pool, pvo);
1814 }
1815
1816 void
1817 pmap_pvo_free_list(struct pvo_head *pvol)
1818 {
1819 struct pvo_entry *pvo, *npvo;
1820
1821 for (pvo = LIST_FIRST(pvol); pvo != NULL; pvo = npvo) {
1822 npvo = LIST_NEXT(pvo, pvo_vlink);
1823 LIST_REMOVE(pvo, pvo_vlink);
1824 pmap_pvo_free(pvo);
1825 }
1826 }
1827
1828 /*
1829 * Mark a mapping as executable.
1830 * If this is the first executable mapping in the segment,
1831 * clear the noexec flag.
1832 */
1833 STATIC void
1834 pvo_set_exec(struct pvo_entry *pvo)
1835 {
1836 struct pmap *pm = pvo->pvo_pmap;
1837
1838 if (pm == pmap_kernel() || PVO_ISEXECUTABLE(pvo)) {
1839 return;
1840 }
1841 pvo->pvo_vaddr |= PVO_EXECUTABLE;
1842 #ifdef PPC_OEA
1843 {
1844 int sr = PVO_VADDR(pvo) >> ADDR_SR_SHFT;
1845 if (pm->pm_exec[sr]++ == 0) {
1846 pm->pm_sr[sr] &= ~SR_NOEXEC;
1847 }
1848 }
1849 #endif
1850 }
1851
1852 /*
1853 * Mark a mapping as non-executable.
1854 * If this was the last executable mapping in the segment,
1855 * set the noexec flag.
1856 */
1857 STATIC void
1858 pvo_clear_exec(struct pvo_entry *pvo)
1859 {
1860 struct pmap *pm = pvo->pvo_pmap;
1861
1862 if (pm == pmap_kernel() || !PVO_ISEXECUTABLE(pvo)) {
1863 return;
1864 }
1865 pvo->pvo_vaddr &= ~PVO_EXECUTABLE;
1866 #ifdef PPC_OEA
1867 {
1868 int sr = PVO_VADDR(pvo) >> ADDR_SR_SHFT;
1869 if (--pm->pm_exec[sr] == 0) {
1870 pm->pm_sr[sr] |= SR_NOEXEC;
1871 }
1872 }
1873 #endif
1874 }
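
/*
 * Illustrative note (not part of the original source): the two helpers
 * above keep a per-segment count of executable mappings so SR_NOEXEC is
 * only touched on the 0 <-> 1 transitions.  Assuming the usual 32-bit
 * OEA layout of sixteen 256MB segments selected by the top four VA bits
 * (i.e. ADDR_SR_SHFT == 28), the index works out as, for example:
 *
 *	PVO_VADDR(pvo) == 0x30021000  =>  sr == 0x30021000 >> 28 == 3
 *
 * so pm_exec[3] and pm_sr[3] are the entries adjusted.
 */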
1875
1876 /*
1877 * Insert physical page at pa into the given pmap at virtual address va.
1878 */
1879 int
1880 pmap_enter(pmap_t pm, vaddr_t va, paddr_t pa, vm_prot_t prot, int flags)
1881 {
1882 struct mem_region *mp;
1883 struct pvo_head *pvo_head;
1884 struct vm_page *pg;
1885 struct pool *pl;
1886 register_t pte_lo;
1887 int error;
1888 u_int pvo_flags;
1889 u_int was_exec = 0;
1890
1891 if (__predict_false(!pmap_initialized)) {
1892 pvo_head = &pmap_pvo_kunmanaged;
1893 pl = &pmap_upvo_pool;
1894 pvo_flags = 0;
1895 pg = NULL;
1896 was_exec = PTE_EXEC;
1897 } else {
1898 pvo_head = pa_to_pvoh(pa, &pg);
1899 pl = &pmap_mpvo_pool;
1900 pvo_flags = PVO_MANAGED;
1901 }
1902
1903 DPRINTFN(ENTER,
1904 ("pmap_enter(%p, 0x%lx, 0x%lx, 0x%x, 0x%x):",
1905 pm, va, pa, prot, flags));
1906
1907 /*
1908 * If this is a managed page, fetch its cached executable status
1909 * ("execness"); on the first reference to the page it will be clear.
1910 */
1911 if (pg != NULL)
1912 was_exec = pmap_attr_fetch(pg) & PTE_EXEC;
1913
1914 DPRINTFN(ENTER, (" was_exec=%d", was_exec));
1915
1916 /*
1917 * Assume the page is cache inhibited and access is guarded unless
1918 * it's in our available memory array. If it is in the memory array,
1919 * assume it's memory-coherent.
1920 */
1921 pte_lo = PTE_IG;
1922 if ((flags & PMAP_NC) == 0) {
1923 for (mp = mem; mp->size; mp++) {
1924 if (pa >= mp->start && pa < mp->start + mp->size) {
1925 pte_lo = PTE_M;
1926 break;
1927 }
1928 }
1929 }
1930
1931 if (prot & VM_PROT_WRITE)
1932 pte_lo |= PTE_BW;
1933 else
1934 pte_lo |= PTE_BR;
1935
1936 /*
1937 * If this was in response to a fault, "pre-fault" the PTE's
1938 * changed/referenced bit appropriately.
1939 */
1940 if (flags & VM_PROT_WRITE)
1941 pte_lo |= PTE_CHG;
1942 if (flags & VM_PROT_ALL)
1943 pte_lo |= PTE_REF;
1944
1945 /*
1946 * We need to know if this page can be executable
1947 */
1948 flags |= (prot & VM_PROT_EXECUTE);
1949
1950 /*
1951 * Record mapping for later back-translation and pte spilling.
1952 * This will overwrite any existing mapping.
1953 */
1954 error = pmap_pvo_enter(pm, pl, pvo_head, va, pa, pte_lo, flags);
1955
1956 /*
1957 * Flush the real page from the instruction cache if this page is
1958 * mapped executable and cacheable and has not been flushed since
1959 * the last time it was modified.
1960 */
1961 if (error == 0 &&
1962 (flags & VM_PROT_EXECUTE) &&
1963 (pte_lo & PTE_I) == 0 &&
1964 was_exec == 0) {
1965 DPRINTFN(ENTER, (" syncicache"));
1966 PMAPCOUNT(exec_synced);
1967 pmap_syncicache(pa, PAGE_SIZE);
1968 if (pg != NULL) {
1969 pmap_attr_save(pg, PTE_EXEC);
1970 PMAPCOUNT(exec_cached);
1971 #if defined(DEBUG) || defined(PMAPDEBUG)
1972 if (pmapdebug & PMAPDEBUG_ENTER)
1973 printf(" marked-as-exec");
1974 else if (pmapdebug & PMAPDEBUG_EXEC)
1975 printf("[pmap_enter: %#lx: marked-as-exec]\n",
1976 VM_PAGE_TO_PHYS(pg));
1977
1978 #endif
1979 }
1980 }
1981
1982 DPRINTFN(ENTER, (": error=%d\n", error));
1983
1984 return error;
1985 }
1986
1987 void
1988 pmap_kenter_pa(vaddr_t va, paddr_t pa, vm_prot_t prot)
1989 {
1990 struct mem_region *mp;
1991 register_t pte_lo;
1992 int error;
1993
1994 if (va < VM_MIN_KERNEL_ADDRESS)
1995 panic("pmap_kenter_pa: attempt to enter "
1996 "non-kernel address %#lx!", va);
1997
1998 DPRINTFN(KENTER,
1999 ("pmap_kenter_pa(%#lx,%#lx,%#x)\n", va, pa, prot));
2000
2001 /*
2002 * Assume the page is cache inhibited and access is guarded unless
2003 * it's in our available memory array. If it is in the memory array,
2004 * assume it's memory-coherent.
2005 */
2006 pte_lo = PTE_IG;
2007 if ((prot & PMAP_NC) == 0) {
2008 for (mp = mem; mp->size; mp++) {
2009 if (pa >= mp->start && pa < mp->start + mp->size) {
2010 pte_lo = PTE_M;
2011 break;
2012 }
2013 }
2014 }
2015
2016 if (prot & VM_PROT_WRITE)
2017 pte_lo |= PTE_BW;
2018 else
2019 pte_lo |= PTE_BR;
2020
2021 /*
2022 * We don't care about REF/CHG on PVOs on the unmanaged list.
2023 */
2024 error = pmap_pvo_enter(pmap_kernel(), &pmap_upvo_pool,
2025 &pmap_pvo_kunmanaged, va, pa, pte_lo, prot|PMAP_WIRED);
2026
2027 if (error != 0)
2028 panic("pmap_kenter_pa: failed to enter va %#lx pa %#lx: %d",
2029 va, pa, error);
2030 }
2031
2032 void
2033 pmap_kremove(vaddr_t va, vsize_t len)
2034 {
2035 if (va < VM_MIN_KERNEL_ADDRESS)
2036 panic("pmap_kremove: attempt to remove "
2037 "non-kernel address %#lx!", va);
2038
2039 DPRINTFN(KREMOVE,("pmap_kremove(%#lx,%#lx)\n", va, len));
2040 pmap_remove(pmap_kernel(), va, va + len);
2041 }
2042
2043 /*
2044 * Remove the given range of mapping entries.
2045 */
2046 void
2047 pmap_remove(pmap_t pm, vaddr_t va, vaddr_t endva)
2048 {
2049 struct pvo_head pvol;
2050 struct pvo_entry *pvo;
2051 register_t msr;
2052 int pteidx;
2053
2054 LIST_INIT(&pvol);
2055 msr = pmap_interrupts_off();
2056 for (; va < endva; va += PAGE_SIZE) {
2057 pvo = pmap_pvo_find_va(pm, va, &pteidx);
2058 if (pvo != NULL) {
2059 pmap_pvo_remove(pvo, pteidx, &pvol);
2060 }
2061 }
2062 pmap_interrupts_restore(msr);
2063 pmap_pvo_free_list(&pvol);
2064 }
2065
2066 /*
2067 * Get the physical page address for the given pmap/virtual address.
2068 */
2069 boolean_t
2070 pmap_extract(pmap_t pm, vaddr_t va, paddr_t *pap)
2071 {
2072 struct pvo_entry *pvo;
2073 register_t msr;
2074
2075 /*
2076 * If this is a kernel pmap lookup, also check the battable
2077 * and if we get a hit, translate the VA to a PA using the
2078 * BAT entries. Don't check against VM_MAX_KERNEL_ADDRESS when it
2079 * would wrap back to 0 (i.e. when KERNEL2_SR is the last segment).
2080 */
2081 if (pm == pmap_kernel() &&
2082 (va < VM_MIN_KERNEL_ADDRESS ||
2083 (KERNEL2_SR < 15 && VM_MAX_KERNEL_ADDRESS <= va))) {
2084 KASSERT((va >> ADDR_SR_SHFT) != USER_SR);
2085 if ((MFPVR() >> 16) != MPC601) {
2086 register_t batu = battable[va >> ADDR_SR_SHFT].batu;
2087 if (BAT_VALID_P(batu,0) && BAT_VA_MATCH_P(batu,va)) {
2088 register_t batl =
2089 battable[va >> ADDR_SR_SHFT].batl;
2090 register_t mask =
2091 (~(batu & BAT_BL) << 15) & ~0x1ffffL;
2092 if (pap)
2093 *pap = (batl & mask) | (va & ~mask);
2094 return TRUE;
2095 }
2096 } else {
2097 register_t batu = battable[va >> 23].batu;
2098 register_t batl = battable[va >> 23].batl;
2099 register_t sr = iosrtable[va >> ADDR_SR_SHFT];
2100 if (BAT601_VALID_P(batl) &&
2101 BAT601_VA_MATCH_P(batu, batl, va)) {
2102 register_t mask =
2103 (~(batl & BAT601_BSM) << 17) & ~0x1ffffL;
2104 if (pap)
2105 *pap = (batl & mask) | (va & ~mask);
2106 return TRUE;
2107 } else if (SR601_VALID_P(sr) &&
2108 SR601_PA_MATCH_P(sr, va)) {
2109 if (pap)
2110 *pap = va;
2111 return TRUE;
2112 }
2113 }
2114 return FALSE;
2115 }
2116
2117 msr = pmap_interrupts_off();
2118 pvo = pmap_pvo_find_va(pm, va & ~ADDR_POFF, NULL);
2119 if (pvo != NULL) {
2120 PMAP_PVO_CHECK(pvo); /* sanity check */
2121 if (pap)
2122 *pap = (pvo->pvo_pte.pte_lo & PTE_RPGN)
2123 | (va & ADDR_POFF);
2124 }
2125 pmap_interrupts_restore(msr);
2126 return pvo != NULL;
2127 }
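
/*
 * Illustrative sketch (hypothetical helper; only pmap_extract() and
 * pmap_kernel() are taken from this file, the rest is assumed).  A
 * typical caller translates a kernel VA to a PA like this; the block
 * is guarded out so it is never compiled.
 */
#if 0
static int
example_kva_to_pa(vaddr_t va, paddr_t *pap)
{

	/*
	 * pmap_extract() returns TRUE and fills *pap with the physical
	 * address backing `va', consulting the BAT shortcuts first and
	 * the PVO table (with interrupts blocked) otherwise.
	 */
	if (!pmap_extract(pmap_kernel(), va, pap))
		return EFAULT;		/* no mapping at `va' */
	return 0;
}
#endif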
2128
2129 /*
2130 * Lower the protection on the specified range of this pmap.
2131 */
2132 void
2133 pmap_protect(pmap_t pm, vaddr_t va, vaddr_t endva, vm_prot_t prot)
2134 {
2135 struct pvo_entry *pvo;
2136 volatile struct pte *pt;
2137 register_t msr;
2138 int pteidx;
2139
2140 /*
2141 * Since this routine only downgrades protection, we should
2142 * always be called with at least one bit not set.
2143 */
2144 KASSERT(prot != VM_PROT_ALL);
2145
2146 /*
2147 * If there is no protection, this is equivalent to
2148 * removing the range from the pmap.
2149 */
2150 if ((prot & VM_PROT_READ) == 0) {
2151 pmap_remove(pm, va, endva);
2152 return;
2153 }
2154
2155 msr = pmap_interrupts_off();
2156 for (; va < endva; va += PAGE_SIZE) {
2157 pvo = pmap_pvo_find_va(pm, va, &pteidx);
2158 if (pvo == NULL)
2159 continue;
2160 PMAP_PVO_CHECK(pvo); /* sanity check */
2161
2162 /*
2163 * Revoke executable if asked to do so.
2164 */
2165 if ((prot & VM_PROT_EXECUTE) == 0)
2166 pvo_clear_exec(pvo);
2167
2168 #if 0
2169 /*
2170 * If the page is already read-only, no change
2171 * needs to be made.
2172 */
2173 if ((pvo->pvo_pte.pte_lo & PTE_PP) == PTE_BR)
2174 continue;
2175 #endif
2176 /*
2177 * Grab the PTE pointer before we diddle with
2178 * the cached PTE copy.
2179 */
2180 pt = pmap_pvo_to_pte(pvo, pteidx);
2181 /*
2182 * Change the protection of the page.
2183 */
2184 pvo->pvo_pte.pte_lo &= ~PTE_PP;
2185 pvo->pvo_pte.pte_lo |= PTE_BR;
2186
2187 /*
2188 * If the PVO is in the page table, update
2189 * that pte as well.
2190 */
2191 if (pt != NULL) {
2192 pmap_pte_change(pt, &pvo->pvo_pte, pvo->pvo_vaddr);
2193 PVO_WHERE(pvo, PMAP_PROTECT);
2194 PMAPCOUNT(ptes_changed);
2195 }
2196
2197 PMAP_PVO_CHECK(pvo); /* sanity check */
2198 }
2199 pmap_interrupts_restore(msr);
2200 }
2201
2202 void
2203 pmap_unwire(pmap_t pm, vaddr_t va)
2204 {
2205 struct pvo_entry *pvo;
2206 register_t msr;
2207
2208 msr = pmap_interrupts_off();
2209 pvo = pmap_pvo_find_va(pm, va, NULL);
2210 if (pvo != NULL) {
2211 if (pvo->pvo_vaddr & PVO_WIRED) {
2212 pvo->pvo_vaddr &= ~PVO_WIRED;
2213 pm->pm_stats.wired_count--;
2214 }
2215 PMAP_PVO_CHECK(pvo); /* sanity check */
2216 }
2217 pmap_interrupts_restore(msr);
2218 }
2219
2220 /*
2221 * Lower the protection on the specified physical page.
2222 */
2223 void
2224 pmap_page_protect(struct vm_page *pg, vm_prot_t prot)
2225 {
2226 struct pvo_head *pvo_head, pvol;
2227 struct pvo_entry *pvo, *next_pvo;
2228 volatile struct pte *pt;
2229 register_t msr;
2230
2231 KASSERT(prot != VM_PROT_ALL);
2232 LIST_INIT(&pvol);
2233 msr = pmap_interrupts_off();
2234
2235 /*
2236 * When UVM reuses a page, it does a pmap_page_protect with
2237 * VM_PROT_NONE. At that point, we can clear the exec flag
2238 * since we know the page will have different contents.
2239 */
2240 if ((prot & VM_PROT_READ) == 0) {
2241 DPRINTFN(EXEC, ("[pmap_page_protect: %#lx: clear-exec]\n",
2242 VM_PAGE_TO_PHYS(pg)));
2243 if (pmap_attr_fetch(pg) & PTE_EXEC) {
2244 PMAPCOUNT(exec_uncached_page_protect);
2245 pmap_attr_clear(pg, PTE_EXEC);
2246 }
2247 }
2248
2249 pvo_head = vm_page_to_pvoh(pg);
2250 for (pvo = LIST_FIRST(pvo_head); pvo != NULL; pvo = next_pvo) {
2251 next_pvo = LIST_NEXT(pvo, pvo_vlink);
2252 PMAP_PVO_CHECK(pvo); /* sanity check */
2253
2254 /*
2255 * Downgrading to no mapping at all, we just remove the entry.
2256 */
2257 if ((prot & VM_PROT_READ) == 0) {
2258 pmap_pvo_remove(pvo, -1, &pvol);
2259 continue;
2260 }
2261
2262 /*
2263 * If EXEC permission is being revoked, just clear the
2264 * flag in the PVO.
2265 */
2266 if ((prot & VM_PROT_EXECUTE) == 0)
2267 pvo_clear_exec(pvo);
2268
2269 /*
2270 * If this entry is already RO, don't diddle with the
2271 * page table.
2272 */
2273 if ((pvo->pvo_pte.pte_lo & PTE_PP) == PTE_BR) {
2274 PMAP_PVO_CHECK(pvo);
2275 continue;
2276 }
2277
2278 /*
2279 * Grab the PTE before we diddle the bits so
2280 * pvo_to_pte can verify the pte contents are as
2281 * expected.
2282 */
2283 pt = pmap_pvo_to_pte(pvo, -1);
2284 pvo->pvo_pte.pte_lo &= ~PTE_PP;
2285 pvo->pvo_pte.pte_lo |= PTE_BR;
2286 if (pt != NULL) {
2287 pmap_pte_change(pt, &pvo->pvo_pte, pvo->pvo_vaddr);
2288 PVO_WHERE(pvo, PMAP_PAGE_PROTECT);
2289 PMAPCOUNT(ptes_changed);
2290 }
2291 PMAP_PVO_CHECK(pvo); /* sanity check */
2292 }
2293 pmap_interrupts_restore(msr);
2294 pmap_pvo_free_list(&pvol);
2295 }
2296
2297 /*
2298 * Activate the address space for the specified process. If the process
2299 * is the current process, load the new MMU context.
2300 */
2301 void
2302 pmap_activate(struct lwp *l)
2303 {
2304 struct pcb *pcb = &l->l_addr->u_pcb;
2305 pmap_t pmap = l->l_proc->p_vmspace->vm_map.pmap;
2306
2307 DPRINTFN(ACTIVATE,
2308 ("pmap_activate: lwp %p (curlwp %p)\n", l, curlwp));
2309
2310 /*
2311 * XXX Normally performed in cpu_fork().
2312 */
2313 pcb->pcb_pm = pmap;
2314
2315 /*
2316 * In theory, the SR registers need only be valid on return
2317 * to user space, so we could wait and load them there.
2318 */
2319 if (l == curlwp) {
2320 /* Store pointer to new current pmap. */
2321 curpm = pmap;
2322 }
2323 }
2324
2325 /*
2326 * Deactivate the specified process's address space.
2327 */
2328 void
2329 pmap_deactivate(struct lwp *l)
2330 {
2331 }
2332
2333 boolean_t
2334 pmap_query_bit(struct vm_page *pg, int ptebit)
2335 {
2336 struct pvo_entry *pvo;
2337 volatile struct pte *pt;
2338 register_t msr;
2339
2340 if (pmap_attr_fetch(pg) & ptebit)
2341 return TRUE;
2342
2343 msr = pmap_interrupts_off();
2344 LIST_FOREACH(pvo, vm_page_to_pvoh(pg), pvo_vlink) {
2345 PMAP_PVO_CHECK(pvo); /* sanity check */
2346 /*
2347 * See if we saved the bit off. If so, cache it and return
2348 * success.
2349 */
2350 if (pvo->pvo_pte.pte_lo & ptebit) {
2351 pmap_attr_save(pg, ptebit);
2352 PMAP_PVO_CHECK(pvo); /* sanity check */
2353 pmap_interrupts_restore(msr);
2354 return TRUE;
2355 }
2356 }
2357 /*
2358 * No luck; now go through the hard part of looking at the ptes
2359 * themselves. Sync so any pending REF/CHG bits are flushed
2360 * to the PTEs.
2361 */
2362 SYNC();
2363 LIST_FOREACH(pvo, vm_page_to_pvoh(pg), pvo_vlink) {
2364 PMAP_PVO_CHECK(pvo); /* sanity check */
2365 /*
2366 * See if this pvo has a valid PTE. If so, fetch the
2367 * REF/CHG bits from the valid PTE. If the appropriate
2368 * ptebit is set, cache it and return success.
2369 */
2370 pt = pmap_pvo_to_pte(pvo, -1);
2371 if (pt != NULL) {
2372 pmap_pte_synch(pt, &pvo->pvo_pte);
2373 if (pvo->pvo_pte.pte_lo & ptebit) {
2374 pmap_attr_save(pg, ptebit);
2375 PMAP_PVO_CHECK(pvo); /* sanity check */
2376 pmap_interrupts_restore(msr);
2377 return TRUE;
2378 }
2379 }
2380 }
2381 pmap_interrupts_restore(msr);
2382 return FALSE;
2383 }
2384
2385 boolean_t
2386 pmap_clear_bit(struct vm_page *pg, int ptebit)
2387 {
2388 struct pvo_head *pvoh = vm_page_to_pvoh(pg);
2389 struct pvo_entry *pvo;
2390 volatile struct pte *pt;
2391 register_t msr;
2392 int rv = 0;
2393
2394 msr = pmap_interrupts_off();
2395
2396 /*
2397 * Fetch the cached value.
2398 */
2399 rv |= pmap_attr_fetch(pg);
2400
2401 /*
2402 * Clear the cached value.
2403 */
2404 pmap_attr_clear(pg, ptebit);
2405
2406 /*
2407 * Sync so any pending REF/CHG bits are flushed to the PTEs (so we
2408 * can reset the right ones). Note that since the pvo entries and
2409 * list heads are accessed via BAT0 and are never placed in the
2410 * page table, we don't have to worry about further accesses setting
2411 * the REF/CHG bits.
2412 */
2413 SYNC();
2414
2415 /*
2416 * For each pvo entry, clear pvo's ptebit. If this pvo has a
2417 * valid PTE, also clear the ptebit from the valid PTE.
2418 */
2419 LIST_FOREACH(pvo, pvoh, pvo_vlink) {
2420 PMAP_PVO_CHECK(pvo); /* sanity check */
2421 pt = pmap_pvo_to_pte(pvo, -1);
2422 if (pt != NULL) {
2423 /*
2424 * Only sync the PTE if the bit we are looking
2425 * for is not already set.
2426 */
2427 if ((pvo->pvo_pte.pte_lo & ptebit) == 0)
2428 pmap_pte_synch(pt, &pvo->pvo_pte);
2429 /*
2430 * If the bit we are looking for was already set,
2431 * clear that bit in the pte.
2432 */
2433 if (pvo->pvo_pte.pte_lo & ptebit)
2434 pmap_pte_clear(pt, PVO_VADDR(pvo), ptebit);
2435 }
2436 rv |= pvo->pvo_pte.pte_lo & (PTE_CHG|PTE_REF);
2437 pvo->pvo_pte.pte_lo &= ~ptebit;
2438 PMAP_PVO_CHECK(pvo); /* sanity check */
2439 }
2440 pmap_interrupts_restore(msr);
2441
2442 /*
2443 * If we are clearing the modify bit and this page was marked EXEC
2444 * and the user of the page thinks the page was modified, then we
2445 * need to clean it from the icache if it's mapped or clear the EXEC
2446 * bit if it's not mapped. The page itself might not have the CHG
2447 * bit set if the modification was done via DMA to the page.
2448 */
2449 if ((ptebit & PTE_CHG) && (rv & PTE_EXEC)) {
2450 if (LIST_EMPTY(pvoh)) {
2451 DPRINTFN(EXEC, ("[pmap_clear_bit: %#lx: clear-exec]\n",
2452 VM_PAGE_TO_PHYS(pg)));
2453 pmap_attr_clear(pg, PTE_EXEC);
2454 PMAPCOUNT(exec_uncached_clear_modify);
2455 } else {
2456 DPRINTFN(EXEC, ("[pmap_clear_bit: %#lx: syncicache]\n",
2457 VM_PAGE_TO_PHYS(pg)));
2458 pmap_syncicache(VM_PAGE_TO_PHYS(pg), PAGE_SIZE);
2459 PMAPCOUNT(exec_synced_clear_modify);
2460 }
2461 }
2462 return (rv & ptebit) != 0;
2463 }
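
/*
 * Illustrative note (an assumption about the machine-dependent pmap.h,
 * not text copied from it): pmap_query_bit() and pmap_clear_bit() are
 * the back ends of the MI reference/modify interface, which this port
 * typically provides as thin macros along the following lines:
 *
 *	#define	pmap_is_modified(pg)	 pmap_query_bit((pg), PTE_CHG)
 *	#define	pmap_is_referenced(pg)	 pmap_query_bit((pg), PTE_REF)
 *	#define	pmap_clear_modify(pg)	 pmap_clear_bit((pg), PTE_CHG)
 *	#define	pmap_clear_reference(pg) pmap_clear_bit((pg), PTE_REF)
 */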
2464
2465 void
2466 pmap_procwr(struct proc *p, vaddr_t va, size_t len)
2467 {
2468 struct pvo_entry *pvo;
2469 size_t offset = va & ADDR_POFF;
2470 int s;
2471
2472 s = splvm();
2473 while (len > 0) {
2474 size_t seglen = PAGE_SIZE - offset;
2475 if (seglen > len)
2476 seglen = len;
2477 pvo = pmap_pvo_find_va(p->p_vmspace->vm_map.pmap, va, NULL);
2478 if (pvo != NULL && PVO_ISEXECUTABLE(pvo)) {
2479 pmap_syncicache(
2480 (pvo->pvo_pte.pte_lo & PTE_RPGN) | offset, seglen);
2481 PMAP_PVO_CHECK(pvo);
2482 }
2483 va += seglen;
2484 len -= seglen;
2485 offset = 0;
2486 }
2487 splx(s);
2488 }
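
/*
 * Illustrative sketch (hypothetical caller; everything except
 * pmap_procwr() is assumed): pmap_procwr() exists for code that writes
 * instructions into another process, e.g. a debugger planting a
 * breakpoint, and must make the store visible to instruction fetch.
 * Guarded out so it is never compiled.
 */
#if 0
static void
example_plant_breakpoint(struct proc *p, vaddr_t va, uint32_t insn)
{

	/* ... write `insn' into the target's address space here ... */

	/*
	 * Flush the modified word from the D-cache and invalidate the
	 * I-cache for that range in the target's pmap.
	 */
	pmap_procwr(p, va, sizeof(insn));
}
#endif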
2489
2490 #if defined(DEBUG) || defined(PMAPCHECK) || defined(DDB)
2491 void
2492 pmap_pte_print(volatile struct pte *pt)
2493 {
2494 printf("PTE %p: ", pt);
2495 /* High word: */
2496 printf("0x%08lx: [", pt->pte_hi);
2497 printf("%c ", (pt->pte_hi & PTE_VALID) ? 'v' : 'i');
2498 printf("%c ", (pt->pte_hi & PTE_HID) ? 'h' : '-');
2499 printf("0x%06lx 0x%02lx",
2500 (pt->pte_hi &~ PTE_VALID)>>PTE_VSID_SHFT,
2501 pt->pte_hi & PTE_API);
2502 printf(" (va 0x%08lx)] ", pmap_pte_to_va(pt));
2503 /* Low word: */
2504 printf(" 0x%08lx: [", pt->pte_lo);
2505 printf("0x%05lx... ", pt->pte_lo >> 12);
2506 printf("%c ", (pt->pte_lo & PTE_REF) ? 'r' : 'u');
2507 printf("%c ", (pt->pte_lo & PTE_CHG) ? 'c' : 'n');
2508 printf("%c", (pt->pte_lo & PTE_W) ? 'w' : '.');
2509 printf("%c", (pt->pte_lo & PTE_I) ? 'i' : '.');
2510 printf("%c", (pt->pte_lo & PTE_M) ? 'm' : '.');
2511 printf("%c ", (pt->pte_lo & PTE_G) ? 'g' : '.');
2512 switch (pt->pte_lo & PTE_PP) {
2513 case PTE_BR: printf("br]\n"); break;
2514 case PTE_BW: printf("bw]\n"); break;
2515 case PTE_SO: printf("so]\n"); break;
2516 case PTE_SW: printf("sw]\n"); break;
2517 }
2518 }
2519 #endif
2520
2521 #if defined(DDB)
2522 void
2523 pmap_pteg_check(void)
2524 {
2525 volatile struct pte *pt;
2526 int i;
2527 int ptegidx;
2528 u_int p_valid = 0;
2529 u_int s_valid = 0;
2530 u_int invalid = 0;
2531
2532 for (ptegidx = 0; ptegidx < pmap_pteg_cnt; ptegidx++) {
2533 for (pt = pmap_pteg_table[ptegidx].pt, i = 8; --i >= 0; pt++) {
2534 if (pt->pte_hi & PTE_VALID) {
2535 if (pt->pte_hi & PTE_HID)
2536 s_valid++;
2537 else
2538 p_valid++;
2539 } else
2540 invalid++;
2541 }
2542 }
2543 printf("pteg_check: v(p) %#x (%d), v(s) %#x (%d), i %#x (%d)\n",
2544 p_valid, p_valid, s_valid, s_valid,
2545 invalid, invalid);
2546 }
2547
2548 void
2549 pmap_print_mmuregs(void)
2550 {
2551 int i;
2552 u_int cpuvers;
2553 #ifndef PPC_OEA64
2554 vaddr_t addr;
2555 register_t soft_sr[16];
2556 #endif
2557 struct bat soft_ibat[4];
2558 struct bat soft_dbat[4];
2559 register_t sdr1;
2560
2561 cpuvers = MFPVR() >> 16;
2562 __asm volatile ("mfsdr1 %0" : "=r"(sdr1));
2563 #ifndef PPC_OEA64
2564 addr = 0;
2565 for (i = 0; i < 16; i++) {
2566 soft_sr[i] = MFSRIN(addr);
2567 addr += (1 << ADDR_SR_SHFT);
2568 }
2569 #endif
2570
2571 /* read iBAT (601: uBAT) registers */
2572 __asm volatile ("mfibatu %0,0" : "=r"(soft_ibat[0].batu));
2573 __asm volatile ("mfibatl %0,0" : "=r"(soft_ibat[0].batl));
2574 __asm volatile ("mfibatu %0,1" : "=r"(soft_ibat[1].batu));
2575 __asm volatile ("mfibatl %0,1" : "=r"(soft_ibat[1].batl));
2576 __asm volatile ("mfibatu %0,2" : "=r"(soft_ibat[2].batu));
2577 __asm volatile ("mfibatl %0,2" : "=r"(soft_ibat[2].batl));
2578 __asm volatile ("mfibatu %0,3" : "=r"(soft_ibat[3].batu));
2579 __asm volatile ("mfibatl %0,3" : "=r"(soft_ibat[3].batl));
2580
2581
2582 if (cpuvers != MPC601) {
2583 /* read dBAT registers */
2584 __asm volatile ("mfdbatu %0,0" : "=r"(soft_dbat[0].batu));
2585 __asm volatile ("mfdbatl %0,0" : "=r"(soft_dbat[0].batl));
2586 __asm volatile ("mfdbatu %0,1" : "=r"(soft_dbat[1].batu));
2587 __asm volatile ("mfdbatl %0,1" : "=r"(soft_dbat[1].batl));
2588 __asm volatile ("mfdbatu %0,2" : "=r"(soft_dbat[2].batu));
2589 __asm volatile ("mfdbatl %0,2" : "=r"(soft_dbat[2].batl));
2590 __asm volatile ("mfdbatu %0,3" : "=r"(soft_dbat[3].batu));
2591 __asm volatile ("mfdbatl %0,3" : "=r"(soft_dbat[3].batl));
2592 }
2593
2594 printf("SDR1:\t0x%lx\n", (long) sdr1);
2595 #ifndef PPC_OEA64
2596 printf("SR[]:\t");
2597 for (i = 0; i < 4; i++)
2598 printf("0x%08lx, ", soft_sr[i]);
2599 printf("\n\t");
2600 for ( ; i < 8; i++)
2601 printf("0x%08lx, ", soft_sr[i]);
2602 printf("\n\t");
2603 for ( ; i < 12; i++)
2604 printf("0x%08lx, ", soft_sr[i]);
2605 printf("\n\t");
2606 for ( ; i < 16; i++)
2607 printf("0x%08lx, ", soft_sr[i]);
2608 printf("\n");
2609 #endif
2610
2611 printf("%cBAT[]:\t", cpuvers == MPC601 ? 'u' : 'i');
2612 for (i = 0; i < 4; i++) {
2613 printf("0x%08lx 0x%08lx, ",
2614 soft_ibat[i].batu, soft_ibat[i].batl);
2615 if (i == 1)
2616 printf("\n\t");
2617 }
2618 if (cpuvers != MPC601) {
2619 printf("\ndBAT[]:\t");
2620 for (i = 0; i < 4; i++) {
2621 printf("0x%08lx 0x%08lx, ",
2622 soft_dbat[i].batu, soft_dbat[i].batl);
2623 if (i == 1)
2624 printf("\n\t");
2625 }
2626 }
2627 printf("\n");
2628 }
2629
2630 void
2631 pmap_print_pte(pmap_t pm, vaddr_t va)
2632 {
2633 struct pvo_entry *pvo;
2634 volatile struct pte *pt;
2635 int pteidx;
2636
2637 pvo = pmap_pvo_find_va(pm, va, &pteidx);
2638 if (pvo != NULL) {
2639 pt = pmap_pvo_to_pte(pvo, pteidx);
2640 if (pt != NULL) {
2641 printf("VA %#lx -> %p -> %s %#lx, %#lx\n",
2642 va, pt,
2643 pt->pte_hi & PTE_HID ? "(sec)" : "(pri)",
2644 pt->pte_hi, pt->pte_lo);
2645 } else {
2646 printf("No valid PTE found\n");
2647 }
2648 } else {
2649 printf("Address not in pmap\n");
2650 }
2651 }
2652
2653 void
2654 pmap_pteg_dist(void)
2655 {
2656 struct pvo_entry *pvo;
2657 int ptegidx;
2658 int depth;
2659 int max_depth = 0;
2660 unsigned int depths[64];
2661
2662 memset(depths, 0, sizeof(depths));
2663 for (ptegidx = 0; ptegidx < pmap_pteg_cnt; ptegidx++) {
2664 depth = 0;
2665 TAILQ_FOREACH(pvo, &pmap_pvo_table[ptegidx], pvo_olink) {
2666 depth++;
2667 }
2668 if (depth > max_depth)
2669 max_depth = depth;
2670 if (depth > 63)
2671 depth = 63;
2672 depths[depth]++;
2673 }
2674
2675 for (depth = 0; depth < 64; depth++) {
2676 printf(" [%2d]: %8u", depth, depths[depth]);
2677 if ((depth & 3) == 3)
2678 printf("\n");
2679 if (depth == max_depth)
2680 break;
2681 }
2682 if ((depth & 3) != 3)
2683 printf("\n");
2684 printf("Max depth found was %d\n", max_depth);
2685 }
2686 #endif /* DEBUG */
2687
2688 #if defined(PMAPCHECK) || defined(DEBUG)
2689 void
2690 pmap_pvo_verify(void)
2691 {
2692 int ptegidx;
2693 int s;
2694
2695 s = splvm();
2696 for (ptegidx = 0; ptegidx < pmap_pteg_cnt; ptegidx++) {
2697 struct pvo_entry *pvo;
2698 TAILQ_FOREACH(pvo, &pmap_pvo_table[ptegidx], pvo_olink) {
2699 if ((uintptr_t) pvo >= SEGMENT_LENGTH)
2700 panic("pmap_pvo_verify: invalid pvo %p "
2701 "on list %#x", pvo, ptegidx);
2702 pmap_pvo_check(pvo);
2703 }
2704 }
2705 splx(s);
2706 }
2707 #endif /* PMAPCHECK */
2708
2709
2710 void *
2711 pmap_pool_ualloc(struct pool *pp, int flags)
2712 {
2713 struct pvo_page *pvop;
2714
2715 pvop = SIMPLEQ_FIRST(&pmap_upvop_head);
2716 if (pvop != NULL) {
2717 pmap_upvop_free--;
2718 SIMPLEQ_REMOVE_HEAD(&pmap_upvop_head, pvop_link);
2719 return pvop;
2720 }
2721 if (uvm.page_init_done != TRUE) {
2722 return (void *) uvm_pageboot_alloc(PAGE_SIZE);
2723 }
2724 return pmap_pool_malloc(pp, flags);
2725 }
2726
2727 void *
2728 pmap_pool_malloc(struct pool *pp, int flags)
2729 {
2730 struct pvo_page *pvop;
2731 struct vm_page *pg;
2732
2733 pvop = SIMPLEQ_FIRST(&pmap_mpvop_head);
2734 if (pvop != NULL) {
2735 pmap_mpvop_free--;
2736 SIMPLEQ_REMOVE_HEAD(&pmap_mpvop_head, pvop_link);
2737 return pvop;
2738 }
2739 again:
2740 pg = uvm_pagealloc_strat(NULL, 0, NULL, UVM_PGA_USERESERVE,
2741 UVM_PGA_STRAT_ONLY, VM_FREELIST_FIRST256);
2742 if (__predict_false(pg == NULL)) {
2743 if (flags & PR_WAITOK) {
2744 uvm_wait("plpg");
2745 goto again;
2746 } else {
2747 return (0);
2748 }
2749 }
2750 return (void *) VM_PAGE_TO_PHYS(pg);
2751 }
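
/*
 * Illustrative note (rests on the assumption that this pmap keeps the
 * low 256MB BAT-mapped 1:1): the allocators above can hand back
 * VM_PAGE_TO_PHYS(pg) directly as a usable pool item because backing
 * pages are taken only from VM_FREELIST_FIRST256.  The invariant being
 * relied on is roughly:
 *
 *	void *item = pmap_pool_malloc(pp, PR_NOWAIT);
 *	KASSERT(item == NULL ||
 *	    (paddr_t)(uintptr_t)item < SEGMENT_LENGTH);
 *
 * so no separate kernel virtual mapping is ever needed for pool items.
 */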
2752
2753 void
2754 pmap_pool_ufree(struct pool *pp, void *va)
2755 {
2756 struct pvo_page *pvop;
2757 #if 0
2758 if (PHYS_TO_VM_PAGE((paddr_t) va) != NULL) {
2759 pmap_pool_mfree(va, size, tag);
2760 return;
2761 }
2762 #endif
2763 pvop = va;
2764 SIMPLEQ_INSERT_HEAD(&pmap_upvop_head, pvop, pvop_link);
2765 pmap_upvop_free++;
2766 if (pmap_upvop_free > pmap_upvop_maxfree)
2767 pmap_upvop_maxfree = pmap_upvop_free;
2768 }
2769
2770 void
2771 pmap_pool_mfree(struct pool *pp, void *va)
2772 {
2773 struct pvo_page *pvop;
2774
2775 pvop = va;
2776 SIMPLEQ_INSERT_HEAD(&pmap_mpvop_head, pvop, pvop_link);
2777 pmap_mpvop_free++;
2778 if (pmap_mpvop_free > pmap_mpvop_maxfree)
2779 pmap_mpvop_maxfree = pmap_mpvop_free;
2780 #if 0
2781 uvm_pagefree(PHYS_TO_VM_PAGE((paddr_t) va));
2782 #endif
2783 }
2784
2785 /*
2786 * This routine is used during bootstrap to steal to-be-managed memory
2787 * (which will then be unmanaged). We use it to grab memory from the
2788 * first 256MB for our pmap needs and above 256MB for other stuff.
2789 */
2790 vaddr_t
2791 pmap_steal_memory(vsize_t vsize, vaddr_t *vstartp, vaddr_t *vendp)
2792 {
2793 vsize_t size;
2794 vaddr_t va;
2795 paddr_t pa = 0;
2796 int npgs, bank;
2797 struct vm_physseg *ps;
2798
2799 if (uvm.page_init_done == TRUE)
2800 panic("pmap_steal_memory: called _after_ bootstrap");
2801
2802 *vstartp = VM_MIN_KERNEL_ADDRESS;
2803 *vendp = VM_MAX_KERNEL_ADDRESS;
2804
2805 size = round_page(vsize);
2806 npgs = atop(size);
2807
2808 /*
2809 * PA 0 will never be among those given to UVM so we can use it
2810 * to indicate we couldn't steal any memory.
2811 */
2812 for (ps = vm_physmem, bank = 0; bank < vm_nphysseg; bank++, ps++) {
2813 if (ps->free_list == VM_FREELIST_FIRST256 &&
2814 ps->avail_end - ps->avail_start >= npgs) {
2815 pa = ptoa(ps->avail_start);
2816 break;
2817 }
2818 }
2819
2820 if (pa == 0)
2821 panic("pmap_steal_memory: no approriate memory to steal!");
2822
2823 ps->avail_start += npgs;
2824 ps->start += npgs;
2825
2826 /*
2827 * If we've used up all the pages in the segment, remove it and
2828 * compact the list.
2829 */
2830 if (ps->avail_start == ps->end) {
2831 /*
2832 * If this was the last one, then a very bad thing has occurred
2833 */
2834 if (--vm_nphysseg == 0)
2835 panic("pmap_steal_memory: out of memory!");
2836
2837 printf("pmap_steal_memory: consumed bank %d\n", bank);
2838 for (; bank < vm_nphysseg; bank++, ps++) {
2839 ps[0] = ps[1];
2840 }
2841 }
2842
2843 va = (vaddr_t) pa;
2844 memset((caddr_t) va, 0, size);
2845 pmap_pages_stolen += npgs;
2846 #ifdef DEBUG
2847 if (pmapdebug && npgs > 1) {
2848 u_int cnt = 0;
2849 for (bank = 0, ps = vm_physmem; bank < vm_nphysseg; bank++, ps++)
2850 cnt += ps->avail_end - ps->avail_start;
2851 printf("pmap_steal_memory: stole %u (total %u) pages (%u left)\n",
2852 npgs, pmap_pages_stolen, cnt);
2853 }
2854 #endif
2855
2856 return va;
2857 }
2858
2859 /*
2860 * Find a chunk of memory with the right size and alignment.
2861 */
2862 void *
2863 pmap_boot_find_memory(psize_t size, psize_t alignment, int at_end)
2864 {
2865 struct mem_region *mp;
2866 paddr_t s, e;
2867 int i, j;
2868
2869 size = round_page(size);
2870
2871 DPRINTFN(BOOT,
2872 ("pmap_boot_find_memory: size=%lx, alignment=%lx, at_end=%d",
2873 size, alignment, at_end));
2874
2875 if (alignment < PAGE_SIZE || (alignment & (alignment-1)) != 0)
2876 panic("pmap_boot_find_memory: invalid alignment %lx",
2877 alignment);
2878
2879 if (at_end) {
2880 if (alignment != PAGE_SIZE)
2881 panic("pmap_boot_find_memory: invalid ending "
2882 "alignment %lx", alignment);
2883
2884 for (mp = &avail[avail_cnt-1]; mp >= avail; mp--) {
2885 s = mp->start + mp->size - size;
2886 if (s >= mp->start && mp->size >= size) {
2887 DPRINTFN(BOOT,(": %lx\n", s));
2888 DPRINTFN(BOOT,
2889 ("pmap_boot_find_memory: b-avail[%d] start "
2890 "0x%lx size 0x%lx\n", mp - avail,
2891 mp->start, mp->size));
2892 mp->size -= size;
2893 DPRINTFN(BOOT,
2894 ("pmap_boot_find_memory: a-avail[%d] start "
2895 "0x%lx size 0x%lx\n", mp - avail,
2896 mp->start, mp->size));
2897 return (void *) s;
2898 }
2899 }
2900 panic("pmap_boot_find_memory: no available memory");
2901 }
2902
2903 for (mp = avail, i = 0; i < avail_cnt; i++, mp++) {
2904 s = (mp->start + alignment - 1) & ~(alignment-1);
2905 e = s + size;
2906
2907 /*
2908 * Is the calculated block entirely within this available region?
2909 */
2910 if (s < mp->start || e > mp->start + mp->size)
2911 continue;
2912
2913 DPRINTFN(BOOT,(": %lx\n", s));
2914 if (s == mp->start) {
2915 /*
2916 * If the block starts at the beginning of the region,
2917 * adjust the size & start. (the region may now be
2918 * zero in length)
2919 */
2920 DPRINTFN(BOOT,
2921 ("pmap_boot_find_memory: b-avail[%d] start "
2922 "0x%lx size 0x%lx\n", i, mp->start, mp->size));
2923 mp->start += size;
2924 mp->size -= size;
2925 DPRINTFN(BOOT,
2926 ("pmap_boot_find_memory: a-avail[%d] start "
2927 "0x%lx size 0x%lx\n", i, mp->start, mp->size));
2928 } else if (e == mp->start + mp->size) {
2929 /*
2930 * If the block ends at the end of the region,
2931 * adjust only the size.
2932 */
2933 DPRINTFN(BOOT,
2934 ("pmap_boot_find_memory: b-avail[%d] start "
2935 "0x%lx size 0x%lx\n", i, mp->start, mp->size));
2936 mp->size -= size;
2937 DPRINTFN(BOOT,
2938 ("pmap_boot_find_memory: a-avail[%d] start "
2939 "0x%lx size 0x%lx\n", i, mp->start, mp->size));
2940 } else {
2941 /*
2942 * Block is in the middle of the region, so we
2943 * have to split it in two.
2944 */
2945 for (j = avail_cnt; j > i + 1; j--) {
2946 avail[j] = avail[j-1];
2947 }
2948 DPRINTFN(BOOT,
2949 ("pmap_boot_find_memory: b-avail[%d] start "
2950 "0x%lx size 0x%lx\n", i, mp->start, mp->size));
2951 mp[1].start = e;
2952 mp[1].size = mp[0].start + mp[0].size - e;
2953 mp[0].size = s - mp[0].start;
2954 avail_cnt++;
2955 for (; i < avail_cnt; i++) {
2956 DPRINTFN(BOOT,
2957 ("pmap_boot_find_memory: a-avail[%d] "
2958 "start 0x%lx size 0x%lx\n", i,
2959 avail[i].start, avail[i].size));
2960 }
2961 }
2962 return (void *) s;
2963 }
2964 panic("pmap_boot_find_memory: not enough memory for "
2965 "%lx/%lx allocation?", size, alignment);
2966 }
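
/*
 * Worked example (not from the original source) for the power-of-two
 * alignment step above,
 *
 *	s = (mp->start + alignment - 1) & ~(alignment - 1);
 *
 * which rounds a region start up to the next `alignment' boundary.
 * With mp->start == 0x3000 and alignment == 0x10000 (64KB):
 *
 *	(0x3000 + 0xffff) & ~0xffff == 0x12fff & ~0xffff == 0x10000
 *
 * This is also why the earlier sanity check insists that alignment is
 * at least PAGE_SIZE and has exactly one bit set.
 */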
2967
2968 /*
2969 * This is not part of the defined PMAP interface and is specific to the
2970 * PowerPC architecture. This is called during initppc, before the system
2971 * is really initialized.
2972 */
2973 void
2974 pmap_bootstrap(paddr_t kernelstart, paddr_t kernelend)
2975 {
2976 struct mem_region *mp, tmp;
2977 paddr_t s, e;
2978 psize_t size;
2979 int i, j;
2980
2981 /*
2982 * Get memory.
2983 */
2984 mem_regions(&mem, &avail);
2985 #if defined(DEBUG)
2986 if (pmapdebug & PMAPDEBUG_BOOT) {
2987 printf("pmap_bootstrap: memory configuration:\n");
2988 for (mp = mem; mp->size; mp++) {
2989 printf("pmap_bootstrap: mem start 0x%lx size 0x%lx\n",
2990 mp->start, mp->size);
2991 }
2992 for (mp = avail; mp->size; mp++) {
2993 printf("pmap_bootstrap: avail start 0x%lx size 0x%lx\n",
2994 mp->start, mp->size);
2995 }
2996 }
2997 #endif
2998
2999 /*
3000 * Find out how much physical memory we have and in how many chunks.
3001 */
3002 for (mem_cnt = 0, mp = mem; mp->size; mp++) {
3003 if (mp->start >= pmap_memlimit)
3004 continue;
3005 if (mp->start + mp->size > pmap_memlimit) {
3006 size = pmap_memlimit - mp->start;
3007 physmem += btoc(size);
3008 } else {
3009 physmem += btoc(mp->size);
3010 }
3011 mem_cnt++;
3012 }
3013
3014 /*
3015 * Count the number of available entries.
3016 */
3017 for (avail_cnt = 0, mp = avail; mp->size; mp++)
3018 avail_cnt++;
3019
3020 /*
3021 * Page align all regions.
3022 */
3023 kernelstart = trunc_page(kernelstart);
3024 kernelend = round_page(kernelend);
3025 for (mp = avail, i = 0; i < avail_cnt; i++, mp++) {
3026 s = round_page(mp->start);
3027 mp->size -= (s - mp->start);
3028 mp->size = trunc_page(mp->size);
3029 mp->start = s;
3030 e = mp->start + mp->size;
3031
3032 DPRINTFN(BOOT,
3033 ("pmap_bootstrap: b-avail[%d] start 0x%lx size 0x%lx\n",
3034 i, mp->start, mp->size));
3035
3036 /*
3037 * Don't allow the end to run beyond our artificial limit
3038 */
3039 if (e > pmap_memlimit)
3040 e = pmap_memlimit;
3041
3042 /*
3043 * Is this region empty or strange? If so, skip it.
3044 */
3045 if (e <= s) {
3046 mp->start = 0;
3047 mp->size = 0;
3048 continue;
3049 }
3050
3051 /*
3052 * Does this overlap the beginning of the kernel?
3053 * Does it extend past the end of the kernel?
3054 */
3055 else if (s < kernelstart && e > kernelstart) {
3056 if (e > kernelend) {
3057 avail[avail_cnt].start = kernelend;
3058 avail[avail_cnt].size = e - kernelend;
3059 avail_cnt++;
3060 }
3061 mp->size = kernelstart - s;
3062 }
3063 /*
3064 * Check whether this region overlaps the end of the kernel.
3065 */
3066 else if (s < kernelend && e > kernelend) {
3067 mp->start = kernelend;
3068 mp->size = e - kernelend;
3069 }
3070 /*
3071 * Check whether this region is completely inside the kernel.
3072 * Nuke it if it is.
3073 */
3074 else if (s >= kernelstart && e <= kernelend) {
3075 mp->start = 0;
3076 mp->size = 0;
3077 }
3078 /*
3079 * If the user imposed a memory limit, enforce it.
3080 */
3081 else if (s >= pmap_memlimit) {
3082 mp->start = -PAGE_SIZE; /* let's know why */
3083 mp->size = 0;
3084 }
3085 else {
3086 mp->start = s;
3087 mp->size = e - s;
3088 }
3089 DPRINTFN(BOOT,
3090 ("pmap_bootstrap: a-avail[%d] start 0x%lx size 0x%lx\n",
3091 i, mp->start, mp->size));
3092 }
3093
3094 /*
3095 * Move (and uncount) all the null regions to the end.
3096 */
3097 for (mp = avail, i = 0; i < avail_cnt; i++, mp++) {
3098 if (mp->size == 0) {
3099 tmp = avail[i];
3100 avail[i] = avail[--avail_cnt];
3101 avail[avail_cnt] = avail[i];
3102 }
3103 }
3104
3105 /*
3106 * (Bubble)sort them into ascending order.
3107 */
3108 for (i = 0; i < avail_cnt; i++) {
3109 for (j = i + 1; j < avail_cnt; j++) {
3110 if (avail[i].start > avail[j].start) {
3111 tmp = avail[i];
3112 avail[i] = avail[j];
3113 avail[j] = tmp;
3114 }
3115 }
3116 }
3117
3118 /*
3119 * Make sure they don't overlap.
3120 */
3121 for (mp = avail, i = 0; i < avail_cnt - 1; i++, mp++) {
3122 if (mp[0].start + mp[0].size > mp[1].start) {
3123 mp[0].size = mp[1].start - mp[0].start;
3124 }
3125 DPRINTFN(BOOT,
3126 ("pmap_bootstrap: avail[%d] start 0x%lx size 0x%lx\n",
3127 i, mp->start, mp->size));
3128 }
3129 DPRINTFN(BOOT,
3130 ("pmap_bootstrap: avail[%d] start 0x%lx size 0x%lx\n",
3131 i, mp->start, mp->size));
3132
3133 #ifdef PTEGCOUNT
3134 pmap_pteg_cnt = PTEGCOUNT;
3135 #else /* PTEGCOUNT */
3136 pmap_pteg_cnt = 0x1000;
3137
3138 while (pmap_pteg_cnt < physmem)
3139 pmap_pteg_cnt <<= 1;
3140
3141 pmap_pteg_cnt >>= 1;
3142 #endif /* PTEGCOUNT */
3143
3144 /*
3145 * Find suitably aligned memory for PTEG hash table.
3146 */
3147 size = pmap_pteg_cnt * sizeof(struct pteg);
3148 pmap_pteg_table = pmap_boot_find_memory(size, size, 0);
3149 #if defined(DIAGNOSTIC) || defined(DEBUG) || defined(PMAPCHECK)
3150 if ( (uintptr_t) pmap_pteg_table + size > SEGMENT_LENGTH)
3151 panic("pmap_bootstrap: pmap_pteg_table end (%p + %lx) > 256MB",
3152 pmap_pteg_table, size);
3153 #endif
3154
3155 memset(__UNVOLATILE(pmap_pteg_table), 0,
3156 pmap_pteg_cnt * sizeof(struct pteg));
3157 pmap_pteg_mask = pmap_pteg_cnt - 1;
3158
3159 /*
3160 * We cannot do pmap_steal_memory here since UVM hasn't been loaded
3161 * with pages. So we just steal them before giving them to UVM.
3162 */
3163 size = sizeof(pmap_pvo_table[0]) * pmap_pteg_cnt;
3164 pmap_pvo_table = pmap_boot_find_memory(size, PAGE_SIZE, 0);
3165 #if defined(DIAGNOSTIC) || defined(DEBUG) || defined(PMAPCHECK)
3166 if ( (uintptr_t) pmap_pvo_table + size > SEGMENT_LENGTH)
3167 panic("pmap_bootstrap: pmap_pvo_table end (%p + %lx) > 256MB",
3168 pmap_pvo_table, size);
3169 #endif
3170
3171 for (i = 0; i < pmap_pteg_cnt; i++)
3172 TAILQ_INIT(&pmap_pvo_table[i]);
3173
3174 #ifndef MSGBUFADDR
3175 /*
3176 * Allocate msgbuf in high memory.
3177 */
3178 msgbuf_paddr =
3179 (paddr_t) pmap_boot_find_memory(MSGBUFSIZE, PAGE_SIZE, 1);
3180 #endif
3181
3182 #ifdef __HAVE_PMAP_PHYSSEG
3183 {
3184 u_int npgs = 0;
3185 for (i = 0, mp = avail; i < avail_cnt; i++, mp++)
3186 npgs += btoc(mp->size);
3187 size = (sizeof(struct pvo_head) + 1) * npgs;
3188 pmap_physseg.pvoh = pmap_boot_find_memory(size, PAGE_SIZE, 0);
3189 pmap_physseg.attrs = (char *) &pmap_physseg.pvoh[npgs];
3190 #if defined(DIAGNOSTIC) || defined(DEBUG) || defined(PMAPCHECK)
3191 if ((uintptr_t)pmap_physseg.pvoh + size > SEGMENT_LENGTH)
3192 panic("pmap_bootstrap: PVO list end (%p + %lx) > 256MB",
3193 pmap_physseg.pvoh, size);
3194 #endif
3195 }
3196 #endif
3197
3198 for (mp = avail, i = 0; i < avail_cnt; mp++, i++) {
3199 paddr_t pfstart = atop(mp->start);
3200 paddr_t pfend = atop(mp->start + mp->size);
3201 if (mp->size == 0)
3202 continue;
3203 if (mp->start + mp->size <= SEGMENT_LENGTH) {
3204 uvm_page_physload(pfstart, pfend, pfstart, pfend,
3205 VM_FREELIST_FIRST256);
3206 } else if (mp->start >= SEGMENT_LENGTH) {
3207 uvm_page_physload(pfstart, pfend, pfstart, pfend,
3208 VM_FREELIST_DEFAULT);
3209 } else {
3210 pfend = atop(SEGMENT_LENGTH);
3211 uvm_page_physload(pfstart, pfend, pfstart, pfend,
3212 VM_FREELIST_FIRST256);
3213 pfstart = atop(SEGMENT_LENGTH);
3214 pfend = atop(mp->start + mp->size);
3215 uvm_page_physload(pfstart, pfend, pfstart, pfend,
3216 VM_FREELIST_DEFAULT);
3217 }
3218 }
3219
3220 /*
3221 * Make sure kernel vsid is allocated as well as VSID 0.
3222 */
3223 pmap_vsid_bitmap[(KERNEL_VSIDBITS & (NPMAPS-1)) / VSID_NBPW]
3224 |= 1 << (KERNEL_VSIDBITS % VSID_NBPW);
3225 pmap_vsid_bitmap[0] |= 1;
3226
3227 /*
3228 * Initialize kernel pmap and hardware.
3229 */
3230 #ifndef PPC_OEA64
3231 for (i = 0; i < 16; i++) {
3232 pmap_kernel()->pm_sr[i] = EMPTY_SEGMENT;
3233 __asm volatile ("mtsrin %0,%1"
3234 :: "r"(EMPTY_SEGMENT), "r"(i << ADDR_SR_SHFT));
3235 }
3236
3237 pmap_kernel()->pm_sr[KERNEL_SR] = KERNEL_SEGMENT|SR_SUKEY|SR_PRKEY;
3238 __asm volatile ("mtsr %0,%1"
3239 :: "n"(KERNEL_SR), "r"(KERNEL_SEGMENT));
3240 #ifdef KERNEL2_SR
3241 pmap_kernel()->pm_sr[KERNEL2_SR] = KERNEL2_SEGMENT|SR_SUKEY|SR_PRKEY;
3242 __asm volatile ("mtsr %0,%1"
3243 :: "n"(KERNEL2_SR), "r"(KERNEL2_SEGMENT));
3244 #endif
3245 for (i = 0; i < 16; i++) {
3246 if (iosrtable[i] & SR601_T) {
3247 pmap_kernel()->pm_sr[i] = iosrtable[i];
3248 __asm volatile ("mtsrin %0,%1"
3249 :: "r"(iosrtable[i]), "r"(i << ADDR_SR_SHFT));
3250 }
3251 }
3252 #endif /* !PPC_OEA64 */
3253
3254 __asm volatile ("sync; mtsdr1 %0; isync"
3255 :: "r"((uintptr_t)pmap_pteg_table | (pmap_pteg_mask >> 10)));
3256 tlbia();
3257
3258 #ifdef ALTIVEC
3259 pmap_use_altivec = cpu_altivec;
3260 #endif
3261
3262 #ifdef DEBUG
3263 if (pmapdebug & PMAPDEBUG_BOOT) {
3264 u_int cnt;
3265 int bank;
3266 char pbuf[9];
3267 for (cnt = 0, bank = 0; bank < vm_nphysseg; bank++) {
3268 cnt += vm_physmem[bank].avail_end - vm_physmem[bank].avail_start;
3269 printf("pmap_bootstrap: vm_physmem[%d]=%#lx-%#lx/%#lx\n",
3270 bank,
3271 ptoa(vm_physmem[bank].avail_start),
3272 ptoa(vm_physmem[bank].avail_end),
3273 ptoa(vm_physmem[bank].avail_end - vm_physmem[bank].avail_start));
3274 }
3275 format_bytes(pbuf, sizeof(pbuf), ptoa((u_int64_t) cnt));
3276 printf("pmap_bootstrap: UVM memory = %s (%u pages)\n",
3277 pbuf, cnt);
3278 }
3279 #endif
3280
3281 pool_init(&pmap_upvo_pool, sizeof(struct pvo_entry),
3282 sizeof(struct pvo_entry), 0, 0, "pmap_upvopl",
3283 &pmap_pool_uallocator);
3284
3285 pool_setlowat(&pmap_upvo_pool, 252);
3286
3287 pool_init(&pmap_pool, sizeof(struct pmap),
3288 sizeof(void *), 0, 0, "pmap_pl", &pmap_pool_uallocator);
3289 }
3290