1 /* $NetBSD: pmap_68k.c,v 1.49 2025/12/17 07:05:50 thorpej Exp $ */ 2 3 /*- 4 * Copyright (c) 2025 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Jason R. Thorpe. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 /* 33 * Pmap module for the Motorola 68851 / 68030 / 68040 / 68060 MMUs. 34 * (...and HP 68851-like MMU.) 35 * 36 * This implementation supports both 2-level and 3-level page table 37 * layouts. The 3-level is mandated by 68040 / 68060, and the 2-level 38 * is mandated by the HP MMU. The 68851 and 68030 can do either, and 39 * for now, the 2-level arrangement is retained for those MMUs, although 40 * eventually we will switch them to the 3-level configuration. 41 * 42 * To support both configurations, page tables are abstracted away from 43 * the page table pages that contain them. The interface pmap operations 44 * operate on "leaf" (page) tables, and only when one of those tables needs 45 * to be allocated or freed, do the differences between the two configurations 46 * need to be dealt with. All of the tables are kept in a red-black tree 47 * that's indexed by their "segment" number (where "segment" is defined as 48 * "the amount of space mapped by a single leaf table"). This avoids having 49 * to burn large amounts of kernel address space to access tables which are 50 * expected to be sparsely-populated. 51 * 52 * In order to reduce the number of tree lookups, the most recently used 53 * leaf table is cached, and the interface contract is such that bulk 54 * operations are allowed to access subsequent PTEs within a given table 55 * (segment) without having to perform another PTE lookup. 56 * 57 * This illustrates the initial table layout for a simple program 58 * (/usr/bin/yes) using the standard m68k address space layout (based 59 * on the historical 4.3BSD-on-hp300 layout, which was itself based on 60 * HP-UX in order to facilitate HP-UX binary compatibility back when 61 * that was considered to be important). This example uses a 4K page 62 * size. 
63 * 64 * TEXTADDR is $0000.2000 (not always strictly true, but close enough) 65 * USRSTACK is $FFF0.0000 (grows down, first used page VA is $FFEF.F000) 66 * 67 * (TEXTADDR is $0000.2000 because the linker uses 8K page size for 68 * broader compatibility and keeps the 0-page unmapped so that NULL 69 * pointer dereferences blow up.) 70 * 71 * This is to say: the text / data / heap of this program are in the 72 * bottom 1MB of the address space, and the stack is in the second-from- 73 * the-top 1MB of the address space. 74 * 75 * In the 2-level layout, the level-1 table is 4KB in size, and has 1024 76 * entries. Those 1024 entries together represent the 4GB user address 77 * space, and each entry thus maps a 4MB "segment" by itself pointing to 78 * a level-2 table which themselves are 4KB in size and have 1024 entries 79 * (4MB / 1024 -> 4KB, which is the page size ... convenient!) So, when 80 * our very simple program is loaded, we have a table structure that looks 81 * like this: 82 * 83 * (4KB) 84 * +----------------------+ 85 * | Level-1 | 86 * |0 1023| 87 * +----------------------+ 88 * | | 89 * | | 90 * +---------+ +---------+ 91 * | | 92 * v v 93 * (4KB) (4KB) 94 * +----------------------+ +----------------------+ 95 * | Level-2 | | Level-2 | 96 * | 2 4 | | 767 | 97 * +----------------------+ +----------------------+ 98 * | | | 99 * | +-+ | 100 * v v v 101 * TEXT DATA/bss/heap stack 102 * 103 * As you can see, this requires 3 tables (1 level-1 and 2 level-2). Each 104 * table consumes a full 4KB page, so mapping this address space requires 105 * 3 total pages. 106 * 107 * In the 3-level layout, the level-1 and level-2 tables each contain 128 108 * entries, making them 512 bytes in size. When using 4KB pages, the level-3 109 * tables contain 64 entries, making them 256 bytes in size. 110 * 111 * So, assuming the same address space layout, the 3-level structure looks 112 * like this: 113 * 114 * (512B) 115 * +--------------+ 116 * | Level-1 | 117 * |0 127| 118 * +--------------+ 119 * | | 120 * +---+ +---+ 121 * v v 122 * (512B) (512B) 123 * +--------------+ +--------------+ 124 * | Level-2 | | Level-2 | 125 * |0 | | 123 | 126 * +--------------+ +--------------+ 127 * | | 128 * +---------+ +-----+ 129 * v v 130 * (256B) (256B) 131 * +------------+ +------------+ 132 * | Level-3 | | Level-3 | 133 * | 2 4 | | 63| 134 * +------------+ +------------+ 135 * | | | 136 * | +-+ | 137 * v v v 138 * TEXT DATA/bss/heap stack 139 * 140 * The table allocator has two pools of memory for tables in the 3-level 141 * configuration: one for "segment" tables (always 512 bytes) and one for 142 * "page" or "leaf" tables (256 bytes in size for 4K pages). Pages are 143 * allocated to the pools one at a time, and then the tables are allocated 144 * from the pages. Because of this, we only need two pages, 33% less (!), 145 * than the 2-level configuration to map the same address space. 146 * 147 * There is a cost, however: each access that misses the Address Translation 148 * Cache costs one extra memory cycle in the 3-level configuration. 149 * 150 * LOCKING IN THIS PMAP MODULE: 151 * 152 * MULTIPROCESSING IS NOT SUPPORTED IN THIS PMAP MODULE. Adding support 153 * for it would not be terribly difficult, but there is little value in 154 * doing that work until such time as a multiprocessor m68k machine exists 155 * that NetBSD runs on. 156 * 157 * As such, there is **no** locking performed of any data structures here. 
158 * We do actually reap a benefit from this perceived laziness: we do not 159 * have to worry about lock ordering, which means we can take some shortcuts 160 * in some places (especially around pv_entry manipulation). 161 * 162 * THERE IS A CAVEAT, HOWEVER! Because there are no guard rails, we cannot, 163 * under any circumstances, yield the CPU during the critical section of a 164 * pmap operation, as doing so could cause the world to change beneath our 165 * feet, possibly rendering our work, for lack of a better term, "crashy". 166 * Specifically, this means: 167 * 168 * - Adaptive mutexes must not be acquired (e.g. when calling into 169 * other code, e.g. UVM to get a VA or a page). 170 * - Waiting for memory is not allowed. 171 * - The current thread may not be preempted. 172 * 173 * If any of those things are required, they must be performed outside of 174 * a critical section. If we discover that this is required while inside 175 * a critical section, then we must exit the critical section, perform the 176 * blocking work, re-enter the critical section and re-evaluate everything. 177 * Macros are provided to mark the boundaries of critical sections: 178 * 179 * - PMAP_CRIT_ENTER() 180 * - PMAP_CRIT_EXIT() 181 * 182 * XXX Alas, doesn't seem to be a way for us to hook into ASSERT_SLEEPABLE() 183 * XXX when inside a critical section. We should explore that for a future 184 * XXX enhancement. 185 */ 186 187 /* 188 * Current status: 189 * - Very stable multi-user on virt68k (qemu 68040; does not accurately 190 * model cache or ATC, but suitable for exercising large memory configs). 191 * 192 * - Single-user mode on 68030 w/ no external cache (luna68k). 193 * 194 * - Single-user mode on 68040 (hp425t). 195 * 196 * - Ports that have been adapted: hp300, luna68k, mvme68k (not tested), 197 * news68k (see below), next68k (not tested), virt68k, x68k. 198 * 199 * XXX TODO XXX 200 * 201 * - Adapt amiga (hard), atari (hard), cesfic (easy), mac68k (moderate). 202 * - Test on 68020. 203 * - Test on 68060. 204 * - More rigorous 68040 testing. 205 * - More rigorous 68030 testing. 206 * - Test on machines above listed as "not tested". 207 * - More rigorous testing in various emulators (Nono, UAE?) 208 * - Fix problems observed on news68k (external cache related?) 209 * - Finish HP MMU support and test on real HP MMU. 210 * - Convert '851 / '030 to 3-level. 211 * - Optimize ATC / cache manipulation. 212 * - Add some more instrumentation. 213 * - Eventually disable instrumentation by default. 214 * - ... 215 * - PROFIT! 216 */ 217 218 #include "opt_m68k_arch.h" 219 220 #include <sys/cdefs.h> 221 __KERNEL_RCSID(0, "$NetBSD: pmap_68k.c,v 1.49 2025/12/17 07:05:50 thorpej Exp $"); 222 223 #include <sys/param.h> 224 #include <sys/systm.h> 225 #include <sys/evcnt.h> 226 #include <sys/proc.h> 227 #include <sys/pool.h> 228 #include <sys/cpu.h> 229 #include <sys/atomic.h> 230 #include <sys/kmem.h> 231 232 #include <machine/pcb.h> 233 234 #include <uvm/uvm.h> 235 #include <uvm/uvm_physseg.h> 236 237 #include <m68k/cacheops.h> 238 239 #if !defined(M68K_MMU_MOTOROLA) && !defined(M68K_MMU_HP) 240 #error Hit the road, Jack... 241 #endif 242 243 /****************************** SERIALIZATION ********************************/ 244 245 /* 246 * XXX Would like to make these do something lightweight-ish in 247 * XXX DIAGNOSTIC kernels (and also make ASSERT_SLEEPABLE() trip 248 * XXX if we're in a critical section). 
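 * XXX
 * XXX A minimal sketch of the lightweight-DIAGNOSTIC idea (purely
 * XXX hypothetical, not what the macros below do today; the name
 * XXX "pmap_crit_depth" is invented for illustration):
 * XXX
 * XXX	#ifdef DIAGNOSTIC
 * XXX	static int pmap_crit_depth;
 * XXX	#define PMAP_CRIT_ENTER()	(pmap_crit_depth++)
 * XXX	#define PMAP_CRIT_EXIT()	(pmap_crit_depth--)
 * XXX	#define PMAP_CRIT_ASSERT()	KASSERT(pmap_crit_depth > 0)
 * XXX	#endif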
 */

#define	PMAP_CRIT_ENTER()	__nothing
#define	PMAP_CRIT_EXIT()	__nothing
#define	PMAP_CRIT_ASSERT()	__nothing

/**************************** MMU CONFIGURATION ******************************/

#include "opt_m68k_arch.h"

#if defined(M68K_MMU_68030)
#include <m68k/mmu_30.h>	/* for cpu_kcore_hdr_t */
#endif

/*
 * We consider 3 different MMU classes:
 * - 68851 (includes 68030)
 * - 68040 (includes 68060)
 * - HP MMU for 68020 (68851-like, 2-level 4K only, external VAC)
 */

#define	MMU_CLASS_68851		0
#define	MMU_CLASS_68040		1
#define	MMU_CLASS_HP		3

static int pmap_mmuclass __read_mostly;

#if defined(M68K_MMU_68851) || defined(M68K_MMU_68030)
#define	MMU_CONFIG_68851_CLASS	1
#else
#define	MMU_CONFIG_68851_CLASS	0
#endif

#if defined(M68K_MMU_68040) || defined(M68K_MMU_68060)
#define	MMU_CONFIG_68040_CLASS	1
#else
#define	MMU_CONFIG_68040_CLASS	0
#endif

#if defined(M68K_MMU_HP)
#define	MMU_CONFIG_HP_CLASS	1
#else
#define	MMU_CONFIG_HP_CLASS	0
#endif

#define	MMU_CONFIG_NCLASSES	(MMU_CONFIG_68851_CLASS + \
				 MMU_CONFIG_68040_CLASS + \
				 MMU_CONFIG_HP_CLASS)

#if MMU_CONFIG_NCLASSES == 1

#if MMU_CONFIG_68851_CLASS
#define	MMU_IS_68851_CLASS	1
#elif MMU_CONFIG_68040_CLASS
#define	MMU_IS_68040_CLASS	1
#elif MMU_CONFIG_HP_CLASS
#define	MMU_IS_HP_CLASS		1
#else
#error Single MMU config predicate error.
#endif

#else /* MMU_CONFIG_NCLASSES != 1 */

#if MMU_CONFIG_68851_CLASS
#define	MMU_IS_68851_CLASS	(pmap_mmuclass == MMU_CLASS_68851)
#endif

#if MMU_CONFIG_68040_CLASS
#define	MMU_IS_68040_CLASS	(pmap_mmuclass == MMU_CLASS_68040)
#endif

#if MMU_CONFIG_HP_CLASS
#define	MMU_IS_HP_CLASS		(pmap_mmuclass == MMU_CLASS_HP)
#endif

#endif /* MMU_CONFIG_NCLASSES == 1 */

#ifndef MMU_IS_68851_CLASS
#define	MMU_IS_68851_CLASS	0
#endif

#ifndef MMU_IS_68040_CLASS
#define	MMU_IS_68040_CLASS	0
#endif

#ifndef MMU_IS_HP_CLASS
#define	MMU_IS_HP_CLASS		0
#endif

/*
 * 68040 must always use 3-level. Eventually, we will switch the '851
 * type over to 3-level as well, but for now, it gets 2-level. The
 * HP MMU is stuck there for all eternity.
342 */ 343 #define MMU_USE_3L (MMU_IS_68040_CLASS) 344 #define MMU_USE_2L (!MMU_USE_3L) 345 346 /***************************** INSTRUMENTATION *******************************/ 347 348 #define PMAP_EVENT_COUNTERS 349 350 static struct evcnt pmap_nkptpages_initial_ev = 351 EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "pmap nkptpages", "initial"); 352 static struct evcnt pmap_nkptpages_current_ev = 353 EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "pmap nkptpages", "current"); 354 EVCNT_ATTACH_STATIC(pmap_nkptpages_initial_ev); 355 EVCNT_ATTACH_STATIC(pmap_nkptpages_current_ev); 356 357 static struct evcnt pmap_nkstpages_initial_ev = 358 EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "pmap nkstpages", "initial"); 359 static struct evcnt pmap_nkstpages_current_ev = 360 EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "pmap nkstpages", "current"); 361 EVCNT_ATTACH_STATIC(pmap_nkstpages_initial_ev); 362 EVCNT_ATTACH_STATIC(pmap_nkstpages_current_ev); 363 364 static struct evcnt pmap_maxkva_ev = 365 EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "pmap", "maxkva"); 366 EVCNT_ATTACH_STATIC(pmap_maxkva_ev); 367 368 static struct evcnt pmap_kvalimit_ev = 369 EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "pmap", "kvalimit"); 370 EVCNT_ATTACH_STATIC(pmap_kvalimit_ev); 371 372 #ifdef PMAP_EVENT_COUNTERS 373 static struct evcnt pmap_pv_alloc_wait_ev = 374 EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "pmap pv_alloc", "wait"); 375 EVCNT_ATTACH_STATIC(pmap_pv_alloc_wait_ev); 376 377 static struct evcnt pmap_pv_alloc_nowait_ev = 378 EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "pmap pv_alloc", "nowait"); 379 EVCNT_ATTACH_STATIC(pmap_pv_alloc_nowait_ev); 380 381 static struct evcnt pmap_pv_enter_called_ev = 382 EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "pmap pv_enter", "called"); 383 EVCNT_ATTACH_STATIC(pmap_pv_enter_called_ev); 384 385 static struct evcnt pmap_pv_enter_usr_ci_ev = 386 EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "pmap pv_enter", "usr_ci"); 387 EVCNT_ATTACH_STATIC(pmap_pv_enter_usr_ci_ev); 388 389 #if MMU_CONFIG_HP_CLASS 390 static struct evcnt pmap_pv_enter_vac_ci_ev = 391 EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "pmap pv_enter", "vac_ci"); 392 EVCNT_ATTACH_STATIC(pmap_pv_enter_vac_ci_ev); 393 #endif 394 395 static struct evcnt pmap_pv_enter_ci_multi_ev = 396 EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "pmap pv_enter", "ci_multi"); 397 EVCNT_ATTACH_STATIC(pmap_pv_enter_ci_multi_ev); 398 399 static struct evcnt pmap_pv_remove_called_ev = 400 EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "pmap pv_remove", "called"); 401 EVCNT_ATTACH_STATIC(pmap_pv_remove_called_ev); 402 403 static struct evcnt pmap_pv_remove_ci_ev = 404 EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "pmap pv_remove", "ci"); 405 EVCNT_ATTACH_STATIC(pmap_pv_remove_ci_ev); 406 407 static struct evcnt pmap_pt_cache_hit_ev = 408 EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "pmap pt_cache", "hit"); 409 EVCNT_ATTACH_STATIC(pmap_pt_cache_hit_ev); 410 411 static struct evcnt pmap_pt_cache_miss_ev = 412 EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "pmap pt_cache", "miss"); 413 EVCNT_ATTACH_STATIC(pmap_pt_cache_miss_ev); 414 415 static struct evcnt pmap_enter_nowait_ev = 416 EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "pmap enter", "nowait"); 417 EVCNT_ATTACH_STATIC(pmap_enter_nowait_ev); 418 419 static struct evcnt pmap_enter_yeswait_ev = 420 EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "pmap enter", "yeswait"); 421 EVCNT_ATTACH_STATIC(pmap_enter_yeswait_ev); 422 423 static struct evcnt pmap_enter_pte_alloc_fail_ev = 424 EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "pmap enter", "pte alloc failed"); 425 
EVCNT_ATTACH_STATIC(pmap_enter_pte_alloc_fail_ev); 426 427 static struct evcnt pmap_enter_pv_alloc_fail_ev = 428 EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "pmap enter", "pv alloc failed"); 429 EVCNT_ATTACH_STATIC(pmap_enter_pv_alloc_fail_ev); 430 431 static struct evcnt pmap_enter_valid_ev = 432 EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "pmap enter", "valid"); 433 EVCNT_ATTACH_STATIC(pmap_enter_valid_ev); 434 435 static struct evcnt pmap_enter_wire_change_ev = 436 EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "pmap enter", "wire change"); 437 EVCNT_ATTACH_STATIC(pmap_enter_wire_change_ev); 438 439 static struct evcnt pmap_enter_prot_change_ev = 440 EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "pmap enter", "prot change"); 441 EVCNT_ATTACH_STATIC(pmap_enter_prot_change_ev); 442 443 static struct evcnt pmap_enter_pa_change_ev = 444 EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "pmap enter", "pa change"); 445 EVCNT_ATTACH_STATIC(pmap_enter_pa_change_ev); 446 447 static struct evcnt pmap_enter_pv_recycle_ev = 448 EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "pmap enter", "pv recycle"); 449 EVCNT_ATTACH_STATIC(pmap_enter_pv_recycle_ev); 450 451 #define pmap_evcnt(e) pmap_ ## e ## _ev.ev_count++ 452 #else 453 #define pmap_evcnt(e) __nothing 454 #endif 455 456 static void (*pmap_load_urp_func)(paddr_t) __read_mostly; 457 458 static void 459 pmap_mmuclass_init(void) 460 { 461 switch (mmutype) { 462 #if MMU_CONFIG_68040_CLASS 463 case MMU_68040: 464 case MMU_68060: 465 pmap_mmuclass = MMU_CLASS_68040; 466 /* 467 * XXX This is messy because 68060 frequently gets 468 * XXX initialize to MMU_68040. Should be cleaned 469 * XXX up once the Hibler pmap is obsoleted. 470 */ 471 #if defined(M68040) 472 if (cputype == CPU_68040) { 473 pmap_load_urp_func = mmu_load_urp40; 474 } 475 #endif 476 #if defined(M68060) 477 if (cputype == CPU_68060) { 478 pmap_load_urp_func = mmu_load_urp60; 479 } 480 #endif 481 break; 482 #endif 483 #if MMU_CONFIG_68851_CLASS 484 case MMU_68851: 485 case MMU_68030: 486 pmap_mmuclass = MMU_CLASS_68851; 487 protorp[0] = MMU51_CRP_BITS; 488 pmap_load_urp_func = mmu_load_urp51; 489 break; 490 #endif 491 #if MMU_CONFIG_HP_CLASS 492 case MMU_HP: 493 pmap_mmuclass = MMU_CLASS_HP; 494 pmap_load_urp_func = mmu_load_urp20hp; 495 break; 496 #endif 497 default: 498 panic("%s: mmutype=%d not configured?", __func__, mmutype); 499 } 500 501 if (pmap_load_urp_func == NULL) { 502 panic("%s: No mmu_load_*() for cputype=%d mmutype=%d", 503 __func__, cputype, mmutype); 504 } 505 } 506 507 /* 508 * pmap_load_urp: 509 * 510 * Load the user root table into the MMU. 511 */ 512 static inline void 513 pmap_load_urp(paddr_t urp) 514 { 515 (*pmap_load_urp_func)(urp); 516 } 517 518 #if MMU_CONFIG_HP_CLASS 519 static vaddr_t pmap_aliasmask __read_mostly; 520 #endif 521 522 /* 523 * pmap_init_vac: 524 * 525 * Set up virtually-addressed cache information. Only relevant 526 * for the HP MMU. 527 */ 528 void 529 pmap_init_vac(size_t vacsize) 530 { 531 #if MMU_CONFIG_HP_CLASS 532 KASSERT(pmap_aliasmask == 0); 533 KASSERT(powerof2(vacsize)); 534 pmap_aliasmask = vacsize - 1; 535 #endif 536 } 537 538 /***************************** PHYS <-> VM PAGE ******************************/ 539 540 static bool pmap_initialized_p; 541 542 static inline struct vm_page * 543 pmap_pa_to_pg(paddr_t pa) 544 { 545 return pmap_initialized_p ? 
PHYS_TO_VM_PAGE(pa) : NULL; 546 } 547 548 /*************************** RESOURCE MANAGEMENT *****************************/ 549 550 static struct pmap kernel_pmap_store; 551 struct pmap * const kernel_pmap_ptr = &kernel_pmap_store; 552 553 /* 554 * Physical address of kernel level 1 table. This name is compatible 555 * with the Hibler pmap's name. 556 */ 557 paddr_t Sysseg_pa; 558 559 /* 560 * Avoid a memory load when doing comparisons against pmap_kernel() 561 * within this compilation unit. 562 */ 563 #undef pmap_kernel 564 #define pmap_kernel() (&kernel_pmap_store) 565 566 static inline bool 567 active_pmap(pmap_t pmap) 568 { 569 return pmap == pmap_kernel() || 570 pmap == curproc->p_vmspace->vm_map.pmap; 571 } 572 573 static inline bool 574 active_user_pmap(pmap_t pmap) 575 { 576 return curproc != NULL && 577 pmap != pmap_kernel() && 578 pmap == curproc->p_vmspace->vm_map.pmap; 579 } 580 581 /* 582 * Number of tables per page table page: 583 * 0 - number of leaf page tables per page 584 * 1 - number of segment tables per page 585 */ 586 static unsigned int pmap_ptpage_table_counts[2]; 587 588 __CTASSERT(LA40_L1_COUNT == LA40_L2_COUNT); 589 590 static void 591 pmap_ptpage_init(void) 592 { 593 if (MMU_USE_3L) { 594 pmap_ptpage_table_counts[0] = PAGE_SIZE / TBL40_L3_SIZE; 595 pmap_ptpage_table_counts[1] = PAGE_SIZE / TBL40_L2_SIZE; 596 } else { 597 pmap_ptpage_table_counts[0] = 1; 598 pmap_ptpage_table_counts[1] = 1; 599 } 600 } 601 602 static struct vm_page * 603 pmap_page_alloc(bool nowait) 604 { 605 struct vm_page *pg; 606 const int flags = nowait ? UVM_PGA_USERESERVE : 0; 607 608 while ((pg = uvm_pagealloc(NULL, 0, NULL, flags)) == NULL) { 609 if (nowait) { 610 return NULL; 611 } 612 uvm_wait("pmappg"); 613 } 614 pg->flags &= ~PG_BUSY; /* never busy */ 615 616 return pg; 617 } 618 619 static struct pmap_ptpage * 620 pmap_ptpage_alloc(bool segtab, bool nowait) 621 { 622 const unsigned int tabcnt = pmap_ptpage_table_counts[segtab]; 623 const size_t size = sizeof(struct pmap_ptpage) + 624 (sizeof(struct pmap_table) * tabcnt); 625 const size_t tabsize = PAGE_SIZE / tabcnt; 626 struct pmap_ptpage *ptp; 627 struct pmap_table *pt; 628 struct vm_page *pg; 629 const int uvm_f_nowait = nowait ? UVM_KMF_NOWAIT : 0; 630 vaddr_t ptpva; 631 632 ptp = kmem_zalloc(size, nowait ? KM_NOSLEEP : KM_SLEEP); 633 if (__predict_false(ptp == NULL)) { 634 return NULL; 635 } 636 637 /* Allocate a VA for the PT page. */ 638 ptpva = uvm_km_alloc(kernel_map, PAGE_SIZE, 0, 639 UVM_KMF_VAONLY | uvm_f_nowait); 640 if (__predict_false(ptpva == 0)) { 641 kmem_free(ptp, size); 642 return NULL; 643 } 644 645 /* Get a page. */ 646 pg = pmap_page_alloc(nowait); 647 if (__predict_false(pg == NULL)) { 648 uvm_km_free(kernel_map, ptpva, PAGE_SIZE, UVM_KMF_VAONLY); 649 kmem_free(ptp, size); 650 return NULL; 651 } 652 653 /* Map the page cache-inhibited and zero it out. */ 654 pmap_kenter_pa(ptpva, VM_PAGE_TO_PHYS(pg), 655 UVM_PROT_READ | UVM_PROT_WRITE, PMAP_NOCACHE); 656 zeropage((void *)ptpva); 657 658 /* 659 * All resources for the PT page have been allocated. 660 * Now initialize it and the individual table descriptors. 
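	 *
	 * (For illustration, assuming the table sizes described at the
	 * top of this file: with 4KB pages in the 3-level configuration,
	 * a "segment" PT page is carved into PAGE_SIZE / TBL40_L2_SIZE
	 * == 8 tables and a "leaf" PT page into PAGE_SIZE / TBL40_L3_SIZE
	 * == 16 tables; in the 2-level configuration it is always exactly
	 * one table per page.)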
661 */ 662 LIST_INIT(&ptp->ptp_freelist); 663 ptp->ptp_pg = pg; 664 ptp->ptp_vpagenum = m68k_btop(ptpva); 665 ptp->ptp_freecnt = tabcnt; 666 ptp->ptp_segtab = segtab; 667 668 for (unsigned int i = 0; i < tabcnt; ptpva += tabsize, i++) { 669 pt = &ptp->ptp_tables[i]; 670 pt->pt_ptpage = ptp; 671 pt->pt_entries = (pt_entry_t *)ptpva; 672 LIST_INSERT_HEAD(&ptp->ptp_freelist, pt, pt_freelist); 673 } 674 675 return ptp; 676 } 677 678 static void 679 pmap_ptpage_free(struct pmap_ptpage *ptp) 680 { 681 const unsigned int tabcnt = pmap_ptpage_table_counts[ptp->ptp_segtab]; 682 const size_t size = sizeof(struct pmap_ptpage) + 683 (sizeof(struct pmap_table) * tabcnt); 684 685 uvm_km_free(kernel_map, m68k_ptob(ptp->ptp_vpagenum), PAGE_SIZE, 686 UVM_KMF_WIRED); 687 kmem_free(ptp, size); 688 } 689 690 static struct pool pmap_pool; 691 static struct pool pmap_pv_pool; 692 693 #define PMAP_PV_LOWAT 16 694 695 static void 696 pmap_alloc_init(void) 697 { 698 pool_init(&pmap_pv_pool, sizeof(struct pv_entry), 699 PVH_ATTR_MASK + 1, /* align */ 700 0, /* ioff */ 701 0, /* flags */ 702 "pmappv", /* wchan */ 703 &pool_allocator_meta, /* palloc */ 704 IPL_VM); /* ipl */ 705 706 /* 707 * Set a low water mark on the pv_entry pool, so that we are 708 * more likely to have these around even in extreme memory 709 * starvation. 710 */ 711 pool_setlowat(&pmap_pv_pool, PMAP_PV_LOWAT); 712 713 pool_init(&pmap_pool, sizeof(struct pmap), 714 0, /* align */ 715 0, /* ioff */ 716 0, /* flags */ 717 "pmappl", /* wchan */ 718 &pool_allocator_kmem, /* palloc */ 719 IPL_NONE); /* ipl */ 720 } 721 722 static inline pmap_t 723 pmap_alloc(void) 724 { 725 pmap_t pmap = pool_get(&pmap_pool, PR_WAITOK); 726 memset(pmap, 0, sizeof(*pmap)); 727 return pmap; 728 } 729 730 static inline void 731 pmap_free(pmap_t pmap) 732 { 733 pool_put(&pmap_pool, pmap); 734 } 735 736 static struct pv_entry * 737 pmap_pv_alloc(bool nowait) 738 { 739 struct pv_entry *pv; 740 741 #ifdef PMAP_EVENT_COUNTERS 742 if (nowait) { 743 pmap_evcnt(pv_alloc_nowait); 744 } else { 745 pmap_evcnt(pv_alloc_wait); 746 } 747 #endif 748 749 pv = pool_get(&pmap_pv_pool, nowait ? PR_NOWAIT : 0); 750 if (__predict_true(pv != NULL)) { 751 KASSERT((((uintptr_t)pv) & PVH_ATTR_MASK) == 0); 752 } 753 return pv; 754 } 755 756 static void 757 pmap_pv_free(struct pv_entry *pv) 758 { 759 pool_put(&pmap_pv_pool, pv); 760 } 761 762 /* 763 * Whenever we need to free resources back to the system, we want to 764 * do it in a batch with any locks released. So, we have this around 765 * to collect the garbage, as needed. 766 */ 767 struct pmap_completion { 768 struct pmap_ptpage_list pc_ptpages; 769 struct pmap_pv_list pc_pvlist; 770 }; 771 772 static inline void 773 pmap_completion_init(struct pmap_completion *pc) 774 { 775 TAILQ_INIT(&pc->pc_ptpages); 776 LIST_INIT(&pc->pc_pvlist); 777 } 778 779 static void 780 pmap_completion_fini(struct pmap_completion *pc) 781 { 782 struct pmap_ptpage *ptp; 783 struct pv_entry *pv; 784 785 while ((ptp = TAILQ_FIRST(&pc->pc_ptpages)) != NULL) { 786 TAILQ_REMOVE(&pc->pc_ptpages, ptp, ptp_list); 787 /* 788 * Can't assert ptp_freecnt here; it won't match up 789 * in the pmap_remove_all() case. 
790 * 791 * KASSERT(ptp->ptp_freecnt == 792 * pmap_ptpage_table_counts[ptp->ptp_segtab]); 793 */ 794 pmap_ptpage_free(ptp); 795 } 796 797 while ((pv = LIST_FIRST(&pc->pc_pvlist)) != NULL) { 798 LIST_REMOVE(pv, pv_pmlist); 799 pmap_pv_free(pv); 800 } 801 } 802 803 /************************ PTE MANIPULATION HELPERS ***************************/ 804 805 /* Assert assumptions made in <machine/pmap.h>. */ 806 __CTASSERT(DT51_PAGE == PTE40_RESIDENT); 807 __CTASSERT(PTE51_WP == PTE40_W); 808 __CTASSERT(PTE51_U == PTE40_U); 809 __CTASSERT(PTE51_M == PTE40_M); 810 __CTASSERT(PTE51_CI == PTE40_CM_NC_SER); 811 812 static pt_entry_t pmap_pte_proto[UVM_PROT_ALL + 1]; 813 static pt_entry_t pmap_pte_proto_ci[UVM_PROT_ALL + 1]; 814 static pt_entry_t pmap_pte_proto_um[UVM_PROT_ALL + 1]; 815 static pt_entry_t pmap_ste_proto; 816 817 static inline paddr_t 818 pte_pa(pt_entry_t pte) 819 { 820 return pte & PTE40_PGA; 821 } 822 823 /* 824 * These predicate inlines compile down into BFEXTU, so are quite fast. 825 */ 826 827 static inline bool 828 pte_valid_p(pt_entry_t pte) 829 { 830 return !!(pte & PTE_VALID); 831 } 832 833 static inline bool 834 pte_wired_p(pt_entry_t pte) 835 { 836 return !!(pte & PTE_WIRED); 837 } 838 839 static inline bool 840 pte_managed_p(pt_entry_t pte) 841 { 842 return !!(pte & PTE_PVLIST); 843 } 844 845 static inline bool 846 pte_ci_p(pt_entry_t pte) 847 { 848 /* 849 * Happily, PTE51_CI is bit 6, which is set for both of the 850 * cache-inhibited modes on 68040, so we can just check for 851 * that. 852 */ 853 return !!(pte & PTE51_CI); 854 } 855 856 #define PTE_PROT_CHANGE_BITS (PTE_WP | PTE_CMASK) 857 858 static inline pt_entry_t 859 pte_change_prot(pt_entry_t opte, vm_prot_t prot) 860 { 861 pt_entry_t *pte_proto = pte_ci_p(opte) ? pmap_pte_proto_ci 862 : pmap_pte_proto; 863 864 return (opte & ~PTE_PROT_CHANGE_BITS) | pte_proto[prot]; 865 } 866 867 static inline pt_entry_t 868 pte_load(pt_entry_t *ptep) 869 { 870 return atomic_load_relaxed(ptep); 871 } 872 873 static inline void 874 pte_store(pt_entry_t *ptep, pt_entry_t npte) 875 { 876 atomic_store_relaxed(ptep, npte); 877 } 878 879 /* 880 * Don't inline the CAS.L instruction; some systems have non-working 881 * READ-MODIFY-WRITE cycle logic. This will ensure that we'll use 882 * restartable atomic sequence, if required. 883 * 884 * AND.L and OR.L don't use the RMC signal, so they aren't subject 885 * to the same constraints. 886 */ 887 static inline bool 888 pte_update(pt_entry_t *ptep, pt_entry_t opte, pt_entry_t npte) 889 { 890 /* 891 * Use compare-and-swap to update the PTE. This ensures there's 892 * no possibility of losing any hardware-maintained bits when 893 * updating the PTE. 894 * 895 * XXX Should turn this into a single instruction when possible 896 * XXX to deduce at compile time. 897 */ 898 return atomic_cas_uint(ptep, opte, npte) == opte; 899 } 900 901 #if MMU_CONFIG_HP_CLASS 902 /* 903 * These are only used for HP MMU VAC shenanigans. There is no need 904 * for these to be truly atomic, and systems with an HP MMU can't do 905 * truly atomic operations anyway. 906 */ 907 static inline void 908 pte_set(pt_entry_t *ptep, pt_entry_t bits) 909 { 910 *ptep |= bits; 911 } 912 913 static inline void 914 pte_mask(pt_entry_t *ptep, pt_entry_t mask) 915 { 916 *ptep &= mask; 917 } 918 #endif /* MMU_CONFIG_HP_CLASS */ 919 920 static inline pt_entry_t 921 pte_set_ci(pt_entry_t pte) 922 { 923 return (pte & ~PTE_CMASK) | (MMU_IS_68040_CLASS ? 
							  PTE40_CM_NC_SER
							: PTE51_CI);
}

static inline pt_entry_t
pte_clr_ci(pt_entry_t pte)
{
	pte &= ~PTE_CMASK;
	if (MMU_IS_68040_CLASS) {
		pte |= (pte & PTE_WP) ? PTE40_CM_WT
				      : PTE40_CM_CB;
	}
	return pte;
}

static void
pmap_pte_proto_init(void)
{
	pt_entry_t c_bits, ro_c_bits, rw_c_bits, ci_bits, prot_bits, um_bits;
	int prot;

	if (MMU_IS_68040_CLASS) {
		ro_c_bits = PTE40_CM_WT; /* this is what the Hibler pmap did */
		rw_c_bits = PTE40_CM_CB;
		ci_bits = PTE40_CM_NC_SER;
	} else {
		ro_c_bits = rw_c_bits = 0;
		ci_bits = PTE51_CI;
	}

	for (prot = 1; prot <= UVM_PROT_ALL; prot++) {
		prot_bits = um_bits = 0;
		if (prot & UVM_PROT_WRITE) {
			um_bits = PTE_U | PTE_M;
		} else if (prot & (UVM_PROT_READ|UVM_PROT_EXEC)) {
			prot_bits = PTE_WP;
			um_bits = PTE_U;
		}
		c_bits = (prot & UVM_PROT_WRITE) ? rw_c_bits : ro_c_bits;
		pmap_pte_proto[prot] = PTE_VALID | prot_bits | c_bits;
		pmap_pte_proto_ci[prot] = PTE_VALID | prot_bits | ci_bits;
		pmap_pte_proto_um[prot] = um_bits;
	}

	/*
	 * from hp300/DOC/HPMMU.notes:
	 *
	 * Segment table entries:
	 *
	 *	bits 31-12:	Physical page frame number of PT page
	 *	bits 11-4:	Reserved at zero (can software use them?)
	 *	bit 3:		Reserved at one
	 *	bits 1-0:	Valid bits (hardware uses bit 1)
	 *
	 * This is all roughly compatible with 68851 and 68040:
	 *
	 *	bit 3:		DTE51_U / UTE40_U (used)
	 *	bits 1-0:	DT51_SHORT / UTE40_RESIDENT
	 *
	 * The Hibler pmap set "SG_U" in the 68040 case, but not in
	 * any others (??), which seems at odds with HPMMU.notes, but
	 * whatever. It does not seem to cause any harm to set the
	 * "used" bit in all cases, so that's what we'll do. If it
	 * does prove to be problematic, we can make adjustments.
	 */
	pmap_ste_proto = DTE51_U | DT51_SHORT;
}

static inline pt_entry_t
pmap_make_pte(paddr_t pa, vm_prot_t prot, u_int flags)
{
	pt_entry_t *pte_proto = (flags & PMAP_NOCACHE) ? pmap_pte_proto_ci
						       : pmap_pte_proto;

	prot &= UVM_PROT_ALL;
	KASSERT(prot != 0);

	pt_entry_t npte = pa | pte_proto[prot] |
	    pmap_pte_proto_um[flags & UVM_PROT_ALL];

	if (flags & PMAP_WIRED) {
		npte |= PTE_WIRED;
	}

	return npte;
}

/************************** PAGE TABLE MANAGEMENT ****************************/

/*
 * Kernel page table management works differently from user page table
 * management. An initial set of kernel PTs is allocated during early
 * bootstrap (enough to map the virtual addresses set up at that time,
 * plus a little extra to give the kernel some breathing room while
 * UVM gets initialized -- see pmap_bootstrap1()). If more PTs are
 * needed in order to expand the kernel address space, pmap_growkernel()
 * is called to allocate some more. We always allocate kernel PTs in
 * chunks of one page, allocating more inner segment tables as needed
 * to link them into the MMU tree (3-level), or just poking them
 * directly into the level-1 table (2-level).
 *
 * The kernel PTs are mapped into a single linear array, which makes
 * it possible to simply index by virtual page number to find the PTE
 * that maps that virtual address.
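 *
 * To illustrate, the lookup is roughly (a sketch of what
 * pmap_kernel_pte() below does; the real code is the authority):
 *
 *	pt_entry_t *pte =
 *	    &kernel_ptes[m68k_btop(va - VM_MIN_KERNEL_ADDRESS)];
 *
 * i.e. one flat array of PTEs indexed by virtual page number relative
 * to the start of the kernel virtual address space.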
 */
#define	PTPAGEVASZ	((PAGE_SIZE / sizeof(pt_entry_t)) * PAGE_SIZE)
#define	PTPAGEVAOFS	(PTPAGEVASZ - 1)

#define	pmap_round_ptpage(va)	(((va) + PTPAGEVAOFS) & ~PTPAGEVAOFS)

/*
 * kernel_virtual_start marks the first kernel virtual address that
 * is handed off to UVM to manage. kernel_virtual_end marks the end
 * of the kernel address space that is currently mappable with the
 * number of pages allocated to kernel PTs.
 *
 * kernel_virtual_start is fixed once pmap_bootstrap1() completes.
 * kernel_virtual_end can be extended by calling pmap_growkernel().
 *
 * kernel_virtual_max represents the absolute maximum. It starts at
 * KERNEL_MAX_ADDRESS, but may get clamped by fixed mappings that
 * start beyond the end of kernel virtual address space.
 *
 * kernel_virtual_max is exported to the rest of the kernel via
 * pmap_virtual_space() and VM_MAX_KERNEL_ADDRESS.
 */
#define	KERNEL_MAX_ADDRESS	((vaddr_t)0 - PAGE_SIZE)
static vaddr_t kernel_virtual_start, kernel_virtual_end;
vaddr_t kernel_virtual_max = KERNEL_MAX_ADDRESS;

/*
 * kernel_stnext_pa and kernel_stnext_endpa together implement a
 * simple allocator for inner segment tables used in the 3-level
 * configuration. When the initial level-1 table is allocated,
 * the remainder of that page is set in kernel_stnext_pa, and
 * kernel_stnext_endpa is set to the next page boundary. When
 * a segment table is needed, kernel_stnext_pa is the address
 * of the next free table and is advanced by the L2 table size
 * (512 bytes). If that allocation attempt finds that kernel_stnext_pa
 * is equal to kernel_stnext_endpa, a new page is allocated and
 * kernel_stnext_pa and kernel_stnext_endpa are updated to reflect
 * the newly-allocated page before the table is taken from it.
 */
static paddr_t kernel_stnext_pa, kernel_stnext_endpa;

/*
 * Null segment table that every pmap gets as its initial level 1
 * map. This is a single page allocated in pmap_bootstrap1(), and
 * we zero it out in pmap_init().
 */
static paddr_t null_segtab_pa __read_mostly;

static inline void
pmap_set_lev1map(pmap_t pmap, struct pmap_table *pt, paddr_t pa)
{
	pmap->pm_lev1map = pt;
	pmap->pm_lev1pa = pa;
	if (active_user_pmap(pmap)) {
#if MMU_CONFIG_HP_CLASS
		/*
		 * N.B. re-loading the user segment table pointer also
		 * invalidates the user side of the VAC, so no additional
		 * work is necessary.
		 */
#endif
		pmap_load_urp(pmap->pm_lev1pa);
		TBIAU();	/* XXX optimize? */
		ICIA();		/* XXX optimize? */
	}
}

/*
 * Table accessors.
 */
static inline unsigned int
pmap_pagenum(vaddr_t va)
{
	return ((va) >> PGSHIFT);
}

static inline unsigned int
pmap_segnum(vaddr_t va)
{
	return MMU_USE_3L ? ((va) >> SEGSHIFT3L) : ((va) >> SEGSHIFT2L);
}

static inline unsigned int
pmap_st1_index(vaddr_t va)
{
	return MMU_USE_3L ? LA40_RI(va) : LA2L_RI(va);
}

static inline unsigned int
pmap_st_index(vaddr_t va)
{
	return MMU_USE_3L ? LA40_PI(va) : LA2L_RI(va);
}

static inline unsigned int
pmap_pt_index(vaddr_t va)
{
	return MMU_USE_3L ? LA40_PGI(va) : LA2L_PGI(va);
}

static inline vaddr_t
pmap_trunc_seg(vaddr_t va)
{
	return MMU_USE_3L ?
pmap_trunc_seg_3L(va) : pmap_trunc_seg_2L(va); 1131 } 1132 1133 static inline vaddr_t 1134 pmap_trunc_seg1(vaddr_t va) 1135 { 1136 KASSERT(MMU_USE_3L); 1137 return pmap_trunc_seg1_3L(va); 1138 } 1139 1140 static inline vaddr_t 1141 pmap_round_seg(vaddr_t va) 1142 { 1143 return MMU_USE_3L ? pmap_round_seg_3L(va) : pmap_round_seg_2L(va); 1144 } 1145 1146 static inline vaddr_t 1147 pmap_next_seg(vaddr_t va) 1148 { 1149 return pmap_round_seg(va + PAGE_SIZE); 1150 } 1151 1152 static paddr_t 1153 pmap_table_pa(const struct pmap_table * const pt) 1154 { 1155 const struct pmap_ptpage * const ptp = pt->pt_ptpage; 1156 const vaddr_t ptpva = m68k_ptob(ptp->ptp_vpagenum); 1157 const vaddr_t ptva = (vaddr_t)pt->pt_entries; 1158 1159 return VM_PAGE_TO_PHYS(ptp->ptp_pg) + (ptva - ptpva); 1160 } 1161 1162 static inline unsigned int 1163 pmap_table_make_key(unsigned int segnum, bool segtab) 1164 { 1165 KASSERT((segnum & 0x80000000) == 0); 1166 return (segnum << 1) | (unsigned int)segtab; 1167 } 1168 1169 static int 1170 pmap_table_rb_compare_key(void *v __unused, const void *n, const void *k) 1171 { 1172 const struct pmap_table * const pt1 = n; 1173 const unsigned int k1 = pt1->pt_key; 1174 const unsigned int k2 = *(const unsigned int *)k; 1175 1176 return (int)(k1 - k2); 1177 } 1178 1179 static int 1180 pmap_table_rb_compare_nodes(void *v, const void *n1, const void *n2) 1181 { 1182 const struct pmap_table * const pt2 = n2; 1183 1184 return pmap_table_rb_compare_key(v, n1, &pt2->pt_key); 1185 } 1186 1187 static const rb_tree_ops_t pmap_table_rb_ops = { 1188 .rbto_compare_nodes = pmap_table_rb_compare_nodes, 1189 .rbto_compare_key = pmap_table_rb_compare_key, 1190 .rbto_node_offset = offsetof(struct pmap_table, pt_node), 1191 }; 1192 1193 static struct pmap_table * 1194 pmap_table_alloc(pmap_t pmap, bool segtab, bool nowait, 1195 struct pmap_completion *pc) 1196 { 1197 struct pmap_ptpage_list *pmlist = &pmap->pm_ptpages[segtab]; 1198 struct pmap_ptpage *ptp, *newptp = NULL; 1199 struct pmap_table *pt; 1200 1201 KASSERT(pc != NULL); 1202 1203 try_again: 1204 if ((ptp = TAILQ_FIRST(pmlist)) == NULL || ptp->ptp_freecnt == 0) { 1205 /* 1206 * No PT pages with free tables (empty PT pages are moved 1207 * to the tail of the list). Allocate a new PT page and 1208 * try again. If someone else successfully allocates one 1209 * while we're sleeping, then we'll use it and free what 1210 * we allocated back to the system. 1211 */ 1212 KASSERT(ptp == NULL || LIST_FIRST(&ptp->ptp_freelist) == NULL); 1213 if (newptp == NULL) { 1214 newptp = pmap_ptpage_alloc(segtab, nowait); 1215 if (newptp == NULL) { 1216 /* 1217 * If we didn't wait, then no one would 1218 * have allocted one behind our back. 1219 */ 1220 KASSERT(nowait); 1221 return NULL; 1222 } 1223 goto try_again; 1224 } 1225 ptp = newptp; 1226 TAILQ_INSERT_HEAD(pmlist, newptp, ptp_list); 1227 } 1228 if (__predict_false(newptp != NULL && ptp != newptp)) { 1229 /* Not using newly-allocated PT page; free it back. 
*/ 1230 TAILQ_INSERT_TAIL(&pc->pc_ptpages, newptp, ptp_list); 1231 } 1232 pt = LIST_FIRST(&ptp->ptp_freelist); 1233 KASSERT(pt != NULL); 1234 LIST_REMOVE(pt, pt_freelist); 1235 ptp->ptp_freecnt--; 1236 if (ptp->ptp_freecnt == 0 && 1237 TAILQ_NEXT(ptp, ptp_list) != NULL) { 1238 TAILQ_REMOVE(pmlist, ptp, ptp_list); 1239 TAILQ_INSERT_TAIL(pmlist, ptp, ptp_list); 1240 } 1241 KASSERT(pt->pt_st == NULL); 1242 pt->pt_holdcnt = 1; 1243 1244 return pt; 1245 } 1246 1247 static void 1248 pmap_table_free(pmap_t pmap, struct pmap_table *pt, 1249 struct pmap_completion *pc) 1250 { 1251 struct pmap_ptpage *ptp = pt->pt_ptpage; 1252 struct pmap_ptpage_list *pmlist = &pmap->pm_ptpages[ptp->ptp_segtab]; 1253 1254 KASSERT(pt->pt_st == NULL); 1255 1256 LIST_INSERT_HEAD(&ptp->ptp_freelist, pt, pt_freelist); 1257 KASSERT(ptp->ptp_freecnt < pmap_ptpage_table_counts[ptp->ptp_segtab]); 1258 ptp->ptp_freecnt++; 1259 1260 /* 1261 * If the PT page no longer has any active tables, then 1262 * remove it from the pmap and queue it up to be given 1263 * back to the system. 1264 */ 1265 if (ptp->ptp_freecnt == pmap_ptpage_table_counts[ptp->ptp_segtab]) { 1266 TAILQ_REMOVE(pmlist, ptp, ptp_list); 1267 TAILQ_INSERT_TAIL(&pc->pc_ptpages, ptp, ptp_list); 1268 } 1269 /* 1270 * If the PT page now has exactly one free table, then 1271 * put it at the head of its list so that it is allocated 1272 * from first the next time a table is needed. 1273 */ 1274 else if (ptp->ptp_freecnt == 1) { 1275 TAILQ_REMOVE(pmlist, ptp, ptp_list); 1276 TAILQ_INSERT_HEAD(pmlist, ptp, ptp_list); 1277 } 1278 /* 1279 * Push this PT page down the list if it has more free tables 1280 * than the ones that come after. The goal is to keep PT pages 1281 * with the fewest free tables at the head of the list so that 1282 * they're allocated from first. This is an effort to keep 1283 * fragmentation at bay so as to increase the likelihood that 1284 * we can free PT pages back to the system. 1285 */ 1286 else { 1287 struct pmap_ptpage *next_ptp; 1288 for (next_ptp = TAILQ_NEXT(ptp, ptp_list); 1289 next_ptp != NULL; 1290 next_ptp = TAILQ_NEXT(next_ptp, ptp_list)) { 1291 if (next_ptp->ptp_freecnt < ptp->ptp_freecnt) { 1292 break; 1293 } 1294 } 1295 if (next_ptp != NULL && 1296 next_ptp != TAILQ_NEXT(ptp, ptp_list) && 1297 next_ptp->ptp_freecnt != 0) { 1298 TAILQ_REMOVE(pmlist, ptp, ptp_list); 1299 TAILQ_INSERT_AFTER(pmlist, next_ptp, ptp, ptp_list); 1300 } 1301 } 1302 } 1303 1304 /* 1305 * pmap_table_retain: 1306 * 1307 * Take a retain count on the specified table. Retain counts 1308 * are used to ensure the table remains stable while working 1309 * on it, and each mapping placed into the table also gets 1310 * a retain count. 1311 */ 1312 static inline void 1313 pmap_table_retain(struct pmap_table *pt) 1314 { 1315 if (__predict_true(pt != NULL)) { 1316 pt->pt_holdcnt++; 1317 KASSERT(pt->pt_holdcnt != 0); 1318 } 1319 } 1320 1321 /* 1322 * pmap_table_release: 1323 * 1324 * Release a previously-taken retain count on the specified 1325 * table. If the retain count drops to zero, the table is 1326 * unlinked from the lookup tree and the MMU tree and freed. 
1327 */ 1328 static __noinline void 1329 pmap_table_release_slow(pmap_t pmap, struct pmap_table *pt, 1330 struct pmap_completion *pc) 1331 { 1332 KASSERT(pt != NULL); 1333 KASSERT(pt->pt_holdcnt != 0); 1334 pt->pt_holdcnt--; 1335 if (__predict_false(pt->pt_holdcnt != 0)) { 1336 return; 1337 } 1338 1339 /* 1340 * If the caller doesn't expect the count to go to zero, 1341 * they won't have bothered with a completion context. 1342 * Going to zero is unexpected in this case, so blow up 1343 * if it happens. 1344 */ 1345 KASSERT(pc != NULL); 1346 if (__predict_true(pt == pmap->pm_pt_cache)) { 1347 pmap->pm_pt_cache = NULL; 1348 } 1349 if (__predict_true(pt->pt_st != NULL)) { 1350 /* 1351 * This table needs to be unlinked from the lookup 1352 * tree and the MMU tree. 1353 */ 1354 pte_store(&pt->pt_st->pt_entries[pt->pt_stidx], 0); 1355 rb_tree_remove_node(&pmap->pm_tables, pt); 1356 pmap_table_release_slow(pmap, pt->pt_st, pc); 1357 pt->pt_st = NULL; 1358 } else if (pt == pmap->pm_lev1map) { 1359 pmap_set_lev1map(pmap, NULL, null_segtab_pa); 1360 } 1361 pmap_table_free(pmap, pt, pc); 1362 } 1363 1364 static inline void 1365 pmap_table_release(pmap_t pmap, struct pmap_table *pt, 1366 struct pmap_completion *pc) 1367 { 1368 if (__predict_true(pt != NULL)) { 1369 if (__predict_true(pt->pt_holdcnt > 1)) { 1370 pt->pt_holdcnt--; 1371 return; 1372 } 1373 pmap_table_release_slow(pmap, pt, pc); 1374 } 1375 } 1376 1377 /* 1378 * pmap_table_lookup: 1379 * 1380 * Lookup the table corresponding to the specified segment. 1381 */ 1382 static struct pmap_table * 1383 pmap_table_lookup(pmap_t pmap, unsigned int segnum, bool segtab) 1384 { 1385 const unsigned int key = pmap_table_make_key(segnum, segtab); 1386 struct pmap_table *pt; 1387 1388 if ((pt = pmap->pm_pt_cache) == NULL || pt->pt_key != key) { 1389 pmap_evcnt(pt_cache_miss); 1390 pt = rb_tree_find_node(&pmap->pm_tables, &key); 1391 if (__predict_true(!segtab)) { 1392 pmap->pm_pt_cache = pt; 1393 } 1394 } else { 1395 pmap_evcnt(pt_cache_hit); 1396 } 1397 if (pt != NULL) { 1398 pmap_table_retain(pt); 1399 } 1400 return pt; 1401 } 1402 1403 /* 1404 * pmap_table_insert: 1405 * 1406 * Allocate and insert a table into the tree at the specified 1407 * location. 1408 */ 1409 static struct pmap_table * 1410 pmap_table_insert(pmap_t pmap, struct pmap_table *t1, unsigned int stidx, 1411 unsigned int segnum, bool segtab, bool nowait, struct pmap_completion *pc) 1412 { 1413 struct pmap_table *t2, *ret_t; 1414 1415 t2 = pmap_table_lookup(pmap, segnum, segtab); 1416 if (t2 != NULL) { 1417 /* 1418 * Table at this level already exists, and looking 1419 * it up gave us a retain count, so we no longer need 1420 * the retain count on the upper level table (it is 1421 * retained-by-proxy by the table we just found). 1422 * We pass NULL for the completion context because 1423 * we don't expect the upper level table's retain count 1424 * to drop to zero, and we want things to blow up 1425 * loudly if it does! 1426 */ 1427 pmap_table_release(pmap, t1, NULL); 1428 return t2; 1429 } 1430 1431 /* Allocate the new table. */ 1432 PMAP_CRIT_EXIT(); 1433 t2 = pmap_table_alloc(pmap, segtab, nowait, pc); 1434 PMAP_CRIT_ENTER(); 1435 if (__predict_false(t2 == NULL)) { 1436 pmap_table_release(pmap, t1, pc); 1437 return NULL; 1438 } 1439 t2->pt_key = pmap_table_make_key(segnum, segtab); 1440 1441 /* 1442 * Now that we have the new table, we need to insert it into the 1443 * table lookup tree. 
	 * If we blocked while allocating, it's possible
	 * someone raced with us and inserted one behind our back, so we need
	 * to check for that.
	 */
	ret_t = rb_tree_insert_node(&pmap->pm_tables, t2);
	if (__predict_false(ret_t != t2)) {
		/*
		 * Someone beat us to the punch. If this happens,
		 * then we also need to drop the retain count on
		 * t1 because the table we just found already has
		 * a retain count on it.
		 */
		pmap_table_retain(ret_t);
		pmap_table_release(pmap, t2, pc);
		pmap_table_release(pmap, t1, NULL);
		return ret_t;
	}

	/*
	 * Table has been successfully inserted into the lookup
	 * tree, now link it into the MMU's tree. The new table
	 * takes ownership of the retain count that was taken on
	 * the upper level table while working.
	 */
	t2->pt_st = t1;
	t2->pt_stidx = (unsigned short)stidx;
	pte_store(&t1->pt_entries[stidx], pmap_ste_proto | pmap_table_pa(t2));

	return t2;
}

/*************************** PTE LOOKUP FUNCTIONS ****************************/

static pt_entry_t *kernel_ptes;

/*
 * pmap_kernel_pte:
 *
 *	Get the PTE that maps the specified kernel virtual address.
 *
 *	Take note: the caller *may assume* that they can linearly
 *	access adjacent PTEs up until the address indicated by
 *	virtual_end! That means, "pte++" is totally fine until you
 *	get to the current limit of the kernel virtual address space!
 *
 *	XXX This is exported because db_memrw.c needs it.
 */
pt_entry_t *
pmap_kernel_pte(vaddr_t va)
{
	/*
	 * The kernel PTEs are mapped as a linear array, whose entries
	 * represent the entire possible 4GB supervisor address space.
	 *
	 * Kernel PT pages are pre-allocated and mapped into this linear
	 * space (via pmap_growkernel(), as needed) and never freed back.
	 * So, as long as the VA is below virtual_end, we know that a PTE
	 * exists to back it.
	 *
	 * We don't assert that the VA < virtual_end, however; there may
	 * be special cases where we need to get a PTE that has been
	 * statically-allocated out beyond where virtual space is allowed
	 * to grow. We'll find out soon enough if a PT page doesn't back
	 * it, because a fault will occur when the PTE is accessed.
	 */
	KASSERT(va >= VM_MIN_KERNEL_ADDRESS);
	return &kernel_ptes[m68k_btop(va - VM_MIN_KERNEL_ADDRESS)];
}

/*
 * pmap_pte_lookup:
 *
 *	Lookup the PTE for the given address, returning a retained
 *	reference to the table containing the PTE.
 *
 *	Take note: the caller *may assume* that they can linearly
 *	access adjacent PTEs that map addresses within the same
 *	segment! That means, "pte++" is totally fine until you
 *	get to the next segment boundary!
 */
static pt_entry_t *
pmap_pte_lookup(pmap_t pmap, vaddr_t va, struct pmap_table **out_pt)
{
	if (pmap == pmap_kernel()) {
		*out_pt = NULL;
		return pmap_kernel_pte(va);
	}

	const unsigned int segnum = pmap_segnum(va);

	struct pmap_table *pt = pmap_table_lookup(pmap, segnum, false);
	if (__predict_true(pt != NULL)) {
		*out_pt = pt;		/* already retained */
		return &pt->pt_entries[pmap_pt_index(va)];
	}

	*out_pt = NULL;
	return NULL;
}

/*
 * pmap_pte_alloc:
 *
 *	Like pmap_pte_lookup(), but allocates tables as necessary.
1547 * 1548 * We enter in a critical section, but may drop that along 1549 * the way and re-validate our own assumptions. Callers 1550 * (pmap_enter(), basically), should be aware of this. 1551 */ 1552 static pt_entry_t * 1553 pmap_pte_alloc(pmap_t pmap, vaddr_t va, struct pmap_table **out_pt, 1554 bool nowait, struct pmap_completion *pc) 1555 { 1556 struct pmap_table *st, *pt; 1557 pt_entry_t *ptep; 1558 1559 PMAP_CRIT_ASSERT(); 1560 1561 ptep = pmap_pte_lookup(pmap, va, out_pt); 1562 if (__predict_true(ptep != NULL)) { 1563 return ptep; 1564 } 1565 1566 /* 1567 * First get a reference on the top-level segment table and 1568 * retain it so that it's stable while we work. 1569 */ 1570 if (__predict_true((st = pmap->pm_lev1map) != NULL)) { 1571 pmap_table_retain(st); 1572 } else { 1573 /* 1574 * Oh look! Baby pmap's first mapping! Allocate 1575 * a segment table. 1576 */ 1577 PMAP_CRIT_EXIT(); 1578 st = pmap_table_alloc(pmap, true/*segtab*/, nowait, pc); 1579 PMAP_CRIT_ENTER(); 1580 if (__predict_false(st == NULL)) { 1581 return NULL; 1582 } 1583 1584 /* Re-validate that we still need the segment table. */ 1585 if (__predict_false(pmap->pm_lev1map != NULL)) { 1586 /* Raced and lost. */ 1587 pmap_table_release(pmap, st, pc); 1588 st = pmap->pm_lev1map; 1589 pmap_table_retain(st); 1590 } else { 1591 /* New table is returned to us retained. */ 1592 pmap_set_lev1map(pmap, st, pmap_table_pa(st)); 1593 } 1594 } 1595 1596 /* 1597 * Now we know that st points to a valid segment table with a 1598 * retain count that lets us safely reference it. 1599 */ 1600 1601 if (MMU_USE_3L) { 1602 /* Get the inner segment table for this virtual address. */ 1603 struct pmap_table * const st1 = st; 1604 st = pmap_table_insert(pmap, st1, pmap_st1_index(va), 1605 pmap_st1_index(va), true/*segtab*/, nowait, pc); 1606 if (__predict_false(st == NULL)) { 1607 pmap_table_release(pmap, st1, pc); 1608 return NULL; 1609 } 1610 } 1611 1612 /* We can now allocate and insert the leaf page table. */ 1613 pt = pmap_table_insert(pmap, st, pmap_st_index(va), pmap_segnum(va), 1614 false/*segtab*/, nowait, pc); 1615 if (__predict_false(pt == NULL)) { 1616 pmap_table_release(pmap, st, pc); 1617 return NULL; 1618 } 1619 1620 *out_pt = pt; 1621 return &pt->pt_entries[pmap_pt_index(va)]; 1622 } 1623 1624 /************************** P->V ENTRY MANAGEMENT ****************************/ 1625 1626 static inline pt_entry_t * 1627 pmap_pv_pte(struct pv_entry * const pv) 1628 { 1629 const vaddr_t va = PV_VA(pv); 1630 1631 if (__predict_true(pv->pv_pmap != pmap_kernel())) { 1632 KASSERT(pv->pv_pt != NULL); 1633 return &pv->pv_pt->pt_entries[pmap_pt_index(va)]; 1634 } 1635 return pmap_kernel_pte(va); 1636 } 1637 1638 #define MATCHING_PMAP(p1, p2) \ 1639 ((p1) == (p2) || \ 1640 (p1) == pmap_kernel() || (p2) == pmap_kernel()) 1641 1642 #define CONFLICTING_ALIAS(va1, va2) \ 1643 (((va1) & pmap_aliasmask) != ((va2) & pmap_aliasmask)) 1644 1645 /* 1646 * pmap_pv_enter: 1647 * 1648 * Add a physical->virtual entry to the pv table. Caller must provide 1649 * the storage for the new PV entry. 1650 * 1651 * We are responsible for storing the new PTE into the destination 1652 * table. We are also guaranteed that no mapping exists there, but 1653 * the MMU has a negative cache in the ATC (see 68030UM Figure 9-8. 1654 * Address Translation General Flowchart, ATC hit, B==1 case, as well 1655 * as 68040UM Figure 3-21. ATC Entry and Tag Fields, R bit and the 1656 * associated descriptive text), so we still have to handle ATC entry 1657 * invalidation. 
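 *
 * Concretely, the ordering that matters (a sketch of what the code
 * below does) is:
 *
 *	pte_store(ptep, npte);		(publish the new mapping)
 *	if (active_pmap(pmap))
 *		TBIS(va);		(then kill any stale ATC entry)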
1658 */ 1659 static void 1660 pmap_pv_enter(pmap_t pmap, struct vm_page *pg, vaddr_t va, vm_prot_t prot, 1661 struct pmap_table *pt, pt_entry_t npte, struct pv_entry *newpv) 1662 { 1663 const bool usr_ci = pte_ci_p(npte); 1664 struct pv_entry *pv; 1665 pt_entry_t opte; 1666 1667 pmap_evcnt(pv_enter_called); 1668 1669 PMAP_CRIT_ASSERT(); 1670 KASSERT(newpv != NULL); 1671 1672 npte |= PTE_PVLIST; 1673 1674 newpv->pv_pmap = pmap; 1675 newpv->pv_vf = va; 1676 newpv->pv_pt = pt; 1677 1678 pt_entry_t *ptep = pmap_pv_pte(newpv); 1679 1680 #ifdef DEBUG 1681 /* 1682 * Make sure the entry doesn't already exist. 1683 */ 1684 for (pv = VM_MDPAGE_PVS(pg); pv != NULL; pv = pv->pv_next) { 1685 if (pmap == pv->pv_pmap && va == PV_VA(pv)) { 1686 panic("%s: pmap=%p va=0x%08lx already in PV table", 1687 __func__, pmap, va); 1688 } 1689 } 1690 #endif 1691 1692 if (__predict_false(usr_ci)) { 1693 newpv->pv_vf |= PV_F_CI_USR; 1694 } 1695 1696 newpv->pv_next = VM_MDPAGE_PVS(pg); 1697 VM_MDPAGE_SETPVP(VM_MDPAGE_HEAD_PVP(pg), newpv); 1698 LIST_INSERT_HEAD(&pmap->pm_pvlist, newpv, pv_pmlist); 1699 1700 /* 1701 * If this is an EXEC mapping, then we have to ensure that 1702 * the I$ doesn't load stale data. 1703 * 1704 * XXX Should have a soft-PTE bit for this. 1705 */ 1706 if (prot & UVM_PROT_EXEC) { 1707 #if MMU_CONFIG_68040_CLASS 1708 if (MMU_IS_68040_CLASS) { 1709 /* 1710 * XXX Potential future optimization: is only 1711 * XXX the DCFP() needed here to deal with 1712 * XXX write-back? Should we track EXEC-ness 1713 * XXX in the VM_MDPAGE? 1714 */ 1715 const paddr_t pa = VM_PAGE_TO_PHYS(pg); 1716 DCFP(pa); 1717 ICPP(pa); 1718 } 1719 #endif 1720 } 1721 1722 #if MMU_CONFIG_HP_CLASS 1723 if (MMU_IS_HP_CLASS) { 1724 /* Go handle the HP MMU's VAC. */ 1725 goto hp_mmu_vac_shenanigans; 1726 } 1727 #endif 1728 1729 /* 1730 * If the page is marked as being cache-inhibited, it means 1731 * there is at least one user-requested CI mapping already 1732 * (and that all of the extant mappings are thus CI). 1733 * 1734 * In this case, we need to make sure that the one we're 1735 * establishing now is CI as well. 1736 */ 1737 if (__predict_false(VM_MDPAGE_CI_P(pg))) { 1738 npte = pte_set_ci(npte); 1739 pte_store(ptep, npte); 1740 /* See below. */ 1741 if (active_pmap(pmap)) { 1742 TBIS(va); 1743 } 1744 return; 1745 } 1746 1747 /* Set the PTE for the new mapping. */ 1748 pte_store(ptep, npte); 1749 1750 /* 1751 * Invalidate the ATC entry **after** storing the PTE so that 1752 * there is no window where another MMU table walk finds the 1753 * stale invalid entry. 1754 */ 1755 if (active_pmap(pmap)) { 1756 TBIS(va); 1757 } 1758 1759 /* 1760 * If this is a user-requested CI mapping, we need to make 1761 * sure the page is purged from the cache and mark any other 1762 * mappings of this page CI as well. 1763 */ 1764 if (__predict_false(usr_ci)) { 1765 VM_MDPAGE_SET_CI(pg); 1766 1767 pmap_evcnt(pv_enter_usr_ci); 1768 1769 /* 1770 * There shouldn't be very many of these; CI mappings 1771 * of managed pages are typically only for coherent DMA 1772 * purposes, and multiple mappings of the same page are 1773 * extremely uncommon in that scenario. 
1774 */ 1775 for (pv = newpv->pv_next; pv != NULL; pv = pv->pv_next) { 1776 pmap_evcnt(pv_enter_ci_multi); 1777 ptep = pmap_pv_pte(pv); 1778 for (;;) { 1779 opte = pte_load(ptep); 1780 npte = pte_set_ci(opte); 1781 if (pte_update(ptep, opte, npte)) { 1782 if (active_pmap(pv->pv_pmap)) { 1783 TBIS(PV_VA(pv)); 1784 } 1785 break; 1786 } 1787 } 1788 } 1789 #if MMU_CONFIG_68040_CLASS 1790 if (MMU_IS_68040_CLASS) { 1791 const paddr_t pa = VM_PAGE_TO_PHYS(pg); 1792 DCFP(pa); 1793 ICPP(pa); 1794 } 1795 #endif 1796 } 1797 return; 1798 1799 #if MMU_CONFIG_HP_CLASS 1800 hp_mmu_vac_shenanigans: 1801 /* 1802 * We have ourselves a VAC, so in addition to checking for 1803 * user-requested-CI mappings, we have to check for cache 1804 * aliases and cache-inhibit all mappings for a page that 1805 * have a cache alias conflict. 1806 * 1807 * - All mappings of a given page within the same pmap must 1808 * not collide. (The VAC is flushed when switching pmaps 1809 * by virtue of a new segment table pointer being loaded 1810 * into the user segment table register.) 1811 * 1812 * - The Hibler pmap check to see that the kernel doesn't have 1813 * conflicting mappings with any user pmap. We'll do the same, 1814 * which seems reasonable on the surface if you think about it 1815 * for a couple of minutes. 1816 * 1817 * - The Hibler pmap also just punts and cache-inhibits all 1818 * mappings once it becomes > 2, but we do NOT do that because 1819 * it will severely penalize shared libraries. 1820 * 1821 * N.B. The method used here will not universally render all 1822 * mappings for a given page uncached; only address spaces with 1823 * conflicts are penalized. 1824 * 1825 * XXX This probably only matters if one of the mappings is 1826 * XXX writable, as this is the only situation where data 1827 * XXX inconsistency could arise. There is probably room 1828 * XXX for further optimization if someone with one of these 1829 * XXX machines cares to take it up. 1830 */ 1831 bool flush_s_vac = false; 1832 bool flush_u_vac = false; 1833 1834 /* Set the PTE for the new mapping. */ 1835 pte_store(ptep, npte); 1836 1837 /* 1838 * Invalidate the ATC entry **after** storing the PTE so that 1839 * there is no window where another MMU table walk finds the 1840 * stale invalid entry. 1841 * 1842 * XXX I don't know that this is strictly necessary with the 1843 * XXX HP MMU, but there is basically zero documentation available 1844 * XXX for it, so we err on the side of caution. 1845 */ 1846 if (active_pmap(pmap)) { 1847 TBIS(va); 1848 } 1849 1850 vaddr_t pv_flags = newpv->pv_vf & PV_F_CI_USR; 1851 if (usr_ci) { 1852 pmap_evcnt(pv_enter_usr_ci); 1853 } 1854 1855 for (pv = newpv->pv_next; pv != NULL; pv = pv->pv_next) { 1856 if (MATCHING_PMAP(pmap, pv->pv_pmap) && 1857 CONFLICTING_ALIAS(va, PV_VA(pv))) { 1858 pmap_evcnt(pv_enter_vac_ci); 1859 pv_flags |= PV_F_CI_VAC; 1860 break; 1861 } 1862 } 1863 1864 if (__predict_true(pv_flags == 0)) { 1865 /* No new inhibitions! 
*/ 1866 return; 1867 } 1868 1869 VM_MDPAGE_SET_CI(pg); 1870 for (pv = VM_MDPAGE_PVS(pg); pv != NULL; pv = pv->pv_next) { 1871 if (MATCHING_PMAP(pmap, pv->pv_pmap)) { 1872 pmap_evcnt(pv_enter_ci_multi); 1873 pv->pv_vf |= pv_flags; 1874 pte_set(pmap_pv_pte(pv), PTE51_CI); 1875 if (active_pmap(pv->pv_pmap)) { 1876 TBIS(PV_VA(pv)); 1877 if (pv->pv_pmap == pmap_kernel()) { 1878 flush_s_vac = true; 1879 } else { 1880 flush_u_vac = true; 1881 } 1882 } 1883 } 1884 } 1885 if (flush_u_vac && flush_s_vac) { 1886 DCIA(); 1887 } else if (flush_u_vac) { 1888 DCIU(); 1889 } else if (flush_s_vac) { 1890 DCIS(); 1891 } 1892 #endif /* MMU_CONFIG_HP_CLASS */ 1893 } 1894 1895 /* 1896 * pmap_pv_remove: 1897 * 1898 * Remove a physical->virtual entry from the pv table. 1899 */ 1900 static void 1901 pmap_pv_remove(pmap_t pmap, struct vm_page *pg, vaddr_t va, 1902 struct pmap_completion *pc) 1903 { 1904 struct pv_entry **pvp, *pv; 1905 pt_entry_t *ptep, opte, npte; 1906 1907 pmap_evcnt(pv_remove_called); 1908 1909 PMAP_CRIT_ASSERT(); 1910 1911 for (pvp = VM_MDPAGE_HEAD_PVP(pg), pv = VM_MDPAGE_PVS(pg); 1912 pv != NULL; 1913 pvp = &pv->pv_next, pv = *pvp) { 1914 if (pmap == pv->pv_pmap && va == PV_VA(pv)) { 1915 break; 1916 } 1917 } 1918 1919 KASSERT(pv != NULL); 1920 VM_MDPAGE_SETPVP(pvp, pv->pv_next); 1921 LIST_REMOVE(pv, pv_pmlist); 1922 1923 KASSERT(pc != NULL); 1924 LIST_INSERT_HEAD(&pc->pc_pvlist, pv, pv_pmlist); 1925 1926 #if MMU_CONFIG_HP_CLASS 1927 if (MMU_IS_HP_CLASS) { 1928 /* Go handle the HP MMU's VAC. */ 1929 goto hp_mmu_vac_shenanigans; 1930 } 1931 #endif 1932 1933 /* 1934 * If the page is marked as being cache-inhibited, then it 1935 * means there was at least one user-requested CI mapping 1936 * for the page. In that case, we need to scan the P->V 1937 * list to see if any remain, and if not, clear the CI 1938 * status for the page. 1939 * 1940 * N.B. This requires traversing the list twice: once to 1941 * check if any of the mappings are user-requested-CI, 1942 * and one again to fix them up. But, we're making a 1943 * classical space-vs-time trade-off here: Assuming that 1944 * this is a rare situation, it's better to pay the cpu 1945 * cost on the rare edge transitions rather than always pay 1946 * the memory cost of having a counter to track something 1947 * that almost never happens (and, when it does, the list 1948 * will be very short). 1949 */ 1950 if (__predict_false(VM_MDPAGE_CI_P(pg))) { 1951 pmap_evcnt(pv_remove_ci); 1952 for (pv = VM_MDPAGE_PVS(pg); pv != NULL; pv = pv->pv_next) { 1953 if (pv->pv_vf & PV_F_CI_USR) { 1954 /* 1955 * There is still at least one user-requested 1956 * CI mapping, so we can't change the page's CI 1957 * status. 1958 */ 1959 return; 1960 } 1961 } 1962 KASSERT(pv == NULL); 1963 VM_MDPAGE_CLR_CI(pg); 1964 for (pv = VM_MDPAGE_PVS(pg); pv != NULL; pv = pv->pv_next) { 1965 ptep = pmap_pv_pte(pv); 1966 for (;;) { 1967 opte = pte_load(ptep); 1968 npte = pte_clr_ci(opte); 1969 if (pte_update(ptep, opte, npte)) { 1970 if (active_pmap(pv->pv_pmap)) { 1971 TBIS(PV_VA(pv)); 1972 } 1973 break; 1974 } 1975 } 1976 } 1977 } 1978 return; 1979 1980 #if MMU_CONFIG_HP_CLASS 1981 hp_mmu_vac_shenanigans: 1982 /* 1983 * If we have a VAC and the page was cache-inhibited due to 1984 * a cache alias conflict, we can re-enable the cache if there 1985 * is just one such mapping left. 
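	 *
	 * A worked example (assuming, hypothetically, that
	 * pmap_aliasmask covers 64KB): a page mapped in one pmap at
	 * both VA $0000.2000 and VA $0000.5000 conflicts, so both
	 * mappings were marked PV_F_CI_VAC.  Once the $0000.5000
	 * mapping is removed, the surviving mapping no longer
	 * conflicts with anything in that address space, so its CI
	 * bit (and possibly the page's CI flag) can be cleared below.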
1986 */ 1987 if (__predict_false(VM_MDPAGE_CI_P(pg))) { 1988 vaddr_t all_ci_flags = PV_F_CI_USR; 1989 1990 pmap_evcnt(pv_remove_ci); 1991 1992 for (pv = VM_MDPAGE_PVS(pg); pv != NULL; pv = pv->pv_next) { 1993 if (! MATCHING_PMAP(pmap, pv->pv_pmap)) { 1994 continue; 1995 } 1996 if (pv->pv_vf & all_ci_flags) { 1997 /* 1998 * There is at least one CI_USR mapping 1999 * or more than one CI_VAC mapping, so 2000 * the CI status of the page remains 2001 * unchanged. 2002 */ 2003 return; 2004 } 2005 all_ci_flags |= pv->pv_vf & PV_F_CI_VAC; 2006 } 2007 KASSERT(pv == NULL); 2008 /* 2009 * We now know we can remove CI from the page mappings 2010 * in the matching address space. If no CI mappings 2011 * remain, then we can clear the CI indicator on the 2012 * page. 2013 */ 2014 all_ci_flags = 0; 2015 for (pv = VM_MDPAGE_PVS(pg); pv != NULL; pv = pv->pv_next) { 2016 if (! MATCHING_PMAP(pmap, pv->pv_pmap)) { 2017 all_ci_flags |= pv->pv_vf; 2018 continue; 2019 } 2020 pte_mask(pmap_pv_pte(pv), ~((uint32_t)PTE51_CI)); 2021 if (active_pmap(pv->pv_pmap)) { 2022 TBIS(PV_VA(pv)); 2023 } 2024 } 2025 all_ci_flags &= PV_F_CI_USR | PV_F_CI_VAC; 2026 if (__predict_true(all_ci_flags == 0)) { 2027 VM_MDPAGE_CLR_CI(pg); 2028 } 2029 } 2030 #endif /* MMU_CONFIG_HP_CLASS */ 2031 } 2032 2033 #undef CONFLICTING_ALIAS 2034 #undef MATCHING_PMAP 2035 2036 /***************** PMAP INTERFACE (AND ADJACENT) FUNCTIONS *******************/ 2037 2038 static inline void 2039 pmap_stat_update_impl(long *valp, int val) 2040 { 2041 *valp += val; 2042 } 2043 2044 #define pmap_stat_update(pm, stat, delta) \ 2045 pmap_stat_update_impl(&(pm)->pm_stats.stat, (delta)) 2046 2047 static inline void 2048 pmap_stat_set_impl(long *valp, int val) 2049 { 2050 atomic_store_relaxed(valp, val); 2051 } 2052 2053 #define pmap_stat_set(pm, stat, val) \ 2054 pmap_stat_set_impl(&(pm)->pm_stats.stat, (val)) 2055 2056 /* 2057 * pmap_pinit: 2058 * 2059 * Common bits of pmap structure initialization shared between 2060 * the kernel pmap and user pmaps. 2061 */ 2062 static void 2063 pmap_pinit(pmap_t pmap, paddr_t lev1pa) 2064 { 2065 pmap->pm_lev1pa = lev1pa; 2066 rb_tree_init(&pmap->pm_tables, &pmap_table_rb_ops); 2067 TAILQ_INIT(&pmap->pm_ptpages[0]); 2068 TAILQ_INIT(&pmap->pm_ptpages[1]); 2069 LIST_INIT(&pmap->pm_pvlist); 2070 2071 atomic_store_relaxed(&pmap->pm_refcnt, 1); 2072 } 2073 2074 /* 2075 * pmap_virtual_space: [ INTERFACE ] 2076 * 2077 * Define the initial bounds of the kernel virtual address space. 2078 * 2079 * In this implementation, the start address we return marks the 2080 * end of the statically allocated special kernel virtual addresses 2081 * set up in pmap_bootstrap1(). We return kernel_virtual_max as 2082 * the end because we can grow the kernel address space using 2083 * pmap_growkernel(). 2084 */ 2085 void 2086 pmap_virtual_space(vaddr_t *vstartp, vaddr_t *vendp) 2087 { 2088 *vstartp = kernel_virtual_start; 2089 *vendp = kernel_virtual_max; 2090 } 2091 2092 /* 2093 * pmap_init: [ INTERFACE ] 2094 * 2095 * Initialize the pmap module. Called by vm_init(), to initialize any 2096 * structures that the pmap system needs to map virtual memory. 2097 */ 2098 void 2099 pmap_init(void) 2100 { 2101 /* Initialize the pmap / pv_entry allocators. */ 2102 pmap_alloc_init(); 2103 2104 /* Initialize the PT page allocator. */ 2105 pmap_ptpage_init(); 2106 2107 /* Now it's safe to do P->V entry recording! */ 2108 pmap_initialized_p = true; 2109 } 2110 2111 /* 2112 * pmap_create: [ INTERFACE ] 2113 * 2114 * Create and return a physical map. 
2115 */ 2116 pmap_t 2117 pmap_create(void) 2118 { 2119 pmap_t pmap; 2120 2121 /* 2122 * We reference the null segment table and and have a NULL 2123 * lev1map pointer until the first mapping is entered. 2124 */ 2125 pmap = pmap_alloc(); 2126 pmap_pinit(pmap, null_segtab_pa); 2127 2128 return pmap; 2129 } 2130 2131 /* 2132 * pmap_destroy: [ INTERFACE ] 2133 * 2134 * Drop the reference count on the specified pmap, releasing 2135 * all resources if the reference count drops to zero. 2136 */ 2137 void 2138 pmap_destroy(pmap_t pmap) 2139 { 2140 unsigned int newval; 2141 2142 PMAP_CRIT_ENTER(); 2143 KASSERT(pmap->pm_refcnt > 0); 2144 newval = --pmap->pm_refcnt; 2145 PMAP_CRIT_EXIT(); 2146 2147 if (newval) { 2148 return; 2149 } 2150 2151 /* We assume all mappings have been removed. */ 2152 KASSERT(pmap->pm_lev1map == NULL); 2153 KASSERT(pmap->pm_lev1pa == null_segtab_pa); 2154 2155 pmap_free(pmap); 2156 } 2157 2158 /* 2159 * pmap_reference: [ INTERFACE ] 2160 * 2161 * Add a reference to the specified pmap. 2162 */ 2163 void 2164 pmap_reference(pmap_t pmap) 2165 { 2166 PMAP_CRIT_ENTER(); 2167 pmap->pm_refcnt++; 2168 KASSERT(pmap->pm_refcnt > 0); 2169 PMAP_CRIT_EXIT(); 2170 } 2171 2172 /* 2173 * pmap_remove_mapping: 2174 * 2175 * Invalidate a single page denoted by pmap/va. 2176 * 2177 * If (ptep != NULL), it is the already computed PTE for the mapping. 2178 * 2179 * If (flags & PRM_TFLUSH), we must invalidate any TLB information. 2180 * 2181 * If (flags & PRM_CFLUSH), we must flush/invalidate any cache 2182 * information. 2183 * 2184 * If the caller wishes to prevent the page table from being freed, 2185 * they should perform an extra retain. 2186 */ 2187 #define PRM_TFLUSH __BIT(0) 2188 #define PRM_CFLUSH __BIT(1) 2189 static void 2190 pmap_remove_mapping(pmap_t pmap, vaddr_t va, pt_entry_t *ptep, 2191 struct pmap_table *pt, int flags, struct pmap_completion *pc) 2192 { 2193 KASSERT(ptep != NULL); 2194 2195 const paddr_t opte = pte_load(ptep); 2196 if (! pte_valid_p(opte)) { 2197 return; 2198 } 2199 2200 const paddr_t pa = pte_pa(opte); 2201 2202 /* Update statistics. */ 2203 if (pte_wired_p(opte)) { 2204 pmap_stat_update(pmap, wired_count, -1); 2205 } 2206 pmap_stat_update(pmap, resident_count, -1); 2207 2208 if (flags & PRM_CFLUSH) { 2209 #if MMU_CONFIG_68040_CLASS 2210 if (MMU_IS_68040_CLASS) { 2211 DCFP(pa); 2212 ICPP(pa); 2213 } 2214 #endif 2215 #if MMU_CONFIG_HP_CLASS 2216 if (MMU_IS_HP_CLASS) { 2217 if (pmap == pmap_kernel()) { 2218 DCIS(); 2219 } else if (active_user_pmap(pmap)) { 2220 DCIU(); 2221 } 2222 } 2223 #endif 2224 } 2225 2226 /* 2227 * Zap the PTE and drop the retain count that the mapping 2228 * had on the table. 2229 */ 2230 pte_store(ptep, 0); 2231 pmap_table_release(pmap, pt, pc); 2232 2233 /* 2234 * Now that the ATC can't be reloaded from the PTE, invalidate 2235 * the ATC entry. 2236 */ 2237 if (__predict_true((flags & PRM_TFLUSH) != 0 && active_pmap(pmap))) { 2238 TBIS(va); 2239 } 2240 2241 struct vm_page * const pg = pmap_pa_to_pg(pa); 2242 if (__predict_true(pg != NULL)) { 2243 KASSERT(pte_managed_p(opte)); 2244 /* Update cached U/M bits from mapping that's going away. */ 2245 VM_MDPAGE_ADD_UM(pg, opte); 2246 pmap_pv_remove(pmap, pg, va, pc); 2247 } else { 2248 KASSERT(! pte_managed_p(opte)); 2249 } 2250 } 2251 2252 /* 2253 * pmap_remove: [ INTERFACE ] 2254 * 2255 * Remove the given range of addresses from the specified map. 2256 * 2257 * It is assumed that the start and end are properly rounded 2258 * to the page size. 2259 * 2260 * N.B. 
Callers of pmap_remove_internal() are expected to 2261 * provide an initialized completion context, which we 2262 * will finalize. 2263 */ 2264 static void 2265 pmap_remove_internal(pmap_t pmap, vaddr_t sva, vaddr_t eva, 2266 struct pmap_completion *pc) 2267 { 2268 pt_entry_t opte, *ptep; 2269 struct pmap_table *pt; 2270 vaddr_t nextseg; 2271 int prm_flags; 2272 #if MMU_CONFIG_HP_CLASS 2273 pt_entry_t all_ci = PTE51_CI; 2274 #endif 2275 2276 /* 2277 * If this is the kernel pmap, we can use a faster method 2278 * for accessing the PTEs (since the PT pages are always 2279 * resident). 2280 * 2281 * Note that this routine should NEVER be called from an 2282 * interrupt context; pmap_kremove() is used for that. 2283 */ 2284 prm_flags = active_pmap(pmap) ? PRM_TFLUSH : 0; 2285 if (pmap == pmap_kernel()) { 2286 PMAP_CRIT_ENTER(); 2287 2288 for (ptep = pmap_kernel_pte(sva); sva < eva; 2289 ptep++, sva += PAGE_SIZE) { 2290 opte = pte_load(ptep); 2291 if (pte_valid_p(opte)) { 2292 #if MMU_CONFIG_HP_CLASS 2293 /* 2294 * If all of the PTEs we're zapping have the 2295 * cache-inhibit bit set, ci_pte will remain 2296 * non-zero and we'll be able to skip flushing 2297 * the VAC when we're done. 2298 */ 2299 all_ci &= opte; 2300 #endif 2301 pmap_remove_mapping(pmap, sva, ptep, NULL, 2302 prm_flags, pc); 2303 } 2304 } 2305 #if MMU_CONFIG_HP_CLASS 2306 if (MMU_IS_HP_CLASS && !all_ci) { 2307 /* 2308 * Cacheable mappings were removed, so invalidate 2309 * the cache. 2310 */ 2311 DCIS(); 2312 } 2313 #endif 2314 PMAP_CRIT_EXIT(); 2315 2316 /* kernel PT pages are never freed. */ 2317 KASSERT(TAILQ_EMPTY(&pc->pc_ptpages)); 2318 2319 /* ...but we might have freed PV entries. */ 2320 pmap_completion_fini(pc); 2321 2322 return; 2323 } 2324 2325 PMAP_CRIT_ENTER(); 2326 2327 while (sva < eva) { 2328 nextseg = pmap_next_seg(sva); 2329 if (nextseg == 0 || nextseg > eva) { 2330 nextseg = eva; 2331 } 2332 2333 ptep = pmap_pte_lookup(pmap, sva, &pt); 2334 if (ptep == NULL) { 2335 /* 2336 * No table for this address, meaning nothing 2337 * within this segment; advance to the next 2338 * one. 2339 */ 2340 sva = nextseg; 2341 continue; 2342 } 2343 2344 for (; sva < nextseg; ptep++, sva += PAGE_SIZE) { 2345 opte = pte_load(ptep); 2346 if (! pte_valid_p(opte)) { 2347 continue; 2348 } 2349 #if MMU_CONFIG_HP_CLASS 2350 /* 2351 * If all of the PTEs we're zapping have the 2352 * cache-inhibit bit set, ci_pte will remain 2353 * non-zero and we'll be able to skip flushing 2354 * the VAC when we're done. 2355 */ 2356 all_ci &= opte; 2357 #endif 2358 pmap_remove_mapping(pmap, sva, ptep, pt, prm_flags, pc); 2359 } 2360 pmap_table_release(pmap, pt, pc); 2361 } 2362 #if MMU_CONFIG_HP_CLASS 2363 if (MMU_IS_HP_CLASS && !all_ci) { 2364 /* 2365 * Cacheable mappings were removed, so invalidate 2366 * the cache. 2367 */ 2368 if (pmap == pmap_kernel()) { 2369 DCIS(); 2370 } else if (active_user_pmap(pmap)) { 2371 DCIU(); 2372 } 2373 } 2374 #endif 2375 PMAP_CRIT_EXIT(); 2376 2377 pmap_completion_fini(pc); 2378 } 2379 2380 void 2381 pmap_remove(pmap_t pmap, vaddr_t sva, vaddr_t eva) 2382 { 2383 struct pmap_completion pc; 2384 pmap_completion_init(&pc); 2385 pmap_remove_internal(pmap, sva, eva, &pc); 2386 /* pmap_remove_internal() calls pmap_completion_fini(). */ 2387 } 2388 2389 /* 2390 * pmap_remove_all: [ INTERFACE ] 2391 * 2392 * Remove all mappings from a pmap in bulk. This is only called 2393 * when it's known that the address space is no longer visible to 2394 * any user process (e.g. during exit or exec). 
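 *
 *	Returning true tells the caller that every mapping is already
 *	gone, so UVM can skip its per-map-entry pmap_remove() calls
 *	during address space teardown; conceptually (the caller shape
 *	is assumed here, not taken from this file):
 *
 *		if (!pmap_remove_all(map->pmap)) {
 *			... fall back to per-entry pmap_remove() ...
 *		}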
2395 */ 2396 bool 2397 pmap_remove_all(pmap_t pmap) 2398 { 2399 struct pmap_completion pc; 2400 struct pv_entry *pv; 2401 2402 KASSERT(pmap != pmap_kernel()); 2403 2404 /* 2405 * This process is pretty simple: 2406 * 2407 * ==> (1) Set the segment table pointer to the NULL segment table. 2408 * 2409 * ==> (2) Copy the PT page list to a tempory list and re-init. 2410 * 2411 * ==> (3) Walk the PV entry list and remove each entry. 2412 * 2413 * ==> (4) Zero the wired and resident count. 2414 * 2415 * Once we've done that, we just need to free everything 2416 * back to the system. 2417 */ 2418 2419 pmap_completion_init(&pc); 2420 2421 PMAP_CRIT_ENTER(); 2422 2423 /* Step 1. */ 2424 pmap_set_lev1map(pmap, NULL, null_segtab_pa); 2425 2426 /* Step 2. */ 2427 pmap->pm_pt_cache = NULL; 2428 TAILQ_CONCAT(&pc.pc_ptpages, &pmap->pm_ptpages[0], ptp_list); 2429 TAILQ_CONCAT(&pc.pc_ptpages, &pmap->pm_ptpages[1], ptp_list); 2430 memset(&pmap->pm_tables, 0, sizeof(pmap->pm_tables)); 2431 rb_tree_init(&pmap->pm_tables, &pmap_table_rb_ops); 2432 KASSERT(RB_TREE_MIN(&pmap->pm_tables) == NULL); 2433 2434 /* Step 3. */ 2435 while ((pv = LIST_FIRST(&pmap->pm_pvlist)) != NULL) { 2436 KASSERT(pv->pv_pmap == pmap); 2437 pmap_pv_remove(pmap, 2438 pmap_pa_to_pg(pte_pa(pte_load(pmap_pv_pte(pv)))), 2439 PV_VA(pv), &pc); 2440 } 2441 2442 /* Step 4. */ 2443 pmap_stat_set(pmap, wired_count, 0); 2444 pmap_stat_set(pmap, resident_count, 0); 2445 2446 PMAP_CRIT_EXIT(); 2447 2448 pmap_completion_fini(&pc); 2449 2450 return true; 2451 } 2452 2453 /* 2454 * pmap_page_protect: [ INTERFACE ] 2455 * 2456 * Lower the permission for all mappings to a given page to 2457 * the permissions specified. 2458 */ 2459 void 2460 pmap_page_protect(struct vm_page *pg, vm_prot_t prot) 2461 { 2462 struct pmap_completion pc; 2463 struct pv_entry *pv; 2464 2465 if (prot & UVM_PROT_WRITE) { 2466 /* No protection to revoke. */ 2467 return; 2468 } 2469 2470 if (prot & UVM_PROT_READ) { 2471 /* Making page copy-on-write. */ 2472 pmap_changebit(pg, PTE_WP, ~0U); 2473 return; 2474 } 2475 2476 /* Removing all mappings for a page. */ 2477 pmap_completion_init(&pc); 2478 2479 PMAP_CRIT_ENTER(); 2480 2481 while ((pv = VM_MDPAGE_PVS(pg)) != NULL) { 2482 pmap_remove_mapping(pv->pv_pmap, PV_VA(pv), pmap_pv_pte(pv), 2483 pv->pv_pt, PRM_TFLUSH|PRM_CFLUSH, &pc); 2484 } 2485 2486 PMAP_CRIT_EXIT(); 2487 2488 pmap_completion_fini(&pc); 2489 } 2490 2491 /* 2492 * pmap_protect: [ INTERFACE ] 2493 * 2494 * Set the physical protection on the specified range of this map 2495 * as requested. 2496 */ 2497 void 2498 pmap_protect(pmap_t pmap, vaddr_t sva, vaddr_t eva, vm_prot_t prot) 2499 { 2500 pt_entry_t *ptep, opte, npte; 2501 struct pmap_table *pt; 2502 vaddr_t nextseg; 2503 #if MMU_CONFIG_68040_CLASS 2504 bool removing_write; 2505 #endif 2506 bool need_tflush; 2507 2508 if ((prot & UVM_PROT_READ) == 0) { 2509 struct pmap_completion pc; 2510 pmap_completion_init(&pc); 2511 pmap_remove_internal(pmap, sva, eva, &pc); 2512 /* pmap_remove_internal() calls pmap_completion_fini(). 
*/ 2513 return; 2514 } 2515 2516 PMAP_CRIT_ENTER(); 2517 2518 #if MMU_CONFIG_68040_CLASS 2519 removing_write = (prot & UVM_PROT_WRITE) == 0; 2520 #endif 2521 need_tflush = active_pmap(pmap); 2522 while (sva < eva) { 2523 nextseg = pmap_next_seg(sva); 2524 if (nextseg == 0 || nextseg > eva) { 2525 nextseg = eva; 2526 } 2527 2528 ptep = pmap_pte_lookup(pmap, sva, &pt); 2529 if (ptep == NULL) { 2530 /* 2531 * No table for this address, meaning nothing 2532 * within this segment; advance to the next 2533 * one. 2534 */ 2535 sva = nextseg; 2536 continue; 2537 } 2538 2539 /* 2540 * Change protection on mapping if it is valid and doesn't 2541 * already have the correct protection. 2542 */ 2543 for (; sva < nextseg; ptep++, sva += PAGE_SIZE) { 2544 try_again: 2545 opte = pte_load(ptep); 2546 if (! pte_valid_p(opte)) { 2547 continue; 2548 } 2549 npte = pte_change_prot(opte, prot); 2550 if (npte == opte) { 2551 continue; 2552 } 2553 #if MMU_CONFIG_68040_CLASS 2554 if (MMU_IS_68040_CLASS && removing_write) { 2555 /* 2556 * Clear caches if making RO (see section 2557 * "7.3 Cache Coherency" in the manual). 2558 */ 2559 paddr_t pa = pte_pa(opte); 2560 DCFP(pa); 2561 ICPP(pa); 2562 } 2563 #endif 2564 if (! pte_update(ptep, opte, npte)) { 2565 /* Lost race updating PTE; try again. */ 2566 goto try_again; 2567 } 2568 if (need_tflush) { 2569 TBIS(sva); 2570 } 2571 } 2572 pmap_table_release(pmap, pt, NULL); 2573 } 2574 2575 PMAP_CRIT_EXIT(); 2576 } 2577 2578 /* 2579 * pmap_enter: [ INTERFACE ] 2580 * 2581 * Insert the given physical address (pa) at the specified 2582 * virtual address (va) in the target physical map with the 2583 * protection requested. 2584 * 2585 * If specified, the page will be wired down, meaning that 2586 * related pte can not be reclaimed. 2587 * 2588 * Note: This is the only routine which MAY NOT lazy-evaluate 2589 * or lose information. That is, this routine must actually 2590 * insert this page into the given map NOW. 2591 */ 2592 int 2593 pmap_enter(pmap_t pmap, vaddr_t va, paddr_t pa, vm_prot_t prot, u_int flags) 2594 { 2595 struct pmap_table *pt; 2596 pt_entry_t *ptep, npte, opte; 2597 struct pv_entry *newpv; 2598 struct pmap_completion pc; 2599 int error = 0; 2600 const bool nowait = !!(flags & PMAP_CANFAIL); 2601 2602 pmap_completion_init(&pc); 2603 2604 struct vm_page * const pg = pmap_pa_to_pg(pa); 2605 if (__predict_false(pg == NULL)) { 2606 /* 2607 * PA is not part of managed memory. Make the mapping 2608 * cache-inhibited on the assumption that it's a device. 2609 */ 2610 flags |= PMAP_NOCACHE; 2611 } 2612 2613 PMAP_CRIT_ENTER(); 2614 2615 if (nowait) { 2616 pmap_evcnt(enter_nowait); 2617 } else { 2618 pmap_evcnt(enter_yeswait); 2619 } 2620 2621 /* Get the destination table. */ 2622 ptep = pmap_pte_alloc(pmap, va, &pt, nowait, &pc); 2623 if (__predict_false(ptep == NULL)) { 2624 pmap_evcnt(enter_pte_alloc_fail); 2625 error = ENOMEM; 2626 goto out; 2627 } 2628 2629 /* Compute the new PTE. */ 2630 npte = pmap_make_pte(pa, prot, flags); 2631 2632 /* Fetch old PTE. */ 2633 opte = pte_load(ptep); 2634 2635 /* 2636 * Check to see if there is a valid mapping at this address. 2637 * It might simply be a wiring or protection change. 2638 */ 2639 if (pte_valid_p(opte)) { 2640 pmap_evcnt(enter_valid); 2641 restart: 2642 if (pte_pa(opte) == pa) { 2643 /* 2644 * Just a protection or wiring change. 2645 * 2646 * Since the old PTE is handy, go ahead and update 2647 * the cached U/M attributes now. 
Normally we would 2648 * do this in pmap_remove_mapping(), but we're not 2649 * taking that path in this case. We also add in 2650 * any U/M attributes hinted by the access type 2651 * that brought us to pmap_enter() in the first 2652 * place (a write-fault on a writable page mapped 2653 * read-only during a page-out, for example). 2654 * 2655 * Also ensure that the PV list status of the mapping 2656 * is consistent. 2657 */ 2658 if (__predict_true(pg != NULL)) { 2659 VM_MDPAGE_ADD_UM(pg, opte | npte); 2660 KASSERT(pte_managed_p(opte)); 2661 npte |= PTE_PVLIST; 2662 } 2663 2664 /* Preserve cache-inhibited status. */ 2665 if (__predict_false(pte_ci_p(opte))) { 2666 npte = 2667 (npte & ~PTE_CMASK) | (opte & PTE_CMASK); 2668 } 2669 2670 /* Set the new PTE. */ 2671 pte_store(ptep, npte); 2672 2673 const pt_entry_t diff = opte ^ npte; 2674 2675 #ifdef PMAP_EVENT_COUNTERS 2676 if (diff & PTE_WIRED) { 2677 pmap_evcnt(enter_wire_change); 2678 } 2679 if (diff & PTE_WP) { 2680 pmap_evcnt(enter_prot_change); 2681 } 2682 #endif 2683 2684 if (pte_wired_p(diff)) { 2685 pmap_stat_update(pmap, wired_count, 2686 pte_wired_p(npte) ? 1 : -1); 2687 } 2688 if (diff & PTE_CRIT_BITS) { 2689 #if MMU_CONFIG_68040_CLASS 2690 /* 2691 * Protection or caching status is changing; 2692 * flush the page from the cache. 2693 */ 2694 if (MMU_IS_68040_CLASS) { 2695 DCFP(pa); 2696 ICPP(pa); 2697 } 2698 #endif 2699 if (active_pmap(pmap)) { 2700 TBIS(va); 2701 #if MMU_CONFIG_HP_CLASS 2702 /* 2703 * If the new mapping is CI and the old 2704 * one is not, then flush the VAC. 2705 */ 2706 if (__predict_false(MMU_IS_HP_CLASS && 2707 pte_ci_p(diff) && 2708 pte_ci_p(npte))) { 2709 DCIA(); 2710 } 2711 #endif 2712 } 2713 } 2714 2715 /* All done! */ 2716 goto out_release; 2717 } 2718 2719 /* 2720 * The mapping has completely changed. Need to remove 2721 * the old one first. 2722 * 2723 * This drops the retain count on the PT owned by the 2724 * previous mapping, but the newly-entered mapping will 2725 * inherit the retain count taken when we looked up the 2726 * PTE. 2727 * 2728 * XXX Can we elide the ATC flush here? We're going to 2729 * XXX hit the ATC after setting the new PTE anyway. 2730 */ 2731 pmap_evcnt(enter_pa_change); 2732 pmap_remove_mapping(pmap, va, ptep, pt, 2733 PRM_TFLUSH|PRM_CFLUSH, &pc); 2734 } 2735 2736 /* 2737 * By the time we get here, we should be assured that the 2738 * PTE at ptep is invalid. 2739 */ 2740 KASSERT(! pte_valid_p(pte_load(ptep))); 2741 2742 /* Update pmap stats now. */ 2743 pmap_stat_update(pmap, resident_count, 1); 2744 if (__predict_false(pte_wired_p(npte))) { 2745 pmap_stat_update(pmap, wired_count, 1); 2746 } 2747 2748 if (__predict_true(pg != NULL)) { 2749 /* 2750 * Managed pages also go on the PV list, so we are 2751 * going to need a PV entry. 2752 */ 2753 newpv = LIST_FIRST(&pc.pc_pvlist); 2754 if (__predict_true(newpv == NULL)) { 2755 /* 2756 * No PV entry to recycle; allocate a new one. 2757 * Because this is an extremely common case, we 2758 * are first going to attempt allocation while 2759 * still in the critical section. If that fails 2760 * and waiting is allowed, we'll leave the critical 2761 * section and try a blocking allocation. 
2762 */ 2763 newpv = pmap_pv_alloc(true/*nowait flag*/); 2764 if (__predict_false(newpv == NULL)) { 2765 if (nowait) { 2766 pmap_evcnt(enter_pv_alloc_fail); 2767 error = ENOMEM; 2768 goto out_release; 2769 } 2770 PMAP_CRIT_EXIT(); 2771 newpv = pmap_pv_alloc(false/*nowait flag*/); 2772 KASSERT(newpv != NULL); 2773 PMAP_CRIT_ENTER(); 2774 /* 2775 * Because we may have blocked while allocating 2776 * the PV entry, we have to re-validate our 2777 * environment, as another thread could have 2778 * inserted a mapping here behind our back. 2779 */ 2780 opte = pte_load(ptep); 2781 if (__predict_false(pte_valid_p(opte))) { 2782 pmap_stat_update(pmap, 2783 resident_count, -1); 2784 if (pte_wired_p(npte)) { 2785 pmap_stat_update(pmap, 2786 wired_count, -1); 2787 } 2788 LIST_INSERT_HEAD(&pc.pc_pvlist, 2789 newpv, pv_pmlist); 2790 goto restart; 2791 } 2792 } 2793 } else { 2794 pmap_evcnt(enter_pv_recycle); 2795 LIST_REMOVE(newpv, pv_pmlist); 2796 } 2797 2798 /* 2799 * Enter the mapping into the PV list. pmap_pv_enter() 2800 * will also set the PTE in the table. 2801 */ 2802 pmap_pv_enter(pmap, pg, va, prot, pt, npte, newpv); 2803 2804 /* 2805 * The new mapping takes ownership of the PT 2806 * retain count we took while looking up the PTE. 2807 */ 2808 goto out_crit_exit; 2809 } 2810 2811 /* 2812 * Not a managed mapping, so set the new PTE. As with managed 2813 * mappings, the new mapping takes ownership of the PT retain 2814 * count we took while looking up the PTE. 2815 */ 2816 pte_store(ptep, npte); 2817 2818 /* 2819 * See comments in pmap_pv_enter() for why we have to hit 2820 * the ATC here. 2821 */ 2822 if (active_pmap(pmap)) { 2823 TBIS(va); 2824 } 2825 goto out_crit_exit; 2826 2827 out_release: 2828 pmap_table_release(pmap, pt, &pc); 2829 out_crit_exit: 2830 PMAP_CRIT_EXIT(); 2831 out: 2832 pmap_completion_fini(&pc); 2833 return error; 2834 } 2835 2836 /* 2837 * pmap_kenter_pa: [ INTERFACE ] 2838 * 2839 * Enter a va -> pa mapping into the kernel pmap without any 2840 * physical->virtual tracking. 2841 */ 2842 void 2843 pmap_kenter_pa(vaddr_t va, paddr_t pa, vm_prot_t prot, u_int flags) 2844 { 2845 pmap_t const pmap = pmap_kernel(); 2846 2847 KASSERT(va >= VM_MIN_KERNEL_ADDRESS); 2848 2849 pt_entry_t * const ptep = pmap_kernel_pte(va); 2850 2851 /* Build the new PTE. */ 2852 const pt_entry_t npte = pmap_make_pte(pa, prot, flags | PMAP_WIRED); 2853 2854 /* 2855 * If this is an EXEC mapping, then we have to ensure that 2856 * the I$ doesn't load stale data. 2857 */ 2858 if (__predict_false(prot & UVM_PROT_EXEC)) { 2859 #if MMU_CONFIG_68040_CLASS 2860 if (MMU_IS_68040_CLASS) { 2861 /* 2862 * XXX Potential future optimization: is only 2863 * XXX the DCFP() needed here to deal with 2864 * XXX write-back? 2865 */ 2866 DCFP(pa); 2867 ICPP(pa); 2868 } 2869 #endif 2870 } 2871 2872 /* Set the new PTE. */ 2873 const pt_entry_t opte = pte_load(ptep); 2874 pte_store(ptep, npte); 2875 2876 /* 2877 * See comments in pmap_pv_enter() as for why we hit the ATC here. 2878 * This *should* be unnecessary because this is a wired kernel 2879 * mapping and no demand-page-ins should have happened at this 2880 * VA, but we're erring on the side of caution for now. 2881 */ 2882 TBIS(va); 2883 2884 /* 2885 * There should not have been anything here, previously, 2886 * so we can skip ATC invalidation in the common case. 2887 */ 2888 if (__predict_false(pte_valid_p(opte))) { 2889 if (__predict_false(pte_managed_p(opte))) { 2890 /* 2891 * Can't handle this case and it's a legitimate 2892 * error if it happens. 
2893 */ 2894 panic("%s: old mapping was managed", __func__); 2895 } 2896 if (__predict_false(! pte_wired_p(opte))) { 2897 pmap_stat_update(pmap, wired_count, 1); 2898 } 2899 } else { 2900 pmap_stat_update(pmap, resident_count, 1); 2901 pmap_stat_update(pmap, wired_count, 1); 2902 } 2903 } 2904 2905 /* 2906 * pmap_kremove: [ INTERFACE ] 2907 * 2908 * Remove a mapping entered with pmap_kenter_pa() starting at va, 2909 * for size bytes (assumed to be page rounded). 2910 */ 2911 void 2912 pmap_kremove(vaddr_t va, vsize_t size) 2913 { 2914 pt_entry_t *ptep, opte; 2915 pmap_t const pmap = pmap_kernel(); 2916 int count = 0; 2917 #if MMU_CONFIG_HP_CLASS 2918 pt_entry_t all_ci = PTE51_CI; 2919 #endif 2920 2921 KASSERT(va >= VM_MIN_KERNEL_ADDRESS); 2922 2923 for (ptep = pmap_kernel_pte(va); size != 0; 2924 ptep++, size -= PAGE_SIZE, va += PAGE_SIZE) { 2925 opte = pte_load(ptep); 2926 if (pte_valid_p(opte)) { 2927 KASSERT(! pte_managed_p(opte)); 2928 KASSERT(pte_wired_p(opte)); 2929 #if MMU_CONFIG_HP_CLASS 2930 /* 2931 * If all of the PTEs we're zapping have the 2932 * cache-inhibit bit set, ci_pte will remain 2933 * non-zero and we'll be able to skip flushing 2934 * the VAC when we're done. 2935 */ 2936 all_ci &= opte; 2937 #endif 2938 /* Zap the mapping. */ 2939 pte_store(ptep, 0); 2940 TBIS(va); 2941 count++; 2942 } 2943 } 2944 #if MMU_CONFIG_HP_CLASS 2945 if (MMU_IS_HP_CLASS && !all_ci) { 2946 /* 2947 * Cacheable mappings were removed, so invalidate 2948 * the cache. 2949 */ 2950 DCIS(); 2951 } 2952 #endif 2953 /* Update stats. */ 2954 if (__predict_true(count != 0)) { 2955 pmap_stat_update(pmap, resident_count, -count); 2956 pmap_stat_update(pmap, wired_count, -count); 2957 } 2958 } 2959 2960 /* 2961 * pmap_unwire: [ INTERFACE ] 2962 * 2963 * Clear the wired attribute for a map/virtual-address pair. 2964 * 2965 * The mapping must already exist in the pmap. 2966 */ 2967 void 2968 pmap_unwire(pmap_t pmap, vaddr_t va) 2969 { 2970 struct pmap_table *pt; 2971 pt_entry_t opte, npte, *ptep; 2972 2973 PMAP_CRIT_ENTER(); 2974 2975 ptep = pmap_pte_lookup(pmap, va, &pt); 2976 KASSERT(ptep != NULL); 2977 2978 for (;;) { 2979 opte = pte_load(ptep); 2980 KASSERT(pte_valid_p(opte)); 2981 2982 /* 2983 * If the wiring actually changed (always?), clear the wire 2984 * bit and update the wire count. Note that the wiring is 2985 * not a hardware characteristic so there is no need to 2986 * invalidate the ATC. 2987 */ 2988 if (! pte_wired_p(opte)) { 2989 break; 2990 } 2991 npte = opte & ~PTE_WIRED; 2992 if (pte_update(ptep, opte, npte)) { 2993 pmap_stat_update(pmap, wired_count, -1); 2994 break; 2995 } 2996 } 2997 2998 pmap_table_release(pmap, pt, NULL); 2999 3000 PMAP_CRIT_EXIT(); 3001 } 3002 3003 /* 3004 * pmap_extract: [ INTERFACE ] 3005 * 3006 * Extract the physical address associated with the given 3007 * pmap/virtual address pair. 3008 * 3009 * pmap_extract_info: 3010 * 3011 * Like pmap_extract(), but also returns information 3012 * about the mapping (wired, cache-inhibited, etc.) 
3013 */ 3014 bool 3015 pmap_extract_info(pmap_t pmap, vaddr_t va, paddr_t *pap, int *flagsp) 3016 { 3017 struct pmap_table *pt; 3018 pt_entry_t pte, *ptep; 3019 bool rv = false; 3020 3021 if (__predict_false(pmap == pmap_kernel() && 3022 va >= kernel_virtual_end)) { 3023 return false; 3024 } 3025 3026 PMAP_CRIT_ENTER(); 3027 3028 ptep = pmap_pte_lookup(pmap, va, &pt); 3029 if (__predict_true(ptep != NULL)) { 3030 pte = pte_load(ptep); 3031 if (__predict_true(pte_valid_p(pte))) { 3032 if (__predict_true(pap != NULL)) { 3033 *pap = pte_pa(pte) | (va & PGOFSET); 3034 } 3035 if (__predict_false(flagsp != NULL)) { 3036 *flagsp = 3037 (pte_wired_p(pte) ? PMAP_WIRED : 0) | 3038 (pte_ci_p(pte) ? PMAP_NOCACHE : 0); 3039 } 3040 rv = true; 3041 } 3042 pmap_table_release(pmap, pt, NULL); 3043 } 3044 3045 PMAP_CRIT_EXIT(); 3046 3047 return rv; 3048 } 3049 3050 bool 3051 pmap_extract(pmap_t pmap, vaddr_t va, paddr_t *pap) 3052 { 3053 return pmap_extract_info(pmap, va, pap, NULL); 3054 } 3055 3056 /* 3057 * vtophys: 3058 * 3059 * Dumber version of pmap_extract(pmap_kernel(), ...) 3060 */ 3061 paddr_t 3062 vtophys(vaddr_t va) 3063 { 3064 paddr_t pa; 3065 bool rv __diagused; 3066 3067 rv = pmap_extract_info(pmap_kernel(), va, &pa, NULL); 3068 KASSERT(rv); 3069 return rv ? pa : -1; 3070 } 3071 3072 /* 3073 * kvtop: 3074 * 3075 * Sigh. 3076 */ 3077 int 3078 kvtop(void *v) 3079 { 3080 return (int)vtophys((vaddr_t)v); 3081 } 3082 3083 /* 3084 * pmap_copy: [ INTERFACE ] 3085 * 3086 * Copy the mapping range specified by src_addr/len 3087 * from the source map to the range dst_addr/len 3088 * in the destination map. 3089 * 3090 * This routine is only advisory and need not do anything. 3091 */ 3092 /* call deleted in <machine/pmap.h> */ 3093 3094 /* 3095 * pmap_update: [ INTERFACE ] 3096 * 3097 * Require that all active physical maps contain no 3098 * incorrect entries NOW, by processing any deferred 3099 * pmap operations. 3100 */ 3101 /* call deleted in <machine/pmap.h> */ 3102 3103 /* 3104 * pmap_activate: [ INTERFACE ] 3105 * 3106 * Activate the pmap used by the specified process. This includes 3107 * reloading the MMU context of the current process, and marking 3108 * the pmap in use by the processor. 3109 */ 3110 void 3111 pmap_activate(struct lwp *l) 3112 { 3113 pmap_t pmap = l->l_proc->p_vmspace->vm_map.pmap; 3114 3115 KASSERT(l == curlwp); 3116 3117 /* 3118 * Because the kernel has a separate root pointer, we don't 3119 * need to activate the kernel pmap. 3120 */ 3121 if (pmap != pmap_kernel()) { 3122 PMAP_CRIT_ENTER(); 3123 pmap_load_urp(pmap->pm_lev1pa); 3124 PMAP_CRIT_EXIT(); 3125 } 3126 } 3127 3128 /* 3129 * pmap_deactivate: [ INTERFACE ] 3130 * 3131 * Mark that the pmap used by the specified process is no longer 3132 * in use by the processor. 3133 */ 3134 void 3135 pmap_deactivate(struct lwp *l) 3136 { 3137 /* No action necessary in this pmap implementation. */ 3138 } 3139 3140 static vaddr_t pmap_tmpmap_srcva; 3141 static vaddr_t pmap_tmpmap_dstva; 3142 3143 /* 3144 * pmap_zero_page: [ INTERFACE ] 3145 * 3146 * Zero the specified VM page by mapping the page into the kernel 3147 * and using memset() (or equivalent) to clear its contents. 3148 */ 3149 void 3150 pmap_zero_page(paddr_t pa) 3151 { 3152 const int flags = MMU_IS_HP_CLASS ? PMAP_NOCACHE|PMAP_WIRED 3153 : PMAP_WIRED; 3154 pt_entry_t * const dst_ptep = pmap_kernel_pte(pmap_tmpmap_dstva); 3155 3156 /* Build the new PTE. 
*/ 3157 const pt_entry_t dst_pte = 3158 pmap_make_pte(pa, UVM_PROT_READ | UVM_PROT_WRITE, flags); 3159 3160 /* Set the new PTE. */ 3161 KASSERT(! pte_valid_p(pte_load(dst_ptep))); 3162 pte_store(dst_ptep, dst_pte); 3163 /* XXX Possibly being over-cautious here; see pmap_kenter_pa(). */ 3164 TBIS(pmap_tmpmap_dstva); 3165 3166 /* Zero the page. */ 3167 zeropage((void *)pmap_tmpmap_dstva); 3168 3169 /* Invalidate the PTEs. */ 3170 pte_store(dst_ptep, 0); 3171 TBIS(pmap_tmpmap_dstva); 3172 } 3173 3174 /* 3175 * pmap_copy_page: [ INTERFACE ] 3176 * 3177 * Copy the specified VM page by mapping the page(s) into the kernel 3178 * and using memcpy() (or equivalent). 3179 */ 3180 void 3181 pmap_copy_page(paddr_t src, paddr_t dst) 3182 { 3183 const int flags = MMU_IS_HP_CLASS ? PMAP_NOCACHE|PMAP_WIRED 3184 : PMAP_WIRED; 3185 pt_entry_t * const src_ptep = pmap_kernel_pte(pmap_tmpmap_srcva); 3186 pt_entry_t * const dst_ptep = pmap_kernel_pte(pmap_tmpmap_dstva); 3187 3188 /* Build the new PTEs. */ 3189 const pt_entry_t src_pte = 3190 pmap_make_pte(src, UVM_PROT_READ, flags); 3191 const pt_entry_t dst_pte = 3192 pmap_make_pte(dst, UVM_PROT_READ | UVM_PROT_WRITE, flags); 3193 3194 /* Set the new PTEs. */ 3195 KASSERT(! pte_valid_p(pte_load(src_ptep))); 3196 pte_store(src_ptep, src_pte); 3197 /* XXX Possibly being over-cautious here; see pmap_kenter_pa(). */ 3198 TBIS(pmap_tmpmap_srcva); 3199 3200 KASSERT(! pte_valid_p(pte_load(dst_ptep))); 3201 pte_store(dst_ptep, dst_pte); 3202 /* XXX Possibly being over-cautious here; see pmap_kenter_pa(). */ 3203 TBIS(pmap_tmpmap_dstva); 3204 3205 /* Copy the page. */ 3206 copypage((void *)pmap_tmpmap_srcva, (void *)pmap_tmpmap_dstva); 3207 3208 /* Invalidate the PTEs. */ 3209 pte_store(src_ptep, 0); 3210 TBIS(pmap_tmpmap_srcva); 3211 3212 pte_store(dst_ptep, 0); 3213 TBIS(pmap_tmpmap_dstva); 3214 } 3215 3216 /* 3217 * pmap_clear_modify: [ INTERFACE ] 3218 * 3219 * Clear the modify bits on the specified physical page. 3220 */ 3221 /* See <machine/pmap.h> */ 3222 3223 /* 3224 * pmap_clear_reference: [ INTERFACE ] 3225 * 3226 * Clear the reference bit on the specified physical page. 3227 */ 3228 /* See <machine/pmap.h> */ 3229 3230 /* 3231 * pmap_is_referenced: [ INTERFACE ] 3232 * 3233 * Return whether or not the specified physical page has been referenced 3234 * by any physical maps. 3235 */ 3236 /* See <machine/pmap.h> */ 3237 3238 /* 3239 * pmap_is_modified: [ INTERFACE ] 3240 * 3241 * Return whether or not the specified physical page has been modified 3242 * by any physical maps. 3243 */ 3244 /* See <machine/pmap.h> */ 3245 3246 /* 3247 * pmap_testbit: 3248 * 3249 * Test the modified / referenced bits of a physical page. 3250 */ 3251 bool 3252 pmap_testbit(struct vm_page *pg, pt_entry_t bit) 3253 { 3254 struct pv_entry *pv; 3255 pt_entry_t pte = 0; 3256 bool rv = false; 3257 3258 PMAP_CRIT_ENTER(); 3259 3260 if (VM_MDPAGE_UM(pg) & bit) { 3261 rv = true; 3262 goto out; 3263 } 3264 3265 for (pv = VM_MDPAGE_PVS(pg); pv != NULL; pv = pv->pv_next) { 3266 pte |= pte_load(pmap_pv_pte(pv)); 3267 if (pte & bit) { 3268 rv = true; 3269 break; 3270 } 3271 } 3272 VM_MDPAGE_ADD_UM(pg, pte); 3273 out: 3274 PMAP_CRIT_EXIT(); 3275 3276 return rv; 3277 } 3278 3279 /* 3280 * pmap_changebit: 3281 * 3282 * Test-and-change various bits (including mod/ref bits). 
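 *
 *	The new PTE value for each mapping is (opte | set) & mask.  For
 *	example, pmap_page_protect() write-protects every mapping of a
 *	page with:
 *
 *		pmap_changebit(pg, PTE_WP, ~0U);
 *
 *	and clearing some attribute bit X from every mapping would be
 *	pmap_changebit(pg, 0, ~X).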
3283 */ 3284 bool 3285 pmap_changebit(struct vm_page *pg, pt_entry_t set, pt_entry_t mask) 3286 { 3287 struct pv_entry *pv; 3288 pt_entry_t *ptep, combined_pte = 0, diff, opte, npte; 3289 bool rv = false; 3290 3291 #if MMU_CONFIG_68040_CLASS 3292 /* 3293 * If we're making the page read-only or changing the caching 3294 * status of the page, we need to flush it the first time we 3295 * change a mapping. 3296 */ 3297 bool cflush_040; 3298 if (MMU_IS_68040_CLASS && 3299 ((set & PTE_CRIT_BITS) != 0 || 3300 (mask & PTE_CRIT_BITS) == 0)) { 3301 cflush_040 = true; 3302 } else { 3303 cflush_040 = false; 3304 } 3305 #endif 3306 3307 PMAP_CRIT_ENTER(); 3308 3309 /* 3310 * Since we're running over every mapping for the page anyway, 3311 * we might as well synchronize any attribute bits that we're 3312 * not clearing. 3313 */ 3314 for (pv = VM_MDPAGE_PVS(pg); pv != NULL; pv = pv->pv_next) { 3315 for (;;) { 3316 ptep = pmap_pv_pte(pv); 3317 opte = pte_load(ptep); 3318 npte = (opte | set) & mask; 3319 if ((diff = (opte ^ npte)) == 0) { 3320 break; 3321 } 3322 #if MMU_CONFIG_68040_CLASS 3323 if (__predict_false(cflush_040)) { 3324 paddr_t pa = VM_PAGE_TO_PHYS(pg); 3325 DCFP(pa); 3326 ICPP(pa); 3327 cflush_040 = false; 3328 } 3329 #endif 3330 if (pte_update(ptep, opte, npte)) { 3331 rv = true; 3332 break; 3333 } 3334 /* Lost race, try again. */ 3335 } 3336 combined_pte |= opte; 3337 if ((diff & PTE_CRIT_BITS) != 0 && active_pmap(pv->pv_pmap)) { 3338 TBIS(PV_VA(pv)); 3339 } 3340 } 3341 3342 /* 3343 * Update any attributes we looked at, clear the ones we're clearing. 3344 */ 3345 VM_MDPAGE_SET_UM(pg, 3346 (VM_MDPAGE_UM(pg) | combined_pte | set) & mask); 3347 3348 PMAP_CRIT_EXIT(); 3349 3350 return rv; 3351 } 3352 3353 /* 3354 * pmap_phys_address: [ INTERFACE ] 3355 * 3356 * Return the physical address corresponding to the specified 3357 * cookie. Used by the device pager to decode a device driver's 3358 * mmap entry point return value. 3359 */ 3360 paddr_t 3361 pmap_phys_address(paddr_t cookie) 3362 { 3363 return m68k_ptob(cookie); 3364 } 3365 3366 static pt_entry_t *kernel_lev1map; 3367 3368 /* 3369 * pmap_growkernel_alloc_page: 3370 * 3371 * Helper for pmap_growkernel(). 3372 */ 3373 static paddr_t 3374 pmap_growkernel_alloc_page(void) 3375 { 3376 /* 3377 * XXX Needs more work if we're going to do this during 3378 * XXX early bootstrap. 3379 */ 3380 if (! uvm.page_init_done) { 3381 panic("%s: called before UVM initialized", __func__); 3382 } 3383 3384 struct vm_page *pg = pmap_page_alloc(true/*nowait*/); 3385 if (pg == NULL) { 3386 panic("%s: out of memory", __func__); 3387 } 3388 3389 paddr_t pa = VM_PAGE_TO_PHYS(pg); 3390 pmap_zero_page(pa); 3391 #if MMU_CONFIG_68040_CLASS 3392 if (MMU_IS_68040_CLASS) { 3393 DCFP(pa); 3394 } 3395 #endif 3396 return pa; 3397 } 3398 3399 /* 3400 * pmap_growkernel_link_kptpage: 3401 * 3402 * Helper for pmap_growkernel(). 3403 */ 3404 static void 3405 pmap_growkernel_link_kptpage(vaddr_t va, paddr_t ptp_pa) 3406 { 3407 /* 3408 * This is trivial for the 2-level MMU configuration. 3409 */ 3410 if (MMU_USE_2L) { 3411 KASSERT((kernel_lev1map[LA2L_RI(va)] & DT51_SHORT) == 0); 3412 kernel_lev1map[LA2L_RI(va)] = pmap_ste_proto | ptp_pa; 3413 return; 3414 } 3415 3416 /* 3417 * N.B. pmap_zero_page() is used in this process, which 3418 * uses pmap_tmpmap_dstva. pmap_tmpmap_srcva is available 3419 * for our use, however, so that's what we used to temporarily 3420 * map inner segment table pages. 
3421 */ 3422 const vaddr_t stpg_va = pmap_tmpmap_srcva; 3423 3424 paddr_t stpa, stpg_pa, stpgoff, last_stpg_pa = (paddr_t)-1; 3425 paddr_t pa = ptp_pa, end_pa = ptp_pa + PAGE_SIZE; 3426 pt_entry_t *stes; 3427 3428 for (; pa < end_pa; va += NBSEG3L, pa += TBL40_L3_SIZE) { 3429 if ((kernel_lev1map[LA40_RI(va)] & UTE40_RESIDENT) == 0) { 3430 /* Level-2 table for this segment needed. */ 3431 if (kernel_stnext_pa == kernel_stnext_endpa) { 3432 /* 3433 * No more slots left in the last page 3434 * we allocated for segment tables. Grab 3435 * another one. 3436 */ 3437 kernel_stnext_pa = pmap_growkernel_alloc_page(); 3438 kernel_stnext_endpa = 3439 kernel_stnext_pa + PAGE_SIZE; 3440 pmap_nkstpages_current_ev.ev_count++; 3441 } 3442 kernel_lev1map[LA40_RI(va)] = 3443 pmap_ste_proto | kernel_stnext_pa; 3444 kernel_stnext_pa += TBL40_L2_SIZE; 3445 } 3446 stpa = kernel_lev1map[LA40_RI(va)] & UTE40_PTA; 3447 stpg_pa = m68k_trunc_page(stpa); 3448 if (stpg_pa != last_stpg_pa) { 3449 if (last_stpg_pa != (paddr_t)-1) { 3450 pmap_kremove(stpg_va, PAGE_SIZE); 3451 } 3452 pmap_kenter_pa(stpg_va, stpg_pa, 3453 UVM_PROT_READ | UVM_PROT_WRITE, 3454 PMAP_WIRED | PMAP_NOCACHE); 3455 last_stpg_pa = stpg_pa; 3456 } 3457 stpgoff = stpa - stpg_pa; 3458 stes = (pt_entry_t *)(stpg_va + stpgoff); 3459 stes[LA40_PI(va)] = pmap_ste_proto | pa; 3460 } 3461 if (last_stpg_pa != (paddr_t)-1) { 3462 pmap_kremove(stpg_va, PAGE_SIZE); 3463 } 3464 } 3465 3466 /* 3467 * pmap_growkernel: [ INTERFACE ] 3468 * 3469 * Grow the kernel address space. This is a hint from the 3470 * upper layer to pre-allocate more kernel PT pages. 3471 */ 3472 vaddr_t 3473 pmap_growkernel(vaddr_t maxkvaddr) 3474 { 3475 PMAP_CRIT_ENTER(); 3476 3477 KASSERT((kernel_virtual_end & PTPAGEVAOFS) == 0); 3478 3479 vaddr_t new_maxkva = pmap_round_ptpage(maxkvaddr); 3480 if (new_maxkva < kernel_virtual_end) { 3481 /* 3482 * Great news! We already have what we need to map 3483 * the requested max address. This happens one during 3484 * early bootstrap before UVM's notion of "maxkvaddr" 3485 * has been initialized. 3486 */ 3487 new_maxkva = kernel_virtual_end; 3488 goto done; 3489 } 3490 3491 if (new_maxkva > kernel_virtual_max) { 3492 panic("%s: out of kernel VA space (req=0x%08lx limit=0x%08lx)", 3493 __func__, maxkvaddr, kernel_virtual_max); 3494 } 3495 3496 /* 3497 * Allocate PT pages and link them into the MMU tree as we 3498 * go. 3499 */ 3500 vaddr_t va, ptp_pa; 3501 for (va = kernel_virtual_end; va < new_maxkva; va += PTPAGEVASZ) { 3502 /* Allocate page and link it into the MMU tree. */ 3503 ptp_pa = pmap_growkernel_alloc_page(); 3504 pmap_growkernel_link_kptpage(va, ptp_pa); 3505 pmap_nkptpages_current_ev.ev_count++; 3506 3507 /* Map the PT page into the kernel PTE array. */ 3508 pmap_kenter_pa((vaddr_t)pmap_kernel_pte(va), 3509 ptp_pa, UVM_PROT_READ | UVM_PROT_WRITE, 3510 PMAP_WIRED | PMAP_NOCACHE); 3511 } 3512 kernel_virtual_end = new_maxkva; 3513 done: 3514 pmap_maxkva_ev.ev_count32 = new_maxkva; 3515 pmap_kvalimit_ev.ev_count32 = kernel_virtual_max; 3516 PMAP_CRIT_EXIT(); 3517 return new_maxkva; 3518 } 3519 3520 /* 3521 * pmap_prefer: [ INTERFACE ] 3522 * 3523 * Attempt to arrange for pages at a given VM object offset 3524 * to occupy the same virtually-addressed cache footprint 3525 * in order to avoid cache aliases. 
3526 */ 3527 #if MMU_CONFIG_HP_CLASS 3528 static struct evcnt pmap_prefer_nochange_ev = 3529 EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "pmap prefer", "nochange"); 3530 static struct evcnt pmap_prefer_change_ev = 3531 EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "pmap prefer", "change"); 3532 3533 EVCNT_ATTACH_STATIC(pmap_prefer_change_ev); 3534 EVCNT_ATTACH_STATIC(pmap_prefer_nochange_ev); 3535 #endif 3536 void 3537 pmap_prefer(vaddr_t hint, vaddr_t *vap, int td) 3538 { 3539 #if MMU_CONFIG_HP_CLASS 3540 if (MMU_IS_HP_CLASS) { 3541 vaddr_t va = *vap; 3542 ptrdiff_t diff = (hint - va) & pmap_aliasmask; 3543 3544 if (diff == 0) { 3545 pmap_prefer_nochange_ev.ev_count++; 3546 } else { 3547 pmap_prefer_change_ev.ev_count++; 3548 if (__predict_false(td)) { 3549 va -= pmap_aliasmask + 1; 3550 } 3551 *vap = va + diff; 3552 } 3553 } 3554 #endif 3555 } 3556 3557 /* 3558 * pmap_procwr: [ INTERFACE ] 3559 * 3560 * Perform any cache synchronization required after writing 3561 * to a process's address space. 3562 */ 3563 void 3564 pmap_procwr(struct proc *p, vaddr_t va, size_t len) 3565 { 3566 /* 3567 * This is just a wrapper around the "cachectl" machdep 3568 * system call. 3569 * 3570 * XXX This is kind of gross, to be honest. 3571 */ 3572 (void)cachectl1(0x80000004, va, len, p); 3573 } 3574 3575 static paddr_t kernel_reloc_offset; 3576 static vaddr_t kernel_reloc_end; 3577 3578 /* 3579 * pmap_init_kcore_hdr: 3580 * 3581 * Initialize the m68k kernel crash dump header with information 3582 * necessary to perform KVA -> phys translations. 3583 * 3584 * Returns a pointer to the crash dump RAM segment entries for 3585 * machine-specific code to initialize. 3586 */ 3587 phys_ram_seg_t * 3588 pmap_init_kcore_hdr(cpu_kcore_hdr_t *h) 3589 { 3590 struct gen68k_kcore_hdr *m = &h->un._gen68k; 3591 3592 memset(h, 0, sizeof(*h)); 3593 3594 /* 3595 * Initialize the `dispatcher' portion of the header. 3596 */ 3597 strcpy(h->name, "gen68k"); 3598 h->page_size = PAGE_SIZE; 3599 h->kernbase = VM_MIN_KERNEL_ADDRESS; 3600 3601 /* Fixed relocation information. */ 3602 m->reloc = kernel_reloc_offset; 3603 m->relocend = kernel_reloc_end; 3604 3605 /* 3606 * Fill in information about our MMU configuration. 3607 * 3608 * We essentially pretend to be a 68851 as far as table- 3609 * walks are concerned. 3610 * 3611 * We provide the kernel's MMU_* constant so that the TT 3612 * registers can be interpreted correctly. 3613 */ 3614 m->mmutype = mmutype; 3615 m->tcr = MMU_USE_3L ? MMU51_3L_TCR_BITS : MMU51_TCR_BITS; 3616 m->srp[0] = MMU51_SRP_BITS; 3617 m->srp[1] = Sysseg_pa; 3618 3619 #if MMU_CONFIG_68040_CLASS 3620 if (MMU_IS_68040_CLASS) { 3621 m->itt0 = mmu_tt40[MMU_TTREG_ITT0]; 3622 m->itt1 = mmu_tt40[MMU_TTREG_ITT1]; 3623 m->tt0 = mmu_tt40[MMU_TTREG_DTT0]; 3624 m->tt1 = mmu_tt40[MMU_TTREG_DTT1]; 3625 } 3626 #endif 3627 #if defined(M68K_MMU_68030) 3628 if (mmutype == MMU_68030) { 3629 m->tt0 = mmu_tt30[MMU_TTREG_TT0]; 3630 m->tt1 = mmu_tt30[MMU_TTREG_TT1]; 3631 } 3632 #endif 3633 3634 return m->ram_segs; 3635 } 3636 3637 /***************************** PMAP BOOTSTRAP ********************************/ 3638 3639 /* 3640 * The kernel virtual address space layout that this implementation is tuned 3641 * for assumes that KVA space begins at $0000.0000, that the static kernel 3642 * image (text/data/bss, etc.) resides at or near the bottom of this space, 3643 * and that all additional KVA that's mapped by PTEs grows upwards from there. 
 *
 * Regions mapped by Transparent Translation registers (68030 and up)
 * are assumed to lie beyond where the KVA space is expected to grow.  When
 * we encounter these regions in the machine_bootmap[] (represented by a
 * KEEPOUT entry), we clamp the maximum KVA to prevent its growth into that
 * region.  The TT mechanism is not terribly precise, and only supports
 * VA==PA mappings, so it's only really suitable for device regions that
 * are in the upper reaches of the physical address space (at or beyond 1GB
 * or so).
 *
 * This implementation certainly could be adjusted to work with other address
 * space layouts, but the assumption asserted here is a bit baked-in.
 */
__CTASSERT(VM_MIN_KERNEL_ADDRESS == 0);

/*
 * The virtual kernel PTE array covers the entire 4GB kernel supervisor
 * address space, but is sparsely populated.  The amount of VA space required
 * for this linear array is:
 *
 *	(4GB / PAGE_SIZE) * sizeof(pt_entry_t)
 *		-or-
 *	4KB: 4MB (1024 pages)
 *	8KB: 2MB (512 pages)
 *
 * To avoid doing 64-bit math, we calculate it like so:
 *
 *	((0xffffffff >> PGSHIFT) + 1) * sizeof(pt_entry_t)
 *
 * The traditional name for this virtual array is "Sysmap".
 */
#define	SYSMAP_VA_SIZE	(((0xffffffffU >> PGSHIFT) + 1) * sizeof(pt_entry_t))

/*
 * In the Hibler/Utah pmap, the kernel PTE array was placed right near
 * the very top of the kernel virtual address space.  This was because
 * of the hp300's unique physical memory arrangement: the last page of
 * memory is always located at PA $FFFF.F000 and the physical address
 * of the beginning of RAM varied based on the RAM size.  This meant that
 * VA $FFFF.F000 was a convenient place to map the RAM VA==PA, making
 * the transition between "MMU off" and "MMU on" (and vice versa) easier.
 * Since VA $FFFF.F000 was already going to be mapped, it made sense to
 * put something else alongside it in order to minimize waste in
 * PT pages.
 *
 * As noted above, this implementation is tuned for a growing-from-0
 * virtual space layout.  However, we have a special case for this
 * particular requirement: if a platform defines SYSMAP_VA, then we
 * will assume it is a high address, place the kernel PTE array at
 * that KVA, and ensure sufficient page tables to map from that VA until
 * the very end of the 4GB supervisor address space.  These tables will
 * be allocated before the machine_bootmap[] is processed to map physical
 * addresses, thus allowing the machine_bootmap[] to use it to map physical
 * addresses into one of these high virtual addresses if necessary.  The
 * beginning of this region will also serve to clamp the maximum kernel
 * virtual address, in the same way as a KEEPOUT region in machine_bootmap[].
 *
 * For reference, the traditional hp300 definition is:
 *
 *	#define SYSMAP_VA ((vaddr_t)(0-PAGE_SIZE*NPTEPG*2))
 *
 * ...and because the hp300 always used a 4KB page size (a restriction
 * of the HP MMU), this is:	   0 - 4096*1024*2
 *				-> 0 - 8388608 (8MB)
 *				-> $FF80.0000
 *
 * Unfortunately (for the hp300), this means 2 PT pages for the top of
 * the address space (in the 2-level case), but that's unavoidable anyway
 * because the last page is a separate mapping and the kernel PTE
 * array needs 4MB of space on its own.
3714 */ 3715 3716 static vaddr_t lwp0uarea; 3717 char * vmmap; 3718 void * msgbufaddr; 3719 3720 /* XXX Doesn't belong here. */ 3721 paddr_t avail_start; /* PA of first available physical page */ 3722 paddr_t avail_end; /* PA of last available physical page */ 3723 3724 extern char * kernel_text; 3725 extern char * etext; 3726 3727 /* 3728 * pmap_bootstrap1: 3729 * 3730 * Phase 1 of bootstrapping virtual memory. This is called before 3731 * the MMU is enabled to set up the initial kernel MMU tables and 3732 * allocate other important data structures. 3733 * 3734 * Because the MMU has not yet been turned on, and we don't know if 3735 * we're running VA==PA, we have to manually relocate all global 3736 * symbol references. 3737 * 3738 * Arguments: nextpa Physical address immediately 3739 * following the kernel / symbols / 3740 * etc. This will be page-rounded 3741 * before use. 3742 * 3743 * reloff VA<->PA relocation offset 3744 * 3745 * Returns: nextpa Updated value after all of the 3746 * allocations performed. 3747 */ 3748 paddr_t __attribute__((no_instrument_function)) 3749 pmap_bootstrap1(paddr_t nextpa, paddr_t reloff) 3750 { 3751 paddr_t lwp0upa, stnext_endpa, stnext_pa; 3752 paddr_t pa, kernimg_endpa, kern_lev1pa; 3753 vaddr_t va, nextva, kern_lev1va; 3754 pt_entry_t *pte, *epte; 3755 int entry_count = 0; 3756 3757 #ifdef SYSMAP_VA 3758 #define VA_RANGE_DEFAULT 0 3759 #define VA_RANGE_KPTES 1 3760 #define NRANGES 2 3761 #else 3762 #define VA_RANGE_DEFAULT 0 3763 #define VA_RANGE_KPTES 0 3764 #define NRANGES 1 3765 #endif 3766 3767 struct va_range { 3768 vaddr_t start_va; 3769 vaddr_t end_va; 3770 paddr_t start_ptp; 3771 paddr_t end_ptp; 3772 } va_ranges[NRANGES], *var; 3773 int r; 3774 3775 #define VA_IN_RANGE(va, var) \ 3776 ((va) >= (var)->start_va && \ 3777 ((va) < (var)->end_va || (var)->end_va == 0)) 3778 3779 #define VA_PTE_BASE(va, var) \ 3780 (&((pt_entry_t *) \ 3781 PMAP_BOOTSTRAP_RELOC_PA((var)->start_ptp))[ \ 3782 m68k_btop((va) - (var)->start_va)]) 3783 3784 #define RELOC(v, t) *((t *)PMAP_BOOTSTRAP_RELOC_GLOB(&(v))) 3785 3786 /* Record the relocation offset for kernel crash dumps. */ 3787 RELOC(kernel_reloc_offset, paddr_t) = reloff; 3788 3789 /* 3790 * First determination we have to make is our configuration: 3791 * Are we using a 2-level or 3-level table? For the purposes 3792 * of bootstrapping the kernel, it's "68040-class" and "other", 3793 * the former getting the 3-level table. 3794 */ 3795 const bool is_68040_class = RELOC(mmutype, int) == MMU_68040 || 3796 RELOC(mmutype, int) == MMU_68060; 3797 const bool use_3l = is_68040_class; 3798 3799 /* 3800 * Based on MMU class, figure out what the constant values of 3801 * segment / page table entries look like. 3802 * 3803 * See pmap_pte_proto_init(). 3804 */ 3805 pt_entry_t proto_ro_pte; /* read-only */ 3806 pt_entry_t proto_rw_pte; /* read-write */ 3807 pt_entry_t proto_rw_ci_pte; /* read-write, cache-inhibited */ 3808 pt_entry_t proto_ste; 3809 3810 if (is_68040_class) { 3811 proto_ro_pte = PTE_VALID|PTE_WIRED|PTE_WP|PTE40_CM_WT; 3812 proto_rw_pte = PTE_VALID|PTE_WIRED |PTE40_CM_CB; 3813 proto_rw_ci_pte = PTE_VALID|PTE_WIRED |PTE40_CM_NC_SER; 3814 } else { 3815 proto_ro_pte = PTE_VALID|PTE_WIRED|PTE_WP; 3816 proto_rw_pte = PTE_VALID|PTE_WIRED; 3817 proto_rw_ci_pte = PTE_VALID|PTE_WIRED |PTE51_CI; 3818 } 3819 proto_ste = DTE51_U | DT51_SHORT; 3820 3821 /* 3822 * Allocate some important fixed virtual (and physical) addresses. 
	 * We use the sum total of this initial mapped kernel space to
	 * determine how many initial kernel PT pages to allocate.  The
	 * things that consume physical space will come first, and the
	 * virtual-space-{only,mostly} things come at the end.
	 *
	 *	lwp0upa		lwp0 u-area	USPACE			(p)
	 *	lwp0uarea						(v)
	 *
	 *	Sysseg_pa	kernel lev1map	PAGE_SIZE		(p)
	 *	kernel_lev1map			PAGE_SIZE		(v, ci)
	 *
	 *	^^^^ end of simple relocation region ^^^^
	 *
	 *	null_segtab_pa	null segtab	PAGE_SIZE		(p)
	 *
	 *	tmpmap_srcva	temp map, src	PAGE_SIZE		(v)
	 *	tmpmap_dstva	temp map, dst	PAGE_SIZE		(v)
	 *
	 *	vmmap		ya tmp map	PAGE_SIZE		(v)
	 *
	 *	msgbufaddr	kernel msg buf	round_page(MSGBUFSIZE)	(v)
	 *
	 *	kernel_ptes	kernel PTEs	SYSMAP_VA_SIZE		(v, ci)
	 *					(see comments above)
	 *
	 * When we allocate the kernel lev1map, for the 2-level
	 * configuration, there are no inner segment tables to allocate;
	 * the leaf PT pages get poked directly into the level-1 table.
	 *
	 * In the 3-level configuration, to map all of the leaf tables,
	 * inner segment table pages are allocated as necessary.  We
	 * first take those tables from the page containing the level-1
	 * table, and allocate additional pages as necessary.
	 */

	nextpa = m68k_round_page(nextpa);
	nextva = PMAP_BOOTSTRAP_PA_TO_VA(nextpa);

	/*
	 * nextpa now represents the end of the loaded kernel image.
	 * This includes the .data + .bss segments, the debugger symbols,
	 * and any other ancillary data loaded after the kernel.
	 *
	 * N.B. This represents the start of our dynamic memory allocation,
	 * which will be referenced below when we zero the memory we've
	 * allocated.
	 */
	kernimg_endpa = nextpa;

	/*
	 * lwp0 u-area.  We allocate it here, and finish setting it
	 * up in pmap_bootstrap2().
	 */
	lwp0upa = nextpa;
	nextpa += USPACE;
	RELOC(lwp0uarea, vaddr_t) = nextva;
	nextva += USPACE;

	size_t nstpages = 0;

	/* kernel level-1 map */
	RELOC(Sysseg_pa, paddr_t) = kern_lev1pa = nextpa;
	nextpa += PAGE_SIZE;
	RELOC(kernel_lev1map, vaddr_t) = kern_lev1va = nextva;
	nextva += PAGE_SIZE;
	nstpages++;

	/* This is the end of the simple relocation region. */
	RELOC(kernel_reloc_end, vaddr_t) = nextva;

	/*
	 * For 3-level configs, we now have space to allocate
	 * inner segment tables.
	 */
	stnext_pa = kern_lev1pa + TBL40_L1_SIZE;
	stnext_endpa = m68k_round_page(stnext_pa);

	/* null segment table */
#ifdef NULL_SEGTAB_PA
	RELOC(null_segtab_pa, paddr_t) = (paddr_t)NULL_SEGTAB_PA;
#else
	RELOC(null_segtab_pa, paddr_t) = nextpa;
	nextpa += PAGE_SIZE;
#endif

	/* pmap temporary map addresses */
	RELOC(pmap_tmpmap_srcva, vaddr_t) = nextva;
	nextva += PAGE_SIZE;
	RELOC(pmap_tmpmap_dstva, vaddr_t) = nextva;
	nextva += PAGE_SIZE;

	/* vmmap temporary map address */
	RELOC(vmmap, vaddr_t) = nextva;
	nextva += PAGE_SIZE;

	/* kernel message buffer */
	RELOC(msgbufaddr, vaddr_t) = nextva;
	nextva += m68k_round_page(MSGBUFSIZE);

	/* Kernel PTE array.
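	 *
	 * The array is indexed linearly by virtual page number; the
	 * mapping loop below does &kptes[m68k_btop(va)], and
	 * pmap_kernel_pte(va) presumably reduces to something like
	 * &((pt_entry_t *)kernel_ptes)[m68k_btop(va)].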
	/* Kernel PTE array. */
#ifdef SYSMAP_VA
	if ((vaddr_t)SYSMAP_VA < RELOC(kernel_virtual_max, vaddr_t)) {
		RELOC(kernel_virtual_max, vaddr_t) = (vaddr_t)SYSMAP_VA;
	}
	RELOC(kernel_ptes, vaddr_t) = (vaddr_t)SYSMAP_VA;
	va_ranges[VA_RANGE_KPTES].start_va = (vaddr_t)SYSMAP_VA;
	va_ranges[VA_RANGE_KPTES].end_va = 0;	/* end of the address space */
#else
	RELOC(kernel_ptes, vaddr_t) = nextva;
	nextva += SYSMAP_VA_SIZE;
#endif /* SYSMAP_VA */

	/*
	 * Allocate machine-specific VAs.
	 */
	extern const struct pmap_bootmap machine_bootmap[];
	const struct pmap_bootmap *pmbm = (const struct pmap_bootmap *)
	    PMAP_BOOTSTRAP_RELOC_GLOB(machine_bootmap);
	for (; pmbm->pmbm_vaddr != (vaddr_t)-1; pmbm++) {
		if (pmbm->pmbm_size == 0) {
			continue;
		}
		if (pmbm->pmbm_flags & (PMBM_F_FIXEDVA | PMBM_F_KEEPOUT)) {
			va = m68k_trunc_page(pmbm->pmbm_vaddr);
			if (va < RELOC(kernel_virtual_max, vaddr_t)) {
				RELOC(kernel_virtual_max, vaddr_t) = va;
			}
		} else {
			*(vaddr_t *)
			    PMAP_BOOTSTRAP_RELOC_GLOB(pmbm->pmbm_vaddr_ptr) =
			    nextva;
			nextva += m68k_round_page(pmbm->pmbm_size);
		}
	}

	/* UVM-managed kernel virtual starts here. */
	RELOC(kernel_virtual_start, vaddr_t) = nextva;

	/*
	 * Allocate enough PT pages to map all of physical memory.
	 * This should be sufficient to prevent pmap_growkernel()
	 * from having to do any work before the VM system is set
	 * up.
	 *
	 * XXX mac68k also relies on being able to map the last page
	 * XXX of RAM VA==PA for the mmu-switchoff dance.  Unlike hp300,
	 * XXX this is not at a fixed location.  However, RAM generally
	 * XXX starts at $0000.0000 on Macs, so this calculation should
	 * XXX be sufficient to ensure there is a PTE available for this
	 * XXX purpose.
	 * XXX TODO: Provide a way for cpu_startup() on mac68k to assert
	 * XXX this (export kernel_virtual_end?).
	 */
	nextva += RELOC(physmem, psize_t) << PGSHIFT;
	nextva = pmap_round_ptpage(nextva);
	if (nextva > RELOC(kernel_virtual_max, vaddr_t) ||
	    nextva < RELOC(kernel_virtual_start, vaddr_t)) {
		/* clamp it. */
		nextva = RELOC(kernel_virtual_max, vaddr_t);
	}

	/*
	 * This marks the end of UVM-managed kernel virtual space,
	 * until such time as pmap_growkernel() is called to expand
	 * it.
	 */
	va_ranges[VA_RANGE_DEFAULT].start_va = VM_MIN_KERNEL_ADDRESS;
	va_ranges[VA_RANGE_DEFAULT].end_va = nextva;
	RELOC(kernel_virtual_end, vaddr_t) = nextva;

	/*
	 * Now, compute the number of PT pages needed to map the VA
	 * ranges above and allocate them.
	 */
	size_t nptpages, total_ptpages = 0;
	for (r = 0; r < NRANGES; r++) {
		var = &va_ranges[r];
		nptpages = (var->end_va - var->start_va) / PTPAGEVASZ;
		var->start_ptp = nextpa;
		nextpa += nptpages * PAGE_SIZE;
		var->end_ptp = nextpa;
		total_ptpages += nptpages;
	}

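	/*
	 * Rough worked example (illustrative only; assumes 4KB pages and
	 * 4-byte PTEs, in which case a PT page holds 1024 PTEs and
	 * PTPAGEVASZ works out to 4MB): a machine with 32MB of RAM and
	 * no SYSMAP_VA needs (32MB + the VA already carved out above)
	 * divided by 4MB, i.e. on the order of 9 or 10 PT pages, all
	 * taken from nextpa right here.
	 */
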
4013 */ 4014 void (*alloc_checkfn)(paddr_t, paddr_t) = (void *) 4015 PMAP_BOOTSTRAP_RELOC_GLOB(pmap_machine_check_bootstrap_allocations); 4016 (*alloc_checkfn)(nextpa, reloff); 4017 #endif 4018 4019 /* 4020 * The bulk of the dynamic memory allocation is done (there 4021 * may be more below if we have to allocate more inner segment 4022 * table pages, but we'll burn that bridge when we come to it). 4023 * 4024 * Zero out all of these freshly-allocated pages. 4025 */ 4026 pte = (pt_entry_t *)PMAP_BOOTSTRAP_RELOC_PA(kernimg_endpa); 4027 epte = (pt_entry_t *)PMAP_BOOTSTRAP_RELOC_PA(nextpa); 4028 while (pte < epte) { 4029 *pte++ = 0; 4030 } 4031 4032 /* 4033 * Ok, let's get to mapping stuff! Almost everything is in 4034 * the default VA range. 4035 */ 4036 var = &va_ranges[VA_RANGE_DEFAULT]; 4037 4038 /* 4039 * Kernel text - read-only. 4040 * 4041 * ...that is, unless, a platform as some quirky requirement 4042 * (hello mac68k!). This hook lets a platform specify an 4043 * alternate proto PTE for the kernel text (in the mac68k case, 4044 * it will be read/write write-though-cacheable). Once the 4045 * kernel is up and running on its own mappings, machine-specific 4046 * code can perform any fixups as necessary. 4047 */ 4048 #ifndef PMAP_BOOTSTRAP_TEXT_PROTO_PTE 4049 #define PMAP_BOOTSTRAP_TEXT_PROTO_PTE proto_ro_pte 4050 #endif 4051 pa = PMAP_BOOTSTRAP_VA_TO_PA(m68k_trunc_page(&kernel_text)); 4052 pte = VA_PTE_BASE(&kernel_text, var); 4053 epte = VA_PTE_BASE(&etext, var); 4054 while (pte < epte) { 4055 *pte++ = PMAP_BOOTSTRAP_TEXT_PROTO_PTE | pa; 4056 pa += PAGE_SIZE; 4057 entry_count++; 4058 } 4059 4060 /* Remainder of kernel image - read-write. */ 4061 epte = VA_PTE_BASE(PMAP_BOOTSTRAP_PA_TO_VA(kernimg_endpa), var); 4062 while (pte < epte) { 4063 *pte++ = proto_rw_pte | pa; 4064 pa += PAGE_SIZE; 4065 entry_count++; 4066 } 4067 4068 /* lwp0 u-area - read-write. */ 4069 pa = lwp0upa; 4070 pte = VA_PTE_BASE(RELOC(lwp0uarea, vaddr_t), var); 4071 epte = VA_PTE_BASE(RELOC(lwp0uarea, vaddr_t) + USPACE, var); 4072 while (pte < epte) { 4073 *pte++ = proto_rw_pte | pa; 4074 pa += PAGE_SIZE; 4075 entry_count++; 4076 } 4077 4078 /* Kernel lev1map - read-write, cache-inhibited. */ 4079 pte = VA_PTE_BASE(kern_lev1va, var); 4080 *pte = proto_rw_ci_pte | kern_lev1pa; 4081 entry_count++; 4082 4083 /* 4084 * Kernel leaf PT pages - read-write, cache-inhibited. 4085 * 4086 * These will be in a different VA range if the machine 4087 * defines SYSMAP_VA. 4088 */ 4089 va = RELOC(kernel_ptes, vaddr_t); 4090 pt_entry_t *kptes = (pt_entry_t *)va; 4091 struct va_range *kpte_var = &va_ranges[VA_RANGE_KPTES]; 4092 4093 for (r = 0; r < NRANGES; r++) { 4094 var = &va_ranges[r]; 4095 va = (vaddr_t)(&kptes[m68k_btop(var->start_va)]); 4096 pte = VA_PTE_BASE(va, kpte_var); 4097 for (pa = var->start_ptp; pa < var->end_ptp; pa += PAGE_SIZE) { 4098 *pte++ = proto_rw_ci_pte | pa; 4099 entry_count++; 4100 } 4101 } 4102 4103 /* 4104 * Now perform any machine-specific mappings at VAs 4105 * allocated earlier. 
4106 */ 4107 pmbm = (const struct pmap_bootmap *) 4108 PMAP_BOOTSTRAP_RELOC_GLOB(machine_bootmap); 4109 for (; pmbm->pmbm_vaddr != (vaddr_t)-1; pmbm++) { 4110 pt_entry_t proto; 4111 4112 if (pmbm->pmbm_size == 0 || 4113 (pmbm->pmbm_flags & (PMBM_F_VAONLY | PMBM_F_KEEPOUT))) { 4114 continue; 4115 } 4116 if (pmbm->pmbm_flags & PMBM_F_FIXEDVA) { 4117 va = pmbm->pmbm_vaddr; 4118 } else { 4119 va = *(vaddr_t *) 4120 PMAP_BOOTSTRAP_RELOC_GLOB(pmbm->pmbm_vaddr_ptr); 4121 } 4122 for (r = 0; r < NRANGES; r++) { 4123 var = &va_ranges[r]; 4124 if (VA_IN_RANGE(va, var)) { 4125 break; 4126 } 4127 } 4128 pa = pmbm->pmbm_paddr; 4129 pte = VA_PTE_BASE(va, var); 4130 switch (pmbm->pmbm_flags & (PMBM_F_CI|PMBM_F_RO)) { 4131 case PMBM_F_CI|PMBM_F_RO: 4132 proto = proto_rw_ci_pte | PTE_WP; 4133 break; 4134 case PMBM_F_CI: 4135 proto = proto_rw_ci_pte; 4136 break; 4137 case PMBM_F_RO: 4138 proto = proto_ro_pte; 4139 break; 4140 default: 4141 proto = proto_rw_pte; 4142 break; 4143 } 4144 for (vsize_t size = m68k_round_page(pmbm->pmbm_size); 4145 size != 0; 4146 va += PAGE_SIZE, pa += PAGE_SIZE, size -= PAGE_SIZE) { 4147 *pte++ = proto | pa; 4148 entry_count++; 4149 } 4150 } 4151 4152 /* 4153 * Now that all of the invidual VAs are mapped in the leaf 4154 * tables, it's time to link those tables into the segment 4155 * table. 4156 * 4157 * For the 2-level case, this is trivial. For the 3-level 4158 * case, we will have to allocate inner segment tables. 4159 */ 4160 for (r = 0; r < NRANGES; r++) { 4161 var = &va_ranges[r]; 4162 if (use_3l) { 4163 pt_entry_t *stes, *stes1 = (pt_entry_t *) 4164 PMAP_BOOTSTRAP_RELOC_PA(kern_lev1pa); 4165 for (va = var->start_va, pa = var->start_ptp; 4166 pa < var->end_ptp; 4167 va += NBSEG3L, pa += TBL40_L3_SIZE) { 4168 unsigned int ri = LA40_RI(va); 4169 if ((stes1[ri] & UTE40_RESIDENT) == 0) { 4170 /* 4171 * Level-2 table for this segment 4172 * needed. 4173 */ 4174 if (stnext_pa == stnext_endpa) { 4175 /* 4176 * No more slots left in the 4177 * last page we allocated for 4178 * segment tables. Grab 4179 * another one. 4180 */ 4181 stnext_pa = nextpa; 4182 nextpa += PAGE_SIZE; 4183 stnext_endpa = nextpa; 4184 nstpages++; 4185 #ifdef PMAP_MACHINE_CHECK_BOOTSTRAP_ALLOCATIONS 4186 (*alloc_checkfn)(nextpa, 4187 reloff); 4188 #endif 4189 /* 4190 * Zero out the new inner 4191 * segment table page. 4192 */ 4193 pte = (pt_entry_t *) 4194 PMAP_BOOTSTRAP_RELOC_PA( 4195 stnext_pa); 4196 epte = (pt_entry_t *) 4197 PMAP_BOOTSTRAP_RELOC_PA( 4198 stnext_endpa); 4199 while (pte < epte) { 4200 *pte++ = 0; 4201 } 4202 } 4203 stes1[ri] = proto_ste | stnext_pa; 4204 stnext_pa += TBL40_L2_SIZE; 4205 } 4206 stes = (pt_entry_t *) 4207 PMAP_BOOTSTRAP_RELOC_PA( 4208 stes1[ri] & UTE40_PTA); 4209 stes[LA40_PI(va)] = proto_ste | pa; 4210 } 4211 } else { 4212 pt_entry_t *stes = (pt_entry_t *) 4213 PMAP_BOOTSTRAP_RELOC_PA(kern_lev1pa); 4214 for (va = var->start_va, pa = var->start_ptp; 4215 pa < var->end_ptp; 4216 va += NBSEG2L, pa += PAGE_SIZE) { 4217 stes[LA2L_RI(va)] = proto_ste | pa; 4218 } 4219 } 4220 } 4221 4222 /* Instrumentation. */ 4223 RELOC(pmap_nkptpages_initial_ev.ev_count32, uint32_t) = 4224 RELOC(pmap_nkptpages_current_ev.ev_count32, uint32_t) = total_ptpages; 4225 RELOC(pmap_nkstpages_initial_ev.ev_count32, uint32_t) = 4226 RELOC(pmap_nkstpages_current_ev.ev_count32, uint32_t) = nstpages; 4227 4228 /* 4229 * Record the number of wired mappings we created above 4230 * in the kernel pmap stats. 
	/* Instrumentation. */
	RELOC(pmap_nkptpages_initial_ev.ev_count32, uint32_t) =
	    RELOC(pmap_nkptpages_current_ev.ev_count32, uint32_t) = total_ptpages;
	RELOC(pmap_nkstpages_initial_ev.ev_count32, uint32_t) =
	    RELOC(pmap_nkstpages_current_ev.ev_count32, uint32_t) = nstpages;

	/*
	 * Record the number of wired mappings we created above
	 * in the kernel pmap stats.
	 */
	RELOC(kernel_pmap_store.pm_stats.resident_count, long) = entry_count;
	RELOC(kernel_pmap_store.pm_stats.wired_count, long) = entry_count;

	/*
	 * Stash any left-over segment table space for use by
	 * pmap_growkernel() later.
	 */
	RELOC(kernel_stnext_pa, paddr_t) = stnext_pa;
	RELOC(kernel_stnext_endpa, paddr_t) = stnext_endpa;

	return nextpa;
}

/*
 * pmap_bootstrap2:
 *
 *	Phase 2 of bootstrapping virtual memory.  This is called after
 *	the MMU has been enabled to finish setting up run-time-computed
 *	global pmap data, plus the lwp0 u-area, curlwp, and curpcb.
 */
void *
pmap_bootstrap2(void)
{
	/* Set up the MMU class; needed before anything else. */
	pmap_mmuclass_init();

	/* Early low-level UVM initialization. */
	uvmexp.pagesize = NBPG;		/* XXX ick, NBPG */
	uvm_md_init();

	/* Initialize prototype PTEs; needed before anything else is mapped. */
	pmap_pte_proto_init();

	/* Initialize the kernel pmap. */
	pmap_pinit(pmap_kernel(), Sysseg_pa);

	/* Initialize lwp0 u-area, curlwp, and curpcb. */
	memset((void *)lwp0uarea, 0, USPACE);
	uvm_lwp_setuarea(&lwp0, lwp0uarea);
	curlwp = &lwp0;
	curpcb = lwp_getpcb(&lwp0);

	/*
	 * Initialize the source/destination control registers for
	 * movs.
	 */
	setsfc(FC_USERD);
	setdfc(FC_USERD);

	return (void *)lwp0uarea;
}
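
/*
 * A minimal sketch of how the two phases above are expected to slot into
 * machine-dependent startup code.  Everything here other than
 * pmap_bootstrap1(), pmap_bootstrap2(), and Sysseg_pa is a placeholder
 * for MD code that lives elsewhere:
 *
 *	nextpa = pmap_bootstrap1(firstpa_after_kernel, reloff);
 *	md_enable_mmu(Sysseg_pa);		(MMU turned on here)
 *	uarea = pmap_bootstrap2();
 *
 * pmap_bootstrap1() returns the first physical address it did not consume,
 * and pmap_bootstrap2() returns the (now mapped and zeroed) lwp0 u-area.
 */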